PyPI - codejury - Versions diffs - 0.1.0__tar.gz → 0.2.0__tar.gz - Mend

codejury 0.1.0.tar.gz → 0.2.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (110) hide show

{codejury-0.1.0 → codejury-0.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: codejury
-Version: 0.1.0
+Version: 0.2.0
 Summary: General-purpose Application Security AI audit framework -- five-layer architecture, capabilities as first-class data
 Author: 4234288
 License-Expression: MIT
@@ -84,6 +84,12 @@ pip install 'codejury[anthropic]'    # add the provider you'll use (anthropic /
 ## Usage
+A real audit calls a model, so set the provider's key first (see `.env.example`):
+```bash
+export ANTHROPIC_API_KEY=sk-ant-...   # or OPENAI_API_KEY for --provider openai
+```
 ```bash
 # Audit a unified diff against the capability library
 git diff | codejury audit --orchestrator debate --provider anthropic --format markdown -
@@ -91,15 +97,35 @@ git diff | codejury audit --orchestrator debate --provider anthropic --format ma
 # Run a named task preset (tasks/*.yaml)
 git diff | codejury run audit_diff_debate -
-# Score detection quality against the golden cases (needs a provider key)
+# Score detection quality against the golden cases
 codejury eval --provider anthropic
+# Through a LiteLLM proxy / gateway. The flags default to CODEJURY_API_BASE /
+# CODEJURY_API_KEY / CODEJURY_MODEL, so with those in a sourced .env this is just:
+#   codejury audit --provider litellm -
+git diff | codejury audit --provider litellm \
+  --api-base https://litellm.example.com --api-key "$LITELLM_KEY" --model your-alias -
 # No API key needed: prove the pipeline composes with mock layers
 codejury dry-run
 ```
-`audit` and `run` read a diff from a file argument or stdin (`-`). Real providers
-read their key from the environment (e.g. `ANTHROPIC_API_KEY`).
+`audit` and `run` read a diff from a file argument or stdin (`-`). The provider
+key is read from the environment: `ANTHROPIC_API_KEY` for `--provider anthropic`,
+`OPENAI_API_KEY` for `--provider openai`. Without a key the model providers
+raise an authentication error; `codejury dry-run` needs no key.
+A task YAML can pin the provider, model, and base URL (the key stays in the
+environment), so `codejury run` works through a proxy too:
+```yaml
+# mytasks/proxy_scan.yaml -> codejury run proxy_scan --tasks mytasks
+name: proxy_scan
+orchestrator: debate
+provider: litellm
+model: your-alias
+api_base: https://litellm.example.com   # key from CODEJURY_API_KEY
+```
 ## Development

{codejury-0.1.0 → codejury-0.2.0}/README.md RENAMED Viewed

@@ -55,6 +55,12 @@ pip install 'codejury[anthropic]'    # add the provider you'll use (anthropic /
 ## Usage
+A real audit calls a model, so set the provider's key first (see `.env.example`):
+```bash
+export ANTHROPIC_API_KEY=sk-ant-...   # or OPENAI_API_KEY for --provider openai
+```
 ```bash
 # Audit a unified diff against the capability library
 git diff | codejury audit --orchestrator debate --provider anthropic --format markdown -
@@ -62,15 +68,35 @@ git diff | codejury audit --orchestrator debate --provider anthropic --format ma
 # Run a named task preset (tasks/*.yaml)
 git diff | codejury run audit_diff_debate -
-# Score detection quality against the golden cases (needs a provider key)
+# Score detection quality against the golden cases
 codejury eval --provider anthropic
+# Through a LiteLLM proxy / gateway. The flags default to CODEJURY_API_BASE /
+# CODEJURY_API_KEY / CODEJURY_MODEL, so with those in a sourced .env this is just:
+#   codejury audit --provider litellm -
+git diff | codejury audit --provider litellm \
+  --api-base https://litellm.example.com --api-key "$LITELLM_KEY" --model your-alias -
 # No API key needed: prove the pipeline composes with mock layers
 codejury dry-run
 ```
-`audit` and `run` read a diff from a file argument or stdin (`-`). Real providers
-read their key from the environment (e.g. `ANTHROPIC_API_KEY`).
+`audit` and `run` read a diff from a file argument or stdin (`-`). The provider
+key is read from the environment: `ANTHROPIC_API_KEY` for `--provider anthropic`,
+`OPENAI_API_KEY` for `--provider openai`. Without a key the model providers
+raise an authentication error; `codejury dry-run` needs no key.
+A task YAML can pin the provider, model, and base URL (the key stays in the
+environment), so `codejury run` works through a proxy too:
+```yaml
+# mytasks/proxy_scan.yaml -> codejury run proxy_scan --tasks mytasks
+name: proxy_scan
+orchestrator: debate
+provider: litellm
+model: your-alias
+api_base: https://litellm.example.com   # key from CODEJURY_API_KEY
+```
 ## Development

{codejury-0.1.0 → codejury-0.2.0}/codejury/assembly.py RENAMED Viewed

@@ -29,15 +29,19 @@ from codejury.sources.base import Source
 STRATEGIES = ("single", "pipeline", "debate", "reflexion")
 PROVIDERS = ("anthropic", "openai", "litellm")
 DEFAULT_MODEL = os.environ.get("CODEJURY_MODEL", "claude-sonnet-4-6")
+DEFAULT_API_BASE = os.environ.get("CODEJURY_API_BASE")
+DEFAULT_API_KEY = os.environ.get("CODEJURY_API_KEY")
-def make_provider(name: str, *, retries: int = 0) -> Provider:
+def make_provider(
+    name: str, *, api_key: str | None = None, api_base: str | None = None, retries: int = 0
+) -> Provider:
     if name == "openai":
-        provider: Provider = OpenAIProvider()
+        provider: Provider = OpenAIProvider(api_key=api_key, base_url=api_base)
     elif name == "litellm":
-        provider = LiteLLMProvider()
+        provider = LiteLLMProvider(api_key=api_key, api_base=api_base)
     else:
-        provider = AnthropicProvider()
+        provider = AnthropicProvider(api_key=api_key, base_url=api_base)
     if retries > 0:
         provider = RetryProvider(provider, max_attempts=retries + 1)
     return provider

{codejury-0.1.0 → codejury-0.2.0}/codejury/cli.py RENAMED Viewed

@@ -13,6 +13,8 @@ import sys
 from codejury.agents.mock import MockAgent
 from codejury.assembly import (
+    DEFAULT_API_BASE,
+    DEFAULT_API_KEY,
     DEFAULT_MODEL,
     PROVIDERS,
     STRATEGIES,
@@ -135,6 +137,8 @@ def main(argv: list[str] | None = None) -> int:
     audit_p.add_argument("--model", default=DEFAULT_MODEL)
     audit_p.add_argument("--max-tokens", type=int, default=2048)
     audit_p.add_argument("--retries", type=int, default=0, help="provider retry attempts on failure")
+    audit_p.add_argument("--api-base", default=DEFAULT_API_BASE, help="provider base URL (env: CODEJURY_API_BASE)")
+    audit_p.add_argument("--api-key", default=DEFAULT_API_KEY, help="provider API key (env: CODEJURY_API_KEY)")
     run_p = sub.add_parser("run", help="run a named task preset against a unified diff")
     run_p.add_argument("task", help="task name")
@@ -148,6 +152,8 @@ def main(argv: list[str] | None = None) -> int:
     eval_p.add_argument("--capabilities", default=CAPABILITIES_DIR, help="capability YAML directory")
     eval_p.add_argument("--provider", choices=PROVIDERS, default="anthropic")
     eval_p.add_argument("--model", default=DEFAULT_MODEL)
+    eval_p.add_argument("--api-base", default=DEFAULT_API_BASE, help="provider base URL (env: CODEJURY_API_BASE)")
+    eval_p.add_argument("--api-key", default=DEFAULT_API_KEY, help="provider API key (env: CODEJURY_API_KEY)")
     args = parser.parse_args(argv)
@@ -155,7 +161,9 @@ def main(argv: list[str] | None = None) -> int:
         results = audit(
             _read_diff(args.diff),
             load_capabilities(args.capabilities),
-            provider=make_provider(args.provider, retries=args.retries),
+            provider=make_provider(
+                args.provider, api_key=args.api_key, api_base=args.api_base, retries=args.retries
+            ),
             model=args.model,
             max_tokens=args.max_tokens,
             strategy=args.orchestrator,
@@ -175,12 +183,18 @@ def main(argv: list[str] | None = None) -> int:
         return 0
     if args.command == "eval":
-        metrics = evaluate(
-            load_cases(args.golden),
-            load_capabilities(args.capabilities),
-            provider=make_provider(args.provider),
-            model=args.model,
-        )
+        try:
+            metrics = evaluate(
+                load_cases(args.golden),
+                load_capabilities(args.capabilities),
+                provider=make_provider(args.provider, api_key=args.api_key, api_base=args.api_base),
+                model=args.model,
+            )
+        except Exception as exc:
+            # e.g. a missing API key surfaces as a provider auth error -- report it
+            # as one line, not a traceback (audit gets this via the orchestrator).
+            print(f"eval failed: {exc}")
+            return 1
         print(_render_metrics(metrics))
         return 0

codejury-0.2.0/codejury/data/golden/authn_jwt_noverify_vuln.yaml ADDED Viewed

@@ -0,0 +1,6 @@
+capability: authn
+vulnerable: true
+code: |
+  def user_id(token):
+      claims = jwt.decode(token, options={"verify_signature": False})
+      return claims["sub"]

codejury-0.2.0/codejury/data/golden/authn_jwt_verified_safe.yaml ADDED Viewed

@@ -0,0 +1,6 @@
+capability: authn
+vulnerable: false
+code: |
+  def user_id(token):
+      claims = jwt.decode(token, KEY, algorithms=["RS256"], audience=AUD, issuer=ISS)
+      return claims["sub"]

codejury-0.2.0/codejury/data/golden/authn_sha256_checksum_safe.yaml ADDED Viewed

@@ -0,0 +1,6 @@
+capability: authn
+vulnerable: false
+code: |
+  def file_dedup_key(data: bytes) -> str:
+      # content hash for cache dedup -- NOT a password
+      return hashlib.sha256(data).hexdigest()

codejury-0.2.0/codejury/data/golden/authz_idor_vuln.yaml ADDED Viewed

@@ -0,0 +1,5 @@
+capability: authz
+vulnerable: true
+code: |
+  def get_invoice(request):
+      return Invoice.objects.get(id=request.GET["id"])

codejury-0.2.0/codejury/data/golden/authz_owner_safe.yaml ADDED Viewed

@@ -0,0 +1,5 @@
+capability: authz
+vulnerable: false
+code: |
+  def get_invoice(request):
+      return Invoice.objects.get(id=request.GET["id"], owner=request.user)

codejury-0.2.0/codejury/data/golden/cmdi_ossystem_vuln.yaml ADDED Viewed

@@ -0,0 +1,5 @@
+capability: input_validation
+vulnerable: true
+code: |
+  def ping(host):
+      os.system("ping -c 1 " + host)

codejury-0.2.0/codejury/data/golden/cmdi_subprocess_safe.yaml ADDED Viewed

@@ -0,0 +1,5 @@
+capability: input_validation
+vulnerable: false
+code: |
+  def ping(host):
+      subprocess.run(["ping", "-c", "1", host], shell=False)

codejury-0.2.0/codejury/data/golden/crypto_aesgcm_safe.yaml ADDED Viewed

@@ -0,0 +1,6 @@
+capability: crypto
+vulnerable: false
+code: |
+  def encrypt(data, key):
+      nonce = os.urandom(12)
+      return nonce, AESGCM(key).encrypt(nonce, data, None)

codejury-0.2.0/codejury/data/golden/crypto_ecb_vuln.yaml ADDED Viewed

@@ -0,0 +1,6 @@
+capability: crypto
+vulnerable: true
+code: |
+  def encrypt(data, key):
+      cipher = AES.new(key, AES.MODE_ECB)
+      return cipher.encrypt(pad(data, 16))

codejury-0.2.0/codejury/data/golden/path_contained_safe.yaml ADDED Viewed

@@ -0,0 +1,8 @@
+capability: input_validation
+vulnerable: false
+code: |
+  def read_upload(filename):
+      target = (UPLOAD_DIR / filename).resolve()
+      if not target.is_relative_to(UPLOAD_DIR):
+          raise ValueError("path escapes upload dir")
+      return target.read_text()

codejury-0.2.0/codejury/data/golden/path_traversal_vuln.yaml ADDED Viewed

@@ -0,0 +1,5 @@
+capability: input_validation
+vulnerable: true
+code: |
+  def read_upload(filename):
+      return open(os.path.join(UPLOAD_DIR, filename)).read()

codejury-0.2.0/codejury/data/golden/secrets_env_safe.yaml ADDED Viewed

@@ -0,0 +1,5 @@
+capability: secrets
+vulnerable: false
+code: |
+  STRIPE_KEY = os.environ["STRIPE_KEY"]
+  client = stripe.Client(STRIPE_KEY)

codejury-0.2.0/codejury/data/golden/secrets_hardcoded_vuln.yaml ADDED Viewed

@@ -0,0 +1,5 @@
+capability: secrets
+vulnerable: true
+code: |
+  API_KEY = "9c1185a5c5e9fc54612808977ee8f548b2258d31"
+  client = PaymentClient(api_key=API_KEY)

codejury-0.2.0/codejury/data/golden/sqli_format_vuln.yaml ADDED Viewed

@@ -0,0 +1,5 @@
+capability: input_validation
+vulnerable: true
+code: |
+  def find(name):
+      cursor.execute("SELECT * FROM users WHERE name = '{}'".format(name))

codejury-0.2.0/codejury/data/golden/xss_innerhtml_constant_safe.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+capability: output_encoding
+vulnerable: false
+code: |
+  function render() {
+      // static markup, no untrusted input
+      el.innerHTML = "<b>Welcome back</b>";
+  }

codejury-0.2.0/codejury/data/golden/xss_innerhtml_vuln.yaml ADDED Viewed

@@ -0,0 +1,6 @@
+capability: output_encoding
+vulnerable: true
+code: |
+  function render(name) {
+      el.innerHTML = "Hello " + name;
+  }

{codejury-0.1.0 → codejury-0.2.0}/codejury/providers/anthropic.py RENAMED Viewed

@@ -17,8 +17,11 @@ from codejury.providers.base import CompletionResult, Message, Provider
 class AnthropicProvider(Provider):
-    def __init__(self, *, api_key: str | None = None, client: Any | None = None) -> None:
+    def __init__(
+        self, *, api_key: str | None = None, base_url: str | None = None, client: Any | None = None
+    ) -> None:
         self._api_key = api_key
+        self._base_url = base_url
         self._client = client
     def _get_client(self) -> Any:
@@ -29,7 +32,12 @@ class AnthropicProvider(Provider):
                 raise RuntimeError(
                     "anthropic SDK not installed; run: pip install 'codejury[anthropic]'"
                 ) from exc
-            self._client = anthropic.Anthropic(api_key=self._api_key)
+            kwargs: dict[str, Any] = {}
+            if self._api_key:
+                kwargs["api_key"] = self._api_key
+            if self._base_url:
+                kwargs["base_url"] = self._base_url
+            self._client = anthropic.Anthropic(**kwargs)
         return self._client
     def complete(

{codejury-0.1.0 → codejury-0.2.0}/codejury/tasks/base.py RENAMED Viewed

@@ -9,7 +9,14 @@ from __future__ import annotations
 from dataclasses import dataclass
 from typing import Any
-from codejury.assembly import DEFAULT_MODEL, build_orchestration, make_provider, run_over_source
+from codejury.assembly import (
+    DEFAULT_API_BASE,
+    DEFAULT_API_KEY,
+    DEFAULT_MODEL,
+    build_orchestration,
+    make_provider,
+    run_over_source,
+)
 from codejury.domain.capability import Capability
 from codejury.domain.result import AnalysisResult
 from codejury.sources.base import Source
@@ -24,6 +31,7 @@ class Task:
     capabilities: tuple[str, ...] | None = None  # capability ids to check; None = all
     max_tokens: int = 2048
     retries: int = 0  # provider retry attempts on transient failure
+    api_base: str | None = None  # provider base URL (e.g. a LiteLLM proxy); the key stays in the env
     @classmethod
     def from_dict(cls, data: dict[str, Any]) -> Task:
@@ -36,6 +44,7 @@ class Task:
             capabilities=tuple(caps) if caps is not None else None,
             max_tokens=int(data.get("max_tokens", 2048)),
             retries=int(data.get("retries", 0)),
+            api_base=data.get("api_base"),
         )
     def select(self, capabilities: list[Capability]) -> list[Capability]:
@@ -48,7 +57,13 @@ class Task:
 def run_task(
     task: Task, source: Source, capabilities: list[Capability]
 ) -> list[tuple[str, AnalysisResult]]:
-    provider = make_provider(task.provider, retries=task.retries)
+    # api_base may come from the task (non-secret URL); the key only from the env.
+    provider = make_provider(
+        task.provider,
+        api_key=DEFAULT_API_KEY,
+        api_base=task.api_base or DEFAULT_API_BASE,
+        retries=task.retries,
+    )
     agents, orchestrator = build_orchestration(
         task.orchestrator, provider=provider, model=task.model, max_tokens=task.max_tokens
     )

{codejury-0.1.0 → codejury-0.2.0}/codejury.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: codejury
-Version: 0.1.0
+Version: 0.2.0
 Summary: General-purpose Application Security AI audit framework -- five-layer architecture, capabilities as first-class data
 Author: 4234288
 License-Expression: MIT
@@ -84,6 +84,12 @@ pip install 'codejury[anthropic]'    # add the provider you'll use (anthropic /
 ## Usage
+A real audit calls a model, so set the provider's key first (see `.env.example`):
+```bash
+export ANTHROPIC_API_KEY=sk-ant-...   # or OPENAI_API_KEY for --provider openai
+```
 ```bash
 # Audit a unified diff against the capability library
 git diff | codejury audit --orchestrator debate --provider anthropic --format markdown -
@@ -91,15 +97,35 @@ git diff | codejury audit --orchestrator debate --provider anthropic --format ma
 # Run a named task preset (tasks/*.yaml)
 git diff | codejury run audit_diff_debate -
-# Score detection quality against the golden cases (needs a provider key)
+# Score detection quality against the golden cases
 codejury eval --provider anthropic
+# Through a LiteLLM proxy / gateway. The flags default to CODEJURY_API_BASE /
+# CODEJURY_API_KEY / CODEJURY_MODEL, so with those in a sourced .env this is just:
+#   codejury audit --provider litellm -
+git diff | codejury audit --provider litellm \
+  --api-base https://litellm.example.com --api-key "$LITELLM_KEY" --model your-alias -
 # No API key needed: prove the pipeline composes with mock layers
 codejury dry-run
 ```
-`audit` and `run` read a diff from a file argument or stdin (`-`). Real providers
-read their key from the environment (e.g. `ANTHROPIC_API_KEY`).
+`audit` and `run` read a diff from a file argument or stdin (`-`). The provider
+key is read from the environment: `ANTHROPIC_API_KEY` for `--provider anthropic`,
+`OPENAI_API_KEY` for `--provider openai`. Without a key the model providers
+raise an authentication error; `codejury dry-run` needs no key.
+A task YAML can pin the provider, model, and base URL (the key stays in the
+environment), so `codejury run` works through a proxy too:
+```yaml
+# mytasks/proxy_scan.yaml -> codejury run proxy_scan --tasks mytasks
+name: proxy_scan
+orchestrator: debate
+provider: litellm
+model: your-alias
+api_base: https://litellm.example.com   # key from CODEJURY_API_KEY
+```
 ## Development

{codejury-0.1.0 → codejury-0.2.0}/codejury.egg-info/SOURCES.txt RENAMED Viewed

@@ -31,9 +31,25 @@ codejury/data/capabilities/output_encoding.yaml
 codejury/data/capabilities/secrets.yaml
 codejury/data/capabilities/session.yaml
 codejury/data/golden/authn_bcrypt_password.yaml
+codejury/data/golden/authn_jwt_noverify_vuln.yaml
+codejury/data/golden/authn_jwt_verified_safe.yaml
+codejury/data/golden/authn_sha256_checksum_safe.yaml
 codejury/data/golden/authn_sha256_password.yaml
+codejury/data/golden/authz_idor_vuln.yaml
+codejury/data/golden/authz_owner_safe.yaml
+codejury/data/golden/cmdi_ossystem_vuln.yaml
+codejury/data/golden/cmdi_subprocess_safe.yaml
+codejury/data/golden/crypto_aesgcm_safe.yaml
+codejury/data/golden/crypto_ecb_vuln.yaml
+codejury/data/golden/path_contained_safe.yaml
+codejury/data/golden/path_traversal_vuln.yaml
+codejury/data/golden/secrets_env_safe.yaml
+codejury/data/golden/secrets_hardcoded_vuln.yaml
+codejury/data/golden/sqli_format_vuln.yaml
 codejury/data/golden/sqli_fstring_query.yaml
 codejury/data/golden/sqli_parameterized_query.yaml
+codejury/data/golden/xss_innerhtml_constant_safe.yaml
+codejury/data/golden/xss_innerhtml_vuln.yaml
 codejury/data/tasks/audit_diff_debate.yaml
 codejury/data/tasks/quick_scan_single.yaml
 codejury/domain/__init__.py

{codejury-0.1.0 → codejury-0.2.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "codejury"
-version = "0.1.0"
+version = "0.2.0"
 description = "General-purpose Application Security AI audit framework -- five-layer architecture, capabilities as first-class data"
 readme = "README.md"
 requires-python = ">=3.12"

{codejury-0.1.0 → codejury-0.2.0}/tests/test_assembly.py RENAMED Viewed

@@ -1,11 +1,15 @@
+from types import SimpleNamespace
 import pytest
-from codejury.assembly import build_orchestration, run_over_source
+from codejury.assembly import build_orchestration, make_provider, run_over_source
 from codejury.domain.capability import Capability
 from codejury.orchestrators.debate import DebateOrchestrator
 from codejury.orchestrators.pipeline import PipelineOrchestrator
 from codejury.orchestrators.reflexion import ReflexionOrchestrator
 from codejury.orchestrators.single import SingleOrchestrator
+from codejury.providers.base import Message
+from codejury.providers.litellm import LiteLLMProvider
 from codejury.providers.mock import MockProvider
 from codejury.sources.mock import MockSource
@@ -25,6 +29,19 @@ def test_build_orchestration_maps_strategy(strategy, orch_cls, roles):
     assert set(agents) == roles
+def test_make_provider_forwards_api_base_and_key():
+    provider = make_provider("litellm", api_base="https://proxy.example", api_key="sk-test")
+    assert isinstance(provider, LiteLLMProvider)
+    captured = {}
+    provider._completion = lambda **kw: captured.update(kw) or SimpleNamespace(
+        choices=[SimpleNamespace(message=SimpleNamespace(content="ok"))]
+    )
+    provider.complete(system="s", messages=[Message(role="user", content="x")], model="m", max_tokens=8)
+    assert captured["api_base"] == "https://proxy.example"
+    assert captured["api_key"] == "sk-test"
 def test_run_over_source_runs_each_artifact():
     provider = MockProvider(default='{"verdicts": [{"sub_capability": "x", "status": "SECURE"}]}')
     agents, orchestrator = build_orchestration("single", provider=provider, model="m", max_tokens=8)

{codejury-0.1.0 → codejury-0.2.0}/tests/test_evaluation.py RENAMED Viewed

@@ -1,7 +1,9 @@
 import json
-from codejury.domain.capability import load_capability
+from codejury import cli
+from codejury.domain.capability import load_capabilities
 from codejury.evaluation import Metrics, evaluate, load_cases
+from codejury.providers.base import Provider
 from codejury.providers.mock import MockProvider
 from codejury.resources import CAPABILITIES_DIR, GOLDEN_DIR
@@ -36,20 +38,32 @@ def test_golden_cases_load():
     assert vuln.capability == "authn" and vuln.vulnerable is True
-def _caps():
-    return [load_capability(CAPABILITIES_DIR / "authentication.yaml"),
-            load_capability(CAPABILITIES_DIR / "input_validation.yaml")]
 def test_evaluate_always_vulnerable_provider():
-    # 2 vulnerable + 2 safe golden cases; a provider that always flags VULNERABLE
-    # -> every positive is right (recall 1.0) but the safe ones are false positives.
-    m = evaluate(load_cases(GOLDEN_DIR), _caps(), provider=MockProvider(default=_VULN), model="m")
-    assert m.tp == 2 and m.fp == 2 and m.fn == 0 and m.tn == 0
-    assert m.recall == 1.0 and m.precision == 0.5
+    # A provider that always flags VULNERABLE: every vulnerable case is a true
+    # positive (recall 1.0), every safe case a false positive.
+    cases = load_cases(GOLDEN_DIR)
+    n_vuln = sum(c.vulnerable for c in cases)
+    n_safe = len(cases) - n_vuln
+    m = evaluate(cases, load_capabilities(CAPABILITIES_DIR), provider=MockProvider(default=_VULN), model="m")
+    assert m.tp == n_vuln and m.fp == n_safe and m.fn == 0 and m.tn == 0
+    assert m.recall == 1.0
 def test_evaluate_always_secure_provider():
-    m = evaluate(load_cases(GOLDEN_DIR), _caps(), provider=MockProvider(default=_SECURE), model="m")
-    assert m.tp == 0 and m.fn == 2 and m.tn == 2 and m.fp == 0
+    cases = load_cases(GOLDEN_DIR)
+    n_vuln = sum(c.vulnerable for c in cases)
+    n_safe = len(cases) - n_vuln
+    m = evaluate(cases, load_capabilities(CAPABILITIES_DIR), provider=MockProvider(default=_SECURE), model="m")
+    assert m.tp == 0 and m.fp == 0 and m.fn == n_vuln and m.tn == n_safe
     assert m.recall == 0.0
+def test_eval_cli_reports_provider_error_without_traceback(monkeypatch, capsys):
+    class _Boom(Provider):
+        def complete(self, **kwargs):
+            raise RuntimeError("Could not resolve authentication method")
+    monkeypatch.setattr("codejury.cli.make_provider", lambda name: _Boom())
+    rc = cli.main(["eval"])
+    assert rc == 1
+    assert "eval failed" in capsys.readouterr().out

{codejury-0.1.0 → codejury-0.2.0}/tests/test_tasks.py RENAMED Viewed

@@ -16,6 +16,31 @@ def test_from_dict_parses_and_defaults():
     assert task.capabilities == ("authn", "crypto")
     assert task.provider == "anthropic"  # default
     assert task.max_tokens == 2048  # default
+    assert task.api_base is None  # default
+def test_from_dict_reads_api_base():
+    task = Task.from_dict({"name": "t", "provider": "litellm", "api_base": "https://proxy.example"})
+    assert task.api_base == "https://proxy.example"
+def test_run_task_forwards_proxy_config_with_key_from_env(monkeypatch):
+    captured = {}
+    def fake_make_provider(name, **kwargs):
+        captured["name"] = name
+        captured.update(kwargs)
+        return MockProvider(default='{"verdicts": []}')
+    monkeypatch.setattr("codejury.tasks.base.make_provider", fake_make_provider)
+    monkeypatch.setattr("codejury.tasks.base.DEFAULT_API_KEY", "sk-from-env")
+    task = Task(name="t", provider="litellm", api_base="https://proxy.example")
+    run_task(task, MockSource(), [Capability(id="authn", name="A")])
+    assert captured["name"] == "litellm"
+    assert captured["api_base"] == "https://proxy.example"  # from the task
+    assert captured["api_key"] == "sk-from-env"  # from the environment, not the task
 def test_select_filters_by_id_and_none_means_all():