codejury 0.1.0__tar.gz → 0.2.0__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. {codejury-0.1.0 → codejury-0.2.0}/PKG-INFO +30 -4
  2. {codejury-0.1.0 → codejury-0.2.0}/README.md +29 -3
  3. {codejury-0.1.0 → codejury-0.2.0}/codejury/assembly.py +8 -4
  4. {codejury-0.1.0 → codejury-0.2.0}/codejury/cli.py +21 -7
  5. codejury-0.2.0/codejury/data/golden/authn_jwt_noverify_vuln.yaml +6 -0
  6. codejury-0.2.0/codejury/data/golden/authn_jwt_verified_safe.yaml +6 -0
  7. codejury-0.2.0/codejury/data/golden/authn_sha256_checksum_safe.yaml +6 -0
  8. codejury-0.2.0/codejury/data/golden/authz_idor_vuln.yaml +5 -0
  9. codejury-0.2.0/codejury/data/golden/authz_owner_safe.yaml +5 -0
  10. codejury-0.2.0/codejury/data/golden/cmdi_ossystem_vuln.yaml +5 -0
  11. codejury-0.2.0/codejury/data/golden/cmdi_subprocess_safe.yaml +5 -0
  12. codejury-0.2.0/codejury/data/golden/crypto_aesgcm_safe.yaml +6 -0
  13. codejury-0.2.0/codejury/data/golden/crypto_ecb_vuln.yaml +6 -0
  14. codejury-0.2.0/codejury/data/golden/path_contained_safe.yaml +8 -0
  15. codejury-0.2.0/codejury/data/golden/path_traversal_vuln.yaml +5 -0
  16. codejury-0.2.0/codejury/data/golden/secrets_env_safe.yaml +5 -0
  17. codejury-0.2.0/codejury/data/golden/secrets_hardcoded_vuln.yaml +5 -0
  18. codejury-0.2.0/codejury/data/golden/sqli_format_vuln.yaml +5 -0
  19. codejury-0.2.0/codejury/data/golden/xss_innerhtml_constant_safe.yaml +7 -0
  20. codejury-0.2.0/codejury/data/golden/xss_innerhtml_vuln.yaml +6 -0
  21. {codejury-0.1.0 → codejury-0.2.0}/codejury/providers/anthropic.py +10 -2
  22. {codejury-0.1.0 → codejury-0.2.0}/codejury/tasks/base.py +17 -2
  23. {codejury-0.1.0 → codejury-0.2.0}/codejury.egg-info/PKG-INFO +30 -4
  24. {codejury-0.1.0 → codejury-0.2.0}/codejury.egg-info/SOURCES.txt +16 -0
  25. {codejury-0.1.0 → codejury-0.2.0}/pyproject.toml +1 -1
  26. {codejury-0.1.0 → codejury-0.2.0}/tests/test_assembly.py +18 -1
  27. {codejury-0.1.0 → codejury-0.2.0}/tests/test_evaluation.py +27 -13
  28. {codejury-0.1.0 → codejury-0.2.0}/tests/test_tasks.py +25 -0
  29. {codejury-0.1.0 → codejury-0.2.0}/LICENSE +0 -0
  30. {codejury-0.1.0 → codejury-0.2.0}/codejury/__init__.py +0 -0
  31. {codejury-0.1.0 → codejury-0.2.0}/codejury/agents/__init__.py +0 -0
  32. {codejury-0.1.0 → codejury-0.2.0}/codejury/agents/base.py +0 -0
  33. {codejury-0.1.0 → codejury-0.2.0}/codejury/agents/debate.py +0 -0
  34. {codejury-0.1.0 → codejury-0.2.0}/codejury/agents/mock.py +0 -0
  35. {codejury-0.1.0 → codejury-0.2.0}/codejury/agents/parsing.py +0 -0
  36. {codejury-0.1.0 → codejury-0.2.0}/codejury/agents/verifier.py +0 -0
  37. {codejury-0.1.0 → codejury-0.2.0}/codejury/data/capabilities/authentication.yaml +0 -0
  38. {codejury-0.1.0 → codejury-0.2.0}/codejury/data/capabilities/authorization.yaml +0 -0
  39. {codejury-0.1.0 → codejury-0.2.0}/codejury/data/capabilities/business_logic.yaml +0 -0
  40. {codejury-0.1.0 → codejury-0.2.0}/codejury/data/capabilities/crypto.yaml +0 -0
  41. {codejury-0.1.0 → codejury-0.2.0}/codejury/data/capabilities/data_protection.yaml +0 -0
  42. {codejury-0.1.0 → codejury-0.2.0}/codejury/data/capabilities/dependency_config.yaml +0 -0
  43. {codejury-0.1.0 → codejury-0.2.0}/codejury/data/capabilities/error_logging.yaml +0 -0
  44. {codejury-0.1.0 → codejury-0.2.0}/codejury/data/capabilities/input_validation.yaml +0 -0
  45. {codejury-0.1.0 → codejury-0.2.0}/codejury/data/capabilities/output_encoding.yaml +0 -0
  46. {codejury-0.1.0 → codejury-0.2.0}/codejury/data/capabilities/secrets.yaml +0 -0
  47. {codejury-0.1.0 → codejury-0.2.0}/codejury/data/capabilities/session.yaml +0 -0
  48. {codejury-0.1.0 → codejury-0.2.0}/codejury/data/golden/authn_bcrypt_password.yaml +0 -0
  49. {codejury-0.1.0 → codejury-0.2.0}/codejury/data/golden/authn_sha256_password.yaml +0 -0
  50. {codejury-0.1.0 → codejury-0.2.0}/codejury/data/golden/sqli_fstring_query.yaml +0 -0
  51. {codejury-0.1.0 → codejury-0.2.0}/codejury/data/golden/sqli_parameterized_query.yaml +0 -0
  52. {codejury-0.1.0 → codejury-0.2.0}/codejury/data/tasks/audit_diff_debate.yaml +0 -0
  53. {codejury-0.1.0 → codejury-0.2.0}/codejury/data/tasks/quick_scan_single.yaml +0 -0
  54. {codejury-0.1.0 → codejury-0.2.0}/codejury/domain/__init__.py +0 -0
  55. {codejury-0.1.0 → codejury-0.2.0}/codejury/domain/artifact.py +0 -0
  56. {codejury-0.1.0 → codejury-0.2.0}/codejury/domain/capability.py +0 -0
  57. {codejury-0.1.0 → codejury-0.2.0}/codejury/domain/context.py +0 -0
  58. {codejury-0.1.0 → codejury-0.2.0}/codejury/domain/observation.py +0 -0
  59. {codejury-0.1.0 → codejury-0.2.0}/codejury/domain/result.py +0 -0
  60. {codejury-0.1.0 → codejury-0.2.0}/codejury/evaluation.py +0 -0
  61. {codejury-0.1.0 → codejury-0.2.0}/codejury/infrastructure/__init__.py +0 -0
  62. {codejury-0.1.0 → codejury-0.2.0}/codejury/infrastructure/json_parse.py +0 -0
  63. {codejury-0.1.0 → codejury-0.2.0}/codejury/orchestrators/__init__.py +0 -0
  64. {codejury-0.1.0 → codejury-0.2.0}/codejury/orchestrators/base.py +0 -0
  65. {codejury-0.1.0 → codejury-0.2.0}/codejury/orchestrators/debate.py +0 -0
  66. {codejury-0.1.0 → codejury-0.2.0}/codejury/orchestrators/pipeline.py +0 -0
  67. {codejury-0.1.0 → codejury-0.2.0}/codejury/orchestrators/reflexion.py +0 -0
  68. {codejury-0.1.0 → codejury-0.2.0}/codejury/orchestrators/single.py +0 -0
  69. {codejury-0.1.0 → codejury-0.2.0}/codejury/providers/__init__.py +0 -0
  70. {codejury-0.1.0 → codejury-0.2.0}/codejury/providers/base.py +0 -0
  71. {codejury-0.1.0 → codejury-0.2.0}/codejury/providers/litellm.py +0 -0
  72. {codejury-0.1.0 → codejury-0.2.0}/codejury/providers/mock.py +0 -0
  73. {codejury-0.1.0 → codejury-0.2.0}/codejury/providers/openai.py +0 -0
  74. {codejury-0.1.0 → codejury-0.2.0}/codejury/providers/openai_format.py +0 -0
  75. {codejury-0.1.0 → codejury-0.2.0}/codejury/providers/retry.py +0 -0
  76. {codejury-0.1.0 → codejury-0.2.0}/codejury/reporting.py +0 -0
  77. {codejury-0.1.0 → codejury-0.2.0}/codejury/resources.py +0 -0
  78. {codejury-0.1.0 → codejury-0.2.0}/codejury/sources/__init__.py +0 -0
  79. {codejury-0.1.0 → codejury-0.2.0}/codejury/sources/base.py +0 -0
  80. {codejury-0.1.0 → codejury-0.2.0}/codejury/sources/chunker.py +0 -0
  81. {codejury-0.1.0 → codejury-0.2.0}/codejury/sources/diff.py +0 -0
  82. {codejury-0.1.0 → codejury-0.2.0}/codejury/sources/function.py +0 -0
  83. {codejury-0.1.0 → codejury-0.2.0}/codejury/sources/mock.py +0 -0
  84. {codejury-0.1.0 → codejury-0.2.0}/codejury/sources/repo.py +0 -0
  85. {codejury-0.1.0 → codejury-0.2.0}/codejury/tasks/__init__.py +0 -0
  86. {codejury-0.1.0 → codejury-0.2.0}/codejury/tasks/registry.py +0 -0
  87. {codejury-0.1.0 → codejury-0.2.0}/codejury.egg-info/dependency_links.txt +0 -0
  88. {codejury-0.1.0 → codejury-0.2.0}/codejury.egg-info/entry_points.txt +0 -0
  89. {codejury-0.1.0 → codejury-0.2.0}/codejury.egg-info/requires.txt +0 -0
  90. {codejury-0.1.0 → codejury-0.2.0}/codejury.egg-info/top_level.txt +0 -0
  91. {codejury-0.1.0 → codejury-0.2.0}/setup.cfg +0 -0
  92. {codejury-0.1.0 → codejury-0.2.0}/tests/test_anthropic_provider.py +0 -0
  93. {codejury-0.1.0 → codejury-0.2.0}/tests/test_audit_pipeline.py +0 -0
  94. {codejury-0.1.0 → codejury-0.2.0}/tests/test_capability.py +0 -0
  95. {codejury-0.1.0 → codejury-0.2.0}/tests/test_cli_audit.py +0 -0
  96. {codejury-0.1.0 → codejury-0.2.0}/tests/test_context.py +0 -0
  97. {codejury-0.1.0 → codejury-0.2.0}/tests/test_debate_agents.py +0 -0
  98. {codejury-0.1.0 → codejury-0.2.0}/tests/test_debate_orchestrator.py +0 -0
  99. {codejury-0.1.0 → codejury-0.2.0}/tests/test_diff_source.py +0 -0
  100. {codejury-0.1.0 → codejury-0.2.0}/tests/test_function_source.py +0 -0
  101. {codejury-0.1.0 → codejury-0.2.0}/tests/test_json_parse.py +0 -0
  102. {codejury-0.1.0 → codejury-0.2.0}/tests/test_litellm_provider.py +0 -0
  103. {codejury-0.1.0 → codejury-0.2.0}/tests/test_openai_provider.py +0 -0
  104. {codejury-0.1.0 → codejury-0.2.0}/tests/test_orchestrator.py +0 -0
  105. {codejury-0.1.0 → codejury-0.2.0}/tests/test_pipeline_orchestrator.py +0 -0
  106. {codejury-0.1.0 → codejury-0.2.0}/tests/test_reflexion_orchestrator.py +0 -0
  107. {codejury-0.1.0 → codejury-0.2.0}/tests/test_repo_source.py +0 -0
  108. {codejury-0.1.0 → codejury-0.2.0}/tests/test_reporting.py +0 -0
  109. {codejury-0.1.0 → codejury-0.2.0}/tests/test_retry_provider.py +0 -0
  110. {codejury-0.1.0 → codejury-0.2.0}/tests/test_verifier.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codejury
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: General-purpose Application Security AI audit framework -- five-layer architecture, capabilities as first-class data
5
5
  Author: 4234288
6
6
  License-Expression: MIT
@@ -84,6 +84,12 @@ pip install 'codejury[anthropic]' # add the provider you'll use (anthropic /
84
84
 
85
85
  ## Usage
86
86
 
87
+ A real audit calls a model, so set the provider's key first (see `.env.example`):
88
+
89
+ ```bash
90
+ export ANTHROPIC_API_KEY=sk-ant-... # or OPENAI_API_KEY for --provider openai
91
+ ```
92
+
87
93
  ```bash
88
94
  # Audit a unified diff against the capability library
89
95
  git diff | codejury audit --orchestrator debate --provider anthropic --format markdown -
@@ -91,15 +97,35 @@ git diff | codejury audit --orchestrator debate --provider anthropic --format ma
91
97
  # Run a named task preset (tasks/*.yaml)
92
98
  git diff | codejury run audit_diff_debate -
93
99
 
94
- # Score detection quality against the golden cases (needs a provider key)
100
+ # Score detection quality against the golden cases
95
101
  codejury eval --provider anthropic
96
102
 
103
+ # Through a LiteLLM proxy / gateway. The flags default to CODEJURY_API_BASE /
104
+ # CODEJURY_API_KEY / CODEJURY_MODEL, so with those in a sourced .env this is just:
105
+ # codejury audit --provider litellm -
106
+ git diff | codejury audit --provider litellm \
107
+ --api-base https://litellm.example.com --api-key "$LITELLM_KEY" --model your-alias -
108
+
97
109
  # No API key needed: prove the pipeline composes with mock layers
98
110
  codejury dry-run
99
111
  ```
100
112
 
101
- `audit` and `run` read a diff from a file argument or stdin (`-`). Real providers
102
- read their key from the environment (e.g. `ANTHROPIC_API_KEY`).
113
+ `audit` and `run` read a diff from a file argument or stdin (`-`). The provider
114
+ key is read from the environment: `ANTHROPIC_API_KEY` for `--provider anthropic`,
115
+ `OPENAI_API_KEY` for `--provider openai`. Without a key the model providers
116
+ raise an authentication error; `codejury dry-run` needs no key.
117
+
118
+ A task YAML can pin the provider, model, and base URL (the key stays in the
119
+ environment), so `codejury run` works through a proxy too:
120
+
121
+ ```yaml
122
+ # mytasks/proxy_scan.yaml -> codejury run proxy_scan --tasks mytasks
123
+ name: proxy_scan
124
+ orchestrator: debate
125
+ provider: litellm
126
+ model: your-alias
127
+ api_base: https://litellm.example.com # key from CODEJURY_API_KEY
128
+ ```
103
129
 
104
130
  ## Development
105
131
 
@@ -55,6 +55,12 @@ pip install 'codejury[anthropic]' # add the provider you'll use (anthropic /
55
55
 
56
56
  ## Usage
57
57
 
58
+ A real audit calls a model, so set the provider's key first (see `.env.example`):
59
+
60
+ ```bash
61
+ export ANTHROPIC_API_KEY=sk-ant-... # or OPENAI_API_KEY for --provider openai
62
+ ```
63
+
58
64
  ```bash
59
65
  # Audit a unified diff against the capability library
60
66
  git diff | codejury audit --orchestrator debate --provider anthropic --format markdown -
@@ -62,15 +68,35 @@ git diff | codejury audit --orchestrator debate --provider anthropic --format ma
62
68
  # Run a named task preset (tasks/*.yaml)
63
69
  git diff | codejury run audit_diff_debate -
64
70
 
65
- # Score detection quality against the golden cases (needs a provider key)
71
+ # Score detection quality against the golden cases
66
72
  codejury eval --provider anthropic
67
73
 
74
+ # Through a LiteLLM proxy / gateway. The flags default to CODEJURY_API_BASE /
75
+ # CODEJURY_API_KEY / CODEJURY_MODEL, so with those in a sourced .env this is just:
76
+ # codejury audit --provider litellm -
77
+ git diff | codejury audit --provider litellm \
78
+ --api-base https://litellm.example.com --api-key "$LITELLM_KEY" --model your-alias -
79
+
68
80
  # No API key needed: prove the pipeline composes with mock layers
69
81
  codejury dry-run
70
82
  ```
71
83
 
72
- `audit` and `run` read a diff from a file argument or stdin (`-`). Real providers
73
- read their key from the environment (e.g. `ANTHROPIC_API_KEY`).
84
+ `audit` and `run` read a diff from a file argument or stdin (`-`). The provider
85
+ key is read from the environment: `ANTHROPIC_API_KEY` for `--provider anthropic`,
86
+ `OPENAI_API_KEY` for `--provider openai`. Without a key the model providers
87
+ raise an authentication error; `codejury dry-run` needs no key.
88
+
89
+ A task YAML can pin the provider, model, and base URL (the key stays in the
90
+ environment), so `codejury run` works through a proxy too:
91
+
92
+ ```yaml
93
+ # mytasks/proxy_scan.yaml -> codejury run proxy_scan --tasks mytasks
94
+ name: proxy_scan
95
+ orchestrator: debate
96
+ provider: litellm
97
+ model: your-alias
98
+ api_base: https://litellm.example.com # key from CODEJURY_API_KEY
99
+ ```
74
100
 
75
101
  ## Development
76
102
 
@@ -29,15 +29,19 @@ from codejury.sources.base import Source
29
29
  STRATEGIES = ("single", "pipeline", "debate", "reflexion")
30
30
  PROVIDERS = ("anthropic", "openai", "litellm")
31
31
  DEFAULT_MODEL = os.environ.get("CODEJURY_MODEL", "claude-sonnet-4-6")
32
+ DEFAULT_API_BASE = os.environ.get("CODEJURY_API_BASE")
33
+ DEFAULT_API_KEY = os.environ.get("CODEJURY_API_KEY")
32
34
 
33
35
 
34
- def make_provider(name: str, *, retries: int = 0) -> Provider:
36
+ def make_provider(
37
+ name: str, *, api_key: str | None = None, api_base: str | None = None, retries: int = 0
38
+ ) -> Provider:
35
39
  if name == "openai":
36
- provider: Provider = OpenAIProvider()
40
+ provider: Provider = OpenAIProvider(api_key=api_key, base_url=api_base)
37
41
  elif name == "litellm":
38
- provider = LiteLLMProvider()
42
+ provider = LiteLLMProvider(api_key=api_key, api_base=api_base)
39
43
  else:
40
- provider = AnthropicProvider()
44
+ provider = AnthropicProvider(api_key=api_key, base_url=api_base)
41
45
  if retries > 0:
42
46
  provider = RetryProvider(provider, max_attempts=retries + 1)
43
47
  return provider
@@ -13,6 +13,8 @@ import sys
13
13
 
14
14
  from codejury.agents.mock import MockAgent
15
15
  from codejury.assembly import (
16
+ DEFAULT_API_BASE,
17
+ DEFAULT_API_KEY,
16
18
  DEFAULT_MODEL,
17
19
  PROVIDERS,
18
20
  STRATEGIES,
@@ -135,6 +137,8 @@ def main(argv: list[str] | None = None) -> int:
135
137
  audit_p.add_argument("--model", default=DEFAULT_MODEL)
136
138
  audit_p.add_argument("--max-tokens", type=int, default=2048)
137
139
  audit_p.add_argument("--retries", type=int, default=0, help="provider retry attempts on failure")
140
+ audit_p.add_argument("--api-base", default=DEFAULT_API_BASE, help="provider base URL (env: CODEJURY_API_BASE)")
141
+ audit_p.add_argument("--api-key", default=DEFAULT_API_KEY, help="provider API key (env: CODEJURY_API_KEY)")
138
142
 
139
143
  run_p = sub.add_parser("run", help="run a named task preset against a unified diff")
140
144
  run_p.add_argument("task", help="task name")
@@ -148,6 +152,8 @@ def main(argv: list[str] | None = None) -> int:
148
152
  eval_p.add_argument("--capabilities", default=CAPABILITIES_DIR, help="capability YAML directory")
149
153
  eval_p.add_argument("--provider", choices=PROVIDERS, default="anthropic")
150
154
  eval_p.add_argument("--model", default=DEFAULT_MODEL)
155
+ eval_p.add_argument("--api-base", default=DEFAULT_API_BASE, help="provider base URL (env: CODEJURY_API_BASE)")
156
+ eval_p.add_argument("--api-key", default=DEFAULT_API_KEY, help="provider API key (env: CODEJURY_API_KEY)")
151
157
 
152
158
  args = parser.parse_args(argv)
153
159
 
@@ -155,7 +161,9 @@ def main(argv: list[str] | None = None) -> int:
155
161
  results = audit(
156
162
  _read_diff(args.diff),
157
163
  load_capabilities(args.capabilities),
158
- provider=make_provider(args.provider, retries=args.retries),
164
+ provider=make_provider(
165
+ args.provider, api_key=args.api_key, api_base=args.api_base, retries=args.retries
166
+ ),
159
167
  model=args.model,
160
168
  max_tokens=args.max_tokens,
161
169
  strategy=args.orchestrator,
@@ -175,12 +183,18 @@ def main(argv: list[str] | None = None) -> int:
175
183
  return 0
176
184
 
177
185
  if args.command == "eval":
178
- metrics = evaluate(
179
- load_cases(args.golden),
180
- load_capabilities(args.capabilities),
181
- provider=make_provider(args.provider),
182
- model=args.model,
183
- )
186
+ try:
187
+ metrics = evaluate(
188
+ load_cases(args.golden),
189
+ load_capabilities(args.capabilities),
190
+ provider=make_provider(args.provider, api_key=args.api_key, api_base=args.api_base),
191
+ model=args.model,
192
+ )
193
+ except Exception as exc:
194
+ # e.g. a missing API key surfaces as a provider auth error -- report it
195
+ # as one line, not a traceback (audit gets this via the orchestrator).
196
+ print(f"eval failed: {exc}")
197
+ return 1
184
198
  print(_render_metrics(metrics))
185
199
  return 0
186
200
 
@@ -0,0 +1,6 @@
1
+ capability: authn
2
+ vulnerable: true
3
+ code: |
4
+ def user_id(token):
5
+ claims = jwt.decode(token, options={"verify_signature": False})
6
+ return claims["sub"]
@@ -0,0 +1,6 @@
1
+ capability: authn
2
+ vulnerable: false
3
+ code: |
4
+ def user_id(token):
5
+ claims = jwt.decode(token, KEY, algorithms=["RS256"], audience=AUD, issuer=ISS)
6
+ return claims["sub"]
@@ -0,0 +1,6 @@
1
+ capability: authn
2
+ vulnerable: false
3
+ code: |
4
+ def file_dedup_key(data: bytes) -> str:
5
+ # content hash for cache dedup -- NOT a password
6
+ return hashlib.sha256(data).hexdigest()
@@ -0,0 +1,5 @@
1
+ capability: authz
2
+ vulnerable: true
3
+ code: |
4
+ def get_invoice(request):
5
+ return Invoice.objects.get(id=request.GET["id"])
@@ -0,0 +1,5 @@
1
+ capability: authz
2
+ vulnerable: false
3
+ code: |
4
+ def get_invoice(request):
5
+ return Invoice.objects.get(id=request.GET["id"], owner=request.user)
@@ -0,0 +1,5 @@
1
+ capability: input_validation
2
+ vulnerable: true
3
+ code: |
4
+ def ping(host):
5
+ os.system("ping -c 1 " + host)
@@ -0,0 +1,5 @@
1
+ capability: input_validation
2
+ vulnerable: false
3
+ code: |
4
+ def ping(host):
5
+ subprocess.run(["ping", "-c", "1", host], shell=False)
@@ -0,0 +1,6 @@
1
+ capability: crypto
2
+ vulnerable: false
3
+ code: |
4
+ def encrypt(data, key):
5
+ nonce = os.urandom(12)
6
+ return nonce, AESGCM(key).encrypt(nonce, data, None)
@@ -0,0 +1,6 @@
1
+ capability: crypto
2
+ vulnerable: true
3
+ code: |
4
+ def encrypt(data, key):
5
+ cipher = AES.new(key, AES.MODE_ECB)
6
+ return cipher.encrypt(pad(data, 16))
@@ -0,0 +1,8 @@
1
+ capability: input_validation
2
+ vulnerable: false
3
+ code: |
4
+ def read_upload(filename):
5
+ target = (UPLOAD_DIR / filename).resolve()
6
+ if not target.is_relative_to(UPLOAD_DIR):
7
+ raise ValueError("path escapes upload dir")
8
+ return target.read_text()
@@ -0,0 +1,5 @@
1
+ capability: input_validation
2
+ vulnerable: true
3
+ code: |
4
+ def read_upload(filename):
5
+ return open(os.path.join(UPLOAD_DIR, filename)).read()
@@ -0,0 +1,5 @@
1
+ capability: secrets
2
+ vulnerable: false
3
+ code: |
4
+ STRIPE_KEY = os.environ["STRIPE_KEY"]
5
+ client = stripe.Client(STRIPE_KEY)
@@ -0,0 +1,5 @@
1
+ capability: secrets
2
+ vulnerable: true
3
+ code: |
4
+ API_KEY = "9c1185a5c5e9fc54612808977ee8f548b2258d31"
5
+ client = PaymentClient(api_key=API_KEY)
@@ -0,0 +1,5 @@
1
+ capability: input_validation
2
+ vulnerable: true
3
+ code: |
4
+ def find(name):
5
+ cursor.execute("SELECT * FROM users WHERE name = '{}'".format(name))
@@ -0,0 +1,7 @@
1
+ capability: output_encoding
2
+ vulnerable: false
3
+ code: |
4
+ function render() {
5
+ // static markup, no untrusted input
6
+ el.innerHTML = "<b>Welcome back</b>";
7
+ }
@@ -0,0 +1,6 @@
1
+ capability: output_encoding
2
+ vulnerable: true
3
+ code: |
4
+ function render(name) {
5
+ el.innerHTML = "Hello " + name;
6
+ }
@@ -17,8 +17,11 @@ from codejury.providers.base import CompletionResult, Message, Provider
17
17
 
18
18
 
19
19
  class AnthropicProvider(Provider):
20
- def __init__(self, *, api_key: str | None = None, client: Any | None = None) -> None:
20
+ def __init__(
21
+ self, *, api_key: str | None = None, base_url: str | None = None, client: Any | None = None
22
+ ) -> None:
21
23
  self._api_key = api_key
24
+ self._base_url = base_url
22
25
  self._client = client
23
26
 
24
27
  def _get_client(self) -> Any:
@@ -29,7 +32,12 @@ class AnthropicProvider(Provider):
29
32
  raise RuntimeError(
30
33
  "anthropic SDK not installed; run: pip install 'codejury[anthropic]'"
31
34
  ) from exc
32
- self._client = anthropic.Anthropic(api_key=self._api_key)
35
+ kwargs: dict[str, Any] = {}
36
+ if self._api_key:
37
+ kwargs["api_key"] = self._api_key
38
+ if self._base_url:
39
+ kwargs["base_url"] = self._base_url
40
+ self._client = anthropic.Anthropic(**kwargs)
33
41
  return self._client
34
42
 
35
43
  def complete(
@@ -9,7 +9,14 @@ from __future__ import annotations
9
9
  from dataclasses import dataclass
10
10
  from typing import Any
11
11
 
12
- from codejury.assembly import DEFAULT_MODEL, build_orchestration, make_provider, run_over_source
12
+ from codejury.assembly import (
13
+ DEFAULT_API_BASE,
14
+ DEFAULT_API_KEY,
15
+ DEFAULT_MODEL,
16
+ build_orchestration,
17
+ make_provider,
18
+ run_over_source,
19
+ )
13
20
  from codejury.domain.capability import Capability
14
21
  from codejury.domain.result import AnalysisResult
15
22
  from codejury.sources.base import Source
@@ -24,6 +31,7 @@ class Task:
24
31
  capabilities: tuple[str, ...] | None = None # capability ids to check; None = all
25
32
  max_tokens: int = 2048
26
33
  retries: int = 0 # provider retry attempts on transient failure
34
+ api_base: str | None = None # provider base URL (e.g. a LiteLLM proxy); the key stays in the env
27
35
 
28
36
  @classmethod
29
37
  def from_dict(cls, data: dict[str, Any]) -> Task:
@@ -36,6 +44,7 @@ class Task:
36
44
  capabilities=tuple(caps) if caps is not None else None,
37
45
  max_tokens=int(data.get("max_tokens", 2048)),
38
46
  retries=int(data.get("retries", 0)),
47
+ api_base=data.get("api_base"),
39
48
  )
40
49
 
41
50
  def select(self, capabilities: list[Capability]) -> list[Capability]:
@@ -48,7 +57,13 @@ class Task:
48
57
  def run_task(
49
58
  task: Task, source: Source, capabilities: list[Capability]
50
59
  ) -> list[tuple[str, AnalysisResult]]:
51
- provider = make_provider(task.provider, retries=task.retries)
60
+ # api_base may come from the task (non-secret URL); the key only from the env.
61
+ provider = make_provider(
62
+ task.provider,
63
+ api_key=DEFAULT_API_KEY,
64
+ api_base=task.api_base or DEFAULT_API_BASE,
65
+ retries=task.retries,
66
+ )
52
67
  agents, orchestrator = build_orchestration(
53
68
  task.orchestrator, provider=provider, model=task.model, max_tokens=task.max_tokens
54
69
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codejury
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: General-purpose Application Security AI audit framework -- five-layer architecture, capabilities as first-class data
5
5
  Author: 4234288
6
6
  License-Expression: MIT
@@ -84,6 +84,12 @@ pip install 'codejury[anthropic]' # add the provider you'll use (anthropic /
84
84
 
85
85
  ## Usage
86
86
 
87
+ A real audit calls a model, so set the provider's key first (see `.env.example`):
88
+
89
+ ```bash
90
+ export ANTHROPIC_API_KEY=sk-ant-... # or OPENAI_API_KEY for --provider openai
91
+ ```
92
+
87
93
  ```bash
88
94
  # Audit a unified diff against the capability library
89
95
  git diff | codejury audit --orchestrator debate --provider anthropic --format markdown -
@@ -91,15 +97,35 @@ git diff | codejury audit --orchestrator debate --provider anthropic --format ma
91
97
  # Run a named task preset (tasks/*.yaml)
92
98
  git diff | codejury run audit_diff_debate -
93
99
 
94
- # Score detection quality against the golden cases (needs a provider key)
100
+ # Score detection quality against the golden cases
95
101
  codejury eval --provider anthropic
96
102
 
103
+ # Through a LiteLLM proxy / gateway. The flags default to CODEJURY_API_BASE /
104
+ # CODEJURY_API_KEY / CODEJURY_MODEL, so with those in a sourced .env this is just:
105
+ # codejury audit --provider litellm -
106
+ git diff | codejury audit --provider litellm \
107
+ --api-base https://litellm.example.com --api-key "$LITELLM_KEY" --model your-alias -
108
+
97
109
  # No API key needed: prove the pipeline composes with mock layers
98
110
  codejury dry-run
99
111
  ```
100
112
 
101
- `audit` and `run` read a diff from a file argument or stdin (`-`). Real providers
102
- read their key from the environment (e.g. `ANTHROPIC_API_KEY`).
113
+ `audit` and `run` read a diff from a file argument or stdin (`-`). The provider
114
+ key is read from the environment: `ANTHROPIC_API_KEY` for `--provider anthropic`,
115
+ `OPENAI_API_KEY` for `--provider openai`. Without a key the model providers
116
+ raise an authentication error; `codejury dry-run` needs no key.
117
+
118
+ A task YAML can pin the provider, model, and base URL (the key stays in the
119
+ environment), so `codejury run` works through a proxy too:
120
+
121
+ ```yaml
122
+ # mytasks/proxy_scan.yaml -> codejury run proxy_scan --tasks mytasks
123
+ name: proxy_scan
124
+ orchestrator: debate
125
+ provider: litellm
126
+ model: your-alias
127
+ api_base: https://litellm.example.com # key from CODEJURY_API_KEY
128
+ ```
103
129
 
104
130
  ## Development
105
131
 
@@ -31,9 +31,25 @@ codejury/data/capabilities/output_encoding.yaml
31
31
  codejury/data/capabilities/secrets.yaml
32
32
  codejury/data/capabilities/session.yaml
33
33
  codejury/data/golden/authn_bcrypt_password.yaml
34
+ codejury/data/golden/authn_jwt_noverify_vuln.yaml
35
+ codejury/data/golden/authn_jwt_verified_safe.yaml
36
+ codejury/data/golden/authn_sha256_checksum_safe.yaml
34
37
  codejury/data/golden/authn_sha256_password.yaml
38
+ codejury/data/golden/authz_idor_vuln.yaml
39
+ codejury/data/golden/authz_owner_safe.yaml
40
+ codejury/data/golden/cmdi_ossystem_vuln.yaml
41
+ codejury/data/golden/cmdi_subprocess_safe.yaml
42
+ codejury/data/golden/crypto_aesgcm_safe.yaml
43
+ codejury/data/golden/crypto_ecb_vuln.yaml
44
+ codejury/data/golden/path_contained_safe.yaml
45
+ codejury/data/golden/path_traversal_vuln.yaml
46
+ codejury/data/golden/secrets_env_safe.yaml
47
+ codejury/data/golden/secrets_hardcoded_vuln.yaml
48
+ codejury/data/golden/sqli_format_vuln.yaml
35
49
  codejury/data/golden/sqli_fstring_query.yaml
36
50
  codejury/data/golden/sqli_parameterized_query.yaml
51
+ codejury/data/golden/xss_innerhtml_constant_safe.yaml
52
+ codejury/data/golden/xss_innerhtml_vuln.yaml
37
53
  codejury/data/tasks/audit_diff_debate.yaml
38
54
  codejury/data/tasks/quick_scan_single.yaml
39
55
  codejury/domain/__init__.py
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "codejury"
3
- version = "0.1.0"
3
+ version = "0.2.0"
4
4
  description = "General-purpose Application Security AI audit framework -- five-layer architecture, capabilities as first-class data"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12"
@@ -1,11 +1,15 @@
1
+ from types import SimpleNamespace
2
+
1
3
  import pytest
2
4
 
3
- from codejury.assembly import build_orchestration, run_over_source
5
+ from codejury.assembly import build_orchestration, make_provider, run_over_source
4
6
  from codejury.domain.capability import Capability
5
7
  from codejury.orchestrators.debate import DebateOrchestrator
6
8
  from codejury.orchestrators.pipeline import PipelineOrchestrator
7
9
  from codejury.orchestrators.reflexion import ReflexionOrchestrator
8
10
  from codejury.orchestrators.single import SingleOrchestrator
11
+ from codejury.providers.base import Message
12
+ from codejury.providers.litellm import LiteLLMProvider
9
13
  from codejury.providers.mock import MockProvider
10
14
  from codejury.sources.mock import MockSource
11
15
 
@@ -25,6 +29,19 @@ def test_build_orchestration_maps_strategy(strategy, orch_cls, roles):
25
29
  assert set(agents) == roles
26
30
 
27
31
 
32
+ def test_make_provider_forwards_api_base_and_key():
33
+ provider = make_provider("litellm", api_base="https://proxy.example", api_key="sk-test")
34
+ assert isinstance(provider, LiteLLMProvider)
35
+
36
+ captured = {}
37
+ provider._completion = lambda **kw: captured.update(kw) or SimpleNamespace(
38
+ choices=[SimpleNamespace(message=SimpleNamespace(content="ok"))]
39
+ )
40
+ provider.complete(system="s", messages=[Message(role="user", content="x")], model="m", max_tokens=8)
41
+ assert captured["api_base"] == "https://proxy.example"
42
+ assert captured["api_key"] == "sk-test"
43
+
44
+
28
45
  def test_run_over_source_runs_each_artifact():
29
46
  provider = MockProvider(default='{"verdicts": [{"sub_capability": "x", "status": "SECURE"}]}')
30
47
  agents, orchestrator = build_orchestration("single", provider=provider, model="m", max_tokens=8)
@@ -1,7 +1,9 @@
1
1
  import json
2
2
 
3
- from codejury.domain.capability import load_capability
3
+ from codejury import cli
4
+ from codejury.domain.capability import load_capabilities
4
5
  from codejury.evaluation import Metrics, evaluate, load_cases
6
+ from codejury.providers.base import Provider
5
7
  from codejury.providers.mock import MockProvider
6
8
 
7
9
  from codejury.resources import CAPABILITIES_DIR, GOLDEN_DIR
@@ -36,20 +38,32 @@ def test_golden_cases_load():
36
38
  assert vuln.capability == "authn" and vuln.vulnerable is True
37
39
 
38
40
 
39
- def _caps():
40
- return [load_capability(CAPABILITIES_DIR / "authentication.yaml"),
41
- load_capability(CAPABILITIES_DIR / "input_validation.yaml")]
42
-
43
-
44
41
  def test_evaluate_always_vulnerable_provider():
45
- # 2 vulnerable + 2 safe golden cases; a provider that always flags VULNERABLE
46
- # -> every positive is right (recall 1.0) but the safe ones are false positives.
47
- m = evaluate(load_cases(GOLDEN_DIR), _caps(), provider=MockProvider(default=_VULN), model="m")
48
- assert m.tp == 2 and m.fp == 2 and m.fn == 0 and m.tn == 0
49
- assert m.recall == 1.0 and m.precision == 0.5
42
+ # A provider that always flags VULNERABLE: every vulnerable case is a true
43
+ # positive (recall 1.0), every safe case a false positive.
44
+ cases = load_cases(GOLDEN_DIR)
45
+ n_vuln = sum(c.vulnerable for c in cases)
46
+ n_safe = len(cases) - n_vuln
47
+ m = evaluate(cases, load_capabilities(CAPABILITIES_DIR), provider=MockProvider(default=_VULN), model="m")
48
+ assert m.tp == n_vuln and m.fp == n_safe and m.fn == 0 and m.tn == 0
49
+ assert m.recall == 1.0
50
50
 
51
51
 
52
52
  def test_evaluate_always_secure_provider():
53
- m = evaluate(load_cases(GOLDEN_DIR), _caps(), provider=MockProvider(default=_SECURE), model="m")
54
- assert m.tp == 0 and m.fn == 2 and m.tn == 2 and m.fp == 0
53
+ cases = load_cases(GOLDEN_DIR)
54
+ n_vuln = sum(c.vulnerable for c in cases)
55
+ n_safe = len(cases) - n_vuln
56
+ m = evaluate(cases, load_capabilities(CAPABILITIES_DIR), provider=MockProvider(default=_SECURE), model="m")
57
+ assert m.tp == 0 and m.fp == 0 and m.fn == n_vuln and m.tn == n_safe
55
58
  assert m.recall == 0.0
59
+
60
+
61
+ def test_eval_cli_reports_provider_error_without_traceback(monkeypatch, capsys):
62
+ class _Boom(Provider):
63
+ def complete(self, **kwargs):
64
+ raise RuntimeError("Could not resolve authentication method")
65
+
66
+ monkeypatch.setattr("codejury.cli.make_provider", lambda name: _Boom())
67
+ rc = cli.main(["eval"])
68
+ assert rc == 1
69
+ assert "eval failed" in capsys.readouterr().out
@@ -16,6 +16,31 @@ def test_from_dict_parses_and_defaults():
16
16
  assert task.capabilities == ("authn", "crypto")
17
17
  assert task.provider == "anthropic" # default
18
18
  assert task.max_tokens == 2048 # default
19
+ assert task.api_base is None # default
20
+
21
+
22
+ def test_from_dict_reads_api_base():
23
+ task = Task.from_dict({"name": "t", "provider": "litellm", "api_base": "https://proxy.example"})
24
+ assert task.api_base == "https://proxy.example"
25
+
26
+
27
+ def test_run_task_forwards_proxy_config_with_key_from_env(monkeypatch):
28
+ captured = {}
29
+
30
+ def fake_make_provider(name, **kwargs):
31
+ captured["name"] = name
32
+ captured.update(kwargs)
33
+ return MockProvider(default='{"verdicts": []}')
34
+
35
+ monkeypatch.setattr("codejury.tasks.base.make_provider", fake_make_provider)
36
+ monkeypatch.setattr("codejury.tasks.base.DEFAULT_API_KEY", "sk-from-env")
37
+
38
+ task = Task(name="t", provider="litellm", api_base="https://proxy.example")
39
+ run_task(task, MockSource(), [Capability(id="authn", name="A")])
40
+
41
+ assert captured["name"] == "litellm"
42
+ assert captured["api_base"] == "https://proxy.example" # from the task
43
+ assert captured["api_key"] == "sk-from-env" # from the environment, not the task
19
44
 
20
45
 
21
46
  def test_select_filters_by_id_and_none_means_all():
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes