codejury 0.5.0__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codejury-0.5.0 → codejury-0.6.0}/PKG-INFO +29 -8
- {codejury-0.5.0 → codejury-0.6.0}/README.md +27 -7
- {codejury-0.5.0 → codejury-0.6.0}/codejury/__init__.py +6 -1
- {codejury-0.5.0 → codejury-0.6.0}/codejury/agents/verifier.py +14 -1
- {codejury-0.5.0 → codejury-0.6.0}/codejury/assembly.py +31 -3
- {codejury-0.5.0 → codejury-0.6.0}/codejury/cli.py +48 -17
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/capabilities/dependency_config.yaml +27 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/capabilities/input_validation.yaml +60 -0
- codejury-0.6.0/codejury/data/golden/authn_weak_hash_indirect_vuln.yaml +14 -0
- codejury-0.6.0/codejury/data/golden/business_logic_price_tamper_vuln.yaml +14 -0
- codejury-0.6.0/codejury/data/golden/business_logic_server_checked_safe.yaml +15 -0
- codejury-0.6.0/codejury/data/golden/cmdi_fixed_argv_safe.yaml +22 -0
- codejury-0.6.0/codejury/data/golden/data_protection_plaintext_pii_vuln.yaml +14 -0
- codejury-0.6.0/codejury/data/golden/data_protection_tokenized_safe.yaml +16 -0
- codejury-0.6.0/codejury/data/golden/dependency_config_tls_verify_off_vuln.yaml +11 -0
- codejury-0.6.0/codejury/data/golden/dependency_config_tls_verify_on_safe.yaml +11 -0
- codejury-0.6.0/codejury/data/golden/deserialize_json_safe.yaml +13 -0
- codejury-0.6.0/codejury/data/golden/deserialize_pickle_vuln.yaml +12 -0
- codejury-0.6.0/codejury/data/golden/error_logging_redacted_safe.yaml +11 -0
- codejury-0.6.0/codejury/data/golden/error_logging_secret_leak_vuln.yaml +12 -0
- codejury-0.6.0/codejury/data/golden/literal_eval_safe.yaml +14 -0
- codejury-0.6.0/codejury/data/golden/path_basename_safe.yaml +15 -0
- codejury-0.6.0/codejury/data/golden/session_fixation_vuln.yaml +16 -0
- codejury-0.6.0/codejury/data/golden/session_secure_cookie_safe.yaml +18 -0
- codejury-0.6.0/codejury/data/golden/sql_constant_concat_safe.yaml +14 -0
- codejury-0.6.0/codejury/data/golden/sqli_indirect_var_vuln.yaml +16 -0
- codejury-0.6.0/codejury/data/golden/ssrf_allowlist_safe.yaml +17 -0
- codejury-0.6.0/codejury/data/golden/ssrf_constant_url_safe.yaml +13 -0
- codejury-0.6.0/codejury/data/golden/ssrf_substring_allowlist_bypass_vuln.yaml +17 -0
- codejury-0.6.0/codejury/data/golden/ssrf_user_url_vuln.yaml +12 -0
- codejury-0.6.0/codejury/data/golden/xfile_idor_no_check_vuln.yaml +20 -0
- codejury-0.6.0/codejury/data/golden/xfile_idor_owner_checked_safe.yaml +21 -0
- codejury-0.6.0/codejury/data/golden/xfile_path_sanitized_safe.yaml +22 -0
- codejury-0.6.0/codejury/data/golden/xfile_path_tainted_vuln.yaml +19 -0
- codejury-0.6.0/codejury/data/golden/xss_textcontent_safe.yaml +15 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/domain/capability.py +16 -1
- {codejury-0.5.0 → codejury-0.6.0}/codejury/domain/observation.py +20 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/domain/result.py +15 -1
- {codejury-0.5.0 → codejury-0.6.0}/codejury/evaluation.py +62 -10
- codejury-0.6.0/codejury/infrastructure/cache.py +76 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/providers/anthropic.py +1 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/providers/litellm.py +1 -1
- {codejury-0.5.0 → codejury-0.6.0}/codejury/providers/openai.py +1 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/reporting.py +106 -0
- codejury-0.6.0/codejury/sources/callers.py +104 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/sources/repo.py +16 -2
- {codejury-0.5.0 → codejury-0.6.0}/codejury.egg-info/PKG-INFO +29 -8
- {codejury-0.5.0 → codejury-0.6.0}/codejury.egg-info/SOURCES.txt +30 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury.egg-info/requires.txt +1 -0
- {codejury-0.5.0 → codejury-0.6.0}/pyproject.toml +2 -2
- {codejury-0.5.0 → codejury-0.6.0}/tests/test_anthropic_provider.py +1 -0
- codejury-0.6.0/tests/test_cache.py +117 -0
- {codejury-0.5.0 → codejury-0.6.0}/tests/test_callers.py +27 -1
- codejury-0.6.0/tests/test_evaluation.py +152 -0
- {codejury-0.5.0 → codejury-0.6.0}/tests/test_litellm_provider.py +1 -0
- {codejury-0.5.0 → codejury-0.6.0}/tests/test_openai_provider.py +1 -0
- codejury-0.6.0/tests/test_sarif.py +115 -0
- codejury-0.5.0/codejury/sources/callers.py +0 -46
- codejury-0.5.0/tests/test_evaluation.py +0 -69
- {codejury-0.5.0 → codejury-0.6.0}/LICENSE +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/agents/__init__.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/agents/base.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/agents/debate.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/agents/mock.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/agents/parsing.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/agents/refuter.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/capabilities/authentication.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/capabilities/authorization.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/capabilities/business_logic.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/capabilities/crypto.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/capabilities/data_protection.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/capabilities/error_logging.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/capabilities/output_encoding.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/capabilities/secrets.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/capabilities/session.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/authn_bcrypt_password.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/authn_jwt_noverify_vuln.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/authn_jwt_verified_safe.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/authn_sha256_checksum_safe.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/authn_sha256_password.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/authz_idor_vuln.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/authz_owner_safe.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/cmdi_ossystem_vuln.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/cmdi_subprocess_safe.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/crypto_aesgcm_safe.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/crypto_ecb_vuln.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/path_contained_safe.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/path_traversal_vuln.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/secrets_env_safe.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/secrets_hardcoded_vuln.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/sqli_format_vuln.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/sqli_fstring_query.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/sqli_parameterized_query.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/xss_innerhtml_constant_safe.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/xss_innerhtml_vuln.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/suppressions.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/tasks/audit_diff_debate.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/data/tasks/quick_scan_single.yaml +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/domain/__init__.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/domain/artifact.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/domain/context.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/infrastructure/__init__.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/infrastructure/json_parse.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/integrations/__init__.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/integrations/github.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/orchestrators/__init__.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/orchestrators/base.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/orchestrators/challenge.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/orchestrators/debate.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/orchestrators/pipeline.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/orchestrators/reflexion.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/orchestrators/single.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/providers/__init__.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/providers/base.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/providers/mock.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/providers/openai_format.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/providers/retry.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/resources.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/sources/__init__.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/sources/base.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/sources/chunker.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/sources/diff.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/sources/function.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/sources/mock.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/suppression.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/tasks/__init__.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/tasks/base.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury/tasks/registry.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury.egg-info/dependency_links.txt +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury.egg-info/entry_points.txt +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/codejury.egg-info/top_level.txt +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/setup.cfg +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/tests/test_assembly.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/tests/test_audit_pipeline.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/tests/test_capability.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/tests/test_challenge.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/tests/test_cli_audit.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/tests/test_context.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/tests/test_debate_agents.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/tests/test_debate_orchestrator.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/tests/test_diff_source.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/tests/test_function_source.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/tests/test_integrations.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/tests/test_json_parse.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/tests/test_orchestrator.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/tests/test_pipeline_orchestrator.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/tests/test_reflexion_orchestrator.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/tests/test_repo_source.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/tests/test_reporting.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/tests/test_retry_provider.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/tests/test_suppression.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/tests/test_tasks.py +0 -0
- {codejury-0.5.0 → codejury-0.6.0}/tests/test_verifier.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codejury
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: General-purpose Application Security AI audit framework -- five-layer architecture, capabilities as first-class data
|
|
5
5
|
Author: AISecLabs
|
|
6
6
|
License-Expression: MIT
|
|
@@ -25,6 +25,7 @@ Provides-Extra: litellm
|
|
|
25
25
|
Requires-Dist: litellm>=1.0; extra == "litellm"
|
|
26
26
|
Provides-Extra: dev
|
|
27
27
|
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
28
|
+
Requires-Dist: jsonschema>=4.0; extra == "dev"
|
|
28
29
|
Dynamic: license-file
|
|
29
30
|
|
|
30
31
|
# codejury
|
|
@@ -80,15 +81,32 @@ git diff | codejury audit --provider anthropic
|
|
|
80
81
|
| `codejury audit [diff]` | Audit a unified diff from a file or stdin (`-`). |
|
|
81
82
|
| `codejury scan <dir>` | Audit a whole directory tree, capability by capability. |
|
|
82
83
|
| `codejury run <task>` | Run a named task preset (see [Tasks](#tasks)). |
|
|
83
|
-
| `codejury eval` | Score the golden cases
|
|
84
|
+
| `codejury eval` | Score the golden cases; report precision / recall / F1, overall and per capability. |
|
|
84
85
|
|
|
85
86
|
Shared flags: `--orchestrator {single,pipeline,debate,reflexion,challenge}`,
|
|
86
|
-
`--provider {anthropic,openai,litellm}`, `--model`,
|
|
87
|
+
`--provider {anthropic,openai,litellm}`, `--model`,
|
|
88
|
+
`--format {text,markdown,json,sarif}`.
|
|
89
|
+
|
|
90
|
+
`--format sarif` emits a SARIF 2.1.0 log (validates against the official schema)
|
|
91
|
+
for CI and security dashboards: each problem with a code location becomes a
|
|
92
|
+
result carrying its capability (as the rule id), CWE, and a precise location.
|
|
87
93
|
|
|
88
94
|
Findings in known-noise categories (availability/DoS, rate limiting, memory safety
|
|
89
95
|
outside C/C++) are dropped by versioned rules in
|
|
90
96
|
`codejury/data/suppressions.yaml`; disable with `--no-suppress`.
|
|
91
97
|
|
|
98
|
+
`codejury eval` takes `--dataset <dir>` (golden YAML directory), `--split <name>`
|
|
99
|
+
(score only cases tagged with that `split:`, e.g. a held-out set), and
|
|
100
|
+
`--format {text,json}` -- the JSON report is a stable schema (overall plus
|
|
101
|
+
per-capability confusion matrix and precision / recall / F1).
|
|
102
|
+
|
|
103
|
+
Runs are deterministic: providers query at temperature 0, and `audit` / `scan`
|
|
104
|
+
cache each verdict on a hash of the normalized code, the in-scope capability
|
|
105
|
+
versions, and the orchestration. Re-auditing unchanged code returns the recorded
|
|
106
|
+
verdicts without re-querying the model; editing a capability YAML changes its
|
|
107
|
+
fingerprint and invalidates affected entries. Pass `--no-cache` to always
|
|
108
|
+
re-query.
|
|
109
|
+
|
|
92
110
|
```bash
|
|
93
111
|
# Multi-round adversarial debate, rendered as Markdown
|
|
94
112
|
git diff | codejury audit --orchestrator debate --format markdown - > report.md
|
|
@@ -167,11 +185,14 @@ independently.
|
|
|
167
185
|
- **Local-pattern checks are sharper than data-flow ones.** Capabilities judged
|
|
168
186
|
from one spot (weak crypto, hardcoded secrets) are reliable; taint / data-flow
|
|
169
187
|
ones like path traversal over-flag in single-file review because the verifier
|
|
170
|
-
can't see whether a value is attacker-controlled. Mitigations that
|
|
171
|
-
not fully solve it: `scan --callers` (
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
188
|
+
can't see whether a value is attacker-controlled. Mitigations that add context
|
|
189
|
+
but do not fully solve it: `scan --callers` (where this file's functions are
|
|
190
|
+
called) and `scan --callees` (the called code it delegates to, so a sink in
|
|
191
|
+
another file is visible) -- pair them for both directions; `--orchestrator
|
|
192
|
+
challenge` (a recall-safe
|
|
193
|
+
refutation pass that drops only provably-safe flags); `--only` to scope; or
|
|
194
|
+
`--orchestrator debate`. Real taint precision still needs data-flow analysis,
|
|
195
|
+
not model skepticism.
|
|
175
196
|
- **`scan` cost scales as files x capabilities.** It is a periodic deep audit,
|
|
176
197
|
not a quick check -- scope it with `--only`. Day to day, audit the diff.
|
|
177
198
|
|
|
@@ -51,15 +51,32 @@ git diff | codejury audit --provider anthropic
|
|
|
51
51
|
| `codejury audit [diff]` | Audit a unified diff from a file or stdin (`-`). |
|
|
52
52
|
| `codejury scan <dir>` | Audit a whole directory tree, capability by capability. |
|
|
53
53
|
| `codejury run <task>` | Run a named task preset (see [Tasks](#tasks)). |
|
|
54
|
-
| `codejury eval` | Score the golden cases
|
|
54
|
+
| `codejury eval` | Score the golden cases; report precision / recall / F1, overall and per capability. |
|
|
55
55
|
|
|
56
56
|
Shared flags: `--orchestrator {single,pipeline,debate,reflexion,challenge}`,
|
|
57
|
-
`--provider {anthropic,openai,litellm}`, `--model`,
|
|
57
|
+
`--provider {anthropic,openai,litellm}`, `--model`,
|
|
58
|
+
`--format {text,markdown,json,sarif}`.
|
|
59
|
+
|
|
60
|
+
`--format sarif` emits a SARIF 2.1.0 log (validates against the official schema)
|
|
61
|
+
for CI and security dashboards: each problem with a code location becomes a
|
|
62
|
+
result carrying its capability (as the rule id), CWE, and a precise location.
|
|
58
63
|
|
|
59
64
|
Findings in known-noise categories (availability/DoS, rate limiting, memory safety
|
|
60
65
|
outside C/C++) are dropped by versioned rules in
|
|
61
66
|
`codejury/data/suppressions.yaml`; disable with `--no-suppress`.
|
|
62
67
|
|
|
68
|
+
`codejury eval` takes `--dataset <dir>` (golden YAML directory), `--split <name>`
|
|
69
|
+
(score only cases tagged with that `split:`, e.g. a held-out set), and
|
|
70
|
+
`--format {text,json}` -- the JSON report is a stable schema (overall plus
|
|
71
|
+
per-capability confusion matrix and precision / recall / F1).
|
|
72
|
+
|
|
73
|
+
Runs are deterministic: providers query at temperature 0, and `audit` / `scan`
|
|
74
|
+
cache each verdict on a hash of the normalized code, the in-scope capability
|
|
75
|
+
versions, and the orchestration. Re-auditing unchanged code returns the recorded
|
|
76
|
+
verdicts without re-querying the model; editing a capability YAML changes its
|
|
77
|
+
fingerprint and invalidates affected entries. Pass `--no-cache` to always
|
|
78
|
+
re-query.
|
|
79
|
+
|
|
63
80
|
```bash
|
|
64
81
|
# Multi-round adversarial debate, rendered as Markdown
|
|
65
82
|
git diff | codejury audit --orchestrator debate --format markdown - > report.md
|
|
@@ -138,11 +155,14 @@ independently.
|
|
|
138
155
|
- **Local-pattern checks are sharper than data-flow ones.** Capabilities judged
|
|
139
156
|
from one spot (weak crypto, hardcoded secrets) are reliable; taint / data-flow
|
|
140
157
|
ones like path traversal over-flag in single-file review because the verifier
|
|
141
|
-
can't see whether a value is attacker-controlled. Mitigations that
|
|
142
|
-
not fully solve it: `scan --callers` (
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
158
|
+
can't see whether a value is attacker-controlled. Mitigations that add context
|
|
159
|
+
but do not fully solve it: `scan --callers` (where this file's functions are
|
|
160
|
+
called) and `scan --callees` (the called code it delegates to, so a sink in
|
|
161
|
+
another file is visible) -- pair them for both directions; `--orchestrator
|
|
162
|
+
challenge` (a recall-safe
|
|
163
|
+
refutation pass that drops only provably-safe flags); `--only` to scope; or
|
|
164
|
+
`--orchestrator debate`. Real taint precision still needs data-flow analysis,
|
|
165
|
+
not model skepticism.
|
|
146
166
|
- **`scan` cost scales as files x capabilities.** It is a periodic deep audit,
|
|
147
167
|
not a quick check -- scope it with `--only`. Day to day, audit the diff.
|
|
148
168
|
|
|
@@ -5,4 +5,9 @@ Domain knowledge lives in YAML capability files as a first-class citizen,
|
|
|
5
5
|
aligned with OWASP ASVS.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
__version__ = version("codejury")
|
|
12
|
+
except PackageNotFoundError: # running from a source tree without an install
|
|
13
|
+
__version__ = "0.0.0"
|
|
@@ -93,15 +93,27 @@ def _build_prompt(path: str, content: str, cap: Capability, context: str = "") -
|
|
|
93
93
|
)
|
|
94
94
|
|
|
95
95
|
|
|
96
|
+
def _anti_pattern_cwes(cap: Capability) -> dict[str, str]:
|
|
97
|
+
"""Map anti_pattern id -> CWE, so a verdict can inherit the CWE it matched."""
|
|
98
|
+
return {
|
|
99
|
+
p.id: p.cwe
|
|
100
|
+
for sub in cap.sub_capabilities.values()
|
|
101
|
+
for p in sub.anti_patterns
|
|
102
|
+
if p.cwe
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
|
|
96
106
|
def _parse_verdicts(text: str, cap: Capability) -> list[Verdict]:
|
|
97
107
|
obj = extract_json_object(text)
|
|
98
108
|
if not obj:
|
|
99
109
|
return []
|
|
110
|
+
cwe_by_id = _anti_pattern_cwes(cap)
|
|
100
111
|
out: list[Verdict] = []
|
|
101
112
|
for v in obj.get("verdicts", []):
|
|
102
113
|
if not isinstance(v, dict):
|
|
103
114
|
continue
|
|
104
115
|
sub = str(v.get("sub_capability", "")).strip()
|
|
116
|
+
matched_anti = str_list(v.get("matched_anti"))
|
|
105
117
|
out.append(
|
|
106
118
|
Verdict(
|
|
107
119
|
capability=f"{cap.id}.{sub}" if sub else cap.id,
|
|
@@ -109,7 +121,8 @@ def _parse_verdicts(text: str, cap: Capability) -> list[Verdict]:
|
|
|
109
121
|
status=one_of(v.get("status"), _VALID_STATUS, "UNKNOWN"),
|
|
110
122
|
reasoning=str(v.get("reasoning", "")),
|
|
111
123
|
matched_correct=str_list(v.get("matched_correct")),
|
|
112
|
-
matched_anti=
|
|
124
|
+
matched_anti=matched_anti,
|
|
125
|
+
cwe=next((cwe_by_id[a] for a in matched_anti if a in cwe_by_id), ""),
|
|
113
126
|
evidence=to_evidence(v.get("evidence")),
|
|
114
127
|
confidence=to_float(v.get("confidence"), 0.5),
|
|
115
128
|
)
|
|
@@ -16,6 +16,7 @@ from codejury.domain.artifact import CodeArtifact
|
|
|
16
16
|
from codejury.domain.capability import Capability
|
|
17
17
|
from codejury.domain.context import AnalysisContext
|
|
18
18
|
from codejury.domain.result import AnalysisResult
|
|
19
|
+
from codejury.infrastructure.cache import VerdictCache, verdict_key
|
|
19
20
|
from codejury.orchestrators.base import Orchestrator
|
|
20
21
|
from codejury.orchestrators.challenge import ChallengeOrchestrator
|
|
21
22
|
from codejury.orchestrators.debate import DebateOrchestrator
|
|
@@ -75,17 +76,38 @@ def build_orchestration(
|
|
|
75
76
|
return verifier, SingleOrchestrator()
|
|
76
77
|
|
|
77
78
|
|
|
79
|
+
def orchestration_descriptor(strategy: str, model: str, max_tokens: int) -> str:
|
|
80
|
+
"""The non-code, non-capability inputs that affect a verdict, as a cache tag."""
|
|
81
|
+
return f"{strategy}|{model}|{max_tokens}"
|
|
82
|
+
|
|
83
|
+
|
|
78
84
|
def run_over_artifacts(
|
|
79
85
|
artifacts: list[CodeArtifact],
|
|
80
86
|
capabilities: list[Capability],
|
|
81
87
|
agents: dict[str, Agent],
|
|
82
88
|
orchestrator: Orchestrator,
|
|
89
|
+
*,
|
|
90
|
+
cache: VerdictCache | None = None,
|
|
91
|
+
orchestration: str = "",
|
|
83
92
|
) -> list[tuple[str, AnalysisResult]]:
|
|
84
|
-
"""Run the orchestration over each artifact, returning (path, result) per artifact.
|
|
93
|
+
"""Run the orchestration over each artifact, returning (path, result) per artifact.
|
|
94
|
+
|
|
95
|
+
When ``cache`` is given, an unchanged artifact returns its recorded result
|
|
96
|
+
instead of re-running the orchestrator (determinism, invariant 2).
|
|
97
|
+
"""
|
|
85
98
|
results = []
|
|
86
99
|
for artifact in artifacts:
|
|
100
|
+
if cache is not None:
|
|
101
|
+
key = verdict_key(artifact, capabilities, orchestration=orchestration)
|
|
102
|
+
hit = cache.get(key)
|
|
103
|
+
if hit is not None:
|
|
104
|
+
results.append((artifact.path, hit))
|
|
105
|
+
continue
|
|
87
106
|
ctx = AnalysisContext(artifact=artifact, capabilities=capabilities)
|
|
88
|
-
|
|
107
|
+
result = orchestrator.run(agents, ctx)
|
|
108
|
+
if cache is not None:
|
|
109
|
+
cache.put(key, result)
|
|
110
|
+
results.append((artifact.path, result))
|
|
89
111
|
return results
|
|
90
112
|
|
|
91
113
|
|
|
@@ -94,5 +116,11 @@ def run_over_source(
|
|
|
94
116
|
capabilities: list[Capability],
|
|
95
117
|
agents: dict[str, Agent],
|
|
96
118
|
orchestrator: Orchestrator,
|
|
119
|
+
*,
|
|
120
|
+
cache: VerdictCache | None = None,
|
|
121
|
+
orchestration: str = "",
|
|
97
122
|
) -> list[tuple[str, AnalysisResult]]:
|
|
98
|
-
return run_over_artifacts(
|
|
123
|
+
return run_over_artifacts(
|
|
124
|
+
source.list_artifacts(), capabilities, agents, orchestrator,
|
|
125
|
+
cache=cache, orchestration=orchestration,
|
|
126
|
+
)
|
|
@@ -9,6 +9,7 @@ library, backed by the Anthropic provider, under a chosen orchestration strategy
|
|
|
9
9
|
from __future__ import annotations
|
|
10
10
|
|
|
11
11
|
import argparse
|
|
12
|
+
import json
|
|
12
13
|
import os
|
|
13
14
|
import sys
|
|
14
15
|
|
|
@@ -21,6 +22,7 @@ from codejury.assembly import (
|
|
|
21
22
|
STRATEGIES,
|
|
22
23
|
build_orchestration,
|
|
23
24
|
make_provider,
|
|
25
|
+
orchestration_descriptor,
|
|
24
26
|
run_over_artifacts,
|
|
25
27
|
run_over_source,
|
|
26
28
|
)
|
|
@@ -29,11 +31,12 @@ from codejury.domain.capability import Capability, load_capabilities
|
|
|
29
31
|
from codejury.domain.context import AnalysisContext
|
|
30
32
|
from codejury.domain.observation import Observation
|
|
31
33
|
from codejury.domain.result import AnalysisResult
|
|
32
|
-
from codejury.evaluation import
|
|
34
|
+
from codejury.evaluation import EvalReport, evaluate, load_cases
|
|
35
|
+
from codejury.infrastructure.cache import VerdictCache
|
|
33
36
|
from codejury.orchestrators.single import SingleOrchestrator
|
|
34
37
|
from codejury.providers.base import Provider
|
|
35
38
|
from codejury.providers.mock import MockProvider
|
|
36
|
-
from codejury.reporting import to_json, to_markdown
|
|
39
|
+
from codejury.reporting import to_json, to_markdown, to_sarif
|
|
37
40
|
from codejury.resources import CAPABILITIES_DIR, GOLDEN_DIR, SUPPRESSIONS_FILE, TASKS_DIR
|
|
38
41
|
from codejury.suppression import filter_results, load_suppressions
|
|
39
42
|
from codejury.integrations.github import build_review, parse_pr_ref, post_review
|
|
@@ -43,7 +46,7 @@ from codejury.sources.repo import RepoSource
|
|
|
43
46
|
from codejury.tasks.base import run_task
|
|
44
47
|
from codejury.tasks.registry import load_tasks
|
|
45
48
|
|
|
46
|
-
_FORMATS = ("text", "markdown", "json")
|
|
49
|
+
_FORMATS = ("text", "markdown", "json", "sarif")
|
|
47
50
|
|
|
48
51
|
|
|
49
52
|
def dry_run() -> AnalysisResult:
|
|
@@ -69,10 +72,14 @@ def audit(
|
|
|
69
72
|
model: str,
|
|
70
73
|
max_tokens: int = 2048,
|
|
71
74
|
strategy: str = "single",
|
|
75
|
+
cache: VerdictCache | None = None,
|
|
72
76
|
) -> list[tuple[str, AnalysisResult]]:
|
|
73
77
|
"""Audit each changed file in `diff_text`, returning (path, result) per file."""
|
|
74
78
|
agents, orchestrator = build_orchestration(strategy, provider=provider, model=model, max_tokens=max_tokens)
|
|
75
|
-
return run_over_source(
|
|
79
|
+
return run_over_source(
|
|
80
|
+
DiffSource(diff_text), capabilities, agents, orchestrator,
|
|
81
|
+
cache=cache, orchestration=orchestration_descriptor(strategy, model, max_tokens),
|
|
82
|
+
)
|
|
76
83
|
|
|
77
84
|
|
|
78
85
|
def scan(
|
|
@@ -86,10 +93,16 @@ def scan(
|
|
|
86
93
|
extensions: tuple[str, ...] = (".py",),
|
|
87
94
|
max_chars: int = 200_000,
|
|
88
95
|
with_callers: bool = False,
|
|
96
|
+
with_callees: bool = False,
|
|
97
|
+
cache: VerdictCache | None = None,
|
|
89
98
|
) -> list[tuple[str, AnalysisResult]]:
|
|
90
99
|
"""Audit every matching file in a directory tree, returning (path, result) per artifact."""
|
|
91
100
|
source = RepoSource(
|
|
92
|
-
directory,
|
|
101
|
+
directory,
|
|
102
|
+
extensions=extensions,
|
|
103
|
+
chunker=Chunker(max_chars=max_chars),
|
|
104
|
+
with_callers=with_callers,
|
|
105
|
+
with_callees=with_callees,
|
|
93
106
|
)
|
|
94
107
|
artifacts = source.list_artifacts()
|
|
95
108
|
calls = len(artifacts) * len(capabilities)
|
|
@@ -98,7 +111,10 @@ def scan(
|
|
|
98
111
|
file=sys.stderr,
|
|
99
112
|
)
|
|
100
113
|
agents, orchestrator = build_orchestration(strategy, provider=provider, model=model, max_tokens=max_tokens)
|
|
101
|
-
return run_over_artifacts(
|
|
114
|
+
return run_over_artifacts(
|
|
115
|
+
artifacts, capabilities, agents, orchestrator,
|
|
116
|
+
cache=cache, orchestration=orchestration_descriptor(strategy, model, max_tokens),
|
|
117
|
+
)
|
|
102
118
|
|
|
103
119
|
|
|
104
120
|
def _render_dry_run(result: AnalysisResult) -> str:
|
|
@@ -137,7 +153,7 @@ def _render_observation(o: Observation) -> str:
|
|
|
137
153
|
|
|
138
154
|
|
|
139
155
|
def _render_results(fmt: str, results: list[tuple[str, AnalysisResult]]) -> str:
|
|
140
|
-
return {"text": _render_audit, "markdown": to_markdown, "json": to_json}[fmt](results)
|
|
156
|
+
return {"text": _render_audit, "markdown": to_markdown, "json": to_json, "sarif": to_sarif}[fmt](results)
|
|
141
157
|
|
|
142
158
|
|
|
143
159
|
def _maybe_suppress(results: list[tuple[str, AnalysisResult]], enabled: bool) -> list[tuple[str, AnalysisResult]]:
|
|
@@ -184,11 +200,16 @@ def _maybe_post_github(ref: str | None, results: list[tuple[str, AnalysisResult]
|
|
|
184
200
|
print(f"github review failed: {exc}", file=sys.stderr)
|
|
185
201
|
|
|
186
202
|
|
|
187
|
-
def
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
203
|
+
def _render_eval(report: EvalReport) -> str:
|
|
204
|
+
def line(label: str, m) -> str:
|
|
205
|
+
return (
|
|
206
|
+
f"{label:<20} tp={m.tp} fp={m.fp} tn={m.tn} fn={m.fn} "
|
|
207
|
+
f"P={m.precision:.2f} R={m.recall:.2f} F1={m.f1:.2f}"
|
|
208
|
+
)
|
|
209
|
+
|
|
210
|
+
lines = [line(f"overall ({report.overall.total} cases)", report.overall)]
|
|
211
|
+
lines += [line(cap, m) for cap, m in sorted(report.by_capability.items())]
|
|
212
|
+
return "\n".join(lines)
|
|
192
213
|
|
|
193
214
|
|
|
194
215
|
def _read_diff(path: str) -> str:
|
|
@@ -216,6 +237,7 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
216
237
|
audit_p.add_argument("--api-base", default=DEFAULT_API_BASE, help="provider base URL (env: CODEJURY_API_BASE)")
|
|
217
238
|
audit_p.add_argument("--api-key", default=DEFAULT_API_KEY, help="provider API key (env: CODEJURY_API_KEY)")
|
|
218
239
|
audit_p.add_argument("--no-suppress", action="store_true", help="disable the known-noise suppression filter")
|
|
240
|
+
audit_p.add_argument("--no-cache", action="store_true", help="bypass the verdict cache (always re-query the model)")
|
|
219
241
|
audit_p.add_argument("--fail-on", choices=_FAIL_ON, default=None, dest="fail_on", help="exit 1 if a finding at/above this severity is found")
|
|
220
242
|
audit_p.add_argument("--github", default=None, help="post a PR review: owner/repo#number (needs GITHUB_TOKEN)")
|
|
221
243
|
|
|
@@ -231,11 +253,15 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
231
253
|
scan_p.add_argument("--max-tokens", type=int, default=2048)
|
|
232
254
|
scan_p.add_argument("--max-chars", type=int, default=200_000, help="chunk budget; default keeps whole files")
|
|
233
255
|
scan_p.add_argument(
|
|
234
|
-
"--callers", action="store_true", help="add cross-file
|
|
256
|
+
"--callers", action="store_true", help="add cross-file context: where this file's functions are called"
|
|
257
|
+
)
|
|
258
|
+
scan_p.add_argument(
|
|
259
|
+
"--callees", action="store_true", help="add cross-file context: the called code this file delegates to"
|
|
235
260
|
)
|
|
236
261
|
scan_p.add_argument("--api-base", default=DEFAULT_API_BASE, help="provider base URL (env: CODEJURY_API_BASE)")
|
|
237
262
|
scan_p.add_argument("--api-key", default=DEFAULT_API_KEY, help="provider API key (env: CODEJURY_API_KEY)")
|
|
238
263
|
scan_p.add_argument("--no-suppress", action="store_true", help="disable the known-noise suppression filter")
|
|
264
|
+
scan_p.add_argument("--no-cache", action="store_true", help="bypass the verdict cache (always re-query the model)")
|
|
239
265
|
scan_p.add_argument("--fail-on", choices=_FAIL_ON, default=None, dest="fail_on", help="exit 1 if a finding at/above this severity is found")
|
|
240
266
|
|
|
241
267
|
run_p = sub.add_parser("run", help="run a named task preset against a unified diff")
|
|
@@ -248,9 +274,11 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
248
274
|
run_p.add_argument("--fail-on", choices=_FAIL_ON, default=None, dest="fail_on", help="exit 1 if a finding at/above this severity is found")
|
|
249
275
|
|
|
250
276
|
eval_p = sub.add_parser("eval", help="score golden cases and report precision/recall")
|
|
251
|
-
eval_p.add_argument("--
|
|
277
|
+
eval_p.add_argument("--dataset", default=GOLDEN_DIR, help="golden case YAML directory")
|
|
278
|
+
eval_p.add_argument("--split", default=None, help="only score cases whose 'split' matches (e.g. held-out)")
|
|
252
279
|
eval_p.add_argument("--capabilities", default=CAPABILITIES_DIR, help="capability YAML directory")
|
|
253
280
|
eval_p.add_argument("--provider", choices=PROVIDERS, default="anthropic")
|
|
281
|
+
eval_p.add_argument("--format", choices=("text", "json"), default="text", dest="fmt")
|
|
254
282
|
eval_p.add_argument("--model", default=DEFAULT_MODEL)
|
|
255
283
|
eval_p.add_argument("--api-base", default=DEFAULT_API_BASE, help="provider base URL (env: CODEJURY_API_BASE)")
|
|
256
284
|
eval_p.add_argument("--api-key", default=DEFAULT_API_KEY, help="provider API key (env: CODEJURY_API_KEY)")
|
|
@@ -267,6 +295,7 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
267
295
|
model=args.model,
|
|
268
296
|
max_tokens=args.max_tokens,
|
|
269
297
|
strategy=args.orchestrator,
|
|
298
|
+
cache=None if args.no_cache else VerdictCache(),
|
|
270
299
|
)
|
|
271
300
|
results = _maybe_suppress(results, not args.no_suppress)
|
|
272
301
|
print(_render_results(args.fmt, results))
|
|
@@ -289,6 +318,8 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
289
318
|
extensions=extensions,
|
|
290
319
|
max_chars=args.max_chars,
|
|
291
320
|
with_callers=args.callers,
|
|
321
|
+
with_callees=args.callees,
|
|
322
|
+
cache=None if args.no_cache else VerdictCache(),
|
|
292
323
|
)
|
|
293
324
|
results = _maybe_suppress(results, not args.no_suppress)
|
|
294
325
|
print(_render_results(args.fmt, results))
|
|
@@ -308,8 +339,8 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
308
339
|
|
|
309
340
|
if args.command == "eval":
|
|
310
341
|
try:
|
|
311
|
-
|
|
312
|
-
load_cases(args.
|
|
342
|
+
report = evaluate(
|
|
343
|
+
load_cases(args.dataset, split=args.split),
|
|
313
344
|
load_capabilities(args.capabilities),
|
|
314
345
|
provider=make_provider(args.provider, api_key=args.api_key, api_base=args.api_base),
|
|
315
346
|
model=args.model,
|
|
@@ -319,7 +350,7 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
319
350
|
# as one line, not a traceback (audit gets this via the orchestrator).
|
|
320
351
|
print(f"eval failed: {exc}")
|
|
321
352
|
return 1
|
|
322
|
-
print(
|
|
353
|
+
print(json.dumps(report.to_dict(), indent=2) if args.fmt == "json" else _render_eval(report))
|
|
323
354
|
return 0
|
|
324
355
|
|
|
325
356
|
if args.command in (None, "dry-run"):
|
|
@@ -46,7 +46,34 @@ sub_capabilities:
|
|
|
46
46
|
signals: ["admin:admin", "password=admin", "changeme"]
|
|
47
47
|
why_bad: Default credentials are public knowledge and trivially abused
|
|
48
48
|
|
|
49
|
+
transport_security:
|
|
50
|
+
correct_patterns:
|
|
51
|
+
- id: TLS-OK-1
|
|
52
|
+
description: >-
|
|
53
|
+
Leave TLS certificate verification at its secure default -- verify omitted or
|
|
54
|
+
verify=True, the default SSL context, hostname checking on
|
|
55
|
+
signals: ["verify=True", "create_default_context", "requests.get(", "requests.post("]
|
|
56
|
+
why_ok: >-
|
|
57
|
+
The secure default validates the certificate chain and hostname. An https:// call
|
|
58
|
+
that does not disable verification is fine; do not flag it just for making a
|
|
59
|
+
request or for omitting verify.
|
|
60
|
+
|
|
61
|
+
anti_patterns:
|
|
62
|
+
- id: TLS-BAD-1
|
|
63
|
+
cwe: CWE-295
|
|
64
|
+
severity: HIGH
|
|
65
|
+
description: >-
|
|
66
|
+
Disable TLS certificate or hostname verification -- verify=False, CERT_NONE,
|
|
67
|
+
check_hostname=False, or an unverified SSL context
|
|
68
|
+
signals: ["verify=False", "CERT_NONE", "check_hostname = False", "_create_unverified_context"]
|
|
69
|
+
why_bad: An unverified TLS connection is open to a man-in-the-middle despite https://
|
|
70
|
+
example_bad: |
|
|
71
|
+
requests.get("https://api.partner.com/data", verify=False)
|
|
72
|
+
example_good: |
|
|
73
|
+
requests.get("https://api.partner.com/data") # verify defaults to True
|
|
74
|
+
|
|
49
75
|
trigger_signals:
|
|
50
76
|
- dependency manifests and lock files
|
|
51
77
|
- install or bootstrap scripts fetching remote code
|
|
52
78
|
- file permission, bucket ACL, or default credential settings
|
|
79
|
+
- TLS client calls that set verify or build a custom SSL context
|
|
@@ -105,7 +105,67 @@ sub_capabilities:
|
|
|
105
105
|
if not target.is_relative_to(UPLOAD_DIR):
|
|
106
106
|
raise ValueError("path escapes upload dir")
|
|
107
107
|
|
|
108
|
+
ssrf:
|
|
109
|
+
correct_patterns:
|
|
110
|
+
- id: SSRF-OK-1
|
|
111
|
+
description: Validate the request URL's host against an allowlist before fetching it
|
|
112
|
+
signals: ["urlparse(", ".hostname", "ALLOWED", "allowlist"]
|
|
113
|
+
why_ok: An attacker cannot redirect the fetch to an internal target the list omits
|
|
114
|
+
|
|
115
|
+
- id: SSRF-OK-2
|
|
116
|
+
description: >-
|
|
117
|
+
Fetch a URL that is not attacker-controlled -- a constant, a value from trusted
|
|
118
|
+
config, or an operator-supplied argument
|
|
119
|
+
why_ok: >-
|
|
120
|
+
SSRF needs an external attacker to control the destination. A constant URL or one
|
|
121
|
+
from trusted config is not a finding, even though it goes through a fetch call.
|
|
122
|
+
|
|
123
|
+
anti_patterns:
|
|
124
|
+
- id: SSRF-BAD-1
|
|
125
|
+
cwe: CWE-918
|
|
126
|
+
severity: HIGH
|
|
127
|
+
description: >-
|
|
128
|
+
Fetch a URL taken from externally controlled input (HTTP request, form, query, or
|
|
129
|
+
message field) without validating its host against an allowlist. NOT this: a
|
|
130
|
+
constant URL, one from trusted config, or an operator-supplied argument.
|
|
131
|
+
signals: ["requests.get(", "urllib.request.urlopen(", "httpx.", "request.args", "request.json"]
|
|
132
|
+
why_bad: >-
|
|
133
|
+
The server makes the request, so attacker input reaches internal-only targets --
|
|
134
|
+
cloud metadata, localhost admin ports, internal APIs behind the firewall.
|
|
135
|
+
example_bad: |
|
|
136
|
+
requests.get(request.args["url"]).text
|
|
137
|
+
example_good: |
|
|
138
|
+
if urlparse(url).hostname not in ALLOWED_HOSTS:
|
|
139
|
+
raise ValueError("host not allowed")
|
|
140
|
+
requests.get(url).text
|
|
141
|
+
|
|
142
|
+
insecure_deserialization:
|
|
143
|
+
correct_patterns:
|
|
144
|
+
- id: DESER-OK-1
|
|
145
|
+
description: >-
|
|
146
|
+
Parse untrusted input with a data-only parser -- json.loads or yaml.safe_load --
|
|
147
|
+
that cannot instantiate arbitrary objects
|
|
148
|
+
signals: ["json.loads", "yaml.safe_load"]
|
|
149
|
+
why_ok: A data-only parser builds plain structures and has no code-execution path
|
|
150
|
+
|
|
151
|
+
anti_patterns:
|
|
152
|
+
- id: DESER-BAD-1
|
|
153
|
+
cwe: CWE-502
|
|
154
|
+
severity: CRITICAL
|
|
155
|
+
description: >-
|
|
156
|
+
Deserialize externally controlled bytes with an object-constructing deserializer --
|
|
157
|
+
pickle, marshal, yaml.load (unsafe Loader), or jsonpickle. NOT this: a data-only
|
|
158
|
+
parser like json.loads or yaml.safe_load.
|
|
159
|
+
signals: ["pickle.loads", "pickle.load(", "yaml.load(", "marshal.loads", "jsonpickle.decode"]
|
|
160
|
+
why_bad: These reconstruct arbitrary objects, so crafted input runs code during deserialization
|
|
161
|
+
example_bad: |
|
|
162
|
+
pickle.loads(base64.b64decode(request.data))
|
|
163
|
+
example_good: |
|
|
164
|
+
json.loads(request.data)
|
|
165
|
+
|
|
108
166
|
trigger_signals:
|
|
109
167
|
- raw SQL strings or cursor.execute calls appear
|
|
110
168
|
- imports of os, subprocess, or shlex with process execution
|
|
111
169
|
- file paths built from request, form, or query parameters
|
|
170
|
+
- outbound HTTP fetches (requests, urllib, httpx) to a non-constant URL
|
|
171
|
+
- deserialization calls (pickle, yaml.load, marshal) on external input
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Adversarial positive: weak password hash hidden behind hashlib.new(variable).
|
|
2
|
+
capability: authn
|
|
3
|
+
vulnerable: true
|
|
4
|
+
expected_verdict: VULNERABLE
|
|
5
|
+
cwe: CWE-916
|
|
6
|
+
source: synthetic
|
|
7
|
+
notes: >
|
|
8
|
+
hashlib.new("md5") is the same weak, unsalted, fast hash as hashlib.md5(), just
|
|
9
|
+
reached through a variable algorithm name. Unsuitable for password storage
|
|
10
|
+
(needs bcrypt/scrypt/argon2). The indirection should not hide it.
|
|
11
|
+
code: |
|
|
12
|
+
def hash_pw(pw):
|
|
13
|
+
algo = "md5"
|
|
14
|
+
return hashlib.new(algo, pw.encode()).hexdigest()
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
capability: business_logic
|
|
2
|
+
vulnerable: true
|
|
3
|
+
expected_verdict: VULNERABLE
|
|
4
|
+
cwe: CWE-602
|
|
5
|
+
source: synthetic
|
|
6
|
+
split: held-out
|
|
7
|
+
notes: >
|
|
8
|
+
Quantity and unit price come straight from the request and are never checked
|
|
9
|
+
server-side. A negative quantity credits the customer; a client-set price lets
|
|
10
|
+
them pay anything. Price must come from the catalog and quantity must be > 0.
|
|
11
|
+
code: |
|
|
12
|
+
def add_to_order(order, item_id, quantity, unit_price):
|
|
13
|
+
order.lines.append((item_id, quantity, unit_price))
|
|
14
|
+
order.total += quantity * unit_price
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
capability: business_logic
|
|
2
|
+
vulnerable: false
|
|
3
|
+
expected_verdict: SECURE
|
|
4
|
+
cwe: ""
|
|
5
|
+
source: synthetic
|
|
6
|
+
notes: >
|
|
7
|
+
Quantity is validated as positive and the price is looked up server-side from
|
|
8
|
+
the catalog, so the client cannot tamper with either.
|
|
9
|
+
code: |
|
|
10
|
+
def add_to_order(order, item_id, quantity):
|
|
11
|
+
if quantity < 1:
|
|
12
|
+
raise ValueError("quantity must be positive")
|
|
13
|
+
unit_price = catalog.price_of(item_id) # trusted server-side price
|
|
14
|
+
order.lines.append((item_id, quantity, unit_price))
|
|
15
|
+
order.total += quantity * unit_price
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# False-positive-prone negative: a subprocess call driven by a request value
|
|
2
|
+
# looks like command injection, but the value only selects a fixed argv from a
|
|
3
|
+
# table and never reaches a shell.
|
|
4
|
+
capability: input_validation
|
|
5
|
+
vulnerable: false
|
|
6
|
+
expected_verdict: SECURE
|
|
7
|
+
cwe: ""
|
|
8
|
+
source: synthetic
|
|
9
|
+
split: held-out
|
|
10
|
+
notes: >
|
|
11
|
+
`name` only indexes a fixed dict of argument lists (KeyError on anything
|
|
12
|
+
unknown); no user string is interpolated into a command and shell=False.
|
|
13
|
+
Flagging this as command injection is a false positive.
|
|
14
|
+
code: |
|
|
15
|
+
ACTIONS = {
|
|
16
|
+
"start": ["systemctl", "start", "web"],
|
|
17
|
+
"stop": ["systemctl", "stop", "web"],
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
def run_action(name):
|
|
21
|
+
argv = ACTIONS[name]
|
|
22
|
+
subprocess.run(argv, shell=False)
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
capability: data_protection
|
|
2
|
+
vulnerable: true
|
|
3
|
+
expected_verdict: VULNERABLE
|
|
4
|
+
cwe: CWE-312
|
|
5
|
+
source: synthetic
|
|
6
|
+
notes: >
|
|
7
|
+
The full card number and the CVV are written to the database in cleartext.
|
|
8
|
+
The CVV must never be stored at all, and the PAN must be tokenized/encrypted.
|
|
9
|
+
code: |
|
|
10
|
+
def save_card(user_id, card_number, cvv):
|
|
11
|
+
db.execute(
|
|
12
|
+
"INSERT INTO cards (user_id, pan, cvv) VALUES (?, ?, ?)",
|
|
13
|
+
(user_id, card_number, cvv),
|
|
14
|
+
)
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
capability: data_protection
|
|
2
|
+
vulnerable: false
|
|
3
|
+
expected_verdict: SECURE
|
|
4
|
+
cwe: ""
|
|
5
|
+
source: synthetic
|
|
6
|
+
split: held-out
|
|
7
|
+
notes: >
|
|
8
|
+
The PAN is replaced by a vault token before storage; the CVV is never
|
|
9
|
+
persisted. Nothing sensitive reaches the database.
|
|
10
|
+
code: |
|
|
11
|
+
def save_card(user_id, card_number):
|
|
12
|
+
token = vault.tokenize(card_number) # store a token, never the PAN
|
|
13
|
+
db.execute(
|
|
14
|
+
"INSERT INTO cards (user_id, token) VALUES (?, ?)",
|
|
15
|
+
(user_id, token),
|
|
16
|
+
)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
capability: dependency_config
|
|
2
|
+
vulnerable: true
|
|
3
|
+
expected_verdict: VULNERABLE
|
|
4
|
+
cwe: CWE-295
|
|
5
|
+
source: synthetic
|
|
6
|
+
notes: >
|
|
7
|
+
TLS certificate verification is disabled, so the connection is open to a
|
|
8
|
+
man-in-the-middle despite the https:// scheme.
|
|
9
|
+
code: |
|
|
10
|
+
def fetch_partner_data():
|
|
11
|
+
return requests.get("https://api.partner.com/data", verify=False).json()
|