codejury 0.5.1__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codejury-0.5.1 → codejury-0.6.0}/PKG-INFO +21 -3
- {codejury-0.5.1 → codejury-0.6.0}/README.md +19 -2
- {codejury-0.5.1 → codejury-0.6.0}/codejury/__init__.py +6 -1
- {codejury-0.5.1 → codejury-0.6.0}/codejury/agents/verifier.py +14 -1
- {codejury-0.5.1 → codejury-0.6.0}/codejury/assembly.py +31 -3
- {codejury-0.5.1 → codejury-0.6.0}/codejury/cli.py +37 -15
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/capabilities/dependency_config.yaml +27 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/capabilities/input_validation.yaml +60 -0
- codejury-0.6.0/codejury/data/golden/authn_weak_hash_indirect_vuln.yaml +14 -0
- codejury-0.6.0/codejury/data/golden/business_logic_price_tamper_vuln.yaml +14 -0
- codejury-0.6.0/codejury/data/golden/business_logic_server_checked_safe.yaml +15 -0
- codejury-0.6.0/codejury/data/golden/cmdi_fixed_argv_safe.yaml +22 -0
- codejury-0.6.0/codejury/data/golden/data_protection_plaintext_pii_vuln.yaml +14 -0
- codejury-0.6.0/codejury/data/golden/data_protection_tokenized_safe.yaml +16 -0
- codejury-0.6.0/codejury/data/golden/dependency_config_tls_verify_off_vuln.yaml +11 -0
- codejury-0.6.0/codejury/data/golden/dependency_config_tls_verify_on_safe.yaml +11 -0
- codejury-0.6.0/codejury/data/golden/deserialize_json_safe.yaml +13 -0
- codejury-0.6.0/codejury/data/golden/deserialize_pickle_vuln.yaml +12 -0
- codejury-0.6.0/codejury/data/golden/error_logging_redacted_safe.yaml +11 -0
- codejury-0.6.0/codejury/data/golden/error_logging_secret_leak_vuln.yaml +12 -0
- codejury-0.6.0/codejury/data/golden/literal_eval_safe.yaml +14 -0
- codejury-0.6.0/codejury/data/golden/path_basename_safe.yaml +15 -0
- codejury-0.6.0/codejury/data/golden/session_fixation_vuln.yaml +16 -0
- codejury-0.6.0/codejury/data/golden/session_secure_cookie_safe.yaml +18 -0
- codejury-0.6.0/codejury/data/golden/sql_constant_concat_safe.yaml +14 -0
- codejury-0.6.0/codejury/data/golden/sqli_indirect_var_vuln.yaml +16 -0
- codejury-0.6.0/codejury/data/golden/ssrf_allowlist_safe.yaml +17 -0
- codejury-0.6.0/codejury/data/golden/ssrf_constant_url_safe.yaml +13 -0
- codejury-0.6.0/codejury/data/golden/ssrf_substring_allowlist_bypass_vuln.yaml +17 -0
- codejury-0.6.0/codejury/data/golden/ssrf_user_url_vuln.yaml +12 -0
- codejury-0.6.0/codejury/data/golden/xfile_idor_no_check_vuln.yaml +20 -0
- codejury-0.6.0/codejury/data/golden/xfile_idor_owner_checked_safe.yaml +21 -0
- codejury-0.6.0/codejury/data/golden/xfile_path_sanitized_safe.yaml +22 -0
- codejury-0.6.0/codejury/data/golden/xfile_path_tainted_vuln.yaml +19 -0
- codejury-0.6.0/codejury/data/golden/xss_textcontent_safe.yaml +15 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/domain/capability.py +16 -1
- {codejury-0.5.1 → codejury-0.6.0}/codejury/domain/observation.py +20 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/domain/result.py +15 -1
- {codejury-0.5.1 → codejury-0.6.0}/codejury/evaluation.py +62 -10
- codejury-0.6.0/codejury/infrastructure/cache.py +76 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/providers/anthropic.py +1 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/providers/litellm.py +1 -1
- {codejury-0.5.1 → codejury-0.6.0}/codejury/providers/openai.py +1 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/reporting.py +106 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury.egg-info/PKG-INFO +21 -3
- {codejury-0.5.1 → codejury-0.6.0}/codejury.egg-info/SOURCES.txt +30 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury.egg-info/requires.txt +1 -0
- {codejury-0.5.1 → codejury-0.6.0}/pyproject.toml +2 -2
- {codejury-0.5.1 → codejury-0.6.0}/tests/test_anthropic_provider.py +1 -0
- codejury-0.6.0/tests/test_cache.py +117 -0
- codejury-0.6.0/tests/test_evaluation.py +152 -0
- {codejury-0.5.1 → codejury-0.6.0}/tests/test_litellm_provider.py +1 -0
- {codejury-0.5.1 → codejury-0.6.0}/tests/test_openai_provider.py +1 -0
- codejury-0.6.0/tests/test_sarif.py +115 -0
- codejury-0.5.1/tests/test_evaluation.py +0 -69
- {codejury-0.5.1 → codejury-0.6.0}/LICENSE +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/agents/__init__.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/agents/base.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/agents/debate.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/agents/mock.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/agents/parsing.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/agents/refuter.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/capabilities/authentication.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/capabilities/authorization.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/capabilities/business_logic.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/capabilities/crypto.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/capabilities/data_protection.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/capabilities/error_logging.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/capabilities/output_encoding.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/capabilities/secrets.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/capabilities/session.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/authn_bcrypt_password.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/authn_jwt_noverify_vuln.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/authn_jwt_verified_safe.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/authn_sha256_checksum_safe.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/authn_sha256_password.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/authz_idor_vuln.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/authz_owner_safe.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/cmdi_ossystem_vuln.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/cmdi_subprocess_safe.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/crypto_aesgcm_safe.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/crypto_ecb_vuln.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/path_contained_safe.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/path_traversal_vuln.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/secrets_env_safe.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/secrets_hardcoded_vuln.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/sqli_format_vuln.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/sqli_fstring_query.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/sqli_parameterized_query.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/xss_innerhtml_constant_safe.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/xss_innerhtml_vuln.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/suppressions.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/tasks/audit_diff_debate.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/data/tasks/quick_scan_single.yaml +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/domain/__init__.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/domain/artifact.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/domain/context.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/infrastructure/__init__.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/infrastructure/json_parse.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/integrations/__init__.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/integrations/github.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/orchestrators/__init__.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/orchestrators/base.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/orchestrators/challenge.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/orchestrators/debate.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/orchestrators/pipeline.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/orchestrators/reflexion.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/orchestrators/single.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/providers/__init__.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/providers/base.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/providers/mock.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/providers/openai_format.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/providers/retry.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/resources.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/sources/__init__.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/sources/base.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/sources/callers.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/sources/chunker.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/sources/diff.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/sources/function.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/sources/mock.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/sources/repo.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/suppression.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/tasks/__init__.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/tasks/base.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury/tasks/registry.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury.egg-info/dependency_links.txt +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury.egg-info/entry_points.txt +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/codejury.egg-info/top_level.txt +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/setup.cfg +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/tests/test_assembly.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/tests/test_audit_pipeline.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/tests/test_callers.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/tests/test_capability.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/tests/test_challenge.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/tests/test_cli_audit.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/tests/test_context.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/tests/test_debate_agents.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/tests/test_debate_orchestrator.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/tests/test_diff_source.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/tests/test_function_source.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/tests/test_integrations.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/tests/test_json_parse.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/tests/test_orchestrator.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/tests/test_pipeline_orchestrator.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/tests/test_reflexion_orchestrator.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/tests/test_repo_source.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/tests/test_reporting.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/tests/test_retry_provider.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/tests/test_suppression.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/tests/test_tasks.py +0 -0
- {codejury-0.5.1 → codejury-0.6.0}/tests/test_verifier.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codejury
|
|
3
|
-
Version: 0.5.1
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: General-purpose Application Security AI audit framework -- five-layer architecture, capabilities as first-class data
|
|
5
5
|
Author: AISecLabs
|
|
6
6
|
License-Expression: MIT
|
|
@@ -25,6 +25,7 @@ Provides-Extra: litellm
|
|
|
25
25
|
Requires-Dist: litellm>=1.0; extra == "litellm"
|
|
26
26
|
Provides-Extra: dev
|
|
27
27
|
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
28
|
+
Requires-Dist: jsonschema>=4.0; extra == "dev"
|
|
28
29
|
Dynamic: license-file
|
|
29
30
|
|
|
30
31
|
# codejury
|
|
@@ -80,15 +81,32 @@ git diff | codejury audit --provider anthropic
|
|
|
80
81
|
| `codejury audit [diff]` | Audit a unified diff from a file or stdin (`-`). |
|
|
81
82
|
| `codejury scan <dir>` | Audit a whole directory tree, capability by capability. |
|
|
82
83
|
| `codejury run <task>` | Run a named task preset (see [Tasks](#tasks)). |
|
|
83
|
-
| `codejury eval` | Score the golden cases
|
|
84
|
+
| `codejury eval` | Score the golden cases; report precision / recall / F1, overall and per capability. |
|
|
84
85
|
|
|
85
86
|
Shared flags: `--orchestrator {single,pipeline,debate,reflexion,challenge}`,
|
|
86
|
-
`--provider {anthropic,openai,litellm}`, `--model`,
|
|
87
|
+
`--provider {anthropic,openai,litellm}`, `--model`,
|
|
88
|
+
`--format {text,markdown,json,sarif}`.
|
|
89
|
+
|
|
90
|
+
`--format sarif` emits a SARIF 2.1.0 log (validates against the official schema)
|
|
91
|
+
for CI and security dashboards: each problem with a code location becomes a
|
|
92
|
+
result carrying its capability (as the rule id), CWE, and a precise location.
|
|
87
93
|
|
|
88
94
|
Findings in known-noise categories (availability/DoS, rate limiting, memory safety
|
|
89
95
|
outside C/C++) are dropped by versioned rules in
|
|
90
96
|
`codejury/data/suppressions.yaml`; disable with `--no-suppress`.
|
|
91
97
|
|
|
98
|
+
`codejury eval` takes `--dataset <dir>` (golden YAML directory), `--split <name>`
|
|
99
|
+
(score only cases tagged with that `split:`, e.g. a held-out set), and
|
|
100
|
+
`--format {text,json}` -- the JSON report is a stable schema (overall plus
|
|
101
|
+
per-capability confusion matrix and precision / recall / F1).
|
|
102
|
+
|
|
103
|
+
Runs are deterministic: providers query at temperature 0, and `audit` / `scan`
|
|
104
|
+
cache each verdict on a hash of the normalized code, the in-scope capability
|
|
105
|
+
versions, and the orchestration. Re-auditing unchanged code returns the recorded
|
|
106
|
+
verdicts without re-querying the model; editing a capability YAML changes its
|
|
107
|
+
fingerprint and invalidates affected entries. Pass `--no-cache` to always
|
|
108
|
+
re-query.
|
|
109
|
+
|
|
92
110
|
```bash
|
|
93
111
|
# Multi-round adversarial debate, rendered as Markdown
|
|
94
112
|
git diff | codejury audit --orchestrator debate --format markdown - > report.md
|
|
@@ -51,15 +51,32 @@ git diff | codejury audit --provider anthropic
|
|
|
51
51
|
| `codejury audit [diff]` | Audit a unified diff from a file or stdin (`-`). |
|
|
52
52
|
| `codejury scan <dir>` | Audit a whole directory tree, capability by capability. |
|
|
53
53
|
| `codejury run <task>` | Run a named task preset (see [Tasks](#tasks)). |
|
|
54
|
-
| `codejury eval` | Score the golden cases
|
|
54
|
+
| `codejury eval` | Score the golden cases; report precision / recall / F1, overall and per capability. |
|
|
55
55
|
|
|
56
56
|
Shared flags: `--orchestrator {single,pipeline,debate,reflexion,challenge}`,
|
|
57
|
-
`--provider {anthropic,openai,litellm}`, `--model`,
|
|
57
|
+
`--provider {anthropic,openai,litellm}`, `--model`,
|
|
58
|
+
`--format {text,markdown,json,sarif}`.
|
|
59
|
+
|
|
60
|
+
`--format sarif` emits a SARIF 2.1.0 log (validates against the official schema)
|
|
61
|
+
for CI and security dashboards: each problem with a code location becomes a
|
|
62
|
+
result carrying its capability (as the rule id), CWE, and a precise location.
|
|
58
63
|
|
|
59
64
|
Findings in known-noise categories (availability/DoS, rate limiting, memory safety
|
|
60
65
|
outside C/C++) are dropped by versioned rules in
|
|
61
66
|
`codejury/data/suppressions.yaml`; disable with `--no-suppress`.
|
|
62
67
|
|
|
68
|
+
`codejury eval` takes `--dataset <dir>` (golden YAML directory), `--split <name>`
|
|
69
|
+
(score only cases tagged with that `split:`, e.g. a held-out set), and
|
|
70
|
+
`--format {text,json}` -- the JSON report is a stable schema (overall plus
|
|
71
|
+
per-capability confusion matrix and precision / recall / F1).
|
|
72
|
+
|
|
73
|
+
Runs are deterministic: providers query at temperature 0, and `audit` / `scan`
|
|
74
|
+
cache each verdict on a hash of the normalized code, the in-scope capability
|
|
75
|
+
versions, and the orchestration. Re-auditing unchanged code returns the recorded
|
|
76
|
+
verdicts without re-querying the model; editing a capability YAML changes its
|
|
77
|
+
fingerprint and invalidates affected entries. Pass `--no-cache` to always
|
|
78
|
+
re-query.
|
|
79
|
+
|
|
63
80
|
```bash
|
|
64
81
|
# Multi-round adversarial debate, rendered as Markdown
|
|
65
82
|
git diff | codejury audit --orchestrator debate --format markdown - > report.md
|
|
@@ -5,4 +5,9 @@ Domain knowledge lives in YAML capability files as a first-class citizen,
|
|
|
5
5
|
aligned with OWASP ASVS.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
__version__ = version("codejury")
|
|
12
|
+
except PackageNotFoundError: # running from a source tree without an install
|
|
13
|
+
__version__ = "0.0.0"
|
|
@@ -93,15 +93,27 @@ def _build_prompt(path: str, content: str, cap: Capability, context: str = "") -
|
|
|
93
93
|
)
|
|
94
94
|
|
|
95
95
|
|
|
96
|
+
def _anti_pattern_cwes(cap: Capability) -> dict[str, str]:
|
|
97
|
+
"""Map anti_pattern id -> CWE, so a verdict can inherit the CWE it matched."""
|
|
98
|
+
return {
|
|
99
|
+
p.id: p.cwe
|
|
100
|
+
for sub in cap.sub_capabilities.values()
|
|
101
|
+
for p in sub.anti_patterns
|
|
102
|
+
if p.cwe
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
|
|
96
106
|
def _parse_verdicts(text: str, cap: Capability) -> list[Verdict]:
|
|
97
107
|
obj = extract_json_object(text)
|
|
98
108
|
if not obj:
|
|
99
109
|
return []
|
|
110
|
+
cwe_by_id = _anti_pattern_cwes(cap)
|
|
100
111
|
out: list[Verdict] = []
|
|
101
112
|
for v in obj.get("verdicts", []):
|
|
102
113
|
if not isinstance(v, dict):
|
|
103
114
|
continue
|
|
104
115
|
sub = str(v.get("sub_capability", "")).strip()
|
|
116
|
+
matched_anti = str_list(v.get("matched_anti"))
|
|
105
117
|
out.append(
|
|
106
118
|
Verdict(
|
|
107
119
|
capability=f"{cap.id}.{sub}" if sub else cap.id,
|
|
@@ -109,7 +121,8 @@ def _parse_verdicts(text: str, cap: Capability) -> list[Verdict]:
|
|
|
109
121
|
status=one_of(v.get("status"), _VALID_STATUS, "UNKNOWN"),
|
|
110
122
|
reasoning=str(v.get("reasoning", "")),
|
|
111
123
|
matched_correct=str_list(v.get("matched_correct")),
|
|
112
|
-
matched_anti=str_list(v.get("matched_anti")),
|
|
124
|
+
matched_anti=matched_anti,
|
|
125
|
+
cwe=next((cwe_by_id[a] for a in matched_anti if a in cwe_by_id), ""),
|
|
113
126
|
evidence=to_evidence(v.get("evidence")),
|
|
114
127
|
confidence=to_float(v.get("confidence"), 0.5),
|
|
115
128
|
)
|
|
@@ -16,6 +16,7 @@ from codejury.domain.artifact import CodeArtifact
|
|
|
16
16
|
from codejury.domain.capability import Capability
|
|
17
17
|
from codejury.domain.context import AnalysisContext
|
|
18
18
|
from codejury.domain.result import AnalysisResult
|
|
19
|
+
from codejury.infrastructure.cache import VerdictCache, verdict_key
|
|
19
20
|
from codejury.orchestrators.base import Orchestrator
|
|
20
21
|
from codejury.orchestrators.challenge import ChallengeOrchestrator
|
|
21
22
|
from codejury.orchestrators.debate import DebateOrchestrator
|
|
@@ -75,17 +76,38 @@ def build_orchestration(
|
|
|
75
76
|
return verifier, SingleOrchestrator()
|
|
76
77
|
|
|
77
78
|
|
|
79
|
+
def orchestration_descriptor(strategy: str, model: str, max_tokens: int) -> str:
|
|
80
|
+
"""The non-code, non-capability inputs that affect a verdict, as a cache tag."""
|
|
81
|
+
return f"{strategy}|{model}|{max_tokens}"
|
|
82
|
+
|
|
83
|
+
|
|
78
84
|
def run_over_artifacts(
|
|
79
85
|
artifacts: list[CodeArtifact],
|
|
80
86
|
capabilities: list[Capability],
|
|
81
87
|
agents: dict[str, Agent],
|
|
82
88
|
orchestrator: Orchestrator,
|
|
89
|
+
*,
|
|
90
|
+
cache: VerdictCache | None = None,
|
|
91
|
+
orchestration: str = "",
|
|
83
92
|
) -> list[tuple[str, AnalysisResult]]:
|
|
84
|
-
"""Run the orchestration over each artifact, returning (path, result) per artifact.
|
|
93
|
+
"""Run the orchestration over each artifact, returning (path, result) per artifact.
|
|
94
|
+
|
|
95
|
+
When ``cache`` is given, an unchanged artifact returns its recorded result
|
|
96
|
+
instead of re-running the orchestrator (determinism, invariant 2).
|
|
97
|
+
"""
|
|
85
98
|
results = []
|
|
86
99
|
for artifact in artifacts:
|
|
100
|
+
if cache is not None:
|
|
101
|
+
key = verdict_key(artifact, capabilities, orchestration=orchestration)
|
|
102
|
+
hit = cache.get(key)
|
|
103
|
+
if hit is not None:
|
|
104
|
+
results.append((artifact.path, hit))
|
|
105
|
+
continue
|
|
87
106
|
ctx = AnalysisContext(artifact=artifact, capabilities=capabilities)
|
|
88
|
-
|
|
107
|
+
result = orchestrator.run(agents, ctx)
|
|
108
|
+
if cache is not None:
|
|
109
|
+
cache.put(key, result)
|
|
110
|
+
results.append((artifact.path, result))
|
|
89
111
|
return results
|
|
90
112
|
|
|
91
113
|
|
|
@@ -94,5 +116,11 @@ def run_over_source(
|
|
|
94
116
|
capabilities: list[Capability],
|
|
95
117
|
agents: dict[str, Agent],
|
|
96
118
|
orchestrator: Orchestrator,
|
|
119
|
+
*,
|
|
120
|
+
cache: VerdictCache | None = None,
|
|
121
|
+
orchestration: str = "",
|
|
97
122
|
) -> list[tuple[str, AnalysisResult]]:
|
|
98
|
-
return run_over_artifacts(source.list_artifacts(), capabilities, agents, orchestrator)
|
|
123
|
+
return run_over_artifacts(
|
|
124
|
+
source.list_artifacts(), capabilities, agents, orchestrator,
|
|
125
|
+
cache=cache, orchestration=orchestration,
|
|
126
|
+
)
|
|
@@ -9,6 +9,7 @@ library, backed by the Anthropic provider, under a chosen orchestration strategy
|
|
|
9
9
|
from __future__ import annotations
|
|
10
10
|
|
|
11
11
|
import argparse
|
|
12
|
+
import json
|
|
12
13
|
import os
|
|
13
14
|
import sys
|
|
14
15
|
|
|
@@ -21,6 +22,7 @@ from codejury.assembly import (
|
|
|
21
22
|
STRATEGIES,
|
|
22
23
|
build_orchestration,
|
|
23
24
|
make_provider,
|
|
25
|
+
orchestration_descriptor,
|
|
24
26
|
run_over_artifacts,
|
|
25
27
|
run_over_source,
|
|
26
28
|
)
|
|
@@ -29,11 +31,12 @@ from codejury.domain.capability import Capability, load_capabilities
|
|
|
29
31
|
from codejury.domain.context import AnalysisContext
|
|
30
32
|
from codejury.domain.observation import Observation
|
|
31
33
|
from codejury.domain.result import AnalysisResult
|
|
32
|
-
from codejury.evaluation import
|
|
34
|
+
from codejury.evaluation import EvalReport, evaluate, load_cases
|
|
35
|
+
from codejury.infrastructure.cache import VerdictCache
|
|
33
36
|
from codejury.orchestrators.single import SingleOrchestrator
|
|
34
37
|
from codejury.providers.base import Provider
|
|
35
38
|
from codejury.providers.mock import MockProvider
|
|
36
|
-
from codejury.reporting import to_json, to_markdown
|
|
39
|
+
from codejury.reporting import to_json, to_markdown, to_sarif
|
|
37
40
|
from codejury.resources import CAPABILITIES_DIR, GOLDEN_DIR, SUPPRESSIONS_FILE, TASKS_DIR
|
|
38
41
|
from codejury.suppression import filter_results, load_suppressions
|
|
39
42
|
from codejury.integrations.github import build_review, parse_pr_ref, post_review
|
|
@@ -43,7 +46,7 @@ from codejury.sources.repo import RepoSource
|
|
|
43
46
|
from codejury.tasks.base import run_task
|
|
44
47
|
from codejury.tasks.registry import load_tasks
|
|
45
48
|
|
|
46
|
-
_FORMATS = ("text", "markdown", "json")
|
|
49
|
+
_FORMATS = ("text", "markdown", "json", "sarif")
|
|
47
50
|
|
|
48
51
|
|
|
49
52
|
def dry_run() -> AnalysisResult:
|
|
@@ -69,10 +72,14 @@ def audit(
|
|
|
69
72
|
model: str,
|
|
70
73
|
max_tokens: int = 2048,
|
|
71
74
|
strategy: str = "single",
|
|
75
|
+
cache: VerdictCache | None = None,
|
|
72
76
|
) -> list[tuple[str, AnalysisResult]]:
|
|
73
77
|
"""Audit each changed file in `diff_text`, returning (path, result) per file."""
|
|
74
78
|
agents, orchestrator = build_orchestration(strategy, provider=provider, model=model, max_tokens=max_tokens)
|
|
75
|
-
return run_over_source(
|
|
79
|
+
return run_over_source(
|
|
80
|
+
DiffSource(diff_text), capabilities, agents, orchestrator,
|
|
81
|
+
cache=cache, orchestration=orchestration_descriptor(strategy, model, max_tokens),
|
|
82
|
+
)
|
|
76
83
|
|
|
77
84
|
|
|
78
85
|
def scan(
|
|
@@ -87,6 +94,7 @@ def scan(
|
|
|
87
94
|
max_chars: int = 200_000,
|
|
88
95
|
with_callers: bool = False,
|
|
89
96
|
with_callees: bool = False,
|
|
97
|
+
cache: VerdictCache | None = None,
|
|
90
98
|
) -> list[tuple[str, AnalysisResult]]:
|
|
91
99
|
"""Audit every matching file in a directory tree, returning (path, result) per artifact."""
|
|
92
100
|
source = RepoSource(
|
|
@@ -103,7 +111,10 @@ def scan(
|
|
|
103
111
|
file=sys.stderr,
|
|
104
112
|
)
|
|
105
113
|
agents, orchestrator = build_orchestration(strategy, provider=provider, model=model, max_tokens=max_tokens)
|
|
106
|
-
return run_over_artifacts(
|
|
114
|
+
return run_over_artifacts(
|
|
115
|
+
artifacts, capabilities, agents, orchestrator,
|
|
116
|
+
cache=cache, orchestration=orchestration_descriptor(strategy, model, max_tokens),
|
|
117
|
+
)
|
|
107
118
|
|
|
108
119
|
|
|
109
120
|
def _render_dry_run(result: AnalysisResult) -> str:
|
|
@@ -142,7 +153,7 @@ def _render_observation(o: Observation) -> str:
|
|
|
142
153
|
|
|
143
154
|
|
|
144
155
|
def _render_results(fmt: str, results: list[tuple[str, AnalysisResult]]) -> str:
|
|
145
|
-
return {"text": _render_audit, "markdown": to_markdown, "json": to_json}[fmt](results)
|
|
156
|
+
return {"text": _render_audit, "markdown": to_markdown, "json": to_json, "sarif": to_sarif}[fmt](results)
|
|
146
157
|
|
|
147
158
|
|
|
148
159
|
def _maybe_suppress(results: list[tuple[str, AnalysisResult]], enabled: bool) -> list[tuple[str, AnalysisResult]]:
|
|
@@ -189,11 +200,16 @@ def _maybe_post_github(ref: str | None, results: list[tuple[str, AnalysisResult]
|
|
|
189
200
|
print(f"github review failed: {exc}", file=sys.stderr)
|
|
190
201
|
|
|
191
202
|
|
|
192
|
-
def
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
203
|
+
def _render_eval(report: EvalReport) -> str:
|
|
204
|
+
def line(label: str, m) -> str:
|
|
205
|
+
return (
|
|
206
|
+
f"{label:<20} tp={m.tp} fp={m.fp} tn={m.tn} fn={m.fn} "
|
|
207
|
+
f"P={m.precision:.2f} R={m.recall:.2f} F1={m.f1:.2f}"
|
|
208
|
+
)
|
|
209
|
+
|
|
210
|
+
lines = [line(f"overall ({report.overall.total} cases)", report.overall)]
|
|
211
|
+
lines += [line(cap, m) for cap, m in sorted(report.by_capability.items())]
|
|
212
|
+
return "\n".join(lines)
|
|
197
213
|
|
|
198
214
|
|
|
199
215
|
def _read_diff(path: str) -> str:
|
|
@@ -221,6 +237,7 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
221
237
|
audit_p.add_argument("--api-base", default=DEFAULT_API_BASE, help="provider base URL (env: CODEJURY_API_BASE)")
|
|
222
238
|
audit_p.add_argument("--api-key", default=DEFAULT_API_KEY, help="provider API key (env: CODEJURY_API_KEY)")
|
|
223
239
|
audit_p.add_argument("--no-suppress", action="store_true", help="disable the known-noise suppression filter")
|
|
240
|
+
audit_p.add_argument("--no-cache", action="store_true", help="bypass the verdict cache (always re-query the model)")
|
|
224
241
|
audit_p.add_argument("--fail-on", choices=_FAIL_ON, default=None, dest="fail_on", help="exit 1 if a finding at/above this severity is found")
|
|
225
242
|
audit_p.add_argument("--github", default=None, help="post a PR review: owner/repo#number (needs GITHUB_TOKEN)")
|
|
226
243
|
|
|
@@ -244,6 +261,7 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
244
261
|
scan_p.add_argument("--api-base", default=DEFAULT_API_BASE, help="provider base URL (env: CODEJURY_API_BASE)")
|
|
245
262
|
scan_p.add_argument("--api-key", default=DEFAULT_API_KEY, help="provider API key (env: CODEJURY_API_KEY)")
|
|
246
263
|
scan_p.add_argument("--no-suppress", action="store_true", help="disable the known-noise suppression filter")
|
|
264
|
+
scan_p.add_argument("--no-cache", action="store_true", help="bypass the verdict cache (always re-query the model)")
|
|
247
265
|
scan_p.add_argument("--fail-on", choices=_FAIL_ON, default=None, dest="fail_on", help="exit 1 if a finding at/above this severity is found")
|
|
248
266
|
|
|
249
267
|
run_p = sub.add_parser("run", help="run a named task preset against a unified diff")
|
|
@@ -256,9 +274,11 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
256
274
|
run_p.add_argument("--fail-on", choices=_FAIL_ON, default=None, dest="fail_on", help="exit 1 if a finding at/above this severity is found")
|
|
257
275
|
|
|
258
276
|
eval_p = sub.add_parser("eval", help="score golden cases and report precision/recall")
|
|
259
|
-
eval_p.add_argument("--
|
|
277
|
+
eval_p.add_argument("--dataset", default=GOLDEN_DIR, help="golden case YAML directory")
|
|
278
|
+
eval_p.add_argument("--split", default=None, help="only score cases whose 'split' matches (e.g. held-out)")
|
|
260
279
|
eval_p.add_argument("--capabilities", default=CAPABILITIES_DIR, help="capability YAML directory")
|
|
261
280
|
eval_p.add_argument("--provider", choices=PROVIDERS, default="anthropic")
|
|
281
|
+
eval_p.add_argument("--format", choices=("text", "json"), default="text", dest="fmt")
|
|
262
282
|
eval_p.add_argument("--model", default=DEFAULT_MODEL)
|
|
263
283
|
eval_p.add_argument("--api-base", default=DEFAULT_API_BASE, help="provider base URL (env: CODEJURY_API_BASE)")
|
|
264
284
|
eval_p.add_argument("--api-key", default=DEFAULT_API_KEY, help="provider API key (env: CODEJURY_API_KEY)")
|
|
@@ -275,6 +295,7 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
275
295
|
model=args.model,
|
|
276
296
|
max_tokens=args.max_tokens,
|
|
277
297
|
strategy=args.orchestrator,
|
|
298
|
+
cache=None if args.no_cache else VerdictCache(),
|
|
278
299
|
)
|
|
279
300
|
results = _maybe_suppress(results, not args.no_suppress)
|
|
280
301
|
print(_render_results(args.fmt, results))
|
|
@@ -298,6 +319,7 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
298
319
|
max_chars=args.max_chars,
|
|
299
320
|
with_callers=args.callers,
|
|
300
321
|
with_callees=args.callees,
|
|
322
|
+
cache=None if args.no_cache else VerdictCache(),
|
|
301
323
|
)
|
|
302
324
|
results = _maybe_suppress(results, not args.no_suppress)
|
|
303
325
|
print(_render_results(args.fmt, results))
|
|
@@ -317,8 +339,8 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
317
339
|
|
|
318
340
|
if args.command == "eval":
|
|
319
341
|
try:
|
|
320
|
-
|
|
321
|
-
load_cases(args.
|
|
342
|
+
report = evaluate(
|
|
343
|
+
load_cases(args.dataset, split=args.split),
|
|
322
344
|
load_capabilities(args.capabilities),
|
|
323
345
|
provider=make_provider(args.provider, api_key=args.api_key, api_base=args.api_base),
|
|
324
346
|
model=args.model,
|
|
@@ -328,7 +350,7 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
328
350
|
# as one line, not a traceback (audit gets this via the orchestrator).
|
|
329
351
|
print(f"eval failed: {exc}")
|
|
330
352
|
return 1
|
|
331
|
-
print(
|
|
353
|
+
print(json.dumps(report.to_dict(), indent=2) if args.fmt == "json" else _render_eval(report))
|
|
332
354
|
return 0
|
|
333
355
|
|
|
334
356
|
if args.command in (None, "dry-run"):
|
|
@@ -46,7 +46,34 @@ sub_capabilities:
|
|
|
46
46
|
signals: ["admin:admin", "password=admin", "changeme"]
|
|
47
47
|
why_bad: Default credentials are public knowledge and trivially abused
|
|
48
48
|
|
|
49
|
+
transport_security:
|
|
50
|
+
correct_patterns:
|
|
51
|
+
- id: TLS-OK-1
|
|
52
|
+
description: >-
|
|
53
|
+
Leave TLS certificate verification at its secure default -- verify omitted or
|
|
54
|
+
verify=True, the default SSL context, hostname checking on
|
|
55
|
+
signals: ["verify=True", "create_default_context", "requests.get(", "requests.post("]
|
|
56
|
+
why_ok: >-
|
|
57
|
+
The secure default validates the certificate chain and hostname. An https:// call
|
|
58
|
+
that does not disable verification is fine; do not flag it just for making a
|
|
59
|
+
request or for omitting verify.
|
|
60
|
+
|
|
61
|
+
anti_patterns:
|
|
62
|
+
- id: TLS-BAD-1
|
|
63
|
+
cwe: CWE-295
|
|
64
|
+
severity: HIGH
|
|
65
|
+
description: >-
|
|
66
|
+
Disable TLS certificate or hostname verification -- verify=False, CERT_NONE,
|
|
67
|
+
check_hostname=False, or an unverified SSL context
|
|
68
|
+
signals: ["verify=False", "CERT_NONE", "check_hostname = False", "_create_unverified_context"]
|
|
69
|
+
why_bad: An unverified TLS connection is open to a man-in-the-middle despite https://
|
|
70
|
+
example_bad: |
|
|
71
|
+
requests.get("https://api.partner.com/data", verify=False)
|
|
72
|
+
example_good: |
|
|
73
|
+
requests.get("https://api.partner.com/data") # verify defaults to True
|
|
74
|
+
|
|
49
75
|
trigger_signals:
|
|
50
76
|
- dependency manifests and lock files
|
|
51
77
|
- install or bootstrap scripts fetching remote code
|
|
52
78
|
- file permission, bucket ACL, or default credential settings
|
|
79
|
+
- TLS client calls that set verify or build a custom SSL context
|
|
@@ -105,7 +105,67 @@ sub_capabilities:
|
|
|
105
105
|
if not target.is_relative_to(UPLOAD_DIR):
|
|
106
106
|
raise ValueError("path escapes upload dir")
|
|
107
107
|
|
|
108
|
+
ssrf:
|
|
109
|
+
correct_patterns:
|
|
110
|
+
- id: SSRF-OK-1
|
|
111
|
+
description: Validate the request URL's host against an allowlist before fetching it
|
|
112
|
+
signals: ["urlparse(", ".hostname", "ALLOWED", "allowlist"]
|
|
113
|
+
why_ok: An attacker cannot steer the fetch to an internal target, because any host missing from the allowlist is rejected
|
|
114
|
+
|
|
115
|
+
- id: SSRF-OK-2
|
|
116
|
+
description: >-
|
|
117
|
+
Fetch a URL that is not attacker-controlled -- a constant, a value from trusted
|
|
118
|
+
config, or an operator-supplied argument
|
|
119
|
+
why_ok: >-
|
|
120
|
+
SSRF needs an external attacker to control the destination. A constant URL or one
|
|
121
|
+
from trusted config is not a finding, even though it goes through a fetch call.
|
|
122
|
+
|
|
123
|
+
anti_patterns:
|
|
124
|
+
- id: SSRF-BAD-1
|
|
125
|
+
cwe: CWE-918
|
|
126
|
+
severity: HIGH
|
|
127
|
+
description: >-
|
|
128
|
+
Fetch a URL taken from externally controlled input (HTTP request, form, query, or
|
|
129
|
+
message field) without validating its host against an allowlist. NOT this: a
|
|
130
|
+
constant URL, one from trusted config, or an operator-supplied argument.
|
|
131
|
+
signals: ["requests.get(", "urllib.request.urlopen(", "httpx.", "request.args", "request.json"]
|
|
132
|
+
why_bad: >-
|
|
133
|
+
The server makes the request, so attacker input reaches internal-only targets --
|
|
134
|
+
cloud metadata, localhost admin ports, internal APIs behind the firewall.
|
|
135
|
+
example_bad: |
|
|
136
|
+
requests.get(request.args["url"]).text
|
|
137
|
+
example_good: |
|
|
138
|
+
if urlparse(url).hostname not in ALLOWED_HOSTS:
|
|
139
|
+
raise ValueError("host not allowed")
|
|
140
|
+
requests.get(url).text
|
|
141
|
+
|
|
142
|
+
insecure_deserialization:
|
|
143
|
+
correct_patterns:
|
|
144
|
+
- id: DESER-OK-1
|
|
145
|
+
description: >-
|
|
146
|
+
Parse untrusted input with a data-only parser -- json.loads or yaml.safe_load --
|
|
147
|
+
that cannot instantiate arbitrary objects
|
|
148
|
+
signals: ["json.loads", "yaml.safe_load"]
|
|
149
|
+
why_ok: A data-only parser builds plain structures and has no code-execution path
|
|
150
|
+
|
|
151
|
+
anti_patterns:
|
|
152
|
+
- id: DESER-BAD-1
|
|
153
|
+
cwe: CWE-502
|
|
154
|
+
severity: CRITICAL
|
|
155
|
+
description: >-
|
|
156
|
+
Deserialize externally controlled bytes with an object-constructing deserializer --
|
|
157
|
+
pickle, marshal, yaml.load (unsafe Loader), or jsonpickle. NOT this: a data-only
|
|
158
|
+
parser like json.loads or yaml.safe_load.
|
|
159
|
+
signals: ["pickle.loads", "pickle.load(", "yaml.load(", "marshal.loads", "jsonpickle.decode"]
|
|
160
|
+
why_bad: These reconstruct arbitrary objects, so crafted input can run code during deserialization
|
|
161
|
+
example_bad: |
|
|
162
|
+
pickle.loads(base64.b64decode(request.data))
|
|
163
|
+
example_good: |
|
|
164
|
+
json.loads(request.data)
|
|
165
|
+
|
|
108
166
|
trigger_signals:
|
|
109
167
|
- raw SQL strings or cursor.execute calls appear
|
|
110
168
|
- imports of os, subprocess, or shlex with process execution
|
|
111
169
|
- file paths built from request, form, or query parameters
|
|
170
|
+
- outbound HTTP fetches (requests, urllib, httpx) to a non-constant URL
|
|
171
|
+
- deserialization calls (pickle, yaml.load, marshal) on external input
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Adversarial positive: weak password hash hidden behind hashlib.new(variable).
|
|
2
|
+
capability: authn
|
|
3
|
+
vulnerable: true
|
|
4
|
+
expected_verdict: VULNERABLE
|
|
5
|
+
cwe: CWE-916
|
|
6
|
+
source: synthetic
|
|
7
|
+
notes: >
|
|
8
|
+
hashlib.new("md5") is the same weak, unsalted, fast hash as hashlib.md5(), just
|
|
9
|
+
reached through a variable algorithm name. Unsuitable for password storage
|
|
10
|
+
(needs bcrypt/scrypt/argon2). The indirection should not hide it.
|
|
11
|
+
code: |
|
|
12
|
+
def hash_pw(pw):
|
|
13
|
+
algo = "md5"
|
|
14
|
+
return hashlib.new(algo, pw.encode()).hexdigest()
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
capability: business_logic
|
|
2
|
+
vulnerable: true
|
|
3
|
+
expected_verdict: VULNERABLE
|
|
4
|
+
cwe: CWE-602
|
|
5
|
+
source: synthetic
|
|
6
|
+
split: held-out
|
|
7
|
+
notes: >
|
|
8
|
+
Quantity and unit price come straight from the request and are never checked
|
|
9
|
+
server-side. A negative quantity credits the customer; a client-set price lets
|
|
10
|
+
them pay anything. Price must come from the catalog and quantity must be > 0.
|
|
11
|
+
code: |
|
|
12
|
+
def add_to_order(order, item_id, quantity, unit_price):
|
|
13
|
+
order.lines.append((item_id, quantity, unit_price))
|
|
14
|
+
order.total += quantity * unit_price
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
capability: business_logic
|
|
2
|
+
vulnerable: false
|
|
3
|
+
expected_verdict: SECURE
|
|
4
|
+
cwe: ""
|
|
5
|
+
source: synthetic
|
|
6
|
+
notes: >
|
|
7
|
+
Quantity is validated as positive and the price is looked up server-side from
|
|
8
|
+
the catalog, so the client cannot tamper with either.
|
|
9
|
+
code: |
|
|
10
|
+
def add_to_order(order, item_id, quantity):
|
|
11
|
+
if quantity < 1:
|
|
12
|
+
raise ValueError("quantity must be positive")
|
|
13
|
+
unit_price = catalog.price_of(item_id) # trusted server-side price
|
|
14
|
+
order.lines.append((item_id, quantity, unit_price))
|
|
15
|
+
order.total += quantity * unit_price
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# False-positive-prone negative: a subprocess call driven by a request value
|
|
2
|
+
# looks like command injection, but the value only selects a fixed argv from a
|
|
3
|
+
# table and never reaches a shell.
|
|
4
|
+
capability: input_validation
|
|
5
|
+
vulnerable: false
|
|
6
|
+
expected_verdict: SECURE
|
|
7
|
+
cwe: ""
|
|
8
|
+
source: synthetic
|
|
9
|
+
split: held-out
|
|
10
|
+
notes: >
|
|
11
|
+
`name` only indexes a fixed dict of argument lists (KeyError on anything
|
|
12
|
+
unknown); no user string is interpolated into a command and shell=False.
|
|
13
|
+
Flagging this as command injection is a false positive.
|
|
14
|
+
code: |
|
|
15
|
+
ACTIONS = {
|
|
16
|
+
"start": ["systemctl", "start", "web"],
|
|
17
|
+
"stop": ["systemctl", "stop", "web"],
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
def run_action(name):
|
|
21
|
+
argv = ACTIONS[name]
|
|
22
|
+
subprocess.run(argv, shell=False)
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
capability: data_protection
|
|
2
|
+
vulnerable: true
|
|
3
|
+
expected_verdict: VULNERABLE
|
|
4
|
+
cwe: CWE-312
|
|
5
|
+
source: synthetic
|
|
6
|
+
notes: >
|
|
7
|
+
The full card number and the CVV are written to the database in cleartext.
|
|
8
|
+
The CVV must never be stored at all, and the PAN must be tokenized/encrypted.
|
|
9
|
+
code: |
|
|
10
|
+
def save_card(user_id, card_number, cvv):
|
|
11
|
+
db.execute(
|
|
12
|
+
"INSERT INTO cards (user_id, pan, cvv) VALUES (?, ?, ?)",
|
|
13
|
+
(user_id, card_number, cvv),
|
|
14
|
+
)
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
capability: data_protection
|
|
2
|
+
vulnerable: false
|
|
3
|
+
expected_verdict: SECURE
|
|
4
|
+
cwe: ""
|
|
5
|
+
source: synthetic
|
|
6
|
+
split: held-out
|
|
7
|
+
notes: >
|
|
8
|
+
The PAN is replaced by a vault token before storage; the CVV is never
|
|
9
|
+
persisted. Nothing sensitive reaches the database.
|
|
10
|
+
code: |
|
|
11
|
+
def save_card(user_id, card_number):
|
|
12
|
+
token = vault.tokenize(card_number) # store a token, never the PAN
|
|
13
|
+
db.execute(
|
|
14
|
+
"INSERT INTO cards (user_id, token) VALUES (?, ?)",
|
|
15
|
+
(user_id, token),
|
|
16
|
+
)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
capability: dependency_config
|
|
2
|
+
vulnerable: true
|
|
3
|
+
expected_verdict: VULNERABLE
|
|
4
|
+
cwe: CWE-295
|
|
5
|
+
source: synthetic
|
|
6
|
+
notes: >
|
|
7
|
+
TLS certificate verification is disabled, so the connection is open to a
|
|
8
|
+
man-in-the-middle despite the https:// scheme.
|
|
9
|
+
code: |
|
|
10
|
+
def fetch_partner_data():
|
|
11
|
+
return requests.get("https://api.partner.com/data", verify=False).json()
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
capability: dependency_config
|
|
2
|
+
vulnerable: false
|
|
3
|
+
expected_verdict: SECURE
|
|
4
|
+
cwe: ""
|
|
5
|
+
source: synthetic
|
|
6
|
+
notes: >
|
|
7
|
+
Certificate verification is left at its secure default (True) and a timeout is
|
|
8
|
+
set. Nothing weakens the TLS configuration.
|
|
9
|
+
code: |
|
|
10
|
+
def fetch_partner_data():
|
|
11
|
+
return requests.get("https://api.partner.com/data", timeout=10).json()
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# False-positive-prone negative: deserializing request data looks dangerous,
|
|
2
|
+
# but json.loads only builds data -- it cannot execute code like pickle.
|
|
3
|
+
capability: input_validation
|
|
4
|
+
vulnerable: false
|
|
5
|
+
expected_verdict: SECURE
|
|
6
|
+
cwe: ""
|
|
7
|
+
source: synthetic
|
|
8
|
+
notes: >
|
|
9
|
+
json.loads parses untrusted input into plain data structures with no code
|
|
10
|
+
execution path. Treating it like insecure deserialization is a false positive.
|
|
11
|
+
code: |
|
|
12
|
+
def load_state(request):
|
|
13
|
+
return json.loads(request.data)
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
capability: input_validation
|
|
2
|
+
vulnerable: true
|
|
3
|
+
expected_verdict: VULNERABLE
|
|
4
|
+
cwe: CWE-502
|
|
5
|
+
source: synthetic
|
|
6
|
+
notes: >
|
|
7
|
+
pickle.loads on attacker-supplied bytes executes arbitrary code during
|
|
8
|
+
unpickling -- remote code execution.
|
|
9
|
+
code: |
|
|
10
|
+
def load_state(request):
|
|
11
|
+
blob = base64.b64decode(request.data)
|
|
12
|
+
return pickle.loads(blob)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
capability: error_logging
|
|
2
|
+
vulnerable: false
|
|
3
|
+
expected_verdict: SECURE
|
|
4
|
+
cwe: ""
|
|
5
|
+
source: synthetic
|
|
6
|
+
notes: >
|
|
7
|
+
Only the non-sensitive username is logged; the password never enters the log.
|
|
8
|
+
code: |
|
|
9
|
+
def authenticate(username, password):
|
|
10
|
+
logger.info("login attempt user=%s", username)
|
|
11
|
+
return backend.check(username, password)
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
capability: error_logging
|
|
2
|
+
vulnerable: true
|
|
3
|
+
expected_verdict: VULNERABLE
|
|
4
|
+
cwe: CWE-532
|
|
5
|
+
source: synthetic
|
|
6
|
+
notes: >
|
|
7
|
+
The cleartext password is written to the log. Anyone with log access (or a log
|
|
8
|
+
shipper, or an aggregation backend) now has the credential.
|
|
9
|
+
code: |
|
|
10
|
+
def authenticate(username, password):
|
|
11
|
+
logger.info("login attempt user=%s password=%s", username, password)
|
|
12
|
+
return backend.check(username, password)
|