codejury 0.6.0__tar.gz → 0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codejury-0.6.0 → codejury-0.8.0}/PKG-INFO +14 -7
- {codejury-0.6.0 → codejury-0.8.0}/README.md +13 -6
- codejury-0.8.0/codejury/analysis/__init__.py +7 -0
- codejury-0.8.0/codejury/analysis/provenance.py +208 -0
- codejury-0.8.0/codejury/analysis/taint.py +273 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/assembly.py +4 -1
- {codejury-0.6.0 → codejury-0.8.0}/codejury/cli.py +2 -0
- codejury-0.8.0/codejury/data/capabilities/excessive_agency.yaml +64 -0
- codejury-0.8.0/codejury/data/capabilities/insecure_output_handling.yaml +65 -0
- codejury-0.8.0/codejury/data/capabilities/prompt_injection.yaml +67 -0
- codejury-0.8.0/codejury/data/golden/ag_allowlist_safe.yaml +16 -0
- codejury-0.8.0/codejury/data/golden/ag_arbitrary_tool_vuln.yaml +13 -0
- codejury-0.8.0/codejury/data/golden/ag_destructive_no_confirm_vuln.yaml +14 -0
- codejury-0.8.0/codejury/data/golden/ag_human_approval_safe.yaml +13 -0
- codejury-0.8.0/codejury/data/golden/ioh_escaped_output_safe.yaml +12 -0
- codejury-0.8.0/codejury/data/golden/ioh_exec_output_vuln.yaml +12 -0
- codejury-0.8.0/codejury/data/golden/ioh_innerhtml_output_vuln.yaml +13 -0
- codejury-0.8.0/codejury/data/golden/ioh_schema_validated_safe.yaml +13 -0
- codejury-0.8.0/codejury/data/golden/pi_delimited_data_safe.yaml +15 -0
- codejury-0.8.0/codejury/data/golden/pi_indirect_rag_vuln.yaml +16 -0
- codejury-0.8.0/codejury/data/golden/pi_system_concat_vuln.yaml +12 -0
- codejury-0.8.0/codejury/data/golden/pi_user_role_safe.yaml +14 -0
- codejury-0.8.0/codejury/data/taint.yaml +109 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/evaluation.py +10 -3
- codejury-0.8.0/codejury/orchestrators/taint_gate.py +75 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/resources.py +1 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury.egg-info/PKG-INFO +14 -7
- {codejury-0.6.0 → codejury-0.8.0}/codejury.egg-info/SOURCES.txt +24 -0
- {codejury-0.6.0 → codejury-0.8.0}/pyproject.toml +1 -1
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_evaluation.py +14 -0
- codejury-0.8.0/tests/test_provenance.py +105 -0
- codejury-0.8.0/tests/test_taint.py +97 -0
- codejury-0.8.0/tests/test_taint_crossfile.py +68 -0
- codejury-0.8.0/tests/test_taint_gate.py +89 -0
- {codejury-0.6.0 → codejury-0.8.0}/LICENSE +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/__init__.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/agents/__init__.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/agents/base.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/agents/debate.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/agents/mock.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/agents/parsing.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/agents/refuter.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/agents/verifier.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/capabilities/authentication.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/capabilities/authorization.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/capabilities/business_logic.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/capabilities/crypto.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/capabilities/data_protection.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/capabilities/dependency_config.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/capabilities/error_logging.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/capabilities/input_validation.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/capabilities/output_encoding.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/capabilities/secrets.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/capabilities/session.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/authn_bcrypt_password.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/authn_jwt_noverify_vuln.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/authn_jwt_verified_safe.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/authn_sha256_checksum_safe.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/authn_sha256_password.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/authn_weak_hash_indirect_vuln.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/authz_idor_vuln.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/authz_owner_safe.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/business_logic_price_tamper_vuln.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/business_logic_server_checked_safe.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/cmdi_fixed_argv_safe.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/cmdi_ossystem_vuln.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/cmdi_subprocess_safe.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/crypto_aesgcm_safe.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/crypto_ecb_vuln.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/data_protection_plaintext_pii_vuln.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/data_protection_tokenized_safe.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/dependency_config_tls_verify_off_vuln.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/dependency_config_tls_verify_on_safe.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/deserialize_json_safe.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/deserialize_pickle_vuln.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/error_logging_redacted_safe.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/error_logging_secret_leak_vuln.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/literal_eval_safe.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/path_basename_safe.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/path_contained_safe.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/path_traversal_vuln.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/secrets_env_safe.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/secrets_hardcoded_vuln.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/session_fixation_vuln.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/session_secure_cookie_safe.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/sql_constant_concat_safe.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/sqli_format_vuln.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/sqli_fstring_query.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/sqli_indirect_var_vuln.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/sqli_parameterized_query.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/ssrf_allowlist_safe.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/ssrf_constant_url_safe.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/ssrf_substring_allowlist_bypass_vuln.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/ssrf_user_url_vuln.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/xfile_idor_no_check_vuln.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/xfile_idor_owner_checked_safe.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/xfile_path_sanitized_safe.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/xfile_path_tainted_vuln.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/xss_innerhtml_constant_safe.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/xss_innerhtml_vuln.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/golden/xss_textcontent_safe.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/suppressions.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/tasks/audit_diff_debate.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/data/tasks/quick_scan_single.yaml +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/domain/__init__.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/domain/artifact.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/domain/capability.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/domain/context.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/domain/observation.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/domain/result.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/infrastructure/__init__.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/infrastructure/cache.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/infrastructure/json_parse.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/integrations/__init__.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/integrations/github.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/orchestrators/__init__.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/orchestrators/base.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/orchestrators/challenge.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/orchestrators/debate.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/orchestrators/pipeline.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/orchestrators/reflexion.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/orchestrators/single.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/providers/__init__.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/providers/anthropic.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/providers/base.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/providers/litellm.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/providers/mock.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/providers/openai.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/providers/openai_format.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/providers/retry.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/reporting.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/sources/__init__.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/sources/base.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/sources/callers.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/sources/chunker.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/sources/diff.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/sources/function.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/sources/mock.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/sources/repo.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/suppression.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/tasks/__init__.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/tasks/base.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury/tasks/registry.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury.egg-info/dependency_links.txt +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury.egg-info/entry_points.txt +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury.egg-info/requires.txt +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/codejury.egg-info/top_level.txt +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/setup.cfg +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_anthropic_provider.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_assembly.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_audit_pipeline.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_cache.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_callers.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_capability.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_challenge.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_cli_audit.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_context.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_debate_agents.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_debate_orchestrator.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_diff_source.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_function_source.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_integrations.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_json_parse.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_litellm_provider.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_openai_provider.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_orchestrator.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_pipeline_orchestrator.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_reflexion_orchestrator.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_repo_source.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_reporting.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_retry_provider.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_sarif.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_suppression.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_tasks.py +0 -0
- {codejury-0.6.0 → codejury-0.8.0}/tests/test_verifier.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codejury
|
|
3
|
-
Version: 0.6.0
|
|
3
|
+
Version: 0.8.0
|
|
4
4
|
Summary: General-purpose Application Security AI audit framework -- five-layer architecture, capabilities as first-class data
|
|
5
5
|
Author: AISecLabs
|
|
6
6
|
License-Expression: MIT
|
|
@@ -40,7 +40,7 @@ Finder / Challenger / Judge -- that argue and converge on a verdict.
|
|
|
40
40
|
|
|
41
41
|
Why it is built this way:
|
|
42
42
|
|
|
43
|
-
- **Knowledge is data.** Each
|
|
43
|
+
- **Knowledge is data.** Each OWASP ASVS area (and now OWASP LLM Top 10 areas) is a YAML capability
|
|
44
44
|
(safe patterns + anti-patterns, with CWE and examples) -- versioned, reviewable
|
|
45
45
|
in a PR, and editable by non-engineers. The framework core stays small.
|
|
46
46
|
- **Verdicts, not just alerts.** Every capability yields `SECURE` / `VULNERABLE`
|
|
@@ -83,10 +83,16 @@ git diff | codejury audit --provider anthropic
|
|
|
83
83
|
| `codejury run <task>` | Run a named task preset (see [Tasks](#tasks)). |
|
|
84
84
|
| `codejury eval` | Score the golden cases; report precision / recall / F1, overall and per capability. |
|
|
85
85
|
|
|
86
|
-
Shared flags: `--orchestrator {single,pipeline,debate,reflexion,challenge}`,
|
|
86
|
+
Shared flags: `--orchestrator {single,pipeline,debate,reflexion,challenge,taint}`,
|
|
87
87
|
`--provider {anthropic,openai,litellm}`, `--model`,
|
|
88
88
|
`--format {text,markdown,json,sarif}`.
|
|
89
89
|
|
|
90
|
+
`--orchestrator taint` adds a data-flow gate: after the verifier rules, it clears
|
|
91
|
+
an `input_validation` finding only when static provenance analysis proves the
|
|
92
|
+
value reaching the sink is constant, sanitized, or trusted (using cross-file
|
|
93
|
+
caller/callee context). It downgrades only on positive proof, so it removes false
|
|
94
|
+
positives without dropping real findings.
|
|
95
|
+
|
|
90
96
|
`--format sarif` emits a SARIF 2.1.0 log (validates against the official schema)
|
|
91
97
|
for CI and security dashboards: each problem with a code location becomes a
|
|
92
98
|
result carrying its capability (as the rule id), CWE, and a precise location.
|
|
@@ -150,13 +156,14 @@ capabilities: [authn, input_validation, secrets] # omit to check all
|
|
|
150
156
|
|
|
151
157
|
## Capabilities
|
|
152
158
|
|
|
153
|
-
The library covers all 11 OWASP ASVS areas
|
|
154
|
-
`codejury/data/capabilities/`. These ids are
|
|
155
|
-
`capabilities:` accept:
|
|
159
|
+
The library covers all 11 OWASP ASVS areas plus a growing set of OWASP LLM Top 10
|
|
160
|
+
capabilities, one YAML each under `codejury/data/capabilities/`. These ids are
|
|
161
|
+
what `--only` and a task's `capabilities:` accept:
|
|
156
162
|
|
|
157
163
|
`authn` · `authz` · `session` · `input_validation` · `output_encoding` ·
|
|
158
164
|
`crypto` · `secrets` · `data_protection` · `error_logging` ·
|
|
159
|
-
`business_logic` · `dependency_config`
|
|
165
|
+
`business_logic` · `dependency_config` · `prompt_injection` ·
|
|
166
|
+
`insecure_output_handling` · `excessive_agency`
|
|
160
167
|
|
|
161
168
|
To tune for your codebase, edit these files (add patterns / sharpen wording) --
|
|
162
169
|
no code change needed.
|
|
@@ -10,7 +10,7 @@ Finder / Challenger / Judge -- that argue and converge on a verdict.
|
|
|
10
10
|
|
|
11
11
|
Why it is built this way:
|
|
12
12
|
|
|
13
|
-
- **Knowledge is data.** Each
|
|
13
|
+
- **Knowledge is data.** Each OWASP ASVS area (and now OWASP LLM Top 10 areas) is a YAML capability
|
|
14
14
|
(safe patterns + anti-patterns, with CWE and examples) -- versioned, reviewable
|
|
15
15
|
in a PR, and editable by non-engineers. The framework core stays small.
|
|
16
16
|
- **Verdicts, not just alerts.** Every capability yields `SECURE` / `VULNERABLE`
|
|
@@ -53,10 +53,16 @@ git diff | codejury audit --provider anthropic
|
|
|
53
53
|
| `codejury run <task>` | Run a named task preset (see [Tasks](#tasks)). |
|
|
54
54
|
| `codejury eval` | Score the golden cases; report precision / recall / F1, overall and per capability. |
|
|
55
55
|
|
|
56
|
-
Shared flags: `--orchestrator {single,pipeline,debate,reflexion,challenge}`,
|
|
56
|
+
Shared flags: `--orchestrator {single,pipeline,debate,reflexion,challenge,taint}`,
|
|
57
57
|
`--provider {anthropic,openai,litellm}`, `--model`,
|
|
58
58
|
`--format {text,markdown,json,sarif}`.
|
|
59
59
|
|
|
60
|
+
`--orchestrator taint` adds a data-flow gate: after the verifier rules, it clears
|
|
61
|
+
an `input_validation` finding only when static provenance analysis proves the
|
|
62
|
+
value reaching the sink is constant, sanitized, or trusted (using cross-file
|
|
63
|
+
caller/callee context). It downgrades only on positive proof, so it removes false
|
|
64
|
+
positives without dropping real findings.
|
|
65
|
+
|
|
60
66
|
`--format sarif` emits a SARIF 2.1.0 log (validates against the official schema)
|
|
61
67
|
for CI and security dashboards: each problem with a code location becomes a
|
|
62
68
|
result carrying its capability (as the rule id), CWE, and a precise location.
|
|
@@ -120,13 +126,14 @@ capabilities: [authn, input_validation, secrets] # omit to check all
|
|
|
120
126
|
|
|
121
127
|
## Capabilities
|
|
122
128
|
|
|
123
|
-
The library covers all 11 OWASP ASVS areas
|
|
124
|
-
`codejury/data/capabilities/`. These ids are
|
|
125
|
-
`capabilities:` accept:
|
|
129
|
+
The library covers all 11 OWASP ASVS areas plus a growing set of OWASP LLM Top 10
|
|
130
|
+
capabilities, one YAML each under `codejury/data/capabilities/`. These ids are
|
|
131
|
+
what `--only` and a task's `capabilities:` accept:
|
|
126
132
|
|
|
127
133
|
`authn` · `authz` · `session` · `input_validation` · `output_encoding` ·
|
|
128
134
|
`crypto` · `secrets` · `data_protection` · `error_logging` ·
|
|
129
|
-
`business_logic` · `dependency_config`
|
|
135
|
+
`business_logic` · `dependency_config` · `prompt_injection` ·
|
|
136
|
+
`insecure_output_handling` · `excessive_agency`
|
|
130
137
|
|
|
131
138
|
To tune for your codebase, edit these files (add patterns / sharpen wording) --
|
|
132
139
|
no code change needed.
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
"""Static analysis for provenance (P1).
|
|
2
|
+
|
|
3
|
+
The code-graph / data-flow engine that gives the verifier provenance -- whether a
|
|
4
|
+
value reaching a sink is attacker-controlled, sanitized, or a trusted constant.
|
|
5
|
+
This is the real fix for the taint precision floor that single-file LLM review
|
|
6
|
+
cannot reach (see ROADMAP P1). Python / AST based to start.
|
|
7
|
+
"""
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
"""Intra-procedural value-origin tracing (P1-01).
|
|
2
|
+
|
|
3
|
+
Classify where the value of an expression inside a function comes from, so a
|
|
4
|
+
later layer (P1-03) can decide whether it is attacker-controlled. The output is
|
|
5
|
+
an ``Origin``: the parameters, callees, attribute/subscript roots, free names,
|
|
6
|
+
and literals a value derives from.
|
|
7
|
+
|
|
8
|
+
The analysis is deliberately conservative and flow-insensitive: a name assigned
|
|
9
|
+
more than once contributes the union of all its right-hand sides, so a possible
|
|
10
|
+
source is never dropped (recall over precision). A value built only from literals
|
|
11
|
+
is reported as ``is_constant`` -- the signal that distinguishes, for example,
|
|
12
|
+
SQL concatenated from constants (safe) from SQL concatenated from a parameter.
|
|
13
|
+
|
|
14
|
+
This module finds where a value comes from; it does not decide what is a source
|
|
15
|
+
or a sanitizer (that is data, P1-02) nor follow a call into another file (P1-03).
|
|
16
|
+
Python / AST only.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import ast
|
|
22
|
+
from dataclasses import dataclass
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass(frozen=True)
class Origin:
    """Immutable summary of what a traced value derives from.

    Two summaries are combined with :meth:`merge`: the set-valued fields are
    unioned and the boolean flags are OR-ed.
    """

    # Parameters the value derives from.
    params: frozenset[str] = frozenset()
    # Callee names whose return value it derives from.
    calls: frozenset[str] = frozenset()
    # Attribute/subscript roots, dotted (e.g. "request.args").
    attrs: frozenset[str] = frozenset()
    # Free names: module globals, imports, builtins.
    globals_: frozenset[str] = frozenset()
    # A literal contributes to the value.
    has_literal: bool = False
    # An unmodelled expression contributes (be cautious).
    unknown: bool = False

    def merge(self, other: Origin) -> Origin:
        """Return a new ``Origin`` describing both ``self`` and ``other``."""
        return Origin(
            self.params | other.params,
            self.calls | other.calls,
            self.attrs | other.attrs,
            self.globals_ | other.globals_,
            self.has_literal or other.has_literal,
            self.unknown or other.unknown,
        )

    @property
    def is_constant(self) -> bool:
        """True when no param, call, attr, free name, or unmodelled expression
        contributes -- i.e. the value is built only from literals."""
        non_literal_inputs = (self.params, self.calls, self.attrs, self.globals_, self.unknown)
        return not any(non_literal_inputs)


# Shared single-flag origins handed out by the classifier.
_LITERAL = Origin(has_literal=True)
_UNKNOWN = Origin(unknown=True)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def parse_function(source: str, name: str) -> ast.FunctionDef | ast.AsyncFunctionDef | None:
    """Find a function named ``name`` in ``source`` (any nesting).

    The tree is searched with ``ast.walk``; when several definitions share the
    name, the first one in that traversal order is returned.

    Returns ``None`` when ``source`` cannot be parsed or no such function exists.
    """
    try:
        tree = ast.parse(source)
    except (SyntaxError, ValueError):
        # ValueError covers source containing null bytes, which some Python
        # versions report that way instead of as a SyntaxError. The code under
        # audit is arbitrary input, so unparseable text must not crash the run.
        return None
    for node in ast.walk(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) and node.name == name:
            return node
    return None
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def find_calls(scope: ast.AST, callee: str) -> list[ast.Call]:
    """Collect every call under ``scope`` whose function name equals ``callee``.

    The name compared is either the bare name (``open``) or the final attribute
    of the called expression (``execute`` in ``cur.execute``).
    """
    matches: list[ast.Call] = []
    for candidate in ast.walk(scope):
        if not isinstance(candidate, ast.Call):
            continue
        if _call_name(candidate) == callee:
            matches.append(candidate)
    return matches
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def trace_value(func: ast.FunctionDef | ast.AsyncFunctionDef, expr: ast.AST) -> Origin:
    """Return the ``Origin`` of ``expr``, an expression located inside ``func``."""
    param_names = _params(func)
    local_bindings = _assignments(func)
    # Start with an empty "seen" set: no local name is being expanded yet.
    return _classify(expr, param_names, local_bindings, frozenset())
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def parameters(func: ast.FunctionDef | ast.AsyncFunctionDef) -> set[str]:
    """Return ``func``'s parameter names (positional, keyword, *args, **kwargs).

    Public entry point for the private ``_params`` helper.
    """
    names = _params(func)
    return names
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def assignments(func: ast.FunctionDef | ast.AsyncFunctionDef) -> dict[str, list[ast.AST]]:
    """Return, for each assigned local name, all right-hand sides it is assigned.

    Public entry point for the private ``_assignments`` helper.
    """
    bindings = _assignments(func)
    return bindings
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def callee(call: ast.Call) -> tuple[str | None, str | None]:
    """Return the callee of ``call`` as a ``(dotted, bare)`` pair.

    For example ``("json.loads", "loads")`` or ``("open", "open")``. Either
    element is ``None`` when the corresponding helper cannot name the callee.
    """
    dotted_name = _dotted(call.func)
    bare_name = _call_name(call)
    return dotted_name, bare_name
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def access_path(node: ast.AST) -> str | None:
    """Return the dotted access chain of ``node`` with subscripts collapsed.

    Example: ``request.args["x"]`` -> ``"request.args"``.
    """
    path = _dotted(node)
    return path
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def access_root(node: ast.AST) -> str | None:
    """Return the leftmost name of an attribute/subscript chain.

    Example: ``request.args["x"]`` -> ``"request"``.
    """
    root = _root_name(node)
    return root
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _classify(expr: ast.AST, params: set[str], assigns: dict[str, list[ast.AST]], seen: frozenset[str]) -> Origin:
    """Recursively compute the ``Origin`` of ``expr``.

    ``params`` holds the enclosing function's parameter names, ``assigns`` maps
    each assigned local name to its right-hand sides, and ``seen`` holds the
    local names currently being expanded (used to break assignment cycles).
    Expression kinds not handled below are reported as unknown.
    """
    if isinstance(expr, ast.Constant):
        return _LITERAL
    if isinstance(expr, ast.JoinedStr):  # f-string: literal parts + interpolated exprs
        origin = _LITERAL
        for value in expr.values:
            if isinstance(value, ast.FormattedValue):
                origin = origin.merge(_classify(value.value, params, assigns, seen))
        return origin
    if isinstance(expr, ast.BinOp):
        return _classify(expr.left, params, assigns, seen).merge(_classify(expr.right, params, assigns, seen))
    if isinstance(expr, ast.BoolOp):
        return _merge_all(expr.values, params, assigns, seen)
    if isinstance(expr, ast.IfExp):  # value is one branch or the other; the test does not flow in
        return _classify(expr.body, params, assigns, seen).merge(_classify(expr.orelse, params, assigns, seen))
    if isinstance(expr, (ast.List, ast.Tuple, ast.Set)):
        return _merge_all(expr.elts, params, assigns, seen)
    if isinstance(expr, ast.Call):
        # the return value's taint depends on the callee's semantics, which P1-03
        # decides with the sanitizer/propagator catalog; here we just name the callee.
        name = _call_name(expr)
        return Origin(calls=frozenset({name})) if name else _UNKNOWN
    if isinstance(expr, (ast.Attribute, ast.Subscript)):
        dotted = _dotted(expr)
        origin = Origin(attrs=frozenset({dotted})) if dotted else _UNKNOWN
        root = _root_name(expr)
        if root in params:  # e.g. request.args[...] where `request` is a parameter
            origin = origin.merge(Origin(params=frozenset({root})))
        return origin
    if isinstance(expr, ast.Name):
        if expr.id in seen:  # assignment cycle -- stop
            return Origin()
        is_param = expr.id in params
        is_assigned = expr.id in assigns
        if not is_param and not is_assigned:
            return Origin(globals_=frozenset({expr.id}))  # module global, import, or builtin
        origin = Origin(params=frozenset({expr.id})) if is_param else Origin()
        if is_assigned:
            # The analysis is flow-insensitive, so a parameter that is also
            # reassigned in the body may hold either the caller's value or any
            # of its right-hand sides. Union them so no possible source is dropped.
            origin = origin.merge(_merge_all(assigns[expr.id], params, assigns, seen | {expr.id}))
        return origin
    return _UNKNOWN
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _merge_all(exprs: list[ast.AST], params, assigns, seen) -> Origin:
    """Classify each expression in ``exprs`` and fold the results into one ``Origin``.

    An empty ``exprs`` yields an empty ``Origin()``.
    """
    combined = Origin()
    for part in (_classify(item, params, assigns, seen) for item in exprs):
        combined = combined.merge(part)
    return combined
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _params(func: ast.AST) -> set[str]:
    """Collect every parameter name declared by ``func``.

    Covers positional-only, regular and keyword-only parameters plus the
    ``*args`` / ``**kwargs`` names. A node without an ``args`` attribute (for
    example a module-level scope) has no parameters and yields an empty set.
    """
    spec = getattr(func, "args", None)
    if spec is None:
        return set()
    found: set[str] = set()
    for group in (spec.posonlyargs, spec.args, spec.kwonlyargs):
        found.update(entry.arg for entry in group)
    for variadic in (spec.vararg, spec.kwarg):
        if variadic:
            found.add(variadic.arg)
    return found
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _assignments(func: ast.FunctionDef | ast.AsyncFunctionDef) -> dict[str, list[ast.AST]]:
    """Map each locally bound name to the expressions its value may come from.

    Every node under ``func`` is visited (``ast.walk``), and a name bound more
    than once accumulates all of its sources. Besides plain, annotated and
    augmented assignments, the other binding forms are recorded too, each
    mapped to the expression the bound value is drawn from:

    * ``for x in it`` / comprehension ``for x in it``  -> ``it``
    * ``with ctx as x``                                -> ``ctx``
    * ``(x := value)``                                 -> ``value``

    Without these, such names would not appear as assigned locals and would be
    indistinguishable from free (global) names to the caller.
    """
    out: dict[str, list[ast.AST]] = {}

    def bind(target: ast.AST, value: ast.AST) -> None:
        # Record `value` as a source for every plain name bound by `target`.
        for name in _target_names(target):
            out.setdefault(name, []).append(value)

    for node in ast.walk(func):
        if isinstance(node, ast.Assign):
            for target in node.targets:
                bind(target, node.value)
        elif isinstance(node, ast.AnnAssign):
            if node.value is not None:  # a bare annotation (`x: int`) binds nothing
                bind(node.target, node.value)
        elif isinstance(node, (ast.AugAssign, ast.NamedExpr)):
            bind(node.target, node.value)
        elif isinstance(node, (ast.For, ast.AsyncFor, ast.comprehension)):
            # The loop variable takes its values from the iterable.
            bind(node.target, node.iter)
        elif isinstance(node, (ast.With, ast.AsyncWith)):
            for item in node.items:
                if item.optional_vars is not None:
                    bind(item.optional_vars, item.context_expr)
    return out


def _target_names(target: ast.AST) -> list[str]:
    """Return the plain names bound by an assignment target.

    Tuple/list targets are flattened recursively and a starred element
    (``*rest``) contributes the name it wraps. Attribute and subscript targets
    bind no local name and yield an empty list.
    """
    if isinstance(target, ast.Name):
        return [target.id]
    if isinstance(target, ast.Starred):
        return _target_names(target.value)
    if isinstance(target, (ast.Tuple, ast.List)):
        return [name for elt in target.elts for name in _target_names(elt)]
    return []
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _call_name(call: ast.Call) -> str | None:
    """Return the short name of the function being called.

    A bare-name call yields that name, an attribute call yields its final
    attribute, and any other callee expression yields ``None``.
    """
    target = call.func
    if isinstance(target, ast.Attribute):
        return target.attr
    return target.id if isinstance(target, ast.Name) else None
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _dotted(node: ast.AST) -> str | None:
    """Render an access chain as a dotted string, skipping over subscripts.

    Example: ``request.args["x"]`` -> ``"request.args"``. Returns ``None`` when
    the chain does not bottom out in a plain name.
    """
    if isinstance(node, ast.Subscript):
        # A subscript adds nothing to the path; describe what is being indexed.
        return _dotted(node.value)
    if isinstance(node, ast.Name):
        return node.id
    if not isinstance(node, ast.Attribute):
        return None
    prefix = _dotted(node.value)
    if not prefix:
        return None
    return f"{prefix}.{node.attr}"
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _root_name(node: ast.AST) -> str | None:
    """Return the leftmost name of an attribute/subscript chain, or ``None``
    when the chain does not start at a plain name."""
    current = node
    while True:
        if not isinstance(current, (ast.Attribute, ast.Subscript)):
            break
        current = current.value
    if isinstance(current, ast.Name):
        return current.id
    return None
|
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
"""Taint classification (P1-03): turn provenance into a taint verdict.
|
|
2
|
+
|
|
3
|
+
Walks a value expression like P1-01's tracer, but consults the taint vocabulary
|
|
4
|
+
(P1-02) at every call and access: a known source makes a value EXTERNAL, a known
|
|
5
|
+
sanitizer makes it SANITIZED (taint stops), a propagator carries taint through to
|
|
6
|
+
the result, and a trusted origin or a literal is clean.
|
|
7
|
+
|
|
8
|
+
The point is to let a later layer (P1-04) downgrade a taint finding only when the
|
|
9
|
+
value is *provably* not attacker-controlled -- ``classification in SAFE`` -- so
|
|
10
|
+
recall is preserved: anything uncertain is UNKNOWN or PARAM, never quietly safe.
|
|
11
|
+
|
|
12
|
+
Two documented precision leans: a bare module-global name (e.g. ``STATIC_DIR``)
|
|
13
|
+
is treated as TRUSTED (module-level names are conventionally constants), and an
|
|
14
|
+
unknown attribute access (e.g. ``self.x``) is UNKNOWN rather than safe. These are
|
|
15
|
+
revisited against real repositories in P1-05.
|
|
16
|
+
|
|
17
|
+
This layer is intra-procedural: a value that depends on a parameter returns
|
|
18
|
+
PARAM, for the cross-file caller hop (next) to resolve.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import ast
|
|
24
|
+
from dataclasses import dataclass
|
|
25
|
+
from enum import Enum
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
|
|
28
|
+
import yaml
|
|
29
|
+
|
|
30
|
+
from codejury.analysis.provenance import (
|
|
31
|
+
access_path,
|
|
32
|
+
access_root,
|
|
33
|
+
assignments,
|
|
34
|
+
callee,
|
|
35
|
+
find_calls,
|
|
36
|
+
parameters,
|
|
37
|
+
)
|
|
38
|
+
from codejury.resources import TAINT_FILE
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class Taint(str, Enum):
    """Taint verdict assigned to a value expression.

    Members mix in ``str``, so each one compares equal to its plain string value.
    """

    EXTERNAL = "external"  # derives from an attacker source, not sanitized
    UNKNOWN = "unknown"  # an unknown call / access -- cannot prove either way
    PARAM = "param"  # depends on a parameter; resolve at the call site (cross-file)
    SANITIZED = "sanitized"  # had an external component, but a sanitizer neutralized it
    TRUSTED = "trusted"  # operator/config/global origin
    CONSTANT = "constant"  # built only from literals
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# Provably-not-attacker-controlled: the only classes P1-04 may downgrade on.
SAFE = frozenset({Taint.CONSTANT, Taint.SANITIZED, Taint.TRUSTED})

# Ranked for combining a composite value: the most dangerous component wins.
# A larger number means more dangerous; every Taint member has an entry.
_RANK = {
    Taint.EXTERNAL: 6,
    Taint.UNKNOWN: 5,
    Taint.PARAM: 4,
    Taint.SANITIZED: 3,
    Taint.TRUSTED: 2,
    Taint.CONSTANT: 1,
}
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@dataclass(frozen=True)
class TaintVocab:
    """Immutable taint vocabulary built from the taint YAML document.

    Each field is a flat tuple of names gathered from every entry of the
    same-named top-level section of the document.
    """

    # Names/access prefixes whose value is classified EXTERNAL.
    sources: tuple[str, ...]
    # Access prefixes whose value is classified TRUSTED.
    trusted: tuple[str, ...]
    # Calls whose result is classified SANITIZED.
    sanitizers: tuple[str, ...]
    # Calls that are safe parsers (e.g. json.loads, ast.literal_eval).
    safe_sinks: tuple[str, ...]
    # Calls that carry the taint of their inputs through to the result.
    propagators: tuple[str, ...]

    @classmethod
    def from_dict(cls, data: dict) -> "TaintVocab":
        """Build a vocabulary from the parsed YAML mapping.

        ``sources`` and ``trusted`` entries contribute their ``match`` list;
        ``sanitizers``, ``safe_sinks`` and ``propagators`` entries contribute
        their ``calls`` list. A missing or null section, a null entry, or a
        missing or null list contributes nothing. A scalar string is taken as
        a single name rather than being split into characters.
        """

        def collect(section: str, key: str) -> tuple[str, ...]:
            names: list[str] = []
            # ``key:`` with no value parses to None in YAML, hence the ``or ()``.
            for entry in (data or {}).get(section) or ():
                if entry is None:
                    continue
                values = entry.get(key)
                if isinstance(values, str):
                    values = (values,)
                names.extend(values or ())
            return tuple(names)

        return cls(
            sources=collect("sources", "match"),
            trusted=collect("trusted", "match"),
            sanitizers=collect("sanitizers", "calls"),
            safe_sinks=collect("safe_sinks", "calls"),
            propagators=collect("propagators", "calls"),
        )
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def load_vocab(path: str | Path = TAINT_FILE) -> TaintVocab:
    """Read the taint vocabulary YAML at ``path`` and build a ``TaintVocab``.

    A document that parses to nothing is treated as an empty mapping.
    """
    with open(path, encoding="utf-8") as handle:
        document = yaml.safe_load(handle)
        if not document:
            document = {}
        return TaintVocab.from_dict(document)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def is_safe_sink(call: ast.Call, vocab: TaintVocab) -> bool:
    """Report whether ``call`` invokes one of the vocabulary's safe sinks.

    Safe sinks are safe parsers such as ``json.loads`` or ``ast.literal_eval``.
    """
    safe_names = vocab.safe_sinks
    return _callee_in(call, safe_names)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def taint_of(
    func: ast.AST,
    expr: ast.AST,
    vocab: TaintVocab,
    *,
    resolve_param=None,
) -> Taint:
    """Return the taint verdict for ``expr`` evaluated inside ``func``.

    The walk starts with the assignments and parameters collected from
    ``func`` and an empty set of already-visited names.

    ``resolve_param``, when supplied, is a ``(name) -> Taint`` callback used
    for any value that reaches a parameter (the cross-file caller hop,
    P1-03b). Without it such a value is reported as PARAM.
    """
    local_assignments = assignments(func)
    local_parameters = parameters(func)
    visited = frozenset()
    return _walk(func, expr, vocab, local_assignments, local_parameters, visited, resolve_param)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _walk(func, expr, vocab, assigns, params, seen, resolve) -> Taint:
    """Recursively classify ``expr`` and return the worst taint it can carry.

    Args:
        func: scope being analysed; only threaded through to recursive calls.
        expr: the expression node to classify.
        vocab: the taint vocabulary consulted at calls and accesses.
        assigns: mapping from a local name to the expressions assigned to it.
        params: container of the scope's parameter names.
        seen: names already being resolved on this path (cycle guard).
        resolve: optional ``(name) -> Taint`` callback for parameters; when
            absent a parameter-dependent value is PARAM.

    Expression kinds not handled explicitly fall through to UNKNOWN, so an
    unrecognised construct is never reported as safe.
    """

    def w(e):
        return _walk(func, e, vocab, assigns, params, seen, resolve)

    if isinstance(expr, ast.Constant):
        return Taint.CONSTANT
    if isinstance(expr, ast.JoinedStr):
        # The literal parts of an f-string are constant; only the
        # interpolated values can add taint.
        return _combine([Taint.CONSTANT] + [w(v.value) for v in expr.values if isinstance(v, ast.FormattedValue)])
    if isinstance(expr, ast.BinOp):
        return _combine([w(expr.left), w(expr.right)])
    if isinstance(expr, ast.BoolOp):
        return _combine([w(v) for v in expr.values])
    if isinstance(expr, ast.IfExp):
        # Either branch may be the result; the test expression is not.
        return _combine([w(expr.body), w(expr.orelse)])
    if isinstance(expr, (ast.List, ast.Tuple, ast.Set)):
        return _combine([w(e) for e in expr.elts] or [Taint.CONSTANT])
    if isinstance(expr, ast.Call):
        if _callee_in(expr, vocab.sanitizers):
            return Taint.SANITIZED  # a sanitizer cleans its result regardless of input
        if _callee_in(expr, vocab.sources):
            return Taint.EXTERNAL  # e.g. input()
        carriers = tuple(vocab.propagators) + tuple(vocab.safe_sinks)
        if _callee_in(expr, carriers):
            # Taint flows in through positional AND keyword arguments.
            inputs = [*expr.args, *(kw.value for kw in expr.keywords)]
            # For a method-style call matched by its bare name
            # (``value.strip()``) the receiver is data as well. When the
            # match came from a qualified entry (``json.loads``) the
            # receiver is only a namespace and is left out.
            qualified = tuple(name for name in carriers if "." in name)
            if isinstance(expr.func, ast.Attribute) and not _callee_in(expr, qualified):
                inputs.append(expr.func.value)
            return _combine([w(i) for i in inputs] or [Taint.CONSTANT])
        return Taint.UNKNOWN  # unknown call -- a cross-file hop may resolve it later
    if isinstance(expr, (ast.Attribute, ast.Subscript)):
        path = access_path(expr)
        if path and _access_in(path, vocab.sources):
            return Taint.EXTERNAL
        if path and _access_in(path, vocab.trusted):
            return Taint.TRUSTED
        root = access_root(expr)
        if root in params:  # attribute of a parameter -- resolve at call site
            return resolve(root) if resolve else Taint.PARAM
        return Taint.UNKNOWN  # unknown object attribute (e.g. self.x): not provably safe
    if isinstance(expr, ast.Name):
        if expr.id in seen:
            # Assignment cycle: CONSTANT is the lowest rank, so it adds
            # nothing when combined with the other components.
            return Taint.CONSTANT
        if expr.id in params:
            return resolve(expr.id) if resolve else Taint.PARAM
        if expr.id in assigns:
            # Any of the recorded assignments may be the live one: take the worst.
            return _combine([_walk(func, r, vocab, assigns, params, seen | {expr.id}, resolve)
                             for r in assigns[expr.id]])
        return Taint.TRUSTED  # free module global / builtin -- conventionally a constant
    return Taint.UNKNOWN
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _combine(taints: list[Taint]) -> Taint:
    """Return the most dangerous member of ``taints`` according to ``_RANK``.

    An empty list yields ``Taint.CONSTANT``, the lowest-ranked member and
    therefore the identity for this combination, instead of raising
    ``ValueError`` from ``max``.
    """
    return max(taints, key=lambda t: _RANK[t], default=Taint.CONSTANT)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _callee_in(call: ast.Call, names: tuple[str, ...]) -> bool:
    """Report whether the callee of ``call`` matches any entry in ``names``.

    An entry containing a dot is compared against the callee's dotted form:
    it matches when equal to it or when it is a dot-separated suffix of it.
    An entry without a dot is compared against the callee's bare name.
    """
    dotted, bare = callee(call)

    def hit(name: str) -> bool:
        if "." not in name:
            return bare == name
        if dotted is None:
            return False
        return dotted == name or dotted.endswith("." + name)

    return any(hit(name) for name in names)
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _access_in(path: str, prefixes: tuple[str, ...]) -> bool:
    """Report whether ``path`` equals a prefix or extends one past a dot.

    With the prefix ``"request.args"``, both ``"request.args"`` and
    ``"request.args.get"`` match, while ``"request.argsx"`` does not.
    """
    for prefix in prefixes:
        if path == prefix:
            return True
        if path.startswith(prefix + "."):
            return True
    return False
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
# --- cross-file one-hop resolution (P1-03b) ---------------------------------
|
|
182
|
+
|
|
183
|
+
def taint_in_repo(
    func: ast.FunctionDef | ast.AsyncFunctionDef,
    expr: ast.AST,
    vocab: TaintVocab,
    files: dict[str, str],
) -> Taint:
    """Classify ``expr`` in ``func``, looking one hop up for parameter values.

    A value that reaches a parameter is resolved from the arguments passed
    at the call sites of ``func`` found across ``files``. The call sites are
    combined: one attacker-controlled argument makes the result EXTERNAL,
    and the result is safe only if every caller passes a
    sanitized/constant/trusted value. When no caller is found the parameter
    is UNKNOWN rather than assumed safe.
    """
    caller_lookup = _caller_resolver(func, files, vocab)
    return taint_of(func, expr, vocab, resolve_param=caller_lookup)
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def _caller_resolver(func, files, vocab):
    """Build the ``(param_name) -> Taint`` callback for one-hop resolution.

    The returned callback classifies, at every call site of ``func`` found in
    ``files``, the argument bound to ``param_name`` and combines the results
    (worst wins). It returns UNKNOWN when no call site exists or when a call
    site does not show the argument.

    Methods need care: ``self``/``cls`` occupies slot 0 of the definition but
    does not appear in ``call.args`` of a bound call ``obj.m(x)``. A bound
    call cannot be told apart syntactically from an explicit ``Cls.m(obj, x)``,
    so for attribute-style calls to such a function both readings are
    classified and the worse verdict is kept.
    """

    def resolve(param_name: str) -> Taint:
        index = _param_index(func, param_name)
        positional = [*func.args.posonlyargs, *func.args.args]
        is_static = any(
            isinstance(dec, ast.Name) and dec.id == "staticmethod"
            for dec in func.decorator_list
        )
        takes_receiver = (
            bool(positional)
            and positional[0].arg in ("self", "cls")
            and not is_static
        )
        results = []
        for scope, call in _call_sites(func.name, files):
            # Reading 1: arguments line up with the definition as written.
            candidates = [_arg_for_param(call, index, param_name)]
            if takes_receiver and index is not None and isinstance(call.func, ast.Attribute):
                # Reading 2: bound call -- the receiver fills slot 0 and every
                # explicit argument sits one position earlier.
                if index == 0:
                    candidates.append(call.func.value)
                else:
                    candidates.append(_arg_for_param(call, index - 1, param_name))
            found = [arg for arg in candidates if arg is not None]
            if found:
                # one hop only: classify the caller's argument without recursing further
                results.extend(taint_of(scope, arg, vocab) for arg in found)
            else:
                results.append(Taint.UNKNOWN)
        return _combine(results) if results else Taint.UNKNOWN

    return resolve
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _param_index(func, name: str) -> int | None:
    """Return the positional slot of parameter ``name`` in ``func``.

    Positional-only parameters are counted first, then the regular ones.
    ``None`` means the name is not positional (keyword-only or ``*args``);
    such a parameter is matched by keyword at the call site instead.
    """
    slots = func.args.posonlyargs + func.args.args
    return next((pos for pos, param in enumerate(slots) if param.arg == name), None)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def _arg_for_param(call: ast.Call, index: int | None, name: str) -> ast.AST | None:
    """Return the expression ``call`` passes for a parameter, if visible.

    Args:
        call: the call site.
        index: the parameter's positional slot, or ``None`` if it can only
            be passed by keyword.
        name: the parameter's name, used for keyword matching.

    Returns:
        The positional argument at ``index`` when present, otherwise the
        value of the keyword named ``name``, otherwise ``None``. ``None`` is
        also returned when a ``*unpack`` sits at or before the slot, because
        an unpacking of unknown length shifts every later position and the
        positional mapping can no longer be trusted.
    """
    if index is not None and index < len(call.args):
        if any(isinstance(arg, ast.Starred) for arg in call.args[: index + 1]):
            return None
        return call.args[index]
    for kw in call.keywords:
        if kw.arg == name:
            return kw.value
    return None
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def _call_sites(name: str, files: dict[str, str]) -> list[tuple[ast.AST, ast.Call]]:
    """Collect every call to ``name`` across ``files`` with its enclosing scope.

    Args:
        name: the function name passed to ``find_calls``.
        files: mapping whose values are Python source texts; the keys are
            not used here.

    Returns:
        ``(scope, call)`` pairs, where ``scope`` is the innermost function
        containing the call, or the module tree for a module-level call.
        Files that cannot be parsed are skipped.
    """
    sites = []
    for source in files.values():
        try:
            tree = ast.parse(source)
        except (SyntaxError, ValueError):
            # ValueError covers source containing null bytes, which some
            # interpreter versions do not report as SyntaxError. One
            # unparsable file must not abort the whole scan.
            continue
        funcs = [n for n in ast.walk(tree) if isinstance(n, (ast.FunctionDef, ast.AsyncFunctionDef))]
        for call in find_calls(tree, name):
            sites.append((_enclosing_scope(funcs, call) or tree, call))
    return sites
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def _enclosing_scope(funcs: list[ast.AST], call: ast.Call) -> ast.AST | None:
    """Return the innermost function in ``funcs`` whose subtree holds ``call``.

    "Innermost" is the containing function with the fewest nodes; on a tie
    the earliest one in ``funcs`` wins. ``None`` means no function contains
    the call, i.e. it is a module-level call site.
    """
    innermost = None
    fewest = None
    for candidate in funcs:
        nodes = list(ast.walk(candidate))
        if not any(node is call for node in nodes):
            continue
        # Strict comparison keeps the first candidate when sizes are equal.
        if fewest is None or len(nodes) < fewest:
            innermost, fewest = candidate, len(nodes)
    return innermost
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def worst_sink_taint(content: str, files: dict[str, str], vocab: TaintVocab) -> Taint | None:
    """The most dangerous taint reaching any potential sink call in ``content``.

    A "potential sink" is any call that is not a safe sink, sanitizer, or
    propagator (those are not where injection happens). Every positional and
    keyword argument of such a call is classified with the cross-file
    resolver, and the worst verdict is returned.

    Returns:
        ``Taint.CONSTANT`` when there is no sink argument to worry about;
        ``None`` when the code does not parse (the caller should then not act).

    Used by the taint gate to downgrade an input_validation finding only when
    the whole artifact is provably clean -- so a single tainted sink keeps
    every finding (recall preserved).
    """
    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError):
        # ValueError covers source containing null bytes, which some
        # interpreter versions do not report as SyntaxError.
        return None
    funcs = [n for n in ast.walk(tree) if isinstance(n, (ast.FunctionDef, ast.AsyncFunctionDef))]
    taints: list[Taint] = []
    for call in [n for n in ast.walk(tree) if isinstance(n, ast.Call)]:
        if is_safe_sink(call, vocab) or _callee_in(call, vocab.sanitizers) or _callee_in(call, vocab.propagators):
            continue  # not a place an injection lands
        # Module-level calls have no enclosing function; use the module tree.
        scope = _enclosing_scope(funcs, call) or tree
        for arg in (*call.args, *(kw.value for kw in call.keywords)):
            taints.append(taint_in_repo(scope, arg, vocab, files))
    return _combine(taints) if taints else Taint.CONSTANT
|
|
@@ -23,6 +23,7 @@ from codejury.orchestrators.debate import DebateOrchestrator
|
|
|
23
23
|
from codejury.orchestrators.pipeline import PipelineOrchestrator
|
|
24
24
|
from codejury.orchestrators.reflexion import ReflexionOrchestrator
|
|
25
25
|
from codejury.orchestrators.single import SingleOrchestrator
|
|
26
|
+
from codejury.orchestrators.taint_gate import TaintGateOrchestrator
|
|
26
27
|
from codejury.providers.anthropic import AnthropicProvider
|
|
27
28
|
from codejury.providers.base import Provider
|
|
28
29
|
from codejury.providers.litellm import LiteLLMProvider
|
|
@@ -30,7 +31,7 @@ from codejury.providers.openai import OpenAIProvider
|
|
|
30
31
|
from codejury.providers.retry import RetryProvider
|
|
31
32
|
from codejury.sources.base import Source
|
|
32
33
|
|
|
33
|
-
STRATEGIES = ("single", "pipeline", "debate", "reflexion", "challenge")
|
|
34
|
+
STRATEGIES = ("single", "pipeline", "debate", "reflexion", "challenge", "taint")
|
|
34
35
|
PROVIDERS = ("anthropic", "openai", "litellm")
|
|
35
36
|
DEFAULT_MODEL = os.environ.get("CODEJURY_MODEL", "claude-sonnet-4-6")
|
|
36
37
|
DEFAULT_API_BASE = os.environ.get("CODEJURY_API_BASE")
|
|
@@ -73,6 +74,8 @@ def build_orchestration(
|
|
|
73
74
|
verifier = {"verifier": VerifierAgent(provider=provider, model=model, max_tokens=max_tokens)}
|
|
74
75
|
if strategy == "pipeline":
|
|
75
76
|
return verifier, PipelineOrchestrator()
|
|
77
|
+
if strategy == "taint":
|
|
78
|
+
return verifier, TaintGateOrchestrator()
|
|
76
79
|
return verifier, SingleOrchestrator()
|
|
77
80
|
|
|
78
81
|
|
|
@@ -276,6 +276,7 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
276
276
|
eval_p = sub.add_parser("eval", help="score golden cases and report precision/recall")
|
|
277
277
|
eval_p.add_argument("--dataset", default=GOLDEN_DIR, help="golden case YAML directory")
|
|
278
278
|
eval_p.add_argument("--split", default=None, help="only score cases whose 'split' matches (e.g. held-out)")
|
|
279
|
+
eval_p.add_argument("--orchestrator", choices=STRATEGIES, default="single")
|
|
279
280
|
eval_p.add_argument("--capabilities", default=CAPABILITIES_DIR, help="capability YAML directory")
|
|
280
281
|
eval_p.add_argument("--provider", choices=PROVIDERS, default="anthropic")
|
|
281
282
|
eval_p.add_argument("--format", choices=("text", "json"), default="text", dest="fmt")
|
|
@@ -344,6 +345,7 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
344
345
|
load_capabilities(args.capabilities),
|
|
345
346
|
provider=make_provider(args.provider, api_key=args.api_key, api_base=args.api_base),
|
|
346
347
|
model=args.model,
|
|
348
|
+
strategy=args.orchestrator,
|
|
347
349
|
)
|
|
348
350
|
except Exception as exc:
|
|
349
351
|
# e.g. a missing API key surfaces as a provider auth error -- report it
|