codejury 0.5.1__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. {codejury-0.5.1 → codejury-0.7.0}/PKG-INFO +28 -4
  2. {codejury-0.5.1 → codejury-0.7.0}/README.md +26 -3
  3. {codejury-0.5.1 → codejury-0.7.0}/codejury/__init__.py +6 -1
  4. {codejury-0.5.1 → codejury-0.7.0}/codejury/agents/verifier.py +14 -1
  5. codejury-0.7.0/codejury/analysis/__init__.py +7 -0
  6. codejury-0.7.0/codejury/analysis/provenance.py +208 -0
  7. codejury-0.7.0/codejury/analysis/taint.py +273 -0
  8. {codejury-0.5.1 → codejury-0.7.0}/codejury/assembly.py +35 -4
  9. {codejury-0.5.1 → codejury-0.7.0}/codejury/cli.py +39 -15
  10. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/capabilities/dependency_config.yaml +27 -0
  11. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/capabilities/input_validation.yaml +60 -0
  12. codejury-0.7.0/codejury/data/golden/authn_weak_hash_indirect_vuln.yaml +14 -0
  13. codejury-0.7.0/codejury/data/golden/business_logic_price_tamper_vuln.yaml +14 -0
  14. codejury-0.7.0/codejury/data/golden/business_logic_server_checked_safe.yaml +15 -0
  15. codejury-0.7.0/codejury/data/golden/cmdi_fixed_argv_safe.yaml +22 -0
  16. codejury-0.7.0/codejury/data/golden/data_protection_plaintext_pii_vuln.yaml +14 -0
  17. codejury-0.7.0/codejury/data/golden/data_protection_tokenized_safe.yaml +16 -0
  18. codejury-0.7.0/codejury/data/golden/dependency_config_tls_verify_off_vuln.yaml +11 -0
  19. codejury-0.7.0/codejury/data/golden/dependency_config_tls_verify_on_safe.yaml +11 -0
  20. codejury-0.7.0/codejury/data/golden/deserialize_json_safe.yaml +13 -0
  21. codejury-0.7.0/codejury/data/golden/deserialize_pickle_vuln.yaml +12 -0
  22. codejury-0.7.0/codejury/data/golden/error_logging_redacted_safe.yaml +11 -0
  23. codejury-0.7.0/codejury/data/golden/error_logging_secret_leak_vuln.yaml +12 -0
  24. codejury-0.7.0/codejury/data/golden/literal_eval_safe.yaml +14 -0
  25. codejury-0.7.0/codejury/data/golden/path_basename_safe.yaml +15 -0
  26. codejury-0.7.0/codejury/data/golden/session_fixation_vuln.yaml +16 -0
  27. codejury-0.7.0/codejury/data/golden/session_secure_cookie_safe.yaml +18 -0
  28. codejury-0.7.0/codejury/data/golden/sql_constant_concat_safe.yaml +14 -0
  29. codejury-0.7.0/codejury/data/golden/sqli_indirect_var_vuln.yaml +16 -0
  30. codejury-0.7.0/codejury/data/golden/ssrf_allowlist_safe.yaml +17 -0
  31. codejury-0.7.0/codejury/data/golden/ssrf_constant_url_safe.yaml +13 -0
  32. codejury-0.7.0/codejury/data/golden/ssrf_substring_allowlist_bypass_vuln.yaml +17 -0
  33. codejury-0.7.0/codejury/data/golden/ssrf_user_url_vuln.yaml +12 -0
  34. codejury-0.7.0/codejury/data/golden/xfile_idor_no_check_vuln.yaml +20 -0
  35. codejury-0.7.0/codejury/data/golden/xfile_idor_owner_checked_safe.yaml +21 -0
  36. codejury-0.7.0/codejury/data/golden/xfile_path_sanitized_safe.yaml +22 -0
  37. codejury-0.7.0/codejury/data/golden/xfile_path_tainted_vuln.yaml +19 -0
  38. codejury-0.7.0/codejury/data/golden/xss_textcontent_safe.yaml +15 -0
  39. codejury-0.7.0/codejury/data/taint.yaml +109 -0
  40. {codejury-0.5.1 → codejury-0.7.0}/codejury/domain/capability.py +16 -1
  41. {codejury-0.5.1 → codejury-0.7.0}/codejury/domain/observation.py +20 -0
  42. {codejury-0.5.1 → codejury-0.7.0}/codejury/domain/result.py +15 -1
  43. codejury-0.7.0/codejury/evaluation.py +166 -0
  44. codejury-0.7.0/codejury/infrastructure/cache.py +76 -0
  45. codejury-0.7.0/codejury/orchestrators/taint_gate.py +75 -0
  46. {codejury-0.5.1 → codejury-0.7.0}/codejury/providers/anthropic.py +1 -0
  47. {codejury-0.5.1 → codejury-0.7.0}/codejury/providers/litellm.py +1 -1
  48. {codejury-0.5.1 → codejury-0.7.0}/codejury/providers/openai.py +1 -0
  49. {codejury-0.5.1 → codejury-0.7.0}/codejury/reporting.py +106 -0
  50. {codejury-0.5.1 → codejury-0.7.0}/codejury/resources.py +1 -0
  51. {codejury-0.5.1 → codejury-0.7.0}/codejury.egg-info/PKG-INFO +28 -4
  52. {codejury-0.5.1 → codejury-0.7.0}/codejury.egg-info/SOURCES.txt +39 -0
  53. {codejury-0.5.1 → codejury-0.7.0}/codejury.egg-info/requires.txt +1 -0
  54. {codejury-0.5.1 → codejury-0.7.0}/pyproject.toml +2 -2
  55. {codejury-0.5.1 → codejury-0.7.0}/tests/test_anthropic_provider.py +1 -0
  56. codejury-0.7.0/tests/test_cache.py +117 -0
  57. codejury-0.7.0/tests/test_evaluation.py +166 -0
  58. {codejury-0.5.1 → codejury-0.7.0}/tests/test_litellm_provider.py +1 -0
  59. {codejury-0.5.1 → codejury-0.7.0}/tests/test_openai_provider.py +1 -0
  60. codejury-0.7.0/tests/test_provenance.py +105 -0
  61. codejury-0.7.0/tests/test_sarif.py +115 -0
  62. codejury-0.7.0/tests/test_taint.py +97 -0
  63. codejury-0.7.0/tests/test_taint_crossfile.py +68 -0
  64. codejury-0.7.0/tests/test_taint_gate.py +89 -0
  65. codejury-0.5.1/codejury/evaluation.py +0 -107
  66. codejury-0.5.1/tests/test_evaluation.py +0 -69
  67. {codejury-0.5.1 → codejury-0.7.0}/LICENSE +0 -0
  68. {codejury-0.5.1 → codejury-0.7.0}/codejury/agents/__init__.py +0 -0
  69. {codejury-0.5.1 → codejury-0.7.0}/codejury/agents/base.py +0 -0
  70. {codejury-0.5.1 → codejury-0.7.0}/codejury/agents/debate.py +0 -0
  71. {codejury-0.5.1 → codejury-0.7.0}/codejury/agents/mock.py +0 -0
  72. {codejury-0.5.1 → codejury-0.7.0}/codejury/agents/parsing.py +0 -0
  73. {codejury-0.5.1 → codejury-0.7.0}/codejury/agents/refuter.py +0 -0
  74. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/capabilities/authentication.yaml +0 -0
  75. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/capabilities/authorization.yaml +0 -0
  76. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/capabilities/business_logic.yaml +0 -0
  77. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/capabilities/crypto.yaml +0 -0
  78. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/capabilities/data_protection.yaml +0 -0
  79. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/capabilities/error_logging.yaml +0 -0
  80. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/capabilities/output_encoding.yaml +0 -0
  81. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/capabilities/secrets.yaml +0 -0
  82. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/capabilities/session.yaml +0 -0
  83. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/golden/authn_bcrypt_password.yaml +0 -0
  84. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/golden/authn_jwt_noverify_vuln.yaml +0 -0
  85. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/golden/authn_jwt_verified_safe.yaml +0 -0
  86. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/golden/authn_sha256_checksum_safe.yaml +0 -0
  87. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/golden/authn_sha256_password.yaml +0 -0
  88. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/golden/authz_idor_vuln.yaml +0 -0
  89. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/golden/authz_owner_safe.yaml +0 -0
  90. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/golden/cmdi_ossystem_vuln.yaml +0 -0
  91. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/golden/cmdi_subprocess_safe.yaml +0 -0
  92. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/golden/crypto_aesgcm_safe.yaml +0 -0
  93. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/golden/crypto_ecb_vuln.yaml +0 -0
  94. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/golden/path_contained_safe.yaml +0 -0
  95. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/golden/path_traversal_vuln.yaml +0 -0
  96. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/golden/secrets_env_safe.yaml +0 -0
  97. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/golden/secrets_hardcoded_vuln.yaml +0 -0
  98. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/golden/sqli_format_vuln.yaml +0 -0
  99. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/golden/sqli_fstring_query.yaml +0 -0
  100. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/golden/sqli_parameterized_query.yaml +0 -0
  101. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/golden/xss_innerhtml_constant_safe.yaml +0 -0
  102. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/golden/xss_innerhtml_vuln.yaml +0 -0
  103. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/suppressions.yaml +0 -0
  104. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/tasks/audit_diff_debate.yaml +0 -0
  105. {codejury-0.5.1 → codejury-0.7.0}/codejury/data/tasks/quick_scan_single.yaml +0 -0
  106. {codejury-0.5.1 → codejury-0.7.0}/codejury/domain/__init__.py +0 -0
  107. {codejury-0.5.1 → codejury-0.7.0}/codejury/domain/artifact.py +0 -0
  108. {codejury-0.5.1 → codejury-0.7.0}/codejury/domain/context.py +0 -0
  109. {codejury-0.5.1 → codejury-0.7.0}/codejury/infrastructure/__init__.py +0 -0
  110. {codejury-0.5.1 → codejury-0.7.0}/codejury/infrastructure/json_parse.py +0 -0
  111. {codejury-0.5.1 → codejury-0.7.0}/codejury/integrations/__init__.py +0 -0
  112. {codejury-0.5.1 → codejury-0.7.0}/codejury/integrations/github.py +0 -0
  113. {codejury-0.5.1 → codejury-0.7.0}/codejury/orchestrators/__init__.py +0 -0
  114. {codejury-0.5.1 → codejury-0.7.0}/codejury/orchestrators/base.py +0 -0
  115. {codejury-0.5.1 → codejury-0.7.0}/codejury/orchestrators/challenge.py +0 -0
  116. {codejury-0.5.1 → codejury-0.7.0}/codejury/orchestrators/debate.py +0 -0
  117. {codejury-0.5.1 → codejury-0.7.0}/codejury/orchestrators/pipeline.py +0 -0
  118. {codejury-0.5.1 → codejury-0.7.0}/codejury/orchestrators/reflexion.py +0 -0
  119. {codejury-0.5.1 → codejury-0.7.0}/codejury/orchestrators/single.py +0 -0
  120. {codejury-0.5.1 → codejury-0.7.0}/codejury/providers/__init__.py +0 -0
  121. {codejury-0.5.1 → codejury-0.7.0}/codejury/providers/base.py +0 -0
  122. {codejury-0.5.1 → codejury-0.7.0}/codejury/providers/mock.py +0 -0
  123. {codejury-0.5.1 → codejury-0.7.0}/codejury/providers/openai_format.py +0 -0
  124. {codejury-0.5.1 → codejury-0.7.0}/codejury/providers/retry.py +0 -0
  125. {codejury-0.5.1 → codejury-0.7.0}/codejury/sources/__init__.py +0 -0
  126. {codejury-0.5.1 → codejury-0.7.0}/codejury/sources/base.py +0 -0
  127. {codejury-0.5.1 → codejury-0.7.0}/codejury/sources/callers.py +0 -0
  128. {codejury-0.5.1 → codejury-0.7.0}/codejury/sources/chunker.py +0 -0
  129. {codejury-0.5.1 → codejury-0.7.0}/codejury/sources/diff.py +0 -0
  130. {codejury-0.5.1 → codejury-0.7.0}/codejury/sources/function.py +0 -0
  131. {codejury-0.5.1 → codejury-0.7.0}/codejury/sources/mock.py +0 -0
  132. {codejury-0.5.1 → codejury-0.7.0}/codejury/sources/repo.py +0 -0
  133. {codejury-0.5.1 → codejury-0.7.0}/codejury/suppression.py +0 -0
  134. {codejury-0.5.1 → codejury-0.7.0}/codejury/tasks/__init__.py +0 -0
  135. {codejury-0.5.1 → codejury-0.7.0}/codejury/tasks/base.py +0 -0
  136. {codejury-0.5.1 → codejury-0.7.0}/codejury/tasks/registry.py +0 -0
  137. {codejury-0.5.1 → codejury-0.7.0}/codejury.egg-info/dependency_links.txt +0 -0
  138. {codejury-0.5.1 → codejury-0.7.0}/codejury.egg-info/entry_points.txt +0 -0
  139. {codejury-0.5.1 → codejury-0.7.0}/codejury.egg-info/top_level.txt +0 -0
  140. {codejury-0.5.1 → codejury-0.7.0}/setup.cfg +0 -0
  141. {codejury-0.5.1 → codejury-0.7.0}/tests/test_assembly.py +0 -0
  142. {codejury-0.5.1 → codejury-0.7.0}/tests/test_audit_pipeline.py +0 -0
  143. {codejury-0.5.1 → codejury-0.7.0}/tests/test_callers.py +0 -0
  144. {codejury-0.5.1 → codejury-0.7.0}/tests/test_capability.py +0 -0
  145. {codejury-0.5.1 → codejury-0.7.0}/tests/test_challenge.py +0 -0
  146. {codejury-0.5.1 → codejury-0.7.0}/tests/test_cli_audit.py +0 -0
  147. {codejury-0.5.1 → codejury-0.7.0}/tests/test_context.py +0 -0
  148. {codejury-0.5.1 → codejury-0.7.0}/tests/test_debate_agents.py +0 -0
  149. {codejury-0.5.1 → codejury-0.7.0}/tests/test_debate_orchestrator.py +0 -0
  150. {codejury-0.5.1 → codejury-0.7.0}/tests/test_diff_source.py +0 -0
  151. {codejury-0.5.1 → codejury-0.7.0}/tests/test_function_source.py +0 -0
  152. {codejury-0.5.1 → codejury-0.7.0}/tests/test_integrations.py +0 -0
  153. {codejury-0.5.1 → codejury-0.7.0}/tests/test_json_parse.py +0 -0
  154. {codejury-0.5.1 → codejury-0.7.0}/tests/test_orchestrator.py +0 -0
  155. {codejury-0.5.1 → codejury-0.7.0}/tests/test_pipeline_orchestrator.py +0 -0
  156. {codejury-0.5.1 → codejury-0.7.0}/tests/test_reflexion_orchestrator.py +0 -0
  157. {codejury-0.5.1 → codejury-0.7.0}/tests/test_repo_source.py +0 -0
  158. {codejury-0.5.1 → codejury-0.7.0}/tests/test_reporting.py +0 -0
  159. {codejury-0.5.1 → codejury-0.7.0}/tests/test_retry_provider.py +0 -0
  160. {codejury-0.5.1 → codejury-0.7.0}/tests/test_suppression.py +0 -0
  161. {codejury-0.5.1 → codejury-0.7.0}/tests/test_tasks.py +0 -0
  162. {codejury-0.5.1 → codejury-0.7.0}/tests/test_verifier.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codejury
3
- Version: 0.5.1
3
+ Version: 0.7.0
4
4
  Summary: General-purpose Application Security AI audit framework -- five-layer architecture, capabilities as first-class data
5
5
  Author: AISecLabs
6
6
  License-Expression: MIT
@@ -25,6 +25,7 @@ Provides-Extra: litellm
25
25
  Requires-Dist: litellm>=1.0; extra == "litellm"
26
26
  Provides-Extra: dev
27
27
  Requires-Dist: pytest>=8.0; extra == "dev"
28
+ Requires-Dist: jsonschema>=4.0; extra == "dev"
28
29
  Dynamic: license-file
29
30
 
30
31
  # codejury
@@ -80,15 +81,38 @@ git diff | codejury audit --provider anthropic
80
81
  | `codejury audit [diff]` | Audit a unified diff from a file or stdin (`-`). |
81
82
  | `codejury scan <dir>` | Audit a whole directory tree, capability by capability. |
82
83
  | `codejury run <task>` | Run a named task preset (see [Tasks](#tasks)). |
83
- | `codejury eval` | Score the golden cases and report precision / recall. |
84
+ | `codejury eval` | Score the golden cases; report precision / recall / F1, overall and per capability. |
84
85
 
85
- Shared flags: `--orchestrator {single,pipeline,debate,reflexion,challenge}`,
86
- `--provider {anthropic,openai,litellm}`, `--model`, `--format {text,markdown,json}`.
86
+ Shared flags: `--orchestrator {single,pipeline,debate,reflexion,challenge,taint}`,
87
+ `--provider {anthropic,openai,litellm}`, `--model`,
88
+ `--format {text,markdown,json,sarif}`.
89
+
90
+ `--orchestrator taint` adds a data-flow gate: after the verifier rules, it clears
91
+ an `input_validation` finding only when static provenance analysis proves the
92
+ value reaching the sink is constant, sanitized, or trusted (using cross-file
93
+ caller/callee context). It downgrades only on positive proof, so it removes false
94
+ positives without dropping real findings.
95
+
96
+ `--format sarif` emits a SARIF 2.1.0 log (validates against the official schema)
97
+ for CI and security dashboards: each problem with a code location becomes a
98
+ result carrying its capability (as the rule id), CWE, and a precise location.
87
99
 
88
100
  Findings in known-noise categories (availability/DoS, rate limiting, memory safety
89
101
  outside C/C++) are dropped by versioned rules in
90
102
  `codejury/data/suppressions.yaml`; disable with `--no-suppress`.
91
103
 
104
+ `codejury eval` takes `--dataset <dir>` (golden YAML directory), `--split <name>`
105
+ (score only cases tagged with that `split:`, e.g. a held-out set), and
106
+ `--format {text,json}` -- the JSON report is a stable schema (overall plus
107
+ per-capability confusion matrix and precision / recall / F1).
108
+
109
+ Runs are deterministic: providers query at temperature 0, and `audit` / `scan`
110
+ cache each verdict on a hash of the normalized code, the in-scope capability
111
+ versions, and the orchestration. Re-auditing unchanged code returns the recorded
112
+ verdicts without re-querying the model; editing a capability YAML changes its
113
+ fingerprint and invalidates affected entries. Pass `--no-cache` to always
114
+ re-query.
115
+
92
116
  ```bash
93
117
  # Multi-round adversarial debate, rendered as Markdown
94
118
  git diff | codejury audit --orchestrator debate --format markdown - > report.md
@@ -51,15 +51,38 @@ git diff | codejury audit --provider anthropic
51
51
  | `codejury audit [diff]` | Audit a unified diff from a file or stdin (`-`). |
52
52
  | `codejury scan <dir>` | Audit a whole directory tree, capability by capability. |
53
53
  | `codejury run <task>` | Run a named task preset (see [Tasks](#tasks)). |
54
- | `codejury eval` | Score the golden cases and report precision / recall. |
54
+ | `codejury eval` | Score the golden cases; report precision / recall / F1, overall and per capability. |
55
55
 
56
- Shared flags: `--orchestrator {single,pipeline,debate,reflexion,challenge}`,
57
- `--provider {anthropic,openai,litellm}`, `--model`, `--format {text,markdown,json}`.
56
+ Shared flags: `--orchestrator {single,pipeline,debate,reflexion,challenge,taint}`,
57
+ `--provider {anthropic,openai,litellm}`, `--model`,
58
+ `--format {text,markdown,json,sarif}`.
59
+
60
+ `--orchestrator taint` adds a data-flow gate: after the verifier rules, it clears
61
+ an `input_validation` finding only when static provenance analysis proves the
62
+ value reaching the sink is constant, sanitized, or trusted (using cross-file
63
+ caller/callee context). It downgrades only on positive proof, so it removes false
64
+ positives without dropping real findings.
65
+
66
+ `--format sarif` emits a SARIF 2.1.0 log (validates against the official schema)
67
+ for CI and security dashboards: each problem with a code location becomes a
68
+ result carrying its capability (as the rule id), CWE, and a precise location.
58
69
 
59
70
  Findings in known-noise categories (availability/DoS, rate limiting, memory safety
60
71
  outside C/C++) are dropped by versioned rules in
61
72
  `codejury/data/suppressions.yaml`; disable with `--no-suppress`.
62
73
 
74
+ `codejury eval` takes `--dataset <dir>` (golden YAML directory), `--split <name>`
75
+ (score only cases tagged with that `split:`, e.g. a held-out set), and
76
+ `--format {text,json}` -- the JSON report is a stable schema (overall plus
77
+ per-capability confusion matrix and precision / recall / F1).
78
+
79
+ Runs are deterministic: providers query at temperature 0, and `audit` / `scan`
80
+ cache each verdict on a hash of the normalized code, the in-scope capability
81
+ versions, and the orchestration. Re-auditing unchanged code returns the recorded
82
+ verdicts without re-querying the model; editing a capability YAML changes its
83
+ fingerprint and invalidates affected entries. Pass `--no-cache` to always
84
+ re-query.
85
+
63
86
  ```bash
64
87
  # Multi-round adversarial debate, rendered as Markdown
65
88
  git diff | codejury audit --orchestrator debate --format markdown - > report.md
@@ -5,4 +5,9 @@ Domain knowledge lives in YAML capability files as a first-class citizen,
5
5
  aligned with OWASP ASVS.
6
6
  """
7
7
 
8
- __version__ = "0.0.0"
8
+ from importlib.metadata import PackageNotFoundError, version
9
+
10
+ try:
11
+ __version__ = version("codejury")
12
+ except PackageNotFoundError: # running from a source tree without an install
13
+ __version__ = "0.0.0"
@@ -93,15 +93,27 @@ def _build_prompt(path: str, content: str, cap: Capability, context: str = "") -
93
93
  )
94
94
 
95
95
 
96
def _anti_pattern_cwes(cap: Capability) -> dict[str, str]:
    """Build a lookup from anti-pattern id to the CWE recorded on that pattern.

    Every sub-capability of ``cap`` is scanned. Anti-patterns whose ``cwe`` is
    falsy are left out, and if an id occurs more than once the last CWE seen
    wins.
    """
    cwe_lookup: dict[str, str] = {}
    for sub_capability in cap.sub_capabilities.values():
        for pattern in sub_capability.anti_patterns:
            if not pattern.cwe:
                continue
            cwe_lookup[pattern.id] = pattern.cwe
    return cwe_lookup
104
+
105
+
96
106
  def _parse_verdicts(text: str, cap: Capability) -> list[Verdict]:
97
107
  obj = extract_json_object(text)
98
108
  if not obj:
99
109
  return []
110
+ cwe_by_id = _anti_pattern_cwes(cap)
100
111
  out: list[Verdict] = []
101
112
  for v in obj.get("verdicts", []):
102
113
  if not isinstance(v, dict):
103
114
  continue
104
115
  sub = str(v.get("sub_capability", "")).strip()
116
+ matched_anti = str_list(v.get("matched_anti"))
105
117
  out.append(
106
118
  Verdict(
107
119
  capability=f"{cap.id}.{sub}" if sub else cap.id,
@@ -109,7 +121,8 @@ def _parse_verdicts(text: str, cap: Capability) -> list[Verdict]:
109
121
  status=one_of(v.get("status"), _VALID_STATUS, "UNKNOWN"),
110
122
  reasoning=str(v.get("reasoning", "")),
111
123
  matched_correct=str_list(v.get("matched_correct")),
112
- matched_anti=str_list(v.get("matched_anti")),
124
+ matched_anti=matched_anti,
125
+ cwe=next((cwe_by_id[a] for a in matched_anti if a in cwe_by_id), ""),
113
126
  evidence=to_evidence(v.get("evidence")),
114
127
  confidence=to_float(v.get("confidence"), 0.5),
115
128
  )
@@ -0,0 +1,7 @@
1
+ """Static analysis for provenance (P1).
2
+
3
+ The code-graph / data-flow engine that gives the verifier provenance -- whether a
4
+ value reaching a sink is attacker-controlled, sanitized, or a trusted constant.
5
+ This is the real fix for the taint precision floor that single-file LLM review
6
+ cannot reach (see ROADMAP P1). Python / AST based to start.
7
+ """
@@ -0,0 +1,208 @@
1
+ """Intra-procedural value-origin tracing (P1-01).
2
+
3
+ Classify where the value of an expression inside a function comes from, so a
4
+ later layer (P1-03) can decide whether it is attacker-controlled. The output is
5
+ an ``Origin``: the parameters, callees, attribute/subscript roots, free names,
6
+ and literals a value derives from.
7
+
8
+ The analysis is deliberately conservative and flow-insensitive: a name assigned
9
+ more than once contributes the union of all its right-hand sides, so a possible
10
+ source is never dropped (recall over precision). A value built only from literals
11
+ is reported as ``is_constant`` -- the signal that distinguishes, for example,
12
+ SQL concatenated from constants (safe) from SQL concatenated from a parameter.
13
+
14
+ This module finds where a value comes from; it does not decide what is a source
15
+ or a sanitizer (that is data, P1-02) nor follow a call into another file (P1-03).
16
+ Python / AST only.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import ast
22
+ from dataclasses import dataclass
23
+
24
+
25
@dataclass(frozen=True)
class Origin:
    """Immutable record of everything a traced value derives from.

    Each set field names one kind of contributor; the two flags record that a
    literal or an unmodelled expression took part. Instances are combined with
    ``merge`` rather than mutated.
    """

    # Names of the enclosing function's parameters that feed the value.
    params: frozenset[str] = frozenset()
    # Names of callees whose return value feeds the value.
    calls: frozenset[str] = frozenset()
    # Dotted attribute/subscript chains that feed the value (e.g. "request.args").
    attrs: frozenset[str] = frozenset()
    # Free names: module globals, imports, builtins.
    globals_: frozenset[str] = frozenset()
    # True when at least one literal contributes to the value.
    has_literal: bool = False
    # True when an expression the tracer does not model contributes (be cautious).
    unknown: bool = False

    def merge(self, other: Origin) -> Origin:
        """Return a new ``Origin`` that is the field-wise union of both operands."""
        return Origin(
            self.params | other.params,
            self.calls | other.calls,
            self.attrs | other.attrs,
            self.globals_ | other.globals_,
            self.has_literal or other.has_literal,
            self.unknown or other.unknown,
        )

    @property
    def is_constant(self) -> bool:
        """Whether nothing but literals contributes to the value.

        True exactly when there is no parameter, call, attribute chain, free
        name, or unmodelled expression among the contributors.
        """
        contributors = (self.params, self.calls, self.attrs, self.globals_, self.unknown)
        return not any(contributors)
49
+
50
+
51
+ _LITERAL = Origin(has_literal=True)
52
+ _UNKNOWN = Origin(unknown=True)
53
+
54
+
55
def parse_function(source: str, name: str) -> ast.FunctionDef | ast.AsyncFunctionDef | None:
    """Return the first ``def`` / ``async def`` called ``name`` found in ``source``.

    The search covers every nesting level (methods and inner functions
    included). "First" means first in the order ``ast.walk`` yields nodes,
    which is not guaranteed to be source order.

    Returns ``None`` when ``source`` cannot be parsed or when no function with
    that name exists.
    """
    try:
        tree = ast.parse(source)
    except (SyntaxError, ValueError):
        # ValueError: interpreters before 3.12 reject source containing NUL
        # bytes with ValueError instead of SyntaxError; both mean "unparseable".
        return None
    for node in ast.walk(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) and node.name == name:
            return node
    return None
65
+
66
+
67
def find_calls(scope: ast.AST, callee: str) -> list[ast.Call]:
    """Collect every call node under ``scope`` whose callee name equals ``callee``.

    The name compared is the bare identifier for ``open(...)`` and the final
    attribute for ``cur.execute(...)``. Results come in ``ast.walk`` order.
    """
    matches: list[ast.Call] = []
    for candidate in ast.walk(scope):
        if not isinstance(candidate, ast.Call):
            continue
        if _call_name(candidate) == callee:
            matches.append(candidate)
    return matches
71
+
72
+
73
def trace_value(func: ast.FunctionDef | ast.AsyncFunctionDef, expr: ast.AST) -> Origin:
    """Return the ``Origin`` of ``expr``, an expression located inside ``func``.

    The function's parameters and local assignments are gathered first, then
    the expression is classified against them starting with no names in
    progress.
    """
    known_params = _params(func)
    local_assigns = _assignments(func)
    return _classify(expr, known_params, local_assigns, frozenset())
76
+
77
+
78
def parameters(func: ast.FunctionDef | ast.AsyncFunctionDef) -> set[str]:
    """Return the names of all parameters of ``func``.

    Public wrapper over ``_params``: positional-only, regular, keyword-only,
    ``*args`` and ``**kwargs`` names are all included.
    """
    names = _params(func)
    return names
81
+
82
+
83
def assignments(func: ast.FunctionDef | ast.AsyncFunctionDef) -> dict[str, list[ast.AST]]:
    """Return, for each local name assigned in ``func``, all of its right-hand sides.

    Public wrapper over ``_assignments``; a name assigned several times maps to
    every value expression it was given.
    """
    by_name = _assignments(func)
    return by_name
86
+
87
+
88
def callee(call: ast.Call) -> tuple[str | None, str | None]:
    """Return the ``(dotted, bare)`` names of the function being called.

    For ``json.loads(x)`` this is ``("json.loads", "loads")``; for ``open(p)``
    it is ``("open", "open")``. Either element is ``None`` when the helper that
    computes it cannot name the callee.
    """
    dotted_name = _dotted(call.func)
    bare_name = _call_name(call)
    return dotted_name, bare_name
91
+
92
+
93
def access_path(node: ast.AST) -> str | None:
    """Return the dotted access chain of ``node`` with subscripts dropped.

    Public wrapper over ``_dotted``: ``request.args["x"]`` gives
    ``"request.args"``.
    """
    path = _dotted(node)
    return path
96
+
97
+
98
def access_root(node: ast.AST) -> str | None:
    """Return the leftmost name of an attribute/subscript chain.

    Public wrapper over ``_root_name``: ``request.args["x"]`` gives
    ``"request"``.
    """
    root = _root_name(node)
    return root
101
+
102
+
103
def _classify(expr: ast.AST, params: set[str], assigns: dict[str, list[ast.AST]], seen: frozenset[str]) -> Origin:
    """Compute the ``Origin`` of ``expr`` by structural recursion over its AST.

    ``params`` holds the enclosing function's parameter names, ``assigns`` maps
    each local name to every right-hand side assigned to it, and ``seen`` holds
    the local names currently being expanded so an assignment cycle terminates.
    Any node type not handled below yields an ``unknown`` origin.
    """

    def recurse(node: ast.AST) -> Origin:
        return _classify(node, params, assigns, seen)

    if isinstance(expr, ast.Constant):
        return _LITERAL

    if isinstance(expr, ast.JoinedStr):
        # f-string: the literal text plus whatever each interpolated expression brings
        result = _LITERAL
        for part in expr.values:
            if isinstance(part, ast.FormattedValue):
                result = result.merge(recurse(part.value))
        return result

    if isinstance(expr, ast.BinOp):
        left_origin = recurse(expr.left)
        return left_origin.merge(recurse(expr.right))

    if isinstance(expr, ast.IfExp):
        # the value is one branch or the other; the test itself does not flow in
        body_origin = recurse(expr.body)
        return body_origin.merge(recurse(expr.orelse))

    if isinstance(expr, (ast.BoolOp, ast.List, ast.Tuple, ast.Set)):
        operands = expr.values if isinstance(expr, ast.BoolOp) else expr.elts
        return _merge_all(operands, params, assigns, seen)

    if isinstance(expr, ast.Call):
        # only the callee is named here; arguments are not traced at this layer
        callee_name = _call_name(expr)
        if callee_name:
            return Origin(calls=frozenset({callee_name}))
        return _UNKNOWN

    if isinstance(expr, (ast.Attribute, ast.Subscript)):
        path = _dotted(expr)
        if path:
            result = Origin(attrs=frozenset({path}))
        else:
            result = _UNKNOWN
        base = _root_name(expr)
        if base in params:  # e.g. request.args[...] where `request` is a parameter
            result = result.merge(Origin(params=frozenset({base})))
        return result

    if isinstance(expr, ast.Name):
        ident = expr.id
        if ident in seen:  # already expanding this name: assignment cycle, stop
            return Origin()
        if ident in params:
            return Origin(params=frozenset({ident}))
        if ident in assigns:
            return _merge_all(assigns[ident], params, assigns, seen | {ident})
        # not a parameter and never assigned locally: module global, import, or builtin
        return Origin(globals_=frozenset({ident}))

    return _UNKNOWN
141
+
142
+
143
def _merge_all(exprs: list[ast.AST], params, assigns, seen) -> Origin:
    """Classify each expression in ``exprs`` and merge the results into one ``Origin``.

    An empty ``exprs`` yields the empty ``Origin()``.
    """
    combined = Origin()
    for item in exprs:
        item_origin = _classify(item, params, assigns, seen)
        combined = combined.merge(item_origin)
    return combined
148
+
149
+
150
def _params(func: ast.AST) -> set[str]:
    """Return every parameter name declared by ``func``.

    Positional-only, regular and keyword-only parameters are included, plus the
    ``*args`` / ``**kwargs`` names when present. A node without an ``args``
    attribute (such as a module-level scope) has no parameters and gives an
    empty set.
    """
    signature = getattr(func, "args", None)
    if signature is None:
        return set()
    declared = [*signature.posonlyargs, *signature.args, *signature.kwonlyargs]
    declared.extend(star for star in (signature.vararg, signature.kwarg) if star)
    return {param.arg for param in declared}
160
+
161
+
162
def _assignments(func: ast.FunctionDef | ast.AsyncFunctionDef) -> dict[str, list[ast.AST]]:
    """Map every local name bound inside ``func`` to the expressions it is bound from.

    The scan is flow-insensitive: a name bound several times maps to all of its
    value expressions, in ``ast.walk`` order. Besides plain, annotated and
    augmented assignment, the implicit bindings are recorded too, so that a
    name fed by them is not mistaken for a free (global) name:

    * ``for`` / ``async for`` targets and comprehension targets -> the iterable
    * ``with ... as name`` / ``async with ... as name`` -> the context expression
    * ``name := value`` -> the value

    Attribute and subscript targets (``self.x = ...``, ``d[k] = ...``) are not
    local names and are ignored.
    """
    out: dict[str, list[ast.AST]] = {}

    def bind(target: ast.AST | None, value: ast.AST) -> None:
        # Record `value` as a source for every plain name found in `target`.
        if target is None:
            return
        for name in _target_names(target):
            out.setdefault(name, []).append(value)

    for node in ast.walk(func):
        if isinstance(node, ast.Assign):
            for target in node.targets:
                bind(target, node.value)
        elif isinstance(node, ast.AnnAssign):
            # a bare annotation (`x: int`) binds nothing
            if node.value is not None and isinstance(node.target, ast.Name):
                bind(node.target, node.value)
        elif isinstance(node, (ast.AugAssign, ast.NamedExpr)):
            if isinstance(node.target, ast.Name):
                bind(node.target, node.value)
        elif isinstance(node, (ast.For, ast.AsyncFor, ast.comprehension)):
            # each element comes out of the iterable, so the iterable is the source
            bind(node.target, node.iter)
        elif isinstance(node, (ast.With, ast.AsyncWith)):
            for item in node.items:
                bind(item.optional_vars, item.context_expr)
    return out


def _target_names(target: ast.AST) -> list[str]:
    """Return the plain names bound by an assignment target.

    Tuple/list targets are flattened recursively and a starred element
    (``*rest``) contributes its inner name. Any other target kind (attribute,
    subscript) binds no local name and gives an empty list.
    """
    if isinstance(target, ast.Name):
        return [target.id]
    if isinstance(target, ast.Starred):
        return _target_names(target.value)
    if isinstance(target, (ast.Tuple, ast.List)):
        return [name for elt in target.elts for name in _target_names(elt)]
    return []
182
+
183
+
184
def _call_name(call: ast.Call) -> str | None:
    """Return the short name of the function a call node invokes.

    That is the identifier for a bare call (``open(...)`` -> ``"open"``) and
    the last attribute for a method-style call (``cur.execute(...)`` ->
    ``"execute"``). Any other callee expression gives ``None``.
    """
    target = call.func
    if isinstance(target, ast.Attribute):
        return target.attr
    return target.id if isinstance(target, ast.Name) else None
191
+
192
+
193
def _dotted(node: ast.AST) -> str | None:
    """Render an attribute/subscript chain as a dotted path, ignoring subscripts.

    ``request.args["x"]`` becomes ``"request.args"``. The chain must bottom out
    in a plain name; otherwise (a call, a literal, ...) the result is ``None``.
    """
    trailing: list[str] = []
    current = node
    # Peel the chain from the outside in, remembering attribute names only.
    while True:
        if isinstance(current, ast.Attribute):
            trailing.append(current.attr)
        elif not isinstance(current, ast.Subscript):
            break
        current = current.value
    if not isinstance(current, ast.Name):
        return None
    return ".".join([current.id, *reversed(trailing)])
203
+
204
+
205
def _root_name(node: ast.AST) -> str | None:
    """Return the leftmost name of an attribute/subscript chain.

    ``request.args["x"]`` gives ``"request"``. If the innermost expression is
    not a plain name the result is ``None``.
    """
    if isinstance(node, (ast.Attribute, ast.Subscript)):
        return _root_name(node.value)
    if isinstance(node, ast.Name):
        return node.id
    return None
@@ -0,0 +1,273 @@
1
+ """Taint classification (P1-03): turn provenance into a taint verdict.
2
+
3
+ Walks a value expression like P1-01's tracer, but consults the taint vocabulary
4
+ (P1-02) at every call and access: a known source makes a value EXTERNAL, a known
5
+ sanitizer makes it SANITIZED (taint stops), a propagator carries taint through to
6
+ the result, and a trusted origin or a literal is clean.
7
+
8
+ The point is to let a later layer (P1-04) downgrade a taint finding only when the
9
+ value is *provably* not attacker-controlled -- ``classification in SAFE`` -- so
10
+ recall is preserved: anything uncertain is UNKNOWN or PARAM, never quietly safe.
11
+
12
+ Two documented precision leans: a bare module-global name (e.g. ``STATIC_DIR``)
13
+ is treated as TRUSTED (module-level names are conventionally constants), and an
14
+ unknown attribute access (e.g. ``self.x``) is UNKNOWN rather than safe. These are
15
+ revisited against real repositories in P1-05.
16
+
17
+ This layer is intra-procedural: a value that depends on a parameter returns
18
+ PARAM, for the cross-file caller hop (next) to resolve.
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import ast
24
+ from dataclasses import dataclass
25
+ from enum import Enum
26
+ from pathlib import Path
27
+
28
+ import yaml
29
+
30
+ from codejury.analysis.provenance import (
31
+ access_path,
32
+ access_root,
33
+ assignments,
34
+ callee,
35
+ find_calls,
36
+ parameters,
37
+ )
38
+ from codejury.resources import TAINT_FILE
39
+
40
+
41
class Taint(str, Enum):
    """Taint verdict for a value; each member is also its lowercase string."""

    # Comes from an attacker-controlled source and was never sanitized.
    EXTERNAL = "external"
    # Passed through an unknown call or access; neither safe nor tainted is provable.
    UNKNOWN = "unknown"
    # Depends on a function parameter; the call site (possibly cross-file) decides.
    PARAM = "param"
    # Contained external data, but a sanitizer neutralized it.
    SANITIZED = "sanitized"
    # Originates from the operator, configuration, or a module global.
    TRUSTED = "trusted"
    # Assembled purely from literals.
    CONSTANT = "constant"
48
+
49
+
50
# The classes that prove a value is not attacker-controlled. P1-04 may downgrade a
# finding on these and on nothing else.
SAFE = frozenset((Taint.CONSTANT, Taint.SANITIZED, Taint.TRUSTED))

# Severity ranking used when merging the parts of a composite value: the part with
# the highest rank (the most dangerous one) decides the result.
_RANK = dict(
    zip(
        (Taint.EXTERNAL, Taint.UNKNOWN, Taint.PARAM, Taint.SANITIZED, Taint.TRUSTED, Taint.CONSTANT),
        (6, 5, 4, 3, 2, 1),
    )
)
62
+
63
+
64
@dataclass(frozen=True)
class TaintVocab:
    """Immutable taint vocabulary: the patterns the classifier matches against."""

    sources: tuple[str, ...]      # access paths / callees that yield attacker data
    trusted: tuple[str, ...]      # access paths considered operator-controlled
    sanitizers: tuple[str, ...]   # callees whose result is treated as sanitized
    safe_sinks: tuple[str, ...]   # callees that are safe parsers
    propagators: tuple[str, ...]  # callees that pass taint from inputs to result

    @classmethod
    def from_dict(cls, data: dict) -> TaintVocab:
        """Build a vocabulary from the parsed taint YAML mapping.

        ``sources`` and ``trusted`` entries contribute their ``match`` list; the
        other sections contribute their ``calls`` list. A missing or empty (null)
        section or key contributes nothing, and a single string is accepted in
        place of a one-element list.
        """
        def collect(section: str, key: str) -> tuple[str, ...]:
            found: list[str] = []
            # ``section:`` with no value parses to None in YAML, hence ``or []``
            for entry in data.get(section) or []:
                values = entry.get(key) or []
                if isinstance(values, str):
                    # a bare scalar must not be iterated character by character
                    values = [values]
                found.extend(values)
            return tuple(found)

        return cls(
            sources=collect("sources", "match"),
            trusted=collect("trusted", "match"),
            sanitizers=collect("sanitizers", "calls"),
            safe_sinks=collect("safe_sinks", "calls"),
            propagators=collect("propagators", "calls"),
        )
87
+
88
+
89
def load_vocab(path: str | Path = TAINT_FILE) -> TaintVocab:
    """Read the taint vocabulary YAML at ``path`` (UTF-8) into a ``TaintVocab``.

    An empty document is treated as an empty vocabulary.
    """
    with open(path, encoding="utf-8") as handle:
        raw = yaml.safe_load(handle)
    if not raw:
        raw = {}
    return TaintVocab.from_dict(raw)
92
+
93
+
94
def is_safe_sink(call: ast.Call, vocab: TaintVocab) -> bool:
    """Report whether ``call`` targets one of the vocabulary's safe parsers
    (json.loads, ast.literal_eval, ...)."""
    safe_parsers = vocab.safe_sinks
    return _callee_in(call, safe_parsers)
97
+
98
+
99
def taint_of(
    func: ast.AST,
    expr: ast.AST,
    vocab: TaintVocab,
    *,
    resolve_param=None,
) -> Taint:
    """Return the taint class of ``expr`` as evaluated inside ``func``.

    ``resolve_param``, when supplied, is a ``(name) -> Taint`` callback consulted
    whenever the value reaches one of ``func``'s parameters (the cross-file caller
    hop, P1-03b); without it such a value is reported as PARAM.
    """
    bindings = assignments(func)
    param_names = parameters(func)
    nothing_visited = frozenset()  # no names followed yet
    return _walk(func, expr, vocab, bindings, param_names, nothing_visited, resolve_param)
113
+
114
+
115
def _value_receiver(call: ast.Call, assigns, params) -> ast.AST | None:
    """Return the receiver of a method call when it is a data value, else ``None``.

    ``name.strip()`` and ``"sep".join(...)`` have a data receiver; ``os.path.join(...)``
    does not -- a chain rooted in a name that is neither a parameter nor bound in
    the function is taken to be a module or class namespace.
    """
    if not isinstance(call.func, ast.Attribute):
        return None
    receiver = call.func.value
    root = receiver
    while isinstance(root, (ast.Attribute, ast.Subscript)):
        root = root.value
    if isinstance(root, ast.Name) and root.id not in params and root.id not in assigns:
        return None
    return receiver


def _walk(func, expr, vocab, assigns, params, seen, resolve) -> Taint:
    """Recursively classify ``expr``; the worst component of a composite wins.

    Args:
        func: scope the expression lives in (threaded through unchanged).
        expr: expression node to classify.
        vocab: taint vocabulary consulted at calls and accesses.
        assigns: name -> bound expressions for the scope.
        params: parameter names of the scope.
        seen: names already being followed, used to cut assignment cycles.
        resolve: optional ``(name) -> Taint`` callback for parameters.
    """
    def w(e):
        return _walk(func, e, vocab, assigns, params, seen, resolve)

    if isinstance(expr, ast.Constant):
        return Taint.CONSTANT
    if isinstance(expr, ast.JoinedStr):
        return _combine([Taint.CONSTANT] + [w(v.value) for v in expr.values if isinstance(v, ast.FormattedValue)])
    if isinstance(expr, ast.BinOp):
        return _combine([w(expr.left), w(expr.right)])
    if isinstance(expr, ast.BoolOp):
        return _combine([w(v) for v in expr.values])
    if isinstance(expr, ast.IfExp):
        return _combine([w(expr.body), w(expr.orelse)])
    if isinstance(expr, (ast.List, ast.Tuple, ast.Set)):
        return _combine([w(e) for e in expr.elts] or [Taint.CONSTANT])
    if isinstance(expr, ast.Call):
        if _callee_in(expr, vocab.sanitizers):
            return Taint.SANITIZED  # a sanitizer cleans its result regardless of input
        if _callee_in(expr, vocab.sources):
            return Taint.EXTERNAL  # e.g. input()
        if _callee_in(expr, vocab.propagators) or _callee_in(expr, vocab.safe_sinks):
            # Taint can enter through keyword arguments and through the object the
            # method is called on (``tainted.strip()``), not only positional args.
            inputs = [*expr.args, *(kw.value for kw in expr.keywords)]
            receiver = _value_receiver(expr, assigns, params)
            if receiver is not None:
                inputs.append(receiver)
            return _combine([w(a) for a in inputs] or [Taint.CONSTANT])
        return Taint.UNKNOWN  # unknown call -- a cross-file hop may resolve it later
    if isinstance(expr, (ast.Attribute, ast.Subscript)):
        path = access_path(expr)
        if path and _access_in(path, vocab.sources):
            return Taint.EXTERNAL
        if path and _access_in(path, vocab.trusted):
            return Taint.TRUSTED
        root = access_root(expr)
        if root in params:  # attribute of a parameter -- resolve at call site
            return resolve(root) if resolve else Taint.PARAM
        return Taint.UNKNOWN  # unknown object attribute (e.g. self.x): not provably safe
    if isinstance(expr, ast.Name):
        if expr.id in seen:
            return Taint.CONSTANT  # assignment cycle: no new information
        if expr.id in params:
            return resolve(expr.id) if resolve else Taint.PARAM
        if expr.id in assigns:
            return _combine([_walk(func, r, vocab, assigns, params, seen | {expr.id}, resolve)
                             for r in assigns[expr.id]])
        return Taint.TRUSTED  # free module global / builtin -- conventionally a constant
    return Taint.UNKNOWN
159
+
160
+
161
def _combine(taints: list[Taint]) -> Taint:
    """Return the highest-ranked (most dangerous) taint in ``taints``."""
    return max(taints, key=_RANK.__getitem__)
163
+
164
+
165
def _callee_in(call: ast.Call, names: tuple[str, ...]) -> bool:
    """Check whether the callee of ``call`` matches any vocabulary name.

    A dotted vocabulary name must equal the callee's dotted path or be a
    dot-delimited suffix of it; an undotted name is compared with the callee's
    bare name.
    """
    dotted, bare = callee(call)

    def hit(name: str) -> bool:
        if "." not in name:
            return bare == name
        if dotted is None:
            return False
        return dotted == name or dotted.endswith("." + name)

    return any(hit(name) for name in names)
174
+
175
+
176
+ def _access_in(path: str, prefixes: tuple[str, ...]) -> bool:
177
+ # "request.args" matches the source "request.args" and also "request.args.get"
178
+ return any(path == p or path.startswith(p + ".") for p in prefixes)
179
+
180
+
181
+ # --- cross-file one-hop resolution (P1-03b) ---------------------------------
182
+
183
def taint_in_repo(
    func: ast.FunctionDef | ast.AsyncFunctionDef,
    expr: ast.AST,
    vocab: TaintVocab,
    files: dict[str, str],
) -> Taint:
    """Classify ``expr`` in ``func``, following parameters one hop up to callers.

    When the value reaches a parameter, every call of ``func`` found in ``files``
    is inspected and the results are combined: one attacker-controlled argument
    makes the result EXTERNAL, while all-sanitized/constant/trusted arguments make
    it safe. If no caller is found the parameter is UNKNOWN, never assumed safe.
    """
    resolver = _caller_resolver(func, files, vocab)
    return taint_of(func, expr, vocab, resolve_param=resolver)
197
+
198
+
199
def _caller_resolver(func, files, vocab):
    """Build a ``(param_name) -> Taint`` callback that looks one hop up at callers.

    The callback combines the taint of the argument passed for ``param_name`` at
    every call of ``func`` found in ``files``; a call site where the argument
    cannot be identified counts as UNKNOWN, as does having no call sites at all.

    Bound-method heuristic: when ``func``'s first positional parameter is named
    ``self`` or ``cls`` and the call is made through an attribute
    (``obj.method(a)``), the receiver is taken to fill that first parameter, so
    explicit arguments are matched one position earlier. An explicit unbound call
    (``Class.method(obj, a)``) is not distinguished from a bound one.
    """
    sites = None  # located on first use, then shared by every parameter lookup

    def resolve(param_name: str) -> Taint:
        nonlocal sites
        if sites is None:
            sites = _call_sites(func.name, files)
        index = _param_index(func, param_name)
        positional = [*func.args.posonlyargs, *func.args.args]
        implicit_first = bool(positional) and positional[0].arg in ("self", "cls")
        results = []
        for scope, call in sites:
            if implicit_first and index is not None and isinstance(call.func, ast.Attribute):
                if index == 0:
                    arg = call.func.value  # self/cls itself is the receiver
                else:
                    arg = _arg_for_param(call, index - 1, param_name)
            else:
                arg = _arg_for_param(call, index, param_name)
            # one hop only: classify the caller's argument without recursing further
            results.append(taint_of(scope, arg, vocab) if arg is not None else Taint.UNKNOWN)
        return _combine(results) if results else Taint.UNKNOWN

    return resolve
209
+
210
+
211
+ def _param_index(func, name: str) -> int | None:
212
+ positional = [*func.args.posonlyargs, *func.args.args]
213
+ for i, arg in enumerate(positional):
214
+ if arg.arg == name:
215
+ return i
216
+ return None # keyword-only or *args: matched by keyword at the call site instead
217
+
218
+
219
+ def _arg_for_param(call: ast.Call, index: int | None, name: str) -> ast.AST | None:
220
+ if index is not None and index < len(call.args):
221
+ return call.args[index]
222
+ for kw in call.keywords:
223
+ if kw.arg == name:
224
+ return kw.value
225
+ return None
226
+
227
+
228
+ def _call_sites(name: str, files: dict[str, str]) -> list[tuple[ast.AST, ast.Call]]:
229
+ sites = []
230
+ for source in files.values():
231
+ try:
232
+ tree = ast.parse(source)
233
+ except SyntaxError:
234
+ continue
235
+ funcs = [n for n in ast.walk(tree) if isinstance(n, (ast.FunctionDef, ast.AsyncFunctionDef))]
236
+ for call in find_calls(tree, name):
237
+ sites.append((_enclosing_scope(funcs, call) or tree, call))
238
+ return sites
239
+
240
+
241
+ def _enclosing_scope(funcs: list[ast.AST], call: ast.Call) -> ast.AST | None:
242
+ containing = [f for f in funcs if any(node is call for node in ast.walk(f))]
243
+ if not containing:
244
+ return None # module-level call site
245
+ return min(containing, key=lambda f: sum(1 for _ in ast.walk(f))) # innermost
246
+
247
+
248
def worst_sink_taint(content: str, files: dict[str, str], vocab: TaintVocab) -> Taint | None:
    """The most dangerous taint reaching any potential sink call in ``content``.

    A "potential sink" is any call that is not a safe sink, sanitizer, or
    propagator (those are not where injection happens). Each such call's argument
    taint is classified with the cross-file resolver, and the worst is returned.
    ``Taint.CONSTANT`` when there is no sink to worry about; ``None`` when the
    code does not parse (the caller should then not act).

    Used by the taint gate to downgrade an input_validation finding only when the
    whole artifact is provably clean -- so a single tainted sink keeps every
    finding (recall preserved).
    """
    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError):
        # ValueError: source containing NUL bytes on interpreters before 3.12;
        # that is "does not parse" too, so report it the same way.
        return None
    funcs = [n for n in ast.walk(tree) if isinstance(n, (ast.FunctionDef, ast.AsyncFunctionDef))]
    taints: list[Taint] = []
    for call in (n for n in ast.walk(tree) if isinstance(n, ast.Call)):
        if is_safe_sink(call, vocab) or _callee_in(call, vocab.sanitizers) or _callee_in(call, vocab.propagators):
            continue  # not a place an injection lands
        scope = _enclosing_scope(funcs, call) or tree
        for arg in (*call.args, *(kw.value for kw in call.keywords)):
            taints.append(taint_in_repo(scope, arg, vocab, files))
    return _combine(taints) if taints else Taint.CONSTANT