codejury 0.6.0__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. {codejury-0.6.0 → codejury-0.7.0}/PKG-INFO +8 -2
  2. {codejury-0.6.0 → codejury-0.7.0}/README.md +7 -1
  3. codejury-0.7.0/codejury/analysis/__init__.py +7 -0
  4. codejury-0.7.0/codejury/analysis/provenance.py +208 -0
  5. codejury-0.7.0/codejury/analysis/taint.py +273 -0
  6. {codejury-0.6.0 → codejury-0.7.0}/codejury/assembly.py +4 -1
  7. {codejury-0.6.0 → codejury-0.7.0}/codejury/cli.py +2 -0
  8. codejury-0.7.0/codejury/data/taint.yaml +109 -0
  9. {codejury-0.6.0 → codejury-0.7.0}/codejury/evaluation.py +10 -3
  10. codejury-0.7.0/codejury/orchestrators/taint_gate.py +75 -0
  11. {codejury-0.6.0 → codejury-0.7.0}/codejury/resources.py +1 -0
  12. {codejury-0.6.0 → codejury-0.7.0}/codejury.egg-info/PKG-INFO +8 -2
  13. {codejury-0.6.0 → codejury-0.7.0}/codejury.egg-info/SOURCES.txt +9 -0
  14. {codejury-0.6.0 → codejury-0.7.0}/pyproject.toml +1 -1
  15. {codejury-0.6.0 → codejury-0.7.0}/tests/test_evaluation.py +14 -0
  16. codejury-0.7.0/tests/test_provenance.py +105 -0
  17. codejury-0.7.0/tests/test_taint.py +97 -0
  18. codejury-0.7.0/tests/test_taint_crossfile.py +68 -0
  19. codejury-0.7.0/tests/test_taint_gate.py +89 -0
  20. {codejury-0.6.0 → codejury-0.7.0}/LICENSE +0 -0
  21. {codejury-0.6.0 → codejury-0.7.0}/codejury/__init__.py +0 -0
  22. {codejury-0.6.0 → codejury-0.7.0}/codejury/agents/__init__.py +0 -0
  23. {codejury-0.6.0 → codejury-0.7.0}/codejury/agents/base.py +0 -0
  24. {codejury-0.6.0 → codejury-0.7.0}/codejury/agents/debate.py +0 -0
  25. {codejury-0.6.0 → codejury-0.7.0}/codejury/agents/mock.py +0 -0
  26. {codejury-0.6.0 → codejury-0.7.0}/codejury/agents/parsing.py +0 -0
  27. {codejury-0.6.0 → codejury-0.7.0}/codejury/agents/refuter.py +0 -0
  28. {codejury-0.6.0 → codejury-0.7.0}/codejury/agents/verifier.py +0 -0
  29. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/capabilities/authentication.yaml +0 -0
  30. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/capabilities/authorization.yaml +0 -0
  31. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/capabilities/business_logic.yaml +0 -0
  32. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/capabilities/crypto.yaml +0 -0
  33. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/capabilities/data_protection.yaml +0 -0
  34. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/capabilities/dependency_config.yaml +0 -0
  35. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/capabilities/error_logging.yaml +0 -0
  36. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/capabilities/input_validation.yaml +0 -0
  37. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/capabilities/output_encoding.yaml +0 -0
  38. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/capabilities/secrets.yaml +0 -0
  39. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/capabilities/session.yaml +0 -0
  40. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/authn_bcrypt_password.yaml +0 -0
  41. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/authn_jwt_noverify_vuln.yaml +0 -0
  42. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/authn_jwt_verified_safe.yaml +0 -0
  43. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/authn_sha256_checksum_safe.yaml +0 -0
  44. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/authn_sha256_password.yaml +0 -0
  45. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/authn_weak_hash_indirect_vuln.yaml +0 -0
  46. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/authz_idor_vuln.yaml +0 -0
  47. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/authz_owner_safe.yaml +0 -0
  48. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/business_logic_price_tamper_vuln.yaml +0 -0
  49. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/business_logic_server_checked_safe.yaml +0 -0
  50. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/cmdi_fixed_argv_safe.yaml +0 -0
  51. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/cmdi_ossystem_vuln.yaml +0 -0
  52. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/cmdi_subprocess_safe.yaml +0 -0
  53. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/crypto_aesgcm_safe.yaml +0 -0
  54. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/crypto_ecb_vuln.yaml +0 -0
  55. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/data_protection_plaintext_pii_vuln.yaml +0 -0
  56. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/data_protection_tokenized_safe.yaml +0 -0
  57. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/dependency_config_tls_verify_off_vuln.yaml +0 -0
  58. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/dependency_config_tls_verify_on_safe.yaml +0 -0
  59. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/deserialize_json_safe.yaml +0 -0
  60. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/deserialize_pickle_vuln.yaml +0 -0
  61. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/error_logging_redacted_safe.yaml +0 -0
  62. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/error_logging_secret_leak_vuln.yaml +0 -0
  63. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/literal_eval_safe.yaml +0 -0
  64. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/path_basename_safe.yaml +0 -0
  65. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/path_contained_safe.yaml +0 -0
  66. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/path_traversal_vuln.yaml +0 -0
  67. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/secrets_env_safe.yaml +0 -0
  68. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/secrets_hardcoded_vuln.yaml +0 -0
  69. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/session_fixation_vuln.yaml +0 -0
  70. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/session_secure_cookie_safe.yaml +0 -0
  71. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/sql_constant_concat_safe.yaml +0 -0
  72. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/sqli_format_vuln.yaml +0 -0
  73. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/sqli_fstring_query.yaml +0 -0
  74. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/sqli_indirect_var_vuln.yaml +0 -0
  75. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/sqli_parameterized_query.yaml +0 -0
  76. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/ssrf_allowlist_safe.yaml +0 -0
  77. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/ssrf_constant_url_safe.yaml +0 -0
  78. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/ssrf_substring_allowlist_bypass_vuln.yaml +0 -0
  79. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/ssrf_user_url_vuln.yaml +0 -0
  80. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/xfile_idor_no_check_vuln.yaml +0 -0
  81. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/xfile_idor_owner_checked_safe.yaml +0 -0
  82. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/xfile_path_sanitized_safe.yaml +0 -0
  83. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/xfile_path_tainted_vuln.yaml +0 -0
  84. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/xss_innerhtml_constant_safe.yaml +0 -0
  85. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/xss_innerhtml_vuln.yaml +0 -0
  86. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/golden/xss_textcontent_safe.yaml +0 -0
  87. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/suppressions.yaml +0 -0
  88. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/tasks/audit_diff_debate.yaml +0 -0
  89. {codejury-0.6.0 → codejury-0.7.0}/codejury/data/tasks/quick_scan_single.yaml +0 -0
  90. {codejury-0.6.0 → codejury-0.7.0}/codejury/domain/__init__.py +0 -0
  91. {codejury-0.6.0 → codejury-0.7.0}/codejury/domain/artifact.py +0 -0
  92. {codejury-0.6.0 → codejury-0.7.0}/codejury/domain/capability.py +0 -0
  93. {codejury-0.6.0 → codejury-0.7.0}/codejury/domain/context.py +0 -0
  94. {codejury-0.6.0 → codejury-0.7.0}/codejury/domain/observation.py +0 -0
  95. {codejury-0.6.0 → codejury-0.7.0}/codejury/domain/result.py +0 -0
  96. {codejury-0.6.0 → codejury-0.7.0}/codejury/infrastructure/__init__.py +0 -0
  97. {codejury-0.6.0 → codejury-0.7.0}/codejury/infrastructure/cache.py +0 -0
  98. {codejury-0.6.0 → codejury-0.7.0}/codejury/infrastructure/json_parse.py +0 -0
  99. {codejury-0.6.0 → codejury-0.7.0}/codejury/integrations/__init__.py +0 -0
  100. {codejury-0.6.0 → codejury-0.7.0}/codejury/integrations/github.py +0 -0
  101. {codejury-0.6.0 → codejury-0.7.0}/codejury/orchestrators/__init__.py +0 -0
  102. {codejury-0.6.0 → codejury-0.7.0}/codejury/orchestrators/base.py +0 -0
  103. {codejury-0.6.0 → codejury-0.7.0}/codejury/orchestrators/challenge.py +0 -0
  104. {codejury-0.6.0 → codejury-0.7.0}/codejury/orchestrators/debate.py +0 -0
  105. {codejury-0.6.0 → codejury-0.7.0}/codejury/orchestrators/pipeline.py +0 -0
  106. {codejury-0.6.0 → codejury-0.7.0}/codejury/orchestrators/reflexion.py +0 -0
  107. {codejury-0.6.0 → codejury-0.7.0}/codejury/orchestrators/single.py +0 -0
  108. {codejury-0.6.0 → codejury-0.7.0}/codejury/providers/__init__.py +0 -0
  109. {codejury-0.6.0 → codejury-0.7.0}/codejury/providers/anthropic.py +0 -0
  110. {codejury-0.6.0 → codejury-0.7.0}/codejury/providers/base.py +0 -0
  111. {codejury-0.6.0 → codejury-0.7.0}/codejury/providers/litellm.py +0 -0
  112. {codejury-0.6.0 → codejury-0.7.0}/codejury/providers/mock.py +0 -0
  113. {codejury-0.6.0 → codejury-0.7.0}/codejury/providers/openai.py +0 -0
  114. {codejury-0.6.0 → codejury-0.7.0}/codejury/providers/openai_format.py +0 -0
  115. {codejury-0.6.0 → codejury-0.7.0}/codejury/providers/retry.py +0 -0
  116. {codejury-0.6.0 → codejury-0.7.0}/codejury/reporting.py +0 -0
  117. {codejury-0.6.0 → codejury-0.7.0}/codejury/sources/__init__.py +0 -0
  118. {codejury-0.6.0 → codejury-0.7.0}/codejury/sources/base.py +0 -0
  119. {codejury-0.6.0 → codejury-0.7.0}/codejury/sources/callers.py +0 -0
  120. {codejury-0.6.0 → codejury-0.7.0}/codejury/sources/chunker.py +0 -0
  121. {codejury-0.6.0 → codejury-0.7.0}/codejury/sources/diff.py +0 -0
  122. {codejury-0.6.0 → codejury-0.7.0}/codejury/sources/function.py +0 -0
  123. {codejury-0.6.0 → codejury-0.7.0}/codejury/sources/mock.py +0 -0
  124. {codejury-0.6.0 → codejury-0.7.0}/codejury/sources/repo.py +0 -0
  125. {codejury-0.6.0 → codejury-0.7.0}/codejury/suppression.py +0 -0
  126. {codejury-0.6.0 → codejury-0.7.0}/codejury/tasks/__init__.py +0 -0
  127. {codejury-0.6.0 → codejury-0.7.0}/codejury/tasks/base.py +0 -0
  128. {codejury-0.6.0 → codejury-0.7.0}/codejury/tasks/registry.py +0 -0
  129. {codejury-0.6.0 → codejury-0.7.0}/codejury.egg-info/dependency_links.txt +0 -0
  130. {codejury-0.6.0 → codejury-0.7.0}/codejury.egg-info/entry_points.txt +0 -0
  131. {codejury-0.6.0 → codejury-0.7.0}/codejury.egg-info/requires.txt +0 -0
  132. {codejury-0.6.0 → codejury-0.7.0}/codejury.egg-info/top_level.txt +0 -0
  133. {codejury-0.6.0 → codejury-0.7.0}/setup.cfg +0 -0
  134. {codejury-0.6.0 → codejury-0.7.0}/tests/test_anthropic_provider.py +0 -0
  135. {codejury-0.6.0 → codejury-0.7.0}/tests/test_assembly.py +0 -0
  136. {codejury-0.6.0 → codejury-0.7.0}/tests/test_audit_pipeline.py +0 -0
  137. {codejury-0.6.0 → codejury-0.7.0}/tests/test_cache.py +0 -0
  138. {codejury-0.6.0 → codejury-0.7.0}/tests/test_callers.py +0 -0
  139. {codejury-0.6.0 → codejury-0.7.0}/tests/test_capability.py +0 -0
  140. {codejury-0.6.0 → codejury-0.7.0}/tests/test_challenge.py +0 -0
  141. {codejury-0.6.0 → codejury-0.7.0}/tests/test_cli_audit.py +0 -0
  142. {codejury-0.6.0 → codejury-0.7.0}/tests/test_context.py +0 -0
  143. {codejury-0.6.0 → codejury-0.7.0}/tests/test_debate_agents.py +0 -0
  144. {codejury-0.6.0 → codejury-0.7.0}/tests/test_debate_orchestrator.py +0 -0
  145. {codejury-0.6.0 → codejury-0.7.0}/tests/test_diff_source.py +0 -0
  146. {codejury-0.6.0 → codejury-0.7.0}/tests/test_function_source.py +0 -0
  147. {codejury-0.6.0 → codejury-0.7.0}/tests/test_integrations.py +0 -0
  148. {codejury-0.6.0 → codejury-0.7.0}/tests/test_json_parse.py +0 -0
  149. {codejury-0.6.0 → codejury-0.7.0}/tests/test_litellm_provider.py +0 -0
  150. {codejury-0.6.0 → codejury-0.7.0}/tests/test_openai_provider.py +0 -0
  151. {codejury-0.6.0 → codejury-0.7.0}/tests/test_orchestrator.py +0 -0
  152. {codejury-0.6.0 → codejury-0.7.0}/tests/test_pipeline_orchestrator.py +0 -0
  153. {codejury-0.6.0 → codejury-0.7.0}/tests/test_reflexion_orchestrator.py +0 -0
  154. {codejury-0.6.0 → codejury-0.7.0}/tests/test_repo_source.py +0 -0
  155. {codejury-0.6.0 → codejury-0.7.0}/tests/test_reporting.py +0 -0
  156. {codejury-0.6.0 → codejury-0.7.0}/tests/test_retry_provider.py +0 -0
  157. {codejury-0.6.0 → codejury-0.7.0}/tests/test_sarif.py +0 -0
  158. {codejury-0.6.0 → codejury-0.7.0}/tests/test_suppression.py +0 -0
  159. {codejury-0.6.0 → codejury-0.7.0}/tests/test_tasks.py +0 -0
  160. {codejury-0.6.0 → codejury-0.7.0}/tests/test_verifier.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codejury
3
- Version: 0.6.0
3
+ Version: 0.7.0
4
4
  Summary: General-purpose Application Security AI audit framework -- five-layer architecture, capabilities as first-class data
5
5
  Author: AISecLabs
6
6
  License-Expression: MIT
@@ -83,10 +83,16 @@ git diff | codejury audit --provider anthropic
83
83
  | `codejury run <task>` | Run a named task preset (see [Tasks](#tasks)). |
84
84
  | `codejury eval` | Score the golden cases; report precision / recall / F1, overall and per capability. |
85
85
 
86
- Shared flags: `--orchestrator {single,pipeline,debate,reflexion,challenge}`,
86
+ Shared flags: `--orchestrator {single,pipeline,debate,reflexion,challenge,taint}`,
87
87
  `--provider {anthropic,openai,litellm}`, `--model`,
88
88
  `--format {text,markdown,json,sarif}`.
89
89
 
90
+ `--orchestrator taint` adds a data-flow gate: after the verifier rules, it clears
91
+ an `input_validation` finding only when static provenance analysis proves the
92
+ value reaching the sink is constant, sanitized, or trusted (using cross-file
93
+ caller/callee context). It downgrades only on positive proof, so it removes false
94
+ positives without dropping real findings.
95
+
90
96
  `--format sarif` emits a SARIF 2.1.0 log (validates against the official schema)
91
97
  for CI and security dashboards: each problem with a code location becomes a
92
98
  result carrying its capability (as the rule id), CWE, and a precise location.
@@ -53,10 +53,16 @@ git diff | codejury audit --provider anthropic
53
53
  | `codejury run <task>` | Run a named task preset (see [Tasks](#tasks)). |
54
54
  | `codejury eval` | Score the golden cases; report precision / recall / F1, overall and per capability. |
55
55
 
56
- Shared flags: `--orchestrator {single,pipeline,debate,reflexion,challenge}`,
56
+ Shared flags: `--orchestrator {single,pipeline,debate,reflexion,challenge,taint}`,
57
57
  `--provider {anthropic,openai,litellm}`, `--model`,
58
58
  `--format {text,markdown,json,sarif}`.
59
59
 
60
+ `--orchestrator taint` adds a data-flow gate: after the verifier rules, it clears
61
+ an `input_validation` finding only when static provenance analysis proves the
62
+ value reaching the sink is constant, sanitized, or trusted (using cross-file
63
+ caller/callee context). It downgrades only on positive proof, so it removes false
64
+ positives without dropping real findings.
65
+
60
66
  `--format sarif` emits a SARIF 2.1.0 log (validates against the official schema)
61
67
  for CI and security dashboards: each problem with a code location becomes a
62
68
  result carrying its capability (as the rule id), CWE, and a precise location.
@@ -0,0 +1,7 @@
1
+ """Static analysis for provenance (P1).
2
+
3
+ The code-graph / data-flow engine that gives the verifier provenance -- whether a
4
+ value reaching a sink is attacker-controlled, sanitized, or a trusted constant.
5
+ This is the real fix for the taint precision floor that single-file LLM review
6
+ cannot reach (see ROADMAP P1). Python / AST based to start.
7
+ """
@@ -0,0 +1,208 @@
1
+ """Intra-procedural value-origin tracing (P1-01).
2
+
3
+ Classify where the value of an expression inside a function comes from, so a
4
+ later layer (P1-03) can decide whether it is attacker-controlled. The output is
5
+ an ``Origin``: the parameters, callees, attribute/subscript roots, free names,
6
+ and literals a value derives from.
7
+
8
+ The analysis is deliberately conservative and flow-insensitive: a name assigned
9
+ more than once contributes the union of all its right-hand sides, so a possible
10
+ source is never dropped (recall over precision). A value built only from literals
11
+ is reported as ``is_constant`` -- the signal that distinguishes, for example,
12
+ SQL concatenated from constants (safe) from SQL concatenated from a parameter.
13
+
14
+ This module finds where a value comes from; it does not decide what is a source
15
+ or a sanitizer (that is data, P1-02) nor follow a call into another file (P1-03).
16
+ Python / AST only.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import ast
22
+ from dataclasses import dataclass
23
+
24
+
25
@dataclass(frozen=True)
class Origin:
    """Everything a traced value derives from, as an immutable record."""

    # parameters the value derives from
    params: frozenset[str] = frozenset()
    # callee names whose return value it derives from
    calls: frozenset[str] = frozenset()
    # attribute/subscript roots, dotted (e.g. "request.args")
    attrs: frozenset[str] = frozenset()
    # free names: module globals, imports, builtins
    globals_: frozenset[str] = frozenset()
    # a literal contributes to the value
    has_literal: bool = False
    # an unmodelled expression contributes (be cautious)
    unknown: bool = False

    def merge(self, other: Origin) -> Origin:
        """Return a new ``Origin`` holding the field-by-field union of both operands."""
        literal = self.has_literal or other.has_literal
        unmodelled = self.unknown or other.unknown
        return Origin(
            params=self.params | other.params,
            calls=self.calls | other.calls,
            attrs=self.attrs | other.attrs,
            globals_=self.globals_ | other.globals_,
            has_literal=literal,
            unknown=unmodelled,
        )

    @property
    def is_constant(self) -> bool:
        """True when no param, call, attr, free name, or unmodelled expression
        contributes -- i.e. the value is built only from literals."""
        contributors = (self.params, self.calls, self.attrs, self.globals_, self.unknown)
        return not any(contributors)
49
+
50
+
51
# Shared Origin for a value contributed by a literal.
_LITERAL = Origin(has_literal=True)
# Shared Origin for an expression the tracer does not model.
_UNKNOWN = Origin(unknown=True)
53
+
54
+
55
def parse_function(source: str, name: str) -> ast.FunctionDef | ast.AsyncFunctionDef | None:
    """Find the first function named ``name`` in ``source`` (any nesting).

    "First" is the order in which ``ast.walk`` yields nodes. Returns ``None``
    when ``source`` cannot be parsed or contains no such function.
    """
    try:
        tree = ast.parse(source)
    except (SyntaxError, ValueError, RecursionError):
        # ValueError: some interpreter versions reject NUL bytes this way instead
        # of with SyntaxError. RecursionError: pathologically nested input.
        # Either way the file is not analysable, which must not abort a scan.
        return None
    matches = (
        node
        for node in ast.walk(tree)
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) and node.name == name
    )
    return next(matches, None)
65
+
66
+
67
def find_calls(scope: ast.AST, callee: str) -> list[ast.Call]:
    """Collect every call inside ``scope`` whose callee is named ``callee``.

    The name is compared with a bare function name (``open``) or with the final
    attribute of a method-style call (``execute`` in ``cur.execute``).
    """
    hits: list[ast.Call] = []
    for candidate in ast.walk(scope):
        if not isinstance(candidate, ast.Call):
            continue
        if _call_name(candidate) == callee:
            hits.append(candidate)
    return hits
71
+
72
+
73
def trace_value(func: ast.FunctionDef | ast.AsyncFunctionDef, expr: ast.AST) -> Origin:
    """Trace where ``expr`` (an expression inside ``func``) gets its value from.

    Names in ``expr`` are resolved against ``func``'s parameters and the
    assignments collected from its body; tracing starts with no names marked
    as already visited. Returns the merged ``Origin``.
    """
    return _classify(expr, _params(func), _assignments(func), frozenset())
76
+
77
+
78
def parameters(func: ast.FunctionDef | ast.AsyncFunctionDef) -> set[str]:
    """Return the parameter names of ``func``.

    Public wrapper around ``_params``: positional-only, positional, keyword-only,
    ``*args`` and ``**kwargs`` names are all included.
    """
    return _params(func)
81
+
82
+
83
def assignments(func: ast.FunctionDef | ast.AsyncFunctionDef) -> dict[str, list[ast.AST]]:
    """Map each assigned local name to the right-hand sides it is assigned (union).

    Public wrapper around ``_assignments``; a name assigned several times maps
    to every one of its right-hand-side expressions.
    """
    return _assignments(func)
86
+
87
+
88
def callee(call: ast.Call) -> tuple[str | None, str | None]:
    """The (dotted, bare) callee of a call: ("json.loads", "loads") or ("open", "open").

    ``dotted`` is ``None`` when the callee is not a plain name/attribute chain;
    ``bare`` is ``None`` when the callee is neither a name nor an attribute.
    """
    return _dotted(call.func), _call_name(call)
91
+
92
+
93
def access_path(node: ast.AST) -> str | None:
    """Dotted access chain with subscripts collapsed: request.args["x"] -> request.args.

    Public wrapper around ``_dotted``; returns ``None`` when the chain does not
    bottom out in a plain name.
    """
    return _dotted(node)
96
+
97
+
98
def access_root(node: ast.AST) -> str | None:
    """Leftmost name of an attribute/subscript chain: request.args["x"] -> request.

    Public wrapper around ``_root_name``; returns ``None`` when the innermost
    node is not a plain name.
    """
    return _root_name(node)
101
+
102
+
103
def _classify(expr: ast.AST, params: set[str], assigns: dict[str, list[ast.AST]], seen: frozenset[str]) -> Origin:
    """Compute the ``Origin`` of ``expr``.

    ``params`` and ``assigns`` describe the enclosing function; ``seen`` holds the
    local names currently being expanded so an assignment cycle terminates.
    Expression kinds that are not modelled yield the "unknown" origin.
    """

    def sub(node: ast.AST) -> Origin:
        return _classify(node, params, assigns, seen)

    if isinstance(expr, ast.Constant):
        return _LITERAL

    if isinstance(expr, ast.JoinedStr):
        # f-string: the literal text plus every interpolated expression
        result = _LITERAL
        for piece in expr.values:
            if isinstance(piece, ast.FormattedValue):
                result = result.merge(sub(piece.value))
        return result

    if isinstance(expr, ast.BinOp):
        lhs = sub(expr.left)
        return lhs.merge(sub(expr.right))

    if isinstance(expr, ast.BoolOp):
        return _merge_all(expr.values, params, assigns, seen)

    if isinstance(expr, ast.IfExp):
        # only the two branches can become the value; the test is not traced
        taken = sub(expr.body)
        return taken.merge(sub(expr.orelse))

    if isinstance(expr, (ast.List, ast.Tuple, ast.Set)):
        return _merge_all(expr.elts, params, assigns, seen)

    if isinstance(expr, ast.Call):
        # Only the callee is named here; what its return value means for taint
        # is decided later against the sanitizer/propagator catalog (P1-03).
        callee_name = _call_name(expr)
        if not callee_name:
            return _UNKNOWN
        return Origin(calls=frozenset({callee_name}))

    if isinstance(expr, (ast.Attribute, ast.Subscript)):
        chain = _dotted(expr)
        result = Origin(attrs=frozenset({chain})) if chain else _UNKNOWN
        base = _root_name(expr)
        if base in params:
            # e.g. request.args[...] where `request` is a parameter
            result = result.merge(Origin(params=frozenset({base})))
        return result

    if isinstance(expr, ast.Name):
        ident = expr.id
        if ident in seen:
            # assignment cycle -- contribute nothing and stop
            return Origin()
        if ident in params:
            return Origin(params=frozenset({ident}))
        if ident not in assigns:
            # module global, import, or builtin
            return Origin(globals_=frozenset({ident}))
        return _merge_all(assigns[ident], params, assigns, seen | {ident})

    return _UNKNOWN
141
+
142
+
143
def _merge_all(exprs: list[ast.AST], params, assigns, seen) -> Origin:
    """Union the origins of every expression in ``exprs``.

    An empty ``exprs`` yields a default ``Origin()``.
    """
    combined = Origin()
    for part in (_classify(node, params, assigns, seen) for node in exprs):
        combined = combined.merge(part)
    return combined
148
+
149
+
150
def _params(func: ast.AST) -> set[str]:
    """Return every parameter name declared by ``func``.

    A node without an ``args`` attribute (e.g. a module-level scope) has no
    parameters and yields an empty set.
    """
    spec = getattr(func, "args", None)
    if spec is None:
        return set()
    declared = [*spec.posonlyargs, *spec.args, *spec.kwonlyargs]
    # *args / **kwargs are stored separately and may be absent
    declared.extend(extra for extra in (spec.vararg, spec.kwarg) if extra)
    return {item.arg for item in declared}
160
+
161
+
162
def _assignments(func: ast.FunctionDef | ast.AsyncFunctionDef) -> dict[str, list[ast.AST]]:
    """Map every name bound inside ``func`` to the expressions it is bound from.

    Flow-insensitive: a name bound several times maps to all of its sources.
    Besides ``=``, annotated and augmented assignment, this records the other
    binding forms so their targets are not mistaken for free (global) names:

    * ``for x in it`` / ``async for`` / comprehension ``for x in it`` -> ``it``
    * ``with ctx as x`` -> ``ctx``
    * ``(x := value)`` -> ``value``

    For loops and comprehensions the iterable stands in for its elements.
    """
    out: dict[str, list[ast.AST]] = {}

    def bind(target: ast.AST | None, value: ast.AST | None) -> None:
        # Targets that bind no plain name (attributes, subscripts) add nothing.
        if target is None or value is None:
            return
        for name in _target_names(target):
            out.setdefault(name, []).append(value)

    for node in ast.walk(func):
        if isinstance(node, ast.Assign):
            for target in node.targets:
                bind(target, node.value)
        elif isinstance(node, (ast.AnnAssign, ast.AugAssign, ast.NamedExpr)):
            bind(node.target, node.value)
        elif isinstance(node, (ast.For, ast.AsyncFor, ast.comprehension)):
            bind(node.target, node.iter)
        elif isinstance(node, ast.withitem):
            bind(node.optional_vars, node.context_expr)
    return out


def _target_names(target: ast.AST) -> list[str]:
    """Return the plain names bound by an assignment target.

    Tuple/list targets are flattened and a starred element (``*rest``)
    contributes its inner name; any other target kind binds no local name.
    """
    if isinstance(target, ast.Name):
        return [target.id]
    if isinstance(target, ast.Starred):
        return _target_names(target.value)
    if isinstance(target, (ast.Tuple, ast.List)):
        return [name for elt in target.elts for name in _target_names(elt)]
    return []
182
+
183
+
184
def _call_name(call: ast.Call) -> str | None:
    """Return the short name a call is made through.

    ``open(...)`` gives ``"open"``, ``cur.execute(...)`` gives ``"execute"``;
    any other callee expression gives ``None``.
    """
    target = call.func
    if isinstance(target, ast.Attribute):
        return target.attr
    return target.id if isinstance(target, ast.Name) else None
191
+
192
+
193
def _dotted(node: ast.AST) -> str | None:
    """Render an access chain as a dotted path, ignoring subscripts.

    ``request.args["x"]`` becomes ``"request.args"``. The result is ``None``
    when the chain does not bottom out in a plain name.
    """
    if isinstance(node, ast.Subscript):
        # a subscript adds nothing to the path; describe what is subscripted
        return _dotted(node.value)
    if isinstance(node, ast.Attribute):
        prefix = _dotted(node.value)
        if not prefix:
            return None
        return f"{prefix}.{node.attr}"
    if isinstance(node, ast.Name):
        return node.id
    return None
203
+
204
+
205
def _root_name(node: ast.AST) -> str | None:
    """Return the leftmost name of an attribute/subscript chain, else ``None``."""
    base = node
    while True:
        if not isinstance(base, (ast.Attribute, ast.Subscript)):
            break
        base = base.value
    if isinstance(base, ast.Name):
        return base.id
    return None
@@ -0,0 +1,273 @@
1
+ """Taint classification (P1-03): turn provenance into a taint verdict.
2
+
3
+ Walks a value expression like P1-01's tracer, but consults the taint vocabulary
4
+ (P1-02) at every call and access: a known source makes a value EXTERNAL, a known
5
+ sanitizer makes it SANITIZED (taint stops), a propagator carries taint through to
6
+ the result, and a trusted origin or a literal is clean.
7
+
8
+ The point is to let a later layer (P1-04) downgrade a taint finding only when the
9
+ value is *provably* not attacker-controlled -- ``classification in SAFE`` -- so
10
+ recall is preserved: anything uncertain is UNKNOWN or PARAM, never quietly safe.
11
+
12
+ Two documented precision leans: a bare module-global name (e.g. ``STATIC_DIR``)
13
+ is treated as TRUSTED (module-level names are conventionally constants), and an
14
+ unknown attribute access (e.g. ``self.x``) is UNKNOWN rather than safe. These are
15
+ revisited against real repositories in P1-05.
16
+
17
+ This layer is intra-procedural: a value that depends on a parameter returns
18
+ PARAM, for the cross-file caller hop (next) to resolve.
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import ast
24
+ from dataclasses import dataclass
25
+ from enum import Enum
26
+ from pathlib import Path
27
+
28
+ import yaml
29
+
30
+ from codejury.analysis.provenance import (
31
+ access_path,
32
+ access_root,
33
+ assignments,
34
+ callee,
35
+ find_calls,
36
+ parameters,
37
+ )
38
+ from codejury.resources import TAINT_FILE
39
+
40
+
41
class Taint(str, Enum):
    """Taint verdict for a value.

    Members also subclass ``str``, so each compares equal to its string value.
    """

    EXTERNAL = "external"  # derives from an attacker source, not sanitized
    UNKNOWN = "unknown"  # an unknown call / access -- cannot prove either way
    PARAM = "param"  # depends on a parameter; resolve at the call site (cross-file)
    SANITIZED = "sanitized"  # had an external component, but a sanitizer neutralized it
    TRUSTED = "trusted"  # operator/config/global origin
    CONSTANT = "constant"  # built only from literals
48
+
49
+
50
# Provably-not-attacker-controlled: the only classes P1-04 may downgrade on.
# EXTERNAL, UNKNOWN and PARAM are deliberately excluded.
SAFE = frozenset({Taint.CONSTANT, Taint.SANITIZED, Taint.TRUSTED})

# Ranked for combining a composite value: the most dangerous component wins
# (a higher number outranks a lower one).
_RANK = {
    Taint.EXTERNAL: 6,
    Taint.UNKNOWN: 5,
    Taint.PARAM: 4,
    Taint.SANITIZED: 3,
    Taint.TRUSTED: 2,
    Taint.CONSTANT: 1,
}
62
+
63
+
64
@dataclass(frozen=True)
class TaintVocab:
    """The taint vocabulary: name patterns grouped by the role they play."""

    sources: tuple[str, ...]  # "match" patterns of the "sources" section
    trusted: tuple[str, ...]  # "match" patterns of the "trusted" section
    sanitizers: tuple[str, ...]  # "calls" names of the "sanitizers" section
    safe_sinks: tuple[str, ...]  # "calls" names of the "safe_sinks" section
    propagators: tuple[str, ...]  # "calls" names of the "propagators" section

    @classmethod
    def from_dict(cls, data: dict) -> TaintVocab:
        """Build a vocabulary from the parsed YAML mapping.

        Each section is a list of entries; ``sources`` / ``trusted`` entries
        contribute their ``match`` list and the other sections their ``calls``
        list. A section or key that is missing or empty (``None``) contributes
        nothing, and a single string is accepted as a one-item list.
        """

        def collect(section: str, key: str) -> tuple[str, ...]:
            found: list[str] = []
            # `or ()`: a key written with no value loads as None, not as a list
            for entry in data.get(section) or ():
                values = entry.get(key) or ()
                if isinstance(values, str):
                    # a bare scalar would otherwise be split into characters
                    values = (values,)
                found.extend(values)
            return tuple(found)

        return cls(
            sources=collect("sources", "match"),
            trusted=collect("trusted", "match"),
            sanitizers=collect("sanitizers", "calls"),
            safe_sinks=collect("safe_sinks", "calls"),
            propagators=collect("propagators", "calls"),
        )
87
+
88
+
89
def load_vocab(path: str | Path = TAINT_FILE) -> TaintVocab:
    """Read the YAML file at ``path`` (UTF-8) and build a ``TaintVocab`` from it.

    An empty document is treated as an empty mapping.
    """
    with open(path, encoding="utf-8") as handle:
        parsed = yaml.safe_load(handle)
    if not parsed:
        parsed = {}
    return TaintVocab.from_dict(parsed)
92
+
93
+
94
def is_safe_sink(call: ast.Call, vocab: TaintVocab) -> bool:
    """True if the call itself is a safe parser (json.loads, ast.literal_eval, ...).

    Concretely: the call's callee matches an entry of ``vocab.safe_sinks``.
    """
    return _callee_in(call, vocab.safe_sinks)
97
+
98
+
99
def taint_of(
    func: ast.AST,
    expr: ast.AST,
    vocab: TaintVocab,
    *,
    resolve_param=None,
) -> Taint:
    """Classify the taint of ``expr`` within ``func`` using the vocabulary.

    ``func`` supplies the parameter names and assignments used to resolve the
    names that appear in ``expr``.

    ``resolve_param`` is an optional ``(name) -> Taint`` callback; when given, a
    value that reaches a parameter is resolved through it (the cross-file caller
    hop, P1-03b) instead of returning PARAM.
    """
    # start with an empty "seen" set: no local name is being expanded yet
    return _walk(func, expr, vocab, assignments(func), parameters(func), frozenset(), resolve_param)
113
+
114
+
115
def _walk(func, expr, vocab, assigns, params, seen, resolve) -> Taint:
    """Classify ``expr`` recursively; a composite value takes its worst component.

    ``assigns`` / ``params`` describe ``func``; ``seen`` holds the local names
    currently being expanded (stops assignment cycles); ``resolve`` is the
    optional ``(name) -> Taint`` callback used when a parameter is reached.

    For a propagator / safe-sink call every input is classified: positional
    arguments, keyword-argument values, and the receiver of a method-style call
    (``value.strip()``). A receiver rooted at a free name that is not a known
    source (``os.path`` in ``os.path.join``) is taken to be a namespace rather
    than data and is ignored.
    """

    def w(e):
        return _walk(func, e, vocab, assigns, params, seen, resolve)

    if isinstance(expr, ast.Constant):
        return Taint.CONSTANT
    if isinstance(expr, ast.JoinedStr):
        # f-string: literal text plus each interpolated expression
        return _combine([Taint.CONSTANT] + [w(v.value) for v in expr.values if isinstance(v, ast.FormattedValue)])
    if isinstance(expr, ast.BinOp):
        return _combine([w(expr.left), w(expr.right)])
    if isinstance(expr, ast.BoolOp):
        return _combine([w(v) for v in expr.values])
    if isinstance(expr, ast.IfExp):
        # only the branches can become the value; the test is not classified
        return _combine([w(expr.body), w(expr.orelse)])
    if isinstance(expr, (ast.List, ast.Tuple, ast.Set)):
        return _combine([w(e) for e in expr.elts] or [Taint.CONSTANT])
    if isinstance(expr, ast.Call):
        if _callee_in(expr, vocab.sanitizers):
            return Taint.SANITIZED  # a sanitizer cleans its result regardless of input
        if _callee_in(expr, vocab.sources):
            return Taint.EXTERNAL  # e.g. input()
        if _callee_in(expr, vocab.propagators) or _callee_in(expr, vocab.safe_sinks):
            parts = [w(a) for a in expr.args]
            # keyword values flow into the result just like positional ones
            parts.extend(w(kw.value) for kw in expr.keywords)
            if isinstance(expr.func, ast.Attribute):
                receiver = expr.func.value
                root = access_root(receiver)
                carried = w(receiver)
                # root is None for a literal / call-result receiver; a param or
                # assigned local is data too. Anything else is a free name,
                # which only counts when it is itself a known source.
                is_value = root is None or root in params or root in assigns
                if is_value or carried is Taint.EXTERNAL:
                    parts.append(carried)
            return _combine(parts or [Taint.CONSTANT])
        return Taint.UNKNOWN  # unknown call -- a cross-file hop may resolve it later
    if isinstance(expr, (ast.Attribute, ast.Subscript)):
        path = access_path(expr)
        if path and _access_in(path, vocab.sources):
            return Taint.EXTERNAL
        if path and _access_in(path, vocab.trusted):
            return Taint.TRUSTED
        root = access_root(expr)
        if root in params:  # attribute of a parameter -- resolve at call site
            return resolve(root) if resolve else Taint.PARAM
        return Taint.UNKNOWN  # unknown object attribute (e.g. self.x): not provably safe
    if isinstance(expr, ast.Name):
        if expr.id in seen:
            return Taint.CONSTANT  # assignment cycle: no new information
        if expr.id in params:
            return resolve(expr.id) if resolve else Taint.PARAM
        if expr.id in assigns:
            return _combine([_walk(func, r, vocab, assigns, params, seen | {expr.id}, resolve)
                             for r in assigns[expr.id]])
        return Taint.TRUSTED  # free module global / builtin -- conventionally a constant
    return Taint.UNKNOWN
159
+
160
+
161
def _combine(taints: list[Taint]) -> Taint:
    """Return the highest-ranked (most dangerous) verdict in ``taints``."""
    rank_of = _RANK.__getitem__
    return max(taints, key=rank_of)
163
+
164
+
165
def _callee_in(call: ast.Call, names: tuple[str, ...]) -> bool:
    """Report whether the callee of ``call`` matches any entry of ``names``.

    An entry containing a dot is compared with the dotted callee (equal to it,
    or a trailing ``.``-separated part of it); an entry without a dot is
    compared with the bare callee name.
    """
    dotted, bare = callee(call)

    def hit(name: str) -> bool:
        if "." not in name:
            return bare == name
        if dotted is None:
            return False
        return dotted == name or dotted.endswith("." + name)

    return any(hit(name) for name in names)
174
+
175
+
176
def _access_in(path: str, prefixes: tuple[str, ...]) -> bool:
    """Tell whether ``path`` equals, or is a dotted extension of, any prefix.

    With the prefix ``"request.args"`` both the path ``"request.args"`` and
    the path ``"request.args.get"`` match; ``"request.argsx"`` does not,
    because the extension must start at a ``.`` boundary.
    """
    for prefix in prefixes:
        if path == prefix:
            return True
        if path.startswith(f"{prefix}."):
            return True
    return False
179
+
180
+
181
+ # --- cross-file one-hop resolution (P1-03b) ---------------------------------
182
+
183
def taint_in_repo(
    func: ast.FunctionDef | ast.AsyncFunctionDef,
    expr: ast.AST,
    vocab: TaintVocab,
    files: dict[str, str],
) -> Taint:
    """Classify ``expr`` inside ``func``, looking one hop up for parameter values.

    When the value traces back to a parameter of ``func``, the call sites of
    ``func`` found across ``files`` are inspected and their arguments are
    combined: one caller passing an attacker-controlled value makes the result
    EXTERNAL, whereas the result is safe only if every caller passes a
    sanitized, constant or trusted value. If no caller is found the parameter
    remains UNKNOWN -- it is never assumed to be safe.
    """
    one_hop = _caller_resolver(func, files, vocab)
    return taint_of(func, expr, vocab, resolve_param=one_hop)
197
+
198
+
199
def _caller_resolver(func, files, vocab):
    """Build the ``resolve_param`` callback used for one-hop caller lookup.

    The returned ``resolve(param_name)`` classifies what each call site of
    ``func`` (searched by ``func.name`` across ``files``) passes for that
    parameter and combines the per-site results with ``_combine``. A call site
    where no argument can be matched contributes ``Taint.UNKNOWN``, and so does
    the absence of any call site.

    The call-site list is computed on the first ``resolve`` call and reused
    afterwards, so resolving several parameters of the same function parses
    ``files`` once instead of once per parameter. Nothing is parsed if
    ``resolve`` is never invoked.
    """
    sites = None  # filled lazily by the first resolve() call

    def resolve(param_name: str) -> Taint:
        nonlocal sites
        if sites is None:
            sites = _call_sites(func.name, files)
        index = _param_index(func, param_name)
        results = []
        for scope, call in sites:
            arg = _arg_for_param(call, index, param_name)
            # one hop only: classify the caller's argument without recursing further
            results.append(taint_of(scope, arg, vocab) if arg is not None else Taint.UNKNOWN)
        return _combine(results) if results else Taint.UNKNOWN

    return resolve
209
+
210
+
211
+ def _param_index(func, name: str) -> int | None:
212
+ positional = [*func.args.posonlyargs, *func.args.args]
213
+ for i, arg in enumerate(positional):
214
+ if arg.arg == name:
215
+ return i
216
+ return None # keyword-only or *args: matched by keyword at the call site instead
217
+
218
+
219
+ def _arg_for_param(call: ast.Call, index: int | None, name: str) -> ast.AST | None:
220
+ if index is not None and index < len(call.args):
221
+ return call.args[index]
222
+ for kw in call.keywords:
223
+ if kw.arg == name:
224
+ return kw.value
225
+ return None
226
+
227
+
228
def _call_sites(name: str, files: dict[str, str]) -> list[tuple[ast.AST, ast.Call]]:
    """Collect the calls that ``find_calls`` reports for ``name`` across ``files``.

    Args:
        name: Function name handed to ``find_calls``.
        files: Source texts to scan; only the values are read.

    Returns:
        ``(scope, call)`` pairs, where ``scope`` is the innermost function
        definition containing the call, or the module tree for a call made at
        module level. Sources that cannot be parsed are skipped.
    """
    sites = []
    for source in files.values():
        try:
            tree = ast.parse(source)
        except (SyntaxError, ValueError):
            # ValueError: source containing null bytes on interpreters before
            # 3.12. One unreadable file must not abort the whole scan.
            continue
        funcs = [n for n in ast.walk(tree) if isinstance(n, (ast.FunctionDef, ast.AsyncFunctionDef))]
        for call in find_calls(tree, name):
            sites.append((_enclosing_scope(funcs, call) or tree, call))
    return sites
239
+
240
+
241
+ def _enclosing_scope(funcs: list[ast.AST], call: ast.Call) -> ast.AST | None:
242
+ containing = [f for f in funcs if any(node is call for node in ast.walk(f))]
243
+ if not containing:
244
+ return None # module-level call site
245
+ return min(containing, key=lambda f: sum(1 for _ in ast.walk(f))) # innermost
246
+
247
+
248
def worst_sink_taint(content: str, files: dict[str, str], vocab: TaintVocab) -> Taint | None:
    """The most dangerous taint reaching any potential sink call in ``content``.

    A "potential sink" is any call that is not a safe sink, sanitizer, or
    propagator (those are not where injection happens). Every positional and
    keyword argument of such a call is classified with the cross-file resolver
    (``taint_in_repo``), and the worst result is returned.

    Returns:
        ``Taint.CONSTANT`` when there is no sink argument to worry about;
        ``None`` when ``content`` does not parse -- including source that
        ``ast.parse`` rejects with ``ValueError`` (null bytes on interpreters
        before 3.12) -- in which case the caller should not act.

    Used by the taint gate to downgrade an input_validation finding only when
    the whole artifact is provably clean -- so a single tainted sink keeps
    every finding (recall preserved).
    """
    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError):
        return None
    funcs = [n for n in ast.walk(tree) if isinstance(n, (ast.FunctionDef, ast.AsyncFunctionDef))]
    taints: list[Taint] = []
    for call in ast.walk(tree):
        if not isinstance(call, ast.Call):
            continue
        if is_safe_sink(call, vocab) or _callee_in(call, vocab.sanitizers) or _callee_in(call, vocab.propagators):
            continue  # not a place an injection lands
        # NOTE(review): a module-level call passes the Module node as the scope,
        # while taint_in_repo annotates that argument as a function definition --
        # confirm taint_of accepts a module scope.
        scope = _enclosing_scope(funcs, call) or tree
        for arg in (*call.args, *(kw.value for kw in call.keywords)):
            taints.append(taint_in_repo(scope, arg, vocab, files))
    return _combine(taints) if taints else Taint.CONSTANT
@@ -23,6 +23,7 @@ from codejury.orchestrators.debate import DebateOrchestrator
23
23
  from codejury.orchestrators.pipeline import PipelineOrchestrator
24
24
  from codejury.orchestrators.reflexion import ReflexionOrchestrator
25
25
  from codejury.orchestrators.single import SingleOrchestrator
26
+ from codejury.orchestrators.taint_gate import TaintGateOrchestrator
26
27
  from codejury.providers.anthropic import AnthropicProvider
27
28
  from codejury.providers.base import Provider
28
29
  from codejury.providers.litellm import LiteLLMProvider
@@ -30,7 +31,7 @@ from codejury.providers.openai import OpenAIProvider
30
31
  from codejury.providers.retry import RetryProvider
31
32
  from codejury.sources.base import Source
32
33
 
33
- STRATEGIES = ("single", "pipeline", "debate", "reflexion", "challenge")
34
+ STRATEGIES = ("single", "pipeline", "debate", "reflexion", "challenge", "taint")
34
35
  PROVIDERS = ("anthropic", "openai", "litellm")
35
36
  DEFAULT_MODEL = os.environ.get("CODEJURY_MODEL", "claude-sonnet-4-6")
36
37
  DEFAULT_API_BASE = os.environ.get("CODEJURY_API_BASE")
@@ -73,6 +74,8 @@ def build_orchestration(
73
74
  verifier = {"verifier": VerifierAgent(provider=provider, model=model, max_tokens=max_tokens)}
74
75
  if strategy == "pipeline":
75
76
  return verifier, PipelineOrchestrator()
77
+ if strategy == "taint":
78
+ return verifier, TaintGateOrchestrator()
76
79
  return verifier, SingleOrchestrator()
77
80
 
78
81
 
@@ -276,6 +276,7 @@ def main(argv: list[str] | None = None) -> int:
276
276
  eval_p = sub.add_parser("eval", help="score golden cases and report precision/recall")
277
277
  eval_p.add_argument("--dataset", default=GOLDEN_DIR, help="golden case YAML directory")
278
278
  eval_p.add_argument("--split", default=None, help="only score cases whose 'split' matches (e.g. held-out)")
279
+ eval_p.add_argument("--orchestrator", choices=STRATEGIES, default="single")
279
280
  eval_p.add_argument("--capabilities", default=CAPABILITIES_DIR, help="capability YAML directory")
280
281
  eval_p.add_argument("--provider", choices=PROVIDERS, default="anthropic")
281
282
  eval_p.add_argument("--format", choices=("text", "json"), default="text", dest="fmt")
@@ -344,6 +345,7 @@ def main(argv: list[str] | None = None) -> int:
344
345
  load_capabilities(args.capabilities),
345
346
  provider=make_provider(args.provider, api_key=args.api_key, api_base=args.api_base),
346
347
  model=args.model,
348
+ strategy=args.orchestrator,
347
349
  )
348
350
  except Exception as exc:
349
351
  # e.g. a missing API key surfaces as a provider auth error -- report it