codejury 0.5.0__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153)
  1. {codejury-0.5.0 → codejury-0.6.0}/PKG-INFO +29 -8
  2. {codejury-0.5.0 → codejury-0.6.0}/README.md +27 -7
  3. {codejury-0.5.0 → codejury-0.6.0}/codejury/__init__.py +6 -1
  4. {codejury-0.5.0 → codejury-0.6.0}/codejury/agents/verifier.py +14 -1
  5. {codejury-0.5.0 → codejury-0.6.0}/codejury/assembly.py +31 -3
  6. {codejury-0.5.0 → codejury-0.6.0}/codejury/cli.py +48 -17
  7. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/capabilities/dependency_config.yaml +27 -0
  8. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/capabilities/input_validation.yaml +60 -0
  9. codejury-0.6.0/codejury/data/golden/authn_weak_hash_indirect_vuln.yaml +14 -0
  10. codejury-0.6.0/codejury/data/golden/business_logic_price_tamper_vuln.yaml +14 -0
  11. codejury-0.6.0/codejury/data/golden/business_logic_server_checked_safe.yaml +15 -0
  12. codejury-0.6.0/codejury/data/golden/cmdi_fixed_argv_safe.yaml +22 -0
  13. codejury-0.6.0/codejury/data/golden/data_protection_plaintext_pii_vuln.yaml +14 -0
  14. codejury-0.6.0/codejury/data/golden/data_protection_tokenized_safe.yaml +16 -0
  15. codejury-0.6.0/codejury/data/golden/dependency_config_tls_verify_off_vuln.yaml +11 -0
  16. codejury-0.6.0/codejury/data/golden/dependency_config_tls_verify_on_safe.yaml +11 -0
  17. codejury-0.6.0/codejury/data/golden/deserialize_json_safe.yaml +13 -0
  18. codejury-0.6.0/codejury/data/golden/deserialize_pickle_vuln.yaml +12 -0
  19. codejury-0.6.0/codejury/data/golden/error_logging_redacted_safe.yaml +11 -0
  20. codejury-0.6.0/codejury/data/golden/error_logging_secret_leak_vuln.yaml +12 -0
  21. codejury-0.6.0/codejury/data/golden/literal_eval_safe.yaml +14 -0
  22. codejury-0.6.0/codejury/data/golden/path_basename_safe.yaml +15 -0
  23. codejury-0.6.0/codejury/data/golden/session_fixation_vuln.yaml +16 -0
  24. codejury-0.6.0/codejury/data/golden/session_secure_cookie_safe.yaml +18 -0
  25. codejury-0.6.0/codejury/data/golden/sql_constant_concat_safe.yaml +14 -0
  26. codejury-0.6.0/codejury/data/golden/sqli_indirect_var_vuln.yaml +16 -0
  27. codejury-0.6.0/codejury/data/golden/ssrf_allowlist_safe.yaml +17 -0
  28. codejury-0.6.0/codejury/data/golden/ssrf_constant_url_safe.yaml +13 -0
  29. codejury-0.6.0/codejury/data/golden/ssrf_substring_allowlist_bypass_vuln.yaml +17 -0
  30. codejury-0.6.0/codejury/data/golden/ssrf_user_url_vuln.yaml +12 -0
  31. codejury-0.6.0/codejury/data/golden/xfile_idor_no_check_vuln.yaml +20 -0
  32. codejury-0.6.0/codejury/data/golden/xfile_idor_owner_checked_safe.yaml +21 -0
  33. codejury-0.6.0/codejury/data/golden/xfile_path_sanitized_safe.yaml +22 -0
  34. codejury-0.6.0/codejury/data/golden/xfile_path_tainted_vuln.yaml +19 -0
  35. codejury-0.6.0/codejury/data/golden/xss_textcontent_safe.yaml +15 -0
  36. {codejury-0.5.0 → codejury-0.6.0}/codejury/domain/capability.py +16 -1
  37. {codejury-0.5.0 → codejury-0.6.0}/codejury/domain/observation.py +20 -0
  38. {codejury-0.5.0 → codejury-0.6.0}/codejury/domain/result.py +15 -1
  39. {codejury-0.5.0 → codejury-0.6.0}/codejury/evaluation.py +62 -10
  40. codejury-0.6.0/codejury/infrastructure/cache.py +76 -0
  41. {codejury-0.5.0 → codejury-0.6.0}/codejury/providers/anthropic.py +1 -0
  42. {codejury-0.5.0 → codejury-0.6.0}/codejury/providers/litellm.py +1 -1
  43. {codejury-0.5.0 → codejury-0.6.0}/codejury/providers/openai.py +1 -0
  44. {codejury-0.5.0 → codejury-0.6.0}/codejury/reporting.py +106 -0
  45. codejury-0.6.0/codejury/sources/callers.py +104 -0
  46. {codejury-0.5.0 → codejury-0.6.0}/codejury/sources/repo.py +16 -2
  47. {codejury-0.5.0 → codejury-0.6.0}/codejury.egg-info/PKG-INFO +29 -8
  48. {codejury-0.5.0 → codejury-0.6.0}/codejury.egg-info/SOURCES.txt +30 -0
  49. {codejury-0.5.0 → codejury-0.6.0}/codejury.egg-info/requires.txt +1 -0
  50. {codejury-0.5.0 → codejury-0.6.0}/pyproject.toml +2 -2
  51. {codejury-0.5.0 → codejury-0.6.0}/tests/test_anthropic_provider.py +1 -0
  52. codejury-0.6.0/tests/test_cache.py +117 -0
  53. {codejury-0.5.0 → codejury-0.6.0}/tests/test_callers.py +27 -1
  54. codejury-0.6.0/tests/test_evaluation.py +152 -0
  55. {codejury-0.5.0 → codejury-0.6.0}/tests/test_litellm_provider.py +1 -0
  56. {codejury-0.5.0 → codejury-0.6.0}/tests/test_openai_provider.py +1 -0
  57. codejury-0.6.0/tests/test_sarif.py +115 -0
  58. codejury-0.5.0/codejury/sources/callers.py +0 -46
  59. codejury-0.5.0/tests/test_evaluation.py +0 -69
  60. {codejury-0.5.0 → codejury-0.6.0}/LICENSE +0 -0
  61. {codejury-0.5.0 → codejury-0.6.0}/codejury/agents/__init__.py +0 -0
  62. {codejury-0.5.0 → codejury-0.6.0}/codejury/agents/base.py +0 -0
  63. {codejury-0.5.0 → codejury-0.6.0}/codejury/agents/debate.py +0 -0
  64. {codejury-0.5.0 → codejury-0.6.0}/codejury/agents/mock.py +0 -0
  65. {codejury-0.5.0 → codejury-0.6.0}/codejury/agents/parsing.py +0 -0
  66. {codejury-0.5.0 → codejury-0.6.0}/codejury/agents/refuter.py +0 -0
  67. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/capabilities/authentication.yaml +0 -0
  68. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/capabilities/authorization.yaml +0 -0
  69. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/capabilities/business_logic.yaml +0 -0
  70. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/capabilities/crypto.yaml +0 -0
  71. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/capabilities/data_protection.yaml +0 -0
  72. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/capabilities/error_logging.yaml +0 -0
  73. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/capabilities/output_encoding.yaml +0 -0
  74. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/capabilities/secrets.yaml +0 -0
  75. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/capabilities/session.yaml +0 -0
  76. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/authn_bcrypt_password.yaml +0 -0
  77. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/authn_jwt_noverify_vuln.yaml +0 -0
  78. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/authn_jwt_verified_safe.yaml +0 -0
  79. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/authn_sha256_checksum_safe.yaml +0 -0
  80. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/authn_sha256_password.yaml +0 -0
  81. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/authz_idor_vuln.yaml +0 -0
  82. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/authz_owner_safe.yaml +0 -0
  83. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/cmdi_ossystem_vuln.yaml +0 -0
  84. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/cmdi_subprocess_safe.yaml +0 -0
  85. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/crypto_aesgcm_safe.yaml +0 -0
  86. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/crypto_ecb_vuln.yaml +0 -0
  87. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/path_contained_safe.yaml +0 -0
  88. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/path_traversal_vuln.yaml +0 -0
  89. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/secrets_env_safe.yaml +0 -0
  90. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/secrets_hardcoded_vuln.yaml +0 -0
  91. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/sqli_format_vuln.yaml +0 -0
  92. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/sqli_fstring_query.yaml +0 -0
  93. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/sqli_parameterized_query.yaml +0 -0
  94. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/xss_innerhtml_constant_safe.yaml +0 -0
  95. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/golden/xss_innerhtml_vuln.yaml +0 -0
  96. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/suppressions.yaml +0 -0
  97. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/tasks/audit_diff_debate.yaml +0 -0
  98. {codejury-0.5.0 → codejury-0.6.0}/codejury/data/tasks/quick_scan_single.yaml +0 -0
  99. {codejury-0.5.0 → codejury-0.6.0}/codejury/domain/__init__.py +0 -0
  100. {codejury-0.5.0 → codejury-0.6.0}/codejury/domain/artifact.py +0 -0
  101. {codejury-0.5.0 → codejury-0.6.0}/codejury/domain/context.py +0 -0
  102. {codejury-0.5.0 → codejury-0.6.0}/codejury/infrastructure/__init__.py +0 -0
  103. {codejury-0.5.0 → codejury-0.6.0}/codejury/infrastructure/json_parse.py +0 -0
  104. {codejury-0.5.0 → codejury-0.6.0}/codejury/integrations/__init__.py +0 -0
  105. {codejury-0.5.0 → codejury-0.6.0}/codejury/integrations/github.py +0 -0
  106. {codejury-0.5.0 → codejury-0.6.0}/codejury/orchestrators/__init__.py +0 -0
  107. {codejury-0.5.0 → codejury-0.6.0}/codejury/orchestrators/base.py +0 -0
  108. {codejury-0.5.0 → codejury-0.6.0}/codejury/orchestrators/challenge.py +0 -0
  109. {codejury-0.5.0 → codejury-0.6.0}/codejury/orchestrators/debate.py +0 -0
  110. {codejury-0.5.0 → codejury-0.6.0}/codejury/orchestrators/pipeline.py +0 -0
  111. {codejury-0.5.0 → codejury-0.6.0}/codejury/orchestrators/reflexion.py +0 -0
  112. {codejury-0.5.0 → codejury-0.6.0}/codejury/orchestrators/single.py +0 -0
  113. {codejury-0.5.0 → codejury-0.6.0}/codejury/providers/__init__.py +0 -0
  114. {codejury-0.5.0 → codejury-0.6.0}/codejury/providers/base.py +0 -0
  115. {codejury-0.5.0 → codejury-0.6.0}/codejury/providers/mock.py +0 -0
  116. {codejury-0.5.0 → codejury-0.6.0}/codejury/providers/openai_format.py +0 -0
  117. {codejury-0.5.0 → codejury-0.6.0}/codejury/providers/retry.py +0 -0
  118. {codejury-0.5.0 → codejury-0.6.0}/codejury/resources.py +0 -0
  119. {codejury-0.5.0 → codejury-0.6.0}/codejury/sources/__init__.py +0 -0
  120. {codejury-0.5.0 → codejury-0.6.0}/codejury/sources/base.py +0 -0
  121. {codejury-0.5.0 → codejury-0.6.0}/codejury/sources/chunker.py +0 -0
  122. {codejury-0.5.0 → codejury-0.6.0}/codejury/sources/diff.py +0 -0
  123. {codejury-0.5.0 → codejury-0.6.0}/codejury/sources/function.py +0 -0
  124. {codejury-0.5.0 → codejury-0.6.0}/codejury/sources/mock.py +0 -0
  125. {codejury-0.5.0 → codejury-0.6.0}/codejury/suppression.py +0 -0
  126. {codejury-0.5.0 → codejury-0.6.0}/codejury/tasks/__init__.py +0 -0
  127. {codejury-0.5.0 → codejury-0.6.0}/codejury/tasks/base.py +0 -0
  128. {codejury-0.5.0 → codejury-0.6.0}/codejury/tasks/registry.py +0 -0
  129. {codejury-0.5.0 → codejury-0.6.0}/codejury.egg-info/dependency_links.txt +0 -0
  130. {codejury-0.5.0 → codejury-0.6.0}/codejury.egg-info/entry_points.txt +0 -0
  131. {codejury-0.5.0 → codejury-0.6.0}/codejury.egg-info/top_level.txt +0 -0
  132. {codejury-0.5.0 → codejury-0.6.0}/setup.cfg +0 -0
  133. {codejury-0.5.0 → codejury-0.6.0}/tests/test_assembly.py +0 -0
  134. {codejury-0.5.0 → codejury-0.6.0}/tests/test_audit_pipeline.py +0 -0
  135. {codejury-0.5.0 → codejury-0.6.0}/tests/test_capability.py +0 -0
  136. {codejury-0.5.0 → codejury-0.6.0}/tests/test_challenge.py +0 -0
  137. {codejury-0.5.0 → codejury-0.6.0}/tests/test_cli_audit.py +0 -0
  138. {codejury-0.5.0 → codejury-0.6.0}/tests/test_context.py +0 -0
  139. {codejury-0.5.0 → codejury-0.6.0}/tests/test_debate_agents.py +0 -0
  140. {codejury-0.5.0 → codejury-0.6.0}/tests/test_debate_orchestrator.py +0 -0
  141. {codejury-0.5.0 → codejury-0.6.0}/tests/test_diff_source.py +0 -0
  142. {codejury-0.5.0 → codejury-0.6.0}/tests/test_function_source.py +0 -0
  143. {codejury-0.5.0 → codejury-0.6.0}/tests/test_integrations.py +0 -0
  144. {codejury-0.5.0 → codejury-0.6.0}/tests/test_json_parse.py +0 -0
  145. {codejury-0.5.0 → codejury-0.6.0}/tests/test_orchestrator.py +0 -0
  146. {codejury-0.5.0 → codejury-0.6.0}/tests/test_pipeline_orchestrator.py +0 -0
  147. {codejury-0.5.0 → codejury-0.6.0}/tests/test_reflexion_orchestrator.py +0 -0
  148. {codejury-0.5.0 → codejury-0.6.0}/tests/test_repo_source.py +0 -0
  149. {codejury-0.5.0 → codejury-0.6.0}/tests/test_reporting.py +0 -0
  150. {codejury-0.5.0 → codejury-0.6.0}/tests/test_retry_provider.py +0 -0
  151. {codejury-0.5.0 → codejury-0.6.0}/tests/test_suppression.py +0 -0
  152. {codejury-0.5.0 → codejury-0.6.0}/tests/test_tasks.py +0 -0
  153. {codejury-0.5.0 → codejury-0.6.0}/tests/test_verifier.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codejury
3
- Version: 0.5.0
3
+ Version: 0.6.0
4
4
  Summary: General-purpose Application Security AI audit framework -- five-layer architecture, capabilities as first-class data
5
5
  Author: AISecLabs
6
6
  License-Expression: MIT
@@ -25,6 +25,7 @@ Provides-Extra: litellm
25
25
  Requires-Dist: litellm>=1.0; extra == "litellm"
26
26
  Provides-Extra: dev
27
27
  Requires-Dist: pytest>=8.0; extra == "dev"
28
+ Requires-Dist: jsonschema>=4.0; extra == "dev"
28
29
  Dynamic: license-file
29
30
 
30
31
  # codejury
@@ -80,15 +81,32 @@ git diff | codejury audit --provider anthropic
80
81
  | `codejury audit [diff]` | Audit a unified diff from a file or stdin (`-`). |
81
82
  | `codejury scan <dir>` | Audit a whole directory tree, capability by capability. |
82
83
  | `codejury run <task>` | Run a named task preset (see [Tasks](#tasks)). |
83
- | `codejury eval` | Score the golden cases and report precision / recall. |
84
+ | `codejury eval` | Score the golden cases; report precision / recall / F1, overall and per capability. |
84
85
 
85
86
  Shared flags: `--orchestrator {single,pipeline,debate,reflexion,challenge}`,
86
- `--provider {anthropic,openai,litellm}`, `--model`, `--format {text,markdown,json}`.
87
+ `--provider {anthropic,openai,litellm}`, `--model`,
88
+ `--format {text,markdown,json,sarif}`.
89
+
90
+ `--format sarif` emits a SARIF 2.1.0 log (validates against the official schema)
91
+ for CI and security dashboards: each problem with a code location becomes a
92
+ result carrying its capability (as the rule id), CWE, and a precise location.
87
93
 
88
94
  Findings in known-noise categories (availability/DoS, rate limiting, memory safety
89
95
  outside C/C++) are dropped by versioned rules in
90
96
  `codejury/data/suppressions.yaml`; disable with `--no-suppress`.
91
97
 
98
+ `codejury eval` takes `--dataset <dir>` (golden YAML directory), `--split <name>`
99
+ (score only cases tagged with that `split:`, e.g. a held-out set), and
100
+ `--format {text,json}` -- the JSON report is a stable schema (overall plus
101
+ per-capability confusion matrix and precision / recall / F1).
102
+
103
+ Runs are deterministic: providers query at temperature 0, and `audit` / `scan`
104
+ cache each verdict on a hash of the normalized code, the in-scope capability
105
+ versions, and the orchestration. Re-auditing unchanged code returns the recorded
106
+ verdicts without re-querying the model; editing a capability YAML changes its
107
+ fingerprint and invalidates affected entries. Pass `--no-cache` to always
108
+ re-query.
109
+
92
110
  ```bash
93
111
  # Multi-round adversarial debate, rendered as Markdown
94
112
  git diff | codejury audit --orchestrator debate --format markdown - > report.md
@@ -167,11 +185,14 @@ independently.
167
185
  - **Local-pattern checks are sharper than data-flow ones.** Capabilities judged
168
186
  from one spot (weak crypto, hardcoded secrets) are reliable; taint / data-flow
169
187
  ones like path traversal over-flag in single-file review because the verifier
170
- can't see whether a value is attacker-controlled. Mitigations that help but do
171
- not fully solve it: `scan --callers` (cross-file call sites for provenance),
172
- `--orchestrator challenge` (a recall-safe refutation pass that drops only
173
- provably-safe flags), `--only` to scope, or `--orchestrator debate`. Real taint
174
- precision needs data-flow analysis, not model skepticism.
188
+ can't see whether a value is attacker-controlled. Mitigations that add context
189
+ but do not fully solve it: `scan --callers` (where this file's functions are
190
+ called) and `scan --callees` (the called code it delegates to, so a sink in
191
+ another file is visible) -- pair them for both directions; `--orchestrator
192
+ challenge` (a recall-safe
193
+ refutation pass that drops only provably-safe flags); `--only` to scope; or
194
+ `--orchestrator debate`. Real taint precision still needs data-flow analysis,
195
+ not model skepticism.
175
196
  - **`scan` cost scales as files x capabilities.** It is a periodic deep audit,
176
197
  not a quick check -- scope it with `--only`. Day to day, audit the diff.
177
198
 
@@ -51,15 +51,32 @@ git diff | codejury audit --provider anthropic
51
51
  | `codejury audit [diff]` | Audit a unified diff from a file or stdin (`-`). |
52
52
  | `codejury scan <dir>` | Audit a whole directory tree, capability by capability. |
53
53
  | `codejury run <task>` | Run a named task preset (see [Tasks](#tasks)). |
54
- | `codejury eval` | Score the golden cases and report precision / recall. |
54
+ | `codejury eval` | Score the golden cases; report precision / recall / F1, overall and per capability. |
55
55
 
56
56
  Shared flags: `--orchestrator {single,pipeline,debate,reflexion,challenge}`,
57
- `--provider {anthropic,openai,litellm}`, `--model`, `--format {text,markdown,json}`.
57
+ `--provider {anthropic,openai,litellm}`, `--model`,
58
+ `--format {text,markdown,json,sarif}`.
59
+
60
+ `--format sarif` emits a SARIF 2.1.0 log (validates against the official schema)
61
+ for CI and security dashboards: each problem with a code location becomes a
62
+ result carrying its capability (as the rule id), CWE, and a precise location.
58
63
 
59
64
  Findings in known-noise categories (availability/DoS, rate limiting, memory safety
60
65
  outside C/C++) are dropped by versioned rules in
61
66
  `codejury/data/suppressions.yaml`; disable with `--no-suppress`.
62
67
 
68
+ `codejury eval` takes `--dataset <dir>` (golden YAML directory), `--split <name>`
69
+ (score only cases tagged with that `split:`, e.g. a held-out set), and
70
+ `--format {text,json}` -- the JSON report is a stable schema (overall plus
71
+ per-capability confusion matrix and precision / recall / F1).
72
+
73
+ Runs are deterministic: providers query at temperature 0, and `audit` / `scan`
74
+ cache each verdict on a hash of the normalized code, the in-scope capability
75
+ versions, and the orchestration. Re-auditing unchanged code returns the recorded
76
+ verdicts without re-querying the model; editing a capability YAML changes its
77
+ fingerprint and invalidates affected entries. Pass `--no-cache` to always
78
+ re-query.
79
+
63
80
  ```bash
64
81
  # Multi-round adversarial debate, rendered as Markdown
65
82
  git diff | codejury audit --orchestrator debate --format markdown - > report.md
@@ -138,11 +155,14 @@ independently.
138
155
  - **Local-pattern checks are sharper than data-flow ones.** Capabilities judged
139
156
  from one spot (weak crypto, hardcoded secrets) are reliable; taint / data-flow
140
157
  ones like path traversal over-flag in single-file review because the verifier
141
- can't see whether a value is attacker-controlled. Mitigations that help but do
142
- not fully solve it: `scan --callers` (cross-file call sites for provenance),
143
- `--orchestrator challenge` (a recall-safe refutation pass that drops only
144
- provably-safe flags), `--only` to scope, or `--orchestrator debate`. Real taint
145
- precision needs data-flow analysis, not model skepticism.
158
+ can't see whether a value is attacker-controlled. Mitigations that add context
159
+ but do not fully solve it: `scan --callers` (where this file's functions are
160
+ called) and `scan --callees` (the called code it delegates to, so a sink in
161
+ another file is visible) -- pair them for both directions; `--orchestrator
162
+ challenge` (a recall-safe
163
+ refutation pass that drops only provably-safe flags); `--only` to scope; or
164
+ `--orchestrator debate`. Real taint precision still needs data-flow analysis,
165
+ not model skepticism.
146
166
  - **`scan` cost scales as files x capabilities.** It is a periodic deep audit,
147
167
  not a quick check -- scope it with `--only`. Day to day, audit the diff.
148
168
 
@@ -5,4 +5,9 @@ Domain knowledge lives in YAML capability files as a first-class citizen,
5
5
  aligned with OWASP ASVS.
6
6
  """
7
7
 
8
- __version__ = "0.0.0"
8
+ from importlib.metadata import PackageNotFoundError, version
9
+
10
+ try:
11
+ __version__ = version("codejury")
12
+ except PackageNotFoundError: # running from a source tree without an install
13
+ __version__ = "0.0.0"
@@ -93,15 +93,27 @@ def _build_prompt(path: str, content: str, cap: Capability, context: str = "") -
93
93
  )
94
94
 
95
95
 
96
+ def _anti_pattern_cwes(cap: Capability) -> dict[str, str]:
97
+ """Map anti_pattern id -> CWE, so a verdict can inherit the CWE it matched."""
98
+ return {
99
+ p.id: p.cwe
100
+ for sub in cap.sub_capabilities.values()
101
+ for p in sub.anti_patterns
102
+ if p.cwe
103
+ }
104
+
105
+
96
106
  def _parse_verdicts(text: str, cap: Capability) -> list[Verdict]:
97
107
  obj = extract_json_object(text)
98
108
  if not obj:
99
109
  return []
110
+ cwe_by_id = _anti_pattern_cwes(cap)
100
111
  out: list[Verdict] = []
101
112
  for v in obj.get("verdicts", []):
102
113
  if not isinstance(v, dict):
103
114
  continue
104
115
  sub = str(v.get("sub_capability", "")).strip()
116
+ matched_anti = str_list(v.get("matched_anti"))
105
117
  out.append(
106
118
  Verdict(
107
119
  capability=f"{cap.id}.{sub}" if sub else cap.id,
@@ -109,7 +121,8 @@ def _parse_verdicts(text: str, cap: Capability) -> list[Verdict]:
109
121
  status=one_of(v.get("status"), _VALID_STATUS, "UNKNOWN"),
110
122
  reasoning=str(v.get("reasoning", "")),
111
123
  matched_correct=str_list(v.get("matched_correct")),
112
- matched_anti=str_list(v.get("matched_anti")),
124
+ matched_anti=matched_anti,
125
+ cwe=next((cwe_by_id[a] for a in matched_anti if a in cwe_by_id), ""),
113
126
  evidence=to_evidence(v.get("evidence")),
114
127
  confidence=to_float(v.get("confidence"), 0.5),
115
128
  )
@@ -16,6 +16,7 @@ from codejury.domain.artifact import CodeArtifact
16
16
  from codejury.domain.capability import Capability
17
17
  from codejury.domain.context import AnalysisContext
18
18
  from codejury.domain.result import AnalysisResult
19
+ from codejury.infrastructure.cache import VerdictCache, verdict_key
19
20
  from codejury.orchestrators.base import Orchestrator
20
21
  from codejury.orchestrators.challenge import ChallengeOrchestrator
21
22
  from codejury.orchestrators.debate import DebateOrchestrator
@@ -75,17 +76,38 @@ def build_orchestration(
75
76
  return verifier, SingleOrchestrator()
76
77
 
77
78
 
79
+ def orchestration_descriptor(strategy: str, model: str, max_tokens: int) -> str:
80
+ """The non-code, non-capability inputs that affect a verdict, as a cache tag."""
81
+ return f"{strategy}|{model}|{max_tokens}"
82
+
83
+
78
84
  def run_over_artifacts(
79
85
  artifacts: list[CodeArtifact],
80
86
  capabilities: list[Capability],
81
87
  agents: dict[str, Agent],
82
88
  orchestrator: Orchestrator,
89
+ *,
90
+ cache: VerdictCache | None = None,
91
+ orchestration: str = "",
83
92
  ) -> list[tuple[str, AnalysisResult]]:
84
- """Run the orchestration over each artifact, returning (path, result) per artifact."""
93
+ """Run the orchestration over each artifact, returning (path, result) per artifact.
94
+
95
+ When ``cache`` is given, an unchanged artifact returns its recorded result
96
+ instead of re-running the orchestrator (determinism, invariant 2).
97
+ """
85
98
  results = []
86
99
  for artifact in artifacts:
100
+ if cache is not None:
101
+ key = verdict_key(artifact, capabilities, orchestration=orchestration)
102
+ hit = cache.get(key)
103
+ if hit is not None:
104
+ results.append((artifact.path, hit))
105
+ continue
87
106
  ctx = AnalysisContext(artifact=artifact, capabilities=capabilities)
88
- results.append((artifact.path, orchestrator.run(agents, ctx)))
107
+ result = orchestrator.run(agents, ctx)
108
+ if cache is not None:
109
+ cache.put(key, result)
110
+ results.append((artifact.path, result))
89
111
  return results
90
112
 
91
113
 
@@ -94,5 +116,11 @@ def run_over_source(
94
116
  capabilities: list[Capability],
95
117
  agents: dict[str, Agent],
96
118
  orchestrator: Orchestrator,
119
+ *,
120
+ cache: VerdictCache | None = None,
121
+ orchestration: str = "",
97
122
  ) -> list[tuple[str, AnalysisResult]]:
98
- return run_over_artifacts(source.list_artifacts(), capabilities, agents, orchestrator)
123
+ return run_over_artifacts(
124
+ source.list_artifacts(), capabilities, agents, orchestrator,
125
+ cache=cache, orchestration=orchestration,
126
+ )
@@ -9,6 +9,7 @@ library, backed by the Anthropic provider, under a chosen orchestration strategy
9
9
  from __future__ import annotations
10
10
 
11
11
  import argparse
12
+ import json
12
13
  import os
13
14
  import sys
14
15
 
@@ -21,6 +22,7 @@ from codejury.assembly import (
21
22
  STRATEGIES,
22
23
  build_orchestration,
23
24
  make_provider,
25
+ orchestration_descriptor,
24
26
  run_over_artifacts,
25
27
  run_over_source,
26
28
  )
@@ -29,11 +31,12 @@ from codejury.domain.capability import Capability, load_capabilities
29
31
  from codejury.domain.context import AnalysisContext
30
32
  from codejury.domain.observation import Observation
31
33
  from codejury.domain.result import AnalysisResult
32
- from codejury.evaluation import Metrics, evaluate, load_cases
34
+ from codejury.evaluation import EvalReport, evaluate, load_cases
35
+ from codejury.infrastructure.cache import VerdictCache
33
36
  from codejury.orchestrators.single import SingleOrchestrator
34
37
  from codejury.providers.base import Provider
35
38
  from codejury.providers.mock import MockProvider
36
- from codejury.reporting import to_json, to_markdown
39
+ from codejury.reporting import to_json, to_markdown, to_sarif
37
40
  from codejury.resources import CAPABILITIES_DIR, GOLDEN_DIR, SUPPRESSIONS_FILE, TASKS_DIR
38
41
  from codejury.suppression import filter_results, load_suppressions
39
42
  from codejury.integrations.github import build_review, parse_pr_ref, post_review
@@ -43,7 +46,7 @@ from codejury.sources.repo import RepoSource
43
46
  from codejury.tasks.base import run_task
44
47
  from codejury.tasks.registry import load_tasks
45
48
 
46
- _FORMATS = ("text", "markdown", "json")
49
+ _FORMATS = ("text", "markdown", "json", "sarif")
47
50
 
48
51
 
49
52
  def dry_run() -> AnalysisResult:
@@ -69,10 +72,14 @@ def audit(
69
72
  model: str,
70
73
  max_tokens: int = 2048,
71
74
  strategy: str = "single",
75
+ cache: VerdictCache | None = None,
72
76
  ) -> list[tuple[str, AnalysisResult]]:
73
77
  """Audit each changed file in `diff_text`, returning (path, result) per file."""
74
78
  agents, orchestrator = build_orchestration(strategy, provider=provider, model=model, max_tokens=max_tokens)
75
- return run_over_source(DiffSource(diff_text), capabilities, agents, orchestrator)
79
+ return run_over_source(
80
+ DiffSource(diff_text), capabilities, agents, orchestrator,
81
+ cache=cache, orchestration=orchestration_descriptor(strategy, model, max_tokens),
82
+ )
76
83
 
77
84
 
78
85
  def scan(
@@ -86,10 +93,16 @@ def scan(
86
93
  extensions: tuple[str, ...] = (".py",),
87
94
  max_chars: int = 200_000,
88
95
  with_callers: bool = False,
96
+ with_callees: bool = False,
97
+ cache: VerdictCache | None = None,
89
98
  ) -> list[tuple[str, AnalysisResult]]:
90
99
  """Audit every matching file in a directory tree, returning (path, result) per artifact."""
91
100
  source = RepoSource(
92
- directory, extensions=extensions, chunker=Chunker(max_chars=max_chars), with_callers=with_callers
101
+ directory,
102
+ extensions=extensions,
103
+ chunker=Chunker(max_chars=max_chars),
104
+ with_callers=with_callers,
105
+ with_callees=with_callees,
93
106
  )
94
107
  artifacts = source.list_artifacts()
95
108
  calls = len(artifacts) * len(capabilities)
@@ -98,7 +111,10 @@ def scan(
98
111
  file=sys.stderr,
99
112
  )
100
113
  agents, orchestrator = build_orchestration(strategy, provider=provider, model=model, max_tokens=max_tokens)
101
- return run_over_artifacts(artifacts, capabilities, agents, orchestrator)
114
+ return run_over_artifacts(
115
+ artifacts, capabilities, agents, orchestrator,
116
+ cache=cache, orchestration=orchestration_descriptor(strategy, model, max_tokens),
117
+ )
102
118
 
103
119
 
104
120
  def _render_dry_run(result: AnalysisResult) -> str:
@@ -137,7 +153,7 @@ def _render_observation(o: Observation) -> str:
137
153
 
138
154
 
139
155
  def _render_results(fmt: str, results: list[tuple[str, AnalysisResult]]) -> str:
140
- return {"text": _render_audit, "markdown": to_markdown, "json": to_json}[fmt](results)
156
+ return {"text": _render_audit, "markdown": to_markdown, "json": to_json, "sarif": to_sarif}[fmt](results)
141
157
 
142
158
 
143
159
  def _maybe_suppress(results: list[tuple[str, AnalysisResult]], enabled: bool) -> list[tuple[str, AnalysisResult]]:
@@ -184,11 +200,16 @@ def _maybe_post_github(ref: str | None, results: list[tuple[str, AnalysisResult]
184
200
  print(f"github review failed: {exc}", file=sys.stderr)
185
201
 
186
202
 
187
- def _render_metrics(m: Metrics) -> str:
188
- return (
189
- f"cases: {m.total} (tp={m.tp} fp={m.fp} tn={m.tn} fn={m.fn})\n"
190
- f"precision: {m.precision:.2f} recall: {m.recall:.2f} accuracy: {m.accuracy:.2f}"
191
- )
203
+ def _render_eval(report: EvalReport) -> str:
204
+ def line(label: str, m) -> str:
205
+ return (
206
+ f"{label:<20} tp={m.tp} fp={m.fp} tn={m.tn} fn={m.fn} "
207
+ f"P={m.precision:.2f} R={m.recall:.2f} F1={m.f1:.2f}"
208
+ )
209
+
210
+ lines = [line(f"overall ({report.overall.total} cases)", report.overall)]
211
+ lines += [line(cap, m) for cap, m in sorted(report.by_capability.items())]
212
+ return "\n".join(lines)
192
213
 
193
214
 
194
215
  def _read_diff(path: str) -> str:
@@ -216,6 +237,7 @@ def main(argv: list[str] | None = None) -> int:
216
237
  audit_p.add_argument("--api-base", default=DEFAULT_API_BASE, help="provider base URL (env: CODEJURY_API_BASE)")
217
238
  audit_p.add_argument("--api-key", default=DEFAULT_API_KEY, help="provider API key (env: CODEJURY_API_KEY)")
218
239
  audit_p.add_argument("--no-suppress", action="store_true", help="disable the known-noise suppression filter")
240
+ audit_p.add_argument("--no-cache", action="store_true", help="bypass the verdict cache (always re-query the model)")
219
241
  audit_p.add_argument("--fail-on", choices=_FAIL_ON, default=None, dest="fail_on", help="exit 1 if a finding at/above this severity is found")
220
242
  audit_p.add_argument("--github", default=None, help="post a PR review: owner/repo#number (needs GITHUB_TOKEN)")
221
243
 
@@ -231,11 +253,15 @@ def main(argv: list[str] | None = None) -> int:
231
253
  scan_p.add_argument("--max-tokens", type=int, default=2048)
232
254
  scan_p.add_argument("--max-chars", type=int, default=200_000, help="chunk budget; default keeps whole files")
233
255
  scan_p.add_argument(
234
- "--callers", action="store_true", help="add cross-file call sites as context (cuts taint false positives)"
256
+ "--callers", action="store_true", help="add cross-file context: where this file's functions are called"
257
+ )
258
+ scan_p.add_argument(
259
+ "--callees", action="store_true", help="add cross-file context: the called code this file delegates to"
235
260
  )
236
261
  scan_p.add_argument("--api-base", default=DEFAULT_API_BASE, help="provider base URL (env: CODEJURY_API_BASE)")
237
262
  scan_p.add_argument("--api-key", default=DEFAULT_API_KEY, help="provider API key (env: CODEJURY_API_KEY)")
238
263
  scan_p.add_argument("--no-suppress", action="store_true", help="disable the known-noise suppression filter")
264
+ scan_p.add_argument("--no-cache", action="store_true", help="bypass the verdict cache (always re-query the model)")
239
265
  scan_p.add_argument("--fail-on", choices=_FAIL_ON, default=None, dest="fail_on", help="exit 1 if a finding at/above this severity is found")
240
266
 
241
267
  run_p = sub.add_parser("run", help="run a named task preset against a unified diff")
@@ -248,9 +274,11 @@ def main(argv: list[str] | None = None) -> int:
248
274
  run_p.add_argument("--fail-on", choices=_FAIL_ON, default=None, dest="fail_on", help="exit 1 if a finding at/above this severity is found")
249
275
 
250
276
  eval_p = sub.add_parser("eval", help="score golden cases and report precision/recall")
251
- eval_p.add_argument("--golden", default=GOLDEN_DIR, help="golden case YAML directory")
277
+ eval_p.add_argument("--dataset", default=GOLDEN_DIR, help="golden case YAML directory")
278
+ eval_p.add_argument("--split", default=None, help="only score cases whose 'split' matches (e.g. held-out)")
252
279
  eval_p.add_argument("--capabilities", default=CAPABILITIES_DIR, help="capability YAML directory")
253
280
  eval_p.add_argument("--provider", choices=PROVIDERS, default="anthropic")
281
+ eval_p.add_argument("--format", choices=("text", "json"), default="text", dest="fmt")
254
282
  eval_p.add_argument("--model", default=DEFAULT_MODEL)
255
283
  eval_p.add_argument("--api-base", default=DEFAULT_API_BASE, help="provider base URL (env: CODEJURY_API_BASE)")
256
284
  eval_p.add_argument("--api-key", default=DEFAULT_API_KEY, help="provider API key (env: CODEJURY_API_KEY)")
@@ -267,6 +295,7 @@ def main(argv: list[str] | None = None) -> int:
267
295
  model=args.model,
268
296
  max_tokens=args.max_tokens,
269
297
  strategy=args.orchestrator,
298
+ cache=None if args.no_cache else VerdictCache(),
270
299
  )
271
300
  results = _maybe_suppress(results, not args.no_suppress)
272
301
  print(_render_results(args.fmt, results))
@@ -289,6 +318,8 @@ def main(argv: list[str] | None = None) -> int:
289
318
  extensions=extensions,
290
319
  max_chars=args.max_chars,
291
320
  with_callers=args.callers,
321
+ with_callees=args.callees,
322
+ cache=None if args.no_cache else VerdictCache(),
292
323
  )
293
324
  results = _maybe_suppress(results, not args.no_suppress)
294
325
  print(_render_results(args.fmt, results))
@@ -308,8 +339,8 @@ def main(argv: list[str] | None = None) -> int:
308
339
 
309
340
  if args.command == "eval":
310
341
  try:
311
- metrics = evaluate(
312
- load_cases(args.golden),
342
+ report = evaluate(
343
+ load_cases(args.dataset, split=args.split),
313
344
  load_capabilities(args.capabilities),
314
345
  provider=make_provider(args.provider, api_key=args.api_key, api_base=args.api_base),
315
346
  model=args.model,
@@ -319,7 +350,7 @@ def main(argv: list[str] | None = None) -> int:
319
350
  # as one line, not a traceback (audit gets this via the orchestrator).
320
351
  print(f"eval failed: {exc}")
321
352
  return 1
322
- print(_render_metrics(metrics))
353
+ print(json.dumps(report.to_dict(), indent=2) if args.fmt == "json" else _render_eval(report))
323
354
  return 0
324
355
 
325
356
  if args.command in (None, "dry-run"):
@@ -46,7 +46,34 @@ sub_capabilities:
46
46
  signals: ["admin:admin", "password=admin", "changeme"]
47
47
  why_bad: Default credentials are public knowledge and trivially abused
48
48
 
49
+ transport_security:
50
+ correct_patterns:
51
+ - id: TLS-OK-1
52
+ description: >-
53
+ Leave TLS certificate verification at its secure default -- verify omitted or
54
+ verify=True, the default SSL context, hostname checking on
55
+ signals: ["verify=True", "create_default_context", "requests.get(", "requests.post("]
56
+ why_ok: >-
57
+ The secure default validates the certificate chain and hostname. An https:// call
58
+ that does not disable verification is fine; do not flag it just for making a
59
+ request or for omitting verify.
60
+
61
+ anti_patterns:
62
+ - id: TLS-BAD-1
63
+ cwe: CWE-295
64
+ severity: HIGH
65
+ description: >-
66
+ Disable TLS certificate or hostname verification -- verify=False, CERT_NONE,
67
+ check_hostname=False, or an unverified SSL context
68
+ signals: ["verify=False", "CERT_NONE", "check_hostname = False", "_create_unverified_context"]
69
+ why_bad: An unverified TLS connection is open to a man-in-the-middle attack despite the https:// scheme
70
+ example_bad: |
71
+ requests.get("https://api.partner.com/data", verify=False)
72
+ example_good: |
73
+ requests.get("https://api.partner.com/data") # verify defaults to True
74
+
49
75
  trigger_signals:
50
76
  - dependency manifests and lock files
51
77
  - install or bootstrap scripts fetching remote code
52
78
  - file permission, bucket ACL, or default credential settings
79
+ - TLS client calls that set verify or build a custom SSL context
@@ -105,7 +105,67 @@ sub_capabilities:
105
105
  if not target.is_relative_to(UPLOAD_DIR):
106
106
  raise ValueError("path escapes upload dir")
107
107
 
108
+ ssrf:
109
+ correct_patterns:
110
+ - id: SSRF-OK-1
111
+ description: Validate the request URL's host against an allowlist before fetching it
112
+ signals: ["urlparse(", ".hostname", "ALLOWED", "allowlist"]
113
+ why_ok: An attacker cannot redirect the fetch to an internal target the list omits
114
+
115
+ - id: SSRF-OK-2
116
+ description: >-
117
+ Fetch a URL that is not attacker-controlled -- a constant, a value from trusted
118
+ config, or an operator-supplied argument
119
+ why_ok: >-
120
+ SSRF needs an external attacker to control the destination. A constant URL or one
121
+ from trusted config is not a finding, even though it goes through a fetch call.
122
+
123
+ anti_patterns:
124
+ - id: SSRF-BAD-1
125
+ cwe: CWE-918
126
+ severity: HIGH
127
+ description: >-
128
+ Fetch a URL taken from externally controlled input (HTTP request, form, query, or
129
+ message field) without validating its host against an allowlist. NOT this: a
130
+ constant URL, one from trusted config, or an operator-supplied argument.
131
+ signals: ["requests.get(", "urllib.request.urlopen(", "httpx.", "request.args", "request.json"]
132
+ why_bad: >-
133
+ The server makes the request, so attacker input reaches internal-only targets --
134
+ cloud metadata, localhost admin ports, internal APIs behind the firewall.
135
+ example_bad: |
136
+ requests.get(request.args["url"]).text
137
+ example_good: |
138
+ if urlparse(url).hostname not in ALLOWED_HOSTS:
139
+ raise ValueError("host not allowed")
140
+ requests.get(url).text
141
+
142
+ insecure_deserialization:
143
+ correct_patterns:
144
+ - id: DESER-OK-1
145
+ description: >-
146
+ Parse untrusted input with a data-only parser -- json.loads or yaml.safe_load --
147
+ that cannot instantiate arbitrary objects
148
+ signals: ["json.loads", "yaml.safe_load"]
149
+ why_ok: A data-only parser builds plain structures and has no code-execution path
150
+
151
+ anti_patterns:
152
+ - id: DESER-BAD-1
153
+ cwe: CWE-502
154
+ severity: CRITICAL
155
+ description: >-
156
+ Deserialize externally controlled bytes with an object-constructing deserializer --
157
+ pickle, marshal, yaml.load (unsafe Loader), or jsonpickle. NOT this: a data-only
158
+ parser like json.loads or yaml.safe_load.
159
+ signals: ["pickle.loads", "pickle.load(", "yaml.load(", "marshal.loads", "jsonpickle.decode"]
160
+ why_bad: These reconstruct arbitrary objects, so crafted input runs code during deserialization
161
+ example_bad: |
162
+ pickle.loads(base64.b64decode(request.data))
163
+ example_good: |
164
+ json.loads(request.data)
165
+
108
166
  trigger_signals:
109
167
  - raw SQL strings or cursor.execute calls appear
110
168
  - imports of os, subprocess, or shlex with process execution
111
169
  - file paths built from request, form, or query parameters
170
+ - outbound HTTP fetches (requests, urllib, httpx) to a non-constant URL
171
+ - deserialization calls (pickle, yaml.load, marshal) on external input
@@ -0,0 +1,14 @@
1
+ # Adversarial positive: weak password hash hidden behind hashlib.new(variable).
2
+ capability: authn
3
+ vulnerable: true
4
+ expected_verdict: VULNERABLE
5
+ cwe: CWE-916
6
+ source: synthetic
7
+ notes: >
8
+ hashlib.new("md5") is the same weak, unsalted, fast hash as hashlib.md5(), just
9
+ reached through a variable algorithm name. Unsuitable for password storage
10
+ (needs bcrypt/scrypt/argon2). The indirection should not hide it.
11
+ code: |
12
+ def hash_pw(pw):
13
+ algo = "md5"
14
+ return hashlib.new(algo, pw.encode()).hexdigest()
@@ -0,0 +1,14 @@
1
+ capability: business_logic
2
+ vulnerable: true
3
+ expected_verdict: VULNERABLE
4
+ cwe: CWE-602
5
+ source: synthetic
6
+ split: held-out
7
+ notes: >
8
+ Quantity and unit price come straight from the request and are never checked
9
+ server-side. A negative quantity credits the customer; a client-set price lets
10
+ them pay anything. Price must come from the catalog and quantity must be > 0.
11
+ code: |
12
+ def add_to_order(order, item_id, quantity, unit_price):
13
+ order.lines.append((item_id, quantity, unit_price))
14
+ order.total += quantity * unit_price
@@ -0,0 +1,15 @@
1
+ capability: business_logic
2
+ vulnerable: false
3
+ expected_verdict: SECURE
4
+ cwe: ""
5
+ source: synthetic
6
+ notes: >
7
+ Quantity is validated as positive and the price is looked up server-side from
8
+ the catalog, so the client cannot tamper with either.
9
+ code: |
10
+ def add_to_order(order, item_id, quantity):
11
+ if quantity < 1:
12
+ raise ValueError("quantity must be positive")
13
+ unit_price = catalog.price_of(item_id) # trusted server-side price
14
+ order.lines.append((item_id, quantity, unit_price))
15
+ order.total += quantity * unit_price
@@ -0,0 +1,22 @@
1
+ # False-positive-prone negative: a subprocess call driven by a request value
2
+ # looks like command injection, but the value only selects a fixed argv from a
3
+ # table and never reaches a shell.
4
+ capability: input_validation
5
+ vulnerable: false
6
+ expected_verdict: SECURE
7
+ cwe: ""
8
+ source: synthetic
9
+ split: held-out
10
+ notes: >
11
+ `name` only indexes a fixed dict of argument lists (KeyError on anything
12
+ unknown); no user string is interpolated into a command and shell=False.
13
+ Flagging this as command injection is a false positive.
14
+ code: |
15
+ ACTIONS = {
16
+ "start": ["systemctl", "start", "web"],
17
+ "stop": ["systemctl", "stop", "web"],
18
+ }
19
+
20
+ def run_action(name):
21
+ argv = ACTIONS[name]
22
+ subprocess.run(argv, shell=False)
@@ -0,0 +1,14 @@
1
+ capability: data_protection
2
+ vulnerable: true
3
+ expected_verdict: VULNERABLE
4
+ cwe: CWE-312
5
+ source: synthetic
6
+ notes: >
7
+ The full card number and the CVV are written to the database in cleartext.
8
+ The CVV must never be stored at all, and the PAN must be tokenized/encrypted.
9
+ code: |
10
+ def save_card(user_id, card_number, cvv):
11
+ db.execute(
12
+ "INSERT INTO cards (user_id, pan, cvv) VALUES (?, ?, ?)",
13
+ (user_id, card_number, cvv),
14
+ )
@@ -0,0 +1,16 @@
1
+ capability: data_protection
2
+ vulnerable: false
3
+ expected_verdict: SECURE
4
+ cwe: ""
5
+ source: synthetic
6
+ split: held-out
7
+ notes: >
8
+ The PAN is replaced by a vault token before storage; the CVV is never
9
+ persisted. Nothing sensitive reaches the database.
10
+ code: |
11
+ def save_card(user_id, card_number):
12
+ token = vault.tokenize(card_number) # store a token, never the PAN
13
+ db.execute(
14
+ "INSERT INTO cards (user_id, token) VALUES (?, ?)",
15
+ (user_id, token),
16
+ )
@@ -0,0 +1,11 @@
1
+ capability: dependency_config
2
+ vulnerable: true
3
+ expected_verdict: VULNERABLE
4
+ cwe: CWE-295
5
+ source: synthetic
6
+ notes: >
7
+ TLS certificate verification is disabled, so the connection is open to a
8
+ man-in-the-middle attack despite the https:// scheme.
9
+ code: |
10
+ def fetch_partner_data():
11
+ return requests.get("https://api.partner.com/data", verify=False).json()