codejury 0.5.1__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. {codejury-0.5.1 → codejury-0.6.0}/PKG-INFO +21 -3
  2. {codejury-0.5.1 → codejury-0.6.0}/README.md +19 -2
  3. {codejury-0.5.1 → codejury-0.6.0}/codejury/__init__.py +6 -1
  4. {codejury-0.5.1 → codejury-0.6.0}/codejury/agents/verifier.py +14 -1
  5. {codejury-0.5.1 → codejury-0.6.0}/codejury/assembly.py +31 -3
  6. {codejury-0.5.1 → codejury-0.6.0}/codejury/cli.py +37 -15
  7. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/capabilities/dependency_config.yaml +27 -0
  8. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/capabilities/input_validation.yaml +60 -0
  9. codejury-0.6.0/codejury/data/golden/authn_weak_hash_indirect_vuln.yaml +14 -0
  10. codejury-0.6.0/codejury/data/golden/business_logic_price_tamper_vuln.yaml +14 -0
  11. codejury-0.6.0/codejury/data/golden/business_logic_server_checked_safe.yaml +15 -0
  12. codejury-0.6.0/codejury/data/golden/cmdi_fixed_argv_safe.yaml +22 -0
  13. codejury-0.6.0/codejury/data/golden/data_protection_plaintext_pii_vuln.yaml +14 -0
  14. codejury-0.6.0/codejury/data/golden/data_protection_tokenized_safe.yaml +16 -0
  15. codejury-0.6.0/codejury/data/golden/dependency_config_tls_verify_off_vuln.yaml +11 -0
  16. codejury-0.6.0/codejury/data/golden/dependency_config_tls_verify_on_safe.yaml +11 -0
  17. codejury-0.6.0/codejury/data/golden/deserialize_json_safe.yaml +13 -0
  18. codejury-0.6.0/codejury/data/golden/deserialize_pickle_vuln.yaml +12 -0
  19. codejury-0.6.0/codejury/data/golden/error_logging_redacted_safe.yaml +11 -0
  20. codejury-0.6.0/codejury/data/golden/error_logging_secret_leak_vuln.yaml +12 -0
  21. codejury-0.6.0/codejury/data/golden/literal_eval_safe.yaml +14 -0
  22. codejury-0.6.0/codejury/data/golden/path_basename_safe.yaml +15 -0
  23. codejury-0.6.0/codejury/data/golden/session_fixation_vuln.yaml +16 -0
  24. codejury-0.6.0/codejury/data/golden/session_secure_cookie_safe.yaml +18 -0
  25. codejury-0.6.0/codejury/data/golden/sql_constant_concat_safe.yaml +14 -0
  26. codejury-0.6.0/codejury/data/golden/sqli_indirect_var_vuln.yaml +16 -0
  27. codejury-0.6.0/codejury/data/golden/ssrf_allowlist_safe.yaml +17 -0
  28. codejury-0.6.0/codejury/data/golden/ssrf_constant_url_safe.yaml +13 -0
  29. codejury-0.6.0/codejury/data/golden/ssrf_substring_allowlist_bypass_vuln.yaml +17 -0
  30. codejury-0.6.0/codejury/data/golden/ssrf_user_url_vuln.yaml +12 -0
  31. codejury-0.6.0/codejury/data/golden/xfile_idor_no_check_vuln.yaml +20 -0
  32. codejury-0.6.0/codejury/data/golden/xfile_idor_owner_checked_safe.yaml +21 -0
  33. codejury-0.6.0/codejury/data/golden/xfile_path_sanitized_safe.yaml +22 -0
  34. codejury-0.6.0/codejury/data/golden/xfile_path_tainted_vuln.yaml +19 -0
  35. codejury-0.6.0/codejury/data/golden/xss_textcontent_safe.yaml +15 -0
  36. {codejury-0.5.1 → codejury-0.6.0}/codejury/domain/capability.py +16 -1
  37. {codejury-0.5.1 → codejury-0.6.0}/codejury/domain/observation.py +20 -0
  38. {codejury-0.5.1 → codejury-0.6.0}/codejury/domain/result.py +15 -1
  39. {codejury-0.5.1 → codejury-0.6.0}/codejury/evaluation.py +62 -10
  40. codejury-0.6.0/codejury/infrastructure/cache.py +76 -0
  41. {codejury-0.5.1 → codejury-0.6.0}/codejury/providers/anthropic.py +1 -0
  42. {codejury-0.5.1 → codejury-0.6.0}/codejury/providers/litellm.py +1 -1
  43. {codejury-0.5.1 → codejury-0.6.0}/codejury/providers/openai.py +1 -0
  44. {codejury-0.5.1 → codejury-0.6.0}/codejury/reporting.py +106 -0
  45. {codejury-0.5.1 → codejury-0.6.0}/codejury.egg-info/PKG-INFO +21 -3
  46. {codejury-0.5.1 → codejury-0.6.0}/codejury.egg-info/SOURCES.txt +30 -0
  47. {codejury-0.5.1 → codejury-0.6.0}/codejury.egg-info/requires.txt +1 -0
  48. {codejury-0.5.1 → codejury-0.6.0}/pyproject.toml +2 -2
  49. {codejury-0.5.1 → codejury-0.6.0}/tests/test_anthropic_provider.py +1 -0
  50. codejury-0.6.0/tests/test_cache.py +117 -0
  51. codejury-0.6.0/tests/test_evaluation.py +152 -0
  52. {codejury-0.5.1 → codejury-0.6.0}/tests/test_litellm_provider.py +1 -0
  53. {codejury-0.5.1 → codejury-0.6.0}/tests/test_openai_provider.py +1 -0
  54. codejury-0.6.0/tests/test_sarif.py +115 -0
  55. codejury-0.5.1/tests/test_evaluation.py +0 -69
  56. {codejury-0.5.1 → codejury-0.6.0}/LICENSE +0 -0
  57. {codejury-0.5.1 → codejury-0.6.0}/codejury/agents/__init__.py +0 -0
  58. {codejury-0.5.1 → codejury-0.6.0}/codejury/agents/base.py +0 -0
  59. {codejury-0.5.1 → codejury-0.6.0}/codejury/agents/debate.py +0 -0
  60. {codejury-0.5.1 → codejury-0.6.0}/codejury/agents/mock.py +0 -0
  61. {codejury-0.5.1 → codejury-0.6.0}/codejury/agents/parsing.py +0 -0
  62. {codejury-0.5.1 → codejury-0.6.0}/codejury/agents/refuter.py +0 -0
  63. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/capabilities/authentication.yaml +0 -0
  64. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/capabilities/authorization.yaml +0 -0
  65. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/capabilities/business_logic.yaml +0 -0
  66. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/capabilities/crypto.yaml +0 -0
  67. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/capabilities/data_protection.yaml +0 -0
  68. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/capabilities/error_logging.yaml +0 -0
  69. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/capabilities/output_encoding.yaml +0 -0
  70. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/capabilities/secrets.yaml +0 -0
  71. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/capabilities/session.yaml +0 -0
  72. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/authn_bcrypt_password.yaml +0 -0
  73. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/authn_jwt_noverify_vuln.yaml +0 -0
  74. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/authn_jwt_verified_safe.yaml +0 -0
  75. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/authn_sha256_checksum_safe.yaml +0 -0
  76. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/authn_sha256_password.yaml +0 -0
  77. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/authz_idor_vuln.yaml +0 -0
  78. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/authz_owner_safe.yaml +0 -0
  79. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/cmdi_ossystem_vuln.yaml +0 -0
  80. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/cmdi_subprocess_safe.yaml +0 -0
  81. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/crypto_aesgcm_safe.yaml +0 -0
  82. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/crypto_ecb_vuln.yaml +0 -0
  83. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/path_contained_safe.yaml +0 -0
  84. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/path_traversal_vuln.yaml +0 -0
  85. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/secrets_env_safe.yaml +0 -0
  86. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/secrets_hardcoded_vuln.yaml +0 -0
  87. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/sqli_format_vuln.yaml +0 -0
  88. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/sqli_fstring_query.yaml +0 -0
  89. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/sqli_parameterized_query.yaml +0 -0
  90. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/xss_innerhtml_constant_safe.yaml +0 -0
  91. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/golden/xss_innerhtml_vuln.yaml +0 -0
  92. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/suppressions.yaml +0 -0
  93. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/tasks/audit_diff_debate.yaml +0 -0
  94. {codejury-0.5.1 → codejury-0.6.0}/codejury/data/tasks/quick_scan_single.yaml +0 -0
  95. {codejury-0.5.1 → codejury-0.6.0}/codejury/domain/__init__.py +0 -0
  96. {codejury-0.5.1 → codejury-0.6.0}/codejury/domain/artifact.py +0 -0
  97. {codejury-0.5.1 → codejury-0.6.0}/codejury/domain/context.py +0 -0
  98. {codejury-0.5.1 → codejury-0.6.0}/codejury/infrastructure/__init__.py +0 -0
  99. {codejury-0.5.1 → codejury-0.6.0}/codejury/infrastructure/json_parse.py +0 -0
  100. {codejury-0.5.1 → codejury-0.6.0}/codejury/integrations/__init__.py +0 -0
  101. {codejury-0.5.1 → codejury-0.6.0}/codejury/integrations/github.py +0 -0
  102. {codejury-0.5.1 → codejury-0.6.0}/codejury/orchestrators/__init__.py +0 -0
  103. {codejury-0.5.1 → codejury-0.6.0}/codejury/orchestrators/base.py +0 -0
  104. {codejury-0.5.1 → codejury-0.6.0}/codejury/orchestrators/challenge.py +0 -0
  105. {codejury-0.5.1 → codejury-0.6.0}/codejury/orchestrators/debate.py +0 -0
  106. {codejury-0.5.1 → codejury-0.6.0}/codejury/orchestrators/pipeline.py +0 -0
  107. {codejury-0.5.1 → codejury-0.6.0}/codejury/orchestrators/reflexion.py +0 -0
  108. {codejury-0.5.1 → codejury-0.6.0}/codejury/orchestrators/single.py +0 -0
  109. {codejury-0.5.1 → codejury-0.6.0}/codejury/providers/__init__.py +0 -0
  110. {codejury-0.5.1 → codejury-0.6.0}/codejury/providers/base.py +0 -0
  111. {codejury-0.5.1 → codejury-0.6.0}/codejury/providers/mock.py +0 -0
  112. {codejury-0.5.1 → codejury-0.6.0}/codejury/providers/openai_format.py +0 -0
  113. {codejury-0.5.1 → codejury-0.6.0}/codejury/providers/retry.py +0 -0
  114. {codejury-0.5.1 → codejury-0.6.0}/codejury/resources.py +0 -0
  115. {codejury-0.5.1 → codejury-0.6.0}/codejury/sources/__init__.py +0 -0
  116. {codejury-0.5.1 → codejury-0.6.0}/codejury/sources/base.py +0 -0
  117. {codejury-0.5.1 → codejury-0.6.0}/codejury/sources/callers.py +0 -0
  118. {codejury-0.5.1 → codejury-0.6.0}/codejury/sources/chunker.py +0 -0
  119. {codejury-0.5.1 → codejury-0.6.0}/codejury/sources/diff.py +0 -0
  120. {codejury-0.5.1 → codejury-0.6.0}/codejury/sources/function.py +0 -0
  121. {codejury-0.5.1 → codejury-0.6.0}/codejury/sources/mock.py +0 -0
  122. {codejury-0.5.1 → codejury-0.6.0}/codejury/sources/repo.py +0 -0
  123. {codejury-0.5.1 → codejury-0.6.0}/codejury/suppression.py +0 -0
  124. {codejury-0.5.1 → codejury-0.6.0}/codejury/tasks/__init__.py +0 -0
  125. {codejury-0.5.1 → codejury-0.6.0}/codejury/tasks/base.py +0 -0
  126. {codejury-0.5.1 → codejury-0.6.0}/codejury/tasks/registry.py +0 -0
  127. {codejury-0.5.1 → codejury-0.6.0}/codejury.egg-info/dependency_links.txt +0 -0
  128. {codejury-0.5.1 → codejury-0.6.0}/codejury.egg-info/entry_points.txt +0 -0
  129. {codejury-0.5.1 → codejury-0.6.0}/codejury.egg-info/top_level.txt +0 -0
  130. {codejury-0.5.1 → codejury-0.6.0}/setup.cfg +0 -0
  131. {codejury-0.5.1 → codejury-0.6.0}/tests/test_assembly.py +0 -0
  132. {codejury-0.5.1 → codejury-0.6.0}/tests/test_audit_pipeline.py +0 -0
  133. {codejury-0.5.1 → codejury-0.6.0}/tests/test_callers.py +0 -0
  134. {codejury-0.5.1 → codejury-0.6.0}/tests/test_capability.py +0 -0
  135. {codejury-0.5.1 → codejury-0.6.0}/tests/test_challenge.py +0 -0
  136. {codejury-0.5.1 → codejury-0.6.0}/tests/test_cli_audit.py +0 -0
  137. {codejury-0.5.1 → codejury-0.6.0}/tests/test_context.py +0 -0
  138. {codejury-0.5.1 → codejury-0.6.0}/tests/test_debate_agents.py +0 -0
  139. {codejury-0.5.1 → codejury-0.6.0}/tests/test_debate_orchestrator.py +0 -0
  140. {codejury-0.5.1 → codejury-0.6.0}/tests/test_diff_source.py +0 -0
  141. {codejury-0.5.1 → codejury-0.6.0}/tests/test_function_source.py +0 -0
  142. {codejury-0.5.1 → codejury-0.6.0}/tests/test_integrations.py +0 -0
  143. {codejury-0.5.1 → codejury-0.6.0}/tests/test_json_parse.py +0 -0
  144. {codejury-0.5.1 → codejury-0.6.0}/tests/test_orchestrator.py +0 -0
  145. {codejury-0.5.1 → codejury-0.6.0}/tests/test_pipeline_orchestrator.py +0 -0
  146. {codejury-0.5.1 → codejury-0.6.0}/tests/test_reflexion_orchestrator.py +0 -0
  147. {codejury-0.5.1 → codejury-0.6.0}/tests/test_repo_source.py +0 -0
  148. {codejury-0.5.1 → codejury-0.6.0}/tests/test_reporting.py +0 -0
  149. {codejury-0.5.1 → codejury-0.6.0}/tests/test_retry_provider.py +0 -0
  150. {codejury-0.5.1 → codejury-0.6.0}/tests/test_suppression.py +0 -0
  151. {codejury-0.5.1 → codejury-0.6.0}/tests/test_tasks.py +0 -0
  152. {codejury-0.5.1 → codejury-0.6.0}/tests/test_verifier.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codejury
3
- Version: 0.5.1
3
+ Version: 0.6.0
4
4
  Summary: General-purpose Application Security AI audit framework -- five-layer architecture, capabilities as first-class data
5
5
  Author: AISecLabs
6
6
  License-Expression: MIT
@@ -25,6 +25,7 @@ Provides-Extra: litellm
25
25
  Requires-Dist: litellm>=1.0; extra == "litellm"
26
26
  Provides-Extra: dev
27
27
  Requires-Dist: pytest>=8.0; extra == "dev"
28
+ Requires-Dist: jsonschema>=4.0; extra == "dev"
28
29
  Dynamic: license-file
29
30
 
30
31
  # codejury
@@ -80,15 +81,32 @@ git diff | codejury audit --provider anthropic
80
81
  | `codejury audit [diff]` | Audit a unified diff from a file or stdin (`-`). |
81
82
  | `codejury scan <dir>` | Audit a whole directory tree, capability by capability. |
82
83
  | `codejury run <task>` | Run a named task preset (see [Tasks](#tasks)). |
83
- | `codejury eval` | Score the golden cases and report precision / recall. |
84
+ | `codejury eval` | Score the golden cases; report precision / recall / F1, overall and per capability. |
84
85
 
85
86
  Shared flags: `--orchestrator {single,pipeline,debate,reflexion,challenge}`,
86
- `--provider {anthropic,openai,litellm}`, `--model`, `--format {text,markdown,json}`.
87
+ `--provider {anthropic,openai,litellm}`, `--model`,
88
+ `--format {text,markdown,json,sarif}`.
89
+
90
+ `--format sarif` emits a SARIF 2.1.0 log (validates against the official schema)
91
+ for CI and security dashboards: each problem with a code location becomes a
92
+ result carrying its capability (as the rule id), CWE, and a precise location.
87
93
 
88
94
  Findings in known-noise categories (availability/DoS, rate limiting, memory safety
89
95
  outside C/C++) are dropped by versioned rules in
90
96
  `codejury/data/suppressions.yaml`; disable with `--no-suppress`.
91
97
 
98
+ `codejury eval` takes `--dataset <dir>` (golden YAML directory), `--split <name>`
99
+ (score only cases tagged with that `split:`, e.g. a held-out set), and
100
+ `--format {text,json}` -- the JSON report is a stable schema (overall plus
101
+ per-capability confusion matrix and precision / recall / F1).
102
+
103
+ Runs are deterministic: providers query at temperature 0, and `audit` / `scan`
104
+ cache each verdict on a hash of the normalized code, the in-scope capability
105
+ versions, and the orchestration. Re-auditing unchanged code returns the recorded
106
+ verdicts without re-querying the model; editing a capability YAML changes its
107
+ fingerprint and invalidates affected entries. Pass `--no-cache` to always
108
+ re-query.
109
+
92
110
  ```bash
93
111
  # Multi-round adversarial debate, rendered as Markdown
94
112
  git diff | codejury audit --orchestrator debate --format markdown - > report.md
@@ -51,15 +51,32 @@ git diff | codejury audit --provider anthropic
51
51
  | `codejury audit [diff]` | Audit a unified diff from a file or stdin (`-`). |
52
52
  | `codejury scan <dir>` | Audit a whole directory tree, capability by capability. |
53
53
  | `codejury run <task>` | Run a named task preset (see [Tasks](#tasks)). |
54
- | `codejury eval` | Score the golden cases and report precision / recall. |
54
+ | `codejury eval` | Score the golden cases; report precision / recall / F1, overall and per capability. |
55
55
 
56
56
  Shared flags: `--orchestrator {single,pipeline,debate,reflexion,challenge}`,
57
- `--provider {anthropic,openai,litellm}`, `--model`, `--format {text,markdown,json}`.
57
+ `--provider {anthropic,openai,litellm}`, `--model`,
58
+ `--format {text,markdown,json,sarif}`.
59
+
60
+ `--format sarif` emits a SARIF 2.1.0 log (validates against the official schema)
61
+ for CI and security dashboards: each problem with a code location becomes a
62
+ result carrying its capability (as the rule id), CWE, and a precise location.
58
63
 
59
64
  Findings in known-noise categories (availability/DoS, rate limiting, memory safety
60
65
  outside C/C++) are dropped by versioned rules in
61
66
  `codejury/data/suppressions.yaml`; disable with `--no-suppress`.
62
67
 
68
+ `codejury eval` takes `--dataset <dir>` (golden YAML directory), `--split <name>`
69
+ (score only cases tagged with that `split:`, e.g. a held-out set), and
70
+ `--format {text,json}` -- the JSON report is a stable schema (overall plus
71
+ per-capability confusion matrix and precision / recall / F1).
72
+
73
+ Runs are deterministic: providers query at temperature 0, and `audit` / `scan`
74
+ cache each verdict on a hash of the normalized code, the in-scope capability
75
+ versions, and the orchestration. Re-auditing unchanged code returns the recorded
76
+ verdicts without re-querying the model; editing a capability YAML changes its
77
+ fingerprint and invalidates affected entries. Pass `--no-cache` to always
78
+ re-query.
79
+
63
80
  ```bash
64
81
  # Multi-round adversarial debate, rendered as Markdown
65
82
  git diff | codejury audit --orchestrator debate --format markdown - > report.md
@@ -5,4 +5,9 @@ Domain knowledge lives in YAML capability files as a first-class citizen,
5
5
  aligned with OWASP ASVS.
6
6
  """
7
7
 
8
- __version__ = "0.0.0"
8
+ from importlib.metadata import PackageNotFoundError, version
9
+
10
+ try:
11
+ __version__ = version("codejury")
12
+ except PackageNotFoundError: # running from a source tree without an install
13
+ __version__ = "0.0.0"
@@ -93,15 +93,27 @@ def _build_prompt(path: str, content: str, cap: Capability, context: str = "") -
93
93
  )
94
94
 
95
95
 
96
+ def _anti_pattern_cwes(cap: Capability) -> dict[str, str]:
97
+ """Map anti_pattern id -> CWE, so a verdict can inherit the CWE it matched."""
98
+ return {
99
+ p.id: p.cwe
100
+ for sub in cap.sub_capabilities.values()
101
+ for p in sub.anti_patterns
102
+ if p.cwe
103
+ }
104
+
105
+
96
106
  def _parse_verdicts(text: str, cap: Capability) -> list[Verdict]:
97
107
  obj = extract_json_object(text)
98
108
  if not obj:
99
109
  return []
110
+ cwe_by_id = _anti_pattern_cwes(cap)
100
111
  out: list[Verdict] = []
101
112
  for v in obj.get("verdicts", []):
102
113
  if not isinstance(v, dict):
103
114
  continue
104
115
  sub = str(v.get("sub_capability", "")).strip()
116
+ matched_anti = str_list(v.get("matched_anti"))
105
117
  out.append(
106
118
  Verdict(
107
119
  capability=f"{cap.id}.{sub}" if sub else cap.id,
@@ -109,7 +121,8 @@ def _parse_verdicts(text: str, cap: Capability) -> list[Verdict]:
109
121
  status=one_of(v.get("status"), _VALID_STATUS, "UNKNOWN"),
110
122
  reasoning=str(v.get("reasoning", "")),
111
123
  matched_correct=str_list(v.get("matched_correct")),
112
- matched_anti=str_list(v.get("matched_anti")),
124
+ matched_anti=matched_anti,
125
+ cwe=next((cwe_by_id[a] for a in matched_anti if a in cwe_by_id), ""),
113
126
  evidence=to_evidence(v.get("evidence")),
114
127
  confidence=to_float(v.get("confidence"), 0.5),
115
128
  )
@@ -16,6 +16,7 @@ from codejury.domain.artifact import CodeArtifact
16
16
  from codejury.domain.capability import Capability
17
17
  from codejury.domain.context import AnalysisContext
18
18
  from codejury.domain.result import AnalysisResult
19
+ from codejury.infrastructure.cache import VerdictCache, verdict_key
19
20
  from codejury.orchestrators.base import Orchestrator
20
21
  from codejury.orchestrators.challenge import ChallengeOrchestrator
21
22
  from codejury.orchestrators.debate import DebateOrchestrator
@@ -75,17 +76,38 @@ def build_orchestration(
75
76
  return verifier, SingleOrchestrator()
76
77
 
77
78
 
79
+ def orchestration_descriptor(strategy: str, model: str, max_tokens: int) -> str:
80
+ """The non-code, non-capability inputs that affect a verdict, as a cache tag."""
81
+ return f"{strategy}|{model}|{max_tokens}"
82
+
83
+
78
84
  def run_over_artifacts(
79
85
  artifacts: list[CodeArtifact],
80
86
  capabilities: list[Capability],
81
87
  agents: dict[str, Agent],
82
88
  orchestrator: Orchestrator,
89
+ *,
90
+ cache: VerdictCache | None = None,
91
+ orchestration: str = "",
83
92
  ) -> list[tuple[str, AnalysisResult]]:
84
- """Run the orchestration over each artifact, returning (path, result) per artifact."""
93
+ """Run the orchestration over each artifact, returning (path, result) per artifact.
94
+
95
+ When ``cache`` is given, an unchanged artifact returns its recorded result
96
+ instead of re-running the orchestrator (determinism, invariant 2).
97
+ """
85
98
  results = []
86
99
  for artifact in artifacts:
100
+ if cache is not None:
101
+ key = verdict_key(artifact, capabilities, orchestration=orchestration)
102
+ hit = cache.get(key)
103
+ if hit is not None:
104
+ results.append((artifact.path, hit))
105
+ continue
87
106
  ctx = AnalysisContext(artifact=artifact, capabilities=capabilities)
88
- results.append((artifact.path, orchestrator.run(agents, ctx)))
107
+ result = orchestrator.run(agents, ctx)
108
+ if cache is not None:
109
+ cache.put(key, result)
110
+ results.append((artifact.path, result))
89
111
  return results
90
112
 
91
113
 
@@ -94,5 +116,11 @@ def run_over_source(
94
116
  capabilities: list[Capability],
95
117
  agents: dict[str, Agent],
96
118
  orchestrator: Orchestrator,
119
+ *,
120
+ cache: VerdictCache | None = None,
121
+ orchestration: str = "",
97
122
  ) -> list[tuple[str, AnalysisResult]]:
98
- return run_over_artifacts(source.list_artifacts(), capabilities, agents, orchestrator)
123
+ return run_over_artifacts(
124
+ source.list_artifacts(), capabilities, agents, orchestrator,
125
+ cache=cache, orchestration=orchestration,
126
+ )
@@ -9,6 +9,7 @@ library, backed by the Anthropic provider, under a chosen orchestration strategy
9
9
  from __future__ import annotations
10
10
 
11
11
  import argparse
12
+ import json
12
13
  import os
13
14
  import sys
14
15
 
@@ -21,6 +22,7 @@ from codejury.assembly import (
21
22
  STRATEGIES,
22
23
  build_orchestration,
23
24
  make_provider,
25
+ orchestration_descriptor,
24
26
  run_over_artifacts,
25
27
  run_over_source,
26
28
  )
@@ -29,11 +31,12 @@ from codejury.domain.capability import Capability, load_capabilities
29
31
  from codejury.domain.context import AnalysisContext
30
32
  from codejury.domain.observation import Observation
31
33
  from codejury.domain.result import AnalysisResult
32
- from codejury.evaluation import Metrics, evaluate, load_cases
34
+ from codejury.evaluation import EvalReport, evaluate, load_cases
35
+ from codejury.infrastructure.cache import VerdictCache
33
36
  from codejury.orchestrators.single import SingleOrchestrator
34
37
  from codejury.providers.base import Provider
35
38
  from codejury.providers.mock import MockProvider
36
- from codejury.reporting import to_json, to_markdown
39
+ from codejury.reporting import to_json, to_markdown, to_sarif
37
40
  from codejury.resources import CAPABILITIES_DIR, GOLDEN_DIR, SUPPRESSIONS_FILE, TASKS_DIR
38
41
  from codejury.suppression import filter_results, load_suppressions
39
42
  from codejury.integrations.github import build_review, parse_pr_ref, post_review
@@ -43,7 +46,7 @@ from codejury.sources.repo import RepoSource
43
46
  from codejury.tasks.base import run_task
44
47
  from codejury.tasks.registry import load_tasks
45
48
 
46
- _FORMATS = ("text", "markdown", "json")
49
+ _FORMATS = ("text", "markdown", "json", "sarif")
47
50
 
48
51
 
49
52
  def dry_run() -> AnalysisResult:
@@ -69,10 +72,14 @@ def audit(
69
72
  model: str,
70
73
  max_tokens: int = 2048,
71
74
  strategy: str = "single",
75
+ cache: VerdictCache | None = None,
72
76
  ) -> list[tuple[str, AnalysisResult]]:
73
77
  """Audit each changed file in `diff_text`, returning (path, result) per file."""
74
78
  agents, orchestrator = build_orchestration(strategy, provider=provider, model=model, max_tokens=max_tokens)
75
- return run_over_source(DiffSource(diff_text), capabilities, agents, orchestrator)
79
+ return run_over_source(
80
+ DiffSource(diff_text), capabilities, agents, orchestrator,
81
+ cache=cache, orchestration=orchestration_descriptor(strategy, model, max_tokens),
82
+ )
76
83
 
77
84
 
78
85
  def scan(
@@ -87,6 +94,7 @@ def scan(
87
94
  max_chars: int = 200_000,
88
95
  with_callers: bool = False,
89
96
  with_callees: bool = False,
97
+ cache: VerdictCache | None = None,
90
98
  ) -> list[tuple[str, AnalysisResult]]:
91
99
  """Audit every matching file in a directory tree, returning (path, result) per artifact."""
92
100
  source = RepoSource(
@@ -103,7 +111,10 @@ def scan(
103
111
  file=sys.stderr,
104
112
  )
105
113
  agents, orchestrator = build_orchestration(strategy, provider=provider, model=model, max_tokens=max_tokens)
106
- return run_over_artifacts(artifacts, capabilities, agents, orchestrator)
114
+ return run_over_artifacts(
115
+ artifacts, capabilities, agents, orchestrator,
116
+ cache=cache, orchestration=orchestration_descriptor(strategy, model, max_tokens),
117
+ )
107
118
 
108
119
 
109
120
  def _render_dry_run(result: AnalysisResult) -> str:
@@ -142,7 +153,7 @@ def _render_observation(o: Observation) -> str:
142
153
 
143
154
 
144
155
  def _render_results(fmt: str, results: list[tuple[str, AnalysisResult]]) -> str:
145
- return {"text": _render_audit, "markdown": to_markdown, "json": to_json}[fmt](results)
156
+ return {"text": _render_audit, "markdown": to_markdown, "json": to_json, "sarif": to_sarif}[fmt](results)
146
157
 
147
158
 
148
159
  def _maybe_suppress(results: list[tuple[str, AnalysisResult]], enabled: bool) -> list[tuple[str, AnalysisResult]]:
@@ -189,11 +200,16 @@ def _maybe_post_github(ref: str | None, results: list[tuple[str, AnalysisResult]
189
200
  print(f"github review failed: {exc}", file=sys.stderr)
190
201
 
191
202
 
192
- def _render_metrics(m: Metrics) -> str:
193
- return (
194
- f"cases: {m.total} (tp={m.tp} fp={m.fp} tn={m.tn} fn={m.fn})\n"
195
- f"precision: {m.precision:.2f} recall: {m.recall:.2f} accuracy: {m.accuracy:.2f}"
196
- )
203
+ def _render_eval(report: EvalReport) -> str:
204
+ def line(label: str, m) -> str:
205
+ return (
206
+ f"{label:<20} tp={m.tp} fp={m.fp} tn={m.tn} fn={m.fn} "
207
+ f"P={m.precision:.2f} R={m.recall:.2f} F1={m.f1:.2f}"
208
+ )
209
+
210
+ lines = [line(f"overall ({report.overall.total} cases)", report.overall)]
211
+ lines += [line(cap, m) for cap, m in sorted(report.by_capability.items())]
212
+ return "\n".join(lines)
197
213
 
198
214
 
199
215
  def _read_diff(path: str) -> str:
@@ -221,6 +237,7 @@ def main(argv: list[str] | None = None) -> int:
221
237
  audit_p.add_argument("--api-base", default=DEFAULT_API_BASE, help="provider base URL (env: CODEJURY_API_BASE)")
222
238
  audit_p.add_argument("--api-key", default=DEFAULT_API_KEY, help="provider API key (env: CODEJURY_API_KEY)")
223
239
  audit_p.add_argument("--no-suppress", action="store_true", help="disable the known-noise suppression filter")
240
+ audit_p.add_argument("--no-cache", action="store_true", help="bypass the verdict cache (always re-query the model)")
224
241
  audit_p.add_argument("--fail-on", choices=_FAIL_ON, default=None, dest="fail_on", help="exit 1 if a finding at/above this severity is found")
225
242
  audit_p.add_argument("--github", default=None, help="post a PR review: owner/repo#number (needs GITHUB_TOKEN)")
226
243
 
@@ -244,6 +261,7 @@ def main(argv: list[str] | None = None) -> int:
244
261
  scan_p.add_argument("--api-base", default=DEFAULT_API_BASE, help="provider base URL (env: CODEJURY_API_BASE)")
245
262
  scan_p.add_argument("--api-key", default=DEFAULT_API_KEY, help="provider API key (env: CODEJURY_API_KEY)")
246
263
  scan_p.add_argument("--no-suppress", action="store_true", help="disable the known-noise suppression filter")
264
+ scan_p.add_argument("--no-cache", action="store_true", help="bypass the verdict cache (always re-query the model)")
247
265
  scan_p.add_argument("--fail-on", choices=_FAIL_ON, default=None, dest="fail_on", help="exit 1 if a finding at/above this severity is found")
248
266
 
249
267
  run_p = sub.add_parser("run", help="run a named task preset against a unified diff")
@@ -256,9 +274,11 @@ def main(argv: list[str] | None = None) -> int:
256
274
  run_p.add_argument("--fail-on", choices=_FAIL_ON, default=None, dest="fail_on", help="exit 1 if a finding at/above this severity is found")
257
275
 
258
276
  eval_p = sub.add_parser("eval", help="score golden cases and report precision/recall")
259
- eval_p.add_argument("--golden", default=GOLDEN_DIR, help="golden case YAML directory")
277
+ eval_p.add_argument("--dataset", default=GOLDEN_DIR, help="golden case YAML directory")
278
+ eval_p.add_argument("--split", default=None, help="only score cases whose 'split' matches (e.g. held-out)")
260
279
  eval_p.add_argument("--capabilities", default=CAPABILITIES_DIR, help="capability YAML directory")
261
280
  eval_p.add_argument("--provider", choices=PROVIDERS, default="anthropic")
281
+ eval_p.add_argument("--format", choices=("text", "json"), default="text", dest="fmt")
262
282
  eval_p.add_argument("--model", default=DEFAULT_MODEL)
263
283
  eval_p.add_argument("--api-base", default=DEFAULT_API_BASE, help="provider base URL (env: CODEJURY_API_BASE)")
264
284
  eval_p.add_argument("--api-key", default=DEFAULT_API_KEY, help="provider API key (env: CODEJURY_API_KEY)")
@@ -275,6 +295,7 @@ def main(argv: list[str] | None = None) -> int:
275
295
  model=args.model,
276
296
  max_tokens=args.max_tokens,
277
297
  strategy=args.orchestrator,
298
+ cache=None if args.no_cache else VerdictCache(),
278
299
  )
279
300
  results = _maybe_suppress(results, not args.no_suppress)
280
301
  print(_render_results(args.fmt, results))
@@ -298,6 +319,7 @@ def main(argv: list[str] | None = None) -> int:
298
319
  max_chars=args.max_chars,
299
320
  with_callers=args.callers,
300
321
  with_callees=args.callees,
322
+ cache=None if args.no_cache else VerdictCache(),
301
323
  )
302
324
  results = _maybe_suppress(results, not args.no_suppress)
303
325
  print(_render_results(args.fmt, results))
@@ -317,8 +339,8 @@ def main(argv: list[str] | None = None) -> int:
317
339
 
318
340
  if args.command == "eval":
319
341
  try:
320
- metrics = evaluate(
321
- load_cases(args.golden),
342
+ report = evaluate(
343
+ load_cases(args.dataset, split=args.split),
322
344
  load_capabilities(args.capabilities),
323
345
  provider=make_provider(args.provider, api_key=args.api_key, api_base=args.api_base),
324
346
  model=args.model,
@@ -328,7 +350,7 @@ def main(argv: list[str] | None = None) -> int:
328
350
  # as one line, not a traceback (audit gets this via the orchestrator).
329
351
  print(f"eval failed: {exc}")
330
352
  return 1
331
- print(_render_metrics(metrics))
353
+ print(json.dumps(report.to_dict(), indent=2) if args.fmt == "json" else _render_eval(report))
332
354
  return 0
333
355
 
334
356
  if args.command in (None, "dry-run"):
@@ -46,7 +46,34 @@ sub_capabilities:
46
46
  signals: ["admin:admin", "password=admin", "changeme"]
47
47
  why_bad: Default credentials are public knowledge and trivially abused
48
48
 
49
+ transport_security:
50
+ correct_patterns:
51
+ - id: TLS-OK-1
52
+ description: >-
53
+ Leave TLS certificate verification at its secure default -- verify omitted or
54
+ verify=True, the default SSL context, hostname checking on
55
+ signals: ["verify=True", "create_default_context", "requests.get(", "requests.post("]
56
+ why_ok: >-
57
+ The secure default validates the certificate chain and hostname. An https:// call
58
+ that does not disable verification is fine; do not flag it just for making a
59
+ request or for omitting verify.
60
+
61
+ anti_patterns:
62
+ - id: TLS-BAD-1
63
+ cwe: CWE-295
64
+ severity: HIGH
65
+ description: >-
66
+ Disable TLS certificate or hostname verification -- verify=False, CERT_NONE,
67
+ check_hostname=False, or an unverified SSL context
68
+ signals: ["verify=False", "CERT_NONE", "check_hostname = False", "_create_unverified_context"]
69
+ why_bad: An unverified TLS connection is open to a man-in-the-middle despite https://
70
+ example_bad: |
71
+ requests.get("https://api.partner.com/data", verify=False)
72
+ example_good: |
73
+ requests.get("https://api.partner.com/data") # verify defaults to True
74
+
49
75
  trigger_signals:
50
76
  - dependency manifests and lock files
51
77
  - install or bootstrap scripts fetching remote code
52
78
  - file permission, bucket ACL, or default credential settings
79
+ - TLS client calls that set verify or build a custom SSL context
@@ -105,7 +105,67 @@ sub_capabilities:
105
105
  if not target.is_relative_to(UPLOAD_DIR):
106
106
  raise ValueError("path escapes upload dir")
107
107
 
108
+ ssrf:
109
+ correct_patterns:
110
+ - id: SSRF-OK-1
111
+ description: Validate the request URL's host against an allowlist before fetching it
112
+ signals: ["urlparse(", ".hostname", "ALLOWED", "allowlist"]
113
+ why_ok: An attacker cannot redirect the fetch to an internal target the list omits
114
+
115
+ - id: SSRF-OK-2
116
+ description: >-
117
+ Fetch a URL that is not attacker-controlled -- a constant, a value from trusted
118
+ config, or an operator-supplied argument
119
+ why_ok: >-
120
+ SSRF needs an external attacker to control the destination. A constant URL or one
121
+ from trusted config is not a finding, even though it goes through a fetch call.
122
+
123
+ anti_patterns:
124
+ - id: SSRF-BAD-1
125
+ cwe: CWE-918
126
+ severity: HIGH
127
+ description: >-
128
+ Fetch a URL taken from externally controlled input (HTTP request, form, query, or
129
+ message field) without validating its host against an allowlist. NOT this: a
130
+ constant URL, one from trusted config, or an operator-supplied argument.
131
+ signals: ["requests.get(", "urllib.request.urlopen(", "httpx.", "request.args", "request.json"]
132
+ why_bad: >-
133
+ The server makes the request, so attacker input reaches internal-only targets --
134
+ cloud metadata, localhost admin ports, internal APIs behind the firewall.
135
+ example_bad: |
136
+ requests.get(request.args["url"]).text
137
+ example_good: |
138
+ if urlparse(url).hostname not in ALLOWED_HOSTS:
139
+ raise ValueError("host not allowed")
140
+ requests.get(url).text
141
+
142
+ insecure_deserialization:
143
+ correct_patterns:
144
+ - id: DESER-OK-1
145
+ description: >-
146
+ Parse untrusted input with a data-only parser -- json.loads or yaml.safe_load --
147
+ that cannot instantiate arbitrary objects
148
+ signals: ["json.loads", "yaml.safe_load"]
149
+ why_ok: A data-only parser builds plain structures and has no code-execution path
150
+
151
+ anti_patterns:
152
+ - id: DESER-BAD-1
153
+ cwe: CWE-502
154
+ severity: CRITICAL
155
+ description: >-
156
+ Deserialize externally controlled bytes with an object-constructing deserializer --
157
+ pickle, marshal, yaml.load (unsafe Loader), or jsonpickle. NOT this: a data-only
158
+ parser like json.loads or yaml.safe_load.
159
+ signals: ["pickle.loads", "pickle.load(", "yaml.load(", "marshal.loads", "jsonpickle.decode"]
160
+ why_bad: These reconstruct arbitrary objects, so crafted input runs code during deserialization
161
+ example_bad: |
162
+ pickle.loads(base64.b64decode(request.data))
163
+ example_good: |
164
+ json.loads(request.data)
165
+
108
166
  trigger_signals:
109
167
  - raw SQL strings or cursor.execute calls appear
110
168
  - imports of os, subprocess, or shlex with process execution
111
169
  - file paths built from request, form, or query parameters
170
+ - outbound HTTP fetches (requests, urllib, httpx) to a non-constant URL
171
+ - deserialization calls (pickle, yaml.load, marshal) on external input
@@ -0,0 +1,14 @@
1
+ # Adversarial positive: weak password hash hidden behind hashlib.new(variable).
2
+ capability: authn
3
+ vulnerable: true
4
+ expected_verdict: VULNERABLE
5
+ cwe: CWE-916
6
+ source: synthetic
7
+ notes: >
8
+ hashlib.new("md5") is the same weak, unsalted, fast hash as hashlib.md5(), just
9
+ reached through a variable algorithm name. Unsuitable for password storage
10
+ (needs bcrypt/scrypt/argon2). The indirection should not hide it.
11
+ code: |
12
+ def hash_pw(pw):
13
+ algo = "md5"
14
+ return hashlib.new(algo, pw.encode()).hexdigest()
@@ -0,0 +1,14 @@
1
+ capability: business_logic
2
+ vulnerable: true
3
+ expected_verdict: VULNERABLE
4
+ cwe: CWE-602
5
+ source: synthetic
6
+ split: held-out
7
+ notes: >
8
+ Quantity and unit price come straight from the request and are never checked
9
+ server-side. A negative quantity credits the customer; a client-set price lets
10
+ them pay anything. Price must come from the catalog and quantity must be > 0.
11
+ code: |
12
+ def add_to_order(order, item_id, quantity, unit_price):
13
+ order.lines.append((item_id, quantity, unit_price))
14
+ order.total += quantity * unit_price
@@ -0,0 +1,15 @@
1
+ capability: business_logic
2
+ vulnerable: false
3
+ expected_verdict: SECURE
4
+ cwe: ""
5
+ source: synthetic
6
+ notes: >
7
+ Quantity is validated as positive and the price is looked up server-side from
8
+ the catalog, so the client cannot tamper with either.
9
+ code: |
10
+ def add_to_order(order, item_id, quantity):
11
+ if quantity < 1:
12
+ raise ValueError("quantity must be positive")
13
+ unit_price = catalog.price_of(item_id) # trusted server-side price
14
+ order.lines.append((item_id, quantity, unit_price))
15
+ order.total += quantity * unit_price
@@ -0,0 +1,22 @@
1
+ # False-positive-prone negative: a subprocess call driven by a request value
2
+ # looks like command injection, but the value only selects a fixed argv from a
3
+ # table and never reaches a shell.
4
+ capability: input_validation
5
+ vulnerable: false
6
+ expected_verdict: SECURE
7
+ cwe: ""
8
+ source: synthetic
9
+ split: held-out
10
+ notes: >
11
+ `name` only indexes a fixed dict of argument lists (KeyError on anything
12
+ unknown); no user string is interpolated into a command and shell=False.
13
+ Flagging this as command injection is a false positive.
14
+ code: |
15
+ ACTIONS = {
16
+ "start": ["systemctl", "start", "web"],
17
+ "stop": ["systemctl", "stop", "web"],
18
+ }
19
+
20
+ def run_action(name):
21
+ argv = ACTIONS[name]
22
+ subprocess.run(argv, shell=False)
@@ -0,0 +1,14 @@
1
+ capability: data_protection
2
+ vulnerable: true
3
+ expected_verdict: VULNERABLE
4
+ cwe: CWE-312
5
+ source: synthetic
6
+ notes: >
7
+ The full card number and the CVV are written to the database in cleartext.
8
+ The CVV must never be stored at all, and the PAN must be tokenized/encrypted.
9
+ code: |
10
+ def save_card(user_id, card_number, cvv):
11
+ db.execute(
12
+ "INSERT INTO cards (user_id, pan, cvv) VALUES (?, ?, ?)",
13
+ (user_id, card_number, cvv),
14
+ )
@@ -0,0 +1,16 @@
1
+ capability: data_protection
2
+ vulnerable: false
3
+ expected_verdict: SECURE
4
+ cwe: ""
5
+ source: synthetic
6
+ split: held-out
7
+ notes: >
8
+ The PAN is replaced by a vault token before storage; the CVV is never
9
+ persisted. Nothing sensitive reaches the database.
10
+ code: |
11
+ def save_card(user_id, card_number):
12
+ token = vault.tokenize(card_number) # store a token, never the PAN
13
+ db.execute(
14
+ "INSERT INTO cards (user_id, token) VALUES (?, ?)",
15
+ (user_id, token),
16
+ )
@@ -0,0 +1,11 @@
1
+ capability: dependency_config
2
+ vulnerable: true
3
+ expected_verdict: VULNERABLE
4
+ cwe: CWE-295
5
+ source: synthetic
6
+ notes: >
7
+ TLS certificate verification is disabled, so the connection is open to a
8
+ man-in-the-middle despite the https:// scheme.
9
+ code: |
10
+ def fetch_partner_data():
11
+ return requests.get("https://api.partner.com/data", verify=False).json()
@@ -0,0 +1,11 @@
1
+ capability: dependency_config
2
+ vulnerable: false
3
+ expected_verdict: SECURE
4
+ cwe: ""
5
+ source: synthetic
6
+ notes: >
7
+ Certificate verification is left at its secure default (True) and a timeout is
8
+ set. Nothing weakens the TLS configuration.
9
+ code: |
10
+ def fetch_partner_data():
11
+ return requests.get("https://api.partner.com/data", timeout=10).json()
@@ -0,0 +1,13 @@
1
+ # False-positive-prone negative: deserializing request data looks dangerous,
2
+ # but json.loads only builds data -- it cannot execute code like pickle.
3
+ capability: input_validation
4
+ vulnerable: false
5
+ expected_verdict: SECURE
6
+ cwe: ""
7
+ source: synthetic
8
+ notes: >
9
+ json.loads parses untrusted input into plain data structures with no code
10
+ execution path. Treating it like insecure deserialization is a false positive.
11
+ code: |
12
+ def load_state(request):
13
+ return json.loads(request.data)
@@ -0,0 +1,12 @@
1
+ capability: input_validation
2
+ vulnerable: true
3
+ expected_verdict: VULNERABLE
4
+ cwe: CWE-502
5
+ source: synthetic
6
+ notes: >
7
+ pickle.loads on attacker-supplied bytes executes arbitrary code during
8
+ unpickling -- remote code execution.
9
+ code: |
10
+ def load_state(request):
11
+ blob = base64.b64decode(request.data)
12
+ return pickle.loads(blob)
@@ -0,0 +1,11 @@
1
+ capability: error_logging
2
+ vulnerable: false
3
+ expected_verdict: SECURE
4
+ cwe: ""
5
+ source: synthetic
6
+ notes: >
7
+ Only the non-sensitive username is logged; the password never enters the log.
8
+ code: |
9
+ def authenticate(username, password):
10
+ logger.info("login attempt user=%s", username)
11
+ return backend.check(username, password)
@@ -0,0 +1,12 @@
1
+ capability: error_logging
2
+ vulnerable: true
3
+ expected_verdict: VULNERABLE
4
+ cwe: CWE-532
5
+ source: synthetic
6
+ notes: >
7
+ The cleartext password is written to the log. Anyone with log access (or a log
8
+ shipper, or an aggregation backend) now has the credential.
9
+ code: |
10
+ def authenticate(username, password):
11
+ logger.info("login attempt user=%s password=%s", username, password)
12
+ return backend.check(username, password)