codejury 0.2.0__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. codejury-0.4.0/PKG-INFO +172 -0
  2. codejury-0.4.0/README.md +143 -0
  3. {codejury-0.2.0 → codejury-0.4.0}/codejury/agents/verifier.py +14 -3
  4. {codejury-0.2.0 → codejury-0.4.0}/codejury/assembly.py +13 -3
  5. {codejury-0.2.0 → codejury-0.4.0}/codejury/cli.py +66 -0
  6. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/capabilities/input_validation.yaml +25 -6
  7. codejury-0.4.0/codejury/data/capabilities/secrets.yaml +72 -0
  8. {codejury-0.2.0 → codejury-0.4.0}/codejury/domain/artifact.py +3 -0
  9. codejury-0.4.0/codejury/sources/callers.py +46 -0
  10. {codejury-0.2.0 → codejury-0.4.0}/codejury/sources/chunker.py +4 -1
  11. {codejury-0.2.0 → codejury-0.4.0}/codejury/sources/repo.py +17 -5
  12. codejury-0.4.0/codejury.egg-info/PKG-INFO +172 -0
  13. {codejury-0.2.0 → codejury-0.4.0}/codejury.egg-info/SOURCES.txt +2 -0
  14. {codejury-0.2.0 → codejury-0.4.0}/pyproject.toml +1 -1
  15. codejury-0.4.0/tests/test_callers.py +43 -0
  16. {codejury-0.2.0 → codejury-0.4.0}/tests/test_cli_audit.py +17 -0
  17. codejury-0.2.0/PKG-INFO +0 -136
  18. codejury-0.2.0/README.md +0 -107
  19. codejury-0.2.0/codejury/data/capabilities/secrets.yaml +0 -51
  20. codejury-0.2.0/codejury.egg-info/PKG-INFO +0 -136
  21. {codejury-0.2.0 → codejury-0.4.0}/LICENSE +0 -0
  22. {codejury-0.2.0 → codejury-0.4.0}/codejury/__init__.py +0 -0
  23. {codejury-0.2.0 → codejury-0.4.0}/codejury/agents/__init__.py +0 -0
  24. {codejury-0.2.0 → codejury-0.4.0}/codejury/agents/base.py +0 -0
  25. {codejury-0.2.0 → codejury-0.4.0}/codejury/agents/debate.py +0 -0
  26. {codejury-0.2.0 → codejury-0.4.0}/codejury/agents/mock.py +0 -0
  27. {codejury-0.2.0 → codejury-0.4.0}/codejury/agents/parsing.py +0 -0
  28. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/capabilities/authentication.yaml +0 -0
  29. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/capabilities/authorization.yaml +0 -0
  30. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/capabilities/business_logic.yaml +0 -0
  31. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/capabilities/crypto.yaml +0 -0
  32. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/capabilities/data_protection.yaml +0 -0
  33. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/capabilities/dependency_config.yaml +0 -0
  34. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/capabilities/error_logging.yaml +0 -0
  35. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/capabilities/output_encoding.yaml +0 -0
  36. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/capabilities/session.yaml +0 -0
  37. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/golden/authn_bcrypt_password.yaml +0 -0
  38. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/golden/authn_jwt_noverify_vuln.yaml +0 -0
  39. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/golden/authn_jwt_verified_safe.yaml +0 -0
  40. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/golden/authn_sha256_checksum_safe.yaml +0 -0
  41. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/golden/authn_sha256_password.yaml +0 -0
  42. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/golden/authz_idor_vuln.yaml +0 -0
  43. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/golden/authz_owner_safe.yaml +0 -0
  44. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/golden/cmdi_ossystem_vuln.yaml +0 -0
  45. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/golden/cmdi_subprocess_safe.yaml +0 -0
  46. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/golden/crypto_aesgcm_safe.yaml +0 -0
  47. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/golden/crypto_ecb_vuln.yaml +0 -0
  48. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/golden/path_contained_safe.yaml +0 -0
  49. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/golden/path_traversal_vuln.yaml +0 -0
  50. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/golden/secrets_env_safe.yaml +0 -0
  51. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/golden/secrets_hardcoded_vuln.yaml +0 -0
  52. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/golden/sqli_format_vuln.yaml +0 -0
  53. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/golden/sqli_fstring_query.yaml +0 -0
  54. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/golden/sqli_parameterized_query.yaml +0 -0
  55. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/golden/xss_innerhtml_constant_safe.yaml +0 -0
  56. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/golden/xss_innerhtml_vuln.yaml +0 -0
  57. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/tasks/audit_diff_debate.yaml +0 -0
  58. {codejury-0.2.0 → codejury-0.4.0}/codejury/data/tasks/quick_scan_single.yaml +0 -0
  59. {codejury-0.2.0 → codejury-0.4.0}/codejury/domain/__init__.py +0 -0
  60. {codejury-0.2.0 → codejury-0.4.0}/codejury/domain/capability.py +0 -0
  61. {codejury-0.2.0 → codejury-0.4.0}/codejury/domain/context.py +0 -0
  62. {codejury-0.2.0 → codejury-0.4.0}/codejury/domain/observation.py +0 -0
  63. {codejury-0.2.0 → codejury-0.4.0}/codejury/domain/result.py +0 -0
  64. {codejury-0.2.0 → codejury-0.4.0}/codejury/evaluation.py +0 -0
  65. {codejury-0.2.0 → codejury-0.4.0}/codejury/infrastructure/__init__.py +0 -0
  66. {codejury-0.2.0 → codejury-0.4.0}/codejury/infrastructure/json_parse.py +0 -0
  67. {codejury-0.2.0 → codejury-0.4.0}/codejury/orchestrators/__init__.py +0 -0
  68. {codejury-0.2.0 → codejury-0.4.0}/codejury/orchestrators/base.py +0 -0
  69. {codejury-0.2.0 → codejury-0.4.0}/codejury/orchestrators/debate.py +0 -0
  70. {codejury-0.2.0 → codejury-0.4.0}/codejury/orchestrators/pipeline.py +0 -0
  71. {codejury-0.2.0 → codejury-0.4.0}/codejury/orchestrators/reflexion.py +0 -0
  72. {codejury-0.2.0 → codejury-0.4.0}/codejury/orchestrators/single.py +0 -0
  73. {codejury-0.2.0 → codejury-0.4.0}/codejury/providers/__init__.py +0 -0
  74. {codejury-0.2.0 → codejury-0.4.0}/codejury/providers/anthropic.py +0 -0
  75. {codejury-0.2.0 → codejury-0.4.0}/codejury/providers/base.py +0 -0
  76. {codejury-0.2.0 → codejury-0.4.0}/codejury/providers/litellm.py +0 -0
  77. {codejury-0.2.0 → codejury-0.4.0}/codejury/providers/mock.py +0 -0
  78. {codejury-0.2.0 → codejury-0.4.0}/codejury/providers/openai.py +0 -0
  79. {codejury-0.2.0 → codejury-0.4.0}/codejury/providers/openai_format.py +0 -0
  80. {codejury-0.2.0 → codejury-0.4.0}/codejury/providers/retry.py +0 -0
  81. {codejury-0.2.0 → codejury-0.4.0}/codejury/reporting.py +0 -0
  82. {codejury-0.2.0 → codejury-0.4.0}/codejury/resources.py +0 -0
  83. {codejury-0.2.0 → codejury-0.4.0}/codejury/sources/__init__.py +0 -0
  84. {codejury-0.2.0 → codejury-0.4.0}/codejury/sources/base.py +0 -0
  85. {codejury-0.2.0 → codejury-0.4.0}/codejury/sources/diff.py +0 -0
  86. {codejury-0.2.0 → codejury-0.4.0}/codejury/sources/function.py +0 -0
  87. {codejury-0.2.0 → codejury-0.4.0}/codejury/sources/mock.py +0 -0
  88. {codejury-0.2.0 → codejury-0.4.0}/codejury/tasks/__init__.py +0 -0
  89. {codejury-0.2.0 → codejury-0.4.0}/codejury/tasks/base.py +0 -0
  90. {codejury-0.2.0 → codejury-0.4.0}/codejury/tasks/registry.py +0 -0
  91. {codejury-0.2.0 → codejury-0.4.0}/codejury.egg-info/dependency_links.txt +0 -0
  92. {codejury-0.2.0 → codejury-0.4.0}/codejury.egg-info/entry_points.txt +0 -0
  93. {codejury-0.2.0 → codejury-0.4.0}/codejury.egg-info/requires.txt +0 -0
  94. {codejury-0.2.0 → codejury-0.4.0}/codejury.egg-info/top_level.txt +0 -0
  95. {codejury-0.2.0 → codejury-0.4.0}/setup.cfg +0 -0
  96. {codejury-0.2.0 → codejury-0.4.0}/tests/test_anthropic_provider.py +0 -0
  97. {codejury-0.2.0 → codejury-0.4.0}/tests/test_assembly.py +0 -0
  98. {codejury-0.2.0 → codejury-0.4.0}/tests/test_audit_pipeline.py +0 -0
  99. {codejury-0.2.0 → codejury-0.4.0}/tests/test_capability.py +0 -0
  100. {codejury-0.2.0 → codejury-0.4.0}/tests/test_context.py +0 -0
  101. {codejury-0.2.0 → codejury-0.4.0}/tests/test_debate_agents.py +0 -0
  102. {codejury-0.2.0 → codejury-0.4.0}/tests/test_debate_orchestrator.py +0 -0
  103. {codejury-0.2.0 → codejury-0.4.0}/tests/test_diff_source.py +0 -0
  104. {codejury-0.2.0 → codejury-0.4.0}/tests/test_evaluation.py +0 -0
  105. {codejury-0.2.0 → codejury-0.4.0}/tests/test_function_source.py +0 -0
  106. {codejury-0.2.0 → codejury-0.4.0}/tests/test_json_parse.py +0 -0
  107. {codejury-0.2.0 → codejury-0.4.0}/tests/test_litellm_provider.py +0 -0
  108. {codejury-0.2.0 → codejury-0.4.0}/tests/test_openai_provider.py +0 -0
  109. {codejury-0.2.0 → codejury-0.4.0}/tests/test_orchestrator.py +0 -0
  110. {codejury-0.2.0 → codejury-0.4.0}/tests/test_pipeline_orchestrator.py +0 -0
  111. {codejury-0.2.0 → codejury-0.4.0}/tests/test_reflexion_orchestrator.py +0 -0
  112. {codejury-0.2.0 → codejury-0.4.0}/tests/test_repo_source.py +0 -0
  113. {codejury-0.2.0 → codejury-0.4.0}/tests/test_reporting.py +0 -0
  114. {codejury-0.2.0 → codejury-0.4.0}/tests/test_retry_provider.py +0 -0
  115. {codejury-0.2.0 → codejury-0.4.0}/tests/test_tasks.py +0 -0
  116. {codejury-0.2.0 → codejury-0.4.0}/tests/test_verifier.py +0 -0
@@ -0,0 +1,172 @@
1
+ Metadata-Version: 2.4
2
+ Name: codejury
3
+ Version: 0.4.0
4
+ Summary: General-purpose Application Security AI audit framework -- five-layer architecture, capabilities as first-class data
5
+ Author: 4234288
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/4234288/codejury
8
+ Project-URL: Repository, https://github.com/4234288/codejury
9
+ Keywords: security,appsec,static analysis,llm,owasp,asvs,code review
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Topic :: Security
13
+ Classifier: Topic :: Software Development :: Quality Assurance
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Operating System :: OS Independent
16
+ Requires-Python: >=3.12
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: pyyaml>=6.0
20
+ Provides-Extra: anthropic
21
+ Requires-Dist: anthropic>=0.40; extra == "anthropic"
22
+ Provides-Extra: openai
23
+ Requires-Dist: openai>=1.0; extra == "openai"
24
+ Provides-Extra: litellm
25
+ Requires-Dist: litellm>=1.0; extra == "litellm"
26
+ Provides-Extra: dev
27
+ Requires-Dist: pytest>=8.0; extra == "dev"
28
+ Dynamic: license-file
29
+
30
+ # codejury
31
+
32
+ An AI security auditor for code whose knowledge lives in versioned YAML, not in
33
+ prompts. It reviews a diff or a whole repository against the OWASP ASVS and
34
+ reports a verdict per dimension -- both what is **vulnerable** and what is
35
+ **verified safe**.
36
+
37
+ The name is the core idea: code goes before a "jury" of adversarial roles --
38
+ Finder / Challenger / Judge -- that argue and converge on a verdict.
39
+
40
+ Why it is built this way:
41
+
42
+ - **Knowledge is data.** Each of the 11 OWASP ASVS areas is a YAML capability
43
+ (safe patterns + anti-patterns, with CWE and examples) -- versioned, reviewable
44
+ in a PR, and editable by non-engineers. The framework core stays small.
45
+ - **Verdicts, not just alerts.** Every capability yields `SECURE` / `VULNERABLE`
46
+ / `PARTIAL` / `NOT_PRESENT`, so a report shows what was checked and *passed*,
47
+ not only what failed.
48
+ - **Composable.** Four orchestration strategies, four model backends, and
49
+ diff / repo inputs are chosen per run -- mix and match.
50
+
51
+ ## Install
52
+
53
+ ```bash
54
+ pip install codejury # core + CLI
55
+ pip install 'codejury[anthropic]' # the provider you'll use: anthropic | openai | litellm
56
+ ```
57
+
58
+ ## Quickstart
59
+
60
+ ```bash
61
+ # No API key needed -- prove the pipeline runs end to end with mock layers
62
+ codejury dry-run
63
+
64
+ # A real audit: set a key, then review your unstaged changes
65
+ export ANTHROPIC_API_KEY=sk-ant-...
66
+ git diff | codejury audit --provider anthropic
67
+ ```
68
+
69
+ ## Commands
70
+
71
+ | Command | What it does |
72
+ |---|---|
73
+ | `codejury dry-run` | Run the mock pipeline with no key (smoke test). |
74
+ | `codejury audit [diff]` | Audit a unified diff from a file or stdin (`-`). |
75
+ | `codejury scan <dir>` | Audit a whole directory tree, capability by capability. |
76
+ | `codejury run <task>` | Run a named task preset (see [Tasks](#tasks)). |
77
+ | `codejury eval` | Score the golden cases and report precision / recall. |
78
+
79
+ Shared flags: `--orchestrator {single,pipeline,debate,reflexion}`,
80
+ `--provider {anthropic,openai,litellm}`, `--model`, `--format {text,markdown,json}`.
81
+
82
+ ```bash
83
+ # Multi-round adversarial debate, rendered as Markdown
84
+ git diff | codejury audit --orchestrator debate --format markdown - > report.md
85
+
86
+ # Deep whole-repo scan, scoped to a few capabilities to bound the cost
87
+ codejury scan ./myrepo --only secrets,input_validation,crypto
88
+ ```
89
+
90
+ ## Configuration
91
+
92
+ Provider keys are read from the environment (codejury does **not** auto-load
93
+ `.env` -- copy `.env.example` and `source` it):
94
+
95
+ | Variable | Used by |
96
+ |---|---|
97
+ | `ANTHROPIC_API_KEY` | `--provider anthropic` |
98
+ | `OPENAI_API_KEY` | `--provider openai` |
99
+ | `CODEJURY_API_BASE` / `CODEJURY_API_KEY` / `CODEJURY_MODEL` | defaults for `--api-base` / `--api-key` / `--model` (any provider) |
100
+
101
+ The `CODEJURY_*` overrides make a LiteLLM proxy a one-liner:
102
+
103
+ ```bash
104
+ # with CODEJURY_API_BASE / CODEJURY_API_KEY / CODEJURY_MODEL in a sourced .env
105
+ git diff | codejury audit --provider litellm -
106
+ ```
107
+
108
+ ## Tasks
109
+
110
+ A task is a named preset (capabilities + orchestrator + provider + model). It
111
+ lives in a YAML file; the API key always stays in the environment.
112
+
113
+ ```yaml
114
+ # mytasks/proxy_scan.yaml -> codejury run proxy_scan --tasks mytasks
115
+ name: proxy_scan
116
+ orchestrator: debate
117
+ provider: litellm
118
+ model: your-alias
119
+ api_base: https://litellm.example.com # key from CODEJURY_API_KEY
120
+ capabilities: [authn, input_validation, secrets] # omit to check all
121
+ ```
122
+
123
+ ## Capabilities
124
+
125
+ The library covers all 11 OWASP ASVS areas, one YAML each under
126
+ `codejury/data/capabilities/`. These ids are what `--only` and a task's
127
+ `capabilities:` accept:
128
+
129
+ `authn` · `authz` · `session` · `input_validation` · `output_encoding` ·
130
+ `crypto` · `secrets` · `data_protection` · `error_logging` ·
131
+ `business_logic` · `dependency_config`
132
+
133
+ To tune for your codebase, edit these files (add patterns / sharpen wording) --
134
+ no code change needed.
135
+
136
+ ## Architecture
137
+
138
+ ```
139
+ Layer 5 Task preset: source + capabilities + orchestrator + agents
140
+ Layer 4 Capability YAML domain knowledge (authn / authz / ...)
141
+ Layer 3 Orchestrator strategy (single / pipeline / debate / reflexion)
142
+ Source input (diff / repo / function)
143
+ Agent role (finder / challenger / judge / verifier)
144
+ Layer 2 Provider model backend (anthropic / openai / litellm / mock)
145
+ Layer 1 Infrastructure cross-cutting utilities (json parsing, retry, ...)
146
+ ```
147
+
148
+ Layers talk only through typed data, and each is an abstract base class plus
149
+ implementations, so the axes (task / orchestration / model / input) compose
150
+ independently.
151
+
152
+ ## Limitations
153
+
154
+ - **Prompts are a first pass.** Expect false positives and misses on real code.
155
+ Tune by editing the capability YAML and growing the golden set; measure the
156
+ effect with `codejury eval`.
157
+ - **Local-pattern checks are sharper than data-flow ones.** Capabilities judged
158
+ from one spot (weak crypto, hardcoded secrets) are reliable; taint / data-flow
159
+ ones like path traversal over-flag in single-file review because the verifier
160
+ can't see whether a value is attacker-controlled. `scan --callers` adds
161
+ cross-file call sites for provenance (helps some cases, not a full fix); also
162
+ scope with `--only` or challenge findings with `--orchestrator debate`.
163
+ - **`scan` cost scales as files x capabilities.** It is a periodic deep audit,
164
+ not a quick check -- scope it with `--only`. Day to day, audit the diff.
165
+
166
+ ## Development
167
+
168
+ ```bash
169
+ python -m venv .venv && source .venv/bin/activate
170
+ pip install -e ".[dev]"
171
+ pytest
172
+ ```
@@ -0,0 +1,143 @@
1
+ # codejury
2
+
3
+ An AI security auditor for code whose knowledge lives in versioned YAML, not in
4
+ prompts. It reviews a diff or a whole repository against the OWASP ASVS and
5
+ reports a verdict per dimension -- both what is **vulnerable** and what is
6
+ **verified safe**.
7
+
8
+ The name is the core idea: code goes before a "jury" of adversarial roles --
9
+ Finder / Challenger / Judge -- that argue and converge on a verdict.
10
+
11
+ Why it is built this way:
12
+
13
+ - **Knowledge is data.** Each of the 11 OWASP ASVS areas is a YAML capability
14
+ (safe patterns + anti-patterns, with CWE and examples) -- versioned, reviewable
15
+ in a PR, and editable by non-engineers. The framework core stays small.
16
+ - **Verdicts, not just alerts.** Every capability yields `SECURE` / `VULNERABLE`
17
+ / `PARTIAL` / `NOT_PRESENT`, so a report shows what was checked and *passed*,
18
+ not only what failed.
19
+ - **Composable.** Four orchestration strategies, four model backends, and
20
+ diff / repo inputs are chosen per run -- mix and match.
21
+
22
+ ## Install
23
+
24
+ ```bash
25
+ pip install codejury # core + CLI
26
+ pip install 'codejury[anthropic]' # the provider you'll use: anthropic | openai | litellm
27
+ ```
28
+
29
+ ## Quickstart
30
+
31
+ ```bash
32
+ # No API key needed -- prove the pipeline runs end to end with mock layers
33
+ codejury dry-run
34
+
35
+ # A real audit: set a key, then review your unstaged changes
36
+ export ANTHROPIC_API_KEY=sk-ant-...
37
+ git diff | codejury audit --provider anthropic
38
+ ```
39
+
40
+ ## Commands
41
+
42
+ | Command | What it does |
43
+ |---|---|
44
+ | `codejury dry-run` | Run the mock pipeline with no key (smoke test). |
45
+ | `codejury audit [diff]` | Audit a unified diff from a file or stdin (`-`). |
46
+ | `codejury scan <dir>` | Audit a whole directory tree, capability by capability. |
47
+ | `codejury run <task>` | Run a named task preset (see [Tasks](#tasks)). |
48
+ | `codejury eval` | Score the golden cases and report precision / recall. |
49
+
50
+ Shared flags: `--orchestrator {single,pipeline,debate,reflexion}`,
51
+ `--provider {anthropic,openai,litellm}`, `--model`, `--format {text,markdown,json}`.
52
+
53
+ ```bash
54
+ # Multi-round adversarial debate, rendered as Markdown
55
+ git diff | codejury audit --orchestrator debate --format markdown - > report.md
56
+
57
+ # Deep whole-repo scan, scoped to a few capabilities to bound the cost
58
+ codejury scan ./myrepo --only secrets,input_validation,crypto
59
+ ```
60
+
61
+ ## Configuration
62
+
63
+ Provider keys are read from the environment (codejury does **not** auto-load
64
+ `.env` -- copy `.env.example` and `source` it):
65
+
66
+ | Variable | Used by |
67
+ |---|---|
68
+ | `ANTHROPIC_API_KEY` | `--provider anthropic` |
69
+ | `OPENAI_API_KEY` | `--provider openai` |
70
+ | `CODEJURY_API_BASE` / `CODEJURY_API_KEY` / `CODEJURY_MODEL` | defaults for `--api-base` / `--api-key` / `--model` (any provider) |
71
+
72
+ The `CODEJURY_*` overrides make a LiteLLM proxy a one-liner:
73
+
74
+ ```bash
75
+ # with CODEJURY_API_BASE / CODEJURY_API_KEY / CODEJURY_MODEL in a sourced .env
76
+ git diff | codejury audit --provider litellm -
77
+ ```
78
+
79
+ ## Tasks
80
+
81
+ A task is a named preset (capabilities + orchestrator + provider + model). It
82
+ lives in a YAML file; the API key always stays in the environment.
83
+
84
+ ```yaml
85
+ # mytasks/proxy_scan.yaml -> codejury run proxy_scan --tasks mytasks
86
+ name: proxy_scan
87
+ orchestrator: debate
88
+ provider: litellm
89
+ model: your-alias
90
+ api_base: https://litellm.example.com # key from CODEJURY_API_KEY
91
+ capabilities: [authn, input_validation, secrets] # omit to check all
92
+ ```
93
+
94
+ ## Capabilities
95
+
96
+ The library covers all 11 OWASP ASVS areas, one YAML each under
97
+ `codejury/data/capabilities/`. These ids are what `--only` and a task's
98
+ `capabilities:` accept:
99
+
100
+ `authn` · `authz` · `session` · `input_validation` · `output_encoding` ·
101
+ `crypto` · `secrets` · `data_protection` · `error_logging` ·
102
+ `business_logic` · `dependency_config`
103
+
104
+ To tune for your codebase, edit these files (add patterns / sharpen wording) --
105
+ no code change needed.
106
+
107
+ ## Architecture
108
+
109
+ ```
110
+ Layer 5 Task preset: source + capabilities + orchestrator + agents
111
+ Layer 4 Capability YAML domain knowledge (authn / authz / ...)
112
+ Layer 3 Orchestrator strategy (single / pipeline / debate / reflexion)
113
+ Source input (diff / repo / function)
114
+ Agent role (finder / challenger / judge / verifier)
115
+ Layer 2 Provider model backend (anthropic / openai / litellm / mock)
116
+ Layer 1 Infrastructure cross-cutting utilities (json parsing, retry, ...)
117
+ ```
118
+
119
+ Layers talk only through typed data, and each is an abstract base class plus
120
+ implementations, so the axes (task / orchestration / model / input) compose
121
+ independently.
122
+
123
+ ## Limitations
124
+
125
+ - **Prompts are a first pass.** Expect false positives and misses on real code.
126
+ Tune by editing the capability YAML and growing the golden set; measure the
127
+ effect with `codejury eval`.
128
+ - **Local-pattern checks are sharper than data-flow ones.** Capabilities judged
129
+ from one spot (weak crypto, hardcoded secrets) are reliable; taint / data-flow
130
+ ones like path traversal over-flag in single-file review because the verifier
131
+ can't see whether a value is attacker-controlled. `scan --callers` adds
132
+ cross-file call sites for provenance (helps some cases, not a full fix); also
133
+ scope with `--only` or challenge findings with `--orchestrator debate`.
134
+ - **`scan` cost scales as files x capabilities.** It is a periodic deep audit,
135
+ not a quick check -- scope it with `--only`. Day to day, audit the diff.
136
+
137
+ ## Development
138
+
139
+ ```bash
140
+ python -m venv .venv && source .venv/bin/activate
141
+ pip install -e ".[dev]"
142
+ pytest
143
+ ```
@@ -44,7 +44,7 @@ class VerifierAgent(Agent):
44
44
  def run(self, ctx: AnalysisContext) -> list[Observation]:
45
45
  verdicts: list[Observation] = []
46
46
  for cap in ctx.capabilities:
47
- prompt = _build_prompt(ctx.artifact.path, ctx.artifact.content, cap)
47
+ prompt = _build_prompt(ctx.artifact.path, ctx.artifact.content, cap, ctx.artifact.context)
48
48
  result = self._provider.complete(
49
49
  system=_SYSTEM,
50
50
  messages=[Message(role="user", content=prompt)],
@@ -70,14 +70,25 @@ def _render_capability(cap: Capability) -> str:
70
70
  return "\n".join(lines)
71
71
 
72
72
 
73
- def _build_prompt(path: str, content: str, cap: Capability) -> str:
73
+ def _build_prompt(path: str, content: str, cap: Capability, context: str = "") -> str:
74
74
  sub_names = ", ".join(cap.sub_capabilities) or "(none)"
75
+ context_block = (
76
+ f"Related code (call sites / usages elsewhere -- for tracing where values come from, "
77
+ f"NOT under review):\n```\n{context}\n```\n\n"
78
+ if context
79
+ else ""
80
+ )
75
81
  return (
76
82
  "Check the code below against this capability.\n\n"
77
83
  f"{_render_capability(cap)}\n\n"
78
84
  f"Code under review ({path}):\n```\n{content}\n```\n\n"
85
+ f"{context_block}"
79
86
  f"For EVERY sub_capability ({sub_names}) output one verdict, even if SECURE "
80
- "or NOT_PRESENT. Cite matched pattern ids and evidence lines.\n\n"
87
+ "or NOT_PRESENT. Cite matched pattern ids and evidence lines.\n"
88
+ "For input-driven issues (injection, path traversal, SSRF), mark VULNERABLE only when "
89
+ "untrusted/external input could plausibly reach the sink in the code shown. A constant, "
90
+ "a stored data field, a value from trusted config, or a path or argument the operator "
91
+ "supplies (e.g. a CLI argument) is not attacker-controlled -- do not flag it.\n\n"
81
92
  "Respond with a single JSON object exactly like:\n" + _JSON_SHAPE
82
93
  )
83
94
 
@@ -11,6 +11,7 @@ import os
11
11
  from codejury.agents.base import Agent
12
12
  from codejury.agents.debate import ChallengerAgent, FinderAgent, JudgeAgent
13
13
  from codejury.agents.verifier import VerifierAgent
14
+ from codejury.domain.artifact import CodeArtifact
14
15
  from codejury.domain.capability import Capability
15
16
  from codejury.domain.context import AnalysisContext
16
17
  from codejury.domain.result import AnalysisResult
@@ -66,15 +67,24 @@ def build_orchestration(
66
67
  return verifier, SingleOrchestrator()
67
68
 
68
69
 
69
- def run_over_source(
70
- source: Source,
70
+ def run_over_artifacts(
71
+ artifacts: list[CodeArtifact],
71
72
  capabilities: list[Capability],
72
73
  agents: dict[str, Agent],
73
74
  orchestrator: Orchestrator,
74
75
  ) -> list[tuple[str, AnalysisResult]]:
75
76
  """Run the orchestration over each artifact, returning (path, result) per artifact."""
76
77
  results = []
77
- for artifact in source.list_artifacts():
78
+ for artifact in artifacts:
78
79
  ctx = AnalysisContext(artifact=artifact, capabilities=capabilities)
79
80
  results.append((artifact.path, orchestrator.run(agents, ctx)))
80
81
  return results
82
+
83
+
84
+ def run_over_source(
85
+ source: Source,
86
+ capabilities: list[Capability],
87
+ agents: dict[str, Agent],
88
+ orchestrator: Orchestrator,
89
+ ) -> list[tuple[str, AnalysisResult]]:
90
+ return run_over_artifacts(source.list_artifacts(), capabilities, agents, orchestrator)
@@ -20,6 +20,7 @@ from codejury.assembly import (
20
20
  STRATEGIES,
21
21
  build_orchestration,
22
22
  make_provider,
23
+ run_over_artifacts,
23
24
  run_over_source,
24
25
  )
25
26
  from codejury.domain.artifact import CodeArtifact
@@ -33,7 +34,9 @@ from codejury.providers.base import Provider
33
34
  from codejury.providers.mock import MockProvider
34
35
  from codejury.reporting import to_json, to_markdown
35
36
  from codejury.resources import CAPABILITIES_DIR, GOLDEN_DIR, TASKS_DIR
37
+ from codejury.sources.chunker import Chunker
36
38
  from codejury.sources.diff import DiffSource
39
+ from codejury.sources.repo import RepoSource
37
40
  from codejury.tasks.base import run_task
38
41
  from codejury.tasks.registry import load_tasks
39
42
 
@@ -69,6 +72,32 @@ def audit(
69
72
  return run_over_source(DiffSource(diff_text), capabilities, agents, orchestrator)
70
73
 
71
74
 
75
+ def scan(
76
+ directory: str,
77
+ capabilities: list[Capability],
78
+ *,
79
+ provider: Provider,
80
+ model: str,
81
+ max_tokens: int = 2048,
82
+ strategy: str = "pipeline",
83
+ extensions: tuple[str, ...] = (".py",),
84
+ max_chars: int = 200_000,
85
+ with_callers: bool = False,
86
+ ) -> list[tuple[str, AnalysisResult]]:
87
+ """Audit every matching file in a directory tree, returning (path, result) per artifact."""
88
+ source = RepoSource(
89
+ directory, extensions=extensions, chunker=Chunker(max_chars=max_chars), with_callers=with_callers
90
+ )
91
+ artifacts = source.list_artifacts()
92
+ calls = len(artifacts) * len(capabilities)
93
+ print(
94
+ f"scanning {len(artifacts)} artifacts x {len(capabilities)} capabilities (~{calls} model calls)",
95
+ file=sys.stderr,
96
+ )
97
+ agents, orchestrator = build_orchestration(strategy, provider=provider, model=model, max_tokens=max_tokens)
98
+ return run_over_artifacts(artifacts, capabilities, agents, orchestrator)
99
+
100
+
72
101
  def _render_dry_run(result: AnalysisResult) -> str:
73
102
  lines = [f"observations: {len(result.observations)}"]
74
103
  for o in result.observations:
@@ -140,6 +169,23 @@ def main(argv: list[str] | None = None) -> int:
140
169
  audit_p.add_argument("--api-base", default=DEFAULT_API_BASE, help="provider base URL (env: CODEJURY_API_BASE)")
141
170
  audit_p.add_argument("--api-key", default=DEFAULT_API_KEY, help="provider API key (env: CODEJURY_API_KEY)")
142
171
 
172
+ scan_p = sub.add_parser("scan", help="audit a whole directory tree (deep, capability by capability)")
173
+ scan_p.add_argument("directory", help="directory to scan")
174
+ scan_p.add_argument("--ext", default=".py", help="comma-separated file extensions (default .py)")
175
+ scan_p.add_argument("--only", default=None, help="comma-separated capability ids to scan (default: all)")
176
+ scan_p.add_argument("--capabilities", default=CAPABILITIES_DIR, help="capability YAML directory")
177
+ scan_p.add_argument("--orchestrator", choices=STRATEGIES, default="pipeline")
178
+ scan_p.add_argument("--provider", choices=PROVIDERS, default="anthropic")
179
+ scan_p.add_argument("--format", choices=_FORMATS, default="text", dest="fmt")
180
+ scan_p.add_argument("--model", default=DEFAULT_MODEL)
181
+ scan_p.add_argument("--max-tokens", type=int, default=2048)
182
+ scan_p.add_argument("--max-chars", type=int, default=200_000, help="chunk budget; default keeps whole files")
183
+ scan_p.add_argument(
184
+ "--callers", action="store_true", help="add cross-file call sites as context (cuts taint false positives)"
185
+ )
186
+ scan_p.add_argument("--api-base", default=DEFAULT_API_BASE, help="provider base URL (env: CODEJURY_API_BASE)")
187
+ scan_p.add_argument("--api-key", default=DEFAULT_API_KEY, help="provider API key (env: CODEJURY_API_KEY)")
188
+
143
189
  run_p = sub.add_parser("run", help="run a named task preset against a unified diff")
144
190
  run_p.add_argument("task", help="task name")
145
191
  run_p.add_argument("diff", nargs="?", default="-", help="unified diff file, or - for stdin")
@@ -171,6 +217,26 @@ def main(argv: list[str] | None = None) -> int:
171
217
  print(_render_results(args.fmt, results))
172
218
  return 0
173
219
 
220
+ if args.command == "scan":
221
+ capabilities = load_capabilities(args.capabilities)
222
+ if args.only:
223
+ wanted = {x.strip() for x in args.only.split(",")}
224
+ capabilities = [c for c in capabilities if c.id in wanted]
225
+ extensions = tuple(e if e.startswith(".") else "." + e for e in args.ext.split(","))
226
+ results = scan(
227
+ args.directory,
228
+ capabilities,
229
+ provider=make_provider(args.provider, api_key=args.api_key, api_base=args.api_base),
230
+ model=args.model,
231
+ max_tokens=args.max_tokens,
232
+ strategy=args.orchestrator,
233
+ extensions=extensions,
234
+ max_chars=args.max_chars,
235
+ with_callers=args.callers,
236
+ )
237
+ print(_render_results(args.fmt, results))
238
+ return 0
239
+
174
240
  if args.command == "run":
175
241
  tasks = load_tasks(args.tasks)
176
242
  if args.task not in tasks:
@@ -44,13 +44,19 @@ sub_capabilities:
44
44
  - id: CMDI-OK-1
45
45
  description: Run subprocesses with an argument list and shell=False
46
46
  signals: ["subprocess.run([", "subprocess.Popen(["]
47
- why_ok: Arguments are passed directly to execve, so the shell never parses input
47
+ why_ok: >-
48
+ Arguments are passed directly to execve, so the shell never parses input. This
49
+ only applies to code that actually spawns a process; an ordinary function,
50
+ method, or library/API call (e.g. provider.complete) is not command execution.
48
51
 
49
52
  anti_patterns:
50
53
  - id: CMDI-BAD-1
51
54
  cwe: CWE-78
52
55
  severity: CRITICAL
53
- description: Invoke a shell with an interpolated command string
56
+ description: >-
57
+ Pass interpolated input to an OS shell or subprocess -- os.system, os.popen,
58
+ subprocess(..., shell=True), or eval/exec. A normal function, method, or
59
+ library/API call is NOT this; flag only an actual shell or process invocation.
54
60
  signals: ["os.system(", "shell=True", "os.popen("]
55
61
  why_bad: Shell metacharacters in input let an attacker run arbitrary commands
56
62
  example_bad: |
@@ -72,15 +78,28 @@ sub_capabilities:
72
78
  signals: ["os.path.realpath", "Path(...).resolve()", "is_relative_to("]
73
79
  why_ok: A resolved path outside the base is rejected before any file access
74
80
 
81
+ - id: PATH-OK-2
82
+ description: >-
83
+ Use a path that is not attacker-controlled -- a data field, a directory read from
84
+ trusted config, or a path the operator passes on the command line
85
+ why_ok: >-
86
+ Traversal needs an external attacker to control the path. A path stored as a
87
+ field, a trusted/configured directory, or an operator-supplied CLI argument is
88
+ not a finding; neither is merely declaring a `path` attribute.
89
+
75
90
  anti_patterns:
76
91
  - id: PATH-BAD-1
77
92
  cwe: CWE-22
78
93
  severity: HIGH
79
- description: Join user input into a filesystem path without containment checks
80
- signals: ["os.path.join(", "open(", "Path("]
81
- why_bad: Sequences like ../ let input escape the intended directory
94
+ description: >-
95
+ Take an externally controlled value (HTTP request, upload, form, query, or message
96
+ field) and use it in a filesystem open/read/write without resolving it and confirming
97
+ it stays in an allowed base. NOT this: a path kept as a data field, a directory from
98
+ trusted config, or a path the operator passes on the CLI.
99
+ signals: ["request.", "upload", "filename", "os.path.join("]
100
+ why_bad: Sequences like ../ let attacker input escape the intended directory
82
101
  example_bad: |
83
- open(os.path.join(UPLOAD_DIR, filename))
102
+ open(os.path.join(UPLOAD_DIR, request.args["filename"]))
84
103
  example_good: |
85
104
  target = (UPLOAD_DIR / filename).resolve()
86
105
  if not target.is_relative_to(UPLOAD_DIR):
@@ -0,0 +1,72 @@
1
+ id: secrets
2
+ name: Secrets Management
3
+ asvs_chapter: V6
4
+ description: How credentials and keys are stored, supplied, and kept out of code, logs, and version control.
5
+
6
+ sub_capabilities:
7
+ storage:
8
+ correct_patterns:
9
+ - id: SEC-OK-1
10
+ description: Load secrets at runtime from environment variables or a secret manager
11
+ signals: ["os.environ[", "os.getenv(", "secretsmanager", "vault"]
12
+ why_ok: >-
13
+ A variable that reads its value from the environment or a secret manager is the
14
+ correct pattern, not a violation -- nothing secret is written in the source.
15
+
16
+ - id: SEC-OK-2
17
+ description: Receive a secret as a function or constructor parameter (dependency injection)
18
+ signals: ["def __init__(self, *, api_key", "api_key: str | None = None", "api_key=api_key"]
19
+ why_ok: >-
20
+ Accepting or forwarding a key through a parameter or variable is correct -- the value
21
+ comes from the caller or the environment, not a literal in the source. Only an actual
22
+ key string written in the code is a finding.
23
+
24
+ anti_patterns:
25
+ - id: SEC-BAD-1
26
+ cwe: CWE-798
27
+ severity: HIGH
28
+ description: >-
29
+ Assign a literal credential string -- an actual key/token value written in the source.
30
+ A variable, parameter, env lookup, or a non-credential string (e.g. a model name or
31
+ URL) that merely holds or forwards a value is NOT this.
32
+ signals: ['api_key = "sk', 'token = "ghp_', 'aws_secret_access_key = "']
33
+ why_bad: The credential leaks with the source and cannot be rotated easily
34
+ example_bad: |
35
+ API_KEY = "sk_live_51HxQ....actual-secret-value" # literal secret in source
36
+ example_good: |
37
+ api_key = os.environ["API_KEY"] # read from env -- fine
38
+ client = Client(api_key=api_key) # passed as a parameter -- fine
39
+
40
+ - id: SEC-BAD-2
41
+ cwe: CWE-259
42
+ severity: HIGH
43
+ description: Assign a literal password string in source
44
+ signals: ['password = "', 'passwd = "']
45
+ why_bad: A fixed password in code is shared, discoverable, and unchangeable
46
+
47
+ exposure:
48
+ correct_patterns:
49
+ - id: SEC-OK-3
50
+ description: Log or render only non-credential data, or redact secrets before logging
51
+ why_ok: >-
52
+ Emitting analysis results, status, or non-secret fields is fine. The risk is logging
53
+ the value of a credential, not handling data in general.
54
+
55
+ anti_patterns:
56
+ - id: SEC-BAD-3
57
+ cwe: CWE-532
58
+ severity: MEDIUM
59
+ description: Write the value of a secret, token, or password to logs or output
60
+ signals: ["log.info(token", "print(password", "logger.debug(secret", "log.info(api_key"]
61
+ why_bad: Logs are widely accessible and long-lived, so a logged secret value spreads
62
+
63
+ - id: SEC-BAD-4
64
+ cwe: CWE-540
65
+ severity: MEDIUM
66
+ description: Commit secrets in config files or a tracked .env
67
+ why_bad: Version history keeps the secret even after it is removed
68
+
69
+ trigger_signals:
70
+ - a literal string assigned to a key, token, password, secret, or credential name
71
+ - imports of a secret manager or vault client
72
+ - .env or config files with credential-looking values
@@ -18,3 +18,6 @@ class CodeArtifact:
18
18
  kind: ArtifactKind
19
19
  path: str # identifier used when building Evidence references
20
20
  content: str # the diff/file/function text the agent analyzes
21
+ # related code (e.g. cross-file call sites) shown to help trace data flow,
22
+ # but not itself under review
23
+ context: str = ""
@@ -0,0 +1,46 @@
1
+ """Lightweight cross-file caller context.
2
+
3
+ For a file under review, find where the functions and classes it defines are
4
+ called elsewhere in the repository. Showing those call sites lets the verifier
5
+ trace where an argument comes from -- which is exactly what single-file review
6
+ lacks for taint-style issues (a path/command that is operator-supplied vs
7
+ attacker-controlled). This is a textual usage finder, not a full call graph.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import ast
13
+ import re
14
+
15
+
16
def defined_names(content: str) -> set[str]:
    """Return the top-level function and class names defined in `content`.

    Only direct children of the module body are inspected, so methods and
    nested functions are not reported. Text that cannot be parsed as Python
    yields an empty set instead of raising.
    """
    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError):
        # ValueError: interpreters before 3.12 reject source containing null
        # bytes with ValueError rather than SyntaxError; treat both as
        # "not parseable" so one odd file cannot abort a whole scan.
        return set()
    return {
        node.name
        for node in tree.body
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef))
    }
27
+
28
+
29
def caller_context(target_path: str, files: dict[str, str], *, max_lines: int = 30) -> str:
    """Return lines elsewhere in `files` that call names defined in `target_path`.

    `files` maps a path to that file's text. Every file other than
    `target_path` is searched, in sorted path order, for a textual
    `name(` occurrence of any top-level function or class defined in the
    target. Each hit is rendered as ``path:lineno: stripped line`` and the
    hits are joined with newlines.

    The match is purely textual: a same-named method call or a `def name(`
    in another file is also reported.

    Returns an empty string when the target defines no names, when nothing
    matches, or when `max_lines` is not positive. At most `max_lines` hits
    are returned.
    """
    if max_lines <= 0:
        # A non-positive cap means "no context"; without this guard the first
        # hit would be appended before the cap was ever checked.
        return ""
    names = defined_names(files.get(target_path, ""))
    if not names:
        return ""
    # word-boundary call: `name(` not preceded by other identifier chars.
    # Names are sorted so the compiled pattern is identical from run to run.
    call = re.compile(r"\b(?:" + "|".join(re.escape(n) for n in sorted(names)) + r")\s*\(")

    hits: list[str] = []
    for path in sorted(files):
        if path == target_path:
            continue
        for lineno, line in enumerate(files[path].splitlines(), 1):
            if call.search(line):
                hits.append(f"{path}:{lineno}: {line.strip()}")
                if len(hits) >= max_lines:
                    return "\n".join(hits)
    return "\n".join(hits)