codejury 0.3.0__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codejury-0.3.0 → codejury-0.4.0}/PKG-INFO +7 -1
- {codejury-0.3.0 → codejury-0.4.0}/README.md +6 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/agents/verifier.py +14 -3
- {codejury-0.3.0 → codejury-0.4.0}/codejury/cli.py +10 -3
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/capabilities/input_validation.yaml +25 -6
- {codejury-0.3.0 → codejury-0.4.0}/codejury/domain/artifact.py +3 -0
- codejury-0.4.0/codejury/sources/callers.py +46 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/sources/chunker.py +4 -1
- {codejury-0.3.0 → codejury-0.4.0}/codejury/sources/repo.py +17 -5
- {codejury-0.3.0 → codejury-0.4.0}/codejury.egg-info/PKG-INFO +7 -1
- {codejury-0.3.0 → codejury-0.4.0}/codejury.egg-info/SOURCES.txt +2 -0
- {codejury-0.3.0 → codejury-0.4.0}/pyproject.toml +1 -1
- codejury-0.4.0/tests/test_callers.py +43 -0
- {codejury-0.3.0 → codejury-0.4.0}/LICENSE +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/__init__.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/agents/__init__.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/agents/base.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/agents/debate.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/agents/mock.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/agents/parsing.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/assembly.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/capabilities/authentication.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/capabilities/authorization.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/capabilities/business_logic.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/capabilities/crypto.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/capabilities/data_protection.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/capabilities/dependency_config.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/capabilities/error_logging.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/capabilities/output_encoding.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/capabilities/secrets.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/capabilities/session.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/golden/authn_bcrypt_password.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/golden/authn_jwt_noverify_vuln.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/golden/authn_jwt_verified_safe.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/golden/authn_sha256_checksum_safe.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/golden/authn_sha256_password.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/golden/authz_idor_vuln.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/golden/authz_owner_safe.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/golden/cmdi_ossystem_vuln.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/golden/cmdi_subprocess_safe.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/golden/crypto_aesgcm_safe.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/golden/crypto_ecb_vuln.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/golden/path_contained_safe.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/golden/path_traversal_vuln.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/golden/secrets_env_safe.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/golden/secrets_hardcoded_vuln.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/golden/sqli_format_vuln.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/golden/sqli_fstring_query.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/golden/sqli_parameterized_query.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/golden/xss_innerhtml_constant_safe.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/golden/xss_innerhtml_vuln.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/tasks/audit_diff_debate.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/data/tasks/quick_scan_single.yaml +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/domain/__init__.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/domain/capability.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/domain/context.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/domain/observation.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/domain/result.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/evaluation.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/infrastructure/__init__.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/infrastructure/json_parse.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/orchestrators/__init__.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/orchestrators/base.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/orchestrators/debate.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/orchestrators/pipeline.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/orchestrators/reflexion.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/orchestrators/single.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/providers/__init__.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/providers/anthropic.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/providers/base.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/providers/litellm.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/providers/mock.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/providers/openai.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/providers/openai_format.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/providers/retry.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/reporting.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/resources.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/sources/__init__.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/sources/base.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/sources/diff.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/sources/function.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/sources/mock.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/tasks/__init__.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/tasks/base.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury/tasks/registry.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury.egg-info/dependency_links.txt +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury.egg-info/entry_points.txt +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury.egg-info/requires.txt +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/codejury.egg-info/top_level.txt +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/setup.cfg +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/tests/test_anthropic_provider.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/tests/test_assembly.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/tests/test_audit_pipeline.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/tests/test_capability.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/tests/test_cli_audit.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/tests/test_context.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/tests/test_debate_agents.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/tests/test_debate_orchestrator.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/tests/test_diff_source.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/tests/test_evaluation.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/tests/test_function_source.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/tests/test_json_parse.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/tests/test_litellm_provider.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/tests/test_openai_provider.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/tests/test_orchestrator.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/tests/test_pipeline_orchestrator.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/tests/test_reflexion_orchestrator.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/tests/test_repo_source.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/tests/test_reporting.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/tests/test_retry_provider.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/tests/test_tasks.py +0 -0
- {codejury-0.3.0 → codejury-0.4.0}/tests/test_verifier.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codejury
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: General-purpose Application Security AI audit framework -- five-layer architecture, capabilities as first-class data
|
|
5
5
|
Author: 4234288
|
|
6
6
|
License-Expression: MIT
|
|
@@ -154,6 +154,12 @@ independently.
|
|
|
154
154
|
- **Prompts are a first pass.** Expect false positives and misses on real code.
|
|
155
155
|
Tune by editing the capability YAML and growing the golden set; measure the
|
|
156
156
|
effect with `codejury eval`.
|
|
157
|
+
- **Local-pattern checks are sharper than data-flow ones.** Capabilities judged
|
|
158
|
+
from one spot (weak crypto, hardcoded secrets) are reliable; taint / data-flow
|
|
159
|
+
ones like path traversal over-flag in single-file review because the verifier
|
|
160
|
+
can't see whether a value is attacker-controlled. `scan --callers` adds
|
|
161
|
+
cross-file call sites for provenance (helps some cases, not a full fix); also
|
|
162
|
+
scope with `--only` or challenge findings with `--orchestrator debate`.
|
|
157
163
|
- **`scan` cost scales as files x capabilities.** It is a periodic deep audit,
|
|
158
164
|
not a quick check -- scope it with `--only`. Day to day, audit the diff.
|
|
159
165
|
|
|
@@ -125,6 +125,12 @@ independently.
|
|
|
125
125
|
- **Prompts are a first pass.** Expect false positives and misses on real code.
|
|
126
126
|
Tune by editing the capability YAML and growing the golden set; measure the
|
|
127
127
|
effect with `codejury eval`.
|
|
128
|
+
- **Local-pattern checks are sharper than data-flow ones.** Capabilities judged
|
|
129
|
+
from one spot (weak crypto, hardcoded secrets) are reliable; taint / data-flow
|
|
130
|
+
ones like path traversal over-flag in single-file review because the verifier
|
|
131
|
+
can't see whether a value is attacker-controlled. `scan --callers` adds
|
|
132
|
+
cross-file call sites for provenance (helps some cases, not a full fix); also
|
|
133
|
+
scope with `--only` or challenge findings with `--orchestrator debate`.
|
|
128
134
|
- **`scan` cost scales as files x capabilities.** It is a periodic deep audit,
|
|
129
135
|
not a quick check -- scope it with `--only`. Day to day, audit the diff.
|
|
130
136
|
|
|
@@ -44,7 +44,7 @@ class VerifierAgent(Agent):
|
|
|
44
44
|
def run(self, ctx: AnalysisContext) -> list[Observation]:
|
|
45
45
|
verdicts: list[Observation] = []
|
|
46
46
|
for cap in ctx.capabilities:
|
|
47
|
-
prompt = _build_prompt(ctx.artifact.path, ctx.artifact.content, cap)
|
|
47
|
+
prompt = _build_prompt(ctx.artifact.path, ctx.artifact.content, cap, ctx.artifact.context)
|
|
48
48
|
result = self._provider.complete(
|
|
49
49
|
system=_SYSTEM,
|
|
50
50
|
messages=[Message(role="user", content=prompt)],
|
|
@@ -70,14 +70,25 @@ def _render_capability(cap: Capability) -> str:
|
|
|
70
70
|
return "\n".join(lines)
|
|
71
71
|
|
|
72
72
|
|
|
73
|
-
def _build_prompt(path: str, content: str, cap: Capability) -> str:
|
|
73
|
+
def _build_prompt(path: str, content: str, cap: Capability, context: str = "") -> str:
|
|
74
74
|
sub_names = ", ".join(cap.sub_capabilities) or "(none)"
|
|
75
|
+
context_block = (
|
|
76
|
+
f"Related code (call sites / usages elsewhere -- for tracing where values come from, "
|
|
77
|
+
f"NOT under review):\n```\n{context}\n```\n\n"
|
|
78
|
+
if context
|
|
79
|
+
else ""
|
|
80
|
+
)
|
|
75
81
|
return (
|
|
76
82
|
"Check the code below against this capability.\n\n"
|
|
77
83
|
f"{_render_capability(cap)}\n\n"
|
|
78
84
|
f"Code under review ({path}):\n```\n{content}\n```\n\n"
|
|
85
|
+
f"{context_block}"
|
|
79
86
|
f"For EVERY sub_capability ({sub_names}) output one verdict, even if SECURE "
|
|
80
|
-
"or NOT_PRESENT. Cite matched pattern ids and evidence lines.\n\n"
|
|
87
|
+
"or NOT_PRESENT. Cite matched pattern ids and evidence lines.\n"
|
|
88
|
+
"For input-driven issues (injection, path traversal, SSRF), mark VULNERABLE only when "
|
|
89
|
+
"untrusted/external input could plausibly reach the sink in the code shown. A constant, "
|
|
90
|
+
"a stored data field, a value from trusted config, or a path or argument the operator "
|
|
91
|
+
"supplies (e.g. a CLI argument) is not attacker-controlled -- do not flag it.\n\n"
|
|
81
92
|
"Respond with a single JSON object exactly like:\n" + _JSON_SHAPE
|
|
82
93
|
)
|
|
83
94
|
|
|
@@ -81,10 +81,13 @@ def scan(
|
|
|
81
81
|
max_tokens: int = 2048,
|
|
82
82
|
strategy: str = "pipeline",
|
|
83
83
|
extensions: tuple[str, ...] = (".py",),
|
|
84
|
-
max_chars: int =
|
|
84
|
+
max_chars: int = 200_000,
|
|
85
|
+
with_callers: bool = False,
|
|
85
86
|
) -> list[tuple[str, AnalysisResult]]:
|
|
86
87
|
"""Audit every matching file in a directory tree, returning (path, result) per artifact."""
|
|
87
|
-
source = RepoSource(
|
|
88
|
+
source = RepoSource(
|
|
89
|
+
directory, extensions=extensions, chunker=Chunker(max_chars=max_chars), with_callers=with_callers
|
|
90
|
+
)
|
|
88
91
|
artifacts = source.list_artifacts()
|
|
89
92
|
calls = len(artifacts) * len(capabilities)
|
|
90
93
|
print(
|
|
@@ -176,7 +179,10 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
176
179
|
scan_p.add_argument("--format", choices=_FORMATS, default="text", dest="fmt")
|
|
177
180
|
scan_p.add_argument("--model", default=DEFAULT_MODEL)
|
|
178
181
|
scan_p.add_argument("--max-tokens", type=int, default=2048)
|
|
179
|
-
scan_p.add_argument("--max-chars", type=int, default=
|
|
182
|
+
scan_p.add_argument("--max-chars", type=int, default=200_000, help="chunk budget; default keeps whole files")
|
|
183
|
+
scan_p.add_argument(
|
|
184
|
+
"--callers", action="store_true", help="add cross-file call sites as context (cuts taint false positives)"
|
|
185
|
+
)
|
|
180
186
|
scan_p.add_argument("--api-base", default=DEFAULT_API_BASE, help="provider base URL (env: CODEJURY_API_BASE)")
|
|
181
187
|
scan_p.add_argument("--api-key", default=DEFAULT_API_KEY, help="provider API key (env: CODEJURY_API_KEY)")
|
|
182
188
|
|
|
@@ -226,6 +232,7 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
226
232
|
strategy=args.orchestrator,
|
|
227
233
|
extensions=extensions,
|
|
228
234
|
max_chars=args.max_chars,
|
|
235
|
+
with_callers=args.callers,
|
|
229
236
|
)
|
|
230
237
|
print(_render_results(args.fmt, results))
|
|
231
238
|
return 0
|
|
@@ -44,13 +44,19 @@ sub_capabilities:
|
|
|
44
44
|
- id: CMDI-OK-1
|
|
45
45
|
description: Run subprocesses with an argument list and shell=False
|
|
46
46
|
signals: ["subprocess.run([", "subprocess.Popen(["]
|
|
47
|
-
why_ok:
|
|
47
|
+
why_ok: >-
|
|
48
|
+
Arguments are passed directly to execve, so the shell never parses input. This
|
|
49
|
+
only applies to code that actually spawns a process; an ordinary function,
|
|
50
|
+
method, or library/API call (e.g. provider.complete) is not command execution.
|
|
48
51
|
|
|
49
52
|
anti_patterns:
|
|
50
53
|
- id: CMDI-BAD-1
|
|
51
54
|
cwe: CWE-78
|
|
52
55
|
severity: CRITICAL
|
|
53
|
-
description:
|
|
56
|
+
description: >-
|
|
57
|
+
Pass interpolated input to an OS shell or subprocess -- os.system, os.popen,
|
|
58
|
+
subprocess(..., shell=True), or eval/exec. A normal function, method, or
|
|
59
|
+
library/API call is NOT this; flag only an actual shell or process invocation.
|
|
54
60
|
signals: ["os.system(", "shell=True", "os.popen("]
|
|
55
61
|
why_bad: Shell metacharacters in input let an attacker run arbitrary commands
|
|
56
62
|
example_bad: |
|
|
@@ -72,15 +78,28 @@ sub_capabilities:
|
|
|
72
78
|
signals: ["os.path.realpath", "Path(...).resolve()", "is_relative_to("]
|
|
73
79
|
why_ok: A resolved path outside the base is rejected before any file access
|
|
74
80
|
|
|
81
|
+
- id: PATH-OK-2
|
|
82
|
+
description: >-
|
|
83
|
+
Use a path that is not attacker-controlled -- a data field, a directory read from
|
|
84
|
+
trusted config, or a path the operator passes on the command line
|
|
85
|
+
why_ok: >-
|
|
86
|
+
Traversal needs an external attacker to control the path. A path stored as a
|
|
87
|
+
field, a trusted/configured directory, or an operator-supplied CLI argument is
|
|
88
|
+
not a finding; neither is merely declaring a `path` attribute.
|
|
89
|
+
|
|
75
90
|
anti_patterns:
|
|
76
91
|
- id: PATH-BAD-1
|
|
77
92
|
cwe: CWE-22
|
|
78
93
|
severity: HIGH
|
|
79
|
-
description:
|
|
80
|
-
|
|
81
|
-
|
|
94
|
+
description: >-
|
|
95
|
+
Take an externally controlled value (HTTP request, upload, form, query, or message
|
|
96
|
+
field) and use it in a filesystem open/read/write without resolving it and confirming
|
|
97
|
+
it stays in an allowed base. NOT this: a path kept as a data field, a directory from
|
|
98
|
+
trusted config, or a path the operator passes on the CLI.
|
|
99
|
+
signals: ["request.", "upload", "filename", "os.path.join("]
|
|
100
|
+
why_bad: Sequences like ../ let attacker input escape the intended directory
|
|
82
101
|
example_bad: |
|
|
83
|
-
open(os.path.join(UPLOAD_DIR, filename))
|
|
102
|
+
open(os.path.join(UPLOAD_DIR, request.args["filename"]))
|
|
84
103
|
example_good: |
|
|
85
104
|
target = (UPLOAD_DIR / filename).resolve()
|
|
86
105
|
if not target.is_relative_to(UPLOAD_DIR):
|
|
@@ -18,3 +18,6 @@ class CodeArtifact:
|
|
|
18
18
|
kind: ArtifactKind
|
|
19
19
|
path: str # identifier used when building Evidence references
|
|
20
20
|
content: str # the diff/file/function text the agent analyzes
|
|
21
|
+
# related code (e.g. cross-file call sites) shown to help trace data flow,
|
|
22
|
+
# but not itself under review
|
|
23
|
+
context: str = ""
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Lightweight cross-file caller context.
|
|
2
|
+
|
|
3
|
+
For a file under review, find where the functions and classes it defines are
|
|
4
|
+
called elsewhere in the repository. Showing those call sites lets the verifier
|
|
5
|
+
trace where an argument comes from -- which is exactly what single-file review
|
|
6
|
+
lacks for taint-style issues (a path/command that is operator-supplied vs
|
|
7
|
+
attacker-controlled). This is a textual usage finder, not a full call graph.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import ast
|
|
13
|
+
import re
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def defined_names(content: str) -> set[str]:
    """Return the top-level function and class names defined in `content`.

    Only direct children of the module body are inspected, so nested
    functions, methods, and plain assignments are not reported.

    Returns an empty set when `content` cannot be parsed, instead of raising.
    """
    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError):
        # Older Python versions reject source containing NUL bytes with
        # ValueError rather than SyntaxError; treat both as "unparseable".
        return set()
    return {
        node.name
        for node in tree.body
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef))
    }
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def caller_context(target_path: str, files: dict[str, str], *, max_lines: int = 30) -> str:
    """Collect lines in other files that call the names defined in `target_path`.

    Args:
        target_path: Key in `files` of the file whose top-level functions and
            classes are looked up.
        files: Mapping of path to file content to search.
        max_lines: Maximum number of call-site lines to return; a value of
            zero or less yields an empty string.

    Returns:
        Newline-joined ``path:lineno: stripped line`` entries, visited in
        sorted path order, or ``""`` when the target defines no names or
        nothing matches. This is a textual match, not a resolved call graph.
    """
    if max_lines <= 0:
        return ""
    names = defined_names(files.get(target_path, ""))
    if not names:
        return ""
    # word-boundary call: `name(` not preceded/followed by other identifier chars.
    # Names are sorted so the compiled pattern is the same on every run.
    call = re.compile(r"\b(?:" + "|".join(re.escape(n) for n in sorted(names)) + r")\s*\(")

    def is_call_site(line: str) -> bool:
        # A same-named `def name(` / `class name(` elsewhere is a definition,
        # not a usage; only count matches not introduced by those keywords.
        for match in call.finditer(line):
            preceding = line[: match.start()].split()
            if not preceding or preceding[-1] not in ("def", "class"):
                return True
        return False

    hits: list[str] = []
    for path in sorted(files):
        if path == target_path:
            continue
        for lineno, line in enumerate(files[path].splitlines(), 1):
            if is_call_site(line):
                hits.append(f"{path}:{lineno}: {line.strip()}")
                if len(hits) >= max_lines:
                    return "\n".join(hits)
    return "\n".join(hits)
|
|
@@ -4,13 +4,16 @@ Splits on line boundaries into pieces of at most ``max_chars``. Small content is
|
|
|
4
4
|
returned unchanged as a single chunk keeping its path; split content gets a
|
|
5
5
|
``path#N`` suffix per chunk. A single line longer than the budget becomes its own
|
|
6
6
|
(over-budget) chunk rather than being cut mid-line.
|
|
7
|
+
|
|
8
|
+
The default budget is large so a whole file stays in one artifact -- the verifier
|
|
9
|
+
needs the full file to trace where a value comes from. Only very large files split.
|
|
7
10
|
"""
|
|
8
11
|
|
|
9
12
|
from __future__ import annotations
|
|
10
13
|
|
|
11
14
|
|
|
12
15
|
class Chunker:
|
|
13
|
-
def __init__(self, max_chars: int =
|
|
16
|
+
def __init__(self, max_chars: int = 200_000) -> None:
|
|
14
17
|
self._max_chars = max_chars
|
|
15
18
|
|
|
16
19
|
def split(self, path: str, content: str) -> list[tuple[str, str]]:
|
|
@@ -11,6 +11,7 @@ from pathlib import Path
|
|
|
11
11
|
|
|
12
12
|
from codejury.domain.artifact import CodeArtifact
|
|
13
13
|
from codejury.sources.base import Source
|
|
14
|
+
from codejury.sources.callers import caller_context
|
|
14
15
|
from codejury.sources.chunker import Chunker
|
|
15
16
|
|
|
16
17
|
_SKIP_DIRS = frozenset({".git", ".venv", "venv", "node_modules", "__pycache__", ".mypy_cache", ".pytest_cache"})
|
|
@@ -24,21 +25,32 @@ class RepoSource(Source):
|
|
|
24
25
|
extensions: tuple[str, ...] = (".py",),
|
|
25
26
|
chunker: Chunker | None = None,
|
|
26
27
|
skip_dirs: frozenset[str] = _SKIP_DIRS,
|
|
28
|
+
with_callers: bool = False,
|
|
27
29
|
) -> None:
|
|
28
30
|
self._root = Path(root)
|
|
29
31
|
self._extensions = extensions
|
|
30
32
|
self._chunker = chunker or Chunker()
|
|
31
33
|
self._skip_dirs = skip_dirs
|
|
34
|
+
self._with_callers = with_callers
|
|
32
35
|
|
|
33
36
|
def list_artifacts(self) -> list[CodeArtifact]:
|
|
37
|
+
files = self._read_files()
|
|
34
38
|
artifacts: list[CodeArtifact] = []
|
|
35
|
-
for
|
|
39
|
+
for rel, content in sorted(files.items()):
|
|
40
|
+
context = caller_context(rel, files) if self._with_callers else ""
|
|
41
|
+
for chunk_path, chunk_content in self._chunker.split(rel, content):
|
|
42
|
+
artifacts.append(
|
|
43
|
+
CodeArtifact(kind="repo", path=chunk_path, content=chunk_content, context=context)
|
|
44
|
+
)
|
|
45
|
+
return artifacts
|
|
46
|
+
|
|
47
|
+
def _read_files(self) -> dict[str, str]:
|
|
48
|
+
files: dict[str, str] = {}
|
|
49
|
+
for path in self._root.rglob("*"):
|
|
36
50
|
if not path.is_file() or path.suffix not in self._extensions:
|
|
37
51
|
continue
|
|
38
52
|
if any(part in self._skip_dirs for part in path.relative_to(self._root).parts):
|
|
39
53
|
continue
|
|
40
54
|
rel = path.relative_to(self._root).as_posix()
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
artifacts.append(CodeArtifact(kind="repo", path=chunk_path, content=chunk_content))
|
|
44
|
-
return artifacts
|
|
55
|
+
files[rel] = path.read_text(encoding="utf-8", errors="replace")
|
|
56
|
+
return files
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codejury
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: General-purpose Application Security AI audit framework -- five-layer architecture, capabilities as first-class data
|
|
5
5
|
Author: 4234288
|
|
6
6
|
License-Expression: MIT
|
|
@@ -154,6 +154,12 @@ independently.
|
|
|
154
154
|
- **Prompts are a first pass.** Expect false positives and misses on real code.
|
|
155
155
|
Tune by editing the capability YAML and growing the golden set; measure the
|
|
156
156
|
effect with `codejury eval`.
|
|
157
|
+
- **Local-pattern checks are sharper than data-flow ones.** Capabilities judged
|
|
158
|
+
from one spot (weak crypto, hardcoded secrets) are reliable; taint / data-flow
|
|
159
|
+
ones like path traversal over-flag in single-file review because the verifier
|
|
160
|
+
can't see whether a value is attacker-controlled. `scan --callers` adds
|
|
161
|
+
cross-file call sites for provenance (helps some cases, not a full fix); also
|
|
162
|
+
scope with `--only` or challenge findings with `--orchestrator debate`.
|
|
157
163
|
- **`scan` cost scales as files x capabilities.** It is a periodic deep audit,
|
|
158
164
|
not a quick check -- scope it with `--only`. Day to day, audit the diff.
|
|
159
165
|
|
|
@@ -76,6 +76,7 @@ codejury/providers/openai_format.py
|
|
|
76
76
|
codejury/providers/retry.py
|
|
77
77
|
codejury/sources/__init__.py
|
|
78
78
|
codejury/sources/base.py
|
|
79
|
+
codejury/sources/callers.py
|
|
79
80
|
codejury/sources/chunker.py
|
|
80
81
|
codejury/sources/diff.py
|
|
81
82
|
codejury/sources/function.py
|
|
@@ -87,6 +88,7 @@ codejury/tasks/registry.py
|
|
|
87
88
|
tests/test_anthropic_provider.py
|
|
88
89
|
tests/test_assembly.py
|
|
89
90
|
tests/test_audit_pipeline.py
|
|
91
|
+
tests/test_callers.py
|
|
90
92
|
tests/test_capability.py
|
|
91
93
|
tests/test_cli_audit.py
|
|
92
94
|
tests/test_context.py
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from codejury.sources.callers import caller_context, defined_names
|
|
2
|
+
from codejury.sources.chunker import Chunker
|
|
3
|
+
from codejury.sources.repo import RepoSource
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_defined_names_top_level_only():
    """Only module-level defs/classes are reported; nested defs and assignments are not."""
    source = "def a():\n def inner(): pass\nclass B: pass\nx = 1\n"
    found = defined_names(source)
    assert found == {"a", "B"}
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def test_defined_names_tolerates_syntax_error():
    """Source that does not parse yields an empty set rather than an exception."""
    unparseable = "def broken(:\n"
    assert defined_names(unparseable) == set()
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def test_caller_context_finds_cross_file_call_sites():
    """A call in another file shows up as `path:lineno: line`; unrelated files do not."""
    repo = {
        "lib.py": "def load_capability(path):\n return open(path)\n",
        "cli.py": "from lib import load_capability\nload_capability(args.cap_dir)\n",
        "other.py": "y = 2\n",
    }
    rendered = caller_context("lib.py", repo)
    assert "cli.py:2: load_capability(args.cap_dir)" in rendered
    assert "other.py" not in rendered
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def test_caller_context_word_boundary_avoids_prefix_matches():
    """A longer identifier that merely starts with a defined name is not a hit."""
    # load_capability must NOT match load_capabilities(
    repo = {
        "lib.py": "def load_capability(path): ...\n",
        "caller.py": "load_capabilities(dirpath)\n",
    }
    rendered = caller_context("lib.py", repo)
    assert rendered == ""
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def test_repo_source_attaches_caller_context_when_enabled(tmp_path):
    """`with_callers=True` fills `context` on artifacts; the default leaves it empty."""
    lib_file = tmp_path / "lib.py"
    cli_file = tmp_path / "cli.py"
    lib_file.write_text("def helper(p):\n return open(p)\n", encoding="utf-8")
    cli_file.write_text("from lib import helper\nhelper(args.path)\n", encoding="utf-8")

    source_on = RepoSource(tmp_path, with_callers=True, chunker=Chunker())
    by_path = {artifact.path: artifact for artifact in source_on.list_artifacts()}
    assert "cli.py:2: helper(args.path)" in by_path["lib.py"].context

    # without the flag, no context
    source_off = RepoSource(tmp_path)
    by_path_off = {artifact.path: artifact for artifact in source_off.list_artifacts()}
    assert by_path_off["lib.py"].context == ""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|