codejury 0.5.0__tar.gz → 0.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codejury-0.5.0 → codejury-0.5.1}/PKG-INFO +9 -6
- {codejury-0.5.0 → codejury-0.5.1}/README.md +8 -5
- {codejury-0.5.0 → codejury-0.5.1}/codejury/cli.py +11 -2
- codejury-0.5.1/codejury/sources/callers.py +104 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/sources/repo.py +16 -2
- {codejury-0.5.0 → codejury-0.5.1}/codejury.egg-info/PKG-INFO +9 -6
- {codejury-0.5.0 → codejury-0.5.1}/pyproject.toml +1 -1
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_callers.py +27 -1
- codejury-0.5.0/codejury/sources/callers.py +0 -46
- {codejury-0.5.0 → codejury-0.5.1}/LICENSE +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/__init__.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/agents/__init__.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/agents/base.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/agents/debate.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/agents/mock.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/agents/parsing.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/agents/refuter.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/agents/verifier.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/assembly.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/capabilities/authentication.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/capabilities/authorization.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/capabilities/business_logic.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/capabilities/crypto.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/capabilities/data_protection.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/capabilities/dependency_config.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/capabilities/error_logging.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/capabilities/input_validation.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/capabilities/output_encoding.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/capabilities/secrets.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/capabilities/session.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/golden/authn_bcrypt_password.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/golden/authn_jwt_noverify_vuln.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/golden/authn_jwt_verified_safe.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/golden/authn_sha256_checksum_safe.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/golden/authn_sha256_password.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/golden/authz_idor_vuln.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/golden/authz_owner_safe.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/golden/cmdi_ossystem_vuln.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/golden/cmdi_subprocess_safe.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/golden/crypto_aesgcm_safe.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/golden/crypto_ecb_vuln.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/golden/path_contained_safe.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/golden/path_traversal_vuln.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/golden/secrets_env_safe.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/golden/secrets_hardcoded_vuln.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/golden/sqli_format_vuln.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/golden/sqli_fstring_query.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/golden/sqli_parameterized_query.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/golden/xss_innerhtml_constant_safe.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/golden/xss_innerhtml_vuln.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/suppressions.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/tasks/audit_diff_debate.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/data/tasks/quick_scan_single.yaml +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/domain/__init__.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/domain/artifact.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/domain/capability.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/domain/context.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/domain/observation.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/domain/result.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/evaluation.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/infrastructure/__init__.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/infrastructure/json_parse.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/integrations/__init__.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/integrations/github.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/orchestrators/__init__.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/orchestrators/base.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/orchestrators/challenge.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/orchestrators/debate.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/orchestrators/pipeline.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/orchestrators/reflexion.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/orchestrators/single.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/providers/__init__.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/providers/anthropic.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/providers/base.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/providers/litellm.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/providers/mock.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/providers/openai.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/providers/openai_format.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/providers/retry.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/reporting.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/resources.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/sources/__init__.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/sources/base.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/sources/chunker.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/sources/diff.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/sources/function.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/sources/mock.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/suppression.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/tasks/__init__.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/tasks/base.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury/tasks/registry.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury.egg-info/SOURCES.txt +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury.egg-info/dependency_links.txt +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury.egg-info/entry_points.txt +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury.egg-info/requires.txt +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/codejury.egg-info/top_level.txt +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/setup.cfg +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_anthropic_provider.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_assembly.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_audit_pipeline.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_capability.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_challenge.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_cli_audit.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_context.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_debate_agents.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_debate_orchestrator.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_diff_source.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_evaluation.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_function_source.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_integrations.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_json_parse.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_litellm_provider.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_openai_provider.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_orchestrator.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_pipeline_orchestrator.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_reflexion_orchestrator.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_repo_source.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_reporting.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_retry_provider.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_suppression.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_tasks.py +0 -0
- {codejury-0.5.0 → codejury-0.5.1}/tests/test_verifier.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codejury
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.5.1
|
|
4
4
|
Summary: General-purpose Application Security AI audit framework -- five-layer architecture, capabilities as first-class data
|
|
5
5
|
Author: AISecLabs
|
|
6
6
|
License-Expression: MIT
|
|
@@ -167,11 +167,14 @@ independently.
|
|
|
167
167
|
- **Local-pattern checks are sharper than data-flow ones.** Capabilities judged
|
|
168
168
|
from one spot (weak crypto, hardcoded secrets) are reliable; taint / data-flow
|
|
169
169
|
ones like path traversal over-flag in single-file review because the verifier
|
|
170
|
-
can't see whether a value is attacker-controlled. Mitigations that
|
|
171
|
-
not fully solve it: `scan --callers` (
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
170
|
+
can't see whether a value is attacker-controlled. Mitigations that add context
|
|
171
|
+
but do not fully solve it: `scan --callers` (where this file's functions are
|
|
172
|
+
called) and `scan --callees` (the called code it delegates to, so a sink in
|
|
173
|
+
another file is visible) -- pair them for both directions; `--orchestrator
|
|
174
|
+
challenge` (a recall-safe
|
|
175
|
+
refutation pass that drops only provably-safe flags); `--only` to scope; or
|
|
176
|
+
`--orchestrator debate`. Real taint precision still needs data-flow analysis,
|
|
177
|
+
not model skepticism.
|
|
175
178
|
- **`scan` cost scales as files x capabilities.** It is a periodic deep audit,
|
|
176
179
|
not a quick check -- scope it with `--only`. Day to day, audit the diff.
|
|
177
180
|
|
|
@@ -138,11 +138,14 @@ independently.
|
|
|
138
138
|
- **Local-pattern checks are sharper than data-flow ones.** Capabilities judged
|
|
139
139
|
from one spot (weak crypto, hardcoded secrets) are reliable; taint / data-flow
|
|
140
140
|
ones like path traversal over-flag in single-file review because the verifier
|
|
141
|
-
can't see whether a value is attacker-controlled. Mitigations that
|
|
142
|
-
not fully solve it: `scan --callers` (
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
141
|
+
can't see whether a value is attacker-controlled. Mitigations that add context
|
|
142
|
+
but do not fully solve it: `scan --callers` (where this file's functions are
|
|
143
|
+
called) and `scan --callees` (the called code it delegates to, so a sink in
|
|
144
|
+
another file is visible) -- pair them for both directions; `--orchestrator
|
|
145
|
+
challenge` (a recall-safe
|
|
146
|
+
refutation pass that drops only provably-safe flags); `--only` to scope; or
|
|
147
|
+
`--orchestrator debate`. Real taint precision still needs data-flow analysis,
|
|
148
|
+
not model skepticism.
|
|
146
149
|
- **`scan` cost scales as files x capabilities.** It is a periodic deep audit,
|
|
147
150
|
not a quick check -- scope it with `--only`. Day to day, audit the diff.
|
|
148
151
|
|
|
@@ -86,10 +86,15 @@ def scan(
|
|
|
86
86
|
extensions: tuple[str, ...] = (".py",),
|
|
87
87
|
max_chars: int = 200_000,
|
|
88
88
|
with_callers: bool = False,
|
|
89
|
+
with_callees: bool = False,
|
|
89
90
|
) -> list[tuple[str, AnalysisResult]]:
|
|
90
91
|
"""Audit every matching file in a directory tree, returning (path, result) per artifact."""
|
|
91
92
|
source = RepoSource(
|
|
92
|
-
directory,
|
|
93
|
+
directory,
|
|
94
|
+
extensions=extensions,
|
|
95
|
+
chunker=Chunker(max_chars=max_chars),
|
|
96
|
+
with_callers=with_callers,
|
|
97
|
+
with_callees=with_callees,
|
|
93
98
|
)
|
|
94
99
|
artifacts = source.list_artifacts()
|
|
95
100
|
calls = len(artifacts) * len(capabilities)
|
|
@@ -231,7 +236,10 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
231
236
|
scan_p.add_argument("--max-tokens", type=int, default=2048)
|
|
232
237
|
scan_p.add_argument("--max-chars", type=int, default=200_000, help="chunk budget; default keeps whole files")
|
|
233
238
|
scan_p.add_argument(
|
|
234
|
-
"--callers", action="store_true", help="add cross-file
|
|
239
|
+
"--callers", action="store_true", help="add cross-file context: where this file's functions are called"
|
|
240
|
+
)
|
|
241
|
+
scan_p.add_argument(
|
|
242
|
+
"--callees", action="store_true", help="add cross-file context: the called code this file delegates to"
|
|
235
243
|
)
|
|
236
244
|
scan_p.add_argument("--api-base", default=DEFAULT_API_BASE, help="provider base URL (env: CODEJURY_API_BASE)")
|
|
237
245
|
scan_p.add_argument("--api-key", default=DEFAULT_API_KEY, help="provider API key (env: CODEJURY_API_KEY)")
|
|
@@ -289,6 +297,7 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
289
297
|
extensions=extensions,
|
|
290
298
|
max_chars=args.max_chars,
|
|
291
299
|
with_callers=args.callers,
|
|
300
|
+
with_callees=args.callees,
|
|
292
301
|
)
|
|
293
302
|
results = _maybe_suppress(results, not args.no_suppress)
|
|
294
303
|
print(_render_results(args.fmt, results))
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Lightweight cross-file caller context.
|
|
2
|
+
|
|
3
|
+
For a file under review, find where the functions and classes it defines are
|
|
4
|
+
called elsewhere in the repository. Showing those call sites lets the verifier
|
|
5
|
+
trace where an argument comes from -- which is exactly what single-file review
|
|
6
|
+
lacks for taint-style issues (a path/command that is operator-supplied vs
|
|
7
|
+
attacker-controlled). This is a textual usage finder, not a full call graph.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import ast
|
|
13
|
+
import re
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def defined_names(content: str) -> set[str]:
|
|
17
|
+
"""Top-level function and class names defined in `content`."""
|
|
18
|
+
try:
|
|
19
|
+
tree = ast.parse(content)
|
|
20
|
+
except SyntaxError:
|
|
21
|
+
return set()
|
|
22
|
+
return {
|
|
23
|
+
node.name
|
|
24
|
+
for node in tree.body
|
|
25
|
+
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef))
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _symbol_sources(files: dict[str, str]) -> dict[str, list[tuple[str, str]]]:
|
|
30
|
+
"""Map each defined function/class/method name to its (path, source) definitions."""
|
|
31
|
+
out: dict[str, list[tuple[str, str]]] = {}
|
|
32
|
+
for path, content in files.items():
|
|
33
|
+
try:
|
|
34
|
+
tree = ast.parse(content)
|
|
35
|
+
except SyntaxError:
|
|
36
|
+
continue
|
|
37
|
+
for node in ast.walk(tree):
|
|
38
|
+
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
|
|
39
|
+
src = ast.get_source_segment(content, node)
|
|
40
|
+
if src:
|
|
41
|
+
out.setdefault(node.name, []).append((path, src))
|
|
42
|
+
return out
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _called_names(content: str) -> set[str]:
|
|
46
|
+
try:
|
|
47
|
+
tree = ast.parse(content)
|
|
48
|
+
except SyntaxError:
|
|
49
|
+
return set()
|
|
50
|
+
names: set[str] = set()
|
|
51
|
+
for node in ast.walk(tree):
|
|
52
|
+
if isinstance(node, ast.Call):
|
|
53
|
+
func = node.func
|
|
54
|
+
if isinstance(func, ast.Name):
|
|
55
|
+
names.add(func.id)
|
|
56
|
+
elif isinstance(func, ast.Attribute):
|
|
57
|
+
names.add(func.attr)
|
|
58
|
+
return names
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def callee_context(target_path: str, files: dict[str, str], *, max_chars: int = 12_000) -> str:
|
|
62
|
+
"""Source of functions that `target_path` calls but that are defined in other files.
|
|
63
|
+
|
|
64
|
+
This is the forward direction -- it puts the called code (where a vulnerability
|
|
65
|
+
often lives, e.g. a manager a view delegates to) in front of the verifier, which
|
|
66
|
+
single-file review cannot see.
|
|
67
|
+
"""
|
|
68
|
+
symbols = _symbol_sources(files)
|
|
69
|
+
own = defined_names(files.get(target_path, ""))
|
|
70
|
+
blocks: list[str] = []
|
|
71
|
+
total = 0
|
|
72
|
+
for name in sorted(_called_names(files.get(target_path, ""))):
|
|
73
|
+
if name in own:
|
|
74
|
+
continue
|
|
75
|
+
for path, src in symbols.get(name, []):
|
|
76
|
+
if path == target_path:
|
|
77
|
+
continue
|
|
78
|
+
block = f"# {path} -> {name}\n{src}"
|
|
79
|
+
if total + len(block) > max_chars:
|
|
80
|
+
return "\n\n".join(blocks)
|
|
81
|
+
blocks.append(block)
|
|
82
|
+
total += len(block)
|
|
83
|
+
break # one definition per called name is enough context
|
|
84
|
+
return "\n\n".join(blocks)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def caller_context(target_path: str, files: dict[str, str], *, max_lines: int = 30) -> str:
|
|
88
|
+
"""Lines elsewhere in `files` that call the names defined in `target_path`."""
|
|
89
|
+
names = defined_names(files.get(target_path, ""))
|
|
90
|
+
if not names:
|
|
91
|
+
return ""
|
|
92
|
+
# word-boundary call: `name(` not preceded/followed by other identifier chars
|
|
93
|
+
call = re.compile(r"\b(?:" + "|".join(re.escape(n) for n in names) + r")\s*\(")
|
|
94
|
+
|
|
95
|
+
hits: list[str] = []
|
|
96
|
+
for path in sorted(files):
|
|
97
|
+
if path == target_path:
|
|
98
|
+
continue
|
|
99
|
+
for lineno, line in enumerate(files[path].splitlines(), 1):
|
|
100
|
+
if call.search(line):
|
|
101
|
+
hits.append(f"{path}:{lineno}: {line.strip()}")
|
|
102
|
+
if len(hits) >= max_lines:
|
|
103
|
+
return "\n".join(hits)
|
|
104
|
+
return "\n".join(hits)
|
|
@@ -11,7 +11,7 @@ from pathlib import Path
|
|
|
11
11
|
|
|
12
12
|
from codejury.domain.artifact import CodeArtifact
|
|
13
13
|
from codejury.sources.base import Source
|
|
14
|
-
from codejury.sources.callers import caller_context
|
|
14
|
+
from codejury.sources.callers import caller_context, callee_context
|
|
15
15
|
from codejury.sources.chunker import Chunker
|
|
16
16
|
|
|
17
17
|
_SKIP_DIRS = frozenset({".git", ".venv", "venv", "node_modules", "__pycache__", ".mypy_cache", ".pytest_cache"})
|
|
@@ -26,24 +26,38 @@ class RepoSource(Source):
|
|
|
26
26
|
chunker: Chunker | None = None,
|
|
27
27
|
skip_dirs: frozenset[str] = _SKIP_DIRS,
|
|
28
28
|
with_callers: bool = False,
|
|
29
|
+
with_callees: bool = False,
|
|
29
30
|
) -> None:
|
|
30
31
|
self._root = Path(root)
|
|
31
32
|
self._extensions = extensions
|
|
32
33
|
self._chunker = chunker or Chunker()
|
|
33
34
|
self._skip_dirs = skip_dirs
|
|
34
35
|
self._with_callers = with_callers
|
|
36
|
+
self._with_callees = with_callees
|
|
35
37
|
|
|
36
38
|
def list_artifacts(self) -> list[CodeArtifact]:
|
|
37
39
|
files = self._read_files()
|
|
38
40
|
artifacts: list[CodeArtifact] = []
|
|
39
41
|
for rel, content in sorted(files.items()):
|
|
40
|
-
context =
|
|
42
|
+
context = self._context(rel, files)
|
|
41
43
|
for chunk_path, chunk_content in self._chunker.split(rel, content):
|
|
42
44
|
artifacts.append(
|
|
43
45
|
CodeArtifact(kind="repo", path=chunk_path, content=chunk_content, context=context)
|
|
44
46
|
)
|
|
45
47
|
return artifacts
|
|
46
48
|
|
|
49
|
+
def _context(self, rel: str, files: dict[str, str]) -> str:
|
|
50
|
+
parts = []
|
|
51
|
+
if self._with_callers:
|
|
52
|
+
callers = caller_context(rel, files)
|
|
53
|
+
if callers:
|
|
54
|
+
parts.append("Callers (where this file's functions are used):\n" + callers)
|
|
55
|
+
if self._with_callees:
|
|
56
|
+
callees = callee_context(rel, files)
|
|
57
|
+
if callees:
|
|
58
|
+
parts.append("Callees (functions this file calls, defined elsewhere):\n" + callees)
|
|
59
|
+
return "\n\n".join(parts)
|
|
60
|
+
|
|
47
61
|
def _read_files(self) -> dict[str, str]:
|
|
48
62
|
files: dict[str, str] = {}
|
|
49
63
|
for path in self._root.rglob("*"):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codejury
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.5.1
|
|
4
4
|
Summary: General-purpose Application Security AI audit framework -- five-layer architecture, capabilities as first-class data
|
|
5
5
|
Author: AISecLabs
|
|
6
6
|
License-Expression: MIT
|
|
@@ -167,11 +167,14 @@ independently.
|
|
|
167
167
|
- **Local-pattern checks are sharper than data-flow ones.** Capabilities judged
|
|
168
168
|
from one spot (weak crypto, hardcoded secrets) are reliable; taint / data-flow
|
|
169
169
|
ones like path traversal over-flag in single-file review because the verifier
|
|
170
|
-
can't see whether a value is attacker-controlled. Mitigations that
|
|
171
|
-
not fully solve it: `scan --callers` (
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
170
|
+
can't see whether a value is attacker-controlled. Mitigations that add context
|
|
171
|
+
but do not fully solve it: `scan --callers` (where this file's functions are
|
|
172
|
+
called) and `scan --callees` (the called code it delegates to, so a sink in
|
|
173
|
+
another file is visible) -- pair them for both directions; `--orchestrator
|
|
174
|
+
challenge` (a recall-safe
|
|
175
|
+
refutation pass that drops only provably-safe flags); `--only` to scope; or
|
|
176
|
+
`--orchestrator debate`. Real taint precision still needs data-flow analysis,
|
|
177
|
+
not model skepticism.
|
|
175
178
|
- **`scan` cost scales as files x capabilities.** It is a periodic deep audit,
|
|
176
179
|
not a quick check -- scope it with `--only`. Day to day, audit the diff.
|
|
177
180
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from codejury.sources.callers import caller_context, defined_names
|
|
1
|
+
from codejury.sources.callers import callee_context, caller_context, defined_names
|
|
2
2
|
from codejury.sources.chunker import Chunker
|
|
3
3
|
from codejury.sources.repo import RepoSource
|
|
4
4
|
|
|
@@ -32,6 +32,32 @@ def test_caller_context_word_boundary_avoids_prefix_matches():
|
|
|
32
32
|
assert caller_context("lib.py", files) == ""
|
|
33
33
|
|
|
34
34
|
|
|
35
|
+
def test_callee_context_pulls_in_called_code_from_other_files():
|
|
36
|
+
files = {
|
|
37
|
+
"views.py": "from m import bind\ndef handler(req):\n return bind(req.user_handle, req.node)\n",
|
|
38
|
+
"m.py": "def bind(user_handle, node):\n Dao.update(user_handle, node) # no ownership check\n",
|
|
39
|
+
"unrelated.py": "def other(): pass\n",
|
|
40
|
+
}
|
|
41
|
+
ctx = callee_context("views.py", files)
|
|
42
|
+
assert "m.py -> bind" in ctx
|
|
43
|
+
assert "Dao.update(user_handle, node)" in ctx # the called code is surfaced
|
|
44
|
+
assert "other" not in ctx # uncalled code is not
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def test_callee_context_excludes_same_file_definitions():
|
|
48
|
+
files = {"a.py": "def helper(): pass\ndef main():\n helper()\n"}
|
|
49
|
+
assert callee_context("a.py", files) == "" # helper is local, not cross-file
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def test_repo_source_full_review_attaches_callers_and_callees(tmp_path):
|
|
53
|
+
(tmp_path / "views.py").write_text("from m import bind\ndef handler(r):\n return bind(r.h)\n", encoding="utf-8")
|
|
54
|
+
(tmp_path / "m.py").write_text("def bind(h):\n return open(h)\n", encoding="utf-8")
|
|
55
|
+
arts = {a.path: a for a in RepoSource(tmp_path, with_callers=True, with_callees=True).list_artifacts()}
|
|
56
|
+
# the view's artifact should now contain the called bind() source from m.py
|
|
57
|
+
assert "Callees" in arts["views.py"].context
|
|
58
|
+
assert "def bind(h)" in arts["views.py"].context
|
|
59
|
+
|
|
60
|
+
|
|
35
61
|
def test_repo_source_attaches_caller_context_when_enabled(tmp_path):
|
|
36
62
|
(tmp_path / "lib.py").write_text("def helper(p):\n return open(p)\n", encoding="utf-8")
|
|
37
63
|
(tmp_path / "cli.py").write_text("from lib import helper\nhelper(args.path)\n", encoding="utf-8")
|
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
"""Lightweight cross-file caller context.
|
|
2
|
-
|
|
3
|
-
For a file under review, find where the functions and classes it defines are
|
|
4
|
-
called elsewhere in the repository. Showing those call sites lets the verifier
|
|
5
|
-
trace where an argument comes from -- which is exactly what single-file review
|
|
6
|
-
lacks for taint-style issues (a path/command that is operator-supplied vs
|
|
7
|
-
attacker-controlled). This is a textual usage finder, not a full call graph.
|
|
8
|
-
"""
|
|
9
|
-
|
|
10
|
-
from __future__ import annotations
|
|
11
|
-
|
|
12
|
-
import ast
|
|
13
|
-
import re
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def defined_names(content: str) -> set[str]:
|
|
17
|
-
"""Top-level function and class names defined in `content`."""
|
|
18
|
-
try:
|
|
19
|
-
tree = ast.parse(content)
|
|
20
|
-
except SyntaxError:
|
|
21
|
-
return set()
|
|
22
|
-
return {
|
|
23
|
-
node.name
|
|
24
|
-
for node in tree.body
|
|
25
|
-
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef))
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def caller_context(target_path: str, files: dict[str, str], *, max_lines: int = 30) -> str:
|
|
30
|
-
"""Lines elsewhere in `files` that call the names defined in `target_path`."""
|
|
31
|
-
names = defined_names(files.get(target_path, ""))
|
|
32
|
-
if not names:
|
|
33
|
-
return ""
|
|
34
|
-
# word-boundary call: `name(` not preceded/followed by other identifier chars
|
|
35
|
-
call = re.compile(r"\b(?:" + "|".join(re.escape(n) for n in names) + r")\s*\(")
|
|
36
|
-
|
|
37
|
-
hits: list[str] = []
|
|
38
|
-
for path in sorted(files):
|
|
39
|
-
if path == target_path:
|
|
40
|
-
continue
|
|
41
|
-
for lineno, line in enumerate(files[path].splitlines(), 1):
|
|
42
|
-
if call.search(line):
|
|
43
|
-
hits.append(f"{path}:{lineno}: {line.strip()}")
|
|
44
|
-
if len(hits) >= max_lines:
|
|
45
|
-
return "\n".join(hits)
|
|
46
|
-
return "\n".join(hits)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|