code-review-graph-codeblackwell 2.3.6.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_review_graph/__init__.py +20 -0
- code_review_graph/__main__.py +4 -0
- code_review_graph/analysis.py +410 -0
- code_review_graph/changes.py +409 -0
- code_review_graph/cli.py +1255 -0
- code_review_graph/communities.py +874 -0
- code_review_graph/constants.py +23 -0
- code_review_graph/context_savings.py +317 -0
- code_review_graph/custom_languages.py +322 -0
- code_review_graph/daemon.py +1009 -0
- code_review_graph/daemon_cli.py +320 -0
- code_review_graph/docs/LLM-OPTIMIZED-REFERENCE.md +71 -0
- code_review_graph/embeddings.py +1006 -0
- code_review_graph/enrich.py +303 -0
- code_review_graph/eval/__init__.py +33 -0
- code_review_graph/eval/benchmarks/__init__.py +1 -0
- code_review_graph/eval/benchmarks/agent_baseline.py +193 -0
- code_review_graph/eval/benchmarks/build_performance.py +60 -0
- code_review_graph/eval/benchmarks/flow_completeness.py +36 -0
- code_review_graph/eval/benchmarks/impact_accuracy.py +220 -0
- code_review_graph/eval/benchmarks/multi_hop_retrieval.py +125 -0
- code_review_graph/eval/benchmarks/search_quality.py +59 -0
- code_review_graph/eval/benchmarks/token_efficiency.py +143 -0
- code_review_graph/eval/configs/code-review-graph.yaml +50 -0
- code_review_graph/eval/configs/express.yaml +45 -0
- code_review_graph/eval/configs/fastapi.yaml +48 -0
- code_review_graph/eval/configs/flask.yaml +50 -0
- code_review_graph/eval/configs/gin.yaml +51 -0
- code_review_graph/eval/configs/httpx.yaml +48 -0
- code_review_graph/eval/reporter.py +301 -0
- code_review_graph/eval/runner.py +211 -0
- code_review_graph/eval/scorer.py +85 -0
- code_review_graph/eval/token_benchmark.py +182 -0
- code_review_graph/exports.py +409 -0
- code_review_graph/flows.py +698 -0
- code_review_graph/graph.py +1427 -0
- code_review_graph/graph_diff.py +122 -0
- code_review_graph/hints.py +384 -0
- code_review_graph/incremental.py +1245 -0
- code_review_graph/jedi_resolver.py +303 -0
- code_review_graph/main.py +1079 -0
- code_review_graph/memory.py +142 -0
- code_review_graph/migrations.py +284 -0
- code_review_graph/parser.py +6957 -0
- code_review_graph/postprocessing.py +134 -0
- code_review_graph/prompts.py +159 -0
- code_review_graph/refactor.py +852 -0
- code_review_graph/registry.py +319 -0
- code_review_graph/rescript_resolver.py +206 -0
- code_review_graph/search.py +447 -0
- code_review_graph/skills.py +1481 -0
- code_review_graph/spring_resolver.py +200 -0
- code_review_graph/temporal_resolver.py +199 -0
- code_review_graph/token_benchmark.py +125 -0
- code_review_graph/tools/__init__.py +156 -0
- code_review_graph/tools/_common.py +176 -0
- code_review_graph/tools/analysis_tools.py +184 -0
- code_review_graph/tools/build.py +541 -0
- code_review_graph/tools/community_tools.py +246 -0
- code_review_graph/tools/context.py +152 -0
- code_review_graph/tools/docs.py +274 -0
- code_review_graph/tools/flows_tools.py +176 -0
- code_review_graph/tools/query.py +692 -0
- code_review_graph/tools/refactor_tools.py +168 -0
- code_review_graph/tools/registry_tools.py +125 -0
- code_review_graph/tools/review.py +477 -0
- code_review_graph/tsconfig_resolver.py +257 -0
- code_review_graph/visualization.py +2184 -0
- code_review_graph/wiki.py +305 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/METADATA +718 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/RECORD +74 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/WHEEL +4 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/entry_points.txt +3 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
"""Post-build Jedi enrichment for Python call resolution.
|
|
2
|
+
|
|
3
|
+
After tree-sitter parsing, many method calls on lowercase-receiver variables
|
|
4
|
+
are dropped (e.g. ``svc.authenticate()`` where ``svc = factory()``). Jedi
|
|
5
|
+
can resolve these by tracing return types across files.
|
|
6
|
+
|
|
7
|
+
This module runs as a post-build step: it re-walks Python ASTs to find
|
|
8
|
+
dropped calls, uses ``jedi.Script.goto()`` to resolve them, and adds the
|
|
9
|
+
resulting CALLS edges to the graph database.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import logging
|
|
15
|
+
import os
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Optional
|
|
18
|
+
|
|
19
|
+
from .parser import CodeParser, EdgeInfo
|
|
20
|
+
from .parser import _is_test_file as _parser_is_test_file
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
_SELF_NAMES = frozenset({"self", "cls", "super"})
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _scoped_jedi_root(py_files, repo_root: Path) -> Path:
    """Choose the directory handed to ``jedi.Project`` as its ``path``.

    Returns the deepest directory common to the parents of all *py_files*
    when that directory lies inside *repo_root*; otherwise returns
    *repo_root* itself.

    Args:
        py_files: Paths (strings) of the Python files known to the graph.
        repo_root: Resolved repository root.
    """
    py_dirs = sorted({str(Path(f).parent) for f in py_files})
    if not py_dirs:
        return repo_root
    try:
        # commonpath raises ValueError when absolute and relative paths are
        # mixed (or, on Windows, when paths live on different drives).
        common = Path(os.path.commonpath(py_dirs))
        # Component-wise containment: a plain string-prefix test would accept
        # a sibling such as "/repo-old" as being inside "/repo".
        common.relative_to(repo_root)
    except ValueError:
        return repo_root
    return common


def enrich_jedi_calls(store, repo_root: Path) -> dict:
    """Resolve untracked Python method calls via Jedi.

    Walks the Python files recorded in *store*, finds ``receiver.method()``
    calls reported by ``_find_untracked_method_calls``, resolves each with
    ``jedi.Script.goto()``, and upserts a CALLS edge for every definition
    that lives under *repo_root*.

    Args:
        store: Graph store. This function uses ``get_all_files()``,
            ``get_nodes_by_file()``, ``upsert_edge()``, ``commit()`` and the
            underlying ``_conn`` connection.
        repo_root: Repository root; resolved to an absolute path before use.

    Returns:
        A stats dict. After a full run it has ``resolved``, ``files`` and
        ``errors``. Early exits (no Python files, no Python parser, nothing
        pending) return only ``resolved`` and ``files``. When Jedi cannot be
        imported it is ``{"skipped": True, "reason": "jedi not installed"}``.
    """
    try:
        import jedi
    except ImportError:
        logger.info("Jedi not installed, skipping Python enrichment")
        return {"skipped": True, "reason": "jedi not installed"}

    repo_root = Path(repo_root).resolve()

    # Get Python files from the graph — skip early if none
    all_files = store.get_all_files()
    py_files = [f for f in all_files if f.endswith(".py")]

    if not py_files:
        return {"resolved": 0, "files": 0}

    # Scope the Jedi project to Python-only directories to avoid scanning
    # non-Python files (e.g. node_modules, TS sources). This matters for
    # polyglot monorepos where jedi.Project(path=repo_root) would scan
    # thousands of irrelevant files during initialization.
    project = jedi.Project(
        path=str(_scoped_jedi_root(py_files, repo_root)),
        added_sys_path=[str(repo_root)],
        smart_sys_path=False,
    )

    # Pre-parse all Python files to find which ones have pending method calls.
    # This avoids expensive Jedi Script creation for files with nothing to resolve.
    parser = CodeParser()
    ts_parser = parser._get_parser("python")
    if not ts_parser:
        return {"resolved": 0, "files": 0}

    # Build set of method names that actually exist in project code.
    # No point asking Jedi to resolve `logger.getLogger()` if no project
    # file defines a function called `getLogger`.
    project_func_names = {
        r["name"]
        for r in store._conn.execute(
            "SELECT DISTINCT name FROM nodes WHERE kind IN ('Function', 'Test')"
        ).fetchall()
    }

    files_with_pending: list[tuple[str, bytes, list]] = []
    total_skipped = 0
    for file_path in py_files:
        try:
            source = Path(file_path).read_bytes()
        except OSError:  # includes PermissionError
            continue
        tree = ts_parser.parse(source)
        is_test = _parser_is_test_file(file_path)
        pending = _find_untracked_method_calls(tree.root_node, is_test)
        if pending:
            # Only keep calls whose method name exists in project code
            filtered = [p for p in pending if p[2] in project_func_names]
            total_skipped += len(pending) - len(filtered)
            if filtered:
                files_with_pending.append((file_path, source, filtered))

    if not files_with_pending:
        return {"resolved": 0, "files": 0}

    logger.debug(
        "Jedi: %d/%d Python files have pending calls (%d calls skipped — no project target)",
        len(files_with_pending), len(py_files), total_skipped,
    )

    resolved_count = 0
    files_enriched = 0
    errors = 0

    for file_path, source, pending in files_with_pending:
        source_text = source.decode("utf-8", errors="replace")

        # Existing CALLS edges for this file, keyed by (source, line), so a
        # call site that already has an edge is not resolved again.
        existing = {
            (edge.source_qualified, edge.line)
            for edge in _get_file_call_edges(store, file_path)
        }

        # Function nodes from the DB for enclosing-function lookup
        func_nodes = [
            n for n in store.get_nodes_by_file(file_path)
            if n.kind in ("Function", "Test")
        ]

        # Create Jedi script once per file
        try:
            script = jedi.Script(source_text, path=file_path, project=project)
        except Exception as e:
            logger.debug("Jedi failed to load %s: %s", file_path, e)
            errors += 1
            continue

        file_resolved = 0
        for jedi_line, col, _method_name, _enclosing_name in pending:
            # Qualified name of the enclosing function; calls outside any
            # function are attributed to the file itself (module level).
            enclosing = _find_enclosing(func_nodes, jedi_line)
            if not enclosing:
                enclosing = file_path

            # Skip if we already have a CALLS edge from this source at this line
            if (enclosing, jedi_line) in existing:
                continue

            # Ask Jedi to resolve. Best-effort: Jedi may fail on malformed
            # code, so a failure only skips this call site.
            try:
                names = script.goto(jedi_line, col)
            except Exception as e:
                logger.debug(
                    "Jedi goto failed at %s:%d:%d: %s", file_path, jedi_line, col, e,
                )
                continue

            if not names:
                continue

            name = names[0]
            if not name.module_path:
                continue

            module_path = Path(name.module_path).resolve()

            # Only emit edges for project-internal definitions
            try:
                module_path.relative_to(repo_root)
            except ValueError:
                continue

            # Build qualified target: file_path::Class.method or file_path::func
            target_file = str(module_path)
            parent = name.parent()
            if parent and parent.type == "class":
                target = f"{target_file}::{parent.name}.{name.name}"
            else:
                target = f"{target_file}::{name.name}"

            store.upsert_edge(EdgeInfo(
                kind="CALLS",
                source=enclosing,
                target=target,
                file_path=file_path,
                line=jedi_line,
            ))
            existing.add((enclosing, jedi_line))
            file_resolved += 1

        if file_resolved:
            files_enriched += 1
            resolved_count += file_resolved

    # Commit only when at least one edge was written.
    if resolved_count:
        store.commit()
        logger.info(
            "Jedi enrichment: resolved %d calls in %d files",
            resolved_count, files_enriched,
        )

    return {
        "resolved": resolved_count,
        "files": files_enriched,
        "errors": errors,
    }
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _get_file_call_edges(store, file_path: str):
    """Return the stored CALLS edges whose ``file_path`` column equals *file_path*.

    Rows are read through ``store._conn`` and wrapped in ``GraphEdge``
    objects; ``extra`` is always set to an empty dict.
    """
    cursor = store._conn.execute(
        "SELECT * FROM edges WHERE file_path = ? AND kind = 'CALLS'",
        (file_path,),
    )
    records = cursor.fetchall()

    # Imported at call time rather than at module import time.
    # NOTE(review): presumably to avoid a circular import with .graph — confirm.
    from .graph import GraphEdge

    edges = []
    for rec in records:
        edges.append(
            GraphEdge(
                id=rec["id"],
                kind=rec["kind"],
                source_qualified=rec["source_qualified"],
                target_qualified=rec["target_qualified"],
                file_path=rec["file_path"],
                line=rec["line"],
                extra={},
            )
        )
    return edges
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def _find_enclosing(func_nodes, line: int) -> Optional[str]:
    """Return the qualified name of the tightest function node containing *line*.

    A node contains *line* when ``line_start <= line <= line_end``. Among
    containing nodes the one with the smallest line span wins; on a tie the
    earliest node in *func_nodes* is kept. Returns ``None`` when no node
    contains the line.
    """
    containing = [
        fn for fn in func_nodes
        if fn.line_start <= line <= fn.line_end
    ]
    if not containing:
        return None
    # min() keeps the first of equally small spans, matching a strict "<" scan.
    tightest = min(containing, key=lambda fn: fn.line_end - fn.line_start)
    return tightest.qualified_name
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def _find_untracked_method_calls(root, is_test_file: bool = False):
    """Collect the method calls under *root* that ``_walk_calls`` reports as dropped.

    Starts the walk at *root* with no enclosing function and returns the
    accumulated list of ``(jedi_line, col, method_name, enclosing_func_name)``
    tuples, where ``jedi_line`` is 1-indexed and ``col`` is 0-indexed.
    """
    collected: list[tuple[int, int, str, Optional[str]]] = []
    _walk_calls(root, collected, is_test_file, enclosing_func=None)
    return collected
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def _walk_calls(node, results, is_test_file, enclosing_func):
    """Walk the syntax tree under *node*, collecting dropped method calls.

    The traversal is pre-order, left to right, and uses an explicit stack
    instead of recursion so that deeply nested trees cannot exhaust the
    interpreter's recursion limit.

    Args:
        node: Root of the subtree to walk; nodes expose ``type`` and ``children``.
        results: List that ``_check_dropped_call`` appends to in place.
        is_test_file: Forwarded unchanged to ``_check_dropped_call``.
        enclosing_func: Name of the function enclosing *node*, or ``None``.
    """
    stack = [(node, enclosing_func)]
    while stack:
        current, scope = stack.pop()
        children = current.children

        if current.type == "function_definition":
            # The first identifier child is the function's own name; it
            # becomes the scope for everything nested below this node.
            for child in children:
                if child.type == "identifier":
                    scope = child.text.decode("utf-8", errors="replace") or scope
                    break
        elif current.type == "call" and children and children[0].type == "attribute":
            # Call whose callee is an attribute access, e.g. ``recv.method()``.
            _check_dropped_call(children[0], results, is_test_file, scope)

        # Push in reverse so children are popped in source order.
        stack.extend((child, scope) for child in reversed(children))
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def _check_dropped_call(attr_node, results, is_test_file, enclosing_func):
    """Record *attr_node* in *results* if it is a call the parser dropped.

    A call qualifies when its receiver is a plain identifier that is not in
    ``_SELF_NAMES``, does not start with an uppercase letter, the file is not
    a test file, and the last child of the attribute node is an identifier.
    A qualifying call is appended as
    ``(line, col, method_name, enclosing_func)`` with a 1-indexed line and a
    0-indexed column pointing at the method name.
    """
    parts = attr_node.children
    # Only simple ``identifier.something`` receivers are handled.
    if len(parts) < 2 or parts[0].type != "identifier":
        return

    receiver_name = parts[0].text.decode("utf-8", errors="replace")

    # Calls the parser already keeps: self/cls/super receivers,
    # uppercase-initial receivers, and everything in test files. Calls
    # handled by typed-var enrichment are separate edges; those duplicates
    # are filtered later through the existing-edge set.
    already_tracked = (
        receiver_name in _SELF_NAMES
        or receiver_name[:1].isupper()
        or is_test_file
    )
    if already_tracked:
        return

    # The method name is the last child of the attribute node.
    method = parts[-1]
    if method.type != "identifier":
        return

    line0, col = method.start_point  # both 0-indexed
    results.append((
        line0 + 1,
        col,
        method.text.decode("utf-8", errors="replace"),
        enclosing_func,
    ))
|