codebrain 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codebrain/__init__.py +3 -0
- codebrain/__main__.py +6 -0
- codebrain/agent_bridge.py +162 -0
- codebrain/analyzer.py +943 -0
- codebrain/api.py +578 -0
- codebrain/api_models.py +102 -0
- codebrain/cli.py +1927 -0
- codebrain/comprehension.py +1939 -0
- codebrain/config.py +46 -0
- codebrain/context.py +276 -0
- codebrain/export.py +334 -0
- codebrain/graph/__init__.py +0 -0
- codebrain/graph/query.py +656 -0
- codebrain/graph/schema.py +113 -0
- codebrain/graph/store.py +295 -0
- codebrain/hook_runner.py +71 -0
- codebrain/hooks.py +107 -0
- codebrain/indexer.py +450 -0
- codebrain/llm.py +676 -0
- codebrain/logging.py +42 -0
- codebrain/mcp_server.py +1635 -0
- codebrain/memory/__init__.py +5 -0
- codebrain/memory/store.py +270 -0
- codebrain/parser/__init__.py +0 -0
- codebrain/parser/base.py +27 -0
- codebrain/parser/config_parser.py +228 -0
- codebrain/parser/models.py +44 -0
- codebrain/parser/python_parser.py +658 -0
- codebrain/parser/registry.py +144 -0
- codebrain/parser/typescript_parser.py +1189 -0
- codebrain/parser/typescript_treesitter.py +535 -0
- codebrain/py.typed +0 -0
- codebrain/resolver.py +171 -0
- codebrain/settings.py +88 -0
- codebrain/utils.py +59 -0
- codebrain/validator.py +563 -0
- codebrain/watcher/__init__.py +0 -0
- codebrain/watcher/file_watcher.py +173 -0
- codebrain-0.1.0.dist-info/METADATA +360 -0
- codebrain-0.1.0.dist-info/RECORD +44 -0
- codebrain-0.1.0.dist-info/WHEEL +5 -0
- codebrain-0.1.0.dist-info/entry_points.txt +6 -0
- codebrain-0.1.0.dist-info/licenses/LICENSE +21 -0
- codebrain-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,658 @@
|
|
|
1
|
+
"""Python AST visitor that extracts structural nodes and edges."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import ast
|
|
6
|
+
import json
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from codebrain.parser.base import BaseParser
|
|
10
|
+
from codebrain.parser.models import ParsedEdge, ParsedFile, ParsedNode
|
|
11
|
+
from codebrain.utils import content_hash
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _get_docstring(node: ast.AST) -> str:
|
|
15
|
+
"""Extract first-line docstring from a function/class/module, or return ''."""
|
|
16
|
+
ds = ast.get_docstring(node)
|
|
17
|
+
if ds:
|
|
18
|
+
first_line = ds.split("\n", 1)[0].strip()
|
|
19
|
+
return first_line[:200]
|
|
20
|
+
return ""
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _get_decorator_names(node: ast.FunctionDef | ast.AsyncFunctionDef | ast.ClassDef) -> list[str]:
|
|
24
|
+
"""Return a list of decorator name strings."""
|
|
25
|
+
names: list[str] = []
|
|
26
|
+
for dec in node.decorator_list:
|
|
27
|
+
if isinstance(dec, ast.Name):
|
|
28
|
+
names.append(dec.id)
|
|
29
|
+
elif isinstance(dec, ast.Attribute):
|
|
30
|
+
names.append(ast.unparse(dec))
|
|
31
|
+
elif isinstance(dec, ast.Call):
|
|
32
|
+
if isinstance(dec.func, ast.Name):
|
|
33
|
+
names.append(dec.func.id)
|
|
34
|
+
elif isinstance(dec.func, ast.Attribute):
|
|
35
|
+
names.append(ast.unparse(dec.func))
|
|
36
|
+
else:
|
|
37
|
+
names.append(ast.unparse(dec.func))
|
|
38
|
+
else:
|
|
39
|
+
names.append(ast.unparse(dec))
|
|
40
|
+
return names
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _build_signature(node: ast.FunctionDef | ast.AsyncFunctionDef) -> str:
|
|
44
|
+
"""Build a human-readable function signature string."""
|
|
45
|
+
params: list[str] = []
|
|
46
|
+
args = node.args
|
|
47
|
+
|
|
48
|
+
# positional-only
|
|
49
|
+
for i, arg in enumerate(args.posonlyargs):
|
|
50
|
+
p = arg.arg
|
|
51
|
+
if arg.annotation:
|
|
52
|
+
p += f": {ast.unparse(arg.annotation)}"
|
|
53
|
+
params.append(p)
|
|
54
|
+
if args.posonlyargs:
|
|
55
|
+
params.append("/")
|
|
56
|
+
|
|
57
|
+
# normal positional/keyword
|
|
58
|
+
num_defaults = len(args.defaults)
|
|
59
|
+
num_args = len(args.args)
|
|
60
|
+
for i, arg in enumerate(args.args):
|
|
61
|
+
p = arg.arg
|
|
62
|
+
if arg.annotation:
|
|
63
|
+
p += f": {ast.unparse(arg.annotation)}"
|
|
64
|
+
default_idx = i - (num_args - num_defaults)
|
|
65
|
+
if default_idx >= 0:
|
|
66
|
+
p += f" = {ast.unparse(args.defaults[default_idx])}"
|
|
67
|
+
params.append(p)
|
|
68
|
+
|
|
69
|
+
# *args
|
|
70
|
+
if args.vararg:
|
|
71
|
+
p = f"*{args.vararg.arg}"
|
|
72
|
+
if args.vararg.annotation:
|
|
73
|
+
p += f": {ast.unparse(args.vararg.annotation)}"
|
|
74
|
+
params.append(p)
|
|
75
|
+
elif args.kwonlyargs:
|
|
76
|
+
params.append("*")
|
|
77
|
+
|
|
78
|
+
# keyword-only
|
|
79
|
+
for i, arg in enumerate(args.kwonlyargs):
|
|
80
|
+
p = arg.arg
|
|
81
|
+
if arg.annotation:
|
|
82
|
+
p += f": {ast.unparse(arg.annotation)}"
|
|
83
|
+
if args.kw_defaults[i] is not None:
|
|
84
|
+
p += f" = {ast.unparse(args.kw_defaults[i])}"
|
|
85
|
+
params.append(p)
|
|
86
|
+
|
|
87
|
+
# **kwargs
|
|
88
|
+
if args.kwarg:
|
|
89
|
+
p = f"**{args.kwarg.arg}"
|
|
90
|
+
if args.kwarg.annotation:
|
|
91
|
+
p += f": {ast.unparse(args.kwarg.annotation)}"
|
|
92
|
+
params.append(p)
|
|
93
|
+
|
|
94
|
+
sig = f"({', '.join(params)})"
|
|
95
|
+
if node.returns:
|
|
96
|
+
sig += f" -> {ast.unparse(node.returns)}"
|
|
97
|
+
return sig
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _call_target_name(node: ast.Call) -> str | None:
|
|
101
|
+
"""Best-effort extraction of the callable name from a Call node."""
|
|
102
|
+
func = node.func
|
|
103
|
+
if isinstance(func, ast.Name):
|
|
104
|
+
return func.id
|
|
105
|
+
if isinstance(func, ast.Attribute):
|
|
106
|
+
return ast.unparse(func)
|
|
107
|
+
return None
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# Route decorator patterns for API endpoint extraction
|
|
111
|
+
_ROUTE_METHODS = frozenset({"get", "post", "put", "delete", "patch", "head", "options", "route"})
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _extract_route_info(decorators: list[ast.expr]) -> str | None:
|
|
115
|
+
"""Extract API route path from decorators like @app.get('/path') or @router.post('/path')."""
|
|
116
|
+
for dec in decorators:
|
|
117
|
+
if isinstance(dec, ast.Call) and isinstance(dec.func, ast.Attribute):
|
|
118
|
+
method = dec.func.attr
|
|
119
|
+
if method in _ROUTE_METHODS and dec.args:
|
|
120
|
+
arg = dec.args[0]
|
|
121
|
+
if isinstance(arg, ast.Constant) and isinstance(arg.value, str):
|
|
122
|
+
return f"{method.upper()} {arg.value}"
|
|
123
|
+
return None
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
# MongoDB collection access patterns
#
# Each regex captures exactly one group: the collection name.  These run
# over raw source text (not the AST), so matches inside strings or comments
# are possible; _extract_mongo_collections filters the most common false
# positives via _MONGO_METHODS.
_MONGO_PATTERNS = (
    # db["collection_name"] or db['collection_name']
    r"""(?:db|database|mongo_db|self\.db|self\.database)\s*\[\s*['"](\w+)['"]\s*\]""",
    # db.collection_name (attribute access)
    # The (?!\s*\() lookahead excludes direct method calls like db.find(...).
    r"""(?:db|database|mongo_db|self\.db|self\.database)\.(\w+)\b(?!\s*\()""",
    # get_collection("name") or collection("name")
    r"""(?:get_collection|collection)\s*\(\s*['"](\w+)['"]\s*\)""",
)

# NOTE(review): mid-module import; conventionally this belongs at the top of
# the file with the other imports.
import re as _re

# Pre-compiled forms of _MONGO_PATTERNS, compiled once at import time.
_MONGO_RE = [_re.compile(p) for p in _MONGO_PATTERNS]

# Significant comment patterns
# Group 1 = the tag (TODO, FIXME, ...), group 2 = the comment text after it.
_SIGNIFICANT_COMMENT = _re.compile(
    r"""#\s*(TODO|FIXME|HACK|NOTE|WARNING|BUG|XXX|IMPORTANT|REFACTOR)\b[:\s]*(.+)""",
    _re.IGNORECASE,
)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _extract_significant_comments(source: str, max_comments: int = 20) -> list[str]:
    """Extract TODO, FIXME, HACK, NOTE and other significant comments.

    Scans the raw source text with ``_SIGNIFICANT_COMMENT`` and returns up
    to *max_comments* entries formatted as ``"L<line> <TAG>: <text>"``; each
    comment's text is truncated to 100 characters.
    """
    found: list[str] = []
    for match in _SIGNIFICANT_COMMENT.finditer(source):
        if len(found) >= max_comments:
            break
        tag = match.group(1).upper()
        body = match.group(2).strip()[:100]
        # 1-based line number of the match within the source text.
        line_no = source.count("\n", 0, match.start()) + 1
        found.append(f"L{line_no} {tag}: {body}")
    return found
|
|
157
|
+
|
|
158
|
+
# Common MongoDB method names to filter out
# These look like pymongo API names; _extract_mongo_collections skips them
# so attribute access such as ``db.client`` or ``db.watch`` is not reported
# as a collection name.
_MONGO_METHODS = frozenset({
    "find", "find_one", "insert_one", "insert_many", "update_one", "update_many",
    "delete_one", "delete_many", "aggregate", "count_documents", "create_index",
    "drop", "distinct", "bulk_write", "watch", "list_collection_names",
    "get_database", "get_collection", "client", "close", "command",
})
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _extract_mongo_collections(source: str) -> list[str]:
    """Extract MongoDB collection names from source code.

    Runs every compiled pattern in ``_MONGO_RE`` over the raw source text
    and returns the de-duplicated names in sorted order, skipping pymongo
    method names (``_MONGO_METHODS``) and underscore-prefixed names.
    """
    seen: set[str] = set()
    for regex in _MONGO_RE:
        seen.update(
            m.group(1)
            for m in regex.finditer(source)
            if m.group(1)
            and not m.group(1).startswith("_")
            and m.group(1) not in _MONGO_METHODS
        )
    return sorted(seen)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
class PythonVisitor(ast.NodeVisitor):
    """Walk a Python AST and collect structural nodes and edges.

    After ``visit(tree)``, results are available on ``self.nodes``
    (ParsedNode entries for functions, methods, classes and module-level
    variables) and ``self.edges`` (CONTAINS / EXTENDS / IMPORTS / CALLS /
    DATAFLOW ParsedEdge entries).  Node ids are file-scoped strings of the
    form ``"<file_path>::<qualified_name>"``.
    """

    def __init__(self, file_path: str, module_name: str) -> None:
        self.file_path = file_path
        self.module_name = module_name
        self.nodes: list[ParsedNode] = []
        self.edges: list[ParsedEdge] = []
        self._scope_stack: list[str] = []  # qualified name parts
        self._all_names: set[str] | None = None  # populated if __all__ found
        self._current_class: str | None = None  # current class node id

    def _make_id(self, qualified_name: str) -> str:
        # Ids are file-scoped so identical qualified names in different
        # files never collide.
        return f"{self.file_path}::{qualified_name}"

    def _resolve_call_target(self, name: str, class_node_id: str | None) -> str:
        """Resolve ``self.X`` to ``ClassName.X`` using the enclosing class context."""
        if class_node_id and name.startswith("self."):
            # class_node_id has the form "<file>::<qualified class name>";
            # keep only the class-name part.
            class_qname = class_node_id.split("::", 1)[1] if "::" in class_node_id else class_node_id
            return f"{class_qname}.{name[5:]}"  # strip the "self." prefix
        return name

    def _qualified_name(self, name: str) -> str:
        # Prefix with the enclosing scopes, e.g. "Outer.Inner.method".
        if self._scope_stack:
            return ".".join(self._scope_stack) + "." + name
        return name

    def _is_exported(self, name: str) -> bool:
        """Return True if *name* is part of the module's public surface."""
        # An explicit __all__ is authoritative when present.
        if self._all_names is not None:
            return name in self._all_names
        # If no __all__, top-level non-underscore names are considered exported
        return len(self._scope_stack) == 0 and not name.startswith("_")

    # ------------------------------------------------------------------
    # Pre-scan for __all__
    # ------------------------------------------------------------------
    def _scan_all(self, tree: ast.Module) -> None:
        """Record names from a top-level ``__all__ = [...]`` assignment."""
        for node in ast.iter_child_nodes(tree):
            if isinstance(node, ast.Assign):
                for target in node.targets:
                    if isinstance(target, ast.Name) and target.id == "__all__":
                        if isinstance(node.value, (ast.List, ast.Tuple)):
                            self._all_names = set()
                            # Only literal string elements are recorded;
                            # computed entries are silently ignored.
                            for elt in node.value.elts:
                                if isinstance(elt, ast.Constant) and isinstance(elt.value, str):
                                    self._all_names.add(elt.value)

    # ------------------------------------------------------------------
    # Visitors
    # ------------------------------------------------------------------
    def visit_Module(self, node: ast.Module) -> None:
        # __all__ must be known before children are visited so that
        # _is_exported answers consistently for every definition.
        self._scan_all(node)
        self.generic_visit(node)

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        self._handle_function(node)

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
        # Async functions share the sync handling path.
        self._handle_function(node)

    def _handle_function(self, node: ast.FunctionDef | ast.AsyncFunctionDef) -> None:
        """Record a function/method node, its CONTAINS edge and call edges."""
        qname = self._qualified_name(node.name)
        node_id = self._make_id(qname)
        # A non-None _current_class means we are directly inside a class body.
        node_type = "method" if self._current_class else "function"

        decorators = _get_decorator_names(node)

        # Check for API route decorators
        route_info = _extract_route_info(node.decorator_list)
        if route_info:
            # Surface the route on the node as an extra pseudo-decorator.
            decorators.append(f"endpoint:{route_info}")

        pnode = ParsedNode(
            id=node_id,
            name=node.name,
            qualified_name=qname,
            type=node_type,
            file_path=self.file_path,
            line_start=node.lineno,
            line_end=node.end_lineno or node.lineno,
            signature=_build_signature(node),
            decorators=decorators,
            docstring=_get_docstring(node),
            is_exported=self._is_exported(node.name),
        )
        self.nodes.append(pnode)

        # CONTAINS edge from parent
        if self._current_class:
            self.edges.append(ParsedEdge(
                source=self._current_class,
                target=node_id,
                type="CONTAINS",
                file_path=self.file_path,
                line=node.lineno,
            ))
        else:
            # file contains this function
            file_node_id = self._make_id(self.module_name)
            self.edges.append(ParsedEdge(
                source=file_node_id,
                target=node_id,
                type="CONTAINS",
                file_path=self.file_path,
                line=node.lineno,
            ))

        # Walk body for calls, nested defs, etc.
        old_class = self._current_class
        self._current_class = None  # nested funcs are not methods
        self._scope_stack.append(node.name)
        self._visit_body_for_calls(node, class_node_id=old_class)
        # NOTE(review): generic_visit re-enters nested defs, which run
        # _visit_body_for_calls again with their own scope — calls inside a
        # nested function may therefore be recorded for both the outer and
        # inner scope.  Confirm whether that duplication is intended.
        self.generic_visit(node)
        self._scope_stack.pop()
        self._current_class = old_class

    def visit_ClassDef(self, node: ast.ClassDef) -> None:
        """Record a class node, its CONTAINS/EXTENDS edges, then visit members."""
        qname = self._qualified_name(node.name)
        node_id = self._make_id(qname)

        pnode = ParsedNode(
            id=node_id,
            name=node.name,
            qualified_name=qname,
            type="class",
            file_path=self.file_path,
            line_start=node.lineno,
            line_end=node.end_lineno or node.lineno,
            decorators=_get_decorator_names(node),
            docstring=_get_docstring(node),
            is_exported=self._is_exported(node.name),
        )
        self.nodes.append(pnode)

        # CONTAINS edge from file
        file_node_id = self._make_id(self.module_name)
        self.edges.append(ParsedEdge(
            source=file_node_id,
            target=node_id,
            type="CONTAINS",
            file_path=self.file_path,
            line=node.lineno,
        ))

        # EXTENDS edges for base classes
        # The target is the unparsed base expression (an unresolved name,
        # not a node id); resolution happens downstream.
        for base in node.bases:
            base_name = ast.unparse(base)
            self.edges.append(ParsedEdge(
                source=node_id,
                target=base_name,
                type="EXTENDS",
                file_path=self.file_path,
                line=node.lineno,
            ))

        # Visit children
        # Save/restore the class context so nested classes unwind correctly.
        old_class = self._current_class
        self._current_class = node_id
        self._scope_stack.append(node.name)
        self.generic_visit(node)
        self._scope_stack.pop()
        self._current_class = old_class

    def visit_Import(self, node: ast.Import) -> None:
        """Emit an IMPORTS edge per ``import x`` alias, from the enclosing scope."""
        container_id = self._make_id(
            ".".join(self._scope_stack) if self._scope_stack else self.module_name
        )
        for alias in node.names:
            self.edges.append(ParsedEdge(
                source=container_id,
                target=alias.name,
                type="IMPORTS",
                file_path=self.file_path,
                line=node.lineno,
            ))

    def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
        """Emit IMPORTS edges for ``from x import y`` statements."""
        container_id = self._make_id(
            ".".join(self._scope_stack) if self._scope_stack else self.module_name
        )
        # node.module is None for bare relative imports ("from . import x").
        module = node.module or ""
        if node.names:
            for alias in node.names:
                if alias.name == "*":
                    # Star import: create edge to the module itself so dependency
                    # tracking knows this file depends on the entire module.
                    if module:
                        self.edges.append(ParsedEdge(
                            source=container_id,
                            target=module,
                            type="IMPORTS",
                            file_path=self.file_path,
                            line=node.lineno,
                        ))
                    continue
                target = f"{module}.{alias.name}" if module else alias.name
                self.edges.append(ParsedEdge(
                    source=container_id,
                    target=target,
                    type="IMPORTS",
                    file_path=self.file_path,
                    line=node.lineno,
                ))

    def _visit_body_for_calls(self, parent: ast.AST, class_node_id: str | None = None) -> None:
        """Walk all Call nodes inside *parent* and emit CALLS and DATAFLOW edges.

        DATAFLOW edges represent data dependencies between symbols — cases where
        the return value of one callable flows into another. They use the same
        ``source=consumer, target=producer`` convention as CALLS edges, except
        for nested-call patterns where ``source=inner, target=outer`` to show
        the inner call's result feeding into the outer call.

        DATAFLOW edges are created for the following patterns:

        1. **Assignment from call**: ``x = foo()`` — the enclosing function
           (container) has a DATAFLOW edge targeting ``foo`` because the
           container consumes the value produced by ``foo``.
        2. **Return of call**: ``return foo()`` — same as (1); the container's
           return value depends on ``foo``'s output.
        3. **Nested calls**: ``outer(inner())`` — ``inner``'s output flows into
           ``outer``, so ``source=inner, target=outer``.
        4. **Keyword-arg calls**: ``outer(key=inner())`` — same as (3).
        5. **Yield of call**: ``yield foo()`` — container yields ``foo``'s
           output, so ``source=container, target=foo``.
        6. **Await of call**: ``await foo()`` — container awaits ``foo``'s
           output, so ``source=container, target=foo``.

        Note: ``impact_of_change`` in ``graph/query.py`` currently traverses
        only CALLS and IMPORTS edges, so DATAFLOW edges do not inflate impact
        results. They are available for finer-grained data-dependency analysis.
        """
        container_qname = ".".join(self._scope_stack) if self._scope_stack else self.module_name
        container_id = self._make_id(container_qname)
        # ast.walk is a full subtree traversal; every check below re-tests
        # the same child, so one pass covers all the patterns.
        for child in ast.walk(parent):
            if isinstance(child, ast.Call):
                name = _call_target_name(child)
                if name:
                    name = self._resolve_call_target(name, class_node_id)
                    self.edges.append(ParsedEdge(
                        source=container_id,
                        target=name,
                        type="CALLS",
                        file_path=self.file_path,
                        line=getattr(child, "lineno", 0),
                    ))
            # Data flow: variable = function_call()
            if isinstance(child, ast.Assign):
                if isinstance(child.value, ast.Call):
                    call_name = _call_target_name(child.value)
                    if call_name:
                        call_name = self._resolve_call_target(call_name, class_node_id)
                        # One edge per simple (Name) assignment target.
                        for target in child.targets:
                            if isinstance(target, ast.Name):
                                self.edges.append(ParsedEdge(
                                    source=container_id,
                                    target=call_name,
                                    type="DATAFLOW",
                                    file_path=self.file_path,
                                    line=getattr(child, "lineno", 0),
                                ))
            # Data flow: return function_call()
            if isinstance(child, ast.Return) and child.value:
                if isinstance(child.value, ast.Call):
                    call_name = _call_target_name(child.value)
                    if call_name:
                        call_name = self._resolve_call_target(call_name, class_node_id)
                        self.edges.append(ParsedEdge(
                            source=container_id,
                            target=call_name,
                            type="DATAFLOW",
                            file_path=self.file_path,
                            line=getattr(child, "lineno", 0),
                        ))
            # Data flow: function_call(other_call()) — nested calls
            if isinstance(child, ast.Call):
                for arg in child.args:
                    if isinstance(arg, ast.Call):
                        inner = _call_target_name(arg)
                        outer = _call_target_name(child)
                        if inner and outer:
                            inner = self._resolve_call_target(inner, class_node_id)
                            outer = self._resolve_call_target(outer, class_node_id)
                            self.edges.append(ParsedEdge(
                                source=inner,
                                target=outer,
                                type="DATAFLOW",
                                file_path=self.file_path,
                                line=getattr(child, "lineno", 0),
                            ))
                # keyword args: func(x=other_call())
                for kw in child.keywords:
                    if isinstance(kw.value, ast.Call):
                        inner = _call_target_name(kw.value)
                        outer = _call_target_name(child)
                        if inner and outer:
                            inner = self._resolve_call_target(inner, class_node_id)
                            outer = self._resolve_call_target(outer, class_node_id)
                            self.edges.append(ParsedEdge(
                                source=inner,
                                target=outer,
                                type="DATAFLOW",
                                file_path=self.file_path,
                                line=getattr(child, "lineno", 0),
                            ))
            # Data flow: yield call()
            if isinstance(child, ast.Yield) and child.value and isinstance(child.value, ast.Call):
                call_name = _call_target_name(child.value)
                if call_name:
                    call_name = self._resolve_call_target(call_name, class_node_id)
                    self.edges.append(ParsedEdge(
                        source=container_id,
                        target=call_name,
                        type="DATAFLOW",
                        file_path=self.file_path,
                        line=getattr(child, "lineno", 0),
                    ))
            # Data flow: await call()
            if isinstance(child, ast.Await) and isinstance(child.value, ast.Call):
                call_name = _call_target_name(child.value)
                if call_name:
                    call_name = self._resolve_call_target(call_name, class_node_id)
                    self.edges.append(ParsedEdge(
                        source=container_id,
                        target=call_name,
                        type="DATAFLOW",
                        file_path=self.file_path,
                        line=getattr(child, "lineno", 0),
                    ))

    def visit_Assign(self, node: ast.Assign) -> None:
        """Record module-level ``x = ...`` assignments as variable nodes."""
        # Only capture module-level variable assignments
        if self._scope_stack:
            self.generic_visit(node)
            return
        for target in node.targets:
            if isinstance(target, ast.Name):
                # __all__ is metadata handled by _scan_all, not a variable node.
                if target.id == "__all__":
                    continue
                qname = self._qualified_name(target.id)
                node_id = self._make_id(qname)
                pnode = ParsedNode(
                    id=node_id,
                    name=target.id,
                    qualified_name=qname,
                    type="variable",
                    file_path=self.file_path,
                    line_start=node.lineno,
                    line_end=node.end_lineno or node.lineno,
                    is_exported=self._is_exported(target.id),
                )
                self.nodes.append(pnode)
                file_node_id = self._make_id(self.module_name)
                self.edges.append(ParsedEdge(
                    source=file_node_id,
                    target=node_id,
                    type="CONTAINS",
                    file_path=self.file_path,
                    line=node.lineno,
                ))
        self.generic_visit(node)

    def visit_AnnAssign(self, node: ast.AnnAssign) -> None:
        """Record module-level annotated assignments (``x: T = ...``) as variables."""
        if self._scope_stack:
            self.generic_visit(node)
            return
        if isinstance(node.target, ast.Name):
            name = node.target.id
            qname = self._qualified_name(name)
            node_id = self._make_id(qname)
            pnode = ParsedNode(
                id=node_id,
                name=name,
                qualified_name=qname,
                type="variable",
                file_path=self.file_path,
                line_start=node.lineno,
                line_end=node.end_lineno or node.lineno,
                is_exported=self._is_exported(name),
            )
            self.nodes.append(pnode)
            file_node_id = self._make_id(self.module_name)
            self.edges.append(ParsedEdge(
                source=file_node_id,
                target=node_id,
                type="CONTAINS",
                file_path=self.file_path,
                line=node.lineno,
            ))
        self.generic_visit(node)
|
|
568
|
+
|
|
569
|
+
|
|
570
|
+
def parse_file(path: Path, repo_root: Path) -> ParsedFile:
    """Parse a Python file and return a ParsedFile with nodes and edges.

    Parameters:
        path: absolute (or repo-root-relative) path to the ``.py`` file.
        repo_root: repository root; node file paths are stored relative to it.

    Returns:
        A ParsedFile containing a synthetic file-level node plus all nodes
        and edges collected by PythonVisitor.  On an unrecoverable parse
        failure, a ParsedFile with only path/hash/line_count is returned.
    """
    rel_path = path.relative_to(repo_root).as_posix()
    source = path.read_bytes()
    hash_val = content_hash(source)

    try:
        tree = ast.parse(source, filename=str(path))
    except (SyntaxError, ValueError):
        # ValueError: null bytes in source. SyntaxError: bad syntax.
        # Retry with explicit UTF-8 decoding and null-byte stripping.
        try:
            text = source.decode("utf-8", errors="replace").replace("\x00", "")
            tree = ast.parse(text, filename=str(path))
        except (SyntaxError, ValueError):
            # Give up: return a bare ParsedFile with no nodes or edges.
            return ParsedFile(path=rel_path, content_hash=hash_val, line_count=source.count(b"\n") + 1)

    line_count = source.count(b"\n") + 1

    # Derive a module-like name from the relative path
    module_name = rel_path.replace("/", ".").removesuffix(".py")
    if module_name.endswith(".__init__"):
        # A package __init__ is identified by the package name itself.
        module_name = module_name.removesuffix(".__init__")

    # Create a file-level node
    try:
        source_text_for_comments = source.decode("utf-8", errors="replace")
    except Exception:
        source_text_for_comments = ""
    significant_comments = _extract_significant_comments(source_text_for_comments)
    file_docstring = _get_docstring(tree)
    # TODO/FIXME/etc. comments are folded into the file node's docstring so
    # they surface in search without a separate storage field.
    if significant_comments:
        comment_block = " | ".join(significant_comments)
        if file_docstring:
            file_docstring += f" [Comments: {comment_block}]"
        else:
            file_docstring = f"[Comments: {comment_block}]"

    file_node = ParsedNode(
        id=f"{rel_path}::{module_name}",
        name=module_name.rsplit(".", 1)[-1],
        qualified_name=module_name,
        type="file",
        file_path=rel_path,
        line_start=1,
        line_end=line_count,
        docstring=file_docstring,
        is_exported=True,
    )

    visitor = PythonVisitor(file_path=rel_path, module_name=module_name)
    visitor.visit(tree)

    # The file node goes first so consumers see the container before its contents.
    nodes = [file_node] + visitor.nodes
    edges = visitor.edges

    # Extract MongoDB collection references
    try:
        source_text = source.decode("utf-8", errors="replace")
    except Exception:
        source_text = ""
    collections = _extract_mongo_collections(source_text)
    # Must match file_node.id above.
    file_node_id = f"{rel_path}::{module_name}"
    for coll_name in collections:
        # Collections are external resources, so the target uses a
        # "mongodb:" prefix rather than a file-scoped node id.
        edges.append(ParsedEdge(
            source=file_node_id,
            target=f"mongodb:{coll_name}",
            type="DATAFLOW",
            file_path=rel_path,
            line=0,  # regex-based extraction has no reliable line number
        ))

    return ParsedFile(
        path=rel_path,
        content_hash=hash_val,
        nodes=nodes,
        edges=edges,
        line_count=line_count,
    )
|
|
649
|
+
|
|
650
|
+
|
|
651
|
+
class PythonParser(BaseParser):
    """Adapter exposing the module-level AST parser through the BaseParser API."""

    def extensions(self) -> frozenset[str]:
        """Return the file extensions this parser handles."""
        return frozenset((".py",))

    def parse(self, path: Path, repo_root: Path) -> ParsedFile:
        """Delegate to :func:`parse_file` for the actual parsing work."""
        return parse_file(path, repo_root)
|