@pmaddire/gcie 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENT.md +256 -0
- package/AGENT_USAGE.md +231 -0
- package/ARCHITECTURE.md +151 -0
- package/CLAUDE.md +69 -0
- package/DEBUGGING_PLAYBOOK.md +160 -0
- package/KNOWLEDGE_INDEX.md +154 -0
- package/POTENTIAL_UPDATES +130 -0
- package/PROJECT.md +141 -0
- package/README.md +371 -0
- package/REPO_DIGITAL_TWIN.md +98 -0
- package/ROADMAP.md +301 -0
- package/SETUP_ANY_REPO.md +85 -0
- package/bin/gcie-init.js +20 -0
- package/bin/gcie.js +45 -0
- package/cli/__init__.py +1 -0
- package/cli/app.py +163 -0
- package/cli/commands/__init__.py +1 -0
- package/cli/commands/cache.py +35 -0
- package/cli/commands/context.py +2426 -0
- package/cli/commands/context_slices.py +617 -0
- package/cli/commands/debug.py +24 -0
- package/cli/commands/index.py +17 -0
- package/cli/commands/query.py +20 -0
- package/cli/commands/setup.py +73 -0
- package/config/__init__.py +1 -0
- package/config/scanner_config.py +82 -0
- package/context/__init__.py +1 -0
- package/context/architecture_bootstrap.py +170 -0
- package/context/architecture_index.py +185 -0
- package/context/architecture_parser.py +170 -0
- package/context/architecture_slicer.py +308 -0
- package/context/context_router.py +70 -0
- package/context/fallback_evaluator.py +21 -0
- package/coverage_integration/__init__.py +1 -0
- package/coverage_integration/coverage_loader.py +55 -0
- package/debugging/__init__.py +12 -0
- package/debugging/bug_localizer.py +81 -0
- package/debugging/execution_path_analyzer.py +42 -0
- package/embeddings/__init__.py +6 -0
- package/embeddings/encoder.py +45 -0
- package/embeddings/faiss_index.py +72 -0
- package/git_integration/__init__.py +1 -0
- package/git_integration/git_miner.py +78 -0
- package/graphs/__init__.py +17 -0
- package/graphs/call_graph.py +70 -0
- package/graphs/code_graph.py +81 -0
- package/graphs/execution_graph.py +35 -0
- package/graphs/git_graph.py +43 -0
- package/graphs/graph_store.py +25 -0
- package/graphs/node_factory.py +21 -0
- package/graphs/test_graph.py +65 -0
- package/graphs/validators.py +28 -0
- package/graphs/variable_graph.py +51 -0
- package/knowledge_index/__init__.py +1 -0
- package/knowledge_index/index_builder.py +60 -0
- package/knowledge_index/models.py +35 -0
- package/knowledge_index/query_api.py +38 -0
- package/knowledge_index/store.py +23 -0
- package/llm_context/__init__.py +6 -0
- package/llm_context/context_builder.py +67 -0
- package/llm_context/snippet_selector.py +57 -0
- package/package.json +14 -0
- package/parser/__init__.py +18 -0
- package/parser/ast_parser.py +216 -0
- package/parser/call_resolver.py +52 -0
- package/parser/models.py +75 -0
- package/parser/tree_sitter_adapter.py +56 -0
- package/parser/variable_extractor.py +31 -0
- package/retrieval/__init__.py +17 -0
- package/retrieval/cache.py +22 -0
- package/retrieval/hybrid_retriever.py +249 -0
- package/retrieval/query_parser.py +38 -0
- package/retrieval/ranking.py +43 -0
- package/retrieval/semantic_retriever.py +39 -0
- package/retrieval/symbolic_retriever.py +80 -0
- package/scanner/__init__.py +5 -0
- package/scanner/file_filters.py +37 -0
- package/scanner/models.py +44 -0
- package/scanner/repository_scanner.py +55 -0
- package/scripts/bootstrap_from_github.ps1 +41 -0
- package/tracing/__init__.py +1 -0
- package/tracing/runtime_tracer.py +60 -0
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""Knowledge index data models."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass(frozen=True, slots=True)
class FunctionIndexEntry:
    """Immutable index record describing one parsed function."""

    name: str  # function name as written in source
    file: str  # path of the defining file
    start_line: int
    end_line: int
    parameters: tuple[str, ...]
    variables_read: tuple[str, ...]  # variable names observed as reads
    variables_written: tuple[str, ...]  # variable names observed as writes
    functions_called: tuple[str, ...]  # call targets observed in the body
    docstring: str | None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass(frozen=True, slots=True)
class ClassIndexEntry:
    """Immutable index record describing one parsed class."""

    name: str  # class name as written in source
    file: str  # path of the defining file
    methods: tuple[str, ...]
    attributes: tuple[str, ...]  # class-level attribute names
    base_classes: tuple[str, ...]  # names of listed base classes
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass(frozen=True, slots=True)
class FileIndexEntry:
    """Immutable index record describing one parsed file."""

    path: str  # file path, also used as the store key
    imports: tuple[str, ...]  # imported module/name strings
    classes_defined: tuple[str, ...]
    functions_defined: tuple[str, ...]
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Knowledge index query API."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from .models import ClassIndexEntry, FileIndexEntry, FunctionIndexEntry
|
|
6
|
+
from .store import InMemoryKnowledgeStore
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def find_functions_modifying_variable(store: InMemoryKnowledgeStore, variable: str) -> tuple[FunctionIndexEntry, ...]:
    """Return every indexed function that writes to *variable*."""
    return tuple(
        filter(lambda fn: variable in fn.variables_written, store.functions.values())
    )
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def find_functions_calling_function(store: InMemoryKnowledgeStore, function_name: str) -> tuple[FunctionIndexEntry, ...]:
    """Return every indexed function whose call list includes *function_name*."""
    callers: list[FunctionIndexEntry] = []
    for entry in store.functions.values():
        if function_name in entry.functions_called:
            callers.append(entry)
    return tuple(callers)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def find_files_importing_module(store: InMemoryKnowledgeStore, module_name: str) -> tuple[FileIndexEntry, ...]:
    """Return files importing *module_name* itself or any of its submodules."""
    prefix = f"{module_name}."
    hits: list[FileIndexEntry] = []
    for entry in store.files.values():
        for imported in entry.imports:
            # Exact module match, or a dotted submodule of it.
            if imported == module_name or imported.startswith(prefix):
                hits.append(entry)
                break
    return tuple(hits)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def find_classes_inheriting_from(store: InMemoryKnowledgeStore, base_class: str) -> tuple[ClassIndexEntry, ...]:
    """Return every indexed class that lists *base_class* among its bases."""
    subclasses: list[ClassIndexEntry] = []
    for entry in store.classes.values():
        if base_class in entry.base_classes:
            subclasses.append(entry)
    return tuple(subclasses)
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""In-memory storage for knowledge index."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
|
|
7
|
+
from .models import ClassIndexEntry, FileIndexEntry, FunctionIndexEntry
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(slots=True)
|
|
11
|
+
class InMemoryKnowledgeStore:
|
|
12
|
+
functions: dict[str, FunctionIndexEntry] = field(default_factory=dict)
|
|
13
|
+
classes: dict[str, ClassIndexEntry] = field(default_factory=dict)
|
|
14
|
+
files: dict[str, FileIndexEntry] = field(default_factory=dict)
|
|
15
|
+
|
|
16
|
+
def add_function(self, entry: FunctionIndexEntry) -> None:
|
|
17
|
+
self.functions[f"{entry.file}::{entry.name}"] = entry
|
|
18
|
+
|
|
19
|
+
def add_class(self, entry: ClassIndexEntry) -> None:
|
|
20
|
+
self.classes[f"{entry.file}::{entry.name}"] = entry
|
|
21
|
+
|
|
22
|
+
def add_file(self, entry: FileIndexEntry) -> None:
|
|
23
|
+
self.files[entry.path] = entry
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""Context builder for minimal LLM prompts."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
from .snippet_selector import RankedSnippet, estimate_tokens, select_snippets
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(frozen=True, slots=True)
class ContextPayload:
    """Final snippet selection produced for a single query."""

    query: str  # original user query, echoed back unchanged
    snippets: tuple[RankedSnippet, ...]  # snippets chosen by select_snippets
    # Sum of estimate_tokens over the selected snippet contents.
    total_tokens_estimate: int
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Base token budget per detected query intent; _auto_budget adds query- and
# candidate-size bonuses on top and clamps the total to [200, 1600].
_INTENT_BASE = {
    "edit": 300,
    "refactor": 600,
    "debug": 500,
    "explore": 400,
}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _detect_intent(query: str) -> str:
|
|
28
|
+
text = query.lower()
|
|
29
|
+
if any(word in text for word in ("refactor", "rewrite", "migrate", "restructure")):
|
|
30
|
+
return "refactor"
|
|
31
|
+
if any(word in text for word in ("debug", "why", "error", "fail", "bug", "trace")):
|
|
32
|
+
return "debug"
|
|
33
|
+
if any(word in text for word in ("add", "change", "update", "extend", "modify", "remove", "rename")):
|
|
34
|
+
return "edit"
|
|
35
|
+
return "explore"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _auto_budget(query: str, ranked_snippets: list[RankedSnippet], intent: str) -> int:
    """Compute a context budget that scales with intent, query, and candidate size."""
    base = _INTENT_BASE.get(intent, 400)
    query_bonus = min(300, estimate_tokens(query) * 10)
    candidate_bonus = min(400, len(ranked_snippets) * 30)
    # Clamp the total to a sane window regardless of inputs.
    return min(1600, max(200, base + query_bonus + candidate_bonus))
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def build_context(
    query: str,
    ranked_snippets: list[RankedSnippet],
    *,
    token_budget: int | None = 800,
    mandatory_node_ids: set[str] | None = None,
    intent: str | None = None,
) -> ContextPayload:
    """Build a minimal context payload for LLM usage.

    When *token_budget* is None, the budget is derived automatically from
    the (possibly detected) intent, the query size, and the candidate count.
    """
    if token_budget is None:
        effective_intent = intent if intent is not None else _detect_intent(query)
        token_budget = _auto_budget(query, ranked_snippets, effective_intent)

    chosen = select_snippets(
        ranked_snippets,
        token_budget=token_budget,
        mandatory_node_ids=mandatory_node_ids,
    )
    estimate = sum(estimate_tokens(snippet.content) for snippet in chosen)
    return ContextPayload(query=query, snippets=chosen, total_tokens_estimate=estimate)
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Snippet selection logic for LLM context packaging."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass(frozen=True, slots=True)
class RankedSnippet:
    """A retrieval candidate: snippet content plus its relevance score."""

    node_id: str  # identifier used for mandatory-inclusion matching
    content: str  # raw snippet text; also the deduplication key
    score: float  # higher is better; selection sorts descending
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def estimate_tokens(text: str) -> int:
    """Cheap whitespace-based token estimate for budget management.

    Always returns at least 1, even for empty/blank input.
    """
    word_count = len(text.split())
    return word_count if word_count > 0 else 1
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def select_snippets(
    ranked: list[RankedSnippet],
    *,
    token_budget: int,
    mandatory_node_ids: set[str] | None = None,
) -> tuple[RankedSnippet, ...]:
    """Select minimal high-value snippets under a token budget.

    Mandatory snippets (matched by node id, in given order) are admitted
    first; the remaining budget is filled greedily by descending score.
    Duplicate contents are admitted at most once, and a snippet that does
    not fit is skipped rather than ending the scan.
    """
    required = mandatory_node_ids if mandatory_node_ids else set()

    chosen: list[RankedSnippet] = []
    admitted_contents: set[str] = set()
    budget_left = token_budget

    def _try_admit(snippet: RankedSnippet) -> None:
        # Admit unless duplicate or over budget; shared by both passes.
        nonlocal budget_left
        if snippet.content in admitted_contents:
            return
        cost = estimate_tokens(snippet.content)
        if cost > budget_left:
            return
        chosen.append(snippet)
        admitted_contents.add(snippet.content)
        budget_left -= cost

    # Pass 1: mandatory snippets, in their given order.
    for snippet in ranked:
        if snippet.node_id in required:
            _try_admit(snippet)

    # Pass 2: everything remaining, best score first.
    for snippet in sorted(ranked, key=lambda s: s.score, reverse=True):
        _try_admit(snippet)

    return tuple(chosen)
|
package/package.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@pmaddire/gcie",
|
|
3
|
+
"version": "0.1.2",
|
|
4
|
+
"description": "GraphCode Intelligence Engine one-command setup and context CLI",
|
|
5
|
+
"bin": {
|
|
6
|
+
"gcie": "bin/gcie.js",
|
|
7
|
+
"gcie-init": "bin/gcie-init.js"
|
|
8
|
+
},
|
|
9
|
+
"scripts": {
|
|
10
|
+
"gcie": "node bin/gcie.js",
|
|
11
|
+
"setup": "node bin/gcie-init.js"
|
|
12
|
+
},
|
|
13
|
+
"license": "MIT"
|
|
14
|
+
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Parser package for GCIE."""
|
|
2
|
+
|
|
3
|
+
from .ast_parser import parse_python_file
|
|
4
|
+
from .models import (
|
|
5
|
+
ClassEntry,
|
|
6
|
+
FunctionEntry,
|
|
7
|
+
ModuleParseResult,
|
|
8
|
+
VariableAccess,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"ClassEntry",
|
|
13
|
+
"FunctionEntry",
|
|
14
|
+
"ModuleParseResult",
|
|
15
|
+
"VariableAccess",
|
|
16
|
+
"parse_python_file",
|
|
17
|
+
]
|
|
18
|
+
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
"""AST parser for Python source files."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import ast
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from .models import (
|
|
9
|
+
AssignmentEntry,
|
|
10
|
+
ClassEntry,
|
|
11
|
+
FunctionEntry,
|
|
12
|
+
ImportEntry,
|
|
13
|
+
ModuleParseResult,
|
|
14
|
+
VariableAccess,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class _FunctionAnalyzer(ast.NodeVisitor):
    """Extract variable reads/writes and called function names."""

    def __init__(self) -> None:
        self.reads: set[str] = set()
        self.writes: set[str] = set()
        self.calls: set[str] = set()
        self.accesses: list[VariableAccess] = []

    def _record(self, name: str, line: int, ctx: ast.expr_context) -> None:
        # Shared bookkeeping for plain names and dotted attribute chains;
        # Del contexts are intentionally ignored.
        if isinstance(ctx, ast.Load):
            self.reads.add(name)
            self.accesses.append(VariableAccess(name=name, line=line, access_type="read"))
        elif isinstance(ctx, ast.Store):
            self.writes.add(name)
            self.accesses.append(VariableAccess(name=name, line=line, access_type="write"))

    def visit_Name(self, node: ast.Name) -> None:
        self._record(node.id, node.lineno, node.ctx)
        self.generic_visit(node)

    def visit_Attribute(self, node: ast.Attribute) -> None:
        # Only chains rooted at a plain name produce a dotted name.
        dotted = _attribute_name(node)
        if dotted:
            self._record(dotted, node.lineno, node.ctx)
        self.generic_visit(node)

    def visit_Call(self, node: ast.Call) -> None:
        target = _call_name(node.func)
        if target:
            self.calls.add(target)
        self.generic_visit(node)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class _ClassAnalyzer(ast.NodeVisitor):
    """Extract class-level attributes and method names.

    The caller dispatches each class-body statement separately; function
    visitors deliberately do not descend, so nested defs are not counted.
    """

    def __init__(self) -> None:
        self.attributes: set[str] = set()
        self.methods: set[str] = set()

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:  # pragma: no cover - simple dispatch
        self.methods.add(node.name)

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:  # pragma: no cover - simple dispatch
        self.methods.add(node.name)

    def visit_Assign(self, node: ast.Assign) -> None:
        # Plain assignments: every bound target becomes a class attribute.
        for target in node.targets:
            for name in _extract_target_names(target):
                self.attributes.add(name)
        self.generic_visit(node)

    def visit_AnnAssign(self, node: ast.AnnAssign) -> None:
        # Annotated assignments (`x: int = 0`, or a bare `x: int`) also
        # declare class attributes; ast represents them as AnnAssign, not
        # Assign, so they need their own visitor.
        for name in _extract_target_names(node.target):
            self.attributes.add(name)
        self.generic_visit(node)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _extract_target_names(target: ast.expr) -> list[str]:
|
|
75
|
+
names: list[str] = []
|
|
76
|
+
if isinstance(target, ast.Name):
|
|
77
|
+
names.append(target.id)
|
|
78
|
+
elif isinstance(target, ast.Attribute):
|
|
79
|
+
dotted = _attribute_name(target)
|
|
80
|
+
if dotted:
|
|
81
|
+
names.append(dotted)
|
|
82
|
+
elif isinstance(target, (ast.Tuple, ast.List)):
|
|
83
|
+
for element in target.elts:
|
|
84
|
+
names.extend(_extract_target_names(element))
|
|
85
|
+
return names
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _attribute_name(node: ast.Attribute) -> str:
|
|
89
|
+
parts: list[str] = []
|
|
90
|
+
current: ast.AST = node
|
|
91
|
+
while isinstance(current, ast.Attribute):
|
|
92
|
+
parts.append(current.attr)
|
|
93
|
+
current = current.value
|
|
94
|
+
if isinstance(current, ast.Name):
|
|
95
|
+
parts.append(current.id)
|
|
96
|
+
return ".".join(reversed(parts))
|
|
97
|
+
return ""
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _annotation_to_name(node: ast.expr) -> str:
|
|
101
|
+
if isinstance(node, ast.Name):
|
|
102
|
+
return node.id
|
|
103
|
+
if isinstance(node, ast.Attribute):
|
|
104
|
+
base = _annotation_to_name(node.value)
|
|
105
|
+
return f"{base}.{node.attr}" if base else node.attr
|
|
106
|
+
if isinstance(node, ast.Subscript):
|
|
107
|
+
return _annotation_to_name(node.value)
|
|
108
|
+
return ""
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _call_name(node: ast.expr) -> str:
|
|
112
|
+
if isinstance(node, ast.Name):
|
|
113
|
+
return node.id
|
|
114
|
+
if isinstance(node, ast.Attribute):
|
|
115
|
+
base = _call_name(node.value)
|
|
116
|
+
return f"{base}.{node.attr}" if base else node.attr
|
|
117
|
+
return ""
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _end_lineno(node: ast.AST) -> int:
|
|
121
|
+
return getattr(node, "end_lineno", getattr(node, "lineno", 0))
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def parse_python_source(source: str, file: str | Path = "<memory>") -> ModuleParseResult:
    """Parse Python source into a normalized module parse result.

    Only top-level statements of the module are inspected: nested
    functions/classes and statements inside module-level `if`/`try` blocks
    are not indexed.  On a SyntaxError the result carries the error text in
    ``parse_errors`` and no entries.
    """
    file_path = Path(file)
    try:
        tree = ast.parse(source)
    except SyntaxError as exc:
        # Invalid source still yields a result object so callers can
        # surface the error instead of catching exceptions.
        return ModuleParseResult(file=file_path, parse_errors=(str(exc),))

    functions: list[FunctionEntry] = []
    classes: list[ClassEntry] = []
    imports: list[ImportEntry] = []
    assignments: list[AssignmentEntry] = []

    for node in tree.body:
        if isinstance(node, ast.Import):
            # `import a, b` — recorded with an empty module string;
            # `as` aliases are not preserved (alias.name only).
            imports.append(
                ImportEntry(
                    module="",
                    names=tuple(alias.name for alias in node.names),
                    line=node.lineno,
                )
            )
        elif isinstance(node, ast.ImportFrom):
            # `from m import a, b`; a relative `from . import x` has
            # node.module == None, normalized to "".
            imports.append(
                ImportEntry(
                    module=node.module or "",
                    names=tuple(alias.name for alias in node.names),
                    line=node.lineno,
                )
            )
        elif isinstance(node, ast.Assign):
            # Module-level assignment targets; tuple targets are flattened
            # into one AssignmentEntry per bound name.
            for target in node.targets:
                for name in _extract_target_names(target):
                    assignments.append(AssignmentEntry(target=name, line=node.lineno))
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            # NOTE(review): the analyzer visits the whole def node, so names
            # in decorators, defaults and annotations are also counted as
            # reads — confirm this is intended.
            analyzer = _FunctionAnalyzer()
            analyzer.visit(node)

            # Parameter order: positional, keyword-only, *args, **kwargs.
            parameters: list[str] = [arg.arg for arg in node.args.args]
            parameters.extend(arg.arg for arg in node.args.kwonlyargs)
            if node.args.vararg:
                parameters.append(node.args.vararg.arg)
            if node.args.kwarg:
                parameters.append(node.args.kwarg.arg)

            functions.append(
                FunctionEntry(
                    name=node.name,
                    file=file_path,
                    start_line=node.lineno,
                    end_line=_end_lineno(node),
                    parameters=tuple(parameters),
                    variables_read=tuple(sorted(analyzer.reads)),
                    variables_written=tuple(sorted(analyzer.writes)),
                    functions_called=tuple(sorted(analyzer.calls)),
                    docstring=ast.get_docstring(node),
                    accesses=tuple(analyzer.accesses),
                )
            )
        elif isinstance(node, ast.ClassDef):
            # Visit each body statement separately so the analyzer's
            # function visitors see direct methods only, not nested defs.
            class_analyzer = _ClassAnalyzer()
            for body_node in node.body:
                class_analyzer.visit(body_node)

            classes.append(
                ClassEntry(
                    name=node.name,
                    file=file_path,
                    start_line=node.lineno,
                    end_line=_end_lineno(node),
                    methods=tuple(sorted(class_analyzer.methods)),
                    attributes=tuple(sorted(class_analyzer.attributes)),
                    base_classes=tuple(
                        sorted(filter(None, (_annotation_to_name(base) for base in node.bases)))
                    ),
                    docstring=ast.get_docstring(node),
                )
            )

    return ModuleParseResult(
        file=file_path,
        functions=tuple(functions),
        classes=tuple(classes),
        imports=tuple(imports),
        assignments=tuple(assignments),
    )
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def parse_python_file(path: str | Path) -> ModuleParseResult:
    """Parse a Python source file from disk (read as UTF-8)."""
    target = Path(path)
    return parse_python_source(target.read_text(encoding="utf-8"), file=target)
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""Call resolution utilities for call graph generation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
from .models import FunctionEntry, ImportEntry, ModuleParseResult
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(frozen=True, slots=True)
class ResolvedCall:
    """Resolved function call target."""

    caller: str  # name of the calling function
    callee: str  # local, alias-qualified, or raw call target
    resolved: bool  # True only when the callee is defined in the same module
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _import_alias_map(imports: tuple[ImportEntry, ...]) -> dict[str, str]:
|
|
20
|
+
mapping: dict[str, str] = {}
|
|
21
|
+
for entry in imports:
|
|
22
|
+
if entry.module:
|
|
23
|
+
for name in entry.names:
|
|
24
|
+
mapping[name] = f"{entry.module}.{name}"
|
|
25
|
+
else:
|
|
26
|
+
for name in entry.names:
|
|
27
|
+
mapping[name] = name
|
|
28
|
+
return mapping
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def resolve_calls(module: ModuleParseResult) -> tuple[ResolvedCall, ...]:
    """Resolve function call strings using local symbols and import aliases.

    Calls to functions defined in the same module are marked resolved.
    Calls whose head segment matches an import alias are rewritten to the
    qualified name but stay unresolved, as do entirely unknown targets.
    """
    local_names = {fn.name for fn in module.functions}
    alias_map = _import_alias_map(module.imports)

    results: list[ResolvedCall] = []
    for fn in module.functions:
        for target in fn.functions_called:
            if target in local_names:
                results.append(ResolvedCall(caller=fn.name, callee=target, resolved=True))
                continue

            head, _, tail = target.partition(".")
            if head in alias_map:
                qualified = alias_map[head]
                callee = f"{qualified}.{tail}" if tail else qualified
                results.append(ResolvedCall(caller=fn.name, callee=callee, resolved=False))
            else:
                results.append(ResolvedCall(caller=fn.name, callee=target, resolved=False))

    return tuple(results)
|
package/parser/models.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""Intermediate representation models emitted by the parser."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass(frozen=True, slots=True)
class VariableAccess:
    """Represents a variable read/write access in function scope."""

    name: str  # simple or dotted variable name
    line: int  # 1-based source line of the access
    access_type: str  # "read" | "write"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass(frozen=True, slots=True)
class FunctionEntry:
    """Represents parsed metadata for a function."""

    name: str
    file: Path
    start_line: int  # 1-based line of the `def`
    end_line: int  # best-effort last line; 0 when unknown
    parameters: tuple[str, ...]  # positional, keyword-only, then *args/**kwargs
    variables_read: tuple[str, ...]  # sorted names loaded in the body
    variables_written: tuple[str, ...]  # sorted names stored in the body
    functions_called: tuple[str, ...]  # sorted dotted call targets
    docstring: str | None
    # Ordered per-access records (read/write with line numbers).
    accesses: tuple[VariableAccess, ...] = ()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass(frozen=True, slots=True)
class ClassEntry:
    """Represents parsed metadata for a class."""

    name: str
    file: Path
    start_line: int
    end_line: int
    methods: tuple[str, ...]  # sorted method names from the class body
    attributes: tuple[str, ...]  # sorted class-level assignment targets
    base_classes: tuple[str, ...]  # sorted base names (subscripts stripped)
    docstring: str | None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass(frozen=True, slots=True)
class ImportEntry:
    """Represents an import statement."""

    # "" for plain `import x`; the source module for `from m import ...`.
    module: str
    names: tuple[str, ...]  # imported names (`as` aliases are not recorded)
    line: int
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass(frozen=True, slots=True)
class AssignmentEntry:
    """Represents an assignment target at module scope."""

    target: str  # simple or dotted target name
    line: int
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass(slots=True)
class ModuleParseResult:
    """Top-level parse result for a Python module."""

    file: Path
    functions: tuple[FunctionEntry, ...] = ()
    classes: tuple[ClassEntry, ...] = ()
    imports: tuple[ImportEntry, ...] = ()
    assignments: tuple[AssignmentEntry, ...] = ()
    # Non-empty when parsing failed; the entry tuples stay empty then.
    parse_errors: tuple[str, ...] = ()
    # Free-form extra info; mutable, hence the non-frozen dataclass.
    metadata: dict[str, str] = field(default_factory=dict)
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""Tree-sitter adapter contract and graceful fallback for parsing."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Protocol
|
|
8
|
+
|
|
9
|
+
from .ast_parser import parse_python_file
|
|
10
|
+
from .models import ModuleParseResult
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TreeSitterParser(Protocol):
    """Protocol for a tree-sitter-backed parser implementation.

    Structural interface: any object exposing a matching ``parse_file``
    satisfies it — no inheritance required.
    """

    def parse_file(self, path: Path) -> ModuleParseResult:
        """Parse a file and return module parse result."""
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass(slots=True)
class ParserFallbackResult:
    """Result with parser backend metadata."""

    result: ModuleParseResult
    backend: str  # "tree_sitter" or "ast"
    # Why the AST fallback was used; None when tree-sitter succeeded.
    fallback_reason: str | None = None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def parse_with_fallback(path: str | Path, tree_sitter: TreeSitterParser | None = None) -> ParserFallbackResult:
    """Use tree-sitter when available, otherwise fallback to stdlib AST parser."""
    file_path = Path(path)

    def _ast_fallback(reason: str) -> ParserFallbackResult:
        # Shared shape for every fallback path.
        return ParserFallbackResult(
            result=parse_python_file(file_path),
            backend="ast",
            fallback_reason=reason,
        )

    if tree_sitter is None:
        return _ast_fallback("tree_sitter_unavailable")

    try:
        ts_result = tree_sitter.parse_file(file_path)
    except Exception as exc:  # pragma: no cover - defensive fallback
        return _ast_fallback(f"tree_sitter_failed:{exc.__class__.__name__}")

    if ts_result.parse_errors:
        return _ast_fallback("tree_sitter_partial_or_error")

    return ParserFallbackResult(result=ts_result, backend="tree_sitter")
|