@pmaddire/gcie 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/AGENT.md +256 -0
  2. package/AGENT_USAGE.md +231 -0
  3. package/ARCHITECTURE.md +151 -0
  4. package/CLAUDE.md +69 -0
  5. package/DEBUGGING_PLAYBOOK.md +160 -0
  6. package/KNOWLEDGE_INDEX.md +154 -0
  7. package/POTENTIAL_UPDATES +130 -0
  8. package/PROJECT.md +141 -0
  9. package/README.md +371 -0
  10. package/REPO_DIGITAL_TWIN.md +98 -0
  11. package/ROADMAP.md +301 -0
  12. package/SETUP_ANY_REPO.md +85 -0
  13. package/bin/gcie-init.js +20 -0
  14. package/bin/gcie.js +45 -0
  15. package/cli/__init__.py +1 -0
  16. package/cli/app.py +163 -0
  17. package/cli/commands/__init__.py +1 -0
  18. package/cli/commands/cache.py +35 -0
  19. package/cli/commands/context.py +2426 -0
  20. package/cli/commands/context_slices.py +617 -0
  21. package/cli/commands/debug.py +24 -0
  22. package/cli/commands/index.py +17 -0
  23. package/cli/commands/query.py +20 -0
  24. package/cli/commands/setup.py +73 -0
  25. package/config/__init__.py +1 -0
  26. package/config/scanner_config.py +82 -0
  27. package/context/__init__.py +1 -0
  28. package/context/architecture_bootstrap.py +170 -0
  29. package/context/architecture_index.py +185 -0
  30. package/context/architecture_parser.py +170 -0
  31. package/context/architecture_slicer.py +308 -0
  32. package/context/context_router.py +70 -0
  33. package/context/fallback_evaluator.py +21 -0
  34. package/coverage_integration/__init__.py +1 -0
  35. package/coverage_integration/coverage_loader.py +55 -0
  36. package/debugging/__init__.py +12 -0
  37. package/debugging/bug_localizer.py +81 -0
  38. package/debugging/execution_path_analyzer.py +42 -0
  39. package/embeddings/__init__.py +6 -0
  40. package/embeddings/encoder.py +45 -0
  41. package/embeddings/faiss_index.py +72 -0
  42. package/git_integration/__init__.py +1 -0
  43. package/git_integration/git_miner.py +78 -0
  44. package/graphs/__init__.py +17 -0
  45. package/graphs/call_graph.py +70 -0
  46. package/graphs/code_graph.py +81 -0
  47. package/graphs/execution_graph.py +35 -0
  48. package/graphs/git_graph.py +43 -0
  49. package/graphs/graph_store.py +25 -0
  50. package/graphs/node_factory.py +21 -0
  51. package/graphs/test_graph.py +65 -0
  52. package/graphs/validators.py +28 -0
  53. package/graphs/variable_graph.py +51 -0
  54. package/knowledge_index/__init__.py +1 -0
  55. package/knowledge_index/index_builder.py +60 -0
  56. package/knowledge_index/models.py +35 -0
  57. package/knowledge_index/query_api.py +38 -0
  58. package/knowledge_index/store.py +23 -0
  59. package/llm_context/__init__.py +6 -0
  60. package/llm_context/context_builder.py +67 -0
  61. package/llm_context/snippet_selector.py +57 -0
  62. package/package.json +14 -0
  63. package/parser/__init__.py +18 -0
  64. package/parser/ast_parser.py +216 -0
  65. package/parser/call_resolver.py +52 -0
  66. package/parser/models.py +75 -0
  67. package/parser/tree_sitter_adapter.py +56 -0
  68. package/parser/variable_extractor.py +31 -0
  69. package/retrieval/__init__.py +17 -0
  70. package/retrieval/cache.py +22 -0
  71. package/retrieval/hybrid_retriever.py +249 -0
  72. package/retrieval/query_parser.py +38 -0
  73. package/retrieval/ranking.py +43 -0
  74. package/retrieval/semantic_retriever.py +39 -0
  75. package/retrieval/symbolic_retriever.py +80 -0
  76. package/scanner/__init__.py +5 -0
  77. package/scanner/file_filters.py +37 -0
  78. package/scanner/models.py +44 -0
  79. package/scanner/repository_scanner.py +55 -0
  80. package/scripts/bootstrap_from_github.ps1 +41 -0
  81. package/tracing/__init__.py +1 -0
  82. package/tracing/runtime_tracer.py +60 -0
@@ -0,0 +1,35 @@
1
+ """Knowledge index data models."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+
8
@dataclass(frozen=True, slots=True)
class FunctionIndexEntry:
    """Immutable index record describing one function definition."""

    name: str  # function name as written in source
    file: str  # path of the file the function was indexed from
    start_line: int  # first line of the definition
    end_line: int  # last line of the definition
    parameters: tuple[str, ...]  # parameter names in declaration order
    variables_read: tuple[str, ...]  # variable names read inside the body
    variables_written: tuple[str, ...]  # variable names assigned inside the body
    functions_called: tuple[str, ...]  # call-target names seen in the body
    docstring: str | None  # None when the function has no docstring
19
+
20
+
21
@dataclass(frozen=True, slots=True)
class ClassIndexEntry:
    """Immutable index record describing one class definition."""

    name: str  # class name
    file: str  # path of the file the class was indexed from
    methods: tuple[str, ...]  # names of methods recorded for the class
    attributes: tuple[str, ...]  # attribute names recorded for the class
    base_classes: tuple[str, ...]  # declared base-class names (used by query_api)
28
+
29
+
30
@dataclass(frozen=True, slots=True)
class FileIndexEntry:
    """Immutable index record describing one source file."""

    path: str  # file path; also used as the store key
    imports: tuple[str, ...]  # imported module names (dotted form)
    classes_defined: tuple[str, ...]  # names of classes defined in the file
    functions_defined: tuple[str, ...]  # names of functions defined in the file
@@ -0,0 +1,38 @@
1
+ """Knowledge index query API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .models import ClassIndexEntry, FileIndexEntry, FunctionIndexEntry
6
+ from .store import InMemoryKnowledgeStore
7
+
8
+
9
def find_functions_modifying_variable(store: InMemoryKnowledgeStore, variable: str) -> tuple[FunctionIndexEntry, ...]:
    """Return every indexed function that writes *variable*."""
    matches: list[FunctionIndexEntry] = []
    for entry in store.functions.values():
        if variable in entry.variables_written:
            matches.append(entry)
    return tuple(matches)
15
+
16
+
17
def find_functions_calling_function(store: InMemoryKnowledgeStore, function_name: str) -> tuple[FunctionIndexEntry, ...]:
    """Return every indexed function whose body calls *function_name*."""
    callers: list[FunctionIndexEntry] = []
    for entry in store.functions.values():
        if function_name in entry.functions_called:
            callers.append(entry)
    return tuple(callers)
23
+
24
+
25
def find_files_importing_module(store: InMemoryKnowledgeStore, module_name: str) -> tuple[FileIndexEntry, ...]:
    """Return files importing *module_name* itself or any of its submodules."""
    submodule_prefix = f"{module_name}."

    def imports_module(entry: FileIndexEntry) -> bool:
        # A match is the exact module or a dotted submodule of it; a plain
        # prefix like "os" vs "osmium" does NOT match.
        return any(imp == module_name or imp.startswith(submodule_prefix) for imp in entry.imports)

    return tuple(entry for entry in store.files.values() if imports_module(entry))
31
+
32
+
33
def find_classes_inheriting_from(store: InMemoryKnowledgeStore, base_class: str) -> tuple[ClassIndexEntry, ...]:
    """Return every indexed class that lists *base_class* among its bases."""
    subclasses: list[ClassIndexEntry] = []
    for entry in store.classes.values():
        if base_class in entry.base_classes:
            subclasses.append(entry)
    return tuple(subclasses)
@@ -0,0 +1,23 @@
1
+ """In-memory storage for knowledge index."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+
7
+ from .models import ClassIndexEntry, FileIndexEntry, FunctionIndexEntry
8
+
9
+
10
+ @dataclass(slots=True)
11
+ class InMemoryKnowledgeStore:
12
+ functions: dict[str, FunctionIndexEntry] = field(default_factory=dict)
13
+ classes: dict[str, ClassIndexEntry] = field(default_factory=dict)
14
+ files: dict[str, FileIndexEntry] = field(default_factory=dict)
15
+
16
+ def add_function(self, entry: FunctionIndexEntry) -> None:
17
+ self.functions[f"{entry.file}::{entry.name}"] = entry
18
+
19
+ def add_class(self, entry: ClassIndexEntry) -> None:
20
+ self.classes[f"{entry.file}::{entry.name}"] = entry
21
+
22
+ def add_file(self, entry: FileIndexEntry) -> None:
23
+ self.files[entry.path] = entry
@@ -0,0 +1,6 @@
1
+ """LLM context package."""
2
+
3
+ from .context_builder import ContextPayload, build_context
4
+ from .snippet_selector import RankedSnippet, select_snippets
5
+
6
+ __all__ = ["ContextPayload", "RankedSnippet", "build_context", "select_snippets"]
@@ -0,0 +1,67 @@
1
+ """Context builder for minimal LLM prompts."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+ from .snippet_selector import RankedSnippet, estimate_tokens, select_snippets
8
+
9
+
10
@dataclass(frozen=True, slots=True)
class ContextPayload:
    """Immutable bundle of snippets selected for one LLM prompt."""

    query: str  # the user query this context was built for
    snippets: tuple[RankedSnippet, ...]  # snippets chosen by select_snippets
    total_tokens_estimate: int  # sum of estimate_tokens over the snippet contents
15
+
16
+
17
+
18
+
19
# Base token budget per detected query intent; _auto_budget starts from this
# value before adding query-size and candidate-count bonuses.
_INTENT_BASE = {
    "edit": 300,
    "refactor": 600,
    "debug": 500,
    "explore": 400,
}
25
+
26
+
27
+ def _detect_intent(query: str) -> str:
28
+ text = query.lower()
29
+ if any(word in text for word in ("refactor", "rewrite", "migrate", "restructure")):
30
+ return "refactor"
31
+ if any(word in text for word in ("debug", "why", "error", "fail", "bug", "trace")):
32
+ return "debug"
33
+ if any(word in text for word in ("add", "change", "update", "extend", "modify", "remove", "rename")):
34
+ return "edit"
35
+ return "explore"
36
+
37
+
38
def _auto_budget(query: str, ranked_snippets: list[RankedSnippet], intent: str) -> int:
    """Compute a context budget that scales with intent, query, and candidate size."""
    # Unknown intents fall back to a middle-of-the-road base of 400 tokens.
    base = _INTENT_BASE.get(intent, 400)
    # Larger queries and larger candidate pools earn a bigger budget, each
    # capped so one oversized input cannot blow up the context.
    query_bonus = min(300, estimate_tokens(query) * 10)
    candidate_bonus = min(400, len(ranked_snippets) * 30)
    # Clamp the final budget to the [200, 1600] range.
    return max(200, min(1600, base + query_bonus + candidate_bonus))
46
+
47
+
48
def build_context(
    query: str,
    ranked_snippets: list[RankedSnippet],
    *,
    token_budget: int | None = 800,
    mandatory_node_ids: set[str] | None = None,
    intent: str | None = None,
) -> ContextPayload:
    """Build minimal context payload for LLM usage.

    Passing ``token_budget=None`` switches to an automatic budget derived
    from the intent (detected from the query when not supplied), the query
    size, and the candidate pool size.
    """
    if token_budget is None:
        effective_intent = intent if intent is not None else _detect_intent(query)
        token_budget = _auto_budget(query, ranked_snippets, effective_intent)

    chosen = select_snippets(
        ranked_snippets,
        token_budget=token_budget,
        mandatory_node_ids=mandatory_node_ids,
    )
    token_total = sum(estimate_tokens(snippet.content) for snippet in chosen)
    return ContextPayload(query=query, snippets=chosen, total_tokens_estimate=token_total)
@@ -0,0 +1,57 @@
1
+ """Snippet selection logic for LLM context packaging."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+
8
@dataclass(frozen=True, slots=True)
class RankedSnippet:
    """A retrieved snippet paired with its retrieval relevance score."""

    node_id: str  # identifier used for mandatory-inclusion matching
    content: str  # snippet text; also used for duplicate detection
    score: float  # relevance score; higher is better (see select_snippets)
13
+
14
+
15
def estimate_tokens(text: str) -> int:
    """Cheap whitespace-based token estimate for budget management (always >= 1)."""
    word_count = len(text.split())
    return word_count if word_count > 0 else 1


def select_snippets(
    ranked: list[RankedSnippet],
    *,
    token_budget: int,
    mandatory_node_ids: set[str] | None = None,
) -> tuple[RankedSnippet, ...]:
    """Select minimal high-value snippets under token budget.

    Mandatory snippets (matched by node id) are admitted first in their given
    order; the remaining budget is then filled greedily by descending score.
    Duplicate contents are admitted only once, and a snippet that would
    overflow the budget is skipped rather than ending the selection.
    """
    required = mandatory_node_ids if mandatory_node_ids else set()

    chosen: list[RankedSnippet] = []
    contents_taken: set[str] = set()
    budget_left = token_budget

    def admit(snippet: RankedSnippet) -> None:
        """Add *snippet* when its content is new and it fits the remaining budget."""
        nonlocal budget_left
        if snippet.content in contents_taken:
            return
        cost = estimate_tokens(snippet.content)
        if cost > budget_left:
            return
        chosen.append(snippet)
        contents_taken.add(snippet.content)
        budget_left -= cost

    # Pass 1: mandatory snippets, in the order they were provided.
    for snippet in ranked:
        if snippet.node_id in required:
            admit(snippet)

    # Pass 2: fill remaining budget with the highest-scoring snippets.
    for snippet in sorted(ranked, key=lambda s: s.score, reverse=True):
        admit(snippet)

    return tuple(chosen)
package/package.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "name": "@pmaddire/gcie",
3
+ "version": "0.1.2",
4
+ "description": "GraphCode Intelligence Engine one-command setup and context CLI",
5
+ "bin": {
6
+ "gcie": "bin/gcie.js",
7
+ "gcie-init": "bin/gcie-init.js"
8
+ },
9
+ "scripts": {
10
+ "gcie": "node bin/gcie.js",
11
+ "setup": "node bin/gcie-init.js"
12
+ },
13
+ "license": "MIT"
14
+ }
@@ -0,0 +1,18 @@
1
+ """Parser package for GCIE."""
2
+
3
+ from .ast_parser import parse_python_file
4
+ from .models import (
5
+ ClassEntry,
6
+ FunctionEntry,
7
+ ModuleParseResult,
8
+ VariableAccess,
9
+ )
10
+
11
+ __all__ = [
12
+ "ClassEntry",
13
+ "FunctionEntry",
14
+ "ModuleParseResult",
15
+ "VariableAccess",
16
+ "parse_python_file",
17
+ ]
18
+
@@ -0,0 +1,216 @@
1
+ """AST parser for Python source files."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import ast
6
+ from pathlib import Path
7
+
8
+ from .models import (
9
+ AssignmentEntry,
10
+ ClassEntry,
11
+ FunctionEntry,
12
+ ImportEntry,
13
+ ModuleParseResult,
14
+ VariableAccess,
15
+ )
16
+
17
+
18
class _FunctionAnalyzer(ast.NodeVisitor):
    """Extract variable reads/writes and called function names from a function body."""

    def __init__(self) -> None:
        self.reads: set[str] = set()  # unique names read
        self.writes: set[str] = set()  # unique names written
        self.calls: set[str] = set()  # unique call-target names
        self.accesses: list[VariableAccess] = []  # per-access records, in visit order

    def _record(self, name: str, line: int, ctx: ast.expr_context) -> None:
        """Record one load/store of *name*; other contexts (e.g. Del) are ignored."""
        if isinstance(ctx, ast.Load):
            self.reads.add(name)
            self.accesses.append(VariableAccess(name=name, line=line, access_type="read"))
        elif isinstance(ctx, ast.Store):
            self.writes.add(name)
            self.accesses.append(VariableAccess(name=name, line=line, access_type="write"))

    def visit_Name(self, node: ast.Name) -> None:
        self._record(node.id, node.lineno, node.ctx)
        self.generic_visit(node)

    def visit_Attribute(self, node: ast.Attribute) -> None:
        # Only attribute chains that reduce to a dotted name (e.g. "self.x")
        # are recorded; dynamic bases (calls, subscripts) yield "" and are skipped.
        dotted = _attribute_name(node)
        if dotted:
            self._record(dotted, node.lineno, node.ctx)
        self.generic_visit(node)

    def visit_Call(self, node: ast.Call) -> None:
        target = _call_name(node.func)
        if target:
            self.calls.add(target)
        self.generic_visit(node)
+
53
+
54
class _ClassAnalyzer(ast.NodeVisitor):
    """Extract class-level attribute names and method names.

    Method bodies are deliberately not descended into, so only names bound
    directly in the class body are collected.
    """

    def __init__(self) -> None:
        self.attributes: set[str] = set()  # class-level assignment targets
        self.methods: set[str] = set()  # names of (async) methods

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:  # pragma: no cover - simple dispatch
        self.methods.add(node.name)

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:  # pragma: no cover - simple dispatch
        self.methods.add(node.name)

    def visit_Assign(self, node: ast.Assign) -> None:
        # Tuple/list targets contribute one attribute per bound name.
        names = (name for target in node.targets for name in _extract_target_names(target))
        self.attributes.update(names)
        self.generic_visit(node)
+
73
+
74
+ def _extract_target_names(target: ast.expr) -> list[str]:
75
+ names: list[str] = []
76
+ if isinstance(target, ast.Name):
77
+ names.append(target.id)
78
+ elif isinstance(target, ast.Attribute):
79
+ dotted = _attribute_name(target)
80
+ if dotted:
81
+ names.append(dotted)
82
+ elif isinstance(target, (ast.Tuple, ast.List)):
83
+ for element in target.elts:
84
+ names.extend(_extract_target_names(element))
85
+ return names
86
+
87
+
88
+ def _attribute_name(node: ast.Attribute) -> str:
89
+ parts: list[str] = []
90
+ current: ast.AST = node
91
+ while isinstance(current, ast.Attribute):
92
+ parts.append(current.attr)
93
+ current = current.value
94
+ if isinstance(current, ast.Name):
95
+ parts.append(current.id)
96
+ return ".".join(reversed(parts))
97
+ return ""
98
+
99
+
100
+ def _annotation_to_name(node: ast.expr) -> str:
101
+ if isinstance(node, ast.Name):
102
+ return node.id
103
+ if isinstance(node, ast.Attribute):
104
+ base = _annotation_to_name(node.value)
105
+ return f"{base}.{node.attr}" if base else node.attr
106
+ if isinstance(node, ast.Subscript):
107
+ return _annotation_to_name(node.value)
108
+ return ""
109
+
110
+
111
+ def _call_name(node: ast.expr) -> str:
112
+ if isinstance(node, ast.Name):
113
+ return node.id
114
+ if isinstance(node, ast.Attribute):
115
+ base = _call_name(node.value)
116
+ return f"{base}.{node.attr}" if base else node.attr
117
+ return ""
118
+
119
+
120
+ def _end_lineno(node: ast.AST) -> int:
121
+ return getattr(node, "end_lineno", getattr(node, "lineno", 0))
122
+
123
+
124
def parse_python_source(source: str, file: str | Path = "<memory>") -> ModuleParseResult:
    """Parse Python source into a normalized module parse result.

    Only statements directly in the module body are examined; nested
    definitions and statements inside control flow are not indexed. A
    ``SyntaxError`` is captured into ``parse_errors`` instead of raising.

    Fix: positional-only parameters (PEP 570, ``node.args.posonlyargs``)
    were previously omitted from ``parameters``; they are now included
    ahead of the regular positional arguments, matching call order.
    """
    file_path = Path(file)
    try:
        tree = ast.parse(source)
    except SyntaxError as exc:
        # Degrade gracefully: return an empty result carrying the error text.
        return ModuleParseResult(file=file_path, parse_errors=(str(exc),))

    functions: list[FunctionEntry] = []
    classes: list[ClassEntry] = []
    imports: list[ImportEntry] = []
    assignments: list[AssignmentEntry] = []

    for node in tree.body:
        if isinstance(node, ast.Import):
            # Plain "import a, b": module stays empty, names hold the modules.
            # NOTE(review): "import a as b" aliases are not recorded — confirm
            # downstream consumers do not need asname.
            imports.append(
                ImportEntry(
                    module="",
                    names=tuple(alias.name for alias in node.names),
                    line=node.lineno,
                )
            )
        elif isinstance(node, ast.ImportFrom):
            # "from pkg import x, y"; a relative "from . import x" yields module "".
            imports.append(
                ImportEntry(
                    module=node.module or "",
                    names=tuple(alias.name for alias in node.names),
                    line=node.lineno,
                )
            )
        elif isinstance(node, ast.Assign):
            # Module-level assignment; tuple/list targets contribute one entry each.
            for target in node.targets:
                for name in _extract_target_names(target):
                    assignments.append(AssignmentEntry(target=name, line=node.lineno))
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            analyzer = _FunctionAnalyzer()
            analyzer.visit(node)

            # Collect every parameter kind: positional-only, positional,
            # keyword-only, then *args and **kwargs.
            parameters: list[str] = [arg.arg for arg in node.args.posonlyargs]
            parameters.extend(arg.arg for arg in node.args.args)
            parameters.extend(arg.arg for arg in node.args.kwonlyargs)
            if node.args.vararg:
                parameters.append(node.args.vararg.arg)
            if node.args.kwarg:
                parameters.append(node.args.kwarg.arg)

            functions.append(
                FunctionEntry(
                    name=node.name,
                    file=file_path,
                    start_line=node.lineno,
                    end_line=_end_lineno(node),
                    parameters=tuple(parameters),
                    # Sorted for deterministic output regardless of visit order.
                    variables_read=tuple(sorted(analyzer.reads)),
                    variables_written=tuple(sorted(analyzer.writes)),
                    functions_called=tuple(sorted(analyzer.calls)),
                    docstring=ast.get_docstring(node),
                    accesses=tuple(analyzer.accesses),
                )
            )
        elif isinstance(node, ast.ClassDef):
            class_analyzer = _ClassAnalyzer()
            # Visit each body statement directly so the ClassDef itself is
            # not re-dispatched (which would miss the class-level names).
            for body_node in node.body:
                class_analyzer.visit(body_node)

            classes.append(
                ClassEntry(
                    name=node.name,
                    file=file_path,
                    start_line=node.lineno,
                    end_line=_end_lineno(node),
                    methods=tuple(sorted(class_analyzer.methods)),
                    attributes=tuple(sorted(class_analyzer.attributes)),
                    base_classes=tuple(
                        # Bases that cannot be reduced to a dotted name are dropped.
                        sorted(filter(None, (_annotation_to_name(base) for base in node.bases)))
                    ),
                    docstring=ast.get_docstring(node),
                )
            )

    return ModuleParseResult(
        file=file_path,
        functions=tuple(functions),
        classes=tuple(classes),
        imports=tuple(imports),
        assignments=tuple(assignments),
    )
+ )
210
+
211
+
212
def parse_python_file(path: str | Path) -> ModuleParseResult:
    """Read a Python source file from disk (as UTF-8) and parse it."""
    target = Path(path)
    return parse_python_source(target.read_text(encoding="utf-8"), file=target)
@@ -0,0 +1,52 @@
1
+ """Call resolution utilities for call graph generation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+ from .models import FunctionEntry, ImportEntry, ModuleParseResult
8
+
9
+
10
+ @dataclass(frozen=True, slots=True)
11
+ class ResolvedCall:
12
+ """Resolved function call target."""
13
+
14
+ caller: str
15
+ callee: str
16
+ resolved: bool
17
+
18
+
19
+ def _import_alias_map(imports: tuple[ImportEntry, ...]) -> dict[str, str]:
20
+ mapping: dict[str, str] = {}
21
+ for entry in imports:
22
+ if entry.module:
23
+ for name in entry.names:
24
+ mapping[name] = f"{entry.module}.{name}"
25
+ else:
26
+ for name in entry.names:
27
+ mapping[name] = name
28
+ return mapping
29
+
30
+
31
+ def resolve_calls(module: ModuleParseResult) -> tuple[ResolvedCall, ...]:
32
+ """Resolve function call strings using local symbols and import aliases."""
33
+ local_funcs = {fn.name for fn in module.functions}
34
+ aliases = _import_alias_map(module.imports)
35
+
36
+ resolved_calls: list[ResolvedCall] = []
37
+ for fn in module.functions:
38
+ for called in fn.functions_called:
39
+ if called in local_funcs:
40
+ resolved_calls.append(ResolvedCall(caller=fn.name, callee=called, resolved=True))
41
+ continue
42
+
43
+ head = called.split(".", 1)[0]
44
+ if head in aliases:
45
+ tail = called[len(head) + 1 :] if called.startswith(f"{head}.") else ""
46
+ normalized = aliases[head]
47
+ callee = f"{normalized}.{tail}" if tail else normalized
48
+ resolved_calls.append(ResolvedCall(caller=fn.name, callee=callee, resolved=False))
49
+ else:
50
+ resolved_calls.append(ResolvedCall(caller=fn.name, callee=called, resolved=False))
51
+
52
+ return tuple(resolved_calls)
@@ -0,0 +1,75 @@
1
+ """Intermediate representation models emitted by the parser."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from pathlib import Path
7
+
8
+
9
@dataclass(frozen=True, slots=True)
class VariableAccess:
    """Represents a variable read/write access in function scope."""

    name: str  # plain or dotted name (e.g. "self.count")
    line: int  # source line of the access (from ast lineno, 1-based)
    access_type: str  # "read" | "write"
16
+
17
+
18
@dataclass(frozen=True, slots=True)
class FunctionEntry:
    """Represents parsed metadata for a function."""

    name: str  # function name
    file: Path  # source file the function was parsed from
    start_line: int  # line of the def statement
    end_line: int  # last line of the definition (falls back to start line)
    parameters: tuple[str, ...]  # parameter names, including *args/**kwargs
    variables_read: tuple[str, ...]  # sorted names read in the body
    variables_written: tuple[str, ...]  # sorted names written in the body
    functions_called: tuple[str, ...]  # sorted call-target names
    docstring: str | None  # None when absent
    accesses: tuple[VariableAccess, ...] = ()  # per-access records, in visit order
32
+
33
+
34
@dataclass(frozen=True, slots=True)
class ClassEntry:
    """Represents parsed metadata for a class."""

    name: str  # class name
    file: Path  # source file the class was parsed from
    start_line: int  # line of the class statement
    end_line: int  # last line of the definition (falls back to start line)
    methods: tuple[str, ...]  # sorted method names defined in the class body
    attributes: tuple[str, ...]  # sorted class-level assignment targets
    base_classes: tuple[str, ...]  # sorted dotted names of declared bases
    docstring: str | None  # None when absent
46
+
47
+
48
@dataclass(frozen=True, slots=True)
class ImportEntry:
    """Represents an import statement."""

    module: str  # source module for "from X import ..."; "" for plain "import X"
    names: tuple[str, ...]  # imported names (module names for plain imports)
    line: int  # line of the import statement
55
+
56
+
57
@dataclass(frozen=True, slots=True)
class AssignmentEntry:
    """Represents an assignment target at module scope."""

    target: str  # bound name, dotted for attribute targets
    line: int  # line of the assignment statement
63
+
64
+
65
@dataclass(slots=True)
class ModuleParseResult:
    """Top-level parse result for a Python module."""

    file: Path  # file the result describes
    functions: tuple[FunctionEntry, ...] = ()  # module-level functions
    classes: tuple[ClassEntry, ...] = ()  # module-level classes
    imports: tuple[ImportEntry, ...] = ()  # module-level imports
    assignments: tuple[AssignmentEntry, ...] = ()  # module-level assignments
    parse_errors: tuple[str, ...] = ()  # non-empty when parsing failed
    metadata: dict[str, str] = field(default_factory=dict)  # free-form extras
@@ -0,0 +1,56 @@
1
+ """Tree-sitter adapter contract and graceful fallback for parsing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+ from typing import Protocol
8
+
9
+ from .ast_parser import parse_python_file
10
+ from .models import ModuleParseResult
11
+
12
+
13
class TreeSitterParser(Protocol):
    """Structural interface for a tree-sitter-backed parser implementation.

    Any object exposing a compatible ``parse_file`` satisfies this protocol;
    no subclassing is required (see ``parse_with_fallback``).
    """

    def parse_file(self, path: Path) -> ModuleParseResult:
        """Parse a file and return module parse result."""
18
+
19
+
20
+ @dataclass(slots=True)
21
+ class ParserFallbackResult:
22
+ """Result with parser backend metadata."""
23
+
24
+ result: ModuleParseResult
25
+ backend: str
26
+ fallback_reason: str | None = None
27
+
28
+
29
+ def parse_with_fallback(path: str | Path, tree_sitter: TreeSitterParser | None = None) -> ParserFallbackResult:
30
+ """Use tree-sitter when available, otherwise fallback to stdlib AST parser."""
31
+ file_path = Path(path)
32
+
33
+ if tree_sitter is None:
34
+ return ParserFallbackResult(
35
+ result=parse_python_file(file_path),
36
+ backend="ast",
37
+ fallback_reason="tree_sitter_unavailable",
38
+ )
39
+
40
+ try:
41
+ ts_result = tree_sitter.parse_file(file_path)
42
+ except Exception as exc: # pragma: no cover - defensive fallback
43
+ return ParserFallbackResult(
44
+ result=parse_python_file(file_path),
45
+ backend="ast",
46
+ fallback_reason=f"tree_sitter_failed:{exc.__class__.__name__}",
47
+ )
48
+
49
+ if ts_result.parse_errors:
50
+ return ParserFallbackResult(
51
+ result=parse_python_file(file_path),
52
+ backend="ast",
53
+ fallback_reason="tree_sitter_partial_or_error",
54
+ )
55
+
56
+ return ParserFallbackResult(result=ts_result, backend="tree_sitter")