codegraph-gen 1.0.0__tar.gz → 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/PKG-INFO +1 -1
- {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/pyproject.toml +1 -1
- {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/__main__.py +3 -3
- codegraph_gen-1.1.0/src/codegraph_gen/builder.py +27 -0
- {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/config.py +1 -1
- {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/detect.py +9 -5
- {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/engine.py +23 -20
- codegraph_gen-1.1.0/src/codegraph_gen/parser/__init__.py +31 -0
- codegraph_gen-1.1.0/src/codegraph_gen/parser/base.py +154 -0
- codegraph_gen-1.1.0/src/codegraph_gen/parser/cpp.py +335 -0
- codegraph_gen-1.1.0/src/codegraph_gen/parser/go.py +259 -0
- codegraph_gen-1.1.0/src/codegraph_gen/parser/javascript.py +345 -0
- codegraph_gen-1.1.0/src/codegraph_gen/parser/kotlin.py +351 -0
- codegraph_gen-1.1.0/src/codegraph_gen/parser/python.py +360 -0
- codegraph_gen-1.1.0/src/codegraph_gen/parser/rust.py +450 -0
- codegraph_gen-1.1.0/src/codegraph_gen/parser/swift.py +306 -0
- codegraph_gen-1.1.0/src/codegraph_gen/resolver.py +650 -0
- codegraph_gen-1.1.0/src/codegraph_gen/resolver_strategy.py +411 -0
- codegraph_gen-1.0.0/src/codegraph_gen/parser/base.py → codegraph_gen-1.1.0/src/codegraph_gen/schema.py +15 -9
- codegraph_gen-1.0.0/src/codegraph_gen/builder.py +0 -747
- codegraph_gen-1.0.0/src/codegraph_gen/parser/__init__.py +0 -27
- codegraph_gen-1.0.0/src/codegraph_gen/parser/cpp.py +0 -349
- codegraph_gen-1.0.0/src/codegraph_gen/parser/go.py +0 -268
- codegraph_gen-1.0.0/src/codegraph_gen/parser/javascript.py +0 -370
- codegraph_gen-1.0.0/src/codegraph_gen/parser/kotlin.py +0 -387
- codegraph_gen-1.0.0/src/codegraph_gen/parser/python.py +0 -415
- codegraph_gen-1.0.0/src/codegraph_gen/parser/rust.py +0 -497
- codegraph_gen-1.0.0/src/codegraph_gen/parser/swift.py +0 -327
- {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/README.md +0 -0
- {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/__init__.py +0 -0
- {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/ai.py +0 -0
- {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/analyzer.py +0 -0
- {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/cluster.py +0 -0
- {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/py.typed +0 -0
- {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/renderer.py +0 -0
- {codegraph_gen-1.0.0 → codegraph_gen-1.1.0}/src/codegraph_gen/writer.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: codegraph-gen
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.1.0
|
|
4
4
|
Summary: AST-based codebase knowledge graph generator in Markdown
|
|
5
5
|
Keywords: knowledge-graph,ast,codebase,markdown,tree-sitter,visualization,static-analysis,ai-agent,obsidian
|
|
6
6
|
Author: twn39
|
|
@@ -93,7 +93,7 @@ def build(
|
|
|
93
93
|
|
|
94
94
|
from codegraph_gen.engine import CodegraphEngine, PipelineStage
|
|
95
95
|
|
|
96
|
-
engine = CodegraphEngine(config)
|
|
96
|
+
engine = CodegraphEngine()
|
|
97
97
|
|
|
98
98
|
# Run pipeline with click progress bar
|
|
99
99
|
with Progress(
|
|
@@ -129,7 +129,7 @@ def build(
|
|
|
129
129
|
elif stage == PipelineStage.COMPLETED:
|
|
130
130
|
progress.update(task, description="Done!")
|
|
131
131
|
|
|
132
|
-
result = engine.run_pipeline(progress_callback=progress_callback)
|
|
132
|
+
result = engine.run_pipeline(config, progress_callback=progress_callback)
|
|
133
133
|
|
|
134
134
|
G = result.graph
|
|
135
135
|
if G.number_of_nodes() == 0:
|
|
@@ -296,7 +296,7 @@ def info():
|
|
|
296
296
|
|
|
297
297
|
ver = version("codegraph-gen")
|
|
298
298
|
except Exception:
|
|
299
|
-
ver = "1.0.0"
|
|
299
|
+
ver = "1.1.0"
|
|
300
300
|
console.print(f"[bold]codegraph v{ver}[/bold]")
|
|
301
301
|
console.print(
|
|
302
302
|
"Supported languages: Python, JavaScript, TypeScript, Kotlin, Go, Rust, Swift"
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
import networkx as nx
|
|
4
|
+
from codegraph_gen.schema import ExtractionResult
|
|
5
|
+
from codegraph_gen.resolver import TypeResolver
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def build_graph(extractions: list[ExtractionResult], workspace_dir: Path) -> nx.DiGraph:
|
|
11
|
+
"""
|
|
12
|
+
Assembles a list of ExtractionResults into a single directed graph
|
|
13
|
+
and resolves call, inherit, and import edges using a two-pass scope resolver.
|
|
14
|
+
"""
|
|
15
|
+
G = nx.DiGraph()
|
|
16
|
+
|
|
17
|
+
# 1. Add all nodes to the graph
|
|
18
|
+
for ext in extractions:
|
|
19
|
+
for node in ext.nodes:
|
|
20
|
+
G.add_node(node.id, **node.model_dump())
|
|
21
|
+
|
|
22
|
+
# 2. Run Type Resolver (Two-pass type inference & scope/edge resolution)
|
|
23
|
+
resolver = TypeResolver(G, extractions, workspace_dir)
|
|
24
|
+
resolver.propagate_types()
|
|
25
|
+
resolver.resolve_all_edges()
|
|
26
|
+
|
|
27
|
+
return G
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
from pydantic import BaseModel, Field
|
|
4
|
-
from codegraph_gen.
|
|
4
|
+
from codegraph_gen.schema import ExtractionResult
|
|
5
5
|
|
|
6
6
|
# Default exclusions for files and directories we want to ignore
|
|
7
7
|
DEFAULT_EXCLUSIONS = {
|
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from pathlib import Path
|
|
3
|
-
from codegraph_gen.config import CodegraphConfig, LANGUAGE_EXTENSIONS
|
|
3
|
+
from codegraph_gen.config import LANGUAGE_EXTENSIONS
|
|
4
4
|
|
|
5
5
|
logger = logging.getLogger(__name__)
|
|
6
6
|
|
|
7
7
|
|
|
8
|
-
def discover_files(config: CodegraphConfig) -> list[tuple[Path, str]]:
|
|
8
|
+
def discover_files(
|
|
9
|
+
workspace_dir: Path,
|
|
10
|
+
languages: set[str],
|
|
11
|
+
exclusions: set[str],
|
|
12
|
+
) -> list[tuple[Path, str]]:
|
|
9
13
|
"""
|
|
10
14
|
Recursively discovers source files in the workspace directory.
|
|
11
15
|
Filters by allowed languages and ignores files/directories in exclusions.
|
|
@@ -14,17 +18,17 @@ def discover_files(config: CodegraphConfig) -> list[tuple[Path, str]]:
|
|
|
14
18
|
List of tuples: (absolute_file_path, language_name)
|
|
15
19
|
"""
|
|
16
20
|
found_files = []
|
|
17
|
-
workspace = config.workspace_dir.resolve()
|
|
21
|
+
workspace = workspace_dir.resolve()
|
|
18
22
|
|
|
19
23
|
# Map extension -> language
|
|
20
24
|
ext_to_lang = {}
|
|
21
|
-
for lang in config.languages:
|
|
25
|
+
for lang in languages:
|
|
22
26
|
if lang in LANGUAGE_EXTENSIONS:
|
|
23
27
|
for ext in LANGUAGE_EXTENSIONS[lang]:
|
|
24
28
|
ext_to_lang[ext] = lang
|
|
25
29
|
|
|
26
30
|
# Normalize exclusions to lowercase for case-insensitive matching
|
|
27
|
-
exclusions_lower = {exc.lower() for exc in config.exclusions}
|
|
31
|
+
exclusions_lower = {exc.lower() for exc in exclusions}
|
|
28
32
|
|
|
29
33
|
def is_ignored(path: Path) -> bool:
|
|
30
34
|
# Check if any part of the path is in exclusions_lower
|
|
@@ -9,7 +9,7 @@ import networkx as nx
|
|
|
9
9
|
from pydantic import BaseModel, ConfigDict
|
|
10
10
|
|
|
11
11
|
from codegraph_gen.config import CodegraphConfig, CacheEntry
|
|
12
|
-
from codegraph_gen.
|
|
12
|
+
from codegraph_gen.schema import ExtractionResult
|
|
13
13
|
from codegraph_gen.detect import discover_files
|
|
14
14
|
from codegraph_gen.parser import get_parser
|
|
15
15
|
from codegraph_gen.builder import build_graph
|
|
@@ -77,13 +77,12 @@ class PipelineResult(BaseModel):
|
|
|
77
77
|
|
|
78
78
|
|
|
79
79
|
class CodegraphEngine:
|
|
80
|
-
def __init__(self
|
|
81
|
-
self.config = config
|
|
82
|
-
self.renderer = MarkdownRenderer(config.workspace_dir)
|
|
80
|
+
def __init__(self):
|
|
83
81
|
self.writer = VaultWriter()
|
|
84
82
|
|
|
85
83
|
def run_pipeline(
|
|
86
84
|
self,
|
|
85
|
+
config: CodegraphConfig,
|
|
87
86
|
progress_callback: Optional[
|
|
88
87
|
Callable[[PipelineStage, Any, int, int], None]
|
|
89
88
|
] = None,
|
|
@@ -91,14 +90,18 @@ class CodegraphEngine:
|
|
|
91
90
|
"""
|
|
92
91
|
Runs the full codegraph generation pipeline.
|
|
93
92
|
Args:
|
|
93
|
+
config: Configuration settings.
|
|
94
94
|
progress_callback: A function taking (stage, current_item, index, total)
|
|
95
95
|
"""
|
|
96
96
|
logger.info("Starting codegraph engine pipeline...")
|
|
97
|
+
renderer = MarkdownRenderer(config.workspace_dir)
|
|
97
98
|
|
|
98
99
|
# 1. Discover files
|
|
99
100
|
if progress_callback:
|
|
100
101
|
progress_callback(PipelineStage.DISCOVERING, None, 0, 0)
|
|
101
|
-
files = discover_files(
|
|
102
|
+
files = discover_files(
|
|
103
|
+
config.workspace_dir, config.languages, config.exclusions
|
|
104
|
+
)
|
|
102
105
|
if not files:
|
|
103
106
|
logger.warning("No supported files found.")
|
|
104
107
|
if progress_callback:
|
|
@@ -116,9 +119,9 @@ class CodegraphEngine:
|
|
|
116
119
|
extractions = []
|
|
117
120
|
total_files = len(files)
|
|
118
121
|
|
|
119
|
-
cache_path =
|
|
122
|
+
cache_path = config.absolute_output_dir / "cache.json"
|
|
120
123
|
cache_entries = {}
|
|
121
|
-
if
|
|
124
|
+
if config.use_cache and cache_path.exists():
|
|
122
125
|
try:
|
|
123
126
|
with open(cache_path, "r", encoding="utf-8") as f:
|
|
124
127
|
cache_data = json.load(f)
|
|
@@ -132,7 +135,7 @@ class CodegraphEngine:
|
|
|
132
135
|
new_cache_entries = {}
|
|
133
136
|
|
|
134
137
|
for file_path, lang in files:
|
|
135
|
-
rel_path = str(file_path.relative_to(
|
|
138
|
+
rel_path = str(file_path.relative_to(config.workspace_dir))
|
|
136
139
|
try:
|
|
137
140
|
stat = file_path.stat()
|
|
138
141
|
mtime = stat.st_mtime
|
|
@@ -170,7 +173,7 @@ class CodegraphEngine:
|
|
|
170
173
|
if progress_callback:
|
|
171
174
|
progress_callback(PipelineStage.PARSING, None, total_files, total_files)
|
|
172
175
|
else:
|
|
173
|
-
max_workers =
|
|
176
|
+
max_workers = config.max_workers
|
|
174
177
|
if max_workers > 1 and len(files_to_parse) > 1:
|
|
175
178
|
logger.info(
|
|
176
179
|
f"Parsing {len(files_to_parse)} files in parallel with {max_workers} workers..."
|
|
@@ -183,7 +186,7 @@ class CodegraphEngine:
|
|
|
183
186
|
_parse_file_worker,
|
|
184
187
|
file_path,
|
|
185
188
|
lang,
|
|
186
|
-
|
|
189
|
+
config.workspace_dir,
|
|
187
190
|
): (file_path, rel_path, mtime, size, file_hash)
|
|
188
191
|
for file_path, lang, rel_path, mtime, size, file_hash in files_to_parse
|
|
189
192
|
}
|
|
@@ -235,7 +238,7 @@ class CodegraphEngine:
|
|
|
235
238
|
)
|
|
236
239
|
try:
|
|
237
240
|
parser = get_parser(lang)
|
|
238
|
-
result = parser.parse_file(file_path,
|
|
241
|
+
result = parser.parse_file(file_path, config.workspace_dir)
|
|
239
242
|
extractions.append(result)
|
|
240
243
|
if file_hash:
|
|
241
244
|
new_cache_entries[rel_path] = CacheEntry(
|
|
@@ -247,7 +250,7 @@ class CodegraphEngine:
|
|
|
247
250
|
# 3. Build graph
|
|
248
251
|
if progress_callback:
|
|
249
252
|
progress_callback(PipelineStage.BUILDING, None, 0, 0)
|
|
250
|
-
G = build_graph(extractions,
|
|
253
|
+
G = build_graph(extractions, config.workspace_dir)
|
|
251
254
|
|
|
252
255
|
# 4. Component clustering
|
|
253
256
|
if progress_callback:
|
|
@@ -271,7 +274,7 @@ class CodegraphEngine:
|
|
|
271
274
|
rendered_nodes = {}
|
|
272
275
|
for nid, ndata in G.nodes(data=True):
|
|
273
276
|
fname = get_node_filename(nid)
|
|
274
|
-
content =
|
|
277
|
+
content = renderer.render_node_page(nid, ndata, G, node_component_map)
|
|
275
278
|
rendered_nodes[fname] = content
|
|
276
279
|
|
|
277
280
|
rendered_components = {}
|
|
@@ -279,7 +282,7 @@ class CodegraphEngine:
|
|
|
279
282
|
comp_name = component_names[cid]
|
|
280
283
|
cohesion = cohesion_scores[cid]
|
|
281
284
|
fname = get_component_filename(comp_name)
|
|
282
|
-
content =
|
|
285
|
+
content = renderer.render_component_page(
|
|
283
286
|
cid,
|
|
284
287
|
members,
|
|
285
288
|
G,
|
|
@@ -292,7 +295,7 @@ class CodegraphEngine:
|
|
|
292
295
|
|
|
293
296
|
# Check if README already has AI Insights and preserve it
|
|
294
297
|
ai_insights = None
|
|
295
|
-
readme_path =
|
|
298
|
+
readme_path = config.absolute_output_dir / "README.md"
|
|
296
299
|
if readme_path.exists():
|
|
297
300
|
try:
|
|
298
301
|
old_readme = readme_path.read_text(encoding="utf-8")
|
|
@@ -315,7 +318,7 @@ class CodegraphEngine:
|
|
|
315
318
|
f"Could not read existing README.md to preserve AI insights: {e}"
|
|
316
319
|
)
|
|
317
320
|
|
|
318
|
-
readme_content =
|
|
321
|
+
readme_content = renderer.render_readme(
|
|
319
322
|
G,
|
|
320
323
|
components,
|
|
321
324
|
cohesion_scores,
|
|
@@ -324,7 +327,7 @@ class CodegraphEngine:
|
|
|
324
327
|
ai_insights=ai_insights,
|
|
325
328
|
)
|
|
326
329
|
|
|
327
|
-
prompt_content =
|
|
330
|
+
prompt_content = renderer.render_agent_prompt(
|
|
328
331
|
G, components, cohesion_scores, component_names, analysis
|
|
329
332
|
)
|
|
330
333
|
|
|
@@ -332,7 +335,7 @@ class CodegraphEngine:
|
|
|
332
335
|
if progress_callback:
|
|
333
336
|
progress_callback(PipelineStage.WRITING, None, 0, 0)
|
|
334
337
|
self.writer.write_vault(
|
|
335
|
-
|
|
338
|
+
config.absolute_output_dir,
|
|
336
339
|
rendered_nodes,
|
|
337
340
|
rendered_components,
|
|
338
341
|
readme_content,
|
|
@@ -340,9 +343,9 @@ class CodegraphEngine:
|
|
|
340
343
|
)
|
|
341
344
|
|
|
342
345
|
# Write updated cache back to disk
|
|
343
|
-
if
|
|
346
|
+
if config.use_cache:
|
|
344
347
|
try:
|
|
345
|
-
|
|
348
|
+
config.absolute_output_dir.mkdir(parents=True, exist_ok=True)
|
|
346
349
|
with open(cache_path, "w", encoding="utf-8") as f:
|
|
347
350
|
json.dump(
|
|
348
351
|
{k: v.model_dump() for k, v in new_cache_entries.items()},
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import importlib
|
|
2
|
+
import logging
|
|
3
|
+
import pkgutil
|
|
4
|
+
import sys
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from codegraph_gen.parser.base import BaseParser, _PARSER_REGISTRY
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
# Dynamic package scan & load to trigger @register_parser registrations
|
|
11
|
+
package_dir = str(Path(__file__).parent)
|
|
12
|
+
for _, module_name, _ in pkgutil.iter_modules([package_dir]):
|
|
13
|
+
if module_name == "base":
|
|
14
|
+
continue
|
|
15
|
+
full_module_name = f"{__name__}.{module_name}"
|
|
16
|
+
if full_module_name not in sys.modules:
|
|
17
|
+
try:
|
|
18
|
+
importlib.import_module(full_module_name)
|
|
19
|
+
except Exception as e:
|
|
20
|
+
logger.error(
|
|
21
|
+
f"Defensive Loading: Failed to import parser module {full_module_name}: {e}",
|
|
22
|
+
exc_info=True,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def get_parser(language: str) -> BaseParser:
|
|
27
|
+
"""Returns an instance of the parser for the given language."""
|
|
28
|
+
lang_lower = language.lower()
|
|
29
|
+
if lang_lower not in _PARSER_REGISTRY:
|
|
30
|
+
raise ValueError(f"Unsupported language: {language}")
|
|
31
|
+
return _PARSER_REGISTRY[lang_lower]()
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
import logging
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
import tree_sitter
|
|
5
|
+
from codegraph_gen.schema import (
|
|
6
|
+
NodeSchema,
|
|
7
|
+
EdgeSchema,
|
|
8
|
+
ExtractionResult,
|
|
9
|
+
SymbolCollector,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class BaseParser(ABC):
|
|
16
|
+
"""Abstract base class for all language-specific AST parsers."""
|
|
17
|
+
|
|
18
|
+
@abstractmethod
|
|
19
|
+
def parse_file(self, file_path: Path, workspace_dir: Path) -> ExtractionResult:
|
|
20
|
+
"""Parses a file and extracts symbols (nodes) and relations (edges)."""
|
|
21
|
+
pass
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
_PARSER_REGISTRY: dict[str, type[BaseParser]] = {}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def register_parser(*languages: str):
|
|
28
|
+
"""Decorator to register a BaseParser subclass for one or more languages."""
|
|
29
|
+
|
|
30
|
+
def decorator(cls: type[BaseParser]):
|
|
31
|
+
for lang in languages:
|
|
32
|
+
_PARSER_REGISTRY[lang.lower()] = cls
|
|
33
|
+
return cls
|
|
34
|
+
|
|
35
|
+
return decorator
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class ScopeTracker:
|
|
39
|
+
def __init__(self, initial_scope_id: str, initial_scope_type: str = "file"):
|
|
40
|
+
self._stack: list[tuple[str, str]] = [(initial_scope_id, initial_scope_type)]
|
|
41
|
+
|
|
42
|
+
def push(self, scope_id: str, scope_type: str) -> "ScopeTracker":
|
|
43
|
+
"""Pushes a scope onto the stack. Returns self to act as a context manager."""
|
|
44
|
+
self._stack.append((scope_id, scope_type))
|
|
45
|
+
return self
|
|
46
|
+
|
|
47
|
+
def pop(self) -> tuple[str, str]:
|
|
48
|
+
"""Pops the innermost scope from the stack."""
|
|
49
|
+
if len(self._stack) <= 1:
|
|
50
|
+
raise IndexError("Cannot pop the root scope")
|
|
51
|
+
return self._stack.pop()
|
|
52
|
+
|
|
53
|
+
def __enter__(self) -> "ScopeTracker":
|
|
54
|
+
return self
|
|
55
|
+
|
|
56
|
+
def __exit__(self, exc_type, exc_val, exc_tb) -> None:
|
|
57
|
+
self.pop()
|
|
58
|
+
|
|
59
|
+
@property
|
|
60
|
+
def current_id(self) -> str:
|
|
61
|
+
return self._stack[-1][0] if self._stack else ""
|
|
62
|
+
|
|
63
|
+
@property
|
|
64
|
+
def current_type(self) -> str:
|
|
65
|
+
return self._stack[-1][1] if self._stack else ""
|
|
66
|
+
|
|
67
|
+
@property
|
|
68
|
+
def stack(self) -> list[tuple[str, str]]:
|
|
69
|
+
return self._stack
|
|
70
|
+
|
|
71
|
+
def find_parent_by_type(self, type_name: str) -> str | None:
|
|
72
|
+
"""Searches the stack from innermost to outermost for a specific scope type."""
|
|
73
|
+
for scope_id, scope_type in reversed(self._stack):
|
|
74
|
+
if scope_type == type_name:
|
|
75
|
+
return scope_id
|
|
76
|
+
return None
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class ASTVisitor:
|
|
80
|
+
"""Optimized base AST Visitor for dynamic routing and AST traversal."""
|
|
81
|
+
|
|
82
|
+
def __init__(self, source: bytes, rel_path: str, collector: SymbolCollector):
|
|
83
|
+
self.source = source
|
|
84
|
+
self.rel_path = rel_path
|
|
85
|
+
self.collector = collector
|
|
86
|
+
self._visitor_cache = {}
|
|
87
|
+
self.scope = ScopeTracker(rel_path, "file")
|
|
88
|
+
|
|
89
|
+
def add_node(self, node: NodeSchema) -> None:
|
|
90
|
+
"""Helper to collect a node via the collector."""
|
|
91
|
+
self.collector.add_node(node)
|
|
92
|
+
|
|
93
|
+
def add_edge(self, edge: EdgeSchema) -> None:
|
|
94
|
+
"""Helper to collect an edge via the collector."""
|
|
95
|
+
self.collector.add_edge(edge)
|
|
96
|
+
|
|
97
|
+
@property
|
|
98
|
+
def scope_stack(self) -> list[tuple[str, str]]:
|
|
99
|
+
"""Deprecated: Use self.scope instead. Kept for backward compatibility."""
|
|
100
|
+
return self.scope.stack
|
|
101
|
+
|
|
102
|
+
def visit(self, node: tree_sitter.Node) -> None:
|
|
103
|
+
"""Visits a node by dynamically routing to visit_NodeType."""
|
|
104
|
+
if node.type == "ERROR" or (hasattr(node, "is_error") and node.is_error):
|
|
105
|
+
logger.debug(f"Skipping syntax error node: {node}")
|
|
106
|
+
return
|
|
107
|
+
|
|
108
|
+
node_type = node.type
|
|
109
|
+
visitor = self._visitor_cache.get(node_type)
|
|
110
|
+
if visitor is None:
|
|
111
|
+
# Replace characters invalid in Python identifiers
|
|
112
|
+
safe_type = node_type.replace("-", "_").replace(".", "_")
|
|
113
|
+
visitor = getattr(self, f"visit_{safe_type}", self.generic_visit)
|
|
114
|
+
self._visitor_cache[node_type] = visitor
|
|
115
|
+
|
|
116
|
+
try:
|
|
117
|
+
visitor(node)
|
|
118
|
+
except Exception as e:
|
|
119
|
+
logger.error(
|
|
120
|
+
f"Error visiting node of type {node.type} at line {node.start_point[0] + 1}: {e}",
|
|
121
|
+
exc_info=True,
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
def generic_visit(self, node: tree_sitter.Node) -> None:
|
|
125
|
+
"""Default recursive traversal. Prunes known leaf nodes."""
|
|
126
|
+
if node.type in (
|
|
127
|
+
"string",
|
|
128
|
+
"comment",
|
|
129
|
+
"line_comment",
|
|
130
|
+
"block_comment",
|
|
131
|
+
"number",
|
|
132
|
+
"true",
|
|
133
|
+
"false",
|
|
134
|
+
"null",
|
|
135
|
+
):
|
|
136
|
+
return
|
|
137
|
+
for child in node.children:
|
|
138
|
+
self.visit(child)
|
|
139
|
+
|
|
140
|
+
def get_text(self, node: tree_sitter.Node) -> str:
|
|
141
|
+
"""Helper to extract text from a node using the source bytes."""
|
|
142
|
+
return (
|
|
143
|
+
self.source[node.start_byte : node.end_byte]
|
|
144
|
+
.decode("utf-8", errors="replace")
|
|
145
|
+
.strip()
|
|
146
|
+
)
|
|
147
|
+
|
|
148
|
+
def get_line_range(self, node: tree_sitter.Node) -> tuple[int, int]:
|
|
149
|
+
"""Helper to extract 1-indexed line start and end points."""
|
|
150
|
+
return node.start_point[0] + 1, node.end_point[0] + 1
|
|
151
|
+
|
|
152
|
+
def get_current_parent_id(self) -> str:
|
|
153
|
+
"""Helper to retrieve the current parent scope's ID."""
|
|
154
|
+
return self.scope.current_id
|