codegraph-gen 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,116 @@
1
+ import logging
2
+ import networkx as nx
3
+ from networkx.algorithms.community import louvain_communities
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+
8
def detect_components(
    G: nx.DiGraph,
) -> tuple[dict[int, list[str]], dict[int, float], dict[int, str]]:
    """
    Detects logical components in the graph using modularity clustering.

    The directed graph is projected onto an undirected weighted graph (edge
    weights depend on the ``relation`` attribute) and clustered with Louvain
    community detection using a fixed seed.

    Args:
        G: Directed graph whose edges may carry a ``relation`` attribute and
            whose nodes may carry ``label`` and ``source_file`` attributes.

    Returns:
        components: dict mapping component_id -> sorted list of node_ids
        cohesion_scores: dict mapping component_id -> density of the induced
            subgraph of ``G``, rounded to two decimals
        component_names: dict mapping component_id -> human friendly name;
            names are unique across components
    """
    import os
    from collections import Counter

    if G.number_of_nodes() == 0:
        return {}, {}, {}

    # Relations not listed here (or missing) fall back to weight 1.0.
    relation_weights = {"contains": 10.0, "imports": 2.0, "calls": 1.0}

    # Convert to undirected weighted graph for Louvain community detection.
    # Parallel/antiparallel edges between the same pair accumulate weight.
    U = nx.Graph()
    U.add_nodes_from(G.nodes)
    for u, v, data in G.edges(data=True):
        weight = relation_weights.get(data.get("relation"), 1.0)
        if U.has_edge(u, v):
            U[u][v]["weight"] += weight
        else:
            U.add_edge(u, v, weight=weight)

    # Run Louvain community clustering with fixed seed for reproducibility
    communities = list(louvain_communities(U, weight="weight", seed=42))

    # Sort communities by size descending, breaking ties by sorted member IDs
    communities.sort(key=lambda s: (-len(s), sorted(s)))

    components = {}
    cohesion_scores = {}
    raw_components = []

    for idx, member_set in enumerate(communities, start=1):
        # Sort members: set iteration order depends on string hashing and
        # would otherwise differ between interpreter runs.
        members = sorted(member_set)
        components[idx] = members

        # Cohesion: density of the induced (directed) subgraph
        cohesion_scores[idx] = round(nx.density(G.subgraph(members)), 2)

        # Name the component by its most central (highest degree) node,
        # breaking ties alphabetically by node ID.
        clean_name = ""
        degrees = dict(G.degree(members))
        if degrees:
            most_central_node = min(degrees, key=lambda n: (-degrees[n], n))
            node_label = G.nodes[most_central_node].get("label") or most_central_node
            # Remove trailing parens/extensions to make a clean component name
            clean_name = str(node_label).replace("()", "").split(".")[0]
        if not clean_name:
            # No usable label (e.g. ".env" or "()" collapse to an empty string)
            clean_name = f"Component {idx}"

        # Find the longest common directory path of the members' source files
        dir_paths = []
        for member in members:
            source_file = G.nodes[member].get("source_file")
            if source_file:
                dir_path = os.path.dirname(source_file)
                if dir_path:
                    dir_paths.append(dir_path)

        common_dir = ""
        if dir_paths:
            try:
                common_dir = os.path.commonpath(dir_paths)
            except ValueError:
                # Raised e.g. when absolute and relative paths are mixed
                common_dir = ""
            if common_dir in (".", "", "/"):
                common_dir = ""

        raw_components.append((idx, clean_name, common_dir))

    # Count frequencies of candidate names to detect collisions
    name_counts = Counter(
        common_dir if common_dir else clean_name
        for _, clean_name, common_dir in raw_components
    )

    # Assign final unique component names
    component_names = {}
    used_names = set()
    for idx, clean_name, common_dir in raw_components:
        cand = common_dir if common_dir else clean_name
        if name_counts[cand] == 1:
            name = cand
        elif common_dir:
            # Collision: qualify the directory with the central node's name
            name = f"{common_dir} ({clean_name})"
        else:
            name = f"{clean_name} (Component {idx})"

        # The qualified name can still clash (same directory and same central
        # label, or a clash with another component's plain name); the
        # component id is unique, so appending it settles the name.
        while name in used_names:
            name = f"{name} (Component {idx})"
        used_names.add(name)
        component_names[idx] = name

    return components, cohesion_scores, component_names
@@ -0,0 +1,76 @@
1
+ import os
2
+ from pathlib import Path
3
+ from pydantic import BaseModel, Field
4
+ from codegraph_gen.parser.base import ExtractionResult
5
+
6
# Names of files and directories that are skipped by default.
DEFAULT_EXCLUSIONS = {
    # Version control, virtual environments and package managers
    ".git",
    ".venv",
    "venv",
    "node_modules",
    "third_party",
    # Generic build output
    "dist",
    "build",
    ".build",
    # Python and tool caches, plus this tool's own output directory
    "__pycache__",
    ".pytest_cache",
    ".codegraph",
    # Editor settings
    ".idea",
    ".vscode",
    # Compiler / toolchain output and vendored code
    "target",
    "out",
    "bin",
    "obj",
    "vendor",
    "Pods",
    "Carthage",
    "DerivedData",
    "build_output",
    # Web framework caches
    ".next",
    ".nuxt",
    ".cache",
    # Platform-specific build directories
    "build_mac",
    "build_ios",
    "build_ios_sim",
}


# Supported language name -> set of file extensions (with leading dot)
LANGUAGE_EXTENSIONS = {
    "python": {".py"},
    "javascript": {".js", ".mjs", ".cjs"},
    "typescript": {".ts", ".tsx"},
    "kotlin": {".kt", ".kts"},
    "go": {".go"},
    "rust": {".rs"},
    "swift": {".swift"},
    "c": {".c", ".h"},
    "cpp": {".cpp", ".cc", ".cxx", ".hpp", ".hxx"},
}

# Union of every extension listed in LANGUAGE_EXTENSIONS
ALL_EXTENSIONS = set().union(*LANGUAGE_EXTENSIONS.values())
53
+
54
+
55
class CacheEntry(BaseModel):
    """One cached parse result together with the file fingerprint it belongs to."""

    # File modification time recorded alongside the parse result
    mtime: float
    # File size in bytes recorded alongside the parse result
    size: int
    # Content hash of the file (hex string)
    hash: str
    # Extraction result that is reused while the fingerprint still matches
    result: ExtractionResult
60
+
61
+
62
class CodegraphConfig(BaseModel):
    """Configuration class for codegraph parsing and exporting."""

    # Root directory of the code base to analyse
    workspace_dir: Path
    # Where generated output goes; relative paths are taken relative to
    # workspace_dir (see absolute_output_dir)
    output_dir: Path = Field(default_factory=lambda: Path(".codegraph"))
    # Each instance gets its own copy so that mutating one config's exclusions
    # never alters the module-level DEFAULT_EXCLUSIONS or other configs.
    exclusions: set[str] = Field(default_factory=lambda: set(DEFAULT_EXCLUSIONS))
    # Language names to process; defaults to every key of LANGUAGE_EXTENSIONS
    languages: set[str] = Field(default_factory=lambda: set(LANGUAGE_EXTENSIONS.keys()))
    # Defaults to the CPU count, or 4 when it cannot be determined
    max_workers: int = Field(default_factory=lambda: os.cpu_count() or 4)
    use_cache: bool = Field(default=True)

    @property
    def absolute_output_dir(self) -> Path:
        """Return output_dir unchanged if absolute, else joined onto workspace_dir."""
        if self.output_dir.is_absolute():
            return self.output_dir
        return self.workspace_dir / self.output_dir
@@ -0,0 +1,59 @@
1
+ import logging
2
+ from pathlib import Path
3
+ from codegraph_gen.config import CodegraphConfig, LANGUAGE_EXTENSIONS
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+
8
def discover_files(config: CodegraphConfig) -> list[tuple[Path, str]]:
    """
    Recursively discovers source files in the workspace directory.
    Filters by allowed languages and ignores files/directories in exclusions.

    Exclusions are matched case-insensitively against each entry's name.
    Directory entries are visited in sorted order so the result is
    reproducible, and each real directory is scanned at most once so that
    symlink cycles cannot cause unbounded recursion.

    Returns:
        List of tuples: (absolute_file_path, language_name)
    """
    found_files: list[tuple[Path, str]] = []
    workspace = config.workspace_dir.resolve()

    # Map extension -> language (unknown language names are ignored)
    ext_to_lang = {
        ext: lang
        for lang in config.languages
        for ext in LANGUAGE_EXTENSIONS.get(lang, ())
    }

    # Normalize exclusions to lowercase for case-insensitive matching
    exclusions_lower = {exc.lower() for exc in config.exclusions}

    # Resolved directories already scanned (guards against symlink loops)
    visited_dirs: set[Path] = set()

    def scan_dir(directory: Path) -> None:
        try:
            real_dir = directory.resolve()
            if real_dir in visited_dirs:
                return
            visited_dirs.add(real_dir)

            for item in sorted(directory.iterdir()):
                # Ancestors were already checked on the way down, so only the
                # entry's own name can newly match an exclusion.
                if item.name.lower() in exclusions_lower:
                    continue
                if item.is_dir():
                    scan_dir(item)
                elif item.is_file():
                    lang = ext_to_lang.get(item.suffix.lower())
                    if lang is not None:
                        found_files.append((item.resolve(), lang))
        except PermissionError:
            logger.warning(f"Permission denied: {directory}")
        except Exception as e:
            # Best effort: a failure in one directory must not stop discovery
            logger.error(f"Error scanning {directory}: {e}")

    scan_dir(workspace)
    return found_files
@@ -0,0 +1,367 @@
1
+ import logging
2
+ import json
3
+ import hashlib
4
+ import concurrent.futures
5
+ from enum import Enum
6
+ from pathlib import Path
7
+ from typing import Any, Callable, Dict, List, Optional, Tuple
8
+ import networkx as nx
9
+ from pydantic import BaseModel, ConfigDict
10
+
11
+ from codegraph_gen.config import CodegraphConfig, CacheEntry
12
+ from codegraph_gen.parser.base import ExtractionResult
13
+ from codegraph_gen.detect import discover_files
14
+ from codegraph_gen.parser import get_parser
15
+ from codegraph_gen.builder import build_graph
16
+ from codegraph_gen.cluster import detect_components
17
+ from codegraph_gen.analyzer import analyze_graph, AnalysisResult
18
+ from codegraph_gen.renderer import (
19
+ MarkdownRenderer,
20
+ get_node_filename,
21
+ get_component_filename,
22
+ )
23
+ from codegraph_gen.writer import VaultWriter
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
def get_file_hash(path: Path) -> str:
    """Computes the MD5 hash of a file.

    The digest is only used as a change-detection fingerprint, so the hasher
    is created with ``usedforsecurity=False``; this keeps it usable on builds
    that restrict MD5 for security purposes.

    Args:
        path: File to hash.

    Returns:
        The hex digest, or an empty string when the file cannot be read.
    """
    try:
        hasher = hashlib.md5(usedforsecurity=False)
        with open(path, "rb") as f:
            # 64 KiB reads keep the number of Python-level iterations low
            while chunk := f.read(65536):
                hasher.update(chunk)
    except (OSError, ValueError):
        # Missing/unreadable file, directory, invalid path or unavailable digest
        return ""
    return hasher.hexdigest()
38
+
39
+
40
+ def _parse_file_worker(
41
+ file_path: Path, lang: str, workspace_dir: Path
42
+ ) -> tuple[Path, Optional[ExtractionResult], Optional[str]]:
43
+ """Worker function for parallel file parsing."""
44
+ try:
45
+ from codegraph_gen.parser import get_parser
46
+
47
+ parser = get_parser(lang)
48
+ result = parser.parse_file(file_path, workspace_dir)
49
+ return file_path, result, None
50
+ except Exception as e:
51
+ import traceback
52
+
53
+ err_msg = f"{e}\n{traceback.format_exc()}"
54
+ return file_path, None, err_msg
55
+
56
+
57
class PipelineStage(str, Enum):
    """Stages reported to the progress callback, in the order they run.

    Members are also ``str`` instances, so they compare equal to their values.
    """

    DISCOVERING = "discovering"
    PARSING = "parsing"
    BUILDING = "building"
    CLUSTERING = "clustering"
    ANALYZING = "analyzing"
    RENDERING = "rendering"
    WRITING = "writing"
    # Reported last, also when no supported files were found
    COMPLETED = "completed"
66
+
67
+
68
class PipelineResult(BaseModel):
    """Everything produced by one run of the pipeline."""

    # nx.DiGraph is not a pydantic-aware type, so arbitrary types are allowed
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # The graph built from all extraction results
    graph: nx.DiGraph
    # Discovered files as (path, language) pairs
    files: List[Tuple[Path, str]]
    # component_id -> node ids belonging to the component
    components: Dict[int, List[str]]
    # component_id -> cohesion score
    cohesion_scores: Dict[int, float]
    # component_id -> display name
    component_names: Dict[int, str]
    # Output of the graph analysis step
    analysis: AnalysisResult
77
+
78
+
79
class CodegraphEngine:
    """Runs the discover -> parse -> build -> cluster -> analyze -> render -> write pipeline."""

    # README headings after which existing AI insight text is preserved.
    # Checked in this order; the first one found wins.
    _AI_INSIGHT_MARKERS = (
        "## AI Architectural Insights",
        "## AI 架构深度洞察 (AI Architectural Insights)",
        "## AI 架构深度洞察",
    )

    def __init__(self, config: CodegraphConfig):
        self.config = config
        self.renderer = MarkdownRenderer(config.workspace_dir)
        self.writer = VaultWriter()

    def run_pipeline(
        self,
        progress_callback: Optional[
            Callable[[PipelineStage, Any, int, int], None]
        ] = None,
    ) -> PipelineResult:
        """
        Runs the full codegraph generation pipeline.

        Args:
            progress_callback: A function taking (stage, current_item, index, total)

        Returns:
            A PipelineResult; it is empty when no supported files are found.
        """
        logger.info("Starting codegraph engine pipeline...")

        def notify(stage: PipelineStage, item: Any = None, index: int = 0, total: int = 0) -> None:
            if progress_callback:
                progress_callback(stage, item, index, total)

        # 1. Discover files
        notify(PipelineStage.DISCOVERING)
        files = discover_files(self.config)
        if not files:
            logger.warning("No supported files found.")
            notify(PipelineStage.COMPLETED)
            return PipelineResult(
                graph=nx.DiGraph(),
                files=[],
                components={},
                cohesion_scores={},
                component_names={},
                analysis=AnalysisResult(god_nodes=[], cycles=[], inter_comp_deps={}),
            )

        # 2. Parse files (with caching and optional parallel processing)
        extractions = []
        total_files = len(files)

        cache_path = self.config.absolute_output_dir / "cache.json"
        cache_entries = self._load_cache(cache_path) if self.config.use_cache else {}

        # discover_files returns resolved absolute paths, so cache keys must
        # be computed against the resolved workspace as well.
        workspace = self.config.workspace_dir.resolve()

        files_to_parse = []
        new_cache_entries = {}

        for file_path, lang in files:
            rel_path = self._cache_key(file_path, workspace)
            try:
                stat = file_path.stat()
                mtime = stat.st_mtime
                size = stat.st_size
                file_hash = get_file_hash(file_path)
            except Exception as e:
                logger.error(f"Error accessing file metadata for {file_path}: {e}")
                # Fallback to parsing without cache metadata
                files_to_parse.append((file_path, lang, rel_path, 0.0, 0, ""))
                continue

            entry = cache_entries.get(rel_path)
            if (
                entry is not None
                and entry.mtime == mtime
                and entry.size == size
                and entry.hash == file_hash
            ):
                # Cache hit: reuse the stored result and carry the entry over
                extractions.append(entry.result)
                new_cache_entries[rel_path] = entry
                continue

            # Cache miss
            files_to_parse.append((file_path, lang, rel_path, mtime, size, file_hash))

        num_hits = total_files - len(files_to_parse)
        if num_hits > 0:
            logger.info(
                f"Cache hit: {num_hits} / {total_files} files loaded from cache."
            )

        def remember(result, rel_path, mtime, size, file_hash) -> None:
            # An empty hash means the fingerprint is unknown: do not cache.
            if file_hash:
                new_cache_entries[rel_path] = CacheEntry(
                    mtime=mtime, size=size, hash=file_hash, result=result
                )

        max_workers = self.config.max_workers
        if not files_to_parse:
            notify(PipelineStage.PARSING, None, total_files, total_files)
        elif max_workers > 1 and len(files_to_parse) > 1:
            logger.info(
                f"Parsing {len(files_to_parse)} files in parallel with {max_workers} workers..."
            )
            with concurrent.futures.ProcessPoolExecutor(
                max_workers=max_workers
            ) as executor:
                futures = {
                    executor.submit(
                        _parse_file_worker,
                        file_path,
                        lang,
                        self.config.workspace_dir,
                    ): (file_path, rel_path, mtime, size, file_hash)
                    for file_path, lang, rel_path, mtime, size, file_hash in files_to_parse
                }

                for idx, future in enumerate(
                    concurrent.futures.as_completed(futures), start=1
                ):
                    file_path, rel_path, mtime, size, file_hash = futures[future]
                    notify(PipelineStage.PARSING, file_path, num_hits + idx, total_files)

                    try:
                        _, result, err_msg = future.result()
                        if err_msg:
                            logger.error(
                                f"Error parsing file {file_path} in worker: {err_msg}"
                            )
                        elif result:
                            extractions.append(result)
                            remember(result, rel_path, mtime, size, file_hash)
                    except Exception as e:
                        logger.error(f"Failed to parse file {file_path}: {e}")
        else:
            logger.info(f"Parsing {len(files_to_parse)} files sequentially...")
            for idx, (file_path, lang, rel_path, mtime, size, file_hash) in enumerate(
                files_to_parse, start=1
            ):
                notify(PipelineStage.PARSING, file_path, num_hits + idx, total_files)
                try:
                    parser = get_parser(lang)
                    result = parser.parse_file(file_path, self.config.workspace_dir)
                    extractions.append(result)
                    remember(result, rel_path, mtime, size, file_hash)
                except Exception as e:
                    logger.error(f"Error parsing file {file_path}: {e}")

        # 3. Build graph
        notify(PipelineStage.BUILDING)
        G = build_graph(extractions, self.config.workspace_dir)

        # 4. Component clustering
        notify(PipelineStage.CLUSTERING)
        components, cohesion_scores, component_names = detect_components(G)

        # 5. Graph analysis
        notify(PipelineStage.ANALYZING)
        analysis = analyze_graph(G, components)

        # 6. Render pages in memory
        notify(PipelineStage.RENDERING)
        node_component_map = {}
        for cid, members in components.items():
            comp_name = component_names.get(cid, f"Component {cid}")
            for member in members:
                node_component_map[member] = comp_name

        rendered_nodes = {}
        for nid, ndata in G.nodes(data=True):
            fname = get_node_filename(nid)
            rendered_nodes[fname] = self.renderer.render_node_page(
                nid, ndata, G, node_component_map
            )

        rendered_components = {}
        for cid, members in components.items():
            comp_name = component_names[cid]
            fname = get_component_filename(comp_name)
            rendered_components[fname] = self.renderer.render_component_page(
                cid,
                members,
                G,
                cohesion_scores[cid],
                comp_name,
                analysis.inter_comp_deps,
                component_names,
            )

        # Check if README already has AI Insights and preserve it
        ai_insights = self._read_existing_ai_insights(
            self.config.absolute_output_dir / "README.md"
        )

        readme_content = self.renderer.render_readme(
            G,
            components,
            cohesion_scores,
            component_names,
            analysis,
            ai_insights=ai_insights,
        )

        prompt_content = self.renderer.render_agent_prompt(
            G, components, cohesion_scores, component_names, analysis
        )

        # 7. Write vault to disk
        notify(PipelineStage.WRITING)
        self.writer.write_vault(
            self.config.absolute_output_dir,
            rendered_nodes,
            rendered_components,
            readme_content,
            prompt_content,
        )

        # Write updated cache back to disk
        if self.config.use_cache:
            self._save_cache(cache_path, new_cache_entries)

        notify(PipelineStage.COMPLETED)

        logger.info("Pipeline executed successfully.")
        return PipelineResult(
            graph=G,
            files=files,
            components=components,
            cohesion_scores=cohesion_scores,
            component_names=component_names,
            analysis=analysis,
        )

    @staticmethod
    def _cache_key(file_path: Path, workspace: Path) -> str:
        """Return the workspace-relative path as cache key, or the full path if outside."""
        try:
            return str(file_path.relative_to(workspace))
        except ValueError:
            # e.g. a symlinked file whose resolved target lies outside the workspace
            return str(file_path)

    @staticmethod
    def _load_cache(cache_path: Path) -> Dict[str, CacheEntry]:
        """Load cache entries from disk; problems are logged and never raised."""
        cache_entries: Dict[str, CacheEntry] = {}
        if not cache_path.exists():
            return cache_entries
        try:
            with open(cache_path, "r", encoding="utf-8") as f:
                cache_data = json.load(f)
            for k, v in cache_data.items():
                cache_entries[k] = CacheEntry(**v)
            logger.info(f"Loaded {len(cache_entries)} cache entries.")
        except Exception as e:
            logger.warning(f"Could not load cache: {e}")
        return cache_entries

    def _save_cache(self, cache_path: Path, entries: Dict[str, CacheEntry]) -> None:
        """Write cache entries as JSON; problems are logged and never raised."""
        try:
            self.config.absolute_output_dir.mkdir(parents=True, exist_ok=True)
            with open(cache_path, "w", encoding="utf-8") as f:
                json.dump(
                    {k: v.model_dump() for k, v in entries.items()},
                    f,
                    indent=2,
                )
            logger.info(f"Saved {len(entries)} cache entries.")
        except Exception as e:
            logger.warning(f"Could not save cache: {e}")

    def _read_existing_ai_insights(self, readme_path: Path) -> Optional[str]:
        """Return the text following an AI-insights heading in an existing README, if any."""
        if not readme_path.exists():
            return None
        try:
            old_readme = readme_path.read_text(encoding="utf-8")
            marker = next(
                (m for m in self._AI_INSIGHT_MARKERS if m in old_readme), None
            )
            if marker:
                insights_text = old_readme.split(marker, 1)[1].strip()
                if insights_text:
                    return insights_text
        except Exception as e:
            logger.warning(
                f"Could not read existing README.md to preserve AI insights: {e}"
            )
        return None
@@ -0,0 +1,27 @@
1
+ from codegraph_gen.parser.base import BaseParser
2
+ from codegraph_gen.parser.python import PythonParser
3
+ from codegraph_gen.parser.javascript import JavaScriptParser
4
+ from codegraph_gen.parser.go import GoParser
5
+ from codegraph_gen.parser.rust import RustParser
6
+ from codegraph_gen.parser.swift import SwiftParser
7
+ from codegraph_gen.parser.cpp import CParser, CppParser
8
+ from codegraph_gen.parser.kotlin import KotlinParser
9
+
10
# Registry: language name -> parser class (instantiated on demand by get_parser)
PARSERS: dict[str, type[BaseParser]] = {
    "python": PythonParser,
    # JavaScript and TypeScript share one parser class
    "javascript": JavaScriptParser,
    "typescript": JavaScriptParser,  # uses same tree-sitter parser
    "go": GoParser,
    "rust": RustParser,
    "swift": SwiftParser,
    "c": CParser,
    "cpp": CppParser,
    "kotlin": KotlinParser,
}
21
+
22
+
23
def get_parser(language: str) -> BaseParser:
    """Create a new parser instance for ``language``.

    Raises:
        ValueError: If ``language`` has no entry in ``PARSERS``.
    """
    parser_cls = PARSERS.get(language)
    if parser_cls is None:
        raise ValueError(f"Unsupported language: {language}")
    return parser_cls()
@@ -0,0 +1,38 @@
1
+ from abc import ABC, abstractmethod
2
+ from pathlib import Path
3
+ from pydantic import BaseModel
4
+
5
+
6
class NodeSchema(BaseModel):
    """A symbol extracted from a source file (one graph node)."""

    # Unique identifier, e.g. "relative_path::symbol_name"
    id: str
    # Human readable name, e.g. "my_function"
    label: str
    # 'file', 'class', 'function', 'method', 'struct', 'interface', 'trait', 'protocol'
    type: str
    # Path relative to workspace
    source_file: str
    # 1-indexed
    line_start: int
    # 1-indexed
    line_end: int
    # Signature snippet
    signature: str
    # Docstring or comments
    docstring: str = ""
    # Maps local variable/parameter name to its type name
    local_bindings: dict[str, str] = {}
18
+
19
+
20
class EdgeSchema(BaseModel):
    """A directed relation between two extracted symbols (one graph edge)."""

    # Source node ID
    source: str
    # Target node ID
    target: str
    # 'contains', 'imports', 'calls', 'inherits', 'implements'
    relation: str
    # Maps local name to original symbol name
    import_map: dict[str, str] = {}
25
+
26
+
27
class ExtractionResult(BaseModel):
    """Nodes and edges extracted from one parsed file; both default to empty."""

    nodes: list[NodeSchema] = []
    edges: list[EdgeSchema] = []
30
+
31
+
32
class BaseParser(ABC):
    """Common interface that every language-specific AST parser implements."""

    @abstractmethod
    def parse_file(self, file_path: Path, workspace_dir: Path) -> ExtractionResult:
        """Parse ``file_path`` and return its symbols (nodes) and relations (edges).

        Subclasses must override this method; the base implementation does nothing.
        """