superlocalmemory 3.3.20 → 3.3.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/package.json +1 -1
  2. package/pyproject.toml +9 -1
  3. package/src/superlocalmemory/cli/commands.py +138 -22
  4. package/src/superlocalmemory/cli/daemon.py +372 -0
  5. package/src/superlocalmemory/cli/main.py +8 -0
  6. package/src/superlocalmemory/cli/pending_store.py +158 -0
  7. package/src/superlocalmemory/cli/setup_wizard.py +39 -6
  8. package/src/superlocalmemory/code_graph/__init__.py +46 -0
  9. package/src/superlocalmemory/code_graph/blast_radius.py +177 -0
  10. package/src/superlocalmemory/code_graph/bridge/__init__.py +36 -0
  11. package/src/superlocalmemory/code_graph/bridge/entity_resolver.py +464 -0
  12. package/src/superlocalmemory/code_graph/bridge/event_listeners.py +195 -0
  13. package/src/superlocalmemory/code_graph/bridge/fact_enricher.py +159 -0
  14. package/src/superlocalmemory/code_graph/bridge/hebbian_linker.py +170 -0
  15. package/src/superlocalmemory/code_graph/bridge/temporal_checker.py +152 -0
  16. package/src/superlocalmemory/code_graph/changes.py +363 -0
  17. package/src/superlocalmemory/code_graph/communities.py +299 -0
  18. package/src/superlocalmemory/code_graph/config.py +88 -0
  19. package/src/superlocalmemory/code_graph/database.py +482 -0
  20. package/src/superlocalmemory/code_graph/extractors/__init__.py +78 -0
  21. package/src/superlocalmemory/code_graph/extractors/python.py +413 -0
  22. package/src/superlocalmemory/code_graph/extractors/typescript.py +556 -0
  23. package/src/superlocalmemory/code_graph/flows.py +350 -0
  24. package/src/superlocalmemory/code_graph/git_hooks.py +226 -0
  25. package/src/superlocalmemory/code_graph/graph_engine.py +295 -0
  26. package/src/superlocalmemory/code_graph/graph_store.py +158 -0
  27. package/src/superlocalmemory/code_graph/incremental.py +200 -0
  28. package/src/superlocalmemory/code_graph/models.py +130 -0
  29. package/src/superlocalmemory/code_graph/parser.py +507 -0
  30. package/src/superlocalmemory/code_graph/resolver.py +321 -0
  31. package/src/superlocalmemory/code_graph/search.py +460 -0
  32. package/src/superlocalmemory/code_graph/service.py +95 -0
  33. package/src/superlocalmemory/code_graph/watcher.py +207 -0
  34. package/src/superlocalmemory/core/embedding_worker.py +4 -2
  35. package/src/superlocalmemory/core/embeddings.py +8 -2
  36. package/src/superlocalmemory/core/engine.py +32 -0
  37. package/src/superlocalmemory/core/engine_wiring.py +5 -0
  38. package/src/superlocalmemory/core/store_pipeline.py +23 -1
  39. package/src/superlocalmemory/encoding/fact_extractor.py +68 -7
  40. package/src/superlocalmemory/infra/event_bus.py +5 -0
  41. package/src/superlocalmemory/mcp/server.py +23 -0
  42. package/src/superlocalmemory/mcp/tools_code_graph.py +1592 -0
  43. package/src/superlocalmemory/retrieval/engine.py +137 -2
  44. package/src/superlocalmemory/retrieval/semantic_channel.py +6 -2
  45. package/src/superlocalmemory/retrieval/spreading_activation.py +5 -3
  46. package/src/superlocalmemory/retrieval/strategy.py +16 -0
  47. package/src/superlocalmemory/server/api.py +4 -2
  48. package/src/superlocalmemory/server/ui.py +5 -2
  49. package/src/superlocalmemory/storage/schema_code_graph.py +239 -0
  50. package/src/superlocalmemory/ui/index.html +1879 -0
  51. package/src/superlocalmemory/ui/js/agents.js +192 -0
  52. package/src/superlocalmemory/ui/js/auto-settings.js +399 -0
  53. package/src/superlocalmemory/ui/js/behavioral.js +276 -0
  54. package/src/superlocalmemory/ui/js/clusters.js +206 -0
  55. package/src/superlocalmemory/ui/js/compliance.js +252 -0
  56. package/src/superlocalmemory/ui/js/core.js +246 -0
  57. package/src/superlocalmemory/ui/js/dashboard.js +110 -0
  58. package/src/superlocalmemory/ui/js/events.js +178 -0
  59. package/src/superlocalmemory/ui/js/fact-detail.js +92 -0
  60. package/src/superlocalmemory/ui/js/feedback.js +333 -0
  61. package/src/superlocalmemory/ui/js/graph-core.js +447 -0
  62. package/src/superlocalmemory/ui/js/graph-filters.js +220 -0
  63. package/src/superlocalmemory/ui/js/graph-interactions.js +351 -0
  64. package/src/superlocalmemory/ui/js/graph-ui.js +214 -0
  65. package/src/superlocalmemory/ui/js/ide-status.js +102 -0
  66. package/src/superlocalmemory/ui/js/init.js +45 -0
  67. package/src/superlocalmemory/ui/js/learning.js +435 -0
  68. package/src/superlocalmemory/ui/js/lifecycle.js +298 -0
  69. package/src/superlocalmemory/ui/js/math-health.js +98 -0
  70. package/src/superlocalmemory/ui/js/memories.js +264 -0
  71. package/src/superlocalmemory/ui/js/modal.js +357 -0
  72. package/src/superlocalmemory/ui/js/patterns.js +93 -0
  73. package/src/superlocalmemory/ui/js/profiles.js +236 -0
  74. package/src/superlocalmemory/ui/js/recall-lab.js +292 -0
  75. package/src/superlocalmemory/ui/js/search.js +59 -0
  76. package/src/superlocalmemory/ui/js/settings.js +224 -0
  77. package/src/superlocalmemory/ui/js/timeline.js +32 -0
  78. package/src/superlocalmemory/ui/js/trust-dashboard.js +73 -0
@@ -0,0 +1,130 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory v3.4 — CodeGraph Module
4
+
5
+ """Data models for the CodeGraph module.
6
+
7
+ Frozen dataclasses + string enums. All immutable.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import time
13
+ from dataclasses import dataclass, field
14
+ from enum import Enum
15
+ from typing import Any
16
+
17
+ from superlocalmemory.storage.models import _new_id
18
+
19
+
20
+ # ---------------------------------------------------------------------------
21
+ # Enums
22
+ # ---------------------------------------------------------------------------
23
+
24
class NodeKind(str, Enum):
    """Kind of code entity stored as a node in the code graph.

    Members subclass ``str``, so each one compares equal to its raw value
    (for example ``NodeKind.FILE == "file"``).
    """

    FILE = "file"
    CLASS = "class"
    FUNCTION = "function"
    METHOD = "method"
    MODULE = "module"
31
+
32
+
33
class EdgeKind(str, Enum):
    """Kind of relationship carried by an edge between two code entities.

    Members subclass ``str``, so each one compares equal to its raw value.
    """

    CALLS = "calls"
    IMPORTS = "imports"
    INHERITS = "inherits"
    CONTAINS = "contains"
    TESTED_BY = "tested_by"
    DEPENDS_ON = "depends_on"
41
+
42
+
43
class LinkType(str, Enum):
    """Type of bridge link between a code node and an SLM memory.

    Members subclass ``str``, so each one compares equal to its raw value.
    """

    MENTIONS = "mentions"
    DECISION_ABOUT = "decision_about"
    BUG_FIX = "bug_fix"
    REFACTOR = "refactor"
    DESIGN_RATIONALE = "design_rationale"
50
+
51
+
52
+ # ---------------------------------------------------------------------------
53
+ # Frozen Dataclasses
54
+ # ---------------------------------------------------------------------------
55
+
56
@dataclass(frozen=True)
class GraphNode:
    """Immutable record describing one code entity in the graph.

    Every field has a default. ``node_id`` is produced by ``_new_id`` and the
    two timestamps by ``time.time`` when the instance is constructed.
    """

    # --- identity -----------------------------------------------------
    node_id: str = field(default_factory=_new_id)
    kind: NodeKind = NodeKind.FUNCTION
    name: str = ""
    qualified_name: str = ""

    # --- location in source -------------------------------------------
    file_path: str = ""
    line_start: int = 0
    line_end: int = 0
    language: str = ""

    # --- optional descriptive metadata --------------------------------
    parent_name: str | None = None
    signature: str | None = None
    docstring: str | None = None
    is_test: bool = False
    content_hash: str | None = None
    community_id: int | None = None
    extra_json: str = "{}"  # JSON text; defaults to an empty object

    # --- bookkeeping timestamps (seconds since the epoch) --------------
    created_at: float = field(default_factory=time.time)
    updated_at: float = field(default_factory=time.time)
76
+
77
+
78
@dataclass(frozen=True)
class GraphEdge:
    """Immutable record describing a relationship between two graph nodes.

    Every field has a default. ``edge_id`` is produced by ``_new_id`` and the
    two timestamps by ``time.time`` when the instance is constructed.
    """

    # --- identity and endpoints ---------------------------------------
    edge_id: str = field(default_factory=_new_id)
    kind: EdgeKind = EdgeKind.CALLS
    source_node_id: str = ""
    target_node_id: str = ""

    # --- where the relationship was observed ---------------------------
    file_path: str = ""
    line: int = 0

    # --- metadata -----------------------------------------------------
    confidence: float = 1.0
    extra_json: str = "{}"  # JSON text; defaults to an empty object

    # --- bookkeeping timestamps (seconds since the epoch) --------------
    created_at: float = field(default_factory=time.time)
    updated_at: float = field(default_factory=time.time)
91
+
92
+
93
@dataclass(frozen=True)
class FileRecord:
    """Immutable tracking record for one parsed source file.

    Every field has a default; ``last_indexed`` is filled from ``time.time``
    when the instance is constructed.
    """

    # --- what was indexed ---------------------------------------------
    file_path: str = ""
    content_hash: str = ""
    mtime: float = 0.0
    language: str = ""

    # --- how much was extracted ---------------------------------------
    node_count: int = 0
    edge_count: int = 0

    # --- when it was indexed (seconds since the epoch) -----------------
    last_indexed: float = field(default_factory=time.time)
103
+
104
+
105
@dataclass(frozen=True)
class CodeMemoryLink:
    """Immutable bridge link between a code graph node and an SLM memory fact.

    Every field has a default; ``link_id`` is produced by ``_new_id``.
    """

    # --- identity and endpoints ---------------------------------------
    link_id: str = field(default_factory=_new_id)
    code_node_id: str = ""
    slm_fact_id: str = ""
    slm_entity_id: str | None = None

    # --- classification -----------------------------------------------
    link_type: LinkType = LinkType.MENTIONS
    confidence: float = 0.8

    # --- lifecycle (string-typed here, unlike the float timestamps used
    # --- by the graph node/edge records) -------------------------------
    created_at: str = ""
    last_verified: str | None = None
    is_stale: bool = False
117
+
118
+
119
+ # ---------------------------------------------------------------------------
120
+ # Parse result containers (used by parser → database pipeline)
121
+ # ---------------------------------------------------------------------------
122
+
123
@dataclass(frozen=True)
class ParseResult:
    """Immutable container for everything produced by parsing one file.

    The first four fields are required; ``errors`` defaults to an empty
    tuple.
    """

    file_path: str
    nodes: tuple[GraphNode, ...]
    edges: tuple[GraphEdge, ...]
    file_record: FileRecord
    errors: tuple[str, ...] = ()
@@ -0,0 +1,507 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory v3.4 — CodeGraph Module
4
+
5
+ """Multi-language tree-sitter parser with parallel execution.
6
+
7
+ Dispatches to language-specific extractors (Python, TypeScript).
8
+ Uses ProcessPoolExecutor for CPU-bound parallel file parsing.
9
+
10
+ tree-sitter imports are lazy (HR-07): only imported when parse_file
11
+ is called, never at module-level or package import time.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import hashlib
17
+ import logging
18
+ import os
19
+ import time
20
+ from concurrent.futures import ProcessPoolExecutor, as_completed
21
+ from fnmatch import fnmatch
22
+ from pathlib import Path
23
+ from typing import Any
24
+
25
+ from superlocalmemory.code_graph.config import CodeGraphConfig
26
+ from superlocalmemory.code_graph.models import (
27
+ EdgeKind,
28
+ FileRecord,
29
+ GraphEdge,
30
+ GraphNode,
31
+ NodeKind,
32
+ ParseResult,
33
+ )
34
+ from superlocalmemory.storage.models import _new_id
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+
39
class UnsupportedLanguageError(Exception):
    """Signals a request to parse a language that has no extractor."""
41
+
42
+
43
class ParseError(Exception):
    """Signals that tree-sitter could not parse a source file."""
45
+
46
+
47
+ def _is_test_file(file_path: str, config: CodeGraphConfig) -> bool:
48
+ """Check if a file is a test file based on config patterns."""
49
+ name = Path(file_path).name
50
+ parts = Path(file_path).parts
51
+ test_patterns = [
52
+ "test_*.py", "*_test.py",
53
+ "*.test.ts", "*.test.tsx", "*.spec.ts", "*.spec.tsx",
54
+ ]
55
+ for pattern in test_patterns:
56
+ if fnmatch(name, pattern):
57
+ return True
58
+ # Check directory patterns
59
+ test_dirs = {"tests", "test", "__tests__", "spec"}
60
+ return bool(test_dirs.intersection(parts))
61
+
62
+
63
+ def _sha256(data: bytes) -> str:
64
+ """Compute SHA-256 hex digest."""
65
+ return hashlib.sha256(data).hexdigest()
66
+
67
+
68
+ def _make_qualified_name(
69
+ file_path: str, name: str, parent_name: str | None
70
+ ) -> str:
71
+ if parent_name:
72
+ return f"{file_path}::{parent_name}.{name}"
73
+ return f"{file_path}::{name}"
74
+
75
+
76
+ # ---------------------------------------------------------------------------
77
+ # Module-level parse function (picklable for ProcessPoolExecutor)
78
+ # ---------------------------------------------------------------------------
79
+
80
+ def _parse_file_standalone(
81
+ file_path_str: str,
82
+ source_bytes: bytes,
83
+ language: str,
84
+ config_dict: dict[str, Any],
85
+ ) -> dict[str, Any]:
86
+ """Parse a single file. Module-level function for pickling.
87
+
88
+ Returns a serializable dict with nodes, edges, errors.
89
+ """
90
+ try:
91
+ # Lazy import (HR-07)
92
+ from tree_sitter_language_pack import get_parser # noqa: F811
93
+
94
+ parser_instance = get_parser(language)
95
+ tree = parser_instance.parse(source_bytes)
96
+ root = tree.root_node
97
+
98
+ # Create a minimal config for the extractor
99
+ config = CodeGraphConfig(**{
100
+ k: v for k, v in config_dict.items()
101
+ if k in CodeGraphConfig.__dataclass_fields__
102
+ })
103
+
104
+ # Select extractor
105
+ if language == "python":
106
+ from superlocalmemory.code_graph.extractors.python import PythonExtractor
107
+ extractor = PythonExtractor(root, source_bytes, file_path_str, config)
108
+ elif language in ("typescript", "tsx", "javascript", "jsx"):
109
+ from superlocalmemory.code_graph.extractors.typescript import TypeScriptExtractor
110
+ extractor = TypeScriptExtractor(root, source_bytes, file_path_str, config)
111
+ else:
112
+ return {"nodes": [], "edges": [], "errors": [f"Unsupported language: {language}"]}
113
+
114
+ nodes, edges = extractor.extract()
115
+
116
+ return {
117
+ "nodes": nodes,
118
+ "edges": edges,
119
+ "errors": [],
120
+ }
121
+ except Exception as exc:
122
+ return {
123
+ "nodes": [],
124
+ "edges": [],
125
+ "errors": [str(exc)],
126
+ }
127
+
128
+
129
class CodeParser:
    """Multi-language tree-sitter parser with parallel execution.

    ``parse_file`` parses one file in-process; ``parse_all`` discovers every
    parseable file under a repository root and parses them, using a process
    pool once there are more than a couple of files. Both paths share the
    same post-processing (``_assemble``) so they produce equivalent graphs.
    """

    # Languages for which an extractor is available.
    _SUPPORTED_LANGUAGES = frozenset(
        {"python", "typescript", "tsx", "javascript", "jsx"}
    )
    # At or below this many files the pool start-up overhead is not worth it.
    _SEQUENTIAL_THRESHOLD = 2

    def __init__(self, config: CodeGraphConfig) -> None:
        """Store config. Does not import tree-sitter yet (lazy)."""
        self._config = config

    # ------------------------------------------------------------------
    # Discovery
    # ------------------------------------------------------------------

    def discover_files(self, repo_root: Path) -> list[Path]:
        """Find all parseable files under ``repo_root``.

        A file is kept when its suffix is a key of ``extension_map``, its
        size does not exceed ``max_file_size_bytes`` and neither its
        relative path nor its name matches an entry of ``exclude_patterns``.
        Directories named in (or matching a pattern of) ``exclude_dirs`` are
        not descended into.

        Returns:
            Paths relative to ``repo_root``, sorted.

        Raises:
            FileNotFoundError: If ``repo_root`` does not exist.
        """
        if not repo_root.exists():
            raise FileNotFoundError(f"Repository root does not exist: {repo_root}")

        cfg = self._config
        exclude_dirs = cfg.exclude_dirs
        found: list[Path] = []

        for dirpath, dirnames, filenames in os.walk(repo_root):
            # Prune in place so os.walk skips excluded directories entirely.
            dirnames[:] = [
                d for d in dirnames
                if d not in exclude_dirs
                and not any(fnmatch(d, pattern) for pattern in exclude_dirs)
            ]

            for filename in filenames:
                if Path(filename).suffix not in cfg.extension_map:
                    continue

                full_path = Path(dirpath) / filename
                try:
                    size = full_path.stat().st_size
                except OSError:
                    # Unreadable or vanished between listing and stat.
                    continue

                if size > cfg.max_file_size_bytes:
                    logger.warning(
                        "Skipping large file (%d bytes): %s", size, full_path
                    )
                    continue

                rel = full_path.relative_to(repo_root)
                rel_str = str(rel)
                if any(
                    fnmatch(rel_str, pattern) or fnmatch(filename, pattern)
                    for pattern in cfg.exclude_patterns
                ):
                    continue

                found.append(rel)

        return sorted(found)

    # ------------------------------------------------------------------
    # Parsing
    # ------------------------------------------------------------------

    def parse_file(
        self,
        file_path: Path,
        source_bytes: bytes,
        language: str,
    ) -> tuple[list[GraphNode], list[GraphEdge]]:
        """Parse a single file and return extracted nodes and edges.

        The returned node list starts with a FILE node for the file itself;
        the edge list holds the extractor's edges followed by generated
        CONTAINS and TESTED_BY edges.

        Raises:
            UnsupportedLanguageError: If ``language`` is not supported.
            ImportError: If ``tree_sitter_language_pack`` is not installed.
        """
        if language not in self._SUPPORTED_LANGUAGES:
            raise UnsupportedLanguageError(f"Unsupported language: {language}")

        try:
            from tree_sitter_language_pack import get_parser
        except ImportError as exc:
            raise ImportError(
                "tree-sitter required. Install: pip install 'superlocalmemory[code-graph]'"
            ) from exc

        root = get_parser(language).parse(source_bytes).root_node
        file_path_str = str(file_path)

        if language == "python":
            from superlocalmemory.code_graph.extractors.python import PythonExtractor
            extractor = PythonExtractor(root, source_bytes, file_path_str, self._config)
        else:
            from superlocalmemory.code_graph.extractors.typescript import TypeScriptExtractor
            extractor = TypeScriptExtractor(root, source_bytes, file_path_str, self._config)

        extracted_nodes, extracted_edges = extractor.extract()

        return self._assemble(
            file_path,
            source_bytes,
            language,
            extracted_nodes,
            extracted_edges,
            line_end=root.end_point[0],
        )

    def parse_all(
        self, repo_root: Path
    ) -> tuple[list[GraphNode], list[GraphEdge], list[FileRecord]]:
        """Parse every discovered file under ``repo_root``.

        Files that cannot be read or parsed are logged and skipped.

        Returns:
            ``(all_nodes, all_edges, all_file_records)``.

        Raises:
            FileNotFoundError: If ``repo_root`` does not exist.
        """
        files = self.discover_files(repo_root)
        if not files:
            return [], [], []

        cfg = self._config
        all_nodes: list[GraphNode] = []
        all_edges: list[GraphEdge] = []
        all_file_records: list[FileRecord] = []

        # Read every file up front; workers receive bytes, not paths.
        tasks: list[tuple[Path, bytes, str]] = []
        for rel_path in files:
            full_path = repo_root / rel_path
            try:
                source_bytes = full_path.read_bytes()
            except OSError as exc:
                logger.warning("Failed to read %s: %s", full_path, exc)
                continue

            language = cfg.extension_map.get(rel_path.suffix)
            if language is None:
                continue

            tasks.append((rel_path, source_bytes, language))

        def collect(
            rel_path: Path,
            source_bytes: bytes,
            language: str,
            nodes: list[GraphNode],
            edges: list[GraphEdge],
        ) -> None:
            """Append one file's results and its FileRecord to the totals."""
            all_nodes.extend(nodes)
            all_edges.extend(edges)
            all_file_records.append(self._build_file_record(
                repo_root, rel_path, source_bytes, language,
                len(nodes), len(edges),
            ))

        # For small numbers of files, run sequentially to avoid pool overhead.
        if len(tasks) <= self._SEQUENTIAL_THRESHOLD:
            for rel_path, source_bytes, language in tasks:
                try:
                    nodes, edges = self.parse_file(rel_path, source_bytes, language)
                except Exception as exc:
                    # Best effort: one bad file must not abort the whole run.
                    logger.warning("Failed to parse %s: %s", rel_path, exc)
                    continue
                collect(rel_path, source_bytes, language, nodes, edges)
            return all_nodes, all_edges, all_file_records

        # Parallel execution. Only picklable, non-Path config values are
        # shipped to the workers; repo_root travels as a string.
        config_dict = {
            field_name: getattr(cfg, field_name)
            for field_name in CodeGraphConfig.__dataclass_fields__
            if not isinstance(getattr(cfg, field_name), Path)
        }
        config_dict["repo_root"] = str(cfg.repo_root)

        # ProcessPoolExecutor rejects max_workers <= 0, so clamp to >= 1.
        workers = max(1, min(cfg.parallel_workers, len(tasks)))
        with ProcessPoolExecutor(max_workers=workers) as executor:
            future_map = {
                executor.submit(
                    _parse_file_standalone,
                    str(rel_path),
                    source_bytes,
                    language,
                    config_dict,
                ): (rel_path, source_bytes, language)
                for rel_path, source_bytes, language in tasks
            }

            for future in as_completed(future_map):
                rel_path, source_bytes, language = future_map[future]
                try:
                    # NOTE(review): futures yielded by as_completed are
                    # already done, so this timeout can never fire; it does
                    # not bound the parse time of a single file.
                    result = future.result(timeout=cfg.parse_timeout_seconds)
                except Exception as exc:
                    logger.warning("Parse failed for %s: %s", rel_path, exc)
                    continue

                errors = result["errors"]
                for err in errors:
                    logger.warning("Parse error in %s: %s", rel_path, err)
                if errors and not result["nodes"]:
                    # The worker failed for this file; skip it, as the
                    # sequential path does when parse_file raises.
                    continue

                # Use the worker's end row when it reports one. Otherwise
                # fall back to the newline count (presumably equal to the
                # root's end row when the root spans the whole file; confirm).
                line_end = result.get("line_end", source_bytes.count(b"\n"))
                nodes, edges = self._assemble(
                    rel_path,
                    source_bytes,
                    language,
                    result["nodes"],
                    result["edges"],
                    line_end=line_end,
                )
                collect(rel_path, source_bytes, language, nodes, edges)

        return all_nodes, all_edges, all_file_records

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _assemble(
        self,
        file_path: Path,
        source_bytes: bytes,
        language: str,
        extracted_nodes: list[GraphNode],
        extracted_edges: list[GraphEdge],
        line_end: int,
    ) -> tuple[list[GraphNode], list[GraphEdge]]:
        """Turn raw extractor output into the final per-file graph.

        Adds the FILE node, flags functions/methods of test files with
        ``is_test=True`` and appends CONTAINS and TESTED_BY edges.
        """
        from dataclasses import replace

        file_path_str = str(file_path)
        file_node = GraphNode(
            node_id=_new_id(),
            kind=NodeKind.FILE,
            name=file_path.name,
            qualified_name=file_path_str,
            file_path=file_path_str,
            line_start=0,
            line_end=line_end,
            language=language,
            content_hash=_sha256(source_bytes),
        )

        nodes = list(extracted_nodes)
        if _is_test_file(file_path_str, self._config):
            # replace() copies every field, so nothing is silently reset.
            callable_kinds = (NodeKind.FUNCTION, NodeKind.METHOD)
            nodes = [
                replace(node, is_test=True) if node.kind in callable_kinds else node
                for node in nodes
            ]

        contains_edges = self._generate_contains_edges(file_node, nodes)
        tested_by_edges = self._generate_tested_by_edges(nodes, extracted_edges)

        return (
            [file_node, *nodes],
            [*extracted_edges, *contains_edges, *tested_by_edges],
        )

    @staticmethod
    def _build_file_record(
        repo_root: Path,
        rel_path: Path,
        source_bytes: bytes,
        language: str,
        node_count: int,
        edge_count: int,
    ) -> FileRecord:
        """Create the FileRecord for one parsed file.

        ``mtime`` falls back to ``0.0`` when the file can no longer be
        stat'ed, so a vanished file does not lose its record.
        """
        try:
            mtime = (repo_root / rel_path).stat().st_mtime
        except OSError:
            mtime = 0.0

        return FileRecord(
            file_path=str(rel_path),
            content_hash=_sha256(source_bytes),
            mtime=mtime,
            language=language,
            node_count=node_count,
            edge_count=edge_count,
            last_indexed=time.time(),
        )

    @staticmethod
    def _generate_contains_edges(
        file_node: GraphNode, extracted_nodes: list[GraphNode]
    ) -> list[GraphEdge]:
        """Generate CONTAINS edges: File -> top-level, parent -> child.

        A node whose ``parent_name`` cannot be resolved to another node of
        the same file is attached to the file node instead.
        """
        # Parent lookup is by bare name. Classes are inserted last so that a
        # class wins over a function or method that shares its name.
        parent_ids: dict[str, str] = {
            node.name: node.node_id
            for node in extracted_nodes
            if node.kind != NodeKind.CLASS
        }
        parent_ids.update(
            (node.name, node.node_id)
            for node in extracted_nodes
            if node.kind == NodeKind.CLASS
        )

        edges: list[GraphEdge] = []
        for node in extracted_nodes:
            owner_id = file_node.node_id
            if node.parent_name is not None:
                candidate = parent_ids.get(node.parent_name)
                # Never let a node contain itself; fall back to the file.
                if candidate and candidate != node.node_id:
                    owner_id = candidate

            edges.append(GraphEdge(
                edge_id=_new_id(),
                kind=EdgeKind.CONTAINS,
                source_node_id=owner_id,
                target_node_id=node.node_id,
                file_path=file_node.file_path,
                line=node.line_start,
            ))
        return edges

    @staticmethod
    def _generate_tested_by_edges(
        nodes: list[GraphNode], edges: list[GraphEdge]
    ) -> list[GraphEdge]:
        """Generate TESTED_BY edges: for each CALLS from test to non-test.

        The generated edge points from the called node to the calling test
        node. Only ``nodes`` is consulted to decide what is a test node.
        """
        test_node_ids = {node.node_id for node in nodes if node.is_test}
        if not test_node_ids:
            return []

        return [
            GraphEdge(
                edge_id=_new_id(),
                kind=EdgeKind.TESTED_BY,
                source_node_id=edge.target_node_id,
                target_node_id=edge.source_node_id,
                file_path=edge.file_path,
                line=edge.line,
            )
            for edge in edges
            if edge.kind == EdgeKind.CALLS
            and edge.source_node_id in test_node_ids
            and edge.target_node_id not in test_node_ids
        ]