superlocalmemory 3.3.20 → 3.3.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/package.json +1 -1
  2. package/pyproject.toml +9 -1
  3. package/src/superlocalmemory/cli/commands.py +138 -22
  4. package/src/superlocalmemory/cli/daemon.py +372 -0
  5. package/src/superlocalmemory/cli/main.py +8 -0
  6. package/src/superlocalmemory/cli/pending_store.py +158 -0
  7. package/src/superlocalmemory/cli/setup_wizard.py +39 -6
  8. package/src/superlocalmemory/code_graph/__init__.py +46 -0
  9. package/src/superlocalmemory/code_graph/blast_radius.py +177 -0
  10. package/src/superlocalmemory/code_graph/bridge/__init__.py +36 -0
  11. package/src/superlocalmemory/code_graph/bridge/entity_resolver.py +464 -0
  12. package/src/superlocalmemory/code_graph/bridge/event_listeners.py +195 -0
  13. package/src/superlocalmemory/code_graph/bridge/fact_enricher.py +159 -0
  14. package/src/superlocalmemory/code_graph/bridge/hebbian_linker.py +170 -0
  15. package/src/superlocalmemory/code_graph/bridge/temporal_checker.py +152 -0
  16. package/src/superlocalmemory/code_graph/changes.py +363 -0
  17. package/src/superlocalmemory/code_graph/communities.py +299 -0
  18. package/src/superlocalmemory/code_graph/config.py +88 -0
  19. package/src/superlocalmemory/code_graph/database.py +482 -0
  20. package/src/superlocalmemory/code_graph/extractors/__init__.py +78 -0
  21. package/src/superlocalmemory/code_graph/extractors/python.py +413 -0
  22. package/src/superlocalmemory/code_graph/extractors/typescript.py +556 -0
  23. package/src/superlocalmemory/code_graph/flows.py +350 -0
  24. package/src/superlocalmemory/code_graph/git_hooks.py +226 -0
  25. package/src/superlocalmemory/code_graph/graph_engine.py +295 -0
  26. package/src/superlocalmemory/code_graph/graph_store.py +158 -0
  27. package/src/superlocalmemory/code_graph/incremental.py +200 -0
  28. package/src/superlocalmemory/code_graph/models.py +130 -0
  29. package/src/superlocalmemory/code_graph/parser.py +507 -0
  30. package/src/superlocalmemory/code_graph/resolver.py +321 -0
  31. package/src/superlocalmemory/code_graph/search.py +460 -0
  32. package/src/superlocalmemory/code_graph/service.py +95 -0
  33. package/src/superlocalmemory/code_graph/watcher.py +207 -0
  34. package/src/superlocalmemory/core/embedding_worker.py +4 -2
  35. package/src/superlocalmemory/core/embeddings.py +8 -2
  36. package/src/superlocalmemory/core/engine.py +32 -0
  37. package/src/superlocalmemory/core/engine_wiring.py +5 -0
  38. package/src/superlocalmemory/core/store_pipeline.py +23 -1
  39. package/src/superlocalmemory/encoding/fact_extractor.py +68 -7
  40. package/src/superlocalmemory/infra/event_bus.py +5 -0
  41. package/src/superlocalmemory/mcp/server.py +23 -0
  42. package/src/superlocalmemory/mcp/tools_code_graph.py +1592 -0
  43. package/src/superlocalmemory/retrieval/engine.py +137 -2
  44. package/src/superlocalmemory/retrieval/semantic_channel.py +6 -2
  45. package/src/superlocalmemory/retrieval/spreading_activation.py +5 -3
  46. package/src/superlocalmemory/retrieval/strategy.py +16 -0
  47. package/src/superlocalmemory/server/api.py +4 -2
  48. package/src/superlocalmemory/server/ui.py +5 -2
  49. package/src/superlocalmemory/storage/schema_code_graph.py +239 -0
  50. package/src/superlocalmemory/ui/index.html +1879 -0
  51. package/src/superlocalmemory/ui/js/agents.js +192 -0
  52. package/src/superlocalmemory/ui/js/auto-settings.js +399 -0
  53. package/src/superlocalmemory/ui/js/behavioral.js +276 -0
  54. package/src/superlocalmemory/ui/js/clusters.js +206 -0
  55. package/src/superlocalmemory/ui/js/compliance.js +252 -0
  56. package/src/superlocalmemory/ui/js/core.js +246 -0
  57. package/src/superlocalmemory/ui/js/dashboard.js +110 -0
  58. package/src/superlocalmemory/ui/js/events.js +178 -0
  59. package/src/superlocalmemory/ui/js/fact-detail.js +92 -0
  60. package/src/superlocalmemory/ui/js/feedback.js +333 -0
  61. package/src/superlocalmemory/ui/js/graph-core.js +447 -0
  62. package/src/superlocalmemory/ui/js/graph-filters.js +220 -0
  63. package/src/superlocalmemory/ui/js/graph-interactions.js +351 -0
  64. package/src/superlocalmemory/ui/js/graph-ui.js +214 -0
  65. package/src/superlocalmemory/ui/js/ide-status.js +102 -0
  66. package/src/superlocalmemory/ui/js/init.js +45 -0
  67. package/src/superlocalmemory/ui/js/learning.js +435 -0
  68. package/src/superlocalmemory/ui/js/lifecycle.js +298 -0
  69. package/src/superlocalmemory/ui/js/math-health.js +98 -0
  70. package/src/superlocalmemory/ui/js/memories.js +264 -0
  71. package/src/superlocalmemory/ui/js/modal.js +357 -0
  72. package/src/superlocalmemory/ui/js/patterns.js +93 -0
  73. package/src/superlocalmemory/ui/js/profiles.js +236 -0
  74. package/src/superlocalmemory/ui/js/recall-lab.js +292 -0
  75. package/src/superlocalmemory/ui/js/search.js +59 -0
  76. package/src/superlocalmemory/ui/js/settings.js +224 -0
  77. package/src/superlocalmemory/ui/js/timeline.js +32 -0
  78. package/src/superlocalmemory/ui/js/trust-dashboard.js +73 -0
@@ -0,0 +1,295 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory v3.4 — CodeGraph Module
4
+
5
+ """GraphEngine — rustworkx in-memory directed graph.
6
+
7
+ Loads from SQLite via GraphStore, caches in a rustworkx PyDiGraph,
8
+ and provides O(1) node lookup + O(degree) traversals.
9
+
10
+ Cache invalidation: rebuild when store.version changes.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import logging
16
+ from dataclasses import dataclass, field
17
+ from typing import Any
18
+
19
+ from superlocalmemory.code_graph.graph_store import GraphStore
20
+ from superlocalmemory.code_graph.models import EdgeKind, GraphNode
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+ try:
25
+ import rustworkx as rx # type: ignore[import-untyped]
26
+ except ImportError as _rx_err:
27
+ rx = None # type: ignore[assignment]
28
+ _RX_IMPORT_ERROR = _rx_err
29
+ else:
30
+ _RX_IMPORT_ERROR = None
31
+
32
+
33
class RustworkxNotInstalledError(ImportError):
    """Signal that ``rustworkx`` is needed but could not be imported.

    Derives from :class:`ImportError`, so handlers written for ordinary
    import failures also catch this.
    """
35
+
36
+
37
class NodeNotFoundError(KeyError):
    """Signal that a requested node_id has no entry in the graph.

    Derives from :class:`KeyError`, so generic mapping-lookup handlers
    also catch this.
    """
39
+
40
+
41
def _require_rustworkx() -> None:
    """Fail fast when the module-level ``rustworkx`` import did not succeed.

    Raises:
        RustworkxNotInstalledError: if the ``rx`` placeholder is ``None``;
            chained to the import error captured at module load.
    """
    if rx is not None:
        return
    message = (
        "rustworkx is required for GraphEngine. "
        "Install it with: pip install rustworkx"
    )
    raise RustworkxNotInstalledError(message) from _RX_IMPORT_ERROR
48
+
49
+
50
+ # ---------------------------------------------------------------------------
51
+ # GraphIndex — bidirectional ID mapping
52
+ # ---------------------------------------------------------------------------
53
+
54
@dataclass(frozen=True)
class GraphIndex:
    """Lookup tables tying SQLite TEXT node ids to rustworkx int indices.

    The dataclass is frozen, so the three fields cannot be rebound after
    construction. Each field defaults to its own fresh, empty dict.
    """

    # node_id -> rustworkx node index
    id_to_rx: dict[str, int] = field(default_factory=dict)
    # rustworkx node index -> node_id
    rx_to_id: dict[int, str] = field(default_factory=dict)
    # qualified name -> node_id
    qname_to_id: dict[str, str] = field(default_factory=dict)
61
+
62
+
63
+ # ---------------------------------------------------------------------------
64
+ # GraphEngine
65
+ # ---------------------------------------------------------------------------
66
+
67
class GraphEngine:
    """In-memory directed graph backed by a rustworkx ``PyDiGraph``.

    The graph is built lazily from the ``GraphStore`` on first access and
    rebuilt whenever ``store.version`` differs from the version the cached
    graph was built for. Every query method returns *copies* of the node and
    edge payload dicts, so callers cannot mutate the cached graph through
    the results.
    """

    def __init__(self, store: GraphStore) -> None:
        """Bind the engine to *store*; nothing is loaded yet.

        Raises:
            RustworkxNotInstalledError: if rustworkx could not be imported.
        """
        _require_rustworkx()
        self._store = store
        self._graph: rx.PyDiGraph | None = None  # type: ignore[name-defined]
        self._index: GraphIndex = GraphIndex()
        # -1 never equals a version the cache was stamped with by
        # build_graph via invalidate(); it marks "no graph built".
        self._graph_version: int = -1

    # ------------------------------------------------------------------
    # Graph lifecycle
    # ------------------------------------------------------------------

    def build_graph(self) -> rx.PyDiGraph:  # type: ignore[name-defined]
        """Load all nodes and edges from the store into a ``PyDiGraph``.

        Returns the cached graph when one exists and the store version has
        not changed since it was built.
        """
        # Read the version BEFORE the bulk load. If a write lands while the
        # rows are being read, the cache is stamped with the older version
        # and the next access rebuilds. Stamping with a version read after
        # the load could label a pre-write snapshot as current forever.
        store_version = self._store.version
        if self._graph is not None and self._graph_version == store_version:
            return self._graph

        nodes, edges = self._store.get_all_nodes_and_edges()

        graph = rx.PyDiGraph(multigraph=True)

        id_to_rx: dict[str, int] = {}
        rx_to_id: dict[int, str] = {}
        qname_to_id: dict[str, str] = {}

        # Add nodes
        for node in nodes:
            node_data: dict[str, Any] = {
                "node_id": node.node_id,
                "kind": node.kind.value,
                "name": node.name,
                "qualified_name": node.qualified_name,
                "file_path": node.file_path,
                "line_start": node.line_start,
                "line_end": node.line_end,
                "language": node.language,
                "parent_name": node.parent_name,
                "is_test": node.is_test,
                "community_id": node.community_id,
            }
            rx_idx = graph.add_node(node_data)
            id_to_rx[node.node_id] = rx_idx
            rx_to_id[rx_idx] = node.node_id
            # When two nodes share a qualified name the later one wins.
            qname_to_id[node.qualified_name] = node.node_id

        # Add edges
        for edge in edges:
            src_rx = id_to_rx.get(edge.source_node_id)
            tgt_rx = id_to_rx.get(edge.target_node_id)
            if src_rx is None or tgt_rx is None:
                # An endpoint is not among the loaded nodes: skip the edge
                # rather than fail the whole build.
                logger.warning(
                    "Skipping dangling edge %s -> %s (kind=%s)",
                    edge.source_node_id, edge.target_node_id, edge.kind.value,
                )
                continue
            edge_data: dict[str, Any] = {
                "edge_id": edge.edge_id,
                "kind": edge.kind.value,
                "file_path": edge.file_path,
                "line": edge.line,
                "confidence": edge.confidence,
            }
            graph.add_edge(src_rx, tgt_rx, edge_data)

        # Cache
        self._graph = graph
        self._index = GraphIndex(
            id_to_rx=id_to_rx,
            rx_to_id=rx_to_id,
            qname_to_id=qname_to_id,
        )
        self._graph_version = store_version
        logger.debug(
            "Built graph: %d nodes, %d edges",
            graph.num_nodes(), graph.num_edges(),
        )
        return graph

    def invalidate(self) -> None:
        """Force a graph rebuild on next access."""
        self._graph = None
        self._graph_version = -1

    @property
    def index(self) -> GraphIndex:
        """Current graph index (builds the graph first if needed)."""
        self._ensure_graph()
        return self._index

    @property
    def graph(self) -> rx.PyDiGraph:  # type: ignore[name-defined]
        """Current rustworkx graph (built first if needed)."""
        return self._ensure_graph()

    # ------------------------------------------------------------------
    # Query operations
    # ------------------------------------------------------------------

    def get_callers(
        self,
        node_id: str,
        edge_kinds: set[str] | None = None,
    ) -> list[dict[str, Any]]:
        """Return all nodes that have edges pointing *to* node_id.

        Args:
            node_id: TEXT id of the target node.
            edge_kinds: if given, only edges whose ``"kind"`` is in this set
                are reported; ``None`` reports every edge.

        Returns:
            One ``{"node": <node_data>, "edge": <edge_data>}`` dict per
            matching incoming edge.

        Raises:
            NodeNotFoundError: if *node_id* is not in the graph.
        """
        graph = self._ensure_graph()
        rx_idx = self._resolve_rx(node_id)

        results: list[dict[str, Any]] = []
        for src_rx, _tgt_rx, edge_data in graph.in_edges(rx_idx):
            if edge_kinds is not None and edge_data["kind"] not in edge_kinds:
                continue
            results.append({
                "node": dict(graph[src_rx]),
                "edge": dict(edge_data),
            })
        return results

    def get_callees(
        self,
        node_id: str,
        edge_kinds: set[str] | None = None,
    ) -> list[dict[str, Any]]:
        """Return all nodes that node_id has edges pointing *to*.

        Args:
            node_id: TEXT id of the source node.
            edge_kinds: if given, only edges whose ``"kind"`` is in this set
                are reported; ``None`` reports every edge.

        Returns:
            One ``{"node": <node_data>, "edge": <edge_data>}`` dict per
            matching outgoing edge.

        Raises:
            NodeNotFoundError: if *node_id* is not in the graph.
        """
        graph = self._ensure_graph()
        rx_idx = self._resolve_rx(node_id)

        results: list[dict[str, Any]] = []
        for _src_rx, tgt_rx, edge_data in graph.out_edges(rx_idx):
            if edge_kinds is not None and edge_data["kind"] not in edge_kinds:
                continue
            results.append({
                "node": dict(graph[tgt_rx]),
                "edge": dict(edge_data),
            })
        return results

    def get_tests_for(self, node_id: str) -> list[dict[str, Any]]:
        """Return test nodes associated with *node_id*, without duplicates.

        Two sources are combined, in this order:
          1. targets of outgoing TESTED_BY edges from node_id
          2. sources of incoming CALLS edges whose node has ``is_test`` set

        Raises:
            NodeNotFoundError: if *node_id* is not in the graph.
        """
        graph = self._ensure_graph()
        rx_idx = self._resolve_rx(node_id)

        seen_ids: set[str] = set()
        results: list[dict[str, Any]] = []

        # Outgoing TESTED_BY
        for _src, tgt_rx, edge_data in graph.out_edges(rx_idx):
            if edge_data["kind"] == EdgeKind.TESTED_BY.value:
                tgt_data = dict(graph[tgt_rx])
                tid = tgt_data["node_id"]
                if tid not in seen_ids:
                    seen_ids.add(tid)
                    results.append(tgt_data)

        # Incoming CALLS from test nodes
        for src_rx, _tgt, edge_data in graph.in_edges(rx_idx):
            if edge_data["kind"] == EdgeKind.CALLS.value:
                src_data = dict(graph[src_rx])
                if src_data.get("is_test") and src_data["node_id"] not in seen_ids:
                    seen_ids.add(src_data["node_id"])
                    results.append(src_data)

        return results

    def get_connected_component(self, node_id: str) -> list[str]:
        """Return all node_ids in the same weakly-connected component.

        Uses ``rustworkx.weakly_connected_components`` over the whole graph
        and picks the component containing *node_id*.

        Raises:
            NodeNotFoundError: if *node_id* is not in the graph.
        """
        graph = self._ensure_graph()
        rx_idx = self._resolve_rx(node_id)

        components = rx.weakly_connected_components(graph)
        for component in components:
            if rx_idx in component:
                return [self._index.rx_to_id[i] for i in component]

        # Shouldn't reach here if node exists, but return just the node
        return [node_id]

    def get_node_data(self, node_id: str) -> dict[str, Any]:
        """Return a copy of the node data dict for *node_id*.

        Raises:
            NodeNotFoundError: if *node_id* is not in the graph.
        """
        graph = self._ensure_graph()
        rx_idx = self._resolve_rx(node_id)
        return dict(graph[rx_idx])

    # ------------------------------------------------------------------
    # Internals
    # ------------------------------------------------------------------

    def _ensure_graph(self) -> rx.PyDiGraph:  # type: ignore[name-defined]
        """Return an up-to-date graph, building it if needed."""
        # build_graph() already performs the "cached and current?" check,
        # so the decision lives in exactly one place.
        return self.build_graph()

    def _resolve_rx(self, node_id: str) -> int:
        """Resolve a TEXT node_id to a rustworkx index.

        Raises:
            NodeNotFoundError: if *node_id* is absent from the current index.
        """
        rx_idx = self._index.id_to_rx.get(node_id)
        if rx_idx is None:
            raise NodeNotFoundError(
                f"Node '{node_id}' not found in graph"
            )
        return rx_idx
@@ -0,0 +1,158 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory v3.4 — CodeGraph Module
4
+
5
+ """GraphStore — thin graph-specific layer over CodeGraphDatabase.
6
+
7
+ All graph writes go through this layer. Provides:
8
+ - Atomic file replacement (store_file_nodes_edges)
9
+ - Bulk read for in-memory graph building (get_all_nodes_and_edges)
10
+ - File removal (remove_file)
11
+ - Version tracking for cache invalidation
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import logging
17
+ from typing import Sequence
18
+
19
+ from superlocalmemory.code_graph.database import CodeGraphDatabase
20
+ from superlocalmemory.code_graph.models import (
21
+ FileRecord,
22
+ GraphEdge,
23
+ GraphNode,
24
+ )
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
class GraphStore:
    """Graph-oriented facade over a ``CodeGraphDatabase``.

    Every method forwards to the wrapped database; this class groups the
    higher-level operations (bulk load, per-file replace/remove, version
    lookup, dependent tracing) behind one object.
    """

    def __init__(self, db: CodeGraphDatabase) -> None:
        self._db = db

    # ------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------

    @property
    def db(self) -> CodeGraphDatabase:
        """The wrapped database object."""
        return self._db

    @property
    def version(self) -> int:
        """The database's ``version`` value, used for cache invalidation."""
        return self._db.version

    # ------------------------------------------------------------------
    # Write operations
    # ------------------------------------------------------------------

    def store_file_nodes_edges(
        self,
        file_path: str,
        nodes: Sequence[GraphNode],
        edges: Sequence[GraphEdge],
        file_record: FileRecord,
    ) -> None:
        """Replace everything stored for *file_path* with the given data.

        The work is handed to the database's ``store_file_parse_results``,
        which receives the nodes and edges as fresh lists together with
        the file record.
        """
        node_list = list(nodes)
        edge_list = list(edges)
        self._db.store_file_parse_results(
            file_path, node_list, edge_list, file_record,
        )
        logger.debug(
            "Stored %d nodes, %d edges for %s",
            len(node_list), len(edge_list), file_path,
        )

    def remove_file(self, file_path: str) -> None:
        """Drop all graph data recorded for *file_path*.

        Inside one database transaction, in order: edges by file, nodes by
        file, then the file record.
        """
        database = self._db
        with database.transaction():
            database.delete_edges_by_file(file_path)
            database.delete_nodes_by_file(file_path)
            database.delete_file_record(file_path)
        logger.debug("Removed all data for %s", file_path)

    # ------------------------------------------------------------------
    # Read operations
    # ------------------------------------------------------------------

    def get_all_nodes_and_edges(
        self,
    ) -> tuple[list[GraphNode], list[GraphEdge]]:
        """Fetch every node, then every edge, as a ``(nodes, edges)`` pair.

        Used by ``GraphEngine.build_graph()``.
        """
        return self._db.get_all_nodes(), self._db.get_all_edges()

    def get_nodes_by_file(self, file_path: str) -> list[GraphNode]:
        """Nodes belonging to *file_path*, as returned by the database."""
        return self._db.get_nodes_by_file(file_path)

    def get_node(self, node_id: str) -> GraphNode | None:
        """Look up one node by its ID."""
        return self._db.get_node(node_id)

    def get_file_record(self, file_path: str) -> FileRecord | None:
        """Look up the file record stored for *file_path*."""
        return self._db.get_file_record(file_path)

    def get_all_file_records(self) -> list[FileRecord]:
        """Every file record the database holds."""
        return self._db.get_all_file_records()

    # ------------------------------------------------------------------
    # Dependent tracing (used by IncrementalUpdater)
    # ------------------------------------------------------------------

    def find_dependents(self, file_path: str) -> set[str]:
        """Return other files owning edges that target nodes in *file_path*.

        The query joins each edge to its target node and keeps edges whose
        target node lives in *file_path* while the edge's own ``file_path``
        differs. No filter on edge kind is applied.
        """
        query = """
            SELECT DISTINCT ge.file_path
            FROM graph_edges ge
            JOIN graph_nodes gn_target
                ON ge.target_node_id = gn_target.node_id
            WHERE gn_target.file_path = ?
              AND ge.file_path != ?
            """
        dependents: set[str] = set()
        for row in self._db.execute(query, (file_path, file_path)):
            dependents.add(row["file_path"])
        return dependents

    # ------------------------------------------------------------------
    # Stats
    # ------------------------------------------------------------------

    def get_stats(self) -> dict[str, int]:
        """Statistics as reported by the database."""
        return self._db.get_stats()
@@ -0,0 +1,200 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory v3.4 — CodeGraph Module
4
+
5
+ """IncrementalUpdater — hash-based change detection + dependent tracing.
6
+
7
+ Given a list of changed file paths:
8
+ 1. SHA-256 hash check to skip unchanged files
9
+ 2. Re-parse changed files
10
+ 3. Trace dependents via incoming cross-file edges of any kind (computed before the store is modified)
11
+ 4. Re-parse dependent files (their edges may be stale)
12
+ 5. Invalidate the in-memory graph cache
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import hashlib
18
+ import logging
19
+ import time
20
+ from dataclasses import dataclass, field
21
+ from pathlib import Path
22
+ from typing import Any, Protocol
23
+
24
+ from superlocalmemory.code_graph.graph_engine import GraphEngine
25
+ from superlocalmemory.code_graph.graph_store import GraphStore
26
+ from superlocalmemory.code_graph.models import (
27
+ FileRecord,
28
+ GraphEdge,
29
+ GraphNode,
30
+ ParseResult,
31
+ )
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+
36
+ # ---------------------------------------------------------------------------
37
+ # Parser protocol (dependency inversion — no import of Phase 1 parser)
38
+ # ---------------------------------------------------------------------------
39
+
40
class ParserProtocol(Protocol):
    """Structural type for parsers that IncrementalUpdater can drive.

    Any object exposing a compatible ``parse_file`` method satisfies it;
    no inheritance from this class is required.
    """

    def parse_file(self, file_path: str, repo_root: Path) -> ParseResult:
        """Parse one file and return its nodes, edges and file record."""
        ...
46
+
47
+
48
+ # ---------------------------------------------------------------------------
49
+ # Result dataclass
50
+ # ---------------------------------------------------------------------------
51
+
52
@dataclass(frozen=True)
class UpdateResult:
    """Immutable summary of one incremental update run.

    All counters default to zero and ``errors`` to an empty tuple, so a
    bare ``UpdateResult()`` stands for "nothing happened".
    """

    # changed files that were re-parsed and stored
    parsed: int = 0
    # changed files whose content hash matched the stored one
    skipped: int = 0
    # listed files that no longer exist and were removed from the store
    deleted: int = 0
    # dependent files that were re-parsed and stored
    dependents_parsed: int = 0
    # human-readable messages for files that could not be read or parsed
    errors: tuple[str, ...] = ()
61
+
62
+
63
+ # ---------------------------------------------------------------------------
64
+ # IncrementalUpdater
65
+ # ---------------------------------------------------------------------------
66
+
67
class IncrementalUpdater:
    """Hash-based incremental graph updater.

    Usage::

        updater = IncrementalUpdater(store, engine)
        result = updater.update(
            changed_files=["src/foo.py", "src/bar.py"],
            parser=my_parser,
            repo_root=Path("/my/repo"),
        )
    """

    def __init__(self, store: GraphStore, engine: GraphEngine) -> None:
        self._store = store
        self._engine = engine

    def update(
        self,
        changed_files: list[str],
        parser: ParserProtocol,
        repo_root: Path,
    ) -> UpdateResult:
        """Run incremental update for *changed_files*.

        Steps:
          1. Partition the (de-duplicated) paths into deleted / unchanged /
             to-parse by comparing SHA-256 content hashes with the stored
             file records.
          2. Trace dependents of every changing file while the store still
             holds the old edges.
          3. Remove deleted files, re-parse changed files, then re-parse
             dependents that were not already handled directly.
          4. Invalidate the engine's in-memory graph.

        Parameters
        ----------
        changed_files : relative file paths (relative to repo_root);
            repeated entries are processed once
        parser : object implementing ParserProtocol
        repo_root : absolute path to repository root

        Returns
        -------
        UpdateResult with counts of parsed/skipped/deleted/dependents_parsed
        and the collected error messages. Read and parse failures are
        recorded in ``errors`` instead of being raised.
        """
        if not changed_files:
            return UpdateResult()

        # Load stored hashes
        stored_hashes: dict[str, str] = {
            rec.file_path: rec.content_hash
            for rec in self._store.get_all_file_records()
        }

        # Partition files
        files_to_parse: list[str] = []
        files_to_delete: list[str] = []
        skipped = 0
        errors: list[str] = []

        # dict.fromkeys drops repeated paths while keeping first-seen order,
        # so a file listed twice is hashed, parsed and counted only once.
        for rel_path in dict.fromkeys(changed_files):
            abs_path = repo_root / rel_path
            if not abs_path.exists():
                files_to_delete.append(rel_path)
                continue

            try:
                content_bytes = abs_path.read_bytes()
            except OSError as exc:
                errors.append(f"Cannot read {rel_path}: {exc}")
                continue

            new_hash = hashlib.sha256(content_bytes).hexdigest()
            if stored_hashes.get(rel_path) == new_hash:
                skipped += 1
                continue

            files_to_parse.append(rel_path)

        # Trace dependents BEFORE modifying the store — FK cascades
        # would destroy the edges we need for dependency resolution.
        all_changing = set(files_to_parse) | set(files_to_delete)
        dependent_files: set[str] = set()
        for fp in all_changing:
            dependent_files.update(self._store.find_dependents(fp))

        # Everything in all_changing is handled directly below. That includes
        # files whose parse fails: retrying them as "dependents" would only
        # repeat the same failure and record a second error.
        dependent_files -= all_changing

        deleted = 0
        parsed = 0
        dependents_parsed = 0
        try:
            # Delete removed files
            for fp in files_to_delete:
                self._store.remove_file(fp)
                deleted += 1

            # Parse changed files
            for rel_path in files_to_parse:
                if self._reparse(
                    rel_path, parser, repo_root, errors, dependent=False,
                ):
                    parsed += 1

            # Re-parse dependents; sorted so processing order and the order
            # of any error messages are deterministic.
            for dep_path in sorted(dependent_files):
                if not (repo_root / dep_path).exists():
                    continue
                if self._reparse(
                    dep_path, parser, repo_root, errors, dependent=True,
                ):
                    dependents_parsed += 1
        finally:
            # Invalidate engine cache, even if a store operation raised
            # part-way, so the in-memory graph is rebuilt on next access.
            self._engine.invalidate()

        return UpdateResult(
            parsed=parsed,
            skipped=skipped,
            deleted=deleted,
            dependents_parsed=dependents_parsed,
            errors=tuple(errors),
        )

    def _reparse(
        self,
        rel_path: str,
        parser: ParserProtocol,
        repo_root: Path,
        errors: list[str],
        *,
        dependent: bool,
    ) -> bool:
        """Parse one file and store its results; return True on success.

        On failure the exception is appended to *errors* and logged, using
        the "dependent" wording when *dependent* is true, and False is
        returned so one bad file does not abort the whole batch.
        """
        try:
            result = parser.parse_file(rel_path, repo_root)
            self._store.store_file_nodes_edges(
                result.file_path,
                list(result.nodes),
                list(result.edges),
                result.file_record,
            )
        except Exception as exc:
            if dependent:
                errors.append(f"Dependent parse error {rel_path}: {exc}")
                logger.warning(
                    "Failed to parse dependent %s: %s", rel_path, exc,
                )
            else:
                errors.append(f"Parse error {rel_path}: {exc}")
                logger.warning("Failed to parse %s: %s", rel_path, exc)
            return False
        return True