superlocalmemory 3.3.20 → 3.3.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/pyproject.toml +9 -1
- package/src/superlocalmemory/cli/commands.py +138 -22
- package/src/superlocalmemory/cli/daemon.py +372 -0
- package/src/superlocalmemory/cli/main.py +8 -0
- package/src/superlocalmemory/cli/pending_store.py +158 -0
- package/src/superlocalmemory/cli/setup_wizard.py +39 -6
- package/src/superlocalmemory/code_graph/__init__.py +46 -0
- package/src/superlocalmemory/code_graph/blast_radius.py +177 -0
- package/src/superlocalmemory/code_graph/bridge/__init__.py +36 -0
- package/src/superlocalmemory/code_graph/bridge/entity_resolver.py +464 -0
- package/src/superlocalmemory/code_graph/bridge/event_listeners.py +195 -0
- package/src/superlocalmemory/code_graph/bridge/fact_enricher.py +159 -0
- package/src/superlocalmemory/code_graph/bridge/hebbian_linker.py +170 -0
- package/src/superlocalmemory/code_graph/bridge/temporal_checker.py +152 -0
- package/src/superlocalmemory/code_graph/changes.py +363 -0
- package/src/superlocalmemory/code_graph/communities.py +299 -0
- package/src/superlocalmemory/code_graph/config.py +88 -0
- package/src/superlocalmemory/code_graph/database.py +482 -0
- package/src/superlocalmemory/code_graph/extractors/__init__.py +78 -0
- package/src/superlocalmemory/code_graph/extractors/python.py +413 -0
- package/src/superlocalmemory/code_graph/extractors/typescript.py +556 -0
- package/src/superlocalmemory/code_graph/flows.py +350 -0
- package/src/superlocalmemory/code_graph/git_hooks.py +226 -0
- package/src/superlocalmemory/code_graph/graph_engine.py +295 -0
- package/src/superlocalmemory/code_graph/graph_store.py +158 -0
- package/src/superlocalmemory/code_graph/incremental.py +200 -0
- package/src/superlocalmemory/code_graph/models.py +130 -0
- package/src/superlocalmemory/code_graph/parser.py +507 -0
- package/src/superlocalmemory/code_graph/resolver.py +321 -0
- package/src/superlocalmemory/code_graph/search.py +460 -0
- package/src/superlocalmemory/code_graph/service.py +95 -0
- package/src/superlocalmemory/code_graph/watcher.py +207 -0
- package/src/superlocalmemory/core/embedding_worker.py +4 -2
- package/src/superlocalmemory/core/embeddings.py +8 -2
- package/src/superlocalmemory/core/engine.py +32 -0
- package/src/superlocalmemory/core/engine_wiring.py +5 -0
- package/src/superlocalmemory/core/store_pipeline.py +23 -1
- package/src/superlocalmemory/encoding/fact_extractor.py +68 -7
- package/src/superlocalmemory/infra/event_bus.py +5 -0
- package/src/superlocalmemory/mcp/server.py +23 -0
- package/src/superlocalmemory/mcp/tools_code_graph.py +1592 -0
- package/src/superlocalmemory/retrieval/engine.py +137 -2
- package/src/superlocalmemory/retrieval/semantic_channel.py +6 -2
- package/src/superlocalmemory/retrieval/spreading_activation.py +5 -3
- package/src/superlocalmemory/retrieval/strategy.py +16 -0
- package/src/superlocalmemory/server/api.py +4 -2
- package/src/superlocalmemory/server/ui.py +5 -2
- package/src/superlocalmemory/storage/schema_code_graph.py +239 -0
- package/src/superlocalmemory/ui/index.html +1879 -0
- package/src/superlocalmemory/ui/js/agents.js +192 -0
- package/src/superlocalmemory/ui/js/auto-settings.js +399 -0
- package/src/superlocalmemory/ui/js/behavioral.js +276 -0
- package/src/superlocalmemory/ui/js/clusters.js +206 -0
- package/src/superlocalmemory/ui/js/compliance.js +252 -0
- package/src/superlocalmemory/ui/js/core.js +246 -0
- package/src/superlocalmemory/ui/js/dashboard.js +110 -0
- package/src/superlocalmemory/ui/js/events.js +178 -0
- package/src/superlocalmemory/ui/js/fact-detail.js +92 -0
- package/src/superlocalmemory/ui/js/feedback.js +333 -0
- package/src/superlocalmemory/ui/js/graph-core.js +447 -0
- package/src/superlocalmemory/ui/js/graph-filters.js +220 -0
- package/src/superlocalmemory/ui/js/graph-interactions.js +351 -0
- package/src/superlocalmemory/ui/js/graph-ui.js +214 -0
- package/src/superlocalmemory/ui/js/ide-status.js +102 -0
- package/src/superlocalmemory/ui/js/init.js +45 -0
- package/src/superlocalmemory/ui/js/learning.js +435 -0
- package/src/superlocalmemory/ui/js/lifecycle.js +298 -0
- package/src/superlocalmemory/ui/js/math-health.js +98 -0
- package/src/superlocalmemory/ui/js/memories.js +264 -0
- package/src/superlocalmemory/ui/js/modal.js +357 -0
- package/src/superlocalmemory/ui/js/patterns.js +93 -0
- package/src/superlocalmemory/ui/js/profiles.js +236 -0
- package/src/superlocalmemory/ui/js/recall-lab.js +292 -0
- package/src/superlocalmemory/ui/js/search.js +59 -0
- package/src/superlocalmemory/ui/js/settings.js +224 -0
- package/src/superlocalmemory/ui/js/timeline.js +32 -0
- package/src/superlocalmemory/ui/js/trust-dashboard.js +73 -0
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory v3.4 — CodeGraph Module
|
|
4
|
+
|
|
5
|
+
"""GraphEngine — rustworkx in-memory directed graph.
|
|
6
|
+
|
|
7
|
+
Loads from SQLite via GraphStore, caches in a rustworkx PyDiGraph,
|
|
8
|
+
and provides O(1) node lookup + O(degree) traversals.
|
|
9
|
+
|
|
10
|
+
Cache invalidation: rebuild when store.version changes.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import logging
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
from superlocalmemory.code_graph.graph_store import GraphStore
|
|
20
|
+
from superlocalmemory.code_graph.models import EdgeKind, GraphNode
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
try:
|
|
25
|
+
import rustworkx as rx # type: ignore[import-untyped]
|
|
26
|
+
except ImportError as _rx_err:
|
|
27
|
+
rx = None # type: ignore[assignment]
|
|
28
|
+
_RX_IMPORT_ERROR = _rx_err
|
|
29
|
+
else:
|
|
30
|
+
_RX_IMPORT_ERROR = None
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class RustworkxNotInstalledError(ImportError):
    """Signal that the optional rustworkx dependency could not be imported.

    Subclasses ``ImportError`` so callers that already handle a missing
    optional dependency catch it without special-casing.
    """
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class NodeNotFoundError(KeyError):
    """Raised when a node_id is not present in the graph.

    Subclasses ``KeyError`` so existing ``except KeyError`` handlers keep
    working.
    """

    def __str__(self) -> str:
        """Return the human-readable message without repr quoting.

        ``KeyError.__str__`` renders a single argument through ``repr``,
        which wraps a full-sentence message in an extra pair of quotes.
        When the sole argument is a string it is returned as-is; every
        other shape falls back to the ``KeyError`` behaviour.
        """
        if len(self.args) == 1 and isinstance(self.args[0], str):
            return self.args[0]
        return super().__str__()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _require_rustworkx() -> None:
    """Fail fast with an actionable error when rustworkx is unavailable.

    Returns silently when the module-level ``rx`` import succeeded.

    Raises:
        RustworkxNotInstalledError: chained from the ``ImportError`` captured
            when the module-level import of rustworkx failed.
    """
    if rx is not None:
        return

    message = (
        "rustworkx is required for GraphEngine. "
        "Install it with: pip install rustworkx"
    )
    raise RustworkxNotInstalledError(message) from _RX_IMPORT_ERROR
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# ---------------------------------------------------------------------------
|
|
51
|
+
# GraphIndex — bidirectional ID mapping
|
|
52
|
+
# ---------------------------------------------------------------------------
|
|
53
|
+
|
|
54
|
+
@dataclass(frozen=True)
class GraphIndex:
    """Lookup tables tying SQLite TEXT node ids to rustworkx integer indices.

    The dataclass is frozen, so its attributes cannot be rebound after
    construction; the dictionaries themselves are ordinary dicts. Each
    field defaults to its own fresh empty dict.
    """

    # node_id -> rustworkx node index
    id_to_rx: dict[str, int] = field(default_factory=dict)
    # rustworkx node index -> node_id
    rx_to_id: dict[int, str] = field(default_factory=dict)
    # qualified_name -> node_id
    qname_to_id: dict[str, str] = field(default_factory=dict)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# ---------------------------------------------------------------------------
|
|
64
|
+
# GraphEngine
|
|
65
|
+
# ---------------------------------------------------------------------------
|
|
66
|
+
|
|
67
|
+
class GraphEngine:
    """In-memory directed graph backed by a rustworkx ``PyDiGraph``.

    The graph is built lazily from the ``GraphStore`` on first access and is
    rebuilt whenever ``store.version`` differs from the version recorded at
    build time, or after :meth:`invalidate` has been called.
    """

    def __init__(self, store: GraphStore) -> None:
        """Bind the engine to *store* without loading any data yet.

        Raises:
            RustworkxNotInstalledError: if rustworkx could not be imported.
        """
        _require_rustworkx()
        self._store = store
        self._graph: rx.PyDiGraph | None = None  # type: ignore[name-defined]
        self._index: GraphIndex = GraphIndex()
        # -1 marks "never built / invalidated".
        self._graph_version: int = -1

    # ------------------------------------------------------------------
    # Graph lifecycle
    # ------------------------------------------------------------------

    def build_graph(self) -> rx.PyDiGraph:  # type: ignore[name-defined]
        """Load all nodes and edges from the store into a ``PyDiGraph``.

        Returns the cached graph when one exists and the store version has
        not changed since it was built. Otherwise a new multigraph is
        created, the index is replaced, and the new graph is cached.

        Edges whose source or target node is not among the loaded nodes are
        skipped with a warning. When several nodes share a qualified name,
        the last one loaded wins in ``qname_to_id``.
        """
        if (
            self._graph is not None
            and self._graph_version == self._store.version
        ):
            return self._graph

        # Capture the version BEFORE reading rows. If a write lands while the
        # rows are being loaded, the cache is then tagged with the older
        # version and the next access rebuilds, instead of stale data being
        # recorded as current.
        version_at_load = self._store.version
        nodes, edges = self._store.get_all_nodes_and_edges()

        graph = rx.PyDiGraph(multigraph=True)

        id_to_rx: dict[str, int] = {}
        rx_to_id: dict[int, str] = {}
        qname_to_id: dict[str, str] = {}

        # Add nodes; the payload is a plain dict so query methods can hand
        # out shallow copies.
        for node in nodes:
            node_data: dict[str, Any] = {
                "node_id": node.node_id,
                "kind": node.kind.value,
                "name": node.name,
                "qualified_name": node.qualified_name,
                "file_path": node.file_path,
                "line_start": node.line_start,
                "line_end": node.line_end,
                "language": node.language,
                "parent_name": node.parent_name,
                "is_test": node.is_test,
                "community_id": node.community_id,
            }
            rx_idx = graph.add_node(node_data)
            id_to_rx[node.node_id] = rx_idx
            rx_to_id[rx_idx] = node.node_id
            qname_to_id[node.qualified_name] = node.node_id

        # Add edges, dropping any that reference a node we did not load.
        for edge in edges:
            src_rx = id_to_rx.get(edge.source_node_id)
            tgt_rx = id_to_rx.get(edge.target_node_id)
            if src_rx is None or tgt_rx is None:
                logger.warning(
                    "Skipping dangling edge %s -> %s (kind=%s)",
                    edge.source_node_id, edge.target_node_id, edge.kind.value,
                )
                continue
            edge_data: dict[str, Any] = {
                "edge_id": edge.edge_id,
                "kind": edge.kind.value,
                "file_path": edge.file_path,
                "line": edge.line,
                "confidence": edge.confidence,
            }
            graph.add_edge(src_rx, tgt_rx, edge_data)

        # Cache
        self._graph = graph
        self._index = GraphIndex(
            id_to_rx=id_to_rx,
            rx_to_id=rx_to_id,
            qname_to_id=qname_to_id,
        )
        self._graph_version = version_at_load
        logger.debug(
            "Built graph: %d nodes, %d edges",
            graph.num_nodes(), graph.num_edges(),
        )
        return graph

    def invalidate(self) -> None:
        """Drop the cached graph so the next access rebuilds it."""
        self._graph = None
        self._graph_version = -1

    @property
    def index(self) -> GraphIndex:
        """Current graph index (builds the graph first if needed)."""
        self._ensure_graph()
        return self._index

    @property
    def graph(self) -> rx.PyDiGraph:  # type: ignore[name-defined]
        """Current rustworkx graph (builds it first if needed)."""
        return self._ensure_graph()

    # ------------------------------------------------------------------
    # Query operations
    # ------------------------------------------------------------------

    def get_callers(
        self,
        node_id: str,
        edge_kinds: set[str] | None = None,
    ) -> list[dict[str, Any]]:
        """Return all nodes that have edges pointing *to* node_id.

        Args:
            node_id: TEXT id of the target node.
            edge_kinds: when given, only edges whose ``"kind"`` value is in
                this set are returned; ``None`` means no filtering.

        Returns:
            One ``{"node": <node_data>, "edge": <edge_data>}`` entry per
            incoming edge; both values are shallow copies.

        Raises:
            NodeNotFoundError: if *node_id* is not in the graph.
        """
        graph = self._ensure_graph()
        rx_idx = self._resolve_rx(node_id)

        return [
            {"node": dict(graph[src_rx]), "edge": dict(edge_data)}
            for src_rx, _tgt_rx, edge_data in graph.in_edges(rx_idx)
            if edge_kinds is None or edge_data["kind"] in edge_kinds
        ]

    def get_callees(
        self,
        node_id: str,
        edge_kinds: set[str] | None = None,
    ) -> list[dict[str, Any]]:
        """Return all nodes that node_id has edges pointing *to*.

        Args:
            node_id: TEXT id of the source node.
            edge_kinds: when given, only edges whose ``"kind"`` value is in
                this set are returned; ``None`` means no filtering.

        Returns:
            One ``{"node": <node_data>, "edge": <edge_data>}`` entry per
            outgoing edge; both values are shallow copies.

        Raises:
            NodeNotFoundError: if *node_id* is not in the graph.
        """
        graph = self._ensure_graph()
        rx_idx = self._resolve_rx(node_id)

        return [
            {"node": dict(graph[tgt_rx]), "edge": dict(edge_data)}
            for _src_rx, tgt_rx, edge_data in graph.out_edges(rx_idx)
            if edge_kinds is None or edge_data["kind"] in edge_kinds
        ]

    def get_tests_for(self, node_id: str) -> list[dict[str, Any]]:
        """Return test nodes associated with *node_id*.

        Collected in this order, de-duplicated by ``node_id``:

        1. Targets of outgoing TESTED_BY edges from node_id
        2. Sources of incoming CALLS edges whose node has ``is_test`` set

        Returns:
            Shallow copies of the matching node data dicts.

        Raises:
            NodeNotFoundError: if *node_id* is not in the graph.
        """
        graph = self._ensure_graph()
        rx_idx = self._resolve_rx(node_id)

        seen_ids: set[str] = set()
        results: list[dict[str, Any]] = []

        # Outgoing TESTED_BY
        for _src, tgt_rx, edge_data in graph.out_edges(rx_idx):
            if edge_data["kind"] != EdgeKind.TESTED_BY.value:
                continue
            tgt_data = dict(graph[tgt_rx])
            tid = tgt_data["node_id"]
            if tid not in seen_ids:
                seen_ids.add(tid)
                results.append(tgt_data)

        # Incoming CALLS from test nodes
        for src_rx, _tgt, edge_data in graph.in_edges(rx_idx):
            if edge_data["kind"] != EdgeKind.CALLS.value:
                continue
            src_data = dict(graph[src_rx])
            if src_data.get("is_test") and src_data["node_id"] not in seen_ids:
                seen_ids.add(src_data["node_id"])
                results.append(src_data)

        return results

    def get_connected_component(self, node_id: str) -> list[str]:
        """Return all node_ids in the same weakly-connected component.

        Uses ``rustworkx.weakly_connected_components`` over the whole graph
        and picks the component containing *node_id*.

        Raises:
            NodeNotFoundError: if *node_id* is not in the graph.
        """
        graph = self._ensure_graph()
        rx_idx = self._resolve_rx(node_id)

        for component in rx.weakly_connected_components(graph):
            if rx_idx in component:
                return [self._index.rx_to_id[i] for i in component]

        # Shouldn't reach here if node exists, but return just the node
        return [node_id]

    def get_node_data(self, node_id: str) -> dict[str, Any]:
        """Return a shallow copy of the node data dict for *node_id*.

        Raises:
            NodeNotFoundError: if *node_id* is not in the graph.
        """
        graph = self._ensure_graph()
        rx_idx = self._resolve_rx(node_id)
        return dict(graph[rx_idx])

    # ------------------------------------------------------------------
    # Internals
    # ------------------------------------------------------------------

    def _ensure_graph(self) -> rx.PyDiGraph:  # type: ignore[name-defined]
        """Return the cached graph, rebuilding it if missing or outdated."""
        if (
            self._graph is None
            or self._graph_version != self._store.version
        ):
            return self.build_graph()
        return self._graph

    def _resolve_rx(self, node_id: str) -> int:
        """Resolve a TEXT node_id to a rustworkx index.

        Reads the current index as-is; callers are expected to have called
        ``_ensure_graph`` first.

        Raises:
            NodeNotFoundError: if *node_id* is not in the index.
        """
        rx_idx = self._index.id_to_rx.get(node_id)
        if rx_idx is None:
            raise NodeNotFoundError(
                f"Node '{node_id}' not found in graph"
            )
        return rx_idx
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory v3.4 — CodeGraph Module
|
|
4
|
+
|
|
5
|
+
"""GraphStore — thin graph-specific layer over CodeGraphDatabase.
|
|
6
|
+
|
|
7
|
+
All graph writes go through this layer. Provides:
|
|
8
|
+
- Atomic file replacement (store_file_nodes_edges)
|
|
9
|
+
- Bulk read for in-memory graph building (get_all_nodes_and_edges)
|
|
10
|
+
- File removal (remove_file)
|
|
11
|
+
- Version tracking for cache invalidation
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import logging
|
|
17
|
+
from typing import Sequence
|
|
18
|
+
|
|
19
|
+
from superlocalmemory.code_graph.database import CodeGraphDatabase
|
|
20
|
+
from superlocalmemory.code_graph.models import (
|
|
21
|
+
FileRecord,
|
|
22
|
+
GraphEdge,
|
|
23
|
+
GraphNode,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class GraphStore:
    """Graph-oriented facade over a ``CodeGraphDatabase``.

    Every method forwards to the wrapped database; this layer groups the
    operations other CodeGraph modules use (bulk load, per-file replace and
    removal, dependent lookup, write-version).
    """

    def __init__(self, db: CodeGraphDatabase) -> None:
        """Wrap *db*; no queries are issued here."""
        self._db = db

    # ------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------

    @property
    def db(self) -> CodeGraphDatabase:
        """The wrapped database instance."""
        return self._db

    @property
    def version(self) -> int:
        """The database's ``version`` value, used for cache invalidation."""
        return self._db.version

    # ------------------------------------------------------------------
    # Write operations
    # ------------------------------------------------------------------

    def store_file_nodes_edges(
        self,
        file_path: str,
        nodes: Sequence[GraphNode],
        edges: Sequence[GraphEdge],
        file_record: FileRecord,
    ) -> None:
        """Replace all stored data for *file_path* with the given results.

        Delegates to the database's ``store_file_parse_results``, passing
        *nodes* and *edges* as fresh lists, then logs the counts at debug
        level.
        """
        node_list = list(nodes)
        edge_list = list(edges)
        self._db.store_file_parse_results(
            file_path, node_list, edge_list, file_record,
        )
        logger.debug(
            "Stored %d nodes, %d edges for %s",
            len(nodes), len(edges), file_path,
        )

    def remove_file(self, file_path: str) -> None:
        """Delete every trace of *file_path* from the graph tables.

        Inside one ``transaction()`` context this removes, in order, the
        file's edges, its nodes, and its file record.
        """
        database = self._db
        with database.transaction():
            # Order is preserved from the original: edges, nodes, record.
            database.delete_edges_by_file(file_path)
            database.delete_nodes_by_file(file_path)
            database.delete_file_record(file_path)
        logger.debug("Removed all data for %s", file_path)

    # ------------------------------------------------------------------
    # Read operations
    # ------------------------------------------------------------------

    def get_all_nodes_and_edges(
        self,
    ) -> tuple[list[GraphNode], list[GraphEdge]]:
        """Fetch every node and every edge as a ``(nodes, edges)`` pair.

        Nodes are read before edges. Used by ``GraphEngine.build_graph()``.
        """
        all_nodes = self._db.get_all_nodes()
        all_edges = self._db.get_all_edges()
        return all_nodes, all_edges

    def get_nodes_by_file(self, file_path: str) -> list[GraphNode]:
        """Nodes belonging to *file_path*, as returned by the database."""
        return self._db.get_nodes_by_file(file_path)

    def get_node(self, node_id: str) -> GraphNode | None:
        """Look up one node by its id."""
        return self._db.get_node(node_id)

    def get_file_record(self, file_path: str) -> FileRecord | None:
        """Look up the file record for *file_path*."""
        return self._db.get_file_record(file_path)

    def get_all_file_records(self) -> list[FileRecord]:
        """Every tracked file record."""
        return self._db.get_all_file_records()

    # ------------------------------------------------------------------
    # Dependent tracing (used by IncrementalUpdater)
    # ------------------------------------------------------------------

    def find_dependents(self, file_path: str) -> set[str]:
        """Return the files whose edges point at nodes inside *file_path*.

        An edge counts when its target node lives in *file_path* and the
        edge's own ``file_path`` is a different file. The query applies no
        filter on edge kind.
        """
        query = """
            SELECT DISTINCT ge.file_path
            FROM graph_edges ge
            JOIN graph_nodes gn_target
              ON ge.target_node_id = gn_target.node_id
            WHERE gn_target.file_path = ?
              AND ge.file_path != ?
            """
        matches = self._db.execute(query, (file_path, file_path))
        dependents: set[str] = set()
        for row in matches:
            dependents.add(row["file_path"])
        return dependents

    # ------------------------------------------------------------------
    # Stats
    # ------------------------------------------------------------------

    def get_stats(self) -> dict[str, int]:
        """Statistics exactly as reported by the database."""
        return self._db.get_stats()
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory v3.4 — CodeGraph Module
|
|
4
|
+
|
|
5
|
+
"""IncrementalUpdater — hash-based change detection + dependent tracing.
|
|
6
|
+
|
|
7
|
+
Given a list of changed file paths:
|
|
8
|
+
1. SHA-256 hash check to skip unchanged files
|
|
9
|
+
2. Re-parse changed files
|
|
10
|
+
3. Trace dependent files via graph edges that target the changed files
|
|
11
|
+
4. Re-parse dependent files (their edges may be stale)
|
|
12
|
+
5. Invalidate the in-memory graph cache
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import hashlib
|
|
18
|
+
import logging
|
|
19
|
+
import time
|
|
20
|
+
from dataclasses import dataclass, field
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Any, Protocol
|
|
23
|
+
|
|
24
|
+
from superlocalmemory.code_graph.graph_engine import GraphEngine
|
|
25
|
+
from superlocalmemory.code_graph.graph_store import GraphStore
|
|
26
|
+
from superlocalmemory.code_graph.models import (
|
|
27
|
+
FileRecord,
|
|
28
|
+
GraphEdge,
|
|
29
|
+
GraphNode,
|
|
30
|
+
ParseResult,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
logger = logging.getLogger(__name__)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
# Parser protocol (dependency inversion — no import of Phase 1 parser)
|
|
38
|
+
# ---------------------------------------------------------------------------
|
|
39
|
+
|
|
40
|
+
class ParserProtocol(Protocol):
    """Structural interface a parser must satisfy for IncrementalUpdater.

    Any object exposing a compatible ``parse_file`` method qualifies; no
    inheritance from this class is required.
    """

    def parse_file(self, file_path: str, repo_root: Path) -> ParseResult:
        """Parse one file and return its ``ParseResult``.

        ``IncrementalUpdater`` reads ``file_path``, ``nodes``, ``edges`` and
        ``file_record`` from the returned value.
        """
        ...
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# ---------------------------------------------------------------------------
|
|
49
|
+
# Result dataclass
|
|
50
|
+
# ---------------------------------------------------------------------------
|
|
51
|
+
|
|
52
|
+
@dataclass(frozen=True)
class UpdateResult:
    """Immutable summary of one incremental update run."""

    # Changed files that were re-parsed and stored successfully.
    parsed: int = 0
    # Changed files whose content hash matched the stored hash.
    skipped: int = 0
    # Changed files no longer on disk whose data was removed.
    deleted: int = 0
    # Dependent files that were re-parsed and stored successfully.
    dependents_parsed: int = 0
    # Human-readable messages for read and parse failures.
    errors: tuple[str, ...] = ()


# ---------------------------------------------------------------------------
# IncrementalUpdater
# ---------------------------------------------------------------------------

class IncrementalUpdater:
    """Hash-based incremental graph updater.

    Usage::

        updater = IncrementalUpdater(store, engine)
        result = updater.update(
            changed_files=["src/foo.py", "src/bar.py"],
            parser=my_parser,
            repo_root=Path("/my/repo"),
        )
    """

    def __init__(self, store: GraphStore, engine: GraphEngine) -> None:
        """Keep references to the store to update and the engine to invalidate."""
        self._store = store
        self._engine = engine

    def update(
        self,
        changed_files: list[str],
        parser: ParserProtocol,
        repo_root: Path,
    ) -> UpdateResult:
        """Run incremental update for *changed_files*.

        Steps, in order:

        1. Classify each distinct path as deleted (missing on disk),
           unreadable, unchanged (SHA-256 equals the stored hash) or
           to-be-parsed.
        2. Look up dependents of every changing path while the old edges
           are still in the store.
        3. Remove deleted files, then parse and store changed files.
        4. Re-parse dependents that were not already handled and still
           exist on disk, in sorted order.
        5. Invalidate the engine cache (also when a store call raises).

        Parameters
        ----------
        changed_files : relative file paths (relative to repo_root);
            duplicates are processed once
        parser : object implementing ParserProtocol
        repo_root : absolute path to repository root

        Returns
        -------
        UpdateResult with counts of parsed/skipped/deleted/dependents_parsed
        and the collected error messages. An empty *changed_files* returns
        an all-zero result without touching the store or the engine.
        """
        if not changed_files:
            return UpdateResult()

        # Load stored hashes
        stored_hashes: dict[str, str] = {
            rec.file_path: rec.content_hash
            for rec in self._store.get_all_file_records()
        }

        # Partition files
        files_to_parse: list[str] = []
        files_to_delete: list[str] = []
        skipped = 0
        errors: list[str] = []

        # dict.fromkeys drops repeated paths while keeping first-seen order,
        # so a path reported twice is not parsed or counted twice.
        for rel_path in dict.fromkeys(changed_files):
            abs_path = repo_root / rel_path
            if not abs_path.exists():
                files_to_delete.append(rel_path)
                continue

            try:
                content_bytes = abs_path.read_bytes()
            except OSError as exc:
                errors.append(f"Cannot read {rel_path}: {exc}")
                continue

            new_hash = hashlib.sha256(content_bytes).hexdigest()
            if stored_hashes.get(rel_path) == new_hash:
                skipped += 1
                continue

            files_to_parse.append(rel_path)

        # Trace dependents BEFORE modifying the store — FK cascades
        # would destroy the edges we need for dependency resolution.
        dependent_files: set[str] = set()
        for path in (*files_to_parse, *files_to_delete):
            dependent_files.update(self._store.find_dependents(path))

        deleted = 0
        parsed = 0
        dependents_parsed = 0
        try:
            # Delete removed files
            for path in files_to_delete:
                self._store.remove_file(path)
                deleted += 1

            # Parse changed files
            parsed_paths: set[str] = set()
            for rel_path in files_to_parse:
                if self._parse_and_store(
                    parser, rel_path, repo_root, errors, as_dependent=False,
                ):
                    parsed += 1
                    parsed_paths.add(rel_path)

            # Remove already-handled files
            dependent_files -= parsed_paths
            dependent_files -= set(files_to_delete)

            # Re-parse dependents; sorted so processing order and the order
            # of any error messages are reproducible between runs.
            for dep_path in sorted(dependent_files):
                if not (repo_root / dep_path).exists():
                    continue
                if self._parse_and_store(
                    parser, dep_path, repo_root, errors, as_dependent=True,
                ):
                    dependents_parsed += 1
        finally:
            # Invalidate engine cache even if a store call raised midway,
            # because earlier writes in this run may already have landed.
            self._engine.invalidate()

        return UpdateResult(
            parsed=parsed,
            skipped=skipped,
            deleted=deleted,
            dependents_parsed=dependents_parsed,
            errors=tuple(errors),
        )

    def _parse_and_store(
        self,
        parser: ParserProtocol,
        rel_path: str,
        repo_root: Path,
        errors: list[str],
        *,
        as_dependent: bool,
    ) -> bool:
        """Parse *rel_path* and store the result; return True on success.

        Any exception from parsing or storing is recorded in *errors* and
        logged as a warning rather than propagated, so one bad file does
        not abort the rest of the batch. *as_dependent* only selects the
        wording of the error and log messages.
        """
        try:
            result = parser.parse_file(rel_path, repo_root)
            self._store.store_file_nodes_edges(
                result.file_path,
                list(result.nodes),
                list(result.edges),
                result.file_record,
            )
        except Exception as exc:
            if as_dependent:
                errors.append(f"Dependent parse error {rel_path}: {exc}")
                logger.warning(
                    "Failed to parse dependent %s: %s", rel_path, exc,
                )
            else:
                errors.append(f"Parse error {rel_path}: {exc}")
                logger.warning("Failed to parse %s: %s", rel_path, exc)
            return False
        return True
|