code-context-control 2.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__init__.py +1 -0
- cli/_hook_utils.py +99 -0
- cli/c3.py +6152 -0
- cli/commands/__init__.py +1 -0
- cli/commands/common.py +312 -0
- cli/commands/parser.py +286 -0
- cli/docs.html +3178 -0
- cli/edits.html +878 -0
- cli/hook_auto_snapshot.py +142 -0
- cli/hook_c3_signal.py +61 -0
- cli/hook_c3read.py +116 -0
- cli/hook_edit_ledger.py +213 -0
- cli/hook_edit_unlock.py +170 -0
- cli/hook_filter.py +130 -0
- cli/hook_ghost_files.py +238 -0
- cli/hook_pretool_enforce.py +334 -0
- cli/hook_read.py +200 -0
- cli/hook_session_stats.py +62 -0
- cli/hook_terse_advisor.py +190 -0
- cli/hub.html +3764 -0
- cli/hub_server.py +1619 -0
- cli/mcp_proxy.py +428 -0
- cli/mcp_server.py +660 -0
- cli/server.py +2985 -0
- cli/tools/__init__.py +4 -0
- cli/tools/_helpers.py +65 -0
- cli/tools/agent.py +1165 -0
- cli/tools/compress.py +215 -0
- cli/tools/delegate.py +1184 -0
- cli/tools/edit.py +313 -0
- cli/tools/edits.py +118 -0
- cli/tools/filter.py +285 -0
- cli/tools/impact.py +163 -0
- cli/tools/memory.py +469 -0
- cli/tools/read.py +224 -0
- cli/tools/search.py +337 -0
- cli/tools/session.py +95 -0
- cli/tools/shell.py +193 -0
- cli/tools/status.py +306 -0
- cli/tools/validate.py +310 -0
- cli/ui/api.js +36 -0
- cli/ui/app.js +207 -0
- cli/ui/components/chat.js +758 -0
- cli/ui/components/dashboard.js +689 -0
- cli/ui/components/edits.js +220 -0
- cli/ui/components/instructions.js +481 -0
- cli/ui/components/memory.js +626 -0
- cli/ui/components/sessions.js +606 -0
- cli/ui/components/settings.js +1404 -0
- cli/ui/components/sidebar.js +156 -0
- cli/ui/icons.js +51 -0
- cli/ui/shared.js +119 -0
- cli/ui/theme.js +22 -0
- cli/ui.html +168 -0
- cli/ui_legacy.html +6797 -0
- cli/ui_nano.html +503 -0
- code_context_control-2.28.0.dist-info/METADATA +248 -0
- code_context_control-2.28.0.dist-info/RECORD +150 -0
- code_context_control-2.28.0.dist-info/WHEEL +5 -0
- code_context_control-2.28.0.dist-info/entry_points.txt +4 -0
- code_context_control-2.28.0.dist-info/licenses/LICENSE +201 -0
- code_context_control-2.28.0.dist-info/top_level.txt +5 -0
- core/__init__.py +75 -0
- core/config.py +269 -0
- core/ide.py +188 -0
- oracle/__init__.py +1 -0
- oracle/config.py +75 -0
- oracle/oracle.html +3900 -0
- oracle/oracle_server.py +663 -0
- oracle/services/__init__.py +1 -0
- oracle/services/c3_bridge.py +210 -0
- oracle/services/chat_engine.py +1103 -0
- oracle/services/chat_store.py +155 -0
- oracle/services/cross_memory.py +154 -0
- oracle/services/federated_graph.py +463 -0
- oracle/services/health_checker.py +117 -0
- oracle/services/insight_engine.py +307 -0
- oracle/services/memory_reader.py +106 -0
- oracle/services/memory_writer.py +182 -0
- oracle/services/ollama_bridge.py +332 -0
- oracle/services/project_scanner.py +87 -0
- oracle/services/review_agent.py +206 -0
- services/__init__.py +1 -0
- services/activity_log.py +93 -0
- services/agent_base.py +124 -0
- services/agents.py +1529 -0
- services/auto_memory.py +407 -0
- services/bench/__init__.py +6 -0
- services/bench/external/__init__.py +29 -0
- services/bench/external/aider_polyglot.py +405 -0
- services/bench/external/swe_bench.py +485 -0
- services/benchmark_dashboard.py +596 -0
- services/claude_md.py +785 -0
- services/compressor.py +592 -0
- services/context_snapshot.py +356 -0
- services/conversation_store.py +870 -0
- services/doc_index.py +537 -0
- services/e2e_benchmark.py +2884 -0
- services/e2e_evaluator.py +396 -0
- services/e2e_tasks.py +743 -0
- services/edit_ledger.py +459 -0
- services/embedding_index.py +341 -0
- services/error_reporting.py +123 -0
- services/file_memory.py +734 -0
- services/hub_service.py +585 -0
- services/indexer.py +712 -0
- services/memory.py +318 -0
- services/memory_consolidator.py +538 -0
- services/memory_graph.py +382 -0
- services/memory_grounder.py +304 -0
- services/memory_scorer.py +246 -0
- services/metrics.py +86 -0
- services/notifications.py +209 -0
- services/ollama_client.py +201 -0
- services/output_filter.py +488 -0
- services/parser.py +1238 -0
- services/project_manager.py +579 -0
- services/protocol.py +306 -0
- services/proxy_state.py +152 -0
- services/retrieval_broker.py +129 -0
- services/router.py +414 -0
- services/runtime.py +326 -0
- services/session_benchmark.py +1945 -0
- services/session_manager.py +1026 -0
- services/session_preloader.py +251 -0
- services/text_index.py +90 -0
- services/tool_classifier.py +176 -0
- services/transcript_index.py +340 -0
- services/validation_cache.py +155 -0
- services/vector_store.py +299 -0
- services/version_tracker.py +271 -0
- services/watcher.py +192 -0
- tui/__init__.py +0 -0
- tui/backend.py +59 -0
- tui/main.py +145 -0
- tui/screens/__init__.py +1 -0
- tui/screens/benchmark_view.py +109 -0
- tui/screens/claudemd_view.py +46 -0
- tui/screens/compress_view.py +52 -0
- tui/screens/index_view.py +74 -0
- tui/screens/init_view.py +82 -0
- tui/screens/mcp_view.py +73 -0
- tui/screens/optimize_view.py +41 -0
- tui/screens/pipe_view.py +46 -0
- tui/screens/projects_view.py +355 -0
- tui/screens/search_view.py +55 -0
- tui/screens/session_view.py +143 -0
- tui/screens/stats.py +158 -0
- tui/screens/ui_view.py +54 -0
- tui/theme.tcss +335 -0
services/memory_graph.py
ADDED
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
"""Memory Graph — directed graph connecting facts, files, and symbols.
|
|
2
|
+
|
|
3
|
+
Facts are nodes; relationships are weighted directed edges with types.
|
|
4
|
+
The graph enables spreading activation (recall neighbours of recalled facts),
|
|
5
|
+
cluster detection, and gap analysis.
|
|
6
|
+
|
|
7
|
+
Edge types:
|
|
8
|
+
co_recalled — two facts recalled in the same query/session
|
|
9
|
+
caused_by — causal chain (user-stated or inferred)
|
|
10
|
+
leads_to — consequence/dependency
|
|
11
|
+
touches — fact references a file or symbol
|
|
12
|
+
contradicts — newer fact overrides or conflicts with older one
|
|
13
|
+
refines — fact was updated; old version linked
|
|
14
|
+
clusters_with — computed via community detection on co-recall edges
|
|
15
|
+
|
|
16
|
+
Storage: <data_dir>/memory_graph.json, i.e. .c3/facts/memory_graph.json by default (JSON edge list; the adjacency index is rebuilt on load)
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import json
|
|
22
|
+
from collections import defaultdict
|
|
23
|
+
from datetime import datetime, timezone
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
EDGE_TYPES = {
    "co_recalled", "caused_by", "leads_to", "touches",
    "contradicts", "refines", "clusters_with",
}

# Edge types whose direction carries no meaning: (a, b) and (b, a) describe
# the same relationship, so lookups for these match either orientation.
_SYMMETRIC_EDGE_TYPES = frozenset({"co_recalled", "clusters_with"})

# Limits
MAX_EDGES_PER_NODE = 50
MAX_TOTAL_EDGES = 5000


class MemoryGraph:
    """Persistent directed graph over memory facts.

    Edges are plain dicts with the keys ``src``, ``dst``, ``type``,
    ``weight``, ``created_at``, ``last_seen``, ``hit_count`` and ``metadata``
    (plus ``last_decayed`` once :meth:`decay_edges` has touched them).  They
    are kept in a flat list that is persisted as ``{"edges": [...]}`` and
    indexed in memory by both endpoints.

    Direction is significant for every edge type except the symmetric ones
    (``co_recalled`` and ``clusters_with``), where ``(a, b)`` and ``(b, a)``
    are treated as the same edge.
    """

    def __init__(self, project_path: str, data_dir: str = ".c3/facts"):
        """Open (or create) the graph stored under ``project_path/data_dir``."""
        self.project_path = Path(project_path)
        self.data_dir = self.project_path / data_dir
        self.data_dir.mkdir(parents=True, exist_ok=True)
        self.graph_file = self.data_dir / "memory_graph.json"
        self._edges: list[dict] = []
        self._adjacency: dict[str, list[dict]] = defaultdict(list)
        self._load()

    # ── Edge management ─────────────────────────────────────────────

    def add_edge(
        self,
        src: str,
        dst: str,
        edge_type: str,
        weight: float = 1.0,
        metadata: dict | None = None,
    ) -> dict:
        """Add or strengthen an edge between two nodes and persist the graph.

        Returns ``{"added": True, "edge": ...}`` for a new edge,
        ``{"strengthened": True, "edge": ...}`` when an existing edge was
        reinforced, or ``{"error": ...}`` for an unknown ``edge_type``.
        """
        result = self._upsert_edge(src, dst, edge_type, weight, metadata)
        if "error" not in result:
            self._save()
        return result

    def remove_edge(self, src: str, dst: str, edge_type: str) -> dict:
        """Remove a specific edge; returns ``{"error": "not found"}`` if absent."""
        edge = self._find_edge(src, dst, edge_type)
        if edge is None:
            return {"error": "not found"}
        # Filter by identity so an equal-looking sibling edge is never hit.
        self._edges = [e for e in self._edges if e is not edge]
        self._rebuild_adjacency()
        self._save()
        return {"removed": True}

    def remove_node(self, node_id: str) -> dict:
        """Remove all edges involving a node (when a fact is deleted)."""
        before = len(self._edges)
        self._edges = [
            e for e in self._edges
            if e["src"] != node_id and e["dst"] != node_id
        ]
        self._rebuild_adjacency()
        self._save()
        return {"removed_edges": before - len(self._edges)}

    # ── Query ───────────────────────────────────────────────────────

    def get_edges(self, node_id: str, edge_type: str | None = None) -> list[dict]:
        """Get all edges for a node, optionally filtered by type.

        The returned list is a copy, so callers may reorder or trim it without
        corrupting the index; the edge dicts themselves are the live objects.
        """
        edges = self._adjacency.get(node_id, [])
        if edge_type:
            return [e for e in edges if e["type"] == edge_type]
        return list(edges)

    def get_neighbors(self, node_id: str, edge_type: str | None = None) -> list[str]:
        """Get the distinct IDs at the other end of a node's edges.

        Both incoming and outgoing edges count; order follows edge insertion.
        """
        neighbors: dict[str, None] = {}
        for e in self.get_edges(node_id, edge_type):
            other = e["dst"] if e["src"] == node_id else e["src"]
            neighbors[other] = None
        return list(neighbors)

    def spreading_activation(
        self,
        seed_ids: list[str],
        max_depth: int = 2,
        min_weight: float = 0.5,
        max_results: int = 20,
    ) -> list[dict]:
        """Activate from seed nodes and spread through the graph.

        Seeds start with energy 1.0.  Each hop passes on
        ``energy * 0.5 * min(edge_weight / 5.0, 1.0)``; edges lighter than
        ``min_weight`` and propagated energies of 0.01 or less are dropped.
        A node's activation is the strongest energy that reached it (not a
        sum), and edges are followed in both directions.

        Returns up to ``max_results`` ``{"id", "activation"}`` dicts, seeds
        excluded, sorted by descending activation.
        """
        activation: dict[str, float] = {}
        visited: set[str] = set()
        # One entry per node holding the strongest pending energy, so a node
        # is always expanded with its best energy regardless of arrival order.
        frontier: dict[str, float] = dict.fromkeys(seed_ids, 1.0)

        for depth in range(max_depth + 1):
            next_frontier: dict[str, float] = {}
            for node_id, energy in frontier.items():
                activation[node_id] = max(activation.get(node_id, 0.0), energy)
                if node_id in visited:
                    continue
                visited.add(node_id)
                if depth >= max_depth:
                    continue

                for edge in self._adjacency.get(node_id, []):
                    neighbor = (
                        edge["dst"] if edge["src"] == node_id
                        else edge["src"]
                    )
                    if neighbor in visited:
                        continue
                    edge_weight = edge.get("weight", 1.0)
                    if edge_weight < min_weight:
                        continue
                    # Decay: energy * 0.5 * normalized_edge_weight
                    prop_energy = energy * 0.5 * min(edge_weight / 5.0, 1.0)
                    if prop_energy > 0.01 and prop_energy > next_frontier.get(neighbor, 0.0):
                        next_frontier[neighbor] = prop_energy

            frontier = next_frontier

        # Remove seeds from results (caller already has them)
        seed_set = set(seed_ids)
        results = [
            {"id": nid, "activation": round(act, 4)}
            for nid, act in activation.items()
            if nid not in seed_set
        ]
        results.sort(key=lambda x: x["activation"], reverse=True)
        return results[:max_results]

    # ── Co-recall tracking ──────────────────────────────────────────

    def record_co_recall(self, fact_ids: list[str]) -> int:
        """Record that a set of facts were recalled together.

        Creates/strengthens co_recalled edges between all pairs of distinct
        IDs (duplicates in ``fact_ids`` are ignored, so no self-loops are
        created) and writes the graph to disk once at the end.
        Returns the number of edges created or strengthened.
        """
        unique_ids = list(dict.fromkeys(fact_ids))
        count = 0
        for i, a in enumerate(unique_ids):
            for b in unique_ids[i + 1:]:
                outcome = self._upsert_edge(a, b, "co_recalled", weight=1.0)
                if "error" not in outcome:
                    count += 1
        if count:
            self._save()
        return count

    # ── Cluster detection ───────────────────────────────────────────

    def detect_clusters(self, min_cluster_size: int = 3) -> list[list[str]]:
        """Simple connected-component clustering on co_recalled edges.

        Returns list of clusters (each is a list of fact IDs in breadth-first
        order), largest first; components smaller than ``min_cluster_size``
        are omitted.
        """
        adj: dict[str, set[str]] = defaultdict(set)
        for e in self._edges:
            if e["type"] == "co_recalled":
                adj[e["src"]].add(e["dst"])
                adj[e["dst"]].add(e["src"])

        visited: set[str] = set()
        clusters: list[list[str]] = []

        for node in adj:
            if node in visited:
                continue
            # BFS; ``cluster`` doubles as the queue, read through ``head``,
            # which avoids the O(n) cost of popping from the front of a list.
            visited.add(node)
            cluster = [node]
            head = 0
            while head < len(cluster):
                current = cluster[head]
                head += 1
                for neighbor in adj[current]:
                    if neighbor not in visited:
                        visited.add(neighbor)
                        cluster.append(neighbor)
            if len(cluster) >= min_cluster_size:
                clusters.append(cluster)

        clusters.sort(key=len, reverse=True)
        return clusters

    # ── File/symbol touch tracking ──────────────────────────────────

    def record_touch(self, fact_id: str, file_path: str, symbol: str = "") -> dict:
        """Link a fact to a file (and optionally a symbol).

        Returns the result for the file edge; the graph is saved once.
        """
        result = self._upsert_edge(fact_id, f"file:{file_path}", "touches")
        if symbol:
            self._upsert_edge(
                fact_id, f"symbol:{file_path}:{symbol}", "touches"
            )
        self._save()
        return result

    def get_facts_touching(self, file_path: str) -> list[str]:
        """Get fact IDs that touch a given file."""
        target = f"file:{file_path}"
        return [
            e["src"] for e in self._adjacency.get(target, [])
            if e["type"] == "touches" and e["dst"] == target
        ]

    # ── Contradiction/refinement tracking ───────────────────────────

    def record_contradiction(self, old_fact_id: str, new_fact_id: str) -> dict:
        """Mark that new_fact contradicts old_fact (edge points new -> old)."""
        return self.add_edge(new_fact_id, old_fact_id, "contradicts")

    def record_refinement(self, old_fact_id: str, new_fact_id: str) -> dict:
        """Mark that new_fact refines/updates old_fact (edge points new -> old)."""
        return self.add_edge(new_fact_id, old_fact_id, "refines")

    # ── Stats ───────────────────────────────────────────────────────

    def stats(self) -> dict:
        """Summary statistics about the graph."""
        type_counts: dict[str, int] = defaultdict(int)
        nodes: set[str] = set()
        for e in self._edges:
            type_counts[e["type"]] += 1
            nodes.add(e["src"])
            nodes.add(e["dst"])
        return {
            "total_edges": len(self._edges),
            "total_nodes": len(nodes),
            "edge_types": dict(type_counts),
            "clusters": len(self.detect_clusters()),
        }

    # ── Maintenance ─────────────────────────────────────────────────

    def decay_edges(self, half_life_days: float = 30.0) -> int:
        """Reduce weight of stale edges. Returns count of decayed edges.

        An edge's weight halves for every ``half_life_days`` elapsed since it
        was last seen *or last decayed*, whichever is later.  Recording the
        decay time in ``last_decayed`` makes repeated calls safe: each stretch
        of time is charged once instead of the full age being re-applied on
        every run.  Edges whose weight is below 0.01 are removed.

        Raises:
            ValueError: if ``half_life_days`` is not positive.
        """
        if half_life_days <= 0:
            raise ValueError("half_life_days must be positive")

        now = datetime.now(timezone.utc)
        now_iso = now.isoformat()
        decayed = 0
        survivors: list[dict] = []

        for edge in self._edges:
            stamps = [
                ts for ts in (
                    self._parse_timestamp(edge.get("last_seen", "")),
                    self._parse_timestamp(edge.get("last_decayed", "")),
                )
                if ts is not None
            ]
            # Edges without a usable timestamp are left untouched.
            age_days = (
                (now - max(stamps)).total_seconds() / 86400 if stamps else 0.0
            )
            weight = edge.get("weight", 1.0)

            if age_days > 0:
                new_weight = round(weight * 0.5 ** (age_days / half_life_days), 4)
                # Only move the marker when the rounded weight changed;
                # otherwise very frequent calls would keep resetting the clock
                # and the edge would never decay at all.
                if new_weight != weight:
                    edge["weight"] = new_weight
                    edge["last_decayed"] = now_iso
                    weight = new_weight
                    decayed += 1

            if weight >= 0.01:
                survivors.append(edge)

        pruned = len(survivors) != len(self._edges)
        if decayed or pruned:
            self._edges = survivors
            self._rebuild_adjacency()
            self._save()

        return decayed

    # ── Internal ────────────────────────────────────────────────────

    def _upsert_edge(
        self,
        src: str,
        dst: str,
        edge_type: str,
        weight: float = 1.0,
        metadata: dict | None = None,
    ) -> dict:
        """Add or strengthen an edge in memory only (no disk write)."""
        if edge_type not in EDGE_TYPES:
            return {"error": f"unknown edge type: {edge_type}"}

        now = datetime.now(timezone.utc).isoformat()
        existing = self._find_edge(src, dst, edge_type)

        if existing is not None:
            existing["weight"] = round(existing.get("weight", 1.0) + weight, 4)
            existing["last_seen"] = now
            existing["hit_count"] = existing.get("hit_count", 1) + 1
            if metadata:
                existing.setdefault("metadata", {}).update(metadata)
            return {"strengthened": True, "edge": existing}

        edge = {
            "src": src,
            "dst": dst,
            "type": edge_type,
            "weight": round(weight, 4),
            "created_at": now,
            "last_seen": now,
            "hit_count": 1,
            # Copy so later strengthening never mutates the caller's dict.
            "metadata": dict(metadata) if metadata else {},
        }

        # Enforce limits
        if len(self._edges) >= MAX_TOTAL_EDGES:
            self._prune_weakest(count=MAX_TOTAL_EDGES // 10)

        # The cap applies to both endpoints: hub targets such as "file:..."
        # nodes are almost always the dst, never the src.
        for endpoint in dict.fromkeys((src, dst)):
            if len(self._adjacency.get(endpoint, ())) >= MAX_EDGES_PER_NODE:
                self._prune_node_edges(endpoint, keep=MAX_EDGES_PER_NODE - 5)

        self._edges.append(edge)
        self._link(edge)
        return {"added": True, "edge": edge}

    def _find_edge(self, src: str, dst: str, edge_type: str) -> dict | None:
        """Return the stored edge ``src -> dst`` of ``edge_type``, if any.

        The reverse orientation also matches, but only for symmetric types.
        """
        symmetric = edge_type in _SYMMETRIC_EDGE_TYPES
        for e in self._adjacency.get(src, []):
            if e["type"] != edge_type:
                continue
            if e["src"] == src and e["dst"] == dst:
                return e
            if symmetric and e["src"] == dst and e["dst"] == src:
                return e
        return None

    def _prune_weakest(self, count: int = 100):
        """Remove the ``count`` weakest edges globally, keeping edge order."""
        if count <= 0:
            return
        weakest = sorted(self._edges, key=lambda e: e.get("weight", 0))[:count]
        doomed = {id(e) for e in weakest}
        self._edges = [e for e in self._edges if id(e) not in doomed]
        self._rebuild_adjacency()

    def _prune_node_edges(self, node_id: str, keep: int = 40):
        """Prune a node's weakest edges so that at most ``keep`` remain."""
        node_edges = [
            e for e in self._edges
            if e["src"] == node_id or e["dst"] == node_id
        ]
        if len(node_edges) <= keep:
            return
        node_edges.sort(key=lambda e: e.get("weight", 0))
        doomed = {id(e) for e in node_edges[:len(node_edges) - keep]}
        self._edges = [e for e in self._edges if id(e) not in doomed]
        self._rebuild_adjacency()

    def _link(self, edge: dict):
        """Index ``edge`` under both endpoints (once only for a self-loop)."""
        self._adjacency[edge["src"]].append(edge)
        if edge["dst"] != edge["src"]:
            self._adjacency[edge["dst"]].append(edge)

    def _rebuild_adjacency(self):
        """Recreate the endpoint index from the flat edge list."""
        self._adjacency = defaultdict(list)
        for e in self._edges:
            self._link(e)

    @staticmethod
    def _parse_timestamp(value) -> datetime | None:
        """Parse an ISO timestamp; naive values are taken as UTC."""
        try:
            parsed = datetime.fromisoformat(value)
        except (ValueError, TypeError):
            return None
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed

    @staticmethod
    def _is_valid_edge(edge) -> bool:
        """True if ``edge`` has the string fields every other method indexes."""
        return isinstance(edge, dict) and all(
            isinstance(edge.get(key), str) for key in ("src", "dst", "type")
        )

    def _load(self):
        """Load edges from disk.

        A missing, unreadable or malformed file yields an empty graph;
        individual malformed entries are skipped instead of discarding every
        edge in the file.
        """
        self._edges = []
        self._adjacency = defaultdict(list)
        if not self.graph_file.exists():
            return
        try:
            with open(self.graph_file, encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError):
            # ValueError covers JSONDecodeError and UnicodeDecodeError.
            return
        raw_edges = data.get("edges", []) if isinstance(data, dict) else []
        if not isinstance(raw_edges, list):
            return
        self._edges = [e for e in raw_edges if self._is_valid_edge(e)]
        self._rebuild_adjacency()

    def _save(self):
        """Write the graph to disk via a temp file and an atomic rename.

        An interrupted write therefore cannot leave a truncated graph file
        that the next load would treat as empty.
        """
        data = {"edges": self._edges}
        tmp_file = self.graph_file.with_name(self.graph_file.name + ".tmp")
        with open(tmp_file, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)
        tmp_file.replace(self.graph_file)
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
"""Memory Grounder — validates facts against codebase reality.
|
|
2
|
+
|
|
3
|
+
Facts can reference files, symbols, and patterns that may have been
|
|
4
|
+
renamed, deleted, or refactored. The grounder checks these references
|
|
5
|
+
and adjusts confidence/salience accordingly.
|
|
6
|
+
|
|
7
|
+
Grounding checks:
|
|
8
|
+
- File existence: does the referenced file still exist?
|
|
9
|
+
- Symbol existence: does the referenced function/class still exist?
|
|
10
|
+
- Content drift: has the file changed significantly since the fact was created? (not currently checked by this module)
|
|
11
|
+
- Confidence decay: ungrounded facts lose confidence over time
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import re
|
|
17
|
+
from datetime import datetime, timezone
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
# Patterns to extract file/symbol references from fact text.
#
# _FILE_PATTERN captures (group 1) a file name with a known extension and an
# optional directory prefix.  The leading boundary is consumed, but the
# trailing one is a lookahead so that the separator between two adjacent
# references ("a.py b.py") is still available to start the second match.  A
# sentence-ending period ("see auth.py.") also counts as a trailing boundary.
_FILE_PATTERN = re.compile(
    r"""(?:^|[\s(,'"`])"""                    # boundary
    r"""((?:[\w./-]+/)?"""                    # optional directory prefix
    r"""[\w.-]+"""                            # filename stem
    r"""\.(?:py|js|ts|tsx|jsx|go|rs|java|rb|c|cpp|h|hpp|css|html|json|yaml|yml|toml|md|sql|sh|bat))"""  # extension
    r"""(?=[\s),'"`:]|\.(?:\s|$)|$)""",       # boundary (not consumed)
    re.MULTILINE,
)

# Declaration keyword followed by an identifier (group 1).  The leading \b
# stops keywords from matching inside longer words ("outlet x", "prototype y").
_SYMBOL_PATTERN = re.compile(
    r"""\b(?:class|def|function|func|fn|struct|interface|type|const|var|let)\s+"""
    r"""([\w]+)""",
)

# Also match "ClassName", "function_name", "methodName" when preceded by context clues
# (case-insensitive; the name may be wrapped in backticks and may be dotted).
_NAMED_REF_PATTERN = re.compile(
    r"""\b(?:class|function|method|module|service|handler|middleware|component|hook)\s+"""
    r"""`?([\w.]+)`?""",
    re.IGNORECASE,
)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class GroundingResult:
    """Outcome of checking one fact's references against the codebase.

    A fresh result is optimistic: it is grounded, has no issues and carries
    no confidence adjustment until a check records otherwise.
    """

    def __init__(self, fact_id: str):
        self.fact_id = fact_id
        # One dict per file reference found in the fact text.
        self.file_refs: list[dict] = []
        # One dict per symbol reference found in the fact text.
        self.symbol_refs: list[dict] = []
        # Set to False as soon as any reference fails its check.
        self.grounded = True
        # Human-readable description of each failed check.
        self.issues: list[str] = []
        # Net change to apply to the fact's confidence.
        self.confidence_delta: float = 0.0

    def to_dict(self) -> dict:
        """Return a plain-dict view, with the delta rounded to 4 decimals."""
        payload: dict = {"fact_id": self.fact_id}
        payload["grounded"] = self.grounded
        payload["file_refs"] = self.file_refs
        payload["symbol_refs"] = self.symbol_refs
        payload["issues"] = self.issues
        payload["confidence_delta"] = round(self.confidence_delta, 4)
        return payload
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class MemoryGrounder:
    """Validates memory facts against the current codebase state.

    The collaborators are optional and duck-typed:

    * ``memory_store`` must expose a ``facts`` list of dicts and a
      ``_save_facts()`` method (used by ``ground_all`` and
      ``apply_confidence_decay``).
    * ``graph`` must expose ``record_touch(fact_id, file_path)``.
    * ``file_memory`` must expose ``search(symbol, top_k=...)`` returning a
      list of dicts that may carry a ``"file"`` key.
    """

    # Extensions accepted by the "word containing a slash" fallback extractor.
    _KNOWN_EXTENSIONS = frozenset({
        "py", "js", "ts", "tsx", "jsx", "go", "rs", "java",
        "rb", "c", "cpp", "h", "hpp", "css", "html", "json",
        "yaml", "yml", "toml", "md", "sql", "sh", "bat",
    })

    # Directories (relative to the project root) tried in order for a reference.
    _SEARCH_PREFIXES = ("", "src", "lib", "app")

    # Common words that the symbol patterns pick up but that are not symbols.
    _NOISE_WORDS = frozenset({
        "the", "a", "an", "is", "in", "on", "at", "to", "for",
        "of", "and", "or", "not", "with", "from", "by", "as",
        "that", "this", "it", "be", "are", "was", "were", "has",
        "have", "had", "do", "does", "did", "will", "would",
        "True", "False", "None", "true", "false", "null",
    })

    def __init__(
        self,
        project_path: str,
        memory_store: Any = None,
        graph: Any = None,
        file_memory: Any = None,
    ):
        self.project_path = Path(project_path)
        self.memory = memory_store
        self.graph = graph
        self.file_memory = file_memory

    # ── Public API ──────────────────────────────────────────────────

    def ground_fact(self, fact: dict) -> "GroundingResult":
        """Validate a single fact against the codebase.

        Each missing file costs 0.15 confidence.  Each missing symbol costs
        0.10, but only when at least one referenced file exists, because
        otherwise there was nothing to search.  A fact without extractable
        references stays grounded, since it cannot be disproved.
        """
        result = GroundingResult(fact.get("id", ""))
        text = fact.get("fact") or ""
        if not isinstance(text, str):
            text = str(text)

        # Check file references
        file_refs = self._extract_file_refs(text)
        for ref in file_refs:
            exists = self._file_exists(ref)
            if not exists:
                result.grounded = False
                result.issues.append(f"file not found: {ref}")
                result.confidence_delta -= 0.15
            result.file_refs.append({"path": ref, "exists": exists})

        # Check symbol references
        had_searchable_file = any(entry["exists"] for entry in result.file_refs)
        for sym in self._extract_symbol_refs(text):
            found, location = self._symbol_exists(sym, file_refs)
            if not found and had_searchable_file:
                # Only penalize if we had file context to search in
                result.grounded = False
                result.issues.append(f"symbol not found: {sym}")
                result.confidence_delta -= 0.10
            result.symbol_refs.append(
                {"name": sym, "found": found, "file": location}
            )

        return result

    def ground_all(self, max_facts: int = 100) -> dict:
        """Ground up to ``max_facts`` active facts. Returns summary stats.

        Confidence deltas are applied to the fact dicts in place (clamped to
        [0, 1]) and the store is saved if anything changed; a negative delta
        also bumps ``contradiction_count``.  When a graph is attached, every
        existing file reference is recorded as a ``touches`` edge.
        """
        if self.memory is None:
            return {"error": "no memory store"}

        facts = [
            f for f in self.memory.facts
            if f.get("lifecycle") == "active"
        ][:max_facts]

        results: list[dict] = []
        grounded_count = 0
        ungrounded_count = 0
        confidence_updates = 0

        for fact in facts:
            gr = self.ground_fact(fact)
            results.append(gr.to_dict())

            if gr.grounded:
                grounded_count += 1
            else:
                ungrounded_count += 1

            # Apply confidence delta
            if gr.confidence_delta != 0.0:
                current = fact.get("confidence", 1.0)
                new_conf = max(0.0, min(1.0, current + gr.confidence_delta))
                if new_conf != current:
                    fact["confidence"] = round(new_conf, 4)
                    # Also bump contradiction_count for scorer
                    if gr.confidence_delta < 0:
                        fact["contradiction_count"] = (
                            fact.get("contradiction_count", 0) + 1
                        )
                    confidence_updates += 1

        if confidence_updates:
            self.memory._save_facts()

        # Update graph — mark file references
        if self.graph is not None:
            for fact, gr_dict in zip(facts, results):
                fact_id = fact.get("id")
                if not fact_id:
                    # Nothing to attach the edge to.
                    continue
                for fref in gr_dict.get("file_refs", []):
                    if fref.get("exists"):
                        self.graph.record_touch(fact_id, fref["path"])

        ungrounded_facts = [r for r in results if not r["grounded"]]

        return {
            "total": len(facts),
            "grounded": grounded_count,
            "ungrounded": ungrounded_count,
            "confidence_updates": confidence_updates,
            "ungrounded_details": ungrounded_facts[:10],
        }

    def apply_confidence_decay(self, decay_per_day: float = 0.02) -> dict:
        """Apply daily confidence decay to ungrounded facts.

        Only active facts with a positive ``contradiction_count`` decay.  The
        loss is ``decay_per_day * days * 0.1``, where ``days`` is measured
        from the later of the fact's last access (``last_accessed_at``, else
        ``timestamp``) and its previous decay (``confidence_decayed_at``).
        Recording the decay time makes repeated calls safe: each day is
        charged once rather than the whole age being re-applied on every run.
        Spans shorter than one day are skipped.
        """
        if self.memory is None:
            return {"error": "no memory store"}

        facts = [
            f for f in self.memory.facts
            if f.get("lifecycle") == "active"
        ]

        decayed = 0
        now = datetime.now(timezone.utc)
        now_iso = now.isoformat()

        for fact in facts:
            # Only decay facts that have contradiction signals
            if fact.get("contradiction_count", 0) <= 0:
                continue

            start = self._parse_timestamp(
                fact.get("last_accessed_at") or fact.get("timestamp")
            )
            if start is None:
                continue
            last_decay = self._parse_timestamp(fact.get("confidence_decayed_at"))
            if last_decay is not None and last_decay > start:
                start = last_decay

            age_days = (now - start).total_seconds() / 86400
            if age_days < 1:
                continue

            current_conf = fact.get("confidence", 1.0)
            new_conf = round(
                max(0.0, current_conf - (decay_per_day * age_days * 0.1)), 4
            )
            # Compare the rounded value so the marker only advances when the
            # stored confidence really changed.
            if new_conf < current_conf:
                fact["confidence"] = new_conf
                fact["confidence_decayed_at"] = now_iso
                decayed += 1

        if decayed:
            self.memory._save_facts()

        return {"decayed": decayed, "total": len(facts)}

    # ── Extraction ──────────────────────────────────────────────────

    def _extract_file_refs(self, text: str) -> list[str]:
        """Extract file path references from fact text (sorted, de-duplicated)."""
        refs = set()
        for match in _FILE_PATTERN.finditer(text):
            path = match.group(1).strip()
            if path:
                refs.add(path)
        # Also look for explicit path-like references
        # e.g., "services/auth.py" or "cli/tools/memory.py"
        for word in text.split():
            word = word.strip("`,.'\"()[]{}:")
            if "://" in word:
                # URLs look like paths but are not files in the project.
                continue
            if "/" in word and "." in word.split("/")[-1]:
                ext = word.rsplit(".", 1)[-1].lower()
                if ext in self._KNOWN_EXTENSIONS:
                    refs.add(word)
        return sorted(refs)

    def _extract_symbol_refs(self, text: str) -> list[str]:
        """Extract symbol references from fact text (sorted, de-duplicated)."""
        symbols = set()
        for pattern in (_SYMBOL_PATTERN, _NAMED_REF_PATTERN):
            for match in pattern.finditer(text):
                # The dotted-name pattern is greedy and swallows a sentence's
                # final period ("class Foo."), so trim stray dots.
                name = match.group(1).strip(".")
                if name:
                    symbols.add(name)
        # Filter out common English words that match patterns
        return sorted(
            s for s in symbols if s not in self._NOISE_WORDS and len(s) > 2
        )

    # ── Existence checks ────────────────────────────────────────────

    @staticmethod
    def _parse_timestamp(value: Any) -> datetime | None:
        """Parse an ISO timestamp; naive values are taken as UTC."""
        if not value:
            return None
        try:
            parsed = datetime.fromisoformat(value)
        except (ValueError, TypeError):
            return None
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed

    def _resolve_ref(self, ref: str) -> Path | None:
        """Return the project file that ``ref`` points at, or None.

        The reference is tried relative to the project root and then under
        the common source directories.  Candidates that resolve outside the
        project root (absolute paths elsewhere, ``..`` escapes) are rejected.
        """
        try:
            root = self.project_path.resolve()
        except OSError:
            return None
        for prefix in self._SEARCH_PREFIXES:
            try:
                candidate = (root / prefix / ref).resolve()
                candidate.relative_to(root)  # ValueError if it escapes root
                if candidate.is_file():
                    return candidate
            except (OSError, ValueError):
                continue
        return None

    def _file_exists(self, ref: str) -> bool:
        """Check if a file reference exists in the project."""
        return self._resolve_ref(ref) is not None

    def _symbol_exists(
        self, symbol: str, file_refs: list[str]
    ) -> tuple[bool, str]:
        """Check if a symbol exists in the referenced files or project.

        Returns ``(found, location)``, where ``location`` is the matching file
        reference (or the structural index's ``"file"`` entry) and ``""``
        when nothing was found.  Matching is a plain substring test.
        """
        # Search in referenced files first, resolving them exactly as
        # _file_exists does so that both checks agree on which files exist.
        for ref in file_refs:
            path = self._resolve_ref(ref)
            if path is None:
                continue
            try:
                content = path.read_text(encoding="utf-8", errors="ignore")
            except OSError:
                continue
            if symbol in content:
                return True, ref

        # If file_memory is available, use structural index
        if self.file_memory is not None:
            try:
                results = self.file_memory.search(symbol, top_k=1)
                if results:
                    return True, results[0].get("file", "")
            except Exception:
                # Best effort: the index is an optional, externally supplied
                # helper, and a failure there must not abort grounding.
                pass

        return False, ""
|