code-context-control 2.28.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. cli/__init__.py +1 -0
  2. cli/_hook_utils.py +99 -0
  3. cli/c3.py +6152 -0
  4. cli/commands/__init__.py +1 -0
  5. cli/commands/common.py +312 -0
  6. cli/commands/parser.py +286 -0
  7. cli/docs.html +3178 -0
  8. cli/edits.html +878 -0
  9. cli/hook_auto_snapshot.py +142 -0
  10. cli/hook_c3_signal.py +61 -0
  11. cli/hook_c3read.py +116 -0
  12. cli/hook_edit_ledger.py +213 -0
  13. cli/hook_edit_unlock.py +170 -0
  14. cli/hook_filter.py +130 -0
  15. cli/hook_ghost_files.py +238 -0
  16. cli/hook_pretool_enforce.py +334 -0
  17. cli/hook_read.py +200 -0
  18. cli/hook_session_stats.py +62 -0
  19. cli/hook_terse_advisor.py +190 -0
  20. cli/hub.html +3764 -0
  21. cli/hub_server.py +1619 -0
  22. cli/mcp_proxy.py +428 -0
  23. cli/mcp_server.py +660 -0
  24. cli/server.py +2985 -0
  25. cli/tools/__init__.py +4 -0
  26. cli/tools/_helpers.py +65 -0
  27. cli/tools/agent.py +1165 -0
  28. cli/tools/compress.py +215 -0
  29. cli/tools/delegate.py +1184 -0
  30. cli/tools/edit.py +313 -0
  31. cli/tools/edits.py +118 -0
  32. cli/tools/filter.py +285 -0
  33. cli/tools/impact.py +163 -0
  34. cli/tools/memory.py +469 -0
  35. cli/tools/read.py +224 -0
  36. cli/tools/search.py +337 -0
  37. cli/tools/session.py +95 -0
  38. cli/tools/shell.py +193 -0
  39. cli/tools/status.py +306 -0
  40. cli/tools/validate.py +310 -0
  41. cli/ui/api.js +36 -0
  42. cli/ui/app.js +207 -0
  43. cli/ui/components/chat.js +758 -0
  44. cli/ui/components/dashboard.js +689 -0
  45. cli/ui/components/edits.js +220 -0
  46. cli/ui/components/instructions.js +481 -0
  47. cli/ui/components/memory.js +626 -0
  48. cli/ui/components/sessions.js +606 -0
  49. cli/ui/components/settings.js +1404 -0
  50. cli/ui/components/sidebar.js +156 -0
  51. cli/ui/icons.js +51 -0
  52. cli/ui/shared.js +119 -0
  53. cli/ui/theme.js +22 -0
  54. cli/ui.html +168 -0
  55. cli/ui_legacy.html +6797 -0
  56. cli/ui_nano.html +503 -0
  57. code_context_control-2.28.0.dist-info/METADATA +248 -0
  58. code_context_control-2.28.0.dist-info/RECORD +150 -0
  59. code_context_control-2.28.0.dist-info/WHEEL +5 -0
  60. code_context_control-2.28.0.dist-info/entry_points.txt +4 -0
  61. code_context_control-2.28.0.dist-info/licenses/LICENSE +201 -0
  62. code_context_control-2.28.0.dist-info/top_level.txt +5 -0
  63. core/__init__.py +75 -0
  64. core/config.py +269 -0
  65. core/ide.py +188 -0
  66. oracle/__init__.py +1 -0
  67. oracle/config.py +75 -0
  68. oracle/oracle.html +3900 -0
  69. oracle/oracle_server.py +663 -0
  70. oracle/services/__init__.py +1 -0
  71. oracle/services/c3_bridge.py +210 -0
  72. oracle/services/chat_engine.py +1103 -0
  73. oracle/services/chat_store.py +155 -0
  74. oracle/services/cross_memory.py +154 -0
  75. oracle/services/federated_graph.py +463 -0
  76. oracle/services/health_checker.py +117 -0
  77. oracle/services/insight_engine.py +307 -0
  78. oracle/services/memory_reader.py +106 -0
  79. oracle/services/memory_writer.py +182 -0
  80. oracle/services/ollama_bridge.py +332 -0
  81. oracle/services/project_scanner.py +87 -0
  82. oracle/services/review_agent.py +206 -0
  83. services/__init__.py +1 -0
  84. services/activity_log.py +93 -0
  85. services/agent_base.py +124 -0
  86. services/agents.py +1529 -0
  87. services/auto_memory.py +407 -0
  88. services/bench/__init__.py +6 -0
  89. services/bench/external/__init__.py +29 -0
  90. services/bench/external/aider_polyglot.py +405 -0
  91. services/bench/external/swe_bench.py +485 -0
  92. services/benchmark_dashboard.py +596 -0
  93. services/claude_md.py +785 -0
  94. services/compressor.py +592 -0
  95. services/context_snapshot.py +356 -0
  96. services/conversation_store.py +870 -0
  97. services/doc_index.py +537 -0
  98. services/e2e_benchmark.py +2884 -0
  99. services/e2e_evaluator.py +396 -0
  100. services/e2e_tasks.py +743 -0
  101. services/edit_ledger.py +459 -0
  102. services/embedding_index.py +341 -0
  103. services/error_reporting.py +123 -0
  104. services/file_memory.py +734 -0
  105. services/hub_service.py +585 -0
  106. services/indexer.py +712 -0
  107. services/memory.py +318 -0
  108. services/memory_consolidator.py +538 -0
  109. services/memory_graph.py +382 -0
  110. services/memory_grounder.py +304 -0
  111. services/memory_scorer.py +246 -0
  112. services/metrics.py +86 -0
  113. services/notifications.py +209 -0
  114. services/ollama_client.py +201 -0
  115. services/output_filter.py +488 -0
  116. services/parser.py +1238 -0
  117. services/project_manager.py +579 -0
  118. services/protocol.py +306 -0
  119. services/proxy_state.py +152 -0
  120. services/retrieval_broker.py +129 -0
  121. services/router.py +414 -0
  122. services/runtime.py +326 -0
  123. services/session_benchmark.py +1945 -0
  124. services/session_manager.py +1026 -0
  125. services/session_preloader.py +251 -0
  126. services/text_index.py +90 -0
  127. services/tool_classifier.py +176 -0
  128. services/transcript_index.py +340 -0
  129. services/validation_cache.py +155 -0
  130. services/vector_store.py +299 -0
  131. services/version_tracker.py +271 -0
  132. services/watcher.py +192 -0
  133. tui/__init__.py +0 -0
  134. tui/backend.py +59 -0
  135. tui/main.py +145 -0
  136. tui/screens/__init__.py +1 -0
  137. tui/screens/benchmark_view.py +109 -0
  138. tui/screens/claudemd_view.py +46 -0
  139. tui/screens/compress_view.py +52 -0
  140. tui/screens/index_view.py +74 -0
  141. tui/screens/init_view.py +82 -0
  142. tui/screens/mcp_view.py +73 -0
  143. tui/screens/optimize_view.py +41 -0
  144. tui/screens/pipe_view.py +46 -0
  145. tui/screens/projects_view.py +355 -0
  146. tui/screens/search_view.py +55 -0
  147. tui/screens/session_view.py +143 -0
  148. tui/screens/stats.py +158 -0
  149. tui/screens/ui_view.py +54 -0
  150. tui/theme.tcss +335 -0
@@ -0,0 +1,382 @@
1
+ """Memory Graph — directed graph connecting facts, files, and symbols.
2
+
3
+ Facts are nodes; relationships are weighted directed edges with types.
4
+ The graph enables spreading activation (recall neighbours of recalled facts),
5
+ cluster detection, and gap analysis.
6
+
7
+ Edge types:
8
+ co_recalled — two facts recalled in the same query/session
9
+ caused_by — causal chain (user-stated or inferred)
10
+ leads_to — consequence/dependency
11
+ touches — fact references a file or symbol
12
+ contradicts — newer fact overrides or conflicts with older one
13
+ refines — fact was updated; old version linked
14
+ clusters_with — computed via community detection on co-recall edges
15
+
16
+ Storage: <data_dir>/memory_graph.json (.c3/facts/memory_graph.json by default), a JSON object holding a flat "edges" list
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import json
22
+ from collections import defaultdict
23
+ from datetime import datetime, timezone
24
+ from pathlib import Path
25
+
# Relationship kinds accepted by MemoryGraph.add_edge; any other value is
# rejected with an error dict.
EDGE_TYPES = {
    "co_recalled",
    "caused_by",
    "leads_to",
    "touches",
    "contradicts",
    "refines",
    "clusters_with",
}

# Limits
# Once a source node holds this many edges, its weakest ones are pruned
# before a new edge is added.
MAX_EDGES_PER_NODE = 50
# Once the graph holds this many edges, the weakest tenth is pruned before a
# new edge is added.
MAX_TOTAL_EDGES = 5000
34
+
35
+
class MemoryGraph:
    """Persistent directed graph over memory facts.

    Each edge is a plain dict with the keys ``src``, ``dst``, ``type``,
    ``weight``, ``created_at``, ``last_seen``, ``hit_count`` and
    ``metadata`` (plus ``decayed_at`` once the edge has been decayed).
    Edges live in a flat list (``_edges``) and are indexed by
    ``_adjacency``, which maps a node ID to every edge that has the node as
    either endpoint. Mutating methods write the whole graph back to
    ``memory_graph.json`` inside the data directory.
    """

    # Edge types that carry no direction: an A-B edge and a B-A edge describe
    # the same relationship. Every other type is directional, so the reverse
    # edge must be treated as a different edge.
    _SYMMETRIC_TYPES = frozenset({"co_recalled", "clusters_with"})

    def __init__(self, project_path: str, data_dir: str = ".c3/facts"):
        """Open the graph stored under ``project_path / data_dir``.

        The directory is created if missing and any existing
        ``memory_graph.json`` in it is loaded.
        """
        self.project_path = Path(project_path)
        self.data_dir = self.project_path / data_dir
        self.data_dir.mkdir(parents=True, exist_ok=True)
        self.graph_file = self.data_dir / "memory_graph.json"
        self._edges: list[dict] = []
        self._adjacency: dict[str, list[dict]] = defaultdict(list)
        self._load()

    # ── Edge management ─────────────────────────────────────────────

    def add_edge(
        self,
        src: str,
        dst: str,
        edge_type: str,
        weight: float = 1.0,
        metadata: dict | None = None,
    ) -> dict:
        """Add or strengthen an edge between two nodes and persist the graph.

        Returns ``{"error": ...}`` for an unknown ``edge_type`` (nothing is
        written), ``{"strengthened": True, "edge": ...}`` when a matching
        edge already existed, or ``{"added": True, "edge": ...}`` for a new
        edge.
        """
        outcome = self._upsert_edge(src, dst, edge_type, weight, metadata)
        if "error" not in outcome:
            self._save()
        return outcome

    def remove_edge(self, src: str, dst: str, edge_type: str) -> dict:
        """Remove a specific edge.

        Returns ``{"removed": True}`` or ``{"error": "not found"}``.
        """
        target = self._find_edge(src, dst, edge_type)
        if target is None:
            return {"error": "not found"}
        # Filter by identity so exactly the edge that was found is dropped.
        self._edges = [e for e in self._edges if e is not target]
        self._rebuild_adjacency()
        self._save()
        return {"removed": True}

    def remove_node(self, node_id: str) -> dict:
        """Remove all edges involving a node (when a fact is deleted).

        Returns ``{"removed_edges": <count>}``.
        """
        before = len(self._edges)
        self._edges = [
            e for e in self._edges
            if e["src"] != node_id and e["dst"] != node_id
        ]
        self._rebuild_adjacency()
        self._save()
        return {"removed_edges": before - len(self._edges)}

    # ── Query ───────────────────────────────────────────────────────

    def get_edges(self, node_id: str, edge_type: str | None = None) -> list[dict]:
        """Get all edges for a node, optionally filtered by type.

        The returned list is a fresh list, so callers may reorder or trim it
        without corrupting the adjacency index. The edge dicts themselves
        are the live objects.
        """
        edges = self._adjacency.get(node_id, [])
        if edge_type:
            return [e for e in edges if e["type"] == edge_type]
        return list(edges)

    def get_neighbors(self, node_id: str, edge_type: str | None = None) -> list[str]:
        """Get the IDs of nodes connected to ``node_id`` in either direction."""
        neighbors = set()
        for e in self.get_edges(node_id, edge_type):
            neighbors.add(e["dst"] if e["src"] == node_id else e["src"])
        return list(neighbors)

    def spreading_activation(
        self,
        seed_ids: list[str],
        max_depth: int = 2,
        min_weight: float = 0.5,
        max_results: int = 20,
    ) -> list[dict]:
        """Activate from seed nodes and spread through the graph.

        Every seed starts with energy 1.0. Crossing an edge multiplies the
        energy by ``0.5 * min(edge_weight / 5.0, 1.0)``; edges lighter than
        ``min_weight`` are not followed, and energy of 0.01 or less is not
        propagated. Edges are followed in both directions. A node keeps the
        highest energy with which it was reached.

        Returns up to ``max_results`` dicts ``{"id", "activation"}`` sorted
        by activation (highest first), excluding the seeds themselves.
        """
        activation: dict[str, float] = {}
        visited: set[str] = set()
        frontier = [(nid, 1.0) for nid in seed_ids]

        for depth in range(max_depth + 1):
            next_frontier: list[tuple[str, float]] = []
            for node_id, energy in frontier:
                activation[node_id] = max(activation.get(node_id, 0.0), energy)
                if node_id in visited:
                    # Already expanded; only the best energy is recorded.
                    continue
                visited.add(node_id)
                if depth >= max_depth:
                    continue

                for edge in self._adjacency.get(node_id, []):
                    neighbor = (
                        edge["dst"] if edge["src"] == node_id else edge["src"]
                    )
                    if neighbor in visited:
                        continue
                    edge_weight = edge.get("weight", 1.0)
                    if edge_weight < min_weight:
                        continue
                    # Decay: energy * 0.5 * normalized_edge_weight
                    prop_energy = energy * 0.5 * min(edge_weight / 5.0, 1.0)
                    if prop_energy > 0.01:
                        next_frontier.append((neighbor, prop_energy))

            frontier = next_frontier

        # Remove seeds from results (caller already has them)
        seed_set = set(seed_ids)
        results = [
            {"id": nid, "activation": round(act, 4)}
            for nid, act in activation.items()
            if nid not in seed_set
        ]
        results.sort(key=lambda x: x["activation"], reverse=True)
        return results[:max_results]

    # ── Co-recall tracking ──────────────────────────────────────────

    def record_co_recall(self, fact_ids: list[str]) -> int:
        """Record that a set of facts were recalled together.

        Creates/strengthens a ``co_recalled`` edge between every pair of
        distinct fact IDs. Duplicate IDs are collapsed first so a fact is
        never linked to itself. The graph is written to disk once at the
        end rather than once per pair.

        Returns the number of edges created or strengthened.
        """
        unique_ids = list(dict.fromkeys(fact_ids))
        count = 0
        for i, a in enumerate(unique_ids):
            for b in unique_ids[i + 1:]:
                self._upsert_edge(a, b, "co_recalled", weight=1.0)
                count += 1
        if count:
            self._save()
        return count

    # ── Cluster detection ───────────────────────────────────────────

    def detect_clusters(self, min_cluster_size: int = 3) -> list[list[str]]:
        """Simple connected-component clustering on co_recalled edges.

        Returns the components that have at least ``min_cluster_size``
        members, largest first; each cluster is a list of fact IDs.
        """
        from collections import deque

        adj: dict[str, set[str]] = defaultdict(set)
        for e in self._edges:
            if e["type"] == "co_recalled":
                adj[e["src"]].add(e["dst"])
                adj[e["dst"]].add(e["src"])

        visited: set[str] = set()
        clusters: list[list[str]] = []

        for node in adj:
            if node in visited:
                continue
            # BFS over the component containing ``node``
            cluster: list[str] = []
            queue = deque([node])
            while queue:
                current = queue.popleft()
                if current in visited:
                    continue
                visited.add(current)
                cluster.append(current)
                queue.extend(n for n in adj[current] if n not in visited)
            if len(cluster) >= min_cluster_size:
                clusters.append(cluster)

        clusters.sort(key=len, reverse=True)
        return clusters

    # ── File/symbol touch tracking ──────────────────────────────────

    def record_touch(self, fact_id: str, file_path: str, symbol: str = "") -> dict:
        """Link a fact to a file (and optionally a symbol).

        Files are represented by the node ``file:<path>`` and symbols by
        ``symbol:<path>:<symbol>``. Returns the result for the file edge.
        The graph is saved once, after both edges are in place.
        """
        result = self._upsert_edge(fact_id, f"file:{file_path}", "touches")
        if symbol:
            self._upsert_edge(
                fact_id, f"symbol:{file_path}:{symbol}", "touches"
            )
        self._save()
        return result

    def get_facts_touching(self, file_path: str) -> list[str]:
        """Get fact IDs that touch a given file."""
        target = f"file:{file_path}"
        return [
            e["src"] for e in self._adjacency.get(target, [])
            if e["type"] == "touches"
        ]

    # ── Contradiction/refinement tracking ───────────────────────────

    def record_contradiction(self, old_fact_id: str, new_fact_id: str) -> dict:
        """Mark that new_fact contradicts old_fact (edge runs new -> old)."""
        return self.add_edge(new_fact_id, old_fact_id, "contradicts")

    def record_refinement(self, old_fact_id: str, new_fact_id: str) -> dict:
        """Mark that new_fact refines/updates old_fact (edge runs new -> old)."""
        return self.add_edge(new_fact_id, old_fact_id, "refines")

    # ── Stats ───────────────────────────────────────────────────────

    def stats(self) -> dict:
        """Summary statistics about the graph.

        Returns the edge count, the number of distinct endpoints, a per-type
        edge count and the number of clusters found by ``detect_clusters``
        with its default minimum size.
        """
        type_counts: dict[str, int] = defaultdict(int)
        nodes = set()
        for e in self._edges:
            type_counts[e["type"]] += 1
            nodes.add(e["src"])
            nodes.add(e["dst"])
        return {
            "total_edges": len(self._edges),
            "total_nodes": len(nodes),
            "edge_types": dict(type_counts),
            "clusters": len(self.detect_clusters()),
        }

    # ── Maintenance ─────────────────────────────────────────────────

    def decay_edges(self, half_life_days: float = 30.0) -> int:
        """Reduce weight of stale edges. Returns count of decayed edges.

        An edge's weight halves for every ``half_life_days`` that pass
        without the edge being seen. Elapsed time is measured from the later
        of ``last_seen`` and ``decayed_at`` (stamped here whenever a weight
        is lowered), so running this method repeatedly does not apply the
        same period of staleness more than once. Edges whose weight is below
        0.01 are removed.

        Raises:
            ValueError: if ``half_life_days`` is not positive.
        """
        if half_life_days <= 0:
            raise ValueError("half_life_days must be positive")

        now = datetime.now(timezone.utc)
        stamp = now.isoformat()
        decayed = 0
        survivors: list[dict] = []

        for edge in self._edges:
            weight = edge.get("weight", 1.0)
            elapsed_days = self._days_since_reference(edge, now)
            if elapsed_days > 0:
                reduced = round(
                    weight * 0.5 ** (elapsed_days / half_life_days), 4
                )
                # Only stamp when the rounded weight really dropped;
                # otherwise the elapsed time keeps accumulating and very
                # frequent calls cannot round the decay away forever.
                if reduced < weight:
                    weight = reduced
                    edge["weight"] = reduced
                    edge["decayed_at"] = stamp
                    decayed += 1
            if weight >= 0.01:
                survivors.append(edge)

        if decayed or len(survivors) != len(self._edges):
            self._edges = survivors
            self._rebuild_adjacency()
            self._save()

        return decayed

    # ── Internal ────────────────────────────────────────────────────

    @staticmethod
    def _days_since_reference(edge: dict, now: datetime) -> float:
        """Days between ``now`` and the edge's latest seen/decayed timestamp.

        Naive timestamps are taken to be UTC. Returns 0.0 when neither
        timestamp can be parsed, which leaves the edge undecayed.
        """
        latest = None
        for key in ("last_seen", "decayed_at"):
            try:
                moment = datetime.fromisoformat(edge.get(key) or "")
            except (ValueError, TypeError):
                continue
            if moment.tzinfo is None:
                moment = moment.replace(tzinfo=timezone.utc)
            if latest is None or moment > latest:
                latest = moment
        if latest is None:
            return 0.0
        return (now - latest).total_seconds() / 86400

    def _upsert_edge(
        self,
        src: str,
        dst: str,
        edge_type: str,
        weight: float = 1.0,
        metadata: dict | None = None,
    ) -> dict:
        """Add or strengthen an edge in memory without writing to disk.

        Returns the same dicts as ``add_edge``; the caller is responsible
        for calling ``_save``.
        """
        if edge_type not in EDGE_TYPES:
            return {"error": f"unknown edge type: {edge_type}"}

        now = datetime.now(timezone.utc).isoformat()
        existing = self._find_edge(src, dst, edge_type)

        if existing is not None:
            existing["weight"] = round(existing.get("weight", 1.0) + weight, 4)
            existing["last_seen"] = now
            existing["hit_count"] = existing.get("hit_count", 1) + 1
            if metadata:
                existing.setdefault("metadata", {}).update(metadata)
            return {"strengthened": True, "edge": existing}

        edge = {
            "src": src,
            "dst": dst,
            "type": edge_type,
            "weight": round(weight, 4),
            "created_at": now,
            "last_seen": now,
            "hit_count": 1,
            "metadata": metadata or {},
        }

        # Enforce limits before inserting so the new edge is never the one
        # that gets pruned.
        if len(self._edges) >= MAX_TOTAL_EDGES:
            self._prune_weakest(count=MAX_TOTAL_EDGES // 10)
        if len(self._adjacency[src]) >= MAX_EDGES_PER_NODE:
            self._prune_node_edges(src, keep=MAX_EDGES_PER_NODE - 5)

        self._edges.append(edge)
        self._index_edge(edge)
        return {"added": True, "edge": edge}

    def _find_edge(self, src: str, dst: str, edge_type: str) -> dict | None:
        """Return the stored edge matching ``src -> dst`` of ``edge_type``.

        For symmetric edge types the reverse edge ``dst -> src`` also
        matches. Directional types only match in the stated direction.
        """
        symmetric = edge_type in self._SYMMETRIC_TYPES
        for e in self._adjacency.get(src, []):
            if e["type"] != edge_type:
                continue
            if e["src"] == src and e["dst"] == dst:
                return e
            if symmetric and e["src"] == dst and e["dst"] == src:
                return e
        return None

    def _prune_weakest(self, count: int = 100):
        """Remove the ``count`` weakest edges globally (in memory only)."""
        self._edges.sort(key=lambda e: e.get("weight", 0))
        del self._edges[:count]
        self._rebuild_adjacency()

    def _prune_node_edges(self, node_id: str, keep: int = 40):
        """Prune a node's weakest edges so at most ``keep`` remain."""
        node_edges = [
            e for e in self._edges
            if e["src"] == node_id or e["dst"] == node_id
        ]
        if len(node_edges) <= keep:
            return
        node_edges.sort(key=lambda e: e.get("weight", 0))
        # Compare by identity: distinct edges may hold equal contents.
        doomed = {id(e) for e in node_edges[:len(node_edges) - keep]}
        self._edges = [e for e in self._edges if id(e) not in doomed]
        self._rebuild_adjacency()

    def _index_edge(self, edge: dict):
        """Register ``edge`` under both endpoints (once for a self-loop)."""
        self._adjacency[edge["src"]].append(edge)
        if edge["dst"] != edge["src"]:
            self._adjacency[edge["dst"]].append(edge)

    def _rebuild_adjacency(self):
        """Recompute the node -> edges index from the flat edge list."""
        self._adjacency = defaultdict(list)
        for e in self._edges:
            self._index_edge(e)

    def _load(self):
        """Load edges from disk, falling back to an empty graph.

        A missing, unreadable or malformed file yields an empty graph
        instead of an exception. Entries that are not dicts with string
        ``src``/``dst`` and a ``type`` are dropped, because the rest of the
        class indexes those keys directly.
        """
        self._edges = []
        self._adjacency = defaultdict(list)
        if not self.graph_file.exists():
            return
        try:
            with open(self.graph_file, encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError, RecursionError):
            # Best effort: a corrupt graph file must not break start-up.
            return

        raw_edges = data.get("edges", []) if isinstance(data, dict) else []
        if not isinstance(raw_edges, list):
            raw_edges = []
        self._edges = [
            e for e in raw_edges
            if isinstance(e, dict)
            and isinstance(e.get("src"), str)
            and isinstance(e.get("dst"), str)
            and "type" in e
        ]
        self._rebuild_adjacency()

    def _save(self):
        """Write the graph to disk.

        The data goes to a sibling temporary file first and is then moved
        over ``memory_graph.json``, so a failure while serialising cannot
        leave a truncated graph file behind.
        """
        tmp_file = self.graph_file.with_name(self.graph_file.name + ".tmp")
        with open(tmp_file, "w", encoding="utf-8") as f:
            json.dump({"edges": self._edges}, f, indent=2)
        tmp_file.replace(self.graph_file)
@@ -0,0 +1,304 @@
1
+ """Memory Grounder — validates facts against codebase reality.
2
+
3
+ Facts can reference files, symbols, and patterns that may have been
4
+ renamed, deleted, or refactored. The grounder checks these references
5
+ and adjusts confidence/salience accordingly.
6
+
7
+ Grounding checks:
8
+ - File existence: does the referenced file still exist?
9
+ - Symbol existence: does the referenced function/class still exist?
10
+ - Content drift: has the file changed significantly since the fact was created?
11
+ - Confidence decay: ungrounded facts lose confidence over time
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import re
17
+ from datetime import datetime, timezone
18
+ from pathlib import Path
19
+ from typing import Any
20
+
21
+ # Patterns to extract file/symbol references from fact text
22
+ _FILE_PATTERN = re.compile(
23
+ r"""(?:^|[\s(,'"`])""" # boundary
24
+ r"""((?:[\w./-]+/)?""" # optional directory prefix
25
+ r"""[\w.-]+""" # filename stem
26
+ r"""\.(?:py|js|ts|tsx|jsx|go|rs|java|rb|c|cpp|h|hpp|css|html|json|yaml|yml|toml|md|sql|sh|bat))""" # extension
27
+ r"""(?:[\s),'"`:]|$)""", # boundary
28
+ re.MULTILINE,
29
+ )
30
+
31
+ _SYMBOL_PATTERN = re.compile(
32
+ r"""(?:class|def|function|func|fn|struct|interface|type|const|var|let)\s+"""
33
+ r"""([\w]+)""",
34
+ )
35
+
36
+ # Also match "ClassName", "function_name", "methodName" when preceded by context clues
37
+ _NAMED_REF_PATTERN = re.compile(
38
+ r"""(?:(?:class|function|method|module|service|handler|middleware|component|hook)\s+)"""
39
+ r"""`?([\w.]+)`?""",
40
+ re.IGNORECASE,
41
+ )
42
+
43
+
class GroundingResult:
    """Outcome of checking one fact against the codebase.

    A fresh result is optimistic: ``grounded`` is True, no issues are
    recorded and the confidence adjustment is zero. Whoever performs the
    checks fills in the lists and flips ``grounded`` as problems are found.
    """

    def __init__(self, fact_id: str):
        self.fact_id = fact_id
        self.grounded = True
        self.confidence_delta: float = 0.0
        self.issues: list[str] = []
        self.file_refs: list[dict] = []  # {path, exists, changed}
        self.symbol_refs: list[dict] = []  # {name, found, file}

    def to_dict(self) -> dict:
        """Return the result as a plain dict, delta rounded to 4 places."""
        payload: dict = {"fact_id": self.fact_id, "grounded": self.grounded}
        for attr in ("file_refs", "symbol_refs", "issues"):
            payload[attr] = getattr(self, attr)
        payload["confidence_delta"] = round(self.confidence_delta, 4)
        return payload
64
+
65
+
class MemoryGrounder:
    """Validates memory facts against the current codebase state.

    Collaborators are optional and duck-typed. From the way they are used
    here: ``memory_store`` exposes ``facts`` (an iterable of fact dicts) and
    ``_save_facts()``; ``graph`` exposes ``record_touch(fact_id, path)``;
    ``file_memory`` exposes ``search(symbol, top_k=1)`` returning a sequence
    of dicts that may carry a ``"file"`` key.
    """

    # Extensions that make a slash-containing word count as a file reference.
    _FILE_EXTENSIONS = frozenset({
        "py", "js", "ts", "tsx", "jsx", "go", "rs", "java",
        "rb", "c", "cpp", "h", "hpp", "css", "html", "json",
        "yaml", "yml", "toml", "md", "sql", "sh", "bat",
    })

    # Directories, relative to the project root, in which a file reference
    # is looked up. The empty prefix is the project root itself.
    _SEARCH_PREFIXES = ("", "src", "lib", "app")

    # Common English words and literals that the symbol patterns pick up.
    _NOISE_WORDS = frozenset({
        "the", "a", "an", "is", "in", "on", "at", "to", "for",
        "of", "and", "or", "not", "with", "from", "by", "as",
        "that", "this", "it", "be", "are", "was", "were", "has",
        "have", "had", "do", "does", "did", "will", "would",
        "True", "False", "None", "true", "false", "null",
    })

    def __init__(
        self,
        project_path: str,
        memory_store: Any = None,
        graph: Any = None,
        file_memory: Any = None,
    ):
        self.project_path = Path(project_path)
        self.memory = memory_store
        self.graph = graph
        self.file_memory = file_memory

    # ── Public API ──────────────────────────────────────────────────

    def ground_fact(self, fact: dict) -> GroundingResult:
        """Validate a single fact against the codebase.

        File and symbol references are extracted from ``fact["fact"]``.
        Each missing file costs 0.15 confidence. Each missing symbol costs
        0.10, but only when the fact also names at least one file. A fact
        with no extractable references stays grounded, since nothing can
        be disproved.
        """
        result = GroundingResult(fact.get("id", ""))
        text = fact.get("fact") or ""

        # Check file references
        file_refs = self._extract_file_refs(text)
        for ref in file_refs:
            exists = self._file_exists(ref)
            if not exists:
                result.grounded = False
                result.issues.append(f"file not found: {ref}")
                result.confidence_delta -= 0.15
            result.file_refs.append({"path": ref, "exists": exists})

        # Check symbol references
        for sym in self._extract_symbol_refs(text):
            found, location = self._symbol_exists(sym, file_refs)
            if not found and file_refs:
                # Only penalize if we had file context to search in
                result.grounded = False
                result.issues.append(f"symbol not found: {sym}")
                result.confidence_delta -= 0.10
            result.symbol_refs.append(
                {"name": sym, "found": found, "file": location}
            )

        return result

    def ground_all(self, max_facts: int = 100) -> dict:
        """Ground all active facts. Returns summary stats.

        Only the first ``max_facts`` facts whose ``lifecycle`` is
        ``"active"`` are checked. A non-zero confidence delta is applied to
        the fact (clamped to 0.0-1.0) and a negative one also bumps
        ``contradiction_count``. Changed facts are saved through the memory
        store. When a graph is attached, every file reference that exists is
        recorded as a touch for facts that have an ID.

        Returns ``{"error": "no memory store"}`` when no store is attached.
        """
        if not self.memory:
            return {"error": "no memory store"}

        facts = [
            f for f in self.memory.facts
            if f.get("lifecycle") == "active"
        ][:max_facts]

        results: list[dict] = []
        grounded_count = 0
        confidence_updates = 0

        for fact in facts:
            gr = self.ground_fact(fact)
            results.append(gr.to_dict())
            if gr.grounded:
                grounded_count += 1

            # Apply confidence delta
            if gr.confidence_delta == 0.0:
                continue
            current_conf = fact.get("confidence", 1.0)
            new_conf = max(0.0, min(1.0, current_conf + gr.confidence_delta))
            if new_conf != current_conf:
                fact["confidence"] = round(new_conf, 4)
                # Also bump contradiction_count for scorer
                if gr.confidence_delta < 0:
                    fact["contradiction_count"] = (
                        fact.get("contradiction_count", 0) + 1
                    )
                confidence_updates += 1

        if confidence_updates:
            self.memory._save_facts()

        # Update graph — mark file references
        if self.graph:
            for gr_dict in results:
                fact_id = gr_dict["fact_id"]
                if not fact_id:
                    # A fact without an ID cannot be a graph node.
                    continue
                for fref in gr_dict.get("file_refs", []):
                    if fref.get("exists"):
                        self.graph.record_touch(fact_id, fref["path"])

        ungrounded_facts = [r for r in results if not r["grounded"]]

        return {
            "total": len(facts),
            "grounded": grounded_count,
            "ungrounded": len(facts) - grounded_count,
            "confidence_updates": confidence_updates,
            "ungrounded_details": ungrounded_facts[:10],
        }

    def apply_confidence_decay(self, decay_per_day: float = 0.02) -> dict:
        """Apply confidence decay to active facts with contradiction signals.

        A fact is affected only if its ``contradiction_count`` is positive
        and its ``last_accessed_at`` (or, failing that, ``timestamp``) is at
        least one day old. Its confidence then drops by
        ``decay_per_day * age_days * 0.1``, never below 0.0.

        NOTE(review): the reduction is computed from the fact's full age on
        every call, so calling this repeatedly subtracts the same age again.
        Confirm the intended call schedule.

        Returns ``{"decayed": <count>, "total": <active facts>}`` or
        ``{"error": "no memory store"}``.
        """
        if not self.memory:
            return {"error": "no memory store"}

        facts = [
            f for f in self.memory.facts
            if f.get("lifecycle") == "active"
        ]

        decayed = 0
        now = datetime.now(timezone.utc)

        for fact in facts:
            # Only decay facts that have contradiction signals
            if fact.get("contradiction_count", 0) <= 0:
                continue

            ref = fact.get("last_accessed_at") or fact.get("timestamp")
            if not ref:
                continue
            try:
                dt = datetime.fromisoformat(ref)
            except (ValueError, TypeError):
                continue
            if dt.tzinfo is None:
                # Naive timestamps are taken to be UTC.
                dt = dt.replace(tzinfo=timezone.utc)
            age_days = (now - dt).total_seconds() / 86400

            if age_days < 1:
                continue

            current_conf = fact.get("confidence", 1.0)
            new_conf = max(0.0, current_conf - (decay_per_day * age_days * 0.1))
            if new_conf < current_conf:
                fact["confidence"] = round(new_conf, 4)
                decayed += 1

        if decayed:
            self.memory._save_facts()

        return {"decayed": decayed, "total": len(facts)}

    # ── Extraction ──────────────────────────────────────────────────

    def _extract_file_refs(self, text: str) -> list[str]:
        """Extract file path references from fact text (sorted, unique)."""
        refs = set()
        for match in _FILE_PATTERN.finditer(text):
            path = match.group(1).strip()
            if path:
                refs.add(path)
        # Also look for explicit path-like references
        # e.g., "services/auth.py" or "cli/tools/memory.py"
        for word in text.split():
            word = word.strip("`,.'\"()[]{}:")
            if "/" not in word or "." not in word.split("/")[-1]:
                continue
            if word.rsplit(".", 1)[-1].lower() in self._FILE_EXTENSIONS:
                refs.add(word)
        return sorted(refs)

    def _extract_symbol_refs(self, text: str) -> list[str]:
        """Extract symbol references from fact text (sorted, unique).

        Names of two characters or fewer and common English words are
        dropped.
        """
        symbols = {m.group(1) for m in _SYMBOL_PATTERN.finditer(text)}
        symbols.update(m.group(1) for m in _NAMED_REF_PATTERN.finditer(text))
        return sorted(
            s for s in symbols
            if s not in self._NOISE_WORDS and len(s) > 2
        )

    # ── Existence checks ────────────────────────────────────────────

    def _resolve_file(self, ref: str) -> Path | None:
        """Return the first existing path for ``ref``, or None.

        The reference is tried relative to the project root and then under
        each common source directory.
        """
        for prefix in self._SEARCH_PREFIXES:
            candidate = self.project_path / prefix / ref
            if candidate.exists():
                return candidate
        return None

    def _file_exists(self, ref: str) -> bool:
        """Check if a file reference exists in the project."""
        return self._resolve_file(ref) is not None

    def _symbol_exists(
        self, symbol: str, file_refs: list[str]
    ) -> tuple[bool, str]:
        """Check if a symbol exists in the referenced files or project.

        Referenced files are located with the same lookup as
        ``_file_exists`` and searched for the symbol as a plain substring.
        If none contains it and a ``file_memory`` index is attached, the
        index is queried as a fallback.

        Returns ``(found, location)``; ``location`` is the matching file
        reference, the index hit's ``"file"`` value, or ``""``.
        """
        # Search in referenced files first
        for ref in file_refs:
            located = self._resolve_file(ref)
            if located is None:
                continue
            try:
                content = located.read_text(encoding="utf-8", errors="ignore")
            except OSError:
                # Unreadable (directory, permissions, ...): try the next one.
                continue
            if symbol in content:
                return True, ref

        # If file_memory is available, use structural index
        if self.file_memory:
            try:
                results = self.file_memory.search(symbol, top_k=1)
                if results:
                    return True, results[0].get("file", "")
            except Exception:
                # Best effort: the index is an optional external
                # collaborator, so a failure there counts as "not found".
                pass

        return False, ""