code-context-control 2.28.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. cli/__init__.py +1 -0
  2. cli/_hook_utils.py +99 -0
  3. cli/c3.py +6152 -0
  4. cli/commands/__init__.py +1 -0
  5. cli/commands/common.py +312 -0
  6. cli/commands/parser.py +286 -0
  7. cli/docs.html +3178 -0
  8. cli/edits.html +878 -0
  9. cli/hook_auto_snapshot.py +142 -0
  10. cli/hook_c3_signal.py +61 -0
  11. cli/hook_c3read.py +116 -0
  12. cli/hook_edit_ledger.py +213 -0
  13. cli/hook_edit_unlock.py +170 -0
  14. cli/hook_filter.py +130 -0
  15. cli/hook_ghost_files.py +238 -0
  16. cli/hook_pretool_enforce.py +334 -0
  17. cli/hook_read.py +200 -0
  18. cli/hook_session_stats.py +62 -0
  19. cli/hook_terse_advisor.py +190 -0
  20. cli/hub.html +3764 -0
  21. cli/hub_server.py +1619 -0
  22. cli/mcp_proxy.py +428 -0
  23. cli/mcp_server.py +660 -0
  24. cli/server.py +2985 -0
  25. cli/tools/__init__.py +4 -0
  26. cli/tools/_helpers.py +65 -0
  27. cli/tools/agent.py +1165 -0
  28. cli/tools/compress.py +215 -0
  29. cli/tools/delegate.py +1184 -0
  30. cli/tools/edit.py +313 -0
  31. cli/tools/edits.py +118 -0
  32. cli/tools/filter.py +285 -0
  33. cli/tools/impact.py +163 -0
  34. cli/tools/memory.py +469 -0
  35. cli/tools/read.py +224 -0
  36. cli/tools/search.py +337 -0
  37. cli/tools/session.py +95 -0
  38. cli/tools/shell.py +193 -0
  39. cli/tools/status.py +306 -0
  40. cli/tools/validate.py +310 -0
  41. cli/ui/api.js +36 -0
  42. cli/ui/app.js +207 -0
  43. cli/ui/components/chat.js +758 -0
  44. cli/ui/components/dashboard.js +689 -0
  45. cli/ui/components/edits.js +220 -0
  46. cli/ui/components/instructions.js +481 -0
  47. cli/ui/components/memory.js +626 -0
  48. cli/ui/components/sessions.js +606 -0
  49. cli/ui/components/settings.js +1404 -0
  50. cli/ui/components/sidebar.js +156 -0
  51. cli/ui/icons.js +51 -0
  52. cli/ui/shared.js +119 -0
  53. cli/ui/theme.js +22 -0
  54. cli/ui.html +168 -0
  55. cli/ui_legacy.html +6797 -0
  56. cli/ui_nano.html +503 -0
  57. code_context_control-2.28.0.dist-info/METADATA +248 -0
  58. code_context_control-2.28.0.dist-info/RECORD +150 -0
  59. code_context_control-2.28.0.dist-info/WHEEL +5 -0
  60. code_context_control-2.28.0.dist-info/entry_points.txt +4 -0
  61. code_context_control-2.28.0.dist-info/licenses/LICENSE +201 -0
  62. code_context_control-2.28.0.dist-info/top_level.txt +5 -0
  63. core/__init__.py +75 -0
  64. core/config.py +269 -0
  65. core/ide.py +188 -0
  66. oracle/__init__.py +1 -0
  67. oracle/config.py +75 -0
  68. oracle/oracle.html +3900 -0
  69. oracle/oracle_server.py +663 -0
  70. oracle/services/__init__.py +1 -0
  71. oracle/services/c3_bridge.py +210 -0
  72. oracle/services/chat_engine.py +1103 -0
  73. oracle/services/chat_store.py +155 -0
  74. oracle/services/cross_memory.py +154 -0
  75. oracle/services/federated_graph.py +463 -0
  76. oracle/services/health_checker.py +117 -0
  77. oracle/services/insight_engine.py +307 -0
  78. oracle/services/memory_reader.py +106 -0
  79. oracle/services/memory_writer.py +182 -0
  80. oracle/services/ollama_bridge.py +332 -0
  81. oracle/services/project_scanner.py +87 -0
  82. oracle/services/review_agent.py +206 -0
  83. services/__init__.py +1 -0
  84. services/activity_log.py +93 -0
  85. services/agent_base.py +124 -0
  86. services/agents.py +1529 -0
  87. services/auto_memory.py +407 -0
  88. services/bench/__init__.py +6 -0
  89. services/bench/external/__init__.py +29 -0
  90. services/bench/external/aider_polyglot.py +405 -0
  91. services/bench/external/swe_bench.py +485 -0
  92. services/benchmark_dashboard.py +596 -0
  93. services/claude_md.py +785 -0
  94. services/compressor.py +592 -0
  95. services/context_snapshot.py +356 -0
  96. services/conversation_store.py +870 -0
  97. services/doc_index.py +537 -0
  98. services/e2e_benchmark.py +2884 -0
  99. services/e2e_evaluator.py +396 -0
  100. services/e2e_tasks.py +743 -0
  101. services/edit_ledger.py +459 -0
  102. services/embedding_index.py +341 -0
  103. services/error_reporting.py +123 -0
  104. services/file_memory.py +734 -0
  105. services/hub_service.py +585 -0
  106. services/indexer.py +712 -0
  107. services/memory.py +318 -0
  108. services/memory_consolidator.py +538 -0
  109. services/memory_graph.py +382 -0
  110. services/memory_grounder.py +304 -0
  111. services/memory_scorer.py +246 -0
  112. services/metrics.py +86 -0
  113. services/notifications.py +209 -0
  114. services/ollama_client.py +201 -0
  115. services/output_filter.py +488 -0
  116. services/parser.py +1238 -0
  117. services/project_manager.py +579 -0
  118. services/protocol.py +306 -0
  119. services/proxy_state.py +152 -0
  120. services/retrieval_broker.py +129 -0
  121. services/router.py +414 -0
  122. services/runtime.py +326 -0
  123. services/session_benchmark.py +1945 -0
  124. services/session_manager.py +1026 -0
  125. services/session_preloader.py +251 -0
  126. services/text_index.py +90 -0
  127. services/tool_classifier.py +176 -0
  128. services/transcript_index.py +340 -0
  129. services/validation_cache.py +155 -0
  130. services/vector_store.py +299 -0
  131. services/version_tracker.py +271 -0
  132. services/watcher.py +192 -0
  133. tui/__init__.py +0 -0
  134. tui/backend.py +59 -0
  135. tui/main.py +145 -0
  136. tui/screens/__init__.py +1 -0
  137. tui/screens/benchmark_view.py +109 -0
  138. tui/screens/claudemd_view.py +46 -0
  139. tui/screens/compress_view.py +52 -0
  140. tui/screens/index_view.py +74 -0
  141. tui/screens/init_view.py +82 -0
  142. tui/screens/mcp_view.py +73 -0
  143. tui/screens/optimize_view.py +41 -0
  144. tui/screens/pipe_view.py +46 -0
  145. tui/screens/projects_view.py +355 -0
  146. tui/screens/search_view.py +55 -0
  147. tui/screens/session_view.py +143 -0
  148. tui/screens/stats.py +158 -0
  149. tui/screens/ui_view.py +54 -0
  150. tui/theme.tcss +335 -0
@@ -0,0 +1,463 @@
1
+ """Federated memory graph across up to ~99 C3 projects.
2
+
3
+ Merges per-project fact graphs into a unified graph, adds cross-project
4
+ "cross_similar" edges via embeddings (Ollama, when reachable) with a
5
+ TF-IDF fallback so Ollama is optional.
6
+
7
+ Cache: ~/.c3/oracle/federated_graph.json, invalidated per-project via
8
+ .c3/facts/facts.json mtime. Rebuilds only changed projects.
9
+ """
10
+ from __future__ import annotations
11
+
12
+ import hashlib
13
+ import json
14
+ import math
15
+ import re
16
+ import time
17
+ from pathlib import Path
18
+ from typing import Any
19
+
20
+ from oracle.config import ORACLE_DIR, load_config
21
+ from oracle.services.cross_memory import CrossMemory
22
+ from oracle.services.memory_reader import MemoryReader
23
+
24
+ _CACHE_FILE = ORACLE_DIR / "federated_graph.json"
25
+ _EMBED_CACHE_FILE = ORACLE_DIR / "federated_embeddings.json"
26
+
27
+
28
+ def _slugify(project_path: str) -> str:
29
+ name = Path(project_path).name or "project"
30
+ digest = hashlib.md5(project_path.encode("utf-8")).hexdigest()[:6]
31
+ slug = re.sub(r"[^a-zA-Z0-9_-]+", "-", name).strip("-").lower() or "project"
32
+ return f"{slug}-{digest}"
33
+
34
+
35
+ def _tokenize(text: str) -> list[str]:
36
+ return re.findall(r"[A-Za-z0-9_]{2,}", (text or "").lower())
37
+
38
+
39
def _tfidf_vectors(docs: list[str]) -> tuple[list[dict[str, float]], dict[str, float]]:
    """Return (per-doc sparse tf-idf dict, idf dict).

    Each document vector maps token -> L2-normalised tf-idf weight, so the
    dot product of two vectors is their cosine similarity. Documents with no
    tokens get an empty dict. IDF is the smoothed form
    ``log((n + 1) / (df + 1)) + 1``.
    """
    from collections import Counter

    token_lists = [_tokenize(doc) for doc in docs]

    # Document frequency: each token counts once per document it appears in.
    doc_freq: Counter = Counter()
    for tokens in token_lists:
        for term in set(tokens):
            doc_freq[term] += 1

    total_docs = max(1, len(docs))
    idf = {
        term: math.log((total_docs + 1) / (freq + 1)) + 1
        for term, freq in doc_freq.items()
    }

    def _unit_vector(tokens: list[str]) -> dict[str, float]:
        # Term frequency is relative to document length; then L2-normalise.
        if not tokens:
            return {}
        size = len(tokens)
        weights = {
            term: (count / size) * idf.get(term, 0.0)
            for term, count in Counter(tokens).items()
        }
        norm = math.sqrt(sum(w * w for w in weights.values())) or 1.0
        return {term: w / norm for term, w in weights.items()}

    vectors: list[dict[str, float]] = [_unit_vector(tokens) for tokens in token_lists]
    return vectors, idf
60
+
61
+
62
+ def _sparse_cos(a: dict[str, float], b: dict[str, float]) -> float:
63
+ if not a or not b:
64
+ return 0.0
65
+ shared = set(a) & set(b)
66
+ if not shared:
67
+ return 0.0
68
+ return sum(a[t] * b[t] for t in shared)
69
+
70
+
71
+ def _dense_cos_matrix(matrix: list[list[float]]) -> Any:
72
+ import numpy as np # lazy
73
+ arr = np.array(matrix, dtype="float32")
74
+ norms = np.linalg.norm(arr, axis=1, keepdims=True)
75
+ norms[norms == 0] = 1.0
76
+ arr = arr / norms
77
+ return arr @ arr.T
78
+
79
+
80
class FederatedGraph:
    """Build and cache a cross-project memory graph.

    Every non-archived fact of every project becomes a node. Edges come from
    three sources: the project's own graph (scope ``within_project``),
    embedding or TF-IDF similarity between facts of *different* projects
    (scope ``cross_similar``), and insights shared by several projects
    (scope ``linked_via_insight``, project-level endpoints).
    """

    def __init__(self, reader: MemoryReader | None = None,
                 cross_memory: CrossMemory | None = None,
                 ollama_bridge: Any | None = None):
        """Wire up collaborators; defaults are constructed when not supplied.

        Args:
            reader: Source of per-project facts and graphs.
            cross_memory: Source of cross-project insights.
            ollama_bridge: Optional object exposing ``embed`` (and optionally
                ``embed_batch``); when ``None`` only TF-IDF similarity is used.
        """
        self.reader = reader or MemoryReader()
        self.cross = cross_memory or CrossMemory()
        self.ollama = ollama_bridge
        self._cfg = load_config()
        self._embed_cache: dict[str, list[float]] = self._load_embed_cache()

    # ── Cache ────────────────────────────────────────────────────────

    def _load_embed_cache(self) -> dict[str, list[float]]:
        """Load the on-disk embedding cache, or ``{}`` if missing/unusable."""
        try:
            data = json.loads(_EMBED_CACHE_FILE.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # Missing file, unreadable file, bad encoding or invalid JSON.
            return {}
        # A valid-JSON but non-object payload would break later ``.get`` calls.
        return data if isinstance(data, dict) else {}

    def _save_embed_cache(self):
        """Persist the embedding cache. Best effort: failures are ignored."""
        try:
            # mkdir sits inside the try so a read-only home directory cannot
            # abort a graph build merely because the cache cannot be written.
            ORACLE_DIR.mkdir(parents=True, exist_ok=True)
            _EMBED_CACHE_FILE.write_text(
                json.dumps(self._embed_cache), encoding="utf-8"
            )
        except (OSError, TypeError, ValueError):
            pass

    def _cache_key(self, fact_text: str, model: str) -> str:
        """Return the embedding-cache key for *fact_text* under *model*."""
        return hashlib.md5(f"{model}:{fact_text}".encode("utf-8")).hexdigest()

    def _project_mtime(self, project_path: str) -> float:
        """Return the newest mtime of the project's facts/graph files.

        Returns ``0.0`` when neither file exists or can be stat'ed.
        """
        facts_dir = Path(project_path) / ".c3" / "facts"
        newest = 0.0
        for name in ("facts.json", "memory_graph.json"):
            candidate = facts_dir / name
            try:
                if candidate.is_file():
                    newest = max(newest, candidate.stat().st_mtime)
            except OSError:
                continue
        return newest

    # ── Public API ──────────────────────────────────────────────────

    def build(self, project_paths: list[str], force: bool = False,
              min_sim: float | None = None,
              top_k: int | None = None,
              max_facts_per_project: int | None = None) -> dict:
        """Build (or return the cached) federated graph.

        Args:
            project_paths: Project roots to merge; only the first 99 are used.
            force: Skip the cache lookup and always rebuild.
            min_sim: Minimum similarity for a cross-project edge; defaults to
                config ``cross_sim_threshold`` (0.75).
            top_k: Maximum cross-project neighbours considered per fact;
                defaults to config ``cross_top_k_neighbors`` (3).
            max_facts_per_project: Cap on facts taken per project (highest
                ``relevance_count`` first); defaults to config
                ``cross_max_facts_per_project`` (200).

        Returns:
            A dict with ``nodes``, ``edges``, ``clusters``, ``projects``,
            ``stats``, ``generated_at`` and ``inputs``. For an empty
            *project_paths* only the first five keys are present, all empty.
        """
        from collections import Counter

        if not project_paths:
            return {"nodes": [], "edges": [], "clusters": [], "projects": [], "stats": {}}

        project_paths = project_paths[:99]
        min_sim = float(min_sim if min_sim is not None
                        else self._cfg.get("cross_sim_threshold", 0.75))
        top_k = int(top_k if top_k is not None
                    else self._cfg.get("cross_top_k_neighbors", 3))
        max_per = int(max_facts_per_project if max_facts_per_project is not None
                      else self._cfg.get("cross_max_facts_per_project", 200))

        # Only touch the cache file when the caller may actually use it.
        if not force:
            cached = self._try_cached(project_paths, min_sim, top_k, max_per)
            if cached is not None:
                return cached

        # Snapshot mtimes *before* reading: if a project is written while we
        # read it, the recorded mtime is older than the file and the next
        # build correctly treats the project as changed.
        mtimes = {p: self._project_mtime(p) for p in project_paths}

        projects: list[dict] = []
        nodes: list[dict] = []
        edges: list[dict] = []
        for path in project_paths:
            entry, project_nodes, project_edges = self._load_project(path, max_per)
            projects.append(entry)
            nodes.extend(project_nodes)
            edges.extend(project_edges)

        cross_edges, sim_method = self._cross_similar_edges(nodes, min_sim, top_k)
        edges.extend(cross_edges)
        edges.extend(self._insight_edges(project_paths, {n["id"] for n in nodes}))

        clusters = self._clusters(
            nodes, [e for e in edges if e["scope"] == "within_project"]
        )
        scope_counts = Counter(e["scope"] for e in edges)

        result = {
            "nodes": nodes,
            "edges": edges,
            "clusters": clusters,
            "projects": projects,
            "stats": {
                "total_nodes": len(nodes),
                "total_edges": len(edges),
                "within_project": scope_counts["within_project"],
                "cross_similar": scope_counts["cross_similar"],
                "linked_via_insight": scope_counts["linked_via_insight"],
                "projects": len(projects),
                "similarity_method": sim_method,
                "min_sim": min_sim,
                "top_k": top_k,
            },
            "generated_at": time.time(),
            "inputs": {
                "projects": sorted(project_paths),
                "min_sim": min_sim,
                "top_k": top_k,
                "max_facts_per_project": max_per,
                "mtimes": mtimes,
            },
        }
        self._save_cache(result)
        return result

    def invalidate(self):
        """Delete the cached graph file, if any. Failures are ignored."""
        try:
            _CACHE_FILE.unlink()
        except OSError:
            # Covers "file does not exist" as well as permission problems.
            pass

    # ── Internals ───────────────────────────────────────────────────

    def _load_project(self, path: str, max_per: int) -> tuple[dict, list[dict], list[dict]]:
        """Read one project and return (project entry, fact nodes, edges).

        Archived facts and facts without an ``id`` key are skipped (a node id
        cannot be formed for the latter). Only edges whose both endpoints are
        retained facts are kept.
        """
        slug = _slugify(path)
        facts = [
            f for f in self.reader.read_facts(path)
            if "id" in f and f.get("lifecycle") != "archived"
        ]
        facts = sorted(
            facts, key=lambda f: f.get("relevance_count", 0), reverse=True
        )[:max_per]
        local_ids = {f["id"] for f in facts if f["id"]}

        entry = {
            "slug": slug,
            "path": path,
            "name": Path(path).name,
            "fact_count": len(facts),
        }
        nodes = [
            {
                "id": f"{slug}:{f['id']}",
                "kind": "fact",
                "project": slug,
                "project_path": path,
                "local_id": f["id"],
                "label": f.get("fact", "")[:80],
                "text": f.get("fact", ""),
                "category": f.get("category", "general"),
                "relevance": f.get("relevance_count", 0),
                "confidence": f.get("confidence", 1.0),
            }
            for f in facts
        ]

        edges: list[dict] = []
        graph = self.reader.read_graph(path)
        for e in graph.get("edges", []):
            src = e.get("src")
            dst = e.get("dst")
            if src not in local_ids or dst not in local_ids:
                continue  # skip file/symbol targets + orphaned edges
            edges.append({
                "src": f"{slug}:{src}",
                "dst": f"{slug}:{dst}",
                "type": e.get("type", "co_recalled"),
                "weight": e.get("weight", 1.0),
                "scope": "within_project",
            })
        return entry, nodes, edges

    def _try_cached(self, project_paths: list[str], min_sim: float,
                    top_k: int, max_per: int) -> dict | None:
        """Return the cached graph if it is still valid for these inputs.

        The cache is rejected when it is missing or malformed, was built for
        different projects/parameters, is older than config
        ``federated_graph_ttl_sec`` (default 3600), or any project's facts
        mtime differs from the one recorded at build time.
        """
        try:
            data = json.loads(_CACHE_FILE.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            return None
        if not isinstance(data, dict):
            return None
        inputs = data.get("inputs")
        if not isinstance(inputs, dict):
            return None

        ttl = float(self._cfg.get("federated_graph_ttl_sec", 3600))
        try:
            if sorted(inputs.get("projects", [])) != sorted(project_paths):
                return None
            if inputs.get("min_sim") != min_sim or inputs.get("top_k") != top_k:
                return None
            if inputs.get("max_facts_per_project") != max_per:
                return None
            if time.time() - float(data.get("generated_at", 0)) > ttl:
                return None
            cached_mtimes = inputs.get("mtimes", {})
            for p in project_paths:
                # Compare in both directions: a deleted or rolled-back facts
                # file has a *smaller* mtime and must invalidate as well.
                if abs(self._project_mtime(p) - float(cached_mtimes.get(p, 0))) > 0.001:
                    return None
        except (AttributeError, TypeError, ValueError):
            # Cache file is valid JSON but not in the expected shape.
            return None
        return data

    def _save_cache(self, data: dict):
        """Persist the built graph. Best effort: failures are ignored."""
        try:
            ORACLE_DIR.mkdir(parents=True, exist_ok=True)
            _CACHE_FILE.write_text(json.dumps(data), encoding="utf-8")
        except (OSError, TypeError, ValueError):
            pass

    def _cross_similar_edges(self, fact_nodes: list[dict], min_sim: float,
                             top_k: int) -> tuple[list[dict], str]:
        """Return (cross-project similarity edges, method name).

        Tries embeddings through the Ollama bridge first and falls back to
        TF-IDF when no bridge is configured, embedding fails, or the dense
        similarity computation fails. The method name is ``"none"`` (fewer
        than two facts), ``"embedding:<model>"`` or ``"tfidf"``.
        """
        if len(fact_nodes) < 2:
            return [], "none"

        model = self._cfg.get("embedding_model", "nomic-embed-text")
        if self.ollama is not None and hasattr(self.ollama, "embed"):
            try:
                vectors = self._embed_all(fact_nodes, model)
                if vectors:
                    sim = _dense_cos_matrix(vectors)
                    edges = self._top_k_edges_from_dense(fact_nodes, sim, min_sim, top_k)
                    return edges, f"embedding:{model}"
            except Exception:
                # Deliberately broad: the bridge is opaque (network errors,
                # missing numpy, ragged vectors) and TF-IDF is always
                # available as a fallback.
                pass

        tfidf, _ = _tfidf_vectors([n["text"] for n in fact_nodes])
        return self._top_k_edges_from_sparse(fact_nodes, tfidf, min_sim, top_k), "tfidf"

    def _embed_all(self, fact_nodes: list[dict], model: str) -> list[list[float]] | None:
        """Return one embedding per fact node, or ``None`` if any is missing.

        Cached vectors are reused; the rest are requested in batches of 32
        via ``embed_batch`` when the bridge has it, otherwise one by one via
        ``embed``. Newly obtained vectors are added to the cache, which is
        written to disk only when every node ended up with a vector.
        """
        result: list[list[float] | None] = [None] * len(fact_nodes)
        pending: list[int] = []
        for i, node in enumerate(fact_nodes):
            cached = self._embed_cache.get(self._cache_key(node["text"], model))
            if cached:
                result[i] = cached
            else:
                pending.append(i)

        # batch in chunks of 32
        for start in range(0, len(pending), 32):
            batch_idx = pending[start:start + 32]
            batch = [fact_nodes[i]["text"] for i in batch_idx]
            vecs = None
            if hasattr(self.ollama, "embed_batch"):
                vecs = self.ollama.embed_batch(batch, model=model)
            if not vecs:
                # No batch support (or an empty batch reply): embed one by one.
                vecs = []
                for text in batch:
                    v = self.ollama.embed(text, model=model)
                    if not v:
                        return None
                    vecs.append(v)
            for j, v in zip(batch_idx, vecs):
                if not v:
                    return None
                result[j] = v
                self._embed_cache[self._cache_key(fact_nodes[j]["text"], model)] = v

        # A short batch reply leaves gaps; treat that as failure.
        if any(v is None for v in result):
            return None
        if pending:
            self._save_embed_cache()
        return result  # type: ignore[return-value]

    @staticmethod
    def _add_cross_edge(edges: list[dict], seen: set[tuple[str, str]],
                        a: str, b: str, score: float) -> None:
        """Append an undirected ``cross_similar`` edge unless already present."""
        key = (a, b) if a < b else (b, a)
        if key in seen:
            return
        seen.add(key)
        edges.append({
            "src": key[0],
            "dst": key[1],
            "type": "cross_similar",
            "weight": round(score, 4),
            "scope": "cross_similar",
        })

    def _top_k_edges_from_dense(self, fact_nodes, sim_matrix, min_sim: float,
                                top_k: int) -> list[dict]:
        """Pick cross-project edges from a dense similarity matrix.

        For every fact, its ``top_k`` most similar facts from *other*
        projects are considered, and those with similarity ``>= min_sim``
        become edges. Each unordered pair yields at most one edge.
        """
        import numpy as np

        projects = np.array([node["project"] for node in fact_nodes])
        seen: set[tuple[str, str]] = set()
        edges: list[dict] = []
        for i, node in enumerate(fact_nodes):
            row = sim_matrix[i].copy()
            # Mask the fact itself and every fact of the same project.
            row[projects == projects[i]] = -1.0
            # Inclusive threshold, matching the per-candidate check below and
            # the TF-IDF path (a strict ">" here would drop exact matches).
            if float(row.max()) < min_sim:
                continue
            for j in np.argsort(-row)[:top_k]:
                s = float(row[j])
                if s < min_sim:
                    break  # candidates are sorted; the rest are lower still
                self._add_cross_edge(edges, seen, node["id"], fact_nodes[int(j)]["id"], s)
        return edges

    def _top_k_edges_from_sparse(self, fact_nodes, vectors, min_sim: float,
                                 top_k: int) -> list[dict]:
        """Pick cross-project edges from sparse TF-IDF vectors.

        Same selection rule as the dense variant: per fact, the ``top_k``
        best candidates from other projects with similarity ``>= min_sim``.
        """
        seen: set[tuple[str, str]] = set()
        edges: list[dict] = []
        for i, node in enumerate(fact_nodes):
            scored: list[tuple[float, int]] = []
            for j, other in enumerate(fact_nodes):
                if i == j or other["project"] == node["project"]:
                    continue
                s = _sparse_cos(vectors[i], vectors[j])
                if s >= min_sim:
                    scored.append((s, j))
            scored.sort(reverse=True)
            for s, j in scored[:top_k]:
                self._add_cross_edge(edges, seen, node["id"], fact_nodes[j]["id"], s)
        return edges

    def _insight_edges(self, project_paths: list[str], node_ids: set[str]) -> list[dict]:
        """Link pairs of projects that share an insight (project-level edge).

        *node_ids* is accepted for interface compatibility and is not used.
        Returns an empty list when insights cannot be loaded.
        """
        from itertools import combinations

        try:
            insights = self.cross.get_all_insights()
        except Exception:
            # The insight store is optional; the graph is still useful without it.
            return []

        slug_by_path = {p: _slugify(p) for p in project_paths}
        edges: list[dict] = []
        for ins in insights or []:
            # De-duplicate while keeping order so a project listed twice in
            # one insight yields neither a self-loop nor repeated edges.
            srcs = list(dict.fromkeys(
                p for p in (ins.get("source_projects") or []) if p in slug_by_path
            ))
            for first, second in combinations(srcs, 2):
                edges.append({
                    "src": f"project:{slug_by_path[first]}",
                    "dst": f"project:{slug_by_path[second]}",
                    "type": ins.get("type", "insight"),
                    "weight": 1.0,
                    "scope": "linked_via_insight",
                    "insight_id": ins.get("id", ""),
                })
        return edges

    def _clusters(self, nodes: list[dict], within_edges: list[dict]) -> list[list[str]]:
        """Return connected components (size >= 3) of the within-project graph.

        Components are found with an iterative depth-first search over the
        undirected edge set and returned largest first.
        """
        from collections import defaultdict

        adj: dict[str, set[str]] = defaultdict(set)
        for e in within_edges:
            adj[e["src"]].add(e["dst"])
            adj[e["dst"]].add(e["src"])

        visited: set[str] = set()
        clusters: list[list[str]] = []
        for nid in (n["id"] for n in nodes):
            if nid in visited or nid not in adj:
                continue
            stack = [nid]
            cluster: list[str] = []
            while stack:
                cur = stack.pop()
                if cur in visited:
                    continue
                visited.add(cur)
                cluster.append(cur)
                # .get avoids creating defaultdict entries for leaf lookups.
                stack.extend(nb for nb in adj.get(cur, ()) if nb not in visited)
            if len(cluster) >= 3:
                clusters.append(cluster)
        clusters.sort(key=len, reverse=True)
        return clusters
@@ -0,0 +1,117 @@
1
+ """Validates per-project .c3/ structure and fact integrity."""
2
+
3
+ import json
4
+ from pathlib import Path
5
+
6
+ from oracle.services.memory_reader import MemoryReader
7
+
8
+
9
class HealthChecker:
    """Heuristic health checks on project memory — no LLM calls."""

    def __init__(self, reader: MemoryReader):
        """Store the reader used to load facts and graph/fact statistics."""
        self.reader = reader

    def check(self, project_path: str) -> dict:
        """Run all checks and return a health report.

        Args:
            project_path: Root directory of the project; its ``.c3``
                sub-directory is inspected.

        Returns:
            A dict with ``project_path``, ``status`` (``"error"``,
            ``"warning"`` or ``"ok"``), ``structure_ok``, ``fact_stats``,
            ``graph_stats``, ``freshness`` and the list of ``issues`` (each
            a dict with ``severity`` and ``message``).
        """
        root = Path(project_path)
        c3_dir = root / ".c3"
        issues: list[dict] = []

        # ── Structure check ──
        structure_ok = True
        for required in ["facts/facts.json"]:
            if not (c3_dir / required).is_file():
                structure_ok = False
                issues.append({"severity": "error", "message": f"Missing {required}"})

        for optional in ["facts/memory_graph.json", "config.json"]:
            if not (c3_dir / optional).is_file():
                issues.append({"severity": "warning", "message": f"Missing optional {optional}"})

        # ── Validate JSON parse ──
        for json_file in ["facts/facts.json", "facts/memory_graph.json"]:
            fp = c3_dir / json_file
            if fp.is_file():
                try:
                    with open(fp, encoding="utf-8") as f:
                        json.load(f)
                except Exception as e:
                    # Deliberately broad: any failure to read or parse the
                    # file is exactly what this check is meant to report.
                    structure_ok = False
                    issues.append({"severity": "error", "message": f"Invalid JSON in {json_file}: {e}"})

        # ── Fact integrity ──
        facts = self.reader.read_facts(project_path)
        seen_ids = set()
        required_fields = {"id", "fact", "category", "timestamp", "lifecycle"}
        for fact in facts:
            fid = fact.get("id", "")
            missing = required_fields - set(fact.keys())
            if missing:
                issues.append({"severity": "warning", "message": f"Fact {fid}: missing fields {missing}"})
            if "id" not in fact:
                # Already reported above as a missing field; several id-less
                # facts are not duplicates of one another.
                continue
            if fid in seen_ids:
                issues.append({"severity": "warning", "message": f"Duplicate fact ID: {fid}"})
            seen_ids.add(fid)

        # ── Graph integrity ──
        graph_stats = self.reader.get_graph_stats(project_path)
        if (graph_stats.get("orphaned_edges") or 0) > 0:
            issues.append({
                "severity": "warning",
                "message": f"{graph_stats['orphaned_edges']} orphaned graph edges (reference deleted facts)",
            })

        # ── Tier distribution ──
        fact_stats = self.reader.get_fact_stats(project_path)

        # ── Freshness ──
        freshness = self._compute_freshness(facts)
        if (freshness.get("days_since_last_fact") or 0) > 30:
            issues.append({"severity": "info", "message": "No new facts in over 30 days"})

        # ── Overall status ──
        error_count = sum(1 for i in issues if i["severity"] == "error")
        warn_count = sum(1 for i in issues if i["severity"] == "warning")
        if error_count > 0:
            status = "error"
        elif warn_count > 0:
            status = "warning"
        else:
            status = "ok"

        return {
            "project_path": project_path,
            "status": status,
            "structure_ok": structure_ok,
            "fact_stats": fact_stats,
            "graph_stats": graph_stats,
            "freshness": freshness,
            "issues": issues,
        }

    def _compute_freshness(self, facts: list[dict]) -> dict:
        """Compute how fresh the memory is.

        Returns a dict with ``last_fact_timestamp`` (ISO string of the most
        recent parseable fact timestamp) and ``days_since_last_fact`` (whole
        days between that timestamp and now, in UTC). Both are ``None`` when
        no fact carries a parseable timestamp. Timestamps without a timezone
        are interpreted as UTC.
        """
        empty = {"last_fact_timestamp": None, "days_since_last_fact": None}
        if not facts:
            return empty

        from datetime import datetime, timezone

        def parse(raw):
            # Returns a datetime, or None when *raw* is not an ISO timestamp.
            if not raw or not isinstance(raw, str):
                return None
            try:
                return datetime.fromisoformat(raw)
            except ValueError:
                pass
            # Python < 3.11 rejects the "Z" UTC suffix; retry with an offset.
            if raw.endswith("Z"):
                try:
                    return datetime.fromisoformat(raw[:-1] + "+00:00")
                except ValueError:
                    return None
            return None

        def as_utc(moment):
            # Naive timestamps are taken to be UTC; aware ones are kept.
            if moment.tzinfo is None:
                return moment.replace(tzinfo=timezone.utc)
            return moment

        timestamps = [
            moment
            for moment in (parse(f.get("timestamp")) for f in facts)
            if moment is not None
        ]
        if not timestamps:
            return empty

        # Compare on the normalised value: max() over a mix of naive and
        # aware datetimes would raise TypeError.
        latest = max(timestamps, key=as_utc)
        days = (datetime.now(timezone.utc) - as_utc(latest)).days

        return {
            "last_fact_timestamp": latest.isoformat(),
            "days_since_last_fact": days,
        }