code-context-control 2.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__init__.py +1 -0
- cli/_hook_utils.py +99 -0
- cli/c3.py +6152 -0
- cli/commands/__init__.py +1 -0
- cli/commands/common.py +312 -0
- cli/commands/parser.py +286 -0
- cli/docs.html +3178 -0
- cli/edits.html +878 -0
- cli/hook_auto_snapshot.py +142 -0
- cli/hook_c3_signal.py +61 -0
- cli/hook_c3read.py +116 -0
- cli/hook_edit_ledger.py +213 -0
- cli/hook_edit_unlock.py +170 -0
- cli/hook_filter.py +130 -0
- cli/hook_ghost_files.py +238 -0
- cli/hook_pretool_enforce.py +334 -0
- cli/hook_read.py +200 -0
- cli/hook_session_stats.py +62 -0
- cli/hook_terse_advisor.py +190 -0
- cli/hub.html +3764 -0
- cli/hub_server.py +1619 -0
- cli/mcp_proxy.py +428 -0
- cli/mcp_server.py +660 -0
- cli/server.py +2985 -0
- cli/tools/__init__.py +4 -0
- cli/tools/_helpers.py +65 -0
- cli/tools/agent.py +1165 -0
- cli/tools/compress.py +215 -0
- cli/tools/delegate.py +1184 -0
- cli/tools/edit.py +313 -0
- cli/tools/edits.py +118 -0
- cli/tools/filter.py +285 -0
- cli/tools/impact.py +163 -0
- cli/tools/memory.py +469 -0
- cli/tools/read.py +224 -0
- cli/tools/search.py +337 -0
- cli/tools/session.py +95 -0
- cli/tools/shell.py +193 -0
- cli/tools/status.py +306 -0
- cli/tools/validate.py +310 -0
- cli/ui/api.js +36 -0
- cli/ui/app.js +207 -0
- cli/ui/components/chat.js +758 -0
- cli/ui/components/dashboard.js +689 -0
- cli/ui/components/edits.js +220 -0
- cli/ui/components/instructions.js +481 -0
- cli/ui/components/memory.js +626 -0
- cli/ui/components/sessions.js +606 -0
- cli/ui/components/settings.js +1404 -0
- cli/ui/components/sidebar.js +156 -0
- cli/ui/icons.js +51 -0
- cli/ui/shared.js +119 -0
- cli/ui/theme.js +22 -0
- cli/ui.html +168 -0
- cli/ui_legacy.html +6797 -0
- cli/ui_nano.html +503 -0
- code_context_control-2.28.0.dist-info/METADATA +248 -0
- code_context_control-2.28.0.dist-info/RECORD +150 -0
- code_context_control-2.28.0.dist-info/WHEEL +5 -0
- code_context_control-2.28.0.dist-info/entry_points.txt +4 -0
- code_context_control-2.28.0.dist-info/licenses/LICENSE +201 -0
- code_context_control-2.28.0.dist-info/top_level.txt +5 -0
- core/__init__.py +75 -0
- core/config.py +269 -0
- core/ide.py +188 -0
- oracle/__init__.py +1 -0
- oracle/config.py +75 -0
- oracle/oracle.html +3900 -0
- oracle/oracle_server.py +663 -0
- oracle/services/__init__.py +1 -0
- oracle/services/c3_bridge.py +210 -0
- oracle/services/chat_engine.py +1103 -0
- oracle/services/chat_store.py +155 -0
- oracle/services/cross_memory.py +154 -0
- oracle/services/federated_graph.py +463 -0
- oracle/services/health_checker.py +117 -0
- oracle/services/insight_engine.py +307 -0
- oracle/services/memory_reader.py +106 -0
- oracle/services/memory_writer.py +182 -0
- oracle/services/ollama_bridge.py +332 -0
- oracle/services/project_scanner.py +87 -0
- oracle/services/review_agent.py +206 -0
- services/__init__.py +1 -0
- services/activity_log.py +93 -0
- services/agent_base.py +124 -0
- services/agents.py +1529 -0
- services/auto_memory.py +407 -0
- services/bench/__init__.py +6 -0
- services/bench/external/__init__.py +29 -0
- services/bench/external/aider_polyglot.py +405 -0
- services/bench/external/swe_bench.py +485 -0
- services/benchmark_dashboard.py +596 -0
- services/claude_md.py +785 -0
- services/compressor.py +592 -0
- services/context_snapshot.py +356 -0
- services/conversation_store.py +870 -0
- services/doc_index.py +537 -0
- services/e2e_benchmark.py +2884 -0
- services/e2e_evaluator.py +396 -0
- services/e2e_tasks.py +743 -0
- services/edit_ledger.py +459 -0
- services/embedding_index.py +341 -0
- services/error_reporting.py +123 -0
- services/file_memory.py +734 -0
- services/hub_service.py +585 -0
- services/indexer.py +712 -0
- services/memory.py +318 -0
- services/memory_consolidator.py +538 -0
- services/memory_graph.py +382 -0
- services/memory_grounder.py +304 -0
- services/memory_scorer.py +246 -0
- services/metrics.py +86 -0
- services/notifications.py +209 -0
- services/ollama_client.py +201 -0
- services/output_filter.py +488 -0
- services/parser.py +1238 -0
- services/project_manager.py +579 -0
- services/protocol.py +306 -0
- services/proxy_state.py +152 -0
- services/retrieval_broker.py +129 -0
- services/router.py +414 -0
- services/runtime.py +326 -0
- services/session_benchmark.py +1945 -0
- services/session_manager.py +1026 -0
- services/session_preloader.py +251 -0
- services/text_index.py +90 -0
- services/tool_classifier.py +176 -0
- services/transcript_index.py +340 -0
- services/validation_cache.py +155 -0
- services/vector_store.py +299 -0
- services/version_tracker.py +271 -0
- services/watcher.py +192 -0
- tui/__init__.py +0 -0
- tui/backend.py +59 -0
- tui/main.py +145 -0
- tui/screens/__init__.py +1 -0
- tui/screens/benchmark_view.py +109 -0
- tui/screens/claudemd_view.py +46 -0
- tui/screens/compress_view.py +52 -0
- tui/screens/index_view.py +74 -0
- tui/screens/init_view.py +82 -0
- tui/screens/mcp_view.py +73 -0
- tui/screens/optimize_view.py +41 -0
- tui/screens/pipe_view.py +46 -0
- tui/screens/projects_view.py +355 -0
- tui/screens/search_view.py +55 -0
- tui/screens/session_view.py +143 -0
- tui/screens/stats.py +158 -0
- tui/screens/ui_view.py +54 -0
- tui/theme.tcss +335 -0
|
@@ -0,0 +1,463 @@
|
|
|
1
|
+
"""Federated memory graph across up to ~99 C3 projects.
|
|
2
|
+
|
|
3
|
+
Merges per-project fact graphs into a unified graph, adds cross-project
|
|
4
|
+
"cross_similar" edges via embeddings (Ollama, when reachable) with a
|
|
5
|
+
TF-IDF fallback so Ollama is optional.
|
|
6
|
+
|
|
7
|
+
Cache: ~/.c3/oracle/federated_graph.json, invalidated per-project via
|
|
8
|
+
.c3/facts/facts.json mtime. Rebuilds only changed projects.
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import hashlib
|
|
13
|
+
import json
|
|
14
|
+
import math
|
|
15
|
+
import re
|
|
16
|
+
import time
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
from oracle.config import ORACLE_DIR, load_config
|
|
21
|
+
from oracle.services.cross_memory import CrossMemory
|
|
22
|
+
from oracle.services.memory_reader import MemoryReader
|
|
23
|
+
|
|
24
|
+
# JSON file holding the most recently built federated graph; read by
# FederatedGraph._try_cached, written by _save_cache, removed by invalidate.
_CACHE_FILE = ORACLE_DIR / "federated_graph.json"
|
|
25
|
+
# JSON file persisting fact embeddings, keyed by FederatedGraph._cache_key
# (an MD5 of "<model>:<fact text>").
_EMBED_CACHE_FILE = ORACLE_DIR / "federated_embeddings.json"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _slugify(project_path: str) -> str:
    """Derive a short identifier for a project from its path.

    The result is ``<name>-<digest>`` where ``<name>`` is the final path
    component, lower-cased, with every run of characters outside
    ``[a-zA-Z0-9_-]`` collapsed to one ``-`` and outer dashes removed, and
    ``<digest>`` is the first six hex digits of the MD5 of the full path.
    ``"project"`` stands in whenever the name part would be empty.
    """
    base_name = Path(project_path).name
    if not base_name:
        base_name = "project"
    suffix = hashlib.md5(project_path.encode("utf-8")).hexdigest()[:6]
    cleaned = re.sub(r"[^a-zA-Z0-9_-]+", "-", base_name).strip("-").lower()
    if not cleaned:
        cleaned = "project"
    return f"{cleaned}-{suffix}"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _tokenize(text: str) -> list[str]:
    """Split *text* into lower-case tokens.

    A token is a run of two or more ASCII letters, digits or underscores.
    A falsy *text* (``None`` or ``""``) yields an empty list.
    """
    lowered = text.lower() if text else ""
    return re.findall(r"[A-Za-z0-9_]{2,}", lowered)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _tfidf_vectors(docs: list[str]) -> tuple[list[dict[str, float]], dict[str, float]]:
    """Return (per-doc sparse tf-idf dict, idf dict).

    Each document is tokenized with ``_tokenize``.  The idf of a term is
    ``log((n + 1) / (df + 1)) + 1`` where ``n`` is the number of documents
    (at least 1) and ``df`` the number of documents containing the term.
    Every per-document vector maps term -> ``tf * idf`` scaled to unit
    Euclidean length; a document without tokens gets an empty dict.
    """
    from collections import Counter

    token_lists = [_tokenize(doc) for doc in docs]

    # Document frequency: each term counts once per document it occurs in.
    doc_freq: Counter = Counter()
    for tokens in token_lists:
        doc_freq.update(set(tokens))

    total_docs = max(1, len(docs))
    idf = {
        term: math.log((total_docs + 1) / (freq + 1)) + 1
        for term, freq in doc_freq.items()
    }

    vectors: list[dict[str, float]] = []
    for tokens in token_lists:
        if not tokens:
            vectors.append({})
            continue
        size = len(tokens)
        weighted = {
            term: (count / size) * idf.get(term, 0.0)
            for term, count in Counter(tokens).items()
        }
        # ``or 1.0`` guards the division when every weight is zero.
        norm = math.sqrt(sum(w * w for w in weighted.values())) or 1.0
        vectors.append({term: w / norm for term, w in weighted.items()})
    return vectors, idf
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _sparse_cos(a: dict[str, float], b: dict[str, float]) -> float:
    """Return the dot product of two sparse vectors over their shared keys.

    For unit-length inputs (as produced by ``_tfidf_vectors``) this is the
    cosine similarity.  Returns ``0.0`` when either vector is empty or the
    two have no key in common.
    """
    if not (a and b):
        return 0.0
    common = set(a) & set(b)
    return sum(a[term] * b[term] for term in common) if common else 0.0
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _dense_cos_matrix(matrix: list[list[float]]) -> Any:
    """Return the pairwise cosine-similarity matrix of the rows of *matrix*.

    Rows are converted to a float32 NumPy array and scaled to unit length;
    the result is that array multiplied by its transpose.  All-zero rows
    are left as zeros, so their similarity to every row is 0.
    """
    import numpy as np  # lazy

    rows = np.array(matrix, dtype="float32")
    lengths = np.linalg.norm(rows, axis=1, keepdims=True)
    # Avoid dividing by zero for all-zero rows.
    lengths[lengths == 0] = 1.0
    unit_rows = rows / lengths
    return unit_rows @ unit_rows.T
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class FederatedGraph:
    """Build and cache a cross-project memory graph.

    Facts of every project become nodes.  Edges come from three sources:
    the per-project graph (scope ``within_project``), fact similarity across
    projects (scope ``cross_similar``) and insights shared by several
    projects (scope ``linked_via_insight``).  The assembled graph is cached
    in ``_CACHE_FILE``; fact embeddings are cached in ``_EMBED_CACHE_FILE``.
    """

    # Number of fact texts sent to the embedding backend per request.
    _EMBED_BATCH_SIZE = 32
    # Upper bound on the number of projects merged into one graph.
    _MAX_PROJECTS = 99

    def __init__(self, reader: MemoryReader | None = None,
                 cross_memory: CrossMemory | None = None,
                 ollama_bridge: Any | None = None):
        """Wire up collaborators, falling back to default instances.

        Args:
            reader: Source of per-project facts and graphs.
            cross_memory: Source of cross-project insights.
            ollama_bridge: Optional embedding backend; it is used only when
                it exposes an ``embed`` method (``embed_batch`` is preferred
                when also present).
        """
        self.reader = reader or MemoryReader()
        self.cross = cross_memory or CrossMemory()
        self.ollama = ollama_bridge
        self._cfg = load_config()
        self._embed_cache: dict[str, list[float]] = self._load_embed_cache()

    # ── Cache ────────────────────────────────────────────────────────

    @staticmethod
    def _write_json(target: Path, payload: Any) -> None:
        """Best-effort JSON dump of *payload* to *target*."""
        try:
            ORACLE_DIR.mkdir(parents=True, exist_ok=True)
            target.write_text(json.dumps(payload), encoding="utf-8")
        except (OSError, TypeError, ValueError):
            # Caches are an optimisation only: an unwritable directory or an
            # unserialisable payload must not fail the caller.
            pass

    def _load_embed_cache(self) -> dict[str, list[float]]:
        """Load the persisted embedding cache; return ``{}`` when unusable."""
        try:
            loaded = json.loads(_EMBED_CACHE_FILE.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # Missing, unreadable or corrupt file: start with an empty cache.
            return {}
        # Anything but a JSON object would break the ``.get`` lookups later.
        return loaded if isinstance(loaded, dict) else {}

    def _save_embed_cache(self):
        """Persist the in-memory embedding cache (best effort)."""
        self._write_json(_EMBED_CACHE_FILE, self._embed_cache)

    def _cache_key(self, fact_text: str, model: str) -> str:
        """Return the embedding-cache key for *fact_text* under *model*."""
        return hashlib.md5(f"{model}:{fact_text}".encode("utf-8")).hexdigest()

    def _project_mtime(self, project_path: str) -> float:
        """Return the newest mtime of the project's fact files.

        Looks at ``.c3/facts/facts.json`` and ``.c3/facts/memory_graph.json``;
        returns ``0.0`` when neither exists or can be stat'ed.
        """
        facts_dir = Path(project_path) / ".c3" / "facts"
        newest = 0.0
        for candidate in (facts_dir / "facts.json", facts_dir / "memory_graph.json"):
            if not candidate.is_file():
                continue
            try:
                newest = max(newest, candidate.stat().st_mtime)
            except OSError:
                pass  # file vanished or is unreadable; treat as absent
        return newest

    # ── Public API ──────────────────────────────────────────────────

    def build(self, project_paths: list[str], force: bool = False,
              min_sim: float | None = None,
              top_k: int | None = None,
              max_facts_per_project: int | None = None) -> dict:
        """Build (or load from cache) the federated graph.

        Args:
            project_paths: Project roots to merge; only the first 99 are used.
            force: Skip the cache and rebuild.
            min_sim: Minimum similarity for a ``cross_similar`` edge; defaults
                to config ``cross_sim_threshold`` (0.75).
            top_k: Maximum similar neighbours considered per fact; defaults to
                config ``cross_top_k_neighbors`` (3).
            max_facts_per_project: Cap on facts taken per project, highest
                ``relevance_count`` first; defaults to config
                ``cross_max_facts_per_project`` (200).

        Returns:
            A dict with ``nodes``, ``edges``, ``clusters``, ``projects``,
            ``stats`` and, for a non-empty input, ``generated_at`` and
            ``inputs``.
        """
        if not project_paths:
            return {"nodes": [], "edges": [], "clusters": [], "projects": [], "stats": {}}

        project_paths = project_paths[:self._MAX_PROJECTS]
        min_sim = float(min_sim if min_sim is not None else self._cfg.get("cross_sim_threshold", 0.75))
        top_k = int(top_k if top_k is not None else self._cfg.get("cross_top_k_neighbors", 3))
        max_per = int(max_facts_per_project if max_facts_per_project is not None
                      else self._cfg.get("cross_max_facts_per_project", 200))

        # A forced rebuild never uses the cache, so do not read it at all.
        if not force:
            cached = self._try_cached(project_paths, min_sim, top_k, max_per)
            if cached is not None:
                return cached

        # Snapshot mtimes BEFORE reading facts: a file modified while the
        # build runs then looks newer than the cache and triggers a rebuild.
        mtimes = {p: self._project_mtime(p) for p in project_paths}

        projects: list[dict] = []
        nodes: list[dict] = []
        edges: list[dict] = []
        for path in project_paths:
            info, project_nodes, project_edges = self._project_subgraph(path, max_per)
            projects.append(info)
            nodes.extend(project_nodes)
            edges.extend(project_edges)

        # Every node is a fact node, so all of them take part in similarity.
        cross_edges, sim_method = self._cross_similar_edges(nodes, min_sim, top_k)
        edges.extend(cross_edges)

        edges.extend(self._insight_edges(project_paths, {n["id"] for n in nodes}))

        clusters = self._clusters(nodes, [e for e in edges if e["scope"] == "within_project"])

        def count_scope(scope: str) -> int:
            return sum(1 for e in edges if e["scope"] == scope)

        result = {
            "nodes": nodes,
            "edges": edges,
            "clusters": clusters,
            "projects": projects,
            "stats": {
                "total_nodes": len(nodes),
                "total_edges": len(edges),
                "within_project": count_scope("within_project"),
                "cross_similar": count_scope("cross_similar"),
                "linked_via_insight": count_scope("linked_via_insight"),
                "projects": len(projects),
                "similarity_method": sim_method,
                "min_sim": min_sim,
                "top_k": top_k,
            },
            "generated_at": time.time(),
            "inputs": {
                "projects": sorted(project_paths),
                "min_sim": min_sim,
                "top_k": top_k,
                "max_facts_per_project": max_per,
                "mtimes": mtimes,
            },
        }
        self._save_cache(result)
        return result

    def invalidate(self):
        """Delete the cached graph file, ignoring a missing file or I/O errors."""
        try:
            _CACHE_FILE.unlink()
        except OSError:
            pass

    # ── Internals ───────────────────────────────────────────────────

    def _project_subgraph(self, path: str, max_per: int) -> tuple[dict, list[dict], list[dict]]:
        """Return ``(project info, fact nodes, within-project edges)`` for *path*."""
        slug = _slugify(path)
        # Archived facts are excluded.  Facts without an id cannot be
        # addressed as nodes, so they are skipped instead of raising KeyError.
        facts = [
            f for f in (self.reader.read_facts(path) or [])
            if f.get("lifecycle") != "archived" and f.get("id") is not None
        ]
        # ``or 0`` keeps the sort working when relevance_count is None.
        facts.sort(key=lambda f: f.get("relevance_count") or 0, reverse=True)
        facts = facts[:max_per]
        local_ids = {f["id"] for f in facts if f["id"]}

        info = {
            "slug": slug,
            "path": path,
            "name": Path(path).name,
            "fact_count": len(facts),
        }

        nodes = [
            {
                "id": f"{slug}:{f['id']}",
                "kind": "fact",
                "project": slug,
                "project_path": path,
                "local_id": f["id"],
                "label": (f.get("fact", "")[:80]),
                "text": f.get("fact", ""),
                "category": f.get("category", "general"),
                "relevance": f.get("relevance_count", 0),
                "confidence": f.get("confidence", 1.0),
            }
            for f in facts
        ]

        edges: list[dict] = []
        graph = self.reader.read_graph(path) or {}
        for e in graph.get("edges") or []:
            src = e.get("src")
            dst = e.get("dst")
            if src not in local_ids or dst not in local_ids:
                continue  # skip file/symbol targets + orphaned edges
            edges.append({
                "src": f"{slug}:{src}",
                "dst": f"{slug}:{dst}",
                "type": e.get("type", "co_recalled"),
                "weight": e.get("weight", 1.0),
                "scope": "within_project",
            })
        return info, nodes, edges

    def _try_cached(self, project_paths: list[str], min_sim: float,
                    top_k: int, max_per: int) -> dict | None:
        """Return the cached graph if it is still valid for these inputs.

        The cache is valid when it was built for the same projects and
        parameters, is younger than config ``federated_graph_ttl_sec``
        (default 3600) and no project's fact files changed mtime.  A missing,
        corrupt or malformed cache file is treated as a miss.
        """
        try:
            data = json.loads(_CACHE_FILE.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            return None
        if not isinstance(data, dict):
            return None
        inputs = data.get("inputs")
        if not isinstance(inputs, dict):
            return None

        ttl = float(self._cfg.get("federated_graph_ttl_sec", 3600))
        try:
            if sorted(inputs.get("projects", [])) != sorted(project_paths):
                return None
            if inputs.get("min_sim") != min_sim or inputs.get("top_k") != top_k:
                return None
            if inputs.get("max_facts_per_project") != max_per:
                return None
            if time.time() - float(data.get("generated_at", 0)) > ttl:
                return None
            cached_mtimes = inputs.get("mtimes", {})
            for p in project_paths:
                # Any change counts, not only a newer file: a deleted or
                # restored facts file also makes the cached graph stale.
                if abs(self._project_mtime(p) - float(cached_mtimes.get(p, 0))) > 0.001:
                    return None
        except (TypeError, ValueError, AttributeError):
            # Unexpected value types inside the cache file: rebuild.
            return None
        return data

    def _save_cache(self, data: dict):
        """Persist the built graph (best effort)."""
        self._write_json(_CACHE_FILE, data)

    def _cross_similar_edges(self, fact_nodes: list[dict], min_sim: float,
                             top_k: int) -> tuple[list[dict], str]:
        """Return ``(cross_similar edges, similarity method name)``.

        Embeddings are tried first when an embedding backend is configured;
        any failure there falls back to TF-IDF.  The method name is
        ``"none"`` for fewer than two facts, ``"embedding:<model>"`` or
        ``"tfidf"``.
        """
        if len(fact_nodes) < 2:
            return [], "none"

        model = self._cfg.get("embedding_model", "nomic-embed-text")
        if self.ollama is not None and hasattr(self.ollama, "embed"):
            try:
                vectors = self._embed_all(fact_nodes, model)
                if vectors:
                    sim = _dense_cos_matrix(vectors)
                    edges = self._top_k_edges_from_dense(fact_nodes, sim, min_sim, top_k)
                    return edges, f"embedding:{model}"
            except Exception:
                # The backend is optional and may fail in arbitrary ways
                # (network, missing numpy, bad vectors): use TF-IDF instead.
                pass

        docs = [n["text"] for n in fact_nodes]
        tfidf, _ = _tfidf_vectors(docs)
        return self._top_k_edges_from_sparse(fact_nodes, tfidf, min_sim, top_k), "tfidf"

    def _embed_all(self, fact_nodes: list[dict], model: str) -> list[list[float]] | None:
        """Return one embedding per fact node, or ``None`` if any is missing.

        Cached vectors are reused; the rest are requested in batches via
        ``embed_batch`` when the backend has it, else one by one via
        ``embed``.  Newly obtained vectors are persisted even when a later
        batch fails, so completed work is not repeated on the next run.
        """
        result: list[list[float] | None] = [None] * len(fact_nodes)
        pending: list[int] = []  # indices of nodes without a cached vector
        for i, node in enumerate(fact_nodes):
            vec = self._embed_cache.get(self._cache_key(node["text"], model))
            if vec:
                result[i] = vec
            else:
                pending.append(i)

        step = self._EMBED_BATCH_SIZE
        dirty = False
        try:
            for start in range(0, len(pending), step):
                batch_idx = pending[start:start + step]
                batch = [fact_nodes[i]["text"] for i in batch_idx]
                vecs = None
                if hasattr(self.ollama, "embed_batch"):
                    vecs = self.ollama.embed_batch(batch, model=model)
                if not vecs:
                    # No batch support (or empty reply): embed one at a time.
                    vecs = []
                    for text in batch:
                        single = self.ollama.embed(text, model=model)
                        if not single:
                            return None
                        vecs.append(single)
                for j, vec in zip(batch_idx, vecs):
                    if not vec:
                        return None
                    result[j] = vec
                    self._embed_cache[self._cache_key(fact_nodes[j]["text"], model)] = vec
                    dirty = True
        finally:
            if dirty:
                self._save_embed_cache()

        # A short batch reply leaves gaps; a partial matrix is unusable.
        if any(v is None for v in result):
            return None
        return result  # type: ignore[return-value]

    @staticmethod
    def _add_similar_edge(edges: list[dict], seen: set[tuple[str, str]],
                          id_a: str, id_b: str, score: float) -> None:
        """Append an undirected ``cross_similar`` edge unless already present."""
        key = (id_a, id_b) if id_a < id_b else (id_b, id_a)
        if key in seen:
            return
        seen.add(key)
        edges.append({
            "src": key[0],
            "dst": key[1],
            "type": "cross_similar",
            "weight": round(score, 4),
            "scope": "cross_similar",
        })

    def _top_k_edges_from_dense(self, fact_nodes, sim_matrix, min_sim: float,
                                top_k: int) -> list[dict]:
        """Return cross-project edges from a dense similarity matrix.

        For every fact, the ``top_k`` most similar facts of OTHER projects
        with similarity ``>= min_sim`` are linked; each pair appears once.
        *sim_matrix* is expected to be a square NumPy array aligned with
        *fact_nodes*.
        """
        import numpy as np

        projects = np.array([n["project"] for n in fact_nodes])
        seen: set[tuple[str, str]] = set()
        edges: list[dict] = []
        for i, node in enumerate(fact_nodes):
            row = sim_matrix[i].copy()
            # Mask the fact itself and every fact of the same project.
            row[projects == node["project"]] = -1.0
            # ``>=`` matches the per-neighbour test below and the sparse
            # path, so a best match exactly at the threshold is kept.
            if not np.any(row >= min_sim):
                continue
            for j in np.argsort(-row)[:top_k]:
                score = float(row[j])
                if score < min_sim:
                    break  # sorted descending: nothing further qualifies
                self._add_similar_edge(edges, seen, node["id"],
                                       fact_nodes[int(j)]["id"], score)
        return edges

    def _top_k_edges_from_sparse(self, fact_nodes, vectors, min_sim: float,
                                 top_k: int) -> list[dict]:
        """Return cross-project edges from sparse (TF-IDF) vectors.

        Same linking rule as the dense variant, with similarity computed
        pairwise by ``_sparse_cos``.
        """
        seen: set[tuple[str, str]] = set()
        edges: list[dict] = []
        for i, node in enumerate(fact_nodes):
            own_project = node["project"]
            scored: list[tuple[float, int]] = []
            for j, other in enumerate(fact_nodes):
                if i == j or other["project"] == own_project:
                    continue
                score = _sparse_cos(vectors[i], vectors[j])
                if score >= min_sim:
                    scored.append((score, j))
            scored.sort(reverse=True)
            for score, j in scored[:top_k]:
                self._add_similar_edge(edges, seen, node["id"],
                                       fact_nodes[j]["id"], score)
        return edges

    def _insight_edges(self, project_paths: list[str], node_ids: set[str]) -> list[dict]:
        """Link pairs of projects that share an insight (project-level edge).

        Edge endpoints are ``project:<slug>`` ids.  *node_ids* is accepted
        for interface compatibility and is not used.  Failure to load the
        insights yields no edges.
        """
        from itertools import combinations

        edges: list[dict] = []
        try:
            insights = self.cross.get_all_insights()
        except Exception:
            return edges
        slug_by_path = {p: _slugify(p) for p in project_paths}
        for ins in insights or []:
            # Keep first-seen order but drop repeats: a path listed twice
            # would otherwise produce a project-to-itself edge.
            srcs = list(dict.fromkeys(
                p for p in (ins.get("source_projects") or []) if p in slug_by_path
            ))
            for first, second in combinations(srcs, 2):
                edges.append({
                    "src": f"project:{slug_by_path[first]}",
                    "dst": f"project:{slug_by_path[second]}",
                    "type": ins.get("type", "insight"),
                    "weight": 1.0,
                    "scope": "linked_via_insight",
                    "insight_id": ins.get("id", ""),
                })
        return edges

    def _clusters(self, nodes: list[dict], within_edges: list[dict]) -> list[list[str]]:
        """Return connected components of at least three nodes, largest first.

        Components are computed over *within_edges* treated as undirected;
        nodes without any such edge are ignored.
        """
        from collections import defaultdict

        adj: dict[str, set[str]] = defaultdict(set)
        for e in within_edges:
            adj[e["src"]].add(e["dst"])
            adj[e["dst"]].add(e["src"])

        visited: set[str] = set()
        clusters: list[list[str]] = []
        for node in nodes:
            nid = node["id"]
            if nid in visited or nid not in adj:
                continue
            # Iterative depth-first walk of the component containing nid.
            stack = [nid]
            cluster: list[str] = []
            while stack:
                cur = stack.pop()
                if cur in visited:
                    continue
                visited.add(cur)
                cluster.append(cur)
                stack.extend(nb for nb in adj.get(cur, ()) if nb not in visited)
            if len(cluster) >= 3:
                clusters.append(cluster)
        clusters.sort(key=len, reverse=True)
        return clusters
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""Validates per-project .c3/ structure and fact integrity."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from oracle.services.memory_reader import MemoryReader
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class HealthChecker:
    """Heuristic health checks on project memory — no LLM calls."""

    def __init__(self, reader: "MemoryReader"):
        """Store the reader used to load facts and statistics."""
        self.reader = reader

    def check(self, project_path: str) -> dict:
        """Run all checks and return a health report.

        The report contains ``project_path``, ``status`` (``"error"`` if any
        error issue was found, else ``"warning"`` if any warning, else
        ``"ok"``), ``structure_ok``, ``fact_stats``, ``graph_stats``,
        ``freshness`` and the list of ``issues`` (dicts with ``severity``
        and ``message``).
        """
        c3_dir = Path(project_path) / ".c3"
        issues: list[dict] = []

        def report(severity: str, message: str) -> None:
            issues.append({"severity": severity, "message": message})

        # ── Structure check ──
        structure_ok = True
        for required in ["facts/facts.json"]:
            if not (c3_dir / required).is_file():
                structure_ok = False
                report("error", f"Missing {required}")

        for optional in ["facts/memory_graph.json", "config.json"]:
            if not (c3_dir / optional).is_file():
                report("warning", f"Missing optional {optional}")

        # ── Validate JSON parse ──
        for json_file in ["facts/facts.json", "facts/memory_graph.json"]:
            fp = c3_dir / json_file
            if not fp.is_file():
                continue
            try:
                with open(fp, encoding="utf-8") as f:
                    json.load(f)
            except Exception as e:
                structure_ok = False
                report("error", f"Invalid JSON in {json_file}: {e}")

        # ── Fact integrity ──
        facts = self.reader.read_facts(project_path)
        seen_ids = set()
        required_fields = {"id", "fact", "category", "timestamp", "lifecycle"}
        for fact in facts:
            fid = fact.get("id", "")
            missing = required_fields - set(fact.keys())
            if missing:
                report("warning", f"Fact {fid}: missing fields {missing}")
            if not fid:
                # A fact without an id is already reported above; it must
                # not also be flagged as a duplicate of other id-less facts.
                continue
            if fid in seen_ids:
                report("warning", f"Duplicate fact ID: {fid}")
            seen_ids.add(fid)

        # ── Graph integrity ──
        graph_stats = self.reader.get_graph_stats(project_path)
        if (graph_stats.get("orphaned_edges") or 0) > 0:
            report(
                "warning",
                f"{graph_stats['orphaned_edges']} orphaned graph edges (reference deleted facts)",
            )

        # ── Tier distribution ──
        fact_stats = self.reader.get_fact_stats(project_path)

        # ── Freshness ──
        freshness = self._compute_freshness(facts)
        if (freshness.get("days_since_last_fact") or 0) > 30:
            report("info", "No new facts in over 30 days")

        # ── Overall status ──
        severities = {issue["severity"] for issue in issues}
        if "error" in severities:
            status = "error"
        elif "warning" in severities:
            status = "warning"
        else:
            status = "ok"

        return {
            "project_path": project_path,
            "status": status,
            "structure_ok": structure_ok,
            "fact_stats": fact_stats,
            "graph_stats": graph_stats,
            "freshness": freshness,
            "issues": issues,
        }

    def _compute_freshness(self, facts: list[dict]) -> dict:
        """Compute how fresh the memory is.

        Returns ``last_fact_timestamp`` (ISO string of the newest parseable
        fact timestamp, as stored) and ``days_since_last_fact`` (whole days
        between that timestamp and now, in UTC).  Both are ``None`` when no
        fact carries a parseable ISO timestamp.
        """
        empty = {"last_fact_timestamp": None, "days_since_last_fact": None}
        if not facts:
            return empty

        from datetime import datetime, timezone

        def as_utc(moment):
            # Naive timestamps are interpreted as UTC.
            return moment.replace(tzinfo=timezone.utc) if moment.tzinfo is None else moment

        parsed = []
        for fact in facts:
            ts = fact.get("timestamp")
            if not ts:
                continue
            try:
                parsed.append(datetime.fromisoformat(ts))
            except (TypeError, ValueError):
                continue  # non-string or non-ISO value: ignore this fact

        if not parsed:
            return empty

        # Compare on a common (aware) basis: ``max`` over a mix of naive and
        # tz-aware datetimes would raise TypeError.
        latest = max(parsed, key=as_utc)
        days = (datetime.now(timezone.utc) - as_utc(latest)).days

        return {
            "last_fact_timestamp": latest.isoformat(),
            "days_since_last_fact": days,
        }
|