code-context-control 2.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__init__.py +1 -0
- cli/_hook_utils.py +99 -0
- cli/c3.py +6152 -0
- cli/commands/__init__.py +1 -0
- cli/commands/common.py +312 -0
- cli/commands/parser.py +286 -0
- cli/docs.html +3178 -0
- cli/edits.html +878 -0
- cli/hook_auto_snapshot.py +142 -0
- cli/hook_c3_signal.py +61 -0
- cli/hook_c3read.py +116 -0
- cli/hook_edit_ledger.py +213 -0
- cli/hook_edit_unlock.py +170 -0
- cli/hook_filter.py +130 -0
- cli/hook_ghost_files.py +238 -0
- cli/hook_pretool_enforce.py +334 -0
- cli/hook_read.py +200 -0
- cli/hook_session_stats.py +62 -0
- cli/hook_terse_advisor.py +190 -0
- cli/hub.html +3764 -0
- cli/hub_server.py +1619 -0
- cli/mcp_proxy.py +428 -0
- cli/mcp_server.py +660 -0
- cli/server.py +2985 -0
- cli/tools/__init__.py +4 -0
- cli/tools/_helpers.py +65 -0
- cli/tools/agent.py +1165 -0
- cli/tools/compress.py +215 -0
- cli/tools/delegate.py +1184 -0
- cli/tools/edit.py +313 -0
- cli/tools/edits.py +118 -0
- cli/tools/filter.py +285 -0
- cli/tools/impact.py +163 -0
- cli/tools/memory.py +469 -0
- cli/tools/read.py +224 -0
- cli/tools/search.py +337 -0
- cli/tools/session.py +95 -0
- cli/tools/shell.py +193 -0
- cli/tools/status.py +306 -0
- cli/tools/validate.py +310 -0
- cli/ui/api.js +36 -0
- cli/ui/app.js +207 -0
- cli/ui/components/chat.js +758 -0
- cli/ui/components/dashboard.js +689 -0
- cli/ui/components/edits.js +220 -0
- cli/ui/components/instructions.js +481 -0
- cli/ui/components/memory.js +626 -0
- cli/ui/components/sessions.js +606 -0
- cli/ui/components/settings.js +1404 -0
- cli/ui/components/sidebar.js +156 -0
- cli/ui/icons.js +51 -0
- cli/ui/shared.js +119 -0
- cli/ui/theme.js +22 -0
- cli/ui.html +168 -0
- cli/ui_legacy.html +6797 -0
- cli/ui_nano.html +503 -0
- code_context_control-2.28.0.dist-info/METADATA +248 -0
- code_context_control-2.28.0.dist-info/RECORD +150 -0
- code_context_control-2.28.0.dist-info/WHEEL +5 -0
- code_context_control-2.28.0.dist-info/entry_points.txt +4 -0
- code_context_control-2.28.0.dist-info/licenses/LICENSE +201 -0
- code_context_control-2.28.0.dist-info/top_level.txt +5 -0
- core/__init__.py +75 -0
- core/config.py +269 -0
- core/ide.py +188 -0
- oracle/__init__.py +1 -0
- oracle/config.py +75 -0
- oracle/oracle.html +3900 -0
- oracle/oracle_server.py +663 -0
- oracle/services/__init__.py +1 -0
- oracle/services/c3_bridge.py +210 -0
- oracle/services/chat_engine.py +1103 -0
- oracle/services/chat_store.py +155 -0
- oracle/services/cross_memory.py +154 -0
- oracle/services/federated_graph.py +463 -0
- oracle/services/health_checker.py +117 -0
- oracle/services/insight_engine.py +307 -0
- oracle/services/memory_reader.py +106 -0
- oracle/services/memory_writer.py +182 -0
- oracle/services/ollama_bridge.py +332 -0
- oracle/services/project_scanner.py +87 -0
- oracle/services/review_agent.py +206 -0
- services/__init__.py +1 -0
- services/activity_log.py +93 -0
- services/agent_base.py +124 -0
- services/agents.py +1529 -0
- services/auto_memory.py +407 -0
- services/bench/__init__.py +6 -0
- services/bench/external/__init__.py +29 -0
- services/bench/external/aider_polyglot.py +405 -0
- services/bench/external/swe_bench.py +485 -0
- services/benchmark_dashboard.py +596 -0
- services/claude_md.py +785 -0
- services/compressor.py +592 -0
- services/context_snapshot.py +356 -0
- services/conversation_store.py +870 -0
- services/doc_index.py +537 -0
- services/e2e_benchmark.py +2884 -0
- services/e2e_evaluator.py +396 -0
- services/e2e_tasks.py +743 -0
- services/edit_ledger.py +459 -0
- services/embedding_index.py +341 -0
- services/error_reporting.py +123 -0
- services/file_memory.py +734 -0
- services/hub_service.py +585 -0
- services/indexer.py +712 -0
- services/memory.py +318 -0
- services/memory_consolidator.py +538 -0
- services/memory_graph.py +382 -0
- services/memory_grounder.py +304 -0
- services/memory_scorer.py +246 -0
- services/metrics.py +86 -0
- services/notifications.py +209 -0
- services/ollama_client.py +201 -0
- services/output_filter.py +488 -0
- services/parser.py +1238 -0
- services/project_manager.py +579 -0
- services/protocol.py +306 -0
- services/proxy_state.py +152 -0
- services/retrieval_broker.py +129 -0
- services/router.py +414 -0
- services/runtime.py +326 -0
- services/session_benchmark.py +1945 -0
- services/session_manager.py +1026 -0
- services/session_preloader.py +251 -0
- services/text_index.py +90 -0
- services/tool_classifier.py +176 -0
- services/transcript_index.py +340 -0
- services/validation_cache.py +155 -0
- services/vector_store.py +299 -0
- services/version_tracker.py +271 -0
- services/watcher.py +192 -0
- tui/__init__.py +0 -0
- tui/backend.py +59 -0
- tui/main.py +145 -0
- tui/screens/__init__.py +1 -0
- tui/screens/benchmark_view.py +109 -0
- tui/screens/claudemd_view.py +46 -0
- tui/screens/compress_view.py +52 -0
- tui/screens/index_view.py +74 -0
- tui/screens/init_view.py +82 -0
- tui/screens/mcp_view.py +73 -0
- tui/screens/optimize_view.py +41 -0
- tui/screens/pipe_view.py +46 -0
- tui/screens/projects_view.py +355 -0
- tui/screens/search_view.py +55 -0
- tui/screens/session_view.py +143 -0
- tui/screens/stats.py +158 -0
- tui/screens/ui_view.py +54 -0
- tui/theme.tcss +335 -0
|
@@ -0,0 +1,538 @@
|
|
|
1
|
+
"""Memory Consolidator — 4-phase pipeline for memory maintenance.
|
|
2
|
+
|
|
3
|
+
Inspired by biological memory consolidation ("sleep cycles"):
|
|
4
|
+
Phase 1 (Triage): Score all facts, detect new co-recall edges
|
|
5
|
+
Phase 2 (Merge): Cluster similar facts, merge duplicates
|
|
6
|
+
Phase 3 (Reinforce): Pre-warm graph neighbourhood for working files
|
|
7
|
+
Phase 4 (Prune): Archive low-salience facts, decay stale edges
|
|
8
|
+
|
|
9
|
+
Also provides cross-session relevance:
|
|
10
|
+
- Session fingerprints (files touched + facts recalled + decisions)
|
|
11
|
+
- Session similarity matching for context priming
|
|
12
|
+
- Fact lifespan analysis (foundational vs contextual)
|
|
13
|
+
- Trend detection (hot zones under active development)
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import json
import logging
import re
from collections import Counter
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

from services.memory_scorer import MemoryScorer
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class SessionFingerprint:
    """Lightweight summary of one session, used to compare sessions.

    Stores the session id, the set of files touched, the set of recalled
    fact ids, the list of decisions and a timestamp string.
    """

    def __init__(
        self,
        session_id: str,
        files: list[str],
        facts_recalled: list[str],
        decisions: list[str],
        timestamp: str = "",
    ):
        """Build a fingerprint.

        Args:
            session_id: Identifier of the session being summarised.
            files: Paths touched during the session (duplicates collapse).
            facts_recalled: Ids of facts recalled in the session
                (duplicates collapse).
            decisions: Decision texts; stored as the list that was passed.
            timestamp: Timestamp string; when empty, the current UTC time
                in ISO-8601 form is used instead.
        """
        self.session_id = session_id
        # Sets: only membership matters for the overlap computation.
        self.files = set(files)
        self.facts_recalled = set(facts_recalled)
        self.decisions = decisions
        if timestamp:
            self.timestamp = timestamp
        else:
            self.timestamp = datetime.now(timezone.utc).isoformat()

    def similarity(self, other: "SessionFingerprint") -> float:
        """Return a weighted blend of file overlap and fact overlap.

        Both overlaps are Jaccard indices; files carry weight 0.6 and
        recalled facts 0.4.
        """
        files_overlap = _jaccard_sets(self.files, other.files)
        facts_overlap = _jaccard_sets(self.facts_recalled, other.facts_recalled)
        # Files get the larger weight: they are treated as the steadier signal.
        return 0.6 * files_overlap + 0.4 * facts_overlap

    def to_dict(self) -> dict:
        """Return a JSON-serialisable dict; the two sets become sorted lists."""
        payload: dict = {"session_id": self.session_id}
        payload["files"] = sorted(self.files)
        payload["facts_recalled"] = sorted(self.facts_recalled)
        payload["decisions"] = self.decisions
        payload["timestamp"] = self.timestamp
        return payload

    @classmethod
    def from_dict(cls, data: dict) -> "SessionFingerprint":
        """Rebuild a fingerprint from a ``to_dict()``-style mapping.

        Missing keys fall back to an empty string or an empty list.
        """
        lookup = data.get
        return cls(
            lookup("session_id", ""),
            lookup("files", []),
            lookup("facts_recalled", []),
            lookup("decisions", []),
            lookup("timestamp", ""),
        )
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class MemoryConsolidator:
    """Orchestrates the 4-phase memory consolidation pipeline.

    Phases, in execution order:
      1. triage    - score active facts, record co-recall edges and the
                     session fingerprint
      2. merge     - fold near-duplicate facts together
      3. reinforce - collect ids of facts likely relevant to the session
      4. prune     - delete aged low-tier facts, decay graph edges

    The memory store must expose a ``facts`` iterable of dicts plus
    ``update_fact(id, text, category)`` and ``delete_fact(id)``.  When
    ``graph`` is falsy the consolidator makes no graph calls itself (the
    graph is still handed to the scorer as-is).
    """

    # Upper bound on the merges one phase_merge() call may perform.
    MERGE_CAP = 50
    # Word-overlap similarity above which two facts count as duplicates.
    MERGE_THRESHOLD = 0.55
    # Duplicates below this similarity differ enough that their texts are
    # combined; at or above it the keeper's text is left untouched.
    REWRITE_THRESHOLD = 0.85
    # Minimum age in days before a fact of the given tier is pruned.
    PRUNE_AGE_DAYS = {"ephemeral": 3, "dormant": 14}
    # Number of newest "auto:session" facts that survive pruning.
    SESSION_FACTS_KEPT = 5
    # Number of most recent session fingerprints kept on disk.
    MAX_FINGERPRINTS = 100

    _log = logging.getLogger(__name__)

    def __init__(
        self,
        memory_store: Any,
        graph: Any,
        scorer: MemoryScorer | None = None,
        project_path: str = "",
        data_dir: str = ".c3/facts",
    ):
        """Wire up collaborators and load persisted fingerprints.

        Args:
            memory_store: Fact store (see class docstring for the API used).
            graph: Memory graph, or a falsy value to disable graph steps.
            scorer: Salience scorer; a default ``MemoryScorer`` when omitted.
            project_path: Project root; the current directory when empty.
            data_dir: Directory (relative to the project root) holding
                ``session_fingerprints.json``.  Created if missing.
        """
        self.memory = memory_store
        self.graph = graph
        self.scorer = scorer or MemoryScorer()
        self.project_path = Path(project_path) if project_path else Path(".")
        self.data_dir = self.project_path / data_dir
        self.data_dir.mkdir(parents=True, exist_ok=True)
        self.fingerprints_file = self.data_dir / "session_fingerprints.json"
        self.fingerprints: list[SessionFingerprint] = self._load_fingerprints()

    # ── Full pipeline ───────────────────────────────────────────────

    def run(self, current_session: dict | None = None) -> dict:
        """Execute all phases and return their combined stats.

        The reinforce phase only runs when ``current_session`` is truthy.

        Returns:
            ``{"phases": {<phase name>: <phase stats>}, "total_facts": n}``
            where ``n`` counts the facts still active after pruning.
        """
        phases: dict[str, Any] = {}
        phases["triage"] = self.phase_triage(current_session)
        phases["merge"] = self.phase_merge()
        if current_session:
            phases["reinforce"] = self.phase_reinforce(current_session)
        phases["prune"] = self.phase_prune()
        return {"phases": phases, "total_facts": len(self._active_facts())}

    # ── Phase 1: Triage ─────────────────────────────────────────────

    def phase_triage(self, session: dict | None = None) -> dict:
        """Score all active facts and, given a session, fingerprint it.

        Returns:
            ``{"scored": int, "tiers": {tier: count}, "co_recall_edges": n}``
            where ``n`` is whatever ``graph.record_co_recall`` returned
            (0 when it was not called).
        """
        facts = self._active_facts()
        scores = self.scorer.score_batch(facts, self.graph)
        tier_counts = Counter(s["tier"] for s in scores)

        co_recall_edges = 0
        if session:
            sid = session.get("id", "")
            # Facts whose recall history mentions this session.
            recalled_ids = [
                f["id"] for f in facts
                if sid and sid in (f.get("recall_sessions") or [])
            ]
            if len(recalled_ids) >= 2 and self.graph:
                co_recall_edges = self.graph.record_co_recall(recalled_ids)
            self._record_fingerprint(session, recalled_ids)

        return {
            "scored": len(scores),
            "tiers": dict(tier_counts),
            "co_recall_edges": co_recall_edges,
        }

    def _record_fingerprint(self, session: dict, recalled_ids: list[str]) -> None:
        """Store the fingerprint of *session* and persist the trimmed history."""
        sid = session.get("id", "")
        decisions = [
            d["decision"] for d in session.get("decisions", [])
            if d.get("decision")
        ]
        fingerprint = SessionFingerprint(
            session_id=sid,
            files=self._session_files(session),
            facts_recalled=recalled_ids,
            decisions=decisions,
        )
        if sid:
            # Triage may run several times per session; keep only the
            # latest fingerprint so trend counts are per distinct session.
            self.fingerprints = [
                old for old in self.fingerprints if old.session_id != sid
            ]
        self.fingerprints.append(fingerprint)
        self.fingerprints = self.fingerprints[-self.MAX_FINGERPRINTS:]
        self._save_fingerprints()

    # ── Phase 2: Merge ──────────────────────────────────────────────

    def phase_merge(self) -> dict:
        """Merge duplicate facts using graph clusters and text similarity.

        Facts sharing a graph cluster are compared first, then all facts
        that belong to no cluster are compared with each other.  At most
        ``MERGE_CAP`` merges are performed per call.

        Returns:
            ``{"merged": 0}`` when fewer than two active facts exist,
            otherwise ``{"merged": int, "deleted": int}``.
        """
        facts = self._active_facts()
        if len(facts) < 2:
            return {"merged": 0}

        merged = 0
        to_delete: set[str] = set()
        clusters = self.graph.detect_clusters(min_cluster_size=2) if self.graph else []

        clustered_ids: set[str] = set()
        for cluster in clusters:
            members = set(cluster)  # built once per cluster, not per fact
            clustered_ids |= members
            group = [
                f for f in facts
                if f["id"] in members and f["id"] not in to_delete
            ]
            merged += self._merge_similar(group, to_delete, self.MERGE_CAP - merged)

        # Global pass over facts that no cluster covers.
        loners = [f for f in facts if f["id"] not in clustered_ids]
        merged += self._merge_similar(loners, to_delete, self.MERGE_CAP - merged)

        self._delete_facts(to_delete)
        return {"merged": merged, "deleted": len(to_delete)}

    def _merge_similar(
        self,
        candidates: list[dict],
        to_delete: set[str],
        budget: int,
    ) -> int:
        """Merge near-duplicate pairs among *candidates*.

        Victim ids are added to *to_delete*; nothing is deleted here.
        Stops after *budget* merges.  Returns the number of merges made.
        """
        done = 0
        for i, a in enumerate(candidates):
            if done >= budget:
                break
            if a["id"] in to_delete:
                continue
            for b in candidates[i + 1:]:
                if done >= budget:
                    break
                if b["id"] in to_delete:
                    continue
                sim = _jaccard_text(a["fact"], b["fact"])
                if sim <= self.MERGE_THRESHOLD:
                    continue
                keeper, victim = self._pick_keeper(a, b)
                if sim < self.REWRITE_THRESHOLD and not self._rewrite_keeper(keeper, victim):
                    # The combined text could not be stored; deleting the
                    # victim now would lose its content, so keep both.
                    continue
                if self.graph:
                    # Tell the graph the victim was refined into the keeper.
                    self.graph.record_refinement(victim["id"], keeper["id"])
                to_delete.add(victim["id"])
                done += 1
                if victim is a:
                    # `a` is now scheduled for deletion: it must not go on
                    # to absorb (and delete) further facts.
                    break
        return done

    def _rewrite_keeper(self, keeper: dict, victim: dict) -> bool:
        """Store the combined keeper+victim text on the keeper.

        Returns True on success, False when the store rejected the update.
        """
        combined = _merge_texts(keeper["fact"], victim["fact"])
        try:
            self.memory.update_fact(
                keeper["id"], combined, keeper.get("category", "general")
            )
        except Exception:  # store API is opaque here; stay best-effort
            self._log.warning(
                "update_fact failed for %r; merge skipped", keeper["id"],
                exc_info=True,
            )
            return False
        # Keep the local view in sync for later comparisons in this run.
        keeper["fact"] = combined
        return True

    def _delete_facts(self, fact_ids: set[str]) -> None:
        """Delete facts from the store and drop their graph nodes.

        Best-effort: a failing ``delete_fact`` is logged and the graph
        node is still removed.
        """
        for fid in fact_ids:
            try:
                self.memory.delete_fact(fid)
            except Exception:  # store API is opaque here; stay best-effort
                self._log.warning("delete_fact failed for %r", fid, exc_info=True)
            if self.graph:
                self.graph.remove_node(fid)

    # ── Phase 3: Reinforce ──────────────────────────────────────────

    def phase_reinforce(self, session: dict) -> dict:
        """Collect fact ids likely relevant to the current working context.

        Sources: facts recalled in the 3 most similar past sessions, and
        (with a graph) spreading activation seeded from facts touching the
        first 10 files of the session.

        Returns:
            ``{"similar_sessions": int, "primed_facts": int,
            "primed_ids": [up to 20 ids, sorted]}``.
        """
        current_files = self._session_files(session)
        current_fp = SessionFingerprint(
            session_id=session.get("id", ""),
            files=current_files,
            facts_recalled=[],
            decisions=[],
        )
        similar_sessions = self.find_similar_sessions(current_fp, top_k=3)

        primed_fact_ids: set[str] = set()
        for past_fp, _similarity in similar_sessions:
            primed_fact_ids.update(past_fp.facts_recalled)

        if self.graph:
            seeds: list[str] = []
            for path in current_files[:10]:
                seeds.extend(self.graph.get_facts_touching(path))
            if seeds:
                activated = self.graph.spreading_activation(
                    seed_ids=seeds, max_depth=2, max_results=20
                )
                primed_fact_ids.update(hit["id"] for hit in activated)

        return {
            "similar_sessions": len(similar_sessions),
            "primed_facts": len(primed_fact_ids),
            "primed_ids": sorted(primed_fact_ids)[:20],
        }

    # ── Phase 4: Prune ──────────────────────────────────────────────

    def phase_prune(self) -> dict:
        """Remove low-salience facts and decay stale graph edges.

        Removal goes through ``memory.delete_fact``.  A fact is removed when
        its tier appears in ``PRUNE_AGE_DAYS`` and it is at least that many
        days old, or when it is an ``auto:session`` fact beyond the newest
        ``SESSION_FACTS_KEPT``.

        Returns:
            ``{"archived": int, "edges_decayed": <graph.decay_edges result
            or 0>, "remaining": int}``.
        """
        facts = self._active_facts()
        archived = 0
        to_delete: set[str] = set()

        for f in facts:
            tier = self.scorer.score(f, self.graph)["tier"]
            min_age = self.PRUNE_AGE_DAYS.get(tier)
            if min_age is not None and self._fact_age_days(f) >= min_age:
                to_delete.add(f["id"])
                archived += 1

        # Rolling window over session summaries, newest first.
        session_facts = sorted(
            (
                f for f in facts
                if f.get("category") == "auto:session"
                and f["id"] not in to_delete
            ),
            key=lambda f: f.get("timestamp") or "",
            reverse=True,
        )
        for f in session_facts[self.SESSION_FACTS_KEPT:]:
            to_delete.add(f["id"])
            archived += 1

        self._delete_facts(to_delete)

        edges_decayed = 0
        if self.graph:
            edges_decayed = self.graph.decay_edges(half_life_days=30.0)

        return {
            "archived": archived,
            "edges_decayed": edges_decayed,
            "remaining": len(facts) - archived,
        }

    # ── Cross-session analysis ──────────────────────────────────────

    def find_similar_sessions(
        self,
        target: SessionFingerprint,
        top_k: int = 5,
    ) -> list[tuple[SessionFingerprint, float]]:
        """Return up to *top_k* stored fingerprints most similar to *target*.

        Fingerprints sharing the target's session id are skipped, as are
        those with similarity of 0.1 or less.  Results are
        ``(fingerprint, similarity rounded to 4 places)`` pairs, best first.
        """
        results: list[tuple[SessionFingerprint, float]] = []
        for fp in self.fingerprints:
            if fp.session_id == target.session_id:
                continue
            sim = target.similarity(fp)
            if sim > 0.1:
                results.append((fp, round(sim, 4)))
        results.sort(key=lambda pair: pair[1], reverse=True)
        return results[:top_k]

    def fact_lifespan_analysis(self) -> dict:
        """Classify active facts as foundational or contextual.

        Foundational: recalled in 3 or more distinct sessions.
        Contextual: recalled in at most 1 session and older than 7 days.
        Facts matching neither rule are omitted.

        Returns:
            ``{"foundational": [...], "contextual": [...], "total_facts": n}``
            with each list capped at 20 entries; foundational entries are
            ordered by session spread (descending), contextual entries by
            salience (ascending).
        """
        facts = self._active_facts()
        foundational: list[dict] = []
        contextual: list[dict] = []

        for f in facts:
            session_count = len(set(f.get("recall_sessions") or []))
            score = self.scorer.score(f, self.graph)
            entry = {
                "id": f["id"],
                "fact": f["fact"][:80],
                "session_spread": session_count,
                "salience": score["salience"],
                "tier": score["tier"],
            }
            if session_count >= 3:
                foundational.append(entry)
            elif session_count <= 1 and self._fact_age_days(f) > 7:
                contextual.append(entry)

        foundational.sort(key=lambda e: e["session_spread"], reverse=True)
        contextual.sort(key=lambda e: e["salience"])

        return {
            "foundational": foundational[:20],
            "contextual": contextual[:20],
            "total_facts": len(facts),
        }

    def detect_trends(self) -> dict:
        """Detect hot zones from the 20 most recent session fingerprints.

        A file or fact is "hot" when it appears in at least 2 of those
        fingerprints; at most 10 of each are reported.  Hot facts still
        present in the store also carry the first 80 characters of their
        text under ``"fact"``.
        """
        recent = self.fingerprints[-20:]
        if not recent:
            return {"hot_files": [], "hot_facts": [], "sessions_analyzed": 0}

        file_counts: Counter = Counter()
        fact_counts: Counter = Counter()
        for fp in recent:
            file_counts.update(fp.files)
            fact_counts.update(fp.facts_recalled)

        hot_files = [
            {"file": path, "sessions": count}
            for path, count in file_counts.most_common(10)
            if count >= 2
        ]
        hot_facts = [
            {"fact_id": fid, "sessions": count}
            for fid, count in fact_counts.most_common(10)
            if count >= 2
        ]

        # Enrich hot_facts with fact text where the fact still exists.
        facts_by_id = {f["id"]: f for f in self.memory.facts}
        for hot in hot_facts:
            fact = facts_by_id.get(hot["fact_id"])
            if fact:
                hot["fact"] = fact["fact"][:80]

        return {
            "hot_files": hot_files,
            "hot_facts": hot_facts,
            "sessions_analyzed": len(recent),
        }

    # ── Helpers ─────────────────────────────────────────────────────

    def _active_facts(self) -> list[dict]:
        """Return the stored facts whose ``lifecycle`` is ``"active"``."""
        return [f for f in self.memory.facts if f.get("lifecycle") == "active"]

    @staticmethod
    def _session_files(session: dict) -> list[str]:
        """Return the non-empty ``file`` values of ``session["files_touched"]``."""
        return [
            fc["file"] for fc in session.get("files_touched", [])
            if fc.get("file")
        ]

    def _pick_keeper(self, a: dict, b: dict) -> tuple[dict, dict]:
        """Return ``(keeper, victim)``: the more salient fact is kept.

        On equal salience the first argument is kept.
        """
        salience_a = self.scorer.score(a, self.graph)["salience"]
        salience_b = self.scorer.score(b, self.graph)["salience"]
        return (a, b) if salience_a >= salience_b else (b, a)

    @staticmethod
    def _fact_age_days(fact: dict) -> float:
        """Return the age of *fact* in days, or 0.0 when it cannot be told.

        The age is measured from ``last_accessed_at`` when set, otherwise
        from ``timestamp``.  Naive timestamps are treated as UTC.
        """
        ref = fact.get("last_accessed_at") or fact.get("timestamp")
        if not ref:
            return 0.0
        try:
            if isinstance(ref, str) and ref.endswith("Z"):
                # datetime.fromisoformat() only accepts a trailing "Z"
                # from Python 3.11 on; normalise it for older versions.
                ref = ref[:-1] + "+00:00"
            dt = datetime.fromisoformat(ref)
            if dt.tzinfo is None:
                dt = dt.replace(tzinfo=timezone.utc)
            return (datetime.now(timezone.utc) - dt).total_seconds() / 86400
        except (ValueError, TypeError):
            return 0.0

    def _load_fingerprints(self) -> list[SessionFingerprint]:
        """Load persisted fingerprints; unreadable data yields fewer or none.

        A missing, unreadable or non-list file gives an empty list.
        Individual malformed entries are skipped rather than discarding
        the whole history.
        """
        if not self.fingerprints_file.exists():
            return []
        try:
            with open(self.fingerprints_file, encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError):  # ValueError covers bad JSON/encoding
            self._log.warning(
                "could not read %s; starting with no fingerprints",
                self.fingerprints_file, exc_info=True,
            )
            return []
        if not isinstance(data, list):
            self._log.warning("%s does not hold a list; ignored", self.fingerprints_file)
            return []

        loaded: list[SessionFingerprint] = []
        for entry in data:
            try:
                loaded.append(SessionFingerprint.from_dict(entry))
            except (AttributeError, TypeError):
                self._log.warning("skipping malformed fingerprint entry: %r", entry)
        return loaded

    def _save_fingerprints(self) -> None:
        """Persist the fingerprints as JSON.

        The data is written to a sibling ``.tmp`` file which then replaces
        the real file, so an interrupted write cannot truncate the
        existing history.  A stale ``.tmp`` file is simply overwritten by
        the next save.
        """
        data = [fp.to_dict() for fp in self.fingerprints]
        tmp_file = self.fingerprints_file.with_name(
            self.fingerprints_file.name + ".tmp"
        )
        with open(tmp_file, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)
        tmp_file.replace(self.fingerprints_file)
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
# ── Module-level helpers ────────────────────────────────────────────
|
|
513
|
+
|
|
514
|
+
def _jaccard_sets(a: set, b: set) -> float:
    """Return the Jaccard index ``|a & b| / |a | b|`` of two sets.

    Two empty sets give 0.0 instead of a division by zero.
    """
    combined = a | b
    if not combined:
        return 0.0
    shared = a & b
    return len(shared) / len(combined)
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
def _jaccard_text(a: str, b: str) -> float:
    """Return the Jaccard similarity of two strings' word sets.

    Each string is lower-cased and split on whitespace; the resulting
    sets of tokens are compared with ``_jaccard_sets``.
    """
    word_sets = [set(text.lower().split()) for text in (a, b)]
    return _jaccard_sets(*word_sets)
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
def _merge_texts(existing: str, new: str) -> str:
    """Merge two fact texts, preferring the more complete one.

    If one text is more than 1.3x as long as the other it is returned
    unchanged.  Otherwise the sentences of *existing* are kept in their
    original order, followed by the sentences of *new* that are not
    already present (compared case-insensitively, ignoring the closing
    punctuation).  Every sentence in the result ends with punctuation.

    Sentences are split only where ``.``, ``!`` or ``?`` is followed by
    whitespace, so dots inside file names, versions and decimals
    ("config.py", "2.28.0") are left intact.

    Args:
        existing: Text currently stored on the fact being kept.
        new: Text of the fact being merged in.

    Returns:
        The merged text, or *existing* when neither text holds a sentence.
    """
    if len(new) > len(existing) * 1.3:
        return new
    if len(existing) > len(new) * 1.3:
        return existing

    # Similar length: combine unique sentences, first occurrence wins.
    merged: list[str] = []
    seen: set[str] = set()
    for text in (existing, new):
        for sentence in re.split(r"(?<=[.!?])\s+", text.strip()):
            sentence = sentence.strip()
            key = sentence.rstrip(".!?").strip().casefold()
            if not key or key in seen:
                continue
            seen.add(key)
            if sentence[-1] not in ".!?":
                sentence += "."
            merged.append(sentence)
    return " ".join(merged) if merged else existing
|