code-context-control 2.28.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. cli/__init__.py +1 -0
  2. cli/_hook_utils.py +99 -0
  3. cli/c3.py +6152 -0
  4. cli/commands/__init__.py +1 -0
  5. cli/commands/common.py +312 -0
  6. cli/commands/parser.py +286 -0
  7. cli/docs.html +3178 -0
  8. cli/edits.html +878 -0
  9. cli/hook_auto_snapshot.py +142 -0
  10. cli/hook_c3_signal.py +61 -0
  11. cli/hook_c3read.py +116 -0
  12. cli/hook_edit_ledger.py +213 -0
  13. cli/hook_edit_unlock.py +170 -0
  14. cli/hook_filter.py +130 -0
  15. cli/hook_ghost_files.py +238 -0
  16. cli/hook_pretool_enforce.py +334 -0
  17. cli/hook_read.py +200 -0
  18. cli/hook_session_stats.py +62 -0
  19. cli/hook_terse_advisor.py +190 -0
  20. cli/hub.html +3764 -0
  21. cli/hub_server.py +1619 -0
  22. cli/mcp_proxy.py +428 -0
  23. cli/mcp_server.py +660 -0
  24. cli/server.py +2985 -0
  25. cli/tools/__init__.py +4 -0
  26. cli/tools/_helpers.py +65 -0
  27. cli/tools/agent.py +1165 -0
  28. cli/tools/compress.py +215 -0
  29. cli/tools/delegate.py +1184 -0
  30. cli/tools/edit.py +313 -0
  31. cli/tools/edits.py +118 -0
  32. cli/tools/filter.py +285 -0
  33. cli/tools/impact.py +163 -0
  34. cli/tools/memory.py +469 -0
  35. cli/tools/read.py +224 -0
  36. cli/tools/search.py +337 -0
  37. cli/tools/session.py +95 -0
  38. cli/tools/shell.py +193 -0
  39. cli/tools/status.py +306 -0
  40. cli/tools/validate.py +310 -0
  41. cli/ui/api.js +36 -0
  42. cli/ui/app.js +207 -0
  43. cli/ui/components/chat.js +758 -0
  44. cli/ui/components/dashboard.js +689 -0
  45. cli/ui/components/edits.js +220 -0
  46. cli/ui/components/instructions.js +481 -0
  47. cli/ui/components/memory.js +626 -0
  48. cli/ui/components/sessions.js +606 -0
  49. cli/ui/components/settings.js +1404 -0
  50. cli/ui/components/sidebar.js +156 -0
  51. cli/ui/icons.js +51 -0
  52. cli/ui/shared.js +119 -0
  53. cli/ui/theme.js +22 -0
  54. cli/ui.html +168 -0
  55. cli/ui_legacy.html +6797 -0
  56. cli/ui_nano.html +503 -0
  57. code_context_control-2.28.0.dist-info/METADATA +248 -0
  58. code_context_control-2.28.0.dist-info/RECORD +150 -0
  59. code_context_control-2.28.0.dist-info/WHEEL +5 -0
  60. code_context_control-2.28.0.dist-info/entry_points.txt +4 -0
  61. code_context_control-2.28.0.dist-info/licenses/LICENSE +201 -0
  62. code_context_control-2.28.0.dist-info/top_level.txt +5 -0
  63. core/__init__.py +75 -0
  64. core/config.py +269 -0
  65. core/ide.py +188 -0
  66. oracle/__init__.py +1 -0
  67. oracle/config.py +75 -0
  68. oracle/oracle.html +3900 -0
  69. oracle/oracle_server.py +663 -0
  70. oracle/services/__init__.py +1 -0
  71. oracle/services/c3_bridge.py +210 -0
  72. oracle/services/chat_engine.py +1103 -0
  73. oracle/services/chat_store.py +155 -0
  74. oracle/services/cross_memory.py +154 -0
  75. oracle/services/federated_graph.py +463 -0
  76. oracle/services/health_checker.py +117 -0
  77. oracle/services/insight_engine.py +307 -0
  78. oracle/services/memory_reader.py +106 -0
  79. oracle/services/memory_writer.py +182 -0
  80. oracle/services/ollama_bridge.py +332 -0
  81. oracle/services/project_scanner.py +87 -0
  82. oracle/services/review_agent.py +206 -0
  83. services/__init__.py +1 -0
  84. services/activity_log.py +93 -0
  85. services/agent_base.py +124 -0
  86. services/agents.py +1529 -0
  87. services/auto_memory.py +407 -0
  88. services/bench/__init__.py +6 -0
  89. services/bench/external/__init__.py +29 -0
  90. services/bench/external/aider_polyglot.py +405 -0
  91. services/bench/external/swe_bench.py +485 -0
  92. services/benchmark_dashboard.py +596 -0
  93. services/claude_md.py +785 -0
  94. services/compressor.py +592 -0
  95. services/context_snapshot.py +356 -0
  96. services/conversation_store.py +870 -0
  97. services/doc_index.py +537 -0
  98. services/e2e_benchmark.py +2884 -0
  99. services/e2e_evaluator.py +396 -0
  100. services/e2e_tasks.py +743 -0
  101. services/edit_ledger.py +459 -0
  102. services/embedding_index.py +341 -0
  103. services/error_reporting.py +123 -0
  104. services/file_memory.py +734 -0
  105. services/hub_service.py +585 -0
  106. services/indexer.py +712 -0
  107. services/memory.py +318 -0
  108. services/memory_consolidator.py +538 -0
  109. services/memory_graph.py +382 -0
  110. services/memory_grounder.py +304 -0
  111. services/memory_scorer.py +246 -0
  112. services/metrics.py +86 -0
  113. services/notifications.py +209 -0
  114. services/ollama_client.py +201 -0
  115. services/output_filter.py +488 -0
  116. services/parser.py +1238 -0
  117. services/project_manager.py +579 -0
  118. services/protocol.py +306 -0
  119. services/proxy_state.py +152 -0
  120. services/retrieval_broker.py +129 -0
  121. services/router.py +414 -0
  122. services/runtime.py +326 -0
  123. services/session_benchmark.py +1945 -0
  124. services/session_manager.py +1026 -0
  125. services/session_preloader.py +251 -0
  126. services/text_index.py +90 -0
  127. services/tool_classifier.py +176 -0
  128. services/transcript_index.py +340 -0
  129. services/validation_cache.py +155 -0
  130. services/vector_store.py +299 -0
  131. services/version_tracker.py +271 -0
  132. services/watcher.py +192 -0
  133. tui/__init__.py +0 -0
  134. tui/backend.py +59 -0
  135. tui/main.py +145 -0
  136. tui/screens/__init__.py +1 -0
  137. tui/screens/benchmark_view.py +109 -0
  138. tui/screens/claudemd_view.py +46 -0
  139. tui/screens/compress_view.py +52 -0
  140. tui/screens/index_view.py +74 -0
  141. tui/screens/init_view.py +82 -0
  142. tui/screens/mcp_view.py +73 -0
  143. tui/screens/optimize_view.py +41 -0
  144. tui/screens/pipe_view.py +46 -0
  145. tui/screens/projects_view.py +355 -0
  146. tui/screens/search_view.py +55 -0
  147. tui/screens/session_view.py +143 -0
  148. tui/screens/stats.py +158 -0
  149. tui/screens/ui_view.py +54 -0
  150. tui/theme.tcss +335 -0
@@ -0,0 +1,538 @@
1
+ """Memory Consolidator — 4-phase pipeline for memory maintenance.
2
+
3
+ Inspired by biological memory consolidation ("sleep cycles"):
4
+ Phase 1 (Triage): Score all facts, detect new co-recall edges
5
+ Phase 2 (Merge): Cluster similar facts, merge duplicates
6
+ Phase 3 (Reinforce): Pre-warm graph neighbourhood for working files
7
+ Phase 4 (Prune): Archive low-salience facts, decay stale edges
8
+
9
+ Also provides cross-session relevance:
10
+ - Session fingerprints (files touched + facts recalled + decisions)
11
+ - Session similarity matching for context priming
12
+ - Fact lifespan analysis (foundational vs contextual)
13
+ - Trend detection (hot zones under active development)
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import json
19
+ from collections import Counter
20
+ from datetime import datetime, timezone
21
+ from pathlib import Path
22
+ from typing import Any
23
+
24
+ from services.memory_scorer import MemoryScorer
25
+
26
+
27
class SessionFingerprint:
    """Footprint of a single session, kept small so sessions can be compared.

    A fingerprint holds the session id, the set of files touched, the set of
    fact ids recalled, the list of decisions, and an ISO timestamp.
    """

    def __init__(
        self,
        session_id: str,
        files: list[str],
        facts_recalled: list[str],
        decisions: list[str],
        timestamp: str = "",
    ):
        """Build a fingerprint; an empty *timestamp* is replaced by "now" in UTC."""
        self.session_id = session_id
        # Sets make the similarity measure insensitive to order and repeats.
        self.files = set(files)
        self.facts_recalled = set(facts_recalled)
        self.decisions = decisions
        if timestamp:
            self.timestamp = timestamp
        else:
            self.timestamp = datetime.now(timezone.utc).isoformat()

    def similarity(self, other: "SessionFingerprint") -> float:
        """Return a weighted blend of file overlap and recalled-fact overlap.

        Each overlap is a Jaccard index; files carry weight 0.6 and recalled
        facts 0.4 because files are treated as the more stable signal.
        """
        file_overlap = _jaccard_sets(self.files, other.files)
        fact_overlap = _jaccard_sets(self.facts_recalled, other.facts_recalled)
        return 0.6 * file_overlap + 0.4 * fact_overlap

    def to_dict(self) -> dict:
        """Return a JSON-friendly dict; the two sets are emitted as sorted lists."""
        payload: dict = {}
        payload["session_id"] = self.session_id
        payload["files"] = sorted(self.files)
        payload["facts_recalled"] = sorted(self.facts_recalled)
        payload["decisions"] = self.decisions
        payload["timestamp"] = self.timestamp
        return payload

    @classmethod
    def from_dict(cls, data: dict) -> "SessionFingerprint":
        """Rebuild a fingerprint from a dict shaped like ``to_dict()`` output.

        Missing keys fall back to an empty string (ids, timestamp) or an
        empty list (files, facts, decisions).
        """
        kwargs = {
            "session_id": data.get("session_id", ""),
            "files": data.get("files", []),
            "facts_recalled": data.get("facts_recalled", []),
            "decisions": data.get("decisions", []),
            "timestamp": data.get("timestamp", ""),
        }
        return cls(**kwargs)
69
+
70
+
71
class MemoryConsolidator:
    """Orchestrates the 4-phase memory consolidation pipeline.

    The consolidator works against two collaborators supplied by the caller:

    * ``memory_store`` -- must expose ``facts`` (an iterable of fact dicts
      with at least ``id``, ``fact`` and ``lifecycle`` keys) plus
      ``update_fact(id, text, category)`` and ``delete_fact(id)``.
    * ``graph`` -- may be falsy (graph features are then skipped); otherwise
      it must expose ``record_co_recall``, ``detect_clusters``,
      ``record_refinement``, ``remove_node``, ``get_facts_touching``,
      ``spreading_activation`` and ``decay_edges``.

    Session fingerprints are persisted as JSON in
    ``<project_path>/<data_dir>/session_fingerprints.json``.
    """

    # Token-level Jaccard similarity above which two facts count as duplicates.
    _MERGE_SIMILARITY = 0.55
    # At or above this similarity the keeper's text is left untouched.
    _NEAR_IDENTICAL = 0.85
    # Soft cap on merges per run, checked between anchor facts in the global pass.
    _MERGE_CAP = 50
    # Number of most recent session fingerprints retained on disk.
    _MAX_FINGERPRINTS = 100
    # Minimum age (days) before a fact in the given tier is archived.
    _ARCHIVE_AFTER_DAYS = {"ephemeral": 3, "dormant": 14}
    # Number of newest ``auto:session`` facts that survive pruning.
    _SESSION_FACTS_KEPT = 5

    def __init__(
        self,
        memory_store: Any,
        graph: Any,
        scorer: MemoryScorer | None = None,
        project_path: str = "",
        data_dir: str = ".c3/facts",
    ):
        """Wire up collaborators, create the data directory, load fingerprints.

        Args:
            memory_store: Fact store (see class docstring for the used API).
            graph: Memory graph, or a falsy value to disable graph features.
            scorer: Salience scorer; a default ``MemoryScorer`` is created
                when omitted.
            project_path: Project root; defaults to the current directory.
            data_dir: Directory, relative to ``project_path``, for persisted
                fingerprint data. Created if missing.
        """
        self.memory = memory_store
        self.graph = graph
        self.scorer = scorer or MemoryScorer()
        self.project_path = Path(project_path) if project_path else Path(".")
        self.data_dir = self.project_path / data_dir
        self.data_dir.mkdir(parents=True, exist_ok=True)
        self.fingerprints_file = self.data_dir / "session_fingerprints.json"
        self.fingerprints: list[SessionFingerprint] = self._load_fingerprints()

    # ── Full pipeline ───────────────────────────────────────────────

    def run(self, current_session: dict | None = None) -> dict:
        """Execute all phases in order and return their combined stats.

        Phase 3 (reinforce) only runs when ``current_session`` is truthy.

        Returns:
            ``{"phases": {<phase name>: <phase stats>}, "total_facts": int}``
            where ``total_facts`` counts the facts still marked active after
            the run.
        """
        stats: dict[str, Any] = {"phases": {}}
        phases = stats["phases"]

        phases["triage"] = self.phase_triage(current_session)
        phases["merge"] = self.phase_merge()
        if current_session:
            phases["reinforce"] = self.phase_reinforce(current_session)
        phases["prune"] = self.phase_prune()

        stats["total_facts"] = len(self._active_facts())
        return stats

    # ── Phase 1: Triage ─────────────────────────────────────────────

    def phase_triage(self, session: dict | None = None) -> dict:
        """Score all active facts and, given a session, record its fingerprint.

        With a session, facts whose ``recall_sessions`` contain the session
        id are collected; if there are at least two and a graph is present,
        they are passed to ``graph.record_co_recall``. A fingerprint for the
        session is then stored and persisted. A fingerprint already stored
        for the same non-empty session id is replaced rather than duplicated,
        so repeated runs within one session do not inflate trend counts.

        Returns:
            ``{"scored": int, "tiers": {tier: count}, "co_recall_edges": ...}``
        """
        facts = self._active_facts()

        scores = self.scorer.score_batch(facts, self.graph)
        tier_counts = Counter(s["tier"] for s in scores)

        co_recall_edges = 0
        if session:
            sid = session.get("id", "")
            recalled_ids = [
                f["id"] for f in facts
                if sid and sid in f.get("recall_sessions", [])
            ]
            if len(recalled_ids) >= 2 and self.graph:
                co_recall_edges = self.graph.record_co_recall(recalled_ids)

            files = [
                fc.get("file", "") for fc in session.get("files_touched", [])
                if fc.get("file")
            ]
            decisions = [
                d.get("decision", "") for d in session.get("decisions", [])
                if d.get("decision")
            ]
            fp = SessionFingerprint(
                session_id=sid,
                files=files,
                facts_recalled=recalled_ids,
                decisions=decisions,
            )
            if sid:
                # Drop any earlier fingerprint of this same session.
                self.fingerprints = [
                    old for old in self.fingerprints if old.session_id != sid
                ]
            self.fingerprints.append(fp)
            self.fingerprints = self.fingerprints[-self._MAX_FINGERPRINTS:]
            self._save_fingerprints()

        return {
            "scored": len(scores),
            "tiers": dict(tier_counts),
            "co_recall_edges": co_recall_edges,
        }

    # ── Phase 2: Merge ──────────────────────────────────────────────

    def phase_merge(self) -> dict:
        """Merge duplicate facts using graph clusters and text similarity.

        Facts are first compared pairwise inside each graph cluster, then a
        global pass compares the facts that belong to no cluster. Victims of
        a merge are deleted from the store and removed from the graph.

        Returns:
            ``{"merged": int, "deleted": int}``
        """
        facts = self._active_facts()
        if len(facts) < 2:
            return {"merged": 0, "deleted": 0}

        merged = 0
        to_delete: set[str] = set()

        clusters = self.graph.detect_clusters(min_cluster_size=2) if self.graph else []

        clustered_ids: set[str] = set()
        for cluster in clusters:
            members = set(cluster)  # built once per cluster, not once per fact
            clustered_ids |= members
            cluster_facts = [
                f for f in facts
                if f["id"] in members and f["id"] not in to_delete
            ]
            if len(cluster_facts) < 2:
                continue
            merged += self._merge_similar(cluster_facts, to_delete)

        # Global pass over facts that are in no cluster at all.
        unclustered = [
            f for f in facts
            if f["id"] not in to_delete and f["id"] not in clustered_ids
        ]
        merged += self._merge_similar(
            unclustered, to_delete, merged_before=merged, cap=self._MERGE_CAP
        )

        self._delete_facts(to_delete)
        return {"merged": merged, "deleted": len(to_delete)}

    def _merge_similar(
        self,
        candidates: list[dict],
        to_delete: set[str],
        merged_before: int = 0,
        cap: int | None = None,
    ) -> int:
        """Pairwise-merge near-duplicate facts within *candidates*.

        Victim ids are added to *to_delete* (mutated in place). When *cap* is
        given, the scan stops once ``merged_before`` plus the merges made
        here reach it; the check happens between anchor facts, so the cap is
        a soft limit.

        Returns:
            Number of merges performed by this call.
        """
        merged = 0
        for i, a in enumerate(candidates):
            if a["id"] in to_delete:
                continue
            for b in candidates[i + 1:]:
                if b["id"] in to_delete:
                    continue
                sim = _jaccard_text(a["fact"], b["fact"])
                if sim <= self._MERGE_SIMILARITY:
                    continue

                keeper, victim = self._pick_keeper(a, b)
                if sim < self._NEAR_IDENTICAL:
                    # Texts differ enough that the victim may add information.
                    keeper["fact"] = _merge_texts(keeper["fact"], victim["fact"])
                    try:
                        self.memory.update_fact(
                            keeper["id"], keeper["fact"],
                            keeper.get("category", "general"),
                        )
                    except Exception:
                        # Deliberately best-effort: a store failure must not
                        # abort the rest of the consolidation run.
                        pass
                if self.graph:
                    # Transfer graph edges from victim to keeper.
                    self.graph.record_refinement(victim["id"], keeper["id"])
                to_delete.add(victim["id"])
                merged += 1

                if a["id"] in to_delete:
                    # The anchor itself was absorbed; comparing it further
                    # could merge live facts into a fact about to be deleted.
                    break
            if cap is not None and merged_before + merged >= cap:
                break
        return merged

    # ── Phase 3: Reinforce ──────────────────────────────────────────

    def phase_reinforce(self, session: dict) -> dict:
        """Pre-warm memory for the current working context.

        Collects fact ids recalled in the most similar past sessions (by
        fingerprint similarity on the files touched) and, when a graph is
        present, ids returned by ``graph.spreading_activation`` seeded with
        facts touching the first ten current files.

        Returns:
            ``{"similar_sessions": int, "primed_facts": int,
            "primed_ids": [up to 20 sorted ids]}``
        """
        current_files = [
            fc.get("file", "") for fc in session.get("files_touched", [])
            if fc.get("file")
        ]
        current_fp = SessionFingerprint(
            session_id=session.get("id", ""),
            files=current_files,
            facts_recalled=[],
            decisions=[],
        )

        similar_sessions = self.find_similar_sessions(current_fp, top_k=3)

        primed_fact_ids: set[str] = set()
        for past_fp, _ in similar_sessions:
            primed_fact_ids.update(past_fp.facts_recalled)

        if self.graph:
            file_facts: list[str] = []
            for path in current_files[:10]:
                file_facts.extend(self.graph.get_facts_touching(path))

            if file_facts:
                activated = self.graph.spreading_activation(
                    seed_ids=file_facts, max_depth=2, max_results=20
                )
                primed_fact_ids.update(a["id"] for a in activated)

        return {
            "similar_sessions": len(similar_sessions),
            "primed_facts": len(primed_fact_ids),
            "primed_ids": sorted(primed_fact_ids)[:20],
        }

    # ── Phase 4: Prune ──────────────────────────────────────────────

    def phase_prune(self) -> dict:
        """Archive low-salience facts and decay stale graph edges.

        Ephemeral facts aged 3+ days and dormant facts aged 14+ days are
        removed, as are all but the five newest ``auto:session`` facts.
        Removal deletes the fact from the store and its node from the graph.
        Finally ``graph.decay_edges(half_life_days=30.0)`` is invoked.

        Returns:
            ``{"archived": int, "edges_decayed": ..., "remaining": int}``
        """
        facts = self._active_facts()

        archived = 0
        to_delete: set[str] = set()

        for f in facts:
            tier = self.scorer.score(f, self.graph)["tier"]
            min_age = self._ARCHIVE_AFTER_DAYS.get(tier)
            if min_age is not None and self._fact_age_days(f) >= min_age:
                to_delete.add(f["id"])
                archived += 1

        # Rolling window over auto:session entries, newest first.
        session_facts = sorted(
            (
                f for f in facts
                if f.get("category") == "auto:session"
                and f["id"] not in to_delete
            ),
            key=lambda f: f.get("timestamp", ""),
            reverse=True,
        )
        for f in session_facts[self._SESSION_FACTS_KEPT:]:
            to_delete.add(f["id"])
            archived += 1

        self._delete_facts(to_delete)

        edges_decayed = 0
        if self.graph:
            edges_decayed = self.graph.decay_edges(half_life_days=30.0)

        return {
            "archived": archived,
            "edges_decayed": edges_decayed,
            "remaining": len(facts) - archived,
        }

    # ── Cross-session analysis ──────────────────────────────────────

    def find_similar_sessions(
        self,
        target: SessionFingerprint,
        top_k: int = 5,
    ) -> list[tuple[SessionFingerprint, float]]:
        """Find past sessions most similar to the target.

        Fingerprints sharing the target's session id are skipped, as are
        those whose similarity does not exceed 0.1.

        Returns:
            Up to ``top_k`` ``(fingerprint, similarity)`` pairs, most similar
            first, with similarity rounded to four decimals.
        """
        results: list[tuple[SessionFingerprint, float]] = []
        for fp in self.fingerprints:
            if fp.session_id == target.session_id:
                continue
            sim = target.similarity(fp)
            if sim > 0.1:
                results.append((fp, round(sim, 4)))
        results.sort(key=lambda pair: pair[1], reverse=True)
        return results[:top_k]

    def fact_lifespan_analysis(self) -> dict:
        """Classify active facts as foundational vs contextual.

        Foundational: recalled in three or more distinct sessions.
        Contextual: recalled in at most one session and older than 7 days.
        Facts matching neither rule are left out of both lists.

        Returns:
            ``{"foundational": [...], "contextual": [...], "total_facts": int}``
            with each list capped at 20 entries; foundational entries are
            ordered by session spread (descending), contextual ones by
            salience (ascending).
        """
        facts = self._active_facts()

        foundational: list[dict] = []
        contextual: list[dict] = []

        for f in facts:
            session_count = len(set(f.get("recall_sessions", [])))
            score = self.scorer.score(f, self.graph)

            entry = {
                "id": f["id"],
                "fact": f["fact"][:80],
                "session_spread": session_count,
                "salience": score["salience"],
                "tier": score["tier"],
            }

            if session_count >= 3:
                foundational.append(entry)
            elif session_count <= 1 and self._fact_age_days(f) > 7:
                contextual.append(entry)

        foundational.sort(key=lambda e: e["session_spread"], reverse=True)
        contextual.sort(key=lambda e: e["salience"])

        return {
            "foundational": foundational[:20],
            "contextual": contextual[:20],
            "total_facts": len(facts),
        }

    def detect_trends(self) -> dict:
        """Detect hot zones — files and facts that recur across recent sessions.

        Looks at the last 20 session fingerprints and reports the ten most
        common files and recalled fact ids, keeping only those seen in at
        least two fingerprints. Hot facts still present in the store are
        enriched with the first 80 characters of their text.

        Returns:
            ``{"hot_files": [...], "hot_facts": [...], "sessions_analyzed": int}``
        """
        recent = self.fingerprints[-20:]
        if not recent:
            return {"hot_files": [], "hot_facts": [], "sessions_analyzed": 0}

        file_counts: Counter = Counter()
        fact_counts: Counter = Counter()
        for fp in recent:
            file_counts.update(fp.files)
            fact_counts.update(fp.facts_recalled)

        hot_files = [
            {"file": path, "sessions": count}
            for path, count in file_counts.most_common(10)
            if count >= 2
        ]
        hot_facts = [
            {"fact_id": fid, "sessions": count}
            for fid, count in fact_counts.most_common(10)
            if count >= 2
        ]

        facts_by_id = {f["id"]: f for f in self.memory.facts}
        for hf in hot_facts:
            fact = facts_by_id.get(hf["fact_id"])
            if fact:
                hf["fact"] = fact["fact"][:80]

        return {
            "hot_files": hot_files,
            "hot_facts": hot_facts,
            "sessions_analyzed": len(recent),
        }

    # ── Helpers ─────────────────────────────────────────────────────

    def _active_facts(self) -> list[dict]:
        """Return the store's facts whose ``lifecycle`` is ``"active"``."""
        return [f for f in self.memory.facts if f.get("lifecycle") == "active"]

    def _delete_facts(self, fact_ids: set[str]) -> None:
        """Delete each fact from the store and drop its graph node.

        Store failures are swallowed on purpose so one bad id cannot abort
        the pipeline; the graph node is removed regardless.
        """
        for fid in fact_ids:
            try:
                self.memory.delete_fact(fid)
            except Exception:
                pass
            if self.graph:
                self.graph.remove_node(fid)

    def _pick_keeper(self, a: dict, b: dict) -> tuple[dict, dict]:
        """Return ``(keeper, victim)``; the higher-salience fact is kept, ties keep *a*."""
        sa = self.scorer.score(a, self.graph)["salience"]
        sb = self.scorer.score(b, self.graph)["salience"]
        return (a, b) if sa >= sb else (b, a)

    @staticmethod
    def _fact_age_days(fact: dict) -> float:
        """Return days since the fact was last accessed (or created).

        Uses ``last_accessed_at`` and falls back to ``timestamp``. Naive
        timestamps are taken as UTC. A missing or unparseable value yields
        ``0.0``.
        """
        ref = fact.get("last_accessed_at") or fact.get("timestamp")
        if not ref:
            return 0.0
        if isinstance(ref, str) and ref.endswith("Z"):
            # fromisoformat() only accepts the "Z" suffix from Python 3.11 on.
            ref = ref[:-1] + "+00:00"
        try:
            dt = datetime.fromisoformat(ref)
            if dt.tzinfo is None:
                dt = dt.replace(tzinfo=timezone.utc)
            return (datetime.now(timezone.utc) - dt).total_seconds() / 86400
        except (ValueError, TypeError):
            return 0.0

    def _load_fingerprints(self) -> list[SessionFingerprint]:
        """Load persisted fingerprints; unreadable data yields an empty list.

        Individual malformed entries are skipped so that one bad record does
        not discard the whole history.
        """
        if not self.fingerprints_file.exists():
            return []
        try:
            with open(self.fingerprints_file, encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError):
            # ValueError covers both invalid JSON and undecodable bytes.
            return []
        if not isinstance(data, list):
            return []

        loaded: list[SessionFingerprint] = []
        for entry in data:
            if not isinstance(entry, dict):
                continue
            try:
                loaded.append(SessionFingerprint.from_dict(entry))
            except (TypeError, ValueError):
                continue
        return loaded

    def _save_fingerprints(self):
        """Persist fingerprints as indented JSON.

        The data is written to a sibling temporary file and then moved over
        the target, so an interrupted write cannot leave a truncated file.
        """
        data = [fp.to_dict() for fp in self.fingerprints]
        tmp_path = self.fingerprints_file.with_name(
            self.fingerprints_file.name + ".tmp"
        )
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)
        tmp_path.replace(self.fingerprints_file)
510
+
511
+
512
+ # ── Module-level helpers ────────────────────────────────────────────
513
+
514
+ def _jaccard_sets(a: set, b: set) -> float:
515
+ if not a and not b:
516
+ return 0.0
517
+ intersection = len(a & b)
518
+ union = len(a | b)
519
+ return intersection / union if union else 0.0
520
+
521
+
522
def _jaccard_text(a: str, b: str) -> float:
    """Return the Jaccard similarity of two texts over their lowercase whitespace tokens."""
    tokens_a, tokens_b = (set(text.lower().split()) for text in (a, b))
    return _jaccard_sets(tokens_a, tokens_b)
526
+
527
+
528
+ def _merge_texts(existing: str, new: str) -> str:
529
+ """Merge two fact texts, preferring the more complete one."""
530
+ if len(new) > len(existing) * 1.3:
531
+ return new
532
+ if len(existing) > len(new) * 1.3:
533
+ return existing
534
+ # Similar length — combine unique sentences
535
+ existing_sentences = set(s.strip() for s in existing.split(".") if s.strip())
536
+ new_sentences = set(s.strip() for s in new.split(".") if s.strip())
537
+ combined = existing_sentences | new_sentences
538
+ return ". ".join(sorted(combined)) + "." if combined else existing