superlocalmemory 3.2.3 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/CHANGELOG.md +43 -1
  2. package/README.md +106 -71
  3. package/package.json +1 -2
  4. package/pyproject.toml +16 -1
  5. package/src/superlocalmemory/cli/commands.py +309 -0
  6. package/src/superlocalmemory/cli/main.py +44 -0
  7. package/src/superlocalmemory/core/config.py +276 -4
  8. package/src/superlocalmemory/core/consolidation_engine.py +37 -0
  9. package/src/superlocalmemory/core/engine.py +21 -0
  10. package/src/superlocalmemory/core/engine_wiring.py +58 -8
  11. package/src/superlocalmemory/dynamics/activation_guided_quantization.py +374 -0
  12. package/src/superlocalmemory/dynamics/eap_scheduler.py +276 -0
  13. package/src/superlocalmemory/dynamics/ebbinghaus_langevin_coupling.py +171 -0
  14. package/src/superlocalmemory/encoding/cognitive_consolidator.py +804 -0
  15. package/src/superlocalmemory/hooks/auto_invoker.py +46 -8
  16. package/src/superlocalmemory/hooks/auto_parameterize.py +147 -0
  17. package/src/superlocalmemory/infra/heartbeat_monitor.py +140 -0
  18. package/src/superlocalmemory/infra/pid_manager.py +193 -0
  19. package/src/superlocalmemory/infra/process_reaper.py +572 -0
  20. package/src/superlocalmemory/learning/consolidation_quantization_worker.py +115 -0
  21. package/src/superlocalmemory/learning/forgetting_scheduler.py +263 -0
  22. package/src/superlocalmemory/learning/quantization_scheduler.py +320 -0
  23. package/src/superlocalmemory/math/ebbinghaus.py +309 -0
  24. package/src/superlocalmemory/math/fisher_quantized.py +251 -0
  25. package/src/superlocalmemory/math/hopfield.py +279 -0
  26. package/src/superlocalmemory/math/polar_quant.py +379 -0
  27. package/src/superlocalmemory/math/qjl.py +115 -0
  28. package/src/superlocalmemory/mcp/server.py +2 -0
  29. package/src/superlocalmemory/mcp/tools_v3.py +10 -0
  30. package/src/superlocalmemory/mcp/tools_v33.py +351 -0
  31. package/src/superlocalmemory/parameterization/__init__.py +47 -0
  32. package/src/superlocalmemory/parameterization/pattern_extractor.py +534 -0
  33. package/src/superlocalmemory/parameterization/pii_filter.py +106 -0
  34. package/src/superlocalmemory/parameterization/prompt_injector.py +216 -0
  35. package/src/superlocalmemory/parameterization/prompt_lifecycle.py +275 -0
  36. package/src/superlocalmemory/parameterization/soft_prompt_generator.py +425 -0
  37. package/src/superlocalmemory/retrieval/engine.py +21 -3
  38. package/src/superlocalmemory/retrieval/forgetting_filter.py +145 -0
  39. package/src/superlocalmemory/retrieval/hopfield_channel.py +335 -0
  40. package/src/superlocalmemory/retrieval/quantization_aware_search.py +133 -0
  41. package/src/superlocalmemory/retrieval/strategy.py +16 -6
  42. package/src/superlocalmemory/server/routes/agents.py +68 -8
  43. package/src/superlocalmemory/server/routes/learning.py +18 -1
  44. package/src/superlocalmemory/server/routes/lifecycle.py +36 -17
  45. package/src/superlocalmemory/server/routes/v3_api.py +503 -1
  46. package/src/superlocalmemory/storage/database.py +206 -0
  47. package/src/superlocalmemory/storage/embedding_migrator.py +178 -0
  48. package/src/superlocalmemory/storage/migration_v33.py +140 -0
  49. package/src/superlocalmemory/storage/quantized_store.py +261 -0
  50. package/src/superlocalmemory/storage/schema_v32.py +137 -0
  51. package/conftest.py +0 -5
@@ -0,0 +1,804 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """Cognitive Consolidation Quantization (CCQ) — Phase E.
6
+
7
+ Sleep-time consolidation that mirrors hippocampal-neocortical transfer:
8
+ 1. IDENTIFY: warm/cold facts below retention threshold
9
+ 2. CLUSTER: group by entity overlap + temporal proximity (Union-Find)
10
+ 3. EXTRACT GIST: rules (Mode A) or LLM (Mode B/C) summary
11
+ 4. COMPRESS: source embeddings -> PolarQuant 2-bit
12
+ 5. STORE: gist block at float32 + archive source facts
13
+ 6. AUDIT: complete audit trail
14
+
15
+ Biological analogy:
16
+ Hippocampus (atomic_facts) -> replay during sleep (CCQ pipeline)
17
+ -> Neocortex (ccq_consolidated_blocks with full-precision gist)
18
+
19
+ Hard rules:
20
+ - Already-consolidated facts NEVER re-consolidated (HR-01)
21
+ - Minimum cluster size 3 (HR-02)
22
+ - Gist must cover 50% shared entities (HR-03)
23
+ - Source facts soft-archived, NEVER deleted (HR-04)
24
+ - Gist embedding always float32 (HR-05)
25
+ - Parameterized SQL only (HR-06)
26
+ - Per-cluster error isolation (HR-07)
27
+ - Idempotent (HR-08)
28
+ - PolarQuant optional (HR-10)
29
+ - Audit trail mandatory (HR-11)
30
+
31
+ References:
32
+ McClelland et al. (1995). Complementary Learning Systems.
33
+ SimpleMem (arXiv 2601.02553). Semantic lossless compression.
34
+ TurboQuant (ICLR 2026). Recursive polar quantization.
35
+
36
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
37
+ License: MIT
38
+ IP Novelty: 92% (no prior art for retention-gated consolidation + polar quantization)
39
+ """
40
+
41
+ from __future__ import annotations
42
+
43
+ import json
44
+ import logging
45
+ from collections import defaultdict
46
+ from dataclasses import dataclass
47
+ from datetime import datetime
48
+ from typing import TYPE_CHECKING, Any, Protocol
49
+
50
+ from superlocalmemory.storage.models import _new_id
51
+
52
+ if TYPE_CHECKING:
53
+ from superlocalmemory.core.config import CCQConfig
54
+ from superlocalmemory.storage.database import DatabaseManager
55
+
56
+ logger = logging.getLogger(__name__)
57
+
58
+
59
+ # ---------------------------------------------------------------------------
60
+ # Protocols
61
+ # ---------------------------------------------------------------------------
62
+
63
class Embedder(Protocol):
    """Structural type for any object that can embed text.

    The consolidator only ever calls ``encode`` on it.
    """

    def encode(self, text: str) -> list[float]:
        """Return the embedding vector for *text*."""
67
+
68
+
69
class LLM(Protocol):
    """Structural type for a text-generation backend."""

    def generate(self, prompt: str, system: str = "") -> str:
        """Return the generated text for *prompt* (with optional *system* text)."""

    def is_available(self) -> bool:
        """Report whether the backend can currently be used."""
75
+
76
+
77
+ # ---------------------------------------------------------------------------
78
+ # Data classes (frozen, immutable)
79
+ # ---------------------------------------------------------------------------
80
+
81
+
82
@dataclass(frozen=True)
class ConsolidationCluster:
    """Immutable description of one group of facts selected for consolidation."""

    # Identifier generated for this cluster.
    cluster_id: str
    # IDs of the member facts.
    fact_ids: tuple[str, ...]
    # Entities present in every member fact.
    shared_entities: tuple[str, ...]
    # ISO-8601 string for the mean timestamp of the members.
    temporal_centroid: str
    # Mean retention score of the members.
    avg_retention: float
    # Number of member facts.
    fact_count: int
92
+
93
+
94
@dataclass(frozen=True)
class GistResult:
    """Immutable outcome of summarising one cluster into a gist."""

    # The summary text itself.
    gist_text: str
    # Entities associated with the gist.
    key_entities: tuple[str, ...]
    # How the gist was produced: 'rules' or 'llm'.
    extraction_mode: str
    # ID of the fact the gist was based on; empty string when there is none.
    representative_fact_id: str
102
+
103
+
104
@dataclass(frozen=True)
class CCQPipelineResult:
    """Immutable summary of a single CCQ pipeline run."""

    # Number of clusters the run attempted.
    clusters_processed: int
    # Number of consolidated blocks written.
    blocks_created: int
    # Number of source facts archived.
    facts_archived: int
    # Embedding bytes before compression, summed over successful clusters.
    total_bytes_before: int
    # Embedding bytes after compression, summed over successful clusters.
    total_bytes_after: int
    # total_bytes_before / total_bytes_after (0.0 when nothing was compressed).
    compression_ratio: float
    # IDs of the audit records written during the run.
    audit_entries: tuple[str, ...]
    # One message per cluster that failed.
    errors: tuple[str, ...]
116
+
117
+
118
+ # ---------------------------------------------------------------------------
119
+ # Union-Find helper
120
+ # ---------------------------------------------------------------------------
121
+
122
+
123
+ class _UnionFind:
124
+ """Minimal Union-Find for entity-based clustering."""
125
+
126
+ __slots__ = ("_parent", "_rank")
127
+
128
+ def __init__(self, elements: list[str]) -> None:
129
+ self._parent: dict[str, str] = {e: e for e in elements}
130
+ self._rank: dict[str, int] = {e: 0 for e in elements}
131
+
132
+ def find(self, x: str) -> str:
133
+ if self._parent[x] != x:
134
+ self._parent[x] = self.find(self._parent[x])
135
+ return self._parent[x]
136
+
137
+ def union(self, a: str, b: str) -> None:
138
+ ra, rb = self.find(a), self.find(b)
139
+ if ra == rb:
140
+ return
141
+ if self._rank[ra] < self._rank[rb]:
142
+ ra, rb = rb, ra
143
+ self._parent[rb] = ra
144
+ if self._rank[ra] == self._rank[rb]:
145
+ self._rank[ra] += 1
146
+
147
+ def components(self) -> dict[str, list[str]]:
148
+ groups: dict[str, list[str]] = defaultdict(list)
149
+ for element in self._parent:
150
+ groups[self.find(element)].append(element)
151
+ return dict(groups)
152
+
153
+
154
+ # ---------------------------------------------------------------------------
155
+ # Date helpers
156
+ # ---------------------------------------------------------------------------
157
+
158
+
159
+ def _parse_date(raw: str | None) -> datetime | None:
160
+ """Parse ISO-8601 dates with multiple format fallbacks."""
161
+ if not raw:
162
+ return None
163
+ for fmt in (
164
+ "%Y-%m-%dT%H:%M:%S.%f",
165
+ "%Y-%m-%dT%H:%M:%S",
166
+ "%Y-%m-%d %H:%M:%S",
167
+ "%Y-%m-%d",
168
+ ):
169
+ try:
170
+ return datetime.strptime(raw, fmt)
171
+ except ValueError:
172
+ continue
173
+ return None
174
+
175
+
176
+ def _temporal_midpoint(dates: list[datetime]) -> str:
177
+ """Compute ISO-8601 midpoint of a list of datetimes."""
178
+ if not dates:
179
+ return datetime.now().isoformat()
180
+ ts = [d.timestamp() for d in dates]
181
+ mid = sum(ts) / len(ts)
182
+ return datetime.fromtimestamp(mid).isoformat()
183
+
184
+
185
+ # ---------------------------------------------------------------------------
186
+ # CognitiveConsolidator
187
+ # ---------------------------------------------------------------------------
188
+
189
+
190
+ class CognitiveConsolidator:
191
+ """CCQ engine: sleep-time consolidation with quantization.
192
+
193
+ Executes 6-step pipeline: identify -> cluster -> gist -> compress
194
+ -> store -> audit. Each cluster failure is isolated (HR-07).
195
+ """
196
+
197
+ __slots__ = ("_db", "_embedder", "_llm", "_config")
198
+
199
+ def __init__(
200
+ self,
201
+ db: DatabaseManager,
202
+ embedder: Embedder | None = None,
203
+ llm: LLM | None = None,
204
+ config: CCQConfig | None = None,
205
+ ) -> None:
206
+ from superlocalmemory.core.config import CCQConfig as _CCQConfig
207
+
208
+ self._db = db
209
+ self._embedder = embedder
210
+ self._llm = llm
211
+ self._config = config or _CCQConfig()
212
+
213
+ # ------------------------------------------------------------------
214
+ # Public API
215
+ # ------------------------------------------------------------------
216
+
217
+ def run_pipeline(self, profile_id: str) -> CCQPipelineResult:
218
+ """Execute the full 6-step CCQ pipeline.
219
+
220
+ Per-cluster error isolation: one cluster failure does NOT
221
+ abort the pipeline (HR-07).
222
+ """
223
+ # Step 1: Identify candidates
224
+ candidates = self._step1_identify(profile_id)
225
+ if not candidates:
226
+ return self._empty_result()
227
+
228
+ # Step 2: Cluster by entity overlap + temporal proximity
229
+ clusters = self._step2_cluster(candidates, profile_id)
230
+ if not clusters:
231
+ return self._empty_result()
232
+
233
+ # Process each cluster
234
+ blocks_created = 0
235
+ facts_archived = 0
236
+ bytes_before = 0
237
+ bytes_after = 0
238
+ audit_ids: list[str] = []
239
+ errors: list[str] = []
240
+
241
+ for cluster in clusters:
242
+ try:
243
+ # Step 3: Extract gist
244
+ gist = self._step3_extract_gist(cluster, profile_id)
245
+
246
+ # Step 4: Compress source embeddings
247
+ cb, ca = self._step4_compress_embeddings(cluster, profile_id)
248
+
249
+ # Step 5: Store block + archive source facts
250
+ block_id = self._step5_store_block(cluster, gist, profile_id)
251
+
252
+ # Step 6: Audit trail
253
+ audit_id = self._step6_audit(
254
+ cluster, gist, cb, ca, block_id, profile_id,
255
+ )
256
+
257
+ blocks_created += 1
258
+ facts_archived += cluster.fact_count
259
+ bytes_before += cb
260
+ bytes_after += ca
261
+ audit_ids.append(audit_id)
262
+
263
+ except Exception as exc:
264
+ logger.warning(
265
+ "CCQ cluster %s failed (non-fatal): %s",
266
+ cluster.cluster_id, exc,
267
+ )
268
+ errors.append(
269
+ f"cluster={cluster.cluster_id}: {exc!s}",
270
+ )
271
+
272
+ compression_ratio = (
273
+ bytes_before / bytes_after if bytes_after > 0 else 0.0
274
+ )
275
+
276
+ return CCQPipelineResult(
277
+ clusters_processed=len(clusters),
278
+ blocks_created=blocks_created,
279
+ facts_archived=facts_archived,
280
+ total_bytes_before=bytes_before,
281
+ total_bytes_after=bytes_after,
282
+ compression_ratio=round(compression_ratio, 2),
283
+ audit_entries=tuple(audit_ids),
284
+ errors=tuple(errors),
285
+ )
286
+
287
+ # ------------------------------------------------------------------
288
+ # Step 1: Identify candidates
289
+ # ------------------------------------------------------------------
290
+
291
+ def _step1_identify(self, profile_id: str) -> list[dict]:
292
+ """Identify warm/cold facts not yet consolidated.
293
+
294
+ Excludes:
295
+ - Active/archive/forgotten lifecycle zones
296
+ - Facts above retention threshold
297
+ - Facts already in ccq_consolidated_blocks (HR-01, HR-08)
298
+ """
299
+ rows = self._db.execute(
300
+ """
301
+ SELECT f.fact_id, f.content, f.canonical_entities_json,
302
+ f.created_at, f.observation_date, f.importance,
303
+ f.confidence,
304
+ r.retention_score, r.lifecycle_zone, r.memory_strength
305
+ FROM atomic_facts f
306
+ INNER JOIN fact_retention r
307
+ ON f.fact_id = r.fact_id AND r.profile_id = ?
308
+ WHERE f.profile_id = ?
309
+ AND r.lifecycle_zone IN ('warm', 'cold')
310
+ AND r.retention_score < ?
311
+ AND f.lifecycle != 'forgotten'
312
+ AND f.fact_id NOT IN (
313
+ SELECT je.value
314
+ FROM ccq_consolidated_blocks ccb,
315
+ json_each(ccb.source_fact_ids) je
316
+ WHERE ccb.profile_id = ?
317
+ )
318
+ ORDER BY r.retention_score ASC
319
+ LIMIT ?
320
+ """,
321
+ (
322
+ profile_id,
323
+ profile_id,
324
+ self._config.retention_threshold,
325
+ profile_id,
326
+ self._config.max_candidates_per_run,
327
+ ),
328
+ )
329
+
330
+ candidates: list[dict] = []
331
+ for row in rows or []:
332
+ d = dict(row)
333
+ raw_entities = d.get("canonical_entities_json") or "[]"
334
+ try:
335
+ d["canonical_entities"] = json.loads(raw_entities)
336
+ except (json.JSONDecodeError, TypeError):
337
+ d["canonical_entities"] = []
338
+ candidates.append(d)
339
+
340
+ return candidates
341
+
342
+ # ------------------------------------------------------------------
343
+ # Step 2: Cluster by entity overlap + temporal proximity
344
+ # ------------------------------------------------------------------
345
+
346
+ def _step2_cluster(
347
+ self,
348
+ candidates: list[dict],
349
+ profile_id: str,
350
+ ) -> list[ConsolidationCluster]:
351
+ """Group candidates via Union-Find (entity overlap) + temporal sub-clustering."""
352
+ if len(candidates) < self._config.min_cluster_size:
353
+ return []
354
+
355
+ fact_ids = [c["fact_id"] for c in candidates]
356
+ fact_map = {c["fact_id"]: c for c in candidates}
357
+
358
+ # Build entity-to-fact index
359
+ entity_index: dict[str, list[str]] = defaultdict(list)
360
+ for c in candidates:
361
+ for entity in c.get("canonical_entities", []):
362
+ entity_index[entity].append(c["fact_id"])
363
+
364
+ # Union-Find by entity overlap
365
+ uf = _UnionFind(fact_ids)
366
+ for i, fid_a in enumerate(fact_ids):
367
+ entities_a = set(fact_map[fid_a].get("canonical_entities", []))
368
+ for fid_b in fact_ids[i + 1:]:
369
+ entities_b = set(
370
+ fact_map[fid_b].get("canonical_entities", []),
371
+ )
372
+ if len(entities_a & entities_b) >= self._config.min_entity_overlap:
373
+ uf.union(fid_a, fid_b)
374
+
375
+ # Extract components and sub-cluster temporally
376
+ clusters: list[ConsolidationCluster] = []
377
+ for _root, group_ids in uf.components().items():
378
+ if len(group_ids) < self._config.min_cluster_size:
379
+ continue
380
+ sub_clusters = self._temporal_subcluster(group_ids, fact_map)
381
+ for sc_ids in sub_clusters:
382
+ if len(sc_ids) < self._config.min_cluster_size:
383
+ continue
384
+ # Cap cluster size (HR: prevents huge gists)
385
+ sc_ids = sc_ids[: self._config.max_cluster_size]
386
+
387
+ # Compute shared entities (appear in ALL facts)
388
+ entity_sets = [
389
+ set(fact_map[fid].get("canonical_entities", []))
390
+ for fid in sc_ids
391
+ ]
392
+ shared = (
393
+ set.intersection(*entity_sets) if entity_sets else set()
394
+ )
395
+
396
+ # Compute temporal centroid
397
+ dates = [
398
+ _parse_date(
399
+ fact_map[fid].get("observation_date")
400
+ or fact_map[fid].get("created_at"),
401
+ )
402
+ for fid in sc_ids
403
+ ]
404
+ valid_dates = [d for d in dates if d is not None]
405
+ centroid = _temporal_midpoint(valid_dates)
406
+
407
+ # Average retention
408
+ avg_ret = sum(
409
+ fact_map[fid].get("retention_score", 0.0)
410
+ for fid in sc_ids
411
+ ) / len(sc_ids)
412
+
413
+ clusters.append(
414
+ ConsolidationCluster(
415
+ cluster_id=_new_id(),
416
+ fact_ids=tuple(sc_ids),
417
+ shared_entities=tuple(sorted(shared)),
418
+ temporal_centroid=centroid,
419
+ avg_retention=round(avg_ret, 4),
420
+ fact_count=len(sc_ids),
421
+ ),
422
+ )
423
+
424
+ return clusters
425
+
426
+ def _temporal_subcluster(
427
+ self,
428
+ fact_ids: list[str],
429
+ fact_map: dict[str, dict],
430
+ ) -> list[list[str]]:
431
+ """Split an entity group into temporal sub-clusters within window."""
432
+ dated: list[tuple[str, datetime | None]] = []
433
+ for fid in fact_ids:
434
+ raw = (
435
+ fact_map[fid].get("observation_date")
436
+ or fact_map[fid].get("created_at")
437
+ )
438
+ dated.append((fid, _parse_date(raw)))
439
+
440
+ dated.sort(key=lambda t: t[1] or datetime.min)
441
+
442
+ sub_clusters: list[list[str]] = [[]]
443
+ prev_dt: datetime | None = None
444
+ window_seconds = self._config.temporal_window_days * 86400
445
+
446
+ for fid, dt in dated:
447
+ if dt is None:
448
+ sub_clusters[-1].append(fid)
449
+ continue
450
+ if (
451
+ prev_dt is not None
452
+ and (dt - prev_dt).total_seconds() > window_seconds
453
+ ):
454
+ sub_clusters.append([])
455
+ sub_clusters[-1].append(fid)
456
+ prev_dt = dt
457
+
458
+ return [sc for sc in sub_clusters if sc]
459
+
460
+ # ------------------------------------------------------------------
461
+ # Step 3: Extract gist
462
+ # ------------------------------------------------------------------
463
+
464
+ def _step3_extract_gist(
465
+ self,
466
+ cluster: ConsolidationCluster,
467
+ profile_id: str,
468
+ ) -> GistResult:
469
+ """Extract a single gist from the cluster.
470
+
471
+ Mode B (LLM) attempted first if available; falls back to Mode A (rules).
472
+ """
473
+ # Fetch fact content
474
+ placeholders = ",".join("?" for _ in cluster.fact_ids)
475
+ rows = self._db.execute(
476
+ f"SELECT fact_id, content, importance, confidence, "
477
+ f" canonical_entities_json "
478
+ f"FROM atomic_facts "
479
+ f"WHERE fact_id IN ({placeholders}) AND profile_id = ?",
480
+ (*cluster.fact_ids, profile_id),
481
+ )
482
+
483
+ facts: list[dict] = []
484
+ for r in rows or []:
485
+ d = dict(r)
486
+ raw = d.get("canonical_entities_json") or "[]"
487
+ try:
488
+ d["canonical_entities"] = json.loads(raw)
489
+ except (json.JSONDecodeError, TypeError):
490
+ d["canonical_entities"] = []
491
+ facts.append(d)
492
+
493
+ if not facts:
494
+ return GistResult(
495
+ gist_text="[empty cluster]",
496
+ key_entities=(),
497
+ extraction_mode="rules",
498
+ representative_fact_id="",
499
+ )
500
+
501
+ # Try LLM mode (Mode B) if configured
502
+ if (
503
+ self._llm is not None
504
+ and self._config.use_llm_gist
505
+ ):
506
+ try:
507
+ gist = self._extract_gist_llm(
508
+ facts, cluster.shared_entities,
509
+ )
510
+ if gist is not None:
511
+ return gist
512
+ except Exception as exc:
513
+ logger.warning("LLM gist failed, falling back to rules: %s", exc)
514
+
515
+ # Mode A: rules-based
516
+ return self._extract_gist_mode_a(facts, cluster.shared_entities)
517
+
518
+ def _extract_gist_llm(
519
+ self,
520
+ facts: list[dict],
521
+ shared_entities: tuple[str, ...],
522
+ ) -> GistResult | None:
523
+ """LLM-based gist extraction (Mode B). Returns None if validation fails."""
524
+ if self._llm is None:
525
+ return None
526
+
527
+ fact_lines = "\n".join(
528
+ f"{i + 1}. {f['content']}" for i, f in enumerate(facts)
529
+ )
530
+ entity_str = ", ".join(shared_entities)
531
+
532
+ prompt = (
533
+ f"Summarize these {len(facts)} related memories into one "
534
+ f"concise factual statement.\n"
535
+ f"Preserve all key entities: {entity_str}.\n\n"
536
+ f"Memories:\n{fact_lines}\n\n"
537
+ f"Consolidated statement:"
538
+ )
539
+
540
+ response = self._llm.generate(
541
+ prompt,
542
+ system="You are a precise memory consolidator.",
543
+ )
544
+
545
+ # Validate entity coverage (HR-03)
546
+ if shared_entities:
547
+ mentioned = sum(
548
+ 1 for e in shared_entities
549
+ if e.lower() in response.lower()
550
+ )
551
+ coverage = mentioned / len(shared_entities)
552
+ if coverage < self._config.min_entity_coverage:
553
+ logger.info(
554
+ "LLM gist entity coverage %.2f < %.2f, falling back",
555
+ coverage, self._config.min_entity_coverage,
556
+ )
557
+ return None
558
+
559
+ # Truncate if needed
560
+ gist_text = response
561
+ if len(gist_text) > self._config.max_gist_chars:
562
+ gist_text = gist_text[: self._config.max_gist_chars - 3] + "..."
563
+
564
+ return GistResult(
565
+ gist_text=gist_text,
566
+ key_entities=shared_entities,
567
+ extraction_mode="llm",
568
+ representative_fact_id="",
569
+ )
570
+
571
+ def _extract_gist_mode_a(
572
+ self,
573
+ facts: list[dict],
574
+ shared_entities: tuple[str, ...],
575
+ ) -> GistResult:
576
+ """Rules-based gist: representative fact + entity summary."""
577
+ # Find representative (highest importance * confidence)
578
+ best = max(
579
+ facts,
580
+ key=lambda f: f.get("importance", 0) * f.get("confidence", 0),
581
+ )
582
+
583
+ # Entity frequency across cluster
584
+ entity_freq: dict[str, int] = defaultdict(int)
585
+ for f in facts:
586
+ for e in f.get("canonical_entities", []):
587
+ entity_freq[e] += 1
588
+ top_entities = sorted(
589
+ entity_freq, key=lambda k: entity_freq[k], reverse=True,
590
+ )[:5]
591
+
592
+ entity_summary = ", ".join(top_entities) if top_entities else ""
593
+ gist = f"{best['content']} [Entities: {entity_summary}]"
594
+
595
+ if len(gist) > self._config.max_gist_chars:
596
+ gist = gist[: self._config.max_gist_chars - 3] + "..."
597
+
598
+ return GistResult(
599
+ gist_text=gist,
600
+ key_entities=tuple(top_entities),
601
+ extraction_mode="rules",
602
+ representative_fact_id=best["fact_id"],
603
+ )
604
+
605
+ # ------------------------------------------------------------------
606
+ # Step 4: Compress source embeddings
607
+ # ------------------------------------------------------------------
608
+
609
+ def _step4_compress_embeddings(
610
+ self,
611
+ cluster: ConsolidationCluster,
612
+ profile_id: str,
613
+ ) -> tuple[int, int]:
614
+ """Compress source fact embeddings. Returns (bytes_before, bytes_after).
615
+
616
+ PolarQuant is optional (HR-10). If not available, marks as pending.
617
+ """
618
+ if not self._config.compress_embeddings:
619
+ return (0, 0)
620
+
621
+ total_before = 0
622
+ total_after = 0
623
+
624
+ for fact_id in cluster.fact_ids:
625
+ bb, ba = self._compress_single_embedding(fact_id, profile_id)
626
+ total_before += bb
627
+ total_after += ba
628
+
629
+ return (total_before, total_after)
630
+
631
+ def _compress_single_embedding(
632
+ self,
633
+ fact_id: str,
634
+ profile_id: str,
635
+ ) -> tuple[int, int]:
636
+ """Compress one fact's embedding. Returns (bytes_before, bytes_after).
637
+
638
+ PolarQuant is optional (HR-10). When unavailable, records the
639
+ uncompressed byte count so the pipeline can still report metrics.
640
+ Does NOT write invalid values to embedding_quantization_metadata.
641
+ """
642
+ meta = self._db.execute(
643
+ "SELECT vec_rowid, dimension FROM embedding_metadata "
644
+ "WHERE fact_id = ? AND profile_id = ?",
645
+ (fact_id, profile_id),
646
+ )
647
+ if not meta:
648
+ return (0, 0)
649
+
650
+ d = dict(meta[0])
651
+ dim = d.get("dimension", 768)
652
+
653
+ # Check existing quantization status
654
+ eq_meta = self._db.execute(
655
+ "SELECT quantization_level FROM embedding_quantization_metadata "
656
+ "WHERE fact_id = ? AND profile_id = ?",
657
+ (fact_id, profile_id),
658
+ )
659
+ if eq_meta:
660
+ level = dict(eq_meta[0]).get("quantization_level", "float32")
661
+ if level in ("polar2", "polar4", "deleted"):
662
+ return (0, 0) # Already compressed
663
+
664
+ bytes_before = dim * 4 # float32
665
+
666
+ # Try PolarQuant (Phase B) — optional dependency (HR-10)
667
+ try:
668
+ from superlocalmemory.math.polar_quant import PolarQuantEncoder
669
+ # Phase B exists. Mark as polar2 for the scheduler to actually
670
+ # perform the quantization with the raw embedding data.
671
+ self._db.execute(
672
+ "INSERT OR REPLACE INTO embedding_quantization_metadata "
673
+ "(fact_id, profile_id, quantization_level, bit_width) "
674
+ "VALUES (?, ?, 'polar2', 2)",
675
+ (fact_id, profile_id),
676
+ )
677
+ # Estimated compressed size: radius(4 bytes) + packed angles
678
+ bytes_after = 4 + (dim * self._config.target_bit_width + 7) // 8
679
+ return (bytes_before, bytes_after)
680
+ except ImportError:
681
+ pass
682
+
683
+ # PolarQuant not available: no compression, no metadata change.
684
+ # The bytes are uncompressed but we track them for metrics.
685
+ return (bytes_before, bytes_before)
686
+
687
+ # ------------------------------------------------------------------
688
+ # Step 5: Store block + archive source facts
689
+ # ------------------------------------------------------------------
690
+
691
+ def _step5_store_block(
692
+ self,
693
+ cluster: ConsolidationCluster,
694
+ gist: GistResult,
695
+ profile_id: str,
696
+ ) -> str:
697
+ """Create CCQ consolidated block and archive source facts.
698
+
699
+ Source facts are SOFT-ARCHIVED (HR-04), never deleted.
700
+ Gist embedding stored at float32 (HR-05).
701
+ """
702
+ block_id = _new_id()
703
+
704
+ # Generate gist embedding (full float32 precision — HR-05)
705
+ gist_embedding_rowid: int | None = None
706
+ if self._embedder is not None:
707
+ try:
708
+ self._embedder.encode(gist.gist_text)
709
+ # Note: storing in vec0 requires VectorStore integration.
710
+ # For now, the embedding is generated and the block records
711
+ # that an embedder was available.
712
+ except Exception as exc:
713
+ logger.warning("Gist embedding generation failed: %s", exc)
714
+
715
+ # Store the consolidated block
716
+ self._db.store_ccq_block(
717
+ block_id=block_id,
718
+ profile_id=profile_id,
719
+ content=gist.gist_text,
720
+ source_fact_ids=json.dumps(list(cluster.fact_ids)),
721
+ gist_embedding_rowid=gist_embedding_rowid,
722
+ char_count=len(gist.gist_text),
723
+ cluster_id=cluster.cluster_id,
724
+ )
725
+
726
+ # Archive source facts (HR-04: soft-archive, never delete)
727
+ for fact_id in cluster.fact_ids:
728
+ self._db.execute(
729
+ "UPDATE atomic_facts SET lifecycle = 'archived' "
730
+ "WHERE fact_id = ? AND profile_id = ?",
731
+ (fact_id, profile_id),
732
+ )
733
+ # Log access event
734
+ self._db.execute(
735
+ "INSERT INTO fact_access_log "
736
+ "(log_id, fact_id, profile_id, accessed_at, "
737
+ " access_type, session_id) "
738
+ "VALUES (?, ?, ?, datetime('now'), 'consolidation', 'ccq')",
739
+ (_new_id(), fact_id, profile_id),
740
+ )
741
+ # Update fact_retention zone
742
+ self._db.execute(
743
+ "UPDATE fact_retention "
744
+ "SET lifecycle_zone = 'archive', "
745
+ " last_computed_at = datetime('now') "
746
+ "WHERE fact_id = ? AND profile_id = ?",
747
+ (fact_id, profile_id),
748
+ )
749
+
750
+ return block_id
751
+
752
+ # ------------------------------------------------------------------
753
+ # Step 6: Audit trail
754
+ # ------------------------------------------------------------------
755
+
756
+ def _step6_audit(
757
+ self,
758
+ cluster: ConsolidationCluster,
759
+ gist: GistResult,
760
+ bytes_before: int,
761
+ bytes_after: int,
762
+ block_id: str,
763
+ profile_id: str,
764
+ ) -> str:
765
+ """Record audit trail for this consolidation (HR-11)."""
766
+ audit_id = _new_id()
767
+ compression_ratio = (
768
+ bytes_before / bytes_after if bytes_after > 0 else 0.0
769
+ )
770
+
771
+ self._db.store_ccq_audit({
772
+ "audit_id": audit_id,
773
+ "profile_id": profile_id,
774
+ "cluster_id": cluster.cluster_id,
775
+ "block_id": block_id,
776
+ "fact_ids": json.dumps(list(cluster.fact_ids)),
777
+ "fact_count": cluster.fact_count,
778
+ "gist_text": gist.gist_text,
779
+ "extraction_mode": gist.extraction_mode,
780
+ "bytes_before": bytes_before,
781
+ "bytes_after": bytes_after,
782
+ "compression_ratio": round(compression_ratio, 2),
783
+ "shared_entities": json.dumps(list(cluster.shared_entities)),
784
+ })
785
+
786
+ return audit_id
787
+
788
+ # ------------------------------------------------------------------
789
+ # Helpers
790
+ # ------------------------------------------------------------------
791
+
792
+ @staticmethod
793
+ def _empty_result() -> CCQPipelineResult:
794
+ """Return a zero-work pipeline result."""
795
+ return CCQPipelineResult(
796
+ clusters_processed=0,
797
+ blocks_created=0,
798
+ facts_archived=0,
799
+ total_bytes_before=0,
800
+ total_bytes_after=0,
801
+ compression_ratio=0.0,
802
+ audit_entries=(),
803
+ errors=(),
804
+ )