superlocalmemory 3.4.9 → 3.4.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/README.md +23 -3
  2. package/docs/cloud-backup.md +174 -0
  3. package/docs/skill-evolution.md +256 -0
  4. package/ide/hooks/tool-event-hook.sh +101 -11
  5. package/package.json +1 -1
  6. package/pyproject.toml +3 -2
  7. package/src/superlocalmemory/cli/commands.py +359 -0
  8. package/src/superlocalmemory/cli/ingest_cmd.py +81 -29
  9. package/src/superlocalmemory/cli/main.py +32 -0
  10. package/src/superlocalmemory/cli/setup_wizard.py +54 -11
  11. package/src/superlocalmemory/core/config.py +35 -0
  12. package/src/superlocalmemory/core/consolidation_engine.py +138 -0
  13. package/src/superlocalmemory/core/embedding_worker.py +1 -1
  14. package/src/superlocalmemory/core/engine.py +19 -0
  15. package/src/superlocalmemory/core/fact_consolidator.py +425 -0
  16. package/src/superlocalmemory/core/graph_pruner.py +290 -0
  17. package/src/superlocalmemory/core/maintenance_scheduler.py +44 -3
  18. package/src/superlocalmemory/core/recall_pipeline.py +9 -0
  19. package/src/superlocalmemory/core/tier_manager.py +325 -0
  20. package/src/superlocalmemory/encoding/entity_resolver.py +96 -28
  21. package/src/superlocalmemory/evolution/__init__.py +29 -0
  22. package/src/superlocalmemory/evolution/blind_verifier.py +115 -0
  23. package/src/superlocalmemory/evolution/evolution_store.py +302 -0
  24. package/src/superlocalmemory/evolution/mutation_generator.py +181 -0
  25. package/src/superlocalmemory/evolution/skill_evolver.py +555 -0
  26. package/src/superlocalmemory/evolution/triggers.py +367 -0
  27. package/src/superlocalmemory/evolution/types.py +92 -0
  28. package/src/superlocalmemory/hooks/hook_handlers.py +13 -0
  29. package/src/superlocalmemory/infra/backup.py +63 -20
  30. package/src/superlocalmemory/infra/cloud_backup.py +703 -0
  31. package/src/superlocalmemory/learning/skill_performance_miner.py +422 -0
  32. package/src/superlocalmemory/mcp/server.py +4 -0
  33. package/src/superlocalmemory/mcp/tools_evolution.py +338 -0
  34. package/src/superlocalmemory/retrieval/engine.py +64 -4
  35. package/src/superlocalmemory/retrieval/forgetting_filter.py +22 -7
  36. package/src/superlocalmemory/retrieval/strategy.py +2 -2
  37. package/src/superlocalmemory/server/routes/backup.py +512 -8
  38. package/src/superlocalmemory/server/routes/behavioral.py +39 -17
  39. package/src/superlocalmemory/server/routes/evolution.py +213 -0
  40. package/src/superlocalmemory/server/routes/tiers.py +195 -0
  41. package/src/superlocalmemory/server/unified_daemon.py +36 -5
  42. package/src/superlocalmemory/storage/schema_v3410.py +159 -0
  43. package/src/superlocalmemory/storage/schema_v3411.py +149 -0
  44. package/src/superlocalmemory/ui/index.html +59 -3
  45. package/src/superlocalmemory/ui/js/core.js +3 -0
  46. package/src/superlocalmemory/ui/js/lifecycle.js +83 -0
  47. package/src/superlocalmemory/ui/js/ng-entities.js +27 -3
  48. package/src/superlocalmemory/ui/js/ng-shell.js +33 -0
  49. package/src/superlocalmemory/ui/js/ng-skills.js +611 -0
  50. package/src/superlocalmemory/ui/js/settings.js +311 -1
  51. package/src/superlocalmemory.egg-info/PKG-INFO +16 -1
  52. package/src/superlocalmemory.egg-info/SOURCES.txt +18 -0
@@ -0,0 +1,425 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under AGPL-3.0-or-later - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """SuperLocalMemory V3.4.11 "Scale-Ready" — Fact Consolidation Engine.
6
+
7
+ Merges clusters of related facts about the same entity into single
8
+ comprehensive summary facts. Original facts move to 'archived' tier
9
+ but are NEVER deleted — searchable via deep recall.
10
+
11
+ Uses Mode B (Ollama LLM) for summarization, with Mode A (extractive)
12
+ fallback if LLM is unavailable.
13
+
14
+ CRITICAL RULES:
15
+ 1. NEVER delete original facts
16
+ 2. Original facts → lifecycle='archived' (not deleted)
17
+ 3. Consolidated fact links back to originals via fact_consolidations table
18
+ 4. Only consolidates facts that are already 'warm' or 'cold' tier
19
+ 5. Never touches 'active' or 'pinned' facts
20
+ 6. All writes per cluster wrapped in SAVEPOINT for atomicity
21
+ 7. Entity ID LIKE patterns use JSON-boundary quoting to prevent
22
+ substring false positives
23
+
24
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import json
30
+ import logging
31
+ import sqlite3
32
+ import uuid
33
+ from datetime import datetime, timezone
34
+ from pathlib import Path
35
+
36
+ logger = logging.getLogger("superlocalmemory.fact_consolidator")
37
+
38
+ _MAX_CLUSTER_SIZE = 10 # Upper bound on facts merged into one consolidated fact (also caps the facts listed in LLM prompts)
39
+ _MIN_CLUSTER_SIZE = 3 # An entity needs at least this many warm/cold facts before its cluster is consolidated
40
+ _MAX_CONSOLIDATED_CHARS = 2000 # Hard cap, in characters, applied to every generated summary
41
+
42
+
43
def consolidate_facts(
    db_path: str | Path,
    profile_id: str = "default",
    max_clusters: int = 20,
    dry_run: bool = False,
    config: object | None = None,
) -> dict:
    """Find and consolidate clusters of related facts.

    Mode behavior (read from ``config.mode``; defaults to "a"):
    - Mode A: Extractive only (no LLM). Always available.
    - Mode B: Ollama LLM summarization. Falls back to extractive.
    - Mode C: Cloud LLM (user's configured provider). Falls back to
      Ollama, then to extractive.

    Args:
        db_path: Path of the SQLite database to operate on.
        profile_id: Profile whose facts are considered.
        max_clusters: Maximum number of entity clusters to process.
        dry_run: When True, summaries are generated but nothing is written
            or committed; the counters then describe what *would* happen.
        config: Optional object carrying ``mode`` (and LLM settings used by
            the summarizers).

    Returns:
        Stats dict with keys ``clusters_found``, ``consolidated``,
        ``facts_archived``, ``errors``, ``error_detail`` and ``mode``.
        Failures (including connection/PRAGMA failures) are reported via
        ``errors``/``error_detail`` rather than raised.
    """
    stats = {
        "clusters_found": 0,
        "consolidated": 0,
        "facts_archived": 0,
        "errors": 0,
        "error_detail": "",
        "mode": "a",
    }

    if config:
        mode = getattr(config, 'mode', None)
        if mode:
            stats["mode"] = getattr(mode, 'value', str(mode)).lower()

    conn = None
    try:
        # Connection setup lives inside the try so that a failing PRAGMA
        # cannot leak the connection (the finally block always closes it).
        conn = sqlite3.connect(str(db_path))
        wal_mode = conn.execute("PRAGMA journal_mode=WAL").fetchone()
        if wal_mode and wal_mode[0] != "wal":
            logger.warning("WAL mode not active, got: %s", wal_mode[0])
        conn.execute("PRAGMA busy_timeout=10000")
        # Helpers read columns by name, so rows must be sqlite3.Row.
        conn.row_factory = sqlite3.Row

        clusters = _find_consolidation_clusters(conn, profile_id, max_clusters)
        stats["clusters_found"] = len(clusters)

        for entity_id, entity_name, fact_ids in clusters:
            try:
                result = _consolidate_cluster(
                    conn, profile_id, entity_id, entity_name,
                    fact_ids, dry_run, config,
                )
            except Exception as exc:
                # One bad cluster must not abort the remaining ones.
                logger.warning(
                    "Consolidation failed for %s: %s",
                    entity_name, exc, exc_info=True,
                )
                stats["errors"] += 1
                continue

            if result:
                stats["consolidated"] += 1
                # Count the facts the helper actually processed, not the
                # number of ids that were requested.
                stats["facts_archived"] += result.get("facts", len(fact_ids))

        if not dry_run:
            conn.commit()

        if stats["consolidated"] > 0:
            if dry_run:
                logger.info(
                    "Fact consolidation (dry run): %d clusters would be "
                    "merged, %d facts would be archived",
                    stats["consolidated"], stats["facts_archived"],
                )
            else:
                logger.info(
                    "Fact consolidation: %d clusters merged, %d facts archived",
                    stats["consolidated"], stats["facts_archived"],
                )
    except Exception as exc:
        logger.error("Fact consolidation failed: %s", exc, exc_info=True)
        stats["errors"] += 1
        stats["error_detail"] = str(exc)
    finally:
        if conn is not None:
            conn.close()

    return stats
117
+
118
+
119
def _find_consolidation_clusters(
    conn: sqlite3.Connection,
    profile_id: str,
    max_clusters: int,
) -> list[tuple[str, str, list[str]]]:
    """Find entities with clusters of warm/cold facts ready for consolidation.

    Uses JSON-boundary quoting on entity_id to prevent substring false
    positives. Both the outer count and the inner fact query are scoped to
    profile_id and exclude facts listed in pinned_facts.

    A fact that references several entities is assigned to at most ONE
    cluster (the first qualifying entity, in descending fact-count order),
    so the same fact is never summarized and archived more than once in a
    single run.

    Rows are read by column name, so ``conn`` must use a name-indexable row
    factory (e.g. ``sqlite3.Row``).

    Returns:
        A list of ``(entity_id, canonical_name, fact_ids)`` tuples, each with
        between _MIN_CLUSTER_SIZE and _MAX_CLUSTER_SIZE fact ids.
    """
    cur = conn.cursor()

    # Find entities with many non-active, non-pinned facts
    # Uses '%" entity_id "%' pattern for JSON boundary matching
    entities = cur.execute("""
        SELECT ce.entity_id, ce.canonical_name, COUNT(af.fact_id) as fact_count
        FROM canonical_entities ce
        JOIN atomic_facts af
            ON af.canonical_entities_json LIKE '%"' || ce.entity_id || '"%'
            AND af.profile_id = ?
        WHERE ce.profile_id = ?
            AND af.lifecycle IN ('warm', 'cold')
            AND af.fact_id NOT IN (
                SELECT fact_id FROM pinned_facts WHERE profile_id = ?
            )
        GROUP BY ce.entity_id
        HAVING COUNT(af.fact_id) >= ?
        ORDER BY COUNT(af.fact_id) DESC
        LIMIT ?
    """, (profile_id, profile_id, profile_id, _MIN_CLUSTER_SIZE,
          max_clusters)).fetchall()

    clusters: list[tuple[str, str, list[str]]] = []
    claimed: set[str] = set()  # fact ids already placed in an accepted cluster

    for entity in entities:
        eid = entity["entity_id"]
        # No SQL LIMIT here: the cap is applied while iterating so that
        # already-claimed facts do not eat into this entity's quota.
        rows = cur.execute("""
            SELECT af.fact_id FROM atomic_facts af
            WHERE af.canonical_entities_json LIKE ?
                AND af.profile_id = ?
                AND af.lifecycle IN ('warm', 'cold')
                AND af.fact_id NOT IN (
                    SELECT fact_id FROM pinned_facts WHERE profile_id = ?
                )
            ORDER BY af.confidence DESC, af.created_at DESC
        """, (f'%"{eid}"%', profile_id, profile_id))

        fact_ids: list[str] = []
        for row in rows:
            fid = row["fact_id"]
            if fid in claimed:
                continue
            fact_ids.append(fid)
            if len(fact_ids) >= _MAX_CLUSTER_SIZE:
                break

        if len(fact_ids) >= _MIN_CLUSTER_SIZE:
            # Only claim facts once the cluster is actually accepted.
            claimed.update(fact_ids)
            clusters.append((eid, entity["canonical_name"], fact_ids))

    return clusters
172
+
173
+
174
+ def _consolidate_cluster(
175
+ conn: sqlite3.Connection,
176
+ profile_id: str,
177
+ entity_id: str,
178
+ entity_name: str,
179
+ fact_ids: list[str],
180
+ dry_run: bool,
181
+ config: object | None = None,
182
+ ) -> dict | None:
183
+ """Merge a cluster of facts into one consolidated fact.
184
+
185
+ All writes are wrapped in a SAVEPOINT for atomicity — if any step fails,
186
+ the entire cluster consolidation is rolled back.
187
+ """
188
+ c = conn.cursor()
189
+
190
+ # Load fact contents including canonical_entities_json
191
+ placeholders = ",".join("?" * len(fact_ids))
192
+ facts = c.execute(
193
+ f"SELECT fact_id, content, confidence, created_at, canonical_entities_json "
194
+ f"FROM atomic_facts "
195
+ f"WHERE fact_id IN ({placeholders}) ORDER BY created_at",
196
+ fact_ids,
197
+ ).fetchall()
198
+
199
+ if len(facts) < _MIN_CLUSTER_SIZE:
200
+ return None
201
+
202
+ summary = _generate_summary(entity_name, facts, config)
203
+ if not summary:
204
+ return None
205
+
206
+ if dry_run:
207
+ return {"entity": entity_name, "facts": len(facts), "summary_len": len(summary)}
208
+
209
+ # Use SAVEPOINT for atomic multi-step write
210
+ savepoint_name = f"consolidate_{uuid.uuid4().hex[:8]}"
211
+ c.execute(f"SAVEPOINT {savepoint_name}")
212
+
213
+ try:
214
+ new_fact_id = uuid.uuid4().hex[:16]
215
+ now = datetime.now(timezone.utc).isoformat()
216
+ avg_confidence = sum(f["confidence"] or 0.5 for f in facts) / len(facts)
217
+
218
+ # Collect entities from ALL source facts (already in the SELECT)
219
+ all_entities = set()
220
+ raw_entities = set()
221
+ for f in facts:
222
+ cej = f["canonical_entities_json"]
223
+ if cej:
224
+ try:
225
+ all_entities.update(json.loads(cej))
226
+ except (json.JSONDecodeError, TypeError):
227
+ pass
228
+
229
+ c.execute("""
230
+ INSERT INTO atomic_facts
231
+ (fact_id, memory_id, profile_id, content, fact_type,
232
+ entities_json, canonical_entities_json,
233
+ confidence, importance, evidence_count, access_count,
234
+ created_at, lifecycle)
235
+ VALUES (?, '', ?, ?, 'semantic', ?, ?, ?, 0.8, ?, 0, ?, 'active')
236
+ """, (
237
+ new_fact_id, profile_id, summary,
238
+ json.dumps(list(all_entities)),
239
+ json.dumps(list(all_entities)),
240
+ round(avg_confidence, 3), len(facts), now,
241
+ ))
242
+
243
+ # Record the consolidation
244
+ consolidation_id = uuid.uuid4().hex[:16]
245
+ c.execute("""
246
+ INSERT INTO fact_consolidations
247
+ (consolidation_id, profile_id, consolidated_fact_id,
248
+ source_fact_ids, strategy, created_at)
249
+ VALUES (?, ?, ?, ?, 'entity_cluster', ?)
250
+ """, (consolidation_id, profile_id, new_fact_id,
251
+ json.dumps(fact_ids), now))
252
+
253
+ # Archive the original facts (NEVER delete) — scoped to profile_id
254
+ c.execute(
255
+ f"UPDATE atomic_facts SET lifecycle = 'archived' "
256
+ f"WHERE fact_id IN ({placeholders}) AND profile_id = ?",
257
+ (*fact_ids, profile_id),
258
+ )
259
+
260
+ c.execute(f"RELEASE SAVEPOINT {savepoint_name}")
261
+
262
+ except Exception:
263
+ c.execute(f"ROLLBACK TO SAVEPOINT {savepoint_name}")
264
+ raise
265
+
266
+ logger.info(
267
+ "Consolidated %d facts about '%s' → %s (%d chars)",
268
+ len(facts), entity_name, new_fact_id[:8], len(summary),
269
+ )
270
+
271
+ return {"entity": entity_name, "facts": len(facts), "new_fact_id": new_fact_id}
272
+
273
+
274
def _generate_summary(
    entity_name: str,
    facts: list,
    config: object | None = None,
) -> str | None:
    """Produce the consolidated summary text for one cluster.

    The strategy follows ``config.mode`` (default "a"):
    cloud LLM for "c", Ollama for "b" (and as the second choice for "c"),
    and the extractive summarizer as the last resort and for any other mode.

    Whatever strategy wins, the text is capped at _MAX_CONSOLIDATED_CHARS.
    """
    selected = "a"
    configured = getattr(config, 'mode', None) if config else None
    if configured:
        selected = getattr(configured, 'value', str(configured)).lower()

    summary = None

    # Walk the fallback chain: cloud -> ollama -> extractive.
    if selected == "c":
        summary = _summarize_with_cloud_llm(entity_name, facts, config)
    if selected in ("b", "c") and not summary:
        summary = _summarize_with_ollama(entity_name, facts, config)
    if not summary:
        summary = _summarize_extractive(entity_name, facts)

    # Uniform cap across all modes
    if not summary or len(summary) <= _MAX_CONSOLIDATED_CHARS:
        return summary
    return summary[:_MAX_CONSOLIDATED_CHARS - 3] + "..."
311
+
312
+
313
def _summarize_with_ollama(
    entity_name: str,
    facts: list,
    config: object | None = None,
) -> str | None:
    """Mode B: Summarize using local Ollama LLM.

    Posts a single non-streaming request to ``{api_base}/api/generate``.
    ``api_base``, ``model`` and ``timeout`` default to a local Ollama setup
    and can be overridden through ``config.llm``.

    Returns:
        The stripped response text when it is longer than 50 characters,
        otherwise ``None``. Any failure (network, HTTP, JSON, missing keys)
        is logged as a warning and also yields ``None`` so callers can fall
        back to another summarizer.
    """
    try:
        import urllib.request

        api_base = "http://localhost:11434"
        model = "llama3.2"
        timeout = 30

        if config and hasattr(config, 'llm'):
            api_base = getattr(config.llm, 'api_base', api_base) or api_base
            model = getattr(config.llm, 'model', model) or model
            timeout = getattr(config.llm, 'timeout', timeout) or timeout

        # The count in the prompt must match the facts actually listed.
        selected = facts[:_MAX_CLUSTER_SIZE]
        fact_texts = "\n".join(f"- {f['content']}" for f in selected)
        prompt = (
            f"Merge these {len(selected)} facts about '{entity_name}' into ONE concise "
            f"summary paragraph. Keep all key information. Maximum 500 words. "
            f"No preamble.\n\nFacts:\n{fact_texts}"
        )

        payload = json.dumps({
            "model": model,
            "prompt": prompt,
            "stream": False,
            "options": {"num_predict": 600},
        }).encode()

        # Tolerate a configured base URL that ends with a slash.
        endpoint = f"{str(api_base).rstrip('/')}/api/generate"
        req = urllib.request.Request(
            endpoint,
            data=payload,
            headers={"Content-Type": "application/json"},
        )
        # Context manager guarantees the HTTP response is closed.
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            result = json.loads(resp.read().decode())

        text = (result.get("response") or "").strip()
        return text if len(text) > 50 else None
    except Exception as exc:
        logger.warning("Ollama summarization failed: %s", exc)
        return None
357
+
358
+
359
def _summarize_with_cloud_llm(
    entity_name: str,
    facts: list,
    config: object | None = None,
) -> str | None:
    """Mode C: Summarize using the user's configured cloud LLM provider.

    Returns ``None`` when no usable LLM configuration/provider is present,
    when the backbone reports itself unavailable, when the answer is 50
    characters or shorter, or when anything raises (logged as a warning).
    """
    if not (config and hasattr(config, 'llm')):
        return None

    llm_config = config.llm
    if not getattr(llm_config, 'provider', ''):
        return None

    try:
        from superlocalmemory.llm.backbone import LLMBackbone

        backbone = LLMBackbone(llm_config)
        if not llm_available(backbone):
            return None

        bullet_lines = [f"- {f['content']}" for f in facts[:_MAX_CLUSTER_SIZE]]
        fact_texts = "\n".join(bullet_lines)
        prompt = (
            f"Merge these {len(facts)} facts about '{entity_name}' into ONE concise "
            f"summary paragraph. Keep all key information. Maximum 500 words. "
            f"No preamble.\n\nFacts:\n{fact_texts}"
        )

        response = backbone.generate(
            prompt=prompt,
            system="You are a precise fact summarizer. Output only the merged summary.",
            max_tokens=600,
            temperature=0.1,
        )
        if not response:
            return None
        text = response.strip()
        # Very short answers are treated as failed summaries.
        if text and len(text) > 50:
            return text
        return None
    except Exception as exc:
        logger.warning("Cloud LLM summarization failed: %s", exc)
        return None


def llm_available(backbone: object) -> bool:
    """Return the backbone's own availability verdict."""
    return backbone.is_available()
397
+
398
+
399
+ def _summarize_extractive(entity_name: str, facts: list) -> str:
400
+ """Extractive summary — all sentences from all facts, deduped.
401
+
402
+ Includes ALL sentences from each fact (not just the first one)
403
+ to preserve complete information.
404
+ """
405
+ header = f"{entity_name}: "
406
+ seen = set()
407
+ sentences = []
408
+
409
+ for f in facts:
410
+ content = f["content"]
411
+ # Split on sentence boundaries and include ALL sentences
412
+ raw_sentences = [s.strip() for s in content.split(". ") if s.strip()]
413
+ for sent in raw_sentences:
414
+ if not sent.endswith("."):
415
+ sent += "."
416
+ normalized = sent.lower()
417
+ if normalized not in seen:
418
+ seen.add(normalized)
419
+ sentences.append(sent)
420
+
421
+ body = " ".join(sentences)
422
+ result = header + body
423
+ if len(result) > _MAX_CONSOLIDATED_CHARS:
424
+ result = result[:_MAX_CONSOLIDATED_CHARS - 3] + "..."
425
+ return result