superlocalmemory 3.4.18 → 3.4.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. package/CHANGELOG.md +35 -0
  2. package/README.md +42 -34
  3. package/bin/slm +11 -0
  4. package/bin/slm.bat +12 -0
  5. package/package.json +4 -3
  6. package/pyproject.toml +3 -2
  7. package/scripts/build-slm-hook.ps1 +40 -0
  8. package/scripts/build-slm-hook.sh +45 -0
  9. package/scripts/build_entry.py +452 -0
  10. package/scripts/ci/stage5b_gate.sh +50 -0
  11. package/scripts/postinstall/validation.js +187 -0
  12. package/scripts/postinstall-interactive.js +756 -0
  13. package/scripts/postinstall_binary.js +287 -0
  14. package/scripts/release_manifest.py +273 -0
  15. package/scripts/slm-hook.spec +56 -0
  16. package/skills/slm-build-graph/SKILL.md +423 -0
  17. package/skills/slm-list-recent/SKILL.md +348 -0
  18. package/skills/slm-recall/SKILL.md +343 -0
  19. package/skills/slm-remember/SKILL.md +194 -0
  20. package/skills/slm-show-patterns/SKILL.md +224 -0
  21. package/skills/slm-status/SKILL.md +363 -0
  22. package/skills/slm-switch-profile/SKILL.md +442 -0
  23. package/src/superlocalmemory/cli/commands.py +219 -79
  24. package/src/superlocalmemory/cli/context_commands.py +192 -0
  25. package/src/superlocalmemory/cli/daemon.py +15 -1
  26. package/src/superlocalmemory/cli/db_migrate.py +80 -0
  27. package/src/superlocalmemory/cli/escape_hatch.py +220 -0
  28. package/src/superlocalmemory/cli/main.py +72 -1
  29. package/src/superlocalmemory/core/context_cache.py +397 -0
  30. package/src/superlocalmemory/core/embeddings.py +8 -2
  31. package/src/superlocalmemory/core/engine.py +38 -2
  32. package/src/superlocalmemory/core/engine_wiring.py +1 -1
  33. package/src/superlocalmemory/core/ram_lock.py +111 -0
  34. package/src/superlocalmemory/core/recall_pipeline.py +433 -3
  35. package/src/superlocalmemory/core/recall_worker.py +8 -3
  36. package/src/superlocalmemory/core/security_primitives.py +635 -0
  37. package/src/superlocalmemory/core/shadow_router.py +319 -0
  38. package/src/superlocalmemory/core/slm_disabled.py +87 -0
  39. package/src/superlocalmemory/core/slmignore.py +125 -0
  40. package/src/superlocalmemory/core/topic_signature.py +143 -0
  41. package/src/superlocalmemory/core/worker_pool.py +14 -3
  42. package/src/superlocalmemory/encoding/cognitive_consolidator.py +2 -2
  43. package/src/superlocalmemory/evolution/budget.py +321 -0
  44. package/src/superlocalmemory/evolution/llm_dispatch.py +508 -0
  45. package/src/superlocalmemory/evolution/skill_evolver.py +144 -94
  46. package/src/superlocalmemory/hooks/_outcome_common.py +506 -0
  47. package/src/superlocalmemory/hooks/adapter_base.py +317 -0
  48. package/src/superlocalmemory/hooks/antigravity_adapter.py +192 -0
  49. package/src/superlocalmemory/hooks/claude_code_hooks.py +33 -1
  50. package/src/superlocalmemory/hooks/context_payload.py +312 -0
  51. package/src/superlocalmemory/hooks/copilot_adapter.py +154 -0
  52. package/src/superlocalmemory/hooks/cross_platform_connector.py +90 -0
  53. package/src/superlocalmemory/hooks/cursor_adapter.py +195 -0
  54. package/src/superlocalmemory/hooks/hook_handlers.py +109 -8
  55. package/src/superlocalmemory/hooks/ide_connector.py +25 -2
  56. package/src/superlocalmemory/hooks/post_tool_async_hook.py +165 -0
  57. package/src/superlocalmemory/hooks/post_tool_outcome_hook.py +223 -0
  58. package/src/superlocalmemory/hooks/prewarm_auth.py +170 -0
  59. package/src/superlocalmemory/hooks/session_registry.py +186 -0
  60. package/src/superlocalmemory/hooks/stop_outcome_hook.py +134 -0
  61. package/src/superlocalmemory/hooks/sync_loop.py +114 -0
  62. package/src/superlocalmemory/hooks/user_prompt_hook.py +128 -0
  63. package/src/superlocalmemory/hooks/user_prompt_rehash_hook.py +202 -0
  64. package/src/superlocalmemory/infra/backup.py +3 -3
  65. package/src/superlocalmemory/infra/cloud_backup.py +2 -2
  66. package/src/superlocalmemory/infra/event_bus.py +2 -2
  67. package/src/superlocalmemory/infra/webhook_dispatcher.py +3 -3
  68. package/src/superlocalmemory/learning/arm_catalog.py +99 -0
  69. package/src/superlocalmemory/learning/bandit.py +526 -0
  70. package/src/superlocalmemory/learning/bandit_cache.py +133 -0
  71. package/src/superlocalmemory/learning/behavioral.py +53 -1
  72. package/src/superlocalmemory/learning/consolidation_cycle.py +381 -0
  73. package/src/superlocalmemory/learning/consolidation_worker.py +188 -520
  74. package/src/superlocalmemory/learning/database.py +256 -0
  75. package/src/superlocalmemory/learning/dedup_hnsw.py +413 -0
  76. package/src/superlocalmemory/learning/ensemble.py +300 -0
  77. package/src/superlocalmemory/learning/fact_outcome_joins.py +207 -0
  78. package/src/superlocalmemory/learning/forgetting_scheduler.py +55 -0
  79. package/src/superlocalmemory/learning/hnsw_dedup.py +69 -0
  80. package/src/superlocalmemory/learning/labeler.py +87 -0
  81. package/src/superlocalmemory/learning/legacy_migration.py +277 -0
  82. package/src/superlocalmemory/learning/memory_merge.py +160 -0
  83. package/src/superlocalmemory/learning/model_cache.py +269 -0
  84. package/src/superlocalmemory/learning/model_rollback.py +278 -0
  85. package/src/superlocalmemory/learning/outcome_queue.py +284 -0
  86. package/src/superlocalmemory/learning/pattern_miner.py +415 -0
  87. package/src/superlocalmemory/learning/pattern_miner_constants.py +47 -0
  88. package/src/superlocalmemory/learning/ranker.py +225 -81
  89. package/src/superlocalmemory/learning/ranker_common.py +163 -0
  90. package/src/superlocalmemory/learning/ranker_retrain_legacy.py +202 -0
  91. package/src/superlocalmemory/learning/ranker_retrain_online.py +411 -0
  92. package/src/superlocalmemory/learning/reward.py +777 -0
  93. package/src/superlocalmemory/learning/reward_archive.py +210 -0
  94. package/src/superlocalmemory/learning/reward_boost.py +201 -0
  95. package/src/superlocalmemory/learning/reward_proxy.py +326 -0
  96. package/src/superlocalmemory/learning/shadow_test.py +524 -0
  97. package/src/superlocalmemory/learning/signal_worker.py +270 -0
  98. package/src/superlocalmemory/learning/signals.py +314 -0
  99. package/src/superlocalmemory/learning/trigram_index.py +547 -0
  100. package/src/superlocalmemory/mcp/server.py +5 -5
  101. package/src/superlocalmemory/mcp/tools_context.py +183 -0
  102. package/src/superlocalmemory/mcp/tools_core.py +92 -27
  103. package/src/superlocalmemory/parameterization/soft_prompt_generator.py +13 -0
  104. package/src/superlocalmemory/retrieval/engine.py +52 -0
  105. package/src/superlocalmemory/retrieval/reranker.py +4 -2
  106. package/src/superlocalmemory/server/api.py +2 -2
  107. package/src/superlocalmemory/server/bandit_loops.py +140 -0
  108. package/src/superlocalmemory/server/middleware/__init__.py +11 -0
  109. package/src/superlocalmemory/server/middleware/security_headers.py +144 -0
  110. package/src/superlocalmemory/server/routes/backup.py +36 -13
  111. package/src/superlocalmemory/server/routes/behavioral.py +50 -19
  112. package/src/superlocalmemory/server/routes/brain.py +1234 -0
  113. package/src/superlocalmemory/server/routes/data_io.py +4 -4
  114. package/src/superlocalmemory/server/routes/events.py +2 -2
  115. package/src/superlocalmemory/server/routes/helpers.py +1 -1
  116. package/src/superlocalmemory/server/routes/learning.py +192 -7
  117. package/src/superlocalmemory/server/routes/memories.py +189 -1
  118. package/src/superlocalmemory/server/routes/prewarm.py +171 -0
  119. package/src/superlocalmemory/server/routes/profiles.py +3 -3
  120. package/src/superlocalmemory/server/routes/token.py +88 -0
  121. package/src/superlocalmemory/server/routes/ws.py +5 -5
  122. package/src/superlocalmemory/server/security_middleware.py +13 -7
  123. package/src/superlocalmemory/server/ui.py +2 -2
  124. package/src/superlocalmemory/server/unified_daemon.py +335 -3
  125. package/src/superlocalmemory/storage/migration_runner.py +545 -0
  126. package/src/superlocalmemory/storage/migrations/M001_add_signal_features_columns.py +67 -0
  127. package/src/superlocalmemory/storage/migrations/M002_model_state_history.py +132 -0
  128. package/src/superlocalmemory/storage/migrations/M003_migration_log.py +38 -0
  129. package/src/superlocalmemory/storage/migrations/M004_cross_platform_sync_log.py +46 -0
  130. package/src/superlocalmemory/storage/migrations/M005_bandit_tables.py +75 -0
  131. package/src/superlocalmemory/storage/migrations/M006_action_outcomes_reward.py +75 -0
  132. package/src/superlocalmemory/storage/migrations/M007_pending_outcomes.py +63 -0
  133. package/src/superlocalmemory/storage/migrations/M009_model_lineage.py +54 -0
  134. package/src/superlocalmemory/storage/migrations/M010_evolution_config.py +75 -0
  135. package/src/superlocalmemory/storage/migrations/M011_archive_and_merge.py +87 -0
  136. package/src/superlocalmemory/storage/migrations/M012_shadow_observations.py +72 -0
  137. package/src/superlocalmemory/storage/migrations/M013_bi_temporal_columns.py +55 -0
  138. package/src/superlocalmemory/storage/migrations/__init__.py +81 -0
  139. package/src/superlocalmemory/storage/models.py +4 -0
  140. package/src/superlocalmemory/ui/css/brain.css +409 -0
  141. package/src/superlocalmemory/ui/css/legacy-dashboard.css +645 -0
  142. package/src/superlocalmemory/ui/index.html +459 -1345
  143. package/src/superlocalmemory/ui/js/brain.js +1321 -0
  144. package/src/superlocalmemory/ui/js/clusters.js +123 -4
  145. package/src/superlocalmemory/ui/js/init.js +48 -39
  146. package/src/superlocalmemory/ui/js/memories.js +88 -2
  147. package/src/superlocalmemory/ui/js/modal.js +71 -1
  148. package/src/superlocalmemory/ui/js/ng-shell.js +101 -88
  149. package/src/superlocalmemory/ui/js/trust-dashboard.js +168 -25
  150. package/src/superlocalmemory/ui/vendor/bootstrap-icons/bootstrap-icons.css +2018 -0
  151. package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff +0 -0
  152. package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff2 +0 -0
  153. package/src/superlocalmemory/ui/vendor/bootstrap.bundle.min.js +7 -0
  154. package/src/superlocalmemory/ui/vendor/bootstrap.min.css +6 -0
  155. package/src/superlocalmemory/ui/vendor/d3.v7.min.js +2 -0
  156. package/src/superlocalmemory/ui/vendor/graphology-library.min.js +2 -0
  157. package/src/superlocalmemory/ui/vendor/graphology.umd.min.js +2 -0
  158. package/src/superlocalmemory/ui/vendor/inter-ui/inter-variable.min.css +8 -0
  159. package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable-Italic.woff2 +0 -0
  160. package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable.woff2 +0 -0
  161. package/src/superlocalmemory/ui/vendor/sigma.min.js +1 -0
  162. package/src/superlocalmemory/ui/js/behavioral.js +0 -447
  163. package/src/superlocalmemory/ui/js/graph-core.js +0 -447
  164. package/src/superlocalmemory/ui/js/graph-interactions.js +0 -351
  165. package/src/superlocalmemory/ui/js/learning.js +0 -435
  166. package/src/superlocalmemory/ui/js/patterns.js +0 -93
  167. package/src/superlocalmemory.egg-info/PKG-INFO +0 -647
  168. package/src/superlocalmemory.egg-info/SOURCES.txt +0 -335
  169. package/src/superlocalmemory.egg-info/dependency_links.txt +0 -1
  170. package/src/superlocalmemory.egg-info/entry_points.txt +0 -2
  171. package/src/superlocalmemory.egg-info/requires.txt +0 -58
  172. package/src/superlocalmemory.egg-info/top_level.txt +0 -1
@@ -0,0 +1,87 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under AGPL-3.0-or-later - see LICENSE file
3
+ # Part of SuperLocalMemory v3.4.21 — LLD-02 §4.7
4
+
5
+ """Integer-label mapping for LightGBM ``lambdarank`` training.
6
+
7
+ LLD reference: ``.backup/active-brain/lld/LLD-02-signal-pipeline-and-lightgbm.md``
8
+ Section 4.7 — single source of truth for outcome-reward / position → int label.
9
+
10
+ Rules:
11
+ - Labels are integers in ``[0, 4]`` (5 relevance tiers).
12
+ - ``label_gain`` length MUST be ``>= max(label) + 1``; we ship
13
+ ``label_gain=[0, 1, 3, 7, 15]`` (length 5).
14
+ - Prefer ``outcome_reward`` (v3.4.21) if present; else position proxy.
15
+ - ``NaN`` reward is treated as missing and falls through to position.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import math
21
+ from typing import Any
22
+
23
+ # Exactly five tiers — do not widen without updating ``label_gain`` callers.
24
+ _LABEL_GAIN: tuple[int, ...] = (0, 1, 3, 7, 15)
25
+
26
+
27
def label_gain() -> list[int]:
    """Hand out a fresh copy of the canonical LightGBM gain table.

    The table holds one gain per relevance tier, so its length is
    ``max(label) + 1 = 5``. A new list is built on every call, which
    means callers can mutate the result without affecting the module
    constant. It is meant to be supplied unchanged as LightGBM's
    ``label_gain`` setting.
    """
    return [*_LABEL_GAIN]
34
+
35
+
36
+ def _coerce_reward(raw: Any) -> float | None:
37
+ """Coerce a reward-ish input to float, rejecting None / NaN."""
38
+ if raw is None:
39
+ return None
40
+ try:
41
+ val = float(raw)
42
+ except (TypeError, ValueError):
43
+ return None
44
+ if math.isnan(val):
45
+ return None
46
+ return val
47
+
48
+
49
def label_for_row(row: dict) -> int:
    """Map a training row to integer relevance in ``[0, 4]``.

    The reward is preferred when it is usable; otherwise the recall-time
    position acts as a proxy.

    Args:
        row: Dict with optional ``outcome_reward`` (float in [0, 1]) and
            ``position`` (int, 0-based rank at recall time).

    Returns:
        An integer label in ``[0, 4]``. Higher = more relevant. A
        missing, unparsable, infinite or negative ``position`` yields 0.
    """
    reward = _coerce_reward(row.get("outcome_reward"))
    if reward is not None:
        if reward >= 0.90:
            return 4
        if reward >= 0.60:
            return 3
        if reward >= 0.30:
            return 2
        if reward > 0.00:
            return 1
        return 0

    # 3.4.21 proxy: position (0 = best, higher = worse).
    try:
        pos = int(row.get("position", 99))
    except (TypeError, ValueError, OverflowError):
        # OverflowError: ``int()`` of an infinite float.
        return 0
    if pos < 0:
        # A negative rank is not a real position (e.g. a "not ranked"
        # sentinel); it must not be rewarded as a near-top hit.
        return 0
    if pos == 0:
        return 4
    if pos <= 2:
        return 3
    if pos <= 4:
        return 2
    if pos <= 9:
        return 1
    return 0
85
+
86
+
87
+ __all__ = ("label_for_row", "label_gain")
@@ -0,0 +1,277 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under AGPL-3.0-or-later - see LICENSE file
3
+ # Part of SuperLocalMemory v3.4.21
4
+
5
+ """Legacy ``learning_feedback`` row migration — data movement only.
6
+
7
+ LLD reference: ``.backup/active-brain/lld/LLD-07-schema-migrations-and-security-primitives.md``
8
+ Section 5 (Legacy learning_feedback Migration).
9
+
10
+ Hard rule H15 (LLD-06 §10): this module MUST NOT contain any schema
11
+ DDL. All schema definitions live in
12
+ ``src/superlocalmemory/storage/migrations/`` modules. This file is
13
+ data-copy only — the tables it reads from and writes into are created
14
+ by the LLD-07 runner before this runs.
15
+
16
+ S8-ARC-01 fix: the earlier stub always returned ``copied=0`` while the
17
+ ``/api/v3/brain`` endpoint surfaced the pre-existing ``learning_feedback``
18
+ row count as ``legacy_migrated_count`` — a silent integrity lie. This
19
+ implementation does the real row copy and is idempotent (guarded by a
20
+ ``migration_log`` sentinel row under the name ``LEG001_feedback_to_signals``).
21
+ D5 (keep ``learning_feedback`` for one release) still holds — this
22
+ migration does NOT delete from ``learning_feedback``; it only inserts
23
+ into ``learning_signals`` + ``learning_features``.
24
+ """
25
+ from __future__ import annotations
26
+
27
+ import json
28
+ import logging
29
+ import sqlite3
30
+ from datetime import datetime, timezone
31
+ from pathlib import Path
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+ # Marker used in migration_log to signal the data-copy has run.
36
+ MIGRATION_NAME = "LEG001_feedback_to_signals"
37
+
38
+ # Batch size for the row copy. 500 keeps the single transaction short
39
+ # enough that any concurrent SQLite reader only sees brief locks, while
40
+ # still amortising the per-row overhead.
41
+ _COPY_BATCH_SIZE = 500
42
+
43
+
44
def migrate_legacy_feedback(
    learning_db: Path,
    *,
    dry_run: bool = False,
) -> dict:
    """Copy ``learning_feedback`` rows forward into LLD-02 tables.

    Policy (LLD-07 §5 + D5 + D9):
      * Idempotent: if ``migration_log`` already has ``LEG001_feedback_to_signals``
        with status 'complete', returns a stats dict with ``already_done=True``.
      * Preserves the original ``learning_feedback`` table (D5 — one release).
      * Flags synthetic rows: ``learning_features.is_synthetic=1`` so the
        LightGBM trainer can exclude them (``WHERE is_synthetic=0``).
      * Never raises on sqlite3 errors; they collapse into ``failed=N`` and
        a WARN log so the daemon lifespan can continue.

    Args:
        learning_db: Path to the learning SQLite database file.
        dry_run: When true, nothing is written; candidate rows are only
            counted and reported as ``skipped``.

    Returns:
        ``{"copied": int, "skipped": int, "failed": int,
           "already_done": bool, "details": {...}}``
    """
    stats = {
        "copied": 0, "skipped": 0, "failed": 0,
        "already_done": False,
        "details": {"name": MIGRATION_NAME, "dry_run": dry_run},
    }
    if not learning_db.exists():
        stats["details"]["reason"] = "db_missing"
        return stats

    try:
        conn = sqlite3.connect(str(learning_db), isolation_level=None, timeout=10.0)
        conn.row_factory = sqlite3.Row
    except sqlite3.Error as exc:
        logger.warning("legacy migration: open failed: %s", exc)
        stats["failed"] = 1
        stats["details"]["reason"] = f"open_failed:{type(exc).__name__}"
        return stats

    try:
        # Every required table must exist. If any is missing we defer to
        # the migration runner (LLD-07 M001/M003) and record a skip.
        if not _tables_exist(conn, ("migration_log", "learning_feedback",
                                    "learning_signals", "learning_features")):
            stats["details"]["reason"] = "required_tables_missing"
            return stats

        if _already_complete(conn, MIGRATION_NAME):
            stats["already_done"] = True
            stats["details"]["reason"] = "migration_log_complete"
            return stats

        # Row count of candidates up front so the stats dict can report
        # progress even under a dry_run.
        total = _count_feedback(conn)
        stats["details"]["source_rows"] = total
        if total == 0:
            _record_migration(conn, MIGRATION_NAME,
                              status="complete", rows_affected=0,
                              dry_run=dry_run)
            return stats

        if dry_run:
            stats["details"]["reason"] = "dry_run_noop"
            stats["skipped"] = total
            return stats

        copied, failed = _copy_rows(conn)
        stats["copied"] = copied
        stats["failed"] = failed
        _record_migration(conn, MIGRATION_NAME,
                          status="complete" if failed == 0 else "partial",
                          rows_affected=copied, dry_run=False)
        return stats
    except sqlite3.Error as exc:
        # The pre-flight queries above can still fail (a locked or
        # malformed database usually surfaces on the first query, not at
        # connect time). Honour the "never raises" contract here.
        logger.warning("legacy migration: aborted: %s", exc)
        stats["failed"] = max(stats["failed"], 1)
        stats["details"]["reason"] = f"query_failed:{type(exc).__name__}"
        return stats
    finally:
        try:
            conn.close()
        except sqlite3.Error:  # pragma: no cover — defensive close
            pass
123
+
124
+
125
+ # ---------------------------------------------------------------------------
126
+ # Helpers — parameterised SQL only; no DDL here per H15.
127
+ # ---------------------------------------------------------------------------
128
+
129
+
130
def _tables_exist(conn: sqlite3.Connection, names: tuple[str, ...]) -> bool:
    """Report whether every table listed in ``names`` is present.

    The names are looked up in ``sqlite_master`` using one bound
    parameter per name; the result is true only when the number of
    matching rows equals ``len(names)``.
    """
    placeholders = ",".join("?" * len(names))
    sql = (
        "SELECT name FROM sqlite_master WHERE type='table' AND name IN "
        "(" + placeholders + ")"
    )
    found = conn.execute(sql, names).fetchall()
    return len(found) == len(names)
137
+
138
+
139
def _already_complete(conn: sqlite3.Connection, name: str) -> bool:
    """Tell whether ``migration_log`` marks ``name`` as complete.

    Only the first matching row is inspected; its ``status`` is compared
    case-insensitively against ``"complete"``. No row means not complete.
    """
    cursor = conn.execute(
        "SELECT status FROM migration_log WHERE name = ?",
        (name,),
    )
    record = cursor.fetchone()
    if record is None:
        return False
    return str(record[0]).lower() == "complete"
145
+
146
+
147
def _count_feedback(conn: sqlite3.Connection) -> int:
    """Return how many rows ``learning_feedback`` currently holds."""
    result = conn.execute("SELECT COUNT(*) FROM learning_feedback").fetchone()
    if not result:
        return 0
    return int(result[0])
152
+
153
+
154
def _copy_rows(conn: sqlite3.Connection) -> tuple[int, int]:
    """Copy all ``learning_feedback`` rows forward, batch by batch.

    Rows are read ``_COPY_BATCH_SIZE`` at a time, ordered by ``id``. Each
    batch is written in its own ``BEGIN IMMEDIATE`` transaction and
    committed before the next batch is read, so a later failure still
    leaves the earlier batches durable. Inside a batch every row runs
    under a savepoint, so a row that fails part-way is undone on its own
    and never leaves a half-written signal behind.

    Assumes ``conn`` does not open transactions implicitly (the caller
    connects with ``isolation_level=None``).

    Returns:
        ``(copied, failed)``. ``copied`` counts only rows whose batch
        actually committed; a rolled-back batch counts entirely as
        failed. A failed batch read adds one to ``failed`` and stops the
        copy. Does not raise for sqlite3 or value-conversion errors.
    """
    copied = 0
    failed = 0
    offset = 0
    while True:
        try:
            batch = conn.execute(
                "SELECT id, profile_id, query_hash, fact_id, "
                " signal_type, signal_value, created_at "
                "FROM learning_feedback "
                "ORDER BY id LIMIT ? OFFSET ?",
                (_COPY_BATCH_SIZE, offset),
            ).fetchall()
        except sqlite3.Error as exc:
            logger.warning("legacy migration: batch read failed: %s", exc)
            failed += 1
            break
        if not batch:
            break

        # Tally per batch and fold into the totals only once the outcome
        # of the COMMIT is known; otherwise a rollback would leave rows
        # reported as copied that are not in the database.
        batch_copied = 0
        batch_failed = 0
        try:
            conn.execute("BEGIN IMMEDIATE")
            for row in batch:
                try:
                    conn.execute("SAVEPOINT legacy_row")
                    _copy_single_row(conn, row)
                    conn.execute("RELEASE SAVEPOINT legacy_row")
                    batch_copied += 1
                except (sqlite3.Error, TypeError, ValueError) as exc:
                    logger.debug("legacy migration: row %s failed: %s",
                                 row[0], exc)
                    # Undo whatever part of this row was written. If the
                    # savepoint itself is gone, this raises and the outer
                    # handler rolls the whole batch back.
                    conn.execute("ROLLBACK TO SAVEPOINT legacy_row")
                    conn.execute("RELEASE SAVEPOINT legacy_row")
                    batch_failed += 1
            conn.execute("COMMIT")
        except sqlite3.Error as exc:
            logger.warning("legacy migration: batch commit failed: %s", exc)
            try:
                conn.execute("ROLLBACK")
            except sqlite3.Error:  # pragma: no cover
                pass
            batch_copied = 0
            batch_failed = len(batch)
        copied += batch_copied
        failed += batch_failed
        offset += len(batch)

    return copied, failed
200
+
201
+
202
def _copy_single_row(conn: sqlite3.Connection, row: sqlite3.Row) -> None:
    """Insert one legacy row into learning_signals + learning_features.

    The signal row is tagged ``signal_type='legacy_feedback'`` with a
    ``query_id`` of ``legacy:<id>``, an empty query text, position 0 and
    empty channel scores. The companion feature row is a placeholder
    (``features_json='{}'``, ``label=0.0``) flagged ``is_synthetic=1`` so
    consumers can tell it apart from real feature rows.

    Note: ``learning_feedback.query_hash`` is already a privacy-hashed
    digest (LLD-02 §4.1 S2), so it is copied forward as-is, zero-padded
    or truncated to 32 characters for a stable shape.

    Args:
        conn: Open connection; the caller owns the transaction.
        row: A ``learning_feedback`` row addressable by column name.

    Raises:
        sqlite3.Error: If either insert fails.
        ValueError, TypeError: If ``signal_value`` is not numeric.
    """
    stored_hash = str(row["query_hash"] or "")
    # Keep the hash at 32 chars for shape-stability with fresh signals.
    query_hash = (stored_hash + ("0" * 32))[:32]
    created_at = str(row["created_at"] or
                     datetime.now(timezone.utc).isoformat(timespec="seconds"))
    profile_id = str(row["profile_id"] or "default")
    fact_id = str(row["fact_id"] or "")
    legacy_query_id = f"legacy:{row['id']}"

    # Default to 1.0 only when the value is genuinely absent. A plain
    # ``value or 1.0`` would also rewrite a legitimate 0.0 signal as 1.0.
    raw_value = row["signal_value"]
    if raw_value is None or raw_value == "":
        signal_value = 1.0
    else:
        signal_value = float(raw_value)

    # Insert the signal row. ``signal_type='legacy_feedback'`` marks it
    # clearly so consumers (dashboard, labeler) can treat it correctly.
    cur = conn.execute(
        "INSERT INTO learning_signals "
        "(profile_id, query, fact_id, signal_type, value, created_at, "
        " query_id, query_text_hash, position, channel_scores, cross_encoder) "
        "VALUES (?, '', ?, 'legacy_feedback', ?, ?, ?, ?, 0, '{}', NULL)",
        (profile_id, fact_id, signal_value,
         created_at, legacy_query_id, query_hash),
    )
    sid = cur.lastrowid

    # Placeholder feature row (empty JSON object). Feature names are not
    # imported here, to keep this module boot-order-independent.
    conn.execute(
        "INSERT INTO learning_features "
        "(profile_id, query_id, fact_id, features_json, label, created_at, "
        " signal_id, is_synthetic) "
        "VALUES (?, ?, ?, '{}', 0.0, ?, ?, 1)",
        (profile_id, legacy_query_id, fact_id, created_at, sid),
    )
246
+
247
+
248
def _record_migration(
    conn: sqlite3.Connection,
    name: str,
    *,
    status: str,
    rows_affected: int,
    dry_run: bool,
) -> None:
    """Upsert the sentinel row for ``name`` in ``migration_log``.

    No DDL is issued here (H15). The row carries a UTC ``applied_at``
    timestamp and an empty ``ddl_sha256`` — this is a data-only
    migration, so the row exists for idempotency rather than
    schema-drift detection. A dry run writes nothing. A sqlite3 failure
    is logged as a warning and otherwise ignored.
    """
    if dry_run:
        return

    stamp = datetime.now(timezone.utc).isoformat(timespec="seconds")
    params = (name, stamp, rows_affected, status)
    try:
        conn.execute(
            "INSERT OR REPLACE INTO migration_log "
            "(name, applied_at, ddl_sha256, rows_affected, status) "
            "VALUES (?, ?, '', ?, ?)",
            params,
        )
    except sqlite3.Error as exc:
        logger.warning("legacy migration: log record failed: %s", exc)
275
+
276
+
277
+ __all__ = ("migrate_legacy_feedback", "MIGRATION_NAME")
@@ -0,0 +1,160 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under AGPL-3.0-or-later - see LICENSE file
3
+ # Part of SuperLocalMemory v3.4.21 — LLD-12 §2.3 + §1.4
4
+
5
+ """Reversible merge-log writer + unmerge for consolidation.
6
+
7
+ LLD-12 §1 hard invariant: consolidation NEVER issues
8
+ ``DELETE FROM atomic_facts``. Duplicates merge by:
9
+
10
+ 1. INSERT into ``memory_merge_log`` (canonical + merged fact_ids, scores,
11
+ timestamp, reversible flag).
12
+ 2. UPDATE ``atomic_facts`` SET archive_status='merged',
13
+ merged_into=<canonical>. Row stays; only status flips.
14
+
15
+ ``unmerge(merge_id)`` reverses the operation by flipping archive_status
16
+ back to 'live' and clearing merged_into. The log row is marked
17
+ ``reversible=0`` once reversed so a second unmerge is a no-op.
18
+
19
+ All operations run inside a single SQLite transaction with
20
+ ``busy_timeout=2000``. Partial failures roll back cleanly, leaving the
21
+ DB in its pre-merge state.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import logging
27
+ import sqlite3
28
+ import uuid
29
+ from datetime import datetime, timezone
30
+ from pathlib import Path
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+
35
def _iso_now() -> str:
    """Return the current UTC time as an ISO-8601 string with offset."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
37
+
38
+
39
+ def apply_merges(
40
+ memory_db_path: str | Path,
41
+ candidates: list[tuple[str, str, float, float]],
42
+ *,
43
+ profile_id: str,
44
+ ) -> int:
45
+ """Apply merge candidates transactionally. Returns number applied.
46
+
47
+ Each candidate is ``(canonical_fact_id, merged_fact_id, cosine, jaccard)``.
48
+
49
+ Never deletes from ``atomic_facts``. Always writes a row to
50
+ ``memory_merge_log`` per applied merge.
51
+
52
+ Idempotent: if ``merged_fact_id`` is already ``archive_status='merged'``
53
+ from a prior run, the candidate is skipped (count not incremented).
54
+ """
55
+ if not candidates:
56
+ return 0
57
+
58
+ conn = sqlite3.connect(str(memory_db_path), timeout=10.0)
59
+ conn.execute("PRAGMA busy_timeout=2000")
60
+ applied = 0
61
+ # S-L02: track the candidate list in flight so a rollback diagnostic
62
+ # can blame the exact set of (canonical, merged) pairs instead of a
63
+ # blanket "rollback" message. Operators on the dashboard previously
64
+ # saw zero fidelity about which candidates were in the transaction
65
+ # at commit-time.
66
+ in_flight: list[tuple[str, str]] = []
67
+ try:
68
+ conn.execute("BEGIN IMMEDIATE")
69
+ for canonical_id, merged_id, cos, jac in candidates:
70
+ # Skip if already merged in a prior cycle.
71
+ row = conn.execute(
72
+ "SELECT archive_status FROM atomic_facts WHERE fact_id=?",
73
+ (merged_id,),
74
+ ).fetchone()
75
+ if row is None:
76
+ continue
77
+ if row[0] == "merged":
78
+ continue
79
+
80
+ conn.execute(
81
+ "INSERT INTO memory_merge_log "
82
+ "(merge_id, profile_id, canonical_fact_id, merged_fact_id, "
83
+ " cosine_sim, entity_jaccard, merged_at, reversible) "
84
+ "VALUES (?, ?, ?, ?, ?, ?, ?, 1)",
85
+ (
86
+ str(uuid.uuid4()),
87
+ profile_id,
88
+ canonical_id,
89
+ merged_id,
90
+ float(cos),
91
+ float(jac),
92
+ _iso_now(),
93
+ ),
94
+ )
95
+ conn.execute(
96
+ "UPDATE atomic_facts "
97
+ "SET archive_status='merged', "
98
+ " archive_reason='cosine_dup', "
99
+ " merged_into=? "
100
+ "WHERE fact_id=?",
101
+ (canonical_id, merged_id),
102
+ )
103
+ applied += 1
104
+ in_flight.append((canonical_id, merged_id))
105
+ conn.commit()
106
+ except sqlite3.Error as exc:
107
+ conn.rollback()
108
+ logger.warning(
109
+ "apply_merges rollback: profile=%s pre-rollback_applied=%d "
110
+ "in_flight=%s error=%s",
111
+ profile_id, applied, in_flight, exc,
112
+ )
113
+ applied = 0
114
+ finally:
115
+ conn.close()
116
+ return applied
117
+
118
+
119
+ def unmerge(memory_db_path: str | Path, merge_id: str) -> bool:
120
+ """Reverse a merge by merge_id. Returns True on success.
121
+
122
+ Flips the merged fact's archive_status back to 'live', clears
123
+ merged_into, and marks the log row ``reversible=0``.
124
+ """
125
+ conn = sqlite3.connect(str(memory_db_path), timeout=10.0)
126
+ conn.execute("PRAGMA busy_timeout=2000")
127
+ try:
128
+ row = conn.execute(
129
+ "SELECT merged_fact_id, reversible FROM memory_merge_log "
130
+ "WHERE merge_id=?",
131
+ (merge_id,),
132
+ ).fetchone()
133
+ if row is None:
134
+ return False
135
+ merged_fid, reversible = row
136
+ if not reversible:
137
+ return False
138
+
139
+ conn.execute("BEGIN IMMEDIATE")
140
+ conn.execute(
141
+ "UPDATE atomic_facts "
142
+ "SET archive_status='live', archive_reason=NULL, merged_into=NULL "
143
+ "WHERE fact_id=?",
144
+ (merged_fid,),
145
+ )
146
+ conn.execute(
147
+ "UPDATE memory_merge_log SET reversible=0 WHERE merge_id=?",
148
+ (merge_id,),
149
+ )
150
+ conn.commit()
151
+ return True
152
+ except sqlite3.Error as exc:
153
+ conn.rollback()
154
+ logger.warning("unmerge rollback: %s", exc)
155
+ return False
156
+ finally:
157
+ conn.close()
158
+
159
+
160
+ __all__ = ("apply_merges", "unmerge")