superlocalmemory 3.4.19 → 3.4.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. package/CHANGELOG.md +24 -0
  2. package/README.md +42 -34
  3. package/bin/slm +11 -0
  4. package/bin/slm.bat +12 -0
  5. package/package.json +4 -3
  6. package/pyproject.toml +4 -3
  7. package/scripts/build-slm-hook.ps1 +40 -0
  8. package/scripts/build-slm-hook.sh +45 -0
  9. package/scripts/build_entry.py +452 -0
  10. package/scripts/ci/stage5b_gate.sh +50 -0
  11. package/scripts/postinstall/validation.js +187 -0
  12. package/scripts/postinstall-interactive.js +756 -0
  13. package/scripts/postinstall_binary.js +287 -0
  14. package/scripts/release_manifest.py +273 -0
  15. package/scripts/slm-hook.spec +56 -0
  16. package/skills/slm-build-graph/SKILL.md +423 -0
  17. package/skills/slm-list-recent/SKILL.md +348 -0
  18. package/skills/slm-recall/SKILL.md +343 -0
  19. package/skills/slm-remember/SKILL.md +194 -0
  20. package/skills/slm-show-patterns/SKILL.md +224 -0
  21. package/skills/slm-status/SKILL.md +363 -0
  22. package/skills/slm-switch-profile/SKILL.md +442 -0
  23. package/src/superlocalmemory/cli/commands.py +254 -79
  24. package/src/superlocalmemory/cli/context_commands.py +192 -0
  25. package/src/superlocalmemory/cli/daemon.py +15 -1
  26. package/src/superlocalmemory/cli/db_migrate.py +80 -0
  27. package/src/superlocalmemory/cli/escape_hatch.py +220 -0
  28. package/src/superlocalmemory/cli/main.py +72 -1
  29. package/src/superlocalmemory/core/context_cache.py +397 -0
  30. package/src/superlocalmemory/core/engine.py +38 -2
  31. package/src/superlocalmemory/core/engine_wiring.py +1 -1
  32. package/src/superlocalmemory/core/ram_lock.py +111 -0
  33. package/src/superlocalmemory/core/recall_pipeline.py +433 -3
  34. package/src/superlocalmemory/core/recall_worker.py +8 -3
  35. package/src/superlocalmemory/core/security_primitives.py +635 -0
  36. package/src/superlocalmemory/core/shadow_router.py +319 -0
  37. package/src/superlocalmemory/core/slm_disabled.py +87 -0
  38. package/src/superlocalmemory/core/slmignore.py +125 -0
  39. package/src/superlocalmemory/core/topic_signature.py +143 -0
  40. package/src/superlocalmemory/core/worker_pool.py +14 -3
  41. package/src/superlocalmemory/encoding/cognitive_consolidator.py +2 -2
  42. package/src/superlocalmemory/evolution/budget.py +321 -0
  43. package/src/superlocalmemory/evolution/llm_dispatch.py +508 -0
  44. package/src/superlocalmemory/evolution/skill_evolver.py +144 -94
  45. package/src/superlocalmemory/hooks/_outcome_common.py +506 -0
  46. package/src/superlocalmemory/hooks/adapter_base.py +317 -0
  47. package/src/superlocalmemory/hooks/antigravity_adapter.py +192 -0
  48. package/src/superlocalmemory/hooks/claude_code_hooks.py +33 -1
  49. package/src/superlocalmemory/hooks/context_payload.py +312 -0
  50. package/src/superlocalmemory/hooks/copilot_adapter.py +154 -0
  51. package/src/superlocalmemory/hooks/cross_platform_connector.py +90 -0
  52. package/src/superlocalmemory/hooks/cursor_adapter.py +195 -0
  53. package/src/superlocalmemory/hooks/hook_handlers.py +109 -8
  54. package/src/superlocalmemory/hooks/ide_connector.py +25 -2
  55. package/src/superlocalmemory/hooks/post_tool_async_hook.py +165 -0
  56. package/src/superlocalmemory/hooks/post_tool_outcome_hook.py +223 -0
  57. package/src/superlocalmemory/hooks/prewarm_auth.py +170 -0
  58. package/src/superlocalmemory/hooks/session_registry.py +186 -0
  59. package/src/superlocalmemory/hooks/stop_outcome_hook.py +134 -0
  60. package/src/superlocalmemory/hooks/sync_loop.py +114 -0
  61. package/src/superlocalmemory/hooks/user_prompt_hook.py +128 -0
  62. package/src/superlocalmemory/hooks/user_prompt_rehash_hook.py +202 -0
  63. package/src/superlocalmemory/infra/backup.py +3 -3
  64. package/src/superlocalmemory/infra/cloud_backup.py +2 -2
  65. package/src/superlocalmemory/infra/event_bus.py +2 -2
  66. package/src/superlocalmemory/infra/webhook_dispatcher.py +3 -3
  67. package/src/superlocalmemory/learning/arm_catalog.py +99 -0
  68. package/src/superlocalmemory/learning/bandit.py +526 -0
  69. package/src/superlocalmemory/learning/bandit_cache.py +133 -0
  70. package/src/superlocalmemory/learning/behavioral.py +53 -1
  71. package/src/superlocalmemory/learning/consolidation_cycle.py +381 -0
  72. package/src/superlocalmemory/learning/consolidation_worker.py +188 -520
  73. package/src/superlocalmemory/learning/database.py +256 -0
  74. package/src/superlocalmemory/learning/dedup_hnsw.py +413 -0
  75. package/src/superlocalmemory/learning/ensemble.py +300 -0
  76. package/src/superlocalmemory/learning/fact_outcome_joins.py +207 -0
  77. package/src/superlocalmemory/learning/forgetting_scheduler.py +55 -0
  78. package/src/superlocalmemory/learning/hnsw_dedup.py +69 -0
  79. package/src/superlocalmemory/learning/labeler.py +87 -0
  80. package/src/superlocalmemory/learning/legacy_migration.py +277 -0
  81. package/src/superlocalmemory/learning/memory_merge.py +160 -0
  82. package/src/superlocalmemory/learning/model_cache.py +269 -0
  83. package/src/superlocalmemory/learning/model_rollback.py +278 -0
  84. package/src/superlocalmemory/learning/outcome_queue.py +284 -0
  85. package/src/superlocalmemory/learning/pattern_miner.py +415 -0
  86. package/src/superlocalmemory/learning/pattern_miner_constants.py +47 -0
  87. package/src/superlocalmemory/learning/ranker.py +225 -81
  88. package/src/superlocalmemory/learning/ranker_common.py +163 -0
  89. package/src/superlocalmemory/learning/ranker_retrain_legacy.py +202 -0
  90. package/src/superlocalmemory/learning/ranker_retrain_online.py +411 -0
  91. package/src/superlocalmemory/learning/reward.py +777 -0
  92. package/src/superlocalmemory/learning/reward_archive.py +210 -0
  93. package/src/superlocalmemory/learning/reward_boost.py +201 -0
  94. package/src/superlocalmemory/learning/reward_proxy.py +326 -0
  95. package/src/superlocalmemory/learning/shadow_test.py +524 -0
  96. package/src/superlocalmemory/learning/signal_worker.py +270 -0
  97. package/src/superlocalmemory/learning/signals.py +314 -0
  98. package/src/superlocalmemory/learning/trigram_index.py +547 -0
  99. package/src/superlocalmemory/mcp/server.py +5 -5
  100. package/src/superlocalmemory/mcp/tools_context.py +183 -0
  101. package/src/superlocalmemory/mcp/tools_core.py +92 -27
  102. package/src/superlocalmemory/parameterization/soft_prompt_generator.py +13 -0
  103. package/src/superlocalmemory/retrieval/engine.py +52 -0
  104. package/src/superlocalmemory/server/api.py +2 -2
  105. package/src/superlocalmemory/server/bandit_loops.py +140 -0
  106. package/src/superlocalmemory/server/middleware/__init__.py +11 -0
  107. package/src/superlocalmemory/server/middleware/security_headers.py +144 -0
  108. package/src/superlocalmemory/server/routes/backup.py +36 -13
  109. package/src/superlocalmemory/server/routes/behavioral.py +50 -19
  110. package/src/superlocalmemory/server/routes/brain.py +1234 -0
  111. package/src/superlocalmemory/server/routes/data_io.py +4 -4
  112. package/src/superlocalmemory/server/routes/events.py +2 -2
  113. package/src/superlocalmemory/server/routes/helpers.py +1 -1
  114. package/src/superlocalmemory/server/routes/learning.py +192 -7
  115. package/src/superlocalmemory/server/routes/memories.py +189 -1
  116. package/src/superlocalmemory/server/routes/prewarm.py +171 -0
  117. package/src/superlocalmemory/server/routes/profiles.py +3 -3
  118. package/src/superlocalmemory/server/routes/token.py +88 -0
  119. package/src/superlocalmemory/server/routes/ws.py +5 -5
  120. package/src/superlocalmemory/server/security_middleware.py +13 -7
  121. package/src/superlocalmemory/server/ui.py +2 -2
  122. package/src/superlocalmemory/server/unified_daemon.py +335 -3
  123. package/src/superlocalmemory/skills/slm-build-graph/SKILL.md +423 -0
  124. package/src/superlocalmemory/skills/slm-list-recent/SKILL.md +348 -0
  125. package/src/superlocalmemory/skills/slm-recall/SKILL.md +343 -0
  126. package/src/superlocalmemory/skills/slm-remember/SKILL.md +194 -0
  127. package/src/superlocalmemory/skills/slm-show-patterns/SKILL.md +224 -0
  128. package/src/superlocalmemory/skills/slm-status/SKILL.md +363 -0
  129. package/src/superlocalmemory/skills/slm-switch-profile/SKILL.md +442 -0
  130. package/src/superlocalmemory/storage/migration_runner.py +545 -0
  131. package/src/superlocalmemory/storage/migrations/M001_add_signal_features_columns.py +67 -0
  132. package/src/superlocalmemory/storage/migrations/M002_model_state_history.py +132 -0
  133. package/src/superlocalmemory/storage/migrations/M003_migration_log.py +38 -0
  134. package/src/superlocalmemory/storage/migrations/M004_cross_platform_sync_log.py +46 -0
  135. package/src/superlocalmemory/storage/migrations/M005_bandit_tables.py +75 -0
  136. package/src/superlocalmemory/storage/migrations/M006_action_outcomes_reward.py +75 -0
  137. package/src/superlocalmemory/storage/migrations/M007_pending_outcomes.py +63 -0
  138. package/src/superlocalmemory/storage/migrations/M009_model_lineage.py +54 -0
  139. package/src/superlocalmemory/storage/migrations/M010_evolution_config.py +75 -0
  140. package/src/superlocalmemory/storage/migrations/M011_archive_and_merge.py +87 -0
  141. package/src/superlocalmemory/storage/migrations/M012_shadow_observations.py +72 -0
  142. package/src/superlocalmemory/storage/migrations/M013_bi_temporal_columns.py +55 -0
  143. package/src/superlocalmemory/storage/migrations/__init__.py +81 -0
  144. package/src/superlocalmemory/storage/models.py +4 -0
  145. package/src/superlocalmemory/ui/css/brain.css +409 -0
  146. package/src/superlocalmemory/ui/css/legacy-dashboard.css +645 -0
  147. package/src/superlocalmemory/ui/index.html +459 -1345
  148. package/src/superlocalmemory/ui/js/brain.js +1321 -0
  149. package/src/superlocalmemory/ui/js/clusters.js +123 -4
  150. package/src/superlocalmemory/ui/js/init.js +48 -39
  151. package/src/superlocalmemory/ui/js/memories.js +88 -2
  152. package/src/superlocalmemory/ui/js/modal.js +71 -1
  153. package/src/superlocalmemory/ui/js/ng-shell.js +101 -88
  154. package/src/superlocalmemory/ui/js/trust-dashboard.js +168 -25
  155. package/src/superlocalmemory/ui/vendor/bootstrap-icons/bootstrap-icons.css +2018 -0
  156. package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff +0 -0
  157. package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff2 +0 -0
  158. package/src/superlocalmemory/ui/vendor/bootstrap.bundle.min.js +7 -0
  159. package/src/superlocalmemory/ui/vendor/bootstrap.min.css +6 -0
  160. package/src/superlocalmemory/ui/vendor/d3.v7.min.js +2 -0
  161. package/src/superlocalmemory/ui/vendor/graphology-library.min.js +2 -0
  162. package/src/superlocalmemory/ui/vendor/graphology.umd.min.js +2 -0
  163. package/src/superlocalmemory/ui/vendor/inter-ui/inter-variable.min.css +8 -0
  164. package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable-Italic.woff2 +0 -0
  165. package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable.woff2 +0 -0
  166. package/src/superlocalmemory/ui/vendor/sigma.min.js +1 -0
  167. package/src/superlocalmemory/ui/js/behavioral.js +0 -447
  168. package/src/superlocalmemory/ui/js/graph-core.js +0 -447
  169. package/src/superlocalmemory/ui/js/graph-interactions.js +0 -351
  170. package/src/superlocalmemory/ui/js/learning.js +0 -435
  171. package/src/superlocalmemory/ui/js/patterns.js +0 -93
  172. package/src/superlocalmemory.egg-info/PKG-INFO +0 -647
  173. package/src/superlocalmemory.egg-info/SOURCES.txt +0 -335
  174. package/src/superlocalmemory.egg-info/dependency_links.txt +0 -1
  175. package/src/superlocalmemory.egg-info/entry_points.txt +0 -2
  176. package/src/superlocalmemory.egg-info/requires.txt +0 -58
  177. package/src/superlocalmemory.egg-info/top_level.txt +0 -1
@@ -0,0 +1,270 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under AGPL-3.0-or-later - see LICENSE file
3
+ # Part of SuperLocalMemory v3.4.22 — LLD-02 §4.2
4
+
5
+ """Background signal drain worker.
6
+
7
+ LLD reference: ``.backup/active-brain/lld/LLD-02-signal-pipeline-and-lightgbm.md``
8
+ Section 4.2 — moves signal writes off the recall hot path.
9
+
10
+ Contract (hard rules, enforced by tests):
11
+ SW1 — Hot path never waits for disk.
12
+ SW2 — Drop + counter on full queue, never raise.
13
+ SW3 — Graceful flush ≤3 s on ``stop()``.
14
+ SW4 — Connection is thread-local; never shared across threads.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import logging
20
+ import queue
21
+ import sqlite3
22
+ import threading
23
+ import time
24
+ from pathlib import Path
25
+ from typing import Any
26
+
27
+ from superlocalmemory.learning import signals as _signals_mod
28
+ from superlocalmemory.learning.signals import (
29
+ SignalBatch,
30
+ bump_counter as _bump_counter,
31
+ get_queue as _signals_get_queue,
32
+ record_signal_batch,
33
+ )
34
+
35
+
36
def _current_queue() -> "queue.Queue[SignalBatch]":
    """Fetch the live signal queue via the public ``signals.get_queue()``.

    The queue is looked up on every call instead of being cached, so a
    test that monkeypatches ``signals._Q`` is honoured: ``get_queue``
    reads the attribute dynamically through ``sys.modules``.
    S8-ARC-03 (v3.4.22): no more private ``_Q`` reach-through.
    """
    live_queue = _signals_get_queue()
    return live_queue
43
+
44
# Module logger, named after this module's import path.
logger = logging.getLogger(__name__)


# Default ``batch_size`` for ``SignalWorker``: upper bound on the number of
# batches taken from the queue in one drain pass.
_DRAIN_BATCH_DEFAULT = 50
# Default ``interval_ms`` for ``SignalWorker``: pause between drain passes,
# in milliseconds.
_DRAIN_INTERVAL_MS_DEFAULT = 250
# Default ``timeout`` (seconds) for ``stop()`` — the SW3 flush budget.
_FLUSH_TIMEOUT_S_DEFAULT = 3.0
50
+
51
+
52
class SignalWorker:
    """Background drainer for the module-level signal queue.

    One instance per daemon. ``start()`` spawns a daemon thread; that thread
    opens its own sqlite3 connection (``check_same_thread=True``), so the
    connection is never shared across threads (SW4).

    Shutdown (SW3): ``stop()`` publishes a flush deadline before signalling
    the thread. The worker then keeps draining in ``batch_size`` chunks until
    the queue is empty or the deadline passes, instead of flushing a single
    chunk and dropping the rest of the backlog.
    """

    def __init__(
        self,
        learning_db: str | Path,
        *,
        batch_size: int = _DRAIN_BATCH_DEFAULT,
        interval_ms: int = _DRAIN_INTERVAL_MS_DEFAULT,
    ) -> None:
        """Configure the worker; no thread or connection is created yet.

        Args:
            learning_db: Path of the SQLite database the batches go to.
            batch_size: Maximum batches taken from the queue per drain pass.
            interval_ms: Pause between drain passes, in milliseconds.

        Raises:
            ValueError: If ``batch_size <= 0`` or ``interval_ms < 0``.
        """
        if batch_size <= 0:
            raise ValueError("batch_size must be positive")
        if interval_ms < 0:
            raise ValueError("interval_ms must be >= 0")
        self._db_path = str(learning_db)
        self._batch_size = batch_size
        self._interval_s = interval_ms / 1000.0
        self._stop_event = threading.Event()
        self._thread: threading.Thread | None = None
        self._conn_thread_id: int | None = None
        # Monotonic timestamp until which the final flush may keep draining.
        # ``None`` means "single final pass" (no stop() budget published).
        self._flush_deadline: float | None = None

    # --- public API ------------------------------------------------------

    def start(self) -> None:
        """Start the background drain thread (idempotent)."""
        if self._thread is not None and self._thread.is_alive():
            return
        self._flush_deadline = None
        self._stop_event.clear()
        t = threading.Thread(
            target=self._run,
            name="slm-signal-worker",
            daemon=True,
        )
        self._thread = t
        t.start()

    def stop(self, *, timeout: float = _FLUSH_TIMEOUT_S_DEFAULT) -> int:
        """Stop the worker, flushing pending batches up to ``timeout`` seconds.

        Returns the number of batches dropped because they couldn't be
        flushed before the timeout (SW3). If the worker was never started,
        everything still queued is dropped and counted.
        """
        if self._thread is None:
            return _drain_and_drop(log_prefix="no-thread")

        budget = max(0.0, timeout)
        # Publish the deadline BEFORE setting the event so the worker sees
        # it as soon as it wakes up for the final flush.
        self._flush_deadline = time.monotonic() + budget
        self._stop_event.set()
        self._thread.join(timeout=budget)

        # After join, drain anything left and count it as drop-on-flush.
        remaining = _drain_and_drop(log_prefix="post-join")
        self._thread = None
        return remaining

    # --- internals -------------------------------------------------------

    def _open_threadlocal_conn(self) -> sqlite3.Connection:
        """Open the drain connection (SW4); called once from the run loop.

        The connection is opened in autocommit mode (``isolation_level=None``)
        with WAL journaling, ``synchronous=NORMAL``, a 10 s busy timeout and
        foreign keys enabled. The caller owns it and closes it at shutdown.
        """
        conn = sqlite3.connect(
            self._db_path,
            isolation_level=None,
            timeout=10,
            check_same_thread=True,
        )
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA synchronous=NORMAL")
        conn.execute("PRAGMA busy_timeout=10000")
        conn.execute("PRAGMA foreign_keys=ON")
        self._conn_thread_id = threading.get_ident()
        return conn

    def _run(self) -> None:
        """Main loop: drain up to ``batch_size`` every ``interval_s``."""
        try:
            conn = self._open_threadlocal_conn()
        except sqlite3.Error as exc:  # pragma: no cover — DB unavailable
            logger.error("signal_worker: cannot open DB: %s", exc)
            return

        try:
            while not self._stop_event.is_set():
                self._drain_once(conn)
                if self._stop_event.wait(self._interval_s):
                    break
            # Final drain on graceful stop.
            self._final_flush(conn)
        finally:
            try:
                conn.close()
            except sqlite3.Error:  # pragma: no cover
                pass

    def _final_flush(self, conn: sqlite3.Connection) -> None:
        """Flush the backlog at shutdown within the published deadline.

        Always performs one drain pass. With a deadline set by ``stop()``,
        further passes run until the queue is empty or the deadline passes.
        The deadline is checked between passes, not inside one.
        """
        deadline = self._flush_deadline
        while True:
            self._drain_once(conn)
            if deadline is None or time.monotonic() >= deadline:
                return
            if _current_queue().empty():
                return

    def _drain_once(self, conn: sqlite3.Connection) -> int:
        """Drain up to ``batch_size`` batches from the queue.

        Returns the number of batches written. Each batch goes through
        ``_write_with_retry``; a batch it reports as failed is dropped and
        counted under ``signal_dropped_total`` (see LLD-02 §8 error matrix).
        """
        written = 0
        for _ in range(self._batch_size):
            try:
                batch = _current_queue().get_nowait()
            except queue.Empty:
                break

            if _write_with_retry(conn, batch):
                written += 1
            else:
                _bump_counter("signal_dropped_total")
        return written
171
+
172
+
173
def _write_with_retry(
    conn: sqlite3.Connection,
    batch: SignalBatch,
    *,
    attempts: int = 3,
) -> bool:
    """Write one batch, retrying ``sqlite3.OperationalError`` with backoff.

    The pause starts at 50 ms and doubles after each failed attempt. Any
    other exception is logged and the batch is given up immediately;
    nothing is propagated to the caller.

    Returns True on success, False if dropped.
    """
    pause_ms = 50
    attempt_no = 0
    while attempt_no < attempts:
        attempt_no += 1
        try:
            record_signal_batch(conn, batch)
        except sqlite3.OperationalError as exc:
            logger.warning(
                "signal_worker: write attempt %d failed: %s", attempt_no, exc,
            )
            if attempt_no == attempts:
                return False
            time.sleep(pause_ms / 1000.0)
            pause_ms *= 2
        except sqlite3.Error as exc:  # pragma: no cover — defensive
            logger.error("signal_worker: non-retriable error: %s", exc)
            return False
        except Exception as exc:  # pragma: no cover — never propagate
            logger.error("signal_worker: unexpected: %s", exc)
            return False
        else:
            return True
    return False  # pragma: no cover — defensive
203
+
204
+
205
def _drain_and_drop(*, log_prefix: str = "") -> int:
    """Empty the queue, discarding every batch, and return how many were lost.

    Used during shutdown when the worker could not flush in time (SW3).
    A non-zero count is added to ``signal_drop_on_flush_total`` and logged
    with ``log_prefix``.
    """
    dropped = 0
    try:
        while True:
            _current_queue().get_nowait()
            dropped += 1
    except queue.Empty:
        pass

    if dropped:
        _bump_counter("signal_drop_on_flush_total", dropped)
        logger.info(
            "signal_worker: %s dropped %d unflushed batches",
            log_prefix, dropped,
        )
    return dropped
224
+
225
+
226
+ # ---------------------------------------------------------------------------
227
+ # Module-level singleton helpers (S8-SK-01 integration): let the daemon
228
+ # start/stop one SignalWorker without knowing the class internals. Callers
229
+ # in ``unified_daemon.lifespan`` use ``start(learning_db)`` and ``stop()``.
230
+ # ---------------------------------------------------------------------------
231
+
232
# The process-wide worker created by ``start()``; ``None`` until then and
# again after ``stop()``.
_WORKER_SINGLETON: SignalWorker | None = None
# Guards reads and writes of ``_WORKER_SINGLETON``.
_WORKER_LOCK = threading.Lock()
234
+
235
+
236
def start(learning_db: "str | Path", **kwargs) -> SignalWorker:
    """Return the module-level SignalWorker, creating it on first use.

    Idempotent: a second call returns the existing instance and ignores
    ``learning_db`` / ``kwargs``. The worker's own ``start()`` is a no-op
    while its thread is alive. The daemon calls this from its lifespan
    once per process.
    """
    global _WORKER_SINGLETON
    with _WORKER_LOCK:
        worker = _WORKER_SINGLETON
        if worker is None:
            worker = SignalWorker(learning_db, **kwargs)
            _WORKER_SINGLETON = worker
        worker.start()
        return worker
249
+
250
+
251
def stop(*, timeout: float = _FLUSH_TIMEOUT_S_DEFAULT) -> int:
    """Stop the module-level SignalWorker (if any); returns drop count.

    The lock is released while the worker shuts down, so the (possibly
    slow) flush does not block other users of the singleton.
    """
    global _WORKER_SINGLETON
    with _WORKER_LOCK:
        worker = _WORKER_SINGLETON
    if worker is None:
        return 0

    dropped = worker.stop(timeout=timeout)
    with _WORKER_LOCK:
        _WORKER_SINGLETON = None
    return dropped
262
+
263
+
264
def current() -> SignalWorker | None:
    """Return the singleton worker, or ``None`` if none exists. TEST helper."""
    with _WORKER_LOCK:
        worker = _WORKER_SINGLETON
    return worker
268
+
269
+
270
+ __all__ = ("SignalWorker", "start", "stop", "current")
@@ -19,16 +19,330 @@ Part of Qualixar | Author: Varun Pratap Bhardwaj
19
19
 
20
20
  from __future__ import annotations
21
21
 
22
+ import hashlib
23
+ import json
22
24
  import logging
23
25
  import math
26
+ import queue
24
27
  import sqlite3
25
28
  import threading
29
+ from dataclasses import dataclass, field
26
30
  from datetime import datetime, timezone
27
31
  from pathlib import Path
32
+ from typing import Any
28
33
 
29
34
  logger = logging.getLogger(__name__)
30
35
 
31
36
 
37
+ # ===========================================================================
38
+ # LLD-02 §4.1 — SignalBatch + enqueue + record_signal_batch
39
+ # ===========================================================================
40
+ #
41
+ # These module-level helpers are the v3.4.22 signal pipeline. The class
42
+ # ``LearningSignals`` below stays in place for v3.4.20 compatibility (D5);
43
+ # new writers go through ``enqueue`` / ``record_signal_batch``.
44
+ # ---------------------------------------------------------------------------
45
+
46
+
47
@dataclass(frozen=True)
class SignalCandidate:
    """A single retrieval candidate whose signal is to be recorded.

    Frozen dataclass. Holds what is needed to write one ``learning_signals``
    row and one ``learning_features`` row in the same transaction.
    """

    fact_id: str
    channel_scores: dict[str, float] = field(default_factory=dict)
    cross_encoder_score: float | None = None
    # Full result dict — handed to FeatureExtractor.extract(); stored as-is so
    # features are serialised at drain time rather than at enqueue time.
    result_dict: dict[str, Any] = field(default_factory=dict)

    def to_result_dict(self) -> dict[str, Any]:
        """Build the dict passed to ``FeatureExtractor.extract()``.

        Starts from ``fact_id``, adds a copy of ``channel_scores`` when
        non-empty and ``cross_encoder_score`` when not ``None``, then layers
        ``result_dict`` on top so caller-supplied fields take precedence.
        """
        base: dict[str, Any] = {"fact_id": self.fact_id}
        if self.channel_scores:
            base["channel_scores"] = {**self.channel_scores}
        if self.cross_encoder_score is not None:
            base["cross_encoder_score"] = self.cross_encoder_score
        # Caller-provided fields win over the defaults built above.
        return {**base, **self.result_dict}
76
+
77
+
78
@dataclass(frozen=True)
class SignalBatch:
    """The signal rows produced by one recall; the unit placed on the queue."""

    profile_id: str
    query_id: str
    query_text: str
    # Candidates in rank order; empty by default.
    candidates: tuple[SignalCandidate, ...] = ()
    # Free-form per-query context; a fresh dict per instance.
    query_context: dict[str, Any] = field(default_factory=dict)
87
+
88
+
89
# Module-level bounded queue — one per process. Sized per LLD-02 §9
# ``SLM_SIGNAL_QUEUE_MAX`` (default 5000). Readers are the signal_worker.
# NOTE(review): the size is a literal here; no lookup of an
# ``SLM_SIGNAL_QUEUE_MAX`` override is visible in this section — confirm
# where (or whether) it is applied.
_QUEUE_MAX: int = 5000
_Q: "queue.Queue[SignalBatch]" = queue.Queue(maxsize=_QUEUE_MAX)

# Observability counters — module-level so tests can reset/inspect.
# All reads and writes go through ``_counters_lock``.
_counters: dict[str, int] = {
    "signal_dropped_total": 0,
    "signal_enqueued_total": 0,
    "enqueue_failed_total": 0,
    "signal_drop_on_flush_total": 0,
}
_counters_lock = threading.Lock()

# Throttle drop-warning logging to once per 60 seconds (LLD-02 §4.2).
# Held in a one-element list so it can be updated in place without a
# ``global`` statement; the value is a ``time.monotonic()`` reading.
_last_drop_log_ts: list[float] = [0.0]
105
+
106
+
107
def _bump(counter: str, n: int = 1) -> None:
    """Add ``n`` to ``counter`` under the counters lock.

    A counter name that is not registered yet starts from zero.
    """
    with _counters_lock:
        previous = _counters.get(counter, 0)
        _counters[counter] = previous + n
110
+
111
+
112
+ # S8-ARC-03 (v3.4.22): public producer/consumer contract. ``signal_worker``
113
+ # used to reach through ``signals._Q`` and ``signals._bump`` by name,
114
+ # which made the private-by-convention boundary the actual test seam
115
+ # too. These wrappers are the sanctioned surface; ``_Q`` / ``_bump`` stay
116
+ # internal, and test-only helpers live on the ``_testing`` submodule.
117
def get_queue() -> "queue.Queue[SignalBatch]":
    """Return the module-level producer queue (shared across threads)."""
    import sys as _sys

    # Look the attribute up on the live module object so a test that
    # monkeypatches ``_Q`` is honoured.
    this_module = _sys.modules[__name__]
    patched = getattr(this_module, "_Q", None)
    return patched if patched else _Q
122
+
123
+
124
def bump_counter(counter: str, n: int = 1) -> None:
    """Increment ``counter`` by ``n``; public alias of the internal ``_bump``."""
    _bump(counter, n=n)
127
+
128
+
129
def get_counters() -> dict[str, int]:
    """Return a copy of the signal pipeline counters taken under the lock."""
    with _counters_lock:
        snapshot = _counters.copy()
    return snapshot
133
+
134
+
135
def reset_counters() -> None:
    """Zero every counter and the drop-log throttle — TEST-ONLY helper."""
    with _counters_lock:
        # In-place update keeps the same dict object and key order.
        _counters.update(dict.fromkeys(_counters, 0))
        _last_drop_log_ts[0] = 0.0
141
+
142
+
143
def _drain_queue_for_tests() -> None:
    """Discard everything currently in the module queue — TEST-ONLY."""
    try:
        while True:
            _Q.get_nowait()
    except queue.Empty:
        return
150
+
151
+
152
def queue_size() -> int:
    """Return current queue depth — used by worker + tests."""
    import sys as _sys

    # Same dynamic lookup as the producer side, so a monkeypatched ``_Q``
    # is the one that gets measured.
    patched = getattr(_sys.modules[__name__], "_Q", None)
    active = patched if patched else _Q
    return active.qsize()
157
+
158
+
159
def _hash_query(query_text: str) -> str:
    """Compute ``query_text_hash`` per LLD-02 §4.1.

    The text is lowercased and stripped (a falsy value is treated as the
    empty string), encoded as UTF-8, hashed with SHA-256 and cut to the
    first 32 hex characters. The result goes into the
    ``learning_signals.query_text_hash`` column; the raw ``query`` column
    MUST stay empty (S2 privacy rule).
    """
    text = query_text if query_text else ""
    canonical = text.lower().strip()
    digest = hashlib.sha256(canonical.encode("utf-8"))
    return digest.hexdigest()[:32]
168
+
169
+
170
def enqueue(batch: SignalBatch) -> None:
    """Put a SignalBatch on the queue without blocking.

    Hot-path-safe: nothing is raised to the caller and the only queue
    operation is a ``put_nowait``. A full queue drops the batch and bumps
    ``signal_dropped_total`` (SW2), with a warning at most once per 60 s.
    Anything that is not a ``SignalBatch``, or any other ``put_nowait``
    failure, bumps ``enqueue_failed_total`` (RP1 — never propagate).
    """
    import sys as _sys
    import time as _time

    # ``isinstance`` already rejects ``None``.
    if not isinstance(batch, SignalBatch):
        _bump("enqueue_failed_total")
        return

    # Go through the module object so test monkeypatches (and any future
    # runtime reconfiguration) of ``_Q`` are honoured.
    target = getattr(_sys.modules[__name__], "_Q", None) or _Q

    try:
        target.put_nowait(batch)
    except queue.Full:
        _bump("signal_dropped_total")
        now = _time.monotonic()
        throttle_elapsed = now - _last_drop_log_ts[0]
        if throttle_elapsed >= 60.0:
            _last_drop_log_ts[0] = now
            logger.warning(
                "signal queue full; dropped batch (total dropped=%d)",
                get_counters()["signal_dropped_total"],
            )
    except Exception as exc:  # pragma: no cover — defensive; never propagate.
        _bump("enqueue_failed_total")
        logger.debug("enqueue failed: %s", exc)
    else:
        _bump("signal_enqueued_total")
206
+
207
+
208
def enqueue_shown_flip(query_id: str, fact_id: str, shown: bool) -> None:
    """Queue a request to mark a candidate as shown / not shown.

    LLD-02 §4.9 — replaces the old fake-positive ``recall_hit`` emission.
    Nothing is written here: the request travels as a sentinel batch with
    no candidates and a ``_shown_flip`` entry in ``query_context``, handed
    to ``enqueue`` (non-blocking).
    """
    flip_payload = {"fact_id": fact_id, "shown": bool(shown)}
    sentinel = SignalBatch(
        profile_id="",
        query_id=query_id,
        query_text="",
        candidates=(),
        query_context={"_shown_flip": flip_payload},
    )
    enqueue(sentinel)
227
+
228
+
229
def _apply_shown_flip(conn: sqlite3.Connection, batch: SignalBatch) -> None:
    """Execute the UPDATE described by a shown-flip sentinel batch.

    Sets ``signal_type`` to ``'shown'`` or ``'not_shown'`` on the
    ``learning_signals`` rows matching ``(query_id, fact_id)``. Does nothing
    when the sentinel lacks a fact id or the batch lacks a query id.
    Never invents reward data (S2 / M1 honesty rule).
    """
    payload = batch.query_context.get("_shown_flip") or {}
    target_fact = payload.get("fact_id")
    if not (target_fact and batch.query_id):
        return

    signal_type = "shown" if payload.get("shown", False) else "not_shown"
    conn.execute(
        "UPDATE learning_signals SET signal_type = ? "
        "WHERE query_id = ? AND fact_id = ?",
        (signal_type, batch.query_id, target_fact),
    )
246
+
247
+
248
def record_signal_batch(
    conn: sqlite3.Connection, batch: SignalBatch,
) -> list[int]:
    """Synchronous write path used by the signal_worker drain.

    Atomic (S1): all signals+features INSERTs of one batch run inside a
    single transaction. ``with conn:`` only commits or rolls back a
    transaction that is already open; on a connection in autocommit mode
    (``isolation_level=None``) it opens none, and every INSERT would commit
    on its own. An explicit ``BEGIN`` is therefore issued when no
    transaction is active, so a failure part-way through rolls back the
    whole batch and a retry cannot duplicate rows.
    Privacy (S2): stores only ``query_text_hash``; ``query`` column is empty.
    Handles the empty-candidate case (S3): returns ``[]`` with no side effect.

    Args:
        conn: sqlite3.Connection already configured (WAL, busy_timeout).
            The caller owns the lifecycle.
        batch: A ``SignalBatch``; if it carries a ``_shown_flip`` sentinel
            the UPDATE path runs instead of the INSERT path.

    Returns:
        List of inserted ``learning_signals.id`` values in insert order.
        Empty list if no candidates were present.

    Raises:
        sqlite3.Error: Propagated from the INSERT/UPDATE statements after
            the transaction has been rolled back.
    """
    context = batch.query_context

    # Shown-flip path — LLD-02 §4.9. A single UPDATE statement, so it is
    # atomic with or without an enclosing transaction.
    if context and "_shown_flip" in context:
        with conn:
            _apply_shown_flip(conn, batch)
        return []

    if not batch.candidates:
        return []

    # Import lazily — avoids a circular import at module load time.
    from superlocalmemory.learning.features import FeatureExtractor

    query_hash = _hash_query(batch.query_text)
    now_iso = datetime.now(timezone.utc).isoformat(timespec="seconds")
    signal_ids: list[int] = []

    # PERF-v2-02: if ensemble_rerank already built features for a candidate
    # (during the hot path), reuse them instead of calling
    # FeatureExtractor.extract a second time. The reranker stashes a
    # {fact_id: features_json_str} dict under a reserved key on
    # ``query_context``. The lookup is the same for every candidate, so it
    # is done once here rather than inside the loop.
    fv_cache = (
        context.get("_precomputed_features_json", None)
        if isinstance(context, dict) else None
    )
    has_cache = isinstance(fv_cache, dict)

    with conn:  # COMMIT on success, ROLLBACK on exception (S1).
        if not conn.in_transaction:
            # Autocommit connections need the transaction opened by hand.
            conn.execute("BEGIN")

        for position, cand in enumerate(batch.candidates):
            cur = conn.execute(
                "INSERT INTO learning_signals "
                "(profile_id, query, fact_id, signal_type, value, created_at, "
                " query_id, query_text_hash, position, channel_scores, "
                " cross_encoder) "
                "VALUES (?, '', ?, ?, 1.0, ?, ?, ?, ?, ?, ?)",
                (
                    batch.profile_id,
                    cand.fact_id,
                    "candidate",
                    now_iso,
                    batch.query_id,
                    query_hash,
                    position,
                    json.dumps(cand.channel_scores, separators=(",", ":")),
                    cand.cross_encoder_score,
                ),
            )
            sid = cur.lastrowid
            if sid is None:  # pragma: no cover — should not occur.
                raise sqlite3.OperationalError("no lastrowid from signal insert")

            features_json_str: str
            if has_cache and cand.fact_id in fv_cache:
                raw = fv_cache[cand.fact_id]
                features_json_str = (
                    raw if isinstance(raw, str)
                    else json.dumps(raw, separators=(",", ":"))
                )
            else:
                # Cache miss: extract features now.
                fv = FeatureExtractor.extract(
                    cand.to_result_dict(), context,
                ).features
                features_json_str = json.dumps(fv, separators=(",", ":"))

            # label column is NOT NULL REAL → use 0.0 sentinel (unlabeled).
            # Real label comes from labeler.label_for_row at training time.
            conn.execute(
                "INSERT INTO learning_features "
                "(profile_id, query_id, fact_id, features_json, label, "
                " created_at, signal_id, is_synthetic) "
                "VALUES (?, ?, ?, ?, 0.0, ?, ?, 0)",
                (
                    batch.profile_id,
                    batch.query_id,
                    cand.fact_id,
                    features_json_str,
                    now_iso,
                    sid,
                ),
            )
            signal_ids.append(sid)

    return signal_ids
344
+
345
+
32
346
  class LearningSignals:
33
347
  """Compute and apply zero-cost learning signals.
34
348