superlocalmemory 3.4.18 → 3.4.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +35 -0
- package/README.md +42 -34
- package/bin/slm +11 -0
- package/bin/slm.bat +12 -0
- package/package.json +4 -3
- package/pyproject.toml +3 -2
- package/scripts/build-slm-hook.ps1 +40 -0
- package/scripts/build-slm-hook.sh +45 -0
- package/scripts/build_entry.py +452 -0
- package/scripts/ci/stage5b_gate.sh +50 -0
- package/scripts/postinstall/validation.js +187 -0
- package/scripts/postinstall-interactive.js +756 -0
- package/scripts/postinstall_binary.js +287 -0
- package/scripts/release_manifest.py +273 -0
- package/scripts/slm-hook.spec +56 -0
- package/skills/slm-build-graph/SKILL.md +423 -0
- package/skills/slm-list-recent/SKILL.md +348 -0
- package/skills/slm-recall/SKILL.md +343 -0
- package/skills/slm-remember/SKILL.md +194 -0
- package/skills/slm-show-patterns/SKILL.md +224 -0
- package/skills/slm-status/SKILL.md +363 -0
- package/skills/slm-switch-profile/SKILL.md +442 -0
- package/src/superlocalmemory/cli/commands.py +219 -79
- package/src/superlocalmemory/cli/context_commands.py +192 -0
- package/src/superlocalmemory/cli/daemon.py +15 -1
- package/src/superlocalmemory/cli/db_migrate.py +80 -0
- package/src/superlocalmemory/cli/escape_hatch.py +220 -0
- package/src/superlocalmemory/cli/main.py +72 -1
- package/src/superlocalmemory/core/context_cache.py +397 -0
- package/src/superlocalmemory/core/embeddings.py +8 -2
- package/src/superlocalmemory/core/engine.py +38 -2
- package/src/superlocalmemory/core/engine_wiring.py +1 -1
- package/src/superlocalmemory/core/ram_lock.py +111 -0
- package/src/superlocalmemory/core/recall_pipeline.py +433 -3
- package/src/superlocalmemory/core/recall_worker.py +8 -3
- package/src/superlocalmemory/core/security_primitives.py +635 -0
- package/src/superlocalmemory/core/shadow_router.py +319 -0
- package/src/superlocalmemory/core/slm_disabled.py +87 -0
- package/src/superlocalmemory/core/slmignore.py +125 -0
- package/src/superlocalmemory/core/topic_signature.py +143 -0
- package/src/superlocalmemory/core/worker_pool.py +14 -3
- package/src/superlocalmemory/encoding/cognitive_consolidator.py +2 -2
- package/src/superlocalmemory/evolution/budget.py +321 -0
- package/src/superlocalmemory/evolution/llm_dispatch.py +508 -0
- package/src/superlocalmemory/evolution/skill_evolver.py +144 -94
- package/src/superlocalmemory/hooks/_outcome_common.py +506 -0
- package/src/superlocalmemory/hooks/adapter_base.py +317 -0
- package/src/superlocalmemory/hooks/antigravity_adapter.py +192 -0
- package/src/superlocalmemory/hooks/claude_code_hooks.py +33 -1
- package/src/superlocalmemory/hooks/context_payload.py +312 -0
- package/src/superlocalmemory/hooks/copilot_adapter.py +154 -0
- package/src/superlocalmemory/hooks/cross_platform_connector.py +90 -0
- package/src/superlocalmemory/hooks/cursor_adapter.py +195 -0
- package/src/superlocalmemory/hooks/hook_handlers.py +109 -8
- package/src/superlocalmemory/hooks/ide_connector.py +25 -2
- package/src/superlocalmemory/hooks/post_tool_async_hook.py +165 -0
- package/src/superlocalmemory/hooks/post_tool_outcome_hook.py +223 -0
- package/src/superlocalmemory/hooks/prewarm_auth.py +170 -0
- package/src/superlocalmemory/hooks/session_registry.py +186 -0
- package/src/superlocalmemory/hooks/stop_outcome_hook.py +134 -0
- package/src/superlocalmemory/hooks/sync_loop.py +114 -0
- package/src/superlocalmemory/hooks/user_prompt_hook.py +128 -0
- package/src/superlocalmemory/hooks/user_prompt_rehash_hook.py +202 -0
- package/src/superlocalmemory/infra/backup.py +3 -3
- package/src/superlocalmemory/infra/cloud_backup.py +2 -2
- package/src/superlocalmemory/infra/event_bus.py +2 -2
- package/src/superlocalmemory/infra/webhook_dispatcher.py +3 -3
- package/src/superlocalmemory/learning/arm_catalog.py +99 -0
- package/src/superlocalmemory/learning/bandit.py +526 -0
- package/src/superlocalmemory/learning/bandit_cache.py +133 -0
- package/src/superlocalmemory/learning/behavioral.py +53 -1
- package/src/superlocalmemory/learning/consolidation_cycle.py +381 -0
- package/src/superlocalmemory/learning/consolidation_worker.py +188 -520
- package/src/superlocalmemory/learning/database.py +256 -0
- package/src/superlocalmemory/learning/dedup_hnsw.py +413 -0
- package/src/superlocalmemory/learning/ensemble.py +300 -0
- package/src/superlocalmemory/learning/fact_outcome_joins.py +207 -0
- package/src/superlocalmemory/learning/forgetting_scheduler.py +55 -0
- package/src/superlocalmemory/learning/hnsw_dedup.py +69 -0
- package/src/superlocalmemory/learning/labeler.py +87 -0
- package/src/superlocalmemory/learning/legacy_migration.py +277 -0
- package/src/superlocalmemory/learning/memory_merge.py +160 -0
- package/src/superlocalmemory/learning/model_cache.py +269 -0
- package/src/superlocalmemory/learning/model_rollback.py +278 -0
- package/src/superlocalmemory/learning/outcome_queue.py +284 -0
- package/src/superlocalmemory/learning/pattern_miner.py +415 -0
- package/src/superlocalmemory/learning/pattern_miner_constants.py +47 -0
- package/src/superlocalmemory/learning/ranker.py +225 -81
- package/src/superlocalmemory/learning/ranker_common.py +163 -0
- package/src/superlocalmemory/learning/ranker_retrain_legacy.py +202 -0
- package/src/superlocalmemory/learning/ranker_retrain_online.py +411 -0
- package/src/superlocalmemory/learning/reward.py +777 -0
- package/src/superlocalmemory/learning/reward_archive.py +210 -0
- package/src/superlocalmemory/learning/reward_boost.py +201 -0
- package/src/superlocalmemory/learning/reward_proxy.py +326 -0
- package/src/superlocalmemory/learning/shadow_test.py +524 -0
- package/src/superlocalmemory/learning/signal_worker.py +270 -0
- package/src/superlocalmemory/learning/signals.py +314 -0
- package/src/superlocalmemory/learning/trigram_index.py +547 -0
- package/src/superlocalmemory/mcp/server.py +5 -5
- package/src/superlocalmemory/mcp/tools_context.py +183 -0
- package/src/superlocalmemory/mcp/tools_core.py +92 -27
- package/src/superlocalmemory/parameterization/soft_prompt_generator.py +13 -0
- package/src/superlocalmemory/retrieval/engine.py +52 -0
- package/src/superlocalmemory/retrieval/reranker.py +4 -2
- package/src/superlocalmemory/server/api.py +2 -2
- package/src/superlocalmemory/server/bandit_loops.py +140 -0
- package/src/superlocalmemory/server/middleware/__init__.py +11 -0
- package/src/superlocalmemory/server/middleware/security_headers.py +144 -0
- package/src/superlocalmemory/server/routes/backup.py +36 -13
- package/src/superlocalmemory/server/routes/behavioral.py +50 -19
- package/src/superlocalmemory/server/routes/brain.py +1234 -0
- package/src/superlocalmemory/server/routes/data_io.py +4 -4
- package/src/superlocalmemory/server/routes/events.py +2 -2
- package/src/superlocalmemory/server/routes/helpers.py +1 -1
- package/src/superlocalmemory/server/routes/learning.py +192 -7
- package/src/superlocalmemory/server/routes/memories.py +189 -1
- package/src/superlocalmemory/server/routes/prewarm.py +171 -0
- package/src/superlocalmemory/server/routes/profiles.py +3 -3
- package/src/superlocalmemory/server/routes/token.py +88 -0
- package/src/superlocalmemory/server/routes/ws.py +5 -5
- package/src/superlocalmemory/server/security_middleware.py +13 -7
- package/src/superlocalmemory/server/ui.py +2 -2
- package/src/superlocalmemory/server/unified_daemon.py +335 -3
- package/src/superlocalmemory/storage/migration_runner.py +545 -0
- package/src/superlocalmemory/storage/migrations/M001_add_signal_features_columns.py +67 -0
- package/src/superlocalmemory/storage/migrations/M002_model_state_history.py +132 -0
- package/src/superlocalmemory/storage/migrations/M003_migration_log.py +38 -0
- package/src/superlocalmemory/storage/migrations/M004_cross_platform_sync_log.py +46 -0
- package/src/superlocalmemory/storage/migrations/M005_bandit_tables.py +75 -0
- package/src/superlocalmemory/storage/migrations/M006_action_outcomes_reward.py +75 -0
- package/src/superlocalmemory/storage/migrations/M007_pending_outcomes.py +63 -0
- package/src/superlocalmemory/storage/migrations/M009_model_lineage.py +54 -0
- package/src/superlocalmemory/storage/migrations/M010_evolution_config.py +75 -0
- package/src/superlocalmemory/storage/migrations/M011_archive_and_merge.py +87 -0
- package/src/superlocalmemory/storage/migrations/M012_shadow_observations.py +72 -0
- package/src/superlocalmemory/storage/migrations/M013_bi_temporal_columns.py +55 -0
- package/src/superlocalmemory/storage/migrations/__init__.py +81 -0
- package/src/superlocalmemory/storage/models.py +4 -0
- package/src/superlocalmemory/ui/css/brain.css +409 -0
- package/src/superlocalmemory/ui/css/legacy-dashboard.css +645 -0
- package/src/superlocalmemory/ui/index.html +459 -1345
- package/src/superlocalmemory/ui/js/brain.js +1321 -0
- package/src/superlocalmemory/ui/js/clusters.js +123 -4
- package/src/superlocalmemory/ui/js/init.js +48 -39
- package/src/superlocalmemory/ui/js/memories.js +88 -2
- package/src/superlocalmemory/ui/js/modal.js +71 -1
- package/src/superlocalmemory/ui/js/ng-shell.js +101 -88
- package/src/superlocalmemory/ui/js/trust-dashboard.js +168 -25
- package/src/superlocalmemory/ui/vendor/bootstrap-icons/bootstrap-icons.css +2018 -0
- package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff +0 -0
- package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff2 +0 -0
- package/src/superlocalmemory/ui/vendor/bootstrap.bundle.min.js +7 -0
- package/src/superlocalmemory/ui/vendor/bootstrap.min.css +6 -0
- package/src/superlocalmemory/ui/vendor/d3.v7.min.js +2 -0
- package/src/superlocalmemory/ui/vendor/graphology-library.min.js +2 -0
- package/src/superlocalmemory/ui/vendor/graphology.umd.min.js +2 -0
- package/src/superlocalmemory/ui/vendor/inter-ui/inter-variable.min.css +8 -0
- package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable-Italic.woff2 +0 -0
- package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable.woff2 +0 -0
- package/src/superlocalmemory/ui/vendor/sigma.min.js +1 -0
- package/src/superlocalmemory/ui/js/behavioral.js +0 -447
- package/src/superlocalmemory/ui/js/graph-core.js +0 -447
- package/src/superlocalmemory/ui/js/graph-interactions.js +0 -351
- package/src/superlocalmemory/ui/js/learning.js +0 -435
- package/src/superlocalmemory/ui/js/patterns.js +0 -93
- package/src/superlocalmemory.egg-info/PKG-INFO +0 -647
- package/src/superlocalmemory.egg-info/SOURCES.txt +0 -335
- package/src/superlocalmemory.egg-info/dependency_links.txt +0 -1
- package/src/superlocalmemory.egg-info/entry_points.txt +0 -2
- package/src/superlocalmemory.egg-info/requires.txt +0 -58
- package/src/superlocalmemory.egg-info/top_level.txt +0 -1
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under AGPL-3.0-or-later - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory v3.4.21 — LLD-02 §4.2
|
|
4
|
+
|
|
5
|
+
"""Background signal drain worker.
|
|
6
|
+
|
|
7
|
+
LLD reference: ``.backup/active-brain/lld/LLD-02-signal-pipeline-and-lightgbm.md``
|
|
8
|
+
Section 4.2 — moves signal writes off the recall hot path.
|
|
9
|
+
|
|
10
|
+
Contract (hard rules, enforced by tests):
|
|
11
|
+
SW1 — Hot path never waits for disk.
|
|
12
|
+
SW2 — Drop + counter on full queue, never raise.
|
|
13
|
+
SW3 — Graceful flush ≤3 s on ``stop()``.
|
|
14
|
+
SW4 — Connection is thread-local; never shared across threads.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import logging
|
|
20
|
+
import queue
|
|
21
|
+
import sqlite3
|
|
22
|
+
import threading
|
|
23
|
+
import time
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
from typing import Any
|
|
26
|
+
|
|
27
|
+
from superlocalmemory.learning import signals as _signals_mod
|
|
28
|
+
from superlocalmemory.learning.signals import (
|
|
29
|
+
SignalBatch,
|
|
30
|
+
bump_counter as _bump_counter,
|
|
31
|
+
get_queue as _signals_get_queue,
|
|
32
|
+
record_signal_batch,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _current_queue() -> "queue.Queue[SignalBatch]":
    """Return the queue this worker drains.

    The lookup goes through the public ``signals.get_queue()`` accessor on
    every call instead of caching the queue object, so a queue swapped in
    on the ``signals`` module (for example by a test) is picked up by the
    next drain. S8-ARC-03 (v3.4.21): no private ``_Q`` reach-through.
    """
    active_queue = _signals_get_queue()
    return active_queue
|
|
43
|
+
|
|
44
|
+
logger = logging.getLogger(__name__)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
_DRAIN_BATCH_DEFAULT = 50
|
|
48
|
+
_DRAIN_INTERVAL_MS_DEFAULT = 250
|
|
49
|
+
_FLUSH_TIMEOUT_S_DEFAULT = 3.0
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class SignalWorker:
    """Background drainer for the module-level signal queue.

    One instance per daemon. ``start()`` spawns a daemon thread; that thread
    opens its own sqlite3 connection, uses it for every write and closes it
    before exiting, so the connection never crosses threads (SW4).
    """

    def __init__(
        self,
        learning_db: str | Path,
        *,
        batch_size: int = _DRAIN_BATCH_DEFAULT,
        interval_ms: int = _DRAIN_INTERVAL_MS_DEFAULT,
    ) -> None:
        """Configure the worker without starting it.

        Args:
            learning_db: Path of the SQLite database the worker writes to.
            batch_size: Maximum number of batches taken from the queue per
                drain pass.
            interval_ms: Pause between drain passes, in milliseconds.

        Raises:
            ValueError: If ``batch_size`` is not positive or ``interval_ms``
                is negative.
        """
        if batch_size <= 0:
            raise ValueError("batch_size must be positive")
        if interval_ms < 0:
            raise ValueError("interval_ms must be >= 0")
        self._db_path = str(learning_db)
        self._batch_size = batch_size
        self._interval_s = interval_ms / 1000.0
        self._stop_event = threading.Event()
        self._thread: threading.Thread | None = None
        self._conn_thread_id: int | None = None
        # Monotonic deadline for the shutdown flush; set by ``stop()``.
        self._flush_deadline: float | None = None

    # --- public API ------------------------------------------------------

    def start(self) -> None:
        """Start the background drain thread (idempotent)."""
        if self._thread is not None and self._thread.is_alive():
            return
        # Use a fresh event per run instead of clearing the shared one: a
        # previous thread that outlived ``stop()``'s join timeout still
        # holds its own (set) event and therefore cannot be resurrected by
        # a restart.
        stop_event = threading.Event()
        self._stop_event = stop_event
        self._flush_deadline = None
        t = threading.Thread(
            target=self._run,
            args=(stop_event,),
            name="slm-signal-worker",
            daemon=True,
        )
        self._thread = t
        t.start()

    def stop(self, *, timeout: float = _FLUSH_TIMEOUT_S_DEFAULT) -> int:
        """Stop the worker, flushing pending batches up to ``timeout`` seconds.

        Returns the number of batches dropped because they couldn't be flushed
        before the timeout (SW3).
        """
        thread = self._thread
        if thread is None:
            return _drain_and_drop(log_prefix="no-thread")

        budget = max(0.0, timeout)
        # Publish the flush deadline before signalling, so the worker sees
        # it when it wakes up for its final flush.
        self._flush_deadline = time.monotonic() + budget
        self._stop_event.set()
        thread.join(timeout=budget)

        # After join, drain anything left and count it as drop-on-flush.
        remaining = _drain_and_drop(log_prefix="post-join")
        self._thread = None
        return remaining

    # --- internals -------------------------------------------------------

    def _open_threadlocal_conn(self) -> sqlite3.Connection:
        """Open the drain connection (SW4).

        Called once from the worker thread's run loop; the caller owns the
        connection and closes it at shutdown. ``check_same_thread=True``
        makes sqlite3 reject use from any other thread, and
        ``isolation_level=None`` leaves the connection in autocommit mode.

        Raises:
            sqlite3.Error: If the database cannot be opened or configured.
        """
        conn = sqlite3.connect(
            self._db_path,
            isolation_level=None,
            timeout=10,
            check_same_thread=True,
        )
        try:
            conn.execute("PRAGMA journal_mode=WAL")
            conn.execute("PRAGMA synchronous=NORMAL")
            conn.execute("PRAGMA busy_timeout=10000")
            conn.execute("PRAGMA foreign_keys=ON")
        except sqlite3.Error:
            # Do not leak the half-configured connection.
            conn.close()
            raise
        self._conn_thread_id = threading.get_ident()
        return conn

    def _run(self, stop_event: threading.Event | None = None) -> None:
        """Main loop: drain up to ``batch_size`` every ``interval_s``.

        Args:
            stop_event: Event that ends this run. Defaults to the worker's
                current stop event when called without arguments.
        """
        if stop_event is None:
            stop_event = self._stop_event
        try:
            conn = self._open_threadlocal_conn()
        except sqlite3.Error as exc:  # pragma: no cover — DB unavailable
            logger.error("signal_worker: cannot open DB: %s", exc)
            return

        try:
            while not stop_event.is_set():
                self._drain_once(conn)
                if stop_event.wait(self._interval_s):
                    break
            # Final flush on graceful stop.
            self._flush_on_stop(conn)
        finally:
            try:
                conn.close()
            except sqlite3.Error:  # pragma: no cover
                pass

    def _flush_on_stop(self, conn: sqlite3.Connection) -> None:
        """Flush the queue at shutdown, bounded by the ``stop()`` deadline.

        Always performs one drain pass. When ``stop()`` published a
        deadline, further passes run until the queue is empty or the
        deadline has passed, so batches are only dropped when they truly
        could not be written in time.
        """
        deadline = self._flush_deadline
        while True:
            self._drain_once(conn)
            if deadline is None or _current_queue().empty():
                return
            if time.monotonic() >= deadline:
                return

    def _drain_once(self, conn: sqlite3.Connection) -> int:
        """Drain up to ``batch_size`` batches from the queue.

        Returns the number of batches written. Each batch goes through
        ``_write_with_retry``; a batch that still fails is dropped and
        counted under ``signal_dropped_total`` (see LLD-02 §8 error matrix).
        """
        written = 0
        for _ in range(self._batch_size):
            try:
                batch = _current_queue().get_nowait()
            except queue.Empty:
                break

            ok = _write_with_retry(conn, batch)
            if ok:
                written += 1
            else:
                _bump_counter("signal_dropped_total")
        return written
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def _write_with_retry(
    conn: sqlite3.Connection,
    batch: SignalBatch,
    *,
    attempts: int = 3,
) -> bool:
    """Write one batch, retrying operational errors with doubling backoff.

    Args:
        conn: Connection handed to ``record_signal_batch``.
        batch: The batch to persist.
        attempts: Maximum number of write attempts.

    Returns:
        True once a write succeeds; False when every attempt failed or a
        non-retriable error occurred. Exceptions are logged, not raised.
    """
    delay_ms = 50
    attempt = 0
    while attempt < attempts:
        attempt += 1
        try:
            record_signal_batch(conn, batch)
        except sqlite3.OperationalError as exc:
            logger.warning(
                "signal_worker: write attempt %d failed: %s", attempt, exc,
            )
            if attempt == attempts:
                return False
            # Wait before the next attempt, then double the wait.
            time.sleep(delay_ms / 1000.0)
            delay_ms *= 2
        except sqlite3.Error as exc:  # pragma: no cover — defensive
            logger.error("signal_worker: non-retriable error: %s", exc)
            return False
        except Exception as exc:  # pragma: no cover — never propagate
            logger.error("signal_worker: unexpected: %s", exc)
            return False
        else:
            return True
    return False  # pragma: no cover — defensive
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _drain_and_drop(*, log_prefix: str = "") -> int:
    """Empty the queue, discarding every batch, and return how many were lost.

    Used during shutdown when the worker could not flush in time (SW3).
    A non-zero count is added to ``signal_drop_on_flush_total`` and logged
    with ``log_prefix`` identifying the call site.
    """
    dropped = 0
    try:
        while True:
            _current_queue().get_nowait()
            dropped += 1
    except queue.Empty:
        pass

    if not dropped:
        return dropped

    _bump_counter("signal_drop_on_flush_total", dropped)
    logger.info(
        "signal_worker: %s dropped %d unflushed batches",
        log_prefix, dropped,
    )
    return dropped
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
# ---------------------------------------------------------------------------
|
|
227
|
+
# Module-level singleton helpers (S8-SK-01 integration): let the daemon
|
|
228
|
+
# start/stop one SignalWorker without knowing the class internals. Callers
|
|
229
|
+
# in ``unified_daemon.lifespan`` use ``start(learning_db)`` and ``stop()``.
|
|
230
|
+
# ---------------------------------------------------------------------------
|
|
231
|
+
|
|
232
|
+
_WORKER_SINGLETON: SignalWorker | None = None
|
|
233
|
+
_WORKER_LOCK = threading.Lock()
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def start(learning_db: "str | Path", **kwargs) -> SignalWorker:
    """Return the module-level SignalWorker, creating it on first use.

    The worker is constructed from ``learning_db`` and ``kwargs`` only when
    no singleton exists yet; on later calls those arguments are ignored and
    the existing instance is returned. Its ``start()`` is invoked every
    time, which is a no-op while its thread is alive.
    """
    global _WORKER_SINGLETON
    with _WORKER_LOCK:
        worker = _WORKER_SINGLETON
        if worker is None:
            worker = SignalWorker(learning_db, **kwargs)
            _WORKER_SINGLETON = worker
        worker.start()
        return worker
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def stop(*, timeout: float = _FLUSH_TIMEOUT_S_DEFAULT) -> int:
    """Stop and forget the module-level SignalWorker.

    Returns the drop count reported by the worker's ``stop()``, or 0 when
    no singleton exists. The module lock is released while the worker
    shuts down and re-taken only to clear the singleton.
    """
    global _WORKER_SINGLETON
    with _WORKER_LOCK:
        active = _WORKER_SINGLETON
    if active is None:
        return 0

    drop_count = active.stop(timeout=timeout)
    with _WORKER_LOCK:
        _WORKER_SINGLETON = None
    return drop_count
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def current() -> SignalWorker | None:
    """Return the singleton worker, or ``None`` if none exists. TEST helper."""
    with _WORKER_LOCK:
        worker = _WORKER_SINGLETON
    return worker
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
__all__ = ("SignalWorker", "start", "stop", "current")
|
|
@@ -19,16 +19,330 @@ Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
|
19
19
|
|
|
20
20
|
from __future__ import annotations
|
|
21
21
|
|
|
22
|
+
import hashlib
|
|
23
|
+
import json
|
|
22
24
|
import logging
|
|
23
25
|
import math
|
|
26
|
+
import queue
|
|
24
27
|
import sqlite3
|
|
25
28
|
import threading
|
|
29
|
+
from dataclasses import dataclass, field
|
|
26
30
|
from datetime import datetime, timezone
|
|
27
31
|
from pathlib import Path
|
|
32
|
+
from typing import Any
|
|
28
33
|
|
|
29
34
|
logger = logging.getLogger(__name__)
|
|
30
35
|
|
|
31
36
|
|
|
37
|
+
# ===========================================================================
|
|
38
|
+
# LLD-02 §4.1 — SignalBatch + enqueue + record_signal_batch
|
|
39
|
+
# ===========================================================================
|
|
40
|
+
#
|
|
41
|
+
# These module-level helpers are the v3.4.21 signal pipeline. The class
|
|
42
|
+
# ``LearningSignals`` below stays in place for v3.4.20 compatibility (D5);
|
|
43
|
+
# new writers go through ``enqueue`` / ``record_signal_batch``.
|
|
44
|
+
# ---------------------------------------------------------------------------
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass(frozen=True)
class SignalCandidate:
    """A single retrieval candidate captured for signal recording.

    Frozen dataclass: attributes cannot be rebound after construction. It
    carries what is needed to write one ``learning_signals`` row and one
    ``learning_features`` row.
    """

    fact_id: str
    channel_scores: dict[str, float] = field(default_factory=dict)
    cross_encoder_score: float | None = None
    # Full result dict consumed by FeatureExtractor.extract(); stored as-is
    # so features are serialised at drain time rather than at enqueue time.
    result_dict: dict[str, Any] = field(default_factory=dict)

    def to_result_dict(self) -> dict[str, Any]:
        """Build the dict passed to ``FeatureExtractor.extract()``.

        Starts from ``fact_id``, adds a copy of ``channel_scores`` when it is
        non-empty and ``cross_encoder_score`` when it is not ``None``, then
        overlays ``result_dict`` so caller-supplied fields take precedence.
        """
        base: dict[str, Any] = {"fact_id": self.fact_id}
        if self.channel_scores:
            base["channel_scores"] = dict(self.channel_scores)
        if self.cross_encoder_score is not None:
            base["cross_encoder_score"] = self.cross_encoder_score
        # Later unpacking wins: caller-provided fields override defaults.
        return {**base, **self.result_dict}
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass(frozen=True)
class SignalBatch:
    """All signal rows produced by one recall, queued for the worker.

    Frozen dataclass. ``candidates`` defaults to an empty tuple and
    ``query_context`` to a fresh empty dict per instance.
    """

    profile_id: str
    query_id: str
    query_text: str
    candidates: tuple[SignalCandidate, ...] = ()
    query_context: dict[str, Any] = field(default_factory=dict)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# Module-level bounded queue — one per process. Sized per LLD-02 §9
|
|
90
|
+
# ``SLM_SIGNAL_QUEUE_MAX`` (default 5000). Readers are the signal_worker.
|
|
91
|
+
_QUEUE_MAX: int = 5000
|
|
92
|
+
_Q: "queue.Queue[SignalBatch]" = queue.Queue(maxsize=_QUEUE_MAX)
|
|
93
|
+
|
|
94
|
+
# Observability counters — module-level so tests can reset/inspect.
|
|
95
|
+
_counters: dict[str, int] = {
|
|
96
|
+
"signal_dropped_total": 0,
|
|
97
|
+
"signal_enqueued_total": 0,
|
|
98
|
+
"enqueue_failed_total": 0,
|
|
99
|
+
"signal_drop_on_flush_total": 0,
|
|
100
|
+
}
|
|
101
|
+
_counters_lock = threading.Lock()
|
|
102
|
+
|
|
103
|
+
# Throttle drop-warning logging to once per 60 seconds (LLD-02 §4.2).
|
|
104
|
+
_last_drop_log_ts: list[float] = [0.0]
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _bump(counter: str, n: int = 1) -> None:
    """Add ``n`` to the named counter under the counters lock.

    A counter name that is not present yet is treated as starting at zero.
    """
    with _counters_lock:
        previous = _counters.get(counter, 0)
        _counters[counter] = previous + n
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# S8-ARC-03 (v3.4.21): public producer/consumer contract. ``signal_worker``
|
|
113
|
+
# used to reach through ``signals._Q`` and ``signals._bump`` by name,
|
|
114
|
+
# which made the private-by-convention boundary the actual test seam
|
|
115
|
+
# too. These wrappers are the sanctioned surface; ``_Q`` / ``_bump`` stay
|
|
116
|
+
# internal, and test-only helpers live on the ``_testing`` submodule.
|
|
117
|
+
def get_queue() -> "queue.Queue[SignalBatch]":
    """Return the module-level producer queue (shared across threads).

    The queue is looked up as the ``_Q`` attribute of this module's entry
    in ``sys.modules`` on every call, so a replacement assigned to that
    attribute (tests monkeypatch it) is honoured; a missing or falsy
    attribute falls back to the ``_Q`` global.
    """
    import sys as _sys

    this_module = _sys.modules[__name__]
    patched = getattr(this_module, "_Q", None)
    if patched:
        return patched
    return _Q
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def bump_counter(counter: str, n: int = 1) -> None:
    """Increment a pipeline counter by ``n``; public alias of ``_bump``."""
    _bump(counter, n=n)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def get_counters() -> dict[str, int]:
    """Return a copy of the signal pipeline counters, taken under the lock."""
    with _counters_lock:
        snapshot = _counters.copy()
    return snapshot
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def reset_counters() -> None:
    """Zero every counter and clear the drop-log throttle — TEST-ONLY helper."""
    with _counters_lock:
        _counters.update(dict.fromkeys(_counters, 0))
        _last_drop_log_ts[0] = 0.0
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _drain_queue_for_tests() -> None:
    """Discard every batch currently in the module queue — TEST-ONLY."""
    try:
        while True:
            _Q.get_nowait()
    except queue.Empty:
        return
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def queue_size() -> int:
    """Return the current queue depth — used by worker + tests.

    Resolves the queue through this module's ``_Q`` attribute on each call
    so that a monkeypatched queue is the one being measured.
    """
    import sys as _sys

    this_module = _sys.modules[__name__]
    active = getattr(this_module, "_Q", None)
    if not active:
        active = _Q
    return active.qsize()
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _hash_query(query_text: str) -> str:
    """Compute ``query_text_hash`` per LLD-02 §4.1.

    The text is lowercased and stripped, encoded as UTF-8 and hashed with
    SHA-256; the first 32 hex characters are returned. A falsy input
    (``None`` or ``""``) hashes as the empty string. The result is stored in
    ``learning_signals.query_text_hash``; the raw ``query`` column MUST stay
    empty (S2 privacy rule).
    """
    text = query_text if query_text else ""
    digest = hashlib.sha256(text.lower().strip().encode("utf-8"))
    return digest.hexdigest()[:32]
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def enqueue(batch: SignalBatch) -> None:
    """Non-blocking enqueue of a SignalBatch.

    Hot-path-safe: the only queue operation is ``put_nowait`` and every
    exception it raises is handled here (RP1 — never propagate).

    Outcomes, each reflected in a counter:
      * not a ``SignalBatch`` (including ``None``) → ``enqueue_failed_total``
      * queue full → batch dropped, ``signal_dropped_total`` (SW2), with a
        warning logged at most once per 60 seconds
      * unexpected error from the queue → ``enqueue_failed_total``
      * success → ``signal_enqueued_total``
    """
    import sys as _sys
    import time as _time

    # isinstance() already rejects None, so no separate check is needed.
    if not isinstance(batch, SignalBatch):
        _bump("enqueue_failed_total")
        return

    # Resolve the queue through the module to honour monkeypatches in tests
    # and future runtime reconfig. This is cheap — one dict lookup.
    q = getattr(_sys.modules[__name__], "_Q", None) or _Q

    try:
        q.put_nowait(batch)
    except queue.Full:
        _bump("signal_dropped_total")
        now = _time.monotonic()
        last_logged = _last_drop_log_ts[0]
        # 0.0 is the "never logged" marker. The reference point of
        # time.monotonic() is undefined, so it must not be compared against
        # that marker as if it were a timestamp — otherwise the very first
        # drop warning is suppressed whenever the clock reads below 60 s.
        if last_logged == 0.0 or now - last_logged >= 60.0:
            _last_drop_log_ts[0] = now
            logger.warning(
                "signal queue full; dropped batch (total dropped=%d)",
                get_counters()["signal_dropped_total"],
            )
        return
    except Exception as exc:  # pragma: no cover — defensive; never propagate.
        _bump("enqueue_failed_total")
        logger.debug("enqueue failed: %s", exc)
        return

    _bump("signal_enqueued_total")
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def enqueue_shown_flip(query_id: str, fact_id: str, shown: bool) -> None:
    """Queue a request to mark a candidate as shown or not shown.

    LLD-02 §4.9 — replaces the old fake-positive ``recall_hit`` emission.
    Nothing is written here: a batch with no candidates is enqueued whose
    ``query_context`` carries the flip under the ``"_shown_flip"`` key, and
    the drain side turns it into an UPDATE of
    ``learning_signals.signal_type`` (``'shown'`` / ``'not_shown'``).
    """
    flip_payload = {"fact_id": fact_id, "shown": bool(shown)}
    enqueue(
        SignalBatch(
            profile_id="",
            query_id=query_id,
            query_text="",
            candidates=(),
            query_context={"_shown_flip": flip_payload},
        )
    )
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def _apply_shown_flip(conn: sqlite3.Connection, batch: SignalBatch) -> None:
    """Apply a shown-flip sentinel batch (see enqueue_shown_flip).

    Sets ``signal_type`` to ``'shown'`` or ``'not_shown'`` on the
    ``learning_signals`` rows matching the batch's ``query_id`` and the
    flip's ``fact_id``. Does nothing when either identifier is missing.
    Only ``signal_type`` is touched — no reward data is invented
    (S2 / M1 honesty rule).
    """
    payload = batch.query_context.get("_shown_flip") or {}
    target_fact = payload.get("fact_id")
    if not (target_fact and batch.query_id):
        return

    was_shown = bool(payload.get("shown", False))
    signal_type = "shown" if was_shown else "not_shown"
    sql = (
        "UPDATE learning_signals SET signal_type = ? "
        "WHERE query_id = ? AND fact_id = ?"
    )
    conn.execute(sql, (signal_type, batch.query_id, target_fact))
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def record_signal_batch(
    conn: sqlite3.Connection, batch: SignalBatch,
) -> list[int]:
    """Synchronous write path used by the signal_worker drain.

    Atomic (S1): all ``learning_signals`` + ``learning_features`` INSERTs of
    a batch are committed together or not at all. The sqlite3 connection
    context manager never opens a transaction by itself, and a connection
    created with ``isolation_level=None`` autocommits every statement, so
    this function issues an explicit ``BEGIN`` when no transaction is open
    and ends it with an explicit ``COMMIT`` / ``ROLLBACK``. Without that, a
    failure part-way through would leave partial rows behind and a retry
    would insert them again.
    Privacy (S2): stores only ``query_text_hash``; ``query`` column is empty.
    Handles the empty-candidate case (S3): returns ``[]`` with no side effect.

    Args:
        conn: sqlite3.Connection already configured (WAL, busy_timeout).
            The caller owns the lifecycle. If the caller already has a
            transaction open, the rows join it and it is committed (or
            rolled back on error) by the connection context manager.
        batch: A ``SignalBatch``; if it carries a ``_shown_flip`` sentinel
            the UPDATE path runs instead of the INSERT path.

    Returns:
        List of inserted ``learning_signals.id`` values in insert order.
        Empty list if no candidates were present or the batch was a
        shown-flip sentinel.

    Raises:
        sqlite3.Error: Propagated from the database after rolling back.
    """
    # Shown-flip path — LLD-02 §4.9. A single UPDATE statement.
    if batch.query_context and "_shown_flip" in batch.query_context:
        with conn:
            _apply_shown_flip(conn, batch)
        return []

    if not batch.candidates:
        return []

    # Import lazily — avoids a circular import at module load time.
    from superlocalmemory.learning.features import FeatureExtractor

    query_hash = _hash_query(batch.query_text)
    now_iso = datetime.now(timezone.utc).isoformat(timespec="seconds")
    signal_ids: list[int] = []

    # PERF-v2-02: if ensemble_rerank already built features for a candidate
    # (during the hot path), reuse them instead of calling
    # FeatureExtractor.extract a second time. The reranker stashes a
    # {fact_id: features_json_str} dict under a reserved key on
    # ``query_context``. The lookup is the same for every candidate, so it
    # is resolved once here; a cache miss falls through to extract.
    fv_cache = None
    if isinstance(batch.query_context, dict):
        fv_cache = batch.query_context.get("_precomputed_features_json")
    if not isinstance(fv_cache, dict):
        fv_cache = {}

    with conn:  # commits / rolls back a transaction the caller had open
        opened_here = not conn.in_transaction
        if opened_here:
            conn.execute("BEGIN")
        try:
            for position, cand in enumerate(batch.candidates):
                cur = conn.execute(
                    "INSERT INTO learning_signals "
                    "(profile_id, query, fact_id, signal_type, value, created_at, "
                    " query_id, query_text_hash, position, channel_scores, "
                    " cross_encoder) "
                    "VALUES (?, '', ?, ?, 1.0, ?, ?, ?, ?, ?, ?)",
                    (
                        batch.profile_id,
                        cand.fact_id,
                        "candidate",
                        now_iso,
                        batch.query_id,
                        query_hash,
                        position,
                        json.dumps(cand.channel_scores, separators=(",", ":")),
                        cand.cross_encoder_score,
                    ),
                )
                sid = cur.lastrowid
                if sid is None:  # pragma: no cover — should not occur.
                    raise sqlite3.OperationalError(
                        "no lastrowid from signal insert"
                    )

                features_json_str: str
                if cand.fact_id in fv_cache:
                    raw = fv_cache[cand.fact_id]
                    features_json_str = raw if isinstance(raw, str) \
                        else json.dumps(raw, separators=(",", ":"))
                else:
                    fv = FeatureExtractor.extract(
                        cand.to_result_dict(), batch.query_context,
                    ).features
                    features_json_str = json.dumps(fv, separators=(",", ":"))

                # label column is NOT NULL REAL → use 0.0 sentinel (unlabeled).
                # Real label comes from labeler.label_for_row at training time.
                conn.execute(
                    "INSERT INTO learning_features "
                    "(profile_id, query_id, fact_id, features_json, label, "
                    " created_at, signal_id, is_synthetic) "
                    "VALUES (?, ?, ?, ?, 0.0, ?, ?, 0)",
                    (
                        batch.profile_id,
                        batch.query_id,
                        cand.fact_id,
                        features_json_str,
                        now_iso,
                        sid,
                    ),
                )
                signal_ids.append(sid)
        except BaseException:
            # Undo the transaction opened above; one the caller opened is
            # rolled back by the context manager when the exception leaves
            # the ``with`` block.
            if opened_here and conn.in_transaction:
                conn.execute("ROLLBACK")
            raise
        if opened_here and conn.in_transaction:
            conn.execute("COMMIT")

    return signal_ids
|
|
344
|
+
|
|
345
|
+
|
|
32
346
|
class LearningSignals:
|
|
33
347
|
"""Compute and apply zero-cost learning signals.
|
|
34
348
|
|