superlocalmemory 3.4.19 → 3.4.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. package/CHANGELOG.md +24 -0
  2. package/README.md +42 -34
  3. package/bin/slm +11 -0
  4. package/bin/slm.bat +12 -0
  5. package/package.json +4 -3
  6. package/pyproject.toml +4 -3
  7. package/scripts/build-slm-hook.ps1 +40 -0
  8. package/scripts/build-slm-hook.sh +45 -0
  9. package/scripts/build_entry.py +452 -0
  10. package/scripts/ci/stage5b_gate.sh +50 -0
  11. package/scripts/postinstall/validation.js +187 -0
  12. package/scripts/postinstall-interactive.js +756 -0
  13. package/scripts/postinstall_binary.js +287 -0
  14. package/scripts/release_manifest.py +273 -0
  15. package/scripts/slm-hook.spec +56 -0
  16. package/skills/slm-build-graph/SKILL.md +423 -0
  17. package/skills/slm-list-recent/SKILL.md +348 -0
  18. package/skills/slm-recall/SKILL.md +343 -0
  19. package/skills/slm-remember/SKILL.md +194 -0
  20. package/skills/slm-show-patterns/SKILL.md +224 -0
  21. package/skills/slm-status/SKILL.md +363 -0
  22. package/skills/slm-switch-profile/SKILL.md +442 -0
  23. package/src/superlocalmemory/cli/commands.py +254 -79
  24. package/src/superlocalmemory/cli/context_commands.py +192 -0
  25. package/src/superlocalmemory/cli/daemon.py +15 -1
  26. package/src/superlocalmemory/cli/db_migrate.py +80 -0
  27. package/src/superlocalmemory/cli/escape_hatch.py +220 -0
  28. package/src/superlocalmemory/cli/main.py +72 -1
  29. package/src/superlocalmemory/core/context_cache.py +397 -0
  30. package/src/superlocalmemory/core/engine.py +38 -2
  31. package/src/superlocalmemory/core/engine_wiring.py +1 -1
  32. package/src/superlocalmemory/core/ram_lock.py +111 -0
  33. package/src/superlocalmemory/core/recall_pipeline.py +433 -3
  34. package/src/superlocalmemory/core/recall_worker.py +8 -3
  35. package/src/superlocalmemory/core/security_primitives.py +635 -0
  36. package/src/superlocalmemory/core/shadow_router.py +319 -0
  37. package/src/superlocalmemory/core/slm_disabled.py +87 -0
  38. package/src/superlocalmemory/core/slmignore.py +125 -0
  39. package/src/superlocalmemory/core/topic_signature.py +143 -0
  40. package/src/superlocalmemory/core/worker_pool.py +14 -3
  41. package/src/superlocalmemory/encoding/cognitive_consolidator.py +2 -2
  42. package/src/superlocalmemory/evolution/budget.py +321 -0
  43. package/src/superlocalmemory/evolution/llm_dispatch.py +508 -0
  44. package/src/superlocalmemory/evolution/skill_evolver.py +144 -94
  45. package/src/superlocalmemory/hooks/_outcome_common.py +506 -0
  46. package/src/superlocalmemory/hooks/adapter_base.py +317 -0
  47. package/src/superlocalmemory/hooks/antigravity_adapter.py +192 -0
  48. package/src/superlocalmemory/hooks/claude_code_hooks.py +33 -1
  49. package/src/superlocalmemory/hooks/context_payload.py +312 -0
  50. package/src/superlocalmemory/hooks/copilot_adapter.py +154 -0
  51. package/src/superlocalmemory/hooks/cross_platform_connector.py +90 -0
  52. package/src/superlocalmemory/hooks/cursor_adapter.py +195 -0
  53. package/src/superlocalmemory/hooks/hook_handlers.py +109 -8
  54. package/src/superlocalmemory/hooks/ide_connector.py +25 -2
  55. package/src/superlocalmemory/hooks/post_tool_async_hook.py +165 -0
  56. package/src/superlocalmemory/hooks/post_tool_outcome_hook.py +223 -0
  57. package/src/superlocalmemory/hooks/prewarm_auth.py +170 -0
  58. package/src/superlocalmemory/hooks/session_registry.py +186 -0
  59. package/src/superlocalmemory/hooks/stop_outcome_hook.py +134 -0
  60. package/src/superlocalmemory/hooks/sync_loop.py +114 -0
  61. package/src/superlocalmemory/hooks/user_prompt_hook.py +128 -0
  62. package/src/superlocalmemory/hooks/user_prompt_rehash_hook.py +202 -0
  63. package/src/superlocalmemory/infra/backup.py +3 -3
  64. package/src/superlocalmemory/infra/cloud_backup.py +2 -2
  65. package/src/superlocalmemory/infra/event_bus.py +2 -2
  66. package/src/superlocalmemory/infra/webhook_dispatcher.py +3 -3
  67. package/src/superlocalmemory/learning/arm_catalog.py +99 -0
  68. package/src/superlocalmemory/learning/bandit.py +526 -0
  69. package/src/superlocalmemory/learning/bandit_cache.py +133 -0
  70. package/src/superlocalmemory/learning/behavioral.py +53 -1
  71. package/src/superlocalmemory/learning/consolidation_cycle.py +381 -0
  72. package/src/superlocalmemory/learning/consolidation_worker.py +188 -520
  73. package/src/superlocalmemory/learning/database.py +256 -0
  74. package/src/superlocalmemory/learning/dedup_hnsw.py +413 -0
  75. package/src/superlocalmemory/learning/ensemble.py +300 -0
  76. package/src/superlocalmemory/learning/fact_outcome_joins.py +207 -0
  77. package/src/superlocalmemory/learning/forgetting_scheduler.py +55 -0
  78. package/src/superlocalmemory/learning/hnsw_dedup.py +69 -0
  79. package/src/superlocalmemory/learning/labeler.py +87 -0
  80. package/src/superlocalmemory/learning/legacy_migration.py +277 -0
  81. package/src/superlocalmemory/learning/memory_merge.py +160 -0
  82. package/src/superlocalmemory/learning/model_cache.py +269 -0
  83. package/src/superlocalmemory/learning/model_rollback.py +278 -0
  84. package/src/superlocalmemory/learning/outcome_queue.py +284 -0
  85. package/src/superlocalmemory/learning/pattern_miner.py +415 -0
  86. package/src/superlocalmemory/learning/pattern_miner_constants.py +47 -0
  87. package/src/superlocalmemory/learning/ranker.py +225 -81
  88. package/src/superlocalmemory/learning/ranker_common.py +163 -0
  89. package/src/superlocalmemory/learning/ranker_retrain_legacy.py +202 -0
  90. package/src/superlocalmemory/learning/ranker_retrain_online.py +411 -0
  91. package/src/superlocalmemory/learning/reward.py +777 -0
  92. package/src/superlocalmemory/learning/reward_archive.py +210 -0
  93. package/src/superlocalmemory/learning/reward_boost.py +201 -0
  94. package/src/superlocalmemory/learning/reward_proxy.py +326 -0
  95. package/src/superlocalmemory/learning/shadow_test.py +524 -0
  96. package/src/superlocalmemory/learning/signal_worker.py +270 -0
  97. package/src/superlocalmemory/learning/signals.py +314 -0
  98. package/src/superlocalmemory/learning/trigram_index.py +547 -0
  99. package/src/superlocalmemory/mcp/server.py +5 -5
  100. package/src/superlocalmemory/mcp/tools_context.py +183 -0
  101. package/src/superlocalmemory/mcp/tools_core.py +92 -27
  102. package/src/superlocalmemory/parameterization/soft_prompt_generator.py +13 -0
  103. package/src/superlocalmemory/retrieval/engine.py +52 -0
  104. package/src/superlocalmemory/server/api.py +2 -2
  105. package/src/superlocalmemory/server/bandit_loops.py +140 -0
  106. package/src/superlocalmemory/server/middleware/__init__.py +11 -0
  107. package/src/superlocalmemory/server/middleware/security_headers.py +144 -0
  108. package/src/superlocalmemory/server/routes/backup.py +36 -13
  109. package/src/superlocalmemory/server/routes/behavioral.py +50 -19
  110. package/src/superlocalmemory/server/routes/brain.py +1234 -0
  111. package/src/superlocalmemory/server/routes/data_io.py +4 -4
  112. package/src/superlocalmemory/server/routes/events.py +2 -2
  113. package/src/superlocalmemory/server/routes/helpers.py +1 -1
  114. package/src/superlocalmemory/server/routes/learning.py +192 -7
  115. package/src/superlocalmemory/server/routes/memories.py +189 -1
  116. package/src/superlocalmemory/server/routes/prewarm.py +171 -0
  117. package/src/superlocalmemory/server/routes/profiles.py +3 -3
  118. package/src/superlocalmemory/server/routes/token.py +88 -0
  119. package/src/superlocalmemory/server/routes/ws.py +5 -5
  120. package/src/superlocalmemory/server/security_middleware.py +13 -7
  121. package/src/superlocalmemory/server/ui.py +2 -2
  122. package/src/superlocalmemory/server/unified_daemon.py +335 -3
  123. package/src/superlocalmemory/skills/slm-build-graph/SKILL.md +423 -0
  124. package/src/superlocalmemory/skills/slm-list-recent/SKILL.md +348 -0
  125. package/src/superlocalmemory/skills/slm-recall/SKILL.md +343 -0
  126. package/src/superlocalmemory/skills/slm-remember/SKILL.md +194 -0
  127. package/src/superlocalmemory/skills/slm-show-patterns/SKILL.md +224 -0
  128. package/src/superlocalmemory/skills/slm-status/SKILL.md +363 -0
  129. package/src/superlocalmemory/skills/slm-switch-profile/SKILL.md +442 -0
  130. package/src/superlocalmemory/storage/migration_runner.py +545 -0
  131. package/src/superlocalmemory/storage/migrations/M001_add_signal_features_columns.py +67 -0
  132. package/src/superlocalmemory/storage/migrations/M002_model_state_history.py +132 -0
  133. package/src/superlocalmemory/storage/migrations/M003_migration_log.py +38 -0
  134. package/src/superlocalmemory/storage/migrations/M004_cross_platform_sync_log.py +46 -0
  135. package/src/superlocalmemory/storage/migrations/M005_bandit_tables.py +75 -0
  136. package/src/superlocalmemory/storage/migrations/M006_action_outcomes_reward.py +75 -0
  137. package/src/superlocalmemory/storage/migrations/M007_pending_outcomes.py +63 -0
  138. package/src/superlocalmemory/storage/migrations/M009_model_lineage.py +54 -0
  139. package/src/superlocalmemory/storage/migrations/M010_evolution_config.py +75 -0
  140. package/src/superlocalmemory/storage/migrations/M011_archive_and_merge.py +87 -0
  141. package/src/superlocalmemory/storage/migrations/M012_shadow_observations.py +72 -0
  142. package/src/superlocalmemory/storage/migrations/M013_bi_temporal_columns.py +55 -0
  143. package/src/superlocalmemory/storage/migrations/__init__.py +81 -0
  144. package/src/superlocalmemory/storage/models.py +4 -0
  145. package/src/superlocalmemory/ui/css/brain.css +409 -0
  146. package/src/superlocalmemory/ui/css/legacy-dashboard.css +645 -0
  147. package/src/superlocalmemory/ui/index.html +459 -1345
  148. package/src/superlocalmemory/ui/js/brain.js +1321 -0
  149. package/src/superlocalmemory/ui/js/clusters.js +123 -4
  150. package/src/superlocalmemory/ui/js/init.js +48 -39
  151. package/src/superlocalmemory/ui/js/memories.js +88 -2
  152. package/src/superlocalmemory/ui/js/modal.js +71 -1
  153. package/src/superlocalmemory/ui/js/ng-shell.js +101 -88
  154. package/src/superlocalmemory/ui/js/trust-dashboard.js +168 -25
  155. package/src/superlocalmemory/ui/vendor/bootstrap-icons/bootstrap-icons.css +2018 -0
  156. package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff +0 -0
  157. package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff2 +0 -0
  158. package/src/superlocalmemory/ui/vendor/bootstrap.bundle.min.js +7 -0
  159. package/src/superlocalmemory/ui/vendor/bootstrap.min.css +6 -0
  160. package/src/superlocalmemory/ui/vendor/d3.v7.min.js +2 -0
  161. package/src/superlocalmemory/ui/vendor/graphology-library.min.js +2 -0
  162. package/src/superlocalmemory/ui/vendor/graphology.umd.min.js +2 -0
  163. package/src/superlocalmemory/ui/vendor/inter-ui/inter-variable.min.css +8 -0
  164. package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable-Italic.woff2 +0 -0
  165. package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable.woff2 +0 -0
  166. package/src/superlocalmemory/ui/vendor/sigma.min.js +1 -0
  167. package/src/superlocalmemory/ui/js/behavioral.js +0 -447
  168. package/src/superlocalmemory/ui/js/graph-core.js +0 -447
  169. package/src/superlocalmemory/ui/js/graph-interactions.js +0 -351
  170. package/src/superlocalmemory/ui/js/learning.js +0 -435
  171. package/src/superlocalmemory/ui/js/patterns.js +0 -93
  172. package/src/superlocalmemory.egg-info/PKG-INFO +0 -647
  173. package/src/superlocalmemory.egg-info/SOURCES.txt +0 -335
  174. package/src/superlocalmemory.egg-info/dependency_links.txt +0 -1
  175. package/src/superlocalmemory.egg-info/entry_points.txt +0 -2
  176. package/src/superlocalmemory.egg-info/requires.txt +0 -58
  177. package/src/superlocalmemory.egg-info/top_level.txt +0 -1
@@ -0,0 +1,284 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under AGPL-3.0-or-later - see LICENSE file
3
+ # Part of SuperLocalMemory v3.4.22 — Stage 9 DASH-02
4
+
5
+ """Background queue for producer-side engagement-reward recording.
6
+
7
+ **Why this module exists.** Before v3.4.22, ``EngagementRewardModel.record_recall``
8
+ had zero production callers. Every recall path (CLI, MCP, dashboard, daemon
9
+ HTTP) returned results without ever creating a ``pending_outcomes`` row. The
10
+ closed-loop learning pipeline therefore had no producer — its consumers
11
+ (PostToolUse hook, Stop hook, finalize_outcome, action_outcomes, retrain,
12
+ shadow, rollback) were all consuming an empty stream.
13
+
14
+ This module wires the producer as a non-blocking enqueue + background drain.
15
+ The I1 invariant is absolute — recall wall time must NOT regress. So we:
16
+
17
+ 1. ``enqueue_recall`` is ``queue.put_nowait`` — microseconds.
18
+ 2. A single daemon thread drains the queue and calls
19
+ ``EngagementRewardModel.record_recall`` (one SQLite INSERT per recall).
20
+ 3. Queue is bounded; a full queue drops the oldest entry and bumps a
21
+ counter. Signal quality is NEVER load-bearing on recall correctness.
22
+
23
+ The worker lives for the daemon's lifetime and is stopped on shutdown
24
+ by ``unified_daemon.py``'s lifespan hook.
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import logging
30
+ import queue
31
+ import threading
32
+ from dataclasses import dataclass, field
33
+ from pathlib import Path
34
+ from typing import Optional
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+
39
+ # ---------------------------------------------------------------------------
40
+ # Public dataclass — the enqueue payload
41
+ # ---------------------------------------------------------------------------
42
+
43
@dataclass(frozen=True)
class RecallEvent:
    """Immutable payload describing one recall that awaits persistence.

    The attributes correspond to the keyword arguments the drain worker
    hands to ``EngagementRewardModel.record_recall``.

    ``session_id`` is mandatory because hook-side signal matching keys on
    it. Callers without a natural session should supply a stable
    synthetic value such as ``f"cli:{os.getpid()}"`` or
    ``"dashboard:<profile>"``.
    """

    # Session the recall belongs to (required, see class docstring).
    session_id: str
    # Profile whose memories were recalled.
    profile_id: str
    # Raw query text of the recall.
    query: str
    # Identifiers of the facts returned by the recall.
    fact_ids: tuple[str, ...]
    # Optional identifier of the recall query; empty string when unknown.
    query_id: str = ""
57
+
58
+
59
+ # ---------------------------------------------------------------------------
60
+ # Module state — bounded queue + counters
61
+ # ---------------------------------------------------------------------------
62
+
63
+ #: S9-DASH-02: cap at 1000 pending recalls. At ~50 ms per row drain, even
64
+ #: a full queue drains in 50 s. Bigger than realistic daemon burst (p99
65
+ #: recalls/sec < 20). Fuller queue → drop oldest, bump counter.
66
+ _MAX_QUEUE = 1000
67
+
68
+ _queue: "queue.Queue[RecallEvent]" = queue.Queue(maxsize=_MAX_QUEUE)
69
+ _worker_thread: Optional[threading.Thread] = None
70
+ _stop_event = threading.Event()
71
+ _counters: dict[str, int] = {
72
+ "recall_enqueued": 0,
73
+ "recall_dropped_queue_full": 0,
74
+ "recall_persisted": 0,
75
+ "recall_persist_failed": 0,
76
+ "recall_reaped": 0,
77
+ }
78
+ _counters_lock = threading.Lock()
79
+
80
+
81
def _bump(name: str, n: int = 1) -> None:
    """Add ``n`` to the counter called ``name`` while holding the lock.

    A counter that does not exist yet is created starting from zero.
    """
    with _counters_lock:
        previous = _counters.get(name, 0)
        _counters[name] = previous + n
84
+
85
+
86
def get_counters() -> dict[str, int]:
    """Return a point-in-time copy of the queue counters.

    The copy is taken under the counters lock, so callers (dashboards,
    tests) can read or mutate it without affecting the live values.
    """
    with _counters_lock:
        snapshot = _counters.copy()
    return snapshot
90
+
91
+
92
def queue_size() -> int:
    """Return the current queue depth.

    The value comes straight from ``Queue.qsize()`` and is therefore only
    approximate while producers and the drain thread are active.
    """
    depth = _queue.qsize()
    return depth
95
+
96
+
97
+ # ---------------------------------------------------------------------------
98
+ # Public API — called from the recall hot path
99
+ # ---------------------------------------------------------------------------
100
+
101
def enqueue_recall(event: RecallEvent) -> None:
    """Queue ``event`` for later ``record_recall`` persistence without blocking.

    The hot-path cost is a single ``put_nowait``; the function never
    raises. Signal quality is not load-bearing on recall correctness
    (S9-DASH-02 contract), so events may be lost under pressure.

    Overflow policy: when the queue is full the OLDEST queued event is
    evicted so the newest recall always gets a chance to enter.

    Counter semantics:

    * ``recall_enqueued`` counts every event that actually entered the
      queue, including those admitted after an eviction.
    * ``recall_dropped_queue_full`` counts every event that was lost,
      whether an evicted old event or a rejected new one.

    Anything that is not a ``RecallEvent``, or that lacks ``session_id``
    or ``profile_id``, is ignored silently and touches no counter.
    """
    if not isinstance(event, RecallEvent):
        return
    if not event.session_id or not event.profile_id:
        # S9-DASH-02: session_id is mandatory — hooks key by it. A recall
        # without one can never be matched to a signal, so drop it.
        return

    try:
        _queue.put_nowait(event)
    except queue.Full:
        pass
    else:
        _bump("recall_enqueued")
        return

    # Queue was full: evict the oldest entry to make room.
    try:
        _queue.get_nowait()
    except queue.Empty:
        # The worker drained the queue in the meantime; nothing was lost.
        pass
    else:
        _bump("recall_dropped_queue_full")

    try:
        _queue.put_nowait(event)
    except queue.Full:  # pragma: no cover — defensive (lost a race)
        _bump("recall_dropped_queue_full")
    else:
        _bump("recall_enqueued")
129
+
130
+
131
+ # ---------------------------------------------------------------------------
132
+ # Worker — persists to pending_outcomes
133
+ # ---------------------------------------------------------------------------
134
+
135
def _drain_once(memory_db_path: Path, max_batch: int = 50) -> int:
    """Persist up to ``max_batch`` queued events and return how many succeeded.

    Each event is written through ``EngagementRewardModel.record_recall``.
    A failing event is counted under ``recall_persist_failed``, logged at
    debug level and discarded; it does not stop the rest of the batch.

    Args:
        memory_db_path: Database path handed to ``EngagementRewardModel``.
        max_batch: Upper bound on events taken from the queue in one call.

    Returns:
        Number of events persisted successfully.
    """
    # Fast path: the worker calls this on every tick, so avoid building
    # (and closing) a model when there is nothing to drain.
    if _queue.empty():
        return 0

    from superlocalmemory.learning.reward import EngagementRewardModel

    model = EngagementRewardModel(memory_db_path)
    persisted = 0
    try:
        for _ in range(max_batch):
            try:
                event = _queue.get_nowait()
            except queue.Empty:
                break
            try:
                model.record_recall(
                    profile_id=event.profile_id,
                    session_id=event.session_id,
                    recall_query_id=event.query_id or "",
                    fact_ids=list(event.fact_ids),
                    query_text=event.query,
                )
            except Exception as exc:
                _bump("recall_persist_failed")
                logger.debug(
                    "outcome_queue: record_recall failed: %s (session=%s)",
                    exc, event.session_id,
                )
            else:
                _bump("recall_persisted")
                persisted += 1
    finally:
        try:
            model.close()
        except Exception as exc:
            # Best-effort cleanup: a failing close must not mask the
            # batch result, but leave a trace for debugging.
            logger.debug("outcome_queue: model.close failed: %s", exc)
    return persisted
171
+
172
+
173
+ #: S9-DASH-02: reaper cadence — force-finalize pending_outcomes older
174
+ #: than this so CLI/dashboard recalls (no Stop hook) still land in
175
+ #: action_outcomes with a neutral reward. One hour keeps slow-moving
176
+ #: interactive sessions alive while still draining abandoned recalls.
177
+ _REAP_INTERVAL_S = 300.0 # check every 5 minutes
178
+ _REAP_AGE_MS = 3_600_000 # 1 hour
179
+
180
+
181
def _reap_stale(memory_db_path: Path) -> int:
    """Force-finalize pending_outcomes rows older than ``_REAP_AGE_MS``.

    Delegates to ``EngagementRewardModel.reap_stale``. Per the original
    design note, that method derives the label from whatever signals
    accumulated, using the neutral ``0.5`` when there are none (the
    intended value for CLI/dashboard recalls without hook coverage).

    Returns the number of rows reaped, or ``0`` if anything fails; the
    failure is logged at debug level and never propagated.
    """
    reaped_rows = 0
    try:
        from superlocalmemory.learning.reward import EngagementRewardModel

        reward_model = EngagementRewardModel(memory_db_path)
        try:
            reaped_rows = int(
                reward_model.reap_stale(older_than_ms=_REAP_AGE_MS)
            )
        finally:
            # Closing is best effort; a failure here is ignored.
            try:
                reward_model.close()
            except Exception:
                pass
    except Exception as exc:  # pragma: no cover — defensive
        logger.debug("reap_stale failed: %s", exc)
        return 0
    return reaped_rows
201
+
202
+
203
def _worker_loop(memory_db_path: Path, interval_s: float) -> None:
    """Body of the drain thread: drain, periodically reap, flush on stop.

    Every ``interval_s`` seconds (or sooner when the stop event fires)
    the loop drains one batch. Once ``_REAP_INTERVAL_S`` has elapsed it
    also runs the stale-row reaper. After the stop event is set, one
    final large drain flushes what is still queued.
    """
    logger.info(
        "outcome_queue worker started (db=%s interval=%.2fs)",
        memory_db_path, interval_s,
    )
    import time as _time

    reap_deadline = _time.monotonic() + _REAP_INTERVAL_S
    while True:
        if _stop_event.wait(interval_s):
            break

        try:
            _drain_once(memory_db_path)
        except Exception as exc:  # pragma: no cover — defensive
            logger.warning("outcome_queue drain crashed: %s", exc)

        # The reaper handles CLI/dashboard outcomes that no Stop hook
        # will ever finalize. It is scheduled by wall-clock deadline,
        # independent of drain results, so a busy queue cannot starve it.
        tick = _time.monotonic()
        if tick < reap_deadline:
            continue
        try:
            reaped = _reap_stale(memory_db_path)
            if reaped:
                logger.info(
                    "outcome_queue reaper: finalized %d stale rows",
                    reaped,
                )
                _bump("recall_reaped", reaped)
        except Exception:  # pragma: no cover
            pass
        reap_deadline = tick + _REAP_INTERVAL_S

    # Final drain on graceful shutdown.
    try:
        _drain_once(memory_db_path, max_batch=1000)
    except Exception:
        pass
    logger.info("outcome_queue worker stopped")
237
+
238
+
239
def start_worker(memory_db_path: Path, *, interval_s: float = 0.25) -> None:
    """Launch the drain thread unless one is already running.

    Calling this while a worker is alive is a no-op. Otherwise the stop
    flag is cleared and a new daemon thread named ``slm-outcome-queue``
    is started on ``_worker_loop``.
    """
    global _worker_thread

    existing = _worker_thread
    if existing is not None and existing.is_alive():
        return

    _stop_event.clear()
    drain_thread = threading.Thread(
        name="slm-outcome-queue",
        target=_worker_loop,
        args=(Path(memory_db_path), interval_s),
        daemon=True,
    )
    _worker_thread = drain_thread
    drain_thread.start()
252
+
253
+
254
def stop_worker(*, timeout_s: float = 2.0) -> int:
    """Ask the worker to stop and wait up to ``timeout_s`` for its flush.

    Returns the number of events still sitting in the queue afterwards.
    The module-level thread handle is cleared in every case.
    """
    global _worker_thread

    _stop_event.set()
    worker = _worker_thread
    if worker is not None and worker.is_alive():
        worker.join(timeout=timeout_s)
    leftover = _queue.qsize()
    _worker_thread = None
    return leftover
263
+
264
+
265
+ # ---------------------------------------------------------------------------
266
+ # Test helpers
267
+ # ---------------------------------------------------------------------------
268
+
269
def _reset_for_testing() -> None:
    """TEST-ONLY: stop the worker, empty the queue and zero all counters.

    Never called in production. The stop flag is cleared again at the
    end so a later ``start_worker`` begins from a clean state.
    """
    global _worker_thread

    _stop_event.set()
    worker = _worker_thread
    if worker is not None and worker.is_alive():
        worker.join(timeout=1.0)
    _worker_thread = None

    # Discard whatever is still queued.
    try:
        while True:
            _queue.get_nowait()
    except queue.Empty:
        pass

    with _counters_lock:
        for key in _counters:
            _counters[key] = 0
    _stop_event.clear()
@@ -0,0 +1,415 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under AGPL-3.0-or-later - see LICENSE file
3
+ # Part of SuperLocalMemory v3.4.22 — F4.A Stage-8 H-01 fix
4
+
5
+ """Behavioural pattern mining for the consolidation worker.
6
+
7
+ Analyses atomic_facts + signals + co-retrieval + entities to produce
8
+ pattern rows consumed by the dashboard, soft-prompt generator, and
9
+ recall ranker. Eight families: tech_preference, interest, temporal,
10
+ entity_preferences, session_activity, fact_type_distribution,
11
+ channel_performance + co_retrieval_clusters, knowledge_structure.
12
+
13
+ Contract refs: LLD-10 §2, Stage 8 H-01 (file split).
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import json as _json
19
+ import logging
20
+ import re
21
+ import sqlite3
22
+ from collections import Counter
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+ __all__ = ("generate_patterns",)
27
+
28
+
29
+ # Keyword + stopword dictionaries live in a sibling constants module
30
+ # so this file stays within the 400-LOC per-file cap.
31
+ from superlocalmemory.learning.pattern_miner_constants import (
32
+ TECH_KEYWORDS as _TECH_KEYWORDS,
33
+ STOPWORDS as _STOPWORDS,
34
+ )
35
+
36
+
37
def generate_patterns(
    memory_db: str,
    learning_db: str,
    profile_id: str,
    dry_run: bool,
) -> int:
    """Mine behavioural patterns and record them in BehavioralPatternStore.

    Loads the active ``atomic_facts`` rows of ``profile_id`` from
    ``memory_db`` and runs every family miner over them. Profiles with
    fewer than five active facts are skipped.

    Args:
        memory_db: Path of the SQLite database holding ``atomic_facts``.
        learning_db: Path handed to ``BehavioralPatternStore`` and to the
            channel/co-retrieval miner.
        profile_id: Profile whose facts are mined.
        dry_run: Forwarded to every miner; the miners visible here write
            nothing and report zero when it is true.

    Returns:
        The number of patterns generated, or ``0`` when there are too few
        facts or any error occurred (errors are logged, never raised).

    Per the original contract note, repeated calls are safe because the
    store upserts by ``(profile_id, pattern_type, pattern_key)``.
    """
    try:
        from superlocalmemory.learning.behavioral import BehavioralPatternStore

        conn = sqlite3.connect(memory_db, timeout=10)
        # try/finally guarantees the connection is released even when a
        # miner or the store raises part-way through.
        try:
            conn.execute("PRAGMA busy_timeout=5000")
            conn.row_factory = sqlite3.Row

            facts = conn.execute(
                "SELECT fact_id, content, fact_type, created_at, session_id, "
                "confidence, canonical_entities_json "
                "FROM atomic_facts "
                "WHERE profile_id = ? AND lifecycle = 'active' "
                "ORDER BY created_at DESC",
                (profile_id,),
            ).fetchall()

            if len(facts) < 5:
                return 0

            store = BehavioralPatternStore(learning_db)
            generated = 0

            generated += _mine_tech_preferences(
                store, facts, profile_id, dry_run,
            )
            generated += _mine_topic_interests(
                store, facts, profile_id, dry_run,
            )
            generated += _mine_temporal(store, facts, profile_id, dry_run)
            generated += _mine_entity_preferences(
                store, conn, facts, profile_id, dry_run,
            )
            generated += _mine_session_activity(
                store, facts, profile_id, dry_run,
            )
            generated += _mine_fact_type_distribution(
                store, facts, profile_id, dry_run,
            )
            generated += _mine_channel_and_coretrieval(
                store, learning_db, profile_id, dry_run,
            )
            generated += _mine_knowledge_structure(
                store, conn, profile_id, dry_run,
            )
        finally:
            conn.close()

        logger.info(
            "Pattern mining: %d patterns generated for profile %s "
            "from %d facts",
            generated, profile_id, len(facts),
        )
        return generated
    except Exception as exc:
        logger.warning("Pattern generation error: %s", exc)
        return 0
99
+
100
+
101
+ # ---------------------------------------------------------------------------
102
+ # Family miners — each returns its contribution to the generated count.
103
+ # ---------------------------------------------------------------------------
104
+
105
+
106
def _mine_tech_preferences(store, facts, profile_id, dry_run) -> int:
    """Record ``tech_preference`` patterns for frequently mentioned tech.

    Every fact's lower-cased content is scanned for each keyword in
    ``_TECH_KEYWORDS`` (substring match); each hit adds one to the
    keyword's label. Among the 20 most common labels, those seen at
    least twice are recorded. Returns the number of patterns recorded,
    which is always ``0`` on a dry run.
    """
    label_hits: Counter = Counter()
    for row in facts:
        text = dict(row)["content"].lower()
        label_hits.update(
            label
            for keyword, label in _TECH_KEYWORDS.items()
            if keyword in text
        )

    if dry_run:
        return 0

    # Confidence saturates once a label covers 10% of the facts, with a
    # floor of 10 mentions for small corpora.
    denominator = max(len(facts) * 0.1, 10)
    recorded = 0
    for tech, hits in label_hits.most_common(20):
        if hits < 2:
            continue
        score = min(1.0, hits / denominator)
        store.record_pattern(
            profile_id=profile_id,
            pattern_type="tech_preference",
            data={
                "topic": tech,
                "pattern_key": tech,
                "value": tech,
                "key": "tech",
                "evidence": hits,
            },
            success_rate=score,
            confidence=score,
        )
        recorded += 1
    return recorded
129
+
130
+
131
def _mine_topic_interests(store, facts, profile_id, dry_run) -> int:
    """Record ``interest`` patterns for the most frequent content words.

    Words are runs of four or more ASCII letters taken from each fact's
    lower-cased content, excluding anything in ``_STOPWORDS``. Among the
    15 most common words, those seen at least three times are recorded.
    Returns the number of patterns recorded (``0`` on a dry run).
    """
    frequency: Counter = Counter()
    for row in facts:
        tokens = re.findall(r'\b[a-zA-Z]{4,}\b', dict(row)["content"].lower())
        frequency.update(tok for tok in tokens if tok not in _STOPWORDS)

    if dry_run:
        return 0

    # Confidence saturates at 5% of the facts, with a floor of 15 uses.
    denominator = max(len(facts) * 0.05, 15)
    recorded = 0
    for topic, uses in frequency.most_common(15):
        if uses < 3:
            continue
        score = min(1.0, uses / denominator)
        store.record_pattern(
            profile_id=profile_id,
            pattern_type="interest",
            data={
                "topic": topic,
                "pattern_key": topic,
                "count": uses,
                "evidence": uses,
            },
            success_rate=score,
            confidence=score,
        )
        recorded += 1
    return recorded
153
+
154
+
155
+ def _mine_temporal(store, facts, profile_id, dry_run) -> int:
156
+ hour_counts: Counter = Counter()
157
+ for f in facts:
158
+ created = dict(f).get("created_at", "")
159
+ try:
160
+ if "T" in created:
161
+ hour = int(created.split("T")[1][:2])
162
+ elif " " in created:
163
+ hour = int(created.split(" ")[1][:2])
164
+ else:
165
+ continue
166
+ period = (
167
+ "morning" if 6 <= hour < 12 else
168
+ "afternoon" if 12 <= hour < 18 else
169
+ "evening" if 18 <= hour < 22 else "night"
170
+ )
171
+ hour_counts[period] += 1
172
+ except (ValueError, IndexError):
173
+ pass
174
+
175
+ gen = 0
176
+ total_hours = sum(hour_counts.values())
177
+ for period, count in hour_counts.most_common():
178
+ if count >= 2 and total_hours > 0 and not dry_run:
179
+ pct = round(count / total_hours * 100)
180
+ store.record_pattern(
181
+ profile_id=profile_id,
182
+ pattern_type="temporal",
183
+ data={"topic": period, "pattern_key": period,
184
+ "value": f"{period} ({pct}%)",
185
+ "evidence": count, "key": period,
186
+ "distribution": dict(hour_counts)},
187
+ success_rate=pct / 100,
188
+ confidence=min(1.0, count / max(total_hours * 0.1, 5)),
189
+ )
190
+ gen += 1
191
+ return gen
192
+
193
+
194
+ def _mine_entity_preferences(
195
+ store, conn: sqlite3.Connection, facts, profile_id, dry_run,
196
+ ) -> int:
197
+ entity_counts: Counter = Counter()
198
+ for f in facts:
199
+ raw = dict(f).get("canonical_entities_json", "")
200
+ if raw:
201
+ try:
202
+ for ent in _json.loads(raw):
203
+ entity_counts[ent] += 1
204
+ except (ValueError, TypeError):
205
+ pass
206
+
207
+ # v3.4.7: Resolve entity IDs to readable canonical names
208
+ entity_names: dict = {}
209
+ try:
210
+ eid_list = list(entity_counts.keys())
211
+ if eid_list:
212
+ placeholders = ",".join("?" * len(eid_list))
213
+ name_rows = conn.execute(
214
+ f"SELECT entity_id, canonical_name FROM canonical_entities "
215
+ f"WHERE entity_id IN ({placeholders})",
216
+ eid_list,
217
+ ).fetchall()
218
+ entity_names = {
219
+ dict(r)["entity_id"]: dict(r)["canonical_name"]
220
+ for r in name_rows
221
+ }
222
+ except Exception:
223
+ pass
224
+
225
+ gen = 0
226
+ # S9-DASH-01: skip entity IDs that don't resolve to a canonical name.
227
+ # Previously the fallback ``entity_names.get(entity, entity)`` leaked
228
+ # raw hex entity_ids (e.g. ``ea701bf01f1ff4df8``) into the dashboard
229
+ # as "preferences". We now (1) require a resolved readable name and
230
+ # (2) defensively drop pure-hex strings that somehow slip through
231
+ # (16-17 hex chars — the canonical_entities primary-key shape).
232
+ _HEX_ID_RE = re.compile(r"^[0-9a-f]{15,20}$")
233
+ for entity, count in entity_counts.most_common(15):
234
+ if count < 3 or dry_run:
235
+ continue
236
+ readable = entity_names.get(entity)
237
+ if not readable:
238
+ logger.debug(
239
+ "_mine_entity_preferences: skipping orphan entity_id=%r "
240
+ "(count=%d) — no row in canonical_entities",
241
+ entity, count,
242
+ )
243
+ continue
244
+ if _HEX_ID_RE.match(readable):
245
+ logger.debug(
246
+ "_mine_entity_preferences: skipping hex-shaped name=%r "
247
+ "— likely an entity_id stored as name", readable,
248
+ )
249
+ continue
250
+ confidence = min(1.0, count / max(len(facts) * 0.05, 10))
251
+ store.record_pattern(
252
+ profile_id=profile_id,
253
+ pattern_type="entity_preferences",
254
+ data={"topic": readable,
255
+ "pattern_key": f"entity:{readable}",
256
+ "value": readable, "evidence": count,
257
+ "source": "entity_frequency"},
258
+ success_rate=confidence,
259
+ confidence=confidence,
260
+ )
261
+ gen += 1
262
+ return gen
263
+
264
+
265
def _mine_session_activity(store, facts, profile_id, dry_run) -> int:
    """Record a ``session_activity`` pattern when some sessions are unusually busy.

    Facts are tallied by their ``session_id`` field; facts whose id is
    missing or falsy are ignored.  A session is "heavy" when it holds more
    than twice the mean number of facts per session.

    Args:
        store: Object exposing ``record_pattern`` (called with the keywords
            ``profile_id``, ``pattern_type``, ``data``, ``success_rate`` and
            ``confidence``).
        facts: Iterable of rows; each one is converted with ``dict()``.
        profile_id: Forwarded unchanged to ``store.record_pattern``.
        dry_run: When true, nothing is recorded.

    Returns:
        1 if a pattern was recorded, otherwise 0.
    """
    session_ids = (dict(fact).get("session_id", "") for fact in facts)
    per_session: Counter = Counter(sid for sid in session_ids if sid)

    if not per_session:
        return 0

    session_total = len(per_session)
    mean_facts = sum(per_session.values()) / session_total
    heavy = [
        sid for sid, fact_count in per_session.items()
        if fact_count > mean_facts * 2
    ]

    # Nothing to report (or reporting disabled): same outcome either way.
    if dry_run or not heavy:
        return 0

    heavy_total = len(heavy)
    store.record_pattern(
        profile_id=profile_id,
        pattern_type="session_activity",
        data={
            "pattern_key": "heavy_session_usage",
            "value": f"{heavy_total} intensive sessions",
            "evidence": heavy_total,
            "avg_facts": round(mean_facts, 1),
            "total_sessions": session_total,
        },
        success_rate=0.8,
        # Confidence saturates once five heavy sessions have been seen.
        confidence=min(1.0, heavy_total / 5),
    )
    return 1
294
+
295
+
296
def _mine_fact_type_distribution(store, facts, profile_id, dry_run) -> int:
    """Record which ``fact_type`` dominates the given facts.

    Each fact is converted with ``dict()`` and its ``fact_type`` is counted;
    a fact without that key is counted as ``"semantic"``.  The most common
    type, its share of all facts (as a whole-number percentage) and the full
    distribution are stored as one ``fact_type_distribution`` pattern.

    Args:
        store: Object exposing ``record_pattern`` (keyword arguments only).
        facts: Iterable of rows; each one is converted with ``dict()``.
        profile_id: Forwarded unchanged to ``store.record_pattern``.
        dry_run: When true, nothing is recorded.

    Returns:
        1 if a pattern was recorded, otherwise 0 (no facts, or dry run).
    """
    histogram: Counter = Counter(
        dict(fact).get("fact_type", "semantic") for fact in facts
    )
    fact_total = sum(histogram.values())
    if dry_run or fact_total <= 0:
        return 0

    (top_type, top_count), = histogram.most_common(1)
    percent = round(top_count / fact_total * 100)
    store.record_pattern(
        profile_id=profile_id,
        pattern_type="fact_type_distribution",
        data={
            "pattern_key": "memory_style",
            "value": f"{top_type} dominant ({percent}%)",
            "evidence": top_count,
            "distribution": dict(histogram),
        },
        success_rate=percent / 100,
        # Confidence saturates once the dominant type has 20 facts.
        confidence=min(1.0, top_count / 20),
    )
    return 1
318
+
319
+
320
def _mine_channel_and_coretrieval(
    store, learning_db: str, profile_id: str, dry_run: bool,
) -> int:
    """Mine retrieval-signal patterns from the learning database.

    Two kinds of pattern are recorded through ``store.record_pattern``:

    * ``channel_performance`` — one per channel in ``learning_feedback``
      that has at least 5 rows for ``profile_id``.
    * ``co_retrieval_clusters`` — a single summary of the (at most 20)
      ``co_retrieval_edges`` rows whose ``co_access_count`` is >= 3.

    The function is best-effort: failures are logged at debug level and
    never propagate.  A failure in the co-retrieval step does not discard
    channel patterns that were already recorded.

    Args:
        store: Object exposing ``record_pattern`` (keyword arguments only).
        learning_db: Path handed to ``sqlite3.connect``.
        profile_id: Profile whose rows are queried; also forwarded to
            ``store.record_pattern``.
        dry_run: When true, the queries still run but nothing is recorded.

    Returns:
        Number of patterns recorded.
    """
    gen = 0
    try:
        learn_conn = sqlite3.connect(learning_db, timeout=10)
        try:
            learn_conn.row_factory = sqlite3.Row

            channel_rows = learn_conn.execute(
                "SELECT channel, COUNT(*) AS cnt, "
                "AVG(signal_value) AS avg_signal "
                "FROM learning_feedback "
                "WHERE profile_id = ? "
                "GROUP BY channel ORDER BY cnt DESC",
                (profile_id,),
            ).fetchall()

            for row in channel_rows:
                d = dict(row)
                ch = d.get("channel", "unknown")
                cnt = d.get("cnt", 0)
                # AVG() yields NULL when every signal_value is NULL.
                avg_sig = round(float(d.get("avg_signal", 0) or 0), 3)
                if cnt >= 5 and not dry_run:
                    store.record_pattern(
                        profile_id=profile_id,
                        pattern_type="channel_performance",
                        data={"pattern_key": f"channel:{ch}",
                              "value": f"{ch} ({cnt} hits, {avg_sig} avg)",
                              "evidence": cnt,
                              "avg_signal": avg_sig},
                        success_rate=avg_sig,
                        confidence=min(1.0, cnt / 50),
                    )
                    gen += 1

            # The co-retrieval step is isolated so that its failure keeps
            # the channel patterns counted above.
            try:
                coret_rows = learn_conn.execute(
                    "SELECT fact_a, fact_b, co_access_count "
                    "FROM co_retrieval_edges "
                    "WHERE profile_id = ? AND co_access_count >= 3 "
                    "ORDER BY co_access_count DESC LIMIT 20",
                    (profile_id,),
                ).fetchall()
                if coret_rows and not dry_run:
                    # Rows are sorted descending, so the first is the
                    # strongest pair.
                    top_pair = coret_rows[0]["co_access_count"]
                    store.record_pattern(
                        profile_id=profile_id,
                        pattern_type="co_retrieval_clusters",
                        data={"pattern_key": "co_retrieval_clusters",
                              "value": f"{len(coret_rows)} strong fact pairs",
                              "evidence": len(coret_rows),
                              "top_pair_count": top_pair},
                        success_rate=0.7,
                        confidence=min(1.0, len(coret_rows) / 10),
                    )
                    gen += 1
            except Exception as exc:
                logger.debug("Co-retrieval pattern mining failed: %s", exc)
        finally:
            # Always release the connection, including when a query or
            # record_pattern call raised part-way through.
            learn_conn.close()
    except Exception as exc:
        logger.debug("Signal pattern mining failed: %s", exc)
    return gen
386
+
387
+
388
def _mine_knowledge_structure(
    store, conn: sqlite3.Connection, profile_id: str, dry_run: bool,
) -> int:
    """Record a summary of how facts are spread across communities.

    Counts ``fact_importance`` rows per non-NULL ``community_id`` for
    ``profile_id`` and stores one ``knowledge_structure`` pattern holding
    the number of communities and the total number of classified facts.

    The function is best-effort: any failure (for example a missing table)
    is logged at debug level and reported as "nothing recorded".

    Args:
        store: Object exposing ``record_pattern`` (keyword arguments only).
        conn: Open SQLite connection.  Each result row is converted with
            ``dict()``, so the connection needs a row factory that yields
            mapping-like rows (e.g. ``sqlite3.Row``).
        profile_id: Profile whose rows are queried; also forwarded to
            ``store.record_pattern``.
        dry_run: When true, the query still runs but nothing is recorded.

    Returns:
        1 if a pattern was recorded, otherwise 0.
    """
    try:
        comm_rows = conn.execute(
            "SELECT community_id, COUNT(*) AS cnt "
            "FROM fact_importance "
            "WHERE profile_id = ? AND community_id IS NOT NULL "
            "GROUP BY community_id ORDER BY cnt DESC",
            (profile_id,),
        ).fetchall()
        if dry_run or not comm_rows:
            return 0

        total_comm = sum(dict(r)["cnt"] for r in comm_rows)
        store.record_pattern(
            profile_id=profile_id,
            pattern_type="knowledge_structure",
            data={"pattern_key": "knowledge_structure",
                  "value": f"{len(comm_rows)} topic communities, "
                           f"{total_comm} classified facts",
                  "evidence": total_comm,
                  "community_count": len(comm_rows)},
            success_rate=0.8,
            # Confidence saturates once five communities exist.
            confidence=min(1.0, len(comm_rows) / 5),
        )
        return 1
    except Exception as exc:
        # Still best-effort, but leave a trace instead of failing silently.
        logger.debug("Knowledge structure mining failed: %s", exc)
    return 0