superlocalmemory 3.4.18 → 3.4.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. package/CHANGELOG.md +35 -0
  2. package/README.md +42 -34
  3. package/bin/slm +11 -0
  4. package/bin/slm.bat +12 -0
  5. package/package.json +4 -3
  6. package/pyproject.toml +3 -2
  7. package/scripts/build-slm-hook.ps1 +40 -0
  8. package/scripts/build-slm-hook.sh +45 -0
  9. package/scripts/build_entry.py +452 -0
  10. package/scripts/ci/stage5b_gate.sh +50 -0
  11. package/scripts/postinstall/validation.js +187 -0
  12. package/scripts/postinstall-interactive.js +756 -0
  13. package/scripts/postinstall_binary.js +287 -0
  14. package/scripts/release_manifest.py +273 -0
  15. package/scripts/slm-hook.spec +56 -0
  16. package/skills/slm-build-graph/SKILL.md +423 -0
  17. package/skills/slm-list-recent/SKILL.md +348 -0
  18. package/skills/slm-recall/SKILL.md +343 -0
  19. package/skills/slm-remember/SKILL.md +194 -0
  20. package/skills/slm-show-patterns/SKILL.md +224 -0
  21. package/skills/slm-status/SKILL.md +363 -0
  22. package/skills/slm-switch-profile/SKILL.md +442 -0
  23. package/src/superlocalmemory/cli/commands.py +219 -79
  24. package/src/superlocalmemory/cli/context_commands.py +192 -0
  25. package/src/superlocalmemory/cli/daemon.py +15 -1
  26. package/src/superlocalmemory/cli/db_migrate.py +80 -0
  27. package/src/superlocalmemory/cli/escape_hatch.py +220 -0
  28. package/src/superlocalmemory/cli/main.py +72 -1
  29. package/src/superlocalmemory/core/context_cache.py +397 -0
  30. package/src/superlocalmemory/core/embeddings.py +8 -2
  31. package/src/superlocalmemory/core/engine.py +38 -2
  32. package/src/superlocalmemory/core/engine_wiring.py +1 -1
  33. package/src/superlocalmemory/core/ram_lock.py +111 -0
  34. package/src/superlocalmemory/core/recall_pipeline.py +433 -3
  35. package/src/superlocalmemory/core/recall_worker.py +8 -3
  36. package/src/superlocalmemory/core/security_primitives.py +635 -0
  37. package/src/superlocalmemory/core/shadow_router.py +319 -0
  38. package/src/superlocalmemory/core/slm_disabled.py +87 -0
  39. package/src/superlocalmemory/core/slmignore.py +125 -0
  40. package/src/superlocalmemory/core/topic_signature.py +143 -0
  41. package/src/superlocalmemory/core/worker_pool.py +14 -3
  42. package/src/superlocalmemory/encoding/cognitive_consolidator.py +2 -2
  43. package/src/superlocalmemory/evolution/budget.py +321 -0
  44. package/src/superlocalmemory/evolution/llm_dispatch.py +508 -0
  45. package/src/superlocalmemory/evolution/skill_evolver.py +144 -94
  46. package/src/superlocalmemory/hooks/_outcome_common.py +506 -0
  47. package/src/superlocalmemory/hooks/adapter_base.py +317 -0
  48. package/src/superlocalmemory/hooks/antigravity_adapter.py +192 -0
  49. package/src/superlocalmemory/hooks/claude_code_hooks.py +33 -1
  50. package/src/superlocalmemory/hooks/context_payload.py +312 -0
  51. package/src/superlocalmemory/hooks/copilot_adapter.py +154 -0
  52. package/src/superlocalmemory/hooks/cross_platform_connector.py +90 -0
  53. package/src/superlocalmemory/hooks/cursor_adapter.py +195 -0
  54. package/src/superlocalmemory/hooks/hook_handlers.py +109 -8
  55. package/src/superlocalmemory/hooks/ide_connector.py +25 -2
  56. package/src/superlocalmemory/hooks/post_tool_async_hook.py +165 -0
  57. package/src/superlocalmemory/hooks/post_tool_outcome_hook.py +223 -0
  58. package/src/superlocalmemory/hooks/prewarm_auth.py +170 -0
  59. package/src/superlocalmemory/hooks/session_registry.py +186 -0
  60. package/src/superlocalmemory/hooks/stop_outcome_hook.py +134 -0
  61. package/src/superlocalmemory/hooks/sync_loop.py +114 -0
  62. package/src/superlocalmemory/hooks/user_prompt_hook.py +128 -0
  63. package/src/superlocalmemory/hooks/user_prompt_rehash_hook.py +202 -0
  64. package/src/superlocalmemory/infra/backup.py +3 -3
  65. package/src/superlocalmemory/infra/cloud_backup.py +2 -2
  66. package/src/superlocalmemory/infra/event_bus.py +2 -2
  67. package/src/superlocalmemory/infra/webhook_dispatcher.py +3 -3
  68. package/src/superlocalmemory/learning/arm_catalog.py +99 -0
  69. package/src/superlocalmemory/learning/bandit.py +526 -0
  70. package/src/superlocalmemory/learning/bandit_cache.py +133 -0
  71. package/src/superlocalmemory/learning/behavioral.py +53 -1
  72. package/src/superlocalmemory/learning/consolidation_cycle.py +381 -0
  73. package/src/superlocalmemory/learning/consolidation_worker.py +188 -520
  74. package/src/superlocalmemory/learning/database.py +256 -0
  75. package/src/superlocalmemory/learning/dedup_hnsw.py +413 -0
  76. package/src/superlocalmemory/learning/ensemble.py +300 -0
  77. package/src/superlocalmemory/learning/fact_outcome_joins.py +207 -0
  78. package/src/superlocalmemory/learning/forgetting_scheduler.py +55 -0
  79. package/src/superlocalmemory/learning/hnsw_dedup.py +69 -0
  80. package/src/superlocalmemory/learning/labeler.py +87 -0
  81. package/src/superlocalmemory/learning/legacy_migration.py +277 -0
  82. package/src/superlocalmemory/learning/memory_merge.py +160 -0
  83. package/src/superlocalmemory/learning/model_cache.py +269 -0
  84. package/src/superlocalmemory/learning/model_rollback.py +278 -0
  85. package/src/superlocalmemory/learning/outcome_queue.py +284 -0
  86. package/src/superlocalmemory/learning/pattern_miner.py +415 -0
  87. package/src/superlocalmemory/learning/pattern_miner_constants.py +47 -0
  88. package/src/superlocalmemory/learning/ranker.py +225 -81
  89. package/src/superlocalmemory/learning/ranker_common.py +163 -0
  90. package/src/superlocalmemory/learning/ranker_retrain_legacy.py +202 -0
  91. package/src/superlocalmemory/learning/ranker_retrain_online.py +411 -0
  92. package/src/superlocalmemory/learning/reward.py +777 -0
  93. package/src/superlocalmemory/learning/reward_archive.py +210 -0
  94. package/src/superlocalmemory/learning/reward_boost.py +201 -0
  95. package/src/superlocalmemory/learning/reward_proxy.py +326 -0
  96. package/src/superlocalmemory/learning/shadow_test.py +524 -0
  97. package/src/superlocalmemory/learning/signal_worker.py +270 -0
  98. package/src/superlocalmemory/learning/signals.py +314 -0
  99. package/src/superlocalmemory/learning/trigram_index.py +547 -0
  100. package/src/superlocalmemory/mcp/server.py +5 -5
  101. package/src/superlocalmemory/mcp/tools_context.py +183 -0
  102. package/src/superlocalmemory/mcp/tools_core.py +92 -27
  103. package/src/superlocalmemory/parameterization/soft_prompt_generator.py +13 -0
  104. package/src/superlocalmemory/retrieval/engine.py +52 -0
  105. package/src/superlocalmemory/retrieval/reranker.py +4 -2
  106. package/src/superlocalmemory/server/api.py +2 -2
  107. package/src/superlocalmemory/server/bandit_loops.py +140 -0
  108. package/src/superlocalmemory/server/middleware/__init__.py +11 -0
  109. package/src/superlocalmemory/server/middleware/security_headers.py +144 -0
  110. package/src/superlocalmemory/server/routes/backup.py +36 -13
  111. package/src/superlocalmemory/server/routes/behavioral.py +50 -19
  112. package/src/superlocalmemory/server/routes/brain.py +1234 -0
  113. package/src/superlocalmemory/server/routes/data_io.py +4 -4
  114. package/src/superlocalmemory/server/routes/events.py +2 -2
  115. package/src/superlocalmemory/server/routes/helpers.py +1 -1
  116. package/src/superlocalmemory/server/routes/learning.py +192 -7
  117. package/src/superlocalmemory/server/routes/memories.py +189 -1
  118. package/src/superlocalmemory/server/routes/prewarm.py +171 -0
  119. package/src/superlocalmemory/server/routes/profiles.py +3 -3
  120. package/src/superlocalmemory/server/routes/token.py +88 -0
  121. package/src/superlocalmemory/server/routes/ws.py +5 -5
  122. package/src/superlocalmemory/server/security_middleware.py +13 -7
  123. package/src/superlocalmemory/server/ui.py +2 -2
  124. package/src/superlocalmemory/server/unified_daemon.py +335 -3
  125. package/src/superlocalmemory/storage/migration_runner.py +545 -0
  126. package/src/superlocalmemory/storage/migrations/M001_add_signal_features_columns.py +67 -0
  127. package/src/superlocalmemory/storage/migrations/M002_model_state_history.py +132 -0
  128. package/src/superlocalmemory/storage/migrations/M003_migration_log.py +38 -0
  129. package/src/superlocalmemory/storage/migrations/M004_cross_platform_sync_log.py +46 -0
  130. package/src/superlocalmemory/storage/migrations/M005_bandit_tables.py +75 -0
  131. package/src/superlocalmemory/storage/migrations/M006_action_outcomes_reward.py +75 -0
  132. package/src/superlocalmemory/storage/migrations/M007_pending_outcomes.py +63 -0
  133. package/src/superlocalmemory/storage/migrations/M009_model_lineage.py +54 -0
  134. package/src/superlocalmemory/storage/migrations/M010_evolution_config.py +75 -0
  135. package/src/superlocalmemory/storage/migrations/M011_archive_and_merge.py +87 -0
  136. package/src/superlocalmemory/storage/migrations/M012_shadow_observations.py +72 -0
  137. package/src/superlocalmemory/storage/migrations/M013_bi_temporal_columns.py +55 -0
  138. package/src/superlocalmemory/storage/migrations/__init__.py +81 -0
  139. package/src/superlocalmemory/storage/models.py +4 -0
  140. package/src/superlocalmemory/ui/css/brain.css +409 -0
  141. package/src/superlocalmemory/ui/css/legacy-dashboard.css +645 -0
  142. package/src/superlocalmemory/ui/index.html +459 -1345
  143. package/src/superlocalmemory/ui/js/brain.js +1321 -0
  144. package/src/superlocalmemory/ui/js/clusters.js +123 -4
  145. package/src/superlocalmemory/ui/js/init.js +48 -39
  146. package/src/superlocalmemory/ui/js/memories.js +88 -2
  147. package/src/superlocalmemory/ui/js/modal.js +71 -1
  148. package/src/superlocalmemory/ui/js/ng-shell.js +101 -88
  149. package/src/superlocalmemory/ui/js/trust-dashboard.js +168 -25
  150. package/src/superlocalmemory/ui/vendor/bootstrap-icons/bootstrap-icons.css +2018 -0
  151. package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff +0 -0
  152. package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff2 +0 -0
  153. package/src/superlocalmemory/ui/vendor/bootstrap.bundle.min.js +7 -0
  154. package/src/superlocalmemory/ui/vendor/bootstrap.min.css +6 -0
  155. package/src/superlocalmemory/ui/vendor/d3.v7.min.js +2 -0
  156. package/src/superlocalmemory/ui/vendor/graphology-library.min.js +2 -0
  157. package/src/superlocalmemory/ui/vendor/graphology.umd.min.js +2 -0
  158. package/src/superlocalmemory/ui/vendor/inter-ui/inter-variable.min.css +8 -0
  159. package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable-Italic.woff2 +0 -0
  160. package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable.woff2 +0 -0
  161. package/src/superlocalmemory/ui/vendor/sigma.min.js +1 -0
  162. package/src/superlocalmemory/ui/js/behavioral.js +0 -447
  163. package/src/superlocalmemory/ui/js/graph-core.js +0 -447
  164. package/src/superlocalmemory/ui/js/graph-interactions.js +0 -351
  165. package/src/superlocalmemory/ui/js/learning.js +0 -435
  166. package/src/superlocalmemory/ui/js/patterns.js +0 -93
  167. package/src/superlocalmemory.egg-info/PKG-INFO +0 -647
  168. package/src/superlocalmemory.egg-info/SOURCES.txt +0 -335
  169. package/src/superlocalmemory.egg-info/dependency_links.txt +0 -1
  170. package/src/superlocalmemory.egg-info/entry_points.txt +0 -2
  171. package/src/superlocalmemory.egg-info/requires.txt +0 -58
  172. package/src/superlocalmemory.egg-info/top_level.txt +0 -1
@@ -0,0 +1,547 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under AGPL-3.0-or-later - see LICENSE file
3
+ # Part of SuperLocalMemory v3.4.21 — LLD-13 Track C.1
4
+
5
+ """Inline trigram entity detection — hot-path Layer A of the two-layer
6
+ entity detector defined in ``LLD-13-inline-entity-detection.md``.
7
+
8
+ Design contract (do NOT improvise):
9
+
10
+ - Hot-path ``lookup(text)`` targets **p99 < 2 ms**. Implemented as a
11
+ single parameterised SQLite ``SELECT`` over a pre-built ``entity_trigrams``
12
+ table in ``active_brain_cache.db`` plus a per-session ``@lru_cache``
13
+ (≤200 entries, ≤100 KB total).
14
+ - ``bootstrap()`` builds (or rebuilds) the cache table from
15
+ ``canonical_entities`` + ``entity_aliases`` in ``memory.db``. It
16
+ runs under ``core.ram_lock.ram_reservation('trigram_rebuild',
17
+ required_mb=300)`` per LLD-00 §7.
18
+ - ``memory.db`` is **SACRED** — this module only READS from
19
+ ``canonical_entities`` / ``entity_aliases``. Never writes.
20
+ - ``cache.db`` is **NOT a migration target** (LLD-00 §6). The index
21
+ table is (re)created via ``CREATE TABLE IF NOT EXISTS`` inside
22
+ ``bootstrap()``. ``slm cache clear`` and first-run both hit this
23
+ lazy path.
24
+ - Every SQL call uses parameterised queries (SEC-C-03). The IN-clause
25
+ placeholder count is bounded (≤256 trigrams).
26
+ - SQLite connections open with ``busy_timeout=50`` so a locked DB
27
+ fails fast rather than eating the hook budget.
28
+
29
+ Stdlib-only imports at module load. The singleton helper
30
+ ``get_or_none()`` returns a shared ``TrigramIndex`` instance or ``None``
31
+ if the cache DB is absent; the hook uses this to fall back silently.
32
+ """
33
+
34
+ from __future__ import annotations
35
+
36
+ import os
37
+ import sqlite3
38
+ import threading
39
+ import unicodedata
40
+ from functools import lru_cache
41
+ from pathlib import Path
42
+ from typing import Optional
43
+
44
+ # Import the RAM semaphore at module scope so tests can monkeypatch
45
+ # ``trigram_index.ram_reservation`` to a no-op on CI boxes with tight RAM.
46
+ from superlocalmemory.core.ram_lock import ram_reservation
47
+
48
+
49
+ # --------------------------------------------------------------------------
50
+ # Module constants
51
+ # --------------------------------------------------------------------------
52
+
53
+ _ACTIVE_PROFILE: str = "default"
54
+ _BUSY_TIMEOUT_MS: int = 50
55
+ _MAX_IN_CLAUSE: int = 256
56
+ _MAX_INPUT_CHARS: int = 500
57
+
58
+
59
+ # H-12/H-P-06: module-level cached connection for the inline lookup
60
+ # path. The first cache miss on a fresh session previously paid the
61
+ # ``sqlite3.connect`` cost (~1–3 ms warm, blowing the <2 ms p99
62
+ # budget). With a shared conn (guarded by ``_CACHE_CONN_LOCK``), every
63
+ # lookup pays only the query cost. ``_reset_cache_conn()`` exists so
64
+ # tests + ``bootstrap()`` can drop a stale conn after the cache DB is
65
+ # rebuilt.
66
+ _CACHE_CONN: Optional[sqlite3.Connection] = None
67
+ _CACHE_CONN_LOCK = threading.Lock()
68
+ _CACHE_CONN_OWNER_PID: int | None = None
69
+
70
+
71
def _reset_cache_conn_for_child() -> None:
    """Forget the connection handle a forked child inherited from its parent.

    The handle is deliberately *not* closed: the parent may still be using
    it, so closing here would race with the parent. The reference is simply
    dropped and the current (child) pid is recorded as the owner, which
    leaves ``_get_cache_conn`` free to open a fresh connection on demand.
    """
    global _CACHE_CONN, _CACHE_CONN_OWNER_PID
    _CACHE_CONN, _CACHE_CONN_OWNER_PID = None, os.getpid()
81
+
82
+
83
def _get_cache_conn() -> Optional[sqlite3.Connection]:
    """Return the process-wide cached connection to the trigram cache DB.

    Returns:
        The shared ``sqlite3.Connection``, or ``None`` when the cache DB
        file does not exist or the connection cannot be opened/configured.

    The connection is created with ``check_same_thread=False`` so several
    threads may receive the same object; callers are expected to hold
    ``_CACHE_CONN_LOCK`` around every ``execute`` on it.
    """
    global _CACHE_CONN, _CACHE_CONN_OWNER_PID
    current_pid = os.getpid()

    # Lock-free fast path: hand out the cached handle when it belongs to
    # this process (an unset owner pid is treated as "ours").
    cached = _CACHE_CONN
    if cached is not None and _CACHE_CONN_OWNER_PID in (None, current_pid):
        return cached

    with _CACHE_CONN_LOCK:
        if _CACHE_CONN is not None:
            if _CACHE_CONN_OWNER_PID in (None, current_pid):
                # Another thread populated the cache while we waited.
                return _CACHE_CONN
            # Pid drift: the handle was inherited across a fork that
            # bypassed ``register_at_fork``. Orphan it (never close a
            # handle the parent may still use). Done under the lock so a
            # concurrent thread cannot have its fresh conn clobbered.
            _CACHE_CONN = None

        if not TrigramIndex.CACHE_DB_PATH.exists():
            return None

        conn: Optional[sqlite3.Connection] = None
        try:
            conn = sqlite3.connect(
                str(TrigramIndex.CACHE_DB_PATH),
                timeout=0.05,
                isolation_level=None,
                check_same_thread=False,
            )
            conn.execute(f"PRAGMA busy_timeout = {_BUSY_TIMEOUT_MS}")
        except sqlite3.Error:
            # Do not leak a half-configured connection if the PRAGMA
            # (rather than the connect itself) is what failed.
            if conn is not None:
                try:
                    conn.close()
                except sqlite3.Error:  # pragma: no cover — defensive
                    pass
            return None

        _CACHE_CONN = conn
        _CACHE_CONN_OWNER_PID = current_pid
        return conn
120
+
121
+
122
def _reset_cache_conn() -> None:
    """Close and forget the module-level cached connection.

    Runs under ``_CACHE_CONN_LOCK``. Any ``sqlite3.Error`` raised while
    closing is ignored; afterwards both the cached handle and its owner
    pid are ``None`` so the next ``_get_cache_conn`` call reconnects.
    """
    global _CACHE_CONN, _CACHE_CONN_OWNER_PID
    with _CACHE_CONN_LOCK:
        stale = _CACHE_CONN
        if stale is not None:
            try:
                stale.close()
            except sqlite3.Error:  # pragma: no cover — defensive
                pass
        _CACHE_CONN = _CACHE_CONN_OWNER_PID = None
135
+
136
+
137
+ if hasattr(os, "register_at_fork"):
138
+ os.register_at_fork(after_in_child=_reset_cache_conn_for_child)
139
+
140
+
141
+ # --------------------------------------------------------------------------
142
+ # Trigram extraction (stdlib-only, deterministic, NFKD + ASCII-fold)
143
+ # --------------------------------------------------------------------------
144
+
145
+
146
+ #: L-P-02: cheap proxy for trigram "commonness". A trigram composed of
147
+ #: three ASCII lowercase letters is the most frequent class; any trigram
148
+ #: with a digit or with an uncommon starting letter is rarer and thus
149
+ #: more discriminative for entity lookup. Lower key == earlier sort ==
150
+ #: preferred to keep.
151
_COMMON_STARTS: frozenset[str] = frozenset("etaoinshrdlucmfgpwby")


def _trigram_rarity_key(t: str) -> int:
    """Rank a trigram by how discriminative it is (lower sorts first).

    Ranks:
        0 — contains at least one digit (e.g. ``"sl3"``, ``"1st"``).
        1 — no digit, and the first character is not in ``_COMMON_STARTS``.
        2 — no digit, and the first character is in ``_COMMON_STARTS``.
        3 — empty string.
    """
    if not t:
        return 3
    for ch in t:
        if ch.isdigit():
            return 0
    return 2 if t[0] in _COMMON_STARTS else 1
168
+
169
+
170
def _trigrams_for(text: str) -> set[str]:
    """Return the set of overlapping 3-grams found in ``text``.

    Steps: truncate to ``_MAX_INPUT_CHARS`` characters, NFKD-normalise,
    drop everything that is not ASCII, lowercase, turn every
    non-alphanumeric character into a separator, then emit each 3-character
    window of every resulting token. Tokens shorter than three characters
    contribute nothing. Falsy input yields an empty set.
    """
    if not text:
        return set()
    decomposed = unicodedata.normalize("NFKD", text[:_MAX_INPUT_CHARS])
    folded = decomposed.encode("ascii", "ignore").decode("ascii").lower()
    spaced = "".join(ch if ch.isalnum() else " " for ch in folded)
    # ``range(len(tok) - 2)`` is empty for tokens of length < 3, so short
    # tokens are skipped without an explicit length check.
    return {
        tok[start : start + 3]
        for tok in spaced.split()
        for start in range(len(tok) - 2)
    }
191
+
192
+
193
+ # --------------------------------------------------------------------------
194
+ # TrigramIndex
195
+ # --------------------------------------------------------------------------
196
+
197
+
198
class TrigramIndex:
    """Two-layer entity detection — Layer A (hot inline).

    ``bootstrap()`` reads ``canonical_entities`` + ``entity_aliases`` from
    the source DB (opened read-only) and writes a compact
    ``(trigram, entity_id, weight)`` table into the cache DB. ``lookup()``
    runs one grouped ``SELECT`` per prompt and returns up to
    ``LOOKUP_LIMIT`` ranked ``(entity_id, hits)`` candidates.
    """

    # Location of the cache DB holding the ``entity_trigrams`` table.
    CACHE_DB_PATH: Path = Path.home() / ".superlocalmemory" / "active_brain_cache.db"
    # Upper bound on (trigram, entity_id) rows written by a rebuild.
    MAX_TRIGRAMS: int = 1_000_000
    # Maximum number of candidates returned by ``lookup``.
    LOOKUP_LIMIT: int = 10
    # Minimum number of distinct matching trigrams for an entity to qualify.
    LOOKUP_MIN_HITS: int = 2

    #: L-P-04: reservation default mirrors LLD-00 §7 (300 MB). The env
    #: override lets operators right-size the reservation per host without
    #: a code change; unparsable values fall back to the default.
    BOOTSTRAP_RAM_MB_DEFAULT: int = 300
    BOOTSTRAP_RAM_MB_ENV: str = "SLM_TRIGRAM_BOOTSTRAP_RAM_MB"

    # SEC-M5 — safety cap on the number of source rows read per rebuild.
    # Rows are streamed from the cursor, so this bounds the work done on a
    # bloated ``memory.db`` rather than the size of a materialised list.
    _MAX_REBUILD_ROWS: int = 5_000_000

    # ------------------------------------------------------------------
    # Construction
    # ------------------------------------------------------------------

    def __init__(self, source_db_path: Path) -> None:
        """Bind the index to a source DB path.

        Args:
            source_db_path: Location of the source-of-truth DB that
                ``bootstrap()`` reads from.

        Raises:
            ValueError: If ``source_db_path`` is not a ``pathlib.Path``.
        """
        if not isinstance(source_db_path, Path):
            raise ValueError("source_db_path must be a pathlib.Path")
        self._source_db_path = source_db_path
        # Per-instance LRU wrapper (200 entries) around the SQLite lookup.
        # NOTE(review): ``_lookup_raw`` returns ``()`` both for a genuine
        # miss and when the cache DB is missing/locked, so failure results
        # are memoised too until ``cache_clear()`` runs.
        self._cached_lookup_key = lru_cache(maxsize=200)(self._lookup_raw)

    # ------------------------------------------------------------------
    # bootstrap() — daemon-side rebuild
    # ------------------------------------------------------------------

    @classmethod
    def _bootstrap_ram_mb(cls) -> int:
        """Return the RAM reservation (MB) to request for a rebuild.

        Reads ``BOOTSTRAP_RAM_MB_ENV``; an unset, blank, or non-integer
        value yields ``BOOTSTRAP_RAM_MB_DEFAULT``. Values below 16 are
        raised to 16 so the reservation never becomes meaningless.
        """
        raw = os.environ.get(cls.BOOTSTRAP_RAM_MB_ENV, "").strip()
        if not raw:
            return cls.BOOTSTRAP_RAM_MB_DEFAULT
        try:
            requested = int(raw)
        except ValueError:
            return cls.BOOTSTRAP_RAM_MB_DEFAULT
        return max(requested, 16)

    def bootstrap(self) -> None:
        """Rebuild the trigram cache table from the source DB.

        The whole rebuild runs inside ``ram_reservation('trigram_rebuild',
        required_mb=...)`` where the amount comes from
        ``_bootstrap_ram_mb()``. The source DB is opened read-only.
        """
        with ram_reservation(
            "trigram_rebuild",
            required_mb=self._bootstrap_ram_mb(),
        ):
            self._rebuild_index()

    def _rebuild_index(self) -> None:
        """Read the source, keep the heaviest rows, swap the cache table."""
        flat = self._top_weighted_rows(self._read_source_buckets())

        # Write to cache DB via atomic shadow-table swap.
        self.CACHE_DB_PATH.parent.mkdir(parents=True, exist_ok=True)

        # S9-W2 C5: drop the shared reader connection BEFORE opening the
        # writer so lookups in other threads do not keep using a handle
        # that spans the DROP/RENAME window; they reconnect afterwards.
        _reset_cache_conn()

        self._write_cache_table(flat)

        # Bust the per-instance LRU — memoised results refer to the rows
        # of the table that was just replaced.
        self._cached_lookup_key.cache_clear()

    def _read_source_buckets(self) -> dict[str, dict[str, float]]:
        """Stream entity names from the source DB into trigram buckets.

        Returns:
            ``{trigram: {entity_id: weight}}`` where ``weight`` counts how
            many of the entity's names (canonical or alias, once per
            joined row) contain that trigram.

        Raises:
            sqlite3.Error: If the source DB cannot be opened or queried.
        """
        buckets: dict[str, dict[str, float]] = {}
        # Build the URI with ``as_uri()`` so characters that are special
        # in a URI (``?``, ``#``, ``%``) and platform path separators are
        # encoded instead of being interpolated verbatim.
        uri = f"{self._source_db_path.resolve().as_uri()}?mode=ro"
        src = sqlite3.connect(uri, uri=True, timeout=1.0)
        try:
            # SEC-M5 — short busy_timeout so a locked memory.db fails fast.
            src.execute(f"PRAGMA busy_timeout = {_BUSY_TIMEOUT_MS}")
            # S9-W3 H-PERF-05: iterate the cursor row by row instead of
            # ``fetchall()`` so memory scales with ``buckets``, not with
            # the number of joined rows.
            cursor = src.execute(
                "SELECT ce.entity_id, ce.canonical_name, "
                "       COALESCE(ea.alias, '') AS alias "
                "FROM canonical_entities ce "
                "LEFT JOIN entity_aliases ea USING (entity_id) "
                "WHERE ce.profile_id = ? "
                "LIMIT ?",
                (_ACTIVE_PROFILE, self._MAX_REBUILD_ROWS),
            )
            for entity_id, canonical_name, alias in cursor:
                for name in (canonical_name, alias):
                    if not name:
                        continue
                    for tri in _trigrams_for(str(name)):
                        per_entity = buckets.setdefault(tri, {})
                        per_entity[entity_id] = per_entity.get(entity_id, 0.0) + 1.0
        finally:
            # The cursor references ``src``; close only after iteration.
            src.close()
        return buckets

    def _top_weighted_rows(
        self, buckets: dict[str, dict[str, float]]
    ) -> list[tuple[str, str, float]]:
        """Flatten ``buckets`` to at most ``MAX_TRIGRAMS`` heaviest rows.

        Rows are ranked by ``(weight, trigram, entity_id)``; the result is
        a list of ``(trigram, entity_id, weight)`` tuples.
        """
        import heapq

        cap = int(self.MAX_TRIGRAMS)
        weighted = (
            (float(weight), tri, eid)
            for tri, per_entity in buckets.items()
            for eid, weight in per_entity.items()
        )
        # ``nlargest`` keeps a bounded heap of ``cap`` items internally, so
        # the full flat list is never materialised (S9-defer H-P-05).
        return [(tri, eid, w) for (w, tri, eid) in heapq.nlargest(cap, weighted)]

    def _write_cache_table(self, flat: list[tuple[str, str, float]]) -> None:
        """Replace ``entity_trigrams`` in the cache DB with ``flat``.

        Rows are inserted into a shadow table which is then renamed over
        the live table inside a single ``with conn:`` transaction block.
        """
        conn = sqlite3.connect(str(self.CACHE_DB_PATH), timeout=2.0)
        try:
            # NOTE(review): this PRAGMA presumably supersedes the 2 s
            # ``timeout`` passed to ``connect`` above — confirm intent.
            conn.execute(f"PRAGMA busy_timeout = {_BUSY_TIMEOUT_MS}")
            conn.executescript(
                """
                CREATE TABLE IF NOT EXISTS entity_trigrams (
                    trigram    TEXT NOT NULL,
                    entity_id  TEXT NOT NULL,
                    weight     REAL NOT NULL DEFAULT 1.0,
                    PRIMARY KEY (trigram, entity_id)
                ) WITHOUT ROWID;
                CREATE INDEX IF NOT EXISTS idx_trigram_lookup
                    ON entity_trigrams (trigram);
                CREATE TABLE IF NOT EXISTS entity_trigrams_meta (
                    key   TEXT PRIMARY KEY,
                    value TEXT NOT NULL
                );
                DROP TABLE IF EXISTS entity_trigrams_shadow;
                CREATE TABLE entity_trigrams_shadow (
                    trigram    TEXT NOT NULL,
                    entity_id  TEXT NOT NULL,
                    weight     REAL NOT NULL DEFAULT 1.0,
                    PRIMARY KEY (trigram, entity_id)
                ) WITHOUT ROWID;
                """
            )
            with conn:
                conn.executemany(
                    "INSERT INTO entity_trigrams_shadow (trigram, entity_id, weight) "
                    "VALUES (?, ?, ?)",
                    flat,
                )
                conn.execute("DROP TABLE entity_trigrams")
                conn.execute(
                    "ALTER TABLE entity_trigrams_shadow "
                    "RENAME TO entity_trigrams"
                )
                conn.execute(
                    "CREATE INDEX IF NOT EXISTS idx_trigram_lookup "
                    "ON entity_trigrams (trigram)"
                )
                # The stored value is the number of (trigram, entity_id)
                # rows written, despite the key being named
                # ``entity_count``.
                conn.execute(
                    "INSERT OR REPLACE INTO entity_trigrams_meta (key, value) "
                    "VALUES (?, ?)",
                    ("entity_count", str(len(flat))),
                )
        finally:
            conn.close()

    # ------------------------------------------------------------------
    # lookup() — hot path
    # ------------------------------------------------------------------

    def lookup(self, text: str) -> list[tuple[str, int]]:
        """Return up to ``LOOKUP_LIMIT`` ``(entity_id, hits)`` matches,
        ordered by hit count DESC, summed weight DESC.

        Returns ``[]`` for empty input, an empty trigram set, or any
        failure (missing table, locked DB, ...). Target p99 < 2 ms.
        """
        if not text:
            return []
        trigrams = _trigrams_for(text)
        if not trigrams:
            return []
        if len(trigrams) > _MAX_IN_CLAUSE:
            # L-P-02: keep the ``_MAX_IN_CLAUSE`` most discriminative
            # trigrams (digit-bearing first, then uncommon prefixes). The
            # trigram itself is the secondary sort key, so the selection —
            # and therefore the LRU key — is deterministic.
            ranked = sorted(trigrams, key=lambda t: (_trigram_rarity_key(t), t))
            trigrams = set(ranked[:_MAX_IN_CLAUSE])
        key = frozenset(trigrams)
        try:
            return list(self._cached_lookup_key(key))
        except Exception:
            # Deliberate best-effort: the hook must never crash on a
            # lookup. Self-heal the cache, then retry once uncached.
            try:
                self._cached_lookup_key.cache_clear()
            except Exception:
                pass
            try:
                return list(self._lookup_raw(key))
            except Exception:
                return []

    def _lookup_raw(self, trigrams: frozenset[str]) -> tuple[tuple[str, int], ...]:
        """SQLite-backed lookup. Returns a tuple (hashable for LRU).

        Tries the module-cached connection first and falls back to a
        one-shot connection when none is available or the query raised
        ``sqlite3.OperationalError``. Returns ``()`` when there is nothing
        to look up, the cache DB is absent, or the fallback also fails.
        """
        if not trigrams:
            return ()
        if not self.CACHE_DB_PATH.exists():
            return ()

        params = tuple(trigrams)
        placeholders = ",".join("?" * len(params))
        sql = (
            "SELECT entity_id, COUNT(*) AS hits, SUM(weight) AS score "
            "FROM entity_trigrams "
            f"WHERE trigram IN ({placeholders}) "
            "GROUP BY entity_id "
            "HAVING hits >= ? "
            "ORDER BY hits DESC, score DESC "
            "LIMIT ?"
        )
        bound = params + (self.LOOKUP_MIN_HITS, self.LOOKUP_LIMIT)

        rows = None
        # H-12/H-P-06: prefer the module-cached connection. It is shared
        # across threads (``check_same_thread=False``), so every execute
        # is serialised through ``_CACHE_CONN_LOCK``.
        conn = _get_cache_conn()
        if conn is not None:
            try:
                with _CACHE_CONN_LOCK:
                    rows = conn.execute(sql, bound).fetchall()
            except sqlite3.OperationalError as exc:
                # S9-W2 H-PERF-04: evict the shared conn only when the
                # error indicates a schema change (table dropped/rebuilt).
                # Other operational errors (e.g. a busy DB) leave the
                # shared conn alone and just use the fallback below.
                msg = str(exc).lower()
                if "schema" in msg or "no such table" in msg:
                    _reset_cache_conn()
        if rows is None:
            rows = self._query_with_fresh_conn(sql, bound)
            if rows is None:
                return ()
        return tuple((eid, int(hits)) for (eid, hits, _score) in rows)

    def _query_with_fresh_conn(self, sql: str, bound: tuple) -> Optional[list]:
        """Run ``sql`` on a one-shot connection; ``None`` on failure."""
        try:
            fresh = sqlite3.connect(
                str(self.CACHE_DB_PATH),
                timeout=0.05,  # 50 ms connection timeout
                isolation_level=None,
            )
        except sqlite3.OperationalError:
            return None
        try:
            fresh.execute(f"PRAGMA busy_timeout = {_BUSY_TIMEOUT_MS}")
            return fresh.execute(sql, bound).fetchall()
        except sqlite3.OperationalError:
            return None
        finally:
            fresh.close()
519
+
520
+
521
+ # --------------------------------------------------------------------------
522
+ # Singleton accessor used by the hook
523
+ # --------------------------------------------------------------------------
524
+
525
+
526
_SINGLETON: Optional[TrigramIndex] = None


def get_or_none() -> Optional[TrigramIndex]:
    """Return the process-local ``TrigramIndex``, creating it on first use.

    An instance is created only when both the cache DB
    (``TrigramIndex.CACHE_DB_PATH``) and ``~/.superlocalmemory/memory.db``
    exist; otherwise ``None`` is returned so the hook can fall back to the
    regex-only signature. Once created, the same instance is returned on
    every later call without re-checking the files.

    Test fixtures monkeypatch this module-level function directly.
    """
    global _SINGLETON
    if _SINGLETON is None and TrigramIndex.CACHE_DB_PATH.exists():
        source_db = Path.home() / ".superlocalmemory" / "memory.db"
        if source_db.exists():
            _SINGLETON = TrigramIndex(source_db_path=source_db)
    return _SINGLETON
545
+
546
+
547
+ __all__ = ("TrigramIndex", "get_or_none")
@@ -55,11 +55,11 @@ def reset_engine():
55
55
 
56
56
  # Register tools and resources -------------------------------------------------
57
57
  #
58
- # V3.3.19: Trimmed from 38 tools to 15 essential tools.
59
- # IDEs cap at 50-100 tools total (Cursor, Antigravity, Windsurf).
60
- # 38 tools from SLM alone crowds out other MCP servers.
61
- #
62
- # Essential 15: the tools an AI agent actually needs during a session.
58
+ # Essential-only default: 25 base tools + 8 mesh tools = 33 registered
59
+ # when mesh is enabled. Set ``SLM_MCP_ALL_TOOLS=1`` to expose the full
60
+ # toolset. Rationale: IDEs cap at 50-100 tools total (Cursor,
61
+ # Antigravity, Windsurf) and a maximal SLM registration crowds out
62
+ # other MCP servers the user may have installed.
63
63
  # Admin/diagnostics tools remain available via CLI (`slm <command>`).
64
64
  # Set SLM_MCP_ALL_TOOLS=1 to enable all 38 tools (power users).
65
65