superlocalmemory 3.4.18 → 3.4.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +35 -0
- package/README.md +42 -34
- package/bin/slm +11 -0
- package/bin/slm.bat +12 -0
- package/package.json +4 -3
- package/pyproject.toml +3 -2
- package/scripts/build-slm-hook.ps1 +40 -0
- package/scripts/build-slm-hook.sh +45 -0
- package/scripts/build_entry.py +452 -0
- package/scripts/ci/stage5b_gate.sh +50 -0
- package/scripts/postinstall/validation.js +187 -0
- package/scripts/postinstall-interactive.js +756 -0
- package/scripts/postinstall_binary.js +287 -0
- package/scripts/release_manifest.py +273 -0
- package/scripts/slm-hook.spec +56 -0
- package/skills/slm-build-graph/SKILL.md +423 -0
- package/skills/slm-list-recent/SKILL.md +348 -0
- package/skills/slm-recall/SKILL.md +343 -0
- package/skills/slm-remember/SKILL.md +194 -0
- package/skills/slm-show-patterns/SKILL.md +224 -0
- package/skills/slm-status/SKILL.md +363 -0
- package/skills/slm-switch-profile/SKILL.md +442 -0
- package/src/superlocalmemory/cli/commands.py +219 -79
- package/src/superlocalmemory/cli/context_commands.py +192 -0
- package/src/superlocalmemory/cli/daemon.py +15 -1
- package/src/superlocalmemory/cli/db_migrate.py +80 -0
- package/src/superlocalmemory/cli/escape_hatch.py +220 -0
- package/src/superlocalmemory/cli/main.py +72 -1
- package/src/superlocalmemory/core/context_cache.py +397 -0
- package/src/superlocalmemory/core/embeddings.py +8 -2
- package/src/superlocalmemory/core/engine.py +38 -2
- package/src/superlocalmemory/core/engine_wiring.py +1 -1
- package/src/superlocalmemory/core/ram_lock.py +111 -0
- package/src/superlocalmemory/core/recall_pipeline.py +433 -3
- package/src/superlocalmemory/core/recall_worker.py +8 -3
- package/src/superlocalmemory/core/security_primitives.py +635 -0
- package/src/superlocalmemory/core/shadow_router.py +319 -0
- package/src/superlocalmemory/core/slm_disabled.py +87 -0
- package/src/superlocalmemory/core/slmignore.py +125 -0
- package/src/superlocalmemory/core/topic_signature.py +143 -0
- package/src/superlocalmemory/core/worker_pool.py +14 -3
- package/src/superlocalmemory/encoding/cognitive_consolidator.py +2 -2
- package/src/superlocalmemory/evolution/budget.py +321 -0
- package/src/superlocalmemory/evolution/llm_dispatch.py +508 -0
- package/src/superlocalmemory/evolution/skill_evolver.py +144 -94
- package/src/superlocalmemory/hooks/_outcome_common.py +506 -0
- package/src/superlocalmemory/hooks/adapter_base.py +317 -0
- package/src/superlocalmemory/hooks/antigravity_adapter.py +192 -0
- package/src/superlocalmemory/hooks/claude_code_hooks.py +33 -1
- package/src/superlocalmemory/hooks/context_payload.py +312 -0
- package/src/superlocalmemory/hooks/copilot_adapter.py +154 -0
- package/src/superlocalmemory/hooks/cross_platform_connector.py +90 -0
- package/src/superlocalmemory/hooks/cursor_adapter.py +195 -0
- package/src/superlocalmemory/hooks/hook_handlers.py +109 -8
- package/src/superlocalmemory/hooks/ide_connector.py +25 -2
- package/src/superlocalmemory/hooks/post_tool_async_hook.py +165 -0
- package/src/superlocalmemory/hooks/post_tool_outcome_hook.py +223 -0
- package/src/superlocalmemory/hooks/prewarm_auth.py +170 -0
- package/src/superlocalmemory/hooks/session_registry.py +186 -0
- package/src/superlocalmemory/hooks/stop_outcome_hook.py +134 -0
- package/src/superlocalmemory/hooks/sync_loop.py +114 -0
- package/src/superlocalmemory/hooks/user_prompt_hook.py +128 -0
- package/src/superlocalmemory/hooks/user_prompt_rehash_hook.py +202 -0
- package/src/superlocalmemory/infra/backup.py +3 -3
- package/src/superlocalmemory/infra/cloud_backup.py +2 -2
- package/src/superlocalmemory/infra/event_bus.py +2 -2
- package/src/superlocalmemory/infra/webhook_dispatcher.py +3 -3
- package/src/superlocalmemory/learning/arm_catalog.py +99 -0
- package/src/superlocalmemory/learning/bandit.py +526 -0
- package/src/superlocalmemory/learning/bandit_cache.py +133 -0
- package/src/superlocalmemory/learning/behavioral.py +53 -1
- package/src/superlocalmemory/learning/consolidation_cycle.py +381 -0
- package/src/superlocalmemory/learning/consolidation_worker.py +188 -520
- package/src/superlocalmemory/learning/database.py +256 -0
- package/src/superlocalmemory/learning/dedup_hnsw.py +413 -0
- package/src/superlocalmemory/learning/ensemble.py +300 -0
- package/src/superlocalmemory/learning/fact_outcome_joins.py +207 -0
- package/src/superlocalmemory/learning/forgetting_scheduler.py +55 -0
- package/src/superlocalmemory/learning/hnsw_dedup.py +69 -0
- package/src/superlocalmemory/learning/labeler.py +87 -0
- package/src/superlocalmemory/learning/legacy_migration.py +277 -0
- package/src/superlocalmemory/learning/memory_merge.py +160 -0
- package/src/superlocalmemory/learning/model_cache.py +269 -0
- package/src/superlocalmemory/learning/model_rollback.py +278 -0
- package/src/superlocalmemory/learning/outcome_queue.py +284 -0
- package/src/superlocalmemory/learning/pattern_miner.py +415 -0
- package/src/superlocalmemory/learning/pattern_miner_constants.py +47 -0
- package/src/superlocalmemory/learning/ranker.py +225 -81
- package/src/superlocalmemory/learning/ranker_common.py +163 -0
- package/src/superlocalmemory/learning/ranker_retrain_legacy.py +202 -0
- package/src/superlocalmemory/learning/ranker_retrain_online.py +411 -0
- package/src/superlocalmemory/learning/reward.py +777 -0
- package/src/superlocalmemory/learning/reward_archive.py +210 -0
- package/src/superlocalmemory/learning/reward_boost.py +201 -0
- package/src/superlocalmemory/learning/reward_proxy.py +326 -0
- package/src/superlocalmemory/learning/shadow_test.py +524 -0
- package/src/superlocalmemory/learning/signal_worker.py +270 -0
- package/src/superlocalmemory/learning/signals.py +314 -0
- package/src/superlocalmemory/learning/trigram_index.py +547 -0
- package/src/superlocalmemory/mcp/server.py +5 -5
- package/src/superlocalmemory/mcp/tools_context.py +183 -0
- package/src/superlocalmemory/mcp/tools_core.py +92 -27
- package/src/superlocalmemory/parameterization/soft_prompt_generator.py +13 -0
- package/src/superlocalmemory/retrieval/engine.py +52 -0
- package/src/superlocalmemory/retrieval/reranker.py +4 -2
- package/src/superlocalmemory/server/api.py +2 -2
- package/src/superlocalmemory/server/bandit_loops.py +140 -0
- package/src/superlocalmemory/server/middleware/__init__.py +11 -0
- package/src/superlocalmemory/server/middleware/security_headers.py +144 -0
- package/src/superlocalmemory/server/routes/backup.py +36 -13
- package/src/superlocalmemory/server/routes/behavioral.py +50 -19
- package/src/superlocalmemory/server/routes/brain.py +1234 -0
- package/src/superlocalmemory/server/routes/data_io.py +4 -4
- package/src/superlocalmemory/server/routes/events.py +2 -2
- package/src/superlocalmemory/server/routes/helpers.py +1 -1
- package/src/superlocalmemory/server/routes/learning.py +192 -7
- package/src/superlocalmemory/server/routes/memories.py +189 -1
- package/src/superlocalmemory/server/routes/prewarm.py +171 -0
- package/src/superlocalmemory/server/routes/profiles.py +3 -3
- package/src/superlocalmemory/server/routes/token.py +88 -0
- package/src/superlocalmemory/server/routes/ws.py +5 -5
- package/src/superlocalmemory/server/security_middleware.py +13 -7
- package/src/superlocalmemory/server/ui.py +2 -2
- package/src/superlocalmemory/server/unified_daemon.py +335 -3
- package/src/superlocalmemory/storage/migration_runner.py +545 -0
- package/src/superlocalmemory/storage/migrations/M001_add_signal_features_columns.py +67 -0
- package/src/superlocalmemory/storage/migrations/M002_model_state_history.py +132 -0
- package/src/superlocalmemory/storage/migrations/M003_migration_log.py +38 -0
- package/src/superlocalmemory/storage/migrations/M004_cross_platform_sync_log.py +46 -0
- package/src/superlocalmemory/storage/migrations/M005_bandit_tables.py +75 -0
- package/src/superlocalmemory/storage/migrations/M006_action_outcomes_reward.py +75 -0
- package/src/superlocalmemory/storage/migrations/M007_pending_outcomes.py +63 -0
- package/src/superlocalmemory/storage/migrations/M009_model_lineage.py +54 -0
- package/src/superlocalmemory/storage/migrations/M010_evolution_config.py +75 -0
- package/src/superlocalmemory/storage/migrations/M011_archive_and_merge.py +87 -0
- package/src/superlocalmemory/storage/migrations/M012_shadow_observations.py +72 -0
- package/src/superlocalmemory/storage/migrations/M013_bi_temporal_columns.py +55 -0
- package/src/superlocalmemory/storage/migrations/__init__.py +81 -0
- package/src/superlocalmemory/storage/models.py +4 -0
- package/src/superlocalmemory/ui/css/brain.css +409 -0
- package/src/superlocalmemory/ui/css/legacy-dashboard.css +645 -0
- package/src/superlocalmemory/ui/index.html +459 -1345
- package/src/superlocalmemory/ui/js/brain.js +1321 -0
- package/src/superlocalmemory/ui/js/clusters.js +123 -4
- package/src/superlocalmemory/ui/js/init.js +48 -39
- package/src/superlocalmemory/ui/js/memories.js +88 -2
- package/src/superlocalmemory/ui/js/modal.js +71 -1
- package/src/superlocalmemory/ui/js/ng-shell.js +101 -88
- package/src/superlocalmemory/ui/js/trust-dashboard.js +168 -25
- package/src/superlocalmemory/ui/vendor/bootstrap-icons/bootstrap-icons.css +2018 -0
- package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff +0 -0
- package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff2 +0 -0
- package/src/superlocalmemory/ui/vendor/bootstrap.bundle.min.js +7 -0
- package/src/superlocalmemory/ui/vendor/bootstrap.min.css +6 -0
- package/src/superlocalmemory/ui/vendor/d3.v7.min.js +2 -0
- package/src/superlocalmemory/ui/vendor/graphology-library.min.js +2 -0
- package/src/superlocalmemory/ui/vendor/graphology.umd.min.js +2 -0
- package/src/superlocalmemory/ui/vendor/inter-ui/inter-variable.min.css +8 -0
- package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable-Italic.woff2 +0 -0
- package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable.woff2 +0 -0
- package/src/superlocalmemory/ui/vendor/sigma.min.js +1 -0
- package/src/superlocalmemory/ui/js/behavioral.js +0 -447
- package/src/superlocalmemory/ui/js/graph-core.js +0 -447
- package/src/superlocalmemory/ui/js/graph-interactions.js +0 -351
- package/src/superlocalmemory/ui/js/learning.js +0 -435
- package/src/superlocalmemory/ui/js/patterns.js +0 -93
- package/src/superlocalmemory.egg-info/PKG-INFO +0 -647
- package/src/superlocalmemory.egg-info/SOURCES.txt +0 -335
- package/src/superlocalmemory.egg-info/dependency_links.txt +0 -1
- package/src/superlocalmemory.egg-info/entry_points.txt +0 -2
- package/src/superlocalmemory.egg-info/requires.txt +0 -58
- package/src/superlocalmemory.egg-info/top_level.txt +0 -1
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under AGPL-3.0-or-later - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory v3.4.21 — Stage 8 SB-1 / Track A.3 (LLD-10)
|
|
4
|
+
|
|
5
|
+
"""Daemon-resident recall-time A/B router for LLD-10 shadow + rollback.
|
|
6
|
+
|
|
7
|
+
This module is the single seam that wires ``ShadowTest`` (pre-promotion)
|
|
8
|
+
and ``ModelRollback`` (post-promotion) into the live recall path. Before
|
|
9
|
+
this module existed (pre-v3.4.21 Stage 8 SB-1) both classes were
|
|
10
|
+
defined + unit-tested but had zero production callers in ``src/``.
|
|
11
|
+
|
|
12
|
+
Design constraints:
|
|
13
|
+
|
|
14
|
+
* Process-local state only — one ``ShadowRouter`` instance per
|
|
15
|
+
``(memory_db, learning_db, profile_id)`` tuple, held by the
|
|
16
|
+
daemon's in-process singleton cache.
|
|
17
|
+
* Deterministic A/B routing — ``route_query(qid)`` uses
|
|
18
|
+
``sha256(install_token + qid)`` so an attacker who controls
|
|
19
|
+
``qid`` still cannot bias the split without reading the install
|
|
20
|
+
token on disk (closes skeptic H-02 + H-03).
|
|
21
|
+
* Promotion and rollback are DELEGATED to the canonical helpers in
|
|
22
|
+
``learning/consolidation_worker`` and ``learning/model_rollback``.
|
|
23
|
+
The router never writes to ``learning_model_state`` directly.
|
|
24
|
+
* Fail-soft: any exception during recall-time ingestion is logged
|
|
25
|
+
and swallowed — we must not break the user's recall path.
|
|
26
|
+
|
|
27
|
+
References:
|
|
28
|
+
- LLD-00 §8 — two-phase shadow + auto-rollback.
|
|
29
|
+
- LLD-10 §4.1 — deterministic A/B routing.
|
|
30
|
+
- LLD-10 §5 — atomic BEGIN IMMEDIATE promotion + rollback.
|
|
31
|
+
- Stage 8 SB-1 — architect S8-ARC-C1, skeptic C-01/C-02/H-07.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
from __future__ import annotations
|
|
35
|
+
|
|
36
|
+
import hashlib
|
|
37
|
+
import logging
|
|
38
|
+
import threading
|
|
39
|
+
from typing import Final
|
|
40
|
+
|
|
41
|
+
from superlocalmemory.core.security_primitives import ensure_install_token
|
|
42
|
+
from superlocalmemory.learning.model_rollback import ModelRollback
|
|
43
|
+
from superlocalmemory.learning.shadow_test import ShadowTest
|
|
44
|
+
|
|
45
|
+
logger = logging.getLogger(__name__)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# ---------------------------------------------------------------------------
|
|
49
|
+
# Arm vocabulary — externalised so hooks can grep-check.
|
|
50
|
+
# ---------------------------------------------------------------------------
|
|
51
|
+
|
|
52
|
+
ARM_BASELINE: Final[str] = "baseline"
|
|
53
|
+
ARM_CANDIDATE: Final[str] = "candidate"
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class ShadowRouter:
    """Per-profile A/B router + post-promotion watcher.

    One instance tracks, for a single ``(memory_db, learning_db,
    profile_id)`` tuple:

    * ``_shadow`` / ``_candidate_id`` — the pre-promotion ``ShadowTest``
      accumulator for the candidate under test (``None`` when idle).
    * ``_rollback`` — the post-promotion ``ModelRollback`` watcher
      (``None`` until ``arm_post_promotion_watch`` is called).
    * ``_routing_token`` — the install token snapshot used by
      ``route_query``.

    Not thread-safe for write state (``_shadow``, ``_rollback``) — the daemon
    calls ``on_recall_settled`` from a single recall-settled worker queue.
    Reads (``route_query``) are safe under concurrency.
    """

    def __init__(
        self,
        *,
        memory_db: str,
        learning_db: str,
        profile_id: str,
    ) -> None:
        """Create a router and re-attach any candidate already persisted.

        Args:
            memory_db: Path of the memory database (stored as ``str``).
            learning_db: Path of the learning database (stored as ``str``).
            profile_id: Profile this router serves (stored as ``str``).
        """
        self._memory_db = str(memory_db)
        self._learning_db = str(learning_db)
        self._profile_id = str(profile_id)
        # Shadow pre-promotion accumulator (created lazily when a
        # candidate is persisted).
        self._shadow: ShadowTest | None = None
        self._candidate_id: int | None = None
        # S9-SKEP-11: pin the routing token at router creation.
        # ``install_token`` rotation is a documented user action (SEC-M07
        # ``slm escape-hatch rotate-token``) but must not flip the arm
        # assignment of every in-flight query_id — that would silently
        # mix baseline and candidate observations for the same qid and
        # break the paired-t assumption. We snapshot the token once at
        # init; subsequent rotations are picked up only when a new
        # router is created (i.e. at daemon restart / candidate rotate).
        try:
            self._routing_token = ensure_install_token()
        except Exception as exc:  # pragma: no cover — defensive
            # Stay fail-soft, but make the degradation visible: with an
            # empty token the arm depends on ``query_id`` alone, so the
            # install-token protection against a caller-chosen qid is gone.
            logger.warning(
                "shadow_router could not load install token; "
                "routing falls back to query_id only: %s",
                exc,
            )
            self._routing_token = ""
        # Rollback post-promotion watcher (created when arm_post_promotion
        # is called after a promote).
        self._rollback: ModelRollback | None = None
        # S9-W4 H-ARC-01: re-attach an existing candidate across daemon
        # restart. The prior implementation lost the candidate_id on
        # process exit — a 6-hour shadow test that had persisted a
        # candidate an hour in would restart the observation window
        # from zero on the next boot, extending the test indefinitely.
        # Re-attaching the candidate_id is the minimum that keeps the
        # A/B loop from silently stalling.
        # NOTE(review): whether the paired observations themselves are
        # restored depends on ``ShadowTest``; it is handed ``learning_db``
        # for the ``shadow_observations`` table (M012) — confirm there.
        self._reattach_existing_candidate()

    def _reattach_existing_candidate(self) -> None:
        """H-ARC-01: look up any ``is_candidate=1`` row for this profile
        and restore ``self._candidate_id`` + a fresh ``ShadowTest``.

        Fail-soft — on any DB error we leave the router empty and the
        normal lazy-init path takes over as candidates land.
        """
        try:
            import sqlite3 as _sq

            conn = _sq.connect(self._learning_db, timeout=2.0)
            try:
                row = conn.execute(
                    "SELECT id FROM learning_model_state "
                    "WHERE profile_id = ? AND is_candidate = 1 "
                    "LIMIT 1",
                    (self._profile_id,),
                ).fetchone()
            finally:
                conn.close()
        except Exception as exc:
            # learning.db or table missing — normal cold start. Logged at
            # debug level only, so a genuinely broken DB is still traceable.
            logger.debug("shadow_router candidate re-attach skipped: %s", exc)
            return
        if row and row[0] is not None:
            cid = int(row[0])
            self._candidate_id = cid
            # Pass learning_db so ShadowTest can use the persisted
            # ``shadow_observations`` table (M012).
            self._shadow = ShadowTest(
                profile_id=self._profile_id,
                candidate_model_id=str(cid),
                learning_db=self._learning_db,
            )

    # ------------------------------------------------------------------
    # Routing
    # ------------------------------------------------------------------

    def route_query(self, query_id: str) -> str:
        """Return the arm for ``query_id``: ``'baseline'`` or ``'candidate'``.

        The arm is the parity of the first 8 hex digits of
        ``sha256(routing_token + query_id)``, so it is deterministic per
        ``(install_token, query_id)``. Install-token dependence closes the
        attacker-picks-query-id bias vector (skeptic H-02).

        S9-SKEP-11: uses ``self._routing_token`` (pinned at router
        init) instead of a fresh ``ensure_install_token()`` per call,
        so mid-test token rotation does not silently flip the arm of
        every subsequent qid and mix the paired-t arms.
        """
        digest = hashlib.sha256(
            (self._routing_token + str(query_id)).encode("utf-8"),
        ).hexdigest()[:8]
        return ARM_CANDIDATE if int(digest, 16) % 2 == 1 else ARM_BASELINE

    # ------------------------------------------------------------------
    # Pre-promotion accumulator
    # ------------------------------------------------------------------

    def attach_candidate(self, candidate_id: int) -> None:
        """Called after ``_persist_candidate`` writes a fresh candidate row.

        Replaces any current shadow state with a new ``ShadowTest`` bound
        to ``candidate_id``.
        """
        self._candidate_id = int(candidate_id)
        # Pass learning_db so new paired observations persist to
        # ``shadow_observations`` (M012) for restart durability.
        self._shadow = ShadowTest(
            profile_id=self._profile_id,
            candidate_model_id=str(candidate_id),
            learning_db=self._learning_db,
        )

    # ------------------------------------------------------------------
    # Post-promotion watcher
    # ------------------------------------------------------------------

    def arm_post_promotion_watch(self, *, baseline_ndcg: float) -> None:
        """Install a fresh ``ModelRollback`` observer for the 200-recall
        post-promotion watch window."""
        self._rollback = ModelRollback(
            learning_db_path=self._learning_db,
            profile_id=self._profile_id,
            baseline_ndcg=float(baseline_ndcg),
        )

    # ------------------------------------------------------------------
    # Recall-settled ingestion
    # ------------------------------------------------------------------

    @staticmethod
    def _to_shadow_arm(arm: str) -> str | None:
        """Translate a router arm name into ``ShadowTest`` vocabulary.

        ``'baseline'`` maps to ``'active'`` and ``'candidate'`` maps to
        itself. Anything else returns ``None`` so the caller can skip the
        observation instead of crediting it to the wrong arm.
        """
        if arm == ARM_BASELINE:
            return "active"
        if arm == ARM_CANDIDATE:
            return "candidate"
        return None

    def on_recall_settled(
        self,
        *,
        query_id: str,
        arm: str,
        ndcg_at_10: float,
    ) -> None:
        """Feed one settled recall into whichever phase is active.

        Precedence:
          1. If a ShadowTest is active → record paired observation; on
             ``decide() == 'promote'`` fire the atomic promotion.
          2. If a ModelRollback watch is active → record observation;
             on ``should_rollback() is True`` fire execute_rollback.

        Both phases may run for the same profile briefly if a promote
        fires mid-batch; that is intentional — the first watch window
        starts immediately on promotion.

        An ``arm`` that is neither ``ARM_BASELINE`` nor ``ARM_CANDIDATE``
        is not recorded in the shadow test (it is still fed to an active
        rollback watch, which does not depend on the arm).

        Fail-soft: every exception is logged at debug level and
        swallowed so the user's recall path is never broken.
        """
        try:
            if self._shadow is not None and self._candidate_id is not None:
                # ShadowTest expects arm='active'|'candidate'. Our router
                # uses 'baseline' as the externally-visible arm name.
                st_arm = self._to_shadow_arm(arm)
                if st_arm is None:
                    logger.debug(
                        "shadow_router ignoring unknown arm %r", arm,
                    )
                else:
                    self._shadow.record_recall_pair(
                        query_id=str(query_id), arm=st_arm,
                        ndcg_at_10=float(ndcg_at_10),
                    )
                    decision, _stats = self._shadow.decide()
                    if decision == "promote":
                        self._fire_promotion()
                    elif decision == "reject":
                        # Release the candidate reservation — a future
                        # _run_shadow_cycle may insert a new candidate.
                        self._shadow = None
                        self._candidate_id = None

            if self._rollback is not None:
                self._rollback.record_post_promotion(
                    query_id=str(query_id), ndcg_at_10=float(ndcg_at_10),
                )
                if self._rollback.should_rollback():
                    self._fire_rollback()
        except Exception as exc:  # pragma: no cover — defensive
            logger.debug("shadow_router on_recall_settled error: %s", exc)

    # ------------------------------------------------------------------
    # Internal: promotion + rollback triggers
    # ------------------------------------------------------------------

    def _fire_promotion(self) -> None:
        """Call the canonical ``_promote_candidate`` helper and reset
        the shadow state so a new A/B cycle can start next retrain.

        The shadow state is cleared in ``finally``, i.e. also when the
        promotion helper raises (the failure is logged as a warning).
        """
        if self._candidate_id is None:
            return
        try:
            # Imported lazily, as in the original module, rather than at
            # module import time.
            from superlocalmemory.learning.consolidation_worker import (
                _promote_candidate,
            )
            _promote_candidate(
                self._learning_db,
                profile_id=self._profile_id,
                candidate_id=int(self._candidate_id),
            )
        except Exception as exc:  # pragma: no cover — defensive
            logger.warning("shadow_router promotion failed: %s", exc)
        finally:
            self._shadow = None
            self._candidate_id = None

    def _fire_rollback(self) -> None:
        """Call ``ModelRollback.execute_rollback`` and clear the watcher.

        The watcher is cleared in ``finally``, i.e. also when the rollback
        raises (the failure is logged as a warning).
        """
        try:
            if self._rollback is not None:
                self._rollback.execute_rollback(
                    reason="watch_window_regression",
                )
        except Exception as exc:  # pragma: no cover — defensive
            logger.warning("shadow_router rollback failed: %s", exc)
        finally:
            self._rollback = None
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
# ---------------------------------------------------------------------------
|
|
276
|
+
# Process-local singleton cache — one ShadowRouter per (memory_db, learning_db, profile_id).
|
|
277
|
+
# Thread-safe; keyed by a stable tuple so repeated factory calls reuse state.
|
|
278
|
+
# ---------------------------------------------------------------------------
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
_CACHE: dict[tuple[str, str, str], ShadowRouter] = {}
|
|
282
|
+
_CACHE_LOCK = threading.Lock()
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def get_shadow_router(
    *,
    memory_db: str,
    learning_db: str,
    profile_id: str,
) -> ShadowRouter:
    """Return the process-local ``ShadowRouter`` for the given triple.

    The cache key is ``(str(memory_db), str(learning_db), str(profile_id))``.
    Lookup and first-time construction both happen while ``_CACHE_LOCK``
    is held, so repeated calls with the same triple get the same instance.
    """
    cache_key = (str(memory_db), str(learning_db), str(profile_id))
    with _CACHE_LOCK:
        existing = _CACHE.get(cache_key)
        if existing is not None:
            return existing
        # First request for this triple: build the router and remember it.
        created = ShadowRouter(
            memory_db=memory_db,
            learning_db=learning_db,
            profile_id=profile_id,
        )
        _CACHE[cache_key] = created
        return created
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def reset_for_testing() -> None:
    """Drop every cached ``ShadowRouter`` — intended for tests only."""
    _CACHE_LOCK.acquire()
    try:
        _CACHE.clear()
    finally:
        _CACHE_LOCK.release()
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
__all__ = (
|
|
314
|
+
"ARM_BASELINE",
|
|
315
|
+
"ARM_CANDIDATE",
|
|
316
|
+
"ShadowRouter",
|
|
317
|
+
"get_shadow_router",
|
|
318
|
+
"reset_for_testing",
|
|
319
|
+
)
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under AGPL-3.0-or-later - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory v3.4.21 — Stage 8 SB-5
|
|
4
|
+
|
|
5
|
+
"""Global kill-switch for SLM.
|
|
6
|
+
|
|
7
|
+
Two mechanisms, either disables the entire system cheaply:
|
|
8
|
+
|
|
9
|
+
1. File-marker: ``~/.superlocalmemory/.disabled`` — persistent across
|
|
10
|
+
reboots, survives daemon restarts, written by ``slm disable``.
|
|
11
|
+
2. Environment variable: ``SLM_DISABLE=1`` — per-process, useful for
|
|
12
|
+
CI, sandboxes, or "just for this shell" overrides.
|
|
13
|
+
|
|
14
|
+
Every hot-path entry point (hooks, MCP tools, recall pipeline, daemon
|
|
15
|
+
lifespan) calls :func:`is_disabled` first. Returns ``True`` ⇒ exit
|
|
16
|
+
quietly, no side effects.
|
|
17
|
+
|
|
18
|
+
Backward-compat: unset env + missing marker ⇒ ``False`` ⇒ normal
|
|
19
|
+
behaviour. Zero impact on the 18k live users who never touch it.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import os
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
_MARKER_NAME = ".disabled"
|
|
29
|
+
_ENV_NAME = "SLM_DISABLE"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _slm_home() -> Path:
    """Locate the SLM state directory.

    A non-empty ``SLM_HOME`` environment variable wins; otherwise the
    directory is ``~/.superlocalmemory``.
    """
    custom_root = os.environ.get("SLM_HOME")
    # Conditional expression keeps ``Path.home()`` unevaluated when an
    # override is present, matching the early-return form.
    return Path(custom_root) if custom_root else Path.home() / ".superlocalmemory"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def marker_path() -> Path:
    """Return the location of the persistent ``.disabled`` marker file."""
    state_dir = _slm_home()
    return state_dir.joinpath(_MARKER_NAME)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def is_disabled() -> bool:
    """Report whether SLM should no-op everything.

    The environment variable is consulted first (cheapest check): after
    trimming and lower-casing, any value other than the empty string,
    ``0``, ``false``, ``no`` or ``off`` counts as disabled. Otherwise the
    result is whether the marker file exists; an ``OSError`` while
    checking is treated as "not disabled".
    """
    flag = os.environ.get(_ENV_NAME, "").strip().lower()
    if flag not in ("", "0", "false", "no", "off"):
        return True
    try:
        marker_present = marker_path().exists()
    except OSError:  # pragma: no cover — defensive against FS errors
        marker_present = False
    return marker_present
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def write_marker(reason: str = "") -> Path:
    """Write the disabled marker, creating the state directory if needed.

    The file holds ``disabled`` when no reason is given, or
    ``disabled: <reason>`` followed by a newline otherwise.

    Returns:
        The path of the marker file.
    """
    state_dir = _slm_home()
    state_dir.mkdir(parents=True, exist_ok=True)
    marker = state_dir / _MARKER_NAME
    text = f"disabled: {reason}\n" if reason else "disabled"
    marker.write_text(text, encoding="utf-8")
    return marker
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def remove_marker() -> bool:
    """Delete the disabled marker.

    Returns:
        ``True`` when a marker was deleted, ``False`` when none existed.
    """
    marker = marker_path()
    try:
        marker.unlink()
    except FileNotFoundError:
        return False
    return True
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
__all__ = (
|
|
83
|
+
"is_disabled",
|
|
84
|
+
"write_marker",
|
|
85
|
+
"remove_marker",
|
|
86
|
+
"marker_path",
|
|
87
|
+
)
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under AGPL-3.0-or-later - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory v3.4.21 — Stage 8 SB-5
|
|
4
|
+
|
|
5
|
+
"""Path-level opt-out via ``.slmignore``.
|
|
6
|
+
|
|
7
|
+
A repository-scoped escape hatch: drop a ``.slmignore`` at any
|
|
8
|
+
ancestor of a workspace and SLM will skip hooks / recall / remember
|
|
9
|
+
for any path inside that ancestor. Roughly the shape of ``.gitignore``
|
|
10
|
+
but with simpler matching — one path pattern per line, ``#`` comments,
|
|
11
|
+
whitespace stripped.
|
|
12
|
+
|
|
13
|
+
Matching rules (intentionally minimal):
|
|
14
|
+
|
|
15
|
+
- Lines starting with ``#`` or empty ⇒ ignored.
|
|
16
|
+
- A line like ``node_modules`` matches any path segment named
|
|
17
|
+
``node_modules`` anywhere in the resolved absolute path.
|
|
18
|
+
- A line starting with ``/`` is an absolute-prefix match
|
|
19
|
+
(``/Users/me/secret`` matches everything under that dir).
|
|
20
|
+
- Glob chars ``*`` / ``?`` are treated literally — keep it boring,
|
|
21
|
+
avoid re-implementing ``.gitignore``'s subtleties.
|
|
22
|
+
|
|
23
|
+
Cache: the parsed ignore list is memoised per ignore-file path + mtime.
|
|
24
|
+
Look-up cost at the hook hot path is O(depth × patterns) and
|
|
25
|
+
patterns ≤ 50 in practice.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import os
|
|
31
|
+
from pathlib import Path
|
|
32
|
+
|
|
33
|
+
_FILENAME = ".slmignore"
|
|
34
|
+
_CACHE: dict[tuple[str, float], tuple[str, ...]] = {}
|
|
35
|
+
_CACHE_CAP = 64
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _load_patterns(ignore_path: Path) -> tuple[str, ...]:
    """Read one ``.slmignore`` file into a tuple of pattern strings.

    Results are memoised in ``_CACHE`` under ``(path, mtime)``. Blank
    lines and lines whose first non-blank character is ``#`` are dropped;
    the remaining lines are whitespace-stripped. An unreadable or missing
    file yields ``()`` and is not cached.
    """
    try:
        file_stat = ignore_path.stat()
    except OSError:
        return ()
    cache_key = (str(ignore_path), file_stat.st_mtime)
    memoised = _CACHE.get(cache_key)
    if memoised is not None:
        return memoised
    try:
        text = ignore_path.read_text(encoding="utf-8", errors="ignore")
    except OSError:
        return ()
    entries = tuple(
        entry
        for entry in (row.strip() for row in text.splitlines())
        if entry and not entry.startswith("#")
    )
    # Crude bound: once the cap is reached the whole cache is dropped.
    if len(_CACHE) >= _CACHE_CAP:
        _CACHE.clear()
    _CACHE[cache_key] = entries
    return entries
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _iter_ancestor_ignores(target: Path) -> list[tuple[Path, tuple[str, ...]]]:
    """Collect non-empty ignore files from ``target`` up to the root.

    The walk starts at ``target`` itself when it is a directory, otherwise
    at its parent, and visits each ancestor once, nearest first.

    Returns:
        ``(directory, patterns)`` pairs, one per ``.slmignore`` that
        produced at least one pattern.
    """
    start = target if target.is_dir() else target.parent
    found: list[tuple[Path, tuple[str, ...]]] = []
    for directory in (start, *start.parents):
        ignore_file = directory / _FILENAME
        if not ignore_file.is_file():
            continue
        rules = _load_patterns(ignore_file)
        if rules:
            found.append((ignore_file.parent, rules))
    return found
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def path_is_ignored(target: str | Path) -> bool:
    """Return True iff any ancestor ``.slmignore`` ignores the given path.

    ``target`` is made absolute with ``Path.resolve(strict=False)``. That
    call also resolves symlinks, so matching is performed against the real
    path: a symlink pointing into an ignored directory is reported as
    ignored. Non-existent targets are allowed; the theoretical ancestry is
    walked. If resolution fails, the path is used as given.

    Pattern semantics, per entry of every ancestor ignore file:

    * A pattern starting with ``/`` is read as a filesystem path; the
      target is ignored when it equals that path or lies beneath it.
    * Any other pattern is ignored when it equals one whole component of
      the resolved path (no globbing, no multi-segment patterns).
    """
    p = Path(target)
    try:
        abs_path = p.resolve(strict=False)
    except (OSError, RuntimeError):
        # RuntimeError: older Python versions raise it from resolve() when
        # the path contains a symlink loop. Fall back to the raw path
        # rather than crashing the caller.
        abs_path = p

    segments = frozenset(abs_path.parts)
    for _ignore_dir, patterns in _iter_ancestor_ignores(abs_path):
        for pat in patterns:
            if pat.startswith("/"):
                # The pattern is taken verbatim as a filesystem path.
                # NOTE(review): it is NOT re-anchored at the ignore file's
                # directory (unlike gitignore) — confirm this is intended.
                try:
                    abs_path.relative_to(Path(pat))
                except ValueError:
                    continue
                return True
            # Bare name: matches any component of the full resolved path.
            # Components below the ignore file's directory are a subset of
            # ``segments``, so no separate per-directory check is needed.
            if pat in segments:
                return True
    return False
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def clear_cache() -> None:
    """Empty the memoised ``.slmignore`` pattern cache (intended for tests)."""
    _CACHE.clear()
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
# Public API: the matcher itself plus the cache-reset helper.
__all__ = ("path_is_ignored", "clear_cache")
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under AGPL-3.0-or-later - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory v3.4.21 — LLD-01 §4.2
|
|
4
|
+
|
|
5
|
+
"""Topic signature — deterministic, Unicode-safe, ReDoS-resistant 16-char hex.
|
|
6
|
+
|
|
7
|
+
LLD reference: `.backup/active-brain/lld/LLD-01-context-cache-and-hot-path-hooks.md`
|
|
8
|
+
Section 4.2.
|
|
9
|
+
|
|
10
|
+
Hot-path contract:
|
|
11
|
+
- stdlib-only imports (no third-party packages).
|
|
12
|
+
- Same (NFC-normalized, lowercased) input → same output across Python
|
|
13
|
+
versions, OSes, and locales. Enforced via CI matrix.
|
|
14
|
+
- Patterns are O(n) — no catastrophic backtracking regardless of input.
|
|
15
|
+
- Input is truncated to ``MAX_SIG_INPUT_CHARS`` BEFORE any regex to
|
|
16
|
+
guarantee a hard upper bound on compute time.
|
|
17
|
+
- Budget: <5 ms p95 at 2000 chars, <8 ms at 8000 (see tests).
|
|
18
|
+
|
|
19
|
+
No ``@lru_cache`` anywhere — per LLD-01 SEC-01-01 / PERF-01-04, caching on
|
|
20
|
+
raw prompts would leak secrets in memory and is useless across fresh hook
|
|
21
|
+
processes anyway.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import hashlib
|
|
27
|
+
import re
|
|
28
|
+
import unicodedata
|
|
29
|
+
|
|
30
|
+
# --------------------------------------------------------------------------
# Non-backtracking patterns. Each one is linear in input length.
# All of them are compiled once at import time.
# --------------------------------------------------------------------------

# CamelCase / PascalCase identifiers: a word that starts with an upper-case
# ASCII letter and has another upper-case letter at index 2 or later
# (e.g. ``FooBar``). Words starting lower-case (``fooBar``) do not match.
_CAMEL_PASCAL = re.compile(r"\b[A-Z][a-zA-Z0-9]+[A-Z][a-zA-Z0-9]*\b")
# URLs — ``http://`` or ``https://`` followed by everything up to the next
# whitespace or angle bracket. Linear in input.
_URL = re.compile(r"https?://[^\s<>]+")
# Paths — a ``/`` followed by one or more ``/``-separated segments, each a
# run of characters other than whitespace, ``/``, ``<`` and ``>``. Because
# the match may begin at any ``/``, the host-and-path tail of a URL is also
# picked up by this pattern.
_PATH = re.compile(r"/[^\s/<>]+(?:/[^\s/<>]+)*(?:\.[A-Za-z0-9]+)?")
# Quoted strings: the capture group yields the text between the quotes,
# 1 to 200 characters long (the cap preserves the linear time bound).
_QUOTED_DOUBLE = re.compile(r'"([^"]{1,200})"')
_QUOTED_SINGLE = re.compile(r"'([^']{1,200})'")
# Word tokens for content-word extraction: an ASCII letter or digit followed
# by at least two more characters from letters, digits, ``'`` and ``-``, so
# every match is already >= 3 characters long.
_WORD = re.compile(r"[A-Za-z0-9][A-Za-z0-9'\-]{2,}")

# ~120 common English stopwords. Kept inline so the module is stdlib-only.
# Entries are lower-case because they are compared against lowered tokens.
_STOPWORDS: frozenset[str] = frozenset({
    "a", "about", "above", "after", "again", "against", "all", "am", "an",
    "and", "any", "are", "as", "at", "be", "because", "been", "before",
    "being", "below", "between", "both", "but", "by", "can", "cannot",
    "could", "did", "do", "does", "doing", "don", "down", "during", "each",
    "few", "for", "from", "further", "had", "has", "have", "having", "he",
    "her", "here", "hers", "herself", "him", "himself", "his", "how", "i",
    "if", "in", "into", "is", "it", "its", "itself", "just", "let", "me",
    "more", "most", "my", "myself", "no", "nor", "not", "now", "of", "off",
    "on", "once", "only", "or", "other", "ought", "our", "ours", "ourselves",
    "out", "over", "own", "same", "she", "should", "so", "some", "such",
    "than", "that", "the", "their", "theirs", "them", "themselves", "then",
    "there", "these", "they", "this", "those", "through", "to", "too",
    "under", "until", "up", "use", "using", "very", "was", "we", "were",
    "what", "when", "where", "which", "while", "who", "whom", "why", "will",
    "with", "would", "you", "your", "yours", "yourself", "yourselves",
})

# Inputs longer than this many characters are cut to this length before
# normalization and pattern matching in ``compute_topic_signature``.
MAX_SIG_INPUT_CHARS: int = 4000
# Number of leading hex characters of the SHA-256 digest kept as the
# signature.
_SIG_LEN: int = 16
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _canon(items: list[str]) -> str:
|
|
70
|
+
"""Canonicalize a list of tokens: sort, dedupe, join with a sentinel."""
|
|
71
|
+
return "\0".join(sorted(set(items)))
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def compute_topic_signature(
    text: str,
    *,
    entity_hits: list[str] | tuple[str, ...] | None = None,
) -> str:
    """Derive a deterministic 16-character lowercase hex signature for ``text``.

    Empty input yields ``"0" * 16``. Otherwise the steps are:

    1. Cut the input to ``MAX_SIG_INPUT_CHARS`` characters so all later
       work has a fixed upper bound.
    2. NFC-normalize, so composed and decomposed Unicode spellings produce
       the same code points.
    3. Pull structural tokens from the case-preserved text: CamelCase
       identifiers, URLs, paths, and quoted strings.
    4. Pull word tokens from the lowercased text, dropping stopwords and
       anything shorter than three characters.
    5. Pair adjacent content words into ``a_b`` bigrams, in stream order.
    6. Canonicalize each group (sorted, deduplicated), join the groups,
       hash with SHA-256, and keep the first 16 hex characters.

    ``entity_hits`` (LLD-13 Track C.1) is an optional collection of entity
    IDs. When it is omitted or empty, exactly six groups are hashed and the
    result is byte-identical to the v3.4.21 pre-Living-Brain signature.
    When it is non-empty, the stringified IDs are added as a seventh group
    so prompts that share regex-level tokens but differ in entities get
    different signatures.
    """
    if not text:
        return "0" * _SIG_LEN

    # Bound the input before anything else touches it.
    clipped = text[:MAX_SIG_INPUT_CHARS]
    normalized = unicodedata.normalize("NFC", clipped)

    # Structural tokens keep their original case — camelCase carries meaning.
    token_groups = [
        _CAMEL_PASCAL.findall(normalized),
        _URL.findall(normalized),
        _PATH.findall(normalized),
        _QUOTED_DOUBLE.findall(normalized) + _QUOTED_SINGLE.findall(normalized),
    ]

    # Content words come from the lowercased text, in order of appearance.
    content_words = [
        word
        for word in _WORD.findall(normalized.lower())
        if len(word) >= 3 and word not in _STOPWORDS
    ]
    token_groups.append(content_words)

    # Adjacent-pair bigrams keep "foo bar" distinct from "bar foo".
    token_groups.append(
        [f"{first}_{second}" for first, second in zip(content_words, content_words[1:])]
    )

    # The entity group is appended only when present, so the default output
    # stays unchanged.
    if entity_hits:
        token_groups.append([str(hit) for hit in entity_hits])

    material = "\0\0".join(_canon(group) for group in token_groups)
    digest = hashlib.sha256(material.encode("utf-8")).hexdigest()
    return digest[:_SIG_LEN]
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
# Public API: the signature function and its input-length cap.
__all__ = ("compute_topic_signature", "MAX_SIG_INPUT_CHARS")
|