superlocalmemory 3.4.19 → 3.4.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. package/CHANGELOG.md +24 -0
  2. package/README.md +42 -34
  3. package/bin/slm +11 -0
  4. package/bin/slm.bat +12 -0
  5. package/package.json +4 -3
  6. package/pyproject.toml +4 -3
  7. package/scripts/build-slm-hook.ps1 +40 -0
  8. package/scripts/build-slm-hook.sh +45 -0
  9. package/scripts/build_entry.py +452 -0
  10. package/scripts/ci/stage5b_gate.sh +50 -0
  11. package/scripts/postinstall/validation.js +187 -0
  12. package/scripts/postinstall-interactive.js +756 -0
  13. package/scripts/postinstall_binary.js +287 -0
  14. package/scripts/release_manifest.py +273 -0
  15. package/scripts/slm-hook.spec +56 -0
  16. package/skills/slm-build-graph/SKILL.md +423 -0
  17. package/skills/slm-list-recent/SKILL.md +348 -0
  18. package/skills/slm-recall/SKILL.md +343 -0
  19. package/skills/slm-remember/SKILL.md +194 -0
  20. package/skills/slm-show-patterns/SKILL.md +224 -0
  21. package/skills/slm-status/SKILL.md +363 -0
  22. package/skills/slm-switch-profile/SKILL.md +442 -0
  23. package/src/superlocalmemory/cli/commands.py +254 -79
  24. package/src/superlocalmemory/cli/context_commands.py +192 -0
  25. package/src/superlocalmemory/cli/daemon.py +15 -1
  26. package/src/superlocalmemory/cli/db_migrate.py +80 -0
  27. package/src/superlocalmemory/cli/escape_hatch.py +220 -0
  28. package/src/superlocalmemory/cli/main.py +72 -1
  29. package/src/superlocalmemory/core/context_cache.py +397 -0
  30. package/src/superlocalmemory/core/engine.py +38 -2
  31. package/src/superlocalmemory/core/engine_wiring.py +1 -1
  32. package/src/superlocalmemory/core/ram_lock.py +111 -0
  33. package/src/superlocalmemory/core/recall_pipeline.py +433 -3
  34. package/src/superlocalmemory/core/recall_worker.py +8 -3
  35. package/src/superlocalmemory/core/security_primitives.py +635 -0
  36. package/src/superlocalmemory/core/shadow_router.py +319 -0
  37. package/src/superlocalmemory/core/slm_disabled.py +87 -0
  38. package/src/superlocalmemory/core/slmignore.py +125 -0
  39. package/src/superlocalmemory/core/topic_signature.py +143 -0
  40. package/src/superlocalmemory/core/worker_pool.py +14 -3
  41. package/src/superlocalmemory/encoding/cognitive_consolidator.py +2 -2
  42. package/src/superlocalmemory/evolution/budget.py +321 -0
  43. package/src/superlocalmemory/evolution/llm_dispatch.py +508 -0
  44. package/src/superlocalmemory/evolution/skill_evolver.py +144 -94
  45. package/src/superlocalmemory/hooks/_outcome_common.py +506 -0
  46. package/src/superlocalmemory/hooks/adapter_base.py +317 -0
  47. package/src/superlocalmemory/hooks/antigravity_adapter.py +192 -0
  48. package/src/superlocalmemory/hooks/claude_code_hooks.py +33 -1
  49. package/src/superlocalmemory/hooks/context_payload.py +312 -0
  50. package/src/superlocalmemory/hooks/copilot_adapter.py +154 -0
  51. package/src/superlocalmemory/hooks/cross_platform_connector.py +90 -0
  52. package/src/superlocalmemory/hooks/cursor_adapter.py +195 -0
  53. package/src/superlocalmemory/hooks/hook_handlers.py +109 -8
  54. package/src/superlocalmemory/hooks/ide_connector.py +25 -2
  55. package/src/superlocalmemory/hooks/post_tool_async_hook.py +165 -0
  56. package/src/superlocalmemory/hooks/post_tool_outcome_hook.py +223 -0
  57. package/src/superlocalmemory/hooks/prewarm_auth.py +170 -0
  58. package/src/superlocalmemory/hooks/session_registry.py +186 -0
  59. package/src/superlocalmemory/hooks/stop_outcome_hook.py +134 -0
  60. package/src/superlocalmemory/hooks/sync_loop.py +114 -0
  61. package/src/superlocalmemory/hooks/user_prompt_hook.py +128 -0
  62. package/src/superlocalmemory/hooks/user_prompt_rehash_hook.py +202 -0
  63. package/src/superlocalmemory/infra/backup.py +3 -3
  64. package/src/superlocalmemory/infra/cloud_backup.py +2 -2
  65. package/src/superlocalmemory/infra/event_bus.py +2 -2
  66. package/src/superlocalmemory/infra/webhook_dispatcher.py +3 -3
  67. package/src/superlocalmemory/learning/arm_catalog.py +99 -0
  68. package/src/superlocalmemory/learning/bandit.py +526 -0
  69. package/src/superlocalmemory/learning/bandit_cache.py +133 -0
  70. package/src/superlocalmemory/learning/behavioral.py +53 -1
  71. package/src/superlocalmemory/learning/consolidation_cycle.py +381 -0
  72. package/src/superlocalmemory/learning/consolidation_worker.py +188 -520
  73. package/src/superlocalmemory/learning/database.py +256 -0
  74. package/src/superlocalmemory/learning/dedup_hnsw.py +413 -0
  75. package/src/superlocalmemory/learning/ensemble.py +300 -0
  76. package/src/superlocalmemory/learning/fact_outcome_joins.py +207 -0
  77. package/src/superlocalmemory/learning/forgetting_scheduler.py +55 -0
  78. package/src/superlocalmemory/learning/hnsw_dedup.py +69 -0
  79. package/src/superlocalmemory/learning/labeler.py +87 -0
  80. package/src/superlocalmemory/learning/legacy_migration.py +277 -0
  81. package/src/superlocalmemory/learning/memory_merge.py +160 -0
  82. package/src/superlocalmemory/learning/model_cache.py +269 -0
  83. package/src/superlocalmemory/learning/model_rollback.py +278 -0
  84. package/src/superlocalmemory/learning/outcome_queue.py +284 -0
  85. package/src/superlocalmemory/learning/pattern_miner.py +415 -0
  86. package/src/superlocalmemory/learning/pattern_miner_constants.py +47 -0
  87. package/src/superlocalmemory/learning/ranker.py +225 -81
  88. package/src/superlocalmemory/learning/ranker_common.py +163 -0
  89. package/src/superlocalmemory/learning/ranker_retrain_legacy.py +202 -0
  90. package/src/superlocalmemory/learning/ranker_retrain_online.py +411 -0
  91. package/src/superlocalmemory/learning/reward.py +777 -0
  92. package/src/superlocalmemory/learning/reward_archive.py +210 -0
  93. package/src/superlocalmemory/learning/reward_boost.py +201 -0
  94. package/src/superlocalmemory/learning/reward_proxy.py +326 -0
  95. package/src/superlocalmemory/learning/shadow_test.py +524 -0
  96. package/src/superlocalmemory/learning/signal_worker.py +270 -0
  97. package/src/superlocalmemory/learning/signals.py +314 -0
  98. package/src/superlocalmemory/learning/trigram_index.py +547 -0
  99. package/src/superlocalmemory/mcp/server.py +5 -5
  100. package/src/superlocalmemory/mcp/tools_context.py +183 -0
  101. package/src/superlocalmemory/mcp/tools_core.py +92 -27
  102. package/src/superlocalmemory/parameterization/soft_prompt_generator.py +13 -0
  103. package/src/superlocalmemory/retrieval/engine.py +52 -0
  104. package/src/superlocalmemory/server/api.py +2 -2
  105. package/src/superlocalmemory/server/bandit_loops.py +140 -0
  106. package/src/superlocalmemory/server/middleware/__init__.py +11 -0
  107. package/src/superlocalmemory/server/middleware/security_headers.py +144 -0
  108. package/src/superlocalmemory/server/routes/backup.py +36 -13
  109. package/src/superlocalmemory/server/routes/behavioral.py +50 -19
  110. package/src/superlocalmemory/server/routes/brain.py +1234 -0
  111. package/src/superlocalmemory/server/routes/data_io.py +4 -4
  112. package/src/superlocalmemory/server/routes/events.py +2 -2
  113. package/src/superlocalmemory/server/routes/helpers.py +1 -1
  114. package/src/superlocalmemory/server/routes/learning.py +192 -7
  115. package/src/superlocalmemory/server/routes/memories.py +189 -1
  116. package/src/superlocalmemory/server/routes/prewarm.py +171 -0
  117. package/src/superlocalmemory/server/routes/profiles.py +3 -3
  118. package/src/superlocalmemory/server/routes/token.py +88 -0
  119. package/src/superlocalmemory/server/routes/ws.py +5 -5
  120. package/src/superlocalmemory/server/security_middleware.py +13 -7
  121. package/src/superlocalmemory/server/ui.py +2 -2
  122. package/src/superlocalmemory/server/unified_daemon.py +335 -3
  123. package/src/superlocalmemory/skills/slm-build-graph/SKILL.md +423 -0
  124. package/src/superlocalmemory/skills/slm-list-recent/SKILL.md +348 -0
  125. package/src/superlocalmemory/skills/slm-recall/SKILL.md +343 -0
  126. package/src/superlocalmemory/skills/slm-remember/SKILL.md +194 -0
  127. package/src/superlocalmemory/skills/slm-show-patterns/SKILL.md +224 -0
  128. package/src/superlocalmemory/skills/slm-status/SKILL.md +363 -0
  129. package/src/superlocalmemory/skills/slm-switch-profile/SKILL.md +442 -0
  130. package/src/superlocalmemory/storage/migration_runner.py +545 -0
  131. package/src/superlocalmemory/storage/migrations/M001_add_signal_features_columns.py +67 -0
  132. package/src/superlocalmemory/storage/migrations/M002_model_state_history.py +132 -0
  133. package/src/superlocalmemory/storage/migrations/M003_migration_log.py +38 -0
  134. package/src/superlocalmemory/storage/migrations/M004_cross_platform_sync_log.py +46 -0
  135. package/src/superlocalmemory/storage/migrations/M005_bandit_tables.py +75 -0
  136. package/src/superlocalmemory/storage/migrations/M006_action_outcomes_reward.py +75 -0
  137. package/src/superlocalmemory/storage/migrations/M007_pending_outcomes.py +63 -0
  138. package/src/superlocalmemory/storage/migrations/M009_model_lineage.py +54 -0
  139. package/src/superlocalmemory/storage/migrations/M010_evolution_config.py +75 -0
  140. package/src/superlocalmemory/storage/migrations/M011_archive_and_merge.py +87 -0
  141. package/src/superlocalmemory/storage/migrations/M012_shadow_observations.py +72 -0
  142. package/src/superlocalmemory/storage/migrations/M013_bi_temporal_columns.py +55 -0
  143. package/src/superlocalmemory/storage/migrations/__init__.py +81 -0
  144. package/src/superlocalmemory/storage/models.py +4 -0
  145. package/src/superlocalmemory/ui/css/brain.css +409 -0
  146. package/src/superlocalmemory/ui/css/legacy-dashboard.css +645 -0
  147. package/src/superlocalmemory/ui/index.html +459 -1345
  148. package/src/superlocalmemory/ui/js/brain.js +1321 -0
  149. package/src/superlocalmemory/ui/js/clusters.js +123 -4
  150. package/src/superlocalmemory/ui/js/init.js +48 -39
  151. package/src/superlocalmemory/ui/js/memories.js +88 -2
  152. package/src/superlocalmemory/ui/js/modal.js +71 -1
  153. package/src/superlocalmemory/ui/js/ng-shell.js +101 -88
  154. package/src/superlocalmemory/ui/js/trust-dashboard.js +168 -25
  155. package/src/superlocalmemory/ui/vendor/bootstrap-icons/bootstrap-icons.css +2018 -0
  156. package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff +0 -0
  157. package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff2 +0 -0
  158. package/src/superlocalmemory/ui/vendor/bootstrap.bundle.min.js +7 -0
  159. package/src/superlocalmemory/ui/vendor/bootstrap.min.css +6 -0
  160. package/src/superlocalmemory/ui/vendor/d3.v7.min.js +2 -0
  161. package/src/superlocalmemory/ui/vendor/graphology-library.min.js +2 -0
  162. package/src/superlocalmemory/ui/vendor/graphology.umd.min.js +2 -0
  163. package/src/superlocalmemory/ui/vendor/inter-ui/inter-variable.min.css +8 -0
  164. package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable-Italic.woff2 +0 -0
  165. package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable.woff2 +0 -0
  166. package/src/superlocalmemory/ui/vendor/sigma.min.js +1 -0
  167. package/src/superlocalmemory/ui/js/behavioral.js +0 -447
  168. package/src/superlocalmemory/ui/js/graph-core.js +0 -447
  169. package/src/superlocalmemory/ui/js/graph-interactions.js +0 -351
  170. package/src/superlocalmemory/ui/js/learning.js +0 -435
  171. package/src/superlocalmemory/ui/js/patterns.js +0 -93
  172. package/src/superlocalmemory.egg-info/PKG-INFO +0 -647
  173. package/src/superlocalmemory.egg-info/SOURCES.txt +0 -335
  174. package/src/superlocalmemory.egg-info/dependency_links.txt +0 -1
  175. package/src/superlocalmemory.egg-info/entry_points.txt +0 -2
  176. package/src/superlocalmemory.egg-info/requires.txt +0 -58
  177. package/src/superlocalmemory.egg-info/top_level.txt +0 -1
@@ -0,0 +1,635 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under AGPL-3.0-or-later - see LICENSE file
3
+ # Part of SuperLocalMemory v3.4.22 — LLD-07 §6
4
+
5
+ """Shared security primitives for SLM v3.4.22.
6
+
7
+ LLD reference: `.backup/active-brain/lld/LLD-07-schema-migrations-and-security-primitives.md`
8
+ Section: 6.1 through 6.10.
9
+
10
+ Every file write, subprocess spawn, and secret-bearing string across SLM
11
+ daemon, adapters, hooks, and binary installer routes through this module.
12
+ Single source of truth — the hard rules in LLD-07 §7 are enforced here.
13
+
14
+ All functions are defensive: they raise early, log nothing about the secret
15
+ content, and use constant-time comparisons where applicable.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import hashlib
21
+ import hmac
22
+ import math
23
+ import os
24
+ import re
25
+ import secrets as _secrets
26
+ import stat
27
+ import subprocess
28
+ import sys
29
+ from pathlib import Path
30
+ from typing import Iterable
31
+
32
+
33
+ # ---------------------------------------------------------------------------
34
+ # Exceptions
35
+ # ---------------------------------------------------------------------------
36
+
37
+
38
class PathTraversalError(ValueError):
    """Signal that a path was rejected by ``safe_resolve``.

    Covers ``..`` components, unresolvable paths, denied system prefixes,
    and paths (or symlinks) that leave the allowed base directory.
    """
40
+
41
+
42
class IntegrityError(ValueError):
    """Signal a failed SHA-256 integrity check.

    Raised for a digest mismatch as well as for a malformed expected digest.
    """
44
+
45
+
46
+ # ---------------------------------------------------------------------------
47
+ # 6.1 Safe path resolver (SEC-01-05, SEC-05-01, SEC-06-03)
48
+ # ---------------------------------------------------------------------------
49
+
50
+
51
# Reserved system locations: ``safe_resolve`` refuses any resolved path that
# falls under one of these prefixes (see ``_hits_deny_prefix``).
_DENY_PREFIXES_POSIX: tuple[str, ...] = (
    "/etc", "/usr", "/var", "/sys", "/proc", "/bin", "/sbin",
    "/System", "/Library",
)
# Windows counterparts; ``_hits_deny_prefix`` compares these case-insensitively.
_DENY_PREFIXES_WINDOWS: tuple[str, ...] = (
    r"C:\Windows", r"C:\Program Files", r"C:\ProgramData",
)
67
+
68
+
69
def _is_windows() -> bool:
    """Return True when ``sys.platform`` is exactly ``"win32"``."""
    on_windows = sys.platform == "win32"
    return on_windows
71
+
72
+
73
def _hits_deny_prefix(resolved: Path) -> bool:
    """Tell whether ``resolved`` lies under a reserved system prefix.

    On Windows the comparison is a case-insensitive string-prefix test
    against ``_DENY_PREFIXES_WINDOWS``. Elsewhere the path must equal a
    ``_DENY_PREFIXES_POSIX`` entry or start with that entry followed by the
    path separator, so ``/etcetera`` does not count as being under ``/etc``.
    """
    text = str(resolved)
    if _is_windows():  # pragma: no cover — Windows-only branch
        folded = text.lower()
        for prefix in _DENY_PREFIXES_WINDOWS:
            if folded.startswith(prefix.lower()):
                return True
        return False
    for prefix in _DENY_PREFIXES_POSIX:
        if text == prefix or text.startswith(prefix + os.sep):
            return True
    return False
80
+
81
+
82
def safe_resolve(base: Path, rel: str | Path) -> Path:
    """Resolve ``rel`` against ``base`` and refuse anything that escapes it.

    Rules, checked in this order:
    - ``rel`` must be ``str`` or ``Path``.
    - ``..`` components are refused outright.
    - The resolved path must not land in a reserved system prefix.
    - The resolved path must be ``base.resolve()`` or a descendant of it.
    - Every existing symlink on the chain between the requested path and
      ``base`` must itself resolve to a location under ``base``.

    Args:
        base: Trusted directory that the result must stay within.
        rel: Path to resolve; relative values are joined onto ``base``,
            absolute values are validated as given.

    Returns:
        The resolved absolute path.

    Raises:
        TypeError: ``rel`` is neither ``str`` nor ``Path``.
        PathTraversalError: any of the rules above is violated, or the
            path / base cannot be resolved.
    """
    if not isinstance(rel, (str, Path)):
        raise TypeError(f"rel must be str | Path, got {type(rel).__name__}")

    rel_path = Path(rel)
    if ".." in rel_path.parts:
        raise PathTraversalError(f"'..' components are forbidden: {rel!r}")

    candidate = rel_path if rel_path.is_absolute() else base / rel_path

    try:
        resolved = candidate.resolve(strict=False)
    except (OSError, RuntimeError) as exc:  # pragma: no cover — defensive
        raise PathTraversalError(f"cannot resolve {rel!r}: {exc}") from exc

    if _hits_deny_prefix(resolved):
        raise PathTraversalError(f"denied system prefix: {resolved}")

    try:
        base_resolved = base.resolve(strict=False)
    except (OSError, RuntimeError) as exc:  # pragma: no cover — defensive
        raise PathTraversalError(f"cannot resolve base {base!r}: {exc}") from exc

    try:
        resolved.relative_to(base_resolved)
    except ValueError as exc:
        raise PathTraversalError(
            f"{resolved} escapes base {base_resolved}"
        ) from exc

    # Symlink walk — defense in depth on top of the containment check above.
    # Walk the *pre-resolution* chain upwards and refuse when an intermediate
    # component is a symlink whose target leaves the base.
    #
    # The walk stops once it reaches ``base`` (as given, or resolved).
    # Components above the base are the trusted caller's own location; if one
    # of them is a symlink (e.g. ``/tmp`` on macOS or a symlinked ``/home``),
    # its target is by construction an ancestor of the base, and checking it
    # would reject every path under that base. When ``rel`` is absolute and
    # never passes through ``base`` lexically, the walk still runs to the root.
    walk_stops = (base, base_resolved)
    cur = candidate
    while cur != cur.parent and cur not in walk_stops:
        if cur.exists() and cur.is_symlink():
            try:
                target = cur.resolve(strict=False)
                target.relative_to(base_resolved)
            except (ValueError, OSError) as exc:
                raise PathTraversalError(
                    f"symlink {cur} points outside base"
                ) from exc
        cur = cur.parent

    return resolved
145
+
146
+
147
+ # ---------------------------------------------------------------------------
148
+ # LLD-00 §4 — safe_resolve_identifier (SEC-C-02 fix)
149
+ # ---------------------------------------------------------------------------
150
+ #
151
+ # The pre-existing ``safe_resolve`` above handles hardcoded relative paths
152
+ # (e.g. `.cursor/rules/file.mdc`) against a trusted base. LLD-00 §4 adds a
153
+ # stricter contract for *untrusted identifiers* — a ``session_id`` or
154
+ # ``profile_id`` that may reach the filesystem via path join. This helper
155
+ # enforces the LLD-00 regex AND the base-containment check. Callers in
156
+ # LLD-09 (session state files) and LLD-11 (evolution.lock) MUST use this.
157
+ #
158
+ # Naming deviation from IMPLEMENTATION-MANIFEST P0.2: the manifest reused
159
+ # the name ``safe_resolve`` but the existing path-style helper is used in
160
+ # 9+ call sites. A separate name avoids breakage. See
161
+ # ``.backup/active-brain/MANIFEST-DEVIATION.md`` P0.2 entry.
162
+
163
# Shape of an acceptable untrusted identifier: 1-128 characters drawn from
# ASCII letters, digits, underscore and hyphen. The pattern ends in ``\Z``
# rather than ``$`` because ``$`` also matches just before a trailing
# newline, which would let ``"abc\n"`` through.
_SAFE_ID_RE = re.compile(r"^[a-zA-Z0-9_-]{1,128}\Z")


def safe_resolve_identifier(base: Path, untrusted: str) -> Path:
    """Return the resolved ``base / untrusted`` for a safe identifier only.

    Intended for untrusted filesystem identifiers (``session_id``,
    ``profile_id``) — NOT for hardcoded template paths (use
    :func:`safe_resolve` for those).

    Rejects: non-``str`` values, '..', '/', '\\', null bytes, newlines,
    empty strings, strings longer than 128 chars, and anything else outside
    ``[a-zA-Z0-9_-]``.

    Args:
        base: Directory the identifier is joined onto.
        untrusted: Candidate identifier from an untrusted source.

    Returns:
        The resolved path of ``base / untrusted``.

    Raises:
        ValueError: the identifier is malformed, the resolved path is not
            inside ``base``, or the resolved final component differs from
            the requested identifier.
    """
    if not isinstance(untrusted, str):
        raise ValueError(
            f"unsafe identifier: expected str, got {type(untrusted).__name__}"
        )
    # fullmatch: the whole string, including any trailing newline, must fit.
    if _SAFE_ID_RE.fullmatch(untrusted) is None:
        raise ValueError(f"unsafe identifier: {untrusted!r}")

    base_abs = base.resolve(strict=False)
    target = (base / untrusted).resolve(strict=False)
    # The resolved target must be the base itself or lie somewhere beneath
    # it; a symlinked identifier pointing elsewhere fails here.
    if target != base_abs and base_abs not in target.parents:
        raise ValueError(f"path escape: {untrusted!r}")
    # S9-W2 M-SEC-01: enforce exact name equality after resolve. If
    # resolution hands back a different final component (the original note
    # cites case-insensitive filesystems, where "Session_1" may resolve to
    # an existing "session_1"), treat it as a collision instead of silently
    # aliasing another identifier's path.
    if target != base_abs and target.name != untrusted:
        raise ValueError(
            f"path-case collision: resolved {target.name!r} != "
            f"requested {untrusted!r}"
        )
    return target
202
+
203
+
204
+ # ---------------------------------------------------------------------------
205
+ # 6.10 SHA-256 integrity verifier (SEC-06-01)
206
+ # ---------------------------------------------------------------------------
207
+
208
+
209
def verify_sha256(data: bytes, expected_hex: str) -> None:
    """Check that the SHA-256 of ``data`` equals ``expected_hex``.

    The comparison uses ``hmac.compare_digest`` (constant-time) and accepts
    ``expected_hex`` in either letter case.

    Args:
        data: Bytes to hash.
        expected_hex: Expected digest as a 64-character hex string.

    Raises:
        IntegrityError: ``expected_hex`` is not a ``str``, is not 64
            characters long, or does not match the computed digest.
    """
    if not isinstance(expected_hex, str):
        raise IntegrityError("expected_hex must be str")
    if len(expected_hex) != 64:
        raise IntegrityError(
            f"expected_hex must be 64 chars, got {len(expected_hex)}"
        )
    # Compare as bytes: compare_digest raises TypeError when handed a str
    # containing non-ASCII characters, which would escape as the wrong
    # exception type. Encoded non-ASCII input simply fails to match.
    actual = hashlib.sha256(data).hexdigest().encode("ascii")
    expected = expected_hex.lower().encode("utf-8", "surrogatepass")
    if not hmac.compare_digest(actual, expected):
        raise IntegrityError("SHA-256 mismatch")
226
+
227
+
228
+ # ---------------------------------------------------------------------------
229
+ # 6.3 Secret redaction (SEC-02-01, SEC-01-03)
230
+ # ---------------------------------------------------------------------------
231
+
232
+
233
# Default redaction table: (compiled pattern, label). ``redact_secrets``
# applies these in order for every aggression level.
_SECRET_PATTERNS: tuple[tuple[re.Pattern[str], str], ...] = tuple(
    (re.compile(regex), label)
    for regex, label in (
        (r"sk-ant-[A-Za-z0-9_\-]{20,}", "ANTHROPIC"),
        (r"sk-[A-Za-z0-9_\-]{20,}", "OPENAI"),
        (r"ghp_[A-Za-z0-9]{30,}", "GITHUB"),
        (r"AKIA[A-Z0-9]{16}", "AWS"),
        (r"xoxb-[A-Za-z0-9\-]{10,}", "SLACK"),
        (r"ey[A-Za-z0-9_\-]{10,}\.ey[A-Za-z0-9_\-]{10,}\.[A-Za-z0-9_\-]{5,}",
         "JWT"),
        (r"-----BEGIN [A-Z ]+-----", "PRIVATE_KEY"),
    )
)

# LLD-00 §5 high-aggression table (P0.3). These target concrete secret
# shapes and are applied BEFORE the defaults when ``aggression='high'``, so
# their more specific labels win over the broader 'OPENAI'/'ANTHROPIC' ones.
_HIGH_AGGRESSION_PATTERNS: tuple[tuple[re.Pattern[str], str], ...] = tuple(
    (re.compile(regex), label)
    for regex, label in (
        # Three dot-separated base64url segments, the first starting "eyJ".
        (r"\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b",
         "JWT"),
        # ``Authorization: Bearer ...`` style tokens.
        (r"\bBearer\s+[A-Za-z0-9_\-.=]{20,}", "BEARER"),
        # GitHub tokens with the ghp_/gho_/ghs_ prefixes.
        (r"\bghp_[A-Za-z0-9]{36}\b|\bgho_[A-Za-z0-9]{36}\b|\bghs_[A-Za-z0-9]{36}\b",
         "GITHUB_PAT"),
        # Anthropic API/admin keys.
        (r"\bsk-ant-(?:api|admin)\d{2}-[A-Za-z0-9_-]{50,}\b", "ANTHROPIC_KEY"),
        # OpenAI keys carrying the "T3BlbkFJ" (base64 of "OpenAI") sentinel.
        (r"\bsk-[A-Za-z0-9]{20,}T3BlbkFJ[A-Za-z0-9]{20,}\b", "OPENAI_KEY"),
        # Env-var-style secret (e.g. SLM_API_ABC123...).
        #
        # H-08 (Stage 8) / S9-W2 H-SEC-02: pure-letter UPPER_SNAKE_CASE
        # constants must NOT be redacted. The pattern deliberately has no
        # "contains a digit" lookahead (recorded as a backtracking DoS risk
        # on crafted input); ``redact_secrets`` checks for a digit in plain
        # Python after the match instead.
        (r"\b[A-Z]{2,5}_[A-Z0-9]{20,}\b", "GENERIC_KEY"),
    )
)

# Accepted values for the ``aggression`` argument of ``redact_secrets``.
_VALID_AGGRESSION = frozenset(("normal", "high"))
280
+
281
+
282
def _shannon_entropy(s: str) -> float:
    """Return the Shannon entropy of ``s`` in bits per character.

    An empty string yields ``0.0``.
    """
    if not s:  # pragma: no cover — callers guard
        return 0.0
    length = len(s)
    entropy = 0.0
    # dict.fromkeys visits each distinct character once, in first-occurrence
    # order, so the terms are accumulated in a deterministic sequence.
    for symbol in dict.fromkeys(s):
        share = s.count(symbol) / length
        entropy -= share * math.log2(share)
    return entropy
294
+
295
+
296
def redact_secrets(text: str, *, entropy_threshold: float = 4.5,
                   window: int = 32,
                   aggression: str = "normal") -> str:
    """Replace detected secrets with ``[REDACTED:TYPE:last4]`` markers.

    Passes, in order:
      1. ``_HIGH_AGGRESSION_PATTERNS`` — only when ``aggression='high'``
         (LLD-00 §5). A ``GENERIC_KEY`` match is left untouched unless it
         contains at least one digit, so pure UPPER_SNAKE constants survive.
      2. ``_SECRET_PATTERNS`` — always applied.
      3. Entropy sweep — every run of at least ``window`` characters from
         ``[A-Za-z0-9_\\-./+]`` whose Shannon entropy is at or above
         ``entropy_threshold`` becomes ``[REDACTED:ENTROPY:last4]``.

    Per the original design notes, ``aggression='high'`` is mandatory for
    every LLM-bound prompt (LLD-11 evolution dispatch), and LLD-07 §6.3
    requires every string entering cache or dashboard to pass through here.

    Args:
        text: Text to scrub. Non-``str`` and empty values are returned as-is.
        entropy_threshold: Minimum entropy (bits/char) for the sweep to redact.
        window: Minimum run length considered by the entropy sweep.
        aggression: ``"normal"`` or ``"high"``.

    Returns:
        ``text`` with detected secrets replaced by markers.

    Raises:
        ValueError: ``aggression`` is not one of the accepted values.
    """
    if aggression not in _VALID_AGGRESSION:
        raise ValueError(
            f"aggression must be one of {sorted(_VALID_AGGRESSION)}, "
            f"got {aggression!r}"
        )
    if not isinstance(text, str) or not text:
        return text

    def _make_replacer(label: str, *, require_digit: bool):
        """Build the ``re.sub`` callback that emits the marker for ``label``."""
        def _replace(match: re.Match[str]) -> str:
            secret = match.group(0)
            # S9-W2 H-SEC-02: the digit requirement lives here rather than
            # in the regex, so digit-free matches pass through unchanged.
            if require_digit and not any(ch.isdigit() for ch in secret):
                return secret
            return f"[REDACTED:{label}:{secret[-4:]}]"
        return _replace

    scrubbed = text

    if aggression == "high":
        for pattern, label in _HIGH_AGGRESSION_PATTERNS:
            scrubbed = pattern.sub(
                _make_replacer(label, require_digit=(label == "GENERIC_KEY")),
                scrubbed,
            )

    for pattern, label in _SECRET_PATTERNS:
        scrubbed = pattern.sub(
            _make_replacer(label, require_digit=False), scrubbed
        )

    # S9-W2 L-SEC-01: shape of an already-emitted marker; a run containing
    # one is left alone so its provenance label is not lost.
    # NOTE(review): the run class below has no '[', ':' or ']', so as
    # written a run cannot contain a complete marker — confirm whether this
    # guard is still needed.
    marker_re = re.compile(r"\[REDACTED:[A-Z_]+:[^\]]+\]")

    # ``=`` is deliberately absent from the run class so ``key=VALUE`` splits
    # at the sign instead of bridging a short ``key=`` prefix into the value.
    run_re = re.compile(r"[A-Za-z0-9_\-./+]{%d,}" % window)
    constant_re = re.compile(r"^[A-Z_]+$")

    def _sweep(match: re.Match[str]) -> str:
        run = match.group(0)
        if marker_re.search(run) is not None:
            return run
        score = _shannon_entropy(run)
        # S9-SKEP-12: a pure ``[A-Z_]`` run is only skipped as a "constant"
        # while its entropy stays below the threshold; at or above it, the
        # run is redacted like any other.
        if constant_re.match(run) is not None and score < entropy_threshold:
            return run
        if score >= entropy_threshold:
            return f"[REDACTED:ENTROPY:{run[-4:]}]"
        return run

    return run_re.sub(_sweep, scrubbed)
395
+
396
+
397
+ # ---------------------------------------------------------------------------
398
+ # 6.6 Install-token generation + verification (SEC-01-02, SEC-06-03)
399
+ # ---------------------------------------------------------------------------
400
+
401
+
402
def _install_token_path() -> Path:  # pragma: no cover — monkeypatched in tests
    """Return the default install-token file under the user's home directory.

    Kept as a function so tests can monkeypatch the location.
    """
    return Path.home().joinpath(".superlocalmemory", ".install_token")
405
+
406
+
407
def ensure_install_token() -> str:
    """Create or read the install token at ``~/.superlocalmemory/.install_token``.

    On first call, creates the file containing ``secrets.token_hex(32)``
    and sets mode 0600 on POSIX. On subsequent calls, returns the existing
    non-empty token unchanged. An empty token file is regenerated.

    Per the original design notes the token is used as:
      - ``X-SLM-Hook-Token`` header for ``/internal/prewarm`` auth.
      - Cache-install binding via ``slm_meta`` row.

    Returns:
        The token stored on disk (pre-existing or freshly generated).

    Raises:
        OSError: the token cannot be written — including when the token
            path is a symlink, which is never written through.
    """
    token_path = _install_token_path()
    token_path.parent.mkdir(parents=True, exist_ok=True)

    if token_path.exists():
        token = token_path.read_text(encoding="utf-8").strip()
        if token:
            return token
        # Empty file — fall through and regenerate.

    token = _secrets.token_hex(32)
    payload = token.encode("utf-8")
    # O_NOFOLLOW is not defined on every platform; 0 leaves the flags as-is.
    nofollow = getattr(os, "O_NOFOLLOW", 0)

    def _write_via_fd(flags: int) -> None:
        """Open the token path with ``flags`` (mode 0600) and write the token."""
        fd = os.open(str(token_path), flags | nofollow, 0o600)
        try:
            os.write(fd, payload)
        finally:
            os.close(fd)

    def _write_fallback(cause: OSError) -> None:
        """Plain-write fallback for filesystems that reject the flags above."""
        # ``write_text`` follows symlinks. If the low-level open failed
        # because the path is a symlink, falling back would undo exactly the
        # protection O_NOFOLLOW was asked for — surface the error instead.
        if token_path.is_symlink():
            raise cause
        token_path.write_text(token, encoding="utf-8")

    # S9-W2 H-SEC-01: O_CREAT | O_EXCL fails atomically when the file
    # already exists, so a concurrent creator is detected rather than
    # silently overwritten.
    # NOTE(review): a process that loses the race but reads the file before
    # the winner has written will see it empty and overwrite it — confirm
    # whether that window matters for callers.
    try:
        _write_via_fd(os.O_WRONLY | os.O_CREAT | os.O_EXCL)
    except FileExistsError:
        # Someone else created the file first. Prefer their token.
        try:
            existing = token_path.read_text(encoding="utf-8").strip()
        except OSError:  # pragma: no cover — defensive
            existing = ""
        if existing:
            return existing
        # The existing file is empty or unreadable — overwrite it so a
        # valid token still ends up on disk.
        try:
            _write_via_fd(os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
        except OSError as exc:  # pragma: no cover — fallback for exotic FS
            _write_fallback(exc)
    except OSError as exc:  # pragma: no cover — fallback for exotic FS
        _write_fallback(exc)

    if not _is_windows():
        # Best effort: tighten permissions even if the fallback path created
        # the file with the process umask.
        try:
            os.chmod(token_path, 0o600)
        except OSError:  # pragma: no cover
            pass

    return token
485
+
486
+
487
def verify_install_token(presented: str) -> bool:
    """Constant-time compare ``presented`` against the stored install token.

    Args:
        presented: Candidate token supplied by the caller.

    Returns:
        ``True`` only when ``presented`` equals the stripped contents of the
        install-token file. ``False`` (never raises) on a non-string or empty
        input, a missing, unreadable, undecodable or empty token file, or a
        mismatch.
    """
    if not isinstance(presented, str) or not presented:
        return False
    token_path = _install_token_path()
    if not token_path.exists():
        return False
    try:
        stored = token_path.read_text(encoding="utf-8").strip()
    except (OSError, UnicodeDecodeError):  # pragma: no cover
        # UnicodeDecodeError is a ValueError, not an OSError: a corrupted
        # token file must read as "no match" instead of crashing the caller.
        return False
    if not stored:
        return False
    try:
        # hmac.compare_digest() raises TypeError when given str arguments
        # that contain non-ASCII characters, so compare UTF-8 bytes instead.
        return hmac.compare_digest(
            stored.encode("utf-8"), presented.encode("utf-8")
        )
    except UnicodeEncodeError:
        # e.g. a lone surrogate in ``presented``; it cannot equal a token
        # that was successfully decoded from UTF-8.
        return False
504
+
505
+
506
def rotate_install_token() -> tuple[str, str]:
    """S-M07 — atomically rotate the install token.

    Returns ``(old_token, new_token)``. The old token is captured BEFORE
    the rotation so callers that need to invalidate cached HMAC markers
    can detect the change. Atomic file-swap via ``os.replace`` so a
    concurrent ``verify_install_token`` never observes a half-written
    value.

    Callers (e.g. ``slm rotate-token`` CLI) SHOULD restart the daemon
    after a successful rotation: in-memory HMAC marker caches — used by
    ``recall_pipeline._emit_marker`` — retain the old token until the
    next cold start. Without a restart, already-emitted markers fail
    validation on the next ``post_tool_outcome_hook`` call (harmless —
    just a dropped signal, never a security bypass), but new markers
    under the new token mix with old-token markers still in transit.

    Filesystem errors are not raised. The result is ``("", "")`` when the
    token directory cannot be created, and ``(old_token, "")`` when the new
    token cannot be written, so an empty ``new_token`` signals failure and
    the caller can surface a graceful message to the user.
    """
    token_path = _install_token_path()
    try:
        token_path.parent.mkdir(parents=True, exist_ok=True)
    except OSError:  # pragma: no cover — defensive
        return ("", "")

    old = ""
    if token_path.exists():
        try:
            old = token_path.read_text(encoding="utf-8").strip()
        except (OSError, UnicodeDecodeError):  # pragma: no cover
            # An unreadable or corrupted old token is treated as "none";
            # UnicodeDecodeError is a ValueError, so it needs its own entry.
            old = ""

    new_token = _secrets.token_hex(32)
    # Write via tmp + os.replace for atomic swap.
    tmp = token_path.with_suffix(
        token_path.suffix + f".rot.{os.getpid()}.tmp"
    )
    tmp_created = False
    try:
        flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL
        if hasattr(os, "O_NOFOLLOW"):
            flags |= os.O_NOFOLLOW
        fd = os.open(str(tmp), flags, 0o600)
        tmp_created = True
        try:
            os.write(fd, new_token.encode("utf-8"))
        finally:
            os.close(fd)
        os.replace(str(tmp), str(token_path))
    except OSError:  # pragma: no cover — fallback
        if tmp_created:
            # Do not leave a stray file holding the new secret behind when
            # the write or the rename failed. Only remove what we created.
            try:
                os.unlink(str(tmp))
            except OSError:
                pass
        try:
            token_path.write_text(new_token, encoding="utf-8")
        except OSError:
            return (old, "")
    if not _is_windows():
        try:
            os.chmod(token_path, 0o600)
        except OSError:  # pragma: no cover
            pass
    return (old, new_token)
565
+
566
+
567
+ # ---------------------------------------------------------------------------
568
+ # 6.9 Subprocess sanitizer (SEC-05-01)
569
+ # ---------------------------------------------------------------------------
570
+
571
+
572
# Names of the environment variables that are inherited from the parent
# process when building the restricted default subprocess environment.
_DEFAULT_SAFE_ENV_KEYS: tuple[str, ...] = (
    # General / POSIX
    "PATH",
    "HOME",
    "USER",
    "LANG",
    "LC_ALL",
    # Windows
    "SYSTEMROOT",
    "TEMP",
    "TMP",
    "USERPROFILE",
)
576
+
577
+
578
def _default_env() -> dict[str, str]:
    """Return the allow-listed subset of the current process environment.

    Only keys named in ``_DEFAULT_SAFE_ENV_KEYS`` that are actually set in
    ``os.environ`` are included, in allow-list order.
    """
    inherited: dict[str, str] = {}
    for name in _DEFAULT_SAFE_ENV_KEYS:
        if name in os.environ:
            inherited[name] = os.environ[name]
    return inherited
580
+
581
+
582
def run_subprocess_safe(
    argv: list[str],
    *,
    timeout: float = 5.0,
    env: dict[str, str] | None = None,
    check: bool = False,
    capture_output: bool = True,
) -> subprocess.CompletedProcess:
    """Safe wrapper around ``subprocess.run``.

    Rules enforced:
      - ``argv`` must be a list of strings (never a shell string).
      - ``shell=False`` always.
      - ``timeout`` is mandatory: ``None``, infinity and NaN are rejected
        because they would let the child run unbounded.
      - Restricted environment by default — only a minimal set of safe keys.
      - Callers may pass an explicit ``env`` to add specific variables.

    This is the ONE place in the codebase allowed to call ``subprocess.run``.
    Grep guard in CI enforces this (LLD-07 §7 SEC-HR-06).

    Args:
        argv: Program and arguments, passed to ``subprocess.run`` unchanged.
        timeout: Upper bound in seconds for the child process.
        env: Extra variables merged on top of the restricted default
            environment (entries here override inherited ones).
        check: Forwarded to ``subprocess.run``.
        capture_output: Forwarded to ``subprocess.run``.

    Returns:
        The ``subprocess.CompletedProcess`` produced by ``subprocess.run``.

    Raises:
        TypeError: ``argv`` is not a list, or one of its items is not a str.
        ValueError: ``argv`` is empty, or ``timeout`` is ``None`` or not a
            finite number.
        subprocess.TimeoutExpired: The child exceeded ``timeout``.
        subprocess.CalledProcessError: ``check`` is true and the child
            exited with a non-zero status.
    """
    if not isinstance(argv, list):
        raise TypeError("argv must be list[str], shell=False only")
    if not argv:
        raise ValueError("argv must be non-empty")
    for i, piece in enumerate(argv):
        if not isinstance(piece, str):
            raise TypeError(f"argv[{i}] must be str, got {type(piece).__name__}")
    # ``not timeout < inf`` is true for both infinity and NaN, the two float
    # values that would silently disable the bound just like ``None`` does.
    if timeout is None or not timeout < float("inf"):
        raise ValueError(
            "timeout is mandatory and must be a finite number of seconds"
        )

    effective_env = _default_env()
    if env is not None:
        effective_env.update(env)

    # NOTE: This is the sanctioned subprocess.run call site for SLM.
    return subprocess.run(  # noqa: S603
        argv,
        shell=False,
        timeout=timeout,
        check=check,
        capture_output=capture_output,
        env=effective_env,
    )
623
+
624
+
625
# Names exported by ``from <module> import *``. ``rotate_install_token`` is
# listed next to the other install-token helpers so the whole token API is
# exported together.
__all__ = (
    "PathTraversalError",
    "IntegrityError",
    "safe_resolve",
    "safe_resolve_identifier",
    "verify_sha256",
    "redact_secrets",
    "ensure_install_token",
    "verify_install_token",
    "rotate_install_token",
    "run_subprocess_safe",
)