superlocalmemory 3.4.19 → 3.4.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. package/CHANGELOG.md +24 -0
  2. package/README.md +42 -34
  3. package/bin/slm +11 -0
  4. package/bin/slm.bat +12 -0
  5. package/package.json +4 -3
  6. package/pyproject.toml +4 -3
  7. package/scripts/build-slm-hook.ps1 +40 -0
  8. package/scripts/build-slm-hook.sh +45 -0
  9. package/scripts/build_entry.py +452 -0
  10. package/scripts/ci/stage5b_gate.sh +50 -0
  11. package/scripts/postinstall/validation.js +187 -0
  12. package/scripts/postinstall-interactive.js +756 -0
  13. package/scripts/postinstall_binary.js +287 -0
  14. package/scripts/release_manifest.py +273 -0
  15. package/scripts/slm-hook.spec +56 -0
  16. package/skills/slm-build-graph/SKILL.md +423 -0
  17. package/skills/slm-list-recent/SKILL.md +348 -0
  18. package/skills/slm-recall/SKILL.md +343 -0
  19. package/skills/slm-remember/SKILL.md +194 -0
  20. package/skills/slm-show-patterns/SKILL.md +224 -0
  21. package/skills/slm-status/SKILL.md +363 -0
  22. package/skills/slm-switch-profile/SKILL.md +442 -0
  23. package/src/superlocalmemory/cli/commands.py +254 -79
  24. package/src/superlocalmemory/cli/context_commands.py +192 -0
  25. package/src/superlocalmemory/cli/daemon.py +15 -1
  26. package/src/superlocalmemory/cli/db_migrate.py +80 -0
  27. package/src/superlocalmemory/cli/escape_hatch.py +220 -0
  28. package/src/superlocalmemory/cli/main.py +72 -1
  29. package/src/superlocalmemory/core/context_cache.py +397 -0
  30. package/src/superlocalmemory/core/engine.py +38 -2
  31. package/src/superlocalmemory/core/engine_wiring.py +1 -1
  32. package/src/superlocalmemory/core/ram_lock.py +111 -0
  33. package/src/superlocalmemory/core/recall_pipeline.py +433 -3
  34. package/src/superlocalmemory/core/recall_worker.py +8 -3
  35. package/src/superlocalmemory/core/security_primitives.py +635 -0
  36. package/src/superlocalmemory/core/shadow_router.py +319 -0
  37. package/src/superlocalmemory/core/slm_disabled.py +87 -0
  38. package/src/superlocalmemory/core/slmignore.py +125 -0
  39. package/src/superlocalmemory/core/topic_signature.py +143 -0
  40. package/src/superlocalmemory/core/worker_pool.py +14 -3
  41. package/src/superlocalmemory/encoding/cognitive_consolidator.py +2 -2
  42. package/src/superlocalmemory/evolution/budget.py +321 -0
  43. package/src/superlocalmemory/evolution/llm_dispatch.py +508 -0
  44. package/src/superlocalmemory/evolution/skill_evolver.py +144 -94
  45. package/src/superlocalmemory/hooks/_outcome_common.py +506 -0
  46. package/src/superlocalmemory/hooks/adapter_base.py +317 -0
  47. package/src/superlocalmemory/hooks/antigravity_adapter.py +192 -0
  48. package/src/superlocalmemory/hooks/claude_code_hooks.py +33 -1
  49. package/src/superlocalmemory/hooks/context_payload.py +312 -0
  50. package/src/superlocalmemory/hooks/copilot_adapter.py +154 -0
  51. package/src/superlocalmemory/hooks/cross_platform_connector.py +90 -0
  52. package/src/superlocalmemory/hooks/cursor_adapter.py +195 -0
  53. package/src/superlocalmemory/hooks/hook_handlers.py +109 -8
  54. package/src/superlocalmemory/hooks/ide_connector.py +25 -2
  55. package/src/superlocalmemory/hooks/post_tool_async_hook.py +165 -0
  56. package/src/superlocalmemory/hooks/post_tool_outcome_hook.py +223 -0
  57. package/src/superlocalmemory/hooks/prewarm_auth.py +170 -0
  58. package/src/superlocalmemory/hooks/session_registry.py +186 -0
  59. package/src/superlocalmemory/hooks/stop_outcome_hook.py +134 -0
  60. package/src/superlocalmemory/hooks/sync_loop.py +114 -0
  61. package/src/superlocalmemory/hooks/user_prompt_hook.py +128 -0
  62. package/src/superlocalmemory/hooks/user_prompt_rehash_hook.py +202 -0
  63. package/src/superlocalmemory/infra/backup.py +3 -3
  64. package/src/superlocalmemory/infra/cloud_backup.py +2 -2
  65. package/src/superlocalmemory/infra/event_bus.py +2 -2
  66. package/src/superlocalmemory/infra/webhook_dispatcher.py +3 -3
  67. package/src/superlocalmemory/learning/arm_catalog.py +99 -0
  68. package/src/superlocalmemory/learning/bandit.py +526 -0
  69. package/src/superlocalmemory/learning/bandit_cache.py +133 -0
  70. package/src/superlocalmemory/learning/behavioral.py +53 -1
  71. package/src/superlocalmemory/learning/consolidation_cycle.py +381 -0
  72. package/src/superlocalmemory/learning/consolidation_worker.py +188 -520
  73. package/src/superlocalmemory/learning/database.py +256 -0
  74. package/src/superlocalmemory/learning/dedup_hnsw.py +413 -0
  75. package/src/superlocalmemory/learning/ensemble.py +300 -0
  76. package/src/superlocalmemory/learning/fact_outcome_joins.py +207 -0
  77. package/src/superlocalmemory/learning/forgetting_scheduler.py +55 -0
  78. package/src/superlocalmemory/learning/hnsw_dedup.py +69 -0
  79. package/src/superlocalmemory/learning/labeler.py +87 -0
  80. package/src/superlocalmemory/learning/legacy_migration.py +277 -0
  81. package/src/superlocalmemory/learning/memory_merge.py +160 -0
  82. package/src/superlocalmemory/learning/model_cache.py +269 -0
  83. package/src/superlocalmemory/learning/model_rollback.py +278 -0
  84. package/src/superlocalmemory/learning/outcome_queue.py +284 -0
  85. package/src/superlocalmemory/learning/pattern_miner.py +415 -0
  86. package/src/superlocalmemory/learning/pattern_miner_constants.py +47 -0
  87. package/src/superlocalmemory/learning/ranker.py +225 -81
  88. package/src/superlocalmemory/learning/ranker_common.py +163 -0
  89. package/src/superlocalmemory/learning/ranker_retrain_legacy.py +202 -0
  90. package/src/superlocalmemory/learning/ranker_retrain_online.py +411 -0
  91. package/src/superlocalmemory/learning/reward.py +777 -0
  92. package/src/superlocalmemory/learning/reward_archive.py +210 -0
  93. package/src/superlocalmemory/learning/reward_boost.py +201 -0
  94. package/src/superlocalmemory/learning/reward_proxy.py +326 -0
  95. package/src/superlocalmemory/learning/shadow_test.py +524 -0
  96. package/src/superlocalmemory/learning/signal_worker.py +270 -0
  97. package/src/superlocalmemory/learning/signals.py +314 -0
  98. package/src/superlocalmemory/learning/trigram_index.py +547 -0
  99. package/src/superlocalmemory/mcp/server.py +5 -5
  100. package/src/superlocalmemory/mcp/tools_context.py +183 -0
  101. package/src/superlocalmemory/mcp/tools_core.py +92 -27
  102. package/src/superlocalmemory/parameterization/soft_prompt_generator.py +13 -0
  103. package/src/superlocalmemory/retrieval/engine.py +52 -0
  104. package/src/superlocalmemory/server/api.py +2 -2
  105. package/src/superlocalmemory/server/bandit_loops.py +140 -0
  106. package/src/superlocalmemory/server/middleware/__init__.py +11 -0
  107. package/src/superlocalmemory/server/middleware/security_headers.py +144 -0
  108. package/src/superlocalmemory/server/routes/backup.py +36 -13
  109. package/src/superlocalmemory/server/routes/behavioral.py +50 -19
  110. package/src/superlocalmemory/server/routes/brain.py +1234 -0
  111. package/src/superlocalmemory/server/routes/data_io.py +4 -4
  112. package/src/superlocalmemory/server/routes/events.py +2 -2
  113. package/src/superlocalmemory/server/routes/helpers.py +1 -1
  114. package/src/superlocalmemory/server/routes/learning.py +192 -7
  115. package/src/superlocalmemory/server/routes/memories.py +189 -1
  116. package/src/superlocalmemory/server/routes/prewarm.py +171 -0
  117. package/src/superlocalmemory/server/routes/profiles.py +3 -3
  118. package/src/superlocalmemory/server/routes/token.py +88 -0
  119. package/src/superlocalmemory/server/routes/ws.py +5 -5
  120. package/src/superlocalmemory/server/security_middleware.py +13 -7
  121. package/src/superlocalmemory/server/ui.py +2 -2
  122. package/src/superlocalmemory/server/unified_daemon.py +335 -3
  123. package/src/superlocalmemory/skills/slm-build-graph/SKILL.md +423 -0
  124. package/src/superlocalmemory/skills/slm-list-recent/SKILL.md +348 -0
  125. package/src/superlocalmemory/skills/slm-recall/SKILL.md +343 -0
  126. package/src/superlocalmemory/skills/slm-remember/SKILL.md +194 -0
  127. package/src/superlocalmemory/skills/slm-show-patterns/SKILL.md +224 -0
  128. package/src/superlocalmemory/skills/slm-status/SKILL.md +363 -0
  129. package/src/superlocalmemory/skills/slm-switch-profile/SKILL.md +442 -0
  130. package/src/superlocalmemory/storage/migration_runner.py +545 -0
  131. package/src/superlocalmemory/storage/migrations/M001_add_signal_features_columns.py +67 -0
  132. package/src/superlocalmemory/storage/migrations/M002_model_state_history.py +132 -0
  133. package/src/superlocalmemory/storage/migrations/M003_migration_log.py +38 -0
  134. package/src/superlocalmemory/storage/migrations/M004_cross_platform_sync_log.py +46 -0
  135. package/src/superlocalmemory/storage/migrations/M005_bandit_tables.py +75 -0
  136. package/src/superlocalmemory/storage/migrations/M006_action_outcomes_reward.py +75 -0
  137. package/src/superlocalmemory/storage/migrations/M007_pending_outcomes.py +63 -0
  138. package/src/superlocalmemory/storage/migrations/M009_model_lineage.py +54 -0
  139. package/src/superlocalmemory/storage/migrations/M010_evolution_config.py +75 -0
  140. package/src/superlocalmemory/storage/migrations/M011_archive_and_merge.py +87 -0
  141. package/src/superlocalmemory/storage/migrations/M012_shadow_observations.py +72 -0
  142. package/src/superlocalmemory/storage/migrations/M013_bi_temporal_columns.py +55 -0
  143. package/src/superlocalmemory/storage/migrations/__init__.py +81 -0
  144. package/src/superlocalmemory/storage/models.py +4 -0
  145. package/src/superlocalmemory/ui/css/brain.css +409 -0
  146. package/src/superlocalmemory/ui/css/legacy-dashboard.css +645 -0
  147. package/src/superlocalmemory/ui/index.html +459 -1345
  148. package/src/superlocalmemory/ui/js/brain.js +1321 -0
  149. package/src/superlocalmemory/ui/js/clusters.js +123 -4
  150. package/src/superlocalmemory/ui/js/init.js +48 -39
  151. package/src/superlocalmemory/ui/js/memories.js +88 -2
  152. package/src/superlocalmemory/ui/js/modal.js +71 -1
  153. package/src/superlocalmemory/ui/js/ng-shell.js +101 -88
  154. package/src/superlocalmemory/ui/js/trust-dashboard.js +168 -25
  155. package/src/superlocalmemory/ui/vendor/bootstrap-icons/bootstrap-icons.css +2018 -0
  156. package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff +0 -0
  157. package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff2 +0 -0
  158. package/src/superlocalmemory/ui/vendor/bootstrap.bundle.min.js +7 -0
  159. package/src/superlocalmemory/ui/vendor/bootstrap.min.css +6 -0
  160. package/src/superlocalmemory/ui/vendor/d3.v7.min.js +2 -0
  161. package/src/superlocalmemory/ui/vendor/graphology-library.min.js +2 -0
  162. package/src/superlocalmemory/ui/vendor/graphology.umd.min.js +2 -0
  163. package/src/superlocalmemory/ui/vendor/inter-ui/inter-variable.min.css +8 -0
  164. package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable-Italic.woff2 +0 -0
  165. package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable.woff2 +0 -0
  166. package/src/superlocalmemory/ui/vendor/sigma.min.js +1 -0
  167. package/src/superlocalmemory/ui/js/behavioral.js +0 -447
  168. package/src/superlocalmemory/ui/js/graph-core.js +0 -447
  169. package/src/superlocalmemory/ui/js/graph-interactions.js +0 -351
  170. package/src/superlocalmemory/ui/js/learning.js +0 -435
  171. package/src/superlocalmemory/ui/js/patterns.js +0 -93
  172. package/src/superlocalmemory.egg-info/PKG-INFO +0 -647
  173. package/src/superlocalmemory.egg-info/SOURCES.txt +0 -335
  174. package/src/superlocalmemory.egg-info/dependency_links.txt +0 -1
  175. package/src/superlocalmemory.egg-info/entry_points.txt +0 -2
  176. package/src/superlocalmemory.egg-info/requires.txt +0 -58
  177. package/src/superlocalmemory.egg-info/top_level.txt +0 -1
@@ -0,0 +1,508 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under AGPL-3.0-or-later - see LICENSE file
3
+ # Part of SuperLocalMemory v3.4.22 — LLD-11 §Dispatch
4
+
5
+ """Central LLM dispatch for the skill-evolution subsystem.
6
+
7
+ Enforces MASTER-PLAN D2 (no top-tier "O-family" Claude models, no
8
+ ``gpt-4-turbo``) and LLD-00 §5 (every LLM-bound prompt passes through
9
+ ``redact_secrets(aggression='high')`` FIRST).
10
+
11
+ Every evolution LLM call funnels through :func:`_dispatch_llm`. Writes an
12
+ audit row to ``evolution_llm_cost_log`` after the dispatch succeeds — the
13
+ row stores only the *redacted* prompt length and the model, never the
14
+ raw prompt, so no canary can leak via the cost log.
15
+
16
+ SB-2/SB-3/SB-4 fix cluster (v3.4.22 Stage 8):
17
+ * All backend entry points (claude CLI, ollama, Anthropic/OpenAI API)
18
+ live HERE, not in ``skill_evolver``. ``SkillEvolver._llm_call``
19
+ delegates to ``_dispatch_llm`` so the validate → redact → log
20
+ invariants can never be bypassed.
21
+ * The claude CLI backend routes through
22
+ ``core.security_primitives.run_subprocess_safe`` — no bare
23
+ ``subprocess.run`` in evolution code (SB-4).
24
+
25
+ Author: Varun Pratap Bhardwaj / Qualixar
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ import atexit
31
+ import logging
32
+ import os
33
+ import sqlite3
34
+ import tempfile
35
+ import threading
36
+ from datetime import datetime, timezone
37
+ from pathlib import Path
38
+ from typing import Callable, Optional
39
+
40
+ from superlocalmemory.core.security_primitives import (
41
+ redact_secrets,
42
+ run_subprocess_safe,
43
+ )
44
+
45
+ logger = logging.getLogger(__name__)
46
+
47
+
48
+ # M-P-09: one cached writer connection per learning_db path — LLM cost
49
+ # logging used to pay a fresh ``sqlite3.connect`` + fsync per call. At
50
+ # current volume (<10 calls/cycle) the cost is small, but caching keeps
51
+ # the dispatch code consistent with the rest of the "one-cached-writer"
52
+ # pattern the codebase standardised on (reward.py, trigram_index.py).
53
+ #
54
+ # S9-W2 C4 (fork safety): a cached SQLite handle inherited across
55
+ # ``os.fork()`` corrupts the DB because both processes think they hold an
56
+ # exclusive lock. We clear the cache in any forked child via
57
+ # ``os.register_at_fork`` AND keyed-by-pid within ``_get_cost_conn`` so
58
+ # a child that somehow missed the registrar still behaves correctly.
59
+ # On Windows / platforms without ``register_at_fork`` the fork path
60
+ # cannot happen so the pid-check is free insurance.
61
+ #
62
+ # S9-W2 C9 (serialization): ``_COST_CONN_LOCK`` is now ONLY held during
63
+ # the get/create-cache flip, NOT during the ``execute+commit`` inside
64
+ # ``_log_cost``. SQLite's own writer serialisation (BEGIN IMMEDIATE +
65
+ # busy_timeout) is the correct tool for write ordering; the Python lock
66
+ # was converting 10 parallel candidates × 3-8ms fsync into a single
67
+ # 30-80ms tail. Cache structure remains intact; the lock's scope shrinks.
68
# Realpath-resolved DB path -> cached writer connection for that path.
_COST_CONN_CACHE: dict[str, sqlite3.Connection] = dict()
# Guards lookups/insertions on the cache mapping and the owner-pid flip.
_COST_CONN_LOCK = threading.Lock()
# Pid of the process that populated the cache; None until first use.
_COST_CONN_OWNER_PID: Optional[int] = None
71
+
72
+
73
def _resolve_cost_key(learning_db: Path) -> str:
    """Map ``learning_db`` to the canonical string used as the cache key.

    M-P-03 fix: two spellings of the same file (for example a path that
    goes through a symlink and the path it points at) must share one
    cached connection, otherwise two writers contend over the same WAL.
    ``os.path.realpath`` collapses such spellings into one key.

    Returns the plain ``str(learning_db)`` if resolution raises ``OSError``.
    """
    raw_path = str(learning_db)
    try:
        canonical = os.path.realpath(raw_path)
    except OSError:  # pragma: no cover — defensive
        return raw_path
    return canonical
84
+
85
+
86
def _reset_cost_cache_for_child() -> None:
    """Reset the module's cost-log state inside a freshly forked child.

    C4: registered with ``os.register_at_fork(after_in_child=...)``. The
    child must not reuse SQLite handles that were opened by the parent,
    so this hook:

    * replaces ``_COST_CONN_LOCK`` with a new, unlocked lock;
    * empties ``_COST_CONN_CACHE`` so the next ``_get_cost_conn`` call
      opens a fresh connection;
    * records the child's pid in ``_COST_CONN_OWNER_PID``.

    The inherited handles are NOT explicitly closed here — only the
    child's cache references to them are dropped.
    """
    global _COST_CONN_LOCK, _COST_CONN_OWNER_PID
    # Only the forking thread survives in the child. If a different parent
    # thread held the lock at fork time, the child's copy would stay locked
    # forever and the first ``with _COST_CONN_LOCK`` would deadlock, so the
    # child always starts with a brand-new lock.
    _COST_CONN_LOCK = threading.Lock()
    _COST_CONN_CACHE.clear()
    _COST_CONN_OWNER_PID = os.getpid()
98
+
99
+
100
def _get_cost_conn(learning_db: Path) -> sqlite3.Connection:
    """Return the cached writer connection for ``learning_db``.

    The path is canonicalised with ``_resolve_cost_key`` and looked up in
    ``_COST_CONN_CACHE`` under ``_COST_CONN_LOCK``. On a miss a new
    connection is opened with ``check_same_thread=False`` and a 2 second
    ``timeout``, stored in the cache and returned.

    The ``sqlite3.connect`` call is not guarded here, so any error it
    raises propagates to the caller.
    """
    global _COST_CONN_OWNER_PID
    cache_key = _resolve_cost_key(learning_db)
    with _COST_CONN_LOCK:
        # Belt-and-suspenders for a missed at-fork hook: if the recorded
        # owner pid is not ours, the cached handles belong to another
        # process and must not be handed out.
        pid_now = os.getpid()
        owner = _COST_CONN_OWNER_PID
        if owner is not None and owner != pid_now:
            _COST_CONN_CACHE.clear()
        _COST_CONN_OWNER_PID = pid_now

        cached = _COST_CONN_CACHE.get(cache_key)
        if cached is None:
            cached = sqlite3.connect(
                cache_key, check_same_thread=False, timeout=2.0,
            )
            _COST_CONN_CACHE[cache_key] = cached
        return cached
120
+
121
+
122
def _close_cost_conns() -> None:
    """Close and forget every cached cost-log connection (atexit hook).

    The cache is snapshotted and emptied while holding
    ``_COST_CONN_LOCK``; the connections are closed afterwards, outside
    the lock. A failure to close one connection never prevents the
    remaining ones from being closed.
    """
    with _COST_CONN_LOCK:
        pending = list(_COST_CONN_CACHE.values())
        _COST_CONN_CACHE.clear()
    for handle in pending:
        try:
            handle.close()
        except Exception:  # pragma: no cover
            continue
132
+
133
+
134
# C4: drop inherited cache entries in every forked child. The hook API only
# exists where ``os`` exposes ``register_at_fork``; elsewhere there is
# nothing to register.
if hasattr(os, "register_at_fork"):
    os.register_at_fork(after_in_child=_reset_cost_cache_for_child)
# Close whatever is still cached when the interpreter shuts down.
atexit.register(_close_cost_conns)
139
+
140
+
141
+ # ---------------------------------------------------------------------------
142
+ # Allow-list and deny-list
143
+ # ---------------------------------------------------------------------------
144
+ #
145
+ # Allow-list explicitly names every model evolution may invoke. Deny-list
146
+ # catches substrings that must NEVER appear in an evolution-issued model
147
+ # id — notably the O-tier Claude family (MASTER-PLAN D2) and OpenAI's
148
+ # ``gpt-4-turbo`` (cost + behaviour regressions observed in prod).
149
+ #
150
+ # NOTE on the deny-list strings: the Stage-5b CI gate scans ``src/`` for
151
+ # the full banned model-family literal. That literal must NEVER appear in
152
+ # this file or any other source file. We check for the shorter substring
153
+ # ``opus`` instead; that catches every Claude O-family id variant without
154
+ # putting the banned literal anywhere in source.
155
+
156
# Exact model ids evolution is permitted to dispatch to.
ALLOWED_LLM_MODELS: frozenset[str] = frozenset((
    "claude-haiku-4-5",
    "claude-sonnet-4-6",
    "ollama:llama3",
    "ollama:qwen2.5",
))

# Substrings that must never occur in a model id.
FORBIDDEN_MODEL_SUBSTRINGS: tuple[str, ...] = (
    "opus",
    "gpt-4-turbo",
)

# Upper bound on ``max_tokens`` for any single dispatch.
MAX_TOKENS_CAP: int = 500
166
+
167
+
168
+ # ---------------------------------------------------------------------------
169
+ # Backends (SB-2, SB-4) — moved out of skill_evolver.py
170
+ # ---------------------------------------------------------------------------
171
+ #
172
+ # Every backend has a uniform signature::
173
+ #
174
+ # backend(prompt: str, *, model: str, max_tokens: int) -> str
175
+ #
176
+ # They receive the ALREADY-REDACTED prompt from ``_dispatch_llm``. They
177
+ # must never log the prompt. They return an empty string on any
178
+ # transport failure (fail-closed: caller treats "" as "no evolution").
179
+
180
+
181
def _call_claude_cli_backend(
    prompt: str, *, model: str, max_tokens: int,
) -> str:
    """Run the ``claude`` CLI on ``prompt`` via ``run_subprocess_safe``.

    SB-4: bare ``subprocess.run`` is banned in evolution code, so the
    shell-out goes through ``run_subprocess_safe``.

    The prompt is written to a temporary UTF-8 text file that is passed
    to the CLI with ``--prompt-file`` and removed afterwards.

    Args:
        prompt: The already-redacted prompt text.
        model: Allow-listed model id; mapped to the CLI short name
            ``"sonnet"`` when it contains ``"sonnet"``, else ``"haiku"``.
        max_tokens: Forwarded to the CLI as ``--max-tokens``.

    Returns:
        The stripped stdout when the process exits with code 0, otherwise
        ``""``. Any exception (including a failure to create or write the
        temp file) is logged at debug level and also yields ``""`` — this
        backend is fail-closed and never raises to its caller.
    """
    # Translate the allow-listed model id to the CLI short name.
    cli_model = "sonnet" if "sonnet" in model else "haiku"

    prompt_file: str | None = None
    try:
        # delete=False: the file must survive the ``with`` so the CLI can
        # open it by path. An explicit UTF-8 encoding keeps non-ASCII
        # prompts from failing under a narrow locale encoding.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".txt", delete=False, encoding="utf-8",
        ) as f:
            # Record the name before writing so a failed write still
            # gets cleaned up in ``finally``.
            prompt_file = f.name
            f.write(prompt)

        result = run_subprocess_safe(
            ["claude", "--model", cli_model, "--print", "--no-input",
             "--max-tokens", str(max_tokens),
             "--prompt-file", prompt_file],
            timeout=120.0,
            env={
                "CLAUDE_CODE_ENTRYPOINT": "cli",
                "ECC_SKIP_OBSERVE": "1",
            },
        )
        stdout = getattr(result, "stdout", "") or ""
        rc = getattr(result, "returncode", 1)
        return stdout.strip() if rc == 0 else ""
    except Exception as exc:  # noqa: BLE001 — fail-closed, never crash caller
        logger.debug("claude CLI backend failed: %s", exc)
        return ""
    finally:
        if prompt_file is not None:
            try:
                os.unlink(prompt_file)
            except OSError:
                pass
223
+
224
+
225
def _call_ollama_backend(
    prompt: str, *, model: str, max_tokens: int,
) -> str:
    """Request a completion from the local Ollama HTTP API.

    ``model`` normally has the form ``"ollama:<model-name>"``; the
    ``ollama:`` prefix is removed before the request is built. The
    request is a non-streaming POST to ``/api/generate`` on
    ``127.0.0.1:11434`` with ``num_predict`` set to ``max_tokens``.

    Returns the ``response`` field of the JSON reply, or ``""`` when the
    field is missing/empty or anything in the round-trip raises.
    """
    import json as _json
    import urllib.request

    if model.startswith("ollama:"):
        ollama_model = model.split(":", 1)[1]
    else:
        ollama_model = model

    request_body = {
        "model": ollama_model,
        "prompt": prompt,
        "stream": False,
        "options": {"num_predict": max_tokens},
    }
    payload = _json.dumps(request_body).encode()

    try:
        request = urllib.request.Request(
            "http://127.0.0.1:11434/api/generate",
            data=payload,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        with urllib.request.urlopen(request, timeout=120) as reply:  # noqa: S310
            parsed = _json.loads(reply.read())
        return parsed.get("response", "") or ""
    except Exception as exc:  # noqa: BLE001
        logger.debug("Ollama backend failed: %s", exc)
        return ""
257
+
258
+
259
def _call_claude_api_backend(
    prompt: str, *, model: str, max_tokens: int,
) -> str:
    """Send ``prompt`` to the Anthropic Messages API and return the text.

    ``model`` is passed to the API unchanged — there is no client-side
    mapping table, so allow-listing a new model only requires editing
    :data:`ALLOWED_LLM_MODELS`.

    Returns the ``text`` attribute of the first content item when it is a
    string. Returns ``""`` when the ``anthropic`` package cannot be
    imported, when the reply carries no usable text, or when the call
    raises.
    """
    try:
        import anthropic  # type: ignore[import-not-found]
    except Exception as exc:  # noqa: BLE001
        logger.debug("anthropic sdk unavailable: %s", exc)
        return ""

    try:
        reply = anthropic.Anthropic().messages.create(
            model=model,
            max_tokens=max_tokens,
            messages=[{"role": "user", "content": prompt}],
        )
        blocks = getattr(reply, "content", None)
        if not blocks or len(blocks) <= 0:
            return ""
        text = getattr(blocks[0], "text", None)
        return text if isinstance(text, str) else ""
    except Exception as exc:  # noqa: BLE001
        logger.debug("Anthropic API backend failed: %s", exc)
        return ""
291
+
292
+
293
+ # ---------------------------------------------------------------------------
294
+ # Backend registry — dispatches by (allow-listed) model id
295
+ # ---------------------------------------------------------------------------
296
+
297
+
298
def _fail_closed_backend(
    prompt: str, *, model: str, max_tokens: int,
) -> str:
    """S9-SKEP-14: backend used for model ids that have no real backend.

    Emits a warning naming the model and returns ``""``, the sentinel the
    dispatch layer treats as "no evolution happened". It exists so that
    an unroutable id is refused loudly instead of being forwarded to a
    paid API. ``prompt`` and ``max_tokens`` are accepted only to match
    the uniform backend signature; they are not used.
    """
    message = (
        "llm_dispatch: no backend registered for model=%r — "
        "fail-closed (returning empty string)"
    )
    logger.warning(message, model)
    return ""
314
+
315
+
316
def _pick_backend(model: str) -> Callable[..., str]:
    """Choose the backend callable for an allow-listed ``model`` id.

    Contract: the caller has already validated ``model`` (``_dispatch_llm``
    runs ``_validate_model`` first).

    S9-SKEP-14 routing, decided purely by prefix:

    * ``ollama:`` -> ``_call_ollama_backend``
    * ``claude-`` -> ``_call_claude_cli_backend`` when the environment
      variable ``SLM_EVOLUTION_BACKEND`` equals ``"claude-cli"``,
      otherwise ``_call_claude_api_backend``
    * anything else -> ``_fail_closed_backend`` (never a paid default)
    """
    if model.startswith("ollama:"):
        return _call_ollama_backend
    if not model.startswith("claude-"):
        return _fail_closed_backend
    # The CLI path is opt-in; the Anthropic API is the default for Claude ids.
    wants_cli = os.environ.get("SLM_EVOLUTION_BACKEND") == "claude-cli"
    return _call_claude_cli_backend if wants_cli else _call_claude_api_backend
336
+
337
+
338
def _actual_llm_call(prompt: str, *, model: str, max_tokens: int) -> str:
    """Forward the redacted prompt to whichever backend serves ``model``.

    This stays a named module-level function on purpose: tests replace it
    with a deterministic stub via ``monkeypatch``. Production code is
    expected to reach it only through :func:`_dispatch_llm`.
    """
    return _pick_backend(model)(prompt, model=model, max_tokens=max_tokens)
348
+
349
+
350
+ # ---------------------------------------------------------------------------
351
+ # Gate
352
+ # ---------------------------------------------------------------------------
353
+
354
+
355
def _validate_model(model: str) -> None:
    """Check ``model`` against the deny-list and the allow-list.

    Raises:
        ValueError: if ``model`` is not a non-empty string, if its
            lower-cased form contains any entry of
            ``FORBIDDEN_MODEL_SUBSTRINGS``, or if it is not an exact
            member of ``ALLOWED_LLM_MODELS``.
    """
    if not (isinstance(model, str) and model):
        raise ValueError(f"model must be a non-empty str, got {model!r}")

    # Deny-list matching is case-insensitive; allow-list matching is exact.
    haystack = model.lower()
    hit = next(
        (banned for banned in FORBIDDEN_MODEL_SUBSTRINGS if banned in haystack),
        None,
    )
    if hit is not None:
        raise ValueError(
            f"forbidden model: {model!r} (contains {hit!r})"
        )

    if model in ALLOWED_LLM_MODELS:
        return
    raise ValueError(
        f"model not in ALLOWED_LLM_MODELS: {model!r} "
        f"(allowed: {sorted(ALLOWED_LLM_MODELS)})"
    )
370
+
371
+
372
def _log_cost(
    *,
    learning_db: Path,
    profile_id: str,
    model: str,
    tokens_in: int,
    tokens_out: int,
    cost_usd: float = 0.0,
    cycle_id: str | None = None,
) -> None:
    """Insert one row into ``evolution_llm_cost_log``.

    Only metadata is stored (profile, UTC timestamp, model, the two
    counts, cost and cycle id) — never prompt or response text.

    H-16 (Stage 8): an empty or whitespace-only ``profile_id`` is rejected
    up front. Such a value would otherwise be stored and show up as
    unattributed spend in per-profile reporting.

    Raises:
        ValueError: if ``profile_id`` is not a non-empty string.

    Any ``sqlite3.Error`` while obtaining the connection, inserting or
    committing is logged as a warning and swallowed.
    """
    has_profile = isinstance(profile_id, str) and bool(profile_id.strip())
    if not has_profile:
        raise ValueError(
            "evolution_llm_cost_log.profile_id must be a non-empty string "
            f"(got {profile_id!r})"
        )

    stamp = datetime.now(timezone.utc).isoformat(timespec="seconds")
    insert_sql = (
        "INSERT INTO evolution_llm_cost_log "
        "(profile_id, ts, model, tokens_in, tokens_out, cost_usd, cycle_id) "
        "VALUES (?,?,?,?,?,?,?)"
    )
    values = (profile_id, stamp, model, tokens_in, tokens_out, cost_usd, cycle_id)
    try:
        # S9-W2 C9: _COST_CONN_LOCK is held only inside _get_cost_conn, not
        # across the execute+commit below, so parallel callers do not
        # queue behind each other's commit in Python.
        writer = _get_cost_conn(Path(learning_db))
        writer.execute(insert_sql, values)
        writer.commit()
    except sqlite3.Error as e:
        logger.warning("cost log write failed: %s", e)
415
+
416
+
417
def _dispatch_llm(
    prompt: str,
    *,
    model: str,
    learning_db: Path | str,
    profile_id: str,
    max_tokens: int = MAX_TOKENS_CAP,
    cycle_id: str | None = None,
) -> str:
    """Central choke-point for every evolution LLM call.

    Steps, in order: validate ``model`` against the allow/deny lists,
    validate ``max_tokens`` and ``profile_id``, redact the prompt with
    ``redact_secrets(aggression='high')``, enforce the per-cycle retry
    cap, dispatch to the backend, and append a cost-log row.

    Args:
        prompt: Raw prompt text; only its redacted form leaves this
            function.
        model: Model id; must pass ``_validate_model``.
        learning_db: Path of the SQLite DB holding
            ``evolution_llm_cost_log``.
        profile_id: Non-empty profile identifier the call is billed to.
        max_tokens: Positive int no greater than ``MAX_TOKENS_CAP``.
        cycle_id: Optional cycle identifier. When truthy, earlier
            cost-log rows for the same profile and cycle are counted
            against the retry cap.

    Returns:
        Whatever ``_actual_llm_call`` returns for the redacted prompt.

    Raises:
        ValueError: on a forbidden or unlisted model, an invalid
            ``max_tokens``, or an empty ``profile_id``.
        RuntimeError: when ``cycle_id`` already has at least as many
            cost-log rows as the retry cap allows.
    """
    _validate_model(model)

    # ``bool`` is a subclass of ``int``; reject it explicitly so that
    # ``max_tokens=True`` is not silently accepted as a token budget.
    if (
        isinstance(max_tokens, bool)
        or not isinstance(max_tokens, int)
        or max_tokens <= 0
    ):
        raise ValueError(
            f"max_tokens must be a positive int, got {max_tokens!r}"
        )
    if max_tokens > MAX_TOKENS_CAP:
        raise ValueError(
            f"max_tokens {max_tokens} > {MAX_TOKENS_CAP} cap (LLD-11)"
        )
    # S9-W2 H-SKEP-05: validate profile_id BEFORE paying for the LLM
    # call. _log_cost performs the same check, but only after the
    # round-trip; failing there would mean cost incurred with no cost-log
    # row. Validate up-front, fail-closed, zero cost.
    if not isinstance(profile_id, str) or not profile_id.strip():
        raise ValueError(
            "profile_id must be a non-empty string "
            f"(got {profile_id!r})"
        )

    # LLD-00 §5 — redact BEFORE dispatch. Never log the raw prompt.
    safe_prompt = redact_secrets(prompt, aggression="high")

    # S9-defer H-P-10: per-cycle retry-cost guard. Repeated retries on
    # the same ``cycle_id`` would otherwise escalate cost without bound,
    # so prior rows for this profile + cycle are counted and the dispatch
    # is refused once the cap is reached.
    if cycle_id:
        retry_cap = _retry_cap_per_cycle()
        prior: int | None
        try:
            row = _get_cost_conn(Path(learning_db)).execute(
                "SELECT COUNT(*) FROM evolution_llm_cost_log "
                "WHERE profile_id = ? AND cycle_id = ?",
                (profile_id, cycle_id),
            ).fetchone()
            prior = int(row[0]) if row and row[0] is not None else 0
        except sqlite3.Error:
            # Cost log unavailable — fail-open on this guard only.
            prior = None
        if prior is not None and prior >= retry_cap:
            logger.warning(
                "evolution retry cap hit: profile=%s cycle_id=%s "
                "prior=%d cap=%d — refusing dispatch",
                profile_id, cycle_id, prior, retry_cap,
            )
            raise RuntimeError(
                f"evolution retry cap exceeded for cycle {cycle_id}"
            )

    response = _actual_llm_call(
        safe_prompt, model=model, max_tokens=max_tokens,
    )

    # Cost-log row: lengths only, no text content, so neither the raw nor
    # the redacted prompt can end up in the audit log. Note the
    # ``tokens_*`` values recorded here are ``len()`` of the strings.
    _log_cost(
        learning_db=Path(learning_db),
        profile_id=profile_id,
        model=model,
        tokens_in=len(safe_prompt),
        tokens_out=len(response) if isinstance(response, str) else 0,
        cycle_id=cycle_id,
    )
    return response


def _retry_cap_per_cycle(default: int = 5) -> int:
    """Return the retry cap from ``SLM_EVOLUTION_RETRY_CAP``, or ``default``.

    A value that is unset or cannot be parsed as an int falls back to
    ``default`` (with a warning in the latter case) instead of raising,
    so a malformed environment variable cannot break every dispatch.
    """
    raw = os.environ.get("SLM_EVOLUTION_RETRY_CAP")
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        logger.warning(
            "invalid SLM_EVOLUTION_RETRY_CAP=%r — using default %d",
            raw, default,
        )
        return default