superlocalmemory 3.4.19 → 3.4.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/README.md +42 -34
- package/bin/slm +11 -0
- package/bin/slm.bat +12 -0
- package/package.json +4 -3
- package/pyproject.toml +4 -3
- package/scripts/build-slm-hook.ps1 +40 -0
- package/scripts/build-slm-hook.sh +45 -0
- package/scripts/build_entry.py +452 -0
- package/scripts/ci/stage5b_gate.sh +50 -0
- package/scripts/postinstall/validation.js +187 -0
- package/scripts/postinstall-interactive.js +756 -0
- package/scripts/postinstall_binary.js +287 -0
- package/scripts/release_manifest.py +273 -0
- package/scripts/slm-hook.spec +56 -0
- package/skills/slm-build-graph/SKILL.md +423 -0
- package/skills/slm-list-recent/SKILL.md +348 -0
- package/skills/slm-recall/SKILL.md +343 -0
- package/skills/slm-remember/SKILL.md +194 -0
- package/skills/slm-show-patterns/SKILL.md +224 -0
- package/skills/slm-status/SKILL.md +363 -0
- package/skills/slm-switch-profile/SKILL.md +442 -0
- package/src/superlocalmemory/cli/commands.py +254 -79
- package/src/superlocalmemory/cli/context_commands.py +192 -0
- package/src/superlocalmemory/cli/daemon.py +15 -1
- package/src/superlocalmemory/cli/db_migrate.py +80 -0
- package/src/superlocalmemory/cli/escape_hatch.py +220 -0
- package/src/superlocalmemory/cli/main.py +72 -1
- package/src/superlocalmemory/core/context_cache.py +397 -0
- package/src/superlocalmemory/core/engine.py +38 -2
- package/src/superlocalmemory/core/engine_wiring.py +1 -1
- package/src/superlocalmemory/core/ram_lock.py +111 -0
- package/src/superlocalmemory/core/recall_pipeline.py +433 -3
- package/src/superlocalmemory/core/recall_worker.py +8 -3
- package/src/superlocalmemory/core/security_primitives.py +635 -0
- package/src/superlocalmemory/core/shadow_router.py +319 -0
- package/src/superlocalmemory/core/slm_disabled.py +87 -0
- package/src/superlocalmemory/core/slmignore.py +125 -0
- package/src/superlocalmemory/core/topic_signature.py +143 -0
- package/src/superlocalmemory/core/worker_pool.py +14 -3
- package/src/superlocalmemory/encoding/cognitive_consolidator.py +2 -2
- package/src/superlocalmemory/evolution/budget.py +321 -0
- package/src/superlocalmemory/evolution/llm_dispatch.py +508 -0
- package/src/superlocalmemory/evolution/skill_evolver.py +144 -94
- package/src/superlocalmemory/hooks/_outcome_common.py +506 -0
- package/src/superlocalmemory/hooks/adapter_base.py +317 -0
- package/src/superlocalmemory/hooks/antigravity_adapter.py +192 -0
- package/src/superlocalmemory/hooks/claude_code_hooks.py +33 -1
- package/src/superlocalmemory/hooks/context_payload.py +312 -0
- package/src/superlocalmemory/hooks/copilot_adapter.py +154 -0
- package/src/superlocalmemory/hooks/cross_platform_connector.py +90 -0
- package/src/superlocalmemory/hooks/cursor_adapter.py +195 -0
- package/src/superlocalmemory/hooks/hook_handlers.py +109 -8
- package/src/superlocalmemory/hooks/ide_connector.py +25 -2
- package/src/superlocalmemory/hooks/post_tool_async_hook.py +165 -0
- package/src/superlocalmemory/hooks/post_tool_outcome_hook.py +223 -0
- package/src/superlocalmemory/hooks/prewarm_auth.py +170 -0
- package/src/superlocalmemory/hooks/session_registry.py +186 -0
- package/src/superlocalmemory/hooks/stop_outcome_hook.py +134 -0
- package/src/superlocalmemory/hooks/sync_loop.py +114 -0
- package/src/superlocalmemory/hooks/user_prompt_hook.py +128 -0
- package/src/superlocalmemory/hooks/user_prompt_rehash_hook.py +202 -0
- package/src/superlocalmemory/infra/backup.py +3 -3
- package/src/superlocalmemory/infra/cloud_backup.py +2 -2
- package/src/superlocalmemory/infra/event_bus.py +2 -2
- package/src/superlocalmemory/infra/webhook_dispatcher.py +3 -3
- package/src/superlocalmemory/learning/arm_catalog.py +99 -0
- package/src/superlocalmemory/learning/bandit.py +526 -0
- package/src/superlocalmemory/learning/bandit_cache.py +133 -0
- package/src/superlocalmemory/learning/behavioral.py +53 -1
- package/src/superlocalmemory/learning/consolidation_cycle.py +381 -0
- package/src/superlocalmemory/learning/consolidation_worker.py +188 -520
- package/src/superlocalmemory/learning/database.py +256 -0
- package/src/superlocalmemory/learning/dedup_hnsw.py +413 -0
- package/src/superlocalmemory/learning/ensemble.py +300 -0
- package/src/superlocalmemory/learning/fact_outcome_joins.py +207 -0
- package/src/superlocalmemory/learning/forgetting_scheduler.py +55 -0
- package/src/superlocalmemory/learning/hnsw_dedup.py +69 -0
- package/src/superlocalmemory/learning/labeler.py +87 -0
- package/src/superlocalmemory/learning/legacy_migration.py +277 -0
- package/src/superlocalmemory/learning/memory_merge.py +160 -0
- package/src/superlocalmemory/learning/model_cache.py +269 -0
- package/src/superlocalmemory/learning/model_rollback.py +278 -0
- package/src/superlocalmemory/learning/outcome_queue.py +284 -0
- package/src/superlocalmemory/learning/pattern_miner.py +415 -0
- package/src/superlocalmemory/learning/pattern_miner_constants.py +47 -0
- package/src/superlocalmemory/learning/ranker.py +225 -81
- package/src/superlocalmemory/learning/ranker_common.py +163 -0
- package/src/superlocalmemory/learning/ranker_retrain_legacy.py +202 -0
- package/src/superlocalmemory/learning/ranker_retrain_online.py +411 -0
- package/src/superlocalmemory/learning/reward.py +777 -0
- package/src/superlocalmemory/learning/reward_archive.py +210 -0
- package/src/superlocalmemory/learning/reward_boost.py +201 -0
- package/src/superlocalmemory/learning/reward_proxy.py +326 -0
- package/src/superlocalmemory/learning/shadow_test.py +524 -0
- package/src/superlocalmemory/learning/signal_worker.py +270 -0
- package/src/superlocalmemory/learning/signals.py +314 -0
- package/src/superlocalmemory/learning/trigram_index.py +547 -0
- package/src/superlocalmemory/mcp/server.py +5 -5
- package/src/superlocalmemory/mcp/tools_context.py +183 -0
- package/src/superlocalmemory/mcp/tools_core.py +92 -27
- package/src/superlocalmemory/parameterization/soft_prompt_generator.py +13 -0
- package/src/superlocalmemory/retrieval/engine.py +52 -0
- package/src/superlocalmemory/server/api.py +2 -2
- package/src/superlocalmemory/server/bandit_loops.py +140 -0
- package/src/superlocalmemory/server/middleware/__init__.py +11 -0
- package/src/superlocalmemory/server/middleware/security_headers.py +144 -0
- package/src/superlocalmemory/server/routes/backup.py +36 -13
- package/src/superlocalmemory/server/routes/behavioral.py +50 -19
- package/src/superlocalmemory/server/routes/brain.py +1234 -0
- package/src/superlocalmemory/server/routes/data_io.py +4 -4
- package/src/superlocalmemory/server/routes/events.py +2 -2
- package/src/superlocalmemory/server/routes/helpers.py +1 -1
- package/src/superlocalmemory/server/routes/learning.py +192 -7
- package/src/superlocalmemory/server/routes/memories.py +189 -1
- package/src/superlocalmemory/server/routes/prewarm.py +171 -0
- package/src/superlocalmemory/server/routes/profiles.py +3 -3
- package/src/superlocalmemory/server/routes/token.py +88 -0
- package/src/superlocalmemory/server/routes/ws.py +5 -5
- package/src/superlocalmemory/server/security_middleware.py +13 -7
- package/src/superlocalmemory/server/ui.py +2 -2
- package/src/superlocalmemory/server/unified_daemon.py +335 -3
- package/src/superlocalmemory/skills/slm-build-graph/SKILL.md +423 -0
- package/src/superlocalmemory/skills/slm-list-recent/SKILL.md +348 -0
- package/src/superlocalmemory/skills/slm-recall/SKILL.md +343 -0
- package/src/superlocalmemory/skills/slm-remember/SKILL.md +194 -0
- package/src/superlocalmemory/skills/slm-show-patterns/SKILL.md +224 -0
- package/src/superlocalmemory/skills/slm-status/SKILL.md +363 -0
- package/src/superlocalmemory/skills/slm-switch-profile/SKILL.md +442 -0
- package/src/superlocalmemory/storage/migration_runner.py +545 -0
- package/src/superlocalmemory/storage/migrations/M001_add_signal_features_columns.py +67 -0
- package/src/superlocalmemory/storage/migrations/M002_model_state_history.py +132 -0
- package/src/superlocalmemory/storage/migrations/M003_migration_log.py +38 -0
- package/src/superlocalmemory/storage/migrations/M004_cross_platform_sync_log.py +46 -0
- package/src/superlocalmemory/storage/migrations/M005_bandit_tables.py +75 -0
- package/src/superlocalmemory/storage/migrations/M006_action_outcomes_reward.py +75 -0
- package/src/superlocalmemory/storage/migrations/M007_pending_outcomes.py +63 -0
- package/src/superlocalmemory/storage/migrations/M009_model_lineage.py +54 -0
- package/src/superlocalmemory/storage/migrations/M010_evolution_config.py +75 -0
- package/src/superlocalmemory/storage/migrations/M011_archive_and_merge.py +87 -0
- package/src/superlocalmemory/storage/migrations/M012_shadow_observations.py +72 -0
- package/src/superlocalmemory/storage/migrations/M013_bi_temporal_columns.py +55 -0
- package/src/superlocalmemory/storage/migrations/__init__.py +81 -0
- package/src/superlocalmemory/storage/models.py +4 -0
- package/src/superlocalmemory/ui/css/brain.css +409 -0
- package/src/superlocalmemory/ui/css/legacy-dashboard.css +645 -0
- package/src/superlocalmemory/ui/index.html +459 -1345
- package/src/superlocalmemory/ui/js/brain.js +1321 -0
- package/src/superlocalmemory/ui/js/clusters.js +123 -4
- package/src/superlocalmemory/ui/js/init.js +48 -39
- package/src/superlocalmemory/ui/js/memories.js +88 -2
- package/src/superlocalmemory/ui/js/modal.js +71 -1
- package/src/superlocalmemory/ui/js/ng-shell.js +101 -88
- package/src/superlocalmemory/ui/js/trust-dashboard.js +168 -25
- package/src/superlocalmemory/ui/vendor/bootstrap-icons/bootstrap-icons.css +2018 -0
- package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff +0 -0
- package/src/superlocalmemory/ui/vendor/bootstrap-icons/fonts/bootstrap-icons.woff2 +0 -0
- package/src/superlocalmemory/ui/vendor/bootstrap.bundle.min.js +7 -0
- package/src/superlocalmemory/ui/vendor/bootstrap.min.css +6 -0
- package/src/superlocalmemory/ui/vendor/d3.v7.min.js +2 -0
- package/src/superlocalmemory/ui/vendor/graphology-library.min.js +2 -0
- package/src/superlocalmemory/ui/vendor/graphology.umd.min.js +2 -0
- package/src/superlocalmemory/ui/vendor/inter-ui/inter-variable.min.css +8 -0
- package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable-Italic.woff2 +0 -0
- package/src/superlocalmemory/ui/vendor/inter-ui/variable/InterVariable.woff2 +0 -0
- package/src/superlocalmemory/ui/vendor/sigma.min.js +1 -0
- package/src/superlocalmemory/ui/js/behavioral.js +0 -447
- package/src/superlocalmemory/ui/js/graph-core.js +0 -447
- package/src/superlocalmemory/ui/js/graph-interactions.js +0 -351
- package/src/superlocalmemory/ui/js/learning.js +0 -435
- package/src/superlocalmemory/ui/js/patterns.js +0 -93
- package/src/superlocalmemory.egg-info/PKG-INFO +0 -647
- package/src/superlocalmemory.egg-info/SOURCES.txt +0 -335
- package/src/superlocalmemory.egg-info/dependency_links.txt +0 -1
- package/src/superlocalmemory.egg-info/entry_points.txt +0 -2
- package/src/superlocalmemory.egg-info/requires.txt +0 -58
- package/src/superlocalmemory.egg-info/top_level.txt +0 -1
|
@@ -0,0 +1,508 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under AGPL-3.0-or-later - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory v3.4.22 — LLD-11 §Dispatch
|
|
4
|
+
|
|
5
|
+
"""Central LLM dispatch for the skill-evolution subsystem.
|
|
6
|
+
|
|
7
|
+
Enforces MASTER-PLAN D2 (no top-tier "O-family" Claude models, no
|
|
8
|
+
``gpt-4-turbo``) and LLD-00 §5 (every LLM-bound prompt passes through
|
|
9
|
+
``redact_secrets(aggression='high')`` FIRST).
|
|
10
|
+
|
|
11
|
+
Every evolution LLM call funnels through :func:`_dispatch_llm`. Writes an
|
|
12
|
+
audit row to ``evolution_llm_cost_log`` after the dispatch succeeds — the
|
|
13
|
+
row stores only the *redacted* prompt length and the model, never the
|
|
14
|
+
raw prompt, so no canary can leak via the cost log.
|
|
15
|
+
|
|
16
|
+
SB-2/SB-3/SB-4 fix cluster (v3.4.22 Stage 8):
|
|
17
|
+
* All backend entry points (claude CLI, ollama, Anthropic/OpenAI API)
|
|
18
|
+
live HERE, not in ``skill_evolver``. ``SkillEvolver._llm_call``
|
|
19
|
+
delegates to ``_dispatch_llm`` so the validate → redact → log
|
|
20
|
+
invariants can never be bypassed.
|
|
21
|
+
* The claude CLI backend routes through
|
|
22
|
+
``core.security_primitives.run_subprocess_safe`` — no bare
|
|
23
|
+
``subprocess.run`` in evolution code (SB-4).
|
|
24
|
+
|
|
25
|
+
Author: Varun Pratap Bhardwaj / Qualixar
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import atexit
|
|
31
|
+
import logging
|
|
32
|
+
import os
|
|
33
|
+
import sqlite3
|
|
34
|
+
import tempfile
|
|
35
|
+
import threading
|
|
36
|
+
from datetime import datetime, timezone
|
|
37
|
+
from pathlib import Path
|
|
38
|
+
from typing import Callable, Optional
|
|
39
|
+
|
|
40
|
+
from superlocalmemory.core.security_primitives import (
|
|
41
|
+
redact_secrets,
|
|
42
|
+
run_subprocess_safe,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
logger = logging.getLogger(__name__)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# M-P-09: one cached writer connection per learning_db path — LLM cost
|
|
49
|
+
# logging used to pay a fresh ``sqlite3.connect`` + fsync per call. At
|
|
50
|
+
# current volume (<10 calls/cycle) the cost is small, but caching keeps
|
|
51
|
+
# the dispatch code consistent with the rest of the "one-cached-writer"
|
|
52
|
+
# pattern the codebase standardised on (reward.py, trigram_index.py).
|
|
53
|
+
#
|
|
54
|
+
# S9-W2 C4 (fork safety): a cached SQLite handle inherited across
|
|
55
|
+
# ``os.fork()`` corrupts the DB because both processes think they hold an
|
|
56
|
+
# exclusive lock. We clear the cache in any forked child via
|
|
57
|
+
# ``os.register_at_fork`` AND re-check the owner pid within ``_get_cost_conn`` so
|
|
58
|
+
# a child that somehow missed the registrar still behaves correctly.
|
|
59
|
+
# On Windows / platforms without ``register_at_fork`` the fork path
|
|
60
|
+
# cannot happen so the pid-check is free insurance.
|
|
61
|
+
#
|
|
62
|
+
# S9-W2 C9 (serialization): ``_COST_CONN_LOCK`` is now ONLY held during
|
|
63
|
+
# the get/create-cache flip, NOT during the ``execute+commit`` inside
|
|
64
|
+
# ``_log_cost``. SQLite's own writer serialisation (BEGIN IMMEDIATE +
|
|
65
|
+
# busy_timeout) is the correct tool for write ordering; the Python lock
|
|
66
|
+
# was converting 10 parallel candidates × 3-8ms fsync into a single
|
|
67
|
+
# 30-80ms tail. Cache structure remains intact; the lock's scope shrinks.
|
|
68
|
+
# Cached cost-log writer connections, keyed by the string that
# ``_resolve_cost_key`` returns for the database path.
_COST_CONN_CACHE: dict[str, sqlite3.Connection] = {}
# Held while the cache dict or ``_COST_CONN_OWNER_PID`` is read or modified.
_COST_CONN_LOCK = threading.Lock()
# PID associated with the cached handles; ``None`` until one is recorded.
_COST_CONN_OWNER_PID: int | None = None
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _resolve_cost_key(learning_db: Path) -> str:
    """Return the connection-cache key for ``learning_db``.

    The key is ``os.path.realpath`` of the path, so relative spellings and
    symlinked spellings of one database file collapse onto a single cached
    writer (M-P-03) rather than opening competing connections.

    Args:
        learning_db: Path of the learning database.

    Returns:
        The resolved path as a string, or the unresolved string form of
        ``learning_db`` if resolution raises ``OSError``.
    """
    raw_path = str(learning_db)
    try:
        resolved = os.path.realpath(raw_path)
    except OSError:  # pragma: no cover — defensive
        resolved = raw_path
    return resolved
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# Connections inherited across ``fork()``. They are kept referenced for the
# life of the child so that emptying the cache does not drop the last
# reference and let CPython finalise (i.e. ``sqlite3_close``) a handle that
# the parent process opened.
_INHERITED_COST_CONNS: list[sqlite3.Connection] = []


def _reset_cost_cache_for_child() -> None:
    """Detach a forked child from cost-log state inherited from its parent.

    C4: intended as the ``os.register_at_fork(after_in_child=...)`` callback.
    It performs three steps:

    * Replaces ``_COST_CONN_LOCK`` with a fresh lock. If another parent
      thread held the lock at the instant of the fork, the child's copy
      would stay locked forever (that thread does not exist in the child),
      so the lock is replaced rather than acquired.
    * Empties ``_COST_CONN_CACHE`` so the child opens its own connections,
      while parking the inherited handles in ``_INHERITED_COST_CONNS``.
      Parent-opened handles must not be closed from the child, and merely
      dropping the last reference would close them.
    * Records the child's PID as the owner of the (now empty) cache.
    """
    global _COST_CONN_LOCK, _COST_CONN_OWNER_PID
    # Never try to acquire the inherited lock here — it may be held by a
    # thread that only exists in the parent.
    _COST_CONN_LOCK = threading.Lock()
    # Do NOT close parent-owned handles — park them, then forget the cache
    # entries so the child opens fresh ones.
    _INHERITED_COST_CONNS.extend(_COST_CONN_CACHE.values())
    _COST_CONN_CACHE.clear()
    _COST_CONN_OWNER_PID = os.getpid()
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _get_cost_conn(learning_db: Path) -> sqlite3.Connection:
    """Return the cached writer connection for ``learning_db``, opening it once.

    Args:
        learning_db: Path of the learning database; normalised through
            ``_resolve_cost_key`` before it is used as the cache key and as
            the ``sqlite3.connect`` target.

    Returns:
        A connection opened with ``check_same_thread=False`` and a 2 second
        ``timeout``, shared by every caller that resolves to the same key.

    Raises:
        sqlite3.Error: if a new connection has to be opened and
            ``sqlite3.connect`` fails. Callers in this module wrap the call
            in ``except sqlite3.Error``.
    """
    global _COST_CONN_OWNER_PID
    key = _resolve_cost_key(learning_db)
    with _COST_CONN_LOCK:
        # Belt-and-suspenders for processes that missed the fork registrar
        # (embedded interpreter, non-POSIX fork path): detect pid drift and
        # reset before handing out an inherited handle.
        current_pid = os.getpid()
        if _COST_CONN_OWNER_PID is None:
            # Record the owner on first use. Without this the comparison
            # below can never fire unless the fork registrar already ran,
            # which is exactly the case this guard is meant to cover.
            _COST_CONN_OWNER_PID = current_pid
        elif _COST_CONN_OWNER_PID != current_pid:
            _COST_CONN_CACHE.clear()
            _COST_CONN_OWNER_PID = current_pid
        conn = _COST_CONN_CACHE.get(key)
        if conn is None:
            conn = sqlite3.connect(key, check_same_thread=False, timeout=2.0)
            _COST_CONN_CACHE[key] = conn
        return conn
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _close_cost_conns() -> None:
    """Empty the connection cache and close every handle it held.

    Registered with ``atexit``. The cache is snapshotted and cleared while
    ``_COST_CONN_LOCK`` is held; the handles are closed afterwards, and a
    failure to close one handle is ignored so the rest are still closed.
    """
    with _COST_CONN_LOCK:
        pending = list(_COST_CONN_CACHE.values())
        _COST_CONN_CACHE.clear()
    for handle in pending:
        try:
            handle.close()
        except Exception:  # pragma: no cover
            pass
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
# Close every cached cost-log connection when the interpreter exits.
atexit.register(_close_cost_conns)
# C4: wipe inherited caches in any forked child. ``register_at_fork`` is
# POSIX-only; Windows simply doesn't fork so there is nothing to register.
if hasattr(os, "register_at_fork"):
    # ``after_in_child`` callbacks run in the child process after a fork.
    os.register_at_fork(after_in_child=_reset_cost_cache_for_child)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# ---------------------------------------------------------------------------
|
|
142
|
+
# Allow-list and deny-list
|
|
143
|
+
# ---------------------------------------------------------------------------
|
|
144
|
+
#
|
|
145
|
+
# Allow-list explicitly names every model evolution may invoke. Deny-list
|
|
146
|
+
# catches substrings that must NEVER appear in an evolution-issued model
|
|
147
|
+
# id — notably the O-tier Claude family (MASTER-PLAN D2) and OpenAI's
|
|
148
|
+
# ``gpt-4-turbo`` (cost + behaviour regressions observed in prod).
|
|
149
|
+
#
|
|
150
|
+
# NOTE on the deny-list strings: the Stage-5b CI gate scans ``src/`` for
|
|
151
|
+
# the full banned model-family literal. That literal must NEVER appear in
|
|
152
|
+
# this file or any other source file. We check for the shorter substring
|
|
153
|
+
# ``opus`` instead; that catches every Claude O-family id variant without
|
|
154
|
+
# putting the banned literal anywhere in source.
|
|
155
|
+
|
|
156
|
+
# Exact model ids the evolution subsystem may dispatch to. ``_validate_model``
# does a case-sensitive membership test against this set.
ALLOWED_LLM_MODELS: frozenset[str] = frozenset({
    "claude-haiku-4-5",
    "claude-sonnet-4-6",
    "ollama:llama3",
    "ollama:qwen2.5",
})

# Substrings that disqualify a model id. ``_validate_model`` lower-cases the
# id before searching, so these entries must themselves be lower-case.
FORBIDDEN_MODEL_SUBSTRINGS: tuple[str, ...] = ("opus", "gpt-4-turbo")

# Largest ``max_tokens`` ``_dispatch_llm`` accepts (also its default value).
MAX_TOKENS_CAP: int = 500
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
# ---------------------------------------------------------------------------
|
|
169
|
+
# Backends (SB-2, SB-4) — moved out of skill_evolver.py
|
|
170
|
+
# ---------------------------------------------------------------------------
|
|
171
|
+
#
|
|
172
|
+
# Every backend has a uniform signature::
|
|
173
|
+
#
|
|
174
|
+
# backend(prompt: str, *, model: str, max_tokens: int) -> str
|
|
175
|
+
#
|
|
176
|
+
# They receive the ALREADY-REDACTED prompt from ``_dispatch_llm``. They
|
|
177
|
+
# must never log the prompt. They return an empty string on any
|
|
178
|
+
# transport failure (fail-closed: caller treats "" as "no evolution").
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def _call_claude_cli_backend(
    prompt: str, *, model: str, max_tokens: int,
) -> str:
    """Spawn ``claude --model <model>`` via ``run_subprocess_safe``.

    SB-4: bare ``subprocess.run`` is banned in evolution code — every
    shell-out goes through ``run_subprocess_safe``.

    The prompt is handed to the CLI through a temporary file that is
    removed again before this function returns.

    Args:
        prompt: Prompt text to send (already redacted by the dispatcher).
        model: Allow-listed model id; mapped to the CLI short name
            ``"sonnet"`` when it contains ``"sonnet"``, otherwise ``"haiku"``.
        max_tokens: Forwarded to the CLI as ``--max-tokens``.

    Returns:
        The stripped stdout of the CLI when it exits with code 0, otherwise
        ``""``. Any exception — including a failure to create or write the
        prompt file — is logged at debug level and also yields ``""``
        (fail-closed, never crashes the caller).
    """
    # Translate the allow-listed model id to the CLI short name.
    cli_model = "sonnet" if "sonnet" in model else "haiku"

    prompt_file: str | None = None
    try:
        # The file is created inside the ``try`` so an OSError or encoding
        # error is handled like any other transport failure. UTF-8 is
        # pinned so non-ASCII prompts do not depend on the locale encoding.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".txt", delete=False, encoding="utf-8",
        ) as f:
            # Remember the name before writing so a failed write still
            # gets cleaned up in ``finally``.
            prompt_file = f.name
            f.write(prompt)

        result = run_subprocess_safe(
            ["claude", "--model", cli_model, "--print", "--no-input",
             "--max-tokens", str(max_tokens),
             "--prompt-file", prompt_file],
            timeout=120.0,
            env={
                "CLAUDE_CODE_ENTRYPOINT": "cli",
                "ECC_SKIP_OBSERVE": "1",
            },
        )
        stdout = getattr(result, "stdout", "") or ""
        rc = getattr(result, "returncode", 1)
        return stdout.strip() if rc == 0 else ""
    except Exception as exc:  # noqa: BLE001 — fail-closed, never crash caller
        logger.debug("claude CLI backend failed: %s", exc)
        return ""
    finally:
        if prompt_file is not None:
            try:
                os.unlink(prompt_file)
            except OSError:
                pass
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def _call_ollama_backend(
    prompt: str, *, model: str, max_tokens: int,
) -> str:
    """POST the prompt to the local Ollama ``/api/generate`` endpoint.

    Args:
        prompt: Prompt text to send.
        model: Model id; a leading ``"ollama:"`` prefix is removed before
            the name is sent to Ollama.
        max_tokens: Sent as the ``num_predict`` option.

    Returns:
        The ``"response"`` field of the JSON reply, or ``""`` when the field
        is missing/empty or when the request or decoding fails (the failure
        is logged at debug level).
    """
    import json as _json
    import urllib.request

    request_body = {
        "model": model.removeprefix("ollama:"),
        "prompt": prompt,
        "stream": False,
        "options": {"num_predict": max_tokens},
    }
    encoded = _json.dumps(request_body).encode()

    try:
        request = urllib.request.Request(
            "http://127.0.0.1:11434/api/generate",
            data=encoded,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        with urllib.request.urlopen(request, timeout=120) as reply:  # noqa: S310
            parsed = _json.loads(reply.read())
        return parsed.get("response", "") or ""
    except Exception as exc:  # noqa: BLE001
        logger.debug("Ollama backend failed: %s", exc)
        return ""
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def _call_claude_api_backend(
    prompt: str, *, model: str, max_tokens: int,
) -> str:
    """Send the prompt through the ``anthropic`` SDK's Messages API.

    ``model`` is passed to the SDK unchanged — there is no client-side
    mapping table, so a new allow-listed id needs no change here.

    Args:
        prompt: Prompt text, sent as a single ``user`` message.
        model: Model id handed to ``messages.create`` as-is.
        max_tokens: Completion token limit handed to ``messages.create``.

    Returns:
        The ``text`` attribute of the first content item of the reply when
        it is a ``str``; otherwise ``""``. ``""`` is also returned (and the
        cause logged at debug level) if the SDK cannot be imported or the
        call raises.
    """
    try:
        import anthropic  # type: ignore[import-not-found]
    except Exception as exc:  # noqa: BLE001
        logger.debug("anthropic sdk unavailable: %s", exc)
        return ""

    try:
        reply = anthropic.Anthropic().messages.create(
            model=model,
            max_tokens=max_tokens,
            messages=[{"role": "user", "content": prompt}],
        )
        blocks = getattr(reply, "content", None)
        if not blocks or len(blocks) <= 0:
            return ""
        text = getattr(blocks[0], "text", None)
        return text if isinstance(text, str) else ""
    except Exception as exc:  # noqa: BLE001
        logger.debug("Anthropic API backend failed: %s", exc)
        return ""
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
# ---------------------------------------------------------------------------
|
|
294
|
+
# Backend registry — dispatches by (allow-listed) model id
|
|
295
|
+
# ---------------------------------------------------------------------------
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def _fail_closed_backend(
    prompt: str, *, model: str, max_tokens: int,
) -> str:
    """S9-SKEP-14: backend used when no real backend matches ``model``.

    Nothing is sent anywhere: a warning naming the model is logged and the
    empty string — the sentinel meaning "no evolution happened" — is
    returned, so an unroutable id can never spend money on some default
    vendor.

    Args:
        prompt: Ignored.
        model: Model id that could not be routed; included in the warning.
        max_tokens: Ignored.

    Returns:
        Always ``""``.
    """
    empty_reply = ""
    logger.warning(
        "llm_dispatch: no backend registered for model=%r — "
        "fail-closed (returning empty string)", model,
    )
    return empty_reply
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def _pick_backend(model: str) -> Callable[..., str]:
    """Choose the backend callable for ``model`` by its id prefix.

    Contract: ``_dispatch_llm`` has already run ``_validate_model`` on
    ``model``; this function does no validation of its own.

    Routing (S9-SKEP-14 — prefix-exact, no paid default):

    * ``"ollama:"`` prefix → ``_call_ollama_backend``.
    * ``"claude-"`` prefix → ``_call_claude_cli_backend`` when the
      environment variable ``SLM_EVOLUTION_BACKEND`` equals ``"claude-cli"``,
      otherwise ``_call_claude_api_backend``.
    * anything else → ``_fail_closed_backend``.

    Args:
        model: Model id to route.

    Returns:
        The selected backend function.
    """
    if model.startswith("ollama:"):
        return _call_ollama_backend
    if not model.startswith("claude-"):
        return _fail_closed_backend
    # The CLI path is opt-in via an explicit env flag; the API is the default.
    wants_cli = os.environ.get("SLM_EVOLUTION_BACKEND") == "claude-cli"
    return _call_claude_cli_backend if wants_cli else _call_claude_api_backend
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def _actual_llm_call(prompt: str, *, model: str, max_tokens: int) -> str:
    """Run ``prompt`` on whichever backend ``_pick_backend`` selects.

    This stays a separate module-level function so tests can
    ``monkeypatch`` it with a deterministic stub. Production code is
    expected to reach it only through :func:`_dispatch_llm`.

    Args:
        prompt: Prompt text forwarded to the backend.
        model: Model id used both for routing and as the backend's
            ``model`` argument.
        max_tokens: Forwarded to the backend unchanged.

    Returns:
        Whatever the selected backend returns.
    """
    return _pick_backend(model)(prompt, model=model, max_tokens=max_tokens)
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
# ---------------------------------------------------------------------------
|
|
351
|
+
# Gate
|
|
352
|
+
# ---------------------------------------------------------------------------
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def _validate_model(model: str) -> None:
    """Reject any model id that evolution is not permitted to use.

    Checks run in this order:

    1. ``model`` must be a non-empty ``str``.
    2. Its lower-cased form must not contain any entry of
       ``FORBIDDEN_MODEL_SUBSTRINGS``.
    3. ``model`` itself (case-sensitive) must be in ``ALLOWED_LLM_MODELS``.

    Args:
        model: Candidate model id.

    Raises:
        ValueError: if any of the three checks fails.
    """
    if not (isinstance(model, str) and model):
        raise ValueError(f"model must be a non-empty str, got {model!r}")

    folded = model.lower()
    hit = next(
        (bad for bad in FORBIDDEN_MODEL_SUBSTRINGS if bad in folded), None,
    )
    if hit is not None:
        raise ValueError(
            f"forbidden model: {model!r} (contains {hit!r})"
        )

    if model in ALLOWED_LLM_MODELS:
        return
    raise ValueError(
        f"model not in ALLOWED_LLM_MODELS: {model!r} "
        f"(allowed: {sorted(ALLOWED_LLM_MODELS)})"
    )
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def _log_cost(
    *,
    learning_db: Path,
    profile_id: str,
    model: str,
    tokens_in: int,
    tokens_out: int,
    cost_usd: float = 0.0,
    cycle_id: str | None = None,
) -> None:
    """Insert one row into ``evolution_llm_cost_log``.

    Only the values passed in are stored (plus a UTC timestamp with
    second precision); no prompt or response text reaches this function.

    H-16 (Stage 8): an empty or whitespace-only ``profile_id`` is rejected
    here because SQLite's ``NOT NULL`` would accept it and the spend would
    end up unattributed.

    Args:
        learning_db: Database holding the cost-log table.
        profile_id: Profile the call is billed to; must be non-blank.
        model: Model id that was called.
        tokens_in: Input size figure to record.
        tokens_out: Output size figure to record.
        cost_usd: Cost to record; defaults to ``0.0``.
        cycle_id: Optional evolution-cycle id to record.

    Raises:
        ValueError: if ``profile_id`` is not a non-blank string.

    Any ``sqlite3.Error`` while obtaining the connection, inserting or
    committing is logged as a warning and swallowed.
    """
    profile_ok = isinstance(profile_id, str) and bool(profile_id.strip())
    if not profile_ok:
        raise ValueError(
            "evolution_llm_cost_log.profile_id must be a non-empty string "
            f"(got {profile_id!r})"
        )

    timestamp = datetime.now(timezone.utc).isoformat(timespec="seconds")
    row = (
        profile_id, timestamp, model, tokens_in, tokens_out, cost_usd,
        cycle_id,
    )
    insert_sql = (
        "INSERT INTO evolution_llm_cost_log "
        "(profile_id, ts, model, tokens_in, tokens_out, cost_usd, cycle_id) "
        "VALUES (?,?,?,?,?,?,?)"
    )
    try:
        # S9-W2 C9: ``_COST_CONN_LOCK`` is only taken inside
        # ``_get_cost_conn``; it is not held across the insert + commit.
        writer = _get_cost_conn(Path(learning_db))
        writer.execute(insert_sql, row)
        writer.commit()
    except sqlite3.Error as e:
        logger.warning("cost log write failed: %s", e)
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
def _dispatch_llm(
    prompt: str,
    *,
    model: str,
    learning_db: Path | str,
    profile_id: str,
    max_tokens: int = MAX_TOKENS_CAP,
    cycle_id: str | None = None,
) -> str:
    """Central choke-point for every evolution LLM call.

    Steps, in order: validate ``model`` against the allow/deny lists, check
    ``max_tokens`` and ``profile_id``, redact the prompt with
    ``redact_secrets(aggression='high')``, enforce the per-cycle retry cap,
    call the backend, and append a cost-log row.

    Args:
        prompt: Raw prompt; only its redacted form leaves this function.
        model: Model id; must pass ``_validate_model``.
        learning_db: Database holding ``evolution_llm_cost_log``.
        profile_id: Non-blank profile id the call is attributed to.
        max_tokens: Positive int no greater than ``MAX_TOKENS_CAP``.
        cycle_id: Optional id of the logical evolution step. When given,
            prior cost-log rows for ``(profile_id, cycle_id)`` are counted
            and the call is refused once the retry cap is reached. The cap
            defaults to 5 and can be overridden with the integer
            environment variable ``SLM_EVOLUTION_RETRY_CAP``; a missing,
            blank or non-integer value falls back to the default.

    Returns:
        The backend's response (``""`` means the backend failed closed).

    Raises:
        ValueError: invalid ``model``, ``max_tokens`` or ``profile_id``.
        RuntimeError: the retry cap for ``cycle_id`` has been reached.

    Note:
        The ``tokens_in`` / ``tokens_out`` columns are filled with the
        character lengths of the redacted prompt and of the response, not
        with tokenizer counts.
    """
    _validate_model(model)

    if not isinstance(max_tokens, int) or max_tokens <= 0:
        raise ValueError(
            f"max_tokens must be a positive int, got {max_tokens!r}"
        )
    if max_tokens > MAX_TOKENS_CAP:
        raise ValueError(
            f"max_tokens {max_tokens} > {MAX_TOKENS_CAP} cap (LLD-11)"
        )
    # S9-W2 H-SKEP-05: validate profile_id BEFORE paying for the LLM call,
    # so a misconfigured profile fails with zero cost instead of after the
    # round-trip when the cost row is written.
    if not isinstance(profile_id, str) or not profile_id.strip():
        raise ValueError(
            "profile_id must be a non-empty string "
            f"(got {profile_id!r})"
        )

    # LLD-00 §5 — redact BEFORE dispatch. Never log the raw prompt.
    safe_prompt = redact_secrets(prompt, aggression="high")

    # S9-defer H-P-10: per-cycle retry-cost guard. Repeated retries on the
    # same ``cycle_id`` are counted from the cost log and refused once the
    # cap is hit (this is separate from the overall per-cycle call budget).
    if cycle_id:
        default_retry_cap = 5
        raw_cap = os.environ.get("SLM_EVOLUTION_RETRY_CAP", "")
        try:
            retry_cap = int(raw_cap) if raw_cap.strip() else default_retry_cap
        except ValueError:
            # A typo in an operator-set env var must not surface as a
            # ValueError that looks like a caller contract breach.
            logger.warning(
                "ignoring invalid SLM_EVOLUTION_RETRY_CAP=%r; using %d",
                raw_cap, default_retry_cap,
            )
            retry_cap = default_retry_cap
        try:
            _conn = _get_cost_conn(Path(learning_db))
            row = _conn.execute(
                "SELECT COUNT(*) FROM evolution_llm_cost_log "
                "WHERE profile_id = ? AND cycle_id = ?",
                (profile_id, cycle_id),
            ).fetchone()
            prior = int(row[0]) if row and row[0] is not None else 0
            if prior >= retry_cap:
                logger.warning(
                    "evolution retry cap hit: profile=%s cycle_id=%s "
                    "prior=%d cap=%d — refusing dispatch",
                    profile_id, cycle_id, prior, retry_cap,
                )
                raise RuntimeError(
                    f"evolution retry cap exceeded for cycle {cycle_id}"
                )
        except sqlite3.Error:
            # Cost log unavailable — fail-open on this guard (the
            # outer EvolutionBudget still enforces the 10-call cap).
            pass

    response = _actual_llm_call(
        safe_prompt, model=model, max_tokens=max_tokens,
    )

    # Cost-log row: lengths only, no text content, so neither the raw nor
    # the redacted prompt can end up in the audit log.
    _log_cost(
        learning_db=Path(learning_db),
        profile_id=profile_id,
        model=model,
        tokens_in=len(safe_prompt),
        tokens_out=len(response) if isinstance(response, str) else 0,
        cycle_id=cycle_id,
    )
    return response
|