superlocalmemory 3.4.22 → 3.4.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +29 -0
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/skills/slm-build-graph/SKILL.md +1 -1
- package/skills/slm-list-recent/SKILL.md +1 -1
- package/skills/slm-recall/SKILL.md +1 -1
- package/skills/slm-remember/SKILL.md +1 -1
- package/skills/slm-status/SKILL.md +1 -1
- package/skills/slm-switch-profile/SKILL.md +1 -1
- package/src/superlocalmemory/__init__.py +3 -0
- package/src/superlocalmemory/core/config.py +66 -18
- package/src/superlocalmemory/core/context_cache.py +1 -1
- package/src/superlocalmemory/core/embedding_worker.py +8 -27
- package/src/superlocalmemory/core/embeddings.py +83 -1
- package/src/superlocalmemory/core/engine_wiring.py +8 -0
- package/src/superlocalmemory/core/platform_utils.py +127 -0
- package/src/superlocalmemory/core/recall_worker.py +8 -24
- package/src/superlocalmemory/core/reranker_worker.py +8 -24
- package/src/superlocalmemory/core/worker_pool.py +2 -1
- package/src/superlocalmemory/hooks/context_payload.py +1 -1
- package/src/superlocalmemory/learning/database.py +1 -1
- package/src/superlocalmemory/retrieval/reranker.py +2 -1
- package/src/superlocalmemory/server/routes/brain.py +1 -1
- package/src/superlocalmemory/server/routes/v3_api.py +150 -8
- package/src/superlocalmemory/server/security_middleware.py +20 -2
- package/src/superlocalmemory/server/unified_daemon.py +107 -5
- package/src/superlocalmemory/ui/index.html +50 -1
- package/src/superlocalmemory/ui/js/auto-settings.js +131 -5
- package/src/superlocalmemory/ui/js/core.js +96 -1
|
@@ -20,7 +20,6 @@ import json
|
|
|
20
20
|
import os
|
|
21
21
|
import signal
|
|
22
22
|
import sys
|
|
23
|
-
import threading
|
|
24
23
|
|
|
25
24
|
# Force CPU BEFORE any torch import
|
|
26
25
|
os.environ["CUDA_VISIBLE_DEVICES"] = ""
|
|
@@ -39,24 +38,10 @@ if sys.platform != "win32":
|
|
|
39
38
|
def _start_parent_watchdog() -> None:
    """Arm the parent-process watchdog for this worker.

    The worker is meant to self-terminate once its parent process dies.
    Since V3.4.24 this wrapper only forwards to
    ``platform_utils.start_parent_watchdog()``.
    """
    # Resolved lazily at call time rather than at module import.
    from superlocalmemory.core import platform_utils

    platform_utils.start_parent_watchdog()
|
|
60
45
|
|
|
61
46
|
_engine = None
|
|
62
47
|
|
|
@@ -253,7 +238,8 @@ def _handle_status() -> dict:
|
|
|
253
238
|
|
|
254
239
|
def _worker_main() -> None:
|
|
255
240
|
"""Main loop: read JSON requests from stdin, write responses to stdout."""
|
|
256
|
-
_start_parent_watchdog()
|
|
241
|
+
_start_parent_watchdog()
|
|
242
|
+
from superlocalmemory.core.platform_utils import get_rss_mb
|
|
257
243
|
|
|
258
244
|
for line in sys.stdin:
|
|
259
245
|
line = line.strip()
|
|
@@ -326,11 +312,9 @@ def _worker_main() -> None:
|
|
|
326
312
|
except Exception as exc:
|
|
327
313
|
_respond({"ok": False, "error": str(exc)})
|
|
328
314
|
|
|
329
|
-
# V3.3.16: RSS watchdog —
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
rss_mb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 / 1024
|
|
333
|
-
if rss_mb > 2500:
|
|
315
|
+
# V3.3.16: RSS watchdog — V3.4.24: cross-platform via platform_utils.
|
|
316
|
+
rss_mb = get_rss_mb()
|
|
317
|
+
if rss_mb > 0 and rss_mb > 2500:
|
|
334
318
|
sys.exit(0)
|
|
335
319
|
|
|
336
320
|
|
|
@@ -31,7 +31,6 @@ import platform
|
|
|
31
31
|
import signal
|
|
32
32
|
import struct
|
|
33
33
|
import sys
|
|
34
|
-
import threading
|
|
35
34
|
|
|
36
35
|
# Force CPU BEFORE any torch import
|
|
37
36
|
os.environ["CUDA_VISIBLE_DEVICES"] = ""
|
|
@@ -52,25 +51,10 @@ if sys.platform != "win32":
|
|
|
52
51
|
def _start_parent_watchdog() -> None:
    """Arm the parent-process watchdog for this worker.

    The worker is meant to self-terminate once its parent process dies.
    Since V3.4.24 this wrapper only forwards to
    ``platform_utils.start_parent_watchdog()``.
    """
    # Resolved lazily at call time rather than at module import.
    from superlocalmemory.core import platform_utils

    platform_utils.start_parent_watchdog()
|
|
74
58
|
|
|
75
59
|
|
|
76
60
|
def _detect_onnx_variant(model_name: str = "") -> str:
|
|
@@ -101,7 +85,8 @@ def _detect_onnx_variant(model_name: str = "") -> str:
|
|
|
101
85
|
|
|
102
86
|
def _worker_main() -> None:
|
|
103
87
|
"""Main loop: read JSON requests from stdin, write responses to stdout."""
|
|
104
|
-
_start_parent_watchdog()
|
|
88
|
+
_start_parent_watchdog()
|
|
89
|
+
from superlocalmemory.core.platform_utils import get_rss_mb
|
|
105
90
|
|
|
106
91
|
model = None
|
|
107
92
|
active_backend = ""
|
|
@@ -194,10 +179,9 @@ def _worker_main() -> None:
|
|
|
194
179
|
except Exception as exc:
|
|
195
180
|
_respond({"ok": False, "error": str(exc)})
|
|
196
181
|
|
|
197
|
-
# V3.3.16: RSS watchdog —
|
|
198
|
-
|
|
199
|
-
rss_mb
|
|
200
|
-
if rss_mb > 2500:
|
|
182
|
+
# V3.3.16: RSS watchdog — V3.4.24: cross-platform via platform_utils.
|
|
183
|
+
rss_mb = get_rss_mb()
|
|
184
|
+
if rss_mb > 0 and rss_mb > 2500:
|
|
201
185
|
sys.exit(0)
|
|
202
186
|
|
|
203
187
|
continue
|
|
@@ -247,6 +247,7 @@ class WorkerPool:
|
|
|
247
247
|
"TOKENIZERS_PARALLELISM": "false",
|
|
248
248
|
"TORCH_DEVICE": "cpu",
|
|
249
249
|
}
|
|
250
|
+
from superlocalmemory.core.platform_utils import popen_platform_kwargs
|
|
250
251
|
self._proc = subprocess.Popen(
|
|
251
252
|
[sys.executable, "-m", "superlocalmemory.core.recall_worker"],
|
|
252
253
|
stdin=subprocess.PIPE,
|
|
@@ -255,7 +256,7 @@ class WorkerPool:
|
|
|
255
256
|
text=True,
|
|
256
257
|
bufsize=1,
|
|
257
258
|
env=env,
|
|
258
|
-
|
|
259
|
+
**popen_platform_kwargs(),
|
|
259
260
|
)
|
|
260
261
|
logger.info("Recall worker spawned (PID %d)", self._proc.pid)
|
|
261
262
|
except Exception as exc:
|
|
@@ -193,6 +193,7 @@ class CrossEncoderReranker:
|
|
|
193
193
|
"TOKENIZERS_PARALLELISM": "false",
|
|
194
194
|
"TORCH_DEVICE": "cpu",
|
|
195
195
|
}
|
|
196
|
+
from superlocalmemory.core.platform_utils import popen_platform_kwargs
|
|
196
197
|
self._worker_proc = subprocess.Popen(
|
|
197
198
|
[sys.executable, "-m", worker_module],
|
|
198
199
|
stdin=subprocess.PIPE,
|
|
@@ -201,7 +202,7 @@ class CrossEncoderReranker:
|
|
|
201
202
|
text=True,
|
|
202
203
|
bufsize=1,
|
|
203
204
|
env=env,
|
|
204
|
-
|
|
205
|
+
**popen_platform_kwargs(),
|
|
205
206
|
)
|
|
206
207
|
# v3.4.13: Register PID for machine-wide singleton
|
|
207
208
|
_RERANKER_PID_FILE.parent.mkdir(parents=True, exist_ok=True)
|
|
@@ -64,7 +64,7 @@ router = APIRouter(prefix="/api/v3", tags=["brain"])
|
|
|
64
64
|
# LLD-03 v2 stratum space = 4 query types × 3 entity bins × 4 time buckets.
|
|
65
65
|
_STRATA_TOTAL: int = 48
|
|
66
66
|
|
|
67
|
-
_VERSION: str = "3.4.
|
|
67
|
+
_VERSION: str = "3.4.23"
|
|
68
68
|
|
|
69
69
|
# Banned metric names (LLD-04 U4). Kept as a tuple for grep visibility;
|
|
70
70
|
# the source-level test asserts we don't accidentally reintroduce them.
|
|
@@ -129,6 +129,11 @@ async def set_mode(request: Request):
|
|
|
129
129
|
llm_model=old_config.llm.model,
|
|
130
130
|
llm_api_key=old_config.llm.api_key,
|
|
131
131
|
llm_api_base=old_config.llm.api_base,
|
|
132
|
+
embedding_provider=old_config.embedding.provider,
|
|
133
|
+
embedding_endpoint=old_config.embedding.api_endpoint,
|
|
134
|
+
embedding_key=old_config.embedding.api_key,
|
|
135
|
+
embedding_model_name=old_config.embedding.model_name,
|
|
136
|
+
embedding_dimension=old_config.embedding.dimension,
|
|
132
137
|
)
|
|
133
138
|
new_config.active_profile = old_config.active_profile
|
|
134
139
|
new_config.save()
|
|
@@ -165,7 +170,10 @@ async def set_mode(request: Request):
|
|
|
165
170
|
|
|
166
171
|
@router.post("/mode/set")
|
|
167
172
|
async def set_full_config(request: Request):
|
|
168
|
-
"""Save mode + provider + model + API key together.
|
|
173
|
+
"""Save mode + provider + model + API key together.
|
|
174
|
+
|
|
175
|
+
V3.4.24: Also accepts embedding_* fields for custom embedding endpoints.
|
|
176
|
+
"""
|
|
169
177
|
try:
|
|
170
178
|
body = await request.json()
|
|
171
179
|
new_mode = body.get("mode", "a").lower()
|
|
@@ -187,6 +195,11 @@ async def set_full_config(request: Request):
|
|
|
187
195
|
llm_model=model,
|
|
188
196
|
llm_api_key=api_key,
|
|
189
197
|
llm_api_base="http://localhost:11434" if provider == "ollama" else "",
|
|
198
|
+
embedding_provider=body.get("embedding_provider", ""),
|
|
199
|
+
embedding_endpoint=body.get("embedding_endpoint", ""),
|
|
200
|
+
embedding_key=body.get("embedding_key", ""),
|
|
201
|
+
embedding_model_name=body.get("embedding_model", ""),
|
|
202
|
+
embedding_dimension=int(body.get("embedding_dimension", 0) or 0),
|
|
190
203
|
)
|
|
191
204
|
config.active_profile = old.active_profile
|
|
192
205
|
config.save()
|
|
@@ -213,11 +226,145 @@ async def set_full_config(request: Request):
|
|
|
213
226
|
"mode": new_mode,
|
|
214
227
|
"provider": provider,
|
|
215
228
|
"model": model,
|
|
229
|
+
"embedding_provider": config.embedding.provider,
|
|
230
|
+
"embedding_model": config.embedding.model_name,
|
|
231
|
+
"embedding_dimension": config.embedding.dimension,
|
|
232
|
+
}
|
|
233
|
+
except Exception as e:
|
|
234
|
+
return JSONResponse({"error": str(e)}, status_code=500)
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
# ── V3.4.24: Embedding Configuration ────────────────────────────────
|
|
238
|
+
|
|
239
|
+
@router.get("/embedding/config")
async def get_embedding_config(request: Request):
    """Report the embedding settings currently stored in the SLM config.

    The API key itself is never included in the response; only a
    ``has_key`` flag saying whether one is set. Any failure is reported as
    an HTTP 500 JSON body carrying the exception text.
    """
    try:
        from superlocalmemory.core.config import SLMConfig

        cfg = SLMConfig.load()
        embedding = cfg.embedding
        payload = {
            "provider": embedding.provider,
            "model_name": embedding.model_name,
            "dimension": embedding.dimension,
            "api_endpoint": embedding.api_endpoint,
            # Expose presence only, not the secret.
            "has_key": bool(embedding.api_key),
            "is_openai_compatible": embedding.is_openai_compatible,
            "mode": cfg.mode.value,
        }
        return payload
    except Exception as exc:
        return JSONResponse({"error": str(exc)}, status_code=500)
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
@router.put("/embedding/config")
async def set_embedding_config(request: Request):
    """Update embedding configuration independently of mode switch.

    The JSON body may carry ``provider``, ``model_name``, ``dimension``,
    ``api_endpoint`` and ``api_key``; any key that is omitted keeps its
    currently stored value. The Ollama/Azure-style fields (``ollama_model``,
    ``ollama_base_url``, ``api_version``, ``deployment_name``) are always
    carried over unchanged.

    Returns:
        On success, a dict with ``success``, the effective ``provider``,
        ``model_name`` and ``dimension``, and ``needs_reindex`` (true when
        provider, model name or dimension changed).
        HTTP 400 when ``dimension`` is not an integer or lies outside
        64-8192; nothing is saved in that case.
        HTTP 500 with the exception text for any other failure.
    """
    try:
        body = await request.json()
        from superlocalmemory.core.config import SLMConfig, EmbeddingConfig
        config = SLMConfig.load()
        old_emb = config.embedding

        new_provider = body.get("provider", old_emb.provider)
        new_model = body.get("model_name", old_emb.model_name)

        # A malformed dimension is a client error: answer 400 instead of
        # letting int() raise into the catch-all 500 handler below.
        raw_dim = body.get("dimension", old_emb.dimension)
        try:
            new_dim = int(raw_dim or 768)  # falsy (0, "", null) -> 768
        except (TypeError, ValueError, OverflowError):
            return JSONResponse(
                {"error": f"Dimension must be an integer, got {raw_dim!r}"},
                status_code=400,
            )
        if not (64 <= new_dim <= 8192):
            return JSONResponse(
                {"error": f"Dimension must be 64-8192, got {new_dim}"},
                status_code=400,
            )

        new_endpoint = body.get("api_endpoint", old_emb.api_endpoint)
        new_key = body.get("api_key", old_emb.api_key)

        config.embedding = EmbeddingConfig(
            model_name=new_model,
            dimension=new_dim,
            provider=new_provider,
            api_endpoint=new_endpoint,
            api_key=new_key,
            ollama_model=old_emb.ollama_model,
            ollama_base_url=old_emb.ollama_base_url,
            api_version=old_emb.api_version,
            deployment_name=old_emb.deployment_name,
        )
        config.save()

        needs_reindex = (
            old_emb.provider != new_provider
            or old_emb.model_name != new_model
            or old_emb.dimension != new_dim
        )

        # Kill workers so next request uses new config. Deliberately
        # best-effort: the config is already saved, so a failure to stop
        # the pool must not turn this request into an error.
        try:
            from superlocalmemory.core.worker_pool import WorkerPool
            WorkerPool.shared().shutdown()
        except Exception:
            pass
        # Clear the engine reference held on the app state, if there is one.
        if hasattr(request.app.state, "engine"):
            request.app.state.engine = None

        return {
            "success": True,
            "provider": new_provider,
            "model_name": new_model,
            "dimension": new_dim,
            "needs_reindex": needs_reindex,
        }
    except Exception as e:
        return JSONResponse({"error": str(e)}, status_code=500)
|
|
219
313
|
|
|
220
314
|
|
|
315
|
+
@router.post("/embedding/test")
async def test_embedding_endpoint(request: Request):
    """Test connectivity to a custom embedding endpoint.

    The JSON body supplies ``api_endpoint`` (required), ``model_name``
    (default ``"test"``) and ``api_key`` (optional, sent as a Bearer token).
    One sample string is POSTed to ``<endpoint>/embeddings`` and the length
    of the first returned vector is reported as the dimension.

    Returns:
        ``{"success": True, "message": ..., "dimension": N}`` when the
        response carries embedding data; ``{"success": False, "error": ...}``
        for connection, timeout, HTTP-status or unexpected failures.
        HTTP 400 when the endpoint is missing, is not http/https, or names
        a cloud metadata host.
    """
    try:
        import httpx
    except ImportError:
        # Kept outside the main ``try``: the ``except httpx.*`` clauses
        # below would themselves raise NameError if this import failed.
        return {"success": False, "error": "httpx is not installed"}
    from urllib.parse import urlparse

    try:
        body = await request.json()
        # ``or ""`` also covers an explicit JSON null.
        endpoint = str(body.get("api_endpoint") or "").rstrip("/")
        model = body.get("model_name", "test")
        api_key = body.get("api_key", "")

        if not endpoint:
            return JSONResponse({"error": "No endpoint provided"}, status_code=400)

        parsed = urlparse(endpoint)
        if parsed.scheme not in ("http", "https"):
            return JSONResponse({"error": "Only http/https endpoints supported"}, status_code=400)
        # Strip a trailing dot so the fully-qualified spelling
        # ("metadata.google.internal.") cannot slip past the denylist.
        host = (parsed.hostname or "").rstrip(".")
        if host in ("169.254.169.254", "metadata.google.internal"):
            return JSONResponse({"error": "Cloud metadata endpoints not allowed"}, status_code=400)

        if not endpoint.endswith("/embeddings"):
            endpoint = f"{endpoint}/embeddings"

        headers = {"Content-Type": "application/json"}
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"

        payload = {"input": ["test embedding connection"], "model": model}

        # Async client: a synchronous call here would block the event loop
        # (and every other request) for up to the full 15 s timeout.
        async with httpx.AsyncClient(timeout=httpx.Timeout(15.0)) as client:
            resp = await client.post(endpoint, headers=headers, json=payload)
            resp.raise_for_status()
            data = resp.json()
            emb_data = data.get("data", [])
            if emb_data:
                dim = len(emb_data[0].get("embedding", []))
                return {
                    "success": True,
                    "message": f"Connected! Dimension: {dim}",
                    "dimension": dim,
                }
            return {"success": False, "error": "No embedding data returned"}
    except httpx.HTTPStatusError as e:
        return {"success": False, "error": f"HTTP {e.response.status_code}"}
    except httpx.ConnectError:
        return {"success": False, "error": "Cannot reach the embedding server. Is it running?"}
    except httpx.TimeoutException:
        return {"success": False, "error": "Connection timed out after 15 seconds."}
    except Exception as e:
        # Report only the exception type to the caller, not its message.
        return {"success": False, "error": type(e).__name__}
|
|
366
|
+
|
|
367
|
+
|
|
221
368
|
@router.post("/provider/test")
|
|
222
369
|
async def test_provider(request: Request):
|
|
223
370
|
"""Test connectivity to an LLM provider."""
|
|
@@ -1593,13 +1740,8 @@ async def process_health(request: Request):
|
|
|
1593
1740
|
processes["worker_pool"] = {"status": worker_status}
|
|
1594
1741
|
|
|
1595
1742
|
# Memory usage of current process (approximate)
|
|
1596
|
-
|
|
1597
|
-
|
|
1598
|
-
import resource
|
|
1599
|
-
usage = resource.getrusage(resource.RUSAGE_SELF)
|
|
1600
|
-
memory_mb = round(usage.ru_maxrss / (1024 * 1024), 1)
|
|
1601
|
-
except Exception:
|
|
1602
|
-
pass
|
|
1743
|
+
from superlocalmemory.core.platform_utils import get_rss_mb
|
|
1744
|
+
memory_mb = round(get_rss_mb(), 1)
|
|
1603
1745
|
|
|
1604
1746
|
return {
|
|
1605
1747
|
"processes": processes,
|
|
@@ -56,9 +56,27 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware):
|
|
|
56
56
|
# Control referrer information leakage
|
|
57
57
|
response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin"
|
|
58
58
|
|
|
59
|
-
#
|
|
60
|
-
|
|
59
|
+
# v3.4.23: Cache-Control strategy
|
|
60
|
+
# ---------------------------------------------------------------
|
|
61
|
+
# Three classes of paths, three policies:
|
|
62
|
+
#
|
|
63
|
+
# /api/* -> no-store (sensitive data, never cache)
|
|
64
|
+
# index.html -> no-cache, must-revalidate (always revalidate)
|
|
65
|
+
# /static/* -> no-cache, must-revalidate (always revalidate
|
|
66
|
+
# with ETag; fast reloads but never stale-after-
|
|
67
|
+
# upgrade)
|
|
68
|
+
#
|
|
69
|
+
# Before v3.4.23 only /api/* had cache headers. Browsers then cached
|
|
70
|
+
# JS/CSS/HTML aggressively via default heuristics, and after a daemon
|
|
71
|
+
# upgrade the dashboard showed an infinite spinner because old cached
|
|
72
|
+
# JS was calling endpoints with stale response shapes. "no-cache"
|
|
73
|
+
# (not "no-store") still allows 304s on unchanged files, so reload
|
|
74
|
+
# cost stays low.
|
|
75
|
+
path = request.url.path
|
|
76
|
+
if path.startswith("/api/"):
|
|
61
77
|
response.headers["Cache-Control"] = "no-store, no-cache, must-revalidate"
|
|
62
78
|
response.headers["Pragma"] = "no-cache"
|
|
79
|
+
elif path == "/" or path.endswith(".html") or path.startswith("/static/"):
|
|
80
|
+
response.headers["Cache-Control"] = "no-cache, must-revalidate"
|
|
63
81
|
|
|
64
82
|
return response
|
|
@@ -495,9 +495,20 @@ async def lifespan(application: FastAPI):
|
|
|
495
495
|
global _start_time
|
|
496
496
|
_start_time = time.monotonic()
|
|
497
497
|
_last_activity = time.monotonic()
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
498
|
+
# v3.4.23: pre-format the ready message. Previous code passed a ternary as
|
|
499
|
+
# the log format string with a fixed 2-arg tuple; when idle_timeout<=0 the
|
|
500
|
+
# chosen branch had only one %d, triggering a TypeError on every startup.
|
|
501
|
+
# Python's logging module then wrote the full stack to stderr. Because the
|
|
502
|
+
# call runs inside FastAPI's stacked merged_lifespan, each dump was ~30 KB
|
|
503
|
+
# and the error log grew to tens of MB within a day.
|
|
504
|
+
if idle_timeout <= 0:
|
|
505
|
+
_ready_msg = f"Unified daemon ready on port {_DEFAULT_PORT} (24/7 mode)"
|
|
506
|
+
else:
|
|
507
|
+
_ready_msg = (
|
|
508
|
+
f"Unified daemon ready on port {_DEFAULT_PORT} "
|
|
509
|
+
f"(idle timeout: {idle_timeout}s)"
|
|
510
|
+
)
|
|
511
|
+
logger.info(_ready_msg)
|
|
501
512
|
|
|
502
513
|
yield
|
|
503
514
|
|
|
@@ -850,7 +861,18 @@ def _register_dashboard_routes(application: FastAPI) -> None:
|
|
|
850
861
|
_data_io_mod.ws_manager = ws_manager
|
|
851
862
|
|
|
852
863
|
# Root page
|
|
853
|
-
from fastapi.responses import HTMLResponse
|
|
864
|
+
from fastapi.responses import HTMLResponse, JSONResponse
|
|
865
|
+
|
|
866
|
+
# v3.4.23: /api/version — dashboard polls this to detect daemon upgrades
|
|
867
|
+
# and auto-reload stale tabs (see ui/js/core.js::checkVersionFingerprint).
|
|
868
|
+
try:
|
|
869
|
+
from superlocalmemory import __version__ as _SLM_VERSION
|
|
870
|
+
except Exception: # pragma: no cover — defensive
|
|
871
|
+
_SLM_VERSION = "unknown"
|
|
872
|
+
|
|
873
|
+
@application.get("/api/version")
|
|
874
|
+
async def api_version():
|
|
875
|
+
return JSONResponse({"version": _SLM_VERSION})
|
|
854
876
|
|
|
855
877
|
@application.get("/", response_class=HTMLResponse)
|
|
856
878
|
async def root():
|
|
@@ -863,7 +885,11 @@ def _register_dashboard_routes(application: FastAPI) -> None:
|
|
|
863
885
|
"<p><a href='/docs'>API Documentation</a></p>"
|
|
864
886
|
"</body></html>"
|
|
865
887
|
)
|
|
866
|
-
|
|
888
|
+
# v3.4.23: substitute version placeholder so the dashboard can detect
|
|
889
|
+
# upgrades and auto-reload. Read fresh each request (daemon uptime is
|
|
890
|
+
# days, but we want zero caching surprises during development).
|
|
891
|
+
html = index_path.read_text()
|
|
892
|
+
return html.replace("__SLM_VERSION__", _SLM_VERSION)
|
|
867
893
|
|
|
868
894
|
# Startup event for event listener
|
|
869
895
|
@application.on_event("startup")
|
|
@@ -1066,6 +1092,13 @@ def start_server(port: int = _DEFAULT_PORT) -> None:
|
|
|
1066
1092
|
global _start_time
|
|
1067
1093
|
import uvicorn
|
|
1068
1094
|
|
|
1095
|
+
# v3.4.23: rotate oversized logs before anything else so both the CLI
|
|
1096
|
+
# path (`slm serve`) and the LaunchAgent path (__main__) are covered.
|
|
1097
|
+
try:
|
|
1098
|
+
rotate_oversized_logs()
|
|
1099
|
+
except Exception:
|
|
1100
|
+
pass # never block startup on log housekeeping
|
|
1101
|
+
|
|
1069
1102
|
_PID_FILE.parent.mkdir(parents=True, exist_ok=True)
|
|
1070
1103
|
_PID_FILE.write_text(str(os.getpid()))
|
|
1071
1104
|
_PORT_FILE.write_text(str(port))
|
|
@@ -1094,11 +1127,80 @@ def start_server(port: int = _DEFAULT_PORT) -> None:
|
|
|
1094
1127
|
_PORT_FILE.unlink(missing_ok=True)
|
|
1095
1128
|
|
|
1096
1129
|
|
|
1130
|
+
# ---------------------------------------------------------------------------
# v3.4.23 — Startup log rotation
# ---------------------------------------------------------------------------
# The LaunchAgent plist redirects stdout/stderr to daemon.log and
# daemon-error.log. Those files are managed by launchd, not Python, so
# Python's RotatingFileHandler cannot prune them. If any bug ever writes
# large amounts of data to stderr (the v3.4.22 logger-format bug produced
# ~30 KB per startup and the file grew to 69 MB), end users end up with a
# disk-eating log they never knew existed.
#
# rotate_oversized_logs() is a belt-and-suspenders guard: every time the
# daemon starts, if a log exceeds MAX_LOG_BYTES we copy it to ".1" (keeping
# one rotated copy) and truncate the original IN PLACE. The original must
# not be renamed: an already-open descriptor follows a rename, so the
# writer would keep appending to ".1" and the fresh file would stay empty.
# This is cheap, stateless, and independent of whatever caused the overflow.
# ---------------------------------------------------------------------------

_MAX_LOG_BYTES = 10 * 1024 * 1024  # 10 MB


def rotate_oversized_logs(log_dir: Optional[Path] = None,
                          max_bytes: int = _MAX_LOG_BYTES) -> None:
    """Rotate daemon.log, daemon-error.log and daemon.json.log if oversized.

    For each log larger than ``max_bytes`` the content is copied to
    ``<name>.1`` (replacing any previous copy) and the original is truncated
    to zero length without being renamed or replaced, so a process that
    already holds it open in append mode keeps writing to the same path.

    Args:
        log_dir: Directory holding the logs. Defaults to
            ``~/.superlocalmemory/logs``; created if missing.
        max_bytes: Size threshold; files at or below it are left untouched.

    Never raises: log housekeeping must not prevent daemon startup. Lines
    written by another process between the copy and the truncation are
    lost, which is the usual copy-truncate trade-off.
    """
    import shutil  # local import: only needed by this housekeeping step

    try:
        log_dir = Path(log_dir) if log_dir else (
            Path.home() / ".superlocalmemory" / "logs"
        )
        log_dir.mkdir(parents=True, exist_ok=True)
    except Exception:
        return
    for name in ("daemon.log", "daemon-error.log", "daemon.json.log"):
        path = log_dir / name
        rotated = log_dir / f"{name}.1"
        try:
            if path.stat().st_size <= max_bytes:
                continue
        except OSError:
            # Missing or unreadable file: nothing to rotate.
            continue
        try:
            shutil.copyfile(path, rotated)
        except OSError:
            # Could not keep a backup (e.g. disk full). Drop the partial
            # copy and still truncate below so disk is reclaimed.
            try:
                rotated.unlink(missing_ok=True)
            except OSError:
                pass
        try:
            # Truncate in place; the inode, and therefore any open
            # redirect onto it, is preserved.
            with open(path, "w"):
                pass
        except OSError:
            # Log rotation must never prevent daemon startup.
            continue
|
|
1194
|
+
|
|
1195
|
+
|
|
1097
1196
|
# ---------------------------------------------------------------------------
|
|
1098
1197
|
# CLI entry point
|
|
1099
1198
|
# ---------------------------------------------------------------------------
|
|
1100
1199
|
|
|
1101
1200
|
if __name__ == "__main__":
|
|
1201
|
+
# Rotate first, then configure logging, so the first log line lands in a
|
|
1202
|
+
# freshly-sized file.
|
|
1203
|
+
rotate_oversized_logs()
|
|
1102
1204
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
|
1103
1205
|
port = _DEFAULT_PORT
|
|
1104
1206
|
for arg in sys.argv:
|
|
@@ -3,6 +3,10 @@
|
|
|
3
3
|
<head>
|
|
4
4
|
<meta charset="UTF-8">
|
|
5
5
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
6
|
+
<!-- v3.4.23: server substitutes __SLM_VERSION__ at serve time. core.js
|
|
7
|
+
compares this to /api/version and hard-reloads + clears localStorage
|
|
8
|
+
on mismatch, so the browser cannot show stale UI after an upgrade. -->
|
|
9
|
+
<meta name="slm-version" content="__SLM_VERSION__">
|
|
6
10
|
<title>SuperLocalMemory V3 — Dashboard</title>
|
|
7
11
|
|
|
8
12
|
<!-- Bootstrap CSS (vendored locally v3.4.21 — no CDN calls, works offline) -->
|
|
@@ -1003,8 +1007,53 @@
|
|
|
1003
1007
|
</div>
|
|
1004
1008
|
</div>
|
|
1005
1009
|
|
|
1010
|
+
<!-- Step 3: Embedding Configuration (V3.4.24) -->
|
|
1011
|
+
<div class="mt-3 pt-3 border-top" id="settings-embedding-panel">
|
|
1012
|
+
<h6 class="text-muted"><i class="bi bi-cpu"></i> Step 3: Embedding Model</h6>
|
|
1013
|
+
<p class="small text-muted mb-2">
|
|
1014
|
+
Controls how text is converted to vectors for semantic search.
|
|
1015
|
+
Default: local model (768d). Custom: any OpenAI-compatible endpoint.
|
|
1016
|
+
</p>
|
|
1017
|
+
<div class="row g-2 mb-2">
|
|
1018
|
+
<div class="col-md-4">
|
|
1019
|
+
<label class="form-label small">Embedding Provider</label>
|
|
1020
|
+
<select class="form-select form-select-sm" id="settings-emb-provider">
|
|
1021
|
+
<option value="default">Default (Local Model)</option>
|
|
1022
|
+
<option value="openai">Custom Endpoint (OpenAI-compatible)</option>
|
|
1023
|
+
</select>
|
|
1024
|
+
</div>
|
|
1025
|
+
<div class="col-md-4" id="settings-emb-model-col" style="display:none;">
|
|
1026
|
+
<label class="form-label small">Model Name</label>
|
|
1027
|
+
<input type="text" id="settings-emb-model" class="form-control form-control-sm" placeholder="e.g. Qwen3-Embedding">
|
|
1028
|
+
</div>
|
|
1029
|
+
<div class="col-md-4" id="settings-emb-dim-col" style="display:none;">
|
|
1030
|
+
<label class="form-label small">Dimension</label>
|
|
1031
|
+
<input type="number" id="settings-emb-dimension" class="form-control form-control-sm" placeholder="e.g. 1024" min="64" max="8192">
|
|
1032
|
+
</div>
|
|
1033
|
+
</div>
|
|
1034
|
+
<div class="row g-2 mb-2" id="settings-emb-endpoint-row" style="display:none;">
|
|
1035
|
+
<div class="col-md-8">
|
|
1036
|
+
<label class="form-label small">Embedding Endpoint</label>
|
|
1037
|
+
<input type="text" id="settings-emb-endpoint" class="form-control form-control-sm" placeholder="http://localhost:8045/v1/embeddings">
|
|
1038
|
+
</div>
|
|
1039
|
+
<div class="col-md-4">
|
|
1040
|
+
<label class="form-label small">API Key (optional)</label>
|
|
1041
|
+
<input type="password" id="settings-emb-key" class="form-control form-control-sm" placeholder="not-needed">
|
|
1042
|
+
</div>
|
|
1043
|
+
</div>
|
|
1044
|
+
<div id="settings-emb-test-row" style="display:none;">
|
|
1045
|
+
<button class="btn btn-sm btn-outline-info" id="settings-emb-test-btn">
|
|
1046
|
+
<i class="bi bi-lightning"></i> Test Embedding
|
|
1047
|
+
</button>
|
|
1048
|
+
<span id="settings-emb-test-result" class="ms-2 small"></span>
|
|
1049
|
+
</div>
|
|
1050
|
+
<div id="settings-emb-info" class="small text-muted mt-1">
|
|
1051
|
+
Using local <strong>nomic-embed-text-v1.5</strong> (768d)
|
|
1052
|
+
</div>
|
|
1053
|
+
</div>
|
|
1054
|
+
|
|
1006
1055
|
<!-- Save button -->
|
|
1007
|
-
<div class="mt-
|
|
1056
|
+
<div class="mt-3">
|
|
1008
1057
|
<button class="btn btn-primary" id="settings-save-all">
|
|
1009
1058
|
<i class="bi bi-check-circle"></i> Save Configuration
|
|
1010
1059
|
</button>
|