superlocalmemory 3.4.22 → 3.4.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. package/CHANGELOG.md +29 -0
  2. package/package.json +1 -1
  3. package/pyproject.toml +1 -1
  4. package/skills/slm-build-graph/SKILL.md +1 -1
  5. package/skills/slm-list-recent/SKILL.md +1 -1
  6. package/skills/slm-recall/SKILL.md +1 -1
  7. package/skills/slm-remember/SKILL.md +1 -1
  8. package/skills/slm-status/SKILL.md +1 -1
  9. package/skills/slm-switch-profile/SKILL.md +1 -1
  10. package/src/superlocalmemory/__init__.py +3 -0
  11. package/src/superlocalmemory/core/config.py +66 -18
  12. package/src/superlocalmemory/core/context_cache.py +1 -1
  13. package/src/superlocalmemory/core/embedding_worker.py +8 -27
  14. package/src/superlocalmemory/core/embeddings.py +83 -1
  15. package/src/superlocalmemory/core/engine_wiring.py +8 -0
  16. package/src/superlocalmemory/core/platform_utils.py +127 -0
  17. package/src/superlocalmemory/core/recall_worker.py +8 -24
  18. package/src/superlocalmemory/core/reranker_worker.py +8 -24
  19. package/src/superlocalmemory/core/worker_pool.py +2 -1
  20. package/src/superlocalmemory/hooks/context_payload.py +1 -1
  21. package/src/superlocalmemory/learning/database.py +1 -1
  22. package/src/superlocalmemory/retrieval/reranker.py +2 -1
  23. package/src/superlocalmemory/server/routes/brain.py +1 -1
  24. package/src/superlocalmemory/server/routes/v3_api.py +150 -8
  25. package/src/superlocalmemory/server/security_middleware.py +20 -2
  26. package/src/superlocalmemory/server/unified_daemon.py +107 -5
  27. package/src/superlocalmemory/ui/index.html +50 -1
  28. package/src/superlocalmemory/ui/js/auto-settings.js +131 -5
  29. package/src/superlocalmemory/ui/js/core.js +96 -1
@@ -20,7 +20,6 @@ import json
20
20
  import os
21
21
  import signal
22
22
  import sys
23
- import threading
24
23
 
25
24
  # Force CPU BEFORE any torch import
26
25
  os.environ["CUDA_VISIBLE_DEVICES"] = ""
@@ -39,24 +38,10 @@ if sys.platform != "win32":
39
38
  def _start_parent_watchdog() -> None:
40
39
  """Monitor parent process — self-terminate if parent dies.
41
40
 
42
- Prevents orphaned workers that consume 500+ MB each when the parent
43
- process crashes, is killed, or exits without cleanup.
44
-
45
- V3.3.7: Added after incident where orphaned workers consumed 33 GB.
41
+ V3.4.24: Delegates to platform_utils.start_parent_watchdog().
46
42
  """
47
- parent_pid = os.getppid()
48
-
49
- def _watch() -> None:
50
- import time
51
- while True:
52
- time.sleep(5)
53
- try:
54
- os.kill(parent_pid, 0)
55
- except OSError:
56
- os._exit(0)
57
-
58
- t = threading.Thread(target=_watch, daemon=True, name="parent-watchdog")
59
- t.start()
43
+ from superlocalmemory.core.platform_utils import start_parent_watchdog
44
+ start_parent_watchdog()
60
45
 
61
46
  _engine = None
62
47
 
@@ -253,7 +238,8 @@ def _handle_status() -> dict:
253
238
 
254
239
  def _worker_main() -> None:
255
240
  """Main loop: read JSON requests from stdin, write responses to stdout."""
256
- _start_parent_watchdog() # V3.3.7: self-terminate if parent dies
241
+ _start_parent_watchdog()
242
+ from superlocalmemory.core.platform_utils import get_rss_mb
257
243
 
258
244
  for line in sys.stdin:
259
245
  line = line.strip()
@@ -326,11 +312,9 @@ def _worker_main() -> None:
326
312
  except Exception as exc:
327
313
  _respond({"ok": False, "error": str(exc)})
328
314
 
329
- # V3.3.16: RSS watchdog — self-terminate if memory exceeds 1.5GB.
330
- # Parent auto-respawns a fresh worker on next request.
331
- import resource
332
- rss_mb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 / 1024
333
- if rss_mb > 2500:
315
+ # V3.3.16: RSS watchdog — V3.4.24: cross-platform via platform_utils.
316
+ rss_mb = get_rss_mb()
317
+ if rss_mb > 0 and rss_mb > 2500:
334
318
  sys.exit(0)
335
319
 
336
320
 
@@ -31,7 +31,6 @@ import platform
31
31
  import signal
32
32
  import struct
33
33
  import sys
34
- import threading
35
34
 
36
35
  # Force CPU BEFORE any torch import
37
36
  os.environ["CUDA_VISIBLE_DEVICES"] = ""
@@ -52,25 +51,10 @@ if sys.platform != "win32":
52
51
  def _start_parent_watchdog() -> None:
53
52
  """Monitor parent process — self-terminate if parent dies.
54
53
 
55
- Prevents orphaned workers that consume 1+ GB each when the parent
56
- process crashes, is killed, or exits without cleanup.
57
-
58
- V3.3.7: Added after incident where ~30 orphaned workers consumed 33 GB.
54
+ V3.4.24: Delegates to platform_utils.start_parent_watchdog().
59
55
  """
60
- parent_pid = os.getppid()
61
-
62
- def _watch() -> None:
63
- import time
64
- while True:
65
- time.sleep(5)
66
- try:
67
- os.kill(parent_pid, 0) # Check if parent is alive (signal 0)
68
- except OSError:
69
- # Parent is dead — self-terminate
70
- os._exit(0)
71
-
72
- t = threading.Thread(target=_watch, daemon=True, name="parent-watchdog")
73
- t.start()
56
+ from superlocalmemory.core.platform_utils import start_parent_watchdog
57
+ start_parent_watchdog()
74
58
 
75
59
 
76
60
  def _detect_onnx_variant(model_name: str = "") -> str:
@@ -101,7 +85,8 @@ def _detect_onnx_variant(model_name: str = "") -> str:
101
85
 
102
86
  def _worker_main() -> None:
103
87
  """Main loop: read JSON requests from stdin, write responses to stdout."""
104
- _start_parent_watchdog() # V3.3.7: self-terminate if parent dies
88
+ _start_parent_watchdog()
89
+ from superlocalmemory.core.platform_utils import get_rss_mb
105
90
 
106
91
  model = None
107
92
  active_backend = ""
@@ -194,10 +179,9 @@ def _worker_main() -> None:
194
179
  except Exception as exc:
195
180
  _respond({"ok": False, "error": str(exc)})
196
181
 
197
- # V3.3.16: RSS watchdog — same as embedding_worker
198
- import resource
199
- rss_mb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 / 1024
200
- if rss_mb > 2500:
182
+ # V3.3.16: RSS watchdog — V3.4.24: cross-platform via platform_utils.
183
+ rss_mb = get_rss_mb()
184
+ if rss_mb > 0 and rss_mb > 2500:
201
185
  sys.exit(0)
202
186
 
203
187
  continue
@@ -247,6 +247,7 @@ class WorkerPool:
247
247
  "TOKENIZERS_PARALLELISM": "false",
248
248
  "TORCH_DEVICE": "cpu",
249
249
  }
250
+ from superlocalmemory.core.platform_utils import popen_platform_kwargs
250
251
  self._proc = subprocess.Popen(
251
252
  [sys.executable, "-m", "superlocalmemory.core.recall_worker"],
252
253
  stdin=subprocess.PIPE,
@@ -255,7 +256,7 @@ class WorkerPool:
255
256
  text=True,
256
257
  bufsize=1,
257
258
  env=env,
258
- start_new_session=True, # Prevent terminal signals bleeding to worker
259
+ **popen_platform_kwargs(),
259
260
  )
260
261
  logger.info("Recall worker spawned (PID %d)", self._proc.pid)
261
262
  except Exception as exc:
@@ -22,7 +22,7 @@ from typing import Callable, Iterable
22
22
  from superlocalmemory.core.security_primitives import redact_secrets
23
23
 
24
24
 
25
- VERSION = "3.4.22"
25
+ VERSION = "3.4.23"
26
26
  DEFAULT_TOP_K = 10
27
27
  DEFAULT_DECISIONS_K = 5
28
28
  DEFAULT_MEMORIES_K = 10
@@ -395,7 +395,7 @@ class LearningDatabase:
395
395
  feature_names: list[str],
396
396
  trained_on_count: int,
397
397
  metrics: dict,
398
- model_version: str = "3.4.22",
398
+ model_version: str = "3.4.23",
399
399
  ) -> int:
400
400
  """Persist a newly trained model and flip the active flag.
401
401
 
@@ -193,6 +193,7 @@ class CrossEncoderReranker:
193
193
  "TOKENIZERS_PARALLELISM": "false",
194
194
  "TORCH_DEVICE": "cpu",
195
195
  }
196
+ from superlocalmemory.core.platform_utils import popen_platform_kwargs
196
197
  self._worker_proc = subprocess.Popen(
197
198
  [sys.executable, "-m", worker_module],
198
199
  stdin=subprocess.PIPE,
@@ -201,7 +202,7 @@ class CrossEncoderReranker:
201
202
  text=True,
202
203
  bufsize=1,
203
204
  env=env,
204
- start_new_session=True,
205
+ **popen_platform_kwargs(),
205
206
  )
206
207
  # v3.4.13: Register PID for machine-wide singleton
207
208
  _RERANKER_PID_FILE.parent.mkdir(parents=True, exist_ok=True)
@@ -64,7 +64,7 @@ router = APIRouter(prefix="/api/v3", tags=["brain"])
64
64
  # LLD-03 v2 stratum space = 4 query types × 3 entity bins × 4 time buckets.
65
65
  _STRATA_TOTAL: int = 48
66
66
 
67
- _VERSION: str = "3.4.22"
67
+ _VERSION: str = "3.4.23"
68
68
 
69
69
  # Banned metric names (LLD-04 U4). Kept as a tuple for grep visibility;
70
70
  # the source-level test asserts we don't accidentally reintroduce them.
@@ -129,6 +129,11 @@ async def set_mode(request: Request):
129
129
  llm_model=old_config.llm.model,
130
130
  llm_api_key=old_config.llm.api_key,
131
131
  llm_api_base=old_config.llm.api_base,
132
+ embedding_provider=old_config.embedding.provider,
133
+ embedding_endpoint=old_config.embedding.api_endpoint,
134
+ embedding_key=old_config.embedding.api_key,
135
+ embedding_model_name=old_config.embedding.model_name,
136
+ embedding_dimension=old_config.embedding.dimension,
132
137
  )
133
138
  new_config.active_profile = old_config.active_profile
134
139
  new_config.save()
@@ -165,7 +170,10 @@ async def set_mode(request: Request):
165
170
 
166
171
  @router.post("/mode/set")
167
172
  async def set_full_config(request: Request):
168
- """Save mode + provider + model + API key together."""
173
+ """Save mode + provider + model + API key together.
174
+
175
+ V3.4.24: Also accepts embedding_* fields for custom embedding endpoints.
176
+ """
169
177
  try:
170
178
  body = await request.json()
171
179
  new_mode = body.get("mode", "a").lower()
@@ -187,6 +195,11 @@ async def set_full_config(request: Request):
187
195
  llm_model=model,
188
196
  llm_api_key=api_key,
189
197
  llm_api_base="http://localhost:11434" if provider == "ollama" else "",
198
+ embedding_provider=body.get("embedding_provider", ""),
199
+ embedding_endpoint=body.get("embedding_endpoint", ""),
200
+ embedding_key=body.get("embedding_key", ""),
201
+ embedding_model_name=body.get("embedding_model", ""),
202
+ embedding_dimension=int(body.get("embedding_dimension", 0) or 0),
190
203
  )
191
204
  config.active_profile = old.active_profile
192
205
  config.save()
@@ -213,11 +226,145 @@ async def set_full_config(request: Request):
213
226
  "mode": new_mode,
214
227
  "provider": provider,
215
228
  "model": model,
229
+ "embedding_provider": config.embedding.provider,
230
+ "embedding_model": config.embedding.model_name,
231
+ "embedding_dimension": config.embedding.dimension,
232
+ }
233
+ except Exception as e:
234
+ return JSONResponse({"error": str(e)}, status_code=500)
235
+
236
+
237
+ # ── V3.4.24: Embedding Configuration ────────────────────────────────
238
+
239
+ @router.get("/embedding/config")
240
+ async def get_embedding_config(request: Request):
241
+ """Return current embedding configuration."""
242
+ try:
243
+ from superlocalmemory.core.config import SLMConfig
244
+ config = SLMConfig.load()
245
+ emb = config.embedding
246
+ return {
247
+ "provider": emb.provider,
248
+ "model_name": emb.model_name,
249
+ "dimension": emb.dimension,
250
+ "api_endpoint": emb.api_endpoint,
251
+ "has_key": bool(emb.api_key),
252
+ "is_openai_compatible": emb.is_openai_compatible,
253
+ "mode": config.mode.value,
254
+ }
255
+ except Exception as e:
256
+ return JSONResponse({"error": str(e)}, status_code=500)
257
+
258
+
259
+ @router.put("/embedding/config")
260
+ async def set_embedding_config(request: Request):
261
+ """Update embedding configuration independently of mode switch."""
262
+ try:
263
+ body = await request.json()
264
+ from superlocalmemory.core.config import SLMConfig, EmbeddingConfig
265
+ config = SLMConfig.load()
266
+
267
+ new_provider = body.get("provider", config.embedding.provider)
268
+ new_model = body.get("model_name", config.embedding.model_name)
269
+ new_dim = int(body.get("dimension", config.embedding.dimension) or 768)
270
+ if not (64 <= new_dim <= 8192):
271
+ return JSONResponse({"error": f"Dimension must be 64-8192, got {new_dim}"}, status_code=400)
272
+ new_endpoint = body.get("api_endpoint", config.embedding.api_endpoint)
273
+ new_key = body.get("api_key", config.embedding.api_key)
274
+
275
+ old_emb = config.embedding
276
+ config.embedding = EmbeddingConfig(
277
+ model_name=new_model,
278
+ dimension=new_dim,
279
+ provider=new_provider,
280
+ api_endpoint=new_endpoint,
281
+ api_key=new_key,
282
+ ollama_model=old_emb.ollama_model,
283
+ ollama_base_url=old_emb.ollama_base_url,
284
+ api_version=old_emb.api_version,
285
+ deployment_name=old_emb.deployment_name,
286
+ )
287
+ config.save()
288
+
289
+ needs_reindex = (
290
+ old_emb.provider != new_provider
291
+ or old_emb.model_name != new_model
292
+ or old_emb.dimension != new_dim
293
+ )
294
+
295
+ # Kill workers so next request uses new config
296
+ try:
297
+ from superlocalmemory.core.worker_pool import WorkerPool
298
+ WorkerPool.shared().shutdown()
299
+ except Exception:
300
+ pass
301
+ if hasattr(request.app.state, "engine"):
302
+ request.app.state.engine = None
303
+
304
+ return {
305
+ "success": True,
306
+ "provider": new_provider,
307
+ "model_name": new_model,
308
+ "dimension": new_dim,
309
+ "needs_reindex": needs_reindex,
216
310
  }
217
311
  except Exception as e:
218
312
  return JSONResponse({"error": str(e)}, status_code=500)
219
313
 
220
314
 
315
+ @router.post("/embedding/test")
316
+ async def test_embedding_endpoint(request: Request):
317
+ """Test connectivity to a custom embedding endpoint."""
318
+ try:
319
+ import httpx
320
+ from urllib.parse import urlparse
321
+ body = await request.json()
322
+ endpoint = body.get("api_endpoint", "").rstrip("/")
323
+ model = body.get("model_name", "test")
324
+ api_key = body.get("api_key", "")
325
+
326
+ if not endpoint:
327
+ return JSONResponse({"error": "No endpoint provided"}, status_code=400)
328
+
329
+ parsed = urlparse(endpoint)
330
+ if parsed.scheme not in ("http", "https"):
331
+ return JSONResponse({"error": "Only http/https endpoints supported"}, status_code=400)
332
+ host = parsed.hostname or ""
333
+ if host in ("169.254.169.254", "metadata.google.internal"):
334
+ return JSONResponse({"error": "Cloud metadata endpoints not allowed"}, status_code=400)
335
+
336
+ if not endpoint.endswith("/embeddings"):
337
+ endpoint = f"{endpoint}/embeddings"
338
+
339
+ headers = {"Content-Type": "application/json"}
340
+ if api_key:
341
+ headers["Authorization"] = f"Bearer {api_key}"
342
+
343
+ payload = {"input": ["test embedding connection"], "model": model}
344
+
345
+ with httpx.Client(timeout=httpx.Timeout(15.0)) as client:
346
+ resp = client.post(endpoint, headers=headers, json=payload)
347
+ resp.raise_for_status()
348
+ data = resp.json()
349
+ emb_data = data.get("data", [])
350
+ if emb_data:
351
+ dim = len(emb_data[0].get("embedding", []))
352
+ return {
353
+ "success": True,
354
+ "message": f"Connected! Dimension: {dim}",
355
+ "dimension": dim,
356
+ }
357
+ return {"success": False, "error": "No embedding data returned"}
358
+ except httpx.HTTPStatusError as e:
359
+ return {"success": False, "error": f"HTTP {e.response.status_code}"}
360
+ except httpx.ConnectError:
361
+ return {"success": False, "error": "Cannot reach the embedding server. Is it running?"}
362
+ except httpx.TimeoutException:
363
+ return {"success": False, "error": "Connection timed out after 15 seconds."}
364
+ except Exception as e:
365
+ return {"success": False, "error": type(e).__name__}
366
+
367
+
221
368
  @router.post("/provider/test")
222
369
  async def test_provider(request: Request):
223
370
  """Test connectivity to an LLM provider."""
@@ -1593,13 +1740,8 @@ async def process_health(request: Request):
1593
1740
  processes["worker_pool"] = {"status": worker_status}
1594
1741
 
1595
1742
  # Memory usage of current process (approximate)
1596
- memory_mb = 0.0
1597
- try:
1598
- import resource
1599
- usage = resource.getrusage(resource.RUSAGE_SELF)
1600
- memory_mb = round(usage.ru_maxrss / (1024 * 1024), 1)
1601
- except Exception:
1602
- pass
1743
+ from superlocalmemory.core.platform_utils import get_rss_mb
1744
+ memory_mb = round(get_rss_mb(), 1)
1603
1745
 
1604
1746
  return {
1605
1747
  "processes": processes,
@@ -56,9 +56,27 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware):
56
56
  # Control referrer information leakage
57
57
  response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin"
58
58
 
59
- # Prevent caching of sensitive data (for API endpoints)
60
- if request.url.path.startswith("/api/"):
59
+ # v3.4.23: Cache-Control strategy
60
+ # ---------------------------------------------------------------
61
+ # Three classes of paths, three policies:
62
+ #
63
+ # /api/* -> no-store (sensitive data, never cache)
64
+ # index.html -> no-cache, must-revalidate (always revalidate)
65
+ # /static/* -> no-cache, must-revalidate (always revalidate
66
+ # with ETag; fast reloads but never stale-after-
67
+ # upgrade)
68
+ #
69
+ # Before v3.4.23 only /api/* had cache headers. Browsers then cached
70
+ # JS/CSS/HTML aggressively via default heuristics, and after a daemon
71
+ # upgrade the dashboard showed an infinite spinner because old cached
72
+ # JS was calling endpoints with stale response shapes. "no-cache"
73
+ # (not "no-store") still allows 304s on unchanged files, so reload
74
+ # cost stays low.
75
+ path = request.url.path
76
+ if path.startswith("/api/"):
61
77
  response.headers["Cache-Control"] = "no-store, no-cache, must-revalidate"
62
78
  response.headers["Pragma"] = "no-cache"
79
+ elif path == "/" or path.endswith(".html") or path.startswith("/static/"):
80
+ response.headers["Cache-Control"] = "no-cache, must-revalidate"
63
81
 
64
82
  return response
@@ -495,9 +495,20 @@ async def lifespan(application: FastAPI):
495
495
  global _start_time
496
496
  _start_time = time.monotonic()
497
497
  _last_activity = time.monotonic()
498
- logger.info("Unified daemon ready on port %d (24/7 mode)" if idle_timeout <= 0
499
- else "Unified daemon ready on port %d (idle timeout: %ds)",
500
- _DEFAULT_PORT, idle_timeout)
498
+ # v3.4.23: pre-format the ready message. Previous code passed a ternary as
499
+ # the log format string with a fixed 2-arg tuple; when idle_timeout<=0 the
500
+ # chosen branch had only one %d, triggering a TypeError on every startup.
501
+ # Python's logging module then wrote the full stack to stderr. Because the
502
+ # call runs inside FastAPI's stacked merged_lifespan, each dump was ~30 KB
503
+ # and the error log grew to tens of MB within a day.
504
+ if idle_timeout <= 0:
505
+ _ready_msg = f"Unified daemon ready on port {_DEFAULT_PORT} (24/7 mode)"
506
+ else:
507
+ _ready_msg = (
508
+ f"Unified daemon ready on port {_DEFAULT_PORT} "
509
+ f"(idle timeout: {idle_timeout}s)"
510
+ )
511
+ logger.info(_ready_msg)
501
512
 
502
513
  yield
503
514
 
@@ -850,7 +861,18 @@ def _register_dashboard_routes(application: FastAPI) -> None:
850
861
  _data_io_mod.ws_manager = ws_manager
851
862
 
852
863
  # Root page
853
- from fastapi.responses import HTMLResponse
864
+ from fastapi.responses import HTMLResponse, JSONResponse
865
+
866
+ # v3.4.23: /api/version — dashboard polls this to detect daemon upgrades
867
+ # and auto-reload stale tabs (see ui/js/core.js::checkVersionFingerprint).
868
+ try:
869
+ from superlocalmemory import __version__ as _SLM_VERSION
870
+ except Exception: # pragma: no cover — defensive
871
+ _SLM_VERSION = "unknown"
872
+
873
+ @application.get("/api/version")
874
+ async def api_version():
875
+ return JSONResponse({"version": _SLM_VERSION})
854
876
 
855
877
  @application.get("/", response_class=HTMLResponse)
856
878
  async def root():
@@ -863,7 +885,11 @@ def _register_dashboard_routes(application: FastAPI) -> None:
863
885
  "<p><a href='/docs'>API Documentation</a></p>"
864
886
  "</body></html>"
865
887
  )
866
- return index_path.read_text()
888
+ # v3.4.23: substitute version placeholder so the dashboard can detect
889
+ # upgrades and auto-reload. Read fresh each request (daemon uptime is
890
+ # days, but we want zero caching surprises during development).
891
+ html = index_path.read_text()
892
+ return html.replace("__SLM_VERSION__", _SLM_VERSION)
867
893
 
868
894
  # Startup event for event listener
869
895
  @application.on_event("startup")
@@ -1066,6 +1092,13 @@ def start_server(port: int = _DEFAULT_PORT) -> None:
1066
1092
  global _start_time
1067
1093
  import uvicorn
1068
1094
 
1095
+ # v3.4.23: rotate oversized logs before anything else so both the CLI
1096
+ # path (`slm serve`) and the LaunchAgent path (__main__) are covered.
1097
+ try:
1098
+ rotate_oversized_logs()
1099
+ except Exception:
1100
+ pass # never block startup on log housekeeping
1101
+
1069
1102
  _PID_FILE.parent.mkdir(parents=True, exist_ok=True)
1070
1103
  _PID_FILE.write_text(str(os.getpid()))
1071
1104
  _PORT_FILE.write_text(str(port))
@@ -1094,11 +1127,80 @@ def start_server(port: int = _DEFAULT_PORT) -> None:
1094
1127
  _PORT_FILE.unlink(missing_ok=True)
1095
1128
 
1096
1129
 
1130
+ # ---------------------------------------------------------------------------
1131
+ # v3.4.23 — Startup log rotation
1132
+ # ---------------------------------------------------------------------------
1133
+ # The LaunchAgent plist redirects stdout/stderr to daemon.log and
1134
+ # daemon-error.log. Those files are managed by launchd, not Python, so
1135
+ # Python's RotatingFileHandler cannot prune them. If any bug ever writes
1136
+ # large amounts of data to stderr (the v3.4.22 logger-format bug produced
1137
+ # ~30 KB per startup and the file grew to 69 MB), end users end up with a
1138
+ # disk-eating log they never knew existed.
1139
+ #
1140
+ # rotate_oversized_logs() is a belt-and-suspenders guard: every time the
1141
+ # daemon starts, if either log exceeds MAX_LOG_BYTES we rename the current
1142
+ # file to ".1" (keeping one rotated copy) and truncate the original so
1143
+ # launchd's open file descriptor keeps working. This is cheap, stateless,
1144
+ # and independent of whatever caused the overflow.
1145
+ # ---------------------------------------------------------------------------
1146
+
1147
+ _MAX_LOG_BYTES = 10 * 1024 * 1024 # 10 MB
1148
+
1149
+
1150
+ def rotate_oversized_logs(log_dir: Optional[Path] = None,
1151
+ max_bytes: int = _MAX_LOG_BYTES) -> None:
1152
+ """Rotate daemon.log and daemon-error.log at startup if oversized.
1153
+
1154
+ Keeps one rotated copy (.1). Safe under concurrent start attempts:
1155
+ rename is atomic on POSIX, and truncation is idempotent.
1156
+ """
1157
+ log_dir = log_dir or (Path.home() / ".superlocalmemory" / "logs")
1158
+ try:
1159
+ log_dir.mkdir(parents=True, exist_ok=True)
1160
+ except Exception:
1161
+ return
1162
+ for name in ("daemon.log", "daemon-error.log", "daemon.json.log"):
1163
+ path = log_dir / name
1164
+ try:
1165
+ if not path.exists() or path.stat().st_size <= max_bytes:
1166
+ continue
1167
+ rotated = log_dir / f"{name}.1"
1168
+ try:
1169
+ if rotated.exists():
1170
+ rotated.unlink()
1171
+ except Exception:
1172
+ pass
1173
+ try:
1174
+ path.rename(rotated)
1175
+ except Exception:
1176
+ # If rename fails (e.g., file is the open stderr fd under
1177
+ # launchd), fall back to truncation so we at least reclaim
1178
+ # disk without breaking the redirect.
1179
+ try:
1180
+ with open(path, "w"):
1181
+ pass
1182
+ except Exception:
1183
+ pass
1184
+ continue
1185
+ # Re-create the original path as empty so launchd's redirect
1186
+ # keeps appending to a fresh file.
1187
+ try:
1188
+ path.touch()
1189
+ except Exception:
1190
+ pass
1191
+ except Exception:
1192
+ # Log rotation must never prevent daemon startup.
1193
+ continue
1194
+
1195
+
1097
1196
  # ---------------------------------------------------------------------------
1098
1197
  # CLI entry point
1099
1198
  # ---------------------------------------------------------------------------
1100
1199
 
1101
1200
  if __name__ == "__main__":
1201
+ # Rotate first, then configure logging, so the first log line lands in a
1202
+ # freshly-sized file.
1203
+ rotate_oversized_logs()
1102
1204
  logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
1103
1205
  port = _DEFAULT_PORT
1104
1206
  for arg in sys.argv:
@@ -3,6 +3,10 @@
3
3
  <head>
4
4
  <meta charset="UTF-8">
5
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <!-- v3.4.23: server substitutes __SLM_VERSION__ at serve time. core.js
7
+ compares this to /api/version and hard-reloads + clears localStorage
8
+ on mismatch, so the browser cannot show stale UI after an upgrade. -->
9
+ <meta name="slm-version" content="__SLM_VERSION__">
6
10
  <title>SuperLocalMemory V3 — Dashboard</title>
7
11
 
8
12
  <!-- Bootstrap CSS (vendored locally v3.4.21 — no CDN calls, works offline) -->
@@ -1003,8 +1007,53 @@
1003
1007
  </div>
1004
1008
  </div>
1005
1009
 
1010
+ <!-- Step 3: Embedding Configuration (V3.4.24) -->
1011
+ <div class="mt-3 pt-3 border-top" id="settings-embedding-panel">
1012
+ <h6 class="text-muted"><i class="bi bi-cpu"></i> Step 3: Embedding Model</h6>
1013
+ <p class="small text-muted mb-2">
1014
+ Controls how text is converted to vectors for semantic search.
1015
+ Default: local model (768d). Custom: any OpenAI-compatible endpoint.
1016
+ </p>
1017
+ <div class="row g-2 mb-2">
1018
+ <div class="col-md-4">
1019
+ <label class="form-label small">Embedding Provider</label>
1020
+ <select class="form-select form-select-sm" id="settings-emb-provider">
1021
+ <option value="default">Default (Local Model)</option>
1022
+ <option value="openai">Custom Endpoint (OpenAI-compatible)</option>
1023
+ </select>
1024
+ </div>
1025
+ <div class="col-md-4" id="settings-emb-model-col" style="display:none;">
1026
+ <label class="form-label small">Model Name</label>
1027
+ <input type="text" id="settings-emb-model" class="form-control form-control-sm" placeholder="e.g. Qwen3-Embedding">
1028
+ </div>
1029
+ <div class="col-md-4" id="settings-emb-dim-col" style="display:none;">
1030
+ <label class="form-label small">Dimension</label>
1031
+ <input type="number" id="settings-emb-dimension" class="form-control form-control-sm" placeholder="e.g. 1024" min="64" max="8192">
1032
+ </div>
1033
+ </div>
1034
+ <div class="row g-2 mb-2" id="settings-emb-endpoint-row" style="display:none;">
1035
+ <div class="col-md-8">
1036
+ <label class="form-label small">Embedding Endpoint</label>
1037
+ <input type="text" id="settings-emb-endpoint" class="form-control form-control-sm" placeholder="http://localhost:8045/v1/embeddings">
1038
+ </div>
1039
+ <div class="col-md-4">
1040
+ <label class="form-label small">API Key (optional)</label>
1041
+ <input type="password" id="settings-emb-key" class="form-control form-control-sm" placeholder="not-needed">
1042
+ </div>
1043
+ </div>
1044
+ <div id="settings-emb-test-row" style="display:none;">
1045
+ <button class="btn btn-sm btn-outline-info" id="settings-emb-test-btn">
1046
+ <i class="bi bi-lightning"></i> Test Embedding
1047
+ </button>
1048
+ <span id="settings-emb-test-result" class="ms-2 small"></span>
1049
+ </div>
1050
+ <div id="settings-emb-info" class="small text-muted mt-1">
1051
+ Using local <strong>nomic-embed-text-v1.5</strong> (768d)
1052
+ </div>
1053
+ </div>
1054
+
1006
1055
  <!-- Save button -->
1007
- <div class="mt-2">
1056
+ <div class="mt-3">
1008
1057
  <button class="btn btn-primary" id="settings-save-all">
1009
1058
  <i class="bi bi-check-circle"></i> Save Configuration
1010
1059
  </button>