superlocalmemory 3.4.23 → 3.4.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/src/superlocalmemory/__init__.py +1 -1
- package/src/superlocalmemory/core/config.py +66 -18
- package/src/superlocalmemory/core/embedding_worker.py +8 -27
- package/src/superlocalmemory/core/embeddings.py +83 -1
- package/src/superlocalmemory/core/engine_wiring.py +8 -0
- package/src/superlocalmemory/core/platform_utils.py +127 -0
- package/src/superlocalmemory/core/recall_worker.py +8 -24
- package/src/superlocalmemory/core/reranker_worker.py +8 -24
- package/src/superlocalmemory/core/worker_pool.py +2 -1
- package/src/superlocalmemory/retrieval/reranker.py +2 -1
- package/src/superlocalmemory/server/routes/v3_api.py +150 -8
- package/src/superlocalmemory/ui/index.html +46 -1
- package/src/superlocalmemory/ui/js/auto-settings.js +131 -5
- package/src/superlocalmemory.egg-info/PKG-INFO +0 -655
- package/src/superlocalmemory.egg-info/SOURCES.txt +0 -426
- package/src/superlocalmemory.egg-info/dependency_links.txt +0 -1
- package/src/superlocalmemory.egg-info/entry_points.txt +0 -2
- package/src/superlocalmemory.egg-info/requires.txt +0 -58
- package/src/superlocalmemory.egg-info/top_level.txt +0 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "superlocalmemory",
|
|
3
|
-
"version": "3.4.23",
|
|
3
|
+
"version": "3.4.24",
|
|
4
4
|
"description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai-memory",
|
package/pyproject.toml
CHANGED
|
@@ -37,7 +37,8 @@ class EmbeddingConfig:
|
|
|
37
37
|
|
|
38
38
|
model_name: str = "nomic-ai/nomic-embed-text-v1.5"
|
|
39
39
|
dimension: int = 768
|
|
40
|
-
# Provider: "" = auto-detect, "sentence-transformers", "ollama", "cloud"
|
|
40
|
+
# Provider: "" = auto-detect, "sentence-transformers", "ollama", "cloud",
|
|
41
|
+
# "openai" (V3.4.24: any OpenAI-compatible /v1/embeddings endpoint)
|
|
41
42
|
provider: str = ""
|
|
42
43
|
# Ollama settings (used when provider="ollama" or auto-detected)
|
|
43
44
|
ollama_model: str = "nomic-embed-text"
|
|
@@ -50,12 +51,19 @@ class EmbeddingConfig:
|
|
|
50
51
|
|
|
51
52
|
@property
def is_cloud(self) -> bool:
    """Tell whether embeddings are routed through the cloud path.

    The ``"openai"`` provider is never reported as cloud. For every other
    provider, a non-empty ``api_endpoint`` or ``provider == "cloud"``
    makes this True.
    """
    if self.provider != "openai":
        has_endpoint = bool(self.api_endpoint)
        return has_endpoint or self.provider == "cloud"
    return False
|
|
54
57
|
|
|
55
58
|
@property
def is_ollama(self) -> bool:
    """True only when the configured provider is exactly ``"ollama"``."""
    selected = self.provider
    return selected == "ollama"
|
|
58
61
|
|
|
62
|
+
@property
def is_openai_compatible(self) -> bool:
    """V3.4.24: report whether a custom OpenAI-compatible endpoint is in use.

    Both conditions must hold: ``provider == "openai"`` and a non-empty
    ``api_endpoint``.
    """
    if self.provider != "openai":
        return False
    return bool(self.api_endpoint)
|
|
66
|
+
|
|
59
67
|
|
|
60
68
|
# ---------------------------------------------------------------------------
|
|
61
69
|
# LLM Config
|
|
@@ -639,6 +647,8 @@ class SLMConfig:
|
|
|
639
647
|
embedding_endpoint=emb_data.get("api_endpoint", ""),
|
|
640
648
|
embedding_key=emb_data.get("api_key", ""),
|
|
641
649
|
embedding_deployment=emb_data.get("deployment_name", ""),
|
|
650
|
+
embedding_model_name=emb_data.get("model_name", ""),
|
|
651
|
+
embedding_dimension=int(emb_data.get("dimension", 0) or 0),
|
|
642
652
|
)
|
|
643
653
|
config.active_profile = data.get("active_profile", "default")
|
|
644
654
|
|
|
@@ -787,20 +797,34 @@ class SLMConfig:
|
|
|
787
797
|
embedding_endpoint: str = "",
|
|
788
798
|
embedding_key: str = "",
|
|
789
799
|
embedding_deployment: str = "",
|
|
800
|
+
embedding_model_name: str = "",
|
|
801
|
+
embedding_dimension: int = 0,
|
|
790
802
|
) -> SLMConfig:
|
|
791
803
|
"""Create config with mode-appropriate defaults."""
|
|
792
804
|
_base = base_dir or DEFAULT_BASE_DIR
|
|
793
805
|
|
|
794
806
|
if mode == Mode.A:
|
|
807
|
+
# V3.4.24: If user chose "openai" provider, honour their custom
|
|
808
|
+
# endpoint/model/dimension. Otherwise use local defaults.
|
|
809
|
+
_a_provider = embedding_provider or "sentence-transformers"
|
|
810
|
+
if _a_provider == "openai" and embedding_endpoint:
|
|
811
|
+
_a_emb = EmbeddingConfig(
|
|
812
|
+
model_name=embedding_model_name or "nomic-ai/nomic-embed-text-v1.5",
|
|
813
|
+
dimension=embedding_dimension or 768,
|
|
814
|
+
provider="openai",
|
|
815
|
+
api_endpoint=embedding_endpoint,
|
|
816
|
+
api_key=embedding_key,
|
|
817
|
+
)
|
|
818
|
+
else:
|
|
819
|
+
_a_emb = EmbeddingConfig(
|
|
820
|
+
model_name="nomic-ai/nomic-embed-text-v1.5",
|
|
821
|
+
dimension=768,
|
|
822
|
+
provider=_a_provider,
|
|
823
|
+
)
|
|
795
824
|
return cls(
|
|
796
825
|
mode=mode,
|
|
797
826
|
base_dir=_base,
|
|
798
|
-
embedding=EmbeddingConfig(
|
|
799
|
-
model_name="nomic-ai/nomic-embed-text-v1.5",
|
|
800
|
-
dimension=768,
|
|
801
|
-
# Mode A: sentence-transformers in SUBPROCESS (never in-process)
|
|
802
|
-
provider=embedding_provider or "sentence-transformers",
|
|
803
|
-
),
|
|
827
|
+
embedding=_a_emb,
|
|
804
828
|
llm=LLMConfig(), # No LLM
|
|
805
829
|
retrieval=RetrievalConfig(
|
|
806
830
|
# V3.3.2: ONNX cross-encoder enabled for all modes (~200MB)
|
|
@@ -816,15 +840,27 @@ class SLMConfig:
|
|
|
816
840
|
)
|
|
817
841
|
|
|
818
842
|
if mode == Mode.B:
|
|
843
|
+
# V3.4.24: If user chose "openai" provider with a custom endpoint
|
|
844
|
+
# (e.g. local vLLM, LiteLLM, Ollama /v1), honour it.
|
|
845
|
+
_b_provider = embedding_provider or "ollama"
|
|
846
|
+
if _b_provider == "openai" and embedding_endpoint:
|
|
847
|
+
_b_emb = EmbeddingConfig(
|
|
848
|
+
model_name=embedding_model_name or "nomic-ai/nomic-embed-text-v1.5",
|
|
849
|
+
dimension=embedding_dimension or 768,
|
|
850
|
+
provider="openai",
|
|
851
|
+
api_endpoint=embedding_endpoint,
|
|
852
|
+
api_key=embedding_key,
|
|
853
|
+
)
|
|
854
|
+
else:
|
|
855
|
+
_b_emb = EmbeddingConfig(
|
|
856
|
+
model_name="nomic-ai/nomic-embed-text-v1.5",
|
|
857
|
+
dimension=768,
|
|
858
|
+
provider=_b_provider,
|
|
859
|
+
)
|
|
819
860
|
return cls(
|
|
820
861
|
mode=mode,
|
|
821
862
|
base_dir=_base,
|
|
822
|
-
embedding=EmbeddingConfig(
|
|
823
|
-
model_name="nomic-ai/nomic-embed-text-v1.5",
|
|
824
|
-
dimension=768,
|
|
825
|
-
# Mode B: Ollama HTTP API (zero PyTorch in-process)
|
|
826
|
-
provider=embedding_provider or "ollama",
|
|
827
|
-
),
|
|
863
|
+
embedding=_b_emb,
|
|
828
864
|
llm=LLMConfig(
|
|
829
865
|
provider=llm_provider or "ollama",
|
|
830
866
|
model=llm_model or "llama3.2",
|
|
@@ -841,16 +877,28 @@ class SLMConfig:
|
|
|
841
877
|
# Don't carry over local-only providers (ollama) to cloud mode
|
|
842
878
|
c_provider = llm_provider if llm_provider not in ("ollama", "") else "openrouter"
|
|
843
879
|
c_model = llm_model if llm_provider not in ("ollama", "") else "anthropic/claude-sonnet-4"
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
880
|
+
# V3.4.24: If user chose "openai" provider, honour it in Mode C too.
|
|
881
|
+
_c_emb_provider = embedding_provider or ""
|
|
882
|
+
if _c_emb_provider == "openai" and embedding_endpoint:
|
|
883
|
+
_c_emb = EmbeddingConfig(
|
|
884
|
+
model_name=embedding_model_name or "text-embedding-3-large",
|
|
885
|
+
dimension=embedding_dimension or 3072,
|
|
886
|
+
provider="openai",
|
|
887
|
+
api_endpoint=embedding_endpoint,
|
|
888
|
+
api_key=embedding_key,
|
|
889
|
+
)
|
|
890
|
+
else:
|
|
891
|
+
_c_emb = EmbeddingConfig(
|
|
848
892
|
model_name="text-embedding-3-large",
|
|
849
893
|
dimension=3072,
|
|
850
894
|
api_endpoint=embedding_endpoint,
|
|
851
895
|
api_key=embedding_key,
|
|
852
896
|
deployment_name=embedding_deployment,
|
|
853
|
-
)
|
|
897
|
+
)
|
|
898
|
+
return cls(
|
|
899
|
+
mode=mode,
|
|
900
|
+
base_dir=_base,
|
|
901
|
+
embedding=_c_emb,
|
|
854
902
|
llm=LLMConfig(
|
|
855
903
|
provider=c_provider,
|
|
856
904
|
model=c_model,
|
|
@@ -26,7 +26,6 @@ import json
|
|
|
26
26
|
import os
|
|
27
27
|
import signal
|
|
28
28
|
import sys
|
|
29
|
-
import threading
|
|
30
29
|
|
|
31
30
|
# Force CPU BEFORE any torch import
|
|
32
31
|
os.environ["CUDA_VISIBLE_DEVICES"] = ""
|
|
@@ -47,24 +46,10 @@ if sys.platform != "win32":
|
|
|
47
46
|
def _start_parent_watchdog() -> None:
|
|
48
47
|
"""Monitor parent process — self-terminate if parent dies.
|
|
49
48
|
|
|
50
|
-
|
|
51
|
-
process crashes, is killed, or exits without cleanup.
|
|
52
|
-
|
|
53
|
-
V3.3.7: Added after incident where orphaned workers consumed 33 GB.
|
|
49
|
+
V3.4.24: Delegates to platform_utils.start_parent_watchdog().
|
|
54
50
|
"""
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
def _watch() -> None:
|
|
58
|
-
import time
|
|
59
|
-
while True:
|
|
60
|
-
time.sleep(5)
|
|
61
|
-
try:
|
|
62
|
-
os.kill(parent_pid, 0)
|
|
63
|
-
except OSError:
|
|
64
|
-
os._exit(0)
|
|
65
|
-
|
|
66
|
-
t = threading.Thread(target=_watch, daemon=True, name="parent-watchdog")
|
|
67
|
-
t.start()
|
|
51
|
+
from superlocalmemory.core.platform_utils import start_parent_watchdog
|
|
52
|
+
start_parent_watchdog()
|
|
68
53
|
|
|
69
54
|
|
|
70
55
|
def _load_embedding_model(name: str) -> tuple:
|
|
@@ -97,9 +82,10 @@ def _load_embedding_model(name: str) -> tuple:
|
|
|
97
82
|
|
|
98
83
|
def _worker_main() -> None:
|
|
99
84
|
"""Main loop: read JSON requests from stdin, write responses to stdout."""
|
|
100
|
-
_start_parent_watchdog()
|
|
85
|
+
_start_parent_watchdog()
|
|
101
86
|
|
|
102
87
|
import numpy as np
|
|
88
|
+
from superlocalmemory.core.platform_utils import get_rss_mb
|
|
103
89
|
|
|
104
90
|
model = None
|
|
105
91
|
model_name = None
|
|
@@ -164,15 +150,10 @@ def _worker_main() -> None:
|
|
|
164
150
|
except Exception as exc:
|
|
165
151
|
_respond({"ok": False, "error": str(exc)})
|
|
166
152
|
|
|
167
|
-
# V3.3.16: RSS watchdog —
|
|
168
|
-
# PyTorch on ARM64 Mac never returns memory to OS. After ~200 embeds
|
|
169
|
-
# a worker that started at 300MB grows to 17GB+. Parent auto-respawns
|
|
170
|
-
# a fresh worker on next request (existing mechanism in embeddings.py).
|
|
171
|
-
# V3.3.21: Configurable via SLM_EMBED_WORKER_RSS_LIMIT_MB (default 2500MB).
|
|
172
|
-
import resource
|
|
153
|
+
# V3.3.16: RSS watchdog — V3.4.24: cross-platform via platform_utils.
|
|
173
154
|
_rss_limit = int(os.environ.get("SLM_EMBED_WORKER_RSS_LIMIT_MB", 4000))
|
|
174
|
-
rss_mb =
|
|
175
|
-
if rss_mb > _rss_limit:
|
|
155
|
+
rss_mb = get_rss_mb()
|
|
156
|
+
if rss_mb > 0 and rss_mb > _rss_limit:
|
|
176
157
|
sys.exit(0)
|
|
177
158
|
|
|
178
159
|
continue
|
|
@@ -178,6 +178,7 @@ class EmbeddingService:
|
|
|
178
178
|
self._idle_timer: threading.Timer | None = None
|
|
179
179
|
self._worker_ready = False
|
|
180
180
|
self._request_count: int = 0
|
|
181
|
+
self._http_client: object | None = None
|
|
181
182
|
|
|
182
183
|
# Register for atexit cleanup (prevent orphaned workers)
|
|
183
184
|
ref = weakref.ref(self, _live_embedding_services.discard)
|
|
@@ -189,10 +190,17 @@ class EmbeddingService:
|
|
|
189
190
|
self._kill_worker()
|
|
190
191
|
except Exception:
|
|
191
192
|
pass
|
|
193
|
+
try:
|
|
194
|
+
if self._http_client is not None:
|
|
195
|
+
self._http_client.close()
|
|
196
|
+
except Exception:
|
|
197
|
+
pass
|
|
192
198
|
|
|
193
199
|
@property
def is_available(self) -> bool:
    """Report whether this service is able to produce embeddings.

    OpenAI-compatible configs need an endpoint; cloud configs need both an
    endpoint and an API key; anything else defers to ``self._available``.
    """
    cfg = self._config
    if cfg.is_openai_compatible:
        return bool(cfg.api_endpoint)
    if not cfg.is_cloud:
        return self._available
    return bool(cfg.api_endpoint and cfg.api_key)
|
|
@@ -215,6 +223,11 @@ class EmbeddingService:
|
|
|
215
223
|
"""Embed a single text string. Returns list of floats or None."""
|
|
216
224
|
if not text or not text.strip():
|
|
217
225
|
raise ValueError("Cannot embed empty text")
|
|
226
|
+
if self._config.is_openai_compatible:
|
|
227
|
+
vecs = self._openai_compatible_embed_batch([text])
|
|
228
|
+
vec = vecs[0]
|
|
229
|
+
self._validate_dimension(np.asarray(vec))
|
|
230
|
+
return vec
|
|
218
231
|
if self._config.is_cloud:
|
|
219
232
|
return self._cloud_embed_single(text)
|
|
220
233
|
result = self._subprocess_embed([text])
|
|
@@ -228,6 +241,12 @@ class EmbeddingService:
|
|
|
228
241
|
"""Embed a batch of texts."""
|
|
229
242
|
if not texts:
|
|
230
243
|
raise ValueError("Cannot embed empty batch")
|
|
244
|
+
if self._config.is_openai_compatible:
|
|
245
|
+
results = self._openai_compatible_embed_batch(texts)
|
|
246
|
+
for vec in results:
|
|
247
|
+
if vec is not None:
|
|
248
|
+
self._validate_dimension(np.asarray(vec))
|
|
249
|
+
return results
|
|
231
250
|
if self._config.is_cloud:
|
|
232
251
|
return self._cloud_embed_batch(texts)
|
|
233
252
|
result = self._subprocess_embed(texts)
|
|
@@ -458,6 +477,7 @@ class EmbeddingService:
|
|
|
458
477
|
"TOKENIZERS_PARALLELISM": "false",
|
|
459
478
|
"TORCH_DEVICE": "cpu",
|
|
460
479
|
}
|
|
480
|
+
from superlocalmemory.core.platform_utils import popen_platform_kwargs
|
|
461
481
|
self._worker_proc = subprocess.Popen(
|
|
462
482
|
[sys.executable, "-m", worker_module],
|
|
463
483
|
stdin=subprocess.PIPE,
|
|
@@ -466,7 +486,7 @@ class EmbeddingService:
|
|
|
466
486
|
text=True,
|
|
467
487
|
bufsize=1,
|
|
468
488
|
env=env,
|
|
469
|
-
|
|
489
|
+
**popen_platform_kwargs(),
|
|
470
490
|
)
|
|
471
491
|
# v3.4.13: Register PID for machine-wide singleton guard
|
|
472
492
|
register_embedding_worker_pid(self._worker_proc.pid)
|
|
@@ -511,6 +531,68 @@ class EmbeddingService:
|
|
|
511
531
|
self._idle_timer.start()
|
|
512
532
|
self._last_used = time.time()
|
|
513
533
|
|
|
534
|
+
# ------------------------------------------------------------------
|
|
535
|
+
# OpenAI-compatible embedding (V3.4.24 — any /v1/embeddings endpoint)
|
|
536
|
+
# ------------------------------------------------------------------
|
|
537
|
+
|
|
538
|
+
def _get_http_client(self):
    """Return the shared httpx client, building it on first use.

    The client is cached on ``self._http_client`` so later calls return
    the same object.
    """
    cached = self._http_client
    if cached is not None:
        return cached
    import httpx
    timeouts = httpx.Timeout(connect=5.0, read=30.0, write=10.0, pool=5.0)
    self._http_client = httpx.Client(timeout=timeouts)
    return self._http_client
|
|
546
|
+
|
|
547
|
+
def _openai_compatible_embed_batch(
    self, texts: list[str], *, max_retries: int = 3,
) -> list[list[float]]:
    """Encode *texts* via any OpenAI-compatible embedding API.

    V3.4.24: Standard ``/v1/embeddings`` format. Works with Ollama,
    vLLM, LiteLLM, text-embeddings-inference, and any endpoint that
    implements the OpenAI embeddings spec.

    Args:
        texts: Strings to embed; sent as one request.
        max_retries: Maximum number of attempts. Transient failures are
            retried with exponential backoff (1s, 2s, ...).

    Returns:
        One embedding per returned item, ordered by the item's ``index``
        field (or by response position when the server omits ``index``).

    Raises:
        RuntimeError: When every attempt failed, or a non-retryable client
            error (4xx other than 408/429) was returned. The underlying
            exception is attached as ``__cause__``.
    """
    endpoint = self._config.api_endpoint.rstrip("/")
    if not endpoint.endswith("/embeddings"):
        endpoint = f"{endpoint}/embeddings"
    headers = {"Content-Type": "application/json"}
    if self._config.api_key:
        headers["Authorization"] = f"Bearer {self._config.api_key}"
    body = {
        "input": texts,
        "model": self._config.model_name,
    }

    client = self._get_http_client()
    last_error: Exception | None = None
    attempts = 0
    for attempt in range(max_retries):
        attempts = attempt + 1
        try:
            resp = client.post(endpoint, headers=headers, json=body)
            resp.raise_for_status()
            data = resp.json()
            items = data.get("data") if isinstance(data, dict) else None
            if not isinstance(items, list):
                keys = list(data.keys()) if isinstance(data, dict) else type(data).__name__
                raise ValueError(
                    f"Unexpected response: missing 'data' array. Keys: {keys}"
                )
            # Some servers omit "index"; fall back to response position so a
            # spec deviation does not fail the whole batch.
            ordered = sorted(
                enumerate(items),
                key=lambda pair: pair[1].get("index", pair[0]),
            )
            results: list[list[float]] = [item["embedding"] for _, item in ordered]
            if len(results) != len(texts):
                logger.warning(
                    "Embedding count mismatch: sent %d texts, got %d vectors",
                    len(texts), len(results),
                )
            return results
        except Exception as exc:
            last_error = exc
            # Duck-typed status lookup (httpx.HTTPStatusError exposes
            # ``.response.status_code``). Bad key / bad model / bad URL will
            # not fix themselves, so do not sleep and retry on those.
            status = getattr(getattr(exc, "response", None), "status_code", None)
            if isinstance(status, int) and 400 <= status < 500 and status not in (408, 429):
                break
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)
    raise RuntimeError(
        f"OpenAI-compatible embedding failed after {attempts} attempt(s): "
        f"{last_error}"
    ) from last_error
|
|
595
|
+
|
|
514
596
|
# ------------------------------------------------------------------
|
|
515
597
|
# Cloud embedding (no subprocess needed — just HTTP)
|
|
516
598
|
# ------------------------------------------------------------------
|
|
@@ -113,6 +113,14 @@ def init_embedder(config: SLMConfig) -> Any | None:
|
|
|
113
113
|
return result
|
|
114
114
|
return None
|
|
115
115
|
|
|
116
|
+
# --- V3.4.24: Explicit OpenAI-compatible provider ---
|
|
117
|
+
if provider == "openai" and emb_cfg.is_openai_compatible:
|
|
118
|
+
logger.info(
|
|
119
|
+
"Using OpenAI-compatible embedding endpoint: %s (model=%s, dim=%d)",
|
|
120
|
+
emb_cfg.api_endpoint, emb_cfg.model_name, emb_cfg.dimension,
|
|
121
|
+
)
|
|
122
|
+
return _try_service_embedder(EmbeddingService, emb_cfg)
|
|
123
|
+
|
|
116
124
|
# --- Explicit cloud provider ---
|
|
117
125
|
if provider == "cloud" or emb_cfg.is_cloud:
|
|
118
126
|
return _try_service_embedder(EmbeddingService, emb_cfg)
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under AGPL-3.0-or-later - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
|
|
4
|
+
|
|
5
|
+
"""Cross-platform utilities for subprocess management and resource monitoring.
|
|
6
|
+
|
|
7
|
+
V3.4.24: Consolidates Windows/POSIX branching from 10+ files into one module.
|
|
8
|
+
Replaces the Unix-only ``resource`` module with ``psutil`` on Windows.
|
|
9
|
+
Inspired by community PR #14 (GuillaumeG / Tyrin451).
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import os
|
|
15
|
+
import subprocess
|
|
16
|
+
import sys
|
|
17
|
+
import threading
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def popen_platform_kwargs() -> dict:
    """Build the platform-specific keyword arguments for subprocess.Popen.

    POSIX: ``start_new_session=True`` — prevents terminal signals bleeding.
    Windows: ``CREATE_NO_WINDOW`` — prevents console window popup.
    """
    if sys.platform != "win32":
        return {"start_new_session": True}
    # CREATE_NO_WINDOW = 0x08000000 — only defined on Windows, hence the
    # getattr with the literal value as a fallback.
    no_window = getattr(subprocess, "CREATE_NO_WINDOW", 0x08000000)
    return {"creationflags": no_window}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def get_rss_mb() -> float:
    """Resident set size of this process in megabytes.

    POSIX: ``resource.getrusage(...).ru_maxrss`` (stdlib). Note that this
    is the *peak* RSS over the process lifetime, not the instantaneous
    value, so it never decreases. ``ru_maxrss`` is read as bytes on macOS
    and as kilobytes on every other POSIX platform.

    Windows: ``psutil.Process().memory_info().rss`` — the current RSS.

    Returns:
        The measurement in MB, or 0.0 if it is unavailable (missing
        module or a failing system call). Callers should treat 0.0 as
        "unknown", not as "no memory used".
    """
    if sys.platform == "win32":
        try:
            import psutil
            return psutil.Process().memory_info().rss / 1024 / 1024
        except Exception:
            # Best-effort: psutil may be absent or refuse access.
            return 0.0
    try:
        import resource
        peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    except Exception:
        # Best-effort: a monitoring helper must never crash the worker.
        return 0.0
    if sys.platform == "darwin":
        return peak / 1024 / 1024  # macOS: bytes
    return peak / 1024  # Linux: kilobytes
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _windows_pid_alive(pid: int) -> bool:
    """Probe a Windows process via the Win32 API without signalling it."""
    import ctypes
    from ctypes import wintypes

    process_query_limited_information = 0x1000
    error_access_denied = 5
    still_active = 259

    kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)
    kernel32.OpenProcess.argtypes = [wintypes.DWORD, wintypes.BOOL, wintypes.DWORD]
    kernel32.OpenProcess.restype = wintypes.HANDLE
    kernel32.GetExitCodeProcess.argtypes = [
        wintypes.HANDLE, ctypes.POINTER(wintypes.DWORD),
    ]
    kernel32.GetExitCodeProcess.restype = wintypes.BOOL
    kernel32.CloseHandle.argtypes = [wintypes.HANDLE]
    kernel32.CloseHandle.restype = wintypes.BOOL

    handle = kernel32.OpenProcess(process_query_limited_information, False, pid)
    if not handle:
        # Access denied means the process exists but is protected.
        return ctypes.get_last_error() == error_access_denied
    try:
        exit_code = wintypes.DWORD()
        if not kernel32.GetExitCodeProcess(handle, ctypes.byref(exit_code)):
            # The handle opened, so the process object exists; assume alive.
            return True
        return exit_code.value == still_active
    finally:
        kernel32.CloseHandle(handle)


def is_pid_alive(pid: int) -> bool:
    """Check whether a process with *pid* is alive.

    POSIX: ``os.kill(pid, 0)`` — signal 0 checks existence. A
    ``PermissionError`` means the process exists but belongs to another
    user, so it counts as alive.

    Windows: ``psutil.pid_exists()``; without psutil, a read-only Win32
    query is used. ``os.kill(pid, 0)`` must NOT be used there: on Windows
    any signal other than CTRL_C_EVENT/CTRL_BREAK_EVENT terminates the
    target process.

    Args:
        pid: Process id to probe. Values ``<= 0`` are never alive.

    Returns:
        True if the process exists, False otherwise.
    """
    if pid <= 0:
        return False
    if sys.platform != "win32":
        try:
            os.kill(pid, 0)
        except ProcessLookupError:
            return False
        except PermissionError:
            return True
        except (OSError, OverflowError):
            return False
        return True
    try:
        import psutil
    except ImportError:
        return _windows_pid_alive(pid)
    return psutil.pid_exists(pid)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def kill_process(pid: int) -> bool:
    """Send SIGTERM (POSIX) or run ``taskkill /F /T`` (Windows).

    Args:
        pid: Process id to terminate. Values ``<= 0`` are rejected, which
            also avoids signalling a whole process group on POSIX.

    Returns:
        True if the signal was sent successfully (POSIX) or ``taskkill``
        exited with status 0 (Windows); False otherwise.
    """
    if pid <= 0:
        return False
    if sys.platform == "win32":
        try:
            status = subprocess.call(
                ["taskkill", "/F", "/T", "/PID", str(pid)],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except Exception:
            # Best-effort: taskkill missing or not launchable.
            return False
        # taskkill reports failure through its exit status, not an exception.
        return status == 0
    import signal
    try:
        os.kill(pid, signal.SIGTERM)
    except OSError:
        return False
    return True
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def start_parent_watchdog(poll_interval: float = 5.0) -> None:
    """Self-terminate when the parent process dies.

    Prevents orphaned workers (500+ MB each) after parent crash/kill.
    V3.3.7 origin: 33 GB consumed by orphaned workers.
    V3.4.24: Consolidated from 3 separate worker files.

    A daemon thread wakes every *poll_interval* seconds and calls
    ``os._exit(0)`` once the parent recorded at start-up is gone. Nothing
    is started when the parent pid is unavailable or is ``<= 1``.

    Args:
        poll_interval: Seconds between liveness checks (default 5.0).
    """
    try:
        parent_pid = os.getppid()
    except AttributeError:
        return
    if parent_pid <= 1:
        return

    def _watch() -> None:
        import time
        while True:
            time.sleep(poll_interval)
            # A changed ppid means we were reparented. Checking it first
            # keeps the watchdog correct even if the old parent's pid has
            # since been reused by an unrelated process.
            if os.getppid() != parent_pid or not is_pid_alive(parent_pid):
                os._exit(0)

    t = threading.Thread(target=_watch, daemon=True, name="parent-watchdog")
    t.start()
|
|
@@ -20,7 +20,6 @@ import json
|
|
|
20
20
|
import os
|
|
21
21
|
import signal
|
|
22
22
|
import sys
|
|
23
|
-
import threading
|
|
24
23
|
|
|
25
24
|
# Force CPU BEFORE any torch import
|
|
26
25
|
os.environ["CUDA_VISIBLE_DEVICES"] = ""
|
|
@@ -39,24 +38,10 @@ if sys.platform != "win32":
|
|
|
39
38
|
def _start_parent_watchdog() -> None:
|
|
40
39
|
"""Monitor parent process — self-terminate if parent dies.
|
|
41
40
|
|
|
42
|
-
|
|
43
|
-
process crashes, is killed, or exits without cleanup.
|
|
44
|
-
|
|
45
|
-
V3.3.7: Added after incident where orphaned workers consumed 33 GB.
|
|
41
|
+
V3.4.24: Delegates to platform_utils.start_parent_watchdog().
|
|
46
42
|
"""
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
def _watch() -> None:
|
|
50
|
-
import time
|
|
51
|
-
while True:
|
|
52
|
-
time.sleep(5)
|
|
53
|
-
try:
|
|
54
|
-
os.kill(parent_pid, 0)
|
|
55
|
-
except OSError:
|
|
56
|
-
os._exit(0)
|
|
57
|
-
|
|
58
|
-
t = threading.Thread(target=_watch, daemon=True, name="parent-watchdog")
|
|
59
|
-
t.start()
|
|
43
|
+
from superlocalmemory.core.platform_utils import start_parent_watchdog
|
|
44
|
+
start_parent_watchdog()
|
|
60
45
|
|
|
61
46
|
_engine = None
|
|
62
47
|
|
|
@@ -253,7 +238,8 @@ def _handle_status() -> dict:
|
|
|
253
238
|
|
|
254
239
|
def _worker_main() -> None:
|
|
255
240
|
"""Main loop: read JSON requests from stdin, write responses to stdout."""
|
|
256
|
-
_start_parent_watchdog()
|
|
241
|
+
_start_parent_watchdog()
|
|
242
|
+
from superlocalmemory.core.platform_utils import get_rss_mb
|
|
257
243
|
|
|
258
244
|
for line in sys.stdin:
|
|
259
245
|
line = line.strip()
|
|
@@ -326,11 +312,9 @@ def _worker_main() -> None:
|
|
|
326
312
|
except Exception as exc:
|
|
327
313
|
_respond({"ok": False, "error": str(exc)})
|
|
328
314
|
|
|
329
|
-
# V3.3.16: RSS watchdog —
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
rss_mb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 / 1024
|
|
333
|
-
if rss_mb > 2500:
|
|
315
|
+
# V3.3.16: RSS watchdog — V3.4.24: cross-platform via platform_utils.
|
|
316
|
+
rss_mb = get_rss_mb()
|
|
317
|
+
if rss_mb > 0 and rss_mb > 2500:
|
|
334
318
|
sys.exit(0)
|
|
335
319
|
|
|
336
320
|
|
|
@@ -31,7 +31,6 @@ import platform
|
|
|
31
31
|
import signal
|
|
32
32
|
import struct
|
|
33
33
|
import sys
|
|
34
|
-
import threading
|
|
35
34
|
|
|
36
35
|
# Force CPU BEFORE any torch import
|
|
37
36
|
os.environ["CUDA_VISIBLE_DEVICES"] = ""
|
|
@@ -52,25 +51,10 @@ if sys.platform != "win32":
|
|
|
52
51
|
def _start_parent_watchdog() -> None:
|
|
53
52
|
"""Monitor parent process — self-terminate if parent dies.
|
|
54
53
|
|
|
55
|
-
|
|
56
|
-
process crashes, is killed, or exits without cleanup.
|
|
57
|
-
|
|
58
|
-
V3.3.7: Added after incident where ~30 orphaned workers consumed 33 GB.
|
|
54
|
+
V3.4.24: Delegates to platform_utils.start_parent_watchdog().
|
|
59
55
|
"""
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
def _watch() -> None:
|
|
63
|
-
import time
|
|
64
|
-
while True:
|
|
65
|
-
time.sleep(5)
|
|
66
|
-
try:
|
|
67
|
-
os.kill(parent_pid, 0) # Check if parent is alive (signal 0)
|
|
68
|
-
except OSError:
|
|
69
|
-
# Parent is dead — self-terminate
|
|
70
|
-
os._exit(0)
|
|
71
|
-
|
|
72
|
-
t = threading.Thread(target=_watch, daemon=True, name="parent-watchdog")
|
|
73
|
-
t.start()
|
|
56
|
+
from superlocalmemory.core.platform_utils import start_parent_watchdog
|
|
57
|
+
start_parent_watchdog()
|
|
74
58
|
|
|
75
59
|
|
|
76
60
|
def _detect_onnx_variant(model_name: str = "") -> str:
|
|
@@ -101,7 +85,8 @@ def _detect_onnx_variant(model_name: str = "") -> str:
|
|
|
101
85
|
|
|
102
86
|
def _worker_main() -> None:
|
|
103
87
|
"""Main loop: read JSON requests from stdin, write responses to stdout."""
|
|
104
|
-
_start_parent_watchdog()
|
|
88
|
+
_start_parent_watchdog()
|
|
89
|
+
from superlocalmemory.core.platform_utils import get_rss_mb
|
|
105
90
|
|
|
106
91
|
model = None
|
|
107
92
|
active_backend = ""
|
|
@@ -194,10 +179,9 @@ def _worker_main() -> None:
|
|
|
194
179
|
except Exception as exc:
|
|
195
180
|
_respond({"ok": False, "error": str(exc)})
|
|
196
181
|
|
|
197
|
-
# V3.3.16: RSS watchdog —
|
|
198
|
-
|
|
199
|
-
rss_mb
|
|
200
|
-
if rss_mb > 2500:
|
|
182
|
+
# V3.3.16: RSS watchdog — V3.4.24: cross-platform via platform_utils.
|
|
183
|
+
rss_mb = get_rss_mb()
|
|
184
|
+
if rss_mb > 0 and rss_mb > 2500:
|
|
201
185
|
sys.exit(0)
|
|
202
186
|
|
|
203
187
|
continue
|
|
@@ -247,6 +247,7 @@ class WorkerPool:
|
|
|
247
247
|
"TOKENIZERS_PARALLELISM": "false",
|
|
248
248
|
"TORCH_DEVICE": "cpu",
|
|
249
249
|
}
|
|
250
|
+
from superlocalmemory.core.platform_utils import popen_platform_kwargs
|
|
250
251
|
self._proc = subprocess.Popen(
|
|
251
252
|
[sys.executable, "-m", "superlocalmemory.core.recall_worker"],
|
|
252
253
|
stdin=subprocess.PIPE,
|
|
@@ -255,7 +256,7 @@ class WorkerPool:
|
|
|
255
256
|
text=True,
|
|
256
257
|
bufsize=1,
|
|
257
258
|
env=env,
|
|
258
|
-
|
|
259
|
+
**popen_platform_kwargs(),
|
|
259
260
|
)
|
|
260
261
|
logger.info("Recall worker spawned (PID %d)", self._proc.pid)
|
|
261
262
|
except Exception as exc:
|
|
@@ -193,6 +193,7 @@ class CrossEncoderReranker:
|
|
|
193
193
|
"TOKENIZERS_PARALLELISM": "false",
|
|
194
194
|
"TORCH_DEVICE": "cpu",
|
|
195
195
|
}
|
|
196
|
+
from superlocalmemory.core.platform_utils import popen_platform_kwargs
|
|
196
197
|
self._worker_proc = subprocess.Popen(
|
|
197
198
|
[sys.executable, "-m", worker_module],
|
|
198
199
|
stdin=subprocess.PIPE,
|
|
@@ -201,7 +202,7 @@ class CrossEncoderReranker:
|
|
|
201
202
|
text=True,
|
|
202
203
|
bufsize=1,
|
|
203
204
|
env=env,
|
|
204
|
-
|
|
205
|
+
**popen_platform_kwargs(),
|
|
205
206
|
)
|
|
206
207
|
# v3.4.13: Register PID for machine-wide singleton
|
|
207
208
|
_RERANKER_PID_FILE.parent.mkdir(parents=True, exist_ok=True)
|