superlocalmemory 3.0.17 → 3.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/slm-npm CHANGED
@@ -81,6 +81,14 @@ const result = spawnSync(pythonParts[0], [
81
81
  env: {
82
82
  ...process.env,
83
83
  PYTHONPATH: SRC_DIR + (process.env.PYTHONPATH ? path.delimiter + process.env.PYTHONPATH : ''),
84
+ // Prevent PyTorch Metal/MPS GPU memory reservation on Apple Silicon.
85
+ // Without these, macOS Activity Monitor shows 3-6 GB for a 40 MB process.
86
+ PYTORCH_MPS_HIGH_WATERMARK_RATIO: '0.0',
87
+ PYTORCH_MPS_MEM_LIMIT: '0',
88
+ PYTORCH_ENABLE_MPS_FALLBACK: '1',
89
+ TOKENIZERS_PARALLELISM: 'false',
90
+ TORCH_DEVICE: 'cpu',
91
+ CUDA_VISIBLE_DEVICES: '',
84
92
  },
85
93
  });
86
94
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "superlocalmemory",
3
- "version": "3.0.17",
3
+ "version": "3.0.18",
4
4
  "description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
5
5
  "keywords": [
6
6
  "ai-memory",
package/pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "superlocalmemory"
3
- version = "3.0.17"
3
+ version = "3.0.18"
4
4
  description = "Information-geometric agent memory with mathematical guarantees"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -11,6 +11,16 @@ Part of Qualixar | Author: Varun Pratap Bhardwaj
11
11
 
12
12
  from __future__ import annotations
13
13
 
14
+ # CRITICAL: Set BEFORE any torch/transformers import to prevent Metal/MPS
15
+ # GPU memory reservation on Apple Silicon. Without this, macOS Activity
16
+ # Monitor shows 3-6 GB for what is actually a 40 MB process.
17
+ import os as _os
18
+ _os.environ.setdefault('PYTORCH_MPS_HIGH_WATERMARK_RATIO', '0.0')
19
+ _os.environ.setdefault('PYTORCH_MPS_MEM_LIMIT', '0')
20
+ _os.environ.setdefault('PYTORCH_ENABLE_MPS_FALLBACK', '1')
21
+ _os.environ.setdefault('TOKENIZERS_PARALLELISM', 'false')
22
+ _os.environ.setdefault('TORCH_DEVICE', 'cpu')
23
+
14
24
  import argparse
15
25
  import sys
16
26
 
@@ -0,0 +1,120 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """Subprocess embedding worker — isolates PyTorch memory from main process.
6
+
7
+ The main process (dashboard/MCP) stays at ~60 MB. All PyTorch/model memory
8
+ lives in this worker subprocess, which auto-kills after idle timeout.
9
+
10
+ Protocol (JSON over stdin/stdout):
11
+ Request: {"cmd": "embed", "texts": ["hello"]}
12
+ Response: {"ok": true, "vectors": [[0.1, ...]], "dim": 768}
13
+
14
+ Request: {"cmd": "ping"}
15
+ Response: {"ok": true}
16
+
17
+ Request: {"cmd": "quit"}
18
+ (worker exits)
19
+
20
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import json
26
+ import sys
27
+ import os
28
+
29
# Force CPU-only inference BEFORE any torch import: torch/tokenizers read
# these variables at import time, so they must already be in the environment.
_CPU_ONLY_ENV = {
    "CUDA_VISIBLE_DEVICES": "",                  # hide all CUDA devices
    "PYTORCH_MPS_HIGH_WATERMARK_RATIO": "0.0",   # no Metal/MPS memory reservation
    "PYTORCH_MPS_MEM_LIMIT": "0",
    "PYTORCH_ENABLE_MPS_FALLBACK": "1",
    "TOKENIZERS_PARALLELISM": "false",
    "TORCH_DEVICE": "cpu",
}
os.environ.update(_CPU_ONLY_ENV)
36
+
37
+
38
# Default model used when a request omits "model_name"/"dimension".
_DEFAULT_MODEL = "nomic-ai/nomic-embed-text-v1.5"
_DEFAULT_DIM = 768


def _load_model(name: str, expected_dim: int):
    """Load a SentenceTransformer on CPU and validate its embedding size.

    Returns ``(model, dim)``. Raises ``ValueError`` when the model's actual
    embedding dimension differs from *expected_dim*; any loading failure from
    sentence_transformers propagates to the caller.
    """
    # Imported lazily so the worker stays tiny until a model is requested.
    from sentence_transformers import SentenceTransformer

    model = SentenceTransformer(name, trust_remote_code=True, device="cpu")
    dim = model.get_sentence_embedding_dimension()
    if dim != expected_dim:
        raise ValueError(f"Dimension mismatch: {dim} != {expected_dim}")
    return model, dim


def _worker_main() -> None:
    """Main loop: read JSON requests from stdin, write responses to stdout.

    Supported commands (one JSON object per line):
      - {"cmd": "ping"}                    -> {"ok": true}
      - {"cmd": "quit"}                    -> worker exits
      - {"cmd": "load", "model_name": ..., "dimension": ...}
      - {"cmd": "embed", "texts": [...]}   (auto-loads the model if needed)
    """
    import numpy as np

    model = None  # loaded SentenceTransformer, or None until first load
    dim = 0       # embedding dimension of the loaded model

    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue
        try:
            req = json.loads(line)
        except json.JSONDecodeError:
            _respond({"ok": False, "error": "Invalid JSON"})
            continue

        cmd = req.get("cmd", "")

        if cmd == "quit":
            break

        if cmd == "ping":
            _respond({"ok": True})
            continue

        if cmd == "load":
            name = req.get("model_name", _DEFAULT_MODEL)
            expected_dim = req.get("dimension", _DEFAULT_DIM)
            try:
                model, dim = _load_model(name, expected_dim)
                _respond({"ok": True, "dim": dim, "model": name})
            except Exception as exc:
                # Drop any previously loaded model so a failed reload cannot
                # silently keep serving embeddings from a stale model.
                model = None
                _respond({"ok": False, "error": str(exc)})
            continue

        if cmd == "embed":
            texts = req.get("texts", [])
            if not texts:
                _respond({"ok": False, "error": "No texts provided"})
                continue
            if model is None:
                # Auto-load with the SAME dimension validation as "load"
                # (the previous implementation skipped the check here).
                name = req.get("model_name", _DEFAULT_MODEL)
                expected_dim = req.get("dimension", _DEFAULT_DIM)
                try:
                    model, dim = _load_model(name, expected_dim)
                except Exception as exc:
                    _respond({"ok": False, "error": f"Model load failed: {exc}"})
                    continue
            try:
                vecs = model.encode(texts, normalize_embeddings=True)
                if isinstance(vecs, np.ndarray) and vecs.ndim == 2:
                    result = [row.tolist() for row in vecs]
                else:
                    result = [np.asarray(v, dtype=np.float32).tolist() for v in vecs]
                _respond({"ok": True, "vectors": result, "dim": dim})
            except Exception as exc:
                _respond({"ok": False, "error": str(exc)})
            continue

        _respond({"ok": False, "error": f"Unknown command: {cmd}"})
111
+
112
+
113
+ def _respond(data: dict) -> None:
114
+ """Write JSON response to stdout, flush immediately."""
115
+ sys.stdout.write(json.dumps(data) + "\n")
116
+ sys.stdout.flush()
117
+
118
+
119
if __name__ == "__main__":
    # Entry point when spawned as a subprocess; never runs on import.
    _worker_main()