superlocalmemory 3.0.16 → 3.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/slm-npm +8 -0
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/src/superlocalmemory/cli/commands.py +29 -0
- package/src/superlocalmemory/cli/main.py +94 -30
- package/src/superlocalmemory/core/embedding_worker.py +120 -0
- package/src/superlocalmemory/core/embeddings.py +156 -240
- package/src/superlocalmemory/core/recall_worker.py +193 -0
- package/src/superlocalmemory/core/summarizer.py +182 -0
- package/src/superlocalmemory/core/worker_pool.py +209 -0
- package/src/superlocalmemory/mcp/server.py +9 -0
- package/src/superlocalmemory/mcp/tools_core.py +21 -8
- package/src/superlocalmemory/mcp/tools_v3.py +21 -0
- package/src/superlocalmemory/server/routes/helpers.py +21 -0
- package/src/superlocalmemory/server/routes/memories.py +100 -42
- package/src/superlocalmemory/server/routes/stats.py +11 -0
- package/src/superlocalmemory/server/routes/v3_api.py +195 -43
- package/src/superlocalmemory/server/ui.py +15 -14
- package/src/superlocalmemory/storage/database.py +23 -0
- package/src/superlocalmemory.egg-info/PKG-INFO +1 -1
- package/src/superlocalmemory.egg-info/SOURCES.txt +4 -0
- package/ui/index.html +113 -29
- package/ui/js/auto-settings.js +330 -1
- package/ui/js/clusters.js +138 -101
- package/ui/js/graph-core.js +3 -1
- package/ui/js/graph-interactions.js +2 -5
- package/ui/js/memories.js +65 -2
- package/ui/js/modal.js +79 -42
- package/ui/js/recall-lab.js +206 -60
package/bin/slm-npm
CHANGED
|
@@ -81,6 +81,14 @@ const result = spawnSync(pythonParts[0], [
|
|
|
81
81
|
env: {
|
|
82
82
|
...process.env,
|
|
83
83
|
PYTHONPATH: SRC_DIR + (process.env.PYTHONPATH ? path.delimiter + process.env.PYTHONPATH : ''),
|
|
84
|
+
// Prevent PyTorch Metal/MPS GPU memory reservation on Apple Silicon.
|
|
85
|
+
// Without these, macOS Activity Monitor shows 3-6 GB for a 40 MB process.
|
|
86
|
+
PYTORCH_MPS_HIGH_WATERMARK_RATIO: '0.0',
|
|
87
|
+
PYTORCH_MPS_MEM_LIMIT: '0',
|
|
88
|
+
PYTORCH_ENABLE_MPS_FALLBACK: '1',
|
|
89
|
+
TOKENIZERS_PARALLELISM: 'false',
|
|
90
|
+
TORCH_DEVICE: 'cpu',
|
|
91
|
+
CUDA_VISIBLE_DEVICES: '',
|
|
84
92
|
},
|
|
85
93
|
});
|
|
86
94
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "superlocalmemory",
|
|
3
|
-
"version": "3.0.
|
|
3
|
+
"version": "3.0.18",
|
|
4
4
|
"description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai-memory",
|
package/src/superlocalmemory/cli/commands.py
CHANGED
|
@@ -23,6 +23,7 @@ def dispatch(args: Namespace) -> None:
|
|
|
23
23
|
"provider": cmd_provider,
|
|
24
24
|
"connect": cmd_connect,
|
|
25
25
|
"migrate": cmd_migrate,
|
|
26
|
+
"list": cmd_list,
|
|
26
27
|
"remember": cmd_remember,
|
|
27
28
|
"recall": cmd_recall,
|
|
28
29
|
"forget": cmd_forget,
|
|
@@ -113,6 +114,34 @@ def cmd_migrate(args: Namespace) -> None:
|
|
|
113
114
|
_migrate(args)
|
|
114
115
|
|
|
115
116
|
|
|
117
|
+
def cmd_list(args: Namespace) -> None:
    """List recent memories chronologically (newest first).

    Reads ``args.limit`` (defaulting to 20 when absent) and prints up to
    that many facts for the active profile, each with its timestamp,
    fact type, and a content preview truncated to 100 characters.
    """
    from superlocalmemory.core.config import SLMConfig
    from superlocalmemory.core.engine import MemoryEngine

    config = SLMConfig.load()
    engine = MemoryEngine(config)
    engine.initialize()

    max_entries = getattr(args, "limit", 20)
    # NOTE(review): reaches into the private ``_db`` attribute — presumably
    # no public "list all" API exists on the engine yet; confirm upstream.
    all_facts = engine._db.get_all_facts(engine.profile_id)
    # Newest first, then keep only the requested number of entries.
    recent = sorted(
        all_facts, key=lambda fact: fact.created_at or "", reverse=True,
    )[:max_entries]

    if not recent:
        print("No memories stored yet.")
        return

    print(f"Recent memories ({len(recent)}):\n")
    for idx, fact in enumerate(recent, 1):
        stamp = (fact.created_at or "")[:19]
        raw_type = getattr(fact, "fact_type", "")
        # fact_type may be an Enum (use .value) or already a plain string.
        kind = raw_type.value if hasattr(raw_type, "value") else str(raw_type)
        snippet = fact.content[:100] + ("..." if len(fact.content) > 100 else "")
        print(f" {idx:3d}. [{stamp}] ({kind}) {snippet}")
|
|
143
|
+
|
|
144
|
+
|
|
116
145
|
def cmd_remember(args: Namespace) -> None:
|
|
117
146
|
"""Store a memory via the engine."""
|
|
118
147
|
from superlocalmemory.core.config import SLMConfig
|
|
@@ -11,76 +11,140 @@ Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
|
11
11
|
|
|
12
12
|
from __future__ import annotations
|
|
13
13
|
|
|
14
|
+
# CRITICAL: Set BEFORE any torch/transformers import to prevent Metal/MPS
|
|
15
|
+
# GPU memory reservation on Apple Silicon. Without this, macOS Activity
|
|
16
|
+
# Monitor shows 3-6 GB for what is actually a 40 MB process.
|
|
17
|
+
import os as _os
|
|
18
|
+
_os.environ.setdefault('PYTORCH_MPS_HIGH_WATERMARK_RATIO', '0.0')
|
|
19
|
+
_os.environ.setdefault('PYTORCH_MPS_MEM_LIMIT', '0')
|
|
20
|
+
_os.environ.setdefault('PYTORCH_ENABLE_MPS_FALLBACK', '1')
|
|
21
|
+
_os.environ.setdefault('TOKENIZERS_PARALLELISM', 'false')
|
|
22
|
+
_os.environ.setdefault('TORCH_DEVICE', 'cpu')
|
|
23
|
+
|
|
14
24
|
import argparse
|
|
15
25
|
import sys
|
|
16
26
|
|
|
27
|
+
_HELP_EPILOG = """\
|
|
28
|
+
operating modes:
|
|
29
|
+
Mode A Local Guardian — Zero cloud, zero LLM. All processing stays on
|
|
30
|
+
your machine. Full EU AI Act compliance. Best for privacy-first
|
|
31
|
+
use, air-gapped systems, and regulated environments.
|
|
32
|
+
Retrieval score: 74.8% on LoCoMo benchmark.
|
|
33
|
+
|
|
34
|
+
Mode B Smart Local — Uses a local Ollama LLM for summarization and
|
|
35
|
+
enrichment. Data never leaves your network. EU AI Act compliant.
|
|
36
|
+
Requires: ollama running locally with a model pulled.
|
|
37
|
+
|
|
38
|
+
Mode C Full Power — Uses a cloud LLM (OpenAI, Anthropic, etc.) for
|
|
39
|
+
maximum accuracy. Best retrieval quality, agentic multi-hop.
|
|
40
|
+
Retrieval score: 87.7% on LoCoMo benchmark.
|
|
41
|
+
|
|
42
|
+
quick start:
|
|
43
|
+
slm setup Interactive first-time setup
|
|
44
|
+
slm remember "some fact" Store a memory
|
|
45
|
+
slm recall "search query" Semantic search across memories
|
|
46
|
+
slm list -n 20 Show 20 most recent memories
|
|
47
|
+
slm dashboard Open web dashboard at localhost:8765
|
|
48
|
+
|
|
49
|
+
ide integration:
|
|
50
|
+
slm mcp Start MCP server (used by IDEs)
|
|
51
|
+
slm connect Auto-configure all detected IDEs
|
|
52
|
+
slm connect cursor Configure a specific IDE
|
|
53
|
+
|
|
54
|
+
examples:
|
|
55
|
+
slm remember "Project X uses PostgreSQL 16" --tags "project-x,db"
|
|
56
|
+
slm recall "which database does project X use"
|
|
57
|
+
slm list -n 50
|
|
58
|
+
slm mode a Switch to zero-LLM mode
|
|
59
|
+
slm trace "auth flow" Recall with per-channel score breakdown
|
|
60
|
+
slm health Check math layer status
|
|
61
|
+
slm dashboard --port 9000 Dashboard on custom port
|
|
62
|
+
|
|
63
|
+
documentation:
|
|
64
|
+
Website: https://superlocalmemory.com
|
|
65
|
+
GitHub: https://github.com/qualixar/superlocalmemory
|
|
66
|
+
Paper: https://arxiv.org/abs/2603.14588
|
|
67
|
+
"""
|
|
68
|
+
|
|
17
69
|
|
|
18
70
|
def main() -> None:
|
|
19
71
|
"""Parse CLI arguments and dispatch to command handlers."""
|
|
20
|
-
|
|
21
|
-
|
|
72
|
+
try:
|
|
73
|
+
from importlib.metadata import version as _pkg_version
|
|
74
|
+
_ver = _pkg_version("superlocalmemory")
|
|
75
|
+
except Exception:
|
|
76
|
+
_ver = "unknown"
|
|
77
|
+
|
|
78
|
+
parser = argparse.ArgumentParser(
|
|
79
|
+
prog="slm",
|
|
80
|
+
description=f"SuperLocalMemory V3 ({_ver}) — AI agent memory with mathematical foundations",
|
|
81
|
+
epilog=_HELP_EPILOG,
|
|
82
|
+
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
83
|
+
)
|
|
84
|
+
parser.add_argument(
|
|
85
|
+
"-v", "--version", action="version", version=f"superlocalmemory {_ver}",
|
|
86
|
+
)
|
|
87
|
+
sub = parser.add_subparsers(dest="command", title="commands")
|
|
22
88
|
|
|
23
|
-
# Setup
|
|
24
|
-
sub.add_parser("setup", help="
|
|
89
|
+
# -- Setup & Config ------------------------------------------------
|
|
90
|
+
sub.add_parser("setup", help="Interactive first-time setup wizard")
|
|
25
91
|
|
|
26
|
-
|
|
27
|
-
mode_p = sub.add_parser("mode", help="Get or set operating mode")
|
|
92
|
+
mode_p = sub.add_parser("mode", help="Get or set operating mode (a/b/c)")
|
|
28
93
|
mode_p.add_argument(
|
|
29
94
|
"value", nargs="?", choices=["a", "b", "c"], help="Mode to set",
|
|
30
95
|
)
|
|
31
96
|
|
|
32
|
-
|
|
33
|
-
provider_p = sub.add_parser("provider", help="Get or set LLM provider")
|
|
97
|
+
provider_p = sub.add_parser("provider", help="Get or set LLM provider for Mode B/C")
|
|
34
98
|
provider_p.add_argument(
|
|
35
99
|
"action", nargs="?", choices=["set"], help="Action",
|
|
36
100
|
)
|
|
37
101
|
|
|
38
|
-
|
|
39
|
-
connect_p = sub.add_parser("connect", help="Configure IDE integrations")
|
|
102
|
+
connect_p = sub.add_parser("connect", help="Auto-configure IDE integrations (17+ IDEs)")
|
|
40
103
|
connect_p.add_argument("ide", nargs="?", help="Specific IDE to configure")
|
|
41
104
|
connect_p.add_argument(
|
|
42
105
|
"--list", action="store_true", help="List all supported IDEs",
|
|
43
106
|
)
|
|
44
107
|
|
|
45
|
-
|
|
46
|
-
migrate_p = sub.add_parser("migrate", help="Migrate from V2")
|
|
108
|
+
migrate_p = sub.add_parser("migrate", help="Migrate data from V2 to V3 schema")
|
|
47
109
|
migrate_p.add_argument(
|
|
48
110
|
"--rollback", action="store_true", help="Rollback migration",
|
|
49
111
|
)
|
|
50
112
|
|
|
51
|
-
# Memory
|
|
52
|
-
remember_p = sub.add_parser("remember", help="Store a memory")
|
|
113
|
+
# -- Memory Operations ---------------------------------------------
|
|
114
|
+
remember_p = sub.add_parser("remember", help="Store a memory (extracts facts, builds graph)")
|
|
53
115
|
remember_p.add_argument("content", help="Content to remember")
|
|
54
116
|
remember_p.add_argument("--tags", default="", help="Comma-separated tags")
|
|
55
117
|
|
|
56
|
-
recall_p = sub.add_parser("recall", help="
|
|
118
|
+
recall_p = sub.add_parser("recall", help="Semantic search with 4-channel retrieval")
|
|
57
119
|
recall_p.add_argument("query", help="Search query")
|
|
58
|
-
recall_p.add_argument("--limit", type=int, default=10, help="Max results")
|
|
120
|
+
recall_p.add_argument("--limit", type=int, default=10, help="Max results (default 10)")
|
|
121
|
+
|
|
122
|
+
forget_p = sub.add_parser("forget", help="Delete memories matching a query")
|
|
123
|
+
forget_p.add_argument("query", help="Query to match for deletion")
|
|
59
124
|
|
|
60
|
-
|
|
61
|
-
|
|
125
|
+
list_p = sub.add_parser("list", help="List recent memories chronologically")
|
|
126
|
+
list_p.add_argument(
|
|
127
|
+
"--limit", "-n", type=int, default=20, help="Number of entries (default 20)",
|
|
128
|
+
)
|
|
62
129
|
|
|
63
|
-
#
|
|
64
|
-
sub.add_parser("status", help="System status")
|
|
65
|
-
sub.add_parser("health", help="Math layer health")
|
|
130
|
+
# -- Diagnostics ---------------------------------------------------
|
|
131
|
+
sub.add_parser("status", help="System status (mode, profile, DB size)")
|
|
132
|
+
sub.add_parser("health", help="Math layer health (Fisher-Rao, Sheaf, Langevin)")
|
|
66
133
|
|
|
67
|
-
trace_p = sub.add_parser("trace", help="Recall with channel breakdown")
|
|
134
|
+
trace_p = sub.add_parser("trace", help="Recall with per-channel score breakdown")
|
|
68
135
|
trace_p.add_argument("query", help="Search query")
|
|
69
136
|
|
|
70
|
-
#
|
|
137
|
+
# -- Services ------------------------------------------------------
|
|
71
138
|
sub.add_parser("mcp", help="Start MCP server (stdio transport for IDE integration)")
|
|
139
|
+
sub.add_parser("warmup", help="Pre-download embedding model (~500MB, one-time)")
|
|
72
140
|
|
|
73
|
-
|
|
74
|
-
sub.add_parser("warmup", help="Pre-download embedding model (~500MB)")
|
|
75
|
-
|
|
76
|
-
# Dashboard
|
|
77
|
-
dashboard_p = sub.add_parser("dashboard", help="Open web dashboard")
|
|
141
|
+
dashboard_p = sub.add_parser("dashboard", help="Open 17-tab web dashboard")
|
|
78
142
|
dashboard_p.add_argument(
|
|
79
143
|
"--port", type=int, default=8765, help="Port (default 8765)",
|
|
80
144
|
)
|
|
81
145
|
|
|
82
|
-
# Profiles
|
|
83
|
-
profile_p = sub.add_parser("profile", help="Profile management")
|
|
146
|
+
# -- Profiles ------------------------------------------------------
|
|
147
|
+
profile_p = sub.add_parser("profile", help="Profile management (list/switch/create)")
|
|
84
148
|
profile_p.add_argument(
|
|
85
149
|
"action", choices=["list", "switch", "create"], help="Action",
|
|
86
150
|
)
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
|
|
4
|
+
|
|
5
|
+
"""Subprocess embedding worker — isolates PyTorch memory from main process.
|
|
6
|
+
|
|
7
|
+
The main process (dashboard/MCP) stays at ~60 MB. All PyTorch/model memory
|
|
8
|
+
lives in this worker subprocess, which auto-kills after idle timeout.
|
|
9
|
+
|
|
10
|
+
Protocol (JSON over stdin/stdout):
|
|
11
|
+
Request: {"cmd": "embed", "texts": ["hello"]}
|
|
12
|
+
Response: {"ok": true, "vectors": [[0.1, ...]], "dim": 768}
|
|
13
|
+
|
|
14
|
+
Request: {"cmd": "ping"}
|
|
15
|
+
Response: {"ok": true}
|
|
16
|
+
|
|
17
|
+
Request: {"cmd": "quit"}
|
|
18
|
+
(worker exits)
|
|
19
|
+
|
|
20
|
+
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import json
|
|
26
|
+
import sys
|
|
27
|
+
import os
|
|
28
|
+
|
|
29
|
+
# Force CPU BEFORE any torch import
# These assignments must run before torch/transformers are imported anywhere
# in this process; the libraries read them only at import time.
os.environ["CUDA_VISIBLE_DEVICES"] = ""
os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"
os.environ["PYTORCH_MPS_MEM_LIMIT"] = "0"
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["TORCH_DEVICE"] = "cpu"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _worker_main() -> None:
    """Main loop: read JSON requests from stdin, write responses to stdout.

    One JSON object per line. Supported commands:
      ping  -> {"ok": true}
      load  -> load the sentence-transformers model; rejects the model if its
               embedding dimension differs from the requested "dimension"
      embed -> encode "texts" (auto-loads the model on first use)
      quit  -> exit the loop (worker process terminates)

    Every reply is a single JSON line written via ``_respond``; errors are
    reported as {"ok": false, "error": ...} rather than raised, so the parent
    process never sees a crashed pipe for a recoverable failure.
    """
    import numpy as np

    model = None
    dim = 0

    def _load(name: str):
        """Load a sentence-transformers model on CPU; return (model, dim)."""
        from sentence_transformers import SentenceTransformer
        loaded = SentenceTransformer(name, trust_remote_code=True, device="cpu")
        return loaded, loaded.get_sentence_embedding_dimension()

    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue
        try:
            req = json.loads(line)
        except json.JSONDecodeError:
            _respond({"ok": False, "error": "Invalid JSON"})
            continue

        cmd = req.get("cmd", "")

        if cmd == "quit":
            break

        if cmd == "ping":
            _respond({"ok": True})
            continue

        if cmd == "load":
            name = req.get("model_name", "nomic-ai/nomic-embed-text-v1.5")
            expected_dim = req.get("dimension", 768)
            try:
                model, dim = _load(name)
                if dim != expected_dim:
                    _respond({"ok": False, "error": f"Dimension mismatch: {dim} != {expected_dim}"})
                    model = None  # refuse to serve a mismatched model
                    continue
                _respond({"ok": True, "dim": dim, "model": name})
            except Exception as exc:
                _respond({"ok": False, "error": str(exc)})
            continue

        if cmd == "embed":
            texts = req.get("texts", [])
            if not texts:
                _respond({"ok": False, "error": "No texts provided"})
                continue
            if model is None:
                # Auto-load if not yet loaded. Unlike "load", this path does
                # not validate the dimension (matches original behavior).
                name = req.get("model_name", "nomic-ai/nomic-embed-text-v1.5")
                try:
                    model, dim = _load(name)
                except Exception as exc:
                    _respond({"ok": False, "error": f"Model load failed: {exc}"})
                    continue
            try:
                vecs = model.encode(texts, normalize_embeddings=True)
                # encode() usually returns a 2-D ndarray; fall back to a
                # per-item conversion for list-like returns.
                if isinstance(vecs, np.ndarray) and vecs.ndim == 2:
                    result = [vecs[i].tolist() for i in range(vecs.shape[0])]
                else:
                    result = [np.asarray(v, dtype=np.float32).tolist() for v in vecs]
                _respond({"ok": True, "vectors": result, "dim": dim})
            except Exception as exc:
                _respond({"ok": False, "error": str(exc)})
            continue

        _respond({"ok": False, "error": f"Unknown command: {cmd}"})
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _respond(data: dict) -> None:
    """Serialize *data* as one JSON line on stdout and flush immediately,
    so the parent process reading the pipe never blocks on buffering."""
    payload = json.dumps(data)
    stream = sys.stdout
    stream.write(payload + "\n")
    stream.flush()
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
if __name__ == "__main__":
|
|
120
|
+
_worker_main()
|