superlocalmemory 3.0.35 → 3.0.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "superlocalmemory",
|
|
3
|
-
"version": "3.0.
|
|
3
|
+
"version": "3.0.36",
|
|
4
4
|
"description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai-memory",
|
package/pyproject.toml
CHANGED
|
@@ -94,9 +94,14 @@ class Summarizer:
|
|
|
94
94
|
# ------------------------------------------------------------------
|
|
95
95
|
|
|
96
96
|
def _has_llm(self) -> bool:
|
|
97
|
-
"""Check if LLM is available.
|
|
97
|
+
"""Check if LLM is available (AND warm for Ollama).
|
|
98
|
+
|
|
99
|
+
For Mode B (Ollama): only returns True if the model is already
|
|
100
|
+
loaded in memory. NEVER triggers a cold model load — that would
|
|
101
|
+
spike 5+ GB of RAM on every recall, unacceptable on ≤32 GB machines.
|
|
102
|
+
"""
|
|
98
103
|
if self._mode == "b":
|
|
99
|
-
return True
|
|
104
|
+
return self._is_ollama_model_warm()
|
|
100
105
|
if self._mode == "c":
|
|
101
106
|
return bool(
|
|
102
107
|
os.environ.get("OPENROUTER_API_KEY")
|
|
@@ -104,6 +109,27 @@ class Summarizer:
|
|
|
104
109
|
)
|
|
105
110
|
return False
|
|
106
111
|
|
|
112
|
+
def _is_ollama_model_warm(self) -> bool:
|
|
113
|
+
"""Check if the LLM model is already loaded in Ollama memory.
|
|
114
|
+
|
|
115
|
+
Queries Ollama /api/ps. Returns True only if our model is loaded,
|
|
116
|
+
preventing cold-load memory spikes during recall.
|
|
117
|
+
"""
|
|
118
|
+
try:
|
|
119
|
+
import httpx
|
|
120
|
+
model = getattr(self._config.llm, 'model', None) or "llama3.1:8b"
|
|
121
|
+
model_base = model.split(":")[0]
|
|
122
|
+
with httpx.Client(timeout=httpx.Timeout(2.0)) as client:
|
|
123
|
+
resp = client.get("http://localhost:11434/api/ps")
|
|
124
|
+
if resp.status_code != 200:
|
|
125
|
+
return False
|
|
126
|
+
for m in resp.json().get("models", []):
|
|
127
|
+
if model_base in m.get("name", ""):
|
|
128
|
+
return True
|
|
129
|
+
return False
|
|
130
|
+
except Exception:
|
|
131
|
+
return False
|
|
132
|
+
|
|
107
133
|
def _call_llm(self, prompt: str, max_tokens: int = 200) -> str:
|
|
108
134
|
"""Route to Ollama (B) or OpenRouter (C)."""
|
|
109
135
|
if self._mode == "b":
|
|
@@ -111,15 +137,26 @@ class Summarizer:
|
|
|
111
137
|
return self._call_openrouter(prompt, max_tokens)
|
|
112
138
|
|
|
113
139
|
def _call_ollama(self, prompt: str, max_tokens: int = 200) -> str:
|
|
114
|
-
"""Call local Ollama for summary generation.
|
|
140
|
+
"""Call local Ollama for summary generation.
|
|
141
|
+
|
|
142
|
+
CRITICAL: num_ctx MUST be set. Without it, Ollama defaults to the
|
|
143
|
+
model's native context (128K for llama3.1) which allocates ~30 GB
|
|
144
|
+
of KV cache — fatal on machines with ≤32 GB RAM.
|
|
145
|
+
SLM prompts are <500 tokens; 4096 context is more than enough.
|
|
146
|
+
"""
|
|
115
147
|
import httpx
|
|
116
148
|
model = getattr(self._config.llm, 'model', None) or "llama3.1:8b"
|
|
117
|
-
with httpx.Client(timeout=httpx.Timeout(
|
|
149
|
+
with httpx.Client(timeout=httpx.Timeout(30.0)) as client:
|
|
118
150
|
resp = client.post("http://localhost:11434/api/generate", json={
|
|
119
151
|
"model": model,
|
|
120
152
|
"prompt": prompt,
|
|
121
153
|
"stream": False,
|
|
122
|
-
"
|
|
154
|
+
"keep_alive": "30s",
|
|
155
|
+
"options": {
|
|
156
|
+
"num_predict": max_tokens,
|
|
157
|
+
"temperature": 0.3,
|
|
158
|
+
"num_ctx": 4096,
|
|
159
|
+
},
|
|
123
160
|
})
|
|
124
161
|
resp.raise_for_status()
|
|
125
162
|
return resp.json().get("response", "").strip()
|
|
@@ -127,13 +127,34 @@ class LLMBackbone:
|
|
|
127
127
|
# -- Properties ---------------------------------------------------------
|
|
128
128
|
|
|
129
129
|
def is_available(self) -> bool:
|
|
130
|
-
"""True when the provider is ready for requests.
|
|
130
|
+
"""True when the provider is ready for requests.
|
|
131
|
+
|
|
132
|
+
For Ollama: only returns True if the model is already loaded in
|
|
133
|
+
memory. Prevents cold-load memory spikes (5+ GB) during recall.
|
|
134
|
+
"""
|
|
131
135
|
if not self._provider:
|
|
132
136
|
return False
|
|
133
137
|
if self._provider == "ollama":
|
|
134
|
-
return True
|
|
138
|
+
return self._is_ollama_model_warm()
|
|
135
139
|
return bool(self._api_key)
|
|
136
140
|
|
|
141
|
+
def _is_ollama_model_warm(self) -> bool:
|
|
142
|
+
"""Check if the LLM model is already loaded in Ollama."""
|
|
143
|
+
try:
|
|
144
|
+
model_base = self._model.split(":")[0]
|
|
145
|
+
resp = httpx.get(
|
|
146
|
+
f"{_OLLAMA_DEFAULT_BASE}/api/ps",
|
|
147
|
+
timeout=httpx.Timeout(2.0),
|
|
148
|
+
)
|
|
149
|
+
if resp.status_code != 200:
|
|
150
|
+
return False
|
|
151
|
+
for m in resp.json().get("models", []):
|
|
152
|
+
if model_base in m.get("name", ""):
|
|
153
|
+
return True
|
|
154
|
+
return False
|
|
155
|
+
except Exception:
|
|
156
|
+
return False
|
|
157
|
+
|
|
137
158
|
@property
|
|
138
159
|
def provider(self) -> str:
|
|
139
160
|
return self._provider
|
|
@@ -250,6 +271,8 @@ class LLMBackbone:
|
|
|
250
271
|
"messages": messages,
|
|
251
272
|
"max_tokens": max_tokens,
|
|
252
273
|
"temperature": temperature,
|
|
274
|
+
"keep_alive": "30s",
|
|
275
|
+
"options": {"num_ctx": 4096},
|
|
253
276
|
}
|
|
254
277
|
return self._base_url, headers, payload
|
|
255
278
|
|
|
@@ -22,26 +22,32 @@ from pydantic import BaseModel, Field
|
|
|
22
22
|
# ---------------------------------------------------------------------------
|
|
23
23
|
|
|
24
24
|
def _get_version() -> str:
|
|
25
|
-
"""Read version from package.json / pyproject.toml / importlib.
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
25
|
+
"""Read version from package.json / pyproject.toml / importlib.
|
|
26
|
+
|
|
27
|
+
Walks up from this file to find the project root. In the src layout
|
|
28
|
+
(running from source tree), package.json is 5 parents up; for an
|
|
29
|
+
installed package it won't exist, so we fall through to importlib.
|
|
30
|
+
"""
|
|
31
|
+
here = Path(__file__).resolve()
|
|
32
|
+
for depth in (5, 4):
|
|
33
|
+
try:
|
|
34
|
+
import json as _json
|
|
35
|
+
root = here
|
|
36
|
+
for _ in range(depth):
|
|
37
|
+
root = root.parent
|
|
38
|
+
pkg_json = root / "package.json"
|
|
39
|
+
if pkg_json.exists():
|
|
40
|
+
with open(pkg_json) as f:
|
|
41
|
+
v = _json.load(f).get("version", "")
|
|
42
|
+
if v:
|
|
43
|
+
return v
|
|
44
|
+
toml_path = root / "pyproject.toml"
|
|
45
|
+
if toml_path.exists():
|
|
46
|
+
import tomllib
|
|
47
|
+
with open(toml_path, "rb") as f:
|
|
48
|
+
return tomllib.load(f)["project"]["version"]
|
|
49
|
+
except Exception:
|
|
50
|
+
continue
|
|
45
51
|
try:
|
|
46
52
|
from importlib.metadata import version
|
|
47
53
|
return version("superlocalmemory")
|