superlocalmemory 3.0.35 → 3.0.37

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "superlocalmemory",
3
- "version": "3.0.35",
3
+ "version": "3.0.37",
4
4
  "description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
5
5
  "keywords": [
6
6
  "ai-memory",
package/pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "superlocalmemory"
3
- version = "3.0.35"
3
+ version = "3.0.37"
4
4
  description = "Information-geometric agent memory with mathematical guarantees"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -94,9 +94,14 @@ class Summarizer:
94
94
  # ------------------------------------------------------------------
95
95
 
96
96
  def _has_llm(self) -> bool:
97
- """Check if LLM is available."""
97
+ """Check if LLM is available (AND warm for Ollama).
98
+
99
+ For Mode B (Ollama): only returns True if the model is already
100
+ loaded in memory. NEVER triggers a cold model load — that would
101
+ spike 5+ GB of RAM on every recall, unacceptable on ≤32 GB machines.
102
+ """
98
103
  if self._mode == "b":
99
- return True # Ollama assumed running
104
+ return self._is_ollama_model_warm()
100
105
  if self._mode == "c":
101
106
  return bool(
102
107
  os.environ.get("OPENROUTER_API_KEY")
@@ -104,6 +109,27 @@ class Summarizer:
104
109
  )
105
110
  return False
106
111
 
112
+ def _is_ollama_model_warm(self) -> bool:
113
+ """Check if the LLM model is already loaded in Ollama memory.
114
+
115
+ Queries Ollama /api/ps. Returns True only if our model is loaded,
116
+ preventing cold-load memory spikes during recall.
117
+ """
118
+ try:
119
+ import httpx
120
+ model = getattr(self._config.llm, 'model', None) or "llama3.1:8b"
121
+ model_base = model.split(":")[0]
122
+ with httpx.Client(timeout=httpx.Timeout(2.0)) as client:
123
+ resp = client.get("http://localhost:11434/api/ps")
124
+ if resp.status_code != 200:
125
+ return False
126
+ for m in resp.json().get("models", []):
127
+ if model_base in m.get("name", ""):
128
+ return True
129
+ return False
130
+ except Exception:
131
+ return False
132
+
107
133
  def _call_llm(self, prompt: str, max_tokens: int = 200) -> str:
108
134
  """Route to Ollama (B) or OpenRouter (C)."""
109
135
  if self._mode == "b":
@@ -111,15 +137,26 @@ class Summarizer:
111
137
  return self._call_openrouter(prompt, max_tokens)
112
138
 
113
139
  def _call_ollama(self, prompt: str, max_tokens: int = 200) -> str:
114
- """Call local Ollama for summary generation."""
140
+ """Call local Ollama for summary generation.
141
+
142
+ CRITICAL: num_ctx MUST be set. Without it, Ollama defaults to the
143
+ model's native context (128K for llama3.1) which allocates ~30 GB
144
+ of KV cache — fatal on machines with ≤32 GB RAM.
145
+ SLM prompts are <500 tokens; 4096 context is more than enough.
146
+ """
115
147
  import httpx
116
148
  model = getattr(self._config.llm, 'model', None) or "llama3.1:8b"
117
- with httpx.Client(timeout=httpx.Timeout(20.0)) as client:
149
+ with httpx.Client(timeout=httpx.Timeout(30.0)) as client:
118
150
  resp = client.post("http://localhost:11434/api/generate", json={
119
151
  "model": model,
120
152
  "prompt": prompt,
121
153
  "stream": False,
122
- "options": {"num_predict": max_tokens, "temperature": 0.3},
154
+ "keep_alive": "30s",
155
+ "options": {
156
+ "num_predict": max_tokens,
157
+ "temperature": 0.3,
158
+ "num_ctx": 4096,
159
+ },
123
160
  })
124
161
  resp.raise_for_status()
125
162
  return resp.json().get("response", "").strip()
@@ -127,7 +127,13 @@ class LLMBackbone:
127
127
  # -- Properties ---------------------------------------------------------
128
128
 
129
129
  def is_available(self) -> bool:
130
- """True when the provider is ready for requests."""
130
+ """True when the provider is ready for requests.
131
+
132
+ For Ollama: always True (no API key needed). The num_ctx and
133
+ keep_alive guards in _build_ollama() protect against memory spikes.
134
+ The recall-path warm-only guard lives in Summarizer, not here —
135
+ store/fact-extraction should always use the LLM in Mode B.
136
+ """
131
137
  if not self._provider:
132
138
  return False
133
139
  if self._provider == "ollama":
@@ -250,6 +256,8 @@ class LLMBackbone:
250
256
  "messages": messages,
251
257
  "max_tokens": max_tokens,
252
258
  "temperature": temperature,
259
+ "keep_alive": "30s",
260
+ "options": {"num_ctx": 4096},
253
261
  }
254
262
  return self._base_url, headers, payload
255
263
 
@@ -22,26 +22,32 @@ from pydantic import BaseModel, Field
22
22
  # ---------------------------------------------------------------------------
23
23
 
24
24
  def _get_version() -> str:
25
- """Read version from package.json / pyproject.toml / importlib."""
26
- try:
27
- import json as _json
28
- pkg_root = Path(__file__).resolve().parent.parent.parent.parent
29
- pkg_json = pkg_root / "package.json"
30
- if pkg_json.exists():
31
- with open(pkg_json) as f:
32
- v = _json.load(f).get("version", "")
33
- if v:
34
- return v
35
- except Exception:
36
- pass
37
- try:
38
- import tomllib
39
- toml_path = Path(__file__).resolve().parent.parent.parent.parent / "pyproject.toml"
40
- if toml_path.exists():
41
- with open(toml_path, "rb") as f:
42
- return tomllib.load(f)["project"]["version"]
43
- except Exception:
44
- pass
25
+ """Read version from package.json / pyproject.toml / importlib.
26
+
27
+ Walks up from this file to find the project root. In the src layout
28
+ (running from source tree), package.json is 5 parents up; for an
29
+ installed package it won't exist, so we fall through to importlib.
30
+ """
31
+ here = Path(__file__).resolve()
32
+ for depth in (5, 4):
33
+ try:
34
+ import json as _json
35
+ root = here
36
+ for _ in range(depth):
37
+ root = root.parent
38
+ pkg_json = root / "package.json"
39
+ if pkg_json.exists():
40
+ with open(pkg_json) as f:
41
+ v = _json.load(f).get("version", "")
42
+ if v:
43
+ return v
44
+ toml_path = root / "pyproject.toml"
45
+ if toml_path.exists():
46
+ import tomllib
47
+ with open(toml_path, "rb") as f:
48
+ return tomllib.load(f)["project"]["version"]
49
+ except Exception:
50
+ continue
45
51
  try:
46
52
  from importlib.metadata import version
47
53
  return version("superlocalmemory")