@simbimbo/memory-ocmemog 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +23 -1
- package/README.md +19 -14
- package/brain/runtime/config.py +6 -1
- package/brain/runtime/inference.py +98 -28
- package/brain/runtime/memory/api.py +822 -1
- package/brain/runtime/memory/context_builder.py +101 -76
- package/brain/runtime/memory/distill.py +156 -13
- package/brain/runtime/memory/pondering_engine.py +2 -0
- package/brain/runtime/memory/promote.py +6 -0
- package/brain/runtime/memory/provenance.py +52 -0
- package/brain/runtime/memory/retrieval.py +116 -50
- package/brain/runtime/model_router.py +2 -0
- package/brain/runtime/providers.py +17 -8
- package/docs/notes/2026-03-18-memory-repair-and-backfill.md +3 -3
- package/docs/notes/local-model-role-matrix-2026-03-18.md +7 -3
- package/docs/usage.md +23 -19
- package/index.ts +1 -1
- package/ocmemog/sidecar/app.py +225 -1
- package/ocmemog/sidecar/compat.py +4 -0
- package/ocmemog/sidecar/transcript_watcher.py +2 -2
- package/package.json +1 -1
- package/scripts/install-ocmemog.sh +26 -26
- package/scripts/ocmemog-backfill-vectors.py +6 -4
- package/scripts/ocmemog-context.sh +1 -1
- package/scripts/ocmemog-demo.py +2 -2
- package/scripts/ocmemog-install.sh +4 -12
- package/scripts/ocmemog-load-test.py +2 -2
- package/scripts/ocmemog-ponder.sh +2 -2
- package/scripts/ocmemog-recall-test.py +2 -2
- package/scripts/ocmemog-reindex-vectors.py +6 -4
- package/scripts/ocmemog-reliability-soak.py +1 -1
- package/scripts/ocmemog-sidecar.sh +11 -7
- package/scripts/ocmemog-test-rig.py +4 -3
package/CHANGELOG.md
CHANGED
|
@@ -1,11 +1,33 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.1.7 — 2026-03-19
|
|
4
|
+
|
|
5
|
+
llama.cpp-first cleanup after the 0.1.6 runtime cutover.
|
|
6
|
+
|
|
7
|
+
### Highlights
|
|
8
|
+
- made llama.cpp / local OpenAI-compatible endpoints the primary documented and scripted local runtime path
|
|
9
|
+
- reduced misleading Ollama-first defaults in installers, sidecar scripts, docs, and helper tooling
|
|
10
|
+
- aligned context/distill/runtime helpers with the fixed local model architecture (`17890` gateway, `17891` sidecar, `18080` text, `18081` embeddings)
|
|
11
|
+
- kept compatibility hooks only where still useful for rollback or mixed environments
|
|
12
|
+
|
|
13
|
+
## 0.1.6 — 2026-03-19
|
|
14
|
+
|
|
15
|
+
Port-separation and publish-solid follow-up.
|
|
16
|
+
|
|
17
|
+
### Highlights
|
|
18
|
+
- Split ocmemog sidecar onto dedicated loopback port `17891` to avoid collision with the OpenClaw gateway/dashboard on `17890`
|
|
19
|
+
- Restored the plain realtime dashboard on `/dashboard` and fixed the `local_html` template crash
|
|
20
|
+
- Updated plugin/runtime defaults, scripts, and documentation to use the dedicated sidecar endpoint on `17891`
|
|
21
|
+
- Switched repo-facing local-runtime defaults to llama.cpp-first endpoints on `18080`/`18081` with Qwen2.5 text and `nomic-embed-text-v1.5` embeddings, while keeping Ollama as an explicit legacy fallback only
|
|
22
|
+
- Added governance retrieval/governance-policy hardening plus expanded regression coverage for duplicate, contradiction, supersession, queue, audit, rollback, and auto-resolve flows
|
|
23
|
+
- Aligned package/version metadata across npm, Python, and FastAPI surfaces
|
|
24
|
+
|
|
3
25
|
## 0.1.5 — 2026-03-18
|
|
4
26
|
|
|
5
27
|
Repair and hardening follow-up after the 0.1.4 publish.
|
|
6
28
|
|
|
7
29
|
### Highlights
|
|
8
|
-
- Fixed vector reindex defaults so repair scripts use provider-backed
|
|
30
|
+
- Fixed vector reindex defaults so repair scripts use provider-backed local embeddings instead of silently rebuilding weak local/hash vectors
|
|
9
31
|
- Added battery-aware sidecar defaults for macOS laptops (`OCMEMOG_LAPTOP_MODE=auto|ac|battery`)
|
|
10
32
|
- Fixed `record_reinforcement()` so new experiences preserve `memory_reference`, and added integrity repair to backfill legacy missing references
|
|
11
33
|
- Added incremental vector backfill tooling (`scripts/ocmemog-backfill-vectors.py`) for non-destructive backlog repair
|
package/README.md
CHANGED
|
@@ -32,7 +32,7 @@ pip install -r requirements.txt
|
|
|
32
32
|
./scripts/ocmemog-sidecar.sh
|
|
33
33
|
|
|
34
34
|
# then open
|
|
35
|
-
# http://127.0.0.1:
|
|
35
|
+
# http://127.0.0.1:17891/dashboard
|
|
36
36
|
```
|
|
37
37
|
|
|
38
38
|
## Optional: transcript watcher (auto-ingest)
|
|
@@ -45,8 +45,8 @@ export OCMEMOG_TRANSCRIPT_DIR="$HOME/.openclaw/workspace/memory/transcripts"
|
|
|
45
45
|
|
|
46
46
|
Default bind:
|
|
47
47
|
|
|
48
|
-
- endpoint: `http://127.0.0.1:
|
|
49
|
-
- health: `http://127.0.0.1:
|
|
48
|
+
- endpoint: `http://127.0.0.1:17891`
|
|
49
|
+
- health: `http://127.0.0.1:17891/healthz`
|
|
50
50
|
|
|
51
51
|
## Continuity proof / benchmark harness
|
|
52
52
|
|
|
@@ -78,20 +78,24 @@ Optional environment variables:
|
|
|
78
78
|
- `OCMEMOG_OPENAI_API_BASE` (default: `https://api.openai.com/v1`)
|
|
79
79
|
- `OCMEMOG_OPENAI_EMBED_MODEL` (default: `text-embedding-3-small`)
|
|
80
80
|
- `BRAIN_EMBED_MODEL_LOCAL` (`simple` by default)
|
|
81
|
-
- `BRAIN_EMBED_MODEL_PROVIDER` (`openai` to
|
|
81
|
+
- `BRAIN_EMBED_MODEL_PROVIDER` (`local-openai` to use the local llama.cpp embedding endpoint; `openai` remains available for hosted embeddings)
|
|
82
82
|
- `OCMEMOG_TRANSCRIPT_WATCHER` (`true` to auto-start transcript watcher inside the sidecar)
|
|
83
83
|
- `OCMEMOG_TRANSCRIPT_ROOTS` (comma-separated allowed roots for transcript context retrieval; default: `~/.openclaw/workspace/memory`)
|
|
84
84
|
- `OCMEMOG_API_TOKEN` (optional; if set, requests must include `x-ocmemog-token` or `Authorization: Bearer ...`)
|
|
85
85
|
- `OCMEMOG_AUTO_HYDRATION` (`true` to re-enable prompt-time continuity prepending; defaults to `false` as a safety guard until the host runtime is verified not to persist prepended context into session history)
|
|
86
86
|
- `OCMEMOG_LAPTOP_MODE` (`auto` by default; on macOS battery power this slows watcher polling, reduces ingest batch size, and disables sentiment reinforcement unless explicitly overridden)
|
|
87
|
-
- `
|
|
88
|
-
- `
|
|
89
|
-
- `
|
|
90
|
-
- `
|
|
87
|
+
- `OCMEMOG_LOCAL_LLM_BASE_URL` (default: `http://127.0.0.1:18080/v1`; local OpenAI-compatible text endpoint, e.g. llama.cpp)
|
|
88
|
+
- `OCMEMOG_LOCAL_LLM_MODEL` (default: `qwen2.5-7b-instruct`; matches the active Qwen2.5-7B-Instruct GGUF runtime)
|
|
89
|
+
- `OCMEMOG_LOCAL_EMBED_BASE_URL` (default: `http://127.0.0.1:18081/v1`; local OpenAI-compatible embedding endpoint)
|
|
90
|
+
- `OCMEMOG_LOCAL_EMBED_MODEL` (default: `nomic-embed-text-v1.5`)
|
|
91
|
+
- `OCMEMOG_USE_OLLAMA` (`true` to force legacy Ollama local inference path)
|
|
92
|
+
- `OCMEMOG_OLLAMA_HOST` (default: `http://127.0.0.1:11434`; legacy fallback)
|
|
93
|
+
- `OCMEMOG_OLLAMA_MODEL` (default: `qwen2.5:7b`; legacy fallback for machines that still use Ollama)
|
|
94
|
+
- `OCMEMOG_OLLAMA_EMBED_MODEL` (default: `nomic-embed-text:latest`; legacy embedding fallback)
|
|
91
95
|
- `OCMEMOG_PROMOTION_THRESHOLD` (default: `0.5`)
|
|
92
96
|
- `OCMEMOG_DEMOTION_THRESHOLD` (default: `0.2`)
|
|
93
97
|
- `OCMEMOG_PONDER_ENABLED` (default: `true`)
|
|
94
|
-
- `OCMEMOG_PONDER_MODEL` (default via launcher: `qwen2.5
|
|
98
|
+
- `OCMEMOG_PONDER_MODEL` (default via launcher: `local-openai:qwen2.5-7b-instruct`; recommended for structured local memory refinement)
|
|
95
99
|
- `OCMEMOG_LESSON_MINING_ENABLED` (default: `true`)
|
|
96
100
|
|
|
97
101
|
## Security
|
|
@@ -129,12 +133,13 @@ This installer will try to:
|
|
|
129
133
|
- install Python requirements
|
|
130
134
|
- install/enable the OpenClaw plugin when the `openclaw` CLI is available
|
|
131
135
|
- install/load LaunchAgents via `scripts/ocmemog-install.sh`
|
|
132
|
-
-
|
|
136
|
+
- verify the local llama.cpp runtime and expected text/embed endpoints
|
|
133
137
|
- validate `/healthz`
|
|
134
138
|
|
|
135
139
|
Notes:
|
|
136
|
-
- If `OCMEMOG_INSTALL_PREREQS=true` and Homebrew is present, the installer will try to install missing `
|
|
137
|
-
-
|
|
140
|
+
- If `OCMEMOG_INSTALL_PREREQS=true` and Homebrew is present, the installer will try to install missing `llama.cpp` and `ffmpeg` automatically.
|
|
141
|
+
- The installer no longer pulls local models. It assumes your llama.cpp text endpoint is on `127.0.0.1:18080` and your embedding endpoint is on `127.0.0.1:18081`.
|
|
142
|
+
- Legacy Ollama compatibility remains available only when you explicitly opt into it with `OCMEMOG_USE_OLLAMA=true`.
|
|
138
143
|
- If package install is unavailable in the local OpenClaw build, the installer falls back to local-path plugin install.
|
|
139
144
|
- Advanced flags are available for local debugging/CI (`--skip-plugin-install`, `--skip-launchagents`, `--skip-model-pulls`, `--endpoint`, `--repo-url`).
|
|
140
145
|
|
|
@@ -154,7 +159,7 @@ launchctl bootstrap gui/$UID scripts/launchagents/com.openclaw.ocmemog.guard.pli
|
|
|
154
159
|
|
|
155
160
|
## Recent changes
|
|
156
161
|
|
|
157
|
-
### 0.1.
|
|
162
|
+
### 0.1.6 (current main)
|
|
158
163
|
|
|
159
164
|
Package ownership + runtime safety release:
|
|
160
165
|
- Publish package under `@simbimbo/memory-ocmemog` instead of the unauthorized `@openclaw` scope
|
|
@@ -193,7 +198,7 @@ plugins:
|
|
|
193
198
|
memory-ocmemog:
|
|
194
199
|
enabled: true
|
|
195
200
|
config:
|
|
196
|
-
endpoint: http://127.0.0.1:
|
|
201
|
+
endpoint: http://127.0.0.1:17891
|
|
197
202
|
timeoutMs: 30000
|
|
198
203
|
```
|
|
199
204
|
|
package/brain/runtime/config.py
CHANGED
|
@@ -9,8 +9,13 @@ OCMEMOG_MEMORY_MODEL = os.environ.get("OCMEMOG_MEMORY_MODEL", "gpt-4o-mini")
|
|
|
9
9
|
OCMEMOG_OPENAI_API_BASE = os.environ.get("OCMEMOG_OPENAI_API_BASE", "https://api.openai.com/v1")
|
|
10
10
|
OCMEMOG_OPENAI_EMBED_MODEL = os.environ.get("OCMEMOG_OPENAI_EMBED_MODEL", "text-embedding-3-small")
|
|
11
11
|
|
|
12
|
+
OCMEMOG_LOCAL_LLM_BASE_URL = os.environ.get("OCMEMOG_LOCAL_LLM_BASE_URL", "http://127.0.0.1:18080/v1")
|
|
13
|
+
OCMEMOG_LOCAL_LLM_MODEL = os.environ.get("OCMEMOG_LOCAL_LLM_MODEL", "qwen2.5-7b-instruct")
|
|
14
|
+
OCMEMOG_LOCAL_EMBED_BASE_URL = os.environ.get("OCMEMOG_LOCAL_EMBED_BASE_URL", "http://127.0.0.1:18081/v1")
|
|
15
|
+
OCMEMOG_LOCAL_EMBED_MODEL = os.environ.get("OCMEMOG_LOCAL_EMBED_MODEL", "nomic-embed-text-v1.5")
|
|
16
|
+
|
|
12
17
|
OCMEMOG_OLLAMA_HOST = os.environ.get("OCMEMOG_OLLAMA_HOST", "http://127.0.0.1:11434")
|
|
13
|
-
OCMEMOG_OLLAMA_MODEL = os.environ.get("OCMEMOG_OLLAMA_MODEL", "
|
|
18
|
+
OCMEMOG_OLLAMA_MODEL = os.environ.get("OCMEMOG_OLLAMA_MODEL", "qwen2.5:7b")
|
|
14
19
|
OCMEMOG_OLLAMA_EMBED_MODEL = os.environ.get("OCMEMOG_OLLAMA_EMBED_MODEL", "nomic-embed-text:latest")
|
|
15
20
|
|
|
16
21
|
OCMEMOG_PROMOTION_THRESHOLD = float(os.environ.get("OCMEMOG_PROMOTION_THRESHOLD", "0.5"))
|
|
package/brain/runtime/inference.py
CHANGED
|
@@ -11,6 +11,35 @@ from brain.runtime.instrumentation import emit_event
|
|
|
11
11
|
LOGFILE = state_store.reports_dir() / "brain_memory.log.jsonl"
|
|
12
12
|
|
|
13
13
|
|
|
14
|
+
def _infer_openai_compatible(prompt: str, *, base_url: str, model: str, api_key: str | None = None, provider_label: str = "openai-compatible") -> dict[str, str]:
|
|
15
|
+
url = f"{base_url.rstrip('/')}/chat/completions"
|
|
16
|
+
payload = {
|
|
17
|
+
"model": model,
|
|
18
|
+
"messages": [{"role": "user", "content": prompt}],
|
|
19
|
+
"temperature": 0.2,
|
|
20
|
+
}
|
|
21
|
+
data = json.dumps(payload).encode("utf-8")
|
|
22
|
+
req = urllib.request.Request(url, data=data, method="POST")
|
|
23
|
+
if api_key:
|
|
24
|
+
req.add_header("Authorization", f"Bearer {api_key}")
|
|
25
|
+
req.add_header("Content-Type", "application/json")
|
|
26
|
+
|
|
27
|
+
try:
|
|
28
|
+
with urllib.request.urlopen(req, timeout=30) as resp:
|
|
29
|
+
response = json.loads(resp.read().decode("utf-8"))
|
|
30
|
+
except Exception as exc:
|
|
31
|
+
emit_event(LOGFILE, "brain_infer_error", status="error", provider=provider_label, error=str(exc))
|
|
32
|
+
return {"status": "error", "error": f"request_failed:{exc}"}
|
|
33
|
+
|
|
34
|
+
try:
|
|
35
|
+
output = response["choices"][0]["message"]["content"]
|
|
36
|
+
except Exception as exc:
|
|
37
|
+
emit_event(LOGFILE, "brain_infer_error", status="error", provider=provider_label, error=str(exc))
|
|
38
|
+
return {"status": "error", "error": "invalid_response"}
|
|
39
|
+
|
|
40
|
+
return {"status": "ok", "output": str(output).strip()}
|
|
41
|
+
|
|
42
|
+
|
|
14
43
|
def _infer_ollama(prompt: str, model: str | None = None) -> dict[str, str]:
|
|
15
44
|
payload = {
|
|
16
45
|
"model": model or config.OCMEMOG_OLLAMA_MODEL,
|
|
@@ -33,47 +62,88 @@ def _infer_ollama(prompt: str, model: str | None = None) -> dict[str, str]:
|
|
|
33
62
|
return {"status": "ok", "output": str(output).strip()}
|
|
34
63
|
|
|
35
64
|
|
|
65
|
+
def _looks_like_local_openai_model(name: str) -> bool:
|
|
66
|
+
if not name:
|
|
67
|
+
return False
|
|
68
|
+
lowered = name.strip().lower()
|
|
69
|
+
return lowered.startswith("local-openai:") or lowered.startswith("local_openai:") or lowered.startswith("llamacpp:")
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _normalize_local_model_name(name: str) -> str:
|
|
73
|
+
lowered = (name or "").strip()
|
|
74
|
+
for prefix in ("local-openai:", "local_openai:", "llamacpp:"):
|
|
75
|
+
if lowered.lower().startswith(prefix):
|
|
76
|
+
return lowered[len(prefix):]
|
|
77
|
+
return lowered
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _looks_like_ollama_model(name: str) -> bool:
|
|
81
|
+
if not name:
|
|
82
|
+
return False
|
|
83
|
+
lowered = name.strip().lower()
|
|
84
|
+
if lowered.startswith("ollama:"):
|
|
85
|
+
return True
|
|
86
|
+
if "/" in lowered:
|
|
87
|
+
return False
|
|
88
|
+
return ":" in lowered
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def stats() -> dict[str, object]:
|
|
92
|
+
materialized_local = int(_LOCAL_INFER_STATS.get("local_success", 0)) + int(_LOCAL_INFER_STATS.get("cache_hits", 0))
|
|
93
|
+
est_prompt_tokens_saved = materialized_local * _AVG_PROMPT_TOKENS_SAVED
|
|
94
|
+
est_completion_tokens_saved = materialized_local * _AVG_COMPLETION_TOKENS_SAVED
|
|
95
|
+
est_cost_saved = (
|
|
96
|
+
(est_prompt_tokens_saved / 1000.0) * _EST_FRONTIER_INPUT_COST_PER_1K
|
|
97
|
+
+ (est_completion_tokens_saved / 1000.0) * _EST_FRONTIER_OUTPUT_COST_PER_1K
|
|
98
|
+
)
|
|
99
|
+
return {
|
|
100
|
+
"cache_entries": len(_LOCAL_INFER_CACHE),
|
|
101
|
+
"warm_models": sorted(_MODEL_WARM_STATE.keys()),
|
|
102
|
+
"frontier_calls_avoided_est": materialized_local,
|
|
103
|
+
"prompt_tokens_saved_est": est_prompt_tokens_saved,
|
|
104
|
+
"completion_tokens_saved_est": est_completion_tokens_saved,
|
|
105
|
+
"cost_saved_usd_est": round(est_cost_saved, 4),
|
|
106
|
+
**{k: int(v) for k, v in _LOCAL_INFER_STATS.items()},
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
|
|
36
110
|
def infer(prompt: str, provider_name: str | None = None) -> dict[str, str]:
|
|
37
111
|
if not isinstance(prompt, str) or not prompt.strip():
|
|
38
112
|
return {"status": "error", "error": "empty_prompt"}
|
|
39
113
|
|
|
40
114
|
use_ollama = os.environ.get("OCMEMOG_USE_OLLAMA", "").lower() in {"1", "true", "yes"}
|
|
41
115
|
model_override = provider_name or config.OCMEMOG_MEMORY_MODEL
|
|
42
|
-
if
|
|
116
|
+
if _looks_like_local_openai_model(model_override):
|
|
117
|
+
model = _normalize_local_model_name(model_override) or config.OCMEMOG_LOCAL_LLM_MODEL
|
|
118
|
+
return _infer_openai_compatible(
|
|
119
|
+
prompt,
|
|
120
|
+
base_url=config.OCMEMOG_LOCAL_LLM_BASE_URL,
|
|
121
|
+
model=model,
|
|
122
|
+
api_key=os.environ.get("OCMEMOG_LOCAL_LLM_API_KEY") or os.environ.get("LOCAL_LLM_API_KEY"),
|
|
123
|
+
provider_label="local-openai",
|
|
124
|
+
)
|
|
125
|
+
if use_ollama or _looks_like_ollama_model(model_override):
|
|
43
126
|
model = model_override.split(":", 1)[-1] if model_override.startswith("ollama:") else model_override
|
|
44
127
|
return _infer_ollama(prompt, model)
|
|
45
128
|
|
|
46
129
|
api_key = os.environ.get("OCMEMOG_OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY")
|
|
47
130
|
if not api_key:
|
|
48
|
-
|
|
49
|
-
|
|
131
|
+
return _infer_openai_compatible(
|
|
132
|
+
prompt,
|
|
133
|
+
base_url=config.OCMEMOG_LOCAL_LLM_BASE_URL,
|
|
134
|
+
model=config.OCMEMOG_LOCAL_LLM_MODEL,
|
|
135
|
+
api_key=os.environ.get("OCMEMOG_LOCAL_LLM_API_KEY") or os.environ.get("LOCAL_LLM_API_KEY"),
|
|
136
|
+
provider_label="local-openai",
|
|
137
|
+
)
|
|
50
138
|
|
|
51
139
|
model = model_override
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
req = urllib.request.Request(url, data=data, method="POST")
|
|
60
|
-
req.add_header("Authorization", f"Bearer {api_key}")
|
|
61
|
-
req.add_header("Content-Type", "application/json")
|
|
62
|
-
|
|
63
|
-
try:
|
|
64
|
-
with urllib.request.urlopen(req, timeout=30) as resp:
|
|
65
|
-
response = json.loads(resp.read().decode("utf-8"))
|
|
66
|
-
except Exception as exc:
|
|
67
|
-
emit_event(LOGFILE, "brain_infer_error", status="error", provider="openai", error=str(exc))
|
|
68
|
-
return {"status": "error", "error": f"request_failed:{exc}"}
|
|
69
|
-
|
|
70
|
-
try:
|
|
71
|
-
output = response["choices"][0]["message"]["content"]
|
|
72
|
-
except Exception as exc:
|
|
73
|
-
emit_event(LOGFILE, "brain_infer_error", status="error", provider="openai", error=str(exc))
|
|
74
|
-
return {"status": "error", "error": "invalid_response"}
|
|
75
|
-
|
|
76
|
-
return {"status": "ok", "output": str(output).strip()}
|
|
140
|
+
return _infer_openai_compatible(
|
|
141
|
+
prompt,
|
|
142
|
+
base_url=config.OCMEMOG_OPENAI_API_BASE,
|
|
143
|
+
model=model,
|
|
144
|
+
api_key=api_key,
|
|
145
|
+
provider_label="openai",
|
|
146
|
+
)
|
|
77
147
|
|
|
78
148
|
|
|
79
149
|
def parse_operator_name(text: str) -> dict[str, str] | None:
|