clawmem 0.10.1 → 0.10.3
- package/AGENTS.md +1 -1
- package/CLAUDE.md +1 -1
- package/README.md +5 -1
- package/SKILL.md +1 -1
- package/package.json +1 -1
- package/src/amem.ts +428 -40
- package/src/clawmem.ts +1 -0
- package/src/hermes/__init__.py +30 -1
- package/src/llm.ts +59 -8
- package/src/openclaw/index.ts +15 -4
- package/src/openclaw/openclaw.plugin.json +27 -0
- package/src/openclaw/package.json +1 -1
package/AGENTS.md
CHANGED
|
@@ -739,7 +739,7 @@ clawmem focus clear --session-id abc123
|
|
|
739
739
|
- Consolidation worker (`CLAWMEM_ENABLE_CONSOLIDATION=true`) backfills unenriched docs with A-MEM notes + links. Only runs if the MCP process stays alive long enough to tick (every 5min).
|
|
740
740
|
- Beads integration: `syncBeadsIssues()` queries `bd` CLI (Dolt backend, v0.58.0+) for live issue data, creates markdown docs in `beads` collection, maps all dependency edge types into `memory_relations`, and triggers A-MEM enrichment for new docs. Watcher auto-triggers on `.beads/` directory changes; `beads_sync` MCP tool for manual sync. Requires `bd` binary on PATH or at `~/go/bin/bd`.
|
|
741
741
|
- HTTP REST API: `clawmem serve [--port 7438]` — optional REST server on localhost. Search, retrieval, lifecycle, and graph traversal. `POST /retrieve` mirrors `memory_retrieve` with auto-routing (keyword/semantic/causal/timeline/hybrid). `POST /search` provides direct mode selection. Bearer token auth via `CLAWMEM_API_TOKEN` env var (disabled if unset).
|
|
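A minimal client sketch against the REST surface described above (endpoint, port, and bearer auth come from the bullet; the `query` body field and response shape are assumptions, not confirmed here):

```ts
// Sketch only: request/response shapes are assumed, not authoritative.
const res = await fetch("http://localhost:7438/retrieve", {
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    // Only required when CLAWMEM_API_TOKEN is set on the server.
    Authorization: `Bearer ${process.env.CLAWMEM_API_TOKEN ?? ""}`,
  },
  body: JSON.stringify({ query: "why did the beads backend move to Dolt?" }),
});
const hits = await res.json(); // auto-routed: keyword/semantic/causal/timeline/hybrid
console.log(hits);
```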
742
|
-
- OpenClaw memory plugin: `clawmem setup openclaw` — registers ClawMem as a native OpenClaw memory plugin (`kind: memory`, v0.10.0+). Lifecycle events on the plugin-hook bus: `before_prompt_build` is the **load-bearing** path — it runs prompt-aware retrieval (context-surfacing) AND the pre-emptive `precompact-extract` synchronously when token usage approaches the compaction threshold, so state is captured BEFORE the LLM call that could trigger compaction; `agent_end` runs decision-extractor + handoff-generator + feedback-loop in parallel (fire-and-forget at OpenClaw); `before_compaction` is **defense-in-depth fallback only** — fire-and-forget, races the compactor, exists for the rare case where the `before_prompt_build` proximity heuristic missed a sudden token jump; `session_start` registers the session and caches first-turn bootstrap context. Shares the same vault as Claude Code hooks (dual-mode). SQLite busy_timeout=5000ms for concurrent access safety.
|
|
742
|
+
- OpenClaw memory plugin: `clawmem setup openclaw` — registers ClawMem as a native OpenClaw memory plugin (`kind: memory`, v0.10.0+). Lifecycle events on the plugin-hook bus: `before_prompt_build` is the **load-bearing** path — it runs prompt-aware retrieval (context-surfacing) AND the pre-emptive `precompact-extract` synchronously when token usage approaches the compaction threshold, so state is captured BEFORE the LLM call that could trigger compaction; `agent_end` runs decision-extractor + handoff-generator + feedback-loop in parallel (fire-and-forget at OpenClaw's call site; OpenClaw v2026.4.26+ additionally enforces a 30s default void-hook timeout that logs slow handlers but does not cancel the underlying postrun work); `before_compaction` is **defense-in-depth fallback only** — fire-and-forget, races the compactor, exists for the rare case where the `before_prompt_build` proximity heuristic missed a sudden token jump; `session_start` registers the session and caches first-turn bootstrap context. Shares the same vault as Claude Code hooks (dual-mode). SQLite busy_timeout=5000ms for concurrent access safety.
|
|
743
743
|
- **§14.3 pure-memory migration (v0.10.0):** v0.10.0 drops the `ClawMemContextEngine` class entirely. Previous versions registered as `kind: context-engine` and implemented `assemble()`/`bootstrap()`/`afterTurn()`/`compact()` on a class. v0.10.0 registers as `kind: memory` and wires every lifecycle surface through plugin hooks on the event bus. Retrieval pipeline, composite scoring, vault format, and the 5 registered agent tools are unchanged — this is a packaging and registration change, not a behavioral one.
|
|
744
744
|
- **v2026.4.11 packaging fix (v0.10.0):** `src/openclaw/package.json` declares `openclaw.extensions: ["./index.ts"]` (required by v2026.4.11's discovery path), and `cmdSetupOpenClaw` defaults to `cpSync(..., { recursive: true, dereference: true })` because v2026.4.11's discoverer uses `readdirSync({ withFileTypes: true })` where symlink `isDirectory() === false`. A `--link` opt-in flag preserves the old symlink behavior for dev workflows with a warning.
|
|
745
745
|
- **v2026.4.18 synchronous-`register()` constraint:** OpenClaw v2026.4.18 (`fix(plugins): enforce synchronous registration`) throws `"plugin register must be synchronous"` if the plugin's `register()` function returns a Promise. ClawMem's `register(api)` in `src/openclaw/index.ts` is intentionally synchronous — all `await` work lives inside per-event handlers, never in registration itself. Companion change: register failures now atomically roll back side effects (globals, hook registrations, tool registrations), so any future throw inside `register()` will leave OpenClaw in a clean state. Keep the function synchronous and throw-free; do not add `async` or top-level `await`.
|
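A minimal sketch of that shape (the `PluginApi` type below is inferred from these notes, not the authoritative OpenClaw typings):

```ts
// Hypothetical api surface inferred from the notes above.
type PluginApi = {
  on(event: string, handler: (...args: unknown[]) => void | Promise<void>): void;
};

// register() stays synchronous and throw-free (v2026.4.18 contract);
// all await work lives inside the per-event handlers.
export function register(api: PluginApi): void {
  api.on("session_start", async () => {
    // register session + cache first-turn bootstrap context
  });
  api.on("before_prompt_build", async () => {
    // awaited, load-bearing: retrieval injection + pre-emptive precompact-extract
  });
  api.on("agent_end", async () => {
    // fire-and-forget in core: decision-extractor, handoff-generator, feedback-loop
  });
  api.on("before_compaction", async () => {
    // defense-in-depth fallback only; races the compactor
  });
}
```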
package/CLAUDE.md
CHANGED
|
@@ -739,7 +739,7 @@ clawmem focus clear --session-id abc123
|
|
|
739
739
|
- Consolidation worker (`CLAWMEM_ENABLE_CONSOLIDATION=true`) backfills unenriched docs with A-MEM notes + links. Only runs if the MCP process stays alive long enough to tick (every 5min).
|
|
740
740
|
- Beads integration: `syncBeadsIssues()` queries `bd` CLI (Dolt backend, v0.58.0+) for live issue data, creates markdown docs in `beads` collection, maps all dependency edge types into `memory_relations`, and triggers A-MEM enrichment for new docs. Watcher auto-triggers on `.beads/` directory changes; `beads_sync` MCP tool for manual sync. Requires `bd` binary on PATH or at `~/go/bin/bd`.
|
|
741
741
|
- HTTP REST API: `clawmem serve [--port 7438]` — optional REST server on localhost. Search, retrieval, lifecycle, and graph traversal. `POST /retrieve` mirrors `memory_retrieve` with auto-routing (keyword/semantic/causal/timeline/hybrid). `POST /search` provides direct mode selection. Bearer token auth via `CLAWMEM_API_TOKEN` env var (disabled if unset).
|
|
742
|
-
- OpenClaw memory plugin: `clawmem setup openclaw` — registers ClawMem as a native OpenClaw memory plugin (`kind: memory`, v0.10.0+). Lifecycle events on the plugin-hook bus: `before_prompt_build` is the **load-bearing** path — it runs prompt-aware retrieval (context-surfacing) AND the pre-emptive `precompact-extract` synchronously when token usage approaches the compaction threshold, so state is captured BEFORE the LLM call that could trigger compaction; `agent_end` runs decision-extractor + handoff-generator + feedback-loop in parallel (fire-and-forget at OpenClaw); `before_compaction` is **defense-in-depth fallback only** — fire-and-forget, races the compactor, exists for the rare case where the `before_prompt_build` proximity heuristic missed a sudden token jump; `session_start` registers the session and caches first-turn bootstrap context. Shares the same vault as Claude Code hooks (dual-mode). SQLite busy_timeout=5000ms for concurrent access safety.
|
|
742
|
+
- OpenClaw memory plugin: `clawmem setup openclaw` — registers ClawMem as a native OpenClaw memory plugin (`kind: memory`, v0.10.0+). Lifecycle events on the plugin-hook bus: `before_prompt_build` is the **load-bearing** path — it runs prompt-aware retrieval (context-surfacing) AND the pre-emptive `precompact-extract` synchronously when token usage approaches the compaction threshold, so state is captured BEFORE the LLM call that could trigger compaction; `agent_end` runs decision-extractor + handoff-generator + feedback-loop in parallel (fire-and-forget at OpenClaw's call site; OpenClaw v2026.4.26+ additionally enforces a 30s default void-hook timeout that logs slow handlers but does not cancel the underlying postrun work); `before_compaction` is **defense-in-depth fallback only** — fire-and-forget, races the compactor, exists for the rare case where the `before_prompt_build` proximity heuristic missed a sudden token jump; `session_start` registers the session and caches first-turn bootstrap context. Shares the same vault as Claude Code hooks (dual-mode). SQLite busy_timeout=5000ms for concurrent access safety.
|
|
743
743
|
- **§14.3 pure-memory migration (v0.10.0):** v0.10.0 drops the `ClawMemContextEngine` class entirely. Previous versions registered as `kind: context-engine` and implemented `assemble()`/`bootstrap()`/`afterTurn()`/`compact()` on a class. v0.10.0 registers as `kind: memory` and wires every lifecycle surface through plugin hooks on the event bus. Retrieval pipeline, composite scoring, vault format, and the 5 registered agent tools are unchanged — this is a packaging and registration change, not a behavioral one.
|
|
744
744
|
- **v2026.4.11 packaging fix (v0.10.0):** `src/openclaw/package.json` declares `openclaw.extensions: ["./index.ts"]` (required by v2026.4.11's discovery path), and `cmdSetupOpenClaw` defaults to `cpSync(..., { recursive: true, dereference: true })` because v2026.4.11's discoverer uses `readdirSync({ withFileTypes: true })` where symlink `isDirectory() === false`. A `--link` opt-in flag preserves the old symlink behavior for dev workflows with a warning.
|
|
745
745
|
- **v2026.4.18 synchronous-`register()` constraint:** OpenClaw v2026.4.18 (`fix(plugins): enforce synchronous registration`) throws `"plugin register must be synchronous"` if the plugin's `register()` function returns a Promise. ClawMem's `register(api)` in `src/openclaw/index.ts` is intentionally synchronous — all `await` work lives inside per-event handlers, never in registration itself. Companion change: register failures now atomically roll back side effects (globals, hook registrations, tool registrations), so any future throw inside `register()` will leave OpenClaw in a clean state. Keep the function synchronous and throw-free; do not add `async` or top-level `await`.
|
package/README.md
CHANGED
|
@@ -193,7 +193,7 @@ clawmem setup openclaw # Installs plugin into ~/.openclaw/extensions/clawmem (
|
|
|
193
193
|
|
|
194
194
|
**What the plugin provides:**
|
|
195
195
|
- **`before_prompt_build` hook (load-bearing)** - prompt-aware retrieval (context-surfacing + session-bootstrap) AND the pre-emptive `precompact-extract` run when token usage approaches the compaction threshold. This is the authoritative path for precompact state capture because it runs synchronously before the LLM call that would trigger compaction, so it cannot race the compactor.
|
|
196
|
-
- **`agent_end` hook** - decision extraction, handoff generation, feedback loop (parallel, fire-and-forget at the OpenClaw call site)
|
|
196
|
+
- **`agent_end` hook** - decision extraction, handoff generation, feedback loop (parallel, fire-and-forget at the OpenClaw call site). OpenClaw v2026.4.26+ also enforces a 30s default void-hook timeout on `agent_end` — slow handlers are logged but the underlying postrun work is not cancelled (fail-open).
|
|
197
197
|
- **`before_compaction` hook (defense-in-depth fallback)** - fires `precompact-extract` again for the rare case where `before_prompt_build`'s proximity heuristic missed a sudden token-count jump. Fire-and-forget at OpenClaw's call site, so it races the compactor and offers no correctness guarantee on its own — the `before_prompt_build` path is what actually holds the invariant.
|
|
198
198
|
- **`session_start` hook** - session registration + cached first-turn bootstrap context
|
|
199
199
|
- **5 agent tools** - `clawmem_search`, `clawmem_get`, `clawmem_session_log`, `clawmem_timeline`, `clawmem_similar`
|
|
@@ -332,6 +332,7 @@ If your GPU lives on a separate machine, point the env vars at it:
|
|
|
332
332
|
```bash
|
|
333
333
|
export CLAWMEM_EMBED_URL=http://gpu-host:8088
|
|
334
334
|
export CLAWMEM_LLM_URL=http://gpu-host:8089
|
|
335
|
+
export CLAWMEM_LLM_MODEL=qwen3
|
|
335
336
|
export CLAWMEM_RERANK_URL=http://gpu-host:8090
|
|
336
337
|
```
|
|
337
338
|
|
|
@@ -944,6 +945,9 @@ Notes referenced by the agent during a session get boosted (`access_count++`). U
|
|
|
944
945
|
| `CLAWMEM_EMBED_TPM_LIMIT` | `100000` | Tokens-per-minute limit for cloud embedding pacing. Match to your provider tier. |
|
|
945
946
|
| `CLAWMEM_EMBED_DIMENSIONS` | (none) | Output dimensions for OpenAI `text-embedding-3-*` Matryoshka models (e.g. `512`, `1024`). |
|
|
946
947
|
| `CLAWMEM_LLM_URL` | `http://localhost:8089` | LLM server URL for intent/query/A-MEM. Without it, falls back to `node-llama-cpp` (if allowed). |
|
|
948
|
+
| `CLAWMEM_LLM_MODEL` | `qwen3` | Model name sent to the configured LLM endpoint. Override for OpenAI-compatible endpoints serving a different model (e.g. `gpt-5.4-mini`). |
|
|
949
|
+
| `CLAWMEM_LLM_REASONING_EFFORT` | (none) | Optional top-level `reasoning_effort` field for Chat Completions endpoints that support it (for example OpenAI reasoning models). Leave unset for llama-server/vLLM unless your serving stack explicitly accepts that field. |
|
|
950
|
+
| `CLAWMEM_LLM_NO_THINK` | `true` | Append `/no_think` to remote LLM prompts. Set to `false` for standard OpenAI models and other endpoints that reject the Qwen-style suffix or treat it as literal prompt text. |
|
|
947
951
|
| `CLAWMEM_RERANK_URL` | `http://localhost:8090` | Reranker server URL. Without it, falls back to `node-llama-cpp` (if allowed). |
|
|
948
952
|
| `CLAWMEM_NO_LOCAL_MODELS` | `false` | Block `node-llama-cpp` from auto-downloading GGUF models. Set `true` for remote-only setups where you want fail-fast on unreachable endpoints. |
|
|
949
953
|
| `CLAWMEM_MERGE_SCORE_NORMAL` | `0.93` | **v0.7.1.** Phase 2 consolidation merge-safety threshold when candidate and existing anchors align. Merges above this normalized 3-gram cosine score are allowed. |
|
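Taken together, the three LLM knobs shape the outbound Chat Completions body as in this sketch, which mirrors the wiring in `src/llm.ts` (the prompt string is illustrative):

```ts
const prompt = "Summarize the last session"; // illustrative
const raw = (process.env.CLAWMEM_LLM_NO_THINK || "").trim().toLowerCase();
const noThink = raw ? !["0", "false", "no", "off"].includes(raw) : true;

const body: Record<string, unknown> = {
  model: process.env.CLAWMEM_LLM_MODEL?.trim() || "qwen3",
  messages: [{ role: "user", content: noThink ? `${prompt} /no_think` : prompt }],
};
if (process.env.CLAWMEM_LLM_REASONING_EFFORT) {
  // Only sent when set; llama-server/vLLM stacks may reject unknown fields.
  body.reasoning_effort = process.env.CLAWMEM_LLM_REASONING_EFFORT;
}
```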
package/SKILL.md
CHANGED
|
@@ -796,7 +796,7 @@ clawmem focus clear --session-id abc123
|
|
|
796
796
|
- Consolidation worker (`CLAWMEM_ENABLE_CONSOLIDATION=true`) backfills unenriched docs and runs Phase 2 merge / Phase 3 deductive synthesis. **v0.8.2:** hosted by either `clawmem watch` (long-lived, canonical) or `clawmem mcp` (per-session fallback); every tick acquires a `light-consolidation` `worker_leases` row before doing work, so dual-hosting against the same vault is safe.
|
|
797
797
|
- Beads integration: `syncBeadsIssues()` queries `bd` CLI (Dolt backend, v0.58.0+), creates markdown docs, maps dependency edges into `memory_relations`. Watcher auto-triggers on `.beads/` changes; `beads_sync` MCP for manual sync.
|
|
798
798
|
- HTTP REST API: `clawmem serve [--port 7438]` — optional REST server on localhost. Search, retrieval, lifecycle, and graph traversal. `POST /retrieve` mirrors `memory_retrieve` with auto-routing (keyword/semantic/causal/timeline/hybrid). `POST /search` provides direct mode selection. Bearer token auth via `CLAWMEM_API_TOKEN` env var (disabled if unset).
|
|
799
|
-
- OpenClaw memory plugin (v0.10.0+): `clawmem setup openclaw` — registers as native OpenClaw memory plugin (`kind: memory`). Dual-mode: shares vault with Claude Code hooks. Hook wiring on the plugin-hook bus: `before_prompt_build` is the **load-bearing** path — it runs prompt-aware retrieval AND the pre-emptive `precompact-extract` synchronously when token usage approaches the compaction threshold, so state is captured before the LLM call that could trigger compaction. `agent_end` runs decision-extractor + handoff-generator + feedback-loop in parallel (fire-and-forget at OpenClaw's call site). `before_compaction` is **defense-in-depth fallback only** — fire-and-forget, races the compactor, exists for the rare case where the proximity heuristic in `before_prompt_build` missed a sudden token jump. `session_start` registers the session + caches first-turn bootstrap context. The §14.3 migration removed the `ClawMemContextEngine` class and moved the plugin from the `context-engine` slot to the `memory` slot. Requires OpenClaw v2026.4.11+ (earlier versions do not support the new discovery contract).
|
|
799
|
+
- OpenClaw memory plugin (v0.10.0+): `clawmem setup openclaw` — registers as native OpenClaw memory plugin (`kind: memory`). Dual-mode: shares vault with Claude Code hooks. Hook wiring on the plugin-hook bus: `before_prompt_build` is the **load-bearing** path — it runs prompt-aware retrieval AND the pre-emptive `precompact-extract` synchronously when token usage approaches the compaction threshold, so state is captured before the LLM call that could trigger compaction. `agent_end` runs decision-extractor + handoff-generator + feedback-loop in parallel (fire-and-forget at OpenClaw's call site; OpenClaw v2026.4.26+ additionally enforces a 30s default void-hook timeout that logs slow handlers but does not cancel the underlying postrun work). `before_compaction` is **defense-in-depth fallback only** — fire-and-forget, races the compactor, exists for the rare case where the proximity heuristic in `before_prompt_build` missed a sudden token jump. `session_start` registers the session + caches first-turn bootstrap context. The §14.3 migration removed the `ClawMemContextEngine` class and moved the plugin from the `context-engine` slot to the `memory` slot. Requires OpenClaw v2026.4.11+ (earlier versions do not support the new discovery contract).
|
|
800
800
|
- Hermes Agent MemoryProvider plugin: `src/hermes/` — Python plugin for Hermes's memory system. Shell-out hooks for lifecycle (prefetch, extraction, precompact), REST API for tools. Plugin-managed transcript JSONL bridges Hermes turn pairs to ClawMem file format. Shares vault with Claude Code and OpenClaw. **Preferred install path:** `$HERMES_HOME/plugins/clawmem/` (Hermes #10529 user-plugin discovery, v2026.4.13+) — survives `git pull` of hermes-agent. The bundled `hermes-agent/plugins/memory/clawmem/` path still works. **Agent-context isolation:** read-side hooks always run; write-side surfaces (`sync_turn`, `on_session_end`, `on_pre_compress`) early-return when `agent_context != "primary"` so cron/subagent state never reaches the vault.
|
|
801
801
|
|
|
802
802
|
## Tool Selection (one-liner)
|
package/package.json
CHANGED
package/src/amem.ts
CHANGED
|
@@ -22,50 +22,243 @@ const EMPTY_NOTE: MemoryNote = {
|
|
|
22
22
|
context: ""
|
|
23
23
|
};
|
|
24
24
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
25
|
+
function uniqueStrings(values: string[]): string[] {
|
|
26
|
+
const seen = new Set<string>();
|
|
27
|
+
const out: string[] = [];
|
|
28
|
+
for (const value of values) {
|
|
29
|
+
const trimmed = value.trim();
|
|
30
|
+
if (!trimmed || seen.has(trimmed)) continue;
|
|
31
|
+
seen.add(trimmed);
|
|
32
|
+
out.push(trimmed);
|
|
33
|
+
}
|
|
34
|
+
return out;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
type LinkRelationType = 'semantic' | 'supporting' | 'contradicts';
|
|
38
|
+
|
|
39
|
+
type ParsedLinkGeneration = {
|
|
40
|
+
target_idx: number;
|
|
41
|
+
link_type: LinkRelationType;
|
|
42
|
+
confidence: number;
|
|
43
|
+
reasoning: string;
|
|
44
|
+
};
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
function isLinkRelationType(value: unknown): value is LinkRelationType {
|
|
48
|
+
return value === 'semantic' || value === 'supporting' || value === 'contradicts';
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
function isUnitIntervalNumber(value: unknown): value is number {
|
|
52
|
+
return typeof value === 'number' && Number.isFinite(value) && value >= 0 && value <= 1;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
function isParsedLinkGeneration(value: unknown): value is ParsedLinkGeneration {
|
|
56
|
+
if (!value || typeof value !== 'object') return false;
|
|
57
|
+
const link = value as Record<string, unknown>;
|
|
58
|
+
return Number.isInteger(link.target_idx) &&
|
|
59
|
+
(link.target_idx as number) > 0 &&
|
|
60
|
+
isLinkRelationType(link.link_type) &&
|
|
61
|
+
isUnitIntervalNumber(link.confidence) &&
|
|
62
|
+
typeof link.reasoning === 'string';
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
export function parseMemoryNoteFromLLM(raw: string): MemoryNote | null {
|
|
66
|
+
const parsed = extractJsonFromLLM(raw) as Partial<MemoryNote> | null;
|
|
67
|
+
if (parsed && Array.isArray(parsed.keywords)) {
|
|
68
|
+
return {
|
|
69
|
+
keywords: parsed.keywords.filter((v): v is string => typeof v === 'string'),
|
|
70
|
+
tags: Array.isArray(parsed.tags) ? parsed.tags.filter((v): v is string => typeof v === 'string') : [],
|
|
71
|
+
context: typeof parsed.context === 'string' ? parsed.context : '',
|
|
72
|
+
};
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
const lines = raw.split(/\r?\n/).map((line) => line.trim()).filter(Boolean);
|
|
76
|
+
const keywords = uniqueStrings(lines.filter((line) => line.startsWith('lex:')).map((line) => line.slice(4).trim()));
|
|
77
|
+
const context = lines.find((line) => line.startsWith('hyde:'))?.slice(5).trim() ?? '';
|
|
78
|
+
if (keywords.length === 0 && !context) {
|
|
79
|
+
return null;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
return {
|
|
83
|
+
keywords,
|
|
84
|
+
tags: [],
|
|
85
|
+
context,
|
|
86
|
+
};
|
|
87
|
+
}
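// Fallback path example (hypothetical model output): plain lex:/hyde: lines
// are accepted when no JSON parses, e.g.
//   lex: dolt migration
//   hyde: Why the beads backend moved to Dolt.
// => { keywords: ["dolt migration"], tags: [], context: "Why the beads backend moved to Dolt." }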
|
|
88
|
+
|
|
89
|
+
export function parseLinkGenerationFromLLM(raw: string): ParsedLinkGeneration[] | null {
|
|
90
|
+
const parsed = extractJsonFromLLM(raw) as { result?: unknown } | unknown[] | null;
|
|
91
|
+
const wrapped = parsed && typeof parsed === 'object' ? parsed as { result?: unknown } : null;
|
|
92
|
+
const items = Array.isArray(parsed)
|
|
93
|
+
? parsed
|
|
94
|
+
: wrapped && Array.isArray(wrapped.result)
|
|
95
|
+
? wrapped.result
|
|
96
|
+
: null;
|
|
97
|
+
|
|
98
|
+
if (!items) return null;
|
|
99
|
+
const validItems = items.filter(isParsedLinkGeneration);
|
|
100
|
+
return validItems.length === items.length ? validItems : null;
|
|
101
|
+
}
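// Accepts either a bare array or a { result: [...] } wrapper; one invalid
// item invalidates the whole batch (returns null), so partially valid link
// sets are never written.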
|
|
102
|
+
|
|
103
|
+
function tryParseJsonWithCommaRepair(text: string): any | null {
|
|
104
|
+
// Repair missing commas between object fields without rewriting adjacent array strings.
|
|
105
|
+
const repaired = text.replace(
|
|
106
|
+
/(\]|\}|"(?:[^"\\]|\\.)*"|-?\d(?:[\d.eE+-])*|true|false|null)(\s*"[^"\n]+"\s*:)/g,
|
|
107
|
+
'$1,$2'
|
|
108
|
+
);
|
|
109
|
+
if (repaired === text) return null;
|
|
110
|
+
try {
|
|
111
|
+
return JSON.parse(repaired);
|
|
112
|
+
} catch {
|
|
113
|
+
return null;
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
function extractBalancedJsonCandidate(text: string): string | null {
|
|
118
|
+
if (text[0] !== '{' && text[0] !== '[') return null;
|
|
119
|
+
|
|
120
|
+
const stack: string[] = [text[0]!];
|
|
121
|
+
let inString = false;
|
|
122
|
+
let escaped = false;
|
|
123
|
+
|
|
124
|
+
for (let i = 1; i < text.length; i++) {
|
|
125
|
+
const ch = text[i]!;
|
|
126
|
+
if (inString) {
|
|
127
|
+
if (escaped) {
|
|
128
|
+
escaped = false;
|
|
129
|
+
} else if (ch === '\\') {
|
|
130
|
+
escaped = true;
|
|
131
|
+
} else if (ch === '"') {
|
|
132
|
+
inString = false;
|
|
133
|
+
}
|
|
134
|
+
continue;
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
if (ch === '"') {
|
|
138
|
+
inString = true;
|
|
139
|
+
continue;
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
if (ch === '{' || ch === '[') {
|
|
143
|
+
stack.push(ch);
|
|
144
|
+
continue;
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
if (ch === '}' || ch === ']') {
|
|
148
|
+
const expected = ch === '}' ? '{' : '[';
|
|
149
|
+
if (stack[stack.length - 1] !== expected) return null;
|
|
150
|
+
stack.pop();
|
|
151
|
+
if (stack.length === 0) return text.slice(0, i + 1);
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
return null;
|
|
156
|
+
}
|
|
33
157
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
158
|
+
function parseBalancedJsonValue(candidate: string): any | null {
|
|
159
|
+
try {
|
|
160
|
+
return JSON.parse(candidate);
|
|
161
|
+
} catch {
|
|
162
|
+
return tryParseJsonWithCommaRepair(candidate);
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
function jsonStartsAtTrimmedLineStart(text: string, index: number): boolean {
|
|
167
|
+
const lineStart = text.lastIndexOf('\n', index - 1) + 1;
|
|
168
|
+
return text.slice(lineStart, index).trim().length === 0;
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
function findLineStartJsonAfter(text: string, index: number): number {
|
|
172
|
+
for (let i = index; i < text.length; i++) {
|
|
173
|
+
if ((text[i] === '{' || text[i] === '[') && jsonStartsAtTrimmedLineStart(text, i)) return i;
|
|
174
|
+
}
|
|
175
|
+
return -1;
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
function isLikelyInlineProseLiteral(text: string, index: number): boolean {
|
|
179
|
+
return !jsonStartsAtTrimmedLineStart(text, index) && !hasPayloadCueBefore(text, index);
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
function collectParseableBalancedJsonCandidates(
|
|
183
|
+
text: string,
|
|
184
|
+
startIndex: number
|
|
185
|
+
): Array<{ start: number; parsed: any }> {
|
|
186
|
+
const candidates: Array<{ start: number; parsed: any }> = [];
|
|
187
|
+
for (let i = startIndex; i < text.length; i++) {
|
|
188
|
+
if (text[i] !== '{' && text[i] !== '[') continue;
|
|
189
|
+
|
|
190
|
+
const balancedCandidate = extractBalancedJsonCandidate(text.slice(i));
|
|
191
|
+
if (!balancedCandidate) continue;
|
|
192
|
+
|
|
193
|
+
const parsed = parseBalancedJsonValue(balancedCandidate);
|
|
194
|
+
if (parsed !== null) candidates.push({ start: i, parsed });
|
|
195
|
+
|
|
196
|
+
i += balancedCandidate.length - 1;
|
|
197
|
+
}
|
|
198
|
+
return candidates;
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
function selectBalancedJsonCandidate(text: string, candidates: Array<{ start: number; parsed: any }>): any | null {
|
|
202
|
+
if (candidates.length === 0) return null;
|
|
203
|
+
|
|
204
|
+
const payloadCandidate = candidates.find((candidate) => hasPayloadCueBefore(text, candidate.start));
|
|
205
|
+
if (payloadCandidate) return payloadCandidate.parsed;
|
|
206
|
+
|
|
207
|
+
const first = candidates[0]!;
|
|
208
|
+
if (candidates.length > 1 && (hasExampleCueBefore(text, first.start) || isLikelyInlineProseLiteral(text, first.start))) {
|
|
209
|
+
const laterPayload = candidates.find((candidate) =>
|
|
210
|
+
!hasExampleCueBefore(text, candidate.start) && !isLikelyInlineProseLiteral(text, candidate.start)
|
|
211
|
+
);
|
|
212
|
+
if (laterPayload) return laterPayload.parsed;
|
|
38
213
|
}
|
|
39
214
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
215
|
+
return first.parsed;
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
function parseJsonCandidate(raw: string): any | null {
|
|
219
|
+
const trimmed = raw.trim();
|
|
220
|
+
const arrStart = trimmed.indexOf('[');
|
|
221
|
+
const objStart = trimmed.indexOf('{');
|
|
43
222
|
if (arrStart === -1 && objStart === -1) return null;
|
|
44
223
|
|
|
45
224
|
const start = arrStart === -1 ? objStart : objStart === -1 ? arrStart : Math.min(arrStart, objStart);
|
|
46
|
-
text =
|
|
225
|
+
const text = trimmed.slice(start);
|
|
47
226
|
|
|
48
|
-
// Try parsing as-is first
|
|
49
227
|
try {
|
|
50
228
|
return JSON.parse(text);
|
|
51
229
|
} catch {
|
|
52
|
-
//
|
|
230
|
+
// Try extracting balanced JSON values before lighter repairs.
|
|
53
231
|
}
|
|
54
232
|
|
|
55
|
-
|
|
233
|
+
const firstBalancedCandidate = extractBalancedJsonCandidate(text);
|
|
234
|
+
if (firstBalancedCandidate) {
|
|
235
|
+
if (hasExampleCueBefore(trimmed, start) || isLikelyInlineProseLiteral(trimmed, start)) {
|
|
236
|
+
const laterLineStartJson = findLineStartJsonAfter(trimmed, start + firstBalancedCandidate.length);
|
|
237
|
+
if (laterLineStartJson !== -1) {
|
|
238
|
+
const laterParsed = parseJsonCandidate(trimmed.slice(laterLineStartJson));
|
|
239
|
+
if (laterParsed !== null) return laterParsed;
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
const balancedParsed = selectBalancedJsonCandidate(
|
|
243
|
+
trimmed,
|
|
244
|
+
collectParseableBalancedJsonCandidates(trimmed, start)
|
|
245
|
+
);
|
|
246
|
+
if (balancedParsed !== null) return balancedParsed;
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
const commaRepaired = tryParseJsonWithCommaRepair(text);
|
|
250
|
+
if (commaRepaired !== null) return commaRepaired;
|
|
251
|
+
|
|
56
252
|
if (text.startsWith('[')) {
|
|
57
253
|
const lastBrace = text.lastIndexOf('}');
|
|
58
254
|
if (lastBrace > 0) {
|
|
59
255
|
const repaired = text.slice(0, lastBrace + 1) + ']';
|
|
60
256
|
try { return JSON.parse(repaired); } catch { /* continue */ }
|
|
61
257
|
}
|
|
62
|
-
// Might be an empty or trivial array
|
|
63
258
|
try { return JSON.parse(text.replace(/,\s*$/, '') + ']'); } catch { /* continue */ }
|
|
64
259
|
}
|
|
65
260
|
|
|
66
|
-
// Repair truncated objects: find last complete value, close the object
|
|
67
261
|
if (text.startsWith('{')) {
|
|
68
|
-
// Try closing at each } from the end
|
|
69
262
|
for (let i = text.length - 1; i > 0; i--) {
|
|
70
263
|
if (text[i] === '}' || text[i] === '"' || text[i] === '0' || text[i] === '1' ||
|
|
71
264
|
text[i] === '2' || text[i] === '3' || text[i] === '4' || text[i] === '5' ||
|
|
@@ -80,6 +273,201 @@ export function extractJsonFromLLM(raw: string): any | null {
|
|
|
80
273
|
return null;
|
|
81
274
|
}
|
|
82
275
|
|
|
276
|
+
function collectFenceBlocks(text: string): Array<{ start: number; end: number; tag: string | null; body: string }> {
|
|
277
|
+
const lines = text.split('\n');
|
|
278
|
+
const fences: Array<{ start: number; end: number; tag: string | null; body: string }> = [];
|
|
279
|
+
let offset = 0;
|
|
280
|
+
let open: { start: number; tag: string | null; bodyLines: string[] } | null = null;
|
|
281
|
+
|
|
282
|
+
for (const line of lines) {
|
|
283
|
+
const trimmed = line.trim();
|
|
284
|
+
const lineStart = offset;
|
|
285
|
+
const lineEnd = offset + line.length;
|
|
286
|
+
|
|
287
|
+
if (!open) {
|
|
288
|
+
const match = trimmed.match(/^```([^\s`]*)?\s*$/);
|
|
289
|
+
if (match) {
|
|
290
|
+
open = { start: lineStart, tag: match[1] || null, bodyLines: [] };
|
|
291
|
+
}
|
|
292
|
+
} else if (trimmed === '```') {
|
|
293
|
+
fences.push({
|
|
294
|
+
start: open.start,
|
|
295
|
+
end: Math.min(text.length, lineEnd + 1),
|
|
296
|
+
tag: open.tag,
|
|
297
|
+
body: open.bodyLines.join('\n').trim(),
|
|
298
|
+
});
|
|
299
|
+
open = null;
|
|
300
|
+
} else {
|
|
301
|
+
open.bodyLines.push(line);
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
offset = lineEnd + 1;
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
if (open) {
|
|
308
|
+
fences.push({
|
|
309
|
+
start: open.start,
|
|
310
|
+
end: text.length,
|
|
311
|
+
tag: open.tag,
|
|
312
|
+
body: open.bodyLines.join('\n').trim(),
|
|
313
|
+
});
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
return fences;
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
function stripAnyFences(text: string): string {
|
|
320
|
+
const ranges = collectAnyFenceRanges(text);
|
|
321
|
+
if (ranges.length === 0) return text.trim();
|
|
322
|
+
|
|
323
|
+
let out = '';
|
|
324
|
+
let cursor = 0;
|
|
325
|
+
for (const range of ranges) {
|
|
326
|
+
out += text.slice(cursor, range.start);
|
|
327
|
+
cursor = range.end;
|
|
328
|
+
}
|
|
329
|
+
out += text.slice(cursor);
|
|
330
|
+
return out.trim();
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
function collectStructuralFences(text: string): Array<{ body: string; isJson: boolean; start: number; end: number }> {
|
|
334
|
+
return collectFenceBlocks(text)
|
|
335
|
+
.filter((fence) => fence.tag === null || fence.tag === 'json')
|
|
336
|
+
.map((fence) => ({
|
|
337
|
+
body: fence.body,
|
|
338
|
+
isJson: fence.tag === 'json',
|
|
339
|
+
start: fence.start,
|
|
340
|
+
end: fence.end,
|
|
341
|
+
}));
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
function collectAnyFenceRanges(text: string): Array<{ start: number; end: number }> {
|
|
345
|
+
return collectFenceBlocks(text).map((fence) => ({ start: fence.start, end: fence.end }));
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
function findFirstJsonStartOutsideFences(
|
|
349
|
+
text: string,
|
|
350
|
+
fences: Array<{ start: number; end: number }>
|
|
351
|
+
): number {
|
|
352
|
+
let fenceIndex = 0;
|
|
353
|
+
for (let i = 0; i < text.length; i++) {
|
|
354
|
+
while (fenceIndex < fences.length && i >= fences[fenceIndex]!.end) {
|
|
355
|
+
fenceIndex++;
|
|
356
|
+
}
|
|
357
|
+
if (fenceIndex < fences.length) {
|
|
358
|
+
const fence = fences[fenceIndex]!;
|
|
359
|
+
if (i >= fence.start && i < fence.end) {
|
|
360
|
+
i = fence.end - 1;
|
|
361
|
+
continue;
|
|
362
|
+
}
|
|
363
|
+
}
|
|
364
|
+
if (text[i] === '[' || text[i] === '{') return i;
|
|
365
|
+
}
|
|
366
|
+
return -1;
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
function hasExampleCueBefore(text: string, index: number): boolean {
|
|
370
|
+
let end = index;
|
|
371
|
+
while (end > 0 && /\s/.test(text[end - 1]!)) end--;
|
|
372
|
+
const lineStart = text.lastIndexOf('\n', end - 1) + 1;
|
|
373
|
+
const cue = text.slice(lineStart, end).toLowerCase();
|
|
374
|
+
return cue.includes('example') || cue.includes('e.g.') || cue.includes('schema');
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
function hasPayloadCueBefore(text: string, index: number): boolean {
|
|
378
|
+
let end = index;
|
|
379
|
+
while (end > 0 && /\s/.test(text[end - 1]!)) end--;
|
|
380
|
+
const lineStart = text.lastIndexOf('\n', end - 1) + 1;
|
|
381
|
+
const cue = text.slice(lineStart, end).trim().toLowerCase();
|
|
382
|
+
return /^(actual|result|final|answer)(?:\s+(json|answer|response|payload))?[:\-]?$/.test(cue);
|
|
383
|
+
}
|
|
384
|
+
/**
|
|
385
|
+
* Extract and parse JSON from LLM output, handling:
|
|
386
|
+
* - Markdown code blocks (```json ... ```)
|
|
387
|
+
* - Leading/trailing prose around JSON
|
|
388
|
+
* - Truncated JSON from token limits (repairs arrays/objects)
|
|
389
|
+
*/
|
|
390
|
+
export function extractJsonFromLLM(raw: string): any | null {
|
|
391
|
+
const text = raw.trim();
|
|
392
|
+
if (!text) return null;
|
|
393
|
+
|
|
394
|
+
const fences = collectStructuralFences(text);
|
|
395
|
+
const jsonFences = fences.filter((fence) => fence.isJson);
|
|
396
|
+
const anyFenceRanges = collectAnyFenceRanges(text);
|
|
397
|
+
const outsideJsonStart = findFirstJsonStartOutsideFences(text, anyFenceRanges);
|
|
398
|
+
const outsideLooksLikeExample = outsideJsonStart !== -1 && hasExampleCueBefore(text, outsideJsonStart);
|
|
399
|
+
const outsideLooksLikePayload = outsideJsonStart !== -1 && hasPayloadCueBefore(text, outsideJsonStart);
|
|
400
|
+
const preferredJsonFences = jsonFences.filter((fence) =>
|
|
401
|
+
!hasExampleCueBefore(text, fence.start) &&
|
|
402
|
+
!(text.startsWith('```') && fence.start === fences[0]?.start && outsideLooksLikePayload)
|
|
403
|
+
);
|
|
404
|
+
const preferredUntaggedFences = fences.filter((fence) => !fence.isJson && !hasExampleCueBefore(text, fence.start));
|
|
405
|
+
const firstPreferredJsonFence = preferredJsonFences[0] ?? null;
|
|
406
|
+
const firstPreferredUntaggedFence = preferredUntaggedFences[0] ?? null;
|
|
407
|
+
const untaggedFenceLooksLikeExample = firstPreferredUntaggedFence ? hasExampleCueBefore(text, firstPreferredUntaggedFence.start) : false;
|
|
408
|
+
const outsidePrecedesPreferredJsonFence = !!firstPreferredJsonFence && outsideJsonStart < firstPreferredJsonFence.start;
|
|
409
|
+
const tryOutsideBeforeJsonFences = outsideJsonStart !== -1 &&
|
|
410
|
+
!outsideLooksLikeExample &&
|
|
411
|
+
(!outsidePrecedesPreferredJsonFence || outsideLooksLikePayload);
|
|
412
|
+
|
|
413
|
+
if (text.startsWith('```') && fences[0]?.start === 0 && !outsideLooksLikePayload && (fences[0]!.isJson || preferredJsonFences.length === 0)) {
|
|
414
|
+
const parsedLeadingFence = parseJsonCandidate(fences[0]!.body);
|
|
415
|
+
if (parsedLeadingFence !== null) return parsedLeadingFence;
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
const tryOutsideFences = () => {
|
|
419
|
+
const withoutFences = stripAnyFences(text);
|
|
420
|
+
if (!withoutFences || withoutFences === text) return null;
|
|
421
|
+
return parseJsonCandidate(withoutFences);
|
|
422
|
+
};
|
|
423
|
+
|
|
424
|
+
if (!text.startsWith('```') && !firstPreferredJsonFence && firstPreferredUntaggedFence && outsideLooksLikeExample && !untaggedFenceLooksLikeExample) {
|
|
425
|
+
const parsedUntaggedFence = parseJsonCandidate(firstPreferredUntaggedFence.body);
|
|
426
|
+
if (parsedUntaggedFence !== null) return parsedUntaggedFence;
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
if (tryOutsideBeforeJsonFences) {
|
|
430
|
+
const parsedOutsideFences = tryOutsideFences();
|
|
431
|
+
if (parsedOutsideFences !== null) return parsedOutsideFences;
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
for (const fence of preferredJsonFences) {
|
|
435
|
+
const parsedJsonFence = parseJsonCandidate(fence.body);
|
|
436
|
+
if (parsedJsonFence !== null) return parsedJsonFence;
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
if (!tryOutsideBeforeJsonFences) {
|
|
440
|
+
const parsedOutsideFences = tryOutsideFences();
|
|
441
|
+
if (parsedOutsideFences !== null) return parsedOutsideFences;
|
|
442
|
+
}
|
|
443
|
+
|
|
444
|
+
if (fences.length === 0) {
|
|
445
|
+
const parsedRaw = parseJsonCandidate(text);
|
|
446
|
+
if (parsedRaw !== null) return parsedRaw;
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
const fallbackFences = preferredJsonFences.length === 0
|
|
450
|
+
? [
|
|
451
|
+
...preferredUntaggedFences,
|
|
452
|
+
...jsonFences.filter((fence) => hasExampleCueBefore(text, fence.start)),
|
|
453
|
+
...(text.startsWith('```')
|
|
454
|
+
? fences.slice(1).filter((fence) => !fence.isJson && hasExampleCueBefore(text, fence.start))
|
|
455
|
+
: fences.filter((fence) => !fence.isJson && hasExampleCueBefore(text, fence.start))),
|
|
456
|
+
]
|
|
457
|
+
: [
|
|
458
|
+
...jsonFences.filter((fence) => hasExampleCueBefore(text, fence.start)),
|
|
459
|
+
...(text.startsWith('```')
|
|
460
|
+
? fences.slice(1).filter((fence) => !fence.isJson)
|
|
461
|
+
: fences.filter((fence) => !fence.isJson)),
|
|
462
|
+
];
|
|
463
|
+
for (const fence of fallbackFences) {
|
|
464
|
+
const parsedFence = parseJsonCandidate(fence.body);
|
|
465
|
+
if (parsedFence !== null) return parsedFence;
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
return null;
|
|
469
|
+
}
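// Usage sketch (hypothetical inputs):
//   extractJsonFromLLM('Sure:\n```json\n{"keywords":["a"]}\n```')
//     => { keywords: ["a"] }   // fenced payload
//   extractJsonFromLLM('The answer is {"keywords":["a"]} hope that helps')
//     => { keywords: ["a"] }   // prose-wrapped balanced literal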
|
|
470
|
+
|
|
83
471
|
/**
|
|
84
472
|
* Construct a memory note for a document using LLM analysis.
|
|
85
473
|
* Extracts keywords, tags, and context summary.
|
|
@@ -142,9 +530,11 @@ Return ONLY valid JSON in this exact format:
|
|
|
142
530
|
return EMPTY_NOTE;
|
|
143
531
|
}
|
|
144
532
|
|
|
145
|
-
const parsed =
|
|
533
|
+
const parsed = parseMemoryNoteFromLLM(result.text);
|
|
146
534
|
|
|
147
|
-
if (!parsed
|
|
535
|
+
if (!parsed) {
|
|
536
|
+
console.log(`[amem] RAW memory note output for docId ${docId}:`);
|
|
537
|
+
console.log(result.text);
|
|
148
538
|
console.log(`[amem] Invalid/unparseable JSON for docId ${docId}`);
|
|
149
539
|
return EMPTY_NOTE;
|
|
150
540
|
}
|
|
@@ -302,34 +692,32 @@ Include all ${neighbors.length} neighbors in your response.`;
|
|
|
302
692
|
return 0;
|
|
303
693
|
}
|
|
304
694
|
|
|
305
|
-
const parsed =
|
|
306
|
-
target_idx: number;
|
|
307
|
-
link_type: 'semantic' | 'supporting' | 'contradicts';
|
|
308
|
-
confidence: number;
|
|
309
|
-
reasoning: string;
|
|
310
|
-
}> | null;
|
|
695
|
+
const parsed = parseLinkGenerationFromLLM(result.text);
|
|
311
696
|
|
|
312
|
-
if (!
|
|
697
|
+
if (!parsed) {
|
|
698
|
+
console.log(`[amem] RAW link generation output for docId ${docId}:`);
|
|
699
|
+
console.log(result.text);
|
|
313
700
|
console.log(`[amem] Invalid/unparseable JSON for link generation docId ${docId}`);
|
|
314
701
|
return 0;
|
|
315
702
|
}
|
|
316
703
|
|
|
704
|
+
|
|
317
705
|
// Insert links into memory_relations
|
|
318
706
|
let linksCreated = 0;
|
|
319
707
|
const now = new Date().toISOString();
|
|
708
|
+
const linkedTargetIndexes = new Set<number>();
|
|
320
709
|
|
|
321
710
|
for (const link of parsed) {
|
|
322
|
-
|
|
323
|
-
if (
|
|
324
|
-
|
|
325
|
-
link.target_idx > neighbors.length ||
|
|
326
|
-
!['semantic', 'supporting', 'contradicts'].includes(link.link_type) ||
|
|
327
|
-
typeof link.confidence !== 'number') {
|
|
711
|
+
const neighbor = neighbors[link.target_idx - 1];
|
|
712
|
+
if (!neighbor) {
|
|
713
|
+
console.log(`[amem] Skipping out-of-range link target ${link.target_idx} for docId ${docId}`);
|
|
328
714
|
continue;
|
|
329
715
|
}
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
716
|
+
if (linkedTargetIndexes.has(link.target_idx)) {
|
|
717
|
+
console.log(`[amem] Skipping duplicate link target ${link.target_idx} for docId ${docId}`);
|
|
718
|
+
continue;
|
|
719
|
+
}
|
|
720
|
+
linkedTargetIndexes.add(link.target_idx);
|
|
333
721
|
|
|
334
722
|
// Insert link with INSERT OR IGNORE for idempotency
|
|
335
723
|
store.db.prepare(`
|
package/src/clawmem.ts
CHANGED
|
@@ -1491,6 +1491,7 @@ async function cmdSetupOpenClaw(args: string[]) {
|
|
|
1491
1491
|
console.log(` 3. Configure GPU endpoints (if not using defaults):`);
|
|
1492
1492
|
console.log(` ${c.cyan}openclaw config set plugins.entries.clawmem.config.gpuEmbed http://YOUR_GPU:8088${c.reset}`);
|
|
1493
1493
|
console.log(` ${c.cyan}openclaw config set plugins.entries.clawmem.config.gpuLlm http://YOUR_GPU:8089${c.reset}`);
|
|
1494
|
+
console.log(` ${c.cyan}openclaw config set plugins.entries.clawmem.config.gpuLlmModel qwen3${c.reset}`);
|
|
1494
1495
|
console.log(` ${c.cyan}openclaw config set plugins.entries.clawmem.config.gpuRerank http://YOUR_GPU:8090${c.reset}`);
|
|
1495
1496
|
console.log();
|
|
1496
1497
|
console.log(` 4. Start the REST API (for agent tools):`);
|
package/src/hermes/__init__.py
CHANGED
|
@@ -15,6 +15,9 @@ Config via environment variables:
|
|
|
15
15
|
CLAWMEM_PROFILE — Retrieval profile: speed, balanced, deep (default: balanced)
|
|
16
16
|
CLAWMEM_EMBED_URL — GPU embedding server URL (optional)
|
|
17
17
|
CLAWMEM_LLM_URL — GPU LLM server URL (optional)
|
|
18
|
+
CLAWMEM_LLM_MODEL — Model name sent to the GPU/cloud LLM endpoint (optional)
|
|
19
|
+
CLAWMEM_LLM_REASONING_EFFORT — Top-level reasoning_effort for supporting Chat Completions endpoints (optional)
|
|
20
|
+
CLAWMEM_LLM_NO_THINK — Append /no_think to remote prompts; false disables it for standard OpenAI models (optional)
|
|
18
21
|
CLAWMEM_RERANK_URL — GPU reranker server URL (optional)
|
|
19
22
|
|
|
20
23
|
Agent-context isolation:
|
|
@@ -295,6 +298,24 @@ class ClawMemProvider(MemoryProvider):
|
|
|
295
298
|
"secret": False,
|
|
296
299
|
"env_var": "CLAWMEM_LLM_URL",
|
|
297
300
|
},
|
|
301
|
+
{
|
|
302
|
+
"key": "llm_model",
|
|
303
|
+
"description": "Model name sent to the GPU LLM server (e.g., qwen3, gpt-5.4-mini)",
|
|
304
|
+
"secret": False,
|
|
305
|
+
"env_var": "CLAWMEM_LLM_MODEL",
|
|
306
|
+
},
|
|
307
|
+
{
|
|
308
|
+
"key": "llm_reasoning_effort",
|
|
309
|
+
"description": "Optional top-level reasoning_effort for Chat Completions endpoints that support it",
|
|
310
|
+
"secret": False,
|
|
311
|
+
"env_var": "CLAWMEM_LLM_REASONING_EFFORT",
|
|
312
|
+
},
|
|
313
|
+
{
|
|
314
|
+
"key": "llm_no_think",
|
|
315
|
+
"description": "Append /no_think to remote LLM prompts; disable for standard OpenAI models",
|
|
316
|
+
"secret": False,
|
|
317
|
+
"env_var": "CLAWMEM_LLM_NO_THINK",
|
|
318
|
+
},
|
|
298
319
|
]
|
|
299
320
|
|
|
300
321
|
# -- Core lifecycle --------------------------------------------------------
|
|
@@ -324,7 +345,15 @@ class ClawMemProvider(MemoryProvider):
|
|
|
324
345
|
)
|
|
325
346
|
|
|
326
347
|
# Build env for hook shell-outs (GPU endpoints, profile)
|
|
327
|
-
for var in (
|
|
348
|
+
for var in (
|
|
349
|
+
"CLAWMEM_EMBED_URL",
|
|
350
|
+
"CLAWMEM_LLM_URL",
|
|
351
|
+
"CLAWMEM_LLM_MODEL",
|
|
352
|
+
"CLAWMEM_LLM_REASONING_EFFORT",
|
|
353
|
+
"CLAWMEM_LLM_NO_THINK",
|
|
354
|
+
"CLAWMEM_RERANK_URL",
|
|
355
|
+
"CLAWMEM_PROFILE",
|
|
356
|
+
):
|
|
328
357
|
val = os.environ.get(var)
|
|
329
358
|
if val:
|
|
330
359
|
self._env_extra[var] = val
|
package/src/llm.ts
CHANGED
|
@@ -237,6 +237,23 @@ export type LlamaCppConfig = {
|
|
|
237
237
|
* When set, generate() calls /v1/chat/completions instead of local node-llama-cpp.
|
|
238
238
|
*/
|
|
239
239
|
remoteLlmUrl?: string;
|
|
240
|
+
/**
|
|
241
|
+
* Remote LLM model name to send with chat completion requests.
|
|
242
|
+
* Env: CLAWMEM_LLM_MODEL
|
|
243
|
+
*/
|
|
244
|
+
remoteLlmModel?: string;
|
|
245
|
+
/**
|
|
246
|
+
* Optional top-level reasoning_effort field for Chat Completions endpoints that support it.
|
|
247
|
+
* Example values: none, minimal, low, medium, high, xhigh.
|
|
248
|
+
* Env: CLAWMEM_LLM_REASONING_EFFORT
|
|
249
|
+
*/
|
|
250
|
+
remoteLlmReasoningEffort?: string;
|
|
251
|
+
/**
|
|
252
|
+
* Whether to append /no_think to remote LLM prompts.
|
|
253
|
+
* Defaults to true to preserve current behavior with Qwen3-compatible endpoints.
|
|
254
|
+
* Env: CLAWMEM_LLM_NO_THINK
|
|
255
|
+
*/
|
|
256
|
+
remoteLlmNoThink?: boolean;
|
|
240
257
|
/**
|
|
241
258
|
* Inactivity timeout in ms before unloading contexts (default: 2 minutes, 0 to disable).
|
|
242
259
|
*
|
|
@@ -259,6 +276,23 @@ export type LlamaCppConfig = {
|
|
|
259
276
|
*/
|
|
260
277
|
// Default inactivity timeout: 2 minutes
|
|
261
278
|
const DEFAULT_INACTIVITY_TIMEOUT_MS = 2 * 60 * 1000;
|
|
279
|
+
const ALLOWED_REMOTE_LLM_REASONING_EFFORTS = new Set(["none", "minimal", "low", "medium", "high", "xhigh"]);
|
|
280
|
+
|
|
281
|
+
function normalizeRemoteLlmReasoningEffort(value?: string): string | null {
|
|
282
|
+
const raw = (value || "").trim().toLowerCase();
|
|
283
|
+
if (!raw) return null;
|
|
284
|
+
if (!ALLOWED_REMOTE_LLM_REASONING_EFFORTS.has(raw)) {
|
|
285
|
+
console.warn(`[clawmem] Ignoring unsupported remoteLlmReasoningEffort=${raw}`);
|
|
286
|
+
return null;
|
|
287
|
+
}
|
|
288
|
+
return raw;
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
function buildRemoteChatCompletionsUrl(remoteLlmUrl: string): string {
|
|
292
|
+
const baseUrl = remoteLlmUrl.replace(/\/+$/, "");
|
|
293
|
+
const endpoint = baseUrl.endsWith("/v1") ? "/chat/completions" : "/v1/chat/completions";
|
|
294
|
+
return `${baseUrl}${endpoint}`;
|
|
295
|
+
}
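// Examples (illustrative hosts):
//   "http://gpu-host:8089"      -> "http://gpu-host:8089/v1/chat/completions"
//   "https://proxy.example/v1/" -> "https://proxy.example/v1/chat/completions" (no double /v1)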
|
|
262
296
|
|
|
263
297
|
export class LlamaCpp implements LLM {
|
|
264
298
|
private llama: Llama | null = null;
|
|
@@ -276,6 +310,9 @@ export class LlamaCpp implements LLM {
|
|
|
276
310
|
private remoteEmbedApiKey: string | null;
|
|
277
311
|
private remoteEmbedModel: string;
|
|
278
312
|
private remoteLlmUrl: string | null;
|
|
313
|
+
private remoteLlmModel: string;
|
|
314
|
+
private remoteLlmReasoningEffort: string | null;
|
|
315
|
+
private remoteLlmNoThink: boolean;
|
|
279
316
|
|
|
280
317
|
// Ensure we don't load the same model concurrently (which can allocate duplicate VRAM).
|
|
281
318
|
private embedModelLoadPromise: Promise<LlamaModel> | null = null;
|
|
@@ -306,6 +343,10 @@ export class LlamaCpp implements LLM {
|
|
|
306
343
|
this.remoteEmbedApiKey = config.remoteEmbedApiKey || null;
|
|
307
344
|
this.remoteEmbedModel = config.remoteEmbedModel || "embedding";
|
|
308
345
|
this.remoteLlmUrl = config.remoteLlmUrl || null;
|
|
346
|
+
const normalizedRemoteLlmModel = config.remoteLlmModel?.trim();
|
|
347
|
+
this.remoteLlmModel = normalizedRemoteLlmModel || "qwen3";
|
|
348
|
+
this.remoteLlmReasoningEffort = normalizeRemoteLlmReasoningEffort(config.remoteLlmReasoningEffort);
|
|
349
|
+
this.remoteLlmNoThink = config.remoteLlmNoThink ?? true;
|
|
309
350
|
this.inactivityTimeoutMs = config.inactivityTimeoutMs ?? DEFAULT_INACTIVITY_TIMEOUT_MS;
|
|
310
351
|
this.disposeModelsOnInactivity = config.disposeModelsOnInactivity ?? false;
|
|
311
352
|
}
|
|
@@ -921,15 +962,19 @@ export class LlamaCpp implements LLM {
|
|
|
921
962
|
// Re-check: concurrent call may have set cooldown while we were awaited
|
|
922
963
|
if (this.isRemoteLlmDown()) return null;
|
|
923
964
|
try {
|
|
924
|
-
const
|
|
965
|
+
const body: Record<string, unknown> = {
|
|
966
|
+
model: this.remoteLlmModel,
|
|
967
|
+
messages: [{ role: "user", content: this.remoteLlmNoThink ? `${prompt} /no_think` : prompt }],
|
|
968
|
+
max_tokens: maxTokens,
|
|
969
|
+
temperature,
|
|
970
|
+
};
|
|
971
|
+
if (this.remoteLlmReasoningEffort) {
|
|
972
|
+
body.reasoning_effort = this.remoteLlmReasoningEffort;
|
|
973
|
+
}
|
|
974
|
+
const resp = await fetch(buildRemoteChatCompletionsUrl(this.remoteLlmUrl!), {
|
|
925
975
|
method: "POST",
|
|
926
976
|
headers: { "Content-Type": "application/json" },
|
|
927
|
-
body: JSON.stringify(
|
|
928
|
-
model: "qwen3",
|
|
929
|
-
messages: [{ role: "user", content: `${prompt} /no_think` }],
|
|
930
|
-
max_tokens: maxTokens,
|
|
931
|
-
temperature,
|
|
932
|
-
}),
|
|
977
|
+
body: JSON.stringify(body),
|
|
933
978
|
signal,
|
|
934
979
|
});
|
|
935
980
|
|
|
@@ -1254,6 +1299,13 @@ export function getDefaultLlamaCpp(): LlamaCpp {
|
|
|
1254
1299
|
remoteEmbedApiKey: embedApiKey,
|
|
1255
1300
|
remoteEmbedModel: process.env.CLAWMEM_EMBED_MODEL || undefined,
|
|
1256
1301
|
remoteLlmUrl: process.env.CLAWMEM_LLM_URL || undefined,
|
|
1302
|
+
remoteLlmModel: process.env.CLAWMEM_LLM_MODEL?.trim() || undefined,
|
|
1303
|
+
remoteLlmReasoningEffort: process.env.CLAWMEM_LLM_REASONING_EFFORT || undefined,
|
|
1304
|
+
remoteLlmNoThink: (() => {
|
|
1305
|
+
const raw = (process.env.CLAWMEM_LLM_NO_THINK || "").trim().toLowerCase();
|
|
1306
|
+
if (!raw) return undefined;
|
|
1307
|
+
return !["0", "false", "no", "off"].includes(raw);
|
|
1308
|
+
})(),
|
|
1257
1309
|
});
|
|
1258
1310
|
}
|
|
1259
1311
|
return defaultLlamaCpp;
|
|
@@ -1276,4 +1328,3 @@ export async function disposeDefaultLlamaCpp(): Promise<void> {
|
|
|
1276
1328
|
defaultLlamaCpp = null;
|
|
1277
1329
|
}
|
|
1278
1330
|
}
|
|
1279
|
-
|
package/src/openclaw/index.ts
CHANGED
|
@@ -37,8 +37,8 @@
|
|
|
37
37
|
* 4. REST API service (`clawmem serve`) lifecycle — unchanged.
|
|
38
38
|
*
|
|
39
39
|
* §14.3 critical correctness contract: `agent_end` is fire-and-forget at
|
|
40
|
-
* `attempt.ts:
|
|
41
|
-
* `handleBeforePromptBuild` (which IS awaited at `attempt.ts:
|
|
40
|
+
* `attempt.ts:3379-3402`. Precompact-extract MUST run inside
|
|
41
|
+
* `handleBeforePromptBuild` (which IS awaited at `attempt.ts:2610`), gated
|
|
42
42
|
* by the proximity heuristic in `compaction-threshold.ts`. See `engine.ts`
|
|
43
43
|
* top-of-file comment for the full rationale.
|
|
44
44
|
*/
|
|
@@ -107,6 +107,13 @@ const clawmemPlugin = {
|
|
|
107
107
|
env: {
|
|
108
108
|
...(pluginCfg.gpuEmbed ? { CLAWMEM_EMBED_URL: pluginCfg.gpuEmbed as string } : {}),
|
|
109
109
|
...(pluginCfg.gpuLlm ? { CLAWMEM_LLM_URL: pluginCfg.gpuLlm as string } : {}),
|
|
110
|
+
...(pluginCfg.gpuLlmModel ? { CLAWMEM_LLM_MODEL: pluginCfg.gpuLlmModel as string } : {}),
|
|
111
|
+
...(pluginCfg.gpuLlmReasoningEffort
|
|
112
|
+
? { CLAWMEM_LLM_REASONING_EFFORT: pluginCfg.gpuLlmReasoningEffort as string }
|
|
113
|
+
: {}),
|
|
114
|
+
...(pluginCfg.gpuLlmNoThink !== undefined
|
|
115
|
+
? { CLAWMEM_LLM_NO_THINK: String(pluginCfg.gpuLlmNoThink) }
|
|
116
|
+
: {}),
|
|
110
117
|
...(pluginCfg.gpuRerank ? { CLAWMEM_RERANK_URL: pluginCfg.gpuRerank as string } : {}),
|
|
111
118
|
CLAWMEM_PROFILE: profile,
|
|
112
119
|
},
|
|
@@ -154,7 +161,7 @@ const clawmemPlugin = {
|
|
|
154
161
|
// ----- Plugin Hook: before_prompt_build (AWAITED — load-bearing path) -----
|
|
155
162
|
// Both context-surfacing retrieval injection and pre-emptive precompact
|
|
156
163
|
// extraction live here. handleBeforePromptBuild is async and the OpenClaw
|
|
157
|
-
// attempt path awaits the result at attempt.ts:
|
|
164
|
+
// attempt path awaits the result at attempt.ts:2610 before building the
|
|
158
165
|
// effective prompt. precompact-extract therefore runs strictly before
|
|
159
166
|
// the LLM call that could trigger compaction on this turn.
|
|
160
167
|
api.on(
|
|
@@ -168,7 +175,11 @@ const clawmemPlugin = {
|
|
|
168
175
|
// ----- Plugin Hook: agent_end (FIRE-AND-FORGET in core) -----
|
|
169
176
|
// Decision-extractor, handoff-generator, and feedback-loop run here.
|
|
170
177
|
// These writes are eventually-consistent (saveMemory dedupes), so the
|
|
171
|
-
// fire-and-forget context at attempt.ts:
|
|
178
|
+
// fire-and-forget context at attempt.ts:3379-3402 is acceptable.
|
|
179
|
+
// OpenClaw v2026.4.26+ also enforces a 30s default void-hook timeout
|
|
180
|
+
// (DEFAULT_VOID_HOOK_TIMEOUT_MS_BY_HOOK in src/plugins/hooks.ts) — a
|
|
181
|
+
// timed-out handler is logged but our underlying postrun work is not
|
|
182
|
+
// cancelled, so eventual consistency is preserved.
|
|
172
183
|
// precompact-extract is intentionally NOT in this handler — it lives
|
|
173
184
|
// in handleBeforePromptBuild for correctness reasons.
|
|
174
185
|
api.on("agent_end", async (event: AgentEndEvent, ctx: AgentEndContext) => {
|
|
@@ -41,6 +41,23 @@
|
|
|
41
41
|
"help": "URL for ClawMem LLM (query expansion, extraction)",
|
|
42
42
|
"advanced": true
|
|
43
43
|
},
|
|
44
|
+
"gpuLlmModel": {
|
|
45
|
+
"label": "LLM Model",
|
|
46
|
+
"placeholder": "qwen3",
|
|
47
|
+
"help": "Model name sent to the configured LLM endpoint",
|
|
48
|
+
"advanced": true
|
|
49
|
+
},
|
|
50
|
+
"gpuLlmReasoningEffort": {
|
|
51
|
+
"label": "Reasoning Effort",
|
|
52
|
+
"placeholder": "(unset)",
|
|
53
|
+
"help": "Optional top-level reasoning_effort for Chat Completions endpoints that support it. Unset omits the field.",
|
|
54
|
+
"advanced": true
|
|
55
|
+
},
|
|
56
|
+
"gpuLlmNoThink": {
|
|
57
|
+
"label": "Append /no_think",
|
|
58
|
+
"help": "Append /no_think to remote LLM prompts (default: true). Disable for standard OpenAI models.",
|
|
59
|
+
"advanced": true
|
|
60
|
+
},
|
|
44
61
|
"gpuRerank": {
|
|
45
62
|
"label": "Reranker Endpoint",
|
|
46
63
|
"placeholder": "http://localhost:8090",
|
|
@@ -78,6 +95,16 @@
|
|
|
78
95
|
"gpuLlm": {
|
|
79
96
|
"type": "string"
|
|
80
97
|
},
|
|
98
|
+
"gpuLlmModel": {
|
|
99
|
+
"type": "string"
|
|
100
|
+
},
|
|
101
|
+
"gpuLlmReasoningEffort": {
|
|
102
|
+
"type": "string",
|
|
103
|
+
"enum": ["none", "minimal", "low", "medium", "high", "xhigh"]
|
|
104
|
+
},
|
|
105
|
+
"gpuLlmNoThink": {
|
|
106
|
+
"type": "boolean"
|
|
107
|
+
},
|
|
81
108
|
"gpuRerank": {
|
|
82
109
|
"type": "string"
|
|
83
110
|
}
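For reference, a hypothetical resolved config entry exercising the new keys; the `plugins.entries.clawmem.config` path matches the `openclaw config set` examples in `cmdSetupOpenClaw` above, and the values are illustrative:

```json
{
  "plugins": {
    "entries": {
      "clawmem": {
        "config": {
          "gpuLlm": "http://gpu-host:8089",
          "gpuLlmModel": "qwen3",
          "gpuLlmReasoningEffort": "low",
          "gpuLlmNoThink": false
        }
      }
    }
  }
}
```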
|