clawmem 0.10.0 → 0.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +4 -1
- package/CLAUDE.md +4 -1
- package/README.md +19 -6
- package/SKILL.md +3 -1
- package/package.json +1 -1
- package/src/clawmem.ts +1 -0
- package/src/hermes/__init__.py +71 -3
- package/src/llm.ts +59 -8
- package/src/openclaw/index.ts +12 -5
- package/src/openclaw/openclaw.plugin.json +27 -0
- package/src/openclaw/package.json +1 -1
package/AGENTS.md
CHANGED
|
@@ -208,6 +208,8 @@ When using ClawMem with OpenClaw, choose one of two deployment options:
|
|
|
208
208
|
|
|
209
209
|
**Active Memory coexistence:** ClawMem is fully compatible with OpenClaw's Active Memory plugin (v2026.4.10+). They search different backends (ClawMem vault vs dreaming/wiki) and inject into different prompt regions (user prompt vs system prompt). Both can run simultaneously — no configuration needed.
|
|
210
210
|
|
|
211
|
+
**memory-core dreaming sidecar coexistence (v2026.4.18+, #65411):** ClawMem and `memory-core` dreaming can also run side-by-side. When ClawMem owns the memory slot AND `plugins.entries.memory-core.config.dreaming.enabled = true`, OpenClaw loads `memory-core`'s dreaming engine alongside ClawMem (the rest of `memory-core` stays unloaded). Dreaming continues writing to `memory/dreaming/{phase}/YYYY-MM-DD.md` (separate from ClawMem's vault). The default after `openclaw plugins enable clawmem` is `dreaming.enabled = false` (ClawMem-only). Set it to `true` if you want to keep the dreaming output stream alongside ClawMem.
|
|
212
|
+
|
|
211
213
|
**OpenClaw v2026.4.11+ required for ClawMem v0.10.0+.** v2026.4.11 tightened plugin discovery (`readdirSync({ withFileTypes: true })` + `dirent.isDirectory()`) and added a plugin-directory ownership check. ClawMem v0.10.0+ ships the new discovery manifest (`src/openclaw/package.json` with `openclaw.extensions`) and defaults to recursive copy (not symlink) installs to clear both gates. Older notes: v2026.4.10 fixed #64192 (`plugins.slots.contextEngine` silently dropped during config normalization, relevant only for pre-v0.10.0 ClawMem that still used the `contextEngine` slot).
|
|
212
214
|
|
|
213
215
|
**ClawMem v0.10.0 uses the `memory` slot, not `contextEngine`.** `openclaw plugins enable clawmem` sets `plugins.slots.memory: "clawmem"` and disables competing memory plugins (`memory-core`, `memory-lancedb`) in one step. The older `openclaw config set plugins.slots.contextEngine clawmem` pattern does not apply to v0.10.0+.
|
|
@@ -740,5 +742,6 @@ clawmem focus clear --session-id abc123
|
|
|
740
742
|
- OpenClaw memory plugin: `clawmem setup openclaw` — registers ClawMem as a native OpenClaw memory plugin (`kind: memory`, v0.10.0+). Lifecycle events on the plugin-hook bus: `before_prompt_build` is the **load-bearing** path — it runs prompt-aware retrieval (context-surfacing) AND the pre-emptive `precompact-extract` synchronously when token usage approaches the compaction threshold, so state is captured BEFORE the LLM call that could trigger compaction; `agent_end` runs decision-extractor + handoff-generator + feedback-loop in parallel (fire-and-forget at OpenClaw); `before_compaction` is **defense-in-depth fallback only** — fire-and-forget, races the compactor, exists for the rare case where the `before_prompt_build` proximity heuristic missed a sudden token jump; `session_start` registers the session and caches first-turn bootstrap context. Shares the same vault as Claude Code hooks (dual-mode). SQLite busy_timeout=5000ms for concurrent access safety.
|
|
741
743
|
- **§14.3 pure-memory migration (v0.10.0):** v0.10.0 drops the `ClawMemContextEngine` class entirely. Previous versions registered as `kind: context-engine` and implemented `assemble()`/`bootstrap()`/`afterTurn()`/`compact()` on a class. v0.10.0 registers as `kind: memory` and wires every lifecycle surface through plugin hooks on the event bus. Retrieval pipeline, composite scoring, vault format, and the 5 registered agent tools are unchanged — this is a packaging and registration change, not a behavioral one.
|
|
742
744
|
- **v2026.4.11 packaging fix (v0.10.0):** `src/openclaw/package.json` declares `openclaw.extensions: ["./index.ts"]` (required by v2026.4.11's discovery path), and `cmdSetupOpenClaw` defaults to `cpSync(..., { recursive: true, dereference: true })` because v2026.4.11's discoverer uses `readdirSync({ withFileTypes: true })` where symlink `isDirectory() === false`. A `--link` opt-in flag preserves the old symlink behavior for dev workflows with a warning.
|
|
745
|
+
- **v2026.4.18 synchronous-`register()` constraint:** OpenClaw v2026.4.18 (`fix(plugins): enforce synchronous registration`) throws `"plugin register must be synchronous"` if the plugin's `register()` function returns a Promise. ClawMem's `register(api)` in `src/openclaw/index.ts` is intentionally synchronous — all `await` work lives inside per-event handlers, never in registration itself. Companion change: register failures now atomically roll back side effects (globals, hook registrations, tool registrations), so any future throw inside `register()` will leave OpenClaw in a clean state. Keep the function synchronous and throw-free; do not add `async` or top-level `await`.
|
|
743
746
|
- **Precompact correctness contract (v0.10.0):** The load-bearing precompact path is `before_prompt_build`, NOT `before_compaction`. `before_prompt_build` is awaited synchronously before the LLM call that could trigger compaction, so it cannot race the compactor. `before_compaction` is fire-and-forget at OpenClaw's call site and exists only as a safety net for the rare case the proximity heuristic in `before_prompt_build` missed a sudden token-count jump. Do not describe `before_compaction` as the primary surface — the guarantee comes from `before_prompt_build`. v0.3.0 did the pre-emptive extraction from `ContextEngine.compact()` via `delegateCompactionToRuntime()`; v0.10.0 moves it into `before_prompt_build` where it can be awaited before the triggering LLM call. User-visible behavior is equivalent or better: state capture now happens strictly before compaction, not in a race with it.
|
|
744
|
-
- Hermes Agent MemoryProvider plugin: `src/hermes/` — Python plugin implementing Hermes's `MemoryProvider` ABC.
|
|
747
|
+
- Hermes Agent MemoryProvider plugin: `src/hermes/` — Python plugin implementing Hermes's `MemoryProvider` ABC. **Preferred install:** copy into `$HERMES_HOME/plugins/clawmem/` (typically `~/.hermes/plugins/clawmem/`) — Hermes #10529 (v2026.4.13+) added user-plugin discovery, so this path survives `git pull` of hermes-agent. **Bundled-style install:** `hermes-agent/plugins/memory/clawmem/` still works (bundled-first precedence on name collisions). Uses shell-out for lifecycle hooks (session-bootstrap, context-surfacing, extraction) and REST API for tools (retrieve, get, session_log, timeline, similar). Plugin manages its own transcript JSONL for ClawMem hooks. Supports external (you run `clawmem serve`) and managed (plugin starts/stops serve) modes. **Agent-context isolation:** `initialize()` reads the `agent_context` kwarg Hermes passes ("primary"/"subagent"/"cron"/"flush"); for non-primary contexts the read-side hooks (session-bootstrap, context-surfacing) still run but the write-side surfaces (`sync_turn` transcript appends, `on_session_end` extraction, `on_pre_compress` precompact) early-return so cron system prompts and subagent intermediate state never reach the vault.
|
package/CLAUDE.md
CHANGED
|
@@ -208,6 +208,8 @@ When using ClawMem with OpenClaw, choose one of two deployment options:
|
|
|
208
208
|
|
|
209
209
|
**Active Memory coexistence:** ClawMem is fully compatible with OpenClaw's Active Memory plugin (v2026.4.10+). They search different backends (ClawMem vault vs dreaming/wiki) and inject into different prompt regions (user prompt vs system prompt). Both can run simultaneously — no configuration needed.
|
|
210
210
|
|
|
211
|
+
**memory-core dreaming sidecar coexistence (v2026.4.18+, #65411):** ClawMem and `memory-core` dreaming can also run side-by-side. When ClawMem owns the memory slot AND `plugins.entries.memory-core.config.dreaming.enabled = true`, OpenClaw loads `memory-core`'s dreaming engine alongside ClawMem (the rest of `memory-core` stays unloaded). Dreaming continues writing to `memory/dreaming/{phase}/YYYY-MM-DD.md` (separate from ClawMem's vault). The default after `openclaw plugins enable clawmem` is `dreaming.enabled = false` (ClawMem-only). Set it to `true` if you want to keep the dreaming output stream alongside ClawMem.
|
|
212
|
+
|
|
211
213
|
**OpenClaw v2026.4.11+ required for ClawMem v0.10.0+.** v2026.4.11 tightened plugin discovery (`readdirSync({ withFileTypes: true })` + `dirent.isDirectory()`) and added a plugin-directory ownership check. ClawMem v0.10.0+ ships the new discovery manifest (`src/openclaw/package.json` with `openclaw.extensions`) and defaults to recursive copy (not symlink) installs to clear both gates. Older notes: v2026.4.10 fixed #64192 (`plugins.slots.contextEngine` silently dropped during config normalization, relevant only for pre-v0.10.0 ClawMem that still used the `contextEngine` slot).
|
|
212
214
|
|
|
213
215
|
**ClawMem v0.10.0 uses the `memory` slot, not `contextEngine`.** `openclaw plugins enable clawmem` sets `plugins.slots.memory: "clawmem"` and disables competing memory plugins (`memory-core`, `memory-lancedb`) in one step. The older `openclaw config set plugins.slots.contextEngine clawmem` pattern does not apply to v0.10.0+.
|
|
@@ -740,5 +742,6 @@ clawmem focus clear --session-id abc123
|
|
|
740
742
|
- OpenClaw memory plugin: `clawmem setup openclaw` — registers ClawMem as a native OpenClaw memory plugin (`kind: memory`, v0.10.0+). Lifecycle events on the plugin-hook bus: `before_prompt_build` is the **load-bearing** path — it runs prompt-aware retrieval (context-surfacing) AND the pre-emptive `precompact-extract` synchronously when token usage approaches the compaction threshold, so state is captured BEFORE the LLM call that could trigger compaction; `agent_end` runs decision-extractor + handoff-generator + feedback-loop in parallel (fire-and-forget at OpenClaw); `before_compaction` is **defense-in-depth fallback only** — fire-and-forget, races the compactor, exists for the rare case where the `before_prompt_build` proximity heuristic missed a sudden token jump; `session_start` registers the session and caches first-turn bootstrap context. Shares the same vault as Claude Code hooks (dual-mode). SQLite busy_timeout=5000ms for concurrent access safety.
|
|
741
743
|
- **§14.3 pure-memory migration (v0.10.0):** v0.10.0 drops the `ClawMemContextEngine` class entirely. Previous versions registered as `kind: context-engine` and implemented `assemble()`/`bootstrap()`/`afterTurn()`/`compact()` on a class. v0.10.0 registers as `kind: memory` and wires every lifecycle surface through plugin hooks on the event bus. Retrieval pipeline, composite scoring, vault format, and the 5 registered agent tools are unchanged — this is a packaging and registration change, not a behavioral one.
|
|
742
744
|
- **v2026.4.11 packaging fix (v0.10.0):** `src/openclaw/package.json` declares `openclaw.extensions: ["./index.ts"]` (required by v2026.4.11's discovery path), and `cmdSetupOpenClaw` defaults to `cpSync(..., { recursive: true, dereference: true })` because v2026.4.11's discoverer uses `readdirSync({ withFileTypes: true })` where symlink `isDirectory() === false`. A `--link` opt-in flag preserves the old symlink behavior for dev workflows with a warning.
|
|
745
|
+
- **v2026.4.18 synchronous-`register()` constraint:** OpenClaw v2026.4.18 (`fix(plugins): enforce synchronous registration`) throws `"plugin register must be synchronous"` if the plugin's `register()` function returns a Promise. ClawMem's `register(api)` in `src/openclaw/index.ts` is intentionally synchronous — all `await` work lives inside per-event handlers, never in registration itself. Companion change: register failures now atomically roll back side effects (globals, hook registrations, tool registrations), so any future throw inside `register()` will leave OpenClaw in a clean state. Keep the function synchronous and throw-free; do not add `async` or top-level `await`.
|
|
743
746
|
- **Precompact correctness contract (v0.10.0):** The load-bearing precompact path is `before_prompt_build`, NOT `before_compaction`. `before_prompt_build` is awaited synchronously before the LLM call that could trigger compaction, so it cannot race the compactor. `before_compaction` is fire-and-forget at OpenClaw's call site and exists only as a safety net for the rare case the proximity heuristic in `before_prompt_build` missed a sudden token-count jump. Do not describe `before_compaction` as the primary surface — the guarantee comes from `before_prompt_build`. v0.3.0 did the pre-emptive extraction from `ContextEngine.compact()` via `delegateCompactionToRuntime()`; v0.10.0 moves it into `before_prompt_build` where it can be awaited before the triggering LLM call. User-visible behavior is equivalent or better: state capture now happens strictly before compaction, not in a race with it.
|
|
744
|
-
- Hermes Agent MemoryProvider plugin: `src/hermes/` — Python plugin implementing Hermes's `MemoryProvider` ABC.
|
|
747
|
+
- Hermes Agent MemoryProvider plugin: `src/hermes/` — Python plugin implementing Hermes's `MemoryProvider` ABC. **Preferred install:** copy into `$HERMES_HOME/plugins/clawmem/` (typically `~/.hermes/plugins/clawmem/`) — Hermes #10529 (v2026.4.13+) added user-plugin discovery, so this path survives `git pull` of hermes-agent. **Bundled-style install:** `hermes-agent/plugins/memory/clawmem/` still works (bundled-first precedence on name collisions). Uses shell-out for lifecycle hooks (session-bootstrap, context-surfacing, extraction) and REST API for tools (retrieve, get, session_log, timeline, similar). Plugin manages its own transcript JSONL for ClawMem hooks. Supports external (you run `clawmem serve`) and managed (plugin starts/stops serve) modes. **Agent-context isolation:** `initialize()` reads the `agent_context` kwarg Hermes passes ("primary"/"subagent"/"cron"/"flush"); for non-primary contexts the read-side hooks (session-bootstrap, context-surfacing) still run but the write-side surfaces (`sync_turn` transcript appends, `on_session_end` extraction, `on_pre_compress` precompact) early-return so cron system prompts and subagent intermediate state never reach the vault.
|
package/README.md
CHANGED
|
@@ -203,7 +203,7 @@ Disable OpenClaw's native memory search to avoid duplicate injection:
|
|
|
203
203
|
openclaw config set agents.defaults.memorySearch.extraPaths "[]"
|
|
204
204
|
```
|
|
205
205
|
|
|
206
|
-
ClawMem coexists cleanly with OpenClaw's [Active Memory](https://docs.openclaw.ai/concepts/active-memory) plugin (v2026.4.10+). They search different backends and inject into different prompt regions, so
|
|
206
|
+
ClawMem coexists cleanly with OpenClaw's [Active Memory](https://docs.openclaw.ai/concepts/active-memory) plugin (v2026.4.10+) and, on OpenClaw v2026.4.18+ (#65411), with the `memory-core` dreaming sidecar — both run alongside ClawMem instead of being mutually exclusive. They search different backends and inject into different prompt regions, so they do not conflict. See the [OpenClaw plugin guide — Active Memory coexistence](docs/guides/openclaw-plugin.md#coexistence-with-openclaw-active-memory) and the [memory-core dreaming sidecar section](docs/guides/openclaw-plugin.md#coexistence-with-memory-core-dreaming-sidecar) for the two patterns.
|
|
207
207
|
|
|
208
208
|
**Pair ClawMem (memory) with a context-engine plugin (v0.10.0+).** OpenClaw and Hermes maintainers have converged on a two-surface plugin model: one slot for memory plugins (cross-session, retrieval-first) and a separate slot for context-engine plugins (in-session, compression/compaction-first). Under that model ClawMem is a memory layer — it has always been one in Hermes via the `MemoryProvider` ABC, and v0.10.0 moves the OpenClaw integration to the same semantic slot. You can now run ClawMem in the `memory` slot alongside an LCM-style compression plugin (for example, `lossless-claw`) in the `context-engine` slot. The two plugins do not overlap: one persists across sessions, the other reshapes the live window. See the [OpenClaw plugin guide — memory vs context engine](docs/guides/openclaw-plugin.md#memory-vs-context-engine--the-dual-plugin-surface) for the full rationale.
|
|
209
209
|
|
|
@@ -218,11 +218,16 @@ ClawMem integrates as a native MemoryProvider plugin — Hermes's pluggable inte
|
|
|
218
218
|
**Install:**
|
|
219
219
|
|
|
220
220
|
```bash
|
|
221
|
-
#
|
|
221
|
+
# Preferred — user-plugin path (Hermes #10529, v2026.4.13+).
|
|
222
|
+
# Survives `git pull` of hermes-agent and avoids dual-registration with bundled providers.
|
|
223
|
+
cp -r /path/to/ClawMem/src/hermes ${HERMES_HOME:-~/.hermes}/plugins/clawmem
|
|
224
|
+
|
|
225
|
+
# Or, the bundled-style path (always supported, takes precedence on name collisions).
|
|
226
|
+
# Recommended only when you actively work in the hermes-agent source tree.
|
|
222
227
|
cp -r /path/to/ClawMem/src/hermes /path/to/hermes-agent/plugins/memory/clawmem
|
|
223
228
|
|
|
224
|
-
#
|
|
225
|
-
ln -s /path/to/ClawMem/src/hermes
|
|
229
|
+
# Symlink alternative for in-place development (either path).
|
|
230
|
+
ln -s /path/to/ClawMem/src/hermes ${HERMES_HOME:-~/.hermes}/plugins/clawmem
|
|
226
231
|
```
|
|
227
232
|
|
|
228
233
|
**Configure** in your Hermes profile's `.env` or environment:
|
|
@@ -327,6 +332,7 @@ If your GPU lives on a separate machine, point the env vars at it:
|
|
|
327
332
|
```bash
|
|
328
333
|
export CLAWMEM_EMBED_URL=http://gpu-host:8088
|
|
329
334
|
export CLAWMEM_LLM_URL=http://gpu-host:8089
|
|
335
|
+
export CLAWMEM_LLM_MODEL=qwen3
|
|
330
336
|
export CLAWMEM_RERANK_URL=http://gpu-host:8090
|
|
331
337
|
```
|
|
332
338
|
|
|
@@ -939,6 +945,9 @@ Notes referenced by the agent during a session get boosted (`access_count++`). U
|
|
|
939
945
|
| `CLAWMEM_EMBED_TPM_LIMIT` | `100000` | Tokens-per-minute limit for cloud embedding pacing. Match to your provider tier. |
|
|
940
946
|
| `CLAWMEM_EMBED_DIMENSIONS` | (none) | Output dimensions for OpenAI `text-embedding-3-*` Matryoshka models (e.g. `512`, `1024`). |
|
|
941
947
|
| `CLAWMEM_LLM_URL` | `http://localhost:8089` | LLM server URL for intent/query/A-MEM. Without it, falls to `node-llama-cpp` (if allowed). |
|
|
948
|
+
| `CLAWMEM_LLM_MODEL` | `qwen3` | Model name sent to the configured LLM endpoint. Override this for OpenAI-compatible proxies that expect a different model name, such as `gpt-5.4-mini`. |
|
|
949
|
+
| `CLAWMEM_LLM_REASONING_EFFORT` | (none) | Optional top-level `reasoning_effort` field for Chat Completions endpoints that support it (for example OpenAI reasoning models). Leave unset for llama-server/vLLM unless your serving stack explicitly accepts that field. |
|
|
950
|
+
| `CLAWMEM_LLM_NO_THINK` | `true` | Append `/no_think` to remote LLM prompts. Set to `false` for standard OpenAI models and other endpoints that reject the Qwen-style suffix or treat it as literal prompt text. |
|
|
942
951
|
| `CLAWMEM_RERANK_URL` | `http://localhost:8090` | Reranker server URL. Without it, falls to `node-llama-cpp` (if allowed). |
|
|
943
952
|
| `CLAWMEM_NO_LOCAL_MODELS` | `false` | Block `node-llama-cpp` from auto-downloading GGUF models. Set `true` for remote-only setups where you want fail-fast on unreachable endpoints. |
|
|
944
953
|
| `CLAWMEM_MERGE_SCORE_NORMAL` | `0.93` | **v0.7.1.** Phase 2 consolidation merge-safety threshold when candidate and existing anchors align. Merges above this normalized 3-gram cosine score are allowed. |
|
|
@@ -1105,8 +1114,12 @@ Index your content directories with `clawmem bootstrap` as above. The OpenClaw p
|
|
|
1105
1114
|
#### Hermes-specific
|
|
1106
1115
|
|
|
1107
1116
|
```bash
|
|
1108
|
-
# Install the memory provider plugin
|
|
1109
|
-
|
|
1117
|
+
# Install the memory provider plugin.
|
|
1118
|
+
# Preferred: $HERMES_HOME/plugins/clawmem/ (user-plugin path, Hermes #10529, v2026.4.13+).
|
|
1119
|
+
ln -s $(npm root -g)/clawmem/src/hermes ${HERMES_HOME:-~/.hermes}/plugins/clawmem
|
|
1120
|
+
|
|
1121
|
+
# Bundled-style path is also supported (takes precedence on name collisions):
|
|
1122
|
+
# ln -s $(npm root -g)/clawmem/src/hermes /path/to/hermes-agent/plugins/memory/clawmem
|
|
1110
1123
|
|
|
1111
1124
|
# Start the REST API (required for Hermes tool calls)
|
|
1112
1125
|
clawmem serve --port 7438 &
|
package/SKILL.md
CHANGED
|
@@ -607,6 +607,8 @@ Phase 3 deductive synthesis applies the same `contradicts` link for any draft th
|
|
|
607
607
|
|
|
608
608
|
**Active Memory coexistence:** ClawMem is fully compatible with OpenClaw's Active Memory plugin (v2026.4.10+). They search different backends and inject into different prompt regions, so both can run simultaneously. The deployment options below control native memory search (`memorySearch.extraPaths`), not Active Memory.
|
|
609
609
|
|
|
610
|
+
**memory-core dreaming sidecar coexistence (v2026.4.18+, #65411):** When ClawMem owns the memory slot AND `plugins.entries.memory-core.config.dreaming.enabled = true`, OpenClaw loads `memory-core`'s dreaming engine alongside ClawMem (the rest of `memory-core` stays unloaded). The default after `openclaw plugins enable clawmem` is `dreaming.enabled = false` (ClawMem-only). Set it to `true` to keep the dreaming output stream (`memory/dreaming/{phase}/YYYY-MM-DD.md`) alongside ClawMem.
|
|
611
|
+
|
|
610
612
|
**OpenClaw v2026.4.11+ required for ClawMem v0.10.0+.** v2026.4.11 tightened plugin discovery (requires `package.json` with `openclaw.extensions`, rejects symlinked plugin directories). ClawMem v0.10.0 ships the new discovery manifest and defaults `clawmem setup openclaw` to recursive copy (not symlink). v2026.4.10 earlier fixed the #64192 config-normalization bug that dropped the `contextEngine` slot, which is now moot on v0.10.0+ because v0.10.0 uses the `memory` slot instead.
|
|
611
613
|
|
|
612
614
|
**ClawMem v0.10.0 plugin kind:** `memory` (not `context-engine`). Enable with `openclaw plugins enable clawmem`, which also disables `memory-core` / `memory-lancedb` in one step.
|
|
@@ -795,7 +797,7 @@ clawmem focus clear --session-id abc123
|
|
|
795
797
|
- Beads integration: `syncBeadsIssues()` queries `bd` CLI (Dolt backend, v0.58.0+), creates markdown docs, maps dependency edges into `memory_relations`. Watcher auto-triggers on `.beads/` changes; `beads_sync` MCP for manual sync.
|
|
796
798
|
- HTTP REST API: `clawmem serve [--port 7438]` — optional REST server on localhost. Search, retrieval, lifecycle, and graph traversal. `POST /retrieve` mirrors `memory_retrieve` with auto-routing (keyword/semantic/causal/timeline/hybrid). `POST /search` provides direct mode selection. Bearer token auth via `CLAWMEM_API_TOKEN` env var (disabled if unset).
|
|
797
799
|
- OpenClaw memory plugin (v0.10.0+): `clawmem setup openclaw` — registers as native OpenClaw memory plugin (`kind: memory`). Dual-mode: shares vault with Claude Code hooks. Hook wiring on the plugin-hook bus: `before_prompt_build` is the **load-bearing** path — it runs prompt-aware retrieval AND the pre-emptive `precompact-extract` synchronously when token usage approaches the compaction threshold, so state is captured before the LLM call that could trigger compaction. `agent_end` runs decision-extractor + handoff-generator + feedback-loop in parallel (fire-and-forget at OpenClaw's call site). `before_compaction` is **defense-in-depth fallback only** — fire-and-forget, races the compactor, exists for the rare case where the proximity heuristic in `before_prompt_build` missed a sudden token jump. `session_start` registers the session + caches first-turn bootstrap context. The §14.3 migration removed the `ClawMemContextEngine` class and moved the plugin from the `context-engine` slot to the `memory` slot. Requires OpenClaw v2026.4.11+ (earlier versions do not support the new discovery contract).
|
|
798
|
-
- Hermes Agent MemoryProvider plugin: `src/hermes/` — Python plugin for Hermes's memory system. Shell-out hooks for lifecycle (prefetch, extraction, precompact), REST API for tools. Plugin-managed transcript JSONL bridges Hermes turn pairs to ClawMem file format. Shares vault with Claude Code and OpenClaw.
|
|
800
|
+
- Hermes Agent MemoryProvider plugin: `src/hermes/` — Python plugin for Hermes's memory system. Shell-out hooks for lifecycle (prefetch, extraction, precompact), REST API for tools. Plugin-managed transcript JSONL bridges Hermes turn pairs to ClawMem file format. Shares vault with Claude Code and OpenClaw. **Preferred install path:** `$HERMES_HOME/plugins/clawmem/` (Hermes #10529 user-plugin discovery, v2026.4.13+) — survives `git pull` of hermes-agent. The bundled `hermes-agent/plugins/memory/clawmem/` path still works. **Agent-context isolation:** read-side hooks always run; write-side surfaces (`sync_turn`, `on_session_end`, `on_pre_compress`) early-return when `agent_context != "primary"` so cron/subagent state never reaches the vault.
|
|
799
801
|
|
|
800
802
|
## Tool Selection (one-liner)
|
|
801
803
|
|
package/package.json
CHANGED
package/src/clawmem.ts
CHANGED
|
@@ -1491,6 +1491,7 @@ async function cmdSetupOpenClaw(args: string[]) {
|
|
|
1491
1491
|
console.log(` 3. Configure GPU endpoints (if not using defaults):`);
|
|
1492
1492
|
console.log(` ${c.cyan}openclaw config set plugins.entries.clawmem.config.gpuEmbed http://YOUR_GPU:8088${c.reset}`);
|
|
1493
1493
|
console.log(` ${c.cyan}openclaw config set plugins.entries.clawmem.config.gpuLlm http://YOUR_GPU:8089${c.reset}`);
|
|
1494
|
+
console.log(` ${c.cyan}openclaw config set plugins.entries.clawmem.config.gpuLlmModel qwen3${c.reset}`);
|
|
1494
1495
|
console.log(` ${c.cyan}openclaw config set plugins.entries.clawmem.config.gpuRerank http://YOUR_GPU:8090${c.reset}`);
|
|
1495
1496
|
console.log();
|
|
1496
1497
|
console.log(` 4. Start the REST API (for agent tools):`);
|
package/src/hermes/__init__.py
CHANGED
|
@@ -15,7 +15,21 @@ Config via environment variables:
|
|
|
15
15
|
CLAWMEM_PROFILE — Retrieval profile: speed, balanced, deep (default: balanced)
|
|
16
16
|
CLAWMEM_EMBED_URL — GPU embedding server URL (optional)
|
|
17
17
|
CLAWMEM_LLM_URL — GPU LLM server URL (optional)
|
|
18
|
+
CLAWMEM_LLM_MODEL — Model name sent to the GPU/cloud LLM endpoint (optional)
|
|
19
|
+
CLAWMEM_LLM_REASONING_EFFORT — Top-level reasoning_effort for supporting Chat Completions endpoints (optional)
|
|
20
|
+
CLAWMEM_LLM_NO_THINK — Append /no_think to remote prompts; false disables it for standard OpenAI models (optional)
|
|
18
21
|
CLAWMEM_RERANK_URL — GPU reranker server URL (optional)
|
|
22
|
+
|
|
23
|
+
Agent-context isolation:
|
|
24
|
+
Hermes ``run_agent.py`` passes ``agent_context`` to ``initialize()``
|
|
25
|
+
with one of "primary", "subagent", "cron", or "flush". Per the
|
|
26
|
+
``MemoryProvider`` ABC contract ("Providers should skip writes for
|
|
27
|
+
non-primary contexts (cron system prompts would corrupt user
|
|
28
|
+
representations)"), this plugin treats the read-side hooks
|
|
29
|
+
(session-bootstrap, context-surfacing) as always safe but routes the
|
|
30
|
+
write-side surfaces (transcript appends in ``sync_turn``, extraction
|
|
31
|
+
in ``on_session_end`` and ``on_pre_compress``) through a primary-only
|
|
32
|
+
guard. Non-primary contexts get retrieval but no vault writes.
|
|
19
33
|
"""
|
|
20
34
|
|
|
21
35
|
from __future__ import annotations
|
|
@@ -223,6 +237,10 @@ class ClawMemProvider(MemoryProvider):
|
|
|
223
237
|
self._serve_mode: str = "external"
|
|
224
238
|
self._serve_proc: Optional[subprocess.Popen] = None
|
|
225
239
|
self._env_extra: dict = {}
|
|
240
|
+
# Agent-context isolation. "primary" = full read+write; everything else
|
|
241
|
+
# ("subagent", "cron", "flush") = reads OK, writes suppressed. See file
|
|
242
|
+
# docstring for the ABC contract this implements.
|
|
243
|
+
self._agent_context: str = "primary"
|
|
226
244
|
|
|
227
245
|
# Prefetch state (generation counter prevents stale overwrites)
|
|
228
246
|
self._prefetch_result: str = ""
|
|
@@ -280,6 +298,24 @@ class ClawMemProvider(MemoryProvider):
|
|
|
280
298
|
"secret": False,
|
|
281
299
|
"env_var": "CLAWMEM_LLM_URL",
|
|
282
300
|
},
|
|
301
|
+
{
|
|
302
|
+
"key": "llm_model",
|
|
303
|
+
"description": "Model name sent to the GPU LLM server (e.g., qwen3, gpt-5.4-mini)",
|
|
304
|
+
"secret": False,
|
|
305
|
+
"env_var": "CLAWMEM_LLM_MODEL",
|
|
306
|
+
},
|
|
307
|
+
{
|
|
308
|
+
"key": "llm_reasoning_effort",
|
|
309
|
+
"description": "Optional top-level reasoning_effort for Chat Completions endpoints that support it",
|
|
310
|
+
"secret": False,
|
|
311
|
+
"env_var": "CLAWMEM_LLM_REASONING_EFFORT",
|
|
312
|
+
},
|
|
313
|
+
{
|
|
314
|
+
"key": "llm_no_think",
|
|
315
|
+
"description": "Append /no_think to remote LLM prompts; disable for standard OpenAI models",
|
|
316
|
+
"secret": False,
|
|
317
|
+
"env_var": "CLAWMEM_LLM_NO_THINK",
|
|
318
|
+
},
|
|
283
319
|
]
|
|
284
320
|
|
|
285
321
|
# -- Core lifecycle --------------------------------------------------------
|
|
@@ -301,9 +337,23 @@ class ClawMemProvider(MemoryProvider):
|
|
|
301
337
|
self._port = _DEFAULT_PORT
|
|
302
338
|
self._serve_mode = os.environ.get("CLAWMEM_SERVE_MODE", "external")
|
|
303
339
|
self._hermes_home = kwargs.get("hermes_home", str(Path.home() / ".hermes"))
|
|
340
|
+
self._agent_context = str(kwargs.get("agent_context", "primary") or "primary")
|
|
341
|
+
if self._agent_context != "primary":
|
|
342
|
+
logger.info(
|
|
343
|
+
"clawmem: agent_context=%s — reads enabled, writes suppressed",
|
|
344
|
+
self._agent_context,
|
|
345
|
+
)
|
|
304
346
|
|
|
305
347
|
# Build env for hook shell-outs (GPU endpoints, profile)
|
|
306
|
-
for var in (
|
|
348
|
+
for var in (
|
|
349
|
+
"CLAWMEM_EMBED_URL",
|
|
350
|
+
"CLAWMEM_LLM_URL",
|
|
351
|
+
"CLAWMEM_LLM_MODEL",
|
|
352
|
+
"CLAWMEM_LLM_REASONING_EFFORT",
|
|
353
|
+
"CLAWMEM_LLM_NO_THINK",
|
|
354
|
+
"CLAWMEM_RERANK_URL",
|
|
355
|
+
"CLAWMEM_PROFILE",
|
|
356
|
+
):
|
|
307
357
|
val = os.environ.get(var)
|
|
308
358
|
if val:
|
|
309
359
|
self._env_extra[var] = val
|
|
@@ -410,7 +460,11 @@ class ClawMemProvider(MemoryProvider):
|
|
|
410
460
|
"""Append turn to plugin-managed transcript JSONL.
|
|
411
461
|
|
|
412
462
|
Writes in Claude Code transcript format so ClawMem hooks can read it.
|
|
463
|
+
Suppressed for non-primary agent contexts (subagent/cron/flush) so the
|
|
464
|
+
vault never absorbs system-prompt or background-task content.
|
|
413
465
|
"""
|
|
466
|
+
if self._agent_context != "primary":
|
|
467
|
+
return
|
|
414
468
|
if not self._transcript_path:
|
|
415
469
|
return
|
|
416
470
|
|
|
@@ -441,7 +495,15 @@ class ClawMemProvider(MemoryProvider):
|
|
|
441
495
|
# -- Session end / compression hooks ---------------------------------------
|
|
442
496
|
|
|
443
497
|
def on_session_end(self, messages: List[Dict[str, Any]]) -> None:
|
|
444
|
-
"""Run extraction hooks in parallel.
|
|
498
|
+
"""Run extraction hooks in parallel.
|
|
499
|
+
|
|
500
|
+
Suppressed for non-primary agent contexts (subagent/cron/flush) — the
|
|
501
|
+
decision-extractor / handoff-generator / feedback-loop pipeline would
|
|
502
|
+
otherwise capture cron system prompts or subagent intermediate state
|
|
503
|
+
as if it were primary-agent reasoning.
|
|
504
|
+
"""
|
|
505
|
+
if self._agent_context != "primary":
|
|
506
|
+
return
|
|
445
507
|
if not self._bin or not self._transcript_path:
|
|
446
508
|
return
|
|
447
509
|
|
|
@@ -470,7 +532,13 @@ class ClawMemProvider(MemoryProvider):
|
|
|
470
532
|
logger.info("clawmem: session %s extraction complete", self._session_id[:8])
|
|
471
533
|
|
|
472
534
|
def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
|
|
473
|
-
"""Run precompact-extract (side effect only — Hermes ignores return).
|
|
535
|
+
"""Run precompact-extract (side effect only — Hermes ignores return).
|
|
536
|
+
|
|
537
|
+
Suppressed for non-primary agent contexts so the precompact state file
|
|
538
|
+
in auto-memory never picks up cron/subagent context as primary state.
|
|
539
|
+
"""
|
|
540
|
+
if self._agent_context != "primary":
|
|
541
|
+
return ""
|
|
474
542
|
if not self._bin or not self._transcript_path:
|
|
475
543
|
return ""
|
|
476
544
|
|
package/src/llm.ts
CHANGED
|
@@ -237,6 +237,23 @@ export type LlamaCppConfig = {
|
|
|
237
237
|
* When set, generate() calls /v1/chat/completions instead of local node-llama-cpp.
|
|
238
238
|
*/
|
|
239
239
|
remoteLlmUrl?: string;
|
|
240
|
+
/**
|
|
241
|
+
* Remote LLM model name to send with chat completion requests.
|
|
242
|
+
* Env: CLAWMEM_LLM_MODEL
|
|
243
|
+
*/
|
|
244
|
+
remoteLlmModel?: string;
|
|
245
|
+
/**
|
|
246
|
+
* Optional top-level reasoning_effort field for Chat Completions endpoints that support it.
|
|
247
|
+
* Example values: none, minimal, low, medium, high, xhigh.
|
|
248
|
+
* Env: CLAWMEM_LLM_REASONING_EFFORT
|
|
249
|
+
*/
|
|
250
|
+
remoteLlmReasoningEffort?: string;
|
|
251
|
+
/**
|
|
252
|
+
* Whether to append /no_think to remote LLM prompts.
|
|
253
|
+
* Defaults to true to preserve current behavior with Qwen3-compatible endpoints.
|
|
254
|
+
* Env: CLAWMEM_LLM_NO_THINK
|
|
255
|
+
*/
|
|
256
|
+
remoteLlmNoThink?: boolean;
|
|
240
257
|
/**
|
|
241
258
|
* Inactivity timeout in ms before unloading contexts (default: 2 minutes, 0 to disable).
|
|
242
259
|
*
|
|
@@ -259,6 +276,23 @@ export type LlamaCppConfig = {
|
|
|
259
276
|
*/
|
|
260
277
|
// Default inactivity timeout: 2 minutes
|
|
261
278
|
const DEFAULT_INACTIVITY_TIMEOUT_MS = 2 * 60 * 1000;
|
|
279
|
+
const ALLOWED_REMOTE_LLM_REASONING_EFFORTS = new Set(["none", "minimal", "low", "medium", "high", "xhigh"]);
|
|
280
|
+
|
|
281
|
+
function normalizeRemoteLlmReasoningEffort(value?: string): string | null {
|
|
282
|
+
const raw = (value || "").trim().toLowerCase();
|
|
283
|
+
if (!raw) return null;
|
|
284
|
+
if (!ALLOWED_REMOTE_LLM_REASONING_EFFORTS.has(raw)) {
|
|
285
|
+
console.warn(`[clawmem] Ignoring unsupported remoteLlmReasoningEffort=${raw}`);
|
|
286
|
+
return null;
|
|
287
|
+
}
|
|
288
|
+
return raw;
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
function buildRemoteChatCompletionsUrl(remoteLlmUrl: string): string {
|
|
292
|
+
const baseUrl = remoteLlmUrl.replace(/\/+$/, "");
|
|
293
|
+
const endpoint = baseUrl.endsWith("/v1") ? "/chat/completions" : "/v1/chat/completions";
|
|
294
|
+
return `${baseUrl}${endpoint}`;
|
|
295
|
+
}
|
|
262
296
|
|
|
263
297
|
export class LlamaCpp implements LLM {
|
|
264
298
|
private llama: Llama | null = null;
|
|
@@ -276,6 +310,9 @@ export class LlamaCpp implements LLM {
|
|
|
276
310
|
private remoteEmbedApiKey: string | null;
|
|
277
311
|
private remoteEmbedModel: string;
|
|
278
312
|
private remoteLlmUrl: string | null;
|
|
313
|
+
private remoteLlmModel: string;
|
|
314
|
+
private remoteLlmReasoningEffort: string | null;
|
|
315
|
+
private remoteLlmNoThink: boolean;
|
|
279
316
|
|
|
280
317
|
// Ensure we don't load the same model concurrently (which can allocate duplicate VRAM).
|
|
281
318
|
private embedModelLoadPromise: Promise<LlamaModel> | null = null;
|
|
@@ -306,6 +343,10 @@ export class LlamaCpp implements LLM {
|
|
|
306
343
|
this.remoteEmbedApiKey = config.remoteEmbedApiKey || null;
|
|
307
344
|
this.remoteEmbedModel = config.remoteEmbedModel || "embedding";
|
|
308
345
|
this.remoteLlmUrl = config.remoteLlmUrl || null;
|
|
346
|
+
const normalizedRemoteLlmModel = config.remoteLlmModel?.trim();
|
|
347
|
+
this.remoteLlmModel = normalizedRemoteLlmModel || "qwen3";
|
|
348
|
+
this.remoteLlmReasoningEffort = normalizeRemoteLlmReasoningEffort(config.remoteLlmReasoningEffort);
|
|
349
|
+
this.remoteLlmNoThink = config.remoteLlmNoThink ?? true;
|
|
309
350
|
this.inactivityTimeoutMs = config.inactivityTimeoutMs ?? DEFAULT_INACTIVITY_TIMEOUT_MS;
|
|
310
351
|
this.disposeModelsOnInactivity = config.disposeModelsOnInactivity ?? false;
|
|
311
352
|
}
|
|
@@ -921,15 +962,19 @@ export class LlamaCpp implements LLM {
|
|
|
921
962
|
// Re-check: concurrent call may have set cooldown while we were awaited
|
|
922
963
|
if (this.isRemoteLlmDown()) return null;
|
|
923
964
|
try {
|
|
924
|
-
const
|
|
965
|
+
const body: Record<string, unknown> = {
|
|
966
|
+
model: this.remoteLlmModel,
|
|
967
|
+
messages: [{ role: "user", content: this.remoteLlmNoThink ? `${prompt} /no_think` : prompt }],
|
|
968
|
+
max_tokens: maxTokens,
|
|
969
|
+
temperature,
|
|
970
|
+
};
|
|
971
|
+
if (this.remoteLlmReasoningEffort) {
|
|
972
|
+
body.reasoning_effort = this.remoteLlmReasoningEffort;
|
|
973
|
+
}
|
|
974
|
+
const resp = await fetch(buildRemoteChatCompletionsUrl(this.remoteLlmUrl!), {
|
|
925
975
|
method: "POST",
|
|
926
976
|
headers: { "Content-Type": "application/json" },
|
|
927
|
-
body: JSON.stringify(
|
|
928
|
-
model: "qwen3",
|
|
929
|
-
messages: [{ role: "user", content: `${prompt} /no_think` }],
|
|
930
|
-
max_tokens: maxTokens,
|
|
931
|
-
temperature,
|
|
932
|
-
}),
|
|
977
|
+
body: JSON.stringify(body),
|
|
933
978
|
signal,
|
|
934
979
|
});
|
|
935
980
|
|
|
@@ -1254,6 +1299,13 @@ export function getDefaultLlamaCpp(): LlamaCpp {
|
|
|
1254
1299
|
remoteEmbedApiKey: embedApiKey,
|
|
1255
1300
|
remoteEmbedModel: process.env.CLAWMEM_EMBED_MODEL || undefined,
|
|
1256
1301
|
remoteLlmUrl: process.env.CLAWMEM_LLM_URL || undefined,
|
|
1302
|
+
remoteLlmModel: process.env.CLAWMEM_LLM_MODEL?.trim() || undefined,
|
|
1303
|
+
remoteLlmReasoningEffort: process.env.CLAWMEM_LLM_REASONING_EFFORT || undefined,
|
|
1304
|
+
remoteLlmNoThink: (() => {
|
|
1305
|
+
const raw = (process.env.CLAWMEM_LLM_NO_THINK || "").trim().toLowerCase();
|
|
1306
|
+
if (!raw) return undefined;
|
|
1307
|
+
return !["0", "false", "no", "off"].includes(raw);
|
|
1308
|
+
})(),
|
|
1257
1309
|
});
|
|
1258
1310
|
}
|
|
1259
1311
|
return defaultLlamaCpp;
|
|
@@ -1276,4 +1328,3 @@ export async function disposeDefaultLlamaCpp(): Promise<void> {
|
|
|
1276
1328
|
defaultLlamaCpp = null;
|
|
1277
1329
|
}
|
|
1278
1330
|
}
|
|
1279
|
-
|
package/src/openclaw/index.ts
CHANGED
|
@@ -37,8 +37,8 @@
|
|
|
37
37
|
* 4. REST API service (`clawmem serve`) lifecycle — unchanged.
|
|
38
38
|
*
|
|
39
39
|
* §14.3 critical correctness contract: `agent_end` is fire-and-forget at
|
|
40
|
-
* `attempt.ts:
|
|
41
|
-
* `handleBeforePromptBuild` (which IS awaited at `attempt.ts:
|
|
40
|
+
* `attempt.ts:2470-2496`. Precompact-extract MUST run inside
|
|
41
|
+
* `handleBeforePromptBuild` (which IS awaited at `attempt.ts:1873`), gated
|
|
42
42
|
* by the proximity heuristic in `compaction-threshold.ts`. See `engine.ts`
|
|
43
43
|
* top-of-file comment for the full rationale.
|
|
44
44
|
*/
|
|
@@ -89,7 +89,7 @@ const clawmemPlugin = {
|
|
|
89
89
|
name: "ClawMem",
|
|
90
90
|
description:
|
|
91
91
|
"On-device hybrid memory layer for OpenClaw — composite scoring, graph traversal, lifecycle management, and pre-emptive compaction state extraction",
|
|
92
|
-
version: "0.10.
|
|
92
|
+
version: "0.10.1",
|
|
93
93
|
kind: "memory" as const,
|
|
94
94
|
|
|
95
95
|
register(api: any) {
|
|
@@ -107,6 +107,13 @@ const clawmemPlugin = {
|
|
|
107
107
|
env: {
|
|
108
108
|
...(pluginCfg.gpuEmbed ? { CLAWMEM_EMBED_URL: pluginCfg.gpuEmbed as string } : {}),
|
|
109
109
|
...(pluginCfg.gpuLlm ? { CLAWMEM_LLM_URL: pluginCfg.gpuLlm as string } : {}),
|
|
110
|
+
...(pluginCfg.gpuLlmModel ? { CLAWMEM_LLM_MODEL: pluginCfg.gpuLlmModel as string } : {}),
|
|
111
|
+
...(pluginCfg.gpuLlmReasoningEffort
|
|
112
|
+
? { CLAWMEM_LLM_REASONING_EFFORT: pluginCfg.gpuLlmReasoningEffort as string }
|
|
113
|
+
: {}),
|
|
114
|
+
...(pluginCfg.gpuLlmNoThink !== undefined
|
|
115
|
+
? { CLAWMEM_LLM_NO_THINK: String(pluginCfg.gpuLlmNoThink) }
|
|
116
|
+
: {}),
|
|
110
117
|
...(pluginCfg.gpuRerank ? { CLAWMEM_RERANK_URL: pluginCfg.gpuRerank as string } : {}),
|
|
111
118
|
CLAWMEM_PROFILE: profile,
|
|
112
119
|
},
|
|
@@ -154,7 +161,7 @@ const clawmemPlugin = {
|
|
|
154
161
|
// ----- Plugin Hook: before_prompt_build (AWAITED — load-bearing path) -----
|
|
155
162
|
// Both context-surfacing retrieval injection and pre-emptive precompact
|
|
156
163
|
// extraction live here. handleBeforePromptBuild is async and the OpenClaw
|
|
157
|
-
// attempt path awaits the result at attempt.ts:
|
|
164
|
+
// attempt path awaits the result at attempt.ts:1873 before building the
|
|
158
165
|
// effective prompt. precompact-extract therefore runs strictly before
|
|
159
166
|
// the LLM call that could trigger compaction on this turn.
|
|
160
167
|
api.on(
|
|
@@ -168,7 +175,7 @@ const clawmemPlugin = {
|
|
|
168
175
|
// ----- Plugin Hook: agent_end (FIRE-AND-FORGET in core) -----
|
|
169
176
|
// Decision-extractor, handoff-generator, and feedback-loop run here.
|
|
170
177
|
// These writes are eventually-consistent (saveMemory dedupes), so the
|
|
171
|
-
// fire-and-forget context at attempt.ts:
|
|
178
|
+
// fire-and-forget context at attempt.ts:2470-2496 is acceptable.
|
|
172
179
|
// precompact-extract is intentionally NOT in this handler — it lives
|
|
173
180
|
// in handleBeforePromptBuild for correctness reasons.
|
|
174
181
|
api.on("agent_end", async (event: AgentEndEvent, ctx: AgentEndContext) => {
|
|
@@ -41,6 +41,23 @@
|
|
|
41
41
|
"help": "URL for ClawMem LLM (query expansion, extraction)",
|
|
42
42
|
"advanced": true
|
|
43
43
|
},
|
|
44
|
+
"gpuLlmModel": {
|
|
45
|
+
"label": "LLM Model",
|
|
46
|
+
"placeholder": "qwen3",
|
|
47
|
+
"help": "Model name sent to the configured LLM endpoint",
|
|
48
|
+
"advanced": true
|
|
49
|
+
},
|
|
50
|
+
"gpuLlmReasoningEffort": {
|
|
51
|
+
"label": "Reasoning Effort",
|
|
52
|
+
"placeholder": "(unset)",
|
|
53
|
+
"help": "Optional top-level reasoning_effort for Chat Completions endpoints that support it. Unset omits the field.",
|
|
54
|
+
"advanced": true
|
|
55
|
+
},
|
|
56
|
+
"gpuLlmNoThink": {
|
|
57
|
+
"label": "Append /no_think",
|
|
58
|
+
"help": "Append /no_think to remote LLM prompts (default: true). Disable for standard OpenAI models.",
|
|
59
|
+
"advanced": true
|
|
60
|
+
},
|
|
44
61
|
"gpuRerank": {
|
|
45
62
|
"label": "Reranker Endpoint",
|
|
46
63
|
"placeholder": "http://localhost:8090",
|
|
@@ -78,6 +95,16 @@
|
|
|
78
95
|
"gpuLlm": {
|
|
79
96
|
"type": "string"
|
|
80
97
|
},
|
|
98
|
+
"gpuLlmModel": {
|
|
99
|
+
"type": "string"
|
|
100
|
+
},
|
|
101
|
+
"gpuLlmReasoningEffort": {
|
|
102
|
+
"type": "string",
|
|
103
|
+
"enum": ["none", "minimal", "low", "medium", "high", "xhigh"]
|
|
104
|
+
},
|
|
105
|
+
"gpuLlmNoThink": {
|
|
106
|
+
"type": "boolean"
|
|
107
|
+
},
|
|
81
108
|
"gpuRerank": {
|
|
82
109
|
"type": "string"
|
|
83
110
|
}
|