clawmem 0.9.0 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +11 -4
- package/CLAUDE.md +11 -4
- package/README.md +37 -21
- package/SKILL.md +16 -6
- package/package.json +2 -2
- package/src/clawmem.ts +150 -23
- package/src/hermes/__init__.py +41 -2
- package/src/openclaw/compaction-threshold.ts +166 -0
- package/src/openclaw/engine.ts +520 -241
- package/src/openclaw/index.ts +151 -140
- package/src/openclaw/openclaw.plugin.json +4 -1
- package/src/openclaw/package.json +9 -0
- package/src/openclaw/session-state.ts +55 -0
- package/src/openclaw/transcript-resolver.ts +441 -0
package/src/hermes/__init__.py
CHANGED
|
@@ -16,6 +16,17 @@ Config via environment variables:
|
|
|
16
16
|
CLAWMEM_EMBED_URL — GPU embedding server URL (optional)
|
|
17
17
|
CLAWMEM_LLM_URL — GPU LLM server URL (optional)
|
|
18
18
|
CLAWMEM_RERANK_URL — GPU reranker server URL (optional)
|
|
19
|
+
|
|
20
|
+
Agent-context isolation:
|
|
21
|
+
Hermes ``run_agent.py`` passes ``agent_context`` to ``initialize()``
|
|
22
|
+
with one of "primary", "subagent", "cron", or "flush". Per the
|
|
23
|
+
``MemoryProvider`` ABC contract ("Providers should skip writes for
|
|
24
|
+
non-primary contexts (cron system prompts would corrupt user
|
|
25
|
+
representations)"), this plugin treats the read-side hooks
|
|
26
|
+
(session-bootstrap, context-surfacing) as always safe but routes the
|
|
27
|
+
write-side surfaces (transcript appends in ``sync_turn``, extraction
|
|
28
|
+
in ``on_session_end`` and ``on_pre_compress``) through a primary-only
|
|
29
|
+
guard. Non-primary contexts get retrieval but no vault writes.
|
|
19
30
|
"""
|
|
20
31
|
|
|
21
32
|
from __future__ import annotations
|
|
@@ -223,6 +234,10 @@ class ClawMemProvider(MemoryProvider):
|
|
|
223
234
|
self._serve_mode: str = "external"
|
|
224
235
|
self._serve_proc: Optional[subprocess.Popen] = None
|
|
225
236
|
self._env_extra: dict = {}
|
|
237
|
+
# Agent-context isolation. "primary" = full read+write; everything else
|
|
238
|
+
# ("subagent", "cron", "flush") = reads OK, writes suppressed. See file
|
|
239
|
+
# docstring for the ABC contract this implements.
|
|
240
|
+
self._agent_context: str = "primary"
|
|
226
241
|
|
|
227
242
|
# Prefetch state (generation counter prevents stale overwrites)
|
|
228
243
|
self._prefetch_result: str = ""
|
|
@@ -301,6 +316,12 @@ class ClawMemProvider(MemoryProvider):
|
|
|
301
316
|
self._port = _DEFAULT_PORT
|
|
302
317
|
self._serve_mode = os.environ.get("CLAWMEM_SERVE_MODE", "external")
|
|
303
318
|
self._hermes_home = kwargs.get("hermes_home", str(Path.home() / ".hermes"))
|
|
319
|
+
self._agent_context = str(kwargs.get("agent_context", "primary") or "primary")
|
|
320
|
+
if self._agent_context != "primary":
|
|
321
|
+
logger.info(
|
|
322
|
+
"clawmem: agent_context=%s — reads enabled, writes suppressed",
|
|
323
|
+
self._agent_context,
|
|
324
|
+
)
|
|
304
325
|
|
|
305
326
|
# Build env for hook shell-outs (GPU endpoints, profile)
|
|
306
327
|
for var in ("CLAWMEM_EMBED_URL", "CLAWMEM_LLM_URL", "CLAWMEM_RERANK_URL", "CLAWMEM_PROFILE"):
|
|
@@ -410,7 +431,11 @@ class ClawMemProvider(MemoryProvider):
|
|
|
410
431
|
"""Append turn to plugin-managed transcript JSONL.
|
|
411
432
|
|
|
412
433
|
Writes in Claude Code transcript format so ClawMem hooks can read it.
|
|
434
|
+
Suppressed for non-primary agent contexts (subagent/cron/flush) so the
|
|
435
|
+
vault never absorbs system-prompt or background-task content.
|
|
413
436
|
"""
|
|
437
|
+
if self._agent_context != "primary":
|
|
438
|
+
return
|
|
414
439
|
if not self._transcript_path:
|
|
415
440
|
return
|
|
416
441
|
|
|
@@ -441,7 +466,15 @@ class ClawMemProvider(MemoryProvider):
|
|
|
441
466
|
# -- Session end / compression hooks ---------------------------------------
|
|
442
467
|
|
|
443
468
|
def on_session_end(self, messages: List[Dict[str, Any]]) -> None:
|
|
444
|
-
"""Run extraction hooks in parallel.
|
|
469
|
+
"""Run extraction hooks in parallel.
|
|
470
|
+
|
|
471
|
+
Suppressed for non-primary agent contexts (subagent/cron/flush) — the
|
|
472
|
+
decision-extractor / handoff-generator / feedback-loop pipeline would
|
|
473
|
+
otherwise capture cron system prompts or subagent intermediate state
|
|
474
|
+
as if it were primary-agent reasoning.
|
|
475
|
+
"""
|
|
476
|
+
if self._agent_context != "primary":
|
|
477
|
+
return
|
|
445
478
|
if not self._bin or not self._transcript_path:
|
|
446
479
|
return
|
|
447
480
|
|
|
@@ -470,7 +503,13 @@ class ClawMemProvider(MemoryProvider):
|
|
|
470
503
|
logger.info("clawmem: session %s extraction complete", self._session_id[:8])
|
|
471
504
|
|
|
472
505
|
def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
|
|
473
|
-
"""Run precompact-extract (side effect only — Hermes ignores return).
|
|
506
|
+
"""Run precompact-extract (side effect only — Hermes ignores return).
|
|
507
|
+
|
|
508
|
+
Suppressed for non-primary agent contexts so the precompact state file
|
|
509
|
+
in auto-memory never picks up cron/subagent context as primary state.
|
|
510
|
+
"""
|
|
511
|
+
if self._agent_context != "primary":
|
|
512
|
+
return ""
|
|
474
513
|
if not self._bin or not self._transcript_path:
|
|
475
514
|
return ""
|
|
476
515
|
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ClawMem OpenClaw Plugin — Compaction proximity heuristic
|
|
3
|
+
*
|
|
4
|
+
* After §14.3 migration, ClawMem runs precompact-extract pre-emptively from
|
|
5
|
+
* `before_prompt_build` (synchronous, awaited) when the messages buffer is
|
|
6
|
+
* close to the OpenClaw compaction threshold. This module owns the gating
|
|
7
|
+
* math.
|
|
8
|
+
*
|
|
9
|
+
* The OpenClaw `agent_end` PluginHookName event is fire-and-forget at
|
|
10
|
+
* `src/agents/pi-embedded-runner/run/attempt.ts:2226-2249` (literal comment:
|
|
11
|
+
* "This is fire-and-forget, so we don't await"), so it cannot be the
|
|
12
|
+
* load-bearing path for precompact-extract. `before_prompt_build` IS
|
|
13
|
+
* awaited at `attempt.ts:1661` (its return value `prependContext` is used
|
|
14
|
+
* to build the final prompt) and runs strictly before any LLM call that
|
|
15
|
+
* could trigger compaction on the current turn.
|
|
16
|
+
*
|
|
17
|
+
* Compaction threshold derivation:
|
|
18
|
+
* OpenClaw computes the in-flight compaction threshold as
|
|
19
|
+
* contextWindowTokens - reserveTokensFloor - softThresholdTokens
|
|
20
|
+
* (see `src/auto-reply/reply/agent-runner-memory.ts:567`).
|
|
21
|
+
*
|
|
22
|
+
* `contextWindowTokens` is per-model and not exposed in the
|
|
23
|
+
* `before_prompt_build` event payload. ClawMem uses a conservative default
|
|
24
|
+
* (200K tokens, matching Claude defaults) that can be overridden via the
|
|
25
|
+
* plugin config. The proximity ratio (PRECOMPACT_PROXIMITY_RATIO) provides
|
|
26
|
+
* additional headroom for sudden token-count jumps.
|
|
27
|
+
*
|
|
28
|
+
* Token estimation is intentionally rough: the plugin runs precompact-extract
|
|
29
|
+
* itself (regex-only, milliseconds), so over-firing is cheap. The cost of
|
|
30
|
+
* under-firing (missed precompact opportunity) is bounded by the
|
|
31
|
+
* `before_compaction` fire-and-forget defense-in-depth fallback.
|
|
32
|
+
*/
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Proximity ratio gate: precompact runs when estimated tokens cross
|
|
36
|
+
* `PRECOMPACT_PROXIMITY_RATIO * compactionThreshold`.
|
|
37
|
+
*
|
|
38
|
+
* 0.85 leaves 15% headroom for a single tool result or long user prompt to
|
|
39
|
+
* push the buffer over the actual compaction trigger. Tunable via env var
|
|
40
|
+
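* `CLAWMEM_PRECOMPACT_PROXIMITY_RATIO` or the plugin config `precompactProximityRatio`, which takes precedence over the env var; the result is clamped to [0.5, 0.95].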
|
|
41
|
+
*/
|
|
42
|
+
export const PRECOMPACT_PROXIMITY_RATIO_DEFAULT = 0.85;
|
|
43
|
+
|
|
44
|
+
/**
|
|
45
|
+
* Default OpenClaw context window in tokens when the plugin config does
|
|
46
|
+
* not override it. Matches Claude's default 200K context window. The
|
|
47
|
+
* threshold subtracts reserve + soft from this number.
|
|
48
|
+
*/
|
|
49
|
+
export const DEFAULT_CONTEXT_WINDOW_TOKENS = 200_000;
|
|
50
|
+
|
|
51
|
+
/**
|
|
52
|
+
* Default soft threshold and reserve floor when no MemoryFlushPlan is
|
|
53
|
+
* registered (matches OpenClaw's own defaults at agent-runner-memory.ts:386).
|
|
54
|
+
*/
|
|
55
|
+
export const DEFAULT_SOFT_THRESHOLD_TOKENS = 4_000;
|
|
56
|
+
export const DEFAULT_RESERVE_TOKENS_FLOOR = 8_000;
|
|
57
|
+
|
|
58
|
+
export type CompactionThresholdConfig = {
|
|
59
|
+
/** Override the conservative 200K default. Plugin config: `compactionContextWindow`. */
|
|
60
|
+
contextWindowTokens?: number;
|
|
61
|
+
/** Override the proximity ratio. Plugin config: `precompactProximityRatio`. */
|
|
62
|
+
precompactProximityRatio?: number;
|
|
63
|
+
/** Soft threshold tokens (matches MemoryFlushPlan.softThresholdTokens). */
|
|
64
|
+
softThresholdTokens?: number;
|
|
65
|
+
/** Reserve floor tokens (matches MemoryFlushPlan.reserveTokensFloor). */
|
|
66
|
+
reserveTokensFloor?: number;
|
|
67
|
+
};
|
|
68
|
+
|
|
69
|
+
/**
 * Compute the effective compaction threshold from config + defaults.
 * Mirrors the OpenClaw computation at agent-runner-memory.ts:567:
 *
 *   contextWindowTokens - reserveTokensFloor - softThresholdTokens
 *
 * Each term falls back to its module default when the config omits it or
 * supplies something that is not a finite number (NaN, +/-Infinity, or a
 * non-number that slipped through untyped plugin config). A NaN or infinite
 * threshold would make the proximity gate's `>=` comparison permanently
 * false and silently disable pre-emptive precompact.
 *
 * @param cfg Optional overrides for the three terms.
 * @returns The threshold in tokens, never below 1,000.
 */
export function resolveCompactionThreshold(cfg: CompactionThresholdConfig): number {
  // Accept an override only when it is a real, finite number.
  const pick = (value: number | undefined, fallback: number): number =>
    typeof value === "number" && Number.isFinite(value) ? value : fallback;

  const contextWindow = pick(cfg.contextWindowTokens, DEFAULT_CONTEXT_WINDOW_TOKENS);
  const reserve = pick(cfg.reserveTokensFloor, DEFAULT_RESERVE_TOKENS_FLOOR);
  const soft = pick(cfg.softThresholdTokens, DEFAULT_SOFT_THRESHOLD_TOKENS);

  // Floor at a small positive number: an undersized window (or oversized
  // reserve/soft values) must not yield a zero or negative threshold, which
  // would make the proximity gate fire on every turn.
  return Math.max(contextWindow - reserve - soft, 1_000);
}
|
|
81
|
+
|
|
82
|
+
/**
 * Resolve the proximity ratio from config, env var, and default.
 *
 * Precedence: `cfg.precompactProximityRatio`, then the
 * `CLAWMEM_PRECOMPACT_PROXIMITY_RATIO` env var, then
 * `PRECOMPACT_PROXIMITY_RATIO_DEFAULT`. The result is clamped to [0.5, 0.95]
 * so misconfigured ratios cannot disable precompact entirely (very high
 * ratio) or fire on every turn (very low ratio).
 *
 * A NaN config value is treated as "not set". NaN compares false against
 * both clamp bounds, so it would otherwise be returned unclamped and make
 * the proximity gate permanently false.
 *
 * @param cfg Plugin config; only `precompactProximityRatio` is read.
 * @returns A ratio within [0.5, 0.95].
 */
export function resolveProximityRatio(cfg: CompactionThresholdConfig): number {
  const configured = cfg.precompactProximityRatio;
  const fromConfig =
    typeof configured === "number" && !Number.isNaN(configured) ? configured : undefined;

  // Empty or unset env var means "no override"; unparsable or non-finite
  // values are discarded the same way.
  const raw = process.env.CLAWMEM_PRECOMPACT_PROXIMITY_RATIO;
  const parsed = raw ? Number.parseFloat(raw) : Number.NaN;
  const fromEnv = Number.isFinite(parsed) ? parsed : undefined;

  const ratio = fromConfig ?? fromEnv ?? PRECOMPACT_PROXIMITY_RATIO_DEFAULT;

  // Clamp to safe range
  if (ratio < 0.5) return 0.5;
  if (ratio > 0.95) return 0.95;
  return ratio;
}
|
|
101
|
+
|
|
102
|
+
/**
 * Length of `JSON.stringify(value)`, or 0 when the value cannot be
 * serialized (circular reference, BigInt, or a result of `undefined`).
 */
function safeJsonLength(value: unknown): number {
  try {
    return JSON.stringify(value).length;
  } catch {
    // Un-serializable values contribute nothing to the estimate.
    return 0;
  }
}

/**
 * Characters contributed by one element of a message's `content` array:
 * a bare string, an object with a string `text` field, or any other object
 * measured by its JSON length. Non-object primitives count as 0.
 */
function contentPartChars(part: unknown): number {
  if (typeof part === "string") return part.length;
  if (!part || typeof part !== "object") return 0;
  const text = (part as Record<string, unknown>).text;
  return typeof text === "string" ? text.length : safeJsonLength(part);
}

/**
 * Characters contributed by one message: a bare string, an object whose
 * `content` is a string or an array of parts, or any other object measured
 * by its JSON length. Non-object primitives count as 0.
 */
function messageChars(msg: unknown): number {
  if (typeof msg === "string") return msg.length;
  if (!msg || typeof msg !== "object") return 0;

  const content = (msg as Record<string, unknown>).content;
  if (typeof content === "string") return content.length;
  if (Array.isArray(content)) {
    let chars = 0;
    for (const part of content) chars += contentPartChars(part);
    return chars;
  }
  // Unknown shape: fall back to the serialized size of the whole message.
  return safeJsonLength(msg);
}

/**
 * Estimate token count from a messages array. Intentionally cheap: the
 * precompact-extract handler is regex-only (milliseconds), so over-firing
 * has near-zero cost.
 *
 * Uses a rough chars/4 heuristic (matches OpenAI's 1 token ≈ 4 chars rule of
 * thumb). Walks the messages array best-effort, handling unknown object
 * shapes by stringifying and estimating from the result length.
 *
 * @param messages Message buffer; `undefined`, non-arrays and empty arrays yield 0.
 * @returns Estimated token count, rounded up.
 */
export function estimateTokensFromMessages(messages: unknown[] | undefined): number {
  if (!Array.isArray(messages) || messages.length === 0) return 0;

  let totalChars = 0;
  for (const msg of messages) totalChars += messageChars(msg);

  // Rough conversion: 1 token ≈ 4 characters
  return Math.ceil(totalChars / 4);
}
|
|
155
|
+
|
|
156
|
+
/**
 * Proximity gate for pre-emptive precompact. Pure function for unit testing.
 *
 * @param params.estimatedTokens Estimated size of the current messages buffer.
 * @param params.threshold Compaction threshold in tokens.
 * @param params.proximityRatio Fraction of the threshold at which the gate opens.
 * @returns `true` iff `estimatedTokens` is at or above `proximityRatio * threshold`.
 */
export function isWithinPrecompactProximity(params: {
  estimatedTokens: number;
  threshold: number;
  proximityRatio: number;
}): boolean {
  const { estimatedTokens, threshold, proximityRatio } = params;
  const triggerPoint = proximityRatio * threshold;
  return estimatedTokens >= triggerPoint;
}
|