clawmem 0.9.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,6 +16,17 @@ Config via environment variables:
16
16
  CLAWMEM_EMBED_URL — GPU embedding server URL (optional)
17
17
  CLAWMEM_LLM_URL — GPU LLM server URL (optional)
18
18
  CLAWMEM_RERANK_URL — GPU reranker server URL (optional)
19
+
20
+ Agent-context isolation:
21
+ Hermes ``run_agent.py`` passes ``agent_context`` to ``initialize()``
22
+ with one of "primary", "subagent", "cron", or "flush". Per the
23
+ ``MemoryProvider`` ABC contract ("Providers should skip writes for
24
+ non-primary contexts (cron system prompts would corrupt user
25
+ representations)"), this plugin treats the read-side hooks
26
+ (session-bootstrap, context-surfacing) as always safe but routes the
27
+ write-side surfaces (transcript appends in ``sync_turn``, extraction
28
+ in ``on_session_end`` and ``on_pre_compress``) through a primary-only
29
+ guard. Non-primary contexts get retrieval but no vault writes.
19
30
  """
20
31
 
21
32
  from __future__ import annotations
@@ -223,6 +234,10 @@ class ClawMemProvider(MemoryProvider):
223
234
  self._serve_mode: str = "external"
224
235
  self._serve_proc: Optional[subprocess.Popen] = None
225
236
  self._env_extra: dict = {}
237
+ # Agent-context isolation. "primary" = full read+write; everything else
238
+ # ("subagent", "cron", "flush") = reads OK, writes suppressed. See file
239
+ # docstring for the ABC contract this implements.
240
+ self._agent_context: str = "primary"
226
241
 
227
242
  # Prefetch state (generation counter prevents stale overwrites)
228
243
  self._prefetch_result: str = ""
@@ -301,6 +316,12 @@ class ClawMemProvider(MemoryProvider):
301
316
  self._port = _DEFAULT_PORT
302
317
  self._serve_mode = os.environ.get("CLAWMEM_SERVE_MODE", "external")
303
318
  self._hermes_home = kwargs.get("hermes_home", str(Path.home() / ".hermes"))
319
+ self._agent_context = str(kwargs.get("agent_context", "primary") or "primary")
320
+ if self._agent_context != "primary":
321
+ logger.info(
322
+ "clawmem: agent_context=%s — reads enabled, writes suppressed",
323
+ self._agent_context,
324
+ )
304
325
 
305
326
  # Build env for hook shell-outs (GPU endpoints, profile)
306
327
  for var in ("CLAWMEM_EMBED_URL", "CLAWMEM_LLM_URL", "CLAWMEM_RERANK_URL", "CLAWMEM_PROFILE"):
@@ -410,7 +431,11 @@ class ClawMemProvider(MemoryProvider):
410
431
  """Append turn to plugin-managed transcript JSONL.
411
432
 
412
433
  Writes in Claude Code transcript format so ClawMem hooks can read it.
434
+ Suppressed for non-primary agent contexts (subagent/cron/flush) so the
435
+ vault never absorbs system-prompt or background-task content.
413
436
  """
437
+ if self._agent_context != "primary":
438
+ return
414
439
  if not self._transcript_path:
415
440
  return
416
441
 
@@ -441,7 +466,15 @@ class ClawMemProvider(MemoryProvider):
441
466
  # -- Session end / compression hooks ---------------------------------------
442
467
 
443
468
  def on_session_end(self, messages: List[Dict[str, Any]]) -> None:
444
- """Run extraction hooks in parallel."""
469
+ """Run extraction hooks in parallel.
470
+
471
+ Suppressed for non-primary agent contexts (subagent/cron/flush) — the
472
+ decision-extractor / handoff-generator / feedback-loop pipeline would
473
+ otherwise capture cron system prompts or subagent intermediate state
474
+ as if it were primary-agent reasoning.
475
+ """
476
+ if self._agent_context != "primary":
477
+ return
445
478
  if not self._bin or not self._transcript_path:
446
479
  return
447
480
 
@@ -470,7 +503,13 @@ class ClawMemProvider(MemoryProvider):
470
503
  logger.info("clawmem: session %s extraction complete", self._session_id[:8])
471
504
 
472
505
  def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
473
- """Run precompact-extract (side effect only — Hermes ignores return)."""
506
+ """Run precompact-extract (side effect only — Hermes ignores return).
507
+
508
+ Suppressed for non-primary agent contexts so the precompact state file
509
+ in auto-memory never picks up cron/subagent context as primary state.
510
+ """
511
+ if self._agent_context != "primary":
512
+ return ""
474
513
  if not self._bin or not self._transcript_path:
475
514
  return ""
476
515
 
@@ -0,0 +1,166 @@
1
+ /**
2
+ * ClawMem OpenClaw Plugin — Compaction proximity heuristic
3
+ *
4
+ * After §14.3 migration, ClawMem runs precompact-extract pre-emptively from
5
+ * `before_prompt_build` (synchronous, awaited) when the messages buffer is
6
+ * close to the OpenClaw compaction threshold. This module owns the gating
7
+ * math.
8
+ *
9
+ * The OpenClaw `agent_end` PluginHookName event is fire-and-forget at
10
+ * `src/agents/pi-embedded-runner/run/attempt.ts:2226-2249` (literal comment:
11
+ * "This is fire-and-forget, so we don't await"), so it cannot be the
12
+ * load-bearing path for precompact-extract. `before_prompt_build` IS
13
+ * awaited at `attempt.ts:1661` (its return value `prependContext` is used
14
+ * to build the final prompt) and runs strictly before any LLM call that
15
+ * could trigger compaction on the current turn.
16
+ *
17
+ * Compaction threshold derivation:
18
+ * OpenClaw computes the in-flight compaction threshold as
19
+ * contextWindowTokens - reserveTokensFloor - softThresholdTokens
20
+ * (see `src/auto-reply/reply/agent-runner-memory.ts:567`).
21
+ *
22
+ * `contextWindowTokens` is per-model and not exposed in the
23
+ * `before_prompt_build` event payload. ClawMem uses a conservative default
24
+ * (200K tokens, matching Claude defaults) that can be overridden via the
25
+ * plugin config. The proximity ratio (PRECOMPACT_PROXIMITY_RATIO) provides
26
+ * additional headroom for sudden token-count jumps.
27
+ *
28
+ * Token estimation is intentionally rough: the plugin runs precompact-extract
29
+ * itself (regex-only, milliseconds), so over-firing is cheap. The cost of
30
+ * under-firing (missed precompact opportunity) is bounded by the
31
+ * `before_compaction` fire-and-forget defense-in-depth fallback.
32
+ */
33
+
/**
 * Default fraction of the compaction threshold at which precompact-extract
 * fires: the gate opens once the estimated token count reaches
 * `PRECOMPACT_PROXIMITY_RATIO_DEFAULT * compactionThreshold`.
 *
 * At 0.85 the remaining 15% is headroom, so a single tool result or long user
 * prompt does not carry the buffer past the real compaction trigger before
 * precompact has had a chance to run. Tunable via the env var
 * `CLAWMEM_PRECOMPACT_PROXIMITY_RATIO` (clamped to [0.5, 0.95]).
 */
export const PRECOMPACT_PROXIMITY_RATIO_DEFAULT = 0.85;

/**
 * OpenClaw context window size, in tokens, assumed when the plugin config
 * does not override it. 200K matches Claude's default context window. The
 * reserve floor and soft threshold are subtracted from this number to obtain
 * the compaction threshold.
 */
export const DEFAULT_CONTEXT_WINDOW_TOKENS = 200_000;

/**
 * Soft threshold, in tokens, used when no MemoryFlushPlan is registered
 * (matches OpenClaw's own defaults at agent-runner-memory.ts:386).
 */
export const DEFAULT_SOFT_THRESHOLD_TOKENS = 4_000;

/**
 * Reserve floor, in tokens, used when no MemoryFlushPlan is registered
 * (matches OpenClaw's own defaults at agent-runner-memory.ts:386).
 */
export const DEFAULT_RESERVE_TOKENS_FLOOR = 8_000;
57
+
/**
 * Optional overrides for the compaction-threshold math. Every field may be
 * omitted, in which case the corresponding module default applies.
 */
export type CompactionThresholdConfig = {
  /**
   * Context window size in tokens; replaces the conservative 200K default.
   * Plugin config: `compactionContextWindow`.
   */
  contextWindowTokens?: number;
  /**
   * Proximity ratio override. Plugin config: `precompactProximityRatio`.
   */
  precompactProximityRatio?: number;
  /**
   * Soft threshold in tokens (matches MemoryFlushPlan.softThresholdTokens).
   */
  softThresholdTokens?: number;
  /**
   * Reserve floor in tokens (matches MemoryFlushPlan.reserveTokensFloor).
   */
  reserveTokensFloor?: number;
};
68
+
/**
 * Compute the effective compaction threshold, in tokens, from config plus
 * module defaults:
 *
 *     contextWindowTokens - reserveTokensFloor - softThresholdTokens
 *
 * Mirrors the OpenClaw computation at agent-runner-memory.ts:567.
 *
 * An override that is missing, NaN or ±Infinity falls back to its default.
 * Without that guard a single NaN would turn the whole result into NaN
 * (`Math.max(NaN, 1_000)` is NaN), every `>=` comparison in the proximity gate
 * would be false, and precompact would be silently disabled.
 *
 * @param cfg Optional overrides for the context window, reserve floor and
 *   soft threshold.
 * @returns The threshold in tokens; always finite and never below 1_000.
 */
export function resolveCompactionThreshold(cfg: CompactionThresholdConfig): number {
  // Accept an override only when it is an actual finite number.
  const finiteOr = (value: number | undefined, fallback: number): number =>
    value !== undefined && Number.isFinite(value) ? value : fallback;

  const contextWindow = finiteOr(cfg.contextWindowTokens, DEFAULT_CONTEXT_WINDOW_TOKENS);
  const reserve = finiteOr(cfg.reserveTokensFloor, DEFAULT_RESERVE_TOKENS_FLOOR);
  const soft = finiteOr(cfg.softThresholdTokens, DEFAULT_SOFT_THRESHOLD_TOKENS);

  // Keep the threshold positive: with a zero or negative threshold,
  // `estimatedTokens >= ratio * threshold` would hold on every turn, even for
  // an empty buffer.
  return Math.max(contextWindow - reserve - soft, 1_000);
}
81
+
/**
 * Resolve the proximity ratio. Precedence: plugin config
 * (`cfg.precompactProximityRatio`), then the env var
 * `CLAWMEM_PRECOMPACT_PROXIMITY_RATIO`, then
 * `PRECOMPACT_PROXIMITY_RATIO_DEFAULT`.
 *
 * The result is clamped to [0.5, 0.95] so a misconfigured ratio can neither
 * disable precompact entirely (very high ratio) nor fire on every turn (very
 * low ratio).
 *
 * A NaN config value is treated as "not configured". NaN compares false
 * against both clamp bounds, so it would otherwise be returned unchanged,
 * shadow a valid env var, and make the proximity gate false on every turn.
 *
 * @param cfg Optional overrides; only `precompactProximityRatio` is read.
 * @returns A ratio within [0.5, 0.95].
 */
export function resolveProximityRatio(cfg: CompactionThresholdConfig): number {
  const configured = cfg.precompactProximityRatio;
  const fromConfig = Number.isNaN(configured) ? undefined : configured;

  // Unset, empty or non-numeric env values are ignored.
  const raw = process.env.CLAWMEM_PRECOMPACT_PROXIMITY_RATIO;
  const parsed = raw ? Number.parseFloat(raw) : Number.NaN;
  const fromEnv = Number.isFinite(parsed) ? parsed : undefined;

  const ratio = fromConfig ?? fromEnv ?? PRECOMPACT_PROXIMITY_RATIO_DEFAULT;

  // Clamp to the safe range.
  return Math.min(Math.max(ratio, 0.5), 0.95);
}
101
+
/**
 * Rough token estimate for a messages array, using the chars/4 rule of thumb
 * (1 token ≈ 4 characters). Deliberately cheap: the precompact-extract handler
 * is regex-only (milliseconds), so over-firing costs next to nothing.
 *
 * Characters are counted per message as follows:
 *   - a string message counts its own length;
 *   - an object with a string `content` counts that string;
 *   - an object with an array `content` counts each part: strings by length,
 *     objects by their string `text` field, or by JSON length when there is no
 *     such field; any other part counts as zero;
 *   - any other object counts its JSON length;
 *   - anything that cannot be serialized, or is neither string nor object,
 *     counts as zero.
 *
 * @param messages Messages of unknown shape; a non-array or empty input yields 0.
 * @returns The total character count divided by 4, rounded up.
 */
export function estimateTokensFromMessages(messages: unknown[] | undefined): number {
  if (!Array.isArray(messages) || messages.length === 0) return 0;

  // Length of the JSON form, or 0 when the value cannot be serialized.
  const jsonLength = (value: unknown): number => {
    try {
      return JSON.stringify(value).length;
    } catch {
      return 0;
    }
  };

  // Characters contributed by one entry of a `content` array.
  const partLength = (part: unknown): number => {
    if (typeof part === "string") return part.length;
    if (part === null || typeof part !== "object") return 0;
    const text = (part as Record<string, unknown>).text;
    return typeof text === "string" ? text.length : jsonLength(part);
  };

  // Characters contributed by one message.
  const messageLength = (msg: unknown): number => {
    if (typeof msg === "string") return msg.length;
    if (msg === null || typeof msg !== "object") return 0;
    const content = (msg as Record<string, unknown>).content;
    if (typeof content === "string") return content.length;
    if (Array.isArray(content)) {
      return content.reduce<number>((sum, part) => sum + partLength(part), 0);
    }
    return jsonLength(msg);
  };

  const totalChars = messages.reduce<number>((sum, msg) => sum + messageLength(msg), 0);

  // Rough conversion: 1 token ≈ 4 characters
  return Math.ceil(totalChars / 4);
}
155
+
/**
 * Proximity gate. Pure function, kept separate so it can be unit tested.
 *
 * @param params.estimatedTokens Estimated size of the messages buffer, in tokens.
 * @param params.threshold Compaction threshold, in tokens.
 * @param params.proximityRatio Fraction of the threshold at which the gate opens.
 * @returns `true` iff `estimatedTokens` is at or above
 *   `proximityRatio * threshold`.
 */
export function isWithinPrecompactProximity(params: {
  estimatedTokens: number;
  threshold: number;
  proximityRatio: number;
}): boolean {
  const { estimatedTokens, proximityRatio, threshold } = params;
  const triggerPoint = proximityRatio * threshold;
  return estimatedTokens >= triggerPoint;
}