npm - clawmem - Versions diffs - 0.9.0 → 0.10.1 - Mend

clawmem 0.9.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/AGENTS.md +11 -4
package/CLAUDE.md +11 -4
package/README.md +37 -21
package/SKILL.md +16 -6
package/package.json +2 -2
package/src/clawmem.ts +150 -23
package/src/hermes/__init__.py +41 -2
package/src/openclaw/compaction-threshold.ts +166 -0
package/src/openclaw/engine.ts +520 -241
package/src/openclaw/index.ts +151 -140
package/src/openclaw/openclaw.plugin.json +4 -1
package/src/openclaw/package.json +9 -0
package/src/openclaw/session-state.ts +55 -0
package/src/openclaw/transcript-resolver.ts +441 -0

package/src/hermes/__init__.py CHANGED Viewed

@@ -16,6 +16,17 @@ Config via environment variables:
   CLAWMEM_EMBED_URL     — GPU embedding server URL (optional)
   CLAWMEM_LLM_URL       — GPU LLM server URL (optional)
   CLAWMEM_RERANK_URL    — GPU reranker server URL (optional)
+Agent-context isolation:
+  Hermes ``run_agent.py`` passes ``agent_context`` to ``initialize()``
+  with one of "primary", "subagent", "cron", or "flush". Per the
+  ``MemoryProvider`` ABC contract ("Providers should skip writes for
+  non-primary contexts (cron system prompts would corrupt user
+  representations)"), this plugin treats the read-side hooks
+  (session-bootstrap, context-surfacing) as always safe but routes the
+  write-side surfaces (transcript appends in ``sync_turn``, extraction
+  in ``on_session_end`` and ``on_pre_compress``) through a primary-only
+  guard. Non-primary contexts get retrieval but no vault writes.
 """
 from __future__ import annotations
@@ -223,6 +234,10 @@ class ClawMemProvider(MemoryProvider):
         self._serve_mode: str = "external"
         self._serve_proc: Optional[subprocess.Popen] = None
         self._env_extra: dict = {}
+        # Agent-context isolation. "primary" = full read+write; everything else
+        # ("subagent", "cron", "flush") = reads OK, writes suppressed. See file
+        # docstring for the ABC contract this implements.
+        self._agent_context: str = "primary"
         # Prefetch state (generation counter prevents stale overwrites)
         self._prefetch_result: str = ""
@@ -301,6 +316,12 @@ class ClawMemProvider(MemoryProvider):
             self._port = _DEFAULT_PORT
         self._serve_mode = os.environ.get("CLAWMEM_SERVE_MODE", "external")
         self._hermes_home = kwargs.get("hermes_home", str(Path.home() / ".hermes"))
+        self._agent_context = str(kwargs.get("agent_context", "primary") or "primary")
+        if self._agent_context != "primary":
+            logger.info(
+                "clawmem: agent_context=%s — reads enabled, writes suppressed",
+                self._agent_context,
+            )
         # Build env for hook shell-outs (GPU endpoints, profile)
         for var in ("CLAWMEM_EMBED_URL", "CLAWMEM_LLM_URL", "CLAWMEM_RERANK_URL", "CLAWMEM_PROFILE"):
@@ -410,7 +431,11 @@ class ClawMemProvider(MemoryProvider):
         """Append turn to plugin-managed transcript JSONL.
         Writes in Claude Code transcript format so ClawMem hooks can read it.
+        Suppressed for non-primary agent contexts (subagent/cron/flush) so the
+        vault never absorbs system-prompt or background-task content.
         """
+        if self._agent_context != "primary":
+            return
         if not self._transcript_path:
             return
@@ -441,7 +466,15 @@ class ClawMemProvider(MemoryProvider):
     # -- Session end / compression hooks ---------------------------------------
     def on_session_end(self, messages: List[Dict[str, Any]]) -> None:
-        """Run extraction hooks in parallel."""
+        """Run extraction hooks in parallel.
+        Suppressed for non-primary agent contexts (subagent/cron/flush) — the
+        decision-extractor / handoff-generator / feedback-loop pipeline would
+        otherwise capture cron system prompts or subagent intermediate state
+        as if it were primary-agent reasoning.
+        """
+        if self._agent_context != "primary":
+            return
         if not self._bin or not self._transcript_path:
             return
@@ -470,7 +503,13 @@ class ClawMemProvider(MemoryProvider):
         logger.info("clawmem: session %s extraction complete", self._session_id[:8])
     def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
-        """Run precompact-extract (side effect only — Hermes ignores return)."""
+        """Run precompact-extract (side effect only — Hermes ignores return).
+        Suppressed for non-primary agent contexts so the precompact state file
+        in auto-memory never picks up cron/subagent context as primary state.
+        """
+        if self._agent_context != "primary":
+            return ""
         if not self._bin or not self._transcript_path:
             return ""

package/src/openclaw/compaction-threshold.ts ADDED Viewed

@@ -0,0 +1,166 @@
+/**
+ * ClawMem OpenClaw Plugin — Compaction proximity heuristic
+ *
+ * After §14.3 migration, ClawMem runs precompact-extract pre-emptively from
+ * `before_prompt_build` (synchronous, awaited) when the messages buffer is
+ * close to the OpenClaw compaction threshold. This module owns the gating
+ * math.
+ *
+ * The OpenClaw `agent_end` PluginHookName event is fire-and-forget at
+ * `src/agents/pi-embedded-runner/run/attempt.ts:2226-2249` (literal comment:
+ * "This is fire-and-forget, so we don't await"), so it cannot be the
+ * load-bearing path for precompact-extract. `before_prompt_build` IS
+ * awaited at `attempt.ts:1661` (its return value `prependContext` is used
+ * to build the final prompt) and runs strictly before any LLM call that
+ * could trigger compaction on the current turn.
+ *
+ * Compaction threshold derivation:
+ * OpenClaw computes the in-flight compaction threshold as
+ *   contextWindowTokens - reserveTokensFloor - softThresholdTokens
+ * (see `src/auto-reply/reply/agent-runner-memory.ts:567`).
+ *
+ * `contextWindowTokens` is per-model and not exposed in the
+ * `before_prompt_build` event payload. ClawMem uses a conservative default
+ * (200K tokens, matching Claude defaults) that can be overridden via the
+ * plugin config. The proximity ratio (PRECOMPACT_PROXIMITY_RATIO) provides
+ * additional headroom for sudden token-count jumps.
+ *
+ * Token estimation is intentionally rough: the plugin runs precompact-extract
+ * itself (regex-only, milliseconds), so over-firing is cheap. The cost of
+ * under-firing (missed precompact opportunity) is bounded by the
+ * `before_compaction` fire-and-forget defense-in-depth fallback.
+ */
+/**
+ * Proximity ratio gate: precompact runs when estimated tokens cross
+ * `PRECOMPACT_PROXIMITY_RATIO * compactionThreshold`.
+ *
+ * 0.85 leaves 15% headroom for a single tool result or long user prompt to
+ * push the buffer over the actual compaction trigger. Tunable via plugin
+ * config `precompactProximityRatio` or, when that is unset, via env var
+ * `CLAWMEM_PRECOMPACT_PROXIMITY_RATIO`; the result is clamped to [0.5, 0.95].
+ */
+export const PRECOMPACT_PROXIMITY_RATIO_DEFAULT = 0.85;
+/**
+ * Default OpenClaw context window in tokens when the plugin config does
+ * not override it. Matches Claude's default 200K context window. The
+ * threshold subtracts reserve + soft from this number.
+ */
+export const DEFAULT_CONTEXT_WINDOW_TOKENS = 200_000;
+/**
+ * Default soft threshold and reserve floor when no MemoryFlushPlan is
+ * registered (matches OpenClaw's own defaults at agent-runner-memory.ts:386).
+ */
+export const DEFAULT_SOFT_THRESHOLD_TOKENS = 4_000;
+export const DEFAULT_RESERVE_TOKENS_FLOOR = 8_000;
+export type CompactionThresholdConfig = {
+  /** Override the conservative 200K default. Plugin config: `compactionContextWindow`. */
+  contextWindowTokens?: number;
+  /** Override the proximity ratio. Plugin config: `precompactProximityRatio`. */
+  precompactProximityRatio?: number;
+  /** Soft threshold tokens (matches MemoryFlushPlan.softThresholdTokens). */
+  softThresholdTokens?: number;
+  /** Reserve floor tokens (matches MemoryFlushPlan.reserveTokensFloor). */
+  reserveTokensFloor?: number;
+};
+/**
+ * Compute the effective compaction threshold from config + defaults.
+ * Mirrors the OpenClaw computation at agent-runner-memory.ts:567:
+ *   contextWindowTokens - reserveTokensFloor - softThresholdTokens
+ *
+ * Each override is used only when it is a finite number; anything else
+ * (undefined, null, NaN, ±Infinity, or a non-number that slipped through
+ * untyped plugin config) falls back to the matching default. Without this
+ * check a NaN override would make the whole result NaN, because
+ * `Math.max(NaN, 1_000)` is NaN, and any `tokens >= ratio * NaN` comparison
+ * is false, so the proximity gate would never fire.
+ *
+ * @param cfg Optional overrides for context window, reserve floor and soft threshold.
+ * @returns The threshold in tokens, never lower than 1_000.
+ */
+export function resolveCompactionThreshold(cfg: CompactionThresholdConfig): number {
+  // Accept a value only when it is a real, finite number.
+  const finiteOr = (value: unknown, fallback: number): number =>
+    typeof value === "number" && Number.isFinite(value) ? value : fallback;
+  const contextWindow = finiteOr(cfg.contextWindowTokens, DEFAULT_CONTEXT_WINDOW_TOKENS);
+  const reserve = finiteOr(cfg.reserveTokensFloor, DEFAULT_RESERVE_TOKENS_FLOOR);
+  const soft = finiteOr(cfg.softThresholdTokens, DEFAULT_SOFT_THRESHOLD_TOKENS);
+  // Floor at a small positive number so an oversized reserve/soft pair cannot
+  // push the threshold to zero or below (which would make the gate fire on
+  // every turn, including empty buffers).
+  return Math.max(contextWindow - reserve - soft, 1_000);
+}
+/**
+ * Resolve the proximity ratio from config, env var, and default. Clamped
+ * to a safe range so misconfigured ratios cannot disable precompact entirely
+ * (very high ratio) or fire on every turn (very low ratio).
+ *
+ * Precedence: `cfg.precompactProximityRatio`, then env var
+ * `CLAWMEM_PRECOMPACT_PROXIMITY_RATIO`, then
+ * `PRECOMPACT_PROXIMITY_RATIO_DEFAULT`. A source is skipped when it does not
+ * yield a finite number. This matters for the config path: NaN fails both
+ * `<` and `>` comparisons, so it would otherwise escape the clamp and make
+ * every `tokens >= ratio * threshold` check false.
+ *
+ * @param cfg Plugin config; only `precompactProximityRatio` is read.
+ * @returns A ratio within [0.5, 0.95].
+ */
+export function resolveProximityRatio(cfg: CompactionThresholdConfig): number {
+  // Normalise any candidate to "finite number or undefined" so `??` can chain.
+  const finite = (value: unknown): number | undefined =>
+    typeof value === "number" && Number.isFinite(value) ? value : undefined;
+  const fromConfig = finite(cfg.precompactProximityRatio);
+  const rawEnv = process.env.CLAWMEM_PRECOMPACT_PROXIMITY_RATIO;
+  // Unset or empty env var counts as "not provided".
+  const fromEnv = rawEnv ? finite(Number.parseFloat(rawEnv)) : undefined;
+  const ratio = fromConfig ?? fromEnv ?? PRECOMPACT_PROXIMITY_RATIO_DEFAULT;
+  // Clamp to safe range
+  return Math.min(Math.max(ratio, 0.5), 0.95);
+}
+/**
+ * Cheap token estimate for a messages array, using the rough
+ * "1 token ≈ 4 characters" rule of thumb.
+ *
+ * Character counting per message:
+ *   - a string message counts its own length;
+ *   - an object with a string `content` counts that string;
+ *   - an object with an array `content` sums its parts (string parts by
+ *     length, object parts by their string `text` field, otherwise by the
+ *     length of their JSON form);
+ *   - any other object counts the length of its JSON form;
+ *   - everything else (null, numbers, ...) counts as zero.
+ * Values that cannot be serialized contribute nothing instead of throwing.
+ *
+ * @param messages Messages buffer; a non-array or empty array yields 0.
+ * @returns Total characters divided by four, rounded up.
+ */
+export function estimateTokensFromMessages(messages: unknown[] | undefined): number {
+  if (!Array.isArray(messages) || messages.length === 0) return 0;
+  // Length of the JSON form, or 0 when the value is un-serializable.
+  const jsonLength = (value: unknown): number => {
+    try {
+      return JSON.stringify(value).length;
+    } catch {
+      return 0;
+    }
+  };
+  // Characters contributed by one entry of an array-valued `content`.
+  const partLength = (part: unknown): number => {
+    if (typeof part === "string") return part.length;
+    if (!part || typeof part !== "object") return 0;
+    const text = (part as Record<string, unknown>).text;
+    return typeof text === "string" ? text.length : jsonLength(part);
+  };
+  // Characters contributed by one message.
+  const messageLength = (msg: unknown): number => {
+    if (typeof msg === "string") return msg.length;
+    if (!msg || typeof msg !== "object") return 0;
+    const content = (msg as Record<string, unknown>).content;
+    if (typeof content === "string") return content.length;
+    if (!Array.isArray(content)) return jsonLength(msg);
+    let chars = 0;
+    for (const part of content) chars += partLength(part);
+    return chars;
+  };
+  let charCount = 0;
+  for (const entry of messages) charCount += messageLength(entry);
+  // Rough conversion: 1 token ≈ 4 characters
+  return Math.ceil(charCount / 4);
+}
+/**
+ * Proximity gate. Reports whether the estimated token count has reached
+ * the trigger point `proximityRatio * threshold` (inclusive). Pure function
+ * with no side effects, so it can be unit tested in isolation.
+ *
+ * @param params.estimatedTokens Estimated size of the messages buffer in tokens.
+ * @param params.threshold Compaction threshold in tokens.
+ * @param params.proximityRatio Fraction of the threshold at which the gate opens.
+ * @returns true when `estimatedTokens >= proximityRatio * threshold`.
+ */
+export function isWithinPrecompactProximity(params: {
+  estimatedTokens: number;
+  threshold: number;
+  proximityRatio: number;
+}): boolean {
+  const { estimatedTokens, threshold, proximityRatio } = params;
+  const triggerPoint = proximityRatio * threshold;
+  return estimatedTokens >= triggerPoint;
+}