npm - @martian-engineering/lossless-claw - Versions diffs - 0.5.1 → 0.5.3 - Mend

@martian-engineering/lossless-claw 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

package/README.md +20 -11
package/docs/configuration.md +22 -0
package/docs/tui.md +10 -1
package/openclaw.plugin.json +39 -0
package/package.json +1 -1
package/src/assembler.ts +194 -3
package/src/compaction.ts +231 -25
package/src/db/config.ts +24 -3
package/src/engine.ts +35 -8
package/src/plugin/index.ts +113 -73
package/src/store/summary-store.ts +80 -0
package/src/summarize.ts +473 -209
package/src/tools/lcm-expand-query-tool.ts +339 -144
package/src/types.ts +1 -0

package/README.md CHANGED Viewed

@@ -59,6 +59,8 @@ openclaw plugins install --link /path/to/lossless-claw
 The install command records the plugin, enables it, and applies compatible slot selection (including `contextEngine` when applicable).
+> **Note:** If your OpenClaw config uses `plugins.allow`, make sure both `lossless-claw` and any active plugins you rely on remain allowlisted. In some setups, narrowing the allowlist can prevent plugin-backed integrations from loading, even if `lossless-claw` itself is installed correctly. Restart the gateway after plugin config changes.
 ### Configure OpenClaw
 In most cases, no manual JSON edits are needed after `openclaw plugins install`.
@@ -92,14 +94,16 @@ Add a `lossless-claw` entry under `plugins.entries` in your OpenClaw config:
       "lossless-claw": {
         "enabled": true,
         "config": {
-          "freshTailCount": 32,
+          "freshTailCount": 64,
+          "leafChunkTokens": 80000,
           "contextThreshold": 0.75,
-          "incrementalMaxDepth": -1,
+          "incrementalMaxDepth": 1,
           "ignoreSessionPatterns": [
             "agent:*:cron:**"
           ],
-          "summaryProvider": "anthropic",
-          "summaryModel": "claude-3-5-haiku"
+          "summaryModel": "anthropic/claude-haiku-4-5",
+          "expansionModel": "anthropic/claude-haiku-4-5",
+          "delegationTimeoutMs": 300000
         }
       }
     }
@@ -107,7 +111,7 @@ Add a `lossless-claw` entry under `plugins.entries` in your OpenClaw config:
 }
 ```
-`summaryModel` and `summaryProvider` let you pin compaction summarization to a cheaper or faster model than your main OpenClaw session model. When unset, LCM uses OpenClaw's configured default model/provider.
+`leafChunkTokens` controls how many source tokens can accumulate in a leaf compaction chunk before summarization is triggered. The default is `20000`, but quota-limited summary providers may benefit from a larger value to reduce compaction frequency. `summaryModel` and `summaryProvider` let you pin compaction summarization to a cheaper or faster model than your main OpenClaw session model. `expansionModel` does the same for `lcm_expand_query` sub-agent calls (drilling into summaries to recover detail). `delegationTimeoutMs` controls how long `lcm_expand_query` waits for that delegated sub-agent to finish before returning a timeout error; it defaults to `120000` (120s). When unset, the model settings still fall back to OpenClaw's configured default model/provider. See [Expansion model override requirements](#expansion-model-override-requirements) for the required `subagent` trust policy when using `expansionModel`.
 ### Environment variables
@@ -119,11 +123,11 @@ Add a `lossless-claw` entry under `plugins.entries` in your OpenClaw config:
 | `LCM_STATELESS_SESSION_PATTERNS` | `""` | Comma-separated glob patterns for session keys that may read from LCM but never write to it |
 | `LCM_SKIP_STATELESS_SESSIONS` | `true` | Enable stateless-session write skipping for matching session keys |
 | `LCM_CONTEXT_THRESHOLD` | `0.75` | Fraction of context window that triggers compaction (0.0–1.0) |
-| `LCM_FRESH_TAIL_COUNT` | `32` | Number of recent messages protected from compaction |
+| `LCM_FRESH_TAIL_COUNT` | `64` | Number of recent messages protected from compaction |
 | `LCM_LEAF_MIN_FANOUT` | `8` | Minimum raw messages per leaf summary |
 | `LCM_CONDENSED_MIN_FANOUT` | `4` | Minimum summaries per condensed node |
 | `LCM_CONDENSED_MIN_FANOUT_HARD` | `2` | Relaxed fanout for forced compaction sweeps |
-| `LCM_INCREMENTAL_MAX_DEPTH` | `0` | How deep incremental compaction goes (0 = leaf only, -1 = unlimited) |
+| `LCM_INCREMENTAL_MAX_DEPTH` | `1` | How deep incremental compaction goes (0 = leaf only, 1 = one condensed pass, -1 = unlimited) |
 | `LCM_LEAF_CHUNK_TOKENS` | `20000` | Max source tokens per leaf compaction chunk |
 | `LCM_LEAF_TARGET_TOKENS` | `1200` | Target token count for leaf summaries |
 | `LCM_CONDENSED_TARGET_TOKENS` | `2000` | Target token count for condensed summaries |
@@ -133,8 +137,10 @@ Add a `lossless-claw` entry under `plugins.entries` in your OpenClaw config:
 | `LCM_LARGE_FILE_SUMMARY_MODEL` | `""` | Model override for large-file summarization |
 | `LCM_SUMMARY_MODEL` | `""` | Model override for compaction summarization; falls back to OpenClaw's default model when unset |
 | `LCM_SUMMARY_PROVIDER` | `""` | Provider override for compaction summarization; falls back to `OPENCLAW_PROVIDER` or the provider embedded in the model ref |
+| `LCM_SUMMARY_BASE_URL` | *(from OpenClaw / provider default)* | Base URL override for summarization API calls |
 | `LCM_EXPANSION_MODEL` | *(from OpenClaw)* | Model override for `lcm_expand_query` sub-agent (e.g. `anthropic/claude-haiku-4-5`) |
 | `LCM_EXPANSION_PROVIDER` | *(from OpenClaw)* | Provider override for `lcm_expand_query` sub-agent |
+| `LCM_DELEGATION_TIMEOUT_MS` | `120000` | Max time to wait for delegated `lcm_expand_query` sub-agent completion |
 | `LCM_AUTOCOMPACT_DISABLED` | `false` | Disable automatic compaction after turns |
 | `LCM_PRUNE_HEARTBEAT_OK` | `false` | Retroactively delete `HEARTBEAT_OK` turn cycles from LCM storage |
@@ -178,6 +184,7 @@ Plugin config equivalents:
 - `skipStatelessSessions`
 - `summaryModel`
 - `summaryProvider`
+- `delegationTimeoutMs`
 Environment variables still win over plugin config when both are set.
@@ -195,13 +202,15 @@ If `summaryModel` already includes a provider prefix such as `anthropic/claude-s
 ### Recommended starting configuration
 ```
-LCM_FRESH_TAIL_COUNT=32
-LCM_INCREMENTAL_MAX_DEPTH=-1
+LCM_FRESH_TAIL_COUNT=64
+LCM_LEAF_CHUNK_TOKENS=20000
+LCM_INCREMENTAL_MAX_DEPTH=1
 LCM_CONTEXT_THRESHOLD=0.75
 ```
-- **freshTailCount=32** protects the last 32 messages from compaction, giving the model enough recent context for continuity.
-- **incrementalMaxDepth=-1** enables unlimited automatic condensation after each compaction pass — the DAG cascades as deep as needed. Set to `0` (default) for leaf-only, or a positive integer for a specific depth cap.
+- **freshTailCount=64** protects the last 64 messages from compaction, giving the model more recent context for continuity.
+- **leafChunkTokens=20000** limits how large each leaf compaction chunk can grow before LCM summarizes it. Increase this when your summary provider is quota-limited and frequent leaf compactions are exhausting that quota.
+- **incrementalMaxDepth=1** runs one condensed pass after each leaf compaction by default. Set to `0` for leaf-only behavior, a larger positive integer for a deeper cap, or `-1` for unlimited cascading.
 - **contextThreshold=0.75** triggers compaction when context reaches 75% of the model's window, leaving headroom for the model's response.
 ### Session exclusion patterns

package/docs/configuration.md CHANGED Viewed

@@ -91,6 +91,25 @@ The actual summary size depends on the LLM's output; these values are guidelines
 - Smaller chunks create summaries more frequently from less material.
 - This also affects the condensed minimum input threshold (10% of this value).
+### Maximum assembly token budget
+`LCM_MAX_ASSEMBLY_TOKEN_BUDGET` (default: none) caps the token budget used for context assembly and compaction threshold evaluation. When set, this takes precedence over both the 128k fallback and runtime-provided budgets.
+Set this if you're using a model with a smaller context window:
+- **8k models:** `LCM_MAX_ASSEMBLY_TOKEN_BUDGET=7000`
+- **32k models:** `LCM_MAX_ASSEMBLY_TOKEN_BUDGET=30000`
+- **128k+ models:** No need to set (128k fallback is appropriate)
+### Summary size cap
+`LCM_SUMMARY_MAX_OVERAGE_FACTOR` (default: `3`) controls the hard ceiling on summary sizes relative to the target tokens (`leafTargetTokens` for leaf summaries, `condensedTargetTokens` for condensed summaries).
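+If a summary exceeds `LCM_SUMMARY_MAX_OVERAGE_FACTOR * target_tokens` (for example, `3 * 1200 = 3600` tokens for a leaf summary with the default settings), it is deterministically truncated. A warning is logged when any summary exceeds `1.5 * target_tokens`.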
+- **Lower values** (e.g., 2) enforce tighter summaries but may truncate more often with weaker summarizer models.
+- **Higher values** (e.g., 5) allow more LLM flexibility but risk storing oversized summaries.
 ## Model selection
 LCM uses the same model as the parent OpenClaw session for summarization by default. You can override this:
@@ -99,6 +118,7 @@ LCM uses the same model as the parent OpenClaw session for summarization by defa
 # Use a specific model for summarization
 export LCM_SUMMARY_MODEL=anthropic/claude-sonnet-4-20250514
 export LCM_SUMMARY_PROVIDER=anthropic
+export LCM_SUMMARY_BASE_URL=https://api.anthropic.com
 ```
 Using a cheaper/faster model for summarization can reduce costs, but quality matters — poor summaries compound as they're condensed into higher-level nodes.
@@ -112,6 +132,8 @@ When more than one source is present, compaction summarization resolves in this
 If `summaryModel` already includes a provider prefix such as `anthropic/claude-sonnet-4-20250514`, `summaryProvider` is ignored for that choice.
+For delegated `lcm_expand_query` runs, you can extend the sub-agent wait window with `delegationTimeoutMs` (plugin config) or `LCM_DELEGATION_TIMEOUT_MS` (environment variable). The default is `120000` milliseconds.
 ## Session controls
 ### Excluding sessions entirely

package/docs/tui.md CHANGED Viewed

@@ -287,6 +287,9 @@ lcm-tui rewrite 44 --all --apply --diff
 # Rewrite with OpenAI Responses API
 lcm-tui rewrite 44 --summary sum_abc123 --provider openai --model gpt-5.3-codex --apply
+# Rewrite through a custom OpenAI-compatible proxy
+lcm-tui rewrite 44 --summary sum_abc123 --provider openai --model gpt-5.3-codex --base-url https://proxy.example.com/openai --apply
 # Use custom prompt templates
 lcm-tui rewrite 44 --all --apply --prompt-dir ~/.config/lcm-tui/prompts
 ```
@@ -301,6 +304,7 @@ lcm-tui rewrite 44 --all --apply --prompt-dir ~/.config/lcm-tui/prompts
 | `--diff` | Show unified diff |
 | `--provider <id>` | API provider (inferred from `--model` when omitted) |
 | `--model <model>` | API model (default depends on provider) |
+| `--base-url <url>` | Custom API base URL (overrides config and env) |
 | `--prompt-dir <path>` | Custom prompt template directory |
 | `--timestamps` | Inject timestamps into source text (default: true) |
 | `--tz <timezone>` | Timezone for timestamps (default: system local) |
@@ -378,6 +382,9 @@ lcm-tui backfill my-agent session_abc123 --apply --transplant-to 653
 # Backfill using OpenAI
 lcm-tui backfill my-agent session_abc123 --apply --provider openai --model gpt-5.3-codex
+# Backfill through a custom OpenAI-compatible proxy
+lcm-tui backfill my-agent session_abc123 --apply --provider openai --model gpt-5.3-codex --base-url https://proxy.example.com/openai
 ```
 All write paths are transactional:
@@ -404,6 +411,7 @@ An idempotency guard prevents duplicate imports for the same `session_id`.
 | `--fresh-tail <n>` | Preserve freshest N raw messages from leaf compaction |
 | `--provider <id>` | API provider (inferred from model when omitted) |
 | `--model <id>` | API model (default depends on provider) |
+| `--base-url <url>` | Custom API base URL (overrides config and env) |
 | `--prompt-dir <path>` | Custom depth-prompt directory |
 ### `lcm-tui prompts`
@@ -479,9 +487,10 @@ If the provider auth profile mode is `oauth` (not `api_key`), set the provider A
 Interactive rewrite (`w`/`W`) can be configured with:
 - `LCM_TUI_SUMMARY_PROVIDER`
 - `LCM_TUI_SUMMARY_MODEL`
+- `LCM_TUI_SUMMARY_BASE_URL`
 - `LCM_TUI_CONVERSATION_WINDOW_SIZE` (default `200`)
-It also honors `LCM_SUMMARY_PROVIDER` / `LCM_SUMMARY_MODEL` as fallback.
+It also honors `LCM_SUMMARY_PROVIDER` / `LCM_SUMMARY_MODEL` / `LCM_SUMMARY_BASE_URL` as fallback.
 ## Database

package/openclaw.plugin.json CHANGED Viewed

@@ -13,6 +13,10 @@
       "label": "Fresh Tail Count",
       "help": "Number of recent messages protected from compaction"
     },
+    "leafChunkTokens": {
+      "label": "Leaf Chunk Tokens",
+      "help": "Maximum source tokens per leaf compaction chunk before summarization"
+    },
     "dbPath": {
       "label": "Database Path",
       "help": "Path to LCM SQLite database (default: ~/.openclaw/lcm.db)"
@@ -44,6 +48,22 @@
     "expansionProvider": {
       "label": "Expansion Provider",
       "help": "Provider override for lcm_expand_query sub-agent (e.g., 'anthropic')"
+    },
+    "delegationTimeoutMs": {
+      "label": "Delegation Timeout (ms)",
+      "help": "Maximum time to wait for delegated lcm_expand_query sub-agent completion before timing out"
+    },
+    "maxAssemblyTokenBudget": {
+      "label": "Max Assembly Token Budget",
+      "help": "Hard ceiling for assembly token budget — caps runtime-provided and fallback budgets. Set for smaller context-window models (e.g., 30000 for 32k models)"
+    },
+    "summaryMaxOverageFactor": {
+      "label": "Summary Max Overage Factor",
+      "help": "Maximum allowed overage factor for summaries relative to target tokens (default 3). Summaries exceeding this are deterministically truncated."
+    },
+    "customInstructions": {
+      "label": "Custom Instructions",
+      "help": "Natural language instructions injected into all summarization prompts (e.g., formatting rules, tone control)"
     }
   },
   "configSchema": {
@@ -66,6 +86,10 @@
         "type": "integer",
         "minimum": 1
       },
+      "leafChunkTokens": {
+        "type": "integer",
+        "minimum": 1
+      },
       "leafMinFanout": {
         "type": "integer",
         "minimum": 2
@@ -111,6 +135,21 @@
       },
       "expansionProvider": {
         "type": "string"
+      },
+      "delegationTimeoutMs": {
+        "type": "integer",
+        "minimum": 1
+      },
+      "maxAssemblyTokenBudget": {
+        "type": "integer",
+        "minimum": 1000
+      },
+      "summaryMaxOverageFactor": {
+        "type": "number",
+        "minimum": 1
+      },
+      "customInstructions": {
+        "type": "string"
       }
     }
   }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@martian-engineering/lossless-claw",
-  "version": "0.5.1",
+  "version": "0.5.3",
   "description": "Lossless Context Management plugin for OpenClaw — DAG-based conversation summarization with incremental compaction",
   "type": "module",
   "main": "index.ts",

package/src/assembler.ts CHANGED Viewed

@@ -9,6 +9,15 @@ import type { SummaryStore, ContextItemRecord, SummaryRecord } from "./store/sum
 type AgentMessage = Parameters<ContextEngine["ingest"]>[0]["message"];
+const TOOL_CALL_TYPES = new Set([ // content-block `type` strings that the helpers in this file treat as tool-call blocks
+  "toolCall",
+  "toolUse",
+  "tool_use", // snake_case and kebab-case spellings are accepted alongside the camelCase ones
+  "tool-use",
+  "functionCall",
+  "function_call",
+]);
 // ── Public types ─────────────────────────────────────────────────────────────
 export interface AssembleContextInput {
@@ -67,6 +76,10 @@ function buildSystemPromptAddition(summarySignals: SummaryPromptSignal[]): strin
     "",
     "**Recall priority:** Use LCM tools first for compacted conversation history. If LCM does not cover the needed data, prefer any available memory/recall tool before falling back to raw text search.",
     "",
+    "**Conflict handling:** If newer evidence conflicts with an older summary or recollection, prefer the newer evidence. Do not trust a stale summary over fresher contradictory information.",
+    "",
+    "**Contradictions/uncertainty:** If facts seem contradictory or uncertain, verify with LCM tools before answering instead of trusting the summary at face value.",
+    "",
     "**Tool escalation:**",
     "1. `lcm_grep` — search by regex or full-text across messages and summaries",
     "2. `lcm_describe` — inspect a specific summary (cheap, no sub-agent)",
@@ -92,6 +105,7 @@ function buildSystemPromptAddition(summarySignals: SummaryPromptSignal[]): strin
       "3) Answer with citations to summary IDs used",
       "",
       "**Uncertainty checklist (run before answering):**",
+      "- Am I relying on an older summary even though newer evidence disagrees?",
       "- Am I making exact factual claims from a condensed summary?",
       "- Could compaction have omitted a crucial detail?",
       "- Would this answer fail if the user asks for proof?",
@@ -555,6 +569,174 @@ function pickToolIsError(parts: MessagePartRecord[]): boolean | undefined {
   return undefined;
 }
+function extractToolCallId(block: { id?: unknown; call_id?: unknown }): string | null { // returns the block's tool-call identifier, or null when neither field holds a non-empty string
+  if (typeof block.id === "string" && block.id.length > 0) { // `id` wins when both fields are present
+    return block.id;
+  }
+  if (typeof block.call_id === "string" && block.call_id.length > 0) { // fallback, consulted only when `id` is missing, empty, or not a string
+    return block.call_id;
+  }
+  return null;
+}
+function extractToolCallIdsFromAssistant(message: AgentMessage): string[] { // lists the ids of every tool-call block in an assistant message, in content order (duplicates are kept)
+  if (message?.role !== "assistant" || !Array.isArray(message.content)) { // non-assistant messages and non-array content yield no ids
+    return [];
+  }
+  const ids: string[] = [];
+  for (const block of message.content) {
+    if (!block || typeof block !== "object") { // skip null/undefined and primitive entries
+      continue;
+    }
+    const record = block as { type?: unknown; id?: unknown; call_id?: unknown };
+    if (typeof record.type !== "string" || !TOOL_CALL_TYPES.has(record.type)) { // only blocks whose `type` is in TOOL_CALL_TYPES count
+      continue;
+    }
+    const id = extractToolCallId(record);
+    if (id) { // tool-call blocks without a usable id are silently ignored
+      ids.push(id);
+    }
+  }
+  return ids;
+}
+function extractToolResultIdFromMessage(message: AgentMessage): string | null { // returns the tool-call id a message refers to via `toolCallId` / `toolUseId`, or null; the message role is not inspected
+  if (!message || typeof message !== "object") {
+    return null;
+  }
+  if (typeof message.toolCallId === "string" && message.toolCallId.length > 0) { // `toolCallId` takes priority over `toolUseId`
+    return message.toolCallId;
+  }
+  if (typeof message.toolUseId === "string" && message.toolUseId.length > 0) { // alternative field name, used only when `toolCallId` is unusable
+    return message.toolUseId;
+  }
+  return null;
+}
+function collectAssistantToolCallIds(items: ResolvedItem[]): Set<string> { // de-duplicated union of the tool-call ids found in the assistant messages of `items`
+  const ids = new Set<string>();
+  for (const item of items) {
+    for (const id of extractToolCallIdsFromAssistant(item.message)) { // yields nothing for non-assistant messages
+      ids.add(id);
+    }
+  }
+  return ids;
+}
+function mergeFreshTailWithMatchingToolResults( // builds a new list in which each fresh-tail item is followed by the tool results that answer its tool calls
+  freshTail: ResolvedItem[],
+  matchingToolResults: ResolvedItem[],
+): ResolvedItem[] {
+  if (matchingToolResults.length === 0) { // nothing to splice in: the input array itself is returned, not a copy
+    return freshTail;
+  }
+  const resultsById = new Map<string, ResolvedItem[]>(); // tool-call id -> result items carrying that id, in input order
+  for (const item of matchingToolResults) {
+    const toolResultId = extractToolResultIdFromMessage(item.message);
+    if (!toolResultId) { // items without an id are not indexed here; the trailing loop below still appends them
+      continue;
+    }
+    const existing = resultsById.get(toolResultId);
+    if (existing) {
+      existing.push(item);
+    } else {
+      resultsById.set(toolResultId, [item]);
+    }
+  }
+  const merged: ResolvedItem[] = [];
+  const usedOrdinals = new Set<number>(); // ordinals of result items already placed, so each is emitted at most once
+  for (const item of freshTail) {
+    merged.push(item); // the fresh-tail item always comes first, its results (if any) right after it
+    const toolCallIds = extractToolCallIdsFromAssistant(item.message);
+    if (toolCallIds.length === 0) {
+      continue;
+    }
+    for (const toolCallId of toolCallIds) {
+      const matches = resultsById.get(toolCallId);
+      if (!matches) {
+        continue;
+      }
+      for (const match of matches) {
+        if (usedOrdinals.has(match.ordinal)) { // already placed after an earlier call with the same id
+          continue;
+        }
+        merged.push(match);
+        usedOrdinals.add(match.ordinal);
+      }
+    }
+  }
+  for (const item of matchingToolResults) { // any result not placed above is appended at the end rather than dropped
+    if (!usedOrdinals.has(item.ordinal)) {
+      merged.push(item);
+    }
+  }
+  return merged;
+}
+function filterNonFreshAssistantToolCalls( // maps items to their messages, stripping tool-call blocks that have no matching tool result from assistant messages outside the fresh tail
+  items: ResolvedItem[],
+  freshTailOrdinals: Set<number>,
+): AgentMessage[] {
+  const availableToolResultIds = new Set<string>(); // every tool-result id present anywhere in `items`
+  for (const item of items) {
+    const toolResultId = extractToolResultIdFromMessage(item.message);
+    if (toolResultId) {
+      availableToolResultIds.add(toolResultId);
+    }
+  }
+  const filteredMessages: AgentMessage[] = [];
+  for (const item of items) {
+    if (item.message?.role !== "assistant" || freshTailOrdinals.has(item.ordinal)) { // non-assistant messages and fresh-tail items pass through untouched
+      filteredMessages.push(item.message);
+      continue;
+    }
+    if (!Array.isArray(item.message.content)) { // non-array content has no blocks to filter
+      filteredMessages.push(item.message);
+      continue;
+    }
+    let removedAny = false;
+    const content = item.message.content.filter((block) => {
+      if (!block || typeof block !== "object") {
+        return true;
+      }
+      const record = block as { type?: unknown; id?: unknown; call_id?: unknown };
+      if (typeof record.type !== "string" || !TOOL_CALL_TYPES.has(record.type)) { // blocks that are not tool calls are always kept
+        return true;
+      }
+      const toolCallId = extractToolCallId(record);
+      if (!toolCallId || availableToolResultIds.has(toolCallId)) { // keep calls that have a result, and calls whose id cannot be read
+        return true;
+      }
+      removedAny = true; // a tool call with an id but no result among `items` is dropped
+      return false;
+    });
+    if (content.length === 0) { // the message is omitted entirely when no blocks remain; this also applies if the content array was empty to begin with
+      continue;
+    }
+    if (!removedAny) { // nothing stripped: reuse the original message object
+      filteredMessages.push(item.message);
+      continue;
+    }
+    filteredMessages.push({ // shallow copy with the filtered content; the original message is not mutated
+      ...item.message,
+      content: content as typeof item.message.content,
+    } as AgentMessage);
+  }
+  return filteredMessages;
+}
 /** Format a Date for XML attributes in the agent's timezone. */
 function formatDateForAttribute(date: Date, timezone?: string): string {
   const tz = timezone ?? "UTC";
@@ -692,8 +874,17 @@ export class ContextAssembler {
     // Step 3: Split into evictable prefix and protected fresh tail
     const tailStart = Math.max(0, resolved.length - freshTailCount);
-    const freshTail = resolved.slice(tailStart);
-    const evictable = resolved.slice(0, tailStart);
+    const baseFreshTail = resolved.slice(tailStart);
+    const initialEvictable = resolved.slice(0, tailStart);
+    const freshTailOrdinals = new Set(baseFreshTail.map((item) => item.ordinal));
+    const tailToolCallIds = collectAssistantToolCallIds(baseFreshTail);
+    const tailPairToolResults = initialEvictable.filter((item) => {
+      const toolResultId = extractToolResultIdFromMessage(item.message);
+      return toolResultId !== null && tailToolCallIds.has(toolResultId);
+    });
+    const protectedEvictableOrdinals = new Set(tailPairToolResults.map((item) => item.ordinal));
+    const evictable = initialEvictable.filter((item) => !protectedEvictableOrdinals.has(item.ordinal));
+    const freshTail = mergeFreshTailWithMatchingToolResults(baseFreshTail, tailPairToolResults);
     // Step 4: Budget-aware selection
     // First, compute the token cost of the fresh tail (always included).
@@ -747,7 +938,7 @@ export class ContextAssembler {
     // Normalize assistant string content to array blocks (some providers return
     // content as a plain string; Anthropic expects content block arrays).
-    const rawMessages = selected.map((item) => item.message);
+    const rawMessages = filterNonFreshAssistantToolCalls(selected, freshTailOrdinals);
     for (let i = 0; i < rawMessages.length; i++) {
       const msg = rawMessages[i];
       if (msg?.role === "assistant" && typeof msg.content === "string") {