@martian-engineering/lossless-claw 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -59,6 +59,8 @@ openclaw plugins install --link /path/to/lossless-claw
59
59
 
60
60
  The install command records the plugin, enables it, and applies compatible slot selection (including `contextEngine` when applicable).
61
61
 
62
+ > **Note:** If your OpenClaw config uses `plugins.allow`, make sure both `lossless-claw` and any active plugins you rely on remain allowlisted. In some setups, narrowing the allowlist can prevent plugin-backed integrations from loading, even if `lossless-claw` itself is installed correctly. Restart the gateway after plugin config changes.
63
+
62
64
  ### Configure OpenClaw
63
65
 
64
66
  In most cases, no manual JSON edits are needed after `openclaw plugins install`.
@@ -92,14 +94,16 @@ Add a `lossless-claw` entry under `plugins.entries` in your OpenClaw config:
92
94
  "lossless-claw": {
93
95
  "enabled": true,
94
96
  "config": {
95
- "freshTailCount": 32,
97
+ "freshTailCount": 64,
98
+ "leafChunkTokens": 80000,
96
99
  "contextThreshold": 0.75,
97
- "incrementalMaxDepth": -1,
100
+ "incrementalMaxDepth": 1,
98
101
  "ignoreSessionPatterns": [
99
102
  "agent:*:cron:**"
100
103
  ],
101
104
  "summaryModel": "anthropic/claude-haiku-4-5",
102
- "expansionModel": "anthropic/claude-haiku-4-5"
105
+ "expansionModel": "anthropic/claude-haiku-4-5",
106
+ "delegationTimeoutMs": 300000
103
107
  }
104
108
  }
105
109
  }
@@ -107,7 +111,7 @@ Add a `lossless-claw` entry under `plugins.entries` in your OpenClaw config:
107
111
  }
108
112
  ```
109
113
 
110
- `summaryModel` and `summaryProvider` let you pin compaction summarization to a cheaper or faster model than your main OpenClaw session model. `expansionModel` does the same for `lcm_expand_query` sub-agent calls (drilling into summaries to recover detail). When unset, both fall back to OpenClaw's configured default model/provider. See [Expansion model override requirements](#expansion-model-override-requirements) for the required `subagent` trust policy when using `expansionModel`.
114
+ `leafChunkTokens` controls how many source tokens can accumulate in a leaf compaction chunk before summarization is triggered. The default is `20000`, but quota-limited summary providers may benefit from a larger value to reduce compaction frequency. `summaryModel` and `summaryProvider` let you pin compaction summarization to a cheaper or faster model than your main OpenClaw session model. `expansionModel` does the same for `lcm_expand_query` sub-agent calls (drilling into summaries to recover detail). `delegationTimeoutMs` controls how long `lcm_expand_query` waits for that delegated sub-agent to finish before returning a timeout error; it defaults to `120000` (120s). When `summaryModel`/`summaryProvider` or `expansionModel` are unset, they fall back to OpenClaw's configured default model/provider. See [Expansion model override requirements](#expansion-model-override-requirements) for the required `subagent` trust policy when using `expansionModel`.
111
115
 
112
116
  ### Environment variables
113
117
 
@@ -119,11 +123,11 @@ Add a `lossless-claw` entry under `plugins.entries` in your OpenClaw config:
119
123
  | `LCM_STATELESS_SESSION_PATTERNS` | `""` | Comma-separated glob patterns for session keys that may read from LCM but never write to it |
120
124
  | `LCM_SKIP_STATELESS_SESSIONS` | `true` | Enable stateless-session write skipping for matching session keys |
121
125
  | `LCM_CONTEXT_THRESHOLD` | `0.75` | Fraction of context window that triggers compaction (0.0–1.0) |
122
- | `LCM_FRESH_TAIL_COUNT` | `32` | Number of recent messages protected from compaction |
126
+ | `LCM_FRESH_TAIL_COUNT` | `64` | Number of recent messages protected from compaction |
123
127
  | `LCM_LEAF_MIN_FANOUT` | `8` | Minimum raw messages per leaf summary |
124
128
  | `LCM_CONDENSED_MIN_FANOUT` | `4` | Minimum summaries per condensed node |
125
129
  | `LCM_CONDENSED_MIN_FANOUT_HARD` | `2` | Relaxed fanout for forced compaction sweeps |
126
- | `LCM_INCREMENTAL_MAX_DEPTH` | `0` | How deep incremental compaction goes (0 = leaf only, -1 = unlimited) |
130
+ | `LCM_INCREMENTAL_MAX_DEPTH` | `1` | How deep incremental compaction goes (0 = leaf only, 1 = one condensed pass, -1 = unlimited) |
127
131
  | `LCM_LEAF_CHUNK_TOKENS` | `20000` | Max source tokens per leaf compaction chunk |
128
132
  | `LCM_LEAF_TARGET_TOKENS` | `1200` | Target token count for leaf summaries |
129
133
  | `LCM_CONDENSED_TARGET_TOKENS` | `2000` | Target token count for condensed summaries |
@@ -136,6 +140,7 @@ Add a `lossless-claw` entry under `plugins.entries` in your OpenClaw config:
136
140
  | `LCM_SUMMARY_BASE_URL` | *(from OpenClaw / provider default)* | Base URL override for summarization API calls |
137
141
  | `LCM_EXPANSION_MODEL` | *(from OpenClaw)* | Model override for `lcm_expand_query` sub-agent (e.g. `anthropic/claude-haiku-4-5`) |
138
142
  | `LCM_EXPANSION_PROVIDER` | *(from OpenClaw)* | Provider override for `lcm_expand_query` sub-agent |
143
+ | `LCM_DELEGATION_TIMEOUT_MS` | `120000` | Max time to wait for delegated `lcm_expand_query` sub-agent completion |
139
144
  | `LCM_AUTOCOMPACT_DISABLED` | `false` | Disable automatic compaction after turns |
140
145
  | `LCM_PRUNE_HEARTBEAT_OK` | `false` | Retroactively delete `HEARTBEAT_OK` turn cycles from LCM storage |
141
146
 
@@ -179,6 +184,7 @@ Plugin config equivalents:
179
184
  - `skipStatelessSessions`
180
185
  - `summaryModel`
181
186
  - `summaryProvider`
187
+ - `delegationTimeoutMs`
182
188
 
183
189
  Environment variables still win over plugin config when both are set.
184
190
 
@@ -196,13 +202,15 @@ If `summaryModel` already includes a provider prefix such as `anthropic/claude-s
196
202
  ### Recommended starting configuration
197
203
 
198
204
  ```
199
- LCM_FRESH_TAIL_COUNT=32
200
- LCM_INCREMENTAL_MAX_DEPTH=-1
205
+ LCM_FRESH_TAIL_COUNT=64
206
+ LCM_LEAF_CHUNK_TOKENS=20000
207
+ LCM_INCREMENTAL_MAX_DEPTH=1
201
208
  LCM_CONTEXT_THRESHOLD=0.75
202
209
  ```
203
210
 
204
- - **freshTailCount=32** protects the last 32 messages from compaction, giving the model enough recent context for continuity.
205
- - **incrementalMaxDepth=-1** enables unlimited automatic condensation after each compaction pass the DAG cascades as deep as needed. Set to `0` (default) for leaf-only, or a positive integer for a specific depth cap.
211
+ - **freshTailCount=64** protects the last 64 messages from compaction, giving the model more recent context for continuity.
212
+ - **leafChunkTokens=20000** limits how large each leaf compaction chunk can grow before LCM summarizes it. Increase this when your summary provider is quota-limited and frequent leaf compactions are exhausting that quota.
213
+ - **incrementalMaxDepth=1** runs one condensed pass after each leaf compaction by default. Set to `0` for leaf-only behavior, a larger positive integer for a deeper cap, or `-1` for unlimited cascading.
206
214
  - **contextThreshold=0.75** triggers compaction when context reaches 75% of the model's window, leaving headroom for the model's response.
207
215
 
208
216
  ### Session exclusion patterns
@@ -91,6 +91,25 @@ The actual summary size depends on the LLM's output; these values are guidelines
91
91
  - Smaller chunks create summaries more frequently from less material.
92
92
  - This also affects the condensed minimum input threshold (10% of this value).
93
93
 
94
+ ### Maximum assembly token budget
95
+
96
+ `LCM_MAX_ASSEMBLY_TOKEN_BUDGET` (default: none) caps the token budget used for context assembly and compaction threshold evaluation. When set, this takes precedence over both the 128k fallback and runtime-provided budgets.
97
+
98
+ Set this if you're using a model with a smaller context window:
99
+
100
+ - **8k models:** `LCM_MAX_ASSEMBLY_TOKEN_BUDGET=7000`
101
+ - **32k models:** `LCM_MAX_ASSEMBLY_TOKEN_BUDGET=30000`
102
+ - **128k+ models:** No need to set (128k fallback is appropriate)
103
+
104
+ ### Summary size cap
105
+
106
+ `LCM_SUMMARY_MAX_OVERAGE_FACTOR` (default: `3`) controls the hard ceiling on summary sizes relative to the target tokens (`leafTargetTokens` for leaf summaries, `condensedTargetTokens` for condensed summaries).
107
+
108
+ If a summary exceeds `overage_factor * target_tokens`, it is deterministically truncated. A warning is logged when any summary exceeds `1.5 * target_tokens`.
109
+
110
+ - **Lower values** (e.g., 2) enforce tighter summaries but may truncate more often with weaker summarizer models.
111
+ - **Higher values** (e.g., 5) allow more LLM flexibility but risk storing oversized summaries.
112
+
94
113
  ## Model selection
95
114
 
96
115
  LCM uses the same model as the parent OpenClaw session for summarization by default. You can override this:
@@ -113,6 +132,8 @@ When more than one source is present, compaction summarization resolves in this
113
132
 
114
133
  If `summaryModel` already includes a provider prefix such as `anthropic/claude-sonnet-4-20250514`, `summaryProvider` is ignored for that choice.
115
134
 
135
+ For delegated `lcm_expand_query` runs, you can extend the sub-agent wait window with `delegationTimeoutMs` (plugin config) or `LCM_DELEGATION_TIMEOUT_MS` (environment variable). The default is `120000` milliseconds.
136
+
116
137
  ## Session controls
117
138
 
118
139
  ### Excluding sessions entirely
@@ -13,6 +13,10 @@
13
13
  "label": "Fresh Tail Count",
14
14
  "help": "Number of recent messages protected from compaction"
15
15
  },
16
+ "leafChunkTokens": {
17
+ "label": "Leaf Chunk Tokens",
18
+ "help": "Maximum source tokens per leaf compaction chunk before summarization"
19
+ },
16
20
  "dbPath": {
17
21
  "label": "Database Path",
18
22
  "help": "Path to LCM SQLite database (default: ~/.openclaw/lcm.db)"
@@ -44,6 +48,22 @@
44
48
  "expansionProvider": {
45
49
  "label": "Expansion Provider",
46
50
  "help": "Provider override for lcm_expand_query sub-agent (e.g., 'anthropic')"
51
+ },
52
+ "delegationTimeoutMs": {
53
+ "label": "Delegation Timeout (ms)",
54
+ "help": "Maximum time to wait for delegated lcm_expand_query sub-agent completion before timing out"
55
+ },
56
+ "maxAssemblyTokenBudget": {
57
+ "label": "Max Assembly Token Budget",
58
+ "help": "Hard ceiling for assembly token budget — caps runtime-provided and fallback budgets. Set for smaller context-window models (e.g., 30000 for 32k models)"
59
+ },
60
+ "summaryMaxOverageFactor": {
61
+ "label": "Summary Max Overage Factor",
62
+ "help": "Maximum allowed overage factor for summaries relative to target tokens (default 3). Summaries exceeding this are deterministically truncated."
63
+ },
64
+ "customInstructions": {
65
+ "label": "Custom Instructions",
66
+ "help": "Natural language instructions injected into all summarization prompts (e.g., formatting rules, tone control)"
47
67
  }
48
68
  },
49
69
  "configSchema": {
@@ -66,6 +86,10 @@
66
86
  "type": "integer",
67
87
  "minimum": 1
68
88
  },
89
+ "leafChunkTokens": {
90
+ "type": "integer",
91
+ "minimum": 1
92
+ },
69
93
  "leafMinFanout": {
70
94
  "type": "integer",
71
95
  "minimum": 2
@@ -111,6 +135,21 @@
111
135
  },
112
136
  "expansionProvider": {
113
137
  "type": "string"
138
+ },
139
+ "delegationTimeoutMs": {
140
+ "type": "integer",
141
+ "minimum": 1
142
+ },
143
+ "maxAssemblyTokenBudget": {
144
+ "type": "integer",
145
+ "minimum": 1000
146
+ },
147
+ "summaryMaxOverageFactor": {
148
+ "type": "number",
149
+ "minimum": 1
150
+ },
151
+ "customInstructions": {
152
+ "type": "string"
114
153
  }
115
154
  }
116
155
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@martian-engineering/lossless-claw",
3
- "version": "0.5.2",
3
+ "version": "0.5.3",
4
4
  "description": "Lossless Context Management plugin for OpenClaw — DAG-based conversation summarization with incremental compaction",
5
5
  "type": "module",
6
6
  "main": "index.ts",
package/src/assembler.ts CHANGED
@@ -9,6 +9,15 @@ import type { SummaryStore, ContextItemRecord, SummaryRecord } from "./store/sum
9
9
 
10
10
  type AgentMessage = Parameters<ContextEngine["ingest"]>[0]["message"];
11
11
 
12
/**
 * Content-block `type` spellings that the helpers in this module treat as an
 * assistant tool call (camelCase, snake_case and kebab-case variants).
 */
const TOOL_CALL_TYPES = new Set<string>(
  ["toolCall", "toolUse", "tool_use", "tool-use", "functionCall", "function_call"],
);
20
+
12
21
  // ── Public types ─────────────────────────────────────────────────────────────
13
22
 
14
23
  export interface AssembleContextInput {
@@ -67,6 +76,10 @@ function buildSystemPromptAddition(summarySignals: SummaryPromptSignal[]): strin
67
76
  "",
68
77
  "**Recall priority:** Use LCM tools first for compacted conversation history. If LCM does not cover the needed data, prefer any available memory/recall tool before falling back to raw text search.",
69
78
  "",
79
+ "**Conflict handling:** If newer evidence conflicts with an older summary or recollection, prefer the newer evidence. Do not trust a stale summary over fresher contradictory information.",
80
+ "",
81
+ "**Contradictions/uncertainty:** If facts seem contradictory or uncertain, verify with LCM tools before answering instead of trusting the summary at face value.",
82
+ "",
70
83
  "**Tool escalation:**",
71
84
  "1. `lcm_grep` — search by regex or full-text across messages and summaries",
72
85
  "2. `lcm_describe` — inspect a specific summary (cheap, no sub-agent)",
@@ -92,6 +105,7 @@ function buildSystemPromptAddition(summarySignals: SummaryPromptSignal[]): strin
92
105
  "3) Answer with citations to summary IDs used",
93
106
  "",
94
107
  "**Uncertainty checklist (run before answering):**",
108
+ "- Am I relying on an older summary even though newer evidence disagrees?",
95
109
  "- Am I making exact factual claims from a condensed summary?",
96
110
  "- Could compaction have omitted a crucial detail?",
97
111
  "- Would this answer fail if the user asks for proof?",
@@ -555,6 +569,174 @@ function pickToolIsError(parts: MessagePartRecord[]): boolean | undefined {
555
569
  return undefined;
556
570
  }
557
571
 
572
/**
 * Read the identifier off a tool-call content block.
 *
 * `id` is consulted first and `call_id` second; a field only counts when it
 * holds a non-empty string.
 *
 * @param block - Object that may carry an `id` and/or `call_id` field.
 * @returns The first usable identifier, or `null` when neither field qualifies.
 */
function extractToolCallId(block: { id?: unknown; call_id?: unknown }): string | null {
  const primary = block.id;
  if (typeof primary === "string" && primary !== "") {
    return primary;
  }

  const fallback = block.call_id;
  return typeof fallback === "string" && fallback !== "" ? fallback : null;
}
581
+
582
/**
 * List the tool-call identifiers carried by an assistant message.
 *
 * Only messages whose role is "assistant" and whose content is an array are
 * inspected. A content block contributes an id when it is a non-null object,
 * its `type` is a string found in TOOL_CALL_TYPES, and extractToolCallId
 * yields an identifier for it.
 *
 * @param message - Message to inspect.
 * @returns Identifiers in content order (duplicates are kept); empty when the
 *   message is not an assistant message with array content.
 */
function extractToolCallIdsFromAssistant(message: AgentMessage): string[] {
  const collected: string[] = [];

  if (message?.role === "assistant" && Array.isArray(message.content)) {
    for (const entry of message.content) {
      if (!entry || typeof entry !== "object") {
        continue;
      }
      const candidate = entry as { type?: unknown; id?: unknown; call_id?: unknown };
      const isToolCall =
        typeof candidate.type === "string" && TOOL_CALL_TYPES.has(candidate.type);
      const callId = isToolCall ? extractToolCallId(candidate) : null;
      if (callId) {
        collected.push(callId);
      }
    }
  }

  return collected;
}
603
+
604
/**
 * Read the tool-call identifier that a message refers to via its
 * `toolCallId` or `toolUseId` field.
 *
 * `toolCallId` is checked before `toolUseId`; a field only counts when it is
 * a non-empty string.
 *
 * @param message - Message to inspect.
 * @returns The identifier, or `null` when the message is not an object or
 *   neither field holds a non-empty string.
 */
function extractToolResultIdFromMessage(message: AgentMessage): string | null {
  if (message && typeof message === "object") {
    const byCallId = message.toolCallId;
    if (typeof byCallId === "string" && byCallId !== "") {
      return byCallId;
    }

    const byUseId = message.toolUseId;
    if (typeof byUseId === "string" && byUseId !== "") {
      return byUseId;
    }
  }
  return null;
}
616
+
617
/**
 * Gather the distinct tool-call identifiers found across the given items.
 *
 * Each item's message is passed through extractToolCallIdsFromAssistant, so
 * only items that helper recognises contribute.
 *
 * @param items - Resolved context items to scan.
 * @returns Set of identifiers, in first-seen order.
 */
function collectAssistantToolCallIds(items: ResolvedItem[]): Set<string> {
  return new Set(items.flatMap((item) => extractToolCallIdsFromAssistant(item.message)));
}
626
+
627
/**
 * Weave tool-result items into the fresh tail, placing each one directly
 * after the tail item whose assistant tool call it answers.
 *
 * @param freshTail - Tail items, in their existing order.
 * @param matchingToolResults - Tool-result items to merge in.
 * @returns `freshTail` itself when there is nothing to merge; otherwise a new
 *   array holding every tail item, each followed by its matching results, with
 *   any results that were never placed appended at the end.
 */
function mergeFreshTailWithMatchingToolResults(
  freshTail: ResolvedItem[],
  matchingToolResults: ResolvedItem[],
): ResolvedItem[] {
  if (matchingToolResults.length === 0) {
    return freshTail;
  }

  // Bucket the results by the tool-call id they refer to; results without an
  // id are left out of the buckets and picked up by the leftover pass below.
  const buckets = new Map<string, ResolvedItem[]>();
  for (const result of matchingToolResults) {
    const resultId = extractToolResultIdFromMessage(result.message);
    if (!resultId) {
      continue;
    }
    const bucket = buckets.get(resultId);
    if (bucket === undefined) {
      buckets.set(resultId, [result]);
    } else {
      bucket.push(result);
    }
  }

  const placedOrdinals = new Set<number>();
  const woven: ResolvedItem[] = [];

  for (const tailItem of freshTail) {
    woven.push(tailItem);

    for (const callId of extractToolCallIdsFromAssistant(tailItem.message)) {
      for (const result of buckets.get(callId) ?? []) {
        // An ordinal is emitted at most once during this pass.
        if (!placedOrdinals.has(result.ordinal)) {
          woven.push(result);
          placedOrdinals.add(result.ordinal);
        }
      }
    }
  }

  // Leftover pass: append whatever was not placed above, in input order.
  // Ordinals are intentionally not recorded here.
  const leftovers = matchingToolResults.filter((result) => !placedOrdinals.has(result.ordinal));
  return [...woven, ...leftovers];
}
683
+
684
/**
 * Convert items to messages, stripping tool-call blocks that have no matching
 * tool result among `items` from assistant messages outside the fresh tail.
 *
 * Messages pass through untouched when they are not assistant messages, when
 * their ordinal is in `freshTailOrdinals`, or when their content is not an
 * array. For the remaining assistant messages, a block is removed only if its
 * `type` is in TOOL_CALL_TYPES, it has an identifier, and no item in `items`
 * refers to that identifier. A message left with no content blocks is dropped;
 * a message that lost some blocks is emitted as a shallow copy.
 *
 * @param items - Selected context items, in output order.
 * @param freshTailOrdinals - Ordinals of items that must not be filtered.
 * @returns Messages in the same order as `items`, minus dropped messages.
 */
function filterNonFreshAssistantToolCalls(
  items: ResolvedItem[],
  freshTailOrdinals: Set<number>,
): AgentMessage[] {
  // Every tool-call id that some item in the selection refers to.
  const answeredIds = new Set<string>();
  for (const { message } of items) {
    const resultId = extractToolResultIdFromMessage(message);
    if (resultId) {
      answeredIds.add(resultId);
    }
  }

  // True only for identifiable tool-call blocks whose id nobody refers to.
  const isOrphanedToolCall = (block: unknown): boolean => {
    if (!block || typeof block !== "object") {
      return false;
    }
    const candidate = block as { type?: unknown; id?: unknown; call_id?: unknown };
    if (typeof candidate.type !== "string" || !TOOL_CALL_TYPES.has(candidate.type)) {
      return false;
    }
    const callId = extractToolCallId(candidate);
    return callId !== null && !answeredIds.has(callId);
  };

  const kept: AgentMessage[] = [];
  for (const item of items) {
    const message = item.message;

    if (
      message?.role === "assistant" &&
      !freshTailOrdinals.has(item.ordinal) &&
      Array.isArray(message.content)
    ) {
      const survivors = message.content.filter((block) => !isOrphanedToolCall(block));

      if (survivors.length === 0) {
        // Nothing left to send for this message.
        continue;
      }
      if (survivors.length !== message.content.length) {
        kept.push({
          ...message,
          content: survivors as typeof message.content,
        } as AgentMessage);
        continue;
      }
    }

    // Unfiltered, or filtered with nothing removed: reuse the original object.
    kept.push(message);
  }
  return kept;
}
739
+
558
740
  /** Format a Date for XML attributes in the agent's timezone. */
559
741
  function formatDateForAttribute(date: Date, timezone?: string): string {
560
742
  const tz = timezone ?? "UTC";
@@ -692,8 +874,17 @@ export class ContextAssembler {
692
874
 
693
875
  // Step 3: Split into evictable prefix and protected fresh tail
694
876
  const tailStart = Math.max(0, resolved.length - freshTailCount);
695
- const freshTail = resolved.slice(tailStart);
696
- const evictable = resolved.slice(0, tailStart);
877
+ const baseFreshTail = resolved.slice(tailStart);
878
+ const initialEvictable = resolved.slice(0, tailStart);
879
+ const freshTailOrdinals = new Set(baseFreshTail.map((item) => item.ordinal));
880
+ const tailToolCallIds = collectAssistantToolCallIds(baseFreshTail);
881
+ const tailPairToolResults = initialEvictable.filter((item) => {
882
+ const toolResultId = extractToolResultIdFromMessage(item.message);
883
+ return toolResultId !== null && tailToolCallIds.has(toolResultId);
884
+ });
885
+ const protectedEvictableOrdinals = new Set(tailPairToolResults.map((item) => item.ordinal));
886
+ const evictable = initialEvictable.filter((item) => !protectedEvictableOrdinals.has(item.ordinal));
887
+ const freshTail = mergeFreshTailWithMatchingToolResults(baseFreshTail, tailPairToolResults);
697
888
 
698
889
  // Step 4: Budget-aware selection
699
890
  // First, compute the token cost of the fresh tail (always included).
@@ -747,7 +938,7 @@ export class ContextAssembler {
747
938
 
748
939
  // Normalize assistant string content to array blocks (some providers return
749
940
  // content as a plain string; Anthropic expects content block arrays).
750
- const rawMessages = selected.map((item) => item.message);
941
+ const rawMessages = filterNonFreshAssistantToolCalls(selected, freshTailOrdinals);
751
942
  for (let i = 0; i < rawMessages.length; i++) {
752
943
  const msg = rawMessages[i];
753
944
  if (msg?.role === "assistant" && typeof msg.content === "string") {