@martian-engineering/lossless-claw 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -59,6 +59,8 @@ openclaw plugins install --link /path/to/lossless-claw
59
59
 
60
60
  The install command records the plugin, enables it, and applies compatible slot selection (including `contextEngine` when applicable).
61
61
 
62
+ > **Note:** If your OpenClaw config uses `plugins.allow`, make sure both `lossless-claw` and any active plugins you rely on remain allowlisted. In some setups, narrowing the allowlist can prevent plugin-backed integrations from loading, even if `lossless-claw` itself is installed correctly. Restart the gateway after plugin config changes.
63
+
62
64
  ### Configure OpenClaw
63
65
 
64
66
  In most cases, no manual JSON edits are needed after `openclaw plugins install`.
@@ -92,14 +94,16 @@ Add a `lossless-claw` entry under `plugins.entries` in your OpenClaw config:
92
94
  "lossless-claw": {
93
95
  "enabled": true,
94
96
  "config": {
95
- "freshTailCount": 32,
97
+ "freshTailCount": 64,
98
+ "leafChunkTokens": 80000,
96
99
  "contextThreshold": 0.75,
97
- "incrementalMaxDepth": -1,
100
+ "incrementalMaxDepth": 1,
98
101
  "ignoreSessionPatterns": [
99
102
  "agent:*:cron:**"
100
103
  ],
101
- "summaryProvider": "anthropic",
102
- "summaryModel": "claude-3-5-haiku"
104
+ "summaryModel": "anthropic/claude-haiku-4-5",
105
+ "expansionModel": "anthropic/claude-haiku-4-5",
106
+ "delegationTimeoutMs": 300000
103
107
  }
104
108
  }
105
109
  }
@@ -107,7 +111,7 @@ Add a `lossless-claw` entry under `plugins.entries` in your OpenClaw config:
107
111
  }
108
112
  ```
109
113
 
110
- `summaryModel` and `summaryProvider` let you pin compaction summarization to a cheaper or faster model than your main OpenClaw session model. When unset, LCM uses OpenClaw's configured default model/provider.
114
+ `leafChunkTokens` controls how many source tokens can accumulate in a leaf compaction chunk before summarization is triggered. The default is `20000`, but quota-limited summary providers may benefit from a larger value to reduce compaction frequency. `summaryModel` and `summaryProvider` let you pin compaction summarization to a cheaper or faster model than your main OpenClaw session model. `expansionModel` does the same for `lcm_expand_query` sub-agent calls (drilling into summaries to recover detail). `delegationTimeoutMs` controls how long `lcm_expand_query` waits for that delegated sub-agent to finish before returning a timeout error; it defaults to `120000` (120s). When unset, the model settings still fall back to OpenClaw's configured default model/provider. See [Expansion model override requirements](#expansion-model-override-requirements) for the required `subagent` trust policy when using `expansionModel`.
111
115
 
112
116
  ### Environment variables
113
117
 
@@ -119,11 +123,11 @@ Add a `lossless-claw` entry under `plugins.entries` in your OpenClaw config:
119
123
  | `LCM_STATELESS_SESSION_PATTERNS` | `""` | Comma-separated glob patterns for session keys that may read from LCM but never write to it |
120
124
  | `LCM_SKIP_STATELESS_SESSIONS` | `true` | Enable stateless-session write skipping for matching session keys |
121
125
  | `LCM_CONTEXT_THRESHOLD` | `0.75` | Fraction of context window that triggers compaction (0.0–1.0) |
122
- | `LCM_FRESH_TAIL_COUNT` | `32` | Number of recent messages protected from compaction |
126
+ | `LCM_FRESH_TAIL_COUNT` | `64` | Number of recent messages protected from compaction |
123
127
  | `LCM_LEAF_MIN_FANOUT` | `8` | Minimum raw messages per leaf summary |
124
128
  | `LCM_CONDENSED_MIN_FANOUT` | `4` | Minimum summaries per condensed node |
125
129
  | `LCM_CONDENSED_MIN_FANOUT_HARD` | `2` | Relaxed fanout for forced compaction sweeps |
126
- | `LCM_INCREMENTAL_MAX_DEPTH` | `0` | How deep incremental compaction goes (0 = leaf only, -1 = unlimited) |
130
+ | `LCM_INCREMENTAL_MAX_DEPTH` | `1` | How deep incremental compaction goes (0 = leaf only, 1 = one condensed pass, -1 = unlimited) |
127
131
  | `LCM_LEAF_CHUNK_TOKENS` | `20000` | Max source tokens per leaf compaction chunk |
128
132
  | `LCM_LEAF_TARGET_TOKENS` | `1200` | Target token count for leaf summaries |
129
133
  | `LCM_CONDENSED_TARGET_TOKENS` | `2000` | Target token count for condensed summaries |
@@ -133,8 +137,10 @@ Add a `lossless-claw` entry under `plugins.entries` in your OpenClaw config:
133
137
  | `LCM_LARGE_FILE_SUMMARY_MODEL` | `""` | Model override for large-file summarization |
134
138
  | `LCM_SUMMARY_MODEL` | `""` | Model override for compaction summarization; falls back to OpenClaw's default model when unset |
135
139
  | `LCM_SUMMARY_PROVIDER` | `""` | Provider override for compaction summarization; falls back to `OPENCLAW_PROVIDER` or the provider embedded in the model ref |
140
+ | `LCM_SUMMARY_BASE_URL` | *(from OpenClaw / provider default)* | Base URL override for summarization API calls |
136
141
  | `LCM_EXPANSION_MODEL` | *(from OpenClaw)* | Model override for `lcm_expand_query` sub-agent (e.g. `anthropic/claude-haiku-4-5`) |
137
142
  | `LCM_EXPANSION_PROVIDER` | *(from OpenClaw)* | Provider override for `lcm_expand_query` sub-agent |
143
+ | `LCM_DELEGATION_TIMEOUT_MS` | `120000` | Max time to wait for delegated `lcm_expand_query` sub-agent completion |
138
144
  | `LCM_AUTOCOMPACT_DISABLED` | `false` | Disable automatic compaction after turns |
139
145
  | `LCM_PRUNE_HEARTBEAT_OK` | `false` | Retroactively delete `HEARTBEAT_OK` turn cycles from LCM storage |
140
146
 
@@ -178,6 +184,7 @@ Plugin config equivalents:
178
184
  - `skipStatelessSessions`
179
185
  - `summaryModel`
180
186
  - `summaryProvider`
187
+ - `delegationTimeoutMs`
181
188
 
182
189
  Environment variables still win over plugin config when both are set.
183
190
 
@@ -195,13 +202,15 @@ If `summaryModel` already includes a provider prefix such as `anthropic/claude-s
195
202
  ### Recommended starting configuration
196
203
 
197
204
  ```
198
- LCM_FRESH_TAIL_COUNT=32
199
- LCM_INCREMENTAL_MAX_DEPTH=-1
205
+ LCM_FRESH_TAIL_COUNT=64
206
+ LCM_LEAF_CHUNK_TOKENS=20000
207
+ LCM_INCREMENTAL_MAX_DEPTH=1
200
208
  LCM_CONTEXT_THRESHOLD=0.75
201
209
  ```
202
210
 
203
- - **freshTailCount=32** protects the last 32 messages from compaction, giving the model enough recent context for continuity.
204
- - **incrementalMaxDepth=-1** enables unlimited automatic condensation after each compaction pass — the DAG cascades as deep as needed. Set to `0` (default) for leaf-only, or a positive integer for a specific depth cap.
211
+ - **freshTailCount=64** protects the last 64 messages from compaction, giving the model more recent context for continuity.
212
+ - **leafChunkTokens=20000** limits how large each leaf compaction chunk can grow before LCM summarizes it. Increase this when your summary provider is quota-limited and frequent leaf compactions are exhausting that quota.
213
+ - **incrementalMaxDepth=1** runs one condensed pass after each leaf compaction by default. Set to `0` for leaf-only behavior, a larger positive integer for a deeper cap, or `-1` for unlimited cascading.
205
214
  - **contextThreshold=0.75** triggers compaction when context reaches 75% of the model's window, leaving headroom for the model's response.
206
215
 
207
216
  ### Session exclusion patterns
@@ -91,6 +91,25 @@ The actual summary size depends on the LLM's output; these values are guidelines
91
91
  - Smaller chunks create summaries more frequently from less material.
92
92
  - This also affects the condensed minimum input threshold (10% of this value).
93
93
 
94
+ ### Maximum assembly token budget
95
+
96
+ `LCM_MAX_ASSEMBLY_TOKEN_BUDGET` (default: none) caps the token budget used for context assembly and compaction threshold evaluation. When set, it acts as a hard ceiling on both the 128k fallback and any runtime-provided budget.
97
+
98
+ Set this if you're using a model with a smaller context window:
99
+
100
+ - **8k models:** `LCM_MAX_ASSEMBLY_TOKEN_BUDGET=7000`
101
+ - **32k models:** `LCM_MAX_ASSEMBLY_TOKEN_BUDGET=30000`
102
+ - **128k+ models:** No need to set (128k fallback is appropriate)
103
+
104
+ ### Summary size cap
105
+
106
+ `LCM_SUMMARY_MAX_OVERAGE_FACTOR` (default: `3`) controls the hard ceiling on summary sizes relative to the target tokens (`leafTargetTokens` for leaf summaries, `condensedTargetTokens` for condensed summaries).
107
+
108
+ If a summary exceeds `overage_factor * target_tokens`, it is deterministically truncated. A warning is logged when any summary exceeds `1.5 * target_tokens`.
109
+
110
+ - **Lower values** (e.g., 2) enforce tighter summaries but may truncate more often with weaker summarizer models.
111
+ - **Higher values** (e.g., 5) allow more LLM flexibility but risk storing oversized summaries.
112
+
94
113
  ## Model selection
95
114
 
96
115
  LCM uses the same model as the parent OpenClaw session for summarization by default. You can override this:
@@ -99,6 +118,7 @@ LCM uses the same model as the parent OpenClaw session for summarization by defa
99
118
  # Use a specific model for summarization
100
119
  export LCM_SUMMARY_MODEL=anthropic/claude-sonnet-4-20250514
101
120
  export LCM_SUMMARY_PROVIDER=anthropic
121
+ export LCM_SUMMARY_BASE_URL=https://api.anthropic.com
102
122
  ```
103
123
 
104
124
  Using a cheaper/faster model for summarization can reduce costs, but quality matters — poor summaries compound as they're condensed into higher-level nodes.
@@ -112,6 +132,8 @@ When more than one source is present, compaction summarization resolves in this
112
132
 
113
133
  If `summaryModel` already includes a provider prefix such as `anthropic/claude-sonnet-4-20250514`, `summaryProvider` is ignored for that choice.
114
134
 
135
+ For delegated `lcm_expand_query` runs, you can extend the sub-agent wait window with `delegationTimeoutMs` (plugin config) or `LCM_DELEGATION_TIMEOUT_MS` (environment variable). The default is `120000` milliseconds.
136
+
115
137
  ## Session controls
116
138
 
117
139
  ### Excluding sessions entirely
package/docs/tui.md CHANGED
@@ -287,6 +287,9 @@ lcm-tui rewrite 44 --all --apply --diff
287
287
  # Rewrite with OpenAI Responses API
288
288
  lcm-tui rewrite 44 --summary sum_abc123 --provider openai --model gpt-5.3-codex --apply
289
289
 
290
+ # Rewrite through a custom OpenAI-compatible proxy
291
+ lcm-tui rewrite 44 --summary sum_abc123 --provider openai --model gpt-5.3-codex --base-url https://proxy.example.com/openai --apply
292
+
290
293
  # Use custom prompt templates
291
294
  lcm-tui rewrite 44 --all --apply --prompt-dir ~/.config/lcm-tui/prompts
292
295
  ```
@@ -301,6 +304,7 @@ lcm-tui rewrite 44 --all --apply --prompt-dir ~/.config/lcm-tui/prompts
301
304
  | `--diff` | Show unified diff |
302
305
  | `--provider <id>` | API provider (inferred from `--model` when omitted) |
303
306
  | `--model <model>` | API model (default depends on provider) |
307
+ | `--base-url <url>` | Custom API base URL (overrides config and env) |
304
308
  | `--prompt-dir <path>` | Custom prompt template directory |
305
309
  | `--timestamps` | Inject timestamps into source text (default: true) |
306
310
  | `--tz <timezone>` | Timezone for timestamps (default: system local) |
@@ -378,6 +382,9 @@ lcm-tui backfill my-agent session_abc123 --apply --transplant-to 653
378
382
 
379
383
  # Backfill using OpenAI
380
384
  lcm-tui backfill my-agent session_abc123 --apply --provider openai --model gpt-5.3-codex
385
+
386
+ # Backfill through a custom OpenAI-compatible proxy
387
+ lcm-tui backfill my-agent session_abc123 --apply --provider openai --model gpt-5.3-codex --base-url https://proxy.example.com/openai
381
388
  ```
382
389
 
383
390
  All write paths are transactional:
@@ -404,6 +411,7 @@ An idempotency guard prevents duplicate imports for the same `session_id`.
404
411
  | `--fresh-tail <n>` | Preserve freshest N raw messages from leaf compaction |
405
412
  | `--provider <id>` | API provider (inferred from model when omitted) |
406
413
  | `--model <id>` | API model (default depends on provider) |
414
+ | `--base-url <url>` | Custom API base URL (overrides config and env) |
407
415
  | `--prompt-dir <path>` | Custom depth-prompt directory |
408
416
 
409
417
  ### `lcm-tui prompts`
@@ -479,9 +487,10 @@ If the provider auth profile mode is `oauth` (not `api_key`), set the provider A
479
487
  Interactive rewrite (`w`/`W`) can be configured with:
480
488
  - `LCM_TUI_SUMMARY_PROVIDER`
481
489
  - `LCM_TUI_SUMMARY_MODEL`
490
+ - `LCM_TUI_SUMMARY_BASE_URL`
482
491
  - `LCM_TUI_CONVERSATION_WINDOW_SIZE` (default `200`)
483
492
 
484
- It also honors `LCM_SUMMARY_PROVIDER` / `LCM_SUMMARY_MODEL` as fallback.
493
+ It also honors `LCM_SUMMARY_PROVIDER` / `LCM_SUMMARY_MODEL` / `LCM_SUMMARY_BASE_URL` as fallback.
485
494
 
486
495
  ## Database
487
496
 
@@ -13,6 +13,10 @@
13
13
  "label": "Fresh Tail Count",
14
14
  "help": "Number of recent messages protected from compaction"
15
15
  },
16
+ "leafChunkTokens": {
17
+ "label": "Leaf Chunk Tokens",
18
+ "help": "Maximum source tokens per leaf compaction chunk before summarization"
19
+ },
16
20
  "dbPath": {
17
21
  "label": "Database Path",
18
22
  "help": "Path to LCM SQLite database (default: ~/.openclaw/lcm.db)"
@@ -44,6 +48,22 @@
44
48
  "expansionProvider": {
45
49
  "label": "Expansion Provider",
46
50
  "help": "Provider override for lcm_expand_query sub-agent (e.g., 'anthropic')"
51
+ },
52
+ "delegationTimeoutMs": {
53
+ "label": "Delegation Timeout (ms)",
54
+ "help": "Maximum time to wait for delegated lcm_expand_query sub-agent completion before timing out"
55
+ },
56
+ "maxAssemblyTokenBudget": {
57
+ "label": "Max Assembly Token Budget",
58
+ "help": "Hard ceiling for assembly token budget — caps runtime-provided and fallback budgets. Set for smaller context-window models (e.g., 30000 for 32k models)"
59
+ },
60
+ "summaryMaxOverageFactor": {
61
+ "label": "Summary Max Overage Factor",
62
+ "help": "Maximum allowed overage factor for summaries relative to target tokens (default 3). Summaries exceeding this are deterministically truncated."
63
+ },
64
+ "customInstructions": {
65
+ "label": "Custom Instructions",
66
+ "help": "Natural language instructions injected into all summarization prompts (e.g., formatting rules, tone control)"
47
67
  }
48
68
  },
49
69
  "configSchema": {
@@ -66,6 +86,10 @@
66
86
  "type": "integer",
67
87
  "minimum": 1
68
88
  },
89
+ "leafChunkTokens": {
90
+ "type": "integer",
91
+ "minimum": 1
92
+ },
69
93
  "leafMinFanout": {
70
94
  "type": "integer",
71
95
  "minimum": 2
@@ -111,6 +135,21 @@
111
135
  },
112
136
  "expansionProvider": {
113
137
  "type": "string"
138
+ },
139
+ "delegationTimeoutMs": {
140
+ "type": "integer",
141
+ "minimum": 1
142
+ },
143
+ "maxAssemblyTokenBudget": {
144
+ "type": "integer",
145
+ "minimum": 1000
146
+ },
147
+ "summaryMaxOverageFactor": {
148
+ "type": "number",
149
+ "minimum": 1
150
+ },
151
+ "customInstructions": {
152
+ "type": "string"
114
153
  }
115
154
  }
116
155
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@martian-engineering/lossless-claw",
3
- "version": "0.5.1",
3
+ "version": "0.5.3",
4
4
  "description": "Lossless Context Management plugin for OpenClaw — DAG-based conversation summarization with incremental compaction",
5
5
  "type": "module",
6
6
  "main": "index.ts",
package/src/assembler.ts CHANGED
@@ -9,6 +9,15 @@ import type { SummaryStore, ContextItemRecord, SummaryRecord } from "./store/sum
9
9
 
10
10
  type AgentMessage = Parameters<ContextEngine["ingest"]>[0]["message"];
11
11
 
12
+ const TOOL_CALL_TYPES = new Set([
13
+ "toolCall",
14
+ "toolUse",
15
+ "tool_use",
16
+ "tool-use",
17
+ "functionCall",
18
+ "function_call",
19
+ ]);
20
+
12
21
  // ── Public types ─────────────────────────────────────────────────────────────
13
22
 
14
23
  export interface AssembleContextInput {
@@ -67,6 +76,10 @@ function buildSystemPromptAddition(summarySignals: SummaryPromptSignal[]): strin
67
76
  "",
68
77
  "**Recall priority:** Use LCM tools first for compacted conversation history. If LCM does not cover the needed data, prefer any available memory/recall tool before falling back to raw text search.",
69
78
  "",
79
+ "**Conflict handling:** If newer evidence conflicts with an older summary or recollection, prefer the newer evidence. Do not trust a stale summary over fresher contradictory information.",
80
+ "",
81
+ "**Contradictions/uncertainty:** If facts seem contradictory or uncertain, verify with LCM tools before answering instead of trusting the summary at face value.",
82
+ "",
70
83
  "**Tool escalation:**",
71
84
  "1. `lcm_grep` — search by regex or full-text across messages and summaries",
72
85
  "2. `lcm_describe` — inspect a specific summary (cheap, no sub-agent)",
@@ -92,6 +105,7 @@ function buildSystemPromptAddition(summarySignals: SummaryPromptSignal[]): strin
92
105
  "3) Answer with citations to summary IDs used",
93
106
  "",
94
107
  "**Uncertainty checklist (run before answering):**",
108
+ "- Am I relying on an older summary even though newer evidence disagrees?",
95
109
  "- Am I making exact factual claims from a condensed summary?",
96
110
  "- Could compaction have omitted a crucial detail?",
97
111
  "- Would this answer fail if the user asks for proof?",
@@ -555,6 +569,174 @@ function pickToolIsError(parts: MessagePartRecord[]): boolean | undefined {
555
569
  return undefined;
556
570
  }
557
571
 
572
+ function extractToolCallId(block: { id?: unknown; call_id?: unknown }): string | null {
573
+ if (typeof block.id === "string" && block.id.length > 0) {
574
+ return block.id;
575
+ }
576
+ if (typeof block.call_id === "string" && block.call_id.length > 0) {
577
+ return block.call_id;
578
+ }
579
+ return null;
580
+ }
581
+
582
+ function extractToolCallIdsFromAssistant(message: AgentMessage): string[] {
583
+ if (message?.role !== "assistant" || !Array.isArray(message.content)) {
584
+ return [];
585
+ }
586
+
587
+ const ids: string[] = [];
588
+ for (const block of message.content) {
589
+ if (!block || typeof block !== "object") {
590
+ continue;
591
+ }
592
+ const record = block as { type?: unknown; id?: unknown; call_id?: unknown };
593
+ if (typeof record.type !== "string" || !TOOL_CALL_TYPES.has(record.type)) {
594
+ continue;
595
+ }
596
+ const id = extractToolCallId(record);
597
+ if (id) {
598
+ ids.push(id);
599
+ }
600
+ }
601
+ return ids;
602
+ }
603
+
604
+ function extractToolResultIdFromMessage(message: AgentMessage): string | null {
605
+ if (!message || typeof message !== "object") {
606
+ return null;
607
+ }
608
+ if (typeof message.toolCallId === "string" && message.toolCallId.length > 0) {
609
+ return message.toolCallId;
610
+ }
611
+ if (typeof message.toolUseId === "string" && message.toolUseId.length > 0) {
612
+ return message.toolUseId;
613
+ }
614
+ return null;
615
+ }
616
+
617
+ function collectAssistantToolCallIds(items: ResolvedItem[]): Set<string> {
618
+ const ids = new Set<string>();
619
+ for (const item of items) {
620
+ for (const id of extractToolCallIdsFromAssistant(item.message)) {
621
+ ids.add(id);
622
+ }
623
+ }
624
+ return ids;
625
+ }
626
+
627
+ function mergeFreshTailWithMatchingToolResults(
628
+ freshTail: ResolvedItem[],
629
+ matchingToolResults: ResolvedItem[],
630
+ ): ResolvedItem[] {
631
+ if (matchingToolResults.length === 0) {
632
+ return freshTail;
633
+ }
634
+
635
+ const resultsById = new Map<string, ResolvedItem[]>();
636
+ for (const item of matchingToolResults) {
637
+ const toolResultId = extractToolResultIdFromMessage(item.message);
638
+ if (!toolResultId) {
639
+ continue;
640
+ }
641
+ const existing = resultsById.get(toolResultId);
642
+ if (existing) {
643
+ existing.push(item);
644
+ } else {
645
+ resultsById.set(toolResultId, [item]);
646
+ }
647
+ }
648
+
649
+ const merged: ResolvedItem[] = [];
650
+ const usedOrdinals = new Set<number>();
651
+
652
+ for (const item of freshTail) {
653
+ merged.push(item);
654
+
655
+ const toolCallIds = extractToolCallIdsFromAssistant(item.message);
656
+ if (toolCallIds.length === 0) {
657
+ continue;
658
+ }
659
+
660
+ for (const toolCallId of toolCallIds) {
661
+ const matches = resultsById.get(toolCallId);
662
+ if (!matches) {
663
+ continue;
664
+ }
665
+ for (const match of matches) {
666
+ if (usedOrdinals.has(match.ordinal)) {
667
+ continue;
668
+ }
669
+ merged.push(match);
670
+ usedOrdinals.add(match.ordinal);
671
+ }
672
+ }
673
+ }
674
+
675
+ for (const item of matchingToolResults) {
676
+ if (!usedOrdinals.has(item.ordinal)) {
677
+ merged.push(item);
678
+ }
679
+ }
680
+
681
+ return merged;
682
+ }
683
+
684
+ function filterNonFreshAssistantToolCalls(
685
+ items: ResolvedItem[],
686
+ freshTailOrdinals: Set<number>,
687
+ ): AgentMessage[] {
688
+ const availableToolResultIds = new Set<string>();
689
+ for (const item of items) {
690
+ const toolResultId = extractToolResultIdFromMessage(item.message);
691
+ if (toolResultId) {
692
+ availableToolResultIds.add(toolResultId);
693
+ }
694
+ }
695
+
696
+ const filteredMessages: AgentMessage[] = [];
697
+ for (const item of items) {
698
+ if (item.message?.role !== "assistant" || freshTailOrdinals.has(item.ordinal)) {
699
+ filteredMessages.push(item.message);
700
+ continue;
701
+ }
702
+
703
+ if (!Array.isArray(item.message.content)) {
704
+ filteredMessages.push(item.message);
705
+ continue;
706
+ }
707
+
708
+ let removedAny = false;
709
+ const content = item.message.content.filter((block) => {
710
+ if (!block || typeof block !== "object") {
711
+ return true;
712
+ }
713
+ const record = block as { type?: unknown; id?: unknown; call_id?: unknown };
714
+ if (typeof record.type !== "string" || !TOOL_CALL_TYPES.has(record.type)) {
715
+ return true;
716
+ }
717
+ const toolCallId = extractToolCallId(record);
718
+ if (!toolCallId || availableToolResultIds.has(toolCallId)) {
719
+ return true;
720
+ }
721
+ removedAny = true;
722
+ return false;
723
+ });
724
+
725
+ if (content.length === 0) {
726
+ continue;
727
+ }
728
+ if (!removedAny) {
729
+ filteredMessages.push(item.message);
730
+ continue;
731
+ }
732
+ filteredMessages.push({
733
+ ...item.message,
734
+ content: content as typeof item.message.content,
735
+ } as AgentMessage);
736
+ }
737
+ return filteredMessages;
738
+ }
739
+
558
740
  /** Format a Date for XML attributes in the agent's timezone. */
559
741
  function formatDateForAttribute(date: Date, timezone?: string): string {
560
742
  const tz = timezone ?? "UTC";
@@ -692,8 +874,17 @@ export class ContextAssembler {
692
874
 
693
875
  // Step 3: Split into evictable prefix and protected fresh tail
694
876
  const tailStart = Math.max(0, resolved.length - freshTailCount);
695
- const freshTail = resolved.slice(tailStart);
696
- const evictable = resolved.slice(0, tailStart);
877
+ const baseFreshTail = resolved.slice(tailStart);
878
+ const initialEvictable = resolved.slice(0, tailStart);
879
+ const freshTailOrdinals = new Set(baseFreshTail.map((item) => item.ordinal));
880
+ const tailToolCallIds = collectAssistantToolCallIds(baseFreshTail);
881
+ const tailPairToolResults = initialEvictable.filter((item) => {
882
+ const toolResultId = extractToolResultIdFromMessage(item.message);
883
+ return toolResultId !== null && tailToolCallIds.has(toolResultId);
884
+ });
885
+ const protectedEvictableOrdinals = new Set(tailPairToolResults.map((item) => item.ordinal));
886
+ const evictable = initialEvictable.filter((item) => !protectedEvictableOrdinals.has(item.ordinal));
887
+ const freshTail = mergeFreshTailWithMatchingToolResults(baseFreshTail, tailPairToolResults);
697
888
 
698
889
  // Step 4: Budget-aware selection
699
890
  // First, compute the token cost of the fresh tail (always included).
@@ -747,7 +938,7 @@ export class ContextAssembler {
747
938
 
748
939
  // Normalize assistant string content to array blocks (some providers return
749
940
  // content as a plain string; Anthropic expects content block arrays).
750
- const rawMessages = selected.map((item) => item.message);
941
+ const rawMessages = filterNonFreshAssistantToolCalls(selected, freshTailOrdinals);
751
942
  for (let i = 0; i < rawMessages.length; i++) {
752
943
  const msg = rawMessages[i];
753
944
  if (msg?.role === "assistant" && typeof msg.content === "string") {