@martian-engineering/lossless-claw 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -98,8 +98,8 @@ Add a `lossless-claw` entry under `plugins.entries` in your OpenClaw config:
98
98
  "ignoreSessionPatterns": [
99
99
  "agent:*:cron:**"
100
100
  ],
101
- "summaryProvider": "anthropic",
102
- "summaryModel": "claude-3-5-haiku"
101
+ "summaryModel": "anthropic/claude-haiku-4-5",
102
+ "expansionModel": "anthropic/claude-haiku-4-5"
103
103
  }
104
104
  }
105
105
  }
@@ -107,7 +107,7 @@ Add a `lossless-claw` entry under `plugins.entries` in your OpenClaw config:
107
107
  }
108
108
  ```
109
109
 
110
- `summaryModel` and `summaryProvider` let you pin compaction summarization to a cheaper or faster model than your main OpenClaw session model. When unset, LCM uses OpenClaw's configured default model/provider.
110
+ `summaryModel` and `summaryProvider` let you pin compaction summarization to a cheaper or faster model than your main OpenClaw session model. `expansionModel` does the same for `lcm_expand_query` sub-agent calls (drilling into summaries to recover detail). When unset, both fall back to OpenClaw's configured default model/provider. See [Expansion model override requirements](#expansion-model-override-requirements) for the required `subagent` trust policy when using `expansionModel`.
111
111
 
112
112
  ### Environment variables
113
113
 
@@ -133,6 +133,7 @@ Add a `lossless-claw` entry under `plugins.entries` in your OpenClaw config:
133
133
  | `LCM_LARGE_FILE_SUMMARY_MODEL` | `""` | Model override for large-file summarization |
134
134
  | `LCM_SUMMARY_MODEL` | `""` | Model override for compaction summarization; falls back to OpenClaw's default model when unset |
135
135
  | `LCM_SUMMARY_PROVIDER` | `""` | Provider override for compaction summarization; falls back to `OPENCLAW_PROVIDER` or the provider embedded in the model ref |
136
+ | `LCM_SUMMARY_BASE_URL` | *(from OpenClaw / provider default)* | Base URL override for summarization API calls |
136
137
  | `LCM_EXPANSION_MODEL` | *(from OpenClaw)* | Model override for `lcm_expand_query` sub-agent (e.g. `anthropic/claude-haiku-4-5`) |
137
138
  | `LCM_EXPANSION_PROVIDER` | *(from OpenClaw)* | Provider override for `lcm_expand_query` sub-agent |
138
139
  | `LCM_AUTOCOMPACT_DISABLED` | `false` | Disable automatic compaction after turns |
@@ -99,6 +99,7 @@ LCM uses the same model as the parent OpenClaw session for summarization by defa
99
99
  # Use a specific model for summarization
100
100
  export LCM_SUMMARY_MODEL=anthropic/claude-sonnet-4-20250514
101
101
  export LCM_SUMMARY_PROVIDER=anthropic
102
+ export LCM_SUMMARY_BASE_URL=https://api.anthropic.com
102
103
  ```
103
104
 
104
105
  Using a cheaper/faster model for summarization can reduce costs, but quality matters — poor summaries compound as they're condensed into higher-level nodes.
package/docs/tui.md CHANGED
@@ -287,6 +287,9 @@ lcm-tui rewrite 44 --all --apply --diff
287
287
  # Rewrite with OpenAI Responses API
288
288
  lcm-tui rewrite 44 --summary sum_abc123 --provider openai --model gpt-5.3-codex --apply
289
289
 
290
+ # Rewrite through a custom OpenAI-compatible proxy
291
+ lcm-tui rewrite 44 --summary sum_abc123 --provider openai --model gpt-5.3-codex --base-url https://proxy.example.com/openai --apply
292
+
290
293
  # Use custom prompt templates
291
294
  lcm-tui rewrite 44 --all --apply --prompt-dir ~/.config/lcm-tui/prompts
292
295
  ```
@@ -301,6 +304,7 @@ lcm-tui rewrite 44 --all --apply --prompt-dir ~/.config/lcm-tui/prompts
301
304
  | `--diff` | Show unified diff |
302
305
  | `--provider <id>` | API provider (inferred from `--model` when omitted) |
303
306
  | `--model <model>` | API model (default depends on provider) |
307
+ | `--base-url <url>` | Custom API base URL (overrides config and env) |
304
308
  | `--prompt-dir <path>` | Custom prompt template directory |
305
309
  | `--timestamps` | Inject timestamps into source text (default: true) |
306
310
  | `--tz <timezone>` | Timezone for timestamps (default: system local) |
@@ -378,6 +382,9 @@ lcm-tui backfill my-agent session_abc123 --apply --transplant-to 653
378
382
 
379
383
  # Backfill using OpenAI
380
384
  lcm-tui backfill my-agent session_abc123 --apply --provider openai --model gpt-5.3-codex
385
+
386
+ # Backfill through a custom OpenAI-compatible proxy
387
+ lcm-tui backfill my-agent session_abc123 --apply --provider openai --model gpt-5.3-codex --base-url https://proxy.example.com/openai
381
388
  ```
382
389
 
383
390
  All write paths are transactional:
@@ -404,6 +411,7 @@ An idempotency guard prevents duplicate imports for the same `session_id`.
404
411
  | `--fresh-tail <n>` | Preserve freshest N raw messages from leaf compaction |
405
412
  | `--provider <id>` | API provider (inferred from model when omitted) |
406
413
  | `--model <id>` | API model (default depends on provider) |
414
+ | `--base-url <url>` | Custom API base URL (overrides config and env) |
407
415
  | `--prompt-dir <path>` | Custom depth-prompt directory |
408
416
 
409
417
  ### `lcm-tui prompts`
@@ -479,9 +487,10 @@ If the provider auth profile mode is `oauth` (not `api_key`), set the provider A
479
487
  Interactive rewrite (`w`/`W`) can be configured with:
480
488
  - `LCM_TUI_SUMMARY_PROVIDER`
481
489
  - `LCM_TUI_SUMMARY_MODEL`
490
+ - `LCM_TUI_SUMMARY_BASE_URL`
482
491
  - `LCM_TUI_CONVERSATION_WINDOW_SIZE` (default `200`)
483
492
 
484
- It also honors `LCM_SUMMARY_PROVIDER` / `LCM_SUMMARY_MODEL` as fallback.
493
+ It also honors `LCM_SUMMARY_PROVIDER` / `LCM_SUMMARY_MODEL` / `LCM_SUMMARY_BASE_URL` as fallback.
485
494
 
486
495
  ## Database
487
496
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@martian-engineering/lossless-claw",
3
- "version": "0.5.1",
3
+ "version": "0.5.2",
4
4
  "description": "Lossless Context Management plugin for OpenClaw — DAG-based conversation summarization with incremental compaction",
5
5
  "type": "module",
6
6
  "main": "index.ts",
package/src/compaction.ts CHANGED
@@ -2,6 +2,7 @@ import { createHash } from "node:crypto";
2
2
  import type { ConversationStore, CreateMessagePartInput } from "./store/conversation-store.js";
3
3
  import type { SummaryStore, SummaryRecord, ContextItemRecord } from "./store/summary-store.js";
4
4
  import { extractFileIdsFromContent } from "./large-files.js";
5
+ import { LcmProviderAuthError } from "./summarize.js";
5
6
 
6
7
  // ── Public types ─────────────────────────────────────────────────────────────
7
8
 
@@ -1001,6 +1002,9 @@ export class CompactionEngine {
1001
1002
  /**
1002
1003
  * Run three-level summarization escalation:
1003
1004
  * normal -> aggressive -> deterministic fallback.
1005
+ *
1006
+ * Provider-auth failures are treated as non-compacting skips so we do not
1007
+ * persist truncation artifacts into the summary DAG.
1004
1008
  */
1005
1009
  private async summarizeWithEscalation(params: {
1006
1010
  sourceText: string;
@@ -1026,17 +1030,31 @@ export class CompactionEngine {
1026
1030
  level: "fallback",
1027
1031
  };
1028
1032
  };
1029
-
1030
- const runSummarizer = async (aggressiveMode: boolean): Promise<string | null> => {
1031
- const output = await params.summarize(sourceText, aggressiveMode, params.options);
1033
+ const authFailure = Symbol("authFailure");
1034
+
1035
+ const runSummarizer = async (
1036
+ aggressiveMode: boolean,
1037
+ ): Promise<string | null | typeof authFailure> => {
1038
+ let output: string;
1039
+ try {
1040
+ output = await params.summarize(sourceText, aggressiveMode, params.options);
1041
+ } catch (err) {
1042
+ if (err instanceof LcmProviderAuthError) {
1043
+ return authFailure;
1044
+ }
1045
+ throw err;
1046
+ }
1032
1047
  const trimmed = output.trim();
1033
1048
  return trimmed || null;
1034
1049
  };
1035
1050
 
1036
1051
  const initialSummary = await runSummarizer(false);
1052
+ if (initialSummary === authFailure) {
1053
+ return null;
1054
+ }
1037
1055
  if (initialSummary === null) {
1038
- // Empty provider output should still compact deterministically so auth
1039
- // failures or empty responses do not stall compaction entirely.
1056
+ // Empty provider output should still compact deterministically so a
1057
+ // silent no-op does not stall compaction forever.
1040
1058
  return buildDeterministicFallback();
1041
1059
  }
1042
1060
  let summaryText = initialSummary;
@@ -1044,6 +1062,9 @@ export class CompactionEngine {
1044
1062
 
1045
1063
  if (estimateTokens(summaryText) >= inputTokens) {
1046
1064
  const aggressiveSummary = await runSummarizer(true);
1065
+ if (aggressiveSummary === authFailure) {
1066
+ return null;
1067
+ }
1047
1068
  if (aggressiveSummary === null) {
1048
1069
  return buildDeterministicFallback();
1049
1070
  }
@@ -1149,7 +1170,7 @@ export class CompactionEngine {
1149
1170
  });
1150
1171
  if (!summary) {
1151
1172
  console.warn(
1152
- `[lcm] leaf summarizer returned empty content; conversationId=${conversationId}; chunkMessages=${messageContents.length}; skipping leaf chunk`,
1173
+ `[lcm] leaf compaction skipped summary write; conversationId=${conversationId}; chunkMessages=${messageContents.length}`,
1153
1174
  );
1154
1175
  return null;
1155
1176
  }
@@ -1256,7 +1277,7 @@ export class CompactionEngine {
1256
1277
  });
1257
1278
  if (!condensed) {
1258
1279
  console.warn(
1259
- `[lcm] condensed summarizer returned empty content; conversationId=${conversationId}; depth=${targetDepth}; chunkSummaries=${summaryRecords.length}; skipping condensed chunk`,
1280
+ `[lcm] condensed compaction skipped summary write; conversationId=${conversationId}; depth=${targetDepth}; chunkSummaries=${summaryRecords.length}`,
1260
1281
  );
1261
1282
  return null;
1262
1283
  }
package/src/engine.ts CHANGED
@@ -45,7 +45,7 @@ import {
45
45
  type MessagePartType,
46
46
  } from "./store/conversation-store.js";
47
47
  import { SummaryStore } from "./store/summary-store.js";
48
- import { createLcmSummarizeFromLegacyParams } from "./summarize.js";
48
+ import { createLcmSummarizeFromLegacyParams, LcmProviderAuthError } from "./summarize.js";
49
49
  import type { LcmDependencies } from "./types.js";
50
50
 
51
51
  type AgentMessage = Parameters<ContextEngine["ingest"]>[0]["message"];
@@ -1277,7 +1277,15 @@ export class LcmContextEngine implements ContextEngine {
1277
1277
  }
1278
1278
 
1279
1279
  this.largeFileTextSummarizer = async (prompt: string): Promise<string | null> => {
1280
- const summary = await result.fn(prompt, false);
1280
+ let summary: string;
1281
+ try {
1282
+ summary = await result.fn(prompt, false);
1283
+ } catch (err) {
1284
+ if (err instanceof LcmProviderAuthError) {
1285
+ return null;
1286
+ }
1287
+ throw err;
1288
+ }
1281
1289
  if (typeof summary !== "string") {
1282
1290
  return null;
1283
1291
  }
@@ -1294,6 +1294,8 @@ function createLcmDependencies(api: OpenClawPluginApi): LcmDependencies {
1294
1294
  return sub.run({
1295
1295
  sessionKey: String(params.params?.sessionKey ?? ""),
1296
1296
  message: String(params.params?.message ?? ""),
1297
+ provider: params.params?.provider as string | undefined,
1298
+ model: params.params?.model as string | undefined,
1297
1299
  extraSystemPrompt: params.params?.extraSystemPrompt as string | undefined,
1298
1300
  lane: params.params?.lane as string | undefined,
1299
1301
  deliver: (params.params?.deliver as boolean) ?? false,
package/src/summarize.ts CHANGED
@@ -42,6 +42,28 @@ type ProviderAuthFailure = {
42
42
  missingModelRequestScope: boolean;
43
43
  };
44
44
 
45
/**
 * Error thrown when a summarizer call fails because of a provider-auth
 * problem. It exists so callers can tell this case apart from an empty
 * summary instead of handling both the same way.
 *
 * The error message is the text produced by `buildProviderAuthWarning` for
 * the same provider/model/failure triple; the triple itself is kept on the
 * instance for callers that want to inspect it.
 */
export class LcmProviderAuthError extends Error {
  /** Provider id passed in by the code that raised the failure. */
  readonly provider: string;
  /** Model id passed in by the code that raised the failure. */
  readonly model: string;
  /** Auth-failure details passed in by the code that raised the failure. */
  readonly failure: ProviderAuthFailure;

  constructor(params: { provider: string; model: string; failure: ProviderAuthFailure }) {
    super(buildProviderAuthWarning(params));
    const { provider, model, failure } = params;
    this.name = "LcmProviderAuthError";
    this.provider = provider;
    this.model = model;
    this.failure = failure;
  }
}
66
+
45
67
  /**
46
68
  * Default timeout for a single summarizer LLM call. Long enough for large
47
69
  * context windows on slower providers, short enough to prevent the gateway
@@ -976,13 +998,13 @@ export async function createLcmSummarizeFromLegacyParams(params: {
976
998
  },
977
999
  ],
978
1000
  maxTokens: targetTokens,
979
- temperature: aggressive ? 0.1 : 0.2,
980
1001
  }), SUMMARIZER_TIMEOUT_MS, "initial");
981
1002
  } catch (err) {
982
1003
  const authFailure = extractProviderAuthFailure(err);
983
1004
  if (authFailure) {
984
- console.warn(buildProviderAuthWarning({ provider, model, failure: authFailure }));
985
- return "";
1005
+ const authError = new LcmProviderAuthError({ provider, model, failure: authFailure });
1006
+ console.warn(authError.message);
1007
+ throw authError;
986
1008
  }
987
1009
  const errMsg = err instanceof Error ? err.message : String(err);
988
1010
  const isTimeout = errMsg.includes("summarizer timeout");
@@ -1000,8 +1022,9 @@ export async function createLcmSummarizeFromLegacyParams(params: {
1000
1022
 
1001
1023
  const authFailure = extractProviderAuthFailure(result);
1002
1024
  if (authFailure) {
1003
- console.warn(buildProviderAuthWarning({ provider, model, failure: authFailure }));
1004
- return "";
1025
+ const authError = new LcmProviderAuthError({ provider, model, failure: authFailure });
1026
+ console.warn(authError.message);
1027
+ throw authError;
1005
1028
  }
1006
1029
 
1007
1030
  const normalized = normalizeCompletionSummary(result.content);
@@ -1059,7 +1082,6 @@ export async function createLcmSummarizeFromLegacyParams(params: {
1059
1082
  },
1060
1083
  ],
1061
1084
  maxTokens: targetTokens,
1062
- temperature: 0.05,
1063
1085
  reasoning: "low",
1064
1086
  }), SUMMARIZER_TIMEOUT_MS, "retry");
1065
1087
  const retryAuthFailure = extractProviderAuthFailure(retryResult);
@@ -80,6 +80,76 @@ type SummaryCandidate = {
80
80
  conversationId: number;
81
81
  };
82
82
 
83
/**
 * Recursively gather human-readable failure fragments from an arbitrary
 * error-like value and append them to `parts`.
 *
 * - strings are trimmed; numbers and booleans are stringified
 * - `Error` instances contribute their message, then their `cause`
 * - arrays contribute every entry
 * - plain objects contribute a fixed set of well-known fields
 *
 * A fragment that is already present in `parts` is skipped, so the same text
 * reachable through several fields (e.g. `message` and `error.message`, or an
 * error and its cause) is reported once rather than repeated.
 *
 * @param value Value to inspect; `null`/`undefined` contribute nothing.
 * @param parts Accumulator, mutated in place.
 * @param depth Current recursion depth; anything deeper than 3 is ignored,
 *   which also bounds traversal of cyclic structures.
 */
function collectExpansionFailureText(value: unknown, parts: string[], depth = 0): void {
  if (depth > 3 || value == null) {
    return;
  }

  // Single entry point for appending so trimming and de-duplication are
  // applied uniformly to every kind of value.
  const pushFragment = (text: string): void => {
    const trimmed = text.trim();
    if (trimmed && !parts.includes(trimmed)) {
      parts.push(trimmed);
    }
  };

  if (typeof value === "string") {
    pushFragment(value);
    return;
  }
  if (typeof value === "number" || typeof value === "boolean") {
    pushFragment(String(value));
    return;
  }
  if (value instanceof Error) {
    pushFragment(value.message);
    collectExpansionFailureText(value.cause, parts, depth + 1);
    return;
  }
  if (Array.isArray(value)) {
    for (const entry of value) {
      collectExpansionFailureText(entry, parts, depth + 1);
    }
    return;
  }
  if (typeof value === "object") {
    const record = value as Record<string, unknown>;
    for (const key of ["message", "error", "reason", "details", "response", "cause", "code"]) {
      collectExpansionFailureText(record[key], parts, depth + 1);
    }
  }
}

/**
 * Turn an arbitrary thrown value or gateway error payload into a single-line
 * failure message.
 *
 * @param error Any value: string, Error, array, or error-like object.
 * @returns The collected fragments joined by single spaces with all runs of
 *   whitespace collapsed, or "Delegated expansion query failed." when nothing
 *   usable was found. Never returns an empty string.
 */
function formatExpansionFailure(error: unknown): string {
  const parts: string[] = [];
  collectExpansionFailureText(error, parts);
  const message = parts.join(" ").replace(/\s+/g, " ").trim();
  // A non-blank string input is always collected above, so no separate
  // string fallback is needed here.
  return message || "Delegated expansion query failed.";
}
131
+
132
/**
 * Decide whether a failed delegated expansion that used a provider/model
 * override should be retried without the override.
 *
 * The decision is a case-insensitive text match on the failure message:
 * either one of the known authorization / unknown-model phrases appears, or
 * the message carries an HTTP 401/403 status code.
 *
 * Status codes are matched only as standalone tokens (not preceded or
 * followed by a letter or digit). Failure messages can embed UUID-based
 * session keys, run ids and durations, and a bare substring match would
 * treat text such as "1f4019ab" or "14030ms" as an auth failure.
 *
 * @param message Failure text, typically produced by formatExpansionFailure.
 * @returns true when the failure looks caused by the override itself.
 */
function shouldRetryWithoutOverride(message: string): boolean {
  const normalized = message.toLowerCase();

  const phraseSignals = [
    "model.request",
    "missing scopes",
    "insufficient scope",
    "unauthorized",
    "not authorized",
    "forbidden",
    "provider/model overrides are not authorized",
    "model override is not authorized",
    "unknown model",
    "model not found",
    "invalid model",
    "not available",
    "not supported",
  ];
  if (phraseSignals.some((signal) => normalized.includes(signal))) {
    return true;
  }

  // `normalized` is lower-cased, so [0-9a-z] covers every alphanumeric
  // neighbour; separators such as space, ':', '=', '_' still allow a match.
  return /(?<![0-9a-z])(?:401|403)(?![0-9a-z])/.test(normalized);
}
152
+
83
153
  /**
84
154
  * Build the sub-agent task message for delegated expansion and prompt answering.
85
155
  */
@@ -401,9 +471,6 @@ export function createLcmExpandQueryTool(input: {
401
471
  });
402
472
  }
403
473
 
404
- let childSessionKey = "";
405
- let grantCreated = false;
406
-
407
474
  try {
408
475
  const candidates = await resolveSummaryCandidates({
409
476
  lcm: input.lcm,
@@ -448,26 +515,9 @@ export function createLcmExpandQueryTool(input: {
448
515
  const requesterAgentId = input.deps.normalizeAgentId(
449
516
  input.deps.parseAgentSessionKey(callerSessionKey)?.agentId,
450
517
  );
451
- childSessionKey = `agent:${requesterAgentId}:subagent:${crypto.randomUUID()}`;
452
518
  const childExpansionDepth = resolveNextExpansionDepth(callerSessionKey);
453
519
  const originSessionKey = recursionCheck.originSessionKey || callerSessionKey || "main";
454
520
 
455
- createDelegatedExpansionGrant({
456
- delegatedSessionKey: childSessionKey,
457
- issuerSessionId: callerSessionKey || "main",
458
- allowedConversationIds: [sourceConversationId],
459
- tokenCap: expansionTokenCap,
460
- ttlMs: DELEGATED_WAIT_TIMEOUT_MS + 30_000,
461
- });
462
- stampDelegatedExpansionContext({
463
- sessionKey: childSessionKey,
464
- requestId,
465
- expansionDepth: childExpansionDepth,
466
- originSessionKey,
467
- stampedBy: "lcm_expand_query",
468
- });
469
- grantCreated = true;
470
-
471
521
  const task = buildDelegatedExpandQueryTask({
472
522
  summaryIds,
473
523
  conversationId: sourceConversationId,
@@ -480,118 +530,160 @@ export function createLcmExpandQueryTool(input: {
480
530
  originSessionKey,
481
531
  });
482
532
 
483
- const childIdem = crypto.randomUUID();
484
533
  const expansionProvider = input.deps.config.expansionProvider || undefined;
485
534
  const expansionModel = input.deps.config.expansionModel || undefined;
486
- const response = (await input.deps.callGateway({
487
- method: "agent",
488
- params: {
489
- message: task,
490
- sessionKey: childSessionKey,
491
- deliver: false,
492
- lane: input.deps.agentLaneSubagent,
493
- idempotencyKey: childIdem,
494
- ...(expansionProvider ? { provider: expansionProvider } : {}),
495
- ...(expansionModel ? { model: expansionModel } : {}),
496
- extraSystemPrompt: input.deps.buildSubagentSystemPrompt({
497
- depth: 1,
498
- maxDepth: 8,
499
- taskSummary: "Run lcm_expand and return prompt-focused JSON answer",
500
- }),
501
- },
502
- timeoutMs: GATEWAY_TIMEOUT_MS,
503
- })) as { runId?: string };
504
-
505
- const runId = typeof response?.runId === "string" ? response.runId.trim() : "";
506
- if (!runId) {
507
- return jsonResult({
508
- error: "Delegated expansion did not return a runId.",
509
- });
510
- }
535
+ const configuredOverrideLabel =
536
+ expansionProvider && expansionModel
537
+ ? `${expansionProvider}/${expansionModel}`
538
+ : expansionModel || expansionProvider || "configured override";
511
539
 
512
- const wait = (await input.deps.callGateway({
513
- method: "agent.wait",
514
- params: {
515
- runId,
516
- timeoutMs: DELEGATED_WAIT_TIMEOUT_MS,
517
- },
518
- timeoutMs: DELEGATED_WAIT_TIMEOUT_MS,
519
- })) as { status?: string; error?: string };
520
- const status = typeof wait?.status === "string" ? wait.status : "error";
521
- if (status === "timeout") {
522
- recordExpansionDelegationTelemetry({
523
- deps: input.deps,
524
- component: "lcm_expand_query",
525
- event: "timeout",
526
- requestId,
527
- sessionKey: callerSessionKey,
528
- expansionDepth: childExpansionDepth,
529
- originSessionKey,
530
- runId,
531
- });
532
- return jsonResult({
533
- error: "lcm_expand_query timed out waiting for delegated expansion (120s).",
534
- });
535
- }
536
- if (status !== "ok") {
537
- return jsonResult({
538
- error:
539
- typeof wait?.error === "string" && wait.error.trim()
540
- ? wait.error
541
- : "Delegated expansion query failed.",
542
- });
543
- }
544
-
545
- const replyPayload = (await input.deps.callGateway({
546
- method: "sessions.get",
547
- params: { key: childSessionKey, limit: 80 },
548
- timeoutMs: GATEWAY_TIMEOUT_MS,
549
- })) as { messages?: unknown[] };
550
- const reply = input.deps.readLatestAssistantReply(
551
- Array.isArray(replyPayload.messages) ? replyPayload.messages : [],
552
- );
553
- const parsed = parseDelegatedExpandQueryReply(reply, summaryIds.length);
554
- recordExpansionDelegationTelemetry({
555
- deps: input.deps,
556
- component: "lcm_expand_query",
557
- event: "success",
558
- requestId,
559
- sessionKey: callerSessionKey,
560
- expansionDepth: childExpansionDepth,
561
- originSessionKey,
562
- runId,
563
- });
540
+ const runDelegatedQuery = async (provider?: string, model?: string) => {
541
+ const childSessionKey = `agent:${requesterAgentId}:subagent:${crypto.randomUUID()}`;
542
+ const childIdem = crypto.randomUUID();
543
+ let grantCreated = false;
564
544
 
565
- return jsonResult({
566
- answer: parsed.answer,
567
- citedIds: parsed.citedIds,
568
- sourceConversationId,
569
- expandedSummaryCount: parsed.expandedSummaryCount,
570
- totalSourceTokens: parsed.totalSourceTokens,
571
- truncated: parsed.truncated,
572
- });
573
- } catch (error) {
574
- return jsonResult({
575
- error: error instanceof Error ? error.message : String(error),
576
- });
577
- } finally {
578
- if (childSessionKey) {
579
545
  try {
580
- await input.deps.callGateway({
581
- method: "sessions.delete",
582
- params: { key: childSessionKey, deleteTranscript: true },
546
+ createDelegatedExpansionGrant({
547
+ delegatedSessionKey: childSessionKey,
548
+ issuerSessionId: callerSessionKey || "main",
549
+ allowedConversationIds: [sourceConversationId],
550
+ tokenCap: expansionTokenCap,
551
+ ttlMs: DELEGATED_WAIT_TIMEOUT_MS + 30_000,
552
+ });
553
+ stampDelegatedExpansionContext({
554
+ sessionKey: childSessionKey,
555
+ requestId,
556
+ expansionDepth: childExpansionDepth,
557
+ originSessionKey,
558
+ stampedBy: "lcm_expand_query",
559
+ });
560
+ grantCreated = true;
561
+
562
+ const response = (await input.deps.callGateway({
563
+ method: "agent",
564
+ params: {
565
+ message: task,
566
+ sessionKey: childSessionKey,
567
+ deliver: false,
568
+ lane: input.deps.agentLaneSubagent,
569
+ idempotencyKey: childIdem,
570
+ ...(provider ? { provider } : {}),
571
+ ...(model ? { model } : {}),
572
+ extraSystemPrompt: input.deps.buildSubagentSystemPrompt({
573
+ depth: 1,
574
+ maxDepth: 8,
575
+ taskSummary: "Run lcm_expand and return prompt-focused JSON answer",
576
+ }),
577
+ },
578
+ timeoutMs: GATEWAY_TIMEOUT_MS,
579
+ })) as { runId?: unknown; error?: unknown };
580
+
581
+ const runId = typeof response?.runId === "string" ? response.runId.trim() : "";
582
+ if (!runId) {
583
+ throw new Error(
584
+ formatExpansionFailure(response?.error ?? response)
585
+ || "Delegated expansion did not return a runId.",
586
+ );
587
+ }
588
+
589
+ const wait = (await input.deps.callGateway({
590
+ method: "agent.wait",
591
+ params: {
592
+ runId,
593
+ timeoutMs: DELEGATED_WAIT_TIMEOUT_MS,
594
+ },
595
+ timeoutMs: DELEGATED_WAIT_TIMEOUT_MS,
596
+ })) as { status?: string; error?: unknown };
597
+ const status = typeof wait?.status === "string" ? wait.status : "error";
598
+ if (status === "timeout") {
599
+ recordExpansionDelegationTelemetry({
600
+ deps: input.deps,
601
+ component: "lcm_expand_query",
602
+ event: "timeout",
603
+ requestId,
604
+ sessionKey: callerSessionKey,
605
+ expansionDepth: childExpansionDepth,
606
+ originSessionKey,
607
+ runId,
608
+ });
609
+ throw new Error(
610
+ "lcm_expand_query timed out waiting for delegated expansion (120s).",
611
+ );
612
+ }
613
+ if (status !== "ok") {
614
+ throw new Error(formatExpansionFailure(wait?.error));
615
+ }
616
+
617
+ const replyPayload = (await input.deps.callGateway({
618
+ method: "sessions.get",
619
+ params: { key: childSessionKey, limit: 80 },
583
620
  timeoutMs: GATEWAY_TIMEOUT_MS,
621
+ })) as { messages?: unknown[] };
622
+ const reply = input.deps.readLatestAssistantReply(
623
+ Array.isArray(replyPayload.messages) ? replyPayload.messages : [],
624
+ );
625
+ const parsed = parseDelegatedExpandQueryReply(reply, summaryIds.length);
626
+ recordExpansionDelegationTelemetry({
627
+ deps: input.deps,
628
+ component: "lcm_expand_query",
629
+ event: "success",
630
+ requestId,
631
+ sessionKey: callerSessionKey,
632
+ expansionDepth: childExpansionDepth,
633
+ originSessionKey,
634
+ runId,
635
+ });
636
+
637
+ return jsonResult({
638
+ answer: parsed.answer,
639
+ citedIds: parsed.citedIds,
640
+ sourceConversationId,
641
+ expandedSummaryCount: parsed.expandedSummaryCount,
642
+ totalSourceTokens: parsed.totalSourceTokens,
643
+ truncated: parsed.truncated,
584
644
  });
585
- } catch {
586
- // Cleanup is best-effort.
645
+ } finally {
646
+ try {
647
+ await input.deps.callGateway({
648
+ method: "sessions.delete",
649
+ params: { key: childSessionKey, deleteTranscript: true },
650
+ timeoutMs: GATEWAY_TIMEOUT_MS,
651
+ });
652
+ } catch {
653
+ // Cleanup is best-effort.
654
+ }
655
+ if (grantCreated) {
656
+ revokeDelegatedExpansionGrantForSession(childSessionKey, { removeBinding: true });
657
+ }
658
+ clearDelegatedExpansionContext(childSessionKey);
587
659
  }
660
+ };
661
+
662
+ if (!expansionProvider && !expansionModel) {
663
+ return await runDelegatedQuery();
588
664
  }
589
- if (grantCreated && childSessionKey) {
590
- revokeDelegatedExpansionGrantForSession(childSessionKey, { removeBinding: true });
591
- }
592
- if (childSessionKey) {
593
- clearDelegatedExpansionContext(childSessionKey);
665
+
666
+ try {
667
+ return await runDelegatedQuery(expansionProvider, expansionModel);
668
+ } catch (error) {
669
+ const failure = formatExpansionFailure(error);
670
+ input.deps.log.warn(
671
+ `[lcm] delegated expansion override failed (${configuredOverrideLabel}): ${failure}`,
672
+ );
673
+ if (!shouldRetryWithoutOverride(failure)) {
674
+ throw new Error(failure);
675
+ }
676
+ input.deps.log.warn(
677
+ `[lcm] retrying delegated expansion without provider/model override after: ${failure}`,
678
+ );
679
+ return await runDelegatedQuery();
594
680
  }
681
+ } catch (error) {
682
+ const failure = formatExpansionFailure(error);
683
+ input.deps.log.error(`[lcm] delegated expansion query failed: ${failure}`);
684
+ return jsonResult({
685
+ error: failure,
686
+ });
595
687
  }
596
688
  },
597
689
  };