@martian-engineering/lossless-claw 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/compaction.ts CHANGED
@@ -2,6 +2,7 @@ import { createHash } from "node:crypto";
2
2
  import type { ConversationStore, CreateMessagePartInput } from "./store/conversation-store.js";
3
3
  import type { SummaryStore, SummaryRecord, ContextItemRecord } from "./store/summary-store.js";
4
4
  import { extractFileIdsFromContent } from "./large-files.js";
5
+ import { LcmProviderAuthError } from "./summarize.js";
5
6
 
6
7
  // ── Public types ─────────────────────────────────────────────────────────────
7
8
 
@@ -37,7 +38,7 @@ export interface CompactionConfig {
37
38
  condensedMinFanout: number;
38
39
  /** Relaxed minimum fanout for hard-trigger sweeps. */
39
40
  condensedMinFanoutHard: number;
40
- /** Incremental depth passes to run after each leaf compaction (default 0). */
41
+ /** Incremental depth passes to run after each leaf compaction (default 1). */
41
42
  incrementalMaxDepth: number;
42
43
  /** Max source tokens to compact per leaf/condensed chunk (default 20000) */
43
44
  leafChunkTokens?: number;
@@ -49,9 +50,11 @@ export interface CompactionConfig {
49
50
  maxRounds: number;
50
51
  /** IANA timezone for timestamps in summaries (default: UTC) */
51
52
  timezone?: string;
53
+ /** Maximum allowed overage factor for summaries relative to target tokens (default 3). */
54
+ summaryMaxOverageFactor: number;
52
55
  }
53
56
 
54
- type CompactionLevel = "normal" | "aggressive" | "fallback";
57
+ type CompactionLevel = "normal" | "aggressive" | "fallback" | "capped";
55
58
  type CompactionPass = "leaf" | "condensed";
56
59
  type CompactionSummarizeOptions = {
57
60
  previousSummary?: string;
@@ -85,6 +88,30 @@ function estimateTokens(content: string): number {
85
88
  return Math.ceil(content.length / 4);
86
89
  }
87
90
 
91
/**
 * Deterministically cap summary text so the persisted output stays within
 * `maxTokens`, as measured by `estimateTokens` (4 characters per token).
 *
 * Progressively shorter "[Capped …]" markers are tried so that a marker is
 * kept whenever the budget has room for one; the last candidate is a bare
 * truncation with no marker at all.
 *
 * Truncation never ends between the two halves of a UTF-16 surrogate pair:
 * a dangling high surrogate is dropped so the stored text stays well-formed.
 * Dropping one code unit only shortens the result, so the budget still holds.
 *
 * @param content Summary text that exceeded the budget.
 * @param originalTokens Token estimate of `content`, reported in the marker.
 * @param maxTokens Maximum token estimate allowed for the returned text.
 * @returns The truncated text, with a cap marker appended when it fits.
 */
function capSummaryText(
  content: string,
  originalTokens: number,
  maxTokens: number,
): string {
  const suffixes = [
    `\n[Capped from ${originalTokens} tokens to ~${maxTokens}]`,
    `\n[Capped to ~${maxTokens}]`,
    "\n[Capped]",
    "",
  ];

  /** Take the first `limit` code units of `content` without splitting a surrogate pair. */
  const takePrefix = (limit: number): string => {
    const head = content.slice(0, limit);
    const lastUnit = head.charCodeAt(head.length - 1);
    const nextUnit = content.charCodeAt(head.length);
    const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
    const cutsPair = endsOnHighSurrogate && nextUnit >= 0xdc00 && nextUnit <= 0xdfff;
    return cutsPair ? head.slice(0, -1) : head;
  };

  for (const suffix of suffixes) {
    const maxChars = Math.max(0, maxTokens * 4 - suffix.length);
    const capped = `${takePrefix(maxChars)}${suffix}`;
    if (estimateTokens(capped) <= maxTokens) {
      return capped;
    }
  }

  // Only reachable when no candidate satisfied the budget check (for example a
  // negative or NaN maxTokens); fall back to a plain truncation.
  return takePrefix(Math.max(0, maxTokens * 4));
}
114
+
88
115
  /** Format a timestamp as `YYYY-MM-DD HH:mm TZ` for prompt source text. */
89
116
  export function formatTimestamp(value: Date, timezone: string = "UTC"): string {
90
117
  try {
@@ -149,6 +176,11 @@ const DEFAULT_LEAF_CHUNK_TOKENS = 20_000;
149
176
  * with no meaningful text.
150
177
  */
151
178
  const MEDIA_PATH_RE = /^MEDIA:\/.+$/;
179
/**
 * Matches inline base64 `data:` URLs so their payload can be replaced before
 * text reaches the summarizer.
 *
 * The payload is a run of base64 characters. It may continue across a line
 * break only onto a line that starts with a run of at least 16 base64
 * characters (a wrapped payload). Whitespace is deliberately NOT part of the
 * payload class itself: otherwise the match would run on through ordinary
 * words that follow the URL and that prose would be lost.
 * NOTE(review): a wrapped payload whose final line is shorter than 16
 * characters leaves that short remainder unmatched — confirm this trade-off.
 */
const EMBEDDED_DATA_URL_RE =
  /data:[^;\s"'`]+;base64,[A-Za-z0-9+/=]+(?:[ \t]*\r?\n[ \t]*[A-Za-z0-9+/=]{16,})*/gi;
180
+ const MEDIA_ATTACHMENT_PART_TYPES = new Set(["file", "snapshot"]);
181
+ const MEDIA_ATTACHMENT_RAW_TYPES = new Set(["file", "image", "snapshot"]);
182
+ const STRUCTURED_MEDIA_TEXT_KEYS = ["text", "caption", "alt", "title", "summary"] as const;
183
+ const STRUCTURED_MEDIA_NESTED_KEYS = ["content", "parts", "items", "message", "messages"] as const;
152
184
 
153
185
  const CONDENSED_MIN_INPUT_RATIO = 0.1;
154
186
 
@@ -164,6 +196,140 @@ function dedupeOrderedIds(ids: Iterable<string>): string[] {
164
196
  return ordered;
165
197
  }
166
198
 
199
/**
 * Decode the JSON `metadata` string of a message part.
 *
 * Never throws: a missing, blank, or malformed value — or JSON that is not a
 * plain object (arrays, primitives, null) — yields an empty record.
 */
function parseMessagePartMetadata(part: CreateMessagePartInput | { metadata: string | null }): Record<string, unknown> {
  const raw = part.metadata;
  if (typeof raw !== "string" || raw.trim() === "") {
    return {};
  }

  let decoded: unknown;
  try {
    decoded = JSON.parse(raw);
  } catch {
    return {};
  }

  const isPlainRecord = typeof decoded === "object" && decoded !== null && !Array.isArray(decoded);
  return isPlainRecord ? (decoded as Record<string, unknown>) : {};
}
213
+
214
/**
 * Heuristically decide whether a string is a binary/base64 payload rather
 * than prose.
 *
 * A value counts as a payload when it starts with a base64 `data:` URL
 * prefix, or when — ignoring whitespace — it is at least 256 characters long,
 * a multiple of 4 in length, made only of base64 characters, and the trimmed
 * value contains no space or sentence punctuation.
 */
function looksLikeBinaryPayload(value: string): boolean {
  const candidate = value.trim();
  if (candidate.length === 0) {
    return false;
  }

  const hasDataUrlPrefix = /^data:[^;\s"'`]+;base64,/i.test(candidate);
  if (hasDataUrlPrefix) {
    return true;
  }

  const packed = candidate.replace(/\s+/g, "");
  const plausibleLength = packed.length >= 256 && packed.length % 4 === 0;
  if (!plausibleLength) {
    return false;
  }

  const base64Only = /^[A-Za-z0-9+/=]+$/.test(packed);
  if (!base64Only) {
    return false;
  }

  const hasProseMarkers = /[ .,:;!?()[\]{}]/.test(candidate);
  return !hasProseMarkers;
}
232
+
233
/**
 * Remove attachment payloads from a plain string before it is summarized.
 *
 * Every match of `EMBEDDED_DATA_URL_RE` is replaced with
 * "[embedded media omitted]". The text is then processed line by line:
 * blank lines, lines matching `MEDIA_PATH_RE`, and lines that
 * `looksLikeBinaryPayload` flags are dropped. Surviving lines keep their
 * leading whitespace, lose trailing whitespace, and are re-joined with "\n".
 */
function stripEmbeddedMediaPayloads(content: string): string {
  const scrubbed = content.replace(EMBEDDED_DATA_URL_RE, "[embedded media omitted]");
  const kept: string[] = [];

  for (const rawLine of scrubbed.split(/\r?\n/)) {
    const probe = rawLine.trim();
    if (!probe || MEDIA_PATH_RE.test(probe) || looksLikeBinaryPayload(probe)) {
      continue;
    }
    kept.push(rawLine.trimEnd());
  }

  return kept.join("\n").trim();
}
254
+
255
/**
 * Collect human-readable text fragments from structured (parsed JSON) content.
 *
 * - Strings are sanitized with `stripEmbeddedMediaPayloads`.
 * - Arrays are walked element by element.
 * - Objects contribute their string-valued `STRUCTURED_MEDIA_TEXT_KEYS`
 *   fields; unless the object's `type` is one of `MEDIA_ATTACHMENT_RAW_TYPES`,
 *   its `STRUCTURED_MEDIA_NESTED_KEYS` fields are walked as well.
 *
 * Recursion stops at depth 4; nullish and other primitive values yield nothing.
 */
function extractSanitizedStructuredText(value: unknown, depth = 0): string[] {
  if (value == null || depth >= 4) {
    return [];
  }

  if (typeof value === "string") {
    const cleaned = stripEmbeddedMediaPayloads(value);
    return cleaned ? [cleaned] : [];
  }

  if (Array.isArray(value)) {
    const collected: string[] = [];
    for (const entry of value) {
      collected.push(...extractSanitizedStructuredText(entry, depth + 1));
    }
    return collected;
  }

  if (typeof value !== "object") {
    return [];
  }

  const node = value as Record<string, unknown>;
  const declaredType = typeof node.type === "string" ? node.type.trim().toLowerCase() : "";
  const fragments: string[] = [];

  // Direct text-bearing fields are harvested for every object, media or not.
  for (const key of STRUCTURED_MEDIA_TEXT_KEYS) {
    const field = node[key];
    if (typeof field === "string") {
      const cleaned = stripEmbeddedMediaPayloads(field);
      if (cleaned) {
        fragments.push(cleaned);
      }
    }
  }

  // Media attachments stop here so their nested payload fields are never read.
  if (MEDIA_ATTACHMENT_RAW_TYPES.has(declaredType)) {
    return fragments;
  }

  for (const key of STRUCTURED_MEDIA_NESTED_KEYS) {
    fragments.push(...extractSanitizedStructuredText(node[key], depth + 1));
  }
  return fragments;
}
296
+
297
/**
 * Reduce message content to human-readable text with media payloads removed.
 *
 * Content that looks like a JSON array or object (by its first and last
 * character) is parsed and its text extracted via
 * `extractSanitizedStructuredText`; if that fails, or the content is not
 * JSON-shaped, the raw string goes through `stripEmbeddedMediaPayloads`.
 * Blank content yields "".
 */
function extractMeaningfulMessageText(content: string): string {
  const body = content.trim();
  if (body.length === 0) {
    return "";
  }

  const looksLikeArray = body.startsWith("[") && body.endsWith("]");
  const looksLikeObject = body.startsWith("{") && body.endsWith("}");

  if (looksLikeArray || looksLikeObject) {
    try {
      const pieces: string[] = [];
      for (const fragment of extractSanitizedStructuredText(JSON.parse(body) as unknown)) {
        const tidy = fragment.trim();
        if (tidy) {
          pieces.push(tidy);
        }
      }
      return pieces.join("\n").trim();
    } catch {
      // Not valid JSON after all: treat it as plain text below.
    }
  }

  return stripEmbeddedMediaPayloads(content);
}
316
+
317
/**
 * Decide whether a stored message part represents a media attachment.
 *
 * A part qualifies when its `partType` is in `MEDIA_ATTACHMENT_PART_TYPES`,
 * or when its parsed metadata names a type in `MEDIA_ATTACHMENT_RAW_TYPES` —
 * read from `rawType` if that is a string, otherwise from `raw.type`.
 */
function isMediaAttachmentPart(part: CreateMessagePartInput | { partType: string; metadata: string | null }): boolean {
  if (MEDIA_ATTACHMENT_PART_TYPES.has(part.partType)) {
    return true;
  }

  const meta = parseMessagePartMetadata(part);
  let declaredType = "";

  if (typeof meta.rawType === "string") {
    declaredType = meta.rawType;
  } else if (meta.raw && typeof meta.raw === "object" && !Array.isArray(meta.raw)) {
    const nestedType = (meta.raw as Record<string, unknown>).type;
    if (typeof nestedType === "string") {
      declaredType = nestedType;
    }
  }

  return MEDIA_ATTACHMENT_RAW_TYPES.has(declaredType.trim().toLowerCase());
}
332
+
167
333
  // ── CompactionEngine ─────────────────────────────────────────────────────────
168
334
 
169
335
  export class CompactionEngine {
@@ -1001,11 +1167,16 @@ export class CompactionEngine {
1001
1167
  /**
1002
1168
  * Run three-level summarization escalation:
1003
1169
  * normal -> aggressive -> deterministic fallback.
1170
+ *
1171
+ * Provider-auth failures are treated as non-compacting skips so we do not
1172
+ * persist truncation artifacts into the summary DAG.
1004
1173
  */
1005
1174
  private async summarizeWithEscalation(params: {
1006
1175
  sourceText: string;
1007
1176
  summarize: CompactionSummarizeFn;
1008
1177
  options?: CompactionSummarizeOptions;
1178
+ /** Target token count for this summary kind (leaf or condensed). Used for hard-cap enforcement. */
1179
+ targetTokens: number;
1009
1180
  }): Promise<{ content: string; level: CompactionLevel } | null> {
1010
1181
  const sourceText = params.sourceText.trim();
1011
1182
  if (!sourceText) {
@@ -1026,17 +1197,31 @@ export class CompactionEngine {
1026
1197
  level: "fallback",
1027
1198
  };
1028
1199
  };
1029
-
1030
- const runSummarizer = async (aggressiveMode: boolean): Promise<string | null> => {
1031
- const output = await params.summarize(sourceText, aggressiveMode, params.options);
1200
+ const authFailure = Symbol("authFailure");
1201
+
1202
+ const runSummarizer = async (
1203
+ aggressiveMode: boolean,
1204
+ ): Promise<string | null | typeof authFailure> => {
1205
+ let output: string;
1206
+ try {
1207
+ output = await params.summarize(sourceText, aggressiveMode, params.options);
1208
+ } catch (err) {
1209
+ if (err instanceof LcmProviderAuthError) {
1210
+ return authFailure;
1211
+ }
1212
+ throw err;
1213
+ }
1032
1214
  const trimmed = output.trim();
1033
1215
  return trimmed || null;
1034
1216
  };
1035
1217
 
1036
1218
  const initialSummary = await runSummarizer(false);
1219
+ if (initialSummary === authFailure) {
1220
+ return null;
1221
+ }
1037
1222
  if (initialSummary === null) {
1038
- // Empty provider output should still compact deterministically so auth
1039
- // failures or empty responses do not stall compaction entirely.
1223
+ // Empty provider output should still compact deterministically so a
1224
+ // silent no-op does not stall compaction forever.
1040
1225
  return buildDeterministicFallback();
1041
1226
  }
1042
1227
  let summaryText = initialSummary;
@@ -1044,6 +1229,9 @@ export class CompactionEngine {
1044
1229
 
1045
1230
  if (estimateTokens(summaryText) >= inputTokens) {
1046
1231
  const aggressiveSummary = await runSummarizer(true);
1232
+ if (aggressiveSummary === authFailure) {
1233
+ return null;
1234
+ }
1047
1235
  if (aggressiveSummary === null) {
1048
1236
  return buildDeterministicFallback();
1049
1237
  }
@@ -1055,6 +1243,21 @@ export class CompactionEngine {
1055
1243
  }
1056
1244
  }
1057
1245
 
1246
+ // Hard cap: enforce maximum summary size relative to the kind-appropriate target.
1247
+ const summaryTokens = estimateTokens(summaryText);
1248
+ const maxTokens = Math.ceil(params.targetTokens * this.config.summaryMaxOverageFactor);
1249
+
1250
+ if (summaryTokens > Math.ceil(params.targetTokens * 1.5)) {
1251
+ console.warn(
1252
+ `[lcm] summary exceeds target by ${Math.round((summaryTokens / params.targetTokens - 1) * 100)}%: ${summaryTokens} tokens vs target ${params.targetTokens}`,
1253
+ );
1254
+ }
1255
+
1256
+ if (summaryTokens > maxTokens) {
1257
+ summaryText = capSummaryText(summaryText, summaryTokens, maxTokens);
1258
+ level = "capped";
1259
+ }
1260
+
1058
1261
  return { content: summaryText, level };
1059
1262
  }
1060
1263
 
@@ -1065,10 +1268,9 @@ export class CompactionEngine {
1065
1268
  * attachments. This gives the summarizer enough context to produce a
1066
1269
  * meaningful summary instead of trying to compress raw file paths.
1067
1270
  *
1068
- * - Media-only messages (just a file path, no text): content is replaced
1069
- * with "[Media attachment]" or "[Image attachment]" etc.
1070
- * - Media-mostly messages (any real text + attachment): content is annotated
1071
- * with " [with media attachment]" suffix.
1271
+ * - Media-only messages: content is replaced with "[Media attachment]".
1272
+ * - Media-mostly messages: text is preserved and annotated with
1273
+ * " [with media attachment]".
1072
1274
  * - Text-only messages: returned unchanged.
1073
1275
  */
1074
1276
  private async annotateMediaContent(
@@ -1076,27 +1278,29 @@ export class CompactionEngine {
1076
1278
  content: string,
1077
1279
  ): Promise<string> {
1078
1280
  const parts = await this.conversationStore.getMessageParts(messageId);
1079
- const hasMediaParts = parts.some(
1080
- (p) => p.partType === "file" || p.partType === "snapshot",
1081
- );
1281
+ const hasMediaParts = parts.some((part) => isMediaAttachmentPart(part));
1082
1282
  if (!hasMediaParts) {
1083
1283
  return content;
1084
1284
  }
1085
1285
 
1086
- // Strip MEDIA:/... paths to see how much actual text remains
1087
- const textWithoutPaths = content
1088
- .split("\n")
1089
- .filter((line) => !MEDIA_PATH_RE.test(line.trim()))
1286
+ const partText = parts
1287
+ .filter((part) => !isMediaAttachmentPart(part))
1288
+ .map((part) => (typeof part.textContent === "string" ? part.textContent : ""))
1289
+ .map((text) => stripEmbeddedMediaPayloads(text))
1290
+ .map((text) => text.trim())
1291
+ .filter(Boolean)
1090
1292
  .join("\n")
1091
1293
  .trim();
1294
+ const fallbackText = extractMeaningfulMessageText(content);
1295
+ const meaningfulText = (partText || fallbackText).trim();
1092
1296
 
1093
- if (textWithoutPaths.length === 0) {
1094
- // Media-only: replace with descriptive annotation
1297
+ if (!meaningfulText) {
1095
1298
  return "[Media attachment]";
1096
1299
  }
1097
-
1098
- // Media-mostly: keep the text, add annotation
1099
- return `${textWithoutPaths} [with media attachment]`;
1300
+ if (meaningfulText.includes("[with media attachment]")) {
1301
+ return meaningfulText;
1302
+ }
1303
+ return `${meaningfulText} [with media attachment]`;
1100
1304
  }
1101
1305
 
1102
1306
  // ── Private: Leaf Pass ───────────────────────────────────────────────────
@@ -1146,10 +1350,11 @@ export class CompactionEngine {
1146
1350
  previousSummary: previousSummaryContent,
1147
1351
  isCondensed: false,
1148
1352
  },
1353
+ targetTokens: this.config.leafTargetTokens,
1149
1354
  });
1150
1355
  if (!summary) {
1151
1356
  console.warn(
1152
- `[lcm] leaf summarizer returned empty content; conversationId=${conversationId}; chunkMessages=${messageContents.length}; skipping leaf chunk`,
1357
+ `[lcm] leaf compaction skipped summary write; conversationId=${conversationId}; chunkMessages=${messageContents.length}`,
1153
1358
  );
1154
1359
  return null;
1155
1360
  }
@@ -1253,10 +1458,11 @@ export class CompactionEngine {
1253
1458
  isCondensed: true,
1254
1459
  depth: targetDepth + 1,
1255
1460
  },
1461
+ targetTokens: this.config.condensedTargetTokens,
1256
1462
  });
1257
1463
  if (!condensed) {
1258
1464
  console.warn(
1259
- `[lcm] condensed summarizer returned empty content; conversationId=${conversationId}; depth=${targetDepth}; chunkSummaries=${summaryRecords.length}; skipping condensed chunk`,
1465
+ `[lcm] condensed compaction skipped summary write; conversationId=${conversationId}; depth=${targetDepth}; chunkSummaries=${summaryRecords.length}`,
1260
1466
  );
1261
1467
  return null;
1262
1468
  }
package/src/db/config.ts CHANGED
@@ -37,11 +37,19 @@ export type LcmConfig = {
37
37
  expansionProvider: string;
38
38
  /** Model override for lcm_expand_query sub-agent. */
39
39
  expansionModel: string;
40
+ /** Max time to wait for delegated lcm_expand_query sub-agent completion. */
41
+ delegationTimeoutMs: number;
40
42
  autocompactDisabled: boolean;
41
43
  /** IANA timezone for timestamps in summaries (from TZ env or system default) */
42
44
  timezone: string;
43
45
  /** When true, retroactively delete HEARTBEAT_OK turn cycles from LCM storage. */
44
46
  pruneHeartbeatOk: boolean;
47
+ /** Hard ceiling for assembly token budget — caps runtime-provided and fallback budgets. */
48
+ maxAssemblyTokenBudget?: number;
49
+ /** Maximum allowed overage factor for summaries relative to target tokens (default 3). */
50
+ summaryMaxOverageFactor: number;
51
+ /** Custom instructions injected into all summarization prompts. */
52
+ customInstructions: string;
45
53
  };
46
54
 
47
55
  /** Safely coerce an unknown value to a finite number, or return undefined. */
@@ -100,6 +108,10 @@ export function resolveLcmConfig(
100
108
  pluginConfig?: Record<string, unknown>,
101
109
  ): LcmConfig {
102
110
  const pc = pluginConfig ?? {};
111
+ const envDelegationTimeoutMs =
112
+ env.LCM_DELEGATION_TIMEOUT_MS !== undefined
113
+ ? toNumber(env.LCM_DELEGATION_TIMEOUT_MS)
114
+ : undefined;
103
115
 
104
116
  return {
105
117
  enabled:
@@ -134,7 +146,7 @@ export function resolveLcmConfig(
134
146
  ?? toNumber(pc.contextThreshold) ?? 0.75,
135
147
  freshTailCount:
136
148
  (env.LCM_FRESH_TAIL_COUNT !== undefined ? parseInt(env.LCM_FRESH_TAIL_COUNT, 10) : undefined)
137
- ?? toNumber(pc.freshTailCount) ?? 32,
149
+ ?? toNumber(pc.freshTailCount) ?? 64,
138
150
  leafMinFanout:
139
151
  (env.LCM_LEAF_MIN_FANOUT !== undefined ? parseInt(env.LCM_LEAF_MIN_FANOUT, 10) : undefined)
140
152
  ?? toNumber(pc.leafMinFanout) ?? 8,
@@ -146,13 +158,13 @@ export function resolveLcmConfig(
146
158
  ?? toNumber(pc.condensedMinFanoutHard) ?? 2,
147
159
  incrementalMaxDepth:
148
160
  (env.LCM_INCREMENTAL_MAX_DEPTH !== undefined ? parseInt(env.LCM_INCREMENTAL_MAX_DEPTH, 10) : undefined)
149
- ?? toNumber(pc.incrementalMaxDepth) ?? 0,
161
+ ?? toNumber(pc.incrementalMaxDepth) ?? 1,
150
162
  leafChunkTokens:
151
163
  (env.LCM_LEAF_CHUNK_TOKENS !== undefined ? parseInt(env.LCM_LEAF_CHUNK_TOKENS, 10) : undefined)
152
164
  ?? toNumber(pc.leafChunkTokens) ?? 20000,
153
165
  leafTargetTokens:
154
166
  (env.LCM_LEAF_TARGET_TOKENS !== undefined ? parseInt(env.LCM_LEAF_TARGET_TOKENS, 10) : undefined)
155
- ?? toNumber(pc.leafTargetTokens) ?? 1200,
167
+ ?? toNumber(pc.leafTargetTokens) ?? 2400,
156
168
  condensedTargetTokens:
157
169
  (env.LCM_CONDENSED_TARGET_TOKENS !== undefined ? parseInt(env.LCM_CONDENSED_TARGET_TOKENS, 10) : undefined)
158
170
  ?? toNumber(pc.condensedTargetTokens) ?? 2000,
@@ -176,6 +188,7 @@ export function resolveLcmConfig(
176
188
  env.LCM_EXPANSION_PROVIDER?.trim() ?? toStr(pc.expansionProvider) ?? "",
177
189
  expansionModel:
178
190
  env.LCM_EXPANSION_MODEL?.trim() ?? toStr(pc.expansionModel) ?? "",
191
+ delegationTimeoutMs: envDelegationTimeoutMs ?? toNumber(pc.delegationTimeoutMs) ?? 120000,
179
192
  autocompactDisabled:
180
193
  env.LCM_AUTOCOMPACT_DISABLED !== undefined
181
194
  ? env.LCM_AUTOCOMPACT_DISABLED === "true"
@@ -185,5 +198,13 @@ export function resolveLcmConfig(
185
198
  env.LCM_PRUNE_HEARTBEAT_OK !== undefined
186
199
  ? env.LCM_PRUNE_HEARTBEAT_OK === "true"
187
200
  : toBool(pc.pruneHeartbeatOk) ?? false,
201
+ maxAssemblyTokenBudget:
202
+ (env.LCM_MAX_ASSEMBLY_TOKEN_BUDGET !== undefined ? parseInt(env.LCM_MAX_ASSEMBLY_TOKEN_BUDGET, 10) : undefined)
203
+ ?? toNumber(pc.maxAssemblyTokenBudget) ?? undefined,
204
+ summaryMaxOverageFactor:
205
+ (env.LCM_SUMMARY_MAX_OVERAGE_FACTOR !== undefined ? parseFloat(env.LCM_SUMMARY_MAX_OVERAGE_FACTOR) : undefined)
206
+ ?? toNumber(pc.summaryMaxOverageFactor) ?? 3,
207
+ customInstructions:
208
+ env.LCM_CUSTOM_INSTRUCTIONS?.trim() ?? toStr(pc.customInstructions) ?? "",
188
209
  };
189
210
  }
package/src/engine.ts CHANGED
@@ -45,7 +45,7 @@ import {
45
45
  type MessagePartType,
46
46
  } from "./store/conversation-store.js";
47
47
  import { SummaryStore } from "./store/summary-store.js";
48
- import { createLcmSummarizeFromLegacyParams } from "./summarize.js";
48
+ import { createLcmSummarizeFromLegacyParams, LcmProviderAuthError } from "./summarize.js";
49
49
  import type { LcmDependencies } from "./types.js";
50
50
 
51
51
  type AgentMessage = Parameters<ContextEngine["ingest"]>[0]["message"];
@@ -1064,6 +1064,7 @@ export class LcmContextEngine implements ContextEngine {
1064
1064
  condensedTargetTokens: this.config.condensedTargetTokens,
1065
1065
  maxRounds: 10,
1066
1066
  timezone: this.config.timezone,
1067
+ summaryMaxOverageFactor: this.config.summaryMaxOverageFactor,
1067
1068
  };
1068
1069
  this.compaction = new CompactionEngine(
1069
1070
  this.conversationStore,
@@ -1189,6 +1190,12 @@ export class LcmContextEngine implements ContextEngine {
1189
1190
  return undefined;
1190
1191
  }
1191
1192
 
1193
/**
 * Clamp a resolved token budget to the configured `maxAssemblyTokenBudget`.
 *
 * The budget is returned unchanged when no cap is configured or the cap is
 * not a positive number.
 */
private applyAssemblyBudgetCap(budget: number): number {
  const ceiling = this.config.maxAssemblyTokenBudget;
  if (ceiling == null || !(ceiling > 0)) {
    return budget;
  }
  return Math.min(budget, ceiling);
}
1198
+
1192
1199
  /** Resolve an LCM conversation id from a session key via the session store. */
1193
1200
  private async resolveConversationIdForSessionKey(
1194
1201
  sessionKey: string,
@@ -1231,10 +1238,14 @@ export class LcmContextEngine implements ContextEngine {
1231
1238
  };
1232
1239
  }
1233
1240
  try {
1241
+ const customInstructions =
1242
+ params.customInstructions !== undefined
1243
+ ? params.customInstructions
1244
+ : (this.config.customInstructions || undefined);
1234
1245
  const runtimeSummarizer = await createLcmSummarizeFromLegacyParams({
1235
1246
  deps: this.deps,
1236
1247
  legacyParams: lp,
1237
- customInstructions: params.customInstructions,
1248
+ customInstructions,
1238
1249
  });
1239
1250
  if (runtimeSummarizer) {
1240
1251
  return { summarize: runtimeSummarizer.fn, summaryModel: runtimeSummarizer.model };
@@ -1271,13 +1282,22 @@ export class LcmContextEngine implements ContextEngine {
1271
1282
  const result = await createLcmSummarizeFromLegacyParams({
1272
1283
  deps: this.deps,
1273
1284
  legacyParams: { provider, model },
1285
+ customInstructions: this.config.customInstructions || undefined,
1274
1286
  });
1275
1287
  if (!result) {
1276
1288
  return undefined;
1277
1289
  }
1278
1290
 
1279
1291
  this.largeFileTextSummarizer = async (prompt: string): Promise<string | null> => {
1280
- const summary = await result.fn(prompt, false);
1292
+ let summary: string;
1293
+ try {
1294
+ summary = await result.fn(prompt, false);
1295
+ } catch (err) {
1296
+ if (err instanceof LcmProviderAuthError) {
1297
+ return null;
1298
+ }
1299
+ throw err;
1300
+ }
1281
1301
  if (typeof summary !== "string") {
1282
1302
  return null;
1283
1303
  }
@@ -2125,7 +2145,7 @@ export class LcmContextEngine implements ContextEngine {
2125
2145
  runtimeContext: params.runtimeContext,
2126
2146
  legacyParams,
2127
2147
  });
2128
- const tokenBudget = resolvedTokenBudget ?? DEFAULT_AFTER_TURN_TOKEN_BUDGET;
2148
+ const tokenBudget = this.applyAssemblyBudgetCap(resolvedTokenBudget ?? DEFAULT_AFTER_TURN_TOKEN_BUDGET);
2129
2149
  if (resolvedTokenBudget === undefined) {
2130
2150
  console.warn(
2131
2151
  `[lcm] afterTurn: tokenBudget not provided; using default ${DEFAULT_AFTER_TURN_TOKEN_BUDGET}`,
@@ -2212,12 +2232,13 @@ export class LcmContextEngine implements ContextEngine {
2212
2232
  };
2213
2233
  }
2214
2234
 
2215
- const tokenBudget =
2235
+ const tokenBudget = this.applyAssemblyBudgetCap(
2216
2236
  typeof params.tokenBudget === "number" &&
2217
2237
  Number.isFinite(params.tokenBudget) &&
2218
2238
  params.tokenBudget > 0
2219
2239
  ? Math.floor(params.tokenBudget)
2220
- : 128_000;
2240
+ : 128_000,
2241
+ );
2221
2242
 
2222
2243
  const assembled = await this.assembler.assemble({
2223
2244
  conversationId: conversation.conversationId,
@@ -2316,11 +2337,14 @@ export class LcmContextEngine implements ContextEngine {
2316
2337
  }
2317
2338
 
2318
2339
  const legacyParams = asRecord(params.runtimeContext) ?? params.legacyParams;
2319
- const tokenBudget = this.resolveTokenBudget({
2340
+ const resolvedTokenBudget = this.resolveTokenBudget({
2320
2341
  tokenBudget: params.tokenBudget,
2321
2342
  runtimeContext: params.runtimeContext,
2322
2343
  legacyParams,
2323
2344
  });
2345
+ const tokenBudget = resolvedTokenBudget
2346
+ ? this.applyAssemblyBudgetCap(resolvedTokenBudget)
2347
+ : resolvedTokenBudget;
2324
2348
  if (!tokenBudget) {
2325
2349
  return {
2326
2350
  ok: false,
@@ -2430,11 +2454,14 @@ export class LcmContextEngine implements ContextEngine {
2430
2454
  }
2431
2455
  ).manualCompaction === true;
2432
2456
  const forceCompaction = force || manualCompactionRequested;
2433
- const tokenBudget = this.resolveTokenBudget({
2457
+ const resolvedTokenBudget = this.resolveTokenBudget({
2434
2458
  tokenBudget: params.tokenBudget,
2435
2459
  runtimeContext: params.runtimeContext,
2436
2460
  legacyParams,
2437
2461
  });
2462
+ const tokenBudget = resolvedTokenBudget
2463
+ ? this.applyAssemblyBudgetCap(resolvedTokenBudget)
2464
+ : resolvedTokenBudget;
2438
2465
  if (!tokenBudget) {
2439
2466
  return {
2440
2467
  ok: false,