@martian-engineering/lossless-claw 0.4.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/engine.ts CHANGED
@@ -1,9 +1,10 @@
1
- import { randomUUID } from "node:crypto";
2
- import { readFileSync } from "node:fs";
1
+ import { createHash, randomUUID } from "node:crypto";
2
+ import { closeSync, createReadStream, openSync, readSync, statSync } from "node:fs";
3
3
  import { mkdir, writeFile } from "node:fs/promises";
4
4
  import { homedir } from "node:os";
5
5
  import { join } from "node:path";
6
6
  import type { DatabaseSync } from "node:sqlite";
7
+ import { createInterface } from "node:readline";
7
8
  import type {
8
9
  ContextEngine,
9
10
  ContextEngineInfo,
@@ -30,6 +31,7 @@ import {
30
31
  import {
31
32
  extensionFromNameOrMime,
32
33
  formatFileReference,
34
+ formatToolOutputReference,
33
35
  generateExplorationSummary,
34
36
  parseFileBlocks,
35
37
  } from "./large-files.js";
@@ -168,6 +170,15 @@ function extractStructuredText(value: unknown, depth: number = 0): string | unde
168
170
 
169
171
  // Skip tool call/result objects — their structured data belongs in the parts table, not content
170
172
  if (typeof record.type === "string" && TOOL_RAW_TYPES.has(record.type)) {
173
+ if (safeBoolean(record.toolOutputExternalized)) {
174
+ const externalizedText =
175
+ extractStructuredText(record.output, depth + 1) ??
176
+ extractStructuredText(record.content, depth + 1) ??
177
+ extractStructuredText(record.result, depth + 1);
178
+ if (typeof externalizedText === "string" && externalizedText.trim().length > 0) {
179
+ return externalizedText;
180
+ }
181
+ }
171
182
  return undefined;
172
183
  }
173
184
 
@@ -564,6 +575,13 @@ function buildMessageParts(params: {
564
575
  toolCallId: topLevelToolCallId,
565
576
  toolName: topLevelToolName,
566
577
  isError: topLevelIsError,
578
+ externalizedFileId: safeString(metadataRecord?.externalizedFileId),
579
+ originalByteSize:
580
+ typeof metadataRecord?.originalByteSize === "number"
581
+ ? metadataRecord.originalByteSize
582
+ : undefined,
583
+ toolOutputExternalized: safeBoolean(metadataRecord?.toolOutputExternalized),
584
+ externalizationReason: safeString(metadataRecord?.externalizationReason),
567
585
  rawType: block.type,
568
586
  raw: metadataRecord ?? message.content[ordinal],
569
587
  }),
@@ -639,6 +657,15 @@ function toStoredMessage(message: AgentMessage): StoredMessage {
639
657
  };
640
658
  }
641
659
 
660
/**
 * Fingerprint a stored message for bootstrap checkpoint comparisons.
 *
 * Only `role` and `content` feed the digest (serialized in that key order);
 * other fields on the stored message do not affect the result.
 *
 * @param message - Stored message to fingerprint, or `null` when there is none.
 * @returns Hex-encoded SHA-256 digest, or `null` when no message was supplied.
 */
function createBootstrapEntryHash(message: StoredMessage | null): string | null {
  if (!message) {
    return null;
  }
  const { role, content } = message;
  const fingerprintSource = JSON.stringify({ role, content });
  const hasher = createHash("sha256");
  hasher.update(fingerprintSource);
  return hasher.digest("hex");
}
668
+
642
669
  function estimateMessageContentTokensForAfterTurn(content: unknown): number {
643
670
  if (typeof content === "string") {
644
671
  return estimateTokens(content);
@@ -702,53 +729,216 @@ function isBootstrapMessage(value: unknown): value is AgentMessage {
702
729
  return "content" in msg || ("command" in msg && "output" in msg);
703
730
  }
704
731
 
705
- /** Load recoverable messages from a JSON/JSONL session file. */
706
- function readLeafPathMessages(sessionFile: string): AgentMessage[] {
707
- let raw = "";
708
- try {
709
- raw = readFileSync(sessionFile, "utf8");
710
- } catch {
711
- return [];
712
- }
713
-
714
- const trimmed = raw.trim();
715
- if (!trimmed) {
716
- return [];
717
- }
718
-
719
- if (trimmed.startsWith("[")) {
720
- try {
721
- const parsed = JSON.parse(trimmed);
722
- if (!Array.isArray(parsed)) {
723
- return [];
724
- }
725
- return parsed.filter(isBootstrapMessage);
726
- } catch {
727
- return [];
728
- }
729
- }
732
/**
 * Pull a bootstrap-eligible message out of a parsed transcript entry.
 *
 * Entries may either be the message itself or an object that carries it under
 * a `message` property; in the latter case the wrapped value is inspected.
 *
 * @param value - Parsed JSON value from the session file.
 * @returns The message when it passes `isBootstrapMessage`, otherwise `null`.
 */
function extractBootstrapMessageCandidate(value: unknown): AgentMessage | null {
  let candidate: unknown = value;
  if (value && typeof value === "object" && "message" in value) {
    candidate = (value as { message?: unknown }).message;
  }
  if (!isBootstrapMessage(candidate)) {
    return null;
  }
  return candidate;
}
730
739
 
740
+ function parseBootstrapJsonl(raw: string, options?: {
741
+ strict?: boolean;
742
+ }): { messages: AgentMessage[]; sawNonWhitespace: boolean; hadMalformedLine: boolean } {
731
743
  const messages: AgentMessage[] = [];
732
744
  const lines = raw.split(/\r?\n/);
745
+ let sawNonWhitespace = false;
746
+ let hadMalformedLine = false;
733
747
  for (const line of lines) {
734
748
  const item = line.trim();
735
749
  if (!item) {
736
750
  continue;
737
751
  }
752
+ sawNonWhitespace = true;
738
753
  try {
739
754
  const parsed = JSON.parse(item);
740
- const candidate =
741
- parsed && typeof parsed === "object" && "message" in parsed
742
- ? (parsed as { message?: unknown }).message
743
- : parsed;
744
- if (isBootstrapMessage(candidate)) {
755
+ const candidate = extractBootstrapMessageCandidate(parsed);
756
+ if (candidate) {
745
757
  messages.push(candidate);
758
+ continue;
759
+ }
760
+ if (options?.strict) {
761
+ hadMalformedLine = true;
746
762
  }
747
763
  } catch {
748
- // Skip malformed lines.
764
+ if (options?.strict) {
765
+ hadMalformedLine = true;
766
+ }
767
+ }
768
+ }
769
+ return { messages, sawNonWhitespace, hadMalformedLine };
770
+ }
771
+
772
/**
 * Load recoverable messages from a JSON/JSONL session file without full-file reads for JSONL.
 *
 * The file is streamed line by line. If the first non-blank line starts with
 * `[`, the file is treated as a single JSON array: all lines are buffered and
 * parsed once, keeping the elements that pass `isBootstrapMessage`. Otherwise
 * each line is handed to `parseBootstrapJsonl` and malformed lines are skipped.
 *
 * Any failure (unreadable file, invalid JSON array, stream error) yields `[]`.
 *
 * @param sessionFile - Path of the session transcript to read.
 * @returns The messages recovered from the file, in file order.
 */
async function readLeafPathMessages(sessionFile: string): Promise<AgentMessage[]> {
  try {
    let sawNonWhitespace = false;
    let jsonArrayMode = false;
    let jsonArrayBuffer = "";
    let streamFailed = false;
    const messages: AgentMessage[] = [];
    const stream = createReadStream(sessionFile, { encoding: "utf8" });
    const lines = createInterface({
      input: stream,
      crlfDelay: Infinity,
    });
    // Depending on the Node.js version, readline may not forward input-stream
    // errors to the async iterator, in which case the surrounding try/catch
    // never sees them. Handle the stream's 'error' event explicitly: remember
    // the failure and close the interface so the loop below terminates.
    // Registered after createInterface so any listener readline installs on
    // the stream runs first.
    stream.once("error", () => {
      streamFailed = true;
      lines.close();
    });

    for await (const line of lines) {
      if (!sawNonWhitespace) {
        const trimmed = line.trim();
        if (trimmed) {
          sawNonWhitespace = true;
          // The first non-blank line decides the format for the whole file.
          if (trimmed.startsWith("[")) {
            jsonArrayMode = true;
          }
        }
      }

      if (jsonArrayMode) {
        jsonArrayBuffer += `${line}\n`;
        continue;
      }

      const parsed = parseBootstrapJsonl(line);
      if (parsed.messages.length > 0) {
        messages.push(...parsed.messages);
      }
    }

    if (streamFailed) {
      // A partially read transcript is treated the same as an unreadable one.
      return [];
    }

    if (jsonArrayMode) {
      const trimmed = jsonArrayBuffer.trim();
      if (!trimmed) {
        return [];
      }
      try {
        const parsed: unknown = JSON.parse(trimmed);
        if (!Array.isArray(parsed)) {
          return [];
        }
        return parsed.filter(isBootstrapMessage);
      } catch {
        return [];
      }
    }

    return messages;
  } catch {
    return [];
  }
}
828
+
829
/**
 * Read the tail of a file, from `offset` to the current end, as UTF-8 text.
 *
 * The offset is floored and clamped to `[0, fileSize]`.
 *
 * @param sessionFile - Path of the file to read.
 * @param offset - Byte offset at which to start reading.
 * @returns The decoded text (`""` when nothing lies beyond the offset), or
 *   `null` when the offset is NaN or the file cannot be opened/read.
 */
function readFileSegment(sessionFile: string, offset: number): string | null {
  if (Number.isNaN(offset)) {
    // Previously surfaced as an allocation error that was swallowed below.
    return null;
  }
  let fd: number | null = null;
  try {
    fd = openSync(sessionFile, "r");
    const stats = statSync(sessionFile);
    const safeOffset = Math.max(0, Math.min(Math.floor(offset), stats.size));
    const length = stats.size - safeOffset;
    if (length <= 0) {
      return "";
    }
    const buffer = Buffer.alloc(length);
    // readSync may return fewer bytes than requested (for example when the
    // file shrinks after the stat call). Keep reading until the buffer is
    // full or EOF is reached, and decode only the bytes actually read so the
    // zero-filled remainder never leaks into the result as NUL characters.
    let filled = 0;
    while (filled < length) {
      const bytesRead = readSync(fd, buffer, filled, length - filled, safeOffset + filled);
      if (bytesRead === 0) {
        break;
      }
      filled += bytesRead;
    }
    return buffer.toString("utf8", 0, filled);
  } catch {
    return null;
  } finally {
    if (fd != null) {
      closeSync(fd);
    }
  }
}
850
+
851
/**
 * Return the last non-blank line of `sessionFile` that ends at or before
 * `offset`, reading backwards in fixed-size chunks instead of loading the
 * whole file.
 *
 * Bytes are accumulated and split on LF/CR *before* decoding. Those byte
 * values never occur inside a multi-byte UTF-8 sequence, so a character that
 * straddles a chunk boundary is decoded intact rather than being turned into
 * replacement characters.
 *
 * @param sessionFile - Path of the JSONL file to inspect.
 * @param offset - Byte offset marking the end of the region to search.
 * @returns The trimmed text of the last non-blank line, or `null` when the
 *   offset is not positive, no such line exists, or the file cannot be read.
 */
function readLastJsonlEntryBeforeOffset(sessionFile: string, offset: number): string | null {
  const chunkSize = 16_384;
  const LF = 0x0a;
  const CR = 0x0d;
  // Space plus the \t \n \v \f \r range.
  const isAsciiWhitespace = (byte: number): boolean =>
    byte === 0x20 || (byte >= 0x09 && byte <= 0x0d);

  let fd: number | null = null;
  try {
    const safeOffset = Math.max(0, Math.floor(offset));
    if (!(safeOffset > 0)) {
      // Also rejects NaN, which compares false against everything.
      return null;
    }

    fd = openSync(sessionFile, "r");
    let cursor = safeOffset;
    let pending: Buffer = Buffer.alloc(0);

    for (;;) {
      // Drop trailing whitespace so the search starts at the end of real content.
      let end = pending.length;
      while (end > 0 && isAsciiWhitespace(pending[end - 1]!)) {
        end -= 1;
      }
      pending = pending.subarray(0, end);

      const newlineIndex = Math.max(pending.lastIndexOf(LF), pending.lastIndexOf(CR));
      // A line is complete once its preceding newline is buffered, or once the
      // start of the file has been reached.
      if (end > 0 && (newlineIndex >= 0 || cursor === 0)) {
        const candidate = pending.subarray(newlineIndex + 1).toString("utf8").trim();
        if (candidate) {
          return candidate;
        }
        if (newlineIndex < 0) {
          return null;
        }
        // The line held only non-ASCII whitespace; retry with what precedes it.
        pending = pending.subarray(0, newlineIndex);
        continue;
      }

      if (cursor === 0) {
        return null;
      }

      const start = Math.max(0, cursor - chunkSize);
      const length = cursor - start;
      const chunk = Buffer.alloc(length);
      // Honor the byte count returned by readSync: a chunk that reaches past
      // EOF (or a short read) must not contribute zero-filled padding.
      let filled = 0;
      while (filled < length) {
        const bytesRead = readSync(fd, chunk, filled, length - filled, start + filled);
        if (bytesRead === 0) {
          break;
        }
        filled += bytesRead;
      }
      pending = Buffer.concat([chunk.subarray(0, filled), pending]);
      cursor = start;
    }
  } catch {
    return null;
  } finally {
    if (fd != null) {
      closeSync(fd);
    }
  }
}
902
+
903
/**
 * Parse the portion of a session file that lies beyond a previously processed
 * byte offset, and report whether that portion can be ingested incrementally.
 *
 * `canUseAppendOnly` is `false` when the segment cannot be read, when it looks
 * like a JSON array (starts with `[`), or when strict JSONL parsing flags any
 * line as malformed. In those cases no messages are returned.
 *
 * @param params.sessionFile - Path of the session transcript.
 * @param params.offset - Byte offset where the unprocessed tail begins.
 * @returns Parsed messages plus flags describing the segment.
 */
function readAppendedLeafPathMessages(params: {
  sessionFile: string;
  offset: number;
}): { messages: AgentMessage[]; canUseAppendOnly: boolean; sawNonWhitespace: boolean } {
  const segment = readFileSegment(params.sessionFile, params.offset);
  if (segment == null) {
    return { messages: [], canUseAppendOnly: false, sawNonWhitespace: false };
  }

  const body = segment.trim();
  if (body.length === 0) {
    // Nothing new was appended; the incremental path is still valid.
    return { messages: [], canUseAppendOnly: true, sawNonWhitespace: false };
  }
  if (body.startsWith("[")) {
    return { messages: [], canUseAppendOnly: false, sawNonWhitespace: true };
  }

  const { messages, hadMalformedLine, sawNonWhitespace } = parseBootstrapJsonl(segment, {
    strict: true,
  });
  return hadMalformedLine
    ? { messages: [], canUseAppendOnly: false, sawNonWhitespace }
    : { messages, canUseAppendOnly: true, sawNonWhitespace };
}
932
+
933
/**
 * Decode one JSONL line into a bootstrap message.
 *
 * @param line - Raw line text, or `null` when no line is available.
 * @returns The extracted message, or `null` when the line is empty, is not
 *   valid JSON, or does not contain a bootstrap-eligible message.
 */
function readBootstrapMessageFromJsonLine(line: string | null): AgentMessage | null {
  if (line) {
    try {
      const decoded: unknown = JSON.parse(line);
      return extractBootstrapMessageCandidate(decoded);
    } catch {
      // Invalid JSON is treated the same as a missing line.
    }
  }
  return null;
}
753
943
 
754
944
  function messageIdentity(role: string, content: string): string {
@@ -777,7 +967,10 @@ export class LcmContextEngine implements ContextEngine {
777
967
  private readonly fts5Available: boolean;
778
968
  private readonly ignoreSessionPatterns: RegExp[];
779
969
  private readonly statelessSessionPatterns: RegExp[];
780
- private sessionOperationQueues = new Map<string, Promise<void>>();
970
+ private sessionOperationQueues = new Map<
971
+ string,
972
+ { promise: Promise<void>; refCount: number }
973
+ >();
781
974
  private largeFileTextSummarizerResolved = false;
782
975
  private largeFileTextSummarizer?: (prompt: string) => Promise<string | null>;
783
976
  private deps: LcmDependencies;
@@ -931,24 +1124,30 @@ export class LcmContextEngine implements ContextEngine {
931
1124
  * ingest/compaction races across runtime UUID recycling.
932
1125
  */
933
1126
  private async withSessionQueue<T>(queueKey: string, operation: () => Promise<T>): Promise<T> {
934
- const previous = this.sessionOperationQueues.get(queueKey) ?? Promise.resolve();
1127
+ const entry = this.sessionOperationQueues.get(queueKey);
1128
+ const previous = entry?.promise ?? Promise.resolve();
935
1129
  let releaseQueue: () => void = () => {};
936
1130
  const current = new Promise<void>((resolve) => {
937
1131
  releaseQueue = resolve;
938
1132
  });
939
1133
  const next = previous.catch(() => {}).then(() => current);
940
- this.sessionOperationQueues.set(queueKey, next);
1134
+
1135
+ if (entry) {
1136
+ entry.promise = next;
1137
+ entry.refCount++;
1138
+ } else {
1139
+ this.sessionOperationQueues.set(queueKey, { promise: next, refCount: 1 });
1140
+ }
941
1141
 
942
1142
  await previous.catch(() => {});
943
1143
  try {
944
1144
  return await operation();
945
1145
  } finally {
946
1146
  releaseQueue();
947
- void next.finally(() => {
948
- if (this.sessionOperationQueues.get(queueKey) === next) {
949
- this.sessionOperationQueues.delete(queueKey);
950
- }
951
- });
1147
+ const cur = this.sessionOperationQueues.get(queueKey);
1148
+ if (cur && --cur.refCount === 0) {
1149
+ this.sessionOperationQueues.delete(queueKey);
1150
+ }
952
1151
  }
953
1152
  }
954
1153
 
@@ -1023,10 +1222,13 @@ export class LcmContextEngine implements ContextEngine {
1023
1222
  private async resolveSummarize(params: {
1024
1223
  legacyParams?: Record<string, unknown>;
1025
1224
  customInstructions?: string;
1026
- }): Promise<(text: string, aggressive?: boolean) => Promise<string>> {
1225
+ }): Promise<{ summarize: (text: string, aggressive?: boolean) => Promise<string>; summaryModel: string }> {
1027
1226
  const lp = params.legacyParams ?? {};
1028
1227
  if (typeof lp.summarize === "function") {
1029
- return lp.summarize as (text: string, aggressive?: boolean) => Promise<string>;
1228
+ return {
1229
+ summarize: lp.summarize as (text: string, aggressive?: boolean) => Promise<string>,
1230
+ summaryModel: "unknown",
1231
+ };
1030
1232
  }
1031
1233
  try {
1032
1234
  const runtimeSummarizer = await createLcmSummarizeFromLegacyParams({
@@ -1035,14 +1237,14 @@ export class LcmContextEngine implements ContextEngine {
1035
1237
  customInstructions: params.customInstructions,
1036
1238
  });
1037
1239
  if (runtimeSummarizer) {
1038
- return runtimeSummarizer;
1240
+ return { summarize: runtimeSummarizer.fn, summaryModel: runtimeSummarizer.model };
1039
1241
  }
1040
1242
  console.error(`[lcm] resolveSummarize: createLcmSummarizeFromLegacyParams returned undefined`);
1041
1243
  } catch (err) {
1042
1244
  console.error(`[lcm] resolveSummarize failed, using emergency fallback:`, err instanceof Error ? err.message : err);
1043
1245
  }
1044
1246
  console.error(`[lcm] resolveSummarize: FALLING BACK TO EMERGENCY TRUNCATION`);
1045
- return createEmergencyFallbackSummarize();
1247
+ return { summarize: createEmergencyFallbackSummarize(), summaryModel: "unknown" };
1046
1248
  }
1047
1249
 
1048
1250
  /**
@@ -1066,16 +1268,16 @@ export class LcmContextEngine implements ContextEngine {
1066
1268
  }
1067
1269
 
1068
1270
  try {
1069
- const summarize = await createLcmSummarizeFromLegacyParams({
1271
+ const result = await createLcmSummarizeFromLegacyParams({
1070
1272
  deps: this.deps,
1071
1273
  legacyParams: { provider, model },
1072
1274
  });
1073
- if (!summarize) {
1275
+ if (!result) {
1074
1276
  return undefined;
1075
1277
  }
1076
1278
 
1077
1279
  this.largeFileTextSummarizer = async (prompt: string): Promise<string | null> => {
1078
- const summary = await summarize(prompt, false);
1280
+ const summary = await result.fn(prompt, false);
1079
1281
  if (typeof summary !== "string") {
1080
1282
  return null;
1081
1283
  }
@@ -1104,6 +1306,53 @@ export class LcmContextEngine implements ContextEngine {
1104
1306
  return filePath;
1105
1307
  }
1106
1308
 
1309
/**
 * Persist a large text payload and return the resulting compact placeholder.
 *
 * Steps, in order: resolve the text summarizer, mint a `file_`-prefixed id,
 * write the content via `storeLargeFileContent`, generate an exploration
 * summary, record the file through `summaryStore.insertLargeFile`, and finally
 * render the reference string with the caller-supplied formatter.
 *
 * @param params.conversationId - Conversation the payload belongs to.
 * @param params.content - Full text to move out of the message.
 * @param params.fileName - Optional name used for extension detection and metadata.
 * @param params.mimeType - Optional MIME type used for extension detection and metadata.
 * @param params.formatReference - Builds the inline placeholder from the stored file's details.
 * @returns The file id, UTF-8 byte size, summary, and rendered reference.
 */
private async externalizeLargeTextPayload(params: {
  conversationId: number;
  content: string;
  fileName?: string;
  mimeType?: string;
  formatReference: (input: { fileId: string; byteSize: number; summary: string }) => string;
}): Promise<{ fileId: string; byteSize: number; summary: string; reference: string }> {
  const { conversationId, content, fileName, mimeType } = params;

  const summarizeText = await this.resolveLargeFileTextSummarizer();

  const uuidHex = randomUUID().replace(/-/g, "");
  const fileId = `file_${uuidHex.slice(0, 16)}`;
  const extension = extensionFromNameOrMime(fileName, mimeType);

  const storageUri = await this.storeLargeFileContent({
    conversationId,
    fileId,
    extension,
    content,
  });
  const byteSize = Buffer.byteLength(content, "utf8");

  const summary = await generateExplorationSummary({
    content,
    fileName,
    mimeType,
    summarizeText,
  });

  await this.summaryStore.insertLargeFile({
    fileId,
    conversationId,
    fileName,
    mimeType,
    byteSize,
    storageUri,
    explorationSummary: summary,
  });

  const reference = params.formatReference({ fileId, byteSize, summary });
  return { fileId, byteSize, summary, reference };
}
1355
+
1107
1356
  /**
1108
1357
  * Intercept oversized <file> blocks before persistence and replace them with
1109
1358
  * compact file references backed by large_files records.
@@ -1118,7 +1367,6 @@ export class LcmContextEngine implements ContextEngine {
1118
1367
  }
1119
1368
 
1120
1369
  const threshold = Math.max(1, this.config.largeFileTokenThreshold);
1121
- const summarizeText = await this.resolveLargeFileTextSummarizer();
1122
1370
  const fileIds: string[] = [];
1123
1371
  const rewrittenSegments: string[] = [];
1124
1372
  let cursor = 0;
@@ -1131,44 +1379,25 @@ export class LcmContextEngine implements ContextEngine {
1131
1379
  }
1132
1380
 
1133
1381
  interceptedAny = true;
1134
- const fileId = `file_${randomUUID().replace(/-/g, "").slice(0, 16)}`;
1135
- const extension = extensionFromNameOrMime(block.fileName, block.mimeType);
1136
- const storageUri = await this.storeLargeFileContent({
1382
+ const externalized = await this.externalizeLargeTextPayload({
1137
1383
  conversationId: params.conversationId,
1138
- fileId,
1139
- extension,
1140
- content: block.text,
1141
- });
1142
- const byteSize = Buffer.byteLength(block.text, "utf8");
1143
- const explorationSummary = await generateExplorationSummary({
1144
1384
  content: block.text,
1145
1385
  fileName: block.fileName,
1146
1386
  mimeType: block.mimeType,
1147
- summarizeText,
1148
- });
1149
-
1150
- await this.summaryStore.insertLargeFile({
1151
- fileId,
1152
- conversationId: params.conversationId,
1153
- fileName: block.fileName,
1154
- mimeType: block.mimeType,
1155
- byteSize,
1156
- storageUri,
1157
- explorationSummary,
1387
+ formatReference: ({ fileId, byteSize, summary }) =>
1388
+ formatFileReference({
1389
+ fileId,
1390
+ fileName: block.fileName,
1391
+ mimeType: block.mimeType,
1392
+ byteSize,
1393
+ summary,
1394
+ }),
1158
1395
  });
1159
1396
 
1160
1397
  rewrittenSegments.push(params.content.slice(cursor, block.start));
1161
- rewrittenSegments.push(
1162
- formatFileReference({
1163
- fileId,
1164
- fileName: block.fileName,
1165
- mimeType: block.mimeType,
1166
- byteSize,
1167
- summary: explorationSummary,
1168
- }),
1169
- );
1398
+ rewrittenSegments.push(externalized.reference);
1170
1399
  cursor = block.end;
1171
- fileIds.push(fileId);
1400
+ fileIds.push(externalized.fileId);
1172
1401
  }
1173
1402
 
1174
1403
  if (!interceptedAny) {
@@ -1182,6 +1411,146 @@ export class LcmContextEngine implements ContextEngine {
1182
1411
  };
1183
1412
  }
1184
1413
 
1414
/**
 * Externalize oversized textual tool outputs before they are persisted inline.
 *
 * Only messages with role `toolResult` or `tool` and array content are
 * considered. Each content item that is a tool-result block (`tool_result`,
 * `toolResult`, `function_call_output`) or a plain `text` block has its text
 * extracted; when the estimated token count reaches the large-file threshold
 * the text is stored externally and the item is replaced by a compact block
 * carrying the reference plus externalization metadata. All other items are
 * passed through untouched.
 *
 * @param params.conversationId - Conversation that owns the stored payloads.
 * @param params.message - Candidate tool-result message.
 * @returns The rewritten message and the ids of the files created, or `null`
 *   when nothing was externalized.
 */
private async interceptLargeToolResults(params: {
  conversationId: number;
  message: AgentMessage;
}): Promise<{ rewrittenMessage: AgentMessage; fileIds: string[] } | null> {
  const hasToolRole = params.message.role === "toolResult" || params.message.role === "tool";
  if (!hasToolRole || !("content" in params.message)) {
    return null;
  }
  if (!Array.isArray(params.message.content)) {
    return null;
  }

  const threshold = Math.max(1, this.config.largeFileTokenThreshold);

  // Message-level identifiers act as fallbacks when a block omits its own.
  const envelope = params.message as Record<string, unknown>;
  const envelopeCallId =
    safeString(envelope.toolCallId) ??
    safeString(envelope.tool_call_id) ??
    safeString(envelope.toolUseId) ??
    safeString(envelope.tool_use_id) ??
    safeString(envelope.call_id) ??
    safeString(envelope.id);
  const envelopeToolName = safeString(envelope.toolName) ?? safeString(envelope.tool_name);
  const envelopeIsError = safeBoolean(envelope.isError) ?? safeBoolean(envelope.is_error);

  const rewrittenContent: unknown[] = [];
  const fileIds: string[] = [];

  for (const item of params.message.content) {
    if (!item || typeof item !== "object" || Array.isArray(item)) {
      rewrittenContent.push(item);
      continue;
    }

    const record = item as Record<string, unknown>;
    const rawType = safeString(record.type);
    const isToolResultBlock =
      rawType === "tool_result" ||
      rawType === "toolResult" ||
      rawType === "function_call_output";
    const isPlainTextBlock = rawType === "text" && typeof record.text === "string";
    if (!isToolResultBlock && !isPlainTextBlock) {
      rewrittenContent.push(item);
      continue;
    }

    // Pick the payload to measure: text blocks use `text`; result blocks
    // prefer `output`, then `content`, then the block itself.
    let textSource: unknown;
    if (isPlainTextBlock) {
      textSource = record.text;
    } else if (record.output !== undefined) {
      textSource = record.output;
    } else if (record.content !== undefined) {
      textSource = record.content;
    } else {
      textSource = record;
    }

    const extractedText = extractStructuredText(textSource);
    if (typeof extractedText !== "string" || estimateTokens(extractedText) < threshold) {
      rewrittenContent.push(item);
      continue;
    }

    const toolName = safeString(record.name) ?? envelopeToolName ?? "tool-result";
    const externalized = await this.externalizeLargeTextPayload({
      conversationId: params.conversationId,
      content: extractedText,
      fileName: `${toolName}.txt`,
      mimeType: "text/plain",
      formatReference: ({ fileId, byteSize, summary }) =>
        formatToolOutputReference({
          fileId,
          toolName,
          byteSize,
          summary,
        }),
    });

    // Everything except function_call_output is normalized to tool_result,
    // including plain text blocks.
    const normalizedRawType =
      rawType === "function_call_output" ? "function_call_output" : "tool_result";
    const compactBlock: Record<string, unknown> = {
      type: normalizedRawType,
      output: externalized.reference,
      externalizedFileId: externalized.fileId,
      originalByteSize: externalized.byteSize,
      toolOutputExternalized: true,
      externalizationReason: "large_tool_result",
    };

    const callId =
      safeString(record.tool_use_id) ??
      safeString(record.toolUseId) ??
      safeString(record.tool_call_id) ??
      safeString(record.toolCallId) ??
      safeString(record.call_id) ??
      safeString(record.id) ??
      envelopeCallId;
    if (callId) {
      const callIdKey = normalizedRawType === "function_call_output" ? "call_id" : "tool_use_id";
      compactBlock[callIdKey] = callId;
    }

    // Keep whichever error-flag spelling the source used; the message-level
    // flag is only consulted when the block has none.
    if (typeof record.is_error === "boolean") {
      compactBlock.is_error = record.is_error;
    } else if (typeof record.isError === "boolean") {
      compactBlock.isError = record.isError;
    } else if (typeof envelopeIsError === "boolean") {
      compactBlock.isError = envelopeIsError;
    }

    if (toolName) {
      compactBlock.name = toolName;
    }

    rewrittenContent.push(compactBlock);
    fileIds.push(externalized.fileId);
  }

  // Every externalized item contributes exactly one file id.
  if (fileIds.length === 0) {
    return null;
  }

  return {
    rewrittenMessage: {
      ...params.message,
      content: rewrittenContent,
    } as AgentMessage,
    fileIds,
  };
}
1553
+
1185
1554
  // ── ContextEngine interface ─────────────────────────────────────────────
1186
1555
 
1187
1556
  /**
@@ -1315,93 +1684,221 @@ export class LcmContextEngine implements ContextEngine {
1315
1684
  };
1316
1685
  }
1317
1686
  this.ensureMigrated();
1687
+ const sessionFileStats = statSync(params.sessionFile);
1688
+ const sessionFileSize = sessionFileStats.size;
1689
+ const sessionFileMtimeMs = Math.trunc(sessionFileStats.mtimeMs);
1318
1690
 
1319
1691
  const result = await this.withSessionQueue(
1320
1692
  this.resolveSessionQueueKey(params.sessionId, params.sessionKey),
1321
1693
  async () =>
1322
1694
  this.conversationStore.withTransaction(async () => {
1695
+ const persistBootstrapState = async (
1696
+ conversationId: number,
1697
+ historicalMessages: AgentMessage[],
1698
+ ): Promise<void> => {
1699
+ const lastMessage =
1700
+ historicalMessages.length > 0
1701
+ ? toStoredMessage(historicalMessages[historicalMessages.length - 1]!)
1702
+ : null;
1703
+ await this.summaryStore.upsertConversationBootstrapState({
1704
+ conversationId,
1705
+ sessionFilePath: params.sessionFile,
1706
+ lastSeenSize: sessionFileSize,
1707
+ lastSeenMtimeMs: sessionFileMtimeMs,
1708
+ lastProcessedOffset: sessionFileSize,
1709
+ lastProcessedEntryHash: createBootstrapEntryHash(lastMessage),
1710
+ });
1711
+ };
1712
+
1323
1713
  const conversation = await this.conversationStore.getOrCreateConversation(params.sessionId, {
1324
1714
  sessionKey: params.sessionKey,
1325
1715
  });
1326
1716
  const conversationId = conversation.conversationId;
1327
- const historicalMessages = readLeafPathMessages(params.sessionFile);
1328
-
1329
- // First-time import path: no LCM rows yet, so seed directly from the
1330
- // active leaf context snapshot.
1331
- const existingCount = await this.conversationStore.getMessageCount(conversationId);
1332
- if (existingCount === 0) {
1333
- if (historicalMessages.length === 0) {
1334
- await this.conversationStore.markConversationBootstrapped(conversationId);
1717
+ const existingCount = await this.conversationStore.getMessageCount(conversationId);
1718
+ const bootstrapState =
1719
+ existingCount > 0
1720
+ ? await this.summaryStore.getConversationBootstrapState(conversationId)
1721
+ : null;
1722
+
1723
+ // If the transcript file is byte-for-byte unchanged from the last
1724
+ // successful bootstrap checkpoint, skip reopening and reparsing it.
1725
+ if (
1726
+ bootstrapState &&
1727
+ bootstrapState.sessionFilePath === params.sessionFile &&
1728
+ bootstrapState.lastSeenSize === sessionFileSize &&
1729
+ bootstrapState.lastSeenMtimeMs === sessionFileMtimeMs
1730
+ ) {
1731
+ if (!conversation.bootstrappedAt) {
1732
+ await this.conversationStore.markConversationBootstrapped(conversationId);
1733
+ }
1335
1734
  return {
1336
1735
  bootstrapped: false,
1337
1736
  importedMessages: 0,
1338
- reason: "no leaf-path messages in session",
1737
+ reason: conversation.bootstrappedAt ? "already bootstrapped" : "conversation already up to date",
1339
1738
  };
1340
1739
  }
1341
1740
 
1342
- const nextSeq = (await this.conversationStore.getMaxSeq(conversationId)) + 1;
1343
- const bulkInput = historicalMessages.map((message, index) => {
1344
- const stored = toStoredMessage(message);
1345
- return {
1346
- conversationId,
1347
- seq: nextSeq + index,
1348
- role: stored.role,
1349
- content: stored.content,
1350
- tokenCount: stored.tokenCount,
1351
- };
1352
- });
1741
+ if (
1742
+ existingCount > 0 &&
1743
+ bootstrapState &&
1744
+ bootstrapState.sessionFilePath === params.sessionFile &&
1745
+ sessionFileSize > bootstrapState.lastSeenSize &&
1746
+ sessionFileMtimeMs >= bootstrapState.lastSeenMtimeMs
1747
+ ) {
1748
+ const latestDbMessage = await this.conversationStore.getLastMessage(conversationId);
1749
+ const latestDbHash = latestDbMessage
1750
+ ? createBootstrapEntryHash({
1751
+ role: latestDbMessage.role,
1752
+ content: latestDbMessage.content,
1753
+ tokenCount: latestDbMessage.tokenCount,
1754
+ })
1755
+ : null;
1756
+ const tailEntryRaw = readLastJsonlEntryBeforeOffset(
1757
+ params.sessionFile,
1758
+ bootstrapState.lastProcessedOffset,
1759
+ );
1760
+ const tailEntryMessage = readBootstrapMessageFromJsonLine(tailEntryRaw);
1761
+ const tailEntryHash = tailEntryMessage
1762
+ ? createBootstrapEntryHash(toStoredMessage(tailEntryMessage))
1763
+ : null;
1764
+
1765
+ if (
1766
+ latestDbHash &&
1767
+ latestDbHash === bootstrapState.lastProcessedEntryHash &&
1768
+ tailEntryHash &&
1769
+ tailEntryHash === bootstrapState.lastProcessedEntryHash
1770
+ ) {
1771
+ const appended = readAppendedLeafPathMessages({
1772
+ sessionFile: params.sessionFile,
1773
+ offset: bootstrapState.lastProcessedOffset,
1774
+ });
1775
+ if (appended.canUseAppendOnly) {
1776
+ if (!conversation.bootstrappedAt) {
1777
+ await this.conversationStore.markConversationBootstrapped(conversationId);
1778
+ }
1779
+
1780
+ let importedMessages = 0;
1781
+ for (const message of appended.messages) {
1782
+ const ingestResult = await this.ingestSingle({
1783
+ sessionId: params.sessionId,
1784
+ sessionKey: params.sessionKey,
1785
+ message,
1786
+ });
1787
+ if (ingestResult.ingested) {
1788
+ importedMessages += 1;
1789
+ }
1790
+ }
1791
+
1792
+ const lastAppendedMessage =
1793
+ appended.messages.length > 0
1794
+ ? appended.messages[appended.messages.length - 1]!
1795
+ : tailEntryMessage;
1796
+ await persistBootstrapState(
1797
+ conversationId,
1798
+ lastAppendedMessage ? [lastAppendedMessage] : [],
1799
+ );
1800
+
1801
+ if (importedMessages > 0) {
1802
+ return {
1803
+ bootstrapped: true,
1804
+ importedMessages,
1805
+ reason: "reconciled missing session messages",
1806
+ };
1807
+ }
1808
+
1809
+ return {
1810
+ bootstrapped: false,
1811
+ importedMessages: 0,
1812
+ reason: conversation.bootstrappedAt ? "already bootstrapped" : "conversation already up to date",
1813
+ };
1814
+ }
1815
+ }
1816
+ }
1353
1817
 
1354
- const inserted = await this.conversationStore.createMessagesBulk(bulkInput);
1355
- await this.summaryStore.appendContextMessages(
1356
- conversationId,
1357
- inserted.map((record) => record.messageId),
1358
- );
1359
- await this.conversationStore.markConversationBootstrapped(conversationId);
1360
-
1361
- // Prune HEARTBEAT_OK turns from the freshly imported data
1362
- if (this.config.pruneHeartbeatOk) {
1363
- const pruned = await this.pruneHeartbeatOkTurns(conversationId);
1364
- if (pruned > 0) {
1365
- console.error(
1366
- `[lcm] bootstrap: pruned ${pruned} HEARTBEAT_OK messages from conversation ${conversationId}`,
1367
- );
1818
+ const historicalMessages = await readLeafPathMessages(params.sessionFile);
1819
+
1820
+ // First-time import path: no LCM rows yet, so seed directly from the
1821
+ // active leaf context snapshot.
1822
+ if (existingCount === 0) {
1823
+ if (historicalMessages.length === 0) {
1824
+ await this.conversationStore.markConversationBootstrapped(conversationId);
1825
+ await persistBootstrapState(conversationId, historicalMessages);
1826
+ return {
1827
+ bootstrapped: false,
1828
+ importedMessages: 0,
1829
+ reason: "no leaf-path messages in session",
1830
+ };
1368
1831
  }
1832
+
1833
+ const nextSeq = (await this.conversationStore.getMaxSeq(conversationId)) + 1;
1834
+ const bulkInput = historicalMessages.map((message, index) => {
1835
+ const stored = toStoredMessage(message);
1836
+ return {
1837
+ conversationId,
1838
+ seq: nextSeq + index,
1839
+ role: stored.role,
1840
+ content: stored.content,
1841
+ tokenCount: stored.tokenCount,
1842
+ };
1843
+ });
1844
+
1845
+ const inserted = await this.conversationStore.createMessagesBulk(bulkInput);
1846
+ await this.summaryStore.appendContextMessages(
1847
+ conversationId,
1848
+ inserted.map((record) => record.messageId),
1849
+ );
1850
+ await this.conversationStore.markConversationBootstrapped(conversationId);
1851
+ await persistBootstrapState(conversationId, historicalMessages);
1852
+
1853
+ // Prune HEARTBEAT_OK turns from the freshly imported data
1854
+ if (this.config.pruneHeartbeatOk) {
1855
+ const pruned = await this.pruneHeartbeatOkTurns(conversationId);
1856
+ if (pruned > 0) {
1857
+ console.error(
1858
+ `[lcm] bootstrap: pruned ${pruned} HEARTBEAT_OK messages from conversation ${conversationId}`,
1859
+ );
1860
+ }
1861
+ }
1862
+
1863
+ return {
1864
+ bootstrapped: true,
1865
+ importedMessages: inserted.length,
1866
+ };
1369
1867
  }
1370
1868
 
1371
- return {
1372
- bootstrapped: true,
1373
- importedMessages: inserted.length,
1374
- };
1375
- }
1869
+ // Existing conversation path: reconcile crash gaps by appending JSONL
1870
+ // messages that were never persisted to LCM.
1871
+ const reconcile = await this.reconcileSessionTail({
1872
+ sessionId: params.sessionId,
1873
+ sessionKey: params.sessionKey,
1874
+ conversationId,
1875
+ historicalMessages,
1876
+ });
1376
1877
 
1377
- // Existing conversation path: reconcile crash gaps by appending JSONL
1378
- // messages that were never persisted to LCM.
1379
- const reconcile = await this.reconcileSessionTail({
1380
- sessionId: params.sessionId,
1381
- sessionKey: params.sessionKey,
1382
- conversationId,
1383
- historicalMessages,
1384
- });
1878
+ if (!conversation.bootstrappedAt) {
1879
+ await this.conversationStore.markConversationBootstrapped(conversationId);
1880
+ }
1385
1881
 
1386
- if (!conversation.bootstrappedAt) {
1387
- await this.conversationStore.markConversationBootstrapped(conversationId);
1388
- }
1882
+ if (reconcile.importedMessages > 0) {
1883
+ await persistBootstrapState(conversationId, historicalMessages);
1884
+ return {
1885
+ bootstrapped: true,
1886
+ importedMessages: reconcile.importedMessages,
1887
+ reason: "reconciled missing session messages",
1888
+ };
1889
+ }
1389
1890
 
1390
- if (reconcile.importedMessages > 0) {
1391
- return {
1392
- bootstrapped: true,
1393
- importedMessages: reconcile.importedMessages,
1394
- reason: "reconciled missing session messages",
1395
- };
1396
- }
1891
+ if (reconcile.hasOverlap) {
1892
+ await persistBootstrapState(conversationId, historicalMessages);
1893
+ }
1397
1894
 
1398
- if (conversation.bootstrappedAt) {
1399
- return {
1400
- bootstrapped: false,
1401
- importedMessages: 0,
1402
- reason: "already bootstrapped",
1403
- };
1404
- }
1895
+ if (conversation.bootstrappedAt) {
1896
+ return {
1897
+ bootstrapped: false,
1898
+ importedMessages: 0,
1899
+ reason: "already bootstrapped",
1900
+ };
1901
+ }
1405
1902
 
1406
1903
  return {
1407
1904
  bootstrapped: false,
@@ -1474,6 +1971,17 @@ export class LcmContextEngine implements ContextEngine {
1474
1971
  } as AgentMessage;
1475
1972
  }
1476
1973
  }
1974
+ } else if (stored.role === "tool") {
1975
+ const intercepted = await this.interceptLargeToolResults({
1976
+ conversationId,
1977
+ message,
1978
+ });
1979
+ if (intercepted) {
1980
+ messageForParts = intercepted.rewrittenMessage;
1981
+ const rewrittenStored = toStoredMessage(intercepted.rewrittenMessage);
1982
+ stored.content = rewrittenStored.content;
1983
+ stored.tokenCount = rewrittenStored.tokenCount;
1984
+ }
1477
1985
  }
1478
1986
 
1479
1987
  // Determine next sequence number
@@ -1830,7 +2338,7 @@ export class LcmContextEngine implements ContextEngine {
1830
2338
  }
1831
2339
  ).currentTokenCount,
1832
2340
  );
1833
- const summarize = await this.resolveSummarize({
2341
+ const { summarize, summaryModel } = await this.resolveSummarize({
1834
2342
  legacyParams,
1835
2343
  customInstructions: params.customInstructions,
1836
2344
  });
@@ -1841,6 +2349,7 @@ export class LcmContextEngine implements ContextEngine {
1841
2349
  summarize,
1842
2350
  force: params.force,
1843
2351
  previousSummaryContent: params.previousSummaryContent,
2352
+ summaryModel,
1844
2353
  });
1845
2354
  const tokensBefore = observedTokens ?? leafResult.tokensBefore;
1846
2355
 
@@ -1934,7 +2443,7 @@ export class LcmContextEngine implements ContextEngine {
1934
2443
  };
1935
2444
  }
1936
2445
 
1937
- const summarize = await this.resolveSummarize({
2446
+ const { summarize, summaryModel } = await this.resolveSummarize({
1938
2447
  legacyParams,
1939
2448
  customInstructions: params.customInstructions,
1940
2449
  });
@@ -1977,6 +2486,7 @@ export class LcmContextEngine implements ContextEngine {
1977
2486
  summarize,
1978
2487
  force: forceCompaction,
1979
2488
  hardTrigger: false,
2489
+ summaryModel,
1980
2490
  });
1981
2491
 
1982
2492
  return {
@@ -2013,6 +2523,7 @@ export class LcmContextEngine implements ContextEngine {
2013
2523
  targetTokens: convergenceTargetTokens,
2014
2524
  ...(observedTokens !== undefined ? { currentTokens: observedTokens } : {}),
2015
2525
  summarize,
2526
+ summaryModel,
2016
2527
  });
2017
2528
  const didCompact = compactResult.rounds > 0;
2018
2529