@martian-engineering/lossless-claw 0.4.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@martian-engineering/lossless-claw",
3
- "version": "0.4.0",
3
+ "version": "0.5.1",
4
4
  "description": "Lossless Context Management plugin for OpenClaw — DAG-based conversation summarization with incremental compaction",
5
5
  "type": "module",
6
6
  "main": "index.ts",
@@ -35,6 +35,7 @@
35
35
  "@sinclair/typebox": "0.34.48"
36
36
  },
37
37
  "devDependencies": {
38
+ "@changesets/changelog-github": "^0.6.0",
38
39
  "@changesets/cli": "^2.30.0",
39
40
  "typescript": "^5.7.0",
40
41
  "vitest": "^3.0.0"
package/src/assembler.ts CHANGED
@@ -238,9 +238,12 @@ export function toolCallBlockFromPart(part: MessagePartRecord, rawType?: string)
238
238
  return block;
239
239
  }
240
240
 
241
- if (typeof part.toolCallId === "string" && part.toolCallId.length > 0) {
242
- block.id = part.toolCallId;
243
- }
241
+ // Always set id — downstream providers (e.g. Anthropic) call
242
+ // normalizeToolCallId(block.id) which crashes on undefined.
243
+ block.id =
244
+ typeof part.toolCallId === "string" && part.toolCallId.length > 0
245
+ ? part.toolCallId
246
+ : `toolu_lcm_${part.partId ?? "unknown"}`;
244
247
  if (typeof part.toolName === "string" && part.toolName.length > 0) {
245
248
  block.name = part.toolName;
246
249
  }
@@ -362,6 +365,37 @@ export function blockFromPart(part: MessagePartRecord): unknown {
362
365
  if (!isToolBlock) {
363
366
  return metadata.raw;
364
367
  }
368
+
369
+ // When tool blocks are routed through toolCallBlockFromPart (below) instead
370
+ // of returning raw directly, the function reads part.toolCallId / part.toolName
371
+ // from the DB columns. For rows stored as part_type='text' those columns are
372
+ // often NULL — the values only live inside metadata.raw. Backfill them here
373
+ // so the reconstructed block keeps the original id/name.
374
+ const rawRecord = metadata.raw as Record<string, unknown>;
375
+ const rawToolCallId =
376
+ typeof rawRecord.id === "string" && rawRecord.id.length > 0
377
+ ? rawRecord.id
378
+ : typeof rawRecord.call_id === "string" && rawRecord.call_id.length > 0
379
+ ? rawRecord.call_id
380
+ : undefined;
381
+ if (rawToolCallId) {
382
+ if (typeof part.toolCallId !== "string" || part.toolCallId.length === 0) {
383
+ part.toolCallId = rawToolCallId;
384
+ }
385
+ }
386
+ if (typeof rawRecord.name === "string" && rawRecord.name.length > 0) {
387
+ if (typeof part.toolName !== "string" || part.toolName.length === 0) {
388
+ part.toolName = rawRecord.name;
389
+ }
390
+ }
391
+ // Backfill toolInput from raw arguments/input so toolCallBlockFromPart
392
+ // can reconstruct the full block.
393
+ if (part.toolInput == null || part.toolInput === "") {
394
+ const rawArgs = rawRecord.arguments ?? rawRecord.input;
395
+ if (rawArgs !== undefined) {
396
+ part.toolInput = typeof rawArgs === "string" ? rawArgs : JSON.stringify(rawArgs);
397
+ }
398
+ }
365
399
  }
366
400
 
367
401
  if (part.partType === "reasoning") {
package/src/compaction.ts CHANGED
@@ -142,6 +142,14 @@ function generateSummaryId(content: string): string {
142
142
  /** Maximum characters for the deterministic fallback truncation (512 tokens * 4 chars). */
143
143
  const FALLBACK_MAX_CHARS = 512 * 4;
144
144
  const DEFAULT_LEAF_CHUNK_TOKENS = 20_000;
145
+
146
+ /**
147
+ * Pattern matching MEDIA:/... file path references that appear in message content
148
+ * when the original message contained only a media attachment (image, file, etc.)
149
+ * with no meaningful text.
150
+ */
151
+ const MEDIA_PATH_RE = /^MEDIA:\/.+$/;
152
+
145
153
  const CONDENSED_MIN_INPUT_RATIO = 0.1;
146
154
 
147
155
  function dedupeOrderedIds(ids: Iterable<string>): string[] {
@@ -231,6 +239,7 @@ export class CompactionEngine {
231
239
  summarize: CompactionSummarizeFn;
232
240
  force?: boolean;
233
241
  hardTrigger?: boolean;
242
+ summaryModel?: string;
234
243
  }): Promise<CompactionResult> {
235
244
  return this.compactFullSweep(input);
236
245
  }
@@ -246,6 +255,7 @@ export class CompactionEngine {
246
255
  summarize: CompactionSummarizeFn;
247
256
  force?: boolean;
248
257
  previousSummaryContent?: string;
258
+ summaryModel?: string;
249
259
  }): Promise<CompactionResult> {
250
260
  const { conversationId, tokenBudget, summarize, force } = input;
251
261
 
@@ -281,6 +291,7 @@ export class CompactionEngine {
281
291
  leafChunk.items,
282
292
  summarize,
283
293
  previousSummaryContent,
294
+ input.summaryModel,
284
295
  );
285
296
  if (!leafResult) {
286
297
  return {
@@ -322,6 +333,7 @@ export class CompactionEngine {
322
333
  chunk.items,
323
334
  targetDepth,
324
335
  summarize,
336
+ input.summaryModel,
325
337
  );
326
338
  if (!condenseResult) {
327
339
  break;
@@ -370,6 +382,7 @@ export class CompactionEngine {
370
382
  summarize: CompactionSummarizeFn;
371
383
  force?: boolean;
372
384
  hardTrigger?: boolean;
385
+ summaryModel?: string;
373
386
  }): Promise<CompactionResult> {
374
387
  const { conversationId, tokenBudget, summarize, force, hardTrigger } = input;
375
388
 
@@ -416,6 +429,7 @@ export class CompactionEngine {
416
429
  leafChunk.items,
417
430
  summarize,
418
431
  previousSummaryContent,
432
+ input.summaryModel,
419
433
  );
420
434
  if (!leafResult) {
421
435
  break;
@@ -461,6 +475,7 @@ export class CompactionEngine {
461
475
  candidate.chunk.items,
462
476
  candidate.targetDepth,
463
477
  summarize,
478
+ input.summaryModel,
464
479
  );
465
480
  if (!condenseResult) {
466
481
  break;
@@ -511,6 +526,7 @@ export class CompactionEngine {
511
526
  targetTokens?: number;
512
527
  currentTokens?: number;
513
528
  summarize: CompactionSummarizeFn;
529
+ summaryModel?: string;
514
530
  }): Promise<{ success: boolean; rounds: number; finalTokens: number }> {
515
531
  const { conversationId, tokenBudget, summarize } = input;
516
532
  const targetTokens =
@@ -542,6 +558,7 @@ export class CompactionEngine {
542
558
  tokenBudget,
543
559
  summarize,
544
560
  force: true,
561
+ summaryModel: input.summaryModel,
545
562
  });
546
563
 
547
564
  if (result.tokensAfter <= targetTokens) {
@@ -998,6 +1015,17 @@ export class CompactionEngine {
998
1015
  };
999
1016
  }
1000
1017
  const inputTokens = Math.max(1, estimateTokens(sourceText));
1018
+ const buildDeterministicFallback = (): { content: string; level: CompactionLevel } => {
1019
+ const truncated =
1020
+ sourceText.length > FALLBACK_MAX_CHARS
1021
+ ? sourceText.slice(0, FALLBACK_MAX_CHARS)
1022
+ : sourceText;
1023
+ return {
1024
+ content: `${truncated}
1025
+ [Truncated from ${inputTokens} tokens]`,
1026
+ level: "fallback",
1027
+ };
1028
+ };
1001
1029
 
1002
1030
  const runSummarizer = async (aggressiveMode: boolean): Promise<string | null> => {
1003
1031
  const output = await params.summarize(sourceText, aggressiveMode, params.options);
@@ -1007,7 +1035,9 @@ export class CompactionEngine {
1007
1035
 
1008
1036
  const initialSummary = await runSummarizer(false);
1009
1037
  if (initialSummary === null) {
1010
- return null;
1038
+ // Empty provider output should still compact deterministically so auth
1039
+ // failures or empty responses do not stall compaction entirely.
1040
+ return buildDeterministicFallback();
1011
1041
  }
1012
1042
  let summaryText = initialSummary;
1013
1043
  let level: CompactionLevel = "normal";
@@ -1015,25 +1045,60 @@ export class CompactionEngine {
1015
1045
  if (estimateTokens(summaryText) >= inputTokens) {
1016
1046
  const aggressiveSummary = await runSummarizer(true);
1017
1047
  if (aggressiveSummary === null) {
1018
- return null;
1048
+ return buildDeterministicFallback();
1019
1049
  }
1020
1050
  summaryText = aggressiveSummary;
1021
1051
  level = "aggressive";
1022
1052
 
1023
1053
  if (estimateTokens(summaryText) >= inputTokens) {
1024
- const truncated =
1025
- sourceText.length > FALLBACK_MAX_CHARS
1026
- ? sourceText.slice(0, FALLBACK_MAX_CHARS)
1027
- : sourceText;
1028
- summaryText = `${truncated}
1029
- [Truncated from ${inputTokens} tokens]`;
1030
- level = "fallback";
1054
+ return buildDeterministicFallback();
1031
1055
  }
1032
1056
  }
1033
1057
 
1034
1058
  return { content: summaryText, level };
1035
1059
  }
1036
1060
 
1061
+ // ── Private: Media Annotation ────────────────────────────────────────────
1062
+
1063
+ /**
1064
+ * Annotate a message's content with media context when it has file/media
1065
+ * attachments. This gives the summarizer enough context to produce a
1066
+ * meaningful summary instead of trying to compress raw file paths.
1067
+ *
1068
+ * - Media-only messages (just a file path, no text): content is replaced
1069
+ * with "[Media attachment]".
1070
+ * - Media-mostly messages (any real text + attachment): media path lines are stripped and the remaining text is annotated
1071
+ * with " [with media attachment]" suffix.
1072
+ * - Text-only messages: returned unchanged.
1073
+ */
1074
+ private async annotateMediaContent(
1075
+ messageId: number,
1076
+ content: string,
1077
+ ): Promise<string> {
1078
+ const parts = await this.conversationStore.getMessageParts(messageId);
1079
+ const hasMediaParts = parts.some(
1080
+ (p) => p.partType === "file" || p.partType === "snapshot",
1081
+ );
1082
+ if (!hasMediaParts) {
1083
+ return content;
1084
+ }
1085
+
1086
+ // Strip MEDIA:/... paths to see how much actual text remains
1087
+ const textWithoutPaths = content
1088
+ .split("\n")
1089
+ .filter((line) => !MEDIA_PATH_RE.test(line.trim()))
1090
+ .join("\n")
1091
+ .trim();
1092
+
1093
+ if (textWithoutPaths.length === 0) {
1094
+ // Media-only: replace with descriptive annotation
1095
+ return "[Media attachment]";
1096
+ }
1097
+
1098
+ // Media-mostly: keep the text, add annotation
1099
+ return `${textWithoutPaths} [with media attachment]`;
1100
+ }
1101
+
1037
1102
  // ── Private: Leaf Pass ───────────────────────────────────────────────────
1038
1103
 
1039
1104
  /**
@@ -1044,6 +1109,7 @@ export class CompactionEngine {
1044
1109
  messageItems: ContextItemRecord[],
1045
1110
  summarize: CompactionSummarizeFn,
1046
1111
  previousSummaryContent?: string,
1112
+ summaryModel?: string,
1047
1113
  ): Promise<{ summaryId: string; level: CompactionLevel; content: string } | null> {
1048
1114
  // Fetch full message content for each context item
1049
1115
  const messageContents: { messageId: number; content: string; createdAt: Date; tokenCount: number }[] =
@@ -1054,9 +1120,13 @@ export class CompactionEngine {
1054
1120
  }
1055
1121
  const msg = await this.conversationStore.getMessageById(item.messageId);
1056
1122
  if (msg) {
1123
+ const annotatedContent = await this.annotateMediaContent(
1124
+ msg.messageId,
1125
+ msg.content,
1126
+ );
1057
1127
  messageContents.push({
1058
1128
  messageId: msg.messageId,
1059
- content: msg.content,
1129
+ content: annotatedContent,
1060
1130
  createdAt: msg.createdAt,
1061
1131
  tokenCount: this.resolveMessageTokenCount(msg),
1062
1132
  });
@@ -1110,6 +1180,7 @@ export class CompactionEngine {
1110
1180
  (sum, message) => sum + Math.max(0, Math.floor(message.tokenCount)),
1111
1181
  0,
1112
1182
  ),
1183
+ model: summaryModel,
1113
1184
  });
1114
1185
 
1115
1186
  // Link to source messages
@@ -1141,6 +1212,7 @@ export class CompactionEngine {
1141
1212
  summaryItems: ContextItemRecord[],
1142
1213
  targetDepth: number,
1143
1214
  summarize: CompactionSummarizeFn,
1215
+ summaryModel?: string,
1144
1216
  ): Promise<PassResult | null> {
1145
1217
  // Fetch full summary records
1146
1218
  const summaryRecords: SummaryRecord[] = [];
@@ -1242,6 +1314,7 @@ export class CompactionEngine {
1242
1314
  : 0;
1243
1315
  return count + sourceTokens;
1244
1316
  }, 0),
1317
+ model: summaryModel,
1245
1318
  });
1246
1319
 
1247
1320
  // Link to parent summaries
@@ -3,6 +3,7 @@ import { dirname, resolve } from "node:path";
3
3
  import { DatabaseSync } from "node:sqlite";
4
4
 
5
5
  type ConnectionKey = string;
6
+ const SQLITE_BUSY_TIMEOUT_MS = 5_000;
6
7
 
7
8
  const connectionsByPath = new Map<ConnectionKey, Set<DatabaseSync>>();
8
9
  const connectionIndex = new Map<DatabaseSync, ConnectionKey>();
@@ -29,6 +30,7 @@ function ensureDbDirectory(dbPath: string): void {
29
30
 
30
31
  function configureConnection(db: DatabaseSync): DatabaseSync {
31
32
  db.exec("PRAGMA journal_mode = WAL");
33
+ db.exec(`PRAGMA busy_timeout = ${SQLITE_BUSY_TIMEOUT_MS}`);
32
34
  db.exec("PRAGMA foreign_keys = ON");
33
35
  return db;
34
36
  }
@@ -80,6 +80,14 @@ function isoStringOrNull(value: Date | null): string | null {
80
80
  return value ? value.toISOString() : null;
81
81
  }
82
82
 
83
+ function ensureSummaryModelColumn(db: DatabaseSync): void {
84
+ const summaryColumns = db.prepare(`PRAGMA table_info(summaries)`).all() as SummaryColumnInfo[];
85
+ const hasModel = summaryColumns.some((col) => col.name === "model");
86
+ if (!hasModel) {
87
+ db.exec(`ALTER TABLE summaries ADD COLUMN model TEXT NOT NULL DEFAULT 'unknown'`);
88
+ }
89
+ }
90
+
83
91
  function backfillSummaryDepths(db: DatabaseSync): void {
84
92
  // Leaves are always depth 0, even if legacy rows had malformed values.
85
93
  db.exec(`UPDATE summaries SET depth = 0 WHERE kind = 'leaf'`);
@@ -355,6 +363,68 @@ function backfillSummaryMetadata(db: DatabaseSync): void {
355
363
  }
356
364
  }
357
365
 
366
+ /**
367
+ * Backfill tool_call_id, tool_name, and tool_input from metadata JSON for rows
368
+ * where the DB columns are NULL but the values exist in metadata. This covers
369
+ * legacy text-type parts where the string-content ingestion path stored tool
370
+ * info only in the metadata JSON (see #158).
371
+ */
372
+ function backfillToolCallColumns(db: DatabaseSync): void {
373
+ db.exec(
374
+ `UPDATE message_parts
375
+ SET tool_call_id = COALESCE(
376
+ json_extract(metadata, '$.toolCallId'),
377
+ json_extract(metadata, '$.raw.id'),
378
+ json_extract(metadata, '$.raw.call_id'),
379
+ json_extract(metadata, '$.raw.toolCallId'),
380
+ json_extract(metadata, '$.raw.tool_call_id')
381
+ )
382
+ WHERE tool_call_id IS NULL
383
+ AND metadata IS NOT NULL
384
+ AND COALESCE(
385
+ json_extract(metadata, '$.toolCallId'),
386
+ json_extract(metadata, '$.raw.id'),
387
+ json_extract(metadata, '$.raw.call_id'),
388
+ json_extract(metadata, '$.raw.toolCallId'),
389
+ json_extract(metadata, '$.raw.tool_call_id')
390
+ ) IS NOT NULL`,
391
+ );
392
+
393
+ db.exec(
394
+ `UPDATE message_parts
395
+ SET tool_name = COALESCE(
396
+ json_extract(metadata, '$.toolName'),
397
+ json_extract(metadata, '$.raw.name'),
398
+ json_extract(metadata, '$.raw.toolName'),
399
+ json_extract(metadata, '$.raw.tool_name')
400
+ )
401
+ WHERE tool_name IS NULL
402
+ AND metadata IS NOT NULL
403
+ AND COALESCE(
404
+ json_extract(metadata, '$.toolName'),
405
+ json_extract(metadata, '$.raw.name'),
406
+ json_extract(metadata, '$.raw.toolName'),
407
+ json_extract(metadata, '$.raw.tool_name')
408
+ ) IS NOT NULL`,
409
+ );
410
+
411
+ db.exec(
412
+ `UPDATE message_parts
413
+ SET tool_input = COALESCE(
414
+ json_extract(metadata, '$.raw.input'),
415
+ json_extract(metadata, '$.raw.arguments'),
416
+ json_extract(metadata, '$.raw.toolInput')
417
+ )
418
+ WHERE tool_input IS NULL
419
+ AND metadata IS NOT NULL
420
+ AND COALESCE(
421
+ json_extract(metadata, '$.raw.input'),
422
+ json_extract(metadata, '$.raw.arguments'),
423
+ json_extract(metadata, '$.raw.toolInput')
424
+ ) IS NOT NULL`,
425
+ );
426
+ }
427
+
358
428
  export function runLcmMigrations(
359
429
  db: DatabaseSync,
360
430
  options?: { fts5Available?: boolean },
@@ -474,6 +544,16 @@ export function runLcmMigrations(
474
544
  created_at TEXT NOT NULL DEFAULT (datetime('now'))
475
545
  );
476
546
 
547
+ CREATE TABLE IF NOT EXISTS conversation_bootstrap_state (
548
+ conversation_id INTEGER PRIMARY KEY REFERENCES conversations(conversation_id) ON DELETE CASCADE,
549
+ session_file_path TEXT NOT NULL,
550
+ last_seen_size INTEGER NOT NULL,
551
+ last_seen_mtime_ms INTEGER NOT NULL,
552
+ last_processed_offset INTEGER NOT NULL,
553
+ last_processed_entry_hash TEXT,
554
+ updated_at TEXT NOT NULL DEFAULT (datetime('now'))
555
+ );
556
+
477
557
  -- Indexes
478
558
  CREATE INDEX IF NOT EXISTS messages_conv_seq_idx ON messages (conversation_id, seq);
479
559
  CREATE INDEX IF NOT EXISTS summaries_conv_created_idx ON summaries (conversation_id, created_at);
@@ -481,6 +561,8 @@ export function runLcmMigrations(
481
561
  CREATE INDEX IF NOT EXISTS message_parts_type_idx ON message_parts (part_type);
482
562
  CREATE INDEX IF NOT EXISTS context_items_conv_idx ON context_items (conversation_id, ordinal);
483
563
  CREATE INDEX IF NOT EXISTS large_files_conv_idx ON large_files (conversation_id, created_at);
564
+ CREATE INDEX IF NOT EXISTS bootstrap_state_path_idx
565
+ ON conversation_bootstrap_state (session_file_path, updated_at);
484
566
  `);
485
567
 
486
568
  // Forward-compatible conversations migration for existing DBs.
@@ -500,8 +582,10 @@ export function runLcmMigrations(
500
582
  db.exec(`CREATE UNIQUE INDEX IF NOT EXISTS conversations_session_key_idx ON conversations (session_key)`);
501
583
  ensureSummaryDepthColumn(db);
502
584
  ensureSummaryMetadataColumns(db);
585
+ ensureSummaryModelColumn(db);
503
586
  backfillSummaryDepths(db);
504
587
  backfillSummaryMetadata(db);
588
+ backfillToolCallColumns(db);
505
589
 
506
590
  const fts5Available = options?.fts5Available ?? getLcmDbFeatures(db).fts5Available;
507
591
  if (!fts5Available) {