@martian-engineering/lossless-claw 0.4.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@martian-engineering/lossless-claw",
3
- "version": "0.4.0",
3
+ "version": "0.5.1",
4
4
  "description": "Lossless Context Management plugin for OpenClaw — DAG-based conversation summarization with incremental compaction",
5
5
  "type": "module",
6
6
  "main": "index.ts",
@@ -35,6 +35,7 @@
35
35
  "@sinclair/typebox": "0.34.48"
36
36
  },
37
37
  "devDependencies": {
38
+ "@changesets/changelog-github": "^0.6.0",
38
39
  "@changesets/cli": "^2.30.0",
39
40
  "typescript": "^5.7.0",
40
41
  "vitest": "^3.0.0"
package/src/assembler.ts CHANGED
@@ -238,9 +238,12 @@ export function toolCallBlockFromPart(part: MessagePartRecord, rawType?: string)
238
238
  return block;
239
239
  }
240
240
 
241
- if (typeof part.toolCallId === "string" && part.toolCallId.length > 0) {
242
- block.id = part.toolCallId;
243
- }
241
+ // Always set id — downstream providers (e.g. Anthropic) call
242
+ // normalizeToolCallId(block.id) which crashes on undefined.
243
+ block.id =
244
+ typeof part.toolCallId === "string" && part.toolCallId.length > 0
245
+ ? part.toolCallId
246
+ : `toolu_lcm_${part.partId ?? "unknown"}`;
244
247
  if (typeof part.toolName === "string" && part.toolName.length > 0) {
245
248
  block.name = part.toolName;
246
249
  }
@@ -362,6 +365,37 @@ export function blockFromPart(part: MessagePartRecord): unknown {
362
365
  if (!isToolBlock) {
363
366
  return metadata.raw;
364
367
  }
368
+
369
+ // When tool blocks are routed through toolCallBlockFromPart (below) instead
370
+ // of returning raw directly, the function reads part.toolCallId / part.toolName
371
+ // from the DB columns. For rows stored as part_type='text' those columns are
372
+ // often NULL — the values only live inside metadata.raw. Backfill them here
373
+ // so the reconstructed block keeps the original id/name.
374
+ const rawRecord = metadata.raw as Record<string, unknown>;
375
+ const rawToolCallId =
376
+ typeof rawRecord.id === "string" && rawRecord.id.length > 0
377
+ ? rawRecord.id
378
+ : typeof rawRecord.call_id === "string" && rawRecord.call_id.length > 0
379
+ ? rawRecord.call_id
380
+ : undefined;
381
+ if (rawToolCallId) {
382
+ if (typeof part.toolCallId !== "string" || part.toolCallId.length === 0) {
383
+ part.toolCallId = rawToolCallId;
384
+ }
385
+ }
386
+ if (typeof rawRecord.name === "string" && rawRecord.name.length > 0) {
387
+ if (typeof part.toolName !== "string" || part.toolName.length === 0) {
388
+ part.toolName = rawRecord.name;
389
+ }
390
+ }
391
+ // Backfill toolInput from raw arguments/input so toolCallBlockFromPart
392
+ // can reconstruct the full block.
393
+ if (part.toolInput == null || part.toolInput === "") {
394
+ const rawArgs = rawRecord.arguments ?? rawRecord.input;
395
+ if (rawArgs !== undefined) {
396
+ part.toolInput = typeof rawArgs === "string" ? rawArgs : JSON.stringify(rawArgs);
397
+ }
398
+ }
365
399
  }
366
400
 
367
401
  if (part.partType === "reasoning") {
package/src/compaction.ts CHANGED
@@ -142,6 +142,14 @@ function generateSummaryId(content: string): string {
142
142
  /** Maximum characters for the deterministic fallback truncation (512 tokens * 4 chars). */
143
143
  const FALLBACK_MAX_CHARS = 512 * 4;
144
144
  const DEFAULT_LEAF_CHUNK_TOKENS = 20_000;
145
+
146
+ /**
147
+ * Pattern matching MEDIA:/... file path references that appear in message content
148
+ * when the original message contained only a media attachment (image, file, etc.)
149
+ * with no meaningful text.
150
+ */
151
+ const MEDIA_PATH_RE = /^MEDIA:\/.+$/;
152
+
145
153
  const CONDENSED_MIN_INPUT_RATIO = 0.1;
146
154
 
147
155
  function dedupeOrderedIds(ids: Iterable<string>): string[] {
@@ -231,6 +239,7 @@ export class CompactionEngine {
231
239
  summarize: CompactionSummarizeFn;
232
240
  force?: boolean;
233
241
  hardTrigger?: boolean;
242
+ summaryModel?: string;
234
243
  }): Promise<CompactionResult> {
235
244
  return this.compactFullSweep(input);
236
245
  }
@@ -246,6 +255,7 @@ export class CompactionEngine {
246
255
  summarize: CompactionSummarizeFn;
247
256
  force?: boolean;
248
257
  previousSummaryContent?: string;
258
+ summaryModel?: string;
249
259
  }): Promise<CompactionResult> {
250
260
  const { conversationId, tokenBudget, summarize, force } = input;
251
261
 
@@ -281,6 +291,7 @@ export class CompactionEngine {
281
291
  leafChunk.items,
282
292
  summarize,
283
293
  previousSummaryContent,
294
+ input.summaryModel,
284
295
  );
285
296
  if (!leafResult) {
286
297
  return {
@@ -322,6 +333,7 @@ export class CompactionEngine {
322
333
  chunk.items,
323
334
  targetDepth,
324
335
  summarize,
336
+ input.summaryModel,
325
337
  );
326
338
  if (!condenseResult) {
327
339
  break;
@@ -370,6 +382,7 @@ export class CompactionEngine {
370
382
  summarize: CompactionSummarizeFn;
371
383
  force?: boolean;
372
384
  hardTrigger?: boolean;
385
+ summaryModel?: string;
373
386
  }): Promise<CompactionResult> {
374
387
  const { conversationId, tokenBudget, summarize, force, hardTrigger } = input;
375
388
 
@@ -416,6 +429,7 @@ export class CompactionEngine {
416
429
  leafChunk.items,
417
430
  summarize,
418
431
  previousSummaryContent,
432
+ input.summaryModel,
419
433
  );
420
434
  if (!leafResult) {
421
435
  break;
@@ -461,6 +475,7 @@ export class CompactionEngine {
461
475
  candidate.chunk.items,
462
476
  candidate.targetDepth,
463
477
  summarize,
478
+ input.summaryModel,
464
479
  );
465
480
  if (!condenseResult) {
466
481
  break;
@@ -511,6 +526,7 @@ export class CompactionEngine {
511
526
  targetTokens?: number;
512
527
  currentTokens?: number;
513
528
  summarize: CompactionSummarizeFn;
529
+ summaryModel?: string;
514
530
  }): Promise<{ success: boolean; rounds: number; finalTokens: number }> {
515
531
  const { conversationId, tokenBudget, summarize } = input;
516
532
  const targetTokens =
@@ -542,6 +558,7 @@ export class CompactionEngine {
542
558
  tokenBudget,
543
559
  summarize,
544
560
  force: true,
561
+ summaryModel: input.summaryModel,
545
562
  });
546
563
 
547
564
  if (result.tokensAfter <= targetTokens) {
@@ -998,6 +1015,17 @@ export class CompactionEngine {
998
1015
  };
999
1016
  }
1000
1017
  const inputTokens = Math.max(1, estimateTokens(sourceText));
1018
+ const buildDeterministicFallback = (): { content: string; level: CompactionLevel } => {
1019
+ const truncated =
1020
+ sourceText.length > FALLBACK_MAX_CHARS
1021
+ ? sourceText.slice(0, FALLBACK_MAX_CHARS)
1022
+ : sourceText;
1023
+ return {
1024
+ content: `${truncated}
1025
+ [Truncated from ${inputTokens} tokens]`,
1026
+ level: "fallback",
1027
+ };
1028
+ };
1001
1029
 
1002
1030
  const runSummarizer = async (aggressiveMode: boolean): Promise<string | null> => {
1003
1031
  const output = await params.summarize(sourceText, aggressiveMode, params.options);
@@ -1007,7 +1035,9 @@ export class CompactionEngine {
1007
1035
 
1008
1036
  const initialSummary = await runSummarizer(false);
1009
1037
  if (initialSummary === null) {
1010
- return null;
1038
+ // Empty provider output should still compact deterministically so auth
1039
+ // failures or empty responses do not stall compaction entirely.
1040
+ return buildDeterministicFallback();
1011
1041
  }
1012
1042
  let summaryText = initialSummary;
1013
1043
  let level: CompactionLevel = "normal";
@@ -1015,25 +1045,60 @@ export class CompactionEngine {
1015
1045
  if (estimateTokens(summaryText) >= inputTokens) {
1016
1046
  const aggressiveSummary = await runSummarizer(true);
1017
1047
  if (aggressiveSummary === null) {
1018
- return null;
1048
+ return buildDeterministicFallback();
1019
1049
  }
1020
1050
  summaryText = aggressiveSummary;
1021
1051
  level = "aggressive";
1022
1052
 
1023
1053
  if (estimateTokens(summaryText) >= inputTokens) {
1024
- const truncated =
1025
- sourceText.length > FALLBACK_MAX_CHARS
1026
- ? sourceText.slice(0, FALLBACK_MAX_CHARS)
1027
- : sourceText;
1028
- summaryText = `${truncated}
1029
- [Truncated from ${inputTokens} tokens]`;
1030
- level = "fallback";
1054
+ return buildDeterministicFallback();
1031
1055
  }
1032
1056
  }
1033
1057
 
1034
1058
  return { content: summaryText, level };
1035
1059
  }
1036
1060
 
1061
+ // ── Private: Media Annotation ────────────────────────────────────────────
1062
+
1063
+ /**
1064
+ * Annotate a message's content with media context when it has file/media
1065
+ * attachments. This gives the summarizer enough context to produce a
1066
+ * meaningful summary instead of trying to compress raw file paths.
1067
+ *
1068
+ * - Media-only messages (just a file path, no text): content is replaced
1069
+ * with "[Media attachment]".
1070
+ * - Media-mostly messages (any real text + attachment): media path lines are stripped and the remaining text is annotated
1071
+ * with " [with media attachment]" suffix.
1072
+ * - Text-only messages: returned unchanged.
1073
+ */
1074
+ private async annotateMediaContent(
1075
+ messageId: number,
1076
+ content: string,
1077
+ ): Promise<string> {
1078
+ const parts = await this.conversationStore.getMessageParts(messageId);
1079
+ const hasMediaParts = parts.some(
1080
+ (p) => p.partType === "file" || p.partType === "snapshot",
1081
+ );
1082
+ if (!hasMediaParts) {
1083
+ return content;
1084
+ }
1085
+
1086
+ // Strip MEDIA:/... paths to see how much actual text remains
1087
+ const textWithoutPaths = content
1088
+ .split("\n")
1089
+ .filter((line) => !MEDIA_PATH_RE.test(line.trim()))
1090
+ .join("\n")
1091
+ .trim();
1092
+
1093
+ if (textWithoutPaths.length === 0) {
1094
+ // Media-only: replace with descriptive annotation
1095
+ return "[Media attachment]";
1096
+ }
1097
+
1098
+ // Media-mostly: keep the text, add annotation
1099
+ return `${textWithoutPaths} [with media attachment]`;
1100
+ }
1101
+
1037
1102
  // ── Private: Leaf Pass ───────────────────────────────────────────────────
1038
1103
 
1039
1104
  /**
@@ -1044,6 +1109,7 @@ export class CompactionEngine {
1044
1109
  messageItems: ContextItemRecord[],
1045
1110
  summarize: CompactionSummarizeFn,
1046
1111
  previousSummaryContent?: string,
1112
+ summaryModel?: string,
1047
1113
  ): Promise<{ summaryId: string; level: CompactionLevel; content: string } | null> {
1048
1114
  // Fetch full message content for each context item
1049
1115
  const messageContents: { messageId: number; content: string; createdAt: Date; tokenCount: number }[] =
@@ -1054,9 +1120,13 @@ export class CompactionEngine {
1054
1120
  }
1055
1121
  const msg = await this.conversationStore.getMessageById(item.messageId);
1056
1122
  if (msg) {
1123
+ const annotatedContent = await this.annotateMediaContent(
1124
+ msg.messageId,
1125
+ msg.content,
1126
+ );
1057
1127
  messageContents.push({
1058
1128
  messageId: msg.messageId,
1059
- content: msg.content,
1129
+ content: annotatedContent,
1060
1130
  createdAt: msg.createdAt,
1061
1131
  tokenCount: this.resolveMessageTokenCount(msg),
1062
1132
  });
@@ -1110,6 +1180,7 @@ export class CompactionEngine {
1110
1180
  (sum, message) => sum + Math.max(0, Math.floor(message.tokenCount)),
1111
1181
  0,
1112
1182
  ),
1183
+ model: summaryModel,
1113
1184
  });
1114
1185
 
1115
1186
  // Link to source messages
@@ -1141,6 +1212,7 @@ export class CompactionEngine {
1141
1212
  summaryItems: ContextItemRecord[],
1142
1213
  targetDepth: number,
1143
1214
  summarize: CompactionSummarizeFn,
1215
+ summaryModel?: string,
1144
1216
  ): Promise<PassResult | null> {
1145
1217
  // Fetch full summary records
1146
1218
  const summaryRecords: SummaryRecord[] = [];
@@ -1242,6 +1314,7 @@ export class CompactionEngine {
1242
1314
  : 0;
1243
1315
  return count + sourceTokens;
1244
1316
  }, 0),
1317
+ model: summaryModel,
1245
1318
  });
1246
1319
 
1247
1320
  // Link to parent summaries
@@ -3,6 +3,7 @@ import { dirname, resolve } from "node:path";
3
3
  import { DatabaseSync } from "node:sqlite";
4
4
 
5
5
  type ConnectionKey = string;
6
+ const SQLITE_BUSY_TIMEOUT_MS = 5_000;
6
7
 
7
8
  const connectionsByPath = new Map<ConnectionKey, Set<DatabaseSync>>();
8
9
  const connectionIndex = new Map<DatabaseSync, ConnectionKey>();
@@ -29,6 +30,7 @@ function ensureDbDirectory(dbPath: string): void {
29
30
 
30
31
  function configureConnection(db: DatabaseSync): DatabaseSync {
31
32
  db.exec("PRAGMA journal_mode = WAL");
33
+ db.exec(`PRAGMA busy_timeout = ${SQLITE_BUSY_TIMEOUT_MS}`);
32
34
  db.exec("PRAGMA foreign_keys = ON");
33
35
  return db;
34
36
  }
@@ -80,6 +80,14 @@ function isoStringOrNull(value: Date | null): string | null {
80
80
  return value ? value.toISOString() : null;
81
81
  }
82
82
 
83
+ function ensureSummaryModelColumn(db: DatabaseSync): void {
84
+ const summaryColumns = db.prepare(`PRAGMA table_info(summaries)`).all() as SummaryColumnInfo[];
85
+ const hasModel = summaryColumns.some((col) => col.name === "model");
86
+ if (!hasModel) {
87
+ db.exec(`ALTER TABLE summaries ADD COLUMN model TEXT NOT NULL DEFAULT 'unknown'`);
88
+ }
89
+ }
90
+
83
91
  function backfillSummaryDepths(db: DatabaseSync): void {
84
92
  // Leaves are always depth 0, even if legacy rows had malformed values.
85
93
  db.exec(`UPDATE summaries SET depth = 0 WHERE kind = 'leaf'`);
@@ -355,6 +363,68 @@ function backfillSummaryMetadata(db: DatabaseSync): void {
355
363
  }
356
364
  }
357
365
 
366
+ /**
367
+ * Backfill tool_call_id, tool_name, and tool_input from metadata JSON for rows
368
+ * where the DB columns are NULL but the values exist in metadata. This covers
369
+ * legacy text-type parts where the string-content ingestion path stored tool
370
+ * info only in the metadata JSON (see #158).
371
+ */
372
+ function backfillToolCallColumns(db: DatabaseSync): void {
373
+ db.exec(
374
+ `UPDATE message_parts
375
+ SET tool_call_id = COALESCE(
376
+ json_extract(metadata, '$.toolCallId'),
377
+ json_extract(metadata, '$.raw.id'),
378
+ json_extract(metadata, '$.raw.call_id'),
379
+ json_extract(metadata, '$.raw.toolCallId'),
380
+ json_extract(metadata, '$.raw.tool_call_id')
381
+ )
382
+ WHERE tool_call_id IS NULL
383
+ AND metadata IS NOT NULL
384
+ AND COALESCE(
385
+ json_extract(metadata, '$.toolCallId'),
386
+ json_extract(metadata, '$.raw.id'),
387
+ json_extract(metadata, '$.raw.call_id'),
388
+ json_extract(metadata, '$.raw.toolCallId'),
389
+ json_extract(metadata, '$.raw.tool_call_id')
390
+ ) IS NOT NULL`,
391
+ );
392
+
393
+ db.exec(
394
+ `UPDATE message_parts
395
+ SET tool_name = COALESCE(
396
+ json_extract(metadata, '$.toolName'),
397
+ json_extract(metadata, '$.raw.name'),
398
+ json_extract(metadata, '$.raw.toolName'),
399
+ json_extract(metadata, '$.raw.tool_name')
400
+ )
401
+ WHERE tool_name IS NULL
402
+ AND metadata IS NOT NULL
403
+ AND COALESCE(
404
+ json_extract(metadata, '$.toolName'),
405
+ json_extract(metadata, '$.raw.name'),
406
+ json_extract(metadata, '$.raw.toolName'),
407
+ json_extract(metadata, '$.raw.tool_name')
408
+ ) IS NOT NULL`,
409
+ );
410
+
411
+ db.exec(
412
+ `UPDATE message_parts
413
+ SET tool_input = COALESCE(
414
+ json_extract(metadata, '$.raw.input'),
415
+ json_extract(metadata, '$.raw.arguments'),
416
+ json_extract(metadata, '$.raw.toolInput')
417
+ )
418
+ WHERE tool_input IS NULL
419
+ AND metadata IS NOT NULL
420
+ AND COALESCE(
421
+ json_extract(metadata, '$.raw.input'),
422
+ json_extract(metadata, '$.raw.arguments'),
423
+ json_extract(metadata, '$.raw.toolInput')
424
+ ) IS NOT NULL`,
425
+ );
426
+ }
427
+
358
428
  export function runLcmMigrations(
359
429
  db: DatabaseSync,
360
430
  options?: { fts5Available?: boolean },
@@ -474,6 +544,16 @@ export function runLcmMigrations(
474
544
  created_at TEXT NOT NULL DEFAULT (datetime('now'))
475
545
  );
476
546
 
547
+ CREATE TABLE IF NOT EXISTS conversation_bootstrap_state (
548
+ conversation_id INTEGER PRIMARY KEY REFERENCES conversations(conversation_id) ON DELETE CASCADE,
549
+ session_file_path TEXT NOT NULL,
550
+ last_seen_size INTEGER NOT NULL,
551
+ last_seen_mtime_ms INTEGER NOT NULL,
552
+ last_processed_offset INTEGER NOT NULL,
553
+ last_processed_entry_hash TEXT,
554
+ updated_at TEXT NOT NULL DEFAULT (datetime('now'))
555
+ );
556
+
477
557
  -- Indexes
478
558
  CREATE INDEX IF NOT EXISTS messages_conv_seq_idx ON messages (conversation_id, seq);
479
559
  CREATE INDEX IF NOT EXISTS summaries_conv_created_idx ON summaries (conversation_id, created_at);
@@ -481,6 +561,8 @@ export function runLcmMigrations(
481
561
  CREATE INDEX IF NOT EXISTS message_parts_type_idx ON message_parts (part_type);
482
562
  CREATE INDEX IF NOT EXISTS context_items_conv_idx ON context_items (conversation_id, ordinal);
483
563
  CREATE INDEX IF NOT EXISTS large_files_conv_idx ON large_files (conversation_id, created_at);
564
+ CREATE INDEX IF NOT EXISTS bootstrap_state_path_idx
565
+ ON conversation_bootstrap_state (session_file_path, updated_at);
484
566
  `);
485
567
 
486
568
  // Forward-compatible conversations migration for existing DBs.
@@ -500,8 +582,10 @@ export function runLcmMigrations(
500
582
  db.exec(`CREATE UNIQUE INDEX IF NOT EXISTS conversations_session_key_idx ON conversations (session_key)`);
501
583
  ensureSummaryDepthColumn(db);
502
584
  ensureSummaryMetadataColumns(db);
585
+ ensureSummaryModelColumn(db);
503
586
  backfillSummaryDepths(db);
504
587
  backfillSummaryMetadata(db);
588
+ backfillToolCallColumns(db);
505
589
 
506
590
  const fts5Available = options?.fts5Available ?? getLcmDbFeatures(db).fts5Available;
507
591
  if (!fts5Available) {