npm - @martian-engineering/lossless-claw - Versions diffs - 0.5.2 → 0.6.0 - Mend

@martian-engineering/lossless-claw 0.5.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/README.md +49 -11
package/docs/configuration.md +44 -0
package/openclaw.plugin.json +114 -0
package/package.json +2 -1
package/skills/lossless-claw/SKILL.md +33 -0
package/skills/lossless-claw/references/architecture.md +52 -0
package/skills/lossless-claw/references/config.md +263 -0
package/skills/lossless-claw/references/diagnostics.md +79 -0
package/skills/lossless-claw/references/recall-tools.md +55 -0
package/skills/lossless-claw/references/session-lifecycle.md +59 -0
package/src/assembler.ts +321 -34
package/src/compaction.ts +220 -19
package/src/db/config.ts +74 -21
package/src/db/migration.ts +50 -13
package/src/engine.ts +742 -133
package/src/plugin/index.ts +156 -73
package/src/plugin/lcm-command.ts +759 -0
package/src/plugin/lcm-doctor-apply.ts +546 -0
package/src/plugin/lcm-doctor-shared.ts +210 -0
package/src/store/conversation-store.ts +60 -21
package/src/store/parse-utc-timestamp.ts +25 -0
package/src/store/summary-store.ts +460 -11
package/src/summarize.ts +553 -224
package/src/tools/lcm-expand-query-tool.ts +195 -59
package/src/tools/lcm-expansion-recursion-guard.ts +87 -0
package/src/types.ts +1 -0

package/src/store/summary-store.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import type { DatabaseSync } from "node:sqlite";
 import { sanitizeFts5Query } from "./fts5-sanitize.js";
 import { buildLikeSearchPlan, containsCjk, createFallbackSnippet } from "./full-text-fallback.js";
+import { parseUtcTimestamp, parseUtcTimestampOrNull } from "./parse-utc-timestamp.js";
 export type SummaryKind = "leaf" | "condensed";
 export type ContextItemType = "message" | "summary";
@@ -45,6 +46,11 @@ export type SummarySubtreeNodeRecord = SummaryRecord & {
   childCount: number;
 };
+/** Link between a message id and the id of a leaf summary associated with it. */
+export type MessageLeafSummaryLinkRecord = {
+  messageId: number;
+  summaryId: string;
+};
 export type ContextItemRecord = {
   conversationId: number;
   ordinal: number;
@@ -112,6 +118,16 @@ export type ConversationBootstrapStateRecord = {
   updatedAt: Date;
 };
+/**
+ * A tool-result message that qualifies for transcript GC.
+ * `toolCallId` is always a non-empty string; `externalizedFileId` and
+ * `originalByteSize` come from the message part's JSON metadata and are
+ * `null` when that metadata does not provide a usable value.
+ */
+export type TranscriptGcCandidateRecord = {
+  messageId: number;
+  conversationId: number;
+  seq: number;
+  toolCallId: string;
+  toolName: string | null;
+  externalizedFileId: string | null;
+  originalByteSize: number | null;
+};
 // ── DB row shapes (snake_case) ────────────────────────────────────────────────
 interface SummaryRow {
@@ -172,6 +188,15 @@ interface MessageIdRow {
   message_id: number;
 }
+/** Row returned by the `MAX(depth) AS max_depth` aggregate; the value may be `null`. */
+interface MaxDepthRow {
+  max_depth: number | null;
+}
+/** Raw `summary_messages` row pairing a message id with a summary id. */
+interface MessageLeafSummaryLinkRow {
+  message_id: number;
+  summary_id: string;
+}
 interface LargeFileRow {
   file_id: string;
   conversation_id: number;
@@ -193,6 +218,17 @@ interface ConversationBootstrapStateRow {
   updated_at: string;
 }
+// Matches runs of CJK-script characters inside a search query. The first
+// range (U+2E80–U+9FFF) already spans the separately listed U+3400–U+4DBF,
+// U+3040–U+309F and U+30A0–U+30FF ranges; U+F900–U+FAFF and U+AC00–U+D7AF
+// lie outside it and extend the class.
+const CJK_QUERY_SEGMENT_RE =
+  /[\u2E80-\u9FFF\u3400-\u4DBF\uF900-\uFAFF\uAC00-\uD7AF\u3040-\u309F\u30A0-\u30FF]+/g;
+// Matches ASCII tokens: one letter or digit followed by any run of `\w`, ".", "/" or "-".
+const LATIN_QUERY_TOKEN_RE = /[a-zA-Z0-9][\w./-]*/g;
+/**
+ * Raw row shape for the transcript GC candidate query: message columns plus
+ * the tool call id, tool name and JSON `metadata` text of a message part.
+ */
+interface TranscriptGcCandidateRow {
+  message_id: number;
+  conversation_id: number;
+  seq: number;
+  tool_call_id: string | null;
+  tool_name: string | null;
+  metadata: string | null;
+}
 // ── Row mappers ───────────────────────────────────────────────────────────────
 function toSummaryRecord(row: SummaryRow): SummaryRecord {
@@ -210,8 +246,8 @@ function toSummaryRecord(row: SummaryRow): SummaryRecord {
     content: row.content,
     tokenCount: row.token_count,
     fileIds,
-    earliestAt: row.earliest_at ? new Date(row.earliest_at) : null,
-    latestAt: row.latest_at ? new Date(row.latest_at) : null,
+    earliestAt: parseUtcTimestampOrNull(row.earliest_at),
+    latestAt: parseUtcTimestampOrNull(row.latest_at),
     descendantCount:
       typeof row.descendant_count === "number" &&
       Number.isFinite(row.descendant_count) &&
@@ -231,7 +267,7 @@ function toSummaryRecord(row: SummaryRow): SummaryRecord {
         ? Math.floor(row.source_message_token_count)
         : 0,
     model: typeof row.model === "string" ? row.model : "unknown",
-    createdAt: new Date(row.created_at),
+    createdAt: parseUtcTimestamp(row.created_at),
   };
 }
@@ -242,7 +278,7 @@ function toContextItemRecord(row: ContextItemRow): ContextItemRecord {
     itemType: row.item_type,
     messageId: row.message_id,
     summaryId: row.summary_id,
-    createdAt: new Date(row.created_at),
+    createdAt: parseUtcTimestamp(row.created_at),
   };
 }
@@ -252,7 +288,7 @@ function toSearchResult(row: SummarySearchRow): SummarySearchResult {
     conversationId: row.conversation_id,
     kind: row.kind,
     snippet: row.snippet,
-    createdAt: new Date(row.created_at),
+    createdAt: parseUtcTimestamp(row.created_at),
     rank: row.rank,
   };
 }
@@ -266,7 +302,7 @@ function toLargeFileRecord(row: LargeFileRow): LargeFileRecord {
     byteSize: row.byte_size,
     storageUri: row.storage_uri,
     explorationSummary: row.exploration_summary,
-    createdAt: new Date(row.created_at),
+    createdAt: parseUtcTimestamp(row.created_at),
   };
 }
@@ -280,7 +316,43 @@ function toConversationBootstrapStateRecord(
     lastSeenMtimeMs: row.last_seen_mtime_ms,
     lastProcessedOffset: row.last_processed_offset,
     lastProcessedEntryHash: row.last_processed_entry_hash,
-    updatedAt: new Date(row.updated_at),
+    updatedAt: parseUtcTimestamp(row.updated_at),
+  };
+}
+/**
+ * Convert a raw GC-candidate row into a record, or reject it.
+ *
+ * @param row - Row carrying the tool call id, tool name and JSON metadata text.
+ * @returns The mapped record, or `null` when the row has no tool call id, when
+ *   its metadata is missing or is not parseable JSON, or when the metadata
+ *   does not carry `toolOutputExternalized: true`.
+ */
+function toTranscriptGcCandidateRecord(
+  row: TranscriptGcCandidateRow,
+): TranscriptGcCandidateRecord | null {
+  const toolCallId = row.tool_call_id;
+  if (typeof toolCallId !== "string" || toolCallId === "") {
+    return null;
+  }
+  const rawMetadata = row.metadata;
+  if (typeof rawMetadata !== "string" || rawMetadata === "") {
+    return null;
+  }
+  let parsed: Record<string, unknown> | null;
+  try {
+    parsed = JSON.parse(rawMetadata) as Record<string, unknown>;
+  } catch {
+    // Unparseable metadata cannot prove the output was externalized.
+    return null;
+  }
+  if (!parsed || parsed.toolOutputExternalized !== true) {
+    return null;
+  }
+  const fileId = parsed.externalizedFileId;
+  const byteSize = parsed.originalByteSize;
+  // Floor and clamp so the record never carries a negative or fractional size.
+  const originalByteSize =
+    typeof byteSize === "number" && Number.isFinite(byteSize)
+      ? Math.max(0, Math.floor(byteSize))
+      : null;
+  return {
+    messageId: row.message_id,
+    conversationId: row.conversation_id,
+    seq: row.seq,
+    toolCallId,
+    toolName: row.tool_name,
+    externalizedFileId: typeof fileId === "string" ? fileId : null,
+    originalByteSize,
   };
 }
@@ -386,6 +458,17 @@ export class SummaryStore {
       // compaction and assembly will still work correctly.
     }
+    // Also index into the CJK trigram FTS table for CJK substring search.
+    try {
+      this.db
+        .prepare(
+          `INSERT INTO summaries_fts_cjk(summary_id, content) VALUES (?, ?)`,
+        )
+        .run(input.summaryId, input.content);
+    } catch {
+      // CJK trigram FTS table may not exist yet (pre-migration); ignore.
+    }
     return toSummaryRecord(row);
   }
@@ -460,6 +543,136 @@ export class SummaryStore {
     return rows.map((r) => r.message_id);
   }
+  /**
+   * Look up the largest `depth` stored in `summaries` for one conversation.
+   *
+   * @param conversationId - Conversation whose summaries are inspected.
+   * @returns The maximum depth, or `null` when the query yields no numeric value.
+   */
+  async getConversationMaxSummaryDepth(conversationId: number): Promise<number | null> {
+    const statement = this.db.prepare(
+      `SELECT MAX(depth) AS max_depth
+         FROM summaries
+         WHERE conversation_id = ?`,
+    );
+    const result = statement.get(conversationId) as unknown as MaxDepthRow | undefined;
+    const maxDepth = result?.max_depth;
+    if (typeof maxDepth !== "number") {
+      return null;
+    }
+    return maxDepth;
+  }
+  /**
+   * Map raw message ids to the leaf summaries linked to them.
+   *
+   * Ids that are not positive integers are dropped and duplicates collapse to
+   * their first occurrence. Links are returned grouped by message id in that
+   * input order; within a message, summary ids keep the order the query
+   * returned them in.
+   *
+   * @param conversationId - Conversation the leaf summaries must belong to.
+   * @param messageIds - Candidate message ids to resolve.
+   * @returns One record per distinct (message, leaf summary) pair.
+   */
+  async getLeafSummaryLinksForMessageIds(
+    conversationId: number,
+    messageIds: number[],
+  ): Promise<MessageLeafSummaryLinkRecord[]> {
+    const uniqueIds = new Set<number>();
+    for (const candidate of messageIds) {
+      if (Number.isInteger(candidate) && candidate > 0) {
+        uniqueIds.add(candidate);
+      }
+    }
+    if (uniqueIds.size === 0) {
+      return [];
+    }
+    const idList = [...uniqueIds];
+    const bindMarkers = idList.map(() => "?").join(", ");
+    const statement = this.db.prepare(
+      `SELECT sm.message_id, sm.summary_id
+         FROM summary_messages sm
+         JOIN summaries s ON s.summary_id = sm.summary_id
+         WHERE s.conversation_id = ?
+           AND s.kind = 'leaf'
+           AND sm.message_id IN (${bindMarkers})
+         ORDER BY sm.ordinal ASC, s.created_at ASC`,
+    );
+    const rows = statement.all(conversationId, ...idList) as unknown as MessageLeafSummaryLinkRow[];
+    // A Set keeps first-seen order, so per-message summary ids stay in query order.
+    const summariesByMessage = new Map<number, Set<string>>();
+    for (const { message_id: rowMessageId, summary_id: rowSummaryId } of rows) {
+      let bucket = summariesByMessage.get(rowMessageId);
+      if (bucket === undefined) {
+        bucket = new Set<string>();
+        summariesByMessage.set(rowMessageId, bucket);
+      }
+      bucket.add(rowSummaryId);
+    }
+    const links: MessageLeafSummaryLinkRecord[] = [];
+    for (const messageId of idList) {
+      const bucket = summariesByMessage.get(messageId);
+      if (bucket === undefined) {
+        continue;
+      }
+      for (const summaryId of bucket) {
+        links.push({ messageId, summaryId });
+      }
+    }
+    return links;
+  }
+  /**
+   * List tool-result messages that are candidates for transcript GC.
+   *
+   * The query selects `tool` messages with a non-empty tool call id that are
+   * referenced by `summary_messages` and no longer appear as `message` rows in
+   * `context_items`. Rows are then narrowed to those whose part metadata
+   * passes `toTranscriptGcCandidateRecord`, keeping at most one candidate per
+   * message (the first qualifying part in `seq`, `ordinal` order).
+   *
+   * @param conversationId - Conversation to scan.
+   * @param options - `limit` caps the number of candidates; values that are
+   *   not finite positive numbers fall back to 25.
+   * @returns Candidates ordered by message `seq`.
+   */
+  async listTranscriptGcCandidates(
+    conversationId: number,
+    options?: { limit?: number },
+  ): Promise<TranscriptGcCandidateRecord[]> {
+    const requested = options?.limit;
+    let maxCandidates = 25;
+    if (typeof requested === "number" && Number.isFinite(requested) && requested > 0) {
+      maxCandidates = Math.max(1, Math.floor(requested));
+    }
+    const statement = this.db.prepare(
+      `SELECT
+           m.message_id,
+           m.conversation_id,
+           m.seq,
+           mp.tool_call_id,
+           mp.tool_name,
+           mp.metadata
+         FROM messages m
+         JOIN message_parts mp
+           ON mp.message_id = m.message_id
+         WHERE m.conversation_id = ?
+           AND m.role = 'tool'
+           AND mp.part_type = 'tool'
+           AND mp.tool_call_id IS NOT NULL
+           AND mp.tool_call_id != ''
+           AND EXISTS (
+             SELECT 1
+             FROM summary_messages sm
+             WHERE sm.message_id = m.message_id
+           )
+           AND NOT EXISTS (
+             SELECT 1
+             FROM context_items ci
+             WHERE ci.conversation_id = m.conversation_id
+               AND ci.item_type = 'message'
+               AND ci.message_id = m.message_id
+           )
+         ORDER BY m.seq ASC, mp.ordinal ASC`,
+    );
+    const rows = statement.all(conversationId) as unknown as TranscriptGcCandidateRow[];
+    const acceptedMessageIds = new Set<number>();
+    const accepted: TranscriptGcCandidateRecord[] = [];
+    for (const row of rows) {
+      // The limit is applied here rather than in SQL because rows are filtered after the query.
+      if (accepted.length >= maxCandidates) {
+        break;
+      }
+      if (acceptedMessageIds.has(row.message_id)) {
+        continue;
+      }
+      const record = toTranscriptGcCandidateRecord(row);
+      if (record === null) {
+        continue;
+      }
+      acceptedMessageIds.add(record.messageId);
+      accepted.push(record);
+    }
+    return accepted;
+  }
   async getSummaryChildren(parentSummaryId: string): Promise<SummaryRecord[]> {
     const rows = this.db
       .prepare(
@@ -607,6 +820,45 @@ export class SummaryStore {
     return rows.map((row) => row.depth);
   }
+  /**
+   * Trim a conversation's `context_items` ahead of a new session.
+   *
+   * - A finite negative `retainDepth` is a no-op.
+   * - Otherwise every `message` context item is removed.
+   * - A finite `retainDepth` additionally removes `summary` items whose
+   *   summary depth is below `Math.floor(retainDepth)`.
+   * - A non-finite `retainDepth` (`Infinity`, `-Infinity`, `NaN`) removes every
+   *   `summary` item.
+   *
+   * @param conversationId - Conversation whose context items are pruned.
+   * @param retainDepth - Minimum summary depth that stays in context.
+   */
+  async pruneForNewSession(conversationId: number, retainDepth: number): Promise<void> {
+    const depthIsFinite = Number.isFinite(retainDepth);
+    if (depthIsFinite && retainDepth < 0) {
+      return;
+    }
+    const deleteMessages = this.db.prepare(
+      `DELETE FROM context_items
+       WHERE conversation_id = ?
+         AND item_type = 'message'`,
+    );
+    deleteMessages.run(conversationId);
+    if (depthIsFinite) {
+      const deleteShallowSummaries = this.db.prepare(
+        `DELETE FROM context_items
+       WHERE conversation_id = ?
+         AND item_type = 'summary'
+         AND summary_id IN (
+           SELECT summary_id
+           FROM summaries
+           WHERE conversation_id = ?
+             AND depth < ?
+         )`,
+      );
+      deleteShallowSummaries.run(conversationId, conversationId, Math.floor(retainDepth));
+      return;
+    }
+    const deleteAllSummaries = this.db.prepare(
+      `DELETE FROM context_items
+         WHERE conversation_id = ?
+           AND item_type = 'summary'`,
+    );
+    deleteAllSummaries.run(conversationId);
+  }
   async appendContextMessage(conversationId: number, messageId: number): Promise<void> {
     const row = this.db
       .prepare(
@@ -750,10 +1002,30 @@ export class SummaryStore {
     const limit = input.limit ?? 50;
     if (input.mode === "full_text") {
-      // FTS5 unicode61 can return incomplete matches for CJK text, so route
-      // those queries through the existing LIKE fallback path immediately.
+      // FTS5 unicode61 cannot segment CJK ideographs, so CJK queries route
+      // through the trigram FTS table first, then fall back to LIKE with OR
+      // semantics (instead of the original AND logic which fails when the
+      // user's phrasing doesn't exactly match the summary text).
       if (containsCjk(input.query)) {
-        return this.searchLike(
+        const cjkSegments = this.extractCjkSegments(input.query);
+        const hasShortCjkSegment = cjkSegments.some((segment) => segment.length < 3);
+        if (!hasShortCjkSegment) {
+          try {
+            const trigramResults = this.searchCjkTrigram(
+              input.query,
+              limit,
+              input.conversationId,
+              input.since,
+              input.before,
+            );
+            if (trigramResults.length > 0) {
+              return trigramResults;
+            }
+          } catch {
+            // trigram table may not exist; fall through to LIKE OR
+          }
+        }
+        return this.searchLikeCjk(
           input.query,
           limit,
           input.conversationId,
@@ -870,6 +1142,183 @@ export class SummaryStore {
       conversationId: row.conversation_id,
       kind: row.kind,
       snippet: createFallbackSnippet(row.content, plan.terms),
+      createdAt: parseUtcTimestamp(row.created_at),
+      rank: 0,
+    }));
+  }
+  /** Return every run of CJK characters in `query`, in order; empty when there are none. */
+  private extractCjkSegments(query: string): string[] {
+    const found = query.match(CJK_QUERY_SEGMENT_RE);
+    return found === null ? [] : found;
+  }
+  /** Return the distinct lower-cased Latin tokens of `query`, in first-seen order. */
+  private extractLatinTokens(query: string): string[] {
+    const distinct = new Set<string>();
+    for (const token of query.match(LATIN_QUERY_TOKEN_RE) ?? []) {
+      distinct.add(token.toLowerCase());
+    }
+    return [...distinct];
+  }
+  /** Backslash-escape `\`, `%` and `_` so `term` is matched literally by `LIKE ... ESCAPE '\'`. */
+  private escapeLikeTerm(term: string): string {
+    return term.replace(/([\\%_])/g, (special) => "\\" + special);
+  }
+  // ── CJK trigram FTS search ──────────────────────────────────────────────
+  // CJK segments of 3–4 chars are matched whole; longer segments are split
+  // into overlapping 4-char chunks that are ORed within the segment. Segment
+  // groups are combined with AND, and Latin tokens are applied as LIKE
+  // filters, so mixed queries still require every part of the user's intent.
+  /**
+   * Produce the distinct sliding windows of `size` UTF-16 code units over `text`,
+   * in first-seen order. Returns an empty array when `text` is shorter than `size`.
+   * E.g. "端到端测试结果" with size=4 →
+   *   ["端到端测", "到端测试", "端测试结", "测试结果"]
+   */
+  private splitCjkChunks(text: string, size: number): string[] {
+    const windows = new Set<string>();
+    const lastStart = text.length - size;
+    for (let start = 0; start <= lastStart; start += 1) {
+      windows.add(text.slice(start, start + size));
+    }
+    return [...windows];
+  }
+  /**
+   * Search `summaries_fts_cjk` for the CJK parts of `query`.
+   *
+   * Only CJK segments of at least 3 chars take part; with none, the result is
+   * empty. Each segment becomes a parenthesised OR group of quoted terms, the
+   * groups are ANDed into one MATCH expression, and each Latin token adds a
+   * case-insensitive LIKE filter on the summary content.
+   *
+   * @param query - Raw user query, possibly mixing CJK and Latin text.
+   * @param limit - Maximum number of rows to return.
+   * @param conversationId - Optional conversation filter.
+   * @param since - Optional inclusive lower bound on `created_at`.
+   * @param before - Optional exclusive upper bound on `created_at`.
+   * @returns Matches ordered by FTS rank.
+   */
+  private searchCjkTrigram(
+    query: string,
+    limit: number,
+    conversationId?: number,
+    since?: Date,
+    before?: Date,
+  ): SummarySearchResult[] {
+    const longSegments = this.extractCjkSegments(query).filter((segment) => segment.length >= 3);
+    if (longSegments.length === 0) {
+      return [];
+    }
+    const matchExpression = longSegments
+      .map((segment) => {
+        const terms = segment.length > 4 ? this.splitCjkChunks(segment, 4) : [segment];
+        // Double any embedded quote so each term stays a single FTS5 string.
+        const quoted = Array.from(new Set(terms), (term) => `"${term.replace(/"/g, '""')}"`);
+        return `(${quoted.join(" OR ")})`;
+      })
+      .join(" AND ");
+    const conditions: string[] = ["summaries_fts_cjk MATCH ?"];
+    const params: Array<string | number> = [matchExpression];
+    for (const token of this.extractLatinTokens(query)) {
+      conditions.push("LOWER(s.content) LIKE ? ESCAPE '\\'");
+      params.push(`%${this.escapeLikeTerm(token)}%`);
+    }
+    if (conversationId !== null && conversationId !== undefined) {
+      conditions.push("s.conversation_id = ?");
+      params.push(conversationId);
+    }
+    if (since) {
+      conditions.push("julianday(s.created_at) >= julianday(?)");
+      params.push(since.toISOString());
+    }
+    if (before) {
+      conditions.push("julianday(s.created_at) < julianday(?)");
+      params.push(before.toISOString());
+    }
+    params.push(limit);
+    const whereSql = conditions.join(" AND ");
+    const statement = this.db.prepare(`SELECT
+         f.summary_id,
+         s.conversation_id,
+         s.kind,
+         snippet(summaries_fts_cjk, 1, '', '', '...', 32) AS snippet,
+         rank,
+         s.created_at
+       FROM summaries_fts_cjk f
+       JOIN summaries s ON s.summary_id = f.summary_id
+       WHERE ${whereSql}
+       ORDER BY rank
+       LIMIT ?`);
+    const rows = statement.all(...params) as unknown as SummarySearchRow[];
+    return rows.map(toSearchResult);
+  }
+  // ── CJK LIKE fallback ────────────────────────────────────────────────────
+  // Used when the trigram search is skipped (a CJK segment is shorter than
+  // 3 chars), throws, or returns no rows. Each CJK segment is split into
+  // sliding-window terms so partial matches still work. Terms within a single
+  // segment are ORed together, but each segment and Latin token still has to
+  // match so mixed queries keep full-intent semantics.
+  /**
+   * LIKE-based search over `summaries` for queries containing CJK text.
+   *
+   * @param query - Raw user query, possibly mixing CJK and Latin text.
+   * @param limit - Maximum number of rows to return.
+   * @param conversationId - Optional conversation filter.
+   * @param since - Optional inclusive lower bound on `created_at`.
+   * @param before - Optional exclusive upper bound on `created_at`.
+   * @returns Matches ordered newest first, all with `rank` 0; empty when the
+   *   query yields neither CJK segments nor Latin tokens.
+   */
+  private searchLikeCjk(
+    query: string,
+    limit: number,
+    conversationId?: number,
+    since?: Date,
+    before?: Date,
+  ): SummarySearchResult[] {
+    const cjkSegments = this.extractCjkSegments(query);
+    const latinTokens = this.extractLatinTokens(query);
+    if (cjkSegments.length === 0 && latinTokens.length === 0) {
+      return [];
+    }
+    const cjkTerms: string[] = [];
+    const cjkClauses: string[] = [];
+    const cjkArgs: string[] = [];
+    for (const segment of cjkSegments) {
+      // Segments of one or two chars are searched whole; longer ones become
+      // overlapping 2-char windows.
+      const segmentTerms = segment.length <= 2 ? [segment] : this.splitCjkChunks(segment, 2);
+      const uniqueTerms = [...new Set(segmentTerms)];
+      cjkTerms.push(...uniqueTerms);
+      cjkClauses.push(
+        `(${uniqueTerms.map(() => `LOWER(content) LIKE ? ESCAPE '\\'`).join(" OR ")})`,
+      );
+      cjkArgs.push(
+        ...uniqueTerms.map((term) => `%${this.escapeLikeTerm(term.toLowerCase())}%`),
+      );
+    }
+    const latinClauses = latinTokens.map(() => `LOWER(content) LIKE ? ESCAPE '\\'`);
+    const latinArgs = latinTokens.map((token) => `%${this.escapeLikeTerm(token)}%`);
+    const where: string[] = [...cjkClauses, ...latinClauses];
+    const args: Array<string | number> = [...cjkArgs, ...latinArgs];
+    if (conversationId != null) {
+      where.push("conversation_id = ?");
+      args.push(conversationId);
+    }
+    if (since) {
+      where.push("julianday(created_at) >= julianday(?)");
+      args.push(since.toISOString());
+    }
+    if (before) {
+      where.push("julianday(created_at) < julianday(?)");
+      args.push(before.toISOString());
+    }
+    args.push(limit);
+    const rows = this.db
+      .prepare(
+        `SELECT summary_id, conversation_id, kind, depth, content, token_count, file_ids,
+                earliest_at, latest_at, descendant_count, descendant_token_count,
+                source_message_token_count, model, created_at
+         FROM summaries
+         WHERE ${where.join(" AND ")}
+         ORDER BY created_at DESC
+         LIMIT ?`,
+      )
+      .all(...args) as unknown as SummaryRow[];
+    const snippetTerms = cjkTerms.length > 0 ? [...new Set([...cjkTerms, ...latinTokens])] : latinTokens;
+    return rows.map((row) => ({
+      summaryId: row.summary_id,
+      conversationId: row.conversation_id,
+      kind: row.kind,
+      snippet: createFallbackSnippet(row.content, snippetTerms),
-      createdAt: new Date(row.created_at),
+      // Same timestamp parser as every other result mapper in this file,
+      // including the sibling searchLike path.
+      createdAt: parseUtcTimestamp(row.created_at),
       rank: 0,
     }));
@@ -934,7 +1383,7 @@ export class SummaryStore {
           conversationId: row.conversation_id,
           kind: row.kind,
           snippet: match[0],
-          createdAt: new Date(row.created_at),
+          createdAt: parseUtcTimestamp(row.created_at),
           rank: 0,
         });
       }