npm - @martian-engineering/lossless-claw - Version diff - 0.5.2 → 0.6.0 - Mend

@martian-engineering/lossless-claw 0.5.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

package/README.md +49 -11
package/docs/configuration.md +44 -0
package/openclaw.plugin.json +114 -0
package/package.json +2 -1
package/skills/lossless-claw/SKILL.md +33 -0
package/skills/lossless-claw/references/architecture.md +52 -0
package/skills/lossless-claw/references/config.md +263 -0
package/skills/lossless-claw/references/diagnostics.md +79 -0
package/skills/lossless-claw/references/recall-tools.md +55 -0
package/skills/lossless-claw/references/session-lifecycle.md +59 -0
package/src/assembler.ts +321 -34
package/src/compaction.ts +220 -19
package/src/db/config.ts +74 -21
package/src/db/migration.ts +50 -13
package/src/engine.ts +742 -133
package/src/plugin/index.ts +156 -73
package/src/plugin/lcm-command.ts +759 -0
package/src/plugin/lcm-doctor-apply.ts +546 -0
package/src/plugin/lcm-doctor-shared.ts +210 -0
package/src/store/conversation-store.ts +60 -21
package/src/store/parse-utc-timestamp.ts +25 -0
package/src/store/summary-store.ts +460 -11
package/src/summarize.ts +553 -224
package/src/tools/lcm-expand-query-tool.ts +195 -59
package/src/tools/lcm-expansion-recursion-guard.ts +87 -0
package/src/types.ts +1 -0

package/src/assembler.ts (changed)

@@ -9,6 +9,15 @@ import type { SummaryStore, ContextItemRecord, SummaryRecord } from "./store/sum
 type AgentMessage = Parameters<ContextEngine["ingest"]>[0]["message"];
+const TOOL_CALL_TYPES = new Set([
+  "toolCall",
+  "toolUse",
+  "tool_use",
+  "tool-use",
+  "functionCall",
+  "function_call",
+]);
 // ── Public types ─────────────────────────────────────────────────────────────
 export interface AssembleContextInput {
@@ -16,6 +25,8 @@ export interface AssembleContextInput {
   tokenBudget: number;
   /** Number of most recent raw turns to always include (default: 8) */
   freshTailCount?: number;
+  /** Optional user query for relevance-based eviction scoring (BM25-lite). When absent or unsearchable, falls back to chronological eviction. */
+  prompt?: string;
 }
 export interface AssembleContextResult {
@@ -43,10 +54,11 @@ function estimateTokens(text: string): number {
 type SummaryPromptSignal = Pick<SummaryRecord, "kind" | "depth" | "descendantCount">;
 /**
- * Build LCM usage guidance for the runtime system prompt.
+ * Build dynamic prompt guidance for compacted session context.
  *
  * Guidance is emitted only when summaries are present in assembled context.
- * Depth-aware: minimal for shallow compaction, full guidance for deep trees.
+ * Static recall policy lives in the plugin prompt hook so this addition
+ * remains session-specific and reflects only the current compaction state.
  */
 function buildSystemPromptAddition(summarySignals: SummaryPromptSignal[]): string | undefined {
   if (summarySignals.length === 0) {
@@ -59,32 +71,24 @@ function buildSystemPromptAddition(summarySignals: SummaryPromptSignal[]): strin
   const sections: string[] = [];
-  // Core recall workflow — always present when summaries exist
+  // Dynamic compaction reminder — always present when summaries exist.
   sections.push(
-    "## LCM Recall",
-    "",
-    "Summaries above are compressed context — maps to details, not the details themselves.",
+    "## Compacted Conversation Context",
     "",
-    "**Recall priority:** Use LCM tools first for compacted conversation history. If LCM does not cover the needed data, prefer any available memory/recall tool before falling back to raw text search.",
+    "Summaries above are compressed context, not full detail.",
     "",
-    "**Tool escalation:**",
-    "1. `lcm_grep` — search by regex or full-text across messages and summaries",
-    "2. `lcm_describe` — inspect a specific summary (cheap, no sub-agent)",
-    "3. `lcm_expand_query` — deep recall: spawns bounded sub-agent, expands DAG, returns answer with cited summary IDs (~120s, don't ration it)",
+    "Treat summaries as compressed recall cues rather than proof of exact wording or exact values.",
     "",
-    "**`lcm_expand_query` usage** — two patterns (always requires `prompt`):",
-    "- With IDs: `lcm_expand_query(summaryIds: [\"sum_xxx\"], prompt: \"What config changes were discussed?\")`",
-    "- With search: `lcm_expand_query(query: \"database migration\", prompt: \"What strategy was decided?\")`",
-    "- Optional: `maxTokens` (default 2000), `conversationId`, `allConversations: true`",
-    "",
-    "**Summaries include \"Expand for details about:\" footers** listing compressed specifics. Use `lcm_expand_query` with that summary's ID to retrieve them.",
+    "If a summary includes an \"Expand for details about:\" footer, use it as a cue to expand before asserting specifics.",
   );
-  // Precision/evidence rules — always present but stronger when heavily compacted
+  // Precision/evidence rules — always present but stronger when heavily compacted.
   if (heavilyCompacted) {
     sections.push(
       "",
-      "**\u26a0 Deeply compacted context — expand before asserting specifics.**",
+      "**Deeply compacted context: expand before asserting specifics.**",
+      "",
+      "Before answering with exact commands, SHAs, paths, timestamps, config values, or causal chains, expand for the missing detail.",
       "",
       "Default recall flow for precision work:",
       "1) `lcm_grep` to locate relevant summary/message IDs",
@@ -92,19 +96,20 @@ function buildSystemPromptAddition(summarySignals: SummaryPromptSignal[]): strin
       "3) Answer with citations to summary IDs used",
       "",
       "**Uncertainty checklist (run before answering):**",
-      "- Am I making exact factual claims from a condensed summary?",
+      "- Am I making an exact factual claim from a compressed or condensed summary?",
       "- Could compaction have omitted a crucial detail?",
-      "- Would this answer fail if the user asks for proof?",
+      "- Would I need an expansion step if the user asks for proof or the exact text?",
+      "- Should I state uncertainty instead of asserting specifics until I expand?",
       "",
-      "If yes to any \u2192 expand first.",
+      "If yes to any item, expand first or explicitly say that you need to expand.",
       "",
-      "**Do not guess** exact commands, SHAs, file paths, timestamps, config values, or causal claims from condensed summaries. Expand first or state that you need to expand.",
+      "Do not guess exact commands, SHAs, file paths, timestamps, config values, or causal claims from condensed summaries. Expand first or explicitly say that you need to expand.",
     );
   } else {
     sections.push(
       "",
-      "**For precision/evidence questions** (exact commands, SHAs, paths, timestamps, config values, root-cause chains): expand before answering.",
-      "Do not guess from condensed summaries — expand first or state uncertainty.",
+      "For exact commands, SHAs, paths, timestamps, config values, or causal chains, expand for details before answering.",
+      "State uncertainty instead of guessing from compressed summaries.",
     );
   }
@@ -267,6 +272,20 @@ export function toolResultBlockFromPart(
   rawType?: string,
   raw?: Record<string, unknown>,
 ): unknown {
+  if (
+    raw &&
+    typeof raw.text === "string" &&
+    raw.output === undefined &&
+    raw.content === undefined &&
+    (part.toolOutput == null || part.toolOutput === "") &&
+    (part.textContent == null || part.textContent === raw.text)
+  ) {
+    return {
+      type: "text",
+      text: raw.text,
+    };
+  }
   const type =
     rawType === "function_call_output" || rawType === "toolResult" || rawType === "tool_result"
       ? rawType
@@ -454,7 +473,8 @@ export function blockFromPart(part: MessagePartRecord): unknown {
   return { type: "text", text: "" };
 }
-function contentFromParts(
+/** @internal Exported for transcript-maintenance reconstruction. */
+export function contentFromParts(
   parts: MessagePartRecord[],
   role: "user" | "assistant" | "toolResult",
   fallbackContent: string,
@@ -483,7 +503,8 @@ function contentFromParts(
   return blocks;
 }
-function pickToolCallId(parts: MessagePartRecord[]): string | undefined {
+/** @internal Exported for transcript-maintenance reconstruction. */
+export function pickToolCallId(parts: MessagePartRecord[]): string | undefined {
   for (const part of parts) {
     if (typeof part.toolCallId === "string" && part.toolCallId.length > 0) {
       return part.toolCallId;
@@ -512,7 +533,8 @@ function pickToolCallId(parts: MessagePartRecord[]): string | undefined {
   return undefined;
 }
-function pickToolName(parts: MessagePartRecord[]): string | undefined {
+/** @internal Exported for transcript-maintenance reconstruction. */
+export function pickToolName(parts: MessagePartRecord[]): string | undefined {
   for (const part of parts) {
     if (typeof part.toolName === "string" && part.toolName.length > 0) {
       return part.toolName;
@@ -541,7 +563,8 @@ function pickToolName(parts: MessagePartRecord[]): string | undefined {
   return undefined;
 }
-function pickToolIsError(parts: MessagePartRecord[]): boolean | undefined {
+/** @internal Exported for transcript-maintenance reconstruction. */
+export function pickToolIsError(parts: MessagePartRecord[]): boolean | undefined {
   for (const part of parts) {
     const decoded = parseJson(part.metadata);
     if (!decoded || typeof decoded !== "object") {
@@ -555,6 +578,174 @@ function pickToolIsError(parts: MessagePartRecord[]): boolean | undefined {
   return undefined;
 }
+function extractToolCallId(block: { id?: unknown; call_id?: unknown }): string | null {
+  if (typeof block.id === "string" && block.id.length > 0) {
+    return block.id;
+  }
+  if (typeof block.call_id === "string" && block.call_id.length > 0) {
+    return block.call_id;
+  }
+  return null;
+}
+function extractToolCallIdsFromAssistant(message: AgentMessage): string[] {
+  if (message?.role !== "assistant" || !Array.isArray(message.content)) {
+    return [];
+  }
+  const ids: string[] = [];
+  for (const block of message.content) {
+    if (!block || typeof block !== "object") {
+      continue;
+    }
+    const record = block as { type?: unknown; id?: unknown; call_id?: unknown };
+    if (typeof record.type !== "string" || !TOOL_CALL_TYPES.has(record.type)) {
+      continue;
+    }
+    const id = extractToolCallId(record);
+    if (id) {
+      ids.push(id);
+    }
+  }
+  return ids;
+}
+function extractToolResultIdFromMessage(message: AgentMessage): string | null {
+  if (!message || typeof message !== "object") {
+    return null;
+  }
+  if (typeof message.toolCallId === "string" && message.toolCallId.length > 0) {
+    return message.toolCallId;
+  }
+  if (typeof message.toolUseId === "string" && message.toolUseId.length > 0) {
+    return message.toolUseId;
+  }
+  return null;
+}
+function collectAssistantToolCallIds(items: ResolvedItem[]): Set<string> {
+  const ids = new Set<string>();
+  for (const item of items) {
+    for (const id of extractToolCallIdsFromAssistant(item.message)) {
+      ids.add(id);
+    }
+  }
+  return ids;
+}
+function mergeFreshTailWithMatchingToolResults(
+  freshTail: ResolvedItem[],
+  matchingToolResults: ResolvedItem[],
+): ResolvedItem[] {
+  if (matchingToolResults.length === 0) {
+    return freshTail;
+  }
+  const resultsById = new Map<string, ResolvedItem[]>();
+  for (const item of matchingToolResults) {
+    const toolResultId = extractToolResultIdFromMessage(item.message);
+    if (!toolResultId) {
+      continue;
+    }
+    const existing = resultsById.get(toolResultId);
+    if (existing) {
+      existing.push(item);
+    } else {
+      resultsById.set(toolResultId, [item]);
+    }
+  }
+  const merged: ResolvedItem[] = [];
+  const usedOrdinals = new Set<number>();
+  for (const item of freshTail) {
+    merged.push(item);
+    const toolCallIds = extractToolCallIdsFromAssistant(item.message);
+    if (toolCallIds.length === 0) {
+      continue;
+    }
+    for (const toolCallId of toolCallIds) {
+      const matches = resultsById.get(toolCallId);
+      if (!matches) {
+        continue;
+      }
+      for (const match of matches) {
+        if (usedOrdinals.has(match.ordinal)) {
+          continue;
+        }
+        merged.push(match);
+        usedOrdinals.add(match.ordinal);
+      }
+    }
+  }
+  for (const item of matchingToolResults) {
+    if (!usedOrdinals.has(item.ordinal)) {
+      merged.push(item);
+    }
+  }
+  return merged;
+}
+function filterNonFreshAssistantToolCalls(
+  items: ResolvedItem[],
+  freshTailOrdinals: Set<number>,
+): AgentMessage[] {
+  const availableToolResultIds = new Set<string>();
+  for (const item of items) {
+    const toolResultId = extractToolResultIdFromMessage(item.message);
+    if (toolResultId) {
+      availableToolResultIds.add(toolResultId);
+    }
+  }
+  const filteredMessages: AgentMessage[] = [];
+  for (const item of items) {
+    if (item.message?.role !== "assistant" || freshTailOrdinals.has(item.ordinal)) {
+      filteredMessages.push(item.message);
+      continue;
+    }
+    if (!Array.isArray(item.message.content)) {
+      filteredMessages.push(item.message);
+      continue;
+    }
+    let removedAny = false;
+    const content = item.message.content.filter((block) => {
+      if (!block || typeof block !== "object") {
+        return true;
+      }
+      const record = block as { type?: unknown; id?: unknown; call_id?: unknown };
+      if (typeof record.type !== "string" || !TOOL_CALL_TYPES.has(record.type)) {
+        return true;
+      }
+      const toolCallId = extractToolCallId(record);
+      if (!toolCallId || availableToolResultIds.has(toolCallId)) {
+        return true;
+      }
+      removedAny = true;
+      return false;
+    });
+    if (content.length === 0) {
+      continue;
+    }
+    if (!removedAny) {
+      filteredMessages.push(item.message);
+      continue;
+    }
+    filteredMessages.push({
+      ...item.message,
+      content: content as typeof item.message.content,
+    } as AgentMessage);
+  }
+  return filteredMessages;
+}
 /** Format a Date for XML attributes in the agent's timezone. */
 function formatDateForAttribute(date: Date, timezone?: string): string {
   const tz = timezone ?? "UTC";
@@ -632,10 +823,60 @@ interface ResolvedItem {
   tokens: number;
   /** Whether this came from a raw message (vs. a summary) */
   isMessage: boolean;
+  /** Pre-extracted plain text used for relevance scoring */
+  text: string;
   /** Summary metadata used for dynamic system prompt guidance */
   summarySignal?: SummaryPromptSignal;
 }
+// ── BM25-lite relevance scorer ────────────────────────────────────────────────
+/** @internal Exported for testing only. Tokenize text into lowercase alphanumeric terms. */
+export function tokenizeText(text: string): string[] {
+  return text
+    .toLowerCase()
+    .split(/[^a-z0-9]+/)
+    .filter((t) => t.length > 1);
+}
+/**
+ * @internal Exported for testing only.
+ * Score an item's text against a prompt using BM25-lite (term-frequency overlap).
+ * Higher scores indicate stronger keyword overlap. Returns 0 when either input is empty.
+ */
+export function scoreRelevance(itemText: string, prompt: string): number {
+  const promptTerms = tokenizeText(prompt);
+  if (promptTerms.length === 0) return 0;
+  const itemTerms = tokenizeText(itemText);
+  if (itemTerms.length === 0) return 0;
+  // Build term-frequency map for the item
+  const freq = new Map<string, number>();
+  for (const term of itemTerms) {
+    freq.set(term, (freq.get(term) ?? 0) + 1);
+  }
+  // Sum TF contribution for each unique prompt term
+  const seen = new Set<string>();
+  let score = 0;
+  for (const term of promptTerms) {
+    if (seen.has(term)) continue;
+    seen.add(term);
+    const tf = freq.get(term) ?? 0;
+    if (tf > 0) {
+      // Normalised TF: tf / itemLength (BM25-lite saturation skipped for simplicity)
+      score += tf / itemTerms.length;
+    }
+  }
+  return score;
+}
+/** Return true when a prompt contains at least one searchable term. */
+function hasSearchablePrompt(prompt?: string): prompt is string {
+  return typeof prompt === "string" && tokenizeText(prompt).length > 0;
+}
 // ── ContextAssembler ─────────────────────────────────────────────────────────
 export class ContextAssembler {
@@ -692,8 +933,17 @@ export class ContextAssembler {
     // Step 3: Split into evictable prefix and protected fresh tail
     const tailStart = Math.max(0, resolved.length - freshTailCount);
-    const freshTail = resolved.slice(tailStart);
-    const evictable = resolved.slice(0, tailStart);
+    const baseFreshTail = resolved.slice(tailStart);
+    const initialEvictable = resolved.slice(0, tailStart);
+    const freshTailOrdinals = new Set(baseFreshTail.map((item) => item.ordinal));
+    const tailToolCallIds = collectAssistantToolCallIds(baseFreshTail);
+    const tailPairToolResults = initialEvictable.filter((item) => {
+      const toolResultId = extractToolResultIdFromMessage(item.message);
+      return toolResultId !== null && tailToolCallIds.has(toolResultId);
+    });
+    const protectedEvictableOrdinals = new Set(tailPairToolResults.map((item) => item.ordinal));
+    const evictable = initialEvictable.filter((item) => !protectedEvictableOrdinals.has(item.ordinal));
+    const freshTail = mergeFreshTailWithMatchingToolResults(baseFreshTail, tailPairToolResults);
     // Step 4: Budget-aware selection
     // First, compute the token cost of the fresh tail (always included).
@@ -719,8 +969,32 @@ export class ContextAssembler {
       // Everything fits
       selected.push(...evictable);
       evictableTokens = evictableTotalTokens;
+    } else if (hasSearchablePrompt(input.prompt)) {
+      // Prompt-aware eviction: score each evictable item by relevance to the
+      // prompt, then greedily fill budget from highest-scoring items down.
+      // Re-sort selected items by ordinal to restore chronological order.
+      const scored = evictable.map((item, idx) => ({
+        item,
+        score: scoreRelevance(item.text, input.prompt),
+        idx, // original index — higher = more recent, used as tiebreaker
+      }));
+      // Sort: highest relevance first; most recent (higher idx) breaks ties
+      scored.sort((a, b) => b.score - a.score || b.idx - a.idx);
+      const kept: ResolvedItem[] = [];
+      let accum = 0;
+      for (const { item } of scored) {
+        if (accum + item.tokens <= remainingBudget) {
+          kept.push(item);
+          accum += item.tokens;
+        }
+      }
+      // Restore chronological order by ordinal before appending freshTail
+      kept.sort((a, b) => a.ordinal - b.ordinal);
+      selected.push(...kept);
+      evictableTokens = accum;
     } else {
-      // Need to drop oldest items until we fit.
+      // Chronological eviction (default): drop oldest items until we fit.
       // Walk from the END of evictable (newest first) accumulating tokens,
       // then reverse to restore chronological order.
       const kept: ResolvedItem[] = [];
@@ -747,7 +1021,7 @@ export class ContextAssembler {
     // Normalize assistant string content to array blocks (some providers return
     // content as a plain string; Anthropic expects content block arrays).
-    const rawMessages = selected.map((item) => item.message);
+    const rawMessages = filterNonFreshAssistantToolCalls(selected, freshTailOrdinals);
     for (let i = 0; i < rawMessages.length; i++) {
       const msg = rawMessages[i];
       if (msg?.role === "assistant" && typeof msg.content === "string") {
@@ -758,8 +1032,19 @@ export class ContextAssembler {
       }
     }
+    // Filter out assistant messages with empty content — these can occur when
+    // tool-use-only turns are stored with content="" and zero message_parts,
+    // or when filterNonFreshAssistantToolCalls strips all tool_use blocks.
+    // Anthropic (and other providers) reject empty content arrays/strings.
+    const cleaned = rawMessages.filter(
+      (m) =>
+        !(
+          m?.role === "assistant" &&
+          (Array.isArray(m.content) ? m.content.length === 0 : !m.content)
+        ),
+    );
     return {
-      messages: sanitizeToolUseResultPairing(rawMessages) as AgentMessage[],
+      messages: sanitizeToolUseResultPairing(cleaned) as AgentMessage[],
       estimatedTokens,
       systemPromptAddition,
       stats: {
@@ -865,6 +1150,7 @@ export class ContextAssembler {
             } as AgentMessage),
       tokens: tokenCount,
       isMessage: true,
+      text: contentText,
     };
   }
@@ -887,6 +1173,7 @@ export class ContextAssembler {
       message: { role: "user" as const, content } as AgentMessage,
       tokens,
       isMessage: false,
+      text: summary.content,
       summarySignal: {
         kind: summary.kind,
         depth: summary.depth,