npm - @martian-engineering/lossless-claw - Versions diffs - 0.1.4 → 0.1.6 - Mend

@martian-engineering/lossless-claw 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/docs/configuration.md +7 -0
package/docs/tui.md +5 -0
package/index.ts +93 -9
package/package.json +1 -1
package/src/compaction.ts +36 -1
package/src/db/migration.ts +58 -6
package/src/expansion-auth.ts +53 -1
package/src/retrieval.ts +44 -1
package/src/store/summary-store.ts +122 -3
package/src/summarize.ts +303 -4
package/src/tools/lcm-describe-tool.ts +104 -17
package/src/tools/lcm-expand-query-tool.ts +128 -16
package/src/tools/lcm-expand-tool.delegation.ts +96 -0
package/src/tools/lcm-expansion-recursion-guard.ts +286 -0
package/src/types.ts +12 -1

package/src/store/summary-store.ts CHANGED Viewed

@@ -15,6 +15,8 @@ export type CreateSummaryInput = {
   earliestAt?: Date;
   latestAt?: Date;
   descendantCount?: number;
+  descendantTokenCount?: number;
+  sourceMessageTokenCount?: number;
 };
 export type SummaryRecord = {
@@ -28,9 +30,18 @@ export type SummaryRecord = {
   earliestAt: Date | null;
   latestAt: Date | null;
   descendantCount: number;
+  descendantTokenCount: number;
+  sourceMessageTokenCount: number;
   createdAt: Date;
 };
+export type SummarySubtreeNodeRecord = SummaryRecord & { // A summary record plus its position in a getSummarySubtree result.
+  depthFromRoot: number; // Number of summary_parents links followed from the requested root; 0 for the root itself.
+  parentSummaryId: string | null; // parent_summary_id of the link this node was reached through; null for the root.
+  path: string; // Dot-separated, zero-padded (%04d) ordinals of the links from the root; "" for the root.
+  childCount: number; // Count of summary_parents rows whose parent_summary_id is this node's summary id.
+};
 export type ContextItemRecord = {
   conversationId: number;
   ordinal: number;
@@ -92,9 +103,18 @@ interface SummaryRow {
   earliest_at: string | null;
   latest_at: string | null;
   descendant_count: number | null;
+  descendant_token_count: number | null;
+  source_message_token_count: number | null;
   created_at: string;
 }
+interface SummarySubtreeRow extends SummaryRow { // Raw row shape selected by the recursive subtree query in getSummarySubtree.
+  depth_from_root: number; // 0 for the seed row, incremented by 1 per recursive step.
+  parent_summary_id: string | null; // NULL for the seed row, otherwise summary_parents.parent_summary_id.
+  path: string; // '' for the seed row, otherwise dot-joined printf('%04d', ordinal) segments.
+  child_count: number | null; // Result of the COUNT(*) subquery; normalized to a non-negative integer when mapped to a record.
+}
 interface ContextItemRow {
   conversation_id: number;
   ordinal: number;
@@ -165,6 +185,18 @@ function toSummaryRecord(row: SummaryRow): SummaryRecord {
       row.descendant_count >= 0
         ? Math.floor(row.descendant_count)
         : 0,
+    descendantTokenCount:
+      typeof row.descendant_token_count === "number" &&
+      Number.isFinite(row.descendant_token_count) &&
+      row.descendant_token_count >= 0
+        ? Math.floor(row.descendant_token_count)
+        : 0,
+    sourceMessageTokenCount:
+      typeof row.source_message_token_count === "number" &&
+      Number.isFinite(row.source_message_token_count) &&
+      row.source_message_token_count >= 0
+        ? Math.floor(row.source_message_token_count)
+        : 0,
     createdAt: new Date(row.created_at),
   };
 }
@@ -221,6 +253,18 @@ export class SummaryStore {
       input.descendantCount >= 0
         ? Math.floor(input.descendantCount)
         : 0;
+    const descendantTokenCount =
+      typeof input.descendantTokenCount === "number" &&
+      Number.isFinite(input.descendantTokenCount) &&
+      input.descendantTokenCount >= 0
+        ? Math.floor(input.descendantTokenCount)
+        : 0;
+    const sourceMessageTokenCount =
+      typeof input.sourceMessageTokenCount === "number" &&
+      Number.isFinite(input.sourceMessageTokenCount) &&
+      input.sourceMessageTokenCount >= 0
+        ? Math.floor(input.sourceMessageTokenCount)
+        : 0;
     const depth =
       typeof input.depth === "number" && Number.isFinite(input.depth) && input.depth >= 0
         ? Math.floor(input.depth)
@@ -240,9 +284,11 @@ export class SummaryStore {
           file_ids,
           earliest_at,
           latest_at,
-          descendant_count
+          descendant_count,
+          descendant_token_count,
+          source_message_token_count
         )
-       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
+       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
       )
       .run(
         input.summaryId,
@@ -255,6 +301,8 @@ export class SummaryStore {
         earliestAt,
         latestAt,
         descendantCount,
+        descendantTokenCount,
+        sourceMessageTokenCount,
       );
     // Index in FTS5 as best-effort; compaction flow must continue even if
@@ -272,6 +320,7 @@ export class SummaryStore {
       .prepare(
         `SELECT summary_id, conversation_id, kind, depth, content, token_count, file_ids,
                 earliest_at, latest_at, descendant_count, created_at
+                , descendant_token_count, source_message_token_count
        FROM summaries WHERE summary_id = ?`,
       )
       .get(input.summaryId) as unknown as SummaryRow;
@@ -284,6 +333,7 @@ export class SummaryStore {
       .prepare(
         `SELECT summary_id, conversation_id, kind, depth, content, token_count, file_ids,
                 earliest_at, latest_at, descendant_count, created_at
+                , descendant_token_count, source_message_token_count
        FROM summaries WHERE summary_id = ?`,
       )
       .get(summaryId) as unknown as SummaryRow | undefined;
@@ -295,6 +345,7 @@ export class SummaryStore {
       .prepare(
         `SELECT summary_id, conversation_id, kind, depth, content, token_count, file_ids,
                 earliest_at, latest_at, descendant_count, created_at
+                , descendant_token_count, source_message_token_count
        FROM summaries
        WHERE conversation_id = ?
        ORDER BY created_at`,
@@ -353,6 +404,7 @@ export class SummaryStore {
       .prepare(
         `SELECT s.summary_id, s.conversation_id, s.kind, s.depth, s.content, s.token_count,
                 s.file_ids, s.earliest_at, s.latest_at, s.descendant_count, s.created_at
+                , s.descendant_token_count, s.source_message_token_count
        FROM summaries s
        JOIN summary_parents sp ON sp.summary_id = s.summary_id
        WHERE sp.parent_summary_id = ?
@@ -367,6 +419,7 @@ export class SummaryStore {
       .prepare(
         `SELECT s.summary_id, s.conversation_id, s.kind, s.depth, s.content, s.token_count,
                 s.file_ids, s.earliest_at, s.latest_at, s.descendant_count, s.created_at
+                , s.descendant_token_count, s.source_message_token_count
        FROM summaries s
        JOIN summary_parents sp ON sp.parent_summary_id = s.summary_id
        WHERE sp.summary_id = ?
@@ -376,6 +429,71 @@ export class SummaryStore {
     return rows.map(toSummaryRecord);
   }
+  /**
+   * Load the summary identified by `summaryId` together with every summary
+   * reachable from it by repeatedly following `summary_parents` rows from
+   * `parent_summary_id` to `summary_id`.
+   *
+   * Rows are ordered by depth from the root, then by path, then by creation
+   * time. A summary reachable through more than one path is reported once,
+   * using the first row in that ordering. Ids without a matching `summaries`
+   * row are dropped by the join, so an unknown `summaryId` yields an empty
+   * array.
+   *
+   * @param summaryId - Id of the summary to use as the subtree root.
+   * @returns One record per distinct summary, each carrying its depth, the
+   *   parent it was reached through, its ordinal path and its child count.
+   */
+  async getSummarySubtree(summaryId: string): Promise<SummarySubtreeNodeRecord[]> {
+    const subtreeQuery = this.db.prepare(
+        `WITH RECURSIVE subtree(summary_id, parent_summary_id, depth_from_root, path) AS (
+           SELECT ?, NULL, 0, ''
+           UNION ALL
+           SELECT
+             sp.summary_id,
+             sp.parent_summary_id,
+             subtree.depth_from_root + 1,
+             CASE
+               WHEN subtree.path = '' THEN printf('%04d', sp.ordinal)
+               ELSE subtree.path || '.' || printf('%04d', sp.ordinal)
+             END
+           FROM summary_parents sp
+           JOIN subtree ON sp.parent_summary_id = subtree.summary_id
+         )
+         SELECT
+           s.summary_id,
+           s.conversation_id,
+           s.kind,
+           s.depth,
+           s.content,
+           s.token_count,
+           s.file_ids,
+           s.earliest_at,
+           s.latest_at,
+           s.descendant_count,
+           s.descendant_token_count,
+           s.source_message_token_count,
+           s.created_at,
+           subtree.depth_from_root,
+           subtree.parent_summary_id,
+           subtree.path,
+           (
+             SELECT COUNT(*) FROM summary_parents sp2
+             WHERE sp2.parent_summary_id = s.summary_id
+           ) AS child_count
+         FROM subtree
+         JOIN summaries s ON s.summary_id = subtree.summary_id
+         ORDER BY subtree.depth_from_root ASC, subtree.path ASC, s.created_at ASC`,
+    );
+    const subtreeRows = subtreeQuery.all(summaryId) as unknown as SummarySubtreeRow[];
+    // A Map iterates in insertion order, so the first row seen for each id
+    // both wins the dedupe and fixes that node's position in the result.
+    const nodesById = new Map<string, SummarySubtreeNodeRecord>();
+    for (const row of subtreeRows) {
+      if (nodesById.has(row.summary_id)) {
+        continue;
+      }
+      const rawChildCount = row.child_count;
+      nodesById.set(row.summary_id, {
+        ...toSummaryRecord(row),
+        depthFromRoot: Math.max(0, Math.floor(row.depth_from_root ?? 0)),
+        parentSummaryId: row.parent_summary_id ?? null,
+        path: typeof row.path === "string" ? row.path : "",
+        childCount:
+          typeof rawChildCount === "number" && Number.isFinite(rawChildCount)
+            ? Math.max(0, Math.floor(rawChildCount))
+            : 0,
+      });
+    }
+    return Array.from(nodesById.values());
+  }
   // ── Context items ─────────────────────────────────────────────────────────
   async getContextItems(conversationId: number): Promise<ContextItemRecord[]> {
@@ -644,7 +762,8 @@ export class SummaryStore {
     const rows = this.db
       .prepare(
         `SELECT summary_id, conversation_id, kind, depth, content, token_count, file_ids,
-                earliest_at, latest_at, descendant_count, created_at
+                earliest_at, latest_at, descendant_count, descendant_token_count,
+                source_message_token_count, created_at
          FROM summaries
          ${whereClause}
          ORDER BY created_at DESC`,

package/src/summarize.ts CHANGED Viewed

@@ -24,6 +24,14 @@ export type LcmSummarizerLegacyParams = {
 type SummaryMode = "normal" | "aggressive";
 const DEFAULT_CONDENSED_TARGET_TOKENS = 2000;
+const LCM_SUMMARIZER_SYSTEM_PROMPT =
+  "You are a context-compaction summarization engine. Follow user instructions exactly and return plain text summary content only."; // Passed as the `system` field of summarizer completion requests.
+const DIAGNOSTIC_MAX_DEPTH = 4; // Nesting level at which sanitizeForDiagnostics returns "[max-depth]".
+const DIAGNOSTIC_MAX_ARRAY_ITEMS = 8; // Leading array entries kept by sanitizeForDiagnostics.
+const DIAGNOSTIC_MAX_OBJECT_KEYS = 16; // Leading object entries kept by sanitizeForDiagnostics.
+const DIAGNOSTIC_MAX_CHARS = 1200; // Default character cap used by truncateDiagnosticText.
+const DIAGNOSTIC_SENSITIVE_KEY_PATTERN =
+  /(api[-_]?key|authorization|token|secret|password|cookie|set-cookie|private[-_]?key|bearer)/i; // Unanchored, case-insensitive key match for redaction; note it also matches keys such as "prompt_tokens".
 /** Normalize provider ids for stable config/profile lookup. */
 function normalizeProviderId(provider: string): string {
@@ -193,6 +201,202 @@ function formatBlockTypes(blockTypes: string[]): string {
   return blockTypes.join(",");
 }
+/**
+ * Cap a diagnostic string so log lines stay bounded and readable.
+ *
+ * @param value - Text destined for a diagnostic log line.
+ * @param maxChars - Number of leading characters to keep; defaults to
+ *   `DIAGNOSTIC_MAX_CHARS`.
+ * @returns `value` unchanged when it fits, otherwise its first `maxChars`
+ *   characters followed by a marker stating how many characters were dropped.
+ */
+function truncateDiagnosticText(value: string, maxChars = DIAGNOSTIC_MAX_CHARS): string {
+  const overflow = value.length - maxChars;
+  return overflow <= 0
+    ? value
+    : `${value.slice(0, maxChars)}...[truncated:${overflow} chars]`;
+}
+/**
+ * Build a JSON-safe, redacted, depth-limited clone for diagnostic logging.
+ *
+ * Conversion rules, applied in order:
+ * - at `DIAGNOSTIC_MAX_DEPTH` or deeper, any value becomes "[max-depth]";
+ * - strings are shortened with `truncateDiagnosticText`;
+ * - `null`, numbers and booleans pass through unchanged;
+ * - bigints become decimal strings, because `JSON.stringify` throws a
+ *   TypeError on BigInt values and a single one would otherwise make
+ *   `formatDiagnosticPayload` discard the whole payload;
+ * - `undefined`, functions and symbols become placeholder strings;
+ * - arrays keep their first `DIAGNOSTIC_MAX_ARRAY_ITEMS` entries, followed by
+ *   a marker counting the omitted ones;
+ * - values rejected by `isRecord` are converted with `String()`;
+ * - records keep their first `DIAGNOSTIC_MAX_OBJECT_KEYS` entries, and any
+ *   entry whose key matches `DIAGNOSTIC_SENSITIVE_KEY_PATTERN` is replaced by
+ *   "[redacted]" without inspecting its value.
+ *
+ * @param value - Arbitrary value to sanitize.
+ * @param depth - Nesting level of `value`; defaults to 0 for a top-level call.
+ * @returns A structure made only of strings, numbers, booleans, `null`,
+ *   arrays and plain objects.
+ */
+function sanitizeForDiagnostics(value: unknown, depth = 0): unknown {
+  if (depth >= DIAGNOSTIC_MAX_DEPTH) {
+    return "[max-depth]";
+  }
+  if (typeof value === "string") {
+    return truncateDiagnosticText(value);
+  }
+  if (value === null || typeof value === "number" || typeof value === "boolean") {
+    return value;
+  }
+  if (typeof value === "bigint") {
+    // JSON.stringify cannot serialize BigInt, so emit its decimal text instead.
+    return value.toString();
+  }
+  if (value === undefined) {
+    return "[undefined]";
+  }
+  if (typeof value === "function") {
+    return "[function]";
+  }
+  if (typeof value === "symbol") {
+    return "[symbol]";
+  }
+  if (Array.isArray(value)) {
+    const head = value
+      .slice(0, DIAGNOSTIC_MAX_ARRAY_ITEMS)
+      .map((entry) => sanitizeForDiagnostics(entry, depth + 1));
+    if (value.length > DIAGNOSTIC_MAX_ARRAY_ITEMS) {
+      head.push(`[+${value.length - DIAGNOSTIC_MAX_ARRAY_ITEMS} more items]`);
+    }
+    return head;
+  }
+  if (!isRecord(value)) {
+    return String(value);
+  }
+  const out: Record<string, unknown> = {};
+  const entries = Object.entries(value);
+  for (const [key, entry] of entries.slice(0, DIAGNOSTIC_MAX_OBJECT_KEYS)) {
+    // Redaction is decided by key name alone; the value is never examined.
+    out[key] = DIAGNOSTIC_SENSITIVE_KEY_PATTERN.test(key)
+      ? "[redacted]"
+      : sanitizeForDiagnostics(entry, depth + 1);
+  }
+  if (entries.length > DIAGNOSTIC_MAX_OBJECT_KEYS) {
+    out.__truncated_keys__ = entries.length - DIAGNOSTIC_MAX_OBJECT_KEYS;
+  }
+  return out;
+}
+/**
+ * Render a diagnostic payload as compact, bounded JSON text.
+ *
+ * The value is first passed through `sanitizeForDiagnostics`, then serialized
+ * and capped with `truncateDiagnosticText`.
+ *
+ * @param value - Arbitrary payload to preview in a log line.
+ * @returns The capped JSON text; the JSON for an empty string when
+ *   serialization yields nothing; or a quoted "[unserializable]" marker when
+ *   sanitizing or serializing throws.
+ */
+function formatDiagnosticPayload(value: unknown): string {
+  let encoded: string | undefined;
+  try {
+    encoded = JSON.stringify(sanitizeForDiagnostics(value));
+  } catch {
+    return "\"[unserializable]\"";
+  }
+  return encoded ? truncateDiagnosticText(encoded) : "\"\"";
+}
+/**
+ * Summarize a provider response envelope as a single diagnostic string.
+ *
+ * Returns "" when `result` is not a record. Otherwise segments are emitted in
+ * this order and joined with "; ", each only when applicable: top-level key
+ * names (first 24), the kind/length/preview of `content`, a preview of
+ * `summary`/`output`/`message`/`response`, request ids, the echoed model and
+ * provider, `request_*` settings, numeric usage counters, the finish reason,
+ * and error details.
+ *
+ * Previews go through `formatDiagnosticPayload`. Id, model, provider and
+ * `request_*` values are included only when they are non-blank strings, and
+ * are written trimmed but otherwise verbatim.
+ *
+ * @param result - Value returned by a completion call; any type is accepted.
+ * @returns The joined diagnostic segments, or "" for non-record input.
+ */
+function extractResponseDiagnostics(result: unknown): string {
+  if (!isRecord(result)) {
+    return "";
+  }
+  const segments: string[] = [];
+  // Append `label=text` when `raw` is a string with non-whitespace content.
+  const appendText = (label: string, raw: unknown): void => {
+    if (typeof raw !== "string") {
+      return;
+    }
+    const text = raw.trim();
+    if (text) {
+      segments.push(`${label}=${text}`);
+    }
+  };
+  // Envelope shape: which top-level fields exist and what `content` looks like.
+  const envelopeKeys = Object.keys(result).slice(0, 24);
+  if (envelopeKeys.length > 0) {
+    segments.push(`keys=${envelopeKeys.join(",")}`);
+  }
+  if (!("content" in result)) {
+    segments.push("content_kind=missing");
+  } else {
+    const content = result.content;
+    if (Array.isArray(content)) {
+      segments.push(`content_kind=array`, `content_len=${content.length}`);
+    } else {
+      segments.push(`content_kind=${content === null ? "null" : typeof content}`);
+    }
+    segments.push(`content_preview=${formatDiagnosticPayload(content)}`);
+  }
+  // Alternative top-level fields where the response text may be carried.
+  const envelopePreview: Record<string, unknown> = {};
+  let envelopeHits = 0;
+  for (const field of ["summary", "output", "message", "response"]) {
+    if (field in result) {
+      envelopePreview[field] = result[field];
+      envelopeHits += 1;
+    }
+  }
+  if (envelopeHits > 0) {
+    segments.push(`payload_preview=${formatDiagnosticPayload(envelopePreview)}`);
+  }
+  // Request / response identifiers.
+  for (const idField of ["id", "request_id", "x-request-id"]) {
+    appendText(idField, result[idField]);
+  }
+  // Model and provider as echoed back in the response.
+  appendText("resp_model", result.model);
+  appendText("resp_provider", result.provider);
+  // Request settings attached to the result, reported under their own names.
+  for (const requestField of [
+    "request_provider",
+    "request_model",
+    "request_api",
+    "request_reasoning",
+    "request_has_system",
+    "request_temperature",
+    "request_temperature_sent",
+  ]) {
+    appendText(requestField, result[requestField]);
+  }
+  // Usage counters: only numeric fields are reported, comma-joined in one segment.
+  const usage = result.usage;
+  if (isRecord(usage)) {
+    const counters = [
+      "prompt_tokens",
+      "completion_tokens",
+      "total_tokens",
+      "input",
+      "output",
+      "cacheRead",
+      "cacheWrite",
+    ]
+      .filter((counter) => typeof usage[counter] === "number")
+      .map((counter) => `${counter}=${usage[counter]}`);
+    if (counters.length > 0) {
+      segments.push(counters.join(","));
+    }
+  }
+  // Finish reason: the first string-valued field among the known spellings.
+  const finishReason = [result.finish_reason, result.stopReason, result.stop_reason].find(
+    (candidate): candidate is string => typeof candidate === "string",
+  );
+  if (finishReason) {
+    segments.push(`finish=${finishReason}`);
+  }
+  // Provider-level error details.
+  const errorMessage = result.errorMessage;
+  if (typeof errorMessage === "string" && errorMessage.trim()) {
+    segments.push(`error_message=${truncateDiagnosticText(errorMessage.trim(), 400)}`);
+  }
+  const errorPayload = result.error;
+  if (errorPayload !== undefined) {
+    segments.push(`error_preview=${formatDiagnosticPayload(errorPayload)}`);
+  }
+  return segments.join("; ");
+}
 /**
  * Resolve a practical target token count for leaf and condensed summaries.
  * Aggressive leaf mode intentionally aims lower so compaction converges faster.
@@ -522,6 +726,7 @@ export async function createLcmSummarizeFromLegacyParams(params: {
       authProfileId,
       agentDir,
       runtimeConfig: params.legacyParams.config,
+      system: LCM_SUMMARIZER_SYSTEM_PROMPT,
       messages: [
         {
           role: "user",
@@ -533,17 +738,111 @@ export async function createLcmSummarizeFromLegacyParams(params: {
     });
     const normalized = normalizeCompletionSummary(result.content);
-    const summary = normalized.summary;
+    let summary = normalized.summary;
+    let summarySource: "content" | "envelope" | "retry" | "fallback" = "content";
+    // --- Empty-summary hardening: envelope → retry → deterministic fallback ---
     if (!summary) {
+      // Envelope-aware extraction: some providers place summary text in
+      // top-level response fields (output, message, response) rather than
+      // inside the content array.  Re-run normalization against the full
+      // response envelope before spending an API call on a retry.
+      const envelopeNormalized = normalizeCompletionSummary(result);
+      if (envelopeNormalized.summary) {
+        summary = envelopeNormalized.summary;
+        summarySource = "envelope";
+        console.error(
+          `[lcm] recovered summary from response envelope; provider=${provider}; model=${model}; ` +
+            `block_types=${formatBlockTypes(envelopeNormalized.blockTypes)}; source=envelope`,
+        );
+      }
+    }
+    if (!summary) {
+      const responseDiag = extractResponseDiagnostics(result);
+      const diagParts = [
+        `[lcm] empty normalized summary on first attempt`,
+        `provider=${provider}`,
+        `model=${model}`,
+        `block_types=${formatBlockTypes(normalized.blockTypes)}`,
+        `response_blocks=${result.content.length}`,
+      ];
+      if (responseDiag) {
+        diagParts.push(responseDiag);
+      }
+      console.error(`${diagParts.join("; ")}; retrying with conservative settings`);
+      // Single retry with conservative parameters: low temperature and low
+      // reasoning budget to coax a textual response from providers that
+      // sometimes return reasoning-only or empty blocks on the first pass.
+      try {
+        const retryResult = await params.deps.complete({
+          provider,
+          model,
+          apiKey,
+          providerApi,
+          authProfileId,
+          agentDir,
+          runtimeConfig: params.legacyParams.config,
+          system: LCM_SUMMARIZER_SYSTEM_PROMPT,
+          messages: [
+            {
+              role: "user",
+              content: prompt,
+            },
+          ],
+          maxTokens: targetTokens,
+          temperature: 0.05,
+          reasoning: "low",
+        });
+        const retryNormalized = normalizeCompletionSummary(retryResult.content);
+        summary = retryNormalized.summary;
+        if (summary) {
+          summarySource = "retry";
+          console.error(
+            `[lcm] retry succeeded; provider=${provider}; model=${model}; ` +
+              `block_types=${formatBlockTypes(retryNormalized.blockTypes)}; source=retry`,
+          );
+        } else {
+          const retryDiag = extractResponseDiagnostics(retryResult);
+          const retryParts = [
+            `[lcm] retry also returned empty summary`,
+            `provider=${provider}`,
+            `model=${model}`,
+            `block_types=${formatBlockTypes(retryNormalized.blockTypes)}`,
+            `response_blocks=${retryResult.content.length}`,
+          ];
+          if (retryDiag) {
+            retryParts.push(retryDiag);
+          }
+          console.error(`${retryParts.join("; ")}; falling back to truncation`);
+        }
+      } catch (retryErr) {
+        // Retry is best-effort; log and proceed to deterministic fallback.
+        console.error(
+          `[lcm] retry failed; provider=${provider} model=${model}; error=${
+            retryErr instanceof Error ? retryErr.message : String(retryErr)
+          }; falling back to truncation`,
+        );
+      }
+    }
+    if (!summary) {
+      summarySource = "fallback";
       console.error(
-        `[lcm] summarize empty normalized summary; provider=${provider} model=${model} block_types=${formatBlockTypes(
-          normalized.blockTypes,
-        )}; response_blocks=${result.content.length}; falling back to truncation`,
+        `[lcm] all extraction attempts exhausted; provider=${provider}; model=${model}; source=fallback`,
       );
       return buildDeterministicFallbackSummary(text, targetTokens);
     }
+    if (summarySource !== "content") {
+      console.error(
+        `[lcm] summary resolved via non-content path; provider=${provider}; model=${model}; source=${summarySource}`,
+      );
+    }
     return summary;
   };
 }