pi-smart-compact 7.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,147 @@
+ /**
+  * Pre-compaction redundancy pruning — deterministic, zero LLM cost.
+  * Reduces compaction input by collapsing redundant message sequences.
+  */
+
+ import type { LlmMessage, ToolCallBlock } from "../types.ts";
+ import { extractText } from "./extraction.ts";
+ import { estimateTokens } from "./tokens.ts";
+
+ export interface PruningResult {
+   messages: LlmMessage[];
+   prunedCount: number;
+   prunedTokenSaving: number;
+   reasons: Array<{ count: number; reason: string }>;
+ }
+
+ // Pattern for agent acknowledgment messages with no information
+ const ACK_RE = /^(?:I'?ll |let me |sure|ok[,.]?|got it|i understand|i see|now i|next,? i|alright|great|perfect|sounds good|i can|i will|checking|looking|right away)/i;
+
+ // Maximum chars to keep from a tool result output
+ const MAX_TOOL_OUTPUT_CHARS = 800;
+
+ /**
+  * Build a quick index of assistant tool calls for lookups.
+  */
+ function buildToolCallIndex(msgs: LlmMessage[]): Map<string, { name: string; args: Record<string, unknown>; msgIndex: number }> {
+   const idx = new Map<string, { name: string; args: Record<string, unknown>; msgIndex: number }>();
+   for (let i = 0; i < msgs.length; i++) {
+     if (msgs[i].role !== "assistant") continue;
+     for (const b of (msgs[i].content ?? []) as unknown[]) {
+       const block = b as ToolCallBlock;
+       if (block?.type === "toolCall" && block.id) {
+         idx.set(block.id, { name: block.name, args: block.arguments, msgIndex: i });
+       }
+     }
+   }
+   return idx;
+ }
+
+ /**
+  * Detect and collapse redundant message sequences.
+  */
+ export function pruneRedundant(msgs: LlmMessage[]): PruningResult {
+   if (msgs.length < 5) return { messages: msgs, prunedCount: 0, prunedTokenSaving: 0, reasons: [] };
+
+   const tcIdx = buildToolCallIndex(msgs);
+   const keep = new Set<number>(msgs.map((_, i) => i));
+   const reasonMap = new Map<string, number>();
+
+   // ── 1. Duplicate file reads: keep only last read per file ──
+   const readIndices = new Map<string, number[]>(); // filepath → [indices of toolResult]
+   for (let i = 0; i < msgs.length; i++) {
+     if (msgs[i].role !== "toolResult") continue;
+     const tc = tcIdx.get(msgs[i].toolCallId ?? "");
+     if (!tc || tc.name !== "read") continue;
+     const fp = (tc.args?.path ?? tc.args?.file_path) as string | undefined;
+     if (!fp) continue;
+     const arr = readIndices.get(fp) ?? [];
+     arr.push(i);
+     readIndices.set(fp, arr);
+   }
+   for (const [, indices] of readIndices) {
+     // Keep last read, prune the rest
+     for (let j = 0; j < indices.length - 1; j++) {
+       keep.delete(indices[j]);
+       // Also prune the corresponding assistant tool call message
+       const tc = tcIdx.get(msgs[indices[j]].toolCallId ?? "");
+       if (tc) keep.delete(tc.msgIndex);
+     }
+     if (indices.length > 1) {
+       reasonMap.set("Duplicate file reads", (reasonMap.get("Duplicate file reads") ?? 0) + indices.length - 1);
+     }
+   }
+
+   // ── 2. Repeated-failure chains: keep only the first and last failure of each run ──
+   const failedToolResults: Array<{ index: number; tool: string; tcIndex: number }> = [];
+   for (let i = 0; i < msgs.length; i++) {
+     if (msgs[i].role !== "toolResult" || !msgs[i].isError) continue;
+     const tc = tcIdx.get(msgs[i].toolCallId ?? "");
+     failedToolResults.push({ index: i, tool: tc?.name ?? "unknown", tcIndex: tc?.msgIndex ?? -1 });
+   }
+   // Group failures of the same tool that occur within 10 messages of each other
+   let i = 0;
+   while (i < failedToolResults.length) {
+     const tool = failedToolResults[i].tool;
+     let j = i + 1;
+     while (j < failedToolResults.length && failedToolResults[j].tool === tool && failedToolResults[j].index - failedToolResults[j - 1].index < 10) {
+       j++;
+     }
+     // If 3+ consecutive failures of same tool, keep only first and last
+     if (j - i >= 3) {
+       for (let k = i + 1; k < j - 1; k++) {
+         keep.delete(failedToolResults[k].index);
+         if (failedToolResults[k].tcIndex >= 0) keep.delete(failedToolResults[k].tcIndex);
+       }
+       reasonMap.set("Collapsed error chains", (reasonMap.get("Collapsed error chains") ?? 0) + (j - i - 2));
+     }
+     i = j;
+   }
+
+   // ── 3. Agent acknowledgment messages: no informational content ──
+   for (let idx = 0; idx < msgs.length; idx++) {
+     if (msgs[idx].role !== "assistant") continue;
+     const blocks = (msgs[idx].content ?? []) as unknown[];
+     // Only consider messages that are pure text with no tool calls
+     const hasToolCall = blocks.some((b) => (b as ToolCallBlock)?.type === "toolCall");
+     if (hasToolCall) continue;
+     const text = extractText(msgs[idx].content).trim();
+     if (text.length > 0 && text.length < 100 && ACK_RE.test(text)) {
+       keep.delete(idx);
+       reasonMap.set("Agent acknowledgments", (reasonMap.get("Agent acknowledgments") ?? 0) + 1);
+     }
+   }
+
+   // ── 4. Truncate long tool result outputs ──
+   // (Applied as content modification, not message removal)
+   const kept = msgs.map((m, idx) => {
+     if (!keep.has(idx)) return null;
+     if (m.role !== "toolResult") return m;
+     const text = extractText(m.content);
+     if (text.length > MAX_TOOL_OUTPUT_CHARS) {
+       // Keep the first and last MAX_TOOL_OUTPUT_CHARS / 2 chars around a truncation marker
+       const half = MAX_TOOL_OUTPUT_CHARS / 2;
+       const head = text.slice(0, half);
+       const tail = text.slice(-half);
+       const truncated = head + "\n... [truncated " + (text.length - MAX_TOOL_OUTPUT_CHARS) + " chars] ...\n" + tail;
+       return { ...m, content: [{ type: "text" as const, text: truncated }] };
+     }
+     return m;
+   });
+
+   // Build final message list, preserving order
+   const finalMsgs = kept.filter((m): m is LlmMessage => m !== null);
+   const prunedCount = msgs.length - finalMsgs.length;
+
+   // Estimate token saving
+   const originalTokens = estimateTokens(msgs.map(m => extractText(m.content)).join(""));
+   const prunedTokens = estimateTokens(finalMsgs.map(m => extractText(m.content)).join(""));
+
+   const reasons = [...reasonMap.entries()].map(([reason, count]) => ({ count, reason }));
+
+   return {
+     messages: finalMsgs,
+     prunedCount,
+     prunedTokenSaving: Math.max(0, originalTokens - prunedTokens),
+     reasons,
+   };
+ }
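
For orientation, here is a minimal usage sketch (not part of the package) showing the pruner on an invented transcript; the LlmMessage field shapes (role, content, toolCallId) are inferred from the code above:

const transcript: LlmMessage[] = [
  { role: "assistant", content: [{ type: "toolCall", id: "t1", name: "read", arguments: { path: "src/app.ts" } }] },
  { role: "toolResult", toolCallId: "t1", content: [{ type: "text", text: "export const app = 1;" }] },
  { role: "assistant", content: [{ type: "text", text: "Got it, reading that file once more." }] },
  { role: "assistant", content: [{ type: "toolCall", id: "t2", name: "read", arguments: { path: "src/app.ts" } }] },
  { role: "toolResult", toolCallId: "t2", content: [{ type: "text", text: "export const app = 1;" }] },
];

const result = pruneRedundant(transcript);
// The earlier read pair and the bare acknowledgment are dropped: prunedCount === 3, and
// reasons is [{ count: 1, reason: "Duplicate file reads" }, { count: 1, reason: "Agent acknowledgments" }]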
@@ -0,0 +1,63 @@
+ /**
+  * Token estimation with provider-specific ratios and EMA calibration.
+  */
+
+ import { CHARS_PER_TOKEN } from "../constants.ts";
+ import type { ProviderCapabilities } from "../types.ts";
+
+ const PROVIDER_MAP: Record<string, ProviderCapabilities> = {
+   "zai-anthropic": {
+     maxOutputTokens: 8192, supportsTools: "probe", jsonReliability: "high",
+     instructionFollowing: "high", tokenRatioEstimate: 3.5, concurrencyLimit: 3,
+     cacheStrategy: "anthropic",
+   },
+   "minimax": {
+     maxOutputTokens: 4096, supportsTools: "probe", jsonReliability: "medium",
+     instructionFollowing: "medium", tokenRatioEstimate: 3.8, concurrencyLimit: 2,
+     cacheStrategy: "anthropic",
+   },
+   "xiaomi-token-plan": {
+     maxOutputTokens: 8192, supportsTools: "probe", jsonReliability: "medium",
+     instructionFollowing: "medium", tokenRatioEstimate: 3.3, concurrencyLimit: 2,
+     cacheStrategy: "openai",
+   },
+   "openai": {
+     maxOutputTokens: 16384, supportsTools: true, jsonReliability: "high",
+     instructionFollowing: "high", tokenRatioEstimate: 4.0, concurrencyLimit: 5,
+     cacheStrategy: "openai",
+   },
+ };
+
+ export function getProviderCaps(provider: string): ProviderCapabilities {
+   return PROVIDER_MAP[provider] ?? {
+     maxOutputTokens: 8192, supportsTools: "probe", jsonReliability: "medium",
+     instructionFollowing: "medium", tokenRatioEstimate: 3.8, concurrencyLimit: 2,
+     cacheStrategy: "none",
+   };
+ }
+
+ // Calibration factors keyed by provider, so one provider's feedback can't bleed into another's.
+ const _calibrationFactors = new Map<string, number>();
+
+ function getCalibrationFactor(provider?: string): number {
+   if (!provider) return 1.0;
+   return _calibrationFactors.get(provider) ?? 1.0;
+ }
+
+ export function estimateTokens(text: string, provider?: string): number {
+   const baseRatio = provider ? getProviderCaps(provider).tokenRatioEstimate : CHARS_PER_TOKEN;
+   // JSON content tokenizes more densely (brackets, quotes, escapes): fewer chars per token
+   const jsonPenalty = text.startsWith("[") || text.startsWith("{") ? 0.85 : 1.0;
+   // Turkish characters are multi-byte in some tokenizers, also lowering chars per token
+   const langPenalty = /[çğıöşüÇĞİÖŞÜ]/.test(text) ? 0.9 : 1.0;
+   const calibration = getCalibrationFactor(provider);
+   // Penalties shrink the effective chars-per-token ratio, which raises the token estimate
+   return Math.ceil((text.length / (baseRatio * jsonPenalty * langPenalty)) * calibration);
+ }
+
+ export function calibrateFromResponse(estimated: number, actual: number, provider?: string): void {
+   if (actual > 0 && estimated > 0 && provider) {
+     const prev = _calibrationFactors.get(provider) ?? 1.0;
+     const sample = actual / estimated;
+     _calibrationFactors.set(provider, prev * 0.7 + sample * 0.3); // EMA smoothing
+   }
+ }
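
And a sketch of the intended calibration loop; the request/response plumbing is elided, and the usage field name is hypothetical since each provider reports token counts differently:

const prompt = JSON.stringify({ role: "user", text: "Merhaba dünya" }); // triggers both the JSON and Turkish adjustments
const estimated = estimateTokens(prompt, "minimax");

// ... send the request, then read the provider-reported input token count ...
const actual = 42; // e.g. from response.usage.input_tokens (field name varies by provider)
calibrateFromResponse(estimated, actual, "minimax");

// The EMA (70% previous factor, 30% new sample) nudges future estimates for "minimax"
// toward the provider's real tokenizer without letting one outlier response dominate.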