pi-smart-compact 7.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,259 @@
1
+ /**
2
+ * Phase 1: Deterministic extraction — zero LLM calls.
3
+ */
4
+
5
+ import path from "node:path";
6
+ import type { LlmMessage, ProfileConfig, StructuredExtraction, ToolCallBlock } from "../types.ts";
7
+ import { NO_OP_RE, SHIFT_RE, CHOICE_RE } from "../constants.ts";
8
+ import { estimateTokens } from "./tokens.ts";
9
+
10
+ export function extractText(content: unknown): string {
11
+ if (typeof content === "string") return content;
12
+ if (Array.isArray(content)) return content.map((b: unknown) => {
13
+ if (typeof b === "string") return b;
14
+ if ((b as Record<string, unknown>)?.type === "text") return (b as { text?: string }).text ?? "";
15
+ return "";
16
+ }).join("");
17
+ return "";
18
+ }
19
+
20
+ export function buildToolCallIndex(msgs: LlmMessage[]): Map<string, { name: string; arguments: Record<string, unknown>; msgIndex: number }> {
21
+ const idx = new Map<string, { name: string; arguments: Record<string, unknown>; msgIndex: number }>();
22
+ for (let i = 0; i < msgs.length; i++) {
23
+ const m = msgs[i];
24
+ if (m.role !== "assistant") continue;
25
+ const blocks = (m.content ?? []) as unknown[];
26
+ for (const b of blocks) {
27
+ const block = b as ToolCallBlock;
28
+ if (block?.type === "toolCall" && block.id) {
29
+ idx.set(block.id, { name: block.name, arguments: block.arguments, msgIndex: i });
30
+ }
31
+ }
32
+ }
33
+ return idx;
34
+ }
35
+
36
+ export function trackFileOps(msgs: LlmMessage[]): { modified: StructuredExtraction["modifiedFiles"]; read: string[]; deleted: string[] } {
37
+ const tcIdx = buildToolCallIndex(msgs);
38
+ const modMap = new Map<string, { toolCalls: number; lastIdx: number }>();
39
+ const readSet = new Set<string>();
40
+ const delSet = new Set<string>();
41
+
42
+ for (let i = 0; i < msgs.length; i++) {
43
+ const m = msgs[i];
44
+ if (m.role !== "toolResult" || m.isError) continue;
45
+ const tc = tcIdx.get(m.toolCallId ?? "");
46
+ if (!tc) continue;
47
+ const args = tc.arguments;
48
+ const filePath = (args?.path ?? args?.file_path ?? args?.filePath) as string | undefined;
49
+ if (!filePath) continue;
50
+ const tool = tc.name.toLowerCase();
51
+
52
+ if (tool.includes("write") || tool.includes("edit")) {
53
+ const resultText = extractText(m.content);
54
+ if (!NO_OP_RE.test(resultText)) {
55
+ const existing = modMap.get(filePath);
56
+ modMap.set(filePath, { toolCalls: (existing?.toolCalls ?? 0) + 1, lastIdx: i });
57
+ }
58
+ } else if (tool.includes("delete") || tool.includes("remove")) {
59
+ delSet.add(filePath);
60
+ } else if (tool.includes("read")) {
61
+ readSet.add(filePath);
62
+ }
63
+ }
64
+
65
+ return {
66
+ modified: [...modMap.entries()].map(([p, d]) => ({ path: p, toolCalls: d.toolCalls, lastModifiedIndex: d.lastIdx })),
67
+ read: [...readSet], deleted: [...delSet],
68
+ };
69
+ }
70
+
71
/**
 * Catalog failures in the conversation: explicit toolResult errors plus
 * bash results whose output looks like a failure. Each cataloged error is
 * then checked for a follow-up retry of the same tool, and whether that
 * retry eventually produced a successful result.
 */
export function catalogErrors(msgs: LlmMessage[]): StructuredExtraction["errors"] {
  const tcIdx = buildToolCallIndex(msgs);
  const errors: StructuredExtraction["errors"] = [];

  for (let i = 0; i < msgs.length; i++) {
    const m = msgs[i];
    if (m.role !== "toolResult") continue;
    const tc = tcIdx.get(m.toolCallId ?? "");

    if (m.isError) {
      // Explicit tool error; message text capped at 500 chars.
      errors.push({ index: i, tool: tc?.name ?? "unknown", message: extractText(m.content).slice(0, 500), retryAttempted: false, resolved: false });
      continue;
    }

    if (tc?.name === "bash") {
      const txt = extractText(m.content);
      // Heuristic failure detection for non-error bash results: common
      // shell/build failure phrases. Long outputs (>= 2000 chars) are
      // skipped to avoid flagging verbose-but-successful runs.
      const isLikelyError = /(?:command not found|no such file|permission denied|syntax error|cannot find|module not found|compilation error|build failed|test failed)/i.test(txt);
      if (isLikelyError && txt.length < 2000) {
        errors.push({ index: i, tool: "bash", message: txt.slice(0, 300), retryAttempted: false, resolved: false });
      }
    }
  }

  // Retry/resolution pass: scan up to 5 messages past each error for another
  // call to the same tool (matched by tool name). If found, scan up to 9
  // further messages for a non-error result with that exact call id.
  for (const err of errors) {
    for (let j = err.index + 1; j < Math.min(msgs.length, err.index + 6); j++) {
      if (msgs[j]?.role === "assistant") {
        const blocks = (msgs[j]?.content ?? []) as unknown[];
        for (const b of blocks) {
          const block = b as ToolCallBlock;
          if (block?.type === "toolCall" && block.name === err.tool) {
            err.retryAttempted = true;
            for (let k = j + 1; k < Math.min(msgs.length, j + 10); k++) {
              if (msgs[k]?.role === "toolResult" && msgs[k]?.toolCallId === block.id && !msgs[k]?.isError) {
                err.resolved = true; break;
              }
            }
            break;
          }
        }
        // Only the first retry candidate is considered per error.
        if (err.retryAttempted) break;
      }
    }
  }
  return errors;
}
116
+
117
+ export function extractDecisions(msgs: LlmMessage[]): StructuredExtraction["decisions"] {
118
+ const tcIdx = buildToolCallIndex(msgs);
119
+ const decisions: StructuredExtraction["decisions"] = [];
120
+
121
+ for (const [id, tc] of tcIdx) {
122
+ if (tc.name !== "ask_user") continue;
123
+ const args = tc.arguments;
124
+ const question = typeof args === "string" ? args : (args?.question ?? args?.prompt ?? "") as string;
125
+ if (!question) continue;
126
+ for (let i = tc.msgIndex + 1; i < Math.min(msgs.length, tc.msgIndex + 4); i++) {
127
+ if (msgs[i]?.role === "toolResult" && msgs[i]?.toolCallId === id) {
128
+ decisions.push({ index: tc.msgIndex, type: "explicit", summary: question.slice(0, 200), userResponse: extractText(msgs[i].content).slice(0, 300) });
129
+ break;
130
+ }
131
+ }
132
+ }
133
+
134
+ for (let i = 0; i < msgs.length; i++) {
135
+ if (msgs[i]?.role !== "user") continue;
136
+ const txt = extractText(msgs[i].content);
137
+ if (CHOICE_RE.test(txt)) {
138
+ decisions.push({ index: i, type: "implicit", summary: txt.slice(0, 200) });
139
+ }
140
+ }
141
+ return decisions;
142
+ }
143
+
144
// Regex heuristics for user-stated constraints, scanned by mineConstraints
// (first matching pattern wins per message). `conf` is the confidence
// attached to the extracted constraint.
const CONSTRAINT_PATTERNS: Array<{ re: RegExp; cat: StructuredExtraction["constraints"][0]["category"]; conf: number }> = [
  // English hard requirements: "must be", "needs to include", ...
  { re: /\b(?:must|need|require|has to|important)\b.*\b(?:be|use|have|include|support)\b/i, cat: "requirement", conf: 0.85 },
  // English prohibitions: "don't", "never", "must not", ...
  { re: /\b(?:don't|never|avoid|shouldn't|must not|do not|no\s+(?:need|want))\b/i, cat: "prohibition", conf: 0.8 },
  // English softer preferences: "prefer to use", "would rather have", ...
  { re: /\b(?:prefer|like|want|would rather|should)\b.*\b(?:use|be|have|with)\b/i, cat: "preference", conf: 0.6 },
  // Turkish patterns — both with and without diacriticals
  { re: /\b(?:kritik|kritikal|\u00f6nemli|onemli|\u015fart|sart|zorunlu|\u015fart ko\u015ful|\u00f6nemli \u015fart|kesinlikle|kesinlikle \u015fart|asla|sak\u0131n|sak\u0131nha|bunu yapma|b\u00f6yle olsun|b\u00f6yle yap\u0131n|\u015f\u00f6yle olsun|\u015f\u00f6yle yap\u0131n)\b/iu, cat: "requirement", conf: 0.8 },
  { re: /\b(?:yapma|kullanma|sak\u0131n|asla\s+(?:kullanma|yapma|getirme))\b/iu, cat: "prohibition", conf: 0.8 },
  { re: /\b(?:tercih|isterim|olsun|kullanal\u0131m|yapal\u0131m|istiyorum)\b/iu, cat: "preference", conf: 0.6 },
];
153
+
154
+ export function mineConstraints(msgs: LlmMessage[]): StructuredExtraction["constraints"] {
155
+ const constraints: StructuredExtraction["constraints"] = [];
156
+ for (let i = 0; i < msgs.length; i++) {
157
+ if (msgs[i]?.role !== "user") continue;
158
+ const txt = extractText(msgs[i].content);
159
+ if (txt.length < 10 || txt.startsWith("/")) continue;
160
+ for (const { re, cat, conf } of CONSTRAINT_PATTERNS) {
161
+ if (re.test(txt)) {
162
+ constraints.push({ index: i, text: txt.slice(0, 300), category: cat, confidence: conf });
163
+ break;
164
+ }
165
+ }
166
+ }
167
+ return constraints;
168
+ }
169
+
170
/**
 * Split the conversation into topic segments with zero LLM calls.
 * A segment break is triggered by: switching to a different file once past
 * minChunkTokens, a user message matching SHIFT_RE once past minChunkTokens,
 * or exceeding maxChunkTokens outright. At most maxSegs segments are
 * produced; whatever remains always becomes one final trailing segment.
 */
export function segmentTopicsHeuristic(msgs: LlmMessage[], pc: ProfileConfig, maxSegs = 20): StructuredExtraction["topics"] {
  const topics: StructuredExtraction["topics"] = [];
  // Rolling state for the segment being accumulated.
  let startIdx = 0, tokenAcc = 0, lastFile: string | null = null, errAcc = 0;
  const tcIdx = buildToolCallIndex(msgs);

  for (let i = 0; i < msgs.length; i++) {
    const m = msgs[i];
    const txt = extractText(m.content);
    tokenAcc += estimateTokens(txt);
    let brk = false;
    // NOTE: type/primaryFile reflect only the message that closes the
    // segment, not the whole segment's contents.
    let type: StructuredExtraction["topics"][0]["type"] = "exploration";
    let primaryFile: string | null = null;

    if (m.role === "assistant") {
      const blocks = (m.content ?? []) as unknown[];
      for (const b of blocks) {
        const block = b as ToolCallBlock;
        if (block?.type === "toolCall") {
          const fp = (block.arguments?.path ?? block.arguments?.file_path) as string | undefined;
          if (fp) {
            const fn = path.basename(fp);
            // A file switch signals a topic change, but only once the segment
            // has reached a useful minimum size.
            if (lastFile && fn !== lastFile && tokenAcc > pc.minChunkTokens) brk = true;
            lastFile = fn;
            primaryFile = fp;
            if (block.name?.includes("write") || block.name?.includes("edit")) type = "implementation";
            else if (block.name?.includes("read")) type = "review";
          }
        }
      }
    }
    // Errors — explicit, or bash output containing error/fail — mark the
    // message as debugging and raise the segment's error density.
    if (m.role === "toolResult" && m.isError) { errAcc++; type = "debugging"; }
    if (m.role === "toolResult" && !m.isError) {
      const tc = tcIdx.get(m.toolCallId ?? "");
      if (tc?.name === "bash" && /error|fail/i.test(txt)) { errAcc++; type = "debugging"; }
    }
    // Explicit topic-shift phrasing from the user also breaks the segment.
    if (m.role === "user" && SHIFT_RE.test(txt) && tokenAcc > pc.minChunkTokens) brk = true;
    if (tokenAcc >= pc.maxChunkTokens) brk = true;

    // Last slot (maxSegs - 1) is reserved for the trailing segment below.
    if (brk && i > startIdx && topics.length < maxSegs - 1) {
      topics.push({ startIndex: startIdx, endIndex: i, primaryFile, type, errorDensity: errAcc });
      startIdx = i + 1; tokenAcc = 0; lastFile = null; errAcc = 0;
    }
  }
  if (startIdx < msgs.length) {
    topics.push({ startIndex: startIdx, endIndex: msgs.length - 1, primaryFile: null, type: "exploration", errorDensity: errAcc });
  }
  return topics;
}
218
+
219
+ export function buildTimeline(msgs: LlmMessage[], errors: StructuredExtraction["errors"]): StructuredExtraction["timeline"] {
220
+ const timeline: StructuredExtraction["timeline"] = [];
221
+ const errorIndices = new Set(errors.map(e => e.index));
222
+ for (let i = 0; i < msgs.length; i++) {
223
+ const m = msgs[i];
224
+ if (m.role === "user") {
225
+ const txt = extractText(m.content);
226
+ if (!txt.startsWith("/")) timeline.push({ index: i, event: "user_request", summary: txt.slice(0, 150) });
227
+ }
228
+ if (errorIndices.has(i)) timeline.push({ index: i, event: "error", summary: errors.find(e => e.index === i)?.message.slice(0, 100) ?? "error" });
229
+ }
230
+ return timeline.length > 30
231
+ ? [...timeline.filter(t => t.event === "user_request").slice(0, 10), ...timeline.filter(t => t.event === "error")]
232
+ : timeline;
233
+ }
234
+
235
+ export function extractMainGoal(msgs: LlmMessage[]): string | null {
236
+ for (const m of msgs) {
237
+ if (m?.role !== "user") continue;
238
+ const txt = extractText(m.content).trim();
239
+ if (txt && !txt.startsWith("/")) return txt.slice(0, 300);
240
+ }
241
+ return null;
242
+ }
243
+
244
+ export function extractStructured(msgs: LlmMessage[], pc: ProfileConfig): StructuredExtraction {
245
+ const { modified, read, deleted } = trackFileOps(msgs);
246
+ const errors = catalogErrors(msgs);
247
+ const decisions = extractDecisions(msgs);
248
+ const constraints = mineConstraints(msgs);
249
+ const topics = segmentTopicsHeuristic(msgs, pc);
250
+ const timeline = buildTimeline(msgs, errors);
251
+ const mainGoal = extractMainGoal(msgs);
252
+ const lastUserMessages = msgs.filter(m => m.role === "user").slice(-5).map(m => extractText(m.content));
253
+ const lastErrors = errors.slice(-3).map(e => e.message);
254
+ return {
255
+ modifiedFiles: modified, readFiles: read, deletedFiles: deleted,
256
+ errors, decisions, constraints, topics, timeline,
257
+ mainGoal, lastUserMessages, lastErrors, messageCount: msgs.length,
258
+ };
259
+ }
@@ -0,0 +1,190 @@
1
+ /**
2
+ * Lightweight project fingerprint for cross-session context.
3
+ * Stores basic project metadata to improve compaction accuracy.
4
+ */
5
+
6
+ import fs from "node:fs";
7
+ import path from "node:path";
8
+ import type { StructuredExtraction } from "../types.ts";
9
+
10
// Per-project metadata accumulated across sessions (see saveProjectFingerprint).
export interface ProjectFingerprint {
  id: string;                // stable hash-derived id (see deriveProjectId)
  language: string;          // dominant language, e.g. "typescript"
  framework: string | null;  // first matching framework signal, or null
  keyDirectories: string[];  // most frequently modified directories
  knownFiles: string[];      // recently seen file paths (capped at 50)
  sessionCount: number;      // number of sessions folded into this fingerprint
  updatedAt: number;         // epoch ms of last save; entries expire after 30 days
}

// Cache directory for fingerprint JSON files; falls back to /tmp when HOME is unset.
const FINGERPRINT_DIR = path.join(process.env.HOME ?? "/tmp", ".pi", "agent", ".cache", "smart-compact", "projects");

// Language detection heuristics from file extensions
const LANG_MAP: Record<string, string> = {
  ".ts": "typescript", ".tsx": "typescript",
  ".js": "javascript", ".jsx": "javascript",
  ".rs": "rust",
  ".py": "python",
  ".go": "go",
  ".java": "java",
  ".rb": "ruby",
  ".cs": "csharp",
  ".cpp": "cpp", ".c": "c", ".h": "c",
  ".swift": "swift",
  ".kt": "kotlin",
  ".php": "php",
};

// Framework detection from file paths and names.
// Order matters: specific framework configs come first, generic manifests
// (package.json) last, because detectFramework returns the first match.
const FRAMEWORK_SIGNALS: Array<{ pattern: RegExp; framework: string }> = [
  { pattern: /next\.config/i, framework: "nextjs" },
  { pattern: /nuxt\.config/i, framework: "nuxt" },
  { pattern: /vite\.config/i, framework: "vite" },
  { pattern: /astro\.config/i, framework: "astro" },
  { pattern: /tailwind\.config/i, framework: "tailwind" },
  { pattern: /django/i, framework: "django" },
  { pattern: /flask/i, framework: "flask" },
  { pattern: /cargo\.toml/i, framework: "cargo" },
  { pattern: /go\.mod/i, framework: "go-modules" },
  { pattern: /Gemfile/i, framework: "bundler" },
  { pattern: /package\.json/i, framework: "node" },
];
52
+
53
+ function getFingerprintPath(projectId: string): string {
54
+ return path.join(FINGERPRINT_DIR, projectId + ".json");
55
+ }
56
+
57
+ /**
58
+ * Generate a project ID from file paths in the extraction.
59
+ * Uses the most common root directory as a heuristic.
60
+ */
61
+ export function deriveProjectId(extraction: StructuredExtraction): string {
62
+ const allPaths = [
63
+ ...extraction.modifiedFiles.map(f => f.path),
64
+ ...extraction.readFiles,
65
+ ];
66
+ if (!allPaths.length) return "unknown";
67
+
68
+ // Find most common root directory
69
+ const roots = new Map<string, number>();
70
+ for (const p of allPaths) {
71
+ const parts = p.split("/");
72
+ const root = parts.length > 1 ? parts.slice(0, Math.min(2, parts.length - 1)).join("/") : "root";
73
+ roots.set(root, (roots.get(root) ?? 0) + 1);
74
+ }
75
+ const topRoot = [...roots.entries()].sort((a, b) => b[1] - a[1])[0]?.[0] ?? "unknown";
76
+ // Simple hash of the root
77
+ let hash = 0;
78
+ for (let i = 0; i < topRoot.length; i++) {
79
+ hash = ((hash << 5) - hash + topRoot.charCodeAt(i)) | 0;
80
+ }
81
+ return "proj-" + Math.abs(hash).toString(36);
82
+ }
83
+
84
+ /**
85
+ * Detect language from file extensions in extraction data.
86
+ */
87
+ function detectLanguage(extraction: StructuredExtraction): string {
88
+ const extCounts = new Map<string, number>();
89
+ for (const f of extraction.modifiedFiles) {
90
+ const ext = path.extname(f.path).toLowerCase();
91
+ if (ext && LANG_MAP[ext]) {
92
+ extCounts.set(LANG_MAP[ext], (extCounts.get(LANG_MAP[ext]) ?? 0) + 1);
93
+ }
94
+ }
95
+ for (const f of extraction.readFiles) {
96
+ const ext = path.extname(f).toLowerCase();
97
+ if (ext && LANG_MAP[ext]) {
98
+ extCounts.set(LANG_MAP[ext], (extCounts.get(LANG_MAP[ext]) ?? 0) + 1);
99
+ }
100
+ }
101
+ if (!extCounts.size) return "unknown";
102
+ return [...extCounts.entries()].sort((a, b) => b[1] - a[1])[0][0];
103
+ }
104
+
105
+ /**
106
+ * Detect framework from file paths.
107
+ */
108
+ function detectFramework(extraction: StructuredExtraction): string | null {
109
+ const allPaths = extraction.readFiles.join(" ") + " " + extraction.modifiedFiles.map(f => f.path).join(" ");
110
+ for (const { pattern, framework } of FRAMEWORK_SIGNALS) {
111
+ if (pattern.test(allPaths)) return framework;
112
+ }
113
+ return null;
114
+ }
115
+
116
+ /**
117
+ * Extract key directory patterns from file paths.
118
+ */
119
+ function extractKeyDirs(extraction: StructuredExtraction, maxDirs = 8): string[] {
120
+ const dirCounts = new Map<string, number>();
121
+ for (const f of extraction.modifiedFiles) {
122
+ const parts = f.path.split("/");
123
+ if (parts.length > 1) {
124
+ const dir = parts.slice(0, -1).join("/");
125
+ dirCounts.set(dir, (dirCounts.get(dir) ?? 0) + 1);
126
+ }
127
+ }
128
+ return [...dirCounts.entries()]
129
+ .sort((a, b) => b[1] - a[1])
130
+ .slice(0, maxDirs)
131
+ .map(([d]) => d);
132
+ }
133
+
134
+ /**
135
+ * Load project fingerprint from cache.
136
+ */
137
+ export function loadProjectFingerprint(projectId: string): ProjectFingerprint | null {
138
+ try {
139
+ const fp = getFingerprintPath(projectId);
140
+ if (!fs.existsSync(fp)) return null;
141
+ const data = JSON.parse(fs.readFileSync(fp, "utf8")) as ProjectFingerprint;
142
+ // Expire after 30 days
143
+ if (Date.now() - data.updatedAt > 30 * 24 * 60 * 60 * 1000) return null;
144
+ return data;
145
+ } catch { return null; }
146
+ }
147
+
148
+ /**
149
+ * Save/update project fingerprint after compaction.
150
+ */
151
+ export function saveProjectFingerprint(
152
+ projectId: string,
153
+ extraction: StructuredExtraction,
154
+ ): void {
155
+ try {
156
+ if (!fs.existsSync(FINGERPRINT_DIR)) fs.mkdirSync(FINGERPRINT_DIR, { recursive: true });
157
+
158
+ const existing = loadProjectFingerprint(projectId);
159
+ const newKnownFiles = [...new Set([
160
+ ...(existing?.knownFiles ?? []),
161
+ ...extraction.modifiedFiles.map(f => f.path),
162
+ ...extraction.readFiles,
163
+ ])].slice(-50); // Keep last 50 unique files
164
+
165
+ const fingerprint: ProjectFingerprint = {
166
+ id: projectId,
167
+ language: existing?.language ?? detectLanguage(extraction),
168
+ framework: existing?.framework ?? detectFramework(extraction),
169
+ keyDirectories: extractKeyDirs(extraction),
170
+ knownFiles: newKnownFiles,
171
+ sessionCount: (existing?.sessionCount ?? 0) + 1,
172
+ updatedAt: Date.now(),
173
+ };
174
+
175
+ fs.writeFileSync(getFingerprintPath(projectId), JSON.stringify(fingerprint, null, 2));
176
+ } catch { /* best effort */ }
177
+ }
178
+
179
+ /**
180
+ * Build a project context string for injection into prompts.
181
+ */
182
+ export function buildProjectContext(fingerprint: ProjectFingerprint | null): string {
183
+ if (!fingerprint) return "";
184
+ return [
185
+ "## Project Context (learned from " + fingerprint.sessionCount + " session(s))",
186
+ "Language: " + fingerprint.language,
187
+ fingerprint.framework ? "Framework: " + fingerprint.framework : "",
188
+ fingerprint.keyDirectories.length ? "Key dirs: " + fingerprint.keyDirectories.join(", ") : "",
189
+ ].filter(Boolean).join("\n");
190
+ }
@@ -0,0 +1,161 @@
1
+ /**
2
+ * General helpers: config, backup, batching, preprocessing.
3
+ */
4
+
5
+ import fs from "node:fs";
6
+ import path from "node:path";
7
+ import crypto from "node:crypto";
8
+ import type { CompactConfig, CompressionProfile, ChunkSummary, LlmChunk, ProfileConfig, LlmMessage, StructuredExtraction, ExplorationReport } from "../types.ts";
9
+ import { DEFAULT_CONFIG, PROFILES } from "../constants.ts";
10
+
11
// Cached parsed config plus the settings-file mtime it was read at, so the
// file is only re-parsed when it changes on disk.
let _cfg: CompactConfig | null = null;
let _cfgMtime = 0;

/**
 * Load smart-compact settings from ~/.pi/agent/settings.json (falling back
 * to /tmp when HOME is unset), merged over DEFAULT_CONFIG. The result is
 * cached and invalidated by file mtime. Any read/parse failure yields the
 * defaults with a backupDir filled in.
 */
export function loadConfig(): CompactConfig {
  try {
    const p = path.join(process.env.HOME ?? "/tmp", ".pi/agent/settings.json");
    const stat = fs.statSync(p);
    if (_cfg && stat.mtimeMs === _cfgMtime) return _cfg;
    const raw = JSON.parse(fs.readFileSync(p, "utf-8"));
    // Accept both the current key and the legacy "semanticCompact" key.
    const sc = raw.smartCompact ?? raw.semanticCompact ?? {};
    const merged = { ...DEFAULT_CONFIG, ...sc };
    // User-supplied profiles overlay the built-ins rather than replacing them.
    if (sc.profiles) merged.profiles = { ...PROFILES, ...sc.profiles };
    if (!merged.backupDir) merged.backupDir = path.join(process.env.HOME ?? "/tmp", ".pi/agent/compact-backups");
    _cfg = merged; _cfgMtime = stat.mtimeMs; return _cfg;
  } catch {
    return { ...DEFAULT_CONFIG, backupDir: path.join(process.env.HOME ?? "/tmp", ".pi/agent/compact-backups") };
  }
}
29
+
30
+ export function backupConversation(convText: string, sessionId: string): string | null {
31
+ try {
32
+ const cfg = loadConfig(); if (!cfg.backupEnabled) return null;
33
+ const dir = cfg.backupDir; fs.mkdirSync(dir, { recursive: true });
34
+ const ts = new Date().toISOString().replace(/[:.]/g, "-");
35
+ const hash = crypto.createHash("sha256").update(convText).digest("hex").slice(0, 8);
36
+ const fp = path.join(dir, sessionId + "-" + ts + "-" + hash + ".md");
37
+ fs.writeFileSync(fp, "# Smart Compact Backup\n# Date: " + new Date().toISOString() + "\n# Session: " + sessionId + "\n\n" + convText);
38
+ return fp;
39
+ } catch { return null; }
40
+ }
41
+
42
+ export function getPreviousCompactionContext(branch: unknown[]): string {
43
+ interface BranchEntry { type: string; details?: { topics?: string[]; method?: string } }
44
+ const compactions = branch.filter((e: BranchEntry) => e.type === "compaction");
45
+ if (!compactions.length) return "";
46
+ const last = compactions[compactions.length - 1] as BranchEntry;
47
+ const topics = last.details?.topics ?? [];
48
+ if (!topics.length) return "";
49
+ return "\n[IMPORTANT: Previous compaction exists (" + (last.details?.method ?? "unknown") + "). Already summarized topics: " + topics.join(", ") + ". Build upon this, don't re-summarize the same content.]";
50
+ }
51
+
52
+ interface SessionMessageEntry { type: "message"; id: string; message: unknown }
53
+
54
+ export function smartKeepBoundary(msgs: SessionMessageEntry[], keepFromIndex: number): number {
55
+ if (keepFromIndex <= 0 || keepFromIndex >= msgs.length) return keepFromIndex;
56
+ const last = msgs[keepFromIndex - 1];
57
+ const first = msgs[keepFromIndex];
58
+ if (last && first) {
59
+ const lastText = JSON.stringify(last.message).toLowerCase();
60
+ const keptText = JSON.stringify(first.message).toLowerCase();
61
+ const fileRe = /(?:path|file)=["']([^"']+)["']/g;
62
+ const lastFiles = new Set([...lastText.matchAll(fileRe)].map(m => m[1].split("/").pop()));
63
+ fileRe.lastIndex = 0;
64
+ const keptFiles = new Set([...keptText.matchAll(fileRe)].map(m => m[1].split("/").pop()));
65
+ if ([...lastFiles].filter(f => keptFiles.has(f)).length > 0) return keepFromIndex - 1;
66
+ }
67
+ return keepFromIndex;
68
+ }
69
+
70
+ export function extractUserNote(args: string): string | undefined {
71
+ const SKIP = new Set(["verbose", "debug", "dry-run", "light", "balanced", "aggressive"]);
72
+ const tokens = args.trim().split(/\s+/).filter(Boolean);
73
+ const nonFlags = tokens.filter(t => !t.includes("/") && !SKIP.has(t.toLowerCase()));
74
+ return nonFlags.length > 0 ? nonFlags.join(" ") : undefined;
75
+ }
76
+
77
+ export function createBatches(chunks: LlmChunk[], maxTokens: number): LlmChunk[][] {
78
+ const batches: LlmChunk[][] = [];
79
+ let batch: LlmChunk[] = [], bt = 0;
80
+ for (const ch of chunks) {
81
+ if (batch.length && bt + ch.tokenEstimate > maxTokens) { batches.push(batch); batch = []; bt = 0; }
82
+ batch.push(ch); bt += ch.tokenEstimate;
83
+ }
84
+ if (batch.length) batches.push(batch);
85
+ return batches;
86
+ }
87
+
88
+ /**
89
+ * Allocate token budget per topic based on priority, error density, and recency.
90
+ * Topics with higher weights get more detail preserved.
91
+ */
92
+ function allocateTopicBudgets(summaries: ChunkSummary[], totalBudget: number): Map<string, number> {
93
+ const n = summaries.length;
94
+ if (n === 0) return new Map();
95
+
96
+ const weights = summaries.map((s, i) => {
97
+ let w = 1.0;
98
+ // Priority weighting
99
+ if (s.priority === "critical") w *= 2.0;
100
+ else if (s.priority === "high") w *= 1.5;
101
+ else if (s.priority === "low") w *= 0.6;
102
+ // Error density — topics with errors need more context
103
+ const errorKeywords = (s.summary.match(/error|fail|bug|fix|crash|exception/gi) ?? []).length;
104
+ w *= (1 + errorKeywords * 0.2);
105
+ // Recency — later topics are more relevant
106
+ const recency = (i + 1) / n;
107
+ w *= (0.6 + recency * 0.4);
108
+ // Topics with decisions are important
109
+ if (s.keyDecisions.length > 0) w *= 1.3;
110
+ return w;
111
+ });
112
+
113
+ const totalWeight = weights.reduce((a, b) => a + b, 0);
114
+ const baseTokensPerTopic = Math.floor(totalBudget / n);
115
+ const budgetMap = new Map<string, number>();
116
+ for (let i = 0; i < summaries.length; i++) {
117
+ const allocated = Math.round(baseTokensPerTopic * (weights[i] / (totalWeight / n)));
118
+ budgetMap.set(summaries[i].topic, Math.max(200, allocated)); // minimum 200 tokens per topic
119
+ }
120
+ return budgetMap;
121
+ }
122
+
123
+ export function preProcessSummaries(summaries: ChunkSummary[], budgetTokens?: number) {
124
+ const topicBudgets = budgetTokens ? allocateTopicBudgets(summaries, budgetTokens) : null;
125
+ return {
126
+ decisions: [...new Set(summaries.flatMap(s => s.keyDecisions))],
127
+ modified: [...new Set(summaries.flatMap(s => s.filesModified))].sort(),
128
+ read: [...new Set(summaries.flatMap(s => s.filesRead))].sort(),
129
+ text: summaries.map((cs, i) => {
130
+ const budgetHint = topicBudgets?.get(cs.topic);
131
+ const budgetLine = budgetHint ? "\nBudget: ~" + budgetHint + " tokens" : "";
132
+ return "### Segment " + (i + 1) + ": " + cs.topic + "\nPriority: " + cs.priority + " | msgs " + cs.startIndex + "-" + cs.endIndex + budgetLine + "\n\n" + cs.summary + "\n\nDecisions: " + (cs.keyDecisions.join("; ") || "None") + "\nModified: " + (cs.filesModified.join(", ") || "None") + "\nRead: " + (cs.filesRead.join(", ") || "None");
133
+ }).join("\n---\n"),
134
+ };
135
+ }
136
+
137
+ export function buildExtractionContext(extraction: StructuredExtraction, forRange?: { start: number; end: number }): string {
138
+ const files = forRange ? extraction.modifiedFiles.filter(f => f.lastModifiedIndex >= forRange.start && f.lastModifiedIndex <= forRange.end) : extraction.modifiedFiles;
139
+ const errors = forRange ? extraction.errors.filter(e => e.index >= forRange.start && e.index <= forRange.end) : extraction.errors;
140
+ return [
141
+ "## Deterministic Extraction (verified facts)",
142
+ "Files modified: " + (files.map(f => f.path).join(", ") || "none"),
143
+ "Errors: " + (errors.map(e => "[" + e.tool + "] " + e.message.slice(0, 80) + (e.resolved ? " ✓" : "")).join("; ") || "none"),
144
+ "Decisions: " + (extraction.decisions.map(d => d.type + ": " + d.summary.slice(0, 60)).join("; ") || "none"),
145
+ "Constraints: " + (extraction.constraints.map(c => "[" + c.category + "] " + c.text.slice(0, 60)).join("; ") || "none"),
146
+ ].join("\n");
147
+ }
148
+
149
+ export function buildExplorationContext(report: ExplorationReport): string {
150
+ if (!report.mainGoal && !report.crossReferences.length && !report.enrichedConstraints.length) return "";
151
+ return [
152
+ "## Exploration Report",
153
+ "Main goal: " + report.mainGoal,
154
+ "Session type: " + report.sessionType,
155
+ report.crossReferences.length ? "Cross-references: " + report.crossReferences.join("; ") : "",
156
+ report.enrichedConstraints.length ? "Enriched constraints: " + report.enrichedConstraints.join("; ") : "",
157
+ report.statusAssessment.done.length ? "Assessed done: " + report.statusAssessment.done.join("; ") : "",
158
+ report.statusAssessment.inProgress.length ? "Assessed in-progress: " + report.statusAssessment.inProgress.join("; ") : "",
159
+ report.criticalContext.length ? "Critical context: " + report.criticalContext.join("; ") : "",
160
+ ].filter(Boolean).join("\n");
161
+ }
@@ -0,0 +1,21 @@
1
+ import type { LlmContentBlock, LlmMessage, LlmTextBlock, LlmToolCallBlock } from "../types";
2
+
3
+ export function getBlocks(message: Pick<LlmMessage, "content">): LlmContentBlock[] {
4
+ return Array.isArray(message.content) ? message.content : [];
5
+ }
6
+
7
+ export function isTextBlock(block: LlmContentBlock): block is LlmTextBlock {
8
+ return typeof block !== "string" && block.type === "text";
9
+ }
10
+
11
+ export function isToolCallBlock(block: LlmContentBlock): block is LlmToolCallBlock {
12
+ return typeof block !== "string" && block.type === "toolCall";
13
+ }
14
+
15
+ export function getToolArgumentString(args: Record<string, unknown>, ...keys: string[]): string {
16
+ for (const key of keys) {
17
+ const value = args[key];
18
+ if (typeof value === "string" && value) return value;
19
+ }
20
+ return "";
21
+ }