@freesyntax/notch-cli 0.5.13 → 0.5.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,213 @@
1
+ // src/agent/compression.ts
2
+ import { generateText } from "ai";
3
/**
 * Roughly estimate how many tokens a list of chat messages occupies.
 *
 * Characters are counted from string content, from `text` parts, and from the
 * JSON form of `result` / `args` parts; the total is divided by four
 * (a chars-per-token heuristic) and rounded up.
 *
 * @param {Array<{content?: unknown}>} messages - Messages whose `content` is
 *   either a string or an array of parts. Any other content shape counts as 0.
 * @returns {number} Estimated token count.
 */
function estimateTokens(messages) {
  // JSON.stringify returns undefined for undefined/functions; count that as 0
  // instead of throwing on `.length`.
  const jsonLength = (value) => JSON.stringify(value)?.length ?? 0;

  let chars = 0;
  for (const msg of messages) {
    const { content } = msg;
    if (typeof content === "string") {
      chars += content.length;
      continue;
    }
    if (!Array.isArray(content)) continue;

    for (const part of content) {
      // The `in` operator throws on null and primitives, so skip malformed parts.
      if (part === null || typeof part !== "object") continue;

      if ("text" in part) {
        if (typeof part.text === "string") chars += part.text.length;
      } else if ("result" in part) {
        chars += jsonLength(part.result);
      } else if ("args" in part) {
        chars += jsonLength(part.args);
      }
    }
  }
  return Math.ceil(chars / 4);
}
18
// Maximum characters kept from an oversized assistant text message.
var TEXT_BLOCK_MAX = 8e3;

/**
 * Cheap first-pass compaction: truncate very long assistant text messages.
 *
 * The first message and the last four messages are passed through untouched.
 * Any other message whose role is "assistant" and whose content is a string
 * longer than TEXT_BLOCK_MAX is replaced by a copy cut to TEXT_BLOCK_MAX
 * characters with a "[truncated]" marker appended.
 *
 * @param {Array<object>} messages - Conversation history (not mutated).
 * @returns {Array<object>} New array with the same number of messages.
 */
function microCompact(messages) {
  const pinnedTailStart = messages.length - 4;
  const compacted = [];

  for (const [position, message] of messages.entries()) {
    const isPinned = position === 0 || position >= pinnedTailStart;
    const isLongAssistantText =
      !isPinned &&
      typeof message.content === "string" &&
      message.content.length > TEXT_BLOCK_MAX &&
      message.role === "assistant";

    compacted.push(
      isLongAssistantText
        ? { ...message, content: message.content.slice(0, TEXT_BLOCK_MAX) + "\n... [truncated]" }
        : message
    );
  }

  return compacted;
}
38
// Tokens held back from the context window when computing the compression threshold.
var RESERVE_BUFFER_TOKENS = 13e3;
// Budget used to derive the summarizer's maxTokens (capped at 1024 below).
var MAX_SUMMARY_TOKENS = 2e4;
// After this many consecutive summarizer failures, only the deterministic fallback is used.
var MAX_COMPRESSION_FAILURES = 3;
// Module-level count of consecutive summarizer failures; reset on success.
var compressionFailures = 0;

/**
 * Compress the middle of a conversation into a model-generated summary.
 *
 * Nothing is done while the estimated token count is below 75% of
 * (contextWindow - RESERVE_BUFFER_TOKENS) or there are fewer than six
 * messages. Otherwise the first message and the last `keepRecent` messages
 * are preserved and everything in between is summarized via `generateText`.
 * If the summarizer throws or returns an empty summary, or it has already
 * failed MAX_COMPRESSION_FAILURES times in a row, `deterministicCompress`
 * is used instead.
 *
 * @param {Array<object>} messages - Conversation history.
 * @param {{model: unknown, contextWindow: number, keepRecent?: number}} opts
 *   `keepRecent` defaults to 4.
 * @returns {Promise<{messages: Array<object>, compressed: boolean}>}
 */
async function autoCompactSummarize(messages, opts) {
  const threshold = opts.contextWindow - RESERVE_BUFFER_TOKENS;
  const currentTokens = estimateTokens(messages);
  if (currentTokens < threshold * 0.75 || messages.length < 6) {
    return { messages, compressed: false };
  }

  const keepRecent = opts.keepRecent ?? 4;
  if (compressionFailures >= MAX_COMPRESSION_FAILURES) {
    return deterministicCompress(messages, keepRecent);
  }

  const keepStart = 1;
  // Use an explicit split index: `slice(-0)` is `slice(0)` and would return the
  // whole array, so keepRecent === 0 could never compress anything.
  const tailStart = Math.max(keepStart, messages.length - Math.max(0, keepRecent));
  const head = messages.slice(0, keepStart);
  const middle = messages.slice(keepStart, tailStart);
  const tail = messages.slice(tailStart);
  if (middle.length === 0) {
    return { messages, compressed: false };
  }

  const middleText = summarizeMessages(middle);
  let summaryText;
  try {
    const result = await generateText({
      model: opts.model,
      system: "You are a conversation summarizer. Condense the following conversation history into a brief summary. Preserve: files modified, key decisions, errors encountered, and the current task state. Be concise but thorough. Output only the summary, no preamble.",
      messages: [{ role: "user", content: middleText }],
      maxTokens: Math.min(1024, Math.floor(MAX_SUMMARY_TOKENS / 4))
    });
    summaryText = result.text;
  } catch {
    // Best-effort: a failing summarizer must not break the agent loop.
    compressionFailures++;
    return deterministicCompress(messages, keepRecent);
  }

  // An empty summary would silently erase the whole middle of the history;
  // treat it like a failure and keep the deterministic facts instead.
  if (typeof summaryText !== "string" || summaryText.trim() === "") {
    compressionFailures++;
    return deterministicCompress(messages, keepRecent);
  }

  compressionFailures = 0;
  return buildCompressedHistory(head, summaryText, tail);
}
76
/**
 * Last-resort compaction: fold everything except the final two messages
 * into a single summary.
 *
 * Histories with fewer than four messages are returned unchanged. The
 * summary is requested from the model; if that request fails, the summary
 * produced by `buildDeterministicSummary` is used in its place.
 *
 * @param {Array<object>} messages - Conversation history.
 * @param {unknown} model - Model handle forwarded to `generateText`.
 * @returns {Promise<{messages: Array<object>, compressed: boolean}>}
 */
async function fullCompact(messages, model) {
  if (messages.length < 4) return { messages, compressed: false };

  const olderMessages = messages.slice(0, -2);
  const newestPair = messages.slice(-2);
  const transcript = summarizeMessages(olderMessages);

  let digest;
  try {
    ({ text: digest } = await generateText({
      model,
      system: "Compress this entire conversation into a dense summary. Include: the original task, all files created/modified, key decisions, current state, and any unresolved issues. Max 500 words.",
      messages: [{ role: "user", content: transcript }],
      maxTokens: 2048
    }));
  } catch {
    // Summarizer unavailable: fall back to the locally computed summary.
    digest = buildDeterministicSummary(olderMessages);
  }

  return buildCompressedHistory([], digest, newestPair);
}
97
/**
 * Compress a conversation without calling a model.
 *
 * Keeps the first message and the last `keepRecent` messages, and replaces
 * everything in between with the text from `buildDeterministicSummary`.
 *
 * @param {Array<object>} messages - Conversation history.
 * @param {number} keepRecent - Number of trailing messages to preserve.
 * @returns {{messages: Array<object>, compressed: boolean}} The original
 *   array with `compressed: false` when there is nothing between head and tail.
 */
function deterministicCompress(messages, keepRecent) {
  // Use an explicit split index: `slice(-0)` is `slice(0)` and would return the
  // whole array, so keepRecent === 0 could never compress anything.
  const tailStart = Math.max(1, messages.length - Math.max(0, keepRecent));
  const head = messages.slice(0, 1);
  const middle = messages.slice(1, tailStart);
  const tail = messages.slice(tailStart);
  if (middle.length === 0) return { messages, compressed: false };

  const summaryText = buildDeterministicSummary(middle);
  return buildCompressedHistory(head, summaryText, tail);
}
105
/**
 * Build a plain-text summary of a message list without calling a model.
 *
 * Collects the first 100 characters of each string-content user message,
 * the distinct `toolName` values found in content parts, the distinct
 * `args.path` values of those parts, and the number of parts whose
 * `result.isError` is truthy.
 *
 * @param {Array<object>} messages - Messages to summarize.
 * @returns {string} Newline-joined bullet list; sections with no data are omitted.
 */
function buildDeterministicSummary(messages) {
  const touchedPaths = new Set();
  const toolNames = new Set();
  const requests = [];
  let failures = 0;

  for (const { role, content } of messages) {
    if (role === "user" && typeof content === "string") {
      requests.push(content.slice(0, 100));
    }
    if (!Array.isArray(content)) continue;

    for (const piece of content) {
      if ("toolName" in piece) {
        toolNames.add(String(piece.toolName));
        const target = piece.args?.path;
        if (target) touchedPaths.add(String(target));
      }
      // Checked independently: a part may carry both a tool name and a result.
      if ("result" in piece && piece.result?.isError) failures++;
    }
  }

  const report = [
    "Summary of previous conversation:",
    requests.length > 0 ? `- User requests: ${requests.join("; ")}` : null,
    toolNames.size > 0 ? `- Tools used: ${[...toolNames].join(", ")}` : null,
    touchedPaths.size > 0 ? `- Files touched: ${[...touchedPaths].join(", ")}` : null,
    failures > 0 ? `- Errors encountered: ${failures}` : null,
    `- Total messages summarized: ${messages.length}`,
  ];
  return report.filter((line) => line !== null).join("\n");
}
138
/**
 * Flatten messages into a compact transcript used as summarizer input.
 *
 * Each message becomes one `ROLE: ...` line. String content is cut to 500
 * characters. For array content, text parts are cut to 200 characters, parts
 * with a `result` are rendered as `[result: <first 100 chars of JSON>]`, and
 * remaining parts with a `toolName` are rendered as `[tool: <name>]`.
 * Messages whose content is neither a string nor an array are omitted.
 *
 * @param {Array<{role: string, content: unknown}>} messages
 * @returns {string} Newline-joined transcript.
 */
function summarizeMessages(messages) {
  const lines = [];
  for (const msg of messages) {
    const role = msg.role.toUpperCase();
    if (typeof msg.content === "string") {
      lines.push(`${role}: ${msg.content.slice(0, 500)}`);
      continue;
    }
    if (!Array.isArray(msg.content)) continue;

    const parts = [];
    for (const part of msg.content) {
      // The `in` operator throws on null and primitives, so skip malformed parts.
      if (part === null || typeof part !== "object") continue;

      if ("text" in part) {
        parts.push(String(part.text).slice(0, 200));
      } else if ("result" in part) {
        // Test `result` before `toolName`: a result part may also carry the tool
        // name, and checking the name first would hide the tool output (including
        // errors) from the summarizer.
        // JSON.stringify returns undefined for undefined input, so fall back to String().
        const serialized = JSON.stringify(part.result) ?? String(part.result);
        parts.push(`[result: ${serialized.slice(0, 100)}]`);
      } else if ("toolName" in part) {
        parts.push(`[tool: ${part.toolName}]`);
      }
    }
    lines.push(`${role}: ${parts.join(" | ")}`);
  }
  return lines.join("\n");
}
156
/**
 * Assemble a compressed history from preserved messages and a summary.
 *
 * The summary is wrapped in "[Previous conversation context]" markers. When
 * the first preserved tail message is a user message, the summary is merged
 * into it (so no extra turn is inserted); otherwise a user message carrying
 * the summary and a short assistant acknowledgement are inserted before the
 * tail.
 *
 * @param {Array<object>} head - Messages kept at the start of the history.
 * @param {string} summaryText - Summary of the removed messages.
 * @param {Array<object>} tail - Most recent messages, kept verbatim.
 * @returns {{messages: Array<object>, compressed: true}}
 */
function buildCompressedHistory(head, summaryText, tail) {
  const compressed = [...head];
  const summaryContent = `[Previous conversation context]\n${summaryText}\n[End of context]`;
  const [first, ...rest] = tail;

  if (first?.role === "user") {
    if (Array.isArray(first.content)) {
      // Multi-part user content: keep every original part and prepend the
      // summary as a text part instead of discarding the user's message.
      compressed.push({
        role: "user",
        content: [{ type: "text", text: summaryContent }, ...first.content]
      });
    } else {
      const firstContent = typeof first.content === "string" ? first.content : "";
      compressed.push({
        role: "user",
        content: `${summaryContent}\n\n---\n\n${firstContent}`
      });
    }
    compressed.push(...rest);
  } else {
    compressed.push({ role: "user", content: summaryContent });
    compressed.push({
      role: "assistant",
      content: "Understood. I have the context from our previous conversation. Continuing."
    });
    compressed.push(...tail);
  }
  return { messages: compressed, compressed: true };
}
182
/**
 * Run the compression tiers in order until the history fits the budget.
 *
 * The budget is 75% of (contextWindow - RESERVE_BUFFER_TOKENS). Tiers are
 * `microCompact`, then `autoCompactSummarize`, then `fullCompact`; each later
 * tier runs only if the estimate is still at or above the budget.
 *
 * @param {Array<object>} messages - Conversation history.
 * @param {unknown} model - Model handle forwarded to the summarizing tiers.
 * @param {number} contextWindow - Context window size in tokens.
 * @param {() => void} [onCompress] - Invoked once per tier that reports
 *   `compressed: true`.
 * @returns {Promise<Array<object>>} The (possibly compressed) history.
 */
async function autoCompress(messages, model, contextWindow, onCompress) {
  const budget = (contextWindow - RESERVE_BUFFER_TOKENS) * 0.75;

  let current = microCompact(messages);
  if (estimateTokens(current) < budget) return current;

  const summarized = await autoCompactSummarize(current, { model, contextWindow });
  if (summarized.compressed) {
    onCompress?.();
    current = summarized.messages;
    // Only re-measure when this tier changed the history.
    if (estimateTokens(current) < budget) return current;
  }

  const collapsed = await fullCompact(current, model);
  if (!collapsed.compressed) return current;

  onCompress?.();
  return collapsed.messages;
}
201
/**
 * Public entry point for summary-based compression; delegates directly to
 * `autoCompactSummarize` with the same arguments.
 *
 * @param {Array<object>} messages - Conversation history.
 * @param {object} opts - Options forwarded unchanged to `autoCompactSummarize`.
 * @returns {Promise<{messages: Array<object>, compressed: boolean}>}
 */
async function compressHistory(messages, opts) {
  return await autoCompactSummarize(messages, opts);
}
205
+
206
+ export {
207
+ estimateTokens,
208
+ microCompact,
209
+ autoCompactSummarize,
210
+ fullCompact,
211
+ autoCompress,
212
+ compressHistory
213
+ };
@@ -0,0 +1,17 @@
1
+ import {
2
+ autoCompactSummarize,
3
+ autoCompress,
4
+ compressHistory,
5
+ estimateTokens,
6
+ fullCompact,
7
+ microCompact
8
+ } from "./chunk-6M6CXXWR.js";
9
+ import "./chunk-3RG5ZIWI.js";
10
+ export {
11
+ autoCompactSummarize,
12
+ autoCompress,
13
+ compressHistory,
14
+ estimateTokens,
15
+ fullCompact,
16
+ microCompact
17
+ };
package/dist/index.js CHANGED
@@ -7,7 +7,7 @@ import {
7
7
  import {
8
8
  autoCompress,
9
9
  estimateTokens
10
- } from "./chunk-MWM5TFY4.js";
10
+ } from "./chunk-6M6CXXWR.js";
11
11
  import {
12
12
  __require
13
13
  } from "./chunk-3RG5ZIWI.js";
@@ -77,15 +77,6 @@ var MODEL_CATALOG = {
77
77
  maxOutputTokens: 16384,
78
78
  baseUrl: "https://cutmob--notch-serve-solace-notchsolaceserver-serve.modal.run/v1"
79
79
  },
80
- "notch-forge-lite": {
81
- id: "notch-forge-lite",
82
- label: "Forge Lite",
83
- size: "9B",
84
- gpu: "L4",
85
- contextWindow: 131072,
86
- maxOutputTokens: 16384,
87
- baseUrl: "https://cutmob--notch-serve-forge-lite-notchforgeliteserver-serve.modal.run/v1"
88
- },
89
80
  "notch-solace-lite": {
90
81
  id: "notch-solace-lite",
91
82
  label: "Solace Lite",
@@ -417,36 +408,94 @@ var editTool = {
417
408
  };
418
409
 
419
410
  // src/tools/shell.ts
420
- import { execSync } from "child_process";
411
+ import { execFile, exec } from "child_process";
412
+ import { promisify } from "util";
421
413
  import path5 from "path";
422
414
  import { z as z4 } from "zod";
415
+ var execFileAsync = promisify(execFile);
416
+ var execAsync = promisify(exec);
423
417
  var BLOCKED_PATTERNS = [
424
418
  /rm\s+-rf\s+\/(?!\S)/,
425
419
  // rm -rf /
426
420
  /mkfs\./,
421
+ // format filesystem
427
422
  /dd\s+if=.*of=\/dev/,
423
+ // raw disk write
428
424
  /:\(\)\s*\{.*:\|:.*\}/,
429
- // fork bomb variants
430
- /chmod\s+-R\s+777\s+\//
425
+ // fork bomb
426
+ /chmod\s+-R\s+777\s+\//,
431
427
  // recursive chmod on root
428
+ /curl\s.*\|\s*(?:ba)?sh/,
429
+ // curl | sh (remote code execution)
430
+ /wget\s.*\|\s*(?:ba)?sh/,
431
+ // wget | sh
432
+ />\s*\/dev\/sd[a-z]/,
433
+ // overwrite disk device
434
+ /shutdown|reboot|init\s+[06]/,
435
+ // system shutdown/reboot
436
+ /rm\s+-rf\s+~\//
437
+ // rm -rf ~/
432
438
  ];
433
439
  var DESTRUCTIVE_PATTERNS = [
434
440
  /rm\s+-rf/,
435
441
  /rm\s+-r\s/,
436
442
  /git\s+push\s+--force(?!\s+--with-lease)/,
437
443
  /git\s+reset\s+--hard/,
444
+ /git\s+clean\s+-f/,
445
+ /git\s+checkout\s+--\s*\./,
438
446
  /DROP\s+(TABLE|DATABASE)/i,
439
447
  /TRUNCATE/i,
440
- />\s*\/dev\/sd/
448
+ />\s*\/dev\/sd/,
449
+ /docker\s+(rm|rmi|system\s+prune)/,
450
+ /kubectl\s+delete/,
451
+ /npm\s+unpublish/
452
+ ];
453
+ var BLOCKED_ENV_PATTERNS = [
454
+ /\bLD_PRELOAD=/,
455
+ /\bLD_LIBRARY_PATH=/,
456
+ /\bDYLD_INSERT_LIBRARIES=/,
457
+ /\bPATH=\//,
458
+ // Setting PATH to absolute (could shadow binaries)
459
+ /\bHOME=\//,
460
+ /\bSHELL=/
441
461
  ];
442
462
  var MAX_OUTPUT = 5e4;
443
463
  var DEFAULT_TIMEOUT = 3e4;
444
464
  var MAX_TIMEOUT = 6e5;
445
465
  var parameters4 = z4.object({
446
466
  command: z4.string().describe("Shell command to execute"),
447
- timeout: z4.number().optional().describe("Timeout in ms (default 30s, max configurable up to 10m)")
467
+ timeout: z4.number().optional().describe("Timeout in ms (default 30s, max 10m)")
448
468
  });
449
469
  function validateCommand(command, cwd) {
470
+ for (const pattern of BLOCKED_PATTERNS) {
471
+ if (pattern.test(command)) {
472
+ return `Blocked: this command is too dangerous to execute.`;
473
+ }
474
+ }
475
+ for (const pattern of BLOCKED_ENV_PATTERNS) {
476
+ if (pattern.test(command)) {
477
+ return `Blocked: command attempts to override a protected environment variable.`;
478
+ }
479
+ }
480
+ const pipeSegments = command.split(/\s*\|\s*/);
481
+ for (const segment of pipeSegments) {
482
+ for (const pattern of BLOCKED_PATTERNS) {
483
+ if (pattern.test(segment.trim())) {
484
+ return `Blocked: a pipe segment contains a dangerous command.`;
485
+ }
486
+ }
487
+ }
488
+ const subCommands = [
489
+ ...command.matchAll(/\$\(([^)]+)\)/g),
490
+ ...command.matchAll(/`([^`]+)`/g)
491
+ ];
492
+ for (const match2 of subCommands) {
493
+ for (const pattern of BLOCKED_PATTERNS) {
494
+ if (pattern.test(match2[1])) {
495
+ return `Blocked: command substitution contains a dangerous command.`;
496
+ }
497
+ }
498
+ }
450
499
  const fileOpRegex = /(?:^|\s)(?:>|>>|cat|cp|mv|ln|tee|tar|zip|scp|rsync|chmod|chown|rm)\s+(\/(?!tmp\b|dev\/null\b)[^\s]+)/g;
451
500
  let match;
452
501
  while ((match = fileOpRegex.exec(command)) !== null) {
@@ -456,10 +505,19 @@ function validateCommand(command, cwd) {
456
505
  }
457
506
  }
458
507
  if (/(?:^|\s)(?:\.\.\/){3,}/.test(command)) {
459
- return "Blocked: deep path traversal detected";
508
+ return "Blocked: deep path traversal detected.";
460
509
  }
461
510
  return null;
462
511
  }
512
/**
 * Report whether a shell command matches any entry of DESTRUCTIVE_PATTERNS.
 *
 * The patterns are tested against the full command and against each of its
 * pipe-separated segments.
 *
 * @param {string} command - Shell command line.
 * @returns {boolean} True if any pattern matches any candidate string.
 */
function isDestructive(command) {
  const candidates = [command].concat(command.split(/\s*\|\s*/));
  return candidates.some((candidate) =>
    DESTRUCTIVE_PATTERNS.some((pattern) => pattern.test(candidate))
  );
}
463
521
  var shellTool = {
464
522
  name: "shell",
465
523
  description: "Execute a shell command in the project directory. Dangerous commands (rm -rf, DROP TABLE, git push --force) require confirmation. Some destructive system commands are blocked entirely.",
@@ -468,50 +526,38 @@ var shellTool = {
468
526
  const { command } = params;
469
527
  const maxTimeout = ctx.shellTimeout ?? MAX_TIMEOUT;
470
528
  const timeout = Math.min(params.timeout ?? DEFAULT_TIMEOUT, maxTimeout);
471
- for (const pattern of BLOCKED_PATTERNS) {
472
- if (pattern.test(command)) {
473
- return {
474
- content: `Blocked: "${command}" is too dangerous to execute.`,
475
- isError: true
476
- };
477
- }
529
+ const validationError = validateCommand(command, ctx.cwd);
530
+ if (validationError) {
531
+ return { content: validationError, isError: true };
478
532
  }
479
- const pathError = validateCommand(command, ctx.cwd);
480
- if (pathError) {
481
- return { content: pathError, isError: true };
482
- }
483
- if (ctx.requireConfirm) {
484
- for (const pattern of DESTRUCTIVE_PATTERNS) {
485
- if (pattern.test(command)) {
486
- const confirmed = await ctx.confirm(
487
- `\u26A0 Destructive command: ${command}
533
+ if (ctx.requireConfirm && isDestructive(command)) {
534
+ const confirmed = await ctx.confirm(
535
+ `\u26A0 Destructive command: ${command}
488
536
  Proceed?`
489
- );
490
- if (!confirmed) {
491
- return { content: "Command cancelled by user.", isError: true };
492
- }
493
- break;
494
- }
537
+ );
538
+ if (!confirmed) {
539
+ return { content: "Command cancelled by user.", isError: true };
495
540
  }
496
541
  }
497
542
  try {
498
- const output = execSync(command, {
543
+ const { stdout, stderr } = await execAsync(command, {
499
544
  cwd: ctx.cwd,
500
545
  encoding: "utf-8",
501
546
  timeout,
502
547
  maxBuffer: 10 * 1024 * 1024,
503
- env: { ...process.env, FORCE_COLOR: "0" },
504
- stdio: ["pipe", "pipe", "pipe"]
548
+ env: { ...process.env, FORCE_COLOR: "0" }
505
549
  });
506
- const trimmed = output.length > MAX_OUTPUT ? output.slice(0, MAX_OUTPUT) + `
507
- ... (truncated, ${output.length} chars total)` : output;
550
+ const combined = [stdout, stderr].filter(Boolean).join("\n");
551
+ const trimmed = combined.length > MAX_OUTPUT ? combined.slice(0, MAX_OUTPUT) + `
552
+ ... (truncated, ${combined.length} chars total)` : combined;
508
553
  return { content: trimmed || "(no output)" };
509
554
  } catch (err) {
510
- const stderr = err.stderr?.toString() ?? "";
511
- const stdout = err.stdout?.toString() ?? "";
555
+ const e = err;
556
+ const stderr = e.stderr ?? "";
557
+ const stdout = e.stdout ?? "";
512
558
  const combined = [stdout, stderr].filter(Boolean).join("\n");
513
559
  const trimmed = combined.length > MAX_OUTPUT ? combined.slice(0, MAX_OUTPUT) + "\n... (truncated)" : combined;
514
- if (err.killed && err.signal === "SIGTERM") {
560
+ if (e.killed && e.signal === "SIGTERM") {
515
561
  return {
516
562
  content: `Command timed out after ${(timeout / 1e3).toFixed(0)}s: ${command}
517
563
 
@@ -521,8 +567,8 @@ ${trimmed || "(none)"}`,
521
567
  };
522
568
  }
523
569
  return {
524
- content: `Command failed (exit ${err.status ?? "unknown"}):
525
- ${trimmed || err.message}`,
570
+ content: `Command failed (exit ${e.status ?? e.code ?? "unknown"}):
571
+ ${trimmed || e.message || "Unknown error"}`,
526
572
  isError: true
527
573
  };
528
574
  }
@@ -666,7 +712,7 @@ Proceed?`
666
712
  };
667
713
 
668
714
  // src/tools/grep.ts
669
- import { execSync as execSync2 } from "child_process";
715
+ import { execSync } from "child_process";
670
716
  import fs5 from "fs/promises";
671
717
  import path6 from "path";
672
718
  import { z as z6 } from "zod";
@@ -696,7 +742,7 @@ var grepTool = {
696
742
  JSON.stringify(params.pattern),
697
743
  searchPath
698
744
  ].filter(Boolean).join(" ");
699
- const output = execSync2(rgArgs, {
745
+ const output = execSync(rgArgs, {
700
746
  cwd: ctx.cwd,
701
747
  encoding: "utf-8",
702
748
  timeout: 15e3,
@@ -2404,6 +2450,25 @@ async function updateIndex() {
2404
2450
  }
2405
2451
 
2406
2452
  // src/agent/loop.ts
2453
// Maximum number of retries after the first attempt.
var MAX_RETRIES = 3;
// Delay in milliseconds before each successive retry.
var RETRY_DELAYS = [1e3, 3e3, 8e3];

/**
 * Decide whether a failure looks transient and is worth retrying.
 *
 * Only `Error` instances qualify; the lower-cased message is checked for a
 * fixed set of substrings (HTTP 502/503/429, rate limiting, timeouts and
 * network-level failures).
 *
 * @param {unknown} err - The thrown value.
 * @returns {boolean} True when the message contains any retryable marker.
 */
function isRetryableError(err) {
  if (!(err instanceof Error)) return false;

  const retryableMarkers = [
    "502",
    "503",
    "429",
    "rate limit",
    "timeout",
    "econnreset",
    "econnrefused",
    "fetch failed",
    "network",
    "aborted",
  ];
  const haystack = err.message.toLowerCase();
  return retryableMarkers.some((marker) => haystack.includes(marker));
}
2460
/**
 * Wait for the given number of milliseconds.
 *
 * @param {number} ms - Delay passed to `setTimeout`.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
async function sleep(ms) {
  await new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
2463
/**
 * Build a comparable fingerprint for a failed tool call.
 *
 * @param {string} toolName - Name of the tool that failed.
 * @param {string} result - Error text returned by the tool.
 * @returns {{toolName: string, errorPrefix: string}} The tool name paired
 *   with the first 120 characters of the error text.
 */
function getErrorSignature(toolName, result) {
  const errorPrefix = result.slice(0, 120);
  return { toolName, errorPrefix };
}
2469
/**
 * Compare two error signatures for equality.
 *
 * @param {{toolName: string, errorPrefix: string}} a
 * @param {{toolName: string, errorPrefix: string}} b
 * @returns {boolean} True when both the tool name and the error prefix are
 *   strictly equal.
 */
function signaturesMatch(a, b) {
  if (a.toolName !== b.toolName) return false;
  return a.errorPrefix === b.errorPrefix;
}
2407
2472
  async function runAgentLoop(messages, config) {
2408
2473
  const readCache = /* @__PURE__ */ new Map();
2409
2474
  const toolCtxWithCache = {
@@ -2418,6 +2483,8 @@ async function runAgentLoop(messages, config) {
2418
2483
  let totalPromptTokens = 0;
2419
2484
  let totalCompletionTokens = 0;
2420
2485
  let wasCompressed = false;
2486
+ const recentErrors = [];
2487
+ const MAX_REPEATED_ERRORS = 3;
2421
2488
  let history = [...messages];
2422
2489
  await config.toolContext.runHook?.("pre-compact", { messageCount: history.length });
2423
2490
  history = await autoCompress(history, config.model, contextWindow, () => {
@@ -2429,53 +2496,104 @@ async function runAgentLoop(messages, config) {
2429
2496
  }
2430
2497
  while (iterations < maxIter) {
2431
2498
  iterations++;
2432
- const result = streamText({
2433
- model: config.model,
2434
- system: config.systemPrompt,
2435
- messages: history,
2436
- tools,
2437
- maxSteps: 1
2438
- // We manage the loop ourselves for better control
2439
- });
2440
2499
  let fullText = "";
2441
2500
  const toolCalls = [];
2442
2501
  const toolResults = [];
2443
- for await (const event of result.fullStream) {
2444
- if (event.type === "text-delta") {
2445
- fullText += event.textDelta;
2446
- config.onTextChunk?.(event.textDelta);
2447
- } else if (event.type === "tool-call") {
2448
- toolCalls.push({
2449
- toolCallId: event.toolCallId,
2450
- toolName: event.toolName,
2451
- args: event.args
2502
+ let streamUsage = null;
2503
+ let lastError = null;
2504
+ for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
2505
+ if (attempt > 0) {
2506
+ const delay = RETRY_DELAYS[Math.min(attempt - 1, RETRY_DELAYS.length - 1)];
2507
+ config.onRetry?.(attempt, lastError instanceof Error ? lastError.message : "unknown");
2508
+ await sleep(delay);
2509
+ }
2510
+ try {
2511
+ fullText = "";
2512
+ toolCalls.length = 0;
2513
+ toolResults.length = 0;
2514
+ const result = streamText({
2515
+ model: config.model,
2516
+ system: config.systemPrompt,
2517
+ messages: history,
2518
+ tools,
2519
+ maxSteps: 1
2452
2520
  });
2453
- config.onToolCall?.(event.toolName, event.args);
2454
- }
2455
- const evt = event;
2456
- if (evt.type === "tool-result") {
2457
- const res = evt.result;
2458
- toolResults.push({
2459
- toolCallId: evt.toolCallId,
2460
- result: evt.result
2521
+ for await (const event of result.fullStream) {
2522
+ if (event.type === "text-delta") {
2523
+ fullText += event.textDelta;
2524
+ config.onTextChunk?.(event.textDelta);
2525
+ } else if (event.type === "tool-call") {
2526
+ toolCalls.push({
2527
+ toolCallId: event.toolCallId,
2528
+ toolName: event.toolName,
2529
+ args: event.args
2530
+ });
2531
+ config.onToolCall?.(event.toolName, event.args);
2532
+ }
2533
+ const evt = event;
2534
+ if (evt.type === "tool-result") {
2535
+ const res = evt.result;
2536
+ toolResults.push({
2537
+ toolCallId: evt.toolCallId,
2538
+ result: evt.result
2539
+ });
2540
+ config.onToolResult?.(
2541
+ toolCalls.find((tc) => tc.toolCallId === evt.toolCallId)?.toolName ?? "unknown",
2542
+ res?.content ?? String(evt.result),
2543
+ res?.isError ?? false
2544
+ );
2545
+ }
2546
+ }
2547
+ try {
2548
+ const u = await result.usage;
2549
+ if (u) streamUsage = u;
2550
+ } catch {
2551
+ }
2552
+ lastError = null;
2553
+ break;
2554
+ } catch (err) {
2555
+ lastError = err;
2556
+ if (attempt < MAX_RETRIES && isRetryableError(err)) {
2557
+ continue;
2558
+ }
2559
+ const errMsg = err instanceof Error ? err.message : String(err);
2560
+ history.push({
2561
+ role: "assistant",
2562
+ content: `[Error: ${errMsg}. The model endpoint may be unavailable. Try again or switch models with /model.]`
2461
2563
  });
2462
- config.onToolResult?.(
2463
- toolCalls.find((tc) => tc.toolCallId === evt.toolCallId)?.toolName ?? "unknown",
2464
- res?.content ?? String(evt.result),
2465
- res?.isError ?? false
2466
- );
2564
+ return {
2565
+ text: `[Error: ${errMsg}]`,
2566
+ messages: history,
2567
+ iterations,
2568
+ toolCallCount: totalToolCalls,
2569
+ compressed: wasCompressed,
2570
+ usage: {
2571
+ promptTokens: totalPromptTokens,
2572
+ completionTokens: totalCompletionTokens,
2573
+ totalTokens: totalPromptTokens + totalCompletionTokens
2574
+ }
2575
+ };
2467
2576
  }
2468
2577
  }
2469
- try {
2470
- const u = await result.usage;
2471
- if (u) {
2472
- totalPromptTokens += u.promptTokens ?? 0;
2473
- totalCompletionTokens += u.completionTokens ?? 0;
2474
- }
2475
- } catch {
2578
+ if (streamUsage) {
2579
+ totalPromptTokens += streamUsage.promptTokens ?? 0;
2580
+ totalCompletionTokens += streamUsage.completionTokens ?? 0;
2476
2581
  }
2477
2582
  totalToolCalls += toolCalls.length;
2478
2583
  if (toolCalls.length > 0) {
2584
+ let hasRepeatedError = false;
2585
+ for (const tr of toolResults) {
2586
+ const res = tr.result;
2587
+ if (res?.isError) {
2588
+ const toolName = toolCalls.find((tc) => tc.toolCallId === tr.toolCallId)?.toolName ?? "unknown";
2589
+ const sig = getErrorSignature(toolName, res.content ?? "");
2590
+ const repeated = recentErrors.filter((e) => signaturesMatch(e, sig)).length;
2591
+ recentErrors.push(sig);
2592
+ if (repeated >= MAX_REPEATED_ERRORS - 1) {
2593
+ hasRepeatedError = true;
2594
+ }
2595
+ }
2596
+ }
2479
2597
  history.push({
2480
2598
  role: "assistant",
2481
2599
  content: [
@@ -2497,6 +2615,13 @@ async function runAgentLoop(messages, config) {
2497
2615
  result: tr.result
2498
2616
  }))
2499
2617
  });
2618
+ if (hasRepeatedError) {
2619
+ history.push({
2620
+ role: "user",
2621
+ content: "[System: You have repeated the same failing tool call multiple times. Stop and try a different approach. If the tool is unavailable or the command keeps failing, explain the issue to the user instead of retrying.]"
2622
+ });
2623
+ recentErrors.length = 0;
2624
+ }
2500
2625
  if (iterations % 5 === 0) {
2501
2626
  const prevLen = history.length;
2502
2627
  await config.toolContext.runHook?.("pre-compact", { messageCount: prevLen });
@@ -2566,7 +2691,8 @@ async function buildSystemPrompt(projectRoot, modelId) {
2566
2691
  "- Explain what you're doing before making changes.",
2567
2692
  "- If a task is complex, break it into steps.",
2568
2693
  "- When running shell commands, prefer non-destructive operations.",
2569
- "- If you encounter an error, analyze it and suggest a fix."
2694
+ "- If you encounter an error, analyze it and suggest a fix.",
2695
+ "- If the same tool call keeps failing, stop retrying and try a different approach."
2570
2696
  ];
2571
2697
  try {
2572
2698
  const instructions = await loadProjectInstructions(projectRoot);
@@ -2755,12 +2881,12 @@ async function withRetry(fn, options = {}) {
2755
2881
  delay += Math.random() * delay * 0.5;
2756
2882
  }
2757
2883
  options.onRetry?.(attempt, err, delay);
2758
- await sleep(delay);
2884
+ await sleep2(delay);
2759
2885
  }
2760
2886
  }
2761
2887
  throw lastError;
2762
2888
  }
2763
- function sleep(ms) {
2889
+ function sleep2(ms) {
2764
2890
  return new Promise((resolve2) => setTimeout(resolve2, ms));
2765
2891
  }
2766
2892
 
@@ -3054,20 +3180,18 @@ function isPlanComplete(plan) {
3054
3180
  // src/agent/cost.ts
3055
3181
  import chalk4 from "chalk";
3056
3182
  var MODEL_COSTS = {
3057
- "notch-cinder": { input: 0.05, output: 0.15 },
3058
- // L4 cheapest
3059
- "notch-forge": { input: 0.1, output: 0.3 },
3060
- // L40S
3061
- "notch-pyre": { input: 0.3, output: 0.9 },
3062
- // A100
3063
- "notch-ignis": { input: 0.4, output: 1.2 },
3064
- // A100
3065
- "notch-solace": { input: 0.35, output: 1 },
3066
- // A100 Gemma 4 31B
3067
- "notch-forge-lite": { input: 0.08, output: 0.25 },
3068
- // L4 INT8
3069
- "notch-solace-lite": { input: 0.04, output: 0.12 }
3070
- // L4 — Gemma 4 E4B
3183
+ "notch-cinder": { input: 1.59, output: 7.66 },
3184
+ // L4 (benchmarked 2026-04-02)
3185
+ "notch-forge": { input: 3.17, output: 12.32 },
3186
+ // L40S (benchmarked 2026-04-02)
3187
+ "notch-pyre": { input: 4.34, output: 15.42 },
3188
+ // A100-80GB (benchmarked 2026-04-02)
3189
+ "notch-ignis": { input: 2.86, output: 25.7 },
3190
+ // A100-80GB (benchmarked 2026-04-02)
3191
+ "notch-solace": { input: 4.63, output: 36.15 },
3192
+ // A100-80GB (benchmarked 2026-04-06)
3193
+ "notch-solace-lite": { input: 1.11, output: 10.28 }
3194
+ // L4 (benchmarked 2026-04-05)
3071
3195
  };
3072
3196
  var CostTracker = class {
3073
3197
  entries = [];
@@ -4298,7 +4422,7 @@ function formatTokens(n) {
4298
4422
  import fs15 from "fs/promises";
4299
4423
  import path18 from "path";
4300
4424
  import os4 from "os";
4301
- import { execSync as execSync3 } from "child_process";
4425
+ import { execSync as execSync2 } from "child_process";
4302
4426
  import chalk8 from "chalk";
4303
4427
  var CACHE_FILE = path18.join(os4.homedir(), ".notch", "update-check.json");
4304
4428
  var CHECK_INTERVAL = 0;
@@ -4337,7 +4461,7 @@ function autoUpdate(current, latest) {
4337
4461
  \u2B06 Updating Notch CLI: ${current} \u2192 ${latest}...
4338
4462
  `));
4339
4463
  try {
4340
- execSync3(`npm install -g ${PACKAGE_NAME}@${latest}`, {
4464
+ execSync2(`npm install -g ${PACKAGE_NAME}@${latest}`, {
4341
4465
  stdio: "inherit",
4342
4466
  timeout: 6e4
4343
4467
  });
@@ -4456,7 +4580,7 @@ function mergePermissions(base, override) {
4456
4580
  }
4457
4581
 
4458
4582
  // src/hooks/index.ts
4459
- import { execSync as execSync4 } from "child_process";
4583
+ import { execSync as execSync3 } from "child_process";
4460
4584
  import fs17 from "fs/promises";
4461
4585
  import { watch } from "fs";
4462
4586
  import path20 from "path";
@@ -4564,7 +4688,7 @@ async function executeHook(hook, context) {
4564
4688
  NOTCH_CWD: context.cwd
4565
4689
  };
4566
4690
  try {
4567
- const output = execSync4(hook.command, {
4691
+ const output = execSync3(hook.command, {
4568
4692
  cwd: context.cwd,
4569
4693
  encoding: "utf-8",
4570
4694
  timeout: hook.timeout ?? 1e4,
@@ -4936,7 +5060,7 @@ function findSync(oldLines, newLines, oi, ni, lookAhead) {
4936
5060
  }
4937
5061
 
4938
5062
  // src/commands/doctor.ts
4939
- import { execSync as execSync5 } from "child_process";
5063
+ import { execSync as execSync4 } from "child_process";
4940
5064
  import fs20 from "fs/promises";
4941
5065
  import path23 from "path";
4942
5066
  import os8 from "os";
@@ -4953,7 +5077,7 @@ async function runDiagnostics(cwd) {
4953
5077
  results.push({ name: "Node.js", status: "fail", message: `v${nodeVersion} (requires >= 18)` });
4954
5078
  }
4955
5079
  try {
4956
- const gitVersion = execSync5("git --version", { encoding: "utf-8", timeout: 5e3 }).trim();
5080
+ const gitVersion = execSync4("git --version", { encoding: "utf-8", timeout: 5e3 }).trim();
4957
5081
  results.push({ name: "Git", status: "ok", message: gitVersion });
4958
5082
  } catch {
4959
5083
  results.push({ name: "Git", status: "fail", message: "Not found. Install git to use git tools." });
@@ -5055,23 +5179,23 @@ registerCommand("/doctor", async (_args, ctx) => {
5055
5179
  });
5056
5180
 
5057
5181
  // src/commands/copy.ts
5058
- import { execSync as execSync6 } from "child_process";
5182
+ import { execSync as execSync5 } from "child_process";
5059
5183
  import chalk11 from "chalk";
5060
5184
  function copyToClipboard(text) {
5061
5185
  try {
5062
5186
  const platform = process.platform;
5063
5187
  if (platform === "win32") {
5064
- execSync6("clip.exe", { input: text, timeout: 5e3 });
5188
+ execSync5("clip.exe", { input: text, timeout: 5e3 });
5065
5189
  } else if (platform === "darwin") {
5066
- execSync6("pbcopy", { input: text, timeout: 5e3 });
5190
+ execSync5("pbcopy", { input: text, timeout: 5e3 });
5067
5191
  } else {
5068
5192
  try {
5069
- execSync6("xclip -selection clipboard", { input: text, timeout: 5e3 });
5193
+ execSync5("xclip -selection clipboard", { input: text, timeout: 5e3 });
5070
5194
  } catch {
5071
5195
  try {
5072
- execSync6("xsel --clipboard --input", { input: text, timeout: 5e3 });
5196
+ execSync5("xsel --clipboard --input", { input: text, timeout: 5e3 });
5073
5197
  } catch {
5074
- execSync6("wl-copy", { input: text, timeout: 5e3 });
5198
+ execSync5("wl-copy", { input: text, timeout: 5e3 });
5075
5199
  }
5076
5200
  }
5077
5201
  }
@@ -5117,7 +5241,7 @@ registerCommand("/btw", async (args, ctx) => {
5117
5241
  });
5118
5242
 
5119
5243
  // src/commands/security-review.ts
5120
- import { execFileSync as execFileSync2, execSync as execSync7 } from "child_process";
5244
+ import { execFileSync as execFileSync2, execSync as execSync6 } from "child_process";
5121
5245
  import chalk13 from "chalk";
5122
5246
  function isValidGitRange(range) {
5123
5247
  return /^[a-zA-Z0-9._~^\/\-]+(\.\.[a-zA-Z0-9._~^\/\-]+)?$/.test(range);
@@ -5145,12 +5269,12 @@ registerCommand("/security-review", async (args, ctx) => {
5145
5269
  }).trim();
5146
5270
  } catch {
5147
5271
  try {
5148
- stat = execSync7("git diff --stat", {
5272
+ stat = execSync6("git diff --stat", {
5149
5273
  cwd: ctx.cwd,
5150
5274
  encoding: "utf-8",
5151
5275
  timeout: 1e4
5152
5276
  }).trim();
5153
- diff = execSync7("git diff", {
5277
+ diff = execSync6("git diff", {
5154
5278
  cwd: ctx.cwd,
5155
5279
  encoding: "utf-8",
5156
5280
  timeout: 1e4,
@@ -5422,7 +5546,7 @@ Read the file first, then make the change. Only modify this one file.`
5422
5546
  });
5423
5547
 
5424
5548
  // src/commands/plugin.ts
5425
- import { execSync as execSync8, execFileSync as execFileSync3 } from "child_process";
5549
+ import { execSync as execSync7, execFileSync as execFileSync3 } from "child_process";
5426
5550
  import fs21 from "fs/promises";
5427
5551
  import path24 from "path";
5428
5552
  import os9 from "os";
@@ -5491,7 +5615,7 @@ registerCommand("/plugin", async (args, ctx) => {
5491
5615
  try {
5492
5616
  const pkgExists = await fs21.access(path24.join(pluginDir, "package.json")).then(() => true).catch(() => false);
5493
5617
  if (pkgExists) {
5494
- execSync8("npm install --production", {
5618
+ execSync7("npm install --production", {
5495
5619
  cwd: pluginDir,
5496
5620
  encoding: "utf-8",
5497
5621
  timeout: 12e4,
@@ -5642,12 +5766,12 @@ Reply with ONLY the commit message, nothing else. No markdown, no explanation.`;
5642
5766
  });
5643
5767
 
5644
5768
  // src/commands/pr.ts
5645
- import { execSync as execSync10, execFileSync as execFileSync5 } from "child_process";
5769
+ import { execSync as execSync9, execFileSync as execFileSync5 } from "child_process";
5646
5770
  import chalk18 from "chalk";
5647
5771
  import ora3 from "ora";
5648
5772
  function tryExec(cmd, cwd) {
5649
5773
  try {
5650
- return execSync10(cmd, { cwd, encoding: "utf-8", timeout: 15e3 }).trim();
5774
+ return execSync9(cmd, { cwd, encoding: "utf-8", timeout: 15e3 }).trim();
5651
5775
  } catch {
5652
5776
  return null;
5653
5777
  }
@@ -5778,11 +5902,11 @@ BODY:
5778
5902
  });
5779
5903
 
5780
5904
  // src/commands/worktree.ts
5781
- import { execSync as execSync11, execFileSync as execFileSync6 } from "child_process";
5905
+ import { execSync as execSync10, execFileSync as execFileSync6 } from "child_process";
5782
5906
  import chalk19 from "chalk";
5783
5907
  function tryExec2(cmd, cwd) {
5784
5908
  try {
5785
- return execSync11(cmd, { cwd, encoding: "utf-8", timeout: 15e3 }).trim();
5909
+ return execSync10(cmd, { cwd, encoding: "utf-8", timeout: 15e3 }).trim();
5786
5910
  } catch {
5787
5911
  return null;
5788
5912
  }
@@ -5893,7 +6017,7 @@ registerCommand("/worktree", async (args, ctx) => {
5893
6017
  }
5894
6018
  case "prune": {
5895
6019
  try {
5896
- execSync11("git worktree prune", { cwd: ctx.cwd, encoding: "utf-8" });
6020
+ execSync10("git worktree prune", { cwd: ctx.cwd, encoding: "utf-8" });
5897
6021
  console.log(chalk19.green(" \u2713 Pruned stale worktrees.\n"));
5898
6022
  } catch (err) {
5899
6023
  console.log(chalk19.red(` Failed: ${err.message}
@@ -6390,8 +6514,8 @@ function clearMenu(state) {
6390
6514
  if (state.renderedLines === 0) return "";
6391
6515
  const lines = state.renderedLines;
6392
6516
  state.renderedLines = 0;
6393
- let ansi = "";
6394
- for (let i = 0; i < lines; i++) {
6517
+ let ansi = "\x1B[2K";
6518
+ for (let i = 0; i < lines + 1; i++) {
6395
6519
  ansi += "\x1B[1A\x1B[2K";
6396
6520
  }
6397
6521
  return ansi;
@@ -6456,16 +6580,15 @@ function attachSlashMenu(rl) {
6456
6580
  const shouldShow = updateMenu(state, line);
6457
6581
  if (wasVisible && !shouldShow) {
6458
6582
  process.stdout.write(clearMenu(state));
6583
+ rewritePromptLine(rl);
6459
6584
  } else if (shouldShow) {
6460
6585
  if (wasVisible) {
6461
6586
  process.stdout.write(clearMenu(state));
6462
6587
  }
6463
6588
  const menuStr = renderMenu(state);
6464
6589
  if (menuStr) {
6465
- process.stdout.write("\x1B[s");
6466
6590
  process.stdout.write(menuStr + "\n");
6467
6591
  rewritePromptLine(rl);
6468
- process.stdout.write("\x1B[u");
6469
6592
  }
6470
6593
  }
6471
6594
  }
@@ -6516,19 +6639,60 @@ var modelChoices = MODEL_IDS.join(", ");
6516
6639
  var program = new Command().name("notch").description("Notch CLI \u2014 AI-powered coding assistant by Driftrail").version(VERSION).argument("[prompt...]", "One-shot prompt (runs once and exits)").option(`-m, --model <model>`, `Notch model (${modelChoices})`).option("--base-url <url>", "Override Notch API base URL").option("--api-key <key>", "Notch API key (prefer NOTCH_API_KEY env var)").option("--no-repo-map", "Disable automatic repository mapping").option("--no-markdown", "Disable markdown rendering in output").option("--max-iterations <n>", "Max tool-call rounds per turn", "25").option("-y, --yes", "Auto-confirm destructive actions").option("--trust", "Trust mode \u2014 auto-allow all tool calls").option("--theme <theme>", `UI color theme (${THEME_IDS.join(", ")})`).option("--resume", "Resume the last session for this project").option("--session <id>", "Resume a specific session by ID").option("--cwd <dir>", "Set working directory").parse(process.argv);
6517
6640
  var opts = program.opts();
6518
6641
  var promptArgs = program.args;
6519
- function printModelTable(activeModel) {
6520
- const t = theme();
6521
- console.log(t.dim("\n Available models:\n"));
6522
- for (const id of MODEL_IDS) {
6523
- const info = MODEL_CATALOG[id];
6524
- const active = id === activeModel ? t.success(" \u25CF") : " ";
6525
- const label = id === activeModel ? t.bold(`${info.label}`) : t.dim(`${info.label}`);
6526
- const ctx = t.dim(`${(info.contextWindow / 1024).toFixed(0)}K ctx`);
6527
- console.log(` ${active} ${t.brand(id.padEnd(14))} ${label} ${ctx}`);
6528
- }
6529
- console.log(t.dim(`
6530
- Switch with: /model <name>
6531
- `));
6642
/**
 * Show a keyboard-driven model picker on the terminal and resolve with the choice.
 *
 * One row is drawn per entry of MODEL_IDS. Up/Down move the cursor (wrapping at
 * both ends), Enter selects the highlighted row, Esc or Ctrl+C cancels. The menu
 * is erased from the screen before the promise settles.
 *
 * @param {string} activeModel - ID of the model currently in use; it is marked
 *   with a dot and is the initial cursor position (falls back to the first row
 *   when it is not found in MODEL_IDS).
 * @returns {Promise<string|null>} The picked model ID, or null when the user
 *   cancelled or stdin is not an interactive TTY.
 */
function interactiveModelPicker(activeModel) {
  return new Promise((resolve2) => {
    const stdin = process.stdin;
    // setRawMode only exists on TTY streams. With piped/redirected input the
    // original call threw inside the executor and rejected the promise; treat
    // that situation as a cancelled pick instead.
    if (!stdin.isTTY || typeof stdin.setRawMode !== "function") {
      resolve2(null);
      return;
    }

    const t = theme();
    // draw() emits the header line, the blank line from its trailing "\n",
    // and one line per model.
    const menuHeight = MODEL_IDS.length + 2;
    let cursor = Math.max(0, MODEL_IDS.indexOf(activeModel));

    const eraseMenu = () => {
      process.stdout.write(`\x1B[${menuHeight}A\x1B[J`);
    };

    const draw = () => {
      console.log(t.dim(" Select a model (\u2191\u2193 to move, Enter to select, Esc to cancel)\n"));
      MODEL_IDS.forEach((id, i) => {
        const info = MODEL_CATALOG[id];
        const isSelected = i === cursor;
        const pointer = isSelected ? t.brand("\u276F") : " ";
        const dot = id === activeModel ? t.success("\u25CF") : " ";
        // Pad the plain text BEFORE colouring: padEnd counts ANSI escape
        // bytes, so padding an already-styled string misaligns the columns.
        const paddedLabel = String(info.label).padEnd(20);
        const label = isSelected ? t.bold(paddedLabel) : t.dim(paddedLabel);
        const size = t.dim(String(info.size).padEnd(6));
        const gpu = t.dim(String(info.gpu).padEnd(12));
        const ctx = t.dim(`${(info.contextWindow / 1024).toFixed(0)}K`);
        console.log(` ${pointer} ${dot} ${t.brand(id.replace("notch-", "").padEnd(12))} ${label} ${size} ${gpu} ${ctx}`);
      });
    };

    const render = () => {
      eraseMenu();
      draw();
    };

    const wasRaw = stdin.isRaw;

    const cleanup = () => {
      stdin.removeListener("data", onKey);
      stdin.setRawMode(wasRaw ?? false);
      eraseMenu();
    };

    const onKey = (key) => {
      const s = key.toString();
      if (s === "\x1B[A") {
        cursor = (cursor - 1 + MODEL_IDS.length) % MODEL_IDS.length;
        render();
      } else if (s === "\x1B[B") {
        cursor = (cursor + 1) % MODEL_IDS.length;
        render();
      } else if (s === "\r" || s === "\n") {
        cleanup();
        resolve2(MODEL_IDS[cursor] ?? null);
      } else if (s === "\x1B" || s === "\x03") {
        // In raw mode Ctrl+C arrives as the byte 0x03 and raises no SIGINT,
        // so it has to be handled here or the user cannot interrupt the menu.
        cleanup();
        resolve2(null);
      }
    };

    console.log("");
    draw();
    stdin.setRawMode(true);
    stdin.resume();
    stdin.on("data", onKey);
  });
}
6533
6697
  function printHelp() {
6534
6698
  console.log(chalk27.gray(`
@@ -6974,7 +7138,23 @@ Analyze the above input.`;
6974
7138
  return;
6975
7139
  }
6976
7140
  if (input === "/model" || input === "/models") {
6977
- printModelTable(activeModelId);
7141
+ rl.pause();
7142
+ const picked = await interactiveModelPicker(activeModelId);
7143
+ if (picked && picked !== activeModelId) {
7144
+ activeModelId = picked;
7145
+ config.models.chat.model = activeModelId;
7146
+ model = resolveModel(config.models.chat);
7147
+ const switchedInfo = MODEL_CATALOG[activeModelId];
7148
+ console.log(chalk27.green(` \u2713 Switched to ${switchedInfo.label} (${switchedInfo.id})
7149
+ `));
7150
+ } else if (picked) {
7151
+ console.log(chalk27.gray(` Already using ${MODEL_CATALOG[activeModelId].label}
7152
+ `));
7153
+ } else {
7154
+ console.log(chalk27.gray(` Cancelled
7155
+ `));
7156
+ }
7157
+ rl.resume();
6978
7158
  rl.prompt();
6979
7159
  return;
6980
7160
  }
@@ -7052,7 +7232,7 @@ Analyze the above input.`;
7052
7232
  return;
7053
7233
  }
7054
7234
  if (input === "/compact") {
7055
- const { autoCompress: autoCompress2 } = await import("./compression-CXJN2ZYN.js");
7235
+ const { autoCompress: autoCompress2 } = await import("./compression-LPFNGAV6.js");
7056
7236
  const before = messages.length;
7057
7237
  const compressed = await autoCompress2(messages, model, MODEL_CATALOG[activeModelId].contextWindow);
7058
7238
  messages.length = 0;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@freesyntax/notch-cli",
3
- "version": "0.5.13",
3
+ "version": "0.5.16",
4
4
  "description": "Notch CLI — AI-powered coding assistant by Driftrail",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,142 +0,0 @@
1
- // src/agent/compression.ts
2
- import { generateText } from "ai";
3
/**
 * Roughly estimate the token count of a message list.
 *
 * Characters are summed over string contents and, for array contents, over each
 * part's `text`, or else the JSON form of its `result`, or else the JSON form of
 * its `args`. The total is divided by 4 and rounded up.
 *
 * @param {Array<{content?: unknown}>} messages - Messages to measure.
 * @returns {number} Estimated token count (0 for an empty list).
 */
function estimateTokens(messages) {
  // JSON.stringify returns undefined (not a string) for undefined/functions;
  // count those as zero characters instead of throwing on `.length`.
  const jsonLength = (value) => JSON.stringify(value)?.length ?? 0;

  let chars = 0;
  for (const msg of messages) {
    const { content } = msg;
    if (typeof content === "string") {
      chars += content.length;
      continue;
    }
    if (!Array.isArray(content)) continue;

    for (const part of content) {
      // The `in` operator throws on primitives and null.
      if (part === null || typeof part !== "object") continue;
      if ("text" in part) {
        chars += String(part.text ?? "").length;
      } else if ("result" in part) {
        chars += jsonLength(part.result);
      } else if ("args" in part) {
        chars += jsonLength(part.args);
      }
    }
  }
  return Math.ceil(chars / 4);
}
18
/**
 * Compress a long conversation by replacing its middle section with a summary.
 *
 * Nothing happens unless the estimated token count reaches 75% of
 * `opts.contextWindow` and there are at least 6 messages. Otherwise the first
 * message and the last `keepRecent` messages are kept verbatim and everything in
 * between is summarized, first by asking `opts.model`, and by a locally built
 * summary if that call fails or returns nothing.
 *
 * @param {Array<object>} messages - Full conversation history.
 * @param {{contextWindow: number, model: unknown, keepRecent?: number}} opts
 *   `keepRecent` defaults to 4.
 * @returns {Promise<{messages: Array<object>, compressed: boolean}>} The
 *   original array with `compressed: false`, or a new array with `compressed: true`.
 */
async function compressHistory(messages, opts) {
  const threshold = opts.contextWindow * 0.75;
  const currentTokens = estimateTokens(messages);
  if (currentTokens < threshold || messages.length < 6) {
    return { messages, compressed: false };
  }

  const keepStart = 1;
  const keepRecent = Math.max(0, opts.keepRecent ?? 4);
  // Split by explicit index: slice(keepStart, -keepRecent) collapses to an
  // empty range when keepRecent is 0 because -0 is treated as index 0.
  const tailStart = Math.max(keepStart, messages.length - keepRecent);
  const head = messages.slice(0, keepStart);
  const middle = messages.slice(keepStart, tailStart);
  const tail = messages.slice(tailStart);
  if (middle.length === 0) {
    return { messages, compressed: false };
  }

  let summaryText = "";
  try {
    const result = await generateText({
      model: opts.model,
      system: "You are a conversation summarizer. Condense the following conversation history into a brief summary that preserves all important context, decisions made, files modified, and any errors encountered. Be concise but thorough. Output only the summary.",
      messages: [{ role: "user", content: summarizeMessages(middle) }],
      maxTokens: 1024
    });
    summaryText = result.text ?? "";
  } catch {
    // Deliberate best-effort: any failure of the model call falls through
    // to the locally built summary below.
  }
  if (summaryText.trim() === "") {
    // An empty summary would silently erase the middle of the conversation.
    summaryText = buildDeterministicSummary(middle);
  }

  const summaryContent = `[Previous conversation context]
${summaryText}
[End of context]`;

  const compressedMessages = [...head];
  const firstKept = tail[0];
  if (firstKept?.role === "user" && typeof firstKept.content === "string") {
    // Fold the summary into the first kept user message so that two user
    // messages do not end up back to back.
    compressedMessages.push({
      role: "user",
      content: `${summaryContent}

---

${firstKept.content}`
    });
    compressedMessages.push(...tail.slice(1));
  } else {
    // Also taken when the first kept user message has non-string (part array)
    // content: merging it into a string would discard that content, so it is
    // kept intact after a synthetic summary/acknowledgement pair.
    compressedMessages.push({ role: "user", content: summaryContent });
    compressedMessages.push({
      role: "assistant",
      content: "Understood. I have the context from our previous conversation. Continuing."
    });
    compressedMessages.push(...tail);
  }
  return { messages: compressedMessages, compressed: true };
}
70
/**
 * Render messages as a compact plain-text transcript, one line per message.
 *
 * String contents are cut to 500 characters. For array contents each part
 * becomes its text (cut to 200 characters), or `[tool: <name>]` and/or
 * `[result: <json cut to 100 characters>]`; parts are joined with " | ".
 * Messages whose content is neither a string nor an array are omitted.
 *
 * @param {Array<{role: string, content?: unknown}>} messages - Messages to render.
 * @returns {string} Lines of the form `ROLE: ...` joined with newlines.
 */
function summarizeMessages(messages) {
  // JSON.stringify yields undefined for undefined/functions; fall back to
  // String() so `.slice` always has a string to work on.
  const toJson = (value) => JSON.stringify(value) ?? String(value);

  const describePart = (part) => {
    if (part === null || typeof part !== "object") return [];
    if ("text" in part) return [String(part.text ?? "").slice(0, 200)];
    const pieces = [];
    if ("toolName" in part) pieces.push(`[tool: ${part.toolName}]`);
    // Checked independently of toolName: a part carrying both fields (as
    // tool-result parts appear to; confirm against the SDK in use) used to
    // have its result silently dropped by the else-if chain.
    if ("result" in part) pieces.push(`[result: ${toJson(part.result).slice(0, 100)}]`);
    return pieces;
  };

  const lines = [];
  for (const msg of messages) {
    const role = msg.role.toUpperCase();
    if (typeof msg.content === "string") {
      lines.push(`${role}: ${msg.content.slice(0, 500)}`);
    } else if (Array.isArray(msg.content)) {
      lines.push(`${role}: ${msg.content.flatMap(describePart).join(" | ")}`);
    }
  }
  return lines.join("\n");
}
88
/**
 * Build a summary of a message list without calling a model.
 *
 * Collects user request texts (first 100 characters each), the distinct tool
 * names seen, the distinct `args.path` values seen, and a count of parts flagged
 * as errors, then renders them as a short bullet list. Sections with nothing to
 * report are omitted; the total message count is always included.
 *
 * @param {Array<{role: string, content?: unknown}>} messages - Messages to summarize.
 * @returns {string} Multi-line summary starting with "Summary of previous conversation:".
 */
function buildDeterministicSummary(messages) {
  const filesModified = new Set();
  const toolsUsed = new Set();
  const userRequests = [];
  let errorCount = 0;

  for (const msg of messages) {
    const { content } = msg;
    if (msg.role === "user" && typeof content === "string") {
      userRequests.push(content.slice(0, 100));
    }
    if (!Array.isArray(content)) continue;

    for (const part of content) {
      // The `in` operator throws on primitives and null.
      if (part === null || typeof part !== "object") continue;
      if (msg.role === "user" && typeof part.text === "string") {
        // User turns sent as part arrays were previously left out entirely.
        userRequests.push(part.text.slice(0, 100));
      }
      if ("toolName" in part) {
        toolsUsed.add(part.toolName);
        if (part.args?.path) filesModified.add(String(part.args.path));
      }
      // Accept the flag on the part itself as well as inside the result
      // payload. NOTE(review): which of the two the SDK sets is not visible
      // here; confirm. A part carrying both is still counted once.
      if ("result" in part && (part.isError || part.result?.isError)) {
        errorCount++;
      }
    }
  }

  const lines = ["Summary of previous conversation:"];
  if (userRequests.length > 0) {
    lines.push(`- User requests: ${userRequests.join("; ")}`);
  }
  if (toolsUsed.size > 0) {
    lines.push(`- Tools used: ${[...toolsUsed].join(", ")}`);
  }
  if (filesModified.size > 0) {
    lines.push(`- Files touched: ${[...filesModified].join(", ")}`);
  }
  if (errorCount > 0) {
    lines.push(`- Errors encountered: ${errorCount}`);
  }
  lines.push(`- Total messages summarized: ${messages.length}`);
  return lines.join("\n");
}
127
/**
 * Convenience wrapper around compressHistory that returns only the message list.
 *
 * @param {Array<object>} messages - Conversation history.
 * @param {unknown} model - Model handle forwarded to compressHistory.
 * @param {number} contextWindow - Context window size forwarded to compressHistory.
 * @param {() => void} [onCompress] - Invoked with no arguments when compression
 *   actually took place.
 * @returns {Promise<Array<object>>} The (possibly compressed) messages.
 */
async function autoCompress(messages, model, contextWindow, onCompress) {
  const options = { model, contextWindow };
  const { messages: nextMessages, compressed } = await compressHistory(messages, options);
  if (compressed && onCompress != null) {
    onCompress();
  }
  return nextMessages;
}
137
-
138
- export {
139
- estimateTokens,
140
- compressHistory,
141
- autoCompress
142
- };
@@ -1,11 +0,0 @@
1
- import {
2
- autoCompress,
3
- compressHistory,
4
- estimateTokens
5
- } from "./chunk-MWM5TFY4.js";
6
- import "./chunk-3RG5ZIWI.js";
7
- export {
8
- autoCompress,
9
- compressHistory,
10
- estimateTokens
11
- };