@probelabs/probe 0.6.0-rc226 → 0.6.0-rc228

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -68681,8 +68681,6 @@ async function truncateIfNeeded(content, tokenCounter, sessionId, maxTokens) {
68681
68681
  if (tokenCount <= limit) {
68682
68682
  return { truncated: false, content };
68683
68683
  }
68684
- const maxChars = limit * CHARS_PER_TOKEN2;
68685
- const truncatedContent = content.substring(0, maxChars);
68686
68684
  let tempFilePath = null;
68687
68685
  let fileError = null;
68688
68686
  try {
@@ -68694,22 +68692,38 @@ async function truncateIfNeeded(content, tokenCounter, sessionId, maxTokens) {
68694
68692
  fileError = err.message || "Unknown file system error";
68695
68693
  tempFilePath = null;
68696
68694
  }
68695
+ let truncatedBody;
68696
+ const useTail = limit >= MIN_LIMIT_FOR_TAIL;
68697
+ if (useTail) {
68698
+ const headTokens = limit - TAIL_TOKENS;
68699
+ const headChars = headTokens * CHARS_PER_TOKEN2;
68700
+ const tailChars = TAIL_TOKENS * CHARS_PER_TOKEN2;
68701
+ const headContent = content.substring(0, headChars);
68702
+ const tailContent = content.substring(content.length - tailChars);
68703
+ const omittedTokens = tokenCount - headTokens - TAIL_TOKENS;
68704
+ truncatedBody = `${headContent}
68705
+
68706
+ ... ${omittedTokens} tokens omitted ...
68707
+
68708
+ ${tailContent}`;
68709
+ } else {
68710
+ const maxChars = limit * CHARS_PER_TOKEN2;
68711
+ truncatedBody = content.substring(0, maxChars);
68712
+ }
68697
68713
  let message;
68698
68714
  if (tempFilePath) {
68699
68715
  message = `Output exceeded maximum size (${tokenCount} tokens, limit: ${limit}).
68700
68716
  Full output saved to: ${tempFilePath}
68701
68717
 
68702
- --- Truncated Output (first ${limit} tokens approx) ---
68703
- ${truncatedContent}
68704
- ...
68718
+ --- Truncated Output ---
68719
+ ${truncatedBody}
68705
68720
  --- End of Truncated Output ---`;
68706
68721
  } else {
68707
68722
  message = `Output exceeded maximum size (${tokenCount} tokens, limit: ${limit}).
68708
68723
  Warning: Could not save full output to file (${fileError}).
68709
68724
 
68710
- --- Truncated Output (first ${limit} tokens approx) ---
68711
- ${truncatedContent}
68712
- ...
68725
+ --- Truncated Output ---
68726
+ ${truncatedBody}
68713
68727
  --- End of Truncated Output ---`;
68714
68728
  }
68715
68729
  return {
@@ -68720,12 +68734,14 @@ ${truncatedContent}
68720
68734
  error: fileError || void 0
68721
68735
  };
68722
68736
  }
68723
- var DEFAULT_MAX_OUTPUT_TOKENS, CHARS_PER_TOKEN2;
68737
+ var DEFAULT_MAX_OUTPUT_TOKENS, CHARS_PER_TOKEN2, TAIL_TOKENS, MIN_LIMIT_FOR_TAIL;
68724
68738
  var init_outputTruncator = __esm({
68725
68739
  "src/agent/outputTruncator.js"() {
68726
68740
  "use strict";
68727
68741
  DEFAULT_MAX_OUTPUT_TOKENS = 2e4;
68728
68742
  CHARS_PER_TOKEN2 = 4;
68743
+ TAIL_TOKENS = 1e3;
68744
+ MIN_LIMIT_FOR_TAIL = 2e3;
68729
68745
  }
68730
68746
  });
68731
68747
 
@@ -69451,7 +69467,7 @@ import path8 from "path";
69451
69467
  import os3 from "os";
69452
69468
  import { EventEmitter as EventEmitter4 } from "events";
69453
69469
  async function createEnhancedClaudeCLIEngine(options = {}) {
69454
- const { agent, systemPrompt, customPrompt, debug, sessionId, allowedTools } = options;
69470
+ const { agent, systemPrompt, customPrompt, debug, sessionId, allowedTools, timeout = 12e4 } = options;
69455
69471
  const session = new Session(
69456
69472
  sessionId || randomBytes(8).toString("hex"),
69457
69473
  debug
@@ -69550,6 +69566,30 @@ ${opts.schema}`;
69550
69566
  stdio: ["ignore", "pipe", "pipe"]
69551
69567
  // Ignore stdin since echo handles it
69552
69568
  });
69569
+ let killed = false;
69570
+ let timeoutHandle;
69571
+ let sigkillHandle;
69572
+ if (timeout > 0) {
69573
+ timeoutHandle = setTimeout(() => {
69574
+ if (!killed) {
69575
+ killed = true;
69576
+ processEnded = true;
69577
+ proc2.kill("SIGTERM");
69578
+ if (debug) {
69579
+ console.log(`[DEBUG] Process timed out after ${timeout}ms, sending SIGTERM`);
69580
+ }
69581
+ sigkillHandle = setTimeout(() => {
69582
+ if (proc2.exitCode === null) {
69583
+ proc2.kill("SIGKILL");
69584
+ if (debug) {
69585
+ console.log("[DEBUG] Process did not exit, sending SIGKILL");
69586
+ }
69587
+ }
69588
+ }, 5e3);
69589
+ emitter.emit("error", new Error(`Claude CLI process timed out after ${timeout}ms`));
69590
+ }
69591
+ }, timeout);
69592
+ }
69553
69593
  proc2.stdout.on("data", (data) => {
69554
69594
  buffer += data.toString();
69555
69595
  processJsonBuffer(buffer, emitter, session, debug, toolCollector);
@@ -69566,10 +69606,20 @@ ${opts.schema}`;
69566
69606
  }
69567
69607
  });
69568
69608
  proc2.on("close", (code) => {
69609
+ if (timeoutHandle) {
69610
+ clearTimeout(timeoutHandle);
69611
+ }
69612
+ if (sigkillHandle) {
69613
+ clearTimeout(sigkillHandle);
69614
+ }
69569
69615
  processEnded = true;
69570
69616
  if (code !== 0 && debug) {
69571
69617
  console.log(`[DEBUG] Process exited with code ${code}`);
69572
69618
  }
69619
+ if (killed) {
69620
+ emitter.emit("end");
69621
+ return;
69622
+ }
69573
69623
  if (buffer.trim()) {
69574
69624
  processJsonBuffer(buffer, emitter, session, debug, toolCollector);
69575
69625
  }
@@ -69585,6 +69635,13 @@ ${opts.schema}`;
69585
69635
  emitter.emit("end");
69586
69636
  });
69587
69637
  proc2.on("error", (error) => {
69638
+ if (timeoutHandle) {
69639
+ clearTimeout(timeoutHandle);
69640
+ }
69641
+ if (sigkillHandle) {
69642
+ clearTimeout(sigkillHandle);
69643
+ }
69644
+ processEnded = true;
69588
69645
  emitter.emit("error", error);
69589
69646
  });
69590
69647
  const messageQueue = [];
@@ -69630,7 +69687,22 @@ ${opts.schema}`;
69630
69687
  \u{1F527} Using ${msg.name}: ${JSON.stringify(msg.input)}
69631
69688
  `
69632
69689
  };
69633
- const result = await executeProbleTool(agent, msg.name, msg.input);
69690
+ const toolTimeout = 3e4;
69691
+ let toolTimeoutId;
69692
+ const timeoutPromise = new Promise((_, reject2) => {
69693
+ toolTimeoutId = setTimeout(() => reject2(new Error(`Tool ${msg.name} timed out after ${toolTimeout}ms`)), toolTimeout);
69694
+ });
69695
+ let result;
69696
+ try {
69697
+ result = await Promise.race([
69698
+ executeProbleTool(agent, msg.name, msg.input),
69699
+ timeoutPromise
69700
+ ]);
69701
+ } catch (error) {
69702
+ result = `Tool error: ${error.message}`;
69703
+ } finally {
69704
+ clearTimeout(toolTimeoutId);
69705
+ }
69634
69706
  yield { type: "text", content: `${result}
69635
69707
  ` };
69636
69708
  } else if (msg.type === "toolBatch") {
@@ -70197,6 +70269,9 @@ var init_enhanced_vercel = __esm({
70197
70269
  // src/agent/ProbeAgent.js
70198
70270
  var ProbeAgent_exports = {};
70199
70271
  __export(ProbeAgent_exports, {
70272
+ ENGINE_ACTIVITY_TIMEOUT_DEFAULT: () => ENGINE_ACTIVITY_TIMEOUT_DEFAULT,
70273
+ ENGINE_ACTIVITY_TIMEOUT_MAX: () => ENGINE_ACTIVITY_TIMEOUT_MAX,
70274
+ ENGINE_ACTIVITY_TIMEOUT_MIN: () => ENGINE_ACTIVITY_TIMEOUT_MIN,
70200
70275
  ProbeAgent: () => ProbeAgent
70201
70276
  });
70202
70277
  import dotenv2 from "dotenv";
@@ -70231,7 +70306,7 @@ Your content here
70231
70306
 
70232
70307
  Do NOT wrap in other tags like <api_call>, <tool_name>, <function>, etc.`;
70233
70308
  }
70234
- var MAX_TOOL_ITERATIONS, MAX_HISTORY_MESSAGES, MAX_IMAGE_FILE_SIZE, ProbeAgent;
70309
+ var ENGINE_ACTIVITY_TIMEOUT_DEFAULT, ENGINE_ACTIVITY_TIMEOUT_MIN, ENGINE_ACTIVITY_TIMEOUT_MAX, MAX_TOOL_ITERATIONS, MAX_HISTORY_MESSAGES, MAX_IMAGE_FILE_SIZE, ProbeAgent;
70235
70310
  var init_ProbeAgent = __esm({
70236
70311
  "src/agent/ProbeAgent.js"() {
70237
70312
  "use strict";
@@ -70260,6 +70335,9 @@ var init_ProbeAgent = __esm({
70260
70335
  init_delegate();
70261
70336
  init_tasks();
70262
70337
  dotenv2.config();
70338
+ ENGINE_ACTIVITY_TIMEOUT_DEFAULT = 18e4;
70339
+ ENGINE_ACTIVITY_TIMEOUT_MIN = 5e3;
70340
+ ENGINE_ACTIVITY_TIMEOUT_MAX = 6e5;
70263
70341
  MAX_TOOL_ITERATIONS = (() => {
70264
70342
  const val = parseInt(process.env.MAX_TOOL_ITERATIONS || "30", 10);
70265
70343
  if (isNaN(val) || val < 1 || val > 200) {
@@ -70318,6 +70396,8 @@ var init_ProbeAgent = __esm({
70318
70396
  * @param {number} [options.fallback.maxTotalAttempts=10] - Maximum total attempts across all providers
70319
70397
  * @param {string} [options.completionPrompt] - Custom prompt to run after attempt_completion for validation/review (runs before mermaid/JSON validation)
70320
70398
  * @param {number} [options.maxOutputTokens] - Maximum tokens for tool output before truncation (default: 20000, can also be set via PROBE_MAX_OUTPUT_TOKENS env var)
70399
+ * @param {number} [options.requestTimeout] - Timeout in ms for AI requests (default: 120000 or REQUEST_TIMEOUT env var). Used to abort hung requests.
70400
+ * @param {number} [options.maxOperationTimeout] - Maximum timeout in ms for the entire operation including all retries and fallbacks (default: 300000 or MAX_OPERATION_TIMEOUT env var). This is the absolute maximum time for streamTextWithRetryAndFallback.
70321
70401
  */
70322
70402
  constructor(options = {}) {
70323
70403
  this.sessionId = options.sessionId || randomUUID6();
@@ -70403,6 +70483,32 @@ var init_ProbeAgent = __esm({
70403
70483
  this.enableTasks = !!options.enableTasks;
70404
70484
  this.taskManager = null;
70405
70485
  this.delegationManager = new DelegationManager();
70486
+ this.requestTimeout = options.requestTimeout ?? (() => {
70487
+ if (process.env.REQUEST_TIMEOUT) {
70488
+ const parsed = parseInt(process.env.REQUEST_TIMEOUT, 10);
70489
+ if (isNaN(parsed) || parsed < 1e3 || parsed > 36e5) {
70490
+ return 12e4;
70491
+ }
70492
+ return parsed;
70493
+ }
70494
+ return 12e4;
70495
+ })();
70496
+ if (this.debug) {
70497
+ console.log(`[DEBUG] Request timeout: ${this.requestTimeout}ms`);
70498
+ }
70499
+ this.maxOperationTimeout = options.maxOperationTimeout ?? (() => {
70500
+ if (process.env.MAX_OPERATION_TIMEOUT) {
70501
+ const parsed = parseInt(process.env.MAX_OPERATION_TIMEOUT, 10);
70502
+ if (isNaN(parsed) || parsed < 1e3 || parsed > 72e5) {
70503
+ return 3e5;
70504
+ }
70505
+ return parsed;
70506
+ }
70507
+ return 3e5;
70508
+ })();
70509
+ if (this.debug) {
70510
+ console.log(`[DEBUG] Max operation timeout: ${this.maxOperationTimeout}ms`);
70511
+ }
70406
70512
  this.retryConfig = options.retry || {};
70407
70513
  this.retryManager = null;
70408
70514
  this.fallbackConfig = options.fallback || null;
@@ -71022,88 +71128,98 @@ var init_ProbeAgent = __esm({
71022
71128
  }
71023
71129
  }
71024
71130
  /**
71025
- * Execute streamText with retry and fallback support
71026
- * @param {Object} options - streamText options
71027
- * @returns {Promise<Object>} - streamText result
71131
+ * Create a streamText-compatible result from an engine stream with timeout handling
71132
+ * @param {AsyncGenerator} engineStream - The engine's query result
71133
+ * @param {AbortSignal} abortSignal - Signal for aborting the operation
71134
+ * @param {number} requestTimeout - Per-request timeout in ms
71135
+ * @param {Object} timeoutState - Object with timeoutId property (mutable for cleanup)
71136
+ * @returns {Object} - streamText-compatible result with textStream
71028
71137
  * @private
71029
71138
  */
71030
- async streamTextWithRetryAndFallback(options) {
71031
- if (this.clientApiProvider === "claude-code" || process.env.USE_CLAUDE_CODE === "true") {
71139
+ _createEngineTextStreamResult(engineStream, abortSignal, requestTimeout, timeoutState) {
71140
+ const activityTimeout = (() => {
71141
+ const parsed = parseInt(process.env.ENGINE_ACTIVITY_TIMEOUT, 10);
71142
+ return isNaN(parsed) || parsed < ENGINE_ACTIVITY_TIMEOUT_MIN || parsed > ENGINE_ACTIVITY_TIMEOUT_MAX ? ENGINE_ACTIVITY_TIMEOUT_DEFAULT : parsed;
71143
+ })();
71144
+ const startTime = Date.now();
71145
+ async function* createTextStream() {
71146
+ let lastActivity = Date.now();
71032
71147
  try {
71033
- const engine = await this.getEngine();
71034
- if (engine && engine.query) {
71035
- const userMessages = options.messages.filter(
71036
- (m) => m.role === "user" && !m.content.includes("WARNING: You have reached the maximum tool iterations limit")
71037
- );
71038
- const lastUserMessage = userMessages[userMessages.length - 1];
71039
- const prompt = lastUserMessage ? lastUserMessage.content : "";
71040
- const engineOptions = {
71041
- maxTokens: options.maxTokens,
71042
- temperature: options.temperature,
71043
- messages: options.messages,
71044
- systemPrompt: options.messages.find((m) => m.role === "system")?.content
71045
- };
71046
- const engineStream = engine.query(prompt, engineOptions);
71047
- async function* createTextStream() {
71048
- for await (const message of engineStream) {
71049
- if (message.type === "text" && message.content) {
71050
- yield message.content;
71051
- } else if (typeof message === "string") {
71052
- yield message;
71053
- }
71054
- }
71148
+ for await (const message of engineStream) {
71149
+ if (abortSignal.aborted) {
71150
+ const abortError = new Error("Operation aborted");
71151
+ abortError.name = "AbortError";
71152
+ throw abortError;
71055
71153
  }
71056
- return {
71057
- textStream: createTextStream(),
71058
- usage: Promise.resolve({})
71059
- // Engine should handle its own usage tracking
71060
- // Add other streamText-compatible properties as needed
71061
- };
71062
- }
71063
- } catch (error) {
71064
- if (this.debug) {
71065
- console.log(`[DEBUG] Failed to use Claude Code engine, falling back to Vercel:`, error.message);
71066
- }
71067
- }
71068
- }
71069
- if (this.clientApiProvider === "codex" || process.env.USE_CODEX === "true") {
71070
- try {
71071
- const engine = await this.getEngine();
71072
- if (engine && engine.query) {
71073
- const userMessages = options.messages.filter(
71074
- (m) => m.role === "user" && !m.content.includes("WARNING: You have reached the maximum tool iterations limit")
71075
- );
71076
- const lastUserMessage = userMessages[userMessages.length - 1];
71077
- const prompt = lastUserMessage ? lastUserMessage.content : "";
71078
- const engineOptions = {
71079
- maxTokens: options.maxTokens,
71080
- temperature: options.temperature,
71081
- messages: options.messages,
71082
- systemPrompt: options.messages.find((m) => m.role === "system")?.content
71083
- };
71084
- const engineStream = engine.query(prompt, engineOptions);
71085
- async function* createTextStream() {
71086
- for await (const message of engineStream) {
71087
- if (message.type === "text" && message.content) {
71088
- yield message.content;
71089
- } else if (typeof message === "string") {
71090
- yield message;
71091
- }
71092
- }
71154
+ const now = Date.now();
71155
+ if (now - lastActivity > activityTimeout) {
71156
+ throw new Error(`Engine stream timeout - no activity for ${activityTimeout}ms`);
71157
+ }
71158
+ if (requestTimeout > 0 && now - startTime > requestTimeout) {
71159
+ throw new Error(`Engine stream timeout - request exceeded ${requestTimeout}ms`);
71160
+ }
71161
+ lastActivity = now;
71162
+ if (message.type === "text" && message.content) {
71163
+ yield message.content;
71164
+ } else if (typeof message === "string") {
71165
+ yield message;
71093
71166
  }
71094
- return {
71095
- textStream: createTextStream(),
71096
- usage: Promise.resolve({})
71097
- // Engine should handle its own usage tracking
71098
- // Add other streamText-compatible properties as needed
71099
- };
71100
71167
  }
71101
- } catch (error) {
71102
- if (this.debug) {
71103
- console.log(`[DEBUG] Failed to use Codex engine, falling back to Vercel:`, error.message);
71168
+ } finally {
71169
+ if (timeoutState.timeoutId) {
71170
+ clearTimeout(timeoutState.timeoutId);
71171
+ timeoutState.timeoutId = null;
71104
71172
  }
71105
71173
  }
71106
71174
  }
71175
+ return {
71176
+ textStream: createTextStream(),
71177
+ usage: Promise.resolve({})
71178
+ // Engine should handle its own usage tracking
71179
+ // Add other streamText-compatible properties as needed
71180
+ };
71181
+ }
71182
+ /**
71183
+ * Try to use an engine (claude-code or codex) for streaming
71184
+ * @param {Object} options - streamText options
71185
+ * @param {AbortController} controller - Abort controller for the operation
71186
+ * @param {Object} timeoutState - Mutable timeout state for cleanup
71187
+ * @returns {Promise<Object|null>} - Stream result or null if engine unavailable
71188
+ * @private
71189
+ */
71190
+ async _tryEngineStreamPath(options, controller, timeoutState) {
71191
+ const engine = await this.getEngine();
71192
+ if (!engine || !engine.query) {
71193
+ return null;
71194
+ }
71195
+ const userMessages = options.messages.filter(
71196
+ (m) => m.role === "user" && !m.content.includes("WARNING: You have reached the maximum tool iterations limit")
71197
+ );
71198
+ const lastUserMessage = userMessages[userMessages.length - 1];
71199
+ const prompt = lastUserMessage ? lastUserMessage.content : "";
71200
+ const engineOptions = {
71201
+ maxTokens: options.maxTokens,
71202
+ temperature: options.temperature,
71203
+ messages: options.messages,
71204
+ systemPrompt: options.messages.find((m) => m.role === "system")?.content,
71205
+ abortSignal: controller.signal
71206
+ };
71207
+ const engineStream = engine.query(prompt, engineOptions);
71208
+ return this._createEngineTextStreamResult(
71209
+ engineStream,
71210
+ controller.signal,
71211
+ this.requestTimeout,
71212
+ timeoutState
71213
+ );
71214
+ }
71215
+ /**
71216
+ * Execute streamText with Vercel AI SDK using retry/fallback logic
71217
+ * @param {Object} options - streamText options
71218
+ * @param {AbortController} controller - Abort controller for the operation
71219
+ * @returns {Promise<Object>} - Stream result
71220
+ * @private
71221
+ */
71222
+ async _executeWithVercelProvider(options, controller) {
71107
71223
  if (!this.retryManager) {
71108
71224
  this.retryManager = new RetryManager({
71109
71225
  maxRetries: this.retryConfig.maxRetries ?? 3,
@@ -71116,10 +71232,11 @@ var init_ProbeAgent = __esm({
71116
71232
  }
71117
71233
  if (!this.fallbackManager) {
71118
71234
  return await this.retryManager.executeWithRetry(
71119
- () => streamText2(options),
71235
+ () => streamText2({ ...options, abortSignal: controller.signal }),
71120
71236
  {
71121
71237
  provider: this.apiType,
71122
- model: this.model
71238
+ model: this.model,
71239
+ signal: controller.signal
71123
71240
  }
71124
71241
  );
71125
71242
  }
@@ -71127,7 +71244,8 @@ var init_ProbeAgent = __esm({
71127
71244
  async (provider, model, config) => {
71128
71245
  const fallbackOptions = {
71129
71246
  ...options,
71130
- model: provider(model)
71247
+ model: provider(model),
71248
+ abortSignal: controller.signal
71131
71249
  };
71132
71250
  const providerRetryManager = new RetryManager({
71133
71251
  maxRetries: config.maxRetries ?? this.retryConfig.maxRetries ?? 3,
@@ -71141,12 +71259,54 @@ var init_ProbeAgent = __esm({
71141
71259
  () => streamText2(fallbackOptions),
71142
71260
  {
71143
71261
  provider: config.provider,
71144
- model
71262
+ model,
71263
+ signal: controller.signal
71145
71264
  }
71146
71265
  );
71147
71266
  }
71148
71267
  );
71149
71268
  }
71269
+ /**
71270
+ * Execute streamText with retry and fallback support
71271
+ * @param {Object} options - streamText options
71272
+ * @returns {Promise<Object>} - streamText result
71273
+ * @private
71274
+ */
71275
+ async streamTextWithRetryAndFallback(options) {
71276
+ const controller = new AbortController();
71277
+ const timeoutState = { timeoutId: null };
71278
+ if (this.maxOperationTimeout && this.maxOperationTimeout > 0) {
71279
+ timeoutState.timeoutId = setTimeout(() => {
71280
+ controller.abort();
71281
+ if (this.debug) {
71282
+ console.log(`[DEBUG] Operation timed out after ${this.maxOperationTimeout}ms (max operation timeout)`);
71283
+ }
71284
+ }, this.maxOperationTimeout);
71285
+ }
71286
+ try {
71287
+ const useClaudeCode = this.clientApiProvider === "claude-code" || process.env.USE_CLAUDE_CODE === "true";
71288
+ const useCodex = this.clientApiProvider === "codex" || process.env.USE_CODEX === "true";
71289
+ if (useClaudeCode || useCodex) {
71290
+ try {
71291
+ const result = await this._tryEngineStreamPath(options, controller, timeoutState);
71292
+ if (result) {
71293
+ return result;
71294
+ }
71295
+ } catch (error) {
71296
+ if (this.debug) {
71297
+ const engineType = useClaudeCode ? "Claude Code" : "Codex";
71298
+ console.log(`[DEBUG] Failed to use ${engineType} engine, falling back to Vercel:`, error.message);
71299
+ }
71300
+ }
71301
+ }
71302
+ return await this._executeWithVercelProvider(options, controller);
71303
+ } finally {
71304
+ if (timeoutState.timeoutId) {
71305
+ clearTimeout(timeoutState.timeoutId);
71306
+ timeoutState.timeoutId = null;
71307
+ }
71308
+ }
71309
+ }
71150
71310
  /**
71151
71311
  * Initialize Anthropic model
71152
71312
  */
@@ -5,6 +5,8 @@ import { randomUUID } from 'crypto';
5
5
 
6
6
  const DEFAULT_MAX_OUTPUT_TOKENS = 20000;
7
7
  const CHARS_PER_TOKEN = 4; // Conservative approximation
8
+ const TAIL_TOKENS = 1000; // Number of tokens to show from the end of truncated output
9
+ const MIN_LIMIT_FOR_TAIL = 2000; // Minimum token limit to use head+tail split
8
10
 
9
11
  /**
10
12
  * Validate and normalize a token limit value.
@@ -61,10 +63,6 @@ export async function truncateIfNeeded(content, tokenCounter, sessionId, maxToke
61
63
  return { truncated: false, content };
62
64
  }
63
65
 
64
- // Truncate to approximately maxTokens worth of characters
65
- const maxChars = limit * CHARS_PER_TOKEN;
66
- const truncatedContent = content.substring(0, maxChars);
67
-
68
66
  // Try to write full output to temp file
69
67
  let tempFilePath = null;
70
68
  let fileError = null;
@@ -79,22 +77,37 @@ export async function truncateIfNeeded(content, tokenCounter, sessionId, maxToke
79
77
  tempFilePath = null;
80
78
  }
81
79
 
80
+ // Build truncated content with head + tail for better context
81
+ let truncatedBody;
82
+ const useTail = limit >= MIN_LIMIT_FOR_TAIL;
83
+
84
+ if (useTail) {
85
+ const headTokens = limit - TAIL_TOKENS;
86
+ const headChars = headTokens * CHARS_PER_TOKEN;
87
+ const tailChars = TAIL_TOKENS * CHARS_PER_TOKEN;
88
+ const headContent = content.substring(0, headChars);
89
+ const tailContent = content.substring(content.length - tailChars);
90
+ const omittedTokens = tokenCount - headTokens - TAIL_TOKENS;
91
+ truncatedBody = `${headContent}\n\n... ${omittedTokens} tokens omitted ...\n\n${tailContent}`;
92
+ } else {
93
+ const maxChars = limit * CHARS_PER_TOKEN;
94
+ truncatedBody = content.substring(0, maxChars);
95
+ }
96
+
82
97
  let message;
83
98
  if (tempFilePath) {
84
99
  message = `Output exceeded maximum size (${tokenCount} tokens, limit: ${limit}).
85
100
  Full output saved to: ${tempFilePath}
86
101
 
87
- --- Truncated Output (first ${limit} tokens approx) ---
88
- ${truncatedContent}
89
- ...
102
+ --- Truncated Output ---
103
+ ${truncatedBody}
90
104
  --- End of Truncated Output ---`;
91
105
  } else {
92
106
  message = `Output exceeded maximum size (${tokenCount} tokens, limit: ${limit}).
93
107
  Warning: Could not save full output to file (${fileError}).
94
108
 
95
- --- Truncated Output (first ${limit} tokens approx) ---
96
- ${truncatedContent}
97
- ...
109
+ --- Truncated Output ---
110
+ ${truncatedBody}
98
111
  --- End of Truncated Output ---`;
99
112
  }
100
113