@probelabs/probe 0.6.0-rc295 → 0.6.0-rc296

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. package/README.md +7 -0
  2. package/bin/binaries/{probe-v0.6.0-rc295-aarch64-apple-darwin.tar.gz → probe-v0.6.0-rc296-aarch64-apple-darwin.tar.gz} +0 -0
  3. package/bin/binaries/{probe-v0.6.0-rc295-aarch64-unknown-linux-musl.tar.gz → probe-v0.6.0-rc296-aarch64-unknown-linux-musl.tar.gz} +0 -0
  4. package/bin/binaries/{probe-v0.6.0-rc295-x86_64-apple-darwin.tar.gz → probe-v0.6.0-rc296-x86_64-apple-darwin.tar.gz} +0 -0
  5. package/bin/binaries/{probe-v0.6.0-rc295-x86_64-pc-windows-msvc.zip → probe-v0.6.0-rc296-x86_64-pc-windows-msvc.zip} +0 -0
  6. package/bin/binaries/{probe-v0.6.0-rc295-x86_64-unknown-linux-musl.tar.gz → probe-v0.6.0-rc296-x86_64-unknown-linux-musl.tar.gz} +0 -0
  7. package/build/agent/ProbeAgent.d.ts +8 -2
  8. package/build/agent/ProbeAgent.js +683 -10
  9. package/build/agent/mcp/client.js +81 -4
  10. package/build/agent/mcp/xmlBridge.js +11 -0
  11. package/build/agent/otelLogBridge.js +184 -0
  12. package/build/agent/simpleTelemetry.js +8 -0
  13. package/build/delegate.js +75 -6
  14. package/build/index.js +6 -2
  15. package/build/tools/common.js +84 -11
  16. package/build/tools/vercel.js +78 -18
  17. package/cjs/agent/ProbeAgent.cjs +858 -32
  18. package/cjs/agent/simpleTelemetry.cjs +112 -0
  19. package/cjs/index.cjs +970 -32
  20. package/index.d.ts +26 -0
  21. package/package.json +1 -1
  22. package/src/agent/ProbeAgent.d.ts +8 -2
  23. package/src/agent/ProbeAgent.js +683 -10
  24. package/src/agent/mcp/client.js +81 -4
  25. package/src/agent/mcp/xmlBridge.js +11 -0
  26. package/src/agent/otelLogBridge.js +184 -0
  27. package/src/agent/simpleTelemetry.js +8 -0
  28. package/src/delegate.js +75 -6
  29. package/src/index.js +6 -2
  30. package/src/tools/common.js +84 -11
  31. package/src/tools/vercel.js +78 -18
@@ -31,7 +31,7 @@ import { createAnthropic } from '@ai-sdk/anthropic';
31
31
  import { createOpenAI } from '@ai-sdk/openai';
32
32
  import { createGoogleGenerativeAI } from '@ai-sdk/google';
33
33
  import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock';
34
- import { streamText, tool, stepCountIs, jsonSchema, Output } from 'ai';
34
+ import { streamText, generateText, tool, stepCountIs, jsonSchema, Output } from 'ai';
35
35
  import { randomUUID } from 'crypto';
36
36
  import { EventEmitter } from 'events';
37
37
  import { existsSync } from 'fs';
@@ -214,6 +214,7 @@ export class ProbeAgent {
214
214
  this.debug = options.debug || process.env.DEBUG === '1';
215
215
  this.cancelled = false;
216
216
  this._abortController = new AbortController();
217
+ this._activeSubagents = new Map(); // sessionId → subagent ProbeAgent instance
217
218
  this.tracer = options.tracer || null;
218
219
  this.outline = !!options.outline;
219
220
  this.searchDelegate = options.searchDelegate !== undefined ? !!options.searchDelegate : true;
@@ -391,10 +392,12 @@ export class ProbeAgent {
391
392
  console.log(`[DEBUG] Max operation timeout: ${this.maxOperationTimeout}ms`);
392
393
  }
393
394
 
394
- // Timeout behavior: 'graceful' (default) winds down with bonus steps, 'hard' aborts immediately
395
+ // Timeout behavior: 'graceful' (default) winds down with bonus steps, 'hard' aborts immediately,
396
+ // 'negotiated' lets the AI request more time via request_more_time tool
395
397
  this.timeoutBehavior = options.timeoutBehavior ?? (() => {
396
398
  const val = process.env.TIMEOUT_BEHAVIOR;
397
399
  if (val === 'hard') return 'hard';
400
+ if (val === 'negotiated') return 'negotiated';
398
401
  return 'graceful';
399
402
  })();
400
403
 
@@ -404,8 +407,32 @@ export class ProbeAgent {
404
407
  return (isNaN(parsed) || parsed < 1 || parsed > 20) ? 4 : parsed;
405
408
  })();
406
409
 
410
+ // Negotiated timeout: total extra time budget in ms (default 30 min)
411
+ this.negotiatedTimeoutBudget = options.negotiatedTimeoutBudget ?? (() => {
412
+ const parsed = parseInt(process.env.NEGOTIATED_TIMEOUT_BUDGET, 10);
413
+ return (isNaN(parsed) || parsed < 60000 || parsed > 7200000) ? 1800000 : parsed;
414
+ })();
415
+
416
+ // Negotiated timeout: max extension requests (default 3)
417
+ this.negotiatedTimeoutMaxRequests = options.negotiatedTimeoutMaxRequests ?? (() => {
418
+ const parsed = parseInt(process.env.NEGOTIATED_TIMEOUT_MAX_REQUESTS, 10);
419
+ return (isNaN(parsed) || parsed < 1 || parsed > 10) ? 3 : parsed;
420
+ })();
421
+
422
+ // Negotiated timeout: max ms per extension request (default 10 min)
423
+ this.negotiatedTimeoutMaxPerRequest = options.negotiatedTimeoutMaxPerRequest ?? (() => {
424
+ const parsed = parseInt(process.env.NEGOTIATED_TIMEOUT_MAX_PER_REQUEST, 10);
425
+ return (isNaN(parsed) || parsed < 60000 || parsed > 3600000) ? 600000 : parsed;
426
+ })();
427
+
428
+ // Graceful stop deadline: how long to wait for subagents/MCP after observer declines (default 45s)
429
+ this.gracefulStopDeadline = options.gracefulStopDeadline ?? (() => {
430
+ const parsed = parseInt(process.env.GRACEFUL_STOP_DEADLINE, 10);
431
+ return (isNaN(parsed) || parsed < 5000 || parsed > 300000) ? 45000 : parsed;
432
+ })();
433
+
407
434
  if (this.debug) {
408
- console.log(`[DEBUG] Timeout behavior: ${this.timeoutBehavior}, bonus steps: ${this.gracefulTimeoutBonusSteps}`);
435
+ console.log(`[DEBUG] Timeout behavior: ${this.timeoutBehavior}, bonus steps: ${this.gracefulTimeoutBonusSteps}, graceful stop deadline: ${this.gracefulStopDeadline}ms`);
409
436
  }
410
437
 
411
438
  // Retry configuration
@@ -843,6 +870,17 @@ export class ProbeAgent {
843
870
  searchDelegateModel: this.searchDelegateModel,
844
871
  delegationManager: this.delegationManager, // Per-instance delegation limits
845
872
  parentAbortSignal: this._abortController.signal, // Propagate cancellation to delegations
873
+ // Timeout settings for delegate subagents to inherit
874
+ timeoutBehavior: this.timeoutBehavior,
875
+ maxOperationTimeout: this.maxOperationTimeout,
876
+ requestTimeout: this.requestTimeout,
877
+ gracefulTimeoutBonusSteps: this.gracefulTimeoutBonusSteps,
878
+ negotiatedTimeoutBudget: this.negotiatedTimeoutBudget,
879
+ negotiatedTimeoutMaxRequests: this.negotiatedTimeoutMaxRequests,
880
+ negotiatedTimeoutMaxPerRequest: this.negotiatedTimeoutMaxPerRequest,
881
+ parentOperationStartTime: this._operationStartTime, // For remaining budget calculation
882
+ onSubagentCreated: (sid, subagent) => this._registerSubagent(sid, subagent),
883
+ onSubagentCompleted: (sid) => this._unregisterSubagent(sid),
846
884
  outputBuffer: this._outputBuffer,
847
885
  concurrencyLimiter: this.concurrencyLimiter, // Global AI concurrency limiter
848
886
  isToolAllowed,
@@ -1577,8 +1615,8 @@ export class ProbeAgent {
1577
1615
  // This timer only handles the hard abort for non-graceful mode and engine paths.
1578
1616
  if (this.maxOperationTimeout && this.maxOperationTimeout > 0) {
1579
1617
  const gts = this._gracefulTimeoutState;
1580
- if (this.timeoutBehavior === 'graceful' && gts) {
1581
- // Graceful mode: timer is managed in run() method.
1618
+ if ((this.timeoutBehavior === 'graceful' || this.timeoutBehavior === 'negotiated') && gts) {
1619
+ // Graceful/negotiated mode: timer is managed in run() method.
1582
1620
  // Only set up the AbortController link (no timer here).
1583
1621
  } else {
1584
1622
  // Hard mode: immediate abort (legacy behavior)
@@ -3269,6 +3307,9 @@ Follow these instructions carefully:
3269
3307
  options = schemaOrOptions || {};
3270
3308
  }
3271
3309
 
3310
+ // Track operation start time for delegate budget calculation
3311
+ this._operationStartTime = Date.now();
3312
+
3272
3313
  try {
3273
3314
  // Track initial history length for storage
3274
3315
  const oldHistoryLength = this.history.length;
@@ -3392,7 +3433,11 @@ Follow these instructions carefully:
3392
3433
  }
3393
3434
 
3394
3435
  let currentIteration = 0;
3395
- let finalResult = 'I was unable to complete your request due to reaching the maximum number of tool iterations.';
3436
+ let finalResult = null; // Will be set to a descriptive failure message if max iterations reached
3437
+ const DEFAULT_MAX_ITER_MSG = 'I was unable to complete your request due to reaching the maximum number of tool iterations.';
3438
+ // Track all tool calls across iterations for failure diagnostics
3439
+ const _toolCallLog = []; // { name, args (truncated) }
3440
+ let abortSummaryTaken = false; // Set when negotiated timeout abort summary runs — skip completionPrompt
3396
3441
 
3397
3442
  // Adjust max iterations if schema is provided
3398
3443
  // +1 for schema formatting
@@ -3589,6 +3634,274 @@ Follow these instructions carefully:
3589
3634
  };
3590
3635
  this._gracefulTimeoutState = gracefulTimeoutState;
3591
3636
 
3637
+ // Negotiated timeout state — used when timeoutBehavior === 'negotiated'
3638
+ // The "timeout observer" pattern: when timeout fires, a separate LLM call
3639
+ // decides whether to extend — this works even when the main loop is blocked
3640
+ // by a long-running delegate or MCP tool.
3641
+ const negotiatedTimeoutState = {
3642
+ extensionsUsed: 0,
3643
+ totalExtraTimeMs: 0,
3644
+ softTimeoutId: null,
3645
+ hardAbortTimeoutId: null,
3646
+ maxRequests: this.negotiatedTimeoutMaxRequests,
3647
+ maxPerRequestMs: this.negotiatedTimeoutMaxPerRequest,
3648
+ budgetMs: this.negotiatedTimeoutBudget,
3649
+ observerRunning: false, // true while observer LLM call is in flight
3650
+ extensionMessage: null, // message to show in prepareStep after extension granted
3651
+ startTime: Date.now(),
3652
+ };
3653
+
3654
+ this._negotiatedTimeoutState = negotiatedTimeoutState;
3655
+
3656
+ // Track in-flight tools via event emitter
3657
+ const activeTools = new Map(); // toolCallId → { name, args, startedAt }
3658
+ this._activeTools = activeTools;
3659
+
3660
+ const onToolCall = (event) => {
3661
+ // Use a composite key: name + truncated args for dedup
3662
+ const key = event.toolCallId || `${event.name}:${JSON.stringify(event.args || {}).slice(0, 100)}`;
3663
+ if (event.status === 'started') {
3664
+ activeTools.set(key, {
3665
+ name: event.name,
3666
+ args: event.args,
3667
+ startedAt: event.timestamp || new Date().toISOString(),
3668
+ });
3669
+ } else if (event.status === 'completed' || event.status === 'error') {
3670
+ activeTools.delete(key);
3671
+ }
3672
+ };
3673
+ this.events.on('toolCall', onToolCall);
3674
+
3675
+ // Timeout observer: separate LLM call that decides whether to extend.
3676
+ // Runs independently of the main agent loop — works even when blocked by delegates.
3677
+ const runTimeoutObserver = async () => {
3678
+ if (negotiatedTimeoutState.observerRunning) return;
3679
+ negotiatedTimeoutState.observerRunning = true;
3680
+
3681
+ const remainingRequests = negotiatedTimeoutState.maxRequests - negotiatedTimeoutState.extensionsUsed;
3682
+ const remainingBudgetMs = negotiatedTimeoutState.budgetMs - negotiatedTimeoutState.totalExtraTimeMs;
3683
+ const maxPerReqMin = Math.round(negotiatedTimeoutState.maxPerRequestMs / 60000);
3684
+ const elapsedMin = Math.round((Date.now() - negotiatedTimeoutState.startTime) / 60000);
3685
+
3686
+ // Check if extensions/budget exhausted — go straight to graceful wind-down
3687
+ if (remainingRequests <= 0 || remainingBudgetMs <= 0) {
3688
+ if (this.debug) {
3689
+ console.log(`[DEBUG] Timeout observer: no extensions/budget remaining — aborting in-flight tools and triggering graceful wind-down`);
3690
+ }
3691
+ if (this.tracer) {
3692
+ this.tracer.addEvent('negotiated_timeout.observer_exhausted', {
3693
+ extensions_used: negotiatedTimeoutState.extensionsUsed,
3694
+ max_requests: negotiatedTimeoutState.maxRequests,
3695
+ total_extra_time_ms: negotiatedTimeoutState.totalExtraTimeMs,
3696
+ budget_ms: negotiatedTimeoutState.budgetMs,
3697
+ elapsed_min: elapsedMin,
3698
+ active_tools: Array.from(activeTools.values()).map(t => t.name),
3699
+ });
3700
+ }
3701
+ // Two-phase graceful stop: signal subagents/MCP to wind down, hard abort after deadline
3702
+ await this._initiateGracefulStop(gracefulTimeoutState, 'budget/extensions exhausted');
3703
+ negotiatedTimeoutState.observerRunning = false;
3704
+ return;
3705
+ }
3706
+
3707
+ // Build context for the observer
3708
+ const activeToolsList = Array.from(activeTools.values());
3709
+ const now = Date.now();
3710
+ const formatDuration = (ms) => {
3711
+ const totalSec = Math.round(ms / 1000);
3712
+ if (totalSec < 60) return `${totalSec}s`;
3713
+ const min = Math.floor(totalSec / 60);
3714
+ const sec = totalSec % 60;
3715
+ if (min < 60) return `${min}m ${sec}s`;
3716
+ const hr = Math.floor(min / 60);
3717
+ const remainMin = min % 60;
3718
+ return `${hr}h ${remainMin}m`;
3719
+ };
3720
+ const activeToolsDesc = activeToolsList.length > 0
3721
+ ? activeToolsList.map(t => {
3722
+ const runningForMs = now - new Date(t.startedAt).getTime();
3723
+ return `- ${t.name}(${JSON.stringify(t.args || {}).slice(0, 200)}) — running for ${formatDuration(runningForMs)}`;
3724
+ }).join('\n')
3725
+ : '(none currently running)';
3726
+
3727
+ // Summarize recent history (last few exchanges, capped)
3728
+ const recentHistory = this.history.slice(-6).map(msg => {
3729
+ const content = typeof msg.content === 'string'
3730
+ ? msg.content.slice(0, 300)
3731
+ : JSON.stringify(msg.content).slice(0, 300);
3732
+ return `[${msg.role}]: ${content}`;
3733
+ }).join('\n');
3734
+
3735
+ const observerPrompt = `You are a timeout observer for an AI coding agent. The agent has been working for ${elapsedMin} minute(s) and has reached its time limit.
3736
+
3737
+ ## Recent Conversation
3738
+ ${recentHistory || '(no history yet)'}
3739
+
3740
+ ## Currently Running Tools
3741
+ ${activeToolsDesc}
3742
+
3743
+ ## Budget
3744
+ - Extensions used: ${negotiatedTimeoutState.extensionsUsed}/${negotiatedTimeoutState.maxRequests}
3745
+ - Time budget remaining: ${Math.round(remainingBudgetMs / 60000)} minutes
3746
+ - Max per extension: ${maxPerReqMin} minutes
3747
+
3748
+ Decide whether the agent should get more time. EXTEND if:
3749
+ - Tools are actively running (especially delegates or complex analysis) — they need time to finish
3750
+ - The agent is making clear progress on a complex task
3751
+ - New information is being gathered that will improve the final answer
3752
+
3753
+ DO NOT EXTEND if:
3754
+ - The agent appears stuck in a loop (repeating the same tool calls or getting the same errors)
3755
+ - The conversation shows the agent retrying failed operations without changing approach
3756
+ - The agent has enough information to answer but keeps searching for more
3757
+ - Tool calls are returning empty or error results repeatedly
3758
+ - The agent is doing redundant work (searching for things it already found)
3759
+
3760
+ A stuck agent will not recover with more time — it will just burn the budget. Better to force it to answer with what it has.
3761
+
3762
+ Respond with ONLY valid JSON (no markdown, no explanation):
3763
+ {"extend": true, "minutes": <1-${maxPerReqMin}>, "reason": "your reason here"}
3764
+ or
3765
+ {"extend": false, "reason": "your reason here"}`;
3766
+
3767
+ const observerFn = async () => {
3768
+ const modelInstance = this.provider ? this.provider(this.model) : this.model;
3769
+
3770
+ if (this.debug) {
3771
+ console.log(`[DEBUG] Timeout observer: making LLM call (${activeToolsList.length} active tools, ${elapsedMin} min elapsed)`);
3772
+ }
3773
+
3774
+ if (this.tracer) {
3775
+ this.tracer.addEvent('negotiated_timeout.observer_invoked', {
3776
+ elapsed_min: elapsedMin,
3777
+ active_tools: activeToolsList.map(t => t.name),
3778
+ active_tools_detail: activeToolsList.map(t => ({
3779
+ name: t.name,
3780
+ running_for_ms: now - new Date(t.startedAt).getTime(),
3781
+ args_preview: JSON.stringify(t.args || {}).slice(0, 100),
3782
+ })),
3783
+ active_tools_count: activeToolsList.length,
3784
+ extensions_used: negotiatedTimeoutState.extensionsUsed,
3785
+ remaining_requests: remainingRequests,
3786
+ remaining_budget_ms: remainingBudgetMs,
3787
+ history_length: this.history.length,
3788
+ });
3789
+ }
3790
+
3791
+ const observerResult = await generateText({
3792
+ model: modelInstance,
3793
+ messages: [{ role: 'user', content: observerPrompt }],
3794
+ maxTokens: 500,
3795
+ });
3796
+
3797
+ const responseText = observerResult.text.trim();
3798
+
3799
+ if (this.tracer) {
3800
+ this.tracer.addEvent('negotiated_timeout.observer_response', {
3801
+ response_text: responseText,
3802
+ usage_prompt_tokens: observerResult.usage?.promptTokens,
3803
+ usage_completion_tokens: observerResult.usage?.completionTokens,
3804
+ });
3805
+ }
3806
+
3807
+ // Parse JSON response — handle potential markdown wrapping
3808
+ const jsonStr = responseText.replace(/^```(?:json)?\s*/, '').replace(/\s*```$/, '');
3809
+ const decision = JSON.parse(jsonStr);
3810
+
3811
+ if (decision.extend && decision.minutes > 0) {
3812
+ const requestedMs = Math.min(decision.minutes, maxPerReqMin) * 60000;
3813
+ const grantedMs = Math.min(requestedMs, remainingBudgetMs, negotiatedTimeoutState.maxPerRequestMs);
3814
+ const grantedMin = Math.round(grantedMs / 60000 * 10) / 10;
3815
+
3816
+ // Update state
3817
+ negotiatedTimeoutState.extensionsUsed++;
3818
+ negotiatedTimeoutState.totalExtraTimeMs += grantedMs;
3819
+
3820
+ // Set message for prepareStep to show when main loop unblocks
3821
+ negotiatedTimeoutState.extensionMessage =
3822
+ `⏰ Time limit was reached. The timeout observer granted ${grantedMin} more minute(s) ` +
3823
+ `(reason: ${decision.reason || 'work in progress'}). ` +
3824
+ `Extensions remaining: ${negotiatedTimeoutState.maxRequests - negotiatedTimeoutState.extensionsUsed}. ` +
3825
+ `Continue your work efficiently.`;
3826
+
3827
+ // Schedule next observer call
3828
+ negotiatedTimeoutState.softTimeoutId = setTimeout(() => {
3829
+ runTimeoutObserver();
3830
+ }, grantedMs);
3831
+
3832
+ if (this.debug) {
3833
+ console.log(`[DEBUG] Timeout observer: granted ${grantedMin} min (reason: ${decision.reason}). Extensions: ${negotiatedTimeoutState.extensionsUsed}/${negotiatedTimeoutState.maxRequests}`);
3834
+ }
3835
+
3836
+ if (this.tracer) {
3837
+ this.tracer.addEvent('negotiated_timeout.observer_extended', {
3838
+ decision_reason: decision.reason,
3839
+ requested_minutes: decision.minutes,
3840
+ granted_ms: grantedMs,
3841
+ granted_min: grantedMin,
3842
+ extensions_used: negotiatedTimeoutState.extensionsUsed,
3843
+ max_requests: negotiatedTimeoutState.maxRequests,
3844
+ total_extra_time_ms: negotiatedTimeoutState.totalExtraTimeMs,
3845
+ budget_remaining_ms: remainingBudgetMs - grantedMs,
3846
+ active_tools: activeToolsList.map(t => t.name),
3847
+ active_tools_count: activeToolsList.length,
3848
+ });
3849
+ }
3850
+ } else {
3851
+ // Observer decided not to extend — two-phase graceful stop
3852
+ if (this.debug) {
3853
+ console.log(`[DEBUG] Timeout observer: declined extension (reason: ${decision.reason}). Initiating graceful stop.`);
3854
+ }
3855
+
3856
+ if (this.tracer) {
3857
+ this.tracer.addEvent('negotiated_timeout.observer_declined', {
3858
+ decision_reason: decision.reason,
3859
+ extensions_used: negotiatedTimeoutState.extensionsUsed,
3860
+ total_extra_time_ms: negotiatedTimeoutState.totalExtraTimeMs,
3861
+ elapsed_min: elapsedMin,
3862
+ active_tools: activeToolsList.map(t => t.name),
3863
+ });
3864
+ }
3865
+
3866
+ await this._initiateGracefulStop(gracefulTimeoutState, `observer declined: ${decision.reason}`);
3867
+ }
3868
+ };
3869
+
3870
+ try {
3871
+ if (this.tracer) {
3872
+ await this.tracer.withSpan('negotiated_timeout.observer', observerFn, {
3873
+ 'timeout.elapsed_min': elapsedMin,
3874
+ 'timeout.extensions_used': negotiatedTimeoutState.extensionsUsed,
3875
+ 'timeout.active_tools_count': activeToolsList.length,
3876
+ 'timeout.remaining_budget_ms': remainingBudgetMs,
3877
+ });
3878
+ } else {
3879
+ await observerFn();
3880
+ }
3881
+ } catch (err) {
3882
+ // Observer call failed — fall back to graceful stop
3883
+ if (this.debug) {
3884
+ console.log(`[DEBUG] Timeout observer: LLM call failed (${err.message}). Initiating graceful stop.`);
3885
+ }
3886
+
3887
+ if (this.tracer) {
3888
+ this.tracer.addEvent('negotiated_timeout.observer_error', {
3889
+ error_message: err.message,
3890
+ error_name: err.name,
3891
+ extensions_used: negotiatedTimeoutState.extensionsUsed,
3892
+ elapsed_min: elapsedMin,
3893
+ });
3894
+ }
3895
+
3896
+ await this._initiateGracefulStop(gracefulTimeoutState, `observer error: ${err.message}`);
3897
+ } finally {
3898
+ negotiatedTimeoutState.observerRunning = false;
3899
+ }
3900
+ };
3901
+
3902
+ // Store observer function on state for testability
3903
+ negotiatedTimeoutState.runObserver = runTimeoutObserver;
3904
+
3592
3905
  // Context compaction retry loop
3593
3906
  let compactionAttempted = false;
3594
3907
  while (true) {
@@ -3670,6 +3983,17 @@ Follow these instructions carefully:
3670
3983
  return false;
3671
3984
  },
3672
3985
  prepareStep: ({ steps, stepNumber }) => {
3986
+ // Negotiated timeout: if the observer granted an extension while the main
3987
+ // loop was blocked (e.g. during a delegate call), inform the AI
3988
+ if (negotiatedTimeoutState.extensionMessage && !gracefulTimeoutState.triggered) {
3989
+ const msg = negotiatedTimeoutState.extensionMessage;
3990
+ negotiatedTimeoutState.extensionMessage = null; // show once
3991
+ if (this.debug) {
3992
+ console.log(`[DEBUG] prepareStep: delivering timeout observer extension message`);
3993
+ }
3994
+ return { userMessage: msg };
3995
+ }
3996
+
3673
3997
  // Graceful timeout wind-down: force text-only response with wrap-up reminder
3674
3998
  if (gracefulTimeoutState.triggered) {
3675
3999
  gracefulTimeoutState.bonusStepsUsed++;
@@ -3699,10 +4023,19 @@ Follow these instructions carefully:
3699
4023
  return { toolChoice: 'none' };
3700
4024
  }
3701
4025
 
3702
- // Last-iteration warning
4026
+ // Last-iteration warning — force text-only and tell the AI to summarize
3703
4027
  if (stepNumber === maxIterations - 1) {
4028
+ // Build a brief summary of tools used so the model can reference them in its answer
4029
+ const searchesTried = _toolCallLog
4030
+ .filter(tc => tc.name === 'search')
4031
+ .map(tc => `"${tc.args.query || ''}"${tc.args.exact ? ' (exact)' : ''}`)
4032
+ .filter((v, i, a) => a.indexOf(v) === i); // unique
4033
+ const searchSummary = searchesTried.length > 0
4034
+ ? `\nSearches attempted: ${searchesTried.join(', ')}`
4035
+ : '';
3704
4036
  return {
3705
4037
  toolChoice: 'none',
4038
+ userMessage: `⚠️ LAST ITERATION — you are out of tool calls. Provide your BEST answer NOW with the information gathered so far. If you could not find what was requested, explain exactly what you searched for and why it did not work, so the caller can try a different approach.${searchSummary}`
3706
4039
  };
3707
4040
  }
3708
4041
 
@@ -3797,6 +4130,13 @@ Double-check your response based on the criteria above. If everything looks good
3797
4130
  currentIteration++;
3798
4131
  toolContext.currentIteration = currentIteration;
3799
4132
 
4133
+ // Track tool calls for failure diagnostics
4134
+ if (toolCalls?.length > 0) {
4135
+ for (const tc of toolCalls) {
4136
+ _toolCallLog.push({ name: tc.toolName, args: tc.args || {} });
4137
+ }
4138
+ }
4139
+
3800
4140
  // Record telemetry — include model's reasoning and tool call details
3801
4141
  if (this.tracer) {
3802
4142
  const stepEvent = {
@@ -3892,7 +4232,7 @@ Double-check your response based on the criteria above. If everything looks good
3892
4232
  const executeAIRequest = async () => {
3893
4233
  const result = await this.streamTextWithRetryAndFallback(streamOptions);
3894
4234
 
3895
- // Set up graceful timeout timer now that streamText is running.
4235
+ // Set up timeout timer now that streamText is running.
3896
4236
  // streamText() returns immediately — the actual tool loop runs asynchronously
3897
4237
  // and completes when we await result.steps/result.text below.
3898
4238
  let gracefulTimeoutId = null;
@@ -3915,6 +4255,16 @@ Double-check your response based on the criteria above. If everything looks good
3915
4255
  }, this.maxOperationTimeout);
3916
4256
  }
3917
4257
 
4258
+ // Negotiated timeout: run the timeout observer (separate LLM call)
4259
+ if (this.timeoutBehavior === 'negotiated' && this.maxOperationTimeout > 0) {
4260
+ negotiatedTimeoutState.softTimeoutId = setTimeout(() => {
4261
+ if (this.debug) {
4262
+ console.log(`[DEBUG] Soft timeout after ${this.maxOperationTimeout}ms — invoking timeout observer`);
4263
+ }
4264
+ runTimeoutObserver();
4265
+ }, this.maxOperationTimeout);
4266
+ }
4267
+
3918
4268
  try {
3919
4269
  // Use only the last step's text as the final answer.
3920
4270
  // result.text concatenates ALL steps (including intermediate planning text),
@@ -3944,6 +4294,15 @@ Double-check your response based on the criteria above. If everything looks good
3944
4294
  // Clean up graceful timeout timers
3945
4295
  if (gracefulTimeoutId) clearTimeout(gracefulTimeoutId);
3946
4296
  if (hardAbortTimeoutId) clearTimeout(hardAbortTimeoutId);
4297
+ // Clean up negotiated timeout timer
4298
+ if (negotiatedTimeoutState.softTimeoutId) clearTimeout(negotiatedTimeoutState.softTimeoutId);
4299
+ // Clean up graceful stop hard abort timer
4300
+ if (this._gracefulStopHardAbortId) {
4301
+ clearTimeout(this._gracefulStopHardAbortId);
4302
+ this._gracefulStopHardAbortId = null;
4303
+ }
4304
+ // Remove in-flight tool tracker
4305
+ this.events.removeListener('toolCall', onToolCall);
3947
4306
  }
3948
4307
  };
3949
4308
 
@@ -3994,7 +4353,7 @@ Double-check your response based on the criteria above. If everything looks good
3994
4353
  if (gracefulTimeoutState.triggered) {
3995
4354
  const timeoutNotice = '**Note: This response was generated under a time constraint. The research may be incomplete, and some planned searches or analysis steps were not completed.**\n\n';
3996
4355
 
3997
- if (!finalResult || finalResult === 'I was unable to complete your request due to reaching the maximum number of tool iterations.') {
4356
+ if (!finalResult || finalResult === DEFAULT_MAX_ITER_MSG || finalResult.startsWith('I was unable to complete your request after')) {
3998
4357
  // Wind-down produced empty text — try to collect useful content.
3999
4358
  // Some models (e.g., Gemini) return finishReason:'other' with empty text
4000
4359
  // when forced from tool-calling to text-only mode mid-task.
@@ -4056,7 +4415,7 @@ Double-check your response based on the criteria above. If everything looks good
4056
4415
  // If the model answered without tool calls (or its final step had none),
4057
4416
  // stopWhen never gets a chance to force continuation. In that case, run
4058
4417
  // a second streamText pass with the completion prompt injected.
4059
- if (this.completionPrompt && !options._completionPromptProcessed && !completionPromptInjected && finalResult) {
4418
+ if (this.completionPrompt && !options._completionPromptProcessed && !completionPromptInjected && !abortSummaryTaken && finalResult) {
4060
4419
  completionPromptInjected = true;
4061
4420
  preCompletionResult = finalResult;
4062
4421
 
@@ -4142,6 +4501,146 @@ Double-check your response based on the criteria above. If everything looks good
4142
4501
  break; // Success
4143
4502
 
4144
4503
  } catch (error) {
4504
+ // Negotiated timeout observer aborted in-flight tools to trigger wind-down.
4505
+ // Give the AI a dedicated summary call with full conversation context so it
4506
+ // can explain what it accomplished and what remains incomplete.
4507
+ if (gracefulTimeoutState.triggered && error?.name === 'AbortError') {
4508
+ if (this.debug) {
4509
+ console.log(`[DEBUG] Negotiated timeout: abort caught — making summary LLM call with conversation context`);
4510
+ }
4511
+
4512
+ if (this.tracer) {
4513
+ this.tracer.addEvent('negotiated_timeout.abort_summary_started', {
4514
+ conversation_messages: currentMessages.length,
4515
+ has_schema: !!options.schema,
4516
+ has_tasks: !!(this.enableTasks && this.taskManager),
4517
+ });
4518
+ }
4519
+
4520
+ try {
4521
+ // Build task status context if tasks are active
4522
+ let taskContext = '';
4523
+ if (this.enableTasks && this.taskManager) {
4524
+ const taskSummary = this.taskManager.getTaskSummary?.();
4525
+ if (taskSummary) {
4526
+ taskContext = `\n\n## Task Status\n${taskSummary}\n\nAcknowledge which tasks were completed and which were not.`;
4527
+ }
4528
+ }
4529
+
4530
+ // Build schema instructions if a schema is required
4531
+ let schemaContext = '';
4532
+ if (options.schema) {
4533
+ try {
4534
+ const parsedSchema = typeof options.schema === 'string' ? JSON.parse(options.schema) : options.schema;
4535
+ schemaContext = `\n\nIMPORTANT: Your response MUST be valid JSON matching this schema:\n${JSON.stringify(parsedSchema, null, 2)}\n\n` +
4536
+ `Respond with ONLY valid JSON — no markdown, no explanation, no text outside the JSON object. ` +
4537
+ `Include all findings and partial results within the JSON structure. ` +
4538
+ `If fields cannot be fully populated due to the interruption, use partial data or null values as appropriate.`;
4539
+ } catch {}
4540
+ }
4541
+
4542
+ const summaryPrompt = `Your operation was interrupted by a timeout observer because the time limit was reached. ` +
4543
+ `Some of your tool calls were cancelled mid-execution.\n\n` +
4544
+ `Please provide a DETAILED summary of:\n` +
4545
+ `1. What you were asked to do (the original task)\n` +
4546
+ `2. What you accomplished — include ALL findings, code snippets, data, and conclusions you gathered\n` +
4547
+ `3. What was still in progress or not yet started\n` +
4548
+ `4. Any partial results or recommendations you can offer based on what you found so far` +
4549
+ `${taskContext}${schemaContext}\n\n` +
4550
+ `Be thorough — this is the user's only response. Include all useful information you collected.`;
4551
+
4552
+ const summaryMessages = [
4553
+ ...currentMessages,
4554
+ { role: 'user', content: summaryPrompt },
4555
+ ];
4556
+
4557
+ const modelInstance = this.provider ? this.provider(this.model) : this.model;
4558
+
4559
+ const summaryFn = async () => {
4560
+ const summaryResult = await generateText({
4561
+ model: modelInstance,
4562
+ messages: this.prepareMessagesWithImages(summaryMessages),
4563
+ maxTokens: 4000,
4564
+ });
4565
+
4566
+ if (this.tracer) {
4567
+ this.tracer.addEvent('negotiated_timeout.abort_summary_completed', {
4568
+ summary_length: summaryResult.text?.length || 0,
4569
+ usage_prompt_tokens: summaryResult.usage?.promptTokens,
4570
+ usage_completion_tokens: summaryResult.usage?.completionTokens,
4571
+ });
4572
+ }
4573
+
4574
+ // Record token usage for the summary call
4575
+ if (summaryResult.usage) {
4576
+ this.tokenCounter.recordUsage(summaryResult.usage);
4577
+ }
4578
+
4579
+ return summaryResult.text;
4580
+ };
4581
+
4582
+ let summaryText;
4583
+ if (this.tracer) {
4584
+ summaryText = await this.tracer.withSpan('negotiated_timeout.abort_summary', summaryFn, {
4585
+ 'summary.conversation_messages': currentMessages.length,
4586
+ });
4587
+ } else {
4588
+ summaryText = await summaryFn();
4589
+ }
4590
+
4591
+ if (options.schema) {
4592
+ // Schema mode: use the summary text as-is (it should already be JSON).
4593
+ // Don't prepend a notice — it would break the JSON structure.
4594
+ // The schema validation pipeline downstream will validate/fix it.
4595
+ finalResult = summaryText || '{}';
4596
+ } else {
4597
+ const timeoutNotice = '**Note: This response was generated under a time constraint. The timeout observer interrupted the operation because the time budget was exhausted.**\n\n';
4598
+ finalResult = timeoutNotice + (summaryText || 'The operation was interrupted before a response could be generated.');
4599
+ }
4600
+
4601
+ // Stream the abort summary to onStream callback so callers see the output
4602
+ if (options.onStream && finalResult) {
4603
+ options.onStream(finalResult);
4604
+ }
4605
+
4606
+ if (this.debug) {
4607
+ console.log(`[DEBUG] Negotiated timeout: summary produced ${summaryText?.length || 0} chars`);
4608
+ }
4609
+ } catch (summaryErr) {
4610
+ if (this.debug) {
4611
+ console.log(`[DEBUG] Negotiated timeout: summary call failed (${summaryErr.message}), falling back to partial text`);
4612
+ }
4613
+ if (this.tracer) {
4614
+ this.tracer.addEvent('negotiated_timeout.abort_summary_error', {
4615
+ error_message: summaryErr.message,
4616
+ });
4617
+ }
4618
+
4619
+ // Fallback: collect whatever text is in conversation history
4620
+ const partialTexts = currentMessages
4621
+ .filter(m => m.role === 'assistant' && typeof m.content === 'string' && m.content.trim())
4622
+ .map(m => m.content);
4623
+
4624
+ if (options.schema) {
4625
+ // Schema mode: try to pass through the last assistant message (may contain JSON)
4626
+ finalResult = partialTexts.length > 0 ? partialTexts[partialTexts.length - 1] : '{}';
4627
+ } else {
4628
+ const timeoutNotice = '**Note: This response was generated under a time constraint. The operation was interrupted and some work was not completed.**\n\n';
4629
+ finalResult = partialTexts.length > 0
4630
+ ? timeoutNotice + partialTexts[partialTexts.length - 1]
4631
+ : timeoutNotice + 'The operation was interrupted before enough information could be gathered. Please try again with a simpler query or increase the timeout.';
4632
+ }
4633
+
4634
+ // Stream the fallback result
4635
+ if (options.onStream && finalResult) {
4636
+ options.onStream(finalResult);
4637
+ }
4638
+ }
4639
+
4640
+ abortSummaryTaken = true;
4641
+ break; // Exit the compaction retry loop with the summary
4642
+ }
4643
+
4145
4644
  // Handle context-limit error: compact messages and retry (once)
4146
4645
  if (!compactionAttempted && handleContextLimitError) {
4147
4646
  const compactionResult = handleContextLimitError(error, currentMessages, {
@@ -4185,6 +4684,37 @@ Double-check your response based on the criteria above. If everything looks good
4185
4684
 
4186
4685
  if (currentIteration >= maxIterations) {
4187
4686
  console.warn(`[WARN] Max tool iterations (${maxIterations}) reached for session ${this.sessionId}.`);
4687
+
4688
+ // Build a descriptive failure message with a summary of tool calls made,
4689
+ // so the caller (e.g. a parent agent) knows what was attempted and why it failed.
4690
+ if (!finalResult || finalResult === DEFAULT_MAX_ITER_MSG) {
4691
+ try {
4692
+ const searchQueries = [];
4693
+ const toolCounts = {};
4694
+ for (const tc of _toolCallLog) {
4695
+ toolCounts[tc.name] = (toolCounts[tc.name] || 0) + 1;
4696
+ if (tc.name === 'search') {
4697
+ const q = tc.args.query || '';
4698
+ const exact = tc.args.exact ? ' (exact)' : '';
4699
+ searchQueries.push(`"${q}"${exact}`);
4700
+ }
4701
+ }
4702
+ const toolBreakdown = Object.entries(toolCounts)
4703
+ .map(([name, count]) => `${name}: ${count}x`)
4704
+ .join(', ');
4705
+ const uniqueSearches = [...new Set(searchQueries)];
4706
+
4707
+ let summary = `I was unable to complete your request after ${currentIteration} tool iterations.\n\n`;
4708
+ summary += `Tool calls made: ${toolBreakdown || 'none'}\n`;
4709
+ if (uniqueSearches.length > 0) {
4710
+ summary += `Search queries tried: ${uniqueSearches.join(', ')}\n`;
4711
+ }
4712
+ summary += `\nThe search approach may be fundamentally wrong for this query. Consider: using exact=true for literal string matching, using bash/grep for pattern-based file searches, or trying a completely different strategy instead of repeating similar searches.`;
4713
+ finalResult = summary;
4714
+ } catch {
4715
+ finalResult = DEFAULT_MAX_ITER_MSG;
4716
+ }
4717
+ }
4188
4718
  }
4189
4719
 
4190
4720
  // Store final history
@@ -4888,6 +5418,149 @@ Double-check your response based on the criteria above. If everything looks good
4888
5418
  }
4889
5419
  }
4890
5420
 
5421
+ /**
5422
+ * Trigger graceful wind-down from outside (e.g., parent agent).
5423
+ * Unlike cancel(), this does NOT abort — it sets the graceful timeout flag
5424
+ * so the agent finishes its current step and then winds down naturally.
5425
+ */
5426
+ triggerGracefulWindDown() {
5427
+ if (this._gracefulTimeoutState && !this._gracefulTimeoutState.triggered) {
5428
+ this._gracefulTimeoutState.triggered = true;
5429
+ if (this.debug) {
5430
+ console.log(`[DEBUG] Graceful wind-down triggered externally for session ${this.sessionId}`);
5431
+ }
5432
+ if (this.tracer) {
5433
+ this.tracer.addEvent('graceful_stop.external_trigger', {
5434
+ 'session.id': this.sessionId,
5435
+ });
5436
+ }
5437
+ } else if (this.debug) {
5438
+ console.log(`[DEBUG] Graceful wind-down already active for session ${this.sessionId}, skipping`);
5439
+ }
5440
+ }
5441
+
5442
+ /**
5443
+ * Initiate two-phase graceful stop: signal subagents and MCP servers to wind down,
5444
+ * then hard-abort after a deadline if they haven't finished.
5445
+ * @param {Object} gracefulTimeoutState - The graceful timeout state object from run()
5446
+ * @param {string} reason - Why the graceful stop was initiated
5447
+ */
5448
+ async _initiateGracefulStop(gracefulTimeoutState, reason) {
5449
+ if (gracefulTimeoutState.triggered) return; // Already initiated
5450
+
5451
+ if (this.debug) {
5452
+ console.log(`[DEBUG] Initiating graceful stop: ${reason} (subagents: ${this._activeSubagents.size}, hasMcpBridge: ${!!this.mcpBridge}, deadline: ${this.gracefulStopDeadline}ms)`);
5453
+ }
5454
+
5455
+ // Mark graceful timeout — prepareStep will pick this up for the parent's wind-down
5456
+ gracefulTimeoutState.triggered = true;
5457
+
5458
+ if (this.tracer) {
5459
+ this.tracer.addEvent('graceful_stop.initiated', {
5460
+ 'session.id': this.sessionId,
5461
+ 'graceful_stop.reason': reason,
5462
+ 'graceful_stop.active_subagents': this._activeSubagents.size,
5463
+ 'graceful_stop.has_mcp_bridge': !!this.mcpBridge,
5464
+ 'graceful_stop.deadline_ms': this.gracefulStopDeadline,
5465
+ });
5466
+ }
5467
+
5468
+ // Signal all active subagents to wind down gracefully (not hard-cancel)
5469
+ let subagentsSignalled = 0;
5470
+ let subagentErrors = 0;
5471
+ for (const [sid, subagent] of this._activeSubagents) {
5472
+ try {
5473
+ subagent.triggerGracefulWindDown();
5474
+ subagentsSignalled++;
5475
+ if (this.debug) {
5476
+ console.log(`[DEBUG] Triggered graceful wind-down on subagent ${sid}`);
5477
+ }
5478
+ } catch (e) {
5479
+ subagentErrors++;
5480
+ if (this.debug) {
5481
+ console.log(`[DEBUG] Failed to trigger wind-down on subagent ${sid}: ${e.message}`);
5482
+ }
5483
+ }
5484
+ }
5485
+
5486
+ // Call graceful_stop on MCP servers that expose it (fire-and-forget with short timeout)
5487
+ let mcpResults = [];
5488
+ if (this.mcpBridge) {
5489
+ try {
5490
+ mcpResults = await this.mcpBridge.callGracefulStopAll();
5491
+ if (this.debug && mcpResults.length > 0) {
5492
+ console.log(`[DEBUG] MCP graceful_stop results: ${JSON.stringify(mcpResults)}`);
5493
+ }
5494
+ } catch (e) {
5495
+ if (this.debug) {
5496
+ console.log(`[DEBUG] MCP graceful_stop failed: ${e.message}`);
5497
+ }
5498
+ }
5499
+ }
5500
+
5501
+ if (this.tracer) {
5502
+ this.tracer.addEvent('graceful_stop.signals_sent', {
5503
+ 'session.id': this.sessionId,
5504
+ 'graceful_stop.subagents_signalled': subagentsSignalled,
5505
+ 'graceful_stop.subagent_errors': subagentErrors,
5506
+ 'graceful_stop.mcp_servers_called': mcpResults.filter(r => r.success).length,
5507
+ 'graceful_stop.mcp_servers_failed': mcpResults.filter(r => !r.success).length,
5508
+ 'graceful_stop.mcp_servers_total': mcpResults.length,
5509
+ });
5510
+ }
5511
+
5512
+ // Safety net: hard abort after deadline if tools haven't finished
5513
+ this._gracefulStopHardAbortId = setTimeout(() => {
5514
+ if (this.debug) {
5515
+ console.log(`[DEBUG] Graceful stop deadline (${this.gracefulStopDeadline}ms) expired — hard aborting`);
5516
+ }
5517
+ if (this.tracer) {
5518
+ this.tracer.addEvent('graceful_stop.deadline_expired', {
5519
+ 'session.id': this.sessionId,
5520
+ 'graceful_stop.deadline_ms': this.gracefulStopDeadline,
5521
+ });
5522
+ }
5523
+ if (this._abortController) this._abortController.abort();
5524
+ }, this.gracefulStopDeadline);
5525
+ }
5526
+
5527
+ /**
5528
+ * Register an active subagent for graceful stop coordination.
5529
+ * @param {string} sessionId
5530
+ * @param {ProbeAgent} subagent
5531
+ */
5532
+ _registerSubagent(sessionId, subagent) {
5533
+ this._activeSubagents.set(sessionId, subagent);
5534
+ if (this.debug) {
5535
+ console.log(`[DEBUG] Registered subagent ${sessionId} (active: ${this._activeSubagents.size})`);
5536
+ }
5537
+ if (this.tracer) {
5538
+ this.tracer.addEvent('subagent.registered', {
5539
+ 'session.id': this.sessionId,
5540
+ 'subagent.session_id': sessionId,
5541
+ 'subagent.active_count': this._activeSubagents.size,
5542
+ });
5543
+ }
5544
+ }
5545
+
5546
+ /**
5547
+ * Unregister a completed subagent.
5548
+ * @param {string} sessionId
5549
+ */
5550
+ _unregisterSubagent(sessionId) {
5551
+ this._activeSubagents.delete(sessionId);
5552
+ if (this.debug) {
5553
+ console.log(`[DEBUG] Unregistered subagent ${sessionId} (active: ${this._activeSubagents.size})`);
5554
+ }
5555
+ if (this.tracer) {
5556
+ this.tracer.addEvent('subagent.unregistered', {
5557
+ 'session.id': this.sessionId,
5558
+ 'subagent.session_id': sessionId,
5559
+ 'subagent.active_count': this._activeSubagents.size,
5560
+ });
5561
+ }
5562
+ }
5563
+
4891
5564
  /**
4892
5565
  * Get the abort signal for this agent.
4893
5566
  * Delegations and subagents should check this signal.