@probelabs/probe 0.6.0-rc293 → 0.6.0-rc295
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/binaries/{probe-v0.6.0-rc293-aarch64-apple-darwin.tar.gz → probe-v0.6.0-rc295-aarch64-apple-darwin.tar.gz} +0 -0
- package/bin/binaries/{probe-v0.6.0-rc293-aarch64-unknown-linux-musl.tar.gz → probe-v0.6.0-rc295-aarch64-unknown-linux-musl.tar.gz} +0 -0
- package/bin/binaries/{probe-v0.6.0-rc293-x86_64-apple-darwin.tar.gz → probe-v0.6.0-rc295-x86_64-apple-darwin.tar.gz} +0 -0
- package/bin/binaries/{probe-v0.6.0-rc293-x86_64-pc-windows-msvc.zip → probe-v0.6.0-rc295-x86_64-pc-windows-msvc.zip} +0 -0
- package/bin/binaries/{probe-v0.6.0-rc293-x86_64-unknown-linux-musl.tar.gz → probe-v0.6.0-rc295-x86_64-unknown-linux-musl.tar.gz} +0 -0
- package/build/agent/ProbeAgent.d.ts +4 -0
- package/build/agent/ProbeAgent.js +348 -74
- package/cjs/agent/ProbeAgent.cjs +286 -61
- package/cjs/index.cjs +286 -61
- package/package.json +1 -1
- package/src/agent/ProbeAgent.d.ts +4 -0
- package/src/agent/ProbeAgent.js +348 -74
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -106,6 +106,10 @@ export interface ProbeAgentOptions {
|
|
|
106
106
|
requestTimeout?: number;
|
|
107
107
|
/** Maximum timeout in ms for the entire operation including all retries and fallbacks (default: 300000 or MAX_OPERATION_TIMEOUT env var). This is the absolute maximum time for streamTextWithRetryAndFallback. */
|
|
108
108
|
maxOperationTimeout?: number;
|
|
109
|
+
/** Timeout behavior: 'graceful' winds down with bonus steps giving the agent a chance to respond, 'hard' aborts immediately (default: 'graceful'). Env var: TIMEOUT_BEHAVIOR */
|
|
110
|
+
timeoutBehavior?: 'graceful' | 'hard';
|
|
111
|
+
/** Number of bonus steps during graceful timeout wind-down (default: 4, range: 1-20). Env var: GRACEFUL_TIMEOUT_BONUS_STEPS */
|
|
112
|
+
gracefulTimeoutBonusSteps?: number;
|
|
109
113
|
}
|
|
110
114
|
|
|
111
115
|
/**
|
|
@@ -391,6 +391,23 @@ export class ProbeAgent {
|
|
|
391
391
|
console.log(`[DEBUG] Max operation timeout: ${this.maxOperationTimeout}ms`);
|
|
392
392
|
}
|
|
393
393
|
|
|
394
|
+
// Timeout behavior: 'graceful' (default) winds down with bonus steps, 'hard' aborts immediately
|
|
395
|
+
this.timeoutBehavior = options.timeoutBehavior ?? (() => {
|
|
396
|
+
const val = process.env.TIMEOUT_BEHAVIOR;
|
|
397
|
+
if (val === 'hard') return 'hard';
|
|
398
|
+
return 'graceful';
|
|
399
|
+
})();
|
|
400
|
+
|
|
401
|
+
// Number of bonus steps during graceful timeout wind-down (default 4)
|
|
402
|
+
this.gracefulTimeoutBonusSteps = options.gracefulTimeoutBonusSteps ?? (() => {
|
|
403
|
+
const parsed = parseInt(process.env.GRACEFUL_TIMEOUT_BONUS_STEPS, 10);
|
|
404
|
+
return (isNaN(parsed) || parsed < 1 || parsed > 20) ? 4 : parsed;
|
|
405
|
+
})();
|
|
406
|
+
|
|
407
|
+
if (this.debug) {
|
|
408
|
+
console.log(`[DEBUG] Timeout behavior: ${this.timeoutBehavior}, bonus steps: ${this.gracefulTimeoutBonusSteps}`);
|
|
409
|
+
}
|
|
410
|
+
|
|
394
411
|
// Retry configuration
|
|
395
412
|
this.retryConfig = options.retry || {};
|
|
396
413
|
this.retryManager = null; // Will be initialized lazily when needed
|
|
@@ -1341,9 +1358,16 @@ export class ProbeAgent {
|
|
|
1341
1358
|
// Use fallback manager with retry for each provider
|
|
1342
1359
|
return await this.fallbackManager.executeWithFallback(
|
|
1343
1360
|
async (provider, model, config) => {
|
|
1361
|
+
// Wrap fallback model with per-call concurrency limiter if configured.
|
|
1362
|
+
// The original options.model was wrapped in streamTextWithRetryAndFallback,
|
|
1363
|
+
// but fallback replaces it with a new model that needs wrapping too.
|
|
1364
|
+
let fallbackModel = provider(model);
|
|
1365
|
+
if (this.concurrencyLimiter) {
|
|
1366
|
+
fallbackModel = ProbeAgent._wrapModelWithLimiter(fallbackModel, this.concurrencyLimiter, this.debug);
|
|
1367
|
+
}
|
|
1344
1368
|
const fallbackOptions = {
|
|
1345
1369
|
...options,
|
|
1346
|
-
model:
|
|
1370
|
+
model: fallbackModel,
|
|
1347
1371
|
abortSignal: controller.signal
|
|
1348
1372
|
};
|
|
1349
1373
|
|
|
@@ -1377,6 +1401,143 @@ export class ProbeAgent {
|
|
|
1377
1401
|
);
|
|
1378
1402
|
}
|
|
1379
1403
|
|
|
1404
|
+
/**
|
|
1405
|
+
* Wrap a LanguageModelV1 model so each doStream/doGenerate call acquires and
|
|
1406
|
+
* releases a concurrency limiter slot. This gates individual LLM API calls
|
|
1407
|
+
* (seconds each) instead of entire multi-step agent sessions (minutes).
|
|
1408
|
+
*
|
|
1409
|
+
* @param {Object} model - LanguageModelV1 model instance
|
|
1410
|
+
* @param {Object} limiter - Concurrency limiter with acquire/release/getStats
|
|
1411
|
+
* @param {boolean} debug - Enable debug logging
|
|
1412
|
+
* @returns {Object} Wrapped model with per-call concurrency gating
|
|
1413
|
+
* @private
|
|
1414
|
+
*/
|
|
1415
|
+
static _wrapModelWithLimiter(model, limiter, debug) {
|
|
1416
|
+
return new Proxy(model, {
|
|
1417
|
+
get(target, prop) {
|
|
1418
|
+
if (prop === 'doStream') {
|
|
1419
|
+
return async function (...args) {
|
|
1420
|
+
await limiter.acquire(null);
|
|
1421
|
+
if (debug) {
|
|
1422
|
+
const stats = limiter.getStats();
|
|
1423
|
+
console.log(`[DEBUG] Acquired AI slot for LLM call (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
|
|
1424
|
+
}
|
|
1425
|
+
try {
|
|
1426
|
+
const result = await target.doStream(...args);
|
|
1427
|
+
|
|
1428
|
+
// Wrap the ReadableStream to release the slot when it completes,
|
|
1429
|
+
// errors, or is cancelled — covering all stream termination paths.
|
|
1430
|
+
// Guard against double-release: if cancel() races with an in-flight
|
|
1431
|
+
// pull() that is awaiting originalReader.read(), both paths could
|
|
1432
|
+
// try to release. The flag ensures exactly one release.
|
|
1433
|
+
const originalStream = result.stream;
|
|
1434
|
+
const originalReader = originalStream.getReader();
|
|
1435
|
+
let released = false;
|
|
1436
|
+
const releaseOnce = () => {
|
|
1437
|
+
if (released) return;
|
|
1438
|
+
released = true;
|
|
1439
|
+
limiter.release(null);
|
|
1440
|
+
};
|
|
1441
|
+
const wrappedStream = new ReadableStream({
|
|
1442
|
+
async pull(controller) {
|
|
1443
|
+
try {
|
|
1444
|
+
const { done, value } = await originalReader.read();
|
|
1445
|
+
if (done) {
|
|
1446
|
+
controller.close();
|
|
1447
|
+
releaseOnce();
|
|
1448
|
+
if (debug) {
|
|
1449
|
+
const stats = limiter.getStats();
|
|
1450
|
+
console.log(`[DEBUG] Released AI slot after LLM stream complete (${stats.globalActive}/${stats.maxConcurrent})`);
|
|
1451
|
+
}
|
|
1452
|
+
} else {
|
|
1453
|
+
controller.enqueue(value);
|
|
1454
|
+
}
|
|
1455
|
+
} catch (err) {
|
|
1456
|
+
releaseOnce();
|
|
1457
|
+
if (debug) {
|
|
1458
|
+
console.log(`[DEBUG] Released AI slot on LLM stream error`);
|
|
1459
|
+
}
|
|
1460
|
+
controller.error(err);
|
|
1461
|
+
}
|
|
1462
|
+
},
|
|
1463
|
+
cancel() {
|
|
1464
|
+
releaseOnce();
|
|
1465
|
+
if (debug) {
|
|
1466
|
+
console.log(`[DEBUG] Released AI slot on LLM stream cancel`);
|
|
1467
|
+
}
|
|
1468
|
+
originalReader.cancel();
|
|
1469
|
+
}
|
|
1470
|
+
});
|
|
1471
|
+
|
|
1472
|
+
return { ...result, stream: wrappedStream };
|
|
1473
|
+
} catch (err) {
|
|
1474
|
+
limiter.release(null);
|
|
1475
|
+
if (debug) {
|
|
1476
|
+
console.log(`[DEBUG] Released AI slot on doStream error`);
|
|
1477
|
+
}
|
|
1478
|
+
throw err;
|
|
1479
|
+
}
|
|
1480
|
+
};
|
|
1481
|
+
}
|
|
1482
|
+
|
|
1483
|
+
if (prop === 'doGenerate') {
|
|
1484
|
+
return async function (...args) {
|
|
1485
|
+
await limiter.acquire(null);
|
|
1486
|
+
if (debug) {
|
|
1487
|
+
const stats = limiter.getStats();
|
|
1488
|
+
console.log(`[DEBUG] Acquired AI slot for LLM generate (${stats.globalActive}/${stats.maxConcurrent})`);
|
|
1489
|
+
}
|
|
1490
|
+
try {
|
|
1491
|
+
const result = await target.doGenerate(...args);
|
|
1492
|
+
return result;
|
|
1493
|
+
} finally {
|
|
1494
|
+
limiter.release(null);
|
|
1495
|
+
if (debug) {
|
|
1496
|
+
const stats = limiter.getStats();
|
|
1497
|
+
console.log(`[DEBUG] Released AI slot after LLM generate (${stats.globalActive}/${stats.maxConcurrent})`);
|
|
1498
|
+
}
|
|
1499
|
+
}
|
|
1500
|
+
};
|
|
1501
|
+
}
|
|
1502
|
+
|
|
1503
|
+
const value = target[prop];
|
|
1504
|
+
return typeof value === 'function' ? value.bind(target) : value;
|
|
1505
|
+
}
|
|
1506
|
+
});
|
|
1507
|
+
}
|
|
1508
|
+
|
|
1509
|
+
/**
|
|
1510
|
+
* Wrap an engine stream result so its textStream async generator acquires
|
|
1511
|
+
* and releases a concurrency limiter slot. Acquire happens when iteration
|
|
1512
|
+
* begins; release happens in finally (completion, error, or break).
|
|
1513
|
+
*
|
|
1514
|
+
* @param {Object} result - Engine result with { textStream, usage, ... }
|
|
1515
|
+
* @param {Object} limiter - Concurrency limiter with acquire/release/getStats
|
|
1516
|
+
* @param {boolean} debug - Enable debug logging
|
|
1517
|
+
* @returns {Object} Result with wrapped textStream
|
|
1518
|
+
* @private
|
|
1519
|
+
*/
|
|
1520
|
+
static _wrapEngineStreamWithLimiter(result, limiter, debug) {
|
|
1521
|
+
const originalStream = result.textStream;
|
|
1522
|
+
async function* gatedStream() {
|
|
1523
|
+
await limiter.acquire(null);
|
|
1524
|
+
if (debug) {
|
|
1525
|
+
const stats = limiter.getStats();
|
|
1526
|
+
console.log(`[DEBUG] Acquired AI slot for engine stream (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
|
|
1527
|
+
}
|
|
1528
|
+
try {
|
|
1529
|
+
yield* originalStream;
|
|
1530
|
+
} finally {
|
|
1531
|
+
limiter.release(null);
|
|
1532
|
+
if (debug) {
|
|
1533
|
+
const stats = limiter.getStats();
|
|
1534
|
+
console.log(`[DEBUG] Released AI slot after engine stream (${stats.globalActive}/${stats.maxConcurrent})`);
|
|
1535
|
+
}
|
|
1536
|
+
}
|
|
1537
|
+
}
|
|
1538
|
+
return { ...result, textStream: gatedStream() };
|
|
1539
|
+
}
|
|
1540
|
+
|
|
1380
1541
|
/**
|
|
1381
1542
|
* Execute streamText with retry and fallback support
|
|
1382
1543
|
* @param {Object} options - streamText options
|
|
@@ -1384,14 +1545,12 @@ export class ProbeAgent {
|
|
|
1384
1545
|
* @private
|
|
1385
1546
|
*/
|
|
1386
1547
|
async streamTextWithRetryAndFallback(options) {
|
|
1387
|
-
//
|
|
1548
|
+
// Wrap the model with per-call concurrency gating if limiter is configured.
|
|
1549
|
+
// This acquires/releases the slot around each individual LLM API call (doStream/doGenerate)
|
|
1550
|
+
// instead of holding it for the entire multi-step agent session.
|
|
1388
1551
|
const limiter = this.concurrencyLimiter;
|
|
1389
|
-
if (limiter) {
|
|
1390
|
-
|
|
1391
|
-
if (this.debug) {
|
|
1392
|
-
const stats = limiter.getStats();
|
|
1393
|
-
console.log(`[DEBUG] Acquired global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
|
|
1394
|
-
}
|
|
1552
|
+
if (limiter && options.model) {
|
|
1553
|
+
options = { ...options, model: ProbeAgent._wrapModelWithLimiter(options.model, limiter, this.debug) };
|
|
1395
1554
|
}
|
|
1396
1555
|
|
|
1397
1556
|
// Create AbortController for overall operation timeout
|
|
@@ -1412,13 +1571,24 @@ export class ProbeAgent {
|
|
|
1412
1571
|
}
|
|
1413
1572
|
|
|
1414
1573
|
// Set up overall operation timeout (default 5 minutes)
|
|
1574
|
+
// NOTE: For Vercel AI SDK paths, streamText() returns immediately and the
|
|
1575
|
+
// actual tool loop runs asynchronously. The graceful timeout timer is set up
|
|
1576
|
+
// in the run() method where results are actually awaited, not here.
|
|
1577
|
+
// This timer only handles the hard abort for non-graceful mode and engine paths.
|
|
1415
1578
|
if (this.maxOperationTimeout && this.maxOperationTimeout > 0) {
|
|
1416
|
-
|
|
1417
|
-
|
|
1418
|
-
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
|
|
1579
|
+
const gts = this._gracefulTimeoutState;
|
|
1580
|
+
if (this.timeoutBehavior === 'graceful' && gts) {
|
|
1581
|
+
// Graceful mode: timer is managed in run() method.
|
|
1582
|
+
// Only set up the AbortController link (no timer here).
|
|
1583
|
+
} else {
|
|
1584
|
+
// Hard mode: immediate abort (legacy behavior)
|
|
1585
|
+
timeoutState.timeoutId = setTimeout(() => {
|
|
1586
|
+
controller.abort();
|
|
1587
|
+
if (this.debug) {
|
|
1588
|
+
console.log(`[DEBUG] Operation timed out after ${this.maxOperationTimeout}ms (max operation timeout)`);
|
|
1589
|
+
}
|
|
1590
|
+
}, this.maxOperationTimeout);
|
|
1591
|
+
}
|
|
1422
1592
|
}
|
|
1423
1593
|
|
|
1424
1594
|
try {
|
|
@@ -1430,6 +1600,12 @@ export class ProbeAgent {
|
|
|
1430
1600
|
if (useClaudeCode || useCodex) {
|
|
1431
1601
|
try {
|
|
1432
1602
|
result = await this._tryEngineStreamPath(options, controller, timeoutState);
|
|
1603
|
+
// Gate engine stream with concurrency limiter if configured.
|
|
1604
|
+
// Engine paths bypass the Vercel model wrapper, so we wrap the
|
|
1605
|
+
// textStream async generator with acquire/release instead.
|
|
1606
|
+
if (result && limiter) {
|
|
1607
|
+
result = ProbeAgent._wrapEngineStreamWithLimiter(result, limiter, this.debug);
|
|
1608
|
+
}
|
|
1433
1609
|
} catch (error) {
|
|
1434
1610
|
if (this.debug) {
|
|
1435
1611
|
const engineType = useClaudeCode ? 'Claude Code' : 'Codex';
|
|
@@ -1444,47 +1620,7 @@ export class ProbeAgent {
|
|
|
1444
1620
|
result = await this._executeWithVercelProvider(options, controller);
|
|
1445
1621
|
}
|
|
1446
1622
|
|
|
1447
|
-
// Wrap textStream so limiter slot is held until stream completes.
|
|
1448
|
-
// result.textStream is a read-only getter on DefaultStreamTextResult,
|
|
1449
|
-
// so we wrap the result in a Proxy that intercepts the textStream property.
|
|
1450
|
-
if (limiter && result.textStream) {
|
|
1451
|
-
const originalStream = result.textStream;
|
|
1452
|
-
const debug = this.debug;
|
|
1453
|
-
const wrappedStream = (async function* () {
|
|
1454
|
-
try {
|
|
1455
|
-
for await (const chunk of originalStream) {
|
|
1456
|
-
yield chunk;
|
|
1457
|
-
}
|
|
1458
|
-
} finally {
|
|
1459
|
-
limiter.release(null);
|
|
1460
|
-
if (debug) {
|
|
1461
|
-
const stats = limiter.getStats();
|
|
1462
|
-
console.log(`[DEBUG] Released global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
|
|
1463
|
-
}
|
|
1464
|
-
}
|
|
1465
|
-
})();
|
|
1466
|
-
return new Proxy(result, {
|
|
1467
|
-
get(target, prop) {
|
|
1468
|
-
if (prop === 'textStream') return wrappedStream;
|
|
1469
|
-
const value = target[prop];
|
|
1470
|
-
return typeof value === 'function' ? value.bind(target) : value;
|
|
1471
|
-
}
|
|
1472
|
-
});
|
|
1473
|
-
} else if (limiter) {
|
|
1474
|
-
// No textStream (shouldn't happen, but release just in case)
|
|
1475
|
-
limiter.release(null);
|
|
1476
|
-
}
|
|
1477
|
-
|
|
1478
1623
|
return result;
|
|
1479
|
-
} catch (error) {
|
|
1480
|
-
// Release on error if limiter was acquired
|
|
1481
|
-
if (limiter) {
|
|
1482
|
-
limiter.release(null);
|
|
1483
|
-
if (this.debug) {
|
|
1484
|
-
console.log(`[DEBUG] Released global AI concurrency slot on error`);
|
|
1485
|
-
}
|
|
1486
|
-
}
|
|
1487
|
-
throw error;
|
|
1488
1624
|
} finally {
|
|
1489
1625
|
// Clean up timeout (for non-engine paths; engine paths clean up in the generator)
|
|
1490
1626
|
if (timeoutState.timeoutId) {
|
|
@@ -3444,6 +3580,15 @@ Follow these instructions carefully:
|
|
|
3444
3580
|
let completionPromptInjected = false;
|
|
3445
3581
|
let preCompletionResult = null; // Stores the result before completionPrompt for fallback
|
|
3446
3582
|
|
|
3583
|
+
// Graceful timeout state — shared between the soft-timeout timer (armed in executeAIRequest, not in streamTextWithRetryAndFallback)
|
|
3584
|
+
// and prepareStep/stopWhen callbacks (in streamText loop)
|
|
3585
|
+
const gracefulTimeoutState = {
|
|
3586
|
+
triggered: false, // Set to true when soft timeout fires
|
|
3587
|
+
bonusStepsUsed: 0, // Steps taken after soft timeout
|
|
3588
|
+
bonusStepsMax: this.gracefulTimeoutBonusSteps
|
|
3589
|
+
};
|
|
3590
|
+
this._gracefulTimeoutState = gracefulTimeoutState;
|
|
3591
|
+
|
|
3447
3592
|
// Context compaction retry loop
|
|
3448
3593
|
let compactionAttempted = false;
|
|
3449
3594
|
while (true) {
|
|
@@ -3455,6 +3600,17 @@ Follow these instructions carefully:
|
|
|
3455
3600
|
messages: messagesForAI,
|
|
3456
3601
|
tools,
|
|
3457
3602
|
stopWhen: ({ steps }) => {
|
|
3603
|
+
// Graceful timeout wind-down: override normal limits, stop only when bonus steps exhausted
|
|
3604
|
+
if (gracefulTimeoutState.triggered) {
|
|
3605
|
+
if (gracefulTimeoutState.bonusStepsUsed >= gracefulTimeoutState.bonusStepsMax) {
|
|
3606
|
+
if (this.debug) {
|
|
3607
|
+
console.log(`[DEBUG] stopWhen: graceful timeout bonus steps exhausted (${gracefulTimeoutState.bonusStepsUsed}/${gracefulTimeoutState.bonusStepsMax}), forcing stop`);
|
|
3608
|
+
}
|
|
3609
|
+
return true;
|
|
3610
|
+
}
|
|
3611
|
+
return false; // Allow more bonus steps
|
|
3612
|
+
}
|
|
3613
|
+
|
|
3458
3614
|
// Hard limit
|
|
3459
3615
|
if (steps.length >= maxIterations) return true;
|
|
3460
3616
|
|
|
@@ -3514,6 +3670,35 @@ Follow these instructions carefully:
|
|
|
3514
3670
|
return false;
|
|
3515
3671
|
},
|
|
3516
3672
|
prepareStep: ({ steps, stepNumber }) => {
|
|
3673
|
+
// Graceful timeout wind-down: force text-only response with wrap-up reminder
|
|
3674
|
+
if (gracefulTimeoutState.triggered) {
|
|
3675
|
+
gracefulTimeoutState.bonusStepsUsed++;
|
|
3676
|
+
const remaining = gracefulTimeoutState.bonusStepsMax - gracefulTimeoutState.bonusStepsUsed;
|
|
3677
|
+
|
|
3678
|
+
if (gracefulTimeoutState.bonusStepsUsed === 1) {
|
|
3679
|
+
// First wind-down step: inject wrap-up message
|
|
3680
|
+
if (this.debug) {
|
|
3681
|
+
console.log(`[DEBUG] prepareStep: graceful timeout wind-down step 1/${gracefulTimeoutState.bonusStepsMax}`);
|
|
3682
|
+
}
|
|
3683
|
+
if (this.tracer) {
|
|
3684
|
+
this.tracer.addEvent('graceful_timeout.wind_down_started', {
|
|
3685
|
+
bonus_steps_max: gracefulTimeoutState.bonusStepsMax,
|
|
3686
|
+
current_iteration: currentIteration,
|
|
3687
|
+
max_iterations: maxIterations
|
|
3688
|
+
});
|
|
3689
|
+
}
|
|
3690
|
+
return {
|
|
3691
|
+
toolChoice: 'none',
|
|
3692
|
+
userMessage: `⚠️ TIME LIMIT REACHED. You are running out of time. You have ${remaining} step(s) remaining. Provide your BEST answer NOW using the information you have already gathered. Do NOT call any more tools. Summarize your findings and respond completely. If something was not completed, honestly state what was not done and provide any partial results or recommendations you can offer.`
|
|
3693
|
+
};
|
|
3694
|
+
}
|
|
3695
|
+
|
|
3696
|
+
if (this.debug) {
|
|
3697
|
+
console.log(`[DEBUG] prepareStep: graceful timeout wind-down step ${gracefulTimeoutState.bonusStepsUsed}/${gracefulTimeoutState.bonusStepsMax} (${remaining} remaining)`);
|
|
3698
|
+
}
|
|
3699
|
+
return { toolChoice: 'none' };
|
|
3700
|
+
}
|
|
3701
|
+
|
|
3517
3702
|
// Last-iteration warning
|
|
3518
3703
|
if (stepNumber === maxIterations - 1) {
|
|
3519
3704
|
return {
|
|
@@ -3638,6 +3823,14 @@ Double-check your response based on the criteria above. If everything looks good
|
|
|
3638
3823
|
}));
|
|
3639
3824
|
}
|
|
3640
3825
|
this.tracer.addEvent('iteration.step', stepEvent);
|
|
3826
|
+
|
|
3827
|
+
// Track graceful timeout wind-down steps
|
|
3828
|
+
if (gracefulTimeoutState.triggered) {
|
|
3829
|
+
this.tracer.addEvent('graceful_timeout.wind_down_step', {
|
|
3830
|
+
bonus_step: gracefulTimeoutState.bonusStepsUsed,
|
|
3831
|
+
bonus_max: gracefulTimeoutState.bonusStepsMax
|
|
3832
|
+
});
|
|
3833
|
+
}
|
|
3641
3834
|
}
|
|
3642
3835
|
|
|
3643
3836
|
// Record token usage
|
|
@@ -3699,30 +3892,59 @@ Double-check your response based on the criteria above. If everything looks good
|
|
|
3699
3892
|
const executeAIRequest = async () => {
|
|
3700
3893
|
const result = await this.streamTextWithRetryAndFallback(streamOptions);
|
|
3701
3894
|
|
|
3702
|
-
//
|
|
3703
|
-
//
|
|
3704
|
-
//
|
|
3705
|
-
|
|
3706
|
-
let
|
|
3707
|
-
if (
|
|
3708
|
-
|
|
3709
|
-
|
|
3710
|
-
|
|
3711
|
-
|
|
3712
|
-
|
|
3895
|
+
// Set up graceful timeout timer now that streamText is running.
|
|
3896
|
+
// streamText() returns immediately — the actual tool loop runs asynchronously
|
|
3897
|
+
// and completes when we await result.steps/result.text below.
|
|
3898
|
+
let gracefulTimeoutId = null;
|
|
3899
|
+
let hardAbortTimeoutId = null;
|
|
3900
|
+
if (this.timeoutBehavior === 'graceful' && gracefulTimeoutState && this.maxOperationTimeout > 0) {
|
|
3901
|
+
gracefulTimeoutId = setTimeout(() => {
|
|
3902
|
+
gracefulTimeoutState.triggered = true;
|
|
3903
|
+
if (this.debug) {
|
|
3904
|
+
console.log(`[DEBUG] Soft timeout after ${this.maxOperationTimeout}ms — entering wind-down mode (${gracefulTimeoutState.bonusStepsMax} bonus steps)`);
|
|
3905
|
+
}
|
|
3906
|
+
// Safety net: hard abort after 60s if wind-down doesn't complete
|
|
3907
|
+
hardAbortTimeoutId = setTimeout(() => {
|
|
3908
|
+
if (this._abortController) {
|
|
3909
|
+
this._abortController.abort();
|
|
3910
|
+
}
|
|
3911
|
+
if (this.debug) {
|
|
3912
|
+
console.log(`[DEBUG] Hard abort — wind-down safety net expired after 60s`);
|
|
3913
|
+
}
|
|
3914
|
+
}, 60000);
|
|
3915
|
+
}, this.maxOperationTimeout);
|
|
3713
3916
|
}
|
|
3714
3917
|
|
|
3715
|
-
|
|
3716
|
-
|
|
3717
|
-
|
|
3918
|
+
try {
|
|
3919
|
+
// Use only the last step's text as the final answer.
|
|
3920
|
+
// result.text concatenates ALL steps (including intermediate planning text),
|
|
3921
|
+
// but the user should only see the final answer from the last step.
|
|
3922
|
+
const steps = await result.steps;
|
|
3923
|
+
let finalText;
|
|
3924
|
+
if (steps && steps.length > 1) {
|
|
3925
|
+
// Multi-step: use last step's text (the actual answer after tool calls)
|
|
3926
|
+
const lastStepText = steps[steps.length - 1].text;
|
|
3927
|
+
finalText = lastStepText || await result.text;
|
|
3928
|
+
} else {
|
|
3929
|
+
finalText = await result.text;
|
|
3930
|
+
}
|
|
3718
3931
|
|
|
3719
|
-
|
|
3720
|
-
|
|
3721
|
-
|
|
3722
|
-
this.tokenCounter.recordUsage(usage, result.experimental_providerMetadata);
|
|
3723
|
-
}
|
|
3932
|
+
if (this.debug) {
|
|
3933
|
+
console.log(`[DEBUG] streamText completed: ${steps?.length || 0} steps, finalText=${finalText?.length || 0} chars`);
|
|
3934
|
+
}
|
|
3724
3935
|
|
|
3725
|
-
|
|
3936
|
+
// Record final token usage
|
|
3937
|
+
const usage = await result.usage;
|
|
3938
|
+
if (usage) {
|
|
3939
|
+
this.tokenCounter.recordUsage(usage, result.experimental_providerMetadata);
|
|
3940
|
+
}
|
|
3941
|
+
|
|
3942
|
+
return { finalText, result };
|
|
3943
|
+
} finally {
|
|
3944
|
+
// Clean up graceful timeout timers
|
|
3945
|
+
if (gracefulTimeoutId) clearTimeout(gracefulTimeoutId);
|
|
3946
|
+
if (hardAbortTimeoutId) clearTimeout(hardAbortTimeoutId);
|
|
3947
|
+
}
|
|
3726
3948
|
};
|
|
3727
3949
|
|
|
3728
3950
|
let aiResult;
|
|
@@ -3767,6 +3989,58 @@ Double-check your response based on the criteria above. If everything looks good
|
|
|
3767
3989
|
finalResult = aiResult.finalText;
|
|
3768
3990
|
}
|
|
3769
3991
|
|
|
3992
|
+
// Graceful timeout handling: ensure the response clearly indicates
|
|
3993
|
+
// the research was interrupted and may be incomplete.
|
|
3994
|
+
if (gracefulTimeoutState.triggered) {
|
|
3995
|
+
const timeoutNotice = '**Note: This response was generated under a time constraint. The research may be incomplete, and some planned searches or analysis steps were not completed.**\n\n';
|
|
3996
|
+
|
|
3997
|
+
if (!finalResult || finalResult === 'I was unable to complete your request due to reaching the maximum number of tool iterations.') {
|
|
3998
|
+
// Wind-down produced empty text — try to collect useful content.
|
|
3999
|
+
// Some models (e.g., Gemini) return finishReason:'other' with empty text
|
|
4000
|
+
// when forced from tool-calling to text-only mode mid-task.
|
|
4001
|
+
try {
|
|
4002
|
+
// Try result.text (concatenation of all step texts)
|
|
4003
|
+
const allText = await aiResult.result.text;
|
|
4004
|
+
if (allText && allText.trim()) {
|
|
4005
|
+
finalResult = timeoutNotice + allText;
|
|
4006
|
+
if (this.debug) {
|
|
4007
|
+
console.log(`[DEBUG] Graceful timeout: using concatenated step text (${allText.length} chars)`);
|
|
4008
|
+
}
|
|
4009
|
+
} else {
|
|
4010
|
+
// Last resort: collect tool result summaries as partial information
|
|
4011
|
+
const steps = await aiResult.result.steps;
|
|
4012
|
+
const toolSummaries = [];
|
|
4013
|
+
for (const step of (steps || [])) {
|
|
4014
|
+
if (step.toolResults?.length > 0) {
|
|
4015
|
+
for (const tr of step.toolResults) {
|
|
4016
|
+
const resultText = typeof tr.result === 'string' ? tr.result : JSON.stringify(tr.result);
|
|
4017
|
+
if (resultText && resultText.length > 0 && resultText.length < 5000) {
|
|
4018
|
+
toolSummaries.push(resultText.substring(0, 2000));
|
|
4019
|
+
}
|
|
4020
|
+
}
|
|
4021
|
+
}
|
|
4022
|
+
}
|
|
4023
|
+
if (toolSummaries.length > 0) {
|
|
4024
|
+
finalResult = `${timeoutNotice}The operation timed out before a complete answer could be generated. Here is the partial information gathered:\n\n${toolSummaries.join('\n\n---\n\n')}`;
|
|
4025
|
+
if (this.debug) {
|
|
4026
|
+
console.log(`[DEBUG] Graceful timeout: built fallback from ${toolSummaries.length} tool results`);
|
|
4027
|
+
}
|
|
4028
|
+
} else {
|
|
4029
|
+
finalResult = 'The operation timed out before enough information could be gathered to provide an answer. Please try again with a simpler query or increase the timeout.';
|
|
4030
|
+
}
|
|
4031
|
+
}
|
|
4032
|
+
} catch (e) {
|
|
4033
|
+
if (this.debug) {
|
|
4034
|
+
console.log(`[DEBUG] Graceful timeout fallback error: ${e.message}`);
|
|
4035
|
+
}
|
|
4036
|
+
finalResult = 'The operation timed out before enough information could be gathered to provide an answer. Please try again with a simpler query or increase the timeout.';
|
|
4037
|
+
}
|
|
4038
|
+
} else {
|
|
4039
|
+
// Model produced text during wind-down — prepend the timeout notice
|
|
4040
|
+
finalResult = timeoutNotice + finalResult;
|
|
4041
|
+
}
|
|
4042
|
+
}
|
|
4043
|
+
|
|
3770
4044
|
// Update currentMessages from the result for history storage
|
|
3771
4045
|
// The SDK manages the full message history internally
|
|
3772
4046
|
const resultMessages = await aiResult.result.response?.messages;
|