@probelabs/probe 0.6.0-rc293 → 0.6.0-rc294
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/binaries/{probe-v0.6.0-rc293-aarch64-apple-darwin.tar.gz → probe-v0.6.0-rc294-aarch64-apple-darwin.tar.gz} +0 -0
- package/bin/binaries/{probe-v0.6.0-rc293-aarch64-unknown-linux-musl.tar.gz → probe-v0.6.0-rc294-aarch64-unknown-linux-musl.tar.gz} +0 -0
- package/bin/binaries/{probe-v0.6.0-rc293-x86_64-apple-darwin.tar.gz → probe-v0.6.0-rc294-x86_64-apple-darwin.tar.gz} +0 -0
- package/bin/binaries/{probe-v0.6.0-rc293-x86_64-pc-windows-msvc.zip → probe-v0.6.0-rc294-x86_64-pc-windows-msvc.zip} +0 -0
- package/bin/binaries/{probe-v0.6.0-rc293-x86_64-unknown-linux-musl.tar.gz → probe-v0.6.0-rc294-x86_64-unknown-linux-musl.tar.gz} +0 -0
- package/build/agent/ProbeAgent.js +156 -48
- package/cjs/agent/ProbeAgent.cjs +136 -41
- package/cjs/index.cjs +136 -41
- package/package.json +1 -1
- package/src/agent/ProbeAgent.js +156 -48
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -1341,9 +1341,16 @@ export class ProbeAgent {
|
|
|
1341
1341
|
// Use fallback manager with retry for each provider
|
|
1342
1342
|
return await this.fallbackManager.executeWithFallback(
|
|
1343
1343
|
async (provider, model, config) => {
|
|
1344
|
+
// Wrap fallback model with per-call concurrency limiter if configured.
|
|
1345
|
+
// The original options.model was wrapped in streamTextWithRetryAndFallback,
|
|
1346
|
+
// but fallback replaces it with a new model that needs wrapping too.
|
|
1347
|
+
let fallbackModel = provider(model);
|
|
1348
|
+
if (this.concurrencyLimiter) {
|
|
1349
|
+
fallbackModel = ProbeAgent._wrapModelWithLimiter(fallbackModel, this.concurrencyLimiter, this.debug);
|
|
1350
|
+
}
|
|
1344
1351
|
const fallbackOptions = {
|
|
1345
1352
|
...options,
|
|
1346
|
-
model:
|
|
1353
|
+
model: fallbackModel,
|
|
1347
1354
|
abortSignal: controller.signal
|
|
1348
1355
|
};
|
|
1349
1356
|
|
|
@@ -1377,6 +1384,143 @@ export class ProbeAgent {
|
|
|
1377
1384
|
);
|
|
1378
1385
|
}
|
|
1379
1386
|
|
|
1387
|
+
/**
|
|
1388
|
+
* Wrap a LanguageModelV1 model so each doStream/doGenerate call acquires and
|
|
1389
|
+
* releases a concurrency limiter slot. This gates individual LLM API calls
|
|
1390
|
+
* (seconds each) instead of entire multi-step agent sessions (minutes).
|
|
1391
|
+
*
|
|
1392
|
+
* @param {Object} model - LanguageModelV1 model instance
|
|
1393
|
+
* @param {Object} limiter - Concurrency limiter with acquire/release/getStats
|
|
1394
|
+
* @param {boolean} debug - Enable debug logging
|
|
1395
|
+
* @returns {Object} Wrapped model with per-call concurrency gating
|
|
1396
|
+
* @private
|
|
1397
|
+
*/
|
|
1398
|
+
static _wrapModelWithLimiter(model, limiter, debug) {
|
|
1399
|
+
return new Proxy(model, {
|
|
1400
|
+
get(target, prop) {
|
|
1401
|
+
if (prop === 'doStream') {
|
|
1402
|
+
return async function (...args) {
|
|
1403
|
+
await limiter.acquire(null);
|
|
1404
|
+
if (debug) {
|
|
1405
|
+
const stats = limiter.getStats();
|
|
1406
|
+
console.log(`[DEBUG] Acquired AI slot for LLM call (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
|
|
1407
|
+
}
|
|
1408
|
+
try {
|
|
1409
|
+
const result = await target.doStream(...args);
|
|
1410
|
+
|
|
1411
|
+
// Wrap the ReadableStream to release the slot when it completes,
|
|
1412
|
+
// errors, or is cancelled — covering all stream termination paths.
|
|
1413
|
+
// Guard against double-release: if cancel() races with an in-flight
|
|
1414
|
+
// pull() that is awaiting originalReader.read(), both paths could
|
|
1415
|
+
// try to release. The flag ensures exactly one release.
|
|
1416
|
+
const originalStream = result.stream;
|
|
1417
|
+
const originalReader = originalStream.getReader();
|
|
1418
|
+
let released = false;
|
|
1419
|
+
const releaseOnce = () => {
|
|
1420
|
+
if (released) return;
|
|
1421
|
+
released = true;
|
|
1422
|
+
limiter.release(null);
|
|
1423
|
+
};
|
|
1424
|
+
const wrappedStream = new ReadableStream({
|
|
1425
|
+
async pull(controller) {
|
|
1426
|
+
try {
|
|
1427
|
+
const { done, value } = await originalReader.read();
|
|
1428
|
+
if (done) {
|
|
1429
|
+
controller.close();
|
|
1430
|
+
releaseOnce();
|
|
1431
|
+
if (debug) {
|
|
1432
|
+
const stats = limiter.getStats();
|
|
1433
|
+
console.log(`[DEBUG] Released AI slot after LLM stream complete (${stats.globalActive}/${stats.maxConcurrent})`);
|
|
1434
|
+
}
|
|
1435
|
+
} else {
|
|
1436
|
+
controller.enqueue(value);
|
|
1437
|
+
}
|
|
1438
|
+
} catch (err) {
|
|
1439
|
+
releaseOnce();
|
|
1440
|
+
if (debug) {
|
|
1441
|
+
console.log(`[DEBUG] Released AI slot on LLM stream error`);
|
|
1442
|
+
}
|
|
1443
|
+
controller.error(err);
|
|
1444
|
+
}
|
|
1445
|
+
},
|
|
1446
|
+
cancel() {
|
|
1447
|
+
releaseOnce();
|
|
1448
|
+
if (debug) {
|
|
1449
|
+
console.log(`[DEBUG] Released AI slot on LLM stream cancel`);
|
|
1450
|
+
}
|
|
1451
|
+
originalReader.cancel();
|
|
1452
|
+
}
|
|
1453
|
+
});
|
|
1454
|
+
|
|
1455
|
+
return { ...result, stream: wrappedStream };
|
|
1456
|
+
} catch (err) {
|
|
1457
|
+
limiter.release(null);
|
|
1458
|
+
if (debug) {
|
|
1459
|
+
console.log(`[DEBUG] Released AI slot on doStream error`);
|
|
1460
|
+
}
|
|
1461
|
+
throw err;
|
|
1462
|
+
}
|
|
1463
|
+
};
|
|
1464
|
+
}
|
|
1465
|
+
|
|
1466
|
+
if (prop === 'doGenerate') {
|
|
1467
|
+
return async function (...args) {
|
|
1468
|
+
await limiter.acquire(null);
|
|
1469
|
+
if (debug) {
|
|
1470
|
+
const stats = limiter.getStats();
|
|
1471
|
+
console.log(`[DEBUG] Acquired AI slot for LLM generate (${stats.globalActive}/${stats.maxConcurrent})`);
|
|
1472
|
+
}
|
|
1473
|
+
try {
|
|
1474
|
+
const result = await target.doGenerate(...args);
|
|
1475
|
+
return result;
|
|
1476
|
+
} finally {
|
|
1477
|
+
limiter.release(null);
|
|
1478
|
+
if (debug) {
|
|
1479
|
+
const stats = limiter.getStats();
|
|
1480
|
+
console.log(`[DEBUG] Released AI slot after LLM generate (${stats.globalActive}/${stats.maxConcurrent})`);
|
|
1481
|
+
}
|
|
1482
|
+
}
|
|
1483
|
+
};
|
|
1484
|
+
}
|
|
1485
|
+
|
|
1486
|
+
const value = target[prop];
|
|
1487
|
+
return typeof value === 'function' ? value.bind(target) : value;
|
|
1488
|
+
}
|
|
1489
|
+
});
|
|
1490
|
+
}
|
|
1491
|
+
|
|
1492
|
+
/**
|
|
1493
|
+
* Wrap an engine stream result so its textStream async generator acquires
|
|
1494
|
+
* and releases a concurrency limiter slot. Acquire happens when iteration
|
|
1495
|
+
* begins; release happens in finally (completion, error, or break).
|
|
1496
|
+
*
|
|
1497
|
+
* @param {Object} result - Engine result with { textStream, usage, ... }
|
|
1498
|
+
* @param {Object} limiter - Concurrency limiter with acquire/release/getStats
|
|
1499
|
+
* @param {boolean} debug - Enable debug logging
|
|
1500
|
+
* @returns {Object} Result with wrapped textStream
|
|
1501
|
+
* @private
|
|
1502
|
+
*/
|
|
1503
|
+
static _wrapEngineStreamWithLimiter(result, limiter, debug) {
|
|
1504
|
+
const originalStream = result.textStream;
|
|
1505
|
+
async function* gatedStream() {
|
|
1506
|
+
await limiter.acquire(null);
|
|
1507
|
+
if (debug) {
|
|
1508
|
+
const stats = limiter.getStats();
|
|
1509
|
+
console.log(`[DEBUG] Acquired AI slot for engine stream (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
|
|
1510
|
+
}
|
|
1511
|
+
try {
|
|
1512
|
+
yield* originalStream;
|
|
1513
|
+
} finally {
|
|
1514
|
+
limiter.release(null);
|
|
1515
|
+
if (debug) {
|
|
1516
|
+
const stats = limiter.getStats();
|
|
1517
|
+
console.log(`[DEBUG] Released AI slot after engine stream (${stats.globalActive}/${stats.maxConcurrent})`);
|
|
1518
|
+
}
|
|
1519
|
+
}
|
|
1520
|
+
}
|
|
1521
|
+
return { ...result, textStream: gatedStream() };
|
|
1522
|
+
}
|
|
1523
|
+
|
|
1380
1524
|
/**
|
|
1381
1525
|
* Execute streamText with retry and fallback support
|
|
1382
1526
|
* @param {Object} options - streamText options
|
|
@@ -1384,14 +1528,12 @@ export class ProbeAgent {
|
|
|
1384
1528
|
* @private
|
|
1385
1529
|
*/
|
|
1386
1530
|
async streamTextWithRetryAndFallback(options) {
|
|
1387
|
-
//
|
|
1531
|
+
// Wrap the model with per-call concurrency gating if limiter is configured.
|
|
1532
|
+
// This acquires/releases the slot around each individual LLM API call (doStream/doGenerate)
|
|
1533
|
+
// instead of holding it for the entire multi-step agent session.
|
|
1388
1534
|
const limiter = this.concurrencyLimiter;
|
|
1389
|
-
if (limiter) {
|
|
1390
|
-
|
|
1391
|
-
if (this.debug) {
|
|
1392
|
-
const stats = limiter.getStats();
|
|
1393
|
-
console.log(`[DEBUG] Acquired global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
|
|
1394
|
-
}
|
|
1535
|
+
if (limiter && options.model) {
|
|
1536
|
+
options = { ...options, model: ProbeAgent._wrapModelWithLimiter(options.model, limiter, this.debug) };
|
|
1395
1537
|
}
|
|
1396
1538
|
|
|
1397
1539
|
// Create AbortController for overall operation timeout
|
|
@@ -1430,6 +1572,12 @@ export class ProbeAgent {
|
|
|
1430
1572
|
if (useClaudeCode || useCodex) {
|
|
1431
1573
|
try {
|
|
1432
1574
|
result = await this._tryEngineStreamPath(options, controller, timeoutState);
|
|
1575
|
+
// Gate engine stream with concurrency limiter if configured.
|
|
1576
|
+
// Engine paths bypass the Vercel model wrapper, so we wrap the
|
|
1577
|
+
// textStream async generator with acquire/release instead.
|
|
1578
|
+
if (result && limiter) {
|
|
1579
|
+
result = ProbeAgent._wrapEngineStreamWithLimiter(result, limiter, this.debug);
|
|
1580
|
+
}
|
|
1433
1581
|
} catch (error) {
|
|
1434
1582
|
if (this.debug) {
|
|
1435
1583
|
const engineType = useClaudeCode ? 'Claude Code' : 'Codex';
|
|
@@ -1444,47 +1592,7 @@ export class ProbeAgent {
|
|
|
1444
1592
|
result = await this._executeWithVercelProvider(options, controller);
|
|
1445
1593
|
}
|
|
1446
1594
|
|
|
1447
|
-
// Wrap textStream so limiter slot is held until stream completes.
|
|
1448
|
-
// result.textStream is a read-only getter on DefaultStreamTextResult,
|
|
1449
|
-
// so we wrap the result in a Proxy that intercepts the textStream property.
|
|
1450
|
-
if (limiter && result.textStream) {
|
|
1451
|
-
const originalStream = result.textStream;
|
|
1452
|
-
const debug = this.debug;
|
|
1453
|
-
const wrappedStream = (async function* () {
|
|
1454
|
-
try {
|
|
1455
|
-
for await (const chunk of originalStream) {
|
|
1456
|
-
yield chunk;
|
|
1457
|
-
}
|
|
1458
|
-
} finally {
|
|
1459
|
-
limiter.release(null);
|
|
1460
|
-
if (debug) {
|
|
1461
|
-
const stats = limiter.getStats();
|
|
1462
|
-
console.log(`[DEBUG] Released global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
|
|
1463
|
-
}
|
|
1464
|
-
}
|
|
1465
|
-
})();
|
|
1466
|
-
return new Proxy(result, {
|
|
1467
|
-
get(target, prop) {
|
|
1468
|
-
if (prop === 'textStream') return wrappedStream;
|
|
1469
|
-
const value = target[prop];
|
|
1470
|
-
return typeof value === 'function' ? value.bind(target) : value;
|
|
1471
|
-
}
|
|
1472
|
-
});
|
|
1473
|
-
} else if (limiter) {
|
|
1474
|
-
// No textStream (shouldn't happen, but release just in case)
|
|
1475
|
-
limiter.release(null);
|
|
1476
|
-
}
|
|
1477
|
-
|
|
1478
1595
|
return result;
|
|
1479
|
-
} catch (error) {
|
|
1480
|
-
// Release on error if limiter was acquired
|
|
1481
|
-
if (limiter) {
|
|
1482
|
-
limiter.release(null);
|
|
1483
|
-
if (this.debug) {
|
|
1484
|
-
console.log(`[DEBUG] Released global AI concurrency slot on error`);
|
|
1485
|
-
}
|
|
1486
|
-
}
|
|
1487
|
-
throw error;
|
|
1488
1596
|
} finally {
|
|
1489
1597
|
// Clean up timeout (for non-engine paths; engine paths clean up in the generator)
|
|
1490
1598
|
if (timeoutState.timeoutId) {
|
package/cjs/agent/ProbeAgent.cjs
CHANGED
|
@@ -100620,9 +100620,13 @@ var init_ProbeAgent = __esm({
|
|
|
100620
100620
|
}
|
|
100621
100621
|
return await this.fallbackManager.executeWithFallback(
|
|
100622
100622
|
async (provider, model, config2) => {
|
|
100623
|
+
let fallbackModel = provider(model);
|
|
100624
|
+
if (this.concurrencyLimiter) {
|
|
100625
|
+
fallbackModel = _ProbeAgent._wrapModelWithLimiter(fallbackModel, this.concurrencyLimiter, this.debug);
|
|
100626
|
+
}
|
|
100623
100627
|
const fallbackOptions = {
|
|
100624
100628
|
...options,
|
|
100625
|
-
model:
|
|
100629
|
+
model: fallbackModel,
|
|
100626
100630
|
abortSignal: controller.signal
|
|
100627
100631
|
};
|
|
100628
100632
|
if (config2.provider !== "google" && fallbackOptions.tools) {
|
|
@@ -100650,6 +100654,132 @@ var init_ProbeAgent = __esm({
|
|
|
100650
100654
|
}
|
|
100651
100655
|
);
|
|
100652
100656
|
}
|
|
100657
|
+
/**
|
|
100658
|
+
* Wrap a LanguageModelV1 model so each doStream/doGenerate call acquires and
|
|
100659
|
+
* releases a concurrency limiter slot. This gates individual LLM API calls
|
|
100660
|
+
* (seconds each) instead of entire multi-step agent sessions (minutes).
|
|
100661
|
+
*
|
|
100662
|
+
* @param {Object} model - LanguageModelV1 model instance
|
|
100663
|
+
* @param {Object} limiter - Concurrency limiter with acquire/release/getStats
|
|
100664
|
+
* @param {boolean} debug - Enable debug logging
|
|
100665
|
+
* @returns {Object} Wrapped model with per-call concurrency gating
|
|
100666
|
+
* @private
|
|
100667
|
+
*/
|
|
100668
|
+
static _wrapModelWithLimiter(model, limiter, debug) {
|
|
100669
|
+
return new Proxy(model, {
|
|
100670
|
+
get(target, prop) {
|
|
100671
|
+
if (prop === "doStream") {
|
|
100672
|
+
return async function(...args) {
|
|
100673
|
+
await limiter.acquire(null);
|
|
100674
|
+
if (debug) {
|
|
100675
|
+
const stats = limiter.getStats();
|
|
100676
|
+
console.log(`[DEBUG] Acquired AI slot for LLM call (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
|
|
100677
|
+
}
|
|
100678
|
+
try {
|
|
100679
|
+
const result = await target.doStream(...args);
|
|
100680
|
+
const originalStream = result.stream;
|
|
100681
|
+
const originalReader = originalStream.getReader();
|
|
100682
|
+
let released = false;
|
|
100683
|
+
const releaseOnce = () => {
|
|
100684
|
+
if (released) return;
|
|
100685
|
+
released = true;
|
|
100686
|
+
limiter.release(null);
|
|
100687
|
+
};
|
|
100688
|
+
const wrappedStream = new ReadableStream({
|
|
100689
|
+
async pull(controller) {
|
|
100690
|
+
try {
|
|
100691
|
+
const { done, value: value2 } = await originalReader.read();
|
|
100692
|
+
if (done) {
|
|
100693
|
+
controller.close();
|
|
100694
|
+
releaseOnce();
|
|
100695
|
+
if (debug) {
|
|
100696
|
+
const stats = limiter.getStats();
|
|
100697
|
+
console.log(`[DEBUG] Released AI slot after LLM stream complete (${stats.globalActive}/${stats.maxConcurrent})`);
|
|
100698
|
+
}
|
|
100699
|
+
} else {
|
|
100700
|
+
controller.enqueue(value2);
|
|
100701
|
+
}
|
|
100702
|
+
} catch (err) {
|
|
100703
|
+
releaseOnce();
|
|
100704
|
+
if (debug) {
|
|
100705
|
+
console.log(`[DEBUG] Released AI slot on LLM stream error`);
|
|
100706
|
+
}
|
|
100707
|
+
controller.error(err);
|
|
100708
|
+
}
|
|
100709
|
+
},
|
|
100710
|
+
cancel() {
|
|
100711
|
+
releaseOnce();
|
|
100712
|
+
if (debug) {
|
|
100713
|
+
console.log(`[DEBUG] Released AI slot on LLM stream cancel`);
|
|
100714
|
+
}
|
|
100715
|
+
originalReader.cancel();
|
|
100716
|
+
}
|
|
100717
|
+
});
|
|
100718
|
+
return { ...result, stream: wrappedStream };
|
|
100719
|
+
} catch (err) {
|
|
100720
|
+
limiter.release(null);
|
|
100721
|
+
if (debug) {
|
|
100722
|
+
console.log(`[DEBUG] Released AI slot on doStream error`);
|
|
100723
|
+
}
|
|
100724
|
+
throw err;
|
|
100725
|
+
}
|
|
100726
|
+
};
|
|
100727
|
+
}
|
|
100728
|
+
if (prop === "doGenerate") {
|
|
100729
|
+
return async function(...args) {
|
|
100730
|
+
await limiter.acquire(null);
|
|
100731
|
+
if (debug) {
|
|
100732
|
+
const stats = limiter.getStats();
|
|
100733
|
+
console.log(`[DEBUG] Acquired AI slot for LLM generate (${stats.globalActive}/${stats.maxConcurrent})`);
|
|
100734
|
+
}
|
|
100735
|
+
try {
|
|
100736
|
+
const result = await target.doGenerate(...args);
|
|
100737
|
+
return result;
|
|
100738
|
+
} finally {
|
|
100739
|
+
limiter.release(null);
|
|
100740
|
+
if (debug) {
|
|
100741
|
+
const stats = limiter.getStats();
|
|
100742
|
+
console.log(`[DEBUG] Released AI slot after LLM generate (${stats.globalActive}/${stats.maxConcurrent})`);
|
|
100743
|
+
}
|
|
100744
|
+
}
|
|
100745
|
+
};
|
|
100746
|
+
}
|
|
100747
|
+
const value = target[prop];
|
|
100748
|
+
return typeof value === "function" ? value.bind(target) : value;
|
|
100749
|
+
}
|
|
100750
|
+
});
|
|
100751
|
+
}
|
|
100752
|
+
/**
|
|
100753
|
+
* Wrap an engine stream result so its textStream async generator acquires
|
|
100754
|
+
* and releases a concurrency limiter slot. Acquire happens when iteration
|
|
100755
|
+
* begins; release happens in finally (completion, error, or break).
|
|
100756
|
+
*
|
|
100757
|
+
* @param {Object} result - Engine result with { textStream, usage, ... }
|
|
100758
|
+
* @param {Object} limiter - Concurrency limiter with acquire/release/getStats
|
|
100759
|
+
* @param {boolean} debug - Enable debug logging
|
|
100760
|
+
* @returns {Object} Result with wrapped textStream
|
|
100761
|
+
* @private
|
|
100762
|
+
*/
|
|
100763
|
+
static _wrapEngineStreamWithLimiter(result, limiter, debug) {
|
|
100764
|
+
const originalStream = result.textStream;
|
|
100765
|
+
async function* gatedStream() {
|
|
100766
|
+
await limiter.acquire(null);
|
|
100767
|
+
if (debug) {
|
|
100768
|
+
const stats = limiter.getStats();
|
|
100769
|
+
console.log(`[DEBUG] Acquired AI slot for engine stream (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
|
|
100770
|
+
}
|
|
100771
|
+
try {
|
|
100772
|
+
yield* originalStream;
|
|
100773
|
+
} finally {
|
|
100774
|
+
limiter.release(null);
|
|
100775
|
+
if (debug) {
|
|
100776
|
+
const stats = limiter.getStats();
|
|
100777
|
+
console.log(`[DEBUG] Released AI slot after engine stream (${stats.globalActive}/${stats.maxConcurrent})`);
|
|
100778
|
+
}
|
|
100779
|
+
}
|
|
100780
|
+
}
|
|
100781
|
+
return { ...result, textStream: gatedStream() };
|
|
100782
|
+
}
|
|
100653
100783
|
/**
|
|
100654
100784
|
* Execute streamText with retry and fallback support
|
|
100655
100785
|
* @param {Object} options - streamText options
|
|
@@ -100658,12 +100788,8 @@ var init_ProbeAgent = __esm({
|
|
|
100658
100788
|
*/
|
|
100659
100789
|
async streamTextWithRetryAndFallback(options) {
|
|
100660
100790
|
const limiter = this.concurrencyLimiter;
|
|
100661
|
-
if (limiter) {
|
|
100662
|
-
|
|
100663
|
-
if (this.debug) {
|
|
100664
|
-
const stats = limiter.getStats();
|
|
100665
|
-
console.log(`[DEBUG] Acquired global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
|
|
100666
|
-
}
|
|
100791
|
+
if (limiter && options.model) {
|
|
100792
|
+
options = { ...options, model: _ProbeAgent._wrapModelWithLimiter(options.model, limiter, this.debug) };
|
|
100667
100793
|
}
|
|
100668
100794
|
const controller = new AbortController();
|
|
100669
100795
|
const timeoutState = { timeoutId: null };
|
|
@@ -100691,6 +100817,9 @@ var init_ProbeAgent = __esm({
|
|
|
100691
100817
|
if (useClaudeCode || useCodex) {
|
|
100692
100818
|
try {
|
|
100693
100819
|
result = await this._tryEngineStreamPath(options, controller, timeoutState);
|
|
100820
|
+
if (result && limiter) {
|
|
100821
|
+
result = _ProbeAgent._wrapEngineStreamWithLimiter(result, limiter, this.debug);
|
|
100822
|
+
}
|
|
100694
100823
|
} catch (error40) {
|
|
100695
100824
|
if (this.debug) {
|
|
100696
100825
|
const engineType = useClaudeCode ? "Claude Code" : "Codex";
|
|
@@ -100701,41 +100830,7 @@ var init_ProbeAgent = __esm({
|
|
|
100701
100830
|
if (!result) {
|
|
100702
100831
|
result = await this._executeWithVercelProvider(options, controller);
|
|
100703
100832
|
}
|
|
100704
|
-
if (limiter && result.textStream) {
|
|
100705
|
-
const originalStream = result.textStream;
|
|
100706
|
-
const debug = this.debug;
|
|
100707
|
-
const wrappedStream = (async function* () {
|
|
100708
|
-
try {
|
|
100709
|
-
for await (const chunk of originalStream) {
|
|
100710
|
-
yield chunk;
|
|
100711
|
-
}
|
|
100712
|
-
} finally {
|
|
100713
|
-
limiter.release(null);
|
|
100714
|
-
if (debug) {
|
|
100715
|
-
const stats = limiter.getStats();
|
|
100716
|
-
console.log(`[DEBUG] Released global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
|
|
100717
|
-
}
|
|
100718
|
-
}
|
|
100719
|
-
})();
|
|
100720
|
-
return new Proxy(result, {
|
|
100721
|
-
get(target, prop) {
|
|
100722
|
-
if (prop === "textStream") return wrappedStream;
|
|
100723
|
-
const value = target[prop];
|
|
100724
|
-
return typeof value === "function" ? value.bind(target) : value;
|
|
100725
|
-
}
|
|
100726
|
-
});
|
|
100727
|
-
} else if (limiter) {
|
|
100728
|
-
limiter.release(null);
|
|
100729
|
-
}
|
|
100730
100833
|
return result;
|
|
100731
|
-
} catch (error40) {
|
|
100732
|
-
if (limiter) {
|
|
100733
|
-
limiter.release(null);
|
|
100734
|
-
if (this.debug) {
|
|
100735
|
-
console.log(`[DEBUG] Released global AI concurrency slot on error`);
|
|
100736
|
-
}
|
|
100737
|
-
}
|
|
100738
|
-
throw error40;
|
|
100739
100834
|
} finally {
|
|
100740
100835
|
if (timeoutState.timeoutId) {
|
|
100741
100836
|
clearTimeout(timeoutState.timeoutId);
|
package/cjs/index.cjs
CHANGED
|
@@ -97531,9 +97531,13 @@ var init_ProbeAgent = __esm({
|
|
|
97531
97531
|
}
|
|
97532
97532
|
return await this.fallbackManager.executeWithFallback(
|
|
97533
97533
|
async (provider, model, config2) => {
|
|
97534
|
+
let fallbackModel = provider(model);
|
|
97535
|
+
if (this.concurrencyLimiter) {
|
|
97536
|
+
fallbackModel = _ProbeAgent._wrapModelWithLimiter(fallbackModel, this.concurrencyLimiter, this.debug);
|
|
97537
|
+
}
|
|
97534
97538
|
const fallbackOptions = {
|
|
97535
97539
|
...options,
|
|
97536
|
-
model:
|
|
97540
|
+
model: fallbackModel,
|
|
97537
97541
|
abortSignal: controller.signal
|
|
97538
97542
|
};
|
|
97539
97543
|
if (config2.provider !== "google" && fallbackOptions.tools) {
|
|
@@ -97561,6 +97565,132 @@ var init_ProbeAgent = __esm({
|
|
|
97561
97565
|
}
|
|
97562
97566
|
);
|
|
97563
97567
|
}
|
|
97568
|
+
/**
|
|
97569
|
+
* Wrap a LanguageModelV1 model so each doStream/doGenerate call acquires and
|
|
97570
|
+
* releases a concurrency limiter slot. This gates individual LLM API calls
|
|
97571
|
+
* (seconds each) instead of entire multi-step agent sessions (minutes).
|
|
97572
|
+
*
|
|
97573
|
+
* @param {Object} model - LanguageModelV1 model instance
|
|
97574
|
+
* @param {Object} limiter - Concurrency limiter with acquire/release/getStats
|
|
97575
|
+
* @param {boolean} debug - Enable debug logging
|
|
97576
|
+
* @returns {Object} Wrapped model with per-call concurrency gating
|
|
97577
|
+
* @private
|
|
97578
|
+
*/
|
|
97579
|
+
static _wrapModelWithLimiter(model, limiter, debug) {
|
|
97580
|
+
return new Proxy(model, {
|
|
97581
|
+
get(target, prop) {
|
|
97582
|
+
if (prop === "doStream") {
|
|
97583
|
+
return async function(...args) {
|
|
97584
|
+
await limiter.acquire(null);
|
|
97585
|
+
if (debug) {
|
|
97586
|
+
const stats = limiter.getStats();
|
|
97587
|
+
console.log(`[DEBUG] Acquired AI slot for LLM call (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
|
|
97588
|
+
}
|
|
97589
|
+
try {
|
|
97590
|
+
const result = await target.doStream(...args);
|
|
97591
|
+
const originalStream = result.stream;
|
|
97592
|
+
const originalReader = originalStream.getReader();
|
|
97593
|
+
let released = false;
|
|
97594
|
+
const releaseOnce = () => {
|
|
97595
|
+
if (released) return;
|
|
97596
|
+
released = true;
|
|
97597
|
+
limiter.release(null);
|
|
97598
|
+
};
|
|
97599
|
+
const wrappedStream = new ReadableStream({
|
|
97600
|
+
async pull(controller) {
|
|
97601
|
+
try {
|
|
97602
|
+
const { done, value: value2 } = await originalReader.read();
|
|
97603
|
+
if (done) {
|
|
97604
|
+
controller.close();
|
|
97605
|
+
releaseOnce();
|
|
97606
|
+
if (debug) {
|
|
97607
|
+
const stats = limiter.getStats();
|
|
97608
|
+
console.log(`[DEBUG] Released AI slot after LLM stream complete (${stats.globalActive}/${stats.maxConcurrent})`);
|
|
97609
|
+
}
|
|
97610
|
+
} else {
|
|
97611
|
+
controller.enqueue(value2);
|
|
97612
|
+
}
|
|
97613
|
+
} catch (err) {
|
|
97614
|
+
releaseOnce();
|
|
97615
|
+
if (debug) {
|
|
97616
|
+
console.log(`[DEBUG] Released AI slot on LLM stream error`);
|
|
97617
|
+
}
|
|
97618
|
+
controller.error(err);
|
|
97619
|
+
}
|
|
97620
|
+
},
|
|
97621
|
+
cancel() {
|
|
97622
|
+
releaseOnce();
|
|
97623
|
+
if (debug) {
|
|
97624
|
+
console.log(`[DEBUG] Released AI slot on LLM stream cancel`);
|
|
97625
|
+
}
|
|
97626
|
+
originalReader.cancel();
|
|
97627
|
+
}
|
|
97628
|
+
});
|
|
97629
|
+
return { ...result, stream: wrappedStream };
|
|
97630
|
+
} catch (err) {
|
|
97631
|
+
limiter.release(null);
|
|
97632
|
+
if (debug) {
|
|
97633
|
+
console.log(`[DEBUG] Released AI slot on doStream error`);
|
|
97634
|
+
}
|
|
97635
|
+
throw err;
|
|
97636
|
+
}
|
|
97637
|
+
};
|
|
97638
|
+
}
|
|
97639
|
+
if (prop === "doGenerate") {
|
|
97640
|
+
return async function(...args) {
|
|
97641
|
+
await limiter.acquire(null);
|
|
97642
|
+
if (debug) {
|
|
97643
|
+
const stats = limiter.getStats();
|
|
97644
|
+
console.log(`[DEBUG] Acquired AI slot for LLM generate (${stats.globalActive}/${stats.maxConcurrent})`);
|
|
97645
|
+
}
|
|
97646
|
+
try {
|
|
97647
|
+
const result = await target.doGenerate(...args);
|
|
97648
|
+
return result;
|
|
97649
|
+
} finally {
|
|
97650
|
+
limiter.release(null);
|
|
97651
|
+
if (debug) {
|
|
97652
|
+
const stats = limiter.getStats();
|
|
97653
|
+
console.log(`[DEBUG] Released AI slot after LLM generate (${stats.globalActive}/${stats.maxConcurrent})`);
|
|
97654
|
+
}
|
|
97655
|
+
}
|
|
97656
|
+
};
|
|
97657
|
+
}
|
|
97658
|
+
const value = target[prop];
|
|
97659
|
+
return typeof value === "function" ? value.bind(target) : value;
|
|
97660
|
+
}
|
|
97661
|
+
});
|
|
97662
|
+
}
|
|
97663
|
+
/**
|
|
97664
|
+
* Wrap an engine stream result so its textStream async generator acquires
|
|
97665
|
+
* and releases a concurrency limiter slot. Acquire happens when iteration
|
|
97666
|
+
* begins; release happens in finally (completion, error, or break).
|
|
97667
|
+
*
|
|
97668
|
+
* @param {Object} result - Engine result with { textStream, usage, ... }
|
|
97669
|
+
* @param {Object} limiter - Concurrency limiter with acquire/release/getStats
|
|
97670
|
+
* @param {boolean} debug - Enable debug logging
|
|
97671
|
+
* @returns {Object} Result with wrapped textStream
|
|
97672
|
+
* @private
|
|
97673
|
+
*/
|
|
97674
|
+
static _wrapEngineStreamWithLimiter(result, limiter, debug) {
|
|
97675
|
+
const originalStream = result.textStream;
|
|
97676
|
+
async function* gatedStream() {
|
|
97677
|
+
await limiter.acquire(null);
|
|
97678
|
+
if (debug) {
|
|
97679
|
+
const stats = limiter.getStats();
|
|
97680
|
+
console.log(`[DEBUG] Acquired AI slot for engine stream (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
|
|
97681
|
+
}
|
|
97682
|
+
try {
|
|
97683
|
+
yield* originalStream;
|
|
97684
|
+
} finally {
|
|
97685
|
+
limiter.release(null);
|
|
97686
|
+
if (debug) {
|
|
97687
|
+
const stats = limiter.getStats();
|
|
97688
|
+
console.log(`[DEBUG] Released AI slot after engine stream (${stats.globalActive}/${stats.maxConcurrent})`);
|
|
97689
|
+
}
|
|
97690
|
+
}
|
|
97691
|
+
}
|
|
97692
|
+
return { ...result, textStream: gatedStream() };
|
|
97693
|
+
}
|
|
97564
97694
|
/**
|
|
97565
97695
|
* Execute streamText with retry and fallback support
|
|
97566
97696
|
* @param {Object} options - streamText options
|
|
@@ -97569,12 +97699,8 @@ var init_ProbeAgent = __esm({
|
|
|
97569
97699
|
*/
|
|
97570
97700
|
async streamTextWithRetryAndFallback(options) {
|
|
97571
97701
|
const limiter = this.concurrencyLimiter;
|
|
97572
|
-
if (limiter) {
|
|
97573
|
-
|
|
97574
|
-
if (this.debug) {
|
|
97575
|
-
const stats = limiter.getStats();
|
|
97576
|
-
console.log(`[DEBUG] Acquired global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
|
|
97577
|
-
}
|
|
97702
|
+
if (limiter && options.model) {
|
|
97703
|
+
options = { ...options, model: _ProbeAgent._wrapModelWithLimiter(options.model, limiter, this.debug) };
|
|
97578
97704
|
}
|
|
97579
97705
|
const controller = new AbortController();
|
|
97580
97706
|
const timeoutState = { timeoutId: null };
|
|
@@ -97602,6 +97728,9 @@ var init_ProbeAgent = __esm({
|
|
|
97602
97728
|
if (useClaudeCode || useCodex) {
|
|
97603
97729
|
try {
|
|
97604
97730
|
result = await this._tryEngineStreamPath(options, controller, timeoutState);
|
|
97731
|
+
if (result && limiter) {
|
|
97732
|
+
result = _ProbeAgent._wrapEngineStreamWithLimiter(result, limiter, this.debug);
|
|
97733
|
+
}
|
|
97605
97734
|
} catch (error40) {
|
|
97606
97735
|
if (this.debug) {
|
|
97607
97736
|
const engineType = useClaudeCode ? "Claude Code" : "Codex";
|
|
@@ -97612,41 +97741,7 @@ var init_ProbeAgent = __esm({
|
|
|
97612
97741
|
if (!result) {
|
|
97613
97742
|
result = await this._executeWithVercelProvider(options, controller);
|
|
97614
97743
|
}
|
|
97615
|
-
if (limiter && result.textStream) {
|
|
97616
|
-
const originalStream = result.textStream;
|
|
97617
|
-
const debug = this.debug;
|
|
97618
|
-
const wrappedStream = (async function* () {
|
|
97619
|
-
try {
|
|
97620
|
-
for await (const chunk of originalStream) {
|
|
97621
|
-
yield chunk;
|
|
97622
|
-
}
|
|
97623
|
-
} finally {
|
|
97624
|
-
limiter.release(null);
|
|
97625
|
-
if (debug) {
|
|
97626
|
-
const stats = limiter.getStats();
|
|
97627
|
-
console.log(`[DEBUG] Released global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
|
|
97628
|
-
}
|
|
97629
|
-
}
|
|
97630
|
-
})();
|
|
97631
|
-
return new Proxy(result, {
|
|
97632
|
-
get(target, prop) {
|
|
97633
|
-
if (prop === "textStream") return wrappedStream;
|
|
97634
|
-
const value = target[prop];
|
|
97635
|
-
return typeof value === "function" ? value.bind(target) : value;
|
|
97636
|
-
}
|
|
97637
|
-
});
|
|
97638
|
-
} else if (limiter) {
|
|
97639
|
-
limiter.release(null);
|
|
97640
|
-
}
|
|
97641
97744
|
return result;
|
|
97642
|
-
} catch (error40) {
|
|
97643
|
-
if (limiter) {
|
|
97644
|
-
limiter.release(null);
|
|
97645
|
-
if (this.debug) {
|
|
97646
|
-
console.log(`[DEBUG] Released global AI concurrency slot on error`);
|
|
97647
|
-
}
|
|
97648
|
-
}
|
|
97649
|
-
throw error40;
|
|
97650
97745
|
} finally {
|
|
97651
97746
|
if (timeoutState.timeoutId) {
|
|
97652
97747
|
clearTimeout(timeoutState.timeoutId);
|
package/package.json
CHANGED
package/src/agent/ProbeAgent.js
CHANGED
|
@@ -1341,9 +1341,16 @@ export class ProbeAgent {
|
|
|
1341
1341
|
// Use fallback manager with retry for each provider
|
|
1342
1342
|
return await this.fallbackManager.executeWithFallback(
|
|
1343
1343
|
async (provider, model, config) => {
|
|
1344
|
+
// Wrap fallback model with per-call concurrency limiter if configured.
|
|
1345
|
+
// The original options.model was wrapped in streamTextWithRetryAndFallback,
|
|
1346
|
+
// but fallback replaces it with a new model that needs wrapping too.
|
|
1347
|
+
let fallbackModel = provider(model);
|
|
1348
|
+
if (this.concurrencyLimiter) {
|
|
1349
|
+
fallbackModel = ProbeAgent._wrapModelWithLimiter(fallbackModel, this.concurrencyLimiter, this.debug);
|
|
1350
|
+
}
|
|
1344
1351
|
const fallbackOptions = {
|
|
1345
1352
|
...options,
|
|
1346
|
-
model:
|
|
1353
|
+
model: fallbackModel,
|
|
1347
1354
|
abortSignal: controller.signal
|
|
1348
1355
|
};
|
|
1349
1356
|
|
|
@@ -1377,6 +1384,143 @@ export class ProbeAgent {
|
|
|
1377
1384
|
);
|
|
1378
1385
|
}
|
|
1379
1386
|
|
|
1387
|
+
/**
|
|
1388
|
+
* Wrap a LanguageModelV1 model so each doStream/doGenerate call acquires and
|
|
1389
|
+
* releases a concurrency limiter slot. This gates individual LLM API calls
|
|
1390
|
+
* (seconds each) instead of entire multi-step agent sessions (minutes).
|
|
1391
|
+
*
|
|
1392
|
+
* @param {Object} model - LanguageModelV1 model instance
|
|
1393
|
+
* @param {Object} limiter - Concurrency limiter with acquire/release/getStats
|
|
1394
|
+
* @param {boolean} debug - Enable debug logging
|
|
1395
|
+
* @returns {Object} Wrapped model with per-call concurrency gating
|
|
1396
|
+
* @private
|
|
1397
|
+
*/
|
|
1398
|
+
static _wrapModelWithLimiter(model, limiter, debug) {
|
|
1399
|
+
return new Proxy(model, {
|
|
1400
|
+
get(target, prop) {
|
|
1401
|
+
if (prop === 'doStream') {
|
|
1402
|
+
return async function (...args) {
|
|
1403
|
+
await limiter.acquire(null);
|
|
1404
|
+
if (debug) {
|
|
1405
|
+
const stats = limiter.getStats();
|
|
1406
|
+
console.log(`[DEBUG] Acquired AI slot for LLM call (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
|
|
1407
|
+
}
|
|
1408
|
+
try {
|
|
1409
|
+
const result = await target.doStream(...args);
|
|
1410
|
+
|
|
1411
|
+
// Wrap the ReadableStream to release the slot when it completes,
|
|
1412
|
+
// errors, or is cancelled — covering all stream termination paths.
|
|
1413
|
+
// Guard against double-release: if cancel() races with an in-flight
|
|
1414
|
+
// pull() that is awaiting originalReader.read(), both paths could
|
|
1415
|
+
// try to release. The flag ensures exactly one release.
|
|
1416
|
+
const originalStream = result.stream;
|
|
1417
|
+
const originalReader = originalStream.getReader();
|
|
1418
|
+
let released = false;
|
|
1419
|
+
const releaseOnce = () => {
|
|
1420
|
+
if (released) return;
|
|
1421
|
+
released = true;
|
|
1422
|
+
limiter.release(null);
|
|
1423
|
+
};
|
|
1424
|
+
const wrappedStream = new ReadableStream({
|
|
1425
|
+
async pull(controller) {
|
|
1426
|
+
try {
|
|
1427
|
+
const { done, value } = await originalReader.read();
|
|
1428
|
+
if (done) {
|
|
1429
|
+
controller.close();
|
|
1430
|
+
releaseOnce();
|
|
1431
|
+
if (debug) {
|
|
1432
|
+
const stats = limiter.getStats();
|
|
1433
|
+
console.log(`[DEBUG] Released AI slot after LLM stream complete (${stats.globalActive}/${stats.maxConcurrent})`);
|
|
1434
|
+
}
|
|
1435
|
+
} else {
|
|
1436
|
+
controller.enqueue(value);
|
|
1437
|
+
}
|
|
1438
|
+
} catch (err) {
|
|
1439
|
+
releaseOnce();
|
|
1440
|
+
if (debug) {
|
|
1441
|
+
console.log(`[DEBUG] Released AI slot on LLM stream error`);
|
|
1442
|
+
}
|
|
1443
|
+
controller.error(err);
|
|
1444
|
+
}
|
|
1445
|
+
},
|
|
1446
|
+
cancel() {
|
|
1447
|
+
releaseOnce();
|
|
1448
|
+
if (debug) {
|
|
1449
|
+
console.log(`[DEBUG] Released AI slot on LLM stream cancel`);
|
|
1450
|
+
}
|
|
1451
|
+
originalReader.cancel();
|
|
1452
|
+
}
|
|
1453
|
+
});
|
|
1454
|
+
|
|
1455
|
+
return { ...result, stream: wrappedStream };
|
|
1456
|
+
} catch (err) {
|
|
1457
|
+
limiter.release(null);
|
|
1458
|
+
if (debug) {
|
|
1459
|
+
console.log(`[DEBUG] Released AI slot on doStream error`);
|
|
1460
|
+
}
|
|
1461
|
+
throw err;
|
|
1462
|
+
}
|
|
1463
|
+
};
|
|
1464
|
+
}
|
|
1465
|
+
|
|
1466
|
+
if (prop === 'doGenerate') {
|
|
1467
|
+
return async function (...args) {
|
|
1468
|
+
await limiter.acquire(null);
|
|
1469
|
+
if (debug) {
|
|
1470
|
+
const stats = limiter.getStats();
|
|
1471
|
+
console.log(`[DEBUG] Acquired AI slot for LLM generate (${stats.globalActive}/${stats.maxConcurrent})`);
|
|
1472
|
+
}
|
|
1473
|
+
try {
|
|
1474
|
+
const result = await target.doGenerate(...args);
|
|
1475
|
+
return result;
|
|
1476
|
+
} finally {
|
|
1477
|
+
limiter.release(null);
|
|
1478
|
+
if (debug) {
|
|
1479
|
+
const stats = limiter.getStats();
|
|
1480
|
+
console.log(`[DEBUG] Released AI slot after LLM generate (${stats.globalActive}/${stats.maxConcurrent})`);
|
|
1481
|
+
}
|
|
1482
|
+
}
|
|
1483
|
+
};
|
|
1484
|
+
}
|
|
1485
|
+
|
|
1486
|
+
const value = target[prop];
|
|
1487
|
+
return typeof value === 'function' ? value.bind(target) : value;
|
|
1488
|
+
}
|
|
1489
|
+
});
|
|
1490
|
+
}
|
|
1491
|
+
|
|
1492
|
+
/**
|
|
1493
|
+
* Wrap an engine stream result so its textStream async generator acquires
|
|
1494
|
+
* and releases a concurrency limiter slot. Acquire happens when iteration
|
|
1495
|
+
* begins; release happens in finally (completion, error, or break).
|
|
1496
|
+
*
|
|
1497
|
+
* @param {Object} result - Engine result with { textStream, usage, ... }
|
|
1498
|
+
* @param {Object} limiter - Concurrency limiter with acquire/release/getStats
|
|
1499
|
+
* @param {boolean} debug - Enable debug logging
|
|
1500
|
+
* @returns {Object} Result with wrapped textStream
|
|
1501
|
+
* @private
|
|
1502
|
+
*/
|
|
1503
|
+
static _wrapEngineStreamWithLimiter(result, limiter, debug) {
|
|
1504
|
+
const originalStream = result.textStream;
|
|
1505
|
+
async function* gatedStream() {
|
|
1506
|
+
await limiter.acquire(null);
|
|
1507
|
+
if (debug) {
|
|
1508
|
+
const stats = limiter.getStats();
|
|
1509
|
+
console.log(`[DEBUG] Acquired AI slot for engine stream (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
|
|
1510
|
+
}
|
|
1511
|
+
try {
|
|
1512
|
+
yield* originalStream;
|
|
1513
|
+
} finally {
|
|
1514
|
+
limiter.release(null);
|
|
1515
|
+
if (debug) {
|
|
1516
|
+
const stats = limiter.getStats();
|
|
1517
|
+
console.log(`[DEBUG] Released AI slot after engine stream (${stats.globalActive}/${stats.maxConcurrent})`);
|
|
1518
|
+
}
|
|
1519
|
+
}
|
|
1520
|
+
}
|
|
1521
|
+
return { ...result, textStream: gatedStream() };
|
|
1522
|
+
}
|
|
1523
|
+
|
|
1380
1524
|
/**
|
|
1381
1525
|
* Execute streamText with retry and fallback support
|
|
1382
1526
|
* @param {Object} options - streamText options
|
|
@@ -1384,14 +1528,12 @@ export class ProbeAgent {
|
|
|
1384
1528
|
* @private
|
|
1385
1529
|
*/
|
|
1386
1530
|
async streamTextWithRetryAndFallback(options) {
|
|
1387
|
-
//
|
|
1531
|
+
// Wrap the model with per-call concurrency gating if limiter is configured.
|
|
1532
|
+
// This acquires/releases the slot around each individual LLM API call (doStream/doGenerate)
|
|
1533
|
+
// instead of holding it for the entire multi-step agent session.
|
|
1388
1534
|
const limiter = this.concurrencyLimiter;
|
|
1389
|
-
if (limiter) {
|
|
1390
|
-
|
|
1391
|
-
if (this.debug) {
|
|
1392
|
-
const stats = limiter.getStats();
|
|
1393
|
-
console.log(`[DEBUG] Acquired global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
|
|
1394
|
-
}
|
|
1535
|
+
if (limiter && options.model) {
|
|
1536
|
+
options = { ...options, model: ProbeAgent._wrapModelWithLimiter(options.model, limiter, this.debug) };
|
|
1395
1537
|
}
|
|
1396
1538
|
|
|
1397
1539
|
// Create AbortController for overall operation timeout
|
|
@@ -1430,6 +1572,12 @@ export class ProbeAgent {
|
|
|
1430
1572
|
if (useClaudeCode || useCodex) {
|
|
1431
1573
|
try {
|
|
1432
1574
|
result = await this._tryEngineStreamPath(options, controller, timeoutState);
|
|
1575
|
+
// Gate engine stream with concurrency limiter if configured.
|
|
1576
|
+
// Engine paths bypass the Vercel model wrapper, so we wrap the
|
|
1577
|
+
// textStream async generator with acquire/release instead.
|
|
1578
|
+
if (result && limiter) {
|
|
1579
|
+
result = ProbeAgent._wrapEngineStreamWithLimiter(result, limiter, this.debug);
|
|
1580
|
+
}
|
|
1433
1581
|
} catch (error) {
|
|
1434
1582
|
if (this.debug) {
|
|
1435
1583
|
const engineType = useClaudeCode ? 'Claude Code' : 'Codex';
|
|
@@ -1444,47 +1592,7 @@ export class ProbeAgent {
|
|
|
1444
1592
|
result = await this._executeWithVercelProvider(options, controller);
|
|
1445
1593
|
}
|
|
1446
1594
|
|
|
1447
|
-
// Wrap textStream so limiter slot is held until stream completes.
|
|
1448
|
-
// result.textStream is a read-only getter on DefaultStreamTextResult,
|
|
1449
|
-
// so we wrap the result in a Proxy that intercepts the textStream property.
|
|
1450
|
-
if (limiter && result.textStream) {
|
|
1451
|
-
const originalStream = result.textStream;
|
|
1452
|
-
const debug = this.debug;
|
|
1453
|
-
const wrappedStream = (async function* () {
|
|
1454
|
-
try {
|
|
1455
|
-
for await (const chunk of originalStream) {
|
|
1456
|
-
yield chunk;
|
|
1457
|
-
}
|
|
1458
|
-
} finally {
|
|
1459
|
-
limiter.release(null);
|
|
1460
|
-
if (debug) {
|
|
1461
|
-
const stats = limiter.getStats();
|
|
1462
|
-
console.log(`[DEBUG] Released global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
|
|
1463
|
-
}
|
|
1464
|
-
}
|
|
1465
|
-
})();
|
|
1466
|
-
return new Proxy(result, {
|
|
1467
|
-
get(target, prop) {
|
|
1468
|
-
if (prop === 'textStream') return wrappedStream;
|
|
1469
|
-
const value = target[prop];
|
|
1470
|
-
return typeof value === 'function' ? value.bind(target) : value;
|
|
1471
|
-
}
|
|
1472
|
-
});
|
|
1473
|
-
} else if (limiter) {
|
|
1474
|
-
// No textStream (shouldn't happen, but release just in case)
|
|
1475
|
-
limiter.release(null);
|
|
1476
|
-
}
|
|
1477
|
-
|
|
1478
1595
|
return result;
|
|
1479
|
-
} catch (error) {
|
|
1480
|
-
// Release on error if limiter was acquired
|
|
1481
|
-
if (limiter) {
|
|
1482
|
-
limiter.release(null);
|
|
1483
|
-
if (this.debug) {
|
|
1484
|
-
console.log(`[DEBUG] Released global AI concurrency slot on error`);
|
|
1485
|
-
}
|
|
1486
|
-
}
|
|
1487
|
-
throw error;
|
|
1488
1596
|
} finally {
|
|
1489
1597
|
// Clean up timeout (for non-engine paths; engine paths clean up in the generator)
|
|
1490
1598
|
if (timeoutState.timeoutId) {
|