@probelabs/probe 0.6.0-rc231 → 0.6.0-rc232

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/agent/ProbeAgent.js CHANGED
@@ -357,6 +357,10 @@ export class ProbeAgent {
357
357
  // Each ProbeAgent instance has its own limits, not shared globally
358
358
  this.delegationManager = new DelegationManager();
359
359
 
360
+ // Optional global concurrency limiter shared across all ProbeAgent instances.
361
+ // When set, every AI API call acquires a slot before calling the provider.
362
+ this.concurrencyLimiter = options.concurrencyLimiter || null;
363
+
360
364
  // Request timeout configuration (default 2 minutes)
361
365
  // Validates env var to prevent NaN or unreasonable values
362
366
  this.requestTimeout = options.requestTimeout ?? (() => {
@@ -824,6 +828,7 @@ export class ProbeAgent {
824
828
  provider: this.clientApiProvider,
825
829
  model: this.clientApiModel,
826
830
  delegationManager: this.delegationManager, // Per-instance delegation limits
831
+ concurrencyLimiter: this.concurrencyLimiter, // Global AI concurrency limiter
827
832
  isToolAllowed
828
833
  };
829
834
 
@@ -1363,6 +1368,16 @@ export class ProbeAgent {
1363
1368
  * @private
1364
1369
  */
1365
1370
  async streamTextWithRetryAndFallback(options) {
1371
+ // Acquire global concurrency slot if limiter is configured
1372
+ const limiter = this.concurrencyLimiter;
1373
+ if (limiter) {
1374
+ await limiter.acquire(null);
1375
+ if (this.debug) {
1376
+ const stats = limiter.getStats();
1377
+ console.log(`[DEBUG] Acquired global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
1378
+ }
1379
+ }
1380
+
1366
1381
  // Create AbortController for overall operation timeout
1367
1382
  const controller = new AbortController();
1368
1383
  const timeoutState = { timeoutId: null };
@@ -1382,12 +1397,10 @@ export class ProbeAgent {
1382
1397
  const useClaudeCode = this.clientApiProvider === 'claude-code' || process.env.USE_CLAUDE_CODE === 'true';
1383
1398
  const useCodex = this.clientApiProvider === 'codex' || process.env.USE_CODEX === 'true';
1384
1399
 
1400
+ let result;
1385
1401
  if (useClaudeCode || useCodex) {
1386
1402
  try {
1387
- const result = await this._tryEngineStreamPath(options, controller, timeoutState);
1388
- if (result) {
1389
- return result;
1390
- }
1403
+ result = await this._tryEngineStreamPath(options, controller, timeoutState);
1391
1404
  } catch (error) {
1392
1405
  if (this.debug) {
1393
1406
  const engineType = useClaudeCode ? 'Claude Code' : 'Codex';
@@ -1397,8 +1410,43 @@ export class ProbeAgent {
1397
1410
  }
1398
1411
  }
1399
1412
 
1400
- // Use Vercel AI SDK with retry/fallback
1401
- return await this._executeWithVercelProvider(options, controller);
1413
+ if (!result) {
1414
+ // Use Vercel AI SDK with retry/fallback
1415
+ result = await this._executeWithVercelProvider(options, controller);
1416
+ }
1417
+
1418
+ // Wrap textStream so limiter slot is held until stream completes
1419
+ if (limiter && result.textStream) {
1420
+ const originalStream = result.textStream;
1421
+ const debug = this.debug;
1422
+ result.textStream = (async function* () {
1423
+ try {
1424
+ for await (const chunk of originalStream) {
1425
+ yield chunk;
1426
+ }
1427
+ } finally {
1428
+ limiter.release(null);
1429
+ if (debug) {
1430
+ const stats = limiter.getStats();
1431
+ console.log(`[DEBUG] Released global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
1432
+ }
1433
+ }
1434
+ })();
1435
+ } else if (limiter) {
1436
+ // No textStream (shouldn't happen, but release just in case)
1437
+ limiter.release(null);
1438
+ }
1439
+
1440
+ return result;
1441
+ } catch (error) {
1442
+ // Release on error if limiter was acquired
1443
+ if (limiter) {
1444
+ limiter.release(null);
1445
+ if (this.debug) {
1446
+ console.log(`[DEBUG] Released global AI concurrency slot on error`);
1447
+ }
1448
+ }
1449
+ throw error;
1402
1450
  } finally {
1403
1451
  // Clean up timeout (for non-engine paths; engine paths clean up in the generator)
1404
1452
  if (timeoutState.timeoutId) {
@@ -3859,8 +3859,10 @@ async function delegate({
3859
3859
  enableMcp = false,
3860
3860
  mcpConfig = null,
3861
3861
  mcpConfigPath = null,
3862
- delegationManager = null
3862
+ delegationManager = null,
3863
3863
  // Optional per-instance manager, falls back to default singleton
3864
+ concurrencyLimiter = null
3865
+ // Optional global AI concurrency limiter
3864
3866
  }) {
3865
3867
  if (!task || typeof task !== "string") {
3866
3868
  throw new Error("Task parameter is required and must be a string");
@@ -3936,8 +3938,10 @@ async function delegate({
3936
3938
  // Inherit from parent (subagent creates own MCPXmlBridge)
3937
3939
  mcpConfig,
3938
3940
  // Inherit from parent
3939
- mcpConfigPath
3941
+ mcpConfigPath,
3940
3942
  // Inherit from parent
3943
+ concurrencyLimiter
3944
+ // Inherit global AI concurrency limiter
3941
3945
  });
3942
3946
  if (debug) {
3943
3947
  console.error(`[DELEGATE] Created subagent with session ${sessionId}`);
@@ -4034,10 +4038,10 @@ var init_delegate = __esm({
4034
4038
  "use strict";
4035
4039
  init_ProbeAgent();
4036
4040
  DelegationManager = class {
4037
- constructor() {
4038
- this.maxConcurrent = parseInt(process.env.MAX_CONCURRENT_DELEGATIONS || "3", 10);
4039
- this.maxPerSession = parseInt(process.env.MAX_DELEGATIONS_PER_SESSION || "10", 10);
4040
- this.defaultQueueTimeout = parseInt(process.env.DELEGATION_QUEUE_TIMEOUT || "60000", 10);
4041
+ constructor(options = {}) {
4042
+ this.maxConcurrent = options.maxConcurrent ?? parseInt(process.env.MAX_CONCURRENT_DELEGATIONS || "3", 10);
4043
+ this.maxPerSession = options.maxPerSession ?? parseInt(process.env.MAX_DELEGATIONS_PER_SESSION || "10", 10);
4044
+ this.defaultQueueTimeout = options.queueTimeout ?? parseInt(process.env.DELEGATION_QUEUE_TIMEOUT || "60000", 10);
4041
4045
  this.sessionDelegations = /* @__PURE__ */ new Map();
4042
4046
  this.globalActive = 0;
4043
4047
  this.waitQueue = [];
@@ -70572,6 +70576,7 @@ var init_ProbeAgent = __esm({
70572
70576
  this.enableTasks = !!options.enableTasks;
70573
70577
  this.taskManager = null;
70574
70578
  this.delegationManager = new DelegationManager();
70579
+ this.concurrencyLimiter = options.concurrencyLimiter || null;
70575
70580
  this.requestTimeout = options.requestTimeout ?? (() => {
70576
70581
  if (process.env.REQUEST_TIMEOUT) {
70577
70582
  const parsed = parseInt(process.env.REQUEST_TIMEOUT, 10);
@@ -70948,6 +70953,8 @@ var init_ProbeAgent = __esm({
70948
70953
  model: this.clientApiModel,
70949
70954
  delegationManager: this.delegationManager,
70950
70955
  // Per-instance delegation limits
70956
+ concurrencyLimiter: this.concurrencyLimiter,
70957
+ // Global AI concurrency limiter
70951
70958
  isToolAllowed
70952
70959
  };
70953
70960
  const baseTools = createTools(configOptions);
@@ -71369,6 +71376,14 @@ var init_ProbeAgent = __esm({
71369
71376
  * @private
71370
71377
  */
71371
71378
  async streamTextWithRetryAndFallback(options) {
71379
+ const limiter = this.concurrencyLimiter;
71380
+ if (limiter) {
71381
+ await limiter.acquire(null);
71382
+ if (this.debug) {
71383
+ const stats = limiter.getStats();
71384
+ console.log(`[DEBUG] Acquired global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
71385
+ }
71386
+ }
71372
71387
  const controller = new AbortController();
71373
71388
  const timeoutState = { timeoutId: null };
71374
71389
  if (this.maxOperationTimeout && this.maxOperationTimeout > 0) {
@@ -71382,12 +71397,10 @@ var init_ProbeAgent = __esm({
71382
71397
  try {
71383
71398
  const useClaudeCode = this.clientApiProvider === "claude-code" || process.env.USE_CLAUDE_CODE === "true";
71384
71399
  const useCodex = this.clientApiProvider === "codex" || process.env.USE_CODEX === "true";
71400
+ let result;
71385
71401
  if (useClaudeCode || useCodex) {
71386
71402
  try {
71387
- const result = await this._tryEngineStreamPath(options, controller, timeoutState);
71388
- if (result) {
71389
- return result;
71390
- }
71403
+ result = await this._tryEngineStreamPath(options, controller, timeoutState);
71391
71404
  } catch (error) {
71392
71405
  if (this.debug) {
71393
71406
  const engineType = useClaudeCode ? "Claude Code" : "Codex";
@@ -71395,7 +71408,37 @@ var init_ProbeAgent = __esm({
71395
71408
  }
71396
71409
  }
71397
71410
  }
71398
- return await this._executeWithVercelProvider(options, controller);
71411
+ if (!result) {
71412
+ result = await this._executeWithVercelProvider(options, controller);
71413
+ }
71414
+ if (limiter && result.textStream) {
71415
+ const originalStream = result.textStream;
71416
+ const debug = this.debug;
71417
+ result.textStream = (async function* () {
71418
+ try {
71419
+ for await (const chunk of originalStream) {
71420
+ yield chunk;
71421
+ }
71422
+ } finally {
71423
+ limiter.release(null);
71424
+ if (debug) {
71425
+ const stats = limiter.getStats();
71426
+ console.log(`[DEBUG] Released global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
71427
+ }
71428
+ }
71429
+ })();
71430
+ } else if (limiter) {
71431
+ limiter.release(null);
71432
+ }
71433
+ return result;
71434
+ } catch (error) {
71435
+ if (limiter) {
71436
+ limiter.release(null);
71437
+ if (this.debug) {
71438
+ console.log(`[DEBUG] Released global AI concurrency slot on error`);
71439
+ }
71440
+ }
71441
+ throw error;
71399
71442
  } finally {
71400
71443
  if (timeoutState.timeoutId) {
71401
71444
  clearTimeout(timeoutState.timeoutId);
package/build/delegate.js CHANGED
@@ -19,11 +19,14 @@ import { ProbeAgent } from './agent/ProbeAgent.js';
19
19
  * - For long-running processes, periodic cleanup of stale sessions may be needed
20
20
  */
21
21
  class DelegationManager {
22
- constructor() {
23
- this.maxConcurrent = parseInt(process.env.MAX_CONCURRENT_DELEGATIONS || '3', 10);
24
- this.maxPerSession = parseInt(process.env.MAX_DELEGATIONS_PER_SESSION || '10', 10);
22
+ constructor(options = {}) {
23
+ this.maxConcurrent = options.maxConcurrent
24
+ ?? parseInt(process.env.MAX_CONCURRENT_DELEGATIONS || '3', 10);
25
+ this.maxPerSession = options.maxPerSession
26
+ ?? parseInt(process.env.MAX_DELEGATIONS_PER_SESSION || '10', 10);
25
27
  // Default queue timeout: 60 seconds. Set DELEGATION_QUEUE_TIMEOUT=0 to disable.
26
- this.defaultQueueTimeout = parseInt(process.env.DELEGATION_QUEUE_TIMEOUT || '60000', 10);
28
+ this.defaultQueueTimeout = options.queueTimeout
29
+ ?? parseInt(process.env.DELEGATION_QUEUE_TIMEOUT || '60000', 10);
27
30
 
28
31
  // Track delegations per session with timestamp for potential TTL cleanup
29
32
  // Map<string, { count: number, lastUpdated: number }>
@@ -353,6 +356,7 @@ const DEFAULT_DELEGATE_TIMEOUT = parseInt(process.env.DELEGATE_TIMEOUT, 10) || 3
353
356
  * @param {boolean} [options.enableMcp=false] - Enable MCP tool integration (inherited from parent)
354
357
  * @param {Object} [options.mcpConfig] - MCP configuration object (inherited from parent)
355
358
  * @param {string} [options.mcpConfigPath] - Path to MCP configuration file (inherited from parent)
359
+ * @param {Object} [options.concurrencyLimiter=null] - Global AI concurrency limiter (DelegationManager instance)
356
360
  * @returns {Promise<string>} The response from the delegate agent
357
361
  */
358
362
  export async function delegate({
@@ -379,7 +383,8 @@ export async function delegate({
379
383
  enableMcp = false,
380
384
  mcpConfig = null,
381
385
  mcpConfigPath = null,
382
- delegationManager = null // Optional per-instance manager, falls back to default singleton
386
+ delegationManager = null, // Optional per-instance manager, falls back to default singleton
387
+ concurrencyLimiter = null // Optional global AI concurrency limiter
383
388
  }) {
384
389
  if (!task || typeof task !== 'string') {
385
390
  throw new Error('Task parameter is required and must be a string');
@@ -464,7 +469,8 @@ export async function delegate({
464
469
  enableTasks, // Inherit from parent (subagent gets isolated TaskManager)
465
470
  enableMcp, // Inherit from parent (subagent creates own MCPXmlBridge)
466
471
  mcpConfig, // Inherit from parent
467
- mcpConfigPath // Inherit from parent
472
+ mcpConfigPath, // Inherit from parent
473
+ concurrencyLimiter // Inherit global AI concurrency limiter
468
474
  });
469
475
 
470
476
  if (debug) {
@@ -30969,8 +30969,10 @@ async function delegate({
30969
30969
  enableMcp = false,
30970
30970
  mcpConfig = null,
30971
30971
  mcpConfigPath = null,
30972
- delegationManager = null
30972
+ delegationManager = null,
30973
30973
  // Optional per-instance manager, falls back to default singleton
30974
+ concurrencyLimiter = null
30975
+ // Optional global AI concurrency limiter
30974
30976
  }) {
30975
30977
  if (!task || typeof task !== "string") {
30976
30978
  throw new Error("Task parameter is required and must be a string");
@@ -31046,8 +31048,10 @@ async function delegate({
31046
31048
  // Inherit from parent (subagent creates own MCPXmlBridge)
31047
31049
  mcpConfig,
31048
31050
  // Inherit from parent
31049
- mcpConfigPath
31051
+ mcpConfigPath,
31050
31052
  // Inherit from parent
31053
+ concurrencyLimiter
31054
+ // Inherit global AI concurrency limiter
31051
31055
  });
31052
31056
  if (debug) {
31053
31057
  console.error(`[DELEGATE] Created subagent with session ${sessionId}`);
@@ -31145,10 +31149,10 @@ var init_delegate = __esm({
31145
31149
  import_crypto2 = require("crypto");
31146
31150
  init_ProbeAgent();
31147
31151
  DelegationManager = class {
31148
- constructor() {
31149
- this.maxConcurrent = parseInt(process.env.MAX_CONCURRENT_DELEGATIONS || "3", 10);
31150
- this.maxPerSession = parseInt(process.env.MAX_DELEGATIONS_PER_SESSION || "10", 10);
31151
- this.defaultQueueTimeout = parseInt(process.env.DELEGATION_QUEUE_TIMEOUT || "60000", 10);
31152
+ constructor(options = {}) {
31153
+ this.maxConcurrent = options.maxConcurrent ?? parseInt(process.env.MAX_CONCURRENT_DELEGATIONS || "3", 10);
31154
+ this.maxPerSession = options.maxPerSession ?? parseInt(process.env.MAX_DELEGATIONS_PER_SESSION || "10", 10);
31155
+ this.defaultQueueTimeout = options.queueTimeout ?? parseInt(process.env.DELEGATION_QUEUE_TIMEOUT || "60000", 10);
31152
31156
  this.sessionDelegations = /* @__PURE__ */ new Map();
31153
31157
  this.globalActive = 0;
31154
31158
  this.waitQueue = [];
@@ -97249,6 +97253,7 @@ var init_ProbeAgent = __esm({
97249
97253
  this.enableTasks = !!options.enableTasks;
97250
97254
  this.taskManager = null;
97251
97255
  this.delegationManager = new DelegationManager();
97256
+ this.concurrencyLimiter = options.concurrencyLimiter || null;
97252
97257
  this.requestTimeout = options.requestTimeout ?? (() => {
97253
97258
  if (process.env.REQUEST_TIMEOUT) {
97254
97259
  const parsed = parseInt(process.env.REQUEST_TIMEOUT, 10);
@@ -97625,6 +97630,8 @@ var init_ProbeAgent = __esm({
97625
97630
  model: this.clientApiModel,
97626
97631
  delegationManager: this.delegationManager,
97627
97632
  // Per-instance delegation limits
97633
+ concurrencyLimiter: this.concurrencyLimiter,
97634
+ // Global AI concurrency limiter
97628
97635
  isToolAllowed
97629
97636
  };
97630
97637
  const baseTools = createTools(configOptions);
@@ -98046,6 +98053,14 @@ var init_ProbeAgent = __esm({
98046
98053
  * @private
98047
98054
  */
98048
98055
  async streamTextWithRetryAndFallback(options) {
98056
+ const limiter = this.concurrencyLimiter;
98057
+ if (limiter) {
98058
+ await limiter.acquire(null);
98059
+ if (this.debug) {
98060
+ const stats = limiter.getStats();
98061
+ console.log(`[DEBUG] Acquired global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
98062
+ }
98063
+ }
98049
98064
  const controller = new AbortController();
98050
98065
  const timeoutState = { timeoutId: null };
98051
98066
  if (this.maxOperationTimeout && this.maxOperationTimeout > 0) {
@@ -98059,12 +98074,10 @@ var init_ProbeAgent = __esm({
98059
98074
  try {
98060
98075
  const useClaudeCode = this.clientApiProvider === "claude-code" || process.env.USE_CLAUDE_CODE === "true";
98061
98076
  const useCodex = this.clientApiProvider === "codex" || process.env.USE_CODEX === "true";
98077
+ let result;
98062
98078
  if (useClaudeCode || useCodex) {
98063
98079
  try {
98064
- const result = await this._tryEngineStreamPath(options, controller, timeoutState);
98065
- if (result) {
98066
- return result;
98067
- }
98080
+ result = await this._tryEngineStreamPath(options, controller, timeoutState);
98068
98081
  } catch (error2) {
98069
98082
  if (this.debug) {
98070
98083
  const engineType = useClaudeCode ? "Claude Code" : "Codex";
@@ -98072,7 +98085,37 @@ var init_ProbeAgent = __esm({
98072
98085
  }
98073
98086
  }
98074
98087
  }
98075
- return await this._executeWithVercelProvider(options, controller);
98088
+ if (!result) {
98089
+ result = await this._executeWithVercelProvider(options, controller);
98090
+ }
98091
+ if (limiter && result.textStream) {
98092
+ const originalStream = result.textStream;
98093
+ const debug = this.debug;
98094
+ result.textStream = (async function* () {
98095
+ try {
98096
+ for await (const chunk of originalStream) {
98097
+ yield chunk;
98098
+ }
98099
+ } finally {
98100
+ limiter.release(null);
98101
+ if (debug) {
98102
+ const stats = limiter.getStats();
98103
+ console.log(`[DEBUG] Released global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
98104
+ }
98105
+ }
98106
+ })();
98107
+ } else if (limiter) {
98108
+ limiter.release(null);
98109
+ }
98110
+ return result;
98111
+ } catch (error2) {
98112
+ if (limiter) {
98113
+ limiter.release(null);
98114
+ if (this.debug) {
98115
+ console.log(`[DEBUG] Released global AI concurrency slot on error`);
98116
+ }
98117
+ }
98118
+ throw error2;
98076
98119
  } finally {
98077
98120
  if (timeoutState.timeoutId) {
98078
98121
  clearTimeout(timeoutState.timeoutId);
package/cjs/index.cjs CHANGED
@@ -93982,6 +93982,7 @@ var init_ProbeAgent = __esm({
93982
93982
  this.enableTasks = !!options.enableTasks;
93983
93983
  this.taskManager = null;
93984
93984
  this.delegationManager = new DelegationManager();
93985
+ this.concurrencyLimiter = options.concurrencyLimiter || null;
93985
93986
  this.requestTimeout = options.requestTimeout ?? (() => {
93986
93987
  if (process.env.REQUEST_TIMEOUT) {
93987
93988
  const parsed = parseInt(process.env.REQUEST_TIMEOUT, 10);
@@ -94358,6 +94359,8 @@ var init_ProbeAgent = __esm({
94358
94359
  model: this.clientApiModel,
94359
94360
  delegationManager: this.delegationManager,
94360
94361
  // Per-instance delegation limits
94362
+ concurrencyLimiter: this.concurrencyLimiter,
94363
+ // Global AI concurrency limiter
94361
94364
  isToolAllowed
94362
94365
  };
94363
94366
  const baseTools = createTools(configOptions);
@@ -94779,6 +94782,14 @@ var init_ProbeAgent = __esm({
94779
94782
  * @private
94780
94783
  */
94781
94784
  async streamTextWithRetryAndFallback(options) {
94785
+ const limiter = this.concurrencyLimiter;
94786
+ if (limiter) {
94787
+ await limiter.acquire(null);
94788
+ if (this.debug) {
94789
+ const stats = limiter.getStats();
94790
+ console.log(`[DEBUG] Acquired global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
94791
+ }
94792
+ }
94782
94793
  const controller = new AbortController();
94783
94794
  const timeoutState = { timeoutId: null };
94784
94795
  if (this.maxOperationTimeout && this.maxOperationTimeout > 0) {
@@ -94792,12 +94803,10 @@ var init_ProbeAgent = __esm({
94792
94803
  try {
94793
94804
  const useClaudeCode = this.clientApiProvider === "claude-code" || process.env.USE_CLAUDE_CODE === "true";
94794
94805
  const useCodex = this.clientApiProvider === "codex" || process.env.USE_CODEX === "true";
94806
+ let result;
94795
94807
  if (useClaudeCode || useCodex) {
94796
94808
  try {
94797
- const result = await this._tryEngineStreamPath(options, controller, timeoutState);
94798
- if (result) {
94799
- return result;
94800
- }
94809
+ result = await this._tryEngineStreamPath(options, controller, timeoutState);
94801
94810
  } catch (error2) {
94802
94811
  if (this.debug) {
94803
94812
  const engineType = useClaudeCode ? "Claude Code" : "Codex";
@@ -94805,7 +94814,37 @@ var init_ProbeAgent = __esm({
94805
94814
  }
94806
94815
  }
94807
94816
  }
94808
- return await this._executeWithVercelProvider(options, controller);
94817
+ if (!result) {
94818
+ result = await this._executeWithVercelProvider(options, controller);
94819
+ }
94820
+ if (limiter && result.textStream) {
94821
+ const originalStream = result.textStream;
94822
+ const debug = this.debug;
94823
+ result.textStream = (async function* () {
94824
+ try {
94825
+ for await (const chunk of originalStream) {
94826
+ yield chunk;
94827
+ }
94828
+ } finally {
94829
+ limiter.release(null);
94830
+ if (debug) {
94831
+ const stats = limiter.getStats();
94832
+ console.log(`[DEBUG] Released global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
94833
+ }
94834
+ }
94835
+ })();
94836
+ } else if (limiter) {
94837
+ limiter.release(null);
94838
+ }
94839
+ return result;
94840
+ } catch (error2) {
94841
+ if (limiter) {
94842
+ limiter.release(null);
94843
+ if (this.debug) {
94844
+ console.log(`[DEBUG] Released global AI concurrency slot on error`);
94845
+ }
94846
+ }
94847
+ throw error2;
94809
94848
  } finally {
94810
94849
  if (timeoutState.timeoutId) {
94811
94850
  clearTimeout(timeoutState.timeoutId);
@@ -97639,8 +97678,10 @@ async function delegate({
97639
97678
  enableMcp = false,
97640
97679
  mcpConfig = null,
97641
97680
  mcpConfigPath = null,
97642
- delegationManager = null
97681
+ delegationManager = null,
97643
97682
  // Optional per-instance manager, falls back to default singleton
97683
+ concurrencyLimiter = null
97684
+ // Optional global AI concurrency limiter
97644
97685
  }) {
97645
97686
  if (!task || typeof task !== "string") {
97646
97687
  throw new Error("Task parameter is required and must be a string");
@@ -97716,8 +97757,10 @@ async function delegate({
97716
97757
  // Inherit from parent (subagent creates own MCPXmlBridge)
97717
97758
  mcpConfig,
97718
97759
  // Inherit from parent
97719
- mcpConfigPath
97760
+ mcpConfigPath,
97720
97761
  // Inherit from parent
97762
+ concurrencyLimiter
97763
+ // Inherit global AI concurrency limiter
97721
97764
  });
97722
97765
  if (debug) {
97723
97766
  console.error(`[DELEGATE] Created subagent with session ${sessionId}`);
@@ -97815,10 +97858,10 @@ var init_delegate = __esm({
97815
97858
  import_crypto9 = require("crypto");
97816
97859
  init_ProbeAgent();
97817
97860
  DelegationManager = class {
97818
- constructor() {
97819
- this.maxConcurrent = parseInt(process.env.MAX_CONCURRENT_DELEGATIONS || "3", 10);
97820
- this.maxPerSession = parseInt(process.env.MAX_DELEGATIONS_PER_SESSION || "10", 10);
97821
- this.defaultQueueTimeout = parseInt(process.env.DELEGATION_QUEUE_TIMEOUT || "60000", 10);
97861
+ constructor(options = {}) {
97862
+ this.maxConcurrent = options.maxConcurrent ?? parseInt(process.env.MAX_CONCURRENT_DELEGATIONS || "3", 10);
97863
+ this.maxPerSession = options.maxPerSession ?? parseInt(process.env.MAX_DELEGATIONS_PER_SESSION || "10", 10);
97864
+ this.defaultQueueTimeout = options.queueTimeout ?? parseInt(process.env.DELEGATION_QUEUE_TIMEOUT || "60000", 10);
97822
97865
  this.sessionDelegations = /* @__PURE__ */ new Map();
97823
97866
  this.globalActive = 0;
97824
97867
  this.waitQueue = [];
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@probelabs/probe",
3
- "version": "0.6.0-rc231",
3
+ "version": "0.6.0-rc232",
4
4
  "description": "Node.js wrapper for the probe code search tool",
5
5
  "main": "src/index.js",
6
6
  "module": "src/index.js",
package/src/agent/ProbeAgent.js CHANGED
@@ -357,6 +357,10 @@ export class ProbeAgent {
357
357
  // Each ProbeAgent instance has its own limits, not shared globally
358
358
  this.delegationManager = new DelegationManager();
359
359
 
360
+ // Optional global concurrency limiter shared across all ProbeAgent instances.
361
+ // When set, every AI API call acquires a slot before calling the provider.
362
+ this.concurrencyLimiter = options.concurrencyLimiter || null;
363
+
360
364
  // Request timeout configuration (default 2 minutes)
361
365
  // Validates env var to prevent NaN or unreasonable values
362
366
  this.requestTimeout = options.requestTimeout ?? (() => {
@@ -824,6 +828,7 @@ export class ProbeAgent {
824
828
  provider: this.clientApiProvider,
825
829
  model: this.clientApiModel,
826
830
  delegationManager: this.delegationManager, // Per-instance delegation limits
831
+ concurrencyLimiter: this.concurrencyLimiter, // Global AI concurrency limiter
827
832
  isToolAllowed
828
833
  };
829
834
 
@@ -1363,6 +1368,16 @@ export class ProbeAgent {
1363
1368
  * @private
1364
1369
  */
1365
1370
  async streamTextWithRetryAndFallback(options) {
1371
+ // Acquire global concurrency slot if limiter is configured
1372
+ const limiter = this.concurrencyLimiter;
1373
+ if (limiter) {
1374
+ await limiter.acquire(null);
1375
+ if (this.debug) {
1376
+ const stats = limiter.getStats();
1377
+ console.log(`[DEBUG] Acquired global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
1378
+ }
1379
+ }
1380
+
1366
1381
  // Create AbortController for overall operation timeout
1367
1382
  const controller = new AbortController();
1368
1383
  const timeoutState = { timeoutId: null };
@@ -1382,12 +1397,10 @@ export class ProbeAgent {
1382
1397
  const useClaudeCode = this.clientApiProvider === 'claude-code' || process.env.USE_CLAUDE_CODE === 'true';
1383
1398
  const useCodex = this.clientApiProvider === 'codex' || process.env.USE_CODEX === 'true';
1384
1399
 
1400
+ let result;
1385
1401
  if (useClaudeCode || useCodex) {
1386
1402
  try {
1387
- const result = await this._tryEngineStreamPath(options, controller, timeoutState);
1388
- if (result) {
1389
- return result;
1390
- }
1403
+ result = await this._tryEngineStreamPath(options, controller, timeoutState);
1391
1404
  } catch (error) {
1392
1405
  if (this.debug) {
1393
1406
  const engineType = useClaudeCode ? 'Claude Code' : 'Codex';
@@ -1397,8 +1410,43 @@ export class ProbeAgent {
1397
1410
  }
1398
1411
  }
1399
1412
 
1400
- // Use Vercel AI SDK with retry/fallback
1401
- return await this._executeWithVercelProvider(options, controller);
1413
+ if (!result) {
1414
+ // Use Vercel AI SDK with retry/fallback
1415
+ result = await this._executeWithVercelProvider(options, controller);
1416
+ }
1417
+
1418
+ // Wrap textStream so limiter slot is held until stream completes
1419
+ if (limiter && result.textStream) {
1420
+ const originalStream = result.textStream;
1421
+ const debug = this.debug;
1422
+ result.textStream = (async function* () {
1423
+ try {
1424
+ for await (const chunk of originalStream) {
1425
+ yield chunk;
1426
+ }
1427
+ } finally {
1428
+ limiter.release(null);
1429
+ if (debug) {
1430
+ const stats = limiter.getStats();
1431
+ console.log(`[DEBUG] Released global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
1432
+ }
1433
+ }
1434
+ })();
1435
+ } else if (limiter) {
1436
+ // No textStream (shouldn't happen, but release just in case)
1437
+ limiter.release(null);
1438
+ }
1439
+
1440
+ return result;
1441
+ } catch (error) {
1442
+ // Release on error if limiter was acquired
1443
+ if (limiter) {
1444
+ limiter.release(null);
1445
+ if (this.debug) {
1446
+ console.log(`[DEBUG] Released global AI concurrency slot on error`);
1447
+ }
1448
+ }
1449
+ throw error;
1402
1450
  } finally {
1403
1451
  // Clean up timeout (for non-engine paths; engine paths clean up in the generator)
1404
1452
  if (timeoutState.timeoutId) {
package/src/delegate.js CHANGED
@@ -19,11 +19,14 @@ import { ProbeAgent } from './agent/ProbeAgent.js';
19
19
  * - For long-running processes, periodic cleanup of stale sessions may be needed
20
20
  */
21
21
  class DelegationManager {
22
- constructor() {
23
- this.maxConcurrent = parseInt(process.env.MAX_CONCURRENT_DELEGATIONS || '3', 10);
24
- this.maxPerSession = parseInt(process.env.MAX_DELEGATIONS_PER_SESSION || '10', 10);
22
+ constructor(options = {}) {
23
+ this.maxConcurrent = options.maxConcurrent
24
+ ?? parseInt(process.env.MAX_CONCURRENT_DELEGATIONS || '3', 10);
25
+ this.maxPerSession = options.maxPerSession
26
+ ?? parseInt(process.env.MAX_DELEGATIONS_PER_SESSION || '10', 10);
25
27
  // Default queue timeout: 60 seconds. Set DELEGATION_QUEUE_TIMEOUT=0 to disable.
26
- this.defaultQueueTimeout = parseInt(process.env.DELEGATION_QUEUE_TIMEOUT || '60000', 10);
28
+ this.defaultQueueTimeout = options.queueTimeout
29
+ ?? parseInt(process.env.DELEGATION_QUEUE_TIMEOUT || '60000', 10);
27
30
 
28
31
  // Track delegations per session with timestamp for potential TTL cleanup
29
32
  // Map<string, { count: number, lastUpdated: number }>
@@ -353,6 +356,7 @@ const DEFAULT_DELEGATE_TIMEOUT = parseInt(process.env.DELEGATE_TIMEOUT, 10) || 3
353
356
  * @param {boolean} [options.enableMcp=false] - Enable MCP tool integration (inherited from parent)
354
357
  * @param {Object} [options.mcpConfig] - MCP configuration object (inherited from parent)
355
358
  * @param {string} [options.mcpConfigPath] - Path to MCP configuration file (inherited from parent)
359
+ * @param {Object} [options.concurrencyLimiter=null] - Global AI concurrency limiter (DelegationManager instance)
356
360
  * @returns {Promise<string>} The response from the delegate agent
357
361
  */
358
362
  export async function delegate({
@@ -379,7 +383,8 @@ export async function delegate({
379
383
  enableMcp = false,
380
384
  mcpConfig = null,
381
385
  mcpConfigPath = null,
382
- delegationManager = null // Optional per-instance manager, falls back to default singleton
386
+ delegationManager = null, // Optional per-instance manager, falls back to default singleton
387
+ concurrencyLimiter = null // Optional global AI concurrency limiter
383
388
  }) {
384
389
  if (!task || typeof task !== 'string') {
385
390
  throw new Error('Task parameter is required and must be a string');
@@ -464,7 +469,8 @@ export async function delegate({
464
469
  enableTasks, // Inherit from parent (subagent gets isolated TaskManager)
465
470
  enableMcp, // Inherit from parent (subagent creates own MCPXmlBridge)
466
471
  mcpConfig, // Inherit from parent
467
- mcpConfigPath // Inherit from parent
472
+ mcpConfigPath, // Inherit from parent
473
+ concurrencyLimiter // Inherit global AI concurrency limiter
468
474
  });
469
475
 
470
476
  if (debug) {