@probelabs/probe 0.6.0-rc293 → 0.6.0-rc295

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cjs/index.cjs CHANGED
@@ -96781,6 +96781,18 @@ var init_ProbeAgent = __esm({
96781
96781
  if (this.debug) {
96782
96782
  console.log(`[DEBUG] Max operation timeout: ${this.maxOperationTimeout}ms`);
96783
96783
  }
96784
+ this.timeoutBehavior = options.timeoutBehavior ?? (() => {
96785
+ const val = process.env.TIMEOUT_BEHAVIOR;
96786
+ if (val === "hard") return "hard";
96787
+ return "graceful";
96788
+ })();
96789
+ this.gracefulTimeoutBonusSteps = options.gracefulTimeoutBonusSteps ?? (() => {
96790
+ const parsed = parseInt(process.env.GRACEFUL_TIMEOUT_BONUS_STEPS, 10);
96791
+ return isNaN(parsed) || parsed < 1 || parsed > 20 ? 4 : parsed;
96792
+ })();
96793
+ if (this.debug) {
96794
+ console.log(`[DEBUG] Timeout behavior: ${this.timeoutBehavior}, bonus steps: ${this.gracefulTimeoutBonusSteps}`);
96795
+ }
96784
96796
  this.retryConfig = options.retry || {};
96785
96797
  this.retryManager = null;
96786
96798
  this.fallbackConfig = options.fallback || null;
@@ -97531,9 +97543,13 @@ var init_ProbeAgent = __esm({
97531
97543
  }
97532
97544
  return await this.fallbackManager.executeWithFallback(
97533
97545
  async (provider, model, config2) => {
97546
+ let fallbackModel = provider(model);
97547
+ if (this.concurrencyLimiter) {
97548
+ fallbackModel = _ProbeAgent._wrapModelWithLimiter(fallbackModel, this.concurrencyLimiter, this.debug);
97549
+ }
97534
97550
  const fallbackOptions = {
97535
97551
  ...options,
97536
- model: provider(model),
97552
+ model: fallbackModel,
97537
97553
  abortSignal: controller.signal
97538
97554
  };
97539
97555
  if (config2.provider !== "google" && fallbackOptions.tools) {
@@ -97561,6 +97577,132 @@ var init_ProbeAgent = __esm({
97561
97577
  }
97562
97578
  );
97563
97579
  }
97580
+ /**
97581
+ * Wrap a LanguageModelV1 model so each doStream/doGenerate call acquires and
97582
+ * releases a concurrency limiter slot. This gates individual LLM API calls
97583
+ * (seconds each) instead of entire multi-step agent sessions (minutes).
97584
+ *
97585
+ * @param {Object} model - LanguageModelV1 model instance
97586
+ * @param {Object} limiter - Concurrency limiter with acquire/release/getStats
97587
+ * @param {boolean} debug - Enable debug logging
97588
+ * @returns {Object} Wrapped model with per-call concurrency gating
97589
+ * @private
97590
+ */
97591
+ static _wrapModelWithLimiter(model, limiter, debug) {
97592
+ return new Proxy(model, {
97593
+ get(target, prop) {
97594
+ if (prop === "doStream") {
97595
+ return async function(...args) {
97596
+ await limiter.acquire(null);
97597
+ if (debug) {
97598
+ const stats = limiter.getStats();
97599
+ console.log(`[DEBUG] Acquired AI slot for LLM call (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
97600
+ }
97601
+ try {
97602
+ const result = await target.doStream(...args);
97603
+ const originalStream = result.stream;
97604
+ const originalReader = originalStream.getReader();
97605
+ let released = false;
97606
+ const releaseOnce = () => {
97607
+ if (released) return;
97608
+ released = true;
97609
+ limiter.release(null);
97610
+ };
97611
+ const wrappedStream = new ReadableStream({
97612
+ async pull(controller) {
97613
+ try {
97614
+ const { done, value: value2 } = await originalReader.read();
97615
+ if (done) {
97616
+ controller.close();
97617
+ releaseOnce();
97618
+ if (debug) {
97619
+ const stats = limiter.getStats();
97620
+ console.log(`[DEBUG] Released AI slot after LLM stream complete (${stats.globalActive}/${stats.maxConcurrent})`);
97621
+ }
97622
+ } else {
97623
+ controller.enqueue(value2);
97624
+ }
97625
+ } catch (err) {
97626
+ releaseOnce();
97627
+ if (debug) {
97628
+ console.log(`[DEBUG] Released AI slot on LLM stream error`);
97629
+ }
97630
+ controller.error(err);
97631
+ }
97632
+ },
97633
+ cancel() {
97634
+ releaseOnce();
97635
+ if (debug) {
97636
+ console.log(`[DEBUG] Released AI slot on LLM stream cancel`);
97637
+ }
97638
+ originalReader.cancel();
97639
+ }
97640
+ });
97641
+ return { ...result, stream: wrappedStream };
97642
+ } catch (err) {
97643
+ limiter.release(null);
97644
+ if (debug) {
97645
+ console.log(`[DEBUG] Released AI slot on doStream error`);
97646
+ }
97647
+ throw err;
97648
+ }
97649
+ };
97650
+ }
97651
+ if (prop === "doGenerate") {
97652
+ return async function(...args) {
97653
+ await limiter.acquire(null);
97654
+ if (debug) {
97655
+ const stats = limiter.getStats();
97656
+ console.log(`[DEBUG] Acquired AI slot for LLM generate (${stats.globalActive}/${stats.maxConcurrent})`);
97657
+ }
97658
+ try {
97659
+ const result = await target.doGenerate(...args);
97660
+ return result;
97661
+ } finally {
97662
+ limiter.release(null);
97663
+ if (debug) {
97664
+ const stats = limiter.getStats();
97665
+ console.log(`[DEBUG] Released AI slot after LLM generate (${stats.globalActive}/${stats.maxConcurrent})`);
97666
+ }
97667
+ }
97668
+ };
97669
+ }
97670
+ const value = target[prop];
97671
+ return typeof value === "function" ? value.bind(target) : value;
97672
+ }
97673
+ });
97674
+ }
97675
+ /**
97676
+ * Wrap an engine stream result so its textStream async generator acquires
97677
+ * and releases a concurrency limiter slot. Acquire happens when iteration
97678
+ * begins; release happens in finally (completion, error, or break).
97679
+ *
97680
+ * @param {Object} result - Engine result with { textStream, usage, ... }
97681
+ * @param {Object} limiter - Concurrency limiter with acquire/release/getStats
97682
+ * @param {boolean} debug - Enable debug logging
97683
+ * @returns {Object} Result with wrapped textStream
97684
+ * @private
97685
+ */
97686
+ static _wrapEngineStreamWithLimiter(result, limiter, debug) {
97687
+ const originalStream = result.textStream;
97688
+ async function* gatedStream() {
97689
+ await limiter.acquire(null);
97690
+ if (debug) {
97691
+ const stats = limiter.getStats();
97692
+ console.log(`[DEBUG] Acquired AI slot for engine stream (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
97693
+ }
97694
+ try {
97695
+ yield* originalStream;
97696
+ } finally {
97697
+ limiter.release(null);
97698
+ if (debug) {
97699
+ const stats = limiter.getStats();
97700
+ console.log(`[DEBUG] Released AI slot after engine stream (${stats.globalActive}/${stats.maxConcurrent})`);
97701
+ }
97702
+ }
97703
+ }
97704
+ return { ...result, textStream: gatedStream() };
97705
+ }
97564
97706
  /**
97565
97707
  * Execute streamText with retry and fallback support
97566
97708
  * @param {Object} options - streamText options
@@ -97569,12 +97711,8 @@ var init_ProbeAgent = __esm({
97569
97711
  */
97570
97712
  async streamTextWithRetryAndFallback(options) {
97571
97713
  const limiter = this.concurrencyLimiter;
97572
- if (limiter) {
97573
- await limiter.acquire(null);
97574
- if (this.debug) {
97575
- const stats = limiter.getStats();
97576
- console.log(`[DEBUG] Acquired global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
97577
- }
97714
+ if (limiter && options.model) {
97715
+ options = { ...options, model: _ProbeAgent._wrapModelWithLimiter(options.model, limiter, this.debug) };
97578
97716
  }
97579
97717
  const controller = new AbortController();
97580
97718
  const timeoutState = { timeoutId: null };
@@ -97588,12 +97726,16 @@ var init_ProbeAgent = __esm({
97588
97726
  }, { once: true });
97589
97727
  }
97590
97728
  if (this.maxOperationTimeout && this.maxOperationTimeout > 0) {
97591
- timeoutState.timeoutId = setTimeout(() => {
97592
- controller.abort();
97593
- if (this.debug) {
97594
- console.log(`[DEBUG] Operation timed out after ${this.maxOperationTimeout}ms (max operation timeout)`);
97595
- }
97596
- }, this.maxOperationTimeout);
97729
+ const gts = this._gracefulTimeoutState;
97730
+ if (this.timeoutBehavior === "graceful" && gts) {
97731
+ } else {
97732
+ timeoutState.timeoutId = setTimeout(() => {
97733
+ controller.abort();
97734
+ if (this.debug) {
97735
+ console.log(`[DEBUG] Operation timed out after ${this.maxOperationTimeout}ms (max operation timeout)`);
97736
+ }
97737
+ }, this.maxOperationTimeout);
97738
+ }
97597
97739
  }
97598
97740
  try {
97599
97741
  const useClaudeCode = this.clientApiProvider === "claude-code" || process.env.USE_CLAUDE_CODE === "true";
@@ -97602,6 +97744,9 @@ var init_ProbeAgent = __esm({
97602
97744
  if (useClaudeCode || useCodex) {
97603
97745
  try {
97604
97746
  result = await this._tryEngineStreamPath(options, controller, timeoutState);
97747
+ if (result && limiter) {
97748
+ result = _ProbeAgent._wrapEngineStreamWithLimiter(result, limiter, this.debug);
97749
+ }
97605
97750
  } catch (error40) {
97606
97751
  if (this.debug) {
97607
97752
  const engineType = useClaudeCode ? "Claude Code" : "Codex";
@@ -97612,41 +97757,7 @@ var init_ProbeAgent = __esm({
97612
97757
  if (!result) {
97613
97758
  result = await this._executeWithVercelProvider(options, controller);
97614
97759
  }
97615
- if (limiter && result.textStream) {
97616
- const originalStream = result.textStream;
97617
- const debug = this.debug;
97618
- const wrappedStream = (async function* () {
97619
- try {
97620
- for await (const chunk of originalStream) {
97621
- yield chunk;
97622
- }
97623
- } finally {
97624
- limiter.release(null);
97625
- if (debug) {
97626
- const stats = limiter.getStats();
97627
- console.log(`[DEBUG] Released global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
97628
- }
97629
- }
97630
- })();
97631
- return new Proxy(result, {
97632
- get(target, prop) {
97633
- if (prop === "textStream") return wrappedStream;
97634
- const value = target[prop];
97635
- return typeof value === "function" ? value.bind(target) : value;
97636
- }
97637
- });
97638
- } else if (limiter) {
97639
- limiter.release(null);
97640
- }
97641
97760
  return result;
97642
- } catch (error40) {
97643
- if (limiter) {
97644
- limiter.release(null);
97645
- if (this.debug) {
97646
- console.log(`[DEBUG] Released global AI concurrency slot on error`);
97647
- }
97648
- }
97649
- throw error40;
97650
97761
  } finally {
97651
97762
  if (timeoutState.timeoutId) {
97652
97763
  clearTimeout(timeoutState.timeoutId);
@@ -99215,6 +99326,14 @@ You are working with a workspace. Available paths: ${workspaceDesc}
99215
99326
  }
99216
99327
  let completionPromptInjected = false;
99217
99328
  let preCompletionResult = null;
99329
+ const gracefulTimeoutState = {
99330
+ triggered: false,
99331
+ // Set to true when soft timeout fires
99332
+ bonusStepsUsed: 0,
99333
+ // Steps taken after soft timeout
99334
+ bonusStepsMax: this.gracefulTimeoutBonusSteps
99335
+ };
99336
+ this._gracefulTimeoutState = gracefulTimeoutState;
99218
99337
  let compactionAttempted = false;
99219
99338
  while (true) {
99220
99339
  try {
@@ -99224,6 +99343,15 @@ You are working with a workspace. Available paths: ${workspaceDesc}
99224
99343
  messages: messagesForAI,
99225
99344
  tools: tools2,
99226
99345
  stopWhen: ({ steps }) => {
99346
+ if (gracefulTimeoutState.triggered) {
99347
+ if (gracefulTimeoutState.bonusStepsUsed >= gracefulTimeoutState.bonusStepsMax) {
99348
+ if (this.debug) {
99349
+ console.log(`[DEBUG] stopWhen: graceful timeout bonus steps exhausted (${gracefulTimeoutState.bonusStepsUsed}/${gracefulTimeoutState.bonusStepsMax}), forcing stop`);
99350
+ }
99351
+ return true;
99352
+ }
99353
+ return false;
99354
+ }
99227
99355
  if (steps.length >= maxIterations) return true;
99228
99356
  const lastStep = steps[steps.length - 1];
99229
99357
  const modelWantsToStop = lastStep?.finishReason === "stop" && (!lastStep?.toolCalls || lastStep.toolCalls.length === 0);
@@ -99267,6 +99395,30 @@ You are working with a workspace. Available paths: ${workspaceDesc}
99267
99395
  return false;
99268
99396
  },
99269
99397
  prepareStep: ({ steps, stepNumber }) => {
99398
+ if (gracefulTimeoutState.triggered) {
99399
+ gracefulTimeoutState.bonusStepsUsed++;
99400
+ const remaining = gracefulTimeoutState.bonusStepsMax - gracefulTimeoutState.bonusStepsUsed;
99401
+ if (gracefulTimeoutState.bonusStepsUsed === 1) {
99402
+ if (this.debug) {
99403
+ console.log(`[DEBUG] prepareStep: graceful timeout wind-down step 1/${gracefulTimeoutState.bonusStepsMax}`);
99404
+ }
99405
+ if (this.tracer) {
99406
+ this.tracer.addEvent("graceful_timeout.wind_down_started", {
99407
+ bonus_steps_max: gracefulTimeoutState.bonusStepsMax,
99408
+ current_iteration: currentIteration,
99409
+ max_iterations: maxIterations
99410
+ });
99411
+ }
99412
+ return {
99413
+ toolChoice: "none",
99414
+ userMessage: `\u26A0\uFE0F TIME LIMIT REACHED. You are running out of time. You have ${remaining} step(s) remaining. Provide your BEST answer NOW using the information you have already gathered. Do NOT call any more tools. Summarize your findings and respond completely. If something was not completed, honestly state what was not done and provide any partial results or recommendations you can offer.`
99415
+ };
99416
+ }
99417
+ if (this.debug) {
99418
+ console.log(`[DEBUG] prepareStep: graceful timeout wind-down step ${gracefulTimeoutState.bonusStepsUsed}/${gracefulTimeoutState.bonusStepsMax} (${remaining} remaining)`);
99419
+ }
99420
+ return { toolChoice: "none" };
99421
+ }
99270
99422
  if (stepNumber === maxIterations - 1) {
99271
99423
  return {
99272
99424
  toolChoice: "none"
@@ -99370,6 +99522,12 @@ Double-check your response based on the criteria above. If everything looks good
99370
99522
  }));
99371
99523
  }
99372
99524
  this.tracer.addEvent("iteration.step", stepEvent);
99525
+ if (gracefulTimeoutState.triggered) {
99526
+ this.tracer.addEvent("graceful_timeout.wind_down_step", {
99527
+ bonus_step: gracefulTimeoutState.bonusStepsUsed,
99528
+ bonus_max: gracefulTimeoutState.bonusStepsMax
99529
+ });
99530
+ }
99373
99531
  }
99374
99532
  if (usage) {
99375
99533
  this.tokenCounter.recordUsage(usage);
@@ -99415,22 +99573,45 @@ Double-check your response based on the criteria above. If everything looks good
99415
99573
  }
99416
99574
  const executeAIRequest = async () => {
99417
99575
  const result = await this.streamTextWithRetryAndFallback(streamOptions);
99418
- const steps = await result.steps;
99419
- let finalText;
99420
- if (steps && steps.length > 1) {
99421
- const lastStepText = steps[steps.length - 1].text;
99422
- finalText = lastStepText || await result.text;
99423
- } else {
99424
- finalText = await result.text;
99425
- }
99426
- if (this.debug) {
99427
- console.log(`[DEBUG] streamText completed: ${steps?.length || 0} steps, finalText=${finalText?.length || 0} chars`);
99576
+ let gracefulTimeoutId = null;
99577
+ let hardAbortTimeoutId = null;
99578
+ if (this.timeoutBehavior === "graceful" && gracefulTimeoutState && this.maxOperationTimeout > 0) {
99579
+ gracefulTimeoutId = setTimeout(() => {
99580
+ gracefulTimeoutState.triggered = true;
99581
+ if (this.debug) {
99582
+ console.log(`[DEBUG] Soft timeout after ${this.maxOperationTimeout}ms \u2014 entering wind-down mode (${gracefulTimeoutState.bonusStepsMax} bonus steps)`);
99583
+ }
99584
+ hardAbortTimeoutId = setTimeout(() => {
99585
+ if (this._abortController) {
99586
+ this._abortController.abort();
99587
+ }
99588
+ if (this.debug) {
99589
+ console.log(`[DEBUG] Hard abort \u2014 wind-down safety net expired after 60s`);
99590
+ }
99591
+ }, 6e4);
99592
+ }, this.maxOperationTimeout);
99428
99593
  }
99429
- const usage = await result.usage;
99430
- if (usage) {
99431
- this.tokenCounter.recordUsage(usage, result.experimental_providerMetadata);
99594
+ try {
99595
+ const steps = await result.steps;
99596
+ let finalText;
99597
+ if (steps && steps.length > 1) {
99598
+ const lastStepText = steps[steps.length - 1].text;
99599
+ finalText = lastStepText || await result.text;
99600
+ } else {
99601
+ finalText = await result.text;
99602
+ }
99603
+ if (this.debug) {
99604
+ console.log(`[DEBUG] streamText completed: ${steps?.length || 0} steps, finalText=${finalText?.length || 0} chars`);
99605
+ }
99606
+ const usage = await result.usage;
99607
+ if (usage) {
99608
+ this.tokenCounter.recordUsage(usage, result.experimental_providerMetadata);
99609
+ }
99610
+ return { finalText, result };
99611
+ } finally {
99612
+ if (gracefulTimeoutId) clearTimeout(gracefulTimeoutId);
99613
+ if (hardAbortTimeoutId) clearTimeout(hardAbortTimeoutId);
99432
99614
  }
99433
- return { finalText, result };
99434
99615
  };
99435
99616
  let aiResult;
99436
99617
  if (this.tracer) {
@@ -99467,6 +99648,50 @@ Double-check your response based on the criteria above. If everything looks good
99467
99648
  } else if (aiResult.finalText) {
99468
99649
  finalResult = aiResult.finalText;
99469
99650
  }
99651
+ if (gracefulTimeoutState.triggered) {
99652
+ const timeoutNotice = "**Note: This response was generated under a time constraint. The research may be incomplete, and some planned searches or analysis steps were not completed.**\n\n";
99653
+ if (!finalResult || finalResult === "I was unable to complete your request due to reaching the maximum number of tool iterations.") {
99654
+ try {
99655
+ const allText = await aiResult.result.text;
99656
+ if (allText && allText.trim()) {
99657
+ finalResult = timeoutNotice + allText;
99658
+ if (this.debug) {
99659
+ console.log(`[DEBUG] Graceful timeout: using concatenated step text (${allText.length} chars)`);
99660
+ }
99661
+ } else {
99662
+ const steps = await aiResult.result.steps;
99663
+ const toolSummaries = [];
99664
+ for (const step of steps || []) {
99665
+ if (step.toolResults?.length > 0) {
99666
+ for (const tr of step.toolResults) {
99667
+ const resultText = typeof tr.result === "string" ? tr.result : JSON.stringify(tr.result);
99668
+ if (resultText && resultText.length > 0 && resultText.length < 5e3) {
99669
+ toolSummaries.push(resultText.substring(0, 2e3));
99670
+ }
99671
+ }
99672
+ }
99673
+ }
99674
+ if (toolSummaries.length > 0) {
99675
+ finalResult = `${timeoutNotice}The operation timed out before a complete answer could be generated. Here is the partial information gathered:
99676
+
99677
+ ${toolSummaries.join("\n\n---\n\n")}`;
99678
+ if (this.debug) {
99679
+ console.log(`[DEBUG] Graceful timeout: built fallback from ${toolSummaries.length} tool results`);
99680
+ }
99681
+ } else {
99682
+ finalResult = "The operation timed out before enough information could be gathered to provide an answer. Please try again with a simpler query or increase the timeout.";
99683
+ }
99684
+ }
99685
+ } catch (e) {
99686
+ if (this.debug) {
99687
+ console.log(`[DEBUG] Graceful timeout fallback error: ${e.message}`);
99688
+ }
99689
+ finalResult = "The operation timed out before enough information could be gathered to provide an answer. Please try again with a simpler query or increase the timeout.";
99690
+ }
99691
+ } else {
99692
+ finalResult = timeoutNotice + finalResult;
99693
+ }
99694
+ }
99470
99695
  const resultMessages = await aiResult.result.response?.messages;
99471
99696
  if (resultMessages) {
99472
99697
  for (const msg of resultMessages) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@probelabs/probe",
3
- "version": "0.6.0-rc293",
3
+ "version": "0.6.0-rc295",
4
4
  "description": "Node.js wrapper for the probe code search tool",
5
5
  "main": "src/index.js",
6
6
  "module": "src/index.js",
@@ -106,6 +106,10 @@ export interface ProbeAgentOptions {
106
106
  requestTimeout?: number;
107
107
  /** Maximum timeout in ms for the entire operation including all retries and fallbacks (default: 300000 or MAX_OPERATION_TIMEOUT env var). This is the absolute maximum time for streamTextWithRetryAndFallback. */
108
108
  maxOperationTimeout?: number;
109
+ /** Timeout behavior: 'graceful' winds down with bonus steps giving the agent a chance to respond, 'hard' aborts immediately (default: 'graceful'). Env var: TIMEOUT_BEHAVIOR */
110
+ timeoutBehavior?: 'graceful' | 'hard';
111
+ /** Number of bonus steps during graceful timeout wind-down (default: 4, range: 1-20). Env var: GRACEFUL_TIMEOUT_BONUS_STEPS */
112
+ gracefulTimeoutBonusSteps?: number;
109
113
  }
110
114
 
111
115
  /**