@probelabs/probe 0.6.0-rc293 → 0.6.0-rc295

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -99870,6 +99870,18 @@ var init_ProbeAgent = __esm({
99870
99870
  if (this.debug) {
99871
99871
  console.log(`[DEBUG] Max operation timeout: ${this.maxOperationTimeout}ms`);
99872
99872
  }
99873
+ this.timeoutBehavior = options.timeoutBehavior ?? (() => {
99874
+ const val = process.env.TIMEOUT_BEHAVIOR;
99875
+ if (val === "hard") return "hard";
99876
+ return "graceful";
99877
+ })();
99878
+ this.gracefulTimeoutBonusSteps = options.gracefulTimeoutBonusSteps ?? (() => {
99879
+ const parsed = parseInt(process.env.GRACEFUL_TIMEOUT_BONUS_STEPS, 10);
99880
+ return isNaN(parsed) || parsed < 1 || parsed > 20 ? 4 : parsed;
99881
+ })();
99882
+ if (this.debug) {
99883
+ console.log(`[DEBUG] Timeout behavior: ${this.timeoutBehavior}, bonus steps: ${this.gracefulTimeoutBonusSteps}`);
99884
+ }
99873
99885
  this.retryConfig = options.retry || {};
99874
99886
  this.retryManager = null;
99875
99887
  this.fallbackConfig = options.fallback || null;
@@ -100620,9 +100632,13 @@ var init_ProbeAgent = __esm({
100620
100632
  }
100621
100633
  return await this.fallbackManager.executeWithFallback(
100622
100634
  async (provider, model, config2) => {
100635
+ let fallbackModel = provider(model);
100636
+ if (this.concurrencyLimiter) {
100637
+ fallbackModel = _ProbeAgent._wrapModelWithLimiter(fallbackModel, this.concurrencyLimiter, this.debug);
100638
+ }
100623
100639
  const fallbackOptions = {
100624
100640
  ...options,
100625
- model: provider(model),
100641
+ model: fallbackModel,
100626
100642
  abortSignal: controller.signal
100627
100643
  };
100628
100644
  if (config2.provider !== "google" && fallbackOptions.tools) {
@@ -100650,6 +100666,132 @@ var init_ProbeAgent = __esm({
100650
100666
  }
100651
100667
  );
100652
100668
  }
100669
+ /**
100670
+ * Wrap a LanguageModelV1 model so each doStream/doGenerate call acquires and
100671
+ * releases a concurrency limiter slot. This gates individual LLM API calls
100672
+ * (seconds each) instead of entire multi-step agent sessions (minutes).
100673
+ *
100674
+ * @param {Object} model - LanguageModelV1 model instance
100675
+ * @param {Object} limiter - Concurrency limiter with acquire/release/getStats
100676
+ * @param {boolean} debug - Enable debug logging
100677
+ * @returns {Object} Wrapped model with per-call concurrency gating
100678
+ * @private
100679
+ */
100680
+ static _wrapModelWithLimiter(model, limiter, debug) {
100681
+ return new Proxy(model, {
100682
+ get(target, prop) {
100683
+ if (prop === "doStream") {
100684
+ return async function(...args) {
100685
+ await limiter.acquire(null);
100686
+ if (debug) {
100687
+ const stats = limiter.getStats();
100688
+ console.log(`[DEBUG] Acquired AI slot for LLM call (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
100689
+ }
100690
+ try {
100691
+ const result = await target.doStream(...args);
100692
+ const originalStream = result.stream;
100693
+ const originalReader = originalStream.getReader();
100694
+ let released = false;
100695
+ const releaseOnce = () => {
100696
+ if (released) return;
100697
+ released = true;
100698
+ limiter.release(null);
100699
+ };
100700
+ const wrappedStream = new ReadableStream({
100701
+ async pull(controller) {
100702
+ try {
100703
+ const { done, value: value2 } = await originalReader.read();
100704
+ if (done) {
100705
+ controller.close();
100706
+ releaseOnce();
100707
+ if (debug) {
100708
+ const stats = limiter.getStats();
100709
+ console.log(`[DEBUG] Released AI slot after LLM stream complete (${stats.globalActive}/${stats.maxConcurrent})`);
100710
+ }
100711
+ } else {
100712
+ controller.enqueue(value2);
100713
+ }
100714
+ } catch (err) {
100715
+ releaseOnce();
100716
+ if (debug) {
100717
+ console.log(`[DEBUG] Released AI slot on LLM stream error`);
100718
+ }
100719
+ controller.error(err);
100720
+ }
100721
+ },
100722
+ cancel() {
100723
+ releaseOnce();
100724
+ if (debug) {
100725
+ console.log(`[DEBUG] Released AI slot on LLM stream cancel`);
100726
+ }
100727
+ originalReader.cancel();
100728
+ }
100729
+ });
100730
+ return { ...result, stream: wrappedStream };
100731
+ } catch (err) {
100732
+ limiter.release(null);
100733
+ if (debug) {
100734
+ console.log(`[DEBUG] Released AI slot on doStream error`);
100735
+ }
100736
+ throw err;
100737
+ }
100738
+ };
100739
+ }
100740
+ if (prop === "doGenerate") {
100741
+ return async function(...args) {
100742
+ await limiter.acquire(null);
100743
+ if (debug) {
100744
+ const stats = limiter.getStats();
100745
+ console.log(`[DEBUG] Acquired AI slot for LLM generate (${stats.globalActive}/${stats.maxConcurrent})`);
100746
+ }
100747
+ try {
100748
+ const result = await target.doGenerate(...args);
100749
+ return result;
100750
+ } finally {
100751
+ limiter.release(null);
100752
+ if (debug) {
100753
+ const stats = limiter.getStats();
100754
+ console.log(`[DEBUG] Released AI slot after LLM generate (${stats.globalActive}/${stats.maxConcurrent})`);
100755
+ }
100756
+ }
100757
+ };
100758
+ }
100759
+ const value = target[prop];
100760
+ return typeof value === "function" ? value.bind(target) : value;
100761
+ }
100762
+ });
100763
+ }
100764
+ /**
100765
+ * Wrap an engine stream result so its textStream async generator acquires
100766
+ * and releases a concurrency limiter slot. Acquire happens when iteration
100767
+ * begins; release happens in finally (completion, error, or break).
100768
+ *
100769
+ * @param {Object} result - Engine result with { textStream, usage, ... }
100770
+ * @param {Object} limiter - Concurrency limiter with acquire/release/getStats
100771
+ * @param {boolean} debug - Enable debug logging
100772
+ * @returns {Object} Result with wrapped textStream
100773
+ * @private
100774
+ */
100775
+ static _wrapEngineStreamWithLimiter(result, limiter, debug) {
100776
+ const originalStream = result.textStream;
100777
+ async function* gatedStream() {
100778
+ await limiter.acquire(null);
100779
+ if (debug) {
100780
+ const stats = limiter.getStats();
100781
+ console.log(`[DEBUG] Acquired AI slot for engine stream (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
100782
+ }
100783
+ try {
100784
+ yield* originalStream;
100785
+ } finally {
100786
+ limiter.release(null);
100787
+ if (debug) {
100788
+ const stats = limiter.getStats();
100789
+ console.log(`[DEBUG] Released AI slot after engine stream (${stats.globalActive}/${stats.maxConcurrent})`);
100790
+ }
100791
+ }
100792
+ }
100793
+ return { ...result, textStream: gatedStream() };
100794
+ }
100653
100795
  /**
100654
100796
  * Execute streamText with retry and fallback support
100655
100797
  * @param {Object} options - streamText options
@@ -100658,12 +100800,8 @@ var init_ProbeAgent = __esm({
100658
100800
  */
100659
100801
  async streamTextWithRetryAndFallback(options) {
100660
100802
  const limiter = this.concurrencyLimiter;
100661
- if (limiter) {
100662
- await limiter.acquire(null);
100663
- if (this.debug) {
100664
- const stats = limiter.getStats();
100665
- console.log(`[DEBUG] Acquired global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
100666
- }
100803
+ if (limiter && options.model) {
100804
+ options = { ...options, model: _ProbeAgent._wrapModelWithLimiter(options.model, limiter, this.debug) };
100667
100805
  }
100668
100806
  const controller = new AbortController();
100669
100807
  const timeoutState = { timeoutId: null };
@@ -100677,12 +100815,16 @@ var init_ProbeAgent = __esm({
100677
100815
  }, { once: true });
100678
100816
  }
100679
100817
  if (this.maxOperationTimeout && this.maxOperationTimeout > 0) {
100680
- timeoutState.timeoutId = setTimeout(() => {
100681
- controller.abort();
100682
- if (this.debug) {
100683
- console.log(`[DEBUG] Operation timed out after ${this.maxOperationTimeout}ms (max operation timeout)`);
100684
- }
100685
- }, this.maxOperationTimeout);
100818
+ const gts = this._gracefulTimeoutState;
100819
+ if (this.timeoutBehavior === "graceful" && gts) {
100820
+ } else {
100821
+ timeoutState.timeoutId = setTimeout(() => {
100822
+ controller.abort();
100823
+ if (this.debug) {
100824
+ console.log(`[DEBUG] Operation timed out after ${this.maxOperationTimeout}ms (max operation timeout)`);
100825
+ }
100826
+ }, this.maxOperationTimeout);
100827
+ }
100686
100828
  }
100687
100829
  try {
100688
100830
  const useClaudeCode = this.clientApiProvider === "claude-code" || process.env.USE_CLAUDE_CODE === "true";
@@ -100691,6 +100833,9 @@ var init_ProbeAgent = __esm({
100691
100833
  if (useClaudeCode || useCodex) {
100692
100834
  try {
100693
100835
  result = await this._tryEngineStreamPath(options, controller, timeoutState);
100836
+ if (result && limiter) {
100837
+ result = _ProbeAgent._wrapEngineStreamWithLimiter(result, limiter, this.debug);
100838
+ }
100694
100839
  } catch (error40) {
100695
100840
  if (this.debug) {
100696
100841
  const engineType = useClaudeCode ? "Claude Code" : "Codex";
@@ -100701,41 +100846,7 @@ var init_ProbeAgent = __esm({
100701
100846
  if (!result) {
100702
100847
  result = await this._executeWithVercelProvider(options, controller);
100703
100848
  }
100704
- if (limiter && result.textStream) {
100705
- const originalStream = result.textStream;
100706
- const debug = this.debug;
100707
- const wrappedStream = (async function* () {
100708
- try {
100709
- for await (const chunk of originalStream) {
100710
- yield chunk;
100711
- }
100712
- } finally {
100713
- limiter.release(null);
100714
- if (debug) {
100715
- const stats = limiter.getStats();
100716
- console.log(`[DEBUG] Released global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
100717
- }
100718
- }
100719
- })();
100720
- return new Proxy(result, {
100721
- get(target, prop) {
100722
- if (prop === "textStream") return wrappedStream;
100723
- const value = target[prop];
100724
- return typeof value === "function" ? value.bind(target) : value;
100725
- }
100726
- });
100727
- } else if (limiter) {
100728
- limiter.release(null);
100729
- }
100730
100849
  return result;
100731
- } catch (error40) {
100732
- if (limiter) {
100733
- limiter.release(null);
100734
- if (this.debug) {
100735
- console.log(`[DEBUG] Released global AI concurrency slot on error`);
100736
- }
100737
- }
100738
- throw error40;
100739
100850
  } finally {
100740
100851
  if (timeoutState.timeoutId) {
100741
100852
  clearTimeout(timeoutState.timeoutId);
@@ -102304,6 +102415,14 @@ You are working with a workspace. Available paths: ${workspaceDesc}
102304
102415
  }
102305
102416
  let completionPromptInjected = false;
102306
102417
  let preCompletionResult = null;
102418
+ const gracefulTimeoutState = {
102419
+ triggered: false,
102420
+ // Set to true when soft timeout fires
102421
+ bonusStepsUsed: 0,
102422
+ // Steps taken after soft timeout
102423
+ bonusStepsMax: this.gracefulTimeoutBonusSteps
102424
+ };
102425
+ this._gracefulTimeoutState = gracefulTimeoutState;
102307
102426
  let compactionAttempted = false;
102308
102427
  while (true) {
102309
102428
  try {
@@ -102313,6 +102432,15 @@ You are working with a workspace. Available paths: ${workspaceDesc}
102313
102432
  messages: messagesForAI,
102314
102433
  tools: tools2,
102315
102434
  stopWhen: ({ steps }) => {
102435
+ if (gracefulTimeoutState.triggered) {
102436
+ if (gracefulTimeoutState.bonusStepsUsed >= gracefulTimeoutState.bonusStepsMax) {
102437
+ if (this.debug) {
102438
+ console.log(`[DEBUG] stopWhen: graceful timeout bonus steps exhausted (${gracefulTimeoutState.bonusStepsUsed}/${gracefulTimeoutState.bonusStepsMax}), forcing stop`);
102439
+ }
102440
+ return true;
102441
+ }
102442
+ return false;
102443
+ }
102316
102444
  if (steps.length >= maxIterations) return true;
102317
102445
  const lastStep = steps[steps.length - 1];
102318
102446
  const modelWantsToStop = lastStep?.finishReason === "stop" && (!lastStep?.toolCalls || lastStep.toolCalls.length === 0);
@@ -102356,6 +102484,30 @@ You are working with a workspace. Available paths: ${workspaceDesc}
102356
102484
  return false;
102357
102485
  },
102358
102486
  prepareStep: ({ steps, stepNumber }) => {
102487
+ if (gracefulTimeoutState.triggered) {
102488
+ gracefulTimeoutState.bonusStepsUsed++;
102489
+ const remaining = gracefulTimeoutState.bonusStepsMax - gracefulTimeoutState.bonusStepsUsed;
102490
+ if (gracefulTimeoutState.bonusStepsUsed === 1) {
102491
+ if (this.debug) {
102492
+ console.log(`[DEBUG] prepareStep: graceful timeout wind-down step 1/${gracefulTimeoutState.bonusStepsMax}`);
102493
+ }
102494
+ if (this.tracer) {
102495
+ this.tracer.addEvent("graceful_timeout.wind_down_started", {
102496
+ bonus_steps_max: gracefulTimeoutState.bonusStepsMax,
102497
+ current_iteration: currentIteration,
102498
+ max_iterations: maxIterations
102499
+ });
102500
+ }
102501
+ return {
102502
+ toolChoice: "none",
102503
+ userMessage: `\u26A0\uFE0F TIME LIMIT REACHED. You are running out of time. You have ${remaining} step(s) remaining. Provide your BEST answer NOW using the information you have already gathered. Do NOT call any more tools. Summarize your findings and respond completely. If something was not completed, honestly state what was not done and provide any partial results or recommendations you can offer.`
102504
+ };
102505
+ }
102506
+ if (this.debug) {
102507
+ console.log(`[DEBUG] prepareStep: graceful timeout wind-down step ${gracefulTimeoutState.bonusStepsUsed}/${gracefulTimeoutState.bonusStepsMax} (${remaining} remaining)`);
102508
+ }
102509
+ return { toolChoice: "none" };
102510
+ }
102359
102511
  if (stepNumber === maxIterations - 1) {
102360
102512
  return {
102361
102513
  toolChoice: "none"
@@ -102459,6 +102611,12 @@ Double-check your response based on the criteria above. If everything looks good
102459
102611
  }));
102460
102612
  }
102461
102613
  this.tracer.addEvent("iteration.step", stepEvent);
102614
+ if (gracefulTimeoutState.triggered) {
102615
+ this.tracer.addEvent("graceful_timeout.wind_down_step", {
102616
+ bonus_step: gracefulTimeoutState.bonusStepsUsed,
102617
+ bonus_max: gracefulTimeoutState.bonusStepsMax
102618
+ });
102619
+ }
102462
102620
  }
102463
102621
  if (usage) {
102464
102622
  this.tokenCounter.recordUsage(usage);
@@ -102504,22 +102662,45 @@ Double-check your response based on the criteria above. If everything looks good
102504
102662
  }
102505
102663
  const executeAIRequest = async () => {
102506
102664
  const result = await this.streamTextWithRetryAndFallback(streamOptions);
102507
- const steps = await result.steps;
102508
- let finalText;
102509
- if (steps && steps.length > 1) {
102510
- const lastStepText = steps[steps.length - 1].text;
102511
- finalText = lastStepText || await result.text;
102512
- } else {
102513
- finalText = await result.text;
102514
- }
102515
- if (this.debug) {
102516
- console.log(`[DEBUG] streamText completed: ${steps?.length || 0} steps, finalText=${finalText?.length || 0} chars`);
102665
+ let gracefulTimeoutId = null;
102666
+ let hardAbortTimeoutId = null;
102667
+ if (this.timeoutBehavior === "graceful" && gracefulTimeoutState && this.maxOperationTimeout > 0) {
102668
+ gracefulTimeoutId = setTimeout(() => {
102669
+ gracefulTimeoutState.triggered = true;
102670
+ if (this.debug) {
102671
+ console.log(`[DEBUG] Soft timeout after ${this.maxOperationTimeout}ms \u2014 entering wind-down mode (${gracefulTimeoutState.bonusStepsMax} bonus steps)`);
102672
+ }
102673
+ hardAbortTimeoutId = setTimeout(() => {
102674
+ if (this._abortController) {
102675
+ this._abortController.abort();
102676
+ }
102677
+ if (this.debug) {
102678
+ console.log(`[DEBUG] Hard abort \u2014 wind-down safety net expired after 60s`);
102679
+ }
102680
+ }, 6e4);
102681
+ }, this.maxOperationTimeout);
102517
102682
  }
102518
- const usage = await result.usage;
102519
- if (usage) {
102520
- this.tokenCounter.recordUsage(usage, result.experimental_providerMetadata);
102683
+ try {
102684
+ const steps = await result.steps;
102685
+ let finalText;
102686
+ if (steps && steps.length > 1) {
102687
+ const lastStepText = steps[steps.length - 1].text;
102688
+ finalText = lastStepText || await result.text;
102689
+ } else {
102690
+ finalText = await result.text;
102691
+ }
102692
+ if (this.debug) {
102693
+ console.log(`[DEBUG] streamText completed: ${steps?.length || 0} steps, finalText=${finalText?.length || 0} chars`);
102694
+ }
102695
+ const usage = await result.usage;
102696
+ if (usage) {
102697
+ this.tokenCounter.recordUsage(usage, result.experimental_providerMetadata);
102698
+ }
102699
+ return { finalText, result };
102700
+ } finally {
102701
+ if (gracefulTimeoutId) clearTimeout(gracefulTimeoutId);
102702
+ if (hardAbortTimeoutId) clearTimeout(hardAbortTimeoutId);
102521
102703
  }
102522
- return { finalText, result };
102523
102704
  };
102524
102705
  let aiResult;
102525
102706
  if (this.tracer) {
@@ -102556,6 +102737,50 @@ Double-check your response based on the criteria above. If everything looks good
102556
102737
  } else if (aiResult.finalText) {
102557
102738
  finalResult = aiResult.finalText;
102558
102739
  }
102740
+ if (gracefulTimeoutState.triggered) {
102741
+ const timeoutNotice = "**Note: This response was generated under a time constraint. The research may be incomplete, and some planned searches or analysis steps were not completed.**\n\n";
102742
+ if (!finalResult || finalResult === "I was unable to complete your request due to reaching the maximum number of tool iterations.") {
102743
+ try {
102744
+ const allText = await aiResult.result.text;
102745
+ if (allText && allText.trim()) {
102746
+ finalResult = timeoutNotice + allText;
102747
+ if (this.debug) {
102748
+ console.log(`[DEBUG] Graceful timeout: using concatenated step text (${allText.length} chars)`);
102749
+ }
102750
+ } else {
102751
+ const steps = await aiResult.result.steps;
102752
+ const toolSummaries = [];
102753
+ for (const step of steps || []) {
102754
+ if (step.toolResults?.length > 0) {
102755
+ for (const tr of step.toolResults) {
102756
+ const resultText = typeof tr.result === "string" ? tr.result : JSON.stringify(tr.result);
102757
+ if (resultText && resultText.length > 0 && resultText.length < 5e3) {
102758
+ toolSummaries.push(resultText.substring(0, 2e3));
102759
+ }
102760
+ }
102761
+ }
102762
+ }
102763
+ if (toolSummaries.length > 0) {
102764
+ finalResult = `${timeoutNotice}The operation timed out before a complete answer could be generated. Here is the partial information gathered:
102765
+
102766
+ ${toolSummaries.join("\n\n---\n\n")}`;
102767
+ if (this.debug) {
102768
+ console.log(`[DEBUG] Graceful timeout: built fallback from ${toolSummaries.length} tool results`);
102769
+ }
102770
+ } else {
102771
+ finalResult = "The operation timed out before enough information could be gathered to provide an answer. Please try again with a simpler query or increase the timeout.";
102772
+ }
102773
+ }
102774
+ } catch (e) {
102775
+ if (this.debug) {
102776
+ console.log(`[DEBUG] Graceful timeout fallback error: ${e.message}`);
102777
+ }
102778
+ finalResult = "The operation timed out before enough information could be gathered to provide an answer. Please try again with a simpler query or increase the timeout.";
102779
+ }
102780
+ } else {
102781
+ finalResult = timeoutNotice + finalResult;
102782
+ }
102783
+ }
102559
102784
  const resultMessages = await aiResult.result.response?.messages;
102560
102785
  if (resultMessages) {
102561
102786
  for (const msg of resultMessages) {