npm - @probelabs/probe - Versions diffs - 0.6.0-rc293 → 0.6.0-rc295 - Mend

@probelabs/probe 0.6.0-rc293 → 0.6.0-rc295

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/cjs/index.cjs CHANGED

@@ -96781,6 +96781,18 @@ var init_ProbeAgent = __esm({
         if (this.debug) {
           console.log(`[DEBUG] Max operation timeout: ${this.maxOperationTimeout}ms`);
         }
+        this.timeoutBehavior = options.timeoutBehavior ?? (() => {
+          const val = process.env.TIMEOUT_BEHAVIOR;
+          if (val === "hard") return "hard";
+          return "graceful";
+        })();
+        this.gracefulTimeoutBonusSteps = options.gracefulTimeoutBonusSteps ?? (() => {
+          const parsed = parseInt(process.env.GRACEFUL_TIMEOUT_BONUS_STEPS, 10);
+          return isNaN(parsed) || parsed < 1 || parsed > 20 ? 4 : parsed;
+        })();
+        if (this.debug) {
+          console.log(`[DEBUG] Timeout behavior: ${this.timeoutBehavior}, bonus steps: ${this.gracefulTimeoutBonusSteps}`);
+        }
         this.retryConfig = options.retry || {};
         this.retryManager = null;
         this.fallbackConfig = options.fallback || null;
@@ -97531,9 +97543,13 @@ var init_ProbeAgent = __esm({
         }
         return await this.fallbackManager.executeWithFallback(
           async (provider, model, config2) => {
+            let fallbackModel = provider(model);
+            if (this.concurrencyLimiter) {
+              fallbackModel = _ProbeAgent._wrapModelWithLimiter(fallbackModel, this.concurrencyLimiter, this.debug);
+            }
             const fallbackOptions = {
               ...options,
-              model: provider(model),
+              model: fallbackModel,
               abortSignal: controller.signal
             };
             if (config2.provider !== "google" && fallbackOptions.tools) {
@@ -97561,6 +97577,132 @@ var init_ProbeAgent = __esm({
           }
         );
       }
+      /**
+       * Wrap a LanguageModelV1 model so each doStream/doGenerate call acquires and
+       * releases a concurrency limiter slot. This gates individual LLM API calls
+       * (seconds each) instead of entire multi-step agent sessions (minutes).
+       *
+       * @param {Object} model - LanguageModelV1 model instance
+       * @param {Object} limiter - Concurrency limiter with acquire/release/getStats
+       * @param {boolean} debug - Enable debug logging
+       * @returns {Object} Wrapped model with per-call concurrency gating
+       * @private
+       */
+      static _wrapModelWithLimiter(model, limiter, debug) {
+        return new Proxy(model, {
+          get(target, prop) {
+            if (prop === "doStream") {
+              return async function(...args) {
+                await limiter.acquire(null);
+                if (debug) {
+                  const stats = limiter.getStats();
+                  console.log(`[DEBUG] Acquired AI slot for LLM call (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
+                }
+                try {
+                  const result = await target.doStream(...args);
+                  const originalStream = result.stream;
+                  const originalReader = originalStream.getReader();
+                  let released = false;
+                  const releaseOnce = () => {
+                    if (released) return;
+                    released = true;
+                    limiter.release(null);
+                  };
+                  const wrappedStream = new ReadableStream({
+                    async pull(controller) {
+                      try {
+                        const { done, value: value2 } = await originalReader.read();
+                        if (done) {
+                          controller.close();
+                          releaseOnce();
+                          if (debug) {
+                            const stats = limiter.getStats();
+                            console.log(`[DEBUG] Released AI slot after LLM stream complete (${stats.globalActive}/${stats.maxConcurrent})`);
+                          }
+                        } else {
+                          controller.enqueue(value2);
+                        }
+                      } catch (err) {
+                        releaseOnce();
+                        if (debug) {
+                          console.log(`[DEBUG] Released AI slot on LLM stream error`);
+                        }
+                        controller.error(err);
+                      }
+                    },
+                    cancel() {
+                      releaseOnce();
+                      if (debug) {
+                        console.log(`[DEBUG] Released AI slot on LLM stream cancel`);
+                      }
+                      originalReader.cancel();
+                    }
+                  });
+                  return { ...result, stream: wrappedStream };
+                } catch (err) {
+                  limiter.release(null);
+                  if (debug) {
+                    console.log(`[DEBUG] Released AI slot on doStream error`);
+                  }
+                  throw err;
+                }
+              };
+            }
+            if (prop === "doGenerate") {
+              return async function(...args) {
+                await limiter.acquire(null);
+                if (debug) {
+                  const stats = limiter.getStats();
+                  console.log(`[DEBUG] Acquired AI slot for LLM generate (${stats.globalActive}/${stats.maxConcurrent})`);
+                }
+                try {
+                  const result = await target.doGenerate(...args);
+                  return result;
+                } finally {
+                  limiter.release(null);
+                  if (debug) {
+                    const stats = limiter.getStats();
+                    console.log(`[DEBUG] Released AI slot after LLM generate (${stats.globalActive}/${stats.maxConcurrent})`);
+                  }
+                }
+              };
+            }
+            const value = target[prop];
+            return typeof value === "function" ? value.bind(target) : value;
+          }
+        });
+      }
+      /**
+       * Wrap an engine stream result so its textStream async generator acquires
+       * and releases a concurrency limiter slot. Acquire happens when iteration
+       * begins; release happens in finally (completion, error, or break).
+       *
+       * @param {Object} result - Engine result with { textStream, usage, ... }
+       * @param {Object} limiter - Concurrency limiter with acquire/release/getStats
+       * @param {boolean} debug - Enable debug logging
+       * @returns {Object} Result with wrapped textStream
+       * @private
+       */
+      static _wrapEngineStreamWithLimiter(result, limiter, debug) {
+        const originalStream = result.textStream;
+        async function* gatedStream() {
+          await limiter.acquire(null);
+          if (debug) {
+            const stats = limiter.getStats();
+            console.log(`[DEBUG] Acquired AI slot for engine stream (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
+          }
+          try {
+            yield* originalStream;
+          } finally {
+            limiter.release(null);
+            if (debug) {
+              const stats = limiter.getStats();
+              console.log(`[DEBUG] Released AI slot after engine stream (${stats.globalActive}/${stats.maxConcurrent})`);
+            }
+          }
+        }
+        return { ...result, textStream: gatedStream() };
+      }
       /**
        * Execute streamText with retry and fallback support
        * @param {Object} options - streamText options
@@ -97569,12 +97711,8 @@ var init_ProbeAgent = __esm({
        */
       async streamTextWithRetryAndFallback(options) {
         const limiter = this.concurrencyLimiter;
-        if (limiter) {
-          await limiter.acquire(null);
-          if (this.debug) {
-            const stats = limiter.getStats();
-            console.log(`[DEBUG] Acquired global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
-          }
+        if (limiter && options.model) {
+          options = { ...options, model: _ProbeAgent._wrapModelWithLimiter(options.model, limiter, this.debug) };
         }
         const controller = new AbortController();
         const timeoutState = { timeoutId: null };
@@ -97588,12 +97726,16 @@ var init_ProbeAgent = __esm({
           }, { once: true });
         }
         if (this.maxOperationTimeout && this.maxOperationTimeout > 0) {
-          timeoutState.timeoutId = setTimeout(() => {
-            controller.abort();
-            if (this.debug) {
-              console.log(`[DEBUG] Operation timed out after ${this.maxOperationTimeout}ms (max operation timeout)`);
-            }
-          }, this.maxOperationTimeout);
+          const gts = this._gracefulTimeoutState;
+          if (this.timeoutBehavior === "graceful" && gts) {
+          } else {
+            timeoutState.timeoutId = setTimeout(() => {
+              controller.abort();
+              if (this.debug) {
+                console.log(`[DEBUG] Operation timed out after ${this.maxOperationTimeout}ms (max operation timeout)`);
+              }
+            }, this.maxOperationTimeout);
+          }
         }
         try {
           const useClaudeCode = this.clientApiProvider === "claude-code" || process.env.USE_CLAUDE_CODE === "true";
@@ -97602,6 +97744,9 @@ var init_ProbeAgent = __esm({
           if (useClaudeCode || useCodex) {
             try {
               result = await this._tryEngineStreamPath(options, controller, timeoutState);
+              if (result && limiter) {
+                result = _ProbeAgent._wrapEngineStreamWithLimiter(result, limiter, this.debug);
+              }
             } catch (error40) {
               if (this.debug) {
                 const engineType = useClaudeCode ? "Claude Code" : "Codex";
@@ -97612,41 +97757,7 @@ var init_ProbeAgent = __esm({
           if (!result) {
             result = await this._executeWithVercelProvider(options, controller);
           }
-          if (limiter && result.textStream) {
-            const originalStream = result.textStream;
-            const debug = this.debug;
-            const wrappedStream = (async function* () {
-              try {
-                for await (const chunk of originalStream) {
-                  yield chunk;
-                }
-              } finally {
-                limiter.release(null);
-                if (debug) {
-                  const stats = limiter.getStats();
-                  console.log(`[DEBUG] Released global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
-                }
-              }
-            })();
-            return new Proxy(result, {
-              get(target, prop) {
-                if (prop === "textStream") return wrappedStream;
-                const value = target[prop];
-                return typeof value === "function" ? value.bind(target) : value;
-              }
-            });
-          } else if (limiter) {
-            limiter.release(null);
-          }
           return result;
-        } catch (error40) {
-          if (limiter) {
-            limiter.release(null);
-            if (this.debug) {
-              console.log(`[DEBUG] Released global AI concurrency slot on error`);
-            }
-          }
-          throw error40;
         } finally {
           if (timeoutState.timeoutId) {
             clearTimeout(timeoutState.timeoutId);
@@ -99215,6 +99326,14 @@ You are working with a workspace. Available paths: ${workspaceDesc}
           }
           let completionPromptInjected = false;
           let preCompletionResult = null;
+          const gracefulTimeoutState = {
+            triggered: false,
+            // Set to true when soft timeout fires
+            bonusStepsUsed: 0,
+            // Steps taken after soft timeout
+            bonusStepsMax: this.gracefulTimeoutBonusSteps
+          };
+          this._gracefulTimeoutState = gracefulTimeoutState;
           let compactionAttempted = false;
           while (true) {
             try {
@@ -99224,6 +99343,15 @@ You are working with a workspace. Available paths: ${workspaceDesc}
                 messages: messagesForAI,
                 tools: tools2,
                 stopWhen: ({ steps }) => {
+                  if (gracefulTimeoutState.triggered) {
+                    if (gracefulTimeoutState.bonusStepsUsed >= gracefulTimeoutState.bonusStepsMax) {
+                      if (this.debug) {
+                        console.log(`[DEBUG] stopWhen: graceful timeout bonus steps exhausted (${gracefulTimeoutState.bonusStepsUsed}/${gracefulTimeoutState.bonusStepsMax}), forcing stop`);
+                      }
+                      return true;
+                    }
+                    return false;
+                  }
                   if (steps.length >= maxIterations) return true;
                   const lastStep = steps[steps.length - 1];
                   const modelWantsToStop = lastStep?.finishReason === "stop" && (!lastStep?.toolCalls || lastStep.toolCalls.length === 0);
@@ -99267,6 +99395,30 @@ You are working with a workspace. Available paths: ${workspaceDesc}
                   return false;
                 },
                 prepareStep: ({ steps, stepNumber }) => {
+                  if (gracefulTimeoutState.triggered) {
+                    gracefulTimeoutState.bonusStepsUsed++;
+                    const remaining = gracefulTimeoutState.bonusStepsMax - gracefulTimeoutState.bonusStepsUsed;
+                    if (gracefulTimeoutState.bonusStepsUsed === 1) {
+                      if (this.debug) {
+                        console.log(`[DEBUG] prepareStep: graceful timeout wind-down step 1/${gracefulTimeoutState.bonusStepsMax}`);
+                      }
+                      if (this.tracer) {
+                        this.tracer.addEvent("graceful_timeout.wind_down_started", {
+                          bonus_steps_max: gracefulTimeoutState.bonusStepsMax,
+                          current_iteration: currentIteration,
+                          max_iterations: maxIterations
+                        });
+                      }
+                      return {
+                        toolChoice: "none",
+                        userMessage: `\u26A0\uFE0F TIME LIMIT REACHED. You are running out of time. You have ${remaining} step(s) remaining. Provide your BEST answer NOW using the information you have already gathered. Do NOT call any more tools. Summarize your findings and respond completely. If something was not completed, honestly state what was not done and provide any partial results or recommendations you can offer.`
+                      };
+                    }
+                    if (this.debug) {
+                      console.log(`[DEBUG] prepareStep: graceful timeout wind-down step ${gracefulTimeoutState.bonusStepsUsed}/${gracefulTimeoutState.bonusStepsMax} (${remaining} remaining)`);
+                    }
+                    return { toolChoice: "none" };
+                  }
                   if (stepNumber === maxIterations - 1) {
                     return {
                       toolChoice: "none"
@@ -99370,6 +99522,12 @@ Double-check your response based on the criteria above. If everything looks good
                       }));
                     }
                     this.tracer.addEvent("iteration.step", stepEvent);
+                    if (gracefulTimeoutState.triggered) {
+                      this.tracer.addEvent("graceful_timeout.wind_down_step", {
+                        bonus_step: gracefulTimeoutState.bonusStepsUsed,
+                        bonus_max: gracefulTimeoutState.bonusStepsMax
+                      });
+                    }
                   }
                   if (usage) {
                     this.tokenCounter.recordUsage(usage);
@@ -99415,22 +99573,45 @@ Double-check your response based on the criteria above. If everything looks good
               }
               const executeAIRequest = async () => {
                 const result = await this.streamTextWithRetryAndFallback(streamOptions);
-                const steps = await result.steps;
-                let finalText;
-                if (steps && steps.length > 1) {
-                  const lastStepText = steps[steps.length - 1].text;
-                  finalText = lastStepText || await result.text;
-                } else {
-                  finalText = await result.text;
-                }
-                if (this.debug) {
-                  console.log(`[DEBUG] streamText completed: ${steps?.length || 0} steps, finalText=${finalText?.length || 0} chars`);
+                let gracefulTimeoutId = null;
+                let hardAbortTimeoutId = null;
+                if (this.timeoutBehavior === "graceful" && gracefulTimeoutState && this.maxOperationTimeout > 0) {
+                  gracefulTimeoutId = setTimeout(() => {
+                    gracefulTimeoutState.triggered = true;
+                    if (this.debug) {
+                      console.log(`[DEBUG] Soft timeout after ${this.maxOperationTimeout}ms \u2014 entering wind-down mode (${gracefulTimeoutState.bonusStepsMax} bonus steps)`);
+                    }
+                    hardAbortTimeoutId = setTimeout(() => {
+                      if (this._abortController) {
+                        this._abortController.abort();
+                      }
+                      if (this.debug) {
+                        console.log(`[DEBUG] Hard abort \u2014 wind-down safety net expired after 60s`);
+                      }
+                    }, 6e4);
+                  }, this.maxOperationTimeout);
                 }
-                const usage = await result.usage;
-                if (usage) {
-                  this.tokenCounter.recordUsage(usage, result.experimental_providerMetadata);
+                try {
+                  const steps = await result.steps;
+                  let finalText;
+                  if (steps && steps.length > 1) {
+                    const lastStepText = steps[steps.length - 1].text;
+                    finalText = lastStepText || await result.text;
+                  } else {
+                    finalText = await result.text;
+                  }
+                  if (this.debug) {
+                    console.log(`[DEBUG] streamText completed: ${steps?.length || 0} steps, finalText=${finalText?.length || 0} chars`);
+                  }
+                  const usage = await result.usage;
+                  if (usage) {
+                    this.tokenCounter.recordUsage(usage, result.experimental_providerMetadata);
+                  }
+                  return { finalText, result };
+                } finally {
+                  if (gracefulTimeoutId) clearTimeout(gracefulTimeoutId);
+                  if (hardAbortTimeoutId) clearTimeout(hardAbortTimeoutId);
                 }
-                return { finalText, result };
               };
               let aiResult;
               if (this.tracer) {
@@ -99467,6 +99648,50 @@ Double-check your response based on the criteria above. If everything looks good
               } else if (aiResult.finalText) {
                 finalResult = aiResult.finalText;
               }
+              if (gracefulTimeoutState.triggered) {
+                const timeoutNotice = "**Note: This response was generated under a time constraint. The research may be incomplete, and some planned searches or analysis steps were not completed.**\n\n";
+                if (!finalResult || finalResult === "I was unable to complete your request due to reaching the maximum number of tool iterations.") {
+                  try {
+                    const allText = await aiResult.result.text;
+                    if (allText && allText.trim()) {
+                      finalResult = timeoutNotice + allText;
+                      if (this.debug) {
+                        console.log(`[DEBUG] Graceful timeout: using concatenated step text (${allText.length} chars)`);
+                      }
+                    } else {
+                      const steps = await aiResult.result.steps;
+                      const toolSummaries = [];
+                      for (const step of steps || []) {
+                        if (step.toolResults?.length > 0) {
+                          for (const tr of step.toolResults) {
+                            const resultText = typeof tr.result === "string" ? tr.result : JSON.stringify(tr.result);
+                            if (resultText && resultText.length > 0 && resultText.length < 5e3) {
+                              toolSummaries.push(resultText.substring(0, 2e3));
+                            }
+                          }
+                        }
+                      }
+                      if (toolSummaries.length > 0) {
+                        finalResult = `${timeoutNotice}The operation timed out before a complete answer could be generated. Here is the partial information gathered:
+${toolSummaries.join("\n\n---\n\n")}`;
+                        if (this.debug) {
+                          console.log(`[DEBUG] Graceful timeout: built fallback from ${toolSummaries.length} tool results`);
+                        }
+                      } else {
+                        finalResult = "The operation timed out before enough information could be gathered to provide an answer. Please try again with a simpler query or increase the timeout.";
+                      }
+                    }
+                  } catch (e) {
+                    if (this.debug) {
+                      console.log(`[DEBUG] Graceful timeout fallback error: ${e.message}`);
+                    }
+                    finalResult = "The operation timed out before enough information could be gathered to provide an answer. Please try again with a simpler query or increase the timeout.";
+                  }
+                } else {
+                  finalResult = timeoutNotice + finalResult;
+                }
+              }
               const resultMessages = await aiResult.result.response?.messages;
               if (resultMessages) {
                 for (const msg of resultMessages) {

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@probelabs/probe",
-  "version": "0.6.0-rc293",
+  "version": "0.6.0-rc295",
   "description": "Node.js wrapper for the probe code search tool",
   "main": "src/index.js",
   "module": "src/index.js",

package/src/agent/ProbeAgent.d.ts CHANGED

@@ -106,6 +106,10 @@ export interface ProbeAgentOptions {
   requestTimeout?: number;
   /** Maximum timeout in ms for the entire operation including all retries and fallbacks (default: 300000 or MAX_OPERATION_TIMEOUT env var). This is the absolute maximum time for streamTextWithRetryAndFallback. */
   maxOperationTimeout?: number;
+  /** Timeout behavior: 'graceful' winds down with bonus steps giving the agent a chance to respond, 'hard' aborts immediately (default: 'graceful'). Env var: TIMEOUT_BEHAVIOR */
+  timeoutBehavior?: 'graceful' | 'hard';
+  /** Number of bonus steps during graceful timeout wind-down (default: 4, range: 1-20). Env var: GRACEFUL_TIMEOUT_BONUS_STEPS */
+  gracefulTimeoutBonusSteps?: number;
 }
 /**