@probelabs/probe 0.6.0-rc287 → 0.6.0-rc290

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -2976,9 +2976,9 @@ ${extractGuidance2}
2976
2976
  Follow these instructions carefully:
2977
2977
  1. Analyze the user's request.
2978
2978
  2. Use the available tools step-by-step to fulfill the request.
2979
- 3. You should always prefer the search tool for code-related questions.${this.searchDelegate ? ' Ask natural language questions — the search subagent handles keyword formulation and returns extracted code blocks. Use extract only to expand context or read full files.' : ' Search handles stemming and case variations automatically — do NOT try keyword variations manually. Read full files only if really necessary.'}
2980
- 4. Ensure to get really deep and understand the full picture before answering.
2981
- 5. Once the task is fully completed, provide your final answer directly as text.
2979
+ 3. You MUST use the search tool before answering ANY code-related question. NEVER answer from memory or general knowledge — your answers must be grounded in actual code found via search/extract.${this.searchDelegate ? ' Ask natural language questions — the search subagent handles keyword formulation and returns extracted code blocks. Use extract only to expand context or read full files.' : ' Search handles stemming and case variations automatically — do NOT try keyword variations manually. Read full files only if really necessary.'}
2980
+ 4. Ensure to get really deep and understand the full picture before answering. Follow call chains — if function A calls B, search for B too. Look for related subsystems (e.g., if asked about rate limiting, also check for quota, throttling, smoothing).
2981
+ 5. Once the task is fully completed, provide your final answer directly as text. Always cite specific files and line numbers as evidence. Do NOT output planning or thinking text — go straight to the answer.
2982
2982
  6. ${this.searchDelegate ? 'Ask clear, specific questions when searching. Each search should target a distinct concept or question.' : 'Prefer concise and focused search queries. Use specific keywords and phrases to narrow down results.'}
2983
2983
  7. NEVER use bash for code exploration (no grep, cat, find, head, tail, awk, sed) — always use search and extract tools instead. Bash is only for system operations like building, running tests, or git commands.${this.allowEdit ? `
2984
2984
  7. When modifying files, choose the appropriate tool:
@@ -3483,6 +3483,24 @@ Follow these instructions carefully:
3483
3483
  if (recentTexts.every(t => detectStuckResponse(t))) return true;
3484
3484
  }
3485
3485
 
3486
+ // Circuit breaker: repeated identical tool calls (e.g. model ignores dedup message)
3487
+ if (steps.length >= 3) {
3488
+ const last3 = steps.slice(-3);
3489
+ const allHaveTools = last3.every(s => s.toolCalls?.length === 1);
3490
+ if (allHaveTools) {
3491
+ const signatures = last3.map(s => {
3492
+ const tc = s.toolCalls[0];
3493
+ return `${tc.toolName}::${JSON.stringify(tc.args ?? tc.input)}`;
3494
+ });
3495
+ if (signatures[0] === signatures[1] && signatures[1] === signatures[2]) {
3496
+ if (this.debug) {
3497
+ console.log(`[DEBUG] Circuit breaker: 3 consecutive identical tool calls detected (${last3[0].toolCalls[0].toolName}), forcing stop`);
3498
+ }
3499
+ return true;
3500
+ }
3501
+ }
3502
+ }
3503
+
3486
3504
  return false;
3487
3505
  },
3488
3506
  prepareStep: ({ steps, stepNumber }) => {
@@ -3493,6 +3511,24 @@ Follow these instructions carefully:
3493
3511
  };
3494
3512
  }
3495
3513
 
3514
+ // Force text-only response after 2 consecutive identical tool calls
3515
+ if (steps.length >= 2) {
3516
+ const last2 = steps.slice(-2);
3517
+ if (last2.every(s => s.toolCalls?.length === 1)) {
3518
+ const tc1 = last2[0].toolCalls[0];
3519
+ const tc2 = last2[1].toolCalls[0];
3520
+ const sig1 = `${tc1.toolName}::${JSON.stringify(tc1.args ?? tc1.input)}`;
3521
+ const sig2 = `${tc2.toolName}::${JSON.stringify(tc2.args ?? tc2.input)}`;
3522
+ if (sig1 === sig2) {
3523
+ if (this.debug) {
3524
+ console.log(`[DEBUG] prepareStep: 2 consecutive identical tool calls (${tc1.toolName}), forcing toolChoice=none`);
3525
+ console.log(`[DEBUG] sig: ${sig1.substring(0, 200)}`);
3526
+ }
3527
+ return { toolChoice: 'none' };
3528
+ }
3529
+ }
3530
+ }
3531
+
3496
3532
  const lastStep = steps[steps.length - 1];
3497
3533
  const modelJustStopped = lastStep?.finishReason === 'stop'
3498
3534
  && (!lastStep?.toolCalls || lastStep.toolCalls.length === 0);
@@ -3532,7 +3568,8 @@ ${resultToReview}
3532
3568
  Double-check your response based on the criteria above. If everything looks good, respond with your previous answer exactly as-is. If something needs to be fixed or is missing, do it now, then respond with the COMPLETE updated answer (everything you did in total, not just the fix).`;
3533
3569
 
3534
3570
  return {
3535
- userMessage: completionPromptMessage
3571
+ userMessage: completionPromptMessage,
3572
+ toolChoice: 'none' // Force text-only review — no tool calls
3536
3573
  };
3537
3574
  }
3538
3575
  }
@@ -3585,7 +3622,13 @@ Double-check your response based on the criteria above. If everything looks good
3585
3622
  }
3586
3623
 
3587
3624
  if (this.debug) {
3588
- console.log(`[DEBUG] Step ${currentIteration}/${maxIterations} finished (reason: ${finishReason}, tools: ${toolResults?.length || 0})`);
3625
+ const toolSummary = toolCalls?.length
3626
+ ? toolCalls.map(tc => {
3627
+ const args = tc.args ? JSON.stringify(tc.args) : '';
3628
+ return args ? `${tc.toolName}(${debugTruncate(args, 120)})` : tc.toolName;
3629
+ }).join(', ')
3630
+ : 'none';
3631
+ console.log(`[DEBUG] Step ${currentIteration}/${maxIterations} finished (reason: ${finishReason}, tools: [${toolSummary}])`);
3589
3632
  if (text) {
3590
3633
  console.log(`[DEBUG] model text: ${debugTruncate(text)}`);
3591
3634
  }
@@ -3627,11 +3670,20 @@ Double-check your response based on the criteria above. If everything looks good
3627
3670
  const executeAIRequest = async () => {
3628
3671
  const result = await this.streamTextWithRetryAndFallback(streamOptions);
3629
3672
 
3630
- // Collect the final text
3631
- const finalText = await result.text;
3673
+ // Use only the last step's text as the final answer.
3674
+ // result.text concatenates ALL steps (including intermediate planning text),
3675
+ // but the user should only see the final answer from the last step.
3676
+ const steps = await result.steps;
3677
+ let finalText;
3678
+ if (steps && steps.length > 1) {
3679
+ // Multi-step: use last step's text (the actual answer after tool calls)
3680
+ const lastStepText = steps[steps.length - 1].text;
3681
+ finalText = lastStepText || await result.text;
3682
+ } else {
3683
+ finalText = await result.text;
3684
+ }
3632
3685
 
3633
3686
  if (this.debug) {
3634
- const steps = await result.steps;
3635
3687
  console.log(`[DEBUG] streamText completed: ${steps?.length || 0} steps, finalText=${finalText?.length || 0} chars`);
3636
3688
  }
3637
3689
 
@@ -3726,12 +3778,11 @@ Double-check your response based on the criteria above. If everything looks good
3726
3778
 
3727
3779
  currentMessages.push({ role: 'user', content: completionPromptMessage });
3728
3780
 
3729
- const completionMaxIterations = 5;
3730
3781
  const completionStreamOptions = {
3731
3782
  model: this.provider ? this.provider(this.model) : this.model,
3732
3783
  messages: this.prepareMessagesWithImages(currentMessages),
3733
3784
  tools,
3734
- stopWhen: stepCountIs(completionMaxIterations),
3785
+ toolChoice: 'none', // Force text-only response — no tool calls during review
3735
3786
  maxTokens: maxResponseTokens,
3736
3787
  temperature: 0.3,
3737
3788
  onStepFinish: ({ toolResults, text, finishReason, usage }) => {