@probelabs/probe 0.6.0-rc288 → 0.6.0-rc291

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. package/bin/binaries/probe-v0.6.0-rc291-aarch64-apple-darwin.tar.gz +0 -0
  2. package/bin/binaries/probe-v0.6.0-rc291-aarch64-unknown-linux-musl.tar.gz +0 -0
  3. package/bin/binaries/probe-v0.6.0-rc291-x86_64-apple-darwin.tar.gz +0 -0
  4. package/bin/binaries/probe-v0.6.0-rc291-x86_64-pc-windows-msvc.zip +0 -0
  5. package/bin/binaries/probe-v0.6.0-rc291-x86_64-unknown-linux-musl.tar.gz +0 -0
  6. package/build/agent/ProbeAgent.js +95 -14
  7. package/build/agent/index.js +401 -86261
  8. package/build/agent/shared/prompts.js +27 -6
  9. package/build/extract.js +4 -2
  10. package/build/mcp/index.js +122 -9
  11. package/build/mcp/index.ts +162 -17
  12. package/build/search.js +6 -5
  13. package/build/tools/vercel.js +56 -23
  14. package/build/utils/error-types.js +2 -2
  15. package/build/utils/path-validation.js +1 -1
  16. package/cjs/agent/ProbeAgent.cjs +193 -45
  17. package/cjs/index.cjs +193 -45
  18. package/package.json +2 -1
  19. package/src/agent/ProbeAgent.js +95 -14
  20. package/src/agent/shared/prompts.js +27 -6
  21. package/src/extract.js +4 -2
  22. package/src/mcp/index.ts +162 -17
  23. package/src/search.js +6 -5
  24. package/src/tools/vercel.js +56 -23
  25. package/src/utils/error-types.js +2 -2
  26. package/src/utils/path-validation.js +1 -1
  27. package/bin/binaries/probe-v0.6.0-rc288-aarch64-apple-darwin.tar.gz +0 -0
  28. package/bin/binaries/probe-v0.6.0-rc288-aarch64-unknown-linux-musl.tar.gz +0 -0
  29. package/bin/binaries/probe-v0.6.0-rc288-x86_64-apple-darwin.tar.gz +0 -0
  30. package/bin/binaries/probe-v0.6.0-rc288-x86_64-pc-windows-msvc.zip +0 -0
  31. package/bin/binaries/probe-v0.6.0-rc288-x86_64-unknown-linux-musl.tar.gz +0 -0
@@ -1444,11 +1444,13 @@ export class ProbeAgent {
1444
1444
  result = await this._executeWithVercelProvider(options, controller);
1445
1445
  }
1446
1446
 
1447
- // Wrap textStream so limiter slot is held until stream completes
1447
+ // Wrap textStream so limiter slot is held until stream completes.
1448
+ // result.textStream is a read-only getter on DefaultStreamTextResult,
1449
+ // so we wrap the result in a Proxy that intercepts the textStream property.
1448
1450
  if (limiter && result.textStream) {
1449
1451
  const originalStream = result.textStream;
1450
1452
  const debug = this.debug;
1451
- result.textStream = (async function* () {
1453
+ const wrappedStream = (async function* () {
1452
1454
  try {
1453
1455
  for await (const chunk of originalStream) {
1454
1456
  yield chunk;
@@ -1461,6 +1463,13 @@ export class ProbeAgent {
1461
1463
  }
1462
1464
  }
1463
1465
  })();
1466
+ return new Proxy(result, {
1467
+ get(target, prop) {
1468
+ if (prop === 'textStream') return wrappedStream;
1469
+ const value = target[prop];
1470
+ return typeof value === 'function' ? value.bind(target) : value;
1471
+ }
1472
+ });
1464
1473
  } else if (limiter) {
1465
1474
  // No textStream (shouldn't happen, but release just in case)
1466
1475
  limiter.release(null);
@@ -2976,9 +2985,9 @@ ${extractGuidance2}
2976
2985
  Follow these instructions carefully:
2977
2986
  1. Analyze the user's request.
2978
2987
  2. Use the available tools step-by-step to fulfill the request.
2979
- 3. You should always prefer the search tool for code-related questions.${this.searchDelegate ? ' Ask natural language questions — the search subagent handles keyword formulation and returns extracted code blocks. Use extract only to expand context or read full files.' : ' Search handles stemming and case variations automatically — do NOT try keyword variations manually. Read full files only if really necessary.'}
2980
- 4. Ensure to get really deep and understand the full picture before answering.
2981
- 5. Once the task is fully completed, provide your final answer directly as text.
2988
+ 3. You MUST use the search tool before answering ANY code-related question. NEVER answer from memory or general knowledge — your answers must be grounded in actual code found via search/extract.${this.searchDelegate ? ' Ask natural language questions — the search subagent handles keyword formulation and returns extracted code blocks. Use extract only to expand context or read full files.' : ' Search handles stemming and case variations automatically — do NOT try keyword variations manually. Read full files only if really necessary.'}
2989
+ 4. Ensure to get really deep and understand the full picture before answering. Follow call chains — if function A calls B, search for B too. Look for related subsystems (e.g., if asked about rate limiting, also check for quota, throttling, smoothing).
2990
+ 5. Once the task is fully completed, provide your final answer directly as text. Always cite specific files and line numbers as evidence. Do NOT output planning or thinking text — go straight to the answer.
2982
2991
  6. ${this.searchDelegate ? 'Ask clear, specific questions when searching. Each search should target a distinct concept or question.' : 'Prefer concise and focused search queries. Use specific keywords and phrases to narrow down results.'}
2983
2992
  7. NEVER use bash for code exploration (no grep, cat, find, head, tail, awk, sed) — always use search and extract tools instead. Bash is only for system operations like building, running tests, or git commands.${this.allowEdit ? `
2984
2993
  7. When modifying files, choose the appropriate tool:
@@ -3483,6 +3492,25 @@ Follow these instructions carefully:
3483
3492
  if (recentTexts.every(t => detectStuckResponse(t))) return true;
3484
3493
  }
3485
3494
 
3495
+ // Circuit breaker: repeated identical tool calls (e.g. model ignores dedup message)
3496
+ if (steps.length >= 3) {
3497
+ const last3 = steps.slice(-3);
3498
+ const allHaveTools = last3.every(s => s.toolCalls?.length === 1);
3499
+ if (allHaveTools) {
3500
+ const signatures = last3.map(s => {
3501
+ const tc = s.toolCalls[0];
3502
+ return `${tc.toolName}::${JSON.stringify(tc.args ?? tc.input)}`;
3503
+ });
3504
+ if (signatures[0] === signatures[1] && signatures[1] === signatures[2]) {
3505
+ if (this.debug) {
3506
+ console.log(`[DEBUG] Circuit breaker: 3 consecutive identical tool calls detected (${last3[0].toolCalls[0].toolName}), forcing stop`);
3507
+ }
3508
+ return true;
3509
+ }
3510
+ }
3511
+
3512
+ }
3513
+
3486
3514
  return false;
3487
3515
  },
3488
3516
  prepareStep: ({ steps, stepNumber }) => {
@@ -3493,6 +3521,42 @@ Follow these instructions carefully:
3493
3521
  };
3494
3522
  }
3495
3523
 
3524
+ // Force text-only response after 2 consecutive identical tool calls
3525
+ if (steps.length >= 2) {
3526
+ const last2 = steps.slice(-2);
3527
+ if (last2.every(s => s.toolCalls?.length === 1)) {
3528
+ const tc1 = last2[0].toolCalls[0];
3529
+ const tc2 = last2[1].toolCalls[0];
3530
+ const sig1 = `${tc1.toolName}::${JSON.stringify(tc1.args ?? tc1.input)}`;
3531
+ const sig2 = `${tc2.toolName}::${JSON.stringify(tc2.args ?? tc2.input)}`;
3532
+ if (sig1 === sig2) {
3533
+ if (this.debug) {
3534
+ console.log(`[DEBUG] prepareStep: 2 consecutive identical tool calls (${tc1.toolName}), forcing toolChoice=none`);
3535
+ console.log(`[DEBUG] sig: ${sig1.substring(0, 200)}`);
3536
+ }
3537
+ return { toolChoice: 'none' };
3538
+ }
3539
+ }
3540
+ }
3541
+
3542
+ // Force text-only response after 3 consecutive tool errors
3543
+ // (e.g. workspace deleted mid-run — let the model produce its answer)
3544
+ if (steps.length >= 3) {
3545
+ const last3 = steps.slice(-3);
3546
+ const allErrors = last3.every(s =>
3547
+ s.toolResults?.length > 0 && s.toolResults.every(tr => {
3548
+ const r = typeof tr.result === 'string' ? tr.result : '';
3549
+ return r.includes('<error ') || r.includes('does not exist');
3550
+ })
3551
+ );
3552
+ if (allErrors) {
3553
+ if (this.debug) {
3554
+ console.log(`[DEBUG] prepareStep: 3 consecutive tool errors, forcing toolChoice=none`);
3555
+ }
3556
+ return { toolChoice: 'none' };
3557
+ }
3558
+ }
3559
+
3496
3560
  const lastStep = steps[steps.length - 1];
3497
3561
  const modelJustStopped = lastStep?.finishReason === 'stop'
3498
3562
  && (!lastStep?.toolCalls || lastStep.toolCalls.length === 0);
@@ -3529,10 +3593,12 @@ Here is the result to review:
3529
3593
  ${resultToReview}
3530
3594
  </result>
3531
3595
 
3532
- Double-check your response based on the criteria above. If everything looks good, respond with your previous answer exactly as-is. If something needs to be fixed or is missing, do it now, then respond with the COMPLETE updated answer (everything you did in total, not just the fix).`;
3596
+ IMPORTANT: First review ALL completed work in the conversation above before taking any action.
3597
+ Double-check your response based on the criteria above. If everything looks good, respond with your previous answer exactly as-is. If your text has inaccuracies, fix the text. Only call a tool if you find a genuinely MISSING action — NEVER redo work that was already completed successfully. Respond with the COMPLETE corrected answer.`;
3533
3598
 
3534
3599
  return {
3535
- userMessage: completionPromptMessage
3600
+ userMessage: completionPromptMessage,
3601
+ toolChoice: 'none' // Force text-only review — no tool calls
3536
3602
  };
3537
3603
  }
3538
3604
  }
@@ -3585,7 +3651,13 @@ Double-check your response based on the criteria above. If everything looks good
3585
3651
  }
3586
3652
 
3587
3653
  if (this.debug) {
3588
- console.log(`[DEBUG] Step ${currentIteration}/${maxIterations} finished (reason: ${finishReason}, tools: ${toolResults?.length || 0})`);
3654
+ const toolSummary = toolCalls?.length
3655
+ ? toolCalls.map(tc => {
3656
+ const args = tc.args ? JSON.stringify(tc.args) : '';
3657
+ return args ? `${tc.toolName}(${debugTruncate(args, 120)})` : tc.toolName;
3658
+ }).join(', ')
3659
+ : 'none';
3660
+ console.log(`[DEBUG] Step ${currentIteration}/${maxIterations} finished (reason: ${finishReason}, tools: [${toolSummary}])`);
3589
3661
  if (text) {
3590
3662
  console.log(`[DEBUG] model text: ${debugTruncate(text)}`);
3591
3663
  }
@@ -3627,11 +3699,20 @@ Double-check your response based on the criteria above. If everything looks good
3627
3699
  const executeAIRequest = async () => {
3628
3700
  const result = await this.streamTextWithRetryAndFallback(streamOptions);
3629
3701
 
3630
- // Collect the final text
3631
- const finalText = await result.text;
3702
+ // Use only the last step's text as the final answer.
3703
+ // result.text concatenates ALL steps (including intermediate planning text),
3704
+ // but the user should only see the final answer from the last step.
3705
+ const steps = await result.steps;
3706
+ let finalText;
3707
+ if (steps && steps.length > 1) {
3708
+ // Multi-step: use last step's text (the actual answer after tool calls)
3709
+ const lastStepText = steps[steps.length - 1].text;
3710
+ finalText = lastStepText || await result.text;
3711
+ } else {
3712
+ finalText = await result.text;
3713
+ }
3632
3714
 
3633
3715
  if (this.debug) {
3634
- const steps = await result.steps;
3635
3716
  console.log(`[DEBUG] streamText completed: ${steps?.length || 0} steps, finalText=${finalText?.length || 0} chars`);
3636
3717
  }
3637
3718
 
@@ -3722,16 +3803,16 @@ Here is the result to review:
3722
3803
  ${finalResult}
3723
3804
  </result>
3724
3805
 
3725
- Double-check your response based on the criteria above. If everything looks good, respond with your previous answer exactly as-is. If something needs to be fixed or is missing, do it now, then respond with the COMPLETE updated answer (everything you did in total, not just the fix).`;
3806
+ IMPORTANT: First review ALL completed work in the conversation above before taking any action.
3807
+ Double-check your response based on the criteria above. If everything looks good, respond with your previous answer exactly as-is. If your text has inaccuracies, fix the text. Only call a tool if you find a genuinely MISSING action — NEVER redo work that was already completed successfully. Respond with the COMPLETE corrected answer.`;
3726
3808
 
3727
3809
  currentMessages.push({ role: 'user', content: completionPromptMessage });
3728
3810
 
3729
- const completionMaxIterations = 5;
3730
3811
  const completionStreamOptions = {
3731
3812
  model: this.provider ? this.provider(this.model) : this.model,
3732
3813
  messages: this.prepareMessagesWithImages(currentMessages),
3733
3814
  tools,
3734
- stopWhen: stepCountIs(completionMaxIterations),
3815
+ toolChoice: 'none', // Force text-only response — no tool calls during review
3735
3816
  maxTokens: maxResponseTokens,
3736
3817
  temperature: 0.3,
3737
3818
  onStepFinish: ({ toolResults, text, finishReason, usage }) => {