@probelabs/probe 0.6.0-rc288 → 0.6.0-rc290
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/binaries/probe-v0.6.0-rc290-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc290-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc290-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc290-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc290-x86_64-unknown-linux-musl.tar.gz +0 -0
- package/build/agent/ProbeAgent.js +61 -10
- package/build/agent/index.js +401 -86261
- package/build/agent/shared/prompts.js +27 -6
- package/build/extract.js +4 -2
- package/build/mcp/index.js +122 -9
- package/build/mcp/index.ts +162 -17
- package/build/search.js +6 -5
- package/build/tools/vercel.js +51 -22
- package/cjs/agent/ProbeAgent.cjs +131 -38
- package/cjs/index.cjs +131 -38
- package/package.json +2 -1
- package/src/agent/ProbeAgent.js +61 -10
- package/src/agent/shared/prompts.js +27 -6
- package/src/extract.js +4 -2
- package/src/mcp/index.ts +162 -17
- package/src/search.js +6 -5
- package/src/tools/vercel.js +51 -22
- package/bin/binaries/probe-v0.6.0-rc288-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc288-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc288-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc288-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc288-x86_64-unknown-linux-musl.tar.gz +0 -0
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -2976,9 +2976,9 @@ ${extractGuidance2}
|
|
|
2976
2976
|
Follow these instructions carefully:
|
|
2977
2977
|
1. Analyze the user's request.
|
|
2978
2978
|
2. Use the available tools step-by-step to fulfill the request.
|
|
2979
|
-
3. You
|
|
2980
|
-
4. Ensure to get really deep and understand the full picture before answering.
|
|
2981
|
-
5. Once the task is fully completed, provide your final answer directly as text.
|
|
2979
|
+
3. You MUST use the search tool before answering ANY code-related question. NEVER answer from memory or general knowledge — your answers must be grounded in actual code found via search/extract.${this.searchDelegate ? ' Ask natural language questions — the search subagent handles keyword formulation and returns extracted code blocks. Use extract only to expand context or read full files.' : ' Search handles stemming and case variations automatically — do NOT try keyword variations manually. Read full files only if really necessary.'}
|
|
2980
|
+
4. Ensure to get really deep and understand the full picture before answering. Follow call chains — if function A calls B, search for B too. Look for related subsystems (e.g., if asked about rate limiting, also check for quota, throttling, smoothing).
|
|
2981
|
+
5. Once the task is fully completed, provide your final answer directly as text. Always cite specific files and line numbers as evidence. Do NOT output planning or thinking text — go straight to the answer.
|
|
2982
2982
|
6. ${this.searchDelegate ? 'Ask clear, specific questions when searching. Each search should target a distinct concept or question.' : 'Prefer concise and focused search queries. Use specific keywords and phrases to narrow down results.'}
|
|
2983
2983
|
7. NEVER use bash for code exploration (no grep, cat, find, head, tail, awk, sed) — always use search and extract tools instead. Bash is only for system operations like building, running tests, or git commands.${this.allowEdit ? `
|
|
2984
2984
|
7. When modifying files, choose the appropriate tool:
|
|
@@ -3483,6 +3483,24 @@ Follow these instructions carefully:
|
|
|
3483
3483
|
if (recentTexts.every(t => detectStuckResponse(t))) return true;
|
|
3484
3484
|
}
|
|
3485
3485
|
|
|
3486
|
+
// Circuit breaker: repeated identical tool calls (e.g. model ignores dedup message)
|
|
3487
|
+
if (steps.length >= 3) {
|
|
3488
|
+
const last3 = steps.slice(-3);
|
|
3489
|
+
const allHaveTools = last3.every(s => s.toolCalls?.length === 1);
|
|
3490
|
+
if (allHaveTools) {
|
|
3491
|
+
const signatures = last3.map(s => {
|
|
3492
|
+
const tc = s.toolCalls[0];
|
|
3493
|
+
return `${tc.toolName}::${JSON.stringify(tc.args ?? tc.input)}`;
|
|
3494
|
+
});
|
|
3495
|
+
if (signatures[0] === signatures[1] && signatures[1] === signatures[2]) {
|
|
3496
|
+
if (this.debug) {
|
|
3497
|
+
console.log(`[DEBUG] Circuit breaker: 3 consecutive identical tool calls detected (${last3[0].toolCalls[0].toolName}), forcing stop`);
|
|
3498
|
+
}
|
|
3499
|
+
return true;
|
|
3500
|
+
}
|
|
3501
|
+
}
|
|
3502
|
+
}
|
|
3503
|
+
|
|
3486
3504
|
return false;
|
|
3487
3505
|
},
|
|
3488
3506
|
prepareStep: ({ steps, stepNumber }) => {
|
|
@@ -3493,6 +3511,24 @@ Follow these instructions carefully:
|
|
|
3493
3511
|
};
|
|
3494
3512
|
}
|
|
3495
3513
|
|
|
3514
|
+
// Force text-only response after 2 consecutive identical tool calls
|
|
3515
|
+
if (steps.length >= 2) {
|
|
3516
|
+
const last2 = steps.slice(-2);
|
|
3517
|
+
if (last2.every(s => s.toolCalls?.length === 1)) {
|
|
3518
|
+
const tc1 = last2[0].toolCalls[0];
|
|
3519
|
+
const tc2 = last2[1].toolCalls[0];
|
|
3520
|
+
const sig1 = `${tc1.toolName}::${JSON.stringify(tc1.args ?? tc1.input)}`;
|
|
3521
|
+
const sig2 = `${tc2.toolName}::${JSON.stringify(tc2.args ?? tc2.input)}`;
|
|
3522
|
+
if (sig1 === sig2) {
|
|
3523
|
+
if (this.debug) {
|
|
3524
|
+
console.log(`[DEBUG] prepareStep: 2 consecutive identical tool calls (${tc1.toolName}), forcing toolChoice=none`);
|
|
3525
|
+
console.log(`[DEBUG] sig: ${sig1.substring(0, 200)}`);
|
|
3526
|
+
}
|
|
3527
|
+
return { toolChoice: 'none' };
|
|
3528
|
+
}
|
|
3529
|
+
}
|
|
3530
|
+
}
|
|
3531
|
+
|
|
3496
3532
|
const lastStep = steps[steps.length - 1];
|
|
3497
3533
|
const modelJustStopped = lastStep?.finishReason === 'stop'
|
|
3498
3534
|
&& (!lastStep?.toolCalls || lastStep.toolCalls.length === 0);
|
|
@@ -3532,7 +3568,8 @@ ${resultToReview}
|
|
|
3532
3568
|
Double-check your response based on the criteria above. If everything looks good, respond with your previous answer exactly as-is. If something needs to be fixed or is missing, do it now, then respond with the COMPLETE updated answer (everything you did in total, not just the fix).`;
|
|
3533
3569
|
|
|
3534
3570
|
return {
|
|
3535
|
-
userMessage: completionPromptMessage
|
|
3571
|
+
userMessage: completionPromptMessage,
|
|
3572
|
+
toolChoice: 'none' // Force text-only review — no tool calls
|
|
3536
3573
|
};
|
|
3537
3574
|
}
|
|
3538
3575
|
}
|
|
@@ -3585,7 +3622,13 @@ Double-check your response based on the criteria above. If everything looks good
|
|
|
3585
3622
|
}
|
|
3586
3623
|
|
|
3587
3624
|
if (this.debug) {
|
|
3588
|
-
|
|
3625
|
+
const toolSummary = toolCalls?.length
|
|
3626
|
+
? toolCalls.map(tc => {
|
|
3627
|
+
const args = tc.args ? JSON.stringify(tc.args) : '';
|
|
3628
|
+
return args ? `${tc.toolName}(${debugTruncate(args, 120)})` : tc.toolName;
|
|
3629
|
+
}).join(', ')
|
|
3630
|
+
: 'none';
|
|
3631
|
+
console.log(`[DEBUG] Step ${currentIteration}/${maxIterations} finished (reason: ${finishReason}, tools: [${toolSummary}])`);
|
|
3589
3632
|
if (text) {
|
|
3590
3633
|
console.log(`[DEBUG] model text: ${debugTruncate(text)}`);
|
|
3591
3634
|
}
|
|
@@ -3627,11 +3670,20 @@ Double-check your response based on the criteria above. If everything looks good
|
|
|
3627
3670
|
const executeAIRequest = async () => {
|
|
3628
3671
|
const result = await this.streamTextWithRetryAndFallback(streamOptions);
|
|
3629
3672
|
|
|
3630
|
-
//
|
|
3631
|
-
|
|
3673
|
+
// Use only the last step's text as the final answer.
|
|
3674
|
+
// result.text concatenates ALL steps (including intermediate planning text),
|
|
3675
|
+
// but the user should only see the final answer from the last step.
|
|
3676
|
+
const steps = await result.steps;
|
|
3677
|
+
let finalText;
|
|
3678
|
+
if (steps && steps.length > 1) {
|
|
3679
|
+
// Multi-step: use last step's text (the actual answer after tool calls)
|
|
3680
|
+
const lastStepText = steps[steps.length - 1].text;
|
|
3681
|
+
finalText = lastStepText || await result.text;
|
|
3682
|
+
} else {
|
|
3683
|
+
finalText = await result.text;
|
|
3684
|
+
}
|
|
3632
3685
|
|
|
3633
3686
|
if (this.debug) {
|
|
3634
|
-
const steps = await result.steps;
|
|
3635
3687
|
console.log(`[DEBUG] streamText completed: ${steps?.length || 0} steps, finalText=${finalText?.length || 0} chars`);
|
|
3636
3688
|
}
|
|
3637
3689
|
|
|
@@ -3726,12 +3778,11 @@ Double-check your response based on the criteria above. If everything looks good
|
|
|
3726
3778
|
|
|
3727
3779
|
currentMessages.push({ role: 'user', content: completionPromptMessage });
|
|
3728
3780
|
|
|
3729
|
-
const completionMaxIterations = 5;
|
|
3730
3781
|
const completionStreamOptions = {
|
|
3731
3782
|
model: this.provider ? this.provider(this.model) : this.model,
|
|
3732
3783
|
messages: this.prepareMessagesWithImages(currentMessages),
|
|
3733
3784
|
tools,
|
|
3734
|
-
|
|
3785
|
+
toolChoice: 'none', // Force text-only response — no tool calls during review
|
|
3735
3786
|
maxTokens: maxResponseTokens,
|
|
3736
3787
|
temperature: 0.3,
|
|
3737
3788
|
onStepFinish: ({ toolResults, text, finishReason, usage }) => {
|