@probelabs/probe 0.6.0-rc251 → 0.6.0-rc253
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/binaries/probe-v0.6.0-rc253-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc253-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc253-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc253-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc253-x86_64-unknown-linux-musl.tar.gz +0 -0
- package/build/agent/ProbeAgent.js +65 -25
- package/build/agent/index.js +188 -71
- package/build/agent/xmlParsingUtils.js +28 -2
- package/build/tools/executePlan.js +39 -7
- package/cjs/agent/ProbeAgent.cjs +9826 -7372
- package/cjs/index.cjs +9829 -7375
- package/package.json +1 -1
- package/src/agent/ProbeAgent.js +65 -25
- package/src/agent/xmlParsingUtils.js +28 -2
- package/src/tools/executePlan.js +39 -7
- package/bin/binaries/probe-v0.6.0-rc251-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc251-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc251-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc251-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc251-x86_64-unknown-linux-musl.tar.gz +0 -0
package/package.json
CHANGED
package/src/agent/ProbeAgent.js
CHANGED
|
@@ -819,6 +819,11 @@ export class ProbeAgent {
|
|
|
819
819
|
// reset at the start of each answer() call
|
|
820
820
|
this._outputBuffer = { items: [] };
|
|
821
821
|
|
|
822
|
+
// Separate accumulator for extracted RAW_OUTPUT blocks from tool results.
|
|
823
|
+
// This is distinct from _outputBuffer to prevent the cycle where:
|
|
824
|
+
// formatSuccess wraps → extract re-adds → next execute_plan re-wraps (issue #438)
|
|
825
|
+
this._extractedRawBlocks = [];
|
|
826
|
+
|
|
822
827
|
const configOptions = {
|
|
823
828
|
sessionId: this.sessionId,
|
|
824
829
|
debug: this.debug,
|
|
@@ -2910,6 +2915,8 @@ Follow these instructions carefully:
|
|
|
2910
2915
|
// Both must preserve the output buffer so the parent call can append it.
|
|
2911
2916
|
if (this._outputBuffer && !options?._schemaFormatted && !options?._completionPromptProcessed) {
|
|
2912
2917
|
this._outputBuffer.items = [];
|
|
2918
|
+
// Also reset the extracted blocks accumulator (issue #438)
|
|
2919
|
+
this._extractedRawBlocks = [];
|
|
2913
2920
|
}
|
|
2914
2921
|
|
|
2915
2922
|
// START CHECKPOINT: Initialize task management for this request
|
|
@@ -3564,7 +3571,12 @@ Follow these instructions carefully:
|
|
|
3564
3571
|
} else {
|
|
3565
3572
|
// Content was mostly/entirely inside thinking tags.
|
|
3566
3573
|
// Extract thinking content and use it as the actual answer.
|
|
3567
|
-
|
|
3574
|
+
// extractThinkingContent now handles nested thinking tags (issue #439)
|
|
3575
|
+
let thinkingContent = extractThinkingContent(prevContent);
|
|
3576
|
+
// Also apply removeThinkingTags as extra safety to catch any edge cases
|
|
3577
|
+
if (thinkingContent) {
|
|
3578
|
+
thinkingContent = removeThinkingTags(thinkingContent) || thinkingContent.replace(/<\/?thinking>/g, '');
|
|
3579
|
+
}
|
|
3568
3580
|
if (thinkingContent && thinkingContent.length > 50) {
|
|
3569
3581
|
finalResult = thinkingContent;
|
|
3570
3582
|
if (this.debug) console.log(`[DEBUG] Previous response was mostly in thinking tags — using thinking content as completion: ${finalResult.substring(0, 100)}...`);
|
|
@@ -3629,15 +3641,17 @@ Follow these instructions carefully:
|
|
|
3629
3641
|
|
|
3630
3642
|
let toolResultContent = typeof executionResult === 'string' ? executionResult : JSON.stringify(executionResult, null, 2);
|
|
3631
3643
|
|
|
3632
|
-
// Extract raw output blocks
|
|
3644
|
+
// Extract raw output blocks from tool result (before truncation)
|
|
3633
3645
|
// This prevents LLM from processing/hallucinating large structured output from execute_plan
|
|
3634
|
-
|
|
3635
|
-
|
|
3636
|
-
|
|
3637
|
-
|
|
3638
|
-
|
|
3639
|
-
|
|
3640
|
-
|
|
3646
|
+
// Push to _extractedRawBlocks (NOT _outputBuffer) to prevent the cycle where:
|
|
3647
|
+
// formatSuccess wraps → extract re-adds → next execute_plan re-wraps (issue #438)
|
|
3648
|
+
const { cleanedContent, extractedBlocks } = extractRawOutputBlocks(toolResultContent);
|
|
3649
|
+
if (extractedBlocks.length > 0) {
|
|
3650
|
+
toolResultContent = cleanedContent;
|
|
3651
|
+
// Accumulate extracted blocks separately from DSL output() buffer
|
|
3652
|
+
this._extractedRawBlocks.push(...extractedBlocks);
|
|
3653
|
+
if (this.debug) {
|
|
3654
|
+
console.log(`[DEBUG] Extracted ${extractedBlocks.length} raw output blocks (${extractedBlocks.reduce((sum, b) => sum + b.length, 0)} chars) from tool result`);
|
|
3641
3655
|
}
|
|
3642
3656
|
}
|
|
3643
3657
|
|
|
@@ -3887,15 +3901,17 @@ Follow these instructions carefully:
|
|
|
3887
3901
|
toolResultContent = toolResultContent.split(wsPrefix).join('');
|
|
3888
3902
|
}
|
|
3889
3903
|
|
|
3890
|
-
// Extract raw output blocks
|
|
3904
|
+
// Extract raw output blocks from tool result (before truncation)
|
|
3891
3905
|
// This prevents LLM from processing/hallucinating large structured output from execute_plan
|
|
3892
|
-
|
|
3893
|
-
|
|
3894
|
-
|
|
3895
|
-
|
|
3896
|
-
|
|
3897
|
-
|
|
3898
|
-
|
|
3906
|
+
// Push to _extractedRawBlocks (NOT _outputBuffer) to prevent the cycle where:
|
|
3907
|
+
// formatSuccess wraps → extract re-adds → next execute_plan re-wraps (issue #438)
|
|
3908
|
+
const { cleanedContent, extractedBlocks } = extractRawOutputBlocks(toolResultContent);
|
|
3909
|
+
if (extractedBlocks.length > 0) {
|
|
3910
|
+
toolResultContent = cleanedContent;
|
|
3911
|
+
// Accumulate extracted blocks separately from DSL output() buffer
|
|
3912
|
+
this._extractedRawBlocks.push(...extractedBlocks);
|
|
3913
|
+
if (this.debug) {
|
|
3914
|
+
console.log(`[DEBUG] Extracted ${extractedBlocks.length} raw output blocks (${extractedBlocks.reduce((sum, b) => sum + b.length, 0)} chars) from tool result`);
|
|
3899
3915
|
}
|
|
3900
3916
|
}
|
|
3901
3917
|
|
|
@@ -4314,16 +4330,18 @@ After reviewing, provide your final answer using attempt_completion.`;
|
|
|
4314
4330
|
|
|
4315
4331
|
// Make a follow-up call with the completion prompt
|
|
4316
4332
|
// Pass _completionPromptProcessed to prevent infinite loops
|
|
4317
|
-
// Save output
|
|
4333
|
+
// Save output buffers — the recursive answer() must not destroy DSL output() content
|
|
4318
4334
|
const savedOutputItems = this._outputBuffer ? [...this._outputBuffer.items] : [];
|
|
4335
|
+
const savedExtractedBlocks = this._extractedRawBlocks ? [...this._extractedRawBlocks] : [];
|
|
4319
4336
|
const completionResult = await this.answer(completionPromptMessage, [], {
|
|
4320
4337
|
...options,
|
|
4321
4338
|
_completionPromptProcessed: true
|
|
4322
4339
|
});
|
|
4323
|
-
// Restore output
|
|
4340
|
+
// Restore output buffers so the parent call can append them to the final result
|
|
4324
4341
|
if (this._outputBuffer) {
|
|
4325
4342
|
this._outputBuffer.items = savedOutputItems;
|
|
4326
4343
|
}
|
|
4344
|
+
this._extractedRawBlocks = savedExtractedBlocks;
|
|
4327
4345
|
|
|
4328
4346
|
// Update finalResult with the result from the completion prompt
|
|
4329
4347
|
finalResult = completionResult;
|
|
@@ -4782,17 +4800,38 @@ Convert your previous response content into actual JSON data that follows this s
|
|
|
4782
4800
|
}
|
|
4783
4801
|
|
|
4784
4802
|
// Remove thinking tags from final result before returning to user
|
|
4803
|
+
// Skip for valid JSON to avoid destroying JSON structure when <thinking> appears
|
|
4804
|
+
// inside string values (e.g., after tryAutoWrapForSimpleSchema embeds content with
|
|
4805
|
+
// residual thinking tag fragments — issue #439)
|
|
4785
4806
|
if (!options._schemaFormatted) {
|
|
4786
|
-
|
|
4787
|
-
|
|
4788
|
-
|
|
4807
|
+
let isValidJson = false;
|
|
4808
|
+
try {
|
|
4809
|
+
JSON.parse(finalResult);
|
|
4810
|
+
isValidJson = true;
|
|
4811
|
+
} catch {
|
|
4812
|
+
// Not valid JSON, proceed with thinking tag removal
|
|
4813
|
+
}
|
|
4814
|
+
|
|
4815
|
+
if (!isValidJson) {
|
|
4816
|
+
finalResult = removeThinkingTags(finalResult);
|
|
4817
|
+
if (this.debug) {
|
|
4818
|
+
console.log(`[DEBUG] Removed thinking tags from final result`);
|
|
4819
|
+
}
|
|
4820
|
+
} else if (this.debug) {
|
|
4821
|
+
console.log(`[DEBUG] Skipped thinking tag removal for valid JSON result (issue #439)`);
|
|
4789
4822
|
}
|
|
4790
4823
|
}
|
|
4791
4824
|
|
|
4792
4825
|
// Append DSL output buffer directly to response (bypasses LLM rewriting)
|
|
4793
4826
|
// Skip during _completionPromptProcessed — only the parent answer() should append the buffer.
|
|
4794
|
-
|
|
4795
|
-
|
|
4827
|
+
// Combine _outputBuffer (from DSL output() calls) and _extractedRawBlocks (from tool results)
|
|
4828
|
+
// Using separate accumulators prevents the cycle described in issue #438.
|
|
4829
|
+
const allOutputItems = [
|
|
4830
|
+
...(this._outputBuffer?.items || []),
|
|
4831
|
+
...(this._extractedRawBlocks || [])
|
|
4832
|
+
];
|
|
4833
|
+
if (allOutputItems.length > 0 && !options._schemaFormatted && !options._completionPromptProcessed) {
|
|
4834
|
+
const outputContent = allOutputItems.join('\n\n');
|
|
4796
4835
|
if (options.schema) {
|
|
4797
4836
|
// Schema response — the finalResult is JSON. Wrap output in RAW_OUTPUT
|
|
4798
4837
|
// delimiters so clients (visor, etc.) can extract and propagate the
|
|
@@ -4805,9 +4844,10 @@ Convert your previous response content into actual JSON data that follows this s
|
|
|
4805
4844
|
options.onStream('\n\n' + outputContent);
|
|
4806
4845
|
}
|
|
4807
4846
|
if (this.debug) {
|
|
4808
|
-
console.log(`[DEBUG] Appended ${
|
|
4847
|
+
console.log(`[DEBUG] Appended ${allOutputItems.length} output items (${outputContent.length} chars) to final result${options.schema ? ' (with RAW_OUTPUT delimiters)' : ''}`);
|
|
4809
4848
|
}
|
|
4810
4849
|
this._outputBuffer.items = [];
|
|
4850
|
+
this._extractedRawBlocks = [];
|
|
4811
4851
|
}
|
|
4812
4852
|
|
|
4813
4853
|
return finalResult;
|
|
@@ -45,12 +45,38 @@ export function removeThinkingTags(xmlString) {
|
|
|
45
45
|
|
|
46
46
|
/**
|
|
47
47
|
* Extract thinking content for potential logging
|
|
48
|
+
* Handles nested thinking tags by recursively stripping inner tags.
|
|
48
49
|
* @param {string} xmlString - The XML string to extract from
|
|
49
|
-
* @returns {string|null} - Thinking content or null if not found
|
|
50
|
+
* @returns {string|null} - Thinking content (cleaned of nested tags) or null if not found
|
|
50
51
|
*/
|
|
51
52
|
export function extractThinkingContent(xmlString) {
|
|
52
53
|
const thinkingMatch = xmlString.match(/<thinking>([\s\S]*?)<\/thinking>/);
|
|
53
|
-
|
|
54
|
+
if (!thinkingMatch) {
|
|
55
|
+
return null;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
let content = thinkingMatch[1].trim();
|
|
59
|
+
|
|
60
|
+
// Handle nested thinking tags: if the extracted content itself starts with <thinking>,
|
|
61
|
+
// recursively extract from it until we get clean content.
|
|
62
|
+
// This handles: <thinking><thinking>content</thinking></thinking>
|
|
63
|
+
// where non-greedy match captures "<thinking>content" (issue #439)
|
|
64
|
+
while (content.startsWith('<thinking>')) {
|
|
65
|
+
const innerMatch = content.match(/<thinking>([\s\S]*?)<\/thinking>/);
|
|
66
|
+
if (innerMatch) {
|
|
67
|
+
content = innerMatch[1].trim();
|
|
68
|
+
} else {
|
|
69
|
+
// Unclosed inner <thinking> tag - strip the opening tag and use remaining content
|
|
70
|
+
// e.g., "<thinking>content" becomes "content"
|
|
71
|
+
content = content.substring('<thinking>'.length).trim();
|
|
72
|
+
break;
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
// Also strip any remaining thinking tags that might be embedded in the content
|
|
77
|
+
content = content.replace(/<\/?thinking>/g, '').trim();
|
|
78
|
+
|
|
79
|
+
return content || null;
|
|
54
80
|
}
|
|
55
81
|
|
|
56
82
|
/**
|
package/src/tools/executePlan.js
CHANGED
|
@@ -65,14 +65,28 @@ function stripCodeWrapping(code) {
|
|
|
65
65
|
return s.trim();
|
|
66
66
|
}
|
|
67
67
|
|
|
68
|
+
/**
|
|
69
|
+
* Generate a unique session ID for this execute_plan invocation.
|
|
70
|
+
* Uses crypto.randomUUID if available, falls back to timestamp + random.
|
|
71
|
+
*/
|
|
72
|
+
function generatePlanSessionId(baseSessionId) {
|
|
73
|
+
const uniquePart = typeof crypto !== 'undefined' && crypto.randomUUID
|
|
74
|
+
? crypto.randomUUID().slice(0, 8)
|
|
75
|
+
: `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
|
|
76
|
+
return `${baseSessionId || 'plan'}-${uniquePart}`;
|
|
77
|
+
}
|
|
78
|
+
|
|
68
79
|
/**
|
|
69
80
|
* Build DSL-compatible tool implementations from the agent's configOptions.
|
|
70
81
|
*
|
|
71
82
|
* @param {Object} configOptions - Agent config (sessionId, cwd, provider, model, etc.)
|
|
83
|
+
* @param {string} [planSessionId] - Unique session ID for this execute_plan invocation
|
|
72
84
|
* @returns {Object} toolImplementations for createDSLRuntime
|
|
73
85
|
*/
|
|
74
|
-
function buildToolImplementations(configOptions) {
|
|
75
|
-
const {
|
|
86
|
+
function buildToolImplementations(configOptions, planSessionId) {
|
|
87
|
+
const { cwd } = configOptions;
|
|
88
|
+
// Use planSessionId for isolated pagination per execute_plan, fall back to global sessionId
|
|
89
|
+
const sessionId = planSessionId || configOptions.sessionId;
|
|
76
90
|
const tools = {};
|
|
77
91
|
|
|
78
92
|
tools.search = {
|
|
@@ -311,9 +325,11 @@ export function createExecutePlanTool(options) {
|
|
|
311
325
|
|
|
312
326
|
/**
|
|
313
327
|
* Build or rebuild the DSL runtime.
|
|
314
|
-
* Called
|
|
328
|
+
* Called for each execute() invocation with a unique planSessionId.
|
|
329
|
+
*
|
|
330
|
+
* @param {string} [planSessionId] - Unique session ID for this execute_plan invocation
|
|
315
331
|
*/
|
|
316
|
-
function buildRuntime() {
|
|
332
|
+
function buildRuntime(planSessionId) {
|
|
317
333
|
const currentMcpBridge = getMcpBridge();
|
|
318
334
|
const currentMcpTools = getMcpTools();
|
|
319
335
|
|
|
@@ -340,7 +356,7 @@ export function createExecutePlanTool(options) {
|
|
|
340
356
|
// Agent configOptions — build everything from the agent's config
|
|
341
357
|
llmCallFn = llmCallFn || buildLLMCall(options);
|
|
342
358
|
runtimeOptions = {
|
|
343
|
-
toolImplementations: buildToolImplementations(options),
|
|
359
|
+
toolImplementations: buildToolImplementations(options, planSessionId),
|
|
344
360
|
llmCall: llmCallFn,
|
|
345
361
|
mcpBridge: currentMcpBridge,
|
|
346
362
|
mcpTools: filteredMcpTools,
|
|
@@ -360,6 +376,7 @@ export function createExecutePlanTool(options) {
|
|
|
360
376
|
|
|
361
377
|
/**
|
|
362
378
|
* Get or rebuild the runtime if MCP state has changed.
|
|
379
|
+
* @deprecated Use buildRuntime(planSessionId) directly for unique sessions per execution
|
|
363
380
|
*/
|
|
364
381
|
function getRuntime() {
|
|
365
382
|
const currentMcpBridge = getMcpBridge();
|
|
@@ -378,14 +395,22 @@ export function createExecutePlanTool(options) {
|
|
|
378
395
|
'Write simple synchronous-looking code — do NOT use async/await.',
|
|
379
396
|
parameters: executePlanSchema,
|
|
380
397
|
execute: async ({ code, description }) => {
|
|
398
|
+
// Generate a unique session ID for this execute_plan invocation
|
|
399
|
+
// This ensures search pagination is isolated per execute_plan call
|
|
400
|
+
const planSessionId = generatePlanSessionId(options.sessionId);
|
|
401
|
+
|
|
381
402
|
// Create top-level OTEL span for the entire execute_plan invocation
|
|
382
403
|
const planSpan = tracer?.createToolSpan?.('execute_plan', {
|
|
383
404
|
'dsl.description': description || '',
|
|
384
405
|
'dsl.code_length': code.length,
|
|
385
406
|
'dsl.code': code,
|
|
386
407
|
'dsl.max_retries': maxRetries,
|
|
408
|
+
'dsl.plan_session_id': planSessionId,
|
|
387
409
|
}) || null;
|
|
388
410
|
|
|
411
|
+
// Build runtime with the unique planSessionId for isolated search pagination
|
|
412
|
+
const planRuntime = buildRuntime(planSessionId);
|
|
413
|
+
|
|
389
414
|
// Strip XML tags and markdown fences LLMs sometimes wrap code in
|
|
390
415
|
let currentCode = stripCodeWrapping(code);
|
|
391
416
|
let lastError = null;
|
|
@@ -446,7 +471,7 @@ RULES REMINDER:
|
|
|
446
471
|
}
|
|
447
472
|
}
|
|
448
473
|
|
|
449
|
-
const result = await
|
|
474
|
+
const result = await planRuntime.execute(currentCode, description);
|
|
450
475
|
|
|
451
476
|
if (result.status === 'success') {
|
|
452
477
|
finalOutput = formatSuccess(result, description, attempt, outputBuffer);
|
|
@@ -574,8 +599,15 @@ function formatSuccess(result, description, attempt, outputBuffer) {
|
|
|
574
599
|
|
|
575
600
|
// Format the result value
|
|
576
601
|
const resultValue = result.result;
|
|
602
|
+
const hasOutputBufferContent = outputBuffer && outputBuffer.items && outputBuffer.items.length > 0;
|
|
577
603
|
if (resultValue === undefined || resultValue === null) {
|
|
578
|
-
|
|
604
|
+
if (hasOutputBufferContent) {
|
|
605
|
+
// output() was used but no return statement — tell LLM the script succeeded
|
|
606
|
+
const totalChars = outputBuffer.items.reduce((sum, item) => sum + item.length, 0);
|
|
607
|
+
output += `Plan completed successfully. Output captured (${totalChars} chars) via output() and will be included in the final response.`;
|
|
608
|
+
} else {
|
|
609
|
+
output += 'Plan completed (no return value).';
|
|
610
|
+
}
|
|
579
611
|
} else if (typeof resultValue === 'string') {
|
|
580
612
|
output += `Result:\n${resultValue}`;
|
|
581
613
|
} else {
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|