@probelabs/probe 0.6.0-rc251 → 0.6.0-rc253

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@probelabs/probe",
3
- "version": "0.6.0-rc251",
3
+ "version": "0.6.0-rc253",
4
4
  "description": "Node.js wrapper for the probe code search tool",
5
5
  "main": "src/index.js",
6
6
  "module": "src/index.js",
@@ -819,6 +819,11 @@ export class ProbeAgent {
819
819
  // reset at the start of each answer() call
820
820
  this._outputBuffer = { items: [] };
821
821
 
822
+ // Separate accumulator for extracted RAW_OUTPUT blocks from tool results.
823
+ // This is distinct from _outputBuffer to prevent the cycle where:
824
+ // formatSuccess wraps → extract re-adds → next execute_plan re-wraps (issue #438)
825
+ this._extractedRawBlocks = [];
826
+
822
827
  const configOptions = {
823
828
  sessionId: this.sessionId,
824
829
  debug: this.debug,
@@ -2910,6 +2915,8 @@ Follow these instructions carefully:
2910
2915
  // Both must preserve the output buffer so the parent call can append it.
2911
2916
  if (this._outputBuffer && !options?._schemaFormatted && !options?._completionPromptProcessed) {
2912
2917
  this._outputBuffer.items = [];
2918
+ // Also reset the extracted blocks accumulator (issue #438)
2919
+ this._extractedRawBlocks = [];
2913
2920
  }
2914
2921
 
2915
2922
  // START CHECKPOINT: Initialize task management for this request
@@ -3564,7 +3571,12 @@ Follow these instructions carefully:
3564
3571
  } else {
3565
3572
  // Content was mostly/entirely inside thinking tags.
3566
3573
  // Extract thinking content and use it as the actual answer.
3567
- const thinkingContent = extractThinkingContent(prevContent);
3574
+ // extractThinkingContent now handles nested thinking tags (issue #439)
3575
+ let thinkingContent = extractThinkingContent(prevContent);
3576
+ // Also apply removeThinkingTags as extra safety to catch any edge cases
3577
+ if (thinkingContent) {
3578
+ thinkingContent = removeThinkingTags(thinkingContent) || thinkingContent.replace(/<\/?thinking>/g, '');
3579
+ }
3568
3580
  if (thinkingContent && thinkingContent.length > 50) {
3569
3581
  finalResult = thinkingContent;
3570
3582
  if (this.debug) console.log(`[DEBUG] Previous response was mostly in thinking tags — using thinking content as completion: ${finalResult.substring(0, 100)}...`);
@@ -3629,15 +3641,17 @@ Follow these instructions carefully:
3629
3641
 
3630
3642
  let toolResultContent = typeof executionResult === 'string' ? executionResult : JSON.stringify(executionResult, null, 2);
3631
3643
 
3632
- // Extract raw output blocks and pass them through to output buffer (before truncation)
3644
+ // Extract raw output blocks from tool result (before truncation)
3633
3645
  // This prevents LLM from processing/hallucinating large structured output from execute_plan
3634
- if (this._outputBuffer) {
3635
- const { cleanedContent, extractedBlocks } = extractRawOutputBlocks(toolResultContent, this._outputBuffer);
3636
- if (extractedBlocks.length > 0) {
3637
- toolResultContent = cleanedContent;
3638
- if (this.debug) {
3639
- console.log(`[DEBUG] Extracted ${extractedBlocks.length} raw output blocks (${extractedBlocks.reduce((sum, b) => sum + b.length, 0)} chars) to output buffer`);
3640
- }
3646
+ // Push to _extractedRawBlocks (NOT _outputBuffer) to prevent the cycle where:
3647
+ // formatSuccess wraps → extract re-adds → next execute_plan re-wraps (issue #438)
3648
+ const { cleanedContent, extractedBlocks } = extractRawOutputBlocks(toolResultContent);
3649
+ if (extractedBlocks.length > 0) {
3650
+ toolResultContent = cleanedContent;
3651
+ // Accumulate extracted blocks separately from DSL output() buffer
3652
+ this._extractedRawBlocks.push(...extractedBlocks);
3653
+ if (this.debug) {
3654
+ console.log(`[DEBUG] Extracted ${extractedBlocks.length} raw output blocks (${extractedBlocks.reduce((sum, b) => sum + b.length, 0)} chars) from tool result`);
3641
3655
  }
3642
3656
  }
3643
3657
 
@@ -3887,15 +3901,17 @@ Follow these instructions carefully:
3887
3901
  toolResultContent = toolResultContent.split(wsPrefix).join('');
3888
3902
  }
3889
3903
 
3890
- // Extract raw output blocks and pass them through to output buffer (before truncation)
3904
+ // Extract raw output blocks from tool result (before truncation)
3891
3905
  // This prevents LLM from processing/hallucinating large structured output from execute_plan
3892
- if (this._outputBuffer) {
3893
- const { cleanedContent, extractedBlocks } = extractRawOutputBlocks(toolResultContent, this._outputBuffer);
3894
- if (extractedBlocks.length > 0) {
3895
- toolResultContent = cleanedContent;
3896
- if (this.debug) {
3897
- console.log(`[DEBUG] Extracted ${extractedBlocks.length} raw output blocks (${extractedBlocks.reduce((sum, b) => sum + b.length, 0)} chars) to output buffer`);
3898
- }
3906
+ // Push to _extractedRawBlocks (NOT _outputBuffer) to prevent the cycle where:
3907
+ // formatSuccess wraps → extract re-adds → next execute_plan re-wraps (issue #438)
3908
+ const { cleanedContent, extractedBlocks } = extractRawOutputBlocks(toolResultContent);
3909
+ if (extractedBlocks.length > 0) {
3910
+ toolResultContent = cleanedContent;
3911
+ // Accumulate extracted blocks separately from DSL output() buffer
3912
+ this._extractedRawBlocks.push(...extractedBlocks);
3913
+ if (this.debug) {
3914
+ console.log(`[DEBUG] Extracted ${extractedBlocks.length} raw output blocks (${extractedBlocks.reduce((sum, b) => sum + b.length, 0)} chars) from tool result`);
3899
3915
  }
3900
3916
  }
3901
3917
 
@@ -4314,16 +4330,18 @@ After reviewing, provide your final answer using attempt_completion.`;
4314
4330
 
4315
4331
  // Make a follow-up call with the completion prompt
4316
4332
  // Pass _completionPromptProcessed to prevent infinite loops
4317
- // Save output buffer — the recursive answer() must not destroy DSL output() content
4333
+ // Save output buffers — the recursive answer() must not destroy DSL output() content
4318
4334
  const savedOutputItems = this._outputBuffer ? [...this._outputBuffer.items] : [];
4335
+ const savedExtractedBlocks = this._extractedRawBlocks ? [...this._extractedRawBlocks] : [];
4319
4336
  const completionResult = await this.answer(completionPromptMessage, [], {
4320
4337
  ...options,
4321
4338
  _completionPromptProcessed: true
4322
4339
  });
4323
- // Restore output buffer so the parent call can append it to the final result
4340
+ // Restore output buffers so the parent call can append them to the final result
4324
4341
  if (this._outputBuffer) {
4325
4342
  this._outputBuffer.items = savedOutputItems;
4326
4343
  }
4344
+ this._extractedRawBlocks = savedExtractedBlocks;
4327
4345
 
4328
4346
  // Update finalResult with the result from the completion prompt
4329
4347
  finalResult = completionResult;
@@ -4782,17 +4800,38 @@ Convert your previous response content into actual JSON data that follows this s
4782
4800
  }
4783
4801
 
4784
4802
  // Remove thinking tags from final result before returning to user
4803
+ // Skip for valid JSON to avoid destroying JSON structure when <thinking> appears
4804
+ // inside string values (e.g., after tryAutoWrapForSimpleSchema embeds content with
4805
+ // residual thinking tag fragments — issue #439)
4785
4806
  if (!options._schemaFormatted) {
4786
- finalResult = removeThinkingTags(finalResult);
4787
- if (this.debug) {
4788
- console.log(`[DEBUG] Removed thinking tags from final result`);
4807
+ let isValidJson = false;
4808
+ try {
4809
+ JSON.parse(finalResult);
4810
+ isValidJson = true;
4811
+ } catch {
4812
+ // Not valid JSON, proceed with thinking tag removal
4813
+ }
4814
+
4815
+ if (!isValidJson) {
4816
+ finalResult = removeThinkingTags(finalResult);
4817
+ if (this.debug) {
4818
+ console.log(`[DEBUG] Removed thinking tags from final result`);
4819
+ }
4820
+ } else if (this.debug) {
4821
+ console.log(`[DEBUG] Skipped thinking tag removal for valid JSON result (issue #439)`);
4789
4822
  }
4790
4823
  }
4791
4824
 
4792
4825
  // Append DSL output buffer directly to response (bypasses LLM rewriting)
4793
4826
  // Skip during _completionPromptProcessed — only the parent answer() should append the buffer.
4794
- if (this._outputBuffer && this._outputBuffer.items.length > 0 && !options._schemaFormatted && !options._completionPromptProcessed) {
4795
- const outputContent = this._outputBuffer.items.join('\n\n');
4827
+ // Combine _outputBuffer (from DSL output() calls) and _extractedRawBlocks (from tool results)
4828
+ // Using separate accumulators prevents the cycle described in issue #438.
4829
+ const allOutputItems = [
4830
+ ...(this._outputBuffer?.items || []),
4831
+ ...(this._extractedRawBlocks || [])
4832
+ ];
4833
+ if (allOutputItems.length > 0 && !options._schemaFormatted && !options._completionPromptProcessed) {
4834
+ const outputContent = allOutputItems.join('\n\n');
4796
4835
  if (options.schema) {
4797
4836
  // Schema response — the finalResult is JSON. Wrap output in RAW_OUTPUT
4798
4837
  // delimiters so clients (visor, etc.) can extract and propagate the
@@ -4805,9 +4844,10 @@ Convert your previous response content into actual JSON data that follows this s
4805
4844
  options.onStream('\n\n' + outputContent);
4806
4845
  }
4807
4846
  if (this.debug) {
4808
- console.log(`[DEBUG] Appended ${this._outputBuffer.items.length} output buffer items (${outputContent.length} chars) to final result${options.schema ? ' (with RAW_OUTPUT delimiters)' : ''}`);
4847
+ console.log(`[DEBUG] Appended ${allOutputItems.length} output items (${outputContent.length} chars) to final result${options.schema ? ' (with RAW_OUTPUT delimiters)' : ''}`);
4809
4848
  }
4810
4849
  this._outputBuffer.items = [];
4850
+ this._extractedRawBlocks = [];
4811
4851
  }
4812
4852
 
4813
4853
  return finalResult;
@@ -45,12 +45,38 @@ export function removeThinkingTags(xmlString) {
45
45
 
46
46
  /**
47
47
  * Extract thinking content for potential logging
48
+ * Handles nested thinking tags by iteratively stripping inner tags.
48
49
  * @param {string} xmlString - The XML string to extract from
49
- * @returns {string|null} - Thinking content or null if not found
50
+ * @returns {string|null} - Thinking content (cleaned of nested tags) or null if not found
50
51
  */
51
52
  export function extractThinkingContent(xmlString) {
52
53
  const thinkingMatch = xmlString.match(/<thinking>([\s\S]*?)<\/thinking>/);
53
- return thinkingMatch ? thinkingMatch[1].trim() : null;
54
+ if (!thinkingMatch) {
55
+ return null;
56
+ }
57
+
58
+ let content = thinkingMatch[1].trim();
59
+
60
+ // Handle nested thinking tags: if the extracted content itself starts with <thinking>,
61
+ // repeatedly extract from it until we get clean content.
62
+ // This handles: <thinking><thinking>content</thinking></thinking>
63
+ // where non-greedy match captures "<thinking>content" (issue #439)
64
+ while (content.startsWith('<thinking>')) {
65
+ const innerMatch = content.match(/<thinking>([\s\S]*?)<\/thinking>/);
66
+ if (innerMatch) {
67
+ content = innerMatch[1].trim();
68
+ } else {
69
+ // Unclosed inner <thinking> tag - strip the opening tag and use remaining content
70
+ // e.g., "<thinking>content" becomes "content"
71
+ content = content.substring('<thinking>'.length).trim();
72
+ break;
73
+ }
74
+ }
75
+
76
+ // Also strip any remaining thinking tags that might be embedded in the content
77
+ content = content.replace(/<\/?thinking>/g, '').trim();
78
+
79
+ return content || null;
54
80
  }
55
81
 
56
82
  /**
@@ -65,14 +65,28 @@ function stripCodeWrapping(code) {
65
65
  return s.trim();
66
66
  }
67
67
 
68
+ /**
69
+ * Generate a unique session ID for this execute_plan invocation.
70
+ * Uses crypto.randomUUID if available, falls back to timestamp + random.
71
+ */
72
+ function generatePlanSessionId(baseSessionId) {
73
+ const uniquePart = typeof crypto !== 'undefined' && crypto.randomUUID
74
+ ? crypto.randomUUID().slice(0, 8)
75
+ : `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
76
+ return `${baseSessionId || 'plan'}-${uniquePart}`;
77
+ }
78
+
68
79
  /**
69
80
  * Build DSL-compatible tool implementations from the agent's configOptions.
70
81
  *
71
82
  * @param {Object} configOptions - Agent config (sessionId, cwd, provider, model, etc.)
83
+ * @param {string} [planSessionId] - Unique session ID for this execute_plan invocation
72
84
  * @returns {Object} toolImplementations for createDSLRuntime
73
85
  */
74
- function buildToolImplementations(configOptions) {
75
- const { sessionId, cwd } = configOptions;
86
+ function buildToolImplementations(configOptions, planSessionId) {
87
+ const { cwd } = configOptions;
88
+ // Use planSessionId for isolated pagination per execute_plan, fall back to global sessionId
89
+ const sessionId = planSessionId || configOptions.sessionId;
76
90
  const tools = {};
77
91
 
78
92
  tools.search = {
@@ -311,9 +325,11 @@ export function createExecutePlanTool(options) {
311
325
 
312
326
  /**
313
327
  * Build or rebuild the DSL runtime.
314
- * Called lazily on first execute() and when MCP bridge changes.
328
+ * Called for each execute() invocation with a unique planSessionId.
329
+ *
330
+ * @param {string} [planSessionId] - Unique session ID for this execute_plan invocation
315
331
  */
316
- function buildRuntime() {
332
+ function buildRuntime(planSessionId) {
317
333
  const currentMcpBridge = getMcpBridge();
318
334
  const currentMcpTools = getMcpTools();
319
335
 
@@ -340,7 +356,7 @@ export function createExecutePlanTool(options) {
340
356
  // Agent configOptions — build everything from the agent's config
341
357
  llmCallFn = llmCallFn || buildLLMCall(options);
342
358
  runtimeOptions = {
343
- toolImplementations: buildToolImplementations(options),
359
+ toolImplementations: buildToolImplementations(options, planSessionId),
344
360
  llmCall: llmCallFn,
345
361
  mcpBridge: currentMcpBridge,
346
362
  mcpTools: filteredMcpTools,
@@ -360,6 +376,7 @@ export function createExecutePlanTool(options) {
360
376
 
361
377
  /**
362
378
  * Get or rebuild the runtime if MCP state has changed.
379
+ * @deprecated Use buildRuntime(planSessionId) directly for unique sessions per execution
363
380
  */
364
381
  function getRuntime() {
365
382
  const currentMcpBridge = getMcpBridge();
@@ -378,14 +395,22 @@ export function createExecutePlanTool(options) {
378
395
  'Write simple synchronous-looking code — do NOT use async/await.',
379
396
  parameters: executePlanSchema,
380
397
  execute: async ({ code, description }) => {
398
+ // Generate a unique session ID for this execute_plan invocation
399
+ // This ensures search pagination is isolated per execute_plan call
400
+ const planSessionId = generatePlanSessionId(options.sessionId);
401
+
381
402
  // Create top-level OTEL span for the entire execute_plan invocation
382
403
  const planSpan = tracer?.createToolSpan?.('execute_plan', {
383
404
  'dsl.description': description || '',
384
405
  'dsl.code_length': code.length,
385
406
  'dsl.code': code,
386
407
  'dsl.max_retries': maxRetries,
408
+ 'dsl.plan_session_id': planSessionId,
387
409
  }) || null;
388
410
 
411
+ // Build runtime with the unique planSessionId for isolated search pagination
412
+ const planRuntime = buildRuntime(planSessionId);
413
+
389
414
  // Strip XML tags and markdown fences LLMs sometimes wrap code in
390
415
  let currentCode = stripCodeWrapping(code);
391
416
  let lastError = null;
@@ -446,7 +471,7 @@ RULES REMINDER:
446
471
  }
447
472
  }
448
473
 
449
- const result = await getRuntime().execute(currentCode, description);
474
+ const result = await planRuntime.execute(currentCode, description);
450
475
 
451
476
  if (result.status === 'success') {
452
477
  finalOutput = formatSuccess(result, description, attempt, outputBuffer);
@@ -574,8 +599,15 @@ function formatSuccess(result, description, attempt, outputBuffer) {
574
599
 
575
600
  // Format the result value
576
601
  const resultValue = result.result;
602
+ const hasOutputBufferContent = outputBuffer && outputBuffer.items && outputBuffer.items.length > 0;
577
603
  if (resultValue === undefined || resultValue === null) {
578
- output += 'Plan completed (no return value).';
604
+ if (hasOutputBufferContent) {
605
+ // output() was used but no return statement — tell LLM the script succeeded
606
+ const totalChars = outputBuffer.items.reduce((sum, item) => sum + item.length, 0);
607
+ output += `Plan completed successfully. Output captured (${totalChars} chars) via output() and will be included in the final response.`;
608
+ } else {
609
+ output += 'Plan completed (no return value).';
610
+ }
579
611
  } else if (typeof resultValue === 'string') {
580
612
  output += `Result:\n${resultValue}`;
581
613
  } else {