@probelabs/probe 0.6.0-rc251 → 0.6.0-rc253

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -819,6 +819,11 @@ export class ProbeAgent {
819
819
  // reset at the start of each answer() call
820
820
  this._outputBuffer = { items: [] };
821
821
 
822
+ // Separate accumulator for extracted RAW_OUTPUT blocks from tool results.
823
+ // This is distinct from _outputBuffer to prevent the cycle where:
824
+ // formatSuccess wraps → extract re-adds → next execute_plan re-wraps (issue #438)
825
+ this._extractedRawBlocks = [];
826
+
822
827
  const configOptions = {
823
828
  sessionId: this.sessionId,
824
829
  debug: this.debug,
@@ -2910,6 +2915,8 @@ Follow these instructions carefully:
2910
2915
  // Both must preserve the output buffer so the parent call can append it.
2911
2916
  if (this._outputBuffer && !options?._schemaFormatted && !options?._completionPromptProcessed) {
2912
2917
  this._outputBuffer.items = [];
2918
+ // Also reset the extracted blocks accumulator (issue #438)
2919
+ this._extractedRawBlocks = [];
2913
2920
  }
2914
2921
 
2915
2922
  // START CHECKPOINT: Initialize task management for this request
@@ -3564,7 +3571,12 @@ Follow these instructions carefully:
3564
3571
  } else {
3565
3572
  // Content was mostly/entirely inside thinking tags.
3566
3573
  // Extract thinking content and use it as the actual answer.
3567
- const thinkingContent = extractThinkingContent(prevContent);
3574
+ // extractThinkingContent now handles nested thinking tags (issue #439)
3575
+ let thinkingContent = extractThinkingContent(prevContent);
3576
+ // Also apply removeThinkingTags as extra safety to catch any edge cases
3577
+ if (thinkingContent) {
3578
+ thinkingContent = removeThinkingTags(thinkingContent) || thinkingContent.replace(/<\/?thinking>/g, '');
3579
+ }
3568
3580
  if (thinkingContent && thinkingContent.length > 50) {
3569
3581
  finalResult = thinkingContent;
3570
3582
  if (this.debug) console.log(`[DEBUG] Previous response was mostly in thinking tags — using thinking content as completion: ${finalResult.substring(0, 100)}...`);
@@ -3629,15 +3641,17 @@ Follow these instructions carefully:
3629
3641
 
3630
3642
  let toolResultContent = typeof executionResult === 'string' ? executionResult : JSON.stringify(executionResult, null, 2);
3631
3643
 
3632
- // Extract raw output blocks and pass them through to output buffer (before truncation)
3644
+ // Extract raw output blocks from tool result (before truncation)
3633
3645
  // This prevents LLM from processing/hallucinating large structured output from execute_plan
3634
- if (this._outputBuffer) {
3635
- const { cleanedContent, extractedBlocks } = extractRawOutputBlocks(toolResultContent, this._outputBuffer);
3636
- if (extractedBlocks.length > 0) {
3637
- toolResultContent = cleanedContent;
3638
- if (this.debug) {
3639
- console.log(`[DEBUG] Extracted ${extractedBlocks.length} raw output blocks (${extractedBlocks.reduce((sum, b) => sum + b.length, 0)} chars) to output buffer`);
3640
- }
3646
+ // Push to _extractedRawBlocks (NOT _outputBuffer) to prevent the cycle where:
3647
+ // formatSuccess wraps → extract re-adds → next execute_plan re-wraps (issue #438)
3648
+ const { cleanedContent, extractedBlocks } = extractRawOutputBlocks(toolResultContent);
3649
+ if (extractedBlocks.length > 0) {
3650
+ toolResultContent = cleanedContent;
3651
+ // Accumulate extracted blocks separately from DSL output() buffer
3652
+ this._extractedRawBlocks.push(...extractedBlocks);
3653
+ if (this.debug) {
3654
+ console.log(`[DEBUG] Extracted ${extractedBlocks.length} raw output blocks (${extractedBlocks.reduce((sum, b) => sum + b.length, 0)} chars) from tool result`);
3641
3655
  }
3642
3656
  }
3643
3657
 
@@ -3887,15 +3901,17 @@ Follow these instructions carefully:
3887
3901
  toolResultContent = toolResultContent.split(wsPrefix).join('');
3888
3902
  }
3889
3903
 
3890
- // Extract raw output blocks and pass them through to output buffer (before truncation)
3904
+ // Extract raw output blocks from tool result (before truncation)
3891
3905
  // This prevents LLM from processing/hallucinating large structured output from execute_plan
3892
- if (this._outputBuffer) {
3893
- const { cleanedContent, extractedBlocks } = extractRawOutputBlocks(toolResultContent, this._outputBuffer);
3894
- if (extractedBlocks.length > 0) {
3895
- toolResultContent = cleanedContent;
3896
- if (this.debug) {
3897
- console.log(`[DEBUG] Extracted ${extractedBlocks.length} raw output blocks (${extractedBlocks.reduce((sum, b) => sum + b.length, 0)} chars) to output buffer`);
3898
- }
3906
+ // Push to _extractedRawBlocks (NOT _outputBuffer) to prevent the cycle where:
3907
+ // formatSuccess wraps → extract re-adds → next execute_plan re-wraps (issue #438)
3908
+ const { cleanedContent, extractedBlocks } = extractRawOutputBlocks(toolResultContent);
3909
+ if (extractedBlocks.length > 0) {
3910
+ toolResultContent = cleanedContent;
3911
+ // Accumulate extracted blocks separately from DSL output() buffer
3912
+ this._extractedRawBlocks.push(...extractedBlocks);
3913
+ if (this.debug) {
3914
+ console.log(`[DEBUG] Extracted ${extractedBlocks.length} raw output blocks (${extractedBlocks.reduce((sum, b) => sum + b.length, 0)} chars) from tool result`);
3899
3915
  }
3900
3916
  }
3901
3917
 
@@ -4314,16 +4330,18 @@ After reviewing, provide your final answer using attempt_completion.`;
4314
4330
 
4315
4331
  // Make a follow-up call with the completion prompt
4316
4332
  // Pass _completionPromptProcessed to prevent infinite loops
4317
- // Save output buffer — the recursive answer() must not destroy DSL output() content
4333
+ // Save output buffers — the recursive answer() must not destroy DSL output() content
4318
4334
  const savedOutputItems = this._outputBuffer ? [...this._outputBuffer.items] : [];
4335
+ const savedExtractedBlocks = this._extractedRawBlocks ? [...this._extractedRawBlocks] : [];
4319
4336
  const completionResult = await this.answer(completionPromptMessage, [], {
4320
4337
  ...options,
4321
4338
  _completionPromptProcessed: true
4322
4339
  });
4323
- // Restore output buffer so the parent call can append it to the final result
4340
+ // Restore output buffers so the parent call can append them to the final result
4324
4341
  if (this._outputBuffer) {
4325
4342
  this._outputBuffer.items = savedOutputItems;
4326
4343
  }
4344
+ this._extractedRawBlocks = savedExtractedBlocks;
4327
4345
 
4328
4346
  // Update finalResult with the result from the completion prompt
4329
4347
  finalResult = completionResult;
@@ -4782,17 +4800,38 @@ Convert your previous response content into actual JSON data that follows this s
4782
4800
  }
4783
4801
 
4784
4802
  // Remove thinking tags from final result before returning to user
4803
+ // Skip for valid JSON to avoid destroying JSON structure when <thinking> appears
4804
+ // inside string values (e.g., after tryAutoWrapForSimpleSchema embeds content with
4805
+ // residual thinking tag fragments — issue #439)
4785
4806
  if (!options._schemaFormatted) {
4786
- finalResult = removeThinkingTags(finalResult);
4787
- if (this.debug) {
4788
- console.log(`[DEBUG] Removed thinking tags from final result`);
4807
+ let isValidJson = false;
4808
+ try {
4809
+ JSON.parse(finalResult);
4810
+ isValidJson = true;
4811
+ } catch {
4812
+ // Not valid JSON, proceed with thinking tag removal
4813
+ }
4814
+
4815
+ if (!isValidJson) {
4816
+ finalResult = removeThinkingTags(finalResult);
4817
+ if (this.debug) {
4818
+ console.log(`[DEBUG] Removed thinking tags from final result`);
4819
+ }
4820
+ } else if (this.debug) {
4821
+ console.log(`[DEBUG] Skipped thinking tag removal for valid JSON result (issue #439)`);
4789
4822
  }
4790
4823
  }
4791
4824
 
4792
4825
  // Append DSL output buffer directly to response (bypasses LLM rewriting)
4793
4826
  // Skip during _completionPromptProcessed — only the parent answer() should append the buffer.
4794
- if (this._outputBuffer && this._outputBuffer.items.length > 0 && !options._schemaFormatted && !options._completionPromptProcessed) {
4795
- const outputContent = this._outputBuffer.items.join('\n\n');
4827
+ // Combine _outputBuffer (from DSL output() calls) and _extractedRawBlocks (from tool results)
4828
+ // Using separate accumulators prevents the cycle described in issue #438.
4829
+ const allOutputItems = [
4830
+ ...(this._outputBuffer?.items || []),
4831
+ ...(this._extractedRawBlocks || [])
4832
+ ];
4833
+ if (allOutputItems.length > 0 && !options._schemaFormatted && !options._completionPromptProcessed) {
4834
+ const outputContent = allOutputItems.join('\n\n');
4796
4835
  if (options.schema) {
4797
4836
  // Schema response — the finalResult is JSON. Wrap output in RAW_OUTPUT
4798
4837
  // delimiters so clients (visor, etc.) can extract and propagate the
@@ -4805,9 +4844,10 @@ Convert your previous response content into actual JSON data that follows this s
4805
4844
  options.onStream('\n\n' + outputContent);
4806
4845
  }
4807
4846
  if (this.debug) {
4808
- console.log(`[DEBUG] Appended ${this._outputBuffer.items.length} output buffer items (${outputContent.length} chars) to final result${options.schema ? ' (with RAW_OUTPUT delimiters)' : ''}`);
4847
+ console.log(`[DEBUG] Appended ${allOutputItems.length} output items (${outputContent.length} chars) to final result${options.schema ? ' (with RAW_OUTPUT delimiters)' : ''}`);
4809
4848
  }
4810
4849
  this._outputBuffer.items = [];
4850
+ this._extractedRawBlocks = [];
4811
4851
  }
4812
4852
 
4813
4853
  return finalResult;