@probelabs/probe 0.6.0-rc241 → 0.6.0-rc245

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -102,6 +102,7 @@ import { formatErrorForAI, ParameterError } from '../utils/error-types.js';
102
102
  import { getCommonPrefix, toRelativePath, safeRealpath } from '../utils/path-validation.js';
103
103
  import { truncateIfNeeded, getMaxOutputTokens } from './outputTruncator.js';
104
104
  import { DelegationManager } from '../delegate.js';
105
+ import { extractRawOutputBlocks } from '../tools/executePlan.js';
105
106
  import {
106
107
  TaskManager,
107
108
  createTaskTool,
@@ -3610,6 +3611,18 @@ Follow these instructions carefully:
3610
3611
 
3611
3612
  let toolResultContent = typeof executionResult === 'string' ? executionResult : JSON.stringify(executionResult, null, 2);
3612
3613
 
3614
+ // Extract raw output blocks and pass them through to output buffer (before truncation)
3615
+ // This prevents LLM from processing/hallucinating large structured output from execute_plan
3616
+ if (this._outputBuffer) {
3617
+ const { cleanedContent, extractedBlocks } = extractRawOutputBlocks(toolResultContent, this._outputBuffer);
3618
+ if (extractedBlocks.length > 0) {
3619
+ toolResultContent = cleanedContent;
3620
+ if (this.debug) {
3621
+ console.log(`[DEBUG] Extracted ${extractedBlocks.length} raw output blocks (${extractedBlocks.reduce((sum, b) => sum + b.length, 0)} chars) to output buffer`);
3622
+ }
3623
+ }
3624
+ }
3625
+
3613
3626
  // Truncate if output exceeds token limit
3614
3627
  try {
3615
3628
  const truncateResult = await truncateIfNeeded(toolResultContent, this.tokenCounter, this.sessionId, this.maxOutputTokens);
@@ -3856,6 +3869,18 @@ Follow these instructions carefully:
3856
3869
  toolResultContent = toolResultContent.split(wsPrefix).join('');
3857
3870
  }
3858
3871
 
3872
+ // Extract raw output blocks and pass them through to output buffer (before truncation)
3873
+ // This prevents LLM from processing/hallucinating large structured output from execute_plan
3874
+ if (this._outputBuffer) {
3875
+ const { cleanedContent, extractedBlocks } = extractRawOutputBlocks(toolResultContent, this._outputBuffer);
3876
+ if (extractedBlocks.length > 0) {
3877
+ toolResultContent = cleanedContent;
3878
+ if (this.debug) {
3879
+ console.log(`[DEBUG] Extracted ${extractedBlocks.length} raw output blocks (${extractedBlocks.reduce((sum, b) => sum + b.length, 0)} chars) to output buffer`);
3880
+ }
3881
+ }
3882
+ }
3883
+
3859
3884
  // Truncate if output exceeds token limit
3860
3885
  try {
3861
3886
  const truncateResult = await truncateIfNeeded(toolResultContent, this.tokenCounter, this.sessionId, this.maxOutputTokens);
@@ -4740,12 +4765,19 @@ Convert your previous response content into actual JSON data that follows this s
4740
4765
  // Append DSL output buffer directly to response (bypasses LLM rewriting)
4741
4766
  if (this._outputBuffer && this._outputBuffer.items.length > 0 && !options._schemaFormatted) {
4742
4767
  const outputContent = this._outputBuffer.items.join('\n\n');
4743
- finalResult = (finalResult || '') + '\n\n' + outputContent;
4768
+ if (options.schema) {
4769
+ // Schema response — the finalResult is JSON. Wrap output in RAW_OUTPUT
4770
+ // delimiters so clients (visor, etc.) can extract and propagate the
4771
+ // content separately from the JSON.
4772
+ finalResult = (finalResult || '') + '\n<<<RAW_OUTPUT>>>\n' + outputContent + '\n<<<END_RAW_OUTPUT>>>';
4773
+ } else {
4774
+ finalResult = (finalResult || '') + '\n\n' + outputContent;
4775
+ }
4744
4776
  if (options.onStream) {
4745
4777
  options.onStream('\n\n' + outputContent);
4746
4778
  }
4747
4779
  if (this.debug) {
4748
- console.log(`[DEBUG] Appended ${this._outputBuffer.items.length} output buffer items (${outputContent.length} chars) to final result`);
4780
+ console.log(`[DEBUG] Appended ${this._outputBuffer.items.length} output buffer items (${outputContent.length} chars) to final result${options.schema ? ' (with RAW_OUTPUT delimiters)' : ''}`);
4749
4781
  }
4750
4782
  this._outputBuffer.items = [];
4751
4783
  }
@@ -227,9 +227,21 @@ export function generateSandboxGlobals(options) {
227
227
  }
228
228
 
229
229
  // LLM() built-in — delegate already has its own OTEL, but we add a DSL-level span
230
+ // When schema is provided, auto-parse the JSON result for easier downstream processing
230
231
  if (llmCall) {
231
232
  const rawLLM = async (instruction, data, opts = {}) => {
232
- return llmCall(instruction, data, opts);
233
+ const result = await llmCall(instruction, data, opts);
234
+ // Auto-parse JSON when schema is provided and result is a string
235
+ if (opts.schema && typeof result === 'string') {
236
+ try {
237
+ return JSON.parse(result);
238
+ } catch (e) {
239
+ // If parsing fails, return the raw string (may have formatting issues)
240
+ logFn?.('[LLM] Warning: schema provided but result is not valid JSON');
241
+ return result;
242
+ }
243
+ }
244
+ return result;
233
245
  };
234
246
  globals.LLM = traceToolCall('LLM', rawLLM, tracer, logFn);
235
247
  }
@@ -306,6 +318,81 @@ export function generateSandboxGlobals(options) {
306
318
  return chunks;
307
319
  };
308
320
 
321
// chunkByKey() - split "File: <path>" delimited text into chunks of roughly
// maxTokens tokens while keeping every block that shares a key in ONE chunk.
//
// - data:      string (or any value, which is JSON.stringify'd) to split
// - keyFn:     optional function mapping a file path to its grouping key;
//              when it is not a function the file path itself is the key
// - maxTokens: soft budget per chunk, estimated at 4 characters per token
//
// Returns an array of chunk strings. When the text has no "File: " headers the
// call is delegated to globals.chunk(data, maxTokens).
//
// Notes:
// - A chunk may hold several keys, but a key is never spread over two chunks.
//   Blocks are grouped by key first, so a key that reappears later in the
//   input is moved next to its first occurrence instead of being split.
// - A single key whose blocks exceed the budget still forms one oversized chunk.
// - Text that precedes the first "File: " header is not part of any chunk.
globals.chunkByKey = (data, keyFn, maxTokens = 20000) => {
  const CHARS_PER_TOKEN = 4;
  const SEPARATOR = '\n\n';
  const maxChars = maxTokens * CHARS_PER_TOKEN;
  const text = typeof data === 'string' ? data : JSON.stringify(data);

  // Locate every "File: <path>" header; each one starts a block.
  const markers = Array.from(text.matchAll(/^File: ([^\n]+)/gm), (found) => ({
    index: found.index,
    file: found[1].trim(),
  }));

  // No File: headers - fallback to regular chunk
  if (markers.length === 0) {
    return globals.chunk(data, maxTokens);
  }

  // Group blocks by key. A Map iterates in insertion order, so groups keep the
  // order in which their key first appeared.
  const groups = new Map();
  markers.forEach((marker, i) => {
    const end = i + 1 < markers.length ? markers[i + 1].index : text.length;
    const block = text.slice(marker.index, end).trim();
    const key = typeof keyFn === 'function' ? keyFn(marker.file) : marker.file;
    const blockSize = block.length + SEPARATOR.length;

    const group = groups.get(key);
    if (group) {
      group.blocks.push(block);
      group.size += blockSize;
    } else {
      groups.set(key, { blocks: [block], size: blockSize });
    }
  });

  // Pack whole groups into chunks; flush before a group that would overflow.
  const chunks = [];
  let currentBlocks = [];
  let currentSize = 0;
  for (const { blocks, size } of groups.values()) {
    if (currentBlocks.length > 0 && currentSize + size > maxChars) {
      chunks.push(currentBlocks.join(SEPARATOR));
      currentBlocks = [];
      currentSize = 0;
    }
    for (const block of blocks) {
      currentBlocks.push(block);
    }
    currentSize += size;
  }

  // At least one marker exists here, so there is always a final chunk.
  chunks.push(currentBlocks.join(SEPARATOR));
  return chunks;
};
386
+
387
// extractPaths() - collect the distinct file paths named in search results.
// Every line that starts with "File: " contributes the trimmed remainder of
// that line; duplicates are dropped and first-seen order is kept.
globals.extractPaths = (searchResults) => {
  const haystack = typeof searchResults === 'string' ? searchResults : JSON.stringify(searchResults);
  const uniquePaths = new Set();
  for (const [, rawPath] of haystack.matchAll(/^File: ([^\n]+)/gm)) {
    uniquePaths.add(rawPath.trim());
  }
  return Array.from(uniquePaths);
};
395
+
309
396
  // Utility functions (pure, no async)
310
397
  globals.log = (message) => {
311
398
  // Collected by the runtime for the execution log
@@ -21903,7 +21903,16 @@ function generateSandboxGlobals(options) {
21903
21903
  }
21904
21904
  if (llmCall) {
21905
21905
  const rawLLM = async (instruction, data2, opts = {}) => {
21906
- return llmCall(instruction, data2, opts);
21906
+ const result = await llmCall(instruction, data2, opts);
21907
+ if (opts.schema && typeof result === "string") {
21908
+ try {
21909
+ return JSON.parse(result);
21910
+ } catch (e) {
21911
+ logFn?.("[LLM] Warning: schema provided but result is not valid JSON");
21912
+ return result;
21913
+ }
21914
+ }
21915
+ return result;
21907
21916
  };
21908
21917
  globals.LLM = traceToolCall("LLM", rawLLM, tracer, logFn);
21909
21918
  }
@@ -21960,6 +21969,54 @@ function generateSandboxGlobals(options) {
21960
21969
  }
21961
21970
  return chunks;
21962
21971
  };
21972
// chunkByKey() - split "File: <path>" delimited text into chunks of roughly
// maxTokens tokens (estimated at 4 characters per token) while keeping every
// block that shares a key in ONE chunk. keyFn maps a file path to its key; when
// it is not a function the path itself is the key. Falls back to
// globals.chunk(data2, maxTokens) when the text has no "File: " headers.
// Blocks are grouped by key first, so a key that reappears later in the input
// is moved next to its first occurrence instead of being split across chunks.
// Text that precedes the first "File: " header is not part of any chunk.
globals.chunkByKey = (data2, keyFn, maxTokens = 2e4) => {
  const CHARS_PER_TOKEN3 = 4;
  const SEPARATOR = "\n\n";
  const maxChars = maxTokens * CHARS_PER_TOKEN3;
  const text = typeof data2 === "string" ? data2 : JSON.stringify(data2);

  // Locate every "File: <path>" header; each one starts a block.
  const markers = Array.from(text.matchAll(/^File: ([^\n]+)/gm), (found) => ({
    index: found.index,
    file: found[1].trim()
  }));
  if (markers.length === 0) {
    return globals.chunk(data2, maxTokens);
  }

  // Group blocks by key. A Map iterates in insertion order, so groups keep the
  // order in which their key first appeared.
  const groups = /* @__PURE__ */ new Map();
  markers.forEach((marker, i) => {
    const end = i + 1 < markers.length ? markers[i + 1].index : text.length;
    const block = text.slice(marker.index, end).trim();
    const key = typeof keyFn === "function" ? keyFn(marker.file) : marker.file;
    const blockSize = block.length + SEPARATOR.length;
    const group = groups.get(key);
    if (group) {
      group.blocks.push(block);
      group.size += blockSize;
    } else {
      groups.set(key, { blocks: [block], size: blockSize });
    }
  });

  // Pack whole groups into chunks; flush before a group that would overflow.
  const chunks = [];
  let currentBlocks = [];
  let currentSize = 0;
  for (const { blocks, size } of groups.values()) {
    if (currentBlocks.length > 0 && currentSize + size > maxChars) {
      chunks.push(currentBlocks.join(SEPARATOR));
      currentBlocks = [];
      currentSize = 0;
    }
    for (const block of blocks) {
      currentBlocks.push(block);
    }
    currentSize += size;
  }

  // At least one marker exists here, so there is always a final chunk.
  chunks.push(currentBlocks.join(SEPARATOR));
  return chunks;
};
22014
// extractPaths() - collect the distinct file paths named in search results.
// Every line that starts with "File: " contributes the trimmed remainder of
// that line; duplicates are dropped and first-seen order is kept.
globals.extractPaths = (searchResults) => {
  const haystack = typeof searchResults === "string" ? searchResults : JSON.stringify(searchResults);
  const uniquePaths = /* @__PURE__ */ new Set();
  for (const [, rawPath] of haystack.matchAll(/^File: ([^\n]+)/gm)) {
    uniquePaths.add(rawPath.trim());
  }
  return Array.from(uniquePaths);
};
21963
22020
  globals.log = (message) => {
21964
22021
  if (globals._logs) globals._logs.push(String(message));
21965
22022
  };
@@ -29205,6 +29262,26 @@ Last error: ${lastError}`;
29205
29262
  }
29206
29263
  });
29207
29264
  }
29265
/**
 * Pull every RAW_OUTPUT_START ... RAW_OUTPUT_END block out of a tool result.
 *
 * The text between the delimiters is collected, the delimited spans and the
 * "[The above raw output (N chars) ...]" note are removed from the content, and
 * the remainder is trimmed. When an output buffer is given and at least one
 * block was found, the blocks are appended to outputBuffer.items (the array is
 * created if missing).
 *
 * @param {string} content - Tool result content; non-strings are returned unchanged.
 * @param {Object} [outputBuffer] - Buffer whose `items` array receives the blocks.
 * @returns {{ cleanedContent: string, extractedBlocks: string[] }}
 */
function extractRawOutputBlocks(content, outputBuffer = null) {
  if (typeof content !== "string") {
    return { cleanedContent: content, extractedBlocks: [] };
  }

  // Escape the delimiters so they are always matched literally, whatever
  // characters they contain.
  const escapeForRegExp = (literal) => literal.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const blockPattern = new RegExp(
    `${escapeForRegExp(RAW_OUTPUT_START)}\\n([\\s\\S]*?)\\n${escapeForRegExp(RAW_OUTPUT_END)}`,
    "g"
  );

  // One pass both collects and removes each block, so the extracted text and
  // the removed span always correspond.
  const extractedBlocks = [];
  const cleanedContent = content.replace(blockPattern, (_span, body) => {
    extractedBlocks.push(body);
    return "";
  }).replace(/\n\n\[The above raw output \(\d+ chars\) will be passed directly to the final response\. Do NOT repeat, summarize, or modify it\.\]/g, "").trim();

  if (outputBuffer && extractedBlocks.length > 0) {
    outputBuffer.items = outputBuffer.items || [];
    for (const block of extractedBlocks) {
      outputBuffer.items.push(block);
    }
  }
  return { cleanedContent, extractedBlocks };
}
29208
29285
  function formatSuccess(result, description, attempt, outputBuffer) {
29209
29286
  let output = "";
29210
29287
  if (description) {
@@ -29241,10 +29318,15 @@ ${JSON.stringify(resultValue, null, 2)}`;
29241
29318
  }
29242
29319
  }
29243
29320
  if (outputBuffer && outputBuffer.items && outputBuffer.items.length > 0) {
29244
- const totalChars = outputBuffer.items.reduce((sum, item) => sum + item.length, 0);
29321
+ const rawContent = outputBuffer.items.join("\n");
29322
+ output += `
29323
+
29324
+ ${RAW_OUTPUT_START}
29325
+ ${rawContent}
29326
+ ${RAW_OUTPUT_END}`;
29245
29327
  output += `
29246
29328
 
29247
- [Output buffer: ${totalChars} chars written via output(). This content will be appended directly to your response. Do NOT repeat or summarize it.]`;
29329
+ [The above raw output (${rawContent.length} chars) will be passed directly to the final response. Do NOT repeat, summarize, or modify it.]`;
29248
29330
  }
29249
29331
  return output;
29250
29332
  }
@@ -29693,6 +29775,7 @@ Example:
29693
29775
  <clearOutputBuffer>true</clearOutputBuffer>
29694
29776
  </cleanup_execute_plan>`;
29695
29777
  }
29778
+ var RAW_OUTPUT_START, RAW_OUTPUT_END;
29696
29779
  var init_executePlan = __esm({
29697
29780
  "src/tools/executePlan.js"() {
29698
29781
  "use strict";
@@ -29703,6 +29786,8 @@ var init_executePlan = __esm({
29703
29786
  init_extract();
29704
29787
  init_delegate();
29705
29788
  init_esm5();
29789
+ RAW_OUTPUT_START = "<<<RAW_OUTPUT>>>";
29790
+ RAW_OUTPUT_END = "<<<END_RAW_OUTPUT>>>";
29706
29791
  }
29707
29792
  });
29708
29793
 
@@ -81148,6 +81233,7 @@ var init_ProbeAgent = __esm({
81148
81233
  init_path_validation();
81149
81234
  init_outputTruncator();
81150
81235
  init_delegate();
81236
+ init_executePlan();
81151
81237
  init_tasks();
81152
81238
  dotenv2.config();
81153
81239
  ENGINE_ACTIVITY_TIMEOUT_DEFAULT = 18e4;
@@ -83879,6 +83965,15 @@ You are working with a workspace. Available paths: ${workspaceDesc}
83879
83965
  }
83880
83966
  const executionResult = await this.mcpBridge.mcpTools[toolName].execute(params);
83881
83967
  let toolResultContent = typeof executionResult === "string" ? executionResult : JSON.stringify(executionResult, null, 2);
83968
+ if (this._outputBuffer) {
83969
+ const { cleanedContent, extractedBlocks } = extractRawOutputBlocks(toolResultContent, this._outputBuffer);
83970
+ if (extractedBlocks.length > 0) {
83971
+ toolResultContent = cleanedContent;
83972
+ if (this.debug) {
83973
+ console.log(`[DEBUG] Extracted ${extractedBlocks.length} raw output blocks (${extractedBlocks.reduce((sum, b) => sum + b.length, 0)} chars) to output buffer`);
83974
+ }
83975
+ }
83976
+ }
83882
83977
  try {
83883
83978
  const truncateResult = await truncateIfNeeded(toolResultContent, this.tokenCounter, this.sessionId, this.maxOutputTokens);
83884
83979
  if (truncateResult.truncated) {
@@ -84085,6 +84180,15 @@ ${errorXml}
84085
84180
  const wsPrefix = this.workspaceRoot.endsWith(sep5) ? this.workspaceRoot : this.workspaceRoot + sep5;
84086
84181
  toolResultContent = toolResultContent.split(wsPrefix).join("");
84087
84182
  }
84183
+ if (this._outputBuffer) {
84184
+ const { cleanedContent, extractedBlocks } = extractRawOutputBlocks(toolResultContent, this._outputBuffer);
84185
+ if (extractedBlocks.length > 0) {
84186
+ toolResultContent = cleanedContent;
84187
+ if (this.debug) {
84188
+ console.log(`[DEBUG] Extracted ${extractedBlocks.length} raw output blocks (${extractedBlocks.reduce((sum, b) => sum + b.length, 0)} chars) to output buffer`);
84189
+ }
84190
+ }
84191
+ }
84088
84192
  try {
84089
84193
  const truncateResult = await truncateIfNeeded(toolResultContent, this.tokenCounter, this.sessionId, this.maxOutputTokens);
84090
84194
  if (truncateResult.truncated) {
@@ -84770,12 +84874,16 @@ Convert your previous response content into actual JSON data that follows this s
84770
84874
  }
84771
84875
  if (this._outputBuffer && this._outputBuffer.items.length > 0 && !options._schemaFormatted) {
84772
84876
  const outputContent = this._outputBuffer.items.join("\n\n");
84773
- finalResult = (finalResult || "") + "\n\n" + outputContent;
84877
+ if (options.schema) {
84878
+ finalResult = (finalResult || "") + "\n<<<RAW_OUTPUT>>>\n" + outputContent + "\n<<<END_RAW_OUTPUT>>>";
84879
+ } else {
84880
+ finalResult = (finalResult || "") + "\n\n" + outputContent;
84881
+ }
84774
84882
  if (options.onStream) {
84775
84883
  options.onStream("\n\n" + outputContent);
84776
84884
  }
84777
84885
  if (this.debug) {
84778
- console.log(`[DEBUG] Appended ${this._outputBuffer.items.length} output buffer items (${outputContent.length} chars) to final result`);
84886
+ console.log(`[DEBUG] Appended ${this._outputBuffer.items.length} output buffer items (${outputContent.length} chars) to final result${options.schema ? " (with RAW_OUTPUT delimiters)" : ""}`);
84779
84887
  }
84780
84888
  this._outputBuffer.items = [];
84781
84889
  }
@@ -420,6 +420,51 @@ RULES REMINDER:
420
420
  });
421
421
  }
422
422
 
423
// Delimiters for raw output passthrough - prevents LLM from processing/hallucinating large structured output
export const RAW_OUTPUT_START = '<<<RAW_OUTPUT>>>';
export const RAW_OUTPUT_END = '<<<END_RAW_OUTPUT>>>';

/**
 * Extract raw output blocks from tool result content and pass them through to the output buffer.
 * This prevents parent LLMs from processing/hallucinating large structured output.
 *
 * The text between each RAW_OUTPUT_START / RAW_OUTPUT_END pair is collected, the
 * delimited spans and the "[The above raw output (N chars) ...]" note are removed,
 * and the remaining content is trimmed. Non-string content is returned unchanged
 * with no extracted blocks.
 *
 * @param {string} content - The tool result content
 * @param {Object} [outputBuffer] - The output buffer to append extracted content to;
 *   its `items` array is created when missing, and it is only touched when at
 *   least one block was found
 * @returns {{ cleanedContent: string, extractedBlocks: string[] }} - Content with blocks removed and extracted blocks
 */
export function extractRawOutputBlocks(content, outputBuffer = null) {
  if (typeof content !== 'string') {
    return { cleanedContent: content, extractedBlocks: [] };
  }

  // Escape the delimiters so they are always matched literally, whatever
  // characters they contain.
  const escapeForRegExp = (literal) => literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const blockPattern = new RegExp(
    `${escapeForRegExp(RAW_OUTPUT_START)}\\n([\\s\\S]*?)\\n${escapeForRegExp(RAW_OUTPUT_END)}`,
    'g',
  );

  // One pass both collects and removes each block, so the extracted text and
  // the removed span always correspond.
  const extractedBlocks = [];
  const cleanedContent = content
    .replace(blockPattern, (_span, body) => {
      extractedBlocks.push(body);
      return '';
    })
    .replace(/\n\n\[The above raw output \(\d+ chars\) will be passed directly to the final response\. Do NOT repeat, summarize, or modify it\.\]/g, '')
    .trim();

  // If output buffer provided, append extracted content
  if (outputBuffer && extractedBlocks.length > 0) {
    outputBuffer.items = outputBuffer.items || [];
    for (const block of extractedBlocks) {
      outputBuffer.items.push(block);
    }
  }

  return { cleanedContent, extractedBlocks };
}
467
+
423
468
  function formatSuccess(result, description, attempt, outputBuffer) {
424
469
  let output = '';
425
470
 
@@ -452,10 +497,12 @@ function formatSuccess(result, description, attempt, outputBuffer) {
452
497
  }
453
498
  }
454
499
 
455
- // If output buffer has content, tell the LLM the data was written to direct output
500
+ // If output buffer has content, wrap it in delimiters for passthrough
501
+ // This prevents parent LLMs from processing/hallucinating the raw data
456
502
  if (outputBuffer && outputBuffer.items && outputBuffer.items.length > 0) {
457
- const totalChars = outputBuffer.items.reduce((sum, item) => sum + item.length, 0);
458
- output += `\n\n[Output buffer: ${totalChars} chars written via output(). This content will be appended directly to your response. Do NOT repeat or summarize it.]`;
503
+ const rawContent = outputBuffer.items.join('\n');
504
+ output += `\n\n${RAW_OUTPUT_START}\n${rawContent}\n${RAW_OUTPUT_END}`;
505
+ output += `\n\n[The above raw output (${rawContent.length} chars) will be passed directly to the final response. Do NOT repeat, summarize, or modify it.]`;
459
506
  }
460
507
 
461
508
  return output;