@probelabs/probe 0.6.0-rc240 → 0.6.0-rc242

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@probelabs/probe",
3
- "version": "0.6.0-rc240",
3
+ "version": "0.6.0-rc242",
4
4
  "description": "Node.js wrapper for the probe code search tool",
5
5
  "main": "src/index.js",
6
6
  "module": "src/index.js",
@@ -102,6 +102,7 @@ import { formatErrorForAI, ParameterError } from '../utils/error-types.js';
102
102
  import { getCommonPrefix, toRelativePath, safeRealpath } from '../utils/path-validation.js';
103
103
  import { truncateIfNeeded, getMaxOutputTokens } from './outputTruncator.js';
104
104
  import { DelegationManager } from '../delegate.js';
105
+ import { extractRawOutputBlocks } from '../tools/executePlan.js';
105
106
  import {
106
107
  TaskManager,
107
108
  createTaskTool,
@@ -3610,6 +3611,18 @@ Follow these instructions carefully:
3610
3611
 
3611
3612
  let toolResultContent = typeof executionResult === 'string' ? executionResult : JSON.stringify(executionResult, null, 2);
3612
3613
 
3614
+ // Extract raw output blocks and pass them through to output buffer (before truncation)
3615
+ // This prevents LLM from processing/hallucinating large structured output from execute_plan
3616
+ if (this._outputBuffer) {
3617
+ const { cleanedContent, extractedBlocks } = extractRawOutputBlocks(toolResultContent, this._outputBuffer);
3618
+ if (extractedBlocks.length > 0) {
3619
+ toolResultContent = cleanedContent;
3620
+ if (this.debug) {
3621
+ console.log(`[DEBUG] Extracted ${extractedBlocks.length} raw output blocks (${extractedBlocks.reduce((sum, b) => sum + b.length, 0)} chars) to output buffer`);
3622
+ }
3623
+ }
3624
+ }
3625
+
3613
3626
  // Truncate if output exceeds token limit
3614
3627
  try {
3615
3628
  const truncateResult = await truncateIfNeeded(toolResultContent, this.tokenCounter, this.sessionId, this.maxOutputTokens);
@@ -3856,6 +3869,18 @@ Follow these instructions carefully:
3856
3869
  toolResultContent = toolResultContent.split(wsPrefix).join('');
3857
3870
  }
3858
3871
 
3872
+ // Extract raw output blocks and pass them through to output buffer (before truncation)
3873
+ // This prevents LLM from processing/hallucinating large structured output from execute_plan
3874
+ if (this._outputBuffer) {
3875
+ const { cleanedContent, extractedBlocks } = extractRawOutputBlocks(toolResultContent, this._outputBuffer);
3876
+ if (extractedBlocks.length > 0) {
3877
+ toolResultContent = cleanedContent;
3878
+ if (this.debug) {
3879
+ console.log(`[DEBUG] Extracted ${extractedBlocks.length} raw output blocks (${extractedBlocks.reduce((sum, b) => sum + b.length, 0)} chars) to output buffer`);
3880
+ }
3881
+ }
3882
+ }
3883
+
3859
3884
  // Truncate if output exceeds token limit
3860
3885
  try {
3861
3886
  const truncateResult = await truncateIfNeeded(toolResultContent, this.tokenCounter, this.sessionId, this.maxOutputTokens);
@@ -227,9 +227,21 @@ export function generateSandboxGlobals(options) {
227
227
  }
228
228
 
// LLM() built-in — delegate already has its own OTEL, but we add a DSL-level span.
// When a schema is provided, the string result is auto-parsed as JSON for easier
// downstream processing; if parsing fails the raw string is returned unchanged.
if (llmCall) {
  /**
   * DSL-level LLM call.
   * @param {*} instruction - Forwarded unchanged to llmCall
   * @param {*} data - Forwarded unchanged to llmCall
   * @param {Object} [opts] - Forwarded unchanged to llmCall; `opts.schema` enables JSON auto-parsing
   * @returns {Promise<*>} Parsed JSON when a schema is set and the result is valid JSON, otherwise the raw result
   */
  const rawLLM = async (instruction, data, opts = {}) => {
    const result = await llmCall(instruction, data, opts);

    // The default value only covers `undefined`; an explicit `null` must not
    // throw here after the LLM call has already completed.
    if (!opts?.schema || typeof result !== 'string') {
      return result;
    }

    try {
      return JSON.parse(result);
    } catch {
      // If parsing fails, return the raw string (may have formatting issues)
      logFn?.('[LLM] Warning: schema provided but result is not valid JSON');
      return result;
    }
  };
  globals.LLM = traceToolCall('LLM', rawLLM, tracer, logFn);
}
@@ -306,6 +318,81 @@ export function generateSandboxGlobals(options) {
306
318
  return chunks;
307
319
  };
308
320
 
// chunkByKey() - chunk "File: <path>" delimited text so same-key blocks stay together
// - A block runs from one line starting with "File: " up to the next such line
// - keyFn(path) maps the trimmed text after "File: " to a grouping key
//   (the path itself is the key when keyFn is not a function)
// - Chunks CAN contain multiple keys
// - The same key NEVER splits across chunks: blocks are grouped by key first
//   (keys ordered by first appearance), so interleaved input cannot split a key
// - A chunk may exceed the budget when one key's blocks are larger than it
// - Text before the first "File: " line is not included in any chunk
// - Without any "File: " line the call falls back to globals.chunk(data, maxTokens)
globals.chunkByKey = (data, keyFn, maxTokens = 20000) => {
  const CHARS_PER_TOKEN = 4; // size budget is approximated as 4 characters per token
  const SEPARATOR = '\n\n';
  const maxChars = maxTokens * CHARS_PER_TOKEN;
  const text = typeof data === 'string' ? data : JSON.stringify(data);

  // Find all File: markers (JSON.stringify may return undefined, e.g. for undefined input)
  const markers = [];
  if (typeof text === 'string') {
    const blockRegex = /^File: ([^\n]+)/gm;
    let match;
    while ((match = blockRegex.exec(text)) !== null) {
      markers.push({ index: match.index, file: match[1].trim() });
    }
  }

  // No File: headers - fallback to regular chunk
  if (markers.length === 0) {
    return globals.chunk(data, maxTokens);
  }

  // Group blocks by key. A Map iterates in insertion order, so keys keep the
  // order of their first appearance and blocks keep their relative order.
  const blocksByKey = new Map();
  markers.forEach((marker, i) => {
    const end = i + 1 < markers.length ? markers[i + 1].index : text.length;
    const block = text.slice(marker.index, end).trim();
    const key = typeof keyFn === 'function' ? keyFn(marker.file) : marker.file;
    const group = blocksByKey.get(key);
    if (group) {
      group.push(block);
    } else {
      blocksByKey.set(key, [block]);
    }
  });

  const chunks = [];
  let currentChunk = '';
  let currentSize = 0;

  for (const blocks of blocksByKey.values()) {
    blocks.forEach((block, position) => {
      const blockSize = block.length + SEPARATOR.length;
      const startsNewKey = position === 0;

      // Only the first block of a key may trigger a flush; later blocks of the
      // same key are always appended so the key is never split.
      if (startsNewKey && currentChunk && currentSize + blockSize > maxChars) {
        chunks.push(currentChunk.trim());
        currentChunk = '';
        currentSize = 0;
      }

      if (currentChunk) {
        currentChunk += SEPARATOR;
      }
      currentChunk += block;
      currentSize += blockSize;
    });
  }

  // Flush final chunk
  if (currentChunk.trim()) {
    chunks.push(currentChunk.trim());
  }

  return chunks.length > 0 ? chunks : [''];
};
386
+
// extractPaths() - extract unique file paths from search results
// Parses lines starting with "File: " and returns a deduplicated array of the
// trimmed text that follows, in order of first appearance.
// Non-string input is JSON-stringified first; input that cannot be stringified
// to text (e.g. undefined) yields an empty array instead of throwing.
globals.extractPaths = (searchResults) => {
  const text = typeof searchResults === 'string' ? searchResults : JSON.stringify(searchResults);
  if (typeof text !== 'string') {
    return [];
  }

  const uniquePaths = new Set();
  for (const [, path] of text.matchAll(/^File: ([^\n]+)/gm)) {
    uniquePaths.add(path.trim());
  }
  return [...uniquePaths];
};
395
+
309
396
  // Utility functions (pure, no async)
310
397
  globals.log = (message) => {
311
398
  // Collected by the runtime for the execution log
@@ -9,6 +9,81 @@
9
9
  import * as acorn from 'acorn';
10
10
  import * as walk from 'acorn-walk';
11
11
 
/**
 * Convert a character offset to 1-based line and column numbers.
 *
 * Lines are separated by '\n'; the newline character itself is reported as the
 * last column of the line it terminates. Negative or non-numeric offsets are
 * clamped to 0. Offsets past the end of the code resolve to column 1 of the
 * last line.
 *
 * @param {string} code - The source code
 * @param {number} offset - Character offset (0-based)
 * @returns {{ line: number, column: number }}
 */
function offsetToLineColumn(code, offset) {
  const numericOffset = Number(offset);
  const target = Number.isNaN(numericOffset) ? 0 : Math.max(0, numericOffset);

  // Walk newline to newline instead of splitting the whole source into an array.
  let line = 1;
  let lineStart = 0;
  let newlineIndex = code.indexOf('\n', lineStart);
  while (newlineIndex !== -1 && newlineIndex < target) {
    line += 1;
    lineStart = newlineIndex + 1;
    newlineIndex = code.indexOf('\n', lineStart);
  }

  if (target > code.length) {
    return { line, column: 1 };
  }
  return { line, column: target - lineStart + 1 };
}
30
+
/**
 * Generate a code snippet with a caret pointing to the error location.
 *
 * Each shown line is rendered as "<marker> <lineNo> | <source>", where the
 * marker is '>' on the error line and a space elsewhere. A caret line is
 * inserted directly below the error line.
 *
 * @param {string} code - The source code
 * @param {number} line - Line number (1-based)
 * @param {number} column - Column number (1-based)
 * @param {number} contextLines - Number of lines to show before/after (default: 2)
 * @returns {string}
 */
function generateErrorSnippet(code, line, column, contextLines = 2) {
  const lines = code.split('\n');
  const startLine = Math.max(0, line - 1 - contextLines);
  const endLine = Math.min(lines.length, line + contextLines);
  const lineNumWidth = String(endLine).length;

  const snippetLines = [];
  for (let i = startLine; i < endLine; i++) {
    const isErrorLine = i + 1 === line;
    const marker = isErrorLine ? '>' : ' ';
    const lineNum = String(i + 1).padStart(lineNumWidth, ' ');
    const prefix = `${marker} ${lineNum} | `;
    snippetLines.push(`${prefix}${lines[i]}`);

    if (isErrorLine) {
      // Pad by the real prefix length so the caret sits under the reported
      // column; a hand-counted width missed the marker and was one short.
      const caretOffset = Number.isFinite(column) ? Math.max(0, column - 1) : 0;
      snippetLines.push(`${' '.repeat(prefix.length + caretOffset)}^`);
    }
  }

  return snippetLines.join('\n');
}
62
+
/**
 * Build an error message followed by a source snippet marking the error location.
 *
 * The location comes from `offset` when it is non-negative (converted to
 * line/column); otherwise the explicit `line`/`column` are used. When the
 * resulting line is not positive, the message is returned without a snippet.
 *
 * @param {string} message - The error message
 * @param {string} code - The source code
 * @param {number} offset - Character offset (optional, use -1 if line/column provided)
 * @param {number} line - Line number (optional)
 * @param {number} column - Column number (optional)
 * @returns {string}
 */
function formatErrorWithSnippet(message, code, offset = -1, line = 0, column = 0) {
  const location = offset >= 0 ? offsetToLineColumn(code, offset) : { line, column };

  if (location.line <= 0) {
    return message;
  }

  return `${message}\n\n${generateErrorSnippet(code, location.line, location.column)}`;
}
86
+
12
87
  // Node types the LLM is allowed to generate
13
88
  const ALLOWED_NODE_TYPES = new Set([
14
89
  'Program',
@@ -102,16 +177,32 @@ export function validateDSL(code) {
102
177
  ecmaVersion: 2022,
103
178
  sourceType: 'script',
104
179
  allowReturnOutsideFunction: true,
180
+ locations: true, // Enable location tracking for better error messages
105
181
  });
106
182
  } catch (e) {
107
- return { valid: false, errors: [`Syntax error: ${e.message}`] };
183
+ // Acorn errors have loc property with line/column
184
+ const line = e.loc?.line || 0;
185
+ const column = e.loc?.column ? e.loc.column + 1 : 0; // Acorn column is 0-based
186
+ const formattedError = formatErrorWithSnippet(
187
+ `Syntax error: ${e.message}`,
188
+ code,
189
+ -1,
190
+ line,
191
+ column
192
+ );
193
+ return { valid: false, errors: [formattedError] };
108
194
  }
109
195
 
196
+ // Helper to add error with code snippet
197
+ const addError = (message, position) => {
198
+ errors.push(formatErrorWithSnippet(message, code, position));
199
+ };
200
+
110
201
  // Step 2: Walk every node and validate
111
202
  walk.full(ast, (node) => {
112
203
  // Check node type against whitelist
113
204
  if (!ALLOWED_NODE_TYPES.has(node.type)) {
114
- errors.push(`Blocked node type: ${node.type} at position ${node.start}`);
205
+ addError(`Blocked node type: ${node.type}`, node.start);
115
206
  return;
116
207
  }
117
208
 
@@ -121,7 +212,7 @@ export function validateDSL(code) {
121
212
  node.type === 'FunctionExpression') &&
122
213
  node.async
123
214
  ) {
124
- errors.push(`Async functions are not allowed at position ${node.start}. Write synchronous code — the runtime handles async.`);
215
+ addError(`Async functions are not allowed. Write synchronous code — the runtime handles async.`, node.start);
125
216
  }
126
217
 
127
218
  // Block generator functions
@@ -129,19 +220,19 @@ export function validateDSL(code) {
129
220
  (node.type === 'FunctionExpression') &&
130
221
  node.generator
131
222
  ) {
132
- errors.push(`Generator functions are not allowed at position ${node.start}`);
223
+ addError(`Generator functions are not allowed`, node.start);
133
224
  }
134
225
 
135
226
 
136
227
  // Check identifiers against blocklist
137
228
  if (node.type === 'Identifier' && BLOCKED_IDENTIFIERS.has(node.name)) {
138
- errors.push(`Blocked identifier: '${node.name}' at position ${node.start}`);
229
+ addError(`Blocked identifier: '${node.name}'`, node.start);
139
230
  }
140
231
 
141
232
  // Check member expressions for blocked properties
142
233
  if (node.type === 'MemberExpression' && !node.computed) {
143
234
  if (node.property.type === 'Identifier' && BLOCKED_PROPERTIES.has(node.property.name)) {
144
- errors.push(`Blocked property access: '.${node.property.name}' at position ${node.property.start}`);
235
+ addError(`Blocked property access: '.${node.property.name}'`, node.property.start);
145
236
  }
146
237
  }
147
238
 
@@ -149,7 +240,7 @@ export function validateDSL(code) {
149
240
  if (node.type === 'MemberExpression' && node.computed) {
150
241
  if (node.property.type === 'Literal' && typeof node.property.value === 'string') {
151
242
  if (BLOCKED_PROPERTIES.has(node.property.value) || BLOCKED_IDENTIFIERS.has(node.property.value)) {
152
- errors.push(`Blocked computed property access: '["${node.property.value}"]' at position ${node.property.start}`);
243
+ addError(`Blocked computed property access: '["${node.property.value}"]'`, node.property.start);
153
244
  }
154
245
  }
155
246
  }
@@ -157,7 +248,7 @@ export function validateDSL(code) {
157
248
  // Block variable declarations named with blocked identifiers
158
249
  if (node.type === 'VariableDeclarator' && node.id.type === 'Identifier') {
159
250
  if (BLOCKED_IDENTIFIERS.has(node.id.name)) {
160
- errors.push(`Cannot declare variable with blocked name: '${node.id.name}' at position ${node.id.start}`);
251
+ addError(`Cannot declare variable with blocked name: '${node.id.name}'`, node.id.start);
161
252
  }
162
253
  }
163
254
  });
@@ -16,8 +16,42 @@ import { glob } from 'glob';
16
16
 
17
17
  export { executePlanSchema };
18
18
 
/**
 * Decode common HTML entities that LLMs sometimes produce when generating code.
 * This handles entities like &amp;&amp; → &&, &lt;= → <=, etc.
 *
 * Supported forms (case-insensitive): &amp; &lt; &gt; &quot; &apos;, decimal
 * references (&#60;) and hexadecimal references (&#x3C;).
 *
 * Decoding is done in a single pass so text produced by one replacement is
 * never decoded again (e.g. "&amp;#60;" becomes "&#60;", not "<"). Numeric
 * references above U+10FFFF are left untouched.
 *
 * @param {string} str - Text that may contain HTML entities
 * @returns {string} The text with supported entities decoded
 */
function decodeHtmlEntities(str) {
  const NAMED_ENTITIES = {
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    apos: "'",
  };
  const MAX_CODE_POINT = 0x10ffff;
  const ENTITY_PATTERN = /&(?:(amp|lt|gt|quot|apos)|#(\d+)|#x([0-9a-f]+));/gi;

  return str.replace(ENTITY_PATTERN, (match, name, dec, hex) => {
    if (name !== undefined) {
      return NAMED_ENTITIES[name.toLowerCase()];
    }

    const codePoint = dec !== undefined ? Number.parseInt(dec, 10) : Number.parseInt(hex, 16);
    // fromCodePoint handles astral characters (fromCharCode truncates to 16 bits)
    // but throws on out-of-range values, so those references stay as written.
    return codePoint <= MAX_CODE_POINT ? String.fromCodePoint(codePoint) : match;
  });
}
51
+
19
52
  /**
20
53
  * Strip markdown fences and XML tags that LLMs sometimes wrap code in.
54
+ * Also decodes HTML entities that may appear in XML-extracted code.
21
55
  */
22
56
  function stripCodeWrapping(code) {
23
57
  let s = String(code || '');
@@ -25,6 +59,8 @@ function stripCodeWrapping(code) {
25
59
  s = s.replace(/^```(?:javascript|js)?\n?/gm, '').replace(/```$/gm, '');
26
60
  // Strip XML-style tags: <execute_plan>, </execute_plan>, <code>, </code>
27
61
  s = s.replace(/<\/?(?:execute_plan|code)>/g, '');
62
+ // Decode HTML entities (e.g., &amp;&amp; → &&, &lt;= → <=)
63
+ s = decodeHtmlEntities(s);
28
64
  return s.trim();
29
65
  }
30
66
 
@@ -384,6 +420,51 @@ RULES REMINDER:
384
420
  });
385
421
  }
386
422
 
// Delimiters for raw output passthrough - prevents LLM from processing/hallucinating large structured output
export const RAW_OUTPUT_START = '<<<RAW_OUTPUT>>>';
export const RAW_OUTPUT_END = '<<<END_RAW_OUTPUT>>>';

/**
 * Extract raw output blocks from tool result content and pass them through to the output buffer.
 * This prevents parent LLMs from processing/hallucinating large structured output.
 *
 * A block is the text between a RAW_OUTPUT_START line and a RAW_OUTPUT_END line.
 * Each block is removed from the content, together with the
 * "[The above raw output (N chars) will be passed directly ...]" notice, and the
 * remaining content is trimmed. Non-string content is returned unchanged.
 *
 * @param {string} content - The tool result content
 * @param {Object} [outputBuffer] - The output buffer to append extracted content to;
 *   its `items` array is created when missing and blocks are appended in order
 * @returns {{ cleanedContent: string, extractedBlocks: string[] }} - Content with blocks removed and extracted blocks
 */
export function extractRawOutputBlocks(content, outputBuffer = null) {
  if (typeof content !== 'string') {
    return { cleanedContent: content, extractedBlocks: [] };
  }

  const blockPattern = new RegExp(`${RAW_OUTPUT_START}\\n([\\s\\S]*?)\\n${RAW_OUTPUT_END}`, 'g');
  const noticePattern = /\n\n\[The above raw output \(\d+ chars\) will be passed directly to the final response\. Do NOT repeat, summarize, or modify it\.\]/g;

  // Collect and strip the blocks in one pass so the extracted set and the
  // removed set cannot drift apart.
  const extractedBlocks = [];
  const cleanedContent = content
    .replace(blockPattern, (_whole, blockBody) => {
      extractedBlocks.push(blockBody);
      return '';
    })
    .replace(noticePattern, '')
    .trim();

  // If output buffer provided, append extracted content
  if (outputBuffer && extractedBlocks.length > 0) {
    if (!outputBuffer.items) {
      outputBuffer.items = [];
    }
    for (const block of extractedBlocks) {
      outputBuffer.items.push(block);
    }
  }

  return { cleanedContent, extractedBlocks };
}
467
+
387
468
  function formatSuccess(result, description, attempt, outputBuffer) {
388
469
  let output = '';
389
470
 
@@ -416,10 +497,12 @@ function formatSuccess(result, description, attempt, outputBuffer) {
416
497
  }
417
498
  }
418
499
 
419
- // If output buffer has content, tell the LLM the data was written to direct output
500
+ // If output buffer has content, wrap it in delimiters for passthrough
501
+ // This prevents parent LLMs from processing/hallucinating the raw data
420
502
  if (outputBuffer && outputBuffer.items && outputBuffer.items.length > 0) {
421
- const totalChars = outputBuffer.items.reduce((sum, item) => sum + item.length, 0);
422
- output += `\n\n[Output buffer: ${totalChars} chars written via output(). This content will be appended directly to your response. Do NOT repeat or summarize it.]`;
503
+ const rawContent = outputBuffer.items.join('\n');
504
+ output += `\n\n${RAW_OUTPUT_START}\n${rawContent}\n${RAW_OUTPUT_END}`;
505
+ output += `\n\n[The above raw output (${rawContent.length} chars) will be passed directly to the final response. Do NOT repeat, summarize, or modify it.]`;
423
506
  }
424
507
 
425
508
  return output;