@probelabs/probe 0.6.0-rc252 → 0.6.0-rc254

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -165,6 +165,39 @@ export function decodeHtmlEntities(text) {
165
165
  return decoded;
166
166
  }
167
167
 
168
/**
 * Sanitize Markdown escape sequences in JSON strings.
 *
 * Markdown uses backslash escapes such as \_, \#, \~ or a backslash before an
 * asterisk. None of these are valid JSON escape sequences, so JSON produced
 * by AI models with Markdown content makes JSON.parse() fail with
 * "Invalid escape" style errors.
 *
 * This function drops the backslash from every invalid escape while keeping
 * the valid JSON escapes untouched: \\, \", \/, \b, \f, \n, \r, \t and
 * \uXXXX (exactly four hexadecimal digits). A "\u" that is NOT followed by
 * four hex digits (e.g. "C:\users") is itself an invalid escape and is
 * sanitized like any other.
 *
 * @param {string} jsonString - JSON text that may contain Markdown escapes
 * @returns {string} JSON text with invalid escapes sanitized; falsy or
 *   non-string input is returned unchanged
 */
export function sanitizeMarkdownEscapesInJson(jsonString) {
  if (!jsonString || typeof jsonString !== 'string') {
    return jsonString;
  }

  // Alternatives, tried left to right at each position:
  //   1. \\                      escaped backslash - consumed as a pair so its
  //                              second backslash can never start a new escape
  //   2. \u NOT followed by 4 hex digits - malformed unicode escape
  //   3. \X where X is not one of " \ / b f n r t u - invalid escape
  // For 2 and 3 the capture group holds the character after the backslash.
  const invalidEscapePattern = /\\\\|\\(u(?![0-9a-fA-F]{4})|[^"\\\/bfnrtu])/g;

  return jsonString.replace(invalidEscapePattern, (match, escapedChar) => {
    // The capture group is undefined only for the escaped-backslash branch.
    if (escapedChar === undefined) {
      return match;
    }
    return escapedChar; // Drop the backslash from the invalid escape
  });
}
200
+
168
201
  /**
169
202
  * Normalize JavaScript syntax to valid JSON syntax
170
203
  * Converts single quotes to double quotes for strings in JSON-like structures
@@ -261,6 +294,22 @@ export function cleanSchemaResponse(response) {
261
294
  return cleanSchemaResponse(resultWrapperMatch[1]);
262
295
  }
263
296
 
297
+ // Strip <tool_code>...</tool_code> wrapper (Gemini-style code execution format)
298
+ // Issue #443: Gemini sometimes wraps responses in <plan> + <tool_code> tags
299
+ // e.g., <tool_code>print(attempt_completion({"projects": ["repo1"]}))</tool_code>
300
+ const toolCodeMatch = trimmed.match(/<tool_code>\s*([\s\S]*?)\s*<\/tool_code>/);
301
+ if (toolCodeMatch) {
302
+ let innerContent = toolCodeMatch[1].trim();
303
+ // Extract JSON from print() or attempt_completion() wrappers
304
+ // e.g., print({"key": "value"}) or attempt_completion({"key": "value"})
305
+ const funcCallMatch = innerContent.match(/(?:print|attempt_completion)\s*\(\s*([{\[][\s\S]*[}\]])\s*\)/);
306
+ if (funcCallMatch) {
307
+ return cleanSchemaResponse(funcCallMatch[1]);
308
+ }
309
+ // Try cleaning the inner content directly
310
+ return cleanSchemaResponse(innerContent);
311
+ }
312
+
264
313
  // First, look for JSON after code block markers - similar to mermaid extraction
265
314
  // Try with json language specifier
266
315
  const jsonBlockMatch = trimmed.match(/```json\s*\n([\s\S]*?)\n```/);
@@ -370,9 +419,30 @@ export function validateJsonResponse(response, options = {}) {
370
419
  }
371
420
  }
372
421
 
422
+ // Try to parse the response, with fallback to sanitizing Markdown escapes (issue #441)
423
+ let responseToValidate = response;
424
+ try {
425
+ JSON.parse(response);
426
+ } catch (initialError) {
427
+ // Check if the error is due to invalid escape sequences (Markdown escapes like \*, \_)
428
+ if (initialError.message && initialError.message.includes('escape')) {
429
+ const sanitized = sanitizeMarkdownEscapesInJson(response);
430
+ try {
431
+ JSON.parse(sanitized);
432
+ // Sanitized version parses - use it instead
433
+ responseToValidate = sanitized;
434
+ if (debug) {
435
+ console.log(`[DEBUG] JSON validation: Fixed Markdown escapes in JSON (issue #441)`);
436
+ }
437
+ } catch {
438
+ // Sanitization didn't help, continue with original (will fail below with proper error)
439
+ }
440
+ }
441
+ }
442
+
373
443
  try {
374
444
  const parseStart = Date.now();
375
- const parsed = JSON.parse(response);
445
+ const parsed = JSON.parse(responseToValidate);
376
446
  const parseTime = Date.now() - parseStart;
377
447
 
378
448
  if (debug) {
@@ -853,7 +923,26 @@ export function tryAutoWrapForSimpleSchema(response, schema, options = {}) {
853
923
  console.log(`[DEBUG] Auto-wrap: Response is already valid JSON, skipping`);
854
924
  }
855
925
  return null;
856
- } catch {
926
+ } catch (initialError) {
927
+ // Not valid JSON - check if it's due to Markdown escapes (issue #441)
928
+ // AI models sometimes produce JSON with Markdown escapes like \* or \_
929
+ // which are valid Markdown but NOT valid JSON escape sequences
930
+ if (initialError.message && initialError.message.includes('escape')) {
931
+ try {
932
+ const sanitized = sanitizeMarkdownEscapesInJson(response);
933
+ JSON.parse(sanitized);
934
+ // Sanitized JSON is valid! Return it instead of wrapping
935
+ if (debug) {
936
+ console.log(`[DEBUG] Auto-wrap: Fixed Markdown escapes in JSON (issue #441), returning sanitized JSON`);
937
+ }
938
+ return sanitized;
939
+ } catch {
940
+ // Sanitization didn't help, proceed with wrapping
941
+ if (debug) {
942
+ console.log(`[DEBUG] Auto-wrap: Markdown escape sanitization didn't fix JSON, proceeding with wrapping`);
943
+ }
944
+ }
945
+ }
857
946
  // Not valid JSON, proceed with wrapping
858
947
  }
859
948
 
@@ -45,12 +45,38 @@ export function removeThinkingTags(xmlString) {
45
45
 
46
46
  /**
47
47
  * Extract thinking content for potential logging
48
+ * Handles nested thinking tags by recursively stripping inner tags.
48
49
  * @param {string} xmlString - The XML string to extract from
49
- * @returns {string|null} - Thinking content or null if not found
50
+ * @returns {string|null} - Thinking content (cleaned of nested tags) or null if not found
50
51
  */
51
52
  export function extractThinkingContent(xmlString) {
52
53
  const thinkingMatch = xmlString.match(/<thinking>([\s\S]*?)<\/thinking>/);
53
- return thinkingMatch ? thinkingMatch[1].trim() : null;
54
+ if (!thinkingMatch) {
55
+ return null;
56
+ }
57
+
58
+ let content = thinkingMatch[1].trim();
59
+
60
+ // Handle nested thinking tags: if the extracted content itself starts with <thinking>,
61
+ // recursively extract from it until we get clean content.
62
+ // This handles: <thinking><thinking>content</thinking></thinking>
63
+ // where non-greedy match captures "<thinking>content" (issue #439)
64
+ while (content.startsWith('<thinking>')) {
65
+ const innerMatch = content.match(/<thinking>([\s\S]*?)<\/thinking>/);
66
+ if (innerMatch) {
67
+ content = innerMatch[1].trim();
68
+ } else {
69
+ // Unclosed inner <thinking> tag - strip the opening tag and use remaining content
70
+ // e.g., "<thinking>content" becomes "content"
71
+ content = content.substring('<thinking>'.length).trim();
72
+ break;
73
+ }
74
+ }
75
+
76
+ // Also strip any remaining thinking tags that might be embedded in the content
77
+ content = content.replace(/<\/?thinking>/g, '').trim();
78
+
79
+ return content || null;
54
80
  }
55
81
 
56
82
  /**
@@ -436,6 +436,7 @@ ${lastError}
436
436
 
437
437
  RULES REMINDER:
438
438
  - search(query) is KEYWORD SEARCH — pass a search query, NOT a filename. Use extract(filepath) to read file contents.
439
+ - search(query, path) — the path argument must be a STRING, not an object. Use field.file_path, not field.
439
440
  - search() returns up to 20K tokens by default. Use search(query, path, {maxTokens: null}) for unlimited, or searchAll(query) to auto-paginate ALL results.
440
441
  - search(), searchAll(), query(), extract(), listFiles(), bash() all return STRINGS, not arrays.
441
442
  - Use chunk(stringData) to split a string into an array of chunks.
@@ -444,7 +445,8 @@ RULES REMINDER:
444
445
  - Do NOT define helper functions that call tools — write logic inline.
445
446
  - Do NOT use async/await, template literals, or shorthand properties.
446
447
  - Do NOT use regex literals (/pattern/) — use String methods like indexOf, includes, startsWith instead.
447
- - String concatenation with +, not template literals.`;
448
+ - String concatenation with +, not template literals.
449
+ - IMPORTANT: If a tool returns "ERROR: ...", do NOT pass that error string to LLM() — handle or skip it.`;
448
450
 
449
451
  const fixedCode = await llmCallFn(fixPrompt, '', { maxTokens: 4000, temperature: 0.2 });
450
452
  // Strip markdown fences and XML tags the LLM might add