@probelabs/probe 0.6.0-rc284 → 0.6.0-rc285

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/bin/binaries/{probe-v0.6.0-rc284-aarch64-apple-darwin.tar.gz → probe-v0.6.0-rc285-aarch64-apple-darwin.tar.gz} +0 -0
  2. package/bin/binaries/probe-v0.6.0-rc285-aarch64-unknown-linux-musl.tar.gz +0 -0
  3. package/bin/binaries/probe-v0.6.0-rc285-x86_64-apple-darwin.tar.gz +0 -0
  4. package/bin/binaries/probe-v0.6.0-rc285-x86_64-pc-windows-msvc.zip +0 -0
  5. package/bin/binaries/probe-v0.6.0-rc285-x86_64-unknown-linux-musl.tar.gz +0 -0
  6. package/build/agent/ProbeAgent.d.ts +1 -1
  7. package/build/agent/ProbeAgent.js +333 -486
  8. package/build/agent/contextCompactor.js +17 -10
  9. package/build/agent/index.js +301 -702
  10. package/build/agent/schemaUtils.js +10 -11
  11. package/build/agent/shared/prompts.js +2 -2
  12. package/build/agent/tasks/taskTool.js +3 -3
  13. package/build/agent/tools.js +0 -2
  14. package/build/index.js +0 -2
  15. package/build/tools/analyzeAll.js +4 -4
  16. package/build/tools/common.js +55 -55
  17. package/build/tools/index.js +0 -1
  18. package/build/tools/vercel.js +3 -3
  19. package/cjs/agent/ProbeAgent.cjs +292 -758
  20. package/cjs/index.cjs +293 -814
  21. package/package.json +1 -1
  22. package/src/agent/ProbeAgent.d.ts +1 -1
  23. package/src/agent/ProbeAgent.js +333 -486
  24. package/src/agent/contextCompactor.js +17 -10
  25. package/src/agent/index.js +8 -2
  26. package/src/agent/schemaUtils.js +10 -11
  27. package/src/agent/shared/prompts.js +2 -2
  28. package/src/agent/tasks/taskTool.js +3 -3
  29. package/src/agent/tools.js +0 -2
  30. package/src/index.js +0 -2
  31. package/src/tools/analyzeAll.js +4 -4
  32. package/src/tools/common.js +55 -55
  33. package/src/tools/index.js +0 -1
  34. package/src/tools/vercel.js +3 -3
  35. package/bin/binaries/probe-v0.6.0-rc284-aarch64-unknown-linux-musl.tar.gz +0 -0
  36. package/bin/binaries/probe-v0.6.0-rc284-x86_64-apple-darwin.tar.gz +0 -0
  37. package/bin/binaries/probe-v0.6.0-rc284-x86_64-pc-windows-msvc.zip +0 -0
  38. package/bin/binaries/probe-v0.6.0-rc284-x86_64-unknown-linux-musl.tar.gz +0 -0
@@ -31,7 +31,7 @@ import { createAnthropic } from '@ai-sdk/anthropic';
31
31
  import { createOpenAI } from '@ai-sdk/openai';
32
32
  import { createGoogleGenerativeAI } from '@ai-sdk/google';
33
33
  import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock';
34
- import { streamText, tool, stepCountIs, jsonSchema } from 'ai';
34
+ import { streamText, tool, stepCountIs, jsonSchema, Output } from 'ai';
35
35
  import { randomUUID } from 'crypto';
36
36
  import { EventEmitter } from 'events';
37
37
  import { existsSync } from 'fs';
@@ -43,7 +43,6 @@ import { HookManager, HOOK_TYPES } from './hooks/HookManager.js';
43
43
  import { SUPPORTED_IMAGE_EXTENSIONS, IMAGE_MIME_TYPES, isFormatSupportedByProvider } from './imageConfig.js';
44
44
  import {
45
45
  createTools,
46
- attemptCompletionSchema,
47
46
  searchSchema,
48
47
  querySchema,
49
48
  extractSchema,
@@ -61,7 +60,7 @@ import {
61
60
  listSkillsSchema,
62
61
  useSkillSchema
63
62
  } from './tools.js';
64
- import { createMessagePreview } from '../tools/common.js';
63
+ import { createMessagePreview, detectStuckResponse } from '../tools/common.js';
65
64
  import { taskSchema } from './tasks/taskTool.js';
66
65
  import { FileTracker } from '../tools/fileTracker.js';
67
66
  import {
@@ -140,8 +139,8 @@ export function debugTruncate(s, limit = 200) {
140
139
  export function debugLogToolResults(toolResults) {
141
140
  if (!toolResults || toolResults.length === 0) return;
142
141
  for (const tr of toolResults) {
143
- const argsStr = JSON.stringify(tr.args || {});
144
- const resultStr = typeof tr.result === 'string' ? tr.result : JSON.stringify(tr.result || '');
142
+ const argsStr = tr.args != null ? JSON.stringify(tr.args) : '<no args>';
143
+ const resultStr = tr.result != null ? (typeof tr.result === 'string' ? tr.result : JSON.stringify(tr.result)) : '<no result>';
145
144
  console.log(`[DEBUG] tool: ${tr.toolName} | args: ${debugTruncate(argsStr)} | result: ${debugTruncate(resultStr)}`);
146
145
  }
147
146
  }
@@ -196,7 +195,7 @@ export class ProbeAgent {
196
195
  * @param {Array<Object>} [options.fallback.providers] - List of provider configurations for custom fallback
197
196
  * @param {boolean} [options.fallback.stopOnSuccess=true] - Stop on first success
198
197
  * @param {number} [options.fallback.maxTotalAttempts=10] - Maximum total attempts across all providers
199
- * @param {string} [options.completionPrompt] - Custom prompt to run after attempt_completion for validation/review (runs before mermaid/JSON validation)
198
+ * @param {string} [options.completionPrompt] - Custom prompt to run after completion for validation/review (runs before mermaid/JSON validation)
200
199
  * @param {number} [options.maxOutputTokens] - Maximum tokens for tool output before truncation (default: 20000, can also be set via PROBE_MAX_OUTPUT_TOKENS env var)
201
200
  * @param {number} [options.requestTimeout] - Timeout in ms for AI requests (default: 120000 or REQUEST_TIMEOUT env var). Used to abort hung requests.
202
201
  * @param {number} [options.maxOperationTimeout] - Maximum timeout in ms for the entire operation including all retries and fallbacks (default: 300000 or MAX_OPERATION_TIMEOUT env var). This is the absolute maximum time for streamTextWithRetryAndFallback.
@@ -1682,12 +1681,11 @@ export class ProbeAgent {
1682
1681
  * - Delegate tool param injection
1683
1682
  *
1684
1683
  * @param {Object} options - Options from the answer() call
1685
- * @param {Function} onComplete - Callback when attempt_completion is called (receives result string)
1686
1684
  * @param {Object} context - Execution context { maxIterations, currentMessages }
1687
1685
  * @returns {Object} Tools object for streamText()
1688
1686
  * @private
1689
1687
  */
1690
- _buildNativeTools(options, onComplete, context = {}) {
1688
+ _buildNativeTools(options, context = {}) {
1691
1689
  const { maxIterations = 30 } = context;
1692
1690
  const nativeTools = {};
1693
1691
  const isToolAllowed = (toolName) => this.allowedTools.isEnabled(toolName);
@@ -1874,18 +1872,8 @@ export class ProbeAgent {
1874
1872
  });
1875
1873
  };
1876
1874
 
1877
- // Only include attempt_completion when _disableTools is set
1875
+ // When _disableTools is set, provide no tools — the model responds with text directly
1878
1876
  if (options._disableTools) {
1879
- nativeTools.attempt_completion = tool({
1880
- description: 'Signal task completion and provide the final result to the user',
1881
- inputSchema: z.object({
1882
- result: z.string().describe('The final result to present to the user')
1883
- }),
1884
- execute: async ({ result }) => {
1885
- onComplete(result);
1886
- return result;
1887
- }
1888
- });
1889
1877
  return nativeTools;
1890
1878
  }
1891
1879
 
@@ -1902,33 +1890,6 @@ export class ProbeAgent {
1902
1890
  }
1903
1891
  }
1904
1892
 
1905
- // Always add attempt_completion
1906
- nativeTools.attempt_completion = tool({
1907
- description: 'Signal task completion and provide the final result to the user',
1908
- inputSchema: z.object({
1909
- result: z.string().describe('The final result to present to the user')
1910
- }),
1911
- execute: async ({ result }) => {
1912
- // Task completion blocking
1913
- if (this.enableTasks && this.taskManager && this.taskManager.hasIncompleteTasks()) {
1914
- const incompleteTasks = this.taskManager.getIncompleteTasks();
1915
- const highIterationCount = (context.currentIteration || 0) > maxIterations * 0.7;
1916
-
1917
- if (!highIterationCount) {
1918
- const taskSummary = this.taskManager.getTaskSummary();
1919
- const blockedMessage = createTaskCompletionBlockedMessage(taskSummary);
1920
- if (this.debug) {
1921
- console.log('[DEBUG] Task checkpoint: Blocking completion due to incomplete tasks');
1922
- }
1923
- return blockedMessage;
1924
- }
1925
- }
1926
-
1927
- onComplete(result);
1928
- return result;
1929
- }
1930
- });
1931
-
1932
1893
  // Add MCP tools if available
1933
1894
  if (this.mcpBridge && !options._disableTools) {
1934
1895
  const mcpTools = this.mcpBridge.getVercelTools(this._filterMcpTools(this.mcpBridge.getToolNames()));
@@ -3017,7 +2978,7 @@ Follow these instructions carefully:
3017
2978
  2. Use the available tools step-by-step to fulfill the request.
3018
2979
  3. You should always prefer the search tool for code-related questions.${this.searchDelegate ? ' Ask natural language questions — the search subagent handles keyword formulation and returns extracted code blocks. Use extract only to expand context or read full files.' : ' Search handles stemming and case variations automatically — do NOT try keyword variations manually. Read full files only if really necessary.'}
3019
2980
  4. Ensure to get really deep and understand the full picture before answering.
3020
- 5. Once the task is fully completed, use the attempt_completion tool to provide the final result.
2981
+ 5. Once the task is fully completed, provide your final answer directly as text.
3021
2982
  6. ${this.searchDelegate ? 'Ask clear, specific questions when searching. Each search should target a distinct concept or question.' : 'Prefer concise and focused search queries. Use specific keywords and phrases to narrow down results.'}
3022
2983
  7. NEVER use bash for code exploration (no grep, cat, find, head, tail, awk, sed) — always use search and extract tools instead. Bash is only for system operations like building, running tests, or git commands.${this.allowEdit ? `
3023
2984
  7. When modifying files, choose the appropriate tool:
@@ -3224,7 +3185,8 @@ Follow these instructions carefully:
3224
3185
  let userMessage = { role: 'user', content: message.trim() };
3225
3186
 
3226
3187
  // If schema is provided, prepend JSON format requirement to user message
3227
- if (options.schema && !options._schemaFormatted) {
3188
+ // Skip when _disableTools is set — native Output.object() handles schema constraint
3189
+ if (options.schema && !options._schemaFormatted && !options._disableTools) {
3228
3190
  const schemaInstructions = generateSchemaInstructions(options.schema, { debug: this.debug });
3229
3191
  userMessage.content = message.trim() + schemaInstructions;
3230
3192
  }
@@ -3266,7 +3228,6 @@ Follow these instructions carefully:
3266
3228
  }
3267
3229
 
3268
3230
  let currentIteration = 0;
3269
- let completionAttempted = false;
3270
3231
  let finalResult = 'I was unable to complete your request due to reaching the maximum number of tool iterations.';
3271
3232
 
3272
3233
  // Adjust max iterations if schema is provided
@@ -3432,13 +3393,9 @@ Follow these instructions carefully:
3432
3393
  // Iteration counter for telemetry
3433
3394
 
3434
3395
  // Native tool calling via Vercel AI SDK streamText + maxSteps
3435
- let completionResult = null;
3436
3396
  const toolContext = { maxIterations, currentIteration: 0, currentMessages };
3437
3397
 
3438
- const tools = this._buildNativeTools(options, (result) => {
3439
- completionResult = result;
3440
- completionAttempted = true;
3441
- }, toolContext);
3398
+ const tools = this._buildNativeTools(options, toolContext);
3442
3399
 
3443
3400
  if (this.debug) {
3444
3401
  const toolNames = Object.keys(tools);
@@ -3455,6 +3412,10 @@ Follow these instructions carefully:
3455
3412
  }
3456
3413
  }
3457
3414
 
3415
+ // Track whether completionPrompt has been injected into the loop
3416
+ let completionPromptInjected = false;
3417
+ let preCompletionResult = null; // Stores the result before completionPrompt for fallback
3418
+
3458
3419
  // Context compaction retry loop
3459
3420
  let compactionAttempted = false;
3460
3421
  while (true) {
@@ -3465,21 +3426,133 @@ Follow these instructions carefully:
3465
3426
  model: this.provider ? this.provider(this.model) : this.model,
3466
3427
  messages: messagesForAI,
3467
3428
  tools,
3468
- stopWhen: stepCountIs(maxIterations),
3429
+ stopWhen: ({ steps }) => {
3430
+ // Hard limit
3431
+ if (steps.length >= maxIterations) return true;
3432
+
3433
+ const lastStep = steps[steps.length - 1];
3434
+ const modelWantsToStop = lastStep?.finishReason === 'stop'
3435
+ && (!lastStep?.toolCalls || lastStep.toolCalls.length === 0);
3436
+
3437
+ if (modelWantsToStop) {
3438
+ // Task blocking: force continuation when tasks are incomplete
3439
+ if (this.enableTasks && this.taskManager?.hasIncompleteTasks()) {
3440
+ const highIterationCount = steps.length > maxIterations * 0.7;
3441
+ if (!highIterationCount) return false; // Force continuation
3442
+ }
3443
+
3444
+ // Completion prompt: force one more round for review
3445
+ if (this.completionPrompt && !options._completionPromptProcessed && !completionPromptInjected) {
3446
+ // Save the pre-completion result for fallback
3447
+ preCompletionResult = lastStep.text || null;
3448
+ return false; // Force continuation — prepareStep will inject the prompt
3449
+ }
3450
+ }
3451
+
3452
+ // Circuit breaker: consecutive no-tool steps
3453
+ let trailingNoTool = 0;
3454
+ for (let i = steps.length - 1; i >= 0; i--) {
3455
+ if (!steps[i].toolCalls?.length) trailingNoTool++;
3456
+ else break;
3457
+ }
3458
+ if (trailingNoTool >= 5) return true;
3459
+
3460
+ // Circuit breaker: identical/stuck responses
3461
+ if (trailingNoTool >= 3) {
3462
+ const recentTexts = steps.slice(-3).map(s => s.text);
3463
+ if (recentTexts.every(t => t && t === recentTexts[0])) return true;
3464
+ if (recentTexts.every(t => detectStuckResponse(t))) return true;
3465
+ }
3466
+
3467
+ return false;
3468
+ },
3469
+ prepareStep: ({ steps, stepNumber }) => {
3470
+ // Last-iteration warning
3471
+ if (stepNumber === maxIterations - 1) {
3472
+ return {
3473
+ toolChoice: 'none',
3474
+ };
3475
+ }
3476
+
3477
+ const lastStep = steps[steps.length - 1];
3478
+ const modelJustStopped = lastStep?.finishReason === 'stop'
3479
+ && (!lastStep?.toolCalls || lastStep.toolCalls.length === 0);
3480
+
3481
+ if (modelJustStopped) {
3482
+ // Task blocking: inject reminder when tasks are incomplete
3483
+ if (this.enableTasks && this.taskManager?.hasIncompleteTasks()) {
3484
+ const taskSummary = this.taskManager.getTaskSummary();
3485
+ const blockedMessage = createTaskCompletionBlockedMessage(taskSummary);
3486
+ return {
3487
+ userMessage: blockedMessage
3488
+ };
3489
+ }
3490
+
3491
+ // Completion prompt: inject review message on first stop
3492
+ if (this.completionPrompt && !options._completionPromptProcessed && !completionPromptInjected) {
3493
+ completionPromptInjected = true;
3494
+ const resultToReview = lastStep.text || preCompletionResult || '';
3495
+
3496
+ if (this.debug) {
3497
+ console.log('[DEBUG] Injecting completion prompt into main loop via prepareStep...');
3498
+ }
3499
+
3500
+ if (this.tracer) {
3501
+ this.tracer.recordEvent('completion_prompt.started', {
3502
+ 'completion_prompt.original_result_length': resultToReview.length
3503
+ });
3504
+ }
3505
+
3506
+ const completionPromptMessage = `${this.completionPrompt}
3507
+
3508
+ Here is the result to review:
3509
+ <result>
3510
+ ${resultToReview}
3511
+ </result>
3512
+
3513
+ Double-check your response based on the criteria above. If everything looks good, respond with your previous answer exactly as-is. If something needs to be fixed or is missing, do it now, then respond with the COMPLETE updated answer (everything you did in total, not just the fix).`;
3514
+
3515
+ return {
3516
+ userMessage: completionPromptMessage
3517
+ };
3518
+ }
3519
+ }
3520
+
3521
+ return undefined;
3522
+ },
3469
3523
  maxTokens: maxResponseTokens,
3470
3524
  temperature: 0.3,
3471
- onStepFinish: ({ toolResults, text, finishReason, usage }) => {
3525
+ onStepFinish: (stepResult) => {
3526
+ const { toolResults, toolCalls, text, reasoningText, finishReason, usage } = stepResult;
3472
3527
  currentIteration++;
3473
3528
  toolContext.currentIteration = currentIteration;
3474
3529
 
3475
- // Record telemetry
3530
+ // Record telemetry — include model's reasoning and tool call details
3476
3531
  if (this.tracer) {
3477
- this.tracer.addEvent('iteration.step', {
3532
+ const stepEvent = {
3478
3533
  'iteration': currentIteration,
3479
3534
  'max_iterations': maxIterations,
3480
3535
  'finish_reason': finishReason,
3481
3536
  'has_tool_calls': !!(toolResults && toolResults.length > 0)
3482
- });
3537
+ };
3538
+ // Model's text output (its monologue explaining why it's calling tools)
3539
+ if (text) {
3540
+ stepEvent['ai.text'] = text.substring(0, 10000);
3541
+ stepEvent['ai.text.length'] = text.length;
3542
+ }
3543
+ // Model's internal reasoning/thinking tokens (if available)
3544
+ if (reasoningText) {
3545
+ stepEvent['ai.reasoning'] = reasoningText.substring(0, 10000);
3546
+ stepEvent['ai.reasoning.length'] = reasoningText.length;
3547
+ }
3548
+ // Tool call names and args for this step
3549
+ if (toolCalls && toolCalls.length > 0) {
3550
+ stepEvent['ai.tool_calls'] = toolCalls.map(tc => ({
3551
+ name: tc.toolName,
3552
+ args: JSON.stringify(tc.args || {}).substring(0, 2000)
3553
+ }));
3554
+ }
3555
+ this.tracer.addEvent('iteration.step', stepEvent);
3483
3556
  }
3484
3557
 
3485
3558
  // Record token usage
@@ -3494,11 +3567,38 @@ Follow these instructions carefully:
3494
3567
 
3495
3568
  if (this.debug) {
3496
3569
  console.log(`[DEBUG] Step ${currentIteration}/${maxIterations} finished (reason: ${finishReason}, tools: ${toolResults?.length || 0})`);
3570
+ if (text) {
3571
+ console.log(`[DEBUG] model text: ${debugTruncate(text)}`);
3572
+ }
3573
+ if (reasoningText) {
3574
+ console.log(`[DEBUG] reasoning: ${debugTruncate(reasoningText)}`);
3575
+ }
3497
3576
  debugLogToolResults(toolResults);
3498
3577
  }
3499
3578
  }
3500
3579
  };
3501
3580
 
3581
+ // Native JSON schema output — use model's built-in JSON schema constraint
3582
+ // when no tools are active (many providers like Gemini don't support
3583
+ // structured output + function calling simultaneously).
3584
+ // When tools ARE active, we rely on AJV post-validation + correction loop.
3585
+ const hasActiveTools = Object.keys(tools).length > 0;
3586
+ if (options.schema && !hasActiveTools) {
3587
+ try {
3588
+ const parsedSchema = typeof options.schema === 'string' ? JSON.parse(options.schema) : options.schema;
3589
+ if (isJsonSchema(options.schema)) {
3590
+ streamOptions.output = Output.object({ schema: jsonSchema(parsedSchema) });
3591
+ if (this.debug) {
3592
+ console.log(`[DEBUG] Native JSON schema output enabled (no active tools)`);
3593
+ }
3594
+ }
3595
+ } catch (e) {
3596
+ if (this.debug) {
3597
+ console.log(`[DEBUG] Failed to set native JSON schema output: ${e.message}`);
3598
+ }
3599
+ }
3600
+ }
3601
+
3502
3602
  // Add native thinking/reasoning providerOptions when thinkingEffort is set
3503
3603
  const providerOpts = this._buildThinkingProviderOptions(maxResponseTokens);
3504
3604
  if (providerOpts) {
@@ -3513,7 +3613,7 @@ Follow these instructions carefully:
3513
3613
 
3514
3614
  if (this.debug) {
3515
3615
  const steps = await result.steps;
3516
- console.log(`[DEBUG] streamText completed: ${steps?.length || 0} steps, finalText=${finalText?.length || 0} chars, completion=${!!completionResult}`);
3616
+ console.log(`[DEBUG] streamText completed: ${steps?.length || 0} steps, finalText=${finalText?.length || 0} chars`);
3517
3617
  }
3518
3618
 
3519
3619
  // Record final token usage
@@ -3545,21 +3645,26 @@ Follow these instructions carefully:
3545
3645
  aiResult = await executeAIRequest();
3546
3646
  }
3547
3647
 
3548
- // Use completion result if available, otherwise use final text
3549
- if (completionResult) {
3550
- finalResult = completionResult;
3551
-
3552
- // Stream the final result if callback is provided
3553
- if (options.onStream && finalResult) {
3554
- const chunkSize = 50;
3555
- for (let i = 0; i < finalResult.length; i += chunkSize) {
3556
- const chunk = finalResult.slice(i, Math.min(i + chunkSize, finalResult.length));
3557
- options.onStream(chunk);
3648
+ // Try native JSON schema output first Output.object() is set when no tools are active
3649
+ if (options.schema && streamOptions.output) {
3650
+ try {
3651
+ const outputObject = await aiResult.result.output;
3652
+ if (outputObject) {
3653
+ finalResult = JSON.stringify(outputObject);
3654
+ } else if (aiResult.finalText) {
3655
+ finalResult = aiResult.finalText;
3656
+ }
3657
+ } catch (e) {
3658
+ // NoObjectGeneratedError — fall back to text-based extraction
3659
+ if (this.debug) {
3660
+ console.log(`[DEBUG] Native JSON output failed, falling back to text: ${e.message}`);
3661
+ }
3662
+ if (aiResult.finalText) {
3663
+ finalResult = aiResult.finalText;
3558
3664
  }
3559
3665
  }
3560
3666
  } else if (aiResult.finalText) {
3561
3667
  finalResult = aiResult.finalText;
3562
- completionAttempted = true;
3563
3668
  }
3564
3669
 
3565
3670
  // Update currentMessages from the result for history storage
@@ -3572,6 +3677,94 @@ Follow these instructions carefully:
3572
3677
  }
3573
3678
  }
3574
3679
 
3680
+ // Post-streamText completionPrompt fallback:
3681
+ // The stopWhen/prepareStep mechanism only fires between tool-call steps.
3682
+ // If the model answered without tool calls (or its final step had none),
3683
+ // stopWhen never gets a chance to force continuation. In that case, run
3684
+ // a second streamText pass with the completion prompt injected.
3685
+ if (this.completionPrompt && !options._completionPromptProcessed && !completionPromptInjected && finalResult) {
3686
+ completionPromptInjected = true;
3687
+ preCompletionResult = finalResult;
3688
+
3689
+ if (this.debug) {
3690
+ console.log('[DEBUG] Injecting completion prompt as post-streamText follow-up pass...');
3691
+ }
3692
+
3693
+ if (this.tracer) {
3694
+ this.tracer.recordEvent('completion_prompt.started', {
3695
+ 'completion_prompt.original_result_length': finalResult.length
3696
+ });
3697
+ }
3698
+
3699
+ const completionPromptMessage = `${this.completionPrompt}
3700
+
3701
+ Here is the result to review:
3702
+ <result>
3703
+ ${finalResult}
3704
+ </result>
3705
+
3706
+ Double-check your response based on the criteria above. If everything looks good, respond with your previous answer exactly as-is. If something needs to be fixed or is missing, do it now, then respond with the COMPLETE updated answer (everything you did in total, not just the fix).`;
3707
+
3708
+ currentMessages.push({ role: 'user', content: completionPromptMessage });
3709
+
3710
+ const completionMaxIterations = 5;
3711
+ const completionStreamOptions = {
3712
+ model: this.provider ? this.provider(this.model) : this.model,
3713
+ messages: this.prepareMessagesWithImages(currentMessages),
3714
+ tools,
3715
+ stopWhen: stepCountIs(completionMaxIterations),
3716
+ maxTokens: maxResponseTokens,
3717
+ temperature: 0.3,
3718
+ onStepFinish: ({ toolResults, text, finishReason, usage }) => {
3719
+ if (usage) {
3720
+ this.tokenCounter.recordUsage(usage);
3721
+ }
3722
+ if (options.onStream && text) {
3723
+ options.onStream(text);
3724
+ }
3725
+ if (this.debug) {
3726
+ console.log(`[DEBUG] Completion prompt step finished (reason: ${finishReason}, tools: ${toolResults?.length || 0})`);
3727
+ }
3728
+ }
3729
+ };
3730
+
3731
+ const providerOpts = this._buildThinkingProviderOptions(maxResponseTokens);
3732
+ if (providerOpts) {
3733
+ completionStreamOptions.providerOptions = providerOpts;
3734
+ }
3735
+
3736
+ try {
3737
+ const cpResult = await this.streamTextWithRetryAndFallback(completionStreamOptions);
3738
+ const cpFinalText = await cpResult.text;
3739
+ const cpUsage = await cpResult.usage;
3740
+ if (cpUsage) {
3741
+ this.tokenCounter.recordUsage(cpUsage, cpResult.experimental_providerMetadata);
3742
+ }
3743
+
3744
+ // Append follow-up messages to conversation history
3745
+ const cpMessages = await cpResult.response?.messages;
3746
+ if (cpMessages) {
3747
+ for (const msg of cpMessages) {
3748
+ currentMessages.push(msg);
3749
+ }
3750
+ }
3751
+
3752
+ // Use updated result if non-empty, otherwise keep original
3753
+ if (cpFinalText && cpFinalText.trim().length > 0) {
3754
+ finalResult = cpFinalText;
3755
+ }
3756
+
3757
+ if (this.debug) {
3758
+ console.log(`[DEBUG] Completion prompt follow-up produced ${cpFinalText?.length || 0} chars (using ${cpFinalText && cpFinalText.trim().length > 0 ? 'updated' : 'original'} result)`);
3759
+ }
3760
+ } catch (cpError) {
3761
+ if (this.debug) {
3762
+ console.log(`[DEBUG] Completion prompt follow-up failed: ${cpError.message}, keeping original result`);
3763
+ }
3764
+ // Keep original result on failure
3765
+ }
3766
+ }
3767
+
3575
3768
  break; // Success
3576
3769
 
3577
3770
  } catch (error) {
@@ -3610,20 +3803,14 @@ Follow these instructions carefully:
3610
3803
  }
3611
3804
  }
3612
3805
 
3613
- // Handle AbortError from attempt_completion gracefully
3614
- if (completionResult) {
3615
- finalResult = completionResult;
3616
- break;
3617
- }
3618
-
3619
3806
  console.error(`Error during streamText:`, error);
3620
3807
  finalResult = `Error: Failed to get response from AI model. ${error.message}`;
3621
3808
  throw new Error(finalResult);
3622
3809
  }
3623
3810
  }
3624
3811
 
3625
- if (currentIteration >= maxIterations && !completionAttempted) {
3626
- console.warn(`[WARN] Max tool iterations (${maxIterations}) reached for session ${this.sessionId}. Returning current error state.`);
3812
+ if (currentIteration >= maxIterations) {
3813
+ console.warn(`[WARN] Max tool iterations (${maxIterations}) reached for session ${this.sessionId}.`);
3627
3814
  }
3628
3815
 
3629
3816
  // Store final history
@@ -3654,351 +3841,22 @@ Follow these instructions carefully:
3654
3841
  // Continue even if storage fails
3655
3842
  }
3656
3843
 
3657
- // Completion prompt handling - inject one more user message into the existing conversation
3658
- // This continues the SAME agentic session (same tools, same TaskManager, same history)
3659
- // rather than spawning a recursive this.answer() call which would reset state
3660
- if (completionAttempted && this.completionPrompt && !options._completionPromptProcessed) {
3661
- if (this.debug) {
3662
- console.log('[DEBUG] Running completion prompt as continuation of current session...');
3663
- }
3664
-
3665
- try {
3666
- const originalResult = finalResult;
3667
-
3668
- if (this.tracer) {
3669
- this.tracer.recordEvent('completion_prompt.started', {
3670
- 'completion_prompt.original_result_length': finalResult?.length || 0
3671
- });
3672
- }
3673
-
3674
- // Append completion prompt as a user message to the existing conversation
3675
- const completionPromptMessage = `${this.completionPrompt}
3676
-
3677
- Here is the result to review:
3678
- <result>
3679
- ${finalResult}
3680
- </result>
3681
-
3682
- Double-check your response based on the criteria above. If everything looks good, respond with your previous answer exactly as-is using attempt_completion. If something needs to be fixed or is missing, do it now, then respond with the COMPLETE updated answer (everything you did in total, not just the fix) using attempt_completion.`;
3683
-
3684
- currentMessages.push({ role: 'user', content: completionPromptMessage });
3685
-
3686
- // Reset completion tracking for the follow-up turn
3687
- completionResult = null;
3688
- completionAttempted = false;
3689
-
3690
- // Run one more streamText pass with the same tools and conversation context
3691
- // Give a small number of extra iterations for the follow-up
3692
- const completionMaxIterations = 5;
3693
- const completionStreamOptions = {
3694
- model: this.provider ? this.provider(this.model) : this.model,
3695
- messages: this.prepareMessagesWithImages(currentMessages),
3696
- tools,
3697
- stopWhen: stepCountIs(completionMaxIterations),
3698
- maxTokens: maxResponseTokens,
3699
- temperature: 0.3,
3700
- onStepFinish: ({ toolResults, text, finishReason, usage }) => {
3701
- if (usage) {
3702
- this.tokenCounter.recordUsage(usage);
3703
- }
3704
- if (options.onStream && text) {
3705
- options.onStream(text);
3706
- }
3707
- if (this.debug) {
3708
- console.log(`[DEBUG] Completion prompt step finished (reason: ${finishReason}, tools: ${toolResults?.length || 0})`);
3709
- debugLogToolResults(toolResults);
3710
- }
3711
- }
3712
- };
3713
-
3714
- const providerOpts = this._buildThinkingProviderOptions(maxResponseTokens);
3715
- if (providerOpts) {
3716
- completionStreamOptions.providerOptions = providerOpts;
3717
- }
3718
-
3719
- const cpResult = await this.streamTextWithRetryAndFallback(completionStreamOptions);
3720
- const cpFinalText = await cpResult.text;
3721
- const cpUsage = await cpResult.usage;
3722
- if (cpUsage) {
3723
- this.tokenCounter.recordUsage(cpUsage, cpResult.experimental_providerMetadata);
3724
- }
3725
-
3726
- // Append follow-up messages to conversation history
3727
- const cpMessages = await cpResult.response?.messages;
3728
- if (cpMessages) {
3729
- for (const msg of cpMessages) {
3730
- currentMessages.push(msg);
3731
- }
3732
- }
3733
-
3734
- // Use new completion result if the agent called attempt_completion again,
3735
- // otherwise keep the original result (the follow-up may have just done side-effects)
3736
- if (completionResult) {
3737
- finalResult = completionResult;
3738
- completionAttempted = true;
3739
- } else if (cpFinalText && cpFinalText.trim().length > 0) {
3740
- finalResult = cpFinalText;
3741
- completionAttempted = true;
3742
- } else {
3743
- // Follow-up produced nothing useful — keep the original
3744
- finalResult = originalResult;
3745
- completionAttempted = true;
3746
- if (this.debug) {
3747
- console.log('[DEBUG] Completion prompt returned empty result, keeping original.');
3748
- }
3749
- }
3750
-
3751
- if (this.debug) {
3752
- console.log(`[DEBUG] Completion prompt finished. Final result length: ${finalResult?.length || 0}`);
3753
- }
3754
-
3755
- if (this.tracer) {
3756
- this.tracer.recordEvent('completion_prompt.completed', {
3757
- 'completion_prompt.final_result_length': finalResult?.length || 0,
3758
- 'completion_prompt.used_original': finalResult === originalResult
3759
- });
3760
- }
3761
- } catch (error) {
3762
- console.error('[ERROR] Completion prompt failed:', error);
3763
- // Keep the original result if completion prompt fails
3764
- if (this.tracer) {
3765
- this.tracer.recordEvent('completion_prompt.error', {
3766
- 'completion_prompt.error': error.message
3767
- });
3768
- }
3769
- }
3844
+ // Log completion prompt telemetry if it was injected
3845
+ if (completionPromptInjected && this.tracer) {
3846
+ this.tracer.recordEvent('completion_prompt.completed', {
3847
+ 'completion_prompt.final_result_length': finalResult?.length || 0,
3848
+ 'completion_prompt.used_original': preCompletionResult && finalResult === preCompletionResult
3849
+ });
3770
3850
  }
3771
3851
 
3772
- // Schema handling - format response according to provided schema
3773
- // Skip schema processing if result came from attempt_completion tool
3774
- // Don't apply schema formatting if we failed due to max iterations
3775
- const reachedMaxIterations = currentIteration >= maxIterations && !completionAttempted;
3776
- if (options.schema && !options._schemaFormatted && !completionAttempted && !reachedMaxIterations) {
3777
- if (this.debug) {
3778
- console.log('[DEBUG] Schema provided, applying automatic formatting...');
3779
- }
3780
-
3781
- try {
3782
- // Step 1: Make a follow-up call to format according to schema
3783
- const schemaPrompt = `CRITICAL: You MUST respond with ONLY valid JSON DATA that conforms to this schema structure. DO NOT return the schema definition itself.
3784
-
3785
- Schema to follow (this is just the structure - provide ACTUAL DATA):
3786
- ${options.schema}
3787
-
3788
- REQUIREMENTS:
3789
- - Return ONLY the JSON object/array with REAL DATA that matches the schema structure
3790
- - DO NOT return the schema definition itself (no "$schema", "$id", "type", "properties", etc.)
3791
- - NO additional text, explanations, or markdown formatting
3792
- - NO code blocks or backticks
3793
- - The JSON must be parseable by JSON.parse()
3794
- - Fill in actual values that make sense based on your previous response content
3795
-
3796
- EXAMPLE:
3797
- If schema defines {type: "object", properties: {name: {type: "string"}, age: {type: "number"}}}
3798
- Return: {"name": "John Doe", "age": 25}
3799
- NOT: {"type": "object", "properties": {"name": {"type": "string"}}}
3800
-
3801
- Convert your previous response content into actual JSON data that follows this schema structure.`;
3802
-
3803
- // Call answer recursively with _schemaFormatted flag to prevent infinite loop
3804
- finalResult = await this.answer(schemaPrompt, [], {
3805
- ...options,
3806
- _schemaFormatted: true,
3807
- _completionPromptProcessed: true // Prevent cascading completion prompts in retry calls
3808
- });
3809
-
3810
- // Step 2: Validate and fix Mermaid diagrams if present (BEFORE cleaning schema)
3811
- // This ensures mermaid validation sees the full response before JSON extraction strips content
3812
- if (!this.disableMermaidValidation) {
3813
- try {
3814
- if (this.debug) {
3815
- console.log(`[DEBUG] Mermaid validation: Starting enhanced mermaid validation...`);
3816
- }
3817
-
3818
- // Record mermaid validation start in telemetry
3819
- if (this.tracer) {
3820
- this.tracer.recordMermaidValidationEvent('schema_processing_started', {
3821
- 'mermaid_validation.context': 'schema_processing',
3822
- 'mermaid_validation.response_length': finalResult.length
3823
- });
3824
- }
3825
-
3826
- const mermaidValidation = await validateAndFixMermaidResponse(finalResult, {
3827
- debug: this.debug,
3828
- path: this.workspaceRoot || this.allowedFolders[0],
3829
- provider: this.clientApiProvider,
3830
- model: this.model,
3831
- tracer: this.tracer
3832
- });
3833
-
3834
- if (mermaidValidation.wasFixed) {
3835
- finalResult = mermaidValidation.fixedResponse;
3836
- if (this.debug) {
3837
- console.log(`[DEBUG] Mermaid validation: Diagrams successfully fixed`);
3838
-
3839
- if (mermaidValidation.performanceMetrics) {
3840
- const metrics = mermaidValidation.performanceMetrics;
3841
- console.log(`[DEBUG] Mermaid validation: Performance - total: ${metrics.totalTimeMs}ms, AI fixing: ${metrics.aiFixingTimeMs}ms`);
3842
- console.log(`[DEBUG] Mermaid validation: Results - ${metrics.diagramsFixed}/${metrics.diagramsProcessed} diagrams fixed`);
3843
- }
3844
-
3845
- if (mermaidValidation.fixingResults) {
3846
- mermaidValidation.fixingResults.forEach((fixResult, index) => {
3847
- if (fixResult.wasFixed) {
3848
- const method = fixResult.fixedWithHtmlDecoding ? 'HTML entity decoding' : 'AI correction';
3849
- const time = fixResult.aiFixingTimeMs ? ` in ${fixResult.aiFixingTimeMs}ms` : '';
3850
- console.log(`[DEBUG] Mermaid validation: Fixed diagram ${fixResult.diagramIndex + 1} with ${method}${time}`);
3851
- console.log(`[DEBUG] Mermaid validation: Original error: ${fixResult.originalError}`);
3852
- } else {
3853
- console.log(`[DEBUG] Mermaid validation: Failed to fix diagram ${fixResult.diagramIndex + 1}: ${fixResult.fixingError}`);
3854
- }
3855
- });
3856
- }
3857
- }
3858
- } else if (this.debug) {
3859
- console.log(`[DEBUG] Mermaid validation: No fixes needed or fixes unsuccessful`);
3860
- if (mermaidValidation.diagrams?.length > 0) {
3861
- console.log(`[DEBUG] Mermaid validation: Found ${mermaidValidation.diagrams.length} diagrams, all valid: ${mermaidValidation.isValid}`);
3862
- }
3863
- }
3864
- } catch (error) {
3865
- if (this.debug) {
3866
- console.log(`[DEBUG] Mermaid validation: Process failed with error: ${error.message}`);
3867
- console.log(`[DEBUG] Mermaid validation: Stack trace: ${error.stack}`);
3868
- }
3869
- }
3870
- } else if (this.debug) {
3871
- console.log(`[DEBUG] Mermaid validation: Skipped due to disableMermaidValidation option`);
3872
- }
3873
-
3874
- // Step 3: Clean the response (remove code blocks, extract JSON)
3875
- // This happens AFTER mermaid validation to preserve full content for validation
3876
- finalResult = cleanSchemaResponse(finalResult);
3877
-
3878
- // Step 4: Validate and potentially correct JSON responses
3879
- if (isJsonSchema(options.schema)) {
3880
- if (this.debug) {
3881
- console.log(`[DEBUG] JSON validation: Starting validation process for schema response`);
3882
- console.log(`[DEBUG] JSON validation: Cleaned response length: ${finalResult.length} chars`);
3883
- }
3884
-
3885
- // Record JSON validation start in telemetry
3886
- if (this.tracer) {
3887
- this.tracer.recordJsonValidationEvent('started', {
3888
- 'json_validation.response_length': finalResult.length,
3889
- 'json_validation.schema_type': 'JSON'
3890
- });
3891
- }
3892
-
3893
- let validation = validateJsonResponse(finalResult, { debug: this.debug, schema: options.schema });
3894
- let retryCount = 0;
3895
- const maxRetries = 3;
3896
-
3897
- // First check if the response is valid JSON but is actually a schema definition
3898
- if (validation.isValid && isJsonSchemaDefinition(finalResult, { debug: this.debug })) {
3899
- if (this.debug) {
3900
- console.log(`[DEBUG] JSON validation: Response is a JSON schema definition instead of data, needs correction...`);
3901
- }
3902
- // Mark as invalid so it goes through the fixing process
3903
- validation = {
3904
- isValid: false,
3905
- error: 'Response is a JSON schema definition instead of actual data',
3906
- enhancedError: 'Response is a JSON schema definition instead of actual data. Please return data that conforms to the schema, not the schema itself.'
3907
- };
3908
- }
3909
-
3910
- // Use separate JsonFixingAgent for JSON corrections (isolates session like Mermaid fixing)
3911
- if (!validation.isValid) {
3912
- if (this.debug) {
3913
- console.log(`[DEBUG] JSON validation: Starting separate JsonFixingAgent session...`);
3914
- }
3915
-
3916
- const { JsonFixingAgent } = await import('./schemaUtils.js');
3917
- const jsonFixer = new JsonFixingAgent({
3918
- path: this.workspaceRoot || this.allowedFolders[0],
3919
- provider: this.clientApiProvider,
3920
- model: this.model,
3921
- debug: this.debug,
3922
- tracer: this.tracer
3923
- });
3924
-
3925
- let currentResult = finalResult;
3926
- let currentValidation = validation;
3927
-
3928
- while (!currentValidation.isValid && retryCount < maxRetries) {
3929
- if (this.debug) {
3930
- console.log(`[DEBUG] JSON validation: Validation failed (attempt ${retryCount + 1}/${maxRetries}):`, currentValidation.error);
3931
- console.log(`[DEBUG] JSON validation: Invalid response sample: ${currentResult.substring(0, 300)}${currentResult.length > 300 ? '...' : ''}`);
3932
- }
3933
-
3934
- try {
3935
- // Use specialized JsonFixingAgent to fix the JSON in a separate session
3936
- currentResult = await jsonFixer.fixJson(
3937
- currentResult,
3938
- options.schema,
3939
- currentValidation,
3940
- retryCount + 1
3941
- );
3942
-
3943
- // Validate the corrected response
3944
- currentValidation = validateJsonResponse(currentResult, { debug: this.debug, schema: options.schema });
3945
- retryCount++;
3946
-
3947
- if (this.debug) {
3948
- if (!currentValidation.isValid && retryCount < maxRetries) {
3949
- console.log(`[DEBUG] JSON validation: Still invalid after correction ${retryCount}, retrying...`);
3950
- console.log(`[DEBUG] JSON validation: Corrected response sample: ${currentResult.substring(0, 300)}${currentResult.length > 300 ? '...' : ''}`);
3951
- } else if (currentValidation.isValid) {
3952
- console.log(`[DEBUG] JSON validation: Successfully corrected after ${retryCount} attempts with JsonFixingAgent`);
3953
- }
3954
- }
3955
- } catch (error) {
3956
- if (this.debug) {
3957
- console.error(`[DEBUG] JSON validation: JsonFixingAgent error on attempt ${retryCount + 1}:`, error.message);
3958
- }
3959
- // If JsonFixingAgent fails, break out of loop
3960
- break;
3961
- }
3962
- }
3963
-
3964
- // Update finalResult with the fixed version
3965
- finalResult = currentResult;
3966
- validation = currentValidation;
3967
-
3968
- if (!validation.isValid && this.debug) {
3969
- console.log(`[DEBUG] JSON validation: Still invalid after ${maxRetries} correction attempts with JsonFixingAgent:`, validation.error);
3970
- console.log(`[DEBUG] JSON validation: Final invalid response: ${finalResult.substring(0, 500)}${finalResult.length > 500 ? '...' : ''}`);
3971
- } else if (validation.isValid && this.debug) {
3972
- console.log(`[DEBUG] JSON validation: Final validation successful`);
3973
- }
3974
- }
3975
-
3976
- // Record JSON validation completion in telemetry
3977
- if (this.tracer) {
3978
- this.tracer.recordJsonValidationEvent('completed', {
3979
- 'json_validation.success': validation.isValid,
3980
- 'json_validation.retry_count': retryCount,
3981
- 'json_validation.max_retries': maxRetries,
3982
- 'json_validation.final_response_length': finalResult.length,
3983
- 'json_validation.error': validation.isValid ? null : validation.error
3984
- });
3985
- }
3986
- }
3987
- } catch (error) {
3988
- console.error('[ERROR] Schema formatting failed:', error);
3989
- // Return the original result if schema formatting fails
3990
- }
3991
- } else if (reachedMaxIterations && options.schema && this.debug) {
3992
- console.log('[DEBUG] Skipping schema formatting due to max iterations reached without completion');
3993
- } else if (completionAttempted && options.schema && !options._schemaFormatted && !options._skipValidation) {
3994
- // For attempt_completion results with schema, validate mermaid diagrams BEFORE cleaning schema
3995
- // This ensures mermaid validation sees the full response before JSON extraction strips content
3996
- // Skip this validation if we're in a recursive correction call (_skipValidation flag)
3852
+ // Schema handling - validate and fix response according to provided schema
3853
+ // Skip if already formatted or in a recursive correction call
3854
+ if (options.schema && !options._schemaFormatted && !options._skipValidation) {
3997
3855
  try {
3998
- // Validate and fix Mermaid diagrams if present (BEFORE schema cleaning)
3856
+ // Step 1: Validate and fix Mermaid diagrams BEFORE cleaning schema
3999
3857
  if (!this.disableMermaidValidation) {
4000
3858
  if (this.debug) {
4001
- console.log(`[DEBUG] Mermaid validation: Validating attempt_completion result BEFORE schema cleaning...`);
3859
+ console.log(`[DEBUG] Mermaid validation: Validating result BEFORE schema cleaning...`);
4002
3860
  }
4003
3861
 
4004
3862
  const mermaidValidation = await validateAndFixMermaidResponse(finalResult, {
@@ -4012,70 +3870,66 @@ Convert your previous response content into actual JSON data that follows this s
4012
3870
  if (mermaidValidation.wasFixed) {
4013
3871
  finalResult = mermaidValidation.fixedResponse;
4014
3872
  if (this.debug) {
4015
- console.log(`[DEBUG] Mermaid validation: attempt_completion diagrams fixed`);
3873
+ console.log(`[DEBUG] Mermaid validation: Diagrams fixed`);
4016
3874
  if (mermaidValidation.performanceMetrics) {
4017
3875
  console.log(`[DEBUG] Mermaid validation: Fixed in ${mermaidValidation.performanceMetrics.totalTimeMs}ms`);
4018
3876
  }
4019
3877
  }
4020
3878
  } else if (this.debug) {
4021
- console.log(`[DEBUG] Mermaid validation: attempt_completion result validation completed (no fixes needed)`);
3879
+ console.log(`[DEBUG] Mermaid validation: Completed (no fixes needed)`);
4022
3880
  }
4023
3881
  } else if (this.debug) {
4024
- console.log(`[DEBUG] Mermaid validation: Skipped for attempt_completion result due to disableMermaidValidation option`);
3882
+ console.log(`[DEBUG] Mermaid validation: Skipped due to disableMermaidValidation option`);
4025
3883
  }
4026
3884
 
4027
- // Now clean the schema response (may extract JSON and discard other content)
3885
+ // Step 2: Clean the schema response (remove code blocks, extract JSON)
4028
3886
  finalResult = cleanSchemaResponse(finalResult);
4029
-
4030
- // Validate and potentially correct JSON for attempt_completion results
3887
+
3888
+ // Step 3: Validate and potentially correct JSON responses
4031
3889
  if (isJsonSchema(options.schema)) {
4032
3890
  if (this.debug) {
4033
- console.log(`[DEBUG] JSON validation: Starting validation process for attempt_completion result`);
3891
+ console.log(`[DEBUG] JSON validation: Starting validation process`);
4034
3892
  console.log(`[DEBUG] JSON validation: Response length: ${finalResult.length} chars`);
4035
3893
  }
4036
-
3894
+
4037
3895
  // Record JSON validation start in telemetry
4038
3896
  if (this.tracer) {
4039
- this.tracer.recordJsonValidationEvent('attempt_completion_started', {
3897
+ this.tracer.recordJsonValidationEvent('started', {
4040
3898
  'json_validation.response_length': finalResult.length,
4041
- 'json_validation.schema_type': 'JSON',
4042
- 'json_validation.context': 'attempt_completion'
3899
+ 'json_validation.schema_type': 'JSON'
4043
3900
  });
4044
3901
  }
4045
-
4046
- let validation = validateJsonResponse(finalResult, { debug: this.debug });
3902
+
3903
+ let validation = validateJsonResponse(finalResult, { debug: this.debug, schema: options.schema });
4047
3904
  let retryCount = 0;
4048
3905
  const maxRetries = 3;
4049
-
4050
- // First check if the response is valid JSON but is actually a schema definition
3906
+
3907
+ // Check if the response is valid JSON but is actually a schema definition
4051
3908
  if (validation.isValid && isJsonSchemaDefinition(finalResult, { debug: this.debug })) {
4052
3909
  if (this.debug) {
4053
- console.log(`[DEBUG] JSON validation: attempt_completion response is a JSON schema definition instead of data, correcting...`);
3910
+ console.log(`[DEBUG] JSON validation: Response is a JSON schema definition instead of data, correcting...`);
4054
3911
  }
4055
-
4056
- // Use specialized correction prompt for schema definition confusion
3912
+
4057
3913
  const schemaDefinitionPrompt = createSchemaDefinitionCorrectionPrompt(
4058
3914
  finalResult,
4059
3915
  options.schema,
4060
3916
  0
4061
3917
  );
4062
-
4063
- // Strip schema from correction options to prevent inflated iteration budget (issue #447)
4064
- const { schema: _unusedSchema1, ...schemaDefCorrectionOptions } = options;
3918
+
4065
3919
  finalResult = await this.answer(schemaDefinitionPrompt, [], {
4066
- ...schemaDefCorrectionOptions,
3920
+ ...options,
4067
3921
  _schemaFormatted: true,
4068
- _skipValidation: true, // Skip validation in recursive correction calls to prevent loops
4069
- _completionPromptProcessed: true, // Prevent cascading completion prompts in retry calls
4070
- _maxIterationsOverride: 3 // Correction should complete in 1-2 iterations (issue #447)
3922
+ _skipValidation: true,
3923
+ _disableTools: true,
3924
+ _completionPromptProcessed: true,
3925
+ _maxIterationsOverride: 3
4071
3926
  });
4072
3927
  finalResult = cleanSchemaResponse(finalResult);
4073
- validation = validateJsonResponse(finalResult);
4074
- retryCount = 1; // Start at 1 since we already did one correction
3928
+ validation = validateJsonResponse(finalResult, { debug: this.debug, schema: options.schema });
3929
+ retryCount = 1;
4075
3930
  }
4076
-
4077
- // Before entering correction loop, try auto-wrapping for simple schemas
4078
- // This avoids re-invoking AI for schemas like {text: string} where we can just wrap programmatically
3931
+
3932
+ // Try auto-wrapping for simple schemas before entering correction loop
4079
3933
  if (!validation.isValid) {
4080
3934
  const autoWrapped = tryAutoWrapForSimpleSchema(finalResult, options.schema, { debug: this.debug });
4081
3935
  if (autoWrapped) {
@@ -4083,23 +3937,19 @@ Convert your previous response content into actual JSON data that follows this s
4083
3937
  console.log(`[DEBUG] JSON validation: Auto-wrapped plain text for simple schema`);
4084
3938
  }
4085
3939
  finalResult = autoWrapped;
4086
- validation = validateJsonResponse(finalResult, { debug: this.debug });
3940
+ validation = validateJsonResponse(finalResult, { debug: this.debug, schema: options.schema });
4087
3941
  }
4088
3942
  }
4089
3943
 
3944
+ // Correction loop
4090
3945
  while (!validation.isValid && retryCount < maxRetries) {
4091
3946
  if (this.debug) {
4092
- console.log(`[DEBUG] JSON validation: attempt_completion validation failed (attempt ${retryCount + 1}/${maxRetries}):`, validation.error);
4093
- console.log(`[DEBUG] JSON validation: Invalid response sample: ${finalResult.substring(0, 300)}${finalResult.length > 300 ? '...' : ''}`);
3947
+ console.log(`[DEBUG] JSON validation: Validation failed (attempt ${retryCount + 1}/${maxRetries}):`, validation.error);
4094
3948
  }
4095
-
4096
- // Check if the invalid response is actually a schema definition
3949
+
4097
3950
  let correctionPrompt;
4098
3951
  try {
4099
3952
  if (isJsonSchemaDefinition(finalResult, { debug: this.debug })) {
4100
- if (this.debug) {
4101
- console.log(`[DEBUG] JSON validation: attempt_completion response is still a schema definition, using specialized correction`);
4102
- }
4103
3953
  correctionPrompt = createSchemaDefinitionCorrectionPrompt(
4104
3954
  finalResult,
4105
3955
  options.schema,
@@ -4107,71 +3957,68 @@ Convert your previous response content into actual JSON data that follows this s
4107
3957
  );
4108
3958
  } else {
4109
3959
  correctionPrompt = createJsonCorrectionPrompt(
4110
- finalResult,
4111
- options.schema,
3960
+ finalResult,
3961
+ options.schema,
4112
3962
  validation.error,
4113
3963
  retryCount
4114
3964
  );
4115
3965
  }
4116
3966
  } catch (error) {
4117
- // If we can't parse to check if it's a schema definition, use regular correction
4118
3967
  correctionPrompt = createJsonCorrectionPrompt(
4119
- finalResult,
4120
- options.schema,
3968
+ finalResult,
3969
+ options.schema,
4121
3970
  validation.error,
4122
3971
  retryCount
4123
3972
  );
4124
3973
  }
4125
-
4126
- // Strip schema from correction options to prevent inflated iteration budget (issue #447)
4127
- const { schema: _unusedSchema2, ...correctionOptions } = options;
3974
+
4128
3975
  finalResult = await this.answer(correctionPrompt, [], {
4129
- ...correctionOptions,
3976
+ ...options,
4130
3977
  _schemaFormatted: true,
4131
- _skipValidation: true, // Skip validation in recursive correction calls to prevent loops
4132
- _disableTools: true, // Only allow attempt_completion - prevent AI from using search/query tools
4133
- _completionPromptProcessed: true, // Prevent cascading completion prompts in retry calls
4134
- _maxIterationsOverride: 3 // Correction should complete in 1-2 iterations (issue #447)
3978
+ _skipValidation: true,
3979
+ _disableTools: true,
3980
+ _completionPromptProcessed: true,
3981
+ _maxIterationsOverride: 3
4135
3982
  });
4136
3983
  finalResult = cleanSchemaResponse(finalResult);
4137
-
4138
- // Validate the corrected response
4139
- validation = validateJsonResponse(finalResult, { debug: this.debug });
3984
+
3985
+ validation = validateJsonResponse(finalResult, { debug: this.debug, schema: options.schema });
4140
3986
  retryCount++;
4141
-
3987
+
4142
3988
  if (this.debug) {
4143
3989
  if (validation.isValid) {
4144
- console.log(`[DEBUG] JSON validation: attempt_completion correction successful on attempt ${retryCount}`);
3990
+ console.log(`[DEBUG] JSON validation: Correction successful on attempt ${retryCount}`);
4145
3991
  } else {
4146
- console.log(`[DEBUG] JSON validation: attempt_completion correction failed on attempt ${retryCount}: ${validation.error}`);
3992
+ console.log(`[DEBUG] JSON validation: Correction failed on attempt ${retryCount}: ${validation.error}`);
4147
3993
  }
4148
3994
  }
4149
3995
  }
4150
-
4151
- // Record final validation result
3996
+
3997
+ // Record JSON validation completion in telemetry
4152
3998
  if (this.tracer) {
4153
- this.tracer.recordJsonValidationEvent('attempt_completion_completed', {
3999
+ this.tracer.recordJsonValidationEvent('completed', {
4154
4000
  'json_validation.success': validation.isValid,
4155
4001
  'json_validation.retry_count': retryCount,
4156
- 'json_validation.final_response_length': finalResult.length
4002
+ 'json_validation.max_retries': maxRetries,
4003
+ 'json_validation.final_response_length': finalResult.length,
4004
+ 'json_validation.error': validation.isValid ? null : validation.error
4157
4005
  });
4158
4006
  }
4159
-
4007
+
4160
4008
  if (!validation.isValid && this.debug) {
4161
- console.log(`[DEBUG] JSON validation: attempt_completion result validation failed after ${maxRetries} attempts: ${validation.error}`);
4162
- console.log(`[DEBUG] JSON validation: Final attempt_completion response: ${finalResult.substring(0, 500)}${finalResult.length > 500 ? '...' : ''}`);
4009
+ console.log(`[DEBUG] JSON validation: Failed after ${maxRetries} attempts: ${validation.error}`);
4163
4010
  } else if (validation.isValid && this.debug) {
4164
- console.log(`[DEBUG] JSON validation: attempt_completion result validation successful`);
4011
+ console.log(`[DEBUG] JSON validation: Final validation successful`);
4165
4012
  }
4166
4013
  }
4167
4014
  } catch (error) {
4168
4015
  if (this.debug) {
4169
- console.log(`[DEBUG] attempt_completion result cleanup failed: ${error.message}`);
4016
+ console.log(`[DEBUG] Schema validation/cleanup failed: ${error.message}`);
4170
4017
  }
4171
4018
  }
4172
4019
  }
4173
4020
 
4174
- // Final mermaid validation for all responses (regardless of schema or attempt_completion)
4021
+ // Final mermaid validation for all responses (regardless of schema)
4175
4022
  if (!this.disableMermaidValidation && !options._schemaFormatted) {
4176
4023
  try {
4177
4024
  if (this.debug) {