@probelabs/probe 0.6.0-rc284 → 0.6.0-rc286

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (40)
  1. package/bin/binaries/probe-v0.6.0-rc286-aarch64-apple-darwin.tar.gz +0 -0
  2. package/bin/binaries/{probe-v0.6.0-rc284-aarch64-unknown-linux-musl.tar.gz → probe-v0.6.0-rc286-aarch64-unknown-linux-musl.tar.gz} +0 -0
  3. package/bin/binaries/probe-v0.6.0-rc286-x86_64-apple-darwin.tar.gz +0 -0
  4. package/bin/binaries/probe-v0.6.0-rc286-x86_64-pc-windows-msvc.zip +0 -0
  5. package/bin/binaries/probe-v0.6.0-rc286-x86_64-unknown-linux-musl.tar.gz +0 -0
  6. package/build/agent/ProbeAgent.d.ts +1 -1
  7. package/build/agent/ProbeAgent.js +353 -489
  8. package/build/agent/contextCompactor.js +17 -10
  9. package/build/agent/index.js +325 -719
  10. package/build/agent/schemaUtils.js +10 -11
  11. package/build/agent/shared/prompts.js +2 -2
  12. package/build/agent/tasks/taskTool.js +3 -3
  13. package/build/agent/tools.js +0 -2
  14. package/build/downloader.js +5 -1
  15. package/build/index.js +0 -2
  16. package/build/tools/analyzeAll.js +4 -4
  17. package/build/tools/common.js +55 -55
  18. package/build/tools/index.js +0 -1
  19. package/build/tools/vercel.js +10 -8
  20. package/cjs/agent/ProbeAgent.cjs +316 -775
  21. package/cjs/index.cjs +317 -831
  22. package/package.json +1 -1
  23. package/src/agent/ProbeAgent.d.ts +1 -1
  24. package/src/agent/ProbeAgent.js +353 -489
  25. package/src/agent/contextCompactor.js +17 -10
  26. package/src/agent/index.js +8 -2
  27. package/src/agent/schemaUtils.js +10 -11
  28. package/src/agent/shared/prompts.js +2 -2
  29. package/src/agent/tasks/taskTool.js +3 -3
  30. package/src/agent/tools.js +0 -2
  31. package/src/downloader.js +5 -1
  32. package/src/index.js +0 -2
  33. package/src/tools/analyzeAll.js +4 -4
  34. package/src/tools/common.js +55 -55
  35. package/src/tools/index.js +0 -1
  36. package/src/tools/vercel.js +10 -8
  37. package/bin/binaries/probe-v0.6.0-rc284-aarch64-apple-darwin.tar.gz +0 -0
  38. package/bin/binaries/probe-v0.6.0-rc284-x86_64-apple-darwin.tar.gz +0 -0
  39. package/bin/binaries/probe-v0.6.0-rc284-x86_64-pc-windows-msvc.zip +0 -0
  40. package/bin/binaries/probe-v0.6.0-rc284-x86_64-unknown-linux-musl.tar.gz +0 -0
@@ -31,7 +31,7 @@ import { createAnthropic } from '@ai-sdk/anthropic';
31
31
  import { createOpenAI } from '@ai-sdk/openai';
32
32
  import { createGoogleGenerativeAI } from '@ai-sdk/google';
33
33
  import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock';
34
- import { streamText, tool, stepCountIs, jsonSchema } from 'ai';
34
+ import { streamText, tool, stepCountIs, jsonSchema, Output } from 'ai';
35
35
  import { randomUUID } from 'crypto';
36
36
  import { EventEmitter } from 'events';
37
37
  import { existsSync } from 'fs';
@@ -43,7 +43,6 @@ import { HookManager, HOOK_TYPES } from './hooks/HookManager.js';
43
43
  import { SUPPORTED_IMAGE_EXTENSIONS, IMAGE_MIME_TYPES, isFormatSupportedByProvider } from './imageConfig.js';
44
44
  import {
45
45
  createTools,
46
- attemptCompletionSchema,
47
46
  searchSchema,
48
47
  querySchema,
49
48
  extractSchema,
@@ -61,7 +60,7 @@ import {
61
60
  listSkillsSchema,
62
61
  useSkillSchema
63
62
  } from './tools.js';
64
- import { createMessagePreview } from '../tools/common.js';
63
+ import { createMessagePreview, detectStuckResponse } from '../tools/common.js';
65
64
  import { taskSchema } from './tasks/taskTool.js';
66
65
  import { FileTracker } from '../tools/fileTracker.js';
67
66
  import {
@@ -94,7 +93,7 @@ import { formatAvailableSkillsXml as formatAvailableSkills } from './skills/form
94
93
  import { createSkillToolInstances } from './skills/tools.js';
95
94
  import { RetryManager, createRetryManagerFromEnv } from './RetryManager.js';
96
95
  import { FallbackManager, createFallbackManagerFromEnv, buildFallbackProvidersFromEnv } from './FallbackManager.js';
97
- import { handleContextLimitError } from './contextCompactor.js';
96
+ import { handleContextLimitError, compactMessages, calculateCompactionStats } from './contextCompactor.js';
98
97
  import { formatErrorForAI, ParameterError } from '../utils/error-types.js';
99
98
  import { getCommonPrefix, toRelativePath, safeRealpath } from '../utils/path-validation.js';
100
99
  import { truncateIfNeeded, getMaxOutputTokens } from './outputTruncator.js';
@@ -140,8 +139,8 @@ export function debugTruncate(s, limit = 200) {
140
139
  export function debugLogToolResults(toolResults) {
141
140
  if (!toolResults || toolResults.length === 0) return;
142
141
  for (const tr of toolResults) {
143
- const argsStr = JSON.stringify(tr.args || {});
144
- const resultStr = typeof tr.result === 'string' ? tr.result : JSON.stringify(tr.result || '');
142
+ const argsStr = tr.args != null ? JSON.stringify(tr.args) : '<no args>';
143
+ const resultStr = tr.result != null ? (typeof tr.result === 'string' ? tr.result : JSON.stringify(tr.result)) : '<no result>';
145
144
  console.log(`[DEBUG] tool: ${tr.toolName} | args: ${debugTruncate(argsStr)} | result: ${debugTruncate(resultStr)}`);
146
145
  }
147
146
  }
@@ -196,7 +195,7 @@ export class ProbeAgent {
196
195
  * @param {Array<Object>} [options.fallback.providers] - List of provider configurations for custom fallback
197
196
  * @param {boolean} [options.fallback.stopOnSuccess=true] - Stop on first success
198
197
  * @param {number} [options.fallback.maxTotalAttempts=10] - Maximum total attempts across all providers
199
- * @param {string} [options.completionPrompt] - Custom prompt to run after attempt_completion for validation/review (runs before mermaid/JSON validation)
198
+ * @param {string} [options.completionPrompt] - Custom prompt to run after completion for validation/review (runs before mermaid/JSON validation)
200
199
  * @param {number} [options.maxOutputTokens] - Maximum tokens for tool output before truncation (default: 20000, can also be set via PROBE_MAX_OUTPUT_TOKENS env var)
201
200
  * @param {number} [options.requestTimeout] - Timeout in ms for AI requests (default: 120000 or REQUEST_TIMEOUT env var). Used to abort hung requests.
202
201
  * @param {number} [options.maxOperationTimeout] - Maximum timeout in ms for the entire operation including all retries and fallbacks (default: 300000 or MAX_OPERATION_TIMEOUT env var). This is the absolute maximum time for streamTextWithRetryAndFallback.
@@ -1682,12 +1681,11 @@ export class ProbeAgent {
1682
1681
  * - Delegate tool param injection
1683
1682
  *
1684
1683
  * @param {Object} options - Options from the answer() call
1685
- * @param {Function} onComplete - Callback when attempt_completion is called (receives result string)
1686
1684
  * @param {Object} context - Execution context { maxIterations, currentMessages }
1687
1685
  * @returns {Object} Tools object for streamText()
1688
1686
  * @private
1689
1687
  */
1690
- _buildNativeTools(options, onComplete, context = {}) {
1688
+ _buildNativeTools(options, context = {}) {
1691
1689
  const { maxIterations = 30 } = context;
1692
1690
  const nativeTools = {};
1693
1691
  const isToolAllowed = (toolName) => this.allowedTools.isEnabled(toolName);
@@ -1874,18 +1872,8 @@ export class ProbeAgent {
1874
1872
  });
1875
1873
  };
1876
1874
 
1877
- // Only include attempt_completion when _disableTools is set
1875
+ // When _disableTools is set, provide no tools — the model responds with text directly
1878
1876
  if (options._disableTools) {
1879
- nativeTools.attempt_completion = tool({
1880
- description: 'Signal task completion and provide the final result to the user',
1881
- inputSchema: z.object({
1882
- result: z.string().describe('The final result to present to the user')
1883
- }),
1884
- execute: async ({ result }) => {
1885
- onComplete(result);
1886
- return result;
1887
- }
1888
- });
1889
1877
  return nativeTools;
1890
1878
  }
1891
1879
 
@@ -1902,33 +1890,6 @@ export class ProbeAgent {
1902
1890
  }
1903
1891
  }
1904
1892
 
1905
- // Always add attempt_completion
1906
- nativeTools.attempt_completion = tool({
1907
- description: 'Signal task completion and provide the final result to the user',
1908
- inputSchema: z.object({
1909
- result: z.string().describe('The final result to present to the user')
1910
- }),
1911
- execute: async ({ result }) => {
1912
- // Task completion blocking
1913
- if (this.enableTasks && this.taskManager && this.taskManager.hasIncompleteTasks()) {
1914
- const incompleteTasks = this.taskManager.getIncompleteTasks();
1915
- const highIterationCount = (context.currentIteration || 0) > maxIterations * 0.7;
1916
-
1917
- if (!highIterationCount) {
1918
- const taskSummary = this.taskManager.getTaskSummary();
1919
- const blockedMessage = createTaskCompletionBlockedMessage(taskSummary);
1920
- if (this.debug) {
1921
- console.log('[DEBUG] Task checkpoint: Blocking completion due to incomplete tasks');
1922
- }
1923
- return blockedMessage;
1924
- }
1925
- }
1926
-
1927
- onComplete(result);
1928
- return result;
1929
- }
1930
- });
1931
-
1932
1893
  // Add MCP tools if available
1933
1894
  if (this.mcpBridge && !options._disableTools) {
1934
1895
  const mcpTools = this.mcpBridge.getVercelTools(this._filterMcpTools(this.mcpBridge.getToolNames()));
@@ -3017,7 +2978,7 @@ Follow these instructions carefully:
3017
2978
  2. Use the available tools step-by-step to fulfill the request.
3018
2979
  3. You should always prefer the search tool for code-related questions.${this.searchDelegate ? ' Ask natural language questions — the search subagent handles keyword formulation and returns extracted code blocks. Use extract only to expand context or read full files.' : ' Search handles stemming and case variations automatically — do NOT try keyword variations manually. Read full files only if really necessary.'}
3019
2980
  4. Ensure to get really deep and understand the full picture before answering.
3020
- 5. Once the task is fully completed, use the attempt_completion tool to provide the final result.
2981
+ 5. Once the task is fully completed, provide your final answer directly as text.
3021
2982
  6. ${this.searchDelegate ? 'Ask clear, specific questions when searching. Each search should target a distinct concept or question.' : 'Prefer concise and focused search queries. Use specific keywords and phrases to narrow down results.'}
3022
2983
  7. NEVER use bash for code exploration (no grep, cat, find, head, tail, awk, sed) — always use search and extract tools instead. Bash is only for system operations like building, running tests, or git commands.${this.allowEdit ? `
3023
2984
  7. When modifying files, choose the appropriate tool:
@@ -3224,7 +3185,8 @@ Follow these instructions carefully:
3224
3185
  let userMessage = { role: 'user', content: message.trim() };
3225
3186
 
3226
3187
  // If schema is provided, prepend JSON format requirement to user message
3227
- if (options.schema && !options._schemaFormatted) {
3188
+ // Skip when _disableTools is set — native Output.object() handles schema constraint
3189
+ if (options.schema && !options._schemaFormatted && !options._disableTools) {
3228
3190
  const schemaInstructions = generateSchemaInstructions(options.schema, { debug: this.debug });
3229
3191
  userMessage.content = message.trim() + schemaInstructions;
3230
3192
  }
@@ -3265,8 +3227,26 @@ Follow these instructions carefully:
3265
3227
  ];
3266
3228
  }
3267
3229
 
3230
+ // Proactively compact for multi-turn conversations.
3231
+ // On turn 2+, previous turns contain full tool call/result history which can
3232
+ // be 50K+ tokens. This drowns out the new user message and causes the model to
3233
+ // focus on prior context rather than the new question.
3234
+ // compactMessages strips intermediate monologue from completed segments,
3235
+ // keeping user messages + final answers from prior turns.
3236
+ // Must run AFTER adding the new user message so the compactor sees 2+ segments
3237
+ // (completed prior turns + the new incomplete turn), preserving the latest segment.
3238
+ if (this.history.length > 0) {
3239
+ const compacted = compactMessages(currentMessages, { keepLastSegment: true, minSegmentsToKeep: 1 });
3240
+ if (compacted.length < currentMessages.length) {
3241
+ const stats = calculateCompactionStats(currentMessages, compacted);
3242
+ if (this.debug) {
3243
+ console.log(`[DEBUG] Proactive history compaction: ${currentMessages.length} → ${compacted.length} messages (${stats.reductionPercent}% reduction, ~${stats.tokensSaved} tokens saved)`);
3244
+ }
3245
+ currentMessages = compacted;
3246
+ }
3247
+ }
3248
+
3268
3249
  let currentIteration = 0;
3269
- let completionAttempted = false;
3270
3250
  let finalResult = 'I was unable to complete your request due to reaching the maximum number of tool iterations.';
3271
3251
 
3272
3252
  // Adjust max iterations if schema is provided
@@ -3432,13 +3412,9 @@ Follow these instructions carefully:
3432
3412
  // Iteration counter for telemetry
3433
3413
 
3434
3414
  // Native tool calling via Vercel AI SDK streamText + maxSteps
3435
- let completionResult = null;
3436
3415
  const toolContext = { maxIterations, currentIteration: 0, currentMessages };
3437
3416
 
3438
- const tools = this._buildNativeTools(options, (result) => {
3439
- completionResult = result;
3440
- completionAttempted = true;
3441
- }, toolContext);
3417
+ const tools = this._buildNativeTools(options, toolContext);
3442
3418
 
3443
3419
  if (this.debug) {
3444
3420
  const toolNames = Object.keys(tools);
@@ -3455,6 +3431,10 @@ Follow these instructions carefully:
3455
3431
  }
3456
3432
  }
3457
3433
 
3434
+ // Track whether completionPrompt has been injected into the loop
3435
+ let completionPromptInjected = false;
3436
+ let preCompletionResult = null; // Stores the result before completionPrompt for fallback
3437
+
3458
3438
  // Context compaction retry loop
3459
3439
  let compactionAttempted = false;
3460
3440
  while (true) {
@@ -3465,21 +3445,133 @@ Follow these instructions carefully:
3465
3445
  model: this.provider ? this.provider(this.model) : this.model,
3466
3446
  messages: messagesForAI,
3467
3447
  tools,
3468
- stopWhen: stepCountIs(maxIterations),
3448
+ stopWhen: ({ steps }) => {
3449
+ // Hard limit
3450
+ if (steps.length >= maxIterations) return true;
3451
+
3452
+ const lastStep = steps[steps.length - 1];
3453
+ const modelWantsToStop = lastStep?.finishReason === 'stop'
3454
+ && (!lastStep?.toolCalls || lastStep.toolCalls.length === 0);
3455
+
3456
+ if (modelWantsToStop) {
3457
+ // Task blocking: force continuation when tasks are incomplete
3458
+ if (this.enableTasks && this.taskManager?.hasIncompleteTasks()) {
3459
+ const highIterationCount = steps.length > maxIterations * 0.7;
3460
+ if (!highIterationCount) return false; // Force continuation
3461
+ }
3462
+
3463
+ // Completion prompt: force one more round for review
3464
+ if (this.completionPrompt && !options._completionPromptProcessed && !completionPromptInjected) {
3465
+ // Save the pre-completion result for fallback
3466
+ preCompletionResult = lastStep.text || null;
3467
+ return false; // Force continuation — prepareStep will inject the prompt
3468
+ }
3469
+ }
3470
+
3471
+ // Circuit breaker: consecutive no-tool steps
3472
+ let trailingNoTool = 0;
3473
+ for (let i = steps.length - 1; i >= 0; i--) {
3474
+ if (!steps[i].toolCalls?.length) trailingNoTool++;
3475
+ else break;
3476
+ }
3477
+ if (trailingNoTool >= 5) return true;
3478
+
3479
+ // Circuit breaker: identical/stuck responses
3480
+ if (trailingNoTool >= 3) {
3481
+ const recentTexts = steps.slice(-3).map(s => s.text);
3482
+ if (recentTexts.every(t => t && t === recentTexts[0])) return true;
3483
+ if (recentTexts.every(t => detectStuckResponse(t))) return true;
3484
+ }
3485
+
3486
+ return false;
3487
+ },
3488
+ prepareStep: ({ steps, stepNumber }) => {
3489
+ // Last-iteration warning
3490
+ if (stepNumber === maxIterations - 1) {
3491
+ return {
3492
+ toolChoice: 'none',
3493
+ };
3494
+ }
3495
+
3496
+ const lastStep = steps[steps.length - 1];
3497
+ const modelJustStopped = lastStep?.finishReason === 'stop'
3498
+ && (!lastStep?.toolCalls || lastStep.toolCalls.length === 0);
3499
+
3500
+ if (modelJustStopped) {
3501
+ // Task blocking: inject reminder when tasks are incomplete
3502
+ if (this.enableTasks && this.taskManager?.hasIncompleteTasks()) {
3503
+ const taskSummary = this.taskManager.getTaskSummary();
3504
+ const blockedMessage = createTaskCompletionBlockedMessage(taskSummary);
3505
+ return {
3506
+ userMessage: blockedMessage
3507
+ };
3508
+ }
3509
+
3510
+ // Completion prompt: inject review message on first stop
3511
+ if (this.completionPrompt && !options._completionPromptProcessed && !completionPromptInjected) {
3512
+ completionPromptInjected = true;
3513
+ const resultToReview = lastStep.text || preCompletionResult || '';
3514
+
3515
+ if (this.debug) {
3516
+ console.log('[DEBUG] Injecting completion prompt into main loop via prepareStep...');
3517
+ }
3518
+
3519
+ if (this.tracer) {
3520
+ this.tracer.recordEvent('completion_prompt.started', {
3521
+ 'completion_prompt.original_result_length': resultToReview.length
3522
+ });
3523
+ }
3524
+
3525
+ const completionPromptMessage = `${this.completionPrompt}
3526
+
3527
+ Here is the result to review:
3528
+ <result>
3529
+ ${resultToReview}
3530
+ </result>
3531
+
3532
+ Double-check your response based on the criteria above. If everything looks good, respond with your previous answer exactly as-is. If something needs to be fixed or is missing, do it now, then respond with the COMPLETE updated answer (everything you did in total, not just the fix).`;
3533
+
3534
+ return {
3535
+ userMessage: completionPromptMessage
3536
+ };
3537
+ }
3538
+ }
3539
+
3540
+ return undefined;
3541
+ },
3469
3542
  maxTokens: maxResponseTokens,
3470
3543
  temperature: 0.3,
3471
- onStepFinish: ({ toolResults, text, finishReason, usage }) => {
3544
+ onStepFinish: (stepResult) => {
3545
+ const { toolResults, toolCalls, text, reasoningText, finishReason, usage } = stepResult;
3472
3546
  currentIteration++;
3473
3547
  toolContext.currentIteration = currentIteration;
3474
3548
 
3475
- // Record telemetry
3549
+ // Record telemetry — include model's reasoning and tool call details
3476
3550
  if (this.tracer) {
3477
- this.tracer.addEvent('iteration.step', {
3551
+ const stepEvent = {
3478
3552
  'iteration': currentIteration,
3479
3553
  'max_iterations': maxIterations,
3480
3554
  'finish_reason': finishReason,
3481
3555
  'has_tool_calls': !!(toolResults && toolResults.length > 0)
3482
- });
3556
+ };
3557
+ // Model's text output (its monologue explaining why it's calling tools)
3558
+ if (text) {
3559
+ stepEvent['ai.text'] = text.substring(0, 10000);
3560
+ stepEvent['ai.text.length'] = text.length;
3561
+ }
3562
+ // Model's internal reasoning/thinking tokens (if available)
3563
+ if (reasoningText) {
3564
+ stepEvent['ai.reasoning'] = reasoningText.substring(0, 10000);
3565
+ stepEvent['ai.reasoning.length'] = reasoningText.length;
3566
+ }
3567
+ // Tool call names and args for this step
3568
+ if (toolCalls && toolCalls.length > 0) {
3569
+ stepEvent['ai.tool_calls'] = toolCalls.map(tc => ({
3570
+ name: tc.toolName,
3571
+ args: JSON.stringify(tc.args || {}).substring(0, 2000)
3572
+ }));
3573
+ }
3574
+ this.tracer.addEvent('iteration.step', stepEvent);
3483
3575
  }
3484
3576
 
3485
3577
  // Record token usage
@@ -3494,11 +3586,38 @@ Follow these instructions carefully:
3494
3586
 
3495
3587
  if (this.debug) {
3496
3588
  console.log(`[DEBUG] Step ${currentIteration}/${maxIterations} finished (reason: ${finishReason}, tools: ${toolResults?.length || 0})`);
3589
+ if (text) {
3590
+ console.log(`[DEBUG] model text: ${debugTruncate(text)}`);
3591
+ }
3592
+ if (reasoningText) {
3593
+ console.log(`[DEBUG] reasoning: ${debugTruncate(reasoningText)}`);
3594
+ }
3497
3595
  debugLogToolResults(toolResults);
3498
3596
  }
3499
3597
  }
3500
3598
  };
3501
3599
 
3600
+ // Native JSON schema output — use model's built-in JSON schema constraint
3601
+ // when no tools are active (many providers like Gemini don't support
3602
+ // structured output + function calling simultaneously).
3603
+ // When tools ARE active, we rely on AJV post-validation + correction loop.
3604
+ const hasActiveTools = Object.keys(tools).length > 0;
3605
+ if (options.schema && !hasActiveTools) {
3606
+ try {
3607
+ const parsedSchema = typeof options.schema === 'string' ? JSON.parse(options.schema) : options.schema;
3608
+ if (isJsonSchema(options.schema)) {
3609
+ streamOptions.output = Output.object({ schema: jsonSchema(parsedSchema) });
3610
+ if (this.debug) {
3611
+ console.log(`[DEBUG] Native JSON schema output enabled (no active tools)`);
3612
+ }
3613
+ }
3614
+ } catch (e) {
3615
+ if (this.debug) {
3616
+ console.log(`[DEBUG] Failed to set native JSON schema output: ${e.message}`);
3617
+ }
3618
+ }
3619
+ }
3620
+
3502
3621
  // Add native thinking/reasoning providerOptions when thinkingEffort is set
3503
3622
  const providerOpts = this._buildThinkingProviderOptions(maxResponseTokens);
3504
3623
  if (providerOpts) {
@@ -3513,7 +3632,7 @@ Follow these instructions carefully:
3513
3632
 
3514
3633
  if (this.debug) {
3515
3634
  const steps = await result.steps;
3516
- console.log(`[DEBUG] streamText completed: ${steps?.length || 0} steps, finalText=${finalText?.length || 0} chars, completion=${!!completionResult}`);
3635
+ console.log(`[DEBUG] streamText completed: ${steps?.length || 0} steps, finalText=${finalText?.length || 0} chars`);
3517
3636
  }
3518
3637
 
3519
3638
  // Record final token usage
@@ -3545,21 +3664,26 @@ Follow these instructions carefully:
3545
3664
  aiResult = await executeAIRequest();
3546
3665
  }
3547
3666
 
3548
- // Use completion result if available, otherwise use final text
3549
- if (completionResult) {
3550
- finalResult = completionResult;
3551
-
3552
- // Stream the final result if callback is provided
3553
- if (options.onStream && finalResult) {
3554
- const chunkSize = 50;
3555
- for (let i = 0; i < finalResult.length; i += chunkSize) {
3556
- const chunk = finalResult.slice(i, Math.min(i + chunkSize, finalResult.length));
3557
- options.onStream(chunk);
3667
+ // Try native JSON schema output first — Output.object() is set when no tools are active
3668
+ if (options.schema && streamOptions.output) {
3669
+ try {
3670
+ const outputObject = await aiResult.result.output;
3671
+ if (outputObject) {
3672
+ finalResult = JSON.stringify(outputObject);
3673
+ } else if (aiResult.finalText) {
3674
+ finalResult = aiResult.finalText;
3675
+ }
3676
+ } catch (e) {
3677
+ // NoObjectGeneratedError — fall back to text-based extraction
3678
+ if (this.debug) {
3679
+ console.log(`[DEBUG] Native JSON output failed, falling back to text: ${e.message}`);
3680
+ }
3681
+ if (aiResult.finalText) {
3682
+ finalResult = aiResult.finalText;
3558
3683
  }
3559
3684
  }
3560
3685
  } else if (aiResult.finalText) {
3561
3686
  finalResult = aiResult.finalText;
3562
- completionAttempted = true;
3563
3687
  }
3564
3688
 
3565
3689
  // Update currentMessages from the result for history storage
@@ -3572,6 +3696,94 @@ Follow these instructions carefully:
3572
3696
  }
3573
3697
  }
3574
3698
 
3699
+ // Post-streamText completionPrompt fallback:
3700
+ // The stopWhen/prepareStep mechanism only fires between tool-call steps.
3701
+ // If the model answered without tool calls (or its final step had none),
3702
+ // stopWhen never gets a chance to force continuation. In that case, run
3703
+ // a second streamText pass with the completion prompt injected.
3704
+ if (this.completionPrompt && !options._completionPromptProcessed && !completionPromptInjected && finalResult) {
3705
+ completionPromptInjected = true;
3706
+ preCompletionResult = finalResult;
3707
+
3708
+ if (this.debug) {
3709
+ console.log('[DEBUG] Injecting completion prompt as post-streamText follow-up pass...');
3710
+ }
3711
+
3712
+ if (this.tracer) {
3713
+ this.tracer.recordEvent('completion_prompt.started', {
3714
+ 'completion_prompt.original_result_length': finalResult.length
3715
+ });
3716
+ }
3717
+
3718
+ const completionPromptMessage = `${this.completionPrompt}
3719
+
3720
+ Here is the result to review:
3721
+ <result>
3722
+ ${finalResult}
3723
+ </result>
3724
+
3725
+ Double-check your response based on the criteria above. If everything looks good, respond with your previous answer exactly as-is. If something needs to be fixed or is missing, do it now, then respond with the COMPLETE updated answer (everything you did in total, not just the fix).`;
3726
+
3727
+ currentMessages.push({ role: 'user', content: completionPromptMessage });
3728
+
3729
+ const completionMaxIterations = 5;
3730
+ const completionStreamOptions = {
3731
+ model: this.provider ? this.provider(this.model) : this.model,
3732
+ messages: this.prepareMessagesWithImages(currentMessages),
3733
+ tools,
3734
+ stopWhen: stepCountIs(completionMaxIterations),
3735
+ maxTokens: maxResponseTokens,
3736
+ temperature: 0.3,
3737
+ onStepFinish: ({ toolResults, text, finishReason, usage }) => {
3738
+ if (usage) {
3739
+ this.tokenCounter.recordUsage(usage);
3740
+ }
3741
+ if (options.onStream && text) {
3742
+ options.onStream(text);
3743
+ }
3744
+ if (this.debug) {
3745
+ console.log(`[DEBUG] Completion prompt step finished (reason: ${finishReason}, tools: ${toolResults?.length || 0})`);
3746
+ }
3747
+ }
3748
+ };
3749
+
3750
+ const providerOpts = this._buildThinkingProviderOptions(maxResponseTokens);
3751
+ if (providerOpts) {
3752
+ completionStreamOptions.providerOptions = providerOpts;
3753
+ }
3754
+
3755
+ try {
3756
+ const cpResult = await this.streamTextWithRetryAndFallback(completionStreamOptions);
3757
+ const cpFinalText = await cpResult.text;
3758
+ const cpUsage = await cpResult.usage;
3759
+ if (cpUsage) {
3760
+ this.tokenCounter.recordUsage(cpUsage, cpResult.experimental_providerMetadata);
3761
+ }
3762
+
3763
+ // Append follow-up messages to conversation history
3764
+ const cpMessages = await cpResult.response?.messages;
3765
+ if (cpMessages) {
3766
+ for (const msg of cpMessages) {
3767
+ currentMessages.push(msg);
3768
+ }
3769
+ }
3770
+
3771
+ // Use updated result if non-empty, otherwise keep original
3772
+ if (cpFinalText && cpFinalText.trim().length > 0) {
3773
+ finalResult = cpFinalText;
3774
+ }
3775
+
3776
+ if (this.debug) {
3777
+ console.log(`[DEBUG] Completion prompt follow-up produced ${cpFinalText?.length || 0} chars (using ${cpFinalText && cpFinalText.trim().length > 0 ? 'updated' : 'original'} result)`);
3778
+ }
3779
+ } catch (cpError) {
3780
+ if (this.debug) {
3781
+ console.log(`[DEBUG] Completion prompt follow-up failed: ${cpError.message}, keeping original result`);
3782
+ }
3783
+ // Keep original result on failure
3784
+ }
3785
+ }
3786
+
3575
3787
  break; // Success
3576
3788
 
3577
3789
  } catch (error) {
@@ -3610,20 +3822,14 @@ Follow these instructions carefully:
3610
3822
  }
3611
3823
  }
3612
3824
 
3613
- // Handle AbortError from attempt_completion gracefully
3614
- if (completionResult) {
3615
- finalResult = completionResult;
3616
- break;
3617
- }
3618
-
3619
3825
  console.error(`Error during streamText:`, error);
3620
3826
  finalResult = `Error: Failed to get response from AI model. ${error.message}`;
3621
3827
  throw new Error(finalResult);
3622
3828
  }
3623
3829
  }
3624
3830
 
3625
- if (currentIteration >= maxIterations && !completionAttempted) {
3626
- console.warn(`[WARN] Max tool iterations (${maxIterations}) reached for session ${this.sessionId}. Returning current error state.`);
3831
+ if (currentIteration >= maxIterations) {
3832
+ console.warn(`[WARN] Max tool iterations (${maxIterations}) reached for session ${this.sessionId}.`);
3627
3833
  }
3628
3834
 
3629
3835
  // Store final history
@@ -3654,351 +3860,22 @@ Follow these instructions carefully:
3654
3860
  // Continue even if storage fails
3655
3861
  }
3656
3862
 
3657
- // Completion prompt handling - inject one more user message into the existing conversation
3658
- // This continues the SAME agentic session (same tools, same TaskManager, same history)
3659
- // rather than spawning a recursive this.answer() call which would reset state
3660
- if (completionAttempted && this.completionPrompt && !options._completionPromptProcessed) {
3661
- if (this.debug) {
3662
- console.log('[DEBUG] Running completion prompt as continuation of current session...');
3663
- }
3664
-
3665
- try {
3666
- const originalResult = finalResult;
3667
-
3668
- if (this.tracer) {
3669
- this.tracer.recordEvent('completion_prompt.started', {
3670
- 'completion_prompt.original_result_length': finalResult?.length || 0
3671
- });
3672
- }
3673
-
3674
- // Append completion prompt as a user message to the existing conversation
3675
- const completionPromptMessage = `${this.completionPrompt}
3676
-
3677
- Here is the result to review:
3678
- <result>
3679
- ${finalResult}
3680
- </result>
3681
-
3682
- Double-check your response based on the criteria above. If everything looks good, respond with your previous answer exactly as-is using attempt_completion. If something needs to be fixed or is missing, do it now, then respond with the COMPLETE updated answer (everything you did in total, not just the fix) using attempt_completion.`;
3683
-
3684
- currentMessages.push({ role: 'user', content: completionPromptMessage });
3685
-
3686
- // Reset completion tracking for the follow-up turn
3687
- completionResult = null;
3688
- completionAttempted = false;
3689
-
3690
- // Run one more streamText pass with the same tools and conversation context
3691
- // Give a small number of extra iterations for the follow-up
3692
- const completionMaxIterations = 5;
3693
- const completionStreamOptions = {
3694
- model: this.provider ? this.provider(this.model) : this.model,
3695
- messages: this.prepareMessagesWithImages(currentMessages),
3696
- tools,
3697
- stopWhen: stepCountIs(completionMaxIterations),
3698
- maxTokens: maxResponseTokens,
3699
- temperature: 0.3,
3700
- onStepFinish: ({ toolResults, text, finishReason, usage }) => {
3701
- if (usage) {
3702
- this.tokenCounter.recordUsage(usage);
3703
- }
3704
- if (options.onStream && text) {
3705
- options.onStream(text);
3706
- }
3707
- if (this.debug) {
3708
- console.log(`[DEBUG] Completion prompt step finished (reason: ${finishReason}, tools: ${toolResults?.length || 0})`);
3709
- debugLogToolResults(toolResults);
3710
- }
3711
- }
3712
- };
3713
-
3714
- const providerOpts = this._buildThinkingProviderOptions(maxResponseTokens);
3715
- if (providerOpts) {
3716
- completionStreamOptions.providerOptions = providerOpts;
3717
- }
3718
-
3719
- const cpResult = await this.streamTextWithRetryAndFallback(completionStreamOptions);
3720
- const cpFinalText = await cpResult.text;
3721
- const cpUsage = await cpResult.usage;
3722
- if (cpUsage) {
3723
- this.tokenCounter.recordUsage(cpUsage, cpResult.experimental_providerMetadata);
3724
- }
3725
-
3726
- // Append follow-up messages to conversation history
3727
- const cpMessages = await cpResult.response?.messages;
3728
- if (cpMessages) {
3729
- for (const msg of cpMessages) {
3730
- currentMessages.push(msg);
3731
- }
3732
- }
3733
-
3734
- // Use new completion result if the agent called attempt_completion again,
3735
- // otherwise keep the original result (the follow-up may have just done side-effects)
3736
- if (completionResult) {
3737
- finalResult = completionResult;
3738
- completionAttempted = true;
3739
- } else if (cpFinalText && cpFinalText.trim().length > 0) {
3740
- finalResult = cpFinalText;
3741
- completionAttempted = true;
3742
- } else {
3743
- // Follow-up produced nothing useful — keep the original
3744
- finalResult = originalResult;
3745
- completionAttempted = true;
3746
- if (this.debug) {
3747
- console.log('[DEBUG] Completion prompt returned empty result, keeping original.');
3748
- }
3749
- }
3750
-
3751
- if (this.debug) {
3752
- console.log(`[DEBUG] Completion prompt finished. Final result length: ${finalResult?.length || 0}`);
3753
- }
3754
-
3755
- if (this.tracer) {
3756
- this.tracer.recordEvent('completion_prompt.completed', {
3757
- 'completion_prompt.final_result_length': finalResult?.length || 0,
3758
- 'completion_prompt.used_original': finalResult === originalResult
3759
- });
3760
- }
3761
- } catch (error) {
3762
- console.error('[ERROR] Completion prompt failed:', error);
3763
- // Keep the original result if completion prompt fails
3764
- if (this.tracer) {
3765
- this.tracer.recordEvent('completion_prompt.error', {
3766
- 'completion_prompt.error': error.message
3767
- });
3768
- }
3769
- }
3863
+ // Log completion prompt telemetry if it was injected
3864
+ if (completionPromptInjected && this.tracer) {
3865
+ this.tracer.recordEvent('completion_prompt.completed', {
3866
+ 'completion_prompt.final_result_length': finalResult?.length || 0,
3867
+ 'completion_prompt.used_original': preCompletionResult && finalResult === preCompletionResult
3868
+ });
3770
3869
  }
3771
3870
 
3772
- // Schema handling - format response according to provided schema
3773
- // Skip schema processing if result came from attempt_completion tool
3774
- // Don't apply schema formatting if we failed due to max iterations
3775
- const reachedMaxIterations = currentIteration >= maxIterations && !completionAttempted;
3776
- if (options.schema && !options._schemaFormatted && !completionAttempted && !reachedMaxIterations) {
3777
- if (this.debug) {
3778
- console.log('[DEBUG] Schema provided, applying automatic formatting...');
3779
- }
3780
-
3871
+ // Schema handling - validate and fix response according to provided schema
3872
+ // Skip if already formatted or in a recursive correction call
3873
+ if (options.schema && !options._schemaFormatted && !options._skipValidation) {
3781
3874
  try {
3782
- // Step 1: Make a follow-up call to format according to schema
3783
- const schemaPrompt = `CRITICAL: You MUST respond with ONLY valid JSON DATA that conforms to this schema structure. DO NOT return the schema definition itself.
3784
-
3785
- Schema to follow (this is just the structure - provide ACTUAL DATA):
3786
- ${options.schema}
3787
-
3788
- REQUIREMENTS:
3789
- - Return ONLY the JSON object/array with REAL DATA that matches the schema structure
3790
- - DO NOT return the schema definition itself (no "$schema", "$id", "type", "properties", etc.)
3791
- - NO additional text, explanations, or markdown formatting
3792
- - NO code blocks or backticks
3793
- - The JSON must be parseable by JSON.parse()
3794
- - Fill in actual values that make sense based on your previous response content
3795
-
3796
- EXAMPLE:
3797
- If schema defines {type: "object", properties: {name: {type: "string"}, age: {type: "number"}}}
3798
- Return: {"name": "John Doe", "age": 25}
3799
- NOT: {"type": "object", "properties": {"name": {"type": "string"}}}
3800
-
3801
- Convert your previous response content into actual JSON data that follows this schema structure.`;
3802
-
3803
- // Call answer recursively with _schemaFormatted flag to prevent infinite loop
3804
- finalResult = await this.answer(schemaPrompt, [], {
3805
- ...options,
3806
- _schemaFormatted: true,
3807
- _completionPromptProcessed: true // Prevent cascading completion prompts in retry calls
3808
- });
3809
-
3810
- // Step 2: Validate and fix Mermaid diagrams if present (BEFORE cleaning schema)
3811
- // This ensures mermaid validation sees the full response before JSON extraction strips content
3875
+ // Step 1: Validate and fix Mermaid diagrams BEFORE cleaning schema
3812
3876
  if (!this.disableMermaidValidation) {
3813
- try {
3814
- if (this.debug) {
3815
- console.log(`[DEBUG] Mermaid validation: Starting enhanced mermaid validation...`);
3816
- }
3817
-
3818
- // Record mermaid validation start in telemetry
3819
- if (this.tracer) {
3820
- this.tracer.recordMermaidValidationEvent('schema_processing_started', {
3821
- 'mermaid_validation.context': 'schema_processing',
3822
- 'mermaid_validation.response_length': finalResult.length
3823
- });
3824
- }
3825
-
3826
- const mermaidValidation = await validateAndFixMermaidResponse(finalResult, {
3827
- debug: this.debug,
3828
- path: this.workspaceRoot || this.allowedFolders[0],
3829
- provider: this.clientApiProvider,
3830
- model: this.model,
3831
- tracer: this.tracer
3832
- });
3833
-
3834
- if (mermaidValidation.wasFixed) {
3835
- finalResult = mermaidValidation.fixedResponse;
3836
- if (this.debug) {
3837
- console.log(`[DEBUG] Mermaid validation: Diagrams successfully fixed`);
3838
-
3839
- if (mermaidValidation.performanceMetrics) {
3840
- const metrics = mermaidValidation.performanceMetrics;
3841
- console.log(`[DEBUG] Mermaid validation: Performance - total: ${metrics.totalTimeMs}ms, AI fixing: ${metrics.aiFixingTimeMs}ms`);
3842
- console.log(`[DEBUG] Mermaid validation: Results - ${metrics.diagramsFixed}/${metrics.diagramsProcessed} diagrams fixed`);
3843
- }
3844
-
3845
- if (mermaidValidation.fixingResults) {
3846
- mermaidValidation.fixingResults.forEach((fixResult, index) => {
3847
- if (fixResult.wasFixed) {
3848
- const method = fixResult.fixedWithHtmlDecoding ? 'HTML entity decoding' : 'AI correction';
3849
- const time = fixResult.aiFixingTimeMs ? ` in ${fixResult.aiFixingTimeMs}ms` : '';
3850
- console.log(`[DEBUG] Mermaid validation: Fixed diagram ${fixResult.diagramIndex + 1} with ${method}${time}`);
3851
- console.log(`[DEBUG] Mermaid validation: Original error: ${fixResult.originalError}`);
3852
- } else {
3853
- console.log(`[DEBUG] Mermaid validation: Failed to fix diagram ${fixResult.diagramIndex + 1}: ${fixResult.fixingError}`);
3854
- }
3855
- });
3856
- }
3857
- }
3858
- } else if (this.debug) {
3859
- console.log(`[DEBUG] Mermaid validation: No fixes needed or fixes unsuccessful`);
3860
- if (mermaidValidation.diagrams?.length > 0) {
3861
- console.log(`[DEBUG] Mermaid validation: Found ${mermaidValidation.diagrams.length} diagrams, all valid: ${mermaidValidation.isValid}`);
3862
- }
3863
- }
3864
- } catch (error) {
3865
- if (this.debug) {
3866
- console.log(`[DEBUG] Mermaid validation: Process failed with error: ${error.message}`);
3867
- console.log(`[DEBUG] Mermaid validation: Stack trace: ${error.stack}`);
3868
- }
3869
- }
3870
- } else if (this.debug) {
3871
- console.log(`[DEBUG] Mermaid validation: Skipped due to disableMermaidValidation option`);
3872
- }
3873
-
3874
- // Step 3: Clean the response (remove code blocks, extract JSON)
3875
- // This happens AFTER mermaid validation to preserve full content for validation
3876
- finalResult = cleanSchemaResponse(finalResult);
3877
-
3878
- // Step 4: Validate and potentially correct JSON responses
3879
- if (isJsonSchema(options.schema)) {
3880
3877
  if (this.debug) {
3881
- console.log(`[DEBUG] JSON validation: Starting validation process for schema response`);
3882
- console.log(`[DEBUG] JSON validation: Cleaned response length: ${finalResult.length} chars`);
3883
- }
3884
-
3885
- // Record JSON validation start in telemetry
3886
- if (this.tracer) {
3887
- this.tracer.recordJsonValidationEvent('started', {
3888
- 'json_validation.response_length': finalResult.length,
3889
- 'json_validation.schema_type': 'JSON'
3890
- });
3891
- }
3892
-
3893
- let validation = validateJsonResponse(finalResult, { debug: this.debug, schema: options.schema });
3894
- let retryCount = 0;
3895
- const maxRetries = 3;
3896
-
3897
- // First check if the response is valid JSON but is actually a schema definition
3898
- if (validation.isValid && isJsonSchemaDefinition(finalResult, { debug: this.debug })) {
3899
- if (this.debug) {
3900
- console.log(`[DEBUG] JSON validation: Response is a JSON schema definition instead of data, needs correction...`);
3901
- }
3902
- // Mark as invalid so it goes through the fixing process
3903
- validation = {
3904
- isValid: false,
3905
- error: 'Response is a JSON schema definition instead of actual data',
3906
- enhancedError: 'Response is a JSON schema definition instead of actual data. Please return data that conforms to the schema, not the schema itself.'
3907
- };
3908
- }
3909
-
3910
- // Use separate JsonFixingAgent for JSON corrections (isolates session like Mermaid fixing)
3911
- if (!validation.isValid) {
3912
- if (this.debug) {
3913
- console.log(`[DEBUG] JSON validation: Starting separate JsonFixingAgent session...`);
3914
- }
3915
-
3916
- const { JsonFixingAgent } = await import('./schemaUtils.js');
3917
- const jsonFixer = new JsonFixingAgent({
3918
- path: this.workspaceRoot || this.allowedFolders[0],
3919
- provider: this.clientApiProvider,
3920
- model: this.model,
3921
- debug: this.debug,
3922
- tracer: this.tracer
3923
- });
3924
-
3925
- let currentResult = finalResult;
3926
- let currentValidation = validation;
3927
-
3928
- while (!currentValidation.isValid && retryCount < maxRetries) {
3929
- if (this.debug) {
3930
- console.log(`[DEBUG] JSON validation: Validation failed (attempt ${retryCount + 1}/${maxRetries}):`, currentValidation.error);
3931
- console.log(`[DEBUG] JSON validation: Invalid response sample: ${currentResult.substring(0, 300)}${currentResult.length > 300 ? '...' : ''}`);
3932
- }
3933
-
3934
- try {
3935
- // Use specialized JsonFixingAgent to fix the JSON in a separate session
3936
- currentResult = await jsonFixer.fixJson(
3937
- currentResult,
3938
- options.schema,
3939
- currentValidation,
3940
- retryCount + 1
3941
- );
3942
-
3943
- // Validate the corrected response
3944
- currentValidation = validateJsonResponse(currentResult, { debug: this.debug, schema: options.schema });
3945
- retryCount++;
3946
-
3947
- if (this.debug) {
3948
- if (!currentValidation.isValid && retryCount < maxRetries) {
3949
- console.log(`[DEBUG] JSON validation: Still invalid after correction ${retryCount}, retrying...`);
3950
- console.log(`[DEBUG] JSON validation: Corrected response sample: ${currentResult.substring(0, 300)}${currentResult.length > 300 ? '...' : ''}`);
3951
- } else if (currentValidation.isValid) {
3952
- console.log(`[DEBUG] JSON validation: Successfully corrected after ${retryCount} attempts with JsonFixingAgent`);
3953
- }
3954
- }
3955
- } catch (error) {
3956
- if (this.debug) {
3957
- console.error(`[DEBUG] JSON validation: JsonFixingAgent error on attempt ${retryCount + 1}:`, error.message);
3958
- }
3959
- // If JsonFixingAgent fails, break out of loop
3960
- break;
3961
- }
3962
- }
3963
-
3964
- // Update finalResult with the fixed version
3965
- finalResult = currentResult;
3966
- validation = currentValidation;
3967
-
3968
- if (!validation.isValid && this.debug) {
3969
- console.log(`[DEBUG] JSON validation: Still invalid after ${maxRetries} correction attempts with JsonFixingAgent:`, validation.error);
3970
- console.log(`[DEBUG] JSON validation: Final invalid response: ${finalResult.substring(0, 500)}${finalResult.length > 500 ? '...' : ''}`);
3971
- } else if (validation.isValid && this.debug) {
3972
- console.log(`[DEBUG] JSON validation: Final validation successful`);
3973
- }
3974
- }
3975
-
3976
- // Record JSON validation completion in telemetry
3977
- if (this.tracer) {
3978
- this.tracer.recordJsonValidationEvent('completed', {
3979
- 'json_validation.success': validation.isValid,
3980
- 'json_validation.retry_count': retryCount,
3981
- 'json_validation.max_retries': maxRetries,
3982
- 'json_validation.final_response_length': finalResult.length,
3983
- 'json_validation.error': validation.isValid ? null : validation.error
3984
- });
3985
- }
3986
- }
3987
- } catch (error) {
3988
- console.error('[ERROR] Schema formatting failed:', error);
3989
- // Return the original result if schema formatting fails
3990
- }
3991
- } else if (reachedMaxIterations && options.schema && this.debug) {
3992
- console.log('[DEBUG] Skipping schema formatting due to max iterations reached without completion');
3993
- } else if (completionAttempted && options.schema && !options._schemaFormatted && !options._skipValidation) {
3994
- // For attempt_completion results with schema, validate mermaid diagrams BEFORE cleaning schema
3995
- // This ensures mermaid validation sees the full response before JSON extraction strips content
3996
- // Skip this validation if we're in a recursive correction call (_skipValidation flag)
3997
- try {
3998
- // Validate and fix Mermaid diagrams if present (BEFORE schema cleaning)
3999
- if (!this.disableMermaidValidation) {
4000
- if (this.debug) {
4001
- console.log(`[DEBUG] Mermaid validation: Validating attempt_completion result BEFORE schema cleaning...`);
3878
+ console.log(`[DEBUG] Mermaid validation: Validating result BEFORE schema cleaning...`);
4002
3879
  }
4003
3880
 
4004
3881
  const mermaidValidation = await validateAndFixMermaidResponse(finalResult, {
@@ -4012,70 +3889,66 @@ Convert your previous response content into actual JSON data that follows this s
4012
3889
  if (mermaidValidation.wasFixed) {
4013
3890
  finalResult = mermaidValidation.fixedResponse;
4014
3891
  if (this.debug) {
4015
- console.log(`[DEBUG] Mermaid validation: attempt_completion diagrams fixed`);
3892
+ console.log(`[DEBUG] Mermaid validation: Diagrams fixed`);
4016
3893
  if (mermaidValidation.performanceMetrics) {
4017
3894
  console.log(`[DEBUG] Mermaid validation: Fixed in ${mermaidValidation.performanceMetrics.totalTimeMs}ms`);
4018
3895
  }
4019
3896
  }
4020
3897
  } else if (this.debug) {
4021
- console.log(`[DEBUG] Mermaid validation: attempt_completion result validation completed (no fixes needed)`);
3898
+ console.log(`[DEBUG] Mermaid validation: Completed (no fixes needed)`);
4022
3899
  }
4023
3900
  } else if (this.debug) {
4024
- console.log(`[DEBUG] Mermaid validation: Skipped for attempt_completion result due to disableMermaidValidation option`);
3901
+ console.log(`[DEBUG] Mermaid validation: Skipped due to disableMermaidValidation option`);
4025
3902
  }
4026
3903
 
4027
- // Now clean the schema response (may extract JSON and discard other content)
3904
+ // Step 2: Clean the schema response (remove code blocks, extract JSON)
4028
3905
  finalResult = cleanSchemaResponse(finalResult);
4029
-
4030
- // Validate and potentially correct JSON for attempt_completion results
3906
+
3907
+ // Step 3: Validate and potentially correct JSON responses
4031
3908
  if (isJsonSchema(options.schema)) {
4032
3909
  if (this.debug) {
4033
- console.log(`[DEBUG] JSON validation: Starting validation process for attempt_completion result`);
3910
+ console.log(`[DEBUG] JSON validation: Starting validation process`);
4034
3911
  console.log(`[DEBUG] JSON validation: Response length: ${finalResult.length} chars`);
4035
3912
  }
4036
-
3913
+
4037
3914
  // Record JSON validation start in telemetry
4038
3915
  if (this.tracer) {
4039
- this.tracer.recordJsonValidationEvent('attempt_completion_started', {
3916
+ this.tracer.recordJsonValidationEvent('started', {
4040
3917
  'json_validation.response_length': finalResult.length,
4041
- 'json_validation.schema_type': 'JSON',
4042
- 'json_validation.context': 'attempt_completion'
3918
+ 'json_validation.schema_type': 'JSON'
4043
3919
  });
4044
3920
  }
4045
-
4046
- let validation = validateJsonResponse(finalResult, { debug: this.debug });
3921
+
3922
+ let validation = validateJsonResponse(finalResult, { debug: this.debug, schema: options.schema });
4047
3923
  let retryCount = 0;
4048
3924
  const maxRetries = 3;
4049
-
4050
- // First check if the response is valid JSON but is actually a schema definition
3925
+
3926
+ // Check if the response is valid JSON but is actually a schema definition
4051
3927
  if (validation.isValid && isJsonSchemaDefinition(finalResult, { debug: this.debug })) {
4052
3928
  if (this.debug) {
4053
- console.log(`[DEBUG] JSON validation: attempt_completion response is a JSON schema definition instead of data, correcting...`);
3929
+ console.log(`[DEBUG] JSON validation: Response is a JSON schema definition instead of data, correcting...`);
4054
3930
  }
4055
-
4056
- // Use specialized correction prompt for schema definition confusion
3931
+
4057
3932
  const schemaDefinitionPrompt = createSchemaDefinitionCorrectionPrompt(
4058
3933
  finalResult,
4059
3934
  options.schema,
4060
3935
  0
4061
3936
  );
4062
-
4063
- // Strip schema from correction options to prevent inflated iteration budget (issue #447)
4064
- const { schema: _unusedSchema1, ...schemaDefCorrectionOptions } = options;
3937
+
4065
3938
  finalResult = await this.answer(schemaDefinitionPrompt, [], {
4066
- ...schemaDefCorrectionOptions,
3939
+ ...options,
4067
3940
  _schemaFormatted: true,
4068
- _skipValidation: true, // Skip validation in recursive correction calls to prevent loops
4069
- _completionPromptProcessed: true, // Prevent cascading completion prompts in retry calls
4070
- _maxIterationsOverride: 3 // Correction should complete in 1-2 iterations (issue #447)
3941
+ _skipValidation: true,
3942
+ _disableTools: true,
3943
+ _completionPromptProcessed: true,
3944
+ _maxIterationsOverride: 3
4071
3945
  });
4072
3946
  finalResult = cleanSchemaResponse(finalResult);
4073
- validation = validateJsonResponse(finalResult);
4074
- retryCount = 1; // Start at 1 since we already did one correction
3947
+ validation = validateJsonResponse(finalResult, { debug: this.debug, schema: options.schema });
3948
+ retryCount = 1;
4075
3949
  }
4076
-
4077
- // Before entering correction loop, try auto-wrapping for simple schemas
4078
- // This avoids re-invoking AI for schemas like {text: string} where we can just wrap programmatically
3950
+
3951
+ // Try auto-wrapping for simple schemas before entering correction loop
4079
3952
  if (!validation.isValid) {
4080
3953
  const autoWrapped = tryAutoWrapForSimpleSchema(finalResult, options.schema, { debug: this.debug });
4081
3954
  if (autoWrapped) {
@@ -4083,23 +3956,19 @@ Convert your previous response content into actual JSON data that follows this s
4083
3956
  console.log(`[DEBUG] JSON validation: Auto-wrapped plain text for simple schema`);
4084
3957
  }
4085
3958
  finalResult = autoWrapped;
4086
- validation = validateJsonResponse(finalResult, { debug: this.debug });
3959
+ validation = validateJsonResponse(finalResult, { debug: this.debug, schema: options.schema });
4087
3960
  }
4088
3961
  }
4089
3962
 
3963
+ // Correction loop
4090
3964
  while (!validation.isValid && retryCount < maxRetries) {
4091
3965
  if (this.debug) {
4092
- console.log(`[DEBUG] JSON validation: attempt_completion validation failed (attempt ${retryCount + 1}/${maxRetries}):`, validation.error);
4093
- console.log(`[DEBUG] JSON validation: Invalid response sample: ${finalResult.substring(0, 300)}${finalResult.length > 300 ? '...' : ''}`);
3966
+ console.log(`[DEBUG] JSON validation: Validation failed (attempt ${retryCount + 1}/${maxRetries}):`, validation.error);
4094
3967
  }
4095
-
4096
- // Check if the invalid response is actually a schema definition
3968
+
4097
3969
  let correctionPrompt;
4098
3970
  try {
4099
3971
  if (isJsonSchemaDefinition(finalResult, { debug: this.debug })) {
4100
- if (this.debug) {
4101
- console.log(`[DEBUG] JSON validation: attempt_completion response is still a schema definition, using specialized correction`);
4102
- }
4103
3972
  correctionPrompt = createSchemaDefinitionCorrectionPrompt(
4104
3973
  finalResult,
4105
3974
  options.schema,
@@ -4107,71 +3976,68 @@ Convert your previous response content into actual JSON data that follows this s
4107
3976
  );
4108
3977
  } else {
4109
3978
  correctionPrompt = createJsonCorrectionPrompt(
4110
- finalResult,
4111
- options.schema,
3979
+ finalResult,
3980
+ options.schema,
4112
3981
  validation.error,
4113
3982
  retryCount
4114
3983
  );
4115
3984
  }
4116
3985
  } catch (error) {
4117
- // If we can't parse to check if it's a schema definition, use regular correction
4118
3986
  correctionPrompt = createJsonCorrectionPrompt(
4119
- finalResult,
4120
- options.schema,
3987
+ finalResult,
3988
+ options.schema,
4121
3989
  validation.error,
4122
3990
  retryCount
4123
3991
  );
4124
3992
  }
4125
-
4126
- // Strip schema from correction options to prevent inflated iteration budget (issue #447)
4127
- const { schema: _unusedSchema2, ...correctionOptions } = options;
3993
+
4128
3994
  finalResult = await this.answer(correctionPrompt, [], {
4129
- ...correctionOptions,
3995
+ ...options,
4130
3996
  _schemaFormatted: true,
4131
- _skipValidation: true, // Skip validation in recursive correction calls to prevent loops
4132
- _disableTools: true, // Only allow attempt_completion - prevent AI from using search/query tools
4133
- _completionPromptProcessed: true, // Prevent cascading completion prompts in retry calls
4134
- _maxIterationsOverride: 3 // Correction should complete in 1-2 iterations (issue #447)
3997
+ _skipValidation: true,
3998
+ _disableTools: true,
3999
+ _completionPromptProcessed: true,
4000
+ _maxIterationsOverride: 3
4135
4001
  });
4136
4002
  finalResult = cleanSchemaResponse(finalResult);
4137
-
4138
- // Validate the corrected response
4139
- validation = validateJsonResponse(finalResult, { debug: this.debug });
4003
+
4004
+ validation = validateJsonResponse(finalResult, { debug: this.debug, schema: options.schema });
4140
4005
  retryCount++;
4141
-
4006
+
4142
4007
  if (this.debug) {
4143
4008
  if (validation.isValid) {
4144
- console.log(`[DEBUG] JSON validation: attempt_completion correction successful on attempt ${retryCount}`);
4009
+ console.log(`[DEBUG] JSON validation: Correction successful on attempt ${retryCount}`);
4145
4010
  } else {
4146
- console.log(`[DEBUG] JSON validation: attempt_completion correction failed on attempt ${retryCount}: ${validation.error}`);
4011
+ console.log(`[DEBUG] JSON validation: Correction failed on attempt ${retryCount}: ${validation.error}`);
4147
4012
  }
4148
4013
  }
4149
4014
  }
4150
-
4151
- // Record final validation result
4015
+
4016
+ // Record JSON validation completion in telemetry
4152
4017
  if (this.tracer) {
4153
- this.tracer.recordJsonValidationEvent('attempt_completion_completed', {
4018
+ this.tracer.recordJsonValidationEvent('completed', {
4154
4019
  'json_validation.success': validation.isValid,
4155
4020
  'json_validation.retry_count': retryCount,
4156
- 'json_validation.final_response_length': finalResult.length
4021
+ 'json_validation.max_retries': maxRetries,
4022
+ 'json_validation.final_response_length': finalResult.length,
4023
+ 'json_validation.error': validation.isValid ? null : validation.error
4157
4024
  });
4158
4025
  }
4159
-
4026
+
4160
4027
  if (!validation.isValid && this.debug) {
4161
- console.log(`[DEBUG] JSON validation: attempt_completion result validation failed after ${maxRetries} attempts: ${validation.error}`);
4162
- console.log(`[DEBUG] JSON validation: Final attempt_completion response: ${finalResult.substring(0, 500)}${finalResult.length > 500 ? '...' : ''}`);
4028
+ console.log(`[DEBUG] JSON validation: Failed after ${maxRetries} attempts: ${validation.error}`);
4163
4029
  } else if (validation.isValid && this.debug) {
4164
- console.log(`[DEBUG] JSON validation: attempt_completion result validation successful`);
4030
+ console.log(`[DEBUG] JSON validation: Final validation successful`);
4165
4031
  }
4166
4032
  }
4167
4033
  } catch (error) {
4168
4034
  if (this.debug) {
4169
- console.log(`[DEBUG] attempt_completion result cleanup failed: ${error.message}`);
4035
+ console.log(`[DEBUG] Schema validation/cleanup failed: ${error.message}`);
4170
4036
  }
4171
4037
  }
4172
4038
  }
4173
4039
 
4174
- // Final mermaid validation for all responses (regardless of schema or attempt_completion)
4040
+ // Final mermaid validation for all responses (regardless of schema)
4175
4041
  if (!this.disableMermaidValidation && !options._schemaFormatted) {
4176
4042
  try {
4177
4043
  if (this.debug) {
@@ -4294,8 +4160,6 @@ Convert your previous response content into actual JSON data that follows this s
4294
4160
  * @returns {Object} Compaction statistics
4295
4161
  */
4296
4162
  async compactHistory(options = {}) {
4297
- const { compactMessages, calculateCompactionStats } = await import('./contextCompactor.js');
4298
-
4299
4163
  if (this.history.length === 0) {
4300
4164
  if (this.debug) {
4301
4165
  console.log(`[DEBUG] No history to compact for session ${this.sessionId}`);