erosolar-cli 1.7.78 → 1.7.80

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. package/agents/erosolar-code.rules.json +0 -5
  2. package/agents/general.rules.json +0 -5
  3. package/dist/bin/erosolar.js +2 -0
  4. package/dist/bin/erosolar.js.map +1 -1
  5. package/dist/capabilities/filesystemCapability.d.ts.map +1 -1
  6. package/dist/capabilities/filesystemCapability.js +1 -2
  7. package/dist/capabilities/filesystemCapability.js.map +1 -1
  8. package/dist/contracts/agent-schemas.json +12 -20
  9. package/dist/contracts/unified-schema.json +1 -1
  10. package/dist/core/agent.d.ts +3 -38
  11. package/dist/core/agent.d.ts.map +1 -1
  12. package/dist/core/agent.js +8 -283
  13. package/dist/core/agent.js.map +1 -1
  14. package/dist/core/contextManager.js +8 -8
  15. package/dist/core/contextManager.js.map +1 -1
  16. package/dist/core/errors/apiKeyErrors.js +1 -1
  17. package/dist/core/errors/apiKeyErrors.js.map +1 -1
  18. package/dist/core/isolatedVerifier.js +22 -274
  19. package/dist/core/isolatedVerifier.js.map +1 -1
  20. package/dist/core/modelDiscovery.d.ts.map +1 -1
  21. package/dist/core/modelDiscovery.js +28 -23
  22. package/dist/core/modelDiscovery.js.map +1 -1
  23. package/dist/core/secretStore.d.ts +0 -9
  24. package/dist/core/secretStore.d.ts.map +1 -1
  25. package/dist/core/secretStore.js +2 -52
  26. package/dist/core/secretStore.js.map +1 -1
  27. package/dist/core/types.d.ts +1 -9
  28. package/dist/core/types.d.ts.map +1 -1
  29. package/dist/headless/headlessApp.d.ts.map +1 -1
  30. package/dist/headless/headlessApp.js +0 -16
  31. package/dist/headless/headlessApp.js.map +1 -1
  32. package/dist/plugins/providers/google/index.js +2 -3
  33. package/dist/plugins/providers/google/index.js.map +1 -1
  34. package/dist/providers/anthropicProvider.d.ts.map +1 -1
  35. package/dist/providers/anthropicProvider.js +19 -76
  36. package/dist/providers/anthropicProvider.js.map +1 -1
  37. package/dist/providers/googleProvider.d.ts.map +1 -1
  38. package/dist/providers/googleProvider.js +1 -23
  39. package/dist/providers/googleProvider.js.map +1 -1
  40. package/dist/providers/openaiChatCompletionsProvider.d.ts +1 -2
  41. package/dist/providers/openaiChatCompletionsProvider.d.ts.map +1 -1
  42. package/dist/providers/openaiChatCompletionsProvider.js +4 -121
  43. package/dist/providers/openaiChatCompletionsProvider.js.map +1 -1
  44. package/dist/providers/openaiResponsesProvider.d.ts.map +1 -1
  45. package/dist/providers/openaiResponsesProvider.js +18 -55
  46. package/dist/providers/openaiResponsesProvider.js.map +1 -1
  47. package/dist/runtime/agentController.d.ts +0 -4
  48. package/dist/runtime/agentController.d.ts.map +1 -1
  49. package/dist/runtime/agentController.js +3 -32
  50. package/dist/runtime/agentController.js.map +1 -1
  51. package/dist/security/persistence-research.d.ts +2 -0
  52. package/dist/security/persistence-research.d.ts.map +1 -1
  53. package/dist/security/persistence-research.js +2 -0
  54. package/dist/security/persistence-research.js.map +1 -1
  55. package/dist/security/security-testing-framework.d.ts +2 -0
  56. package/dist/security/security-testing-framework.d.ts.map +1 -1
  57. package/dist/security/security-testing-framework.js +2 -0
  58. package/dist/security/security-testing-framework.js.map +1 -1
  59. package/dist/shell/bracketedPasteManager.d.ts +5 -8
  60. package/dist/shell/bracketedPasteManager.d.ts.map +1 -1
  61. package/dist/shell/bracketedPasteManager.js +43 -27
  62. package/dist/shell/bracketedPasteManager.js.map +1 -1
  63. package/dist/shell/composableMessage.d.ts +1 -1
  64. package/dist/shell/composableMessage.js +2 -2
  65. package/dist/shell/composableMessage.js.map +1 -1
  66. package/dist/shell/inputProcessor.d.ts +55 -0
  67. package/dist/shell/inputProcessor.d.ts.map +1 -0
  68. package/dist/shell/inputProcessor.js +171 -0
  69. package/dist/shell/inputProcessor.js.map +1 -0
  70. package/dist/shell/interactiveShell.d.ts +48 -7
  71. package/dist/shell/interactiveShell.d.ts.map +1 -1
  72. package/dist/shell/interactiveShell.js +340 -172
  73. package/dist/shell/interactiveShell.js.map +1 -1
  74. package/dist/shell/shellApp.d.ts.map +1 -1
  75. package/dist/shell/shellApp.js +3 -54
  76. package/dist/shell/shellApp.js.map +1 -1
  77. package/dist/shell/systemPrompt.d.ts +1 -1
  78. package/dist/shell/systemPrompt.d.ts.map +1 -1
  79. package/dist/shell/systemPrompt.js +57 -15
  80. package/dist/shell/systemPrompt.js.map +1 -1
  81. package/dist/shell/updateManager.js +2 -4
  82. package/dist/shell/updateManager.js.map +1 -1
  83. package/dist/subagents/taskRunner.js +2 -2
  84. package/dist/subagents/taskRunner.js.map +1 -1
  85. package/dist/tools/cloudTools.d.ts +2 -0
  86. package/dist/tools/cloudTools.d.ts.map +1 -1
  87. package/dist/tools/cloudTools.js +2 -0
  88. package/dist/tools/cloudTools.js.map +1 -1
  89. package/dist/tools/fileTools.d.ts.map +1 -1
  90. package/dist/tools/fileTools.js +3 -31
  91. package/dist/tools/fileTools.js.map +1 -1
  92. package/dist/ui/ShellUIAdapter.d.ts +2 -10
  93. package/dist/ui/ShellUIAdapter.d.ts.map +1 -1
  94. package/dist/ui/ShellUIAdapter.js +11 -123
  95. package/dist/ui/ShellUIAdapter.js.map +1 -1
  96. package/dist/ui/keyboardShortcuts.d.ts.map +1 -1
  97. package/dist/ui/keyboardShortcuts.js +2 -12
  98. package/dist/ui/keyboardShortcuts.js.map +1 -1
  99. package/dist/ui/persistentPrompt.d.ts +0 -24
  100. package/dist/ui/persistentPrompt.d.ts.map +1 -1
  101. package/dist/ui/persistentPrompt.js +4 -86
  102. package/dist/ui/persistentPrompt.js.map +1 -1
  103. package/dist/ui/toolDisplay.d.ts.map +1 -1
  104. package/dist/ui/toolDisplay.js +0 -652
  105. package/dist/ui/toolDisplay.js.map +1 -1
  106. package/package.json +10 -10
  107. package/dist/core/cliTestHarness.d.ts +0 -200
  108. package/dist/core/cliTestHarness.d.ts.map +0 -1
  109. package/dist/core/cliTestHarness.js +0 -549
  110. package/dist/core/cliTestHarness.js.map +0 -1
  111. package/dist/core/multilinePasteHandler.d.ts +0 -35
  112. package/dist/core/multilinePasteHandler.d.ts.map +0 -1
  113. package/dist/core/multilinePasteHandler.js +0 -80
  114. package/dist/core/multilinePasteHandler.js.map +0 -1
  115. package/dist/tools/exploreTools.d.ts +0 -7
  116. package/dist/tools/exploreTools.d.ts.map +0 -1
  117. package/dist/tools/exploreTools.js +0 -322
  118. package/dist/tools/exploreTools.js.map +0 -1
@@ -23,6 +23,7 @@ import { PersistentPrompt, PinnedChatBox } from '../ui/persistentPrompt.js';
23
23
  import { formatShortcutsHelp } from '../ui/shortcutsHelp.js';
24
24
  import { MetricsTracker } from '../alpha-zero/index.js';
25
25
  import { listAvailablePlugins } from '../plugins/index.js';
26
+ import { verifyResponse, formatVerificationReport, } from '../core/responseVerifier.js';
26
27
  const DROPDOWN_COLORS = [
27
28
  theme.primary,
28
29
  theme.info,
@@ -73,6 +74,7 @@ export class InteractiveShell {
73
74
  workspaceOptions;
74
75
  sessionState;
75
76
  isProcessing = false;
77
+ isInsideThinkingBlock = false;
76
78
  pendingInteraction = null;
77
79
  pendingSecretRetry = null;
78
80
  bufferedInputLines = [];
@@ -105,8 +107,12 @@ export class InteractiveShell {
105
107
  pendingHistoryLoad = null;
106
108
  cachedHistory = [];
107
109
  activeSessionId = null;
110
+ sessionStartTime = Date.now();
108
111
  activeSessionTitle = null;
109
112
  sessionResumeNotice = null;
113
+ lastAssistantResponse = null;
114
+ verificationRetryCount = 0;
115
+ maxVerificationRetries = 2;
110
116
  customCommands;
111
117
  customCommandMap;
112
118
  sessionRestoreConfig;
@@ -167,16 +173,11 @@ export class InteractiveShell {
167
173
  // Update persistent prompt status bar with file changes
168
174
  this.updatePersistentPromptFileChanges();
169
175
  });
170
- // Set up tool status callback to update pinned chat box during tool execution
176
+ // Set up tool status callback to update streaming status line during tool execution
177
+ // Uses Claude Code style: single line at bottom that updates in-place
171
178
  this.uiAdapter.setToolStatusCallback((status) => {
172
- if (status) {
173
- this.pinnedChatBox.setStatusMessage(status);
174
- }
175
- else {
176
- // Clear status but keep processing indicator if still processing
177
- this.pinnedChatBox.setStatusMessage(null);
178
- }
179
- this.pinnedChatBox.forceRender();
179
+ // Update the streaming status line (Claude Code style)
180
+ display.updateStreamingStatus(status);
180
181
  });
181
182
  this.skillRepository = new SkillRepository({
182
183
  workingDir: this.workingDir,
@@ -188,9 +189,8 @@ export class InteractiveShell {
188
189
  this.rl = readline.createInterface({
189
190
  input,
190
191
  output,
191
- // Use empty prompt since PinnedChatBox handles all prompt rendering
192
- // This prevents duplicate '>' characters from appearing
193
- prompt: '',
192
+ // Claude Code style: simple '> ' prompt
193
+ prompt: '> ',
194
194
  terminal: true,
195
195
  historySize: 100, // Enable native readline history
196
196
  });
@@ -281,7 +281,10 @@ export class InteractiveShell {
281
281
  this.pinnedChatBox.show();
282
282
  this.pinnedChatBox.forceRender();
283
283
  if (initialPrompt) {
284
+ // For command-line prompts, show the user's input with separator (Claude Code style)
284
285
  display.newLine();
286
+ const cols = Math.min(process.stdout.columns || 80, 72);
287
+ console.log(theme.ui.border('─'.repeat(cols)));
285
288
  console.log(`${formatUserPrompt(this.profileLabel || this.profile)}${initialPrompt}`);
286
289
  await this.processInputBlock(initialPrompt);
287
290
  return;
@@ -472,9 +475,9 @@ export class InteractiveShell {
472
475
  inputStream.off('keypress', this.keypressHandler);
473
476
  this.keypressHandler = null;
474
477
  }
475
- // Restore original stdin emit (cleanup from paste interception)
476
- if (this.rawDataHandler) {
477
- this.rawDataHandler(); // This restores the original emit function
478
+ // Remove raw data handler
479
+ if (inputStream && this.rawDataHandler) {
480
+ inputStream.off('data', this.rawDataHandler);
478
481
  this.rawDataHandler = null;
479
482
  }
480
483
  // Clear any pending cleanup to prevent hanging
@@ -486,8 +489,6 @@ export class InteractiveShell {
486
489
  display.newLine();
487
490
  const highlightedEmail = theme.info('support@ero.solar');
488
491
  const infoMessage = [
489
- 'Thank you to Anthropic for allowing me to use Claude Code to build erosolar-cli.',
490
- '',
491
492
  `Email ${highlightedEmail} with any bugs or feedback`,
492
493
  'GitHub: https://github.com/ErosolarAI/erosolar-by-bo',
493
494
  'npm: https://www.npmjs.com/package/erosolar-cli',
@@ -525,31 +526,21 @@ export class InteractiveShell {
525
526
  // All pastes (single or multi-line) are captured for confirmation before submit
526
527
  this.capturePaste(content, lineCount);
527
528
  });
528
- // Set up raw data interception to catch bracketed paste before readline processes it.
529
- // We need to actually PREVENT readline from seeing the paste content to avoid echo.
530
- // Strategy: Replace stdin's 'data' event emission during paste capture.
531
- const originalEmit = inputStream.emit.bind(inputStream);
532
- inputStream.emit = (event, ...args) => {
533
- if (event === 'data' && args[0]) {
534
- const data = args[0];
535
- const str = typeof data === 'string' ? data : data.toString();
536
- const result = this.bracketedPaste.processRawData(str);
537
- if (result.consumed) {
538
- // Data was consumed by paste handler - don't pass to readline
539
- // If there's passThrough data, emit that instead
540
- if (result.passThrough) {
541
- return originalEmit('data', Buffer.from(result.passThrough));
542
- }
543
- return true; // Event "handled" but not passed to other listeners
544
- }
529
+ // Set up raw data interception to catch bracketed paste before readline processes it
530
+ // We prepend our listener so it runs before readline's listener
531
+ this.rawDataHandler = (data) => {
532
+ const str = data.toString();
533
+ const result = this.bracketedPaste.processRawData(str);
534
+ if (result.consumed) {
535
+ // Don't show preview here - readline will still echo lines to the terminal,
536
+ // and our preview would get clobbered. Instead, we show the preview in the
537
+ // line handler after clearing readline's echoed output.
538
+ // The processRawData() sets flags that the line handler will check.
545
539
  }
546
- // Pass through all other events and non-paste data normally
547
- return originalEmit(event, ...args);
548
- };
549
- // Store reference for cleanup
550
- this.rawDataHandler = () => {
551
- inputStream.emit = originalEmit;
552
540
  };
541
+ // Use prependListener to ensure our handler runs before readline's handlers
542
+ // This gives us first look at the raw data including bracketed paste markers
543
+ inputStream.prependListener('data', this.rawDataHandler);
553
544
  }
554
545
  setupSlashCommandPreviewHandler() {
555
546
  const inputStream = input;
@@ -574,8 +565,8 @@ export class InteractiveShell {
574
565
  const currentLine = this.rl.line || '';
575
566
  const cursorPos = this.rl.cursor || 0;
576
567
  this.persistentPrompt.updateInput(currentLine, cursorPos);
577
- // Sync to pinned chat box for display only
578
- this.pinnedChatBox.setInput(currentLine);
568
+ // Sync to pinned chat box for display only (include cursor position)
569
+ this.pinnedChatBox.setInput(currentLine, cursorPos);
579
570
  if (this.composableMessage.hasContent()) {
580
571
  this.composableMessage.setDraft(currentLine);
581
572
  this.updateComposeStatusSummary();
@@ -834,9 +825,6 @@ export class InteractiveShell {
834
825
  this.rl.write(newLine); // Write new content
835
826
  // Update persistent prompt display
836
827
  this.persistentPrompt.updateInput(newLine, newCursor);
837
- // NOTE: Don't clear pasteJustCaptured here - the counter-based logic in shouldIgnoreLineEvent()
838
- // will decrement for each readline line event and auto-clear when all are processed.
839
- // Clearing prematurely causes the remaining readline-echoed lines to pass through.
840
828
  // Re-prompt to show the inline content
841
829
  this.rl.prompt(true);
842
830
  return;
@@ -854,16 +842,13 @@ export class InteractiveShell {
854
842
  });
855
843
  // Set the prompt to show paste chips, then position cursor after them
856
844
  // The user can type additional text after the chips
857
- this.persistentPrompt.updateInput(pasteChips + ' ', pasteChips.length + 1);
845
+ this.persistentPrompt.updateInput(`${pasteChips} `, pasteChips.length + 1);
858
846
  // Update readline's line buffer to include the chips as prefix
859
847
  // This ensures typed text appears after the chips
860
848
  if (this.rl.line !== undefined) {
861
- this.rl.line = pasteChips + ' ';
849
+ this.rl.line = `${pasteChips} `;
862
850
  this.rl.cursor = pasteChips.length + 1;
863
851
  }
864
- // NOTE: Don't clear pasteJustCaptured here - the counter-based logic in shouldIgnoreLineEvent()
865
- // will decrement for each readline line event (one per pasted line) and auto-clear when done.
866
- // Clearing prematurely causes remaining readline-echoed lines to pass through and get displayed.
867
852
  this.rl.prompt(true); // preserveCursor=true to keep position after chips
868
853
  }
869
854
  /**
@@ -1203,6 +1188,9 @@ export class InteractiveShell {
1203
1188
  case '/discover':
1204
1189
  await this.discoverModelsCommand();
1205
1190
  break;
1191
+ case '/verify':
1192
+ await this.handleVerifyCommand();
1193
+ break;
1206
1194
  default:
1207
1195
  if (!(await this.tryCustomSlashCommand(command, input))) {
1208
1196
  display.showWarning(`Unknown command "${command}".`);
@@ -1319,7 +1307,6 @@ export class InteractiveShell {
1319
1307
  this.baseSystemPrompt = buildInteractiveSystemPrompt(profileConfig.systemPrompt, profileConfig.label, tools);
1320
1308
  if (this.rebuildAgent()) {
1321
1309
  display.showInfo(`Workspace snapshot refreshed (${this.describeWorkspaceOptions()}).`);
1322
- this.resetChatBoxAfterModelSwap();
1323
1310
  }
1324
1311
  else {
1325
1312
  display.showWarning('Workspace snapshot refreshed, but the agent failed to rebuild. Run /doctor for details.');
@@ -1497,9 +1484,7 @@ export class InteractiveShell {
1497
1484
  }
1498
1485
  this.thinkingMode = value;
1499
1486
  saveSessionPreferences({ thinkingMode: this.thinkingMode });
1500
- if (this.rebuildAgent()) {
1501
- this.resetChatBoxAfterModelSwap();
1502
- }
1487
+ this.rebuildAgent();
1503
1488
  const descriptions = {
1504
1489
  concise: 'Hides internal reasoning and responds directly.',
1505
1490
  balanced: 'Shows short thoughts only when helpful.',
@@ -1522,7 +1507,7 @@ export class InteractiveShell {
1522
1507
  lines.push(theme.bold('Session File Changes'));
1523
1508
  lines.push('');
1524
1509
  lines.push(`${theme.info('•')} ${summary.files} file${summary.files === 1 ? '' : 's'} modified`);
1525
- lines.push(`${theme.info('•')} ${theme.success('+' + summary.additions)} ${theme.error('-' + summary.removals)} lines`);
1510
+ lines.push(`${theme.info('•')} ${theme.success(`+${summary.additions}`)} ${theme.error(`-${summary.removals}`)} lines`);
1526
1511
  lines.push('');
1527
1512
  // Group changes by file
1528
1513
  const fileMap = new Map();
@@ -1546,7 +1531,7 @@ export class InteractiveShell {
1546
1531
  if (stats.writes > 0)
1547
1532
  operations.push(`${stats.writes} write${stats.writes === 1 ? '' : 's'}`);
1548
1533
  const opsText = operations.join(', ');
1549
- const diffText = `${theme.success('+' + stats.additions)} ${theme.error('-' + stats.removals)}`;
1534
+ const diffText = `${theme.success(`+${stats.additions}`)} ${theme.error(`-${stats.removals}`)}`;
1550
1535
  lines.push(` ${theme.dim(path)}`);
1551
1536
  lines.push(` ${opsText} • ${diffText}`);
1552
1537
  }
@@ -1556,6 +1541,211 @@ export class InteractiveShell {
1556
1541
  const summary = this.alphaZeroMetrics.getPerformanceSummary();
1557
1542
  display.showSystemMessage(summary);
1558
1543
  }
1544
+ /**
1545
+ * Create a verification context for isolated process verification.
1546
+ *
1547
+ * Verification now runs in a completely separate Node.js process for full isolation.
1548
+ * This ensures:
1549
+ * - Separate memory space from main CLI
1550
+ * - Independent event loop
1551
+ * - No shared state
1552
+ * - Errors in verification cannot crash main process
1553
+ */
1554
+ createVerificationContext() {
1555
+ // Build conversation history for context
1556
+ const conversationHistory = this.cachedHistory
1557
+ .filter(msg => msg.role === 'user' || msg.role === 'assistant')
1558
+ .slice(-10) // Last 10 messages for context
1559
+ .map(msg => `${msg.role}: ${typeof msg.content === 'string' ? msg.content.slice(0, 500) : '[complex content]'}`);
1560
+ return {
1561
+ workingDirectory: this.workingDir,
1562
+ conversationHistory,
1563
+ provider: this.sessionState.provider,
1564
+ model: this.sessionState.model,
1565
+ };
1566
+ }
1567
+ /**
1568
+ * Handle /verify command - verify the last assistant response
1569
+ */
1570
+ async handleVerifyCommand() {
1571
+ if (!this.lastAssistantResponse) {
1572
+ display.showWarning('No assistant response to verify. Send a message first.');
1573
+ return;
1574
+ }
1575
+ display.showSystemMessage('Verifying last response in isolated process...\n');
1576
+ try {
1577
+ const context = this.createVerificationContext();
1578
+ const report = await verifyResponse(this.lastAssistantResponse, context);
1579
+ const formattedReport = formatVerificationReport(report);
1580
+ display.showSystemMessage(formattedReport);
1581
+ // Show actionable summary
1582
+ if (report.overallVerdict === 'contradicted') {
1583
+ display.showError('Some claims in the response could not be verified!');
1584
+ }
1585
+ else if (report.overallVerdict === 'verified') {
1586
+ display.showInfo('All verifiable claims in the response were verified.');
1587
+ }
1588
+ else if (report.overallVerdict === 'partially_verified') {
1589
+ display.showWarning('Some claims were verified, but not all.');
1590
+ }
1591
+ else {
1592
+ display.showInfo('No verifiable claims found in the response.');
1593
+ }
1594
+ }
1595
+ catch (err) {
1596
+ display.showError(`Verification failed: ${err instanceof Error ? err.message : 'Unknown error'}`);
1597
+ }
1598
+ }
1599
+ /**
1600
+ * Check if a response looks like a completion (claims to be done)
1601
+ * vs. asking follow-up questions or waiting for user input.
1602
+ * Uses LLM to intelligently determine if verification should run.
1603
+ * Only run auto-verification when assistant claims task completion.
1604
+ */
1605
+ async shouldRunAutoVerification(response) {
1606
+ // Quick pre-filter: very short responses are unlikely to have verifiable claims
1607
+ if (response.length < 100) {
1608
+ return false;
1609
+ }
1610
+ try {
1611
+ // Use LLM to determine if this response contains verifiable completion claims
1612
+ const prompt = `Analyze this AI assistant response and determine if it claims to have COMPLETED a task that can be verified.
1613
+
1614
+ RESPONSE:
1615
+ ---
1616
+ ${response.slice(0, 2000)}
1617
+ ---
1618
+
1619
+ Answer with ONLY "YES" or "NO":
1620
+ - YES: The response claims to have completed something verifiable (created/modified files, ran commands, fixed bugs, implemented features, etc.)
1621
+ - NO: The response is asking questions, requesting clarification, explaining concepts, or hasn't completed any verifiable action yet.
1622
+
1623
+ Answer:`;
1624
+ const agent = this.runtimeSession.createAgent({
1625
+ provider: this.sessionState.provider,
1626
+ model: this.sessionState.model,
1627
+ temperature: 0,
1628
+ maxTokens: 10,
1629
+ systemPrompt: 'You are a classifier. Answer only YES or NO.',
1630
+ });
1631
+ const result = await agent.send(prompt);
1632
+ const answer = result.trim().toUpperCase();
1633
+ return answer.startsWith('YES');
1634
+ }
1635
+ catch {
1636
+ // On error, fall back to not running verification
1637
+ return false;
1638
+ }
1639
+ }
1640
+ /**
1641
+ * Schedule auto-verification after assistant response.
1642
+ * Uses LLM-based semantic analysis to verify ALL claims.
1643
+ * Runs asynchronously to not block the UI.
1644
+ * Only runs when assistant claims completion, not when asking questions.
1645
+ */
1646
+ scheduleAutoVerification(response) {
1647
+ // Run verification asynchronously after a short delay
1648
+ // This allows the UI to update first
1649
+ setTimeout(async () => {
1650
+ try {
1651
+ // Use LLM to determine if this response should be verified
1652
+ const shouldVerify = await this.shouldRunAutoVerification(response);
1653
+ if (!shouldVerify) {
1654
+ return;
1655
+ }
1656
+ display.showSystemMessage(`\n🔍 Auto-verifying response in isolated process...`);
1657
+ const context = this.createVerificationContext();
1658
+ const report = await verifyResponse(response, context);
1659
+ const formattedReport = formatVerificationReport(report);
1660
+ // Show compact result
1661
+ if (report.summary.total === 0) {
1662
+ display.showInfo('No verifiable claims found in the response.');
1663
+ this.verificationRetryCount = 0;
1664
+ return;
1665
+ }
1666
+ if (report.overallVerdict === 'verified') {
1667
+ display.showInfo(`✅ Verified: ${report.summary.verified}/${report.summary.total} claims confirmed`);
1668
+ // Reset retry count on success
1669
+ this.verificationRetryCount = 0;
1670
+ }
1671
+ else if (report.overallVerdict === 'contradicted' || report.overallVerdict === 'partially_verified') {
1672
+ const failedCount = report.summary.failed;
1673
+ const icon = report.overallVerdict === 'contradicted' ? '❌' : '⚠️';
1674
+ const label = report.overallVerdict === 'contradicted' ? 'Verification failed' : 'Partial verification';
1675
+ display.showError(`${icon} ${label}: ${failedCount} claim${failedCount > 1 ? 's' : ''} could not be verified`);
1676
+ display.showSystemMessage(formattedReport);
1677
+ // Attempt to fix if we have retries left
1678
+ if (this.verificationRetryCount < this.maxVerificationRetries) {
1679
+ this.verificationRetryCount++;
1680
+ this.requestVerificationFix(report);
1681
+ }
1682
+ else {
1683
+ display.showWarning(`Max verification retries (${this.maxVerificationRetries}) reached. Use /verify to check manually.`);
1684
+ this.verificationRetryCount = 0;
1685
+ }
1686
+ }
1687
+ }
1688
+ catch (err) {
1689
+ // Silently ignore verification errors to not disrupt the flow
1690
+ // User can always run /verify manually
1691
+ }
1692
+ }, 500);
1693
+ }
1694
+ /**
1695
+ * Request the AI to fix failed verification claims.
1696
+ * Generates a strategic fix request with context about what failed and why.
1697
+ */
1698
+ requestVerificationFix(report) {
1699
+ const failedResults = report.results.filter(r => !r.verified && r.confidence === 'high');
1700
+ if (failedResults.length === 0) {
1701
+ return;
1702
+ }
1703
+ // Build detailed failure descriptions with suggested fixes
1704
+ const failureDetails = failedResults.map(r => {
1705
+ const claim = r.claim;
1706
+ const evidence = r.evidence;
1707
+ // Generate specific fix strategy based on claim category
1708
+ let suggestedFix = '';
1709
+ switch (claim.category) {
1710
+ case 'file_op':
1711
+ suggestedFix = `Re-create or update the file at: ${claim.context['path'] || 'specified path'}`;
1712
+ break;
1713
+ case 'code':
1714
+ suggestedFix = 'Fix any type errors or syntax issues, then run the build again';
1715
+ break;
1716
+ case 'command':
1717
+ suggestedFix = 'Re-run the command and verify it completes successfully';
1718
+ break;
1719
+ case 'state':
1720
+ suggestedFix = 'Verify the state change was applied correctly';
1721
+ break;
1722
+ case 'behavior':
1723
+ suggestedFix = 'Test the feature manually or check implementation';
1724
+ break;
1725
+ default:
1726
+ suggestedFix = 'Retry the operation';
1727
+ }
1728
+ return `• ${claim.statement}
1729
+ Evidence: ${evidence.slice(0, 150)}
1730
+ Suggested fix: ${suggestedFix}`;
1731
+ }).join('\n\n');
1732
+ const fixMessage = `🔧 VERIFICATION FAILED - AUTO-RETRY (attempt ${this.verificationRetryCount}/${this.maxVerificationRetries})
1733
+
1734
+ The following claims could not be verified:
1735
+
1736
+ ${failureDetails}
1737
+
1738
+ Think through this carefully, then:
1739
+ 1. Analyze why each operation failed (check files, errors, state)
1740
+ 2. Identify the root cause
1741
+ 3. Fix the underlying issue
1742
+ 4. Re-execute the failed operation(s)
1743
+ 5. Verify the fix worked`;
1744
+ display.showSystemMessage(`\n🔧 Auto-retry: Generating fix strategy for ${failedResults.length} failed claim${failedResults.length > 1 ? 's' : ''}...`);
1745
+ // Queue the fix request
1746
+ this.followUpQueue.push({ type: 'request', text: fixMessage });
1747
+ this.scheduleQueueProcessing();
1748
+ }
1559
1749
  showImprovementSuggestions() {
1560
1750
  const suggestions = this.alphaZeroMetrics.getImprovementSuggestions();
1561
1751
  if (suggestions.length === 0) {
@@ -2194,7 +2384,6 @@ export class InteractiveShell {
2194
2384
  display.showInfo(`Switched to ${preset.label}.`);
2195
2385
  this.refreshBannerSessionInfo();
2196
2386
  this.persistSessionPreference();
2197
- this.resetChatBoxAfterModelSwap();
2198
2387
  }
2199
2388
  }
2200
2389
  async handleSecretSelection(input) {
@@ -2255,9 +2444,7 @@ export class InteractiveShell {
2255
2444
  const deferred = this.pendingSecretRetry;
2256
2445
  this.pendingSecretRetry = null;
2257
2446
  if (pending.secret.providers.includes(this.sessionState.provider)) {
2258
- if (this.rebuildAgent()) {
2259
- this.resetChatBoxAfterModelSwap();
2260
- }
2447
+ this.rebuildAgent();
2261
2448
  }
2262
2449
  if (deferred) {
2263
2450
  await deferred();
@@ -2285,25 +2472,20 @@ export class InteractiveShell {
2285
2472
  return;
2286
2473
  }
2287
2474
  this.isProcessing = true;
2475
+ this.resetThinkingState(); // Reset thinking block styling state
2288
2476
  const requestStartTime = Date.now(); // Alpha Zero 2 timing
2289
- // Keep persistent prompt visible during processing so users can type follow-up requests
2290
- // The prompt will show a "processing" indicator but remain interactive
2291
- this.persistentPrompt.updateStatusBar({ message: '⏳ Processing... (type to queue follow-up)' });
2292
2477
  // Update pinned chat box to show processing state
2293
- // Clear the input display since the request was already submitted
2294
- // Note: Don't set statusMessage here - the isProcessing flag already shows "⏳ Processing..."
2295
2478
  this.pinnedChatBox.setProcessing(true);
2296
- this.pinnedChatBox.setStatusMessage(null); // Clear any previous status to avoid duplication
2479
+ this.pinnedChatBox.setStatusMessage(null);
2297
2480
  this.pinnedChatBox.clearInput();
2481
+ // Add newline so user's submitted input stays visible
2482
+ // (readline already displayed their input, we just need to preserve it)
2483
+ process.stdout.write('\n');
2484
+ // Note: Don't render pinned box during streaming - it interferes with content
2485
+ // The spinner will handle showing activity
2298
2486
  this.uiAdapter.startProcessing('Working on your request');
2299
2487
  this.setProcessingStatus();
2300
2488
  try {
2301
- display.newLine();
2302
- // Pinned chat box already shows processing state - skip redundant spinner
2303
- // which would conflict with the pinned area at terminal bottom
2304
- // display.showThinking('Working on your request...');
2305
- // Force render the pinned chat box to ensure it's visible during processing
2306
- this.pinnedChatBox.forceRender();
2307
2489
  // Enable streaming for real-time text output (Claude Code style)
2308
2490
  await agent.send(request, true);
2309
2491
  await this.awaitPendingCleanup();
@@ -2325,17 +2507,15 @@ export class InteractiveShell {
2325
2507
  this.isProcessing = false;
2326
2508
  this.uiAdapter.endProcessing('Ready for prompts');
2327
2509
  this.setIdleStatus();
2328
- display.newLine();
2510
+ // Clear the pinned processing box before showing final output
2511
+ this.pinnedChatBox.clear();
2512
+ this.pinnedChatBox.setProcessing(false);
2513
+ this.pinnedChatBox.setStatusMessage(null);
2329
2514
  // Clear the processing status and ensure persistent prompt is visible
2330
2515
  this.persistentPrompt.updateStatusBar({ message: undefined });
2331
2516
  this.persistentPrompt.show();
2332
- // Update pinned chat box to show ready state and force render
2333
- this.pinnedChatBox.setProcessing(false);
2334
- this.pinnedChatBox.setStatusMessage(null);
2335
- this.pinnedChatBox.forceRender();
2336
2517
  // CRITICAL: Ensure readline prompt is active for user input
2337
- // Call ensureReadlineReady to resume stdin if paused and re-enable keypress
2338
- this.ensureReadlineReady();
2518
+ // This is a safety net in case the caller doesn't call rl.prompt()
2339
2519
  this.rl.prompt();
2340
2520
  this.scheduleQueueProcessing();
2341
2521
  this.refreshQueueIndicators();
@@ -2551,13 +2731,13 @@ What's the next action?`;
2551
2731
  // Clear the processing status and ensure persistent prompt is visible
2552
2732
  this.persistentPrompt.updateStatusBar({ message: undefined });
2553
2733
  this.persistentPrompt.show();
2554
- // Update pinned chat box to show ready state and force render
2734
+ // Clear streaming status line (Claude Code style)
2735
+ display.clearStreamingStatus();
2736
+ // Update pinned chat box to show ready state
2555
2737
  this.pinnedChatBox.setProcessing(false);
2556
2738
  this.pinnedChatBox.setStatusMessage(null);
2557
- this.pinnedChatBox.forceRender();
2558
2739
  // CRITICAL: Ensure readline prompt is active for user input
2559
- // Call ensureReadlineReady to resume stdin if paused and re-enable keypress
2560
- this.ensureReadlineReady();
2740
+ // This is a safety net in case the caller doesn't call rl.prompt()
2561
2741
  this.rl.prompt();
2562
2742
  this.scheduleQueueProcessing();
2563
2743
  this.refreshQueueIndicators();
@@ -2728,9 +2908,6 @@ What's the next action?`;
2728
2908
  systemPrompt: this.buildSystemPrompt(),
2729
2909
  reasoningEffort: this.sessionState.reasoningEffort,
2730
2910
  };
2731
- // Track streaming state for thinking vs content transitions
2732
- let wasThinking = false;
2733
- let thinkingHeaderShown = false;
2734
2911
  this.agent = this.runtimeSession.createAgent(selection, {
2735
2912
  onStreamChunk: (chunk) => {
2736
2913
  // Stream text directly to console for real-time display (Claude Code style)
@@ -2741,76 +2918,46 @@ What's the next action?`;
2741
2918
  display.stopThinking(false);
2742
2919
  process.stdout.write('\n'); // Newline after spinner
2743
2920
  }
2744
- // If transitioning from thinking to content, add separator
2745
- if (wasThinking) {
2746
- process.stdout.write('\n\n'); // Double newline to separate thinking from answer
2747
- wasThinking = false;
2748
- thinkingHeaderShown = false;
2749
- }
2750
- process.stdout.write(chunk);
2751
- });
2752
- },
2753
- onThinkingChunk: (thinking) => {
2754
- // Display thinking/reasoning content from models (DeepSeek-reasoner, GPT-5, Claude extended thinking)
2755
- // Stream the thinking content in a muted style to show Chain of Thought
2756
- display.safeWrite(() => {
2757
- // Stop spinner on first thinking chunk to show streaming content
2758
- if (display.isSpinnerActive()) {
2759
- display.stopThinking(false);
2760
- process.stdout.write('\n'); // Newline after spinner
2761
- }
2762
- // Show thinking header once
2763
- if (!thinkingHeaderShown) {
2764
- process.stdout.write(theme.ui.muted('💭 Thinking...\n'));
2765
- thinkingHeaderShown = true;
2766
- }
2767
- // Stream thinking content in muted style (distinguish from final answer)
2768
- process.stdout.write(theme.ui.muted(thinking));
2769
- wasThinking = true;
2921
+ // Style thinking blocks (Claude Code style)
2922
+ const styledChunk = this.styleStreamingChunk(chunk);
2923
+ process.stdout.write(styledChunk);
2770
2924
  });
2771
2925
  },
2772
2926
  onAssistantMessage: (content, metadata) => {
2773
2927
  const enriched = this.buildDisplayMetadata(metadata);
2774
2928
  // Update spinner based on message type
2775
2929
  if (metadata.isFinal) {
2776
- // Skip display if content was already streamed to avoid double-display
2777
- if (!metadata.wasStreamed) {
2778
- const parsed = this.splitThinkingResponse(content);
2779
- if (parsed?.thinking) {
2780
- const summary = this.extractThoughtSummary(parsed.thinking);
2781
- if (summary) {
2782
- display.updateThinking(`💭 ${summary}`);
2783
- }
2784
- display.showAssistantMessage(parsed.thinking, { ...enriched, isFinal: false });
2785
- }
2786
- const finalContent = parsed?.response?.trim() || content;
2787
- if (finalContent) {
2788
- display.showAssistantMessage(finalContent, enriched);
2789
- }
2930
+ const parsed = this.splitThinkingResponse(content);
2931
+ // Don't re-display thinking - it was already streamed in real-time
2932
+ // Just extract the response part
2933
+ const finalContent = parsed?.response?.trim() || content.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '').trim();
2934
+ if (finalContent) {
2935
+ display.showAssistantMessage(finalContent, enriched);
2790
2936
  }
2791
- // Show status line at end (Claude Code style: "• Context X% used • Ready for prompts (2s)")
2937
+ // Store last response for verification
2938
+ this.lastAssistantResponse = content;
2939
+ // Auto-verify if response contains verifiable claims
2940
+ this.scheduleAutoVerification(content);
2941
+ // Show status line at end (Claude Code style: "Session 5m • Context X% used • Ready for prompts (2s)")
2792
2942
  display.stopThinking();
2793
- // Calculate context usage
2794
- let contextInfo;
2943
+ // Calculate context usage and session time
2944
+ const sessionElapsedMs = Date.now() - this.sessionStartTime;
2945
+ let contextInfo = { sessionElapsedMs };
2795
2946
  if (enriched.contextWindowTokens && metadata.usage) {
2796
2947
  const total = this.totalTokens(metadata.usage);
2797
2948
  if (total && total > 0) {
2798
2949
  const percentage = Math.round((total / enriched.contextWindowTokens) * 100);
2799
- contextInfo = { percentage, tokens: total };
2950
+ contextInfo = { ...contextInfo, percentage, tokens: total };
2800
2951
  }
2801
2952
  }
2802
2953
  display.showStatusLine('Ready for prompts', enriched.elapsedMs, contextInfo);
2803
2954
  }
2804
2955
  else {
2805
- // Non-final message = narrative text before tool calls (Claude Code style)
2806
- // Stop spinner and show the narrative text directly
2956
+ // Non-final message = narrative text before tool calls
2957
+ // This content was already streamed in real-time via onStreamChunk
2958
+ // Don't display it again - just stop the spinner and continue
2807
2959
  display.stopThinking();
2808
- // Skip display if content was already streamed to avoid double-display
2809
- if (!metadata.wasStreamed) {
2810
- display.showNarrative(content.trim());
2811
- }
2812
- // The isProcessing flag already shows "⏳ Processing..." - no need for duplicate status
2813
- this.pinnedChatBox.forceRender();
2960
+ // Continue processing - content already shown via streaming
2814
2961
  return;
2815
2962
  }
2816
2963
  const cleanup = this.handleContextTelemetry(metadata, enriched);
@@ -2881,18 +3028,6 @@ What's the next action?`;
2881
3028
  return false;
2882
3029
  }
2883
3030
  }
2884
- /**
2885
- * Reset the pinned chat box to a fresh state after model/provider swap.
2886
- * Ensures the input box is properly visible and ready for input,
2887
- * just like on fresh startup.
2888
- */
2889
- resetChatBoxAfterModelSwap() {
2890
- this.pinnedChatBox.setStatusMessage(null);
2891
- this.pinnedChatBox.setProcessing(false);
2892
- this.pinnedChatBox.show();
2893
- this.pinnedChatBox.forceRender();
2894
- this.ensureReadlineReady();
2895
- }
2896
3031
  buildSystemPrompt() {
2897
3032
  const providerLabel = this.providerLabel(this.sessionState.provider);
2898
3033
  const lines = [
@@ -3254,27 +3389,6 @@ What's the next action?`;
3254
3389
  const fileChangesText = `${summary.files} file${summary.files === 1 ? '' : 's'} +${summary.additions} -${summary.removals}`;
3255
3390
  this.persistentPrompt.updateStatusBar({ fileChanges: fileChangesText });
3256
3391
  }
3257
- extractThoughtSummary(thought) {
3258
- // Extract first non-empty line
3259
- const lines = thought?.split('\n').filter(line => line.trim()) ?? [];
3260
- if (!lines.length) {
3261
- return null;
3262
- }
3263
- // Remove common thought prefixes
3264
- const cleaned = lines[0]
3265
- .trim()
3266
- .replace(/^(Thinking|Analyzing|Considering|Looking at|Let me)[:.\s]+/i, '')
3267
- .replace(/^I (should|need to|will|am)[:.\s]+/i, '')
3268
- .trim();
3269
- if (!cleaned) {
3270
- return null;
3271
- }
3272
- // Truncate to reasonable length
3273
- const maxLength = 50;
3274
- return cleaned.length > maxLength
3275
- ? cleaned.slice(0, maxLength - 3) + '...'
3276
- : cleaned;
3277
- }
3278
3392
  splitThinkingResponse(content) {
3279
3393
  if (!content?.includes('<thinking') && !content?.includes('<response')) {
3280
3394
  return null;
@@ -3297,6 +3411,61 @@ What's the next action?`;
3297
3411
  response: responseBody ?? '',
3298
3412
  };
3299
3413
  }
3414
+ /**
3415
+ * Style streaming chunks in real-time (Claude Code style)
3416
+ * Detects <thinking> blocks and applies cyan styling, hides XML tags
3417
+ */
3418
+ styleStreamingChunk(chunk) {
3419
+ let result = '';
3420
+ let remaining = chunk;
3421
+ while (remaining.length > 0) {
3422
+ if (this.isInsideThinkingBlock) {
3423
+ // Look for </thinking> end tag
3424
+ const endIdx = remaining.indexOf('</thinking>');
3425
+ if (endIdx !== -1) {
3426
+ // End of thinking block found
3427
+ const thinkingContent = remaining.slice(0, endIdx);
3428
+ // Apply cyan thinking styling to content (hide the closing tag)
3429
+ result += theme.thinking.text(thinkingContent);
3430
+ remaining = remaining.slice(endIdx + '</thinking>'.length);
3431
+ this.isInsideThinkingBlock = false;
3432
+ // Add separator and newline after thinking block ends
3433
+ result += `\n${theme.thinking.border('─'.repeat(40))}\n`;
3434
+ }
3435
+ else {
3436
+ // Still inside thinking block, apply cyan styling to all remaining
3437
+ result += theme.thinking.text(remaining);
3438
+ remaining = '';
3439
+ }
3440
+ }
3441
+ else {
3442
+ // Look for <thinking> start tag
3443
+ const startIdx = remaining.indexOf('<thinking>');
3444
+ if (startIdx !== -1) {
3445
+ // Output text before thinking tag normally
3446
+ if (startIdx > 0) {
3447
+ result += remaining.slice(0, startIdx);
3448
+ }
3449
+ // Show thinking header with cyan styling (Claude Code style)
3450
+ result += `${theme.thinking.icon('💭')} ${theme.thinking.label('Thinking')}\n`;
3451
+ remaining = remaining.slice(startIdx + '<thinking>'.length);
3452
+ this.isInsideThinkingBlock = true;
3453
+ }
3454
+ else {
3455
+ // No thinking tag, output normally
3456
+ result += remaining;
3457
+ remaining = '';
3458
+ }
3459
+ }
3460
+ }
3461
+ return result;
3462
+ }
3463
+ /**
3464
+ * Reset thinking block state (call at start of new request)
3465
+ */
3466
+ resetThinkingState() {
3467
+ this.isInsideThinkingBlock = false;
3468
+ }
3300
3469
  persistSessionPreference() {
3301
3470
  saveModelPreference(this.profile, {
3302
3471
  provider: this.sessionState.provider,
@@ -3351,7 +3520,6 @@ What's the next action?`;
3351
3520
  this.persistSessionPreference();
3352
3521
  this.refreshBannerSessionInfo();
3353
3522
  display.showInfo(`Switched from ${this.providerLabel(oldProvider)}/${oldModel} to ${match.label}/${defaultModel.id}`);
3354
- this.resetChatBoxAfterModelSwap();
3355
3523
  }
3356
3524
  else {
3357
3525
  // Revert on failure