erosolar-cli 2.1.193 → 2.1.194

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/README.md +54 -0
  2. package/dist/capabilities/offsecOpsCapability.d.ts +2 -2
  3. package/dist/capabilities/offsecOpsCapability.d.ts.map +1 -1
  4. package/dist/capabilities/offsecOpsCapability.js +6 -7
  5. package/dist/capabilities/offsecOpsCapability.js.map +1 -1
  6. package/dist/contracts/models.schema.json +9 -0
  7. package/dist/core/agent.d.ts +31 -12
  8. package/dist/core/agent.d.ts.map +1 -1
  9. package/dist/core/agent.js +71 -84
  10. package/dist/core/agent.js.map +1 -1
  11. package/dist/core/agentOrchestrator.d.ts +49 -0
  12. package/dist/core/agentOrchestrator.d.ts.map +1 -0
  13. package/dist/core/agentOrchestrator.js +313 -0
  14. package/dist/core/agentOrchestrator.js.map +1 -0
  15. package/dist/core/alphaZeroOrchestrator.d.ts +140 -0
  16. package/dist/core/alphaZeroOrchestrator.d.ts.map +1 -0
  17. package/dist/core/alphaZeroOrchestrator.js +418 -0
  18. package/dist/core/alphaZeroOrchestrator.js.map +1 -0
  19. package/dist/core/schemaValidator.d.ts +5 -0
  20. package/dist/core/schemaValidator.d.ts.map +1 -1
  21. package/dist/core/schemaValidator.js +65 -0
  22. package/dist/core/schemaValidator.js.map +1 -1
  23. package/dist/core/taskCompletionDetector.d.ts +106 -0
  24. package/dist/core/taskCompletionDetector.d.ts.map +1 -0
  25. package/dist/core/taskCompletionDetector.js +402 -0
  26. package/dist/core/taskCompletionDetector.js.map +1 -0
  27. package/dist/core/toolRuntime.d.ts +4 -0
  28. package/dist/core/toolRuntime.d.ts.map +1 -1
  29. package/dist/core/toolRuntime.js +40 -64
  30. package/dist/core/toolRuntime.js.map +1 -1
  31. package/dist/core/types.d.ts +14 -0
  32. package/dist/core/types.d.ts.map +1 -1
  33. package/dist/core/types.js.map +1 -1
  34. package/dist/providers/anthropicProvider.d.ts +8 -1
  35. package/dist/providers/anthropicProvider.d.ts.map +1 -1
  36. package/dist/providers/anthropicProvider.js +51 -0
  37. package/dist/providers/anthropicProvider.js.map +1 -1
  38. package/dist/providers/googleProvider.d.ts +7 -1
  39. package/dist/providers/googleProvider.d.ts.map +1 -1
  40. package/dist/providers/googleProvider.js +41 -0
  41. package/dist/providers/googleProvider.js.map +1 -1
  42. package/dist/providers/openaiChatCompletionsProvider.d.ts +7 -1
  43. package/dist/providers/openaiChatCompletionsProvider.d.ts.map +1 -1
  44. package/dist/providers/openaiChatCompletionsProvider.js +44 -0
  45. package/dist/providers/openaiChatCompletionsProvider.js.map +1 -1
  46. package/dist/shell/interactiveShell.d.ts +24 -4
  47. package/dist/shell/interactiveShell.d.ts.map +1 -1
  48. package/dist/shell/interactiveShell.js +437 -191
  49. package/dist/shell/interactiveShell.js.map +1 -1
  50. package/dist/shell/shellApp.js +8 -1
  51. package/dist/shell/shellApp.js.map +1 -1
  52. package/dist/tools/bashTools.d.ts +0 -1
  53. package/dist/tools/bashTools.d.ts.map +1 -1
  54. package/dist/tools/bashTools.js +3 -47
  55. package/dist/tools/bashTools.js.map +1 -1
  56. package/dist/tools/buildTools.js +1 -1
  57. package/dist/tools/buildTools.js.map +1 -1
  58. package/dist/tools/grepTools.js +6 -4
  59. package/dist/tools/grepTools.js.map +1 -1
  60. package/dist/tools/repoChecksTools.d.ts.map +1 -1
  61. package/dist/tools/repoChecksTools.js +5 -7
  62. package/dist/tools/repoChecksTools.js.map +1 -1
  63. package/dist/ui/ShellUIAdapter.d.ts +3 -2
  64. package/dist/ui/ShellUIAdapter.d.ts.map +1 -1
  65. package/dist/ui/ShellUIAdapter.js +9 -7
  66. package/dist/ui/ShellUIAdapter.js.map +1 -1
  67. package/package.json +1 -1
  68. package/dist/runtime/flowOrchestrator.d.ts +0 -46
  69. package/dist/runtime/flowOrchestrator.d.ts.map +0 -1
  70. package/dist/runtime/flowOrchestrator.js +0 -80
  71. package/dist/runtime/flowOrchestrator.js.map +0 -1
  72. package/dist/shell/taskCompletionDetector.d.ts +0 -52
  73. package/dist/shell/taskCompletionDetector.d.ts.map +0 -1
  74. package/dist/shell/taskCompletionDetector.js +0 -131
  75. package/dist/shell/taskCompletionDetector.js.map +0 -1
@@ -4,6 +4,7 @@ import { exec } from 'node:child_process';
4
4
  import { promisify } from 'node:util';
5
5
  import { existsSync, readFileSync, statSync, writeFileSync } from 'node:fs';
6
6
  import { join, resolve } from 'node:path';
7
+ import { AgentOrchestrator } from '../core/agentOrchestrator.js';
7
8
  import { display } from '../ui/display.js';
8
9
  import { theme } from '../ui/theme.js';
9
10
  import { getTerminalColumns } from '../ui/layout.js';
@@ -12,13 +13,11 @@ import { ensureSecretForProvider, getSecretDefinitionForProvider, getSecretValue
12
13
  import { saveActiveProfilePreference, saveModelPreference, loadToolSettings, saveToolSettings, clearToolSettings, clearActiveProfilePreference, loadSessionPreferences, saveSessionPreferences, loadFeatureFlags, saveFeatureFlags, toggleFeatureFlag, FEATURE_FLAG_INFO, } from '../core/preferences.js';
13
14
  import { getLearningSummary, getRecentLearning, commitLearning, exportAllLearning, getLearningDir, } from '../core/learningPersistence.js';
14
15
  import { buildEnabledToolSet, evaluateToolPermissions, getToolToggleOptions, } from '../capabilities/toolRegistry.js';
15
- import { FlowOrchestrator } from '../runtime/flowOrchestrator.js';
16
16
  import { detectApiKeyError } from '../core/errors/apiKeyErrors.js';
17
17
  import { detectPromptBlockError, } from '../core/errors/promptBlockErrors.js';
18
18
  import { detectNetworkError } from '../core/errors/networkErrors.js';
19
19
  import { buildWorkspaceContext } from '../workspace.js';
20
20
  import { buildInteractiveSystemPrompt } from './systemPrompt.js';
21
- import { getTaskCompletionDetector, resetTaskCompletionDetector, WRITE_TOOLS, } from './taskCompletionDetector.js';
22
21
  import { discoverAllModels, quickCheckProviders, getCachedDiscoveredModels, sortModelsByPriority } from '../core/modelDiscovery.js';
23
22
  import { getModels, getSlashCommands, getProviders } from '../core/agentSchemaLoader.js';
24
23
  import { loadMcpServers } from '../mcp/config.js';
@@ -37,8 +36,8 @@ import { analyzeImprovementOpportunities, runSelfImprovementCycle, getImprovemen
37
36
  import { listAvailablePlugins } from '../plugins/index.js';
38
37
  import { isValidSourceRepo, getRepoName, analyzeSource, runSelfEvolution, stopEvolution, getEvolutionStatus, emergencyEvolutionRollback, learnSourcePatterns, generateFix, } from '../core/selfEvolution.js';
39
38
  import { analyzeTokenUsage, discoverModularTargets, getModularStatusDisplay, generateContextOptimizations, getGuidelines, deleteGuideline, getPendingActions, executeModularAction, } from '../core/alphaZeroModular.js';
40
- import { startOffsecRun, resumeOffsecRun, recordOffsecOutcome, getOffsecNextActions, simulateOffsecRollout, formatOffsecStatus, listOffsecRuns, } from '../core/offsecAlphaZero.js';
41
39
  import { generateTestFlows, detectBugs, detectUIUpdates, saveTestFlows, saveBugReports, saveUIUpdates, getTestFlowStatus, } from '../core/intelligentTestFlows.js';
40
+ import { startOffsecRun, resumeOffsecRun, listOffsecRuns, getOffsecNextActions, simulateOffsecRollout, recordOffsecOutcome, formatOffsecStatus, } from '../core/offsecAlphaZero.js';
42
41
  import { PromptController } from '../ui/PromptController.js';
43
42
  import { enterStreamingMode, exitStreamingMode, isStreamingMode } from '../ui/globalWriteLock.js';
44
43
  import { setGlobalAIEnhancer } from '../tools/localExplore.js';
@@ -86,10 +85,6 @@ const CONTEXT_CLEANUP_SYSTEM_PROMPT = `Summarize earlier IDE collaboration so th
86
85
  - Respond in plain Markdown only (no tool or shell calls).`;
87
86
  const MAX_ATTACHMENT_BYTES = 200 * 1024; // 200KB per attachment
88
87
  const MAX_ATTACHMENT_CHARS = 16000; // Guardrail to avoid flooding context
89
- const WRITE_TOOL_NAMES = new Set(Array.from(WRITE_TOOLS)
90
- .map((tool) => tool.toLowerCase())
91
- // Bash/execute commands can be read-only; guard only on clear mutating tools
92
- .filter((tool) => !tool.includes('bash') && !tool.startsWith('execute')));
93
88
  export class InteractiveShell {
94
89
  agent = null;
95
90
  profile;
@@ -124,7 +119,6 @@ export class InteractiveShell {
124
119
  uiUpdates;
125
120
  _fileChangeTracker = new FileChangeTracker(); // Reserved for future file tracking features
126
121
  alphaZeroMetrics; // Alpha Zero 2 performance tracking
127
- flowOrchestrator = new FlowOrchestrator();
128
122
  maxNetworkRetries = 2;
129
123
  statusSubscription = null;
130
124
  followUpQueue = [];
@@ -140,6 +134,7 @@ export class InteractiveShell {
140
134
  lastContextWarningLevel = null;
141
135
  sessionPreferences;
142
136
  autosaveEnabled;
137
+ orchestrationNoticeShown = false;
143
138
  verificationEnabled = false;
144
139
  criticalApprovalMode = 'auto';
145
140
  editGuardMode = 'display-edits';
@@ -1629,7 +1624,10 @@ export class InteractiveShell {
1629
1624
  return `${provider} · ${this.sessionState.model}`;
1630
1625
  }
1631
1626
  refreshContextGauge() {
1632
- const tokens = getContextWindowTokens(this.sessionState.model);
1627
+ // First try to get context window from provider API (real value)
1628
+ // Fall back to static model mapping only if provider API is not available
1629
+ const providerContextWindow = this.agent?.getModelInfo()?.contextWindow;
1630
+ const tokens = providerContextWindow ?? getContextWindowTokens(this.sessionState.model);
1633
1631
  const normalizedTokens = typeof tokens === 'number' && Number.isFinite(tokens) ? tokens : null;
1634
1632
  this.activeContextWindowTokens = normalizedTokens;
1635
1633
  if (normalizedTokens !== null) {
@@ -1639,6 +1637,28 @@ export class InteractiveShell {
1639
1637
  };
1640
1638
  }
1641
1639
  }
1640
+ /**
1641
+ * Fetch real context window from provider API and update the gauge.
1642
+ * This should be called after the agent is created.
1643
+ */
1644
+ async fetchAndUpdateContextWindow() {
1645
+ if (!this.agent) {
1646
+ return;
1647
+ }
1648
+ try {
1649
+ const contextWindow = await this.agent.getContextWindowFromProvider();
1650
+ if (contextWindow !== null && Number.isFinite(contextWindow)) {
1651
+ this.activeContextWindowTokens = contextWindow;
1652
+ this.latestTokenUsage = {
1653
+ used: this.latestTokenUsage.used,
1654
+ limit: contextWindow,
1655
+ };
1656
+ }
1657
+ }
1658
+ catch {
1659
+ // Ignore errors - keep using static mapping
1660
+ }
1661
+ }
1642
1662
  updateContextUsage(percentage, autoCompactThreshold = CONTEXT_AUTOCOMPACT_PERCENT) {
1643
1663
  this.uiAdapter.updateContextUsage(percentage);
1644
1664
  this.terminalInput.setContextUsage(percentage);
@@ -1997,9 +2017,8 @@ export class InteractiveShell {
1997
2017
  return;
1998
2018
  }
1999
2019
  const isReasoning = type === 'reasoning';
2000
- // Approximate token count (roughly 4 chars per token)
2001
- this.streamingTokenCount += Math.ceil(chunk.length / 4);
2002
- this.renderer?.updateStreamingTokens(this.streamingTokenCount);
2020
+ // Token count is updated from real provider usage data in onAssistantMessage
2021
+ // Do NOT estimate tokens from chunk length - wait for actual API response
2003
2022
  // Keep pinned status updated for all streaming chunks
2004
2023
  this.updateStreamingStatusFromChunk(chunk);
2005
2024
  // Handle <thinking> tags as separate events in the queue
@@ -2276,14 +2295,17 @@ export class InteractiveShell {
2276
2295
  // Check for continuous/infinite loop commands or auto-escalation to completion mode
2277
2296
  const explicitContinuous = this.isContinuousCommand(trimmed);
2278
2297
  const autoContinuous = this.shouldAutoRunToCompletion(trimmed);
2279
- if (explicitContinuous || autoContinuous) {
2280
- if (autoContinuous && !explicitContinuous) {
2281
- display.showSystemMessage('⚡ Actionable request detected; running continuously until complete (Ctrl+C to stop).');
2282
- }
2298
+ if (explicitContinuous) {
2283
2299
  await this.processContinuousRequest(trimmed);
2284
2300
  this.syncRendererInput();
2285
2301
  return;
2286
2302
  }
2303
+ if (autoContinuous) {
2304
+ display.showSystemMessage('⚡ Actionable request detected; orchestrating until complete (Ctrl+C to stop).');
2305
+ await this.processRequest(trimmed, { orchestrate: true });
2306
+ this.syncRendererInput();
2307
+ return;
2308
+ }
2287
2309
  // Direct execution for all inputs, including multi-line pastes
2288
2310
  await this.processRequest(trimmed);
2289
2311
  this.syncRendererInput();
@@ -2320,19 +2342,113 @@ export class InteractiveShell {
2320
2342
  ];
2321
2343
  const strongMaintenanceIntent = maintenancePatterns.some((pattern) => pattern.test(normalized));
2322
2344
  // General action intent in a code context
2323
- const actionVerb = /\b(fix|resolve|address|refactor|implement|upgrade|migrate|optimi[sz]e|modernize|stabilize|harden|ship|complete|finish|clean|remove|delete|prune|tidy|audit|cleanup)\b/;
2345
+ const actionVerb = /\b(fix|resolve|address|refactor|implement|upgrade|migrate|optimi[sz]e|modernize|stabilize|harden|ship|complete|finish|clean|remove|delete|prune|tidy|audit|cleanup|debug|investigate|triage|scan|check|diagnos(e|is))\b/;
2324
2346
  const codeContext = /\b(repo|codebase|project|app|service|package|module|component|workspace|cli|tests?|source|files?|artifacts?)\b/;
2325
- const strongActionIntent = actionVerb.test(normalized) && codeContext.test(normalized) && normalized.length > 20;
2326
- // Allow maintenance/refactor tasks to auto-run even if phrased as questions ("got any junk to remove?")
2327
- if (strongMaintenanceIntent || strongActionIntent) {
2328
- // But still avoid purely informational questions like "what is this repo?"
2329
- if (startsWithInfoWord && !strongMaintenanceIntent) {
2330
- return false;
2331
- }
2332
- return true;
2347
+ const hasActionVerb = actionVerb.test(normalized);
2348
+ const hasCodeContext = codeContext.test(normalized);
2349
+ // Bug/issue hunt or health-check intents, even when phrased as questions
2350
+ const bugSweepIntent = /\b(got\s+any|any|open|known)\s+(bugs?|issues?|defects?)\b/;
2351
+ const triageIntent = /\b(find|check|hunt|triage|detect|scan|review|look\s+for)\s+(bugs?|issues?|errors?|failures?)\b/;
2352
+ const failingTestsIntent = /\b(failing|broken|red)\s+tests?\b|\btests?\s+(are\s+)?failing\b/;
2353
+ const qualitySweepIntent = /\b(audit|health\s+check|stability\s+check|bug\s+scan|issue\s+scan|regression\s+pass)\b/;
2354
+ const bugOrHealthIntent = [bugSweepIntent, triageIntent, failingTestsIntent, qualitySweepIntent].some((pattern) => pattern.test(normalized));
2355
+ // Score-based intent detection keeps questions like "what is this repo?" out
2356
+ let actionScore = 0;
2357
+ if (strongMaintenanceIntent)
2358
+ actionScore += 2;
2359
+ if (bugOrHealthIntent)
2360
+ actionScore += 2;
2361
+ if (hasActionVerb)
2362
+ actionScore += 1;
2363
+ if (hasCodeContext)
2364
+ actionScore += 1;
2365
+ const decisiveAction = actionScore >= 2 || (actionScore === 1 && !isQuestion && normalized.length > 10);
2366
+ if (!decisiveAction) {
2367
+ return false;
2333
2368
  }
2334
- return false;
2369
+ // Respect informational openers unless intent is clearly actionable
2370
+ if (startsWithInfoWord && actionScore < 3 && !bugOrHealthIntent && !strongMaintenanceIntent) {
2371
+ return false;
2372
+ }
2373
+ return true;
2374
+ }
2375
+ shouldContinueOrchestrating(originalRequest, result) {
2376
+ if (!result) {
2377
+ return { shouldContinue: false, reason: null };
2378
+ }
2379
+ const actionable = this.shouldAutoRunToCompletion(originalRequest);
2380
+ if (!actionable) {
2381
+ const incomplete = result.exitReason !== 'complete';
2382
+ return { shouldContinue: incomplete, reason: incomplete ? result.exitReason : null };
2383
+ }
2384
+ if (result.exitReason !== 'complete') {
2385
+ return { shouldContinue: true, reason: result.exitReason };
2386
+ }
2387
+ const executedTools = result.passes.some(pass => (pass.toolsUsed?.length ?? 0) > 0);
2388
+ const lastPass = result.passes[result.passes.length - 1];
2389
+ const lastPlanOnly = Boolean(lastPass?.planOnly) && !lastPass?.tookAction && (!lastPass?.toolsUsed?.length);
2390
+ const lastEmpty = !lastPass?.response?.trim();
2391
+ if (!executedTools) {
2392
+ return { shouldContinue: true, reason: 'no-action' };
2393
+ }
2394
+ if (lastPlanOnly) {
2395
+ return { shouldContinue: true, reason: 'plan-only' };
2396
+ }
2397
+ if (lastEmpty) {
2398
+ return { shouldContinue: true, reason: 'empty-response' };
2399
+ }
2400
+ return { shouldContinue: false, reason: null };
2401
+ }
2402
+ describeContinuationReason(reason) {
2403
+ switch (reason) {
2404
+ case 'no-action':
2405
+ return 'No tools or concrete actions were executed; continuing until real work is done.';
2406
+ case 'plan-only':
2407
+ return 'Last pass was just planning; executing the next concrete step now.';
2408
+ case 'empty-response':
2409
+ case 'empty':
2410
+ return 'Previous pass returned nothing; resuming with a concrete action.';
2411
+ case 'stalled':
2412
+ return 'Previous orchestration stalled; forcing continuation.';
2413
+ case 'max-passes':
2414
+ return 'Reached pass limit; extending the run to finish the task.';
2415
+ default:
2416
+ return 'Continuing orchestration until the task is actually finished.';
2417
+ }
2418
+ }
2419
+ buildForcedContinuationPrompt(originalRequest, lastResult, continuationReason) {
2420
+ const lastPass = lastResult.passes[lastResult.passes.length - 1];
2421
+ const rawResponse = lastPass?.response?.trim() ?? '';
2422
+ const truncatedResponse = rawResponse ? rawResponse.slice(0, 1200) : '';
2423
+ const responseNote = rawResponse && rawResponse.length > truncatedResponse.length
2424
+ ? '\n\n[Last response truncated]'
2425
+ : '';
2426
+ const usedTools = lastResult.passes.flatMap(pass => pass.toolsUsed ?? []);
2427
+ const hasToolUsage = usedTools.length > 0;
2428
+ const toolsUsed = hasToolUsage
2429
+ ? `Tools used so far: ${usedTools.join(', ')}.`
2430
+ : 'No tools have been used yet - start with a quick workspace scan (list_files + glob/grep/search) and then take concrete actions now.';
2431
+ const responseSection = truncatedResponse
2432
+ ? `Last response snapshot:\n${truncatedResponse}${responseNote}\n`
2433
+ : '';
2434
+ const exitReason = continuationReason ?? lastResult.exitReason ?? 'incomplete';
2435
+ const reasonLine = this.describeContinuationReason(exitReason);
2436
+ const missingActionLine = hasToolUsage
2437
+ ? ''
2438
+ : 'No tool calls or file edits have been made yet. Start with list_files + glob/grep/search to sweep the repo, then run a command (read/search/test/edit) and show the output before summarizing.';
2439
+ return `${originalRequest.trim()}
2440
+
2441
+ The previous orchestration stopped early (reason: ${exitReason}). Continue working until the task is truly finished. Use tools, run checks, and only declare completion when nothing remains.
2442
+
2443
+ ${reasonLine}
2444
+ ${toolsUsed}
2445
+ ${responseSection}
2446
+ ${missingActionLine ? `${missingActionLine}\n` : ''}Resume with the next concrete action now.`;
2335
2447
  }
2448
+ /**
2449
+ * Decide whether to automatically continue execution when the model stops after a plan/summary
2450
+ * without taking actions. This keeps flows moving toward completion instead of stalling on planning.
2451
+ */
2336
2452
  isExitCommand(input) {
2337
2453
  const lower = input.trim().toLowerCase();
2338
2454
  return (lower === 'exit' ||
@@ -5780,22 +5896,20 @@ export class InteractiveShell {
5780
5896
  this.clearInlinePanel();
5781
5897
  this.syncRendererInput();
5782
5898
  }
5783
- async runFlowControlledTask(initialRequest, options) {
5784
- const { mode } = options;
5785
- const followUpType = mode === 'continuous' ? 'continuous' : 'request';
5899
+ async processRequest(request, options) {
5786
5900
  if (this.isProcessing) {
5787
- this.enqueueFollowUpAction({ type: followUpType, text: initialRequest });
5788
- return null;
5901
+ this.enqueueFollowUpAction({ type: 'request', text: request });
5902
+ return;
5789
5903
  }
5790
5904
  if (!this.agent && !this.rebuildAgent()) {
5791
5905
  display.showWarning('Configure an API key via /secrets before sending requests.');
5792
- return null;
5906
+ return;
5793
5907
  }
5794
5908
  this.inlinePanelScopeActive = false;
5795
5909
  this.clearInlinePanel();
5796
5910
  const agent = this.agent;
5797
5911
  if (!agent) {
5798
- return null;
5912
+ return;
5799
5913
  }
5800
5914
  this.toolsUsedThisRun = [];
5801
5915
  this.currentToolCalls = [];
@@ -5806,138 +5920,166 @@ export class InteractiveShell {
5806
5920
  else {
5807
5921
  this.resetNetworkRetryState();
5808
5922
  }
5923
+ // Reset per-request render tracking
5809
5924
  this.responseRendered = false;
5810
- if (this.shouldLogPrompt(initialRequest)) {
5811
- this.logUserPrompt(initialRequest);
5925
+ if (this.shouldLogPrompt(request)) {
5926
+ this.logUserPrompt(request);
5812
5927
  }
5813
5928
  this.isProcessing = true;
5814
5929
  this.uiUpdates.setMode('processing');
5815
- this.streamingTokenCount = 0;
5930
+ this.streamingTokenCount = 0; // Reset token counter for new request
5816
5931
  this.terminalInput.setStreaming(true);
5932
+ // Keep the persistent input/control bar active as we transition into streaming.
5817
5933
  this.syncRendererInput();
5818
5934
  this.renderer?.render();
5819
- const overallStartTime = Date.now();
5935
+ const requestStartTime = Date.now(); // Alpha Zero 2 timing
5936
+ // Clear previous parallel agents and start fresh for new request
5820
5937
  const parallelManager = getParallelAgentManager();
5821
5938
  parallelManager.clear();
5822
5939
  parallelManager.startBatch();
5823
- this.lastUserQuery = initialRequest;
5824
- this.currentTaskType = classifyTaskType(initialRequest);
5940
+ // AlphaZero: Track task for learning
5941
+ this.lastUserQuery = request;
5942
+ this.currentTaskType = classifyTaskType(request);
5825
5943
  this.currentToolCalls = [];
5826
5944
  this.clearToolUsageMeta();
5827
5945
  this.renderer?.setActivity('Starting...');
5828
- this.uiAdapter.startProcessing(mode === 'continuous' ? 'Continuous execution mode' : 'Working on your request');
5946
+ this.uiAdapter.startProcessing('Working on your request');
5829
5947
  this.setProcessingStatus();
5830
5948
  this.beginAiRuntime();
5831
- this.startStreamingHeartbeat(mode === 'continuous' ? 'Streaming' : 'Streaming response');
5832
- const completionDetector = getTaskCompletionDetector();
5833
- completionDetector.reset();
5834
- this.flowOrchestrator.start(initialRequest);
5835
- this.updateStatusMessage('Orchestrating request...');
5836
- if (mode === 'continuous') {
5837
- display.showSystemMessage('Continuous mode active. Ctrl+C to stop.');
5949
+ let responseText = '';
5950
+ let orchestratorResult = null;
5951
+ const orchestrate = options?.orchestrate ?? true;
5952
+ const orchestratorPassLimit = options?.maxPasses ?? 10;
5953
+ if (orchestrate && !this.orchestrationNoticeShown) {
5954
+ display.showSystemMessage('⚡ Orchestrating every prompt until completion. Press Ctrl+C to stop a run early.');
5955
+ this.orchestrationNoticeShown = true;
5838
5956
  }
5839
- else {
5840
- display.showSystemMessage('Flow orchestrator engaged; running until the request is satisfied.');
5841
- }
5842
- let currentPrompt = initialRequest;
5843
- if (this.isSelfImprovementRequest(initialRequest)) {
5844
- currentPrompt = `${initialRequest}
5845
-
5846
- When finished with ALL tasks, say "TASK_FULLY_COMPLETE".`;
5847
- }
5848
- let iteration = 0;
5849
- let lastResponseText = '';
5850
- let lastToolsUsed = [];
5851
- let result = null;
5852
5957
  try {
5853
- while (true) {
5854
- iteration++;
5855
- this.toolsUsedThisRun = [];
5856
- this.updateStatusMessage('Orchestrating request...');
5857
- try {
5858
- display.showThinking('Responding...');
5859
- this.refreshStatusLine(true);
5860
- const response = await agent.send(currentPrompt, true);
5861
- lastResponseText = response ?? '';
5862
- this.finishStreamingFormatter(undefined, { refreshPrompt: false, mode: 'complete' });
5863
- await this.awaitPendingCleanup();
5864
- this.captureHistorySnapshot();
5865
- this.autosaveIfEnabled();
5866
- const elapsedMs = Date.now() - overallStartTime;
5867
- this.alphaZeroMetrics.recordMessage(elapsedMs);
5868
- if (!response?.trim()) {
5869
- display.showWarning('Model returned an empty response. Continuing orchestration...');
5870
- currentPrompt = `${initialRequest}
5871
-
5872
- The previous reply was empty. Resume the task now.`;
5873
- continue;
5874
- }
5875
- const toolsUsed = this.getExecutedTools(response);
5876
- lastToolsUsed = toolsUsed;
5877
- toolsUsed.forEach(tool => completionDetector.recordToolCall(tool, true, true));
5878
- const completionAnalysis = completionDetector.analyzeCompletion(response, toolsUsed);
5879
- const decision = this.flowOrchestrator.decide({
5880
- iteration,
5881
- response,
5882
- toolsUsed,
5883
- completionAnalysis,
5884
- verificationConfirmed: this.flowOrchestrator.isVerificationPending()
5885
- ? completionDetector.isVerificationConfirmed(response)
5886
- : false,
5887
- });
5888
- if (decision.type === 'stop') {
5889
- display.showSystemMessage(decision.message);
5890
- break;
5891
- }
5892
- if (decision.type === 'stagnation-stop') {
5893
- display.showWarning(decision.message);
5958
+ // Start streaming - no header needed, the input area already provides context
5959
+ this.startStreamingHeartbeat('Streaming response');
5960
+ if (orchestrate) {
5961
+ const orchestrator = new AgentOrchestrator(agent);
5962
+ orchestratorResult = await orchestrator.runToCompletion(request, {
5963
+ streaming: true,
5964
+ maxPasses: orchestratorPassLimit,
5965
+ maxStagnantPasses: 3,
5966
+ verificationMode: 'auto',
5967
+ enforceActions: true,
5968
+ });
5969
+ const MAX_CONTINUATIONS = 2;
5970
+ let continuationRuns = 0;
5971
+ while (orchestratorResult && continuationRuns < MAX_CONTINUATIONS) {
5972
+ const continuationDecision = this.shouldContinueOrchestrating(request, orchestratorResult);
5973
+ if (!continuationDecision.shouldContinue) {
5894
5974
  break;
5895
5975
  }
5896
- if (decision.type === 'continue') {
5897
- if (decision.message) {
5898
- display.showSystemMessage(decision.message);
5899
- }
5900
- currentPrompt = decision.prompt;
5901
- }
5902
- await new Promise(resolve => setTimeout(resolve, 500));
5976
+ const reasonMessage = this.describeContinuationReason(continuationDecision.reason);
5977
+ display.showSystemMessage(`🔁 ${reasonMessage}`);
5978
+ const continuationPrompt = this.buildForcedContinuationPrompt(request, orchestratorResult, continuationDecision.reason ?? undefined);
5979
+ const continuationResult = await orchestrator.runToCompletion(continuationPrompt, {
5980
+ streaming: true,
5981
+ maxPasses: Math.max(orchestratorPassLimit, 12),
5982
+ maxStagnantPasses: 3,
5983
+ verificationMode: 'auto',
5984
+ enforceActions: true,
5985
+ });
5986
+ orchestratorResult = {
5987
+ finalResponse: continuationResult.finalResponse,
5988
+ passes: [...orchestratorResult.passes, ...continuationResult.passes],
5989
+ exitReason: continuationResult.exitReason,
5990
+ };
5991
+ continuationRuns++;
5903
5992
  }
5904
- catch (error) {
5905
- display.stopThinking(false);
5906
- if (this.isContextOverflowError(error)) {
5907
- display.showSystemMessage(`⚡ Context overflow handled. Continuing with reduced context...`);
5908
- continue;
5909
- }
5910
- const handled = this.handleProviderError(error, () => this.runFlowControlledTask(initialRequest, options));
5911
- if (!handled) {
5912
- display.showError(error instanceof Error ? error.message : String(error), error);
5913
- break;
5993
+ responseText = orchestratorResult.finalResponse;
5994
+ }
5995
+ else {
5996
+ responseText = await agent.send(request, true);
5997
+ }
5998
+ this.finishStreamingFormatter(undefined, { refreshPrompt: false, mode: 'complete' });
5999
+ await this.awaitPendingCleanup();
6000
+ this.captureHistorySnapshot();
6001
+ this.autosaveIfEnabled();
6002
+ // Track metrics with Alpha Zero 2
6003
+ const elapsedMs = Date.now() - requestStartTime;
6004
+ this.alphaZeroMetrics.recordMessage(elapsedMs);
6005
+ if (!responseText?.trim()) {
6006
+ display.showWarning('The provider returned an empty response. Check your API key/provider selection or retry the prompt.');
6007
+ }
6008
+ // AlphaZero: Extract and track tool calls from response
6009
+ const toolsUsed = orchestratorResult
6010
+ ? orchestratorResult.passes.flatMap(pass => pass.toolsUsed)
6011
+ : this.getExecutedTools(responseText);
6012
+ this.currentToolCalls = toolsUsed.map(name => ({
6013
+ name,
6014
+ arguments: {},
6015
+ success: true, // Assume success if we got here
6016
+ duration: 0,
6017
+ }));
6018
+ if (orchestratorResult && orchestratorResult.exitReason !== 'complete') {
6019
+ const exitDetail = (() => {
6020
+ switch (orchestratorResult?.exitReason) {
6021
+ case 'max-passes':
6022
+ return 'Reached orchestrator pass limit; showing last response.';
6023
+ case 'empty-response':
6024
+ return 'Received empty replies while orchestrating; showing last response.';
6025
+ case 'stalled':
6026
+ return 'Orchestrator detected stagnation; showing last response.';
6027
+ default:
6028
+ return null;
5914
6029
  }
6030
+ })();
6031
+ if (exitDetail) {
6032
+ display.showSystemMessage(`⚠️ ${exitDetail}`);
6033
+ }
6034
+ }
6035
+ // AlphaZero: Check for failure in response
6036
+ const failure = detectFailure(responseText, {
6037
+ toolCalls: this.currentToolCalls,
6038
+ userMessage: request,
6039
+ });
6040
+ if (failure) {
6041
+ this.lastFailure = failure;
6042
+ // Check if we have a recovery strategy
6043
+ const strategy = findRecoveryStrategy(failure);
6044
+ if (strategy) {
6045
+ display.showSystemMessage(`🔄 Found recovery strategy for this type of issue (success rate: ${Math.round(strategy.successRate * 100)}%)`);
6046
+ }
6047
+ }
6048
+ else {
6049
+ // Success - record the tool pattern for this task type
6050
+ if (this.currentToolCalls.length > 0) {
6051
+ const toolPattern = {
6052
+ taskType: this.currentTaskType,
6053
+ toolSequence: this.currentToolCalls.map(t => t.name),
6054
+ successRate: 1.0,
6055
+ avgDuration: elapsedMs,
6056
+ occurrences: 1,
6057
+ };
6058
+ addToolPattern(this.currentTaskType, toolPattern);
5915
6059
  }
6060
+ // Clear action history on success
6061
+ clearActionHistory();
6062
+ this.lastFailure = null;
6063
+ }
6064
+ }
6065
+ catch (error) {
6066
+ const handled = this.handleProviderError(error, () => this.processRequest(request, options));
6067
+ if (!handled) {
6068
+ // Pass full error object for enhanced formatting with stack trace
6069
+ display.showError(error instanceof Error ? error.message : String(error), error);
5916
6070
  }
5917
- if (!this.responseRendered && lastResponseText.trim()) {
5918
- const finalText = lastResponseText.trim();
6071
+ }
6072
+ finally {
6073
+ // Fallback: if no assistant message was rendered (e.g., streaming hiccup), show the full response
6074
+ if (!this.responseRendered && responseText.trim()) {
6075
+ const finalText = responseText.trim();
5919
6076
  display.showAssistantMessage(finalText, { isFinal: true });
5920
6077
  this.ui.controller.recordAssistantResponse(finalText, {
5921
6078
  source: 'final',
5922
6079
  });
5923
6080
  this.responseRendered = true;
5924
6081
  }
5925
- result = {
5926
- finalResponse: lastResponseText,
5927
- toolsUsed: lastToolsUsed,
5928
- elapsedMs: Date.now() - overallStartTime,
5929
- };
5930
- }
5931
- finally {
5932
6082
  this.finishStreamingFormatter(undefined, { refreshPrompt: false, mode: 'complete' });
5933
- const totalElapsed = Date.now() - overallStartTime;
5934
- const minutes = Math.floor(totalElapsed / 60000);
5935
- const seconds = Math.floor((totalElapsed % 60000) / 1000);
5936
- const completionLabel = mode === 'continuous'
5937
- ? `\n🏁 Continuous execution completed in ${minutes}m ${seconds}s total`
5938
- : `\n🏁 Flow orchestration completed in ${minutes}m ${seconds}s total`;
5939
- display.showSystemMessage(completionLabel);
5940
- resetTaskCompletionDetector();
5941
6083
  display.stopThinking(false);
5942
6084
  this.uiUpdates.setMode('processing');
5943
6085
  this.stopStreamingHeartbeat('complete', { quiet: true });
@@ -5950,60 +6092,20 @@ The previous reply was empty. Resume the task now.`;
5950
6092
  this.updateStatusMessage(null);
5951
6093
  this.toolsUsedThisRun = [];
5952
6094
  queueMicrotask(() => this.uiUpdates.setMode('idle'));
6095
+ // CRITICAL: Ensure readline prompt is active for user input
6096
+ // Erosolar-CLI style: New prompt naturally appears at bottom
5953
6097
  this.ensureReadlineReady();
5954
6098
  this.scheduleQueueProcessing();
5955
6099
  this.maybeProcessPromptInbox();
5956
6100
  this.refreshQueueIndicators();
5957
6101
  }
5958
- return result;
5959
- }
5960
- handleFlowRunOutcome(request, result) {
5961
- this.currentToolCalls = result.toolsUsed.map((name) => ({
5962
- name,
5963
- arguments: {},
5964
- success: true,
5965
- duration: 0,
5966
- }));
5967
- const failure = detectFailure(result.finalResponse, {
5968
- toolCalls: this.currentToolCalls,
5969
- userMessage: request,
5970
- });
5971
- if (failure) {
5972
- this.lastFailure = failure;
5973
- const strategy = findRecoveryStrategy(failure);
5974
- if (strategy) {
5975
- display.showSystemMessage(`🔄 Found recovery strategy for this type of issue (success rate: ${Math.round(strategy.successRate * 100)}%)`);
5976
- }
5977
- return;
5978
- }
5979
- if (this.currentToolCalls.length > 0) {
5980
- const toolPattern = {
5981
- taskType: this.currentTaskType,
5982
- toolSequence: this.currentToolCalls.map((t) => t.name),
5983
- successRate: 1.0,
5984
- avgDuration: result.elapsedMs,
5985
- occurrences: 1,
5986
- };
5987
- addToolPattern(this.currentTaskType, toolPattern);
5988
- }
5989
- clearActionHistory();
5990
- this.lastFailure = null;
5991
- }
5992
- async processRequest(request) {
5993
- const result = await this.runFlowControlledTask(request, {
5994
- mode: 'standard',
5995
- });
5996
- if (!result) {
5997
- return;
5998
- }
5999
- this.handleFlowRunOutcome(request, result);
6000
6102
  }
6001
6103
  /**
6002
6104
  * Process a continuous/infinite loop request.
6003
6105
  * Runs the agent in a loop until:
6004
6106
  * 1. The agent indicates completion (verified by AI confirmation)
6005
6107
  * 2. User interrupts (Ctrl+C)
6006
- * 3. The orchestrator halts due to stagnation/verification exhaustion
6108
+ * 3. Maximum iterations reached (safety limit)
6007
6109
  *
6008
6110
  * Uses intelligent task completion detection with AI verification
6009
6111
  * to ensure tasks are truly complete before stopping.
@@ -6011,13 +6113,29 @@ The previous reply was empty. Resume the task now.`;
6011
6113
  * Context is automatically managed - overflow errors trigger auto-recovery.
6012
6114
  */
6013
6115
  async processContinuousRequest(initialRequest) {
6014
- const result = await this.runFlowControlledTask(initialRequest, {
6015
- mode: 'continuous',
6016
- });
6017
- if (!result) {
6116
+ const MAX_PASSES = 100; // Safety limit to prevent truly infinite loops
6117
+ if (this.isProcessing) {
6118
+ this.enqueueFollowUpAction({ type: 'continuous', text: initialRequest });
6119
+ return;
6120
+ }
6121
+ if (!this.agent && !this.rebuildAgent()) {
6122
+ display.showWarning('Configure an API key via /secrets before sending requests.');
6018
6123
  return;
6019
6124
  }
6020
- this.handleFlowRunOutcome(initialRequest, result);
6125
+ display.showSystemMessage(`Continuous mode active. Ctrl+C to stop.`);
6126
+ const preparedRequest = this.isSelfImprovementRequest(initialRequest)
6127
+ ? `${initialRequest}
6128
+
6129
+ IMPORTANT: You have full git access. After making improvements:
6130
+ 1. Use bash to run: git status (see changes)
6131
+ 2. Use bash to run: git add -A (stage changes)
6132
+ 3. Use bash to run: git commit -m "descriptive message" (commit)
6133
+ 4. Use bash to run: git push (when milestone reached)
6134
+
6135
+ Commit frequently with descriptive messages. Push when ready.
6136
+ When truly finished with ALL tasks, explicitly state "TASK_FULLY_COMPLETE".`
6137
+ : initialRequest;
6138
+ await this.processRequest(preparedRequest, { orchestrate: true, maxPasses: MAX_PASSES });
6021
6139
  }
6022
6140
  /**
6023
6141
  * Resolve executed tools for the current turn. Prefer the actual tool
@@ -6039,6 +6157,129 @@ The previous reply was empty. Resume the task now.`;
6039
6157
  }
6040
6158
  return this.extractToolsFromResponse(responseText);
6041
6159
  }
6160
+ /**
6161
+ * Detect plan-only responses that narrate intent without executing actions.
6162
+ */
6163
+ isPlanOnlyResponse(response) {
6164
+ const normalized = response.trim().toLowerCase();
6165
+ if (!normalized) {
6166
+ return false;
6167
+ }
6168
+ // If the assistant is clearly declaring completion, don't treat it as plan-only
6169
+ const completionGuards = [
6170
+ /\bnothing\s+(left|else)\s+(to\s+do|pending)\b/i,
6171
+ /\b(already|now)\s+(clean|complete|done)\b/i,
6172
+ /\b(no\s+(junk|issues?|changes?)\s+found)\b/i,
6173
+ ];
6174
+ if (completionGuards.some((pattern) => pattern.test(response))) {
6175
+ return false;
6176
+ }
6177
+ const planIndicators = [
6178
+ /\bplan\b/i,
6179
+ /\bapproach\b/i,
6180
+ /\bsteps?:\b/i,
6181
+ /\bstep\s+1\b/i,
6182
+ /\bstart by\b/i,
6183
+ /\bfirst[, ]/i,
6184
+ /\bthen\b/i,
6185
+ /\bnext\b/i,
6186
+ /\bi['’]?\s*will\b/i,
6187
+ /\bi['’]?\s*ll\b/i,
6188
+ /\bi['’]?\s*can\b.{0,40}\bthen\b/i,
6189
+ /\bi['’]?\s*(?:will|ll)\s+begin\b/i,
6190
+ ];
6191
+ return planIndicators.some((pattern) => pattern.test(response));
6192
+ }
6193
+ /**
6194
+ * Check if a response contains indicators that work is actually incomplete,
6195
+ * even if it also contains TASK_FULLY_COMPLETE marker.
6196
+ * This catches contradictory responses where the AI says "done" but also "not integrated yet".
6197
+ */
6198
+ responseIndicatesIncompleteWork(response) {
6199
+ // Patterns that indicate work isn't actually complete
6200
+ // Organized by category for maintainability
6201
+ const incompletePatterns = [
6202
+ // === INTEGRATION/DEPLOYMENT STATE ===
6203
+ // "hasn't been integrated/implemented/connected yet"
6204
+ /hasn'?t\s+been\s+(integrated|implemented|connected|deployed|added|completed|tested|verified)\s*(yet|still)?/i,
6205
+ // "not yet integrated/implemented" or "not integrated"
6206
+ /not\s+(yet\s+)?(integrated|implemented|connected|deployed|functional|working|complete|tested|verified)/i,
6207
+ // "ready for integration" = NOT integrated
6208
+ /ready\s+(for|to\s+be)\s+(integration|integrated|connected|deployed|testing|review)/i,
6209
+ // "needs to be integrated"
6210
+ /needs?\s+to\s+be\s+(integrated|connected|deployed|added|hooked|wired|tested|reviewed|merged)/i,
6211
+ // Passive voice: "was not performed/completed"
6212
+ /was\s+not\s+(performed|completed|implemented|deployed|integrated|tested)/i,
6213
+ // "the [X] service hasn't been"
6214
+ /the\s+\w+\s+(service|module|component|feature)\s+hasn'?t\s+been/i,
6215
+ // === PARTIAL/INCOMPLETE STATE ===
6216
+ // "still stores/uses/has" (current bad state persists)
6217
+ /still\s+(stores?|uses?|has|contains?|needs?|requires?|missing|lacks?|broken)/i,
6218
+ // Partial completion: "partially", "mostly", "almost"
6219
+ /\b(partially|mostly|almost|nearly|not\s+fully)\s+(complete|done|finished|implemented|working)/i,
6220
+ // Explicit partial: "part of", "some of", "half of"
6221
+ /\b(only\s+)?(part|some|half|portion)\s+of\s+(the\s+)?(task|work|feature|implementation)/i,
6222
+ // === QUALIFIER WORDS (uncertain completion) ===
6223
+ // "should be complete", "appears to be complete", "theoretically complete"
6224
+ /\b(should|might|may|could|appears?\s+to)\s+be\s+(complete|done|working|functional)/i,
6225
+ /\btheoretically\s+(complete|done|working|functional)/i,
6226
+ // "assuming it works", "if everything works", "provided this is correct"
6227
+ /\b(assuming|provided|if)\s+(everything|it|this|that)\s+(works?|is\s+correct)/i,
6228
+ // === SELF-CONTRADICTION PHRASES ===
6229
+ // "done but...", "complete except...", "finished however..."
6230
+ /\b(done|complete|finished)\s+(but|except|however|although|though)/i,
6231
+ // "however" followed by incomplete indicator
6232
+ /however[,\s].{0,50}?(hasn'?t|not\s+yet|still\s+needs?|pending|remains?|missing|broken|failing)/i,
6233
+ // "but" followed by negative state
6234
+ /\bbut\s+.{0,30}?(not|hasn'?t|won'?t|can'?t|doesn'?t|isn'?t|wasn'?t)/i,
6235
+ // === FUTURE TENSE / DEFERRED WORK ===
6236
+ // "will need to", "will require"
6237
+ /will\s+(need\s+to|require|have\s+to)\s+(integrate|connect|deploy|complete|implement|test|fix)/i,
6238
+ // Deferred: "left as", "deferred", "postponed", "out of scope"
6239
+ /\b(left\s+as|deferred|postponed|out\s+of\s+scope|for\s+later|in\s+a\s+future)/i,
6240
+ // Time-dependent: "after restart", "takes effect after", "once you"
6241
+ /\b(after\s+(restart|reboot|redeploy)|takes?\s+effect\s+after|once\s+you)/i,
6242
+ // === REMAINING WORK INDICATORS ===
6243
+ // "remaining tasks", "outstanding items"
6244
+ /\b(remaining|outstanding|pending|leftover)\s+(tasks?|items?|work|issues?|steps?)/i,
6245
+ // "X more to do", "still have to"
6246
+ /\b(more\s+to\s+do|still\s+have\s+to|yet\s+to\s+be\s+done)/i,
6247
+ // Explicit blockers
6248
+ /\b(blocker|blocked\s+by|waiting\s+(for|on)|depends?\s+on)/i,
6249
+ // === ERROR/FAILURE STATE ===
6250
+ // "failing tests", "broken builds", "erroring checks"
6251
+ /\b(failing|broken|erroring)\s+(tests?|builds?|checks?|validations?)/i,
6252
+ // "tests? (are )?(still )?failing"
6253
+ /\btests?\s+(are\s+)?(still\s+)?failing/i,
6254
+ // "errors? to (address|fix)"
6255
+ /\b(errors?|warnings?|issues?)\s+to\s+(address|fix|resolve)/i,
6256
+ // "doesn't work", "isn't working", "not working"
6257
+ /\b(doesn'?t|isn'?t|not)\s+(work|working|functional|functioning)/i,
6258
+ // === MANUAL STEPS REQUIRED ===
6259
+ // "you'll need to", "manually run", "requires user"
6260
+ /\b(you('ll|\s+will)\s+need\s+to|manually\s+(run|configure|set|update)|requires?\s+user)/i,
6261
+ // "run this command", "execute the following"
6262
+ /\b(run\s+this|execute\s+the\s+following|apply\s+the\s+migration)/i,
6263
+ // === TODO/FIXME IN PROSE ===
6264
+ // TODO or FIXME mentioned as remaining work (NOTE: the pattern itself does not exclude code blocks)
6265
+ /\b(todo|fixme|hack|xxx):\s/i,
6266
+ // "need to add", "should implement"
6267
+ /\b(need\s+to|should|must)\s+(add|implement|create|write|build|fix)\b/i,
6268
+ // === SCOPE LIMITATIONS ===
6269
+ // "didn't have time", "did not have time/chance/opportunity"
6270
+ /\b(didn'?t|did\s+not)\s+have\s+(time|chance|opportunity)/i,
6271
+ // "beyond scope", "outside scope"
6272
+ /\b(beyond|outside)\s+(the\s+)?scope/i,
6273
+ // "for now" / "at this point" / "currently" followed shortly by a negative ("not", "without", "lacks", "missing")
6274
+ /\b(for\s+now|at\s+this\s+point|currently)\s*.{0,20}?(not|without|lacks?|missing)/i,
6275
+ ];
6276
+ for (const pattern of incompletePatterns) {
6277
+ if (pattern.test(response)) {
6278
+ return true;
6279
+ }
6280
+ }
6281
+ return false;
6282
+ }
6042
6283
  /**
6043
6284
  * Extract tool names from a response by looking for tool call patterns
6044
6285
  */
@@ -6531,12 +6772,11 @@ Return ONLY JSON array:
6531
6772
  const activity = normalized ? `Working: ${normalized}` : 'Working';
6532
6773
  this.renderer?.setActivity(activity);
6533
6774
  },
6534
- onBeforeFirstToolCall: (toolNames, _hasNarration) => {
6775
+ onBeforeFirstToolCall: (toolNames) => {
6535
6776
  const primaryTool = toolNames[0];
6536
6777
  if (primaryTool) {
6537
6778
  this.renderer?.setActivity(`Running ${primaryTool}`);
6538
6779
  }
6539
- // Don't inject synthetic thinking blocks - let the model respond naturally
6540
6780
  return undefined;
6541
6781
  },
6542
6782
  onStreamChunk: (chunk, type) => {
@@ -6685,14 +6925,10 @@ Return ONLY JSON array:
6685
6925
  activity = `Reading ${path}`;
6686
6926
  }
6687
6927
  this.renderer?.setActivity(activity);
6688
- // Estimate tokens for tool call (~50 tokens per call)
6689
- this.streamingTokenCount += 50;
6690
- this.renderer?.updateStreamingTokens(this.streamingTokenCount);
6928
+ // Token count updated from real provider usage - do not estimate
6691
6929
  }
6692
6930
  else {
6693
- // Tool finished - estimate result tokens (~100 per result)
6694
- this.streamingTokenCount += 100;
6695
- this.renderer?.updateStreamingTokens(this.streamingTokenCount);
6931
+ // Tool finished - token count updated from real provider usage
6696
6932
  // Reset to thinking state while model generates next response
6697
6933
  this.renderer?.setActivity('Thinking');
6698
6934
  }
@@ -6701,6 +6937,14 @@ Return ONLY JSON array:
6701
6937
  this.lastAssistantResponse = response;
6702
6938
  void this.runAutoQualityChecks('verification', response, context);
6703
6939
  },
6940
+ // Real token usage from provider during streaming
6941
+ onUsage: (usage) => {
6942
+ const totalTokens = this.totalTokens(usage);
6943
+ if (totalTokens !== null) {
6944
+ this.streamingTokenCount = totalTokens;
6945
+ this.renderer?.updateStreamingTokens(this.streamingTokenCount);
6946
+ }
6947
+ },
6704
6948
  // Retry notification for transient errors
6705
6949
  onRetrying: (attempt, maxAttempts, error) => {
6706
6950
  const shortError = error.message.slice(0, 100);
@@ -6710,6 +6954,8 @@ Return ONLY JSON array:
6710
6954
  });
6711
6955
  // Register global AI enhancer for explore tool - uses active model by default
6712
6956
  this.registerExploreAIEnhancer();
6957
+ // Fetch real context window from provider API (async, updates in background)
6958
+ void this.fetchAndUpdateContextWindow();
6713
6959
  const allowHistoryRestore = this.sessionRestoreConfig.mode !== 'none';
6714
6960
  const historyToLoad = allowHistoryRestore && this.pendingHistoryLoad && this.pendingHistoryLoad.length
6715
6961
  ? this.pendingHistoryLoad