erosolar-cli 2.1.175 → 2.1.176

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,7 @@ import { ensureSecretForProvider, getSecretDefinitionForProvider, getSecretValue
12
12
  import { saveActiveProfilePreference, saveModelPreference, loadToolSettings, saveToolSettings, clearToolSettings, clearActiveProfilePreference, loadSessionPreferences, saveSessionPreferences, loadFeatureFlags, saveFeatureFlags, toggleFeatureFlag, FEATURE_FLAG_INFO, } from '../core/preferences.js';
13
13
  import { getLearningSummary, getRecentLearning, commitLearning, exportAllLearning, getLearningDir, } from '../core/learningPersistence.js';
14
14
  import { buildEnabledToolSet, evaluateToolPermissions, getToolToggleOptions, } from '../capabilities/toolRegistry.js';
15
+ import { FlowOrchestrator } from '../runtime/flowOrchestrator.js';
15
16
  import { detectApiKeyError } from '../core/errors/apiKeyErrors.js';
16
17
  import { detectPromptBlockError, } from '../core/errors/promptBlockErrors.js';
17
18
  import { detectNetworkError } from '../core/errors/networkErrors.js';
@@ -123,6 +124,7 @@ export class InteractiveShell {
123
124
  uiUpdates;
124
125
  _fileChangeTracker = new FileChangeTracker(); // Reserved for future file tracking features
125
126
  alphaZeroMetrics; // Alpha Zero 2 performance tracking
127
+ flowOrchestrator = new FlowOrchestrator();
126
128
  maxNetworkRetries = 2;
127
129
  statusSubscription = null;
128
130
  followUpQueue = [];
@@ -2356,7 +2358,7 @@ export class InteractiveShell {
2356
2358
  return false;
2357
2359
  }
2358
2360
  const mutatingToolUsed = toolsUsed.some((tool) => WRITE_TOOL_NAMES.has(tool.toLowerCase()));
2359
- const planOnly = this.isPlanOnlyResponse(response);
2361
+ const planOnly = this.flowOrchestrator.isPlanOnlyResponse(response);
2360
2362
  const lowActionDensity = response.split(/\s+/).length < 80;
2361
2363
  // If it's clearly a plan-only response, continue regardless of prior tool usage
2362
2364
  if (planOnly) {
@@ -6035,13 +6037,8 @@ Execute the plan you outlined. Use the available tools (bash, edits, git) to tak
6035
6037
  this.beginAiRuntime();
6036
6038
  // No streaming header - just start streaming directly
6037
6039
  this.startStreamingHeartbeat('Streaming');
6040
+ this.flowOrchestrator.start(initialRequest);
6038
6041
  let iteration = 0;
6039
- let lastResponse = '';
6040
- let consecutiveNoProgress = 0;
6041
- const MAX_NO_PROGRESS = 5; // Increased to allow more attempts before giving up
6042
- let pendingVerification = false;
6043
- let verificationAttempts = 0;
6044
- const MAX_VERIFICATION_ATTEMPTS = 2;
6045
6042
  try {
6046
6043
  // Enhance initial prompt with git context for self-improvement tasks
6047
6044
  let currentPrompt = initialRequest;
@@ -6076,7 +6073,6 @@ When truly finished with ALL tasks, explicitly state "TASK_FULLY_COMPLETE".`;
6076
6073
  this.alphaZeroMetrics.recordMessage(elapsedMs);
6077
6074
  if (!response?.trim()) {
6078
6075
  display.showWarning('Model returned an empty response. Retrying this iteration...');
6079
- consecutiveNoProgress++;
6080
6076
  currentPrompt = `${initialRequest}
6081
6077
 
6082
6078
  The previous reply was empty. Resume the task now: take the next action, call the necessary tools, and report progress.`;
@@ -6088,91 +6084,41 @@ The previous reply was empty. Resume the task now: take the next action, call th
6088
6084
  // Use intelligent completion detection
6089
6085
  const completionAnalysis = completionDetector.analyzeCompletion(response, toolsUsed);
6090
6086
  display.showSystemMessage(`📈 Completion confidence: ${(completionAnalysis.confidence * 100).toFixed(0)}%`);
6091
- // Check for explicit TASK_FULLY_COMPLETE marker (highest priority)
6092
- // BUT: Don't terminate if the response also indicates work is incomplete
6093
- if (response.includes('TASK_FULLY_COMPLETE')) {
6094
- const hasContradiction = this.responseIndicatesIncompleteWork(response);
6095
- if (hasContradiction) {
6096
- display.showSystemMessage(`\n⚠️ TASK_FULLY_COMPLETE detected but response indicates incomplete work. Continuing...`);
6097
- // Override the completion signal - the AI is contradicting itself
6098
- currentPrompt = `You marked the task as TASK_FULLY_COMPLETE but also indicated that work is still pending or not integrated. Please clarify:
6099
-
6100
- 1. Is ALL the originally requested work actually complete and functional?
6101
- 2. If there are parts that are "ready but not integrated" or "implemented but not connected", those are NOT complete.
6102
- 3. Only say TASK_FULLY_COMPLETE when the user's original request is 100% fulfilled.
6103
-
6104
- What remains to be done? Continue with the next step.`;
6105
- await new Promise(resolve => setTimeout(resolve, 500));
6106
- continue;
6107
- }
6108
- display.showSystemMessage(`\n✅ Task explicitly marked complete after ${iteration} iteration(s).`);
6087
+ const decision = this.flowOrchestrator.decide({
6088
+ iteration,
6089
+ response,
6090
+ toolsUsed,
6091
+ completionAnalysis,
6092
+ verificationConfirmed: this.flowOrchestrator.isVerificationPending()
6093
+ ? completionDetector.isVerificationConfirmed(response)
6094
+ : false,
6095
+ });
6096
+ if (decision.type === 'stop') {
6097
+ display.showSystemMessage(decision.message);
6109
6098
  break;
6110
6099
  }
6111
- // High confidence completion without verification needed
6112
- if (completionAnalysis.isComplete && completionAnalysis.confidence >= 0.85) {
6113
- display.showSystemMessage(`\n✅ Task completed with high confidence after ${iteration} iteration(s).`);
6114
- display.showSystemMessage(` Reason: ${completionAnalysis.reason}`);
6100
+ if (decision.type === 'stagnation-stop') {
6101
+ display.showWarning(decision.message);
6115
6102
  break;
6116
6103
  }
6117
- // Medium confidence - run verification round
6118
- if (completionAnalysis.shouldVerify && completionAnalysis.verificationPrompt && !pendingVerification) {
6119
- if (verificationAttempts < MAX_VERIFICATION_ATTEMPTS) {
6120
- display.showSystemMessage(`\n🔍 Running verification round (confidence: ${(completionAnalysis.confidence * 100).toFixed(0)}%)...`);
6121
- pendingVerification = true;
6122
- verificationAttempts++;
6123
- currentPrompt = completionAnalysis.verificationPrompt;
6124
- await new Promise(resolve => setTimeout(resolve, 500));
6125
- continue;
6126
- }
6104
+ if (decision.type === 'execute-plan') {
6105
+ display.showSystemMessage(decision.message);
6106
+ currentPrompt = decision.prompt;
6107
+ await new Promise(resolve => setTimeout(resolve, 500));
6108
+ continue;
6127
6109
  }
6128
- // If we were in verification mode, check the result
6129
- if (pendingVerification) {
6130
- pendingVerification = false;
6131
- if (completionDetector.isVerificationConfirmed(response)) {
6132
- display.showSystemMessage(`\n✅ Task completion verified by AI after ${iteration} iteration(s).`);
6133
- break;
6134
- }
6135
- else {
6136
- display.showSystemMessage(`🔄 Verification indicates more work needed. Continuing...`);
6137
- }
6110
+ if (decision.type === 'verify') {
6111
+ display.showSystemMessage(decision.message);
6112
+ currentPrompt = decision.prompt;
6113
+ await new Promise(resolve => setTimeout(resolve, 500));
6114
+ continue;
6138
6115
  }
6139
- // Check for no progress (same response multiple times)
6140
- const responseChanged = response !== lastResponse;
6141
- if (!responseChanged) {
6142
- consecutiveNoProgress++;
6143
- if (consecutiveNoProgress >= MAX_NO_PROGRESS) {
6144
- // Before giving up, ask one final verification
6145
- if (verificationAttempts < MAX_VERIFICATION_ATTEMPTS) {
6146
- display.showSystemMessage(`\n⚠️ No progress for ${MAX_NO_PROGRESS} iterations. Running final verification...`);
6147
- currentPrompt = `I notice you may be stuck or finished. Please confirm:
6148
-
6149
- 1. Is the original task FULLY complete?
6150
- 2. If yes, respond with exactly: "TASK_FULLY_COMPLETE"
6151
- 3. If no, what specific action should be taken next?
6152
-
6153
- Be explicit about the current state.`;
6154
- verificationAttempts++;
6155
- consecutiveNoProgress = 0;
6156
- await new Promise(resolve => setTimeout(resolve, 500));
6157
- continue;
6158
- }
6159
- display.showSystemMessage(`\n⚠️ No progress detected for ${MAX_NO_PROGRESS} iterations and verification exhausted. Stopping.`);
6160
- break;
6116
+ if (decision.type === 'continue') {
6117
+ if (decision.message) {
6118
+ display.showSystemMessage(decision.message);
6161
6119
  }
6120
+ currentPrompt = decision.prompt;
6162
6121
  }
6163
- else {
6164
- consecutiveNoProgress = 0;
6165
- }
6166
- lastResponse = response;
6167
- // Prepare next iteration prompt - explicitly encourage progress reporting
6168
- currentPrompt = `Continue with the next step. Remember:
6169
- - Use bash to run git commands (git status, git add, git commit, git push)
6170
- - Commit your changes with descriptive messages after completing improvements
6171
- - Push changes when a logical milestone is reached
6172
- - If all tasks are complete, respond with exactly: "TASK_FULLY_COMPLETE"
6173
- - If there are errors or blockers, explain what's preventing progress
6174
-
6175
- What's the next action?`;
6176
6122
  // Small delay between iterations to prevent rate limiting
6177
6123
  await new Promise(resolve => setTimeout(resolve, 500));
6178
6124
  }
@@ -6245,129 +6191,6 @@ What's the next action?`;
6245
6191
  }
6246
6192
  return this.extractToolsFromResponse(responseText);
6247
6193
  }
6248
- /**
6249
- * Detect plan-only responses that narrate intent without executing actions.
6250
- */
6251
- isPlanOnlyResponse(response) {
6252
- const normalized = response.trim().toLowerCase();
6253
- if (!normalized) {
6254
- return false;
6255
- }
6256
- // If the assistant is clearly declaring completion, don't treat it as plan-only
6257
- const completionGuards = [
6258
- /\bnothing\s+(left|else)\s+(to\s+do|pending)\b/i,
6259
- /\b(already|now)\s+(clean|complete|done)\b/i,
6260
- /\b(no\s+(junk|issues?|changes?)\s+found)\b/i,
6261
- ];
6262
- if (completionGuards.some((pattern) => pattern.test(response))) {
6263
- return false;
6264
- }
6265
- const planIndicators = [
6266
- /\bplan\b/i,
6267
- /\bapproach\b/i,
6268
- /\bsteps?:\b/i,
6269
- /\bstep\s+1\b/i,
6270
- /\bstart by\b/i,
6271
- /\bfirst[, ]/i,
6272
- /\bthen\b/i,
6273
- /\bnext\b/i,
6274
- /\bi['’]?\s*will\b/i,
6275
- /\bi['’]?\s*ll\b/i,
6276
- /\bi['’]?\s*can\b.{0,40}\bthen\b/i,
6277
- /\bi['’]?\s*(?:will|ll)\s+begin\b/i,
6278
- ];
6279
- return planIndicators.some((pattern) => pattern.test(response));
6280
- }
6281
- /**
6282
- * Check if a response contains indicators that work is actually incomplete,
6283
- * even if it also contains TASK_FULLY_COMPLETE marker.
6284
- * This catches contradictory responses where the AI says "done" but also "not integrated yet".
6285
- */
6286
- responseIndicatesIncompleteWork(response) {
6287
- // Patterns that indicate work isn't actually complete
6288
- // Organized by category for maintainability
6289
- const incompletePatterns = [
6290
- // === INTEGRATION/DEPLOYMENT STATE ===
6291
- // "hasn't been integrated/implemented/connected yet"
6292
- /hasn'?t\s+been\s+(integrated|implemented|connected|deployed|added|completed|tested|verified)\s*(yet|still)?/i,
6293
- // "not yet integrated/implemented" or "not integrated"
6294
- /not\s+(yet\s+)?(integrated|implemented|connected|deployed|functional|working|complete|tested|verified)/i,
6295
- // "ready for integration" = NOT integrated
6296
- /ready\s+(for|to\s+be)\s+(integration|integrated|connected|deployed|testing|review)/i,
6297
- // "needs to be integrated"
6298
- /needs?\s+to\s+be\s+(integrated|connected|deployed|added|hooked|wired|tested|reviewed|merged)/i,
6299
- // Passive voice: "was not performed/completed"
6300
- /was\s+not\s+(performed|completed|implemented|deployed|integrated|tested)/i,
6301
- // "the [X] service hasn't been"
6302
- /the\s+\w+\s+(service|module|component|feature)\s+hasn'?t\s+been/i,
6303
- // === PARTIAL/INCOMPLETE STATE ===
6304
- // "still stores/uses/has" (current bad state persists)
6305
- /still\s+(stores?|uses?|has|contains?|needs?|requires?|missing|lacks?|broken)/i,
6306
- // Partial completion: "partially", "mostly", "almost"
6307
- /\b(partially|mostly|almost|nearly|not\s+fully)\s+(complete|done|finished|implemented|working)/i,
6308
- // Explicit partial: "part of", "some of", "half of"
6309
- /\b(only\s+)?(part|some|half|portion)\s+of\s+(the\s+)?(task|work|feature|implementation)/i,
6310
- // === QUALIFIER WORDS (uncertain completion) ===
6311
- // "should be complete", "appears complete", "theoretically"
6312
- /\b(should|might|may|could|appears?\s+to)\s+be\s+(complete|done|working|functional)/i,
6313
- /\btheoretically\s+(complete|done|working|functional)/i,
6314
- // "assuming", "if everything works"
6315
- /\b(assuming|provided|if)\s+(everything|it|this|that)\s+(works?|is\s+correct)/i,
6316
- // === SELF-CONTRADICTION PHRASES ===
6317
- // "done but...", "complete except...", "finished however..."
6318
- /\b(done|complete|finished)\s+(but|except|however|although|though)/i,
6319
- // "however" followed by incomplete indicator
6320
- /however[,\s].{0,50}?(hasn'?t|not\s+yet|still\s+needs?|pending|remains?|missing|broken|failing)/i,
6321
- // "but" followed by negative state
6322
- /\bbut\s+.{0,30}?(not|hasn'?t|won'?t|can'?t|doesn'?t|isn'?t|wasn'?t)/i,
6323
- // === FUTURE TENSE / DEFERRED WORK ===
6324
- // "will need to", "will require"
6325
- /will\s+(need\s+to|require|have\s+to)\s+(integrate|connect|deploy|complete|implement|test|fix)/i,
6326
- // Deferred: "left as", "deferred", "postponed", "out of scope"
6327
- /\b(left\s+as|deferred|postponed|out\s+of\s+scope|for\s+later|in\s+a\s+future)/i,
6328
- // Time-dependent: "after restart", "takes effect after", "once you"
6329
- /\b(after\s+(restart|reboot|redeploy)|takes?\s+effect\s+after|once\s+you)/i,
6330
- // === REMAINING WORK INDICATORS ===
6331
- // "remaining tasks", "outstanding items"
6332
- /\b(remaining|outstanding|pending|leftover)\s+(tasks?|items?|work|issues?|steps?)/i,
6333
- // "X more to do", "still have to"
6334
- /\b(more\s+to\s+do|still\s+have\s+to|yet\s+to\s+be\s+done)/i,
6335
- // Explicit blockers
6336
- /\b(blocker|blocked\s+by|waiting\s+(for|on)|depends?\s+on)/i,
6337
- // === ERROR/FAILURE STATE ===
6338
- // "failing tests", "build errors"
6339
- /\b(failing|broken|erroring)\s+(tests?|builds?|checks?|validations?)/i,
6340
- // "tests? (are )?(still )?failing"
6341
- /\btests?\s+(are\s+)?(still\s+)?failing/i,
6342
- // "errors? to (address|fix)"
6343
- /\b(errors?|warnings?|issues?)\s+to\s+(address|fix|resolve)/i,
6344
- // "doesn't work", "isn't working", "not working"
6345
- /\b(doesn'?t|isn'?t|not)\s+(work|working|functional|functioning)/i,
6346
- // === MANUAL STEPS REQUIRED ===
6347
- // "you'll need to", "manually run", "requires user"
6348
- /\b(you('ll|\s+will)\s+need\s+to|manually\s+(run|configure|set|update)|requires?\s+user)/i,
6349
- // "run this command", "execute the following"
6350
- /\b(run\s+this|execute\s+the\s+following|apply\s+the\s+migration)/i,
6351
- // === TODO/FIXME IN PROSE ===
6352
- // TODO or FIXME mentioned as remaining work (not in code blocks)
6353
- /\b(todo|fixme|hack|xxx):\s/i,
6354
- // "need to add", "should implement"
6355
- /\b(need\s+to|should|must)\s+(add|implement|create|write|build|fix)\b/i,
6356
- // === SCOPE LIMITATIONS ===
6357
- // "didn't have time", "ran out of time"
6358
- /\b(didn'?t|did\s+not)\s+have\s+(time|chance|opportunity)/i,
6359
- // "beyond scope", "outside scope"
6360
- /\b(beyond|outside)\s+(the\s+)?scope/i,
6361
- // "for now" (temporary state)
6362
- /\b(for\s+now|at\s+this\s+point|currently)\s*.{0,20}?(not|without|lacks?|missing)/i,
6363
- ];
6364
- for (const pattern of incompletePatterns) {
6365
- if (pattern.test(response)) {
6366
- return true;
6367
- }
6368
- }
6369
- return false;
6370
- }
6371
6194
  /**
6372
6195
  * Extract tool names from a response by looking for tool call patterns
6373
6196
  */