myshell-tools 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +44 -69
- package/LICENSE +21 -21
- package/README.md +178 -318
- package/dist/cli.d.ts +8 -0
- package/dist/cli.js +130 -0
- package/dist/cli.js.map +1 -0
- package/dist/commands/cost.d.ts +36 -0
- package/dist/commands/cost.js +103 -0
- package/dist/commands/cost.js.map +1 -0
- package/dist/commands/doctor.d.ts +36 -0
- package/dist/commands/doctor.js +115 -0
- package/dist/commands/doctor.js.map +1 -0
- package/dist/commands/login.d.ts +20 -0
- package/dist/commands/login.js +60 -0
- package/dist/commands/login.js.map +1 -0
- package/dist/core/assess.d.ts +25 -0
- package/dist/core/assess.js +142 -0
- package/dist/core/assess.js.map +1 -0
- package/dist/core/classify.d.ts +19 -0
- package/dist/core/classify.js +80 -0
- package/dist/core/classify.js.map +1 -0
- package/dist/core/escalate.d.ts +32 -0
- package/dist/core/escalate.js +57 -0
- package/dist/core/escalate.js.map +1 -0
- package/dist/core/index.d.ts +13 -0
- package/dist/core/index.js +12 -0
- package/dist/core/index.js.map +1 -0
- package/dist/core/orchestrate.d.ts +42 -0
- package/dist/core/orchestrate.js +439 -0
- package/dist/core/orchestrate.js.map +1 -0
- package/dist/core/policy.d.ts +9 -0
- package/dist/core/policy.js +27 -0
- package/dist/core/policy.js.map +1 -0
- package/dist/core/prompt.d.ts +26 -0
- package/dist/core/prompt.js +125 -0
- package/dist/core/prompt.js.map +1 -0
- package/dist/core/review.d.ts +46 -0
- package/dist/core/review.js +148 -0
- package/dist/core/review.js.map +1 -0
- package/dist/core/route.d.ts +28 -0
- package/dist/core/route.js +52 -0
- package/dist/core/route.js.map +1 -0
- package/dist/core/types.d.ts +141 -0
- package/dist/core/types.js +14 -0
- package/dist/core/types.js.map +1 -0
- package/dist/infra/atomic.d.ts +53 -0
- package/dist/infra/atomic.js +171 -0
- package/dist/infra/atomic.js.map +1 -0
- package/dist/infra/clock.d.ts +9 -0
- package/dist/infra/clock.js +15 -0
- package/dist/infra/clock.js.map +1 -0
- package/dist/infra/index.d.ts +9 -0
- package/dist/infra/index.js +7 -0
- package/dist/infra/index.js.map +1 -0
- package/dist/infra/ledger.d.ts +49 -0
- package/dist/infra/ledger.js +90 -0
- package/dist/infra/ledger.js.map +1 -0
- package/dist/infra/paths.d.ts +28 -0
- package/dist/infra/paths.js +38 -0
- package/dist/infra/paths.js.map +1 -0
- package/dist/infra/pricing.d.ts +47 -0
- package/dist/infra/pricing.js +151 -0
- package/dist/infra/pricing.js.map +1 -0
- package/dist/infra/session.d.ts +28 -0
- package/dist/infra/session.js +61 -0
- package/dist/infra/session.js.map +1 -0
- package/dist/interface/render.d.ts +27 -0
- package/dist/interface/render.js +134 -0
- package/dist/interface/render.js.map +1 -0
- package/dist/interface/repl.d.ts +23 -0
- package/dist/interface/repl.js +90 -0
- package/dist/interface/repl.js.map +1 -0
- package/dist/interface/run.d.ts +20 -0
- package/dist/interface/run.js +31 -0
- package/dist/interface/run.js.map +1 -0
- package/dist/providers/claude-parse.d.ts +24 -0
- package/dist/providers/claude-parse.js +113 -0
- package/dist/providers/claude-parse.js.map +1 -0
- package/dist/providers/claude.d.ts +45 -0
- package/dist/providers/claude.js +122 -0
- package/dist/providers/claude.js.map +1 -0
- package/dist/providers/codex-parse.d.ts +32 -0
- package/dist/providers/codex-parse.js +145 -0
- package/dist/providers/codex-parse.js.map +1 -0
- package/dist/providers/codex.d.ts +44 -0
- package/dist/providers/codex.js +124 -0
- package/dist/providers/codex.js.map +1 -0
- package/dist/providers/detect.d.ts +49 -0
- package/dist/providers/detect.js +125 -0
- package/dist/providers/detect.js.map +1 -0
- package/dist/providers/errors.d.ts +49 -0
- package/dist/providers/errors.js +189 -0
- package/dist/providers/errors.js.map +1 -0
- package/dist/providers/index.d.ts +9 -0
- package/dist/providers/index.js +7 -0
- package/dist/providers/index.js.map +1 -0
- package/dist/providers/port.d.ts +74 -0
- package/dist/providers/port.js +16 -0
- package/dist/providers/port.js.map +1 -0
- package/dist/providers/registry.d.ts +21 -0
- package/dist/providers/registry.js +34 -0
- package/dist/providers/registry.js.map +1 -0
- package/dist/ui/banner.d.ts +19 -0
- package/dist/ui/banner.js +32 -0
- package/dist/ui/banner.js.map +1 -0
- package/dist/ui/spinner.d.ts +27 -0
- package/dist/ui/spinner.js +67 -0
- package/dist/ui/spinner.js.map +1 -0
- package/dist/ui/theme.d.ts +32 -0
- package/dist/ui/theme.js +56 -0
- package/dist/ui/theme.js.map +1 -0
- package/package.json +55 -49
- package/data/orchestrator.json +0 -113
- package/src/auth/recovery.mjs +0 -328
- package/src/auth/refresh.mjs +0 -373
- package/src/chef.mjs +0 -348
- package/src/cli/doctor.mjs +0 -568
- package/src/cli/reset.mjs +0 -447
- package/src/cli/status.mjs +0 -379
- package/src/cli.mjs +0 -429
- package/src/commands/doctor.mjs +0 -375
- package/src/commands/help.mjs +0 -324
- package/src/commands/status.mjs +0 -331
- package/src/monitor/health.mjs +0 -486
- package/src/monitor/performance.mjs +0 -442
- package/src/monitor/report.mjs +0 -535
- package/src/orchestrator/classify.mjs +0 -391
- package/src/orchestrator/confidence.mjs +0 -151
- package/src/orchestrator/handoffs.mjs +0 -231
- package/src/orchestrator/review.mjs +0 -222
- package/src/providers/balance.mjs +0 -201
- package/src/providers/claude.mjs +0 -236
- package/src/providers/codex.mjs +0 -255
- package/src/providers/detect.mjs +0 -185
- package/src/providers/errors.mjs +0 -373
- package/src/providers/select.mjs +0 -162
- package/src/repl-enhanced.mjs +0 -417
- package/src/repl.mjs +0 -321
- package/src/state/archive.mjs +0 -366
- package/src/state/atomic.mjs +0 -116
- package/src/state/cleanup.mjs +0 -440
- package/src/state/recovery.mjs +0 -461
- package/src/state/session.mjs +0 -147
- package/src/ui/errors.mjs +0 -456
- package/src/ui/formatter.mjs +0 -327
- package/src/ui/icons.mjs +0 -318
- package/src/ui/progress.mjs +0 -468
- package/templates/prompts/confidence-format.txt +0 -14
- package/templates/prompts/ic-with-feedback.txt +0 -41
- package/templates/prompts/ic.txt +0 -13
- package/templates/prompts/manager-review.txt +0 -40
- package/templates/prompts/manager.txt +0 -14
- package/templates/prompts/worker.txt +0 -12
|
@@ -1,231 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* handoffs.mjs — Comprehensive handoff logging and audit trail
|
|
3
|
-
*/
|
|
4
|
-
|
|
5
|
-
import { existsSync, mkdirSync, appendFileSync, readFileSync } from 'fs';
|
|
6
|
-
import { join } from 'path';
|
|
7
|
-
|
|
8
|
-
/**
 * Log a handoff operation to the audit trail.
 *
 * Builds one audit record and appends it as a single JSON line to
 * `handoffs.jsonl` inside the directory returned by getCortexDir().
 * Logging is best-effort: if the directory cannot be created or the line
 * cannot be appended, a warning is printed and the record is still returned.
 *
 * @param {string} operation - Operation label, stored as `op`.
 * @param {string} fromTier - Originating tier, stored as `from`.
 * @param {string} toTier - Destination tier, stored as `to`.
 * @param {object} [metadata] - Optional details: planId, reason,
 *   confidenceIn, confidenceOut, durationMs, providerFrom, providerTo,
 *   attempt, sessionId, notes.
 * @returns {object} The record that was built (returned even if the write failed).
 */
export function logHandoff(operation, fromTier, toTier, metadata = {}) {
  const handoffDir = getCortexDir();

  // Take one clock sample so `ts` and `timestamp` describe the same instant.
  const now = new Date();

  const entry = {
    ts: now.getTime(),
    timestamp: now.toISOString(),
    plan_id: metadata.planId || generatePlanId(),
    op: operation,
    from: fromTier,
    to: toTier,
    reason: metadata.reason || 'unknown',
    // `??` instead of `||`: a confidence of 0 or a duration of 0 ms is a
    // real measurement and must not be recorded as null.
    confidence_in: metadata.confidenceIn ?? null,
    confidence_out: metadata.confidenceOut ?? null,
    duration_ms: metadata.durationMs ?? null,
    provider_from: metadata.providerFrom || null,
    provider_to: metadata.providerTo || null,
    attempt: metadata.attempt || 1,
    session_id: metadata.sessionId || 'unknown',
    notes: metadata.notes || null
  };

  const logPath = join(handoffDir, 'handoffs.jsonl');

  try {
    // Directory creation is part of the best-effort write: a failure here
    // must not abort the caller any more than a failed append does.
    ensureHandoffDir(handoffDir);
    appendFileSync(logPath, JSON.stringify(entry) + '\n');
  } catch (error) {
    console.warn(`Failed to log handoff: ${error.message}`);
  }

  return entry;
}
|
|
43
|
-
|
|
44
|
-
/**
 * Record an upward escalation between tiers.
 *
 * Delegates to logHandoff with the 'escalate_up' operation. `reason` is
 * placed first in the metadata object, so a `reason` key inside `metadata`
 * takes precedence over the positional argument.
 *
 * @param {string} fromTier - Tier handing the work off.
 * @param {string} toTier - Tier receiving the work.
 * @param {string} reason - Why the escalation happened.
 * @param {object} [metadata] - Extra fields forwarded to logHandoff.
 * @returns {object} The record returned by logHandoff.
 */
export function logEscalation(fromTier, toTier, reason, metadata = {}) {
  const details = { reason, ...metadata };
  return logHandoff('escalate_up', fromTier, toTier, details);
}
|
|
53
|
-
|
|
54
|
-
/**
 * Record a delegation down to a lower tier.
 *
 * Delegates to logHandoff with the 'delegate_down' operation. `reason` is
 * placed first in the metadata object, so a `reason` key inside `metadata`
 * takes precedence over the positional argument.
 *
 * @param {string} fromTier - Tier handing the work off.
 * @param {string} toTier - Tier receiving the work.
 * @param {string} reason - Why the delegation happened.
 * @param {object} [metadata] - Extra fields forwarded to logHandoff.
 * @returns {object} The record returned by logHandoff.
 */
export function logDelegation(fromTier, toTier, reason, metadata = {}) {
  const details = { reason, ...metadata };
  return logHandoff('delegate_down', fromTier, toTier, details);
}
|
|
63
|
-
|
|
64
|
-
/**
 * Record a manager bounce back to the IC, carrying the review feedback.
 *
 * Delegates to logHandoff with the 'bounce_down' operation. The fixed
 * reason, the feedback (stored as `notes`) and the attempt number are placed
 * before `metadata`, so same-named keys in `metadata` take precedence.
 *
 * @param {string} fromTier - Tier bouncing the work.
 * @param {string} toTier - Tier receiving the work back.
 * @param {string} feedback - Manager feedback, stored as `notes`.
 * @param {number} attempt - Attempt number for this bounce.
 * @param {object} [metadata] - Extra fields forwarded to logHandoff.
 * @returns {object} The record returned by logHandoff.
 */
export function logBounce(fromTier, toTier, feedback, attempt, metadata = {}) {
  const details = {
    reason: 'manager review failure',
    notes: feedback,
    attempt,
    ...metadata
  };
  return logHandoff('bounce_down', fromTier, toTier, details);
}
|
|
75
|
-
|
|
76
|
-
/**
 * Load recent handoffs for analysis and load balancing.
 *
 * Reads `handoffs.jsonl` from the directory returned by getCortexDir(),
 * keeps the records inside the time window (and, optionally, belonging to
 * one session) and returns them newest first. A missing file yields an
 * empty array; a read failure prints a warning and yields an empty array.
 *
 * @param {number} [timeWindowHours=1] - How far back to look, in hours.
 * @param {string|null} [sessionId=null] - When truthy, only records whose
 *   `session_id` equals this value are returned.
 * @returns {object[]} Matching records sorted by `ts`, most recent first.
 */
export function getRecentHandoffs(timeWindowHours = 1, sessionId = null) {
  const logPath = join(getCortexDir(), 'handoffs.jsonl');

  if (!existsSync(logPath)) {
    return [];
  }

  const cutoffTime = Date.now() - (timeWindowHours * 60 * 60 * 1000);

  let content;
  try {
    content = readFileSync(logPath, 'utf8');
  } catch (error) {
    console.warn(`Failed to read handoff log: ${error.message}`);
    return [];
  }

  const handoffs = [];

  for (const line of content.split('\n')) {
    if (!line.trim()) continue;

    let entry;
    try {
      entry = JSON.parse(line);
    } catch {
      // Skip malformed lines
      continue;
    }

    // A line can be valid JSON without being a handoff record (`5`, `null`,
    // `{}`). Without a numeric `ts` it would slip past the window check
    // (`undefined < cutoff` is false) and put NaN into the sort below.
    if (!Number.isFinite(entry?.ts)) continue;

    // Filter by time window
    if (entry.ts < cutoffTime) continue;

    // Filter by session if specified
    if (sessionId && entry.session_id !== sessionId) continue;

    handoffs.push(entry);
  }

  return handoffs.sort((a, b) => b.ts - a.ts); // Most recent first
}
|
|
117
|
-
|
|
118
|
-
/**
 * Summarise recent handoffs for load balancing.
 *
 * Counts records per operation, per originating provider and per
 * originating tier, averages the truthy `duration_ms` values, and reports
 * the share of records whose operation is not 'bounce_down'.
 *
 * @param {number} [timeWindowHours=1] - Window passed to getRecentHandoffs.
 * @returns {{total: number, by_operation: object, by_provider: object,
 *   by_tier: object, avg_duration_ms: number, success_rate: number}}
 */
export function getHandoffStats(timeWindowHours = 1) {
  const recent = getRecentHandoffs(timeWindowHours);

  const increment = (bucket, key) => {
    bucket[key] = (bucket[key] || 0) + 1;
  };

  const stats = {
    total: recent.length,
    by_operation: {},
    by_provider: {},
    by_tier: {},
    avg_duration_ms: 0,
    success_rate: 0
  };

  let durationSum = 0;
  let timedCount = 0;
  let nonBounceCount = 0;

  for (const { op, from, provider_from: providerFrom, duration_ms: durationMs } of recent) {
    increment(stats.by_operation, op);

    // Only records that name an originating provider are counted here.
    if (providerFrom) {
      increment(stats.by_provider, providerFrom);
    }

    increment(stats.by_tier, from);

    // Records without a (truthy) duration do not take part in the average.
    if (durationMs) {
      durationSum += durationMs;
      timedCount += 1;
    }

    // Everything except a bounce counts as a successful routing step.
    if (op !== 'bounce_down') {
      nonBounceCount += 1;
    }
  }

  if (timedCount > 0) {
    stats.avg_duration_ms = Math.round(durationSum / timedCount);
  }
  if (stats.total > 0) {
    stats.success_rate = nonBounceCount / stats.total;
  }

  return stats;
}
|
|
166
|
-
|
|
167
|
-
/**
 * Detect a failure loop for one task from its recent handoff history.
 *
 * A record belongs to the task when its `plan_id` equals `taskHash` or when
 * its `notes` or `reason` contains `taskHash`. Each bounce contributes a
 * weight of 1.0, 0.5 (older than 30 minutes) or 0.25 (older than 60
 * minutes); a total weight of 2.0 or more is reported as a loop.
 *
 * @param {string} taskHash - Identifier to look for in the handoff records.
 * @param {number} [timeWindowHours=2] - Window passed to getRecentHandoffs.
 * @returns {{isLoop: boolean, bounceCount: number, escalationCount: number,
 *   weightedScore: number, suggestion: (string|null), recentHandoffs: object[]}}
 */
export function checkFailureLoop(taskHash, timeWindowHours = 2) {
  const belongsToTask = (h) =>
    h.plan_id === taskHash ||
    h.notes?.includes(taskHash) ||
    h.reason?.includes(taskHash);

  const taskHandoffs = getRecentHandoffs(timeWindowHours).filter(belongsToTask);

  const bounces = taskHandoffs.filter((h) => h.op === 'bounce_down');
  const escalations = taskHandoffs.filter((h) => h.op === 'escalate_up');

  // Older bounces count for less than fresh ones.
  const now = Date.now();
  const weightFor = (ageMin) => {
    if (ageMin > 60) return 0.25;
    if (ageMin > 30) return 0.5;
    return 1.0;
  };

  const weightedScore = bounces.reduce(
    (sum, bounce) => sum + weightFor((now - bounce.ts) / (60 * 1000)),
    0
  );

  const isLoop = weightedScore >= 2.0;

  return {
    isLoop,
    bounceCount: bounces.length,
    escalationCount: escalations.length,
    weightedScore,
    suggestion: isLoop ? 'escalate_to_manager' : null,
    recentHandoffs: taskHandoffs.slice(0, 5) // First five of the newest-first list
  };
}
|
|
209
|
-
|
|
210
|
-
/**
 * Generate a plan ID for tracking related handoffs.
 *
 * The ID has the form `p_<epoch ms>_<up to 6 base-36 characters>`. It is
 * built from Math.random(), so it is an identifier, not a secret.
 *
 * @returns {string} The generated plan ID.
 */
function generatePlanId() {
  // slice(2, 8) takes the same six characters the deprecated substr(2, 6) did.
  const suffix = Math.random().toString(36).slice(2, 8);
  return `p_${Date.now()}_${suffix}`;
}
|
|
216
|
-
|
|
217
|
-
/**
 * Resolve the path of the `.cortex` directory under a base directory.
 *
 * Only computes the path; nothing is created on disk.
 *
 * @param {string} [cwd=process.cwd()] - Base directory.
 * @returns {string} `<cwd>/.cortex`.
 */
function getCortexDir(cwd = process.cwd()) {
  const cortexDirName = '.cortex';
  return join(cwd, cortexDirName);
}
|
|
223
|
-
|
|
224
|
-
/**
 * Create the handoff directory when nothing exists at that path yet.
 *
 * Missing parent directories are created as well.
 *
 * @param {string} cortexDir - Directory path to ensure.
 */
function ensureHandoffDir(cortexDir) {
  if (existsSync(cortexDir)) {
    return;
  }
  mkdirSync(cortexDir, { recursive: true });
}
|
|
@@ -1,222 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* review.mjs — Manager review workflow and bounce-down pattern
|
|
3
|
-
*/
|
|
4
|
-
|
|
5
|
-
import { logBounce, logEscalation, logHandoff } from './handoffs.mjs';
|
|
6
|
-
import { parseConfidence } from './confidence.mjs';
|
|
7
|
-
|
|
8
|
-
/**
 * Have the manager tier review an IC result and report what to do next.
 *
 * Builds the review prompt, runs it through `runTierFn` on the 'manager'
 * tier, parses the verdict from the manager's output and records the review
 * in the handoff log. If the manager run itself reports failure, the
 * function returns verdict 'error' with action 'approve' and does not log.
 *
 * @param {string} task - The original task text.
 * @param {object} icResult - IC result; `output` and `confidence` are read.
 * @param {object} context - Run context; spread into the tier call, and
 *   `sessionId` is logged.
 * @param {Function} runTierFn - Called as `(tier, prompt, options)`; its
 *   awaited result is read for `success`, `error`, `output`, `confidence`
 *   and `durationMs`.
 * @returns {Promise<object>} `{ verdict, action, notes, managerResult }`,
 *   plus `icResult` on the non-error path.
 */
export async function runManagerReview(task, icResult, context, runTierFn) {
  console.log(` 🔍 MANAGER REVIEW: Evaluating IC work...`);

  const reviewPrompt = buildManagerReviewPrompt(task, icResult, context);
  const tierOptions = {
    ...context,
    operation: 'review',
    reviewTarget: 'ic_output'
  };

  const managerResult = await runTierFn('manager', reviewPrompt, tierOptions);

  if (!managerResult.success) {
    const failureNote = `Manager review failed: ${managerResult.error}`;
    console.log(` ❌ Manager review failed: ${managerResult.error}`);
    // A failed review does not block the work: the action stays 'approve'.
    return {
      verdict: 'error',
      action: 'approve',
      notes: failureNote,
      managerResult
    };
  }

  const { verdict, action, notes } = parseManagerReviewDecision(managerResult.output);

  console.log(` 📋 Manager decision: ${verdict.toUpperCase()}`);
  if (notes) {
    console.log(` 📝 Notes: ${notes}`);
  }

  // Record the review in the handoff audit trail.
  logHandoff('manager_review', 'ic', 'manager', {
    reason: 'post-execution review',
    confidenceIn: icResult.confidence,
    confidenceOut: managerResult.confidence,
    durationMs: managerResult.durationMs,
    sessionId: context.sessionId,
    notes,
    verdict
  });

  return { verdict, action, notes, managerResult, icResult };
}
|
|
61
|
-
|
|
62
|
-
/**
 * Build the manager review prompt.
 *
 * The prompt embeds the original task, the IC's output and run details, the
 * review criteria, the possible verdicts and the required JSON footer. For a
 * retry (`context.attempt > 1`) a note is appended, and any
 * `context.managerNotes` are appended as previous feedback.
 *
 * @param {string} originalTask - The task text given to the IC.
 * @param {object} icResult - IC result; `output`, `confidence`, `durationMs`
 *   and `selectedModel.model` are read.
 * @param {object} [context] - Optional `attempt` and `managerNotes`.
 * @returns {string} The complete prompt text.
 */
function buildManagerReviewPrompt(originalTask, icResult, context = {}) {
  // `??` so that a reported confidence of 0 is shown instead of "Not reported".
  const confidenceText = icResult.confidence ?? 'Not reported';
  // Append the unit only to a real value; the fallback used to read "Unknownms".
  const durationText = icResult.durationMs != null ? `${icResult.durationMs}ms` : 'Unknown';

  // Must be `let`: the retry and feedback sections below are appended to it.
  // Declared as `const`, every retry threw "Assignment to constant variable".
  let prompt = `You are a MANAGER reviewing work completed by an IC (Individual Contributor).

ORIGINAL TASK: ${originalTask}

IC'S WORK RESULT:
─────────────────
${icResult.output}
─────────────────
IC Confidence: ${confidenceText}
Duration: ${durationText}
Model: ${icResult.selectedModel?.model || 'Unknown'}

Your job is to review this work and decide:

REVIEW CRITERIA:
1. CORRECTNESS: Did the IC actually complete the task as requested?
2. QUALITY: Is the implementation solid, or are there obvious issues?
3. SECURITY: Any security concerns or risky patterns?
4. COMPLETENESS: Are there missing pieces or edge cases not handled?
5. EDGE CASES: What could break under unusual conditions?

POSSIBLE VERDICTS:
- APPROVE: Work is good, ship it
- BOUNCE: IC should fix issues and retry (provide specific feedback)
- ESCALATE: Manager should take over this task directly
- REFRAME: Task needs to be broken down or approached differently

Your response MUST end with structured output:
{"verdict": "approve|bounce|escalate|reframe", "notes": "specific feedback", "confidence": 0.0-1.0, "risk_level": "low|medium|high|critical"}

If bouncing, be specific about what needs to be fixed.
If approving, note any minor concerns for future reference.`;

  // Add context about previous attempts if this is a retry
  if (context.attempt && context.attempt > 1) {
    prompt += `\n\nNOTE: This is attempt #${context.attempt}. Previous manager feedback was addressed.`;
  }

  // Add manager notes from previous bounces
  if (context.managerNotes) {
    prompt += `\n\nPREVIOUS MANAGER FEEDBACK:\n${context.managerNotes}`;
  }

  return prompt;
}
|
|
111
|
-
|
|
112
|
-
/**
 * Parse the manager's review decision from their response.
 *
 * First looks for a flat JSON object containing a "verdict" key and uses its
 * fields. If none is found, or it does not parse, falls back to keyword
 * matching on the lower-cased text, checked in the order bounce, escalate,
 * reframe, approve, with 'approve' as the default.
 *
 * @param {string} managerOutput - Raw text produced by the manager tier.
 * @returns {{verdict: string, action: string, notes: string,
 *   confidence: *, riskLevel: string}} The parsed decision.
 */
function parseManagerReviewDecision(managerOutput) {
  // A failed or empty run may hand over a non-string; treat it as empty text
  // instead of crashing on `.match`.
  const output = typeof managerOutput === 'string' ? managerOutput : String(managerOutput ?? '');
  const confidence = parseConfidence(output);

  // Try to extract structured decision
  const structuredMatch = output.match(/\{[^{}]*"verdict"[^{}]*\}/);

  if (structuredMatch) {
    try {
      const data = JSON.parse(structuredMatch[0]);
      // Normalise before mapping: "BOUNCE" or " Bounce " used to miss the
      // lookup and silently become an approval.
      const verdict = typeof data.verdict === 'string' ? data.verdict.trim().toLowerCase() : '';
      return {
        verdict: verdict || 'approve',
        notes: data.notes || '',
        // `??` so an explicit confidence of 0 is kept.
        confidence: data.confidence ?? confidence.confidence,
        riskLevel: data.risk_level || 'medium',
        action: mapVerdictToAction(verdict)
      };
    } catch (error) {
      console.warn(`Failed to parse manager decision JSON: ${error.message}`);
    }
  }

  // Fallback: try to parse from text patterns
  const text = output.toLowerCase();
  const mentions = (...phrases) => phrases.some((phrase) => text.includes(phrase));

  let verdict = 'approve'; // default

  if (mentions('bounce', 'retry', 'fix')) {
    verdict = 'bounce';
  } else if (mentions('escalate', 'take over', 'manager should')) {
    verdict = 'escalate';
  } else if (mentions('reframe', 'break down', 'different approach')) {
    verdict = 'reframe';
  }

  // Extract notes from the content (everything before structured output)
  const notes = structuredMatch
    ? output.substring(0, output.indexOf(structuredMatch[0])).trim()
    : output.substring(0, 300).trim(); // First 300 chars as fallback

  return {
    verdict,
    action: verdict,
    notes: notes || 'No specific notes provided',
    confidence: confidence.confidence,
    riskLevel: 'medium' // default when not specified
  };
}
|
|
169
|
-
|
|
170
|
-
/**
 * Map a verdict to the action to take.
 *
 * Recognised verdicts ('approve', 'bounce', 'escalate', 'reframe') map to
 * the action of the same name, ignoring case and surrounding whitespace.
 * Anything else, including non-string input, maps to 'approve'.
 *
 * @param {*} verdict - Verdict value, typically taken from model output.
 * @returns {string} One of 'approve', 'bounce', 'escalate', 'reframe'.
 */
function mapVerdictToAction(verdict) {
  // A Set instead of an object literal: `obj['constructor']` and similar
  // inherited keys are truthy and used to be returned as the "action".
  const knownActions = new Set(['approve', 'bounce', 'escalate', 'reframe']);

  const normalized = typeof verdict === 'string' ? verdict.trim().toLowerCase() : '';

  return knownActions.has(normalized) ? normalized : 'approve';
}
|
|
183
|
-
|
|
184
|
-
/**
 * Decide whether an IC result must go through manager review.
 *
 * Rules are checked in order and the first match wins: critical risk; high
 * risk with confidence below 0.7 or not reported as a number; the IC set
 * `escalate === true`; a touched path mentions auth, credential, billing or
 * payment (case-insensitive); a non-low-risk output longer than 500
 * characters that never mentions "test".
 *
 * @param {string} task - The task text (not consulted by the current rules).
 * @param {object} classification - `risk` and optional `paths` are read.
 * @param {object} icResult - `confidence`, `escalate` and `output` are read.
 * @returns {{required: boolean, reason: string}} The decision and its reason.
 */
export function shouldTriggerManagerReview(task, classification, icResult) {
  const { risk } = classification;

  // Always review critical risk tasks
  if (risk === 'critical') {
    return { required: true, reason: 'critical risk level' };
  }

  // Review high risk tasks with medium or low confidence. A confidence that
  // is not a usable number counts as low: `undefined < 0.7` is false, which
  // used to let unreported confidence skip review.
  const { confidence } = icResult;
  const confidenceKnown = typeof confidence === 'number' && !Number.isNaN(confidence);
  if (risk === 'high' && (!confidenceKnown || confidence < 0.7)) {
    return { required: true, reason: 'high risk with low confidence' };
  }

  // Review if IC explicitly requested it
  if (icResult.escalate === true) {
    return { required: true, reason: 'IC requested escalation' };
  }

  // Review auth/billing/security related changes. Paths may be absent, and
  // are compared lower-cased so `src/Auth/...` is caught as well.
  const sensitiveMarkers = ['auth', 'credential', 'billing', 'payment'];
  const paths = Array.isArray(classification.paths) ? classification.paths : [];
  const touchesSensitivePath = paths.some((path) => {
    if (typeof path !== 'string') return false;
    const lowered = path.toLowerCase();
    return sensitiveMarkers.some((marker) => lowered.includes(marker));
  });
  if (touchesSensitivePath) {
    return { required: true, reason: 'sensitive file paths' };
  }

  // Review if no tests were added for substantial changes
  const output = typeof icResult.output === 'string' ? icResult.output : '';
  if (!output.toLowerCase().includes('test') &&
      output.length > 500 &&
      risk !== 'low') {
    return { required: true, reason: 'substantial change without tests' };
  }

  return { required: false, reason: 'standard IC work' };
}
|
|
@@ -1,201 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* balance.mjs — Provider load balancing and health monitoring
|
|
3
|
-
*/
|
|
4
|
-
|
|
5
|
-
import { getRecentHandoffs, getHandoffStats } from '../orchestrator/handoffs.mjs';
|
|
6
|
-
|
|
7
|
-
/**
 * Pick a model for a tier, favouring less-used and healthier providers.
 *
 * With zero or one candidate the choice is trivial (the single model, or
 * null). Otherwise every candidate gets a score made of 60% usage (falling
 * to zero at ten recorded uses within the last hour) and 40% provider
 * health, and the highest-scoring candidate is returned with the scoring
 * details attached.
 *
 * @param {object} availableModels - Map of tier name to an array of models;
 *   each model's `provider` is read.
 * @param {string} tier - Tier whose models are considered.
 * @param {object} [context] - Optional `sessionId` used to scope usage.
 * @returns {object|null} The chosen model, or null when the tier has none.
 */
export function balanceProviderLoad(availableModels, tier, context = {}) {
  const candidates = availableModels[tier] || [];
  if (candidates.length <= 1) {
    return candidates[0] || null;
  }

  const windowHours = 1; // 1 hour window
  const usageByProvider = getProviderUsage(context.sessionId, windowHours);
  const healthByProvider = getProviderHealthStatus();

  const scoreModel = (model) => {
    const uses = usageByProvider[model.provider] || 0;
    // Providers without a health record are assumed fully healthy.
    const providerHealth = healthByProvider[model.provider] || { score: 1.0 };

    // Fewer recent uses means a higher usage score.
    const usageScore = Math.max(0, 1 - (uses / 10));
    const healthScore = providerHealth.score;
    const combined = (usageScore * 0.6) + (healthScore * 0.4);

    return {
      ...model,
      score: combined,
      usage: uses,
      health: providerHealth,
      reasoning: `usage: ${uses}, health: ${healthScore.toFixed(2)}, score: ${combined.toFixed(2)}`
    };
  };

  const ranked = candidates.map(scoreModel);
  ranked.sort((left, right) => right.score - left.score);

  return ranked[0];
}
|
|
50
|
-
|
|
51
|
-
/**
 * Count recent handoffs per originating provider.
 *
 * @param {string|null|undefined} sessionId - Session filter passed to
 *   getRecentHandoffs.
 * @param {number} timeWindowHours - Window passed to getRecentHandoffs.
 * @returns {object} Map of provider name to number of handoffs whose
 *   `provider_from` is that provider; records without one are ignored.
 */
function getProviderUsage(sessionId, timeWindowHours) {
  return getRecentHandoffs(timeWindowHours, sessionId).reduce((counts, handoff) => {
    const provider = handoff.provider_from;
    if (provider) {
      counts[provider] = (counts[provider] || 0) + 1;
    }
    return counts;
  }, {});
}
|
|
68
|
-
|
|
69
|
-
/**
 * Get provider health status based on recent failures.
 *
 * Looks at the last 30 minutes of handoffs. An 'escalate_up' record whose
 * reason mentions 'failure', 'error' or 'timeout' counts as a failure for
 * its originating provider. The score is `1 - 2 * failureRate`, floored at
 * 0, and is 1.0 for a provider with no recorded operations.
 *
 * @returns {object} Map with entries for 'claude' and 'codex', each
 *   `{ score, failures, total, failure_rate, status }` where status is
 *   'healthy' (> 0.8), 'degraded' (> 0.5) or 'unhealthy'.
 */
function getProviderHealthStatus() {
  const handoffs = getRecentHandoffs(0.5); // Last 30 minutes
  const failureMarkers = ['failure', 'error', 'timeout'];

  // Track failures per provider
  const failures = {};
  const total = {};

  for (const handoff of handoffs) {
    const provider = handoff.provider_from;
    if (!provider) continue;

    total[provider] = (total[provider] || 0) + 1;

    // Records come from a file on disk; a missing or non-string reason must
    // not crash the health check.
    const reason = typeof handoff.reason === 'string' ? handoff.reason : '';

    // Count escalations due to failures as failures
    if (handoff.op === 'escalate_up' &&
        failureMarkers.some((marker) => reason.includes(marker))) {
      failures[provider] = (failures[provider] || 0) + 1;
    }
  }

  // Calculate health scores
  const health = {};

  for (const provider of ['claude', 'codex']) {
    const totalOps = total[provider] || 0;
    const failedOps = failures[provider] || 0;

    // The former "no operations but some failures" special case could never
    // fire: a failure is only counted after its operation has been counted,
    // so failedOps <= totalOps always holds.
    const failureRate = totalOps > 0 ? failedOps / totalOps : 0;
    const score = Math.max(0, 1 - (failureRate * 2)); // Penalize failures heavily

    health[provider] = {
      score,
      failures: failedOps,
      total: totalOps,
      failure_rate: failureRate,
      status: score > 0.8 ? 'healthy' : score > 0.5 ? 'degraded' : 'unhealthy'
    };
  }

  return health;
}
|
|
124
|
-
|
|
125
|
-
/**
 * Tell whether usage is uneven enough to warrant redistributing load.
 *
 * Compares the busiest and least busy providers seen in the last hour;
 * fewer than two providers with recorded usage never triggers.
 *
 * @param {string|null|undefined} sessionId - Session filter for usage.
 * @param {number} [threshold=5] - Gap in operation counts that must be exceeded.
 * @returns {boolean} True when the max-min usage gap exceeds the threshold.
 */
export function shouldRedistributeLoad(sessionId, threshold = 5) {
  const counts = Object.values(getProviderUsage(sessionId, 1));

  if (counts.length < 2) {
    return false;
  }

  const spread = Math.max(...counts) - Math.min(...counts);
  return spread > threshold;
}
|
|
141
|
-
|
|
142
|
-
/**
 * Produce load-balancing advice from current usage and provider health.
 *
 * Emits a 'redistribute' recommendation when load is uneven and the least
 * used provider is healthy, and an 'avoid' recommendation for every
 * provider whose health status is 'unhealthy'.
 *
 * @param {string|null|undefined} sessionId - Session filter for usage.
 * @returns {{should_redistribute: boolean, recommendations: object[],
 *   current_usage: object, provider_health: object}}
 */
export function getLoadBalanceRecommendations(sessionId) {
  const usage = getProviderUsage(sessionId, 1);
  const health = getProviderHealthStatus();
  const shouldRedist = shouldRedistributeLoad(sessionId);

  const recommendations = [];

  if (shouldRedist) {
    // Order providers from most to least used.
    const byUsageDesc = Object.entries(usage).sort((left, right) => right[1] - left[1]);
    const [overused] = byUsageDesc[0] || [];
    const [underused] = byUsageDesc[byUsageDesc.length - 1] || [];

    const targetIsHealthy = health[underused]?.status === 'healthy';
    if (overused && underused && targetIsHealthy) {
      recommendations.push({
        type: 'redistribute',
        from: overused,
        to: underused,
        reason: `${overused} is overloaded (${usage[overused]} ops), prefer ${underused} (${usage[underused]} ops)`
      });
    }
  }

  // Health-based recommendations
  Object.entries(health)
    .filter(([, info]) => info.status === 'unhealthy')
    .forEach(([provider, info]) => {
      recommendations.push({
        type: 'avoid',
        provider,
        reason: `${provider} is unhealthy (${Math.round(info.failure_rate * 100)}% failure rate)`
      });
    });

  return {
    should_redistribute: shouldRedist,
    recommendations,
    current_usage: usage,
    provider_health: health
  };
}
|
|
186
|
-
|
|
187
|
-
/**
 * Announce a provider's maintenance mode.
 *
 * Nothing is persisted: the function prints the new mode and returns a
 * small status record describing it.
 *
 * @param {string} provider - Provider name.
 * @param {boolean} [offline=true] - Whether maintenance mode is on.
 * @returns {{provider: string, maintenance: boolean, timestamp: string}}
 *   Status record with an ISO-8601 timestamp.
 */
export function setProviderMaintenance(provider, offline = true) {
  const modeLabel = offline ? 'ON' : 'OFF';
  console.log(`Provider ${provider} maintenance mode: ${modeLabel}`);

  const timestamp = new Date().toISOString();
  return { provider, maintenance: offline, timestamp };
}
|