@geekbeer/minion 2.32.0 → 2.42.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +0 -3
- package/README.md +0 -1
- package/core/api.js +13 -0
- package/core/config.js +50 -5
- package/core/lib/llm-checker.js +9 -16
- package/core/lib/log-manager.js +7 -3
- package/core/lib/platform.js +10 -15
- package/core/lib/revision-watcher.js +252 -0
- package/core/lib/step-poller.js +222 -0
- package/core/lib/strip-ansi.js +18 -0
- package/core/lib/workflow-orchestrator.js +382 -0
- package/core/routes/diagnose.js +296 -0
- package/core/routes/health.js +27 -0
- package/core/routes/routines.js +15 -10
- package/core/routes/skills.js +4 -1
- package/core/routes/workflows.js +49 -2
- package/core/stores/chat-store.js +12 -5
- package/core/stores/execution-store.js +4 -4
- package/core/stores/routine-store.js +7 -7
- package/core/stores/workflow-store.js +5 -6
- package/linux/lib/process-manager.js +14 -0
- package/linux/minion-cli.sh +57 -16
- package/linux/routes/chat.js +182 -20
- package/linux/routes/config.js +8 -12
- package/linux/routine-runner.js +5 -4
- package/linux/server.js +53 -1
- package/linux/workflow-runner.js +25 -61
- package/package.json +1 -1
- package/roles/pm.md +11 -12
- package/win/lib/process-manager.js +15 -0
- package/win/minion-cli.ps1 +79 -17
- package/win/routes/chat.js +178 -14
- package/win/routes/config.js +7 -3
- package/win/routes/directives.js +1 -1
- package/win/routes/terminal.js +19 -0
- package/win/routine-runner.js +5 -3
- package/win/server.js +53 -0
- package/win/terminal-server.js +8 -0
- package/win/workflow-runner.js +32 -44
- package/skills/execution-report/SKILL.md +0 -106
- package/win/lib/llm-checker.js +0 -115
- package/win/lib/log-manager.js +0 -119
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Step Poller
|
|
3
|
+
*
|
|
4
|
+
* Polling daemon that runs on every minion (including PM).
|
|
5
|
+
* Periodically checks HQ for pending workflow steps assigned to this
|
|
6
|
+
* minion's role, then fetches the skill and executes it.
|
|
7
|
+
*
|
|
8
|
+
* This enables the Pull model: minions autonomously pick up work
|
|
9
|
+
* when their turn comes, without needing a PM to push-dispatch.
|
|
10
|
+
* Handles minion absence gracefully — when a minion comes online,
|
|
11
|
+
* it simply picks up any pending steps waiting for its role.
|
|
12
|
+
*
|
|
13
|
+
* Flow per poll cycle:
|
|
14
|
+
* 1. GET /api/minion/pending-steps → list of actionable steps
|
|
15
|
+
* 2. For the first pending step that is not already in progress (one step per poll cycle):
|
|
16
|
+
* a. POST /api/skills/fetch/:name → deploy skill locally
|
|
17
|
+
* b. POST /api/skills/run → execute in tmux session
|
|
18
|
+
* c. (step-complete is reported by the /api/skills/run post-execution hook)
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
const { config, isHqConfigured } = require('../config')
|
|
22
|
+
const api = require('../api')
|
|
23
|
+
|
|
24
|
+
// Delay between poll cycles: 30 seconds (noted by the author as matching the heartbeat frequency)
const POLL_INTERVAL_MS = 30 * 1000

// Re-entrancy flag: true while a poll cycle is in flight, so cycles never overlap
let polling = false

// Handle of the periodic timer, kept so it can be cancelled later
let pollTimer = null

// step_execution_id of the step currently being started (guards against double-dispatch)
let activeStepExecutionId = null
|
|
35
|
+
|
|
36
|
+
/**
 * Run a single poll cycle: ask HQ for pending steps and start at most one.
 *
 * Returns immediately when HQ is not configured or when a previous cycle is
 * still in flight. Errors are logged here and never reach the caller.
 */
async function pollOnce() {
  if (!isHqConfigured() || polling) return

  polling = true
  try {
    // 1. Ask HQ which steps are waiting
    const data = await api.request('/pending-steps')
    const pendingSteps = data.steps
    if (!pendingSteps || pendingSteps.length === 0) return

    console.log(`[StepPoller] Found ${pendingSteps.length} pending step(s)`)

    // 2. Walk the list until one step has been handed to executeStep
    for (const step of pendingSteps) {
      const alreadyActive = activeStepExecutionId === step.step_execution_id
      if (!alreadyActive) {
        await executeStep(step)
        // A single step per cycle keeps the minion from being overloaded
        break
      }
      console.log(`[StepPoller] Step ${step.step_execution_id} already in progress, skipping`)
    }
  } catch (err) {
    // An unreachable HQ is an expected, transient condition — keep it out of the error log
    const hqUnreachable =
      err.message?.includes('fetch failed') || err.message?.includes('ECONNREFUSED')
    if (hqUnreachable) {
      console.log(`[StepPoller] HQ unreachable, will retry next cycle`)
    } else {
      console.error(`[StepPoller] Poll error: ${err.message}`)
    }
  } finally {
    polling = false
  }
}
|
|
78
|
+
|
|
79
|
+
/**
 * Claim and start a single pending step:
 *   1. Claim the step on HQ (POST /claim-step)
 *   2. Ask the local agent to fetch the skill (best effort)
 *   3. Ask the local agent to run the skill
 *
 * Never throws: every failure is logged. If the step was successfully claimed
 * but the skill could not be started (HTTP error or a thrown error such as a
 * refused connection), the failure is reported to HQ via
 * api.reportStepComplete so the claimed step is not left without a result.
 *
 * @param {object} step - Step info from the pending-steps response
 * @returns {Promise<void>}
 */
async function executeStep(step) {
  const {
    step_execution_id,
    execution_id,
    workflow_name,
    step_index,
    skill_version_id,
    assigned_role,
    skill_name,
    revision_feedback,
  } = step

  console.log(
    `[StepPoller] Executing step ${step_index} of "${workflow_name}" ` +
    `(skill: ${skill_name || skill_version_id}, role: ${assigned_role})`
  )

  activeStepExecutionId = step_execution_id

  // Progress markers: the catch block uses them to decide whether HQ must be
  // told that a claimed step never started.
  let claimed = false
  let started = false

  // Best-effort failure report; a failing report is logged, never rethrown.
  const reportFailure = async (reason) => {
    try {
      await api.reportStepComplete({
        workflow_execution_id: execution_id,
        step_index,
        status: 'failed',
        output_summary: `Step poller failed to start skill: ${reason}`,
      })
    } catch (reportErr) {
      console.error(`[StepPoller] Failed to report step failure: ${reportErr.message}`)
    }
  }

  try {
    // 1. Claim the step — tell HQ we're taking it
    try {
      await api.request('/claim-step', {
        method: 'POST',
        body: JSON.stringify({
          execution_id,
          step_index,
        }),
      })
    } catch (claimErr) {
      // 409 means the step is no longer pending (already claimed or completed)
      if (claimErr.statusCode === 409) {
        console.log(`[StepPoller] Step ${step_index} already claimed, skipping`)
        return
      }
      throw claimErr
    }
    claimed = true

    const agentBaseUrl = `http://localhost:${config.AGENT_PORT || 8080}`

    // 2. Fetch the skill so it is deployed locally
    if (skill_name) {
      try {
        const fetchUrl = `${agentBaseUrl}/api/skills/fetch/${encodeURIComponent(skill_name)}`
        const fetchResp = await fetch(fetchUrl, {
          method: 'POST',
          headers: { 'Authorization': `Bearer ${config.API_TOKEN}` },
        })
        if (!fetchResp.ok) {
          console.error(`[StepPoller] Skill fetch failed: ${await fetchResp.text()}`)
        }
      } catch (fetchErr) {
        console.error(`[StepPoller] Skill fetch error: ${fetchErr.message}`)
        // Continue — skill may already be deployed locally
      }
    }

    // 3. Run the skill via the local API
    const runPayload = {
      skill_name,
      execution_id,
      step_index,
      workflow_name,
      role: assigned_role,
    }
    if (revision_feedback) {
      runPayload.revision_feedback = revision_feedback
    }

    const runResp = await fetch(`${agentBaseUrl}/api/skills/run`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${config.API_TOKEN}`,
      },
      body: JSON.stringify(runPayload),
    })

    if (!runResp.ok) {
      const errData = (await runResp.json().catch(() => null)) || {}
      console.error(`[StepPoller] Skill run failed: ${errData.error || runResp.status}`)
      await reportFailure(errData.error || 'unknown error')
      return
    }

    // From here on the skill is running; an unreadable response body must not
    // be turned into a "failed" report.
    started = true
    const runData = (await runResp.json().catch(() => null)) || {}
    console.log(
      `[StepPoller] Skill "${skill_name}" started (session: ${runData.session_name}). ` +
      `Step completion will be reported by the post-execution hook.`
    )
  } catch (err) {
    console.error(`[StepPoller] Failed to execute step ${step_index}: ${err.message}`)
    if (claimed && !started) {
      await reportFailure(err.message)
    }
  } finally {
    activeStepExecutionId = null
  }
}
|
|
193
|
+
|
|
194
|
+
/**
 * Start the polling daemon.
 *
 * Does nothing when HQ is not configured. Calling start() while the daemon is
 * already running is a no-op, so a repeated call cannot leak a second
 * interval timer. The first poll runs after a short delay and is skipped if
 * the daemon was stopped (or restarted) before that delay elapsed.
 */
function start() {
  if (!isHqConfigured()) {
    console.log('[StepPoller] HQ not configured, step poller disabled')
    return
  }

  // Already running — keep the existing timer instead of stacking another one
  if (pollTimer) {
    return
  }

  // Periodic polling
  const timer = setInterval(() => pollOnce(), POLL_INTERVAL_MS)
  pollTimer = timer

  // Initial poll after a short delay (let server fully start).
  // Only fires if this very timer is still the active one.
  setTimeout(() => {
    if (pollTimer === timer) pollOnce()
  }, 5000)

  console.log(`[StepPoller] Started (polling every ${POLL_INTERVAL_MS / 1000}s)`)
}
|
|
210
|
+
|
|
211
|
+
/**
 * Stop the polling daemon by cancelling the periodic timer.
 * Does nothing when no timer is active.
 */
function stop() {
  if (!pollTimer) return

  clearInterval(pollTimer)
  pollTimer = null
  console.log('[StepPoller] Stopped')
}
|
|
221
|
+
|
|
222
|
+
module.exports = { start, stop, pollOnce }
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Strip ANSI escape sequences from a string.
|
|
3
|
+
* Handles CSI sequences, OSC sequences, and other control codes
|
|
4
|
+
* that terminal emulators (node-pty) produce.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
// Match ANSI escape sequences and stray control characters:
// - CSI: ESC [ + parameter bytes (0x30-0x3F) + intermediate bytes (0x20-0x2F)
//   + one final byte (0x40-0x7E), e.g. ESC[31m, ESC[?25l, ESC[0 q
// - OSC: ESC ] ... terminated by BEL or ST (ESC \). An unterminated OSC also
//   ends right before a following ESC[ / ESC]; that ESC is only looked at,
//   not consumed, so the next sequence is still removed as a whole.
// - Other escape sequences: ESC + optional intermediate bytes + one final
//   byte, e.g. ESC 7, ESC =, ESC ( B. Only the sequence itself is removed,
//   never the text that follows it.
// - Standalone control characters (except newline, carriage return, tab)
const ANSI_REGEX = /(?:\x1B\[[0-?]*[ -\/]*[@-~]|\x1B\][^\x07\x1B]*(?:\x07|\x1B\\|(?=\x1B[\[\]]))|\x1B[ -\/]*[0-Z\\^-~]|[\x00-\x08\x0B\x0C\x0E-\x1F\x7F])/g

/**
 * Remove ANSI escape sequences and non-printing control characters.
 *
 * @param {*} str - Text to clean; non-string values are converted with String()
 * @returns {string} The cleaned text ('' for null or undefined)
 */
function stripAnsi(str) {
  if (str == null) return ''
  return String(str).replace(ANSI_REGEX, '')
}
|
|
17
|
+
|
|
18
|
+
module.exports = { stripAnsi }
|
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Workflow Orchestrator
|
|
3
|
+
*
|
|
4
|
+
* Deterministic orchestration engine for multi-minion workflow execution.
|
|
5
|
+
* Replaces the previous LLM-based orchestration skill with code-driven
|
|
6
|
+
* step dispatch, status polling, and review gate handling.
|
|
7
|
+
*
|
|
8
|
+
* The only LLM call is for revision routing: when a reviewer requests
|
|
9
|
+
* changes, the orchestrator asks an LLM to decide which pipeline step
|
|
10
|
+
* to roll back to based on the review comment and pipeline structure.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
const { config } = require('../config')
|
|
14
|
+
const api = require('../api')
|
|
15
|
+
|
|
16
|
+
// Orchestration timing and limits
const POLL_INTERVAL_MS = 30 * 1000 // pause between status polls: 30 seconds
const STEP_TIMEOUT_MS = 30 * 60_000 // how long one step may take: 30 minutes
const MAX_REVISIONS_PER_STEP = 3 // intended cap on revision rounds for a single step
|
|
20
|
+
|
|
21
|
+
/**
 * Wait for a given delay.
 * @param {number} ms - Delay in milliseconds
 * @returns {Promise<void>} Resolves once the delay has elapsed
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms)
  })
}
|
|
28
|
+
|
|
29
|
+
/**
 * Run the orchestration loop for a workflow execution.
 * Dispatches steps sequentially, polls for completion, and handles review gates.
 *
 * A reviewed step may send the workflow back for revision at most
 * MAX_REVISIONS_PER_STEP times; one more request aborts the workflow so a
 * review loop cannot run forever. The revision target is a step_index and is
 * resolved to its position in `steps`, so step indices do not have to equal
 * array positions.
 *
 * @param {object} params
 * @param {string} params.executionId - Workflow execution UUID
 * @param {Array<{step_index: number, step_execution_id: string, skill_version_id: string, skill_name: string, assigned_role: string, requires_review: boolean}>} params.steps
 * @param {string} params.hqUrl - HQ server URL (used only when config.HQ_URL is empty)
 * @param {string} [params.revisionPolicy] - Optional PM revision policy text
 * @returns {Promise<{success: boolean, error?: string}>}
 */
async function orchestrate({ executionId, steps, hqUrl, revisionPolicy }) {
  // Always use the minion's own HQ_URL — it is guaranteed reachable from this
  // network context. The hqUrl passed from the trigger route may be a
  // public URL that is not resolvable from inside a container.
  const effectiveHqUrl = config.HQ_URL || hqUrl
  if (!effectiveHqUrl) {
    console.error('[Orchestrator] No HQ URL available (config.HQ_URL and hqUrl are both empty)')
    return { success: false, error: 'No HQ URL configured' }
  }

  if (!Array.isArray(steps)) {
    console.error('[Orchestrator] No step list provided')
    return { success: false, error: 'No steps provided' }
  }

  const apiToken = config.API_TOKEN
  const headers = {
    'Content-Type': 'application/json',
    'Authorization': `Bearer ${apiToken}`,
  }

  console.log(`[Orchestrator] Starting execution ${executionId} with ${steps.length} steps`)

  // Revision rounds requested so far, keyed by the step_index of the reviewed step
  const revisionCounts = new Map()
  let currentStepIdx = 0

  while (currentStepIdx < steps.length) {
    const step = steps[currentStepIdx]
    console.log(`[Orchestrator] Dispatching step ${step.step_index}: ${step.skill_name} (role: ${step.assigned_role})`)

    // 1. Dispatch step
    const dispatchResult = await dispatchStep(effectiveHqUrl, headers, executionId, step.step_index)
    if (!dispatchResult.success) {
      console.error(`[Orchestrator] Failed to dispatch step ${step.step_index}: ${dispatchResult.error}`)
      return { success: false, error: `Dispatch failed at step ${step.step_index}: ${dispatchResult.error}` }
    }

    console.log(`[Orchestrator] Step ${step.step_index} dispatched to ${dispatchResult.minion_name || 'unknown'}`)

    // 2. Poll until step completes
    const pollResult = await pollStepCompletion(effectiveHqUrl, headers, executionId, step.step_index)
    if (!pollResult.success) {
      console.error(`[Orchestrator] Step ${step.step_index} failed: ${pollResult.error}`)
      return { success: false, error: `Step ${step.step_index} failed: ${pollResult.error}` }
    }

    console.log(`[Orchestrator] Step ${step.step_index} completed (status: ${pollResult.status})`)

    // 3. Handle review gate if required
    if (step.requires_review) {
      const reviewResult = await handleReviewGate(effectiveHqUrl, headers, executionId, step, steps, revisionPolicy)

      if (reviewResult.action === 'continue') {
        // Approved — proceed to next step
        console.log(`[Orchestrator] Step ${step.step_index} approved, continuing`)
      } else if (reviewResult.action === 'abort') {
        console.log(`[Orchestrator] Step ${step.step_index} rejected, aborting workflow`)
        return { success: false, error: `Workflow aborted: step ${step.step_index} rejected by reviewer` }
      } else if (reviewResult.action === 'retry_from') {
        // Enforce the revision cap for the step that was reviewed
        const rounds = (revisionCounts.get(step.step_index) || 0) + 1
        revisionCounts.set(step.step_index, rounds)
        if (rounds > MAX_REVISIONS_PER_STEP) {
          const error = `Step ${step.step_index} exceeded the maximum of ${MAX_REVISIONS_PER_STEP} revisions`
          console.error(`[Orchestrator] ${error}`)
          return { success: false, error }
        }

        // The target is a step_index; translate it to a position in `steps`.
        // Only the reviewed step or an earlier one is a valid rollback target.
        const targetPos = steps.findIndex((s) => s.step_index === reviewResult.targetStepIndex)
        if (targetPos === -1 || targetPos > currentStepIdx) {
          const error = `Invalid revision target: step ${reviewResult.targetStepIndex}`
          console.error(`[Orchestrator] ${error}`)
          return { success: false, error }
        }

        // Revision requested — jump back to target step
        console.log(`[Orchestrator] Revision requested, restarting from step ${reviewResult.targetStepIndex}`)
        currentStepIdx = targetPos
        continue
      } else if (reviewResult.action === 'error') {
        return { success: false, error: reviewResult.error }
      }
    }

    currentStepIdx++
  }

  console.log(`[Orchestrator] Execution ${executionId} completed successfully`)
  return { success: true }
}
|
|
108
|
+
|
|
109
|
+
/**
 * Dispatch a single step via the HQ API (POST /api/minion/dispatch-step).
 *
 * Never throws. An HTTP error is reported with the `error` field of the JSON
 * body when there is one, otherwise as `HTTP <status>` — including when the
 * body is not JSON at all.
 *
 * @param {string} hqUrl - HQ base URL
 * @param {object} headers - Request headers
 * @param {string} executionId - Workflow execution id
 * @param {number} stepIndex - Index of the step to dispatch
 * @returns {Promise<{success: boolean, minion_name?: string, error?: string}>}
 */
async function dispatchStep(hqUrl, headers, executionId, stepIndex) {
  try {
    const resp = await fetch(`${hqUrl}/api/minion/dispatch-step`, {
      method: 'POST',
      headers,
      body: JSON.stringify({ execution_id: executionId, step_index: stepIndex }),
    })

    // The body may be empty or non-JSON (for example an HTML error page);
    // that must not hide the HTTP status.
    const data = (await resp.json().catch(() => null)) || {}
    if (!resp.ok) {
      return { success: false, error: data.error || `HTTP ${resp.status}` }
    }
    return { success: true, minion_name: data.minion_name }
  } catch (err) {
    return { success: false, error: err.message }
  }
}
|
|
129
|
+
|
|
130
|
+
/**
 * Poll the execution status on HQ until the given step is 'completed' or
 * 'failed', or until STEP_TIMEOUT_MS has elapsed.
 *
 * HTTP errors, a missing step entry and thrown errors are logged and retried
 * after POLL_INTERVAL_MS.
 *
 * @returns {Promise<{success: boolean, status?: string, error?: string}>}
 */
async function pollStepCompletion(hqUrl, headers, executionId, stepIndex) {
  const deadline = Date.now() + STEP_TIMEOUT_MS

  // One status request; returns the final result, or null when polling should go on
  const checkOnce = async () => {
    const resp = await fetch(`${hqUrl}/api/minion/execution/${executionId}/status`, { headers })
    if (!resp.ok) {
      console.error(`[Orchestrator] Status poll failed: HTTP ${resp.status}`)
      return null
    }

    const payload = await resp.json()
    const entry = payload.steps?.find((s) => s.step_index === stepIndex)
    if (!entry) {
      console.error(`[Orchestrator] Step ${stepIndex} not found in status response`)
      return null
    }

    switch (entry.status) {
      case 'completed':
        return { success: true, status: 'completed' }
      case 'failed':
        return { success: false, error: 'Step execution failed' }
      default:
        // Still running or pending
        return null
    }
  }

  while (Date.now() < deadline) {
    let verdict = null
    try {
      verdict = await checkOnce()
    } catch (err) {
      console.error(`[Orchestrator] Poll error: ${err.message}`)
    }
    if (verdict) return verdict
    await sleep(POLL_INTERVAL_MS)
  }

  return { success: false, error: `Step ${stepIndex} timed out after ${STEP_TIMEOUT_MS / 60000} minutes` }
}
|
|
175
|
+
|
|
176
|
+
/**
 * Handle review gate: poll for the review decision on a step and act on it.
 * Reviewer can approve, reject, or request revisions.
 *
 * - 'approved'            → { action: 'continue' }
 * - 'rejected'            → { action: 'abort' }
 * - 'revision_requested'  → picks a rollback target, calls the revision-reset
 *                           API and returns { action: 'retry_from', targetStepIndex },
 *                           or { action: 'error', error } when the reset fails
 *
 * There is no timeout: the loop keeps polling every POLL_INTERVAL_MS until one
 * of the three decisions above is seen. Failed polls (HTTP error or thrown
 * error) are logged and retried.
 *
 * @returns {{ action: 'continue' | 'abort' | 'retry_from' | 'error', targetStepIndex?: number, error?: string }}
 */
async function handleReviewGate(hqUrl, headers, executionId, currentStep, allSteps, revisionPolicy) {
  console.log(`[Orchestrator] Waiting for review on step ${currentStep.step_index}...`)

  // Poll for review decision (no timeout — waits indefinitely for human input)
  while (true) {
    try {
      const resp = await fetch(`${hqUrl}/api/minion/execution/${executionId}/status`, {
        headers,
      })

      if (!resp.ok) {
        // Log the status so a persistent failure is visible, as in the step status poll
        console.error(`[Orchestrator] Review poll failed: HTTP ${resp.status}`)
        await sleep(POLL_INTERVAL_MS)
        continue
      }

      const data = await resp.json()
      const stepData = data.steps?.find((s) => s.step_index === currentStep.step_index)

      // No decision recorded yet
      if (!stepData || !stepData.review_status) {
        await sleep(POLL_INTERVAL_MS)
        continue
      }

      if (stepData.review_status === 'approved') {
        return { action: 'continue' }
      }

      if (stepData.review_status === 'rejected') {
        return { action: 'abort' }
      }

      if (stepData.review_status === 'revision_requested') {
        const reviewComment = stepData.review_comment || ''
        console.log(`[Orchestrator] Revision requested: "${reviewComment}"`)

        // Decide which step to roll back to
        const targetStepIndex = await decideRevisionTarget(
          allSteps,
          reviewComment,
          currentStep.step_index,
          revisionPolicy
        )

        // Call revision-reset API
        const resetResult = await resetForRevision(
          hqUrl, headers, executionId,
          targetStepIndex, currentStep.step_index, reviewComment
        )

        if (!resetResult.success) {
          return { action: 'error', error: `Revision reset failed: ${resetResult.error}` }
        }

        return { action: 'retry_from', targetStepIndex }
      }

      // Any other review_status value: keep waiting
      await sleep(POLL_INTERVAL_MS)
    } catch (err) {
      console.error(`[Orchestrator] Review poll error: ${err.message}`)
      await sleep(POLL_INTERVAL_MS)
    }
  }
}
|
|
246
|
+
|
|
247
|
+
/**
 * Decide which step to roll back to when a reviewer requests revisions.
 *
 * Asks an LLM (via callLlmForJson) to analyze the review comment and the
 * pipeline up to the reviewed step. The answer is accepted only when it is an
 * integer between 0 and the reviewed step's index AND matches the step_index
 * of an entry in `steps`. In every other case — invalid answer or failed LLM
 * call — the reviewed step itself is returned.
 *
 * @param {Array} steps - All pipeline steps
 * @param {string} reviewComment - Reviewer's feedback
 * @param {number} currentStepIndex - The step that was reviewed
 * @param {string} [revisionPolicy] - Optional PM-specific revision policy
 * @returns {Promise<number>} Target step index to roll back to
 */
async function decideRevisionTarget(steps, reviewComment, currentStepIndex, revisionPolicy) {
  // If only one step or current is first, no choice needed
  if (currentStepIndex === 0 || steps.length <= 1) {
    return currentStepIndex
  }

  // Only steps up to and including the reviewed one are rollback candidates
  const candidates = steps.filter((s) => s.step_index <= currentStepIndex)

  // Build pipeline description for the LLM
  const pipelineDesc = candidates
    .map((s) => `Step ${s.step_index}: ${s.skill_name} (role: ${s.assigned_role})`)
    .join('\n')

  const systemPrompt = `You are analyzing a workflow pipeline to decide which step to roll back to after a reviewer requested changes.

Given the pipeline steps and the reviewer's feedback, determine which step is the root cause that needs to be re-executed.
- If the feedback targets the current step's output only, return the current step index.
- If the feedback suggests an earlier step produced incorrect input, return that earlier step's index.
- Always return the EARLIEST step that needs re-execution.

Respond with ONLY a JSON object: {"target_step_index": <number>}
Do not include any other text.`

  const userPrompt = `## Pipeline (steps 0 through ${currentStepIndex})
${pipelineDesc}

## Reviewer Feedback
${reviewComment}

## Current Step (reviewed)
Step ${currentStepIndex}
${revisionPolicy ? `\n## PM Revision Policy\n${revisionPolicy}` : ''}`

  try {
    const result = await callLlmForJson(systemPrompt, userPrompt)
    const target = result?.target_step_index

    // The model's answer is untrusted: check type, range, and that the step exists
    const isValid =
      Number.isInteger(target) &&
      target >= 0 &&
      target <= currentStepIndex &&
      candidates.some((s) => s.step_index === target)

    if (isValid) {
      console.log(`[Orchestrator] LLM decided revision target: step ${target}`)
      return target
    }

    console.warn(`[Orchestrator] LLM returned invalid target, falling back to current step ${currentStepIndex}`)
    return currentStepIndex
  } catch (err) {
    console.error(`[Orchestrator] LLM call failed, falling back to current step: ${err.message}`)
    return currentStepIndex
  }
}
|
|
312
|
+
|
|
313
|
+
/**
 * Call an LLM API to get a JSON response.
 * Uses the Anthropic Messages API with ANTHROPIC_API_KEY from environment.
 *
 * The reply text is parsed as JSON. If that fails, the outermost `{ ... }`
 * span of the reply is parsed instead, so a reply wrapped in a Markdown code
 * fence or surrounded by extra prose is still accepted.
 *
 * @param {string} systemPrompt
 * @param {string} userPrompt
 * @returns {Promise<object>} Parsed JSON response
 * @throws {Error} When the API key is missing, the API answers with an error
 *   status, the reply is empty, or no JSON can be parsed from it
 */
async function callLlmForJson(systemPrompt, userPrompt) {
  const apiKey = process.env.ANTHROPIC_API_KEY
  if (!apiKey) {
    throw new Error('ANTHROPIC_API_KEY not set — cannot make LLM call for revision routing')
  }

  const resp = await fetch('https://api.anthropic.com/v1/messages', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-api-key': apiKey,
      'anthropic-version': '2023-06-01',
    },
    body: JSON.stringify({
      model: 'claude-haiku-4-5-20251001',
      max_tokens: 256,
      system: systemPrompt,
      messages: [{ role: 'user', content: userPrompt }],
    }),
  })

  if (!resp.ok) {
    const text = await resp.text()
    throw new Error(`Anthropic API error: ${resp.status} ${text}`)
  }

  const data = await resp.json()
  const content = data.content?.[0]?.text
  if (!content) {
    throw new Error('Empty response from Anthropic API')
  }

  try {
    return JSON.parse(content)
  } catch (parseErr) {
    // Fall back to the outermost object literal in the reply
    const start = content.indexOf('{')
    const end = content.lastIndexOf('}')
    if (start === -1 || end <= start) {
      throw parseErr
    }
    return JSON.parse(content.slice(start, end + 1))
  }
}
|
|
355
|
+
|
|
356
|
+
/**
 * Call the revision-reset API on HQ (POST /api/minion/revision-reset) to
 * reset steps for re-execution.
 *
 * Never throws. An HTTP error is reported with the `error` field of the JSON
 * body when there is one, otherwise as `HTTP <status>` — including when the
 * body is not JSON at all.
 *
 * @param {string} hqUrl - HQ base URL
 * @param {object} headers - Request headers
 * @param {string} executionId - Workflow execution id
 * @param {number} targetStepIndex - Step to restart from
 * @param {number} revisionStepIndex - Step whose review requested the revision
 * @param {string} revisionFeedback - Reviewer's comment
 * @returns {Promise<{success: boolean, error?: string}>}
 */
async function resetForRevision(hqUrl, headers, executionId, targetStepIndex, revisionStepIndex, revisionFeedback) {
  try {
    const resp = await fetch(`${hqUrl}/api/minion/revision-reset`, {
      method: 'POST',
      headers,
      body: JSON.stringify({
        execution_id: executionId,
        target_step_index: targetStepIndex,
        revision_step_index: revisionStepIndex,
        revision_feedback: revisionFeedback,
      }),
    })

    if (resp.ok) {
      return { success: true }
    }

    // The error body may be empty or non-JSON; fall back to the HTTP status
    const data = (await resp.json().catch(() => null)) || {}
    return { success: false, error: data.error || `HTTP ${resp.status}` }
  } catch (err) {
    return { success: false, error: err.message }
  }
}
|
|
381
|
+
|
|
382
|
+
module.exports = { orchestrate }
|