@geekbeer/minion 2.33.4 → 2.42.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +0 -3
- package/README.md +0 -1
- package/core/api.js +13 -0
- package/core/config.js +46 -1
- package/core/lib/log-manager.js +4 -1
- package/core/lib/platform.js +8 -13
- package/core/lib/revision-watcher.js +252 -0
- package/core/lib/step-poller.js +222 -0
- package/core/lib/strip-ansi.js +18 -0
- package/core/lib/workflow-orchestrator.js +382 -0
- package/core/routes/diagnose.js +296 -0
- package/core/routes/health.js +27 -0
- package/core/routes/routines.js +15 -10
- package/core/routes/skills.js +4 -1
- package/core/routes/workflows.js +49 -2
- package/core/stores/chat-store.js +8 -1
- package/core/stores/routine-store.js +2 -2
- package/linux/lib/process-manager.js +14 -0
- package/linux/minion-cli.sh +57 -16
- package/linux/routes/chat.js +182 -20
- package/linux/routes/config.js +8 -12
- package/linux/routine-runner.js +5 -4
- package/linux/server.js +53 -1
- package/linux/workflow-runner.js +25 -61
- package/package.json +1 -1
- package/roles/pm.md +11 -12
- package/win/lib/process-manager.js +15 -0
- package/win/minion-cli.ps1 +122 -27
- package/win/routes/chat.js +178 -14
- package/win/routes/config.js +6 -2
- package/win/routine-runner.js +4 -2
- package/win/server.js +53 -0
- package/win/workflow-runner.js +31 -43
- package/skills/execution-report/SKILL.md +0 -106
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Workflow Orchestrator
|
|
3
|
+
*
|
|
4
|
+
* Deterministic orchestration engine for multi-minion workflow execution.
|
|
5
|
+
* Replaces the previous LLM-based orchestration skill with code-driven
|
|
6
|
+
* step dispatch, status polling, and review gate handling.
|
|
7
|
+
*
|
|
8
|
+
* The only LLM call is for revision routing: when a reviewer requests
|
|
9
|
+
* changes, the orchestrator asks an LLM to decide which pipeline step
|
|
10
|
+
* to roll back to based on the review comment and pipeline structure.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
const { config } = require('../config')
|
|
14
|
+
const api = require('../api')
|
|
15
|
+
|
|
16
|
+
// Polling configuration
|
|
17
|
+
const POLL_INTERVAL_MS = 30_000 // 30 seconds
|
|
18
|
+
const STEP_TIMEOUT_MS = 30 * 60 * 1000 // 30 minutes per step
|
|
19
|
+
const MAX_REVISIONS_PER_STEP = 3
|
|
20
|
+
|
|
21
|
+
/**
 * Pause asynchronously for the given duration.
 *
 * @param {number} ms - Delay in milliseconds, passed straight to setTimeout
 * @returns {Promise<void>} Promise that resolves (with no value) when the timer fires
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms)
  })
}
|
|
28
|
+
|
|
29
|
+
/**
 * Run the orchestration loop for a workflow execution.
 *
 * Steps are processed one at a time: each step is dispatched, polled until it
 * reaches a terminal state and, when `requires_review` is set, held at a review
 * gate. A review outcome of `retry_from` moves the loop back to the step whose
 * `step_index` equals the requested target.
 *
 * Revision requests are counted per reviewed step; once a step has used
 * MAX_REVISIONS_PER_STEP revisions, a further request fails the workflow
 * instead of looping again.
 *
 * @param {object} params
 * @param {string} params.executionId - Workflow execution UUID
 * @param {Array<{step_index: number, step_execution_id: string, skill_version_id: string, skill_name: string, assigned_role: string, requires_review: boolean}>} params.steps
 * @param {string} params.hqUrl - HQ server URL (only used when config.HQ_URL is empty)
 * @param {string} [params.revisionPolicy] - Optional PM revision policy text
 * @returns {Promise<{success: boolean, error?: string}>}
 */
async function orchestrate({ executionId, steps, hqUrl, revisionPolicy }) {
  // Always use the minion's own HQ_URL — it is guaranteed reachable from this
  // network context. The hqUrl passed from the trigger route may be a
  // public URL that is not resolvable from inside a container.
  const effectiveHqUrl = config.HQ_URL || hqUrl
  if (!effectiveHqUrl) {
    console.error('[Orchestrator] No HQ URL available (config.HQ_URL and hqUrl are both empty)')
    return { success: false, error: 'No HQ URL configured' }
  }

  // Report a malformed trigger payload through the normal result shape
  // instead of throwing on `steps.length`.
  if (!Array.isArray(steps)) {
    console.error('[Orchestrator] Invalid steps payload (expected an array)')
    return { success: false, error: 'Invalid steps: expected an array' }
  }

  const apiToken = config.API_TOKEN
  const headers = {
    'Content-Type': 'application/json',
    'Authorization': `Bearer ${apiToken}`,
  }

  console.log(`[Orchestrator] Starting execution ${executionId} with ${steps.length} steps`)

  // step_index of the reviewed step -> number of revisions already granted
  const revisionCounts = new Map()
  let currentStepIdx = 0

  while (currentStepIdx < steps.length) {
    const step = steps[currentStepIdx]
    console.log(`[Orchestrator] Dispatching step ${step.step_index}: ${step.skill_name} (role: ${step.assigned_role})`)

    // 1. Dispatch step
    const dispatchResult = await dispatchStep(effectiveHqUrl, headers, executionId, step.step_index)
    if (!dispatchResult.success) {
      console.error(`[Orchestrator] Failed to dispatch step ${step.step_index}: ${dispatchResult.error}`)
      return { success: false, error: `Dispatch failed at step ${step.step_index}: ${dispatchResult.error}` }
    }

    console.log(`[Orchestrator] Step ${step.step_index} dispatched to ${dispatchResult.minion_name || 'unknown'}`)

    // 2. Poll until step completes
    const pollResult = await pollStepCompletion(effectiveHqUrl, headers, executionId, step.step_index)
    if (!pollResult.success) {
      console.error(`[Orchestrator] Step ${step.step_index} failed: ${pollResult.error}`)
      return { success: false, error: `Step ${step.step_index} failed: ${pollResult.error}` }
    }

    console.log(`[Orchestrator] Step ${step.step_index} completed (status: ${pollResult.status})`)

    // 3. Handle review gate if required
    if (step.requires_review) {
      const reviewResult = await handleReviewGate(effectiveHqUrl, headers, executionId, step, steps, revisionPolicy)

      if (reviewResult.action === 'continue') {
        // Approved — proceed to next step
        console.log(`[Orchestrator] Step ${step.step_index} approved, continuing`)
      } else if (reviewResult.action === 'abort') {
        console.log(`[Orchestrator] Step ${step.step_index} rejected, aborting workflow`)
        return { success: false, error: `Workflow aborted: step ${step.step_index} rejected by reviewer` }
      } else if (reviewResult.action === 'retry_from') {
        // Enforce the per-step revision budget so a reviewer/worker pair
        // cannot keep the workflow cycling forever.
        const revisionsUsed = (revisionCounts.get(step.step_index) ?? 0) + 1
        if (revisionsUsed > MAX_REVISIONS_PER_STEP) {
          console.error(`[Orchestrator] Step ${step.step_index} exceeded ${MAX_REVISIONS_PER_STEP} revisions, aborting workflow`)
          return {
            success: false,
            error: `Workflow aborted: step ${step.step_index} exceeded ${MAX_REVISIONS_PER_STEP} revisions`,
          }
        }
        revisionCounts.set(step.step_index, revisionsUsed)

        // targetStepIndex is a step_index value, not an array position:
        // translate it before moving the cursor.
        const targetIdx = steps.findIndex((s) => s.step_index === reviewResult.targetStepIndex)
        if (targetIdx === -1) {
          console.error(`[Orchestrator] Revision target step ${reviewResult.targetStepIndex} not found in pipeline`)
          return { success: false, error: `Revision target step ${reviewResult.targetStepIndex} not found in pipeline` }
        }

        // Revision requested — jump back to target step
        console.log(`[Orchestrator] Revision requested, restarting from step ${reviewResult.targetStepIndex}`)
        currentStepIdx = targetIdx
        continue
      } else {
        // 'error', or an action this loop does not know: never advance past
        // a review gate without an explicit approval.
        return {
          success: false,
          error: reviewResult.error || `Unexpected review action: ${reviewResult.action}`,
        }
      }
    }

    currentStepIdx++
  }

  console.log(`[Orchestrator] Execution ${executionId} completed successfully`)
  return { success: true }
}
|
|
108
|
+
|
|
109
|
+
/**
 * Dispatch a single step via HQ API.
 *
 * POSTs `{ execution_id, step_index }` to `${hqUrl}/api/minion/dispatch-step`.
 * Never throws: network and parsing failures are reported in the result.
 *
 * @param {string} hqUrl - Base URL of the HQ server
 * @param {object} headers - Request headers (content type and authorization)
 * @param {string} executionId - Workflow execution identifier
 * @param {number} stepIndex - Index of the step to dispatch
 * @returns {Promise<{success: boolean, minion_name?: string, error?: string}>}
 */
async function dispatchStep(hqUrl, headers, executionId, stepIndex) {
  try {
    const resp = await fetch(`${hqUrl}/api/minion/dispatch-step`, {
      method: 'POST',
      headers,
      body: JSON.stringify({ execution_id: executionId, step_index: stepIndex }),
    })

    if (!resp.ok) {
      // Error bodies are not guaranteed to be JSON (e.g. an HTML page from a
      // proxy); fall back to the HTTP status rather than a parse error.
      const errorBody = await resp.json().catch(() => ({}))
      return { success: false, error: errorBody?.error || `HTTP ${resp.status}` }
    }

    const data = await resp.json()
    return { success: true, minion_name: data.minion_name }
  } catch (err) {
    return { success: false, error: err.message }
  }
}
|
|
129
|
+
|
|
130
|
+
/**
 * Poll HQ until the given step reports a terminal state or the step timeout expires.
 *
 * The execution status endpoint is queried every POLL_INTERVAL_MS. HTTP errors,
 * a missing step entry and thrown request errors are logged and retried; only
 * the statuses 'completed' and 'failed' end the loop early.
 *
 * @param {string} hqUrl - Base URL of the HQ server
 * @param {object} headers - Request headers
 * @param {string} executionId - Workflow execution identifier
 * @param {number} stepIndex - step_index of the step being awaited
 * @returns {Promise<{success: boolean, status?: string, error?: string}>}
 */
async function pollStepCompletion(hqUrl, headers, executionId, stepIndex) {
  const statusUrl = `${hqUrl}/api/minion/execution/${executionId}/status`
  const deadline = Date.now() + STEP_TIMEOUT_MS

  while (Date.now() < deadline) {
    try {
      const resp = await fetch(statusUrl, { headers })

      if (resp.ok) {
        const payload = await resp.json()
        const current = payload.steps?.find((s) => s.step_index === stepIndex)

        if (!current) {
          console.error(`[Orchestrator] Step ${stepIndex} not found in status response`)
        } else if (current.status === 'completed') {
          return { success: true, status: 'completed' }
        } else if (current.status === 'failed') {
          return { success: false, error: 'Step execution failed' }
        }
        // Any other status: still running or pending
      } else {
        console.error(`[Orchestrator] Status poll failed: HTTP ${resp.status}`)
      }
    } catch (err) {
      console.error(`[Orchestrator] Poll error: ${err.message}`)
    }

    // Every non-terminal outcome waits one interval before the next attempt
    await sleep(POLL_INTERVAL_MS)
  }

  return { success: false, error: `Step ${stepIndex} timed out after ${STEP_TIMEOUT_MS / 60000} minutes` }
}
|
|
175
|
+
|
|
176
|
+
/**
 * Handle review gate: poll for review decision and act accordingly.
 * Reviewer can approve, reject, or request revisions.
 *
 * The execution status endpoint is polled every POLL_INTERVAL_MS until the
 * reviewed step carries a decisive `review_status`:
 *   - 'approved'           -> { action: 'continue' }
 *   - 'rejected'           -> { action: 'abort' }
 *   - 'revision_requested' -> a rollback target is chosen, the revision-reset
 *                             API is called, and { action: 'retry_from' } is
 *                             returned ({ action: 'error' } if the reset fails)
 * Any other or missing review status keeps the loop waiting.
 *
 * @param {string} hqUrl - Base URL of the HQ server
 * @param {object} headers - Request headers
 * @param {string} executionId - Workflow execution identifier
 * @param {object} currentStep - The step awaiting review (its `step_index` is used)
 * @param {Array} allSteps - All pipeline steps, forwarded to revision routing
 * @param {string} [revisionPolicy] - Optional PM revision policy text
 * @returns {Promise<{ action: 'continue' | 'abort' | 'retry_from' | 'error', targetStepIndex?: number, error?: string }>}
 */
async function handleReviewGate(hqUrl, headers, executionId, currentStep, allSteps, revisionPolicy) {
  console.log(`[Orchestrator] Waiting for review on step ${currentStep.step_index}...`)

  // Poll for review decision (no timeout — waits indefinitely for human input)
  // In practice, the orchestrate caller or HQ can cancel the execution externally.
  while (true) {
    try {
      const resp = await fetch(`${hqUrl}/api/minion/execution/${executionId}/status`, {
        headers,
      })

      if (!resp.ok) {
        // This loop has no timeout, so a persistent HTTP failure (expired
        // token, deleted execution) must at least be visible in the logs.
        console.error(`[Orchestrator] Review poll failed: HTTP ${resp.status}`)
        await sleep(POLL_INTERVAL_MS)
        continue
      }

      const data = await resp.json()
      const stepData = data.steps?.find((s) => s.step_index === currentStep.step_index)

      if (!stepData || !stepData.review_status) {
        await sleep(POLL_INTERVAL_MS)
        continue
      }

      if (stepData.review_status === 'approved') {
        return { action: 'continue' }
      }

      if (stepData.review_status === 'rejected') {
        return { action: 'abort' }
      }

      if (stepData.review_status === 'revision_requested') {
        const reviewComment = stepData.review_comment || ''
        console.log(`[Orchestrator] Revision requested: "${reviewComment}"`)

        // Decide which step to roll back to
        const targetStepIndex = await decideRevisionTarget(
          allSteps,
          reviewComment,
          currentStep.step_index,
          revisionPolicy
        )

        // Call revision-reset API
        const resetResult = await resetForRevision(
          hqUrl, headers, executionId,
          targetStepIndex, currentStep.step_index, reviewComment
        )

        if (!resetResult.success) {
          return { action: 'error', error: `Revision reset failed: ${resetResult.error}` }
        }

        return { action: 'retry_from', targetStepIndex }
      }

      // Review status present but not decisive yet — keep waiting
      await sleep(POLL_INTERVAL_MS)
    } catch (err) {
      console.error(`[Orchestrator] Review poll error: ${err.message}`)
      await sleep(POLL_INTERVAL_MS)
    }
  }
}
|
|
246
|
+
|
|
247
|
+
/**
 * Choose the step to roll back to after a reviewer requested revisions.
 *
 * When the reviewed step is step 0 or the pipeline has at most one step, the
 * reviewed step is returned without consulting the LLM. Otherwise the steps up
 * to and including the reviewed one are described to the LLM, whose answer is
 * accepted only if it is an integer between 0 and the reviewed step index.
 * Any LLM failure or invalid answer falls back to the reviewed step.
 *
 * @param {Array} steps - All pipeline steps
 * @param {string} reviewComment - Reviewer's feedback
 * @param {number} currentStepIndex - The step that was reviewed
 * @param {string} [revisionPolicy] - Optional PM-specific revision policy
 * @returns {Promise<number>} Target step index to roll back to
 */
async function decideRevisionTarget(steps, reviewComment, currentStepIndex, revisionPolicy) {
  // Nothing earlier to roll back to — no decision required
  if (currentStepIndex === 0 || steps.length <= 1) {
    return currentStepIndex
  }

  // Describe the pipeline up to the reviewed step, one line per step
  const pipelineLines = []
  for (const s of steps) {
    if (s.step_index <= currentStepIndex) {
      pipelineLines.push(`Step ${s.step_index}: ${s.skill_name} (role: ${s.assigned_role})`)
    }
  }
  const pipelineDesc = pipelineLines.join('\n')

  const systemPrompt = [
    'You are analyzing a workflow pipeline to decide which step to roll back to after a reviewer requested changes.',
    '',
    "Given the pipeline steps and the reviewer's feedback, determine which step is the root cause that needs to be re-executed.",
    "- If the feedback targets the current step's output only, return the current step index.",
    "- If the feedback suggests an earlier step produced incorrect input, return that earlier step's index.",
    '- Always return the EARLIEST step that needs re-execution.',
    '',
    'Respond with ONLY a JSON object: {"target_step_index": <number>}',
    'Do not include any other text.',
  ].join('\n')

  const policySection = revisionPolicy ? `\n## PM Revision Policy\n${revisionPolicy}` : ''
  const userPrompt = [
    `## Pipeline (steps 0 through ${currentStepIndex})`,
    `${pipelineDesc}`,
    '',
    '## Reviewer Feedback',
    `${reviewComment}`,
    '',
    '## Current Step (reviewed)',
    `Step ${currentStepIndex}`,
    policySection,
  ].join('\n')

  let answer
  try {
    answer = await callLlmForJson(systemPrompt, userPrompt)
  } catch (err) {
    console.error(`[Orchestrator] LLM call failed, falling back to current step: ${err.message}`)
    return currentStepIndex
  }

  const candidate = answer?.target_step_index
  const usable =
    typeof candidate === 'number' &&
    Number.isInteger(candidate) &&
    candidate >= 0 &&
    candidate <= currentStepIndex

  if (!usable) {
    console.warn(`[Orchestrator] LLM returned invalid target, falling back to current step ${currentStepIndex}`)
    return currentStepIndex
  }

  console.log(`[Orchestrator] LLM decided revision target: step ${candidate}`)
  return candidate
}
|
|
312
|
+
|
|
313
|
+
/**
 * Parse JSON out of an LLM text reply.
 *
 * Accepts bare JSON, JSON wrapped in a markdown code fence, and — as a last
 * resort — the outermost `{ ... }` span inside surrounding prose.
 *
 * @param {string} text - Raw text returned by the model
 * @returns {*} Parsed JSON value
 * @throws {SyntaxError} When no parseable JSON is found
 */
function parseJsonFromLlmText(text) {
  const trimmed = text.trim()
  const fenced = /^```(?:json)?\s*([\s\S]*?)\s*```$/i.exec(trimmed)
  const candidate = fenced ? fenced[1] : trimmed

  try {
    return JSON.parse(candidate)
  } catch (err) {
    const start = candidate.indexOf('{')
    const end = candidate.lastIndexOf('}')
    if (start === -1 || end <= start) {
      throw err
    }
    return JSON.parse(candidate.slice(start, end + 1))
  }
}

/**
 * Call an LLM API to get a JSON response.
 * Uses the Anthropic Messages API with ANTHROPIC_API_KEY from environment.
 *
 * The first non-empty text block of the reply is parsed; markdown code fences
 * and surrounding prose around the JSON object are tolerated.
 *
 * @param {string} systemPrompt
 * @param {string} userPrompt
 * @returns {Promise<object>} Parsed JSON response
 * @throws {Error} When the API key is missing, the API responds with a non-OK
 *   status, the reply has no text, or the text contains no parseable JSON
 */
async function callLlmForJson(systemPrompt, userPrompt) {
  const apiKey = process.env.ANTHROPIC_API_KEY
  if (!apiKey) {
    throw new Error('ANTHROPIC_API_KEY not set — cannot make LLM call for revision routing')
  }

  const resp = await fetch('https://api.anthropic.com/v1/messages', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-api-key': apiKey,
      'anthropic-version': '2023-06-01',
    },
    body: JSON.stringify({
      model: 'claude-haiku-4-5-20251001',
      max_tokens: 256,
      system: systemPrompt,
      messages: [{ role: 'user', content: userPrompt }],
    }),
  })

  if (!resp.ok) {
    const text = await resp.text()
    throw new Error(`Anthropic API error: ${resp.status} ${text}`)
  }

  const data = await resp.json()
  // Do not assume the first content block is the text block.
  const blocks = Array.isArray(data?.content) ? data.content : []
  const content = blocks.find((b) => typeof b?.text === 'string' && b.text.trim() !== '')?.text
  if (!content) {
    throw new Error('Empty response from Anthropic API')
  }

  return parseJsonFromLlmText(content)
}
|
|
355
|
+
|
|
356
|
+
/**
 * Call the revision-reset API on HQ to reset steps for re-execution.
 *
 * POSTs the execution id, the rollback target, the reviewed step and the
 * reviewer feedback to `${hqUrl}/api/minion/revision-reset`. Never throws:
 * network and parsing failures are reported in the result.
 *
 * @param {string} hqUrl - Base URL of the HQ server
 * @param {object} headers - Request headers
 * @param {string} executionId - Workflow execution identifier
 * @param {number} targetStepIndex - Step to restart from
 * @param {number} revisionStepIndex - Step whose review requested the revision
 * @param {string} revisionFeedback - Reviewer's comment
 * @returns {Promise<{success: boolean, error?: string}>}
 */
async function resetForRevision(hqUrl, headers, executionId, targetStepIndex, revisionStepIndex, revisionFeedback) {
  try {
    const resp = await fetch(`${hqUrl}/api/minion/revision-reset`, {
      method: 'POST',
      headers,
      body: JSON.stringify({
        execution_id: executionId,
        target_step_index: targetStepIndex,
        revision_step_index: revisionStepIndex,
        revision_feedback: revisionFeedback,
      }),
    })

    if (!resp.ok) {
      // Error bodies are not guaranteed to be JSON (e.g. an HTML page from a
      // proxy); fall back to the HTTP status rather than a parse error.
      const errorBody = await resp.json().catch(() => ({}))
      return { success: false, error: errorBody?.error || `HTTP ${resp.status}` }
    }

    // Still parse the success body so a malformed reply is reported as a failure.
    await resp.json()
    return { success: true }
  } catch (err) {
    return { success: false, error: err.message }
  }
}
|
|
381
|
+
|
|
382
|
+
module.exports = { orchestrate }
|
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Diagnostic endpoint for checking all essential services
|
|
3
|
+
*
|
|
4
|
+
* Endpoints:
|
|
5
|
+
* - GET /api/diagnose - Run full diagnostic check
|
|
6
|
+
*
|
|
7
|
+
* Checks:
|
|
8
|
+
* - Minion Server (Fastify) status
|
|
9
|
+
* - HQ connectivity (heartbeat reachability)
|
|
10
|
+
* - Cloudflare Tunnel process
|
|
11
|
+
* - VNC / websockify (display server)
|
|
12
|
+
* - Terminal proxy (ttyd / terminal-server)
|
|
13
|
+
* - LLM CLI availability (Claude, Gemini, Codex)
|
|
14
|
+
* - Environment variable configuration
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
const { execFileSync, execSync } = require('child_process')
const { config, isHqConfigured } = require('../config')
const { version } = require('../../package.json')
const { getLlmServices, isLlmCommandConfigured } = require('../lib/llm-checker')
const { IS_WINDOWS } = require('../lib/platform')
const { getStatus } = require('./health')
|
|
23
|
+
|
|
24
|
+
/**
 * Check if a process is running by name.
 *
 * Windows: runs `tasklist` filtered by image name and looks for the name in
 * its output. Other platforms: runs `pgrep -f`, which matches the name against
 * full command lines and exits non-zero when nothing matches.
 *
 * The tools are executed directly (no intermediate shell), so the name is
 * never interpreted by a shell and the check cannot match the command line of
 * a wrapping `sh -c` process.
 *
 * @param {string} name - Process name to search for
 * @returns {boolean} true when a matching process was found; false when none
 *   was found or the lookup itself failed
 */
function isProcessRunning(name) {
  const options = { encoding: 'utf-8', timeout: 5000, stdio: 'pipe' }

  try {
    if (IS_WINDOWS) {
      const out = execFileSync('tasklist', ['/FI', `IMAGENAME eq ${name}`, '/NH'], options)
      return out.toLowerCase().includes(name.toLowerCase())
    }

    // `--` keeps a name that starts with '-' from being read as an option.
    execFileSync('pgrep', ['-f', '--', name], options)
    return true
  } catch {
    // Non-zero exit (no match), missing tool, or timeout
    return false
  }
}
|
|
50
|
+
|
|
51
|
+
/**
 * Report whether something is listening on a local TCP port.
 *
 * Windows: searches `netstat -an` output for the port and requires a
 * LISTENING entry. Other platforms: greps `ss -tlnp`, falling back to
 * `netstat -tlnp`, and treats any non-blank output as listening.
 *
 * @param {number} port
 * @returns {boolean} false when no listener was found or the command failed
 */
function isPortListening(port) {
  const shellOptions = { encoding: 'utf-8', timeout: 5000, stdio: 'pipe' }

  try {
    if (!IS_WINDOWS) {
      const needle = `":${port} "`
      const listing = execSync(
        `ss -tlnp 2>/dev/null | grep ${needle} || netstat -tlnp 2>/dev/null | grep ${needle}`,
        shellOptions
      )
      return listing.trim().length > 0
    }

    const listing = execSync(`netstat -an | findstr ":${port} "`, shellOptions)
    return listing.includes('LISTENING')
  } catch {
    // grep/findstr exit non-zero when nothing matches
    return false
  }
}
|
|
77
|
+
|
|
78
|
+
/**
 * Probe HQ by POSTing a heartbeat and timing the round trip.
 *
 * The heartbeat carries the minion's current status, current task and package
 * version, and is aborted after 10 seconds. When HQ is not configured no
 * request is sent.
 *
 * @returns {Promise<{ ok: boolean, latency_ms?: number, error?: string }>}
 */
async function checkHqConnectivity() {
  if (!isHqConfigured()) {
    return { ok: false, error: 'HQ not configured (standalone mode)' }
  }

  const heartbeatUrl = `${config.HQ_URL}/api/minion/heartbeat`
  const minionState = getStatus()
  const startedAt = Date.now()
  const elapsed = () => Date.now() - startedAt

  try {
    const response = await fetch(heartbeatUrl, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${config.API_TOKEN}`,
      },
      body: JSON.stringify({
        status: minionState.currentStatus,
        current_task: minionState.currentTask,
        version,
      }),
      signal: AbortSignal.timeout(10000),
    })
    const latency = elapsed()

    if (response.ok) {
      return { ok: true, latency_ms: latency }
    }

    // The error body may not be JSON; fall back to the status text
    const payload = await response.json().catch(() => ({}))
    return {
      ok: false,
      latency_ms: latency,
      error: `HTTP ${response.status}: ${payload.error || response.statusText}`,
    }
  } catch (err) {
    return { ok: false, latency_ms: elapsed(), error: err.message }
  }
}
|
|
114
|
+
|
|
115
|
+
/**
 * Report whether a Cloudflare Tunnel process is present.
 *
 * Windows: looks for `cloudflared.exe`. Other platforms: looks for
 * `cloudflared`, then tries to confirm tunnel mode by listing processes whose
 * command line matches "cloudflared.*tunnel"; the first listed line becomes
 * the details text. If that confirmation fails, the process is still reported
 * as running.
 *
 * @returns {{ running: boolean, details?: string }}
 */
function checkTunnel() {
  if (IS_WINDOWS) {
    const found = isProcessRunning('cloudflared.exe')
    const details = found ? 'cloudflared.exe process found' : 'cloudflared.exe not running'
    return { running: found, details }
  }

  // Linux: check for cloudflared process
  if (!isProcessRunning('cloudflared')) {
    return { running: false, details: 'cloudflared not running' }
  }

  // Check if it's a tunnel connector (not just any cloudflared)
  let details = 'cloudflared running (tunnel mode unconfirmed)'
  try {
    const listing = execSync('pgrep -af "cloudflared.*tunnel"', {
      encoding: 'utf-8',
      timeout: 5000,
      stdio: 'pipe',
    }).trim()
    const [firstLine] = listing.split('\n')
    details = firstLine || 'cloudflared tunnel running'
  } catch {
    // Confirmation failed — keep the "unconfirmed" details
  }

  return { running: true, details }
}
|
|
143
|
+
|
|
144
|
+
/**
 * Report the state of the VNC / display stack.
 *
 * Windows: the TightVNC server process (`tvnserver.exe`) and a listener on
 * port 6080 must both be present. Other platforms: `Xvfb` and a listener on
 * port 6080 must both be present; `x11vnc` is listed in the details but does
 * not affect the `running` flag.
 *
 * @returns {{ running: boolean, details?: string }}
 */
function checkVnc() {
  const processLabel = (up) => (up ? 'running' : 'not running')
  const portLabel = (up) => (up ? 'listening' : 'not listening')

  if (IS_WINDOWS) {
    // TightVNC on Windows
    const tightVnc = isProcessRunning('tvnserver.exe')
    const websockify = isPortListening(6080)
    const parts = [
      `TightVNC: ${processLabel(tightVnc)}`,
      `websockify (:6080): ${portLabel(websockify)}`,
    ]
    return { running: tightVnc && websockify, details: parts.join(', ') }
  }

  // Linux: Xvfb + x11vnc + websockify (noVNC)
  const xvfb = isProcessRunning('Xvfb')
  const x11vnc = isProcessRunning('x11vnc')
  const websockify = isPortListening(6080)
  const parts = [
    `Xvfb: ${processLabel(xvfb)}`,
    `x11vnc: ${processLabel(x11vnc)}`,
    `websockify (:6080): ${portLabel(websockify)}`,
  ]
  return { running: xvfb && websockify, details: parts.join(', ') }
}
|
|
175
|
+
|
|
176
|
+
/**
 * Report the state of the terminal proxy on port 7681.
 *
 * Windows: running means something is listening on port 7681. Other
 * platforms: running means either a `ttyd` process exists or the port is
 * listening.
 *
 * @returns {{ running: boolean, details?: string }}
 */
function checkTerminal() {
  const portLabel = (up) => (up ? 'listening' : 'not listening')

  if (IS_WINDOWS) {
    // Windows terminal server runs on port 7681
    const portOpen = isPortListening(7681)
    return {
      running: portOpen,
      details: `terminal-server (:7681): ${portLabel(portOpen)}`,
    }
  }

  // Linux: ttyd on port 7681
  const ttydUp = isProcessRunning('ttyd')
  const portOpen = isPortListening(7681)
  const summary = [
    `ttyd: ${ttydUp ? 'running' : 'not running'}`,
    `port 7681: ${portLabel(portOpen)}`,
  ]
  return { running: ttydUp || portOpen, details: summary.join(', ') }
}
|
|
201
|
+
|
|
202
|
+
/**
 * Collect LLM availability information.
 *
 * Combines the service list from getLlmServices() with whether an LLM command
 * is configured and its configured value ('(not set)' when empty).
 *
 * @returns {{ services: object[], llm_command: { configured: boolean, value: string } }}
 */
function checkLlm() {
  const services = getLlmServices()
  const configured = isLlmCommandConfigured()
  const value = config.LLM_COMMAND || '(not set)'

  return { services, llm_command: { configured, value } }
}
|
|
215
|
+
|
|
216
|
+
/**
 * Summarise which configuration keys are set.
 *
 * A key counts as set when it has a truthy value in either `config` or
 * `process.env`. Required keys that are not set are listed in `missing`;
 * optional keys only ever appear in `configured`.
 *
 * @returns {{ configured: string[], missing: string[] }}
 */
function checkEnv() {
  const required = ['HQ_URL', 'API_TOKEN', 'MINION_ID']
  const optional = ['AGENT_PORT', 'LLM_COMMAND', 'HEARTBEAT_INTERVAL', 'MINION_USER']

  const isSet = (key) => Boolean(config[key] || process.env[key])

  return {
    // Required keys first, then optional ones, each in declaration order
    configured: [...required, ...optional].filter((key) => isSet(key)),
    missing: required.filter((key) => !isSet(key)),
  }
}
|
|
242
|
+
|
|
243
|
+
/**
 * Fastify plugin that registers the diagnostic route.
 *
 * GET /api/diagnose runs every check (agent, HQ, tunnel, VNC, terminal, LLM,
 * environment) and returns a summary line, version, platform, timestamp, the
 * current task and the per-check results.
 *
 * @param {import('fastify').FastifyInstance} fastify
 */
async function diagnoseRoutes(fastify) {
  const flag = (ok) => (ok ? 'ok' : 'ng')

  fastify.get('/api/diagnose', async () => {
    const { currentStatus, currentTask } = getStatus()

    // The HQ request is started first; the remaining checks are synchronous
    // and their plain return values are collected alongside it.
    const [hq, tunnel, vnc, terminal, llm, env] = await Promise.all([
      checkHqConnectivity(),
      checkTunnel(),
      checkVnc(),
      checkTerminal(),
      checkLlm(),
      checkEnv(),
    ])

    const llmDetails = [
      llm.services.map((s) => `${s.name}:${flag(s.authenticated)}`).join(', '),
      `llm_command:${flag(llm.llm_command.configured)}`,
    ].join(' | ')

    const envComplete = env.missing.length === 0
    const envDetails = envComplete
      ? `all configured (${env.configured.join(', ')})`
      : `missing: ${env.missing.join(', ')}`

    const checks = {
      agent: { ok: true, details: `status=${currentStatus}, uptime=${Math.floor(process.uptime())}s` },
      hq: { ok: hq.ok, details: hq.error || `latency=${hq.latency_ms}ms` },
      tunnel: { ok: tunnel.running, details: tunnel.details },
      vnc: { ok: vnc.running, details: vnc.details },
      terminal: { ok: terminal.running, details: terminal.details },
      llm: {
        ok: llm.services.some((s) => s.authenticated) && llm.llm_command.configured,
        details: llmDetails,
      },
      env: { ok: envComplete, details: envDetails },
    }

    // Build summary: passed checks out of the total
    const results = Object.values(checks)
    const passed = results.reduce((count, check) => (check.ok ? count + 1 : count), 0)
    const summary = passed === results.length ? 'ALL OK' : `${passed}/${results.length} checks passed`

    return {
      summary,
      version,
      platform: IS_WINDOWS ? 'windows' : 'linux',
      timestamp: new Date().toISOString(),
      current_task: currentTask,
      checks,
    }
  })
}
|
|
295
|
+
|
|
296
|
+
module.exports = { diagnoseRoutes }
|