@ryanfw/prompt-orchestration-pipeline 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/package.json +2 -1
  2. package/src/components/DAGGrid.jsx +157 -47
  3. package/src/components/ui/RestartJobModal.jsx +26 -6
  4. package/src/components/ui/StopJobModal.jsx +183 -0
  5. package/src/core/config.js +7 -3
  6. package/src/core/lifecycle-policy.js +62 -0
  7. package/src/core/pipeline-runner.js +312 -217
  8. package/src/core/status-writer.js +84 -0
  9. package/src/pages/Code.jsx +8 -1
  10. package/src/pages/PipelineDetail.jsx +85 -3
  11. package/src/pages/PromptPipelineDashboard.jsx +10 -11
  12. package/src/ui/client/adapters/job-adapter.js +60 -0
  13. package/src/ui/client/api.js +233 -8
  14. package/src/ui/client/hooks/useJobList.js +14 -1
  15. package/src/ui/dist/app.js +262 -0
  16. package/src/ui/dist/assets/{index-DeDzq-Kk.js → index-B320avRx.js} +4854 -2104
  17. package/src/ui/dist/assets/index-B320avRx.js.map +1 -0
  18. package/src/ui/dist/assets/style-BYCoLBnK.css +62 -0
  19. package/src/ui/dist/favicon.svg +12 -0
  20. package/src/ui/dist/index.html +2 -2
  21. package/src/ui/endpoints/file-endpoints.js +330 -0
  22. package/src/ui/endpoints/job-control-endpoints.js +1001 -0
  23. package/src/ui/endpoints/job-endpoints.js +62 -0
  24. package/src/ui/endpoints/sse-endpoints.js +223 -0
  25. package/src/ui/endpoints/state-endpoint.js +85 -0
  26. package/src/ui/endpoints/upload-endpoints.js +406 -0
  27. package/src/ui/express-app.js +182 -0
  28. package/src/ui/server.js +38 -1880
  29. package/src/ui/sse-broadcast.js +93 -0
  30. package/src/ui/utils/http-utils.js +139 -0
  31. package/src/ui/utils/mime-types.js +196 -0
  32. package/src/ui/vite.config.js +22 -0
  33. package/src/utils/jobs.js +39 -0
  34. package/src/ui/dist/assets/style-aBtD_Yrs.css +0 -62
package/src/ui/endpoints/job-control-endpoints.js (new file)
@@ -0,0 +1,1001 @@
+ import fs from "fs";
+ import path from "path";
+ import { spawn } from "node:child_process";
+ import { fileURLToPath } from "url";
+ import {
+   resetJobToCleanSlate,
+   resetJobFromTask,
+   resetSingleTask,
+   initializeJobArtifacts,
+   writeJobStatus,
+   readJobStatus,
+ } from "../../core/status-writer.js";
+ import { getPipelineConfig } from "../../core/config.js";
+ import {
+   getPendingSeedPath,
+   resolvePipelinePaths,
+   getJobDirectoryPath,
+   getJobMetadataPath,
+   getJobPipelinePath,
+ } from "../../config/paths.js";
+ import { readRawBody } from "../utils/http-utils.js";
+
+ // Get __dirname equivalent in ES modules
+ const __filename = fileURLToPath(import.meta.url);
+ const __dirname = path.dirname(__filename);
+
+ // In-memory restart guard to prevent duplicate concurrent restarts per job
+ const restartingJobs = new Set();
+
+ // In-memory start guard to prevent duplicate concurrent starts per job
+ const startingJobs = new Set();
+
+ // In-memory stop guard to prevent duplicate concurrent stops per job
+ const stoppingJobs = new Set();
+
+ // Helper functions for restart guard
+ function isRestartInProgress(jobId) {
+   return restartingJobs.has(jobId);
+ }
+
+ function beginRestart(jobId) {
+   restartingJobs.add(jobId);
+ }
+
+ function endRestart(jobId) {
+   restartingJobs.delete(jobId);
+ }
+
+ // Helper functions for start guard
+ function isStartInProgress(jobId) {
+   return startingJobs.has(jobId);
+ }
+
+ function beginStart(jobId) {
+   startingJobs.add(jobId);
+ }
+
+ function endStart(jobId) {
+   startingJobs.delete(jobId);
+ }
+
+ // Helper functions for stop guard
+ function isStopInProgress(jobId) {
+   return stoppingJobs.has(jobId);
+ }
+
+ function beginStop(jobId) {
+   stoppingJobs.add(jobId);
+ }
+
+ function endStop(jobId) {
+   stoppingJobs.delete(jobId);
+ }
+
+ /**
+  * Validate that all upstream tasks are DONE
+  * @param {Object} params - Parameters object
+  * @param {Array} params.jobPipelineTasks - Pipeline tasks array from pipeline.json
+  * @param {string} params.targetTaskId - Target task ID to validate
+  * @param {Object} params.snapshotTasks - Tasks from tasks-status.json snapshot
+  * @returns {Object} Validation result { ok: true } or { ok: false, code: "dependencies_not_satisfied", missing: [names] }
+  */
+ function validateUpstreamDone({
+   jobPipelineTasks,
+   targetTaskId,
+   snapshotTasks,
+ }) {
+   // Helper function to extract task name from string or object
+   const getTaskName = (t) => (typeof t === "string" ? t : t.name);
+
+   // Derive ordered task names from pipeline config
+   const orderedTaskNames = (jobPipelineTasks || []).map(getTaskName);
+
+   // Find target task index
+   const targetIndex = orderedTaskNames.indexOf(targetTaskId);
+   if (targetIndex === -1) {
+     return { ok: false, code: "task_not_found" };
+   }
+
+   // Get upstream tasks (all tasks before target)
+   const upstreamTasks = orderedTaskNames.slice(0, targetIndex);
+
+   // Check if all upstream tasks are DONE
+   const missing = [];
+   for (const taskName of upstreamTasks) {
+     const taskState = snapshotTasks[taskName]?.state;
+     if (taskState !== "done") {
+       missing.push(taskName);
+     }
+   }
+
+   if (missing.length > 0) {
+     return {
+       ok: false,
+       code: "dependencies_not_satisfied",
+       missing,
+     };
+   }
+
+   return { ok: true };
+ }
+
+ /**
+  * Resolve job lifecycle directory deterministically
+  * @param {string} dataDir - Base data directory
+  * @param {string} jobId - Job identifier
+  * @returns {Promise<string|null>} One of "current", "complete", "rejected", or null if job not found
+  */
+ async function resolveJobLifecycle(dataDir, jobId) {
+   const currentJobDir = getJobDirectoryPath(dataDir, jobId, "current");
+   const completeJobDir = getJobDirectoryPath(dataDir, jobId, "complete");
+   const rejectedJobDir = getJobDirectoryPath(dataDir, jobId, "rejected");
+
+   // Check in order of preference: current > complete > rejected
+   const currentExists = await exists(currentJobDir);
+   const completeExists = await exists(completeJobDir);
+   const rejectedExists = await exists(rejectedJobDir);
+
+   if (currentExists) {
+     return "current";
+   }
+
+   if (completeExists) {
+     return "complete";
+   }
+
+   if (rejectedExists) {
+     return "rejected";
+   }
+
+   // Job not found in any lifecycle
+   return null;
+ }
+
+ const exists = async (p) =>
+   fs.promises
+     .access(p)
+     .then(() => true)
+     .catch(() => false);
+
+ /**
+  * Handle POST /api/jobs/:jobId/rescan
+  */
+ export async function handleJobRescan(req, res, jobId, dataDir, sendJson) {
+   try {
+     // Validate jobId
+     if (!jobId || typeof jobId !== "string" || jobId.trim() === "") {
+       sendJson(res, 400, {
+         ok: false,
+         error: "bad_request",
+         message: "jobId is required",
+       });
+       return;
+     }
+
+     // Resolve job lifecycle
+     const lifecycle = await resolveJobLifecycle(dataDir, jobId);
+     if (!lifecycle) {
+       sendJson(res, 404, {
+         ok: false,
+         code: "job_not_found",
+         message: "Job not found",
+       });
+       return;
+     }
+
+     // Determine job directory
+     const jobDir = getJobDirectoryPath(dataDir, jobId, lifecycle);
+
+     // Read job metadata to get pipeline slug
+     const jobMetaPath = path.join(jobDir, "job.json");
+     let jobMeta;
+     try {
+       const content = await fs.promises.readFile(jobMetaPath, "utf8");
+       jobMeta = JSON.parse(content);
+     } catch (error) {
+       console.error(`Error reading job metadata for ${jobId}:`, error);
+       sendJson(res, 500, {
+         ok: false,
+         code: "internal_error",
+         message: "Failed to read job metadata",
+       });
+       return;
+     }
+
+     const pipelineSlug = jobMeta.pipeline;
+     if (!pipelineSlug) {
+       sendJson(res, 500, {
+         ok: false,
+         code: "invalid_job_metadata",
+         message: "Job metadata missing pipeline slug",
+       });
+       return;
+     }
+
+     // Get authoritative source pipeline config
+     let sourcePipelinePath;
+     try {
+       const config = await getPipelineConfig(pipelineSlug);
+       sourcePipelinePath = config.pipelineJsonPath;
+     } catch (error) {
+       console.error(
+         `Error getting pipeline config for ${pipelineSlug}:`,
+         error
+       );
+       sendJson(res, 404, {
+         ok: false,
+         code: "pipeline_not_found",
+         message: `Pipeline configuration not found for slug: ${pipelineSlug}`,
+       });
+       return;
+     }
+
+     let sourcePipeline;
+     try {
+       const content = await fs.promises.readFile(sourcePipelinePath, "utf8");
+       sourcePipeline = JSON.parse(content);
+     } catch (error) {
+       console.error(
+         `Error reading source pipeline config for ${pipelineSlug}:`,
+         error
+       );
+       sendJson(res, 404, {
+         ok: false,
+         code: "pipeline_config_not_found",
+         message: `Pipeline configuration not found for slug: ${pipelineSlug}`,
+       });
+       return;
+     }
+
+     // Read job's local pipeline config
+     const jobPipelinePath = path.join(jobDir, "pipeline.json");
+     let jobPipeline;
+     try {
+       const content = await fs.promises.readFile(jobPipelinePath, "utf8");
+       jobPipeline = JSON.parse(content);
+     } catch (error) {
+       console.error(`Error reading job pipeline config for ${jobId}:`, error);
+       sendJson(res, 500, {
+         ok: false,
+         code: "internal_error",
+         message: "Failed to read job pipeline configuration",
+       });
+       return;
+     }
+
+     // Helper function to extract task name from string or object
+     const getTaskName = (t) => (typeof t === "string" ? t : t.name);
+
+     // Calculate added and removed tasks
+     const existingTaskNames = new Set(
+       (jobPipeline.tasks || []).map(getTaskName)
+     );
+     const sourceTaskNames = new Set(
+       (sourcePipeline.tasks || []).map(getTaskName)
+     );
+
+     const added = (sourcePipeline.tasks || []).filter(
+       (t) => !existingTaskNames.has(getTaskName(t))
+     );
+     const removed = (jobPipeline.tasks || []).filter(
+       (t) => !sourceTaskNames.has(getTaskName(t))
+     );
+
+     if (added.length === 0 && removed.length === 0) {
+       sendJson(res, 200, {
+         ok: true,
+         added: [],
+         removed: [],
+       });
+       return;
+     }
+
+     // Update job's pipeline.json with full synchronization
+     jobPipeline.tasks = JSON.parse(JSON.stringify(sourcePipeline.tasks || []));
+     await fs.promises.writeFile(
+       jobPipelinePath,
+       JSON.stringify(jobPipeline, null, 2)
+     );
+
+     // Create directories for all tasks in synchronized pipeline
+     const addedTaskNames = [];
+     for (const task of jobPipeline.tasks || []) {
+       const taskName = getTaskName(task);
+       const taskDir = path.join(jobDir, "tasks", taskName);
+       await fs.promises.mkdir(taskDir, { recursive: true });
+
+       // Track which tasks were newly added for response
+       if (added.some((t) => getTaskName(t) === taskName)) {
+         addedTaskNames.push(taskName);
+       }
+     }
+
+     // Update tasks-status.json with reconstruction logic
+     await writeJobStatus(jobDir, (snapshot) => {
+       const oldTasks = snapshot.tasks || {};
+       const newTasksStatus = {};
+
+       // Iterate through source pipeline tasks in order
+       for (const task of sourcePipeline.tasks || []) {
+         const taskName = getTaskName(task);
+
+         if (oldTasks[taskName]) {
+           // Preserve existing state for tasks that remain
+           newTasksStatus[taskName] = oldTasks[taskName];
+         } else {
+           // Initialize new state for added tasks
+           newTasksStatus[taskName] = {
+             state: "pending",
+             currentStage: null,
+             attempts: 0,
+             refinementAttempts: 0,
+             files: {
+               artifacts: [],
+               logs: [],
+               tmp: [],
+             },
+           };
+         }
+       }
+
+       snapshot.tasks = newTasksStatus;
+       return snapshot;
+     });
+
+     sendJson(res, 200, {
+       ok: true,
+       added: addedTaskNames,
+       removed: removed.map(getTaskName),
+     });
+   } catch (error) {
+     console.error(`Error handling POST /api/jobs/${jobId}/rescan:`, error);
+     sendJson(res, 500, {
+       ok: false,
+       code: "internal_error",
+       message: "Internal server error",
+     });
+   }
+ }
+
+ /**
+  * Handle POST /api/jobs/:jobId/restart
+  */
+ export async function handleJobRestart(req, res, jobId, dataDir, sendJson) {
+   try {
+     // Validate jobId
+     if (!jobId || typeof jobId !== "string" || jobId.trim() === "") {
+       sendJson(res, 400, {
+         ok: false,
+         error: "bad_request",
+         message: "jobId is required",
+       });
+       return;
+     }
+
+     // Resolve job lifecycle
+     const lifecycle = await resolveJobLifecycle(dataDir, jobId);
+     if (!lifecycle) {
+       sendJson(res, 404, {
+         ok: false,
+         code: "job_not_found",
+         message: "Job not found",
+       });
+       return;
+     }
+
+     // Move job to current directory if it's not already there
+     let jobDir = getJobDirectoryPath(dataDir, jobId, lifecycle);
+
+     if (lifecycle !== "current") {
+       const sourcePath = getJobDirectoryPath(dataDir, jobId, lifecycle);
+       const targetPath = getJobDirectoryPath(dataDir, jobId, "current");
+
+       // Atomically move job to current directory
+       await fs.promises.rename(sourcePath, targetPath);
+       jobDir = targetPath;
+     }
+
+     // Check if job is already running
+     const statusPath = path.join(jobDir, "tasks-status.json");
+
+     let snapshot;
+     try {
+       const content = await fs.promises.readFile(statusPath, "utf8");
+       snapshot = JSON.parse(content);
+     } catch (error) {
+       if (error.code === "ENOENT") {
+         sendJson(res, 404, {
+           ok: false,
+           code: "job_not_found",
+           message: "Job status file not found",
+         });
+         return;
+       }
+       throw error;
+     }
+
+     // Guard against running jobs
+     if (snapshot.state === "running") {
+       sendJson(res, 409, {
+         ok: false,
+         code: "job_running",
+         message: "Job is currently running",
+       });
+       return;
+     }
+
+     // Guard against concurrent restarts
+     if (isRestartInProgress(jobId)) {
+       sendJson(res, 409, {
+         ok: false,
+         code: "job_running",
+         message: "Job restart is already in progress",
+       });
+       return;
+     }
+
+     // Parse optional fromTask from request body for targeted restart
+     let body = {};
+     try {
+       const rawBody = await readRawBody(req);
+       if (rawBody && rawBody.length > 0) {
+         const bodyString = rawBody.toString("utf8");
+         body = JSON.parse(bodyString);
+       }
+     } catch (error) {
+       sendJson(res, 400, {
+         ok: false,
+         error: "bad_request",
+         message: "Invalid JSON in request body",
+       });
+       return;
+     }
+
+     const { fromTask, singleTask } = body;
+
+     // Begin restart guard
+     beginRestart(jobId);
+
+     try {
+       // Reset job: clean-slate, partial from a specific task, or single task
+       if (fromTask && singleTask === true) {
+         await resetSingleTask(jobDir, fromTask, { clearTokenUsage: true });
+       } else if (fromTask) {
+         await resetJobFromTask(jobDir, fromTask, { clearTokenUsage: true });
+       } else {
+         await resetJobToCleanSlate(jobDir, { clearTokenUsage: true });
+       }
+     } finally {
+       // Always end restart guard
+       endRestart(jobId);
+     }
+
+     // Spawn detached pipeline-runner process
+     const runnerPath = path.join(__dirname, "../../core/pipeline-runner.js");
+     const base = process.env.PO_ROOT || dataDir;
+     const env = {
+       ...process.env,
+       PO_ROOT: base,
+       PO_DATA_DIR: path.join(base, "pipeline-data"),
+       PO_PENDING_DIR: path.join(base, "pipeline-data", "pending"),
+       PO_CURRENT_DIR: path.join(base, "pipeline-data", "current"),
+       PO_COMPLETE_DIR: path.join(base, "pipeline-data", "complete"),
+       ...(fromTask && { PO_START_FROM_TASK: fromTask }),
+       ...(singleTask && { PO_RUN_SINGLE_TASK: "true" }),
+     };
+
+     const child = spawn(process.execPath, [runnerPath, jobId], {
+       env,
+       stdio: "ignore",
+       detached: true,
+     });
+
+     // Unref() child process so it runs in the background
+     child.unref();
+
+     // Send success response
+     const mode =
+       fromTask && singleTask === true
+         ? "single-task"
+         : fromTask
+           ? "partial"
+           : "clean-slate";
+     sendJson(res, 202, {
+       ok: true,
+       jobId,
+       mode,
+       spawned: true,
+     });
+   } catch (error) {
+     console.error(`Error handling POST /api/jobs/${jobId}/restart:`, error);
+
+     // Clean up restart guard on error
+     if (isRestartInProgress(jobId)) {
+       endRestart(jobId);
+     }
+
+     if (error.code === "ENOENT") {
+       sendJson(res, 404, {
+         ok: false,
+         code: "job_not_found",
+         message: "Job directory not found",
+       });
+     } else if (error.code === "spawn failed") {
+       sendJson(res, 500, {
+         ok: false,
+         code: "spawn_failed",
+         message: error.message || "Failed to spawn pipeline runner",
+       });
+     } else if (error.httpStatus === 409) {
+       // Handle lifecycle policy errors from pipeline-runner
+       sendJson(res, 409, {
+         ok: false,
+         code: error.error || "unsupported_lifecycle",
+         message: error.message || "Operation not allowed by lifecycle policy",
+         ...(error.reason && { reason: error.reason }),
+       });
+     } else {
+       sendJson(res, 500, {
+         ok: false,
+         code: "internal_error",
+         message: "Internal server error",
+       });
+     }
+   }
+ }
+
+ /**
+  * Handle POST /api/jobs/:jobId/stop
+  */
+ export async function handleJobStop(req, res, jobId, dataDir, sendJson) {
+   try {
+     // Validate jobId
+     if (!jobId || typeof jobId !== "string" || jobId.trim() === "") {
+       sendJson(res, 400, {
+         ok: false,
+         code: "bad_request",
+         message: "jobId is required",
+       });
+       return;
+     }
+
+     // Resolve job lifecycle
+     const lifecycle = await resolveJobLifecycle(dataDir, jobId);
+     if (!lifecycle) {
+       sendJson(res, 404, {
+         ok: false,
+         code: "job_not_found",
+         message: "Job not found",
+       });
+       return;
+     }
+
+     // Concurrency: if isStopInProgress(jobId) return 409
+     if (isStopInProgress(jobId)) {
+       sendJson(res, 409, {
+         ok: false,
+         code: "job_running",
+         message: "Job stop is already in progress",
+       });
+       return;
+     }
+
+     // beginStop(jobId) before doing work; ensure endStop(jobId) in finally
+     beginStop(jobId);
+
+     try {
+       // Determine job directory; if not in current, rename into current (mirror restart)
+       let jobDir = getJobDirectoryPath(dataDir, jobId, lifecycle);
+
+       if (lifecycle !== "current") {
+         const sourcePath = getJobDirectoryPath(dataDir, jobId, lifecycle);
+         const targetPath = getJobDirectoryPath(dataDir, jobId, "current");
+
+         // Atomically move job to current directory
+         await fs.promises.rename(sourcePath, targetPath);
+         jobDir = targetPath;
+       }
+
+       let pidFound = false;
+       let usedSignal = null;
+       let resetTask = null;
+
+       // Read PID from path.join(jobDir, "runner.pid")
+       const pidPath = path.join(jobDir, "runner.pid");
+       const pidExists = await exists(pidPath);
+
+       if (pidExists) {
+         try {
+           const pidContent = await fs.promises.readFile(pidPath, "utf8");
+           const pid = parseInt(pidContent.trim(), 10);
+
+           if (isNaN(pid)) {
+             // Treat as no runner (remove file)
+             await fs.promises.unlink(pidPath).catch(() => {}); // Ignore ENOENT
+           } else {
+             pidFound = true;
+
+             try {
+               // Try process.kill(pid, "SIGTERM")
+               process.kill(pid, "SIGTERM");
+               usedSignal = "SIGTERM";
+
+               // Wait 1500ms
+               await new Promise((resolve) => setTimeout(resolve, 1500));
+
+               // If process still exists: try process.kill(pid, 0) to check
+               try {
+                 process.kill(pid, 0); // Check if process exists
+                 // If we get here, process still exists, try SIGKILL
+                 process.kill(pid, "SIGKILL");
+                 usedSignal = "SIGKILL";
+               } catch (checkError) {
+                 // ESRCH means process is gone (SIGTERM worked or process ended naturally)
+                 if (checkError.code !== "ESRCH") {
+                   throw checkError;
+                 }
+                 // Keep usedSignal as "SIGTERM"
+               }
+             } catch (killError) {
+               if (killError.code === "ESRCH") {
+                 // Process was already dead, no signal was sent
+                 usedSignal = null;
+               } else {
+                 // Non-ESRCH errors → 500 spawn_failed/internal with message
+                 throw killError;
+               }
+             }
+           }
+         } catch (error) {
+           // Remove runner.pid regardless after attempts (unlink ignoring ENOENT)
+           await fs.promises.unlink(pidPath).catch(() => {});
+           throw error;
+         }
+
+         // Remove runner.pid regardless after attempts (unlink ignoring ENOENT)
+         await fs.promises.unlink(pidPath).catch(() => {});
+       }
+
+       // Status reset:
+       // Read tasks-status.json via readJobStatus
+       const snapshot = await readJobStatus(jobDir);
+       if (!snapshot) {
+         sendJson(res, 500, {
+           ok: false,
+           code: "internal_error",
+           message: "Failed to read job status",
+         });
+         return;
+       }
+
+       // Determine running taskId:
+       let runningTaskId = null;
+       if (
+         snapshot.current &&
+         typeof snapshot.current === "string" &&
+         snapshot.tasks[snapshot.current]?.state === "running"
+       ) {
+         runningTaskId = snapshot.current;
+       } else {
+         // Else find first key in snapshot.tasks with state === "running"
+         for (const taskId of Object.keys(snapshot.tasks || {})) {
+           if (snapshot.tasks[taskId].state === "running") {
+             runningTaskId = taskId;
+             break;
+           }
+         }
+       }
+
+       // If running taskId found: await resetSingleTask(jobDir, taskId, { clearTokenUsage: true })
+       if (runningTaskId) {
+         resetTask = runningTaskId;
+         await resetSingleTask(jobDir, runningTaskId, { clearTokenUsage: true });
+       }
+
+       // Always normalize root fields afterward:
+       await writeJobStatus(jobDir, (s) => {
+         s.current = null;
+         s.currentStage = null;
+         return s;
+       });
+
+       // Response: sendJson 200 with { ok: true, jobId, stopped: Boolean(pidFound), resetTask: taskId || null, signal: usedSignal || null }
+       sendJson(res, 200, {
+         ok: true,
+         jobId,
+         stopped: pidFound,
+         resetTask: resetTask,
+         signal: usedSignal,
+       });
+     } finally {
+       // Always endStop(jobId)
+       endStop(jobId);
+     }
+   } catch (error) {
+     console.error(`Error handling POST /api/jobs/${jobId}/stop:`, error);
+
+     // Clean up stop guard on error
+     endStop(jobId);
+
+     if (error.code === "ENOENT") {
+       sendJson(res, 404, {
+         ok: false,
+         code: "job_not_found",
+         message: "Job directory not found",
+       });
+     } else if (error.code === "spawn_failed") {
+       sendJson(res, 500, {
+         ok: false,
+         code: "spawn_failed",
+         message: error.message || "Failed to stop pipeline runner",
+       });
+     } else {
+       sendJson(res, 500, {
+         ok: false,
+         code: "internal_error",
+         message: "Internal server error",
+       });
+     }
+   }
+ }
+
+ /**
+  * Handle POST /api/jobs/:jobId/tasks/:taskId/start
+  */
+ export async function handleTaskStart(
+   req,
+   res,
+   jobId,
+   taskId,
+   dataDir,
+   sendJson
+ ) {
+   try {
+     // Validate jobId and taskId
+     if (!jobId || typeof jobId !== "string" || jobId.trim() === "") {
+       sendJson(res, 400, {
+         ok: false,
+         error: "bad_request",
+         message: "jobId is required",
+       });
+       return;
+     }
+
+     if (!taskId || typeof taskId !== "string" || taskId.trim() === "") {
+       sendJson(res, 400, {
+         ok: false,
+         error: "bad_request",
+         message: "taskId is required",
+       });
+       return;
+     }
+
+     // Resolve job lifecycle
+     const lifecycle = await resolveJobLifecycle(dataDir, jobId);
+     if (!lifecycle) {
+       sendJson(res, 404, {
+         ok: false,
+         code: "job_not_found",
+         message: "Job not found",
+       });
+       return;
+     }
+
+     // Move job to current directory if it's not already there (same logic as restart)
+     let jobDir = getJobDirectoryPath(dataDir, jobId, lifecycle);
+
+     if (lifecycle !== "current") {
+       const sourcePath = getJobDirectoryPath(dataDir, jobId, lifecycle);
+       const targetPath = getJobDirectoryPath(dataDir, jobId, "current");
+
+       // Atomically move job to current directory
+       await fs.promises.rename(sourcePath, targetPath);
+       jobDir = targetPath;
+     }
+
+     // Read snapshot from tasks-status.json
+     const statusPath = path.join(jobDir, "tasks-status.json");
+     let snapshot;
+     try {
+       const content = await fs.promises.readFile(statusPath, "utf8");
+       snapshot = JSON.parse(content);
+     } catch (error) {
+       if (error.code === "ENOENT") {
+         sendJson(res, 404, {
+           ok: false,
+           code: "job_not_found",
+           message: "Job status file not found",
+         });
+         return;
+       }
+       if (error instanceof SyntaxError) {
+         sendJson(res, 500, {
+           ok: false,
+           code: "internal_error",
+           message: "Invalid job status JSON",
+         });
+         return;
+       }
+       throw error;
+     }
+
+     // Guard job not running
+     if (snapshot.state === "running") {
+       sendJson(res, 409, {
+         ok: false,
+         code: "job_running",
+         message: "Job is currently running; start is unavailable",
+       });
+       return;
+     }
+
+     // Check if any task is currently running
+     const hasRunningTask = Object.values(snapshot.tasks || {}).some(
+       (task) => task.state === "running"
+     );
+     if (hasRunningTask) {
+       sendJson(res, 409, {
+         ok: false,
+         code: "job_running",
+         message: "Job is currently running; start is unavailable",
+       });
+       return;
+     }
+
+     // Validate task existence
+     if (!snapshot.tasks || !snapshot.tasks[taskId]) {
+       sendJson(res, 400, {
+         ok: false,
+         code: "task_not_found",
+         message: "Task not found in job",
+       });
+       return;
+     }
+
+     // Validate task state is Pending
+     if (snapshot.tasks[taskId].state !== "pending") {
+       sendJson(res, 400, {
+         ok: false,
+         code: "task_not_pending",
+         message: "Task is not in pending state",
+       });
+       return;
+     }
+
+     // Read job pipeline config
+     const jobPipelinePath = getJobPipelinePath(dataDir, jobId, "current");
+     let jobPipeline;
+     try {
+       const content = await fs.promises.readFile(jobPipelinePath, "utf8");
+       jobPipeline = JSON.parse(content);
+     } catch (error) {
+       sendJson(res, 500, {
+         ok: false,
+         code: "pipeline_config_not_found",
+         message: "Pipeline configuration not found",
+       });
+       return;
+     }
+
+     // Validate dependencies via validateUpstreamDone
+     const depCheck = validateUpstreamDone({
+       jobPipelineTasks: jobPipeline.tasks,
+       targetTaskId: taskId,
+       snapshotTasks: snapshot.tasks,
+     });
+
+     if (!depCheck.ok) {
+       if (depCheck.code === "dependencies_not_satisfied") {
+         sendJson(res, 409, {
+           ok: false,
+           code: "dependencies_not_satisfied",
+           message: `Dependencies not satisfied for task: ${depCheck.missing.join(", ")}`,
+         });
+         return;
+       }
+       // Handle other validation errors
+       sendJson(res, 400, {
+         ok: false,
+         code: depCheck.code,
+         message: "Task validation failed",
+       });
+       return;
+     }
+
+     // Start guard: prevent duplicate starts
+     if (isStartInProgress(jobId)) {
+       sendJson(res, 409, {
+         ok: false,
+         code: "job_running",
+         message: "Task start is already in progress",
+       });
+       return;
+     }
+
+     beginStart(jobId);
+
+     try {
+       // Spawn detached runner (mirror restart code)
+       const runnerPath = path.join(__dirname, "../../core/pipeline-runner.js");
+       const base = process.env.PO_ROOT || dataDir;
+       const env = {
+         ...process.env,
+         PO_ROOT: base,
+         PO_DATA_DIR: path.join(base, "pipeline-data"),
+         PO_PENDING_DIR: path.join(base, "pipeline-data", "pending"),
+         PO_CURRENT_DIR: path.join(base, "pipeline-data", "current"),
+         PO_COMPLETE_DIR: path.join(base, "pipeline-data", "complete"),
+         PO_START_FROM_TASK: taskId,
+         PO_RUN_SINGLE_TASK: "true",
+       };
+
+       const child = spawn(process.execPath, [runnerPath, jobId], {
+         env,
+         stdio: "ignore",
+         detached: true,
+       });
+
+       child.unref();
+     } finally {
+       // Always end start guard
+       endStart(jobId);
+     }
+
+     // Send success response
+     sendJson(res, 202, {
+       ok: true,
+       jobId,
+       taskId,
+       mode: "single-task-start",
+       spawned: true,
+     });
+   } catch (error) {
+     console.error(
+       `Error handling POST /api/jobs/${jobId}/tasks/${taskId}/start:`,
+       error
+     );
+
+     // Clean up start guard on error
+     if (isStartInProgress(jobId)) {
+       endStart(jobId);
+     }
+
+     if (error.code === "ENOENT") {
+       sendJson(res, 404, {
+         ok: false,
+         code: "job_not_found",
+         message: "Job directory not found",
+       });
+     } else if (error.code === "spawn failed") {
+       sendJson(res, 500, {
+         ok: false,
+         code: "spawn_failed",
+         message: error.message || "Failed to spawn pipeline runner",
+       });
+     } else if (error.httpStatus === 409) {
+       // Handle lifecycle policy errors from pipeline-runner
+       sendJson(res, 409, {
+         ok: false,
+         code: error.error || "unsupported_lifecycle",
+         message: error.message || "Operation not allowed by lifecycle policy",
+         ...(error.reason && { reason: error.reason }),
+       });
+     } else {
+       sendJson(res, 500, {
+         ok: false,
+         code: "internal_error",
+         message: "Internal server error",
+       });
+     }
+   }
+ }
+
+ // Export restart guard functions for testing
+ export { isRestartInProgress, beginRestart, endRestart, resolveJobLifecycle };
+
+ // Export start guard functions for testing
+ export { isStartInProgress, beginStart, endStart };
+
+ // Export stop guard functions for testing
+ export { isStopInProgress, beginStop, endStop };
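
The handlers above define the HTTP contract for the new job-control routes (rescan, restart, stop, single-task start). As a rough illustrative sketch only, and not part of the published package, a dashboard client could drive them like this, assuming the routes are mounted at the paths named in the handler comments and that BASE (a hypothetical address) points at the UI server:

// Illustrative client sketch (not from the package); BASE is hypothetical.
const BASE = "http://localhost:3000";

async function restartJob(jobId, options = {}) {
  // Empty body => clean-slate; { fromTask } => partial; { fromTask, singleTask: true } => single-task rerun.
  const res = await fetch(`${BASE}/api/jobs/${jobId}/restart`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(options),
  });
  return res.json(); // 202 => { ok: true, jobId, mode, spawned: true }; 409 => { code: "job_running" }
}

async function stopJob(jobId) {
  const res = await fetch(`${BASE}/api/jobs/${jobId}/stop`, { method: "POST" });
  return res.json(); // 200 => { ok: true, jobId, stopped, resetTask, signal }
}

async function startTask(jobId, taskId) {
  const res = await fetch(`${BASE}/api/jobs/${jobId}/tasks/${taskId}/start`, {
    method: "POST",
  });
  return res.json(); // 202 => { ok, jobId, taskId, mode: "single-task-start" }; 409 => dependencies_not_satisfied
}

async function rescanJob(jobId) {
  const res = await fetch(`${BASE}/api/jobs/${jobId}/rescan`, { method: "POST" });
  return res.json(); // 200 => { ok: true, added: [...], removed: [...] }
}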
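Both handleJobRestart and handleTaskStart hand execution to a detached pipeline-runner.js process and communicate the scope of the run entirely through environment variables (PO_ROOT, PO_DATA_DIR, PO_PENDING_DIR, PO_CURRENT_DIR, PO_COMPLETE_DIR, plus the optional PO_START_FROM_TASK and PO_RUN_SINGLE_TASK). How pipeline-runner.js consumes those variables is outside this hunk, so the following runner-side reading of the contract is an assumption for illustration only:

// Sketch only: one plausible way a runner process could read the variables the
// handlers above set before spawning `node pipeline-runner.js <jobId>`.
// The real pipeline-runner.js (changed elsewhere in this release) is the
// authoritative consumer of these values.
const jobId = process.argv[2];
const currentDir = process.env.PO_CURRENT_DIR; // <PO_ROOT>/pipeline-data/current
const startFromTask = process.env.PO_START_FROM_TASK || null; // resume from this task, if set
const singleTaskOnly = process.env.PO_RUN_SINGLE_TASK === "true"; // run just that one task

console.log(
  `runner: job=${jobId} start=${startFromTask ?? "(beginning)"} singleTask=${singleTaskOnly} dir=${currentDir}`
);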