@ryanfw/prompt-orchestration-pipeline 0.9.1 → 0.11.0

This diff shows the changes between publicly released versions of this package as they appear in their respective public registries. It is provided for informational purposes only.
@@ -5,6 +5,8 @@ import chokidar from "chokidar";
 import { spawn as defaultSpawn } from "node:child_process";
 import { getConfig, getPipelineConfig } from "./config.js";
 import { createLogger } from "./logger.js";
+import { createTaskFileIO, generateLogName } from "./file-io.js";
+import { LogEvent } from "../config/log-events.js";
 
 /**
  * Resolve canonical pipeline directories for the given data root.
@@ -152,7 +154,68 @@ export async function startOrchestrator(opts) {
       tasks: {}, // Initialize empty tasks object for pipeline runner
     };
    await fs.writeFile(statusPath, JSON.stringify(status, null, 2));
+
+    // Initialize status from artifacts if any exist
+    try {
+      const { initializeStatusFromArtifacts } = await import(
+        "./status-initializer.js"
+      );
+      const pipelineConfig = getPipelineConfig(seed?.pipeline || "default");
+      const pipelineSnapshot = JSON.parse(
+        await fs.readFile(pipelineConfig.pipelineJsonPath, "utf8")
+      );
+
+      const applyArtifacts = await initializeStatusFromArtifacts({
+        jobDir: workDir,
+        pipeline: pipelineSnapshot,
+      });
+
+      // Apply artifact initialization to the status
+      const updatedStatus = applyArtifacts(status);
+      await fs.writeFile(statusPath, JSON.stringify(updatedStatus, null, 2));
+
+      logger.log("Initialized status from upload artifacts", {
+        jobId,
+        pipeline: seed?.pipeline,
+        artifactsCount: updatedStatus.files?.artifacts?.length || 0,
+      });
+    } catch (artifactError) {
+      // Don't fail job startup if artifact initialization fails, just log
+      logger.warn("Failed to initialize status from artifacts", {
+        jobId,
+        error: artifactError.message,
+      });
+    }
   }
+  // Create fileIO for orchestrator-level logging
+  const fileIO = createTaskFileIO({
+    workDir,
+    taskName: jobId,
+    getStage: () => "orchestrator",
+    statusPath,
+    trackTaskFiles: false,
+  });
+
+  // Write job start log
+  await fileIO.writeLog(
+    generateLogName(jobId, "orchestrator", LogEvent.START),
+    JSON.stringify(
+      {
+        jobId,
+        pipeline: seed?.pipeline,
+        timestamp: new Date().toISOString(),
+        seedSummary: {
+          name: seed?.name,
+          pipeline: seed?.pipeline,
+          keys: Object.keys(seed || {}),
+        },
+      },
+      null,
+      2
+    ),
+    { mode: "replace" }
+  );
+
   // Spawn runner for this job
   const child = spawnRunner(
     logger,
@@ -161,7 +224,8 @@ export async function startOrchestrator(opts) {
     running,
     spawn,
     testMode,
-    seed
+    seed,
+    fileIO
   );
   // child registered inside spawnRunner
   return child;
@@ -223,6 +287,12 @@ export async function startOrchestrator(opts) {
   return { stop };
 }
 
+/**
+ * @typedef {Object} TaskFileIO
+ * @property {(name: string, content: string, options?: { mode?: 'append'|'replace' }) => Promise<string>} writeLog
+ * @property {(name: string, content: string, options?: { mode?: 'append'|'replace' }) => string} writeLogSync
+ */
+
 /**
  * Spawn a pipeline runner. In testMode we still call spawn() so tests can assert,
  * but we resolve immediately and let tests drive the lifecycle (emit 'exit', etc.).
@@ -234,8 +304,18 @@ export async function startOrchestrator(opts) {
  * @param {typeof defaultSpawn} spawn
  * @param {boolean} testMode
  * @param {Object} seed - Seed data containing pipeline information
+ * @param {TaskFileIO} fileIO - Task-scoped file I/O interface for writing logs
  */
-function spawnRunner(logger, jobId, dirs, running, spawn, testMode, seed) {
+function spawnRunner(
+  logger,
+  jobId,
+  dirs,
+  running,
+  spawn,
+  testMode,
+  seed,
+  fileIO
+) {
   // Use path relative to this file to avoid process.cwd() issues
   const orchestratorDir = path.dirname(new URL(import.meta.url).pathname);
   const runnerPath = path.join(orchestratorDir, "pipeline-runner.js");
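
The TaskFileIO typedef above pins down the small surface spawnRunner actually needs, which makes it easy to substitute a stub in tests. A minimal in-memory sketch that satisfies the typedef (illustrative only; the real object comes from createTaskFileIO in ./file-io.js, and the returned string is assumed here to be the log's name):

// Illustrative stub conforming to the TaskFileIO typedef above.
function createMemoryFileIO() {
  const logs = new Map();
  const write = (name, content, { mode = "append" } = {}) => {
    const prev = mode === "append" ? logs.get(name) ?? "" : "";
    logs.set(name, prev + content);
    return name; // assumption: the string result is the written log's name
  };
  return {
    writeLog: async (name, content, options) => write(name, content, options),
    writeLogSync: (name, content, options) => write(name, content, options),
    logs, // exposed so tests can assert on what was written
  };
}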
@@ -316,11 +396,67 @@ function spawnRunner(logger, jobId, dirs, running, spawn, testMode, seed) {
 
   running.set(jobId, child);
 
-  child.on("exit", () => {
+  child.on("exit", (code, signal) => {
     running.delete(jobId);
+
+    // Write job completion log synchronously
+    if (fileIO) {
+      try {
+        fileIO.writeLogSync(
+          generateLogName(jobId, "orchestrator", LogEvent.COMPLETE),
+          JSON.stringify(
+            {
+              jobId,
+              exitCode: code,
+              signal: signal,
+              timestamp: new Date().toISOString(),
+              completionType: code === 0 ? "success" : "failure",
+            },
+            null,
+            2
+          ),
+          { mode: "replace" }
+        );
+      } catch (error) {
+        logger.error("Failed to write job completion log", {
+          jobId,
+          error: error.message,
+        });
+      }
+    }
   });
-  child.on("error", () => {
+
+  child.on("error", (error) => {
     running.delete(jobId);
+
+    // Write job error log synchronously
+    if (fileIO) {
+      try {
+        fileIO.writeLogSync(
+          generateLogName(jobId, "orchestrator", LogEvent.ERROR),
+          JSON.stringify(
+            {
+              jobId,
+              error: {
+                message: error.message,
+                name: error.name,
+                code: error.code,
+              },
+              timestamp: new Date().toISOString(),
+              completionType: "error",
+            },
+            null,
+            2
+          ),
+          { mode: "replace" }
+        );
+      } catch (logError) {
+        logger.error("Failed to write job error log", {
+          jobId,
+          error: logError.message,
+        });
+      }
+    }
   });
 
   // In test mode: return immediately; in real mode you might await readiness
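
The exit and error handlers use writeLogSync rather than the async writeLog, presumably so the completion or error record is on disk before anything else reacts to the child going away. The same trade-off appears with plain fs in Node teardown paths, where only synchronous I/O is guaranteed to finish (a generic sketch, path hypothetical):

import fs from "node:fs";

// In a process.on("exit") handler the event loop is gone, so queued async
// writes are silently dropped; writeFileSync is the only reliable option.
process.on("exit", (code) => {
  fs.writeFileSync(
    "/tmp/orchestrator-shutdown.log", // hypothetical path for illustration
    JSON.stringify({ exitCode: code, timestamp: new Date().toISOString() })
  );
});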
@@ -7,9 +7,14 @@ import { getPipelineConfig } from "./config.js";
 import { writeJobStatus } from "./status-writer.js";
 import { TaskState } from "../config/statuses.js";
 import { ensureTaskSymlinkBridge } from "./symlink-bridge.js";
-import { cleanupTaskSymlinks } from "./symlink-utils.js";
-import { createTaskFileIO } from "./file-io.js";
+import {
+  cleanupTaskSymlinks,
+  validateTaskSymlinks,
+  repairTaskSymlinks,
+} from "./symlink-utils.js";
+import { createTaskFileIO, generateLogName } from "./file-io.js";
 import { createJobLogger } from "./logger.js";
+import { LogEvent, LogFileExtension } from "../config/log-events.js";
 
 const ROOT = process.env.PO_ROOT || process.cwd();
 const DATA_DIR = path.join(ROOT, process.env.PO_DATA_DIR || "pipeline-data");
@@ -132,8 +137,66 @@ for (const taskName of pipeline.tasks) {
     ? modulePath
     : path.resolve(path.dirname(TASK_REGISTRY), modulePath);
 
-  // Create symlink bridge for deterministic module resolution
+  // Validate symlinks before task execution to ensure restart reliability
   const poRoot = process.env.PO_ROOT || process.cwd();
+  const expectedTargets = {
+    nodeModules: path.join(path.resolve(poRoot, ".."), "node_modules"),
+    taskRoot: path.dirname(absoluteModulePath),
+  };
+
+  const validationResult = await validateTaskSymlinks(
+    taskDir,
+    expectedTargets
+  );
+
+  if (!validationResult.isValid) {
+    logger.warn("Task symlinks validation failed, attempting repair", {
+      taskName,
+      taskDir,
+      errors: validationResult.errors,
+      validationDuration: validationResult.duration,
+    });
+
+    const repairResult = await repairTaskSymlinks(
+      taskDir,
+      poRoot,
+      absoluteModulePath
+    );
+
+    if (!repairResult.success) {
+      const errorMessage = `Failed to repair task symlinks for ${taskName}: ${repairResult.errors.join(", ")}`;
+      logger.error("Task symlink repair failed, aborting execution", {
+        taskName,
+        taskDir,
+        errors: repairResult.errors,
+        repairDuration: repairResult.duration,
+      });
+
+      await updateStatus(taskName, {
+        state: TaskState.FAILED,
+        endedAt: now(),
+        error: { message: errorMessage, type: "SymlinkRepairFailed" },
+      });
+
+      process.exitCode = 1;
+      process.exit(1);
+    }
+
+    logger.log("Task symlinks repaired successfully", {
+      taskName,
+      taskDir,
+      repairDuration: repairResult.duration,
+      relocatedEntry: repairResult.relocatedEntry,
+    });
+  } else {
+    logger.debug("Task symlinks validation passed", {
+      taskName,
+      taskDir,
+      validationDuration: validationResult.duration,
+    });
+  }
+
+  // Create symlink bridge for deterministic module resolution
   const relocatedEntry = await ensureTaskSymlinkBridge({
     taskDir,
     poRoot,
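
The symlink-utils implementation is not part of this diff, but the property accesses above imply result objects of roughly the following shape. Treat these typedefs as inferred from usage, not as the module's actual types:

// Inferred only from how the runner consumes the results above; the real
// symlink-utils module may name or nest these fields differently.
/**
 * @typedef {{ isValid: boolean, errors: string[], duration: number }} SymlinkValidationResult
 * @typedef {{ success: boolean, errors: string[], duration: number, relocatedEntry?: string }} SymlinkRepairResult
 */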
@@ -162,7 +225,12 @@ for (const taskName of pipeline.tasks) {
   // Persist execution-logs.json and failure-details.json on task failure via IO
   if (result.logs) {
     await fileIO.writeLog(
-      "execution-logs.json",
+      generateLogName(
+        taskName,
+        "pipeline",
+        LogEvent.EXECUTION_LOGS,
+        LogFileExtension.JSON
+      ),
       JSON.stringify(result.logs, null, 2),
       { mode: "replace" }
     );
@@ -175,7 +243,12 @@ for (const taskName of pipeline.tasks) {
       refinementAttempts: result.refinementAttempts || 0,
     };
     await fileIO.writeLog(
-      "failure-details.json",
+      generateLogName(
+        taskName,
+        "pipeline",
+        LogEvent.FAILURE_DETAILS,
+        LogFileExtension.JSON
+      ),
       JSON.stringify(failureDetails, null, 2),
       { mode: "replace" }
     );
@@ -218,7 +291,12 @@ for (const taskName of pipeline.tasks) {
 
   if (result.logs) {
     await fileIO.writeLog(
-      "execution-logs.json",
+      generateLogName(
+        taskName,
+        "pipeline",
+        LogEvent.EXECUTION_LOGS,
+        LogFileExtension.JSON
+      ),
       JSON.stringify(result.logs, null, 2),
       { mode: "replace" }
     );
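
generateLogName is imported from ./file-io.js but its body is not in this diff; the call sites show a (name, stage, event, extension?) signature where the extension defaults to something log-like when omitted. Purely as a guess at the contract, one implementation consistent with those call sites would be:

// Hypothetical reconstruction from the call sites only; the real
// implementation in ./file-io.js may format or order the parts differently,
// and the string values of LogEvent members are assumptions here.
function generateLogNameSketch(name, stage, event, extension = "log") {
  return `${name}-${stage}-${event}.${extension}`;
}

// Matching the call sites in this diff (values assumed):
// generateLogNameSketch("job-42", "orchestrator", "start")
//   -> "job-42-orchestrator-start.log"
// generateLogNameSketch("analyze", "pipeline", "execution-logs", "json")
//   -> "analyze-pipeline-execution-logs.json"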
@@ -0,0 +1,155 @@
+import fs from "node:fs/promises";
+import path from "node:path";
+
+/**
+ * Initialize status snapshot from artifacts in the filesystem
+ * @param {Object} options - Options object
+ * @param {string} options.jobDir - Job directory path
+ * @param {Object} options.pipeline - Pipeline configuration object
+ * @returns {Promise<Function>} Function that applies artifact initialization to a snapshot
+ */
+export async function initializeStatusFromArtifacts({ jobDir, pipeline }) {
+  if (!jobDir || typeof jobDir !== "string") {
+    throw new Error("jobDir must be a non-empty string");
+  }
+
+  if (!pipeline || typeof pipeline !== "object") {
+    throw new Error("pipeline must be an object");
+  }
+
+  const artifactsDir = path.join(jobDir, "files", "artifacts");
+  let artifactFilenames = [];
+
+  try {
+    // Read artifacts directory
+    const entries = await fs.readdir(artifactsDir, { withFileTypes: true });
+
+    // Collect filenames for regular files only
+    artifactFilenames = entries
+      .filter((entry) => entry.isFile())
+      .map((entry) => entry.name);
+
+    console.log("[STATUS_INIT] Found artifacts in directory", {
+      artifactsDir,
+      artifactCount: artifactFilenames.length,
+      artifactNames: artifactFilenames,
+    });
+  } catch (error) {
+    if (error.code === "ENOENT") {
+      // Directory doesn't exist, no artifacts to initialize
+      console.log(
+        "[STATUS_INIT] Artifacts directory does not exist, skipping initialization",
+        {
+          artifactsDir,
+        }
+      );
+    } else {
+      console.error("[STATUS_INIT] Failed to read artifacts directory", {
+        artifactsDir,
+        error: error.message,
+      });
+    }
+    // Return a no-op function for non-existent or unreadable directory
+    return (snapshot) => snapshot;
+  }
+
+  // Determine first task ID from pipeline
+  const firstTaskId = Array.isArray(pipeline.tasks) ? pipeline.tasks[0] : null;
+  console.log("[STATUS_INIT] Determined first task", {
+    firstTaskId,
+    hasTasks: Array.isArray(pipeline.tasks),
+    taskCount: pipeline.tasks?.length || 0,
+  });
+
+  // Return function that applies the artifact initialization to a snapshot
+  return function apply(snapshot) {
+    console.log("[STATUS_INIT] Applying artifact initialization to snapshot", {
+      existingArtifacts: snapshot.files?.artifacts?.length || 0,
+      newArtifacts: artifactFilenames.length,
+      firstTaskId,
+    });
+
+    // Ensure files object exists with proper structure
+    if (!snapshot.files || typeof snapshot.files !== "object") {
+      snapshot.files = { artifacts: [], logs: [], tmp: [] };
+    } else {
+      // Ensure each files array exists
+      for (const type of ["artifacts", "logs", "tmp"]) {
+        if (!Array.isArray(snapshot.files[type])) {
+          snapshot.files[type] = [];
+        }
+      }
+    }
+
+    // Add artifact filenames to root level (deduplicate)
+    const existingArtifacts = new Set(snapshot.files.artifacts || []);
+    for (const filename of artifactFilenames) {
+      if (!existingArtifacts.has(filename)) {
+        snapshot.files.artifacts.push(filename);
+        existingArtifacts.add(filename);
+      }
+    }
+
+    // Add artifact filenames to first task (if it exists)
+    if (firstTaskId) {
+      // Ensure tasks object exists
+      if (!snapshot.tasks || typeof snapshot.tasks !== "object") {
+        snapshot.tasks = {};
+      }
+
+      // Ensure first task exists
+      if (!snapshot.tasks[firstTaskId]) {
+        snapshot.tasks[firstTaskId] = {};
+      }
+
+      // Ensure task files object exists with proper structure
+      if (
+        !snapshot.tasks[firstTaskId].files ||
+        typeof snapshot.tasks[firstTaskId].files !== "object"
+      ) {
+        snapshot.tasks[firstTaskId].files = {
+          artifacts: [],
+          logs: [],
+          tmp: [],
+        };
+      } else {
+        // Ensure each task files array exists
+        for (const type of ["artifacts", "logs", "tmp"]) {
+          if (!Array.isArray(snapshot.tasks[firstTaskId].files[type])) {
+            snapshot.tasks[firstTaskId].files[type] = [];
+          }
+        }
+      }
+
+      // Add artifact filenames to first task (deduplicate)
+      const existingTaskArtifacts = new Set(
+        snapshot.tasks[firstTaskId].files.artifacts || []
+      );
+      for (const filename of artifactFilenames) {
+        if (!existingTaskArtifacts.has(filename)) {
+          snapshot.tasks[firstTaskId].files.artifacts.push(filename);
+          existingTaskArtifacts.add(filename);
+        }
+      }
+
+      console.log("[STATUS_INIT] Added artifacts to first task", {
+        firstTaskId,
+        taskArtifactCount: snapshot.tasks[firstTaskId].files.artifacts.length,
+        artifactNames: artifactFilenames,
+      });
+    }
+
+    console.log("[STATUS_INIT] Final snapshot state", {
+      rootArtifacts: snapshot.files.artifacts.length,
+      rootArtifactNames: snapshot.files.artifacts,
+      firstTaskArtifacts: firstTaskId
+        ? snapshot.tasks[firstTaskId].files.artifacts.length
+        : 0,
+      firstTaskArtifactNames: firstTaskId
+        ? snapshot.tasks[firstTaskId].files.artifacts
+        : [],
+    });
+
+    return snapshot;
+  };
+}
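
Usage mirrors the orchestrator hunk earlier in this diff: build the applier once (it performs the directory scan), then run it against a status snapshot. A minimal sketch, with a made-up job directory and pipeline:

import { initializeStatusFromArtifacts } from "./status-initializer.js";

// jobDir and task names are hypothetical; pipeline.tasks[0] becomes the
// task credited with any pre-existing upload artifacts.
const applyArtifacts = await initializeStatusFromArtifacts({
  jobDir: "/data/pipeline-data/jobs/job-42",
  pipeline: { tasks: ["analyze", "summarize"] },
});

// The applier mutates and returns the snapshot: filenames found under
// <jobDir>/files/artifacts are indexed at the root and under tasks.analyze.
const status = applyArtifacts({
  files: { artifacts: [], logs: [], tmp: [] },
  tasks: {},
});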
@@ -281,22 +281,71 @@ export async function updateTaskStatus(jobDir, taskId, taskUpdateFn) {
     throw new Error("taskUpdateFn must be a function");
   }
 
-  return writeJobStatus(jobDir, (snapshot) => {
-    // Ensure task exists
-    if (!snapshot.tasks[taskId]) {
-      snapshot.tasks[taskId] = {};
-    }
+  const jobId = path.basename(jobDir);
+  const logger = createJobLogger("StatusWriter", jobId);
 
-    const task = snapshot.tasks[taskId];
+  // Get or create the write queue for this job directory
+  const prev = writeQueues.get(jobDir) || Promise.resolve();
+  let resultSnapshot;
 
-    // Apply task updates
-    const result = taskUpdateFn(task);
-    if (result !== undefined) {
-      snapshot.tasks[taskId] = result;
-    }
+  const next = prev
+    .then(async () => {
+      logger.group("Task Status Update Operation");
+      logger.log(`Updating task ${taskId} for job: ${jobId}`);
 
-    return snapshot;
-  });
+      const statusPath = path.join(jobDir, "tasks-status.json");
+
+      // Read existing status or create default
+      const current = await readStatusFile(statusPath, jobId);
+      const validated = validateStatusSnapshot(current);
+
+      // Ensure task exists
+      if (!validated.tasks[taskId]) {
+        validated.tasks[taskId] = {};
+      }
+
+      const task = validated.tasks[taskId];
+
+      // Apply task updates
+      const result = taskUpdateFn(task);
+      if (result !== undefined) {
+        validated.tasks[taskId] = result;
+      }
+
+      validated.lastUpdated = new Date().toISOString();
+
+      // Atomic write
+      await atomicWrite(statusPath, validated);
+      logger.log("Task status file written successfully");
+
+      // Emit task:updated SSE event after successful write
+      try {
+        const eventData = {
+          jobId,
+          taskId,
+          task: validated.tasks[taskId],
+        };
+        await logger.sse("task:updated", eventData);
+        logger.log("task:updated SSE event broadcasted successfully");
+      } catch (error) {
+        // Don't fail the write if SSE emission fails
+        logger.error("Failed to emit task:updated SSE event:", error);
+      }
+
+      logger.groupEnd();
+      resultSnapshot = validated;
+    })
+    .catch((e) => {
+      throw e;
+    });
+
+  // Store the promise chain and set up cleanup
+  writeQueues.set(
+    jobDir,
+    next.finally(() => {})
+  );
+
+  return next.then(() => resultSnapshot);
 }
 
 /**
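
The rewritten updateTaskStatus chains every update for a given jobDir onto a per-directory promise queue (writeQueues), so overlapping callers serialize instead of clobbering each other's read-modify-write of tasks-status.json. For example (paths and task names hypothetical):

// Both updates target the same jobDir, so the second is queued behind the
// first; mutating the task in place and returning a replacement both work.
await Promise.all([
  updateTaskStatus("/data/jobs/job-42", "analyze", (task) => {
    task.state = "running"; // in-place mutation, no return value
  }),
  updateTaskStatus("/data/jobs/job-42", "analyze", (task) => ({
    ...task,
    attempts: (task.attempts ?? 0) + 1, // returned object replaces the entry
  })),
]);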
@@ -435,3 +484,92 @@ export async function resetJobToCleanSlate(
     return snapshot;
   });
 }
+
+/**
+ * Consolidated path jail security validation with generic error messages
+ * @param {string} filename - Filename to validate
+ * @returns {Object|null} Validation result or null if valid
+ */
+function validateFilePath(filename) {
+  // Check for path traversal patterns
+  if (filename.includes("..")) {
+    console.error("Path security: path traversal detected", { filename });
+    return {
+      allowed: false,
+      message: "Path validation failed",
+    };
+  }
+
+  // Check for absolute paths (POSIX, Windows, backslashes, ~)
+  if (
+    path.isAbsolute(filename) ||
+    /^[a-zA-Z]:/.test(filename) ||
+    filename.includes("\\") ||
+    filename.startsWith("~")
+  ) {
+    console.error("Path security: absolute path detected", { filename });
+    return {
+      allowed: false,
+      message: "Path validation failed",
+    };
+  }
+
+  // Check for empty filename
+  if (!filename || filename.trim() === "") {
+    console.error("Path security: empty filename detected");
+    return {
+      allowed: false,
+      message: "Path validation failed",
+    };
+  }
+
+  // Path is valid
+  return null;
+}
+
+/**
+ * Initialize job-level artifact index and copy artifacts to job directory
+ * @param {string} jobDir - Job directory path
+ * @param {Array} uploadArtifacts - Array of {filename, content} objects
+ * @returns {Promise<void>}
+ */
+export async function initializeJobArtifacts(jobDir, uploadArtifacts = []) {
+  if (!jobDir || typeof jobDir !== "string") {
+    throw new Error("jobDir must be a non-empty string");
+  }
+
+  if (!Array.isArray(uploadArtifacts)) {
+    throw new Error("uploadArtifacts must be an array");
+  }
+
+  if (uploadArtifacts.length === 0) {
+    return;
+  }
+
+  const jobFilesDir = path.join(jobDir, "files");
+  const jobArtifactsDir = path.join(jobFilesDir, "artifacts");
+
+  await fs.mkdir(jobFilesDir, { recursive: true });
+  await fs.mkdir(jobArtifactsDir, { recursive: true });
+
+  for (const artifact of uploadArtifacts) {
+    const { filename, content } = artifact || {};
+
+    if (!filename || typeof filename !== "string") {
+      continue; // Skip invalid entries rather than throwing
+    }
+
+    // Validate filename using the consolidated function
+    const validation = validateFilePath(filename);
+    if (validation) {
+      console.error("Path security: skipping invalid artifact", {
+        filename,
+        reason: validation.message,
+      });
+      continue; // Skip invalid filenames rather than throwing
+    }
+
+    const artifactPath = path.join(jobArtifactsDir, filename);
+    await fs.writeFile(artifactPath, content);
+  }
+}
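
Taken together, the two helpers give uploads a forgiving write path: well-formed relative filenames land under <jobDir>/files/artifacts, while traversal, absolute, backslash, drive-letter, tilde, and empty names are logged and skipped instead of aborting the job. A sketch of that behavior (job directory and filenames hypothetical):

// Assumes a writable job directory; each skipped entry logs a generic
// "Path validation failed" reason via validateFilePath above.
await initializeJobArtifacts("/data/jobs/job-42", [
  { filename: "notes.md", content: "# notes" }, // written
  { filename: "../escape.txt", content: "x" }, // skipped: path traversal
  { filename: "/etc/passwd", content: "x" }, // skipped: absolute path
  { filename: "C:\\temp\\a.txt", content: "x" }, // skipped: drive letter + backslash
  { filename: "~/home.txt", content: "x" }, // skipped: ~ prefix
]);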