@ryanfw/prompt-orchestration-pipeline 0.0.1

@@ -0,0 +1,335 @@
+ // ESM
+ import fs from "node:fs/promises";
+ import path from "node:path";
+ import crypto from "node:crypto";
+ import chokidar from "chokidar";
+ import { spawn } from "node:child_process";
+ import url from "node:url";
+ import { validateSeed, formatValidationErrors } from "./validation.js";
+ import { getConfig } from "./config.js";
+ import { withRetry } from "./retry.js";
+
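+ // Orchestrator: watches the pending directory for `*-seed.json` files,
+ // promotes each seed into a work directory under `current/`, and spawns a
+ // pipeline-runner child process per pipeline, retrying failed spawns and
+ // dead-lettering pipelines that never start.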
+ export class Orchestrator {
+   constructor({ paths, pipelineDefinition }) {
+     this.paths = paths;
+     this.pipelineDefinition = pipelineDefinition;
+     this.runningProcesses = new Map();
+     this.watcher = null;
+   }
+
+   async start() {
+     await fs.mkdir(this.paths.pending, { recursive: true });
+     await fs.mkdir(this.paths.current, { recursive: true });
+     await fs.mkdir(this.paths.complete, { recursive: true });
+
+     for (const name of await this.#listDirs(this.paths.current)) {
+       this.#ensureRunner(name);
+     }
+
+     const config = getConfig();
+     this.watcher = chokidar
+       .watch(path.join(this.paths.pending, "*-seed.json"), {
+         awaitWriteFinish: {
+           stabilityThreshold: config.orchestrator.watchStabilityThreshold,
+           pollInterval: config.orchestrator.watchPollInterval,
+         },
+       })
+       .on("add", (p) => this.#onSeed(p));
+
+     return this;
+   }
+
+   async stop() {
+     if (this.watcher) {
+       await this.watcher.close();
+       this.watcher = null;
+     }
+
+     for (const [name, info] of this.runningProcesses) {
+       info.process.kill("SIGTERM");
+     }
+
+     // Skip the shutdown timeout in test environment
+     if (process.env.NODE_ENV !== "test") {
+       const config = getConfig();
+       await new Promise((r) =>
+         setTimeout(r, config.orchestrator.shutdownTimeout)
+       );
+     }
+
+     for (const [name, info] of this.runningProcesses) {
+       if (!info.process.killed) info.process.kill("SIGKILL");
+     }
+
+     this.runningProcesses.clear();
+   }
+
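+   // Handle a newly detected seed file: claim a `<name>.lock` file so only
+   // one orchestrator instance processes the seed, create the work directory
+   // with seed.json and an initial tasks-status.json, move the seed into
+   // `processed/` (or `rejected/` if validation fails), then start a runner.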
+   async #onSeed(seedPath) {
+     const base = path.basename(seedPath);
+     const name = base.replace(/-seed\.json$/, "");
+     const workDir = path.join(this.paths.current, name);
+     const lockFile = path.join(this.paths.current, `${name}.lock`);
+
+     try {
+       await fs.writeFile(lockFile, process.pid.toString(), { flag: "wx" });
+     } catch (err) {
+       if (err.code === "EEXIST") return;
+       throw err;
+     }
+
+     try {
+       try {
+         await fs.mkdir(workDir, { recursive: false });
+       } catch (err) {
+         if (err.code === "EEXIST") return;
+         throw err;
+       }
+
+       const seed = JSON.parse(await fs.readFile(seedPath, "utf8"));
+
+       // Validate seed file structure
+       const validation = validateSeed(seed);
+       if (!validation.valid) {
+         const errorMsg = formatValidationErrors(validation.errors);
+         console.error(`Invalid seed file ${base}:\n${errorMsg}`);
+         // Move invalid seed to a rejected directory for inspection
+         const rejectedDir = path.join(
+           path.dirname(this.paths.pending),
+           "rejected"
+         );
+         await fs.mkdir(rejectedDir, { recursive: true });
+         const rejectedPath = path.join(rejectedDir, base);
+         await fs.rename(seedPath, rejectedPath);
+         return;
+       }
+
+       const pipelineId = this.#makeId();
+
+       await this.#atomicWrite(
+         path.join(workDir, "seed.json"),
+         JSON.stringify(seed, null, 2)
+       );
+       await this.#atomicWrite(
+         path.join(workDir, "tasks-status.json"),
+         JSON.stringify(
+           {
+             pipelineId,
+             name,
+             current: null,
+             createdAt: new Date().toISOString(),
+             tasks: {},
+           },
+           null,
+           2
+         )
+       );
+
+       await fs.mkdir(path.join(workDir, "tasks"), { recursive: true });
+
+       // Move the seed file to a 'processed' directory after successful processing
+       const processedDir = path.join(
+         path.dirname(this.paths.pending),
+         "processed"
+       );
+       await fs.mkdir(processedDir, { recursive: true });
+       const processedPath = path.join(processedDir, base);
+       await fs.rename(seedPath, processedPath);
+     } finally {
+       try {
+         await fs.unlink(lockFile);
+       } catch {}
+     }
+
+     // Start runner after all file operations are complete
+     this.#ensureRunner(name);
+   }
+
+   #ensureRunner(name) {
+     if (this.runningProcesses.has(name)) return;
+
+     const config = getConfig();
+
+     // Wrap process spawn in retry logic (fire-and-forget)
+     // This is intentionally not awaited - we want to start runners asynchronously
+     // and let them run in the background. Failures are handled via dead letter queue.
+     withRetry(() => this.#spawnRunner(name), {
+       maxAttempts: config.orchestrator.processSpawnRetries,
+       initialDelay: config.orchestrator.processSpawnRetryDelay,
+       onRetry: ({ attempt, delay, error }) => {
+         console.warn(
+           `Failed to start pipeline ${name} (attempt ${attempt}): ${error.message}. Retrying in ${delay}ms...`
+         );
+       },
+       shouldRetry: (error) => {
+         // Don't retry if the error is due to missing files or invalid config
+         const nonRetryableCodes = ["ENOENT", "EACCES", "MODULE_NOT_FOUND"];
+         const nonRetryableMessages = ["Invalid pipeline"];
+         if (error.code && nonRetryableCodes.includes(error.code)) {
+           return false;
+         }
+         if (error.message && nonRetryableMessages.includes(error.message)) {
+           return false;
+         }
+         return true;
+       },
+     }).catch((error) => {
+       console.error(
+         `Failed to start pipeline ${name} after ${config.orchestrator.processSpawnRetries} attempts:`,
+         error
+       );
+       // Move to dead letter queue
+       this.#moveToDeadLetter(name, error).catch((dlqError) => {
+         console.error(`Failed to move ${name} to dead letter queue:`, dlqError);
+       });
+     });
+   }
+
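+   // Spawn pipeline-runner.js as a child process with PO_* environment
+   // variables describing the data, config, and pipeline paths. The returned
+   // promise resolves once the child has survived a short startup window and
+   // rejects if it errors out or exits before then.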
+   #spawnRunner(name) {
+     return new Promise((resolve, reject) => {
+       const __dirname = path.dirname(url.fileURLToPath(import.meta.url));
+       const runnerPath = path.join(__dirname, "pipeline-runner.js");
+
+       const env = {
+         ...process.env,
+         PO_ROOT: process.cwd(),
+         PO_DATA_DIR: path.relative(
+           process.cwd(),
+           path.dirname(this.paths.pending)
+         ),
+         PO_CURRENT_DIR: this.paths.current,
+         PO_COMPLETE_DIR: this.paths.complete,
+         PO_CONFIG_DIR: path.join(process.cwd(), "pipeline-config"),
+         PO_PIPELINE_PATH:
+           this.pipelineDefinition?.__path ||
+           path.join(process.cwd(), "pipeline-config", "pipeline.json"),
+         PO_TASK_REGISTRY: path.join(
+           process.cwd(),
+           "pipeline-config",
+           "tasks/index.js"
+         ),
+       };
+
+       const child = spawn(process.execPath, [runnerPath, name], {
+         stdio: ["ignore", "inherit", "inherit"],
+         env,
+         cwd: process.cwd(),
+       });
+
+       // Track if process started successfully
+       let started = false;
+
+       // Consider spawn successful after a short delay
+       const startupTimeout = setTimeout(() => {
+         started = true;
+         resolve();
+       }, 100);
+
+       this.runningProcesses.set(name, {
+         process: child,
+         startedAt: new Date().toISOString(),
+         name,
+       });
+
+       child.on("exit", (code, signal) => {
+         clearTimeout(startupTimeout);
+         this.runningProcesses.delete(name);
+         if (code !== 0) {
+           console.error(
+             `Pipeline ${name} exited with code ${code}, signal ${signal}`
+           );
+         } else {
+           console.log(`Pipeline ${name} completed successfully`);
+         }
+         // If the child exited before the startup window elapsed, settle the
+         // startup promise so an immediate failure is surfaced to withRetry
+         // instead of leaving the promise pending forever.
+         if (!started) {
+           started = true;
+           if (code === 0) resolve();
+           else reject(new Error(`Pipeline ${name} exited with code ${code}`));
+         }
+       });
+
+       child.on("error", (err) => {
+         clearTimeout(startupTimeout);
+         this.runningProcesses.delete(name);
+         if (!started) {
+           reject(err);
+         } else {
+           console.error(`Pipeline ${name} encountered error:`, err);
+         }
+       });
+     });
+   }
+
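+   // Record the failure in `dead-letter/<name>-error.json` and relocate the
+   // pipeline's work directory under `dead-letter/` for later inspection,
+   // falling back to copy-and-delete if the rename fails.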
+   async #moveToDeadLetter(name, error) {
+     const workDir = path.join(this.paths.current, name);
+     const deadLetterDir = path.join(
+       path.dirname(this.paths.pending),
+       "dead-letter"
+     );
+     await fs.mkdir(deadLetterDir, { recursive: true });
+
+     const errorLog = {
+       name,
+       error: {
+         message: error.message,
+         stack: error.stack,
+       },
+       timestamp: new Date().toISOString(),
+       attempts: getConfig().orchestrator.processSpawnRetries,
+     };
+
+     await this.#atomicWrite(
+       path.join(deadLetterDir, `${name}-error.json`),
+       JSON.stringify(errorLog, null, 2)
+     );
+
+     // Move the work directory to dead letter
+     const deadLetterWorkDir = path.join(deadLetterDir, name);
+     try {
+       await fs.rename(workDir, deadLetterWorkDir);
+     } catch (err) {
+       // If rename fails, try to copy
+       console.warn(`Could not move ${name} to dead letter, attempting copy`);
+       try {
+         await this.#copyDirRecursive(workDir, deadLetterWorkDir);
+         await fs.rm(workDir, { recursive: true, force: true });
+       } catch (copyErr) {
+         console.error(`Failed to copy ${name} to dead letter:`, copyErr);
+       }
+     }
+   }
+
+   async #copyDirRecursive(src, dest) {
+     await fs.mkdir(dest, { recursive: true });
+     const entries = await fs.readdir(src, { withFileTypes: true });
+     for (const entry of entries) {
+       const srcPath = path.join(src, entry.name);
+       const destPath = path.join(dest, entry.name);
+       if (entry.isDirectory()) {
+         await this.#copyDirRecursive(srcPath, destPath);
+       } else if (entry.isFile()) {
+         await fs.copyFile(srcPath, destPath);
+       }
+     }
+   }
+
+   async #listDirs(dir) {
+     try {
+       const entries = await fs.readdir(dir, { withFileTypes: true });
+       return entries.filter((e) => e.isDirectory()).map((e) => e.name);
+     } catch (err) {
+       if (err.code === "ENOENT") return [];
+       throw err;
+     }
+   }
+
+   #makeId() {
+     return (
+       "pl-" +
+       new Date().toISOString().replaceAll(/[:.]/g, "-") +
+       "-" +
+       crypto.randomBytes(3).toString("hex")
+     );
+   }
+
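+   // Write to a temporary file and rename it into place so readers never
+   // observe a partially written file.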
+   async #atomicWrite(file, data) {
+     const tmp = file + ".tmp";
+     await fs.writeFile(tmp, data);
+     await fs.rename(tmp, file);
+   }
+ }
@@ -0,0 +1,182 @@
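+ // Pipeline runner: executed by the Orchestrator as a child process
+ // (`node pipeline-runner.js <name>`). It resolves its directories from the
+ // PO_* environment variables (falling back to defaults), runs the pipeline's
+ // tasks in order, and records progress in the work directory's
+ // tasks-status.json.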
+ import fs from "node:fs/promises";
+ import path from "node:path";
+ import { pathToFileURL } from "node:url";
+ import { runPipeline } from "./task-runner.js";
+
+ const ROOT = process.env.PO_ROOT || process.cwd();
+ const DATA_DIR = path.join(ROOT, process.env.PO_DATA_DIR || "pipeline-data");
+ const CURRENT_DIR =
+   process.env.PO_CURRENT_DIR || path.join(DATA_DIR, "current");
+ const COMPLETE_DIR =
+   process.env.PO_COMPLETE_DIR || path.join(DATA_DIR, "complete");
+
+ const CONFIG_DIR =
+   process.env.PO_CONFIG_DIR || path.join(ROOT, "pipeline-config");
+ const TASK_REGISTRY =
+   process.env.PO_TASK_REGISTRY || path.join(CONFIG_DIR, "tasks/index.js");
+ const PIPELINE_DEF_PATH =
+   process.env.PO_PIPELINE_PATH || path.join(CONFIG_DIR, "pipeline.json");
+
+ const name = process.argv[2];
+ if (!name) throw new Error("runner requires pipeline name");
+
+ const workDir = path.join(CURRENT_DIR, name);
+ const tasksStatusPath = path.join(workDir, "tasks-status.json");
+
+ const pipeline = JSON.parse(await fs.readFile(PIPELINE_DEF_PATH, "utf8"));
+ // Add cache busting to force task registry reload
+ const taskRegistryUrl = `${pathToFileURL(TASK_REGISTRY).href}?t=${Date.now()}`;
+ const tasks = (await import(taskRegistryUrl)).default;
+
+ const status = JSON.parse(await fs.readFile(tasksStatusPath, "utf8"));
+ const seed = JSON.parse(
+   await fs.readFile(path.join(workDir, "seed.json"), "utf8")
+ );
+
+ let pipelineArtifacts = {};
+
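+ // Execute tasks in the order declared by the pipeline definition. Tasks
+ // already marked "done" are skipped; their saved output is reloaded so it is
+ // available as artifacts for downstream tasks.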
+ for (const taskName of pipeline.tasks) {
+   if (status.tasks[taskName]?.state === "done") {
+     try {
+       const outputPath = path.join(workDir, "tasks", taskName, "output.json");
+       const output = JSON.parse(await fs.readFile(outputPath, "utf8"));
+       pipelineArtifacts[taskName] = output;
+     } catch {}
+     continue;
+   }
+
+   await updateStatus(taskName, {
+     state: "running",
+     startedAt: now(),
+     attempts: (status.tasks[taskName]?.attempts || 0) + 1,
+   });
+
+   const taskDir = path.join(workDir, "tasks", taskName);
+   await fs.mkdir(taskDir, { recursive: true });
+   await atomicWrite(
+     path.join(taskDir, "letter.json"),
+     JSON.stringify({ task: taskName, at: now() }, null, 2)
+   );
+
+   try {
+     const ctx = {
+       workDir,
+       taskDir,
+       seed,
+       artifacts: pipelineArtifacts,
+       taskName,
+       taskConfig: pipeline.taskConfig?.[taskName] || {},
+     };
+     const modulePath = tasks[taskName];
+     if (!modulePath) throw new Error(`Task not registered: ${taskName}`);
+
+     // Resolve relative paths from task registry to absolute paths
+     const absoluteModulePath = path.isAbsolute(modulePath)
+       ? modulePath
+       : path.resolve(path.dirname(TASK_REGISTRY), modulePath);
+
+     const result = await runPipeline(absoluteModulePath, ctx);
+
+     if (!result.ok) {
+       throw new Error(
+         `${taskName} failed after ${result.refinementAttempts || 0} attempts: ${result.error?.message || "unknown"}`
+       );
+     }
+
+     if (result.context?.output) {
+       await atomicWrite(
+         path.join(taskDir, "output.json"),
+         JSON.stringify(result.context.output, null, 2)
+       );
+       pipelineArtifacts[taskName] = result.context.output;
+     }
+
+     if (result.logs) {
+       await atomicWrite(
+         path.join(taskDir, "execution-logs.json"),
+         JSON.stringify(result.logs, null, 2)
+       );
+     }
+
+     const artifacts = await getArtifacts(taskDir);
+     await updateStatus(taskName, {
+       state: "done",
+       endedAt: now(),
+       artifacts,
+       executionTime:
+         result.logs?.reduce((total, log) => total + (log.ms || 0), 0) || 0,
+       refinementAttempts: result.refinementAttempts || 0,
+     });
+   } catch (err) {
+     await updateStatus(taskName, {
+       state: "failed",
+       endedAt: now(),
+       error: normalizeError(err),
+     });
+     process.exitCode = 1;
+     process.exit(1);
+   }
+ }
+
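+ // All tasks finished: move the work directory to complete/ and append a
+ // summary record for this run to runs.jsonl.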
+ await fs.mkdir(COMPLETE_DIR, { recursive: true });
+ const dest = path.join(COMPLETE_DIR, name);
+ await fs.rename(workDir, dest);
+ await appendLine(
+   path.join(COMPLETE_DIR, "runs.jsonl"),
+   JSON.stringify({
+     name,
+     pipelineId: status.pipelineId,
+     finishedAt: now(),
+     tasks: Object.keys(status.tasks),
+     totalExecutionTime: Object.values(status.tasks).reduce(
+       (total, t) => total + (t.executionTime || 0),
+       0
+     ),
+     totalRefinementAttempts: Object.values(status.tasks).reduce(
+       (total, t) => total + (t.refinementAttempts || 0),
+       0
+     ),
+     finalArtifacts: Object.keys(pipelineArtifacts),
+   }) + "\n"
+ );
+
+ function now() {
+   return new Date().toISOString();
+ }
+
+ async function updateStatus(taskName, patch) {
+   const current = JSON.parse(await fs.readFile(tasksStatusPath, "utf8"));
+   current.current = taskName;
+   current.tasks[taskName] = { ...(current.tasks[taskName] || {}), ...patch };
+   await atomicWrite(tasksStatusPath, JSON.stringify(current, null, 2));
+   Object.assign(status, current);
+ }
+
+ async function appendLine(file, line) {
+   await fs.mkdir(path.dirname(file), { recursive: true });
+   await fs.appendFile(file, line);
+ }
+
+ async function atomicWrite(file, data) {
+   const tmp = file + ".tmp";
+   await fs.writeFile(tmp, data);
+   await fs.rename(tmp, file);
+ }
+
+ function normalizeError(e) {
+   if (e instanceof Error)
+     return { name: e.name, message: e.message, stack: e.stack };
+   return { message: String(e) };
+ }
+
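+ // List which of the known per-task artifact files exist in a task directory.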
+ async function getArtifacts(dir) {
+   const potentialFiles = ["output.json", "letter.json", "execution-logs.json"];
+   const artifacts = [];
+   for (const file of potentialFiles) {
+     try {
+       await fs.stat(path.join(dir, file));
+       artifacts.push(file);
+     } catch {}
+   }
+   return artifacts;
+ }
@@ -0,0 +1,83 @@
+ /**
+  * Retry utilities for handling transient failures
+  * Implements exponential backoff with configurable options
+  */
+
+ /**
+  * Sleep for specified milliseconds
+  * @param {number} ms - Milliseconds to sleep
+  * @returns {Promise<void>}
+  */
+ function sleep(ms) {
+   return new Promise((resolve) => setTimeout(resolve, ms));
+ }
+
+ /**
+  * Execute a function with retry logic and exponential backoff
+  * @param {Function} fn - Async function to execute
+  * @param {object} options - Retry options
+  * @param {number} options.maxAttempts - Maximum number of attempts (default: 3)
+  * @param {number} options.initialDelay - Initial delay in ms (default: 1000)
+  * @param {number} options.maxDelay - Maximum delay in ms (default: 10000)
+  * @param {number} options.backoffMultiplier - Backoff multiplier (default: 2)
+  * @param {Function} options.onRetry - Callback on retry (default: noop)
+  * @param {Function} options.shouldRetry - Function to determine if error should be retried (default: always retry)
+  * @returns {Promise<any>} Result of successful function execution
+  * @throws {Error} Last error if all attempts fail
+  */
+ export async function withRetry(fn, options = {}) {
+   const {
+     maxAttempts = 3,
+     initialDelay = 1000,
+     maxDelay = 10000,
+     backoffMultiplier = 2,
+     onRetry = () => {},
+     shouldRetry = () => true,
+   } = options;
+
+   let lastError;
+
+   for (let attempt = 1; attempt <= maxAttempts; attempt++) {
+     try {
+       return await fn();
+     } catch (error) {
+       lastError = error;
+
+       // Check if we should retry this error
+       if (!shouldRetry(error)) {
+         throw error;
+       }
+
+       // If this was the last attempt, throw the error
+       if (attempt === maxAttempts) {
+         break;
+       }
+
+       // Calculate delay with exponential backoff
+       const delay = Math.min(
+         initialDelay * Math.pow(backoffMultiplier, attempt - 1),
+         maxDelay
+       );
+
+       // Call retry callback
+       onRetry({ attempt, delay, error, maxAttempts });
+
+       // Wait before retrying
+       await sleep(delay);
+     }
+   }
+
+   // All attempts failed
+   throw lastError;
+ }
+
+ /**
+  * Create a retry wrapper with preset options
+  * @param {object} defaultOptions - Default retry options
+  * @returns {Function} Retry function with preset options
+  */
+ export function createRetryWrapper(defaultOptions = {}) {
+   return (fn, options = {}) => {
+     return withRetry(fn, { ...defaultOptions, ...options });
+   };
+ }
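+
+ // Usage sketch (illustrative only; `readRemoteConfig` is a hypothetical
+ // caller-supplied async function, not part of this package):
+ //
+ //   const result = await withRetry(() => readRemoteConfig(), {
+ //     maxAttempts: 5,
+ //     initialDelay: 500,
+ //     shouldRetry: (err) => err.code !== "EACCES",
+ //   });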