@ryanfw/prompt-orchestration-pipeline 0.15.1 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,277 @@
1
+ /**
2
+ * Batch Runner - Concurrent job processing with SQLite state management
3
+ */
4
+
5
+ import crypto from "node:crypto";
6
+ import pLimit from "p-limit";
7
+
8
/**
 * Makes sure the SQLite objects used by the batch runner exist.
 *
 * Executes a single script that creates the `batch_jobs` table and the
 * `(batch_id, status)` index. Both statements use `IF NOT EXISTS`, so calling
 * this repeatedly on the same database is harmless.
 * @param {import('better-sqlite3').Database} db - SQLite database instance
 */
export function ensureBatchSchema(db) {
  const schemaSql = `
    CREATE TABLE IF NOT EXISTS batch_jobs (
      id TEXT PRIMARY KEY,
      batch_id TEXT NOT NULL,
      status TEXT NOT NULL DEFAULT 'pending',
      input TEXT NOT NULL,
      output TEXT,
      error TEXT,
      retry_count INTEGER NOT NULL DEFAULT 0,
      started_at TEXT,
      completed_at TEXT
    );
    CREATE INDEX IF NOT EXISTS idx_batch_jobs_batch_status ON batch_jobs(batch_id, status);
  `;
  db.exec(schemaSql);
}
28
+
29
/**
 * Inserts jobs into the batch_jobs table inside a single transaction.
 *
 * Each job is stored as its JSON serialization with status 'pending'. A job
 * without an `id` gets a random UUID. Re-inserting a job that already exists in
 * this batch and is not in a terminal state is a no-op (the existing row is
 * kept), which lets an interrupted batch be resumed with the same job list.
 *
 * @param {import('better-sqlite3').Database} db - SQLite database instance
 * @param {string} batchId - Unique batch identifier
 * @param {Array<Object>} jobs - Array of job objects
 * @returns {string[]} Array of job IDs, in the same order as `jobs`
 * @throws {Error} If a job already exists in this batch in a terminal state
 *   ("complete" or "permanently_failed"), or if the insert was ignored and no
 *   row for that id exists in this batch (typically because the id is already
 *   used by another batch). Any throw rolls back the whole transaction.
 */
export function insertJobs(db, batchId, jobs) {
  const insertStmt = db.prepare(
    `INSERT OR IGNORE INTO batch_jobs (id, batch_id, status, input) VALUES (?, ?, 'pending', ?)`
  );
  const selectStatusStmt = db.prepare(
    `SELECT status FROM batch_jobs WHERE id = ? AND batch_id = ?`
  );
  // Only used to build a precise error message when an id collides across batches.
  const selectOwnerStmt = db.prepare(
    `SELECT batch_id FROM batch_jobs WHERE id = ?`
  );

  const insertMany = db.transaction((jobList) => {
    const ids = [];
    for (const job of jobList) {
      const id = job.id ?? crypto.randomUUID();
      const result = insertStmt.run(id, batchId, JSON.stringify(job));

      // changes === 0 means OR IGNORE skipped the row because of a constraint
      // conflict; work out which situation we are in.
      if (result.changes === 0) {
        const existing = selectStatusStmt.get(id, batchId);

        if (!existing) {
          // `id` is the table's primary key, so a row owned by a different batch
          // blocks the insert. Without this check the job would be dropped
          // silently and never run, while its id was still reported as inserted.
          const owner = selectOwnerStmt.get(id);
          const reason = owner
            ? `the id is already used by batch "${owner.batch_id}"`
            : "the row was rejected by a table constraint";
          throw new Error(
            `Cannot insert job "${id}" for batch "${batchId}": ${reason}.`
          );
        }

        if (existing.status === "complete" || existing.status === "permanently_failed") {
          throw new Error(
            `Cannot re-insert job "${id}" for batch "${batchId}": existing job is in terminal state "${existing.status}".`
          );
        }
      }
      ids.push(id);
    }
    return ids;
  });

  return insertMany(jobs);
}
67
+
68
/**
 * Flags a job as currently being worked on.
 *
 * Sets the row's status to 'processing' and stamps `started_at` with SQLite's
 * `datetime('now')`.
 * @param {import('better-sqlite3').Database} db - SQLite database instance
 * @param {string} jobId - Job identifier
 */
export function markProcessing(db, jobId) {
  db.prepare(
    `UPDATE batch_jobs SET status = 'processing', started_at = datetime('now') WHERE id = ?`
  ).run(jobId);
}
79
+
80
/**
 * Marks a job as complete and stores its output.
 *
 * Sets status to 'complete', writes the JSON-serialized output and stamps
 * `completed_at` with SQLite's `datetime('now')`.
 * @param {import('better-sqlite3').Database} db - SQLite database instance
 * @param {string} jobId - Job identifier
 * @param {*} output - Job output (will be JSON serialized). Values that JSON
 *   cannot represent at the top level (`undefined`, functions, symbols) are
 *   stored as JSON `null`.
 */
export function markComplete(db, jobId, output) {
  // JSON.stringify returns undefined (not a string) for undefined, functions and
  // symbols. better-sqlite3 refuses to bind undefined, so a processor that
  // returns nothing would make this call throw and the successful job would be
  // recorded as failed. Fall back to the JSON text for null instead.
  const serialized = JSON.stringify(output) ?? "null";
  const stmt = db.prepare(
    `UPDATE batch_jobs SET status = 'complete', output = ?, completed_at = datetime('now') WHERE id = ?`
  );
  stmt.run(serialized, jobId);
}
92
+
93
/**
 * Records a failed attempt for a job.
 *
 * Sets status to 'failed', stores the error text and bumps `retry_count` by one
 * in the same UPDATE.
 * @param {import('better-sqlite3').Database} db - SQLite database instance
 * @param {string} jobId - Job identifier
 * @param {string} error - Error message
 */
export function markFailed(db, jobId, error) {
  db.prepare(
    `UPDATE batch_jobs SET status = 'failed', error = ?, retry_count = retry_count + 1 WHERE id = ?`
  ).run(error, jobId);
}
105
+
106
/**
 * Lists the jobs of a batch that are still eligible to run.
 *
 * A job is eligible when its status is 'pending' or 'failed' and its
 * `retry_count` is strictly below `maxRetries`. Rows come back ordered by id and
 * the stored input is parsed from JSON.
 * @param {import('better-sqlite3').Database} db - SQLite database instance
 * @param {string} batchId - Unique batch identifier
 * @param {number} maxRetries - Maximum retry attempts
 * @returns {Array<{id: string, input: Object, retryCount: number}>} Array of pending jobs
 */
export function getPendingJobs(db, batchId, maxRetries) {
  const query = `SELECT id, input, retry_count FROM batch_jobs WHERE batch_id = ? AND status IN ('pending', 'failed') AND retry_count < ? ORDER BY id`;
  const eligible = [];
  for (const record of db.prepare(query).all(batchId, maxRetries)) {
    eligible.push({
      id: record.id,
      input: JSON.parse(record.input),
      retryCount: record.retry_count,
    });
  }
  return eligible;
}
124
+
125
/**
 * Puts jobs left in 'processing' back into 'pending' for one batch.
 *
 * Intended for rows orphaned by a process crash: every row of the batch whose
 * status is 'processing' is reset, regardless of how long ago it started.
 * @param {import('better-sqlite3').Database} db - SQLite database instance
 * @param {string} batchId - Unique batch identifier
 * @returns {number} Number of jobs recovered
 */
export function recoverStaleJobs(db, batchId) {
  const { changes } = db
    .prepare(
      `UPDATE batch_jobs SET status = 'pending' WHERE batch_id = ? AND status = 'processing'`
    )
    .run(batchId);
  return changes;
}
138
+
139
/**
 * Reads back every job of a batch whose status is 'complete'.
 *
 * Both the stored input and the stored output are parsed from JSON.
 * @param {import('better-sqlite3').Database} db - SQLite database instance
 * @param {string} batchId - Unique batch identifier
 * @returns {Array<{id: string, input: Object, output: *}>} Array of completed jobs
 */
function getCompletedJobs(db, batchId) {
  const query = `SELECT id, input, output FROM batch_jobs WHERE batch_id = ? AND status = 'complete'`;
  const toResult = ({ id, input, output }) => ({
    id,
    input: JSON.parse(input),
    output: JSON.parse(output),
  });
  return db.prepare(query).all(batchId).map(toResult);
}
156
+
157
/**
 * Reads back the jobs of a batch that failed and have no attempts left.
 *
 * Selects rows whose status is 'failed' and whose `retry_count` has reached
 * `maxRetries`; the stored input is parsed from JSON.
 * @param {import('better-sqlite3').Database} db - SQLite database instance
 * @param {string} batchId - Unique batch identifier
 * @param {number} maxRetries - Maximum retry attempts
 * @returns {Array<{id: string, input: Object, error: string, retryCount: number}>} Array of failed jobs
 */
function getFailedJobs(db, batchId, maxRetries) {
  const exhausted = db
    .prepare(
      `SELECT id, input, error, retry_count FROM batch_jobs WHERE batch_id = ? AND status = 'failed' AND retry_count >= ?`
    )
    .all(batchId, maxRetries);

  const failures = [];
  for (const record of exhausted) {
    failures.push({
      id: record.id,
      input: JSON.parse(record.input),
      error: record.error,
      retryCount: record.retry_count,
    });
  }
  return failures;
}
176
+
177
/**
 * Runs the processor for a single job and records the outcome.
 *
 * The job is marked 'processing' first. If the processor resolves, its result
 * is stored via markComplete; if the processor (or markComplete) throws, the
 * failure is stored via markFailed and the error is not rethrown. A failure in
 * markProcessing itself happens outside the try block and propagates.
 *
 * @param {import('better-sqlite3').Database} db - SQLite database instance
 * @param {Object} job - Job object with id, input, retryCount
 * @param {Function} processor - async (input, ctx) => result; ctx carries
 *   `attempt` (1-based, retryCount + 1), `batchId` and `db`
 * @param {string} batchId - Unique batch identifier
 * @returns {Promise<void>}
 */
async function processOneJob(db, job, processor, batchId) {
  markProcessing(db, job.id);
  try {
    const output = await processor(job.input, {
      attempt: job.retryCount + 1,
      batchId,
      db,
    });
    markComplete(db, job.id, output);
  } catch (err) {
    // Anything can be thrown, including null/undefined. Reading `.message`
    // directly would throw again here and leave the job stuck in 'processing',
    // so it would appear in neither the completed nor the failed results.
    markFailed(db, job.id, err?.message || String(err));
  }
}
197
+
198
/**
 * Checks the options passed to runBatch and rejects the first problem found.
 *
 * Checks run in this order: `options` is an object, `jobs` is an array, `jobs`
 * is non-empty, `processor` is a function, then (only when provided)
 * `concurrency` is an integer >= 1 and `maxRetries` is an integer >= 0.
 * @param {Object} options - Batch options to validate
 * @throws {Error} If options are invalid
 */
export function validateBatchOptions(options) {
  const isObject = Boolean(options) && typeof options === "object";
  if (!isObject) {
    throw new Error(
      `runBatch: options must be an object, got: ${typeof options}`
    );
  }

  const { jobs, processor, concurrency, maxRetries } = options;

  if (!Array.isArray(jobs)) {
    throw new Error(`runBatch: jobs must be an array, got: ${typeof jobs}`);
  }
  if (jobs.length === 0) {
    throw new Error("runBatch: jobs must be a non-empty array");
  }
  if (typeof processor !== "function") {
    throw new Error(
      `runBatch: processor must be a function, got: ${typeof processor}`
    );
  }

  const concurrencyOk =
    concurrency === undefined ||
    (Number.isInteger(concurrency) && concurrency >= 1);
  if (!concurrencyOk) {
    throw new Error(
      `runBatch: concurrency must be a positive integer, got: ${concurrency}`
    );
  }

  const maxRetriesOk =
    maxRetries === undefined ||
    (Number.isInteger(maxRetries) && maxRetries >= 0);
  if (!maxRetriesOk) {
    throw new Error(
      `runBatch: maxRetries must be a non-negative integer, got: ${maxRetries}`
    );
  }
}
237
+
238
/**
 * Executes a batch of jobs concurrently with retry support.
 *
 * Steps: ensure the schema exists, reset this batch's rows left in 'processing',
 * insert the jobs, then repeatedly fetch the eligible jobs and run them through
 * a concurrency limiter until none remain. Individual job failures are recorded
 * in the database rather than thrown; the final result is read back from it.
 *
 * @param {import('better-sqlite3').Database} db - SQLite database instance
 * @param {Object} options - Batch options
 * @param {Array<Object>} options.jobs - Array of job objects
 * @param {Function} options.processor - async (input, ctx) => result
 * @param {number} [options.concurrency=10] - Max concurrent jobs
 * @param {number} [options.maxRetries=3] - Max attempts per job. A job is
 *   eligible while its retry_count is below this value, so it bounds the total
 *   number of attempts; 0 is treated as 1 so every job runs at least once.
 * @param {string} [options.batchId] - Unique batch identifier (auto-generated if omitted)
 * @returns {Promise<{completed: Array, failed: Array}>} Batch results
 */
export async function executeBatch(db, options) {
  const {
    jobs,
    processor,
    concurrency = 10,
    maxRetries = 3,
    batchId = crypto.randomUUID(),
  } = options;

  // getPendingJobs filters on `retry_count < limit`. With a limit of 0 (which
  // validateBatchOptions accepts) nothing would ever match, and the batch would
  // return empty results without running a single job. Clamp to one attempt.
  const maxAttempts = Math.max(1, maxRetries);

  ensureBatchSchema(db);
  recoverStaleJobs(db, batchId);
  insertJobs(db, batchId, jobs);

  const limit = pLimit(concurrency);

  let pending = getPendingJobs(db, batchId, maxAttempts);
  while (pending.length > 0) {
    const promises = pending.map((job) =>
      limit(() => processOneJob(db, job, processor, batchId))
    );
    // allSettled: one rejected job must not abort the rest of the round.
    await Promise.allSettled(promises);
    pending = getPendingJobs(db, batchId, maxAttempts);
  }

  return {
    completed: getCompletedJobs(db, batchId),
    failed: getFailedJobs(db, batchId, maxAttempts),
  };
}
@@ -8,6 +8,8 @@ import {
8
8
  isValidLogEvent,
9
9
  isValidLogFileExtension,
10
10
  } from "../config/log-events.js";
11
+ import Database from "better-sqlite3";
12
+ import { executeBatch, validateBatchOptions } from "./batch-runner.js";
11
13
 
12
14
  /**
13
15
  * Creates a task-scoped file I/O interface that manages file operations
@@ -26,7 +28,7 @@ async function ensureDir(dir) {
26
28
  }
27
29
 
28
30
  function ensureDirSync(dir) {
29
- fsSync.mkdir(dir, { recursive: true });
31
+ fsSync.mkdirSync(dir, { recursive: true });
30
32
  }
31
33
 
32
34
  export function createTaskFileIO({
@@ -293,6 +295,40 @@ export function createTaskFileIO({
293
295
  getCurrentStage() {
294
296
  return getStage();
295
297
  },
298
+
299
+ /**
300
+ * Get a SQLite database instance for this job run
301
+ * @param {Object} options - better-sqlite3 options
302
+ * @returns {Database} better-sqlite3 Database instance
303
+ */
304
+ getDB(options = {}) {
305
+ ensureDirSync(artifactsDir);
306
+ const dbPath = path.join(artifactsDir, "run.db");
307
+ const db = new Database(dbPath, options);
308
+ db.pragma("journal_mode = WAL");
309
+ updateStatusWithFilesSync("artifacts", "run.db");
310
+ return db;
311
+ },
312
+
313
+ /**
314
+ * Execute a batch of jobs concurrently with SQLite state management
315
+ * @param {Object} options - Batch options
316
+ * @param {Array<Object>} options.jobs - Array of job objects
317
+ * @param {Function} options.processor - async (input, ctx) => result
318
+ * @param {number} [options.concurrency=10] - Max concurrent jobs
319
+ * @param {number} [options.maxRetries=3] - Max retry attempts per job
320
+ * @param {string} [options.batchId] - Unique batch identifier (auto-generated if omitted)
321
+ * @returns {Promise<{completed: Array, failed: Array}>} Batch results
322
+ */
323
+ async runBatch(options) {
324
+ validateBatchOptions(options);
325
+ const db = this.getDB();
326
+ try {
327
+ return await executeBatch(db, options);
328
+ } finally {
329
+ db.close();
330
+ }
331
+ },
296
332
  };
297
333
  }
298
334
 
@@ -1,7 +1,9 @@
1
1
  import Ajv from "ajv";
2
+ import addFormats from "ajv-formats";
2
3
  import { getConfig } from "./config.js";
3
4
 
4
- const ajv = new Ajv({ allErrors: true });
5
+ const ajv = new Ajv({ allErrors: true, strictFormats: false });
6
+ addFormats(ajv);
5
7
 
6
8
  // JSON schema for seed file structure - uses config for validation rules
7
9
  function getSeedSchema() {