job-forge 2.14.13 → 2.14.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.codex/config.toml +1 -2
- package/.cursor/mcp.json +1 -6
- package/.cursor/rules/main.mdc +3 -0
- package/.mcp.json +1 -6
- package/AGENTS.md +3 -0
- package/CLAUDE.md +3 -0
- package/README.md +5 -2
- package/batch/README.md +9 -2
- package/batch/batch-runner.sh +18 -2
- package/bin/create-job-forge.mjs +2 -0
- package/bin/job-forge.mjs +25 -0
- package/docs/ARCHITECTURE.md +12 -7
- package/docs/CUSTOMIZATION.md +12 -0
- package/docs/README.md +1 -1
- package/docs/SETUP.md +3 -1
- package/iso/instructions.md +3 -0
- package/iso/mcp.json +1 -2
- package/modes/batch.md +9 -0
- package/opencode.json +0 -3
- package/package.json +6 -2
- package/scripts/batch-orchestrator.mjs +886 -0
- package/scripts/guard.mjs +404 -0
- package/templates/guards/jobforge-baseline.yaml +50 -0
|
@@ -0,0 +1,886 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Durable JobForge batch runner powered by @razroo/iso-orchestrator.
|
|
4
|
+
*
|
|
5
|
+
* This preserves the public batch-runner.sh interface while moving the
|
|
6
|
+
* load-bearing control loop into a resumable workflow:
|
|
7
|
+
* - one durable workflow record per project
|
|
8
|
+
* - idempotent bundle execution keyed by URL + retry count
|
|
9
|
+
* - bounded fan-out through workflow.forEach(..., { maxParallel })
|
|
10
|
+
* - mutexed state/report-number writes across parallel workers
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { spawn, spawnSync } from 'node:child_process';
import { createHash } from 'node:crypto';
import { existsSync } from 'node:fs';
import {
  mkdir,
  readFile,
  readdir,
  rename,
  rm,
  stat,
  writeFile,
} from 'node:fs/promises';
import { dirname, isAbsolute, join, relative, resolve, sep } from 'node:path';
import { fileURLToPath } from 'node:url';

import { runWorkflow } from '@razroo/iso-orchestrator';
|
|
28
|
+
|
|
29
|
+
// Directory containing this script, and the package root one level above it.
const __dirname = dirname(fileURLToPath(import.meta.url));
const PKG_ROOT = resolve(__dirname, '..');

// Project being processed: JOB_FORGE_PROJECT when set (and non-empty), else the cwd.
const PROJECT_DIR = process.env.JOB_FORGE_PROJECT || process.cwd();

// Project-level directories.
const REPORTS_DIR = join(PROJECT_DIR, 'reports');
const APPLICATIONS_DIR = join(PROJECT_DIR, 'data', 'applications');
const WORKFLOW_DIR = join(PROJECT_DIR, '.jobforge-runs');

// Everything the batch runner reads or writes under <project>/batch.
const BATCH_DIR = join(PROJECT_DIR, 'batch');
const INPUT_FILE = join(PROJECT_DIR, 'batch', 'batch-input.tsv');
const STATE_FILE = join(PROJECT_DIR, 'batch', 'batch-state.tsv');
const PROMPT_FILE = join(PROJECT_DIR, 'batch', 'batch-prompt.md');
const LOCK_FILE = join(PROJECT_DIR, 'batch', 'batch-runner.pid');
const LOGS_DIR = join(PROJECT_DIR, 'batch', 'logs');
const TRACKER_DIR = join(PROJECT_DIR, 'batch', 'tracker-additions');
const TSV_MERGED_DIR = join(PROJECT_DIR, 'batch', 'tracker-additions', 'merged');

// Workflow id used when --workflow-id is not given.
const DEFAULT_WORKFLOW_ID = 'jobforge-batch';

// Tab-separated header row of batch-state.tsv; the column order is the row layout.
const STATE_HEADER = [
  'id',
  'url',
  'status',
  'started_at',
  'completed_at',
  'report_num',
  'score',
  'error',
  'retries',
].join('\t');
|
|
47
|
+
|
|
48
|
+
/**
 * Print the command-line help text to stdout.
 *
 * Options and file names are laid out in aligned columns so that wrapped
 * description lines are visually distinct from the option they belong to.
 */
function usage() {
  console.log(`job-forge batch runner - process job offers in batch via opencode run workers
Uses your default opencode model.

Usage: batch-runner.sh [OPTIONS]

Options:
  --parallel N       Number of parallel workers (default: 1)
  --bundle-size N    Offers per worker invocation (default: 5, use 1 for
                     legacy per-offer mode). Each worker processes N
                     offers sequentially, amortizing the system prompt.
  --dry-run          Show what would be processed, don't execute
  --retry-failed     Only retry offers marked as "failed" in state
  --start-from N     Start from offer ID N
  --max-retries N    Max retry attempts per offer (default: 2)
  --workflow-id ID   Durable workflow id (default: ${DEFAULT_WORKFLOW_ID})
  -h, --help         Show this help

Files:
  batch-input.tsv      Input offers (id, url, source, notes)
  batch-state.tsv      Processing state (auto-managed)
  .jobforge-runs/      Durable iso-orchestrator workflow records
  batch-prompt.md      Prompt template for workers
  logs/                Per-bundle logs
  tracker-additions/   Tracker lines for post-batch merge`);
}
|
|
74
|
+
|
|
75
|
+
/**
 * Turn the CLI argument vector into an options object.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{parallel: number, dryRun: boolean, retryFailed: boolean,
 *   startFrom: number, maxRetries: number, bundleSize: number,
 *   workflowId: string}} Parsed options, with defaults for anything omitted.
 * @throws {Error} On an unknown option, a missing option value, or a value
 *   rejected by the numeric / workflow-id validators.
 */
function parseArgs(argv) {
  const options = {
    parallel: 1,
    dryRun: false,
    retryFailed: false,
    startFrom: 0,
    maxRetries: 2,
    bundleSize: 5,
    workflowId: DEFAULT_WORKFLOW_ID,
  };

  let cursor = 0;

  // Consume the argument following `flag`; it is an error for it to be absent.
  const takeValue = (flag) => {
    cursor += 1;
    if (cursor >= argv.length) {
      throw new Error(`Missing value for ${flag}`);
    }
    return argv[cursor];
  };

  const showHelp = () => {
    usage();
    process.exit(0);
  };

  // A Map (not a plain object) so names like "constructor" never match.
  const handlers = new Map([
    ['--parallel', (flag) => {
      options.parallel = parsePositiveInt(takeValue(flag), '--parallel');
    }],
    ['--bundle-size', (flag) => {
      options.bundleSize = parsePositiveInt(takeValue(flag), '--bundle-size');
    }],
    ['--dry-run', () => {
      options.dryRun = true;
    }],
    ['--retry-failed', () => {
      options.retryFailed = true;
    }],
    ['--start-from', (flag) => {
      options.startFrom = parseNonNegativeInt(takeValue(flag), '--start-from');
    }],
    ['--max-retries', (flag) => {
      options.maxRetries = parsePositiveInt(takeValue(flag), '--max-retries');
    }],
    ['--workflow-id', (flag) => {
      options.workflowId = sanitizeWorkflowId(takeValue(flag));
    }],
    ['-h', showHelp],
    ['--help', showHelp],
  ]);

  while (cursor < argv.length) {
    const arg = argv[cursor];
    const handle = handlers.get(arg);
    if (handle === undefined) {
      throw new Error(`Unknown option: ${arg}`);
    }
    handle(arg);
    cursor += 1;
  }

  return options;
}
|
|
130
|
+
|
|
131
|
+
/**
 * Parse an option value as a strictly positive base-10 integer.
 *
 * The whole value must consist of digits (surrounding whitespace is ignored),
 * so inputs such as "5abc" or "1.5" are rejected instead of being silently
 * truncated the way `Number.parseInt` would.
 *
 * @param {string|number} value - Raw option value.
 * @param {string} label - Option name used in the error message.
 * @returns {number} The parsed integer (>= 1).
 * @throws {Error} If the value is not a positive integer.
 */
function parsePositiveInt(value, label) {
  const text = String(value).trim();
  const n = /^\d+$/.test(text) ? Number(text) : Number.NaN;
  if (!Number.isSafeInteger(n) || n < 1) {
    throw new Error(`${label} must be a positive integer`);
  }
  return n;
}
|
|
138
|
+
|
|
139
|
+
/**
 * Parse an option value as a base-10 integer that is zero or greater.
 *
 * The whole value must consist of digits (surrounding whitespace is ignored),
 * so inputs such as "3x" or "2.7" are rejected instead of being silently
 * truncated the way `Number.parseInt` would.
 *
 * @param {string|number} value - Raw option value.
 * @param {string} label - Option name used in the error message.
 * @returns {number} The parsed integer (>= 0).
 * @throws {Error} If the value is not a non-negative integer.
 */
function parseNonNegativeInt(value, label) {
  const text = String(value).trim();
  const n = /^\d+$/.test(text) ? Number(text) : Number.NaN;
  if (!Number.isSafeInteger(n)) {
    throw new Error(`${label} must be a non-negative integer`);
  }
  return n;
}
|
|
146
|
+
|
|
147
|
+
/**
 * Normalize a user-supplied workflow id.
 *
 * Surrounding whitespace is removed and every run of characters other than
 * letters, digits, `.`, `_`, `:` and `-` is collapsed into a single `-`.
 *
 * @param {string} value - Raw id from the command line.
 * @returns {string} The cleaned id.
 * @throws {Error} If nothing is left after cleaning.
 */
function sanitizeWorkflowId(value) {
  const trimmed = value.trim();
  const clean = trimmed.replace(/[^a-zA-Z0-9._:-]+/g, '-');
  if (clean.length === 0) {
    throw new Error('--workflow-id cannot be empty');
  }
  return clean;
}
|
|
152
|
+
|
|
153
|
+
/**
 * Short, stable fingerprint of a value: the leading hex characters of its
 * SHA-256 digest.
 *
 * @param {string|Buffer} value - Data to hash.
 * @param {number} [length=12] - Number of hex characters to keep.
 * @returns {string} Truncated hex digest.
 */
function hash(value, length = 12) {
  const hasher = createHash('sha256');
  hasher.update(value);
  const hex = hasher.digest('hex');
  return hex.slice(0, length);
}
|
|
156
|
+
|
|
157
|
+
/**
 * Current instant as an ISO-8601 UTC timestamp.
 *
 * @returns {string} The value of `Date#toISOString()` for "now".
 */
function nowIso() {
  const now = new Date();
  return now.toISOString();
}
|
|
160
|
+
|
|
161
|
+
/**
 * Current date as `YYYY-MM-DD`, taken from the UTC ISO timestamp (so it is the
 * UTC calendar date, not the local one).
 *
 * @returns {string} The first ten characters of `Date#toISOString()`.
 */
function today() {
  const isoNow = new Date().toISOString();
  return isoNow.substring(0, 10);
}
|
|
164
|
+
|
|
165
|
+
/**
 * Make a value safe to store in a single TSV cell.
 *
 * `undefined`, `null` and `''` become the fallback; anything else is
 * stringified. Runs of tabs / CR / LF are replaced by one space and the result
 * is trimmed; if that leaves nothing, the fallback is returned.
 *
 * @param {*} value - Value to store.
 * @param {string} [fallback='-'] - Placeholder for missing or blank values.
 * @returns {string} Single-line cell text.
 */
function sanitizeCell(value, fallback = '-') {
  const isMissing = value === undefined || value === null || value === '';
  const raw = isMissing ? fallback : String(value);
  const singleLine = raw.replace(/[\t\r\n]+/g, ' ').trim();
  if (singleLine) {
    return singleLine;
  }
  return fallback;
}
|
|
169
|
+
|
|
170
|
+
/**
 * Format a report number as text, left-padded with zeros to at least three
 * characters.
 *
 * @param {number|string} n - Report number.
 * @returns {string} e.g. `7` -> `"007"`, `1234` -> `"1234"`.
 */
function padReportNum(n) {
  const digits = String(n);
  return digits.padStart(3, '0');
}
|
|
173
|
+
|
|
174
|
+
/**
 * Create a directory, including any missing parents. Does nothing if it
 * already exists.
 *
 * @param {string} path - Directory to create.
 * @returns {Promise<void>}
 */
async function ensureDir(path) {
  const options = { recursive: true };
  await mkdir(path, options);
}
|
|
177
|
+
|
|
178
|
+
/**
 * Read a UTF-8 text file, treating a missing file as empty.
 *
 * The file is read directly rather than checked for existence first, so a file
 * deleted between "check" and "read" cannot make this throw.
 *
 * @param {string} path - File to read.
 * @returns {Promise<string>} File contents, or `''` when the path does not exist.
 * @throws {Error} Any read error other than "not found" (e.g. EACCES, EISDIR).
 */
async function readTextIfExists(path) {
  try {
    return await readFile(path, 'utf8');
  } catch (error) {
    // ENOTDIR: a parent component is not a directory, so the path cannot exist.
    if (error.code === 'ENOENT' || error.code === 'ENOTDIR') return '';
    throw error;
  }
}
|
|
182
|
+
|
|
183
|
+
/**
 * Verify the batch can run and create the working directories.
 *
 * Requires the input TSV and the prompt file to exist. Unless this is a dry
 * run, also probes for the `opencode` executable by running `opencode --help`
 * (only a "command not found" failure is treated as fatal). Finally creates
 * the logs, tracker-additions, reports and workflow directories.
 *
 * @param {{dryRun: boolean}} options
 * @returns {Promise<void>}
 * @throws {Error} If a required file or the `opencode` CLI is missing.
 */
async function checkPrerequisites({ dryRun }) {
  const requiredFiles = [
    [INPUT_FILE, `${INPUT_FILE} not found. Add offers first.`],
    [PROMPT_FILE, `${PROMPT_FILE} not found.`],
  ];
  for (const [file, message] of requiredFiles) {
    if (!existsSync(file)) {
      throw new Error(message);
    }
  }

  if (!dryRun) {
    const probe = spawnSync('opencode', ['--help'], { stdio: 'ignore' });
    const cliMissing = probe.error?.code === 'ENOENT';
    if (cliMissing) {
      throw new Error("'opencode' CLI not found in PATH.");
    }
  }

  for (const dir of [LOGS_DIR, TRACKER_DIR, REPORTS_DIR, WORKFLOW_DIR]) {
    await ensureDir(dir);
  }
}
|
|
202
|
+
|
|
203
|
+
/**
 * Take the single-runner PID lock and return a function that releases it.
 *
 * A dry run takes no lock. Otherwise, an existing lock file is honored only
 * when it names a live process; a lock with an empty, malformed or dead PID is
 * reported as stale and removed. The new lock is created exclusively, so two
 * runners starting at the same moment cannot both acquire it.
 *
 * @param {{dryRun: boolean}} options
 * @returns {Promise<Function>} Release callback; safe to call more than once.
 * @throws {Error} If another batch runner holds the lock.
 */
async function acquirePidLock({ dryRun }) {
  if (dryRun) return () => {};

  // Signal 0 tests for existence without delivering a signal.
  const isProcessAlive = (pid) => {
    try {
      process.kill(pid, 0);
      return true;
    } catch (error) {
      // EPERM: the process exists but belongs to another user.
      if (error.code === 'EPERM') return true;
      if (error.code === 'ESRCH') return false;
      throw error;
    }
  };

  if (existsSync(LOCK_FILE)) {
    const oldPid = (await readTextIfExists(LOCK_FILE)).trim();
    // Only a plain positive integer is a usable PID. Anything else (garbage,
    // 0, which would signal our own process group, or an out-of-range value)
    // marks the lock as stale.
    const pid = /^\d+$/.test(oldPid) ? Number(oldPid) : Number.NaN;
    const plausiblePid = Number.isSafeInteger(pid) && pid > 0 && pid <= 0x7fffffff;
    if (plausiblePid && isProcessAlive(pid)) {
      throw new Error(`Another batch-runner is already running (PID ${oldPid}). If this is stale, remove ${LOCK_FILE}`);
    }
    console.log(`WARN: Stale lock file found (PID ${oldPid || 'unknown'} not running). Removing.`);
    await rm(LOCK_FILE, { force: true });
  }

  try {
    // 'wx' fails with EEXIST if another runner created the lock in the meantime.
    await writeFile(LOCK_FILE, String(process.pid), { encoding: 'utf8', flag: 'wx' });
  } catch (error) {
    if (error.code === 'EEXIST') {
      throw new Error(`Another batch-runner is already running (lock file was just created). If this is stale, remove ${LOCK_FILE}`);
    }
    throw error;
  }

  let released = false;
  return async () => {
    if (released) return;
    released = true;
    await rm(LOCK_FILE, { force: true });
  };
}
|
|
229
|
+
|
|
230
|
+
/**
 * Create the state TSV with just its header row if it does not exist yet.
 * An existing state file is left untouched.
 *
 * @returns {Promise<void>}
 */
async function initState() {
  const alreadyPresent = existsSync(STATE_FILE);
  if (!alreadyPresent) {
    await ensureDir(dirname(STATE_FILE));
    await writeFile(STATE_FILE, `${STATE_HEADER}\n`, 'utf8');
  }
}
|
|
235
|
+
|
|
236
|
+
/**
 * Load the state TSV into a Map keyed by offer id.
 *
 * Creates the file first if it is missing. Blank lines and any line whose
 * first column is literally `id` (the header) are skipped; each remaining line
 * is normalized, and rows that end up without an id are dropped. If an id
 * appears more than once, the later line wins.
 *
 * @returns {Promise<Map<string, object>>} Normalized rows by id.
 */
async function readState() {
  await initState();
  const content = await readFile(STATE_FILE, 'utf8');
  const rows = new Map();

  const nonBlankLines = content.split('\n').filter((line) => line.trim());
  for (const line of nonBlankLines) {
    // Column order matches STATE_HEADER.
    const [
      id,
      url,
      status,
      started_at,
      completed_at,
      report_num,
      score,
      error,
      retries,
    ] = line.split('\t');
    if (id === 'id') continue;

    const row = normalizeStateRow({
      id,
      url,
      status,
      started_at,
      completed_at,
      report_num,
      score,
      error,
      retries,
    });
    if (row.id) rows.set(row.id, row);
  }

  return rows;
}
|
|
261
|
+
|
|
262
|
+
/**
 * Coerce a (possibly partial) state row into canonical all-string form.
 *
 * Every text field goes through `sanitizeCell`: a missing id becomes `''`, a
 * missing status becomes `'pending'`, and other missing fields become `'-'`.
 * `retries` is parsed as an integer and defaults to `0` when it is absent,
 * unparsable or zero.
 *
 * @param {object} row - Raw row fields.
 * @returns {{id: string, url: string, status: string, started_at: string,
 *   completed_at: string, report_num: string, score: string, error: string,
 *   retries: string}}
 */
function normalizeStateRow(row) {
  const {
    id,
    url,
    status,
    started_at,
    completed_at,
    report_num,
    score,
    error,
    retries,
  } = row;
  const retryCount = Number.parseInt(retries, 10) || 0;

  return {
    id: sanitizeCell(id, ''),
    url: sanitizeCell(url),
    status: sanitizeCell(status, 'pending'),
    started_at: sanitizeCell(started_at),
    completed_at: sanitizeCell(completed_at),
    report_num: sanitizeCell(report_num),
    score: sanitizeCell(score),
    error: sanitizeCell(error),
    retries: String(retryCount),
  };
}
|
|
275
|
+
|
|
276
|
+
/**
 * Persist all state rows to the state TSV, sorted by id.
 *
 * Ids are ordered numerically when both parse as integers and by locale
 * comparison otherwise. The content is written to a temporary sibling file and
 * then renamed over the state file, so a concurrent reader (or a crash
 * mid-write) never observes a truncated file.
 *
 * @param {Map<string, object>} rows - Rows keyed by id.
 * @returns {Promise<void>}
 * @throws {Error} If the state cannot be written.
 */
async function writeState(rows) {
  const sorted = [...rows.values()].sort((a, b) => {
    const na = Number.parseInt(a.id, 10);
    const nb = Number.parseInt(b.id, 10);
    if (Number.isNaN(na) || Number.isNaN(nb)) return a.id.localeCompare(b.id);
    return na - nb;
  });

  const lines = [
    STATE_HEADER,
    ...sorted.map((row) => [
      row.id,
      row.url,
      row.status,
      row.started_at,
      row.completed_at,
      row.report_num,
      row.score,
      row.error,
      row.retries,
    ].map((value) => sanitizeCell(value)).join('\t')),
  ];
  const payload = `${lines.join('\n')}\n`;

  // Unique per call, so overlapping writers never share a temp file.
  const uniqueSuffix = `${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2)}`;
  const tmpFile = `${STATE_FILE}.${uniqueSuffix}.tmp`;

  try {
    await writeFile(tmpFile, payload, 'utf8');
  } catch (error) {
    await rm(tmpFile, { force: true });
    throw error;
  }

  try {
    await rename(tmpFile, STATE_FILE);
  } catch {
    // Rename can fail on some platforms while the target is open elsewhere;
    // fall back to a direct in-place write rather than losing the update.
    try {
      await writeFile(STATE_FILE, payload, 'utf8');
    } finally {
      await rm(tmpFile, { force: true });
    }
  }
}
|
|
300
|
+
|
|
301
|
+
/**
 * Merge new field values into one state row and persist the whole table,
 * under the workflow's `batch-state` mutex.
 *
 * @param {object} workflow - Workflow handle; only `withMutex` is used here.
 * @param {object} nextRow - Fields to set; `id` selects the row.
 * @returns {Promise<object>} The normalized row as written.
 */
async function updateStateRow(workflow, nextRow) {
  const applyUpdate = async () => {
    const rows = await readState();
    const previous = rows.get(nextRow.id) || {};
    const merged = normalizeStateRow({ ...previous, ...nextRow });
    rows.set(merged.id, merged);
    await writeState(rows);
    return merged;
  };

  return workflow.withMutex('batch-state', applyUpdate);
}
|
|
311
|
+
|
|
312
|
+
/**
 * Read the input TSV into a list of offers.
 *
 * Blank lines and lines whose first column is literally `id` (the header) are
 * skipped, as are lines without both an id and a url. Columns are
 * `id, url, source, notes`; any extra columns are folded into `notes`, joined
 * by spaces.
 *
 * @returns {Promise<Array<{id: string, url: string, source: string, notes: string}>>}
 */
async function readInputOffers() {
  const content = await readFile(INPUT_FILE, 'utf8');

  return content
    .split('\n')
    .filter((rawLine) => rawLine.trim())
    .map((rawLine) => rawLine.split('\t'))
    .filter((parts) => parts[0] !== 'id')
    .map(([rawId, rawUrl, rawSource, ...rawNotes]) => ({
      id: sanitizeCell(rawId, ''),
      url: sanitizeCell(rawUrl, ''),
      source: sanitizeCell(rawSource),
      notes: sanitizeCell(rawNotes.join(' ')),
    }))
    .filter((offer) => offer.id && offer.url);
}
|
|
333
|
+
|
|
334
|
+
/**
 * Retry count recorded for an offer.
 *
 * @param {Map<string, object>} rows - State rows keyed by id.
 * @param {string} id - Offer id.
 * @returns {number} The stored non-negative integer, or 0 when the row is
 *   missing or its `retries` field is not a non-negative integer.
 */
function retriesFor(rows, id) {
  const row = rows.get(id);
  const parsed = Number.parseInt(row?.retries, 10);
  if (!Number.isInteger(parsed) || parsed < 0) {
    return 0;
  }
  return parsed;
}
|
|
338
|
+
|
|
339
|
+
/**
 * Status recorded for an offer.
 *
 * @param {Map<string, object>} rows - State rows keyed by id.
 * @param {string} id - Offer id.
 * @returns {string} The row's status, or `'none'` when the row is missing or
 *   its status is empty.
 */
function statusFor(rows, id) {
  const row = rows.get(id);
  return row?.status || 'none';
}
|
|
342
|
+
|
|
343
|
+
/**
 * Choose which input offers this run should process, in input order.
 *
 * Offers whose numeric id is below `options.startFrom` are always excluded.
 * With `options.retryFailed`, only offers currently `failed` and still under
 * `options.maxRetries` are kept. Otherwise, everything is kept except
 * `completed` offers and `failed` offers that have used up their retries.
 * A SKIP line is logged for each offer dropped because of the retry limit.
 *
 * @param {Array<{id: string}>} offers - Offers from the input file.
 * @param {Map<string, object>} rows - Current state rows.
 * @param {{startFrom: number, retryFailed: boolean, maxRetries: number}} options
 * @returns {Array<object>} The offers to process.
 */
function selectPendingOffers(offers, rows, options) {
  const shouldProcess = (offer) => {
    const numericId = Number.parseInt(offer.id, 10);
    const beforeStart = !Number.isNaN(numericId) && numericId < options.startFrom;
    if (beforeStart) return false;

    const status = statusFor(rows, offer.id);
    const retriesExhausted = retriesFor(rows, offer.id) >= options.maxRetries;

    if (options.retryFailed) {
      if (status !== 'failed') return false;
      if (retriesExhausted) {
        console.log(`SKIP #${offer.id}: max retries (${options.maxRetries}) reached`);
        return false;
      }
      return true;
    }

    if (status === 'completed') return false;
    if (status === 'failed' && retriesExhausted) {
      console.log(`SKIP #${offer.id}: failed and max retries reached (use --retry-failed to force)`);
      return false;
    }
    return true;
  };

  return offers.filter(shouldProcess);
}
|
|
372
|
+
|
|
373
|
+
/**
 * Split a list into consecutive chunks of at most `size` items.
 *
 * @param {Array} items - Items to split; not modified.
 * @param {number} size - Maximum chunk length; must be a positive integer.
 * @returns {Array<Array>} Chunks in original order (empty for empty input).
 * @throws {RangeError} If `size` is not a positive integer (a zero or negative
 *   size would otherwise never advance the loop).
 */
function partition(items, size) {
  if (!Number.isInteger(size) || size < 1) {
    throw new RangeError(`partition size must be a positive integer, got ${String(size)}`);
  }

  const bundles = [];
  for (let i = 0; i < items.length; i += size) {
    bundles.push(items.slice(i, i + size));
  }
  return bundles;
}
|
|
380
|
+
|
|
381
|
+
/**
 * Find the highest report number already in use anywhere in the project.
 *
 * Sources consulted:
 *  - report files named `<digits>-*.md` in the reports directory;
 *  - Markdown table rows (`| <digits> | ...`) in `.md` files in the
 *    applications directory;
 *  - `.tsv` files in the tracker-additions directory and its `merged`
 *    sub-directory: the leading digits of the file name if it has any,
 *    otherwise the first column of the first non-blank line;
 *  - the `report_num` of every state row.
 *
 * Only regular files directly inside each directory are examined.
 *
 * @param {Map<string, object>} rows - Current state rows.
 * @returns {Promise<number>} The largest number found, or 0 if none.
 */
async function maxReportNumberFromFiles(rows) {
  let highest = 0;
  const consider = (candidate) => {
    highest = Math.max(highest, candidate);
  };

  // Call `visit(name, fullPath)` for each regular file directly inside `dir`.
  const forEachFile = async (dir, visit) => {
    if (!existsSync(dir)) return;
    const names = await readdir(dir);
    for (const name of names) {
      const fullPath = join(dir, name);
      const info = await stat(fullPath);
      if (info.isFile()) await visit(name, fullPath);
    }
  };

  await forEachFile(REPORTS_DIR, async (name) => {
    const match = name.match(/^(\d+)-.*\.md$/);
    if (match) consider(Number.parseInt(match[1], 10));
  });

  await forEachFile(APPLICATIONS_DIR, async (name, fullPath) => {
    if (!name.endsWith('.md')) return;
    const content = await readFile(fullPath, 'utf8');
    for (const line of content.split('\n')) {
      const match = line.match(/^\|\s*(\d+)\s*\|/);
      if (match) consider(Number.parseInt(match[1], 10));
    }
  });

  for (const dir of [TRACKER_DIR, TSV_MERGED_DIR]) {
    await forEachFile(dir, async (name, fullPath) => {
      if (!name.endsWith('.tsv')) return;

      // A numeric file-name prefix is authoritative; skip reading the file.
      const nameMatch = name.match(/^(\d+)-/);
      if (nameMatch) {
        consider(Number.parseInt(nameMatch[1], 10));
        return;
      }

      const content = await readFile(fullPath, 'utf8');
      const firstLine = content.split('\n').find((line) => line.trim());
      if (!firstLine) return;
      const [firstCell] = firstLine.split('\t');
      const parsed = Number.parseInt(firstCell, 10);
      if (!Number.isNaN(parsed)) consider(parsed);
    });
  }

  for (const row of rows.values()) {
    const parsed = Number.parseInt(row.report_num, 10);
    if (!Number.isNaN(parsed)) consider(parsed);
  }

  return highest;
}
|
|
430
|
+
|
|
431
|
+
/**
 * Reserve report numbers for a bundle and mark its offers as `processing`.
 *
 * Runs under the `report-number` mutex so number allocation is serialized, and
 * additionally under the `batch-state` mutex because it rewrites the state
 * file: `updateStateRow` guards the same file with `batch-state`, and without
 * the shared lock a parallel worker's update could be overwritten by the
 * snapshot read here. Lock order is always report-number -> batch-state.
 *
 * An offer that is already `processing` with a report number keeps that
 * number; every other offer gets the next number after the highest one found
 * in the project.
 *
 * @param {object} workflow - Workflow handle; only `withMutex` is used here.
 * @param {Array<{id: string, url: string}>} bundle - Offers to reserve.
 * @param {string} startedAt - Timestamp stored as each row's `started_at`.
 * @returns {Promise<Array<{id: string, url: string, jd_file: string,
 *   report_num: string, date: string}>>} One worker spec per offer.
 */
async function reserveBundle(workflow, bundle, startedAt) {
  const reserve = async () => {
    const rows = await readState();
    let next = await maxReportNumberFromFiles(rows);
    const specs = [];

    for (const offer of bundle) {
      const current = rows.get(offer.id);
      const existingReportNum = current?.status === 'processing' && current?.report_num && current.report_num !== '-'
        ? current.report_num
        : null;
      const reportNum = existingReportNum || padReportNum(++next);
      const retries = retriesFor(rows, offer.id);

      rows.set(offer.id, normalizeStateRow({
        id: offer.id,
        url: offer.url,
        status: 'processing',
        started_at: startedAt,
        completed_at: '-',
        report_num: reportNum,
        score: '-',
        error: '-',
        retries,
      }));

      specs.push({
        id: offer.id,
        url: offer.url,
        jd_file: `/tmp/batch-jd-${offer.id}.txt`,
        report_num: reportNum,
        date: today(),
      });
    }

    await writeState(rows);
    return specs;
  };

  return workflow.withMutex(
    'report-number',
    () => workflow.withMutex('batch-state', reserve),
  );
}
|
|
470
|
+
|
|
471
|
+
/**
 * Build the workflow step name for a bundle.
 *
 * The name contains the offer ids plus a hash over each offer's id, url and
 * current retry count, so it changes whenever any of those change.
 *
 * @param {Array<{id: string, url: string}>} bundle - Offers in the bundle.
 * @param {Map<string, object>} rows - State rows used to look up retry counts.
 * @returns {string} `batch-bundle:<ids joined by _>:<hash>`.
 */
function bundleStepName(bundle, rows) {
  const ids = [];
  const fingerprintLines = [];
  for (const offer of bundle) {
    ids.push(offer.id);
    fingerprintLines.push(`${offer.id}\0${offer.url}\0${retriesFor(rows, offer.id)}`);
  }

  const fingerprint = hash(fingerprintLines.join('\n'));
  return `batch-bundle:${ids.join('_')}:${fingerprint}`;
}
|
|
478
|
+
|
|
479
|
+
/**
 * Short label for a bundle, used as its log file base name.
 *
 * @param {Array<{id: string}>} bundle - Offers in the bundle.
 * @returns {string} `bundle-<ids joined by _>`.
 */
function bundleTag(bundle) {
  const ids = bundle.map(({ id }) => id);
  return `bundle-${ids.join('_')}`;
}
|
|
482
|
+
|
|
483
|
+
/**
 * Build the instruction text sent to a worker for one bundle.
 *
 * The prompt tells the worker to process the offers one after another and to
 * print one single-line status JSON per offer, then lists the offers as a JSON
 * array.
 *
 * @param {Array<object>} specs - Worker specs for the bundle.
 * @returns {string} The prompt text.
 */
function buildBundlePrompt(specs) {
  const statusShape = '{"id":"<id>","status":"completed|failed","report_num":"<num>","company":"...","role":"...","score":<num-or-null>,"pdf":"<path-or-null>","report":"<path-or-null>","error":"<msg-or-null>"}';

  const promptLines = [
    `Process these ${specs.length} offers sequentially using the full pipeline in batch-prompt.md`,
    '(Step 1 JD retrieval -> Steps 2-6 evaluate/report/PDF/tracker line). Do each',
    'offer fully before starting the next. Continue to the next offer even if one',
    'fails. After each offer, emit ONE single-line JSON on its own line with this',
    'exact shape (no extra prose, no code fences around it):',
    '',
    statusShape,
    '',
    'The orchestrator parses these lines to update state. Anything between status',
    'JSONs is fine, but do NOT omit or reorder the required keys.',
    '',
    'Offers:',
    JSON.stringify(specs),
  ];

  return promptLines.join('\n');
}
|
|
498
|
+
|
|
499
|
+
/**
 * Run one `opencode run` worker and capture everything it prints.
 *
 * stdout and stderr are collected in arrival order, written to `logFile` once
 * the worker finishes, and returned. The returned promise settles exactly once
 * and never rejects: a worker that cannot be started resolves with exit code 1
 * and the spawn error in the output, and a failure to write the log is
 * reported as a warning without discarding the worker's output.
 *
 * @param {string} prompt - Instruction text passed as the final CLI argument.
 * @param {string} logFile - Where the combined output is written.
 * @returns {Promise<{exitCode: number, output: string}>} Exit code (1 when the
 *   worker reported none) and the combined output.
 */
async function runOpencode(prompt, logFile) {
  await ensureDir(dirname(logFile));

  return new Promise((settle) => {
    const child = spawn('opencode', [
      'run',
      '--dangerously-skip-permissions',
      '--file',
      PROMPT_FILE,
      prompt,
    ], {
      cwd: PROJECT_DIR,
      env: {
        ...process.env,
        JOB_FORGE_PROJECT: PROJECT_DIR,
      },
      stdio: ['ignore', 'pipe', 'pipe'],
    });

    const chunks = [];
    let finished = false;

    // Write the log and settle; guarded so 'error' and 'close' cannot both settle.
    const finish = async (exitCode) => {
      if (finished) return;
      finished = true;
      const output = Buffer.concat(chunks).toString('utf8');
      try {
        await writeFile(logFile, output, 'utf8');
      } catch (error) {
        console.log(`WARN: could not write ${logFile}: ${error.message}`);
      }
      settle({ exitCode, output });
    };

    child.stdout.on('data', (chunk) => chunks.push(chunk));
    child.stderr.on('data', (chunk) => chunks.push(chunk));

    child.on('error', (error) => {
      chunks.push(Buffer.from(`\n${error.stack || error.message}\n`));
      // No pid means the process never started, so 'close' is not guaranteed
      // to follow. For errors on a running child, 'close' still settles.
      if (child.pid === undefined) {
        void finish(1);
      }
    });

    child.on('close', (code) => {
      void finish(code ?? 1);
    });
  });
}
|
|
533
|
+
|
|
534
|
+
/**
 * Extract per-offer status objects from worker output.
 *
 * For each output line, the text from the first `{` to the last `}` is parsed
 * as JSON. Objects with truthy `id` and `status` are kept, keyed by the
 * stringified id; when an id appears more than once, the first occurrence
 * wins. Lines that are not valid JSON are ignored.
 *
 * @param {string} output - Combined worker output.
 * @returns {Map<string, object>} Parsed status objects by offer id.
 */
function parseStatusLines(output) {
  const firstById = new Map();

  // Workers may print non-JSON diagnostics; only exact status JSON matters.
  const tryParse = (text) => {
    try {
      return JSON.parse(text);
    } catch {
      return undefined;
    }
  };

  for (const line of output.split('\n')) {
    const open = line.indexOf('{');
    const close = line.lastIndexOf('}');
    const hasCandidate = open !== -1 && close !== -1 && close > open;
    if (!hasCandidate) continue;

    const record = tryParse(line.slice(open, close + 1));
    if (!record || typeof record !== 'object') continue;
    if (!record.id || !record.status) continue;

    const key = String(record.id);
    if (!firstById.has(key)) {
      firstById.set(key, record);
    }
  }

  return firstById;
}
|
|
552
|
+
|
|
553
|
+
/**
 * Process one bundle end to end: reserve report numbers, run a worker, and
 * record an outcome for every offer.
 *
 * An offer for which the worker printed a status JSON is recorded as
 * `completed` only when that status is exactly `completed`; anything else
 * counts as `failed`. An offer with no status JSON is recorded as `failed`
 * with an explanatory error. Each failure increments the offer's retry count.
 * `batch.bundle.started` and `batch.bundle.completed` events are appended to
 * the workflow around the run.
 *
 * @param {object} workflow - Workflow handle (mutexes and event log).
 * @param {Array<{id: string, url: string}>} bundle - Offers to process.
 * @returns {Promise<{ids: string[], exitCode: number, log: string,
 *   outcomes: Array<{id: string, status: string, score: string, report_num: string}>}>}
 */
async function processBundle(workflow, bundle) {
  const startedAt = nowIso();
  const specs = await reserveBundle(workflow, bundle, startedAt);
  const logFile = join(LOGS_DIR, `${bundleTag(bundle)}.log`);
  const listIds = () => bundle.map((offer) => offer.id);

  console.log(`--- Processing bundle of ${bundle.length} offer(s): ${listIds().join(' ')}`);
  await workflow.appendEvent({
    type: 'batch.bundle.started',
    detail: {
      ids: listIds(),
      log: relativeProjectPath(logFile),
    },
  });

  const { exitCode, output } = await runOpencode(buildBundlePrompt(specs), logFile);
  const completedAt = nowIso();
  const statuses = parseStatusLines(output);
  const outcomes = [];

  for (const spec of specs) {
    const parsed = statuses.get(spec.id);
    const rows = await readState();
    const retries = retriesFor(rows, spec.id);
    const offer = bundle.find((candidate) => candidate.id === spec.id);

    // Filled in by whichever branch applies, then recorded once below.
    let status;
    let scoreCell;
    let errorCell;
    let reportNum;
    let nextRetries;
    let logLine;

    if (parsed) {
      status = parsed.status === 'completed' ? 'completed' : 'failed';
      nextRetries = status === 'failed' ? retries + 1 : retries;
      scoreCell = sanitizeCell(parsed.score ?? '-');
      errorCell = sanitizeCell(parsed.error ?? '-');
      // Prefer the number the worker reported; fall back to the reserved one.
      reportNum = sanitizeCell(parsed.report_num, spec.report_num);
      const verdict = status === 'completed' ? 'OK' : 'FAIL';
      logLine = `  ${verdict} #${spec.id} (status=${status}, score=${scoreCell}, report=${reportNum})`;
    } else {
      status = 'failed';
      nextRetries = retries + 1;
      scoreCell = '-';
      errorCell = exitCode === 0
        ? 'Worker finished without emitting status JSON for this offer'
        : `Worker exited ${exitCode} without emitting status JSON for this offer`;
      reportNum = spec.report_num;
      logLine = `  FAIL #${spec.id} (no status emitted; see ${relativeProjectPath(logFile)})`;
    }

    await updateStateRow(workflow, {
      id: spec.id,
      url: offer?.url || spec.url,
      status,
      started_at: startedAt,
      completed_at: completedAt,
      report_num: reportNum,
      score: scoreCell,
      error: errorCell,
      retries: String(nextRetries),
    });
    outcomes.push({
      id: spec.id,
      status,
      score: scoreCell,
      report_num: reportNum,
    });
    console.log(logLine);
  }

  if (exitCode !== 0) {
    console.log(`  WARN worker exit code ${exitCode}; see ${relativeProjectPath(logFile)}`);
  }

  await workflow.appendEvent({
    type: 'batch.bundle.completed',
    detail: {
      ids: listIds(),
      exitCode,
      log: relativeProjectPath(logFile),
      outcomes,
    },
  });

  return {
    ids: listIds(),
    exitCode,
    log: relativeProjectPath(logFile),
    outcomes,
  };
}
|
|
649
|
+
|
|
650
|
+
/**
 * Express a path relative to the project directory, for display in logs.
 *
 * Uses real path-segment comparison instead of a string-prefix test, so a
 * sibling such as `<project>2/x` is not mistaken for a path inside
 * `<project>`.
 *
 * @param {string} path - Path to shorten.
 * @returns {string} The project-relative path when `path` lies inside the
 *   project directory; otherwise `path` unchanged (this includes the project
 *   directory itself).
 */
function relativeProjectPath(path) {
  const rel = relative(PROJECT_DIR, path);
  const [firstSegment] = rel.split(sep);
  // An empty result means the project dir itself; a leading '..' or an
  // absolute result (different drive on Windows) means outside the project.
  const isInside = rel !== '' && firstSegment !== '..' && !isAbsolute(rel);
  return isInside ? rel : path;
}
|
|
655
|
+
|
|
656
|
+
/**
 * Run a script from the package with the current Node binary, synchronously,
 * with the project directory as cwd and `JOB_FORGE_PROJECT` set. The script's
 * output goes straight to this process's stdio.
 *
 * @param {string} relPath - Script path relative to the package root.
 * @param {string[]} [args=[]] - Arguments passed to the script.
 * @param {{allowFailure?: boolean}} [options] - When `allowFailure` is true, a
 *   non-zero exit is returned instead of thrown.
 * @returns {Promise<number>} Exit status (1 when the child reported none).
 * @throws {Error} On a non-zero exit, unless `allowFailure` is set.
 */
async function runNodeScript(relPath, args = [], { allowFailure = false } = {}) {
  const env = {
    ...process.env,
    JOB_FORGE_PROJECT: PROJECT_DIR,
  };
  const { status } = spawnSync(process.execPath, [join(PKG_ROOT, relPath), ...args], {
    cwd: PROJECT_DIR,
    env,
    stdio: 'inherit',
  });

  const exitStatus = status ?? 1;
  if (exitStatus !== 0 && !allowFailure) {
    throw new Error(`${relPath} exited with status ${exitStatus}`);
  }
  return exitStatus;
}
|
|
673
|
+
|
|
674
|
+
/**
 * Merge the tracker additions, then verify the pipeline.
 *
 * A failing merge script throws. A failing verification only prints a
 * warning.
 *
 * @returns {Promise<void>}
 */
async function mergeTracker() {
  console.log('\n=== Merging tracker additions ===');
  await runNodeScript('merge-tracker.mjs');

  console.log('\n=== Verifying pipeline integrity ===');
  const verifyStatus = await runNodeScript('verify-pipeline.mjs', [], { allowFailure: true });
  const verificationFailed = verifyStatus !== 0;
  if (verificationFailed) {
    console.log('WARN  Verification found issues (see above)');
  }
}
|
|
683
|
+
|
|
684
|
+
/**
 * Print a token-usage report for the recent session window by running the
 * `session-report` sub-command of the package CLI. A failing report is
 * tolerated.
 *
 * @param {number} [sinceMinutes=180] - Size of the look-back window in minutes.
 * @returns {Promise<void>}
 */
async function costReport(sinceMinutes = 180) {
  console.log(`\n=== Token usage (last ${sinceMinutes} min, warn at $1.00) ===`);

  const reportArgs = [
    'session-report',
    '--since-minutes',
    String(sinceMinutes),
    '--log',
    '--warn-at',
    '1.00',
  ];
  await runNodeScript('bin/job-forge.mjs', reportArgs, { allowFailure: true });
}
|
|
695
|
+
|
|
696
|
+
/**
 * Summarize the state file.
 *
 * Rows are counted as completed, failed, or pending (any other status). The
 * average score covers completed rows whose score parses as a number, rounded
 * to one decimal place.
 *
 * @returns {Promise<{total: number, completed: number, failed: number,
 *   pending: number, averageScore: (number|null), scoreCount: number}>}
 *   `averageScore` is null when no completed row has a numeric score.
 */
async function summaryFromState() {
  const rows = await readState();
  const counts = { completed: 0, failed: 0, pending: 0 };
  const scores = [];

  for (const { status, score } of rows.values()) {
    if (status === 'completed') {
      counts.completed += 1;
      const numericScore = Number.parseFloat(score);
      if (!Number.isNaN(numericScore)) scores.push(numericScore);
    } else if (status === 'failed') {
      counts.failed += 1;
    } else {
      counts.pending += 1;
    }
  }

  const scoreCount = scores.length;
  const scoreSum = scores.reduce((sum, value) => sum + value, 0);
  const averageScore = scoreCount > 0
    ? Number((scoreSum / scoreCount).toFixed(1))
    : null;

  return {
    total: rows.size,
    completed: counts.completed,
    failed: counts.failed,
    pending: counts.pending,
    averageScore,
    scoreCount,
  };
}
|
|
730
|
+
|
|
731
|
+
/**
 * Print the batch summary (counts, and the average score when there is one)
 * and return it.
 *
 * @returns {Promise<object>} The summary produced by `summaryFromState`.
 */
async function printSummary() {
  console.log('\n=== Batch Summary ===');
  const summary = await summaryFromState();
  const { total, completed, failed, pending, averageScore, scoreCount } = summary;

  console.log(`Total: ${total} | Completed: ${completed} | Failed: ${failed} | Pending: ${pending}`);
  if (averageScore !== null) {
    console.log(`Average score: ${averageScore}/5 (${scoreCount} scored)`);
  }

  return summary;
}
|
|
740
|
+
|
|
741
|
+
/**
 * Execute one batch run.
 *
 * Checks prerequisites, takes the PID lock, selects the pending offers and
 * then either lists them (dry run) or processes them in bundles inside a
 * workflow. After the bundles finish, tracker additions are merged and
 * verified, the summary is printed and stored in the workflow state, and a
 * cost report is shown unless `JOBFORGE_SKIP_COST_REPORT` is `1`. The lock is
 * released on every exit path, including SIGINT and SIGTERM.
 *
 * @param {object} options - Options as produced by `parseArgs`.
 * @returns {Promise<void>}
 */
async function run(options) {
  await checkPrerequisites(options);
  const releaseLock = await acquirePidLock(options);

  // Release the lock before exiting with the conventional 128 + signal code.
  const exitCodesBySignal = [
    ['SIGINT', 130],
    ['SIGTERM', 143],
  ];
  for (const [signal, exitCode] of exitCodesBySignal) {
    process.once(signal, async () => {
      await releaseLock();
      process.exit(exitCode);
    });
  }

  try {
    await initState();

    const offers = await readInputOffers();
    const totalInput = offers.length;
    if (totalInput === 0) {
      console.log(`No offers in ${INPUT_FILE}. Add offers first.`);
      return;
    }

    const startedAt = nowIso();
    const stateRows = await readState();
    const pending = selectPendingOffers(offers, stateRows, options);

    console.log('=== job-forge batch runner ===');
    console.log(`Parallel: ${options.parallel} | Bundle size: ${options.bundleSize} | Max retries: ${options.maxRetries}`);
    console.log(`Workflow: ${options.workflowId} (${relativeProjectPath(WORKFLOW_DIR)})`);
    console.log(`Input: ${totalInput} offers`);
    console.log('');

    if (pending.length === 0) {
      console.log('No offers to process.');
      await printSummary();
      return;
    }

    console.log(`Pending: ${pending.length} offers`);
    console.log('');

    if (options.dryRun) {
      console.log('=== DRY RUN (no processing) ===');
      pending.forEach((offer) => {
        console.log(`  #${offer.id}: ${offer.url} [${offer.source}] (status: ${statusFor(stateRows, offer.id)})`);
      });
      console.log('');
      console.log(`Would process ${pending.length} offers`);
      return;
    }

    const bundles = partition(pending, options.bundleSize);
    console.log(`Partitioned into ${bundles.length} bundle(s) of up to ${options.bundleSize} offer(s) each`);

    const workflowConfig = {
      workflowId: options.workflowId,
      dir: WORKFLOW_DIR,
      initialState: {
        kind: 'jobforge-batch',
        runs: 0,
        lastRun: null,
      },
    };

    const executeBatch = async (workflow) => {
      // Record that a run started, and with what parameters.
      await workflow.updateState((state) => ({
        ...state,
        runs: Number(state.runs || 0) + 1,
        lastRun: {
          startedAt,
          totalInput,
          pending: pending.length,
          bundles: bundles.length,
          parallel: options.parallel,
          bundleSize: options.bundleSize,
        },
      }));

      // Step names are derived from the retry counts as they were before any
      // bundle of this run was processed.
      const rowsBeforeRun = await readState();
      const runBundle = async (bundle) => {
        const stepName = bundleStepName(bundle, rowsBeforeRun);
        return workflow.step(
          stepName,
          async () => processBundle(workflow, bundle),
          { idempotencyKey: stepName },
        );
      };
      const summary = await workflow.forEach(bundles, runBundle, {
        maxParallel: options.parallel,
        stopOnError: false,
      });

      await workflow.appendEvent({
        type: 'batch.bundles.finished',
        detail: {
          fulfilled: summary.fulfilled,
          rejected: summary.rejected,
        },
      });

      await workflow.step(
        `merge-and-verify:${hash(startedAt)}`,
        async () => {
          await mergeTracker();
          return { ok: true };
        },
      );

      const finalSummary = await printSummary();
      await workflow.updateState((state) => ({
        ...state,
        lastRun: {
          ...state.lastRun,
          completedAt: nowIso(),
          summary: finalSummary,
        },
      }));

      if (process.env.JOBFORGE_SKIP_COST_REPORT !== '1') {
        await costReport(180);
      }

      return {
        bundles: bundles.length,
        summary: finalSummary,
      };
    };

    await runWorkflow(workflowConfig, executeBatch);
  } finally {
    await releaseLock();
  }
}
|
|
879
|
+
|
|
880
|
+
// Entry point: parse the command line and run the batch. Any failure is
// reported as a one-line error and ends the process with exit code 1.
try {
  const cliArgs = process.argv.slice(2);
  await run(parseArgs(cliArgs));
} catch (error) {
  console.error(`ERROR: ${error.message}`);
  process.exit(1);
}
|