@ryanfw/prompt-orchestration-pipeline 1.2.9 → 1.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/core/__tests__/pipeline-runner.test.ts +193 -4
- package/src/core/pipeline-runner.ts +53 -1
- package/src/core/status-writer.ts +24 -9
- package/src/ui/client/__tests__/job-adapter.test.ts +27 -0
- package/src/ui/client/adapters/job-adapter.ts +3 -0
- package/src/ui/client/types.ts +5 -0
- package/src/ui/dist/assets/{index-BnAqY4_n.js → index-HrBsHfx3.js} +3 -0
- package/src/ui/dist/assets/{index-BnAqY4_n.js.map → index-HrBsHfx3.js.map} +1 -1
- package/src/ui/dist/index.html +1 -1
- package/src/ui/embedded-assets.js +6 -6
- package/src/ui/server/__tests__/job-control-endpoints.test.ts +233 -2
- package/src/ui/server/endpoints/job-control-endpoints.ts +69 -10
- package/src/ui/state/transformers/__tests__/status-transformer.test.ts +27 -0
- package/src/ui/state/transformers/status-transformer.ts +3 -0
- package/src/ui/state/types.ts +5 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ryanfw/prompt-orchestration-pipeline",
|
|
3
|
-
"version": "1.2.9",
|
|
3
|
+
"version": "1.2.10",
|
|
4
4
|
"description": "A Prompt-orchestration pipeline (POP) is a framework for building, running, and experimenting with complex chains of LLM tasks.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/ui/server/index.ts",
|
|
package/src/core/__tests__/pipeline-runner.test.ts
CHANGED
|
@@ -278,6 +278,54 @@ describe("runPipelineJob — multi-task success regression", () => {
|
|
|
278
278
|
expect(anyLifecycleBlock).toBe(false);
|
|
279
279
|
expect(exitCalls).toEqual([]);
|
|
280
280
|
});
|
|
281
|
+
|
|
282
|
+
test("starting a task clears stale terminal fields from earlier attempts", async () => {
|
|
283
|
+
const fixture = await setupMultiTaskFixture(["task-a"]);
|
|
284
|
+
cleanupDirs.push(fixture.tmpDir);
|
|
285
|
+
|
|
286
|
+
const initialStatusText = await readFile(join(fixture.jobDir, "tasks-status.json"), "utf-8");
|
|
287
|
+
const initialStatus = JSON.parse(initialStatusText) as {
|
|
288
|
+
tasks: Record<string, Record<string, unknown>>;
|
|
289
|
+
};
|
|
290
|
+
initialStatus.tasks["task-a"] = {
|
|
291
|
+
...initialStatus.tasks["task-a"],
|
|
292
|
+
state: "pending",
|
|
293
|
+
endedAt: "2026-04-01T10:00:00.000Z",
|
|
294
|
+
failedStage: "generate",
|
|
295
|
+
error: { message: "old failure" },
|
|
296
|
+
stageLogPath: "/tmp/old.log",
|
|
297
|
+
errorContext: { stage: "generate" },
|
|
298
|
+
retrying: true,
|
|
299
|
+
nextRetryAt: "2026-04-01T10:01:00.000Z",
|
|
300
|
+
lastRetryError: { message: "old retry" },
|
|
301
|
+
};
|
|
302
|
+
await writeFile(join(fixture.jobDir, "tasks-status.json"), JSON.stringify(initialStatus));
|
|
303
|
+
|
|
304
|
+
const exitSpy = spyOn(process, "exit").mockImplementation(((code?: number) => {
|
|
305
|
+
throw new Error(`process.exit called with ${String(code)}`);
|
|
306
|
+
}) as typeof process.exit);
|
|
307
|
+
|
|
308
|
+
try {
|
|
309
|
+
await runPipelineJob(fixture.jobId);
|
|
310
|
+
} finally {
|
|
311
|
+
exitSpy.mockRestore();
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
const finalStatusText = await readFile(fixture.statusPath, "utf-8");
|
|
315
|
+
const finalStatus = JSON.parse(finalStatusText) as {
|
|
316
|
+
tasks: Record<string, Record<string, unknown>>;
|
|
317
|
+
};
|
|
318
|
+
const task = finalStatus.tasks["task-a"];
|
|
319
|
+
expect(task?.["state"]).toBe("done");
|
|
320
|
+
expect(typeof task?.["endedAt"]).toBe("string");
|
|
321
|
+
expect(task?.["failedStage"]).toBeUndefined();
|
|
322
|
+
expect(task?.["error"]).toBeUndefined();
|
|
323
|
+
expect(task?.["stageLogPath"]).toBeUndefined();
|
|
324
|
+
expect(task?.["errorContext"]).toBeUndefined();
|
|
325
|
+
expect(task?.["retrying"]).toBeUndefined();
|
|
326
|
+
expect(task?.["nextRetryAt"]).toBeUndefined();
|
|
327
|
+
expect(task?.["lastRetryError"]).toBeUndefined();
|
|
328
|
+
});
|
|
281
329
|
});
|
|
282
330
|
|
|
283
331
|
describe("runPipelineJob — outer-catch failure surfacing", () => {
|
|
@@ -361,6 +409,18 @@ describe("runPipelineJob — outer-catch failure surfacing", () => {
|
|
|
361
409
|
expect(typeof failure.message).toBe("string");
|
|
362
410
|
expect(failure.message).toContain(injectedMessage);
|
|
363
411
|
|
|
412
|
+
const statusText = await readFile(join(fixture.jobDir, "tasks-status.json"), "utf-8");
|
|
413
|
+
const status = JSON.parse(statusText) as {
|
|
414
|
+
state?: string;
|
|
415
|
+
current?: string | null;
|
|
416
|
+
tasks: Record<string, { state?: string; failedStage?: unknown; error?: unknown }>;
|
|
417
|
+
};
|
|
418
|
+
expect(status.state).toBe("pending");
|
|
419
|
+
expect(status.current).toBeNull();
|
|
420
|
+
expect(status.tasks["task-a"]?.state).toBe("pending");
|
|
421
|
+
expect(status.tasks["task-a"]?.failedStage).toBeUndefined();
|
|
422
|
+
expect(status.tasks["task-a"]?.error).toBeUndefined();
|
|
423
|
+
|
|
364
424
|
const stderrContainsMessage = consoleErrorMessages.some((args) =>
|
|
365
425
|
args.some((a) => typeof a === "string" && a.includes(injectedMessage)),
|
|
366
426
|
);
|
|
@@ -501,11 +561,21 @@ describe("runPipelineJob — bounded retry loop", () => {
|
|
|
501
561
|
|
|
502
562
|
const statusText = await readFile(fixture.statusPath, "utf-8");
|
|
503
563
|
const status = JSON.parse(statusText) as {
|
|
504
|
-
tasks: Record<string, {
|
|
564
|
+
tasks: Record<string, {
|
|
565
|
+
state?: string;
|
|
566
|
+
attempts?: number;
|
|
567
|
+
restartCount?: number;
|
|
568
|
+
retrying?: unknown;
|
|
569
|
+
nextRetryAt?: unknown;
|
|
570
|
+
lastRetryError?: unknown;
|
|
571
|
+
}>;
|
|
505
572
|
};
|
|
506
573
|
expect(status.tasks["task-a"]?.state).toBe("done");
|
|
507
574
|
expect(status.tasks["task-a"]?.attempts).toBe(3);
|
|
508
575
|
expect(status.tasks["task-a"]?.restartCount).toBe(2);
|
|
576
|
+
expect(status.tasks["task-a"]?.retrying).toBeUndefined();
|
|
577
|
+
expect(status.tasks["task-a"]?.nextRetryAt).toBeUndefined();
|
|
578
|
+
expect(status.tasks["task-a"]?.lastRetryError).toBeUndefined();
|
|
509
579
|
});
|
|
510
580
|
|
|
511
581
|
test("maxAttempts: 3 — always fails: three calls, restartCount=2, exits non-zero", async () => {
|
|
@@ -535,11 +605,25 @@ describe("runPipelineJob — bounded retry loop", () => {
|
|
|
535
605
|
|
|
536
606
|
const statusText = await readFile(join(fixture.jobDir, "tasks-status.json"), "utf-8");
|
|
537
607
|
const status = JSON.parse(statusText) as {
|
|
538
|
-
tasks: Record<string, {
|
|
608
|
+
tasks: Record<string, {
|
|
609
|
+
state?: string;
|
|
610
|
+
attempts?: number;
|
|
611
|
+
restartCount?: number;
|
|
612
|
+
retrying?: unknown;
|
|
613
|
+
nextRetryAt?: unknown;
|
|
614
|
+
lastRetryError?: unknown;
|
|
615
|
+
failedStage?: unknown;
|
|
616
|
+
error?: unknown;
|
|
617
|
+
}>;
|
|
539
618
|
};
|
|
540
619
|
expect(status.tasks["task-a"]?.state).toBe("failed");
|
|
541
620
|
expect(status.tasks["task-a"]?.attempts).toBe(3);
|
|
542
621
|
expect(status.tasks["task-a"]?.restartCount).toBe(2);
|
|
622
|
+
expect(status.tasks["task-a"]?.retrying).toBeUndefined();
|
|
623
|
+
expect(status.tasks["task-a"]?.nextRetryAt).toBeUndefined();
|
|
624
|
+
expect(status.tasks["task-a"]?.lastRetryError).toBeUndefined();
|
|
625
|
+
expect(status.tasks["task-a"]?.failedStage).toBe("generate");
|
|
626
|
+
expect(status.tasks["task-a"]?.error).toBeDefined();
|
|
543
627
|
});
|
|
544
628
|
|
|
545
629
|
test("interim status between attempts: state=running, no failedStage/error, restartCount incremented", async () => {
|
|
@@ -548,7 +632,17 @@ describe("runPipelineJob — bounded retry loop", () => {
|
|
|
548
632
|
cleanupDirs.push(fixture.tmpDir);
|
|
549
633
|
|
|
550
634
|
let call = 0;
|
|
551
|
-
let
|
|
635
|
+
let interimLastUpdated: string | undefined;
|
|
636
|
+
let interimSnapshot: {
|
|
637
|
+
state?: string;
|
|
638
|
+
attempts?: number;
|
|
639
|
+
failedStage?: unknown;
|
|
640
|
+
error?: unknown;
|
|
641
|
+
restartCount?: number;
|
|
642
|
+
retrying?: unknown;
|
|
643
|
+
nextRetryAt?: unknown;
|
|
644
|
+
lastRetryError?: { message?: unknown };
|
|
645
|
+
} | undefined;
|
|
552
646
|
|
|
553
647
|
// Capture the snapshot from disk *during* the second call (after the first failure
|
|
554
648
|
// and the interim writeJobStatus). At call #2 we read tasks-status.json, then
|
|
@@ -558,8 +652,19 @@ describe("runPipelineJob — bounded retry loop", () => {
|
|
|
558
652
|
if (call === 2) {
|
|
559
653
|
const text = await readFile(join(fixture.jobDir, "tasks-status.json"), "utf-8");
|
|
560
654
|
const parsed = JSON.parse(text) as {
|
|
561
|
-
|
|
655
|
+
lastUpdated?: string;
|
|
656
|
+
tasks: Record<string, {
|
|
657
|
+
state?: string;
|
|
658
|
+
attempts?: number;
|
|
659
|
+
failedStage?: unknown;
|
|
660
|
+
error?: unknown;
|
|
661
|
+
restartCount?: number;
|
|
662
|
+
retrying?: unknown;
|
|
663
|
+
nextRetryAt?: unknown;
|
|
664
|
+
lastRetryError?: { message?: unknown };
|
|
665
|
+
}>;
|
|
562
666
|
};
|
|
667
|
+
interimLastUpdated = parsed.lastUpdated;
|
|
563
668
|
interimSnapshot = parsed.tasks["task-a"];
|
|
564
669
|
return makeSuccessResult() as never;
|
|
565
670
|
}
|
|
@@ -584,6 +689,62 @@ describe("runPipelineJob — bounded retry loop", () => {
|
|
|
584
689
|
expect(interimSnapshot?.error).toBeUndefined();
|
|
585
690
|
expect(interimSnapshot?.attempts).toBe(2);
|
|
586
691
|
expect(interimSnapshot?.restartCount).toBe(1);
|
|
692
|
+
expect(interimSnapshot?.retrying).toBe(true);
|
|
693
|
+
expect(typeof interimSnapshot?.nextRetryAt).toBe("string");
|
|
694
|
+
expect(interimSnapshot?.lastRetryError?.message).toBe("stub failure");
|
|
695
|
+
expect(Date.parse(interimSnapshot?.nextRetryAt as string)).toBeGreaterThan(Date.parse(interimLastUpdated ?? ""));
|
|
696
|
+
});
|
|
697
|
+
|
|
698
|
+
test("exception escaping retry loop after first retry metadata write clears retrying/nextRetryAt/lastRetryError", async () => {
|
|
699
|
+
mockGetConfig.mockImplementation(() => ({ taskRunner: { maxAttempts: 3 } }));
|
|
700
|
+
const fixture = await setupMultiTaskFixture(["task-a"]);
|
|
701
|
+
cleanupDirs.push(fixture.tmpDir);
|
|
702
|
+
|
|
703
|
+
let call = 0;
|
|
704
|
+
mockRunPipeline.mockImplementation(async () => {
|
|
705
|
+
call += 1;
|
|
706
|
+
if (call === 1) return makeFailureResult() as never;
|
|
707
|
+
// On the second attempt, throw an exception instead of returning a result.
|
|
708
|
+
// This simulates an unexpected error escaping the retry loop after retry
|
|
709
|
+
// metadata (retrying=true, nextRetryAt, lastRetryError) has been written.
|
|
710
|
+
throw new Error("exception mid-retry");
|
|
711
|
+
});
|
|
712
|
+
|
|
713
|
+
const exitCalls: Array<number | undefined> = [];
|
|
714
|
+
const exitSpy = spyOn(process, "exit").mockImplementation(((code?: number) => {
|
|
715
|
+
exitCalls.push(code);
|
|
716
|
+
throw new Error(`__test_exit__:${String(code)}`);
|
|
717
|
+
}) as typeof process.exit);
|
|
718
|
+
const fakeTimer = { unref: () => fakeTimer, ref: () => fakeTimer };
|
|
719
|
+
// Stub setTimeout to prevent the delayed process.exit(1) from leaking into subsequent tests.
|
|
720
|
+
const setTimeoutSpy = spyOn(globalThis, "setTimeout").mockImplementation(
|
|
721
|
+
((() => fakeTimer as unknown as ReturnType<typeof setTimeout>) as unknown) as typeof setTimeout,
|
|
722
|
+
);
|
|
723
|
+
|
|
724
|
+
try {
|
|
725
|
+
await runPipelineJob(fixture.jobId);
|
|
726
|
+
} catch (e) {
|
|
727
|
+
if (!(e instanceof Error) || !/^__test_exit__:/.test(e.message)) throw e;
|
|
728
|
+
} finally {
|
|
729
|
+
exitSpy.mockRestore();
|
|
730
|
+
setTimeoutSpy.mockRestore();
|
|
731
|
+
}
|
|
732
|
+
|
|
733
|
+
expect(exitCalls).toContain(1);
|
|
734
|
+
|
|
735
|
+
const statusText = await readFile(join(fixture.jobDir, "tasks-status.json"), "utf-8");
|
|
736
|
+
const status = JSON.parse(statusText) as {
|
|
737
|
+
tasks: Record<string, {
|
|
738
|
+
state?: string;
|
|
739
|
+
retrying?: unknown;
|
|
740
|
+
nextRetryAt?: unknown;
|
|
741
|
+
lastRetryError?: unknown;
|
|
742
|
+
}>;
|
|
743
|
+
};
|
|
744
|
+
expect(status.tasks["task-a"]?.state).toBe("failed");
|
|
745
|
+
expect(status.tasks["task-a"]?.retrying).toBeUndefined();
|
|
746
|
+
expect(status.tasks["task-a"]?.nextRetryAt).toBeUndefined();
|
|
747
|
+
expect(status.tasks["task-a"]?.lastRetryError).toBeUndefined();
|
|
587
748
|
});
|
|
588
749
|
|
|
589
750
|
test("missing taskRunner config falls back to the default retry cap", async () => {
|
|
@@ -617,6 +778,12 @@ describe("runPipelineJob — bounded retry loop", () => {
|
|
|
617
778
|
exitCalls.push(code);
|
|
618
779
|
throw new Error(`__test_exit__:${String(code)}`);
|
|
619
780
|
}) as typeof process.exit);
|
|
781
|
+
const fakeTimer = { unref: () => fakeTimer, ref: () => fakeTimer };
|
|
782
|
+
// The catch block schedules process.exit(1) via setTimeout(...).unref(); without this
|
|
783
|
+
// stub the timer leaks into subsequent tests.
|
|
784
|
+
const setTimeoutSpy = spyOn(globalThis, "setTimeout").mockImplementation(
|
|
785
|
+
((() => fakeTimer as unknown as ReturnType<typeof setTimeout>) as unknown) as typeof setTimeout,
|
|
786
|
+
);
|
|
620
787
|
|
|
621
788
|
try {
|
|
622
789
|
await runPipelineJob(fixture.jobId);
|
|
@@ -624,11 +791,33 @@ describe("runPipelineJob — bounded retry loop", () => {
|
|
|
624
791
|
if (!(e instanceof Error) || !/^__test_exit__:/.test(e.message)) throw e;
|
|
625
792
|
} finally {
|
|
626
793
|
exitSpy.mockRestore();
|
|
794
|
+
setTimeoutSpy.mockRestore();
|
|
627
795
|
}
|
|
628
796
|
|
|
629
797
|
expect(mockRunPipeline.mock.calls.length).toBe(1);
|
|
630
798
|
expect(sleepDelays).toEqual([]);
|
|
631
799
|
expect(exitCalls).toContain(1);
|
|
800
|
+
|
|
801
|
+
const statusText = await readFile(join(fixture.jobDir, "tasks-status.json"), "utf-8");
|
|
802
|
+
const status = JSON.parse(statusText) as {
|
|
803
|
+
state?: string;
|
|
804
|
+
current?: string | null;
|
|
805
|
+
currentStage?: string | null;
|
|
806
|
+
tasks: Record<string, {
|
|
807
|
+
state?: string;
|
|
808
|
+
endedAt?: string;
|
|
809
|
+
failedStage?: string;
|
|
810
|
+
error?: { message?: string };
|
|
811
|
+
}>;
|
|
812
|
+
};
|
|
813
|
+
const failedTask = status.tasks["task-a"];
|
|
814
|
+
expect(status.state).toBe("failed");
|
|
815
|
+
expect(status.current).toBe("task-a");
|
|
816
|
+
expect(status.currentStage).toBeNull();
|
|
817
|
+
expect(failedTask?.state).toBe("failed");
|
|
818
|
+
expect(typeof failedTask?.endedAt).toBe("string");
|
|
819
|
+
expect(failedTask?.failedStage).toBe("orchestrator");
|
|
820
|
+
expect(failedTask?.error?.message).toContain("task module exploded");
|
|
632
821
|
});
|
|
633
822
|
});
|
|
634
823
|
|
|
package/src/core/pipeline-runner.ts
CHANGED
|
@@ -100,6 +100,9 @@ export interface TaskStatus {
|
|
|
100
100
|
executionTimeMs?: number;
|
|
101
101
|
refinementAttempts?: number;
|
|
102
102
|
restartCount?: number;
|
|
103
|
+
retrying?: boolean;
|
|
104
|
+
nextRetryAt?: string;
|
|
105
|
+
lastRetryError?: NormalizedError;
|
|
103
106
|
error?: NormalizedError;
|
|
104
107
|
failedStage?: string;
|
|
105
108
|
stageLogPath?: string;
|
|
@@ -285,6 +288,7 @@ const RETRY_BACKOFF_MULTIPLIER = 2;
|
|
|
285
288
|
/** Runs a pipeline job end-to-end for the given job ID. */
|
|
286
289
|
export async function runPipelineJob(jobId: string): Promise<void> {
|
|
287
290
|
let workDir: string | undefined;
|
|
291
|
+
let activeTaskName: string | null = null;
|
|
288
292
|
const poRoot = resolve(process.env["PO_ROOT"] ?? process.cwd());
|
|
289
293
|
let dataDir: string | undefined = resolve(poRoot, process.env["PO_DATA_DIR"] ?? "pipeline-data");
|
|
290
294
|
try {
|
|
@@ -374,6 +378,7 @@ export async function runPipelineJob(jobId: string): Promise<void> {
|
|
|
374
378
|
}
|
|
375
379
|
|
|
376
380
|
// Update status to RUNNING
|
|
381
|
+
activeTaskName = taskName;
|
|
377
382
|
await writeJobStatus(config.workDir, (snapshot) => {
|
|
378
383
|
snapshot.state = "running";
|
|
379
384
|
snapshot.current = taskName;
|
|
@@ -382,6 +387,14 @@ export async function runPipelineJob(jobId: string): Promise<void> {
|
|
|
382
387
|
taskEntry.state = "running";
|
|
383
388
|
taskEntry.startedAt = new Date().toISOString();
|
|
384
389
|
taskEntry.attempts = (taskEntry.attempts ?? 0) + 1;
|
|
390
|
+
delete taskEntry.endedAt;
|
|
391
|
+
delete taskEntry.failedStage;
|
|
392
|
+
delete taskEntry.error;
|
|
393
|
+
delete taskEntry.stageLogPath;
|
|
394
|
+
delete taskEntry.errorContext;
|
|
395
|
+
delete taskEntry.retrying;
|
|
396
|
+
delete taskEntry.nextRetryAt;
|
|
397
|
+
delete taskEntry.lastRetryError;
|
|
385
398
|
snapshot.tasks[taskName] = taskEntry;
|
|
386
399
|
});
|
|
387
400
|
|
|
@@ -452,13 +465,15 @@ export async function runPipelineJob(jobId: string): Promise<void> {
|
|
|
452
465
|
let result: PipelineResult | undefined;
|
|
453
466
|
for (let attempt = 1; attempt <= cap; attempt++) {
|
|
454
467
|
result = await runPipeline(relocatedEntryPath, taskExecutionContext);
|
|
455
|
-
|
|
468
|
+
const failedResult = result; // const binding lets TypeScript narrow the union after the ok check below
|
|
469
|
+
if (failedResult.ok) break;
|
|
456
470
|
if (attempt >= cap) break;
|
|
457
471
|
|
|
458
472
|
const delay = Math.min(
|
|
459
473
|
INITIAL_RETRY_DELAY_MS * RETRY_BACKOFF_MULTIPLIER ** (attempt - 1),
|
|
460
474
|
MAX_RETRY_DELAY_MS,
|
|
461
475
|
);
|
|
476
|
+
const nextRetryAt = new Date(Date.now() + delay).toISOString();
|
|
462
477
|
|
|
463
478
|
await writeJobStatus(config.workDir, (snapshot) => {
|
|
464
479
|
const entry = snapshot.tasks[taskName] ?? {};
|
|
@@ -466,6 +481,9 @@ export async function runPipelineJob(jobId: string): Promise<void> {
|
|
|
466
481
|
entry.state = "running";
|
|
467
482
|
entry.attempts = currentAttempts + 1;
|
|
468
483
|
entry.restartCount = (entry.restartCount ?? 0) + 1;
|
|
484
|
+
entry.retrying = true;
|
|
485
|
+
entry.nextRetryAt = nextRetryAt;
|
|
486
|
+
entry.lastRetryError = failedResult.error;
|
|
469
487
|
delete entry.failedStage;
|
|
470
488
|
delete entry.error;
|
|
471
489
|
snapshot.tasks[taskName] = entry;
|
|
@@ -493,8 +511,12 @@ export async function runPipelineJob(jobId: string): Promise<void> {
|
|
|
493
511
|
taskEntry.endedAt = new Date().toISOString();
|
|
494
512
|
taskEntry.executionTimeMs = executionTimeMs;
|
|
495
513
|
taskEntry.refinementAttempts = ((result.context as unknown) as Record<string, unknown>)["refinementAttempts"] as number | undefined ?? 0;
|
|
514
|
+
delete taskEntry.retrying;
|
|
515
|
+
delete taskEntry.nextRetryAt;
|
|
516
|
+
delete taskEntry.lastRetryError;
|
|
496
517
|
snapshot.tasks[taskName] = taskEntry;
|
|
497
518
|
});
|
|
519
|
+
activeTaskName = null;
|
|
498
520
|
|
|
499
521
|
// Add task output to pipelineArtifacts
|
|
500
522
|
const outputPath = join(config.workDir, "tasks", taskName, "output.json");
|
|
@@ -522,8 +544,12 @@ export async function runPipelineJob(jobId: string): Promise<void> {
|
|
|
522
544
|
raw["failedStage"] = result.failedStage;
|
|
523
545
|
raw["stageLogPath"] = result.error.debug?.logPath;
|
|
524
546
|
raw["errorContext"] = result.error.debug as unknown as Record<string, unknown>;
|
|
547
|
+
delete raw["retrying"];
|
|
548
|
+
delete raw["nextRetryAt"];
|
|
549
|
+
delete raw["lastRetryError"];
|
|
525
550
|
snapshot.tasks[taskName] = raw as typeof snapshot.tasks[string];
|
|
526
551
|
});
|
|
552
|
+
activeTaskName = null;
|
|
527
553
|
|
|
528
554
|
await releaseJobSlotBestEffort(dataDir, jobId);
|
|
529
555
|
process.exit(1);
|
|
@@ -567,6 +593,32 @@ export async function runPipelineJob(jobId: string): Promise<void> {
|
|
|
567
593
|
} catch {
|
|
568
594
|
// Do not mask the original failure if log-write fails
|
|
569
595
|
}
|
|
596
|
+
if (activeTaskName !== null) {
|
|
597
|
+
try {
|
|
598
|
+
const failedTaskName = activeTaskName;
|
|
599
|
+
const failedAt = new Date().toISOString();
|
|
600
|
+
await writeJobStatus(workDir, (snapshot) => {
|
|
601
|
+
snapshot.state = "failed";
|
|
602
|
+
snapshot.current = failedTaskName;
|
|
603
|
+
snapshot.currentStage = null;
|
|
604
|
+
const existing = snapshot.tasks[failedTaskName] ?? {};
|
|
605
|
+
const taskEntry: Partial<TaskStatus> & Record<string, unknown> = {
|
|
606
|
+
...existing,
|
|
607
|
+
state: TaskState.FAILED,
|
|
608
|
+
endedAt: failedAt,
|
|
609
|
+
failedStage: "orchestrator",
|
|
610
|
+
error: normalized,
|
|
611
|
+
currentStage: null,
|
|
612
|
+
};
|
|
613
|
+
delete taskEntry.retrying;
|
|
614
|
+
delete taskEntry.nextRetryAt;
|
|
615
|
+
delete taskEntry.lastRetryError;
|
|
616
|
+
snapshot.tasks[failedTaskName] = taskEntry as typeof snapshot.tasks[string];
|
|
617
|
+
});
|
|
618
|
+
} catch {
|
|
619
|
+
// Do not mask the original failure if status finalization fails
|
|
620
|
+
}
|
|
621
|
+
}
|
|
570
622
|
try {
|
|
571
623
|
await cleanupPidFile(workDir);
|
|
572
624
|
} catch {
|
|
package/src/core/status-writer.ts
CHANGED
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
import { rename, unlink, mkdir } from "node:fs/promises";
|
|
2
|
+
import { basename, join } from "node:path";
|
|
3
|
+
import { createJobLogger } from "./logger";
|
|
4
|
+
import type { NormalizedError } from "./pipeline-runner";
|
|
5
|
+
|
|
1
6
|
export type JobState = "pending" | "running" | "done" | "failed";
|
|
2
7
|
|
|
3
8
|
type TaskState = "pending" | "running" | "done" | "failed";
|
|
@@ -16,9 +21,14 @@ export interface TaskEntry {
|
|
|
16
21
|
attempts?: number;
|
|
17
22
|
restartCount?: number;
|
|
18
23
|
refinementAttempts?: number;
|
|
24
|
+
retrying?: boolean;
|
|
25
|
+
nextRetryAt?: string;
|
|
26
|
+
lastRetryError?: NormalizedError;
|
|
19
27
|
tokenUsage?: unknown[];
|
|
20
28
|
startedAt?: string;
|
|
21
29
|
endedAt?: string;
|
|
30
|
+
stageLogPath?: string;
|
|
31
|
+
errorContext?: Record<string, unknown>;
|
|
22
32
|
files?: FilesManifest;
|
|
23
33
|
[key: string]: unknown;
|
|
24
34
|
}
|
|
@@ -52,9 +62,17 @@ export interface UploadArtifact {
|
|
|
52
62
|
|
|
53
63
|
export const STATUS_FILENAME = "tasks-status.json";
|
|
54
64
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
65
|
+
function clearResetTaskMetadata(task: TaskEntry): void {
|
|
66
|
+
delete task.startedAt;
|
|
67
|
+
delete task.endedAt;
|
|
68
|
+
delete task.failedStage;
|
|
69
|
+
delete task.error;
|
|
70
|
+
delete task.stageLogPath;
|
|
71
|
+
delete task.errorContext;
|
|
72
|
+
delete task.retrying;
|
|
73
|
+
delete task.nextRetryAt;
|
|
74
|
+
delete task.lastRetryError;
|
|
75
|
+
}
|
|
58
76
|
|
|
59
77
|
export function validateFilePath(filename: string): boolean {
|
|
60
78
|
if (!filename || typeof filename !== "string") {
|
|
@@ -225,8 +243,7 @@ export function resetJobFromTask(jobDir: string, fromTask: string, options?: Res
|
|
|
225
243
|
const task = snapshot.tasks[key]!;
|
|
226
244
|
task.state = "pending";
|
|
227
245
|
task.currentStage = null;
|
|
228
|
-
|
|
229
|
-
delete task.error;
|
|
246
|
+
clearResetTaskMetadata(task);
|
|
230
247
|
task.attempts = 0;
|
|
231
248
|
task.restartCount = 0;
|
|
232
249
|
task.refinementAttempts = 0;
|
|
@@ -252,8 +269,7 @@ export function resetJobToCleanSlate(jobDir: string, options?: ResetOptions): Pr
|
|
|
252
269
|
const task = snapshot.tasks[key]!;
|
|
253
270
|
task.state = "pending";
|
|
254
271
|
task.currentStage = null;
|
|
255
|
-
|
|
256
|
-
delete task.error;
|
|
272
|
+
clearResetTaskMetadata(task);
|
|
257
273
|
task.attempts = 0;
|
|
258
274
|
task.restartCount = 0;
|
|
259
275
|
task.refinementAttempts = 0;
|
|
@@ -280,8 +296,7 @@ export function resetSingleTask(jobDir: string, taskId: string, options?: ResetO
|
|
|
280
296
|
const task = snapshot.tasks[taskId]!;
|
|
281
297
|
task.state = "pending";
|
|
282
298
|
task.currentStage = null;
|
|
283
|
-
|
|
284
|
-
delete task.error;
|
|
299
|
+
clearResetTaskMetadata(task);
|
|
285
300
|
task.attempts = 0;
|
|
286
301
|
task.restartCount = 0;
|
|
287
302
|
task.refinementAttempts = 0;
|
|
package/src/ui/client/__tests__/job-adapter.test.ts
CHANGED
|
@@ -42,6 +42,33 @@ describe("job adapter", () => {
|
|
|
42
42
|
expect(normalizeTasks({ t1: { state: "done" } })["t1"]?.restartCount).toBeUndefined();
|
|
43
43
|
});
|
|
44
44
|
|
|
45
|
+
it("maps retry metadata when present and leaves it undefined when absent", () => {
|
|
46
|
+
const tasks = normalizeTasks({
|
|
47
|
+
retrying: {
|
|
48
|
+
state: "running",
|
|
49
|
+
retrying: true,
|
|
50
|
+
nextRetryAt: "2026-04-01T10:01:00.000Z",
|
|
51
|
+
lastRetryError: { message: "try again" },
|
|
52
|
+
},
|
|
53
|
+
plain: { state: "pending" },
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
expect(tasks["retrying"]?.retrying).toBe(true);
|
|
57
|
+
expect(tasks["retrying"]?.nextRetryAt).toBe("2026-04-01T10:01:00.000Z");
|
|
58
|
+
expect(tasks["retrying"]?.lastRetryError).toEqual({ message: "try again" });
|
|
59
|
+
expect(tasks["plain"]?.retrying).toBeUndefined();
|
|
60
|
+
expect(tasks["plain"]?.nextRetryAt).toBeUndefined();
|
|
61
|
+
expect(tasks["plain"]?.lastRetryError).toBeUndefined();
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
it("maps lastRetryError: null and preserves null on the normalized task", () => {
|
|
65
|
+
const tasks = normalizeTasks({
|
|
66
|
+
t1: { state: "running", lastRetryError: null },
|
|
67
|
+
});
|
|
68
|
+
|
|
69
|
+
expect(tasks["t1"]?.lastRetryError).toBeNull();
|
|
70
|
+
});
|
|
71
|
+
|
|
45
72
|
it("adapts summary jobs with defaults", () => {
|
|
46
73
|
const job = adaptJobSummary({
|
|
47
74
|
jobId: "job-1",
|
|
package/src/ui/client/adapters/job-adapter.ts
CHANGED
|
@@ -50,6 +50,9 @@ function normalizeTask(name: string, rawTask: unknown): NormalizedTask {
|
|
|
50
50
|
endedAt: toStringOrNull(task["endedAt"]),
|
|
51
51
|
attempts: typeof task["attempts"] === "number" ? task["attempts"] : undefined,
|
|
52
52
|
restartCount: typeof task["restartCount"] === "number" ? task["restartCount"] : undefined,
|
|
53
|
+
retrying: typeof task["retrying"] === "boolean" ? task["retrying"] : undefined,
|
|
54
|
+
nextRetryAt: typeof task["nextRetryAt"] === "string" ? task["nextRetryAt"] : undefined,
|
|
55
|
+
lastRetryError: Object.hasOwn(task, "lastRetryError") ? task["lastRetryError"] as import("../types").RetryError | null : undefined,
|
|
53
56
|
executionTimeMs: typeof task["executionTimeMs"] === "number" ? task["executionTimeMs"] : undefined,
|
|
54
57
|
currentStage: typeof task["currentStage"] === "string" ? task["currentStage"] : undefined,
|
|
55
58
|
failedStage: typeof task["failedStage"] === "string" ? task["failedStage"] : undefined,
|
package/src/ui/client/types.ts
CHANGED
|
@@ -79,6 +79,8 @@ export interface CostsSummary {
|
|
|
79
79
|
totalOutputCost: number;
|
|
80
80
|
}
|
|
81
81
|
|
|
82
|
+
export type RetryError = { message: string; name?: string; stack?: string } | string;
|
|
83
|
+
|
|
82
84
|
export interface TaskFiles {
|
|
83
85
|
artifacts: string[];
|
|
84
86
|
logs: string[];
|
|
@@ -92,6 +94,9 @@ export interface NormalizedTask {
|
|
|
92
94
|
endedAt: string | null;
|
|
93
95
|
attempts?: number;
|
|
94
96
|
restartCount?: number;
|
|
97
|
+
retrying?: boolean;
|
|
98
|
+
nextRetryAt?: string;
|
|
99
|
+
lastRetryError?: RetryError | null;
|
|
95
100
|
executionTimeMs?: number;
|
|
96
101
|
currentStage?: string;
|
|
97
102
|
failedStage?: string;
|
|
@@ -21943,6 +21943,9 @@ function normalizeTask(name2, rawTask) {
|
|
|
21943
21943
|
endedAt: toStringOrNull(task["endedAt"]),
|
|
21944
21944
|
attempts: typeof task["attempts"] === "number" ? task["attempts"] : void 0,
|
|
21945
21945
|
restartCount: typeof task["restartCount"] === "number" ? task["restartCount"] : void 0,
|
|
21946
|
+
retrying: typeof task["retrying"] === "boolean" ? task["retrying"] : void 0,
|
|
21947
|
+
nextRetryAt: typeof task["nextRetryAt"] === "string" ? task["nextRetryAt"] : void 0,
|
|
21948
|
+
lastRetryError: Object.hasOwn(task, "lastRetryError") ? task["lastRetryError"] : void 0,
|
|
21946
21949
|
executionTimeMs: typeof task["executionTimeMs"] === "number" ? task["executionTimeMs"] : void 0,
|
|
21947
21950
|
currentStage: typeof task["currentStage"] === "string" ? task["currentStage"] : void 0,
|
|
21948
21951
|
failedStage: typeof task["failedStage"] === "string" ? task["failedStage"] : void 0,
|