@ryanfw/prompt-orchestration-pipeline 1.2.8 → 1.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/config/paths.ts +13 -8
- package/src/core/__tests__/config.test.ts +42 -0
- package/src/core/__tests__/job-concurrency.test.ts +554 -0
- package/src/core/__tests__/orchestrator.test.ts +353 -0
- package/src/core/__tests__/pipeline-runner.test.ts +355 -5
- package/src/core/config.ts +31 -1
- package/src/core/job-concurrency.ts +462 -0
- package/src/core/orchestrator.ts +370 -57
- package/src/core/pipeline-runner.ts +93 -12
- package/src/core/status-writer.ts +24 -9
- package/src/ui/client/__tests__/api.test.ts +101 -1
- package/src/ui/client/__tests__/job-adapter.test.ts +27 -0
- package/src/ui/client/__tests__/useConcurrencyStatus.test.ts +126 -0
- package/src/ui/client/adapters/job-adapter.ts +3 -0
- package/src/ui/client/api.ts +77 -7
- package/src/ui/client/hooks/useConcurrencyStatus.ts +102 -0
- package/src/ui/client/types.ts +38 -1
- package/src/ui/dist/assets/{index-CNlnQmK4.js → index-HrBsHfx3.js} +291 -42
- package/src/ui/dist/assets/index-HrBsHfx3.js.map +1 -0
- package/src/ui/dist/assets/style-BKG0bHu-.css +2 -0
- package/src/ui/dist/index.html +2 -2
- package/src/ui/embedded-assets.js +6 -6
- package/src/ui/pages/PromptPipelineDashboard.tsx +186 -4
- package/src/ui/pages/__tests__/PromptPipelineDashboard.test.tsx +272 -1
- package/src/ui/server/__tests__/concurrency-endpoint.test.ts +190 -0
- package/src/ui/server/__tests__/index.test.ts +92 -3
- package/src/ui/server/__tests__/job-control-endpoints.test.ts +419 -3
- package/src/ui/server/endpoints/concurrency-endpoint.ts +72 -0
- package/src/ui/server/endpoints/job-control-endpoints.ts +189 -33
- package/src/ui/server/index.ts +21 -2
- package/src/ui/server/router.ts +2 -0
- package/src/ui/state/__tests__/watcher.test.ts +31 -0
- package/src/ui/state/transformers/__tests__/status-transformer.test.ts +27 -0
- package/src/ui/state/transformers/status-transformer.ts +3 -0
- package/src/ui/state/types.ts +7 -0
- package/src/ui/state/watcher.ts +9 -1
- package/src/ui/dist/assets/index-CNlnQmK4.js.map +0 -1
- package/src/ui/dist/assets/style-DNbNL3Yg.css +0 -2
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ryanfw/prompt-orchestration-pipeline",
|
|
3
|
-
"version": "1.2.8",
|
|
3
|
+
"version": "1.2.10",
|
|
4
4
|
"description": "A Prompt-orchestration pipeline (POP) is a framework for building, running, and experimenting with complex chains of LLM tasks.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/ui/server/index.ts",
|
package/src/config/paths.ts
CHANGED
|
@@ -8,25 +8,30 @@ export interface PipelinePaths {
|
|
|
8
8
|
readonly rejected: string;
|
|
9
9
|
}
|
|
10
10
|
|
|
11
|
+
export function getPipelineDataDir(baseDir: string): string {
|
|
12
|
+
return join(baseDir, "pipeline-data");
|
|
13
|
+
}
|
|
14
|
+
|
|
11
15
|
export function resolvePipelinePaths(baseDir: string): PipelinePaths {
|
|
16
|
+
const dataDir = getPipelineDataDir(baseDir);
|
|
12
17
|
return {
|
|
13
|
-
pending: join(
|
|
14
|
-
current: join(
|
|
15
|
-
complete: join(
|
|
16
|
-
rejected: join(
|
|
18
|
+
pending: join(dataDir, "pending"),
|
|
19
|
+
current: join(dataDir, "current"),
|
|
20
|
+
complete: join(dataDir, "complete"),
|
|
21
|
+
rejected: join(dataDir, "rejected"),
|
|
17
22
|
};
|
|
18
23
|
}
|
|
19
24
|
|
|
20
25
|
export function getPendingSeedPath(baseDir: string, jobId: string): string {
|
|
21
|
-
return join(baseDir, "
|
|
26
|
+
return join(getPipelineDataDir(baseDir), "pending", `${jobId}-seed.json`);
|
|
22
27
|
}
|
|
23
28
|
|
|
24
29
|
export function getCurrentSeedPath(baseDir: string, jobId: string): string {
|
|
25
|
-
return join(baseDir, "
|
|
30
|
+
return join(getPipelineDataDir(baseDir), "current", jobId, "seed.json");
|
|
26
31
|
}
|
|
27
32
|
|
|
28
33
|
export function getCompleteSeedPath(baseDir: string, jobId: string): string {
|
|
29
|
-
return join(baseDir, "
|
|
34
|
+
return join(getPipelineDataDir(baseDir), "complete", jobId, "seed.json");
|
|
30
35
|
}
|
|
31
36
|
|
|
32
37
|
export function getJobDirectoryPath(
|
|
@@ -34,7 +39,7 @@ export function getJobDirectoryPath(
|
|
|
34
39
|
jobId: string,
|
|
35
40
|
location: JobLocationValue,
|
|
36
41
|
): string {
|
|
37
|
-
return join(baseDir,
|
|
42
|
+
return join(getPipelineDataDir(baseDir), location, jobId);
|
|
38
43
|
}
|
|
39
44
|
|
|
40
45
|
export function getJobMetadataPath(
|
|
@@ -76,6 +76,10 @@ describe("defaultConfig", () => {
|
|
|
76
76
|
test("taskRunner.maxAttempts defaults to 3", () => {
|
|
77
77
|
expect(defaultConfig.taskRunner.maxAttempts).toBe(3);
|
|
78
78
|
});
|
|
79
|
+
|
|
80
|
+
test("orchestrator.maxConcurrentJobs defaults to 3", () => {
|
|
81
|
+
expect(defaultConfig.orchestrator.maxConcurrentJobs).toBe(3);
|
|
82
|
+
});
|
|
79
83
|
});
|
|
80
84
|
|
|
81
85
|
describe("validateConfig (via loadConfig)", () => {
|
|
@@ -152,6 +156,44 @@ describe("PO_TASK_MAX_ATTEMPTS env override", () => {
|
|
|
152
156
|
});
|
|
153
157
|
});
|
|
154
158
|
|
|
159
|
+
describe("PO_MAX_RUNNING_JOBS env override", () => {
|
|
160
|
+
afterEach(() => {
|
|
161
|
+
delete process.env.PO_MAX_RUNNING_JOBS;
|
|
162
|
+
resetConfig();
|
|
163
|
+
});
|
|
164
|
+
|
|
165
|
+
test("getConfig reads PO_MAX_RUNNING_JOBS into orchestrator.maxConcurrentJobs", () => {
|
|
166
|
+
process.env.PO_MAX_RUNNING_JOBS = "7";
|
|
167
|
+
resetConfig();
|
|
168
|
+
const config: AppConfig = getConfig();
|
|
169
|
+
expect(config.orchestrator.maxConcurrentJobs).toBe(7);
|
|
170
|
+
});
|
|
171
|
+
|
|
172
|
+
test("getConfig rejects PO_MAX_RUNNING_JOBS=0", () => {
|
|
173
|
+
process.env.PO_MAX_RUNNING_JOBS = "0";
|
|
174
|
+
resetConfig();
|
|
175
|
+
expect(() => getConfig()).toThrow("orchestrator.maxConcurrentJobs");
|
|
176
|
+
});
|
|
177
|
+
|
|
178
|
+
test("getConfig rejects negative PO_MAX_RUNNING_JOBS", () => {
|
|
179
|
+
process.env.PO_MAX_RUNNING_JOBS = "-1";
|
|
180
|
+
resetConfig();
|
|
181
|
+
expect(() => getConfig()).toThrow("orchestrator.maxConcurrentJobs");
|
|
182
|
+
});
|
|
183
|
+
|
|
184
|
+
test("getConfig rejects non-integer PO_MAX_RUNNING_JOBS", () => {
|
|
185
|
+
process.env.PO_MAX_RUNNING_JOBS = "1.5";
|
|
186
|
+
resetConfig();
|
|
187
|
+
expect(() => getConfig()).toThrow("orchestrator.maxConcurrentJobs");
|
|
188
|
+
});
|
|
189
|
+
|
|
190
|
+
test("getConfig rejects non-numeric PO_MAX_RUNNING_JOBS", () => {
|
|
191
|
+
process.env.PO_MAX_RUNNING_JOBS = "abc";
|
|
192
|
+
resetConfig();
|
|
193
|
+
expect(() => getConfig()).toThrow("orchestrator.maxConcurrentJobs");
|
|
194
|
+
});
|
|
195
|
+
});
|
|
196
|
+
|
|
155
197
|
describe("getConfig", () => {
|
|
156
198
|
afterEach(() => {
|
|
157
199
|
resetConfig();
|
|
@@ -0,0 +1,554 @@
|
|
|
1
|
+
import { describe, test, expect } from "bun:test";
|
|
2
|
+
import { mkdtemp, mkdir, writeFile, utimes, rm, readdir } from "node:fs/promises";
|
|
3
|
+
import { existsSync } from "node:fs";
|
|
4
|
+
import { tmpdir } from "node:os";
|
|
5
|
+
import { join } from "node:path";
|
|
6
|
+
import { spawnSync } from "node:child_process";
|
|
7
|
+
import {
|
|
8
|
+
getConcurrencyRuntimePaths,
|
|
9
|
+
getJobConcurrencyStatus,
|
|
10
|
+
listQueuedSeeds,
|
|
11
|
+
pruneStaleJobSlots,
|
|
12
|
+
releaseJobSlot,
|
|
13
|
+
tryAcquireJobSlot,
|
|
14
|
+
updateJobSlotPid,
|
|
15
|
+
type JobSlotLease,
|
|
16
|
+
} from "../job-concurrency";
|
|
17
|
+
import { readFile } from "node:fs/promises";
|
|
18
|
+
|
|
19
|
+
describe("getConcurrencyRuntimePaths", () => {
|
|
20
|
+
test("resolves runtime paths under <dataDir>/runtime", () => {
|
|
21
|
+
const dataDir = "/tmp/pipeline-data";
|
|
22
|
+
const paths = getConcurrencyRuntimePaths(dataDir);
|
|
23
|
+
expect(paths.runtimeDir).toBe(join(dataDir, "runtime"));
|
|
24
|
+
expect(paths.lockDir).toBe(join(dataDir, "runtime", "lock"));
|
|
25
|
+
expect(paths.runningJobsDir).toBe(join(dataDir, "runtime", "running-jobs"));
|
|
26
|
+
});
|
|
27
|
+
|
|
28
|
+
test("works with relative dataDir", () => {
|
|
29
|
+
const paths = getConcurrencyRuntimePaths("data");
|
|
30
|
+
expect(paths.runtimeDir).toBe(join("data", "runtime"));
|
|
31
|
+
expect(paths.lockDir).toBe(join("data", "runtime", "lock"));
|
|
32
|
+
expect(paths.runningJobsDir).toBe(join("data", "runtime", "running-jobs"));
|
|
33
|
+
});
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
async function writeSeed(
|
|
37
|
+
pendingDir: string,
|
|
38
|
+
jobId: string,
|
|
39
|
+
body: unknown,
|
|
40
|
+
mtimeSec: number,
|
|
41
|
+
): Promise<string> {
|
|
42
|
+
const filePath = join(pendingDir, `${jobId}-seed.json`);
|
|
43
|
+
await writeFile(filePath, typeof body === "string" ? body : JSON.stringify(body));
|
|
44
|
+
await utimes(filePath, mtimeSec, mtimeSec);
|
|
45
|
+
return filePath;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
describe("listQueuedSeeds", () => {
|
|
49
|
+
test("returns [] when pending dir does not exist", async () => {
|
|
50
|
+
const dir = await mkdtemp(join(tmpdir(), "queued-seeds-"));
|
|
51
|
+
try {
|
|
52
|
+
expect(await listQueuedSeeds(dir)).toEqual([]);
|
|
53
|
+
} finally {
|
|
54
|
+
await rm(dir, { recursive: true });
|
|
55
|
+
}
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
test("returns [] when pending dir is empty", async () => {
|
|
59
|
+
const dir = await mkdtemp(join(tmpdir(), "queued-seeds-"));
|
|
60
|
+
await mkdir(join(dir, "pending"), { recursive: true });
|
|
61
|
+
try {
|
|
62
|
+
expect(await listQueuedSeeds(dir)).toEqual([]);
|
|
63
|
+
} finally {
|
|
64
|
+
await rm(dir, { recursive: true });
|
|
65
|
+
}
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
test("extracts name and pipeline from valid seed JSON", async () => {
|
|
69
|
+
const dir = await mkdtemp(join(tmpdir(), "queued-seeds-"));
|
|
70
|
+
const pendingDir = join(dir, "pending");
|
|
71
|
+
await mkdir(pendingDir, { recursive: true });
|
|
72
|
+
const seedPath = await writeSeed(
|
|
73
|
+
pendingDir,
|
|
74
|
+
"job-123",
|
|
75
|
+
{ name: "demo", pipeline: "alpha", extra: 1 },
|
|
76
|
+
1700000000,
|
|
77
|
+
);
|
|
78
|
+
try {
|
|
79
|
+
const result = await listQueuedSeeds(dir);
|
|
80
|
+
expect(result).toEqual([
|
|
81
|
+
{
|
|
82
|
+
jobId: "job-123",
|
|
83
|
+
seedPath,
|
|
84
|
+
queuedAt: new Date(1700000000 * 1000).toISOString(),
|
|
85
|
+
name: "demo",
|
|
86
|
+
pipeline: "alpha",
|
|
87
|
+
},
|
|
88
|
+
]);
|
|
89
|
+
} finally {
|
|
90
|
+
await rm(dir, { recursive: true });
|
|
91
|
+
}
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
test("returns name and pipeline as null on invalid JSON without throwing", async () => {
|
|
95
|
+
const dir = await mkdtemp(join(tmpdir(), "queued-seeds-"));
|
|
96
|
+
const pendingDir = join(dir, "pending");
|
|
97
|
+
await mkdir(pendingDir, { recursive: true });
|
|
98
|
+
await writeSeed(pendingDir, "job-bad", "not json {", 1700000100);
|
|
99
|
+
try {
|
|
100
|
+
const result = await listQueuedSeeds(dir);
|
|
101
|
+
expect(result).toHaveLength(1);
|
|
102
|
+
expect(result[0]!.jobId).toBe("job-bad");
|
|
103
|
+
expect(result[0]!.name).toBeNull();
|
|
104
|
+
expect(result[0]!.pipeline).toBeNull();
|
|
105
|
+
} finally {
|
|
106
|
+
await rm(dir, { recursive: true });
|
|
107
|
+
}
|
|
108
|
+
});
|
|
109
|
+
|
|
110
|
+
test("sorts by mtime ascending, then jobId ascending as tiebreaker", async () => {
|
|
111
|
+
const dir = await mkdtemp(join(tmpdir(), "queued-seeds-"));
|
|
112
|
+
const pendingDir = join(dir, "pending");
|
|
113
|
+
await mkdir(pendingDir, { recursive: true });
|
|
114
|
+
await writeSeed(pendingDir, "job-c", { pipeline: "p" }, 1700000300);
|
|
115
|
+
await writeSeed(pendingDir, "job-b", { pipeline: "p" }, 1700000100);
|
|
116
|
+
await writeSeed(pendingDir, "job-a", { pipeline: "p" }, 1700000100);
|
|
117
|
+
await writeSeed(pendingDir, "job-d", { pipeline: "p" }, 1700000200);
|
|
118
|
+
try {
|
|
119
|
+
const result = await listQueuedSeeds(dir);
|
|
120
|
+
expect(result.map((s) => s.jobId)).toEqual(["job-a", "job-b", "job-d", "job-c"]);
|
|
121
|
+
} finally {
|
|
122
|
+
await rm(dir, { recursive: true });
|
|
123
|
+
}
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
test("ignores files that do not match the seed filename pattern", async () => {
|
|
127
|
+
const dir = await mkdtemp(join(tmpdir(), "queued-seeds-"));
|
|
128
|
+
const pendingDir = join(dir, "pending");
|
|
129
|
+
await mkdir(pendingDir, { recursive: true });
|
|
130
|
+
await writeFile(join(pendingDir, "seed.json"), "{}");
|
|
131
|
+
await writeFile(join(pendingDir, "random.txt"), "noise");
|
|
132
|
+
await writeFile(join(pendingDir, "job-x-other.json"), "{}");
|
|
133
|
+
await writeSeed(pendingDir, "job-only", { pipeline: "p" }, 1700000050);
|
|
134
|
+
try {
|
|
135
|
+
const result = await listQueuedSeeds(dir);
|
|
136
|
+
expect(result.map((s) => s.jobId)).toEqual(["job-only"]);
|
|
137
|
+
} finally {
|
|
138
|
+
await rm(dir, { recursive: true });
|
|
139
|
+
}
|
|
140
|
+
});
|
|
141
|
+
});
|
|
142
|
+
|
|
143
|
+
async function writeLease(
|
|
144
|
+
runningJobsDir: string,
|
|
145
|
+
jobId: string,
|
|
146
|
+
body: Partial<JobSlotLease> | string,
|
|
147
|
+
): Promise<string> {
|
|
148
|
+
const slotPath = join(runningJobsDir, `${jobId}.json`);
|
|
149
|
+
await writeFile(slotPath, typeof body === "string" ? body : JSON.stringify(body));
|
|
150
|
+
return slotPath;
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
function getDeadPid(): number {
|
|
154
|
+
// Spawn a synchronous child that exits immediately. spawnSync only returns
|
|
155
|
+
// after the child has exited, so the captured PID is guaranteed dead by then.
|
|
156
|
+
const result = spawnSync(process.execPath, ["-e", ""]);
|
|
157
|
+
if (result.pid === undefined || result.status === null) {
|
|
158
|
+
throw new Error("failed to spawn child for dead-pid test");
|
|
159
|
+
}
|
|
160
|
+
return result.pid;
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
describe("pruneStaleJobSlots", () => {
|
|
164
|
+
test("returns [] when runningJobsDir does not exist", async () => {
|
|
165
|
+
const dir = await mkdtemp(join(tmpdir(), "prune-stale-"));
|
|
166
|
+
try {
|
|
167
|
+
expect(await pruneStaleJobSlots(dir, 1000)).toEqual([]);
|
|
168
|
+
} finally {
|
|
169
|
+
await rm(dir, { recursive: true });
|
|
170
|
+
}
|
|
171
|
+
});
|
|
172
|
+
|
|
173
|
+
test("returns [] when runningJobsDir is empty", async () => {
|
|
174
|
+
const dir = await mkdtemp(join(tmpdir(), "prune-stale-"));
|
|
175
|
+
const { runningJobsDir } = getConcurrencyRuntimePaths(dir);
|
|
176
|
+
await mkdir(runningJobsDir, { recursive: true });
|
|
177
|
+
try {
|
|
178
|
+
expect(await pruneStaleJobSlots(dir, 1000)).toEqual([]);
|
|
179
|
+
} finally {
|
|
180
|
+
await rm(dir, { recursive: true });
|
|
181
|
+
}
|
|
182
|
+
});
|
|
183
|
+
|
|
184
|
+
test("removes and reports lease with malformed JSON", async () => {
|
|
185
|
+
const dir = await mkdtemp(join(tmpdir(), "prune-stale-"));
|
|
186
|
+
const { runningJobsDir } = getConcurrencyRuntimePaths(dir);
|
|
187
|
+
await mkdir(runningJobsDir, { recursive: true });
|
|
188
|
+
const slotPath = await writeLease(runningJobsDir, "job-bad", "not json {");
|
|
189
|
+
try {
|
|
190
|
+
const result = await pruneStaleJobSlots(dir, 1000);
|
|
191
|
+
expect(result).toEqual([
|
|
192
|
+
{ jobId: "job-bad", slotPath, reason: "invalid_json" },
|
|
193
|
+
]);
|
|
194
|
+
expect(existsSync(slotPath)).toBe(false);
|
|
195
|
+
} finally {
|
|
196
|
+
await rm(dir, { recursive: true });
|
|
197
|
+
}
|
|
198
|
+
});
|
|
199
|
+
|
|
200
|
+
test("removes and reports lease whose current/<jobId> directory is missing once aged past lockTimeoutMs", async () => {
|
|
201
|
+
const dir = await mkdtemp(join(tmpdir(), "prune-stale-"));
|
|
202
|
+
const { runningJobsDir } = getConcurrencyRuntimePaths(dir);
|
|
203
|
+
await mkdir(runningJobsDir, { recursive: true });
|
|
204
|
+
const slotPath = await writeLease(runningJobsDir, "job-missing", {
|
|
205
|
+
jobId: "job-missing",
|
|
206
|
+
pid: process.pid,
|
|
207
|
+
acquiredAt: new Date(Date.now() - 60_000).toISOString(),
|
|
208
|
+
source: "orchestrator",
|
|
209
|
+
slotPath: join(runningJobsDir, "job-missing.json"),
|
|
210
|
+
});
|
|
211
|
+
try {
|
|
212
|
+
const result = await pruneStaleJobSlots(dir, 1000);
|
|
213
|
+
expect(result).toEqual([
|
|
214
|
+
{ jobId: "job-missing", slotPath, reason: "missing_current_job" },
|
|
215
|
+
]);
|
|
216
|
+
expect(existsSync(slotPath)).toBe(false);
|
|
217
|
+
} finally {
|
|
218
|
+
await rm(dir, { recursive: true });
|
|
219
|
+
}
|
|
220
|
+
});
|
|
221
|
+
|
|
222
|
+
test("does not prune a fresh lease whose current/<jobId> dir has not yet been created", async () => {
|
|
223
|
+
// drainPendingQueue acquires the slot before creating current/<jobId>.
|
|
224
|
+
// A pruner running in that window must not delete the in-flight lease,
|
|
225
|
+
// or the global concurrency cap can be bypassed.
|
|
226
|
+
const dir = await mkdtemp(join(tmpdir(), "prune-stale-"));
|
|
227
|
+
const { runningJobsDir } = getConcurrencyRuntimePaths(dir);
|
|
228
|
+
await mkdir(runningJobsDir, { recursive: true });
|
|
229
|
+
const slotPath = await writeLease(runningJobsDir, "job-inflight", {
|
|
230
|
+
jobId: "job-inflight",
|
|
231
|
+
pid: process.pid,
|
|
232
|
+
acquiredAt: new Date().toISOString(),
|
|
233
|
+
source: "orchestrator",
|
|
234
|
+
slotPath: join(runningJobsDir, "job-inflight.json"),
|
|
235
|
+
});
|
|
236
|
+
try {
|
|
237
|
+
const result = await pruneStaleJobSlots(dir, 60_000);
|
|
238
|
+
expect(result).toEqual([]);
|
|
239
|
+
expect(existsSync(slotPath)).toBe(true);
|
|
240
|
+
} finally {
|
|
241
|
+
await rm(dir, { recursive: true });
|
|
242
|
+
}
|
|
243
|
+
});
|
|
244
|
+
|
|
245
|
+
test("removes and reports lease with a dead PID", async () => {
|
|
246
|
+
const dir = await mkdtemp(join(tmpdir(), "prune-stale-"));
|
|
247
|
+
const { runningJobsDir } = getConcurrencyRuntimePaths(dir);
|
|
248
|
+
await mkdir(runningJobsDir, { recursive: true });
|
|
249
|
+
await mkdir(join(dir, "current", "job-dead"), { recursive: true });
|
|
250
|
+
const deadPid = getDeadPid();
|
|
251
|
+
const slotPath = await writeLease(runningJobsDir, "job-dead", {
|
|
252
|
+
jobId: "job-dead",
|
|
253
|
+
pid: deadPid,
|
|
254
|
+
acquiredAt: new Date().toISOString(),
|
|
255
|
+
source: "orchestrator",
|
|
256
|
+
slotPath: join(runningJobsDir, "job-dead.json"),
|
|
257
|
+
});
|
|
258
|
+
try {
|
|
259
|
+
const result = await pruneStaleJobSlots(dir, 1000);
|
|
260
|
+
expect(result).toEqual([
|
|
261
|
+
{ jobId: "job-dead", slotPath, reason: "dead_pid" },
|
|
262
|
+
]);
|
|
263
|
+
expect(existsSync(slotPath)).toBe(false);
|
|
264
|
+
} finally {
|
|
265
|
+
await rm(dir, { recursive: true });
|
|
266
|
+
}
|
|
267
|
+
});
|
|
268
|
+
|
|
269
|
+
test("removes and reports lease with null pid older than lockTimeoutMs", async () => {
|
|
270
|
+
const dir = await mkdtemp(join(tmpdir(), "prune-stale-"));
|
|
271
|
+
const { runningJobsDir } = getConcurrencyRuntimePaths(dir);
|
|
272
|
+
await mkdir(runningJobsDir, { recursive: true });
|
|
273
|
+
await mkdir(join(dir, "current", "job-stale-pidless"), { recursive: true });
|
|
274
|
+
const slotPath = await writeLease(runningJobsDir, "job-stale-pidless", {
|
|
275
|
+
jobId: "job-stale-pidless",
|
|
276
|
+
pid: null,
|
|
277
|
+
acquiredAt: new Date(Date.now() - 60_000).toISOString(),
|
|
278
|
+
source: "orchestrator",
|
|
279
|
+
slotPath: join(runningJobsDir, "job-stale-pidless.json"),
|
|
280
|
+
});
|
|
281
|
+
try {
|
|
282
|
+
const result = await pruneStaleJobSlots(dir, 1000);
|
|
283
|
+
expect(result).toEqual([
|
|
284
|
+
{ jobId: "job-stale-pidless", slotPath, reason: "missing_pid" },
|
|
285
|
+
]);
|
|
286
|
+
expect(existsSync(slotPath)).toBe(false);
|
|
287
|
+
} finally {
|
|
288
|
+
await rm(dir, { recursive: true });
|
|
289
|
+
}
|
|
290
|
+
});
|
|
291
|
+
|
|
292
|
+
test("does not prune a fresh lease with null pid younger than lockTimeoutMs", async () => {
|
|
293
|
+
const dir = await mkdtemp(join(tmpdir(), "prune-stale-"));
|
|
294
|
+
const { runningJobsDir } = getConcurrencyRuntimePaths(dir);
|
|
295
|
+
await mkdir(runningJobsDir, { recursive: true });
|
|
296
|
+
await mkdir(join(dir, "current", "job-fresh"), { recursive: true });
|
|
297
|
+
const slotPath = await writeLease(runningJobsDir, "job-fresh", {
|
|
298
|
+
jobId: "job-fresh",
|
|
299
|
+
pid: null,
|
|
300
|
+
acquiredAt: new Date().toISOString(),
|
|
301
|
+
source: "orchestrator",
|
|
302
|
+
slotPath: join(runningJobsDir, "job-fresh.json"),
|
|
303
|
+
});
|
|
304
|
+
try {
|
|
305
|
+
const result = await pruneStaleJobSlots(dir, 60_000);
|
|
306
|
+
expect(result).toEqual([]);
|
|
307
|
+
expect(existsSync(slotPath)).toBe(true);
|
|
308
|
+
expect(await readdir(runningJobsDir)).toEqual(["job-fresh.json"]);
|
|
309
|
+
} finally {
|
|
310
|
+
await rm(dir, { recursive: true });
|
|
311
|
+
}
|
|
312
|
+
});
|
|
313
|
+
|
|
314
|
+
test("keeps lease with live pid and present current dir", async () => {
|
|
315
|
+
const dir = await mkdtemp(join(tmpdir(), "prune-stale-"));
|
|
316
|
+
const { runningJobsDir } = getConcurrencyRuntimePaths(dir);
|
|
317
|
+
await mkdir(runningJobsDir, { recursive: true });
|
|
318
|
+
await mkdir(join(dir, "current", "job-live"), { recursive: true });
|
|
319
|
+
const slotPath = await writeLease(runningJobsDir, "job-live", {
|
|
320
|
+
jobId: "job-live",
|
|
321
|
+
pid: process.pid,
|
|
322
|
+
acquiredAt: new Date().toISOString(),
|
|
323
|
+
source: "orchestrator",
|
|
324
|
+
slotPath: join(runningJobsDir, "job-live.json"),
|
|
325
|
+
});
|
|
326
|
+
try {
|
|
327
|
+
const result = await pruneStaleJobSlots(dir, 1000);
|
|
328
|
+
expect(result).toEqual([]);
|
|
329
|
+
expect(existsSync(slotPath)).toBe(true);
|
|
330
|
+
} finally {
|
|
331
|
+
await rm(dir, { recursive: true });
|
|
332
|
+
}
|
|
333
|
+
});
|
|
334
|
+
});
|
|
335
|
+
|
|
336
|
+
async function setupJobsDir(prefix: string): Promise<string> {
|
|
337
|
+
const dir = await mkdtemp(join(tmpdir(), prefix));
|
|
338
|
+
const { runningJobsDir } = getConcurrencyRuntimePaths(dir);
|
|
339
|
+
await mkdir(runningJobsDir, { recursive: true });
|
|
340
|
+
await mkdir(join(dir, "current"), { recursive: true });
|
|
341
|
+
return dir;
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
async function makeCurrent(dir: string, jobId: string): Promise<void> {
|
|
345
|
+
await mkdir(join(dir, "current", jobId), { recursive: true });
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
describe("tryAcquireJobSlot", () => {
|
|
349
|
+
test("succeeds up to maxConcurrentJobs and rejects the next attempt", async () => {
|
|
350
|
+
const dir = await setupJobsDir("acquire-");
|
|
351
|
+
try {
|
|
352
|
+
for (const jobId of ["a", "b"]) {
|
|
353
|
+
await makeCurrent(dir, jobId);
|
|
354
|
+
const r = await tryAcquireJobSlot({
|
|
355
|
+
dataDir: dir,
|
|
356
|
+
jobId,
|
|
357
|
+
maxConcurrentJobs: 2,
|
|
358
|
+
source: "orchestrator",
|
|
359
|
+
pid: process.pid,
|
|
360
|
+
});
|
|
361
|
+
expect(r.ok).toBe(true);
|
|
362
|
+
}
|
|
363
|
+
await makeCurrent(dir, "c");
|
|
364
|
+
const overflow = await tryAcquireJobSlot({
|
|
365
|
+
dataDir: dir,
|
|
366
|
+
jobId: "c",
|
|
367
|
+
maxConcurrentJobs: 2,
|
|
368
|
+
source: "orchestrator",
|
|
369
|
+
});
|
|
370
|
+
expect(overflow.ok).toBe(false);
|
|
371
|
+
if (!overflow.ok) {
|
|
372
|
+
expect(overflow.reason).toBe("limit_reached");
|
|
373
|
+
expect(overflow.status.runningCount).toBe(2);
|
|
374
|
+
expect(overflow.status.availableSlots).toBe(0);
|
|
375
|
+
}
|
|
376
|
+
} finally {
|
|
377
|
+
await rm(dir, { recursive: true });
|
|
378
|
+
}
|
|
379
|
+
});
|
|
380
|
+
|
|
381
|
+
test("releaseJobSlot frees capacity", async () => {
|
|
382
|
+
const dir = await setupJobsDir("release-cap-");
|
|
383
|
+
try {
|
|
384
|
+
await makeCurrent(dir, "a");
|
|
385
|
+
await makeCurrent(dir, "b");
|
|
386
|
+
const r1 = await tryAcquireJobSlot({
|
|
387
|
+
dataDir: dir,
|
|
388
|
+
jobId: "a",
|
|
389
|
+
maxConcurrentJobs: 1,
|
|
390
|
+
source: "orchestrator",
|
|
391
|
+
pid: process.pid,
|
|
392
|
+
});
|
|
393
|
+
expect(r1.ok).toBe(true);
|
|
394
|
+
const r2 = await tryAcquireJobSlot({
|
|
395
|
+
dataDir: dir,
|
|
396
|
+
jobId: "b",
|
|
397
|
+
maxConcurrentJobs: 1,
|
|
398
|
+
source: "orchestrator",
|
|
399
|
+
});
|
|
400
|
+
expect(r2.ok).toBe(false);
|
|
401
|
+
await releaseJobSlot(dir, "a");
|
|
402
|
+
const r3 = await tryAcquireJobSlot({
|
|
403
|
+
dataDir: dir,
|
|
404
|
+
jobId: "b",
|
|
405
|
+
maxConcurrentJobs: 1,
|
|
406
|
+
source: "orchestrator",
|
|
407
|
+
pid: process.pid,
|
|
408
|
+
});
|
|
409
|
+
expect(r3.ok).toBe(true);
|
|
410
|
+
} finally {
|
|
411
|
+
await rm(dir, { recursive: true });
|
|
412
|
+
}
|
|
413
|
+
});
|
|
414
|
+
|
|
415
|
+
test("concurrent acquisition of the last slot yields exactly one winner", async () => {
|
|
416
|
+
const dir = await setupJobsDir("acquire-race-");
|
|
417
|
+
try {
|
|
418
|
+
const limit = 3;
|
|
419
|
+
const total = 12;
|
|
420
|
+
const jobIds = Array.from({ length: total }, (_, i) => `job-${i}`);
|
|
421
|
+
for (const id of jobIds) await makeCurrent(dir, id);
|
|
422
|
+
const results = await Promise.all(
|
|
423
|
+
jobIds.map((jobId) =>
|
|
424
|
+
tryAcquireJobSlot({
|
|
425
|
+
dataDir: dir,
|
|
426
|
+
jobId,
|
|
427
|
+
maxConcurrentJobs: limit,
|
|
428
|
+
source: "orchestrator",
|
|
429
|
+
pid: process.pid,
|
|
430
|
+
}),
|
|
431
|
+
),
|
|
432
|
+
);
|
|
433
|
+
const winners = results.filter((r) => r.ok).length;
|
|
434
|
+
const losers = results.filter((r) => !r.ok).length;
|
|
435
|
+
expect(winners).toBe(limit);
|
|
436
|
+
expect(losers).toBe(total - limit);
|
|
437
|
+
} finally {
|
|
438
|
+
await rm(dir, { recursive: true });
|
|
439
|
+
}
|
|
440
|
+
});
|
|
441
|
+
|
|
442
|
+
test("returns already_held when a slot is already held for the same jobId", async () => {
|
|
443
|
+
const dir = await setupJobsDir("acquire-dup-");
|
|
444
|
+
try {
|
|
445
|
+
await makeCurrent(dir, "a");
|
|
446
|
+
await tryAcquireJobSlot({
|
|
447
|
+
dataDir: dir,
|
|
448
|
+
jobId: "a",
|
|
449
|
+
maxConcurrentJobs: 5,
|
|
450
|
+
source: "orchestrator",
|
|
451
|
+
pid: process.pid,
|
|
452
|
+
});
|
|
453
|
+
const duplicate = await tryAcquireJobSlot({
|
|
454
|
+
dataDir: dir,
|
|
455
|
+
jobId: "a",
|
|
456
|
+
maxConcurrentJobs: 5,
|
|
457
|
+
source: "orchestrator",
|
|
458
|
+
});
|
|
459
|
+
expect(duplicate.ok).toBe(false);
|
|
460
|
+
if (!duplicate.ok) {
|
|
461
|
+
expect(duplicate.reason).toBe("already_held");
|
|
462
|
+
expect(duplicate.status.runningCount).toBe(1);
|
|
463
|
+
}
|
|
464
|
+
} finally {
|
|
465
|
+
await rm(dir, { recursive: true });
|
|
466
|
+
}
|
|
467
|
+
});
|
|
468
|
+
});
|
|
469
|
+
|
|
470
|
+
describe("updateJobSlotPid", () => {
|
|
471
|
+
test("updates the pid on an existing lease", async () => {
|
|
472
|
+
const dir = await setupJobsDir("update-pid-");
|
|
473
|
+
try {
|
|
474
|
+
await makeCurrent(dir, "a");
|
|
475
|
+
await tryAcquireJobSlot({
|
|
476
|
+
dataDir: dir,
|
|
477
|
+
jobId: "a",
|
|
478
|
+
maxConcurrentJobs: 1,
|
|
479
|
+
source: "orchestrator",
|
|
480
|
+
});
|
|
481
|
+
await updateJobSlotPid(dir, "a", 12345);
|
|
482
|
+
const { runningJobsDir } = getConcurrencyRuntimePaths(dir);
|
|
483
|
+
const raw = await readFile(join(runningJobsDir, "a.json"), "utf-8");
|
|
484
|
+
const lease = JSON.parse(raw) as JobSlotLease;
|
|
485
|
+
expect(lease.pid).toBe(12345);
|
|
486
|
+
expect(lease.jobId).toBe("a");
|
|
487
|
+
} finally {
|
|
488
|
+
await rm(dir, { recursive: true });
|
|
489
|
+
}
|
|
490
|
+
});
|
|
491
|
+
|
|
492
|
+
test("throws when no lease exists for the jobId", async () => {
|
|
493
|
+
const dir = await setupJobsDir("update-pid-missing-");
|
|
494
|
+
try {
|
|
495
|
+
await expect(updateJobSlotPid(dir, "missing", 1)).rejects.toThrow(
|
|
496
|
+
/no lease found for job missing/,
|
|
497
|
+
);
|
|
498
|
+
} finally {
|
|
499
|
+
await rm(dir, { recursive: true });
|
|
500
|
+
}
|
|
501
|
+
});
|
|
502
|
+
});
|
|
503
|
+
|
|
504
|
+
describe("releaseJobSlot", () => {
|
|
505
|
+
test("is idempotent when called on a non-existent lease", async () => {
|
|
506
|
+
const dir = await setupJobsDir("release-idempotent-");
|
|
507
|
+
try {
|
|
508
|
+
await releaseJobSlot(dir, "ghost");
|
|
509
|
+
await releaseJobSlot(dir, "ghost");
|
|
510
|
+
} finally {
|
|
511
|
+
await rm(dir, { recursive: true });
|
|
512
|
+
}
|
|
513
|
+
});
|
|
514
|
+
});
|
|
515
|
+
|
|
516
|
+
describe("getJobConcurrencyStatus", () => {
|
|
517
|
+
test("returns counts including queued seeds and stale slots", async () => {
|
|
518
|
+
const dir = await setupJobsDir("status-");
|
|
519
|
+
const { runningJobsDir } = getConcurrencyRuntimePaths(dir);
|
|
520
|
+
const pendingDir = join(dir, "pending");
|
|
521
|
+
await mkdir(pendingDir, { recursive: true });
|
|
522
|
+
try {
|
|
523
|
+
await makeCurrent(dir, "live");
|
|
524
|
+
await tryAcquireJobSlot({
|
|
525
|
+
dataDir: dir,
|
|
526
|
+
jobId: "live",
|
|
527
|
+
maxConcurrentJobs: 5,
|
|
528
|
+
source: "orchestrator",
|
|
529
|
+
pid: process.pid,
|
|
530
|
+
});
|
|
531
|
+
await writeLease(runningJobsDir, "broken", "not json {");
|
|
532
|
+
await writeSeed(pendingDir, "queued-1", { name: "n", pipeline: "p" }, 1700000000);
|
|
533
|
+
await writeSeed(pendingDir, "queued-2", { name: "m", pipeline: "p" }, 1700000100);
|
|
534
|
+
|
|
535
|
+
const status = await getJobConcurrencyStatus(dir, 5, 1000);
|
|
536
|
+
expect(status.limit).toBe(5);
|
|
537
|
+
expect(status.runningCount).toBe(1);
|
|
538
|
+
expect(status.availableSlots).toBe(4);
|
|
539
|
+
expect(status.queuedCount).toBe(2);
|
|
540
|
+
expect(status.activeJobs.map((l) => l.jobId)).toEqual(["live"]);
|
|
541
|
+
expect(status.queuedJobs.map((q) => q.jobId)).toEqual(["queued-1", "queued-2"]);
|
|
542
|
+
expect(status.staleSlots).toEqual([
|
|
543
|
+
{
|
|
544
|
+
jobId: "broken",
|
|
545
|
+
slotPath: join(runningJobsDir, "broken.json"),
|
|
546
|
+
reason: "invalid_json",
|
|
547
|
+
},
|
|
548
|
+
]);
|
|
549
|
+
expect(existsSync(join(runningJobsDir, "broken.json"))).toBe(false);
|
|
550
|
+
} finally {
|
|
551
|
+
await rm(dir, { recursive: true });
|
|
552
|
+
}
|
|
553
|
+
});
|
|
554
|
+
});
|