@donkeylabs/server 0.4.8 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/external-jobs.md +131 -11
- package/docs/workflows.md +150 -56
- package/examples/external-jobs/python/donkeylabs_job.py +366 -0
- package/examples/external-jobs/shell/donkeylabs-job.sh +264 -0
- package/examples/external-jobs/shell/example-job.sh +47 -0
- package/package.json +2 -1
- package/src/client/base.ts +6 -4
- package/src/core/external-job-socket.ts +142 -21
- package/src/core/index.ts +5 -0
- package/src/core/job-adapter-sqlite.ts +287 -0
- package/src/core/jobs.ts +36 -3
- package/src/core/workflows.ts +202 -49
- package/src/core.ts +73 -4
- package/src/index.ts +12 -0
package/src/client/base.ts
CHANGED
|
@@ -248,19 +248,21 @@ export class ApiClientBase<TEvents extends Record<string, any> = Record<string,
|
|
|
248
248
|
}
|
|
249
249
|
|
|
250
250
|
/**
|
|
251
|
-
* Make a raw request (for non-JSON endpoints)
|
|
251
|
+
* Make a raw request (for non-JSON endpoints like streaming)
|
|
252
252
|
*/
|
|
253
253
|
protected async rawRequest(
|
|
254
254
|
route: string,
|
|
255
|
-
init
|
|
255
|
+
init?: RequestInit
|
|
256
256
|
): Promise<Response> {
|
|
257
257
|
const fetchFn = this.options.fetch || fetch;
|
|
258
|
+
const requestInit = init ?? {};
|
|
258
259
|
|
|
259
260
|
return fetchFn(`${this.baseUrl}/${route}`, {
|
|
260
|
-
|
|
261
|
+
method: "POST",
|
|
262
|
+
...requestInit,
|
|
261
263
|
headers: {
|
|
262
264
|
...this.options.headers,
|
|
263
|
-
...
|
|
265
|
+
...requestInit.headers,
|
|
264
266
|
},
|
|
265
267
|
credentials: this.options.credentials,
|
|
266
268
|
});
|
|
@@ -34,12 +34,18 @@ export interface SocketServerOptions {
|
|
|
34
34
|
export interface ExternalJobSocketServer {
|
|
35
35
|
/** Create a new socket for a job (returns socket path or TCP port) */
|
|
36
36
|
createSocket(jobId: string): Promise<{ socketPath?: string; tcpPort?: number }>;
|
|
37
|
-
/** Close a specific job's socket */
|
|
37
|
+
/** Close a specific job's socket and release reservations */
|
|
38
38
|
closeSocket(jobId: string): Promise<void>;
|
|
39
39
|
/** Get all active job connections */
|
|
40
40
|
getActiveConnections(): string[];
|
|
41
41
|
/** Attempt to reconnect to an existing socket */
|
|
42
42
|
reconnect(jobId: string, socketPath?: string, tcpPort?: number): Promise<boolean>;
|
|
43
|
+
/** Reserve a socket path/port for an orphaned job (prevents reuse until released) */
|
|
44
|
+
reserve(jobId: string, socketPath?: string, tcpPort?: number): void;
|
|
45
|
+
/** Release reservation for a job (called when job is cleaned up) */
|
|
46
|
+
release(jobId: string): void;
|
|
47
|
+
/** Check if a socket path or port is reserved */
|
|
48
|
+
isReserved(socketPath?: string, tcpPort?: number): boolean;
|
|
43
49
|
/** Shutdown all sockets and cleanup */
|
|
44
50
|
shutdown(): Promise<void>;
|
|
45
51
|
/** Clean orphaned socket files from a previous run */
|
|
@@ -68,6 +74,14 @@ export class ExternalJobSocketServerImpl implements ExternalJobSocketServer {
|
|
|
68
74
|
private tcpPorts = new Map<string, number>();
|
|
69
75
|
// Track used TCP ports
|
|
70
76
|
private usedPorts = new Set<number>();
|
|
77
|
+
// Track reserved socket paths (for jobs that might reconnect)
|
|
78
|
+
private reservedSocketPaths = new Set<string>();
|
|
79
|
+
// Track reserved TCP ports (for jobs that might reconnect)
|
|
80
|
+
private reservedTcpPorts = new Set<number>();
|
|
81
|
+
// Map jobId -> reserved socket path (for release by jobId)
|
|
82
|
+
private jobReservedSocketPath = new Map<string, string>();
|
|
83
|
+
// Map jobId -> reserved TCP port (for release by jobId)
|
|
84
|
+
private jobReservedTcpPort = new Map<string, number>();
|
|
71
85
|
|
|
72
86
|
private isWindows = process.platform === "win32";
|
|
73
87
|
|
|
@@ -96,6 +110,11 @@ export class ExternalJobSocketServerImpl implements ExternalJobSocketServer {
|
|
|
96
110
|
private async createUnixServer(jobId: string): Promise<{ socketPath: string }> {
|
|
97
111
|
const socketPath = join(this.socketDir, `job_${jobId}.sock`);
|
|
98
112
|
|
|
113
|
+
// Check if this socket path is reserved by another job
|
|
114
|
+
if (this.reservedSocketPaths.has(socketPath) && !this.jobReservedSocketPath.has(jobId)) {
|
|
115
|
+
throw new Error(`Socket path ${socketPath} is reserved by another job`);
|
|
116
|
+
}
|
|
117
|
+
|
|
99
118
|
// Remove existing socket file if it exists
|
|
100
119
|
if (existsSync(socketPath)) {
|
|
101
120
|
await unlink(socketPath);
|
|
@@ -148,12 +167,14 @@ export class ExternalJobSocketServerImpl implements ExternalJobSocketServer {
|
|
|
148
167
|
// Try random ports within range
|
|
149
168
|
for (let i = 0; i < 100; i++) {
|
|
150
169
|
const port = minPort + Math.floor(Math.random() * (maxPort - minPort));
|
|
151
|
-
if
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
170
|
+
// Skip if port is already in use or reserved by another job
|
|
171
|
+
if (this.usedPorts.has(port) || this.reservedTcpPorts.has(port)) {
|
|
172
|
+
continue;
|
|
173
|
+
}
|
|
174
|
+
// Check if port is actually available
|
|
175
|
+
const isAvailable = await this.checkPortAvailable(port);
|
|
176
|
+
if (isAvailable) {
|
|
177
|
+
return port;
|
|
157
178
|
}
|
|
158
179
|
}
|
|
159
180
|
|
|
@@ -241,12 +262,62 @@ export class ExternalJobSocketServerImpl implements ExternalJobSocketServer {
|
|
|
241
262
|
this.usedPorts.delete(port);
|
|
242
263
|
this.tcpPorts.delete(jobId);
|
|
243
264
|
}
|
|
265
|
+
|
|
266
|
+
// Release any reservations for this job
|
|
267
|
+
this.release(jobId);
|
|
244
268
|
}
|
|
245
269
|
|
|
246
270
|
getActiveConnections(): string[] {
|
|
247
271
|
return Array.from(this.clientSockets.keys());
|
|
248
272
|
}
|
|
249
273
|
|
|
274
|
+
reserve(jobId: string, socketPath?: string, tcpPort?: number): void {
|
|
275
|
+
if (socketPath) {
|
|
276
|
+
this.reservedSocketPaths.add(socketPath);
|
|
277
|
+
this.jobReservedSocketPath.set(jobId, socketPath);
|
|
278
|
+
}
|
|
279
|
+
if (tcpPort) {
|
|
280
|
+
this.reservedTcpPorts.add(tcpPort);
|
|
281
|
+
this.jobReservedTcpPort.set(jobId, tcpPort);
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
release(jobId: string): void {
|
|
286
|
+
// Release socket path reservation
|
|
287
|
+
const socketPath = this.jobReservedSocketPath.get(jobId);
|
|
288
|
+
if (socketPath) {
|
|
289
|
+
this.reservedSocketPaths.delete(socketPath);
|
|
290
|
+
this.jobReservedSocketPath.delete(jobId);
|
|
291
|
+
}
|
|
292
|
+
// Also check socketPaths map (for active jobs)
|
|
293
|
+
const activeSocketPath = this.socketPaths.get(jobId);
|
|
294
|
+
if (activeSocketPath) {
|
|
295
|
+
this.reservedSocketPaths.delete(activeSocketPath);
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
// Release TCP port reservation
|
|
299
|
+
const tcpPort = this.jobReservedTcpPort.get(jobId);
|
|
300
|
+
if (tcpPort) {
|
|
301
|
+
this.reservedTcpPorts.delete(tcpPort);
|
|
302
|
+
this.jobReservedTcpPort.delete(jobId);
|
|
303
|
+
}
|
|
304
|
+
// Also check tcpPorts map (for active jobs)
|
|
305
|
+
const activeTcpPort = this.tcpPorts.get(jobId);
|
|
306
|
+
if (activeTcpPort) {
|
|
307
|
+
this.reservedTcpPorts.delete(activeTcpPort);
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
isReserved(socketPath?: string, tcpPort?: number): boolean {
|
|
312
|
+
if (socketPath && this.reservedSocketPaths.has(socketPath)) {
|
|
313
|
+
return true;
|
|
314
|
+
}
|
|
315
|
+
if (tcpPort && this.reservedTcpPorts.has(tcpPort)) {
|
|
316
|
+
return true;
|
|
317
|
+
}
|
|
318
|
+
return false;
|
|
319
|
+
}
|
|
320
|
+
|
|
250
321
|
async reconnect(
|
|
251
322
|
jobId: string,
|
|
252
323
|
socketPath?: string,
|
|
@@ -257,19 +328,67 @@ export class ExternalJobSocketServerImpl implements ExternalJobSocketServer {
|
|
|
257
328
|
return true;
|
|
258
329
|
}
|
|
259
330
|
|
|
260
|
-
// For Unix sockets,
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
331
|
+
// For Unix sockets, recreate the server on the same path
|
|
332
|
+
// The external process should be retrying to connect
|
|
333
|
+
if (socketPath && !this.isWindows) {
|
|
334
|
+
try {
|
|
335
|
+
// Remove old socket file if it exists
|
|
336
|
+
if (existsSync(socketPath)) {
|
|
337
|
+
await unlink(socketPath);
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
// Create new server on the same path
|
|
341
|
+
return new Promise((resolve) => {
|
|
342
|
+
const server = createNetServer((socket) => {
|
|
343
|
+
this.handleConnection(jobId, socket);
|
|
344
|
+
});
|
|
345
|
+
|
|
346
|
+
server.on("error", (err) => {
|
|
347
|
+
this.onError?.(err, jobId);
|
|
348
|
+
resolve(false);
|
|
349
|
+
});
|
|
350
|
+
|
|
351
|
+
server.listen(socketPath, () => {
|
|
352
|
+
this.servers.set(jobId, server);
|
|
353
|
+
this.socketPaths.set(jobId, socketPath);
|
|
354
|
+
console.log(`[SocketServer] Recreated socket for job ${jobId} at ${socketPath}`);
|
|
355
|
+
// Return true - the server is ready, external process should reconnect
|
|
356
|
+
resolve(true);
|
|
357
|
+
});
|
|
358
|
+
});
|
|
359
|
+
} catch (err) {
|
|
360
|
+
this.onError?.(err as Error, jobId);
|
|
361
|
+
return false;
|
|
362
|
+
}
|
|
363
|
+
}
|
|
364
|
+
|
|
365
|
+
// For TCP, recreate the server on the same port
|
|
366
|
+
if (tcpPort && this.isWindows) {
|
|
367
|
+
try {
|
|
368
|
+
return new Promise((resolve) => {
|
|
369
|
+
const server = createNetServer((socket) => {
|
|
370
|
+
this.handleConnection(jobId, socket);
|
|
371
|
+
});
|
|
372
|
+
|
|
373
|
+
server.on("error", (err) => {
|
|
374
|
+
this.onError?.(err, jobId);
|
|
375
|
+
resolve(false);
|
|
376
|
+
});
|
|
377
|
+
|
|
378
|
+
server.listen(tcpPort, "127.0.0.1", () => {
|
|
379
|
+
this.servers.set(jobId, server);
|
|
380
|
+
this.tcpPorts.set(jobId, tcpPort);
|
|
381
|
+
this.usedPorts.add(tcpPort);
|
|
382
|
+
console.log(`[SocketServer] Recreated TCP server for job ${jobId} on port ${tcpPort}`);
|
|
383
|
+
resolve(true);
|
|
384
|
+
});
|
|
385
|
+
});
|
|
386
|
+
} catch (err) {
|
|
387
|
+
this.onError?.(err as Error, jobId);
|
|
388
|
+
return false;
|
|
389
|
+
}
|
|
270
390
|
}
|
|
271
391
|
|
|
272
|
-
// For TCP, we can't easily reconnect without the process knowing
|
|
273
392
|
return false;
|
|
274
393
|
}
|
|
275
394
|
|
|
@@ -319,9 +438,11 @@ export class ExternalJobSocketServerImpl implements ExternalJobSocketServer {
|
|
|
319
438
|
const match = file.match(/^job_(.+)\.sock$/);
|
|
320
439
|
if (match) {
|
|
321
440
|
const jobId = match[1]!;
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
441
|
+
const socketPath = join(this.socketDir, file);
|
|
442
|
+
|
|
443
|
+
// Don't clean if job is active or socket path is reserved
|
|
444
|
+
if (!activeJobIds.has(jobId) && !this.reservedSocketPaths.has(socketPath)) {
|
|
445
|
+
// This socket file doesn't correspond to any active job and isn't reserved
|
|
325
446
|
await unlink(socketPath).catch(() => {});
|
|
326
447
|
}
|
|
327
448
|
}
|
package/src/core/index.ts
CHANGED
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Built-in SQLite Job Adapter
|
|
3
|
+
*
|
|
4
|
+
* Provides automatic persistence for jobs, enabling server restart resilience
|
|
5
|
+
* for external jobs without requiring user configuration.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { Database } from "bun:sqlite";
|
|
9
|
+
import { mkdir } from "node:fs/promises";
|
|
10
|
+
import { dirname } from "node:path";
|
|
11
|
+
import type { Job, JobAdapter, JobStatus } from "./jobs";
|
|
12
|
+
import type { ExternalJobProcessState } from "./external-jobs";
|
|
13
|
+
|
|
14
|
+
export interface SqliteJobAdapterConfig {
|
|
15
|
+
/** Path to SQLite database file (default: .donkeylabs/jobs.db) */
|
|
16
|
+
path?: string;
|
|
17
|
+
/** Auto-cleanup completed jobs older than N days (default: 7, 0 to disable) */
|
|
18
|
+
cleanupDays?: number;
|
|
19
|
+
/** Cleanup interval in ms (default: 3600000 = 1 hour) */
|
|
20
|
+
cleanupInterval?: number;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
export class SqliteJobAdapter implements JobAdapter {
|
|
24
|
+
private db: Database;
|
|
25
|
+
private initialized = false;
|
|
26
|
+
private cleanupTimer?: ReturnType<typeof setInterval>;
|
|
27
|
+
private cleanupDays: number;
|
|
28
|
+
|
|
29
|
+
constructor(config: SqliteJobAdapterConfig = {}) {
|
|
30
|
+
const dbPath = config.path ?? ".donkeylabs/jobs.db";
|
|
31
|
+
this.cleanupDays = config.cleanupDays ?? 7;
|
|
32
|
+
|
|
33
|
+
// Ensure directory exists
|
|
34
|
+
this.ensureDir(dbPath);
|
|
35
|
+
|
|
36
|
+
this.db = new Database(dbPath);
|
|
37
|
+
this.init();
|
|
38
|
+
|
|
39
|
+
// Start cleanup timer
|
|
40
|
+
if (this.cleanupDays > 0) {
|
|
41
|
+
const interval = config.cleanupInterval ?? 3600000; // 1 hour
|
|
42
|
+
this.cleanupTimer = setInterval(() => this.cleanup(), interval);
|
|
43
|
+
// Run cleanup on startup
|
|
44
|
+
this.cleanup();
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
private ensureDir(dbPath: string): void {
|
|
49
|
+
const dir = dirname(dbPath);
|
|
50
|
+
if (dir && dir !== ".") {
|
|
51
|
+
// Sync mkdir for constructor
|
|
52
|
+
try {
|
|
53
|
+
Bun.spawnSync(["mkdir", "-p", dir]);
|
|
54
|
+
} catch {
|
|
55
|
+
// Directory may already exist
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
private init(): void {
|
|
61
|
+
if (this.initialized) return;
|
|
62
|
+
|
|
63
|
+
this.db.run(`
|
|
64
|
+
CREATE TABLE IF NOT EXISTS jobs (
|
|
65
|
+
id TEXT PRIMARY KEY,
|
|
66
|
+
name TEXT NOT NULL,
|
|
67
|
+
data TEXT NOT NULL,
|
|
68
|
+
status TEXT NOT NULL DEFAULT 'pending',
|
|
69
|
+
created_at TEXT NOT NULL,
|
|
70
|
+
run_at TEXT,
|
|
71
|
+
started_at TEXT,
|
|
72
|
+
completed_at TEXT,
|
|
73
|
+
result TEXT,
|
|
74
|
+
error TEXT,
|
|
75
|
+
attempts INTEGER NOT NULL DEFAULT 0,
|
|
76
|
+
max_attempts INTEGER NOT NULL DEFAULT 3,
|
|
77
|
+
-- External job fields
|
|
78
|
+
external INTEGER DEFAULT 0,
|
|
79
|
+
pid INTEGER,
|
|
80
|
+
socket_path TEXT,
|
|
81
|
+
tcp_port INTEGER,
|
|
82
|
+
last_heartbeat TEXT,
|
|
83
|
+
process_state TEXT
|
|
84
|
+
)
|
|
85
|
+
`);
|
|
86
|
+
|
|
87
|
+
// Indexes for efficient queries
|
|
88
|
+
this.db.run(`CREATE INDEX IF NOT EXISTS idx_jobs_status ON jobs(status)`);
|
|
89
|
+
this.db.run(`CREATE INDEX IF NOT EXISTS idx_jobs_name ON jobs(name)`);
|
|
90
|
+
this.db.run(`CREATE INDEX IF NOT EXISTS idx_jobs_external ON jobs(external, status)`);
|
|
91
|
+
this.db.run(`CREATE INDEX IF NOT EXISTS idx_jobs_scheduled ON jobs(status, run_at)`);
|
|
92
|
+
|
|
93
|
+
this.initialized = true;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
async create(job: Omit<Job, "id">): Promise<Job> {
|
|
97
|
+
const id = `job_${Date.now()}_${Math.random().toString(36).slice(2, 9)}`;
|
|
98
|
+
|
|
99
|
+
this.db.run(
|
|
100
|
+
`INSERT INTO jobs (
|
|
101
|
+
id, name, data, status, created_at, run_at, attempts, max_attempts,
|
|
102
|
+
external, process_state
|
|
103
|
+
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
104
|
+
[
|
|
105
|
+
id,
|
|
106
|
+
job.name,
|
|
107
|
+
JSON.stringify(job.data),
|
|
108
|
+
job.status,
|
|
109
|
+
job.createdAt.toISOString(),
|
|
110
|
+
job.runAt?.toISOString() ?? null,
|
|
111
|
+
job.attempts,
|
|
112
|
+
job.maxAttempts,
|
|
113
|
+
job.external ? 1 : 0,
|
|
114
|
+
job.processState ?? null,
|
|
115
|
+
]
|
|
116
|
+
);
|
|
117
|
+
|
|
118
|
+
return { ...job, id };
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
async get(jobId: string): Promise<Job | null> {
|
|
122
|
+
const row = this.db.query(`SELECT * FROM jobs WHERE id = ?`).get(jobId) as any;
|
|
123
|
+
if (!row) return null;
|
|
124
|
+
return this.rowToJob(row);
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
async update(jobId: string, updates: Partial<Job>): Promise<void> {
|
|
128
|
+
const sets: string[] = [];
|
|
129
|
+
const values: any[] = [];
|
|
130
|
+
|
|
131
|
+
if (updates.status !== undefined) {
|
|
132
|
+
sets.push("status = ?");
|
|
133
|
+
values.push(updates.status);
|
|
134
|
+
}
|
|
135
|
+
if (updates.startedAt !== undefined) {
|
|
136
|
+
sets.push("started_at = ?");
|
|
137
|
+
values.push(updates.startedAt?.toISOString() ?? null);
|
|
138
|
+
}
|
|
139
|
+
if (updates.completedAt !== undefined) {
|
|
140
|
+
sets.push("completed_at = ?");
|
|
141
|
+
values.push(updates.completedAt?.toISOString() ?? null);
|
|
142
|
+
}
|
|
143
|
+
if (updates.result !== undefined) {
|
|
144
|
+
sets.push("result = ?");
|
|
145
|
+
values.push(JSON.stringify(updates.result));
|
|
146
|
+
}
|
|
147
|
+
if (updates.error !== undefined) {
|
|
148
|
+
sets.push("error = ?");
|
|
149
|
+
values.push(updates.error);
|
|
150
|
+
}
|
|
151
|
+
if (updates.attempts !== undefined) {
|
|
152
|
+
sets.push("attempts = ?");
|
|
153
|
+
values.push(updates.attempts);
|
|
154
|
+
}
|
|
155
|
+
// External job fields
|
|
156
|
+
if (updates.pid !== undefined) {
|
|
157
|
+
sets.push("pid = ?");
|
|
158
|
+
values.push(updates.pid);
|
|
159
|
+
}
|
|
160
|
+
if (updates.socketPath !== undefined) {
|
|
161
|
+
sets.push("socket_path = ?");
|
|
162
|
+
values.push(updates.socketPath);
|
|
163
|
+
}
|
|
164
|
+
if (updates.tcpPort !== undefined) {
|
|
165
|
+
sets.push("tcp_port = ?");
|
|
166
|
+
values.push(updates.tcpPort);
|
|
167
|
+
}
|
|
168
|
+
if (updates.lastHeartbeat !== undefined) {
|
|
169
|
+
sets.push("last_heartbeat = ?");
|
|
170
|
+
values.push(updates.lastHeartbeat?.toISOString() ?? null);
|
|
171
|
+
}
|
|
172
|
+
if (updates.processState !== undefined) {
|
|
173
|
+
sets.push("process_state = ?");
|
|
174
|
+
values.push(updates.processState);
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
if (sets.length === 0) return;
|
|
178
|
+
|
|
179
|
+
values.push(jobId);
|
|
180
|
+
this.db.run(`UPDATE jobs SET ${sets.join(", ")} WHERE id = ?`, values);
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
async delete(jobId: string): Promise<boolean> {
|
|
184
|
+
const result = this.db.run(`DELETE FROM jobs WHERE id = ?`, [jobId]);
|
|
185
|
+
return result.changes > 0;
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
async getPending(limit: number = 100): Promise<Job[]> {
|
|
189
|
+
const rows = this.db
|
|
190
|
+
.query(`SELECT * FROM jobs WHERE status = 'pending' ORDER BY created_at LIMIT ?`)
|
|
191
|
+
.all(limit) as any[];
|
|
192
|
+
return rows.map((r) => this.rowToJob(r));
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
async getScheduledReady(now: Date): Promise<Job[]> {
|
|
196
|
+
const rows = this.db
|
|
197
|
+
.query(`SELECT * FROM jobs WHERE status = 'scheduled' AND run_at <= ? ORDER BY run_at`)
|
|
198
|
+
.all(now.toISOString()) as any[];
|
|
199
|
+
return rows.map((r) => this.rowToJob(r));
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
async getByName(name: string, status?: JobStatus): Promise<Job[]> {
|
|
203
|
+
let query = `SELECT * FROM jobs WHERE name = ?`;
|
|
204
|
+
const params: any[] = [name];
|
|
205
|
+
|
|
206
|
+
if (status) {
|
|
207
|
+
query += ` AND status = ?`;
|
|
208
|
+
params.push(status);
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
query += ` ORDER BY created_at DESC`;
|
|
212
|
+
|
|
213
|
+
const rows = this.db.query(query).all(...params) as any[];
|
|
214
|
+
return rows.map((r) => this.rowToJob(r));
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
async getRunningExternal(): Promise<Job[]> {
|
|
218
|
+
const rows = this.db
|
|
219
|
+
.query(`SELECT * FROM jobs WHERE external = 1 AND status = 'running'`)
|
|
220
|
+
.all() as any[];
|
|
221
|
+
return rows.map((r) => this.rowToJob(r));
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
async getOrphanedExternal(): Promise<Job[]> {
|
|
225
|
+
// Get external jobs that were running when server died
|
|
226
|
+
const rows = this.db
|
|
227
|
+
.query(
|
|
228
|
+
`SELECT * FROM jobs WHERE external = 1 AND status = 'running'
|
|
229
|
+
AND (process_state = 'running' OR process_state = 'orphaned' OR process_state = 'spawning')`
|
|
230
|
+
)
|
|
231
|
+
.all() as any[];
|
|
232
|
+
return rows.map((r) => this.rowToJob(r));
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
private rowToJob(row: any): Job {
|
|
236
|
+
return {
|
|
237
|
+
id: row.id,
|
|
238
|
+
name: row.name,
|
|
239
|
+
data: JSON.parse(row.data),
|
|
240
|
+
status: row.status as JobStatus,
|
|
241
|
+
createdAt: new Date(row.created_at),
|
|
242
|
+
runAt: row.run_at ? new Date(row.run_at) : undefined,
|
|
243
|
+
startedAt: row.started_at ? new Date(row.started_at) : undefined,
|
|
244
|
+
completedAt: row.completed_at ? new Date(row.completed_at) : undefined,
|
|
245
|
+
result: row.result ? JSON.parse(row.result) : undefined,
|
|
246
|
+
error: row.error ?? undefined,
|
|
247
|
+
attempts: row.attempts,
|
|
248
|
+
maxAttempts: row.max_attempts,
|
|
249
|
+
// External job fields
|
|
250
|
+
external: row.external === 1 ? true : undefined,
|
|
251
|
+
pid: row.pid ?? undefined,
|
|
252
|
+
socketPath: row.socket_path ?? undefined,
|
|
253
|
+
tcpPort: row.tcp_port ?? undefined,
|
|
254
|
+
lastHeartbeat: row.last_heartbeat ? new Date(row.last_heartbeat) : undefined,
|
|
255
|
+
processState: row.process_state as ExternalJobProcessState | undefined,
|
|
256
|
+
};
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
/** Clean up old completed/failed jobs */
|
|
260
|
+
private cleanup(): void {
|
|
261
|
+
if (this.cleanupDays <= 0) return;
|
|
262
|
+
|
|
263
|
+
try {
|
|
264
|
+
const cutoff = new Date();
|
|
265
|
+
cutoff.setDate(cutoff.getDate() - this.cleanupDays);
|
|
266
|
+
|
|
267
|
+
const result = this.db.run(
|
|
268
|
+
`DELETE FROM jobs WHERE (status = 'completed' OR status = 'failed') AND completed_at < ?`,
|
|
269
|
+
[cutoff.toISOString()]
|
|
270
|
+
);
|
|
271
|
+
|
|
272
|
+
if (result.changes > 0) {
|
|
273
|
+
console.log(`[Jobs] Cleaned up ${result.changes} old jobs`);
|
|
274
|
+
}
|
|
275
|
+
} catch (err) {
|
|
276
|
+
console.error("[Jobs] Cleanup error:", err);
|
|
277
|
+
}
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
/** Stop the adapter and cleanup timer */
|
|
281
|
+
stop(): void {
|
|
282
|
+
if (this.cleanupTimer) {
|
|
283
|
+
clearInterval(this.cleanupTimer);
|
|
284
|
+
this.cleanupTimer = undefined;
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
}
|
package/src/core/jobs.ts
CHANGED
|
@@ -25,6 +25,7 @@ import {
|
|
|
25
25
|
createExternalJobSocketServer,
|
|
26
26
|
type ExternalJobSocketServer,
|
|
27
27
|
} from "./external-job-socket";
|
|
28
|
+
import { SqliteJobAdapter } from "./job-adapter-sqlite";
|
|
28
29
|
|
|
29
30
|
export type JobStatus = "pending" | "running" | "completed" | "failed" | "scheduled";
|
|
30
31
|
|
|
@@ -82,6 +83,13 @@ export interface JobsConfig {
|
|
|
82
83
|
maxAttempts?: number; // Default retry attempts, default 3
|
|
83
84
|
/** External jobs configuration */
|
|
84
85
|
external?: ExternalJobsConfig;
|
|
86
|
+
/**
|
|
87
|
+
* Use SQLite for persistence (default: true when external jobs are used)
|
|
88
|
+
* Set to false to use MemoryJobAdapter (not recommended for production)
|
|
89
|
+
*/
|
|
90
|
+
persist?: boolean;
|
|
91
|
+
/** SQLite database path (default: .donkeylabs/jobs.db) */
|
|
92
|
+
dbPath?: string;
|
|
85
93
|
}
|
|
86
94
|
|
|
87
95
|
export interface Jobs {
|
|
@@ -188,6 +196,7 @@ export class MemoryJobAdapter implements JobAdapter {
|
|
|
188
196
|
|
|
189
197
|
class JobsImpl implements Jobs {
|
|
190
198
|
private adapter: JobAdapter;
|
|
199
|
+
private sqliteAdapter?: SqliteJobAdapter;
|
|
191
200
|
private events?: Events;
|
|
192
201
|
private handlers = new Map<string, JobHandler>();
|
|
193
202
|
private running = false;
|
|
@@ -197,6 +206,8 @@ class JobsImpl implements Jobs {
|
|
|
197
206
|
private concurrency: number;
|
|
198
207
|
private pollInterval: number;
|
|
199
208
|
private defaultMaxAttempts: number;
|
|
209
|
+
private usePersistence: boolean;
|
|
210
|
+
private dbPath?: string;
|
|
200
211
|
|
|
201
212
|
// External jobs support
|
|
202
213
|
private externalConfigs = new Map<string, ExternalJobConfig>();
|
|
@@ -205,12 +216,23 @@ class JobsImpl implements Jobs {
|
|
|
205
216
|
private externalProcesses = new Map<string, { pid: number; timeout?: ReturnType<typeof setTimeout> }>();
|
|
206
217
|
|
|
207
218
|
constructor(config: JobsConfig = {}) {
|
|
208
|
-
this.adapter = config.adapter ?? new MemoryJobAdapter();
|
|
209
219
|
this.events = config.events;
|
|
210
220
|
this.concurrency = config.concurrency ?? 5;
|
|
211
221
|
this.pollInterval = config.pollInterval ?? 1000;
|
|
212
222
|
this.defaultMaxAttempts = config.maxAttempts ?? 3;
|
|
213
223
|
this.externalConfig = config.external ?? {};
|
|
224
|
+
this.usePersistence = config.persist ?? true; // Default to SQLite persistence
|
|
225
|
+
this.dbPath = config.dbPath;
|
|
226
|
+
|
|
227
|
+
// Use provided adapter, or create SQLite adapter if persistence enabled
|
|
228
|
+
if (config.adapter) {
|
|
229
|
+
this.adapter = config.adapter;
|
|
230
|
+
} else if (this.usePersistence) {
|
|
231
|
+
this.sqliteAdapter = new SqliteJobAdapter({ path: this.dbPath });
|
|
232
|
+
this.adapter = this.sqliteAdapter;
|
|
233
|
+
} else {
|
|
234
|
+
this.adapter = new MemoryJobAdapter();
|
|
235
|
+
}
|
|
214
236
|
}
|
|
215
237
|
|
|
216
238
|
register<T = any, R = any>(name: string, handler: JobHandler<T, R>): void {
|
|
@@ -362,6 +384,11 @@ class JobsImpl implements Jobs {
|
|
|
362
384
|
this.socketServer = null;
|
|
363
385
|
}
|
|
364
386
|
|
|
387
|
+
// Stop SQLite adapter cleanup timer
|
|
388
|
+
if (this.sqliteAdapter) {
|
|
389
|
+
this.sqliteAdapter.stop();
|
|
390
|
+
}
|
|
391
|
+
|
|
365
392
|
// Wait for active in-process jobs to complete (with timeout)
|
|
366
393
|
const maxWait = 30000; // 30 seconds
|
|
367
394
|
const startTime = Date.now();
|
|
@@ -476,6 +503,9 @@ class JobsImpl implements Jobs {
|
|
|
476
503
|
console.log(`[Jobs] Found orphaned job ${job.id} with PID ${job.pid}, attempting reconnect`);
|
|
477
504
|
activeJobIds.add(job.id);
|
|
478
505
|
|
|
506
|
+
// Reserve the socket path/port to prevent new jobs from using it
|
|
507
|
+
this.socketServer?.reserve(job.id, job.socketPath, job.tcpPort);
|
|
508
|
+
|
|
479
509
|
// Try to reconnect to the socket
|
|
480
510
|
const reconnected = await this.socketServer?.reconnect(
|
|
481
511
|
job.id,
|
|
@@ -496,7 +526,7 @@ class JobsImpl implements Jobs {
|
|
|
496
526
|
});
|
|
497
527
|
}
|
|
498
528
|
} else {
|
|
499
|
-
// Mark as orphaned, but keep tracking
|
|
529
|
+
// Mark as orphaned, but keep tracking (reservation remains)
|
|
500
530
|
await this.adapter.update(job.id, { processState: "orphaned" });
|
|
501
531
|
|
|
502
532
|
if (this.events) {
|
|
@@ -507,7 +537,7 @@ class JobsImpl implements Jobs {
|
|
|
507
537
|
}
|
|
508
538
|
}
|
|
509
539
|
} else {
|
|
510
|
-
// Process is dead, mark job as failed
|
|
540
|
+
// Process is dead, mark job as failed and release any reservations
|
|
511
541
|
console.log(`[Jobs] Orphaned job ${job.id} process (PID ${job.pid}) is dead`);
|
|
512
542
|
await this.adapter.update(job.id, {
|
|
513
543
|
status: "failed",
|
|
@@ -515,6 +545,9 @@ class JobsImpl implements Jobs {
|
|
|
515
545
|
completedAt: new Date(),
|
|
516
546
|
});
|
|
517
547
|
|
|
548
|
+
// Release reservation since the job is done
|
|
549
|
+
this.socketServer?.release(job.id);
|
|
550
|
+
|
|
518
551
|
if (this.events) {
|
|
519
552
|
await this.events.emit("job.failed", {
|
|
520
553
|
jobId: job.id,
|