@donkeylabs/server 0.4.7 → 0.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,356 @@
1
+ // External Job Socket Server
2
+ // Handles bidirectional communication with external job processes via Unix sockets (or TCP on Windows)
3
+
4
+ import { mkdir, rm, readdir, unlink } from "node:fs/promises";
5
+ import { existsSync } from "node:fs";
6
+ import { join } from "node:path";
7
+ import type { Server as NetServer, Socket } from "node:net";
8
+ import { createServer as createNetServer } from "node:net";
9
+ import type {
10
+ AnyExternalJobMessage,
11
+ ExternalJobsConfig,
12
+ } from "./external-jobs";
13
+ import { parseJobMessage } from "./external-jobs";
14
+
15
+ // ============================================
16
+ // Types
17
+ // ============================================
18
+
19
/**
 * Construction options for the external-job socket server.
 */
export interface SocketServerOptions {
  /** Directory in which per-job Unix socket files are created. */
  socketDir: string;
  /** Inclusive-looking `[min, max]` TCP port bounds used by the Windows fallback. */
  tcpPortRange: [min: number, max: number];
  /** Invoked for every message received from a job process. */
  onMessage: (message: AnyExternalJobMessage) => void;
  /** Invoked when a job process connects. */
  onConnect?: (jobId: string) => void;
  /** Invoked when a job process connection closes. */
  onDisconnect?: (jobId: string) => void;
  /** Invoked on errors; `jobId` is passed when the error belongs to a job. */
  onError?: (error: Error, jobId?: string) => void;
}
33
+
34
/**
 * Public contract of the per-job socket server.
 */
export interface ExternalJobSocketServer {
  /** Opens a listener for `jobId` and resolves with its socket path or TCP port. */
  createSocket(jobId: string): Promise<{ socketPath?: string; tcpPort?: number }>;

  /** Closes the listener (and any client connection) belonging to `jobId`. */
  closeSocket(jobId: string): Promise<void>;

  /** Lists the job ids that currently have an active connection. */
  getActiveConnections(): string[];

  /** Tries to re-attach to a socket that already exists for `jobId`. */
  reconnect(jobId: string, socketPath?: string, tcpPort?: number): Promise<boolean>;

  /** Closes every socket and performs cleanup. */
  shutdown(): Promise<void>;

  /** Removes socket files left over from a previous run that match no active job. */
  cleanOrphanedSockets(activeJobIds: Set<string>): Promise<void>;
}
48
+
49
+ // ============================================
50
+ // Implementation
51
+ // ============================================
52
+
53
/**
 * Per-job socket server that receives messages from external job processes.
 *
 * Every job gets its own `net.Server`: a Unix domain socket file named
 * `job_<jobId>.sock` inside `socketDir`, or a TCP listener bound to
 * 127.0.0.1 when `process.platform` is `"win32"`. Incoming bytes are
 * interpreted as newline-delimited JSON and each complete line is handed to
 * `parseJobMessage`; parsed messages go to `onMessage`, unparseable lines to
 * `onError`.
 */
export class ExternalJobSocketServerImpl implements ExternalJobSocketServer {
  private socketDir: string;
  private tcpPortRange: [number, number];
  private onMessage: (message: AnyExternalJobMessage) => void;
  private onConnect?: (jobId: string) => void;
  private onDisconnect?: (jobId: string) => void;
  private onError?: (error: Error, jobId?: string) => void;

  // Map of jobId -> server instance
  private servers = new Map<string, NetServer>();
  // Map of jobId -> active client socket
  private clientSockets = new Map<string, Socket>();
  // Map of jobId -> socket path
  private socketPaths = new Map<string, string>();
  // Map of jobId -> TCP port
  private tcpPorts = new Map<string, number>();
  // Ports reserved by this instance (reserved before probing, released on failure/close)
  private usedPorts = new Set<number>();

  private isWindows = process.platform === "win32";

  constructor(options: SocketServerOptions) {
    this.socketDir = options.socketDir;
    this.tcpPortRange = options.tcpPortRange;
    this.onMessage = options.onMessage;
    this.onConnect = options.onConnect;
    this.onDisconnect = options.onDisconnect;
    this.onError = options.onError;
  }

  /**
   * Creates the listener for a job.
   *
   * @param jobId - Identifier of the job the listener belongs to.
   * @returns `{ tcpPort }` on Windows, `{ socketPath }` elsewhere.
   */
  async createSocket(jobId: string): Promise<{ socketPath?: string; tcpPort?: number }> {
    // A repeated call for the same job would overwrite the map entries and
    // leak the earlier server, so tear the old listener down first.
    if (this.servers.has(jobId)) {
      await this.closeSocket(jobId);
    }

    if (this.isWindows) {
      return this.createTcpServer(jobId);
    }

    // The socket directory is only needed for Unix domain sockets.
    await mkdir(this.socketDir, { recursive: true });
    return this.createUnixServer(jobId);
  }

  /** Starts a Unix-domain-socket listener at `<socketDir>/job_<jobId>.sock`. */
  private async createUnixServer(jobId: string): Promise<{ socketPath: string }> {
    const socketPath = join(this.socketDir, `job_${jobId}.sock`);

    // Remove a stale socket file. `force` makes a missing file a no-op, which
    // avoids the check-then-unlink race of existsSync() + unlink().
    await rm(socketPath, { force: true });

    return new Promise((resolve, reject) => {
      const server = createNetServer((socket) => {
        this.handleConnection(jobId, socket);
      });

      server.on("error", (err) => {
        this.onError?.(err, jobId);
        reject(err);
      });

      server.listen(socketPath, () => {
        this.servers.set(jobId, server);
        this.socketPaths.set(jobId, socketPath);
        resolve({ socketPath });
      });
    });
  }

  /** Starts a TCP listener on 127.0.0.1 using a port from `tcpPortRange`. */
  private async createTcpServer(jobId: string): Promise<{ tcpPort: number }> {
    // The returned port is already reserved in `usedPorts`.
    const port = await this.findAvailablePort();

    return new Promise((resolve, reject) => {
      const server = createNetServer((socket) => {
        this.handleConnection(jobId, socket);
      });

      server.on("error", (err) => {
        this.usedPorts.delete(port);
        this.onError?.(err, jobId);
        reject(err);
      });

      server.listen(port, "127.0.0.1", () => {
        this.servers.set(jobId, server);
        this.tcpPorts.set(jobId, port);
        this.usedPorts.add(port);
        resolve({ tcpPort: port });
      });
    });
  }

  /**
   * Picks a random free port within `tcpPortRange` (both bounds inclusive)
   * and reserves it in `usedPorts`.
   *
   * @throws Error when no free port is found after 100 attempts.
   */
  private async findAvailablePort(): Promise<number> {
    const [minPort, maxPort] = this.tcpPortRange;
    // +1 so that maxPort itself can be selected.
    const span = maxPort - minPort + 1;

    for (let attempt = 0; attempt < 100; attempt++) {
      const port = minPort + Math.floor(Math.random() * span);
      if (this.usedPorts.has(port)) continue;

      // Reserve before the asynchronous probe so a concurrent caller cannot
      // select the same port while the probe is still in flight.
      this.usedPorts.add(port);
      if (await this.checkPortAvailable(port)) {
        return port;
      }
      this.usedPorts.delete(port);
    }

    throw new Error(
      `Could not find available port in range ${minPort}-${maxPort}`
    );
  }

  /** Probes a port by briefly listening on 127.0.0.1; resolves false on any listen error. */
  private checkPortAvailable(port: number): Promise<boolean> {
    return new Promise((resolve) => {
      const probe = createNetServer();
      probe.once("error", () => resolve(false));
      probe.once("listening", () => {
        probe.close(() => resolve(true));
      });
      probe.listen(port, "127.0.0.1");
    });
  }

  /** Registers a client connection for `jobId` and wires up its event handlers. */
  private handleConnection(jobId: string, socket: Socket): void {
    // Decode at the stream level so a multi-byte UTF-8 character that is split
    // across two chunks is reassembled instead of being corrupted.
    socket.setEncoding("utf8");

    this.clientSockets.set(jobId, socket);
    this.onConnect?.(jobId);

    let buffer = "";

    socket.on("data", (chunk) => {
      buffer += chunk.toString();

      // Newline-delimited JSON: everything before the last "\n" is complete,
      // the remainder stays buffered until more data arrives.
      const lines = buffer.split("\n");
      buffer = lines.pop() ?? "";

      for (const line of lines) {
        if (!line.trim()) continue;

        const message = parseJobMessage(line);
        if (!message) {
          this.onError?.(new Error(`Invalid message: ${line}`), jobId);
          continue;
        }

        try {
          this.onMessage(message);
        } catch (err) {
          // A throwing consumer must not escape the event handler.
          this.onError?.(err instanceof Error ? err : new Error(String(err)), jobId);
        }
      }
    });

    socket.on("error", (err) => {
      this.onError?.(err, jobId);
    });

    socket.on("close", () => {
      const current = this.clientSockets.get(jobId);
      if (current !== undefined && current !== socket) {
        // A newer connection has replaced this one; the job is still
        // connected, so keep its entry and do not report a disconnect.
        return;
      }
      this.clientSockets.delete(jobId);
      this.onDisconnect?.(jobId);
    });
  }

  /**
   * Closes the client connection and listener of a job, removes its socket
   * file and releases its TCP port reservation.
   */
  async closeSocket(jobId: string): Promise<void> {
    // Close client socket
    const clientSocket = this.clientSockets.get(jobId);
    if (clientSocket) {
      clientSocket.destroy();
      this.clientSockets.delete(jobId);
    }

    // Close server
    const server = this.servers.get(jobId);
    if (server) {
      await new Promise<void>((resolve) => {
        server.close(() => resolve());
      });
      this.servers.delete(jobId);
    }

    // Clean up socket file (Unix only); failures are deliberately ignored.
    const socketPath = this.socketPaths.get(jobId);
    if (socketPath !== undefined) {
      await rm(socketPath, { force: true }).catch(() => {
        // Ignore errors during cleanup
      });
    }
    this.socketPaths.delete(jobId);

    // Clean up port tracking (TCP)
    const port = this.tcpPorts.get(jobId);
    if (port !== undefined) {
      this.usedPorts.delete(port);
      this.tcpPorts.delete(jobId);
    }
  }

  /** Returns the job ids that currently have a client socket registered. */
  getActiveConnections(): string[] {
    return Array.from(this.clientSockets.keys());
  }

  /**
   * Reports whether a connection for `jobId` is available.
   *
   * Only an already-registered client socket yields `true`. Re-attaching to a
   * socket left behind by a previous server instance is not implemented, so
   * every other case resolves to `false`; `socketPath` and `tcpPort` are
   * accepted for interface compatibility only.
   */
  async reconnect(
    jobId: string,
    socketPath?: string,
    tcpPort?: number
  ): Promise<boolean> {
    return this.clientSockets.has(jobId);
  }

  /** Destroys all client sockets, closes all listeners and removes all socket files. */
  async shutdown(): Promise<void> {
    // Close all client sockets
    for (const socket of this.clientSockets.values()) {
      socket.destroy();
    }
    this.clientSockets.clear();

    // Close all servers
    await Promise.all(
      Array.from(
        this.servers.values(),
        (server) =>
          new Promise<void>((resolve) => {
            server.close(() => resolve());
          })
      )
    );
    this.servers.clear();

    // Clean up socket files; failures are deliberately ignored.
    await Promise.all(
      Array.from(this.socketPaths.values(), (socketPath) =>
        rm(socketPath, { force: true }).catch(() => {})
      )
    );
    this.socketPaths.clear();
    this.tcpPorts.clear();
    this.usedPorts.clear();
  }

  /**
   * Deletes `job_<jobId>.sock` files in `socketDir` whose job id is not in
   * `activeJobIds`. Does nothing on Windows or when the directory is missing;
   * all errors are ignored.
   */
  async cleanOrphanedSockets(activeJobIds: Set<string>): Promise<void> {
    if (this.isWindows) {
      // No socket files to clean on Windows
      return;
    }

    if (!existsSync(this.socketDir)) {
      return;
    }

    try {
      const files = await readdir(this.socketDir);

      for (const file of files) {
        // Match socket files: job_<jobId>.sock
        const jobId = /^job_(.+)\.sock$/.exec(file)?.[1];
        if (jobId === undefined || activeJobIds.has(jobId)) continue;

        // This socket file doesn't correspond to any active job
        await unlink(join(this.socketDir, file)).catch(() => {});
      }
    } catch {
      // Ignore errors during cleanup
    }
  }
}
334
+
335
+ // ============================================
336
+ // Factory Function
337
+ // ============================================
338
+
339
/**
 * Builds an {@link ExternalJobSocketServerImpl} from the jobs configuration,
 * filling in the default socket directory and TCP port range when they are
 * not configured.
 *
 * @param config - External jobs configuration; only `socketDir` and `tcpPortRange` are read.
 * @param callbacks - Message/connection/error handlers forwarded to the server.
 * @returns The configured socket server.
 */
export function createExternalJobSocketServer(
  config: ExternalJobsConfig,
  callbacks: {
    onMessage: (message: AnyExternalJobMessage) => void;
    onConnect?: (jobId: string) => void;
    onDisconnect?: (jobId: string) => void;
    onError?: (error: Error, jobId?: string) => void;
  }
): ExternalJobSocketServer {
  const socketDir = config.socketDir ?? "/tmp/donkeylabs-jobs";
  const tcpPortRange: [number, number] = config.tcpPortRange ?? [49152, 65535];
  const { onMessage, onConnect, onDisconnect, onError } = callbacks;

  return new ExternalJobSocketServerImpl({
    socketDir,
    tcpPortRange,
    onMessage,
    onConnect,
    onDisconnect,
    onError,
  });
}
@@ -0,0 +1,237 @@
1
+ // External Jobs Service
2
+ // Extends the Jobs system to support external processes written in any language
3
+
4
+ import type { Events } from "./events";
5
+ import type { Job, JobAdapter } from "./jobs";
6
+
7
+ // ============================================
8
+ // Message Protocol Types
9
+ // ============================================
10
+
11
/** Discriminant values carried in the `type` field of an external job message. */
export type ExternalJobMessageType =
  "started" | "progress" | "heartbeat" | "log" | "completed" | "failed";
18
+
19
/** Fields shared by every message sent by an external job process. */
export interface ExternalJobMessage {
  /** Message kind; selects the concrete message shape. */
  type: ExternalJobMessageType;
  /** Identifier of the job the message refers to. */
  jobId: string;
  /** Numeric timestamp carried by the message. */
  timestamp: number;
}
24
+
25
/** Message with the `"started"` tag; it carries no fields beyond the base ones. */
export interface StartedMessage extends ExternalJobMessage {
  type: "started";
}
28
+
29
/** Message with the `"progress"` tag. */
export interface ProgressMessage extends ExternalJobMessage {
  type: "progress";
  /** Progress value reported by the job. */
  percent: number;
  /** Optional text accompanying the progress value. */
  message?: string;
  /** Optional additional key/value payload. */
  data?: Record<string, any>;
}
35
+
36
/** Message with the `"heartbeat"` tag; it carries no fields beyond the base ones. */
export interface HeartbeatMessage extends ExternalJobMessage {
  type: "heartbeat";
}
39
+
40
/** Message with the `"log"` tag. */
export interface LogMessage extends ExternalJobMessage {
  type: "log";
  /** Severity of the log entry. */
  level: "debug" | "info" | "warn" | "error";
  /** Log text. */
  message: string;
  /** Optional additional key/value payload. */
  data?: Record<string, any>;
}
46
+
47
/** Message with the `"completed"` tag. */
export interface CompletedMessage extends ExternalJobMessage {
  type: "completed";
  /** Optional result value; its shape is not constrained here. */
  result?: any;
}
51
+
52
/** Message with the `"failed"` tag. */
export interface FailedMessage extends ExternalJobMessage {
  type: "failed";
  /** Error description. */
  error: string;
  /** Optional stack trace text. */
  stack?: string;
}
57
+
58
/** Union of all concrete message shapes, discriminated by `type`. */
export type AnyExternalJobMessage =
  StartedMessage | ProgressMessage | HeartbeatMessage | LogMessage | CompletedMessage | FailedMessage;
65
+
66
+ // ============================================
67
+ // External Job Configuration
68
+ // ============================================
69
+
70
/** Describes how to launch one kind of external job. */
export interface ExternalJobConfig {
  /** Executable to run, e.g. "python", "node" or "./script.sh". */
  command: string;
  /** Command-line arguments for `command`. */
  args?: string[];
  /** Working directory of the process. */
  cwd?: string;
  /** Environment variables to set for the process. */
  env?: Record<string, string>;
  /** Heartbeat timeout in milliseconds (default: 30000). */
  heartbeatTimeout?: number;
  /** Optional overall job timeout in milliseconds. */
  timeout?: number;
}
84
+
85
+ // ============================================
86
+ // External Job State
87
+ // ============================================
88
+
89
/** Possible states of the process behind an external job. */
export type ExternalJobProcessState = "spawning" | "running" | "orphaned" | "reconnecting";
94
+
95
/** A job record extended with the bookkeeping of its external process. */
export interface ExternalJob extends Job {
  /** Always `true`; marks the job as external. */
  external: true;
  /** PID of the external process. */
  pid?: number;
  /** Path of the Unix socket used for communication. */
  socketPath?: string;
  /** TCP port used by the Windows fallback. */
  tcpPort?: number;
  /** Time of the last heartbeat. */
  lastHeartbeat?: Date;
  /** Current state of the process. */
  processState?: ExternalJobProcessState;
}
109
+
110
+ // ============================================
111
+ // External Jobs Configuration
112
+ // ============================================
113
+
114
/** Global settings of the external jobs subsystem. */
export interface ExternalJobsConfig {
  /** Unix socket directory (default: /tmp/donkeylabs-jobs). */
  socketDir?: string;
  /** Port range of the Windows TCP fallback (default: [49152, 65535]). */
  tcpPortRange?: [min: number, max: number];
  /** Default heartbeat timeout in milliseconds (default: 30000). */
  defaultHeartbeatTimeout?: number;
  /** Interval between heartbeat checks in milliseconds (default: 10000). */
  heartbeatCheckInterval?: number;
}
124
+
125
+ // ============================================
126
+ // External Job Manager Interface
127
+ // ============================================
128
+
129
/** Contract of the component that registers, spawns and supervises external jobs. */
export interface ExternalJobManager {
  /** Registers the launch configuration of an external job under `name`. */
  registerExternal(name: string, config: ExternalJobConfig): void;

  /** Tells whether `name` is registered as an external job. */
  isExternal(name: string): boolean;

  /** Returns the configuration registered under `name`, if any. */
  getExternalConfig(name: string): ExternalJobConfig | undefined;

  /** Spawns the external process for a job. */
  spawn(jobId: string, name: string, data: any): Promise<ExternalJob>;

  /** Processes a message received from an external process. */
  handleMessage(message: AnyExternalJobMessage): Promise<void>;

  /** Returns all external jobs that are running. */
  getRunningExternal(): Promise<ExternalJob[]>;

  /** Tries to reconnect to orphaned jobs after a server restart. */
  reconnectOrphaned(): Promise<void>;

  /** Starts the heartbeat monitoring loop. */
  startHeartbeatMonitor(): void;

  /** Stops heartbeat monitoring and cleans up. */
  stop(): Promise<void>;
}
149
+
150
+ // ============================================
151
+ // Helper Functions
152
+ // ============================================
153
+
154
/**
 * Check if a process with the given PID is still alive.
 *
 * @param pid - Process id to probe. Values that are not positive integers
 *   never identify a single process and yield `false`.
 * @returns `true` when the process exists (even if this process is not
 *   permitted to signal it), `false` otherwise.
 */
export function isProcessAlive(pid: number): boolean {
  // pid 0 and negative pids address process groups rather than one process,
  // so probing them would report on the wrong target.
  if (!Number.isInteger(pid) || pid <= 0) {
    return false;
  }

  try {
    // Signal 0 delivers nothing; it only performs the existence/permission check.
    process.kill(pid, 0);
    return true;
  } catch (err) {
    // EPERM means the process exists but we may not signal it: it is alive.
    return (err as { code?: unknown } | null)?.code === "EPERM";
  }
}
167
+
168
/**
 * Build the socket path of a job: `<socketDir>/job_<jobId>.sock`.
 *
 * @param socketDir - Directory that holds the socket files.
 * @param jobId - Identifier of the job.
 * @returns The concatenated path; the inputs are used verbatim.
 */
export function generateSocketPath(socketDir: string, jobId: string): string {
  const fileName = "job_" + jobId + ".sock";
  return socketDir + "/" + fileName;
}
174
+
175
/** Message tags accepted by {@link parseJobMessage}. */
const KNOWN_EXTERNAL_JOB_MESSAGE_TYPES: ReadonlySet<string> = new Set([
  "started",
  "progress",
  "heartbeat",
  "log",
  "completed",
  "failed",
]);

/**
 * Parse a message from an external job process.
 *
 * The input must be a JSON object whose `type` is one of the known message
 * tags, whose `jobId` is a non-empty string and whose `timestamp` is a finite
 * number. Only this common envelope is validated; type-specific fields
 * (e.g. `percent`, `level`, `error`) are passed through unchecked.
 *
 * @param data - One raw JSON message.
 * @returns The parsed message, or `null` when the input is not valid JSON or
 *   fails the envelope checks.
 */
export function parseJobMessage(data: string): AnyExternalJobMessage | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(data);
  } catch {
    return null;
  }

  // JSON.parse can also yield primitives, null and arrays.
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    return null;
  }

  const { type, jobId, timestamp } = parsed as Record<string, unknown>;
  if (typeof type !== "string" || !KNOWN_EXTERNAL_JOB_MESSAGE_TYPES.has(type)) {
    return null;
  }
  if (typeof jobId !== "string" || jobId.length === 0) {
    return null;
  }
  if (typeof timestamp !== "number" || !Number.isFinite(timestamp)) {
    return null;
  }

  return parsed as AnyExternalJobMessage;
}
189
+
190
/**
 * Serialize the initial payload that is sent to the external process via stdin.
 *
 * @param jobId - Identifier of the job.
 * @param name - Name of the job.
 * @param data - Job input; must be JSON-serializable.
 * @param socketPath - Socket path the process should use.
 * @returns JSON text with the keys `jobId`, `name`, `data`, `socketPath` in that order.
 */
export function createInitialPayload(
  jobId: string,
  name: string,
  data: any,
  socketPath: string
): string {
  const payload = { jobId, name, data, socketPath };
  return JSON.stringify(payload);
}
206
+
207
+ // ============================================
208
+ // Type Guards
209
+ // ============================================
210
+
211
/** Type guard: a job is external when its `external` property is exactly `true`. */
export function isExternalJob(job: Job): job is ExternalJob {
  const candidate = job as ExternalJob;
  return candidate.external === true;
}
214
+
215
/** Type guard: narrows a message to {@link ProgressMessage} by its `type` tag. */
export function isProgressMessage(msg: AnyExternalJobMessage): msg is ProgressMessage {
  const { type } = msg;
  return type === "progress";
}
218
+
219
/** Type guard: narrows a message to {@link HeartbeatMessage} by its `type` tag. */
export function isHeartbeatMessage(msg: AnyExternalJobMessage): msg is HeartbeatMessage {
  const { type } = msg;
  return type === "heartbeat";
}
222
+
223
/** Type guard: narrows a message to {@link LogMessage} by its `type` tag. */
export function isLogMessage(msg: AnyExternalJobMessage): msg is LogMessage {
  const { type } = msg;
  return type === "log";
}
226
+
227
/** Type guard: narrows a message to {@link CompletedMessage} by its `type` tag. */
export function isCompletedMessage(msg: AnyExternalJobMessage): msg is CompletedMessage {
  const { type } = msg;
  return type === "completed";
}
230
+
231
/** Type guard: narrows a message to {@link FailedMessage} by its `type` tag. */
export function isFailedMessage(msg: AnyExternalJobMessage): msg is FailedMessage {
  const { type } = msg;
  return type === "failed";
}
234
+
235
/** Type guard: narrows a message to {@link StartedMessage} by its `type` tag. */
export function isStartedMessage(msg: AnyExternalJobMessage): msg is StartedMessage {
  const { type } = msg;
  return type === "started";
}
package/src/core/index.ts CHANGED
@@ -47,6 +47,31 @@ export {
47
47
  createJobs,
48
48
  } from "./jobs";
49
49
 
50
+ export {
51
+ type ExternalJobConfig,
52
+ type ExternalJob,
53
+ type ExternalJobProcessState,
54
+ type ExternalJobsConfig,
55
+ type ExternalJobManager,
56
+ type ExternalJobMessage,
57
+ type ExternalJobMessageType,
58
+ type AnyExternalJobMessage,
59
+ type StartedMessage,
60
+ type ProgressMessage,
61
+ type HeartbeatMessage,
62
+ type LogMessage,
63
+ type CompletedMessage,
64
+ type FailedMessage,
65
+ isExternalJob,
66
+ isProgressMessage,
67
+ isHeartbeatMessage,
68
+ isLogMessage,
69
+ isCompletedMessage,
70
+ isFailedMessage,
71
+ isStartedMessage,
72
+ isProcessAlive,
73
+ } from "./external-jobs";
74
+
50
75
  export {
51
76
  type SSE,
52
77
  type SSEClient,
@@ -92,3 +117,27 @@ export {
92
117
  createErrors,
93
118
  createValidationError,
94
119
  } from "./errors";
120
+
121
+ export {
122
+ type Workflows,
123
+ type WorkflowsConfig,
124
+ type WorkflowDefinition,
125
+ type WorkflowInstance,
126
+ type WorkflowStatus,
127
+ type WorkflowContext,
128
+ type WorkflowAdapter,
129
+ type StepDefinition,
130
+ type StepType,
131
+ type StepStatus,
132
+ type StepResult,
133
+ type TaskStepDefinition,
134
+ type ParallelStepDefinition,
135
+ type ChoiceStepDefinition,
136
+ type ChoiceCondition,
137
+ type PassStepDefinition,
138
+ type RetryConfig,
139
+ WorkflowBuilder,
140
+ MemoryWorkflowAdapter,
141
+ workflow,
142
+ createWorkflows,
143
+ } from "./workflows";