@donkeylabs/server 0.5.0 → 0.5.1
- package/docs/external-jobs.md +131 -11
- package/examples/external-jobs/python/donkeylabs_job.py +366 -0
- package/examples/external-jobs/shell/donkeylabs-job.sh +264 -0
- package/examples/external-jobs/shell/example-job.sh +47 -0
- package/package.json +2 -1
- package/src/client/base.ts +6 -4
- package/src/core/external-job-socket.ts +142 -21
- package/src/core/index.ts +5 -0
- package/src/core/job-adapter-sqlite.ts +287 -0
- package/src/core/jobs.ts +36 -3
- package/src/core.ts +73 -4
- package/src/index.ts +12 -0
package/docs/external-jobs.md
CHANGED
@@ -258,9 +258,17 @@ router.route("subscribe-job").raw({
 
 ## Wrapper Libraries
 
-
+After installing `@donkeylabs/server`, copy the wrapper to your project:
+
+```bash
+# Python
+cp node_modules/@donkeylabs/server/examples/external-jobs/python/donkeylabs_job.py ./workers/
+
+# Shell
+cp node_modules/@donkeylabs/server/examples/external-jobs/shell/donkeylabs-job.sh ./workers/
+```
 
-
+### Python Wrapper
 
 ```python
 from donkeylabs_job import DonkeylabsJob, run_job

@@ -331,19 +339,131 @@ job_complete '{"result": "success"}'
 
 ## Server Restart Resilience
 
-External jobs survive server restarts
+External jobs automatically survive server restarts through built-in SQLite persistence.
+
+### Default Behavior (SQLite Persistence)
+
+Jobs are automatically persisted to `.donkeylabs/jobs.db` by default:
+
+```typescript
+import { AppServer } from "@donkeylabs/server";
+
+const server = new AppServer({
+  db: createDatabase(),
+  // Jobs automatically use SQLite persistence - no config needed!
+});
+
+server.getCore().jobs.registerExternal("process-video", {
+  command: "python",
+  args: ["-m", "video_processor"],
+});
+```
+
+### Configuration Options
+
+```typescript
+const server = new AppServer({
+  db: createDatabase(),
+  jobs: {
+    // SQLite is used by default (persist: true)
+    persist: true, // Set to false for in-memory only
+    dbPath: ".donkeylabs/jobs.db", // Custom database path
+    external: {
+      socketDir: "/tmp/donkeylabs-jobs",
+    },
+  },
+});
+```
+
+### Custom Adapter
+
+For Postgres, MySQL, or other databases, provide your own adapter:
+
+```typescript
+import { AppServer, SqliteJobAdapter } from "@donkeylabs/server";
+import { MyPostgresJobAdapter } from "./adapters/postgres";
+
+const server = new AppServer({
+  db: createDatabase(),
+  jobs: {
+    adapter: new MyPostgresJobAdapter(db), // Custom adapter
+  },
+});
+```
+
+### What Gets Persisted
+
+The adapter must persist these fields for external jobs:
+
+| Field | Description |
+|-------|-------------|
+| `id` | Unique job ID |
+| `name` | Job name |
+| `data` | Job payload (JSON) |
+| `status` | pending, running, completed, failed |
+| `pid` | External process ID |
+| `socketPath` | Unix socket path |
+| `tcpPort` | TCP port (Windows) |
+| `lastHeartbeat` | Last heartbeat timestamp |
+| `processState` | spawning, running, orphaned |
+
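As a rough illustration of what a custom adapter has to cover, the sketch below persists the fields from the table above to Postgres via `pg`. The adapter contract that `@donkeylabs/server` actually expects (shipped as `SqliteJobAdapter` in `src/core/job-adapter-sqlite.ts`) is not shown in this diff, so the method names `save`/`findRunning`, the `PersistedJob` type, and the table layout here are assumptions for illustration only.

```typescript
// Illustrative sketch only - not the package's real JobAdapter contract.
// Field names follow the "What Gets Persisted" table above.
import { Pool } from "pg";

interface PersistedJob {
  id: string;
  name: string;
  data: unknown;                // job payload, stored as JSON
  status: "pending" | "running" | "completed" | "failed";
  pid?: number;                 // external process ID
  socketPath?: string;          // Unix socket path
  tcpPort?: number;             // TCP port (Windows)
  lastHeartbeat?: number;       // ms timestamp of last heartbeat
  processState?: "spawning" | "running" | "orphaned";
}

export class MyPostgresJobAdapter {
  constructor(private pool: Pool) {}

  // Upsert the job row so repeated status/heartbeat updates stay cheap.
  async save(job: PersistedJob): Promise<void> {
    await this.pool.query(
      `INSERT INTO jobs (id, name, data, status, pid, socket_path, tcp_port, last_heartbeat, process_state)
       VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
       ON CONFLICT (id) DO UPDATE SET
         status = EXCLUDED.status,
         pid = EXCLUDED.pid,
         last_heartbeat = EXCLUDED.last_heartbeat,
         process_state = EXCLUDED.process_state`,
      [job.id, job.name, JSON.stringify(job.data), job.status, job.pid,
       job.socketPath, job.tcpPort, job.lastHeartbeat, job.processState],
    );
  }

  // What the restart flow described below needs: every external job
  // that was still marked as running when the server went down.
  async findRunning(): Promise<PersistedJob[]> {
    const res = await this.pool.query(
      `SELECT id, name, data, status, pid,
              socket_path   AS "socketPath",
              tcp_port      AS "tcpPort",
              last_heartbeat AS "lastHeartbeat",
              process_state  AS "processState"
       FROM jobs WHERE status = 'running'`,
    );
    return res.rows;
  }
}
```

An adapter shaped like this is what the `adapter:` option in the Custom Adapter snippet above would receive.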
+### How Reconnection Works
+
+1. **On Server Shutdown**: Job state is already persisted in the database
+2. **On Server Restart**:
+   - Server queries for jobs where `status = 'running'` and `external = true`
+   - Checks if the process is still alive (via PID)
+   - Checks if heartbeat hasn't expired
+   - **Reserves** the socket path/port to prevent new jobs from using it
+   - Recreates the socket server on the **same path/port**
+   - External process detects disconnection and retries connecting
+3. **Reconnection**: Once reconnected, the job resumes normal operation
+4. **Cleanup**: When the job completes, fails, or is killed, the reservation is released
+
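The decision in step 2 of the list above comes down to two cheap checks: is the recorded PID still alive, and is the last heartbeat recent enough. Below is a minimal sketch of that check; it is not the package's implementation (the real logic lives in the updated `src/core/external-job-socket.ts` and `jobs.ts`, not shown here), and `isOrphanWorthRecovering` plus the 30-second default are illustrative.

```typescript
// Sketch of the liveness checks behind the restart sequence above.
function isProcessAlive(pid: number): boolean {
  try {
    process.kill(pid, 0); // signal 0 checks existence without sending anything
    return true;
  } catch {
    return false;
  }
}

function isOrphanWorthRecovering(
  job: { pid?: number; lastHeartbeat?: number },
  heartbeatTimeoutMs = 30_000, // assumed default, configurable in reality
): boolean {
  const alive = job.pid != null && isProcessAlive(job.pid);
  const fresh =
    job.lastHeartbeat != null &&
    Date.now() - job.lastHeartbeat < heartbeatTimeoutMs;
  // Only if both hold does the server reserve the socket path/port and
  // recreate the socket server on the same address; otherwise the job is
  // marked failed instead of waiting for a reconnect.
  return alive && fresh;
}
```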
+### Socket/Port Reservation
+
+The server prevents new jobs from accidentally using socket paths or TCP ports that are reserved for orphaned jobs awaiting reconnection:
+
+- When an orphaned job is detected on startup, its socket path/port is **reserved**
+- New jobs cannot use reserved paths/ports (an error is thrown if attempted)
+- Reservations are automatically released when:
+  - The job completes successfully
+  - The job fails
+  - The job is killed due to stale heartbeat
+  - The process is confirmed dead
+
+This ensures that running external processes can always reconnect to their original socket path/port even if the server restarts multiple times.
+
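The reservation rules above amount to a small piece of bookkeeping. A sketch of the idea, for illustration only; the package tracks this internally (see `src/core/external-job-socket.ts`), so the function names here are not its API.

```typescript
// Illustrative reservation bookkeeping for socket paths / TCP ports.
const reservedAddresses = new Set<string>();

function reserve(address: string): void {
  reservedAddresses.add(address); // e.g. a socket path or "tcp:49321"
}

function assertAvailable(address: string): void {
  if (reservedAddresses.has(address)) {
    // Mirrors the documented behavior: new jobs may not claim an address
    // that an orphaned job is expected to reconnect to.
    throw new Error(`Address ${address} is reserved for an orphaned job`);
  }
}

function release(address: string): void {
  // Called when the job completes, fails, is killed for a stale heartbeat,
  // or its process is confirmed dead.
  reservedAddresses.delete(address);
}
```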
+### Python Wrapper Reconnection
+
+The Python wrapper automatically handles reconnection:
+
+```python
+# Default reconnection settings
+job = DonkeylabsJob(
+    job_id=job_id,
+    name=name,
+    data=data,
+    socket_path=socket_path,
+    heartbeat_interval=5.0,    # Heartbeat every 5 seconds
+    reconnect_interval=2.0,    # Retry every 2 seconds
+    max_reconnect_attempts=30, # Try for up to 60 seconds
+)
+```
 
-
-
-
-
-
+When the connection is lost:
+1. Heartbeat/progress messages fail to send
+2. Background reconnection thread starts
+3. Retries connecting to the same socket path
+4. Once reconnected, sends "started" message to server
+5. Normal operation resumes
 
 ### Best Practices
 
--
--
--
+- **Always use a persistent adapter in production**
+- External workers should be idempotent when possible
+- Set `heartbeatTimeout` appropriately (longer = more time to reconnect)
+- Consider longer `max_reconnect_attempts` for critical jobs
 
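Reading the last two bullets together: with the defaults shown earlier the worker retries for roughly `max_reconnect_attempts × reconnect_interval` = 30 × 2.0 s = 60 s, so the server-side heartbeat timeout should comfortably exceed the longest restart window you expect. This diff does not show where `heartbeatTimeout` is configured; assuming it sits alongside `socketDir` under `jobs.external`, tuning it might look like the sketch below (check the package's types before relying on this shape).

```typescript
import { AppServer } from "@donkeylabs/server";

// Assumption: heartbeatTimeout lives under jobs.external next to socketDir.
const server = new AppServer({
  db: createDatabase(), // as in the documentation examples above
  jobs: {
    external: {
      socketDir: "/tmp/donkeylabs-jobs",
      heartbeatTimeout: 120_000, // ms - give workers ~2 minutes to reconnect
    },
  },
});
```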
 ## Error Handling
 

package/examples/external-jobs/python/donkeylabs_job.py
ADDED
@@ -0,0 +1,366 @@
"""
Donkeylabs External Job Python Wrapper

This module provides a simple interface for Python scripts to communicate
with the Donkeylabs job system via Unix sockets or TCP.

Usage:
    from donkeylabs_job import DonkeylabsJob, run_job

    def my_job(job: DonkeylabsJob):
        job.progress(0, "Starting...")
        # Do work...
        job.progress(50, "Halfway done")
        # More work...
        return {"result": "success"}

    if __name__ == "__main__":
        run_job(my_job)
"""

import json
import os
import socket
import sys
import threading
import time
from typing import Any, Callable, Dict, Optional


class DonkeylabsJob:
    """Interface for communicating with the Donkeylabs job system."""

    def __init__(
        self,
        job_id: str,
        name: str,
        data: Any,
        socket_path: str,
        heartbeat_interval: float = 5.0,
        reconnect_interval: float = 2.0,
        max_reconnect_attempts: int = 30,
    ):
        self.job_id = job_id
        self.name = name
        self.data = data
        self._socket_path = socket_path
        self._heartbeat_interval = heartbeat_interval
        self._reconnect_interval = reconnect_interval
        self._max_reconnect_attempts = max_reconnect_attempts
        self._socket: Optional[socket.socket] = None
        self._heartbeat_thread: Optional[threading.Thread] = None
        self._reconnect_thread: Optional[threading.Thread] = None
        self._running = False
        self._connected = False
        self._lock = threading.Lock()
        self._reconnect_lock = threading.Lock()

    def connect(self) -> None:
        """Connect to the job server socket."""
        self._do_connect()
        self._running = True
        self._connected = True
        self._start_heartbeat()
        self._send_started()

    def _do_connect(self) -> None:
        """Internal connection logic."""
        if self._socket_path.startswith("tcp://"):
            # TCP connection (Windows fallback)
            addr = self._socket_path[6:]  # Remove "tcp://"
            host, port = addr.rsplit(":", 1)
            self._socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            self._socket.connect((host, int(port)))
        else:
            # Unix socket
            self._socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
            self._socket.connect(self._socket_path)

    def _try_reconnect(self) -> bool:
        """Attempt to reconnect to the server (for server restart resilience)."""
        with self._reconnect_lock:
            if self._connected:
                return True

            print(f"[DonkeylabsJob] Attempting to reconnect...", file=sys.stderr)

            for attempt in range(self._max_reconnect_attempts):
                try:
                    # Close old socket
                    if self._socket:
                        try:
                            self._socket.close()
                        except Exception:
                            pass

                    # Try to reconnect
                    self._do_connect()
                    self._connected = True
                    print(f"[DonkeylabsJob] Reconnected after {attempt + 1} attempts", file=sys.stderr)

                    # Send started message to let server know we're back
                    self._send_started()
                    return True
                except Exception as e:
                    print(f"[DonkeylabsJob] Reconnect attempt {attempt + 1}/{self._max_reconnect_attempts} failed: {e}", file=sys.stderr)
                    time.sleep(self._reconnect_interval)

            print(f"[DonkeylabsJob] Failed to reconnect after {self._max_reconnect_attempts} attempts", file=sys.stderr)
            return False

    def disconnect(self) -> None:
        """Disconnect from the job server."""
        self._running = False
        if self._heartbeat_thread:
            self._heartbeat_thread.join(timeout=2.0)
        if self._socket:
            try:
                self._socket.close()
            except Exception:
                pass

    def _send_message(self, message: Dict[str, Any]) -> bool:
        """Send a JSON message to the server. Returns True if sent successfully."""
        if not self._socket:
            return False

        message["jobId"] = self.job_id
        message["timestamp"] = int(time.time() * 1000)

        with self._lock:
            try:
                data = json.dumps(message) + "\n"
                self._socket.sendall(data.encode("utf-8"))
                return True
            except (BrokenPipeError, ConnectionResetError, OSError) as e:
                print(f"[DonkeylabsJob] Connection lost: {e}", file=sys.stderr)
                self._connected = False

                # Try to reconnect in background (don't block the caller)
                if self._running and not self._reconnect_thread:
                    self._reconnect_thread = threading.Thread(
                        target=self._reconnect_loop,
                        daemon=True
                    )
                    self._reconnect_thread.start()
                return False
            except Exception as e:
                print(f"[DonkeylabsJob] Failed to send message: {e}", file=sys.stderr)
                return False

    def _reconnect_loop(self) -> None:
        """Background thread that attempts to reconnect."""
        if self._try_reconnect():
            print(f"[DonkeylabsJob] Reconnection successful, resuming operation", file=sys.stderr)
        else:
            print(f"[DonkeylabsJob] Reconnection failed, job may be lost", file=sys.stderr)
        self._reconnect_thread = None

    def _send_started(self) -> None:
        """Send a started message to the server."""
        self._send_message({"type": "started"})

    def _start_heartbeat(self) -> None:
        """Start the background heartbeat thread."""

        def heartbeat_loop():
            while self._running:
                self._send_message({"type": "heartbeat"})
                time.sleep(self._heartbeat_interval)

        self._heartbeat_thread = threading.Thread(target=heartbeat_loop, daemon=True)
        self._heartbeat_thread.start()

    def progress(
        self,
        percent: float,
        message: Optional[str] = None,
        **data: Any,
    ) -> None:
        """
        Report progress to the job server.

        Args:
            percent: Progress percentage (0-100)
            message: Optional status message
            **data: Additional data to include
        """
        msg: Dict[str, Any] = {
            "type": "progress",
            "percent": percent,
        }
        if message:
            msg["message"] = message
        if data:
            msg["data"] = data

        self._send_message(msg)

    def log(
        self,
        level: str,
        message: str,
        **data: Any,
    ) -> None:
        """
        Send a log message to the job server.

        Args:
            level: Log level (debug, info, warn, error)
            message: Log message
            **data: Additional data to include
        """
        msg: Dict[str, Any] = {
            "type": "log",
            "level": level,
            "message": message,
        }
        if data:
            msg["data"] = data

        self._send_message(msg)

    def debug(self, message: str, **data: Any) -> None:
        """Send a debug log message."""
        self.log("debug", message, **data)

    def info(self, message: str, **data: Any) -> None:
        """Send an info log message."""
        self.log("info", message, **data)

    def warn(self, message: str, **data: Any) -> None:
        """Send a warning log message."""
        self.log("warn", message, **data)

    def error(self, message: str, **data: Any) -> None:
        """Send an error log message."""
        self.log("error", message, **data)

    def complete(self, result: Any = None) -> None:
        """
        Mark the job as completed.

        Args:
            result: Optional result data to return
        """
        msg: Dict[str, Any] = {"type": "completed"}
        if result is not None:
            msg["result"] = result

        self._send_message(msg)

    def fail(self, error: str, stack: Optional[str] = None) -> None:
        """
        Mark the job as failed.

        Args:
            error: Error message
            stack: Optional stack trace
        """
        msg: Dict[str, Any] = {
            "type": "failed",
            "error": error,
        }
        if stack:
            msg["stack"] = stack

        self._send_message(msg)


def run_job(
    handler: Callable[[DonkeylabsJob], Any],
    heartbeat_interval: float = 5.0,
) -> None:
    """
    Run a job handler function.

    This function reads the job payload from stdin, connects to the job server,
    runs the handler, and reports the result.

    Args:
        handler: A function that takes a DonkeylabsJob and returns the result
        heartbeat_interval: How often to send heartbeats (seconds)

    Example:
        def my_job(job: DonkeylabsJob):
            job.progress(0, "Starting...")
            result = do_work(job.data)
            return result

        if __name__ == "__main__":
            run_job(my_job)
    """
    # Read payload from stdin
    payload_line = sys.stdin.readline()
    if not payload_line:
        print("No payload received on stdin", file=sys.stderr)
        sys.exit(1)

    try:
        payload = json.loads(payload_line)
    except json.JSONDecodeError as e:
        print(f"Failed to parse payload: {e}", file=sys.stderr)
        sys.exit(1)

    job_id = payload.get("jobId")
    name = payload.get("name")
    data = payload.get("data")
    socket_path = payload.get("socketPath")

    # Fall back to environment variables if not in payload
    if not job_id:
        job_id = os.environ.get("DONKEYLABS_JOB_ID")
    if not socket_path:
        socket_path = os.environ.get("DONKEYLABS_SOCKET_PATH")
        tcp_port = os.environ.get("DONKEYLABS_TCP_PORT")
        if tcp_port and not socket_path:
            socket_path = f"tcp://127.0.0.1:{tcp_port}"

    if not job_id or not socket_path:
        print("Missing jobId or socketPath", file=sys.stderr)
        sys.exit(1)

    job = DonkeylabsJob(
        job_id=job_id,
        name=name or "unknown",
        data=data,
        socket_path=socket_path,
        heartbeat_interval=heartbeat_interval,
    )

    try:
        job.connect()

        # Run the handler
        result = handler(job)

        # Send completion
        job.complete(result)
    except Exception as e:
        import traceback

        job.fail(str(e), traceback.format_exc())
        sys.exit(1)
    finally:
        job.disconnect()


# Example job handler
def example_handler(job: DonkeylabsJob) -> Dict[str, Any]:
    """Example job handler that processes data in steps."""
    job.info(f"Starting job with data: {job.data}")

    total_steps = job.data.get("steps", 5)

    for i in range(total_steps):
        progress = (i / total_steps) * 100
        job.progress(progress, f"Processing step {i + 1} of {total_steps}")
        time.sleep(0.5)  # Simulate work

    job.progress(100, "Complete!")
    return {"processed": True, "steps": total_steps}


if __name__ == "__main__":
    # If run directly, use the example handler
    run_job(example_handler)