@lobu/worker 6.1.1 → 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/dist/embedded/just-bash-bootstrap.d.ts.map +1 -1
  2. package/dist/embedded/just-bash-bootstrap.js +26 -2
  3. package/dist/embedded/just-bash-bootstrap.js.map +1 -1
  4. package/dist/gateway/gateway-integration.js +4 -4
  5. package/dist/gateway/gateway-integration.js.map +1 -1
  6. package/dist/gateway/message-batcher.d.ts.map +1 -1
  7. package/dist/gateway/message-batcher.js +3 -5
  8. package/dist/gateway/message-batcher.js.map +1 -1
  9. package/dist/gateway/sse-client.d.ts +1 -0
  10. package/dist/gateway/sse-client.d.ts.map +1 -1
  11. package/dist/gateway/sse-client.js +8 -0
  12. package/dist/gateway/sse-client.js.map +1 -1
  13. package/dist/openclaw/worker.d.ts +0 -1
  14. package/dist/openclaw/worker.d.ts.map +1 -1
  15. package/dist/openclaw/worker.js +18 -75
  16. package/dist/openclaw/worker.js.map +1 -1
  17. package/dist/shared/tool-implementations.d.ts.map +1 -1
  18. package/dist/shared/tool-implementations.js +37 -13
  19. package/dist/shared/tool-implementations.js.map +1 -1
  20. package/package.json +14 -4
  21. package/src/__tests__/audio-provider-suggestions.test.ts +199 -0
  22. package/src/__tests__/custom-tools.test.ts +92 -0
  23. package/src/__tests__/embedded-just-bash-bootstrap.test.ts +128 -0
  24. package/src/__tests__/embedded-mcp-cli-bash.test.ts +179 -0
  25. package/src/__tests__/embedded-tools.test.ts +744 -0
  26. package/src/__tests__/exec-sandbox-extra.test.ts +0 -0
  27. package/src/__tests__/exec-sandbox.test.ts +550 -0
  28. package/src/__tests__/generated-media.test.ts +142 -0
  29. package/src/__tests__/instructions.test.ts +60 -0
  30. package/src/__tests__/mcp-cli-commands-extra.test.ts +478 -0
  31. package/src/__tests__/mcp-cli-commands.test.ts +383 -0
  32. package/src/__tests__/mcp-tool-call.test.ts +423 -0
  33. package/src/__tests__/memory-flush-harden.test.ts +367 -0
  34. package/src/__tests__/memory-flush-runtime.test.ts +138 -0
  35. package/src/__tests__/memory-flush.test.ts +64 -0
  36. package/src/__tests__/message-batcher.test.ts +247 -0
  37. package/src/__tests__/model-resolver-harden.test.ts +197 -0
  38. package/src/__tests__/model-resolver.test.ts +156 -0
  39. package/src/__tests__/processor-harden.test.ts +269 -0
  40. package/src/__tests__/processor.test.ts +225 -0
  41. package/src/__tests__/replace-base-prompt-identity.test.ts +41 -0
  42. package/src/__tests__/sandbox-leak-harden.test.ts +200 -0
  43. package/src/__tests__/sandbox-leak.test.ts +167 -0
  44. package/src/__tests__/setup.ts +102 -0
  45. package/src/__tests__/sse-client-harden.test.ts +588 -0
  46. package/src/__tests__/sse-client.test.ts +90 -0
  47. package/src/__tests__/tool-implementations.test.ts +196 -0
  48. package/src/__tests__/tool-policy-edge-cases.test.ts +263 -0
  49. package/src/__tests__/tool-policy.test.ts +269 -0
  50. package/src/__tests__/worker.test.ts +89 -0
  51. package/src/core/error-handler.ts +62 -0
  52. package/src/core/project-scanner.ts +65 -0
  53. package/src/core/types.ts +128 -0
  54. package/src/core/workspace.ts +89 -0
  55. package/src/embedded/exec-sandbox.ts +372 -0
  56. package/src/embedded/just-bash-bootstrap.ts +543 -0
  57. package/src/embedded/mcp-cli-commands.ts +402 -0
  58. package/src/gateway/gateway-integration.ts +298 -0
  59. package/src/gateway/message-batcher.ts +123 -0
  60. package/src/gateway/sse-client.ts +951 -0
  61. package/src/gateway/types.ts +68 -0
  62. package/src/index.ts +141 -0
  63. package/src/instructions/builder.ts +45 -0
  64. package/src/instructions/providers.ts +27 -0
  65. package/src/modules/lifecycle.ts +92 -0
  66. package/src/openclaw/custom-tools.ts +315 -0
  67. package/src/openclaw/instructions.ts +36 -0
  68. package/src/openclaw/model-resolver.ts +150 -0
  69. package/src/openclaw/plugin-loader.ts +427 -0
  70. package/src/openclaw/processor.ts +198 -0
  71. package/src/openclaw/sandbox-leak.ts +105 -0
  72. package/src/openclaw/session-context.ts +320 -0
  73. package/src/openclaw/tool-policy.ts +248 -0
  74. package/src/openclaw/tools.ts +277 -0
  75. package/src/openclaw/worker.ts +1847 -0
  76. package/src/server.ts +334 -0
  77. package/src/shared/audio-provider-suggestions.ts +132 -0
  78. package/src/shared/processor-utils.ts +33 -0
  79. package/src/shared/provider-auth-hints.ts +68 -0
  80. package/src/shared/tool-display-config.ts +75 -0
  81. package/src/shared/tool-implementations.ts +940 -0
  82. package/src/shared/worker-env-keys.ts +8 -0
@@ -0,0 +1,951 @@
1
+ /**
2
+ * SSE client for receiving jobs from dispatcher
3
+ */
4
+
5
+ import { spawn } from "node:child_process";
6
+ import {
7
+ createChildSpan,
8
+ createLogger,
9
+ extractTraceId,
10
+ flushTracing,
11
+ SpanStatusCode,
12
+ } from "@lobu/core";
13
+ import { z } from "zod";
14
+ import type { WorkerConfig, WorkerExecutor } from "../core/types";
15
+ import { HttpWorkerTransport } from "./gateway-integration";
16
+ import { MessageBatcher } from "./message-batcher";
17
+ import type { MessagePayload, QueuedMessage } from "./types";
18
+
19
/** Module-wide logger created under the "sse-client" name. */
const logger = createLogger("sse-client");

/**
 * Structural subset of AbortController that this module relies on:
 * the `signal` handed to `fetch` and the `abort()` used to cancel it.
 */
interface AbortControllerLike {
  readonly signal: AbortSignal;
  abort(): void;
}
25
+
26
// --- Pending config change notifications ---

/** One config change notification as queued by the `config_changed` event. */
interface ConfigChangeEntry {
  category: string;
  action: string;
  summary: string;
  details?: string[];
}

/** Module-level queue of notifications that have not been consumed yet. */
const pendingConfigNotifications: ConfigChangeEntry[] = [];

/**
 * Drains the pending config change notifications.
 * Called by the worker before building the next prompt.
 *
 * @returns every queued entry in arrival order (a fresh empty array when
 *   nothing is queued); the queue is empty afterwards.
 */
export function consumePendingConfigNotifications(): ConfigChangeEntry[] {
  const queued = pendingConfigNotifications.length;
  return queued > 0 ? pendingConfigNotifications.splice(0, queued) : [];
}
45
+
46
// Zod schemas for runtime validation of SSE event data

/** Payload of the `connected` event. */
const ConnectedEventSchema = z.object({ deploymentName: z.string() });

// Platform metadata is a transport envelope for platform-specific details.
// The chat fields listed here are typed; anything else a gateway caller adds
// (nested objects such as watcher run intent metadata, file descriptors, or
// provider context) is kept as `unknown` via `catchall` rather than causing
// an otherwise valid job to be rejected.
const PlatformMetadataSchema = z
  .object({
    team_id: z.string().optional(),
    channel: z.string().optional(),
    ts: z.string().optional(),
    thread_ts: z.string().optional(),
    files: z.array(z.any()).optional(),
  })
  .catchall(z.unknown());

/** A tool list is accepted either as a single string or as an array of strings. */
const ToolListSchema = z.union([z.string(), z.array(z.string())]);

// AgentOptions: known fields are typed, and `passthrough` keeps any extra
// fields (including nested objects) on the parsed value.
const AgentOptionsSchema = z
  .object({
    runtime: z.string().optional(),
    model: z.string().optional(),
    maxTokens: z.number().optional(),
    temperature: z.number().optional(),
    allowedTools: ToolListSchema.optional(),
    disallowedTools: ToolListSchema.optional(),
    timeoutMinutes: z.union([z.number(), z.string()]).optional(),
    // Additional settings passed through from gateway
    networkConfig: z.any().optional(),
    envVars: z.any().optional(),
  })
  .passthrough();
80
+
81
/**
 * Schema for the JSON body of a `job` SSE event.
 *
 * The `payload` object is declared with `.passthrough()` because zod object
 * schemas strip undeclared keys by default. The handlers in this file read
 * several payload keys that are not listed here (`jobType`, `execId`,
 * `execCommand`, `execCwd`, `execEnv`, `execTimeout`); without passthrough
 * those keys would be removed during validation and an exec job could never
 * be recognised as such.
 */
const JobEventSchema = z.object({
  payload: z
    .object({
      botId: z.string(),
      userId: z.string(),
      agentId: z.string(),
      conversationId: z.string(),
      platform: z.string(),
      channelId: z.string(),
      messageId: z.string(),
      messageText: z.string(),
      platformMetadata: PlatformMetadataSchema,
      agentOptions: AgentOptionsSchema,
      jobId: z.string().optional(),
      teamId: z.string().optional(), // Optional for WhatsApp (top-level) and Slack (in platformMetadata)
    })
    // Keep undeclared payload keys (job type and exec fields) on the result.
    .passthrough(),
  processedIds: z.array(z.string()).optional(),
});
98
+
99
+ /**
100
+ * Gateway client for workers - connects to dispatcher via SSE
101
+ * Receives jobs via SSE stream, sends responses via HTTP POST
102
+ */
103
+ export class GatewayClient {
104
// Connection identity, assigned once in the constructor.
private dispatcherUrl: string;
private workerToken: string;
private userId: string;
private deploymentName: string;

// Run state: loop flag, the worker currently executing (if any), and the
// controller used to cancel the in-flight SSE request.
private isRunning = false;
private currentWorker: WorkerExecutor | null = null;
private abortController?: AbortControllerLike;

// Identifiers of the most recently received job.
private currentJobId?: string;
private currentTraceId?: string; // Trace ID for end-to-end observability
private currentTraceparent?: string; // W3C traceparent for distributed tracing

// Reconnect bookkeeping (see handleReconnect).
private reconnectAttempts = 0;
private maxReconnectAttempts = 10;

// Incoming message jobs are queued here before being processed.
private messageBatcher: MessageBatcher;

// Event-handler failure counter and the count at which cleanup is triggered.
private eventErrorCount = 0;
private eventErrorThreshold = 10;

// When set, appended to the stream URL as the `httpPort` query parameter.
private httpPort?: number;
120
+
121
/**
 * Stores the connection settings, seeds the trace id from the environment
 * and wires the message batcher to this client's batch processor.
 * No network activity happens here; call `start()` to connect.
 *
 * @param dispatcherUrl  Base URL of the dispatcher.
 * @param workerToken    Bearer token sent on every request.
 * @param userId         Reported back by `getStatus()`.
 * @param deploymentName Reported back by `getStatus()` and logged.
 * @param httpPort       Optional port advertised in the stream URL.
 */
constructor(
  dispatcherUrl: string,
  workerToken: string,
  userId: string,
  deploymentName: string,
  httpPort?: number
) {
  this.dispatcherUrl = dispatcherUrl;
  this.workerToken = workerToken;
  this.userId = userId;
  this.deploymentName = deploymentName;
  this.httpPort = httpPort;

  // Get initial traceId from environment (set by deployment)
  const initialTraceId = process.env.TRACE_ID;
  this.currentTraceId = initialTraceId;

  this.messageBatcher = new MessageBatcher({
    onBatchReady: async (batch) => {
      await this.processBatchedMessages(batch);
    },
  });

  logger.info({ traceId: initialTraceId, deploymentName }, "Worker connected");
}
147
+
148
/**
 * Runs the connect / listen / reconnect loop until the client is stopped or
 * the reconnect budget is used up.
 *
 * An `AbortError` ends the loop only when the client is no longer running
 * (i.e. `stop()` flipped `isRunning` before aborting). An abort that happens
 * while the client is still running — `cleanupOnEventError` aborts the
 * connection precisely to force a fresh one — is treated like any other
 * dropped connection and goes through `handleReconnect()`. Breaking out in
 * that case would make this method return normally with a dead connection.
 *
 * @throws Error when `handleReconnect()` has exhausted its attempts.
 */
async start(): Promise<void> {
  this.isRunning = true;

  while (this.isRunning) {
    try {
      await this.connectAndListen();
    } catch (error) {
      const aborted = error instanceof Error && error.name === "AbortError";
      if (aborted && !this.isRunning) {
        logger.info("SSE connection aborted");
        break;
      }
      if (aborted) {
        logger.warn("SSE connection aborted while still running, reconnecting");
      } else {
        logger.error("SSE connection error:", error);
      }
    }

    if (!this.isRunning) break;
    await this.handleReconnect();
  }

  if (this.reconnectsExhausted) {
    // Don't return normally — a caller that logs "started successfully" and
    // then awaits forever would leave a zombie process holding its
    // workspace/port that never receives jobs and never exits.
    throw new Error("Gateway worker exhausted reconnect attempts");
  }
}
173
+
174
/**
 * Splits one raw SSE event (the text between two blank lines) into its
 * event type and data.
 *
 * Lines may end in LF or CRLF. When an event carries several `data:` lines
 * they are joined with "\n", as the event-stream format prescribes, instead
 * of each line overwriting the previous one. Lines that are neither
 * `event:` nor `data:` are ignored.
 *
 * @returns `eventType` (defaults to "message") and the trimmed `eventData`
 *   (empty string when the event has no data).
 */
private static parseSseEvent(rawEvent: string): {
  eventType: string;
  eventData: string;
} {
  let eventType = "message";
  const dataLines: string[] = [];

  for (const line of rawEvent.split(/\r?\n/)) {
    if (line.startsWith("event:")) {
      eventType = line.substring(6).trim();
    } else if (line.startsWith("data:")) {
      // The format allows a single optional space after the colon.
      dataLines.push(line.substring(5).replace(/^ /, ""));
    }
  }

  return { eventType, eventData: dataLines.join("\n").trim() };
}

/**
 * Opens the SSE stream to the dispatcher and dispatches events until the
 * stream ends, the client stops, or reading fails.
 *
 * Events are handed to `handleEvent` without awaiting so a long-running job
 * does not block the read loop. Rejections from `handleEvent` are counted,
 * and reaching `eventErrorThreshold` triggers `cleanupOnEventError`.
 *
 * @throws Error when the response status is not OK or there is no body;
 *   errors from `fetch` / `reader.read()` (including AbortError) propagate.
 */
private async connectAndListen(): Promise<void> {
  // Abort previous controller before creating a new one
  if (this.abortController) {
    this.abortController.abort();
  }
  const abortController =
    new globalThis.AbortController() as AbortControllerLike;
  this.abortController = abortController;
  const streamUrl = this.httpPort
    ? `${this.dispatcherUrl}/worker/stream?httpPort=${this.httpPort}`
    : `${this.dispatcherUrl}/worker/stream`;

  logger.info(
    `Connecting to dispatcher at ${streamUrl} (attempt ${this.reconnectAttempts + 1})`
  );

  const response = await fetch(streamUrl, {
    method: "GET",
    headers: {
      Authorization: `Bearer ${this.workerToken}`,
      Accept: "text/event-stream",
    },
    signal: abortController.signal,
  });

  if (!response.ok) {
    throw new Error(
      `Failed to connect to dispatcher: ${response.status} ${response.statusText}`
    );
  }

  logger.info("✅ Connected to dispatcher via SSE");
  this.reconnectAttempts = 0;

  const reader = response.body?.getReader();
  const decoder = new TextDecoder();

  if (!reader) {
    throw new Error("No response body");
  }

  // Holds the trailing, not yet terminated event between reads.
  let buffer = "";

  logger.info("[SSE-CLIENT] 🔄 Starting SSE stream reading loop");

  while (this.isRunning) {
    const { done, value } = await reader.read();

    if (done) {
      logger.info("[SSE-CLIENT] SSE stream ended");
      break;
    }

    const chunk = decoder.decode(value, { stream: true });
    logger.debug(`[SSE-CLIENT] 📨 Received chunk: ${chunk.substring(0, 200)}`);
    buffer += chunk;

    // A blank line terminates an event; accept both LF and CRLF endings.
    const events = buffer.split(/\r?\n\r?\n/);
    buffer = events.pop() ?? "";

    logger.debug(`[SSE-CLIENT] 📊 Parsed ${events.length} events from buffer`);

    for (const rawEvent of events) {
      if (!rawEvent.trim()) continue;

      const { eventType, eventData } = GatewayClient.parseSseEvent(rawEvent);
      if (!eventData) continue;

      logger.info(`[SSE-CLIENT] 🎯 Processing event type: ${eventType}`);
      // Don't await - fire async to avoid blocking SSE reading loop
      this.handleEvent(eventType, eventData).catch((error) => {
        this.eventErrorCount++;
        logger.error(
          `[SSE-CLIENT] Error handling ${eventType} event (error ${this.eventErrorCount}/${this.eventErrorThreshold}):`,
          error
        );

        // Trigger cleanup if too many errors
        if (this.eventErrorCount >= this.eventErrorThreshold) {
          logger.error(
            `❌ Event error threshold reached (${this.eventErrorCount} errors). Triggering cleanup...`
          );
          this.cleanupOnEventError(eventType).catch((cleanupErr) => {
            logger.error("Failed to cleanup after event errors:", cleanupErr);
          });
        }
      });
    }
  }
}
282
+
283
/**
 * Posts a delivery receipt (`{ jobId, received: true }`) to the gateway's
 * `/worker/response` endpoint.
 * Fire-and-forget: the request is not awaited, it is given a 10 second
 * timeout, and a failure is only logged as a warning.
 */
private sendDeliveryReceipt(jobId: string): void {
  const receiptRequest = fetch(`${this.dispatcherUrl}/worker/response`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${this.workerToken}`,
    },
    body: JSON.stringify({ jobId, received: true }),
    signal: AbortSignal.timeout(10_000),
  });

  receiptRequest.catch((err) => {
    logger.warn(`Failed to send delivery receipt for job ${jobId}:`, err);
  });
}
301
+
302
/**
 * Posts a heartbeat ACK (`{ received: true, heartbeat: true }`) to the
 * gateway's `/worker/response` endpoint so stale cleanup is based on
 * verified inbound worker activity rather than outbound SSE writes.
 * Not awaited; a failure is only logged as a warning.
 */
private sendHeartbeatAck(): void {
  const ackRequest = fetch(`${this.dispatcherUrl}/worker/response`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${this.workerToken}`,
    },
    body: JSON.stringify({ received: true, heartbeat: true }),
    signal: AbortSignal.timeout(10_000),
  });

  ackRequest.catch((err) => {
    logger.warn("Failed to send heartbeat ACK:", err);
  });
}
320
+
321
/** Set once the reconnect budget is spent; `start()` throws when it is true. */
private reconnectsExhausted = false;

/**
 * Either waits out the next backoff delay or, when `maxReconnectAttempts`
 * has been reached, marks the client as exhausted and stops it.
 * The delay doubles per attempt starting at 1000ms and is capped at 60000ms.
 */
private async handleReconnect(): Promise<void> {
  const budgetSpent = this.reconnectAttempts >= this.maxReconnectAttempts;
  if (budgetSpent) {
    logger.error("Max reconnection attempts reached, giving up");
    this.reconnectsExhausted = true;
    this.isRunning = false;
    return;
  }

  this.reconnectAttempts += 1;
  const backoffMs = Math.min(1000 * 2 ** (this.reconnectAttempts - 1), 60000);

  logger.info(
    `Reconnecting in ${backoffMs}ms (attempt ${this.reconnectAttempts}/${this.maxReconnectAttempts})...`
  );

  await new Promise((wake) => setTimeout(wake, backoffMs));
}
340
+
341
/**
 * Stops the client: clears the run flag, aborts the SSE request, stops the
 * message batcher and cleans up the worker that is currently set, if any.
 *
 * @throws whatever any of those steps throws, after logging it.
 */
async stop(): Promise<void> {
  try {
    // Flip the flag first so the abort below is seen as an intentional stop.
    this.isRunning = false;
    this.abortController?.abort();
    this.messageBatcher.stop();

    if (this.currentWorker !== null) {
      await this.currentWorker.cleanup();
      this.currentWorker = null;
    }

    logger.info("✅ Gateway client stopped");
  } catch (error) {
    logger.error("Error stopping gateway client:", error);
    throw error;
  }
}
362
+
363
/**
 * Runtime check that a value parsed from a `config_changed` payload has the
 * ConfigChangeEntry shape: string `category`, `action` and `summary`, and,
 * when present, a `details` array containing only strings.
 */
private static isConfigChangeEntry(value: unknown): value is ConfigChangeEntry {
  if (typeof value !== "object" || value === null) return false;
  const entry = value as Record<string, unknown>;
  const details = entry.details;
  return (
    typeof entry.category === "string" &&
    typeof entry.action === "string" &&
    typeof entry.summary === "string" &&
    (details === undefined ||
      (Array.isArray(details) &&
        details.every((item) => typeof item === "string")))
  );
}

/**
 * Routes one SSE event by type.
 *
 * - `connected`: validates the payload and logs the deployment name.
 * - `ping`: answers with a heartbeat ACK.
 * - `config_changed`: invalidates the session context cache and queues the
 *   well-formed entries of `changes` for the next prompt. Entries that do
 *   not match the ConfigChangeEntry shape are dropped (and counted in a
 *   warning) rather than queued unchecked.
 * - `job`: validates the event, sends a delivery receipt when the event has
 *   a top-level `jobId`, then hands the payload to `handleThreadMessage`.
 * - anything else: logged as unknown.
 *
 * Every failure is logged here and not rethrown, so the returned promise
 * does not reject for handler errors.
 *
 * @param eventType SSE event name.
 * @param data      Raw `data` text of the event (expected to be JSON).
 */
private async handleEvent(eventType: string, data: string): Promise<void> {
  try {
    if (eventType === "connected") {
      const parsedData = JSON.parse(data);
      const validationResult = ConnectedEventSchema.safeParse(parsedData);

      if (!validationResult.success) {
        logger.error(
          "Invalid connected event data:",
          validationResult.error.format()
        );
        throw new Error(
          `Connected event validation failed: ${validationResult.error.message}`
        );
      }

      const connData = validationResult.data;
      logger.info(
        `Connected to dispatcher for deployment ${connData.deploymentName}`
      );
      return;
    }

    if (eventType === "ping") {
      logger.debug("Received heartbeat ping from dispatcher");
      this.sendHeartbeatAck();
      return;
    }

    if (eventType === "config_changed") {
      logger.info(
        "Received config_changed event from gateway, invalidating session context cache"
      );
      const { invalidateSessionContextCache } = await import(
        "../openclaw/session-context"
      );
      invalidateSessionContextCache();

      // Parse and queue config change notifications for the next prompt
      let parsed: unknown;
      try {
        parsed = JSON.parse(data);
      } catch {
        // Backward compat: old gateway may send empty or invalid payload
        return;
      }

      const rawChanges: unknown = (parsed as { changes?: unknown } | null)
        ?.changes;
      if (!Array.isArray(rawChanges)) return;

      // The payload is external input: keep only entries of the right shape.
      const changes = rawChanges.filter(GatewayClient.isConfigChangeEntry);
      const dropped = rawChanges.length - changes.length;
      if (dropped > 0) {
        logger.warn(
          `Ignored ${dropped} malformed config change notification(s)`
        );
      }
      if (changes.length > 0) {
        pendingConfigNotifications.push(...changes);
        logger.info(`Queued ${changes.length} config change notification(s)`);
      }
      return;
    }

    if (eventType === "job") {
      try {
        const parsedData = JSON.parse(data);
        const validationResult = JobEventSchema.safeParse(parsedData);

        if (!validationResult.success) {
          logger.error(
            "Invalid job event data:",
            validationResult.error.format()
          );
          logger.debug(`Raw job data: ${data}`);
          throw new Error(
            `Job event validation failed: ${validationResult.error.message}`
          );
        }

        // Send delivery receipt immediately so the gateway knows
        // the job was actually received (not lost to a stale SSE connection).
        // jobId is at the top level of the SSE event (set by job-router),
        // not inside the validated payload.
        const jobId = parsedData.jobId as string | undefined;
        if (jobId) {
          this.sendDeliveryReceipt(jobId);
        }

        // The validated payload is handed on as a MessagePayload.
        await this.handleThreadMessage(validationResult.data.payload);
      } catch (parseError) {
        logger.error(
          `Failed to parse or validate job event data:`,
          parseError
        );
        logger.debug(`Raw job data: ${data}`);
      }
      return;
    }

    logger.warn(`[DEBUG] Unknown SSE event type: ${eventType}, data: ${data}`);
  } catch (error) {
    logger.error(`Error handling event ${eventType}:`, error);
  }
}
464
+
465
/**
 * Entry point for a validated job payload: records its trace identifiers,
 * emits the "job received" span/log when the payload has a `jobId`, then
 * either runs it as an exec job or queues it on the message batcher.
 */
private async handleThreadMessage(data: MessagePayload): Promise<void> {
  const { conversationId, messageId, jobId, jobType } = data;

  // Extract traceparent for distributed tracing
  // Prefer platformMetadata.traceparent, fall back to TRACEPARENT env var
  const traceparent =
    (data.platformMetadata?.traceparent as string) || process.env.TRACEPARENT;
  this.currentTraceparent = traceparent;

  // Extract traceId for logging (backwards compatible)
  const traceId =
    extractTraceId(data) || this.currentTraceId || process.env.TRACE_ID;
  this.currentTraceId = traceId;

  if (jobId) {
    this.currentJobId = jobId;

    // Child span for job receipt, linked to the parent via traceparent.
    const receivedSpan = createChildSpan("job_received", traceparent, {
      "lobu.job_id": jobId,
      "lobu.message_id": messageId,
      "lobu.conversation_id": conversationId,
      "lobu.job_type": jobType || "message",
    });
    receivedSpan?.setStatus({ code: SpanStatusCode.OK });
    receivedSpan?.end();
    // Flush job_received span immediately (not awaited)
    void flushTracing();

    logger.info(
      { traceparent, traceId, jobId, messageId, jobType },
      "Job received"
    );
  }

  // No per-user filtering here: deployment names intentionally hash only
  // `platform:channelId:conversationId` (see `generateDeploymentName` in
  // base-deployment-manager.ts) so a channel/thread has ONE shared worker
  // across all posting users. DMs are single-participant, so a check would
  // be dead there too. The WORKER_TOKEN-scoped-to-spawning-user tradeoff
  // for shared channel workers is acknowledged and deferred to per-message
  // JWT minting — gating here would break the core group-bot design.

  // Exec jobs are run directly; everything else is a message job.
  if (jobType === "exec") {
    await this.handleExecJob(data);
    return;
  }

  const queued: QueuedMessage = { payload: data, timestamp: Date.now() };
  await this.messageBatcher.addMessage(queued);

  logger.info(
    { traceId, messageId, conversationId },
    "Message queued for processing"
  );
}
530
+
531
/**
 * Handle exec job - run `execCommand` through `sh -c` and stream its
 * stdout/stderr back to the gateway, followed by a completion or error
 * message.
 *
 * Behaviour notes:
 * - The working directory is `execCwd`, else `WORKSPACE_DIR`, else
 *   "/workspace"; the timeout is `execTimeout`, else 5 minutes.
 * - On timeout the process gets SIGTERM, then SIGKILL 5 seconds later if it
 *   has still not closed.
 * - A process that ends without an exit code (terminated by a signal) is
 *   never reported as exit code 0: it is reported as 124 when this method's
 *   own timeout killed it, otherwise as 1.
 * - Output streams are decoded as UTF-8 by the stream itself so a multi-byte
 *   character split across two chunks is not corrupted.
 * - Completion is sent only after every output send has settled, so the
 *   gateway does not see "complete" ahead of trailing output.
 *
 * Failures are reported through `sendExecError` and logged; nothing is
 * rethrown. `currentJobId` is cleared on every exit path.
 */
private async handleExecJob(data: MessagePayload): Promise<void> {
  const { execId, execCommand, execCwd, execEnv, execTimeout } = data;
  const conversationId = data.conversationId;
  const traceId = this.currentTraceId;
  const traceparent = this.currentTraceparent;

  if (!execId || !execCommand) {
    logger.error(
      { traceId, execId },
      "Invalid exec job: missing execId or execCommand"
    );
    // Same reset as the `finally` below, which this early exit bypasses.
    this.currentJobId = undefined;
    return;
  }

  logger.info(
    { traceId, execId, command: execCommand.substring(0, 100) },
    "Executing command in sandbox"
  );

  // Create span for exec execution
  const span = createChildSpan("exec_execution", traceparent, {
    "lobu.exec_id": execId,
    "lobu.command": execCommand.substring(0, 100),
  });

  // Determine working directory
  const workingDir = execCwd || process.env.WORKSPACE_DIR || "/workspace";
  const timeout = execTimeout || 300000; // 5 minutes default

  // Create transport for sending responses back to gateway
  const transport = new HttpWorkerTransport({
    gatewayUrl: this.dispatcherUrl,
    workerToken: this.workerToken,
    userId: data.userId,
    channelId: data.channelId,
    conversationId,
    originalMessageTs: execId,
    teamId: data.teamId || "api",
    platform: data.platform,
    platformMetadata: data.platformMetadata,
  });

  let completed = false;
  let timedOut = false;
  let forceKillTimer: ReturnType<typeof setTimeout> | undefined;
  // Output sends that have been started but have not settled yet.
  const inFlightSends = new Set<Promise<void>>();

  try {
    // Spawn the command
    const proc = spawn("sh", ["-c", execCommand], {
      cwd: workingDir,
      env: { ...process.env, ...execEnv },
      stdio: ["ignore", "pipe", "pipe"],
    });

    // Setup timeout
    const timeoutId = setTimeout(() => {
      if (completed) return;
      timedOut = true;
      logger.warn({ traceId, execId }, "Exec timeout reached, killing process");
      proc.kill("SIGTERM");
      forceKillTimer = setTimeout(() => {
        if (!completed) {
          proc.kill("SIGKILL");
        }
      }, 5000);
    }, timeout);

    // Sends one output chunk; failures are logged, never thrown.
    const forwardOutput = (
      stream: "stdout" | "stderr",
      content: string
    ): void => {
      const send: Promise<void> = transport
        .sendExecOutput(execId, stream, content)
        .then(
          () => undefined,
          (err: unknown) => {
            logger.error(
              { traceId, execId, error: err },
              `Failed to send ${stream}`
            );
          }
        )
        .finally(() => {
          inFlightSends.delete(send);
        });
      inFlightSends.add(send);
    };

    // Stream stdout
    proc.stdout?.setEncoding("utf8");
    proc.stdout?.on("data", (chunk: string) => forwardOutput("stdout", chunk));

    // Stream stderr
    proc.stderr?.setEncoding("utf8");
    proc.stderr?.on("data", (chunk: string) => forwardOutput("stderr", chunk));

    // Wait for process to complete
    const exitCode = await new Promise<number>((resolve, reject) => {
      const settle = (): void => {
        completed = true;
        clearTimeout(timeoutId);
        if (forceKillTimer) clearTimeout(forceKillTimer);
      };

      proc.on("close", (code, signal) => {
        settle();
        if (code !== null) {
          resolve(code);
          return;
        }
        // No exit code: the process was terminated by a signal.
        logger.warn(
          { traceId, execId, signal, timedOut },
          "Exec process terminated by signal"
        );
        resolve(timedOut ? 124 : 1);
      });

      proc.on("error", (error) => {
        settle();
        reject(error);
      });
    });

    // Let pending output reach the gateway before announcing completion.
    await Promise.allSettled([...inFlightSends]);

    // Send completion
    await transport.sendExecComplete(execId, exitCode);

    span?.setAttribute("lobu.exit_code", exitCode);
    span?.setStatus({ code: SpanStatusCode.OK });
    span?.end();
    await flushTracing();

    logger.info({ traceId, execId, exitCode }, "Exec completed");
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);

    // Send error
    await transport.sendExecError(execId, errorMessage).catch((err) => {
      logger.error({ traceId, execId, error: err }, "Failed to send exec error");
    });

    span?.setStatus({ code: SpanStatusCode.ERROR, message: errorMessage });
    span?.end();
    await flushTracing();

    logger.error({ traceId, execId, error: errorMessage }, "Exec failed");
  } finally {
    this.currentJobId = undefined;
  }
}
669
+
670
/**
 * Processes a batch delivered by the message batcher.
 *
 * A single message is processed as-is. Several messages are merged into one
 * synthetic message: the first message's payload and timestamp are reused
 * and the text becomes "Message N: <text>" sections separated by blank
 * lines. The ids of all batched messages are passed along as processed ids.
 */
private async processBatchedMessages(
  messages: QueuedMessage[]
): Promise<void> {
  const batchSize = messages.length;
  if (batchSize === 0) return;

  if (batchSize === 1) {
    const only = messages[0];
    if (only) {
      await this.processSingleMessage(only, [only.payload.messageId]);
    }
    return;
  }

  logger.info(`Batching ${batchSize} messages for combined processing`);

  const head = messages[0];
  if (!head) return;

  const sections: string[] = [];
  const processedIds: string[] = [];
  messages.forEach((entry, position) => {
    sections.push(`Message ${position + 1}: ${entry.payload.messageText}`);
    // Skip empty ids, keep the rest in batch order.
    if (entry.payload.messageId) {
      processedIds.push(entry.payload.messageId);
    }
  });

  const merged: QueuedMessage = {
    timestamp: head.timestamp,
    payload: {
      ...head.payload,
      messageText: sections.join("\n\n"),
      agentOptions: head.payload.agentOptions,
    },
  };

  await this.processSingleMessage(merged, processedIds);
}
708
+
709
/**
 * Runs one (possibly batched) message through an OpenClawWorker.
 *
 * Steps: open an "agent_execution" span, build the worker config from the
 * payload, create the worker, attach the current job id and the processed
 * message ids to its HTTP transport, then execute. On failure the error is
 * signalled through the worker transport and rethrown.
 *
 * The worker is always cleaned up, and `currentJobId` is always cleared, in
 * `finally`. Clearing it only on success would leave the id of a failed job
 * in place, and the next job that arrives without an id would be reported
 * under it.
 *
 * @param message      Message to process.
 * @param processedIds Ids of all messages covered by this run; when empty or
 *   omitted, the message's own id is used (if it has one).
 * @throws the error raised while preparing or executing the worker.
 */
private async processSingleMessage(
  message: QueuedMessage,
  processedIds?: string[]
): Promise<void> {
  // Get traceparent for distributed tracing
  const traceparent =
    (message.payload.platformMetadata?.traceparent as string) ||
    this.currentTraceparent ||
    process.env.TRACEPARENT;

  const traceId =
    extractTraceId(message.payload) ||
    this.currentTraceId ||
    process.env.TRACE_ID;

  const conversationId = message.payload.conversationId;

  // Create child span for agent execution (linked to parent via traceparent)
  const span = createChildSpan("agent_execution", traceparent, {
    "lobu.message_id": message.payload.messageId,
    "lobu.conversation_id": conversationId,
    "lobu.user_id": message.payload.userId,
    "lobu.model": message.payload.agentOptions?.model || "default",
  });

  try {
    if (!process.env.USER_ID) {
      logger.warn(
        `USER_ID not set in environment, using userId from payload: ${message.payload.userId}`
      );
      process.env.USER_ID = message.payload.userId;
    }

    const workerConfig = this.payloadToWorkerConfig(message.payload);

    logger.info(
      {
        traceparent,
        traceId,
        messageId: message.payload.messageId,
        model: message.payload.agentOptions?.model,
      },
      "Agent starting"
    );

    // Worker will decide whether to continue session based on workspace state
    const { OpenClawWorker } = await import("../openclaw/worker");
    this.currentWorker = new OpenClawWorker(workerConfig);

    const workerTransport = this.currentWorker.getWorkerTransport();

    if (workerTransport && workerTransport instanceof HttpWorkerTransport) {
      if (this.currentJobId) {
        workerTransport.setJobId(this.currentJobId);
      }

      // Set processedMessageIds directly on the integration instance
      workerTransport.processedMessageIds =
        processedIds && processedIds.length > 0
          ? processedIds
          : message.payload.messageId
            ? [message.payload.messageId]
            : [];
    }

    await this.currentWorker.execute();

    // Reset error count on successful message processing
    this.eventErrorCount = 0;

    // End span with success
    span?.setStatus({ code: SpanStatusCode.OK });
    span?.end();
    // Flush traces immediately to ensure spans are exported before worker scales down
    await flushTracing();
    logger.info(
      {
        traceparent,
        messageId: message.payload.messageId,
        conversationId,
      },
      "Agent completed"
    );
  } catch (error) {
    const errorText = error instanceof Error ? error.message : String(error);

    // End span with error
    span?.setStatus({ code: SpanStatusCode.ERROR, message: errorText });
    span?.end();
    // Flush traces on error too
    await flushTracing();
    logger.error(
      {
        traceparent,
        messageId: message.payload.messageId,
        conversationId,
        error: errorText,
      },
      "Agent failed"
    );

    const workerTransport = this.currentWorker?.getWorkerTransport();
    if (workerTransport) {
      try {
        const enhancedError =
          error instanceof Error ? error : new Error(String(error));
        await workerTransport.signalError(enhancedError);
      } catch (errorSendError) {
        logger.error(
          { traceId, error: errorSendError },
          "Failed to send error to dispatcher"
        );
      }
    }

    throw error;
  } finally {
    // The job is over either way; never carry its id into the next one.
    this.currentJobId = undefined;

    if (this.currentWorker) {
      try {
        await this.currentWorker.cleanup();
      } catch (cleanupError) {
        logger.error(
          { traceId, error: cleanupError },
          "Error during worker cleanup"
        );
      }
      this.currentWorker = null;
    }
  }
}
842
+
843
/**
 * Maps a job payload onto the WorkerConfig handed to the worker.
 *
 * - `conversationId` falls back to "default" and also forms the session key.
 * - The prompt text is base64-encoded; agent options are JSON-encoded.
 * - Response routing fields come from platformMetadata when present there,
 *   otherwise from the payload's channel id / message id.
 */
private payloadToWorkerConfig(payload: MessagePayload): WorkerConfig {
  const meta = payload.platformMetadata;
  const conversationId = payload.conversationId || "default";

  // A shallow copy already carries allowedTools / disallowedTools /
  // timeoutMinutes, so no per-key re-assignment is needed.
  const agentOptions = { ...(payload.agentOptions || {}) };

  // Check both payload.teamId (WhatsApp) and platformMetadata.teamId (Slack)
  const rawTeamId = payload.teamId ?? meta.teamId;
  const encodedPrompt = Buffer.from(payload.messageText).toString("base64");

  return {
    sessionKey: `session-${conversationId}`,
    userId: payload.userId,
    agentId: payload.agentId,
    channelId: payload.channelId,
    conversationId,
    userPrompt: encodedPrompt,
    responseChannel: String(meta.responseChannel || payload.channelId),
    responseId: String(meta.responseId || payload.messageId),
    botResponseId: meta.botResponseId ? String(meta.botResponseId) : undefined,
    teamId: rawTeamId ? String(rawTeamId) : undefined,
    platform: payload.platform,
    platformMetadata: meta, // Include full platformMetadata for files and other metadata
    agentOptions: JSON.stringify(agentOptions),
    workspace: {
      baseDirectory: process.env.WORKSPACE_DIR || "/workspace",
    },
  };
}
887
+
888
/**
 * Recovery routine run once event handling errors reach the threshold:
 * cleans up and drops the current worker, clears the current job id, aborts
 * the SSE connection and resets the error counter. If the routine itself
 * throws, the client is stopped by clearing `isRunning`.
 *
 * @param eventType Type of the event whose failure crossed the threshold
 *   (used for logging only).
 */
private async cleanupOnEventError(eventType: string): Promise<void> {
  logger.warn(
    `Cleaning up after ${this.eventErrorCount} event handling errors (last: ${eventType})`
  );

  try {
    // Clean up current worker if it exists
    const worker = this.currentWorker;
    if (worker) {
      logger.info("Cleaning up current worker due to event errors");
      await worker.cleanup().catch((cleanupError: unknown) => {
        logger.error("Worker cleanup failed:", cleanupError);
      });
      this.currentWorker = null;
    }

    // Reset current job
    const stuckJobId = this.currentJobId;
    if (stuckJobId) {
      logger.info(`Clearing stuck job: ${stuckJobId}`);
      this.currentJobId = undefined;
    }

    // Abort SSE connection to trigger reconnect
    const controller = this.abortController;
    if (controller) {
      logger.info("Aborting SSE connection to trigger reconnect");
      controller.abort();
      this.abortController = undefined;
    }

    // Reset error count after cleanup
    this.eventErrorCount = 0;

    logger.info("Event error cleanup completed, will reconnect");
  } catch (cleanupError) {
    logger.error("Fatal error during event error cleanup:", cleanupError);
    // Last resort: stop the client entirely
    this.isRunning = false;
  }
}
931
+
932
/**
 * @returns true only while the client is running and the message batcher is
 *   not currently processing.
 */
isHealthy(): boolean {
  if (!this.isRunning) return false;
  return !this.messageBatcher.isCurrentlyProcessing();
}
935
+
936
/**
 * Snapshot of the client's state: run flag, whether the batcher is
 * processing, the configured user id and deployment name, and how many
 * messages the batcher still holds.
 */
getStatus(): {
  isRunning: boolean;
  isProcessing: boolean;
  userId: string;
  deploymentName: string;
  pendingMessages: number;
} {
  const { isRunning, userId, deploymentName } = this;
  const isProcessing = this.messageBatcher.isCurrentlyProcessing();
  const pendingMessages = this.messageBatcher.getPendingCount();

  return { isRunning, isProcessing, userId, deploymentName, pendingMessages };
}
951
+ }