@lobu/worker 6.1.1 → 7.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/dist/core/error-handler.d.ts +0 -4
  2. package/dist/core/error-handler.d.ts.map +1 -1
  3. package/dist/core/error-handler.js +4 -15
  4. package/dist/core/error-handler.js.map +1 -1
  5. package/dist/core/types.d.ts +1 -19
  6. package/dist/core/types.d.ts.map +1 -1
  7. package/dist/core/types.js +0 -4
  8. package/dist/core/types.js.map +1 -1
  9. package/dist/core/workspace.d.ts +2 -11
  10. package/dist/core/workspace.d.ts.map +1 -1
  11. package/dist/core/workspace.js +14 -36
  12. package/dist/core/workspace.js.map +1 -1
  13. package/dist/embedded/just-bash-bootstrap.d.ts.map +1 -1
  14. package/dist/embedded/just-bash-bootstrap.js +60 -6
  15. package/dist/embedded/just-bash-bootstrap.js.map +1 -1
  16. package/dist/embedded/mcp-cli-commands.d.ts.map +1 -1
  17. package/dist/embedded/mcp-cli-commands.js +3 -38
  18. package/dist/embedded/mcp-cli-commands.js.map +1 -1
  19. package/dist/gateway/gateway-integration.js +4 -4
  20. package/dist/gateway/gateway-integration.js.map +1 -1
  21. package/dist/gateway/message-batcher.d.ts.map +1 -1
  22. package/dist/gateway/message-batcher.js +3 -5
  23. package/dist/gateway/message-batcher.js.map +1 -1
  24. package/dist/gateway/sse-client.d.ts +1 -0
  25. package/dist/gateway/sse-client.d.ts.map +1 -1
  26. package/dist/gateway/sse-client.js +52 -8
  27. package/dist/gateway/sse-client.js.map +1 -1
  28. package/dist/index.d.ts.map +1 -1
  29. package/dist/index.js +7 -24
  30. package/dist/index.js.map +1 -1
  31. package/dist/instructions/builder.d.ts.map +1 -1
  32. package/dist/instructions/builder.js +2 -1
  33. package/dist/instructions/builder.js.map +1 -1
  34. package/dist/openclaw/plugin-loader.d.ts.map +1 -1
  35. package/dist/openclaw/plugin-loader.js +8 -19
  36. package/dist/openclaw/plugin-loader.js.map +1 -1
  37. package/dist/openclaw/processor.d.ts.map +1 -1
  38. package/dist/openclaw/processor.js +2 -0
  39. package/dist/openclaw/processor.js.map +1 -1
  40. package/dist/openclaw/sandbox-leak.d.ts.map +1 -1
  41. package/dist/openclaw/sandbox-leak.js +1 -6
  42. package/dist/openclaw/sandbox-leak.js.map +1 -1
  43. package/dist/openclaw/session-context.d.ts.map +1 -1
  44. package/dist/openclaw/session-context.js +3 -0
  45. package/dist/openclaw/session-context.js.map +1 -1
  46. package/dist/openclaw/tool-policy.d.ts.map +1 -1
  47. package/dist/openclaw/tool-policy.js +5 -11
  48. package/dist/openclaw/tool-policy.js.map +1 -1
  49. package/dist/openclaw/worker.d.ts +0 -1
  50. package/dist/openclaw/worker.d.ts.map +1 -1
  51. package/dist/openclaw/worker.js +19 -85
  52. package/dist/openclaw/worker.js.map +1 -1
  53. package/dist/server.d.ts.map +1 -1
  54. package/dist/server.js +3 -40
  55. package/dist/server.js.map +1 -1
  56. package/dist/shared/audio-provider-suggestions.d.ts.map +1 -1
  57. package/dist/shared/audio-provider-suggestions.js +4 -6
  58. package/dist/shared/audio-provider-suggestions.js.map +1 -1
  59. package/dist/shared/tool-implementations.d.ts.map +1 -1
  60. package/dist/shared/tool-implementations.js +99 -37
  61. package/dist/shared/tool-implementations.js.map +1 -1
  62. package/package.json +14 -4
  63. package/src/__tests__/audio-provider-suggestions.test.ts +199 -0
  64. package/src/__tests__/custom-tools.test.ts +92 -0
  65. package/src/__tests__/embedded-just-bash-bootstrap.test.ts +128 -0
  66. package/src/__tests__/embedded-mcp-cli-bash.test.ts +179 -0
  67. package/src/__tests__/embedded-tools.test.ts +744 -0
  68. package/src/__tests__/exec-sandbox-extra.test.ts +0 -0
  69. package/src/__tests__/exec-sandbox.test.ts +550 -0
  70. package/src/__tests__/generated-media.test.ts +142 -0
  71. package/src/__tests__/instructions.test.ts +60 -0
  72. package/src/__tests__/mcp-cli-commands-extra.test.ts +478 -0
  73. package/src/__tests__/mcp-cli-commands.test.ts +383 -0
  74. package/src/__tests__/mcp-tool-call.test.ts +423 -0
  75. package/src/__tests__/memory-flush-harden.test.ts +367 -0
  76. package/src/__tests__/memory-flush-runtime.test.ts +138 -0
  77. package/src/__tests__/memory-flush.test.ts +64 -0
  78. package/src/__tests__/message-batcher.test.ts +247 -0
  79. package/src/__tests__/model-resolver-harden.test.ts +197 -0
  80. package/src/__tests__/model-resolver.test.ts +156 -0
  81. package/src/__tests__/processor-harden.test.ts +259 -0
  82. package/src/__tests__/processor.test.ts +225 -0
  83. package/src/__tests__/replace-base-prompt-identity.test.ts +41 -0
  84. package/src/__tests__/sandbox-leak-harden.test.ts +200 -0
  85. package/src/__tests__/sandbox-leak.test.ts +167 -0
  86. package/src/__tests__/setup.ts +102 -0
  87. package/src/__tests__/sse-client-harden.test.ts +588 -0
  88. package/src/__tests__/sse-client.test.ts +90 -0
  89. package/src/__tests__/tool-implementations.test.ts +196 -0
  90. package/src/__tests__/tool-policy-edge-cases.test.ts +263 -0
  91. package/src/__tests__/tool-policy.test.ts +269 -0
  92. package/src/__tests__/worker.test.ts +89 -0
  93. package/src/core/error-handler.ts +47 -0
  94. package/src/core/project-scanner.ts +65 -0
  95. package/src/core/types.ts +94 -0
  96. package/src/core/workspace.ts +66 -0
  97. package/src/embedded/exec-sandbox.ts +372 -0
  98. package/src/embedded/just-bash-bootstrap.ts +575 -0
  99. package/src/embedded/mcp-cli-commands.ts +405 -0
  100. package/src/gateway/gateway-integration.ts +298 -0
  101. package/src/gateway/message-batcher.ts +123 -0
  102. package/src/gateway/sse-client.ts +988 -0
  103. package/src/gateway/types.ts +68 -0
  104. package/src/index.ts +123 -0
  105. package/src/instructions/builder.ts +44 -0
  106. package/src/instructions/providers.ts +27 -0
  107. package/src/modules/lifecycle.ts +92 -0
  108. package/src/openclaw/custom-tools.ts +315 -0
  109. package/src/openclaw/instructions.ts +36 -0
  110. package/src/openclaw/model-resolver.ts +150 -0
  111. package/src/openclaw/plugin-loader.ts +423 -0
  112. package/src/openclaw/processor.ts +199 -0
  113. package/src/openclaw/sandbox-leak.ts +100 -0
  114. package/src/openclaw/session-context.ts +323 -0
  115. package/src/openclaw/tool-policy.ts +241 -0
  116. package/src/openclaw/tools.ts +277 -0
  117. package/src/openclaw/worker.ts +1836 -0
  118. package/src/server.ts +330 -0
  119. package/src/shared/audio-provider-suggestions.ts +130 -0
  120. package/src/shared/processor-utils.ts +33 -0
  121. package/src/shared/provider-auth-hints.ts +68 -0
  122. package/src/shared/tool-display-config.ts +75 -0
  123. package/src/shared/tool-implementations.ts +981 -0
  124. package/src/shared/worker-env-keys.ts +8 -0
@@ -0,0 +1,988 @@
1
+ /**
2
+ * SSE client for receiving jobs from dispatcher
3
+ */
4
+
5
+ import { spawn } from "node:child_process";
6
+ import {
7
+ createChildSpan,
8
+ createLogger,
9
+ extractTraceId,
10
+ flushTracing,
11
+ SpanStatusCode,
12
+ stripEnv,
13
+ } from "@lobu/core";
14
+ import { z } from "zod";
15
+ import type { WorkerConfig, WorkerExecutor } from "../core/types";
16
+ import { SENSITIVE_WORKER_ENV_KEYS } from "../shared/worker-env-keys";
17
+ import { HttpWorkerTransport } from "./gateway-integration";
18
+ import { MessageBatcher } from "./message-batcher";
19
+ import type { MessagePayload, QueuedMessage } from "./types";
20
+
21
+ const logger = createLogger("sse-client");
22
+
23
+ type AbortControllerLike = {
24
+ abort(): void;
25
+ readonly signal: AbortSignal;
26
+ };
27
+
28
+ // --- Pending config change notifications ---
29
+
30
+ interface ConfigChangeEntry {
31
+ category: string;
32
+ action: string;
33
+ summary: string;
34
+ details?: string[];
35
+ }
36
+
37
+ const pendingConfigNotifications: ConfigChangeEntry[] = [];
38
+
39
+ /**
40
+ * Drains the module-level queue of pending config change notifications and
41
+ * returns the drained entries. Called by the worker before building the next prompt.
42
+ */
43
+ export function consumePendingConfigNotifications(): ConfigChangeEntry[] {
44
+ const hasPending = pendingConfigNotifications.length > 0;
45
+ return hasPending ? pendingConfigNotifications.splice(0) : [];
46
+ }
47
+
48
+ // Zod schemas for runtime validation of SSE event data
49
+ const ConnectedEventSchema = z.object({
50
+ deploymentName: z.string(),
51
+ });
52
+
53
+ // Platform metadata is a transport envelope for platform-specific details.
54
+ // Known chat fields are typed below, but gateway callers may include nested
55
+ // objects such as watcher run intent metadata, file descriptors, or provider
56
+ // context. Preserve those values instead of rejecting otherwise valid jobs.
57
+ const PlatformMetadataSchema = z
58
+ .object({
59
+ team_id: z.string().optional(),
60
+ channel: z.string().optional(),
61
+ ts: z.string().optional(),
62
+ thread_ts: z.string().optional(),
63
+ files: z.array(z.any()).optional(),
64
+ })
65
+ .catchall(z.unknown());
66
+
67
+ // AgentOptions has known fields plus arbitrary extra fields (including nested objects)
68
+ const AgentOptionsSchema = z
69
+ .object({
70
+ runtime: z.string().optional(),
71
+ model: z.string().optional(),
72
+ maxTokens: z.number().optional(),
73
+ temperature: z.number().optional(),
74
+ allowedTools: z.union([z.string(), z.array(z.string())]).optional(),
75
+ disallowedTools: z.union([z.string(), z.array(z.string())]).optional(),
76
+ timeoutMinutes: z.union([z.number(), z.string()]).optional(),
77
+ // Additional settings passed through from gateway
78
+ networkConfig: z.any().optional(),
79
+ envVars: z.any().optional(),
80
+ })
81
+ .passthrough();
82
+
83
+ const JobEventSchema = z.object({ // JSON `data` carried by one "job" SSE event
84
+ payload: z.object({
85
+ botId: z.string(),
86
+ userId: z.string(),
87
+ agentId: z.string(),
88
+ conversationId: z.string(),
89
+ platform: z.string(),
90
+ channelId: z.string(),
91
+ messageId: z.string(),
92
+ messageText: z.string(),
93
+ platformMetadata: PlatformMetadataSchema,
94
+ agentOptions: AgentOptionsSchema,
95
+ jobId: z.string().optional(),
96
+ teamId: z.string().optional(), // Optional for WhatsApp (top-level) and Slack (in platformMetadata)
97
+ }).passthrough(), // z.object strips unknown keys; keep jobType / exec* fields that the job handlers read
98
+ processedIds: z.array(z.string()).optional(),
99
+ });
100
+
101
+ /**
102
+ * Gateway client for workers - connects to dispatcher via SSE
103
+ * Receives jobs via SSE stream, sends responses via HTTP POST
104
+ */
105
+ export class GatewayClient {
106
+ private dispatcherUrl: string;
107
+ private workerToken: string;
108
+ private userId: string;
109
+ private deploymentName: string;
110
+ private isRunning = false;
111
+ private currentWorker: WorkerExecutor | null = null;
112
+ private abortController?: AbortControllerLike;
113
+ private currentJobId?: string;
114
+ private currentTraceId?: string; // Trace ID for end-to-end observability
115
+ private currentTraceparent?: string; // W3C traceparent for distributed tracing
116
+ private reconnectAttempts = 0;
117
+ private maxReconnectAttempts = 10;
118
+ private messageBatcher: MessageBatcher;
119
+ private eventErrorCount = 0;
120
+ private eventErrorThreshold = 10;
121
+ private httpPort?: number;
122
+
123
+ constructor(
124
+ dispatcherUrl: string,
125
+ workerToken: string,
126
+ userId: string,
127
+ deploymentName: string,
128
+ httpPort?: number
129
+ ) {
130
+ this.dispatcherUrl = dispatcherUrl;
131
+ this.workerToken = workerToken;
132
+ this.userId = userId;
133
+ this.deploymentName = deploymentName;
134
+ this.httpPort = httpPort;
135
+ // Get initial traceId from environment (set by deployment)
136
+ this.currentTraceId = process.env.TRACE_ID;
137
+
138
+ this.messageBatcher = new MessageBatcher({
139
+ onBatchReady: async (messages) => {
140
+ await this.processBatchedMessages(messages);
141
+ },
142
+ });
143
+
144
+ logger.info(
145
+ { traceId: this.currentTraceId, deploymentName },
146
+ "Worker connected"
147
+ );
148
+ }
149
+
150
+ async start(): Promise<void> {
151
+ this.isRunning = true;
152
+ // Each pass is one SSE session; when it ends or fails, back off and retry.
153
+ while (this.isRunning) {
154
+ let failure: unknown;
155
+ let failed = false;
156
+ try {
157
+ await this.connectAndListen();
158
+ } catch (error) {
159
+ failure = error;
160
+ failed = true;
161
+ }
162
+ const aborted = failure instanceof Error && failure.name === "AbortError";
163
+ if (failed && aborted) {
164
+ logger.info("SSE connection aborted");
165
+ break;
166
+ }
167
+ if (failed) logger.error("SSE connection error:", failure);
168
+ if (!this.isRunning) break;
169
+ await this.handleReconnect();
170
+ }
171
+ // Returning normally after exhaustion would let a caller report success and then idle forever as a zombie process.
172
+ if (!this.reconnectsExhausted) return;
173
+ throw new Error("Gateway worker exhausted reconnect attempts");
174
+ }
175
+
176
+ private async connectAndListen(): Promise<void> {
177
+ // Runs one SSE session: opens the stream, then parses and dispatches events until it ends.
178
+ // Abort previous controller before creating a new one
179
+ if (this.abortController) {
180
+ this.abortController.abort();
181
+ }
182
+ const abortController = new globalThis.AbortController() as AbortControllerLike;
183
+ this.abortController = abortController;
184
+ const streamUrl = this.httpPort
185
+ ? `${this.dispatcherUrl}/worker/stream?httpPort=${this.httpPort}`
186
+ : `${this.dispatcherUrl}/worker/stream`;
187
+
188
+ logger.info(
189
+ `Connecting to dispatcher at ${streamUrl} (attempt ${this.reconnectAttempts + 1})`
190
+ );
191
+
192
+ const response = await fetch(streamUrl, {
193
+ method: "GET",
194
+ headers: {
195
+ Authorization: `Bearer ${this.workerToken}`,
196
+ Accept: "text/event-stream",
197
+ },
198
+ signal: abortController.signal,
199
+ });
200
+
201
+ if (!response.ok) {
202
+ throw new Error(
203
+ `Failed to connect to dispatcher: ${response.status} ${response.statusText}`
204
+ );
205
+ }
206
+
207
+ logger.info("✅ Connected to dispatcher via SSE");
208
+ this.reconnectAttempts = 0;
209
+
210
+ const reader = response.body?.getReader();
211
+ const decoder = new TextDecoder();
212
+
213
+ if (!reader) {
214
+ throw new Error("No response body");
215
+ }
216
+ // Holds the not-yet-terminated tail of the stream between reads.
217
+ let buffer = "";
218
+
219
+ logger.info("[SSE-CLIENT] 🔄 Starting SSE stream reading loop");
220
+
221
+ while (this.isRunning) {
222
+ const { done, value } = await reader.read();
223
+
224
+ if (done) {
225
+ logger.info("[SSE-CLIENT] SSE stream ended");
226
+ break;
227
+ }
228
+
229
+ const chunk = decoder.decode(value, { stream: true });
230
+ logger.debug(
231
+ `[SSE-CLIENT] 📨 Received chunk: ${chunk.substring(0, 200)}`
232
+ );
233
+ buffer += chunk;
234
+ // Events are terminated by a blank line; accept both LF and CRLF line endings.
235
+ const events = buffer.split(/\r?\n\r?\n/);
236
+ buffer = events.pop() || "";
237
+
238
+ logger.debug(
239
+ `[SSE-CLIENT] 📊 Parsed ${events.length} events from buffer`
240
+ );
241
+
242
+ for (const event of events) {
243
+ if (!event.trim()) continue;
244
+
245
+ const lines = event.split(/\r?\n/);
246
+ let eventType = "message";
247
+ let eventData = "";
248
+ // Several `data:` lines in one event are joined with "\n" instead of the last one winning.
249
+ for (const line of lines) {
250
+ if (line.startsWith("event:")) {
251
+ eventType = line.substring(6).trim();
252
+ } else if (line.startsWith("data:")) {
253
+ eventData += (eventData ? "\n" : "") + line.substring(5).trim();
254
+ }
255
+ }
256
+
257
+ if (eventData) {
258
+ logger.info(`[SSE-CLIENT] 🎯 Processing event type: ${eventType}`);
259
+ // Don't await - fire async to avoid blocking SSE reading loop
260
+ this.handleEvent(eventType, eventData).catch((error) => {
261
+ this.eventErrorCount++;
262
+ logger.error(
263
+ `[SSE-CLIENT] Error handling ${eventType} event (error ${this.eventErrorCount}/${this.eventErrorThreshold}):`,
264
+ error
265
+ );
266
+
267
+ // Trigger cleanup if too many errors
268
+ if (this.eventErrorCount >= this.eventErrorThreshold) {
269
+ logger.error(
270
+ `❌ Event error threshold reached (${this.eventErrorCount} errors). Triggering cleanup...`
271
+ );
272
+ this.cleanupOnEventError(eventType).catch((cleanupErr) => {
273
+ logger.error(
274
+ "Failed to cleanup after event errors:",
275
+ cleanupErr
276
+ );
277
+ });
278
+ }
279
+ });
280
+ }
281
+ }
282
+ }
283
+ }
284
+
285
+ /**
286
+ * POSTs `{ jobId, received: true }` to the gateway's `/worker/response` endpoint
287
+ * as a delivery receipt. Not awaited: a failure (or the 10s timeout) is only logged.
288
+ */
289
+ private sendDeliveryReceipt(jobId: string): void {
290
+ const receiptBody = JSON.stringify({ jobId, received: true });
291
+ const requestHeaders = {
292
+ "Content-Type": "application/json",
293
+ Authorization: `Bearer ${this.workerToken}`,
294
+ };
295
+ const pendingReceipt = fetch(`${this.dispatcherUrl}/worker/response`, {
296
+ method: "POST",
297
+ headers: requestHeaders,
298
+ body: receiptBody,
299
+ signal: AbortSignal.timeout(10_000),
300
+ });
301
+ pendingReceipt.catch((err) => logger.warn(`Failed to send delivery receipt for job ${jobId}:`, err));
302
+ }
303
+
304
+ /**
305
+ * Acknowledges a gateway heartbeat by POSTing `{ received: true, heartbeat: true }`,
306
+ * so stale cleanup can rely on verified inbound worker activity, not outbound SSE writes.
307
+ */
308
+ private sendHeartbeatAck(): void {
309
+ const ackBody = JSON.stringify({ received: true, heartbeat: true });
310
+ const requestHeaders = {
311
+ "Content-Type": "application/json",
312
+ Authorization: `Bearer ${this.workerToken}`,
313
+ };
314
+ const pendingAck = fetch(`${this.dispatcherUrl}/worker/response`, {
315
+ method: "POST",
316
+ headers: requestHeaders,
317
+ body: ackBody,
318
+ signal: AbortSignal.timeout(10_000),
319
+ });
320
+ pendingAck.catch((err) => logger.warn("Failed to send heartbeat ACK:", err));
321
+ }
322
+
323
+ private reconnectsExhausted = false;
324
+
325
+ private async handleReconnect(): Promise<void> {
326
+ // Out of retries: record exhaustion and stop the run loop without sleeping.
327
+ if (this.reconnectAttempts >= this.maxReconnectAttempts) {
328
+ logger.error("Max reconnection attempts reached, giving up");
329
+ this.reconnectsExhausted = true;
330
+ this.isRunning = false;
331
+ return;
332
+ }
333
+
334
+ // Exponential backoff: 1s for the first retry, doubling each time, capped at 60s.
335
+ this.reconnectAttempts += 1;
336
+ const backoffMs = Math.min(1000 * 2 ** (this.reconnectAttempts - 1), 60000);
337
+ const attemptLabel = `${this.reconnectAttempts}/${this.maxReconnectAttempts}`;
338
+ logger.info(`Reconnecting in ${backoffMs}ms (attempt ${attemptLabel})...`);
339
+
340
+ await new Promise((wake) => setTimeout(wake, backoffMs));
341
+ }
342
+
343
+ async stop(): Promise<void> {
344
+ try {
345
+ // Clear the running flag first so the start() loop exits instead of reconnecting.
346
+ this.isRunning = false;
347
+ // Cancel the in-flight SSE request, if any, then halt the batcher.
348
+ this.abortController?.abort();
349
+ this.messageBatcher.stop();
350
+ // Let the active worker (if any) clean up before dropping the reference.
351
+ const activeWorker = this.currentWorker;
352
+ if (activeWorker) {
353
+ await activeWorker.cleanup();
354
+ this.currentWorker = null;
355
+ }
356
+
357
+ logger.info("✅ Gateway client stopped");
358
+ } catch (stopError) {
359
+ // Shutdown failures are logged and then rethrown to the caller.
360
+ logger.error("Error stopping gateway client:", stopError);
361
+ throw stopError;
362
+ }
363
+ }
364
+
365
+ private async handleEvent(eventType: string, data: string): Promise<void> {
366
+ try {
367
+ if (eventType === "connected") {
368
+ const parsedData = JSON.parse(data);
369
+ const validationResult = ConnectedEventSchema.safeParse(parsedData);
370
+
371
+ if (!validationResult.success) {
372
+ logger.error(
373
+ "Invalid connected event data:",
374
+ validationResult.error.format()
375
+ );
376
+ throw new Error(
377
+ `Connected event validation failed: ${validationResult.error.message}`
378
+ );
379
+ }
380
+
381
+ const connData = validationResult.data;
382
+ logger.info(
383
+ `Connected to dispatcher for deployment ${connData.deploymentName}`
384
+ );
385
+ return;
386
+ }
387
+
388
+ if (eventType === "ping") {
389
+ logger.debug("Received heartbeat ping from dispatcher");
390
+ this.sendHeartbeatAck();
391
+ return;
392
+ }
393
+
394
+ if (eventType === "config_changed") {
395
+ logger.info(
396
+ "Received config_changed event from gateway, invalidating session context cache"
397
+ );
398
+ const { invalidateSessionContextCache } = await import(
399
+ "../openclaw/session-context"
400
+ );
401
+ invalidateSessionContextCache();
402
+
403
+ // Parse and queue config change notifications for the next prompt
404
+ try {
405
+ const parsed = JSON.parse(data);
406
+ const changes = Array.isArray(parsed?.changes)
407
+ ? (parsed.changes as ConfigChangeEntry[])
408
+ : [];
409
+ if (changes.length > 0) {
410
+ pendingConfigNotifications.push(...changes);
411
+ logger.info(
412
+ `Queued ${changes.length} config change notification(s)`
413
+ );
414
+ }
415
+ } catch {
416
+ // Backward compat: old gateway may send empty or invalid payload
417
+ }
418
+ return;
419
+ }
420
+
421
+ if (eventType === "job") {
422
+ try {
423
+ const parsedData = JSON.parse(data);
424
+ const validationResult = JobEventSchema.safeParse(parsedData);
425
+
426
+ if (!validationResult.success) {
427
+ logger.error(
428
+ "Invalid job event data:",
429
+ validationResult.error.format()
430
+ );
431
+ logger.debug(`Raw job data: ${data}`);
432
+ throw new Error(
433
+ `Job event validation failed: ${validationResult.error.message}`
434
+ );
435
+ }
436
+
437
+ // Send delivery receipt immediately so the gateway knows
438
+ // the job was actually received (not lost to a stale SSE connection).
439
+ // jobId is at the top level of the SSE event (set by job-router),
440
+ // not inside the validated payload.
441
+ const jobId = parsedData.jobId as string | undefined;
442
+ if (jobId) {
443
+ this.sendDeliveryReceipt(jobId);
444
+ }
445
+
446
+ // Zod validates structure but passthrough allows extra fields
447
+ // The validated payload matches MessagePayload interface
448
+ await this.handleThreadMessage(validationResult.data.payload);
449
+ } catch (parseError) {
450
+ logger.error(
451
+ `Failed to parse or validate job event data:`,
452
+ parseError
453
+ );
454
+ logger.debug(`Raw job data: ${data}`);
455
+ }
456
+ return;
457
+ }
458
+
459
+ logger.warn(
460
+ `[DEBUG] Unknown SSE event type: ${eventType}, data: ${data}`
461
+ );
462
+ } catch (error) {
463
+ logger.error(`Error handling event ${eventType}:`, error);
464
+ }
465
+ }
466
+
467
+ private async handleThreadMessage(data: MessagePayload): Promise<void> {
468
+ // Extract traceparent for distributed tracing
469
+ // Prefer platformMetadata.traceparent, fall back to TRACEPARENT env var
470
+ const traceparent =
471
+ (data.platformMetadata?.traceparent as string) || process.env.TRACEPARENT;
472
+ this.currentTraceparent = traceparent;
473
+
474
+ // Extract traceId for logging (backwards compatible)
475
+ const traceId =
476
+ extractTraceId(data) || this.currentTraceId || process.env.TRACE_ID;
477
+ this.currentTraceId = traceId;
478
+
479
+ const conversationId = data.conversationId;
480
+
481
+ if (data.jobId) {
482
+ this.currentJobId = data.jobId;
483
+ // Create child span for job received (linked to parent via traceparent)
484
+ const span = createChildSpan("job_received", traceparent, {
485
+ "lobu.job_id": data.jobId,
486
+ "lobu.message_id": data.messageId,
487
+ "lobu.conversation_id": conversationId,
488
+ "lobu.job_type": data.jobType || "message",
489
+ });
490
+ span?.setStatus({ code: SpanStatusCode.OK });
491
+ span?.end();
492
+ // Flush job_received span immediately
493
+ void flushTracing();
494
+ logger.info(
495
+ {
496
+ traceparent,
497
+ traceId,
498
+ jobId: data.jobId,
499
+ messageId: data.messageId,
500
+ jobType: data.jobType,
501
+ },
502
+ "Job received"
503
+ );
504
+ }
505
+
506
+ // No per-user filtering here: deployment names intentionally hash only
507
+ // `platform:channelId:conversationId` (see `generateDeploymentName` in
508
+ // base-deployment-manager.ts) so a channel/thread has ONE shared worker
509
+ // across all posting users. DMs are single-participant, so a check would
510
+ // be dead there too. The WORKER_TOKEN-scoped-to-spawning-user tradeoff
511
+ // for shared channel workers is acknowledged and deferred to per-message
512
+ // JWT minting — gating here would break the core group-bot design.
513
+
514
+ // Check job type and dispatch accordingly
515
+ if (data.jobType === "exec") {
516
+ await this.handleExecJob(data);
517
+ return;
518
+ }
519
+
520
+ // Default: message job
521
+ const queuedMessage: QueuedMessage = {
522
+ payload: data,
523
+ timestamp: Date.now(),
524
+ };
525
+
526
+ await this.messageBatcher.addMessage(queuedMessage);
527
+ logger.info(
528
+ { traceId, messageId: data.messageId, conversationId },
529
+ "Message queued for processing"
530
+ );
531
+ }
532
+
533
+ /**
534
+ * Handle exec job - run `execCommand` under `sh -c`, stream stdout/stderr to the gateway, then report the exit code or an error
535
+ */
536
+ private async handleExecJob(data: MessagePayload): Promise<void> {
537
+ const { execId, execCommand, execCwd, execEnv, execTimeout } = data;
538
+ const conversationId = data.conversationId;
539
+ const traceId = this.currentTraceId;
540
+ const traceparent = this.currentTraceparent;
541
+
542
+ if (!execId || !execCommand) {
543
+ logger.error(
544
+ { traceId, execId },
545
+ "Invalid exec job: missing execId or execCommand"
546
+ );
547
+ return;
548
+ }
549
+
550
+ logger.info(
551
+ { traceId, execId, command: execCommand.substring(0, 100) },
552
+ "Executing command in sandbox"
553
+ );
554
+
555
+ // Create span for exec execution
556
+ const span = createChildSpan("exec_execution", traceparent, {
557
+ "lobu.exec_id": execId,
558
+ "lobu.command": execCommand.substring(0, 100),
559
+ });
560
+
561
+ // Determine working directory
562
+ const workingDir = execCwd || process.env.WORKSPACE_DIR || "/workspace";
563
+ const timeout = execTimeout || 300000; // 5 minutes default
564
+
565
+ // Create transport for sending responses back to gateway
566
+ const transport = new HttpWorkerTransport({
567
+ gatewayUrl: this.dispatcherUrl,
568
+ workerToken: this.workerToken,
569
+ userId: data.userId,
570
+ channelId: data.channelId,
571
+ conversationId,
572
+ originalMessageTs: execId,
573
+ teamId: data.teamId || "api",
574
+ platform: data.platform,
575
+ platformMetadata: data.platformMetadata,
576
+ });
577
+
578
+ let completed = false;
579
+ let sigkillTimer: NodeJS.Timeout | null = null;
580
+
581
+ try {
582
+ // Strip the worker's own gateway credentials before handing the shell
583
+ // its env. An `exec` command is an arbitrary string from the gateway
584
+ // that ends up under `sh -c`; leaking WORKER_TOKEN / DISPATCHER_URL
585
+ // into that environment would let a malicious or buggy exec impersonate
586
+ // the worker against its own gateway. The bash-tool and just-bash
587
+ // spawners already apply the same filter (see openclaw/tools.ts and
588
+ // embedded/just-bash-bootstrap.ts) — keep parity here.
589
+ const baseEnv = stripEnv(process.env, SENSITIVE_WORKER_ENV_KEYS);
590
+ const proc = spawn("sh", ["-c", execCommand], {
591
+ cwd: workingDir,
592
+ env: { ...baseEnv, ...execEnv },
593
+ stdio: ["ignore", "pipe", "pipe"],
594
+ });
595
+
596
+ // Setup timeout. The SIGKILL escalation timer is tracked so the `close`
597
+ // handler can clear it when the child exits between SIGTERM and SIGKILL;
598
+ // otherwise the timer pins the event loop for an extra 5s after every
599
+ // timed-out exec and (worse) leaks if `close`/`error` never fires.
600
+ const timeoutId = setTimeout(() => {
601
+ if (!completed) {
602
+ logger.warn(
603
+ { traceId, execId },
604
+ "Exec timeout reached, killing process"
605
+ );
606
+ proc.kill("SIGTERM");
607
+ sigkillTimer = setTimeout(() => {
608
+ sigkillTimer = null;
609
+ if (!completed) {
610
+ proc.kill("SIGKILL");
611
+ }
612
+ }, 5000);
613
+ }
614
+ }, timeout);
615
+
616
+ // Stream stdout
617
+ const onStdout = (chunk: Buffer) => {
618
+ const content = chunk.toString();
619
+ transport.sendExecOutput(execId, "stdout", content).catch((err) => {
620
+ logger.error(
621
+ { traceId, execId, error: err },
622
+ "Failed to send stdout"
623
+ );
624
+ });
625
+ };
626
+ proc.stdout?.on("data", onStdout);
627
+
628
+ // Stream stderr
629
+ const onStderr = (chunk: Buffer) => {
630
+ const content = chunk.toString();
631
+ transport.sendExecOutput(execId, "stderr", content).catch((err) => {
632
+ logger.error(
633
+ { traceId, execId, error: err },
634
+ "Failed to send stderr"
635
+ );
636
+ });
637
+ };
638
+ proc.stderr?.on("data", onStderr);
639
+
640
+ // Wait for process to complete
641
+ const exitCode = await new Promise<number>((resolve, reject) => {
642
+ proc.on("close", (code) => {
643
+ completed = true;
644
+ clearTimeout(timeoutId);
645
+ if (sigkillTimer) {
646
+ clearTimeout(sigkillTimer);
647
+ sigkillTimer = null;
648
+ }
649
+ // Stop accepting late `data` events so a chunk buffered after exit
650
+ // can't fire `sendExecOutput` AFTER we've signalled completion.
651
+ proc.stdout?.removeListener("data", onStdout);
652
+ proc.stderr?.removeListener("data", onStderr);
653
+ resolve(code ?? 1); // null code = child was killed by a signal (e.g. the timeout above); never report that as success
654
+ });
655
+
656
+ proc.on("error", (error) => {
657
+ completed = true;
658
+ clearTimeout(timeoutId);
659
+ if (sigkillTimer) {
660
+ clearTimeout(sigkillTimer);
661
+ sigkillTimer = null;
662
+ }
663
+ proc.stdout?.removeListener("data", onStdout);
664
+ proc.stderr?.removeListener("data", onStderr);
665
+ reject(error);
666
+ });
667
+ });
668
+
669
+ // Send completion
670
+ await transport.sendExecComplete(execId, exitCode);
671
+
672
+ span?.setAttribute("lobu.exit_code", exitCode);
673
+ span?.setStatus({ code: SpanStatusCode.OK });
674
+ span?.end();
675
+ await flushTracing();
676
+
677
+ logger.info({ traceId, execId, exitCode }, "Exec completed");
678
+ } catch (error) {
679
+ const errorMessage =
680
+ error instanceof Error ? error.message : String(error);
681
+
682
+ // Send error
683
+ await transport.sendExecError(execId, errorMessage).catch((err) => {
684
+ logger.error(
685
+ { traceId, execId, error: err },
686
+ "Failed to send exec error"
687
+ );
688
+ });
689
+
690
+ span?.setStatus({ code: SpanStatusCode.ERROR, message: errorMessage });
691
+ span?.end();
692
+ await flushTracing();
693
+
694
+ logger.error({ traceId, execId, error: errorMessage }, "Exec failed");
695
+ } finally {
696
+ // Defensive: if we threw before `close`/`error` fired (e.g. transport
697
+ // throwing during sendExecOutput on a long-running child), make sure
698
+ // the SIGKILL escalation timer doesn't outlive this exec.
699
+ if (sigkillTimer) {
700
+ clearTimeout(sigkillTimer);
701
+ sigkillTimer = null;
702
+ }
703
+ this.currentJobId = undefined;
704
+ }
705
+ }
706
+
707
+ private async processBatchedMessages(
708
+ messages: QueuedMessage[]
709
+ ): Promise<void> {
710
+ // An empty batch is a no-op.
711
+ if (messages.length === 0) return;
712
+ const [head] = messages;
713
+
714
+ // A lone message is forwarded as-is, with its own id as the processed set.
715
+ if (messages.length === 1) {
716
+ if (!head) return;
717
+ await this.processSingleMessage(head, [head.payload.messageId]);
718
+ return;
719
+ }
720
+
721
+ logger.info(`Batching ${messages.length} messages for combined processing`);
722
+ if (!head) return;
723
+
724
+ // Number each message text and join them with blank lines into one prompt.
725
+ const numberedTexts: string[] = [];
726
+ messages.forEach((queued, position) => {
727
+ numberedTexts.push(`Message ${position + 1}: ${queued.payload.messageText}`);
728
+ });
729
+
730
+ // Everything except the text is inherited from the first message of the batch.
731
+ const mergedPayload = {
732
+ ...head.payload,
733
+ messageText: numberedTexts.join("\n\n"),
734
+ agentOptions: head.payload.agentOptions,
735
+ };
736
+ const merged: QueuedMessage = {
737
+ timestamp: head.timestamp,
738
+ payload: mergedPayload,
739
+ };
740
+
741
+ // Report every non-empty message id of the batch as processed.
742
+ const handledIds = messages.map((entry) => entry.payload.messageId).filter(Boolean);
743
+ await this.processSingleMessage(merged, handledIds);
744
+ }
745
+
746
/**
 * Execute one message (or one merged batch) on a freshly created
 * `OpenClawWorker`, wrapped in an `agent_execution` tracing span.
 *
 * On success the current job id and the event error counter are reset and
 * the span is closed with OK. On failure the span is closed with ERROR, the
 * error is reported through the worker transport (best effort) and then
 * re-thrown. In both cases the worker is cleaned up and detached from this
 * instance.
 *
 * @param message      The message to run.
 * @param processedIds Ids to record on the HTTP transport as processed; when
 *                     omitted or empty, the message's own id is used.
 * @throws Re-throws whatever the try block raised.
 */
private async processSingleMessage(
  message: QueuedMessage,
  processedIds?: string[]
): Promise<void> {
  const { payload } = message;

  // Trace context: payload first, then the instance's current trace,
  // then the process environment.
  const traceparent =
    (payload.platformMetadata?.traceparent as string) ||
    this.currentTraceparent ||
    process.env.TRACEPARENT;

  const traceId =
    extractTraceId(payload) || this.currentTraceId || process.env.TRACE_ID;

  const conversationId = payload.conversationId;

  // Child span for the agent run, linked to the parent via traceparent.
  const span = createChildSpan("agent_execution", traceparent, {
    "lobu.message_id": payload.messageId,
    "lobu.conversation_id": conversationId,
    "lobu.user_id": payload.userId,
    "lobu.model": payload.agentOptions?.model || "default",
  });

  try {
    if (!process.env.USER_ID) {
      logger.warn(
        `USER_ID not set in environment, using userId from payload: ${payload.userId}`
      );
      process.env.USER_ID = payload.userId;
    }

    const workerConfig = this.payloadToWorkerConfig(payload);

    logger.info(
      {
        traceparent,
        traceId,
        messageId: payload.messageId,
        model: payload.agentOptions?.model,
      },
      "Agent starting"
    );

    // The worker itself decides whether to continue a session based on
    // workspace state.
    const { OpenClawWorker } = await import("../openclaw/worker");
    const worker = new OpenClawWorker(workerConfig);
    this.currentWorker = worker;

    const transport = worker.getWorkerTransport();

    if (transport && transport instanceof HttpWorkerTransport) {
      if (this.currentJobId) {
        transport.setJobId(this.currentJobId);
      }

      // Record which message ids this run covers, directly on the transport.
      if (processedIds && processedIds.length > 0) {
        transport.processedMessageIds = processedIds;
      } else if (payload.messageId) {
        transport.processedMessageIds = [payload.messageId];
      } else {
        transport.processedMessageIds = [];
      }
    }

    await worker.execute();

    this.currentJobId = undefined;

    // A successful run clears the accumulated event error count.
    this.eventErrorCount = 0;

    span?.setStatus({ code: SpanStatusCode.OK });
    span?.end();
    // Flush immediately so spans are exported before the worker scales down.
    await flushTracing();
    logger.info(
      {
        traceparent,
        messageId: payload.messageId,
        conversationId,
      },
      "Agent completed"
    );
  } catch (error) {
    const failureReason =
      error instanceof Error ? error.message : String(error);

    span?.setStatus({
      code: SpanStatusCode.ERROR,
      message: failureReason,
    });
    span?.end();
    // Flush on the failure path as well.
    await flushTracing();
    logger.error(
      {
        traceparent,
        messageId: payload.messageId,
        conversationId,
        error: failureReason,
      },
      "Agent failed"
    );

    // Best effort: a failure to report the error must not mask the error.
    const transport = this.currentWorker?.getWorkerTransport();
    if (transport) {
      try {
        const reportable =
          error instanceof Error ? error : new Error(String(error));
        await transport.signalError(reportable);
      } catch (errorSendError) {
        logger.error(
          { traceId, error: errorSendError },
          "Failed to send error to dispatcher"
        );
      }
    }

    throw error;
  } finally {
    if (this.currentWorker) {
      try {
        await this.currentWorker.cleanup();
      } catch (cleanupError) {
        logger.error(
          { traceId, error: cleanupError },
          "Error during worker cleanup"
        );
      }
      this.currentWorker = null;
    }
  }
}
879
+
880
/**
 * Build the worker configuration for a message payload.
 *
 * - `conversationId` defaults to `"default"` and also forms the session key.
 * - The prompt text is base64-encoded.
 * - `responseChannel` / `responseId` come from the platform metadata and
 *   fall back to the payload's `channelId` / `messageId`.
 * - `teamId` is taken from the payload, else from the platform metadata.
 * - `agentOptions` is serialized to a JSON string.
 * - The workspace base directory comes from `WORKSPACE_DIR`, defaulting to
 *   `/workspace`.
 *
 * A payload that arrives without `platformMetadata` is tolerated: the
 * metadata is treated as empty so the fallbacks above apply instead of the
 * method throwing on a property read of `undefined`.
 *
 * @param payload The message payload received from the gateway.
 * @returns The configuration object passed to the worker constructor.
 */
private payloadToWorkerConfig(payload: MessagePayload): WorkerConfig {
  const conversationId = payload.conversationId || "default";

  // Default to an empty object so every lookup below is safe even when the
  // gateway sent no metadata at all.
  const platformMetadata: NonNullable<MessagePayload["platformMetadata"]> =
    payload.platformMetadata ?? {};

  // A shallow copy already carries allowedTools / disallowedTools /
  // timeoutMinutes when present; spreading `undefined` yields `{}`.
  const agentOptions = { ...payload.agentOptions };

  // Check both payload.teamId (WhatsApp) and platformMetadata.teamId (Slack).
  const teamId = payload.teamId ?? platformMetadata.teamId;

  return {
    sessionKey: `session-${conversationId}`,
    userId: payload.userId,
    agentId: payload.agentId,
    channelId: payload.channelId,
    conversationId,
    userPrompt: Buffer.from(payload.messageText).toString("base64"),
    responseChannel: String(
      platformMetadata.responseChannel || payload.channelId
    ),
    responseId: String(platformMetadata.responseId || payload.messageId),
    botResponseId: platformMetadata.botResponseId
      ? String(platformMetadata.botResponseId)
      : undefined,
    teamId: teamId ? String(teamId) : undefined,
    platform: payload.platform,
    // Full metadata is forwarded for files and other platform details.
    platformMetadata,
    agentOptions: JSON.stringify(agentOptions),
    workspace: {
      baseDirectory: process.env.WORKSPACE_DIR || "/workspace",
    },
  };
}
924
+
925
/**
 * Release resources once event handling errors have exceeded the threshold.
 *
 * Cleans up and detaches the current worker (if any), clears the current job
 * id, aborts the SSE connection so that it reconnects, and resets the error
 * counter. If the cleanup sequence itself throws, the client is stopped by
 * setting `isRunning` to false.
 *
 * @param eventType Type of the event whose handling failed last; used in
 *                  the warning log only.
 */
private async cleanupOnEventError(eventType: string): Promise<void> {
  logger.warn(
    `Cleaning up after ${this.eventErrorCount} event handling errors (last: ${eventType})`
  );

  try {
    // Tear down the active worker, if there is one.
    const activeWorker = this.currentWorker;
    if (activeWorker) {
      logger.info("Cleaning up current worker due to event errors");
      try {
        await activeWorker.cleanup();
      } catch (workerCleanupError) {
        logger.error("Worker cleanup failed:", workerCleanupError);
      }
      this.currentWorker = null;
    }

    // Forget the job that was in flight.
    const stuckJobId = this.currentJobId;
    if (stuckJobId) {
      logger.info(`Clearing stuck job: ${stuckJobId}`);
      this.currentJobId = undefined;
    }

    // Abort the SSE connection to trigger a reconnect.
    const controller = this.abortController;
    if (controller) {
      logger.info("Aborting SSE connection to trigger reconnect");
      controller.abort();
      this.abortController = undefined;
    }

    // Start counting errors from zero again.
    this.eventErrorCount = 0;

    logger.info("Event error cleanup completed, will reconnect");
  } catch (fatalCleanupError) {
    logger.error("Fatal error during event error cleanup:", fatalCleanupError);
    // Last resort: stop the client entirely.
    this.isRunning = false;
  }
}
968
+
969
/**
 * Report whether the client is running and its message batcher is not
 * currently processing.
 *
 * @returns `true` only when `isRunning` is set and the batcher is idle.
 */
isHealthy(): boolean {
  if (!this.isRunning) {
    return this.isRunning;
  }
  return !this.messageBatcher.isCurrentlyProcessing();
}
972
+
973
/**
 * Snapshot of the client's state.
 *
 * @returns The running flag, the batcher's processing flag and pending
 *          message count, and this client's user id and deployment name.
 */
getStatus(): {
  isRunning: boolean;
  isProcessing: boolean;
  userId: string;
  deploymentName: string;
  pendingMessages: number;
} {
  const snapshot = {
    isRunning: this.isRunning,
    isProcessing: this.messageBatcher.isCurrentlyProcessing(),
    userId: this.userId,
    deploymentName: this.deploymentName,
    pendingMessages: this.messageBatcher.getPendingCount(),
  };
  return snapshot;
}
988
+ }