@lobu/gateway 3.0.8 → 3.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219) hide show
  1. package/dist/api/platform.d.ts.map +1 -1
  2. package/dist/api/platform.js +8 -26
  3. package/dist/api/platform.js.map +1 -1
  4. package/dist/auth/mcp/proxy.d.ts +14 -0
  5. package/dist/auth/mcp/proxy.d.ts.map +1 -1
  6. package/dist/auth/mcp/proxy.js +149 -13
  7. package/dist/auth/mcp/proxy.js.map +1 -1
  8. package/dist/cli/gateway.d.ts.map +1 -1
  9. package/dist/cli/gateway.js +29 -0
  10. package/dist/cli/gateway.js.map +1 -1
  11. package/dist/cli/index.js +2 -2
  12. package/dist/cli/index.js.map +1 -1
  13. package/dist/connections/chat-instance-manager.d.ts.map +1 -1
  14. package/dist/connections/chat-instance-manager.js +2 -1
  15. package/dist/connections/chat-instance-manager.js.map +1 -1
  16. package/dist/connections/interaction-bridge.d.ts +9 -2
  17. package/dist/connections/interaction-bridge.d.ts.map +1 -1
  18. package/dist/connections/interaction-bridge.js +132 -230
  19. package/dist/connections/interaction-bridge.js.map +1 -1
  20. package/dist/connections/message-handler-bridge.d.ts.map +1 -1
  21. package/dist/connections/message-handler-bridge.js +44 -26
  22. package/dist/connections/message-handler-bridge.js.map +1 -1
  23. package/dist/interactions.d.ts +9 -43
  24. package/dist/interactions.d.ts.map +1 -1
  25. package/dist/interactions.js +10 -52
  26. package/dist/interactions.js.map +1 -1
  27. package/dist/orchestration/base-deployment-manager.js +7 -7
  28. package/dist/orchestration/base-deployment-manager.js.map +1 -1
  29. package/dist/platform/unified-thread-consumer.d.ts.map +1 -1
  30. package/dist/platform/unified-thread-consumer.js +38 -34
  31. package/dist/platform/unified-thread-consumer.js.map +1 -1
  32. package/dist/routes/public/agent.d.ts +4 -0
  33. package/dist/routes/public/agent.d.ts.map +1 -1
  34. package/dist/routes/public/agent.js +21 -0
  35. package/dist/routes/public/agent.js.map +1 -1
  36. package/dist/services/core-services.d.ts.map +1 -1
  37. package/dist/services/core-services.js +4 -0
  38. package/dist/services/core-services.js.map +1 -1
  39. package/package.json +2 -2
  40. package/src/__tests__/agent-config-routes.test.ts +0 -254
  41. package/src/__tests__/agent-history-routes.test.ts +0 -72
  42. package/src/__tests__/agent-routes.test.ts +0 -68
  43. package/src/__tests__/agent-schedules-routes.test.ts +0 -59
  44. package/src/__tests__/agent-settings-store.test.ts +0 -323
  45. package/src/__tests__/bedrock-model-catalog.test.ts +0 -40
  46. package/src/__tests__/bedrock-openai-service.test.ts +0 -157
  47. package/src/__tests__/bedrock-provider-module.test.ts +0 -56
  48. package/src/__tests__/chat-instance-manager-slack.test.ts +0 -204
  49. package/src/__tests__/chat-response-bridge.test.ts +0 -131
  50. package/src/__tests__/config-memory-plugins.test.ts +0 -92
  51. package/src/__tests__/config-request-store.test.ts +0 -127
  52. package/src/__tests__/connection-routes.test.ts +0 -144
  53. package/src/__tests__/core-services-store-selection.test.ts +0 -92
  54. package/src/__tests__/docker-deployment.test.ts +0 -1211
  55. package/src/__tests__/embedded-deployment.test.ts +0 -342
  56. package/src/__tests__/grant-store.test.ts +0 -148
  57. package/src/__tests__/http-proxy.test.ts +0 -281
  58. package/src/__tests__/instruction-service.test.ts +0 -37
  59. package/src/__tests__/link-buttons.test.ts +0 -112
  60. package/src/__tests__/lobu.test.ts +0 -32
  61. package/src/__tests__/mcp-config-service.test.ts +0 -347
  62. package/src/__tests__/mcp-proxy.test.ts +0 -694
  63. package/src/__tests__/message-handler-bridge.test.ts +0 -17
  64. package/src/__tests__/model-selection.test.ts +0 -172
  65. package/src/__tests__/oauth-templates.test.ts +0 -39
  66. package/src/__tests__/platform-adapter-slack-send.test.ts +0 -114
  67. package/src/__tests__/platform-helpers-model-resolution.test.ts +0 -253
  68. package/src/__tests__/provider-inheritance.test.ts +0 -212
  69. package/src/__tests__/routes/cli-auth.test.ts +0 -337
  70. package/src/__tests__/routes/interactions.test.ts +0 -121
  71. package/src/__tests__/secret-proxy.test.ts +0 -85
  72. package/src/__tests__/session-manager.test.ts +0 -572
  73. package/src/__tests__/setup.ts +0 -133
  74. package/src/__tests__/skill-and-mcp-registry.test.ts +0 -203
  75. package/src/__tests__/slack-routes.test.ts +0 -161
  76. package/src/__tests__/system-config-resolver.test.ts +0 -75
  77. package/src/__tests__/system-message-limiter.test.ts +0 -89
  78. package/src/__tests__/system-skills-service.test.ts +0 -362
  79. package/src/__tests__/transcription-service.test.ts +0 -222
  80. package/src/__tests__/utils/rate-limiter.test.ts +0 -102
  81. package/src/__tests__/worker-connection-manager.test.ts +0 -497
  82. package/src/__tests__/worker-job-router.test.ts +0 -722
  83. package/src/api/index.ts +0 -1
  84. package/src/api/platform.ts +0 -292
  85. package/src/api/response-renderer.ts +0 -157
  86. package/src/auth/agent-metadata-store.ts +0 -168
  87. package/src/auth/api-auth-middleware.ts +0 -69
  88. package/src/auth/api-key-provider-module.ts +0 -213
  89. package/src/auth/base-provider-module.ts +0 -201
  90. package/src/auth/bedrock/provider-module.ts +0 -110
  91. package/src/auth/chatgpt/chatgpt-oauth-module.ts +0 -185
  92. package/src/auth/chatgpt/device-code-client.ts +0 -218
  93. package/src/auth/chatgpt/index.ts +0 -1
  94. package/src/auth/claude/oauth-module.ts +0 -280
  95. package/src/auth/cli/token-service.ts +0 -249
  96. package/src/auth/external/client.ts +0 -560
  97. package/src/auth/external/device-code-client.ts +0 -235
  98. package/src/auth/mcp/config-service.ts +0 -420
  99. package/src/auth/mcp/proxy.ts +0 -1086
  100. package/src/auth/mcp/string-substitution.ts +0 -17
  101. package/src/auth/mcp/tool-cache.ts +0 -90
  102. package/src/auth/oauth/base-client.ts +0 -267
  103. package/src/auth/oauth/client.ts +0 -153
  104. package/src/auth/oauth/credentials.ts +0 -7
  105. package/src/auth/oauth/providers.ts +0 -69
  106. package/src/auth/oauth/state-store.ts +0 -150
  107. package/src/auth/oauth-templates.ts +0 -179
  108. package/src/auth/provider-catalog.ts +0 -220
  109. package/src/auth/provider-model-options.ts +0 -41
  110. package/src/auth/settings/agent-settings-store.ts +0 -565
  111. package/src/auth/settings/auth-profiles-manager.ts +0 -216
  112. package/src/auth/settings/index.ts +0 -12
  113. package/src/auth/settings/model-preference-store.ts +0 -52
  114. package/src/auth/settings/model-selection.ts +0 -135
  115. package/src/auth/settings/resolved-settings-view.ts +0 -298
  116. package/src/auth/settings/template-utils.ts +0 -44
  117. package/src/auth/settings/token-service.ts +0 -88
  118. package/src/auth/system-env-store.ts +0 -98
  119. package/src/auth/user-agents-store.ts +0 -68
  120. package/src/channels/binding-service.ts +0 -214
  121. package/src/channels/index.ts +0 -4
  122. package/src/cli/gateway.ts +0 -1312
  123. package/src/cli/index.ts +0 -74
  124. package/src/commands/built-in-commands.ts +0 -80
  125. package/src/commands/command-dispatcher.ts +0 -94
  126. package/src/commands/command-reply-adapters.ts +0 -27
  127. package/src/config/file-loader.ts +0 -618
  128. package/src/config/index.ts +0 -588
  129. package/src/config/network-allowlist.ts +0 -71
  130. package/src/connections/chat-instance-manager.ts +0 -1284
  131. package/src/connections/chat-response-bridge.ts +0 -618
  132. package/src/connections/index.ts +0 -7
  133. package/src/connections/interaction-bridge.ts +0 -831
  134. package/src/connections/message-handler-bridge.ts +0 -415
  135. package/src/connections/platform-auth-methods.ts +0 -15
  136. package/src/connections/types.ts +0 -84
  137. package/src/gateway/connection-manager.ts +0 -291
  138. package/src/gateway/index.ts +0 -698
  139. package/src/gateway/job-router.ts +0 -201
  140. package/src/gateway-main.ts +0 -200
  141. package/src/index.ts +0 -41
  142. package/src/infrastructure/queue/index.ts +0 -12
  143. package/src/infrastructure/queue/queue-producer.ts +0 -148
  144. package/src/infrastructure/queue/redis-queue.ts +0 -361
  145. package/src/infrastructure/queue/types.ts +0 -133
  146. package/src/infrastructure/redis/system-message-limiter.ts +0 -94
  147. package/src/interactions/config-request-store.ts +0 -198
  148. package/src/interactions.ts +0 -363
  149. package/src/lobu.ts +0 -311
  150. package/src/metrics/prometheus.ts +0 -159
  151. package/src/modules/module-system.ts +0 -179
  152. package/src/orchestration/base-deployment-manager.ts +0 -900
  153. package/src/orchestration/deployment-utils.ts +0 -98
  154. package/src/orchestration/impl/docker-deployment.ts +0 -620
  155. package/src/orchestration/impl/embedded-deployment.ts +0 -268
  156. package/src/orchestration/impl/index.ts +0 -8
  157. package/src/orchestration/impl/k8s/deployment.ts +0 -1061
  158. package/src/orchestration/impl/k8s/helpers.ts +0 -610
  159. package/src/orchestration/impl/k8s/index.ts +0 -1
  160. package/src/orchestration/index.ts +0 -333
  161. package/src/orchestration/message-consumer.ts +0 -584
  162. package/src/orchestration/scheduled-wakeup.ts +0 -704
  163. package/src/permissions/approval-policy.ts +0 -36
  164. package/src/permissions/grant-store.ts +0 -219
  165. package/src/platform/file-handler.ts +0 -66
  166. package/src/platform/link-buttons.ts +0 -57
  167. package/src/platform/renderer-utils.ts +0 -44
  168. package/src/platform/response-renderer.ts +0 -84
  169. package/src/platform/unified-thread-consumer.ts +0 -187
  170. package/src/platform.ts +0 -318
  171. package/src/proxy/http-proxy.ts +0 -752
  172. package/src/proxy/proxy-manager.ts +0 -81
  173. package/src/proxy/secret-proxy.ts +0 -402
  174. package/src/proxy/token-refresh-job.ts +0 -143
  175. package/src/routes/internal/audio.ts +0 -141
  176. package/src/routes/internal/device-auth.ts +0 -652
  177. package/src/routes/internal/files.ts +0 -226
  178. package/src/routes/internal/history.ts +0 -69
  179. package/src/routes/internal/images.ts +0 -127
  180. package/src/routes/internal/interactions.ts +0 -84
  181. package/src/routes/internal/middleware.ts +0 -23
  182. package/src/routes/internal/schedule.ts +0 -226
  183. package/src/routes/internal/types.ts +0 -22
  184. package/src/routes/openapi-auto.ts +0 -239
  185. package/src/routes/public/agent-access.ts +0 -23
  186. package/src/routes/public/agent-config.ts +0 -675
  187. package/src/routes/public/agent-history.ts +0 -422
  188. package/src/routes/public/agent-schedules.ts +0 -296
  189. package/src/routes/public/agent.ts +0 -1086
  190. package/src/routes/public/agents.ts +0 -373
  191. package/src/routes/public/channels.ts +0 -191
  192. package/src/routes/public/cli-auth.ts +0 -896
  193. package/src/routes/public/connections.ts +0 -574
  194. package/src/routes/public/landing.ts +0 -16
  195. package/src/routes/public/oauth.ts +0 -147
  196. package/src/routes/public/settings-auth.ts +0 -104
  197. package/src/routes/public/slack.ts +0 -173
  198. package/src/routes/shared/agent-ownership.ts +0 -101
  199. package/src/routes/shared/token-verifier.ts +0 -34
  200. package/src/services/bedrock-model-catalog.ts +0 -217
  201. package/src/services/bedrock-openai-service.ts +0 -658
  202. package/src/services/core-services.ts +0 -1072
  203. package/src/services/image-generation-service.ts +0 -257
  204. package/src/services/instruction-service.ts +0 -318
  205. package/src/services/mcp-registry.ts +0 -94
  206. package/src/services/platform-helpers.ts +0 -287
  207. package/src/services/session-manager.ts +0 -262
  208. package/src/services/settings-resolver.ts +0 -74
  209. package/src/services/system-config-resolver.ts +0 -89
  210. package/src/services/system-skills-service.ts +0 -229
  211. package/src/services/transcription-service.ts +0 -684
  212. package/src/session.ts +0 -110
  213. package/src/spaces/index.ts +0 -1
  214. package/src/spaces/space-resolver.ts +0 -17
  215. package/src/stores/in-memory-agent-store.ts +0 -403
  216. package/src/stores/redis-agent-store.ts +0 -279
  217. package/src/utils/public-url.ts +0 -44
  218. package/src/utils/rate-limiter.ts +0 -94
  219. package/tsconfig.json +0 -33
@@ -1,584 +0,0 @@
1
- import {
2
- createChildSpan,
3
- createLogger,
4
- ErrorCode,
5
- extractTraceId,
6
- generateTraceId,
7
- getTraceparent,
8
- OrchestratorError,
9
- retryWithBackoff,
10
- SpanStatusCode,
11
- } from "@lobu/core";
12
- import * as Sentry from "@sentry/node";
13
- import type {
14
- IMessageQueue,
15
- QueueJob as SharedQueueJob,
16
- } from "../infrastructure/queue";
17
- import { RedisQueue, type RedisQueueConfig } from "../infrastructure/queue";
18
- import {
19
- type BaseDeploymentManager,
20
- buildCanonicalConversationKey,
21
- generateDeploymentName,
22
- type MessagePayload,
23
- type OrchestratorConfig,
24
- } from "./base-deployment-manager";
25
-
26
- const logger = createLogger("orchestrator");
27
-
28
- export class MessageConsumer {
29
- private queue: IMessageQueue;
30
- private deploymentManager: BaseDeploymentManager;
31
- private config: OrchestratorConfig;
32
- private isRunning = false;
33
- constructor(
34
- config: OrchestratorConfig,
35
- deploymentManager: BaseDeploymentManager
36
- ) {
37
- this.config = config;
38
- this.deploymentManager = deploymentManager;
39
- // Parse Redis connection string
40
- const url = new URL(config.queues.connectionString);
41
- if (url.protocol !== "redis:") {
42
- throw new Error(
43
- `Unsupported queue protocol: ${url.protocol}. Only redis:// is supported.`
44
- );
45
- }
46
-
47
- const queueConfig: RedisQueueConfig = {
48
- host: url.hostname,
49
- port: Number.parseInt(url.port, 10) || 6379,
50
- password: url.password || undefined,
51
- db: url.pathname ? Number.parseInt(url.pathname.slice(1), 10) : 0,
52
- maxRetriesPerRequest: 3,
53
- };
54
-
55
- this.queue = new RedisQueue(queueConfig);
56
- }
57
-
58
- async start(): Promise<void> {
59
- try {
60
- await this.queue.start();
61
- this.isRunning = true;
62
-
63
- // Create the messages queue if it doesn't exist
64
- await this.queue.createQueue("messages");
65
- logger.debug("Created/verified messages queue");
66
-
67
- // Subscribe to the single messages queue for all messages
68
- await this.queue.work(
69
- "messages",
70
- async (job: SharedQueueJob<MessagePayload>) => {
71
- return await Sentry.startSpan(
72
- {
73
- name: "orchestrator.process_queue_job",
74
- op: "orchestrator.queue_processing",
75
- attributes: {
76
- "job.id": job?.id || "unknown",
77
- },
78
- },
79
- async () => {
80
- return this.handleMessage(job);
81
- }
82
- );
83
- }
84
- );
85
-
86
- logger.debug("Queue consumer started");
87
- } catch (error) {
88
- throw new OrchestratorError(
89
- ErrorCode.QUEUE_JOB_PROCESSING_FAILED,
90
- `Failed to start queue consumer: ${error instanceof Error ? error.message : String(error)}`,
91
- { error },
92
- true
93
- );
94
- }
95
- }
96
-
97
- async stop(): Promise<void> {
98
- this.isRunning = false;
99
- await this.queue.stop();
100
- }
101
-
102
- /**
103
- * Handle all messages - creates deployment for new threads or routes to existing thread queues
104
- */
105
- private async handleMessage(
106
- job: SharedQueueJob<MessagePayload>
107
- ): Promise<void> {
108
- const data = job?.data;
109
- const jobId = job?.id || "unknown";
110
-
111
- // Extract traceparent for distributed tracing (from message ingestion)
112
- const traceparent = data?.platformMetadata?.traceparent as
113
- | string
114
- | undefined;
115
-
116
- // Extract or generate trace ID for logging (backwards compatible)
117
- const traceId =
118
- extractTraceId(data) || generateTraceId(data?.messageId || jobId);
119
-
120
- // Add traceId to Sentry scope for correlation
121
- Sentry.getCurrentScope().setTag("traceId", traceId);
122
-
123
- // Create child span for queue processing (linked to message_received span)
124
- const queueSpan = createChildSpan("queue_processing", traceparent, {
125
- "lobu.trace_id": traceId,
126
- "lobu.job_id": jobId,
127
- "lobu.user_id": data?.userId || "unknown",
128
- "lobu.conversation_id": data?.conversationId || "unknown",
129
- });
130
-
131
- // Get traceparent to pass to worker (for further context propagation)
132
- const childTraceparent = getTraceparent(queueSpan) || traceparent;
133
-
134
- logger.info(
135
- {
136
- traceparent,
137
- traceId,
138
- jobId,
139
- userId: data?.userId,
140
- conversationId: data?.conversationId,
141
- },
142
- "Processing job with trace context"
143
- );
144
-
145
- try {
146
- // CRITICAL: For consistent worker naming, conversationId must be the root conversation ID
147
- // (e.g., Slack thread root ts), not individual message timestamps.
148
- const effectiveConversationId = data.conversationId;
149
- if (!effectiveConversationId) {
150
- throw new OrchestratorError(
151
- ErrorCode.QUEUE_JOB_PROCESSING_FAILED,
152
- "conversationId is required for message routing",
153
- { messageId: data.messageId, userId: data.userId },
154
- true
155
- );
156
- }
157
-
158
- const canonicalConversationKey = buildCanonicalConversationKey({
159
- platform: data.platform,
160
- channelId: data.channelId,
161
- conversationId: effectiveConversationId,
162
- });
163
- const deploymentName = generateDeploymentName({
164
- userId: data.userId,
165
- platform: data.platform,
166
- channelId: data.channelId,
167
- conversationId: effectiveConversationId,
168
- });
169
-
170
- logger.info(
171
- `Conversation routing - effectiveConversationId: ${effectiveConversationId}, canonicalKey: ${canonicalConversationKey}, deploymentName: ${deploymentName}`
172
- );
173
-
174
- // 1) Send to thread queue immediately (Redis persists; worker will drain on attach)
175
- await Sentry.startSpan(
176
- {
177
- name: "orchestrator.send_to_worker_queue",
178
- op: "orchestrator.message_routing",
179
- attributes: {
180
- "user.id": data.userId,
181
- "conversation.id": effectiveConversationId || "unknown",
182
- "deployment.name": deploymentName,
183
- },
184
- },
185
- async () => {
186
- await this.sendToWorkerQueue(data, deploymentName);
187
- }
188
- );
189
-
190
- logger.info(
191
- { traceId, traceparent: childTraceparent, deploymentName },
192
- "Enqueued message to thread queue"
193
- );
194
-
195
- // 2) Ensure worker exists in the background (don't block queue send)
196
- // Pass traceparent for propagation to worker deployment
197
- this.ensureWorkerExists(
198
- deploymentName,
199
- data,
200
- effectiveConversationId,
201
- traceId,
202
- childTraceparent
203
- ).catch((bgError) => {
204
- // Capture error for monitoring and alerting
205
- Sentry.captureException(bgError, {
206
- tags: {
207
- component: "deployment-creation",
208
- deploymentName,
209
- userId: data.userId,
210
- conversationId: effectiveConversationId,
211
- },
212
- level: "error",
213
- });
214
-
215
- logger.error(
216
- {
217
- traceId,
218
- error: bgError instanceof Error ? bgError.message : String(bgError),
219
- stack: bgError instanceof Error ? bgError.stack : undefined,
220
- deploymentName,
221
- userId: data.userId,
222
- conversationId: effectiveConversationId,
223
- },
224
- "Critical: Background worker creation failed. Messages are queued but worker unavailable."
225
- );
226
-
227
- // Track failed deployments for monitoring and potential retry
228
- this.trackFailedDeployment(deploymentName, data, bgError).catch(
229
- (trackError) => {
230
- logger.error("Failed to track deployment failure:", trackError);
231
- }
232
- );
233
- });
234
-
235
- queueSpan?.setStatus({ code: SpanStatusCode.OK });
236
- queueSpan?.end();
237
-
238
- logger.info({ traceId, jobId }, "Message job queued successfully");
239
- } catch (error) {
240
- queueSpan?.setStatus({
241
- code: SpanStatusCode.ERROR,
242
- message: error instanceof Error ? error.message : String(error),
243
- });
244
- queueSpan?.end();
245
- Sentry.captureException(error);
246
- logger.error({ traceId, jobId, error }, "Message job failed");
247
-
248
- // Re-throw for Redis retry handling
249
- throw new OrchestratorError(
250
- ErrorCode.QUEUE_JOB_PROCESSING_FAILED,
251
- `Failed to process message job: ${error instanceof Error ? error.message : String(error)}`,
252
- { jobId, data, error },
253
- true
254
- );
255
- }
256
- }
257
-
258
- /**
259
- * Send message to worker queue for the worker to consume
260
- */
261
- private async sendToWorkerQueue(
262
- data: MessagePayload,
263
- deploymentName: string
264
- ): Promise<void> {
265
- try {
266
- // Create thread-specific queue name: thread_message_[deploymentid]
267
- const threadQueueName = `thread_message_${deploymentName}`;
268
-
269
- // Create the thread-specific queue if it doesn't exist
270
- await this.queue.createQueue(threadQueueName);
271
-
272
- // Send message to thread-specific queue
273
- const jobId = await this.queue.send(threadQueueName, data, {
274
- expireInSeconds: this.config.queues.expireInSeconds,
275
- retryLimit: this.config.queues.retryLimit,
276
- retryDelay: 2, // 2 seconds — fast retry for stale connection recovery
277
- priority: 10, // Thread messages have high priority
278
- });
279
-
280
- if (!jobId) {
281
- throw new OrchestratorError(
282
- ErrorCode.QUEUE_JOB_PROCESSING_FAILED,
283
- `queue.send() returned null/undefined for queue: ${threadQueueName}`,
284
- { threadQueueName, deploymentName },
285
- true
286
- );
287
- }
288
-
289
- logger.info(
290
- `✅ Sent message to thread queue ${threadQueueName} for conversation ${data.conversationId}, jobId: ${jobId}`
291
- );
292
- } catch (error) {
293
- logger.error(`❌ [ERROR] sendToWorkerQueue failed:`, error);
294
- throw new OrchestratorError(
295
- ErrorCode.QUEUE_JOB_PROCESSING_FAILED,
296
- `Failed to send message to thread queue: ${error instanceof Error ? error.message : String(error)}`,
297
- { deploymentName, data, error },
298
- true
299
- );
300
- }
301
- }
302
-
303
- /**
304
- * Acquire a Redis-based lock for deployment creation.
305
- * Prevents concurrent duplicate deployment creation for the same thread.
306
- */
307
- private async acquireDeploymentLock(
308
- deploymentName: string
309
- ): Promise<boolean> {
310
- const lockKey = `deployment:lock:${deploymentName}`;
311
- const redisClient = this.queue.getRedisClient();
312
- // SET NX with 60s TTL - standard Redis distributed lock
313
- const result = await redisClient.set(lockKey, "1", "EX", 60, "NX");
314
- return result === "OK";
315
- }
316
-
317
- private async releaseDeploymentLock(deploymentName: string): Promise<void> {
318
- const lockKey = `deployment:lock:${deploymentName}`;
319
- const redisClient = this.queue.getRedisClient();
320
- await redisClient.del(lockKey);
321
- }
322
-
323
- /**
324
- * Ensure worker deployment exists for a thread
325
- * Uses shared retry utility with linear backoff + jitter
326
- * Uses Redis lock to prevent concurrent duplicate deployment creation
327
- */
328
- private async ensureWorkerExists(
329
- deploymentName: string,
330
- data: MessagePayload,
331
- conversationId: string,
332
- traceId: string,
333
- traceparent?: string
334
- ): Promise<void> {
335
- return retryWithBackoff(
336
- async () => {
337
- // Ensure traceparent is in platformMetadata for worker deployment
338
- const dataWithTrace: MessagePayload = {
339
- ...data,
340
- platformMetadata: {
341
- ...data.platformMetadata,
342
- traceparent: traceparent || data.platformMetadata?.traceparent,
343
- },
344
- };
345
-
346
- // Check if this is truly a new thread by looking for existing deployment
347
- const existingDeployments =
348
- await this.deploymentManager.listDeployments();
349
- const isNewThread = !existingDeployments.some(
350
- (d) => d.deploymentName === deploymentName
351
- );
352
-
353
- if (isNewThread) {
354
- // Acquire lock to prevent concurrent deployment creation
355
- const acquired = await this.acquireDeploymentLock(deploymentName);
356
- if (!acquired) {
357
- logger.info(
358
- { traceId, deploymentName },
359
- "Another process is creating this deployment, waiting"
360
- );
361
- // Wait briefly and re-check - the other process should finish soon
362
- await new Promise((r) => setTimeout(r, 3000));
363
- // Verify it was created
364
- const rechecked = await this.deploymentManager.listDeployments();
365
- if (rechecked.some((d) => d.deploymentName === deploymentName)) {
366
- await this.deploymentManager.scaleDeployment(deploymentName, 1);
367
- logger.info(
368
- { traceId, deploymentName },
369
- "Deployment created by other process, scaled up"
370
- );
371
- await this.deploymentManager.updateDeploymentActivity(
372
- deploymentName
373
- );
374
- return;
375
- }
376
- throw new Error("Deployment lock held but deployment not created");
377
- }
378
-
379
- try {
380
- // Re-check after acquiring lock - another process may have created
381
- // the deployment between our initial check and lock acquisition
382
- const recheckAfterLock =
383
- await this.deploymentManager.listDeployments();
384
- if (
385
- recheckAfterLock.some((d) => d.deploymentName === deploymentName)
386
- ) {
387
- logger.info(
388
- { traceId, deploymentName },
389
- "Deployment already created by another process after lock acquired"
390
- );
391
- await this.deploymentManager.scaleDeployment(deploymentName, 1);
392
- await this.deploymentManager.updateDeploymentActivity(
393
- deploymentName
394
- );
395
- return;
396
- }
397
-
398
- logger.info(
399
- { traceId, traceparent, conversationId, deploymentName },
400
- "New thread - creating deployment"
401
- );
402
- await this.deploymentManager.createWorkerDeployment(
403
- data.userId,
404
- conversationId,
405
- dataWithTrace,
406
- recheckAfterLock
407
- );
408
- logger.info({ traceId, deploymentName }, "Created deployment");
409
- } finally {
410
- try {
411
- await this.releaseDeploymentLock(deploymentName);
412
- } catch (releaseError) {
413
- logger.error(
414
- {
415
- deploymentName,
416
- error:
417
- releaseError instanceof Error
418
- ? releaseError.message
419
- : String(releaseError),
420
- },
421
- "CRITICAL: Failed to release deployment lock, attempting emergency Redis key deletion"
422
- );
423
- try {
424
- const lockKey = `deployment:lock:${deploymentName}`;
425
- const redisClient = this.queue.getRedisClient();
426
- await redisClient.del(lockKey);
427
- logger.info(
428
- { deploymentName },
429
- "Emergency lock cleanup succeeded"
430
- );
431
- } catch (emergencyError) {
432
- logger.error(
433
- {
434
- deploymentName,
435
- error:
436
- emergencyError instanceof Error
437
- ? emergencyError.message
438
- : String(emergencyError),
439
- },
440
- "CRITICAL: Emergency lock cleanup also failed, lock will expire via TTL"
441
- );
442
- }
443
- }
444
- }
445
- } else {
446
- logger.info(
447
- { traceId, conversationId, deploymentName },
448
- "Existing thread - ensuring worker exists"
449
- );
450
- // Sync network config domains to grant store (picks up settings changes)
451
- await this.deploymentManager.syncNetworkConfigGrants(dataWithTrace);
452
- try {
453
- await this.deploymentManager.scaleDeployment(deploymentName, 1);
454
- logger.info(
455
- { traceId, deploymentName },
456
- "Scaled existing worker to 1"
457
- );
458
- } catch {
459
- logger.info(
460
- { traceId, conversationId, deploymentName },
461
- "Worker doesn't exist, creating it"
462
- );
463
- await this.deploymentManager.createWorkerDeployment(
464
- data.userId,
465
- conversationId,
466
- dataWithTrace
467
- );
468
- logger.info({ traceId, deploymentName }, "Created worker");
469
- }
470
- }
471
-
472
- // Update deployment activity annotation for simplified tracking
473
- await this.deploymentManager.updateDeploymentActivity(deploymentName);
474
-
475
- logger.info({ traceId, deploymentName }, "Worker is ready");
476
- },
477
- {
478
- maxRetries: 3,
479
- baseDelay: 2000,
480
- strategy: "linear",
481
- jitter: true,
482
- onRetry: (attempt, error) => {
483
- logger.warn(
484
- { traceId, deploymentName, attempt, maxAttempts: 3 },
485
- `Retry attempt failed: ${error.message}`
486
- );
487
- },
488
- }
489
- );
490
- }
491
-
492
- /**
493
- * Track failed deployment creation for monitoring and potential recovery.
494
- * Also sends an error response to the user via the thread_response queue.
495
- */
496
- private async trackFailedDeployment(
497
- deploymentName: string,
498
- data: MessagePayload,
499
- error: unknown
500
- ): Promise<void> {
501
- try {
502
- const failureKey = `deployment:failed:${deploymentName}`;
503
- const failureData = {
504
- deploymentName,
505
- userId: data.userId,
506
- conversationId: data.conversationId,
507
- error: error instanceof Error ? error.message : String(error),
508
- stack: error instanceof Error ? error.stack : undefined,
509
- timestamp: new Date().toISOString(),
510
- queueName: `thread_message_${deploymentName}`,
511
- };
512
-
513
- // Store in Redis with 24h TTL for monitoring dashboards
514
- // This allows ops to detect stuck queues and manually intervene
515
- const redisClient = this.queue.getRedisClient();
516
- await redisClient.setex(
517
- failureKey,
518
- 86400, // 24 hours
519
- JSON.stringify(failureData)
520
- );
521
-
522
- logger.info(
523
- `Tracked deployment failure in Redis: ${failureKey} (TTL: 24h)`
524
- );
525
-
526
- const failureReason =
527
- error instanceof Error ? error.message : String(error);
528
- const isImagePullFailure =
529
- /ImagePullBackOff|ErrImagePull|image pull/i.test(failureReason);
530
- const userMessage = isImagePullFailure
531
- ? "Worker startup failed due to a Kubernetes image pull error. Please retry after the deployment image/registry configuration is fixed."
532
- : "Worker startup failed and your request could not be processed. Please retry in a moment.";
533
-
534
- // Notify user that their message could not be processed
535
- try {
536
- const responseQueue = "thread_response";
537
- await this.queue.createQueue(responseQueue);
538
- await this.queue.send(responseQueue, {
539
- messageId: data.messageId,
540
- userId: data.userId,
541
- channelId: data.channelId,
542
- conversationId: data.conversationId,
543
- platform: data.platform,
544
- platformMetadata: data.platformMetadata,
545
- content: userMessage,
546
- processedMessageIds: [data.messageId],
547
- });
548
- } catch (notifyError) {
549
- logger.error("Failed to send error notification to user:", notifyError);
550
- }
551
- } catch (trackError) {
552
- // Don't fail the main flow if tracking fails
553
- logger.error("Failed to track deployment failure:", trackError);
554
- }
555
- }
556
-
557
- /**
558
- * Get queue statistics
559
- */
560
- async getQueueStats(): Promise<{
561
- messages?: {
562
- waiting: number;
563
- active: number;
564
- completed: number;
565
- failed: number;
566
- };
567
- isRunning: boolean;
568
- error?: string;
569
- }> {
570
- try {
571
- const stats = await this.queue.getQueueStats("messages");
572
- return {
573
- messages: stats,
574
- isRunning: this.isRunning,
575
- };
576
- } catch (error) {
577
- logger.error("Failed to get queue stats:", error);
578
- return {
579
- isRunning: this.isRunning,
580
- error: error instanceof Error ? error.message : String(error),
581
- };
582
- }
583
- }
584
- }