@lobu/gateway 3.0.9 → 3.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/platform.d.ts.map +1 -1
- package/dist/api/platform.js +7 -26
- package/dist/api/platform.js.map +1 -1
- package/dist/auth/mcp/proxy.d.ts +14 -0
- package/dist/auth/mcp/proxy.d.ts.map +1 -1
- package/dist/auth/mcp/proxy.js +149 -13
- package/dist/auth/mcp/proxy.js.map +1 -1
- package/dist/cli/gateway.d.ts.map +1 -1
- package/dist/cli/gateway.js +29 -0
- package/dist/cli/gateway.js.map +1 -1
- package/dist/connections/chat-instance-manager.d.ts.map +1 -1
- package/dist/connections/chat-instance-manager.js +2 -1
- package/dist/connections/chat-instance-manager.js.map +1 -1
- package/dist/connections/interaction-bridge.d.ts +9 -2
- package/dist/connections/interaction-bridge.d.ts.map +1 -1
- package/dist/connections/interaction-bridge.js +121 -261
- package/dist/connections/interaction-bridge.js.map +1 -1
- package/dist/gateway/index.js +1 -1
- package/dist/gateway/index.js.map +1 -1
- package/dist/interactions.d.ts +9 -43
- package/dist/interactions.d.ts.map +1 -1
- package/dist/interactions.js +10 -52
- package/dist/interactions.js.map +1 -1
- package/dist/routes/public/agent.d.ts +4 -0
- package/dist/routes/public/agent.d.ts.map +1 -1
- package/dist/routes/public/agent.js +21 -0
- package/dist/routes/public/agent.js.map +1 -1
- package/dist/services/core-services.d.ts.map +1 -1
- package/dist/services/core-services.js +4 -0
- package/dist/services/core-services.js.map +1 -1
- package/package.json +9 -9
- package/src/__tests__/agent-config-routes.test.ts +0 -254
- package/src/__tests__/agent-history-routes.test.ts +0 -72
- package/src/__tests__/agent-routes.test.ts +0 -68
- package/src/__tests__/agent-schedules-routes.test.ts +0 -59
- package/src/__tests__/agent-settings-store.test.ts +0 -323
- package/src/__tests__/bedrock-model-catalog.test.ts +0 -40
- package/src/__tests__/bedrock-openai-service.test.ts +0 -157
- package/src/__tests__/bedrock-provider-module.test.ts +0 -56
- package/src/__tests__/chat-instance-manager-slack.test.ts +0 -204
- package/src/__tests__/chat-response-bridge.test.ts +0 -131
- package/src/__tests__/config-memory-plugins.test.ts +0 -92
- package/src/__tests__/config-request-store.test.ts +0 -127
- package/src/__tests__/connection-routes.test.ts +0 -144
- package/src/__tests__/core-services-store-selection.test.ts +0 -92
- package/src/__tests__/docker-deployment.test.ts +0 -1211
- package/src/__tests__/embedded-deployment.test.ts +0 -342
- package/src/__tests__/grant-store.test.ts +0 -148
- package/src/__tests__/http-proxy.test.ts +0 -281
- package/src/__tests__/instruction-service.test.ts +0 -37
- package/src/__tests__/link-buttons.test.ts +0 -112
- package/src/__tests__/lobu.test.ts +0 -32
- package/src/__tests__/mcp-config-service.test.ts +0 -347
- package/src/__tests__/mcp-proxy.test.ts +0 -694
- package/src/__tests__/message-handler-bridge.test.ts +0 -17
- package/src/__tests__/model-selection.test.ts +0 -172
- package/src/__tests__/oauth-templates.test.ts +0 -39
- package/src/__tests__/platform-adapter-slack-send.test.ts +0 -114
- package/src/__tests__/platform-helpers-model-resolution.test.ts +0 -253
- package/src/__tests__/provider-inheritance.test.ts +0 -212
- package/src/__tests__/routes/cli-auth.test.ts +0 -337
- package/src/__tests__/routes/interactions.test.ts +0 -121
- package/src/__tests__/secret-proxy.test.ts +0 -85
- package/src/__tests__/session-manager.test.ts +0 -572
- package/src/__tests__/setup.ts +0 -133
- package/src/__tests__/skill-and-mcp-registry.test.ts +0 -203
- package/src/__tests__/slack-routes.test.ts +0 -161
- package/src/__tests__/system-config-resolver.test.ts +0 -75
- package/src/__tests__/system-message-limiter.test.ts +0 -89
- package/src/__tests__/system-skills-service.test.ts +0 -362
- package/src/__tests__/transcription-service.test.ts +0 -222
- package/src/__tests__/utils/rate-limiter.test.ts +0 -102
- package/src/__tests__/worker-connection-manager.test.ts +0 -497
- package/src/__tests__/worker-job-router.test.ts +0 -722
- package/src/api/index.ts +0 -1
- package/src/api/platform.ts +0 -292
- package/src/api/response-renderer.ts +0 -157
- package/src/auth/agent-metadata-store.ts +0 -168
- package/src/auth/api-auth-middleware.ts +0 -69
- package/src/auth/api-key-provider-module.ts +0 -213
- package/src/auth/base-provider-module.ts +0 -201
- package/src/auth/bedrock/provider-module.ts +0 -110
- package/src/auth/chatgpt/chatgpt-oauth-module.ts +0 -185
- package/src/auth/chatgpt/device-code-client.ts +0 -218
- package/src/auth/chatgpt/index.ts +0 -1
- package/src/auth/claude/oauth-module.ts +0 -280
- package/src/auth/cli/token-service.ts +0 -249
- package/src/auth/external/client.ts +0 -560
- package/src/auth/external/device-code-client.ts +0 -235
- package/src/auth/mcp/config-service.ts +0 -420
- package/src/auth/mcp/proxy.ts +0 -1086
- package/src/auth/mcp/string-substitution.ts +0 -17
- package/src/auth/mcp/tool-cache.ts +0 -90
- package/src/auth/oauth/base-client.ts +0 -267
- package/src/auth/oauth/client.ts +0 -153
- package/src/auth/oauth/credentials.ts +0 -7
- package/src/auth/oauth/providers.ts +0 -69
- package/src/auth/oauth/state-store.ts +0 -150
- package/src/auth/oauth-templates.ts +0 -179
- package/src/auth/provider-catalog.ts +0 -220
- package/src/auth/provider-model-options.ts +0 -41
- package/src/auth/settings/agent-settings-store.ts +0 -565
- package/src/auth/settings/auth-profiles-manager.ts +0 -216
- package/src/auth/settings/index.ts +0 -12
- package/src/auth/settings/model-preference-store.ts +0 -52
- package/src/auth/settings/model-selection.ts +0 -135
- package/src/auth/settings/resolved-settings-view.ts +0 -298
- package/src/auth/settings/template-utils.ts +0 -44
- package/src/auth/settings/token-service.ts +0 -88
- package/src/auth/system-env-store.ts +0 -98
- package/src/auth/user-agents-store.ts +0 -68
- package/src/channels/binding-service.ts +0 -214
- package/src/channels/index.ts +0 -4
- package/src/cli/gateway.ts +0 -1312
- package/src/cli/index.ts +0 -74
- package/src/commands/built-in-commands.ts +0 -80
- package/src/commands/command-dispatcher.ts +0 -94
- package/src/commands/command-reply-adapters.ts +0 -27
- package/src/config/file-loader.ts +0 -618
- package/src/config/index.ts +0 -588
- package/src/config/network-allowlist.ts +0 -71
- package/src/connections/chat-instance-manager.ts +0 -1284
- package/src/connections/chat-response-bridge.ts +0 -618
- package/src/connections/index.ts +0 -7
- package/src/connections/interaction-bridge.ts +0 -831
- package/src/connections/message-handler-bridge.ts +0 -440
- package/src/connections/platform-auth-methods.ts +0 -15
- package/src/connections/types.ts +0 -84
- package/src/gateway/connection-manager.ts +0 -291
- package/src/gateway/index.ts +0 -698
- package/src/gateway/job-router.ts +0 -201
- package/src/gateway-main.ts +0 -200
- package/src/index.ts +0 -41
- package/src/infrastructure/queue/index.ts +0 -12
- package/src/infrastructure/queue/queue-producer.ts +0 -148
- package/src/infrastructure/queue/redis-queue.ts +0 -361
- package/src/infrastructure/queue/types.ts +0 -133
- package/src/infrastructure/redis/system-message-limiter.ts +0 -94
- package/src/interactions/config-request-store.ts +0 -198
- package/src/interactions.ts +0 -363
- package/src/lobu.ts +0 -311
- package/src/metrics/prometheus.ts +0 -159
- package/src/modules/module-system.ts +0 -179
- package/src/orchestration/base-deployment-manager.ts +0 -900
- package/src/orchestration/deployment-utils.ts +0 -98
- package/src/orchestration/impl/docker-deployment.ts +0 -620
- package/src/orchestration/impl/embedded-deployment.ts +0 -268
- package/src/orchestration/impl/index.ts +0 -8
- package/src/orchestration/impl/k8s/deployment.ts +0 -1061
- package/src/orchestration/impl/k8s/helpers.ts +0 -610
- package/src/orchestration/impl/k8s/index.ts +0 -1
- package/src/orchestration/index.ts +0 -333
- package/src/orchestration/message-consumer.ts +0 -584
- package/src/orchestration/scheduled-wakeup.ts +0 -704
- package/src/permissions/approval-policy.ts +0 -36
- package/src/permissions/grant-store.ts +0 -219
- package/src/platform/file-handler.ts +0 -66
- package/src/platform/link-buttons.ts +0 -57
- package/src/platform/renderer-utils.ts +0 -44
- package/src/platform/response-renderer.ts +0 -84
- package/src/platform/unified-thread-consumer.ts +0 -194
- package/src/platform.ts +0 -318
- package/src/proxy/http-proxy.ts +0 -752
- package/src/proxy/proxy-manager.ts +0 -81
- package/src/proxy/secret-proxy.ts +0 -402
- package/src/proxy/token-refresh-job.ts +0 -143
- package/src/routes/internal/audio.ts +0 -141
- package/src/routes/internal/device-auth.ts +0 -652
- package/src/routes/internal/files.ts +0 -226
- package/src/routes/internal/history.ts +0 -69
- package/src/routes/internal/images.ts +0 -127
- package/src/routes/internal/interactions.ts +0 -84
- package/src/routes/internal/middleware.ts +0 -23
- package/src/routes/internal/schedule.ts +0 -226
- package/src/routes/internal/types.ts +0 -22
- package/src/routes/openapi-auto.ts +0 -239
- package/src/routes/public/agent-access.ts +0 -23
- package/src/routes/public/agent-config.ts +0 -675
- package/src/routes/public/agent-history.ts +0 -422
- package/src/routes/public/agent-schedules.ts +0 -296
- package/src/routes/public/agent.ts +0 -1086
- package/src/routes/public/agents.ts +0 -373
- package/src/routes/public/channels.ts +0 -191
- package/src/routes/public/cli-auth.ts +0 -896
- package/src/routes/public/connections.ts +0 -574
- package/src/routes/public/landing.ts +0 -16
- package/src/routes/public/oauth.ts +0 -147
- package/src/routes/public/settings-auth.ts +0 -104
- package/src/routes/public/slack.ts +0 -173
- package/src/routes/shared/agent-ownership.ts +0 -101
- package/src/routes/shared/token-verifier.ts +0 -34
- package/src/services/bedrock-model-catalog.ts +0 -217
- package/src/services/bedrock-openai-service.ts +0 -658
- package/src/services/core-services.ts +0 -1072
- package/src/services/image-generation-service.ts +0 -257
- package/src/services/instruction-service.ts +0 -318
- package/src/services/mcp-registry.ts +0 -94
- package/src/services/platform-helpers.ts +0 -287
- package/src/services/session-manager.ts +0 -262
- package/src/services/settings-resolver.ts +0 -74
- package/src/services/system-config-resolver.ts +0 -89
- package/src/services/system-skills-service.ts +0 -229
- package/src/services/transcription-service.ts +0 -684
- package/src/session.ts +0 -110
- package/src/spaces/index.ts +0 -1
- package/src/spaces/space-resolver.ts +0 -17
- package/src/stores/in-memory-agent-store.ts +0 -403
- package/src/stores/redis-agent-store.ts +0 -279
- package/src/utils/public-url.ts +0 -44
- package/src/utils/rate-limiter.ts +0 -94
- package/tsconfig.json +0 -33
- package/tsconfig.tsbuildinfo +0 -1
|
@@ -1,584 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
createChildSpan,
|
|
3
|
-
createLogger,
|
|
4
|
-
ErrorCode,
|
|
5
|
-
extractTraceId,
|
|
6
|
-
generateTraceId,
|
|
7
|
-
getTraceparent,
|
|
8
|
-
OrchestratorError,
|
|
9
|
-
retryWithBackoff,
|
|
10
|
-
SpanStatusCode,
|
|
11
|
-
} from "@lobu/core";
|
|
12
|
-
import * as Sentry from "@sentry/node";
|
|
13
|
-
import type {
|
|
14
|
-
IMessageQueue,
|
|
15
|
-
QueueJob as SharedQueueJob,
|
|
16
|
-
} from "../infrastructure/queue";
|
|
17
|
-
import { RedisQueue, type RedisQueueConfig } from "../infrastructure/queue";
|
|
18
|
-
import {
|
|
19
|
-
type BaseDeploymentManager,
|
|
20
|
-
buildCanonicalConversationKey,
|
|
21
|
-
generateDeploymentName,
|
|
22
|
-
type MessagePayload,
|
|
23
|
-
type OrchestratorConfig,
|
|
24
|
-
} from "./base-deployment-manager";
|
|
25
|
-
|
|
26
|
-
// Module-scoped structured logger; "orchestrator" is the component tag used in log output.
const logger = createLogger("orchestrator");
|
|
27
|
-
|
|
28
|
-
/**
 * Consumes messages from the shared "messages" Redis queue and routes each one
 * to a per-conversation worker:
 *
 *   1. the message is forwarded to a thread-specific queue
 *      (`thread_message_<deploymentName>`), and
 *   2. a worker deployment for that conversation is created or scaled up in
 *      the background (guarded by a Redis distributed lock).
 *
 * Failures in the background deployment step are recorded in Redis and an
 * error response is pushed to the `thread_response` queue so the user is told
 * their message could not be processed.
 */
export class MessageConsumer {
  // Underlying Redis-backed queue used for both consuming and producing.
  private queue: IMessageQueue;
  // Abstraction over the deployment backend (e.g. k8s/docker/embedded) that
  // creates, scales and tracks per-conversation workers.
  private deploymentManager: BaseDeploymentManager;
  // Orchestrator configuration; queue connection/TTL/retry settings live here.
  private config: OrchestratorConfig;
  // True between successful start() and stop(); reported by getQueueStats().
  private isRunning = false;

  /**
   * Parses the Redis connection string from config and constructs the queue.
   *
   * @param config - Orchestrator configuration; `config.queues.connectionString`
   *   must be a `redis://` URL (any other protocol throws).
   * @param deploymentManager - Backend used to manage worker deployments.
   * @throws Error if the queue connection string is not a `redis://` URL.
   */
  constructor(
    config: OrchestratorConfig,
    deploymentManager: BaseDeploymentManager
  ) {
    this.config = config;
    this.deploymentManager = deploymentManager;
    // Parse Redis connection string
    const url = new URL(config.queues.connectionString);
    // NOTE(review): only plain `redis:` is accepted — `rediss:` (TLS) would be
    // rejected here; confirm that is intentional.
    if (url.protocol !== "redis:") {
      throw new Error(
        `Unsupported queue protocol: ${url.protocol}. Only redis:// is supported.`
      );
    }

    const queueConfig: RedisQueueConfig = {
      host: url.hostname,
      // Default to the standard Redis port when the URL omits one (parseInt("")
      // yields NaN, which is falsy).
      port: Number.parseInt(url.port, 10) || 6379,
      password: url.password || undefined,
      // URL path (e.g. "/2") selects the Redis logical database; default 0.
      db: url.pathname ? Number.parseInt(url.pathname.slice(1), 10) : 0,
      maxRetriesPerRequest: 3,
    };

    this.queue = new RedisQueue(queueConfig);
  }

  /**
   * Starts the queue connection, ensures the shared "messages" queue exists,
   * and subscribes a handler that wraps each job in a Sentry span before
   * delegating to handleMessage().
   *
   * @throws OrchestratorError (QUEUE_JOB_PROCESSING_FAILED) if startup fails.
   */
  async start(): Promise<void> {
    try {
      await this.queue.start();
      this.isRunning = true;

      // Create the messages queue if it doesn't exist
      await this.queue.createQueue("messages");
      logger.debug("Created/verified messages queue");

      // Subscribe to the single messages queue for all messages
      await this.queue.work(
        "messages",
        async (job: SharedQueueJob<MessagePayload>) => {
          return await Sentry.startSpan(
            {
              name: "orchestrator.process_queue_job",
              op: "orchestrator.queue_processing",
              attributes: {
                "job.id": job?.id || "unknown",
              },
            },
            async () => {
              return this.handleMessage(job);
            }
          );
        }
      );

      logger.debug("Queue consumer started");
    } catch (error) {
      throw new OrchestratorError(
        ErrorCode.QUEUE_JOB_PROCESSING_FAILED,
        `Failed to start queue consumer: ${error instanceof Error ? error.message : String(error)}`,
        { error },
        true
      );
    }
  }

  /** Stops consuming and closes the queue connection. */
  async stop(): Promise<void> {
    this.isRunning = false;
    await this.queue.stop();
  }

  /**
   * Handle all messages - creates deployment for new threads or routes to existing thread queues.
   *
   * Flow: validate conversationId → forward the payload to the thread queue
   * (synchronously, so it is persisted in Redis before we return) → kick off
   * worker creation in the background without awaiting it. Errors in the
   * synchronous part are re-thrown as OrchestratorError so the queue layer
   * retries the job; errors in the background part are only logged/tracked.
   *
   * @param job - Queue job carrying a MessagePayload.
   * @throws OrchestratorError (QUEUE_JOB_PROCESSING_FAILED) when routing fails.
   */
  private async handleMessage(
    job: SharedQueueJob<MessagePayload>
  ): Promise<void> {
    const data = job?.data;
    const jobId = job?.id || "unknown";

    // Extract traceparent for distributed tracing (from message ingestion)
    const traceparent = data?.platformMetadata?.traceparent as
      | string
      | undefined;

    // Extract or generate trace ID for logging (backwards compatible)
    const traceId =
      extractTraceId(data) || generateTraceId(data?.messageId || jobId);

    // Add traceId to Sentry scope for correlation
    Sentry.getCurrentScope().setTag("traceId", traceId);

    // Create child span for queue processing (linked to message_received span)
    const queueSpan = createChildSpan("queue_processing", traceparent, {
      "lobu.trace_id": traceId,
      "lobu.job_id": jobId,
      "lobu.user_id": data?.userId || "unknown",
      "lobu.conversation_id": data?.conversationId || "unknown",
    });

    // Get traceparent to pass to worker (for further context propagation)
    const childTraceparent = getTraceparent(queueSpan) || traceparent;

    logger.info(
      {
        traceparent,
        traceId,
        jobId,
        userId: data?.userId,
        conversationId: data?.conversationId,
      },
      "Processing job with trace context"
    );

    try {
      // CRITICAL: For consistent worker naming, conversationId must be the root conversation ID
      // (e.g., Slack thread root ts), not individual message timestamps.
      // NOTE(review): `data` was optional-chained above but is dereferenced
      // directly here; an undefined payload would throw a TypeError rather
      // than the OrchestratorError below — confirm payloads are always set.
      const effectiveConversationId = data.conversationId;
      if (!effectiveConversationId) {
        throw new OrchestratorError(
          ErrorCode.QUEUE_JOB_PROCESSING_FAILED,
          "conversationId is required for message routing",
          { messageId: data.messageId, userId: data.userId },
          true
        );
      }

      // Canonical key is computed for the routing log line below; the actual
      // queue/deployment name comes from generateDeploymentName().
      const canonicalConversationKey = buildCanonicalConversationKey({
        platform: data.platform,
        channelId: data.channelId,
        conversationId: effectiveConversationId,
      });
      const deploymentName = generateDeploymentName({
        userId: data.userId,
        platform: data.platform,
        channelId: data.channelId,
        conversationId: effectiveConversationId,
      });

      logger.info(
        `Conversation routing - effectiveConversationId: ${effectiveConversationId}, canonicalKey: ${canonicalConversationKey}, deploymentName: ${deploymentName}`
      );

      // 1) Send to thread queue immediately (Redis persists; worker will drain on attach)
      await Sentry.startSpan(
        {
          name: "orchestrator.send_to_worker_queue",
          op: "orchestrator.message_routing",
          attributes: {
            "user.id": data.userId,
            "conversation.id": effectiveConversationId || "unknown",
            "deployment.name": deploymentName,
          },
        },
        async () => {
          await this.sendToWorkerQueue(data, deploymentName);
        }
      );

      logger.info(
        { traceId, traceparent: childTraceparent, deploymentName },
        "Enqueued message to thread queue"
      );

      // 2) Ensure worker exists in the background (don't block queue send)
      // Pass traceparent for propagation to worker deployment
      this.ensureWorkerExists(
        deploymentName,
        data,
        effectiveConversationId,
        traceId,
        childTraceparent
      ).catch((bgError) => {
        // Capture error for monitoring and alerting
        Sentry.captureException(bgError, {
          tags: {
            component: "deployment-creation",
            deploymentName,
            userId: data.userId,
            conversationId: effectiveConversationId,
          },
          level: "error",
        });

        logger.error(
          {
            traceId,
            error: bgError instanceof Error ? bgError.message : String(bgError),
            stack: bgError instanceof Error ? bgError.stack : undefined,
            deploymentName,
            userId: data.userId,
            conversationId: effectiveConversationId,
          },
          "Critical: Background worker creation failed. Messages are queued but worker unavailable."
        );

        // Track failed deployments for monitoring and potential retry
        this.trackFailedDeployment(deploymentName, data, bgError).catch(
          (trackError) => {
            logger.error("Failed to track deployment failure:", trackError);
          }
        );
      });

      queueSpan?.setStatus({ code: SpanStatusCode.OK });
      queueSpan?.end();

      logger.info({ traceId, jobId }, "Message job queued successfully");
    } catch (error) {
      queueSpan?.setStatus({
        code: SpanStatusCode.ERROR,
        message: error instanceof Error ? error.message : String(error),
      });
      queueSpan?.end();
      Sentry.captureException(error);
      logger.error({ traceId, jobId, error }, "Message job failed");

      // Re-throw for Redis retry handling
      throw new OrchestratorError(
        ErrorCode.QUEUE_JOB_PROCESSING_FAILED,
        `Failed to process message job: ${error instanceof Error ? error.message : String(error)}`,
        { jobId, data, error },
        true
      );
    }
  }

  /**
   * Send message to worker queue for the worker to consume.
   *
   * @param data - The message payload to forward.
   * @param deploymentName - Per-conversation worker name; also keys the queue.
   * @throws OrchestratorError (QUEUE_JOB_PROCESSING_FAILED) if the queue
   *   cannot be created or send() fails / returns no job id.
   */
  private async sendToWorkerQueue(
    data: MessagePayload,
    deploymentName: string
  ): Promise<void> {
    try {
      // Create thread-specific queue name: thread_message_[deploymentid]
      const threadQueueName = `thread_message_${deploymentName}`;

      // Create the thread-specific queue if it doesn't exist
      await this.queue.createQueue(threadQueueName);

      // Send message to thread-specific queue
      const jobId = await this.queue.send(threadQueueName, data, {
        expireInSeconds: this.config.queues.expireInSeconds,
        retryLimit: this.config.queues.retryLimit,
        retryDelay: 2, // 2 seconds — fast retry for stale connection recovery
        priority: 10, // Thread messages have high priority
      });

      // A missing job id means the enqueue silently failed; surface it loudly.
      if (!jobId) {
        throw new OrchestratorError(
          ErrorCode.QUEUE_JOB_PROCESSING_FAILED,
          `queue.send() returned null/undefined for queue: ${threadQueueName}`,
          { threadQueueName, deploymentName },
          true
        );
      }

      logger.info(
        `✅ Sent message to thread queue ${threadQueueName} for conversation ${data.conversationId}, jobId: ${jobId}`
      );
    } catch (error) {
      logger.error(`❌ [ERROR] sendToWorkerQueue failed:`, error);
      throw new OrchestratorError(
        ErrorCode.QUEUE_JOB_PROCESSING_FAILED,
        `Failed to send message to thread queue: ${error instanceof Error ? error.message : String(error)}`,
        { deploymentName, data, error },
        true
      );
    }
  }

  /**
   * Acquire a Redis-based lock for deployment creation.
   * Prevents concurrent duplicate deployment creation for the same thread.
   *
   * NOTE(review): the lock value is a constant "1" with no owner token, so
   * releaseDeploymentLock() can delete a lock held by another process if this
   * one outlives the 60s TTL — acceptable here only because creation is
   * idempotent-ish via the re-check; confirm.
   *
   * @returns true if this process now holds the lock, false if already held.
   */
  private async acquireDeploymentLock(
    deploymentName: string
  ): Promise<boolean> {
    const lockKey = `deployment:lock:${deploymentName}`;
    const redisClient = this.queue.getRedisClient();
    // SET NX with 60s TTL - standard Redis distributed lock
    const result = await redisClient.set(lockKey, "1", "EX", 60, "NX");
    return result === "OK";
  }

  /** Releases the deployment-creation lock by deleting its Redis key. */
  private async releaseDeploymentLock(deploymentName: string): Promise<void> {
    const lockKey = `deployment:lock:${deploymentName}`;
    const redisClient = this.queue.getRedisClient();
    await redisClient.del(lockKey);
  }

  /**
   * Ensure worker deployment exists for a thread.
   * Uses shared retry utility with linear backoff + jitter.
   * Uses Redis lock to prevent concurrent duplicate deployment creation.
   *
   * New thread: take the lock, double-check no one else created the
   * deployment, create it, and always release the lock (with an emergency
   * raw-delete fallback). Lock contention: wait 3s, then expect the other
   * process to have finished. Existing thread: sync network grants and scale
   * to 1, creating the worker only if the scale call fails.
   *
   * @param deploymentName - Target worker deployment name.
   * @param data - Original message payload (traceparent is merged into it).
   * @param conversationId - Root conversation id for the thread.
   * @param traceId - Correlation id for log lines.
   * @param traceparent - Optional W3C traceparent to propagate to the worker.
   */
  private async ensureWorkerExists(
    deploymentName: string,
    data: MessagePayload,
    conversationId: string,
    traceId: string,
    traceparent?: string
  ): Promise<void> {
    return retryWithBackoff(
      async () => {
        // Ensure traceparent is in platformMetadata for worker deployment
        const dataWithTrace: MessagePayload = {
          ...data,
          platformMetadata: {
            ...data.platformMetadata,
            traceparent: traceparent || data.platformMetadata?.traceparent,
          },
        };

        // Check if this is truly a new thread by looking for existing deployment
        const existingDeployments =
          await this.deploymentManager.listDeployments();
        const isNewThread = !existingDeployments.some(
          (d) => d.deploymentName === deploymentName
        );

        if (isNewThread) {
          // Acquire lock to prevent concurrent deployment creation
          const acquired = await this.acquireDeploymentLock(deploymentName);
          if (!acquired) {
            logger.info(
              { traceId, deploymentName },
              "Another process is creating this deployment, waiting"
            );
            // Wait briefly and re-check - the other process should finish soon
            await new Promise((r) => setTimeout(r, 3000));
            // Verify it was created
            const rechecked = await this.deploymentManager.listDeployments();
            if (rechecked.some((d) => d.deploymentName === deploymentName)) {
              await this.deploymentManager.scaleDeployment(deploymentName, 1);
              logger.info(
                { traceId, deploymentName },
                "Deployment created by other process, scaled up"
              );
              await this.deploymentManager.updateDeploymentActivity(
                deploymentName
              );
              return;
            }
            // Throwing here lets retryWithBackoff try the whole sequence again.
            throw new Error("Deployment lock held but deployment not created");
          }

          try {
            // Re-check after acquiring lock - another process may have created
            // the deployment between our initial check and lock acquisition
            const recheckAfterLock =
              await this.deploymentManager.listDeployments();
            if (
              recheckAfterLock.some((d) => d.deploymentName === deploymentName)
            ) {
              logger.info(
                { traceId, deploymentName },
                "Deployment already created by another process after lock acquired"
              );
              await this.deploymentManager.scaleDeployment(deploymentName, 1);
              await this.deploymentManager.updateDeploymentActivity(
                deploymentName
              );
              return;
            }

            logger.info(
              { traceId, traceparent, conversationId, deploymentName },
              "New thread - creating deployment"
            );
            await this.deploymentManager.createWorkerDeployment(
              data.userId,
              conversationId,
              dataWithTrace,
              recheckAfterLock
            );
            logger.info({ traceId, deploymentName }, "Created deployment");
          } finally {
            // Always release the lock, even when creation threw; a leaked lock
            // would stall other processes for up to the 60s TTL.
            try {
              await this.releaseDeploymentLock(deploymentName);
            } catch (releaseError) {
              logger.error(
                {
                  deploymentName,
                  error:
                    releaseError instanceof Error
                      ? releaseError.message
                      : String(releaseError),
                },
                "CRITICAL: Failed to release deployment lock, attempting emergency Redis key deletion"
              );
              // Last-ditch raw DEL in case the helper failed for a transient
              // reason; if this also fails the TTL eventually frees the lock.
              try {
                const lockKey = `deployment:lock:${deploymentName}`;
                const redisClient = this.queue.getRedisClient();
                await redisClient.del(lockKey);
                logger.info(
                  { deploymentName },
                  "Emergency lock cleanup succeeded"
                );
              } catch (emergencyError) {
                logger.error(
                  {
                    deploymentName,
                    error:
                      emergencyError instanceof Error
                        ? emergencyError.message
                        : String(emergencyError),
                  },
                  "CRITICAL: Emergency lock cleanup also failed, lock will expire via TTL"
                );
              }
            }
          }
        } else {
          logger.info(
            { traceId, conversationId, deploymentName },
            "Existing thread - ensuring worker exists"
          );
          // Sync network config domains to grant store (picks up settings changes)
          await this.deploymentManager.syncNetworkConfigGrants(dataWithTrace);
          try {
            await this.deploymentManager.scaleDeployment(deploymentName, 1);
            logger.info(
              { traceId, deploymentName },
              "Scaled existing worker to 1"
            );
          } catch {
            // Scale failed — presumably the deployment disappeared since the
            // list check above; fall back to creating it fresh.
            logger.info(
              { traceId, conversationId, deploymentName },
              "Worker doesn't exist, creating it"
            );
            await this.deploymentManager.createWorkerDeployment(
              data.userId,
              conversationId,
              dataWithTrace
            );
            logger.info({ traceId, deploymentName }, "Created worker");
          }
        }

        // Update deployment activity annotation for simplified tracking
        await this.deploymentManager.updateDeploymentActivity(deploymentName);

        logger.info({ traceId, deploymentName }, "Worker is ready");
      },
      {
        maxRetries: 3,
        baseDelay: 2000,
        strategy: "linear",
        jitter: true,
        onRetry: (attempt, error) => {
          logger.warn(
            { traceId, deploymentName, attempt, maxAttempts: 3 },
            `Retry attempt failed: ${error.message}`
          );
        },
      }
    );
  }

  /**
   * Track failed deployment creation for monitoring and potential recovery.
   * Also sends an error response to the user via the thread_response queue.
   *
   * Never throws: every failure path here is caught and logged so tracking
   * can't mask or compound the original deployment error.
   *
   * @param deploymentName - The deployment that failed to start.
   * @param data - Original message payload (used to address the user reply).
   * @param error - The failure that triggered tracking.
   */
  private async trackFailedDeployment(
    deploymentName: string,
    data: MessagePayload,
    error: unknown
  ): Promise<void> {
    try {
      const failureKey = `deployment:failed:${deploymentName}`;
      const failureData = {
        deploymentName,
        userId: data.userId,
        conversationId: data.conversationId,
        error: error instanceof Error ? error.message : String(error),
        stack: error instanceof Error ? error.stack : undefined,
        timestamp: new Date().toISOString(),
        queueName: `thread_message_${deploymentName}`,
      };

      // Store in Redis with 24h TTL for monitoring dashboards
      // This allows ops to detect stuck queues and manually intervene
      const redisClient = this.queue.getRedisClient();
      await redisClient.setex(
        failureKey,
        86400, // 24 hours
        JSON.stringify(failureData)
      );

      logger.info(
        `Tracked deployment failure in Redis: ${failureKey} (TTL: 24h)`
      );

      // Tailor the user-facing message for the common k8s image-pull failure.
      const failureReason =
        error instanceof Error ? error.message : String(error);
      const isImagePullFailure =
        /ImagePullBackOff|ErrImagePull|image pull/i.test(failureReason);
      const userMessage = isImagePullFailure
        ? "Worker startup failed due to a Kubernetes image pull error. Please retry after the deployment image/registry configuration is fixed."
        : "Worker startup failed and your request could not be processed. Please retry in a moment.";

      // Notify user that their message could not be processed
      try {
        const responseQueue = "thread_response";
        await this.queue.createQueue(responseQueue);
        await this.queue.send(responseQueue, {
          messageId: data.messageId,
          userId: data.userId,
          channelId: data.channelId,
          conversationId: data.conversationId,
          platform: data.platform,
          platformMetadata: data.platformMetadata,
          content: userMessage,
          processedMessageIds: [data.messageId],
        });
      } catch (notifyError) {
        logger.error("Failed to send error notification to user:", notifyError);
      }
    } catch (trackError) {
      // Don't fail the main flow if tracking fails
      logger.error("Failed to track deployment failure:", trackError);
    }
  }

  /**
   * Get queue statistics.
   *
   * @returns Stats for the shared "messages" queue plus the consumer's running
   *   flag; on failure, returns the flag and an error string instead of throwing.
   */
  async getQueueStats(): Promise<{
    messages?: {
      waiting: number;
      active: number;
      completed: number;
      failed: number;
    };
    isRunning: boolean;
    error?: string;
  }> {
    try {
      const stats = await this.queue.getQueueStats("messages");
      return {
        messages: stats,
        isRunning: this.isRunning,
      };
    } catch (error) {
      logger.error("Failed to get queue stats:", error);
      return {
        isRunning: this.isRunning,
        error: error instanceof Error ? error.message : String(error),
      };
    }
  }
}
|