@lobu/gateway 3.0.5 → 3.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175)
  1. package/package.json +2 -2
  2. package/src/__tests__/agent-config-routes.test.ts +254 -0
  3. package/src/__tests__/agent-history-routes.test.ts +72 -0
  4. package/src/__tests__/agent-routes.test.ts +68 -0
  5. package/src/__tests__/agent-schedules-routes.test.ts +59 -0
  6. package/src/__tests__/agent-settings-store.test.ts +323 -0
  7. package/src/__tests__/chat-instance-manager-slack.test.ts +204 -0
  8. package/src/__tests__/chat-response-bridge.test.ts +131 -0
  9. package/src/__tests__/config-memory-plugins.test.ts +92 -0
  10. package/src/__tests__/config-request-store.test.ts +127 -0
  11. package/src/__tests__/connection-routes.test.ts +144 -0
  12. package/src/__tests__/core-services-store-selection.test.ts +92 -0
  13. package/src/__tests__/docker-deployment.test.ts +1211 -0
  14. package/src/__tests__/embedded-deployment.test.ts +342 -0
  15. package/src/__tests__/grant-store.test.ts +148 -0
  16. package/src/__tests__/http-proxy.test.ts +281 -0
  17. package/src/__tests__/instruction-service.test.ts +37 -0
  18. package/src/__tests__/link-buttons.test.ts +112 -0
  19. package/src/__tests__/lobu.test.ts +32 -0
  20. package/src/__tests__/mcp-config-service.test.ts +347 -0
  21. package/src/__tests__/mcp-proxy.test.ts +696 -0
  22. package/src/__tests__/message-handler-bridge.test.ts +17 -0
  23. package/src/__tests__/model-selection.test.ts +172 -0
  24. package/src/__tests__/oauth-templates.test.ts +39 -0
  25. package/src/__tests__/platform-adapter-slack-send.test.ts +114 -0
  26. package/src/__tests__/platform-helpers-model-resolution.test.ts +253 -0
  27. package/src/__tests__/provider-inheritance.test.ts +212 -0
  28. package/src/__tests__/routes/cli-auth.test.ts +337 -0
  29. package/src/__tests__/routes/interactions.test.ts +121 -0
  30. package/src/__tests__/secret-proxy.test.ts +85 -0
  31. package/src/__tests__/session-manager.test.ts +572 -0
  32. package/src/__tests__/setup.ts +133 -0
  33. package/src/__tests__/skill-and-mcp-registry.test.ts +203 -0
  34. package/src/__tests__/slack-routes.test.ts +161 -0
  35. package/src/__tests__/system-config-resolver.test.ts +75 -0
  36. package/src/__tests__/system-message-limiter.test.ts +89 -0
  37. package/src/__tests__/system-skills-service.test.ts +362 -0
  38. package/src/__tests__/transcription-service.test.ts +222 -0
  39. package/src/__tests__/utils/rate-limiter.test.ts +102 -0
  40. package/src/__tests__/worker-connection-manager.test.ts +497 -0
  41. package/src/__tests__/worker-job-router.test.ts +722 -0
  42. package/src/api/index.ts +1 -0
  43. package/src/api/platform.ts +292 -0
  44. package/src/api/response-renderer.ts +157 -0
  45. package/src/auth/agent-metadata-store.ts +168 -0
  46. package/src/auth/api-auth-middleware.ts +69 -0
  47. package/src/auth/api-key-provider-module.ts +213 -0
  48. package/src/auth/base-provider-module.ts +201 -0
  49. package/src/auth/chatgpt/chatgpt-oauth-module.ts +185 -0
  50. package/src/auth/chatgpt/device-code-client.ts +218 -0
  51. package/src/auth/chatgpt/index.ts +1 -0
  52. package/src/auth/claude/oauth-module.ts +280 -0
  53. package/src/auth/cli/token-service.ts +249 -0
  54. package/src/auth/external/client.ts +560 -0
  55. package/src/auth/external/device-code-client.ts +225 -0
  56. package/src/auth/mcp/config-service.ts +392 -0
  57. package/src/auth/mcp/proxy.ts +1088 -0
  58. package/src/auth/mcp/string-substitution.ts +17 -0
  59. package/src/auth/mcp/tool-cache.ts +90 -0
  60. package/src/auth/oauth/base-client.ts +267 -0
  61. package/src/auth/oauth/client.ts +153 -0
  62. package/src/auth/oauth/credentials.ts +7 -0
  63. package/src/auth/oauth/providers.ts +69 -0
  64. package/src/auth/oauth/state-store.ts +150 -0
  65. package/src/auth/oauth-templates.ts +179 -0
  66. package/src/auth/provider-catalog.ts +220 -0
  67. package/src/auth/provider-model-options.ts +41 -0
  68. package/src/auth/settings/agent-settings-store.ts +565 -0
  69. package/src/auth/settings/auth-profiles-manager.ts +216 -0
  70. package/src/auth/settings/index.ts +12 -0
  71. package/src/auth/settings/model-preference-store.ts +52 -0
  72. package/src/auth/settings/model-selection.ts +135 -0
  73. package/src/auth/settings/resolved-settings-view.ts +298 -0
  74. package/src/auth/settings/template-utils.ts +44 -0
  75. package/src/auth/settings/token-service.ts +88 -0
  76. package/src/auth/system-env-store.ts +98 -0
  77. package/src/auth/user-agents-store.ts +68 -0
  78. package/src/channels/binding-service.ts +214 -0
  79. package/src/channels/index.ts +4 -0
  80. package/src/cli/gateway.ts +1304 -0
  81. package/src/cli/index.ts +74 -0
  82. package/src/commands/built-in-commands.ts +80 -0
  83. package/src/commands/command-dispatcher.ts +94 -0
  84. package/src/commands/command-reply-adapters.ts +27 -0
  85. package/src/config/file-loader.ts +618 -0
  86. package/src/config/index.ts +588 -0
  87. package/src/config/network-allowlist.ts +71 -0
  88. package/src/connections/chat-instance-manager.ts +1284 -0
  89. package/src/connections/chat-response-bridge.ts +618 -0
  90. package/src/connections/index.ts +7 -0
  91. package/src/connections/interaction-bridge.ts +831 -0
  92. package/src/connections/message-handler-bridge.ts +415 -0
  93. package/src/connections/platform-auth-methods.ts +15 -0
  94. package/src/connections/types.ts +84 -0
  95. package/src/gateway/connection-manager.ts +291 -0
  96. package/src/gateway/index.ts +700 -0
  97. package/src/gateway/job-router.ts +201 -0
  98. package/src/gateway-main.ts +200 -0
  99. package/src/index.ts +41 -0
  100. package/src/infrastructure/queue/index.ts +12 -0
  101. package/src/infrastructure/queue/queue-producer.ts +148 -0
  102. package/src/infrastructure/queue/redis-queue.ts +361 -0
  103. package/src/infrastructure/queue/types.ts +133 -0
  104. package/src/infrastructure/redis/system-message-limiter.ts +94 -0
  105. package/src/interactions/config-request-store.ts +198 -0
  106. package/src/interactions.ts +363 -0
  107. package/src/lobu.ts +311 -0
  108. package/src/metrics/prometheus.ts +159 -0
  109. package/src/modules/module-system.ts +179 -0
  110. package/src/orchestration/base-deployment-manager.ts +900 -0
  111. package/src/orchestration/deployment-utils.ts +98 -0
  112. package/src/orchestration/impl/docker-deployment.ts +620 -0
  113. package/src/orchestration/impl/embedded-deployment.ts +268 -0
  114. package/src/orchestration/impl/index.ts +8 -0
  115. package/src/orchestration/impl/k8s/deployment.ts +1061 -0
  116. package/src/orchestration/impl/k8s/helpers.ts +610 -0
  117. package/src/orchestration/impl/k8s/index.ts +1 -0
  118. package/src/orchestration/index.ts +333 -0
  119. package/src/orchestration/message-consumer.ts +584 -0
  120. package/src/orchestration/scheduled-wakeup.ts +704 -0
  121. package/src/permissions/approval-policy.ts +36 -0
  122. package/src/permissions/grant-store.ts +219 -0
  123. package/src/platform/file-handler.ts +66 -0
  124. package/src/platform/link-buttons.ts +57 -0
  125. package/src/platform/renderer-utils.ts +44 -0
  126. package/src/platform/response-renderer.ts +84 -0
  127. package/src/platform/unified-thread-consumer.ts +187 -0
  128. package/src/platform.ts +318 -0
  129. package/src/proxy/http-proxy.ts +752 -0
  130. package/src/proxy/proxy-manager.ts +81 -0
  131. package/src/proxy/secret-proxy.ts +402 -0
  132. package/src/proxy/token-refresh-job.ts +143 -0
  133. package/src/routes/internal/audio.ts +141 -0
  134. package/src/routes/internal/device-auth.ts +566 -0
  135. package/src/routes/internal/files.ts +226 -0
  136. package/src/routes/internal/history.ts +69 -0
  137. package/src/routes/internal/images.ts +127 -0
  138. package/src/routes/internal/interactions.ts +84 -0
  139. package/src/routes/internal/middleware.ts +23 -0
  140. package/src/routes/internal/schedule.ts +226 -0
  141. package/src/routes/internal/types.ts +22 -0
  142. package/src/routes/openapi-auto.ts +239 -0
  143. package/src/routes/public/agent-access.ts +23 -0
  144. package/src/routes/public/agent-config.ts +675 -0
  145. package/src/routes/public/agent-history.ts +422 -0
  146. package/src/routes/public/agent-schedules.ts +296 -0
  147. package/src/routes/public/agent.ts +1086 -0
  148. package/src/routes/public/agents.ts +373 -0
  149. package/src/routes/public/channels.ts +191 -0
  150. package/src/routes/public/cli-auth.ts +883 -0
  151. package/src/routes/public/connections.ts +574 -0
  152. package/src/routes/public/landing.ts +16 -0
  153. package/src/routes/public/oauth.ts +147 -0
  154. package/src/routes/public/settings-auth.ts +104 -0
  155. package/src/routes/public/slack.ts +173 -0
  156. package/src/routes/shared/agent-ownership.ts +101 -0
  157. package/src/routes/shared/token-verifier.ts +34 -0
  158. package/src/services/core-services.ts +1053 -0
  159. package/src/services/image-generation-service.ts +257 -0
  160. package/src/services/instruction-service.ts +318 -0
  161. package/src/services/mcp-registry.ts +94 -0
  162. package/src/services/platform-helpers.ts +287 -0
  163. package/src/services/session-manager.ts +262 -0
  164. package/src/services/settings-resolver.ts +74 -0
  165. package/src/services/system-config-resolver.ts +90 -0
  166. package/src/services/system-skills-service.ts +229 -0
  167. package/src/services/transcription-service.ts +684 -0
  168. package/src/session.ts +110 -0
  169. package/src/spaces/index.ts +1 -0
  170. package/src/spaces/space-resolver.ts +17 -0
  171. package/src/stores/in-memory-agent-store.ts +403 -0
  172. package/src/stores/redis-agent-store.ts +279 -0
  173. package/src/utils/public-url.ts +44 -0
  174. package/src/utils/rate-limiter.ts +94 -0
  175. package/tsconfig.json +33 -0
@@ -0,0 +1,610 @@
1
+ import type * as k8s from "@kubernetes/client-node";
2
+ import {
3
+ createChildSpan,
4
+ createLogger,
5
+ ErrorCode,
6
+ OrchestratorError,
7
+ SpanStatusCode,
8
+ } from "@lobu/core";
9
+ import { BASE_WORKER_LABELS } from "../../deployment-utils";
10
+ import {
11
+ IMAGE_PULL_FAILURE_REASONS,
12
+ LOBU_FINALIZER,
13
+ WORKER_SECURITY,
14
+ } from "./deployment";
15
+
16
+ const logger = createLogger("k8s-deployment");
17
+
18
/**
 * Run a short-lived preflight pod to verify the worker image can be pulled.
 *
 * A throwaway pod running `echo preflight` is created with the worker's
 * security context, then polled every 1.5 s for up to 45 s:
 * - a waiting reason listed in `IMAGE_PULL_FAILURE_REASONS` fails the check;
 * - a running/terminated container (or a Running/Succeeded pod phase) passes it.
 *
 * The pod is deleted with a zero grace period once the check finishes,
 * whatever the outcome.
 *
 * @param coreV1Api - Kubernetes CoreV1 client used to create, read and delete the pod.
 * @param namespace - Namespace in which the preflight pod is created.
 * @param imageName - Worker image to validate.
 * @param pullPolicy - Image pull policy applied to the preflight container.
 * @param serviceAccountName - Service account the preflight pod runs as.
 * @param imagePullSecrets - Optional pull secrets attached to the pod.
 * @throws OrchestratorError (`DEPLOYMENT_CREATE_FAILED`) when the image pull
 *   fails or the check times out. Other API errors are rethrown unchanged,
 *   except HTTP 403, which is logged and treated as "preflight skipped".
 */
export async function runImagePullPreflight(
  coreV1Api: k8s.CoreV1Api,
  namespace: string,
  imageName: string,
  pullPolicy: string,
  serviceAccountName: string,
  imagePullSecrets: Array<{ name: string }> | undefined
): Promise<void> {
  const podName = `lobu-worker-image-preflight-${Date.now().toString(36)}`;
  const timeoutMs = 45_000;
  const pollIntervalMs = 1500;
  const startMs = Date.now();

  const pod: k8s.V1Pod = {
    apiVersion: "v1",
    kind: "Pod",
    metadata: {
      name: podName,
      namespace,
      labels: {
        "app.kubernetes.io/name": "lobu",
        "app.kubernetes.io/component": "worker-image-preflight",
        "lobu/managed-by": "orchestrator",
      },
    },
    spec: {
      restartPolicy: "Never",
      serviceAccountName,
      imagePullSecrets,
      containers: [
        {
          name: "preflight",
          image: imageName,
          imagePullPolicy: pullPolicy,
          command: ["/bin/sh", "-lc", "echo preflight"],
          securityContext: {
            runAsUser: WORKER_SECURITY.USER_ID,
            runAsGroup: WORKER_SECURITY.GROUP_ID,
            runAsNonRoot: true,
            readOnlyRootFilesystem: true,
            allowPrivilegeEscalation: false,
            capabilities: { drop: ["ALL"] },
          },
        },
      ],
    },
  };

  // Tracks whether the create call returned successfully.
  let podCreated = false;
  // Set only when creation itself was forbidden: in that case no pod exists,
  // and attempting to delete it would just produce a second, misleading warning.
  let skipCleanup = false;

  try {
    await coreV1Api.createNamespacedPod(namespace, pod);
    podCreated = true;

    while (Date.now() - startMs < timeoutMs) {
      const podResp = await coreV1Api.readNamespacedPod(podName, namespace);
      const podBody = (podResp as { body?: k8s.V1Pod }).body;
      const status = podBody?.status;
      const containerStatus = status?.containerStatuses?.find(
        (c) => c.name === "preflight"
      );
      const waiting = containerStatus?.state?.waiting;

      if (waiting?.reason && IMAGE_PULL_FAILURE_REASONS.has(waiting.reason)) {
        throw new OrchestratorError(
          ErrorCode.DEPLOYMENT_CREATE_FAILED,
          `Worker image preflight failed (${waiting.reason}): ${waiting.message || "image pull failed"}`,
          { imageName, waitingReason: waiting.reason },
          true
        );
      }

      // The container having started (or already finished) proves the image
      // was pulled; the pod phase is a fallback signal for the same fact.
      const containerStarted =
        containerStatus?.state?.running || containerStatus?.state?.terminated;
      const podStarted =
        status?.phase === "Running" || status?.phase === "Succeeded";
      if (containerStarted || podStarted) {
        logger.info(`✅ Worker image preflight passed: ${imageName}`);
        return;
      }

      await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
    }

    throw new OrchestratorError(
      ErrorCode.DEPLOYMENT_CREATE_FAILED,
      `Timed out validating worker image pullability: ${imageName}`,
      { imageName, timeoutMs },
      true
    );
  } catch (error) {
    const k8sError = error as { statusCode?: number; message?: string };
    if (k8sError.statusCode === 403) {
      skipCleanup = !podCreated;
      logger.warn(
        `⚠️ Skipping worker image preflight due to RBAC restrictions (cannot create pods): ${k8sError.message || "forbidden"}`
      );
      return;
    }
    throw error;
  } finally {
    if (!skipCleanup) {
      try {
        await coreV1Api.deleteNamespacedPod(
          podName,
          namespace,
          undefined,
          undefined,
          0
        );
      } catch (error) {
        const k8sError = error as { statusCode?: number };
        // 404 means the pod is already gone, which is the desired end state.
        if (k8sError.statusCode !== 404) {
          logger.warn(
            `Failed to delete preflight pod ${podName}: ${error instanceof Error ? error.message : String(error)}`
          );
        }
      }
    }
  }
}
139
+
140
/**
 * Reconcile all existing worker deployments to match the desired image,
 * pull policy, service account, and image pull secrets.
 *
 * Each deployment returned by `listRawWorkerDeployments` is compared against
 * the desired values and, when it has drifted, patched with a strategic merge
 * patch. Deployments without a `worker` container are skipped. A
 * `nix-bootstrap` init container, when present, is kept on the same image and
 * pull policy as the worker container.
 *
 * This function never throws: listing failures and per-deployment patch
 * failures are logged as warnings, and a failure on one deployment does not
 * prevent the remaining deployments from being reconciled.
 *
 * @param appsV1Api - Kubernetes AppsV1 client used to patch deployments.
 * @param namespace - Namespace containing the worker deployments.
 * @param desiredImage - Image the worker (and init) containers should run.
 * @param desiredPullPolicy - Image pull policy the containers should use.
 * @param desiredServiceAccount - Service account the pod template should use.
 * @param desiredImagePullSecrets - Pull secrets the pod template should carry.
 * @param listRawWorkerDeployments - Callback returning the deployments to reconcile.
 */
export async function reconcileWorkerDeploymentImages(
  appsV1Api: k8s.AppsV1Api,
  namespace: string,
  desiredImage: string,
  desiredPullPolicy: string,
  desiredServiceAccount: string,
  desiredImagePullSecrets: Array<{ name: string }> | undefined,
  listRawWorkerDeployments: () => Promise<k8s.V1Deployment[]>
): Promise<void> {
  try {
    const deployments = await listRawWorkerDeployments();

    // Loop-invariant: the sorted list of desired secret names.
    const desiredSecrets = (desiredImagePullSecrets || [])
      .map((secret) => secret.name)
      .sort();
    let patchedCount = 0;

    for (const deployment of deployments) {
      const deploymentName = deployment.metadata?.name;
      if (!deploymentName) continue;

      try {
        const templateSpec = deployment.spec?.template?.spec;
        const workerContainer = templateSpec?.containers?.find(
          (container) => container.name === "worker"
        );
        if (!workerContainer) continue;

        const initContainer = templateSpec?.initContainers?.find(
          (container) => container.name === "nix-bootstrap"
        );

        const currentSecrets = (templateSpec?.imagePullSecrets || [])
          .map((secret) => secret.name || "")
          .filter(Boolean)
          .sort();
        const secretsMatch =
          currentSecrets.length === desiredSecrets.length &&
          currentSecrets.every(
            (secret, index) => secret === desiredSecrets[index]
          );

        // The patch below sets both image and pull policy on the init
        // container, so drift in either one must trigger a patch.
        const initContainerDrifted = initContainer
          ? initContainer.image !== desiredImage ||
            initContainer.imagePullPolicy !== desiredPullPolicy
          : false;

        const needsPatch =
          workerContainer.image !== desiredImage ||
          workerContainer.imagePullPolicy !== desiredPullPolicy ||
          initContainerDrifted ||
          templateSpec?.serviceAccountName !== desiredServiceAccount ||
          !secretsMatch;

        if (!needsPatch) continue;

        // NOTE(review): under strategic merge patch, imagePullSecrets appears
        // to be merged by `name` rather than replaced, so stale secrets may
        // survive this patch — confirm against the Kubernetes API reference.
        const podSpecPatch: Record<string, unknown> = {
          serviceAccountName: desiredServiceAccount,
          imagePullSecrets: desiredImagePullSecrets || null,
          containers: [
            {
              name: "worker",
              image: desiredImage,
              imagePullPolicy: desiredPullPolicy,
            },
          ],
        };

        if (initContainer) {
          podSpecPatch.initContainers = [
            {
              name: "nix-bootstrap",
              image: desiredImage,
              imagePullPolicy: desiredPullPolicy,
            },
          ];
        }

        const patch: Record<string, unknown> = {
          spec: { template: { spec: podSpecPatch } },
        };

        await appsV1Api.patchNamespacedDeployment(
          deploymentName,
          namespace,
          patch,
          undefined,
          undefined,
          undefined,
          undefined,
          undefined,
          {
            headers: {
              "Content-Type": "application/strategic-merge-patch+json",
            },
          }
        );

        patchedCount += 1;
        logger.info(
          `🔁 Reconciled worker deployment image for ${deploymentName} -> ${desiredImage}`
        );
      } catch (error) {
        // Isolate failures so one bad deployment does not block the others.
        logger.warn(
          `Failed to reconcile worker deployment ${deploymentName}: ${error instanceof Error ? error.message : String(error)}`
        );
      }
    }

    if (patchedCount > 0) {
      logger.info(
        `✅ Reconciled ${patchedCount} worker deployment(s) to image ${desiredImage}`
      );
    }
  } catch (error) {
    logger.warn(
      `Failed to reconcile worker deployment images: ${error instanceof Error ? error.message : String(error)}`
    );
  }
}
257
+
258
/**
 * Create a PersistentVolumeClaim for a space.
 * Multiple threads in the same space share the same PVC.
 *
 * The claim is `ReadWriteOnce`, labelled with the agent id, and carries
 * `LOBU_FINALIZER`. An HTTP 409 from the API means the claim already exists
 * and is treated as success (the existing PVC is reused).
 *
 * @param coreV1Api - Kubernetes CoreV1 client used to create the claim.
 * @param namespace - Namespace in which the claim is created.
 * @param pvcName - Name of the claim.
 * @param agentId - Agent id recorded in the `lobu.io/agent-id` label.
 * @param storageClass - Storage class name; omitted from the spec when falsy.
 * @param traceparent - Optional trace context used to parent the `pvc_setup` span.
 * @param sizeOverride - Requested size; takes precedence over `defaultSize`.
 * @param defaultSize - Fallback size; `"1Gi"` is used when neither size is given.
 * @throws Rethrows any API error other than HTTP 409.
 */
export async function createPVC(
  coreV1Api: k8s.CoreV1Api,
  namespace: string,
  pvcName: string,
  agentId: string,
  storageClass: string | undefined,
  traceparent?: string,
  sizeOverride?: string,
  defaultSize?: string
): Promise<void> {
  // `||` (not `??`) is deliberate: an empty-string size falls through to the next default.
  const pvcSize = sizeOverride || defaultSize || "1Gi";
  const pvc = {
    apiVersion: "v1",
    kind: "PersistentVolumeClaim",
    metadata: {
      name: pvcName,
      namespace,
      labels: {
        ...BASE_WORKER_LABELS,
        "app.kubernetes.io/component": "worker-storage",
        "lobu.io/agent-id": agentId,
      },
      finalizers: [LOBU_FINALIZER],
    },
    spec: {
      accessModes: ["ReadWriteOnce"],
      resources: {
        requests: {
          storage: pvcSize,
        },
      },
      ...(storageClass ? { storageClassName: storageClass } : {}),
    },
  };

  // Create child span for PVC setup (linked to parent via traceparent)
  const span = createChildSpan("pvc_setup", traceparent, {
    "lobu.pvc_name": pvcName,
    "lobu.agent_id": agentId,
    "lobu.pvc_size": pvcSize,
  });

  logger.info({ traceparent, pvcName, agentId, size: pvcSize }, "Creating PVC");

  try {
    await coreV1Api.createNamespacedPersistentVolumeClaim(namespace, pvc);
    span?.setStatus({ code: SpanStatusCode.OK });
    logger.info({ pvcName }, "Created PVC");
  } catch (error) {
    const k8sError = error as {
      statusCode?: number;
      body?: unknown;
      message?: string;
    };

    // An existing claim is the expected outcome for a shared space, so it is
    // handled before any error-level logging.
    if (k8sError.statusCode === 409) {
      span?.setAttribute("lobu.pvc_exists", true);
      span?.setStatus({ code: SpanStatusCode.OK });
      logger.info(`PVC ${pvcName} already exists (reusing)`);
      return;
    }

    logger.error(`PVC creation error for ${pvcName}:`, {
      statusCode: k8sError.statusCode,
      message: k8sError.message,
      body: k8sError.body,
    });
    span?.setStatus({
      code: SpanStatusCode.ERROR,
      message: k8sError.message || "PVC creation failed",
    });
    throw error;
  } finally {
    // Single exit point for the span, reached on success, reuse and failure.
    span?.end();
  }
}
337
+
338
/**
 * List pods belonging to a given deployment by matching owner references.
 *
 * Pods labelled `app.kubernetes.io/component=worker` are fetched from the
 * namespace and kept only when one of their owners is a ReplicaSet named
 * `<deploymentName>-<hash>`, where `<hash>` is a single dash-free segment.
 * Requiring a dash-free suffix stops a deployment named `worker-a` from
 * claiming pods of a sibling deployment named `worker-a-b`.
 *
 * @param coreV1Api - Kubernetes CoreV1 client used to list pods.
 * @param namespace - Namespace to search.
 * @param deploymentName - Name of the owning deployment.
 * @returns The matching pods; empty when the response carries no items.
 */
async function listDeploymentPods(
  coreV1Api: k8s.CoreV1Api,
  namespace: string,
  deploymentName: string
): Promise<k8s.V1Pod[]> {
  const response = await coreV1Api.listNamespacedPod(
    namespace,
    undefined,
    undefined,
    undefined,
    undefined,
    "app.kubernetes.io/component=worker"
  );
  const candidates =
    (response as { body?: { items?: k8s.V1Pod[] } }).body?.items || [];

  const replicaSetPrefix = `${deploymentName}-`;

  return candidates.filter((pod) =>
    (pod.metadata?.ownerReferences || []).some((owner) => {
      if (owner.kind !== "ReplicaSet") return false;
      if (!owner.name?.startsWith(replicaSetPrefix)) return false;
      // What follows the prefix must be exactly one hash segment.
      const hashSuffix = owner.name.slice(replicaSetPrefix.length);
      return hashSuffix.length > 0 && !hashSuffix.includes("-");
    })
  );
}
367
+
368
/**
 * Look up the most recent failure-related event message for a pod.
 *
 * Events whose `involvedObject.name` equals `podName` are fetched, narrowed to
 * the reasons `Failed`, `BackOff`, `ErrImagePull` and `ImagePullBackOff`, and
 * ordered newest-first. The newest event's message is returned when it has one.
 *
 * @param coreV1Api - Kubernetes CoreV1 client used to list events.
 * @param namespace - Namespace the pod lives in.
 * @param podName - Name of the pod whose events are inspected.
 * @returns The newest failure message, or `""` when none is found or the
 *   event lookup fails.
 */
async function getPodFailureMessage(
  coreV1Api: k8s.CoreV1Api,
  namespace: string,
  podName: string
): Promise<string> {
  const failureReasons = ["Failed", "BackOff", "ErrImagePull", "ImagePullBackOff"];

  // Best available timestamp for an event, as epoch milliseconds.
  const eventTimeMs = (event: k8s.CoreV1Event): number =>
    new Date(
      event.lastTimestamp ||
        event.eventTime ||
        event.metadata?.creationTimestamp ||
        0
    ).getTime();

  try {
    const response = await coreV1Api.listNamespacedEvent(
      namespace,
      undefined,
      undefined,
      undefined,
      `involvedObject.name=${podName}`
    );
    const allEvents = (response as { body?: { items?: k8s.CoreV1Event[] } })
      .body?.items;

    if (allEvents) {
      const failureEvents = allEvents.filter((event) =>
        failureReasons.includes(event.reason || "")
      );
      failureEvents.sort(
        (first, second) => eventTimeMs(second) - eventTimeMs(first)
      );

      const newest = failureEvents[0];
      if (newest?.message) {
        return newest.message;
      }
    }
  } catch {
    // Ignore event lookup failures (RBAC/compat).
  }

  return "";
}
411
+
412
/**
 * Wait for a worker deployment to have at least one available replica.
 * Detects image pull failures early and throws.
 *
 * The deployment is polled every 2 s until `timeoutMs` elapses. On every
 * iteration the deployment's pods are inspected: any container — init or
 * regular — waiting with a reason in `IMAGE_PULL_FAILURE_REASONS` aborts the
 * wait immediately. Init containers are included because a pod whose init
 * container cannot pull its image never reaches the point where the `worker`
 * container reports a pull failure of its own.
 *
 * @param appsV1Api - Kubernetes AppsV1 client used to read the deployment.
 * @param coreV1Api - Kubernetes CoreV1 client used to list pods and events.
 * @param namespace - Namespace containing the deployment.
 * @param deploymentName - Name of the worker deployment to wait for.
 * @param timeoutMs - Maximum time to wait, in milliseconds.
 * @throws OrchestratorError (`DEPLOYMENT_CREATE_FAILED`) on an image pull
 *   failure or when the timeout elapses. API errors are propagated unchanged.
 */
export async function waitForWorkerReady(
  appsV1Api: k8s.AppsV1Api,
  coreV1Api: k8s.CoreV1Api,
  namespace: string,
  deploymentName: string,
  timeoutMs: number
): Promise<void> {
  const startedAt = Date.now();

  while (Date.now() - startedAt < timeoutMs) {
    const deployment = await appsV1Api.readNamespacedDeployment(
      deploymentName,
      namespace
    );
    const deploymentBody = (deployment as { body?: k8s.V1Deployment }).body;
    const availableReplicas = deploymentBody?.status?.availableReplicas || 0;

    if (availableReplicas > 0) {
      return;
    }

    const pods = await listDeploymentPods(coreV1Api, namespace, deploymentName);
    for (const pod of pods) {
      const podName = pod.metadata?.name || "unknown";

      // Init containers first: they run before the worker container starts.
      const containerStatuses = [
        ...(pod.status?.initContainerStatuses || []),
        ...(pod.status?.containerStatuses || []),
      ];
      const failingStatus = containerStatuses.find((status) => {
        const reason = status.state?.waiting?.reason;
        return Boolean(reason) && IMAGE_PULL_FAILURE_REASONS.has(reason as string);
      });
      const waiting = failingStatus?.state?.waiting;

      if (failingStatus && waiting?.reason) {
        const eventMessage = await getPodFailureMessage(
          coreV1Api,
          namespace,
          podName
        );
        throw new OrchestratorError(
          ErrorCode.DEPLOYMENT_CREATE_FAILED,
          `Worker startup failed (${waiting.reason}) for ${deploymentName}: ${eventMessage || waiting.message || "image pull failed"}`,
          {
            deploymentName,
            podName,
            containerName: failingStatus.name,
            waitingReason: waiting.reason,
            waitingMessage: waiting.message,
          },
          true
        );
      }
    }

    await new Promise((resolve) => setTimeout(resolve, 2000));
  }

  throw new OrchestratorError(
    ErrorCode.DEPLOYMENT_CREATE_FAILED,
    `Timed out waiting for worker deployment ${deploymentName} to become ready`,
    { deploymentName, timeoutMs },
    true
  );
}
475
+
476
/**
 * Remove the Lobu cleanup finalizer (`LOBU_FINALIZER`) from a deployment or PVC.
 * No-ops if the finalizer is already absent.
 *
 * The resource is read, `LOBU_FINALIZER` is filtered out of its finalizer
 * list, and the result is written back with a JSON merge patch (`null` when
 * no finalizers remain).
 *
 * This function never throws: a 404 is treated as "already gone" and any
 * other failure is logged as a warning, so that a finalizer problem cannot
 * block deletion.
 *
 * @param appsV1Api - Kubernetes AppsV1 client, used when `kind` is `"deployment"`.
 * @param coreV1Api - Kubernetes CoreV1 client, used when `kind` is `"pvc"`.
 * @param namespace - Namespace containing the resource.
 * @param kind - Which resource type `name` refers to.
 * @param name - Name of the deployment or PVC.
 */
export async function removeFinalizerFromResource(
  appsV1Api: k8s.AppsV1Api,
  coreV1Api: k8s.CoreV1Api,
  namespace: string,
  kind: "deployment" | "pvc",
  name: string
): Promise<void> {
  // Minimal structural view of the API responses read below.
  type ResourceResponse = {
    body?: { metadata?: { finalizers?: string[] } };
  };
  const mergePatchOptions = {
    headers: {
      "Content-Type": "application/merge-patch+json",
    },
  };

  try {
    // Read current finalizers
    const resource =
      kind === "deployment"
        ? await appsV1Api.readNamespacedDeployment(name, namespace)
        : await coreV1Api.readNamespacedPersistentVolumeClaim(name, namespace);
    const currentFinalizers = (resource as ResourceResponse).body?.metadata
      ?.finalizers;

    if (!currentFinalizers || !currentFinalizers.includes(LOBU_FINALIZER)) {
      return; // Finalizer not present, nothing to do
    }

    const updatedFinalizers = currentFinalizers.filter(
      (f) => f !== LOBU_FINALIZER
    );
    const patch = {
      metadata: {
        // `null` clears the field under merge-patch semantics.
        finalizers: updatedFinalizers.length > 0 ? updatedFinalizers : null,
      },
    };

    if (kind === "deployment") {
      await appsV1Api.patchNamespacedDeployment(
        name,
        namespace,
        patch,
        undefined,
        undefined,
        undefined,
        undefined,
        undefined,
        mergePatchOptions
      );
    } else {
      await coreV1Api.patchNamespacedPersistentVolumeClaim(
        name,
        namespace,
        patch,
        undefined,
        undefined,
        undefined,
        undefined,
        undefined,
        mergePatchOptions
      );
    }

    logger.debug(`Removed finalizer from ${kind} ${name}`);
  } catch (error) {
    const k8sError = error as { statusCode?: number };
    if (k8sError.statusCode === 404) {
      // Resource already gone, nothing to do
      return;
    }
    // The detail is folded into the message so it cannot be lost if the
    // logger does not print extra string arguments.
    logger.warn(
      `Failed to remove finalizer from ${kind} ${name}: ${error instanceof Error ? error.message : String(error)}`
    );
    // Don't throw - finalizer removal failure should not block deletion
  }
}
565
+
566
/**
 * Clean up PVCs stuck in Terminating state with our finalizer.
 *
 * PVCs labelled `app.kubernetes.io/component=worker-storage` are listed; each
 * one that has a deletion timestamp and still carries `LOBU_FINALIZER` has the
 * finalizer removed via `removeFinalizerFromResource`.
 *
 * This function never throws: listing failures are logged as warnings.
 *
 * @param appsV1Api - Kubernetes AppsV1 client, passed through to
 *   `removeFinalizerFromResource`.
 * @param coreV1Api - Kubernetes CoreV1 client used to list and patch PVCs.
 * @param namespace - Namespace to scan.
 */
export async function cleanupOrphanedPvcFinalizers(
  appsV1Api: k8s.AppsV1Api,
  coreV1Api: k8s.CoreV1Api,
  namespace: string
): Promise<void> {
  try {
    const pvcs = await coreV1Api.listNamespacedPersistentVolumeClaim(
      namespace,
      undefined,
      undefined,
      undefined,
      undefined,
      "app.kubernetes.io/component=worker-storage"
    );

    const claims =
      (pvcs as { body?: { items?: k8s.V1PersistentVolumeClaim[] } }).body
        ?.items || [];

    for (const pvc of claims) {
      const name = pvc.metadata?.name;
      // A deletion timestamp plus our finalizer means deletion is being
      // held open by the finalizer.
      const isTerminating = Boolean(pvc.metadata?.deletionTimestamp);
      const hasOurFinalizer = pvc.metadata?.finalizers?.includes(LOBU_FINALIZER);

      if (!name || !isTerminating || !hasOurFinalizer) continue;

      logger.info(`Removing orphaned finalizer from Terminating PVC ${name}`);
      await removeFinalizerFromResource(
        appsV1Api,
        coreV1Api,
        namespace,
        "pvc",
        name
      );
    }
  } catch (error) {
    // The detail is folded into the message so it cannot be lost if the
    // logger does not print extra string arguments.
    logger.warn(
      `Failed to clean up orphaned PVC finalizers: ${error instanceof Error ? error.message : String(error)}`
    );
  }
}
@@ -0,0 +1 @@
1
+ export { K8sDeploymentManager } from "./deployment";