@lobu/gateway 3.0.9 → 3.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/platform.d.ts.map +1 -1
- package/dist/api/platform.js +7 -26
- package/dist/api/platform.js.map +1 -1
- package/dist/auth/mcp/proxy.d.ts +14 -0
- package/dist/auth/mcp/proxy.d.ts.map +1 -1
- package/dist/auth/mcp/proxy.js +149 -13
- package/dist/auth/mcp/proxy.js.map +1 -1
- package/dist/cli/gateway.d.ts.map +1 -1
- package/dist/cli/gateway.js +29 -0
- package/dist/cli/gateway.js.map +1 -1
- package/dist/connections/chat-instance-manager.d.ts.map +1 -1
- package/dist/connections/chat-instance-manager.js +2 -1
- package/dist/connections/chat-instance-manager.js.map +1 -1
- package/dist/connections/interaction-bridge.d.ts +9 -2
- package/dist/connections/interaction-bridge.d.ts.map +1 -1
- package/dist/connections/interaction-bridge.js +121 -261
- package/dist/connections/interaction-bridge.js.map +1 -1
- package/dist/interactions.d.ts +9 -43
- package/dist/interactions.d.ts.map +1 -1
- package/dist/interactions.js +10 -52
- package/dist/interactions.js.map +1 -1
- package/dist/routes/public/agent.d.ts +4 -0
- package/dist/routes/public/agent.d.ts.map +1 -1
- package/dist/routes/public/agent.js +21 -0
- package/dist/routes/public/agent.js.map +1 -1
- package/dist/services/core-services.d.ts.map +1 -1
- package/dist/services/core-services.js +4 -0
- package/dist/services/core-services.js.map +1 -1
- package/package.json +9 -9
- package/src/__tests__/agent-config-routes.test.ts +0 -254
- package/src/__tests__/agent-history-routes.test.ts +0 -72
- package/src/__tests__/agent-routes.test.ts +0 -68
- package/src/__tests__/agent-schedules-routes.test.ts +0 -59
- package/src/__tests__/agent-settings-store.test.ts +0 -323
- package/src/__tests__/bedrock-model-catalog.test.ts +0 -40
- package/src/__tests__/bedrock-openai-service.test.ts +0 -157
- package/src/__tests__/bedrock-provider-module.test.ts +0 -56
- package/src/__tests__/chat-instance-manager-slack.test.ts +0 -204
- package/src/__tests__/chat-response-bridge.test.ts +0 -131
- package/src/__tests__/config-memory-plugins.test.ts +0 -92
- package/src/__tests__/config-request-store.test.ts +0 -127
- package/src/__tests__/connection-routes.test.ts +0 -144
- package/src/__tests__/core-services-store-selection.test.ts +0 -92
- package/src/__tests__/docker-deployment.test.ts +0 -1211
- package/src/__tests__/embedded-deployment.test.ts +0 -342
- package/src/__tests__/grant-store.test.ts +0 -148
- package/src/__tests__/http-proxy.test.ts +0 -281
- package/src/__tests__/instruction-service.test.ts +0 -37
- package/src/__tests__/link-buttons.test.ts +0 -112
- package/src/__tests__/lobu.test.ts +0 -32
- package/src/__tests__/mcp-config-service.test.ts +0 -347
- package/src/__tests__/mcp-proxy.test.ts +0 -694
- package/src/__tests__/message-handler-bridge.test.ts +0 -17
- package/src/__tests__/model-selection.test.ts +0 -172
- package/src/__tests__/oauth-templates.test.ts +0 -39
- package/src/__tests__/platform-adapter-slack-send.test.ts +0 -114
- package/src/__tests__/platform-helpers-model-resolution.test.ts +0 -253
- package/src/__tests__/provider-inheritance.test.ts +0 -212
- package/src/__tests__/routes/cli-auth.test.ts +0 -337
- package/src/__tests__/routes/interactions.test.ts +0 -121
- package/src/__tests__/secret-proxy.test.ts +0 -85
- package/src/__tests__/session-manager.test.ts +0 -572
- package/src/__tests__/setup.ts +0 -133
- package/src/__tests__/skill-and-mcp-registry.test.ts +0 -203
- package/src/__tests__/slack-routes.test.ts +0 -161
- package/src/__tests__/system-config-resolver.test.ts +0 -75
- package/src/__tests__/system-message-limiter.test.ts +0 -89
- package/src/__tests__/system-skills-service.test.ts +0 -362
- package/src/__tests__/transcription-service.test.ts +0 -222
- package/src/__tests__/utils/rate-limiter.test.ts +0 -102
- package/src/__tests__/worker-connection-manager.test.ts +0 -497
- package/src/__tests__/worker-job-router.test.ts +0 -722
- package/src/api/index.ts +0 -1
- package/src/api/platform.ts +0 -292
- package/src/api/response-renderer.ts +0 -157
- package/src/auth/agent-metadata-store.ts +0 -168
- package/src/auth/api-auth-middleware.ts +0 -69
- package/src/auth/api-key-provider-module.ts +0 -213
- package/src/auth/base-provider-module.ts +0 -201
- package/src/auth/bedrock/provider-module.ts +0 -110
- package/src/auth/chatgpt/chatgpt-oauth-module.ts +0 -185
- package/src/auth/chatgpt/device-code-client.ts +0 -218
- package/src/auth/chatgpt/index.ts +0 -1
- package/src/auth/claude/oauth-module.ts +0 -280
- package/src/auth/cli/token-service.ts +0 -249
- package/src/auth/external/client.ts +0 -560
- package/src/auth/external/device-code-client.ts +0 -235
- package/src/auth/mcp/config-service.ts +0 -420
- package/src/auth/mcp/proxy.ts +0 -1086
- package/src/auth/mcp/string-substitution.ts +0 -17
- package/src/auth/mcp/tool-cache.ts +0 -90
- package/src/auth/oauth/base-client.ts +0 -267
- package/src/auth/oauth/client.ts +0 -153
- package/src/auth/oauth/credentials.ts +0 -7
- package/src/auth/oauth/providers.ts +0 -69
- package/src/auth/oauth/state-store.ts +0 -150
- package/src/auth/oauth-templates.ts +0 -179
- package/src/auth/provider-catalog.ts +0 -220
- package/src/auth/provider-model-options.ts +0 -41
- package/src/auth/settings/agent-settings-store.ts +0 -565
- package/src/auth/settings/auth-profiles-manager.ts +0 -216
- package/src/auth/settings/index.ts +0 -12
- package/src/auth/settings/model-preference-store.ts +0 -52
- package/src/auth/settings/model-selection.ts +0 -135
- package/src/auth/settings/resolved-settings-view.ts +0 -298
- package/src/auth/settings/template-utils.ts +0 -44
- package/src/auth/settings/token-service.ts +0 -88
- package/src/auth/system-env-store.ts +0 -98
- package/src/auth/user-agents-store.ts +0 -68
- package/src/channels/binding-service.ts +0 -214
- package/src/channels/index.ts +0 -4
- package/src/cli/gateway.ts +0 -1312
- package/src/cli/index.ts +0 -74
- package/src/commands/built-in-commands.ts +0 -80
- package/src/commands/command-dispatcher.ts +0 -94
- package/src/commands/command-reply-adapters.ts +0 -27
- package/src/config/file-loader.ts +0 -618
- package/src/config/index.ts +0 -588
- package/src/config/network-allowlist.ts +0 -71
- package/src/connections/chat-instance-manager.ts +0 -1284
- package/src/connections/chat-response-bridge.ts +0 -618
- package/src/connections/index.ts +0 -7
- package/src/connections/interaction-bridge.ts +0 -831
- package/src/connections/message-handler-bridge.ts +0 -440
- package/src/connections/platform-auth-methods.ts +0 -15
- package/src/connections/types.ts +0 -84
- package/src/gateway/connection-manager.ts +0 -291
- package/src/gateway/index.ts +0 -698
- package/src/gateway/job-router.ts +0 -201
- package/src/gateway-main.ts +0 -200
- package/src/index.ts +0 -41
- package/src/infrastructure/queue/index.ts +0 -12
- package/src/infrastructure/queue/queue-producer.ts +0 -148
- package/src/infrastructure/queue/redis-queue.ts +0 -361
- package/src/infrastructure/queue/types.ts +0 -133
- package/src/infrastructure/redis/system-message-limiter.ts +0 -94
- package/src/interactions/config-request-store.ts +0 -198
- package/src/interactions.ts +0 -363
- package/src/lobu.ts +0 -311
- package/src/metrics/prometheus.ts +0 -159
- package/src/modules/module-system.ts +0 -179
- package/src/orchestration/base-deployment-manager.ts +0 -900
- package/src/orchestration/deployment-utils.ts +0 -98
- package/src/orchestration/impl/docker-deployment.ts +0 -620
- package/src/orchestration/impl/embedded-deployment.ts +0 -268
- package/src/orchestration/impl/index.ts +0 -8
- package/src/orchestration/impl/k8s/deployment.ts +0 -1061
- package/src/orchestration/impl/k8s/helpers.ts +0 -610
- package/src/orchestration/impl/k8s/index.ts +0 -1
- package/src/orchestration/index.ts +0 -333
- package/src/orchestration/message-consumer.ts +0 -584
- package/src/orchestration/scheduled-wakeup.ts +0 -704
- package/src/permissions/approval-policy.ts +0 -36
- package/src/permissions/grant-store.ts +0 -219
- package/src/platform/file-handler.ts +0 -66
- package/src/platform/link-buttons.ts +0 -57
- package/src/platform/renderer-utils.ts +0 -44
- package/src/platform/response-renderer.ts +0 -84
- package/src/platform/unified-thread-consumer.ts +0 -194
- package/src/platform.ts +0 -318
- package/src/proxy/http-proxy.ts +0 -752
- package/src/proxy/proxy-manager.ts +0 -81
- package/src/proxy/secret-proxy.ts +0 -402
- package/src/proxy/token-refresh-job.ts +0 -143
- package/src/routes/internal/audio.ts +0 -141
- package/src/routes/internal/device-auth.ts +0 -652
- package/src/routes/internal/files.ts +0 -226
- package/src/routes/internal/history.ts +0 -69
- package/src/routes/internal/images.ts +0 -127
- package/src/routes/internal/interactions.ts +0 -84
- package/src/routes/internal/middleware.ts +0 -23
- package/src/routes/internal/schedule.ts +0 -226
- package/src/routes/internal/types.ts +0 -22
- package/src/routes/openapi-auto.ts +0 -239
- package/src/routes/public/agent-access.ts +0 -23
- package/src/routes/public/agent-config.ts +0 -675
- package/src/routes/public/agent-history.ts +0 -422
- package/src/routes/public/agent-schedules.ts +0 -296
- package/src/routes/public/agent.ts +0 -1086
- package/src/routes/public/agents.ts +0 -373
- package/src/routes/public/channels.ts +0 -191
- package/src/routes/public/cli-auth.ts +0 -896
- package/src/routes/public/connections.ts +0 -574
- package/src/routes/public/landing.ts +0 -16
- package/src/routes/public/oauth.ts +0 -147
- package/src/routes/public/settings-auth.ts +0 -104
- package/src/routes/public/slack.ts +0 -173
- package/src/routes/shared/agent-ownership.ts +0 -101
- package/src/routes/shared/token-verifier.ts +0 -34
- package/src/services/bedrock-model-catalog.ts +0 -217
- package/src/services/bedrock-openai-service.ts +0 -658
- package/src/services/core-services.ts +0 -1072
- package/src/services/image-generation-service.ts +0 -257
- package/src/services/instruction-service.ts +0 -318
- package/src/services/mcp-registry.ts +0 -94
- package/src/services/platform-helpers.ts +0 -287
- package/src/services/session-manager.ts +0 -262
- package/src/services/settings-resolver.ts +0 -74
- package/src/services/system-config-resolver.ts +0 -89
- package/src/services/system-skills-service.ts +0 -229
- package/src/services/transcription-service.ts +0 -684
- package/src/session.ts +0 -110
- package/src/spaces/index.ts +0 -1
- package/src/spaces/space-resolver.ts +0 -17
- package/src/stores/in-memory-agent-store.ts +0 -403
- package/src/stores/redis-agent-store.ts +0 -279
- package/src/utils/public-url.ts +0 -44
- package/src/utils/rate-limiter.ts +0 -94
- package/tsconfig.json +0 -33
- package/tsconfig.tsbuildinfo +0 -1
|
@@ -1,1061 +0,0 @@
|
|
|
1
|
-
import * as k8s from "@kubernetes/client-node";
|
|
2
|
-
import {
|
|
3
|
-
createChildSpan,
|
|
4
|
-
createLogger,
|
|
5
|
-
ErrorCode,
|
|
6
|
-
OrchestratorError,
|
|
7
|
-
SpanStatusCode,
|
|
8
|
-
} from "@lobu/core";
|
|
9
|
-
import type { ModelProviderModule } from "../../../modules/module-system";
|
|
10
|
-
import {
|
|
11
|
-
BaseDeploymentManager,
|
|
12
|
-
type DeploymentInfo,
|
|
13
|
-
type MessagePayload,
|
|
14
|
-
type ModuleEnvVarsBuilder,
|
|
15
|
-
type OrchestratorConfig,
|
|
16
|
-
} from "../../base-deployment-manager";
|
|
17
|
-
import {
|
|
18
|
-
BASE_WORKER_LABELS,
|
|
19
|
-
buildDeploymentInfoSummary,
|
|
20
|
-
getVeryOldThresholdDays,
|
|
21
|
-
resolvePlatformDeploymentMetadata,
|
|
22
|
-
} from "../../deployment-utils";
|
|
23
|
-
import {
|
|
24
|
-
cleanupOrphanedPvcFinalizers,
|
|
25
|
-
createPVC,
|
|
26
|
-
reconcileWorkerDeploymentImages,
|
|
27
|
-
removeFinalizerFromResource,
|
|
28
|
-
runImagePullPreflight,
|
|
29
|
-
waitForWorkerReady,
|
|
30
|
-
} from "./helpers";
|
|
31
|
-
|
|
32
|
-
/**
 * Finalizer string written into the `metadata.finalizers` of worker
 * Deployments built in `createDeployment`, and looked for on Deployments that
 * already carry a `deletionTimestamp` in `listDeployments`.
 */
export const LOBU_FINALIZER = "lobu.io/cleanup";
|
|
33
|
-
|
|
34
|
-
/**
 * Fixed security-related values for worker pods.
 *
 * - `USER_ID` / `GROUP_ID`: numeric uid/gid; within this file they feed the
 *   pod-level `fsGroup` and the init container's `runAsUser` / `runAsGroup`.
 * - `TMP_SIZE_LIMIT`: a Kubernetes quantity string.
 *   NOTE(review): presumably the size limit of a tmp `emptyDir` volume — its
 *   use is outside the visible part of the file; confirm.
 */
export const WORKER_SECURITY = {
  USER_ID: 1001,
  GROUP_ID: 1001,
  TMP_SIZE_LIMIT: "100Mi",
} as const;
|
|
39
|
-
|
|
40
|
-
/**
 * The two labels (name and component) copied out of `BASE_WORKER_LABELS` that
 * are used as `spec.selector.matchLabels` when a worker Deployment is built.
 *
 * NOTE(review): the selector carries no per-deployment label, so every worker
 * Deployment created by this file appears to share the same selector —
 * confirm this is intended.
 */
const WORKER_SELECTOR_LABELS = {
  "app.kubernetes.io/name": BASE_WORKER_LABELS["app.kubernetes.io/name"],
  "app.kubernetes.io/component":
    BASE_WORKER_LABELS["app.kubernetes.io/component"],
} as const;
|
|
45
|
-
|
|
46
|
-
/**
 * Minimal description of a container probe, used for the `livenessProbe` and
 * `readinessProbe` fields of `SimpleDeployment` containers.
 *
 * Every member is optional. `httpGet`, `exec` and `tcpSocket` describe the
 * check itself; the numeric members are timing and threshold settings whose
 * names mirror the Kubernetes probe fields.
 */
export interface K8sProbe {
  /** HTTP-style check: a path and port, with an optional scheme. */
  httpGet?: { path: string; port: number | string; scheme?: string };
  /** Command-style check: the argv to execute. */
  exec?: { command: string[] };
  /** Socket-style check: the port to connect to. */
  tcpSocket?: { port: number | string };

  initialDelaySeconds?: number;
  periodSeconds?: number;
  timeoutSeconds?: number;
  successThreshold?: number;
  failureThreshold?: number;
}
|
|
64
|
-
|
|
65
|
-
/**
 * Container-level security settings. Shared by init containers and main
 * containers, which previously each spelled out this same shape inline.
 */
type SimpleContainerSecurityContext = {
  runAsUser?: number;
  runAsGroup?: number;
  runAsNonRoot?: boolean;
  readOnlyRootFilesystem?: boolean;
  allowPrivilegeEscalation?: boolean;
  capabilities?: {
    drop?: string[];
    add?: string[];
  };
};

/** Resource requests/limits, keyed by resource name with quantity strings. */
type SimpleResourceRequirements = {
  requests?: Record<string, string>;
  limits?: Record<string, string>;
};

/** Where a named volume is mounted inside a container. */
type SimpleVolumeMount = {
  name: string;
  mountPath: string;
  subPath?: string;
};

/** Fields common to init containers and main containers. */
type SimpleContainerBase = {
  name: string;
  image: string;
  imagePullPolicy?: string;
  command?: string[];
  args?: string[];
  securityContext?: SimpleContainerSecurityContext;
  resources?: SimpleResourceRequirements;
  volumeMounts?: SimpleVolumeMount[];
};

/** A main container: the common fields plus env, ports and probes. */
type SimpleContainer = SimpleContainerBase & {
  env?: Array<{
    name: string;
    value?: string;
    valueFrom?: {
      secretKeyRef?: {
        name: string;
        key: string;
      };
    };
  }>;
  ports?: Array<{
    name: string;
    containerPort: number;
    protocol?: string;
  }>;
  livenessProbe?: K8sProbe;
  readinessProbe?: K8sProbe;
};

/**
 * Hand-written subset of an `apps/v1` Deployment manifest, limited to the
 * fields this module populates when it builds a worker Deployment.
 *
 * The shape is structurally identical to the previous fully-inlined
 * definition; the repeated container sub-shapes are factored into the
 * private type aliases above so init containers and main containers cannot
 * drift apart.
 */
export interface SimpleDeployment {
  apiVersion: "apps/v1";
  kind: "Deployment";
  metadata: {
    name: string;
    namespace: string;
    labels?: Record<string, string>;
    annotations?: Record<string, string>;
    finalizers?: string[];
  };
  spec: {
    replicas: number;
    selector: {
      matchLabels: Record<string, string>;
    };
    template: {
      metadata: {
        labels: Record<string, string>;
        annotations?: Record<string, string>;
      };
      spec: {
        serviceAccountName?: string;
        imagePullSecrets?: Array<{ name: string }>;
        runtimeClassName?: string;
        /** Pod-level security context (distinct from the per-container one). */
        securityContext?: {
          fsGroup?: number;
          fsGroupChangePolicy?: "Always" | "OnRootMismatch";
          runAsUser?: number;
          runAsGroup?: number;
          runAsNonRoot?: boolean;
        };
        initContainers?: SimpleContainerBase[];
        containers: SimpleContainer[];
        /** Each volume names exactly the source kinds this module uses. */
        volumes?: Array<{
          name: string;
          persistentVolumeClaim?: {
            claimName: string;
          };
          emptyDir?: {
            sizeLimit?: string;
            medium?: string;
          };
          hostPath?: {
            path: string;
            type?: string;
          };
        }>;
      };
    };
  };
}
|
|
185
|
-
|
|
186
|
-
/**
 * Reason strings treated as "the image could not be pulled".
 *
 * NOTE(review): presumably matched against container waiting-state reasons by
 * code outside the visible part of this file — confirm against the consumer.
 */
export const IMAGE_PULL_FAILURE_REASONS = new Set<string>([
  "ImagePullBackOff",
  "ErrImagePull",
  "InvalidImageName",
  "RegistryUnavailable",
]);
|
|
192
|
-
|
|
193
|
-
// Module-level logger, created under the name "k8s-deployment"; every log call in this file goes through it.
const logger = createLogger("k8s-deployment");
|
|
194
|
-
|
|
195
|
-
export class K8sDeploymentManager extends BaseDeploymentManager {
|
|
196
|
-
private kc: k8s.KubeConfig;
|
|
197
|
-
private appsV1Api: k8s.AppsV1Api;
|
|
198
|
-
private coreV1Api: k8s.CoreV1Api;
|
|
199
|
-
private nodeV1Api: k8s.NodeV1Api;
|
|
200
|
-
private informer: k8s.Informer<k8s.V1Deployment> | null = null;
|
|
201
|
-
private informerInitializing = false;
|
|
202
|
-
|
|
203
|
-
/**
 * Load a KubeConfig, build the Apps/Core/Node API clients from it, and start
 * the asynchronous namespace and RuntimeClass checks.
 *
 * @param config - orchestrator configuration; forwarded to the base class.
 * @param moduleEnvVarsBuilder - forwarded unchanged to the base class.
 * @param providerModules - forwarded unchanged to the base class (default: none).
 * @throws OrchestratorError with `ErrorCode.DEPLOYMENT_CREATE_FAILED` when the
 *   kubeconfig cannot be loaded.
 */
constructor(
  config: OrchestratorConfig,
  moduleEnvVarsBuilder?: ModuleEnvVarsBuilder,
  providerModules: ModelProviderModule[] = []
) {
  super(config, moduleEnvVarsBuilder, providerModules);

  const kc = new k8s.KubeConfig();
  try {
    // Try in-cluster config first, then fall back to default
    if (process.env.KUBERNETES_SERVICE_HOST) {
      try {
        kc.loadFromCluster();
      } catch (_clusterError) {
        kc.loadFromDefault();
      }
    } else {
      kc.loadFromDefault();
    }

    // For development environments, disable TLS verification to avoid certificate issues.
    // NOTE(review): the host checks are substring matches, so any
    // KUBERNETES_SERVICE_HOST containing "192.168" (or the other fragments)
    // also turns off certificate verification — confirm this is acceptable
    // outside local development.
    if (
      process.env.NODE_ENV === "development" ||
      process.env.KUBERNETES_SERVICE_HOST?.includes("127.0.0.1") ||
      process.env.KUBERNETES_SERVICE_HOST?.includes("192.168") ||
      process.env.KUBERNETES_SERVICE_HOST?.includes("localhost")
    ) {
      const cluster = kc.getCurrentCluster();
      if (
        cluster &&
        typeof cluster === "object" &&
        cluster.skipTLSVerify !== true
      ) {
        // defineProperty is used instead of a plain assignment to set the flag
        // on the cluster object returned by the KubeConfig.
        Object.defineProperty(cluster, "skipTLSVerify", {
          value: true,
          writable: true,
          enumerable: true,
          configurable: true,
        });
      }
    }
  } catch (error) {
    logger.error("❌ Failed to load Kubernetes config:", error);
    throw new OrchestratorError(
      ErrorCode.DEPLOYMENT_CREATE_FAILED,
      `Failed to initialize Kubernetes client: ${error instanceof Error ? error.message : String(error)}`,
      { error },
      true
    );
  }

  // Store KubeConfig for informer creation
  this.kc = kc;

  // Configure K8s API clients (makeApiClient applies the kubeconfig's authentication)
  this.appsV1Api = kc.makeApiClient(k8s.AppsV1Api);
  this.coreV1Api = kc.makeApiClient(k8s.CoreV1Api);
  this.nodeV1Api = kc.makeApiClient(k8s.NodeV1Api);

  logger.info(
    `🔧 K8s client initialized for namespace: ${this.config.kubernetes.namespace}`
  );

  // Both checks are async and a constructor cannot await them. They are
  // started in the background with an explicit rejection handler so that a
  // failure (validateNamespace throws when the namespace is missing) is
  // logged instead of surfacing as an unhandled promise rejection.
  void this.validateNamespace().catch((error: unknown) => {
    logger.error("❌ Namespace validation failed:", error);
  });

  // Check runtime class availability on initialization (like Docker's gVisor check)
  void this.checkRuntimeClassAvailability().catch((error: unknown) => {
    logger.error("❌ RuntimeClass availability check failed:", error);
  });
}
|
|
275
|
-
|
|
276
|
-
/**
 * Probe the configured namespace with a single `readNamespace` call and log
 * the outcome.
 *
 * Outcomes by HTTP status of the failure:
 * - 404: logs an error and throws.
 * - 403: logs an info line and returns (treated as namespace-scoped RBAC).
 * - anything else: logs a warning and returns.
 *
 * @throws OrchestratorError with `ErrorCode.DEPLOYMENT_CREATE_FAILED` when the
 *   API answers 404 for the namespace.
 */
private async validateNamespace(): Promise<void> {
  const namespace = this.config.kubernetes.namespace;

  try {
    await this.coreV1Api.readNamespace(namespace);
    logger.info(`✅ Namespace '${namespace}' validated`);
  } catch (error) {
    const status = (error as { statusCode?: number }).statusCode;

    switch (status) {
      case 404:
        logger.error(
          `❌ Namespace '${namespace}' does not exist. ` +
            `Create it with: kubectl create namespace ${namespace}`
        );
        throw new OrchestratorError(
          ErrorCode.DEPLOYMENT_CREATE_FAILED,
          `Namespace '${namespace}' does not exist`,
          { namespace },
          true
        );

      case 403:
        // A Forbidden answer is expected when the gateway only holds a
        // namespace-scoped Role: it may still manage resources inside the
        // namespace without being allowed to read the Namespace object.
        logger.info(
          `ℹ️  Namespace '${namespace}' access check skipped (namespace-scoped RBAC). ` +
            `Will validate via resource operations.`
        );
        return;

      default:
        // Unknown failure: do not throw, later operations will fail with
        // more specific errors if the namespace really is unusable.
        logger.warn(
          `⚠️  Could not validate namespace '${namespace}': ${error instanceof Error ? error.message : String(error)}`
        );
        return;
    }
  }
}
|
|
315
|
-
|
|
316
|
-
/**
 * Look up the worker RuntimeClass in the cluster and, if the lookup fails for
 * any reason, clear `config.worker.runtimeClassName` so workers fall back to
 * the default runtime. Never throws for a failed lookup.
 *
 * The name checked is `config.worker.runtimeClassName`, or "kata" when that
 * is unset/empty.
 *
 * NOTE(review): when the config value is unset and "kata" exists, the success
 * message says the class "will be used", but nothing here writes "kata" back
 * into the config — confirm whether the default is applied elsewhere.
 */
private async checkRuntimeClassAvailability(): Promise<void> {
  const runtimeClassName = this.config.worker.runtimeClassName || "kata";

  try {
    await this.nodeV1Api.readRuntimeClass(runtimeClassName);
    logger.info(
      `✅ RuntimeClass '${runtimeClassName}' verified and will be used for worker isolation`
    );
  } catch (error) {
    const notFound = (error as { statusCode?: number }).statusCode === 404;

    const warning = notFound
      ? `⚠️  RuntimeClass '${runtimeClassName}' not found in cluster. ` +
        `Workers will use default runtime. Consider installing ${runtimeClassName} for enhanced isolation.`
      : `⚠️  Failed to verify RuntimeClass '${runtimeClassName}': ${error instanceof Error ? error.message : String(error)}`;
    logger.warn(warning);

    // Missing or unverifiable: drop the setting so workers use the default runtime.
    this.config.worker.runtimeClassName = undefined;
  }
}
|
|
344
|
-
|
|
345
|
-
/**
 * Service account name for worker pods.
 *
 * @returns `config.worker.serviceAccountName` when it is set and non-empty,
 *   otherwise "lobu-worker".
 */
private getWorkerServiceAccountName(): string {
  const configured = this.config.worker.serviceAccountName;
  return configured ? configured : "lobu-worker";
}
|
|
348
|
-
|
|
349
|
-
/**
 * Turn the configured image pull secret names into `{ name }` entries.
 *
 * Names are trimmed and blank ones are dropped.
 *
 * @returns the entries in configured order, or `undefined` when none remain
 *   (including when nothing is configured).
 */
private getWorkerImagePullSecrets(): Array<{ name: string }> | undefined {
  const secrets: Array<{ name: string }> = [];

  for (const raw of this.config.worker.imagePullSecrets || []) {
    const name = raw.trim();
    if (name) {
      secrets.push({ name });
    }
  }

  return secrets.length > 0 ? secrets : undefined;
}
|
|
355
|
-
|
|
356
|
-
/**
 * Worker startup timeout in milliseconds.
 *
 * Reads `config.worker.startupTimeoutSeconds`, defaulting to 90 seconds when
 * it is missing, and enforces a floor of 5 seconds.
 *
 * A `NaN` value (for example from a failed numeric parse upstream) is treated
 * like a missing value: `??` does not catch it and `Math.max(NaN, 5)` is
 * `NaN`, which would otherwise leak out as a `NaN` timeout.
 *
 * @returns the timeout in milliseconds, never below 5000.
 */
private getWorkerStartupTimeoutMs(): number {
  const configured = this.config.worker.startupTimeoutSeconds;
  const timeoutSeconds =
    configured == null || Number.isNaN(configured) ? 90 : configured;
  return Math.max(timeoutSeconds, 5) * 1000;
}
|
|
360
|
-
|
|
361
|
-
/**
 * Fetch the raw Deployment objects in the configured namespace that carry the
 * label `app.kubernetes.io/component=worker`.
 *
 * @returns the `items` of the list response, or an empty array when the
 *   response has no body/items.
 */
private async listRawWorkerDeployments(): Promise<k8s.V1Deployment[]> {
  const namespace = this.config.kubernetes.namespace;
  // Only worker deployments are of interest.
  const labelSelector = "app.kubernetes.io/component=worker";

  const listing = (await this.appsV1Api.listNamespacedDeployment(
    namespace,
    undefined, // pretty
    undefined, // allowWatchBookmarks
    undefined, // _continue
    undefined, // fieldSelector
    labelSelector
  )) as {
    body?: { items?: k8s.V1Deployment[] };
  };

  return listing.body?.items || [];
}
|
|
377
|
-
|
|
378
|
-
/**
 * Log the configured worker image and, unless the pull policy is "Never",
 * run the image-pull preflight for it.
 *
 * With pull policy "Never" only a warning is logged (the image must already
 * be present on the nodes) and the preflight is skipped. Otherwise the
 * preflight is awaited, so whatever `runImagePullPreflight` rejects with
 * propagates to the caller.
 */
async validateWorkerImage(): Promise<void> {
  const imageName = this.getWorkerImageReference();
  const imageConfig = this.config.worker.image;

  logger.info(
    `ℹ️  Worker image configured: ${imageName} (pullPolicy: ${imageConfig.pullPolicy || "Always"})`
  );

  if (imageConfig.pullPolicy === "Never") {
    logger.warn(
      `⚠️  Worker image pullPolicy is 'Never'. Ensure image ${imageName} is pre-loaded on all nodes.`
    );
    return;
  }

  const namespace = this.config.kubernetes.namespace;
  const pullPolicy = imageConfig.pullPolicy || "Always";
  const serviceAccount = this.getWorkerServiceAccountName();
  const pullSecrets = this.getWorkerImagePullSecrets();

  await runImagePullPreflight(
    this.coreV1Api,
    namespace,
    imageName,
    pullPolicy,
    serviceAccount,
    pullSecrets
  );
}
|
|
404
|
-
|
|
405
|
-
/**
 * Delegate to the module-level `reconcileWorkerDeploymentImages` helper,
 * handing it the Apps API client, the namespace, the current worker image
 * reference and pull policy (default "Always"), the worker service account
 * and pull secrets, and a callback that lists the raw worker Deployments.
 *
 * Inside this method the bare identifier refers to the imported helper, not
 * to this method.
 */
async reconcileWorkerDeploymentImages(): Promise<void> {
  const namespace = this.config.kubernetes.namespace;
  const image = this.getWorkerImageReference();
  const pullPolicy = this.config.worker.image.pullPolicy || "Always";
  const serviceAccount = this.getWorkerServiceAccountName();
  const pullSecrets = this.getWorkerImagePullSecrets();
  const listWorkers = () => this.listRawWorkerDeployments();

  await reconcileWorkerDeploymentImages(
    this.appsV1Api,
    namespace,
    image,
    pullPolicy,
    serviceAccount,
    pullSecrets,
    listWorkers
  );
}
|
|
416
|
-
|
|
417
|
-
/**
 * Summarise every worker Deployment that is not being deleted.
 *
 * For each Deployment returned by `listRawWorkerDeployments`:
 * - If it has a `deletionTimestamp` and still carries `LOBU_FINALIZER`, the
 *   finalizer removal is started in the background (failures are only
 *   logged) and the Deployment is left out of the result.
 * - Otherwise its last-activity time is taken from the first usable value of
 *   the `lobu.io/last-activity` annotation, the `lobu.io/created` annotation,
 *   and the creation timestamp; if none is usable the current time is used.
 *   A value that does not parse to a valid date is skipped rather than
 *   passed on as an Invalid Date.
 *
 * @returns one `DeploymentInfo` per remaining Deployment, in list order.
 * @throws OrchestratorError with `ErrorCode.DEPLOYMENT_CREATE_FAILED` wrapping
 *   any error raised while listing or summarising.
 */
async listDeployments(): Promise<DeploymentInfo[]> {
  try {
    const now = Date.now();
    const idleThresholdMinutes = this.config.worker.idleCleanupMinutes;
    const veryOldDays = getVeryOldThresholdDays(this.config);
    const results: DeploymentInfo[] = [];

    for (const deployment of await this.listRawWorkerDeployments()) {
      const metadata = deployment.metadata;
      const deploymentName = metadata?.name || "";

      // Clean up orphaned finalizers on Terminating deployments (avoids extra API call)
      if (
        metadata?.deletionTimestamp &&
        metadata.finalizers?.includes(LOBU_FINALIZER)
      ) {
        logger.info(
          `Removing orphaned finalizer from Terminating deployment ${deploymentName}`
        );
        // Deliberately not awaited: listing must not block on cleanup.
        // The rejection is handled here so it cannot go unhandled.
        removeFinalizerFromResource(
          this.appsV1Api,
          this.coreV1Api,
          this.config.kubernetes.namespace,
          "deployment",
          deploymentName
        ).catch((err: unknown) =>
          logger.warn(
            `Failed to remove orphaned finalizer from ${deploymentName}:`,
            err instanceof Error ? err.message : String(err)
          )
        );
        continue; // Skip Terminating deployments from the active list
      }

      // Candidate timestamps, most specific first.
      const annotations = metadata?.annotations;
      const candidates: Array<string | Date | undefined> = [
        annotations?.["lobu.io/last-activity"],
        annotations?.["lobu.io/created"],
        metadata?.creationTimestamp,
      ];

      // Take the first candidate that is present AND parses to a real date;
      // a malformed annotation falls through to the next candidate.
      let lastActivity: Date | undefined;
      for (const candidate of candidates) {
        if (!candidate) continue;
        const parsed = new Date(candidate);
        if (!Number.isNaN(parsed.getTime())) {
          lastActivity = parsed;
          break;
        }
      }

      const replicas = deployment.spec?.replicas || 0;
      results.push(
        buildDeploymentInfoSummary({
          deploymentName,
          lastActivity: lastActivity ?? new Date(),
          now,
          idleThresholdMinutes,
          veryOldDays,
          replicas,
        })
      );
    }

    return results;
  } catch (error) {
    throw new OrchestratorError(
      ErrorCode.DEPLOYMENT_CREATE_FAILED,
      `Failed to list deployments: ${error instanceof Error ? error.message : String(error)}`,
      { error },
      true
    );
  }
}
|
|
482
|
-
|
|
483
|
-
async createDeployment(
|
|
484
|
-
deploymentName: string,
|
|
485
|
-
username: string,
|
|
486
|
-
userId: string,
|
|
487
|
-
messageData?: MessagePayload
|
|
488
|
-
): Promise<void> {
|
|
489
|
-
// Extract traceparent for distributed tracing
|
|
490
|
-
const traceparent = messageData?.platformMetadata?.traceparent as
|
|
491
|
-
| string
|
|
492
|
-
| undefined;
|
|
493
|
-
|
|
494
|
-
logger.info(
|
|
495
|
-
{ traceparent, deploymentName, userId },
|
|
496
|
-
"Creating K8s deployment"
|
|
497
|
-
);
|
|
498
|
-
|
|
499
|
-
// Use agentId for PVC naming (shared across threads in same space)
|
|
500
|
-
const agentId = messageData?.agentId;
|
|
501
|
-
if (!agentId) {
|
|
502
|
-
throw new OrchestratorError(
|
|
503
|
-
ErrorCode.DEPLOYMENT_CREATE_FAILED,
|
|
504
|
-
"Missing agentId in message payload"
|
|
505
|
-
);
|
|
506
|
-
}
|
|
507
|
-
const pvcName = `lobu-workspace-${agentId}`;
|
|
508
|
-
|
|
509
|
-
// Check if Nix packages are configured (need init container + subPath mounts)
|
|
510
|
-
const hasNixConfig =
|
|
511
|
-
(messageData?.nixConfig?.packages?.length ?? 0) > 0 ||
|
|
512
|
-
!!messageData?.nixConfig?.flakeUrl;
|
|
513
|
-
|
|
514
|
-
// Use larger PVC when Nix packages are configured (Chromium etc. need space)
|
|
515
|
-
const pvcSize = hasNixConfig ? "5Gi" : undefined;
|
|
516
|
-
await createPVC(
|
|
517
|
-
this.coreV1Api,
|
|
518
|
-
this.config.kubernetes.namespace,
|
|
519
|
-
pvcName,
|
|
520
|
-
agentId,
|
|
521
|
-
this.config.worker.persistence?.storageClass,
|
|
522
|
-
traceparent,
|
|
523
|
-
pvcSize,
|
|
524
|
-
this.config.worker.persistence?.size
|
|
525
|
-
);
|
|
526
|
-
|
|
527
|
-
// Get environment variables before creating the deployment spec
|
|
528
|
-
// Include secrets (same as Docker behavior) - secrets are passed via env vars
|
|
529
|
-
const envVars = await this.generateEnvironmentVariables(
|
|
530
|
-
username,
|
|
531
|
-
userId,
|
|
532
|
-
deploymentName,
|
|
533
|
-
messageData,
|
|
534
|
-
true // Include secrets to match Docker behavior
|
|
535
|
-
);
|
|
536
|
-
|
|
537
|
-
const platform = messageData?.platform || "unknown";
|
|
538
|
-
const workerImage = this.getWorkerImageReference();
|
|
539
|
-
|
|
540
|
-
const deployment: SimpleDeployment = {
|
|
541
|
-
apiVersion: "apps/v1",
|
|
542
|
-
kind: "Deployment",
|
|
543
|
-
metadata: {
|
|
544
|
-
name: deploymentName,
|
|
545
|
-
namespace: this.config.kubernetes.namespace,
|
|
546
|
-
labels: {
|
|
547
|
-
...BASE_WORKER_LABELS,
|
|
548
|
-
"lobu.io/platform": platform,
|
|
549
|
-
"lobu.io/agent-id": agentId,
|
|
550
|
-
},
|
|
551
|
-
annotations: {
|
|
552
|
-
"lobu.io/status": "running",
|
|
553
|
-
"lobu.io/created": new Date().toISOString(),
|
|
554
|
-
},
|
|
555
|
-
finalizers: [LOBU_FINALIZER],
|
|
556
|
-
},
|
|
557
|
-
spec: {
|
|
558
|
-
replicas: 1,
|
|
559
|
-
selector: {
|
|
560
|
-
matchLabels: { ...WORKER_SELECTOR_LABELS },
|
|
561
|
-
},
|
|
562
|
-
template: {
|
|
563
|
-
metadata: {
|
|
564
|
-
annotations: {
|
|
565
|
-
// Add platform-specific metadata
|
|
566
|
-
...resolvePlatformDeploymentMetadata(messageData),
|
|
567
|
-
"lobu.io/created": new Date().toISOString(),
|
|
568
|
-
"lobu.io/agent-id": agentId,
|
|
569
|
-
...(traceparent ? { "lobu.io/traceparent": traceparent } : {}),
|
|
570
|
-
},
|
|
571
|
-
labels: {
|
|
572
|
-
...BASE_WORKER_LABELS,
|
|
573
|
-
"lobu.io/platform": platform,
|
|
574
|
-
},
|
|
575
|
-
},
|
|
576
|
-
spec: {
|
|
577
|
-
serviceAccountName: this.getWorkerServiceAccountName(),
|
|
578
|
-
imagePullSecrets: this.getWorkerImagePullSecrets(),
|
|
579
|
-
// Only set runtimeClassName if configured and available (validated on startup)
|
|
580
|
-
...(this.config.worker.runtimeClassName
|
|
581
|
-
? { runtimeClassName: this.config.worker.runtimeClassName }
|
|
582
|
-
: {}),
|
|
583
|
-
securityContext: {
|
|
584
|
-
fsGroup: WORKER_SECURITY.GROUP_ID,
|
|
585
|
-
fsGroupChangePolicy: "OnRootMismatch",
|
|
586
|
-
},
|
|
587
|
-
// Init container to bootstrap Nix store from image to PVC (first time only)
|
|
588
|
-
...(hasNixConfig
|
|
589
|
-
? {
|
|
590
|
-
initContainers: [
|
|
591
|
-
{
|
|
592
|
-
name: "nix-bootstrap",
|
|
593
|
-
image: workerImage,
|
|
594
|
-
imagePullPolicy:
|
|
595
|
-
this.config.worker.image.pullPolicy || "Always",
|
|
596
|
-
command: [
|
|
597
|
-
"bash",
|
|
598
|
-
"-c",
|
|
599
|
-
"if [ ! -f /workspace/.nix-bootstrapped ]; then " +
|
|
600
|
-
'echo "Bootstrapping Nix store to PVC..." && ' +
|
|
601
|
-
"cp -a /nix/store /workspace/.nix-store && " +
|
|
602
|
-
"cp -a /nix/var /workspace/.nix-var && " +
|
|
603
|
-
"mkdir -p /workspace/.nix-store/.nix-pvc-mounted && " +
|
|
604
|
-
"touch /workspace/.nix-bootstrapped && " +
|
|
605
|
-
'echo "Nix bootstrap complete"; ' +
|
|
606
|
-
'else echo "Nix store already bootstrapped"; fi',
|
|
607
|
-
],
|
|
608
|
-
securityContext: {
|
|
609
|
-
runAsUser: WORKER_SECURITY.USER_ID,
|
|
610
|
-
runAsGroup: WORKER_SECURITY.GROUP_ID,
|
|
611
|
-
},
|
|
612
|
-
volumeMounts: [
|
|
613
|
-
{
|
|
614
|
-
name: "workspace",
|
|
615
|
-
mountPath: "/workspace",
|
|
616
|
-
},
|
|
617
|
-
],
|
|
618
|
-
},
|
|
619
|
-
],
|
|
620
|
-
}
|
|
621
|
-
: {}),
|
|
622
|
-
containers: [
|
|
623
|
-
{
|
|
624
|
-
name: "worker",
|
|
625
|
-
image: workerImage,
|
|
626
|
-
imagePullPolicy:
|
|
627
|
-
this.config.worker.image.pullPolicy || "Always",
|
|
628
|
-
securityContext: {
|
|
629
|
-
runAsUser: WORKER_SECURITY.USER_ID,
|
|
630
|
-
runAsGroup: WORKER_SECURITY.GROUP_ID,
|
|
631
|
-
runAsNonRoot: true,
|
|
632
|
-
// Enable read-only root filesystem for security (matches Docker behavior)
|
|
633
|
-
readOnlyRootFilesystem: true,
|
|
634
|
-
// Prevent privilege escalation
|
|
635
|
-
allowPrivilegeEscalation: false,
|
|
636
|
-
// Drop all capabilities (matches Docker CAP_DROP: ALL)
|
|
637
|
-
capabilities: {
|
|
638
|
-
drop: ["ALL"],
|
|
639
|
-
},
|
|
640
|
-
},
|
|
641
|
-
env: [
|
|
642
|
-
// Common environment variables from base class
|
|
643
|
-
// (includes HTTP_PROXY, HTTPS_PROXY, NO_PROXY, NODE_ENV, DEBUG)
|
|
644
|
-
...Object.entries(envVars).map(([key, value]) => ({
|
|
645
|
-
name: key,
|
|
646
|
-
value: value,
|
|
647
|
-
})),
|
|
648
|
-
// Add traceparent for distributed tracing (passed to worker)
|
|
649
|
-
...(traceparent
|
|
650
|
-
? [{ name: "TRACEPARENT", value: traceparent }]
|
|
651
|
-
: []),
|
|
652
|
-
],
|
|
653
|
-
resources: {
|
|
654
|
-
requests: this.config.worker.resources.requests,
|
|
655
|
-
limits: this.config.worker.resources.limits,
|
|
656
|
-
},
|
|
657
|
-
volumeMounts: [
|
|
658
|
-
{
|
|
659
|
-
name: "workspace",
|
|
660
|
-
mountPath: "/workspace",
|
|
661
|
-
},
|
|
662
|
-
// Tmpfs mounts for writable directories (matches Docker behavior)
|
|
663
|
-
{
|
|
664
|
-
name: "tmp",
|
|
665
|
-
mountPath: "/tmp",
|
|
666
|
-
},
|
|
667
|
-
// /dev/shm for shared memory (needed by Chromium and other apps)
|
|
668
|
-
{
|
|
669
|
-
name: "dshm",
|
|
670
|
-
mountPath: "/dev/shm",
|
|
671
|
-
},
|
|
672
|
-
// When Nix packages configured, mount PVC subpaths at /nix/store and /nix/var
|
|
673
|
-
...(hasNixConfig
|
|
674
|
-
? [
|
|
675
|
-
{
|
|
676
|
-
name: "workspace",
|
|
677
|
-
mountPath: "/nix/store",
|
|
678
|
-
subPath: ".nix-store",
|
|
679
|
-
},
|
|
680
|
-
{
|
|
681
|
-
name: "workspace",
|
|
682
|
-
mountPath: "/nix/var",
|
|
683
|
-
subPath: ".nix-var",
|
|
684
|
-
},
|
|
685
|
-
]
|
|
686
|
-
: []),
|
|
687
|
-
],
|
|
688
|
-
},
|
|
689
|
-
],
|
|
690
|
-
volumes: [
|
|
691
|
-
{
|
|
692
|
-
name: "workspace",
|
|
693
|
-
// Use per-deployment PVC for session persistence across scale-to-zero
|
|
694
|
-
persistentVolumeClaim: {
|
|
695
|
-
claimName: pvcName,
|
|
696
|
-
},
|
|
697
|
-
},
|
|
698
|
-
// Tmpfs volumes for temporary files (in-memory, matches Docker Tmpfs)
|
|
699
|
-
{
|
|
700
|
-
name: "tmp",
|
|
701
|
-
emptyDir: {
|
|
702
|
-
medium: "Memory",
|
|
703
|
-
sizeLimit: WORKER_SECURITY.TMP_SIZE_LIMIT,
|
|
704
|
-
},
|
|
705
|
-
},
|
|
706
|
-
// Shared memory for Chromium and other apps requiring /dev/shm
|
|
707
|
-
{
|
|
708
|
-
name: "dshm",
|
|
709
|
-
emptyDir: {
|
|
710
|
-
medium: "Memory",
|
|
711
|
-
sizeLimit: "256Mi",
|
|
712
|
-
},
|
|
713
|
-
},
|
|
714
|
-
],
|
|
715
|
-
},
|
|
716
|
-
},
|
|
717
|
-
},
|
|
718
|
-
};
|
|
719
|
-
|
|
720
|
-
// Create child span for worker creation (linked to parent via traceparent)
|
|
721
|
-
const workerSpan = createChildSpan("worker_creation", traceparent, {
|
|
722
|
-
"lobu.deployment_name": deploymentName,
|
|
723
|
-
"lobu.user_id": userId,
|
|
724
|
-
"lobu.agent_id": agentId,
|
|
725
|
-
});
|
|
726
|
-
|
|
727
|
-
logger.info(
|
|
728
|
-
{ traceparent, deploymentName },
|
|
729
|
-
"Submitting deployment to K8s API"
|
|
730
|
-
);
|
|
731
|
-
|
|
732
|
-
try {
|
|
733
|
-
const response = await this.appsV1Api.createNamespacedDeployment(
|
|
734
|
-
this.config.kubernetes.namespace,
|
|
735
|
-
deployment
|
|
736
|
-
);
|
|
737
|
-
await waitForWorkerReady(
|
|
738
|
-
this.appsV1Api,
|
|
739
|
-
this.coreV1Api,
|
|
740
|
-
this.config.kubernetes.namespace,
|
|
741
|
-
deploymentName,
|
|
742
|
-
this.getWorkerStartupTimeoutMs()
|
|
743
|
-
);
|
|
744
|
-
|
|
745
|
-
const statusResponse = response as { response?: { statusCode?: number } };
|
|
746
|
-
workerSpan?.setAttribute(
|
|
747
|
-
"http.status_code",
|
|
748
|
-
statusResponse.response?.statusCode || 0
|
|
749
|
-
);
|
|
750
|
-
workerSpan?.setStatus({ code: SpanStatusCode.OK });
|
|
751
|
-
workerSpan?.end();
|
|
752
|
-
logger.info(
|
|
753
|
-
{ deploymentName, status: statusResponse.response?.statusCode },
|
|
754
|
-
"Deployment created and worker became ready"
|
|
755
|
-
);
|
|
756
|
-
} catch (error) {
|
|
757
|
-
const k8sError = error as {
|
|
758
|
-
statusCode?: number;
|
|
759
|
-
message?: string;
|
|
760
|
-
body?: unknown;
|
|
761
|
-
response?: { statusMessage?: string };
|
|
762
|
-
code?: string;
|
|
763
|
-
};
|
|
764
|
-
// Log detailed error information
|
|
765
|
-
logger.error(`❌ Failed to create deployment ${deploymentName}:`, {
|
|
766
|
-
statusCode: k8sError.statusCode,
|
|
767
|
-
message: k8sError.message,
|
|
768
|
-
body: k8sError.body,
|
|
769
|
-
response: k8sError.response?.statusMessage,
|
|
770
|
-
});
|
|
771
|
-
|
|
772
|
-
// Clean up the PVC that was created before the deployment failed
|
|
773
|
-
try {
|
|
774
|
-
await this.coreV1Api.deleteNamespacedPersistentVolumeClaim(
|
|
775
|
-
pvcName,
|
|
776
|
-
this.config.kubernetes.namespace
|
|
777
|
-
);
|
|
778
|
-
logger.info(
|
|
779
|
-
`Cleaned up orphaned PVC ${pvcName} after deployment creation failure`
|
|
780
|
-
);
|
|
781
|
-
} catch (pvcCleanupError) {
|
|
782
|
-
const pvcError = pvcCleanupError as { statusCode?: number };
|
|
783
|
-
if (pvcError.statusCode === 404) {
|
|
784
|
-
logger.debug(`PVC ${pvcName} already deleted, skipping cleanup`);
|
|
785
|
-
} else {
|
|
786
|
-
logger.error(
|
|
787
|
-
`Failed to clean up orphaned PVC ${pvcName}:`,
|
|
788
|
-
pvcCleanupError instanceof Error
|
|
789
|
-
? pvcCleanupError.message
|
|
790
|
-
: String(pvcCleanupError)
|
|
791
|
-
);
|
|
792
|
-
}
|
|
793
|
-
}
|
|
794
|
-
|
|
795
|
-
// End span with error
|
|
796
|
-
workerSpan?.setStatus({
|
|
797
|
-
code: SpanStatusCode.ERROR,
|
|
798
|
-
message: k8sError.message || "Deployment failed",
|
|
799
|
-
});
|
|
800
|
-
workerSpan?.end();
|
|
801
|
-
|
|
802
|
-
// Check for specific error conditions and throw OrchestratorError
|
|
803
|
-
if (k8sError.statusCode === 409) {
|
|
804
|
-
throw new OrchestratorError(
|
|
805
|
-
ErrorCode.DEPLOYMENT_CREATE_FAILED,
|
|
806
|
-
`Deployment ${deploymentName} already exists`,
|
|
807
|
-
{ deploymentName, statusCode: 409 },
|
|
808
|
-
false
|
|
809
|
-
);
|
|
810
|
-
} else if (k8sError.statusCode === 403) {
|
|
811
|
-
throw new OrchestratorError(
|
|
812
|
-
ErrorCode.DEPLOYMENT_CREATE_FAILED,
|
|
813
|
-
`Insufficient permissions to create deployment ${deploymentName}`,
|
|
814
|
-
{ deploymentName, statusCode: 403 },
|
|
815
|
-
true
|
|
816
|
-
);
|
|
817
|
-
} else if (k8sError.statusCode === 422) {
|
|
818
|
-
throw new OrchestratorError(
|
|
819
|
-
ErrorCode.DEPLOYMENT_CREATE_FAILED,
|
|
820
|
-
`Invalid deployment specification for ${deploymentName}: ${JSON.stringify(k8sError.body)}`,
|
|
821
|
-
{ deploymentName, statusCode: 422, body: k8sError.body },
|
|
822
|
-
true
|
|
823
|
-
);
|
|
824
|
-
} else if (
|
|
825
|
-
k8sError.message?.includes("timeout") ||
|
|
826
|
-
k8sError.code === "ETIMEDOUT"
|
|
827
|
-
) {
|
|
828
|
-
throw new OrchestratorError(
|
|
829
|
-
ErrorCode.DEPLOYMENT_CREATE_FAILED,
|
|
830
|
-
`Timeout creating deployment ${deploymentName} - K8s API may be overloaded`,
|
|
831
|
-
{ deploymentName, code: k8sError.code },
|
|
832
|
-
true
|
|
833
|
-
);
|
|
834
|
-
} else {
|
|
835
|
-
throw new OrchestratorError(
|
|
836
|
-
ErrorCode.DEPLOYMENT_CREATE_FAILED,
|
|
837
|
-
`HTTP request failed: ${k8sError.message || k8sError.response?.statusMessage || "Unknown error"}`,
|
|
838
|
-
{ deploymentName, error },
|
|
839
|
-
true
|
|
840
|
-
);
|
|
841
|
-
}
|
|
842
|
-
}
|
|
843
|
-
}
|
|
844
|
-
|
|
845
|
-
/**
 * Bring a worker deployment to the requested replica count.
 *
 * The current deployment is read first; a strategic-merge patch (replica
 * count plus the `lobu.io/status` annotation) is only sent when the live
 * replica count differs from the requested one. When scaling to a positive
 * count, the call then blocks until `waitForWorkerReady` resolves.
 *
 * @param deploymentName Name of the deployment in the configured namespace.
 * @param replicas Desired replica count; values above zero are annotated
 *   "running", anything else "scaled-down".
 * @throws OrchestratorError with `DEPLOYMENT_SCALE_FAILED` wrapping any
 *   failure from the read, the patch, or the readiness wait.
 */
async scaleDeployment(
  deploymentName: string,
  replicas: number
): Promise<void> {
  try {
    const { namespace } = this.config.kubernetes;
    const existing = await this.appsV1Api.readNamespacedDeployment(
      deploymentName,
      namespace
    );

    // The client response wraps the resource in `.body`.
    const liveReplicas = (existing as any).body?.spec?.replicas;

    if (liveReplicas !== replicas) {
      const statusAnnotation = replicas > 0 ? "running" : "scaled-down";
      const scalePatch = {
        metadata: {
          annotations: { "lobu.io/status": statusAnnotation },
        },
        spec: { replicas },
      };
      const requestOptions = {
        headers: {
          "Content-Type": "application/strategic-merge-patch+json",
        },
      };

      await this.appsV1Api.patchNamespacedDeployment(
        deploymentName,
        namespace,
        scalePatch,
        undefined,
        undefined,
        undefined,
        undefined,
        undefined,
        requestOptions
      );
    }

    // Scaling up (or already up): wait for the worker to report ready.
    if (replicas > 0) {
      const startupTimeoutMs = this.getWorkerStartupTimeoutMs();
      await waitForWorkerReady(
        this.appsV1Api,
        this.coreV1Api,
        namespace,
        deploymentName,
        startupTimeoutMs
      );
    }
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    // NOTE(review): the trailing `true` is presumably a "retryable" flag — confirm against OrchestratorError.
    throw new OrchestratorError(
      ErrorCode.DEPLOYMENT_SCALE_FAILED,
      `Failed to scale deployment ${deploymentName}: ${reason}`,
      { deploymentName, replicas, error },
      true
    );
  }
}
|
|
902
|
-
|
|
903
|
-
/**
 * Delete a worker deployment from the configured namespace.
 *
 * The finalizer is removed first so the resource can be garbage-collected,
 * then the deployment is deleted with "Foreground" propagation. A 404 from
 * the delete call is treated as "already gone" and only logged; every other
 * error is rethrown unchanged.
 *
 * @param deploymentName Name of the deployment to delete.
 */
async deleteDeployment(deploymentName: string): Promise<void> {
  // Drop our finalizer up front; otherwise the delete below could not complete.
  await removeFinalizerFromResource(
    this.appsV1Api,
    this.coreV1Api,
    this.config.kubernetes.namespace,
    "deployment",
    deploymentName
  );

  try {
    // "Foreground": wait for pods to terminate before returning.
    const propagationPolicy = "Foreground";
    await this.appsV1Api.deleteNamespacedDeployment(
      deploymentName,
      this.config.kubernetes.namespace,
      undefined,
      undefined,
      undefined,
      undefined,
      propagationPolicy
    );
    logger.info(`✅ Deleted deployment: ${deploymentName}`);
  } catch (error) {
    const alreadyGone = (error as { statusCode?: number }).statusCode === 404;
    if (!alreadyGone) {
      throw error;
    }
    logger.info(
      `⚠️ Deployment ${deploymentName} not found (already deleted)`
    );
  }

  // NOTE: space PVCs are intentionally left in place here. They are shared
  // across threads in the same space and persist for future conversations;
  // cleanup happens manually or via a separate process.
}
|
|
940
|
-
|
|
941
|
-
/**
 * Reconciliation pass for this orchestrator.
 *
 * Runs, in order: worker deployment image reconciliation, cleanup of
 * orphaned PVC finalizers, and finally the base-class reconciliation.
 * Orphaned-deployment cleanup is not repeated here; it is handled inside
 * listDeployments(), which already iterates the raw K8s objects, so doing
 * it again would duplicate API calls.
 */
async reconcileDeployments(): Promise<void> {
  // Step 1: bring worker deployment images up to date.
  await this.reconcileWorkerDeploymentImages();

  // Step 2: release finalizers on PVCs that no longer have an owner.
  const { appsV1Api, coreV1Api } = this;
  const { namespace } = this.config.kubernetes;
  await cleanupOrphanedPvcFinalizers(appsV1Api, coreV1Api, namespace);

  // Step 3: defer to the shared reconciliation logic.
  await super.reconcileDeployments();
}
|
|
955
|
-
|
|
956
|
-
/**
 * Stamp the deployment with the current time in its
 * `lobu.io/last-activity` annotation.
 *
 * Failures are logged and swallowed on purpose: activity tracking must not
 * block message processing.
 *
 * @param deploymentName Name of the deployment to annotate.
 */
async updateDeploymentActivity(deploymentName: string): Promise<void> {
  try {
    const activityPatch = {
      metadata: {
        annotations: {
          "lobu.io/last-activity": new Date().toISOString(),
        },
      },
    };
    const requestOptions = {
      headers: { "Content-Type": "application/strategic-merge-patch+json" },
    };

    await this.appsV1Api.patchNamespacedDeployment(
      deploymentName,
      this.config.kubernetes.namespace,
      activityPatch,
      undefined,
      undefined,
      undefined,
      undefined,
      undefined,
      requestOptions
    );
  } catch (error) {
    // Best-effort only: report and carry on.
    const reason = error instanceof Error ? error.message : String(error);
    logger.error(
      `❌ Failed to update activity for deployment ${deploymentName}:`,
      reason
    );
  }
}
|
|
988
|
-
|
|
989
|
-
/**
 * Build the in-cluster DNS name of the dispatcher service.
 *
 * The service name comes from the `DISPATCHER_SERVICE_NAME` environment
 * variable, falling back to "lobu-dispatcher" when it is unset or empty.
 *
 * @returns `<service>.<namespace>.svc.cluster.local`
 */
protected getDispatcherHost(): string {
  const configuredName = process.env.DISPATCHER_SERVICE_NAME;
  const serviceName = configuredName ? configuredName : "lobu-dispatcher";
  const namespace = this.config.kubernetes.namespace;
  return serviceName + "." + namespace + ".svc.cluster.local";
}
|
|
994
|
-
|
|
995
|
-
/**
 * Start a watch-based informer for worker deployments.
 *
 * The informer maintains a local cache that is updated via K8s watch events,
 * reducing the need for frequent list API calls. The call is a no-op when an
 * informer already exists or another start is in progress.
 *
 * The client informer stops watching once it has emitted an `error` event,
 * so the error handler schedules a delayed restart. The restart is skipped
 * when the informer has meanwhile been stopped or replaced (i.e. when
 * `this.informer` no longer points at the instance created here).
 *
 * A failure of the initial start is logged and leaves `this.informer` null so
 * callers fall back to polling; it is not rethrown.
 */
async startInformer(): Promise<void> {
  if (this.informer || this.informerInitializing) return;

  this.informerInitializing = true;

  const namespace = this.config.kubernetes.namespace;
  const workerSelector = "app.kubernetes.io/component=worker";
  const restartDelayMs = 5000;
  const describe = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  const listFn = () =>
    this.appsV1Api.listNamespacedDeployment(
      namespace,
      undefined,
      undefined,
      undefined,
      undefined,
      workerSelector
    );

  try {
    const informer = k8s.makeInformer(
      this.kc,
      `/apis/apps/v1/namespaces/${namespace}/deployments`,
      listFn,
      workerSelector
    );
    this.informer = informer;

    // Only one restart may be pending at a time, so a burst of error
    // events cannot spawn parallel watches.
    let restartPending = false;
    const scheduleRestart = (): void => {
      if (restartPending) return;
      restartPending = true;
      setTimeout(() => {
        restartPending = false;
        // stopInformer() (or a failed initial start) cleared the reference:
        // do not resurrect a watch nobody owns any more.
        if (this.informer !== informer) return;
        informer.start().catch((restartError: unknown) => {
          logger.warn(
            "Failed to restart informer, will retry:",
            describe(restartError)
          );
          scheduleRestart();
        });
      }, restartDelayMs);
    };

    informer.on("error", (err: unknown) => {
      logger.warn("Informer error, will auto-restart:", describe(err));
      scheduleRestart();
    });

    await informer.start();
    logger.info("K8s deployment informer started");
  } catch (error) {
    logger.warn(
      "Failed to start informer, falling back to polling:",
      describe(error)
    );
    // Clearing the reference also disarms any restart scheduled above.
    this.informer = null;
  } finally {
    this.informerInitializing = false;
  }
}
|
|
1043
|
-
|
|
1044
|
-
/**
 * Stop the running informer, if any, and drop the reference to it.
 * Does nothing when no informer is held.
 */
async stopInformer(): Promise<void> {
  const activeInformer = this.informer;
  if (!activeInformer) {
    return;
  }

  activeInformer.stop();
  this.informer = null;
  logger.info("K8s deployment informer stopped");
}
|
|
1054
|
-
|
|
1055
|
-
/**
 * Report whether an informer instance is currently held.
 *
 * This only checks that `this.informer` is non-null; it does not itself
 * verify that the informer's cache has been populated.
 */
isInformerActive(): boolean {
  const currentInformer = this.informer;
  return currentInformer !== null;
}
|
|
1061
|
-
}
|