kova-node-cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +138 -0
- package/bin/cli.js +2 -0
- package/dist/__tests__/auto-bidder.test.js +267 -0
- package/dist/__tests__/container-manager.test.js +189 -0
- package/dist/__tests__/deployment-executor.test.js +332 -0
- package/dist/__tests__/heartbeat.test.js +191 -0
- package/dist/__tests__/lease-handler.test.js +268 -0
- package/dist/__tests__/resource-limits.test.js +164 -0
- package/dist/api/server.js +607 -0
- package/dist/cli.js +47 -0
- package/dist/commands/deploy.js +568 -0
- package/dist/commands/earnings.js +70 -0
- package/dist/commands/start.js +358 -0
- package/dist/commands/status.js +50 -0
- package/dist/commands/stop.js +101 -0
- package/dist/lib/client.js +87 -0
- package/dist/lib/config.js +107 -0
- package/dist/lib/docker.js +415 -0
- package/dist/lib/logger.js +12 -0
- package/dist/lib/message-signer.js +93 -0
- package/dist/lib/monitor.js +105 -0
- package/dist/lib/p2p.js +186 -0
- package/dist/lib/resource-limits.js +84 -0
- package/dist/lib/state.js +113 -0
- package/dist/lib/types.js +2 -0
- package/dist/lib/usage-meter.js +63 -0
- package/dist/services/auto-bidder.js +332 -0
- package/dist/services/container-manager.js +282 -0
- package/dist/services/deployment-executor.js +1562 -0
- package/dist/services/heartbeat.js +110 -0
- package/dist/services/job-handler.js +241 -0
- package/dist/services/lease-handler.js +382 -0
- package/package.json +51 -0
@@ -0,0 +1,1562 @@
// deployment executor - runs deployments from sdl manifests
// handles multi-service deployments, persistent volumes, port exposure
import { EventEmitter } from 'events';
import { PassThrough } from 'stream';
import { logger } from '../lib/logger.js';
import Docker from 'dockerode';
import { createHash } from 'crypto';
export class DeploymentExecutor extends EventEmitter {
    docker;
    executions = new Map();
    orchestratorUrl;
    apiKey;
    constructor(config) {
        super();
        this.docker = new Docker();
        this.orchestratorUrl = config?.orchestratorUrl || process.env.KOVA_ORCHESTRATOR_URL || 'http://localhost:3000';
        this.apiKey = config?.apiKey || '';
    }
    // execute deployment from manifest
    async executeDeployment(options) {
        const { deploymentId, leaseId, manifest } = options;
        logger.info({ deploymentId, leaseId }, 'executing deployment');
        const execution = {
            deploymentId,
            leaseId,
            manifest,
            containers: new Map(),
            networks: [],
            volumes: []
        };
        this.executions.set(deploymentId, execution);
        try {
            // create isolated network for this deployment
            const networkName = `kova-deploy-${deploymentId.slice(-8)}`;
            let network;
            try {
                network = await this.docker.createNetwork({
                    Name: networkName,
                    Driver: 'bridge',
                    Internal: false
                });
                logger.info({ deploymentId, networkName }, 'created deployment network');
            }
            catch (err) {
                if (err.statusCode === 409 || err.message?.includes('already exists')) {
                    // network already exists, get it
                    const networks = await this.docker.listNetworks({
                        filters: { name: [networkName] }
                    });
                    network = networks[0] ? this.docker.getNetwork(networks[0].Id) : null;
                    if (network) {
                        logger.info({ deploymentId, networkName }, 'using existing network');
                    }
                    else {
                        throw new Error(`network ${networkName} exists but could not be retrieved`);
                    }
                }
                else {
                    throw err;
                }
            }
            execution.networks.push(network.id);
            // create persistent volumes if needed
            for (const [serviceName, service] of Object.entries(manifest.services)) {
                if (service.params?.storage) {
                    for (const [volumeName, volumeConfig] of Object.entries(service.params.storage)) {
                        const volumeFullName = `kova-${deploymentId}-${serviceName}-${volumeName}`;
                        const volume = await this.docker.createVolume({
                            Name: volumeFullName,
                            Driver: 'local'
                        });
                        execution.volumes.push(volume.Name);
                        logger.info({ deploymentId, volumeName: volumeFullName }, 'created persistent volume');
                        // if source is "uploads", download and populate volume
                        if (volumeConfig.source === 'uploads') {
                            await this.populateVolumeFromUploads(deploymentId, serviceName, volumeFullName);
                        }
                    }
                }
            }
            // sort services by depends_on so dependencies start first (topological order)
            const serviceEntries = Object.entries(manifest.services);
            const sorted = this.topologicalSort(serviceEntries);
            // start each service in dependency order, respecting replica count
            for (const [serviceName, service] of sorted) {
                // extract gpu config from profiles if available
                let gpu;
                let replicaCount = 1;
                if (manifest.profiles?.compute) {
                    // find matching compute profile for this service
                    for (const [profileName, profile] of Object.entries(manifest.profiles.compute)) {
                        if (profile.resources?.gpu) {
                            gpu = profile.resources.gpu;
                            break;
                        }
                    }
                }
                // get replica count from deployment section
                const serviceDeployment = manifest.deployment?.[serviceName];
                if (serviceDeployment) {
                    for (const [, config] of Object.entries(serviceDeployment)) {
                        if (config?.count && config.count > 1) {
                            replicaCount = Math.min(config.count, 20); // cap at 20 replicas
                            break;
                        }
                    }
                }
                if (replicaCount > 1) {
                    logger.info({ deploymentId, serviceName, replicaCount }, 'starting service replicas');
                    for (let i = 0; i < replicaCount; i++) {
                        const replicaName = `${serviceName}-${i}`;
                        await this.startService(deploymentId, replicaName, service, execution, networkName, gpu);
                    }
                }
                else {
                    await this.startService(deploymentId, serviceName, service, execution, networkName, gpu);
                }
            }
            this.emit('deployment-started', { deploymentId, leaseId });
            logger.info({ deploymentId, services: execution.containers.size }, 'deployment running');
        }
        catch (err) {
            logger.error({ err, deploymentId }, 'deployment execution failed');
            await this.cleanupDeployment(deploymentId, true); // cleanup all resources on failure
            throw err;
        }
    }
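    /*
     * Illustrative sketch (not part of the package): the manifest shape that
     * executeDeployment() reads above, inferred from the code paths it touches
     * (manifest.services, manifest.profiles.compute, manifest.deployment). The
     * exact SDL schema and these field values are assumptions.
     *
     * const manifest = {
     *     services: {
     *         web: {
     *             image: 'nginx:alpine',
     *             env: { PORT: '8080' },
     *             expose: [{ port: 8080 }],
     *             params: { storage: { data: { mount: '/data', source: 'uploads' } } },
     *             depends_on: ['db']
     *         },
     *         db: { image: 'postgres:16' }
     *     },
     *     profiles: {
     *         compute: {
     *             web: { resources: { cpu: { units: '2' }, memory: { size: '1Gi' } } }
     *         }
     *     },
     *     deployment: { web: { dc1: { profile: 'web', count: 2 } } }
     * };
     * await executor.executeDeployment({ deploymentId: 'dep-1', leaseId: 'lease-1', manifest });
     */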
    // parse memory string like "512Mi", "2Gi" into bytes
    parseMemoryToBytes(size) {
        const units = {
            'K': 1000, 'M': 1000 ** 2, 'G': 1000 ** 3, 'T': 1000 ** 4,
            'Ki': 1024, 'Mi': 1024 ** 2, 'Gi': 1024 ** 3, 'Ti': 1024 ** 4,
        };
        const match = size.match(/^(\d+(?:\.\d+)?)\s*([A-Za-z]+)$/);
        if (!match)
            return 4 * 1024 * 1024 * 1024; // 4gb fallback
        const value = parseFloat(match[1]);
        const unit = match[2];
        return Math.floor(value * (units[unit] || 1));
    }
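    // Worked example for the parser above (illustrative, not part of the package):
    //   parseMemoryToBytes('512Mi') -> 512 * 1024**2 = 536870912
    //   parseMemoryToBytes('2Gi')   -> 2 * 1024**3   = 2147483648
    //   parseMemoryToBytes('1G')    -> 1 * 1000**3   = 1000000000
    //   parseMemoryToBytes('oops')  -> 4294967296 (no match, 4 GiB fallback)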
    // start a single service
    async startService(deploymentId, serviceName, service, execution, networkName, gpu) {
        logger.info({ deploymentId, serviceName, image: service.image }, 'starting service');
        // convert env to docker format
        let env = [];
        if (service.env) {
            if (Array.isArray(service.env)) {
                env = service.env;
            }
            else {
                env = Object.entries(service.env).map(([k, v]) => `${k}=${v}`);
            }
        }
        // setup volume binds and tmpfs mounts for ram class storage
        const binds = [];
        const tmpfs = {};
        if (service.params?.storage) {
            // look up storage resources from the compute profile to check classes
            const serviceDeployment = execution?.manifest?.deployment?.[serviceName];
            let storageResources = [];
            if (serviceDeployment) {
                for (const [, config] of Object.entries(serviceDeployment)) {
                    if (config?.profile) {
                        const profile = execution?.manifest?.profiles?.compute?.[config.profile];
                        if (profile?.resources?.storage) {
                            storageResources = Array.isArray(profile.resources.storage)
                                ? profile.resources.storage : [profile.resources.storage];
                        }
                        break;
                    }
                }
            }
            for (const [volumeName, volumeConfig] of Object.entries(service.params.storage)) {
                const mountPath = volumeConfig.mount;
                // check if this volume's storage class is 'ram' (shared memory / tmpfs)
                const matchingStorage = storageResources.find((s) => s.name === volumeName);
                if (matchingStorage?.attributes?.class === 'ram') {
                    // create tmpfs mount instead of docker volume
                    const sizeBytes = this.parseMemoryToBytes(matchingStorage.size || '64Mi');
                    tmpfs[mountPath] = `size=${sizeBytes}`;
                    logger.info({ deploymentId, serviceName, volumeName, mountPath, size: matchingStorage.size }, 'using tmpfs for ram class storage');
                }
                else {
                    const volumeFullName = `kova-${deploymentId}-${serviceName}-${volumeName}`;
                    const mode = volumeConfig.readOnly ? 'ro' : 'rw';
                    binds.push(`${volumeFullName}:${mountPath}:${mode}`);
                }
            }
        }
        // setup port exposure (internal only, no host binding)
        // ingress controller will proxy to these ports via docker network
        const exposedPorts = {};
        if (service.expose) {
            for (const expose of service.expose) {
                const containerPort = expose.port;
                exposedPorts[`${containerPort}/tcp`] = {};
            }
        }
        // pull image first (pass credentials for private registries)
        try {
            await this.pullImage(service.image, deploymentId, serviceName, service.credentials);
        }
        catch (err) {
            logger.error({ err, image: service.image }, 'failed to pull image');
            throw err;
        }
        // create container
        const containerName = `kova-${deploymentId}-${serviceName}`;
        let container;
        let isExisting = false;
        // check if container already exists
        try {
            const existing = this.docker.getContainer(containerName);
            const info = await existing.inspect();
            if (info.State.Running) {
                // container is already running, reuse it
                container = existing;
                isExisting = true;
                logger.info({ containerName, containerId: info.Id }, 'reusing existing running container');
            }
            else {
                // container exists but not running, remove and recreate
                await existing.remove({ force: true });
                logger.info({ containerName }, 'removed stopped container');
            }
        }
        catch (err) {
            // container doesn't exist, will create new one
        }
        if (!isExisting) {
            // figure out resource limits from the service's compute profile
            let memoryLimit = 4 * 1024 * 1024 * 1024; // 4gb default
            let cpuCores = 4; // 4 cores default
            // look up the compute profile mapped to this specific service
            const serviceDeployment = execution.manifest.deployment?.[serviceName];
            let profileName = null;
            if (serviceDeployment) {
                // deployment section: { serviceName: { placementName: { profile: "profileName", count: N } } }
                for (const [, config] of Object.entries(serviceDeployment)) {
                    if (config?.profile) {
                        profileName = config.profile;
                        break;
                    }
                }
            }
            const profiles = execution.manifest.profiles?.compute;
            if (profiles) {
                // use the mapped profile for this service, or fall back to first available
                const profile = profileName && profiles[profileName]
                    ? profiles[profileName]
                    : Object.values(profiles)[0];
                if (profile?.resources) {
                    const res = profile.resources;
                    if (res.memory?.size) {
                        memoryLimit = this.parseMemoryToBytes(res.memory.size);
                    }
                    if (res.cpu?.units) {
                        cpuCores = parseFloat(res.cpu.units) || 4;
                    }
                }
            }
            // clamp to sane limits
            const maxMemory = 32 * 1024 * 1024 * 1024; // 32gb hard ceiling
            memoryLimit = Math.min(memoryLimit, maxMemory);
            cpuCores = Math.min(cpuCores, 32);
            const containerConfig = {
                name: containerName,
                Image: service.image,
                Env: env,
                ExposedPorts: exposedPorts,
                HostConfig: {
                    NetworkMode: networkName,
                    Binds: binds,
                    ReadonlyRootfs: false,
                    AutoRemove: false,
                    RestartPolicy: {
                        Name: execution.restartPolicy || 'on-failure',
                        MaximumRetryCount: (execution.restartPolicy || 'on-failure') === 'on-failure'
                            ? (execution.restartMaxRetries || 3)
                            : 0
                    },
                    // resource limits based on what was ordered
                    Memory: memoryLimit,
                    MemorySwap: memoryLimit,
                    CpuPeriod: 100000,
                    CpuQuota: Math.floor(cpuCores * 100000),
                    Privileged: false,
                    PidsLimit: 256,
                    SecurityOpt: ['no-new-privileges:true'],
                    CapDrop: ['ALL'],
                    CapAdd: ['CHOWN', 'NET_BIND_SERVICE'],
                    // tmpfs mounts for ram class storage (shared memory)
                    ...(Object.keys(tmpfs).length > 0 ? { Tmpfs: tmpfs } : {})
                },
                Labels: {
                    'kova.deployment': deploymentId,
                    'kova.service': serviceName,
                    'kova.lease': execution.leaseId
                }
            };
            // add gpu device request if specified
            if (gpu && gpu.units > 0) {
                containerConfig.HostConfig.DeviceRequests = [{
                    Driver: '',
                    Count: gpu.units,
                    DeviceIDs: [],
                    Capabilities: [['gpu']],
                    Options: {}
                }];
                logger.info({ deploymentId, serviceName, gpuUnits: gpu.units }, 'requesting gpu access');
            }
            // add command override if specified (docker CMD)
            if (service.command && service.command.length > 0) {
                containerConfig.Cmd = service.command;
                logger.info({ deploymentId, serviceName, command: service.command }, 'using custom command');
            }
            // add entrypoint args if specified
            if (service.args && service.args.length > 0) {
                containerConfig.Entrypoint = service.args;
                logger.info({ deploymentId, serviceName, args: service.args }, 'using custom entrypoint');
            }
            container = await this.docker.createContainer(containerConfig);
            // start container
            await container.start();
            logger.info({ deploymentId, serviceName, containerId: container.id }, 'service started');
        }
        execution.containers.set(serviceName, container.id);
        // start streaming logs
        this.streamLogs(container, deploymentId, serviceName);
    }
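    // Note on the CPU limit math above (illustrative): Docker's CFS quota is
    // expressed per scheduling period, so with CpuPeriod = 100000 microseconds
    // a service ordered 2.5 cpu units gets CpuQuota = floor(2.5 * 100000)
    // = 250000, i.e. 2.5 CPUs' worth of time per period.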
    // pull docker image with progress, optionally using private registry credentials
    async pullImage(image, deploymentId, serviceName, credentials) {
        const pullOptions = {};
        if (credentials) {
            pullOptions.authconfig = {
                username: credentials.username,
                password: credentials.password,
                serveraddress: credentials.host,
                ...(credentials.email ? { email: credentials.email } : {})
            };
            logger.info({ deploymentId, serviceName, registry: credentials.host }, 'using private registry credentials');
        }
        return new Promise((resolve, reject) => {
            this.docker.pull(image, pullOptions, (err, stream) => {
                if (err) {
                    return reject(err);
                }
                this.docker.modem.followProgress(stream, (err) => {
                    if (err) {
                        this.emitLog(deploymentId, serviceName, `failed to pull ${image}: ${err.message}`, 'stderr');
                        return reject(err);
                    }
                    this.emitLog(deploymentId, serviceName, `pulled ${image}`, 'stdout');
                    resolve();
                }, (event) => {
                    if (event.status) {
                        this.emitLog(deploymentId, serviceName, `[pull] ${event.status}`, 'stdout');
                    }
                });
            });
        });
    }
    // stream container logs
    streamLogs(container, deploymentId, serviceName) {
        container.logs({
            follow: true,
            stdout: true,
            stderr: true,
            timestamps: false
        }, (err, stream) => {
            if (err) {
                logger.error({ err }, 'failed to attach to container logs');
                return;
            }
            // docker multiplexes stdout/stderr streams, need to demux
            const stdout = new PassThrough();
            const stderr = new PassThrough();
            container.modem.demuxStream(stream, stdout, stderr);
            stdout.on('data', (chunk) => {
                const logLine = chunk.toString('utf8').trim();
                if (logLine) {
                    this.emitLog(deploymentId, serviceName, logLine, 'stdout');
                }
            });
            stderr.on('data', (chunk) => {
                const logLine = chunk.toString('utf8').trim();
                if (logLine) {
                    this.emitLog(deploymentId, serviceName, logLine, 'stderr');
                }
            });
            stream.on('end', () => {
                logger.info({ deploymentId, serviceName }, 'log stream ended');
            });
            stream.on('error', (err) => {
                logger.error({ err, deploymentId, serviceName }, 'log stream error');
            });
        });
    }
    // emit log entry
    emitLog(deploymentId, serviceName, logLine, stream) {
        this.emit('log', {
            deploymentId,
            serviceName,
            logLine,
            stream,
            timestamp: new Date()
        });
    }
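    /*
     * Illustrative consumer sketch (not part of the package): the executor is an
     * EventEmitter, so callers can subscribe to the 'log' and 'deployment-started'
     * events emitted above. The handler shape mirrors emitLog()'s payload.
     *
     * executor.on('log', ({ deploymentId, serviceName, logLine, stream, timestamp }) => {
     *     console.log(`[${timestamp.toISOString()}] ${deploymentId}/${serviceName} ${stream}: ${logLine}`);
     * });
     * executor.on('deployment-started', ({ deploymentId, leaseId }) => {
     *     console.log(`deployment ${deploymentId} running under lease ${leaseId}`);
     * });
     */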
    // stop deployment (preserves persistent volumes for restart)
    async stopDeployment(deploymentId) {
        const execution = this.executions.get(deploymentId);
        if (!execution) {
            logger.warn({ deploymentId }, 'deployment not found');
            return;
        }
        await this.cleanupDeployment(deploymentId, false);
        logger.info({ deploymentId }, 'deployment stopped');
    }
    // close deployment permanently (deletes all resources including persistent volumes)
    async closeDeployment(deploymentId) {
        const execution = this.executions.get(deploymentId);
        if (!execution) {
            // try to find and clean up volumes anyway
            await this.cleanupVolumes(deploymentId);
            logger.warn({ deploymentId }, 'deployment not in memory, cleaned up volumes');
            return;
        }
        await this.cleanupDeployment(deploymentId, true);
        logger.info({ deploymentId }, 'deployment closed permanently');
    }
    // cleanup deployment resources
    async cleanupDeployment(deploymentId, deleteVolumes) {
        const execution = this.executions.get(deploymentId);
        if (!execution)
            return;
        // stop and remove containers
        for (const [serviceName, containerId] of execution.containers.entries()) {
            try {
                const container = this.docker.getContainer(containerId);
                await container.stop({ t: 10 });
                await container.remove();
                logger.info({ deploymentId, serviceName }, 'container removed');
            }
            catch (err) {
                logger.debug({ err, containerId }, 'failed to remove container');
            }
        }
        // remove networks
        for (const networkId of execution.networks) {
            try {
                const network = this.docker.getNetwork(networkId);
                await network.remove();
                logger.info({ deploymentId, networkId }, 'network removed');
            }
            catch (err) {
                logger.debug({ err, networkId }, 'failed to remove network');
            }
        }
        // remove volumes if requested (deployment closed permanently)
        if (deleteVolumes) {
            await this.cleanupVolumes(deploymentId);
        }
        else {
            logger.info({ deploymentId, volumeCount: execution.volumes.length }, 'preserving persistent volumes');
        }
        this.executions.delete(deploymentId);
    }
    // cleanup volumes for a deployment
    async cleanupVolumes(deploymentId) {
        try {
            const volumes = await this.docker.listVolumes({
                filters: {
                    name: [`kova-${deploymentId}`]
                }
            });
            for (const vol of volumes.Volumes || []) {
                try {
                    const volume = this.docker.getVolume(vol.Name);
                    await volume.remove();
                    logger.info({ volumeName: vol.Name }, 'volume removed');
                }
                catch (err) {
                    logger.debug({ err, volumeName: vol.Name }, 'failed to remove volume');
                }
            }
        }
        catch (err) {
            logger.error({ err, deploymentId }, 'failed to cleanup volumes');
        }
    }
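    // Usage sketch (illustrative): the stop vs close semantics implemented above.
    //   await executor.stopDeployment('dep-1');   // containers + network removed, volumes kept
    //   await executor.closeDeployment('dep-1');  // everything removed, volumes included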
    // topological sort of services by depends_on (dependencies start first)
    topologicalSort(services) {
        const serviceMap = new Map(services);
        const sorted = [];
        const visited = new Set();
        const visiting = new Set(); // cycle detection
        const visit = (name) => {
            if (visited.has(name))
                return;
            if (visiting.has(name)) {
                logger.warn({ service: name }, 'circular dependency detected, breaking cycle');
                return;
            }
            visiting.add(name);
            const service = serviceMap.get(name);
            if (service?.depends_on) {
                for (const dep of service.depends_on) {
                    if (serviceMap.has(dep)) {
                        visit(dep);
                    }
                    else {
                        logger.warn({ service: name, dependency: dep }, 'depends_on references unknown service, ignoring');
                    }
                }
            }
            visiting.delete(name);
            visited.add(name);
            if (service) {
                sorted.push([name, service]);
            }
        };
        for (const [name] of services) {
            visit(name);
        }
        return sorted;
    }
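    /*
     * Worked example (illustrative, not part of the package): given web
     * depends_on [db, cache] and cache depends_on [db], the depth-first visit
     * above emits dependencies before their dependents.
     *
     * executor.topologicalSort([
     *     ['web',   { image: 'app:1', depends_on: ['db', 'cache'] }],
     *     ['cache', { image: 'redis:7', depends_on: ['db'] }],
     *     ['db',    { image: 'postgres:16' }]
     * ]);
     * // -> [['db', ...], ['cache', ...], ['web', ...]]
     */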
    // get docker events for containers in a deployment
    async getContainerEvents(deploymentId) {
        const execution = this.executions.get(deploymentId);
        if (!execution) {
            return { error: 'deployment not found', events: [] };
        }
        const containerIds = Array.from(execution.containers.values());
        if (containerIds.length === 0) {
            return { deploymentId, events: [] };
        }
        const events = [];
        for (const [serviceName, containerId] of execution.containers.entries()) {
            try {
                const container = this.docker.getContainer(containerId);
                const info = await container.inspect();
                // synthesize events from container state
                events.push({
                    type: 'container',
                    action: 'create',
                    service: serviceName,
                    containerId: containerId.slice(0, 12),
                    image: info.Config.Image,
                    time: new Date(info.Created).toISOString()
                });
                if (info.State.StartedAt && info.State.StartedAt !== '0001-01-01T00:00:00Z') {
                    events.push({
                        type: 'container',
                        action: 'start',
                        service: serviceName,
                        containerId: containerId.slice(0, 12),
                        image: info.Config.Image,
                        time: info.State.StartedAt
                    });
                }
                if (info.State.FinishedAt && info.State.FinishedAt !== '0001-01-01T00:00:00Z' && !info.State.Running) {
                    events.push({
                        type: 'container',
                        action: 'stop',
                        service: serviceName,
                        containerId: containerId.slice(0, 12),
                        exitCode: info.State.ExitCode,
                        time: info.State.FinishedAt
                    });
                }
                // check health status if configured
                if (info.State.Health) {
                    const health = info.State.Health;
                    events.push({
                        type: 'health',
                        action: health.Status, // healthy, unhealthy, starting
                        service: serviceName,
                        containerId: containerId.slice(0, 12),
                        failingStreak: health.FailingStreak,
                        time: health.Log?.length > 0
                            ? health.Log[health.Log.length - 1].End
                            : new Date().toISOString()
                    });
                }
            }
            catch (err) {
                events.push({
                    type: 'error',
                    action: 'inspect_failed',
                    service: serviceName,
                    containerId: containerId.slice(0, 12),
                    error: err.message,
                    time: new Date().toISOString()
                });
            }
        }
        // sort events by time
        events.sort((a, b) => new Date(a.time).getTime() - new Date(b.time).getTime());
        return { deploymentId, timestamp: Date.now(), events };
    }
    // get running deployments
    getRunningDeployments() {
        return Array.from(this.executions.keys());
    }
    // get deployment info
    getDeployment(deploymentId) {
        return this.executions.get(deploymentId);
    }
    // get real-time docker stats for all containers in a deployment
    async getDeploymentStats(deploymentId) {
        const execution = this.executions.get(deploymentId);
        if (!execution) {
            return { error: 'deployment not found', services: {} };
        }
        const services = {};
        for (const [serviceName, containerId] of execution.containers.entries()) {
            try {
                const container = this.docker.getContainer(containerId);
                // one-shot stats (stream: false) to avoid hanging
                const stats = await container.stats({ stream: false });
                // calculate cpu usage percentage
                const cpuDelta = stats.cpu_stats.cpu_usage.total_usage - (stats.precpu_stats?.cpu_usage?.total_usage || 0);
                const systemDelta = stats.cpu_stats.system_cpu_usage - (stats.precpu_stats?.system_cpu_usage || 0);
                const numCpus = stats.cpu_stats.online_cpus || stats.cpu_stats.cpu_usage?.percpu_usage?.length || 1;
                const cpuPercent = systemDelta > 0 ? (cpuDelta / systemDelta) * numCpus * 100 : 0;
                // memory
                const memUsage = stats.memory_stats.usage || 0;
                const memLimit = stats.memory_stats.limit || 0;
                const memCache = stats.memory_stats.stats?.cache || 0;
                const memActual = memUsage - memCache;
                const memPercent = memLimit > 0 ? (memActual / memLimit) * 100 : 0;
                // network i/o
                let netRx = 0, netTx = 0;
                if (stats.networks) {
                    for (const iface of Object.values(stats.networks)) {
                        netRx += iface.rx_bytes || 0;
                        netTx += iface.tx_bytes || 0;
                    }
                }
                // block i/o
                let blockRead = 0, blockWrite = 0;
                if (stats.blkio_stats?.io_service_bytes_recursive) {
                    for (const entry of stats.blkio_stats.io_service_bytes_recursive) {
                        if (entry.op === 'read' || entry.op === 'Read')
                            blockRead += entry.value;
                        if (entry.op === 'write' || entry.op === 'Write')
                            blockWrite += entry.value;
                    }
                }
                services[serviceName] = {
                    containerId: containerId.slice(0, 12),
                    cpu: { percent: Math.round(cpuPercent * 100) / 100, cores: numCpus },
                    memory: {
                        used: memActual,
                        limit: memLimit,
                        percent: Math.round(memPercent * 100) / 100,
                        usedFormatted: this.formatBytes(memActual),
                        limitFormatted: this.formatBytes(memLimit)
                    },
                    network: {
                        rx: netRx,
                        tx: netTx,
                        rxFormatted: this.formatBytes(netRx),
                        txFormatted: this.formatBytes(netTx)
                    },
                    blockIo: {
                        read: blockRead,
                        write: blockWrite,
                        readFormatted: this.formatBytes(blockRead),
                        writeFormatted: this.formatBytes(blockWrite)
                    },
                    pids: stats.pids_stats?.current || 0
                };
            }
            catch (err) {
                services[serviceName] = { error: err.message, containerId: containerId.slice(0, 12) };
            }
        }
        return { deploymentId, timestamp: Date.now(), services };
    }
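    // Worked example of the CPU formula above (illustrative numbers): if the
    // container consumed cpuDelta = 50_000_000 ns while the host's aggregate
    // counter advanced systemDelta = 1_000_000_000 ns on a numCpus = 4 host,
    // then cpuPercent = (50e6 / 1e9) * 4 * 100 = 20, i.e. 20% of one core in
    // the `docker stats` convention, where 100% equals one fully used core.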
    // format bytes to human readable
    formatBytes(bytes) {
        if (bytes === 0)
            return '0 B';
        const units = ['B', 'KB', 'MB', 'GB', 'TB'];
        const i = Math.floor(Math.log(bytes) / Math.log(1024));
        return `${(bytes / Math.pow(1024, i)).toFixed(1)} ${units[i]}`;
    }
    // get container running status for all services in a deployment
    async getDeploymentStatus(deploymentId) {
        const execution = this.executions.get(deploymentId);
        if (!execution) {
            return { error: 'deployment not found', services: {} };
        }
        const services = {};
        for (const [serviceName, containerId] of execution.containers.entries()) {
            try {
                const container = this.docker.getContainer(containerId);
                const info = await container.inspect();
                services[serviceName] = {
                    containerId: containerId.slice(0, 12),
                    running: info.State.Running,
                    status: info.State.Status, // running, exited, paused, restarting, dead
                    startedAt: info.State.StartedAt,
                    finishedAt: info.State.FinishedAt,
                    exitCode: info.State.ExitCode,
                    restartCount: info.RestartCount,
                    image: info.Config.Image,
                    ports: Object.keys(info.Config.ExposedPorts || {}).map(p => {
                        const [port, proto] = p.split('/');
                        return { port: parseInt(port), protocol: proto || 'tcp' };
                    })
                };
            }
            catch (err) {
                services[serviceName] = { error: err.message, containerId: containerId.slice(0, 12) };
            }
        }
        return { deploymentId, timestamp: Date.now(), services };
    }
    // discover existing deployments on startup
    async discoverExistingDeployments() {
        logger.info('discovering existing kova deployments...');
        try {
            // find all containers with kova.deployment label
            const containers = await this.docker.listContainers({
                filters: { label: ['kova.deployment'] }
            });
            for (const containerInfo of containers) {
                const deploymentId = containerInfo.Labels['kova.deployment'];
                const serviceName = containerInfo.Labels['kova.service'] || 'web';
                if (!deploymentId)
                    continue;
                // skip if already tracked
                if (this.executions.has(deploymentId))
                    continue;
                logger.info({ deploymentId, serviceName, containerId: containerInfo.Id }, 'discovered existing deployment');
                // get full container details
                const container = this.docker.getContainer(containerInfo.Id);
                const inspect = await container.inspect();
                // extract volumes from mounts
                const volumes = [];
                for (const mount of inspect.Mounts || []) {
                    if (mount.Type === 'volume' && mount.Name) {
                        volumes.push(mount.Name);
                    }
                }
                // extract network
                const networks = Object.keys(inspect.NetworkSettings.Networks || {});
                const networkId = networks.length > 0 ? inspect.NetworkSettings.Networks[networks[0]].NetworkID : '';
                // create execution record
                const execution = {
                    deploymentId,
                    leaseId: containerInfo.Labels['kova.lease'] || '',
                    manifest: {
                        version: '2.0',
                        services: {},
                        profiles: {},
                        deployment: {}
                    },
                    containers: new Map([[serviceName, containerInfo.Id]]),
                    volumes,
                    networks: networkId ? [networkId] : []
                };
                this.executions.set(deploymentId, execution);
                // start streaming logs from discovered container
                try {
                    const container = this.docker.getContainer(containerInfo.Id);
                    this.streamLogs(container, deploymentId, serviceName);
                    logger.info({ deploymentId, serviceName }, 'log streaming attached to discovered container');
                }
                catch (err) {
                    logger.warn({ err, deploymentId }, 'failed to attach log streaming to discovered container');
                }
                logger.info({ deploymentId, volumes: volumes.length }, 'deployment state restored');
            }
            logger.info({ count: this.executions.size }, 'deployment discovery complete');
        }
        catch (err) {
            logger.error({ err }, 'failed to discover existing deployments');
        }
    }
    // download and populate volume with uploaded files from orchestrator
    async populateVolumeFromUploads(deploymentId, serviceName, volumeName) {
        const https = await import('https');
        const http = await import('http');
        const fs = await import('fs');
        const tar = await import('tar');
        const path = await import('path');
        const os = await import('os');
        logger.info({ deploymentId, serviceName, volumeName }, 'downloading files from orchestrator');
        const orchestratorUrl = this.orchestratorUrl;
        const downloadUrl = `${orchestratorUrl}/api/v1/deployments/${deploymentId}/services/${serviceName}/files/download`;
        // use api key for auth
        const authToken = this.apiKey || process.env.PROVIDER_TOKEN || '';
        // max download size (100mb to prevent disk exhaustion)
        const maxDownloadSize = 100 * 1024 * 1024;
        try {
            // download tarball to temp file
            const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'kova-download-'));
            const tarballPath = path.join(tempDir, 'files.tar.gz');
            const downloadResult = await new Promise((resolve, reject) => {
                const proto = orchestratorUrl.startsWith('https') ? https : http;
                const hash = createHash('sha256');
                let downloadedSize = 0;
                const req = proto.get(downloadUrl, {
                    headers: {
                        'Authorization': `Bearer ${authToken}`
                    }
                }, (res) => {
                    if (res.statusCode === 404) {
                        logger.info({ deploymentId, serviceName }, 'no uploaded files found, skipping');
                        resolve({});
                        return;
                    }
                    if (res.statusCode !== 200) {
                        reject(new Error(`failed to download files: ${res.statusCode} ${res.statusMessage}`));
                        return;
                    }
                    // get expected checksum from header if provided
                    const expectedChecksum = res.headers['x-checksum'];
                    const fileStream = fs.createWriteStream(tarballPath);
                    res.on('data', (chunk) => {
                        downloadedSize += chunk.length;
                        // check size limit
                        if (downloadedSize > maxDownloadSize) {
                            req.destroy();
                            fileStream.destroy();
                            fs.rmSync(tempDir, { recursive: true, force: true });
                            reject(new Error(`download exceeds size limit of ${maxDownloadSize} bytes`));
                            return;
                        }
                        hash.update(chunk);
                    });
                    res.pipe(fileStream);
                    fileStream.on('finish', () => {
                        fileStream.close();
                        const actualChecksum = hash.digest('hex');
                        // verify checksum if provided
                        if (expectedChecksum && actualChecksum !== expectedChecksum) {
                            fs.rmSync(tempDir, { recursive: true, force: true });
                            reject(new Error(`checksum mismatch: expected ${expectedChecksum}, got ${actualChecksum}`));
                            return;
                        }
                        logger.info({ deploymentId, size: downloadedSize, checksum: actualChecksum }, 'file download verified');
                        resolve({ checksum: actualChecksum });
                    });
                    fileStream.on('error', reject);
                });
                req.on('error', reject);
                req.end();
            });
            // check if tarball was downloaded
            if (!fs.existsSync(tarballPath)) {
                logger.info({ deploymentId, serviceName }, 'no files to populate volume');
                fs.rmSync(tempDir, { recursive: true, force: true });
                return;
            }
            // extract tarball to temp directory
            const extractDir = path.join(tempDir, 'extracted');
            fs.mkdirSync(extractDir, { recursive: true });
            await tar.extract({
                file: tarballPath,
                cwd: extractDir,
                // prevent zip-slip: strip leading slashes and block path traversal
                strip: 0,
                filter: (path) => {
                    if (path.includes('..')) {
                        logger.warn({ path }, 'blocked path traversal attempt in tar');
                        return false;
                    }
                    return true;
                }
            });
            // copy files to volume using a temporary container
            // mount volume and copy files from temp directory
            const containerName = `kova-temp-copy-${Date.now()}`;
            await this.docker.run('alpine:latest', ['sh', '-c', `cp -r /source/. /dest/`], process.stdout, {
                name: containerName,
                HostConfig: {
                    Binds: [
                        `${volumeName}:/dest`,
                        `${extractDir}:/source:ro`
                    ],
                    AutoRemove: true
                }
            });
            logger.info({ deploymentId, serviceName, volumeName }, 'files populated to volume');
            // cleanup temp directory
            fs.rmSync(tempDir, { recursive: true, force: true });
        }
        catch (err) {
            logger.error({ err, deploymentId, serviceName }, 'failed to populate volume from uploads');
            throw err;
        }
    }
    // update files in existing deployment volume and restart containers
    async updateDeploymentFiles(deploymentId, serviceName) {
        const execution = this.executions.get(deploymentId);
        if (!execution) {
            throw new Error('deployment not found');
        }
        logger.info({ deploymentId, serviceName }, 'updating deployment files');
        // find volume for this service
        const volumePrefix = `kova-${deploymentId}-${serviceName}-`;
        let volumeName = execution.volumes.find(v => v && v.startsWith(volumePrefix));
        // check docker if not in memory
        if (!volumeName) {
            logger.info({ deploymentId, serviceName, volumePrefix }, 'volume not in memory, querying docker');
            try {
                const volumes = await this.docker.listVolumes();
                const matchingVolume = volumes.Volumes?.find(v => v.Name?.startsWith(volumePrefix));
                if (matchingVolume) {
                    volumeName = matchingVolume.Name;
                    execution.volumes.push(volumeName);
                    logger.info({ deploymentId, serviceName, volumeName }, 'found existing volume in docker');
                }
            }
            catch (err) {
                logger.error({ err, deploymentId, serviceName }, 'failed to query docker volumes');
            }
        }
        if (!volumeName) {
            throw new Error(`no volume found for service ${serviceName} (expected prefix: ${volumePrefix})`);
        }
        logger.info({ deploymentId, serviceName, volumeName }, 'found volume for update');
        // stop containers for this service
        const containerId = execution.containers.get(serviceName);
        if (containerId) {
            try {
                const container = this.docker.getContainer(containerId);
                await container.stop({ t: 10 });
                logger.info({ deploymentId, serviceName, containerId }, 'container stopped for file update');
            }
            catch (err) {
                logger.warn({ err, containerId }, 'failed to stop container');
            }
        }
        // backup volume to temp before clearing, so we can restore on failure
        const backupVolumeName = `${volumeName}-backup-${Date.now()}`;
        try {
            await this.docker.createVolume({ Name: backupVolumeName });
            await this.docker.run('alpine:latest', ['sh', '-c', 'cp -a /source/. /backup/'], process.stdout, {
                HostConfig: {
                    Binds: [`${volumeName}:/source:ro`, `${backupVolumeName}:/backup`],
                    AutoRemove: true
                }
            });
            logger.info({ deploymentId, serviceName, backupVolumeName }, 'volume backed up');
        }
        catch (err) {
            logger.warn({ err, deploymentId }, 'volume backup failed, proceeding without safety net');
        }
        try {
            // clear volume contents
            await this.docker.run('alpine:latest', ['sh', '-c', 'rm -rf /dest/*'], process.stdout, {
                HostConfig: {
                    Binds: [`${volumeName}:/dest`],
                    AutoRemove: true
                }
            });
            logger.info({ deploymentId, serviceName, volumeName }, 'volume contents cleared');
            // re-download and populate volume
            await this.populateVolumeFromUploads(deploymentId, serviceName, volumeName);
            // restart container
            if (containerId) {
                try {
                    const container = this.docker.getContainer(containerId);
                    await container.start();
                    logger.info({ deploymentId, serviceName, containerId }, 'container restarted after file update');
                }
                catch (err) {
                    logger.error({ err, containerId }, 'failed to restart container after file update');
                    throw err;
                }
            }
            logger.info({ deploymentId, serviceName }, 'deployment files updated successfully');
        }
        catch (err) {
            // restore from backup if download failed
            try {
                await this.docker.run('alpine:latest', ['sh', '-c', 'cp -a /backup/. /dest/'], process.stdout, {
                    HostConfig: {
                        Binds: [`${backupVolumeName}:/backup:ro`, `${volumeName}:/dest`],
                        AutoRemove: true
                    }
                });
                logger.info({ deploymentId, serviceName }, 'restored volume from backup after failed update');
            }
            catch (restoreErr) {
                logger.error({ err: restoreErr }, 'failed to restore volume from backup');
            }
            // restart container even if update failed
            if (containerId) {
                try {
                    const container = this.docker.getContainer(containerId);
                    await container.start();
                }
                catch (restartErr) {
                    logger.error({ err: restartErr }, 'failed to restart container after failed update');
                }
            }
            throw err;
        }
        finally {
            // clean up backup volume
            try {
                const backup = this.docker.getVolume(backupVolumeName);
                await backup.remove();
            }
            catch {
                // ignore cleanup errors
            }
        }
    }
    // browse files inside a running container
    async browseFiles(deploymentId, serviceName, dirPath = '/') {
        const execution = this.executions.get(deploymentId);
        if (!execution) {
            return { error: 'deployment not found', files: [] };
        }
        let containerId = execution.containers.get(serviceName);
        if (!containerId && execution.containers.size > 0) {
            containerId = execution.containers.entries().next().value[1];
        }
        if (!containerId) {
            return { error: 'no containers found', files: [] };
        }
        try {
            const container = this.docker.getContainer(containerId);
            // try gnu ls first, fall back to plain ls -la for busybox
            let lsCmd = ['ls', '-laF', dirPath];
            const exec = await container.exec({
                Cmd: lsCmd,
                AttachStdout: true,
                AttachStderr: true,
            });
            const stream = await exec.start({ hijack: true, stdin: false });
            const output = await this.collectExecOutput(stream);
            const files = [];
            const lines = output.stdout.split('\n').filter((l) => l.trim() && !l.startsWith('total'));
            for (const line of lines) {
                // try iso format first: -rw-r--r-- 1 root root 123 2026-02-11 09:54 file.txt
                let match = line.match(/^([drwxlstSTrw\-\.]+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\d{4}-\d{2}-\d{2})\s+(\d{2}:\d{2})\s+(.+)$/);
                let dateStr = '';
                if (match) {
                    dateStr = `${match[6]} ${match[7]}`;
                }
                else {
                    // busybox format: -rw-r--r-- 1 root root 123 Feb 11 09:54 file.txt
                    match = line.match(/^([drwxlstSTrw\-\.]+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\w{3}\s+\d{1,2}\s+[\d:]+)\s+(.+)$/);
                    if (match) {
                        dateStr = match[6];
                        // shift: busybox match has 7 groups (date is one field, name is match[7])
                        match = [match[0], match[1], match[2], match[3], match[4], match[5], match[6], '', match[7]];
                    }
                }
                if (!match)
                    continue;
                const permissions = match[1];
                const owner = match[3];
                const group = match[4];
                const size = match[5];
                const rawName = match[8];
                const isDir = permissions.startsWith('d');
                const isLink = permissions.startsWith('l');
                let name = rawName;
                let linkTarget = '';
                // remove trailing / or @ or * from name (added by -F flag)
                if (isDir && name.endsWith('/'))
                    name = name.slice(0, -1);
                if (name.endsWith('*'))
                    name = name.slice(0, -1);
                if (name.endsWith('@'))
                    name = name.slice(0, -1);
                // handle symlinks: name -> target
                if (isLink && name.includes(' -> ')) {
                    const parts = name.split(' -> ');
                    name = parts[0];
                    linkTarget = parts[1];
                }
                // skip . and ..
                if (name === '.' || name === '..')
                    continue;
                files.push({
                    name,
                    path: dirPath === '/' ? `/${name}` : `${dirPath}/${name}`,
                    type: isDir ? 'directory' : isLink ? 'link' : 'file',
                    size: parseInt(size),
                    permissions,
                    owner,
                    group,
                    modified: dateStr,
                    linkTarget: linkTarget || undefined
                });
            }
            // sort: directories first, then alphabetical
            files.sort((a, b) => {
                if (a.type === 'directory' && b.type !== 'directory')
                    return -1;
                if (a.type !== 'directory' && b.type === 'directory')
                    return 1;
                return a.name.localeCompare(b.name);
            });
            return { path: dirPath, files };
        }
        catch (err) {
            logger.error({ err, deploymentId, dirPath }, 'failed to browse files');
            return { error: err.message, files: [] };
        }
    }
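    /*
     * Illustrative sketch (not part of the package): what one parsed `ls -laF`
     * line becomes. Given the GNU/ISO-format line
     *   drwxr-xr-x 2 root root 4096 2026-02-11 09:54 html/
     * browseFiles('dep-1', 'web', '/') would push:
     *   { name: 'html', path: '/html', type: 'directory', size: 4096,
     *     permissions: 'drwxr-xr-x', owner: 'root', group: 'root',
     *     modified: '2026-02-11 09:54', linkTarget: undefined }
     */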
    // read a file from inside a container
    async readContainerFile(deploymentId, serviceName, filePath) {
        const execution = this.executions.get(deploymentId);
        if (!execution) {
            return { error: 'deployment not found' };
        }
        let containerId = execution.containers.get(serviceName);
        if (!containerId && execution.containers.size > 0) {
            containerId = execution.containers.entries().next().value[1];
        }
        if (!containerId) {
            return { error: 'no containers found' };
        }
        try {
            const container = this.docker.getContainer(containerId);
            // check file size first (reject files > 5MB)
            const statExec = await container.exec({
                Cmd: ['stat', '-c', '%s', filePath],
                AttachStdout: true,
                AttachStderr: true,
            });
            const statStream = await statExec.start({ hijack: true, stdin: false });
            const statOutput = await this.collectExecOutput(statStream);
            if (statOutput.stderr.includes('No such file')) {
                return { error: 'file not found' };
            }
            const fileSize = parseInt(statOutput.stdout.trim());
            if (fileSize > 5 * 1024 * 1024) {
                return { error: 'file too large (max 5MB)', size: fileSize };
            }
            // read the file content using base64 to handle binary safely
            const exec = await container.exec({
                Cmd: ['base64', filePath],
                AttachStdout: true,
                AttachStderr: true,
            });
            const stream = await exec.start({ hijack: true, stdin: false });
            const output = await this.collectExecOutput(stream);
            if (output.stderr && output.stderr.includes('No such file')) {
                return { error: 'file not found' };
            }
            const content = Buffer.from(output.stdout.replace(/\s/g, ''), 'base64').toString('utf8');
            // detect if binary (has null bytes or high ratio of non-printable chars)
            const nonPrintable = content.split('').filter(c => {
                const code = c.charCodeAt(0);
                return code < 32 && code !== 9 && code !== 10 && code !== 13;
            }).length;
            const isBinary = nonPrintable > content.length * 0.1;
            return {
                path: filePath,
                size: fileSize,
                content: isBinary ? undefined : content,
                binary: isBinary,
                encoding: isBinary ? 'base64' : 'utf8',
                rawBase64: isBinary ? output.stdout.replace(/\s/g, '') : undefined
            };
        }
        catch (err) {
            logger.error({ err, deploymentId, filePath }, 'failed to read container file');
            return { error: err.message };
        }
    }
    // upload a file into a running container
    async uploadFileToContainer(deploymentId, serviceName, filePath, content, encoding = 'utf8') {
        const execution = this.executions.get(deploymentId);
        if (!execution) {
            return { success: false, error: 'deployment not found' };
        }
        let containerId = execution.containers.get(serviceName);
        if (!containerId && execution.containers.size > 0) {
            containerId = execution.containers.entries().next().value[1];
        }
        if (!containerId) {
            return { success: false, error: 'no containers found' };
        }
        // validate path (prevent path traversal)
        if (filePath.includes('..') || !filePath.startsWith('/')) {
            return { success: false, error: 'invalid file path' };
        }
        try {
            const container = this.docker.getContainer(containerId);
            // ensure parent directory exists
            const parentDir = filePath.substring(0, filePath.lastIndexOf('/')) || '/';
            const mkdirExec = await container.exec({
                Cmd: ['mkdir', '-p', parentDir],
                AttachStdout: true,
                AttachStderr: true,
            });
            const mkdirStream = await mkdirExec.start({ hijack: true, stdin: false });
            await this.collectExecOutput(mkdirStream);
            // write file using base64 decode via shell
            const b64Content = encoding === 'base64' ? content : Buffer.from(content, 'utf8').toString('base64');
            const exec = await container.exec({
                Cmd: ['sh', '-c', `echo '${b64Content}' | base64 -d > ${filePath}`],
                AttachStdout: true,
                AttachStderr: true,
            });
            const stream = await exec.start({ hijack: true, stdin: false });
            const output = await this.collectExecOutput(stream);
            if (output.stderr && !output.stderr.includes('warning')) {
                return { success: false, error: output.stderr.trim() };
            }
            logger.info({ deploymentId, filePath }, 'file uploaded to container');
            return { success: true, path: filePath };
        }
        catch (err) {
            logger.error({ err, deploymentId, filePath }, 'failed to upload file to container');
            return { success: false, error: err.message };
        }
    }
    // collect output from a docker exec stream
    collectExecOutput(stream) {
        return new Promise((resolve) => {
            let stdout = '';
            let stderr = '';
            const timeout = setTimeout(() => {
                resolve({ stdout, stderr: stderr || 'command timed out' });
            }, 10000);
            stream.on('data', (chunk) => {
                // docker multiplexes: first 8 bytes are header
                // byte 0: stream type (1=stdout, 2=stderr)
                // bytes 4-7: payload size
                const data = chunk.toString('utf8');
                stdout += data;
            });
            stream.on('end', () => {
                clearTimeout(timeout);
                // strip docker header bytes if present
                const clean = stdout.replace(/[\x00-\x08]/g, '');
                resolve({ stdout: clean, stderr });
            });
            stream.on('error', (err) => {
                clearTimeout(timeout);
                resolve({ stdout, stderr: err.message });
            });
        });
    }
1249
|
+
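    // Worked example of the frame format above (illustrative): a frame carrying
    // the 5-byte stdout payload "hello" arrives on the wire as
    //   01 00 00 00 00 00 00 05 68 65 6c 6c 6f
    // byte 0 = 0x01 (stdout), bytes 4-7 = 0x00000005 (payload length), then the
    // payload. A stderr frame is identical except byte 0 = 0x02.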
    // shell session tracking
    shellSessions = new Map();
    // start interactive shell session in container
    // returns { success: true } or { success: false, error: string }
    async startShellSession(sessionId, deploymentId, serviceName, onOutput) {
        const execution = this.executions.get(deploymentId);
        if (!execution) {
            logger.warn({ deploymentId, sessionId }, 'shell: deployment not found');
            return { success: false, error: 'deployment not found on this provider' };
        }
        // try requested service first, then fall back to first available service
        let containerId = execution.containers.get(serviceName);
        let actualServiceName = serviceName;
        if (!containerId && execution.containers.size > 0) {
            // fall back to first available service
            const firstEntry = execution.containers.entries().next().value;
            if (firstEntry) {
                actualServiceName = firstEntry[0];
                containerId = firstEntry[1];
                logger.info({ deploymentId, requestedService: serviceName, actualService: actualServiceName }, 'shell: using fallback service');
            }
        }
        if (!containerId) {
            logger.warn({ deploymentId, serviceName, sessionId, availableServices: Array.from(execution.containers.keys()) }, 'shell: no services found');
            return { success: false, error: 'no containers found for this service' };
        }
        try {
            const container = this.docker.getContainer(containerId);
            const info = await container.inspect();
            let execContainer = container;
            let debugContainer = null;
            if (info.State.Running) {
                // container is running, exec directly into it
                logger.info({ deploymentId, containerId }, 'shell: container running, attaching');
            }
            else {
                // container is stopped - try to start it
                logger.info({ deploymentId, containerId, state: info.State.Status }, 'shell: container not running, starting it');
                try {
                    await container.start();
                }
                catch (startErr) {
                    // ignore "already started" race
                    if (!startErr.message?.includes('already started')) {
                        logger.warn({ err: startErr, containerId }, 'shell: failed to start container');
                    }
                }
                await new Promise(r => setTimeout(r, 1500));
                // check if it actually stayed running
                const recheck = await container.inspect();
                if (!recheck.State.Running) {
                    // container exits immediately (e.g. node:20-alpine with no long-running process)
                    // commit the stopped container to preserve its filesystem, then run with sleep
                    logger.info({ deploymentId, containerId, image: info.Config.Image }, 'shell: container exits immediately, creating debug container from snapshot');
                    const debugTag = `kova-debug:${containerId.slice(0, 12)}`;
                    const debugName = `kova-debug-${sessionId.replace(/[^a-zA-Z0-9-]/g, '-').slice(0, 60)}`;
                    // snapshot the stopped container's filesystem
                    const commitResult = await container.commit({
                        repo: 'kova-debug',
                        tag: containerId.slice(0, 12),
                        comment: 'debug shell snapshot'
                    });
                    logger.info({ debugTag, imageId: commitResult.Id }, 'shell: committed container snapshot');
                    debugContainer = await this.docker.createContainer({
                        name: debugName,
                        Image: debugTag,
                        Cmd: ['sh', '-c', 'trap "exit 0" TERM INT; while true; do sleep 1; done'],
                        Tty: true,
                        OpenStdin: true,
                        WorkingDir: info.Config.WorkingDir || '/',
                        Env: info.Config.Env || [],
                        HostConfig: {
                            NetworkMode: info.HostConfig.NetworkMode || 'bridge',
                            Binds: info.HostConfig.Binds || [],
                            AutoRemove: true
                        },
                        Labels: {
                            'kova.deployment': deploymentId,
                            'kova.service': serviceName,
                            'kova.debug-shell': 'true'
                        }
                    });
                    await debugContainer.start();
                    execContainer = debugContainer;
                    logger.info({ deploymentId, debugName }, 'shell: debug container started');
                }
            }
            // create exec instance for interactive shell
            const exec = await execContainer.exec({
                Cmd: ['/bin/sh'],
                AttachStdin: true,
                AttachStdout: true,
                AttachStderr: true,
                Tty: true
            });
            // start the exec and get stream
            const stream = await exec.start({
                hijack: true,
                stdin: true,
                Tty: true
            });
            // store session (including debug container + image ref for cleanup)
            this.shellSessions.set(sessionId, {
                exec,
                stream,
                deploymentId,
                serviceName,
                debugContainer,
                debugImageTag: debugContainer ? `kova-debug:${containerId.slice(0, 12)}` : undefined
            });
            // forward output to callback
            stream.on('data', (chunk) => {
                const output = chunk.toString('utf8');
                onOutput(output);
            });
            stream.on('end', () => {
                logger.info({ sessionId }, 'shell session stream ended');
                this.cleanupShellSession(sessionId);
                this.emit('shell-closed', { sessionId });
            });
            stream.on('error', (err) => {
                logger.error({ err, sessionId }, 'shell session stream error');
                this.cleanupShellSession(sessionId);
            });
            logger.info({ sessionId, deploymentId, serviceName, containerId, debug: !!debugContainer }, 'shell session started');
            return { success: true };
        }
        catch (err) {
            logger.error({ err, sessionId, deploymentId }, 'failed to start shell session');
            const msg = err.message || 'failed to start shell';
            if (msg.includes('is not running')) {
                return { success: false, error: 'container is not running - it may have crashed' };
            }
            if (msg.includes('No such image')) {
                return { success: false, error: 'container image not available locally' };
            }
            return { success: false, error: msg };
        }
    }
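    // Wiring sketch (illustrative assumption, not part of the original file):
    // a WebSocket handler could bridge a browser terminal to a session like so,
    // assuming `executor` is a DeploymentExecutor and `ws` is a connected socket:
    //
    //   const result = await executor.startShellSession(
    //       sessionId, deploymentId, 'web', (out) => ws.send(out));
    //   if (!result.success) ws.close(1011, result.error);
    //   ws.on('message', (msg) => executor.sendShellInput(sessionId, msg.toString()));
    //   ws.on('close', () => executor.closeShellSession(sessionId));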
    // send input to shell session
    sendShellInput(sessionId, input) {
        const session = this.shellSessions.get(sessionId);
        if (!session) {
            logger.warn({ sessionId }, 'shell input: session not found');
            return false;
        }
        try {
            session.stream.write(input);
            return true;
        }
        catch (err) {
            logger.error({ err, sessionId }, 'failed to send shell input');
            return false;
        }
    }
    // resize shell terminal
    resizeShell(sessionId, cols, rows) {
        const session = this.shellSessions.get(sessionId);
        if (!session) {
            return false;
        }
        try {
            // resize the tty; resize() returns a promise, so swallow async
            // rejections too - a failed resize is not fatal to the session
            session.exec.resize({ h: rows, w: cols }).catch(() => { });
            return true;
        }
        catch (err) {
            logger.debug({ err, sessionId }, 'failed to resize shell');
            return false;
        }
    }
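    // Note (illustrative assumption): a browser terminal built on xterm.js
    // would typically call this from its resize hook, e.g.
    //   term.onResize(({ cols, rows }) => executor.resizeShell(sessionId, cols, rows));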
    // clean up a shell session and its debug container/image if any
    cleanupShellSession(sessionId) {
        const session = this.shellSessions.get(sessionId);
        if (!session)
            return;
        // stop debug container (AutoRemove will delete it)
        if (session.debugContainer) {
            session.debugContainer.stop({ t: 2 }).catch(() => {
                // ignore - may already be stopped
            });
        }
        // remove the committed snapshot image
        if (session.debugImageTag) {
            const img = this.docker.getImage(session.debugImageTag);
            img.remove({ force: true }).catch(() => {
                // ignore - best effort cleanup
            });
        }
        this.shellSessions.delete(sessionId);
    }
    // restart all containers in a deployment (stop then start)
    async restartDeployment(deploymentId) {
        const execution = this.executions.get(deploymentId);
        if (!execution) {
            throw new Error('deployment not found');
        }
        const restarted = [];
        for (const [serviceName, containerId] of execution.containers.entries()) {
            try {
                const container = this.docker.getContainer(containerId);
                try {
                    await container.stop({ t: 10 });
                }
                catch (stopErr) {
                    // 304 means the container was already stopped - still start it
                    if (stopErr.statusCode !== 304)
                        throw stopErr;
                }
                await container.start();
                restarted.push(serviceName);
                logger.info({ deploymentId, serviceName, containerId }, 'container restarted');
            }
            catch (err) {
                logger.error({ err, deploymentId, serviceName, containerId }, 'failed to restart container');
            }
        }
        return restarted;
    }
    // create a snapshot of a service's volume
    async createVolumeSnapshot(deploymentId, serviceName, snapshotId) {
        const execution = this.executions.get(deploymentId);
        if (!execution) {
            throw new Error('deployment not found');
        }
        // find the volume for this service
        const volumePrefix = `kova-${deploymentId}-${serviceName}-`;
        let volumeName = execution.volumes.find(v => v.startsWith(volumePrefix));
        if (!volumeName) {
            // check docker directly
            const volumes = await this.docker.listVolumes();
            const match = volumes.Volumes?.find(v => v.Name?.startsWith(volumePrefix));
            if (match) {
                volumeName = match.Name;
            }
        }
        if (!volumeName) {
            throw new Error(`no volume found for service ${serviceName}`);
        }
        const snapshotDir = '/var/kova/snapshots';
        const snapshotKey = `${snapshotId}.tar.gz`;
        // ensure snapshot directory exists on host
        const fs = await import('fs');
        if (!fs.existsSync(snapshotDir)) {
            fs.mkdirSync(snapshotDir, { recursive: true });
        }
        // create snapshot using a temporary alpine container
        await this.docker.run('alpine:latest', ['tar', 'czf', `/snapshots/${snapshotKey}`, '-C', '/data', '.'], process.stdout, {
            HostConfig: {
                Binds: [
                    `${volumeName}:/data:ro`,
                    `${snapshotDir}:/snapshots`
                ],
                AutoRemove: true
            }
        });
        // get snapshot file size
        const snapshotPath = `${snapshotDir}/${snapshotKey}`;
        const stat = fs.statSync(snapshotPath);
        logger.info({ deploymentId, serviceName, volumeName, snapshotId, sizeBytes: stat.size }, 'volume snapshot created');
        return {
            volumeName,
            sizeBytes: stat.size,
            snapshotKey
        };
    }
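    // Usage sketch (illustrative, not part of the original file): a provider
    // could take a snapshot before a risky change and keep the returned key
    // for a later rollback:
    //
    //   const snap = await executor.createVolumeSnapshot('dep-123', 'db', 'pre-migrate');
    //   // ... later, roll back with:
    //   await executor.restoreVolumeSnapshot('dep-123', 'db', snap.snapshotKey);
    //
    // The tar runs in a throwaway helper container, so alpine:latest must
    // already be available on the host (docker.run does not pull images).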
    // restore a service's volume from a snapshot
    async restoreVolumeSnapshot(deploymentId, serviceName, snapshotKey) {
        const execution = this.executions.get(deploymentId);
        if (!execution) {
            throw new Error('deployment not found');
        }
        // find the volume for this service
        const volumePrefix = `kova-${deploymentId}-${serviceName}-`;
        let volumeName = execution.volumes.find(v => v.startsWith(volumePrefix));
        if (!volumeName) {
            const volumes = await this.docker.listVolumes();
            const match = volumes.Volumes?.find(v => v.Name?.startsWith(volumePrefix));
            if (match) {
                volumeName = match.Name;
            }
        }
        if (!volumeName) {
            throw new Error(`no volume found for service ${serviceName}`);
        }
        const snapshotDir = '/var/kova/snapshots';
        // clear existing volume data (find -delete also removes dotfiles,
        // which 'rm -rf /data/*' would leave behind)
        await this.docker.run('alpine:latest', ['sh', '-c', 'find /data -mindepth 1 -delete'], process.stdout, {
            HostConfig: {
                Binds: [`${volumeName}:/data`],
                AutoRemove: true
            }
        });
        // restore from snapshot
        await this.docker.run('alpine:latest', ['tar', 'xzf', `/snapshots/${snapshotKey}`, '-C', '/data'], process.stdout, {
            HostConfig: {
                Binds: [
                    `${volumeName}:/data`,
                    `${snapshotDir}:/snapshots:ro`
                ],
                AutoRemove: true
            }
        });
        logger.info({ deploymentId, serviceName, volumeName, snapshotKey }, 'volume snapshot restored');
    }
    // close shell session
    closeShellSession(sessionId) {
        const session = this.shellSessions.get(sessionId);
        if (!session) {
            return;
        }
        try {
            session.stream.end();
        }
        catch (err) {
            // ignore
        }
        this.cleanupShellSession(sessionId);
        logger.info({ sessionId }, 'shell session closed');
    }
}