kova-node-cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +138 -0
- package/bin/cli.js +2 -0
- package/dist/__tests__/auto-bidder.test.js +267 -0
- package/dist/__tests__/container-manager.test.js +189 -0
- package/dist/__tests__/deployment-executor.test.js +332 -0
- package/dist/__tests__/heartbeat.test.js +191 -0
- package/dist/__tests__/lease-handler.test.js +268 -0
- package/dist/__tests__/resource-limits.test.js +164 -0
- package/dist/api/server.js +607 -0
- package/dist/cli.js +47 -0
- package/dist/commands/deploy.js +568 -0
- package/dist/commands/earnings.js +70 -0
- package/dist/commands/start.js +358 -0
- package/dist/commands/status.js +50 -0
- package/dist/commands/stop.js +101 -0
- package/dist/lib/client.js +87 -0
- package/dist/lib/config.js +107 -0
- package/dist/lib/docker.js +415 -0
- package/dist/lib/logger.js +12 -0
- package/dist/lib/message-signer.js +93 -0
- package/dist/lib/monitor.js +105 -0
- package/dist/lib/p2p.js +186 -0
- package/dist/lib/resource-limits.js +84 -0
- package/dist/lib/state.js +113 -0
- package/dist/lib/types.js +2 -0
- package/dist/lib/usage-meter.js +63 -0
- package/dist/services/auto-bidder.js +332 -0
- package/dist/services/container-manager.js +282 -0
- package/dist/services/deployment-executor.js +1562 -0
- package/dist/services/heartbeat.js +110 -0
- package/dist/services/job-handler.js +241 -0
- package/dist/services/lease-handler.js +382 -0
- package/package.json +51 -0
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
// auto bidder - evaluates orders and submits competitive bids
|
|
2
|
+
// pricing strategy, resource matching, bid submission
|
|
3
|
+
import { logger } from '../lib/logger.js';
|
|
4
|
+
/**
 * Polls the orchestrator for open orders, decides whether this node can serve
 * them, prices them, and submits bids.
 *
 * Pricing is `base cost * margin * dynamic multiplier`, where the dynamic
 * multiplier drifts between 0.5 and 2.0 depending on the recent win rate, plus
 * a 15% premium while the node's CPU is more than 80% utilized.
 */
export class AutoBidder {
    // Lower-cased memory suffix -> multiplier that converts the value to GB.
    // Decimal-style suffixes (K/M/G/T, KB/MB/GB/TB) are deliberately treated as
    // their binary counterparts; the result is a slightly conservative estimate.
    static #MEMORY_UNIT_TO_GB = new Map([
        ['b', 1 / (1024 ** 3)],
        ['k', 1 / (1024 ** 2)], ['kb', 1 / (1024 ** 2)], ['ki', 1 / (1024 ** 2)], ['kib', 1 / (1024 ** 2)],
        ['m', 1 / 1024], ['mb', 1 / 1024], ['mi', 1 / 1024], ['mib', 1 / 1024],
        ['g', 1], ['gb', 1], ['gi', 1], ['gib', 1],
        ['t', 1024], ['tb', 1024], ['ti', 1024], ['tib', 1024],
    ]);
    config;
    monitor;
    pollingInterval = null;
    submittedBids = new Map(); // orderId -> timestamp
    // dynamic pricing state
    bidHistory = [];
    dynamicPriceMultiplier = 1.0;
    lastPriceAdjustment = 0;
    // most recent CPU utilization (0..1) sampled by canHandleOrder; null until sampled
    lastCpuUtilization = null;

    /**
     * @param {object} config - reads orchestratorUrl, apiKey, nodeId and pricingStrategy
     * @param {object} monitor - must expose async getAvailableResources()
     */
    constructor(config, monitor) {
        this.config = config;
        this.monitor = monitor;
    }

    /**
     * Start polling for orders. Polls once immediately, then every `intervalMs`.
     * Calling it while already running only logs a warning.
     * @param {number} [intervalMs=15000]
     */
    start(intervalMs = 15000) {
        if (this.pollingInterval) {
            logger.warn('auto-bidder already running');
            return;
        }
        logger.info({ intervalMs }, 'starting auto-bidder');
        this.pollingInterval = setInterval(async () => {
            try {
                await this.pollAndBid();
            }
            catch (err) {
                logger.error({ err }, 'auto-bidder error');
            }
        }, intervalMs);
        // run immediately on start; pollAndBid handles its own errors and never rejects
        void this.pollAndBid();
    }

    /** Stop polling. Safe to call when not running. */
    stop() {
        if (this.pollingInterval) {
            clearInterval(this.pollingInterval);
            this.pollingInterval = null;
            logger.info('auto-bidder stopped');
        }
    }

    /**
     * Fetch open orders from the orchestrator and evaluate each one.
     * Never rejects: transport errors are logged, and a failure while
     * evaluating one order does not prevent the remaining orders from being
     * evaluated.
     * @returns {Promise<void>}
     */
    async pollAndBid() {
        try {
            // fetch open orders from orchestrator
            const response = await fetch(`${this.config.orchestratorUrl}/api/v1/provider/orders`, {
                headers: {
                    'Authorization': `Bearer ${this.config.apiKey}`
                }
            });
            if (!response.ok) {
                const error = await response.text();
                logger.error({ status: response.status, error }, 'failed to fetch orders from orchestrator');
                return;
            }
            const data = await response.json();
            const orders = Array.isArray(data?.orders) ? data.orders : [];
            if (orders.length === 0) {
                logger.debug('no open orders available');
                return;
            }
            logger.info({ count: orders.length }, 'found open orders - evaluating for bidding');
            // log the order ids and timestamps for debugging
            const now = Date.now();
            for (const order of orders) {
                const orderTimestamp = this.#parseOrderTimestamp(order?.id);
                logger.debug({
                    orderId: order?.id,
                    timestamp: orderTimestamp,
                    ageInHours: Math.floor((now - orderTimestamp) / (1000 * 60 * 60)),
                    alreadyBid: this.submittedBids.has(order?.id),
                }, 'order details');
            }
            // evaluate each order; isolate failures so one malformed order
            // cannot block bidding on the others
            for (const order of orders) {
                try {
                    await this.evaluateAndBid(order);
                }
                catch (err) {
                    logger.error({ err, orderId: order?.id }, 'failed to evaluate order');
                }
            }
        }
        catch (err) {
            // warn rather than debug: an unreachable orchestrator means no bids at all
            logger.warn({ err }, 'failed to poll orders');
        }
    }

    /**
     * Evaluate a single order and submit a bid when it is new, recent enough,
     * within this node's resources, and our price does not exceed the order's
     * `maxPricePerBlock`.
     * @param {object} order - reads id, resources and maxPricePerBlock
     * @returns {Promise<void>}
     */
    async evaluateAndBid(order) {
        // skip if we already bid on this order
        if (this.submittedBids.has(order.id)) {
            logger.info({ orderId: order.id }, 'skipping - already bid in this session');
            return;
        }
        // evict stale bids to prevent unbounded growth
        if (this.submittedBids.size > 5000) {
            const cutoff = Date.now() - 24 * 60 * 60 * 1000;
            for (const [id, ts] of this.submittedBids) {
                if (ts < cutoff) {
                    this.submittedBids.delete(id);
                }
            }
        }
        // skip old orders (more than 7 days)
        const orderTimestamp = this.#parseOrderTimestamp(order.id);
        const now = Date.now();
        const sevenDaysAgo = now - (7 * 24 * 60 * 60 * 1000);
        // an unparseable timestamp (NaN) compares false and is therefore not skipped
        if (orderTimestamp < sevenDaysAgo) {
            logger.info({ orderId: order.id, ageInDays: Math.floor((now - orderTimestamp) / (1000 * 60 * 60 * 24)) }, 'skipping - order too old');
            return;
        }
        logger.info({ orderId: order.id }, 'evaluating order');
        // check if we can handle this order
        const canHandle = await this.canHandleOrder(order);
        if (!canHandle) {
            logger.info({ orderId: order.id, required: order.resources }, 'cannot handle order - insufficient resources');
            return;
        }
        // calculate our bid price
        const ourPrice = this.calculateBidPrice(order);
        // validate price is a valid number
        if (!Number.isFinite(ourPrice) || ourPrice <= 0) {
            logger.error({ orderId: order.id, ourPrice, order: order.resources }, 'calculated invalid price');
            return;
        }
        // check if our price is competitive
        if (ourPrice > order.maxPricePerBlock) {
            logger.info({ orderId: order.id, ourPrice, maxPrice: order.maxPricePerBlock }, 'our price too high');
            return;
        }
        logger.info({ orderId: order.id, ourPrice, maxPrice: order.maxPricePerBlock, nodeId: this.config.nodeId }, 'submitting bid');
        // submit bid
        try {
            await this.submitBid(order.id, ourPrice);
            // only remember the order after a successful bid
            this.submittedBids.set(order.id, Date.now());
            logger.info({ orderId: order.id }, 'bid successful');
        }
        catch (err) {
            if (err.message === 'already bid') {
                // we already bid on this in a previous run, remember it silently
                this.submittedBids.set(order.id, Date.now());
                return;
            }
            logger.error({ err, orderId: order.id }, 'failed to submit bid');
        }
    }

    /**
     * Check whether the node currently has the CPU, memory and (optionally)
     * GPU capacity the order asks for. Also caches the observed CPU
     * utilization so calculateBidPrice can apply its utilization premium.
     *
     * GPU vendor and VRAM constraints are applied independently: a VRAM
     * requirement is enforced even when no vendor is specified.
     * @param {object} order - reads id and resources ({ cpu, memory, gpu? })
     * @returns {Promise<boolean>}
     */
    async canHandleOrder(order) {
        const resources = await this.monitor.getAvailableResources();
        this.#sampleCpuUtilization(resources);
        // check cpu
        const requiredCpu = order.resources.cpu;
        if (resources.cpu.available < requiredCpu) {
            logger.info({
                orderId: order.id,
                required: requiredCpu,
                available: resources.cpu.available
            }, 'insufficient cpu');
            return false;
        }
        // check memory (convert to GB)
        const requiredMemory = this.parseMemoryToGb(order.resources.memory);
        if (resources.memory.available < requiredMemory) {
            logger.info({
                orderId: order.id,
                requiredMemory,
                availableMemory: resources.memory.available
            }, 'insufficient memory');
            return false;
        }
        // check gpu if required
        const gpu = order.resources.gpu;
        if (!gpu || !(gpu.units > 0)) {
            return true;
        }
        const requiredGpuUnits = gpu.units;
        const requiredVendor = gpu.attributes?.vendor;
        const requiredRam = gpu.attributes?.ram;
        const availableGpus = resources.gpu || [];
        if (availableGpus.length < requiredGpuUnits) {
            logger.info({
                orderId: order.id,
                requiredGpuUnits,
                availableGpus: availableGpus.length
            }, 'insufficient gpu count');
            return false;
        }
        let candidates = availableGpus;
        // vendor may be given as an object keyed by vendor name or as a plain string;
        // an empty vendor object imposes no constraint
        const vendorName = typeof requiredVendor === 'string'
            ? requiredVendor
            : Object.keys(requiredVendor ?? {})[0];
        const vendorKey = vendorName?.toLowerCase();
        if (vendorKey) {
            candidates = candidates.filter((g) => String(g.vendor ?? '').toLowerCase().includes(vendorKey));
            if (candidates.length < requiredGpuUnits) {
                logger.info({
                    orderId: order.id,
                    requiredVendor: vendorKey,
                    matchingGpus: candidates.length
                }, 'no matching gpu vendor');
                return false;
            }
        }
        if (requiredRam) {
            // NOTE(review): assumes g.vram is expressed in GB - confirm against the monitor
            const requiredVram = this.parseMemoryToGb(requiredRam);
            const sufficientGpus = candidates.filter((g) => g.vram >= requiredVram);
            if (sufficientGpus.length < requiredGpuUnits) {
                logger.info({
                    orderId: order.id,
                    requiredVram,
                    availableVram: candidates.map((g) => g.vram)
                }, 'insufficient gpu vram');
                return false;
            }
        }
        return true;
    }

    /**
     * Adjust the dynamic price multiplier from the last hour of bid history.
     * Runs at most once every five minutes and needs at least three recorded
     * bids. A win rate above 70% raises the multiplier by 5% (capped at 2.0);
     * below 20% lowers it by 5% (floored at 0.5).
     */
    adjustDynamicPricing() {
        const now = Date.now();
        const fiveMinutes = 5 * 60 * 1000;
        // only adjust every 5 minutes
        if (now - this.lastPriceAdjustment < fiveMinutes) {
            return;
        }
        this.lastPriceAdjustment = now;
        // prune old history (keep last hour)
        const oneHourAgo = now - 60 * 60 * 1000;
        this.bidHistory = this.bidHistory.filter((b) => b.timestamp > oneHourAgo);
        if (this.bidHistory.length < 3) {
            return; // not enough data
        }
        // calculate win rate
        const totalBids = this.bidHistory.length;
        const wonBids = this.bidHistory.filter((b) => b.won).length;
        const winRate = wonBids / totalBids;
        // adjust multiplier based on win rate
        if (winRate > 0.7) {
            // winning too many - we can raise prices
            this.dynamicPriceMultiplier = Math.min(this.dynamicPriceMultiplier * 1.05, 2.0);
            logger.info({ winRate, multiplier: this.dynamicPriceMultiplier }, 'raising prices - high win rate');
        }
        else if (winRate < 0.2) {
            // losing too many - lower prices to be more competitive
            this.dynamicPriceMultiplier = Math.max(this.dynamicPriceMultiplier * 0.95, 0.5);
            logger.info({ winRate, multiplier: this.dynamicPriceMultiplier }, 'lowering prices - low win rate');
        }
    }

    /**
     * Record whether a previously priced order was won. Has no effect when the
     * order is not (or no longer) in the bid history.
     * @param {string} orderId
     * @param {boolean} won
     */
    recordBidResult(orderId, won) {
        const bid = this.bidHistory.find((b) => b.orderId === orderId);
        if (bid) {
            bid.won = won;
        }
    }

    /**
     * Calculate the bid price for an order and record it in the bid history.
     *
     * The utilization premium uses the CPU utilization most recently sampled
     * by canHandleOrder, because this method is synchronous and cannot wait
     * for a fresh reading from the monitor.
     * @param {object} order - reads id and resources ({ cpu, memory, gpu? })
     * @returns {number} price per block rounded to 4 decimals; may be NaN when
     *   the order or pricing config is malformed (callers must validate)
     */
    calculateBidPrice(order) {
        // adjust pricing periodically based on history
        this.adjustDynamicPricing();
        const strategy = this.config.pricingStrategy;
        const cpu = order.resources.cpu;
        const memory = this.parseMemoryToGb(order.resources.memory);
        // base cost
        const cpuCost = cpu * strategy.cpuPricePerCore;
        const memoryCost = memory * strategy.memoryPricePerGb;
        // gpu cost if required
        let gpuCost = 0;
        if (order.resources.gpu && order.resources.gpu.units > 0) {
            const gpuPricePerUnit = strategy.gpuPricePerUnit || 0.1;
            gpuCost = order.resources.gpu.units * gpuPricePerUnit;
        }
        const baseCost = cpuCost + memoryCost + gpuCost;
        // apply margin and dynamic multiplier
        let price = baseCost * strategy.margin * this.dynamicPriceMultiplier;
        // factor in utilization - when heavily utilized, charge a premium
        if (this.lastCpuUtilization !== null && this.lastCpuUtilization > 0.8) {
            price *= 1.15;
        }
        // minimum bid to ensure non-zero pricing
        if (price < 0.001) {
            price = 0.001;
        }
        // round to 4 decimals for precision
        const rounded = Math.round(price * 10000) / 10000;
        // track this bid (keyed by order id so recordBidResult can find it)
        this.bidHistory.push({ orderId: order.id, price: rounded, won: false, timestamp: Date.now() });
        return rounded;
    }

    /**
     * Submit a bid to the orchestrator.
     * @param {string} orderId
     * @param {number} pricePerBlock
     * @returns {Promise<void>}
     * @throws {Error} with message 'already bid' when the orchestrator reports a
     *   duplicate bid; otherwise with the orchestrator's message, or
     *   'bid submission failed' when none is available
     */
    async submitBid(orderId, pricePerBlock) {
        const bidData = {
            orderId,
            nodeId: this.config.nodeId,
            pricePerBlock
        };
        logger.debug({ bidData }, 'submitting bid');
        const response = await fetch(`${this.config.orchestratorUrl}/api/v1/provider/bids`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'Authorization': `Bearer ${this.config.apiKey}`
            },
            body: JSON.stringify(bidData)
        });
        // the body may be missing or non-JSON (e.g. a proxy error page)
        let body = null;
        try {
            body = await response.json();
        }
        catch {
            body = null;
        }
        if (response.ok) {
            logger.info({ orderId, pricePerBlock, bidId: body?.bid?.id }, 'bid submitted');
            return;
        }
        // check if we already bid on this order
        if (typeof body?.message === 'string' && body.message.includes('already bid')) {
            // throw this specific error so evaluateAndBid can track it
            throw new Error('already bid');
        }
        // only log other errors
        logger.error({ orderId, status: response.status, error: body }, 'bid api error');
        throw new Error(body?.message || body?.error || 'bid submission failed');
    }

    /**
     * Convert a memory size to GB.
     *
     * Accepts strings such as '512Mi', '2Gi', '1.5 Ti'. Suffixes are matched
     * case-insensitively, and decimal-style suffixes ('M', 'MB', 'G', ...) are
     * treated like their binary counterparts. A unit that is not recognised is
     * assumed to already be GB. A finite, non-negative number is taken to be GB.
     * @param {string|number} size
     * @returns {number} size in GB, or 0 when the input cannot be parsed
     */
    parseMemoryToGb(size) {
        if (typeof size === 'number') {
            return Number.isFinite(size) && size >= 0 ? size : 0;
        }
        if (typeof size !== 'string') {
            return 0;
        }
        const match = size.trim().match(/^(\d+(?:\.\d+)?)\s*([A-Za-z]+)$/);
        if (!match) {
            return 0;
        }
        const value = Number.parseFloat(match[1]);
        const multiplier = AutoBidder.#MEMORY_UNIT_TO_GB.get(match[2].toLowerCase()) ?? 1;
        return value * multiplier;
    }

    // Extract the creation timestamp (ms) from an order id.
    // order id format: userId-timestamp-serviceIndex
    // e.g. 9d4a6656-02c8-4e16-a4c3-910efe92e7e2-1763050326342-1
    // Returns 0 when the id has no timestamp segment and NaN when it is not numeric.
    #parseOrderTimestamp(orderId) {
        const parts = String(orderId ?? '').split('-');
        return Number.parseInt(parts[parts.length - 2] || '0', 10);
    }

    // Cache CPU utilization (0..1) from a monitor reading; null when it cannot be derived.
    #sampleCpuUtilization(resources) {
        const cores = resources?.cpu?.cores;
        const available = resources?.cpu?.available;
        this.lastCpuUtilization = Number.isFinite(cores) && cores > 0 && Number.isFinite(available)
            ? 1 - (available / cores)
            : null;
    }
}
|
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
import { DockerManager } from '../lib/docker.js';
|
|
2
|
+
import { logger } from '../lib/logger.js';
|
|
3
|
+
import { EventEmitter } from 'events';
|
|
4
|
+
/**
 * Runs jobs as Docker containers and tracks their lifecycle.
 *
 * Emits: 'container-started', 'container-finished', 'container-failed',
 * 'container-stopped', 'container-timeout', 'container-stats',
 * 'container-healthy' and 'container-unhealthy'.
 */
export class ContainerManager extends EventEmitter {
    // setTimeout fires almost immediately when the delay exceeds this value,
    // so longer job durations are split into several timers
    static #MAX_TIMER_DELAY_MS = 2147483647;
    // characters that would change the meaning of a shell command
    static #SHELL_UNSAFE = /[\s;|&$`\\!"'<>(){}\[\]]/;
    docker;
    containers = new Map(); // jobId -> container info
    monitorInterval = null;

    constructor() {
        super();
        this.docker = new DockerManager();
    }

    /**
     * Verify docker is reachable and begin monitoring containers every
     * 10 seconds. Calling it again while monitoring does not add a second timer.
     * @throws {Error} 'docker not available' when the docker check fails
     */
    async start() {
        const ready = await this.docker.checkDocker();
        if (!ready) {
            throw new Error('docker not available');
        }
        if (this.monitorInterval) {
            return;
        }
        // monitor containers every 10 seconds
        this.monitorInterval = setInterval(() => {
            this.monitorContainers().catch((err) => {
                logger.error({ err }, 'container monitoring failed');
            });
        }, 10000);
    }

    /** Stop monitoring and stop every tracked container. */
    async stop() {
        if (this.monitorInterval) {
            clearInterval(this.monitorInterval);
            this.monitorInterval = null;
        }
        // stop all running containers (snapshot the keys: stopContainer mutates the map)
        for (const jobId of [...this.containers.keys()]) {
            await this.stopContainer(jobId);
        }
    }

    /**
     * Pull the job's image, create its container and start tracking it.
     * When `job.duration` (seconds) is positive, the container is stopped once
     * that much time has elapsed.
     * @param {object} job - reads id, image, resources ({ memory, cpu, disk }),
     *   env, healthCheck and duration
     * @returns {Promise<string>} the container id
     * @throws rethrows any error from pulling or creating, after emitting 'container-failed'
     */
    async runJob(job) {
        logger.info({ jobId: job.id }, 'starting container for job');
        try {
            // pull image if needed
            await this.docker.pullImage(job.image);
            // create container with security settings
            const container = await this.docker.createContainer({
                jobId: job.id,
                image: job.image,
                memory: job.resources.memory * 1024, // gb to mb
                cpus: job.resources.cpu,
                disk: job.resources.disk,
                env: job.env
            });
            const containerId = container.id;
            // track it with health check config from manifest
            const containerInfo = {
                jobId: job.id,
                containerId,
                startTime: Date.now(),
                resources: job.resources,
                status: 'running',
                healthCheck: job.healthCheck,
                healthStatus: 'unknown',
                healthFailures: 0,
                timeout: job.duration
            };
            // set up timeout if duration specified and > 0
            if (job.duration && job.duration > 0) {
                this.#armTimeout(containerInfo, job.duration * 1000, async () => {
                    logger.warn({ jobId: job.id, duration: job.duration }, 'job timeout reached, stopping container');
                    this.emit('container-timeout', { jobId: job.id, duration: job.duration });
                    await this.stopContainer(job.id);
                });
            }
            this.containers.set(job.id, containerInfo);
            this.emit('container-started', { jobId: job.id, containerId });
            // watch in the background; watchContainer handles its own errors
            void this.watchContainer(job.id, container);
            return containerId;
        }
        catch (err) {
            logger.error({ err, jobId: job.id }, 'failed to start container');
            this.emit('container-failed', { jobId: job.id, error: err });
            throw err;
        }
    }

    /**
     * Wait for a container to exit, emit 'container-finished' and clean it up.
     * When the container was stopped deliberately (stopContainer already
     * removed it or is removing it), cleanup is not repeated and a failing
     * wait is not reported as a container failure.
     * @param {string} jobId
     * @param {object} container - must expose id and async wait()
     */
    async watchContainer(jobId, container) {
        // the info record for this container, or undefined once it is no longer tracked
        const trackedInfo = () => {
            const info = this.containers.get(jobId);
            return info && info.containerId === container.id ? info : undefined;
        };
        try {
            // wait for container to exit
            const data = await container.wait();
            const exitCode = data.StatusCode;
            logger.info({ jobId, exitCode }, 'container exited');
            const info = trackedInfo();
            // the job is over: its duration timer must not fire later
            if (info?.timeoutTimer) {
                clearTimeout(info.timeoutTimer);
                info.timeoutTimer = undefined;
            }
            // SECURITY: Do NOT collect customer container logs
            // Node operators should not have access to customer data/secrets
            // Logs may contain API keys, passwords, sensitive data
            // If customers need logs, they should retrieve via encrypted channel
            this.emit('container-finished', {
                jobId,
                exitCode,
                // logs intentionally omitted for security/privacy
                logs: '[logs redacted for customer privacy - exit code: ' + exitCode + ']'
            });
            // cleanup, unless stopContainer has done it or is doing it
            if (info && info.status !== 'stopping') {
                await this.docker.cleanupContainer(container.id);
                this.containers.delete(jobId);
            }
        }
        catch (err) {
            const info = trackedInfo();
            if (!info || info.status === 'stopping') {
                logger.debug({ err, jobId }, 'container watch ended after deliberate stop');
                return;
            }
            logger.error({ err, jobId }, 'error watching container');
            this.emit('container-failed', { jobId, error: err });
        }
    }

    /**
     * Stop and clean up a job's container. Does nothing for an unknown job.
     * On cleanup failure the error is logged and the container stays tracked
     * with status 'stopping'.
     * @param {string} jobId
     */
    async stopContainer(jobId) {
        const info = this.containers.get(jobId);
        if (!info) {
            return;
        }
        info.status = 'stopping';
        // clear timeout timer if set
        if (info.timeoutTimer) {
            clearTimeout(info.timeoutTimer);
            info.timeoutTimer = undefined;
        }
        try {
            await this.docker.cleanupContainer(info.containerId);
            this.containers.delete(jobId);
            this.emit('container-stopped', { jobId });
        }
        catch (err) {
            logger.error({ err, jobId }, 'failed to stop container');
        }
    }

    /**
     * Collect stats for every running container, emit 'container-stats', warn
     * when memory use exceeds 95% of the job's allowance, and run the health
     * check when one is configured and due. A container whose stats cannot be
     * read is marked 'stopped'.
     */
    async monitorContainers() {
        for (const [jobId, info] of this.containers) {
            if (info.status !== 'running') {
                continue;
            }
            try {
                const stats = await this.docker.getContainerStats(info.containerId);
                this.emit('container-stats', {
                    jobId,
                    stats,
                    uptime: Date.now() - info.startTime,
                    healthStatus: info.healthStatus
                });
                // check resource limits
                // NOTE(review): assumes stats.memory is in MB and resources.memory in GB - confirm
                if (stats.memory > info.resources.memory * 1024 * 0.95) {
                    logger.warn({ jobId, memory: stats.memory }, 'container near memory limit');
                }
                // run health check if configured
                if (info.healthCheck) {
                    const interval = (info.healthCheck.interval || 30) * 1000;
                    const lastCheck = info.lastHealthCheck || 0;
                    if (Date.now() - lastCheck >= interval) {
                        await this.runHealthCheck(jobId, info);
                    }
                }
            }
            catch (err) {
                // container probably died
                logger.warn({ jobId, err }, 'failed to get container stats');
                info.status = 'stopped';
            }
        }
    }

    /**
     * Run the configured health check ('exec', 'http' or 'tcp') inside the
     * container and update `info.healthStatus` / `info.healthFailures`.
     *
     * For 'http' and 'tcp' probes the port, timeout and path come from the job
     * manifest and are interpolated into a shell command, so they are
     * validated first; an invalid value counts as a failed check.
     * @param {string} jobId
     * @param {object} info - tracked container info; mutated in place
     */
    async runHealthCheck(jobId, info) {
        if (!info.healthCheck) {
            return;
        }
        const { type, port, path, command, timeout = 5, retries = 3 } = info.healthCheck;
        info.lastHealthCheck = Date.now();
        let healthy = false;
        try {
            if (type === 'exec' && command) {
                // run command inside container
                const result = await this.docker.execCommand(info.containerId, command);
                healthy = result.exitCode === 0;
                logger.debug({ jobId, exitCode: result.exitCode }, 'health check exec completed');
            }
            else if (type === 'http' && port) {
                // http health check - run curl inside container
                const checkPath = ContainerManager.#probePath(path);
                const curlCmd = `curl -sf --max-time ${ContainerManager.#probeTimeout(timeout)} http://localhost:${ContainerManager.#probePort(port)}${checkPath}`;
                const result = await this.docker.execCommand(info.containerId, curlCmd);
                healthy = result.exitCode === 0;
                logger.debug({ jobId, port, path: checkPath, exitCode: result.exitCode }, 'health check http completed');
            }
            else if (type === 'tcp' && port) {
                // tcp health check - use nc to check if port is open
                const ncCmd = `nc -z -w ${ContainerManager.#probeTimeout(timeout)} localhost ${ContainerManager.#probePort(port)}`;
                const result = await this.docker.execCommand(info.containerId, ncCmd);
                healthy = result.exitCode === 0;
                logger.debug({ jobId, port, exitCode: result.exitCode }, 'health check tcp completed');
            }
        }
        catch (err) {
            logger.warn({ jobId, err }, 'health check error');
            healthy = false;
        }
        if (healthy) {
            // reset failures on success
            if (info.healthStatus !== 'healthy') {
                logger.info({ jobId }, 'container became healthy');
                this.emit('container-healthy', { jobId });
            }
            info.healthStatus = 'healthy';
            info.healthFailures = 0;
            return;
        }
        info.healthFailures++;
        if (info.healthFailures >= retries) {
            if (info.healthStatus !== 'unhealthy') {
                logger.warn({ jobId, failures: info.healthFailures }, 'container became unhealthy');
                this.emit('container-unhealthy', { jobId, failures: info.healthFailures });
            }
            info.healthStatus = 'unhealthy';
        }
    }

    /**
     * Get the health status for a container.
     * @param {string} jobId
     * @returns {{status: string, failures: number, lastCheck: (number|undefined)}|undefined}
     *   undefined when the job is not tracked
     */
    getHealthStatus(jobId) {
        const info = this.containers.get(jobId);
        if (!info) {
            return undefined;
        }
        return {
            status: info.healthStatus,
            failures: info.healthFailures,
            lastCheck: info.lastHealthCheck
        };
    }

    /** @returns {string[]} ids of all tracked jobs */
    getRunningJobs() {
        return Array.from(this.containers.keys());
    }

    /** @returns {object|undefined} tracked info for the job, if any */
    getContainerInfo(jobId) {
        return this.containers.get(jobId);
    }

    /**
     * Run a command inside a running job container.
     * @throws {Error} 'container not found' or 'container not running'
     */
    async execInContainer(jobId, command) {
        const container = this.containers.get(jobId);
        if (!container) {
            throw new Error('container not found');
        }
        if (container.status !== 'running') {
            throw new Error('container not running');
        }
        return await this.docker.execCommand(container.containerId, command);
    }

    /**
     * Fetch the last `tail` log lines of a job container.
     * NOTE(review): watchContainer deliberately withholds customer logs for
     * privacy, while this method returns them - confirm callers are authorised.
     * @throws {Error} 'container not found'
     */
    async getContainerLogs(jobId, tail = 100) {
        const container = this.containers.get(jobId);
        if (!container) {
            throw new Error('container not found');
        }
        return await this.docker.getContainerLogs(container.containerId, tail);
    }

    /**
     * Validate a file path to prevent path traversal and shell injection.
     *
     * The blocked-path checks run on a normalized form of the path (empty and
     * '.' segments removed), so '//proc/1' or '/etc//passwd' cannot slip
     * through, and directories are matched on whole segments so unrelated
     * names such as '/processing' are allowed.
     * @param {string} filepath - absolute path inside the container
     * @throws {Error} when the path is not an acceptable absolute path
     */
    validateFilepath(filepath) {
        if (typeof filepath !== 'string') {
            throw new Error('invalid filepath - must be a string');
        }
        if (filepath.includes('..') || filepath.includes('\0')) {
            throw new Error('invalid filepath - path traversal not allowed');
        }
        if (!filepath.startsWith('/')) {
            throw new Error('filepath must be absolute (start with /)');
        }
        // block shell metacharacters to prevent injection
        if (/[;|&$`\\!"'<>(){}\[\]\n\r]/.test(filepath)) {
            throw new Error('filepath contains invalid characters');
        }
        const segments = filepath.split('/').filter((part) => part !== '' && part !== '.');
        const normalized = `/${segments.join('/')}`;
        const blockedDirs = ['/proc', '/sys', '/dev'];
        // kept as prefixes so variants such as '/etc/shadow-' stay blocked
        const blockedFilePrefixes = ['/etc/passwd', '/etc/shadow'];
        const inBlockedDir = blockedDirs.some((dir) => normalized === dir || normalized.startsWith(`${dir}/`));
        const isBlockedFile = blockedFilePrefixes.some((prefix) => normalized.startsWith(prefix));
        if (inBlockedDir || isBlockedFile) {
            throw new Error('access to system paths not allowed');
        }
    }

    /**
     * Write text content to a file inside a job container.
     * @throws {Error} 'container not found', or a validateFilepath error
     */
    async writeFile(jobId, filepath, content) {
        const container = this.containers.get(jobId);
        if (!container) {
            throw new Error('container not found');
        }
        this.validateFilepath(filepath);
        // use base64 to safely transport content without shell injection
        const encoded = Buffer.from(content, 'utf8').toString('base64');
        const command = `echo '${encoded}' | base64 -d > '${filepath}'`;
        await this.docker.execCommand(container.containerId, command);
    }

    /**
     * Read a file from inside a job container.
     * @returns {Promise<string>} stdout of `cat` on the file
     * @throws {Error} 'container not found', or a validateFilepath error
     */
    async readFile(jobId, filepath) {
        const container = this.containers.get(jobId);
        if (!container) {
            throw new Error('container not found');
        }
        this.validateFilepath(filepath);
        const result = await this.docker.execCommand(container.containerId, `cat '${filepath}'`);
        return result.stdout;
    }

    // Arm the job-duration timer on `info`, re-arming in chunks when the
    // remaining time exceeds what a single setTimeout can represent.
    #armTimeout(info, remainingMs, onExpire) {
        const delay = Math.min(remainingMs, ContainerManager.#MAX_TIMER_DELAY_MS);
        info.timeoutTimer = setTimeout(async () => {
            const left = remainingMs - delay;
            if (left > 0) {
                this.#armTimeout(info, left, onExpire);
                return;
            }
            info.timeoutTimer = undefined;
            try {
                await onExpire();
            }
            catch (err) {
                logger.error({ err, jobId: info.jobId }, 'failed to handle job timeout');
            }
        }, delay);
    }

    // Validate a health-check port; throws when it is not an integer in 1..65535.
    static #probePort(port) {
        const value = Number(port);
        if (!Number.isInteger(value) || value < 1 || value > 65535) {
            throw new Error('invalid health check port');
        }
        return value;
    }

    // Health-check timeout in seconds; falls back to 5 when not a positive number.
    static #probeTimeout(timeout) {
        const value = Number(timeout);
        return Number.isFinite(value) && value > 0 ? value : 5;
    }

    // Validate an http health-check path; defaults to '/' and ensures a leading slash.
    static #probePath(path) {
        const raw = path || '/';
        if (typeof raw !== 'string' || ContainerManager.#SHELL_UNSAFE.test(raw)) {
            throw new Error('invalid health check path');
        }
        return raw.startsWith('/') ? raw : `/${raw}`;
    }
}
|