kova-node-cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +138 -0
- package/bin/cli.js +2 -0
- package/dist/__tests__/auto-bidder.test.js +267 -0
- package/dist/__tests__/container-manager.test.js +189 -0
- package/dist/__tests__/deployment-executor.test.js +332 -0
- package/dist/__tests__/heartbeat.test.js +191 -0
- package/dist/__tests__/lease-handler.test.js +268 -0
- package/dist/__tests__/resource-limits.test.js +164 -0
- package/dist/api/server.js +607 -0
- package/dist/cli.js +47 -0
- package/dist/commands/deploy.js +568 -0
- package/dist/commands/earnings.js +70 -0
- package/dist/commands/start.js +358 -0
- package/dist/commands/status.js +50 -0
- package/dist/commands/stop.js +101 -0
- package/dist/lib/client.js +87 -0
- package/dist/lib/config.js +107 -0
- package/dist/lib/docker.js +415 -0
- package/dist/lib/logger.js +12 -0
- package/dist/lib/message-signer.js +93 -0
- package/dist/lib/monitor.js +105 -0
- package/dist/lib/p2p.js +186 -0
- package/dist/lib/resource-limits.js +84 -0
- package/dist/lib/state.js +113 -0
- package/dist/lib/types.js +2 -0
- package/dist/lib/usage-meter.js +63 -0
- package/dist/services/auto-bidder.js +332 -0
- package/dist/services/container-manager.js +282 -0
- package/dist/services/deployment-executor.js +1562 -0
- package/dist/services/heartbeat.js +110 -0
- package/dist/services/job-handler.js +241 -0
- package/dist/services/lease-handler.js +382 -0
- package/package.json +51 -0
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import { EventEmitter } from 'events';
|
|
2
|
+
import { logger } from '../lib/logger.js';
|
|
3
|
+
/**
 * Periodically reports this node's resource availability to the orchestrator
 * over HTTP.
 *
 * Events emitted:
 *  - `heartbeat-success`: `{ resources, timestamp }` after a 2xx response.
 *  - `pending-jobs`: the non-empty `pendingJobs` array from the response body.
 *  - `heartbeat-error`: `{ status, error }` for a non-2xx response, or
 *    `{ error }` when building/sending the heartbeat threw.
 */
export class HeartbeatService extends EventEmitter {
    nodeId;
    orchestratorUrl;
    monitor;
    limitManager;
    interval = null;
    heartbeatIntervalMs;
    isRunning = false;
    apiPort;
    accessToken;

    /**
     * @param {string} nodeId - identifier placed in the heartbeat URL path
     * @param {string} orchestratorUrl - base URL of the orchestrator
     * @param {object} monitor - must expose async `getAvailableResources()`
     * @param {object} limitManager - must expose `getLimits()` and `getAvailableResources()`
     * @param {number} [intervalSeconds=60] - delay between periodic heartbeats
     * @param {number} [apiPort=4002] - port value sent in the heartbeat body
     * @param {string} [accessToken=''] - token value sent in the heartbeat body
     */
    constructor(nodeId, orchestratorUrl, monitor, limitManager, intervalSeconds = 60, apiPort = 4002, accessToken = '') {
        super();
        this.nodeId = nodeId;
        this.orchestratorUrl = orchestratorUrl;
        this.monitor = monitor;
        this.limitManager = limitManager;
        this.heartbeatIntervalMs = intervalSeconds * 1000;
        this.apiPort = apiPort;
        this.accessToken = accessToken;
    }

    /**
     * Sends one heartbeat immediately, then schedules periodic heartbeats.
     * Calling it while already running only logs a warning.
     *
     * @returns {Promise<void>} resolves once the initial heartbeat attempt finished
     */
    async start() {
        if (this.isRunning) {
            logger.warn('heartbeat service already running');
            return;
        }
        this.isRunning = true;
        // send initial heartbeat immediately
        await this.sendHeartbeat();
        // stop() (or a full stop/start cycle) may have run while the initial
        // heartbeat was in flight: never arm a timer for a stopped service,
        // and never arm a second timer next to an existing one.
        if (!this.isRunning || this.interval !== null) {
            return;
        }
        // then send periodic heartbeats
        this.interval = setInterval(async () => {
            try {
                await this.sendHeartbeat();
            }
            catch (err) {
                // sendHeartbeat only rejects when its own error path throws
                // (e.g. a listener error); keep that from becoming an
                // unhandled rejection on the timer.
                logger.error({ err }, 'heartbeat tick failed');
            }
        }, this.heartbeatIntervalMs);
        logger.info({ intervalSeconds: this.heartbeatIntervalMs / 1000 }, 'heartbeat service started');
    }

    /**
     * Cancels the periodic timer (if any) and marks the service as stopped.
     *
     * @returns {Promise<void>}
     */
    async stop() {
        if (this.interval) {
            clearInterval(this.interval);
            this.interval = null;
        }
        this.isRunning = false;
        logger.info('heartbeat service stopped');
    }

    /**
     * Builds the resource report and POSTs it to
     * `{orchestratorUrl}/api/v1/nodes/{nodeId}/heartbeat`.
     * Does nothing while the service is not running. Failures are logged and
     * surfaced through the `heartbeat-error` event instead of being thrown.
     *
     * @returns {Promise<void>}
     */
    async sendHeartbeat() {
        if (!this.isRunning) {
            return;
        }
        try {
            // get system resources
            const systemResources = await this.monitor.getAvailableResources();
            const availableLimits = this.limitManager.getAvailableResources();
            const limits = this.limitManager.getLimits();
            // send provider limits, not system resources
            const resources = {
                cpu: {
                    cores: limits.cpu,
                    available: availableLimits.cpu
                },
                memory: {
                    total: limits.memory,
                    available: availableLimits.memory
                },
                disk: systemResources.disk,
                network: systemResources.network,
                gpu: systemResources.gpu || []
            };
            // send heartbeat to orchestrator
            const response = await fetch(`${this.orchestratorUrl}/api/v1/nodes/${this.nodeId}/heartbeat`, {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                },
                body: JSON.stringify({
                    resources,
                    apiPort: this.apiPort,
                    accessToken: this.accessToken,
                }),
            });
            if (response.ok) {
                // the body may legitimately be `null` or lack optional fields
                const data = await response.json();
                logger.debug({
                    nodeId: this.nodeId,
                    cpu: resources.cpu.available,
                    memory: resources.memory.available,
                }, 'heartbeat sent successfully');
                this.emit('heartbeat-success', { resources, timestamp: data?.timestamp ?? Date.now() });
                // check if orchestrator sent pending jobs
                const pendingJobs = data?.pendingJobs;
                if (Array.isArray(pendingJobs) && pendingJobs.length > 0) {
                    logger.info({ count: pendingJobs.length }, 'received pending jobs from heartbeat');
                    this.emit('pending-jobs', pendingJobs);
                }
            }
            else {
                const error = await response.text();
                logger.warn({ status: response.status, error }, 'heartbeat request failed');
                this.emit('heartbeat-error', { status: response.status, error });
            }
        }
        catch (err) {
            logger.error({ err }, 'failed to send heartbeat');
            this.emit('heartbeat-error', { error: err });
        }
    }

    /**
     * Manually triggers a heartbeat (a no-op while the service is stopped).
     *
     * @returns {Promise<void>}
     */
    async triggerHeartbeat() {
        await this.sendHeartbeat();
    }

    /**
     * @returns {boolean} whether the service is currently running
     */
    isActive() {
        return this.isRunning;
    }
}
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
import { EventEmitter } from 'events';
|
|
2
|
+
import { logger } from '../lib/logger.js';
|
|
3
|
+
import { UsageMeter } from '../lib/usage-meter.js';
|
|
4
|
+
/**
 * Accepts jobs, runs them through the container manager, tracks their usage
 * and reports the outcome back to the orchestrator.
 *
 * Events emitted:
 *  - `job-started`: `{ jobId }` when the container manager reports a start.
 *  - `job-completed`: `{ jobId, earnings }` when a container finishes
 *    (regardless of exit code).
 *  - `job-failed`: `{ jobId, error }` when the container manager reports a failure.
 */
export class JobHandler extends EventEmitter {
    containerManager;
    p2pNode;
    activeJobs = new Map();
    maxConcurrentJobs = 3;
    // how long a finished job stays in `activeJobs` after it was reported
    jobRetentionMs = 60000;
    usageMeter;
    limitManager;
    orchestratorUrl;

    /**
     * @param {object} p2pNode - emitter of `job-cancel`; must expose `getPeerId()` and `sendToOrchestrator()`
     * @param {object} containerManager - emitter of `container-*` events; must expose `runJob()` and `stopContainer()`
     * @param {object} limitManager - must expose `canAcceptJob()`, `allocateResources()` and `releaseResources()`
     * @param {string} [orchestratorUrl] - base URL for HTTP completion reports; when falsy only p2p is used
     */
    constructor(p2pNode, containerManager, limitManager, orchestratorUrl) {
        super();
        this.p2pNode = p2pNode;
        this.containerManager = containerManager;
        this.limitManager = limitManager;
        this.orchestratorUrl = orchestratorUrl;
        this.usageMeter = new UsageMeter();
        this.setupContainerListeners();
        this.setupP2PListeners();
    }

    /** Subscribes to cancellation requests arriving over p2p. */
    setupP2PListeners() {
        this.p2pNode.on('job-cancel', ({ jobId }) => {
            logger.info({ jobId }, 'received cancellation request');
            // cancelJob logs and swallows its own failures, so the returned
            // promise is intentionally not awaited here.
            this.cancelJob(jobId);
        });
    }

    /** Subscribes to container lifecycle events and keeps job records in sync. */
    setupContainerListeners() {
        this.containerManager.on('container-started', ({ jobId, containerId }) => {
            const job = this.activeJobs.get(jobId);
            if (!job) {
                return;
            }
            job.status = 'running';
            job.containerId = containerId;
            // start tracking usage
            this.usageMeter.startMeter(jobId, this.p2pNode.getPeerId(), job.spec.userId);
            this.emit('job-started', { jobId });
        });
        // track resource usage as container runs
        this.containerManager.on('container-stats', ({ jobId, stats }) => {
            this.usageMeter.updateUsage(jobId, {
                cpu: stats.cpu / 100, // percentage to cores
                memory: stats.memory / 1024, // mb to gb
                network: stats.network
            });
        });
        this.containerManager.on('container-finished', ({ jobId, exitCode, logs }) => {
            const job = this.activeJobs.get(jobId);
            if (!job) {
                return;
            }
            job.status = exitCode === 0 ? 'completed' : 'failed';
            job.endTime = Date.now();
            job.result = { exitCode, logs };
            // release allocated resources
            this.#releaseJobResources(job);
            // finalize usage tracking and calculate actual cost
            const pricing = {
                cpuPerHour: 0.05,
                memoryPerGBHour: 0.02,
                storagePerGBHour: 0.001,
                networkPerGB: 0.01
            };
            const usageRecord = this.usageMeter.finalizeMeter(jobId, pricing);
            if (usageRecord) {
                job.earnings = usageRecord.cost.toNumber();
                logger.info({ jobId, usage: usageRecord.usage, cost: job.earnings }, 'usage calculated');
            }
            else {
                // fallback to simple time-based calc
                const runtimeHours = (job.endTime - job.startTime) / 1000 / 60 / 60;
                const hourlyRate = job.spec.price || this.calculatePrice(job.spec);
                job.earnings = runtimeHours * hourlyRate;
            }
            this.emit('job-completed', { jobId, earnings: job.earnings });
            // report back to orchestrator
            this.#reportInBackground(jobId, job);
        });
        this.containerManager.on('container-failed', ({ jobId, error }) => {
            const job = this.activeJobs.get(jobId);
            if (!job) {
                return;
            }
            job.status = 'failed';
            job.endTime = Date.now();
            job.error = error;
            // release resources
            this.#releaseJobResources(job);
            this.emit('job-failed', { jobId, error });
            this.#reportInBackground(jobId, job);
        });
    }

    /**
     * Tries to accept and start a job.
     *
     * @param {object} jobSpec - job description; `id` and `resources` are read here
     * @returns {Promise<boolean>} `true` when the container manager started the job,
     *   `false` when it was rejected (capacity, limits, allocation) or failed to start
     */
    async handleJob(jobSpec) {
        // Count every job that has not finished yet: jobs still in
        // 'received'/'starting' already hold a slot, otherwise a burst of
        // submissions could exceed maxConcurrentJobs before any container runs.
        const unfinishedJobs = Array.from(this.activeJobs.values())
            .filter(j => j.status !== 'completed' && j.status !== 'failed').length;
        if (unfinishedJobs >= this.maxConcurrentJobs) {
            logger.warn({ jobId: jobSpec.id }, 'at max capacity');
            return false;
        }
        // check against provider-configured limits
        if (!this.limitManager.canAcceptJob(jobSpec.resources)) {
            logger.warn({ jobId: jobSpec.id, required: jobSpec.resources }, 'exceeds provider limits');
            return false;
        }
        // allocate the resources
        if (!this.limitManager.allocateResources(jobSpec.id, jobSpec.resources)) {
            logger.warn({ jobId: jobSpec.id }, 'failed to allocate resources');
            return false;
        }
        const job = {
            jobId: jobSpec.id,
            spec: jobSpec,
            status: 'received',
            startTime: Date.now()
        };
        this.activeJobs.set(jobSpec.id, job);
        logger.info({ jobId: jobSpec.id }, 'accepted job');
        try {
            job.status = 'starting';
            await this.containerManager.runJob(jobSpec);
            return true;
        }
        catch (err) {
            logger.error({ err, jobId: jobSpec.id }, 'failed to start job');
            job.status = 'failed';
            job.error = err;
            job.endTime = Date.now();
            // release resources on failure
            this.#releaseJobResources(job);
            // this path never goes through reportJobCompletion, so schedule
            // the removal here or the record would stay in the map forever
            this.#scheduleJobCleanup(jobSpec.id, job);
            return false;
        }
    }

    /**
     * Basic hourly price used when the job spec does not carry one.
     *
     * @param {object} spec - job spec; `resources.cpu` and `resources.memory` are read
     * @returns {number} hourly rate
     */
    calculatePrice(spec) {
        return spec.resources.cpu * 0.05 + spec.resources.memory * 0.02;
    }

    /**
     * Reports a finished job to the orchestrator, preferring the HTTP callback
     * and falling back to p2p when no URL is configured, the request throws,
     * or the orchestrator answers with a non-2xx status. The job record is
     * scheduled for removal afterwards whether or not reporting succeeded.
     *
     * @param {string} jobId
     * @param {object} job - job record from `activeJobs`
     * @returns {Promise<void>} rejects when the p2p send fails
     */
    async reportJobCompletion(jobId, job) {
        try {
            const nodeId = this.p2pNode.getPeerId();
            const success = job.status === 'completed';
            const usage = {
                runtime: job.endTime ? (job.endTime - job.startTime) / 1000 : 0,
                cost: job.earnings || 0
            };
            let reportedViaHttp = false;
            // try http callback first (more reliable than p2p)
            if (this.orchestratorUrl) {
                try {
                    const response = await fetch(`${this.orchestratorUrl}/api/v1/nodes/${nodeId}/jobs/${jobId}/complete`, {
                        method: 'POST',
                        headers: { 'Content-Type': 'application/json' },
                        body: JSON.stringify({ success, result: job.result, usage })
                    });
                    // fetch only rejects on network errors; an HTTP error
                    // status must be treated as a failed report as well
                    if (!response.ok) {
                        throw new Error(`orchestrator responded with status ${response.status}`);
                    }
                    logger.info({
                        jobId,
                        status: job.status,
                        runtime: usage.runtime,
                        earnings: job.earnings
                    }, 'reported job completion via http');
                    reportedViaHttp = true;
                }
                catch (err) {
                    logger.warn({ err, jobId }, 'failed to report via http, trying p2p');
                }
            }
            if (!reportedViaHttp) {
                await this.p2pNode.sendToOrchestrator({
                    type: 'job-completed',
                    data: { jobId, success, result: job.result, usage, nodeId }
                });
                logger.info({ jobId }, 'reported job completion via p2p');
            }
        }
        finally {
            this.#scheduleJobCleanup(jobId, job);
        }
    }

    /**
     * @returns {object[]} snapshot array of all job records currently retained
     */
    getActiveJobs() {
        return Array.from(this.activeJobs.values());
    }

    /**
     * Sums the earnings of the job records currently retained in `activeJobs`.
     * Records removed after the retention delay no longer contribute.
     *
     * @returns {number}
     */
    getTotalEarnings() {
        let total = 0;
        for (const job of this.activeJobs.values()) {
            if (job.earnings) {
                total += job.earnings;
            }
        }
        return total;
    }

    /**
     * Cancels an unfinished job: stops its container (if one was started),
     * marks it failed, releases its resources and reports the outcome.
     *
     * @param {string} jobId
     * @returns {Promise<boolean>} `true` when the job was cancelled and reported,
     *   `false` when it is unknown, already finished, or cancellation threw
     */
    async cancelJob(jobId) {
        const job = this.activeJobs.get(jobId);
        if (!job) {
            logger.warn({ jobId }, 'job not found for cancellation');
            return false;
        }
        if (job.status === 'completed' || job.status === 'failed') {
            logger.warn({ jobId }, 'job already finished, cannot cancel');
            return false;
        }
        try {
            // stop the container if running
            if (job.containerId) {
                await this.containerManager.stopContainer(job.containerId);
            }
            job.status = 'failed';
            job.endTime = Date.now();
            job.error = { message: 'cancelled by orchestrator' };
            // a cancelled job must hand its allocation back; this is a no-op
            // when a container event already released it
            this.#releaseJobResources(job);
            logger.info({ jobId }, 'job cancelled successfully');
            // report cancellation
            await this.reportJobCompletion(jobId, job);
            return true;
        }
        catch (err) {
            logger.error({ err, jobId }, 'failed to cancel job');
            return false;
        }
    }

    /** Releases a job's allocation at most once, however many paths finish the job. */
    #releaseJobResources(job) {
        if (job.resourcesReleased) {
            return;
        }
        this.limitManager.releaseResources(job.jobId, job.spec.resources);
        job.resourcesReleased = true;
    }

    /** Removes the job record after the retention delay, unless the id was reused by a newer record. */
    #scheduleJobCleanup(jobId, job) {
        setTimeout(() => {
            if (this.activeJobs.get(jobId) === job) {
                this.activeJobs.delete(jobId);
            }
        }, this.jobRetentionMs);
    }

    /** Reports from an event listener without leaving a rejected promise unhandled. */
    #reportInBackground(jobId, job) {
        this.reportJobCompletion(jobId, job).catch((err) => {
            logger.error({ err, jobId }, 'failed to report job completion');
        });
    }
}
|