kova-node-cli 0.1.0

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,110 @@
+ import { EventEmitter } from 'events';
+ import { logger } from '../lib/logger.js';
+ export class HeartbeatService extends EventEmitter {
+     nodeId;
+     orchestratorUrl;
+     monitor;
+     limitManager;
+     interval = null;
+     heartbeatIntervalMs;
+     isRunning = false;
+     apiPort;
+     accessToken;
+     constructor(nodeId, orchestratorUrl, monitor, limitManager, intervalSeconds = 60, apiPort = 4002, accessToken = '') {
+         super();
+         this.nodeId = nodeId;
+         this.orchestratorUrl = orchestratorUrl;
+         this.monitor = monitor;
+         this.limitManager = limitManager;
+         this.heartbeatIntervalMs = intervalSeconds * 1000;
+         this.apiPort = apiPort;
+         this.accessToken = accessToken;
+     }
+     async start() {
+         if (this.isRunning) {
+             logger.warn('heartbeat service already running');
+             return;
+         }
+         this.isRunning = true;
+         // send initial heartbeat immediately
+         await this.sendHeartbeat();
+         // then send periodic heartbeats
+         this.interval = setInterval(async () => {
+             await this.sendHeartbeat();
+         }, this.heartbeatIntervalMs);
+         logger.info({ intervalSeconds: this.heartbeatIntervalMs / 1000 }, 'heartbeat service started');
+     }
+     async stop() {
+         if (this.interval) {
+             clearInterval(this.interval);
+             this.interval = null;
+         }
+         this.isRunning = false;
+         logger.info('heartbeat service stopped');
+     }
+     async sendHeartbeat() {
+         if (!this.isRunning)
+             return;
+         try {
+             // get system resources
+             const systemResources = await this.monitor.getAvailableResources();
+             const availableLimits = this.limitManager.getAvailableResources();
+             // send provider limits, not system resources
+             const resources = {
+                 cpu: {
+                     cores: this.limitManager.getLimits().cpu,
+                     available: availableLimits.cpu
+                 },
+                 memory: {
+                     total: this.limitManager.getLimits().memory,
+                     available: availableLimits.memory
+                 },
+                 disk: systemResources.disk,
+                 network: systemResources.network,
+                 gpu: systemResources.gpu || []
+             };
+             // send heartbeat to orchestrator
+             const response = await fetch(`${this.orchestratorUrl}/api/v1/nodes/${this.nodeId}/heartbeat`, {
+                 method: 'POST',
+                 headers: {
+                     'Content-Type': 'application/json',
+                 },
+                 body: JSON.stringify({
+                     resources,
+                     apiPort: this.apiPort,
+                     accessToken: this.accessToken,
+                 }),
+             });
+             if (response.ok) {
+                 const data = await response.json();
+                 logger.debug({
+                     nodeId: this.nodeId,
+                     cpu: resources.cpu.available,
+                     memory: resources.memory.available,
+                 }, 'heartbeat sent successfully');
+                 this.emit('heartbeat-success', { resources, timestamp: data.timestamp || Date.now() });
+                 // check if orchestrator sent pending jobs
+                 if (data.pendingJobs && data.pendingJobs.length > 0) {
+                     logger.info({ count: data.pendingJobs.length }, 'received pending jobs from heartbeat');
+                     this.emit('pending-jobs', data.pendingJobs);
+                 }
+             }
+             else {
+                 const error = await response.text();
+                 logger.warn({ status: response.status, error }, 'heartbeat request failed');
+                 this.emit('heartbeat-error', { status: response.status, error });
+             }
+         }
+         catch (err) {
+             logger.error({ err }, 'failed to send heartbeat');
+             this.emit('heartbeat-error', { error: err });
+         }
+     }
+     // manually trigger a heartbeat
+     async triggerHeartbeat() {
+         await this.sendHeartbeat();
+     }
+     isActive() {
+         return this.isRunning;
+     }
+ }
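
For orientation only (not part of the published package): a minimal sketch of how this HeartbeatService might be driven. The import path, node id, orchestrator URL, and the monitor/limitManager stubs below are assumptions; the stubs only expose the methods the class actually calls (getAvailableResources() and getLimits()).

import { HeartbeatService } from './services/heartbeat.js'; // illustrative path

// hypothetical collaborators -- any objects exposing the methods the class calls will do
const monitor = {
    getAvailableResources: async () => ({ disk: {}, network: {}, gpu: [] }),
};
const limitManager = {
    getLimits: () => ({ cpu: 4, memory: 8 }),              // provider-configured caps
    getAvailableResources: () => ({ cpu: 2, memory: 4 }),  // what is currently free
};

const heartbeat = new HeartbeatService(
    'node-123',                  // nodeId (placeholder)
    'http://localhost:3000',     // orchestratorUrl (placeholder)
    monitor,
    limitManager,
    30,                          // intervalSeconds: heartbeat every 30s instead of the default 60s
);

heartbeat.on('heartbeat-error', ({ status, error }) => console.error('heartbeat failed', status, error));
heartbeat.on('pending-jobs', (jobs) => console.log('orchestrator offered', jobs.length, 'job(s)'));

await heartbeat.start();   // sends one heartbeat right away, then one per interval
// ...
await heartbeat.stop();

Per the code above, start() sends an initial heartbeat immediately and then repeats on the configured interval, and 'pending-jobs' fires whenever the orchestrator's heartbeat response includes queued work.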
@@ -0,0 +1,241 @@
+ import { EventEmitter } from 'events';
+ import { logger } from '../lib/logger.js';
+ import { UsageMeter } from '../lib/usage-meter.js';
+ export class JobHandler extends EventEmitter {
+     containerManager;
+     p2pNode;
+     activeJobs = new Map();
+     maxConcurrentJobs = 3;
+     usageMeter;
+     limitManager;
+     orchestratorUrl;
+     constructor(p2pNode, containerManager, limitManager, orchestratorUrl) {
+         super();
+         this.p2pNode = p2pNode;
+         this.containerManager = containerManager;
+         this.limitManager = limitManager;
+         this.orchestratorUrl = orchestratorUrl;
+         this.usageMeter = new UsageMeter();
+         this.setupContainerListeners();
+         this.setupP2PListeners();
+     }
+     setupP2PListeners() {
+         this.p2pNode.on('job-cancel', ({ jobId }) => {
+             logger.info({ jobId }, 'received cancellation request');
+             this.cancelJob(jobId);
+         });
+     }
+     setupContainerListeners() {
+         this.containerManager.on('container-started', ({ jobId, containerId }) => {
+             const job = this.activeJobs.get(jobId);
+             if (job) {
+                 job.status = 'running';
+                 job.containerId = containerId;
+                 // start tracking usage
+                 this.usageMeter.startMeter(jobId, this.p2pNode.getPeerId(), job.spec.userId);
+                 this.emit('job-started', { jobId });
+             }
+         });
+         // track resource usage as container runs
+         this.containerManager.on('container-stats', ({ jobId, stats }) => {
+             this.usageMeter.updateUsage(jobId, {
+                 cpu: stats.cpu / 100, // percentage to cores
+                 memory: stats.memory / 1024, // mb to gb
+                 network: stats.network
+             });
+         });
+         this.containerManager.on('container-finished', ({ jobId, exitCode, logs }) => {
+             const job = this.activeJobs.get(jobId);
+             if (job) {
+                 job.status = exitCode === 0 ? 'completed' : 'failed';
+                 job.endTime = Date.now();
+                 job.result = { exitCode, logs };
+                 // release allocated resources
+                 this.limitManager.releaseResources(jobId, job.spec.resources);
+                 // finalize usage tracking and calculate actual cost
+                 const pricing = {
+                     cpuPerHour: 0.05,
+                     memoryPerGBHour: 0.02,
+                     storagePerGBHour: 0.001,
+                     networkPerGB: 0.01
+                 };
+                 const usageRecord = this.usageMeter.finalizeMeter(jobId, pricing);
+                 if (usageRecord) {
+                     job.earnings = usageRecord.cost.toNumber();
+                     logger.info({ jobId, usage: usageRecord.usage, cost: job.earnings }, 'usage calculated');
+                 }
+                 else {
+                     // fallback to simple time-based calc
+                     const runtime = (job.endTime - job.startTime) / 1000 / 60 / 60;
+                     const hourlyRate = job.spec.price || this.calculatePrice(job.spec);
+                     job.earnings = runtime * hourlyRate;
+                 }
+                 this.emit('job-completed', { jobId, earnings: job.earnings });
+                 // report back to orchestrator
+                 this.reportJobCompletion(jobId, job);
+             }
+         });
+         this.containerManager.on('container-failed', ({ jobId, error }) => {
+             const job = this.activeJobs.get(jobId);
+             if (job) {
+                 job.status = 'failed';
+                 job.endTime = Date.now();
+                 job.error = error;
+                 // release resources
+                 this.limitManager.releaseResources(jobId, job.spec.resources);
+                 this.emit('job-failed', { jobId, error });
+                 this.reportJobCompletion(jobId, job);
+             }
+         });
+     }
+     async handleJob(jobSpec) {
+         const runningJobs = Array.from(this.activeJobs.values())
+             .filter(j => j.status === 'running').length;
+         if (runningJobs >= this.maxConcurrentJobs) {
+             logger.warn({ jobId: jobSpec.id }, 'at max capacity');
+             return false;
+         }
+         // check against provider-configured limits
+         if (!this.limitManager.canAcceptJob(jobSpec.resources)) {
+             logger.warn({ jobId: jobSpec.id, required: jobSpec.resources }, 'exceeds provider limits');
+             return false;
+         }
+         // allocate the resources
+         if (!this.limitManager.allocateResources(jobSpec.id, jobSpec.resources)) {
+             logger.warn({ jobId: jobSpec.id }, 'failed to allocate resources');
+             return false;
+         }
+         const job = {
+             jobId: jobSpec.id,
+             spec: jobSpec,
+             status: 'received',
+             startTime: Date.now()
+         };
+         this.activeJobs.set(jobSpec.id, job);
+         logger.info({ jobId: jobSpec.id }, 'accepted job');
+         try {
+             job.status = 'starting';
+             await this.containerManager.runJob(jobSpec);
+             return true;
+         }
+         catch (err) {
+             logger.error({ err, jobId: jobSpec.id }, 'failed to start job');
+             job.status = 'failed';
+             job.error = err;
+             job.endTime = Date.now();
+             // release resources on failure
+             this.limitManager.releaseResources(jobSpec.id, jobSpec.resources);
+             return false;
+         }
+     }
+     calculatePrice(spec) {
+         // basic pricing if not specified
+         return spec.resources.cpu * 0.05 + spec.resources.memory * 0.02;
+     }
+     async reportJobCompletion(jobId, job) {
+         const nodeId = this.p2pNode.getPeerId();
+         // try http callback first (more reliable than p2p)
+         if (this.orchestratorUrl) {
+             try {
+                 await fetch(`${this.orchestratorUrl}/api/v1/nodes/${nodeId}/jobs/${jobId}/complete`, {
+                     method: 'POST',
+                     headers: { 'Content-Type': 'application/json' },
+                     body: JSON.stringify({
+                         success: job.status === 'completed',
+                         result: job.result,
+                         usage: {
+                             runtime: job.endTime ? (job.endTime - job.startTime) / 1000 : 0,
+                             cost: job.earnings || 0
+                         }
+                     })
+                 });
+                 logger.info({
+                     jobId,
+                     status: job.status,
+                     runtime: job.endTime ? (job.endTime - job.startTime) / 1000 : 0,
+                     earnings: job.earnings
+                 }, 'reported job completion via http');
+             }
+             catch (err) {
+                 logger.warn({ err, jobId }, 'failed to report via http, trying p2p');
+                 // fallback to p2p
+                 const completionMessage = {
+                     type: 'job-completed',
+                     data: {
+                         jobId,
+                         success: job.status === 'completed',
+                         result: job.result,
+                         usage: {
+                             runtime: job.endTime ? (job.endTime - job.startTime) / 1000 : 0,
+                             cost: job.earnings || 0
+                         },
+                         nodeId
+                     }
+                 };
+                 await this.p2pNode.sendToOrchestrator(completionMessage);
+                 logger.info({ jobId }, 'reported job completion via p2p');
+             }
+         }
+         else {
+             // no http url, use p2p only
+             const completionMessage = {
+                 type: 'job-completed',
+                 data: {
+                     jobId,
+                     success: job.status === 'completed',
+                     result: job.result,
+                     usage: {
+                         runtime: job.endTime ? (job.endTime - job.startTime) / 1000 : 0,
+                         cost: job.earnings || 0
+                     },
+                     nodeId
+                 }
+             };
+             await this.p2pNode.sendToOrchestrator(completionMessage);
+             logger.info({ jobId }, 'reported job completion via p2p');
+         }
+         setTimeout(() => {
+             this.activeJobs.delete(jobId);
+         }, 60000);
+     }
+     getActiveJobs() {
+         return Array.from(this.activeJobs.values());
+     }
+     getTotalEarnings() {
+         let total = 0;
+         for (const job of this.activeJobs.values()) {
+             if (job.earnings) {
+                 total += job.earnings;
+             }
+         }
+         return total;
+     }
+     async cancelJob(jobId) {
+         const job = this.activeJobs.get(jobId);
+         if (!job) {
+             logger.warn({ jobId }, 'job not found for cancellation');
+             return false;
+         }
+         if (job.status === 'completed' || job.status === 'failed') {
+             logger.warn({ jobId }, 'job already finished, cannot cancel');
+             return false;
+         }
+         try {
+             // stop the container if running
+             if (job.containerId) {
+                 await this.containerManager.stopContainer(job.containerId);
+             }
+             job.status = 'failed';
+             job.endTime = Date.now();
+             job.error = { message: 'cancelled by orchestrator' };
+             logger.info({ jobId }, 'job cancelled successfully');
+             // report cancellation
+             await this.reportJobCompletion(jobId, job);
+             return true;
+         }
+         catch (err) {
+             logger.error({ err, jobId }, 'failed to cancel job');
+             return false;
+         }
+     }
+ }
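
Again as a hedged sketch rather than anything shipped in the package: the JobHandler above can be exercised on its own by handing it event-emitting stand-ins for the p2p node and container manager. Every name below that is not defined in the diff (the stubs, the import path, any job spec field beyond id/userId/resources) is an assumption.

import { EventEmitter } from 'events';
import { JobHandler } from './services/job-handler.js';   // illustrative path

// stand-ins for the real p2p node, container manager, and limit manager;
// only the members JobHandler actually touches are stubbed
const p2pNode = Object.assign(new EventEmitter(), {
    getPeerId: () => 'peer-abc',
    sendToOrchestrator: async () => {},
});
const containerManager = Object.assign(new EventEmitter(), {
    // pretend the container starts as soon as the job is submitted
    runJob: async (spec) => containerManager.emit('container-started', { jobId: spec.id, containerId: 'ctr-1' }),
    stopContainer: async () => {},
});
const limitManager = {
    canAcceptJob: () => true,
    allocateResources: () => true,
    releaseResources: () => {},
};

const handler = new JobHandler(p2pNode, containerManager, limitManager, 'http://localhost:3000');
handler.on('job-started', ({ jobId }) => console.log('job started', jobId));
handler.on('job-completed', ({ jobId, earnings }) => console.log('job finished', jobId, earnings));

// handleJob() resolves to false if the node is at capacity or the limits reject the spec
const accepted = await handler.handleJob({
    id: 'job-1',
    userId: 'user-1',
    resources: { cpu: 1, memory: 2 },
});

// the orchestrator can cancel later; this stops the container and reports back
if (accepted) await handler.cancelJob('job-1');

handleJob() returns false when the node is already at its three-job concurrency cap or the limit manager rejects the spec; cancelJob() stops the container if one was started, marks the job failed, and reports the outcome back over HTTP with a p2p fallback.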