@gns-foundation/hive-worker 0.1.10 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +61 -4
- package/dist/cli.js.map +1 -1
- package/dist/executor.js +2 -0
- package/dist/executor.js.map +1 -1
- package/dist/mobydb.d.ts +132 -0
- package/dist/mobydb.js +379 -0
- package/dist/mobydb.js.map +1 -0
- package/dist/mobydb_hooks.d.ts +54 -0
- package/dist/mobydb_hooks.js +136 -0
- package/dist/mobydb_hooks.js.map +1 -0
- package/package.json +10 -8
- package/package.json.v0.5.bak.20260515T094956Z +43 -0
- package/src/cli.ts +63 -4
- package/src/cli.ts.v0.5.bak.20260515T094956Z +569 -0
- package/src/executor.ts +4 -0
- package/src/executor.ts.v0.5.bak.20260515T094956Z +279 -0
- package/src/mobydb.ts +558 -0
- package/src/mobydb_hooks.ts +173 -0
package/src/cli.ts
CHANGED
@@ -12,6 +12,7 @@
 
 import { program } from 'commander';
 import { loadOrCreateIdentity, shortPk, identityPath } from './identity.js';
+import { initWorkerMobyDB, shutdownMobyDB } from "./mobydb_hooks.js";
 import { detectHardware, detectGeo } from './hardware.js';
 import { registerNode, heartbeat, deregisterNode, fetchSwarmStats, fetchTokenBalance } from './registry.js';
 import { findRpcBinary, startRpcServer, stopRpcServer, DEFAULT_RPC_PORT, type LlamaRpcHandle } from './llama.js';
@@ -77,6 +78,9 @@ async function cmdJoin(opts: {
   const geo = await detectGeo();
   printSuccess(`Location: ${geo.city}, ${geo.country} · H3 cell ${geo.h3Cell.slice(0, 10)}…`);
 
+  // Phase 4: Embedded MobyDB
+  initWorkerMobyDB(identity.pk, geo.h3Cell);
+
   // 5. llama-cli check (for job execution)
   const llamaCliAvailable = !!findLlamaCli();
   if (!opts.noJobs) {
@@ -317,10 +321,64 @@ async function cmdJoin(opts: {
     }
   } else if (opts.noJobs) {
     addLog('Job execution disabled (--no-jobs flag)');
-  } else {
+  } else if (!rpcHandle) {
    addLog('Observer mode — install llama.cpp + fetch a model to execute jobs');
   }
 
+  // ── Pipeline poller — runs when RPC server is available ──────
+  // This is the primary job handler when rpc-server is running.
+  // It claims jobs and runs them distributed (or solo if no peers).
+  if (!opts.noJobs && rpcHandle && (llamaCliAvailable || cachedModels.length > 0)) {
+    const PIPELINE_INTERVAL = 3000;
+    const pipelineTimer = setInterval(async () => {
+      if (state.status === 'computing') return;
+
+      try {
+        const result = await claimJobPipeline(identity.pk, geo.h3Cell);
+        if (!result) return;
+
+        const { job, peers } = result;
+        state.status = 'computing';
+
+        if (peers.length === 0) {
+          addLog(`Solo job ${job.id.slice(0, 8)} · model=${job.model_id}`);
+        } else {
+          addLog(`Pipeline job ${job.id.slice(0, 8)} · ${peers.length} peer(s) · ${peers.map(p => p.rpc_host).join(', ')}`);
+        }
+
+        try {
+          await markComputing(job.id);
+          const execResult = await executeJob(job, {
+            onLog: addLog,
+            rpcPeers: peers.length > 0 ? peers : undefined,
+          });
+          await postResult(job.id, execResult);
+          jobsCompleted++;
+          const mode = peers.length > 0 ? 'distributed' : 'solo';
+          addLog(`Job ${job.id.slice(0, 8)} done · ${execResult.tokensPerSecond} tok/s (${mode})`);
+          state.tokensEarned += job.gns_reward * SPLIT.WORKER;
+          await creditWorker(identity.pk, job.id, job.gns_reward).catch(() => {});
+        } catch (execErr) {
+          const errMsg = execErr instanceof Error ? execErr.message : String(execErr);
+          await postFailure(job.id, errMsg).catch(() => {});
+          addLog(`Job ${job.id.slice(0, 8)} failed: ${errMsg.slice(0, 60)}`);
+        } finally {
+          state.status = 'idle';
+        }
+      } catch (err) {
+        // Non-fatal poll errors
+      }
+    }, PIPELINE_INTERVAL);
+
+    // Wrap timer in jobPoller for cleanup
+    const prevStop = jobPoller?.stop.bind(jobPoller);
+    jobPoller = {
+      stop: () => { prevStop?.(); clearInterval(pipelineTimer); },
+    } as any;
+
+    addLog('Pipeline poller started — RPC server ready for distributed inference');
+  }
+
   // ─── Dashboard refresh ────────────────────────────────────────
 
   const dashTimer = setInterval(() => {
@@ -337,6 +395,7 @@ async function cmdJoin(opts: {
     clearInterval(statsTimer);
     clearInterval(dashTimer);
     jobPoller?.stop();
+    shutdownMobyDB();
     clearDashboard();
 
     console.log('\n');
@@ -426,7 +485,7 @@ async function cmdModelsList(): Promise<void> {
     }
   }
   console.log('');
-  printInfo('Available models: phi-3-mini, gemma-2-2b, tinyllama');
+  printInfo('Available models: phi-3-mini, gemma-2-2b, tinyllama, lfm2.5-1.2b-instruct, lfm2.5-1.2b-thinking');
   console.log('');
 }
 
@@ -441,7 +500,7 @@ async function cmdModelsFetch(modelId: string): Promise<void> {
 
   const url = resolveModelUrl(job as HiveJob);
   if (!url) {
-    printError(`Unknown model "${modelId}". Available: phi-3-mini, gemma-2-2b, tinyllama`);
+    printError(`Unknown model "${modelId}". Available: phi-3-mini, gemma-2-2b, tinyllama, lfm2.5-1.2b-instruct, lfm2.5-1.2b-thinking`);
     process.exit(1);
   }
 
@@ -504,7 +563,7 @@ models
 
 models
   .command('fetch <model-id>')
-  .description('Download a model (phi-3-mini, gemma-2-2b, tinyllama)')
+  .description('Download a model (phi-3-mini, gemma-2-2b, tinyllama, lfm2.5-1.2b-instruct, lfm2.5-1.2b-thinking)')
   .action(async (modelId: string) => { await cmdModelsFetch(modelId); });
 
 program.parse();
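Note: mobydb_hooks.ts is new in this release, but only its two call sites appear in the hunks above. The sketch below is a minimal guess at the surface cli.ts depends on; everything except the two exported names and their argument shapes (initWorkerMobyDB(pk, h3Cell), shutdownMobyDB()) is an assumption, not the shipped code.

// Hypothetical sketch of src/mobydb_hooks.ts: a module-level singleton handle.
// Only the two export signatures are confirmed by the cli.ts call sites.
import { MobyDB } from './mobydb.js'; // export name assumed

let db: MobyDB | null = null;

// Called once during `join`, after identity and geo detection (cli.ts line 82).
export function initWorkerMobyDB(workerPk: string, h3Cell: string): void {
  if (db) return; // idempotent in case join is re-entered
  db = new MobyDB({ nodeId: workerPk, region: h3Cell }); // constructor shape assumed
}

// Called from the SIGINT/SIGTERM shutdown path, after the pollers stop (cli.ts line 398).
export function shutdownMobyDB(): void {
  db?.close(); // close() is an assumption
  db = null;
}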
package/src/cli.ts.v0.5.bak.20260515T094956Z
ADDED
@@ -0,0 +1,569 @@
+#!/usr/bin/env node
+// ============================================================
+// HIVE WORKER CLI
+// Usage:
+//   npx @gns-foundation/hive-worker join [--handle <@name>] [--rpc-port <port>]
+//   npx @gns-foundation/hive-worker status
+//   npx @gns-foundation/hive-worker leave
+//   npx @gns-foundation/hive-worker whoami
+//   npx @gns-foundation/hive-worker models list
+//   npx @gns-foundation/hive-worker models fetch <model-id>
+// ============================================================
+
+import { program } from 'commander';
+import { loadOrCreateIdentity, shortPk, identityPath } from './identity.js';
+import { initWorkerMobyDB, shutdownMobyDB } from "./mobydb_hooks.js";
+import { detectHardware, detectGeo } from './hardware.js';
+import { registerNode, heartbeat, deregisterNode, fetchSwarmStats, fetchTokenBalance } from './registry.js';
+import { findRpcBinary, startRpcServer, stopRpcServer, DEFAULT_RPC_PORT, type LlamaRpcHandle } from './llama.js';
+import {
+  printBanner, printSuccess, printInfo, printWarn, printError,
+  renderDashboard, clearDashboard, hideCursor,
+  type DashboardState,
+} from './dashboard.js';
+import { startJobPoller, claimJobPipeline, markComputing, postResult, postFailure, postChunk, type PollController, type HiveJob, type JobResult } from './jobs.js';
+import { executeJob, downloadModel, listCachedModels, resolveModelUrl, findLlamaCli } from './executor.js';
+import { creditWorker, SPLIT } from './settlement.js';
+
+import os from 'os';
+
+// ─── Get local LAN IP ─────────────────────────────────────────
+function getLocalIp(): string | null {
+  const nets = os.networkInterfaces();
+  for (const name of Object.keys(nets)) {
+    for (const net of nets[name] ?? []) {
+      if (net.family === 'IPv4' && !net.internal) return net.address;
+    }
+  }
+  return null;
+}
+
+const HEARTBEAT_INTERVAL_MS = 30_000;
+const STATS_REFRESH_INTERVAL_MS = 60_000;
+const DASHBOARD_REFRESH_MS = 1_000;
+const JOB_POLL_INTERVAL_MS = 5_000; // poll for new jobs every 5s
+
+// ─── JOIN command ─────────────────────────────────────────────
+
+async function cmdJoin(opts: {
+  handle?: string;
+  rpcPort?: string;
+  skipRpc?: boolean;
+  noJobs?: boolean;
+}): Promise<void> {
+  printBanner();
+
+  // 1. Identity
+  const { identity, isNew } = loadOrCreateIdentity();
+  if (isNew) {
+    printSuccess(`New identity generated: ${shortPk(identity.pk)}`);
+    printInfo(`Stored at ${identityPath()}`);
+  } else {
+    printSuccess(`Identity loaded: ${shortPk(identity.pk)}`);
+  }
+
+  // 2. Handle
+  const handle = opts.handle?.replace(/^@/, '') ?? null;
+  if (handle) printSuccess(`Handle: @${handle}`);
+  else printInfo('No handle — use --handle @yourname to claim one.');
+
+  // 3. Hardware
+  printInfo('Detecting hardware...');
+  const hw = detectHardware();
+  printSuccess(`${hw.cpuCores}-core ${hw.arch} · ${hw.ramGb} GB RAM · ~${hw.estimatedTflops} TFLOPS`);
+  if (hw.gpuModel) printSuccess(`GPU: ${hw.gpuModel}`);
+
+  // 4. Geo
+  printInfo('Detecting location...');
+  const geo = await detectGeo();
+  printSuccess(`Location: ${geo.city}, ${geo.country} · H3 cell ${geo.h3Cell.slice(0, 10)}…`);
+
+  // Phase 4: Embedded MobyDB
+  initWorkerMobyDB(identity.pk, geo.h3Cell);
+
+  // 5. llama-cli check (for job execution)
+  const llamaCliAvailable = !!findLlamaCli();
+  if (!opts.noJobs) {
+    if (llamaCliAvailable) {
+      printSuccess('llama-cli found — job execution enabled');
+    } else {
+      printWarn('llama-cli not found — joining as observer (no inference jobs).');
+      printWarn('Install llama.cpp to earn GNS: https://github.com/ggerganov/llama.cpp');
+    }
+  }
+
+  // logs declared early so the rpc-server callback can reference it immediately
+  const logs: string[] = [`Joined at ${new Date().toISOString()}`];
+
+  // 6. RPC server (for pipeline/shard mode)
+  let rpcHandle: LlamaRpcHandle | null = null;
+  const rpcBinary = findRpcBinary();
+  const rpcPort = opts.skipRpc ? null : parseInt(opts.rpcPort ?? String(DEFAULT_RPC_PORT), 10);
+
+  if (!opts.skipRpc && rpcBinary) {
+    printInfo(`Starting llama.cpp rpc-server on :${rpcPort}...`);
+    rpcHandle = startRpcServer(rpcPort!, (line) => logs.push(line));
+    if (rpcHandle) printSuccess(`RPC server PID ${rpcHandle.pid} on :${rpcPort}`);
+  } else if (!opts.skipRpc) {
+    printWarn('rpc-server not found — pipeline mode disabled.');
+  }
+
+  // 7. Register
+  printInfo('Registering with GEIANT Hive swarm...');
+  try {
+    const cachedModelIds = listCachedModels().map(m => m.modelId);
+    const localIp = rpcHandle ? getLocalIp() : null;
+    await registerNode(identity, hw, geo, handle, rpcHandle?.port ?? null, cachedModelIds, localIp, !!rpcHandle);
+    printSuccess('Registered in swarm registry');
+  } catch (err) {
+    printError(`Registration failed: ${err instanceof Error ? err.message : String(err)}`);
+    printWarn('Continuing in offline mode — will retry on heartbeat');
+  }
+
+  // 8. Initial stats
+  let swarmStats = await fetchSwarmStats();
+  let tokensEarned = await fetchTokenBalance(identity.pk);
+  printSuccess(`Swarm: ${swarmStats.activeNodes} active nodes · ${swarmStats.totalTflops} TFLOPS`);
+
+  // Cached models
+  const cachedModels = listCachedModels();
+  if (cachedModels.length > 0) {
+    printSuccess(`Models cached: ${cachedModels.map(m => m.modelId).join(', ')}`);
+  } else if (!opts.noJobs && llamaCliAvailable) {
+    printWarn('No models cached. Fetch one: hive-worker models fetch phi-3-mini');
+  }
+
+  console.log('');
+  printInfo('Entering live mode. Press Ctrl+C to disconnect.\n');
+
+  // ─── Live state ───────────────────────────────────────────────
+
+  hideCursor();
+
+  const startTime = Date.now();
+  let heartbeatCount = 0;
+  let lastHeartbeat: Date | null = null;
+  let jobsCompleted = 0;
+
+  const addLog = (msg: string) => {
+    logs.push(`[${new Date().toLocaleTimeString()}] ${msg}`);
+    if (logs.length > 50) logs.splice(0, logs.length - 50);
+  };
+
+  const state: DashboardState = {
+    pk: identity.pk,
+    handle,
+    status: 'idle',
+    hw,
+    geo,
+    rpcPort: rpcHandle?.port ?? null,
+    rpcAvailable: !!rpcBinary,
+    tokensEarned,
+    swarmStats,
+    uptimeSeconds: 0,
+    heartbeatCount: 0,
+    lastHeartbeat: null,
+    logs,
+  };
+
+  // ─── Heartbeat timer ─────────────────────────────────────────
+
+  const heartbeatTimer = setInterval(async () => {
+    try {
+      const freshModels = listCachedModels().map(m => m.modelId);
+      const localIp = rpcHandle ? getLocalIp() : null;
+      await heartbeat(identity.pk, state.status, freshModels, localIp);
+      heartbeatCount++;
+      lastHeartbeat = new Date();
+      state.heartbeatCount = heartbeatCount;
+      state.lastHeartbeat = lastHeartbeat;
+      addLog(`Heartbeat #${heartbeatCount}`);
+    } catch (err) {
+      addLog(`Heartbeat error: ${err instanceof Error ? err.message : String(err)}`);
+    }
+  }, HEARTBEAT_INTERVAL_MS);
+
+  // ─── Stats refresh timer ─────────────────────────────────────
+
+  const statsTimer = setInterval(async () => {
+    try {
+      swarmStats = await fetchSwarmStats();
+      tokensEarned = await fetchTokenBalance(identity.pk);
+      state.swarmStats = swarmStats;
+      state.tokensEarned = tokensEarned;
+    } catch { /* non-fatal */ }
+  }, STATS_REFRESH_INTERVAL_MS);
+
+  // ─── Job polling loop ─────────────────────────────────────────
+
+  let jobPoller: PollController | null = null;
+
+  // When RPC server is running, pipeline poller handles everything
+  // Standard poller only runs when no RPC server is available
+  const useStandardPoller = !rpcHandle;
+
+  if (!opts.noJobs && useStandardPoller && (llamaCliAvailable || cachedModels.length > 0)) {
+    jobPoller = startJobPoller({
+      workerPk: identity.pk,
+      h3Cell: geo.h3Cell,
+      intervalMs: JOB_POLL_INTERVAL_MS,
+
+      onJobClaimed: (job: HiveJob) => {
+        state.status = 'computing';
+        addLog(`Job claimed: ${job.id.slice(0, 8)} · model=${job.model_id} · ${job.max_tokens} tokens`);
+      },
+
+      onJobCompleted: (job: HiveJob, result: JobResult) => {
+        state.status = 'idle';
+        jobsCompleted++;
+        addLog(
+          `Job ${job.id.slice(0, 8)} done · ${result.tokensPerSecond} tok/s · ` +
+          `earned ${(job.gns_reward * SPLIT.WORKER).toFixed(4)} GNS`,
+        );
+        // Optimistic local credit (server-side Stellar TX is async)
+        state.tokensEarned += job.gns_reward * SPLIT.WORKER;
+        // Kick off background settlement
+        creditWorker(identity.pk, job.id, job.gns_reward)
+          .then(rec => {
+            if (rec.stellarTxHash) {
+              addLog(`Settled · TX ${rec.stellarTxHash.slice(0, 12)}…`);
+            }
+          })
+          .catch(err => addLog(`Settlement error: ${err instanceof Error ? err.message : String(err)}`));
+      },
+
+      onJobFailed: (job: HiveJob, error: string) => {
+        state.status = 'idle';
+        addLog(`Job ${job.id.slice(0, 8)} failed: ${error.slice(0, 60)}`);
+      },
+
+      onLog: addLog,
+
+      executor: async (job: HiveJob, onToken?: (t: string) => void) => {
+        return executeJob(job, {
+          onToken,
+          onLog: addLog,
+          // No RPC peers in standard mode — solo execution
+        });
+      },
+    });
+
+    addLog('Job poller started — polling every 5s');
+
+    // ── Pipeline poll loop (only when RPC server is running) ──
+    // Polls for jobs and uses split inference when peers are available.
+    if (rpcHandle) {
+      const PIPELINE_INTERVAL = 3000; // faster than standard poll — pipeline takes priority
+      const pipelineTimer = setInterval(async () => {
+        if (state.status === 'computing') return; // already busy
+
+        try {
+          const result = await claimJobPipeline(
+            identity.pk,
+            geo.h3Cell,
+          );
+          if (!result) return;
+
+          const { job, peers } = result;
+          if (peers.length === 0) {
+            // No peers available — run solo on this worker
+            addLog(`Solo job ${job.id.slice(0, 8)} (no RPC peers available)`);
+            try {
+              await markComputing(job.id);
+              const execResult = await executeJob(job, { onLog: addLog });
+              await postResult(job.id, execResult);
+              jobsCompleted++;
+              addLog(`Solo job ${job.id.slice(0, 8)} done · ${execResult.tokensPerSecond} tok/s`);
+              state.tokensEarned += job.gns_reward * SPLIT.WORKER;
+              await creditWorker(identity.pk, job.id, job.gns_reward).catch(() => {});
+            } catch (execErr) {
+              const errMsg = execErr instanceof Error ? execErr.message : String(execErr);
+              await postFailure(job.id, errMsg).catch(() => {});
+            } finally {
+              state.status = 'idle';
+            }
+            return;
+          }
+
+          state.status = 'computing';
+          addLog(`Pipeline job ${job.id.slice(0, 8)} · ${peers.length} peer(s) · ${peers.map(p => p.rpc_host).join(', ')}`);
+
+          try {
+            await markComputing(job.id);
+            const execResult = await executeJob(job, {
+              onLog: addLog,
+              rpcPeers: peers,
+            });
+            await postResult(job.id, execResult);
+            jobsCompleted++;
+            addLog(`Pipeline job ${job.id.slice(0, 8)} done · ${execResult.tokensPerSecond} tok/s (distributed)`);
+            state.tokensEarned += job.gns_reward * SPLIT.WORKER;
+          } catch (execErr) {
+            const errMsg = execErr instanceof Error ? execErr.message : String(execErr);
+            await postFailure(job.id, errMsg).catch(() => {});
+            addLog(`Pipeline job ${job.id.slice(0, 8)} failed: ${errMsg.slice(0, 60)}`);
+          } finally {
+            state.status = 'idle';
+          }
+        } catch (err) {
+          // Non-fatal — log and continue
+          addLog(`Pipeline poll error: ${err instanceof Error ? err.message : String(err)}`);
+        }
+      }, PIPELINE_INTERVAL);
+
+      // Store timer for cleanup
+      const origStop = jobPoller!.stop.bind(jobPoller);
+      jobPoller = {
+        ...jobPoller!,
+        stop: () => { origStop(); clearInterval(pipelineTimer); },
+      };
+      addLog('Pipeline poller started — RPC server ready for distributed inference');
+    }
+  } else if (opts.noJobs) {
+    addLog('Job execution disabled (--no-jobs flag)');
+  } else if (!rpcHandle) {
+    addLog('Observer mode — install llama.cpp + fetch a model to execute jobs');
+  }
+
+  // ── Pipeline poller — runs when RPC server is available ──────
+  // This is the primary job handler when rpc-server is running.
+  // It claims jobs and runs them distributed (or solo if no peers).
+  if (!opts.noJobs && rpcHandle && (llamaCliAvailable || cachedModels.length > 0)) {
+    const PIPELINE_INTERVAL = 3000;
+    const pipelineTimer = setInterval(async () => {
+      if (state.status === 'computing') return;
+
+      try {
+        const result = await claimJobPipeline(identity.pk, geo.h3Cell);
+        if (!result) return;
+
+        const { job, peers } = result;
+        state.status = 'computing';
+
+        if (peers.length === 0) {
+          addLog(`Solo job ${job.id.slice(0, 8)} · model=${job.model_id}`);
+        } else {
+          addLog(`Pipeline job ${job.id.slice(0, 8)} · ${peers.length} peer(s) · ${peers.map(p => p.rpc_host).join(', ')}`);
+        }
+
+        try {
+          await markComputing(job.id);
+          const execResult = await executeJob(job, {
+            onLog: addLog,
+            rpcPeers: peers.length > 0 ? peers : undefined,
+          });
+          await postResult(job.id, execResult);
+          jobsCompleted++;
+          const mode = peers.length > 0 ? 'distributed' : 'solo';
+          addLog(`Job ${job.id.slice(0, 8)} done · ${execResult.tokensPerSecond} tok/s (${mode})`);
+          state.tokensEarned += job.gns_reward * SPLIT.WORKER;
+          await creditWorker(identity.pk, job.id, job.gns_reward).catch(() => {});
+        } catch (execErr) {
+          const errMsg = execErr instanceof Error ? execErr.message : String(execErr);
+          await postFailure(job.id, errMsg).catch(() => {});
+          addLog(`Job ${job.id.slice(0, 8)} failed: ${errMsg.slice(0, 60)}`);
+        } finally {
+          state.status = 'idle';
+        }
+      } catch (err) {
+        // Non-fatal poll errors
+      }
+    }, PIPELINE_INTERVAL);
+
+    // Wrap timer in jobPoller for cleanup
+    const prevStop = jobPoller?.stop.bind(jobPoller);
+    jobPoller = {
+      stop: () => { prevStop?.(); clearInterval(pipelineTimer); },
+    } as any;
+
+    addLog('Pipeline poller started — RPC server ready for distributed inference');
+  }
+
+  // ─── Dashboard refresh ────────────────────────────────────────
+
+  const dashTimer = setInterval(() => {
+    state.uptimeSeconds = Math.floor((Date.now() - startTime) / 1000);
+    renderDashboard(state);
+  }, DASHBOARD_REFRESH_MS);
+
+  renderDashboard(state);
+
+  // ─── Graceful shutdown ────────────────────────────────────────
+
+  const shutdown = async (signal: string) => {
+    clearInterval(heartbeatTimer);
+    clearInterval(statsTimer);
+    clearInterval(dashTimer);
+    jobPoller?.stop();
+    shutdownMobyDB();
+    clearDashboard();
+
+    console.log('\n');
+    printInfo(`Received ${signal}. Disconnecting gracefully...`);
+
+    try {
+      await deregisterNode(identity.pk);
+      printSuccess('Marked offline in swarm registry');
+    } catch { /* best-effort */ }
+
+    if (rpcHandle) {
+      stopRpcServer(rpcHandle);
+      printSuccess('RPC server stopped');
+    }
+
+    const earned = await fetchTokenBalance(identity.pk).catch(() => state.tokensEarned);
+    printSuccess(`Session complete · ${jobsCompleted} jobs · ${earned.toFixed(4)} GNS earned`);
+    console.log('');
+    process.exit(0);
+  };
+
+  process.on('SIGINT', () => shutdown('SIGINT'));
+  process.on('SIGTERM', () => shutdown('SIGTERM'));
+}
+
+// ─── STATUS command ───────────────────────────────────────────
+
+async function cmdStatus(): Promise<void> {
+  printBanner();
+  const { identity } = loadOrCreateIdentity();
+  printInfo(`Identity: ${shortPk(identity.pk)}`);
+
+  const stats = await fetchSwarmStats();
+  const tokens = await fetchTokenBalance(identity.pk);
+  printInfo(`Swarm: ${stats.activeNodes} active nodes · ${stats.totalTflops} TFLOPS`);
+  printInfo(`GNS earned: ${tokens.toFixed(4)}`);
+
+  const models = listCachedModels();
+  if (models.length > 0) {
+    printInfo(`Cached models: ${models.map(m => `${m.modelId} (${m.sizeMb} MB)`).join(', ')}`);
+  } else {
+    printWarn('No models cached. Run: hive-worker models fetch phi-3-mini');
+  }
+
+  const llamaCli = findLlamaCli();
+  const rpcBin = findRpcBinary();
+  printInfo(`llama-cli: ${llamaCli ?? 'not found'}`);
+  printInfo(`rpc-server: ${rpcBin ?? 'not found'}`);
+}
+
+// ─── LEAVE command ────────────────────────────────────────────
+
+async function cmdLeave(): Promise<void> {
+  printBanner();
+  const { identity } = loadOrCreateIdentity();
+  printInfo(`Deregistering ${shortPk(identity.pk)} from swarm...`);
+  await deregisterNode(identity.pk);
+  printSuccess('Marked offline. Identity preserved at ' + identityPath());
+  printInfo('Run `hive-worker join` to rejoin.');
+}
+
+// ─── WHOAMI command ───────────────────────────────────────────
+
+async function cmdWhoami(): Promise<void> {
+  const { identity, isNew } = loadOrCreateIdentity();
+  if (isNew) printWarn('No identity found — a new one was just generated.');
+  console.log('');
+  console.log(`  pk: ${identity.pk}`);
+  console.log(`  short: ${shortPk(identity.pk)}`);
+  console.log(`  created: ${identity.createdAt}`);
+  console.log(`  file: ${identityPath()}`);
+  console.log('');
+}
+
+// ─── MODELS LIST command ──────────────────────────────────────
+
+async function cmdModelsList(): Promise<void> {
+  const cached = listCachedModels();
+  console.log('');
+  if (cached.length === 0) {
+    printWarn('No models cached yet.');
+    printInfo('Fetch one: hive-worker models fetch phi-3-mini');
+  } else {
+    printSuccess(`${cached.length} model(s) cached:`);
+    for (const m of cached) {
+      console.log(`  ${m.modelId.padEnd(20)} ${m.sizeMb} MB`);
+    }
+  }
+  console.log('');
+  printInfo('Available models: phi-3-mini, gemma-2-2b, tinyllama');
+  console.log('');
+}
+
+// ─── MODELS FETCH command ─────────────────────────────────────
+
+async function cmdModelsFetch(modelId: string): Promise<void> {
+  printBanner();
+  const job = {
+    model_id: modelId,
+    model_url: null,
+  } as Pick<HiveJob, 'model_id' | 'model_url'>;
+
+  const url = resolveModelUrl(job as HiveJob);
+  if (!url) {
+    printError(`Unknown model "${modelId}". Available: phi-3-mini, gemma-2-2b, tinyllama`);
+    process.exit(1);
+  }
+
+  printInfo(`Downloading ${modelId}...`);
+  printInfo(`Source: ${url}`);
+  console.log('');
+
+  let lastPct = -1;
+  const dest = await downloadModel(modelId, url, (pct, mbDone, mbTotal) => {
+    if (pct !== lastPct) {
+      process.stdout.write(`\r Downloading... ${pct}% (${mbDone} / ${mbTotal} MB) `);
+      lastPct = pct;
+    }
+  });
+
+  console.log('');
+  printSuccess(`Saved to ${dest}`);
+  printInfo('You can now run: hive-worker join');
+}
+
+// ─── CLI definition ───────────────────────────────────────────
+
+program
+  .name('hive-worker')
+  .description('GEIANT Hive compute node — earn GNS tokens for idle compute')
+  .version('0.1.0');
+
+program
+  .command('join')
+  .description('Join the GEIANT Hive swarm and start earning GNS')
+  .option('--handle <n>', 'Your GNS handle (e.g. @alice)')
+  .option('--rpc-port <port>', 'Port for llama.cpp rpc-server', String(DEFAULT_RPC_PORT))
+  .option('--skip-rpc', 'Skip starting rpc-server')
+  .option('--no-jobs', 'Observer only — do not execute inference jobs')
+  .action(async (opts) => { await cmdJoin(opts); });
+
+program
+  .command('status')
+  .description('Show swarm status and your token balance')
+  .action(async () => { await cmdStatus(); });
+
+program
+  .command('leave')
+  .description('Gracefully disconnect from the swarm')
+  .action(async () => { await cmdLeave(); });
+
+program
+  .command('whoami')
+  .description('Show your Hive identity')
+  .action(async () => { await cmdWhoami(); });
+
+const models = program
+  .command('models')
+  .description('Manage locally cached inference models');
+
+models
+  .command('list')
+  .description('List cached models')
+  .action(async () => { await cmdModelsList(); });
+
+models
+  .command('fetch <model-id>')
+  .description('Download a model (phi-3-mini, gemma-2-2b, tinyllama)')
+  .action(async (modelId: string) => { await cmdModelsFetch(modelId); });
+
+program.parse();
package/src/executor.ts
CHANGED
@@ -69,6 +69,10 @@ const KNOWN_MODELS: Record<string, string> = {
     'https://huggingface.co/bartowski/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q4_K_M.gguf',
   'tinyllama':
     'https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf',
+  'lfm2.5-1.2b-instruct':
+    'https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct-GGUF/resolve/main/LFM2.5-1.2B-Instruct-Q4_K_M.gguf',
+  'lfm2.5-1.2b-thinking':
+    'https://huggingface.co/LiquidAI/LFM2.5-1.2B-Thinking-GGUF/resolve/main/LFM2.5-1.2B-Thinking-Q4_K_M.gguf',
 };
 
 export function resolveModelUrl(job: HiveJob): string | null {
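Note: resolveModelUrl's body is unchanged and therefore not shown; only its signature and the KNOWN_MODELS table appear in this hunk. A plausible sketch of the lookup, inferred from cmdModelsFetch passing { model_id, model_url: null }; the body below is an assumption, not the shipped code.

// Hypothetical body for resolveModelUrl. The signature and KNOWN_MODELS are
// confirmed by the diff above; the lookup order is inferred.
export function resolveModelUrl(job: HiveJob): string | null {
  // Prefer an explicit per-job URL, then fall back to the known-models
  // table keyed by model_id (now including the two LiquidAI LFM2.5 GGUFs).
  return job.model_url ?? KNOWN_MODELS[job.model_id] ?? null;
}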
|