@gns-foundation/hive-worker 0.1.10 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +61 -4
- package/dist/cli.js.map +1 -1
- package/dist/executor.js +2 -0
- package/dist/executor.js.map +1 -1
- package/dist/mobydb.d.ts +132 -0
- package/dist/mobydb.js +379 -0
- package/dist/mobydb.js.map +1 -0
- package/dist/mobydb_hooks.d.ts +54 -0
- package/dist/mobydb_hooks.js +136 -0
- package/dist/mobydb_hooks.js.map +1 -0
- package/package.json +10 -8
- package/package.json.v0.5.bak.20260515T094956Z +43 -0
- package/src/cli.ts +63 -4
- package/src/cli.ts.v0.5.bak.20260515T094956Z +569 -0
- package/src/executor.ts +4 -0
- package/src/executor.ts.v0.5.bak.20260515T094956Z +279 -0
- package/src/mobydb.ts +558 -0
- package/src/mobydb_hooks.ts +173 -0
@@ -0,0 +1,279 @@
+// ============================================================
+// HIVE WORKER — EXECUTOR
+// Runs llama-cli for an assigned job.
+//
+// Two modes:
+//   pipeline — rpc-server already running, orchestrator calls us
+//              (worker just tracks status, no local llama-cli)
+//   solo     — worker claims and runs the whole model locally
+//              (for small models: phi-3-mini, gemma-2-2b, etc.)
+//
+// Solo mode is the active path for hive-worker v0.1 / v0.2.
+// Pipeline mode is the full swarm path (Phase 3 whitepaper).
+// ============================================================
+
+import { spawn } from 'child_process';
+import { execSync } from 'child_process';
+import os from 'os';
+import path from 'path';
+import type { HiveJob, JobResult, RpcPeer } from './jobs.js';
+
+// ─── Binary detection ─────────────────────────────────────────
+
+const LLAMA_CLI_CANDIDATES = [
+  // llama-completion: pure completion mode, clean stdout, no banner
+  `${os.homedir()}/llama.cpp/build/bin/llama-completion`,
+  `${os.homedir()}/llama.cpp/build/llama-completion`,
+  'llama-completion',
+  // Fallback to llama-cli
+  `${os.homedir()}/llama.cpp/build/bin/llama-cli`,
+  `${os.homedir()}/llama.cpp/build/llama-cli`,
+  'llama-cli',
+];
+
+export function findLlamaCli(): string | null {
+  for (const candidate of LLAMA_CLI_CANDIDATES) {
+    try {
+      execSync(`test -f "${candidate}" || which "${candidate}" 2>/dev/null`, { timeout: 2000 });
+      return candidate;
+    } catch { /* not found */ }
+  }
+  return null;
+}
+
+// ─── Model cache ──────────────────────────────────────────────
+
+const MODEL_CACHE_DIR = path.join(os.homedir(), '.hive', 'models');
+
+import fs from 'fs';
+
+export function ensureModelCacheDir(): void {
+  fs.mkdirSync(MODEL_CACHE_DIR, { recursive: true });
+}
+
+export function modelCachePath(modelId: string): string {
+  // Normalize: "phi-3-mini" → "phi-3-mini.gguf"
+  const filename = modelId.endsWith('.gguf') ? modelId : `${modelId}.gguf`;
+  return path.join(MODEL_CACHE_DIR, filename);
+}
+
+export function isModelCached(modelId: string): boolean {
+  return fs.existsSync(modelCachePath(modelId));
+}
+
+// Known small models with public GGUF URLs (Q4_K_M quantizations)
+const KNOWN_MODELS: Record<string, string> = {
+  'phi-3-mini':
+    'https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf',
+  'gemma-2-2b':
+    'https://huggingface.co/bartowski/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q4_K_M.gguf',
+  'tinyllama':
+    'https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf',
+};
+
+export function resolveModelUrl(job: HiveJob): string | null {
+  if (job.model_url) return job.model_url;
+  return KNOWN_MODELS[job.model_id] ?? null;
+}
+
+// ─── Download model (streaming, with progress) ───────────────
+
+export async function downloadModel(
+  modelId: string,
+  url: string,
+  onProgress: (pct: number, mbDone: number, mbTotal: number) => void,
+): Promise<string> {
+  ensureModelCacheDir();
+  const dest = modelCachePath(modelId);
+  const tmpDest = dest + '.download';
+
+  const resp = await fetch(url, { signal: AbortSignal.timeout(300_000) });
+  if (!resp.ok) throw new Error(`Model download failed: ${resp.status}`);
+
+  const totalBytes = parseInt(resp.headers.get('content-length') ?? '0', 10);
+  const totalMb = totalBytes / 1024 / 1024;
+
+  const writer = fs.createWriteStream(tmpDest);
+  let downloaded = 0;
+
+  if (!resp.body) throw new Error('No response body');
+
+  const reader = resp.body.getReader();
+  while (true) {
+    const { done, value } = await reader.read();
+    if (done) break;
+    writer.write(value);
+    downloaded += value.length;
+    if (totalBytes > 0) {
+      onProgress(
+        Math.round((downloaded / totalBytes) * 100),
+        Math.round(downloaded / 1024 / 1024),
+        Math.round(totalMb),
+      );
+    }
+  }
+
+  await new Promise<void>((res, rej) => {
+    writer.end(() => res());
+    writer.on('error', rej);
+  });
+
+  fs.renameSync(tmpDest, dest);
+  return dest;
+}
+
+// ─── Execute inference ────────────────────────────────────────
+
+export interface ExecutorOptions {
+  onToken?: (token: string) => void;
+  onLog?: (line: string) => void;
+  rpcPeers?: RpcPeer[]; // peer RPC servers for pipeline parallelism
+}
+
+export async function executeJob(
+  job: HiveJob,
+  onTokenOrOpts: ((t: string) => void) | ExecutorOptions = {},
+): Promise<JobResult> {
+  const opts: ExecutorOptions = typeof onTokenOrOpts === 'function'
+    ? { onToken: onTokenOrOpts }
+    : onTokenOrOpts;
+  const llamaCli = findLlamaCli();
+  if (!llamaCli) {
+    throw new Error(
+      'llama-cli not found. Install llama.cpp: https://github.com/ggerganov/llama.cpp',
+    );
+  }
+
+  const modelPath = modelCachePath(job.model_id);
+  if (!fs.existsSync(modelPath)) {
+    throw new Error(
+      `Model "${job.model_id}" not cached at ${modelPath}. ` +
+      `Run: hive-worker models fetch ${job.model_id}`,
+    );
+  }
+
+  return runLlamaCli(llamaCli, modelPath, job, opts);
+}
+
+function runLlamaCli(
+  binary: string,
+  modelPath: string,
+  job: HiveJob,
+  opts: ExecutorOptions,
+): Promise<JobResult> {
+  return new Promise((resolve, reject) => {
+    const startMs = Date.now();
+    const threads = Math.max(1, Math.floor(os.cpus().length / 2));
+
+    // llama-completion: pure completion mode, clean stdout, no banner
+    const args = [
+      '--model', modelPath,
+      '--prompt', job.prompt,
+      '--n-predict', String(job.max_tokens),
+      '--temp', String(job.temperature),
+      '--ctx-size', '4096',
+      '--threads', String(threads),
+      '--single-turn', // exit after one completion
+      '-ngl', '0', // CPU-only (Metal tensor API disabled on pre-M5)
+    ];
+
+    // Pipeline parallelism: add --rpc for each peer worker
+    const peers = opts.rpcPeers ?? [];
+    for (const peer of peers) {
+      args.push('--rpc', `${peer.rpc_host}:${peer.rpc_port}`);
+      opts.onLog?.(`Pipeline peer: ${peer.rpc_host}:${peer.rpc_port} (${peer.tflops} TFLOPS)`);
+    }
+    if (peers.length > 0) {
+      opts.onLog?.(`Pipeline mode: ${peers.length + 1} nodes, splitting ${job.max_tokens} tokens`);
+    }
+
+    opts.onLog?.(`Executing ${job.model_id} (${job.max_tokens} tokens)`);
+
+    const proc = spawn(binary, args, {
+      stdio: ['ignore', 'pipe', 'pipe'],
+    });
+
+    let stdout = '';
+    let stderr = '';
+    let streamBuffer = ''; // buffer for <|assistant|> detection before streaming
+    let streamingStarted = false;
+
+    proc.stdout.on('data', (chunk: Buffer) => {
+      const text = chunk.toString();
+      stdout += text;
+
+      // Stream tokens to caller once we've passed the <|assistant|> marker
+      if (opts.onToken) {
+        if (!streamingStarted) {
+          streamBuffer += text;
+          const markerIdx = streamBuffer.lastIndexOf('<|assistant|>');
+          if (markerIdx !== -1) {
+            // Found marker — emit everything after it
+            streamingStarted = true;
+            const afterMarker = streamBuffer.slice(markerIdx + '<|assistant|>'.length);
+            if (afterMarker.length > 0) opts.onToken(afterMarker);
+          }
+        } else {
+          // Already past marker — emit each chunk immediately
+          opts.onToken(text);
+        }
+      }
+    });
+    proc.stderr.on('data', (chunk: Buffer) => {
+      stderr += chunk.toString();
+      opts.onLog?.(chunk.toString().slice(0, 60).trim());
+    });
+
+    proc.on('error', (err) => reject(new Error(`spawn error: ${err.message}`)));
+
+    proc.on('close', (code) => {
+      const wallMs = Date.now() - startMs;
+      opts.onLog?.(`stdout=${stdout.length}b stderr=${stderr.length}b code=${code}`);
+
+      if (code !== 0 && stdout.trim().length === 0) {
+        reject(new Error(`llama-cli exited ${code}: ${stderr.slice(-200)}`));
+        return;
+      }
+
+      // llama-completion echoes the prompt with chat template then the response.
+      // Strip everything up to and including <|assistant|> token.
+      let resultText = stdout.trim();
+      const assistantMarker = '<|assistant|>';
+      const assistantIdx = resultText.lastIndexOf(assistantMarker);
+      if (assistantIdx !== -1) {
+        resultText = resultText.slice(assistantIdx + assistantMarker.length).trim();
+      }
+
+      // Parse tok/s from stderr timing line
+      let tokensPerSecond = 0;
+      const tokensGenerated = resultText.split(/\s+/).filter(Boolean).length;
+      const tpsMatch = stderr.match(/eval time\s*=\s*([\d.]+)\s*ms\s*\/\s*(\d+)\s*tokens/);
+      if (tpsMatch) {
+        const evalMs = parseFloat(tpsMatch[1]);
+        tokensPerSecond = Math.round((parseInt(tpsMatch[2], 10) / evalMs) * 1000 * 10) / 10;
+      } else if (wallMs > 0) {
+        tokensPerSecond = Math.round((tokensGenerated / wallMs) * 1000 * 10) / 10;
+      }
+
+      resolve({ resultText, tokensGenerated, tokensPerSecond });
+    });
+
+    const deadline = new Date(job.timeout_at).getTime();
+    const remaining = deadline - Date.now() - 10_000;
+    if (remaining > 0) {
+      setTimeout(() => { proc.kill(); reject(new Error('Job timed out')); }, remaining);
+    }
+  });
+}
+
+
+// ─── List cached models ───────────────────────────────────────
+
+export function listCachedModels(): Array<{ modelId: string; sizeMb: number }> {
+  ensureModelCacheDir();
+  const files = fs.readdirSync(MODEL_CACHE_DIR).filter(f => f.endsWith('.gguf'));
+  return files.map(f => ({
+    modelId: f.replace('.gguf', ''),
+    sizeMb: Math.round(fs.statSync(path.join(MODEL_CACHE_DIR, f)).size / 1024 / 1024),
+  }));
+}