@voidwire/llm 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,101 @@
1
+ # llm
2
+
3
+ LLM infrastructure CLI — manage embedding servers and shared LLM services.
4
+
5
+ ## Philosophy
6
+
7
+ **One server, many consumers** — The embed server loads `nomic-embed-text-v1.5` once and serves requests at ~9ms. Every tool that needs embeddings (lore, sable, future tools) hits the same server through `@voidwire/llm-core`'s `embed()` function. No in-process model loading, no duplicate codepaths.
8
+
9
+ **Idempotent lifecycle** — `start` checks health first, only spawns if needed. PID file for clean shutdown. Safe to call from multiple entry points (shell init, bin/sable, scripts).
10
+
11
+ **JSON output** — All commands emit JSON to stdout. Human-readable diagnostics go to stderr. Output pipes to jq and composes with other tools.
12
+
13
+ ## Quick Start
14
+
15
+ ```bash
16
+ # Start the embed server (idempotent — no-op if already running)
17
+ llm embed-server start
18
+
19
+ # Check status
20
+ llm embed-server status
21
+
22
+ # Stop
23
+ llm embed-server stop
24
+ ```
25
+
26
+ ## Installation
27
+
28
+ ```bash
29
+ bun add -g @voidwire/llm
30
+ ```
31
+
32
+ Or from source:
33
+
34
+ ```bash
35
+ cd packages/llm && bun link
36
+ ```
37
+
38
+ ## Usage
39
+
40
+ ### `llm embed-server start`
41
+
42
+ Start the embedding server. Checks `/health` first — if already running, returns immediately.
43
+
44
+ ```bash
45
+ llm embed-server start
46
+ # stdout: {"status":"started","pid":12345,"port":8090}
47
+ # stderr: Embed server started (pid: 12345, port: 8090)
48
+ ```
49
+
50
+ ### `llm embed-server stop`
51
+
52
+ Stop the server via PID file. Graceful SIGTERM.
53
+
54
+ ```bash
55
+ llm embed-server stop
56
+ # stdout: {"status":"stopped","pid":12345}
57
+ # stderr: Embed server stopped (pid: 12345)
58
+ ```
59
+
60
+ ### `llm embed-server status`
61
+
62
+ Report server state. JSON to stdout.
63
+
64
+ ```bash
65
+ llm embed-server status
66
+ # {"running":true,"port":8090,"model":"nomic-ai/nomic-embed-text-v1.5","dims":768}
67
+
68
+ llm embed-server status | jq .running
69
+ # true
70
+ ```
71
+
72
+ ## Library Usage
73
+
74
+ ```typescript
75
+ import { startEmbedServer, stopEmbedServer, getEmbedServerStatus } from "@voidwire/llm";
76
+
77
+ const result = await startEmbedServer();
78
+ // { status: "started" | "already_running", pid?: number, port: number }
79
+ ```
80
+
81
+ ## Configuration
82
+
83
+ The embed server reads its endpoint from `~/.config/llm-core/services.toml`:
84
+
85
+ ```toml
86
+ [services.embed]
87
+ adapter = "embed"
88
+ base_url = "http://localhost:8090"
89
+ key_required = false
90
+ default_model = "nomic-ai/nomic-embed-text-v1.5"
91
+ ```
92
+
93
+ PID file: `~/.local/share/llm/embed-server.pid`
94
+
95
+ ## Exit Codes
96
+
97
+ | Code | Meaning |
98
+ |------|---------|
99
+ | 0 | Success |
100
+ | 1 | Runtime error (server failed to start, etc.) |
101
+ | 2 | Client error (bad arguments) |
package/cli.ts ADDED
@@ -0,0 +1,105 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * llm CLI - LLM tooling subcommands
4
+ *
5
+ * Manages embedding infrastructure. The embed server loads
6
+ * nomic-embed-text-v1.5 once on startup and serves embedding
7
+ * requests at ~9ms per query, eliminating cold start overhead.
8
+ *
9
+ * Usage:
10
+ * llm embed-server start Start the embed server (idempotent)
11
+ * llm embed-server stop Stop the embed server
12
+ * llm embed-server status Show embed server status
13
+ *
14
+ * All commands output JSON to stdout. Diagnostics to stderr.
15
+ * Exit codes: 0 = success, 1 = runtime error, 2 = client error (bad args)
16
+ */
17
+
18
+ import {
19
+ startEmbedServer,
20
+ stopEmbedServer,
21
+ getEmbedServerStatus,
22
+ } from "./lib/lifecycle";
23
+
24
+ function printUsage(): void {
25
+ process.stderr.write(`llm - LLM tooling CLI
26
+
27
+ Philosophy:
28
+ Centralize LLM infrastructure into deterministic, composable commands.
29
+ The embed server is shared infrastructure — start once, use everywhere.
30
+
31
+ Usage:
32
+ llm embed-server start Start the embed server (idempotent)
33
+ llm embed-server stop Stop the embed server
34
+ llm embed-server status Show embed server status
35
+
36
+ All commands output JSON to stdout. Diagnostics to stderr.
37
+ Exit codes: 0 = success, 1 = runtime error, 2 = client error
38
+ `);
39
+ }
40
+
41
+ async function main(): Promise<void> {
42
+ const args = process.argv.slice(2);
43
+
44
+ if (args.length === 0) {
45
+ printUsage();
46
+ process.exit(0);
47
+ }
48
+
49
+ const [command, subcommand] = args;
50
+
51
+ if (command !== "embed-server") {
52
+ process.stderr.write(`Error: Unknown command "${command}"\n\n`);
53
+ printUsage();
54
+ process.exit(2);
55
+ }
56
+
57
+ try {
58
+ switch (subcommand) {
59
+ case "start": {
60
+ const result = await startEmbedServer();
61
+ console.log(JSON.stringify(result));
62
+ if (result.status === "already_running") {
63
+ process.stderr.write("Embed server already running\n");
64
+ } else {
65
+ process.stderr.write(
66
+ `Embed server started (pid: ${result.pid}, port: ${result.port})\n`,
67
+ );
68
+ }
69
+ process.exit(0);
70
+ break;
71
+ }
72
+
73
+ case "stop": {
74
+ const result = await stopEmbedServer();
75
+ console.log(JSON.stringify(result));
76
+ if (result.status === "not_running") {
77
+ process.stderr.write("Embed server not running\n");
78
+ } else {
79
+ process.stderr.write(`Embed server stopped (pid: ${result.pid})\n`);
80
+ }
81
+ process.exit(0);
82
+ break;
83
+ }
84
+
85
+ case "status": {
86
+ const status = await getEmbedServerStatus();
87
+ console.log(JSON.stringify(status));
88
+ process.exit(0);
89
+ break;
90
+ }
91
+
92
+ default:
93
+ process.stderr.write(
94
+ `Error: Unknown subcommand "${subcommand}". Use: start, stop, status\n`,
95
+ );
96
+ process.exit(2);
97
+ }
98
+ } catch (err) {
99
+ const message = err instanceof Error ? err.message : String(err);
100
+ process.stderr.write(`Error: ${message}\n`);
101
+ process.exit(1);
102
+ }
103
+ }
104
+
105
+ main();
package/index.ts ADDED
@@ -0,0 +1,17 @@
1
+ /**
2
+ * @voidwire/llm - LLM tooling library
3
+ *
4
+ * Embed server lifecycle management.
5
+ *
6
+ * Usage:
7
+ * import { startEmbedServer, stopEmbedServer, getEmbedServerStatus } from "@voidwire/llm";
8
+ */
9
+
10
+ export {
11
+ startEmbedServer,
12
+ stopEmbedServer,
13
+ getEmbedServerStatus,
14
+ type StartResult,
15
+ type StopResult,
16
+ type ServerStatus,
17
+ } from "./lib/lifecycle";
@@ -0,0 +1,155 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * embed-server.ts - Persistent embedding server
4
+ *
5
+ * Loads nomic-embed-text-v1.5 once on startup and serves embedding requests
6
+ * via HTTP. Eliminates the 689ms cold start on every bun process invocation.
7
+ *
8
+ * Endpoints:
9
+ * GET /health → { status, model, dims }
10
+ * POST /embed → { text, prefix } → { embedding, dims, durationMs }
11
+ *
12
+ * Usage:
13
+ * EMBED_PORT=8090 bun run embed-server.ts
14
+ */
15
+
16
+ import { pipeline } from "@huggingface/transformers";
17
+
18
+ const PORT = parseInt(process.env.EMBED_PORT || "8090", 10);
19
+ const MODEL_NAME = "nomic-ai/nomic-embed-text-v1.5";
20
+ const EMBEDDING_DIM = 768;
21
+
22
+ interface EmbedRequest {
23
+ text: string;
24
+ prefix?: string;
25
+ }
26
+
27
+ interface EmbeddingPipeline {
28
+ (
29
+ text: string,
30
+ options?: { pooling?: string; normalize?: boolean },
31
+ ): Promise<{
32
+ data: Float32Array;
33
+ }>;
34
+ }
35
+
36
+ // Module-scoped pipeline — loaded once on startup
37
+ let embedder: EmbeddingPipeline | null = null;
38
+
39
+ /**
40
+ * Load the embedding model pipeline
41
+ */
42
+ async function loadModel(): Promise<void> {
43
+ const start = performance.now();
44
+ console.error(`[embed-server] Loading model ${MODEL_NAME}...`);
45
+
46
+ const p = await pipeline("feature-extraction", MODEL_NAME, {
47
+ dtype: "fp32",
48
+ });
49
+ embedder = p as unknown as EmbeddingPipeline;
50
+
51
+ const elapsed = (performance.now() - start).toFixed(0);
52
+ console.error(`[embed-server] Model loaded in ${elapsed}ms`);
53
+ }
54
+
55
+ /**
56
+ * Handle /health GET requests
57
+ */
58
+ function handleHealth(): Response {
59
+ return Response.json({
60
+ status: "ok",
61
+ model: MODEL_NAME,
62
+ dims: EMBEDDING_DIM,
63
+ });
64
+ }
65
+
66
+ /**
67
+ * Handle /embed POST requests
68
+ */
69
+ async function handleEmbed(req: Request): Promise<Response> {
70
+ if (!embedder) {
71
+ return Response.json({ error: "Model not loaded" }, { status: 503 });
72
+ }
73
+
74
+ let body: EmbedRequest;
75
+ try {
76
+ body = await req.json();
77
+ } catch {
78
+ return Response.json({ error: "Invalid JSON body" }, { status: 400 });
79
+ }
80
+
81
+ if (!body.text || typeof body.text !== "string") {
82
+ return Response.json(
83
+ { error: "Missing or invalid 'text' field" },
84
+ { status: 400 },
85
+ );
86
+ }
87
+
88
+ // Prepend nomic prefix (default to search_query)
89
+ const prefix = body.prefix || "search_query";
90
+ const prefixedText = `${prefix}: ${body.text}`;
91
+
92
+ const start = performance.now();
93
+ const output = await embedder(prefixedText, {
94
+ pooling: "mean",
95
+ normalize: true,
96
+ });
97
+ const durationMs = Math.round(performance.now() - start);
98
+
99
+ const embedding = Array.from(output.data as Float32Array);
100
+
101
+ if (embedding.length !== EMBEDDING_DIM) {
102
+ return Response.json(
103
+ {
104
+ error: `Unexpected dimensions: got ${embedding.length}, expected ${EMBEDDING_DIM}`,
105
+ },
106
+ { status: 500 },
107
+ );
108
+ }
109
+
110
+ return Response.json({
111
+ embedding,
112
+ dims: EMBEDDING_DIM,
113
+ durationMs,
114
+ });
115
+ }
116
+
117
+ /**
118
+ * Request router
119
+ */
120
+ async function handleRequest(req: Request): Promise<Response> {
121
+ const url = new URL(req.url);
122
+
123
+ if (url.pathname === "/health" && req.method === "GET") {
124
+ return handleHealth();
125
+ }
126
+
127
+ if (url.pathname === "/embed" && req.method === "POST") {
128
+ return handleEmbed(req);
129
+ }
130
+
131
+ return Response.json({ error: "Not found" }, { status: 404 });
132
+ }
133
+
134
+ // Graceful shutdown
135
+ process.on("SIGTERM", () => {
136
+ console.error("[embed-server] SIGTERM received, shutting down");
137
+ process.exit(0);
138
+ });
139
+
140
+ process.on("SIGINT", () => {
141
+ console.error("[embed-server] SIGINT received, shutting down");
142
+ process.exit(0);
143
+ });
144
+
145
+ // Startup
146
+ await loadModel();
147
+
148
+ const server = Bun.serve({
149
+ port: PORT,
150
+ fetch: handleRequest,
151
+ });
152
+
153
+ console.error(
154
+ `[embed-server] Ready on http://localhost:${server.port} (${MODEL_NAME}, ${EMBEDDING_DIM}d)`,
155
+ );
@@ -0,0 +1,201 @@
1
+ /**
2
+ * lib/lifecycle.ts - Embed server lifecycle management
3
+ *
4
+ * Start, stop, and status for the embed server process.
5
+ * Uses PID file for clean shutdown. Idempotent start (health check first).
6
+ *
7
+ * Usage:
8
+ * import { startEmbedServer, stopEmbedServer, getEmbedServerStatus } from "./lifecycle";
9
+ */
10
+
11
+ import {
12
+ existsSync,
13
+ mkdirSync,
14
+ readFileSync,
15
+ writeFileSync,
16
+ unlinkSync,
17
+ } from "fs";
18
+ import { join } from "path";
19
+ import { homedir } from "os";
20
+ import { resolveService } from "@voidwire/llm-core";
21
+
22
+ const DATA_DIR = join(homedir(), ".local", "share", "llm");
23
+ const PID_FILE = join(DATA_DIR, "embed-server.pid");
24
+
25
+ /**
26
+ * Parse port from a base_url string.
27
+ * Defaults to 8090 if no explicit port.
28
+ */
29
+ function parsePort(baseUrl: string): number {
30
+ const url = new URL(baseUrl);
31
+ const port = url.port;
32
+ return port ? parseInt(port, 10) : 8090;
33
+ }
34
+
35
+ /**
36
+ * Check if the embed server is healthy at the given base URL.
37
+ */
38
+ async function healthCheck(baseUrl: string, timeoutMs = 500): Promise<boolean> {
39
+ try {
40
+ const resp = await fetch(`${baseUrl}/health`, {
41
+ signal: AbortSignal.timeout(timeoutMs),
42
+ });
43
+ return resp.ok;
44
+ } catch {
45
+ return false;
46
+ }
47
+ }
48
+
49
+ /**
50
+ * Read PID from PID file, or null if not found.
51
+ */
52
+ function readPid(): number | null {
53
+ try {
54
+ if (!existsSync(PID_FILE)) return null;
55
+ const raw = readFileSync(PID_FILE, "utf-8").trim();
56
+ const pid = parseInt(raw, 10);
57
+ return isNaN(pid) ? null : pid;
58
+ } catch {
59
+ return null;
60
+ }
61
+ }
62
+
63
+ /**
64
+ * Write PID to PID file.
65
+ */
66
+ function writePid(pid: number): void {
67
+ mkdirSync(DATA_DIR, { recursive: true });
68
+ writeFileSync(PID_FILE, String(pid));
69
+ }
70
+
71
+ /**
72
+ * Remove PID file.
73
+ */
74
+ function removePid(): void {
75
+ try {
76
+ unlinkSync(PID_FILE);
77
+ } catch {
78
+ // File doesn't exist — fine
79
+ }
80
+ }
81
+
82
+ export interface StartResult {
83
+ status: "started" | "already_running";
84
+ pid?: number;
85
+ port: number;
86
+ }
87
+
88
+ /**
89
+ * Start the embed server idempotently.
90
+ * If already running (health check passes), returns immediately.
91
+ * Otherwise spawns the server, polls health, and writes PID.
92
+ */
93
+ export async function startEmbedServer(): Promise<StartResult> {
94
+ const service = resolveService("embed");
95
+ const baseUrl = service.base_url;
96
+ const port = parsePort(baseUrl);
97
+
98
+ // Health check — already running?
99
+ if (await healthCheck(baseUrl)) {
100
+ return { status: "already_running", pid: readPid() ?? undefined, port };
101
+ }
102
+
103
+ // Resolve path to embed-server.ts (co-located in this package)
104
+ const embedServerPath = join(import.meta.dir, "embed-server.ts");
105
+
106
+ // Spawn detached process
107
+ const proc = Bun.spawn(["bun", "run", embedServerPath], {
108
+ env: { ...process.env, EMBED_PORT: String(port) },
109
+ stdout: "ignore",
110
+ stderr: "ignore",
111
+ detached: true,
112
+ });
113
+
114
+ // Detach from parent — let it run independently
115
+ proc.unref();
116
+
117
+ const pid = proc.pid;
118
+ writePid(pid);
119
+
120
+ // Poll /health every 100ms up to 3s
121
+ const maxAttempts = 30;
122
+ for (let i = 0; i < maxAttempts; i++) {
123
+ await new Promise((resolve) => setTimeout(resolve, 100));
124
+ if (await healthCheck(baseUrl)) {
125
+ return { status: "started", pid, port };
126
+ }
127
+ }
128
+
129
+ throw new Error(
130
+ `Embed server failed to start within 3s (pid: ${pid}, port: ${port}). ` +
131
+ `Check logs or try: EMBED_PORT=${port} bun run ${embedServerPath}`,
132
+ );
133
+ }
134
+
135
+ export interface StopResult {
136
+ status: "stopped" | "not_running";
137
+ pid?: number;
138
+ }
139
+
140
+ /**
141
+ * Stop the embed server via PID file.
142
+ * Gracefully handles missing PID file.
143
+ */
144
+ export async function stopEmbedServer(): Promise<StopResult> {
145
+ const pid = readPid();
146
+
147
+ if (pid === null) {
148
+ return { status: "not_running" };
149
+ }
150
+
151
+ try {
152
+ process.kill(pid, "SIGTERM");
153
+ } catch {
154
+ // Process already dead — fine
155
+ }
156
+
157
+ removePid();
158
+ return { status: "stopped", pid };
159
+ }
160
+
161
+ export interface ServerStatus {
162
+ running: boolean;
163
+ pid?: number;
164
+ port: number;
165
+ model?: string;
166
+ dims?: number;
167
+ }
168
+
169
+ /**
170
+ * Get the current status of the embed server.
171
+ */
172
+ export async function getEmbedServerStatus(): Promise<ServerStatus> {
173
+ const service = resolveService("embed");
174
+ const baseUrl = service.base_url;
175
+ const port = parsePort(baseUrl);
176
+ const pid = readPid() ?? undefined;
177
+
178
+ try {
179
+ const resp = await fetch(`${baseUrl}/health`, {
180
+ signal: AbortSignal.timeout(500),
181
+ });
182
+
183
+ if (resp.ok) {
184
+ const data = (await resp.json()) as {
185
+ model?: string;
186
+ dims?: number;
187
+ };
188
+ return {
189
+ running: true,
190
+ pid,
191
+ port,
192
+ model: data.model,
193
+ dims: data.dims,
194
+ };
195
+ }
196
+ } catch {
197
+ // Server not reachable
198
+ }
199
+
200
+ return { running: false, pid, port };
201
+ }
package/package.json ADDED
@@ -0,0 +1,47 @@
1
+ {
2
+ "name": "@voidwire/llm",
3
+ "version": "0.1.0",
4
+ "description": "LLM tooling CLI - embed server lifecycle and utility subcommands",
5
+ "type": "module",
6
+ "main": "./index.ts",
7
+ "bin": {
8
+ "llm": "./cli.ts"
9
+ },
10
+ "exports": {
11
+ ".": "./index.ts",
12
+ "./cli": "./cli.ts"
13
+ },
14
+ "files": [
15
+ "index.ts",
16
+ "cli.ts",
17
+ "lib/**/*.ts",
18
+ "README.md",
19
+ "LICENSE"
20
+ ],
21
+ "scripts": {
22
+ "typecheck": "tsc --noEmit"
23
+ },
24
+ "keywords": [
25
+ "llm",
26
+ "embed",
27
+ "cli",
28
+ "llcli"
29
+ ],
30
+ "author": "nickpending <nickpending@users.noreply.github.com>",
31
+ "license": "MIT",
32
+ "repository": {
33
+ "type": "git",
34
+ "url": "git+https://github.com/nickpending/llmcli-tools.git",
35
+ "directory": "packages/llm"
36
+ },
37
+ "engines": {
38
+ "bun": ">=1.0.0"
39
+ },
40
+ "dependencies": {
41
+ "@huggingface/transformers": "^3.8.1",
42
+ "@voidwire/llm-core": "workspace:*"
43
+ },
44
+ "devDependencies": {
45
+ "bun-types": "1.3.5"
46
+ }
47
+ }