@voidwire/llm 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,101 @@
1
+ # llm
2
+
3
+ LLM infrastructure CLI — manage embedding servers and shared LLM services.
4
+
5
+ ## Philosophy
6
+
7
+ **One server, many consumers** — The embed server loads `nomic-embed-text-v1.5` once and serves requests at ~9ms. Every tool that needs embeddings (lore, sable, future tools) hits the same server through `@voidwire/llm-core`'s `embed()` function. No in-process model loading, no duplicate codepaths.
8
+
9
+ **Idempotent lifecycle** — `start` checks health first, only spawns if needed. PID file for clean shutdown. Safe to call from multiple entry points (shell init, bin/sable, scripts).
10
+
11
+ **JSON output** — All commands emit JSON to stdout. Human-readable diagnostics go to stderr. Output pipes to jq and composes with other tools.
12
+
13
+ ## Quick Start
14
+
15
+ ```bash
16
+ # Start the embed server (idempotent — no-op if already running)
17
+ llm embed-server start
18
+
19
+ # Check status
20
+ llm embed-server status
21
+
22
+ # Stop
23
+ llm embed-server stop
24
+ ```
25
+
26
+ ## Installation
27
+
28
+ ```bash
29
+ bun add -g @voidwire/llm
30
+ ```
31
+
32
+ Or from source:
33
+
34
+ ```bash
35
+ cd packages/llm && bun link
36
+ ```
37
+
38
+ ## Usage
39
+
40
+ ### `llm embed-server start`
41
+
42
+ Start the embedding server. Checks `/health` first — if already running, returns immediately.
43
+
44
+ ```bash
45
+ llm embed-server start
46
+ # stdout: {"status":"started","pid":12345,"port":8090}
47
+ # stderr: Embed server started (pid: 12345, port: 8090)
48
+ ```
49
+
50
+ ### `llm embed-server stop`
51
+
52
+ Stop the server via PID file. Graceful SIGTERM.
53
+
54
+ ```bash
55
+ llm embed-server stop
56
+ # stdout: {"status":"stopped","pid":12345}
57
+ # stderr: Embed server stopped (pid: 12345)
58
+ ```
59
+
60
+ ### `llm embed-server status`
61
+
62
+ Report server state. JSON to stdout.
63
+
64
+ ```bash
65
+ llm embed-server status
66
+ # {"running":true,"port":8090,"model":"nomic-ai/nomic-embed-text-v1.5","dims":768}
67
+
68
+ llm embed-server status | jq .running
69
+ # true
70
+ ```
71
+
72
+ ## Library Usage
73
+
74
+ ```typescript
75
+ import { startEmbedServer, stopEmbedServer, getEmbedServerStatus } from "@voidwire/llm";
76
+
77
+ const result = await startEmbedServer();
78
+ // { status: "started" | "already_running", pid?: number, port: number }
79
+ ```
80
+
81
+ ## Configuration
82
+
83
+ The embed server reads its endpoint from `~/.config/llm-core/services.toml`:
84
+
85
+ ```toml
86
+ [services.embed]
87
+ adapter = "embed"
88
+ base_url = "http://localhost:8090"
89
+ key_required = false
90
+ default_model = "nomic-ai/nomic-embed-text-v1.5"
91
+ ```
92
+
93
+ PID file: `~/.local/share/llm/embed-server.pid`
94
+
95
+ ## Exit Codes
96
+
97
+ | Code | Meaning |
98
+ |------|---------|
99
+ | 0 | Success |
100
+ | 1 | Runtime error (server failed to start, etc.) |
101
+ | 2 | Client error (bad arguments) |
package/cli.ts ADDED
@@ -0,0 +1,105 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * llm CLI - LLM tooling subcommands
4
+ *
5
+ * Manages embedding infrastructure. The embed server loads
6
+ * nomic-embed-text-v1.5 once on startup and serves embedding
7
+ * requests at ~9ms per query, eliminating cold start overhead.
8
+ *
9
+ * Usage:
10
+ * llm embed-server start Start the embed server (idempotent)
11
+ * llm embed-server stop Stop the embed server
12
+ * llm embed-server status Show embed server status
13
+ *
14
+ * All commands output JSON to stdout. Diagnostics to stderr.
15
+ * Exit codes: 0 = success, 1 = runtime error, 2 = client error (bad args)
16
+ */
17
+
18
+ import {
19
+ startEmbedServer,
20
+ stopEmbedServer,
21
+ getEmbedServerStatus,
22
+ } from "./lib/lifecycle";
23
+
24
+ function printUsage(): void {
25
+ process.stderr.write(`llm - LLM tooling CLI
26
+
27
+ Philosophy:
28
+ Centralize LLM infrastructure into deterministic, composable commands.
29
+ The embed server is shared infrastructure — start once, use everywhere.
30
+
31
+ Usage:
32
+ llm embed-server start Start the embed server (idempotent)
33
+ llm embed-server stop Stop the embed server
34
+ llm embed-server status Show embed server status
35
+
36
+ All commands output JSON to stdout. Diagnostics to stderr.
37
+ Exit codes: 0 = success, 1 = runtime error, 2 = client error
38
+ `);
39
+ }
40
+
41
+ async function main(): Promise<void> {
42
+ const args = process.argv.slice(2);
43
+
44
+ if (args.length === 0) {
45
+ printUsage();
46
+ process.exit(0);
47
+ }
48
+
49
+ const [command, subcommand] = args;
50
+
51
+ if (command !== "embed-server") {
52
+ process.stderr.write(`Error: Unknown command "${command}"\n\n`);
53
+ printUsage();
54
+ process.exit(2);
55
+ }
56
+
57
+ try {
58
+ switch (subcommand) {
59
+ case "start": {
60
+ const result = await startEmbedServer();
61
+ console.log(JSON.stringify(result));
62
+ if (result.status === "already_running") {
63
+ process.stderr.write("Embed server already running\n");
64
+ } else {
65
+ process.stderr.write(
66
+ `Embed server started (pid: ${result.pid}, port: ${result.port})\n`,
67
+ );
68
+ }
69
+ process.exit(0);
70
+ break;
71
+ }
72
+
73
+ case "stop": {
74
+ const result = await stopEmbedServer();
75
+ console.log(JSON.stringify(result));
76
+ if (result.status === "not_running") {
77
+ process.stderr.write("Embed server not running\n");
78
+ } else {
79
+ process.stderr.write(`Embed server stopped (pid: ${result.pid})\n`);
80
+ }
81
+ process.exit(0);
82
+ break;
83
+ }
84
+
85
+ case "status": {
86
+ const status = await getEmbedServerStatus();
87
+ console.log(JSON.stringify(status));
88
+ process.exit(0);
89
+ break;
90
+ }
91
+
92
+ default:
93
+ process.stderr.write(
94
+ `Error: Unknown subcommand "${subcommand}". Use: start, stop, status\n`,
95
+ );
96
+ process.exit(2);
97
+ }
98
+ } catch (err) {
99
+ const message = err instanceof Error ? err.message : String(err);
100
+ process.stderr.write(`Error: ${message}\n`);
101
+ process.exit(1);
102
+ }
103
+ }
104
+
105
+ main();
package/index.ts ADDED
@@ -0,0 +1,17 @@
1
+ /**
2
+ * @voidwire/llm - LLM tooling library
3
+ *
4
+ * Embed server lifecycle management.
5
+ *
6
+ * Usage:
7
+ * import { startEmbedServer, stopEmbedServer, getEmbedServerStatus } from "@voidwire/llm";
8
+ */
9
+
10
+ export {
11
+ startEmbedServer,
12
+ stopEmbedServer,
13
+ getEmbedServerStatus,
14
+ type StartResult,
15
+ type StopResult,
16
+ type ServerStatus,
17
+ } from "./lib/lifecycle";
@@ -0,0 +1,155 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * embed-server.ts - Persistent embedding server
4
+ *
5
+ * Loads nomic-embed-text-v1.5 once on startup and serves embedding requests
6
+ * via HTTP. Eliminates the 689ms cold start on every bun process invocation.
7
+ *
8
+ * Endpoints:
9
+ * GET /health → { status, model, dims }
10
+ * POST /embed → { text, prefix } → { embedding, dims, durationMs }
11
+ *
12
+ * Usage:
13
+ * EMBED_PORT=8090 bun run embed-server.ts
14
+ */
15
+
16
+ import { pipeline } from "@huggingface/transformers";
17
+
18
+ const PORT = parseInt(process.env.EMBED_PORT || "8090", 10);
19
+ const MODEL_NAME = "nomic-ai/nomic-embed-text-v1.5";
20
+ const EMBEDDING_DIM = 768;
21
+
22
+ interface EmbedRequest {
23
+ text: string;
24
+ prefix?: string;
25
+ }
26
+
27
+ interface EmbeddingPipeline {
28
+ (
29
+ text: string,
30
+ options?: { pooling?: string; normalize?: boolean },
31
+ ): Promise<{
32
+ data: Float32Array;
33
+ }>;
34
+ }
35
+
36
+ // Module-scoped pipeline — loaded once on startup
37
+ let embedder: EmbeddingPipeline | null = null;
38
+
39
+ /**
40
+ * Load the embedding model pipeline
41
+ */
42
+ async function loadModel(): Promise<void> {
43
+ const start = performance.now();
44
+ console.error(`[embed-server] Loading model ${MODEL_NAME}...`);
45
+
46
+ const p = await pipeline("feature-extraction", MODEL_NAME, {
47
+ dtype: "fp32",
48
+ });
49
+ embedder = p as unknown as EmbeddingPipeline;
50
+
51
+ const elapsed = (performance.now() - start).toFixed(0);
52
+ console.error(`[embed-server] Model loaded in ${elapsed}ms`);
53
+ }
54
+
55
+ /**
56
+ * Handle /health GET requests
57
+ */
58
+ function handleHealth(): Response {
59
+ return Response.json({
60
+ status: "ok",
61
+ model: MODEL_NAME,
62
+ dims: EMBEDDING_DIM,
63
+ });
64
+ }
65
+
66
+ /**
67
+ * Handle /embed POST requests
68
+ */
69
+ async function handleEmbed(req: Request): Promise<Response> {
70
+ if (!embedder) {
71
+ return Response.json({ error: "Model not loaded" }, { status: 503 });
72
+ }
73
+
74
+ let body: EmbedRequest;
75
+ try {
76
+ body = await req.json();
77
+ } catch {
78
+ return Response.json({ error: "Invalid JSON body" }, { status: 400 });
79
+ }
80
+
81
+ if (!body.text || typeof body.text !== "string") {
82
+ return Response.json(
83
+ { error: "Missing or invalid 'text' field" },
84
+ { status: 400 },
85
+ );
86
+ }
87
+
88
+ // Prepend nomic prefix (default to search_query)
89
+ const prefix = body.prefix || "search_query";
90
+ const prefixedText = `${prefix}: ${body.text}`;
91
+
92
+ const start = performance.now();
93
+ const output = await embedder(prefixedText, {
94
+ pooling: "mean",
95
+ normalize: true,
96
+ });
97
+ const durationMs = Math.round(performance.now() - start);
98
+
99
+ const embedding = Array.from(output.data as Float32Array);
100
+
101
+ if (embedding.length !== EMBEDDING_DIM) {
102
+ return Response.json(
103
+ {
104
+ error: `Unexpected dimensions: got ${embedding.length}, expected ${EMBEDDING_DIM}`,
105
+ },
106
+ { status: 500 },
107
+ );
108
+ }
109
+
110
+ return Response.json({
111
+ embedding,
112
+ dims: EMBEDDING_DIM,
113
+ durationMs,
114
+ });
115
+ }
116
+
117
+ /**
118
+ * Request router
119
+ */
120
+ async function handleRequest(req: Request): Promise<Response> {
121
+ const url = new URL(req.url);
122
+
123
+ if (url.pathname === "/health" && req.method === "GET") {
124
+ return handleHealth();
125
+ }
126
+
127
+ if (url.pathname === "/embed" && req.method === "POST") {
128
+ return handleEmbed(req);
129
+ }
130
+
131
+ return Response.json({ error: "Not found" }, { status: 404 });
132
+ }
133
+
134
+ // Graceful shutdown
135
+ process.on("SIGTERM", () => {
136
+ console.error("[embed-server] SIGTERM received, shutting down");
137
+ process.exit(0);
138
+ });
139
+
140
+ process.on("SIGINT", () => {
141
+ console.error("[embed-server] SIGINT received, shutting down");
142
+ process.exit(0);
143
+ });
144
+
145
+ // Startup
146
+ await loadModel();
147
+
148
+ const server = Bun.serve({
149
+ port: PORT,
150
+ fetch: handleRequest,
151
+ });
152
+
153
+ console.error(
154
+ `[embed-server] Ready on http://localhost:${server.port} (${MODEL_NAME}, ${EMBEDDING_DIM}d)`,
155
+ );
@@ -0,0 +1,201 @@
1
+ /**
2
+ * lib/lifecycle.ts - Embed server lifecycle management
3
+ *
4
+ * Start, stop, and status for the embed server process.
5
+ * Uses PID file for clean shutdown. Idempotent start (health check first).
6
+ *
7
+ * Usage:
8
+ * import { startEmbedServer, stopEmbedServer, getEmbedServerStatus } from "./lifecycle";
9
+ */
10
+
11
+ import {
12
+ existsSync,
13
+ mkdirSync,
14
+ readFileSync,
15
+ writeFileSync,
16
+ unlinkSync,
17
+ } from "fs";
18
+ import { join } from "path";
19
+ import { homedir } from "os";
20
+ import { resolveService } from "@voidwire/llm-core";
21
+
22
+ const DATA_DIR = join(homedir(), ".local", "share", "llm");
23
+ const PID_FILE = join(DATA_DIR, "embed-server.pid");
24
+
25
+ /**
26
+ * Parse port from a base_url string.
27
+ * Defaults to 8090 if no explicit port.
28
+ */
29
+ function parsePort(baseUrl: string): number {
30
+ const url = new URL(baseUrl);
31
+ const port = url.port;
32
+ return port ? parseInt(port, 10) : 8090;
33
+ }
34
+
35
+ /**
36
+ * Check if the embed server is healthy at the given base URL.
37
+ */
38
+ async function healthCheck(baseUrl: string, timeoutMs = 500): Promise<boolean> {
39
+ try {
40
+ const resp = await fetch(`${baseUrl}/health`, {
41
+ signal: AbortSignal.timeout(timeoutMs),
42
+ });
43
+ return resp.ok;
44
+ } catch {
45
+ return false;
46
+ }
47
+ }
48
+
49
+ /**
50
+ * Read PID from PID file, or null if not found.
51
+ */
52
+ function readPid(): number | null {
53
+ try {
54
+ if (!existsSync(PID_FILE)) return null;
55
+ const raw = readFileSync(PID_FILE, "utf-8").trim();
56
+ const pid = parseInt(raw, 10);
57
+ return isNaN(pid) ? null : pid;
58
+ } catch {
59
+ return null;
60
+ }
61
+ }
62
+
63
+ /**
64
+ * Write PID to PID file.
65
+ */
66
+ function writePid(pid: number): void {
67
+ mkdirSync(DATA_DIR, { recursive: true });
68
+ writeFileSync(PID_FILE, String(pid));
69
+ }
70
+
71
+ /**
72
+ * Remove PID file.
73
+ */
74
+ function removePid(): void {
75
+ try {
76
+ unlinkSync(PID_FILE);
77
+ } catch {
78
+ // File doesn't exist — fine
79
+ }
80
+ }
81
+
82
+ export interface StartResult {
83
+ status: "started" | "already_running";
84
+ pid?: number;
85
+ port: number;
86
+ }
87
+
88
+ /**
89
+ * Start the embed server idempotently.
90
+ * If already running (health check passes), returns immediately.
91
+ * Otherwise spawns the server, polls health, and writes PID.
92
+ */
93
+ export async function startEmbedServer(): Promise<StartResult> {
94
+ const service = resolveService("embed");
95
+ const baseUrl = service.base_url;
96
+ const port = parsePort(baseUrl);
97
+
98
+ // Health check — already running?
99
+ if (await healthCheck(baseUrl)) {
100
+ return { status: "already_running", pid: readPid() ?? undefined, port };
101
+ }
102
+
103
+ // Resolve path to embed-server.ts (co-located in this package)
104
+ const embedServerPath = join(import.meta.dir, "embed-server.ts");
105
+
106
+ // Spawn detached process
107
+ const proc = Bun.spawn(["bun", "run", embedServerPath], {
108
+ env: { ...process.env, EMBED_PORT: String(port) },
109
+ stdout: "ignore",
110
+ stderr: "ignore",
111
+ detached: true,
112
+ });
113
+
114
+ // Detach from parent — let it run independently
115
+ proc.unref();
116
+
117
+ const pid = proc.pid;
118
+ writePid(pid);
119
+
120
+ // Poll /health every 100ms up to 3s
121
+ const maxAttempts = 30;
122
+ for (let i = 0; i < maxAttempts; i++) {
123
+ await new Promise((resolve) => setTimeout(resolve, 100));
124
+ if (await healthCheck(baseUrl)) {
125
+ return { status: "started", pid, port };
126
+ }
127
+ }
128
+
129
+ throw new Error(
130
+ `Embed server failed to start within 3s (pid: ${pid}, port: ${port}). ` +
131
+ `Check logs or try: EMBED_PORT=${port} bun run ${embedServerPath}`,
132
+ );
133
+ }
134
+
135
+ export interface StopResult {
136
+ status: "stopped" | "not_running";
137
+ pid?: number;
138
+ }
139
+
140
+ /**
141
+ * Stop the embed server via PID file.
142
+ * Gracefully handles missing PID file.
143
+ */
144
+ export async function stopEmbedServer(): Promise<StopResult> {
145
+ const pid = readPid();
146
+
147
+ if (pid === null) {
148
+ return { status: "not_running" };
149
+ }
150
+
151
+ try {
152
+ process.kill(pid, "SIGTERM");
153
+ } catch {
154
+ // Process already dead — fine
155
+ }
156
+
157
+ removePid();
158
+ return { status: "stopped", pid };
159
+ }
160
+
161
+ export interface ServerStatus {
162
+ running: boolean;
163
+ pid?: number;
164
+ port: number;
165
+ model?: string;
166
+ dims?: number;
167
+ }
168
+
169
+ /**
170
+ * Get the current status of the embed server.
171
+ */
172
+ export async function getEmbedServerStatus(): Promise<ServerStatus> {
173
+ const service = resolveService("embed");
174
+ const baseUrl = service.base_url;
175
+ const port = parsePort(baseUrl);
176
+ const pid = readPid() ?? undefined;
177
+
178
+ try {
179
+ const resp = await fetch(`${baseUrl}/health`, {
180
+ signal: AbortSignal.timeout(500),
181
+ });
182
+
183
+ if (resp.ok) {
184
+ const data = (await resp.json()) as {
185
+ model?: string;
186
+ dims?: number;
187
+ };
188
+ return {
189
+ running: true,
190
+ pid,
191
+ port,
192
+ model: data.model,
193
+ dims: data.dims,
194
+ };
195
+ }
196
+ } catch {
197
+ // Server not reachable
198
+ }
199
+
200
+ return { running: false, pid, port };
201
+ }
package/package.json ADDED
@@ -0,0 +1,47 @@
1
+ {
2
+ "name": "@voidwire/llm",
3
+ "version": "0.1.0",
4
+ "description": "LLM tooling CLI - embed server lifecycle and utility subcommands",
5
+ "type": "module",
6
+ "main": "./index.ts",
7
+ "bin": {
8
+ "llm": "./cli.ts"
9
+ },
10
+ "exports": {
11
+ ".": "./index.ts",
12
+ "./cli": "./cli.ts"
13
+ },
14
+ "files": [
15
+ "index.ts",
16
+ "cli.ts",
17
+ "lib/**/*.ts",
18
+ "README.md",
19
+ "LICENSE"
20
+ ],
21
+ "scripts": {
22
+ "typecheck": "tsc --noEmit"
23
+ },
24
+ "keywords": [
25
+ "llm",
26
+ "embed",
27
+ "cli",
28
+ "llcli"
29
+ ],
30
+ "author": "nickpending <nickpending@users.noreply.github.com>",
31
+ "license": "MIT",
32
+ "repository": {
33
+ "type": "git",
34
+ "url": "git+https://github.com/nickpending/llmcli-tools.git",
35
+ "directory": "packages/llm"
36
+ },
37
+ "engines": {
38
+ "bun": ">=1.0.0"
39
+ },
40
+ "dependencies": {
41
+ "@huggingface/transformers": "^3.8.1",
42
+ "@voidwire/llm-core": "workspace:*"
43
+ },
44
+ "devDependencies": {
45
+ "bun-types": "1.3.5"
46
+ }
47
+ }