@auxot/worker-cli 0.1.0

@@ -0,0 +1,203 @@
+ /**
+  * llama.cpp Process Management
+  *
+  * Spawns and manages llama.cpp server process with parameters from GPU key policy.
+  *
+  * Handles:
+  * - Process lifecycle (start, stop, restart)
+  * - Crash detection and restart
+  * - stdout/stderr capture for debugging
+  * - Graceful shutdown
+  */
+ import { spawn } from 'child_process';
+ import { ensureLlamaBinary } from './llama-binary.js';
+ /**
+  * Spawn llama.cpp server process with policy parameters
+  */
+ export async function spawnLlamaCpp(options) {
+     // Ensure llama.cpp binary is downloaded
+     const binaryPath = options.binaryPath || await ensureLlamaBinary();
+     const { modelPath, contextSize, parallelism, port = 9002, host = '127.0.0.1', gpuLayers } = options;
+     // Store original options for restart
+     const originalOptions = {
+         binaryPath,
+         modelPath,
+         contextSize,
+         parallelism,
+         port,
+         host,
+         gpuLayers,
+     };
+     // Build command arguments
+     const args = [
+         '--model', modelPath,
+         '--ctx-size', contextSize.toString(),
+         '--parallel', parallelism.toString(),
+         '--port', port.toString(),
+         '--host', host,
+         '--batch-size', '512',
+         '--threads', '12',
+         '--jinja', // Enable Jinja templating for tool calling
+     ];
+     // Add GPU layers if specified
+     if (gpuLayers !== undefined) {
+         args.push('--n-gpu-layers', gpuLayers.toString());
+     }
+     console.log(`[llama.cpp] Spawning process: ${binaryPath} ${args.join(' ')}`);
+     // Spawn process
+     const childProcess = spawn(binaryPath, args, {
+         stdio: ['ignore', 'pipe', 'pipe'], // stdin: ignore, stdout/stderr: pipe
+         env: process.env,
+     });
+     let isRunning = true;
+     const crashCallbacks = [];
+     // Capture stdout for debugging (with buffering for better formatting)
+     let stdoutBuffer = '';
+     childProcess.stdout?.on('data', (data) => {
+         stdoutBuffer += data.toString();
+         // Process complete lines
+         const lines = stdoutBuffer.split('\n');
+         stdoutBuffer = lines.pop() || ''; // Keep incomplete line in buffer
+         for (const line of lines) {
+             const trimmed = line.trim();
+             if (trimmed) {
+                 console.log(`[llama.cpp stdout] ${trimmed}`);
+             }
+         }
+     });
+     // Capture stderr for debugging (with buffering for better formatting)
+     let stderrBuffer = '';
+     childProcess.stderr?.on('data', (data) => {
+         stderrBuffer += data.toString();
+         // Process complete lines
+         const lines = stderrBuffer.split('\n');
+         stderrBuffer = lines.pop() || ''; // Keep incomplete line in buffer
+         for (const line of lines) {
+             const trimmed = line.trim();
+             if (trimmed) {
+                 console.error(`[llama.cpp stderr] ${trimmed}`);
+             }
+         }
+     });
+     // Handle process exit
+     childProcess.on('exit', (code, signal) => {
+         isRunning = false;
+         // Flush any remaining buffered output
+         if (stdoutBuffer.trim()) {
+             console.log(`[llama.cpp stdout] ${stdoutBuffer.trim()}`);
+             stdoutBuffer = '';
+         }
+         if (stderrBuffer.trim()) {
+             console.error(`[llama.cpp stderr] ${stderrBuffer.trim()}`);
+             stderrBuffer = '';
+         }
+         if (code !== null) {
+             console.log(`[llama.cpp] Process exited with code ${code}`);
+             // Notify crash callbacks (non-zero exit is a crash)
+             if (code !== 0) {
+                 for (const callback of crashCallbacks) {
+                     callback(code, signal);
+                 }
+             }
+         }
+         else if (signal) {
+             console.log(`[llama.cpp] Process killed with signal ${signal}`);
+             // Notify crash callbacks (killed by signal is a crash unless it's SIGTERM/SIGINT)
+             if (signal !== 'SIGTERM' && signal !== 'SIGINT') {
+                 for (const callback of crashCallbacks) {
+                     callback(null, signal);
+                 }
+             }
+         }
+     });
+     // Handle process errors
+     childProcess.on('error', (error) => {
+         console.error(`[llama.cpp] Process error:`, error);
+         isRunning = false;
+         // Notify crash callbacks
+         for (const callback of crashCallbacks) {
+             callback(null, null);
+         }
+     });
+     return {
+         process: childProcess,
+         get isRunning() {
+             return isRunning && childProcess.exitCode === null;
+         },
+         async stop() {
+             if (!isRunning) {
+                 return;
+             }
+             console.log('[llama.cpp] Stopping process...');
+             isRunning = false;
+             // Try graceful shutdown first (SIGTERM)
+             childProcess.kill('SIGTERM');
+             // Wait up to 5 seconds for graceful shutdown
+             await new Promise((resolve) => {
+                 const timeout = setTimeout(() => {
+                     // Force kill if still running
+                     if (childProcess.exitCode === null) {
+                         console.warn('[llama.cpp] Process did not exit gracefully, force killing...');
+                         childProcess.kill('SIGKILL');
+                     }
+                     resolve();
+                 }, 5000);
+                 childProcess.once('exit', () => {
+                     clearTimeout(timeout);
+                     resolve();
+                 });
+             });
+         },
+         async restart() {
+             console.log('[llama.cpp] Restarting process...');
+             // Stop current process
+             const stopPromise = (async () => {
+                 if (childProcess && isRunning) {
+                     childProcess.kill('SIGTERM');
+                     // Wait for graceful shutdown
+                     await new Promise((resolve) => {
+                         const timeout = setTimeout(() => {
+                             if (childProcess && isRunning) {
+                                 console.warn('[llama.cpp] Process did not exit gracefully, force killing...');
+                                 childProcess.kill('SIGKILL');
+                             }
+                             resolve();
+                         }, 5000);
+                         childProcess.once('exit', () => {
+                             clearTimeout(timeout);
+                             resolve();
+                         });
+                     });
+                 }
+             })();
+             await stopPromise;
+             isRunning = false;
+             // Create a new process with the same options
+             return await spawnLlamaCpp(originalOptions);
+         },
+         onCrash(callback) {
+             crashCallbacks.push(callback);
+         },
+     };
+ }
+ /**
+  * Wait for llama.cpp server to be ready (health check)
+  */
+ export async function waitForLlamaReady(url, timeoutMs = 30000) {
+     const startTime = Date.now();
+     const checkInterval = 500; // Check every 500ms
+     while (Date.now() - startTime < timeoutMs) {
+         try {
+             const response = await fetch(`${url}/v1/models`);
+             if (response.ok) {
+                 console.log('[llama.cpp] Server is ready');
+                 return;
+             }
+         }
+         catch (error) {
+             // Server not ready yet, continue waiting
+         }
+         await new Promise((resolve) => setTimeout(resolve, checkInterval));
+     }
+     throw new Error(`llama.cpp server did not become ready within ${timeoutMs}ms`);
+ }
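
A minimal usage sketch for the two exports above (not part of the published package; the import path is hypothetical since this file's name is truncated in the diff, and the model path and option values are illustrative):

// Hypothetical import path - the source filename is not shown in this diff
import { spawnLlamaCpp, waitForLlamaReady } from './llama-server.js';

const server = await spawnLlamaCpp({
    modelPath: '/models/example.gguf', // illustrative local GGUF path
    contextSize: 8192,
    parallelism: 2,
    // port defaults to 9002, host to '127.0.0.1'; omit gpuLayers to use llama.cpp's own default
});
server.onCrash((code, signal) => {
    console.error(`llama.cpp crashed (code=${code}, signal=${signal})`);
});
// Polls GET /v1/models every 500ms; throws if not ready within 30s
await waitForLlamaReady('http://127.0.0.1:9002');
// ...
await server.stop(); // SIGTERM, then SIGKILL after 5s if still running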
package/dist/llama.js ADDED
@@ -0,0 +1,207 @@
+ /**
+  * llama.cpp Integration
+  *
+  * Simple dumb router: forwards OpenAI-format jobs to llama.cpp and streams back tokens.
+  * NO prompt templates or business logic - server controls all prompts.
+  */
+ import { createParser } from 'eventsource-parser';
+ import { logClientToLlama, logLlamaToClient } from './debug.js';
+ /**
+  * Process job by forwarding to llama.cpp
+  *
+  * Dumb router - receives OpenAI format, forwards to llama.cpp, streams back tokens.
+  *
+  * If job.max_tokens is not provided, uses capabilities.max_tokens_default to override
+  * llama.cpp's OpenAI API layer default (which is 2048, too restrictive).
+  *
+  * Supports cancellation via AbortSignal.
+  */
+ export async function processJob(job, llamaUrl, capabilities, abortSignal, onToken) {
+     const startTime = Date.now();
+     // Prepare request to llama.cpp (OpenAI compatible format)
+     const request = {
+         model: 'default',
+         messages: job.messages,
+         temperature: job.temperature ?? 0.7,
+         stream: true,
+     };
+     // Include tools if provided (for function calling)
+     if (job.tools && job.tools.length > 0) {
+         request.tools = job.tools;
+     }
+     // Handle max_tokens:
+     // - If job specifies max_tokens, use it (explicit limit)
+     // - Otherwise, use capabilities.max_tokens_default from /props (server's default, often -1 for unlimited)
+     // - This overrides llama.cpp's OpenAI API compat layer default of 2048
+     if (job.max_tokens !== undefined) {
+         request.max_tokens = job.max_tokens;
+     }
+     else if (capabilities.max_tokens_default !== undefined) {
+         request.max_tokens = capabilities.max_tokens_default;
+     }
+     // Debug log request to llama.cpp (level 2)
+     logClientToLlama(request);
+     const requestBody = JSON.stringify(request);
+     const response = await fetch(`${llamaUrl}/v1/chat/completions`, {
+         method: 'POST',
+         headers: {
+             'Content-Type': 'application/json',
+         },
+         body: requestBody,
+         signal: abortSignal, // Pass abort signal for cancellation
+     });
+     if (!response.ok) {
+         // Try to get error details from response body
+         let errorDetails = '';
+         try {
+             const errorBody = await response.text();
+             errorDetails = errorBody ? ` - ${errorBody.substring(0, 500)}` : '';
+         }
+         catch {
+             // Ignore if we can't read error body
+         }
+         console.error(`[llama.cpp] Request failed:`, {
+             status: response.status,
+             statusText: response.statusText,
+             request: JSON.stringify(request, null, 2),
+             errorBody: errorDetails,
+         });
+         throw new Error(`llama.cpp request failed: ${response.status} ${response.statusText}${errorDetails}`);
+     }
+     if (!response.body) {
+         throw new Error('No response body from llama.cpp');
+     }
+     // Process stream
+     let fullResponse = '';
+     let timings = undefined;
+     const reader = response.body.getReader();
+     const decoder = new TextDecoder();
+     // Track tool calls as they stream in
+     const toolCallsMap = new Map();
+     // Create SSE parser
+     const parser = createParser((event) => {
+         // Skip reconnection interval messages
+         if (event.type === 'reconnect-interval')
+             return;
+         // Check for [DONE] signal
+         if (event.data === '[DONE]')
+             return;
+         try {
+             const chunk = JSON.parse(event.data);
+             // Capture timings from the final chunk
+             if (chunk.timings) {
+                 timings = chunk.timings;
+             }
+             // Extract content
+             const content = chunk.choices[0]?.delta?.content;
+             if (content) {
+                 fullResponse += content;
+                 onToken(content);
+             }
+             // Extract tool_calls (streamed incrementally)
+             const toolCalls = chunk.choices[0]?.delta?.tool_calls;
+             if (toolCalls) {
+                 for (const tc of toolCalls) {
+                     const index = tc.index ?? 0;
+                     // Get or create tool call entry
+                     if (!toolCallsMap.has(index)) {
+                         toolCallsMap.set(index, {
+                             id: tc.id,
+                             type: 'function',
+                             function: {
+                                 name: tc.function?.name,
+                                 arguments: tc.function?.arguments || '',
+                             },
+                         });
+                     }
+                     else {
+                         // Append to existing tool call (arguments stream incrementally)
+                         const existing = toolCallsMap.get(index);
+                         if (tc.id)
+                             existing.id = tc.id;
+                         if (tc.function?.name)
+                             existing.function.name = tc.function.name;
+                         if (tc.function?.arguments) {
+                             existing.function.arguments = (existing.function.arguments || '') + tc.function.arguments;
+                         }
+                     }
+                 }
+             }
+         }
+         catch (error) {
+             console.error('[SSE Parser] Error parsing chunk:', error);
+         }
+     });
+     try {
+         while (true) {
+             // Check for abort before reading next chunk
+             if (abortSignal.aborted) {
+                 break; // Exit gracefully with partial response
+             }
+             const { done, value } = await reader.read();
+             if (done)
+                 break;
+             // Decode and feed to SSE parser
+             const text = decoder.decode(value, { stream: true });
+             // Debug log raw SSE chunk (level 2)
+             logLlamaToClient(text);
+             parser.feed(text);
+         }
+     }
+     catch (error) {
+         // If aborted, return partial response gracefully
+         if (error instanceof Error && error.name === 'AbortError') {
+             console.log(`[Job ${job.job_id}] Cancelled - returning partial response`);
+             // Don't throw - return what we have so far
+         }
+         else {
+             throw error; // Re-throw non-abort errors
+         }
+     }
+     finally {
+         reader.releaseLock();
+     }
+     // Use real timing data from llama.cpp if available
+     let durationMs;
+     let inputTokens;
+     let outputTokens;
+     if (timings) {
+         // TypeScript has narrowing issues with optional types in closures, so we explicitly type
+         const t = timings;
+         durationMs = Math.round(t.prompt_ms + t.predicted_ms);
+         inputTokens = t.prompt_n;
+         outputTokens = t.predicted_n;
+         console.log(`[Job ${job.job_id}] Completed in ${durationMs}ms (${inputTokens} prompt, ${outputTokens} predicted)`);
+     }
+     else {
+         // Fallback to estimates if no timing data
+         console.warn(`[Job ${job.job_id}] No timing data from llama.cpp`);
+         durationMs = Date.now() - startTime;
+         inputTokens = Math.ceil(JSON.stringify(job.messages).length / 4);
+         outputTokens = Math.ceil(fullResponse.length / 4);
+     }
+     // Convert tool calls map to array (if any)
+     const finalToolCalls = toolCallsMap.size > 0
+         ? Array.from(toolCallsMap.values())
+             .filter(tc => tc.id && tc.function.name) // Only include complete tool calls
+             .map(tc => ({
+                 id: tc.id,
+                 type: 'function',
+                 function: {
+                     name: tc.function.name,
+                     arguments: tc.function.arguments || '{}',
+                 },
+             }))
+         : undefined;
+     // Log tool calls if present
+     if (finalToolCalls && finalToolCalls.length > 0) {
+         console.log(`[Job ${job.job_id}] Detected ${finalToolCalls.length} tool call(s):`, finalToolCalls.map(tc => tc.function.name).join(', '));
+     }
+     return {
+         fullResponse,
+         durationMs,
+         inputTokens,
+         outputTokens,
+         tool_calls: finalToolCalls,
+     };
+ }
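
A sketch of how processJob might be called (illustrative; the job and capabilities shapes are inferred from the fields this function reads, and the job_id and URL values are made up):

import { processJob } from './llama.js';

const controller = new AbortController();
const result = await processJob(
    {
        job_id: 'job-123', // hypothetical id
        messages: [{ role: 'user', content: 'Hello' }],
        temperature: 0.2,
        // max_tokens omitted: capabilities.max_tokens_default applies
    },
    'http://127.0.0.1:9002',
    { max_tokens_default: -1 }, // -1 = unlimited, per the comment in processJob
    controller.signal,          // calling controller.abort() keeps the partial response
    (token) => process.stdout.write(token),
);
console.log(`\n${result.outputTokens} tokens in ${result.durationMs}ms`);
if (result.tool_calls) console.log('tool calls:', result.tool_calls);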
@@ -0,0 +1,139 @@
+ /**
+  * Model Downloader
+  *
+  * Downloads GGUF model files from Hugging Face.
+  *
+  * Features:
+  * - Progress reporting
+  * - Resumable downloads (HTTP Range requests)
+  * - Integrity verification (file size)
+  * - Caching (checks if file exists)
+  */
+ import { createWriteStream, existsSync, statSync } from 'node:fs';
+ import { mkdir, unlink } from 'node:fs/promises';
+ import { dirname } from 'node:path';
+ /**
+  * Download a model file from Hugging Face
+  *
+  * @param entry Model registry entry
+  * @param outputPath Full path where the file should be saved
+  * @param onProgress Optional progress callback (bytes downloaded, total bytes)
+  * @returns Path to downloaded file
+  */
+ export async function downloadModel(entry, outputPath, onProgress) {
+     // Create output directory if it doesn't exist
+     const outputDir = dirname(outputPath);
+     if (!existsSync(outputDir)) {
+         await mkdir(outputDir, { recursive: true });
+     }
+     // Check if file already exists
+     if (existsSync(outputPath)) {
+         const stats = statSync(outputPath);
+         // If file size matches expected size, skip download
+         if (entry.file_size_bytes && stats.size === entry.file_size_bytes) {
+             console.log(` ✓ Model already downloaded (${formatBytes(stats.size)})`);
+             return outputPath;
+         }
+         // If the file is larger than expected it cannot be resumed - delete and re-download
+         // (a smaller file is a partial download and is resumed below)
+         if (entry.file_size_bytes && stats.size > entry.file_size_bytes) {
+             console.log(` ⊘ Existing file size mismatch (${formatBytes(stats.size)} vs ${formatBytes(entry.file_size_bytes)})`);
+             console.log(` ⊘ Re-downloading...`);
+             await unlink(outputPath);
+         }
+     }
+     // Build Hugging Face download URL
+     // Format: https://huggingface.co/{repo_id}/resolve/main/{filename}
+     const downloadUrl = `https://huggingface.co/${entry.huggingface_id}/resolve/main/${entry.file_name}`;
+     console.log(` Downloading from: ${entry.huggingface_id}`);
+     console.log(` File: ${entry.file_name}`);
+     if (entry.file_size_bytes) {
+         console.log(` Size: ${formatBytes(entry.file_size_bytes)}`);
+     }
+     // Download with progress tracking
+     let downloadedBytes = 0;
+     const totalBytes = entry.file_size_bytes || 0;
+     // Check if we can resume (partial file exists)
+     let startByte = 0;
+     if (existsSync(outputPath)) {
+         const stats = statSync(outputPath);
+         startByte = stats.size;
+         if (startByte > 0 && startByte < totalBytes) {
+             console.log(` Resuming from ${formatBytes(startByte)}...`);
+             downloadedBytes = startByte;
+         }
+     }
+     const response = await fetch(downloadUrl, {
+         headers: startByte > 0 ? {
+             'Range': `bytes=${startByte}-`,
+         } : {},
+     });
+     if (!response.ok) {
+         if (response.status === 416) {
+             // Range not satisfiable - file already fully downloaded
+             if (existsSync(outputPath)) {
+                 console.log(` ✓ Download complete`);
+                 return outputPath;
+             }
+         }
+         throw new Error(`Download failed: ${response.status} ${response.statusText}`);
+     }
+     const contentLength = response.headers.get('content-length');
+     const totalSize = contentLength ? parseInt(contentLength, 10) + startByte : totalBytes;
+     // Open file for writing (append if resuming)
+     const fileStream = createWriteStream(outputPath, { flags: startByte > 0 ? 'a' : 'w' });
+     // Stream response to file
+     const reader = response.body?.getReader();
+     if (!reader) {
+         throw new Error('Response body is not readable');
+     }
+     try {
+         while (true) {
+             const { done, value } = await reader.read();
+             if (done) {
+                 break;
+             }
+             fileStream.write(value);
+             downloadedBytes += value.length;
+             // Report progress
+             if (onProgress) {
+                 onProgress(downloadedBytes, totalSize);
+             }
+             else if (totalSize > 0) {
+                 // Simple progress log every 10MB
+                 if (downloadedBytes % (10 * 1024 * 1024) < value.length) {
+                     const percent = ((downloadedBytes / totalSize) * 100).toFixed(1);
+                     process.stdout.write(`\r Progress: ${percent}% (${formatBytes(downloadedBytes)} / ${formatBytes(totalSize)})`);
+                 }
+             }
+         }
+         fileStream.end();
+         // Wait for file stream to finish
+         await new Promise((resolve, reject) => {
+             fileStream.on('finish', resolve);
+             fileStream.on('error', reject);
+         });
+         if (totalSize > 0 && downloadedBytes !== totalSize) {
+             throw new Error(`Download incomplete: ${downloadedBytes} bytes downloaded, expected ${totalSize}`);
+         }
+         // Clear progress line
+         process.stdout.write('\r');
+         console.log(` ✓ Download complete (${formatBytes(downloadedBytes)})`);
+         return outputPath;
+     }
+     catch (error) {
+         fileStream.destroy();
+         throw error;
+     }
+ }
+ /**
+  * Format bytes to human-readable string
+  */
+ function formatBytes(bytes) {
+     if (bytes === 0)
+         return '0 B';
+     const k = 1024;
+     const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
+     const i = Math.floor(Math.log(bytes) / Math.log(k));
+     return `${(bytes / Math.pow(k, i)).toFixed(1)} ${sizes[i]}`;
+ }
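
An illustrative call to downloadModel (the registry entry values here are invented; only the field names come from this file):

import { downloadModel } from './model-downloader.js';

const entry = {
    huggingface_id: 'example-org/example-model-GGUF', // hypothetical repo
    file_name: 'example-model.Q4_K_M.gguf',           // hypothetical file
    file_size_bytes: 4_500_000_000,                   // enables the cache and resume checks
};
await downloadModel(entry, '/models/example-model.Q4_K_M.gguf', (done, total) => {
    process.stdout.write(`\r${((done / total) * 100).toFixed(1)}%`);
});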
@@ -0,0 +1,79 @@
+ /**
+  * Model Path Resolver
+  *
+  * Resolves model paths from policy using the model registry.
+  * Downloads models if not cached.
+  */
+ import { loadRegistry, getModels } from '@auxot/model-registry';
+ import { join } from 'path';
+ import { homedir } from 'os';
+ import { downloadModel } from './model-downloader.js';
+ import { existsSync, statSync } from 'node:fs';
+ /**
+  * Get model entry from registry based on policy
+  */
+ export function getModelFromPolicy(policy) {
+     const registry = loadRegistry();
+     // Find model matching policy (model_name and quantization)
+     const matchingModels = getModels(registry, {
+         model_name: policy.model_name,
+     });
+     // Filter by quantization (case-insensitive)
+     const model = matchingModels.find((m) => m.quantization.toLowerCase() === policy.quantization.toLowerCase());
+     return model || null;
+ }
+ /**
+  * Get model path from policy, downloading if necessary
+  *
+  * Returns the local path for the model file.
+  * Downloads the model if it doesn't exist or is incomplete.
+  *
+  * @param policy GPU key policy
+  * @param onProgress Optional progress callback (downloaded, total)
+  * @returns Model file path, or null if model not found in registry
+  */
+ export async function ensureModelDownloaded(policy, onProgress) {
+     const model = getModelFromPolicy(policy);
+     if (!model) {
+         console.error(` ✗ Model not found in registry: ${policy.model_name} (${policy.quantization})`);
+         return null;
+     }
+     // Build model path
+     const modelsDir = process.env.AUXOT_MODELS_DIR || join(homedir(), '.auxot', 'models');
+     const modelDir = join(modelsDir, model.huggingface_id.replace('/', '_'));
+     const modelPath = join(modelDir, model.file_name);
+     // Check if model exists and is valid
+     if (existsSync(modelPath)) {
+         const stats = statSync(modelPath);
+         // If file size matches expected size, use cached model
+         if (model.file_size_bytes && stats.size === model.file_size_bytes) {
+             return modelPath;
+         }
+     }
+     // Download model
+     console.log(` Downloading model: ${model.model_name} (${model.quantization})`);
+     try {
+         await downloadModel(model, modelPath, onProgress);
+         return modelPath;
+     }
+     catch (error) {
+         console.error(` ✗ Download failed:`, error);
+         throw error;
+     }
+ }
+ /**
+  * Get model path without downloading (for checking if model exists)
+  *
+  * @param policy GPU key policy
+  * @returns Model file path, or null if model not found in registry
+  */
+ export function getModelPath(policy) {
+     const model = getModelFromPolicy(policy);
+     if (!model) {
+         return null;
+     }
+     const modelsDir = process.env.AUXOT_MODELS_DIR || join(homedir(), '.auxot', 'models');
+     const modelDir = join(modelsDir, model.huggingface_id.replace('/', '_'));
+     const modelPath = join(modelDir, model.file_name);
+     return modelPath;
+ }
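
A usage sketch for the resolver (the policy values are hypothetical, and so is the import path, since this file's name is not shown in the diff):

// Hypothetical import path - the source filename is not shown in this diff
import { getModelPath, ensureModelDownloaded } from './model-resolver.js';

const policy = { model_name: 'example-model', quantization: 'Q4_K_M' };
// Cheap existence check: resolves the expected path without downloading
console.log('expected path:', getModelPath(policy));
// Resolves the path, then downloads if missing or incomplete; honors AUXOT_MODELS_DIR
const modelPath = await ensureModelDownloaded(policy, (done, total) => {
    console.log(`${done}/${total} bytes`);
});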