@auxot/worker-cli 0.1.0 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,145 +0,0 @@
1
- /**
2
- * Model Downloader
3
- *
4
- * Downloads GGUF model files from Hugging Face.
5
- *
6
- * Features:
7
- * - Progress reporting
8
- * - Resumable downloads (HTTP Range requests)
9
- * - Integrity verification (file size)
10
- * - Caching (checks if file exists)
11
- */
12
- import { createWriteStream, existsSync, statSync } from 'node:fs';
13
- import { mkdir } from 'node:fs/promises';
14
- import { dirname } from 'node:path';
15
/**
 * Download a model file from Hugging Face.
 *
 * Behaviour, in order:
 *  - The output directory is created if needed.
 *  - If the entry declares `file_size_bytes` and a file of exactly that size
 *    is already at `outputPath`, nothing is downloaded.
 *  - If the entry declares a size and the existing file has a different size,
 *    the file is deleted and downloaded from scratch.
 *  - If the entry declares no size and a file exists, the download is resumed
 *    with an HTTP Range request starting at the current file length. A 416
 *    reply is treated as "already complete".
 *
 * @param entry Model registry entry (reads `huggingface_id`, `file_name` and
 *              the optional `file_size_bytes`)
 * @param outputPath Full path where the file should be saved
 * @param onProgress Optional progress callback (bytes downloaded, total bytes)
 * @returns Path to downloaded file
 * @throws Error when the server replies with a failure status, the response
 *         has no readable body, writing to disk fails, or fewer/more bytes
 *         arrive than announced
 */
export async function downloadModel(entry, outputPath, onProgress) {
    // `recursive: true` makes this a no-op when the directory already exists.
    await mkdir(dirname(outputPath), { recursive: true });
    const expectedBytes = entry.file_size_bytes || 0;
    if (expectedBytes && existsSync(outputPath)) {
        const existingBytes = statSync(outputPath).size;
        if (existingBytes === expectedBytes) {
            console.log(` ✓ Model already downloaded (${formatBytes(existingBytes)})`);
            return outputPath;
        }
        // Known size but wrong length: start over rather than trust the file.
        console.log(` ⊘ Existing file size mismatch (${formatBytes(existingBytes)} vs ${formatBytes(expectedBytes)})`);
        console.log(` ⊘ Re-downloading...`);
        const { unlink } = await import('node:fs/promises');
        await unlink(outputPath);
    }
    // Format: https://huggingface.co/{repo_id}/resolve/main/{filename}
    const downloadUrl = `https://huggingface.co/${entry.huggingface_id}/resolve/main/${entry.file_name}`;
    console.log(` Downloading from: ${entry.huggingface_id}`);
    console.log(` File: ${entry.file_name}`);
    if (entry.file_size_bytes) {
        console.log(` Size: ${formatBytes(entry.file_size_bytes)}`);
    }
    // A file that is still present here has no declared size to compare
    // against, so ask the server to continue from its current length.
    let startByte = 0;
    if (existsSync(outputPath)) {
        startByte = statSync(outputPath).size;
        if (startByte > 0) {
            console.log(` Resuming from ${formatBytes(startByte)}...`);
        }
    }
    const response = await fetch(downloadUrl, {
        headers: startByte > 0 ? { 'Range': `bytes=${startByte}-` } : {},
    });
    if (!response.ok) {
        // Release the connection; the body of an error reply is not needed.
        await response.body?.cancel().catch(() => { });
        // Range not satisfiable - file already fully downloaded
        if (response.status === 416 && existsSync(outputPath)) {
            console.log(` ✓ Download complete`);
            return outputPath;
        }
        throw new Error(`Download failed: ${response.status} ${response.statusText}`);
    }
    // Only 206 means the Range header was honoured. Any other success status
    // carries the whole file, so it must overwrite the partial file instead
    // of being appended to it.
    if (startByte > 0 && response.status !== 206) {
        startByte = 0;
    }
    // Check the body before opening the file so a failure here neither leaks
    // a file handle nor truncates an existing file.
    const reader = response.body?.getReader();
    if (!reader) {
        throw new Error('Response body is not readable');
    }
    const contentLength = response.headers.get('content-length');
    const totalSize = contentLength ? parseInt(contentLength, 10) + startByte : expectedBytes;
    // Bytes already on disk count towards the total when resuming.
    let downloadedBytes = startByte;
    const fileStream = createWriteStream(outputPath, { flags: startByte > 0 ? 'a' : 'w' });
    // Listen for write errors from the start: an 'error' event without a
    // listener would crash the process instead of rejecting this call.
    const streamFailure = new Promise((_, reject) => {
        fileStream.on('error', reject);
    });
    // The failure is observed through Promise.race below; this handler only
    // prevents an "unhandled rejection" if it fires while nobody is racing.
    streamFailure.catch(() => { });
    const waitFor = (event) => Promise.race([
        new Promise((resolve) => fileStream.once(event, resolve)),
        streamFailure,
    ]);
    try {
        while (true) {
            const { done, value } = await reader.read();
            if (done) {
                break;
            }
            // Respect backpressure: model files are far larger than memory
            // should be asked to buffer.
            if (!fileStream.write(value)) {
                await waitFor('drain');
            }
            downloadedBytes += value.length;
            if (onProgress) {
                onProgress(downloadedBytes, totalSize);
            }
            else if (totalSize > 0 && downloadedBytes % (10 * 1024 * 1024) < value.length) {
                // Simple progress log roughly every 10MB
                const percent = ((downloadedBytes / totalSize) * 100).toFixed(1);
                process.stdout.write(`\r Progress: ${percent}% (${formatBytes(downloadedBytes)} / ${formatBytes(totalSize)})`);
            }
        }
        fileStream.end();
        await waitFor('finish');
        if (totalSize > 0 && downloadedBytes !== totalSize) {
            throw new Error(`Download incomplete: ${downloadedBytes} bytes downloaded, expected ${totalSize}`);
        }
        // Return to column 0 so the final message overwrites any progress line.
        process.stdout.write('\r');
        console.log(` ✓ Download complete (${formatBytes(downloadedBytes)})`);
        return outputPath;
    }
    catch (error) {
        fileStream.destroy();
        // Stop the transfer; a failure to cancel must not mask the real error.
        await reader.cancel().catch(() => { });
        throw error;
    }
}
/**
 * Format bytes to human-readable string (1024-based, one decimal place).
 *
 * Values that are not finite or not positive are shown as '0 B'; values
 * beyond the largest unit stay expressed in TB.
 */
function formatBytes(bytes) {
    if (!Number.isFinite(bytes) || bytes <= 0) {
        return '0 B';
    }
    const k = 1024;
    const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
    // Repeated division by a power of two is exact, unlike a log-based
    // exponent, and it can never step past the last unit.
    let scaled = bytes;
    let i = 0;
    while (scaled >= k && i < sizes.length - 1) {
        scaled /= k;
        i += 1;
    }
    return `${scaled.toFixed(1)} ${sizes[i]}`;
}
@@ -1,80 +0,0 @@
1
- /**
2
- * Model Path Resolver
3
- *
4
- * Resolves model paths from policy using the model registry.
5
- * Downloads models if not cached.
6
- */
7
- import { loadRegistry, getModels } from '@auxot/model-registry';
8
- import { join } from 'path';
9
- import { homedir } from 'os';
10
- import { downloadModel } from './model-downloader.js';
11
- import { existsSync } from 'node:fs';
12
/**
 * Look up the registry entry a policy refers to.
 *
 * The registry is queried by `policy.model_name`; among the returned entries
 * the first one whose quantization equals `policy.quantization` (ignoring
 * letter case) wins.
 *
 * @param policy GPU key policy (reads `model_name` and `quantization`)
 * @returns The matching registry entry, or null when there is none
 */
export function getModelFromPolicy(policy) {
    const registry = loadRegistry();
    const candidates = getModels(registry, { model_name: policy.model_name });
    for (const candidate of candidates) {
        const sameQuantization = candidate.quantization.toLowerCase() === policy.quantization.toLowerCase();
        if (sameQuantization) {
            return candidate || null;
        }
    }
    return null;
}
25
/**
 * Resolve the local file for a policy's model, fetching it when needed.
 *
 * The file lives under `$AUXOT_MODELS_DIR` (default `~/.auxot/models`) in a
 * folder named after the Hugging Face id with its first `/` turned into `_`.
 * A file already on disk is reused only when the registry entry declares a
 * size and the file has exactly that size; otherwise `downloadModel` is
 * called.
 *
 * @param policy GPU key policy
 * @param onProgress Optional progress callback (downloaded, total)
 * @returns Model file path, or null if model not found in registry
 * @throws Whatever `downloadModel` throws (logged, then rethrown)
 */
export async function ensureModelDownloaded(policy, onProgress) {
    const model = getModelFromPolicy(policy);
    if (!model) {
        console.error(` ✗ Model not found in registry: ${policy.model_name} (${policy.quantization})`);
        return null;
    }
    const modelsRoot = process.env.AUXOT_MODELS_DIR || join(homedir(), '.auxot', 'models');
    const repoFolder = join(modelsRoot, model.huggingface_id.replace('/', '_'));
    const modelPath = join(repoFolder, model.file_name);
    // Reuse the cached copy only when its size can be verified.
    if (existsSync(modelPath)) {
        const { statSync } = await import('node:fs');
        const cachedBytes = statSync(modelPath).size;
        const sizeVerified = model.file_size_bytes && cachedBytes === model.file_size_bytes;
        if (sizeVerified) {
            return modelPath;
        }
    }
    console.log(` Downloading model: ${model.model_name} (${model.quantization})`);
    try {
        await downloadModel(model, modelPath, onProgress);
    }
    catch (error) {
        console.error(` ✗ Download failed:`, error);
        throw error;
    }
    return modelPath;
}
65
/**
 * Compute where a policy's model file is expected on disk, without touching
 * the network or checking that the file exists.
 *
 * @param policy GPU key policy
 * @returns Model file path, or null if model not found in registry
 */
export function getModelPath(policy) {
    const model = getModelFromPolicy(policy);
    if (model) {
        const modelsRoot = process.env.AUXOT_MODELS_DIR || join(homedir(), '.auxot', 'models');
        const repoFolder = join(modelsRoot, model.huggingface_id.replace('/', '_'));
        return join(repoFolder, model.file_name);
    }
    return null;
}
@@ -1,242 +0,0 @@
1
- /**
2
- * Policy Validation
3
- *
4
- * Validates worker capabilities against GPU key policy.
5
- */
6
/**
 * Normalize model name for comparison.
 *
 * Reduces a repository id, file name or display name to
 * "<base> <version>-<variant>" (version and variant only when detected) by
 * dropping the GGUF suffix/extension, any leading path, multi-file markers,
 * quantization tags, parameter counts and expert counts.
 *
 * Quantization tags are stripped BEFORE the variant is looked for: the
 * variant patterns are anchored to the end of the string, so a trailing tag
 * such as "-Q4_K_M" would otherwise hide "-Instruct" and make a quantized
 * file normalize differently from its un-quantized model name.
 *
 * @param name Model name, repository id or file name
 * @returns Normalized name, e.g. "Qwen 3-Instruct"
 */
function normalizeModelName(name) {
    // Remove the GGUF repository suffix and file extension
    let normalized = name
        .replace(/-GGUF$/i, '')
        .replace(/\.gguf$/i, '');
    // Remove path components (keep only the last segment)
    normalized = normalized.split('/').pop();
    // Remove multi-GGUF file patterns (e.g., "-00001-of-00003")
    normalized = normalized.replace(/-\d{5}-of-\d{5}$/i, '');
    // Extract version number (e.g., "3", "2.5", "4", "3.3") that directly
    // follows a known family name, and drop it from the working string.
    let version = '';
    const versionMatch = normalized.match(/^(Qwen|Llama|Ministral|Devstral|Gemma|DeepSeek|Granite|GPT-OSS)[-_]?(\d+(?:\.\d+)?)/i);
    if (versionMatch) {
        version = versionMatch[2];
        normalized = versionMatch[1] + normalized.slice(versionMatch[0].length);
    }
    // Remove quantization tags (Q4_K_M, Q5_K_S, etc.) ...
    normalized = normalized.replace(/[-_]Q\d+[_\w]*/i, '');
    // ... including importance-matrix and float formats (IQ4_XS, F16, BF16, F32)
    normalized = normalized.replace(/[-_](?:IQ\d+\w*|BF16|F16|F32)(?=$|[-_.])/i, '');
    // Extract variant. Composite patterns come first so "-VL-Instruct" is
    // not cut down to "-Instruct"; the first match wins.
    const variantPatterns = [
        /-VL-Instruct$/i,
        /-VL-Thinking$/i,
        /-VL-Chat$/i,
        /-VL-Coder$/i,
        /-VL-Code$/i,
        /-Instruct$/i,
        /-Thinking$/i,
        /-Chat$/i,
        /-Coder$/i,
        /-Code$/i,
        /-VL$/i,
        /-Vision$/i,
        /-Maverick$/i,
        /-Scout$/i,
        /-Reasoning$/i,
    ];
    let variant = '';
    for (const pattern of variantPatterns) {
        const match = normalized.match(pattern);
        if (match) {
            variant = match[0].replace(/^-/, '');
            normalized = normalized.replace(pattern, '');
            break;
        }
    }
    // Remove parameter counts (7B, 13B, 30B, etc.)
    normalized = normalized.replace(/[-_](\d+(?:\.\d+)?[BMK])(?![0-9])/i, '');
    // Remove expert counts (A22B, E2B, etc.)
    normalized = normalized.replace(/[-_]([AE]\d+[BMK])/i, '');
    normalized = normalized.replace(/[-_](\d+[AE])(?![0-9])/i, '');
    // Remove trailing numbers of 4+ digits, which are not part of the name
    normalized = normalized.replace(/[-_](\d{4,})$/i, '');
    // Map known families to a canonical base name; anything else keeps
    // whatever is left of the input.
    const nameLower = normalized.toLowerCase();
    let base;
    if (nameLower.startsWith('qwen')) {
        base = 'Qwen';
    }
    else if (nameLower.startsWith('llama') || nameLower.startsWith('meta-llama')) {
        base = 'Llama';
    }
    else if (nameLower.startsWith('ministral') || nameLower.startsWith('devstral')) {
        // NOTE(review): Devstral is folded into "Ministral" here - confirm
        // this grouping is intended.
        base = 'Ministral';
    }
    else if (nameLower.startsWith('gemma')) {
        base = 'Gemma';
    }
    else if (nameLower.startsWith('deepseek')) {
        base = 'DeepSeek';
    }
    else if (nameLower.startsWith('granite')) {
        base = 'Granite';
    }
    else if (nameLower.startsWith('gpt-oss') || nameLower.startsWith('gptoss')) {
        base = 'GPT-OSS';
    }
    else {
        base = normalized.trim();
    }
    // Combine base + version + variant
    let result = base;
    if (version) {
        result += ` ${version}`;
    }
    if (variant) {
        result += `-${variant}`;
    }
    return result.trim();
}
114
/**
 * Extract the quantization tag from the discovered model name.
 *
 * The name is searched (case-sensitively) for each known tag in list order
 * and the first hit is returned.
 *
 * @param capabilities Discovered capabilities (reads `model`)
 * @returns The matching tag, or null when `model` is missing or contains
 *          none of the known tags
 */
function extractQuantization(capabilities) {
    const model = capabilities.model || '';
    // Order matters because matching is by substring: 'BF16' contains 'F16',
    // so it has to be tried first or every BF16 model is reported as F16.
    // NOTE(review): common tags such as Q4_K_M or Q5_K_M are not listed, so
    // models using them skip the quantization check - confirm this is intended.
    const quantPatterns = [
        'Q3_K_S', 'Q4_K_S', 'Q5_K_S', 'Q6_K', 'Q8_0', 'Q8_K',
        'BF16', 'F16', 'F32',
    ];
    return quantPatterns.find((pattern) => model.includes(pattern)) ?? null;
}
132
/**
 * Guess a model's capabilities from keywords in its name.
 *
 * The lower-cased name is checked for the keywords of each capability, in
 * the order vision, code, embedding. A name that triggers none of them is
 * treated as a plain chat model.
 *
 * @param modelName Model name to inspect
 * @returns Capability names, never empty
 */
function inferCapabilitiesFromModel(modelName) {
    const lowered = modelName.toLowerCase();
    const keywordTable = [
        ['vision', ['vision', 'multimodal', 'vl-']],
        ['code', ['code', 'coder', 'starcoder']],
        ['embedding', ['embed', 'embedding']],
    ];
    const detected = keywordTable
        .filter(([, keywords]) => keywords.some((keyword) => lowered.includes(keyword)))
        .map(([capability]) => capability);
    return detected.length === 0 ? ['chat'] : detected;
}
152
- /**
153
- * Validate worker capabilities against policy
154
- */
155
/**
 * Convert a parameter-count label into a number.
 *
 * Accepts a non-negative decimal followed by exactly one of B, M or K in
 * either case (e.g., "7B" -> 7e9, "350M" -> 350e6, "128k" -> 128e3).
 *
 * @param parameters Label such as "7B"
 * @returns The count as a number, or 0 when the label has another shape
 */
function parseParameters(parameters) {
    const parsed = parameters.match(/^(\d+(?:\.\d+)?)(B|M|K)$/i);
    if (!parsed) {
        return 0;
    }
    const [, digits, suffix] = parsed;
    const amount = parseFloat(digits);
    switch (suffix.toUpperCase()) {
        case 'B':
            return amount * 1e9;
        case 'M':
            return amount * 1e6;
        case 'K':
            return amount * 1e3;
        default:
            return amount;
    }
}
172
/**
 * Validate a worker's discovered capabilities against a GPU key policy.
 *
 * Checks, in order: normalized model name, context size, quantization,
 * required capabilities, parameter count and model family (MoE vs Dense).
 * Every failed check adds one message to `errors`; nothing is thrown for a
 * mismatch.
 *
 * @param discoveredCapabilities What the worker reports (reads `model`,
 *                               `ctx_size`, `parameters`)
 * @param policy GPU key policy (reads `model_name`, `context_size`,
 *               `quantization`, `capabilities`, `parameters`, `family`)
 * @returns `{ valid, errors, warnings }` where `valid` is true when `errors`
 *          is empty and `warnings` is undefined when there are none
 */
export function validatePolicy(discoveredCapabilities, policy) {
    const errors = [];
    const warnings = []; // Declared but currently unused (reserved for future validation warnings)
    // 0. Model size limit for CPU mode is not checked here: the policy comes
    // from the server and the binary selection (GPU vs CPU) happens in
    // llama-binary.ts.
    // TODO: Add warnings here if needed in the future
    // 1. Model name match (normalized comparison)
    // Normalize both discovered and policy model names to base + version + variant
    const discoveredNormalized = normalizeModelName(discoveredCapabilities.model || '');
    const policyNormalized = normalizeModelName(policy.model_name);
    if (discoveredNormalized !== policyNormalized) {
        errors.push(`Model name mismatch: discovered "${discoveredCapabilities.model}" (normalized: "${discoveredNormalized}") ` +
            `does not match policy "${policy.model_name}" (normalized: "${policyNormalized}")`);
    }
    // 2. Context size >= policy.context_size
    const discoveredCtxSize = discoveredCapabilities.ctx_size || 0;
    if (discoveredCtxSize < policy.context_size) {
        errors.push(`Context size insufficient: discovered ${discoveredCtxSize} < required ${policy.context_size}`);
    }
    // 3. Quantization match (if specified in policy)
    // Skipped when either side is unknown, like checks 5 and 6. Letter case
    // is ignored because getModelFromPolicy matches policy.quantization
    // against the registry case-insensitively, so a policy that resolves
    // there must not fail here on case alone.
    const discoveredQuant = extractQuantization(discoveredCapabilities);
    if (policy.quantization && discoveredQuant &&
        discoveredQuant.toLowerCase() !== policy.quantization.toLowerCase()) {
        errors.push(`Quantization mismatch: discovered "${discoveredQuant}" does not match policy "${policy.quantization}"`);
    }
    // 4. Capabilities match (worker must have all required capabilities)
    const discoveredCaps = inferCapabilitiesFromModel(discoveredCapabilities.model || '');
    const missingCaps = policy.capabilities.filter((requiredCap) => !discoveredCaps.includes(requiredCap));
    if (missingCaps.length > 0) {
        errors.push(`Missing required capabilities: ${missingCaps.join(', ')}. ` +
            `Discovered: ${discoveredCaps.join(', ')}. ` +
            `Required: ${policy.capabilities.join(', ')}`);
    }
    // 5. Parameters match (if specified in policy)
    if (policy.parameters) {
        const discoveredParams = discoveredCapabilities.parameters;
        if (discoveredParams && discoveredParams !== policy.parameters) {
            errors.push(`Parameters mismatch: discovered "${discoveredParams}" does not match policy "${policy.parameters}"`);
        }
    }
    // 6. Family match (if specified in policy)
    if (policy.family) {
        // The family is inferred from the model name. A name counts as MoE
        // when it has any of:
        // 1. "MoE" or "mixture-of-experts" in it
        // 2. Expert notation like "A22B", "E22B" (e.g., "235B-A22B" = 235B total, 22B experts)
        // 3. A total-expert pair such as "235B-A22B"
        // 4. Two or more parameter counts, at least one of them "large"
        const modelName = (discoveredCapabilities.model || '').toLowerCase();
        const hasMoEKeyword = modelName.includes('moe') || modelName.includes('mixture-of-experts');
        const hasExpertNotation = /[ae]\d+[bmk]/i.test(modelName);
        const hasTotalExpertPattern = /\d+[bmk]-[ae]\d+[bmk]/i.test(modelName);
        const paramPatterns = modelName.match(/\d+[bmk]/gi) || [];
        const hasMultipleLargeParams = paramPatterns.length >= 2 &&
            paramPatterns.some(p => /^(\d{2,}|[0-9]+0)[bmk]$/i.test(p)); // 2+ digits or ends in 0
        const isMoE = hasMoEKeyword || hasExpertNotation || hasTotalExpertPattern || hasMultipleLargeParams;
        const discoveredFamily = isMoE ? 'MoE' : 'Dense';
        if (discoveredFamily !== policy.family) {
            errors.push(`Family mismatch: discovered "${discoveredFamily}" does not match policy "${policy.family}". ` +
                `Model name: "${discoveredCapabilities.model}"`);
        }
    }
    return {
        valid: errors.length === 0,
        errors,
        warnings: warnings.length > 0 ? warnings : undefined,
    };
}
package/dist/types.js DELETED
@@ -1,4 +0,0 @@
1
- /**
2
- * Type definitions for worker CLI
3
- */
4
- export {};