node-mlx 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +639 -0
- package/dist/cli.js.map +1 -0
- package/dist/index.cjs +253 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +113 -0
- package/dist/index.d.ts +113 -0
- package/dist/index.js +216 -0
- package/dist/index.js.map +1 -0
- package/package.json +89 -0
- package/prebuilds/darwin-arm64/node.node +0 -0
- package/swift/libNodeMLX.dylib +0 -0
- package/swift/mlx-swift_Cmlx.bundle/Contents/Info.plist +40 -0
- package/swift/mlx-swift_Cmlx.bundle/Contents/Resources/default.metallib +0 -0
- package/swift/mlx.metallib +0 -0
package/dist/cli.js
ADDED
|
@@ -0,0 +1,639 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// src/cli.ts
|
|
4
|
+
import * as readline from "readline";
|
|
5
|
+
|
|
6
|
+
// src/index.ts
|
|
7
|
+
import { platform, arch } from "os";
|
|
8
|
+
import { join, dirname } from "path";
|
|
9
|
+
import { fileURLToPath } from "url";
|
|
10
|
+
import { existsSync, readFileSync } from "fs";
|
|
11
|
+
import { createRequire } from "module";
|
|
12
|
+
// ES modules have no built-in __filename/__dirname, so both are derived from import.meta.url.
var __filename = fileURLToPath(import.meta.url);
var __dirname = dirname(__filename);
// CommonJS-style require; used below to load node-gyp-build and the .node addon.
var require2 = createRequire(import.meta.url);
// package.json is read from one directory above this file; its version is printed by --version.
var packageJsonPath = join(__dirname, "..", "package.json");
var packageJson = JSON.parse(readFileSync(packageJsonPath, "utf-8"));
var VERSION = packageJson.version;
// Populated lazily by loadBinding(): the loaded addon, and whether its initialize() call succeeded.
var binding = null;
var initialized = false;
|
|
20
|
+
/**
 * Locate and load the native addon.
 *
 * Resolution order:
 *   1. node-gyp-build against the package root (when `prebuilds/` exists),
 *   2. node-gyp-build against `native/` (when `native/build` exists),
 *   3. a list of explicit file paths (package prebuild, local build, monorepo build).
 *
 * @returns {object} The loaded addon module.
 * @throws {Error} When no addon can be found. The message lists every searched
 *   path and, if node-gyp-build itself failed, the reason it gave.
 */
function loadNativeAddon() {
  const packageRoot = join(__dirname, "..");
  const nativeDir = join(packageRoot, "native");
  let loaderError = null;
  try {
    const gypBuild = require2("node-gyp-build");
    if (existsSync(join(packageRoot, "prebuilds"))) {
      return gypBuild(packageRoot);
    }
    if (existsSync(join(nativeDir, "build"))) {
      return gypBuild(nativeDir);
    }
  } catch (err) {
    // Still best-effort (we fall through to the explicit paths), but keep the
    // reason so the final error does not hide why the primary loader failed.
    loaderError = err;
  }
  const prebuildDir = join(packageRoot, "prebuilds", "darwin-arm64");
  const possibleAddonPaths = [
    // From package dist/ (npm installed)
    join(prebuildDir, "node.napi.node"),
    // The published package ships its prebuild under this file name.
    join(prebuildDir, "node.node"),
    // From native/build (local development)
    join(nativeDir, "build", "Release", "node_mlx.node"),
    // From project root (monorepo development)
    join(process.cwd(), "packages", "node-mlx", "native", "build", "Release", "node_mlx.node")
  ];
  for (const candidate of possibleAddonPaths) {
    if (existsSync(candidate)) {
      return require2(candidate);
    }
  }
  const reason = loaderError === null
    ? ""
    : `\nnode-gyp-build failed: ${loaderError instanceof Error ? loaderError.message : String(loaderError)}`;
  throw new Error(
    `Native addon not found. Run 'pnpm build:native' first.\nSearched paths:\n${possibleAddonPaths.join("\n")}${reason}`
  );
}
|
|
51
|
+
/**
 * Return the path of the first existing `libNodeMLX.dylib` among the known
 * install and development locations.
 *
 * @returns {string} Path to the Swift dynamic library.
 * @throws {Error} When none of the candidate paths exists; the message lists them.
 */
function findSwiftLibrary() {
  const libraryName = "libNodeMLX.dylib";
  const releaseOutput = [".build", "release", libraryName];
  const possibleDylibPaths = [
    // Packaged swift/ directory next to dist/ (preferred: metallib is co-located)
    join(__dirname, "..", "swift", libraryName),
    // packages/node-mlx/swift/ under the working directory (monorepo development)
    join(process.cwd(), "packages", "node-mlx", "swift", libraryName),
    // Swift build output, two and three levels up, then under the working directory
    join(__dirname, "..", "..", "swift", ...releaseOutput),
    join(__dirname, "..", "..", "..", "swift", ...releaseOutput),
    join(process.cwd(), "packages", "swift", ...releaseOutput)
  ];
  const located = possibleDylibPaths.find((candidate) => existsSync(candidate));
  if (located !== undefined) {
    return located;
  }
  throw new Error(
    `Swift library not found. Run 'pnpm build:swift' first.
Searched paths:
${possibleDylibPaths.join("\n")}`
  );
}
|
|
73
|
+
/**
 * Return the initialised native binding, loading and initialising it on first use.
 *
 * @returns {object} The native addon, after `initialize(dylibPath)` has returned truthy.
 * @throws {Error} On any platform other than darwin/arm64, when the addon or
 *   Swift library cannot be located, or when `initialize` reports failure.
 */
function loadBinding() {
  const ready = binding && initialized;
  if (!ready) {
    const onAppleSilicon = platform() === "darwin" && arch() === "arm64";
    if (!onAppleSilicon) {
      throw new Error("node-mlx is only supported on macOS Apple Silicon (arm64)");
    }
    binding = loadNativeAddon();
    const swiftLibrary = findSwiftLibrary();
    const ok = binding.initialize(swiftLibrary);
    if (!ok) {
      throw new Error("Failed to initialize node-mlx native library");
    }
    // Only flagged after a successful initialize(), so a failed attempt is retried next call.
    initialized = true;
  }
  return binding;
}
|
|
89
|
+
// Short alias -> Hugging Face model id. Insertion order matters: printModels()
// iterates this object to build its listing.
var RECOMMENDED_MODELS = {
  // --- Qwen 2.5 (Alibaba): working with proper RoPE support.
  // Non-quantized checkpoints are used because quantized ones have loading issues.
  "qwen": "Qwen/Qwen2.5-1.5B-Instruct",
  "qwen-2.5": "Qwen/Qwen2.5-1.5B-Instruct",
  "qwen-2.5-0.5b": "Qwen/Qwen2.5-0.5B-Instruct",
  "qwen-2.5-1.5b": "Qwen/Qwen2.5-1.5B-Instruct",
  "qwen-2.5-3b": "Qwen/Qwen2.5-3B-Instruct",

  // --- Phi (Microsoft): working with fused QKV and RoPE.
  // The bare "phi" alias defaults to the latest entry (phi-4).
  "phi": "microsoft/phi-4",
  "phi4": "microsoft/phi-4",
  "phi-4": "microsoft/phi-4",
  "phi3": "microsoft/Phi-3-mini-4k-instruct",
  "phi-3": "microsoft/Phi-3-mini-4k-instruct",
  "phi-3-mini": "microsoft/Phi-3-mini-4k-instruct",

  // --- Llama 3.2 (Meta): requires HuggingFace authentication.
  // meta-llama models require accepting the license at huggingface.co.
  "llama": "meta-llama/Llama-3.2-1B-Instruct",
  "llama-3.2": "meta-llama/Llama-3.2-1B-Instruct",
  "llama-3.2-1b": "meta-llama/Llama-3.2-1B-Instruct",
  "llama-3.2-3b": "meta-llama/Llama-3.2-3B-Instruct",

  // --- Gemma 3 (Google): standard transformer architecture with sliding window.
  "gemma": "mlx-community/gemma-3-1b-it-4bit",
  "gemma-3": "mlx-community/gemma-3-1b-it-4bit",
  "gemma-3-1b": "mlx-community/gemma-3-1b-it-4bit",
  "gemma-3-1b-bf16": "mlx-community/gemma-3-1b-it-bf16",
  "gemma-3-4b": "mlx-community/gemma-3-4b-it-4bit",
  "gemma-3-4b-bf16": "mlx-community/gemma-3-4b-it-bf16",
  "gemma-3-12b": "mlx-community/gemma-3-12b-it-4bit",
  "gemma-3-27b": "mlx-community/gemma-3-27b-it-4bit"
};
|
|
121
|
+
/**
 * Report whether the current OS/CPU pair is darwin/arm64.
 * Performs no native loading.
 *
 * @returns {boolean}
 */
function isPlatformSupported() {
  if (platform() !== "darwin") {
    return false;
  }
  return arch() === "arm64";
}
|
|
124
|
+
/**
 * Report whether node-mlx can actually run here: the platform check passes,
 * the binding loads, and the binding's `isAvailable()` says so.
 * Any error while loading or querying the binding is reported as `false`.
 *
 * @returns {boolean} `false` on unsupported platforms or on any load failure;
 *   otherwise whatever `isAvailable()` returns.
 */
function isSupported() {
  let available = false;
  if (isPlatformSupported()) {
    try {
      available = loadBinding().isAvailable();
    } catch {
      available = false;
    }
  }
  return available;
}
|
|
135
|
+
/**
 * Load a model through the native binding and wrap its handle in a small API.
 *
 * @param {string} modelId - Identifier passed straight to the binding's `loadModel`.
 * @returns {{handle: *, generate: Function, generateStreaming: Function,
 *   generateWithImage: Function, isVLM: Function, unload: Function}}
 *   `generate` returns `{text, tokenCount, tokensPerSecond}`; the streaming and
 *   image variants return `{tokenCount, tokensPerSecond}` only.
 * @throws {Error} From the `generate*` methods when the binding's JSON reply has a
 *   falsy `success` (message is `error` from the reply, or "Generation failed").
 */
function loadModel(modelId) {
  const native = loadBinding();
  const handle = native.loadModel(modelId);

  // Fill in sampling defaults for any option the caller left null/undefined.
  const withDefaults = (options) => ({
    maxTokens: options?.maxTokens ?? 256,
    temperature: options?.temperature ?? 0.7,
    topP: options?.topP ?? 0.9,
    repetitionPenalty: options?.repetitionPenalty ?? 0,
    repetitionContextSize: options?.repetitionContextSize ?? 20
  });

  // The binding answers with a JSON string; unwrap it or raise its error.
  const decode = (jsonStr) => {
    const reply = JSON.parse(jsonStr);
    if (!reply.success) {
      throw new Error(reply.error ?? "Generation failed");
    }
    return reply;
  };

  const throughput = (reply) => ({
    tokenCount: reply.tokenCount ?? 0,
    tokensPerSecond: reply.tokensPerSecond ?? 0
  });

  return {
    handle,
    generate(prompt, options) {
      const reply = decode(native.generate(handle, prompt, withDefaults(options)));
      return { text: reply.text ?? "", ...throughput(reply) };
    },
    generateStreaming(prompt, options) {
      const reply = decode(native.generateStreaming(handle, prompt, withDefaults(options)));
      return throughput(reply);
    },
    generateWithImage(prompt, imagePath, options) {
      const reply = decode(native.generateWithImage(handle, prompt, imagePath, withDefaults(options)));
      return throughput(reply);
    },
    isVLM() {
      return native.isVLM(handle);
    },
    unload() {
      native.unloadModel(handle);
    }
  };
}
|
|
200
|
+
|
|
201
|
+
// src/cli.ts
|
|
202
|
+
// ANSI SGR escape sequences used to colour CLI output, keyed by name.
// Each value is ESC "[" <code> "m"; `reset` (code 0) clears all attributes.
var colors = Object.fromEntries(
  [
    ["reset", 0],
    ["bold", 1],
    ["dim", 2],
    ["cyan", 36],
    ["green", 32],
    ["yellow", 33],
    ["magenta", 35],
    ["red", 31]
  ].map(([name, code]) => [name, `\x1B[${code}m`])
);
|
|
212
|
+
/**
 * Write one line to stdout via console.log.
 *
 * @param {*} msg - Value to print.
 */
function log(msg) {
  const writer = console;
  writer.log(msg);
}
|
|
215
|
+
/**
 * Write an error line to stderr, prefixed with a red "Error:" label.
 *
 * @param {*} msg - Message text; interpolated into the output line.
 */
function error(msg) {
  const label = `${colors.red}Error:${colors.reset}`;
  console.error(`${label} ${msg}`);
}
|
|
218
|
+
/**
 * Print the boxed "MLX CLI" banner, with a blank line before and after it.
 */
function printHeader() {
  const frame = `${colors.bold}${colors.cyan}`;
  const topRule = `${frame}\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557${colors.reset}`;
  const title = `${frame}\u2551${colors.reset} ${colors.bold}MLX CLI${colors.reset} - LLMs on Apple Silicon ${colors.cyan}\u2551${colors.reset}`;
  const bottomRule = `${frame}\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D${colors.reset}`;
  for (const line of ["", topRule, title, bottomRule, ""]) {
    log(line);
  }
}
|
|
227
|
+
/**
 * Print CLI usage: command-line forms, a vision-model example, a
 * repetition-penalty example, and the slash commands of interactive mode.
 */
function printHelp() {
  const heading = (title) => `${colors.bold}${title}${colors.reset}`;
  const helpLines = [
    heading("Usage:"),
    ` mlx Interactive chat`,
    ` mlx "prompt" One-shot generation`,
    ` mlx --model <name> Use specific model`,
    ` mlx --image <path> Include image (VLM only)`,
    ` mlx --repetition-penalty <1-2> Penalize repeated tokens (default: off)`,
    ` mlx --list List available models`,
    ` mlx --help Show this help`,
    "",
    heading("Vision models (VLM):"),
    ` mlx --model gemma-3-4b --image photo.jpg "What's in this image?"`,
    "",
    heading("Repetition penalty (for models that repeat):"),
    ` mlx --model gemma-3n --repetition-penalty 1.2 "Tell me about AI"`,
    "",
    heading("Interactive commands:"),
    ` /model <name> Switch model`,
    ` /image <path> Set image for next prompt`,
    ` /temp <0-2> Set temperature`,
    ` /tokens <n> Set max tokens`,
    ` /rep <1-2> Set repetition penalty`,
    ` /clear Clear conversation`,
    ` /help Show commands`,
    ` /quit Exit`,
    ""
  ];
  helpLines.forEach((line) => {
    log(line);
  });
}
|
|
254
|
+
/**
 * Print the built-in model aliases grouped by model family.
 *
 * Aliases that point at the same Hugging Face id are shown on one row: the
 * shortest alias first (ties broken alphabetically), the rest in parentheses,
 * followed by the id. Families with no matching id are skipped.
 *
 * The alias column is padded by its *visible* width. Padding the coloured
 * string instead (as `padEnd` on it would) counts the ANSI escape bytes, so
 * rows with and without secondary aliases end up with different visible
 * widths and the id column no longer lines up.
 */
function printModels() {
  const ALIAS_COLUMN_WIDTH = 45;
  log(`${colors.bold}Available models:${colors.reset}`);
  log("");
  const modelsByHfId = /* @__PURE__ */ new Map();
  for (const [alias, hfId] of Object.entries(RECOMMENDED_MODELS)) {
    const known = modelsByHfId.get(hfId);
    if (known) {
      known.push(alias);
    } else {
      modelsByHfId.set(hfId, [alias]);
    }
  }
  const families = [
    { name: "Phi (Microsoft)", prefix: "Phi", desc: "Reasoning & coding" },
    { name: "Gemma (Google)", prefix: "gemma", desc: "Efficient on-device" },
    { name: "Llama (Meta)", prefix: "Llama", desc: "General purpose" },
    { name: "Qwen (Alibaba)", prefix: "Qwen", desc: "Multilingual" },
    { name: "Mistral", prefix: "Mistral", desc: "Balanced performance" },
    { name: "Ministral", prefix: "Ministral", desc: "Fast inference" }
  ];
  for (const family of families) {
    // A family owns every id that contains its prefix, case-insensitively.
    const needle = family.prefix.toLowerCase();
    const familyModels = Array.from(modelsByHfId.entries()).filter(
      ([hfId]) => hfId.toLowerCase().includes(needle)
    );
    if (familyModels.length === 0) continue;
    log(`${colors.bold}${family.name}${colors.reset} ${colors.dim}\u2014 ${family.desc}${colors.reset}`);
    for (const [hfId, aliases] of familyModels) {
      // Sort a copy so the grouped alias lists are not reordered in place.
      const [primary = "", ...others] = [...aliases].sort(
        (a, b) => a.length - b.length || a.localeCompare(b)
      );
      const hasOthers = others.length > 0;
      const visibleText = hasOthers ? `${primary} (${others.join(", ")})` : primary;
      const aliasStr = hasOthers
        ? `${colors.green}${primary}${colors.reset} ${colors.dim}(${others.join(", ")})${colors.reset}`
        : `${colors.green}${primary}${colors.reset}`;
      const padding = " ".repeat(Math.max(0, ALIAS_COLUMN_WIDTH - visibleText.length));
      log(` ${aliasStr}${padding} ${colors.dim}${hfId}${colors.reset}`);
    }
    log("");
  }
  log(`${colors.dim}Or use any mlx-community model:${colors.reset}`);
  log(` ${colors.cyan}node-mlx --model mlx-community/YourModel-4bit${colors.reset}`);
  log("");
}
|
|
315
|
+
/**
 * Turn a user-supplied model name into a Hugging Face model id.
 *
 * - A known alias maps to its entry in RECOMMENDED_MODELS.
 * - Anything containing "/" is taken to be a full id already.
 * - Everything else is assumed to live under `mlx-community/`.
 *
 * Only the table's own keys count as aliases. The `in` operator also matches
 * inherited properties, so names such as "toString" or "constructor" would
 * otherwise resolve to a function instead of a model id.
 *
 * @param {string} name - Alias, bare repository name, or full "org/repo" id.
 * @returns {string} The resolved model id.
 */
function resolveModel(name) {
  if (Object.prototype.hasOwnProperty.call(RECOMMENDED_MODELS, name)) {
    return RECOMMENDED_MODELS[name];
  }
  if (name.includes("/")) {
    return name;
  }
  return `mlx-community/${name}`;
}
|
|
324
|
+
/**
 * Run the interactive chat loop on stdin/stdout.
 *
 * Loads the initial model (exiting with status 1 if that fails), then
 * repeatedly prompts for input. Lines starting with "/" are dispatched to
 * handleCommand(); every other non-empty line is sent to the model together
 * with recent history. Closing the readline interface unloads the model and
 * exits with status 0.
 *
 * @param {string} initialModel - Alias or model id to load at start-up.
 */
function runInteractive(initialModel) {
  const state = {
    model: null,
    modelName: initialModel,
    options: {
      maxTokens: 512,
      temperature: 0.7,
      topP: 0.9
    },
    history: [],
    imagePath: null
  };
  const describe = (err) => (err instanceof Error ? err.message : String(err));
  log(`${colors.dim}Loading ${state.modelName}...${colors.reset}`);
  const modelId = resolveModel(state.modelName);
  try {
    state.model = loadModel(modelId);
    log(`${colors.green}\u2713${colors.reset} Model loaded`);
  } catch (err) {
    error(`Failed to load model: ${describe(err)}`);
    process.exit(1);
  }
  log("");
  log(`${colors.dim}Type your message or /help for commands${colors.reset}`);
  log("");
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout
  });
  const promptUser = () => {
    rl.question(`${colors.cyan}You:${colors.reset} `, (input) => {
      // handleUserInput is async. Without a rejection handler, anything thrown
      // outside its own try/catch (for example by a slash command) becomes an
      // unhandled rejection and takes the whole session down.
      handleUserInput(input, state, rl, promptUser).catch((err) => {
        error(describe(err));
        promptUser();
      });
    });
  };
  const handleUserInput = async (input, state2, rl2, next) => {
    const trimmed = input.trim();
    if (!trimmed) {
      next();
      return;
    }
    if (trimmed.startsWith("/")) {
      await handleCommand(trimmed, state2, rl2);
      next();
      return;
    }
    if (!state2.model) {
      error("No model loaded");
      next();
      return;
    }
    // Build the prompt before recording the new turn so it is not included twice.
    const fullPrompt = buildPrompt(state2.history, trimmed);
    state2.history.push({ role: "user", content: trimmed });
    process.stdout.write(`${colors.magenta}AI:${colors.reset} `);
    try {
      let result;
      if (state2.imagePath && state2.model.isVLM()) {
        result = state2.model.generateWithImage(fullPrompt, state2.imagePath, state2.options);
        // The image applies to a single message only.
        state2.imagePath = null;
      } else {
        result = state2.model.generateStreaming(fullPrompt, state2.options);
      }
      log("");
      log(
        `${colors.dim}(${String(result.tokenCount)} tokens, ${result.tokensPerSecond.toFixed(1)} tok/s)${colors.reset}`
      );
      log("");
      // The streaming/image wrappers return statistics only, so a placeholder
      // stands in for the assistant turn.
      state2.history.push({ role: "assistant", content: "[streamed response]" });
    } catch (err) {
      // Drop the user turn recorded above: no reply was produced, and leaving
      // it would put two consecutive user turns into the next prompt.
      state2.history.pop();
      log("");
      error(describe(err));
    }
    next();
  };
  rl.on("close", () => {
    log("");
    log(`${colors.dim}Goodbye!${colors.reset}`);
    if (state.model) {
      state.model.unload();
    }
    process.exit(0);
  });
  promptUser();
}
|
|
406
|
+
/**
 * Render a plain-text chat prompt from recent history plus the new message.
 *
 * Only the last six history entries are included. Entries with role "user"
 * are labelled "User:", all others "Assistant:". The prompt ends with an open
 * "Assistant:" line for the model to complete.
 *
 * @param {{role: string, content: string}[]} history - Earlier turns, oldest first.
 * @param {string} current - The new user message.
 * @returns {string} The assembled prompt.
 */
function buildPrompt(history, current) {
  const recentTurns = history.slice(-6).map(({ role, content }) => {
    const speaker = role === "user" ? "User" : "Assistant";
    return `${speaker}: ${content}\n`;
  });
  return `${recentTurns.join("")}User: ${current}\nAssistant:`;
}
|
|
421
|
+
/**
 * Execute one interactive slash command.
 *
 * Supported commands (short forms in parentheses): help (h), quit (q, exit),
 * clear (c), model (m), temp (t), tokens (n), rep (r), list (l), image (i).
 * Commands that take a value print the current setting when called without one.
 *
 * @param {string} input - The full command line, starting with "/".
 * @param {object} state - Mutable session state: `model`, `modelName`,
 *   `options`, `history`, `imagePath`.
 * @param {{close: Function}} rl - Readline interface; closed by the quit command.
 * @returns {Promise<void>}
 */
async function handleCommand(input, state, rl) {
  // Split at the first run of whitespace so repeated spaces after the command
  // name do not end up inside the argument; spaces within the argument
  // (e.g. file paths) are preserved.
  const body = input.slice(1).trim();
  const gap = body.search(/\s/);
  const cmd = gap === -1 ? body : body.slice(0, gap);
  const arg = gap === -1 ? "" : body.slice(gap + 1).trim();
  switch (cmd) {
    case "help":
    case "h":
      printHelp();
      break;
    case "quit":
    case "q":
    case "exit":
      rl.close();
      break;
    case "clear":
    case "c":
      state.history = [];
      log(`${colors.dim}Conversation cleared${colors.reset}`);
      break;
    case "model":
    case "m":
      if (!arg) {
        log(`${colors.dim}Current model: ${state.modelName}${colors.reset}`);
        log(`${colors.dim}Use /model <name> to switch${colors.reset}`);
      } else {
        log(`${colors.dim}Loading ${arg}...${colors.reset}`);
        if (state.model) {
          state.model.unload();
          // The old handle is gone now. Clear it so that a failed load below
          // leaves the session in the "No model loaded" state rather than
          // holding a wrapper around an unloaded handle.
          state.model = null;
        }
        try {
          state.model = loadModel(resolveModel(arg));
          state.modelName = arg;
          state.history = [];
          log(`${colors.green}\u2713${colors.reset} Switched to ${arg}`);
        } catch (err) {
          error(err instanceof Error ? err.message : String(err));
        }
      }
      break;
    case "temp":
    case "t":
      if (!arg) {
        log(`${colors.dim}Temperature: ${String(state.options.temperature)}${colors.reset}`);
      } else {
        const temp = parseFloat(arg);
        if (Number.isNaN(temp) || temp < 0 || temp > 2) {
          error("Temperature must be between 0 and 2");
        } else {
          state.options.temperature = temp;
          log(`${colors.dim}Temperature set to ${String(temp)}${colors.reset}`);
        }
      }
      break;
    case "tokens":
    case "n":
      if (!arg) {
        log(`${colors.dim}Max tokens: ${String(state.options.maxTokens)}${colors.reset}`);
      } else {
        const tokens = parseInt(arg, 10);
        if (Number.isNaN(tokens) || tokens < 1) {
          error("Tokens must be a positive number");
        } else {
          state.options.maxTokens = tokens;
          log(`${colors.dim}Max tokens set to ${String(tokens)}${colors.reset}`);
        }
      }
      break;
    case "rep":
    case "r":
      if (!arg) {
        log(
          `${colors.dim}Repetition penalty: ${state.options.repetitionPenalty != null ? String(state.options.repetitionPenalty) : "off"}${colors.reset}`
        );
      } else {
        const penalty = parseFloat(arg);
        if (Number.isNaN(penalty) || penalty < 1 || penalty > 2) {
          error("Repetition penalty must be between 1 and 2");
        } else {
          state.options.repetitionPenalty = penalty;
          log(`${colors.dim}Repetition penalty set to ${String(penalty)}${colors.reset}`);
        }
      }
      break;
    case "list":
    case "l":
      printModels();
      break;
    case "image":
    case "i":
      if (!arg) {
        if (state.imagePath) {
          log(`${colors.dim}Current image: ${state.imagePath}${colors.reset}`);
        } else {
          log(`${colors.dim}No image set. Use /image <path> to set one.${colors.reset}`);
        }
      } else {
        const fs = await import("fs");
        if (!fs.existsSync(arg)) {
          error(`Image not found: ${arg}`);
        } else if (!state.model?.isVLM()) {
          error(`Current model doesn't support images. Use a VLM like gemma-3-4b.`);
        } else {
          state.imagePath = arg;
          log(`${colors.green}\u2713${colors.reset} Image set: ${arg}`);
          log(`${colors.dim}The next message will include this image.${colors.reset}`);
        }
      }
      break;
    default:
      error(`Unknown command: /${cmd}. Type /help for commands.`);
  }
}
|
|
532
|
+
/**
 * Load a model, run a single generation, print throughput statistics, and unload.
 *
 * When `imagePath` is given the model must report itself as a VLM; otherwise
 * an error is printed, the model is unloaded, and the process exits with
 * status 1. Any thrown error is printed and also exits with status 1.
 *
 * @param {string} modelName - Alias or model id.
 * @param {string} prompt - Prompt text.
 * @param {?string} imagePath - Optional image to send along with the prompt.
 * @param {object} options - Generation options forwarded to the model.
 */
function runOneShot(modelName, prompt, imagePath, options) {
  log(`${colors.dim}Loading ${modelName}...${colors.reset}`);
  const modelId = resolveModel(modelName);
  try {
    const model = loadModel(modelId);
    const withImage = Boolean(imagePath);
    if (withImage && !model.isVLM()) {
      error(`Model ${modelName} doesn't support images. Use a VLM like gemma-3-4b.`);
      model.unload();
      process.exit(1);
    }
    const result = withImage
      ? model.generateWithImage(prompt, imagePath, options)
      : model.generateStreaming(prompt, options);
    log("");
    log(
      `${colors.dim}(${String(result.tokenCount)} tokens, ${result.tokensPerSecond.toFixed(1)} tok/s)${colors.reset}`
    );
    model.unload();
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    error(message);
    process.exit(1);
  }
}
|
|
558
|
+
/**
 * Parse `process.argv` into the CLI's run configuration.
 *
 * Flags: --help/-h, --version/-v, --list/-l, --model/-m <name>,
 * --image/-i <path>, --temp/-t <number>, --tokens/-n <integer>,
 * --repetition-penalty/-r <number>.
 *
 * Positional arguments: if no model has been chosen yet, the first positional
 * is the model name; the next positional is the prompt and switches the
 * command to "oneshot". Whether a model has been chosen is tracked
 * explicitly. Comparing against the default name instead would make
 * `--model qwen "hi"` or `qwen "hi"` treat the prompt as the model.
 *
 * @returns {{model: string, prompt: ?string, imagePath: ?string,
 *   options: object, command: string}}
 * @throws {Error} When a numeric flag is given a value that does not parse as a number.
 */
function parseArgs() {
  const args = process.argv.slice(2);
  let model = "qwen";
  let modelChosen = false;
  let prompt = null;
  let imagePath = null;
  const options = {
    maxTokens: 512,
    temperature: 0.7,
    topP: 0.9
  };
  let command = "chat";

  // Parse a flag value. A missing/empty value keeps the documented fallback;
  // an unparseable one is rejected instead of becoming NaN silently.
  const readNumber = (flag, raw, fallback, parse) => {
    if (!raw) {
      return fallback;
    }
    const value = parse(raw);
    if (Number.isNaN(value)) {
      throw new Error(`Invalid value for ${flag}: ${raw}`);
    }
    return value;
  };

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === "--help" || arg === "-h") {
      command = "help";
    } else if (arg === "--version" || arg === "-v") {
      command = "version";
    } else if (arg === "--list" || arg === "-l") {
      command = "list";
    } else if (arg === "--model" || arg === "-m") {
      const value = args[++i];
      if (value) {
        model = value;
        modelChosen = true;
      }
    } else if (arg === "--image" || arg === "-i") {
      imagePath = args[++i] || null;
    } else if (arg === "--temp" || arg === "-t") {
      options.temperature = readNumber(arg, args[++i], 0.7, (s) => parseFloat(s));
    } else if (arg === "--tokens" || arg === "-n") {
      options.maxTokens = readNumber(arg, args[++i], 512, (s) => parseInt(s, 10));
    } else if (arg === "--repetition-penalty" || arg === "-r") {
      options.repetitionPenalty = readNumber(arg, args[++i], 1.2, (s) => parseFloat(s));
    } else if (arg && !arg.startsWith("-")) {
      if (!modelChosen) {
        model = arg;
        modelChosen = true;
      } else if (prompt === null) {
        prompt = arg;
        command = "oneshot";
      }
    }
  }
  return { model, prompt, imagePath, options, command };
}
|
|
598
|
+
/**
 * CLI entry point: parse arguments and dispatch.
 *
 * help, version and list are handled first and need no native code. Every
 * other command first requires a supported platform and loadable native
 * libraries; if either check fails an error is printed and the process exits
 * with status 1.
 */
function main() {
  const parsed = parseArgs();
  const { command } = parsed;

  // Informational commands.
  if (command === "help") {
    printHeader();
    printHelp();
    return;
  }
  if (command === "version") {
    log(`node-mlx v${VERSION}`);
    return;
  }
  if (command === "list") {
    printHeader();
    printModels();
    return;
  }

  // Everything below needs the native runtime.
  if (!isPlatformSupported()) {
    error("node-mlx requires macOS on Apple Silicon (M1/M2/M3/M4)");
    process.exit(1);
  }
  if (!isSupported()) {
    error("Native libraries not found. Run 'pnpm build:swift && pnpm build:native' first.");
    process.exit(1);
  }

  if (command === "oneshot") {
    if (parsed.prompt) {
      runOneShot(parsed.model, parsed.prompt, parsed.imagePath, parsed.options);
    }
  } else if (command === "chat") {
    printHeader();
    runInteractive(parsed.model);
  }
}
|
|
633
|
+
// Script entry: run the CLI. Any error thrown synchronously out of main() is
// printed through error() and the process exits with status 1.
try {
  main();
} catch (err) {
  error(err instanceof Error ? err.message : String(err));
  process.exit(1);
}
|
|
639
|
+
//# sourceMappingURL=cli.js.map
|