@mlx-node/core 0.0.0 → 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +106 -0
- package/index.cjs +71 -53
- package/index.d.cts +965 -280
- package/package.json +16 -5
package/index.d.cts
CHANGED
|
@@ -26,49 +26,81 @@ export declare class BatchGenerationResult {
|
|
|
26
26
|
get groupSize(): number;
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
+
/** Handle returned by `chatStream()` to control an in-progress streaming generation. */
|
|
30
|
+
export declare class ChatStreamHandle {
|
|
31
|
+
cancel(): void;
|
|
32
|
+
}
|
|
33
|
+
|
|
29
34
|
/**
|
|
30
|
-
*
|
|
35
|
+
* PP-DocLayoutV3 full model for document layout analysis.
|
|
31
36
|
*
|
|
32
|
-
*
|
|
33
|
-
* -
|
|
34
|
-
* - Thinking/reasoning extracted from `<think>` tags
|
|
35
|
-
* - Clean text with all special tags stripped
|
|
37
|
+
* Combines HGNetV2 backbone, hybrid encoder, and RT-DETR decoder
|
|
38
|
+
* with mask-enhanced attention and reading order prediction.
|
|
36
39
|
*
|
|
37
|
-
*
|
|
38
|
-
*
|
|
39
|
-
* const result = await model.chat(messages, { tools });
|
|
40
|
-
* console.log(result.text); // Clean response
|
|
41
|
-
* console.log(result.thinking); // Chain-of-thought (if any)
|
|
42
|
-
* console.log(result.toolCalls); // Parsed tool calls
|
|
43
|
-
* ```
|
|
40
|
+
* Weights must be downloaded from `PaddlePaddle/PP-DocLayoutV3_safetensors` on HuggingFace.
|
|
41
|
+
* The regular `PaddlePaddle/PP-DocLayoutV3` repo uses PaddlePaddle format and is not compatible.
|
|
44
42
|
*/
|
|
45
|
-
export declare class
|
|
46
|
-
/** Get the cleaned text (tool_call and think tags removed) */
|
|
47
|
-
get text(): string;
|
|
48
|
-
/** Get the extracted tool calls */
|
|
49
|
-
get toolCalls(): Array<ToolCallResult>;
|
|
43
|
+
export declare class DocLayoutModel {
|
|
50
44
|
/**
|
|
51
|
-
*
|
|
45
|
+
* Load a PP-DocLayoutV3 model from a directory containing `config.json` and `model.safetensors`.
|
|
52
46
|
*
|
|
53
|
-
*
|
|
54
|
-
* no thinking tags were present in the response.
|
|
47
|
+
* The model directory should be cloned from `PaddlePaddle/PP-DocLayoutV3_safetensors` on HuggingFace.
|
|
55
48
|
*
|
|
56
|
-
*
|
|
57
|
-
* -
|
|
58
|
-
*
|
|
59
|
-
*
|
|
49
|
+
* # Arguments
|
|
50
|
+
* * `modelPath` - Path to model directory
|
|
51
|
+
*
|
|
52
|
+
* # Returns
|
|
53
|
+
* * Initialized DocLayoutModel ready for inference
|
|
60
54
|
*/
|
|
61
|
-
|
|
62
|
-
/**
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
55
|
+
static load(modelPath: string): DocLayoutModel;
|
|
56
|
+
/**
|
|
57
|
+
* Detect document layout elements in an image.
|
|
58
|
+
*
|
|
59
|
+
* # Arguments
|
|
60
|
+
* * `imageData` - Encoded image bytes (PNG/JPEG)
|
|
61
|
+
* * `threshold` - Optional confidence threshold (default 0.5)
|
|
62
|
+
*
|
|
63
|
+
* # Returns
|
|
64
|
+
* * Array of `LayoutElement` objects sorted by reading order
|
|
65
|
+
*/
|
|
66
|
+
detect(imageData: Buffer, threshold?: number | undefined | null): Array<LayoutElement>;
|
|
67
|
+
}
|
|
68
|
+
export type PPDocLayoutV3Model = DocLayoutModel;
|
|
69
|
+
|
|
70
|
+
/**
|
|
71
|
+
* PP-LCNet_x1_0 Document Orientation Classification model.
|
|
72
|
+
*
|
|
73
|
+
* Classifies document images into 4 orientation classes (0/90/180/270 degrees).
|
|
74
|
+
* Uses depthwise separable convolutions with HardSwish activation.
|
|
75
|
+
*/
|
|
76
|
+
export declare class DocOrientationModel {
|
|
77
|
+
/** Load a DocOrientationModel from a directory containing model.safetensors and config.json. */
|
|
78
|
+
static load(modelPath: string): DocOrientationModel;
|
|
79
|
+
/**
|
|
80
|
+
* Classify the orientation of a document image.
|
|
81
|
+
*
|
|
82
|
+
* Returns the detected orientation angle (0, 90, 180, 270) and confidence.
|
|
83
|
+
*/
|
|
84
|
+
classify(imageData: Buffer): OrientationResult;
|
|
85
|
+
/**
|
|
86
|
+
* Classify orientation and return the corrected (upright) image bytes.
|
|
87
|
+
*
|
|
88
|
+
* Returns classification result plus corrected PNG image bytes.
|
|
89
|
+
*/
|
|
90
|
+
classifyAndRotate(imageData: Buffer): ClassifyRotateResult;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
/**
|
|
94
|
+
* UVDoc Document Unwarping model.
|
|
95
|
+
*
|
|
96
|
+
* Predicts a 2D displacement field and applies it to correct perspective
|
|
97
|
+
* distortion in camera-captured documents.
|
|
98
|
+
*/
|
|
99
|
+
export declare class DocUnwarpModel {
|
|
100
|
+
/** Load a DocUnwarpModel from a directory containing model.safetensors. */
|
|
101
|
+
static load(modelPath: string): DocUnwarpModel;
|
|
102
|
+
/** Unwarp a document image and return the corrected image bytes. */
|
|
103
|
+
unwarp(imageData: Buffer): UnwarpResult;
|
|
72
104
|
}
|
|
73
105
|
|
|
74
106
|
/** Result from text generation with detailed metadata */
|
|
@@ -79,8 +111,8 @@ export declare class GenerationResult {
|
|
|
79
111
|
get tokens(): MxArray;
|
|
80
112
|
/** Get the log probabilities */
|
|
81
113
|
get logprobs(): MxArray;
|
|
82
|
-
/** Get the finish reason ("
|
|
83
|
-
get finishReason(): '
|
|
114
|
+
/** Get the finish reason ("stop", "length", or "repetition") */
|
|
115
|
+
get finishReason(): 'stop' | 'length' | 'repetition';
|
|
84
116
|
/** Get the number of tokens generated */
|
|
85
117
|
get numTokens(): number;
|
|
86
118
|
}
|
|
@@ -92,13 +124,17 @@ export declare class GenerationResult {
|
|
|
92
124
|
*/
|
|
93
125
|
export declare class GrpoTrainingEngine {
|
|
94
126
|
/**
|
|
95
|
-
* Create a new training engine from
|
|
127
|
+
* Create a new training engine from a Qwen3 model
|
|
96
128
|
*
|
|
97
129
|
* # Arguments
|
|
98
130
|
* * `model` - The Qwen3 model to train (will be cloned internally)
|
|
99
131
|
* * `config` - Engine configuration
|
|
100
132
|
*/
|
|
101
133
|
constructor(model: Qwen3Model, config: GrpoEngineConfig);
|
|
134
|
+
/** Create a new training engine from a Qwen3.5 dense model */
|
|
135
|
+
static fromQwen35(model: Qwen3_5Model, config: GrpoEngineConfig): GrpoTrainingEngine;
|
|
136
|
+
/** Create a new training engine from a Qwen3.5 MoE model */
|
|
137
|
+
static fromQwen35Moe(model: Qwen3_5MoeModel, config: GrpoEngineConfig): GrpoTrainingEngine;
|
|
102
138
|
/** Register a built-in reward function */
|
|
103
139
|
registerBuiltinReward(config: BuiltinRewardConfig): void;
|
|
104
140
|
/**
|
|
@@ -203,10 +239,39 @@ export declare class GrpoTrainingEngine {
|
|
|
203
239
|
get nanGradientCount(): number;
|
|
204
240
|
/** Clear the emergency save flag (call after saving emergency checkpoint) */
|
|
205
241
|
clearEmergencySaveFlag(): void;
|
|
242
|
+
/**
|
|
243
|
+
* Save optimizer state (moment tensors + step) to a SafeTensors file.
|
|
244
|
+
*
|
|
245
|
+
* The step counter is stored in the `__metadata__` field.
|
|
246
|
+
* Each parameter's first moment (m) and second moment (v) are stored as
|
|
247
|
+
* `{param_name}.m` and `{param_name}.v` tensors.
|
|
248
|
+
*
|
|
249
|
+
* No-op if the engine uses SGD (no optimizer state to save).
|
|
250
|
+
*/
|
|
251
|
+
saveOptimizerState(path: string): void;
|
|
252
|
+
/**
|
|
253
|
+
* Load optimizer state (moment tensors + step) from a SafeTensors file.
|
|
254
|
+
*
|
|
255
|
+
* Restores the step counter from metadata and sets first/second moment
|
|
256
|
+
* tensors for each parameter found in the file.
|
|
257
|
+
*
|
|
258
|
+
* No-op if the engine uses SGD (no optimizer to restore).
|
|
259
|
+
*/
|
|
260
|
+
loadOptimizerState(path: string): void;
|
|
206
261
|
}
|
|
207
262
|
export type GRPOTrainingEngine = GrpoTrainingEngine;
|
|
208
263
|
|
|
209
264
|
export declare class MxArray {
|
|
265
|
+
equal(other: MxArray): MxArray;
|
|
266
|
+
notEqual(other: MxArray): MxArray;
|
|
267
|
+
less(other: MxArray): MxArray;
|
|
268
|
+
lessEqual(other: MxArray): MxArray;
|
|
269
|
+
greater(other: MxArray): MxArray;
|
|
270
|
+
greaterEqual(other: MxArray): MxArray;
|
|
271
|
+
logicalAnd(other: MxArray): MxArray;
|
|
272
|
+
logicalOr(other: MxArray): MxArray;
|
|
273
|
+
logicalNot(): MxArray;
|
|
274
|
+
where(x: MxArray, y: MxArray): MxArray;
|
|
210
275
|
static fromInt32(data: Int32Array, shape: BigInt64Array): MxArray;
|
|
211
276
|
static fromInt64(data: BigInt64Array, shape: BigInt64Array): MxArray;
|
|
212
277
|
static fromUint32(data: Uint32Array, shape: BigInt64Array): MxArray;
|
|
@@ -234,60 +299,12 @@ export declare class MxArray {
|
|
|
234
299
|
step?: number | undefined | null,
|
|
235
300
|
dtype?: DType | undefined | null,
|
|
236
301
|
): MxArray;
|
|
237
|
-
reshape(shape: BigInt64Array): MxArray;
|
|
238
302
|
astype(dtype: DType): MxArray;
|
|
239
303
|
/**
|
|
240
304
|
* Create a copy of this array with a new handle.
|
|
241
305
|
* This is useful for parameter loading to avoid handle aliasing issues.
|
|
242
306
|
*/
|
|
243
307
|
copy(): MxArray;
|
|
244
|
-
logSoftmax(axis: number): MxArray;
|
|
245
|
-
exp(): MxArray;
|
|
246
|
-
log(): MxArray;
|
|
247
|
-
sum(axes?: Int32Array | undefined | null, keepdims?: boolean | undefined | null): MxArray;
|
|
248
|
-
mean(axes?: Int32Array | undefined | null, keepdims?: boolean | undefined | null): MxArray;
|
|
249
|
-
clip(minimum?: number | undefined | null, maximum?: number | undefined | null): MxArray;
|
|
250
|
-
minimum(other: MxArray): MxArray;
|
|
251
|
-
maximum(other: MxArray): MxArray;
|
|
252
|
-
add(other: MxArray): MxArray;
|
|
253
|
-
sub(other: MxArray): MxArray;
|
|
254
|
-
mul(other: MxArray): MxArray;
|
|
255
|
-
div(other: MxArray): MxArray;
|
|
256
|
-
addScalar(value: number): MxArray;
|
|
257
|
-
mulScalar(value: number): MxArray;
|
|
258
|
-
subScalar(value: number): MxArray;
|
|
259
|
-
divScalar(value: number): MxArray;
|
|
260
|
-
matmul(other: MxArray): MxArray;
|
|
261
|
-
/**
|
|
262
|
-
* Fused matrix multiply-add: D = beta * C + alpha * (self @ B)
|
|
263
|
-
* where self is A. More efficient than separate matmul and add operations.
|
|
264
|
-
* Default: alpha=1.0, beta=1.0, giving D = C + (self @ B)
|
|
265
|
-
*/
|
|
266
|
-
addmm(c: MxArray, b: MxArray, alpha?: number | undefined | null, beta?: number | undefined | null): MxArray;
|
|
267
|
-
transpose(axes?: Int32Array | undefined | null): MxArray;
|
|
268
|
-
take(indices: MxArray, axis: number): MxArray;
|
|
269
|
-
takeAlongAxis(indices: MxArray, axis: number): MxArray;
|
|
270
|
-
/**
|
|
271
|
-
* Put values into array at specified indices along an axis
|
|
272
|
-
* Equivalent to: result = array.copy(); result[..., indices] = values
|
|
273
|
-
* This matches MLX's put_along_axis for efficient in-place-style updates
|
|
274
|
-
*/
|
|
275
|
-
putAlongAxis(indices: MxArray, values: MxArray, axis: number): MxArray;
|
|
276
|
-
slice(starts: BigInt64Array, stops: BigInt64Array): MxArray;
|
|
277
|
-
/**
|
|
278
|
-
* Concatenate two arrays along an axis
|
|
279
|
-
* Optimized for the common binary concatenation case
|
|
280
|
-
*/
|
|
281
|
-
static concatenate(a: MxArray, b: MxArray, axis: number): MxArray;
|
|
282
|
-
/**
|
|
283
|
-
* Concatenate multiple arrays along an axis
|
|
284
|
-
* For concatenating 3 or more arrays
|
|
285
|
-
*/
|
|
286
|
-
static concatenateMany(arrays: Array<MxArray>, axis?: number | undefined | null): MxArray;
|
|
287
|
-
sort(axis?: number | undefined | null): MxArray;
|
|
288
|
-
argsort(axis?: number | undefined | null): MxArray;
|
|
289
|
-
partition(kth: number, axis?: number | undefined | null): MxArray;
|
|
290
|
-
argpartition(kth: number, axis?: number | undefined | null): MxArray;
|
|
291
308
|
eval(): void;
|
|
292
309
|
evalAsync(): Promise<undefined>;
|
|
293
310
|
size(): bigint;
|
|
@@ -314,7 +331,7 @@ export declare class MxArray {
|
|
|
314
331
|
/**
|
|
315
332
|
* Copy entire array from GPU to CPU as Float32Array
|
|
316
333
|
*
|
|
317
|
-
*
|
|
334
|
+
* **PERFORMANCE WARNING**: This triggers a FULL GPU->CPU memory transfer!
|
|
318
335
|
*
|
|
319
336
|
* **Performance impact**:
|
|
320
337
|
* - Forces evaluation of lazy operations
|
|
@@ -335,7 +352,7 @@ export declare class MxArray {
|
|
|
335
352
|
/**
|
|
336
353
|
* Copy entire array from GPU to CPU as Int32Array
|
|
337
354
|
*
|
|
338
|
-
*
|
|
355
|
+
* **PERFORMANCE WARNING**: This triggers a FULL GPU->CPU memory transfer!
|
|
339
356
|
*
|
|
340
357
|
* See `to_float32()` documentation for performance implications and alternatives.
|
|
341
358
|
* Prefer `item_int32()` for scalars.
|
|
@@ -344,57 +361,32 @@ export declare class MxArray {
|
|
|
344
361
|
/**
|
|
345
362
|
* Copy entire array from GPU to CPU as Uint32Array
|
|
346
363
|
*
|
|
347
|
-
*
|
|
364
|
+
* **PERFORMANCE WARNING**: This triggers a FULL GPU->CPU memory transfer!
|
|
348
365
|
*
|
|
349
366
|
* See `to_float32()` documentation for performance implications and alternatives.
|
|
350
367
|
*/
|
|
351
368
|
toUint32(): Uint32Array;
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
369
|
+
logSoftmax(axis: number): MxArray;
|
|
370
|
+
exp(): MxArray;
|
|
371
|
+
log(): MxArray;
|
|
372
|
+
clip(minimum?: number | undefined | null, maximum?: number | undefined | null): MxArray;
|
|
373
|
+
minimum(other: MxArray): MxArray;
|
|
374
|
+
maximum(other: MxArray): MxArray;
|
|
375
|
+
add(other: MxArray): MxArray;
|
|
376
|
+
sub(other: MxArray): MxArray;
|
|
377
|
+
mul(other: MxArray): MxArray;
|
|
378
|
+
div(other: MxArray): MxArray;
|
|
379
|
+
addScalar(value: number): MxArray;
|
|
380
|
+
mulScalar(value: number): MxArray;
|
|
381
|
+
subScalar(value: number): MxArray;
|
|
382
|
+
divScalar(value: number): MxArray;
|
|
383
|
+
matmul(other: MxArray): MxArray;
|
|
357
384
|
/**
|
|
358
|
-
*
|
|
359
|
-
*
|
|
385
|
+
* Fused matrix multiply-add: D = beta * C + alpha * (self @ B)
|
|
386
|
+
* where self is A. More efficient than separate matmul and add operations.
|
|
387
|
+
* Default: alpha=1.0, beta=1.0, giving D = C + (self @ B)
|
|
360
388
|
*/
|
|
361
|
-
|
|
362
|
-
equal(other: MxArray): MxArray;
|
|
363
|
-
notEqual(other: MxArray): MxArray;
|
|
364
|
-
less(other: MxArray): MxArray;
|
|
365
|
-
lessEqual(other: MxArray): MxArray;
|
|
366
|
-
greater(other: MxArray): MxArray;
|
|
367
|
-
greaterEqual(other: MxArray): MxArray;
|
|
368
|
-
logicalAnd(other: MxArray): MxArray;
|
|
369
|
-
logicalOr(other: MxArray): MxArray;
|
|
370
|
-
logicalNot(): MxArray;
|
|
371
|
-
where(x: MxArray, y: MxArray): MxArray;
|
|
372
|
-
argmax(axis: number, keepdims?: boolean | undefined | null): MxArray;
|
|
373
|
-
argmin(axis: number, keepdims?: boolean | undefined | null): MxArray;
|
|
374
|
-
max(axes?: Int32Array | undefined | null, keepdims?: boolean | undefined | null): MxArray;
|
|
375
|
-
min(axes?: Int32Array | undefined | null, keepdims?: boolean | undefined | null): MxArray;
|
|
376
|
-
prod(axes?: Int32Array | undefined | null, keepdims?: boolean | undefined | null): MxArray;
|
|
377
|
-
var(
|
|
378
|
-
axes?: Int32Array | undefined | null,
|
|
379
|
-
keepdims?: boolean | undefined | null,
|
|
380
|
-
ddof?: number | undefined | null,
|
|
381
|
-
): MxArray;
|
|
382
|
-
std(
|
|
383
|
-
axes?: Int32Array | undefined | null,
|
|
384
|
-
keepdims?: boolean | undefined | null,
|
|
385
|
-
ddof?: number | undefined | null,
|
|
386
|
-
): MxArray;
|
|
387
|
-
logsumexp(axes?: Int32Array | undefined | null, keepdims?: boolean | undefined | null): MxArray;
|
|
388
|
-
cumsum(axis: number): MxArray;
|
|
389
|
-
cumprod(axis: number): MxArray;
|
|
390
|
-
pad(padWidth: Int32Array, constantValue: number): MxArray;
|
|
391
|
-
roll(shift: number, axis: number): MxArray;
|
|
392
|
-
split(indicesOrSections: number, axis?: number | undefined | null): Array<MxArray>;
|
|
393
|
-
tile(reps: Int32Array): MxArray;
|
|
394
|
-
repeat(repeats: number, axis: number): MxArray;
|
|
395
|
-
squeeze(axes?: Int32Array | undefined | null): MxArray;
|
|
396
|
-
expandDims(axis: number): MxArray;
|
|
397
|
-
broadcastTo(shape: BigInt64Array): MxArray;
|
|
389
|
+
addmm(c: MxArray, b: MxArray, alpha?: number | undefined | null, beta?: number | undefined | null): MxArray;
|
|
398
390
|
abs(): MxArray;
|
|
399
391
|
negative(): MxArray;
|
|
400
392
|
sign(): MxArray;
|
|
@@ -440,6 +432,69 @@ export declare class MxArray {
|
|
|
440
432
|
* This is a GPU-native operation that avoids CPU data transfer.
|
|
441
433
|
*/
|
|
442
434
|
isfinite(): MxArray;
|
|
435
|
+
static randomUniform(shape: BigInt64Array, low: number, high: number, dtype?: DType | undefined | null): MxArray;
|
|
436
|
+
static randomNormal(shape: BigInt64Array, mean: number, std: number, dtype?: DType | undefined | null): MxArray;
|
|
437
|
+
static randomBernoulli(shape: BigInt64Array, prob: number): MxArray;
|
|
438
|
+
static randint(shape: BigInt64Array, low: number, high: number): MxArray;
|
|
439
|
+
/**
|
|
440
|
+
* Sample from categorical distribution
|
|
441
|
+
* Takes logits and returns sampled indices
|
|
442
|
+
*/
|
|
443
|
+
categorical(axis?: number | undefined | null): MxArray;
|
|
444
|
+
sum(axes?: Int32Array | undefined | null, keepdims?: boolean | undefined | null): MxArray;
|
|
445
|
+
mean(axes?: Int32Array | undefined | null, keepdims?: boolean | undefined | null): MxArray;
|
|
446
|
+
argmax(axis: number, keepdims?: boolean | undefined | null): MxArray;
|
|
447
|
+
argmin(axis: number, keepdims?: boolean | undefined | null): MxArray;
|
|
448
|
+
max(axes?: Int32Array | undefined | null, keepdims?: boolean | undefined | null): MxArray;
|
|
449
|
+
min(axes?: Int32Array | undefined | null, keepdims?: boolean | undefined | null): MxArray;
|
|
450
|
+
prod(axes?: Int32Array | undefined | null, keepdims?: boolean | undefined | null): MxArray;
|
|
451
|
+
var(
|
|
452
|
+
axes?: Int32Array | undefined | null,
|
|
453
|
+
keepdims?: boolean | undefined | null,
|
|
454
|
+
ddof?: number | undefined | null,
|
|
455
|
+
): MxArray;
|
|
456
|
+
std(
|
|
457
|
+
axes?: Int32Array | undefined | null,
|
|
458
|
+
keepdims?: boolean | undefined | null,
|
|
459
|
+
ddof?: number | undefined | null,
|
|
460
|
+
): MxArray;
|
|
461
|
+
logsumexp(axes?: Int32Array | undefined | null, keepdims?: boolean | undefined | null): MxArray;
|
|
462
|
+
cumsum(axis: number): MxArray;
|
|
463
|
+
cumprod(axis: number): MxArray;
|
|
464
|
+
reshape(shape: BigInt64Array): MxArray;
|
|
465
|
+
transpose(axes?: Int32Array | undefined | null): MxArray;
|
|
466
|
+
take(indices: MxArray, axis: number): MxArray;
|
|
467
|
+
takeAlongAxis(indices: MxArray, axis: number): MxArray;
|
|
468
|
+
/**
|
|
469
|
+
* Put values into array at specified indices along an axis
|
|
470
|
+
* Equivalent to: result = array.copy(); result[..., indices] = values
|
|
471
|
+
* This matches MLX's put_along_axis for efficient in-place-style updates
|
|
472
|
+
*/
|
|
473
|
+
putAlongAxis(indices: MxArray, values: MxArray, axis: number): MxArray;
|
|
474
|
+
slice(starts: BigInt64Array, stops: BigInt64Array): MxArray;
|
|
475
|
+
/**
|
|
476
|
+
* Concatenate two arrays along an axis
|
|
477
|
+
* Optimized for the common binary concatenation case
|
|
478
|
+
*/
|
|
479
|
+
static concatenate(a: MxArray, b: MxArray, axis: number): MxArray;
|
|
480
|
+
/**
|
|
481
|
+
* Concatenate multiple arrays along an axis
|
|
482
|
+
* For concatenating 3 or more arrays
|
|
483
|
+
*/
|
|
484
|
+
static concatenateMany(arrays: Array<MxArray>, axis?: number | undefined | null): MxArray;
|
|
485
|
+
sort(axis?: number | undefined | null): MxArray;
|
|
486
|
+
argsort(axis?: number | undefined | null): MxArray;
|
|
487
|
+
partition(kth: number, axis?: number | undefined | null): MxArray;
|
|
488
|
+
argpartition(kth: number, axis?: number | undefined | null): MxArray;
|
|
489
|
+
static stack(arrays: Array<MxArray>, axis?: number | undefined | null): MxArray;
|
|
490
|
+
pad(padWidth: Int32Array, constantValue: number): MxArray;
|
|
491
|
+
roll(shift: number, axis: number): MxArray;
|
|
492
|
+
split(indicesOrSections: number, axis?: number | undefined | null): Array<MxArray>;
|
|
493
|
+
tile(reps: Int32Array): MxArray;
|
|
494
|
+
repeat(repeats: number, axis: number): MxArray;
|
|
495
|
+
squeeze(axes?: Int32Array | undefined | null): MxArray;
|
|
496
|
+
expandDims(axis: number): MxArray;
|
|
497
|
+
broadcastTo(shape: BigInt64Array): MxArray;
|
|
443
498
|
}
|
|
444
499
|
|
|
445
500
|
/** NAPI-exported reward registry wrapper */
|
|
@@ -584,6 +639,128 @@ export declare class OutputStore {
|
|
|
584
639
|
queryRaw(sql: string): Promise<string>;
|
|
585
640
|
}
|
|
586
641
|
|
|
642
|
+
/**
|
|
643
|
+
* Qwen3.5 Model -- hybrid linear/full attention with optional MoE.
|
|
644
|
+
*
|
|
645
|
+
* Uses interior mutability (RwLock) for layers, final_norm, lm_head, and caches
|
|
646
|
+
* to allow async generation via spawn_blocking without blocking the Node.js event loop.
|
|
647
|
+
* This matches the pattern used by Qwen3Model.
|
|
648
|
+
*/
|
|
649
|
+
export declare class Qwen35Model {
|
|
650
|
+
/** Create a new Qwen3.5 model with the given configuration. */
|
|
651
|
+
constructor(config: Qwen35Config);
|
|
652
|
+
/** Initialize caches for incremental generation. */
|
|
653
|
+
initCaches(): void;
|
|
654
|
+
/** Reset all caches. */
|
|
655
|
+
resetCaches(): void;
|
|
656
|
+
/**
|
|
657
|
+
* Forward pass through the model.
|
|
658
|
+
*
|
|
659
|
+
* # Arguments
|
|
660
|
+
* * `inputIds` - Token IDs [B, T]
|
|
661
|
+
*
|
|
662
|
+
* # Returns
|
|
663
|
+
* Logits [B, T, vocab_size]
|
|
664
|
+
*/
|
|
665
|
+
forward(inputIds: MxArray): MxArray;
|
|
666
|
+
/** Forward pass with cache for incremental generation. */
|
|
667
|
+
forwardWithCache(inputIds: MxArray): MxArray;
|
|
668
|
+
/**
|
|
669
|
+
* Load a pretrained model from a directory.
|
|
670
|
+
*
|
|
671
|
+
* Expects the directory to contain:
|
|
672
|
+
* - config.json
|
|
673
|
+
* - model.safetensors (or model-*.safetensors)
|
|
674
|
+
* - tokenizer.json + tokenizer_config.json
|
|
675
|
+
*/
|
|
676
|
+
static load(path: string): Promise<Qwen35Model>;
|
|
677
|
+
/**
|
|
678
|
+
* Generate text from a prompt token sequence.
|
|
679
|
+
*
|
|
680
|
+
* Runs generation on a worker thread via spawn_blocking to avoid
|
|
681
|
+
* blocking the Node.js event loop.
|
|
682
|
+
*/
|
|
683
|
+
generate(promptTokens: MxArray, config: Qwen35GenerationConfig): Promise<Qwen35GenerationResult>;
|
|
684
|
+
/**
|
|
685
|
+
* Chat API with tool calling support.
|
|
686
|
+
*
|
|
687
|
+
* Runs tokenization + generation on a worker thread via spawn_blocking
|
|
688
|
+
* to avoid blocking the Node.js event loop.
|
|
689
|
+
*/
|
|
690
|
+
chat(messages: Array<ChatMessage>, config?: ChatConfig | undefined | null): Promise<ChatResult>;
|
|
691
|
+
/**
|
|
692
|
+
* Streaming chat API with tool calling support.
|
|
693
|
+
*
|
|
694
|
+
* Same as `chat()` but streams tokens one-by-one via the callback.
|
|
695
|
+
* Returns a `ChatStreamHandle` immediately; generation runs in background.
|
|
696
|
+
* Call `handle.cancel()` to abort generation early.
|
|
697
|
+
*/
|
|
698
|
+
chatStream(
|
|
699
|
+
messages: ChatMessage[],
|
|
700
|
+
config: ChatConfig | null,
|
|
701
|
+
callback: (err: Error | null, chunk: ChatStreamChunk) => void,
|
|
702
|
+
): Promise<ChatStreamHandle>;
|
|
703
|
+
/** Get the number of parameters in the model. */
|
|
704
|
+
numParameters(): number;
|
|
705
|
+
/**
|
|
706
|
+
* Save the model weights and configuration to a directory.
|
|
707
|
+
*
|
|
708
|
+
* This saves:
|
|
709
|
+
* - config.json: Model configuration (with model_type for detectModelType)
|
|
710
|
+
* - weights.safetensors: Full model weights in SafeTensors format
|
|
711
|
+
* - weights.mlx: Parameter metadata (for reference)
|
|
712
|
+
*
|
|
713
|
+
* # Arguments
|
|
714
|
+
* * `savePath` - Directory to save the model
|
|
715
|
+
*/
|
|
716
|
+
saveModel(savePath: string): Promise<undefined>;
|
|
717
|
+
}
|
|
718
|
+
export type Qwen3_5Model = Qwen35Model;
|
|
719
|
+
|
|
720
|
+
/**
|
|
721
|
+
* Qwen3.5 MoE Model -- hybrid linear/full attention with Mixture-of-Experts.
|
|
722
|
+
*
|
|
723
|
+
* Supports C++ MoE forward path (non-compiled, builds fresh graph per step)
|
|
724
|
+
* when weights are registered via `register_moe_weights_with_cpp`.
|
|
725
|
+
* Falls back to Rust forward_inner path for test models without stored weights.
|
|
726
|
+
*/
|
|
727
|
+
export declare class Qwen35MoeModel {
|
|
728
|
+
constructor(config: Qwen35MoeConfig);
|
|
729
|
+
initCaches(): void;
|
|
730
|
+
resetCaches(): void;
|
|
731
|
+
forward(inputIds: MxArray): MxArray;
|
|
732
|
+
forwardWithCache(inputIds: MxArray): MxArray;
|
|
733
|
+
static load(path: string): Promise<Qwen35MoeModel>;
|
|
734
|
+
generate(promptTokens: MxArray, config: Qwen35MoeGenerationConfig): Promise<Qwen35MoeGenerationResult>;
|
|
735
|
+
chat(messages: Array<ChatMessage>, config?: ChatConfig | undefined | null): Promise<ChatResult>;
|
|
736
|
+
/**
|
|
737
|
+
* Streaming chat API with tool calling support.
|
|
738
|
+
*
|
|
739
|
+
* Same as `chat()` but streams tokens one-by-one via the callback.
|
|
740
|
+
* Returns a `ChatStreamHandle` immediately; generation runs in background.
|
|
741
|
+
* Call `handle.cancel()` to abort generation early.
|
|
742
|
+
*/
|
|
743
|
+
chatStream(
|
|
744
|
+
messages: ChatMessage[],
|
|
745
|
+
config: ChatConfig | null,
|
|
746
|
+
callback: (err: Error | null, chunk: ChatStreamChunk) => void,
|
|
747
|
+
): Promise<ChatStreamHandle>;
|
|
748
|
+
numParameters(): number;
|
|
749
|
+
/**
|
|
750
|
+
* Save the model weights and configuration to a directory.
|
|
751
|
+
*
|
|
752
|
+
* This saves:
|
|
753
|
+
* - config.json: Model configuration (with model_type for detectModelType)
|
|
754
|
+
* - weights.safetensors: Full model weights in SafeTensors format
|
|
755
|
+
* - weights.mlx: Parameter metadata (for reference)
|
|
756
|
+
*
|
|
757
|
+
* # Arguments
|
|
758
|
+
* * `savePath` - Directory to save the model
|
|
759
|
+
*/
|
|
760
|
+
saveModel(savePath: string): Promise<undefined>;
|
|
761
|
+
}
|
|
762
|
+
export type Qwen3_5MoeModel = Qwen35MoeModel;
|
|
763
|
+
|
|
587
764
|
/**
|
|
588
765
|
* Qwen3 Model with automatic differentiation support
|
|
589
766
|
*
|
|
@@ -750,8 +927,8 @@ export declare class Qwen3Model {
|
|
|
750
927
|
*
|
|
751
928
|
* # Example (TypeScript)
|
|
752
929
|
* ```typescript
|
|
753
|
-
* const targetModel = await
|
|
754
|
-
* const draftModel = await
|
|
930
|
+
* const targetModel = await loadModel('qwen3-7b');
|
|
931
|
+
* const draftModel = await loadModel('qwen3-0.5b');
|
|
755
932
|
*
|
|
756
933
|
* const result = targetModel.generateSpeculativeSync(draftModel, inputIds, {
|
|
757
934
|
* numDraftTokens: 5,
|
|
@@ -941,7 +1118,7 @@ export declare class Qwen3Model {
|
|
|
941
1118
|
*
|
|
942
1119
|
* # Example
|
|
943
1120
|
* ```typescript
|
|
944
|
-
* const model = await Qwen3Model.
|
|
1121
|
+
* const model = await Qwen3Model.load("path/to/model");
|
|
945
1122
|
* const messages = [
|
|
946
1123
|
* { role: "user", content: "What is 2+2?" }
|
|
947
1124
|
* ];
|
|
@@ -1070,7 +1247,7 @@ export declare class Qwen3Model {
|
|
|
1070
1247
|
* Decode token IDs to text using the internal tokenizer
|
|
1071
1248
|
*
|
|
1072
1249
|
* Helper method for decoding generated tokens. The model must have been loaded
|
|
1073
|
-
* via
|
|
1250
|
+
* via load() to have a tokenizer available.
|
|
1074
1251
|
*
|
|
1075
1252
|
* # Arguments
|
|
1076
1253
|
* * `token_ids` - Token IDs to decode as Uint32Array
|
|
@@ -1084,7 +1261,7 @@ export declare class Qwen3Model {
|
|
|
1084
1261
|
* Apply chat template and encode to token IDs
|
|
1085
1262
|
*
|
|
1086
1263
|
* Formats messages using ChatML format (or Jinja2 template with tools) and encodes to tokens.
|
|
1087
|
-
* The model must have been loaded via
|
|
1264
|
+
* The model must have been loaded via load() to have a tokenizer available.
|
|
1088
1265
|
*
|
|
1089
1266
|
* # Arguments
|
|
1090
1267
|
* * `messages` - Array of chat messages
|
|
@@ -1115,7 +1292,7 @@ export declare class Qwen3Model {
|
|
|
1115
1292
|
* # Returns
|
|
1116
1293
|
* * A fully initialized Qwen3Model with loaded weights
|
|
1117
1294
|
*/
|
|
1118
|
-
static
|
|
1295
|
+
static load(modelPath: string): Promise<Qwen3Model>;
|
|
1119
1296
|
/**
|
|
1120
1297
|
* Save model configuration and weights to disk
|
|
1121
1298
|
*
|
|
@@ -1278,8 +1455,12 @@ export declare class Qwen3Tokenizer {
|
|
|
1278
1455
|
|
|
1279
1456
|
/** SFT Training Engine */
|
|
1280
1457
|
export declare class SftTrainingEngine {
|
|
1281
|
-
/** Create a new SFT training engine */
|
|
1458
|
+
/** Create a new SFT training engine from a Qwen3 model */
|
|
1282
1459
|
constructor(model: Qwen3Model, config: SftEngineConfig);
|
|
1460
|
+
/** Create a new SFT training engine from a Qwen3.5 dense model */
|
|
1461
|
+
static fromQwen35(model: Qwen35Model, config: SftEngineConfig): SftTrainingEngine;
|
|
1462
|
+
/** Create a new SFT training engine from a Qwen3.5 MoE model */
|
|
1463
|
+
static fromQwen35Moe(model: Qwen35MoeModel, config: SftEngineConfig): SftTrainingEngine;
|
|
1283
1464
|
/** Run a single training step */
|
|
1284
1465
|
trainStep(inputIds: MxArray, labels: MxArray): Promise<SftStepMetrics>;
|
|
1285
1466
|
/** Get current step number */
|
|
@@ -1318,8 +1499,12 @@ export declare class SftTrainingEngine {
|
|
|
1318
1499
|
reset(): void;
|
|
1319
1500
|
/** Restore training state (for resuming from checkpoint) */
|
|
1320
1501
|
restoreState(step: number, epoch: number): void;
|
|
1321
|
-
/** Get the underlying model for checkpointing */
|
|
1502
|
+
/** Get the underlying Qwen3 model for checkpointing */
|
|
1322
1503
|
getModel(): Qwen3Model;
|
|
1504
|
+
/** Get the underlying Qwen3.5 dense model for checkpointing */
|
|
1505
|
+
getQwen35Model(): Qwen35Model;
|
|
1506
|
+
/** Get the underlying Qwen3.5 MoE model for checkpointing */
|
|
1507
|
+
getQwen35MoeModel(): Qwen35MoeModel;
|
|
1323
1508
|
}
|
|
1324
1509
|
|
|
1325
1510
|
/**
|
|
@@ -1363,25 +1548,112 @@ export declare class Tensor {
|
|
|
1363
1548
|
/** Convert to Int32 array */
|
|
1364
1549
|
toInt32(): Int32Array;
|
|
1365
1550
|
/**
|
|
1366
|
-
* Detach this tensor from the computation graph
|
|
1551
|
+
* Detach this tensor from the computation graph
|
|
1552
|
+
*
|
|
1553
|
+
* Returns a new tensor with the same data but no gradient tracking
|
|
1554
|
+
*/
|
|
1555
|
+
detach(): Tensor;
|
|
1556
|
+
/** Create a tensor of zeros */
|
|
1557
|
+
static zeros(
|
|
1558
|
+
shape: BigInt64Array,
|
|
1559
|
+
dtype?: DType | undefined | null,
|
|
1560
|
+
requiresGrad?: boolean | undefined | null,
|
|
1561
|
+
): Tensor;
|
|
1562
|
+
/** Create a tensor of ones */
|
|
1563
|
+
static ones(
|
|
1564
|
+
shape: BigInt64Array,
|
|
1565
|
+
dtype?: DType | undefined | null,
|
|
1566
|
+
requiresGrad?: boolean | undefined | null,
|
|
1567
|
+
): Tensor;
|
|
1568
|
+
/** Evaluate the underlying array */
|
|
1569
|
+
eval(): void;
|
|
1570
|
+
}
|
|
1571
|
+
|
|
1572
|
+
/**
|
|
1573
|
+
* PP-OCRv5 Text Detection model (DBNet with PPHGNetV2 backbone).
|
|
1574
|
+
*
|
|
1575
|
+
* Detects text lines in document images and returns bounding boxes.
|
|
1576
|
+
*/
|
|
1577
|
+
export declare class TextDetModel {
|
|
1578
|
+
/**
|
|
1579
|
+
* Load a TextDetModel from a directory containing model.safetensors.
|
|
1580
|
+
*
|
|
1581
|
+
* # Arguments
|
|
1582
|
+
* * `modelPath` - Path to model directory
|
|
1583
|
+
*/
|
|
1584
|
+
static load(modelPath: string): TextDetModel;
|
|
1585
|
+
/**
|
|
1586
|
+
* Detect text lines in an image.
|
|
1587
|
+
*
|
|
1588
|
+
* # Arguments
|
|
1589
|
+
* * `image_data` - Encoded image bytes (PNG/JPEG)
|
|
1590
|
+
* * `threshold` - Optional detection threshold (default from config, typically 0.3)
|
|
1591
|
+
*
|
|
1592
|
+
* # Returns
|
|
1593
|
+
* * Array of TextBox with bounding boxes and confidence scores
|
|
1594
|
+
*/
|
|
1595
|
+
detect(imageData: Buffer, threshold?: number | undefined | null): Array<TextBox>;
|
|
1596
|
+
/**
|
|
1597
|
+
* Detect text lines from raw RGB pixel data.
|
|
1598
|
+
*
|
|
1599
|
+
* # Arguments
|
|
1600
|
+
* * `rgb_data` - Raw RGB pixel data
|
|
1601
|
+
* * `width` - Image width
|
|
1602
|
+
* * `height` - Image height
|
|
1603
|
+
* * `threshold` - Optional detection threshold (default from config)
|
|
1604
|
+
*
|
|
1605
|
+
* # Returns
|
|
1606
|
+
* * Array of TextBox with bounding boxes and confidence scores
|
|
1607
|
+
*/
|
|
1608
|
+
detectCrop(rgbData: Uint8Array, width: number, height: number, threshold?: number | undefined | null): Array<TextBox>;
|
|
1609
|
+
}
|
|
1610
|
+
|
|
1611
|
+
/**
|
|
1612
|
+
* PP-OCRv5 Text Recognition model (PPHGNetV2 + SVTR + CTC).
|
|
1613
|
+
*
|
|
1614
|
+
* Recognizes text from cropped text line images.
|
|
1615
|
+
*/
|
|
1616
|
+
export declare class TextRecModel {
|
|
1617
|
+
/**
|
|
1618
|
+
* Load a TextRecModel from a directory containing model.safetensors.
|
|
1619
|
+
*
|
|
1620
|
+
* # Arguments
|
|
1621
|
+
* * `model_path` - Path to model directory
|
|
1622
|
+
* * `dict_path` - Path to character dictionary text file
|
|
1623
|
+
*/
|
|
1624
|
+
static load(modelPath: string, dictPath: string): TextRecModel;
|
|
1625
|
+
/**
|
|
1626
|
+
* Recognize text from encoded image bytes.
|
|
1627
|
+
*
|
|
1628
|
+
* # Arguments
|
|
1629
|
+
* * `image_data` - Encoded image bytes (PNG/JPEG)
|
|
1630
|
+
*
|
|
1631
|
+
* # Returns
|
|
1632
|
+
* * RecResult with recognized text and confidence score
|
|
1633
|
+
*/
|
|
1634
|
+
recognize(imageData: Buffer): RecResult;
|
|
1635
|
+
/**
|
|
1636
|
+
* Recognize text from multiple encoded images.
|
|
1367
1637
|
*
|
|
1368
|
-
*
|
|
1638
|
+
* # Arguments
|
|
1639
|
+
* * `images` - Array of encoded image bytes (PNG/JPEG)
|
|
1640
|
+
*
|
|
1641
|
+
* # Returns
|
|
1642
|
+
* * Array of RecResult with recognized text and confidence scores
|
|
1369
1643
|
*/
|
|
1370
|
-
|
|
1371
|
-
/**
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
|
|
1382
|
-
):
|
|
1383
|
-
/** Evaluate the underlying array */
|
|
1384
|
-
eval(): void;
|
|
1644
|
+
recognizeBatch(images: Array<Buffer>): Array<RecResult>;
|
|
1645
|
+
/**
|
|
1646
|
+
* Recognize text from raw RGB crop data.
|
|
1647
|
+
*
|
|
1648
|
+
* # Arguments
|
|
1649
|
+
* * `rgb_data` - Raw RGB pixel data of a cropped text line
|
|
1650
|
+
* * `width` - Image width
|
|
1651
|
+
* * `height` - Image height
|
|
1652
|
+
*
|
|
1653
|
+
* # Returns
|
|
1654
|
+
* * RecResult with recognized text and confidence score
|
|
1655
|
+
*/
|
|
1656
|
+
recognizeCrop(rgbData: Uint8Array, width: number, height: number): RecResult;
|
|
1385
1657
|
}
|
|
1386
1658
|
|
|
1387
1659
|
/** Result from VLM chat */
|
|
@@ -1419,27 +1691,27 @@ export declare class VLModel {
|
|
|
1419
1691
|
*
|
|
1420
1692
|
* # Arguments
|
|
1421
1693
|
* * `messages` - Chat messages (role + content)
|
|
1422
|
-
* * `config` - Chat configuration (including
|
|
1694
|
+
* * `config` - Chat configuration (including images for automatic processing)
|
|
1423
1695
|
*
|
|
1424
1696
|
* # Returns
|
|
1425
1697
|
* * VLMChatResult with generated text
|
|
1426
1698
|
*
|
|
1427
1699
|
* # Example
|
|
1428
1700
|
* ```typescript
|
|
1429
|
-
* const result = model.chat(
|
|
1701
|
+
* const result = await model.chat(
|
|
1430
1702
|
* [{ role: 'user', content: 'Describe this image.' }],
|
|
1431
|
-
* {
|
|
1703
|
+
* { images: [readFileSync('./photo.jpg')], maxNewTokens: 256 }
|
|
1432
1704
|
* );
|
|
1433
1705
|
* ```
|
|
1434
1706
|
*/
|
|
1435
|
-
chat(messages: Array<VlmChatMessage>, config?: VlmChatConfig | undefined | null): VlmChatResult
|
|
1707
|
+
chat(messages: Array<VlmChatMessage>, config?: VlmChatConfig | undefined | null): Promise<VlmChatResult>;
|
|
1436
1708
|
/**
|
|
1437
|
-
* Simple OCR: extract text from
|
|
1709
|
+
* Simple OCR: extract text from encoded image bytes
|
|
1438
1710
|
*
|
|
1439
1711
|
* Convenience method that processes an image and extracts all text.
|
|
1440
1712
|
*
|
|
1441
1713
|
* # Arguments
|
|
1442
|
-
* * `
|
|
1714
|
+
* * `image_data` - Encoded image bytes (PNG/JPEG)
|
|
1443
1715
|
* * `prompt` - Optional custom prompt (default: "Extract all text from this image.")
|
|
1444
1716
|
*
|
|
1445
1717
|
* # Returns
|
|
@@ -1447,11 +1719,11 @@ export declare class VLModel {
|
|
|
1447
1719
|
*
|
|
1448
1720
|
* # Example
|
|
1449
1721
|
* ```typescript
|
|
1450
|
-
* const text = await model.ocr(
|
|
1722
|
+
* const text = await model.ocr(imageBuffer);
|
|
1451
1723
|
* console.log(text);
|
|
1452
1724
|
* ```
|
|
1453
1725
|
*/
|
|
1454
|
-
ocr(
|
|
1726
|
+
ocr(imageData: Buffer, prompt?: string | undefined | null): Promise<string>;
|
|
1455
1727
|
/**
|
|
1456
1728
|
* Get input embeddings with vision features merged
|
|
1457
1729
|
*
|
|
@@ -1507,7 +1779,40 @@ export declare class VLModel {
|
|
|
1507
1779
|
pixelValues?: MxArray | undefined | null,
|
|
1508
1780
|
imageGridThw?: MxArray | undefined | null,
|
|
1509
1781
|
config?: GenerationConfig | undefined | null,
|
|
1510
|
-
): GenerationResult
|
|
1782
|
+
): Promise<GenerationResult>;
|
|
1783
|
+
/**
|
|
1784
|
+
* Batch OCR: extract text from multiple images simultaneously
|
|
1785
|
+
*
|
|
1786
|
+
* Processes N images with sequential prefill + batched decode for ~N× decode throughput.
|
|
1787
|
+
*
|
|
1788
|
+
* # Arguments
|
|
1789
|
+
* * `images` - Encoded image buffers
|
|
1790
|
+
* * `config` - Optional chat configuration (shared across all items)
|
|
1791
|
+
*
|
|
1792
|
+
* # Returns
|
|
1793
|
+
* * Array of extracted text strings, one per image
|
|
1794
|
+
*
|
|
1795
|
+
* # Example
|
|
1796
|
+
* ```typescript
|
|
1797
|
+
* import { readFileSync } from 'fs';
|
|
1798
|
+
* const images = ['page1.jpg', 'page2.jpg'].map(p => readFileSync(p));
|
|
1799
|
+
* const texts = await model.ocrBatch(images);
|
|
1800
|
+
* ```
|
|
1801
|
+
*/
|
|
1802
|
+
ocrBatch(images: Array<Buffer>, config?: VlmChatConfig | undefined | null): Promise<Array<string>>;
|
|
1803
|
+
/**
|
|
1804
|
+
* Batch chat: process multiple items simultaneously
|
|
1805
|
+
*
|
|
1806
|
+
* Sequential prefill + batched decode. Each item can have different images/prompts.
|
|
1807
|
+
*
|
|
1808
|
+
* # Arguments
|
|
1809
|
+
* * `batch` - Batch items, each with messages and optional images
|
|
1810
|
+
* * `config` - Optional shared chat configuration
|
|
1811
|
+
*
|
|
1812
|
+
* # Returns
|
|
1813
|
+
* * Array of VlmChatResult, one per batch item
|
|
1814
|
+
*/
|
|
1815
|
+
batch(batch: Array<VlmBatchItem>, config?: VlmChatConfig | undefined | null): Promise<Array<VlmChatResult>>;
|
|
1511
1816
|
/** Get model configuration */
|
|
1512
1817
|
get config(): ModelConfig;
|
|
1513
1818
|
/** Check if model is fully initialized */
|
|
@@ -1529,7 +1834,7 @@ export declare class VLModel {
|
|
|
1529
1834
|
* ```typescript
|
|
1530
1835
|
* import { VLModel } from '@mlx-node/vlm';
|
|
1531
1836
|
* const model = await VLModel.load('./models/paddleocr-vl');
|
|
1532
|
-
* const result = model.chat(messages, {
|
|
1837
|
+
* const result = await model.chat(messages, { images: [readFileSync('./image.jpg')] });
|
|
1533
1838
|
* ```
|
|
1534
1839
|
*/
|
|
1535
1840
|
static load(modelPath: string): Promise<VLModel>;
|
|
@@ -1559,35 +1864,6 @@ export declare class VLModel {
|
|
|
1559
1864
|
*
|
|
1560
1865
|
* Parses tool calls and thinking from completions, creating structured outputs
|
|
1561
1866
|
* aligned with the ChatResult structure.
|
|
1562
|
-
*
|
|
1563
|
-
* # Arguments
|
|
1564
|
-
* * `prompts` - Array of prompt texts (one per unique prompt, will be expanded by group_size)
|
|
1565
|
-
* * `completions` - Array of completion texts (prompts.len() * group_size total)
|
|
1566
|
-
* * `token_counts` - Array of token counts for each completion
|
|
1567
|
-
* * `finish_reasons` - Array of finish reasons from generation ("eos", "length", "stop", "repetition")
|
|
1568
|
-
* * `group_size` - Number of completions per prompt
|
|
1569
|
-
*
|
|
1570
|
-
* # Returns
|
|
1571
|
-
* Array of RewardOutput objects with structured completion data
|
|
1572
|
-
*
|
|
1573
|
-
* # Example
|
|
1574
|
-
* ```typescript
|
|
1575
|
-
* import { buildRewardOutputs } from '@mlx-node/core';
|
|
1576
|
-
*
|
|
1577
|
-
* const outputs = buildRewardOutputs(
|
|
1578
|
-
* ['What is 2+2?'], // prompts
|
|
1579
|
-
* ['<think>Let me calculate</think>
|
|
1580
|
-
|
|
1581
|
-
4', '4'], // completions (group_size=2)
|
|
1582
|
-
* [10, 5], // token counts
|
|
1583
|
-
* ['eos', 'length'], // finish reasons
|
|
1584
|
-
* 2 // group_size
|
|
1585
|
-
* );
|
|
1586
|
-
*
|
|
1587
|
-
* outputs[0].completion.thinking; // "Let me calculate"
|
|
1588
|
-
* outputs[0].completion.text; // "4"
|
|
1589
|
-
* outputs[0].completion.finishReason; // "eos"
|
|
1590
|
-
* ```
|
|
1591
1867
|
*/
|
|
1592
1868
|
export declare function buildRewardOutputs(
|
|
1593
1869
|
prompts: Array<string>,
|
|
@@ -1627,67 +1903,41 @@ export declare const enum BuiltinRewardType {
|
|
|
1627
1903
|
XmlFormat = 'XmlFormat',
|
|
1628
1904
|
/** Length-based scoring */
|
|
1629
1905
|
Length = 'Length',
|
|
1630
|
-
/** JSON
|
|
1906
|
+
/** JSON format validation (brace matching + field name check, not full JSON parsing) */
|
|
1631
1907
|
JsonSchema = 'JsonSchema',
|
|
1632
1908
|
}
|
|
1633
1909
|
|
|
1634
|
-
/**
|
|
1635
|
-
* Configuration for the high-level `chat()` API
|
|
1636
|
-
*
|
|
1637
|
-
* Combines tool definitions with generation parameters in a single config object.
|
|
1638
|
-
* Tools are optional - when not provided, `chat()` works as a simple conversational API.
|
|
1639
|
-
*
|
|
1640
|
-
* ## Example
|
|
1641
|
-
* ```typescript
|
|
1642
|
-
* // Simple chat (no tools)
|
|
1643
|
-
* const result = await model.chat(messages);
|
|
1644
|
-
*
|
|
1645
|
-
* // With tools
|
|
1646
|
-
* const result = await model.chat(messages, {
|
|
1647
|
-
* tools: [weatherTool, searchTool],
|
|
1648
|
-
* maxNewTokens: 2048,
|
|
1649
|
-
* temperature: 0.7,
|
|
1650
|
-
* });
|
|
1651
|
-
* ```
|
|
1652
|
-
*/
|
|
1910
|
+
/** Unified chat configuration shared by all model variants (Qwen3, Qwen3.5, Qwen3.5 MoE). */
|
|
1653
1911
|
export interface ChatConfig {
|
|
1912
|
+
maxNewTokens?: number | undefined;
|
|
1913
|
+
temperature?: number | undefined;
|
|
1914
|
+
topK?: number | undefined;
|
|
1915
|
+
topP?: number | undefined;
|
|
1916
|
+
minP?: number | undefined;
|
|
1917
|
+
/** Repetition penalty (1.0 = disabled). Penalizes tokens already in context. */
|
|
1918
|
+
repetitionPenalty?: number | undefined;
|
|
1919
|
+
/** Size of the context window for repetition penalty (default: 256) */
|
|
1920
|
+
repetitionContextSize?: number | undefined;
|
|
1921
|
+
/** Max consecutive identical tokens before stopping (default: 16, 0 = disabled) */
|
|
1922
|
+
maxConsecutiveTokens?: number | undefined;
|
|
1923
|
+
/** Max n-gram repetitions before stopping (default: 3, 0 = disabled) */
|
|
1924
|
+
maxNgramRepeats?: number | undefined;
|
|
1925
|
+
/** Max pattern size for n-gram repetition detection (default: 64) */
|
|
1926
|
+
ngramSize?: number | undefined;
|
|
1927
|
+
tools?: Array<ToolDefinition>;
|
|
1654
1928
|
/**
|
|
1655
|
-
*
|
|
1656
|
-
*
|
|
1657
|
-
* When provided, the model can invoke these tools during generation.
|
|
1658
|
-
* Tool calls are parsed and returned in `ChatResult.toolCalls`.
|
|
1929
|
+
* Enable thinking mode (Qwen3's <think> tags). Default: true (model thinks naturally).
|
|
1930
|
+
* Set to false to suppress thinking by injecting empty <think></think> tags.
|
|
1659
1931
|
*/
|
|
1660
|
-
|
|
1661
|
-
/**
|
|
1662
|
-
|
|
1663
|
-
/** Sampling temperature (0 = greedy, higher = more random) (default: 0.7) */
|
|
1664
|
-
temperature?: number;
|
|
1665
|
-
/** Top-k sampling: keep only top k tokens (0 = disabled) (default: 0) */
|
|
1666
|
-
topK?: number;
|
|
1667
|
-
/** Top-p (nucleus) sampling: keep tokens with cumulative prob < p (default: 0.9) */
|
|
1668
|
-
topP?: number;
|
|
1669
|
-
/** Min-p sampling: keep tokens with prob > min_p * max_prob (default: 0.0) */
|
|
1670
|
-
minP?: number;
|
|
1671
|
-
/** Repetition penalty factor (1.0 = no penalty) (default: 1.0) */
|
|
1672
|
-
repetitionPenalty?: number;
|
|
1673
|
-
/** Number of recent tokens to consider for repetition penalty (default: 20) */
|
|
1674
|
-
repetitionContextSize?: number;
|
|
1675
|
-
/** Stop if same token repeats this many times consecutively (default: 16) */
|
|
1676
|
-
maxConsecutiveTokens?: number;
|
|
1677
|
-
/** Stop if an n-gram pattern repeats this many times (default: 8) */
|
|
1678
|
-
maxNgramRepeats?: number;
|
|
1679
|
-
/** N-gram size for repetition detection (default: 3) */
|
|
1680
|
-
ngramSize?: number;
|
|
1681
|
-
/** EOS token ID (generation stops when this is generated) */
|
|
1682
|
-
eosTokenId?: number;
|
|
1683
|
-
/** Whether to return log probabilities (default: true) */
|
|
1684
|
-
returnLogprobs?: boolean;
|
|
1932
|
+
enableThinking?: boolean | undefined;
|
|
1933
|
+
/** When true, include performance metrics (TTFT, prefill tok/s, decode tok/s) in the result */
|
|
1934
|
+
reportPerformance?: boolean | undefined;
|
|
1685
1935
|
}
|
|
1686
1936
|
|
|
1687
1937
|
/** Chat message with tool calling support */
|
|
1688
1938
|
export interface ChatMessage {
|
|
1689
1939
|
/** Role: "system", "user", "assistant", or "tool" */
|
|
1690
|
-
role: string;
|
|
1940
|
+
role: 'system' | 'user' | 'assistant' | 'tool' | (string & {});
|
|
1691
1941
|
/** Message content */
|
|
1692
1942
|
content: string;
|
|
1693
1943
|
/** Tool calls made by the assistant (for assistant messages) */
|
|
@@ -1696,16 +1946,57 @@ export interface ChatMessage {
|
|
|
1696
1946
|
toolCallId?: string;
|
|
1697
1947
|
/** Reasoning content for thinking mode (used with <think> tags) */
|
|
1698
1948
|
reasoningContent?: string;
|
|
1949
|
+
/** Image data for VLM models (encoded image bytes: PNG/JPEG, passed as Uint8Array/Buffer) */
|
|
1950
|
+
images?: Array<Uint8Array> | undefined;
|
|
1951
|
+
}
|
|
1952
|
+
|
|
1953
|
+
/** Unified chat result shared by all model variants (Qwen3, Qwen3.5, Qwen3.5 MoE). */
|
|
1954
|
+
export interface ChatResult {
|
|
1955
|
+
text: string;
|
|
1956
|
+
toolCalls: Array<ToolCallResult>;
|
|
1957
|
+
thinking?: string;
|
|
1958
|
+
numTokens: number;
|
|
1959
|
+
finishReason: string;
|
|
1960
|
+
rawText: string;
|
|
1961
|
+
/** Performance metrics (present when `reportPerformance: true` in config) */
|
|
1962
|
+
performance?: PerformanceMetrics;
|
|
1699
1963
|
}
|
|
1700
1964
|
|
|
1701
|
-
/** Chat message role */
|
|
1965
|
+
/** Chat message role (lowercase values matching standard convention) */
|
|
1702
1966
|
export declare const enum ChatRole {
|
|
1703
1967
|
/** User message */
|
|
1704
|
-
User = '
|
|
1968
|
+
User = 'user',
|
|
1705
1969
|
/** Assistant response */
|
|
1706
|
-
Assistant = '
|
|
1970
|
+
Assistant = 'assistant',
|
|
1707
1971
|
/** System prompt */
|
|
1708
|
-
System = '
|
|
1972
|
+
System = 'system',
|
|
1973
|
+
/** Tool response */
|
|
1974
|
+
Tool = 'tool',
|
|
1975
|
+
}
|
|
1976
|
+
|
|
1977
|
+
/** A single chunk emitted during streaming chat generation. */
|
|
1978
|
+
export interface ChatStreamChunk {
|
|
1979
|
+
text: string;
|
|
1980
|
+
done: boolean;
|
|
1981
|
+
finishReason?: string;
|
|
1982
|
+
toolCalls?: Array<ToolCallResult>;
|
|
1983
|
+
thinking?: string;
|
|
1984
|
+
numTokens?: number;
|
|
1985
|
+
rawText?: string;
|
|
1986
|
+
/** Performance metrics (only present in the final chunk when `reportPerformance: true`) */
|
|
1987
|
+
performance?: PerformanceMetrics;
|
|
1988
|
+
}
|
|
1989
|
+
|
|
1990
|
+
/** Result from classify_and_rotate: orientation info + corrected image bytes. */
|
|
1991
|
+
export interface ClassifyRotateResult {
|
|
1992
|
+
/** Detected rotation angle (0, 90, 180, or 270 degrees) */
|
|
1993
|
+
angle: number;
|
|
1994
|
+
/** Confidence score */
|
|
1995
|
+
score: number;
|
|
1996
|
+
/** Angle label as string */
|
|
1997
|
+
label: string;
|
|
1998
|
+
/** Corrected image as PNG bytes (or original bytes if angle=0) */
|
|
1999
|
+
image: Buffer;
|
|
1709
2000
|
}
|
|
1710
2001
|
|
|
1711
2002
|
/** Statistics about cleanup operations (NAPI wrapper) */
|
|
@@ -1748,6 +2039,26 @@ export interface ConversionOptions {
|
|
|
1748
2039
|
dtype?: string;
|
|
1749
2040
|
/** Whether to enable verbose logging (default: false) */
|
|
1750
2041
|
verbose?: boolean;
|
|
2042
|
+
/** Model type for model-specific weight sanitization (e.g., "paddleocr-vl") */
|
|
2043
|
+
modelType?: string;
|
|
2044
|
+
/** Enable quantization of converted weights */
|
|
2045
|
+
quantize?: boolean;
|
|
2046
|
+
/** Quantization bits: 4 (default) or 8 */
|
|
2047
|
+
quantBits?: number;
|
|
2048
|
+
/** Quantization group size (default: 64 for affine, 32 for mxfp8) */
|
|
2049
|
+
quantGroupSize?: number;
|
|
2050
|
+
/** Quantization mode: "affine" (default) or "mxfp8" */
|
|
2051
|
+
quantMode?: string;
|
|
2052
|
+
/**
|
|
2053
|
+
* Quantization recipe for per-layer mixed-bit quantization.
|
|
2054
|
+
* Options: mixed_2_6, mixed_3_4, mixed_3_6, mixed_4_6, qwen3_5
|
|
2055
|
+
*/
|
|
2056
|
+
quantRecipe?: string;
|
|
2057
|
+
/**
|
|
2058
|
+
* Path to an imatrix GGUF file for AWQ-style pre-scaling.
|
|
2059
|
+
* Improves quantization quality by amplifying important weight channels.
|
|
2060
|
+
*/
|
|
2061
|
+
imatrixPath?: string;
|
|
1751
2062
|
}
|
|
1752
2063
|
|
|
1753
2064
|
export interface ConversionResult {
|
|
@@ -1761,6 +2072,10 @@ export interface ConversionResult {
|
|
|
1761
2072
|
tensorNames: Array<string>;
|
|
1762
2073
|
}
|
|
1763
2074
|
|
|
2075
|
+
export declare function convertForeignWeights(options: ForeignConversionOptions): ForeignConversionResult;
|
|
2076
|
+
|
|
2077
|
+
export declare function convertGgufToSafetensors(options: GgufConversionOptions): Promise<GgufConversionResult>;
|
|
2078
|
+
|
|
1764
2079
|
/**
|
|
1765
2080
|
* Convert a HuggingFace SafeTensors model to MLX format
|
|
1766
2081
|
*
|
|
@@ -1806,12 +2121,31 @@ export interface DocumentElement {
|
|
|
1806
2121
|
paragraph?: Paragraph;
|
|
1807
2122
|
}
|
|
1808
2123
|
|
|
2124
|
+
/**
|
|
2125
|
+
* Convert a ParsedDocument to an XLSX buffer.
|
|
2126
|
+
*
|
|
2127
|
+
* Each Table element becomes a separate worksheet with bold headers.
|
|
2128
|
+
* Paragraph elements are collected into a "Text" worksheet.
|
|
2129
|
+
*
|
|
2130
|
+
* # Example
|
|
2131
|
+
* ```typescript
|
|
2132
|
+
* import { parseVlmOutput, documentToXlsx } from '@mlx-node/core';
|
|
2133
|
+
* import { writeFileSync } from 'fs';
|
|
2134
|
+
*
|
|
2135
|
+
* const doc = parseVlmOutput(vlmResult.text);
|
|
2136
|
+
* const buffer = documentToXlsx(doc);
|
|
2137
|
+
* writeFileSync('output.xlsx', buffer);
|
|
2138
|
+
* ```
|
|
2139
|
+
*/
|
|
2140
|
+
export declare function documentToXlsx(doc: ParsedDocument): Buffer;
|
|
2141
|
+
|
|
1809
2142
|
export declare const enum DType {
|
|
1810
2143
|
Float32 = 0,
|
|
1811
2144
|
Int32 = 1,
|
|
1812
2145
|
Float16 = 2,
|
|
1813
2146
|
BFloat16 = 3,
|
|
1814
2147
|
Uint32 = 4,
|
|
2148
|
+
Uint8 = 5,
|
|
1815
2149
|
}
|
|
1816
2150
|
|
|
1817
2151
|
/** Document element type */
|
|
@@ -1864,6 +2198,23 @@ export interface EngineStepMetrics {
|
|
|
1864
2198
|
activeMemoryMb: number;
|
|
1865
2199
|
}
|
|
1866
2200
|
|
|
2201
|
+
export interface ForeignConversionOptions {
|
|
2202
|
+
/** Path to the input weights file (.pdparams, .pkl, .pt, .pth) */
|
|
2203
|
+
inputPath: string;
|
|
2204
|
+
/** Output directory for model.safetensors + config.json */
|
|
2205
|
+
outputDir: string;
|
|
2206
|
+
/** Model type: "pp-lcnet-ori" or "uvdoc" */
|
|
2207
|
+
modelType: string;
|
|
2208
|
+
/** Enable verbose logging */
|
|
2209
|
+
verbose?: boolean;
|
|
2210
|
+
}
|
|
2211
|
+
|
|
2212
|
+
export interface ForeignConversionResult {
|
|
2213
|
+
numTensors: number;
|
|
2214
|
+
outputPath: string;
|
|
2215
|
+
tensorNames: Array<string>;
|
|
2216
|
+
}
|
|
2217
|
+
|
|
1867
2218
|
/** Format parsed document according to config */
|
|
1868
2219
|
export declare function formatDocument(doc: ParsedDocument, config?: ParserConfig | undefined | null): string;
|
|
1869
2220
|
|
|
@@ -1897,13 +2248,13 @@ export interface GenerateBatchResult {
|
|
|
1897
2248
|
completionLogprobs: Array<number>;
|
|
1898
2249
|
/** Lengths of each completion (for reconstruction) */
|
|
1899
2250
|
completionLengths: Array<number>;
|
|
1900
|
-
/** Finish reasons for each completion ("
|
|
2251
|
+
/** Finish reasons for each completion ("stop", "length", or "repetition") */
|
|
1901
2252
|
finishReasons: Array<string>;
|
|
1902
2253
|
}
|
|
1903
2254
|
|
|
1904
2255
|
/** Configuration for text generation */
|
|
1905
2256
|
export interface GenerationConfig {
|
|
1906
|
-
/** Maximum number of new tokens to generate (default:
|
|
2257
|
+
/** Maximum number of new tokens to generate (default: 2048) */
|
|
1907
2258
|
maxNewTokens?: number;
|
|
1908
2259
|
/** Sampling temperature (0 = greedy, higher = more random) (default: 1.0) */
|
|
1909
2260
|
temperature?: number;
|
|
@@ -1926,13 +2277,15 @@ export interface GenerationConfig {
|
|
|
1926
2277
|
*/
|
|
1927
2278
|
maxConsecutiveTokens?: number;
|
|
1928
2279
|
/**
|
|
1929
|
-
* Stop if
|
|
1930
|
-
* Set to 0 to disable. Detects patterns like "A B A B A B
|
|
2280
|
+
* Stop if a pattern repeats this many times consecutively (default: 3)
|
|
2281
|
+
* Set to 0 to disable. Detects patterns like "A B A B A B".
|
|
2282
|
+
* Uses range-based detection: checks all pattern sizes from 2 to ngram_size.
|
|
1931
2283
|
*/
|
|
1932
2284
|
maxNgramRepeats?: number;
|
|
1933
2285
|
/**
|
|
1934
|
-
*
|
|
1935
|
-
*
|
|
2286
|
+
* Maximum pattern size for repetition detection (default: 64)
|
|
2287
|
+
* All pattern sizes from 2 up to this value are checked each decode step.
|
|
2288
|
+
* Larger values catch long phrase-level repetition common in small models.
|
|
1936
2289
|
*/
|
|
1937
2290
|
ngramSize?: number;
|
|
1938
2291
|
/** EOS token ID (generation stops when this is generated) */
|
|
@@ -1971,6 +2324,33 @@ export interface GenerationConfig {
|
|
|
1971
2324
|
numDraftTokens?: number;
|
|
1972
2325
|
}
|
|
1973
2326
|
|
|
2327
|
+
export interface GenerationProfile {
|
|
2328
|
+
/** Label identifying the decode loop variant. */
|
|
2329
|
+
label: string;
|
|
2330
|
+
/** Model type (e.g. "qwen3_5", "qwen3_5_moe", "qwen3"). */
|
|
2331
|
+
modelType: string;
|
|
2332
|
+
/** Number of tokens generated. */
|
|
2333
|
+
numTokens: number;
|
|
2334
|
+
/** Number of prompt tokens. */
|
|
2335
|
+
promptTokens: number;
|
|
2336
|
+
/** Prefill wall-clock time (ms). */
|
|
2337
|
+
prefillMs: number;
|
|
2338
|
+
/** Decode wall-clock time (ms). */
|
|
2339
|
+
decodeMs: number;
|
|
2340
|
+
/** Total wall-clock time (prefill + decode) (ms). */
|
|
2341
|
+
totalMs: number;
|
|
2342
|
+
/** Tokens per second (decode only). */
|
|
2343
|
+
tokensPerSecond: number;
|
|
2344
|
+
/** Time to first token (ms) — from decode loop start to first token extracted. */
|
|
2345
|
+
timeToFirstTokenMs: number;
|
|
2346
|
+
/** Per-phase breakdown. */
|
|
2347
|
+
phases: Array<PhaseProfile>;
|
|
2348
|
+
/** Memory snapshot before generation. */
|
|
2349
|
+
memoryBefore?: MemorySnapshot;
|
|
2350
|
+
/** Memory snapshot after generation. */
|
|
2351
|
+
memoryAfter?: MemorySnapshot;
|
|
2352
|
+
}
|
|
2353
|
+
|
|
1974
2354
|
/** A generation record (NAPI wrapper) */
|
|
1975
2355
|
export interface GenerationRecord {
|
|
1976
2356
|
batchIndex: number;
|
|
@@ -1994,6 +2374,62 @@ export interface GenerationWithToolCalls {
|
|
|
1994
2374
|
/** Get expected weight keys for PaddleOCR-VL model */
|
|
1995
2375
|
export declare function getExpectedWeightKeys(): Array<string>;
|
|
1996
2376
|
|
|
2377
|
+
/** Retrieve all collected profiling data as a `ProfilingSession`. */
|
|
2378
|
+
export declare function getProfilingData(): ProfilingSession;
|
|
2379
|
+
|
|
2380
|
+
export interface GgufConversionOptions {
|
|
2381
|
+
/** Path to the GGUF file */
|
|
2382
|
+
inputPath: string;
|
|
2383
|
+
/** Output directory for converted SafeTensors model */
|
|
2384
|
+
outputDir: string;
|
|
2385
|
+
/** Target dtype: "float32", "float16", "bfloat16" (default: keep original) */
|
|
2386
|
+
dtype?: string;
|
|
2387
|
+
/** Enable verbose logging */
|
|
2388
|
+
verbose?: boolean;
|
|
2389
|
+
/** Enable quantization of converted weights */
|
|
2390
|
+
quantize?: boolean;
|
|
2391
|
+
/** Quantization bits (default: 4) */
|
|
2392
|
+
quantBits?: number;
|
|
2393
|
+
/** Quantization group size (default: 64) */
|
|
2394
|
+
quantGroupSize?: number;
|
|
2395
|
+
/** Quantization mode: "affine" or "mxfp8" */
|
|
2396
|
+
quantMode?: string;
|
|
2397
|
+
/**
|
|
2398
|
+
* Quantization recipe for per-layer mixed-bit quantization.
|
|
2399
|
+
* Options: mixed_2_6, mixed_3_4, mixed_3_6, mixed_4_6, qwen3_5, unsloth
|
|
2400
|
+
*/
|
|
2401
|
+
quantRecipe?: string;
|
|
2402
|
+
/**
|
|
2403
|
+
* Path to an imatrix GGUF file for AWQ-style pre-scaling.
|
|
2404
|
+
* Improves quantization quality by amplifying important weight channels.
|
|
2405
|
+
*/
|
|
2406
|
+
imatrixPath?: string;
|
|
2407
|
+
/**
|
|
2408
|
+
* Output filename (default: "model.safetensors").
|
|
2409
|
+
* Useful for saving vision weights separately (e.g., "vision.safetensors").
|
|
2410
|
+
*/
|
|
2411
|
+
outputFilename?: string;
|
|
2412
|
+
/**
|
|
2413
|
+
* When true, remap LLM weight keys for VLM compatibility:
|
|
2414
|
+
* "model.X" → "language_model.model.X", "lm_head.X" → "language_model.lm_head.X"
|
|
2415
|
+
* This makes the safetensors compatible with mlx-vlm.
|
|
2416
|
+
*/
|
|
2417
|
+
vlmKeyPrefix?: boolean;
|
|
2418
|
+
}
|
|
2419
|
+
|
|
2420
|
+
export interface GgufConversionResult {
|
|
2421
|
+
numTensors: number;
|
|
2422
|
+
numParameters: number;
|
|
2423
|
+
outputPath: string;
|
|
2424
|
+
tensorNames: Array<string>;
|
|
2425
|
+
sourceFormat: string;
|
|
2426
|
+
}
|
|
2427
|
+
|
|
2428
|
+
export interface GpuInfo {
|
|
2429
|
+
/** GPU architecture generation (M1=13, M2=14, M3=15, M4=16, M5=17). */
|
|
2430
|
+
architectureGen: number;
|
|
2431
|
+
}
|
|
2432
|
+
|
|
1997
2433
|
/** Configuration for the GRPO training engine */
|
|
1998
2434
|
export interface GrpoEngineConfig {
|
|
1999
2435
|
/** Learning rate (default: 1e-6) */
|
|
@@ -2093,6 +2529,24 @@ export interface GrpoEngineConfig {
|
|
|
2093
2529
|
* then expand KV cache for G completions).
|
|
2094
2530
|
*/
|
|
2095
2531
|
useParallelBatchGeneration?: boolean;
|
|
2532
|
+
/**
|
|
2533
|
+
* Enable gradient checkpointing (default: true).
|
|
2534
|
+
* When true, each transformer layer's activations are discarded during the forward
|
|
2535
|
+
* pass and recomputed during backward, reducing peak memory from O(num_layers) to O(1)
|
|
2536
|
+
* for intermediate states. For Qwen3.5 0.8B, this reduces autograd peak from ~105GB to ~11GB.
|
|
2537
|
+
* The trade-off is ~30% more compute (one extra forward pass per layer during backward).
|
|
2538
|
+
*/
|
|
2539
|
+
gradientCheckpointing?: boolean;
|
|
2540
|
+
/** Optimizer type: "sgd" or "adamw" (default: "adamw") */
|
|
2541
|
+
optimizerType?: string;
|
|
2542
|
+
/** AdamW beta1 (default: 0.9) */
|
|
2543
|
+
adamwBeta1?: number;
|
|
2544
|
+
/** AdamW beta2 (default: 0.999) */
|
|
2545
|
+
adamwBeta2?: number;
|
|
2546
|
+
/** AdamW epsilon (default: 1e-8) */
|
|
2547
|
+
adamwEps?: number;
|
|
2548
|
+
/** Weight decay for AdamW (default: 0.01) */
|
|
2549
|
+
weightDecay?: number;
|
|
2096
2550
|
}
|
|
2097
2551
|
|
|
2098
2552
|
/** Configuration for GRPO loss computation */
|
|
@@ -2143,6 +2597,32 @@ export interface GrpoLossConfig {
|
|
|
2143
2597
|
vocabChunkSize?: number;
|
|
2144
2598
|
}
|
|
2145
2599
|
|
|
2600
|
+
/** Check whether profiling is currently enabled. */
|
|
2601
|
+
export declare function isProfilingEnabled(): boolean;
|
|
2602
|
+
|
|
2603
|
+
/** A single detected layout element. */
|
|
2604
|
+
export interface LayoutElement {
|
|
2605
|
+
/** Detection confidence score */
|
|
2606
|
+
score: number;
|
|
2607
|
+
/** Class label ID (0-24) */
|
|
2608
|
+
label: number;
|
|
2609
|
+
/** Human-readable label name (e.g., "title", "text", "table") */
|
|
2610
|
+
labelName: string;
|
|
2611
|
+
/** Bounding box in original image coordinates [x1, y1, x2, y2] */
|
|
2612
|
+
bbox: Array<number>;
|
|
2613
|
+
/** Reading order index (0 = first element to read) */
|
|
2614
|
+
order: number;
|
|
2615
|
+
}
|
|
2616
|
+
|
|
2617
|
+
export interface MemorySnapshot {
|
|
2618
|
+
/** Active (non-cached) memory in bytes. */
|
|
2619
|
+
activeBytes: number;
|
|
2620
|
+
/** Peak memory usage in bytes. */
|
|
2621
|
+
peakBytes: number;
|
|
2622
|
+
/** Cache memory in bytes. */
|
|
2623
|
+
cacheBytes: number;
|
|
2624
|
+
}
|
|
2625
|
+
|
|
2146
2626
|
/** Full model configuration */
|
|
2147
2627
|
export interface ModelConfig {
|
|
2148
2628
|
visionConfig: VisionConfig;
|
|
@@ -2156,8 +2636,18 @@ export interface ModelConfig {
|
|
|
2156
2636
|
eosTokenId: number;
|
|
2157
2637
|
}
|
|
2158
2638
|
|
|
2639
|
+
/** Result from document orientation classification. */
|
|
2640
|
+
export interface OrientationResult {
|
|
2641
|
+
/** Detected rotation angle (0, 90, 180, or 270 degrees) */
|
|
2642
|
+
angle: number;
|
|
2643
|
+
/** Confidence score */
|
|
2644
|
+
score: number;
|
|
2645
|
+
/** Angle label as string */
|
|
2646
|
+
label: string;
|
|
2647
|
+
}
|
|
2648
|
+
|
|
2159
2649
|
/** Output format options */
|
|
2160
|
-
export
|
|
2650
|
+
export enum OutputFormat {
|
|
2161
2651
|
/** Raw output with minimal processing */
|
|
2162
2652
|
Raw = 'Raw',
|
|
2163
2653
|
/** Plain text with aligned columns */
|
|
@@ -2166,6 +2656,8 @@ export declare const enum OutputFormat {
|
|
|
2166
2656
|
Markdown = 'Markdown',
|
|
2167
2657
|
/** HTML tables */
|
|
2168
2658
|
Html = 'Html',
|
|
2659
|
+
/** JSON structured output */
|
|
2660
|
+
Json = 'Json',
|
|
2169
2661
|
}
|
|
2170
2662
|
|
|
2171
2663
|
/** Configuration for creating an OutputStore connection */
|
|
@@ -2196,7 +2688,7 @@ export interface PagedCompletedSequence {
|
|
|
2196
2688
|
requestId: string;
|
|
2197
2689
|
/** All generated tokens (excluding prompt) */
|
|
2198
2690
|
tokens: Array<number>;
|
|
2199
|
-
/** Reason for completion ("
|
|
2691
|
+
/** Reason for completion ("stop", "length", "repetition", "tool_calls") */
|
|
2200
2692
|
finishReason: string;
|
|
2201
2693
|
}
|
|
2202
2694
|
|
|
@@ -2273,22 +2765,7 @@ export interface ParserConfig {
|
|
|
2273
2765
|
collapseEmptyRows?: boolean;
|
|
2274
2766
|
}
|
|
2275
2767
|
|
|
2276
|
-
/**
|
|
2277
|
-
* Parse tool calls from text (NAPI export)
|
|
2278
|
-
*
|
|
2279
|
-
* Extracts tool calls from model-generated text and returns both the cleaned text
|
|
2280
|
-
* and the parsed tool calls.
|
|
2281
|
-
*
|
|
2282
|
-
* # Example
|
|
2283
|
-
* ```typescript
|
|
2284
|
-
* import { parseToolCallsFromText } from '@mlx-node/core';
|
|
2285
|
-
*
|
|
2286
|
-
* const result = parseToolCallsFromText('<tool_call>{"name": "search", "arguments": {"q": "test"}}</tool_call>');
|
|
2287
|
-
* console.log(result.text); // ""
|
|
2288
|
-
* console.log(result.toolCalls[0].name); // "search"
|
|
2289
|
-
* console.log(result.toolCalls[0].arguments.q); // "test"
|
|
2290
|
-
* ```
|
|
2291
|
-
*/
|
|
2768
|
+
/** Parse tool calls from text (NAPI export) */
|
|
2292
2769
|
export declare function parseToolCallsFromText(text: string): ParseToolCallsResult;
|
|
2293
2770
|
|
|
2294
2771
|
/** Result of parsing tool calls from text */
|
|
@@ -2302,6 +2779,162 @@ export interface ParseToolCallsResult {
|
|
|
2302
2779
|
/** Parse VLM output into structured document */
|
|
2303
2780
|
export declare function parseVlmOutput(text: string): ParsedDocument;
|
|
2304
2781
|
|
|
2782
|
+
/**
|
|
2783
|
+
* Lightweight performance metrics returned by chat/chatStream when
|
|
2784
|
+
* `reportPerformance: true` is set in the config.
|
|
2785
|
+
*/
|
|
2786
|
+
export interface PerformanceMetrics {
|
|
2787
|
+
/**
|
|
2788
|
+
* Time to first token (ms) — wall-clock from generation start to
|
|
2789
|
+
* first token extracted. Includes tokenization, prefill (lazy graph
|
|
2790
|
+
* construction + first GPU eval), and first sample.
|
|
2791
|
+
*/
|
|
2792
|
+
ttftMs: number;
|
|
2793
|
+
/** Prefill throughput: prompt_tokens / (ttft_ms / 1000). */
|
|
2794
|
+
prefillTokensPerSecond: number;
|
|
2795
|
+
/**
|
|
2796
|
+
* Decode throughput: (generated_tokens - 1) / decode_time.
|
|
2797
|
+
* Excludes the first token (counted as prefill).
|
|
2798
|
+
*/
|
|
2799
|
+
decodeTokensPerSecond: number;
|
|
2800
|
+
}
|
|
2801
|
+
|
|
2802
|
+
export interface PhaseProfile {
|
|
2803
|
+
/** Phase name (e.g. "forward", "sample", "eval_token"). */
|
|
2804
|
+
name: string;
|
|
2805
|
+
/** Total wall-clock time spent in this phase (ms). */
|
|
2806
|
+
totalMs: number;
|
|
2807
|
+
/** Average time per invocation (µs). */
|
|
2808
|
+
avgUsPerToken: number;
|
|
2809
|
+
/** Number of invocations. */
|
|
2810
|
+
count: number;
|
|
2811
|
+
}
|
|
2812
|
+
|
|
2813
|
+
export interface ProfilingSession {
|
|
2814
|
+
/** GPU hardware info. */
|
|
2815
|
+
gpuInfo: GpuInfo;
|
|
2816
|
+
/** Total session duration (ms). */
|
|
2817
|
+
totalDurationMs: number;
|
|
2818
|
+
/** Individual generation profiles. */
|
|
2819
|
+
generations: Array<GenerationProfile>;
|
|
2820
|
+
/** Aggregate summary. */
|
|
2821
|
+
summary: ProfilingSummary;
|
|
2822
|
+
}
|
|
2823
|
+
|
|
2824
|
+
export interface ProfilingSummary {
|
|
2825
|
+
/** Total tokens generated across all generations. */
|
|
2826
|
+
totalTokens: number;
|
|
2827
|
+
/** Total prompt tokens across all generations. */
|
|
2828
|
+
totalPromptTokens: number;
|
|
2829
|
+
/** Average tokens per second. */
|
|
2830
|
+
avgTokensPerSecond: number;
|
|
2831
|
+
/** Average time to first token (ms). */
|
|
2832
|
+
avgTimeToFirstTokenMs: number;
|
|
2833
|
+
/** Average prefill time (ms). */
|
|
2834
|
+
avgPrefillMs: number;
|
|
2835
|
+
}
|
|
2836
|
+
|
|
2837
|
+
/**
|
|
2838
|
+
* Qwen3.5 model configuration (dense variant).
|
|
2839
|
+
*
|
|
2840
|
+
* For MoE models, use `Qwen3_5MoeConfig` from `qwen3_5_moe`.
|
|
2841
|
+
*/
|
|
2842
|
+
export interface Qwen35Config {
|
|
2843
|
+
vocabSize: number;
|
|
2844
|
+
hiddenSize: number;
|
|
2845
|
+
numLayers: number;
|
|
2846
|
+
numHeads: number;
|
|
2847
|
+
numKvHeads: number;
|
|
2848
|
+
intermediateSize: number;
|
|
2849
|
+
rmsNormEps: number;
|
|
2850
|
+
headDim: number;
|
|
2851
|
+
tieWordEmbeddings: boolean;
|
|
2852
|
+
attentionBias: boolean;
|
|
2853
|
+
maxPositionEmbeddings: number;
|
|
2854
|
+
padTokenId: number;
|
|
2855
|
+
eosTokenId: number;
|
|
2856
|
+
bosTokenId: number;
|
|
2857
|
+
linearNumValueHeads: number;
|
|
2858
|
+
linearNumKeyHeads: number;
|
|
2859
|
+
linearKeyHeadDim: number;
|
|
2860
|
+
linearValueHeadDim: number;
|
|
2861
|
+
linearConvKernelDim: number;
|
|
2862
|
+
fullAttentionInterval: number;
|
|
2863
|
+
partialRotaryFactor: number;
|
|
2864
|
+
ropeTheta: number;
|
|
2865
|
+
}
|
|
2866
|
+
|
|
2867
|
+
/** Generation configuration for Qwen3.5 */
|
|
2868
|
+
export interface Qwen35GenerationConfig {
|
|
2869
|
+
maxNewTokens: number;
|
|
2870
|
+
temperature?: number | undefined;
|
|
2871
|
+
topK?: number | undefined;
|
|
2872
|
+
topP?: number | undefined;
|
|
2873
|
+
minP?: number | undefined;
|
|
2874
|
+
}
|
|
2875
|
+
|
|
2876
|
+
/** Generation result */
|
|
2877
|
+
export interface Qwen35GenerationResult {
|
|
2878
|
+
tokens: Array<number>;
|
|
2879
|
+
text: string;
|
|
2880
|
+
numTokens: number;
|
|
2881
|
+
finishReason: string;
|
|
2882
|
+
}
|
|
2883
|
+
|
|
2884
|
+
/**
|
|
2885
|
+
* Qwen3.5 MoE model configuration.
|
|
2886
|
+
*
|
|
2887
|
+
* Contains all fields including MoE-specific ones (num_experts, etc.).
|
|
2888
|
+
*/
|
|
2889
|
+
export interface Qwen35MoeConfig {
|
|
2890
|
+
vocabSize: number;
|
|
2891
|
+
hiddenSize: number;
|
|
2892
|
+
numLayers: number;
|
|
2893
|
+
numHeads: number;
|
|
2894
|
+
numKvHeads: number;
|
|
2895
|
+
intermediateSize: number;
|
|
2896
|
+
rmsNormEps: number;
|
|
2897
|
+
headDim: number;
|
|
2898
|
+
tieWordEmbeddings: boolean;
|
|
2899
|
+
attentionBias: boolean;
|
|
2900
|
+
maxPositionEmbeddings: number;
|
|
2901
|
+
padTokenId: number;
|
|
2902
|
+
eosTokenId: number;
|
|
2903
|
+
bosTokenId: number;
|
|
2904
|
+
linearNumValueHeads: number;
|
|
2905
|
+
linearNumKeyHeads: number;
|
|
2906
|
+
linearKeyHeadDim: number;
|
|
2907
|
+
linearValueHeadDim: number;
|
|
2908
|
+
linearConvKernelDim: number;
|
|
2909
|
+
fullAttentionInterval: number;
|
|
2910
|
+
partialRotaryFactor: number;
|
|
2911
|
+
ropeTheta: number;
|
|
2912
|
+
numExperts: number;
|
|
2913
|
+
numExpertsPerTok: number;
|
|
2914
|
+
decoderSparseStep: number;
|
|
2915
|
+
sharedExpertIntermediateSize?: number | undefined;
|
|
2916
|
+
moeIntermediateSize?: number | undefined;
|
|
2917
|
+
normTopkProb: boolean;
|
|
2918
|
+
mlpOnlyLayers?: number[] | undefined;
|
|
2919
|
+
}
|
|
2920
|
+
|
|
2921
|
+
/** Generation configuration for Qwen3.5 MoE */
|
|
2922
|
+
export interface Qwen35MoeGenerationConfig {
|
|
2923
|
+
maxNewTokens: number;
|
|
2924
|
+
temperature?: number | undefined;
|
|
2925
|
+
topK?: number | undefined;
|
|
2926
|
+
topP?: number | undefined;
|
|
2927
|
+
minP?: number | undefined;
|
|
2928
|
+
}
|
|
2929
|
+
|
|
2930
|
+
/** Generation result */
|
|
2931
|
+
export interface Qwen35MoeGenerationResult {
|
|
2932
|
+
tokens: Array<number>;
|
|
2933
|
+
text: string;
|
|
2934
|
+
numTokens: number;
|
|
2935
|
+
finishReason: string;
|
|
2936
|
+
}
|
|
2937
|
+
|
|
2305
2938
|
/** Qwen3 model configuration */
|
|
2306
2939
|
export interface Qwen3Config {
|
|
2307
2940
|
vocabSize: number;
|
|
@@ -2344,6 +2977,17 @@ export interface Qwen3Config {
|
|
|
2344
2977
|
useFp8Cache?: boolean | undefined;
|
|
2345
2978
|
}
|
|
2346
2979
|
|
|
2980
|
+
/** Result of text recognition. */
|
|
2981
|
+
export interface RecResult {
|
|
2982
|
+
/** Recognized text */
|
|
2983
|
+
text: string;
|
|
2984
|
+
/** Confidence score (mean character probability) */
|
|
2985
|
+
score: number;
|
|
2986
|
+
}
|
|
2987
|
+
|
|
2988
|
+
/** Clear all collected profiling data and reset session timer. */
|
|
2989
|
+
export declare function resetProfilingData(): void;
|
|
2990
|
+
|
|
2347
2991
|
/** Result of resume position computation */
|
|
2348
2992
|
export interface ResumePosition {
|
|
2349
2993
|
/** Epoch to start from (0-indexed) */
|
|
@@ -2416,6 +3060,20 @@ export interface SamplingConfig {
|
|
|
2416
3060
|
minP?: number;
|
|
2417
3061
|
}
|
|
2418
3062
|
|
|
3063
|
+
/**
|
|
3064
|
+
* Parse VLM output and save directly as XLSX file.
|
|
3065
|
+
*
|
|
3066
|
+
* Convenience function that parses VLM output and writes it to an XLSX file.
|
|
3067
|
+
*
|
|
3068
|
+
* # Example
|
|
3069
|
+
* ```typescript
|
|
3070
|
+
* import { saveToXlsx } from '@mlx-node/core';
|
|
3071
|
+
*
|
|
3072
|
+
* saveToXlsx(vlmResult.text, 'output.xlsx');
|
|
3073
|
+
* ```
|
|
3074
|
+
*/
|
|
3075
|
+
export declare function saveToXlsx(text: string, filePath: string): void;
|
|
3076
|
+
|
|
2419
3077
|
/** Scheduler statistics (NAPI-compatible) */
|
|
2420
3078
|
export interface SchedulerStatsNapi {
|
|
2421
3079
|
/** Number of requests waiting to be scheduled */
|
|
@@ -2432,6 +3090,9 @@ export interface SchedulerStatsNapi {
|
|
|
2432
3090
|
totalRunningTokens: number;
|
|
2433
3091
|
}
|
|
2434
3092
|
|
|
3093
|
+
/** Enable or disable profiling globally. */
|
|
3094
|
+
export declare function setProfilingEnabled(enabled: boolean): void;
|
|
3095
|
+
|
|
2435
3096
|
/** Configuration for the SFT training engine */
|
|
2436
3097
|
export interface SftEngineConfig {
|
|
2437
3098
|
/** Learning rate (default: 2e-5) */
|
|
@@ -2461,6 +3122,11 @@ export interface SftEngineConfig {
|
|
|
2461
3122
|
* per-element analysis - useful for debugging but has significant performance overhead.
|
|
2462
3123
|
*/
|
|
2463
3124
|
verboseNanDetection?: boolean;
|
|
3125
|
+
/**
|
|
3126
|
+
* Enable gradient checkpointing to reduce memory (default: true)
|
|
3127
|
+
* Trades ~30% more compute for O(1) layer memory instead of O(num_layers).
|
|
3128
|
+
*/
|
|
3129
|
+
gradientCheckpointing?: boolean;
|
|
2464
3130
|
}
|
|
2465
3131
|
|
|
2466
3132
|
/** Metrics from a training epoch */
|
|
@@ -2560,6 +3226,14 @@ export interface TableRow {
|
|
|
2560
3226
|
cells: Array<TableCell>;
|
|
2561
3227
|
}
|
|
2562
3228
|
|
|
3229
|
+
/** A detected text bounding box. */
|
|
3230
|
+
export interface TextBox {
|
|
3231
|
+
/** Bounding box in original image coordinates [x1, y1, x2, y2] */
|
|
3232
|
+
bbox: Array<number>;
|
|
3233
|
+
/** Detection confidence score (mean probability inside box) */
|
|
3234
|
+
score: number;
|
|
3235
|
+
}
|
|
3236
|
+
|
|
2563
3237
|
/** Language model (text decoder) configuration */
|
|
2564
3238
|
export interface TextConfig {
|
|
2565
3239
|
modelType: string;
|
|
@@ -2682,6 +3356,12 @@ export interface TrainStepResultWithOutputs {
|
|
|
2682
3356
|
completionLengths: Array<number>;
|
|
2683
3357
|
}
|
|
2684
3358
|
|
|
3359
|
+
/** Result from document unwarping. */
|
|
3360
|
+
export interface UnwarpResult {
|
|
3361
|
+
/** Unwarped image as PNG bytes */
|
|
3362
|
+
image: Buffer;
|
|
3363
|
+
}
|
|
3364
|
+
|
|
2685
3365
|
/** Vision encoder configuration */
|
|
2686
3366
|
export interface VisionConfig {
|
|
2687
3367
|
modelType: string;
|
|
@@ -2698,13 +3378,18 @@ export interface VisionConfig {
|
|
|
2698
3378
|
spatialMergeSize: number;
|
|
2699
3379
|
}
|
|
2700
3380
|
|
|
3381
|
+
/** A batch item for VLM batch inference */
|
|
3382
|
+
export interface VlmBatchItem {
|
|
3383
|
+
/** Chat messages for this item */
|
|
3384
|
+
messages: Array<VlmChatMessage>;
|
|
3385
|
+
/** Encoded image buffers for this item (one image per item for OCR) */
|
|
3386
|
+
images?: Array<Buffer>;
|
|
3387
|
+
}
|
|
3388
|
+
|
|
2701
3389
|
/** Configuration for VLM chat */
|
|
2702
3390
|
export interface VlmChatConfig {
|
|
2703
|
-
/**
|
|
2704
|
-
|
|
2705
|
-
* These will be automatically processed using the ImageProcessor
|
|
2706
|
-
*/
|
|
2707
|
-
imagePaths?: Array<string>;
|
|
3391
|
+
/** Encoded image buffers to process (PNG/JPEG bytes) */
|
|
3392
|
+
images?: Array<Buffer>;
|
|
2708
3393
|
/** Maximum number of new tokens to generate (default: 512) */
|
|
2709
3394
|
maxNewTokens?: number;
|
|
2710
3395
|
/** Sampling temperature (0 = greedy, higher = more random) (default: 0.0 for OCR) */
|