@mlx-node/core 0.0.0 → 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. package/README.md +106 -0
  2. package/index.cjs +71 -53
  3. package/index.d.cts +965 -280
  4. package/package.json +16 -5
package/index.d.cts CHANGED
@@ -26,49 +26,81 @@ export declare class BatchGenerationResult {
26
26
  get groupSize(): number;
27
27
  }
28
28
 
29
+ /** Handle returned by `chat_stream()` to control an in-progress streaming generation. */
30
+ export declare class ChatStreamHandle {
31
+ cancel(): void;
32
+ }
33
+
29
34
  /**
30
- * Result from the high-level `chat()` API
35
+ * PP-DocLayoutV3 full model for document layout analysis.
31
36
  *
32
- * Contains structured responses with:
33
- * - Tool calls parsed as native JavaScript objects
34
- * - Thinking/reasoning extracted from `<think>` tags
35
- * - Clean text with all special tags stripped
37
+ * Combines HGNetV2 backbone, hybrid encoder, and RT-DETR decoder
38
+ * with mask-enhanced attention and reading order prediction.
36
39
  *
37
- * ## Example
38
- * ```typescript
39
- * const result = await model.chat(messages, { tools });
40
- * console.log(result.text); // Clean response
41
- * console.log(result.thinking); // Chain-of-thought (if any)
42
- * console.log(result.toolCalls); // Parsed tool calls
43
- * ```
40
+ * Weights must be downloaded from `PaddlePaddle/PP-DocLayoutV3_safetensors` on HuggingFace.
41
+ * The regular `PaddlePaddle/PP-DocLayoutV3` repo uses PaddlePaddle format and is not compatible.
44
42
  */
45
- export declare class ChatResult {
46
- /** Get the cleaned text (tool_call and think tags removed) */
47
- get text(): string;
48
- /** Get the extracted tool calls */
49
- get toolCalls(): Array<ToolCallResult>;
43
+ export declare class DocLayoutModel {
50
44
  /**
51
- * Get the extracted thinking/reasoning content
45
+ * Load a PP-DocLayoutV3 model from a directory containing `config.json` and `model.safetensors`.
52
46
  *
53
- * Returns the content from within `<think>...</think>` tags, or null if
54
- * no thinking tags were present in the response.
47
+ * The model directory should be cloned from `PaddlePaddle/PP-DocLayoutV3_safetensors` on HuggingFace.
55
48
  *
56
- * This is useful for:
57
- * - Debugging model reasoning
58
- * - Displaying chain-of-thought to users (optional)
59
- * - Analyzing model decision-making
49
+ * # Arguments
50
+ * * `model_path` - Path to model directory
51
+ *
52
+ * # Returns
53
+ * * Initialized DocLayoutModel ready for inference
60
54
  */
61
- get thinking(): string | null;
62
- /** Get the generated tokens */
63
- get tokens(): MxArray;
64
- /** Get the log probabilities */
65
- get logprobs(): MxArray;
66
- /** Get the finish reason ("stop", "length", "tool_calls", or "repetition") */
67
- get finishReason(): 'stop' | 'length' | 'tool_calls' | 'repetition';
68
- /** Get the number of tokens generated */
69
- get numTokens(): number;
70
- /** Get the raw text before tool call stripping (for debugging) */
71
- get rawText(): string;
55
+ static load(modelPath: string): DocLayoutModel;
56
+ /**
57
+ * Detect document layout elements in an image.
58
+ *
59
+ * # Arguments
60
+ * * `image_data` - Encoded image bytes (PNG/JPEG)
61
+ * * `threshold` - Optional confidence threshold (default 0.5)
62
+ *
63
+ * # Returns
64
+ * * Vec of LayoutElements sorted by reading order
65
+ */
66
+ detect(imageData: Buffer, threshold?: number | undefined | null): Array<LayoutElement>;
67
+ }
68
+ export type PPDocLayoutV3Model = DocLayoutModel;
69
+
70
+ /**
71
+ * PP-LCNet_x1_0 Document Orientation Classification model.
72
+ *
73
+ * Classifies document images into 4 orientation classes (0/90/180/270 degrees).
74
+ * Uses depthwise separable convolutions with HardSwish activation.
75
+ */
76
+ export declare class DocOrientationModel {
77
+ /** Load a DocOrientationModel from a directory containing model.safetensors and config.json. */
78
+ static load(modelPath: string): DocOrientationModel;
79
+ /**
80
+ * Classify the orientation of a document image.
81
+ *
82
+ * Returns the detected orientation angle (0, 90, 180, 270) and confidence.
83
+ */
84
+ classify(imageData: Buffer): OrientationResult;
85
+ /**
86
+ * Classify orientation and return the corrected (upright) image bytes.
87
+ *
88
+ * Returns classification result plus corrected PNG image bytes.
89
+ */
90
+ classifyAndRotate(imageData: Buffer): ClassifyRotateResult;
91
+ }
92
+
93
+ /**
94
+ * UVDoc Document Unwarping model.
95
+ *
96
+ * Predicts a 2D displacement field and applies it to correct perspective
97
+ * distortion in camera-captured documents.
98
+ */
99
+ export declare class DocUnwarpModel {
100
+ /** Load a DocUnwarpModel from a directory containing model.safetensors. */
101
+ static load(modelPath: string): DocUnwarpModel;
102
+ /** Unwarp a document image and return the corrected image bytes. */
103
+ unwarp(imageData: Buffer): UnwarpResult;
72
104
  }
73
105
 
74
106
  /** Result from text generation with detailed metadata */
@@ -79,8 +111,8 @@ export declare class GenerationResult {
79
111
  get tokens(): MxArray;
80
112
  /** Get the log probabilities */
81
113
  get logprobs(): MxArray;
82
- /** Get the finish reason ("eos", "length", or "repetition") */
83
- get finishReason(): 'eos' | 'length' | 'repetition';
114
+ /** Get the finish reason ("stop", "length", or "repetition") */
115
+ get finishReason(): 'stop' | 'length' | 'repetition';
84
116
  /** Get the number of tokens generated */
85
117
  get numTokens(): number;
86
118
  }
@@ -92,13 +124,17 @@ export declare class GenerationResult {
92
124
  */
93
125
  export declare class GrpoTrainingEngine {
94
126
  /**
95
- * Create a new training engine from an existing model
127
+ * Create a new training engine from a Qwen3 model
96
128
  *
97
129
  * # Arguments
98
130
  * * `model` - The Qwen3 model to train (will be cloned internally)
99
131
  * * `config` - Engine configuration
100
132
  */
101
133
  constructor(model: Qwen3Model, config: GrpoEngineConfig);
134
+ /** Create a new training engine from a Qwen3.5 dense model */
135
+ static fromQwen35(model: Qwen3_5Model, config: GrpoEngineConfig): GrpoTrainingEngine;
136
+ /** Create a new training engine from a Qwen3.5 MoE model */
137
+ static fromQwen35Moe(model: Qwen3_5MoeModel, config: GrpoEngineConfig): GrpoTrainingEngine;
102
138
  /** Register a built-in reward function */
103
139
  registerBuiltinReward(config: BuiltinRewardConfig): void;
104
140
  /**
@@ -203,10 +239,39 @@ export declare class GrpoTrainingEngine {
203
239
  get nanGradientCount(): number;
204
240
  /** Clear the emergency save flag (call after saving emergency checkpoint) */
205
241
  clearEmergencySaveFlag(): void;
242
+ /**
243
+ * Save optimizer state (moment tensors + step) to a SafeTensors file.
244
+ *
245
+ * The step counter is stored in the `__metadata__` field.
246
+ * Each parameter's first moment (m) and second moment (v) are stored as
247
+ * `{param_name}.m` and `{param_name}.v` tensors.
248
+ *
249
+ * No-op if the engine uses SGD (no optimizer state to save).
250
+ */
251
+ saveOptimizerState(path: string): void;
252
+ /**
253
+ * Load optimizer state (moment tensors + step) from a SafeTensors file.
254
+ *
255
+ * Restores the step counter from metadata and sets first/second moment
256
+ * tensors for each parameter found in the file.
257
+ *
258
+ * No-op if the engine uses SGD (no optimizer to restore).
259
+ */
260
+ loadOptimizerState(path: string): void;
206
261
  }
207
262
  export type GRPOTrainingEngine = GrpoTrainingEngine;
208
263
 
209
264
  export declare class MxArray {
265
+ equal(other: MxArray): MxArray;
266
+ notEqual(other: MxArray): MxArray;
267
+ less(other: MxArray): MxArray;
268
+ lessEqual(other: MxArray): MxArray;
269
+ greater(other: MxArray): MxArray;
270
+ greaterEqual(other: MxArray): MxArray;
271
+ logicalAnd(other: MxArray): MxArray;
272
+ logicalOr(other: MxArray): MxArray;
273
+ logicalNot(): MxArray;
274
+ where(x: MxArray, y: MxArray): MxArray;
210
275
  static fromInt32(data: Int32Array, shape: BigInt64Array): MxArray;
211
276
  static fromInt64(data: BigInt64Array, shape: BigInt64Array): MxArray;
212
277
  static fromUint32(data: Uint32Array, shape: BigInt64Array): MxArray;
@@ -234,60 +299,12 @@ export declare class MxArray {
234
299
  step?: number | undefined | null,
235
300
  dtype?: DType | undefined | null,
236
301
  ): MxArray;
237
- reshape(shape: BigInt64Array): MxArray;
238
302
  astype(dtype: DType): MxArray;
239
303
  /**
240
304
  * Create a copy of this array with a new handle.
241
305
  * This is useful for parameter loading to avoid handle aliasing issues.
242
306
  */
243
307
  copy(): MxArray;
244
- logSoftmax(axis: number): MxArray;
245
- exp(): MxArray;
246
- log(): MxArray;
247
- sum(axes?: Int32Array | undefined | null, keepdims?: boolean | undefined | null): MxArray;
248
- mean(axes?: Int32Array | undefined | null, keepdims?: boolean | undefined | null): MxArray;
249
- clip(minimum?: number | undefined | null, maximum?: number | undefined | null): MxArray;
250
- minimum(other: MxArray): MxArray;
251
- maximum(other: MxArray): MxArray;
252
- add(other: MxArray): MxArray;
253
- sub(other: MxArray): MxArray;
254
- mul(other: MxArray): MxArray;
255
- div(other: MxArray): MxArray;
256
- addScalar(value: number): MxArray;
257
- mulScalar(value: number): MxArray;
258
- subScalar(value: number): MxArray;
259
- divScalar(value: number): MxArray;
260
- matmul(other: MxArray): MxArray;
261
- /**
262
- * Fused matrix multiply-add: D = beta * C + alpha * (self @ B)
263
- * where self is A. More efficient than separate matmul and add operations.
264
- * Default: alpha=1.0, beta=1.0, giving D = C + (self @ B)
265
- */
266
- addmm(c: MxArray, b: MxArray, alpha?: number | undefined | null, beta?: number | undefined | null): MxArray;
267
- transpose(axes?: Int32Array | undefined | null): MxArray;
268
- take(indices: MxArray, axis: number): MxArray;
269
- takeAlongAxis(indices: MxArray, axis: number): MxArray;
270
- /**
271
- * Put values into array at specified indices along an axis
272
- * Equivalent to: result = array.copy(); result[..., indices] = values
273
- * This matches MLX's put_along_axis for efficient in-place-style updates
274
- */
275
- putAlongAxis(indices: MxArray, values: MxArray, axis: number): MxArray;
276
- slice(starts: BigInt64Array, stops: BigInt64Array): MxArray;
277
- /**
278
- * Concatenate two arrays along an axis
279
- * Optimized for the common binary concatenation case
280
- */
281
- static concatenate(a: MxArray, b: MxArray, axis: number): MxArray;
282
- /**
283
- * Concatenate multiple arrays along an axis
284
- * For concatenating 3 or more arrays
285
- */
286
- static concatenateMany(arrays: Array<MxArray>, axis?: number | undefined | null): MxArray;
287
- sort(axis?: number | undefined | null): MxArray;
288
- argsort(axis?: number | undefined | null): MxArray;
289
- partition(kth: number, axis?: number | undefined | null): MxArray;
290
- argpartition(kth: number, axis?: number | undefined | null): MxArray;
291
308
  eval(): void;
292
309
  evalAsync(): Promise<undefined>;
293
310
  size(): bigint;
@@ -314,7 +331,7 @@ export declare class MxArray {
314
331
  /**
315
332
  * Copy entire array from GPU to CPU as Float32Array
316
333
  *
317
- * ⚠吅 **PERFORMANCE WARNING**: This triggers a FULL GPUCPU memory transfer!
334
+ * **PERFORMANCE WARNING**: This triggers a FULL GPU->CPU memory transfer!
318
335
  *
319
336
  * **Performance impact**:
320
337
  * - Forces evaluation of lazy operations
@@ -335,7 +352,7 @@ export declare class MxArray {
335
352
  /**
336
353
  * Copy entire array from GPU to CPU as Int32Array
337
354
  *
338
- * ⚠吅 **PERFORMANCE WARNING**: This triggers a FULL GPUCPU memory transfer!
355
+ * **PERFORMANCE WARNING**: This triggers a FULL GPU->CPU memory transfer!
339
356
  *
340
357
  * See `to_float32()` documentation for performance implications and alternatives.
341
358
  * Prefer `item_int32()` for scalars.
@@ -344,57 +361,32 @@ export declare class MxArray {
344
361
  /**
345
362
  * Copy entire array from GPU to CPU as Uint32Array
346
363
  *
347
- * ⚠吅 **PERFORMANCE WARNING**: This triggers a FULL GPUCPU memory transfer!
364
+ * **PERFORMANCE WARNING**: This triggers a FULL GPU->CPU memory transfer!
348
365
  *
349
366
  * See `to_float32()` documentation for performance implications and alternatives.
350
367
  */
351
368
  toUint32(): Uint32Array;
352
- static stack(arrays: Array<MxArray>, axis?: number | undefined | null): MxArray;
353
- static randomUniform(shape: BigInt64Array, low: number, high: number, dtype?: DType | undefined | null): MxArray;
354
- static randomNormal(shape: BigInt64Array, mean: number, std: number, dtype?: DType | undefined | null): MxArray;
355
- static randomBernoulli(shape: BigInt64Array, prob: number): MxArray;
356
- static randint(shape: BigInt64Array, low: number, high: number): MxArray;
369
+ logSoftmax(axis: number): MxArray;
370
+ exp(): MxArray;
371
+ log(): MxArray;
372
+ clip(minimum?: number | undefined | null, maximum?: number | undefined | null): MxArray;
373
+ minimum(other: MxArray): MxArray;
374
+ maximum(other: MxArray): MxArray;
375
+ add(other: MxArray): MxArray;
376
+ sub(other: MxArray): MxArray;
377
+ mul(other: MxArray): MxArray;
378
+ div(other: MxArray): MxArray;
379
+ addScalar(value: number): MxArray;
380
+ mulScalar(value: number): MxArray;
381
+ subScalar(value: number): MxArray;
382
+ divScalar(value: number): MxArray;
383
+ matmul(other: MxArray): MxArray;
357
384
  /**
358
- * Sample from categorical distribution
359
- * Takes logits and returns sampled indices
385
+ * Fused matrix multiply-add: D = beta * C + alpha * (self @ B)
386
+ * where self is A. More efficient than separate matmul and add operations.
387
+ * Default: alpha=1.0, beta=1.0, giving D = C + (self @ B)
360
388
  */
361
- categorical(axis?: number | undefined | null): MxArray;
362
- equal(other: MxArray): MxArray;
363
- notEqual(other: MxArray): MxArray;
364
- less(other: MxArray): MxArray;
365
- lessEqual(other: MxArray): MxArray;
366
- greater(other: MxArray): MxArray;
367
- greaterEqual(other: MxArray): MxArray;
368
- logicalAnd(other: MxArray): MxArray;
369
- logicalOr(other: MxArray): MxArray;
370
- logicalNot(): MxArray;
371
- where(x: MxArray, y: MxArray): MxArray;
372
- argmax(axis: number, keepdims?: boolean | undefined | null): MxArray;
373
- argmin(axis: number, keepdims?: boolean | undefined | null): MxArray;
374
- max(axes?: Int32Array | undefined | null, keepdims?: boolean | undefined | null): MxArray;
375
- min(axes?: Int32Array | undefined | null, keepdims?: boolean | undefined | null): MxArray;
376
- prod(axes?: Int32Array | undefined | null, keepdims?: boolean | undefined | null): MxArray;
377
- var(
378
- axes?: Int32Array | undefined | null,
379
- keepdims?: boolean | undefined | null,
380
- ddof?: number | undefined | null,
381
- ): MxArray;
382
- std(
383
- axes?: Int32Array | undefined | null,
384
- keepdims?: boolean | undefined | null,
385
- ddof?: number | undefined | null,
386
- ): MxArray;
387
- logsumexp(axes?: Int32Array | undefined | null, keepdims?: boolean | undefined | null): MxArray;
388
- cumsum(axis: number): MxArray;
389
- cumprod(axis: number): MxArray;
390
- pad(padWidth: Int32Array, constantValue: number): MxArray;
391
- roll(shift: number, axis: number): MxArray;
392
- split(indicesOrSections: number, axis?: number | undefined | null): Array<MxArray>;
393
- tile(reps: Int32Array): MxArray;
394
- repeat(repeats: number, axis: number): MxArray;
395
- squeeze(axes?: Int32Array | undefined | null): MxArray;
396
- expandDims(axis: number): MxArray;
397
- broadcastTo(shape: BigInt64Array): MxArray;
389
+ addmm(c: MxArray, b: MxArray, alpha?: number | undefined | null, beta?: number | undefined | null): MxArray;
398
390
  abs(): MxArray;
399
391
  negative(): MxArray;
400
392
  sign(): MxArray;
@@ -440,6 +432,69 @@ export declare class MxArray {
440
432
  * This is a GPU-native operation that avoids CPU data transfer.
441
433
  */
442
434
  isfinite(): MxArray;
435
+ static randomUniform(shape: BigInt64Array, low: number, high: number, dtype?: DType | undefined | null): MxArray;
436
+ static randomNormal(shape: BigInt64Array, mean: number, std: number, dtype?: DType | undefined | null): MxArray;
437
+ static randomBernoulli(shape: BigInt64Array, prob: number): MxArray;
438
+ static randint(shape: BigInt64Array, low: number, high: number): MxArray;
439
+ /**
440
+ * Sample from categorical distribution
441
+ * Takes logits and returns sampled indices
442
+ */
443
+ categorical(axis?: number | undefined | null): MxArray;
444
+ sum(axes?: Int32Array | undefined | null, keepdims?: boolean | undefined | null): MxArray;
445
+ mean(axes?: Int32Array | undefined | null, keepdims?: boolean | undefined | null): MxArray;
446
+ argmax(axis: number, keepdims?: boolean | undefined | null): MxArray;
447
+ argmin(axis: number, keepdims?: boolean | undefined | null): MxArray;
448
+ max(axes?: Int32Array | undefined | null, keepdims?: boolean | undefined | null): MxArray;
449
+ min(axes?: Int32Array | undefined | null, keepdims?: boolean | undefined | null): MxArray;
450
+ prod(axes?: Int32Array | undefined | null, keepdims?: boolean | undefined | null): MxArray;
451
+ var(
452
+ axes?: Int32Array | undefined | null,
453
+ keepdims?: boolean | undefined | null,
454
+ ddof?: number | undefined | null,
455
+ ): MxArray;
456
+ std(
457
+ axes?: Int32Array | undefined | null,
458
+ keepdims?: boolean | undefined | null,
459
+ ddof?: number | undefined | null,
460
+ ): MxArray;
461
+ logsumexp(axes?: Int32Array | undefined | null, keepdims?: boolean | undefined | null): MxArray;
462
+ cumsum(axis: number): MxArray;
463
+ cumprod(axis: number): MxArray;
464
+ reshape(shape: BigInt64Array): MxArray;
465
+ transpose(axes?: Int32Array | undefined | null): MxArray;
466
+ take(indices: MxArray, axis: number): MxArray;
467
+ takeAlongAxis(indices: MxArray, axis: number): MxArray;
468
+ /**
469
+ * Put values into array at specified indices along an axis
470
+ * Equivalent to: result = array.copy(); result[..., indices] = values
471
+ * This matches MLX's put_along_axis for efficient in-place-style updates
472
+ */
473
+ putAlongAxis(indices: MxArray, values: MxArray, axis: number): MxArray;
474
+ slice(starts: BigInt64Array, stops: BigInt64Array): MxArray;
475
+ /**
476
+ * Concatenate two arrays along an axis
477
+ * Optimized for the common binary concatenation case
478
+ */
479
+ static concatenate(a: MxArray, b: MxArray, axis: number): MxArray;
480
+ /**
481
+ * Concatenate multiple arrays along an axis
482
+ * For concatenating 3 or more arrays
483
+ */
484
+ static concatenateMany(arrays: Array<MxArray>, axis?: number | undefined | null): MxArray;
485
+ sort(axis?: number | undefined | null): MxArray;
486
+ argsort(axis?: number | undefined | null): MxArray;
487
+ partition(kth: number, axis?: number | undefined | null): MxArray;
488
+ argpartition(kth: number, axis?: number | undefined | null): MxArray;
489
+ static stack(arrays: Array<MxArray>, axis?: number | undefined | null): MxArray;
490
+ pad(padWidth: Int32Array, constantValue: number): MxArray;
491
+ roll(shift: number, axis: number): MxArray;
492
+ split(indicesOrSections: number, axis?: number | undefined | null): Array<MxArray>;
493
+ tile(reps: Int32Array): MxArray;
494
+ repeat(repeats: number, axis: number): MxArray;
495
+ squeeze(axes?: Int32Array | undefined | null): MxArray;
496
+ expandDims(axis: number): MxArray;
497
+ broadcastTo(shape: BigInt64Array): MxArray;
443
498
  }
444
499
 
445
500
  /** NAPI-exported reward registry wrapper */
@@ -584,6 +639,128 @@ export declare class OutputStore {
584
639
  queryRaw(sql: string): Promise<string>;
585
640
  }
586
641
 
642
+ /**
643
+ * Qwen3.5 Model -- hybrid linear/full attention with optional MoE.
644
+ *
645
+ * Uses interior mutability (RwLock) for layers, final_norm, lm_head, and caches
646
+ * to allow async generation via spawn_blocking without blocking the Node.js event loop.
647
+ * This matches the pattern used by Qwen3Model.
648
+ */
649
+ export declare class Qwen35Model {
650
+ /** Create a new Qwen3.5 model with the given configuration. */
651
+ constructor(config: Qwen35Config);
652
+ /** Initialize caches for incremental generation. */
653
+ initCaches(): void;
654
+ /** Reset all caches. */
655
+ resetCaches(): void;
656
+ /**
657
+ * Forward pass through the model.
658
+ *
659
+ * # Arguments
660
+ * * `input_ids` - Token IDs [B, T]
661
+ *
662
+ * # Returns
663
+ * Logits [B, T, vocab_size]
664
+ */
665
+ forward(inputIds: MxArray): MxArray;
666
+ /** Forward pass with cache for incremental generation. */
667
+ forwardWithCache(inputIds: MxArray): MxArray;
668
+ /**
669
+ * Load a pretrained model from a directory.
670
+ *
671
+ * Expects the directory to contain:
672
+ * - config.json
673
+ * - model.safetensors (or model-*.safetensors)
674
+ * - tokenizer.json + tokenizer_config.json
675
+ */
676
+ static load(path: string): Promise<Qwen35Model>;
677
+ /**
678
+ * Generate text from a prompt token sequence.
679
+ *
680
+ * Runs generation on a worker thread via spawn_blocking to avoid
681
+ * blocking the Node.js event loop.
682
+ */
683
+ generate(promptTokens: MxArray, config: Qwen35GenerationConfig): Promise<Qwen35GenerationResult>;
684
+ /**
685
+ * Chat API with tool calling support.
686
+ *
687
+ * Runs tokenization + generation on a worker thread via spawn_blocking
688
+ * to avoid blocking the Node.js event loop.
689
+ */
690
+ chat(messages: Array<ChatMessage>, config?: ChatConfig | undefined | null): Promise<ChatResult>;
691
+ /**
692
+ * Streaming chat API with tool calling support.
693
+ *
694
+ * Same as `chat()` but streams tokens one-by-one via the callback.
695
+ * Returns a `ChatStreamHandle` immediately; generation runs in background.
696
+ * Call `handle.cancel()` to abort generation early.
697
+ */
698
+ chatStream(
699
+ messages: ChatMessage[],
700
+ config: ChatConfig | null,
701
+ callback: (err: Error | null, chunk: ChatStreamChunk) => void,
702
+ ): Promise<ChatStreamHandle>;
703
+ /** Get the number of parameters in the model. */
704
+ numParameters(): number;
705
+ /**
706
+ * Save the model weights and configuration to a directory.
707
+ *
708
+ * This saves:
709
+ * - config.json: Model configuration (with model_type for detectModelType)
710
+ * - weights.safetensors: Full model weights in SafeTensors format
711
+ * - weights.mlx: Parameter metadata (for reference)
712
+ *
713
+ * # Arguments
714
+ * * `save_path` - Directory to save the model
715
+ */
716
+ saveModel(savePath: string): Promise<undefined>;
717
+ }
718
+ export type Qwen3_5Model = Qwen35Model;
719
+
720
+ /**
721
+ * Qwen3.5 MoE Model -- hybrid linear/full attention with Mixture-of-Experts.
722
+ *
723
+ * Supports C++ MoE forward path (non-compiled, builds fresh graph per step)
724
+ * when weights are registered via `register_moe_weights_with_cpp`.
725
+ * Falls back to Rust forward_inner path for test models without stored weights.
726
+ */
727
+ export declare class Qwen35MoeModel {
728
+ constructor(config: Qwen35MoeConfig);
729
+ initCaches(): void;
730
+ resetCaches(): void;
731
+ forward(inputIds: MxArray): MxArray;
732
+ forwardWithCache(inputIds: MxArray): MxArray;
733
+ static load(path: string): Promise<Qwen35MoeModel>;
734
+ generate(promptTokens: MxArray, config: Qwen35MoeGenerationConfig): Promise<Qwen35MoeGenerationResult>;
735
+ chat(messages: Array<ChatMessage>, config?: ChatConfig | undefined | null): Promise<ChatResult>;
736
+ /**
737
+ * Streaming chat API with tool calling support.
738
+ *
739
+ * Same as `chat()` but streams tokens one-by-one via the callback.
740
+ * Returns a `ChatStreamHandle` immediately; generation runs in background.
741
+ * Call `handle.cancel()` to abort generation early.
742
+ */
743
+ chatStream(
744
+ messages: ChatMessage[],
745
+ config: ChatConfig | null,
746
+ callback: (err: Error | null, chunk: ChatStreamChunk) => void,
747
+ ): Promise<ChatStreamHandle>;
748
+ numParameters(): number;
749
+ /**
750
+ * Save the model weights and configuration to a directory.
751
+ *
752
+ * This saves:
753
+ * - config.json: Model configuration (with model_type for detectModelType)
754
+ * - weights.safetensors: Full model weights in SafeTensors format
755
+ * - weights.mlx: Parameter metadata (for reference)
756
+ *
757
+ * # Arguments
758
+ * * `save_path` - Directory to save the model
759
+ */
760
+ saveModel(savePath: string): Promise<undefined>;
761
+ }
762
+ export type Qwen3_5MoeModel = Qwen35MoeModel;
763
+
587
764
  /**
588
765
  * Qwen3 Model with automatic differentiation support
589
766
  *
@@ -750,8 +927,8 @@ export declare class Qwen3Model {
750
927
  *
751
928
  * # Example (TypeScript)
752
929
  * ```typescript
753
- * const targetModel = await ModelLoader.loadPretrained('qwen3-7b');
754
- * const draftModel = await ModelLoader.loadPretrained('qwen3-0.5b');
930
+ * const targetModel = await loadModel('qwen3-7b');
931
+ * const draftModel = await loadModel('qwen3-0.5b');
755
932
  *
756
933
  * const result = targetModel.generateSpeculativeSync(draftModel, inputIds, {
757
934
  * numDraftTokens: 5,
@@ -941,7 +1118,7 @@ export declare class Qwen3Model {
941
1118
  *
942
1119
  * # Example
943
1120
  * ```typescript
944
- * const model = await Qwen3Model.loadPretrained("path/to/model");
1121
+ * const model = await Qwen3Model.load("path/to/model");
945
1122
  * const messages = [
946
1123
  * { role: "user", content: "What is 2+2?" }
947
1124
  * ];
@@ -1070,7 +1247,7 @@ export declare class Qwen3Model {
1070
1247
  * Decode token IDs to text using the internal tokenizer
1071
1248
  *
1072
1249
  * Helper method for decoding generated tokens. The model must have been loaded
1073
- * via load_pretrained() to have a tokenizer available.
1250
+ * via load() to have a tokenizer available.
1074
1251
  *
1075
1252
  * # Arguments
1076
1253
  * * `token_ids` - Token IDs to decode as Uint32Array
@@ -1084,7 +1261,7 @@ export declare class Qwen3Model {
1084
1261
  * Apply chat template and encode to token IDs
1085
1262
  *
1086
1263
  * Formats messages using ChatML format (or Jinja2 template with tools) and encodes to tokens.
1087
- * The model must have been loaded via load_pretrained() to have a tokenizer available.
1264
+ * The model must have been loaded via load() to have a tokenizer available.
1088
1265
  *
1089
1266
  * # Arguments
1090
1267
  * * `messages` - Array of chat messages
@@ -1115,7 +1292,7 @@ export declare class Qwen3Model {
1115
1292
  * # Returns
1116
1293
  * * A fully initialized Qwen3Model with loaded weights
1117
1294
  */
1118
- static loadPretrained(modelPath: string): Promise<Qwen3Model>;
1295
+ static load(modelPath: string): Promise<Qwen3Model>;
1119
1296
  /**
1120
1297
  * Save model configuration and weights to disk
1121
1298
  *
@@ -1278,8 +1455,12 @@ export declare class Qwen3Tokenizer {
1278
1455
 
1279
1456
  /** SFT Training Engine */
1280
1457
  export declare class SftTrainingEngine {
1281
- /** Create a new SFT training engine */
1458
+ /** Create a new SFT training engine from a Qwen3 model */
1282
1459
  constructor(model: Qwen3Model, config: SftEngineConfig);
1460
+ /** Create a new SFT training engine from a Qwen3.5 dense model */
1461
+ static fromQwen35(model: Qwen35Model, config: SftEngineConfig): SftTrainingEngine;
1462
+ /** Create a new SFT training engine from a Qwen3.5 MoE model */
1463
+ static fromQwen35Moe(model: Qwen35MoeModel, config: SftEngineConfig): SftTrainingEngine;
1283
1464
  /** Run a single training step */
1284
1465
  trainStep(inputIds: MxArray, labels: MxArray): Promise<SftStepMetrics>;
1285
1466
  /** Get current step number */
@@ -1318,8 +1499,12 @@ export declare class SftTrainingEngine {
1318
1499
  reset(): void;
1319
1500
  /** Restore training state (for resuming from checkpoint) */
1320
1501
  restoreState(step: number, epoch: number): void;
1321
- /** Get the underlying model for checkpointing */
1502
+ /** Get the underlying Qwen3 model for checkpointing */
1322
1503
  getModel(): Qwen3Model;
1504
+ /** Get the underlying Qwen3.5 dense model for checkpointing */
1505
+ getQwen35Model(): Qwen35Model;
1506
+ /** Get the underlying Qwen3.5 MoE model for checkpointing */
1507
+ getQwen35MoeModel(): Qwen35MoeModel;
1323
1508
  }
1324
1509
 
1325
1510
  /**
@@ -1363,25 +1548,112 @@ export declare class Tensor {
1363
1548
  /** Convert to Int32 array */
1364
1549
  toInt32(): Int32Array;
1365
1550
  /**
1366
- * Detach this tensor from the computation graph
1551
+ * Detach this tensor from the computation graph
1552
+ *
1553
+ * Returns a new tensor with the same data but no gradient tracking
1554
+ */
1555
+ detach(): Tensor;
1556
+ /** Create a tensor of zeros */
1557
+ static zeros(
1558
+ shape: BigInt64Array,
1559
+ dtype?: DType | undefined | null,
1560
+ requiresGrad?: boolean | undefined | null,
1561
+ ): Tensor;
1562
+ /** Create a tensor of ones */
1563
+ static ones(
1564
+ shape: BigInt64Array,
1565
+ dtype?: DType | undefined | null,
1566
+ requiresGrad?: boolean | undefined | null,
1567
+ ): Tensor;
1568
+ /** Evaluate the underlying array */
1569
+ eval(): void;
1570
+ }
1571
+
1572
+ /**
1573
+ * PP-OCRv5 Text Detection model (DBNet with PPHGNetV2 backbone).
1574
+ *
1575
+ * Detects text lines in document images and returns bounding boxes.
1576
+ */
1577
+ export declare class TextDetModel {
1578
+ /**
1579
+ * Load a TextDetModel from a directory containing model.safetensors.
1580
+ *
1581
+ * # Arguments
1582
+ * * `model_path` - Path to model directory
1583
+ */
1584
+ static load(modelPath: string): TextDetModel;
1585
+ /**
1586
+ * Detect text lines in an image.
1587
+ *
1588
+ * # Arguments
1589
+ * * `image_data` - Encoded image bytes (PNG/JPEG)
1590
+ * * `threshold` - Optional detection threshold (default from config, typically 0.3)
1591
+ *
1592
+ * # Returns
1593
+ * * Vec of TextBox with bounding boxes and confidence scores
1594
+ */
1595
+ detect(imageData: Buffer, threshold?: number | undefined | null): Array<TextBox>;
1596
+ /**
1597
+ * Detect text lines from raw RGB pixel data.
1598
+ *
1599
+ * # Arguments
1600
+ * * `rgb_data` - Raw RGB pixel data
1601
+ * * `width` - Image width
1602
+ * * `height` - Image height
1603
+ * * `threshold` - Optional detection threshold (default from config)
1604
+ *
1605
+ * # Returns
1606
+ * * Vec of TextBox with bounding boxes and confidence scores
1607
+ */
1608
+ detectCrop(rgbData: Uint8Array, width: number, height: number, threshold?: number | undefined | null): Array<TextBox>;
1609
+ }
1610
+
1611
+ /**
1612
+ * PP-OCRv5 Text Recognition model (PPHGNetV2 + SVTR + CTC).
1613
+ *
1614
+ * Recognizes text from cropped text line images.
1615
+ */
1616
+ export declare class TextRecModel {
1617
+ /**
1618
+ * Load a TextRecModel from a directory containing model.safetensors.
1619
+ *
1620
+ * # Arguments
1621
+ * * `model_path` - Path to model directory
1622
+ * * `dict_path` - Path to character dictionary text file
1623
+ */
1624
+ static load(modelPath: string, dictPath: string): TextRecModel;
1625
+ /**
1626
+ * Recognize text from encoded image bytes.
1627
+ *
1628
+ * # Arguments
1629
+ * * `image_data` - Encoded image bytes (PNG/JPEG)
1630
+ *
1631
+ * # Returns
1632
+ * * RecResult with recognized text and confidence score
1633
+ */
1634
+ recognize(imageData: Buffer): RecResult;
1635
+ /**
1636
+ * Recognize text from multiple encoded images.
1367
1637
  *
1368
- * Returns a new tensor with the same data but no gradient tracking
1638
+ * # Arguments
1639
+ * * `images` - Vec of encoded image bytes (PNG/JPEG)
1640
+ *
1641
+ * # Returns
1642
+ * * Vec of RecResult with recognized text and confidence scores
1369
1643
  */
1370
- detach(): Tensor;
1371
- /** Create a tensor of zeros */
1372
- static zeros(
1373
- shape: BigInt64Array,
1374
- dtype?: DType | undefined | null,
1375
- requiresGrad?: boolean | undefined | null,
1376
- ): Tensor;
1377
- /** Create a tensor of ones */
1378
- static ones(
1379
- shape: BigInt64Array,
1380
- dtype?: DType | undefined | null,
1381
- requiresGrad?: boolean | undefined | null,
1382
- ): Tensor;
1383
- /** Evaluate the underlying array */
1384
- eval(): void;
1644
+ recognizeBatch(images: Array<Buffer>): Array<RecResult>;
1645
+ /**
1646
+ * Recognize text from raw RGB crop data.
1647
+ *
1648
+ * # Arguments
1649
+ * * `rgb_data` - Raw RGB pixel data of a cropped text line
1650
+ * * `width` - Image width
1651
+ * * `height` - Image height
1652
+ *
1653
+ * # Returns
1654
+ * * RecResult with recognized text and confidence score
1655
+ */
1656
+ recognizeCrop(rgbData: Uint8Array, width: number, height: number): RecResult;
1385
1657
  }
1386
1658
 
1387
1659
  /** Result from VLM chat */
@@ -1419,27 +1691,27 @@ export declare class VLModel {
1419
1691
  *
1420
1692
  * # Arguments
1421
1693
  * * `messages` - Chat messages (role + content)
1422
- * * `config` - Chat configuration (including image_paths for automatic processing)
1694
+ * * `config` - Chat configuration (including images for automatic processing)
1423
1695
  *
1424
1696
  * # Returns
1425
1697
  * * VLMChatResult with generated text
1426
1698
  *
1427
1699
  * # Example
1428
1700
  * ```typescript
1429
- * const result = model.chat(
1701
+ * const result = await model.chat(
1430
1702
  * [{ role: 'user', content: 'Describe this image.' }],
1431
- * { imagePaths: ['./photo.jpg'], maxNewTokens: 256 }
1703
+ * { images: [readFileSync('./photo.jpg')], maxNewTokens: 256 }
1432
1704
  * );
1433
1705
  * ```
1434
1706
  */
1435
- chat(messages: Array<VlmChatMessage>, config?: VlmChatConfig | undefined | null): VlmChatResult;
1707
+ chat(messages: Array<VlmChatMessage>, config?: VlmChatConfig | undefined | null): Promise<VlmChatResult>;
1436
1708
  /**
1437
- * Simple OCR: extract text from an image file
1709
+ * Simple OCR: extract text from encoded image bytes
1438
1710
  *
1439
1711
  * Convenience method that processes an image and extracts all text.
1440
1712
  *
1441
1713
  * # Arguments
1442
- * * `image_path` - Path to the image file
1714
+ * * `image_data` - Encoded image bytes (PNG/JPEG)
1443
1715
  * * `prompt` - Optional custom prompt (default: "Extract all text from this image.")
1444
1716
  *
1445
1717
  * # Returns
@@ -1447,11 +1719,11 @@ export declare class VLModel {
1447
1719
  *
1448
1720
  * # Example
1449
1721
  * ```typescript
1450
- * const text = await model.ocr('./receipt.jpg');
1722
+ * const text = await model.ocr(imageBuffer);
1451
1723
  * console.log(text);
1452
1724
  * ```
1453
1725
  */
1454
- ocr(imagePath: string, prompt?: string | undefined | null): string;
1726
+ ocr(imageData: Buffer, prompt?: string | undefined | null): Promise<string>;
1455
1727
  /**
1456
1728
  * Get input embeddings with vision features merged
1457
1729
  *
@@ -1507,7 +1779,40 @@ export declare class VLModel {
1507
1779
  pixelValues?: MxArray | undefined | null,
1508
1780
  imageGridThw?: MxArray | undefined | null,
1509
1781
  config?: GenerationConfig | undefined | null,
1510
- ): GenerationResult;
1782
+ ): Promise<GenerationResult>;
1783
+ /**
1784
+ * Batch OCR: extract text from multiple images simultaneously
1785
+ *
1786
+ * Processes N images with sequential prefill + batched decode for ~N× decode throughput.
1787
+ *
1788
+ * # Arguments
1789
+ * * `images` - Encoded image buffers
1790
+ * * `config` - Optional chat configuration (shared across all items)
1791
+ *
1792
+ * # Returns
1793
+ * * Vec of extracted text strings, one per image
1794
+ *
1795
+ * # Example
1796
+ * ```typescript
1797
+ * import { readFileSync } from 'fs';
1798
+ * const images = ['page1.jpg', 'page2.jpg'].map(p => readFileSync(p));
1799
+ * const texts = await model.ocrBatch(images);
1800
+ * ```
1801
+ */
1802
+ ocrBatch(images: Array<Buffer>, config?: VlmChatConfig | undefined | null): Promise<Array<string>>;
1803
+ /**
1804
+ * Batch chat: process multiple items simultaneously
1805
+ *
1806
+ * Sequential prefill + batched decode. Each item can have different images/prompts.
1807
+ *
1808
+ * # Arguments
1809
+ * * `batch` - Batch items, each with messages and optional images
1810
+ * * `config` - Optional shared chat configuration
1811
+ *
1812
+ * # Returns
1813
+ * * Vec of VLMChatResult, one per batch item
1814
+ */
1815
+ batch(batch: Array<VlmBatchItem>, config?: VlmChatConfig | undefined | null): Promise<Array<VlmChatResult>>;
1511
1816
  /** Get model configuration */
1512
1817
  get config(): ModelConfig;
1513
1818
  /** Check if model is fully initialized */
@@ -1529,7 +1834,7 @@ export declare class VLModel {
1529
1834
  * ```typescript
1530
1835
  * import { VLModel } from '@mlx-node/vlm';
1531
1836
  * const model = await VLModel.load('./models/paddleocr-vl');
1532
- * const result = model.chat(messages, { imagePaths: ['./image.jpg'] });
1837
+ * const result = await model.chat(messages, { images: [readFileSync('./image.jpg')] });
1533
1838
  * ```
1534
1839
  */
1535
1840
  static load(modelPath: string): Promise<VLModel>;
@@ -1559,35 +1864,6 @@ export declare class VLModel {
1559
1864
  *
1560
1865
  * Parses tool calls and thinking from completions, creating structured outputs
1561
1866
  * aligned with the ChatResult structure.
1562
- *
1563
- * # Arguments
1564
- * * `prompts` - Array of prompt texts (one per unique prompt, will be expanded by group_size)
1565
- * * `completions` - Array of completion texts (prompts.len() * group_size total)
1566
- * * `token_counts` - Array of token counts for each completion
1567
- * * `finish_reasons` - Array of finish reasons from generation ("eos", "length", "stop", "repetition")
1568
- * * `group_size` - Number of completions per prompt
1569
- *
1570
- * # Returns
1571
- * Array of RewardOutput objects with structured completion data
1572
- *
1573
- * # Example
1574
- * ```typescript
1575
- * import { buildRewardOutputs } from '@mlx-node/core';
1576
- *
1577
- * const outputs = buildRewardOutputs(
1578
- * ['What is 2+2?'], // prompts
1579
- * ['<think>Let me calculate</think>
1580
-
1581
- 4', '4'], // completions (group_size=2)
1582
- * [10, 5], // token counts
1583
- * ['eos', 'length'], // finish reasons
1584
- * 2 // group_size
1585
- * );
1586
- *
1587
- * outputs[0].completion.thinking; // "Let me calculate"
1588
- * outputs[0].completion.text; // "4"
1589
- * outputs[0].completion.finishReason; // "eos"
1590
- * ```
1591
1867
  */
1592
1868
  export declare function buildRewardOutputs(
1593
1869
  prompts: Array<string>,
@@ -1627,67 +1903,41 @@ export declare const enum BuiltinRewardType {
1627
1903
  XmlFormat = 'XmlFormat',
1628
1904
  /** Length-based scoring */
1629
1905
  Length = 'Length',
1630
- /** JSON schema validation */
1906
+ /** JSON format validation (brace matching + field name check, not full JSON parsing) */
1631
1907
  JsonSchema = 'JsonSchema',
1632
1908
  }
1633
1909
 
1634
- /**
1635
- * Configuration for the high-level `chat()` API
1636
- *
1637
- * Combines tool definitions with generation parameters in a single config object.
1638
- * Tools are optional - when not provided, `chat()` works as a simple conversational API.
1639
- *
1640
- * ## Example
1641
- * ```typescript
1642
- * // Simple chat (no tools)
1643
- * const result = await model.chat(messages);
1644
- *
1645
- * // With tools
1646
- * const result = await model.chat(messages, {
1647
- * tools: [weatherTool, searchTool],
1648
- * maxNewTokens: 2048,
1649
- * temperature: 0.7,
1650
- * });
1651
- * ```
1652
- */
1910
+ /** Unified chat configuration shared by all model variants (Qwen3, Qwen3.5, Qwen3.5 MoE). */
1653
1911
  export interface ChatConfig {
1912
+ maxNewTokens?: number | undefined;
1913
+ temperature?: number | undefined;
1914
+ topK?: number | undefined;
1915
+ topP?: number | undefined;
1916
+ minP?: number | undefined;
1917
+ /** Repetition penalty (1.0 = disabled). Penalizes tokens already in context. */
1918
+ repetitionPenalty?: number | undefined;
1919
+ /** Size of the context window for repetition penalty (default: 256) */
1920
+ repetitionContextSize?: number | undefined;
1921
+ /** Max consecutive identical tokens before stopping (default: 16, 0 = disabled) */
1922
+ maxConsecutiveTokens?: number | undefined;
1923
+ /** Max n-gram repetitions before stopping (default: 3, 0 = disabled) */
1924
+ maxNgramRepeats?: number | undefined;
1925
+ /** Max pattern size for n-gram repetition detection (default: 64) */
1926
+ ngramSize?: number | undefined;
1927
+ tools?: Array<ToolDefinition>;
1654
1928
  /**
1655
- * Tool definitions for function calling (optional)
1656
- *
1657
- * When provided, the model can invoke these tools during generation.
1658
- * Tool calls are parsed and returned in `ChatResult.toolCalls`.
1929
+ * Enable thinking mode (Qwen3's <think> tags). Default: true (model thinks naturally).
1930
+ * Set to false to suppress thinking by injecting empty <think></think> tags.
1659
1931
  */
1660
- tools?: Array<ToolDefinition>;
1661
- /** Maximum number of new tokens to generate (default: 2048 for chat) */
1662
- maxNewTokens?: number;
1663
- /** Sampling temperature (0 = greedy, higher = more random) (default: 0.7) */
1664
- temperature?: number;
1665
- /** Top-k sampling: keep only top k tokens (0 = disabled) (default: 0) */
1666
- topK?: number;
1667
- /** Top-p (nucleus) sampling: keep tokens with cumulative prob < p (default: 0.9) */
1668
- topP?: number;
1669
- /** Min-p sampling: keep tokens with prob > min_p * max_prob (default: 0.0) */
1670
- minP?: number;
1671
- /** Repetition penalty factor (1.0 = no penalty) (default: 1.0) */
1672
- repetitionPenalty?: number;
1673
- /** Number of recent tokens to consider for repetition penalty (default: 20) */
1674
- repetitionContextSize?: number;
1675
- /** Stop if same token repeats this many times consecutively (default: 16) */
1676
- maxConsecutiveTokens?: number;
1677
- /** Stop if an n-gram pattern repeats this many times (default: 8) */
1678
- maxNgramRepeats?: number;
1679
- /** N-gram size for repetition detection (default: 3) */
1680
- ngramSize?: number;
1681
- /** EOS token ID (generation stops when this is generated) */
1682
- eosTokenId?: number;
1683
- /** Whether to return log probabilities (default: true) */
1684
- returnLogprobs?: boolean;
1932
+ enableThinking?: boolean | undefined;
1933
+ /** When true, include performance metrics (TTFT, prefill tok/s, decode tok/s) in the result */
1934
+ reportPerformance?: boolean | undefined;
1685
1935
  }
1686
1936
 
1687
1937
  /** Chat message with tool calling support */
1688
1938
  export interface ChatMessage {
1689
1939
  /** Role: "system", "user", "assistant", or "tool" */
1690
- role: string;
1940
+ role: 'system' | 'user' | 'assistant' | 'tool' | (string & {});
1691
1941
  /** Message content */
1692
1942
  content: string;
1693
1943
  /** Tool calls made by the assistant (for assistant messages) */
@@ -1696,16 +1946,57 @@ export interface ChatMessage {
1696
1946
  toolCallId?: string;
1697
1947
  /** Reasoning content for thinking mode (used with <think> tags) */
1698
1948
  reasoningContent?: string;
1949
+ /** Image data for VLM models (encoded image bytes: PNG/JPEG, passed as Uint8Array/Buffer) */
1950
+ images?: Array<Uint8Array> | undefined;
1951
+ }
1952
+
1953
+ /** Unified chat result shared by all model variants (Qwen3, Qwen3.5, Qwen3.5 MoE). */
1954
+ export interface ChatResult {
1955
+ text: string;
1956
+ toolCalls: Array<ToolCallResult>;
1957
+ thinking?: string;
1958
+ numTokens: number;
1959
+ finishReason: string;
1960
+ rawText: string;
1961
+ /** Performance metrics (present when `reportPerformance: true` in config) */
1962
+ performance?: PerformanceMetrics;
1699
1963
  }
1700
1964
 
1701
- /** Chat message role */
1965
+ /** Chat message role (lowercase values matching standard convention) */
1702
1966
  export declare const enum ChatRole {
1703
1967
  /** User message */
1704
- User = 'User',
1968
+ User = 'user',
1705
1969
  /** Assistant response */
1706
- Assistant = 'Assistant',
1970
+ Assistant = 'assistant',
1707
1971
  /** System prompt */
1708
- System = 'System',
1972
+ System = 'system',
1973
+ /** Tool response */
1974
+ Tool = 'tool',
1975
+ }
1976
+
1977
+ /** A single chunk emitted during streaming chat generation. */
1978
+ export interface ChatStreamChunk {
1979
+ text: string;
1980
+ done: boolean;
1981
+ finishReason?: string;
1982
+ toolCalls?: Array<ToolCallResult>;
1983
+ thinking?: string;
1984
+ numTokens?: number;
1985
+ rawText?: string;
1986
+ /** Performance metrics (only present in the final chunk when `reportPerformance: true`) */
1987
+ performance?: PerformanceMetrics;
1988
+ }
1989
+
1990
+ /** Result from classify_and_rotate: orientation info + corrected image bytes. */
1991
+ export interface ClassifyRotateResult {
1992
+ /** Detected rotation angle (0, 90, 180, or 270 degrees) */
1993
+ angle: number;
1994
+ /** Confidence score */
1995
+ score: number;
1996
+ /** Angle label as string */
1997
+ label: string;
1998
+ /** Corrected image as PNG bytes (or original bytes if angle=0) */
1999
+ image: Buffer;
1709
2000
  }
1710
2001
 
1711
2002
  /** Statistics about cleanup operations (NAPI wrapper) */
@@ -1748,6 +2039,26 @@ export interface ConversionOptions {
1748
2039
  dtype?: string;
1749
2040
  /** Whether to verbose logging (default: false) */
1750
2041
  verbose?: boolean;
2042
+ /** Model type for model-specific weight sanitization (e.g., "paddleocr-vl") */
2043
+ modelType?: string;
2044
+ /** Enable quantization of converted weights */
2045
+ quantize?: boolean;
2046
+ /** Quantization bits: 4 (default) or 8 */
2047
+ quantBits?: number;
2048
+ /** Quantization group size (default: 64 for affine, 32 for mxfp8) */
2049
+ quantGroupSize?: number;
2050
+ /** Quantization mode: "affine" (default) or "mxfp8" */
2051
+ quantMode?: string;
2052
+ /**
2053
+ * Quantization recipe for per-layer mixed-bit quantization.
2054
+ * Options: mixed_2_6, mixed_3_4, mixed_3_6, mixed_4_6, qwen3_5
2055
+ */
2056
+ quantRecipe?: string;
2057
+ /**
2058
+ * Path to an imatrix GGUF file for AWQ-style pre-scaling.
2059
+ * Improves quantization quality by amplifying important weight channels.
2060
+ */
2061
+ imatrixPath?: string;
1751
2062
  }
1752
2063
 
1753
2064
  export interface ConversionResult {
@@ -1761,6 +2072,10 @@ export interface ConversionResult {
1761
2072
  tensorNames: Array<string>;
1762
2073
  }
1763
2074
 
2075
+ export declare function convertForeignWeights(options: ForeignConversionOptions): ForeignConversionResult;
2076
+
2077
+ export declare function convertGgufToSafetensors(options: GgufConversionOptions): Promise<GgufConversionResult>;
2078
+
1764
2079
  /**
1765
2080
  * Convert a HuggingFace SafeTensors model to MLX format
1766
2081
  *
@@ -1806,12 +2121,31 @@ export interface DocumentElement {
1806
2121
  paragraph?: Paragraph;
1807
2122
  }
1808
2123
 
2124
+ /**
2125
+ * Convert a ParsedDocument to an XLSX buffer.
2126
+ *
2127
+ * Each Table element becomes a separate worksheet with bold headers.
2128
+ * Paragraph elements are collected into a "Text" worksheet.
2129
+ *
2130
+ * # Example
2131
+ * ```typescript
2132
+ * import { parseVlmOutput, documentToXlsx } from '@mlx-node/core';
2133
+ * import { writeFileSync } from 'fs';
2134
+ *
2135
+ * const doc = parseVlmOutput(vlmResult.text);
2136
+ * const buffer = documentToXlsx(doc);
2137
+ * writeFileSync('output.xlsx', buffer);
2138
+ * ```
2139
+ */
2140
+ export declare function documentToXlsx(doc: ParsedDocument): Buffer;
2141
+
1809
2142
  export declare const enum DType {
1810
2143
  Float32 = 0,
1811
2144
  Int32 = 1,
1812
2145
  Float16 = 2,
1813
2146
  BFloat16 = 3,
1814
2147
  Uint32 = 4,
2148
+ Uint8 = 5,
1815
2149
  }
1816
2150
 
1817
2151
  /** Document element type */
@@ -1864,6 +2198,23 @@ export interface EngineStepMetrics {
1864
2198
  activeMemoryMb: number;
1865
2199
  }
1866
2200
 
2201
+ export interface ForeignConversionOptions {
2202
+ /** Path to the input weights file (.pdparams, .pkl, .pt, .pth) */
2203
+ inputPath: string;
2204
+ /** Output directory for model.safetensors + config.json */
2205
+ outputDir: string;
2206
+ /** Model type: "pp-lcnet-ori" or "uvdoc" */
2207
+ modelType: string;
2208
+ /** Enable verbose logging */
2209
+ verbose?: boolean;
2210
+ }
2211
+
2212
+ export interface ForeignConversionResult {
2213
+ numTensors: number;
2214
+ outputPath: string;
2215
+ tensorNames: Array<string>;
2216
+ }
2217
+
1867
2218
  /** Format parsed document according to config */
1868
2219
  export declare function formatDocument(doc: ParsedDocument, config?: ParserConfig | undefined | null): string;
1869
2220
 
@@ -1897,13 +2248,13 @@ export interface GenerateBatchResult {
1897
2248
  completionLogprobs: Array<number>;
1898
2249
  /** Lengths of each completion (for reconstruction) */
1899
2250
  completionLengths: Array<number>;
1900
- /** Finish reasons for each completion ("eos", "length", or "repetition") */
2251
+ /** Finish reasons for each completion ("stop", "length", or "repetition") */
1901
2252
  finishReasons: Array<string>;
1902
2253
  }
1903
2254
 
1904
2255
  /** Configuration for text generation */
1905
2256
  export interface GenerationConfig {
1906
- /** Maximum number of new tokens to generate (default: 100) */
2257
+ /** Maximum number of new tokens to generate (default: 2048) */
1907
2258
  maxNewTokens?: number;
1908
2259
  /** Sampling temperature (0 = greedy, higher = more random) (default: 1.0) */
1909
2260
  temperature?: number;
@@ -1926,13 +2277,15 @@ export interface GenerationConfig {
1926
2277
  */
1927
2278
  maxConsecutiveTokens?: number;
1928
2279
  /**
1929
- * Stop if an n-gram pattern repeats this many times (default: 8)
1930
- * Set to 0 to disable. Detects patterns like "A B A B A B A B".
2280
+ * Stop if a pattern repeats this many times consecutively (default: 3)
2281
+ * Set to 0 to disable. Detects patterns like "A B A B A B".
2282
+ * Uses range-based detection: checks all pattern sizes from 2 to ngram_size.
1931
2283
  */
1932
2284
  maxNgramRepeats?: number;
1933
2285
  /**
1934
- * N-gram size for repetition detection (default: 3)
1935
- * Used with max_ngram_repeats to detect repeating patterns.
2286
+ * Maximum pattern size for repetition detection (default: 64)
2287
+ * All pattern sizes from 2 up to this value are checked each decode step.
2288
+ * Larger values catch long phrase-level repetition common in small models.
1936
2289
  */
1937
2290
  ngramSize?: number;
1938
2291
  /** EOS token ID (generation stops when this is generated) */
@@ -1971,6 +2324,33 @@ export interface GenerationConfig {
1971
2324
  numDraftTokens?: number;
1972
2325
  }
1973
2326
 
2327
+ export interface GenerationProfile {
2328
+ /** Label identifying the decode loop variant. */
2329
+ label: string;
2330
+ /** Model type (e.g. "qwen3_5", "qwen3_5_moe", "qwen3"). */
2331
+ modelType: string;
2332
+ /** Number of tokens generated. */
2333
+ numTokens: number;
2334
+ /** Number of prompt tokens. */
2335
+ promptTokens: number;
2336
+ /** Prefill wall-clock time (ms). */
2337
+ prefillMs: number;
2338
+ /** Decode wall-clock time (ms). */
2339
+ decodeMs: number;
2340
+ /** Total wall-clock time (prefill + decode) (ms). */
2341
+ totalMs: number;
2342
+ /** Tokens per second (decode only). */
2343
+ tokensPerSecond: number;
2344
+ /** Time to first token (ms) — from decode loop start to first token extracted. */
2345
+ timeToFirstTokenMs: number;
2346
+ /** Per-phase breakdown. */
2347
+ phases: Array<PhaseProfile>;
2348
+ /** Memory snapshot before generation. */
2349
+ memoryBefore?: MemorySnapshot;
2350
+ /** Memory snapshot after generation. */
2351
+ memoryAfter?: MemorySnapshot;
2352
+ }
2353
+
1974
2354
  /** A generation record (NAPI wrapper) */
1975
2355
  export interface GenerationRecord {
1976
2356
  batchIndex: number;
@@ -1994,6 +2374,62 @@ export interface GenerationWithToolCalls {
1994
2374
  /** Get expected weight keys for PaddleOCR-VL model */
1995
2375
  export declare function getExpectedWeightKeys(): Array<string>;
1996
2376
 
2377
+ /** Retrieve all collected profiling data as a `ProfilingSession`. */
2378
+ export declare function getProfilingData(): ProfilingSession;
2379
+
2380
+ export interface GgufConversionOptions {
2381
+ /** Path to the GGUF file */
2382
+ inputPath: string;
2383
+ /** Output directory for converted SafeTensors model */
2384
+ outputDir: string;
2385
+ /** Target dtype: "float32", "float16", "bfloat16" (default: keep original) */
2386
+ dtype?: string;
2387
+ /** Enable verbose logging */
2388
+ verbose?: boolean;
2389
+ /** Enable quantization of converted weights */
2390
+ quantize?: boolean;
2391
+ /** Quantization bits (default: 4) */
2392
+ quantBits?: number;
2393
+ /** Quantization group size (default: 64) */
2394
+ quantGroupSize?: number;
2395
+ /** Quantization mode: "affine" or "mxfp8" */
2396
+ quantMode?: string;
2397
+ /**
2398
+ * Quantization recipe for per-layer mixed-bit quantization.
2399
+ * Options: mixed_2_6, mixed_3_4, mixed_3_6, mixed_4_6, qwen3_5, unsloth
2400
+ */
2401
+ quantRecipe?: string;
2402
+ /**
2403
+ * Path to an imatrix GGUF file for AWQ-style pre-scaling.
2404
+ * Improves quantization quality by amplifying important weight channels.
2405
+ */
2406
+ imatrixPath?: string;
2407
+ /**
2408
+ * Output filename (default: "model.safetensors").
2409
+ * Useful for saving vision weights separately (e.g., "vision.safetensors").
2410
+ */
2411
+ outputFilename?: string;
2412
+ /**
2413
+ * When true, remap LLM weight keys for VLM compatibility:
2414
+ * "model.X" → "language_model.model.X", "lm_head.X" → "language_model.lm_head.X"
2415
+ * This makes the safetensors compatible with mlx-vlm.
2416
+ */
2417
+ vlmKeyPrefix?: boolean;
2418
+ }
2419
+
2420
+ export interface GgufConversionResult {
2421
+ numTensors: number;
2422
+ numParameters: number;
2423
+ outputPath: string;
2424
+ tensorNames: Array<string>;
2425
+ sourceFormat: string;
2426
+ }
2427
+
2428
+ export interface GpuInfo {
2429
+ /** GPU architecture generation (M1=13, M2=14, M3=15, M4=16, M5=17). */
2430
+ architectureGen: number;
2431
+ }
2432
+
1997
2433
  /** Configuration for the GRPO training engine */
1998
2434
  export interface GrpoEngineConfig {
1999
2435
  /** Learning rate (default: 1e-6) */
@@ -2093,6 +2529,24 @@ export interface GrpoEngineConfig {
2093
2529
  * then expand KV cache for G completions).
2094
2530
  */
2095
2531
  useParallelBatchGeneration?: boolean;
2532
+ /**
2533
+ * Enable gradient checkpointing (default: true).
2534
+ * When true, each transformer layer's activations are discarded during the forward
2535
+ * pass and recomputed during backward, reducing peak memory from O(num_layers) to O(1)
2536
+ * for intermediate states. For Qwen3.5 0.8B, this reduces autograd peak from ~105GB to ~11GB.
2537
+ * The trade-off is ~30% more compute (one extra forward pass per layer during backward).
2538
+ */
2539
+ gradientCheckpointing?: boolean;
2540
+ /** Optimizer type: "sgd" or "adamw" (default: "adamw") */
2541
+ optimizerType?: string;
2542
+ /** AdamW beta1 (default: 0.9) */
2543
+ adamwBeta1?: number;
2544
+ /** AdamW beta2 (default: 0.999) */
2545
+ adamwBeta2?: number;
2546
+ /** AdamW epsilon (default: 1e-8) */
2547
+ adamwEps?: number;
2548
+ /** Weight decay for AdamW (default: 0.01) */
2549
+ weightDecay?: number;
2096
2550
  }
2097
2551
 
2098
2552
  /** Configuration for GRPO loss computation */
@@ -2143,6 +2597,32 @@ export interface GrpoLossConfig {
2143
2597
  vocabChunkSize?: number;
2144
2598
  }
2145
2599
 
2600
+ /** Check whether profiling is currently enabled. */
2601
+ export declare function isProfilingEnabled(): boolean;
2602
+
2603
+ /** A single detected layout element. */
2604
+ export interface LayoutElement {
2605
+ /** Detection confidence score */
2606
+ score: number;
2607
+ /** Class label ID (0-24) */
2608
+ label: number;
2609
+ /** Human-readable label name (e.g., "title", "text", "table") */
2610
+ labelName: string;
2611
+ /** Bounding box in original image coordinates [x1, y1, x2, y2] */
2612
+ bbox: Array<number>;
2613
+ /** Reading order index (0 = first element to read) */
2614
+ order: number;
2615
+ }
2616
+
2617
+ export interface MemorySnapshot {
2618
+ /** Active (non-cached) memory in bytes. */
2619
+ activeBytes: number;
2620
+ /** Peak memory usage in bytes. */
2621
+ peakBytes: number;
2622
+ /** Cache memory in bytes. */
2623
+ cacheBytes: number;
2624
+ }
2625
+
2146
2626
  /** Full model configuration */
2147
2627
  export interface ModelConfig {
2148
2628
  visionConfig: VisionConfig;
@@ -2156,8 +2636,18 @@ export interface ModelConfig {
2156
2636
  eosTokenId: number;
2157
2637
  }
2158
2638
 
2639
+ /** Result from document orientation classification. */
2640
+ export interface OrientationResult {
2641
+ /** Detected rotation angle (0, 90, 180, or 270 degrees) */
2642
+ angle: number;
2643
+ /** Confidence score */
2644
+ score: number;
2645
+ /** Angle label as string */
2646
+ label: string;
2647
+ }
2648
+
2159
2649
  /** Output format options */
2160
- export declare const enum OutputFormat {
2650
+ export enum OutputFormat {
2161
2651
  /** Raw output with minimal processing */
2162
2652
  Raw = 'Raw',
2163
2653
  /** Plain text with aligned columns */
@@ -2166,6 +2656,8 @@ export declare const enum OutputFormat {
2166
2656
  Markdown = 'Markdown',
2167
2657
  /** HTML tables */
2168
2658
  Html = 'Html',
2659
+ /** JSON structured output */
2660
+ Json = 'Json',
2169
2661
  }
2170
2662
 
2171
2663
  /** Configuration for creating an OutputStore connection */
@@ -2196,7 +2688,7 @@ export interface PagedCompletedSequence {
2196
2688
  requestId: string;
2197
2689
  /** All generated tokens (excluding prompt) */
2198
2690
  tokens: Array<number>;
2199
- /** Reason for completion ("eos", "max_tokens", etc.) */
2691
+ /** Reason for completion ("stop", "length", "repetition", "tool_calls") */
2200
2692
  finishReason: string;
2201
2693
  }
2202
2694
 
@@ -2273,22 +2765,7 @@ export interface ParserConfig {
2273
2765
  collapseEmptyRows?: boolean;
2274
2766
  }
2275
2767
 
2276
- /**
2277
- * Parse tool calls from text (NAPI export)
2278
- *
2279
- * Extracts tool calls from model-generated text and returns both the cleaned text
2280
- * and the parsed tool calls.
2281
- *
2282
- * # Example
2283
- * ```typescript
2284
- * import { parseToolCallsFromText } from '@mlx-node/core';
2285
- *
2286
- * const result = parseToolCallsFromText('<tool_call>{"name": "search", "arguments": {"q": "test"}}</tool_call>');
2287
- * console.log(result.text); // ""
2288
- * console.log(result.toolCalls[0].name); // "search"
2289
- * console.log(result.toolCalls[0].arguments.q); // "test"
2290
- * ```
2291
- */
2768
+ /** Parse tool calls from text (NAPI export) */
2292
2769
  export declare function parseToolCallsFromText(text: string): ParseToolCallsResult;
2293
2770
 
2294
2771
  /** Result of parsing tool calls from text */
@@ -2302,6 +2779,162 @@ export interface ParseToolCallsResult {
2302
2779
  /** Parse VLM output into structured document */
2303
2780
  export declare function parseVlmOutput(text: string): ParsedDocument;
2304
2781
 
2782
+ /**
2783
+ * Lightweight performance metrics returned by chat/chatStream when
2784
+ * `reportPerformance: true` is set in the config.
2785
+ */
2786
+ export interface PerformanceMetrics {
2787
+ /**
2788
+ * Time to first token (ms) — wall-clock from generation start to
2789
+ * first token extracted. Includes tokenization, prefill (lazy graph
2790
+ * construction + first GPU eval), and first sample.
2791
+ */
2792
+ ttftMs: number;
2793
+ /** Prefill throughput: prompt_tokens / (ttft_ms / 1000). */
2794
+ prefillTokensPerSecond: number;
2795
+ /**
2796
+ * Decode throughput: (generated_tokens - 1) / decode_time.
2797
+ * Excludes the first token (counted as prefill).
2798
+ */
2799
+ decodeTokensPerSecond: number;
2800
+ }
2801
+
2802
+ export interface PhaseProfile {
2803
+ /** Phase name (e.g. "forward", "sample", "eval_token"). */
2804
+ name: string;
2805
+ /** Total wall-clock time spent in this phase (ms). */
2806
+ totalMs: number;
2807
+ /** Average time per invocation (µs). */
2808
+ avgUsPerToken: number;
2809
+ /** Number of invocations. */
2810
+ count: number;
2811
+ }
2812
+
2813
+ export interface ProfilingSession {
2814
+ /** GPU hardware info. */
2815
+ gpuInfo: GpuInfo;
2816
+ /** Total session duration (ms). */
2817
+ totalDurationMs: number;
2818
+ /** Individual generation profiles. */
2819
+ generations: Array<GenerationProfile>;
2820
+ /** Aggregate summary. */
2821
+ summary: ProfilingSummary;
2822
+ }
2823
+
2824
+ export interface ProfilingSummary {
2825
+ /** Total tokens generated across all generations. */
2826
+ totalTokens: number;
2827
+ /** Total prompt tokens across all generations. */
2828
+ totalPromptTokens: number;
2829
+ /** Average tokens per second. */
2830
+ avgTokensPerSecond: number;
2831
+ /** Average time to first token (ms). */
2832
+ avgTimeToFirstTokenMs: number;
2833
+ /** Average prefill time (ms). */
2834
+ avgPrefillMs: number;
2835
+ }
2836
+
2837
+ /**
2838
+ * Qwen3.5 model configuration (dense variant).
2839
+ *
2840
+ * For MoE models, use `Qwen3_5MoeConfig` from `qwen3_5_moe`.
2841
+ */
2842
+ export interface Qwen35Config {
2843
+ vocabSize: number;
2844
+ hiddenSize: number;
2845
+ numLayers: number;
2846
+ numHeads: number;
2847
+ numKvHeads: number;
2848
+ intermediateSize: number;
2849
+ rmsNormEps: number;
2850
+ headDim: number;
2851
+ tieWordEmbeddings: boolean;
2852
+ attentionBias: boolean;
2853
+ maxPositionEmbeddings: number;
2854
+ padTokenId: number;
2855
+ eosTokenId: number;
2856
+ bosTokenId: number;
2857
+ linearNumValueHeads: number;
2858
+ linearNumKeyHeads: number;
2859
+ linearKeyHeadDim: number;
2860
+ linearValueHeadDim: number;
2861
+ linearConvKernelDim: number;
2862
+ fullAttentionInterval: number;
2863
+ partialRotaryFactor: number;
2864
+ ropeTheta: number;
2865
+ }
2866
+
2867
+ /** Generation configuration for Qwen3.5 */
2868
+ export interface Qwen35GenerationConfig {
2869
+ maxNewTokens: number;
2870
+ temperature?: number | undefined;
2871
+ topK?: number | undefined;
2872
+ topP?: number | undefined;
2873
+ minP?: number | undefined;
2874
+ }
2875
+
2876
+ /** Generation result */
2877
+ export interface Qwen35GenerationResult {
2878
+ tokens: Array<number>;
2879
+ text: string;
2880
+ numTokens: number;
2881
+ finishReason: string;
2882
+ }
2883
+
2884
+ /**
2885
+ * Qwen3.5 MoE model configuration.
2886
+ *
2887
+ * Contains all fields including MoE-specific ones (num_experts, etc.).
2888
+ */
2889
+ export interface Qwen35MoeConfig {
2890
+ vocabSize: number;
2891
+ hiddenSize: number;
2892
+ numLayers: number;
2893
+ numHeads: number;
2894
+ numKvHeads: number;
2895
+ intermediateSize: number;
2896
+ rmsNormEps: number;
2897
+ headDim: number;
2898
+ tieWordEmbeddings: boolean;
2899
+ attentionBias: boolean;
2900
+ maxPositionEmbeddings: number;
2901
+ padTokenId: number;
2902
+ eosTokenId: number;
2903
+ bosTokenId: number;
2904
+ linearNumValueHeads: number;
2905
+ linearNumKeyHeads: number;
2906
+ linearKeyHeadDim: number;
2907
+ linearValueHeadDim: number;
2908
+ linearConvKernelDim: number;
2909
+ fullAttentionInterval: number;
2910
+ partialRotaryFactor: number;
2911
+ ropeTheta: number;
2912
+ numExperts: number;
2913
+ numExpertsPerTok: number;
2914
+ decoderSparseStep: number;
2915
+ sharedExpertIntermediateSize?: number | undefined;
2916
+ moeIntermediateSize?: number | undefined;
2917
+ normTopkProb: boolean;
2918
+ mlpOnlyLayers?: number[] | undefined;
2919
+ }
2920
+
2921
+ /** Generation configuration for Qwen3.5 MoE */
2922
+ export interface Qwen35MoeGenerationConfig {
2923
+ maxNewTokens: number;
2924
+ temperature?: number | undefined;
2925
+ topK?: number | undefined;
2926
+ topP?: number | undefined;
2927
+ minP?: number | undefined;
2928
+ }
2929
+
2930
+ /** Generation result */
2931
+ export interface Qwen35MoeGenerationResult {
2932
+ tokens: Array<number>;
2933
+ text: string;
2934
+ numTokens: number;
2935
+ finishReason: string;
2936
+ }
2937
+
2305
2938
  /** Qwen3 model configuration */
2306
2939
  export interface Qwen3Config {
2307
2940
  vocabSize: number;
@@ -2344,6 +2977,17 @@ export interface Qwen3Config {
2344
2977
  useFp8Cache?: boolean | undefined;
2345
2978
  }
2346
2979
 
2980
+ /** Result of text recognition. */
2981
+ export interface RecResult {
2982
+ /** Recognized text */
2983
+ text: string;
2984
+ /** Confidence score (mean character probability) */
2985
+ score: number;
2986
+ }
2987
+
2988
+ /** Clear all collected profiling data and reset session timer. */
2989
+ export declare function resetProfilingData(): void;
2990
+
2347
2991
  /** Result of resume position computation */
2348
2992
  export interface ResumePosition {
2349
2993
  /** Epoch to start from (0-indexed) */
@@ -2416,6 +3060,20 @@ export interface SamplingConfig {
2416
3060
  minP?: number;
2417
3061
  }
2418
3062
 
3063
+ /**
3064
+ * Parse VLM output and save directly as XLSX file.
3065
+ *
3066
+ * Convenience function that parses VLM output and writes it to an XLSX file.
3067
+ *
3068
+ * # Example
3069
+ * ```typescript
3070
+ * import { saveToXlsx } from '@mlx-node/core';
3071
+ *
3072
+ * saveToXlsx(vlmResult.text, 'output.xlsx');
3073
+ * ```
3074
+ */
3075
+ export declare function saveToXlsx(text: string, filePath: string): void;
3076
+
2419
3077
  /** Scheduler statistics (NAPI-compatible) */
2420
3078
  export interface SchedulerStatsNapi {
2421
3079
  /** Number of requests waiting to be scheduled */
@@ -2432,6 +3090,9 @@ export interface SchedulerStatsNapi {
2432
3090
  totalRunningTokens: number;
2433
3091
  }
2434
3092
 
3093
+ /** Enable or disable profiling globally. */
3094
+ export declare function setProfilingEnabled(enabled: boolean): void;
3095
+
2435
3096
  /** Configuration for the SFT training engine */
2436
3097
  export interface SftEngineConfig {
2437
3098
  /** Learning rate (default: 2e-5) */
@@ -2461,6 +3122,11 @@ export interface SftEngineConfig {
2461
3122
  * per-element analysis - useful for debugging but has significant performance overhead.
2462
3123
  */
2463
3124
  verboseNanDetection?: boolean;
3125
+ /**
3126
+ * Enable gradient checkpointing to reduce memory (default: true)
3127
+ * Trades ~30% more compute for O(1) layer memory instead of O(num_layers).
3128
+ */
3129
+ gradientCheckpointing?: boolean;
2464
3130
  }
2465
3131
 
2466
3132
  /** Metrics from a training epoch */
@@ -2560,6 +3226,14 @@ export interface TableRow {
2560
3226
  cells: Array<TableCell>;
2561
3227
  }
2562
3228
 
3229
+ /** A detected text bounding box. */
3230
+ export interface TextBox {
3231
+ /** Bounding box in original image coordinates [x1, y1, x2, y2] */
3232
+ bbox: Array<number>;
3233
+ /** Detection confidence score (mean probability inside box) */
3234
+ score: number;
3235
+ }
3236
+
2563
3237
  /** Language model (text decoder) configuration */
2564
3238
  export interface TextConfig {
2565
3239
  modelType: string;
@@ -2682,6 +3356,12 @@ export interface TrainStepResultWithOutputs {
2682
3356
  completionLengths: Array<number>;
2683
3357
  }
2684
3358
 
3359
+ /** Result from document unwarping. */
3360
+ export interface UnwarpResult {
3361
+ /** Unwarped image as PNG bytes */
3362
+ image: Buffer;
3363
+ }
3364
+
2685
3365
  /** Vision encoder configuration */
2686
3366
  export interface VisionConfig {
2687
3367
  modelType: string;
@@ -2698,13 +3378,18 @@ export interface VisionConfig {
2698
3378
  spatialMergeSize: number;
2699
3379
  }
2700
3380
 
3381
+ /** A batch item for VLM batch inference */
3382
+ export interface VlmBatchItem {
3383
+ /** Chat messages for this item */
3384
+ messages: Array<VlmChatMessage>;
3385
+ /** Encoded image buffers for this item (one image per item for OCR) */
3386
+ images?: Array<Buffer>;
3387
+ }
3388
+
2701
3389
  /** Configuration for VLM chat */
2702
3390
  export interface VlmChatConfig {
2703
- /**
2704
- * Image paths to process (alternative to passing pre-processed images)
2705
- * These will be automatically processed using the ImageProcessor
2706
- */
2707
- imagePaths?: Array<string>;
3391
+ /** Encoded image buffers to process (PNG/JPEG bytes) */
3392
+ images?: Array<Buffer>;
2708
3393
  /** Maximum number of new tokens to generate (default: 512) */
2709
3394
  maxNewTokens?: number;
2710
3395
  /** Sampling temperature (0 = greedy, higher = more random) (default: 0.0 for OCR) */