@localmode/transformers 1.0.0

@@ -0,0 +1,992 @@
1
+ import { EmbeddingModel, ClassificationModel, ZeroShotClassificationModel, NERModel, RerankerModel, SpeechToTextModel, ImageClassificationModel, ZeroShotImageClassificationModel, ImageCaptionModel, SegmentationModel, ObjectDetectionModel, ImageFeatureModel, ImageToImageModel, TextToSpeechModel, TranslationModel, SummarizationModel, FillMaskModel, QuestionAnsweringModel, OCRModel, DocumentQAModel, ClassificationResultItem, ClassificationUsage, NERResultItem, NERUsage, RankedDocument, RerankUsage, AudioInput, TranscriptionSegment, AudioUsage, ImageInput, ImageClassificationResultItem, VisionUsage, ZeroShotImageClassificationResultItem } from '@localmode/core';
2
+
3
+ /**
4
+ * Transformers Provider Types
5
+ *
6
+ * Provider-specific types for the Transformers.js integration.
7
+ *
8
+ * @packageDocumentation
9
+ */
10
+
11
+ /**
12
+ * Supported compute devices for Transformers.js.
13
+ */
14
+ type TransformersDevice = 'webgpu' | 'wasm' | 'cpu' | 'auto';
15
+ /**
16
+ * Progress callback for model loading.
17
+ * Status types include all possible states from the underlying transformers library.
18
+ */
19
+ interface ModelLoadProgress {
20
+ /** Current status */
21
+ status: 'initiate' | 'download' | 'progress' | 'done' | 'ready';
22
+ /** Model name being loaded */
23
+ name?: string;
24
+ /** File being downloaded */
25
+ file?: string;
26
+ /** Download progress (0-100) */
27
+ progress?: number;
28
+ /** Bytes loaded */
29
+ loaded?: number;
30
+ /** Total bytes */
31
+ total?: number;
32
+ }
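For orientation, a progress handler consuming this shape might look like the sketch below; the field names come from the interface above, while the logging itself is illustrative.

```ts
import type { ModelLoadProgress } from '@localmode/transformers';

// Illustrative handler: log per-file download progress and a final ready message.
const onProgress = (p: ModelLoadProgress): void => {
  if (p.status === 'progress' && p.file !== undefined && p.progress !== undefined) {
    console.log(`${p.file}: ${p.progress.toFixed(1)}% (${p.loaded ?? 0}/${p.total ?? '?'} bytes)`);
  } else if (p.status === 'ready') {
    console.log(`${p.name ?? 'model'} is ready`);
  }
};
```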
33
+ /**
34
+ * Provider-level settings for all Transformers.js models.
35
+ */
36
+ interface TransformersProviderSettings {
37
+ /**
38
+ * Compute device to use.
39
+ * @default 'auto' (prefers WebGPU if available, falls back to WASM)
40
+ */
41
+ device?: TransformersDevice;
42
+ /**
43
+ * Whether to run inference in a Web Worker.
44
+ * This keeps the main thread responsive during model inference.
45
+ * @default false
46
+ */
47
+ useWorker?: boolean;
48
+ /**
49
+ * Progress callback for model loading.
50
+ */
51
+ onProgress?: (progress: ModelLoadProgress) => void;
52
+ /**
53
+ * Custom cache directory (for Node.js environments).
54
+ */
55
+ cacheDir?: string;
56
+ /**
57
+ * Whether to use quantized models for smaller size.
58
+ * @default true
59
+ */
60
+ quantized?: boolean;
61
+ }
62
+ /**
63
+ * Model-level settings that can override provider settings.
64
+ */
65
+ interface ModelSettings {
66
+ /**
67
+ * Override the compute device for this specific model.
68
+ */
69
+ device?: TransformersDevice;
70
+ /**
71
+ * Override worker usage for this specific model.
72
+ */
73
+ useWorker?: boolean;
74
+ /**
75
+ * Override quantization for this specific model.
76
+ */
77
+ quantized?: boolean;
78
+ /**
79
+ * Progress callback for this specific model.
80
+ */
81
+ onProgress?: (progress: ModelLoadProgress) => void;
82
+ }
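As a quick sketch of how the two settings layers interact (using the `createTransformers` factory documented further down), provider-level defaults can be overridden per model:

```ts
import { createTransformers } from '@localmode/transformers';

// Provider-level defaults apply to every model this provider creates.
const provider = createTransformers({ device: 'auto', quantized: true });

// A single model can override them: this embedder is pinned to WASM and
// reports its own loading progress, regardless of the provider defaults.
const embedder = provider.embedding('Xenova/all-MiniLM-L6-v2', {
  device: 'wasm',
  onProgress: (p) => console.log(p.status, p.progress),
});
```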
83
+ /**
84
+ * The Transformers provider interface.
85
+ *
86
+ * Provides factory methods for all supported model types.
87
+ */
88
+ interface TransformersProvider {
89
+ /**
90
+ * Create an embedding model.
91
+ *
92
+ * @example
93
+ * ```ts
94
+ * const model = transformers.embedding('Xenova/all-MiniLM-L6-v2');
95
+ * const { embedding } = await embed({ model, value: 'Hello' });
96
+ * ```
97
+ */
98
+ embedding(modelId: string, settings?: ModelSettings): EmbeddingModel;
99
+ /**
100
+ * Create a text classification model.
101
+ *
102
+ * @example
103
+ * ```ts
104
+ * const model = transformers.classifier('Xenova/distilbert-base-uncased-finetuned-sst-2-english');
105
+ * const { label } = await classify({ model, text: 'Great!' });
106
+ * ```
107
+ */
108
+ classifier(modelId: string, settings?: ModelSettings): ClassificationModel;
109
+ /**
110
+ * Create a zero-shot classification model.
111
+ *
112
+ * @example
113
+ * ```ts
114
+ * const model = transformers.zeroShot('Xenova/bart-large-mnli');
115
+ * const { labels } = await classifyZeroShot({
116
+ * model,
117
+ * text: 'I love pizza',
118
+ * candidateLabels: ['food', 'sports', 'politics'],
119
+ * });
120
+ * ```
121
+ */
122
+ zeroShot(modelId: string, settings?: ModelSettings): ZeroShotClassificationModel;
123
+ /**
124
+ * Create a named entity recognition model.
125
+ *
126
+ * @example
127
+ * ```ts
128
+ * const model = transformers.ner('Xenova/bert-base-NER');
129
+ * const { entities } = await extractEntities({ model, text: 'John works at Google' });
130
+ * ```
131
+ */
132
+ ner(modelId: string, settings?: ModelSettings): NERModel;
133
+ /**
134
+ * Create a reranker model.
135
+ *
136
+ * @example
137
+ * ```ts
138
+ * const model = transformers.reranker('Xenova/ms-marco-MiniLM-L-6-v2');
139
+ * const { results } = await rerank({ model, query: 'What is ML?', documents });
140
+ * ```
141
+ */
142
+ reranker(modelId: string, settings?: ModelSettings): RerankerModel;
143
+ /**
144
+ * Create a speech-to-text model.
145
+ *
146
+ * @example
147
+ * ```ts
148
+ * const model = transformers.speechToText('Xenova/whisper-tiny');
149
+ * const { text } = await transcribe({ model, audio: audioBlob });
150
+ * ```
151
+ */
152
+ speechToText(modelId: string, settings?: ModelSettings): SpeechToTextModel;
153
+ /**
154
+ * Create an image classification model.
155
+ *
156
+ * @example
157
+ * ```ts
158
+ * const model = transformers.imageClassifier('Xenova/vit-base-patch16-224');
159
+ * const { predictions } = await classifyImage({ model, image: imageBlob });
160
+ * ```
161
+ */
162
+ imageClassifier(modelId: string, settings?: ModelSettings): ImageClassificationModel;
163
+ /**
164
+ * Create a zero-shot image classification model (e.g., CLIP).
165
+ *
166
+ * @example
167
+ * ```ts
168
+ * const model = transformers.zeroShotImageClassifier('Xenova/clip-vit-base-patch32');
169
+ * const { labels, scores } = await classifyImageZeroShot({
170
+ * model,
171
+ * image: imageBlob,
172
+ * candidateLabels: ['cat', 'dog', 'bird'],
173
+ * });
174
+ * ```
175
+ */
176
+ zeroShotImageClassifier(modelId: string, settings?: ModelSettings): ZeroShotImageClassificationModel;
177
+ /**
178
+ * Create an image captioning model (e.g., BLIP).
179
+ *
180
+ * @example
181
+ * ```ts
182
+ * const model = transformers.captioner('Xenova/blip-image-captioning-base');
183
+ * const { caption } = await captionImage({ model, image: imageBlob });
184
+ * ```
185
+ */
186
+ captioner(modelId: string, settings?: ModelSettings): ImageCaptionModel;
187
+ /**
188
+ * Create an image segmentation model.
189
+ *
190
+ * @example
191
+ * ```ts
192
+ * const model = transformers.segmenter('Xenova/segformer-b0-finetuned-ade-512-512');
193
+ * const { masks } = await segmentImage({ model, image: imageBlob });
194
+ * ```
195
+ */
196
+ segmenter(modelId: string, settings?: ModelSettings): SegmentationModel;
197
+ /**
198
+ * Create an object detection model.
199
+ *
200
+ * @example
201
+ * ```ts
202
+ * const model = transformers.objectDetector('Xenova/detr-resnet-50');
203
+ * const { objects } = await detectObjects({ model, image: imageBlob });
204
+ * ```
205
+ */
206
+ objectDetector(modelId: string, settings?: ModelSettings): ObjectDetectionModel;
207
+ /**
208
+ * Create an image feature extraction model (e.g., CLIP, DINOv2).
209
+ *
210
+ * @example
211
+ * ```ts
212
+ * const model = transformers.imageFeatures('Xenova/clip-vit-base-patch32');
213
+ * const { features } = await extractImageFeatures({ model, image: imageBlob });
214
+ * ```
215
+ */
216
+ imageFeatures(modelId: string, settings?: ModelSettings): ImageFeatureModel;
217
+ /**
218
+ * Create an image-to-image model (e.g., super resolution).
219
+ *
220
+ * @example
221
+ * ```ts
222
+ * const model = transformers.imageToImage('Xenova/swin2SR-classical-sr-x2-64');
223
+ * const { image } = await imageToImage({ model, image: imageBlob });
224
+ * ```
225
+ */
226
+ imageToImage(modelId: string, settings?: ModelSettings): ImageToImageModel;
227
+ /**
228
+ * Create a text-to-speech model.
229
+ *
230
+ * @example
231
+ * ```ts
232
+ * const model = transformers.textToSpeech('Xenova/speecht5_tts');
233
+ * const { audio } = await synthesizeSpeech({ model, text: 'Hello world' });
234
+ * ```
235
+ */
236
+ textToSpeech(modelId: string, settings?: ModelSettings): TextToSpeechModel;
237
+ /**
238
+ * Create a translation model.
239
+ *
240
+ * @example
241
+ * ```ts
242
+ * const model = transformers.translator('Xenova/opus-mt-en-de');
243
+ * const { text } = await translate({ model, text: 'Hello', targetLanguage: 'de' });
244
+ * ```
245
+ */
246
+ translator(modelId: string, settings?: ModelSettings): TranslationModel;
247
+ /**
248
+ * Create a summarization model.
249
+ *
250
+ * @example
251
+ * ```ts
252
+ * const model = transformers.summarizer('Xenova/bart-large-cnn');
253
+ * const { text } = await summarize({ model, text: longText });
254
+ * ```
255
+ */
256
+ summarizer(modelId: string, settings?: ModelSettings): SummarizationModel;
257
+ /**
258
+ * Create a fill-mask model.
259
+ *
260
+ * @example
261
+ * ```ts
262
+ * const model = transformers.fillMask('Xenova/bert-base-uncased');
263
+ * const { predictions } = await fillMask({ model, text: 'The capital of France is [MASK].' });
264
+ * ```
265
+ */
266
+ fillMask(modelId: string, settings?: ModelSettings): FillMaskModel;
267
+ /**
268
+ * Create a question answering model.
269
+ *
270
+ * @example
271
+ * ```ts
272
+ * const model = transformers.questionAnswering('Xenova/distilbert-base-cased-distilled-squad');
273
+ * const { answers } = await answerQuestion({ model, question: 'What is ML?', context });
274
+ * ```
275
+ */
276
+ questionAnswering(modelId: string, settings?: ModelSettings): QuestionAnsweringModel;
277
+ /**
278
+ * Create an OCR model (TrOCR).
279
+ *
280
+ * @example
281
+ * ```ts
282
+ * const model = transformers.ocr('Xenova/trocr-base-handwritten');
283
+ * const { fullText } = await extractText({ model, image: imageBlob });
284
+ * ```
285
+ */
286
+ ocr(modelId: string, settings?: ModelSettings): OCRModel;
287
+ /**
288
+ * Create a document QA model.
289
+ *
290
+ * @example
291
+ * ```ts
292
+ * const model = transformers.documentQA('Xenova/donut-base-finetuned-docvqa');
293
+ * const { answers } = await askDocument({ model, question: 'What is the total?', document });
294
+ * ```
295
+ */
296
+ documentQA(modelId: string, settings?: ModelSettings): DocumentQAModel;
297
+ }
298
+
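The factory methods compose naturally; a minimal retrieve-then-rerank sketch (assuming `rerank`, like `embed`, is exported from `@localmode/core`) might look like this:

```ts
import { transformers } from '@localmode/transformers';
import { embed, rerank } from '@localmode/core'; // `embed` per the docs below; `rerank` export assumed

const query = 'What is machine learning?';
const documents = [
  'Machine learning is a branch of artificial intelligence.',
  'Pizza is a popular Italian dish.',
  'Neural networks learn patterns from data.',
];

// Embed the query, e.g. for a first-pass vector search elsewhere.
const { embedding } = await embed({
  model: transformers.embedding('Xenova/all-MiniLM-L6-v2'),
  value: query,
});

// Rerank the candidate documents with a cross-encoder to sharpen the ordering.
const { results } = await rerank({
  model: transformers.reranker('Xenova/ms-marco-MiniLM-L-6-v2'),
  query,
  documents,
});

console.log(embedding.length, results);
```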
299
+ /**
300
+ * Transformers Provider
301
+ *
302
+ * Factory for creating Transformers.js model instances.
303
+ *
304
+ * @packageDocumentation
305
+ */
306
+
307
+ /**
308
+ * Create a Transformers.js provider with custom settings.
309
+ *
310
+ * @param settings - Provider-level settings that apply to all models
311
+ * @returns A TransformersProvider instance
312
+ *
313
+ * @example Basic usage
314
+ * ```ts
315
+ * import { createTransformers } from '@localmode/transformers';
316
+ *
317
+ * const myTransformers = createTransformers({
318
+ * device: 'webgpu',
319
+ * onProgress: (p) => console.log(`Loading: ${p.progress}%`),
320
+ * });
321
+ *
322
+ * const embedder = myTransformers.embedding('Xenova/all-MiniLM-L6-v2');
323
+ * ```
324
+ *
325
+ * @example With worker
326
+ * ```ts
327
+ * const workerTransformers = createTransformers({
328
+ * useWorker: true,
329
+ * });
330
+ * ```
331
+ */
332
+ declare function createTransformers(settings?: TransformersProviderSettings): TransformersProvider;
333
+ /**
334
+ * Default Transformers.js provider instance.
335
+ *
336
+ * Use this for quick access without custom configuration.
337
+ *
338
+ * @example
339
+ * ```ts
340
+ * import { transformers } from '@localmode/transformers';
341
+ * import { embed } from '@localmode/core';
342
+ *
343
+ * const { embedding } = await embed({
344
+ * model: transformers.embedding('Xenova/all-MiniLM-L6-v2'),
345
+ * value: 'Hello world',
346
+ * });
347
+ * ```
348
+ */
349
+ declare const transformers: TransformersProvider;
350
+
351
+ /**
352
+ * Transformers Utilities
353
+ *
354
+ * Utility functions for the Transformers.js provider.
355
+ *
356
+ * @packageDocumentation
357
+ */
358
+
359
+ /**
360
+ * Check if WebGPU is available in the current environment.
361
+ *
362
+ * @returns Promise<boolean> True if WebGPU is available
363
+ *
364
+ * @example
365
+ * ```ts
366
+ * if (await isWebGPUAvailable()) {
367
+ * console.log('Using WebGPU for acceleration');
368
+ * } else {
369
+ * console.log('Falling back to WASM');
370
+ * }
371
+ * ```
372
+ */
373
+ declare function isWebGPUAvailable(): Promise<boolean>;
374
+ /**
375
+ * Get the optimal device based on environment capabilities.
376
+ *
377
+ * @returns Promise with the recommended device type
378
+ */
379
+ declare function getOptimalDevice(): Promise<'webgpu' | 'wasm'>;
380
+ /**
381
+ * Check if a model is already cached locally.
382
+ *
383
+ * @param modelId - The model ID to check
384
+ * @returns Promise<boolean> True if the model is cached
385
+ *
386
+ * @example
387
+ * ```ts
388
+ * if (await isModelCached('Xenova/all-MiniLM-L6-v2')) {
389
+ * console.log('Model is ready');
390
+ * } else {
391
+ * console.log('Model will be downloaded on first use');
392
+ * }
393
+ * ```
394
+ */
395
+ declare function isModelCached(modelId: string): Promise<boolean>;
396
+ /**
397
+ * Preload a model to cache it for offline use.
398
+ *
399
+ * This downloads and caches the model without actually using it,
400
+ * allowing for faster subsequent loads.
401
+ *
402
+ * @param modelId - The model ID to preload
403
+ * @param options - Preload options
404
+ *
405
+ * @example
406
+ * ```ts
407
+ * // Preload during app initialization
408
+ * await preloadModel('Xenova/all-MiniLM-L6-v2', {
409
+ * onProgress: (p) => console.log(`${p.progress}%`),
410
+ * });
411
+ * ```
412
+ */
413
+ declare function preloadModel(modelId: string, options?: {
414
+ onProgress?: (progress: ModelLoadProgress) => void;
415
+ quantized?: boolean;
416
+ }): Promise<void>;
417
+ /**
418
+ * Clear all cached models from the browser.
419
+ *
420
+ * @returns Promise<boolean> True if cache was cleared successfully
421
+ */
422
+ declare function clearModelCache(): Promise<boolean>;
423
+ /**
424
+ * Get an estimate of cached model storage usage.
425
+ *
426
+ * @returns Promise with storage estimate in bytes
427
+ */
428
+ declare function getModelStorageUsage(): Promise<number>;
429
+
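Taken together, the utilities above support a cache-aware startup routine. The helper below is hypothetical, but every call in it is declared in this file.

```ts
import {
  createTransformers,
  getOptimalDevice,
  isModelCached,
  preloadModel,
  getModelStorageUsage,
} from '@localmode/transformers';

// Hypothetical app-initialization helper: pick a device, warm the cache, report usage.
async function initEmbedder(modelId = 'Xenova/all-MiniLM-L6-v2') {
  const device = await getOptimalDevice(); // 'webgpu' when available, else 'wasm'

  if (!(await isModelCached(modelId))) {
    await preloadModel(modelId, {
      onProgress: (p) => console.log(`preloading ${p.file ?? modelId}: ${p.progress ?? 0}%`),
    });
  }

  const bytes = await getModelStorageUsage();
  console.log(`cached models use ~${(bytes / 1024 / 1024).toFixed(1)} MB`);

  return createTransformers({ device }).embedding(modelId);
}
```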
430
+ /**
431
+ * Popular Model Constants
432
+ *
433
+ * Pre-defined model IDs for commonly used models.
434
+ *
435
+ * @packageDocumentation
436
+ */
437
+ /**
438
+ * Popular embedding models.
439
+ */
440
+ declare const EMBEDDING_MODELS: {
441
+ /** Small, fast, general-purpose embeddings (384 dimensions, ~22MB) */
442
+ readonly ALL_MINILM_L6_V2: "Xenova/all-MiniLM-L6-v2";
443
+ /** Multilingual embeddings for 50+ languages (384 dimensions, ~120MB) */
444
+ readonly PARAPHRASE_MULTILINGUAL_MINILM: "Xenova/paraphrase-multilingual-MiniLM-L12-v2";
445
+ /** Higher quality but larger (768 dimensions, ~420MB) */
446
+ readonly ALL_MPNET_BASE_V2: "Xenova/all-mpnet-base-v2";
447
+ /** BGE embeddings - excellent for RAG (384 dimensions, ~33MB) */
448
+ readonly BGE_SMALL_EN: "Xenova/bge-small-en-v1.5";
449
+ /** BGE base model - better quality (768 dimensions, ~110MB) */
450
+ readonly BGE_BASE_EN: "Xenova/bge-base-en-v1.5";
451
+ };
452
+ /**
453
+ * Popular text classification models.
454
+ */
455
+ declare const CLASSIFICATION_MODELS: {
456
+ /** Fast sentiment analysis (POSITIVE/NEGATIVE) */
457
+ readonly DISTILBERT_SST2: "Xenova/distilbert-base-uncased-finetuned-sst-2-english";
458
+ /** Twitter sentiment (positive/neutral/negative) */
459
+ readonly TWITTER_ROBERTA_SENTIMENT: "Xenova/twitter-roberta-base-sentiment-latest";
460
+ /** Emotion detection */
461
+ readonly DISTILROBERTA_EMOTION: "Xenova/distilroberta-base-emotion";
462
+ };
463
+ /**
464
+ * Popular zero-shot classification models.
465
+ */
466
+ declare const ZERO_SHOT_MODELS: {
467
+ /** BART-based zero-shot classification */
468
+ readonly BART_LARGE_MNLI: "Xenova/bart-large-mnli";
469
+ /** Smaller, faster zero-shot model */
470
+ readonly DISTILBART_MNLI: "Xenova/distilbart-mnli-12-3";
471
+ };
472
+ /**
473
+ * Popular NER (Named Entity Recognition) models.
474
+ */
475
+ declare const NER_MODELS: {
476
+ /** Standard NER: PERSON, ORG, LOC, MISC */
477
+ readonly BERT_BASE_NER: "Xenova/bert-base-NER";
478
+ /** XLM-RoBERTa NER (fine-tuned on CoNLL-03 English) */
479
+ readonly XLM_ROBERTA_NER: "Xenova/xlm-roberta-large-finetuned-conll03-english";
480
+ };
481
+ /**
482
+ * Popular reranking models.
483
+ */
484
+ declare const RERANKER_MODELS: {
485
+ /** Fast, small reranker (~22MB) */
486
+ readonly MS_MARCO_MINILM_L6: "Xenova/ms-marco-MiniLM-L-6-v2";
487
+ /** Higher quality reranker */
488
+ readonly BGE_RERANKER_BASE: "Xenova/bge-reranker-base";
489
+ };
490
+ /**
491
+ * Popular speech-to-text models.
492
+ */
493
+ declare const SPEECH_TO_TEXT_MODELS: {
494
+ /** Smallest Whisper - fastest, good for short audio (~70MB) */
495
+ readonly WHISPER_TINY: "Xenova/whisper-tiny";
496
+ /** Small Whisper - better accuracy (~240MB) */
497
+ readonly WHISPER_SMALL: "Xenova/whisper-small";
498
+ /** Base Whisper - balanced (~140MB) */
499
+ readonly WHISPER_BASE: "Xenova/whisper-base";
500
+ /** Distilled Whisper - fast and accurate */
501
+ readonly DISTIL_WHISPER_SMALL: "Xenova/distil-whisper-small.en";
502
+ };
503
+ /**
504
+ * Popular image classification models.
505
+ */
506
+ declare const IMAGE_CLASSIFICATION_MODELS: {
507
+ /** ViT base model - ImageNet classes */
508
+ readonly VIT_BASE_PATCH16: "Xenova/vit-base-patch16-224";
509
+ /** Smaller ViT model */
510
+ readonly VIT_SMALL_PATCH16: "Xenova/vit-small-patch16-224";
511
+ /** ResNet alternative */
512
+ readonly RESNET_50: "Xenova/resnet-50";
513
+ };
514
+ /**
515
+ * Popular zero-shot image classification models.
516
+ */
517
+ declare const ZERO_SHOT_IMAGE_MODELS: {
518
+ /** CLIP ViT-Base - versatile zero-shot image classification */
519
+ readonly CLIP_VIT_BASE_PATCH32: "Xenova/clip-vit-base-patch32";
520
+ /** Larger CLIP model with better accuracy */
521
+ readonly CLIP_VIT_LARGE_PATCH14: "Xenova/clip-vit-large-patch14";
522
+ /** SigLIP - improved CLIP variant */
523
+ readonly SIGLIP_BASE_PATCH16: "Xenova/siglip-base-patch16-224";
524
+ };
525
+ /**
526
+ * Popular image captioning models.
527
+ */
528
+ declare const IMAGE_CAPTION_MODELS: {
529
+ /** BLIP base - fast, good quality captions */
530
+ readonly BLIP_BASE: "Xenova/blip-image-captioning-base";
531
+ /** BLIP large - better quality, slower */
532
+ readonly BLIP_LARGE: "Xenova/blip-image-captioning-large";
533
+ /** GIT base - alternative captioning model */
534
+ readonly GIT_BASE: "Xenova/git-base-coco";
535
+ };
536
+ /**
537
+ * All popular models organized by task.
538
+ */
539
+ declare const MODELS: {
540
+ readonly embedding: {
541
+ /** Small, fast, general-purpose embeddings (384 dimensions, ~22MB) */
542
+ readonly ALL_MINILM_L6_V2: "Xenova/all-MiniLM-L6-v2";
543
+ /** Multilingual embeddings for 50+ languages (384 dimensions, ~120MB) */
544
+ readonly PARAPHRASE_MULTILINGUAL_MINILM: "Xenova/paraphrase-multilingual-MiniLM-L12-v2";
545
+ /** Higher quality but larger (768 dimensions, ~420MB) */
546
+ readonly ALL_MPNET_BASE_V2: "Xenova/all-mpnet-base-v2";
547
+ /** BGE embeddings - excellent for RAG (384 dimensions, ~33MB) */
548
+ readonly BGE_SMALL_EN: "Xenova/bge-small-en-v1.5";
549
+ /** BGE base model - better quality (768 dimensions, ~110MB) */
550
+ readonly BGE_BASE_EN: "Xenova/bge-base-en-v1.5";
551
+ };
552
+ readonly classification: {
553
+ /** Fast sentiment analysis (POSITIVE/NEGATIVE) */
554
+ readonly DISTILBERT_SST2: "Xenova/distilbert-base-uncased-finetuned-sst-2-english";
555
+ /** Twitter sentiment (positive/neutral/negative) */
556
+ readonly TWITTER_ROBERTA_SENTIMENT: "Xenova/twitter-roberta-base-sentiment-latest";
557
+ /** Emotion detection */
558
+ readonly DISTILROBERTA_EMOTION: "Xenova/distilroberta-base-emotion";
559
+ };
560
+ readonly zeroShot: {
561
+ /** BART-based zero-shot classification */
562
+ readonly BART_LARGE_MNLI: "Xenova/bart-large-mnli";
563
+ /** Smaller, faster zero-shot model */
564
+ readonly DISTILBART_MNLI: "Xenova/distilbart-mnli-12-3";
565
+ };
566
+ readonly ner: {
567
+ /** Standard NER: PERSON, ORG, LOC, MISC */
568
+ readonly BERT_BASE_NER: "Xenova/bert-base-NER";
569
+ /** XLM-RoBERTa NER (fine-tuned on CoNLL-03 English) */
570
+ readonly XLM_ROBERTA_NER: "Xenova/xlm-roberta-large-finetuned-conll03-english";
571
+ };
572
+ readonly reranker: {
573
+ /** Fast, small reranker (~22MB) */
574
+ readonly MS_MARCO_MINILM_L6: "Xenova/ms-marco-MiniLM-L-6-v2";
575
+ /** Higher quality reranker */
576
+ readonly BGE_RERANKER_BASE: "Xenova/bge-reranker-base";
577
+ };
578
+ readonly speechToText: {
579
+ /** Smallest Whisper - fastest, good for short audio (~70MB) */
580
+ readonly WHISPER_TINY: "Xenova/whisper-tiny";
581
+ /** Small Whisper - better accuracy (~240MB) */
582
+ readonly WHISPER_SMALL: "Xenova/whisper-small";
583
+ /** Base Whisper - balanced (~140MB) */
584
+ readonly WHISPER_BASE: "Xenova/whisper-base";
585
+ /** Distilled Whisper - fast and accurate */
586
+ readonly DISTIL_WHISPER_SMALL: "Xenova/distil-whisper-small.en";
587
+ };
588
+ readonly imageClassification: {
589
+ /** ViT base model - ImageNet classes */
590
+ readonly VIT_BASE_PATCH16: "Xenova/vit-base-patch16-224";
591
+ /** Smaller ViT model */
592
+ readonly VIT_SMALL_PATCH16: "Xenova/vit-small-patch16-224";
593
+ /** ResNet alternative */
594
+ readonly RESNET_50: "Xenova/resnet-50";
595
+ };
596
+ readonly zeroShotImage: {
597
+ /** CLIP ViT-Base - versatile zero-shot image classification */
598
+ readonly CLIP_VIT_BASE_PATCH32: "Xenova/clip-vit-base-patch32";
599
+ /** Larger CLIP model with better accuracy */
600
+ readonly CLIP_VIT_LARGE_PATCH14: "Xenova/clip-vit-large-patch14";
601
+ /** SigLIP - improved CLIP variant */
602
+ readonly SIGLIP_BASE_PATCH16: "Xenova/siglip-base-patch16-224";
603
+ };
604
+ readonly imageCaption: {
605
+ /** BLIP base - fast, good quality captions */
606
+ readonly BLIP_BASE: "Xenova/blip-image-captioning-base";
607
+ /** BLIP large - better quality, slower */
608
+ readonly BLIP_LARGE: "Xenova/blip-image-captioning-large";
609
+ /** GIT base - alternative captioning model */
610
+ readonly GIT_BASE: "Xenova/git-base-coco";
611
+ };
612
+ };
613
+
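Since each constant is just a model-ID string, the constants drop directly into the factory methods; a brief illustration:

```ts
import { transformers, MODELS, EMBEDDING_MODELS } from '@localmode/transformers';

// Constants can be used wherever a plain model ID string is expected.
const embedder = transformers.embedding(EMBEDDING_MODELS.BGE_SMALL_EN);
const transcriber = transformers.speechToText(MODELS.speechToText.WHISPER_TINY);
const captioner = transformers.captioner(MODELS.imageCaption.BLIP_BASE);
```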
614
+ /**
615
+ * Transformers Embedding Model Implementation
616
+ *
617
+ * Implements EmbeddingModel interface using Transformers.js
618
+ *
619
+ * @packageDocumentation
620
+ */
621
+
622
+ /**
623
+ * Embedding model implementation using Transformers.js
624
+ */
625
+ declare class TransformersEmbeddingModel implements EmbeddingModel {
626
+ private baseModelId;
627
+ private settings;
628
+ readonly modelId: string;
629
+ readonly provider = "transformers";
630
+ readonly dimensions: number;
631
+ readonly maxEmbeddingsPerCall = 128;
632
+ readonly supportsParallelCalls = false;
633
+ private pipeline;
634
+ private loadPromise;
635
+ constructor(baseModelId: string, settings?: {
636
+ device?: TransformersDevice;
637
+ quantized?: boolean;
638
+ onProgress?: (progress: ModelLoadProgress) => void;
639
+ });
640
+ /**
641
+ * Estimate dimensions from model ID.
642
+ */
643
+ private getDimensionsFromModelId;
644
+ /**
645
+ * Load the pipeline if not already loaded.
646
+ */
647
+ private loadPipeline;
648
+ doEmbed(options: {
649
+ values: string[];
650
+ abortSignal?: AbortSignal;
651
+ headers?: Record<string, string>;
652
+ providerOptions?: Record<string, Record<string, unknown>>;
653
+ }): Promise<{
654
+ embeddings: Float32Array[];
655
+ usage: {
656
+ tokens: number;
657
+ };
658
+ response: {
659
+ id?: string;
660
+ modelId: string;
661
+ timestamp: Date;
662
+ };
663
+ }>;
664
+ }
665
+
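The class can also be used directly, one level below the embed() helper; a small sketch based on the doEmbed signature declared above:

```ts
import { TransformersEmbeddingModel } from '@localmode/transformers';

const model = new TransformersEmbeddingModel('Xenova/all-MiniLM-L6-v2', { quantized: true });

// doEmbed accepts up to maxEmbeddingsPerCall (128) values per call.
const { embeddings, usage } = await model.doEmbed({
  values: ['hello world', 'embeddings running locally'],
});

console.log(embeddings[0].length, 'dimensions;', usage.tokens, 'tokens');
```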
666
+ /**
667
+ * Transformers Classification Model Implementation
668
+ *
669
+ * Implements ClassificationModel interface using Transformers.js
670
+ *
671
+ * @packageDocumentation
672
+ */
673
+
674
+ /**
675
+ * Text classification model implementation using Transformers.js
676
+ */
677
+ declare class TransformersClassificationModel implements ClassificationModel {
678
+ private baseModelId;
679
+ private settings;
680
+ readonly modelId: string;
681
+ readonly provider = "transformers";
682
+ readonly labels: string[];
683
+ private pipeline;
684
+ private loadPromise;
685
+ constructor(baseModelId: string, settings?: {
686
+ device?: TransformersDevice;
687
+ quantized?: boolean;
688
+ onProgress?: (progress: ModelLoadProgress) => void;
689
+ });
690
+ private loadPipeline;
691
+ doClassify(options: {
692
+ texts: string[];
693
+ abortSignal?: AbortSignal;
694
+ headers?: Record<string, string>;
695
+ providerOptions?: Record<string, Record<string, unknown>>;
696
+ }): Promise<{
697
+ results: ClassificationResultItem[];
698
+ usage: ClassificationUsage;
699
+ }>;
700
+ }
701
+
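A short sketch of batch classification through the declared doClassify signature (the result item fields live on ClassificationResultItem in @localmode/core):

```ts
import { TransformersClassificationModel } from '@localmode/transformers';

const sentiment = new TransformersClassificationModel(
  'Xenova/distilbert-base-uncased-finetuned-sst-2-english',
);

// Several texts are classified in one call; results line up with the input order.
const { results, usage } = await sentiment.doClassify({
  texts: ['Great product!', 'Terrible support experience.'],
});

console.log(results, usage);
```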
702
+ /**
703
+ * Transformers Zero-Shot Classification Model Implementation
704
+ *
705
+ * Implements ZeroShotClassificationModel interface using Transformers.js
706
+ *
707
+ * @packageDocumentation
708
+ */
709
+
710
+ /**
711
+ * Zero-shot classification model implementation using Transformers.js
712
+ */
713
+ declare class TransformersZeroShotModel implements ZeroShotClassificationModel {
714
+ private baseModelId;
715
+ private settings;
716
+ readonly modelId: string;
717
+ readonly provider = "transformers";
718
+ private pipeline;
719
+ private loadPromise;
720
+ constructor(baseModelId: string, settings?: {
721
+ device?: TransformersDevice;
722
+ quantized?: boolean;
723
+ onProgress?: (progress: ModelLoadProgress) => void;
724
+ });
725
+ private loadPipeline;
726
+ doClassifyZeroShot(options: {
727
+ texts: string[];
728
+ candidateLabels: string[];
729
+ multiLabel?: boolean;
730
+ abortSignal?: AbortSignal;
731
+ headers?: Record<string, string>;
732
+ providerOptions?: Record<string, Record<string, unknown>>;
733
+ }): Promise<{
734
+ results: Array<{
735
+ labels: string[];
736
+ scores: number[];
737
+ }>;
738
+ usage: ClassificationUsage;
739
+ }>;
740
+ }
741
+
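A sketch of multi-label zero-shot classification via the declared doClassifyZeroShot options:

```ts
import { TransformersZeroShotModel } from '@localmode/transformers';

const zeroShot = new TransformersZeroShotModel('Xenova/bart-large-mnli');

// multiLabel lets several candidate labels score highly at once.
const { results } = await zeroShot.doClassifyZeroShot({
  texts: ['The new GPU makes local speech recognition fast and cheap.'],
  candidateLabels: ['hardware', 'audio', 'cooking', 'finance'],
  multiLabel: true,
});

console.log(results[0].labels, results[0].scores);
```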
742
+ /**
743
+ * Transformers NER Model Implementation
744
+ *
745
+ * Implements NERModel interface using Transformers.js
746
+ *
747
+ * @packageDocumentation
748
+ */
749
+
750
+ /**
751
+ * NER model implementation using Transformers.js
752
+ */
753
+ declare class TransformersNERModel implements NERModel {
754
+ private baseModelId;
755
+ private settings;
756
+ readonly modelId: string;
757
+ readonly provider = "transformers";
758
+ readonly entityTypes: string[];
759
+ private pipeline;
760
+ private loadPromise;
761
+ constructor(baseModelId: string, settings?: {
762
+ device?: TransformersDevice;
763
+ quantized?: boolean;
764
+ onProgress?: (progress: ModelLoadProgress) => void;
765
+ });
766
+ private loadPipeline;
767
+ doExtract(options: {
768
+ texts: string[];
769
+ abortSignal?: AbortSignal;
770
+ headers?: Record<string, string>;
771
+ providerOptions?: Record<string, Record<string, unknown>>;
772
+ }): Promise<{
773
+ results: NERResultItem[];
774
+ usage: NERUsage;
775
+ }>;
776
+ }
777
+
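Entity extraction through the declared doExtract signature; the per-text result shape (NERResultItem) is defined in @localmode/core:

```ts
import { TransformersNERModel } from '@localmode/transformers';

const ner = new TransformersNERModel('Xenova/bert-base-NER');

// One NERResultItem per input text.
const { results } = await ner.doExtract({
  texts: ['John works at Google in Zurich.'],
});

console.log(results[0]);
```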
778
+ /**
779
+ * Transformers Reranker Model Implementation
780
+ *
781
+ * Implements RerankerModel interface using Transformers.js
782
+ *
783
+ * @packageDocumentation
784
+ */
785
+
786
+ /**
787
+ * Reranker model implementation using Transformers.js
788
+ *
789
+ * Rerankers score query-document pairs for relevance. They're typically
790
+ * used after initial retrieval to improve result quality.
791
+ */
792
+ declare class TransformersRerankerModel implements RerankerModel {
793
+ private baseModelId;
794
+ private settings;
795
+ readonly modelId: string;
796
+ readonly provider = "transformers";
797
+ private pipeline;
798
+ private loadPromise;
799
+ constructor(baseModelId: string, settings?: {
800
+ device?: TransformersDevice;
801
+ quantized?: boolean;
802
+ onProgress?: (progress: ModelLoadProgress) => void;
803
+ });
804
+ private loadPipeline;
805
+ doRerank(options: {
806
+ query: string;
807
+ documents: string[];
808
+ topK?: number;
809
+ abortSignal?: AbortSignal;
810
+ headers?: Record<string, string>;
811
+ providerOptions?: Record<string, Record<string, unknown>>;
812
+ }): Promise<{
813
+ results: RankedDocument[];
814
+ usage: RerankUsage;
815
+ }>;
816
+ }
817
+
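A sketch of direct reranking with topK, per the doRerank signature above:

```ts
import { TransformersRerankerModel } from '@localmode/transformers';

const reranker = new TransformersRerankerModel('Xenova/ms-marco-MiniLM-L-6-v2');

// Score each document against the query and keep the two most relevant.
const { results } = await reranker.doRerank({
  query: 'how does self-attention work?',
  documents: [
    'Self-attention relates every token to every other token in a sequence.',
    'Electrical transformers step voltage up or down.',
    'The attention mechanism weights token interactions by learned relevance.',
  ],
  topK: 2,
});

// RankedDocument entries are defined in @localmode/core.
console.log(results);
```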
818
+ /**
819
+ * Transformers Speech-to-Text Model Implementation
820
+ *
821
+ * Implements SpeechToTextModel interface using Transformers.js
822
+ *
823
+ * @packageDocumentation
824
+ */
825
+
826
+ /**
827
+ * Speech-to-text model implementation using Transformers.js (Whisper)
828
+ */
829
+ declare class TransformersSpeechToTextModel implements SpeechToTextModel {
830
+ private baseModelId;
831
+ private settings;
832
+ readonly modelId: string;
833
+ readonly provider = "transformers";
834
+ readonly languages: string[];
835
+ private pipeline;
836
+ private loadPromise;
837
+ constructor(baseModelId: string, settings?: {
838
+ device?: TransformersDevice;
839
+ quantized?: boolean;
840
+ onProgress?: (progress: ModelLoadProgress) => void;
841
+ });
842
+ private loadPipeline;
843
+ private objectUrls;
844
+ /**
845
+ * Convert Float32Array PCM samples to a WAV blob URL.
846
+ * This ensures transformers.js correctly interprets the audio format.
847
+ */
848
+ private createWavUrl;
849
+ /**
850
+ * Convert AudioInput to a format Transformers.js can process.
851
+ * For raw PCM samples, converts to WAV URL to ensure correct interpretation.
852
+ */
853
+ private prepareAudio;
854
+ /**
855
+ * Cleanup any created Object URLs
856
+ */
857
+ private cleanupObjectUrls;
858
+ doTranscribe(options: {
859
+ audio: AudioInput;
860
+ language?: string;
861
+ task?: 'transcribe' | 'translate';
862
+ returnTimestamps?: boolean | 'word';
863
+ abortSignal?: AbortSignal;
864
+ headers?: Record<string, string>;
865
+ providerOptions?: Record<string, Record<string, unknown>>;
866
+ }): Promise<{
867
+ text: string;
868
+ segments?: TranscriptionSegment[];
869
+ language?: string;
870
+ usage: AudioUsage;
871
+ }>;
872
+ }
873
+
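A transcription sketch using the declared doTranscribe options; the fetched WAV path is a placeholder, and passing a Blob follows the transcribe() example earlier in this file.

```ts
import { TransformersSpeechToTextModel } from '@localmode/transformers';

const whisper = new TransformersSpeechToTextModel('Xenova/whisper-tiny');

// Placeholder audio source; any AudioInput accepted by @localmode/core works here.
const audio = await fetch('/audio/meeting-clip.wav').then((r) => r.blob());

const { text, segments, language } = await whisper.doTranscribe({
  audio,
  language: 'en',
  task: 'transcribe',
  returnTimestamps: true, // segment timestamps; use 'word' for word-level
});

console.log(language, text, segments?.length ?? 0, 'segments');
```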
874
+ /**
875
+ * Transformers Image Classification Model Implementation
876
+ *
877
+ * Implements ImageClassificationModel interface using Transformers.js
878
+ *
879
+ * @packageDocumentation
880
+ */
881
+
882
+ /**
883
+ * Image classification model implementation using Transformers.js
884
+ */
885
+ declare class TransformersImageClassificationModel implements ImageClassificationModel {
886
+ private baseModelId;
887
+ private settings;
888
+ readonly modelId: string;
889
+ readonly provider = "transformers";
890
+ private pipeline;
891
+ private loadPromise;
892
+ constructor(baseModelId: string, settings?: {
893
+ device?: TransformersDevice;
894
+ quantized?: boolean;
895
+ onProgress?: (progress: ModelLoadProgress) => void;
896
+ });
897
+ private loadPipeline;
898
+ /**
899
+ * Convert ImageInput to a format Transformers.js can process
900
+ */
901
+ private prepareImage;
902
+ doClassify(options: {
903
+ images: ImageInput[];
904
+ topK?: number;
905
+ abortSignal?: AbortSignal;
906
+ providerOptions?: Record<string, Record<string, unknown>>;
907
+ }): Promise<{
908
+ results: ImageClassificationResultItem[][];
909
+ usage: VisionUsage;
910
+ }>;
911
+ }
912
+
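Image classification via the declared doClassify signature; the image path is a placeholder, and a Blob is assumed to satisfy ImageInput as in the classifyImage() example above.

```ts
import { TransformersImageClassificationModel } from '@localmode/transformers';

const vit = new TransformersImageClassificationModel('Xenova/vit-base-patch16-224');

// Placeholder image source.
const image = await fetch('/images/cat.jpg').then((r) => r.blob());

// topK limits how many labels are returned per image.
const { results } = await vit.doClassify({ images: [image], topK: 3 });

// results[0] holds the top-3 ImageClassificationResultItem entries for the first image.
console.log(results[0]);
```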
913
+ /**
914
+ * Transformers Zero-Shot Image Classification Model Implementation
915
+ *
916
+ * Implements ZeroShotImageClassificationModel interface using Transformers.js (CLIP)
917
+ *
918
+ * @packageDocumentation
919
+ */
920
+
921
+ /**
922
+ * Zero-shot image classification model implementation using Transformers.js (CLIP)
923
+ */
924
+ declare class TransformersZeroShotImageModel implements ZeroShotImageClassificationModel {
925
+ private baseModelId;
926
+ private settings;
927
+ readonly modelId: string;
928
+ readonly provider = "transformers";
929
+ private pipeline;
930
+ private loadPromise;
931
+ constructor(baseModelId: string, settings?: {
932
+ device?: TransformersDevice;
933
+ quantized?: boolean;
934
+ onProgress?: (progress: ModelLoadProgress) => void;
935
+ });
936
+ private loadPipeline;
937
+ /**
938
+ * Convert ImageInput to a format Transformers.js can process
939
+ */
940
+ private prepareImage;
941
+ doClassifyZeroShot(options: {
942
+ images: ImageInput[];
943
+ candidateLabels: string[];
944
+ hypothesisTemplate?: string;
945
+ abortSignal?: AbortSignal;
946
+ providerOptions?: Record<string, Record<string, unknown>>;
947
+ }): Promise<{
948
+ results: ZeroShotImageClassificationResultItem[];
949
+ usage: VisionUsage;
950
+ }>;
951
+ }
952
+
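Zero-shot image classification via the declared doClassifyZeroShot options; the image path and the '{}' placeholder in the hypothesis template are assumptions (the template syntax follows the underlying Transformers.js convention).

```ts
import { TransformersZeroShotImageModel } from '@localmode/transformers';

const clip = new TransformersZeroShotImageModel('Xenova/clip-vit-base-patch32');

const image = await fetch('/images/pet.jpg').then((r) => r.blob()); // placeholder source

const { results } = await clip.doClassifyZeroShot({
  images: [image],
  candidateLabels: ['cat', 'dog', 'bird'],
  hypothesisTemplate: 'a photo of a {}', // assumed '{}' placeholder syntax
});

console.log(results[0]);
```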
953
+ /**
954
+ * Transformers Image Captioning Model Implementation
955
+ *
956
+ * Implements ImageCaptionModel interface using Transformers.js (BLIP)
957
+ *
958
+ * @packageDocumentation
959
+ */
960
+
961
+ /**
962
+ * Image captioning model implementation using Transformers.js (BLIP)
963
+ */
964
+ declare class TransformersCaptionModel implements ImageCaptionModel {
965
+ private baseModelId;
966
+ private settings;
967
+ readonly modelId: string;
968
+ readonly provider = "transformers";
969
+ private pipeline;
970
+ private loadPromise;
971
+ constructor(baseModelId: string, settings?: {
972
+ device?: TransformersDevice;
973
+ quantized?: boolean;
974
+ onProgress?: (progress: ModelLoadProgress) => void;
975
+ });
976
+ private loadPipeline;
977
+ /**
978
+ * Convert ImageInput to a format Transformers.js can process
979
+ */
980
+ private prepareImage;
981
+ doCaption(options: {
982
+ images: ImageInput[];
983
+ maxLength?: number;
984
+ abortSignal?: AbortSignal;
985
+ providerOptions?: Record<string, Record<string, unknown>>;
986
+ }): Promise<{
987
+ captions: string[];
988
+ usage: VisionUsage;
989
+ }>;
990
+ }
991
+
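Finally, a captioning sketch against the declared doCaption signature; the image path is a placeholder.

```ts
import { TransformersCaptionModel } from '@localmode/transformers';

const blip = new TransformersCaptionModel('Xenova/blip-image-captioning-base');

const image = await fetch('/images/street.jpg').then((r) => r.blob()); // placeholder source

// maxLength caps the length of the generated caption.
const { captions } = await blip.doCaption({ images: [image], maxLength: 30 });

console.log(captions[0]);
```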
992
+ export { CLASSIFICATION_MODELS, EMBEDDING_MODELS, IMAGE_CAPTION_MODELS, IMAGE_CLASSIFICATION_MODELS, MODELS, type ModelLoadProgress, type ModelSettings, NER_MODELS, RERANKER_MODELS, SPEECH_TO_TEXT_MODELS, TransformersCaptionModel, TransformersClassificationModel, type TransformersDevice, TransformersEmbeddingModel, TransformersImageClassificationModel, TransformersNERModel, type TransformersProvider, type TransformersProviderSettings, TransformersRerankerModel, TransformersSpeechToTextModel, TransformersZeroShotImageModel, TransformersZeroShotModel, ZERO_SHOT_IMAGE_MODELS, ZERO_SHOT_MODELS, clearModelCache, createTransformers, getModelStorageUsage, getOptimalDevice, isModelCached, isWebGPUAvailable, preloadModel, transformers };