@huggingface/transformers 3.0.2 → 3.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (258)
  1. package/README.md +13 -4
  2. package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
  3. package/dist/transformers.cjs +16655 -13040
  4. package/dist/transformers.cjs.map +1 -1
  5. package/dist/transformers.js +17095 -13468
  6. package/dist/transformers.js.map +1 -1
  7. package/dist/transformers.min.cjs +244 -52
  8. package/dist/transformers.min.cjs.map +1 -1
  9. package/dist/transformers.min.js +235 -43
  10. package/dist/transformers.min.js.map +1 -1
  11. package/dist/transformers.min.mjs +246 -54
  12. package/dist/transformers.min.mjs.map +1 -1
  13. package/dist/transformers.mjs +16818 -13202
  14. package/dist/transformers.mjs.map +1 -1
  15. package/package.json +4 -4
  16. package/src/base/feature_extraction_utils.js +54 -0
  17. package/src/base/image_processors_utils.js +1089 -0
  18. package/src/base/processing_utils.js +145 -0
  19. package/src/configs.js +15 -4
  20. package/src/env.js +6 -6
  21. package/src/generation/configuration_utils.js +7 -0
  22. package/src/generation/logits_process.js +22 -16
  23. package/src/generation/streamers.js +7 -2
  24. package/src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js +90 -0
  25. package/src/models/auto/feature_extraction_auto.js +41 -0
  26. package/src/models/auto/image_processing_auto.js +29 -0
  27. package/src/models/auto/processing_auto.js +100 -0
  28. package/src/models/beit/image_processing_beit.js +5 -0
  29. package/src/models/bit/image_processing_bit.js +5 -0
  30. package/src/models/chinese_clip/image_processing_chinese_clip.js +5 -0
  31. package/src/models/clap/feature_extraction_clap.js +159 -0
  32. package/src/models/clip/image_processing_clip.js +6 -0
  33. package/src/models/convnext/image_processing_convnext.js +45 -0
  34. package/src/models/deit/image_processing_deit.js +6 -0
  35. package/src/models/detr/image_processing_detr.js +52 -0
  36. package/src/models/donut/image_processing_donut.js +31 -0
  37. package/src/models/dpt/image_processing_dpt.js +6 -0
  38. package/src/models/efficientnet/image_processing_efficientnet.js +13 -0
  39. package/src/models/feature_extractors.js +12 -0
  40. package/src/models/florence2/processing_florence2.js +128 -0
  41. package/src/models/glpn/image_processing_glpn.js +5 -0
  42. package/src/models/idefics3/image_processing_idefics3.js +219 -0
  43. package/src/models/idefics3/processing_idefics3.js +136 -0
  44. package/src/models/image_processors.js +37 -0
  45. package/src/models/janus/image_processing_janus.js +26 -0
  46. package/src/models/janus/processing_janus.js +123 -0
  47. package/src/models/jina_clip/image_processing_jina_clip.js +26 -0
  48. package/src/models/jina_clip/processing_jina_clip.js +24 -0
  49. package/src/models/llava_onevision/image_processing_llava_onevision.js +5 -0
  50. package/src/models/mask2former/image_processing_mask2former.js +5 -0
  51. package/src/models/maskformer/image_processing_maskformer.js +18 -0
  52. package/src/models/mgp_str/processing_mgp_str.js +170 -0
  53. package/src/models/mobilenet_v1/image_processing_mobilenet_v1.js +7 -0
  54. package/src/models/mobilenet_v2/image_processing_mobilenet_v2.js +7 -0
  55. package/src/models/mobilenet_v3/image_processing_mobilenet_v3.js +7 -0
  56. package/src/models/mobilenet_v4/image_processing_mobilenet_v4.js +7 -0
  57. package/src/models/mobilevit/image_processing_mobilevit.js +6 -0
  58. package/src/models/nougat/image_processing_nougat.js +5 -0
  59. package/src/models/owlv2/image_processing_owlv2.js +5 -0
  60. package/src/models/owlvit/image_processing_owlvit.js +12 -0
  61. package/src/models/owlvit/processing_owlvit.js +7 -0
  62. package/src/models/processors.js +12 -0
  63. package/src/models/pvt/image_processing_pvt.js +5 -0
  64. package/src/models/pyannote/feature_extraction_pyannote.js +28 -0
  65. package/src/models/pyannote/processing_pyannote.js +71 -0
  66. package/src/models/qwen2_vl/image_processing_qwen2_vl.js +52 -0
  67. package/src/models/qwen2_vl/processing_qwen2_vl.js +52 -0
  68. package/src/models/rt_detr/image_processing_rt_detr.js +12 -0
  69. package/src/models/sam/image_processing_sam.js +242 -0
  70. package/src/models/sam/processing_sam.js +20 -0
  71. package/src/models/sapiens/image_processing_sapiens.js +13 -0
  72. package/src/models/seamless_m4t/feature_extraction_seamless_m4t.js +180 -0
  73. package/src/models/segformer/image_processing_segformer.js +13 -0
  74. package/src/models/siglip/image_processing_siglip.js +5 -0
  75. package/src/models/speecht5/feature_extraction_speecht5.js +4 -0
  76. package/src/models/speecht5/processing_speecht5.js +17 -0
  77. package/src/models/swin2sr/image_processing_swin2sr.js +24 -0
  78. package/src/models/vit/image_processing_vit.js +7 -0
  79. package/src/models/vitmatte/image_processing_vitmatte.js +50 -0
  80. package/src/models/vitpose/image_processing_vitpose.js +89 -0
  81. package/src/models/wav2vec2/feature_extraction_wav2vec2.js +44 -0
  82. package/src/models/wav2vec2/processing_wav2vec2.js +15 -0
  83. package/src/models/wespeaker/feature_extraction_wespeaker.js +100 -0
  84. package/src/models/whisper/feature_extraction_whisper.js +84 -0
  85. package/src/models/whisper/processing_whisper.js +21 -0
  86. package/src/models/yolos/image_processing_yolos.js +12 -0
  87. package/src/models.js +755 -34
  88. package/src/pipelines.js +8 -8
  89. package/src/tokenizers.js +5 -0
  90. package/src/transformers.js +15 -2
  91. package/src/utils/constants.js +8 -1
  92. package/src/utils/core.js +51 -9
  93. package/src/utils/dtypes.js +2 -1
  94. package/src/utils/hub.js +2 -1
  95. package/src/utils/image.js +87 -33
  96. package/src/utils/tensor.js +39 -2
  97. package/types/base/feature_extraction_utils.d.ts +41 -0
  98. package/types/base/feature_extraction_utils.d.ts.map +1 -0
  99. package/types/base/image_processors_utils.d.ts +323 -0
  100. package/types/base/image_processors_utils.d.ts.map +1 -0
  101. package/types/base/processing_utils.d.ts +80 -0
  102. package/types/base/processing_utils.d.ts.map +1 -0
  103. package/types/configs.d.ts +5 -2
  104. package/types/configs.d.ts.map +1 -1
  105. package/types/env.d.ts +1 -1
  106. package/types/env.d.ts.map +1 -1
  107. package/types/generation/configuration_utils.d.ts +6 -0
  108. package/types/generation/configuration_utils.d.ts.map +1 -1
  109. package/types/generation/logits_process.d.ts +30 -20
  110. package/types/generation/logits_process.d.ts.map +1 -1
  111. package/types/generation/streamers.d.ts +13 -8
  112. package/types/generation/streamers.d.ts.map +1 -1
  113. package/types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts +25 -0
  114. package/types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts.map +1 -0
  115. package/types/models/auto/feature_extraction_auto.d.ts +5 -0
  116. package/types/models/auto/feature_extraction_auto.d.ts.map +1 -0
  117. package/types/models/auto/image_processing_auto.d.ts +5 -0
  118. package/types/models/auto/image_processing_auto.d.ts.map +1 -0
  119. package/types/models/auto/processing_auto.d.ts +35 -0
  120. package/types/models/auto/processing_auto.d.ts.map +1 -0
  121. package/types/models/beit/image_processing_beit.d.ts +4 -0
  122. package/types/models/beit/image_processing_beit.d.ts.map +1 -0
  123. package/types/models/bit/image_processing_bit.d.ts +4 -0
  124. package/types/models/bit/image_processing_bit.d.ts.map +1 -0
  125. package/types/models/chinese_clip/image_processing_chinese_clip.d.ts +4 -0
  126. package/types/models/chinese_clip/image_processing_chinese_clip.d.ts.map +1 -0
  127. package/types/models/clap/feature_extraction_clap.d.ts +57 -0
  128. package/types/models/clap/feature_extraction_clap.d.ts.map +1 -0
  129. package/types/models/clip/image_processing_clip.d.ts +6 -0
  130. package/types/models/clip/image_processing_clip.d.ts.map +1 -0
  131. package/types/models/convnext/image_processing_convnext.d.ts +12 -0
  132. package/types/models/convnext/image_processing_convnext.d.ts.map +1 -0
  133. package/types/models/deit/image_processing_deit.d.ts +6 -0
  134. package/types/models/deit/image_processing_deit.d.ts.map +1 -0
  135. package/types/models/detr/image_processing_detr.d.ts +42 -0
  136. package/types/models/detr/image_processing_detr.d.ts.map +1 -0
  137. package/types/models/donut/image_processing_donut.d.ts +7 -0
  138. package/types/models/donut/image_processing_donut.d.ts.map +1 -0
  139. package/types/models/dpt/image_processing_dpt.d.ts +6 -0
  140. package/types/models/dpt/image_processing_dpt.d.ts.map +1 -0
  141. package/types/models/efficientnet/image_processing_efficientnet.d.ts +6 -0
  142. package/types/models/efficientnet/image_processing_efficientnet.d.ts.map +1 -0
  143. package/types/models/feature_extractors.d.ts +10 -0
  144. package/types/models/feature_extractors.d.ts.map +1 -0
  145. package/types/models/florence2/processing_florence2.d.ts +39 -0
  146. package/types/models/florence2/processing_florence2.d.ts.map +1 -0
  147. package/types/models/glpn/image_processing_glpn.d.ts +4 -0
  148. package/types/models/glpn/image_processing_glpn.d.ts.map +1 -0
  149. package/types/models/idefics3/image_processing_idefics3.d.ts +40 -0
  150. package/types/models/idefics3/image_processing_idefics3.d.ts.map +1 -0
  151. package/types/models/idefics3/processing_idefics3.d.ts +19 -0
  152. package/types/models/idefics3/processing_idefics3.d.ts.map +1 -0
  153. package/types/models/image_processors.d.ts +37 -0
  154. package/types/models/image_processors.d.ts.map +1 -0
  155. package/types/models/janus/image_processing_janus.d.ts +7 -0
  156. package/types/models/janus/image_processing_janus.d.ts.map +1 -0
  157. package/types/models/janus/processing_janus.d.ts +77 -0
  158. package/types/models/janus/processing_janus.d.ts.map +1 -0
  159. package/types/models/jina_clip/image_processing_jina_clip.d.ts +5 -0
  160. package/types/models/jina_clip/image_processing_jina_clip.d.ts.map +1 -0
  161. package/types/models/jina_clip/processing_jina_clip.d.ts +9 -0
  162. package/types/models/jina_clip/processing_jina_clip.d.ts.map +1 -0
  163. package/types/models/llava_onevision/image_processing_llava_onevision.d.ts +4 -0
  164. package/types/models/llava_onevision/image_processing_llava_onevision.d.ts.map +1 -0
  165. package/types/models/mask2former/image_processing_mask2former.d.ts +4 -0
  166. package/types/models/mask2former/image_processing_mask2former.d.ts.map +1 -0
  167. package/types/models/maskformer/image_processing_maskformer.d.ts +22 -0
  168. package/types/models/maskformer/image_processing_maskformer.d.ts.map +1 -0
  169. package/types/models/mgp_str/processing_mgp_str.d.ts +64 -0
  170. package/types/models/mgp_str/processing_mgp_str.d.ts.map +1 -0
  171. package/types/models/mobilenet_v1/image_processing_mobilenet_v1.d.ts +6 -0
  172. package/types/models/mobilenet_v1/image_processing_mobilenet_v1.d.ts.map +1 -0
  173. package/types/models/mobilenet_v2/image_processing_mobilenet_v2.d.ts +6 -0
  174. package/types/models/mobilenet_v2/image_processing_mobilenet_v2.d.ts.map +1 -0
  175. package/types/models/mobilenet_v3/image_processing_mobilenet_v3.d.ts +6 -0
  176. package/types/models/mobilenet_v3/image_processing_mobilenet_v3.d.ts.map +1 -0
  177. package/types/models/mobilenet_v4/image_processing_mobilenet_v4.d.ts +6 -0
  178. package/types/models/mobilenet_v4/image_processing_mobilenet_v4.d.ts.map +1 -0
  179. package/types/models/mobilevit/image_processing_mobilevit.d.ts +6 -0
  180. package/types/models/mobilevit/image_processing_mobilevit.d.ts.map +1 -0
  181. package/types/models/nougat/image_processing_nougat.d.ts +4 -0
  182. package/types/models/nougat/image_processing_nougat.d.ts.map +1 -0
  183. package/types/models/owlv2/image_processing_owlv2.d.ts +4 -0
  184. package/types/models/owlv2/image_processing_owlv2.d.ts.map +1 -0
  185. package/types/models/owlvit/image_processing_owlvit.d.ts +10 -0
  186. package/types/models/owlvit/image_processing_owlvit.d.ts.map +1 -0
  187. package/types/models/owlvit/processing_owlvit.d.ts +8 -0
  188. package/types/models/owlvit/processing_owlvit.d.ts.map +1 -0
  189. package/types/models/processors.d.ts +13 -0
  190. package/types/models/processors.d.ts.map +1 -0
  191. package/types/models/pvt/image_processing_pvt.d.ts +4 -0
  192. package/types/models/pvt/image_processing_pvt.d.ts.map +1 -0
  193. package/types/models/pyannote/feature_extraction_pyannote.d.ts +13 -0
  194. package/types/models/pyannote/feature_extraction_pyannote.d.ts.map +1 -0
  195. package/types/models/pyannote/processing_pyannote.d.ts +30 -0
  196. package/types/models/pyannote/processing_pyannote.d.ts.map +1 -0
  197. package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts +11 -0
  198. package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts.map +1 -0
  199. package/types/models/qwen2_vl/processing_qwen2_vl.d.ts +17 -0
  200. package/types/models/qwen2_vl/processing_qwen2_vl.d.ts.map +1 -0
  201. package/types/models/rt_detr/image_processing_rt_detr.d.ts +8 -0
  202. package/types/models/rt_detr/image_processing_rt_detr.d.ts.map +1 -0
  203. package/types/models/sam/image_processing_sam.d.ts +103 -0
  204. package/types/models/sam/image_processing_sam.d.ts.map +1 -0
  205. package/types/models/sam/processing_sam.d.ts +9 -0
  206. package/types/models/sam/processing_sam.d.ts.map +1 -0
  207. package/types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts +34 -0
  208. package/types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts.map +1 -0
  209. package/types/models/segformer/image_processing_segformer.d.ts +10 -0
  210. package/types/models/segformer/image_processing_segformer.d.ts.map +1 -0
  211. package/types/models/siglip/image_processing_siglip.d.ts +4 -0
  212. package/types/models/siglip/image_processing_siglip.d.ts.map +1 -0
  213. package/types/models/speecht5/feature_extraction_speecht5.d.ts +4 -0
  214. package/types/models/speecht5/feature_extraction_speecht5.d.ts.map +1 -0
  215. package/types/models/speecht5/processing_speecht5.d.ts +14 -0
  216. package/types/models/speecht5/processing_speecht5.d.ts.map +1 -0
  217. package/types/models/swin2sr/image_processing_swin2sr.d.ts +5 -0
  218. package/types/models/swin2sr/image_processing_swin2sr.d.ts.map +1 -0
  219. package/types/models/vit/image_processing_vit.d.ts +6 -0
  220. package/types/models/vit/image_processing_vit.d.ts.map +1 -0
  221. package/types/models/vitmatte/image_processing_vitmatte.d.ts +12 -0
  222. package/types/models/vitmatte/image_processing_vitmatte.d.ts.map +1 -0
  223. package/types/models/vitpose/image_processing_vitpose.d.ts +26 -0
  224. package/types/models/vitpose/image_processing_vitpose.d.ts.map +1 -0
  225. package/types/models/wav2vec2/feature_extraction_wav2vec2.d.ts +19 -0
  226. package/types/models/wav2vec2/feature_extraction_wav2vec2.d.ts.map +1 -0
  227. package/types/models/wav2vec2/processing_wav2vec2.d.ts +12 -0
  228. package/types/models/wav2vec2/processing_wav2vec2.d.ts.map +1 -0
  229. package/types/models/wespeaker/feature_extraction_wespeaker.d.ts +23 -0
  230. package/types/models/wespeaker/feature_extraction_wespeaker.d.ts.map +1 -0
  231. package/types/models/whisper/feature_extraction_whisper.d.ts +21 -0
  232. package/types/models/whisper/feature_extraction_whisper.d.ts.map +1 -0
  233. package/types/models/whisper/processing_whisper.d.ts +17 -0
  234. package/types/models/whisper/processing_whisper.d.ts.map +1 -0
  235. package/types/models/yolos/image_processing_yolos.d.ts +10 -0
  236. package/types/models/yolos/image_processing_yolos.d.ts.map +1 -0
  237. package/types/models.d.ts +150 -0
  238. package/types/models.d.ts.map +1 -1
  239. package/types/pipelines.d.ts +2 -3
  240. package/types/pipelines.d.ts.map +1 -1
  241. package/types/tokenizers.d.ts +3 -0
  242. package/types/tokenizers.d.ts.map +1 -1
  243. package/types/transformers.d.ts +10 -1
  244. package/types/utils/constants.d.ts +6 -0
  245. package/types/utils/constants.d.ts.map +1 -1
  246. package/types/utils/core.d.ts +65 -3
  247. package/types/utils/core.d.ts.map +1 -1
  248. package/types/utils/dtypes.d.ts +3 -2
  249. package/types/utils/dtypes.d.ts.map +1 -1
  250. package/types/utils/hub.d.ts +1 -1
  251. package/types/utils/hub.d.ts.map +1 -1
  252. package/types/utils/image.d.ts +14 -2
  253. package/types/utils/image.d.ts.map +1 -1
  254. package/types/utils/tensor.d.ts +39 -4
  255. package/types/utils/tensor.d.ts.map +1 -1
  256. package/src/processors.js +0 -2655
  257. package/types/processors.d.ts +0 -924
  258. package/types/processors.d.ts.map +0 -1
@@ -0,0 +1,145 @@
1
+
2
+ /**
3
+ * @file Processors are used to prepare inputs (e.g., text, image or audio) for a model.
4
+ *
5
+ * **Example:** Using a `WhisperProcessor` to prepare an audio input for a model.
6
+ * ```javascript
7
+ * import { AutoProcessor, read_audio } from '@huggingface/transformers';
8
+ *
9
+ * const processor = await AutoProcessor.from_pretrained('openai/whisper-tiny.en');
10
+ * const audio = await read_audio('https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac', 16000);
11
+ * const { input_features } = await processor(audio);
12
+ * // Tensor {
13
+ * // data: Float32Array(240000) [0.4752984642982483, 0.5597258806228638, 0.56434166431427, ...],
14
+ * // dims: [1, 80, 3000],
15
+ * // type: 'float32',
16
+ * // size: 240000,
17
+ * // }
18
+ * ```
19
+ *
20
+ * @module processors
21
+ */
22
+ import { PROCESSOR_NAME } from '../utils/constants.js';
23
+ import {
24
+ Callable,
25
+ } from '../utils/generic.js';
26
+ import { getModelJSON } from '../utils/hub.js';
27
+
28
+ /**
29
+ * @typedef {Object} ProcessorProperties Additional processor-specific properties.
30
+ * @typedef {import('../utils/hub.js').PretrainedOptions & ProcessorProperties} PretrainedProcessorOptions
31
+ */
32
+
33
+
34
+ /**
35
+ * Represents a Processor that extracts features from an input.
36
+ */
37
+ export class Processor extends Callable {
38
+ static classes = [
39
+ 'image_processor_class',
40
+ 'tokenizer_class',
41
+ 'feature_extractor_class',
42
+ ]
43
+ static uses_processor_config = false;
44
+
45
+ /**
46
+ * Creates a new Processor with the given components
47
+ * @param {Object} config
48
+ * @param {Record<string, Object>} components
49
+ */
50
+ constructor(config, components) {
51
+ super();
52
+ this.config = config;
53
+ this.components = components;
54
+ }
55
+
56
+ /**
57
+ * @returns {import('./image_processors_utils.js').ImageProcessor|undefined} The image processor of the processor, if it exists.
58
+ */
59
+ get image_processor() {
60
+ return this.components.image_processor;
61
+ }
62
+
63
+ /**
64
+ * @returns {import('../tokenizers.js').PreTrainedTokenizer|undefined} The tokenizer of the processor, if it exists.
65
+ */
66
+ get tokenizer() {
67
+ return this.components.tokenizer;
68
+ }
69
+
70
+ /**
71
+ * @returns {import('./feature_extraction_utils.js').FeatureExtractor|undefined} The feature extractor of the processor, if it exists.
72
+ */
73
+ get feature_extractor() {
74
+ return this.components.feature_extractor;
75
+ }
76
+
77
+ apply_chat_template(messages, options = {}) {
78
+ if (!this.tokenizer) {
79
+ throw new Error('Unable to apply chat template without a tokenizer.');
80
+ }
81
+ return this.tokenizer.apply_chat_template(messages, {
82
+ tokenize: false, // default to false
83
+ ...options,
84
+ });
85
+ }
86
+
87
+ batch_decode(...args) {
88
+ if (!this.tokenizer) {
89
+ throw new Error('Unable to decode without a tokenizer.');
90
+ }
91
+ return this.tokenizer.batch_decode(...args);
92
+ }
93
+
94
+
95
+ /**
96
+ * Calls the feature_extractor function with the given input.
97
+ * @param {any} input The input to extract features from.
98
+ * @param {...any} args Additional arguments.
99
+ * @returns {Promise<any>} A Promise that resolves with the extracted features.
100
+ */
101
+ async _call(input, ...args) {
102
+ for (const item of [this.image_processor, this.feature_extractor, this.tokenizer]) {
103
+ if (item) {
104
+ return item(input, ...args);
105
+ }
106
+ }
107
+ throw new Error('No image processor, feature extractor, or tokenizer found.');
108
+ }
109
+
110
+
111
+ /**
112
+ * Instantiate one of the processor classes of the library from a pretrained model.
113
+ *
114
+ * The processor class to instantiate is selected based on the `feature_extractor_type` property of the config object
115
+ * (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
116
+ *
117
+ * @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
118
+ * - A string, the *model id* of a pretrained processor hosted inside a model repo on huggingface.co.
119
+ * Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
120
+ * user or organization name, like `dbmdz/bert-base-german-cased`.
121
+ * - A path to a *directory* containing processor files, e.g., `./my_model_directory/`.
122
+ * @param {PretrainedProcessorOptions} options Additional options for loading the processor.
123
+ *
124
+ * @returns {Promise<Processor>} A new instance of the Processor class.
125
+ */
126
+ static async from_pretrained(pretrained_model_name_or_path, options) {
127
+
128
+ const [config, components] = await Promise.all([
129
+ // TODO:
130
+ this.uses_processor_config
131
+ ? getModelJSON(pretrained_model_name_or_path, PROCESSOR_NAME, true, options)
132
+ : {},
133
+ Promise.all(
134
+ this.classes
135
+ .filter((cls) => cls in this)
136
+ .map(async (cls) => {
137
+ const component = await this[cls].from_pretrained(pretrained_model_name_or_path, options);
138
+ return [cls.replace(/_class$/, ''), component];
139
+ })
140
+ ).then(Object.fromEntries)
141
+ ]);
142
+
143
+ return new this(config, components);
144
+ }
145
+ }
package/src/configs.js CHANGED
@@ -36,6 +36,13 @@ import {
36
36
  * @typedef {import('./utils/hub.js').PretrainedOptions} PretrainedOptions
37
37
  */
38
38
 
39
+ /**
40
+ * @typedef {import('./utils/core.js').ProgressCallback} ProgressCallback
41
+ */
42
+
43
+ /**
44
+ * @typedef {import('./utils/core.js').ProgressInfo} ProgressInfo
45
+ */
39
46
 
40
47
  /**
41
48
  * Loads a config from the specified path.
@@ -61,6 +68,8 @@ function getNormalizedConfig(config) {
61
68
  case 'llava':
62
69
  case 'paligemma':
63
70
  case 'florence2':
71
+ case 'llava_onevision':
72
+ case 'idefics3':
64
73
  init_normalized_config = getNormalizedConfig(config.text_config);
65
74
  break;
66
75
  case 'moondream1':
@@ -69,6 +78,9 @@ function getNormalizedConfig(config) {
69
78
  case 'musicgen':
70
79
  init_normalized_config = getNormalizedConfig(config.decoder);
71
80
  break;
81
+ case 'multi_modality':
82
+ init_normalized_config = getNormalizedConfig(config.language_config);
83
+ break;
72
84
 
73
85
  // Decoder-only models
74
86
  case 'gpt2':
@@ -98,6 +110,7 @@ function getNormalizedConfig(config) {
98
110
  case 'mistral':
99
111
  case 'starcoder2':
100
112
  case 'qwen2':
113
+ case 'qwen2_vl':
101
114
  mapping['num_heads'] = 'num_key_value_heads';
102
115
  mapping['num_layers'] = 'num_hidden_layers';
103
116
  mapping['hidden_size'] = 'hidden_size';
@@ -218,14 +231,12 @@ function getNormalizedConfig(config) {
218
231
  */
219
232
  export function getKeyValueShapes(config, {
220
233
  prefix = 'past_key_values',
234
+ batch_size=1,
221
235
  } = {}) {
222
236
  /** @type {Record<string, number[]>} */
223
237
  const decoderFeeds = {};
224
238
  const normalized_config = config.normalized_config;
225
239
 
226
- // TODO support batches (i.e., batch_size > 1)
227
- const batch_size = 1;
228
-
229
240
  if (normalized_config.is_encoder_decoder && (
230
241
  'num_encoder_heads' in normalized_config && 'num_decoder_heads' in normalized_config
231
242
  )) {
@@ -372,6 +383,6 @@ export class AutoConfig {
372
383
  * See https://onnxruntime.ai/docs/tutorials/web/env-flags-and-session-options.html#freedimensionoverrides
373
384
  * for more information.
374
385
  * @property {import('./utils/devices.js').DeviceType} [device] The default device to use for the model.
375
- * @property {import('./utils/dtypes.js').DataType} [dtype] The default data type to use for the model.
386
+ * @property {import('./utils/dtypes.js').DataType|Record<string, import('./utils/dtypes.js').DataType>} [dtype] The default data type to use for the model.
376
387
  * @property {boolean|Record<string, boolean>} [use_external_data_format=false] Whether to load the model using the external data format (used for models >= 2GB in size).
377
388
  */
package/src/env.js CHANGED
@@ -26,12 +26,12 @@ import fs from 'fs';
26
26
  import path from 'path';
27
27
  import url from 'url';
28
28
 
29
- const VERSION = '3.0.2';
29
+ const VERSION = '3.1.1';
30
30
 
31
31
  // Check if various APIs are available (depends on environment)
32
- const IS_BROWSER_ENV = typeof self !== 'undefined';
33
- const IS_WEBWORKER_ENV = IS_BROWSER_ENV && self.constructor.name === 'DedicatedWorkerGlobalScope';
34
- const IS_WEB_CACHE_AVAILABLE = IS_BROWSER_ENV && 'caches' in self;
32
+ const IS_BROWSER_ENV = typeof window !== "undefined" && typeof window.document !== "undefined";
33
+ const IS_WEBWORKER_ENV = typeof self !== "undefined" && self.constructor?.name === 'DedicatedWorkerGlobalScope';
34
+ const IS_WEB_CACHE_AVAILABLE = typeof self !== "undefined" && 'caches' in self;
35
35
  const IS_WEBGPU_AVAILABLE = typeof navigator !== 'undefined' && 'gpu' in navigator;
36
36
  const IS_WEBNN_AVAILABLE = typeof navigator !== 'undefined' && 'ml' in navigator;
37
37
 
@@ -44,7 +44,7 @@ const IS_PATH_AVAILABLE = !isEmpty(path);
44
44
  * A read-only object containing information about the APIs available in the current environment.
45
45
  */
46
46
  export const apis = Object.freeze({
47
- /** Whether we are running in a browser environment */
47
+ /** Whether we are running in a browser environment (and not a web worker) */
48
48
  IS_BROWSER_ENV,
49
49
 
50
50
  /** Whether we are running in a web worker environment */
@@ -137,7 +137,7 @@ export const env = {
137
137
  remoteHost: 'https://huggingface.co/',
138
138
  remotePathTemplate: '{model}/resolve/{revision}/',
139
139
 
140
- allowLocalModels: !IS_BROWSER_ENV,
140
+ allowLocalModels: !(IS_BROWSER_ENV || IS_WEBWORKER_ENV),
141
141
  localModelPath: localModelPath,
142
142
  useFS: IS_FS_AVAILABLE,
143
143
 
@@ -259,6 +259,13 @@ export class GenerationConfig {
259
259
  */
260
260
  suppress_tokens = null;
261
261
 
262
+ /**
263
+ * A streamer that will be used to stream the generation.
264
+ * @type {import('./streamers.js').TextStreamer}
265
+ * @default null
266
+ */
267
+ streamer = null;
268
+
262
269
  /**
263
270
  * A list of tokens that will be suppressed at the beginning of the generation.
264
271
  * The `SuppressBeginTokens` logit processor will set their log probs to `-inf` so that they are not sampled.
@@ -151,7 +151,7 @@ export class ForcedBOSTokenLogitsProcessor extends LogitsProcessor {
151
151
  * Apply the BOS token forcing to the logits.
152
152
  * @param {bigint[][]} input_ids The input IDs.
153
153
  * @param {Tensor} logits The logits.
154
- * @returns {Object} The logits with BOS token forcing.
154
+ * @returns {Tensor} The logits with BOS token forcing.
155
155
  */
156
156
  _call(input_ids, logits) {
157
157
  for (let i = 0; i < input_ids.length; ++i) {
@@ -221,7 +221,7 @@ export class SuppressTokensAtBeginLogitsProcessor extends LogitsProcessor {
221
221
  * Apply the BOS token forcing to the logits.
222
222
  * @param {bigint[][]} input_ids The input IDs.
223
223
  * @param {Tensor} logits The logits.
224
- * @returns {Object} The logits with BOS token forcing.
224
+ * @returns {Tensor} The logits with BOS token forcing.
225
225
  */
226
226
  _call(input_ids, logits) {
227
227
  for (let i = 0; i < input_ids.length; ++i) {
@@ -391,7 +391,7 @@ export class NoRepeatNGramLogitsProcessor extends LogitsProcessor {
391
391
  * Apply the no-repeat-ngram processor to the logits.
392
392
  * @param {bigint[][]} input_ids The input IDs.
393
393
  * @param {Tensor} logits The logits.
394
- * @returns {Object} The logits with no-repeat-ngram processing.
394
+ * @returns {Tensor} The logits with no-repeat-ngram processing.
395
395
  */
396
396
  _call(input_ids, logits) {
397
397
  for (let i = 0; i < input_ids.length; ++i) {
@@ -406,12 +406,22 @@ export class NoRepeatNGramLogitsProcessor extends LogitsProcessor {
406
406
  }
407
407
 
408
408
  /**
409
- * A logits processor that penalises repeated output tokens.
409
+ * A logits processor that prevents the repetition of previous tokens through a penalty.
410
+ * This penalty is applied at most once per token. Note that, for decoder-only models like most LLMs,
411
+ * the considered tokens include the prompt.
412
+ *
413
+ * In the original [paper](https://arxiv.org/pdf/1909.05858.pdf), the authors suggest the use of a
414
+ * penalty of around 1.2 to achieve a good balance between truthful generation and lack of repetition.
415
+ * To penalize and reduce repetition, use `penalty` values above 1.0, where a higher value penalizes
416
+ * more strongly. To reward and encourage repetition, use `penalty` values between 0.0 and 1.0, where
417
+ * a lower value rewards more strongly.
410
418
  */
411
419
  export class RepetitionPenaltyLogitsProcessor extends LogitsProcessor {
412
420
  /**
413
421
  * Create a RepetitionPenaltyLogitsProcessor.
414
- * @param {number} penalty The penalty to apply for repeated tokens.
422
+ * @param {number} penalty The parameter for repetition penalty.
423
+ * - 1.0 means no penalty. Above 1.0 penalizes previously generated tokens.
424
+ * - Between 0.0 and 1.0 rewards previously generated tokens.
415
425
  */
416
426
  constructor(penalty) {
417
427
  super();
@@ -422,16 +432,12 @@ export class RepetitionPenaltyLogitsProcessor extends LogitsProcessor {
422
432
  * Apply the repetition penalty to the logits.
423
433
  * @param {bigint[][]} input_ids The input IDs.
424
434
  * @param {Tensor} logits The logits.
425
- * @returns {Object} The logits with repetition penalty processing.
435
+ * @returns {Tensor} The logits with repetition penalty processing.
426
436
  */
427
437
  _call(input_ids, logits) {
428
- // Modify the logits corresponding to each element in `input_ids`.
429
- // As a consequence, the logits corresponding to tokens that appear
430
- // many times in the output will be penalised more.
431
-
432
438
  for (let i = 0; i < input_ids.length; ++i) {
433
439
  const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
434
- for (const input_id of input_ids[i]) {
440
+ for (const input_id of new Set(input_ids[i])) {
435
441
  const token = Number(input_id);
436
442
  if (batch_logits_data[token] < 0) {
437
443
  batch_logits_data[token] *= this.penalty;
@@ -464,7 +470,7 @@ export class MinLengthLogitsProcessor extends LogitsProcessor {
464
470
  * Apply logit processor.
465
471
  * @param {bigint[][]} input_ids The input IDs.
466
472
  * @param {Tensor} logits The logits.
467
- * @returns {Object} The processed logits.
473
+ * @returns {Tensor} The processed logits.
468
474
  */
469
475
  _call(input_ids, logits) {
470
476
  for (let i = 0; i < input_ids.length; ++i) {
@@ -502,7 +508,7 @@ export class MinNewTokensLengthLogitsProcessor extends LogitsProcessor {
502
508
  * Apply logit processor.
503
509
  * @param {bigint[][]} input_ids The input IDs.
504
510
  * @param {Tensor} logits The logits.
505
- * @returns {Object} The processed logits.
511
+ * @returns {Tensor} The processed logits.
506
512
  */
507
513
  _call(input_ids, logits) {
508
514
  for (let i = 0; i < input_ids.length; ++i) {
@@ -535,7 +541,7 @@ export class NoBadWordsLogitsProcessor extends LogitsProcessor {
535
541
  * Apply logit processor.
536
542
  * @param {bigint[][]} input_ids The input IDs.
537
543
  * @param {Tensor} logits The logits.
538
- * @returns {Object} The processed logits.
544
+ * @returns {Tensor} The processed logits.
539
545
  */
540
546
  _call(input_ids, logits) {
541
547
  for (let i = 0; i < input_ids.length; ++i) {
@@ -596,7 +602,7 @@ export class ClassifierFreeGuidanceLogitsProcessor extends LogitsProcessor {
596
602
  * Apply logit processor.
597
603
  * @param {bigint[][]} input_ids The input IDs.
598
604
  * @param {Tensor} logits The logits.
599
- * @returns {Object} The processed logits.
605
+ * @returns {Tensor} The processed logits.
600
606
  */
601
607
  _call(input_ids, logits) {
602
608
  if (logits.dims[0] !== 2 * input_ids.length) {
@@ -650,7 +656,7 @@ export class TemperatureLogitsWarper extends LogitsWarper {
650
656
  * Apply logit warper.
651
657
  * @param {bigint[][]} input_ids The input IDs.
652
658
  * @param {Tensor} logits The logits.
653
- * @returns {Object} The processed logits.
659
+ * @returns {Tensor} The processed logits.
654
660
  */
655
661
  _call(input_ids, logits) {
656
662
  const batch_logits_data = /** @type {Float32Array} */(logits.data);
@@ -34,7 +34,12 @@ const stdout_write = apis.IS_PROCESS_AVAILABLE
34
34
  export class TextStreamer extends BaseStreamer {
35
35
  /**
36
36
  *
37
- * @param {import('../tokenizers.js').PreTrainedTokenizer} tokenizer
37
+ * @param {import('../tokenizers.js').PreTrainedTokenizer} tokenizer
38
+ * @param {Object} options
39
+ * @param {boolean} [options.skip_prompt=false] Whether to skip the prompt tokens
40
+ * @param {function(string): void} [options.callback_function=null] Function to call when a piece of text is ready to display
41
+ * @param {function(bigint[]): void} [options.token_callback_function=null] Function to call when a new token is generated
42
+ * @param {Object} [options.decode_kwargs={}] Additional keyword arguments to pass to the tokenizer's decode method
38
43
  */
39
44
  constructor(tokenizer, {
40
45
  skip_prompt = false,
@@ -143,7 +148,7 @@ export class WhisperTextStreamer extends TextStreamer {
143
148
  * @param {Object} options
144
149
  * @param {boolean} [options.skip_prompt=false] Whether to skip the prompt tokens
145
150
  * @param {function(string): void} [options.callback_function=null] Function to call when a piece of text is ready to display
146
- * @param {function(string): void} [options.token_callback_function=null] Function to call when a new token is generated
151
+ * @param {function(bigint[]): void} [options.token_callback_function=null] Function to call when a new token is generated
147
152
  * @param {function(number): void} [options.on_chunk_start=null] Function to call when a new chunk starts
148
153
  * @param {function(number): void} [options.on_chunk_end=null] Function to call when a chunk ends
149
154
  * @param {function(): void} [options.on_finalize=null] Function to call when the stream is finalized
@@ -0,0 +1,90 @@
1
+ import { FeatureExtractor, validate_audio_inputs } from '../../base/feature_extraction_utils.js';
2
+ import { Tensor } from '../../utils/tensor.js';
3
+ import { mel_filter_bank, spectrogram, window_function } from '../../utils/audio.js';
4
+
5
+
6
export class ASTFeatureExtractor extends FeatureExtractor {

    /**
     * Creates a feature extractor for Audio Spectrogram Transformer (AST) models.
     * Precomputes the Kaldi-style mel filter bank and the non-periodic Hann window
     * used when extracting spectrogram features.
     * @param {Object} config The feature extractor configuration.
     */
    constructor(config) {
        super(config);

        const rate = this.config.sampling_rate;
        const filters = mel_filter_bank(
            256, // num_frequency_bins
            this.config.num_mel_bins, // num_mel_filters
            20, // min_frequency
            Math.floor(rate / 2), // max_frequency
            rate, // sampling_rate
            null, // norm
            "kaldi", // mel_scale
            true, // triangularize_in_mel_space
        );

        // Pad every mel filter with a single trailing zero.
        for (const filter of filters) {
            filter.push(0);
        }
        this.mel_filters = filters;

        this.window = window_function(400, 'hann', {
            periodic: false,
        });

        this.mean = this.config.mean;
        this.std = this.config.std;
    }

    /**
     * Computes the log-Mel spectrogram of the provided audio waveform.
     * @param {Float32Array|Float64Array} waveform The audio waveform to process.
     * @param {number} max_length The maximum number of frames to return.
     * @returns {Promise<Tensor>} The log-Mel spectrogram as a Tensor.
     */
    async _extract_fbank_features(waveform, max_length) {
        // Padding/truncation is handled by the spectrogram routine itself
        // via the `max_num_frames` option, so nothing is done here.
        const options = {
            fft_length: 512,
            power: 2.0,
            center: false,
            preemphasis: 0.97,
            mel_filters: this.mel_filters,
            log_mel: 'log',
            mel_floor: 1.192092955078125e-07,
            remove_dc_offset: true,

            // Custom
            max_num_frames: max_length,
            transpose: true,
        };
        return spectrogram(
            waveform,
            this.window, // window
            400, // frame_length
            160, // hop_length
            options,
        );
    }

    /**
     * Asynchronously extracts features from a given audio using the provided configuration.
     * @param {Float32Array|Float64Array} audio The audio data as a Float32Array/Float64Array.
     * @returns {Promise<{ input_values: Tensor }>} A Promise resolving to an object containing the extracted input features as a Tensor.
     */
    async _call(audio) {
        validate_audio_inputs(audio, 'ASTFeatureExtractor');

        const features = await this._extract_fbank_features(audio, this.config.max_length);
        if (this.config.do_normalize) {
            // Normalize the input audio spectrogram to have mean=0, std=0.5.
            const divisor = this.std * 2;
            const data = features.data;
            for (let j = 0; j < data.length; ++j) {
                data[j] = (data[j] - this.mean) / divisor;
            }
        }

        return {
            input_values: features.unsqueeze_(0)
        };
    }
}
@@ -0,0 +1,41 @@
1
+
2
+ import { FEATURE_EXTRACTOR_NAME, GITHUB_ISSUE_URL } from '../../utils/constants.js';
3
+ import { getModelJSON } from '../../utils/hub.js';
4
+ import { FeatureExtractor } from '../../base/feature_extraction_utils.js';
5
+ import * as AllFeatureExtractors from '../feature_extractors.js';
6
+
7
export class AutoFeatureExtractor {

    /**
     * Instantiate one of the feature extractor classes of the library from a pretrained model.
     *
     * The class to instantiate is selected based on the `feature_extractor_type` property of
     * the config object loaded from `pretrained_model_name_or_path`.
     *
     * @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
     * - A string, the *model id* of a pretrained feature extractor hosted inside a model repo on huggingface.co.
     *   Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
     *   user or organization name, like `dbmdz/bert-base-german-cased`.
     * - A path to a *directory* containing feature extractor files, e.g., `./my_model_directory/`.
     * @param {import('../../utils/hub.js').PretrainedOptions} options Additional options for loading the feature extractor.
     * @returns {Promise<FeatureExtractor>} A new instance of the resolved feature extractor class.
     */

    /** @type {typeof FeatureExtractor.from_pretrained} */
    static async from_pretrained(pretrained_model_name_or_path, options = {}) {
        // Fetch the preprocessor configuration for the requested model.
        const preprocessorConfig = await getModelJSON(pretrained_model_name_or_path, FEATURE_EXTRACTOR_NAME, true, options);

        // Resolve the concrete class from the registry of known feature extractors.
        const typeName = preprocessorConfig.feature_extractor_type;
        const extractorClass = AllFeatureExtractors[typeName];
        if (!extractorClass) {
            throw new Error(`Unknown feature_extractor_type: '${typeName}'. Please report this at ${GITHUB_ISSUE_URL}.`);
        }

        return new extractorClass(preprocessorConfig);
    }
}
@@ -0,0 +1,29 @@
1
+
2
+ import { GITHUB_ISSUE_URL, IMAGE_PROCESSOR_NAME } from '../../utils/constants.js';
3
+ import { getModelJSON } from '../../utils/hub.js';
4
+ import { ImageProcessor } from '../../base/image_processors_utils.js';
5
+ import * as AllImageProcessors from '../image_processors.js';
6
+
7
export class AutoImageProcessor {

    /** @type {typeof ImageProcessor.from_pretrained} */
    static async from_pretrained(pretrained_model_name_or_path, options = {}) {
        // Fetch the preprocessor configuration for the requested model.
        const preprocessorConfig = await getModelJSON(pretrained_model_name_or_path, IMAGE_PROCESSOR_NAME, true, options);

        // `image_processor_type` is preferred; `feature_extractor_type` is the legacy field name.
        const typeName = preprocessorConfig.image_processor_type ?? preprocessorConfig.feature_extractor_type;
        let processorClass = AllImageProcessors[typeName];

        if (!processorClass) {
            if (typeName !== undefined) {
                // Warn only when a type was specified but is not recognised;
                // an entirely missing type silently falls back to the base class.
                console.warn(`Image processor type '${typeName}' not found, assuming base ImageProcessor. Please report this at ${GITHUB_ISSUE_URL}.`)
            }
            processorClass = ImageProcessor;
        }

        return new processorClass(preprocessorConfig);
    }
}
@@ -0,0 +1,100 @@
1
+
2
+
3
+ import { IMAGE_PROCESSOR_NAME } from '../../utils/constants.js';
4
+ import { getModelJSON } from '../../utils/hub.js';
5
+ import { Processor } from '../../base/processing_utils.js';
6
+
7
+ import * as AllProcessors from '../processors.js';
8
+ import * as AllImageProcessors from '../image_processors.js';
9
+ import * as AllFeatureExtractors from '../feature_extractors.js';
10
+
11
/**
 * Helper class which is used to instantiate pretrained processors with the `from_pretrained` function.
 * The chosen processor class is determined by the type specified in the processor config.
 *
 * **Example:** Load a processor using `from_pretrained`.
 * ```javascript
 * let processor = await AutoProcessor.from_pretrained('openai/whisper-tiny.en');
 * ```
 *
 * **Example:** Run an image through a processor.
 * ```javascript
 * let processor = await AutoProcessor.from_pretrained('Xenova/clip-vit-base-patch16');
 * let image = await RawImage.read('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/football-match.jpg');
 * let image_inputs = await processor(image);
 * // {
 * //   "pixel_values": Tensor of dims [ 1, 3, 224, 224 ],
 * //   "original_sizes": [ [ 533, 800 ] ],
 * //   "reshaped_input_sizes": [ [ 224, 224 ] ]
 * // }
 * ```
 */
export class AutoProcessor {

    /**
     * Instantiate one of the processor classes of the library from a pretrained model.
     *
     * The processor class to instantiate is selected based on the `image_processor_type` (or `feature_extractor_type`; legacy)
     * property of the config object (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
     *
     * @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
     * - A string, the *model id* of a pretrained processor hosted inside a model repo on huggingface.co.
     *   Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
     *   user or organization name, like `dbmdz/bert-base-german-cased`.
     * - A path to a *directory* containing processor files, e.g., `./my_model_directory/`.
     * @param {import('../../utils/hub.js').PretrainedOptions} options Additional options for loading the processor.
     *
     * @returns {Promise<Processor>} A new instance of the Processor class.
     */

    /** @type {typeof Processor.from_pretrained} */
    static async from_pretrained(pretrained_model_name_or_path, options = {}) {
        // TODO: first check for processor.json
        const preprocessorConfig = await getModelJSON(pretrained_model_name_or_path, IMAGE_PROCESSOR_NAME, true, options);

        const { image_processor_type, feature_extractor_type, processor_class } = preprocessorConfig;

        // A dedicated processor class, when registered, takes precedence over
        // assembling one from individual components.
        if (processor_class && AllProcessors[processor_class]) {
            return AllProcessors[processor_class].from_pretrained(pretrained_model_name_or_path, options);
        }

        if (!image_processor_type && !feature_extractor_type) {
            throw new Error('No `image_processor_type` or `feature_extractor_type` found in the config.');
        }

        const components = {};

        if (image_processor_type) {
            const imageProcessorClass = AllImageProcessors[image_processor_type];
            if (!imageProcessorClass) {
                throw new Error(`Unknown image_processor_type: '${image_processor_type}'.`);
            }
            components.image_processor = new imageProcessorClass(preprocessorConfig);
        }

        if (feature_extractor_type) {
            // Handle legacy case where image processors were specified as feature extractors.
            const legacyImageProcessorClass = AllImageProcessors[feature_extractor_type];
            if (legacyImageProcessorClass) {
                components.image_processor = new legacyImageProcessorClass(preprocessorConfig);
            } else {
                const featureExtractorClass = AllFeatureExtractors[feature_extractor_type];
                if (!featureExtractorClass) {
                    throw new Error(`Unknown feature_extractor_type: '${feature_extractor_type}'.`);
                }
                components.feature_extractor = new featureExtractorClass(preprocessorConfig);
            }
        }

        // The assembled processor carries no configuration of its own.
        return new Processor({}, components);
    }
}
@@ -0,0 +1,5 @@
1
+ import {
2
+ ImageProcessor,
3
+ } from "../../base/image_processors_utils.js";
4
+
5
/**
 * Image processor for BEiT models. All preprocessing behavior is inherited
 * unchanged from the base ImageProcessor.
 * NOTE(review): kept under the legacy `FeatureExtractor` name — presumably to
 * match the `feature_extractor_type` key in existing preprocessor configs; confirm.
 */
export class BeitFeatureExtractor extends ImageProcessor { }
@@ -0,0 +1,5 @@
1
+ import {
2
+ ImageProcessor,
3
+ } from "../../base/image_processors_utils.js";
4
+
5
/**
 * Image processor for BiT models. All preprocessing behavior is inherited
 * unchanged from the base ImageProcessor.
 */
export class BitImageProcessor extends ImageProcessor { }