@huggingface/transformers 3.0.2 → 3.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (258) hide show
  1. package/README.md +13 -4
  2. package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
  3. package/dist/transformers.cjs +16655 -13040
  4. package/dist/transformers.cjs.map +1 -1
  5. package/dist/transformers.js +17095 -13468
  6. package/dist/transformers.js.map +1 -1
  7. package/dist/transformers.min.cjs +244 -52
  8. package/dist/transformers.min.cjs.map +1 -1
  9. package/dist/transformers.min.js +235 -43
  10. package/dist/transformers.min.js.map +1 -1
  11. package/dist/transformers.min.mjs +246 -54
  12. package/dist/transformers.min.mjs.map +1 -1
  13. package/dist/transformers.mjs +16818 -13202
  14. package/dist/transformers.mjs.map +1 -1
  15. package/package.json +4 -4
  16. package/src/base/feature_extraction_utils.js +54 -0
  17. package/src/base/image_processors_utils.js +1089 -0
  18. package/src/base/processing_utils.js +145 -0
  19. package/src/configs.js +15 -4
  20. package/src/env.js +6 -6
  21. package/src/generation/configuration_utils.js +7 -0
  22. package/src/generation/logits_process.js +22 -16
  23. package/src/generation/streamers.js +7 -2
  24. package/src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js +90 -0
  25. package/src/models/auto/feature_extraction_auto.js +41 -0
  26. package/src/models/auto/image_processing_auto.js +29 -0
  27. package/src/models/auto/processing_auto.js +100 -0
  28. package/src/models/beit/image_processing_beit.js +5 -0
  29. package/src/models/bit/image_processing_bit.js +5 -0
  30. package/src/models/chinese_clip/image_processing_chinese_clip.js +5 -0
  31. package/src/models/clap/feature_extraction_clap.js +159 -0
  32. package/src/models/clip/image_processing_clip.js +6 -0
  33. package/src/models/convnext/image_processing_convnext.js +45 -0
  34. package/src/models/deit/image_processing_deit.js +6 -0
  35. package/src/models/detr/image_processing_detr.js +52 -0
  36. package/src/models/donut/image_processing_donut.js +31 -0
  37. package/src/models/dpt/image_processing_dpt.js +6 -0
  38. package/src/models/efficientnet/image_processing_efficientnet.js +13 -0
  39. package/src/models/feature_extractors.js +12 -0
  40. package/src/models/florence2/processing_florence2.js +128 -0
  41. package/src/models/glpn/image_processing_glpn.js +5 -0
  42. package/src/models/idefics3/image_processing_idefics3.js +219 -0
  43. package/src/models/idefics3/processing_idefics3.js +136 -0
  44. package/src/models/image_processors.js +37 -0
  45. package/src/models/janus/image_processing_janus.js +26 -0
  46. package/src/models/janus/processing_janus.js +123 -0
  47. package/src/models/jina_clip/image_processing_jina_clip.js +26 -0
  48. package/src/models/jina_clip/processing_jina_clip.js +24 -0
  49. package/src/models/llava_onevision/image_processing_llava_onevision.js +5 -0
  50. package/src/models/mask2former/image_processing_mask2former.js +5 -0
  51. package/src/models/maskformer/image_processing_maskformer.js +18 -0
  52. package/src/models/mgp_str/processing_mgp_str.js +170 -0
  53. package/src/models/mobilenet_v1/image_processing_mobilenet_v1.js +7 -0
  54. package/src/models/mobilenet_v2/image_processing_mobilenet_v2.js +7 -0
  55. package/src/models/mobilenet_v3/image_processing_mobilenet_v3.js +7 -0
  56. package/src/models/mobilenet_v4/image_processing_mobilenet_v4.js +7 -0
  57. package/src/models/mobilevit/image_processing_mobilevit.js +6 -0
  58. package/src/models/nougat/image_processing_nougat.js +5 -0
  59. package/src/models/owlv2/image_processing_owlv2.js +5 -0
  60. package/src/models/owlvit/image_processing_owlvit.js +12 -0
  61. package/src/models/owlvit/processing_owlvit.js +7 -0
  62. package/src/models/processors.js +12 -0
  63. package/src/models/pvt/image_processing_pvt.js +5 -0
  64. package/src/models/pyannote/feature_extraction_pyannote.js +28 -0
  65. package/src/models/pyannote/processing_pyannote.js +71 -0
  66. package/src/models/qwen2_vl/image_processing_qwen2_vl.js +52 -0
  67. package/src/models/qwen2_vl/processing_qwen2_vl.js +52 -0
  68. package/src/models/rt_detr/image_processing_rt_detr.js +12 -0
  69. package/src/models/sam/image_processing_sam.js +242 -0
  70. package/src/models/sam/processing_sam.js +20 -0
  71. package/src/models/sapiens/image_processing_sapiens.js +13 -0
  72. package/src/models/seamless_m4t/feature_extraction_seamless_m4t.js +180 -0
  73. package/src/models/segformer/image_processing_segformer.js +13 -0
  74. package/src/models/siglip/image_processing_siglip.js +5 -0
  75. package/src/models/speecht5/feature_extraction_speecht5.js +4 -0
  76. package/src/models/speecht5/processing_speecht5.js +17 -0
  77. package/src/models/swin2sr/image_processing_swin2sr.js +24 -0
  78. package/src/models/vit/image_processing_vit.js +7 -0
  79. package/src/models/vitmatte/image_processing_vitmatte.js +50 -0
  80. package/src/models/vitpose/image_processing_vitpose.js +89 -0
  81. package/src/models/wav2vec2/feature_extraction_wav2vec2.js +44 -0
  82. package/src/models/wav2vec2/processing_wav2vec2.js +15 -0
  83. package/src/models/wespeaker/feature_extraction_wespeaker.js +100 -0
  84. package/src/models/whisper/feature_extraction_whisper.js +84 -0
  85. package/src/models/whisper/processing_whisper.js +21 -0
  86. package/src/models/yolos/image_processing_yolos.js +12 -0
  87. package/src/models.js +755 -34
  88. package/src/pipelines.js +8 -8
  89. package/src/tokenizers.js +5 -0
  90. package/src/transformers.js +15 -2
  91. package/src/utils/constants.js +8 -1
  92. package/src/utils/core.js +51 -9
  93. package/src/utils/dtypes.js +2 -1
  94. package/src/utils/hub.js +2 -1
  95. package/src/utils/image.js +87 -33
  96. package/src/utils/tensor.js +39 -2
  97. package/types/base/feature_extraction_utils.d.ts +41 -0
  98. package/types/base/feature_extraction_utils.d.ts.map +1 -0
  99. package/types/base/image_processors_utils.d.ts +323 -0
  100. package/types/base/image_processors_utils.d.ts.map +1 -0
  101. package/types/base/processing_utils.d.ts +80 -0
  102. package/types/base/processing_utils.d.ts.map +1 -0
  103. package/types/configs.d.ts +5 -2
  104. package/types/configs.d.ts.map +1 -1
  105. package/types/env.d.ts +1 -1
  106. package/types/env.d.ts.map +1 -1
  107. package/types/generation/configuration_utils.d.ts +6 -0
  108. package/types/generation/configuration_utils.d.ts.map +1 -1
  109. package/types/generation/logits_process.d.ts +30 -20
  110. package/types/generation/logits_process.d.ts.map +1 -1
  111. package/types/generation/streamers.d.ts +13 -8
  112. package/types/generation/streamers.d.ts.map +1 -1
  113. package/types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts +25 -0
  114. package/types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts.map +1 -0
  115. package/types/models/auto/feature_extraction_auto.d.ts +5 -0
  116. package/types/models/auto/feature_extraction_auto.d.ts.map +1 -0
  117. package/types/models/auto/image_processing_auto.d.ts +5 -0
  118. package/types/models/auto/image_processing_auto.d.ts.map +1 -0
  119. package/types/models/auto/processing_auto.d.ts +35 -0
  120. package/types/models/auto/processing_auto.d.ts.map +1 -0
  121. package/types/models/beit/image_processing_beit.d.ts +4 -0
  122. package/types/models/beit/image_processing_beit.d.ts.map +1 -0
  123. package/types/models/bit/image_processing_bit.d.ts +4 -0
  124. package/types/models/bit/image_processing_bit.d.ts.map +1 -0
  125. package/types/models/chinese_clip/image_processing_chinese_clip.d.ts +4 -0
  126. package/types/models/chinese_clip/image_processing_chinese_clip.d.ts.map +1 -0
  127. package/types/models/clap/feature_extraction_clap.d.ts +57 -0
  128. package/types/models/clap/feature_extraction_clap.d.ts.map +1 -0
  129. package/types/models/clip/image_processing_clip.d.ts +6 -0
  130. package/types/models/clip/image_processing_clip.d.ts.map +1 -0
  131. package/types/models/convnext/image_processing_convnext.d.ts +12 -0
  132. package/types/models/convnext/image_processing_convnext.d.ts.map +1 -0
  133. package/types/models/deit/image_processing_deit.d.ts +6 -0
  134. package/types/models/deit/image_processing_deit.d.ts.map +1 -0
  135. package/types/models/detr/image_processing_detr.d.ts +42 -0
  136. package/types/models/detr/image_processing_detr.d.ts.map +1 -0
  137. package/types/models/donut/image_processing_donut.d.ts +7 -0
  138. package/types/models/donut/image_processing_donut.d.ts.map +1 -0
  139. package/types/models/dpt/image_processing_dpt.d.ts +6 -0
  140. package/types/models/dpt/image_processing_dpt.d.ts.map +1 -0
  141. package/types/models/efficientnet/image_processing_efficientnet.d.ts +6 -0
  142. package/types/models/efficientnet/image_processing_efficientnet.d.ts.map +1 -0
  143. package/types/models/feature_extractors.d.ts +10 -0
  144. package/types/models/feature_extractors.d.ts.map +1 -0
  145. package/types/models/florence2/processing_florence2.d.ts +39 -0
  146. package/types/models/florence2/processing_florence2.d.ts.map +1 -0
  147. package/types/models/glpn/image_processing_glpn.d.ts +4 -0
  148. package/types/models/glpn/image_processing_glpn.d.ts.map +1 -0
  149. package/types/models/idefics3/image_processing_idefics3.d.ts +40 -0
  150. package/types/models/idefics3/image_processing_idefics3.d.ts.map +1 -0
  151. package/types/models/idefics3/processing_idefics3.d.ts +19 -0
  152. package/types/models/idefics3/processing_idefics3.d.ts.map +1 -0
  153. package/types/models/image_processors.d.ts +37 -0
  154. package/types/models/image_processors.d.ts.map +1 -0
  155. package/types/models/janus/image_processing_janus.d.ts +7 -0
  156. package/types/models/janus/image_processing_janus.d.ts.map +1 -0
  157. package/types/models/janus/processing_janus.d.ts +77 -0
  158. package/types/models/janus/processing_janus.d.ts.map +1 -0
  159. package/types/models/jina_clip/image_processing_jina_clip.d.ts +5 -0
  160. package/types/models/jina_clip/image_processing_jina_clip.d.ts.map +1 -0
  161. package/types/models/jina_clip/processing_jina_clip.d.ts +9 -0
  162. package/types/models/jina_clip/processing_jina_clip.d.ts.map +1 -0
  163. package/types/models/llava_onevision/image_processing_llava_onevision.d.ts +4 -0
  164. package/types/models/llava_onevision/image_processing_llava_onevision.d.ts.map +1 -0
  165. package/types/models/mask2former/image_processing_mask2former.d.ts +4 -0
  166. package/types/models/mask2former/image_processing_mask2former.d.ts.map +1 -0
  167. package/types/models/maskformer/image_processing_maskformer.d.ts +22 -0
  168. package/types/models/maskformer/image_processing_maskformer.d.ts.map +1 -0
  169. package/types/models/mgp_str/processing_mgp_str.d.ts +64 -0
  170. package/types/models/mgp_str/processing_mgp_str.d.ts.map +1 -0
  171. package/types/models/mobilenet_v1/image_processing_mobilenet_v1.d.ts +6 -0
  172. package/types/models/mobilenet_v1/image_processing_mobilenet_v1.d.ts.map +1 -0
  173. package/types/models/mobilenet_v2/image_processing_mobilenet_v2.d.ts +6 -0
  174. package/types/models/mobilenet_v2/image_processing_mobilenet_v2.d.ts.map +1 -0
  175. package/types/models/mobilenet_v3/image_processing_mobilenet_v3.d.ts +6 -0
  176. package/types/models/mobilenet_v3/image_processing_mobilenet_v3.d.ts.map +1 -0
  177. package/types/models/mobilenet_v4/image_processing_mobilenet_v4.d.ts +6 -0
  178. package/types/models/mobilenet_v4/image_processing_mobilenet_v4.d.ts.map +1 -0
  179. package/types/models/mobilevit/image_processing_mobilevit.d.ts +6 -0
  180. package/types/models/mobilevit/image_processing_mobilevit.d.ts.map +1 -0
  181. package/types/models/nougat/image_processing_nougat.d.ts +4 -0
  182. package/types/models/nougat/image_processing_nougat.d.ts.map +1 -0
  183. package/types/models/owlv2/image_processing_owlv2.d.ts +4 -0
  184. package/types/models/owlv2/image_processing_owlv2.d.ts.map +1 -0
  185. package/types/models/owlvit/image_processing_owlvit.d.ts +10 -0
  186. package/types/models/owlvit/image_processing_owlvit.d.ts.map +1 -0
  187. package/types/models/owlvit/processing_owlvit.d.ts +8 -0
  188. package/types/models/owlvit/processing_owlvit.d.ts.map +1 -0
  189. package/types/models/processors.d.ts +13 -0
  190. package/types/models/processors.d.ts.map +1 -0
  191. package/types/models/pvt/image_processing_pvt.d.ts +4 -0
  192. package/types/models/pvt/image_processing_pvt.d.ts.map +1 -0
  193. package/types/models/pyannote/feature_extraction_pyannote.d.ts +13 -0
  194. package/types/models/pyannote/feature_extraction_pyannote.d.ts.map +1 -0
  195. package/types/models/pyannote/processing_pyannote.d.ts +30 -0
  196. package/types/models/pyannote/processing_pyannote.d.ts.map +1 -0
  197. package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts +11 -0
  198. package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts.map +1 -0
  199. package/types/models/qwen2_vl/processing_qwen2_vl.d.ts +17 -0
  200. package/types/models/qwen2_vl/processing_qwen2_vl.d.ts.map +1 -0
  201. package/types/models/rt_detr/image_processing_rt_detr.d.ts +8 -0
  202. package/types/models/rt_detr/image_processing_rt_detr.d.ts.map +1 -0
  203. package/types/models/sam/image_processing_sam.d.ts +103 -0
  204. package/types/models/sam/image_processing_sam.d.ts.map +1 -0
  205. package/types/models/sam/processing_sam.d.ts +9 -0
  206. package/types/models/sam/processing_sam.d.ts.map +1 -0
  207. package/types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts +34 -0
  208. package/types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts.map +1 -0
  209. package/types/models/segformer/image_processing_segformer.d.ts +10 -0
  210. package/types/models/segformer/image_processing_segformer.d.ts.map +1 -0
  211. package/types/models/siglip/image_processing_siglip.d.ts +4 -0
  212. package/types/models/siglip/image_processing_siglip.d.ts.map +1 -0
  213. package/types/models/speecht5/feature_extraction_speecht5.d.ts +4 -0
  214. package/types/models/speecht5/feature_extraction_speecht5.d.ts.map +1 -0
  215. package/types/models/speecht5/processing_speecht5.d.ts +14 -0
  216. package/types/models/speecht5/processing_speecht5.d.ts.map +1 -0
  217. package/types/models/swin2sr/image_processing_swin2sr.d.ts +5 -0
  218. package/types/models/swin2sr/image_processing_swin2sr.d.ts.map +1 -0
  219. package/types/models/vit/image_processing_vit.d.ts +6 -0
  220. package/types/models/vit/image_processing_vit.d.ts.map +1 -0
  221. package/types/models/vitmatte/image_processing_vitmatte.d.ts +12 -0
  222. package/types/models/vitmatte/image_processing_vitmatte.d.ts.map +1 -0
  223. package/types/models/vitpose/image_processing_vitpose.d.ts +26 -0
  224. package/types/models/vitpose/image_processing_vitpose.d.ts.map +1 -0
  225. package/types/models/wav2vec2/feature_extraction_wav2vec2.d.ts +19 -0
  226. package/types/models/wav2vec2/feature_extraction_wav2vec2.d.ts.map +1 -0
  227. package/types/models/wav2vec2/processing_wav2vec2.d.ts +12 -0
  228. package/types/models/wav2vec2/processing_wav2vec2.d.ts.map +1 -0
  229. package/types/models/wespeaker/feature_extraction_wespeaker.d.ts +23 -0
  230. package/types/models/wespeaker/feature_extraction_wespeaker.d.ts.map +1 -0
  231. package/types/models/whisper/feature_extraction_whisper.d.ts +21 -0
  232. package/types/models/whisper/feature_extraction_whisper.d.ts.map +1 -0
  233. package/types/models/whisper/processing_whisper.d.ts +17 -0
  234. package/types/models/whisper/processing_whisper.d.ts.map +1 -0
  235. package/types/models/yolos/image_processing_yolos.d.ts +10 -0
  236. package/types/models/yolos/image_processing_yolos.d.ts.map +1 -0
  237. package/types/models.d.ts +150 -0
  238. package/types/models.d.ts.map +1 -1
  239. package/types/pipelines.d.ts +2 -3
  240. package/types/pipelines.d.ts.map +1 -1
  241. package/types/tokenizers.d.ts +3 -0
  242. package/types/tokenizers.d.ts.map +1 -1
  243. package/types/transformers.d.ts +10 -1
  244. package/types/utils/constants.d.ts +6 -0
  245. package/types/utils/constants.d.ts.map +1 -1
  246. package/types/utils/core.d.ts +65 -3
  247. package/types/utils/core.d.ts.map +1 -1
  248. package/types/utils/dtypes.d.ts +3 -2
  249. package/types/utils/dtypes.d.ts.map +1 -1
  250. package/types/utils/hub.d.ts +1 -1
  251. package/types/utils/hub.d.ts.map +1 -1
  252. package/types/utils/image.d.ts +14 -2
  253. package/types/utils/image.d.ts.map +1 -1
  254. package/types/utils/tensor.d.ts +39 -4
  255. package/types/utils/tensor.d.ts.map +1 -1
  256. package/src/processors.js +0 -2655
  257. package/types/processors.d.ts +0 -924
  258. package/types/processors.d.ts.map +0 -1
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@huggingface/transformers",
3
- "version": "3.0.2",
3
+ "version": "3.1.1",
4
4
  "description": "State-of-the-art Machine Learning for the web. Run 🤗 Transformers directly in your browser, with no need for a server!",
5
5
  "main": "./src/transformers.js",
6
6
  "types": "./types/transformers.d.ts",
@@ -61,9 +61,9 @@
61
61
  },
62
62
  "homepage": "https://github.com/huggingface/transformers.js#readme",
63
63
  "dependencies": {
64
- "@huggingface/jinja": "^0.3.0",
65
- "onnxruntime-node": "1.19.2",
66
- "onnxruntime-web": "1.21.0-dev.20241024-d9ca84ef96",
64
+ "@huggingface/jinja": "^0.3.2",
65
+ "onnxruntime-node": "1.20.1",
66
+ "onnxruntime-web": "1.20.1",
67
67
  "sharp": "^0.33.5"
68
68
  },
69
69
  "devDependencies": {
@@ -0,0 +1,54 @@
1
+ import { FEATURE_EXTRACTOR_NAME } from "../utils/constants.js";
2
+ import { Callable } from "../utils/generic.js";
3
+ import { getModelJSON } from "../utils/hub.js";
4
+
5
/**
 * Common parent class for feature extractors. An instance simply keeps the
 * configuration object it was constructed with.
 */
export class FeatureExtractor extends Callable {
    /**
     * Creates a feature extractor and stores `config` on the instance.
     *
     * @param {Object} config The configuration for the feature extractor.
     */
    constructor(config) {
        super();
        this.config = config;
    }

    /**
     * Builds an instance from the feature-extractor configuration of a pretrained model.
     *
     * The JSON file named by `FEATURE_EXTRACTOR_NAME` is loaded through `getModelJSON`, and the
     * resulting object is handed to the constructor of the class this method is invoked on
     * (`new this(...)`), so calling it on a subclass yields an instance of that subclass.
     *
     * @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Either:
     * - a *model id* of a repo hosted on huggingface.co, at the root level (e.g. `bert-base-uncased`)
     *   or namespaced under a user or organization (e.g. `dbmdz/bert-base-german-cased`); or
     * - a path to a *directory* containing the configuration files, e.g. `./my_model_directory/`.
     * @param {import('../utils/hub.js').PretrainedOptions} options Additional loading options,
     * forwarded unchanged to `getModelJSON`.
     *
     * @returns {Promise<FeatureExtractor>} A new instance of the class the method was called on.
     */
    static async from_pretrained(pretrained_model_name_or_path, options) {
        // NOTE(review): the literal `true` is presumably getModelJSON's "fatal" flag
        // (fail when the file is missing) — confirm against ../utils/hub.js.
        const config = await getModelJSON(
            pretrained_model_name_or_path,
            FEATURE_EXTRACTOR_NAME,
            true,
            options,
        );
        return new this(config);
    }
}
39
+
40
+
41
/**
 * Guards a feature extractor against non-typed-array audio input.
 *
 * Returns normally when `audio` is a `Float32Array` or a `Float64Array`; any other
 * value causes an `Error` whose message names the feature extractor and the type
 * that was actually received.
 *
 * @param {any} audio The audio data.
 * @param {string} feature_extractor The name of the feature extractor (used in the error message).
 * @throws {Error} If `audio` is neither a `Float32Array` nor a `Float64Array`.
 * @private
 */
export function validate_audio_inputs(audio, feature_extractor) {
    const isFloatArray = audio instanceof Float32Array || audio instanceof Float64Array;
    if (isFloatArray) {
        return;
    }

    // Prefer the constructor name (e.g. "Array"); fall back to `typeof` for
    // null/undefined and objects that expose no constructor name.
    const receivedType = audio?.constructor?.name ?? typeof audio;

    const complaint = `${feature_extractor} expects input to be a Float32Array or a Float64Array, but got ${receivedType} instead. `;
    const hint = `If using the feature extractor directly, remember to use \`read_audio(url, sampling_rate)\` to obtain the raw audio data of the file/url.`;

    throw new Error(complaint + hint);
}