@huggingface/transformers 3.0.1 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239) hide show
  1. package/README.md +14 -4
  2. package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
  3. package/dist/transformers.cjs +16607 -13472
  4. package/dist/transformers.cjs.map +1 -1
  5. package/dist/transformers.js +16601 -13451
  6. package/dist/transformers.js.map +1 -1
  7. package/dist/transformers.min.cjs +238 -52
  8. package/dist/transformers.min.cjs.map +1 -1
  9. package/dist/transformers.min.js +229 -43
  10. package/dist/transformers.min.js.map +1 -1
  11. package/dist/transformers.min.mjs +240 -54
  12. package/dist/transformers.min.mjs.map +1 -1
  13. package/dist/transformers.mjs +16017 -12878
  14. package/dist/transformers.mjs.map +1 -1
  15. package/package.json +7 -7
  16. package/src/base/feature_extraction_utils.js +54 -0
  17. package/src/base/image_processors_utils.js +1089 -0
  18. package/src/base/processing_utils.js +145 -0
  19. package/src/configs.js +15 -3
  20. package/src/env.js +15 -4
  21. package/src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js +90 -0
  22. package/src/models/auto/feature_extraction_auto.js +41 -0
  23. package/src/models/auto/image_processing_auto.js +29 -0
  24. package/src/models/auto/processing_auto.js +100 -0
  25. package/src/models/beit/image_processing_beit.js +5 -0
  26. package/src/models/bit/image_processing_bit.js +5 -0
  27. package/src/models/chinese_clip/image_processing_chinese_clip.js +5 -0
  28. package/src/models/clap/feature_extraction_clap.js +159 -0
  29. package/src/models/clip/image_processing_clip.js +6 -0
  30. package/src/models/convnext/image_processing_convnext.js +45 -0
  31. package/src/models/deit/image_processing_deit.js +6 -0
  32. package/src/models/detr/image_processing_detr.js +52 -0
  33. package/src/models/donut/image_processing_donut.js +31 -0
  34. package/src/models/dpt/image_processing_dpt.js +6 -0
  35. package/src/models/efficientnet/image_processing_efficientnet.js +13 -0
  36. package/src/models/feature_extractors.js +12 -0
  37. package/src/models/florence2/processing_florence2.js +128 -0
  38. package/src/models/glpn/image_processing_glpn.js +5 -0
  39. package/src/models/image_processors.js +36 -0
  40. package/src/models/janus/image_processing_janus.js +26 -0
  41. package/src/models/janus/processing_janus.js +123 -0
  42. package/src/models/jina_clip/image_processing_jina_clip.js +26 -0
  43. package/src/models/jina_clip/processing_jina_clip.js +24 -0
  44. package/src/models/llava_onevision/image_processing_llava_onevision.js +5 -0
  45. package/src/models/mask2former/image_processing_mask2former.js +5 -0
  46. package/src/models/maskformer/image_processing_maskformer.js +18 -0
  47. package/src/models/mgp_str/processing_mgp_str.js +170 -0
  48. package/src/models/mobilenet_v1/image_processing_mobilenet_v1.js +7 -0
  49. package/src/models/mobilenet_v2/image_processing_mobilenet_v2.js +7 -0
  50. package/src/models/mobilenet_v3/image_processing_mobilenet_v3.js +7 -0
  51. package/src/models/mobilenet_v4/image_processing_mobilenet_v4.js +7 -0
  52. package/src/models/mobilevit/image_processing_mobilevit.js +6 -0
  53. package/src/models/nougat/image_processing_nougat.js +5 -0
  54. package/src/models/owlv2/image_processing_owlv2.js +5 -0
  55. package/src/models/owlvit/image_processing_owlvit.js +12 -0
  56. package/src/models/owlvit/processing_owlvit.js +7 -0
  57. package/src/models/processors.js +11 -0
  58. package/src/models/pvt/image_processing_pvt.js +5 -0
  59. package/src/models/pyannote/feature_extraction_pyannote.js +28 -0
  60. package/src/models/pyannote/processing_pyannote.js +71 -0
  61. package/src/models/qwen2_vl/image_processing_qwen2_vl.js +52 -0
  62. package/src/models/qwen2_vl/processing_qwen2_vl.js +52 -0
  63. package/src/models/rt_detr/image_processing_rt_detr.js +12 -0
  64. package/src/models/sam/image_processing_sam.js +242 -0
  65. package/src/models/sam/processing_sam.js +20 -0
  66. package/src/models/sapiens/image_processing_sapiens.js +13 -0
  67. package/src/models/seamless_m4t/feature_extraction_seamless_m4t.js +180 -0
  68. package/src/models/segformer/image_processing_segformer.js +13 -0
  69. package/src/models/siglip/image_processing_siglip.js +5 -0
  70. package/src/models/speecht5/feature_extraction_speecht5.js +4 -0
  71. package/src/models/speecht5/processing_speecht5.js +17 -0
  72. package/src/models/swin2sr/image_processing_swin2sr.js +24 -0
  73. package/src/models/vit/image_processing_vit.js +7 -0
  74. package/src/models/vitmatte/image_processing_vitmatte.js +50 -0
  75. package/src/models/vitpose/image_processing_vitpose.js +89 -0
  76. package/src/models/wav2vec2/feature_extraction_wav2vec2.js +44 -0
  77. package/src/models/wav2vec2/processing_wav2vec2.js +15 -0
  78. package/src/models/wespeaker/feature_extraction_wespeaker.js +100 -0
  79. package/src/models/whisper/feature_extraction_whisper.js +84 -0
  80. package/src/models/whisper/processing_whisper.js +21 -0
  81. package/src/models/yolos/image_processing_yolos.js +12 -0
  82. package/src/models.js +695 -32
  83. package/src/pipelines.js +8 -8
  84. package/src/tokenizers.js +5 -0
  85. package/src/transformers.js +15 -2
  86. package/src/utils/constants.js +8 -1
  87. package/src/utils/core.js +37 -9
  88. package/src/utils/hub.js +2 -1
  89. package/src/utils/image.js +68 -17
  90. package/src/utils/tensor.js +33 -1
  91. package/types/base/feature_extraction_utils.d.ts +41 -0
  92. package/types/base/feature_extraction_utils.d.ts.map +1 -0
  93. package/types/base/image_processors_utils.d.ts +323 -0
  94. package/types/base/image_processors_utils.d.ts.map +1 -0
  95. package/types/base/processing_utils.d.ts +80 -0
  96. package/types/base/processing_utils.d.ts.map +1 -0
  97. package/types/configs.d.ts +4 -1
  98. package/types/configs.d.ts.map +1 -1
  99. package/types/env.d.ts.map +1 -1
  100. package/types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts +25 -0
  101. package/types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts.map +1 -0
  102. package/types/models/auto/feature_extraction_auto.d.ts +5 -0
  103. package/types/models/auto/feature_extraction_auto.d.ts.map +1 -0
  104. package/types/models/auto/image_processing_auto.d.ts +5 -0
  105. package/types/models/auto/image_processing_auto.d.ts.map +1 -0
  106. package/types/models/auto/processing_auto.d.ts +35 -0
  107. package/types/models/auto/processing_auto.d.ts.map +1 -0
  108. package/types/models/beit/image_processing_beit.d.ts +4 -0
  109. package/types/models/beit/image_processing_beit.d.ts.map +1 -0
  110. package/types/models/bit/image_processing_bit.d.ts +4 -0
  111. package/types/models/bit/image_processing_bit.d.ts.map +1 -0
  112. package/types/models/chinese_clip/image_processing_chinese_clip.d.ts +4 -0
  113. package/types/models/chinese_clip/image_processing_chinese_clip.d.ts.map +1 -0
  114. package/types/models/clap/feature_extraction_clap.d.ts +57 -0
  115. package/types/models/clap/feature_extraction_clap.d.ts.map +1 -0
  116. package/types/models/clip/image_processing_clip.d.ts +6 -0
  117. package/types/models/clip/image_processing_clip.d.ts.map +1 -0
  118. package/types/models/convnext/image_processing_convnext.d.ts +12 -0
  119. package/types/models/convnext/image_processing_convnext.d.ts.map +1 -0
  120. package/types/models/deit/image_processing_deit.d.ts +6 -0
  121. package/types/models/deit/image_processing_deit.d.ts.map +1 -0
  122. package/types/models/detr/image_processing_detr.d.ts +42 -0
  123. package/types/models/detr/image_processing_detr.d.ts.map +1 -0
  124. package/types/models/donut/image_processing_donut.d.ts +7 -0
  125. package/types/models/donut/image_processing_donut.d.ts.map +1 -0
  126. package/types/models/dpt/image_processing_dpt.d.ts +6 -0
  127. package/types/models/dpt/image_processing_dpt.d.ts.map +1 -0
  128. package/types/models/efficientnet/image_processing_efficientnet.d.ts +6 -0
  129. package/types/models/efficientnet/image_processing_efficientnet.d.ts.map +1 -0
  130. package/types/models/feature_extractors.d.ts +10 -0
  131. package/types/models/feature_extractors.d.ts.map +1 -0
  132. package/types/models/florence2/processing_florence2.d.ts +39 -0
  133. package/types/models/florence2/processing_florence2.d.ts.map +1 -0
  134. package/types/models/glpn/image_processing_glpn.d.ts +4 -0
  135. package/types/models/glpn/image_processing_glpn.d.ts.map +1 -0
  136. package/types/models/image_processors.d.ts +36 -0
  137. package/types/models/image_processors.d.ts.map +1 -0
  138. package/types/models/janus/image_processing_janus.d.ts +7 -0
  139. package/types/models/janus/image_processing_janus.d.ts.map +1 -0
  140. package/types/models/janus/processing_janus.d.ts +77 -0
  141. package/types/models/janus/processing_janus.d.ts.map +1 -0
  142. package/types/models/jina_clip/image_processing_jina_clip.d.ts +5 -0
  143. package/types/models/jina_clip/image_processing_jina_clip.d.ts.map +1 -0
  144. package/types/models/jina_clip/processing_jina_clip.d.ts +9 -0
  145. package/types/models/jina_clip/processing_jina_clip.d.ts.map +1 -0
  146. package/types/models/llava_onevision/image_processing_llava_onevision.d.ts +4 -0
  147. package/types/models/llava_onevision/image_processing_llava_onevision.d.ts.map +1 -0
  148. package/types/models/mask2former/image_processing_mask2former.d.ts +4 -0
  149. package/types/models/mask2former/image_processing_mask2former.d.ts.map +1 -0
  150. package/types/models/maskformer/image_processing_maskformer.d.ts +22 -0
  151. package/types/models/maskformer/image_processing_maskformer.d.ts.map +1 -0
  152. package/types/models/mgp_str/processing_mgp_str.d.ts +64 -0
  153. package/types/models/mgp_str/processing_mgp_str.d.ts.map +1 -0
  154. package/types/models/mobilenet_v1/image_processing_mobilenet_v1.d.ts +6 -0
  155. package/types/models/mobilenet_v1/image_processing_mobilenet_v1.d.ts.map +1 -0
  156. package/types/models/mobilenet_v2/image_processing_mobilenet_v2.d.ts +6 -0
  157. package/types/models/mobilenet_v2/image_processing_mobilenet_v2.d.ts.map +1 -0
  158. package/types/models/mobilenet_v3/image_processing_mobilenet_v3.d.ts +6 -0
  159. package/types/models/mobilenet_v3/image_processing_mobilenet_v3.d.ts.map +1 -0
  160. package/types/models/mobilenet_v4/image_processing_mobilenet_v4.d.ts +6 -0
  161. package/types/models/mobilenet_v4/image_processing_mobilenet_v4.d.ts.map +1 -0
  162. package/types/models/mobilevit/image_processing_mobilevit.d.ts +6 -0
  163. package/types/models/mobilevit/image_processing_mobilevit.d.ts.map +1 -0
  164. package/types/models/nougat/image_processing_nougat.d.ts +4 -0
  165. package/types/models/nougat/image_processing_nougat.d.ts.map +1 -0
  166. package/types/models/owlv2/image_processing_owlv2.d.ts +4 -0
  167. package/types/models/owlv2/image_processing_owlv2.d.ts.map +1 -0
  168. package/types/models/owlvit/image_processing_owlvit.d.ts +10 -0
  169. package/types/models/owlvit/image_processing_owlvit.d.ts.map +1 -0
  170. package/types/models/owlvit/processing_owlvit.d.ts +8 -0
  171. package/types/models/owlvit/processing_owlvit.d.ts.map +1 -0
  172. package/types/models/processors.d.ts +12 -0
  173. package/types/models/processors.d.ts.map +1 -0
  174. package/types/models/pvt/image_processing_pvt.d.ts +4 -0
  175. package/types/models/pvt/image_processing_pvt.d.ts.map +1 -0
  176. package/types/models/pyannote/feature_extraction_pyannote.d.ts +13 -0
  177. package/types/models/pyannote/feature_extraction_pyannote.d.ts.map +1 -0
  178. package/types/models/pyannote/processing_pyannote.d.ts +30 -0
  179. package/types/models/pyannote/processing_pyannote.d.ts.map +1 -0
  180. package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts +11 -0
  181. package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts.map +1 -0
  182. package/types/models/qwen2_vl/processing_qwen2_vl.d.ts +17 -0
  183. package/types/models/qwen2_vl/processing_qwen2_vl.d.ts.map +1 -0
  184. package/types/models/rt_detr/image_processing_rt_detr.d.ts +8 -0
  185. package/types/models/rt_detr/image_processing_rt_detr.d.ts.map +1 -0
  186. package/types/models/sam/image_processing_sam.d.ts +103 -0
  187. package/types/models/sam/image_processing_sam.d.ts.map +1 -0
  188. package/types/models/sam/processing_sam.d.ts +9 -0
  189. package/types/models/sam/processing_sam.d.ts.map +1 -0
  190. package/types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts +34 -0
  191. package/types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts.map +1 -0
  192. package/types/models/segformer/image_processing_segformer.d.ts +10 -0
  193. package/types/models/segformer/image_processing_segformer.d.ts.map +1 -0
  194. package/types/models/siglip/image_processing_siglip.d.ts +4 -0
  195. package/types/models/siglip/image_processing_siglip.d.ts.map +1 -0
  196. package/types/models/speecht5/feature_extraction_speecht5.d.ts +4 -0
  197. package/types/models/speecht5/feature_extraction_speecht5.d.ts.map +1 -0
  198. package/types/models/speecht5/processing_speecht5.d.ts +14 -0
  199. package/types/models/speecht5/processing_speecht5.d.ts.map +1 -0
  200. package/types/models/swin2sr/image_processing_swin2sr.d.ts +5 -0
  201. package/types/models/swin2sr/image_processing_swin2sr.d.ts.map +1 -0
  202. package/types/models/vit/image_processing_vit.d.ts +6 -0
  203. package/types/models/vit/image_processing_vit.d.ts.map +1 -0
  204. package/types/models/vitmatte/image_processing_vitmatte.d.ts +12 -0
  205. package/types/models/vitmatte/image_processing_vitmatte.d.ts.map +1 -0
  206. package/types/models/vitpose/image_processing_vitpose.d.ts +26 -0
  207. package/types/models/vitpose/image_processing_vitpose.d.ts.map +1 -0
  208. package/types/models/wav2vec2/feature_extraction_wav2vec2.d.ts +19 -0
  209. package/types/models/wav2vec2/feature_extraction_wav2vec2.d.ts.map +1 -0
  210. package/types/models/wav2vec2/processing_wav2vec2.d.ts +12 -0
  211. package/types/models/wav2vec2/processing_wav2vec2.d.ts.map +1 -0
  212. package/types/models/wespeaker/feature_extraction_wespeaker.d.ts +23 -0
  213. package/types/models/wespeaker/feature_extraction_wespeaker.d.ts.map +1 -0
  214. package/types/models/whisper/feature_extraction_whisper.d.ts +21 -0
  215. package/types/models/whisper/feature_extraction_whisper.d.ts.map +1 -0
  216. package/types/models/whisper/processing_whisper.d.ts +17 -0
  217. package/types/models/whisper/processing_whisper.d.ts.map +1 -0
  218. package/types/models/yolos/image_processing_yolos.d.ts +10 -0
  219. package/types/models/yolos/image_processing_yolos.d.ts.map +1 -0
  220. package/types/models.d.ts +152 -0
  221. package/types/models.d.ts.map +1 -1
  222. package/types/pipelines.d.ts +2 -3
  223. package/types/pipelines.d.ts.map +1 -1
  224. package/types/tokenizers.d.ts +3 -0
  225. package/types/tokenizers.d.ts.map +1 -1
  226. package/types/transformers.d.ts +10 -1
  227. package/types/utils/constants.d.ts +6 -0
  228. package/types/utils/constants.d.ts.map +1 -1
  229. package/types/utils/core.d.ts +58 -3
  230. package/types/utils/core.d.ts.map +1 -1
  231. package/types/utils/hub.d.ts +1 -1
  232. package/types/utils/hub.d.ts.map +1 -1
  233. package/types/utils/image.d.ts +10 -2
  234. package/types/utils/image.d.ts.map +1 -1
  235. package/types/utils/tensor.d.ts +34 -1
  236. package/types/utils/tensor.d.ts.map +1 -1
  237. package/src/processors.js +0 -2655
  238. package/types/processors.d.ts +0 -924
  239. package/types/processors.d.ts.map +0 -1
@@ -0,0 +1,323 @@
1
+ /**
2
+ * Post-processes the outputs of the model (for object detection).
3
+ * @param {Object} outputs The outputs of the model that must be post-processed
4
+ * @param {Tensor} outputs.logits The logits
5
+ * @param {Tensor} outputs.pred_boxes The predicted boxes.
6
+ * @param {number} [threshold=0.5] The threshold to use for the scores.
7
+ * @param {[number, number][]} [target_sizes=null] The sizes of the original images.
8
+ * @param {boolean} [is_zero_shot=false] Whether zero-shot object detection was performed.
9
+ * @return {Object[]} An array of objects containing the post-processed outputs.
10
+ */
11
+ export function post_process_object_detection(outputs: {
12
+ logits: Tensor;
13
+ pred_boxes: Tensor;
14
+ }, threshold?: number, target_sizes?: [number, number][], is_zero_shot?: boolean): any[];
15
+ /**
16
+ * Post-processes the outputs of the model (for semantic segmentation).
17
+ * @param {*} outputs Raw outputs of the model.
18
+ * @param {[number, number][]} [target_sizes=null] List of tuples corresponding to the requested final size
19
+ * (height, width) of each prediction. If unset, predictions will not be resized.
20
+ * @returns {{segmentation: Tensor; labels: number[]}[]} The semantic segmentation maps.
21
+ */
22
+ export function post_process_semantic_segmentation(outputs: any, target_sizes?: [number, number][]): {
23
+ segmentation: Tensor;
24
+ labels: number[];
25
+ }[];
26
+ /**
27
+ * Post-process the model output to generate the final panoptic segmentation.
28
+ * @param {*} outputs The model output to post process
29
+ * @param {number} [threshold=0.5] The probability score threshold to keep predicted instance masks.
30
+ * @param {number} [mask_threshold=0.5] Threshold to use when turning the predicted masks into binary values.
31
+ * @param {number} [overlap_mask_area_threshold=0.8] The overlap mask area threshold to merge or discard small disconnected parts within each binary instance mask.
32
+ * @param {Set<number>} [label_ids_to_fuse=null] The labels in this set will have all their instances fused together.
33
+ * @param {[number, number][]} [target_sizes=null] The target sizes to resize the masks to.
34
+ * @returns {Array<{ segmentation: Tensor, segments_info: Array<{id: number, label_id: number, score: number}>}>}
35
+ */
36
+ export function post_process_panoptic_segmentation(outputs: any, threshold?: number, mask_threshold?: number, overlap_mask_area_threshold?: number, label_ids_to_fuse?: Set<number>, target_sizes?: [number, number][]): Array<{
37
+ segmentation: Tensor;
38
+ segments_info: Array<{
39
+ id: number;
40
+ label_id: number;
41
+ score: number;
42
+ }>;
43
+ }>;
44
+ /**
45
+ * Post-processes the outputs of the model (for instance segmentation).
46
+ * @param {*} outputs Raw outputs of the model.
47
+ * @param {number} [threshold=0.5] The probability score threshold to keep predicted instance masks.
48
+ * @param {[number, number][]} [target_sizes=null] List of tuples corresponding to the requested final size
49
+ * (height, width) of each prediction. If unset, predictions will not be resized.
50
+ * @returns {Array<{ segmentation: Tensor, segments_info: Array<{id: number, label_id: number, score: number}>}>}
51
+ */
52
+ export function post_process_instance_segmentation(outputs: any, threshold?: number, target_sizes?: [number, number][]): Array<{
53
+ segmentation: Tensor;
54
+ segments_info: Array<{
55
+ id: number;
56
+ label_id: number;
57
+ score: number;
58
+ }>;
59
+ }>;
60
+ declare const ImageProcessor_base: new () => {
61
+ (...args: any[]): any;
62
+ _call(...args: any[]): any;
63
+ };
64
+ /**
65
+ * @typedef {Object} ImageProcessorConfig A configuration object used to create an image processor.
66
+ * @property {function} [progress_callback=null] If specified, this function will be called during model construction, to provide the user with progress updates.
67
+ * @property {number[]} [image_mean] The mean values for image normalization.
68
+ * @property {number[]} [image_std] The standard deviation values for image normalization.
69
+ * @property {boolean} [do_rescale] Whether to rescale the image pixel values to the [0,1] range.
70
+ * @property {number} [rescale_factor] The factor to use for rescaling the image pixel values.
71
+ * @property {boolean} [do_normalize] Whether to normalize the image pixel values.
72
+ * @property {boolean} [do_resize] Whether to resize the image.
73
+ * @property {number} [resample] What method to use for resampling.
74
+ * @property {number|Object} [size] The size to resize the image to.
75
+ * @property {number|Object} [image_size] The size to resize the image to (same as `size`).
76
+ * @property {boolean} [do_flip_channel_order=false] Whether to flip the color channels from RGB to BGR.
77
+ * Can be overridden by the `do_flip_channel_order` parameter in the `preprocess` method.
78
+ * @property {boolean} [do_center_crop] Whether to center crop the image to the specified `crop_size`.
79
+ * Can be overridden by `do_center_crop` in the `preprocess` method.
80
+ * @property {boolean} [do_thumbnail] Whether to resize the image using thumbnail method.
81
+ * @property {boolean} [keep_aspect_ratio] If `true`, the image is resized to the largest possible size such that the aspect ratio is preserved.
82
+ * Can be overridden by `keep_aspect_ratio` in `preprocess`.
83
+ * @property {number} [ensure_multiple_of] If `do_resize` is `true`, the image is resized to a size that is a multiple of this value.
84
+ * Can be overridden by `ensure_multiple_of` in `preprocess`.
85
+ *
86
+ * @property {number[]} [mean] The mean values for image normalization (same as `image_mean`).
87
+ * @property {number[]} [std] The standard deviation values for image normalization (same as `image_std`).
88
+ */
89
+ export class ImageProcessor extends ImageProcessor_base {
90
+ /**
91
+ * Instantiate one of the processor classes of the library from a pretrained model.
92
+ *
93
+ * The processor class to instantiate is selected based on the `image_processor_type` (or `feature_extractor_type`; legacy)
94
+ * property of the config object (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
95
+ *
96
+ * @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
97
+ * - A string, the *model id* of a pretrained processor hosted inside a model repo on huggingface.co.
98
+ * Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
99
+ * user or organization name, like `dbmdz/bert-base-german-cased`.
100
+ * - A path to a *directory* containing processor files, e.g., `./my_model_directory/`.
101
+ * @param {import('../utils/hub.js').PretrainedOptions} options Additional options for loading the processor.
102
+ *
103
+ * @returns {Promise<ImageProcessor>} A new instance of the Processor class.
104
+ */
105
+ static from_pretrained(pretrained_model_name_or_path: string, options: import('../utils/hub.js').PretrainedOptions): Promise<ImageProcessor>;
106
+ /**
107
+ * Constructs a new `ImageProcessor`.
108
+ * @param {ImageProcessorConfig} config The configuration object.
109
+ */
110
+ constructor(config: ImageProcessorConfig);
111
+ image_mean: number[];
112
+ image_std: number[];
113
+ resample: number;
114
+ do_rescale: boolean;
115
+ rescale_factor: number;
116
+ do_normalize: boolean;
117
+ do_thumbnail: boolean;
118
+ size: any;
119
+ do_resize: boolean;
120
+ size_divisibility: any;
121
+ do_center_crop: boolean;
122
+ crop_size: any;
123
+ do_convert_rgb: any;
124
+ do_crop_margin: any;
125
+ pad_size: any;
126
+ do_pad: any;
127
+ do_flip_channel_order: boolean;
128
+ config: ImageProcessorConfig;
129
+ /**
130
+ * Resize the image to make a thumbnail. The image is resized so that no dimension is larger than any
131
+ * corresponding dimension of the specified size.
132
+ * @param {RawImage} image The image to be resized.
133
+ * @param {{height:number, width:number}} size The size `{"height": h, "width": w}` to resize the image to.
134
+ * @param {string | 0 | 1 | 2 | 3 | 4 | 5} [resample=2] The resampling filter to use.
135
+ * @returns {Promise<RawImage>} The resized image.
136
+ */
137
+ thumbnail(image: RawImage, size: {
138
+ height: number;
139
+ width: number;
140
+ }, resample?: string | 0 | 1 | 2 | 3 | 4 | 5): Promise<RawImage>;
141
+ /**
142
+ * Crops the margin of the image. Gray pixels are considered margin (i.e., pixels with a value below the threshold).
143
+ * @param {RawImage} image The image to be cropped.
144
+ * @param {number} gray_threshold Value below which pixels are considered to be gray.
145
+ * @returns {Promise<RawImage>} The cropped image.
146
+ */
147
+ crop_margin(image: RawImage, gray_threshold?: number): Promise<RawImage>;
148
+ /**
149
+ * Pad the image by a certain amount.
150
+ * @param {Float32Array} pixelData The pixel data to pad.
151
+ * @param {number[]} imgDims The dimensions of the image (height, width, channels).
152
+ * @param {{width:number; height:number}|number} padSize The dimensions of the padded image.
153
+ * @param {Object} options The options for padding.
154
+ * @param {'constant'|'symmetric'} [options.mode='constant'] The type of padding to add.
155
+ * @param {boolean} [options.center=false] Whether to center the image.
156
+ * @param {number|number[]} [options.constant_values=0] The constant value to use for padding.
157
+ * @returns {[Float32Array, number[]]} The padded pixel data and image dimensions.
158
+ */
159
+ pad_image(pixelData: Float32Array, imgDims: number[], padSize: {
160
+ width: number;
161
+ height: number;
162
+ } | number, { mode, center, constant_values, }?: {
163
+ mode?: 'constant' | 'symmetric';
164
+ center?: boolean;
165
+ constant_values?: number | number[];
166
+ }): [Float32Array, number[]];
167
+ /**
168
+ * Rescale the image's pixel values by `this.rescale_factor`.
169
+ * @param {Float32Array} pixelData The pixel data to rescale.
170
+ * @returns {void}
171
+ */
172
+ rescale(pixelData: Float32Array): void;
173
+ /**
174
+ * Find the target (width, height) dimension of the output image after
175
+ * resizing given the input image and the desired size.
176
+ * @param {RawImage} image The image to resize.
177
+ * @param {any} size The size to use for resizing the image.
178
+ * @returns {[number, number]} The target (width, height) dimension of the output image after resizing.
179
+ */
180
+ get_resize_output_image_size(image: RawImage, size: any): [number, number];
181
+ /**
182
+ * Resizes the image.
183
+ * @param {RawImage} image The image to resize.
184
+ * @returns {Promise<RawImage>} The resized image.
185
+ */
186
+ resize(image: RawImage): Promise<RawImage>;
187
+ /**
188
+ * @typedef {object} PreprocessedImage
189
+ * @property {HeightWidth} original_size The original size of the image.
190
+ * @property {HeightWidth} reshaped_input_size The reshaped input size of the image.
191
+ * @property {Tensor} pixel_values The pixel values of the preprocessed image.
192
+ */
193
+ /**
194
+ * Preprocesses the given image.
195
+ *
196
+ * @param {RawImage} image The image to preprocess.
197
+ * @param {Object} overrides The overrides for the preprocessing options.
198
+ * @returns {Promise<PreprocessedImage>} The preprocessed image.
199
+ */
200
+ preprocess(image: RawImage, { do_normalize, do_pad, do_convert_rgb, do_convert_grayscale, do_flip_channel_order, }?: any): Promise<{
201
+ /**
202
+ * The original size of the image.
203
+ */
204
+ original_size: HeightWidth;
205
+ /**
206
+ * The reshaped input size of the image.
207
+ */
208
+ reshaped_input_size: HeightWidth;
209
+ /**
210
+ * The pixel values of the preprocessed image.
211
+ */
212
+ pixel_values: Tensor;
213
+ }>;
214
+ /**
215
+ * Calls the feature extraction process on an array of images,
216
+ * preprocesses each image, and concatenates the resulting
217
+ * features into a single Tensor.
218
+ * @param {RawImage[]} images The image(s) to extract features from.
219
+ * @param {...any} args Additional arguments.
220
+ * @returns {Promise<ImageProcessorResult>} An object containing the concatenated pixel values (and other metadata) of the preprocessed images.
221
+ */
222
+ _call(images: RawImage[], ...args: any[]): Promise<ImageProcessorResult>;
223
+ }
224
+ /**
225
+ * Named tuple to indicate the order we are using is (height x width),
226
+ * even though the graphics industry standard is (width x height).
227
+ */
228
+ export type HeightWidth = [height: number, width: number];
229
+ export type ImageProcessorResult = {
230
+ /**
231
+ * The pixel values of the batched preprocessed images.
232
+ */
233
+ pixel_values: Tensor;
234
+ /**
235
+ * Array of two-dimensional tuples like [[480, 640]].
236
+ */
237
+ original_sizes: HeightWidth[];
238
+ /**
239
+ * Array of two-dimensional tuples like [[1000, 1330]].
240
+ */
241
+ reshaped_input_sizes: HeightWidth[];
242
+ };
243
+ /**
244
+ * A configuration object used to create an image processor.
245
+ */
246
+ export type ImageProcessorConfig = {
247
+ /**
248
+ * If specified, this function will be called during model construction, to provide the user with progress updates.
249
+ */
250
+ progress_callback?: Function;
251
+ /**
252
+ * The mean values for image normalization.
253
+ */
254
+ image_mean?: number[];
255
+ /**
256
+ * The standard deviation values for image normalization.
257
+ */
258
+ image_std?: number[];
259
+ /**
260
+ * Whether to rescale the image pixel values to the [0,1] range.
261
+ */
262
+ do_rescale?: boolean;
263
+ /**
264
+ * The factor to use for rescaling the image pixel values.
265
+ */
266
+ rescale_factor?: number;
267
+ /**
268
+ * Whether to normalize the image pixel values.
269
+ */
270
+ do_normalize?: boolean;
271
+ /**
272
+ * Whether to resize the image.
273
+ */
274
+ do_resize?: boolean;
275
+ /**
276
+ * What method to use for resampling.
277
+ */
278
+ resample?: number;
279
+ /**
280
+ * The size to resize the image to.
281
+ */
282
+ size?: number | any;
283
+ /**
284
+ * The size to resize the image to (same as `size`).
285
+ */
286
+ image_size?: number | any;
287
+ /**
288
+ * Whether to flip the color channels from RGB to BGR.
289
+ * Can be overridden by the `do_flip_channel_order` parameter in the `preprocess` method.
290
+ */
291
+ do_flip_channel_order?: boolean;
292
+ /**
293
+ * Whether to center crop the image to the specified `crop_size`.
294
+ * Can be overridden by `do_center_crop` in the `preprocess` method.
295
+ */
296
+ do_center_crop?: boolean;
297
+ /**
298
+ * Whether to resize the image using thumbnail method.
299
+ */
300
+ do_thumbnail?: boolean;
301
+ /**
302
+ * If `true`, the image is resized to the largest possible size such that the aspect ratio is preserved.
303
+ * Can be overridden by `keep_aspect_ratio` in `preprocess`.
304
+ */
305
+ keep_aspect_ratio?: boolean;
306
+ /**
307
+ * If `do_resize` is `true`, the image is resized to a size that is a multiple of this value.
308
+ * Can be overridden by `ensure_multiple_of` in `preprocess`.
309
+ */
310
+ ensure_multiple_of?: number;
311
+ /**
312
+ * The mean values for image normalization (same as `image_mean`).
313
+ */
314
+ mean?: number[];
315
+ /**
316
+ * The standard deviation values for image normalization (same as `image_std`).
317
+ */
318
+ std?: number[];
319
+ };
320
+ import { Tensor } from "../utils/tensor.js";
321
+ import { RawImage } from "../utils/image.js";
322
+ export {};
323
+ //# sourceMappingURL=image_processors_utils.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image_processors_utils.d.ts","sourceRoot":"","sources":["../../src/base/image_processors_utils.js"],"names":[],"mappings":"AA+EA;;;;;;;;;GASG;AACH;IAP2B,MAAM,EAAtB,MAAM;IACU,UAAU,EAA1B,MAAM;eACN,MAAM,iBACN,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,iBAClB,OAAO,GACN,KAAQ,CAwEnB;AAGD;;;;;;GAMG;AACH,gFAJW,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,GAEhB;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAC,EAAE,CAwDtD;AAkPD;;;;;;;;;GASG;AACH,6EAPW,MAAM,mBACN,MAAM,gCACN,MAAM,sBACN,IAAI,MAAM,CAAC,iBACX,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,GAChB,MAAM;IAAE,YAAY,EAAE,MAAM,CAAC;IAAC,aAAa,EAAE,MAAM;QAAC,EAAE,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAC,CAAC,CAAA;CAAC,CAAC,CAuE/G;AAGD;;;;;;;GAOG;AACH,6EALW,MAAM,iBACN,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,GAEhB,MAAM;IAAE,YAAY,EAAE,MAAM,CAAC;IAAC,aAAa,EAAE,MAAM;QAAC,EAAE,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAC,CAAC,CAAA;CAAC,CAAC,CAI/G;;;;;AAGD;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH;IAmeI;;;;;;;;;;;;;;OAcG;IACH,sDATW,MAAM,WAKN,OAAO,iBAAiB,EAAE,iBAAiB,GAEzC,QAAQ,cAAc,CAAC,CAKnC;IAnfD;;;OAGG;IACH,oBAFW,oBAAoB,EAmC9B;IA9BG,qBAAkD;IAClD,oBAA+C;IAE/C,iBAAoC;IACpC,oBAA2C;IAC3C,uBAAwD;IACxD,sBAAuC;IAEvC,sBAAuC;IACvC,UAA4C;IAC5C,mBAA8D;IAC9D,uBAAwE;IAExE,wBAA2C;IAC3C,eAAiC;IACjC,oBAAmD;IACnD,oBAA2C;IAE3C,cAA+B;IAC/B,YAA2B;IAQ3B,+BAAkE;IAElE,6BAAoB;IAGxB;;;;;;;OAOG;IACH,iBALW,QAAQ;gBACA,MAAM;eAAQ,MAAM;kBAC5B,MAAM,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAC5B,QAAQ,QAAQ,CAAC,CAsB7B;IAGD;;;;;OAKG;IACH,mBAJW,QAAQ,mBACR,MAAM,GACJ,QAAQ,QAAQ,CAAC,CAiC7B;IAED;;;;;;;;;;OAUG;IACH,qBATW,YAAY,WACZ,MAAM,EAAE,WACR;QAAC,KAAK,EAAC,MAAM,CAAC;QAAC,MAAM,EAAC,MAAM,CAAA;KAAC,GAAC,MAAM;QAEH,IAAI,GAArC,UAAU,GAAC,WAAW;QACJ,MAAM,GAAxB,OAAO;QACmB,eAAe,GAAzC,MAAM,GAAC,MAAM,EAAE;QACb,CAAC,YAAY,EAAE,MAAM,EAAE,CAAC,CA6EpC;IAED;;;;OAIG;IACH,mBAHW,YAAY,GACV,IAAI,CAMhB;IAED;;;;;;OAMG;IACH,oCAJW,QAAQ,QACR,GAAG,GACD,CAAC,MAAM,EAAE,MAAM,CAAC,CA4F5B;IAED;;;;OAIG;IACH,cAHW,QAAQ,GACN,QAAQ,QAAQ,CAAC,CAO7B;IAED;;;;;OAKG;
IAEH;;;;;;OAMG;IACH,kBAJW,QAAQ;;;;;;;;;;;;sBANL,MAAM;OA8HnB;IAED;;;;;;;OAOG;IACH,cAJW,QAAQ,EAAE,WACP,GAAG,KACJ,QAAQ,oBAAoB,CAAC,CAqBzC;CAsBJ;;;;;0BArjCY,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,CAAC;;;;;kBAM9B,MAAM;;;;oBACN,aAAa;;;;0BACb,aAAa;;;;;;;;;;;;;iBAgiBb,MAAM,EAAE;;;;gBACR,MAAM,EAAE;;;;iBACR,OAAO;;;;qBACP,MAAM;;;;mBACN,OAAO;;;;gBACP,OAAO;;;;eACP,MAAM;;;;WACN,MAAM,MAAO;;;;iBACb,MAAM,MAAO;;;;;4BACb,OAAO;;;;;qBAEP,OAAO;;;;mBAEP,OAAO;;;;;wBACP,OAAO;;;;;yBAEP,MAAM;;;;WAGN,MAAM,EAAE;;;;UACR,MAAM,EAAE;;uBAtkBqB,oBAAoB;yBAEtC,mBAAmB"}
@@ -0,0 +1,80 @@
1
+ declare const Processor_base: new () => {
2
+ (...args: any[]): any;
3
+ _call(...args: any[]): any;
4
+ };
5
+ /**
6
+ * @typedef {Object} ProcessorProperties Additional processor-specific properties.
7
+ * @typedef {import('../utils/hub.js').PretrainedOptions & ProcessorProperties} PretrainedProcessorOptions
8
+ */
9
+ /**
10
+ * Represents a Processor that extracts features from an input.
11
+ */
12
+ export class Processor extends Processor_base {
13
+ static classes: string[];
14
+ static uses_processor_config: boolean;
15
+ /**
16
+ * Instantiate one of the processor classes of the library from a pretrained model.
17
+ *
18
+ * The processor class to instantiate is selected based on the `feature_extractor_type` property of the config object
19
+ * (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
20
+ *
21
+ * @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
22
+ * - A string, the *model id* of a pretrained processor hosted inside a model repo on huggingface.co.
23
+ * Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
24
+ * user or organization name, like `dbmdz/bert-base-german-cased`.
25
+ * - A path to a *directory* containing processor files, e.g., `./my_model_directory/`.
26
+ * @param {PretrainedProcessorOptions} options Additional options for loading the processor.
27
+ *
28
+ * @returns {Promise<Processor>} A new instance of the Processor class.
29
+ */
30
+ static from_pretrained(pretrained_model_name_or_path: string, options: PretrainedProcessorOptions): Promise<Processor>;
31
+ /**
32
+ * Creates a new Processor with the given components
33
+ * @param {Object} config
34
+ * @param {Record<string, Object>} components
35
+ */
36
+ constructor(config: any, components: Record<string, any>);
37
+ config: any;
38
+ components: Record<string, any>;
39
+ /**
40
+ * @returns {import('./image_processors_utils.js').ImageProcessor|undefined} The image processor of the processor, if it exists.
41
+ */
42
+ get image_processor(): import("./image_processors_utils.js").ImageProcessor;
43
+ /**
44
+ * @returns {import('../tokenizers.js').PreTrainedTokenizer|undefined} The tokenizer of the processor, if it exists.
45
+ */
46
+ get tokenizer(): import("../tokenizers.js").PreTrainedTokenizer;
47
+ /**
48
+ * @returns {import('./feature_extraction_utils.js').FeatureExtractor|undefined} The feature extractor of the processor, if it exists.
49
+ */
50
+ get feature_extractor(): import("./feature_extraction_utils.js").FeatureExtractor;
51
+ apply_chat_template(messages: any, options?: {}): string | number[] | number[][] | import("../transformers.js").Tensor | {
52
+ /**
53
+ * List of token ids to be fed to a model.
54
+ */
55
+ input_ids: number[] | number[][] | import("../transformers.js").Tensor;
56
+ /**
57
+ * List of indices specifying which tokens should be attended to by the model.
58
+ */
59
+ attention_mask: number[] | number[][] | import("../transformers.js").Tensor;
60
+ /**
61
+ * List of token type ids to be fed to a model.
62
+ */
63
+ token_type_ids?: number[] | number[][] | import("../transformers.js").Tensor;
64
+ };
65
+ batch_decode(...args: any[]): string[];
66
+ /**
67
+ * Calls the feature_extractor function with the given input.
68
+ * @param {any} input The input to extract features from.
69
+ * @param {...any} args Additional arguments.
70
+ * @returns {Promise<any>} A Promise that resolves with the extracted features.
71
+ */
72
+ _call(input: any, ...args: any[]): Promise<any>;
73
+ }
74
+ /**
75
+ * Additional processor-specific properties.
76
+ */
77
+ export type ProcessorProperties = any;
78
+ export type PretrainedProcessorOptions = import('../utils/hub.js').PretrainedOptions & ProcessorProperties;
79
+ export {};
80
+ //# sourceMappingURL=processing_utils.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"processing_utils.d.ts","sourceRoot":"","sources":["../../src/base/processing_utils.js"],"names":[],"mappings":";;;;AA2BA;;;GAGG;AAGH;;GAEG;AACH;IACI,yBAIC;IACD,sCAAqC;IAoErC;;;;;;;;;;;;;;OAcG;IACH,sDATW,MAAM,WAKN,0BAA0B,GAExB,QAAQ,SAAS,CAAC,CAoB9B;IAnGD;;;;OAIG;IACH,qCAFW,OAAO,MAAM,MAAS,EAMhC;IAFG,YAAoB;IACpB,gCAA4B;IAGhC;;OAEG;IACH,4EAEC;IAED;;OAEG;IACH,gEAEC;IAED;;OAEG;IACH,kFAEC;IAED;;;;;;;;;;;;;MAQC;IAED,uCAKC;IAGD;;;;;OAKG;IACH,aAJW,GAAG,WACA,GAAG,KACJ,QAAQ,GAAG,CAAC,CASxB;CAqCJ;;;;;yCAnHY,OAAO,iBAAiB,EAAE,iBAAiB,GAAG,mBAAmB"}
@@ -3,8 +3,9 @@
3
3
  * @param {PretrainedConfig} config
4
4
  * @returns {Record<string, number[]>}
5
5
  */
6
- export function getKeyValueShapes(config: PretrainedConfig, { prefix, }?: {
6
+ export function getKeyValueShapes(config: PretrainedConfig, { prefix, batch_size, }?: {
7
7
  prefix?: string;
8
+ batch_size?: number;
8
9
  }): Record<string, number[]>;
9
10
  /**
10
11
  * Base class for all configuration classes. For more information, see the corresponding
@@ -55,6 +56,8 @@ export class AutoConfig {
55
56
  static from_pretrained(pretrained_model_name_or_path: string, { progress_callback, config, cache_dir, local_files_only, revision, }?: import("./utils/hub.js").PretrainedOptions): Promise<PretrainedConfig>;
56
57
  }
57
58
  export type PretrainedOptions = import('./utils/hub.js').PretrainedOptions;
59
+ export type ProgressCallback = import('./utils/core.js').ProgressCallback;
60
+ export type ProgressInfo = import('./utils/core.js').ProgressInfo;
58
61
  /**
59
62
  * Transformers.js-specific configuration, possibly present in config.json under the key `transformers.js_config`.
60
63
  */
@@ -1 +1 @@
1
- {"version":3,"file":"configs.d.ts","sourceRoot":"","sources":["../src/configs.js"],"names":[],"mappings":"AAmNA;;;;GAIG;AACH,0CAHW,gBAAgB;;IACd,OAAO,MAAM,EAAE,MAAM,EAAE,CAAC,CA6EpC;AACD;;;GAGG;AACH;IAwBI;;;;;;;;OAQG;IACH,sDANW,MAAM,0EACN,iBAAiB,GAGf,QAAQ,gBAAgB,CAAC,CAqBrC;IArCD;;;OAGG;IACH,6BAGC;IAnBD,0BAA0B;IAC1B,YADW,MAAM,GAAC,IAAI,CACJ;IAElB,sBAAsB;IACtB,oBADW,OAAO,CACS;IAE3B,qBAAqB;IACrB,yBADW,MAAM,CACO;IAExB,mCAAmC;IACnC,0BADW,oBAAoB,CACN;IAQrB,uBAAkD;CAgCzD;AAED;;;;;GAKG;AACH;IArCI;;;;;;;;OAQG;IACH,6MAmBC;CAcJ;gCAvUY,OAAO,gBAAgB,EAAE,iBAAiB;;;;;;;;qBA4UzC,OAAO,mBAAmB,EAAE,QAAQ,GAAC,OAAO,OAAO,mBAAmB,EAAE,QAAQ,EAAE,OAAO,mBAAmB,EAAE,QAAQ,CAAC;;;;;;+BACvH,OAAO,MAAM,EAAE,MAAM,CAAC;;;;aAGtB,OAAO,oBAAoB,EAAE,UAAU;;;;YACvC,OAAO,mBAAmB,EAAE,QAAQ;;;;+BACpC,OAAO,GAAC,OAAO,MAAM,EAAE,OAAO,CAAC"}
1
+ {"version":3,"file":"configs.d.ts","sourceRoot":"","sources":["../src/configs.js"],"names":[],"mappings":"AAiOA;;;;GAIG;AACH,0CAHW,gBAAgB;;;IACd,OAAO,MAAM,EAAE,MAAM,EAAE,CAAC,CA2EpC;AACD;;;GAGG;AACH;IAwBI;;;;;;;;OAQG;IACH,sDANW,MAAM,0EACN,iBAAiB,GAGf,QAAQ,gBAAgB,CAAC,CAqBrC;IArCD;;;OAGG;IACH,6BAGC;IAnBD,0BAA0B;IAC1B,YADW,MAAM,GAAC,IAAI,CACJ;IAElB,sBAAsB;IACtB,oBADW,OAAO,CACS;IAE3B,qBAAqB;IACrB,yBADW,MAAM,CACO;IAExB,mCAAmC;IACnC,0BADW,oBAAoB,CACN;IAQrB,uBAAkD;CAgCzD;AAED;;;;;GAKG;AACH;IArCI;;;;;;;;OAQG;IACH,6MAmBC;CAcJ;gCAnVY,OAAO,gBAAgB,EAAE,iBAAiB;+BAI1C,OAAO,iBAAiB,EAAE,gBAAgB;2BAI1C,OAAO,iBAAiB,EAAE,YAAY;;;;;;;;qBAgVrC,OAAO,mBAAmB,EAAE,QAAQ,GAAC,OAAO,OAAO,mBAAmB,EAAE,QAAQ,EAAE,OAAO,mBAAmB,EAAE,QAAQ,CAAC;;;;;;+BACvH,OAAO,MAAM,EAAE,MAAM,CAAC;;;;aAGtB,OAAO,oBAAoB,EAAE,UAAU;;;;YACvC,OAAO,mBAAmB,EAAE,QAAQ;;;;+BACpC,OAAO,GAAC,OAAO,MAAM,EAAE,OAAO,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"env.d.ts","sourceRoot":"","sources":["../src/env.js"],"names":[],"mappings":"AA0CA;;GAEG;AACH;IACI,sDAAsD;;IAGtD,yDAAyD;;IAGzD,yCAAyC;;IAGzC,0CAA0C;;IAG1C,yCAAyC;;IAGzC,mDAAmD;;IAGnD,sDAAsD;;IAGtD,8CAA8C;;IAG9C,wCAAwC;;GAEzC;AAkBH;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,sCAAsC;AACtC,kBADW,uBAAuB,CA6BjC;;;;;;;;aAhDa,MAAM;;;;;cACN;QAAC,IAAI,EAAE,QAAQ,OAAO,oBAAoB,EAAE,GAAG,CAAC,CAAA;KAAC;;;;;uBAEjD,OAAO;;;;gBAEP,MAAM;;;;wBACN,MAAM;;;;;sBACN,OAAO;;;;oBAEP,MAAM;;;;WACN,OAAO;;;;qBACP,OAAO;;;;gBACP,OAAO;;;;cACP,MAAM;;;;oBACN,OAAO"}
1
+ {"version":3,"file":"env.d.ts","sourceRoot":"","sources":["../src/env.js"],"names":[],"mappings":"AA0CA;;GAEG;AACH;IACI,sDAAsD;;IAGtD,yDAAyD;;IAGzD,yCAAyC;;IAGzC,0CAA0C;;IAG1C,yCAAyC;;IAGzC,mDAAmD;;IAGnD,sDAAsD;;IAGtD,8CAA8C;;IAG9C,wCAAwC;;GAEzC;AA6BH;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,sCAAsC;AACtC,kBADW,uBAAuB,CA6BjC;;;;;;;;aAhDa,MAAM;;;;;cACN;QAAC,IAAI,EAAE,QAAQ,OAAO,oBAAoB,EAAE,GAAG,CAAC,CAAA;KAAC;;;;;uBAEjD,OAAO;;;;gBAEP,MAAM;;;;wBACN,MAAM;;;;;sBACN,OAAO;;;;oBAEP,MAAM;;;;WACN,OAAO;;;;qBACP,OAAO;;;;gBACP,OAAO;;;;cACP,MAAM;;;;oBACN,OAAO"}
@@ -0,0 +1,25 @@
1
+ export class ASTFeatureExtractor extends FeatureExtractor {
2
+ constructor(config: any);
3
+ mel_filters: number[][];
4
+ window: Float64Array;
5
+ mean: any;
6
+ std: any;
7
+ /**
8
+ * Computes the log-Mel spectrogram of the provided audio waveform.
9
+ * @param {Float32Array|Float64Array} waveform The audio waveform to process.
10
+ * @param {number} max_length The maximum number of frames to return.
11
+ * @returns {Promise<Tensor>} An object containing the log-Mel spectrogram data as a Float32Array and its dimensions as an array of numbers.
12
+ */
13
+ _extract_fbank_features(waveform: Float32Array | Float64Array, max_length: number): Promise<Tensor>;
14
+ /**
15
+ * Asynchronously extracts features from a given audio using the provided configuration.
16
+ * @param {Float32Array|Float64Array} audio The audio data as a Float32Array/Float64Array.
17
+ * @returns {Promise<{ input_values: Tensor }>} A Promise resolving to an object containing the extracted input features as a Tensor.
18
+ */
19
+ _call(audio: Float32Array | Float64Array): Promise<{
20
+ input_values: Tensor;
21
+ }>;
22
+ }
23
+ import { FeatureExtractor } from '../../base/feature_extraction_utils.js';
24
+ import { Tensor } from '../../utils/tensor.js';
25
+ //# sourceMappingURL=feature_extraction_audio_spectrogram_transformer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"feature_extraction_audio_spectrogram_transformer.d.ts","sourceRoot":"","sources":["../../../src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js"],"names":[],"mappings":"AAKA;IAEI,yBA2BC;IARG,wBAA8B;IAE9B,qBAEE;IAEF,UAA4B;IAC5B,SAA0B;IAG9B;;;;;OAKG;IACH,kCAJW,YAAY,GAAC,YAAY,cACzB,MAAM,GACJ,QAAQ,MAAM,CAAC,CAwB3B;IAGD;;;;OAIG;IACH,aAHW,YAAY,GAAC,YAAY,GACvB,QAAQ;QAAE,YAAY,EAAE,MAAM,CAAA;KAAE,CAAC,CAkB7C;CACJ;iCAzFuD,wCAAwC;uBACzE,uBAAuB"}
@@ -0,0 +1,5 @@
1
+ export class AutoFeatureExtractor {
2
+ static from_pretrained(pretrained_model_name_or_path: string, options: import("../../utils/hub.js").PretrainedOptions): Promise<FeatureExtractor>;
3
+ }
4
+ import { FeatureExtractor } from '../../base/feature_extraction_utils.js';
5
+ //# sourceMappingURL=feature_extraction_auto.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"feature_extraction_auto.d.ts","sourceRoot":"","sources":["../../../src/models/auto/feature_extraction_auto.js"],"names":[],"mappings":"AAMA;IAKsD,kJAoBH;CASlD;iCArCgC,wCAAwC"}
@@ -0,0 +1,5 @@
1
+ export class AutoImageProcessor {
2
+ static from_pretrained(pretrained_model_name_or_path: string, options: import("../../utils/hub.js").PretrainedOptions): Promise<ImageProcessor>;
3
+ }
4
+ import { ImageProcessor } from '../../base/image_processors_utils.js';
5
+ //# sourceMappingURL=image_processing_auto.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image_processing_auto.d.ts","sourceRoot":"","sources":["../../../src/models/auto/image_processing_auto.js"],"names":[],"mappings":"AAMA;IAuBkkwC,gJAAqyC;CADt2yC;+BAzB8B,sCAAsC"}
@@ -0,0 +1,35 @@
1
+ /**
2
+ * Helper class which is used to instantiate pretrained processors with the `from_pretrained` function.
3
+ * The chosen processor class is determined by the type specified in the processor config.
4
+ *
5
+ * **Example:** Load a processor using `from_pretrained`.
6
+ * ```javascript
7
+ * let processor = await AutoProcessor.from_pretrained('openai/whisper-tiny.en');
8
+ * ```
9
+ *
10
+ * **Example:** Run an image through a processor.
11
+ * ```javascript
12
+ * let processor = await AutoProcessor.from_pretrained('Xenova/clip-vit-base-patch16');
13
+ * let image = await RawImage.read('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/football-match.jpg');
14
+ * let image_inputs = await processor(image);
15
+ * // {
16
+ * // "pixel_values": {
17
+ * // "dims": [ 1, 3, 224, 224 ],
18
+ * // "type": "float32",
19
+ * // "data": Float32Array [ -1.558687686920166, -1.558687686920166, -1.5440893173217773, ... ],
20
+ * // "size": 150528
21
+ * // },
22
+ * // "original_sizes": [
23
+ * // [ 533, 800 ]
24
+ * // ],
25
+ * // "reshaped_input_sizes": [
26
+ * // [ 224, 224 ]
27
+ * // ]
28
+ * // }
29
+ * ```
30
+ */
31
+ export class AutoProcessor {
32
+ static from_pretrained(pretrained_model_name_or_path: string, options: any): Promise<Processor>;
33
+ }
34
+ import { Processor } from '../../base/processing_utils.js';
35
+ //# sourceMappingURL=processing_auto.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"processing_auto.d.ts","sourceRoot":"","sources":["../../../src/models/auto/processing_auto.js"],"names":[],"mappings":"AAUA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AACH;IAoCuB,gGAwB8qB;CADpsB;0BA/FyB,gCAAgC"}
@@ -0,0 +1,4 @@
1
+ export class BeitFeatureExtractor extends ImageProcessor {
2
+ }
3
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
4
+ //# sourceMappingURL=image_processing_beit.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image_processing_beit.d.ts","sourceRoot":"","sources":["../../../src/models/beit/image_processing_beit.js"],"names":[],"mappings":"AAIA;CAA4D;+BAFrD,sCAAsC"}
@@ -0,0 +1,4 @@
1
+ export class BitImageProcessor extends ImageProcessor {
2
+ }
3
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
4
+ //# sourceMappingURL=image_processing_bit.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image_processing_bit.d.ts","sourceRoot":"","sources":["../../../src/models/bit/image_processing_bit.js"],"names":[],"mappings":"AAIA;CAAyD;+BAFlD,sCAAsC"}
@@ -0,0 +1,4 @@
1
+ export class ChineseCLIPFeatureExtractor extends ImageProcessor {
2
+ }
3
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
4
+ //# sourceMappingURL=image_processing_chinese_clip.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image_processing_chinese_clip.d.ts","sourceRoot":"","sources":["../../../src/models/chinese_clip/image_processing_chinese_clip.js"],"names":[],"mappings":"AAIA;CAAmE;+BAF5D,sCAAsC"}
@@ -0,0 +1,57 @@
1
+ export class ClapFeatureExtractor extends FeatureExtractor {
2
+ constructor(config: any);
3
+ mel_filters: number[][];
4
+ mel_filters_slaney: number[][];
5
+ window: Float64Array;
6
+ /**
7
+ * Extracts the mel spectrogram and prepares it for the mode based on the `truncation` and `padding` arguments.
8
+ *
9
+ * Four different paths are possible:
10
+ * - `truncation="fusion"` and the length of the waveform is greater than the max length: the mel spectrogram
11
+ * will be computed on the entire audio. 3 random crops and a downsampled version of the full mel spectrogram
12
+ * are then stacked together. They will later be used for `feature_fusion`.
13
+ * - `truncation="rand_trunc"` and the length of the waveform is smaller than the max length: the audio is
14
+ * padded based on `padding`.
15
+ * - `truncation="fusion"` and the length of the waveform is smaller than the max length: the audio is padded
16
+ * based on `padding`, and is repeated `4` times.
17
+ * - `truncation="rand_trunc"` and the length of the waveform is greater than the max length: the mel
18
+ * spectrogram will be computed on a random crop of the waveform.
19
+ *
20
+ * @param {Float32Array|Float64Array} waveform The input waveform.
21
+ * @param {number} max_length The maximum length of the waveform.
22
+ * @param {string} truncation The truncation strategy to use.
23
+ * @param {string} padding The padding strategy to use.
24
+ * @returns {Promise<Tensor>} An object containing the mel spectrogram data as a Float32Array, its dimensions as an array of numbers, and a boolean indicating whether the waveform was longer than the max length.
25
+ * @private
26
+ */
27
+ private _get_input_mel;
28
+ /**
29
+ * Compute the log-mel spectrogram of the provided `waveform` using the Hann window.
30
+ * In CLAP, two different filter banks are used depending on the truncation pattern:
31
+ * - `self.mel_filters`: they correspond to the default parameters of `torchaudio` which can be obtained from
32
+ * calling `torchaudio.transforms.MelSpectrogram().mel_scale.fb`. These filters are used when `truncation`
33
+ * is set to `"fusion"`.
34
+ * - `self.mel_filters_slaney`: they correspond to the default parameters of `librosa` which used
35
+ * `librosa.filters.mel` when computing the mel spectrogram. These filters were only used in the original
36
+ * implementation when the truncation mode is not `"fusion"`.
37
+ *
38
+ * @param {Float32Array|Float64Array} waveform The audio waveform to process.
39
+ * @param {number[][]} mel_filters The mel filters to use.
40
+ * @param {number} [max_length=null] The maximum number of frames to return.
41
+ * @returns {Promise<Tensor>} An object containing the log-Mel spectrogram data as a Float32Array and its dimensions as an array of numbers.
42
+ */
43
+ _extract_fbank_features(waveform: Float32Array | Float64Array, mel_filters: number[][], max_length?: number): Promise<Tensor>;
44
+ /**
45
+ * Asynchronously extracts features from a given audio using the provided configuration.
46
+ * @param {Float32Array|Float64Array} audio The audio data as a Float32Array/Float64Array.
47
+ * @returns {Promise<{ input_features: Tensor }>} A Promise resolving to an object containing the extracted input features as a Tensor.
48
+ */
49
+ _call(audio: Float32Array | Float64Array, { max_length, }?: {
50
+ max_length?: any;
51
+ }): Promise<{
52
+ input_features: Tensor;
53
+ }>;
54
+ }
55
+ import { FeatureExtractor } from '../../base/feature_extraction_utils.js';
56
+ import { Tensor } from '../../utils/tensor.js';
57
+ //# sourceMappingURL=feature_extraction_clap.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"feature_extraction_clap.d.ts","sourceRoot":"","sources":["../../../src/models/clap/feature_extraction_clap.js"],"names":[],"mappings":"AAKA;IAEI,yBAyBC;IAtBG,wBAQC;IAED,+BAQC;IAED,qBAAkE;IAKtE;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,uBA0CC;IAED;;;;;;;;;;;;;;OAcG;IACH,kCALW,YAAY,GAAC,YAAY,eACzB,MAAM,EAAE,EAAE,eACV,MAAM,GACJ,QAAQ,MAAM,CAAC,CAoB3B;IAGD;;;;OAIG;IACH,aAHW,YAAY,GAAC,YAAY;;QACvB,QAAQ;QAAE,cAAc,EAAE,MAAM,CAAA;KAAE,CAAC,CAkB/C;CACJ;iCA9JuD,wCAAwC;uBACzE,uBAAuB"}
@@ -0,0 +1,6 @@
1
+ export class CLIPImageProcessor extends ImageProcessor {
2
+ }
3
+ export class CLIPFeatureExtractor extends CLIPImageProcessor {
4
+ }
5
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
6
+ //# sourceMappingURL=image_processing_clip.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image_processing_clip.d.ts","sourceRoot":"","sources":["../../../src/models/clip/image_processing_clip.js"],"names":[],"mappings":"AAIA;CAA0D;AAC1D;CAAgE;+BAHzD,sCAAsC"}