@huggingface/transformers 3.0.1 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239)
  1. package/README.md +14 -4
  2. package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
  3. package/dist/transformers.cjs +16607 -13472
  4. package/dist/transformers.cjs.map +1 -1
  5. package/dist/transformers.js +16601 -13451
  6. package/dist/transformers.js.map +1 -1
  7. package/dist/transformers.min.cjs +238 -52
  8. package/dist/transformers.min.cjs.map +1 -1
  9. package/dist/transformers.min.js +229 -43
  10. package/dist/transformers.min.js.map +1 -1
  11. package/dist/transformers.min.mjs +240 -54
  12. package/dist/transformers.min.mjs.map +1 -1
  13. package/dist/transformers.mjs +16017 -12878
  14. package/dist/transformers.mjs.map +1 -1
  15. package/package.json +7 -7
  16. package/src/base/feature_extraction_utils.js +54 -0
  17. package/src/base/image_processors_utils.js +1089 -0
  18. package/src/base/processing_utils.js +145 -0
  19. package/src/configs.js +15 -3
  20. package/src/env.js +15 -4
  21. package/src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js +90 -0
  22. package/src/models/auto/feature_extraction_auto.js +41 -0
  23. package/src/models/auto/image_processing_auto.js +29 -0
  24. package/src/models/auto/processing_auto.js +100 -0
  25. package/src/models/beit/image_processing_beit.js +5 -0
  26. package/src/models/bit/image_processing_bit.js +5 -0
  27. package/src/models/chinese_clip/image_processing_chinese_clip.js +5 -0
  28. package/src/models/clap/feature_extraction_clap.js +159 -0
  29. package/src/models/clip/image_processing_clip.js +6 -0
  30. package/src/models/convnext/image_processing_convnext.js +45 -0
  31. package/src/models/deit/image_processing_deit.js +6 -0
  32. package/src/models/detr/image_processing_detr.js +52 -0
  33. package/src/models/donut/image_processing_donut.js +31 -0
  34. package/src/models/dpt/image_processing_dpt.js +6 -0
  35. package/src/models/efficientnet/image_processing_efficientnet.js +13 -0
  36. package/src/models/feature_extractors.js +12 -0
  37. package/src/models/florence2/processing_florence2.js +128 -0
  38. package/src/models/glpn/image_processing_glpn.js +5 -0
  39. package/src/models/image_processors.js +36 -0
  40. package/src/models/janus/image_processing_janus.js +26 -0
  41. package/src/models/janus/processing_janus.js +123 -0
  42. package/src/models/jina_clip/image_processing_jina_clip.js +26 -0
  43. package/src/models/jina_clip/processing_jina_clip.js +24 -0
  44. package/src/models/llava_onevision/image_processing_llava_onevision.js +5 -0
  45. package/src/models/mask2former/image_processing_mask2former.js +5 -0
  46. package/src/models/maskformer/image_processing_maskformer.js +18 -0
  47. package/src/models/mgp_str/processing_mgp_str.js +170 -0
  48. package/src/models/mobilenet_v1/image_processing_mobilenet_v1.js +7 -0
  49. package/src/models/mobilenet_v2/image_processing_mobilenet_v2.js +7 -0
  50. package/src/models/mobilenet_v3/image_processing_mobilenet_v3.js +7 -0
  51. package/src/models/mobilenet_v4/image_processing_mobilenet_v4.js +7 -0
  52. package/src/models/mobilevit/image_processing_mobilevit.js +6 -0
  53. package/src/models/nougat/image_processing_nougat.js +5 -0
  54. package/src/models/owlv2/image_processing_owlv2.js +5 -0
  55. package/src/models/owlvit/image_processing_owlvit.js +12 -0
  56. package/src/models/owlvit/processing_owlvit.js +7 -0
  57. package/src/models/processors.js +11 -0
  58. package/src/models/pvt/image_processing_pvt.js +5 -0
  59. package/src/models/pyannote/feature_extraction_pyannote.js +28 -0
  60. package/src/models/pyannote/processing_pyannote.js +71 -0
  61. package/src/models/qwen2_vl/image_processing_qwen2_vl.js +52 -0
  62. package/src/models/qwen2_vl/processing_qwen2_vl.js +52 -0
  63. package/src/models/rt_detr/image_processing_rt_detr.js +12 -0
  64. package/src/models/sam/image_processing_sam.js +242 -0
  65. package/src/models/sam/processing_sam.js +20 -0
  66. package/src/models/sapiens/image_processing_sapiens.js +13 -0
  67. package/src/models/seamless_m4t/feature_extraction_seamless_m4t.js +180 -0
  68. package/src/models/segformer/image_processing_segformer.js +13 -0
  69. package/src/models/siglip/image_processing_siglip.js +5 -0
  70. package/src/models/speecht5/feature_extraction_speecht5.js +4 -0
  71. package/src/models/speecht5/processing_speecht5.js +17 -0
  72. package/src/models/swin2sr/image_processing_swin2sr.js +24 -0
  73. package/src/models/vit/image_processing_vit.js +7 -0
  74. package/src/models/vitmatte/image_processing_vitmatte.js +50 -0
  75. package/src/models/vitpose/image_processing_vitpose.js +89 -0
  76. package/src/models/wav2vec2/feature_extraction_wav2vec2.js +44 -0
  77. package/src/models/wav2vec2/processing_wav2vec2.js +15 -0
  78. package/src/models/wespeaker/feature_extraction_wespeaker.js +100 -0
  79. package/src/models/whisper/feature_extraction_whisper.js +84 -0
  80. package/src/models/whisper/processing_whisper.js +21 -0
  81. package/src/models/yolos/image_processing_yolos.js +12 -0
  82. package/src/models.js +695 -32
  83. package/src/pipelines.js +8 -8
  84. package/src/tokenizers.js +5 -0
  85. package/src/transformers.js +15 -2
  86. package/src/utils/constants.js +8 -1
  87. package/src/utils/core.js +37 -9
  88. package/src/utils/hub.js +2 -1
  89. package/src/utils/image.js +68 -17
  90. package/src/utils/tensor.js +33 -1
  91. package/types/base/feature_extraction_utils.d.ts +41 -0
  92. package/types/base/feature_extraction_utils.d.ts.map +1 -0
  93. package/types/base/image_processors_utils.d.ts +323 -0
  94. package/types/base/image_processors_utils.d.ts.map +1 -0
  95. package/types/base/processing_utils.d.ts +80 -0
  96. package/types/base/processing_utils.d.ts.map +1 -0
  97. package/types/configs.d.ts +4 -1
  98. package/types/configs.d.ts.map +1 -1
  99. package/types/env.d.ts.map +1 -1
  100. package/types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts +25 -0
  101. package/types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts.map +1 -0
  102. package/types/models/auto/feature_extraction_auto.d.ts +5 -0
  103. package/types/models/auto/feature_extraction_auto.d.ts.map +1 -0
  104. package/types/models/auto/image_processing_auto.d.ts +5 -0
  105. package/types/models/auto/image_processing_auto.d.ts.map +1 -0
  106. package/types/models/auto/processing_auto.d.ts +35 -0
  107. package/types/models/auto/processing_auto.d.ts.map +1 -0
  108. package/types/models/beit/image_processing_beit.d.ts +4 -0
  109. package/types/models/beit/image_processing_beit.d.ts.map +1 -0
  110. package/types/models/bit/image_processing_bit.d.ts +4 -0
  111. package/types/models/bit/image_processing_bit.d.ts.map +1 -0
  112. package/types/models/chinese_clip/image_processing_chinese_clip.d.ts +4 -0
  113. package/types/models/chinese_clip/image_processing_chinese_clip.d.ts.map +1 -0
  114. package/types/models/clap/feature_extraction_clap.d.ts +57 -0
  115. package/types/models/clap/feature_extraction_clap.d.ts.map +1 -0
  116. package/types/models/clip/image_processing_clip.d.ts +6 -0
  117. package/types/models/clip/image_processing_clip.d.ts.map +1 -0
  118. package/types/models/convnext/image_processing_convnext.d.ts +12 -0
  119. package/types/models/convnext/image_processing_convnext.d.ts.map +1 -0
  120. package/types/models/deit/image_processing_deit.d.ts +6 -0
  121. package/types/models/deit/image_processing_deit.d.ts.map +1 -0
  122. package/types/models/detr/image_processing_detr.d.ts +42 -0
  123. package/types/models/detr/image_processing_detr.d.ts.map +1 -0
  124. package/types/models/donut/image_processing_donut.d.ts +7 -0
  125. package/types/models/donut/image_processing_donut.d.ts.map +1 -0
  126. package/types/models/dpt/image_processing_dpt.d.ts +6 -0
  127. package/types/models/dpt/image_processing_dpt.d.ts.map +1 -0
  128. package/types/models/efficientnet/image_processing_efficientnet.d.ts +6 -0
  129. package/types/models/efficientnet/image_processing_efficientnet.d.ts.map +1 -0
  130. package/types/models/feature_extractors.d.ts +10 -0
  131. package/types/models/feature_extractors.d.ts.map +1 -0
  132. package/types/models/florence2/processing_florence2.d.ts +39 -0
  133. package/types/models/florence2/processing_florence2.d.ts.map +1 -0
  134. package/types/models/glpn/image_processing_glpn.d.ts +4 -0
  135. package/types/models/glpn/image_processing_glpn.d.ts.map +1 -0
  136. package/types/models/image_processors.d.ts +36 -0
  137. package/types/models/image_processors.d.ts.map +1 -0
  138. package/types/models/janus/image_processing_janus.d.ts +7 -0
  139. package/types/models/janus/image_processing_janus.d.ts.map +1 -0
  140. package/types/models/janus/processing_janus.d.ts +77 -0
  141. package/types/models/janus/processing_janus.d.ts.map +1 -0
  142. package/types/models/jina_clip/image_processing_jina_clip.d.ts +5 -0
  143. package/types/models/jina_clip/image_processing_jina_clip.d.ts.map +1 -0
  144. package/types/models/jina_clip/processing_jina_clip.d.ts +9 -0
  145. package/types/models/jina_clip/processing_jina_clip.d.ts.map +1 -0
  146. package/types/models/llava_onevision/image_processing_llava_onevision.d.ts +4 -0
  147. package/types/models/llava_onevision/image_processing_llava_onevision.d.ts.map +1 -0
  148. package/types/models/mask2former/image_processing_mask2former.d.ts +4 -0
  149. package/types/models/mask2former/image_processing_mask2former.d.ts.map +1 -0
  150. package/types/models/maskformer/image_processing_maskformer.d.ts +22 -0
  151. package/types/models/maskformer/image_processing_maskformer.d.ts.map +1 -0
  152. package/types/models/mgp_str/processing_mgp_str.d.ts +64 -0
  153. package/types/models/mgp_str/processing_mgp_str.d.ts.map +1 -0
  154. package/types/models/mobilenet_v1/image_processing_mobilenet_v1.d.ts +6 -0
  155. package/types/models/mobilenet_v1/image_processing_mobilenet_v1.d.ts.map +1 -0
  156. package/types/models/mobilenet_v2/image_processing_mobilenet_v2.d.ts +6 -0
  157. package/types/models/mobilenet_v2/image_processing_mobilenet_v2.d.ts.map +1 -0
  158. package/types/models/mobilenet_v3/image_processing_mobilenet_v3.d.ts +6 -0
  159. package/types/models/mobilenet_v3/image_processing_mobilenet_v3.d.ts.map +1 -0
  160. package/types/models/mobilenet_v4/image_processing_mobilenet_v4.d.ts +6 -0
  161. package/types/models/mobilenet_v4/image_processing_mobilenet_v4.d.ts.map +1 -0
  162. package/types/models/mobilevit/image_processing_mobilevit.d.ts +6 -0
  163. package/types/models/mobilevit/image_processing_mobilevit.d.ts.map +1 -0
  164. package/types/models/nougat/image_processing_nougat.d.ts +4 -0
  165. package/types/models/nougat/image_processing_nougat.d.ts.map +1 -0
  166. package/types/models/owlv2/image_processing_owlv2.d.ts +4 -0
  167. package/types/models/owlv2/image_processing_owlv2.d.ts.map +1 -0
  168. package/types/models/owlvit/image_processing_owlvit.d.ts +10 -0
  169. package/types/models/owlvit/image_processing_owlvit.d.ts.map +1 -0
  170. package/types/models/owlvit/processing_owlvit.d.ts +8 -0
  171. package/types/models/owlvit/processing_owlvit.d.ts.map +1 -0
  172. package/types/models/processors.d.ts +12 -0
  173. package/types/models/processors.d.ts.map +1 -0
  174. package/types/models/pvt/image_processing_pvt.d.ts +4 -0
  175. package/types/models/pvt/image_processing_pvt.d.ts.map +1 -0
  176. package/types/models/pyannote/feature_extraction_pyannote.d.ts +13 -0
  177. package/types/models/pyannote/feature_extraction_pyannote.d.ts.map +1 -0
  178. package/types/models/pyannote/processing_pyannote.d.ts +30 -0
  179. package/types/models/pyannote/processing_pyannote.d.ts.map +1 -0
  180. package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts +11 -0
  181. package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts.map +1 -0
  182. package/types/models/qwen2_vl/processing_qwen2_vl.d.ts +17 -0
  183. package/types/models/qwen2_vl/processing_qwen2_vl.d.ts.map +1 -0
  184. package/types/models/rt_detr/image_processing_rt_detr.d.ts +8 -0
  185. package/types/models/rt_detr/image_processing_rt_detr.d.ts.map +1 -0
  186. package/types/models/sam/image_processing_sam.d.ts +103 -0
  187. package/types/models/sam/image_processing_sam.d.ts.map +1 -0
  188. package/types/models/sam/processing_sam.d.ts +9 -0
  189. package/types/models/sam/processing_sam.d.ts.map +1 -0
  190. package/types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts +34 -0
  191. package/types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts.map +1 -0
  192. package/types/models/segformer/image_processing_segformer.d.ts +10 -0
  193. package/types/models/segformer/image_processing_segformer.d.ts.map +1 -0
  194. package/types/models/siglip/image_processing_siglip.d.ts +4 -0
  195. package/types/models/siglip/image_processing_siglip.d.ts.map +1 -0
  196. package/types/models/speecht5/feature_extraction_speecht5.d.ts +4 -0
  197. package/types/models/speecht5/feature_extraction_speecht5.d.ts.map +1 -0
  198. package/types/models/speecht5/processing_speecht5.d.ts +14 -0
  199. package/types/models/speecht5/processing_speecht5.d.ts.map +1 -0
  200. package/types/models/swin2sr/image_processing_swin2sr.d.ts +5 -0
  201. package/types/models/swin2sr/image_processing_swin2sr.d.ts.map +1 -0
  202. package/types/models/vit/image_processing_vit.d.ts +6 -0
  203. package/types/models/vit/image_processing_vit.d.ts.map +1 -0
  204. package/types/models/vitmatte/image_processing_vitmatte.d.ts +12 -0
  205. package/types/models/vitmatte/image_processing_vitmatte.d.ts.map +1 -0
  206. package/types/models/vitpose/image_processing_vitpose.d.ts +26 -0
  207. package/types/models/vitpose/image_processing_vitpose.d.ts.map +1 -0
  208. package/types/models/wav2vec2/feature_extraction_wav2vec2.d.ts +19 -0
  209. package/types/models/wav2vec2/feature_extraction_wav2vec2.d.ts.map +1 -0
  210. package/types/models/wav2vec2/processing_wav2vec2.d.ts +12 -0
  211. package/types/models/wav2vec2/processing_wav2vec2.d.ts.map +1 -0
  212. package/types/models/wespeaker/feature_extraction_wespeaker.d.ts +23 -0
  213. package/types/models/wespeaker/feature_extraction_wespeaker.d.ts.map +1 -0
  214. package/types/models/whisper/feature_extraction_whisper.d.ts +21 -0
  215. package/types/models/whisper/feature_extraction_whisper.d.ts.map +1 -0
  216. package/types/models/whisper/processing_whisper.d.ts +17 -0
  217. package/types/models/whisper/processing_whisper.d.ts.map +1 -0
  218. package/types/models/yolos/image_processing_yolos.d.ts +10 -0
  219. package/types/models/yolos/image_processing_yolos.d.ts.map +1 -0
  220. package/types/models.d.ts +152 -0
  221. package/types/models.d.ts.map +1 -1
  222. package/types/pipelines.d.ts +2 -3
  223. package/types/pipelines.d.ts.map +1 -1
  224. package/types/tokenizers.d.ts +3 -0
  225. package/types/tokenizers.d.ts.map +1 -1
  226. package/types/transformers.d.ts +10 -1
  227. package/types/utils/constants.d.ts +6 -0
  228. package/types/utils/constants.d.ts.map +1 -1
  229. package/types/utils/core.d.ts +58 -3
  230. package/types/utils/core.d.ts.map +1 -1
  231. package/types/utils/hub.d.ts +1 -1
  232. package/types/utils/hub.d.ts.map +1 -1
  233. package/types/utils/image.d.ts +10 -2
  234. package/types/utils/image.d.ts.map +1 -1
  235. package/types/utils/tensor.d.ts +34 -1
  236. package/types/utils/tensor.d.ts.map +1 -1
  237. package/src/processors.js +0 -2655
  238. package/types/processors.d.ts +0 -924
  239. package/types/processors.d.ts.map +0 -1
@@ -0,0 +1,30 @@
+ export class PyAnnoteProcessor extends Processor {
+ static feature_extractor_class: typeof AutoFeatureExtractor;
+ /**
+ * Calls the feature_extractor function with the given audio input.
+ * @param {any} audio The audio input to extract features from.
+ * @returns {Promise<any>} A Promise that resolves with the extracted features.
+ */
+ _call(audio: any): Promise<any>;
+ /**
+ * NOTE: Can return fractional values. `Math.ceil` will ensure correct value.
+ * @param {number} samples The number of frames in the audio.
+ * @returns {number} The number of frames in the audio.
+ */
+ samples_to_frames(samples: number): number;
+ /**
+ * Post-processes the speaker diarization logits output by the model.
+ * @param {import('../../utils/tensor.js').Tensor} logits The speaker diarization logits output by the model.
+ * @param {number} num_samples Number of samples in the input audio.
+ * @returns {Array<Array<{ id: number, start: number, end: number, confidence: number }>>} The post-processed speaker diarization results.
+ */
+ post_process_speaker_diarization(logits: import('../../utils/tensor.js').Tensor, num_samples: number): Array<Array<{
+ id: number;
+ start: number;
+ end: number;
+ confidence: number;
+ }>>;
+ }
+ import { Processor } from '../../base/processing_utils.js';
+ import { AutoFeatureExtractor } from '../auto/feature_extraction_auto.js';
+ //# sourceMappingURL=processing_pyannote.d.ts.map
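
Usage note (not part of the published diff): a minimal sketch of how the PyAnnoteProcessor declarations above might be used for speaker diarization. The model id is illustrative, and `read_audio` plus `AutoModelForAudioFrameClassification` are assumptions about the surrounding library rather than something this diff guarantees.

```js
import { AutoProcessor, AutoModelForAudioFrameClassification, read_audio } from '@huggingface/transformers';

// Illustrative checkpoint id; substitute any ONNX pyannote segmentation model.
const model_id = 'onnx-community/pyannote-segmentation-3.0';
const processor = await AutoProcessor.from_pretrained(model_id);
const model = await AutoModelForAudioFrameClassification.from_pretrained(model_id);

// Load mono audio at the feature extractor's sampling rate (16 kHz here).
const audio = await read_audio('audio.wav', 16000);
const inputs = await processor(audio);

// Convert frame-level logits into per-speaker segments, as declared above.
const { logits } = await model(inputs);
const segments = processor.post_process_speaker_diarization(logits, audio.length);
// segments: Array<Array<{ id, start, end, confidence }>>
```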
@@ -0,0 +1 @@
+ {"version":3,"file":"processing_pyannote.d.ts","sourceRoot":"","sources":["../../../src/models/pyannote/processing_pyannote.js"],"names":[],"mappings":"AAIA;IACI,4DAAqD;IAErD;;;;OAIG;IACH,aAHW,GAAG,GACD,QAAQ,GAAG,CAAC,CAIxB;IAED;;;;OAIG;IACH,2BAHW,MAAM,GACJ,MAAM,CAIlB;IAED;;;;;OAKG;IACH,yCAJW,OAAO,uBAAuB,EAAE,MAAM,eACtC,MAAM,GACJ,MAAM,MAAM;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,GAAG,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC,CAwCxF;CACJ;0BAtEyB,gCAAgC;qCACrB,oCAAoC"}
@@ -0,0 +1,11 @@
+ export class Qwen2VLImageProcessor extends ImageProcessor {
+ _call(images: any, ...args: any[]): Promise<{
+ pixel_values: Tensor;
+ image_grid_thw: Tensor;
+ original_sizes: import("../../base/image_processors_utils.js").HeightWidth[];
+ reshaped_input_sizes: import("../../base/image_processors_utils.js").HeightWidth[];
+ }>;
+ }
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
+ import { Tensor } from "../../utils/tensor.js";
+ //# sourceMappingURL=image_processing_qwen2_vl.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"image_processing_qwen2_vl.d.ts","sourceRoot":"","sources":["../../../src/models/qwen2_vl/image_processing_qwen2_vl.js"],"names":[],"mappings":"AAKA;IACI;;;;;OA2CC;CACJ;+BAhDM,sCAAsC;uBACjB,uBAAuB"}
@@ -0,0 +1,17 @@
+ export class Qwen2VLProcessor extends Processor {
+ static image_processor_class: typeof AutoImageProcessor;
+ static tokenizer_class: typeof AutoTokenizer;
+ /**
+ *
+ * @param {string|string[]} text
+ * @param {RawImage|RawImage[]} images
+ * @param {...any} args
+ * @returns {Promise<any>}
+ */
+ _call(text: string | string[], images?: RawImage | RawImage[], ...args: any[]): Promise<any>;
+ }
+ import { Processor } from "../../base/processing_utils.js";
+ import { RawImage } from "../../utils/image.js";
+ import { AutoImageProcessor } from "../auto/image_processing_auto.js";
+ import { AutoTokenizer } from "../../tokenizers.js";
+ //# sourceMappingURL=processing_qwen2_vl.d.ts.map
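
Usage note (not part of the diff): a sketch of the new `Qwen2VLProcessor._call(text, images)` surface. The model class, generation call, prompt formatting and checkpoint id are assumptions about the wider library; a real prompt would normally be built with the model's chat template and image placeholder tokens.

```js
import { AutoProcessor, Qwen2VLForConditionalGeneration, RawImage } from '@huggingface/transformers';

const model_id = 'onnx-community/Qwen2-VL-2B-Instruct'; // illustrative id
const processor = await AutoProcessor.from_pretrained(model_id);
const model = await Qwen2VLForConditionalGeneration.from_pretrained(model_id);

// The processor combines the tokenizer and image processor declared above.
const image = await RawImage.read('example.jpg');
const prompt = 'Describe this image.'; // real usage: apply the chat template with image tokens
const inputs = await processor(prompt, image);

const output_ids = await model.generate({ ...inputs, max_new_tokens: 128 });
console.log(processor.tokenizer.batch_decode(output_ids, { skip_special_tokens: true }));
```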
@@ -0,0 +1 @@
+ {"version":3,"file":"processing_qwen2_vl.d.ts","sourceRoot":"","sources":["../../../src/models/qwen2_vl/processing_qwen2_vl.js"],"names":[],"mappings":"AAKA;IACI,wDAAiD;IACjD,6CAAsC;IAEtC;;;;;;OAMG;IACH,YALW,MAAM,GAAC,MAAM,EAAE,WACf,QAAQ,GAAC,QAAQ,EAAE,WACf,GAAG,KACL,QAAQ,GAAG,CAAC,CAoCxB;CACJ;0BAnDyB,gCAAgC;yBAGjC,sBAAsB;mCAFZ,kCAAkC;8BACvC,qBAAqB"}
@@ -0,0 +1,8 @@
+ export class RTDetrImageProcessor extends ImageProcessor {
+ post_process_object_detection(outputs: {
+ logits: import("../../transformers.js").Tensor;
+ pred_boxes: import("../../transformers.js").Tensor;
+ }, threshold?: number, target_sizes?: [number, number][], is_zero_shot?: boolean): any[];
+ }
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
+ //# sourceMappingURL=image_processing_rt_detr.d.ts.map
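
Usage note (not part of the diff): a sketch of `post_process_object_detection` on the new RTDetrImageProcessor. Loading via `AutoImageProcessor`/`AutoModelForObjectDetection` and the checkpoint id are assumptions, not guaranteed by this diff.

```js
import { AutoImageProcessor, AutoModelForObjectDetection, RawImage } from '@huggingface/transformers';

const model_id = 'onnx-community/rtdetr_r18vd'; // illustrative id
const image_processor = await AutoImageProcessor.from_pretrained(model_id);
const model = await AutoModelForObjectDetection.from_pretrained(model_id);

const image = await RawImage.read('street.jpg');
const inputs = await image_processor(image);
const outputs = await model(inputs);

// Keep detections above a 0.5 score, rescaled to the original image size.
const [detections] = image_processor.post_process_object_detection(
    outputs, 0.5, inputs.original_sizes,
);
console.log(detections);
```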
@@ -0,0 +1 @@
+ {"version":3,"file":"image_processing_rt_detr.d.ts","sourceRoot":"","sources":["../../../src/models/rt_detr/image_processing_rt_detr.js"],"names":[],"mappings":"AAMA;IAMgyE;;;6FAAu6F;CADtsK;+BARM,sCAAsC"}
@@ -0,0 +1,103 @@
+ /**
+ * @typedef {object} SamImageProcessorResult
+ * @property {Tensor} pixel_values
+ * @property {import("../../base/image_processors_utils.js").HeightWidth[]} original_sizes
+ * @property {import("../../base/image_processors_utils.js").HeightWidth[]} reshaped_input_sizes
+ * @property {Tensor} [input_points]
+ * @property {Tensor} [input_labels]
+ * @property {Tensor} [input_boxes]
+ */
+ export class SamImageProcessor extends ImageProcessor {
+ /**
+ *
+ * @param {any} input_points
+ * @param {import("../../base/image_processors_utils.js").HeightWidth[]} original_sizes
+ * @param {import("../../base/image_processors_utils.js").HeightWidth[]} reshaped_input_sizes
+ * @returns {Tensor}
+ */
+ reshape_input_points(input_points: any, original_sizes: import("../../base/image_processors_utils.js").HeightWidth[], reshaped_input_sizes: import("../../base/image_processors_utils.js").HeightWidth[], is_bounding_box?: boolean): Tensor;
+ /**
+ *
+ * @param {any} input_labels
+ * @param {Tensor} input_points
+ * @returns {Tensor}
+ */
+ add_input_labels(input_labels: any, input_points: Tensor): Tensor;
+ /**
+ * @param {any[]} images The URL(s) of the image(s) to extract features from.
+ * @param {Object} [options] Additional options for the processor.
+ * @param {any} [options.input_points=null] A 3D or 4D array, representing the input points provided by the user.
+ * - 3D: `[point_batch_size, nb_points_per_image, 2]`. In this case, `batch_size` is assumed to be 1.
+ * - 4D: `[batch_size, point_batch_size, nb_points_per_image, 2]`.
+ * @param {any} [options.input_labels=null] A 2D or 3D array, representing the input labels for the points, used by the prompt encoder to encode the prompt.
+ * - 2D: `[point_batch_size, nb_points_per_image]`. In this case, `batch_size` is assumed to be 1.
+ * - 3D: `[batch_size, point_batch_size, nb_points_per_image]`.
+ * @param {number[][][]} [options.input_boxes=null] A 3D array of shape `(batch_size, num_boxes, 4)`, representing the input boxes provided by the user.
+ * This is used by the prompt encoder to encode the prompt. Generally yields to much better generated masks.
+ * The processor will generate a tensor, with each dimension corresponding respectively to the image batch size,
+ * the number of boxes per image and the coordinates of the top left and botton right point of the box.
+ * In the order (`x1`, `y1`, `x2`, `y2`):
+ * - `x1`: the x coordinate of the top left point of the input box
+ * - `y1`: the y coordinate of the top left point of the input box
+ * - `x2`: the x coordinate of the bottom right point of the input box
+ * - `y2`: the y coordinate of the bottom right point of the input box
+ * @returns {Promise<SamImageProcessorResult>}
+ */
+ _call(images: any[], { input_points, input_labels, input_boxes }?: {
+ input_points?: any;
+ input_labels?: any;
+ input_boxes?: number[][][];
+ }): Promise<SamImageProcessorResult>;
+ /**
+ * Remove padding and upscale masks to the original image size.
+ * @param {Tensor} masks Batched masks from the mask_decoder in (batch_size, num_channels, height, width) format.
+ * @param {[number, number][]} original_sizes The original sizes of each image before it was resized to the model's expected input shape, in (height, width) format.
+ * @param {[number, number][]} reshaped_input_sizes The size of each image as it is fed to the model, in (height, width) format. Used to remove padding.
+ * @param {Object} options Optional parameters for post-processing.
+ * @param {number} [options.mask_threshold] The threshold to use for binarizing the masks.
+ * @param {boolean} [options.binarize] Whether to binarize the masks.
+ * @param {Object} [options.pad_size] The target size the images were padded to before being passed to the model. If `null`, the target size is assumed to be the processor's `pad_size`.
+ * @param {number} [options.pad_size.height] The height the images were padded to.
+ * @param {number} [options.pad_size.width] The width the images were padded to.
+ * @returns {Promise<Tensor[]>} Batched masks in batch_size, num_channels, height, width) format, where (height, width) is given by original_size.
+ */
+ post_process_masks(masks: Tensor, original_sizes: [number, number][], reshaped_input_sizes: [number, number][], { mask_threshold, binarize, pad_size, }?: {
+ mask_threshold?: number;
+ binarize?: boolean;
+ pad_size?: {
+ height?: number;
+ width?: number;
+ };
+ }): Promise<Tensor[]>;
+ /**
+ * Generates a list of crop boxes of different sizes. Each layer has (2**i)**2 boxes for the ith layer.
+ * @param {import("../../utils/image.js").RawImage} image Input original image
+ * @param {number} target_size Target size of the resized image
+ * @param {Object} options Options for generating crop boxes
+ * @param {number} [options.crop_n_layers] If >0, mask prediction will be run again on crops of the image.
+ * Sets the number of layers to run, where each layer has 2**i_layer number of image crops.
+ * @param {number} [options.overlap_ratio] Sets the degree to which crops overlap. In the first crop layer,
+ * crops will overlap by this fraction of the image length. Later layers with more crops scale down this overlap.
+ * @param {number} [options.points_per_crop] Number of points to sample from each crop.
+ * @param {number} [options.crop_n_points_downscale_factor] The number of points-per-side sampled in layer n is
+ * scaled down by crop_n_points_downscale_factor**n.
+ * @returns {Object} An object containing the crop boxes, number of points per crop, cropped images, and input labels.
+ */
+ generate_crop_boxes(image: import("../../utils/image.js").RawImage, target_size: number, { crop_n_layers, overlap_ratio, points_per_crop, crop_n_points_downscale_factor, }?: {
+ crop_n_layers?: number;
+ overlap_ratio?: number;
+ points_per_crop?: number;
+ crop_n_points_downscale_factor?: number;
+ }): any;
+ }
+ export type SamImageProcessorResult = {
+ pixel_values: Tensor;
+ original_sizes: import("../../base/image_processors_utils.js").HeightWidth[];
+ reshaped_input_sizes: import("../../base/image_processors_utils.js").HeightWidth[];
+ input_points?: Tensor;
+ input_labels?: Tensor;
+ input_boxes?: Tensor;
+ };
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
+ import { Tensor } from "../../utils/tensor.js";
+ //# sourceMappingURL=image_processing_sam.d.ts.map
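
Usage note (not part of the diff): a sketch of prompting the new SamImageProcessor with a single point and upscaling the predicted masks via the declared `post_process_masks`. `SamModel`, the `pred_masks` output name and the checkpoint id are assumptions about the surrounding library.

```js
import { AutoProcessor, SamModel, RawImage } from '@huggingface/transformers';

const model_id = 'Xenova/slimsam-77-uniform'; // illustrative id
const processor = await AutoProcessor.from_pretrained(model_id);
const model = await SamModel.from_pretrained(model_id);

const image = await RawImage.read('cat.jpg');
// 3D input_points => [point_batch_size, nb_points_per_image, 2] with batch_size 1.
const inputs = await processor(image, { input_points: [[[400, 250]]] });

const outputs = await model(inputs); // pred_masks output name assumed

// Remove padding and upscale masks back to the original image size.
const masks = await processor.post_process_masks(
    outputs.pred_masks, inputs.original_sizes, inputs.reshaped_input_sizes,
);
```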
@@ -0,0 +1 @@
+ {"version":3,"file":"image_processing_sam.d.ts","sourceRoot":"","sources":["../../../src/models/sam/image_processing_sam.js"],"names":[],"mappings":"AAWA;;;;;;;;GAQG;AAEH;IAEI;;;;;;OAMG;IACH,mCALW,GAAG,kBACH,OAAO,sCAAsC,EAAE,WAAW,EAAE,wBAC5D,OAAO,sCAAsC,EAAE,WAAW,EAAE,8BAC1D,MAAM,CA4ClB;IAED;;;;;OAKG;IACH,+BAJW,GAAG,gBACH,MAAM,GACJ,MAAM,CAoBlB;IACD;;;;;;;;;;;;;;;;;;;OAmBG;IACH,cAnBW,GAAG,EAAE;QAES,YAAY,GAA1B,GAAG;QAGW,YAAY,GAA1B,GAAG;QAGoB,WAAW,GAAlC,MAAM,EAAE,EAAE,EAAE;QASV,QAAQ,uBAAuB,CAAC,CA+B5C;IAED;;;;;;;;;;;;OAYG;IACH,0BAXW,MAAM,kBACN,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,wBAClB,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE;QAED,cAAc,GAA/B,MAAM;QACY,QAAQ,GAA1B,OAAO;QACU,QAAQ;YACC,MAAM,GAAhC,MAAM;YACoB,KAAK,GAA/B,MAAM;;QACJ,QAAQ,MAAM,EAAE,CAAC,CAsD7B;IAED;;;;;;;;;;;;;OAaG;IACH,2BAZW,OAAO,sBAAsB,EAAE,QAAQ,eACvC,MAAM;QAEW,aAAa,GAA9B,MAAM;QAEW,aAAa,GAA9B,MAAM;QAEW,eAAe,GAAhC,MAAM;QACW,8BAA8B,GAA/C,MAAM;YAYhB;CACJ;;kBAnOa,MAAM;oBACN,OAAO,sCAAsC,EAAE,WAAW,EAAE;0BAC5D,OAAO,sCAAsC,EAAE,WAAW,EAAE;mBAC5D,MAAM;mBACN,MAAM;kBACN,MAAM;;+BAhBb,sCAAsC;uBAMtC,uBAAuB"}
@@ -0,0 +1,9 @@
+ export class SamProcessor extends Processor {
+ static image_processor_class: typeof AutoImageProcessor;
+ _call(...args: any[]): Promise<any>;
+ post_process_masks(...args: any[]): any;
+ reshape_input_points(...args: any[]): any;
+ }
+ import { Processor } from "../../base/processing_utils.js";
+ import { AutoImageProcessor } from "../auto/image_processing_auto.js";
+ //# sourceMappingURL=processing_sam.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"processing_sam.d.ts","sourceRoot":"","sources":["../../../src/models/sam/processing_sam.js"],"names":[],"mappings":"AAGA;IACI,wDAAiD;IAEjD,oCAEC;IAED,wCAGC;IAED,0CAGC;CACJ;0BAnByB,gCAAgC;mCACvB,kCAAkC"}
@@ -0,0 +1,34 @@
+ export class SeamlessM4TFeatureExtractor extends FeatureExtractor {
+ constructor(config: any);
+ mel_filters: number[][];
+ window: Float64Array;
+ /**
+ * Computes the log-Mel spectrogram of the provided audio waveform.
+ * @param {Float32Array|Float64Array} waveform The audio waveform to process.
+ * @param {number} max_length The maximum number of frames to return.
+ * @returns {Promise<Tensor>} An object containing the log-Mel spectrogram data as a Float32Array and its dimensions as an array of numbers.
+ */
+ _extract_fbank_features(waveform: Float32Array | Float64Array, max_length: number): Promise<Tensor>;
+ /**
+ * Asynchronously extracts features from a given audio using the provided configuration.
+ * @param {Float32Array|Float64Array} audio The audio data as a Float32Array/Float64Array.
+ * @param {Object} options Optional parameters for feature extraction.
+ * @param {boolean} [options.padding=true] Whether to pad the sequence to a multiple of `pad_to_multiple_of`.
+ * @param {number} [options.pad_to_multiple_of=2] The number to pad the sequence to a multiple of.
+ * @param {boolean} [options.do_normalize_per_mel_bins=true] Whether or not to zero-mean unit-variance normalize the input per mel-channel.
+ * @param {boolean} [options.return_attention_mask=true] Whether to return the attention mask.
+ * @returns {Promise<{ input_features: Tensor, attention_mask?: Tensor }>} A Promise resolving to an object containing the extracted input features and attention masks as Tensors.
+ */
+ _call(audio: Float32Array | Float64Array, { padding, pad_to_multiple_of, do_normalize_per_mel_bins, return_attention_mask, }?: {
+ padding?: boolean;
+ pad_to_multiple_of?: number;
+ do_normalize_per_mel_bins?: boolean;
+ return_attention_mask?: boolean;
+ }): Promise<{
+ input_features: Tensor;
+ attention_mask?: Tensor;
+ }>;
+ }
+ import { FeatureExtractor } from '../../base/feature_extraction_utils.js';
+ import { Tensor } from '../../utils/tensor.js';
+ //# sourceMappingURL=feature_extraction_seamless_m4t.d.ts.map
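
Usage note (not part of the diff): a sketch of calling the new SeamlessM4TFeatureExtractor directly. The `AutoFeatureExtractor`/`read_audio` loading path and the placeholder checkpoint id are assumptions; any checkpoint whose `preprocessor_config.json` names this feature extractor should resolve to the class above.

```js
import { AutoFeatureExtractor, read_audio } from '@huggingface/transformers';

// Placeholder id: substitute a real SeamlessM4T-style ONNX checkpoint.
const feature_extractor = await AutoFeatureExtractor.from_pretrained('your-org/seamless-m4t-onnx');

const audio = await read_audio('speech.wav', 16000);
// Padded log-Mel features plus an attention mask over the valid frames.
const { input_features, attention_mask } = await feature_extractor(audio, {
    padding: true,
    pad_to_multiple_of: 2,
});
console.log(input_features.dims);
```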
@@ -0,0 +1 @@
+ {"version":3,"file":"feature_extraction_seamless_m4t.d.ts","sourceRoot":"","sources":["../../../src/models/seamless_m4t/feature_extraction_seamless_m4t.js"],"names":[],"mappings":"AAIA;IAEI,yBAwBC;IALG,wBAA8B;IAE9B,qBAEE;IAGN;;;;;OAKG;IACH,kCAJW,YAAY,GAAC,YAAY,cACzB,MAAM,GACJ,QAAQ,MAAM,CAAC,CA6B3B;IAED;;;;;;;;;OASG;IACH,aARW,YAAY,GAAC,YAAY;QAEP,OAAO,GAAzB,OAAO;QACU,kBAAkB,GAAnC,MAAM;QACY,yBAAyB,GAA3C,OAAO;QACW,qBAAqB,GAAvC,OAAO;;wBACqB,MAAM;yBAAmB,MAAM;OAuGrE;CACJ;iCAnLuD,wCAAwC;uBACzE,uBAAuB"}
@@ -0,0 +1,10 @@
+ export class SegformerImageProcessor extends ImageProcessor {
+ post_process_semantic_segmentation(outputs: any, target_sizes?: [number, number][]): {
+ segmentation: import("../../transformers.js").Tensor;
+ labels: number[];
+ }[];
+ }
+ export class SegformerFeatureExtractor extends SegformerImageProcessor {
+ }
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
+ //# sourceMappingURL=image_processing_segformer.d.ts.map
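
Usage note (not part of the diff): a sketch of `post_process_semantic_segmentation` on the new SegformerImageProcessor. The model class and checkpoint id are assumptions about the wider library.

```js
import { AutoImageProcessor, AutoModelForSemanticSegmentation, RawImage } from '@huggingface/transformers';

const model_id = 'Xenova/segformer-b0-finetuned-ade-512-512'; // illustrative id
const image_processor = await AutoImageProcessor.from_pretrained(model_id);
const model = await AutoModelForSemanticSegmentation.from_pretrained(model_id);

const image = await RawImage.read('scene.jpg');
const inputs = await image_processor(image);
const outputs = await model(inputs);

// Argmax over class logits, resized to the requested (height, width).
const [{ segmentation, labels }] = image_processor.post_process_semantic_segmentation(
    outputs, [[image.height, image.width]],
);
console.log(labels);
```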
@@ -0,0 +1 @@
+ {"version":3,"file":"image_processing_segformer.d.ts","sourceRoot":"","sources":["../../../src/models/segformer/image_processing_segformer.js"],"names":[],"mappings":"AAMA;IAOqmK;;;QAA21E;CAF/7O;AACD;CAA0E;+BATnE,sCAAsC"}
@@ -0,0 +1,4 @@
+ export class SiglipImageProcessor extends ImageProcessor {
+ }
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
+ //# sourceMappingURL=image_processing_siglip.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"image_processing_siglip.d.ts","sourceRoot":"","sources":["../../../src/models/siglip/image_processing_siglip.js"],"names":[],"mappings":"AAIA;CAA4D;+BAFrD,sCAAsC"}
@@ -0,0 +1,4 @@
+ export class SpeechT5FeatureExtractor extends FeatureExtractor {
+ }
+ import { FeatureExtractor } from "../../base/feature_extraction_utils.js";
+ //# sourceMappingURL=feature_extraction_speecht5.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"feature_extraction_speecht5.d.ts","sourceRoot":"","sources":["../../../src/models/speecht5/feature_extraction_speecht5.js"],"names":[],"mappings":"AAGA;CAAkE;iCAFjC,wCAAwC"}
@@ -0,0 +1,14 @@
+ export class SpeechT5Processor extends Processor {
+ static tokenizer_class: typeof AutoTokenizer;
+ static feature_extractor_class: typeof AutoFeatureExtractor;
+ /**
+ * Calls the feature_extractor function with the given input.
+ * @param {any} input The input to extract features from.
+ * @returns {Promise<any>} A Promise that resolves with the extracted features.
+ */
+ _call(input: any): Promise<any>;
+ }
+ import { Processor } from "../../base/processing_utils.js";
+ import { AutoTokenizer } from "../../tokenizers.js";
+ import { AutoFeatureExtractor } from "../auto/feature_extraction_auto.js";
+ //# sourceMappingURL=processing_speecht5.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"processing_speecht5.d.ts","sourceRoot":"","sources":["../../../src/models/speecht5/processing_speecht5.js"],"names":[],"mappings":"AAIA;IACI,6CAAsC;IACtC,4DAAqD;IAErD;;;;OAIG;IACH,aAHW,GAAG,GACD,QAAQ,GAAG,CAAC,CAIxB;CACJ;0BAhByB,gCAAgC;8BAC5B,qBAAqB;qCACd,oCAAoC"}
@@ -0,0 +1,5 @@
+ export class Swin2SRImageProcessor extends ImageProcessor {
+ pad_image(pixelData: any, imgDims: any, padSize: any, options?: {}): [Float32Array, number[]];
+ }
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
+ //# sourceMappingURL=image_processing_swin2sr.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"image_processing_swin2sr.d.ts","sourceRoot":"","sources":["../../../src/models/swin2sr/image_processing_swin2sr.js"],"names":[],"mappings":"AAIA;IACI,8FAiBC;CACJ;+BArBM,sCAAsC"}
@@ -0,0 +1,6 @@
+ export class ViTImageProcessor extends ImageProcessor {
+ }
+ export class ViTFeatureExtractor extends ViTImageProcessor {
+ }
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
+ //# sourceMappingURL=image_processing_vit.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"image_processing_vit.d.ts","sourceRoot":"","sources":["../../../src/models/vit/image_processing_vit.js"],"names":[],"mappings":"AAIA;CAAyD;AACzD;CAA8D;+BAHvD,sCAAsC"}
@@ -0,0 +1,12 @@
+ export class VitMatteImageProcessor extends ImageProcessor {
+ /**
+ * Calls the feature extraction process on an array of images, preprocesses
+ * each image, and concatenates the resulting features into a single Tensor.
+ * @param {import("../../utils/image.js").RawImage[]} images The image(s) to extract features from.
+ * @param {import("../../utils/image.js").RawImage[]} trimaps The trimaps(s) to extract features from.
+ * @returns {Promise<import("../../base/image_processors_utils.js").ImageProcessorResult>} An object containing the concatenated pixel values of the preprocessed images.
+ */
+ _call(images: import("../../utils/image.js").RawImage[], trimaps: import("../../utils/image.js").RawImage[]): Promise<import("../../base/image_processors_utils.js").ImageProcessorResult>;
+ }
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
+ //# sourceMappingURL=image_processing_vitmatte.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"image_processing_vitmatte.d.ts","sourceRoot":"","sources":["../../../src/models/vitmatte/image_processing_vitmatte.js"],"names":[],"mappings":"AASA;IACI;;;;;;OAMG;IACH,cAJW,OAAO,sBAAsB,EAAE,QAAQ,EAAE,WACzC,OAAO,sBAAsB,EAAE,QAAQ,EAAE,GACvC,QAAQ,OAAO,sCAAsC,EAAE,oBAAoB,CAAC,CAiCxF;CACJ;+BA/CM,sCAAsC"}
@@ -0,0 +1,26 @@
+ export class VitPoseImageProcessor extends ImageProcessor {
+ /**
+ * Transform the heatmaps into keypoint predictions and transform them back to the image.
+ * NOTE: This is a naive implementation and does not include advanced post-processing techniques,
+ * so the results may not be as accurate as the original implementation.
+ * @param {import('../../utils/tensor.js').Tensor} outputs The model outputs.
+ * @param {[number, number, number, number][][]} boxes List or array of bounding boxes for each image.
+ * Each box should be a list of 4 floats representing the bounding box coordinates in COCO format (top_left_x, top_left_y, width, height).
+ * @returns {{
+ * bbox: [number, number, number, number],
+ * scores: number[],
+ * labels: number[],
+ * keypoints: [number, number][]
+ * }[][]} List of keypoints predictions for each image.
+ */
+ post_process_pose_estimation(outputs: import('../../utils/tensor.js').Tensor, boxes: [number, number, number, number][][], { threshold, }?: {
+ threshold?: any;
+ }): {
+ bbox: [number, number, number, number];
+ scores: number[];
+ labels: number[];
+ keypoints: [number, number][];
+ }[][];
+ }
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
+ //# sourceMappingURL=image_processing_vitpose.d.ts.map
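
Usage note (not part of the diff): a sketch of `post_process_pose_estimation` on the new VitPoseImageProcessor. The model class, the `heatmaps` output name and the checkpoint id are assumptions about the wider library.

```js
import { AutoImageProcessor, VitPoseForPoseEstimation, RawImage } from '@huggingface/transformers';

const model_id = 'onnx-community/vitpose-base-simple'; // illustrative id
const image_processor = await AutoImageProcessor.from_pretrained(model_id);
const model = await VitPoseForPoseEstimation.from_pretrained(model_id);

const image = await RawImage.read('person.jpg');
// One person box per image, in COCO (top_left_x, top_left_y, width, height) format.
const boxes = [[[50, 30, 200, 400]]];

const inputs = await image_processor(image);
const { heatmaps } = await model(inputs); // output name assumed

// Decode heatmaps into keypoints mapped back onto the original boxes.
const poses = image_processor.post_process_pose_estimation(heatmaps, boxes);
```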
@@ -0,0 +1 @@
+ {"version":3,"file":"image_processing_vitpose.d.ts","sourceRoot":"","sources":["../../../src/models/vitpose/image_processing_vitpose.js"],"names":[],"mappings":"AAIA;IAEI;;;;;;;;;;;;;OAaG;IACH,sCAVW,OAAO,uBAAuB,EAAE,MAAM,SACtC,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,EAAE;;QAElC;QACZ,IAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;QAC3C,MAAU,EAAE,MAAM,EAAE,CAAC;QACrB,MAAU,EAAE,MAAM,EAAE,CAAC;QACrB,SAAa,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAA;KAC9B,EAAE,EAAE,CAqEP;CACJ;+BAtFM,sCAAsC"}
@@ -0,0 +1,19 @@
+ export class Wav2Vec2FeatureExtractor extends FeatureExtractor {
+ /**
+ * @param {Float32Array} input_values
+ * @returns {Float32Array}
+ */
+ _zero_mean_unit_var_norm(input_values: Float32Array): Float32Array;
+ /**
+ * Asynchronously extracts features from a given audio using the provided configuration.
+ * @param {Float32Array|Float64Array} audio The audio data as a Float32Array/Float64Array.
+ * @returns {Promise<{ input_values: Tensor; attention_mask: Tensor }>} A Promise resolving to an object containing the extracted input features and attention mask as Tensors.
+ */
+ _call(audio: Float32Array | Float64Array): Promise<{
+ input_values: Tensor;
+ attention_mask: Tensor;
+ }>;
+ }
+ import { FeatureExtractor } from "../../base/feature_extraction_utils.js";
+ import { Tensor } from "../../utils/tensor.js";
+ //# sourceMappingURL=feature_extraction_wav2vec2.d.ts.map
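
Usage note (not part of the diff): a sketch of the Wav2Vec2FeatureExtractor call shape declared above. Loading via `AutoFeatureExtractor`, the `read_audio` helper and the checkpoint id are assumptions.

```js
import { AutoFeatureExtractor, read_audio } from '@huggingface/transformers';

const model_id = 'Xenova/wav2vec2-base-960h'; // illustrative id
const feature_extractor = await AutoFeatureExtractor.from_pretrained(model_id);

// Raw waveform, zero-mean/unit-variance normalized; attention_mask covers the real samples.
const audio = await read_audio('speech.wav', 16000);
const { input_values, attention_mask } = await feature_extractor(audio);
console.log(input_values.dims, attention_mask.dims);
```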
@@ -0,0 +1 @@
+ {"version":3,"file":"feature_extraction_wav2vec2.d.ts","sourceRoot":"","sources":["../../../src/models/wav2vec2/feature_extraction_wav2vec2.js"],"names":[],"mappings":"AAGA;IAEI;;;OAGG;IACH,uCAHW,YAAY,GACV,YAAY,CAQxB;IAED;;;;OAIG;IACH,aAHW,YAAY,GAAC,YAAY;sBACC,MAAM;wBAAkB,MAAM;OAsBlE;CACJ;iCA3CuD,wCAAwC;uBACzE,uBAAuB"}
@@ -0,0 +1,12 @@
+ export class Wav2Vec2ProcessorWithLM extends Processor {
+ static feature_extractor_class: typeof AutoFeatureExtractor;
+ /**
+ * Calls the feature_extractor function with the given audio input.
+ * @param {any} audio The audio input to extract features from.
+ * @returns {Promise<any>} A Promise that resolves with the extracted features.
+ */
+ _call(audio: any): Promise<any>;
+ }
+ import { Processor } from "../../base/processing_utils.js";
+ import { AutoFeatureExtractor } from "../auto/feature_extraction_auto.js";
+ //# sourceMappingURL=processing_wav2vec2.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"processing_wav2vec2.d.ts","sourceRoot":"","sources":["../../../src/models/wav2vec2/processing_wav2vec2.js"],"names":[],"mappings":"AAGA;IACI,4DAAqD;IAErD;;;;OAIG;IACH,aAHW,GAAG,GACD,QAAQ,GAAG,CAAC,CAIxB;CACJ;0BAdyB,gCAAgC;qCACrB,oCAAoC"}
@@ -0,0 +1,23 @@
+ export class WeSpeakerFeatureExtractor extends FeatureExtractor {
+ constructor(config: any);
+ mel_filters: number[][];
+ window: Float64Array;
+ min_num_frames: any;
+ /**
+ * Computes the log-Mel spectrogram of the provided audio waveform.
+ * @param {Float32Array|Float64Array} waveform The audio waveform to process.
+ * @returns {Promise<Tensor>} An object containing the log-Mel spectrogram data as a Float32Array and its dimensions as an array of numbers.
+ */
+ _extract_fbank_features(waveform: Float32Array | Float64Array): Promise<Tensor>;
+ /**
+ * Asynchronously extracts features from a given audio using the provided configuration.
+ * @param {Float32Array|Float64Array} audio The audio data as a Float32Array/Float64Array.
+ * @returns {Promise<{ input_features: Tensor }>} A Promise resolving to an object containing the extracted input features as a Tensor.
+ */
+ _call(audio: Float32Array | Float64Array): Promise<{
+ input_features: Tensor;
+ }>;
+ }
+ import { FeatureExtractor } from '../../base/feature_extraction_utils.js';
+ import { Tensor } from '../../utils/tensor.js';
+ //# sourceMappingURL=feature_extraction_wespeaker.d.ts.map
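
Usage note (not part of the diff): a sketch of extracting speaker-embedding inputs with the new WeSpeakerFeatureExtractor. The loading path, checkpoint id and model usage are assumptions about the wider library.

```js
import { AutoFeatureExtractor, AutoModel, read_audio } from '@huggingface/transformers';

const model_id = 'onnx-community/wespeaker-voxceleb-resnet34-LM'; // illustrative id
const feature_extractor = await AutoFeatureExtractor.from_pretrained(model_id);
const model = await AutoModel.from_pretrained(model_id);

const audio = await read_audio('speaker.wav', 16000);
const { input_features } = await feature_extractor(audio);

// The model maps the log-Mel features to a fixed-size speaker embedding.
const outputs = await model({ input_features });
console.log(outputs);
```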
@@ -0,0 +1 @@
+ {"version":3,"file":"feature_extraction_wespeaker.d.ts","sourceRoot":"","sources":["../../../src/models/wespeaker/feature_extraction_wespeaker.js"],"names":[],"mappings":"AAKA;IAEI,yBAyBC;IANG,wBAA8B;IAE9B,qBAEE;IACF,oBAAgD;IAGpD;;;;OAIG;IACH,kCAHW,YAAY,GAAC,YAAY,GACvB,QAAQ,MAAM,CAAC,CA2B3B;IAGD;;;;OAIG;IACH,aAHW,YAAY,GAAC,YAAY,GACvB,QAAQ;QAAE,cAAc,EAAE,MAAM,CAAA;KAAE,CAAC,CA4B/C;CACJ;iCAnGuD,wCAAwC;uBACzE,uBAAuB"}
@@ -0,0 +1,21 @@
+ export class WhisperFeatureExtractor extends FeatureExtractor {
+ constructor(config: any);
+ window: Float64Array;
+ /**
+ * Computes the log-Mel spectrogram of the provided audio waveform.
+ * @param {Float32Array|Float64Array} waveform The audio waveform to process.
+ * @returns {Promise<Tensor>} An object containing the log-Mel spectrogram data as a Float32Array and its dimensions as an array of numbers.
+ */
+ _extract_fbank_features(waveform: Float32Array | Float64Array): Promise<Tensor>;
+ /**
+ * Asynchronously extracts features from a given audio using the provided configuration.
+ * @param {Float32Array|Float64Array} audio The audio data as a Float32Array/Float64Array.
+ * @returns {Promise<{ input_features: Tensor }>} A Promise resolving to an object containing the extracted input features as a Tensor.
+ */
+ _call(audio: Float32Array | Float64Array): Promise<{
+ input_features: Tensor;
+ }>;
+ }
+ import { FeatureExtractor } from '../../base/feature_extraction_utils.js';
+ import { Tensor } from '../../utils/tensor.js';
+ //# sourceMappingURL=feature_extraction_whisper.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"feature_extraction_whisper.d.ts","sourceRoot":"","sources":["../../../src/models/whisper/feature_extraction_whisper.js"],"names":[],"mappings":"AAKA;IAEI,yBAeC;IADG,qBAAwD;IAG5D;;;;OAIG;IACH,kCAHW,YAAY,GAAC,YAAY,GACvB,QAAQ,MAAM,CAAC,CA0B3B;IAED;;;;OAIG;IACH,aAHW,YAAY,GAAC,YAAY,GACvB,QAAQ;QAAE,cAAc,EAAE,MAAM,CAAA;KAAE,CAAC,CAwB/C;CACJ;iCAnFuD,wCAAwC;uBACzE,uBAAuB"}
@@ -0,0 +1,17 @@
+ /**
+ * Represents a WhisperProcessor that extracts features from an audio input.
+ */
+ export class WhisperProcessor extends Processor {
+ static tokenizer_class: typeof AutoTokenizer;
+ static feature_extractor_class: typeof AutoFeatureExtractor;
+ /**
+ * Calls the feature_extractor function with the given audio input.
+ * @param {any} audio The audio input to extract features from.
+ * @returns {Promise<any>} A Promise that resolves with the extracted features.
+ */
+ _call(audio: any): Promise<any>;
+ }
+ import { Processor } from "../../base/processing_utils.js";
+ import { AutoTokenizer } from "../../tokenizers.js";
+ import { AutoFeatureExtractor } from "../auto/feature_extraction_auto.js";
+ //# sourceMappingURL=processing_whisper.d.ts.map
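
Usage note (not part of the diff): the WhisperProcessor above simply forwards audio to the feature extractor; for end-to-end transcription the pipeline API is the usual entry point. The checkpoint id is illustrative.

```js
import { pipeline } from '@huggingface/transformers';

// The pipeline wires the WhisperFeatureExtractor/WhisperProcessor and model together.
const transcriber = await pipeline(
    'automatic-speech-recognition',
    'onnx-community/whisper-tiny.en', // illustrative id
);
const { text } = await transcriber('speech.wav');
console.log(text);
```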
@@ -0,0 +1 @@
+ {"version":3,"file":"processing_whisper.d.ts","sourceRoot":"","sources":["../../../src/models/whisper/processing_whisper.js"],"names":[],"mappings":"AAIA;;GAEG;AACH;IACI,6CAAsC;IACtC,4DAAqD;IAErD;;;;OAIG;IACH,aAHW,GAAG,GACD,QAAQ,GAAG,CAAC,CAIxB;CACJ;0BAjByB,gCAAgC;8BAD5B,qBAAqB;qCADd,oCAAoC"}
@@ -0,0 +1,10 @@
+ export class YolosImageProcessor extends ImageProcessor {
+ post_process_object_detection(outputs: {
+ logits: import("../../transformers.js").Tensor;
+ pred_boxes: import("../../transformers.js").Tensor;
+ }, threshold?: number, target_sizes?: [number, number][], is_zero_shot?: boolean): any[];
+ }
+ export class YolosFeatureExtractor extends YolosImageProcessor {
+ }
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
+ //# sourceMappingURL=image_processing_yolos.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"image_processing_yolos.d.ts","sourceRoot":"","sources":["../../../src/models/yolos/image_processing_yolos.js"],"names":[],"mappings":"AAKA;IAO+tE;;;6FAAu6F;CAFroK;AACD;CAAkE;+BAR3D,sCAAsC"}