@huggingface/transformers 3.0.2 → 3.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (258) hide show
  1. package/README.md +13 -4
  2. package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
  3. package/dist/transformers.cjs +16655 -13040
  4. package/dist/transformers.cjs.map +1 -1
  5. package/dist/transformers.js +17095 -13468
  6. package/dist/transformers.js.map +1 -1
  7. package/dist/transformers.min.cjs +244 -52
  8. package/dist/transformers.min.cjs.map +1 -1
  9. package/dist/transformers.min.js +235 -43
  10. package/dist/transformers.min.js.map +1 -1
  11. package/dist/transformers.min.mjs +246 -54
  12. package/dist/transformers.min.mjs.map +1 -1
  13. package/dist/transformers.mjs +16818 -13202
  14. package/dist/transformers.mjs.map +1 -1
  15. package/package.json +4 -4
  16. package/src/base/feature_extraction_utils.js +54 -0
  17. package/src/base/image_processors_utils.js +1089 -0
  18. package/src/base/processing_utils.js +145 -0
  19. package/src/configs.js +15 -4
  20. package/src/env.js +6 -6
  21. package/src/generation/configuration_utils.js +7 -0
  22. package/src/generation/logits_process.js +22 -16
  23. package/src/generation/streamers.js +7 -2
  24. package/src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js +90 -0
  25. package/src/models/auto/feature_extraction_auto.js +41 -0
  26. package/src/models/auto/image_processing_auto.js +29 -0
  27. package/src/models/auto/processing_auto.js +100 -0
  28. package/src/models/beit/image_processing_beit.js +5 -0
  29. package/src/models/bit/image_processing_bit.js +5 -0
  30. package/src/models/chinese_clip/image_processing_chinese_clip.js +5 -0
  31. package/src/models/clap/feature_extraction_clap.js +159 -0
  32. package/src/models/clip/image_processing_clip.js +6 -0
  33. package/src/models/convnext/image_processing_convnext.js +45 -0
  34. package/src/models/deit/image_processing_deit.js +6 -0
  35. package/src/models/detr/image_processing_detr.js +52 -0
  36. package/src/models/donut/image_processing_donut.js +31 -0
  37. package/src/models/dpt/image_processing_dpt.js +6 -0
  38. package/src/models/efficientnet/image_processing_efficientnet.js +13 -0
  39. package/src/models/feature_extractors.js +12 -0
  40. package/src/models/florence2/processing_florence2.js +128 -0
  41. package/src/models/glpn/image_processing_glpn.js +5 -0
  42. package/src/models/idefics3/image_processing_idefics3.js +219 -0
  43. package/src/models/idefics3/processing_idefics3.js +136 -0
  44. package/src/models/image_processors.js +37 -0
  45. package/src/models/janus/image_processing_janus.js +26 -0
  46. package/src/models/janus/processing_janus.js +123 -0
  47. package/src/models/jina_clip/image_processing_jina_clip.js +26 -0
  48. package/src/models/jina_clip/processing_jina_clip.js +24 -0
  49. package/src/models/llava_onevision/image_processing_llava_onevision.js +5 -0
  50. package/src/models/mask2former/image_processing_mask2former.js +5 -0
  51. package/src/models/maskformer/image_processing_maskformer.js +18 -0
  52. package/src/models/mgp_str/processing_mgp_str.js +170 -0
  53. package/src/models/mobilenet_v1/image_processing_mobilenet_v1.js +7 -0
  54. package/src/models/mobilenet_v2/image_processing_mobilenet_v2.js +7 -0
  55. package/src/models/mobilenet_v3/image_processing_mobilenet_v3.js +7 -0
  56. package/src/models/mobilenet_v4/image_processing_mobilenet_v4.js +7 -0
  57. package/src/models/mobilevit/image_processing_mobilevit.js +6 -0
  58. package/src/models/nougat/image_processing_nougat.js +5 -0
  59. package/src/models/owlv2/image_processing_owlv2.js +5 -0
  60. package/src/models/owlvit/image_processing_owlvit.js +12 -0
  61. package/src/models/owlvit/processing_owlvit.js +7 -0
  62. package/src/models/processors.js +12 -0
  63. package/src/models/pvt/image_processing_pvt.js +5 -0
  64. package/src/models/pyannote/feature_extraction_pyannote.js +28 -0
  65. package/src/models/pyannote/processing_pyannote.js +71 -0
  66. package/src/models/qwen2_vl/image_processing_qwen2_vl.js +52 -0
  67. package/src/models/qwen2_vl/processing_qwen2_vl.js +52 -0
  68. package/src/models/rt_detr/image_processing_rt_detr.js +12 -0
  69. package/src/models/sam/image_processing_sam.js +242 -0
  70. package/src/models/sam/processing_sam.js +20 -0
  71. package/src/models/sapiens/image_processing_sapiens.js +13 -0
  72. package/src/models/seamless_m4t/feature_extraction_seamless_m4t.js +180 -0
  73. package/src/models/segformer/image_processing_segformer.js +13 -0
  74. package/src/models/siglip/image_processing_siglip.js +5 -0
  75. package/src/models/speecht5/feature_extraction_speecht5.js +4 -0
  76. package/src/models/speecht5/processing_speecht5.js +17 -0
  77. package/src/models/swin2sr/image_processing_swin2sr.js +24 -0
  78. package/src/models/vit/image_processing_vit.js +7 -0
  79. package/src/models/vitmatte/image_processing_vitmatte.js +50 -0
  80. package/src/models/vitpose/image_processing_vitpose.js +89 -0
  81. package/src/models/wav2vec2/feature_extraction_wav2vec2.js +44 -0
  82. package/src/models/wav2vec2/processing_wav2vec2.js +15 -0
  83. package/src/models/wespeaker/feature_extraction_wespeaker.js +100 -0
  84. package/src/models/whisper/feature_extraction_whisper.js +84 -0
  85. package/src/models/whisper/processing_whisper.js +21 -0
  86. package/src/models/yolos/image_processing_yolos.js +12 -0
  87. package/src/models.js +755 -34
  88. package/src/pipelines.js +8 -8
  89. package/src/tokenizers.js +5 -0
  90. package/src/transformers.js +15 -2
  91. package/src/utils/constants.js +8 -1
  92. package/src/utils/core.js +51 -9
  93. package/src/utils/dtypes.js +2 -1
  94. package/src/utils/hub.js +2 -1
  95. package/src/utils/image.js +87 -33
  96. package/src/utils/tensor.js +39 -2
  97. package/types/base/feature_extraction_utils.d.ts +41 -0
  98. package/types/base/feature_extraction_utils.d.ts.map +1 -0
  99. package/types/base/image_processors_utils.d.ts +323 -0
  100. package/types/base/image_processors_utils.d.ts.map +1 -0
  101. package/types/base/processing_utils.d.ts +80 -0
  102. package/types/base/processing_utils.d.ts.map +1 -0
  103. package/types/configs.d.ts +5 -2
  104. package/types/configs.d.ts.map +1 -1
  105. package/types/env.d.ts +1 -1
  106. package/types/env.d.ts.map +1 -1
  107. package/types/generation/configuration_utils.d.ts +6 -0
  108. package/types/generation/configuration_utils.d.ts.map +1 -1
  109. package/types/generation/logits_process.d.ts +30 -20
  110. package/types/generation/logits_process.d.ts.map +1 -1
  111. package/types/generation/streamers.d.ts +13 -8
  112. package/types/generation/streamers.d.ts.map +1 -1
  113. package/types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts +25 -0
  114. package/types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts.map +1 -0
  115. package/types/models/auto/feature_extraction_auto.d.ts +5 -0
  116. package/types/models/auto/feature_extraction_auto.d.ts.map +1 -0
  117. package/types/models/auto/image_processing_auto.d.ts +5 -0
  118. package/types/models/auto/image_processing_auto.d.ts.map +1 -0
  119. package/types/models/auto/processing_auto.d.ts +35 -0
  120. package/types/models/auto/processing_auto.d.ts.map +1 -0
  121. package/types/models/beit/image_processing_beit.d.ts +4 -0
  122. package/types/models/beit/image_processing_beit.d.ts.map +1 -0
  123. package/types/models/bit/image_processing_bit.d.ts +4 -0
  124. package/types/models/bit/image_processing_bit.d.ts.map +1 -0
  125. package/types/models/chinese_clip/image_processing_chinese_clip.d.ts +4 -0
  126. package/types/models/chinese_clip/image_processing_chinese_clip.d.ts.map +1 -0
  127. package/types/models/clap/feature_extraction_clap.d.ts +57 -0
  128. package/types/models/clap/feature_extraction_clap.d.ts.map +1 -0
  129. package/types/models/clip/image_processing_clip.d.ts +6 -0
  130. package/types/models/clip/image_processing_clip.d.ts.map +1 -0
  131. package/types/models/convnext/image_processing_convnext.d.ts +12 -0
  132. package/types/models/convnext/image_processing_convnext.d.ts.map +1 -0
  133. package/types/models/deit/image_processing_deit.d.ts +6 -0
  134. package/types/models/deit/image_processing_deit.d.ts.map +1 -0
  135. package/types/models/detr/image_processing_detr.d.ts +42 -0
  136. package/types/models/detr/image_processing_detr.d.ts.map +1 -0
  137. package/types/models/donut/image_processing_donut.d.ts +7 -0
  138. package/types/models/donut/image_processing_donut.d.ts.map +1 -0
  139. package/types/models/dpt/image_processing_dpt.d.ts +6 -0
  140. package/types/models/dpt/image_processing_dpt.d.ts.map +1 -0
  141. package/types/models/efficientnet/image_processing_efficientnet.d.ts +6 -0
  142. package/types/models/efficientnet/image_processing_efficientnet.d.ts.map +1 -0
  143. package/types/models/feature_extractors.d.ts +10 -0
  144. package/types/models/feature_extractors.d.ts.map +1 -0
  145. package/types/models/florence2/processing_florence2.d.ts +39 -0
  146. package/types/models/florence2/processing_florence2.d.ts.map +1 -0
  147. package/types/models/glpn/image_processing_glpn.d.ts +4 -0
  148. package/types/models/glpn/image_processing_glpn.d.ts.map +1 -0
  149. package/types/models/idefics3/image_processing_idefics3.d.ts +40 -0
  150. package/types/models/idefics3/image_processing_idefics3.d.ts.map +1 -0
  151. package/types/models/idefics3/processing_idefics3.d.ts +19 -0
  152. package/types/models/idefics3/processing_idefics3.d.ts.map +1 -0
  153. package/types/models/image_processors.d.ts +37 -0
  154. package/types/models/image_processors.d.ts.map +1 -0
  155. package/types/models/janus/image_processing_janus.d.ts +7 -0
  156. package/types/models/janus/image_processing_janus.d.ts.map +1 -0
  157. package/types/models/janus/processing_janus.d.ts +77 -0
  158. package/types/models/janus/processing_janus.d.ts.map +1 -0
  159. package/types/models/jina_clip/image_processing_jina_clip.d.ts +5 -0
  160. package/types/models/jina_clip/image_processing_jina_clip.d.ts.map +1 -0
  161. package/types/models/jina_clip/processing_jina_clip.d.ts +9 -0
  162. package/types/models/jina_clip/processing_jina_clip.d.ts.map +1 -0
  163. package/types/models/llava_onevision/image_processing_llava_onevision.d.ts +4 -0
  164. package/types/models/llava_onevision/image_processing_llava_onevision.d.ts.map +1 -0
  165. package/types/models/mask2former/image_processing_mask2former.d.ts +4 -0
  166. package/types/models/mask2former/image_processing_mask2former.d.ts.map +1 -0
  167. package/types/models/maskformer/image_processing_maskformer.d.ts +22 -0
  168. package/types/models/maskformer/image_processing_maskformer.d.ts.map +1 -0
  169. package/types/models/mgp_str/processing_mgp_str.d.ts +64 -0
  170. package/types/models/mgp_str/processing_mgp_str.d.ts.map +1 -0
  171. package/types/models/mobilenet_v1/image_processing_mobilenet_v1.d.ts +6 -0
  172. package/types/models/mobilenet_v1/image_processing_mobilenet_v1.d.ts.map +1 -0
  173. package/types/models/mobilenet_v2/image_processing_mobilenet_v2.d.ts +6 -0
  174. package/types/models/mobilenet_v2/image_processing_mobilenet_v2.d.ts.map +1 -0
  175. package/types/models/mobilenet_v3/image_processing_mobilenet_v3.d.ts +6 -0
  176. package/types/models/mobilenet_v3/image_processing_mobilenet_v3.d.ts.map +1 -0
  177. package/types/models/mobilenet_v4/image_processing_mobilenet_v4.d.ts +6 -0
  178. package/types/models/mobilenet_v4/image_processing_mobilenet_v4.d.ts.map +1 -0
  179. package/types/models/mobilevit/image_processing_mobilevit.d.ts +6 -0
  180. package/types/models/mobilevit/image_processing_mobilevit.d.ts.map +1 -0
  181. package/types/models/nougat/image_processing_nougat.d.ts +4 -0
  182. package/types/models/nougat/image_processing_nougat.d.ts.map +1 -0
  183. package/types/models/owlv2/image_processing_owlv2.d.ts +4 -0
  184. package/types/models/owlv2/image_processing_owlv2.d.ts.map +1 -0
  185. package/types/models/owlvit/image_processing_owlvit.d.ts +10 -0
  186. package/types/models/owlvit/image_processing_owlvit.d.ts.map +1 -0
  187. package/types/models/owlvit/processing_owlvit.d.ts +8 -0
  188. package/types/models/owlvit/processing_owlvit.d.ts.map +1 -0
  189. package/types/models/processors.d.ts +13 -0
  190. package/types/models/processors.d.ts.map +1 -0
  191. package/types/models/pvt/image_processing_pvt.d.ts +4 -0
  192. package/types/models/pvt/image_processing_pvt.d.ts.map +1 -0
  193. package/types/models/pyannote/feature_extraction_pyannote.d.ts +13 -0
  194. package/types/models/pyannote/feature_extraction_pyannote.d.ts.map +1 -0
  195. package/types/models/pyannote/processing_pyannote.d.ts +30 -0
  196. package/types/models/pyannote/processing_pyannote.d.ts.map +1 -0
  197. package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts +11 -0
  198. package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts.map +1 -0
  199. package/types/models/qwen2_vl/processing_qwen2_vl.d.ts +17 -0
  200. package/types/models/qwen2_vl/processing_qwen2_vl.d.ts.map +1 -0
  201. package/types/models/rt_detr/image_processing_rt_detr.d.ts +8 -0
  202. package/types/models/rt_detr/image_processing_rt_detr.d.ts.map +1 -0
  203. package/types/models/sam/image_processing_sam.d.ts +103 -0
  204. package/types/models/sam/image_processing_sam.d.ts.map +1 -0
  205. package/types/models/sam/processing_sam.d.ts +9 -0
  206. package/types/models/sam/processing_sam.d.ts.map +1 -0
  207. package/types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts +34 -0
  208. package/types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts.map +1 -0
  209. package/types/models/segformer/image_processing_segformer.d.ts +10 -0
  210. package/types/models/segformer/image_processing_segformer.d.ts.map +1 -0
  211. package/types/models/siglip/image_processing_siglip.d.ts +4 -0
  212. package/types/models/siglip/image_processing_siglip.d.ts.map +1 -0
  213. package/types/models/speecht5/feature_extraction_speecht5.d.ts +4 -0
  214. package/types/models/speecht5/feature_extraction_speecht5.d.ts.map +1 -0
  215. package/types/models/speecht5/processing_speecht5.d.ts +14 -0
  216. package/types/models/speecht5/processing_speecht5.d.ts.map +1 -0
  217. package/types/models/swin2sr/image_processing_swin2sr.d.ts +5 -0
  218. package/types/models/swin2sr/image_processing_swin2sr.d.ts.map +1 -0
  219. package/types/models/vit/image_processing_vit.d.ts +6 -0
  220. package/types/models/vit/image_processing_vit.d.ts.map +1 -0
  221. package/types/models/vitmatte/image_processing_vitmatte.d.ts +12 -0
  222. package/types/models/vitmatte/image_processing_vitmatte.d.ts.map +1 -0
  223. package/types/models/vitpose/image_processing_vitpose.d.ts +26 -0
  224. package/types/models/vitpose/image_processing_vitpose.d.ts.map +1 -0
  225. package/types/models/wav2vec2/feature_extraction_wav2vec2.d.ts +19 -0
  226. package/types/models/wav2vec2/feature_extraction_wav2vec2.d.ts.map +1 -0
  227. package/types/models/wav2vec2/processing_wav2vec2.d.ts +12 -0
  228. package/types/models/wav2vec2/processing_wav2vec2.d.ts.map +1 -0
  229. package/types/models/wespeaker/feature_extraction_wespeaker.d.ts +23 -0
  230. package/types/models/wespeaker/feature_extraction_wespeaker.d.ts.map +1 -0
  231. package/types/models/whisper/feature_extraction_whisper.d.ts +21 -0
  232. package/types/models/whisper/feature_extraction_whisper.d.ts.map +1 -0
  233. package/types/models/whisper/processing_whisper.d.ts +17 -0
  234. package/types/models/whisper/processing_whisper.d.ts.map +1 -0
  235. package/types/models/yolos/image_processing_yolos.d.ts +10 -0
  236. package/types/models/yolos/image_processing_yolos.d.ts.map +1 -0
  237. package/types/models.d.ts +150 -0
  238. package/types/models.d.ts.map +1 -1
  239. package/types/pipelines.d.ts +2 -3
  240. package/types/pipelines.d.ts.map +1 -1
  241. package/types/tokenizers.d.ts +3 -0
  242. package/types/tokenizers.d.ts.map +1 -1
  243. package/types/transformers.d.ts +10 -1
  244. package/types/utils/constants.d.ts +6 -0
  245. package/types/utils/constants.d.ts.map +1 -1
  246. package/types/utils/core.d.ts +65 -3
  247. package/types/utils/core.d.ts.map +1 -1
  248. package/types/utils/dtypes.d.ts +3 -2
  249. package/types/utils/dtypes.d.ts.map +1 -1
  250. package/types/utils/hub.d.ts +1 -1
  251. package/types/utils/hub.d.ts.map +1 -1
  252. package/types/utils/image.d.ts +14 -2
  253. package/types/utils/image.d.ts.map +1 -1
  254. package/types/utils/tensor.d.ts +39 -4
  255. package/types/utils/tensor.d.ts.map +1 -1
  256. package/src/processors.js +0 -2655
  257. package/types/processors.d.ts +0 -924
  258. package/types/processors.d.ts.map +0 -1
@@ -0,0 +1,77 @@
1
+ export class VLChatProcessor extends Processor {
2
+ static image_processor_class: typeof AutoImageProcessor;
3
+ static tokenizer_class: typeof AutoTokenizer;
4
+ constructor(config: any, components: any);
5
+ image_tag: any;
6
+ image_start_tag: any;
7
+ image_end_tag: any;
8
+ num_image_tokens: any;
9
+ /**
10
+ * @typedef {Object} MultimodalMessageProperties Additional properties for multimodal messages.
11
+ * @property {(RawImage | string | URL)[]} [images] The images in the message.
12
+ * @typedef {(import('../../tokenizers.js').Message & MultimodalMessageProperties)[]} MultimodalConversation The conversation possibly containing multimodal inputs.
13
+ */
14
+ /**
15
+ * @typedef {Object} VLCChatProcessorResult The processed input.
16
+ * @property {Tensor} input_ids The input IDs.
17
+ * @property {Tensor} attention_mask The attention mask.
18
+ * @property {Tensor} images_seq_mask The image sequence mask.
19
+ * @property {Tensor} images_emb_mask The image embedding mask.
20
+ */
21
+ /**
22
+ * @param {MultimodalConversation} conversation The chat messages to process.
23
+ * @param {Object} options Additional options for processing.
24
+ * @param {RawImage|RawImage[]} [options.images] The images to process, if not set in the conversation.
25
+ * @param {string} [options.chat_template="default"] The chat template to use.
26
+ * @returns {Promise<VLCChatProcessorResult | VLCChatProcessorResult & import('../../base/image_processors_utils.js').ImageProcessorResult>} The processed input.
27
+ */
28
+ _call(conversation: (import("../../tokenizers.js").Message & {
29
+ /**
30
+ * The images in the message.
31
+ */
32
+ images?: (RawImage | string | URL)[];
33
+ })[], { images, chat_template, }?: {
34
+ images?: RawImage | RawImage[];
35
+ chat_template?: string;
36
+ }): Promise<{
37
+ /**
38
+ * The input IDs.
39
+ */
40
+ input_ids: Tensor;
41
+ /**
42
+ * The attention mask.
43
+ */
44
+ attention_mask: Tensor;
45
+ /**
46
+ * The image sequence mask.
47
+ */
48
+ images_seq_mask: Tensor;
49
+ /**
50
+ * The image embedding mask.
51
+ */
52
+ images_emb_mask: Tensor;
53
+ } | ({
54
+ /**
55
+ * The input IDs.
56
+ */
57
+ input_ids: Tensor;
58
+ /**
59
+ * The attention mask.
60
+ */
61
+ attention_mask: Tensor;
62
+ /**
63
+ * The image sequence mask.
64
+ */
65
+ images_seq_mask: Tensor;
66
+ /**
67
+ * The image embedding mask.
68
+ */
69
+ images_emb_mask: Tensor;
70
+ } & import("../../base/image_processors_utils.js").ImageProcessorResult)>;
71
+ }
72
+ import { Processor } from "../../base/processing_utils.js";
73
+ import { RawImage } from "../../utils/image.js";
74
+ import { Tensor } from "../../utils/tensor.js";
75
+ import { AutoImageProcessor } from "../auto/image_processing_auto.js";
76
+ import { AutoTokenizer } from "../../tokenizers.js";
77
+ //# sourceMappingURL=processing_janus.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"processing_janus.d.ts","sourceRoot":"","sources":["../../../src/models/janus/processing_janus.js"],"names":[],"mappings":"AAQA;IACI,wDAAiD;IACjD,6CAAsC;IAGtC,0CAOC;IAJG,eAAsC;IACtC,qBAAkD;IAClD,mBAA8C;IAC9C,sBAAoD;IAGxD;;;;OAIG;IAEH;;;;;;OAMG;IAEH;;;;;;OAMG;IACH;;;;iBAnBc,CAAC,QAAQ,GAAG,MAAM,GAAG,GAAG,CAAC,EAAE;;QAeA,MAAM,GAApC,QAAQ,GAAC,QAAQ,EAAE;QACF,aAAa,GAA9B,MAAM;;;;;mBAVH,MAAM;;;;wBACN,MAAM;;;;yBACN,MAAM;;;;yBACN,MAAM;;;;;mBAHN,MAAM;;;;wBACN,MAAM;;;;yBACN,MAAM;;;;yBACN,MAAM;8EAwFnB;CACJ;0BAzHyB,gCAAgC;yBAKjC,sBAAsB;uBADxB,uBAAuB;mCAHX,kCAAkC;8BACvC,qBAAqB"}
@@ -0,0 +1,5 @@
1
+ export class JinaCLIPImageProcessor extends ImageProcessor {
2
+ constructor(config: any);
3
+ }
4
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
5
+ //# sourceMappingURL=image_processing_jina_clip.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image_processing_jina_clip.d.ts","sourceRoot":"","sources":["../../../src/models/jina_clip/image_processing_jina_clip.js"],"names":[],"mappings":"AAIA;IACI,yBAmBC;CACJ;+BAvBM,sCAAsC"}
@@ -0,0 +1,9 @@
1
+ export class JinaCLIPProcessor extends Processor {
2
+ static tokenizer_class: typeof AutoTokenizer;
3
+ static image_processor_class: typeof AutoImageProcessor;
4
+ _call(text?: any, images?: any, kwargs?: {}): Promise<any>;
5
+ }
6
+ import { Processor } from "../../base/processing_utils.js";
7
+ import { AutoTokenizer } from "../../tokenizers.js";
8
+ import { AutoImageProcessor } from "../auto/image_processing_auto.js";
9
+ //# sourceMappingURL=processing_jina_clip.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"processing_jina_clip.d.ts","sourceRoot":"","sources":["../../../src/models/jina_clip/processing_jina_clip.js"],"names":[],"mappings":"AAKA;IACI,6CAAsC;IACtC,wDAAiD;IAEjD,2DAaC;CACJ;0BAtByB,gCAAgC;8BAE5B,qBAAqB;mCADhB,kCAAkC"}
@@ -0,0 +1,4 @@
1
+ export class LlavaOnevisionImageProcessor extends ImageProcessor {
2
+ }
3
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
4
+ //# sourceMappingURL=image_processing_llava_onevision.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image_processing_llava_onevision.d.ts","sourceRoot":"","sources":["../../../src/models/llava_onevision/image_processing_llava_onevision.js"],"names":[],"mappings":"AAIA;CAAmE;+BAF5D,sCAAsC"}
@@ -0,0 +1,4 @@
1
+ export class Mask2FormerImageProcessor extends MaskFormerImageProcessor {
2
+ }
3
+ import { MaskFormerImageProcessor } from "../maskformer/image_processing_maskformer.js";
4
+ //# sourceMappingURL=image_processing_mask2former.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image_processing_mask2former.d.ts","sourceRoot":"","sources":["../../../src/models/mask2former/image_processing_mask2former.js"],"names":[],"mappings":"AAIA;CAA2E;yCAHlC,8CAA8C"}
@@ -0,0 +1,22 @@
1
+ export class MaskFormerImageProcessor extends ImageProcessor {
2
+ post_process_panoptic_segmentation(outputs: any, threshold?: number, mask_threshold?: number, overlap_mask_area_threshold?: number, label_ids_to_fuse?: Set<number>, target_sizes?: [number, number][]): {
3
+ segmentation: import("../../transformers.js").Tensor;
4
+ segments_info: {
5
+ id: number;
6
+ label_id: number;
7
+ score: number;
8
+ }[];
9
+ }[];
10
+ post_process_instance_segmentation(outputs: any, threshold?: number, target_sizes?: [number, number][]): {
11
+ segmentation: import("../../transformers.js").Tensor;
12
+ segments_info: {
13
+ id: number;
14
+ label_id: number;
15
+ score: number;
16
+ }[];
17
+ }[];
18
+ }
19
+ export class MaskFormerFeatureExtractor extends MaskFormerImageProcessor {
20
+ }
21
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
22
+ //# sourceMappingURL=image_processing_maskformer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image_processing_maskformer.d.ts","sourceRoot":"","sources":["../../../src/models/maskformer/image_processing_maskformer.js"],"names":[],"mappings":"AAMA;IAYure;;;;;;;QAA4qG;IAAA;;;;;;;QAAktB;CAFpjmB;AACD;CAA4E;+BAbrE,sCAAsC"}
@@ -0,0 +1,64 @@
1
+ export class MgpstrProcessor extends Processor {
2
+ static tokenizer_class: typeof AutoTokenizer;
3
+ static image_processor_class: typeof AutoImageProcessor;
4
+ /**
5
+ * @returns {import('../../tokenizers.js').MgpstrTokenizer} The character tokenizer.
6
+ */
7
+ get char_tokenizer(): import("../../tokenizers.js").MgpstrTokenizer;
8
+ /**
9
+ * @returns {import('../../tokenizers.js').GPT2Tokenizer} The BPE tokenizer.
10
+ */
11
+ get bpe_tokenizer(): import("../../tokenizers.js").GPT2Tokenizer;
12
+ /**
13
+ * @returns {import('../../tokenizers.js').BertTokenizer} The WordPiece tokenizer.
14
+ */
15
+ get wp_tokenizer(): import("../../tokenizers.js").BertTokenizer;
16
+ /**
17
+ * Helper function to decode the model prediction logits.
18
+ * @param {import('../../utils/tensor.js').Tensor} pred_logits Model prediction logits.
19
+ * @param {string} format Type of model prediction. Must be one of ['char', 'bpe', 'wp'].
20
+ * @returns {[string[], number[]]} The decoded sentences and their confidence scores.
21
+ */
22
+ _decode_helper(pred_logits: import('../../utils/tensor.js').Tensor, format: string): [string[], number[]];
23
+ /**
24
+ * Convert a list of lists of char token ids into a list of strings by calling char tokenizer.
25
+ * @param {number[][]} sequences List of tokenized input ids.
26
+ * @returns {string[]} The list of char decoded sentences.
27
+ */
28
+ char_decode(sequences: number[][]): string[];
29
+ /**
30
+ * Convert a list of lists of BPE token ids into a list of strings by calling BPE tokenizer.
31
+ * @param {number[][]} sequences List of tokenized input ids.
32
+ * @returns {string[]} The list of BPE decoded sentences.
33
+ */
34
+ bpe_decode(sequences: number[][]): string[];
35
+ /**
36
+ * Convert a list of lists of word piece token ids into a list of strings by calling word piece tokenizer.
37
+ * @param {number[][]} sequences List of tokenized input ids.
38
+ * @returns {string[]} The list of wp decoded sentences.
39
+ */
40
+ wp_decode(sequences: number[][]): string[];
41
+ /**
42
+ * Convert a list of lists of token ids into a list of strings by calling decode.
43
+ * @param {import('../../utils/tensor.js').Tensor[]} sequences List of tokenized input ids.
44
+ * @returns {{generated_text: string[], scores: number[], char_preds: string[], bpe_preds: string[], wp_preds: string[]}}
45
+ * Dictionary of all the outputs of the decoded results.
46
+ * - generated_text: The final results after fusion of char, bpe, and wp.
47
+ * - scores: The final scores after fusion of char, bpe, and wp.
48
+ * - char_preds: The list of character decoded sentences.
49
+ * - bpe_preds: The list of BPE decoded sentences.
50
+ * - wp_preds: The list of wp decoded sentences.
51
+ */
52
+ batch_decode([char_logits, bpe_logits, wp_logits]: import('../../utils/tensor.js').Tensor[]): {
53
+ generated_text: string[];
54
+ scores: number[];
55
+ char_preds: string[];
56
+ bpe_preds: string[];
57
+ wp_preds: string[];
58
+ };
59
+ _call(images: any, text?: any): Promise<any>;
60
+ }
61
+ import { Processor } from "../../base/processing_utils.js";
62
+ import { AutoTokenizer } from "../../tokenizers.js";
63
+ import { AutoImageProcessor } from "../auto/image_processing_auto.js";
64
+ //# sourceMappingURL=processing_mgp_str.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"processing_mgp_str.d.ts","sourceRoot":"","sources":["../../../src/models/mgp_str/processing_mgp_str.js"],"names":[],"mappings":"AAUA;IACI,6CAAsC;IACtC,wDAAiD;IAEjD;;OAEG;IACH,oEAEC;IAED;;OAEG;IACH,iEAEC;IAED;;OAEG;IACH,gEAEC;IAED;;;;;OAKG;IACH,4BAJW,OAAO,uBAAuB,EAAE,MAAM,UACtC,MAAM,GACJ,CAAC,MAAM,EAAE,EAAE,MAAM,EAAE,CAAC,CA0ChC;IAED;;;;OAIG;IACH,uBAHW,MAAM,EAAE,EAAE,GACR,MAAM,EAAE,CAIpB;IAED;;;;OAIG;IACH,sBAHW,MAAM,EAAE,EAAE,GACR,MAAM,EAAE,CAIpB;IAED;;;;OAIG;IACH,qBAHW,MAAM,EAAE,EAAE,GACR,MAAM,EAAE,CAIpB;IAED;;;;;;;;;;OAUG;IACH,mDATW,OAAO,uBAAuB,EAAE,MAAM,EAAE;wBACrB,MAAM,EAAE;gBAAU,MAAM,EAAE;oBAAc,MAAM,EAAE;mBAAa,MAAM,EAAE;kBAAY,MAAM,EAAE;MA4BtH;IAmBD,6CAQC;CACJ;0BAzKyB,gCAAgC;8BAE5B,qBAAqB;mCADhB,kCAAkC"}
@@ -0,0 +1,6 @@
1
+ export class MobileNetV1ImageProcessor extends ImageProcessor {
2
+ }
3
+ export class MobileNetV1FeatureExtractor extends MobileNetV1ImageProcessor {
4
+ }
5
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
6
+ //# sourceMappingURL=image_processing_mobilenet_v1.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image_processing_mobilenet_v1.d.ts","sourceRoot":"","sources":["../../../src/models/mobilenet_v1/image_processing_mobilenet_v1.js"],"names":[],"mappings":"AAKA;CAAiE;AACjE;CAA8E;+BAJvE,sCAAsC"}
@@ -0,0 +1,6 @@
1
+ export class MobileNetV2ImageProcessor extends ImageProcessor {
2
+ }
3
+ export class MobileNetV2FeatureExtractor extends MobileNetV2ImageProcessor {
4
+ }
5
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
6
+ //# sourceMappingURL=image_processing_mobilenet_v2.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image_processing_mobilenet_v2.d.ts","sourceRoot":"","sources":["../../../src/models/mobilenet_v2/image_processing_mobilenet_v2.js"],"names":[],"mappings":"AAKA;CAAiE;AACjE;CAA8E;+BAJvE,sCAAsC"}
@@ -0,0 +1,6 @@
1
+ export class MobileNetV3ImageProcessor extends ImageProcessor {
2
+ }
3
+ export class MobileNetV3FeatureExtractor extends MobileNetV3ImageProcessor {
4
+ }
5
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
6
+ //# sourceMappingURL=image_processing_mobilenet_v3.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image_processing_mobilenet_v3.d.ts","sourceRoot":"","sources":["../../../src/models/mobilenet_v3/image_processing_mobilenet_v3.js"],"names":[],"mappings":"AAKA;CAAiE;AACjE;CAA8E;+BAJvE,sCAAsC"}
@@ -0,0 +1,6 @@
1
+ export class MobileNetV4ImageProcessor extends ImageProcessor {
2
+ }
3
+ export class MobileNetV4FeatureExtractor extends MobileNetV4ImageProcessor {
4
+ }
5
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
6
+ //# sourceMappingURL=image_processing_mobilenet_v4.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image_processing_mobilenet_v4.d.ts","sourceRoot":"","sources":["../../../src/models/mobilenet_v4/image_processing_mobilenet_v4.js"],"names":[],"mappings":"AAKA;CAAiE;AACjE;CAA8E;+BAJvE,sCAAsC"}
@@ -0,0 +1,6 @@
1
+ export class MobileViTImageProcessor extends ImageProcessor {
2
+ }
3
+ export class MobileViTFeatureExtractor extends MobileViTImageProcessor {
4
+ }
5
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
6
+ //# sourceMappingURL=image_processing_mobilevit.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image_processing_mobilevit.d.ts","sourceRoot":"","sources":["../../../src/models/mobilevit/image_processing_mobilevit.js"],"names":[],"mappings":"AAIA;CAA+D;AAC/D;CAA0E;+BAHnE,sCAAsC"}
@@ -0,0 +1,4 @@
1
+ export class NougatImageProcessor extends DonutImageProcessor {
2
+ }
3
+ import { DonutImageProcessor } from "../donut/image_processing_donut.js";
4
+ //# sourceMappingURL=image_processing_nougat.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image_processing_nougat.d.ts","sourceRoot":"","sources":["../../../src/models/nougat/image_processing_nougat.js"],"names":[],"mappings":"AAIA;CAAiE;oCAH7B,oCAAoC"}
@@ -0,0 +1,4 @@
1
+ export class Owlv2ImageProcessor extends OwlViTImageProcessor {
2
+ }
3
+ import { OwlViTImageProcessor } from "../owlvit/image_processing_owlvit.js";
4
+ //# sourceMappingURL=image_processing_owlv2.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image_processing_owlv2.d.ts","sourceRoot":"","sources":["../../../src/models/owlv2/image_processing_owlv2.js"],"names":[],"mappings":"AAIA;CAAiE;qCAH5B,sCAAsC"}
@@ -0,0 +1,10 @@
1
+ export class OwlViTImageProcessor extends ImageProcessor {
2
+ post_process_object_detection(outputs: {
3
+ logits: import("../../transformers.js").Tensor;
4
+ pred_boxes: import("../../transformers.js").Tensor;
5
+ }, threshold?: number, target_sizes?: [number, number][], is_zero_shot?: boolean): any[];
6
+ }
7
+ export class OwlViTFeatureExtractor extends OwlViTImageProcessor {
8
+ }
9
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
10
+ //# sourceMappingURL=image_processing_owlvit.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image_processing_owlvit.d.ts","sourceRoot":"","sources":["../../../src/models/owlvit/image_processing_owlvit.js"],"names":[],"mappings":"AAKA;IAO4tE;;;6FAAu6F;CAFloK;AACD;CAAoE;+BAR7D,sCAAsC"}
@@ -0,0 +1,8 @@
1
+ export class OwlViTProcessor extends Processor {
2
+ static tokenizer_class: typeof AutoTokenizer;
3
+ static image_processor_class: typeof AutoImageProcessor;
4
+ }
5
+ import { Processor } from "../../base/processing_utils.js";
6
+ import { AutoTokenizer } from "../../tokenizers.js";
7
+ import { AutoImageProcessor } from "../auto/image_processing_auto.js";
8
+ //# sourceMappingURL=processing_owlvit.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"processing_owlvit.d.ts","sourceRoot":"","sources":["../../../src/models/owlvit/processing_owlvit.js"],"names":[],"mappings":"AAGA;IACI,6CAAsC;IACtC,wDAAiD;CACpD;0BANyB,gCAAgC;8BAE5B,qBAAqB;mCADhB,kCAAkC"}
@@ -0,0 +1,13 @@
1
+ export * from "./florence2/processing_florence2.js";
2
+ export * from "./mgp_str/processing_mgp_str.js";
3
+ export * from "./idefics3/processing_idefics3.js";
4
+ export * from "./janus/processing_janus.js";
5
+ export * from "./jina_clip/processing_jina_clip.js";
6
+ export * from "./owlvit/processing_owlvit.js";
7
+ export * from "./pyannote/processing_pyannote.js";
8
+ export * from "./qwen2_vl/processing_qwen2_vl.js";
9
+ export * from "./sam/processing_sam.js";
10
+ export * from "./speecht5/processing_speecht5.js";
11
+ export * from "./wav2vec2/processing_wav2vec2.js";
12
+ export * from "./whisper/processing_whisper.js";
13
+ //# sourceMappingURL=processors.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"processors.d.ts","sourceRoot":"","sources":["../../src/models/processors.js"],"names":[],"mappings":""}
@@ -0,0 +1,4 @@
1
+ export class PvtImageProcessor extends ImageProcessor {
2
+ }
3
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
4
+ //# sourceMappingURL=image_processing_pvt.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image_processing_pvt.d.ts","sourceRoot":"","sources":["../../../src/models/pvt/image_processing_pvt.js"],"names":[],"mappings":"AAIA;CAAyD;+BAFlD,sCAAsC"}
@@ -0,0 +1,13 @@
1
+ export class PyAnnoteFeatureExtractor extends FeatureExtractor {
2
+ /**
3
+ * Asynchronously extracts features from a given audio using the provided configuration.
4
+ * @param {Float32Array|Float64Array} audio The audio data as a Float32Array/Float64Array.
5
+ * @returns {Promise<{ input_values: Tensor; }>} The extracted input features.
6
+ */
7
+ _call(audio: Float32Array | Float64Array): Promise<{
8
+ input_values: Tensor;
9
+ }>;
10
+ }
11
+ import { FeatureExtractor } from '../../base/feature_extraction_utils.js';
12
+ import { Tensor } from '../../utils/tensor.js';
13
+ //# sourceMappingURL=feature_extraction_pyannote.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"feature_extraction_pyannote.d.ts","sourceRoot":"","sources":["../../../src/models/pyannote/feature_extraction_pyannote.js"],"names":[],"mappings":"AAIA;IACI;;;;OAIG;IACH,aAHW,YAAY,GAAC,YAAY,GACvB,QAAQ;QAAE,YAAY,EAAE,MAAM,CAAC;KAAE,CAAC,CAiB9C;CAEJ;iCA3BuD,wCAAwC;uBACzE,uBAAuB"}
@@ -0,0 +1,30 @@
1
+ export class PyAnnoteProcessor extends Processor {
2
+ static feature_extractor_class: typeof AutoFeatureExtractor;
3
+ /**
4
+ * Calls the feature_extractor function with the given audio input.
5
+ * @param {any} audio The audio input to extract features from.
6
+ * @returns {Promise<any>} A Promise that resolves with the extracted features.
7
+ */
8
+ _call(audio: any): Promise<any>;
9
+ /**
10
+ * NOTE: Can return fractional values. `Math.ceil` will ensure correct value.
11
+ * @param {number} samples The number of samples in the audio.
12
+ * @returns {number} The number of frames in the audio.
13
+ */
14
+ samples_to_frames(samples: number): number;
15
+ /**
16
+ * Post-processes the speaker diarization logits output by the model.
17
+ * @param {import('../../utils/tensor.js').Tensor} logits The speaker diarization logits output by the model.
18
+ * @param {number} num_samples Number of samples in the input audio.
19
+ * @returns {Array<Array<{ id: number, start: number, end: number, confidence: number }>>} The post-processed speaker diarization results.
20
+ */
21
+ post_process_speaker_diarization(logits: import('../../utils/tensor.js').Tensor, num_samples: number): Array<Array<{
22
+ id: number;
23
+ start: number;
24
+ end: number;
25
+ confidence: number;
26
+ }>>;
27
+ }
28
+ import { Processor } from '../../base/processing_utils.js';
29
+ import { AutoFeatureExtractor } from '../auto/feature_extraction_auto.js';
30
+ //# sourceMappingURL=processing_pyannote.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"processing_pyannote.d.ts","sourceRoot":"","sources":["../../../src/models/pyannote/processing_pyannote.js"],"names":[],"mappings":"AAIA;IACI,4DAAqD;IAErD;;;;OAIG;IACH,aAHW,GAAG,GACD,QAAQ,GAAG,CAAC,CAIxB;IAED;;;;OAIG;IACH,2BAHW,MAAM,GACJ,MAAM,CAIlB;IAED;;;;;OAKG;IACH,yCAJW,OAAO,uBAAuB,EAAE,MAAM,eACtC,MAAM,GACJ,MAAM,MAAM;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,GAAG,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC,CAwCxF;CACJ;0BAtEyB,gCAAgC;qCACrB,oCAAoC"}
@@ -0,0 +1,11 @@
1
+ export class Qwen2VLImageProcessor extends ImageProcessor {
2
+ _call(images: any, ...args: any[]): Promise<{
3
+ pixel_values: Tensor;
4
+ image_grid_thw: Tensor;
5
+ original_sizes: import("../../base/image_processors_utils.js").HeightWidth[];
6
+ reshaped_input_sizes: import("../../base/image_processors_utils.js").HeightWidth[];
7
+ }>;
8
+ }
9
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
10
+ import { Tensor } from "../../utils/tensor.js";
11
+ //# sourceMappingURL=image_processing_qwen2_vl.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image_processing_qwen2_vl.d.ts","sourceRoot":"","sources":["../../../src/models/qwen2_vl/image_processing_qwen2_vl.js"],"names":[],"mappings":"AAKA;IACI;;;;;OA2CC;CACJ;+BAhDM,sCAAsC;uBACjB,uBAAuB"}
@@ -0,0 +1,17 @@
1
+ export class Qwen2VLProcessor extends Processor {
2
+ static image_processor_class: typeof AutoImageProcessor;
3
+ static tokenizer_class: typeof AutoTokenizer;
4
+ /**
5
+ *
6
+ * @param {string|string[]} text
7
+ * @param {RawImage|RawImage[]} images
8
+ * @param {...any} args
9
+ * @returns {Promise<any>}
10
+ */
11
+ _call(text: string | string[], images?: RawImage | RawImage[], ...args: any[]): Promise<any>;
12
+ }
13
+ import { Processor } from "../../base/processing_utils.js";
14
+ import { RawImage } from "../../utils/image.js";
15
+ import { AutoImageProcessor } from "../auto/image_processing_auto.js";
16
+ import { AutoTokenizer } from "../../tokenizers.js";
17
+ //# sourceMappingURL=processing_qwen2_vl.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"processing_qwen2_vl.d.ts","sourceRoot":"","sources":["../../../src/models/qwen2_vl/processing_qwen2_vl.js"],"names":[],"mappings":"AAKA;IACI,wDAAiD;IACjD,6CAAsC;IAEtC;;;;;;OAMG;IACH,YALW,MAAM,GAAC,MAAM,EAAE,WACf,QAAQ,GAAC,QAAQ,EAAE,WACf,GAAG,KACL,QAAQ,GAAG,CAAC,CAoCxB;CACJ;0BAnDyB,gCAAgC;yBAGjC,sBAAsB;mCAFZ,kCAAkC;8BACvC,qBAAqB"}
@@ -0,0 +1,8 @@
1
+ export class RTDetrImageProcessor extends ImageProcessor {
2
+ post_process_object_detection(outputs: {
3
+ logits: import("../../transformers.js").Tensor;
4
+ pred_boxes: import("../../transformers.js").Tensor;
5
+ }, threshold?: number, target_sizes?: [number, number][], is_zero_shot?: boolean): any[];
6
+ }
7
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
8
+ //# sourceMappingURL=image_processing_rt_detr.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image_processing_rt_detr.d.ts","sourceRoot":"","sources":["../../../src/models/rt_detr/image_processing_rt_detr.js"],"names":[],"mappings":"AAMA;IAMgyE;;;6FAAu6F;CADtsK;+BARM,sCAAsC"}
@@ -0,0 +1,103 @@
1
+ /**
2
+ * @typedef {object} SamImageProcessorResult
3
+ * @property {Tensor} pixel_values
4
+ * @property {import("../../base/image_processors_utils.js").HeightWidth[]} original_sizes
5
+ * @property {import("../../base/image_processors_utils.js").HeightWidth[]} reshaped_input_sizes
6
+ * @property {Tensor} [input_points]
7
+ * @property {Tensor} [input_labels]
8
+ * @property {Tensor} [input_boxes]
9
+ */
10
+ export class SamImageProcessor extends ImageProcessor {
11
+ /**
12
+ *
13
+ * @param {any} input_points
14
+ * @param {import("../../base/image_processors_utils.js").HeightWidth[]} original_sizes
15
+ * @param {import("../../base/image_processors_utils.js").HeightWidth[]} reshaped_input_sizes
16
+ * @returns {Tensor}
17
+ */
18
+ reshape_input_points(input_points: any, original_sizes: import("../../base/image_processors_utils.js").HeightWidth[], reshaped_input_sizes: import("../../base/image_processors_utils.js").HeightWidth[], is_bounding_box?: boolean): Tensor;
19
+ /**
20
+ *
21
+ * @param {any} input_labels
22
+ * @param {Tensor} input_points
23
+ * @returns {Tensor}
24
+ */
25
+ add_input_labels(input_labels: any, input_points: Tensor): Tensor;
26
+ /**
27
+ * @param {any[]} images The URL(s) of the image(s) to extract features from.
28
+ * @param {Object} [options] Additional options for the processor.
29
+ * @param {any} [options.input_points=null] A 3D or 4D array, representing the input points provided by the user.
30
+ * - 3D: `[point_batch_size, nb_points_per_image, 2]`. In this case, `batch_size` is assumed to be 1.
31
+ * - 4D: `[batch_size, point_batch_size, nb_points_per_image, 2]`.
32
+ * @param {any} [options.input_labels=null] A 2D or 3D array, representing the input labels for the points, used by the prompt encoder to encode the prompt.
33
+ * - 2D: `[point_batch_size, nb_points_per_image]`. In this case, `batch_size` is assumed to be 1.
34
+ * - 3D: `[batch_size, point_batch_size, nb_points_per_image]`.
35
+ * @param {number[][][]} [options.input_boxes=null] A 3D array of shape `(batch_size, num_boxes, 4)`, representing the input boxes provided by the user.
36
+ * This is used by the prompt encoder to encode the prompt. Generally yields much better generated masks.
37
+ * The processor will generate a tensor, with each dimension corresponding respectively to the image batch size,
38
+ * the number of boxes per image and the coordinates of the top left and bottom right point of the box.
39
+ * In the order (`x1`, `y1`, `x2`, `y2`):
40
+ * - `x1`: the x coordinate of the top left point of the input box
41
+ * - `y1`: the y coordinate of the top left point of the input box
42
+ * - `x2`: the x coordinate of the bottom right point of the input box
43
+ * - `y2`: the y coordinate of the bottom right point of the input box
44
+ * @returns {Promise<SamImageProcessorResult>}
45
+ */
46
+ _call(images: any[], { input_points, input_labels, input_boxes }?: {
47
+ input_points?: any;
48
+ input_labels?: any;
49
+ input_boxes?: number[][][];
50
+ }): Promise<SamImageProcessorResult>;
51
+ /**
52
+ * Remove padding and upscale masks to the original image size.
53
+ * @param {Tensor} masks Batched masks from the mask_decoder in (batch_size, num_channels, height, width) format.
54
+ * @param {[number, number][]} original_sizes The original sizes of each image before it was resized to the model's expected input shape, in (height, width) format.
55
+ * @param {[number, number][]} reshaped_input_sizes The size of each image as it is fed to the model, in (height, width) format. Used to remove padding.
56
+ * @param {Object} options Optional parameters for post-processing.
57
+ * @param {number} [options.mask_threshold] The threshold to use for binarizing the masks.
58
+ * @param {boolean} [options.binarize] Whether to binarize the masks.
59
+ * @param {Object} [options.pad_size] The target size the images were padded to before being passed to the model. If `null`, the target size is assumed to be the processor's `pad_size`.
60
+ * @param {number} [options.pad_size.height] The height the images were padded to.
61
+ * @param {number} [options.pad_size.width] The width the images were padded to.
62
+ * @returns {Promise<Tensor[]>} Batched masks in (batch_size, num_channels, height, width) format, where (height, width) is given by original_size.
63
+ */
64
+ post_process_masks(masks: Tensor, original_sizes: [number, number][], reshaped_input_sizes: [number, number][], { mask_threshold, binarize, pad_size, }?: {
65
+ mask_threshold?: number;
66
+ binarize?: boolean;
67
+ pad_size?: {
68
+ height?: number;
69
+ width?: number;
70
+ };
71
+ }): Promise<Tensor[]>;
72
+ /**
73
+ * Generates a list of crop boxes of different sizes. Each layer has (2**i)**2 boxes for the ith layer.
74
+ * @param {import("../../utils/image.js").RawImage} image Input original image
75
+ * @param {number} target_size Target size of the resized image
76
+ * @param {Object} options Options for generating crop boxes
77
+ * @param {number} [options.crop_n_layers] If >0, mask prediction will be run again on crops of the image.
78
+ * Sets the number of layers to run, where each layer has 2**i_layer number of image crops.
79
+ * @param {number} [options.overlap_ratio] Sets the degree to which crops overlap. In the first crop layer,
80
+ * crops will overlap by this fraction of the image length. Later layers with more crops scale down this overlap.
81
+ * @param {number} [options.points_per_crop] Number of points to sample from each crop.
82
+ * @param {number} [options.crop_n_points_downscale_factor] The number of points-per-side sampled in layer n is
83
+ * scaled down by crop_n_points_downscale_factor**n.
84
+ * @returns {Object} An object containing the crop boxes, number of points per crop, cropped images, and input labels.
85
+ */
86
+ generate_crop_boxes(image: import("../../utils/image.js").RawImage, target_size: number, { crop_n_layers, overlap_ratio, points_per_crop, crop_n_points_downscale_factor, }?: {
87
+ crop_n_layers?: number;
88
+ overlap_ratio?: number;
89
+ points_per_crop?: number;
90
+ crop_n_points_downscale_factor?: number;
91
+ }): any;
92
+ }
93
+ export type SamImageProcessorResult = {
94
+ pixel_values: Tensor;
95
+ original_sizes: import("../../base/image_processors_utils.js").HeightWidth[];
96
+ reshaped_input_sizes: import("../../base/image_processors_utils.js").HeightWidth[];
97
+ input_points?: Tensor;
98
+ input_labels?: Tensor;
99
+ input_boxes?: Tensor;
100
+ };
101
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
102
+ import { Tensor } from "../../utils/tensor.js";
103
+ //# sourceMappingURL=image_processing_sam.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image_processing_sam.d.ts","sourceRoot":"","sources":["../../../src/models/sam/image_processing_sam.js"],"names":[],"mappings":"AAWA;;;;;;;;GAQG;AAEH;IAEI;;;;;;OAMG;IACH,mCALW,GAAG,kBACH,OAAO,sCAAsC,EAAE,WAAW,EAAE,wBAC5D,OAAO,sCAAsC,EAAE,WAAW,EAAE,8BAC1D,MAAM,CA4ClB;IAED;;;;;OAKG;IACH,+BAJW,GAAG,gBACH,MAAM,GACJ,MAAM,CAoBlB;IACD;;;;;;;;;;;;;;;;;;;OAmBG;IACH,cAnBW,GAAG,EAAE;QAES,YAAY,GAA1B,GAAG;QAGW,YAAY,GAA1B,GAAG;QAGoB,WAAW,GAAlC,MAAM,EAAE,EAAE,EAAE;QASV,QAAQ,uBAAuB,CAAC,CA+B5C;IAED;;;;;;;;;;;;OAYG;IACH,0BAXW,MAAM,kBACN,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,wBAClB,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE;QAED,cAAc,GAA/B,MAAM;QACY,QAAQ,GAA1B,OAAO;QACU,QAAQ;YACC,MAAM,GAAhC,MAAM;YACoB,KAAK,GAA/B,MAAM;;QACJ,QAAQ,MAAM,EAAE,CAAC,CAsD7B;IAED;;;;;;;;;;;;;OAaG;IACH,2BAZW,OAAO,sBAAsB,EAAE,QAAQ,eACvC,MAAM;QAEW,aAAa,GAA9B,MAAM;QAEW,aAAa,GAA9B,MAAM;QAEW,eAAe,GAAhC,MAAM;QACW,8BAA8B,GAA/C,MAAM;YAYhB;CACJ;;kBAnOa,MAAM;oBACN,OAAO,sCAAsC,EAAE,WAAW,EAAE;0BAC5D,OAAO,sCAAsC,EAAE,WAAW,EAAE;mBAC5D,MAAM;mBACN,MAAM;kBACN,MAAM;;+BAhBb,sCAAsC;uBAMtC,uBAAuB"}
@@ -0,0 +1,9 @@
1
+ export class SamProcessor extends Processor {
2
+ static image_processor_class: typeof AutoImageProcessor;
3
+ _call(...args: any[]): Promise<any>;
4
+ post_process_masks(...args: any[]): any;
5
+ reshape_input_points(...args: any[]): any;
6
+ }
7
+ import { Processor } from "../../base/processing_utils.js";
8
+ import { AutoImageProcessor } from "../auto/image_processing_auto.js";
9
+ //# sourceMappingURL=processing_sam.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"processing_sam.d.ts","sourceRoot":"","sources":["../../../src/models/sam/processing_sam.js"],"names":[],"mappings":"AAGA;IACI,wDAAiD;IAEjD,oCAEC;IAED,wCAGC;IAED,0CAGC;CACJ;0BAnByB,gCAAgC;mCACvB,kCAAkC"}