@huggingface/transformers 3.0.2 → 3.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -4
- package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
- package/dist/transformers.cjs +16655 -13040
- package/dist/transformers.cjs.map +1 -1
- package/dist/transformers.js +17095 -13468
- package/dist/transformers.js.map +1 -1
- package/dist/transformers.min.cjs +244 -52
- package/dist/transformers.min.cjs.map +1 -1
- package/dist/transformers.min.js +235 -43
- package/dist/transformers.min.js.map +1 -1
- package/dist/transformers.min.mjs +246 -54
- package/dist/transformers.min.mjs.map +1 -1
- package/dist/transformers.mjs +16818 -13202
- package/dist/transformers.mjs.map +1 -1
- package/package.json +4 -4
- package/src/base/feature_extraction_utils.js +54 -0
- package/src/base/image_processors_utils.js +1089 -0
- package/src/base/processing_utils.js +145 -0
- package/src/configs.js +15 -4
- package/src/env.js +6 -6
- package/src/generation/configuration_utils.js +7 -0
- package/src/generation/logits_process.js +22 -16
- package/src/generation/streamers.js +7 -2
- package/src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js +90 -0
- package/src/models/auto/feature_extraction_auto.js +41 -0
- package/src/models/auto/image_processing_auto.js +29 -0
- package/src/models/auto/processing_auto.js +100 -0
- package/src/models/beit/image_processing_beit.js +5 -0
- package/src/models/bit/image_processing_bit.js +5 -0
- package/src/models/chinese_clip/image_processing_chinese_clip.js +5 -0
- package/src/models/clap/feature_extraction_clap.js +159 -0
- package/src/models/clip/image_processing_clip.js +6 -0
- package/src/models/convnext/image_processing_convnext.js +45 -0
- package/src/models/deit/image_processing_deit.js +6 -0
- package/src/models/detr/image_processing_detr.js +52 -0
- package/src/models/donut/image_processing_donut.js +31 -0
- package/src/models/dpt/image_processing_dpt.js +6 -0
- package/src/models/efficientnet/image_processing_efficientnet.js +13 -0
- package/src/models/feature_extractors.js +12 -0
- package/src/models/florence2/processing_florence2.js +128 -0
- package/src/models/glpn/image_processing_glpn.js +5 -0
- package/src/models/idefics3/image_processing_idefics3.js +219 -0
- package/src/models/idefics3/processing_idefics3.js +136 -0
- package/src/models/image_processors.js +37 -0
- package/src/models/janus/image_processing_janus.js +26 -0
- package/src/models/janus/processing_janus.js +123 -0
- package/src/models/jina_clip/image_processing_jina_clip.js +26 -0
- package/src/models/jina_clip/processing_jina_clip.js +24 -0
- package/src/models/llava_onevision/image_processing_llava_onevision.js +5 -0
- package/src/models/mask2former/image_processing_mask2former.js +5 -0
- package/src/models/maskformer/image_processing_maskformer.js +18 -0
- package/src/models/mgp_str/processing_mgp_str.js +170 -0
- package/src/models/mobilenet_v1/image_processing_mobilenet_v1.js +7 -0
- package/src/models/mobilenet_v2/image_processing_mobilenet_v2.js +7 -0
- package/src/models/mobilenet_v3/image_processing_mobilenet_v3.js +7 -0
- package/src/models/mobilenet_v4/image_processing_mobilenet_v4.js +7 -0
- package/src/models/mobilevit/image_processing_mobilevit.js +6 -0
- package/src/models/nougat/image_processing_nougat.js +5 -0
- package/src/models/owlv2/image_processing_owlv2.js +5 -0
- package/src/models/owlvit/image_processing_owlvit.js +12 -0
- package/src/models/owlvit/processing_owlvit.js +7 -0
- package/src/models/processors.js +12 -0
- package/src/models/pvt/image_processing_pvt.js +5 -0
- package/src/models/pyannote/feature_extraction_pyannote.js +28 -0
- package/src/models/pyannote/processing_pyannote.js +71 -0
- package/src/models/qwen2_vl/image_processing_qwen2_vl.js +52 -0
- package/src/models/qwen2_vl/processing_qwen2_vl.js +52 -0
- package/src/models/rt_detr/image_processing_rt_detr.js +12 -0
- package/src/models/sam/image_processing_sam.js +242 -0
- package/src/models/sam/processing_sam.js +20 -0
- package/src/models/sapiens/image_processing_sapiens.js +13 -0
- package/src/models/seamless_m4t/feature_extraction_seamless_m4t.js +180 -0
- package/src/models/segformer/image_processing_segformer.js +13 -0
- package/src/models/siglip/image_processing_siglip.js +5 -0
- package/src/models/speecht5/feature_extraction_speecht5.js +4 -0
- package/src/models/speecht5/processing_speecht5.js +17 -0
- package/src/models/swin2sr/image_processing_swin2sr.js +24 -0
- package/src/models/vit/image_processing_vit.js +7 -0
- package/src/models/vitmatte/image_processing_vitmatte.js +50 -0
- package/src/models/vitpose/image_processing_vitpose.js +89 -0
- package/src/models/wav2vec2/feature_extraction_wav2vec2.js +44 -0
- package/src/models/wav2vec2/processing_wav2vec2.js +15 -0
- package/src/models/wespeaker/feature_extraction_wespeaker.js +100 -0
- package/src/models/whisper/feature_extraction_whisper.js +84 -0
- package/src/models/whisper/processing_whisper.js +21 -0
- package/src/models/yolos/image_processing_yolos.js +12 -0
- package/src/models.js +755 -34
- package/src/pipelines.js +8 -8
- package/src/tokenizers.js +5 -0
- package/src/transformers.js +15 -2
- package/src/utils/constants.js +8 -1
- package/src/utils/core.js +51 -9
- package/src/utils/dtypes.js +2 -1
- package/src/utils/hub.js +2 -1
- package/src/utils/image.js +87 -33
- package/src/utils/tensor.js +39 -2
- package/types/base/feature_extraction_utils.d.ts +41 -0
- package/types/base/feature_extraction_utils.d.ts.map +1 -0
- package/types/base/image_processors_utils.d.ts +323 -0
- package/types/base/image_processors_utils.d.ts.map +1 -0
- package/types/base/processing_utils.d.ts +80 -0
- package/types/base/processing_utils.d.ts.map +1 -0
- package/types/configs.d.ts +5 -2
- package/types/configs.d.ts.map +1 -1
- package/types/env.d.ts +1 -1
- package/types/env.d.ts.map +1 -1
- package/types/generation/configuration_utils.d.ts +6 -0
- package/types/generation/configuration_utils.d.ts.map +1 -1
- package/types/generation/logits_process.d.ts +30 -20
- package/types/generation/logits_process.d.ts.map +1 -1
- package/types/generation/streamers.d.ts +13 -8
- package/types/generation/streamers.d.ts.map +1 -1
- package/types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts +25 -0
- package/types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts.map +1 -0
- package/types/models/auto/feature_extraction_auto.d.ts +5 -0
- package/types/models/auto/feature_extraction_auto.d.ts.map +1 -0
- package/types/models/auto/image_processing_auto.d.ts +5 -0
- package/types/models/auto/image_processing_auto.d.ts.map +1 -0
- package/types/models/auto/processing_auto.d.ts +35 -0
- package/types/models/auto/processing_auto.d.ts.map +1 -0
- package/types/models/beit/image_processing_beit.d.ts +4 -0
- package/types/models/beit/image_processing_beit.d.ts.map +1 -0
- package/types/models/bit/image_processing_bit.d.ts +4 -0
- package/types/models/bit/image_processing_bit.d.ts.map +1 -0
- package/types/models/chinese_clip/image_processing_chinese_clip.d.ts +4 -0
- package/types/models/chinese_clip/image_processing_chinese_clip.d.ts.map +1 -0
- package/types/models/clap/feature_extraction_clap.d.ts +57 -0
- package/types/models/clap/feature_extraction_clap.d.ts.map +1 -0
- package/types/models/clip/image_processing_clip.d.ts +6 -0
- package/types/models/clip/image_processing_clip.d.ts.map +1 -0
- package/types/models/convnext/image_processing_convnext.d.ts +12 -0
- package/types/models/convnext/image_processing_convnext.d.ts.map +1 -0
- package/types/models/deit/image_processing_deit.d.ts +6 -0
- package/types/models/deit/image_processing_deit.d.ts.map +1 -0
- package/types/models/detr/image_processing_detr.d.ts +42 -0
- package/types/models/detr/image_processing_detr.d.ts.map +1 -0
- package/types/models/donut/image_processing_donut.d.ts +7 -0
- package/types/models/donut/image_processing_donut.d.ts.map +1 -0
- package/types/models/dpt/image_processing_dpt.d.ts +6 -0
- package/types/models/dpt/image_processing_dpt.d.ts.map +1 -0
- package/types/models/efficientnet/image_processing_efficientnet.d.ts +6 -0
- package/types/models/efficientnet/image_processing_efficientnet.d.ts.map +1 -0
- package/types/models/feature_extractors.d.ts +10 -0
- package/types/models/feature_extractors.d.ts.map +1 -0
- package/types/models/florence2/processing_florence2.d.ts +39 -0
- package/types/models/florence2/processing_florence2.d.ts.map +1 -0
- package/types/models/glpn/image_processing_glpn.d.ts +4 -0
- package/types/models/glpn/image_processing_glpn.d.ts.map +1 -0
- package/types/models/idefics3/image_processing_idefics3.d.ts +40 -0
- package/types/models/idefics3/image_processing_idefics3.d.ts.map +1 -0
- package/types/models/idefics3/processing_idefics3.d.ts +19 -0
- package/types/models/idefics3/processing_idefics3.d.ts.map +1 -0
- package/types/models/image_processors.d.ts +37 -0
- package/types/models/image_processors.d.ts.map +1 -0
- package/types/models/janus/image_processing_janus.d.ts +7 -0
- package/types/models/janus/image_processing_janus.d.ts.map +1 -0
- package/types/models/janus/processing_janus.d.ts +77 -0
- package/types/models/janus/processing_janus.d.ts.map +1 -0
- package/types/models/jina_clip/image_processing_jina_clip.d.ts +5 -0
- package/types/models/jina_clip/image_processing_jina_clip.d.ts.map +1 -0
- package/types/models/jina_clip/processing_jina_clip.d.ts +9 -0
- package/types/models/jina_clip/processing_jina_clip.d.ts.map +1 -0
- package/types/models/llava_onevision/image_processing_llava_onevision.d.ts +4 -0
- package/types/models/llava_onevision/image_processing_llava_onevision.d.ts.map +1 -0
- package/types/models/mask2former/image_processing_mask2former.d.ts +4 -0
- package/types/models/mask2former/image_processing_mask2former.d.ts.map +1 -0
- package/types/models/maskformer/image_processing_maskformer.d.ts +22 -0
- package/types/models/maskformer/image_processing_maskformer.d.ts.map +1 -0
- package/types/models/mgp_str/processing_mgp_str.d.ts +64 -0
- package/types/models/mgp_str/processing_mgp_str.d.ts.map +1 -0
- package/types/models/mobilenet_v1/image_processing_mobilenet_v1.d.ts +6 -0
- package/types/models/mobilenet_v1/image_processing_mobilenet_v1.d.ts.map +1 -0
- package/types/models/mobilenet_v2/image_processing_mobilenet_v2.d.ts +6 -0
- package/types/models/mobilenet_v2/image_processing_mobilenet_v2.d.ts.map +1 -0
- package/types/models/mobilenet_v3/image_processing_mobilenet_v3.d.ts +6 -0
- package/types/models/mobilenet_v3/image_processing_mobilenet_v3.d.ts.map +1 -0
- package/types/models/mobilenet_v4/image_processing_mobilenet_v4.d.ts +6 -0
- package/types/models/mobilenet_v4/image_processing_mobilenet_v4.d.ts.map +1 -0
- package/types/models/mobilevit/image_processing_mobilevit.d.ts +6 -0
- package/types/models/mobilevit/image_processing_mobilevit.d.ts.map +1 -0
- package/types/models/nougat/image_processing_nougat.d.ts +4 -0
- package/types/models/nougat/image_processing_nougat.d.ts.map +1 -0
- package/types/models/owlv2/image_processing_owlv2.d.ts +4 -0
- package/types/models/owlv2/image_processing_owlv2.d.ts.map +1 -0
- package/types/models/owlvit/image_processing_owlvit.d.ts +10 -0
- package/types/models/owlvit/image_processing_owlvit.d.ts.map +1 -0
- package/types/models/owlvit/processing_owlvit.d.ts +8 -0
- package/types/models/owlvit/processing_owlvit.d.ts.map +1 -0
- package/types/models/processors.d.ts +13 -0
- package/types/models/processors.d.ts.map +1 -0
- package/types/models/pvt/image_processing_pvt.d.ts +4 -0
- package/types/models/pvt/image_processing_pvt.d.ts.map +1 -0
- package/types/models/pyannote/feature_extraction_pyannote.d.ts +13 -0
- package/types/models/pyannote/feature_extraction_pyannote.d.ts.map +1 -0
- package/types/models/pyannote/processing_pyannote.d.ts +30 -0
- package/types/models/pyannote/processing_pyannote.d.ts.map +1 -0
- package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts +11 -0
- package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts.map +1 -0
- package/types/models/qwen2_vl/processing_qwen2_vl.d.ts +17 -0
- package/types/models/qwen2_vl/processing_qwen2_vl.d.ts.map +1 -0
- package/types/models/rt_detr/image_processing_rt_detr.d.ts +8 -0
- package/types/models/rt_detr/image_processing_rt_detr.d.ts.map +1 -0
- package/types/models/sam/image_processing_sam.d.ts +103 -0
- package/types/models/sam/image_processing_sam.d.ts.map +1 -0
- package/types/models/sam/processing_sam.d.ts +9 -0
- package/types/models/sam/processing_sam.d.ts.map +1 -0
- package/types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts +34 -0
- package/types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts.map +1 -0
- package/types/models/segformer/image_processing_segformer.d.ts +10 -0
- package/types/models/segformer/image_processing_segformer.d.ts.map +1 -0
- package/types/models/siglip/image_processing_siglip.d.ts +4 -0
- package/types/models/siglip/image_processing_siglip.d.ts.map +1 -0
- package/types/models/speecht5/feature_extraction_speecht5.d.ts +4 -0
- package/types/models/speecht5/feature_extraction_speecht5.d.ts.map +1 -0
- package/types/models/speecht5/processing_speecht5.d.ts +14 -0
- package/types/models/speecht5/processing_speecht5.d.ts.map +1 -0
- package/types/models/swin2sr/image_processing_swin2sr.d.ts +5 -0
- package/types/models/swin2sr/image_processing_swin2sr.d.ts.map +1 -0
- package/types/models/vit/image_processing_vit.d.ts +6 -0
- package/types/models/vit/image_processing_vit.d.ts.map +1 -0
- package/types/models/vitmatte/image_processing_vitmatte.d.ts +12 -0
- package/types/models/vitmatte/image_processing_vitmatte.d.ts.map +1 -0
- package/types/models/vitpose/image_processing_vitpose.d.ts +26 -0
- package/types/models/vitpose/image_processing_vitpose.d.ts.map +1 -0
- package/types/models/wav2vec2/feature_extraction_wav2vec2.d.ts +19 -0
- package/types/models/wav2vec2/feature_extraction_wav2vec2.d.ts.map +1 -0
- package/types/models/wav2vec2/processing_wav2vec2.d.ts +12 -0
- package/types/models/wav2vec2/processing_wav2vec2.d.ts.map +1 -0
- package/types/models/wespeaker/feature_extraction_wespeaker.d.ts +23 -0
- package/types/models/wespeaker/feature_extraction_wespeaker.d.ts.map +1 -0
- package/types/models/whisper/feature_extraction_whisper.d.ts +21 -0
- package/types/models/whisper/feature_extraction_whisper.d.ts.map +1 -0
- package/types/models/whisper/processing_whisper.d.ts +17 -0
- package/types/models/whisper/processing_whisper.d.ts.map +1 -0
- package/types/models/yolos/image_processing_yolos.d.ts +10 -0
- package/types/models/yolos/image_processing_yolos.d.ts.map +1 -0
- package/types/models.d.ts +150 -0
- package/types/models.d.ts.map +1 -1
- package/types/pipelines.d.ts +2 -3
- package/types/pipelines.d.ts.map +1 -1
- package/types/tokenizers.d.ts +3 -0
- package/types/tokenizers.d.ts.map +1 -1
- package/types/transformers.d.ts +10 -1
- package/types/utils/constants.d.ts +6 -0
- package/types/utils/constants.d.ts.map +1 -1
- package/types/utils/core.d.ts +65 -3
- package/types/utils/core.d.ts.map +1 -1
- package/types/utils/dtypes.d.ts +3 -2
- package/types/utils/dtypes.d.ts.map +1 -1
- package/types/utils/hub.d.ts +1 -1
- package/types/utils/hub.d.ts.map +1 -1
- package/types/utils/image.d.ts +14 -2
- package/types/utils/image.d.ts.map +1 -1
- package/types/utils/tensor.d.ts +39 -4
- package/types/utils/tensor.d.ts.map +1 -1
- package/src/processors.js +0 -2655
- package/types/processors.d.ts +0 -924
- package/types/processors.d.ts.map +0 -1
|
@@ -16,18 +16,23 @@ export class TextStreamer extends BaseStreamer {
|
|
|
16
16
|
/**
|
|
17
17
|
*
|
|
18
18
|
* @param {import('../tokenizers.js').PreTrainedTokenizer} tokenizer
|
|
19
|
+
* @param {Object} options
|
|
20
|
+
* @param {boolean} [options.skip_prompt=false] Whether to skip the prompt tokens
|
|
21
|
+
* @param {function(string): void} [options.callback_function=null] Function to call when a piece of text is ready to display
|
|
22
|
+
* @param {function(bigint[]): void} [options.token_callback_function=null] Function to call when a new token is generated
|
|
23
|
+
* @param {Object} [options.decode_kwargs={}] Additional keyword arguments to pass to the tokenizer's decode method
|
|
19
24
|
*/
|
|
20
25
|
constructor(tokenizer: import('../tokenizers.js').PreTrainedTokenizer, { skip_prompt, callback_function, token_callback_function, decode_kwargs, ...kwargs }?: {
|
|
21
26
|
skip_prompt?: boolean;
|
|
22
|
-
callback_function?:
|
|
23
|
-
token_callback_function?:
|
|
24
|
-
decode_kwargs?:
|
|
27
|
+
callback_function?: (arg0: string) => void;
|
|
28
|
+
token_callback_function?: (arg0: bigint[]) => void;
|
|
29
|
+
decode_kwargs?: any;
|
|
25
30
|
});
|
|
26
31
|
tokenizer: import("../tokenizers.js").PreTrainedTokenizer;
|
|
27
32
|
skip_prompt: boolean;
|
|
28
|
-
callback_function: any;
|
|
29
|
-
token_callback_function:
|
|
30
|
-
decode_kwargs:
|
|
33
|
+
callback_function: (x: any) => void;
|
|
34
|
+
token_callback_function: (arg0: bigint[]) => void;
|
|
35
|
+
decode_kwargs: any;
|
|
31
36
|
token_cache: any[];
|
|
32
37
|
print_len: number;
|
|
33
38
|
next_tokens_are_prompt: boolean;
|
|
@@ -52,7 +57,7 @@ export class WhisperTextStreamer extends TextStreamer {
|
|
|
52
57
|
* @param {Object} options
|
|
53
58
|
* @param {boolean} [options.skip_prompt=false] Whether to skip the prompt tokens
|
|
54
59
|
* @param {function(string): void} [options.callback_function=null] Function to call when a piece of text is ready to display
|
|
55
|
-
* @param {function(
|
|
60
|
+
* @param {function(bigint[]): void} [options.token_callback_function=null] Function to call when a new token is generated
|
|
56
61
|
* @param {function(number): void} [options.on_chunk_start=null] Function to call when a new chunk starts
|
|
57
62
|
* @param {function(number): void} [options.on_chunk_end=null] Function to call when a chunk ends
|
|
58
63
|
* @param {function(): void} [options.on_finalize=null] Function to call when the stream is finalized
|
|
@@ -63,7 +68,7 @@ export class WhisperTextStreamer extends TextStreamer {
|
|
|
63
68
|
constructor(tokenizer: import('../tokenizers.js').WhisperTokenizer, { skip_prompt, callback_function, token_callback_function, on_chunk_start, on_chunk_end, on_finalize, time_precision, skip_special_tokens, decode_kwargs, }?: {
|
|
64
69
|
skip_prompt?: boolean;
|
|
65
70
|
callback_function?: (arg0: string) => void;
|
|
66
|
-
token_callback_function?: (arg0:
|
|
71
|
+
token_callback_function?: (arg0: bigint[]) => void;
|
|
67
72
|
on_chunk_start?: (arg0: number) => void;
|
|
68
73
|
on_chunk_end?: (arg0: number) => void;
|
|
69
74
|
on_finalize?: () => void;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"streamers.d.ts","sourceRoot":"","sources":["../../src/generation/streamers.js"],"names":[],"mappings":"AASA;IACI;;;OAGG;IACH,WAFW,MAAM,EAAE,EAAE,QAIpB;IAED;;OAEG;IACH,YAEC;CACJ;AAMD;;GAEG;AACH;IACI
|
|
1
|
+
{"version":3,"file":"streamers.d.ts","sourceRoot":"","sources":["../../src/generation/streamers.js"],"names":[],"mappings":"AASA;IACI;;;OAGG;IACH,WAFW,MAAM,EAAE,EAAE,QAIpB;IAED;;OAEG;IACH,YAEC;CACJ;AAMD;;GAEG;AACH;IACI;;;;;;;;OAQG;IACH,uBAPW,OAAO,kBAAkB,EAAE,mBAAmB;QAE5B,WAAW,GAA7B,OAAO;QAC0B,iBAAiB,UAAzC,MAAM,KAAG,IAAI;QACa,uBAAuB,UAAjD,MAAM,EAAE,KAAG,IAAI;QACP,aAAa;OAoBxC;IAVG,0DAA0B;IAC1B,qBAA8B;IAC9B,oCAA0D;IAC1D,gCAdgB,MAAM,EAAE,KAAG,IAAI,CAcuB;IACtD,mBAAoD;IAGpD,mBAAqB;IACrB,kBAAkB;IAClB,gCAAkC;IA6DtC;;;;OAIG;IACH,wBAHW,MAAM,cACN,OAAO,QASjB;CACJ;AAED;;;;;;;GAOG;AACH;IACI;;;;;;;;;;;;OAYG;IACH,uBAZW,OAAO,kBAAkB,EAAE,gBAAgB;QAEzB,WAAW,GAA7B,OAAO;QAC0B,iBAAiB,UAAzC,MAAM,KAAG,IAAI;QACa,uBAAuB,UAAjD,MAAM,EAAE,KAAG,IAAI;QACS,cAAc,UAAtC,MAAM,KAAG,IAAI;QACW,YAAY,UAApC,MAAM,KAAG,IAAI;QACK,WAAW,SAA1B,IAAI;QACC,cAAc,GAA/B,MAAM;QACY,mBAAmB,GAArC,OAAO;QACU,aAAa;OA4BxC;IATG,wBAAgD;IAEhD,uBA1BgB,MAAM,KAAG,IAAI,CA0BO;IACpC,qBA1BgB,MAAM,KAAG,IAAI,CA0BG;IAChC,mBA1BmB,IAAI,CA0BO;IAE9B,uBAAoC;IAEpC,+BAAkC;CAiCzC"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
export class ASTFeatureExtractor extends FeatureExtractor {
|
|
2
|
+
constructor(config: any);
|
|
3
|
+
mel_filters: number[][];
|
|
4
|
+
window: Float64Array;
|
|
5
|
+
mean: any;
|
|
6
|
+
std: any;
|
|
7
|
+
/**
|
|
8
|
+
* Computes the log-Mel spectrogram of the provided audio waveform.
|
|
9
|
+
* @param {Float32Array|Float64Array} waveform The audio waveform to process.
|
|
10
|
+
* @param {number} max_length The maximum number of frames to return.
|
|
11
|
+
* @returns {Promise<Tensor>} An object containing the log-Mel spectrogram data as a Float32Array and its dimensions as an array of numbers.
|
|
12
|
+
*/
|
|
13
|
+
_extract_fbank_features(waveform: Float32Array | Float64Array, max_length: number): Promise<Tensor>;
|
|
14
|
+
/**
|
|
15
|
+
* Asynchronously extracts features from a given audio using the provided configuration.
|
|
16
|
+
* @param {Float32Array|Float64Array} audio The audio data as a Float32Array/Float64Array.
|
|
17
|
+
* @returns {Promise<{ input_values: Tensor }>} A Promise resolving to an object containing the extracted input features as a Tensor.
|
|
18
|
+
*/
|
|
19
|
+
_call(audio: Float32Array | Float64Array): Promise<{
|
|
20
|
+
input_values: Tensor;
|
|
21
|
+
}>;
|
|
22
|
+
}
|
|
23
|
+
import { FeatureExtractor } from '../../base/feature_extraction_utils.js';
|
|
24
|
+
import { Tensor } from '../../utils/tensor.js';
|
|
25
|
+
//# sourceMappingURL=feature_extraction_audio_spectrogram_transformer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"feature_extraction_audio_spectrogram_transformer.d.ts","sourceRoot":"","sources":["../../../src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js"],"names":[],"mappings":"AAKA;IAEI,yBA2BC;IARG,wBAA8B;IAE9B,qBAEE;IAEF,UAA4B;IAC5B,SAA0B;IAG9B;;;;;OAKG;IACH,kCAJW,YAAY,GAAC,YAAY,cACzB,MAAM,GACJ,QAAQ,MAAM,CAAC,CAwB3B;IAGD;;;;OAIG;IACH,aAHW,YAAY,GAAC,YAAY,GACvB,QAAQ;QAAE,YAAY,EAAE,MAAM,CAAA;KAAE,CAAC,CAkB7C;CACJ;iCAzFuD,wCAAwC;uBACzE,uBAAuB"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export class AutoFeatureExtractor {
|
|
2
|
+
static from_pretrained(pretrained_model_name_or_path: string, options: import("../../utils/hub.js").PretrainedOptions): Promise<FeatureExtractor>;
|
|
3
|
+
}
|
|
4
|
+
import { FeatureExtractor } from '../../base/feature_extraction_utils.js';
|
|
5
|
+
//# sourceMappingURL=feature_extraction_auto.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"feature_extraction_auto.d.ts","sourceRoot":"","sources":["../../../src/models/auto/feature_extraction_auto.js"],"names":[],"mappings":"AAMA;IAKsD,kJAoBH;CASlD;iCArCgC,wCAAwC"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export class AutoImageProcessor {
|
|
2
|
+
static from_pretrained(pretrained_model_name_or_path: string, options: import("../../utils/hub.js").PretrainedOptions): Promise<ImageProcessor>;
|
|
3
|
+
}
|
|
4
|
+
import { ImageProcessor } from '../../base/image_processors_utils.js';
|
|
5
|
+
//# sourceMappingURL=image_processing_auto.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"image_processing_auto.d.ts","sourceRoot":"","sources":["../../../src/models/auto/image_processing_auto.js"],"names":[],"mappings":"AAMA;IAuBkkwC,gJAAqyC;CADt2yC;+BAzB8B,sCAAsC"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Helper class which is used to instantiate pretrained processors with the `from_pretrained` function.
|
|
3
|
+
* The chosen processor class is determined by the type specified in the processor config.
|
|
4
|
+
*
|
|
5
|
+
* **Example:** Load a processor using `from_pretrained`.
|
|
6
|
+
* ```javascript
|
|
7
|
+
* let processor = await AutoProcessor.from_pretrained('openai/whisper-tiny.en');
|
|
8
|
+
* ```
|
|
9
|
+
*
|
|
10
|
+
* **Example:** Run an image through a processor.
|
|
11
|
+
* ```javascript
|
|
12
|
+
* let processor = await AutoProcessor.from_pretrained('Xenova/clip-vit-base-patch16');
|
|
13
|
+
* let image = await RawImage.read('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/football-match.jpg');
|
|
14
|
+
* let image_inputs = await processor(image);
|
|
15
|
+
* // {
|
|
16
|
+
* // "pixel_values": {
|
|
17
|
+
* // "dims": [ 1, 3, 224, 224 ],
|
|
18
|
+
* // "type": "float32",
|
|
19
|
+
* // "data": Float32Array [ -1.558687686920166, -1.558687686920166, -1.5440893173217773, ... ],
|
|
20
|
+
* // "size": 150528
|
|
21
|
+
* // },
|
|
22
|
+
* // "original_sizes": [
|
|
23
|
+
* // [ 533, 800 ]
|
|
24
|
+
* // ],
|
|
25
|
+
* // "reshaped_input_sizes": [
|
|
26
|
+
* // [ 224, 224 ]
|
|
27
|
+
* // ]
|
|
28
|
+
* // }
|
|
29
|
+
* ```
|
|
30
|
+
*/
|
|
31
|
+
export class AutoProcessor {
|
|
32
|
+
static from_pretrained(pretrained_model_name_or_path: string, options: any): Promise<Processor>;
|
|
33
|
+
}
|
|
34
|
+
import { Processor } from '../../base/processing_utils.js';
|
|
35
|
+
//# sourceMappingURL=processing_auto.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"processing_auto.d.ts","sourceRoot":"","sources":["../../../src/models/auto/processing_auto.js"],"names":[],"mappings":"AAUA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AACH;IAoCuB,gGAwB8qB;CADpsB;0BA/FyB,gCAAgC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"image_processing_beit.d.ts","sourceRoot":"","sources":["../../../src/models/beit/image_processing_beit.js"],"names":[],"mappings":"AAIA;CAA4D;+BAFrD,sCAAsC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"image_processing_bit.d.ts","sourceRoot":"","sources":["../../../src/models/bit/image_processing_bit.js"],"names":[],"mappings":"AAIA;CAAyD;+BAFlD,sCAAsC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"image_processing_chinese_clip.d.ts","sourceRoot":"","sources":["../../../src/models/chinese_clip/image_processing_chinese_clip.js"],"names":[],"mappings":"AAIA;CAAmE;+BAF5D,sCAAsC"}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
export class ClapFeatureExtractor extends FeatureExtractor {
|
|
2
|
+
constructor(config: any);
|
|
3
|
+
mel_filters: number[][];
|
|
4
|
+
mel_filters_slaney: number[][];
|
|
5
|
+
window: Float64Array;
|
|
6
|
+
/**
|
|
7
|
+
* Extracts the mel spectrogram and prepares it for the mode based on the `truncation` and `padding` arguments.
|
|
8
|
+
*
|
|
9
|
+
* Four different paths are possible:
|
|
10
|
+
* - `truncation="fusion"` and the length of the waveform is greater than the max length: the mel spectrogram
|
|
11
|
+
* will be computed on the entire audio. 3 random crops and a downsampled version of the full mel spectrogram
|
|
12
|
+
* are then stacked together. They will later be used for `feature_fusion`.
|
|
13
|
+
* - `truncation="rand_trunc"` and the length of the waveform is smaller than the max length: the audio is
|
|
14
|
+
* padded based on `padding`.
|
|
15
|
+
* - `truncation="fusion"` and the length of the waveform is smaller than the max length: the audio is padded
|
|
16
|
+
* based on `padding`, and is repeated `4` times.
|
|
17
|
+
* - `truncation="rand_trunc"` and the length of the waveform is greater than the max length: the mel
|
|
18
|
+
* spectrogram will be computed on a random crop of the waveform.
|
|
19
|
+
*
|
|
20
|
+
* @param {Float32Array|Float64Array} waveform The input waveform.
|
|
21
|
+
* @param {number} max_length The maximum length of the waveform.
|
|
22
|
+
* @param {string} truncation The truncation strategy to use.
|
|
23
|
+
* @param {string} padding The padding strategy to use.
|
|
24
|
+
* @returns {Promise<Tensor>} An object containing the mel spectrogram data as a Float32Array, its dimensions as an array of numbers, and a boolean indicating whether the waveform was longer than the max length.
|
|
25
|
+
* @private
|
|
26
|
+
*/
|
|
27
|
+
private _get_input_mel;
|
|
28
|
+
/**
|
|
29
|
+
* Compute the log-mel spectrogram of the provided `waveform` using the Hann window.
|
|
30
|
+
* In CLAP, two different filter banks are used depending on the truncation pattern:
|
|
31
|
+
* - `self.mel_filters`: they correspond to the default parameters of `torchaudio` which can be obtained from
|
|
32
|
+
* calling `torchaudio.transforms.MelSpectrogram().mel_scale.fb`. These filters are used when `truncation`
|
|
33
|
+
* is set to `"fusion"`.
|
|
34
|
+
* - `self.mel_filters_slaney`: they correspond to the default parameters of `librosa`, which uses
|
|
35
|
+
* `librosa.filters.mel` when computing the mel spectrogram. These filters were only used in the original
|
|
36
|
+
* implementation when the truncation mode is not `"fusion"`.
|
|
37
|
+
*
|
|
38
|
+
* @param {Float32Array|Float64Array} waveform The audio waveform to process.
|
|
39
|
+
* @param {number[][]} mel_filters The mel filters to use.
|
|
40
|
+
* @param {number} [max_length=null] The maximum number of frames to return.
|
|
41
|
+
* @returns {Promise<Tensor>} An object containing the log-Mel spectrogram data as a Float32Array and its dimensions as an array of numbers.
|
|
42
|
+
*/
|
|
43
|
+
_extract_fbank_features(waveform: Float32Array | Float64Array, mel_filters: number[][], max_length?: number): Promise<Tensor>;
|
|
44
|
+
/**
|
|
45
|
+
* Asynchronously extracts features from a given audio using the provided configuration.
|
|
46
|
+
* @param {Float32Array|Float64Array} audio The audio data as a Float32Array/Float64Array.
|
|
47
|
+
* @returns {Promise<{ input_features: Tensor }>} A Promise resolving to an object containing the extracted input features as a Tensor.
|
|
48
|
+
*/
|
|
49
|
+
_call(audio: Float32Array | Float64Array, { max_length, }?: {
|
|
50
|
+
max_length?: any;
|
|
51
|
+
}): Promise<{
|
|
52
|
+
input_features: Tensor;
|
|
53
|
+
}>;
|
|
54
|
+
}
|
|
55
|
+
import { FeatureExtractor } from '../../base/feature_extraction_utils.js';
|
|
56
|
+
import { Tensor } from '../../utils/tensor.js';
|
|
57
|
+
//# sourceMappingURL=feature_extraction_clap.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"feature_extraction_clap.d.ts","sourceRoot":"","sources":["../../../src/models/clap/feature_extraction_clap.js"],"names":[],"mappings":"AAKA;IAEI,yBAyBC;IAtBG,wBAQC;IAED,+BAQC;IAED,qBAAkE;IAKtE;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,uBA0CC;IAED;;;;;;;;;;;;;;OAcG;IACH,kCALW,YAAY,GAAC,YAAY,eACzB,MAAM,EAAE,EAAE,eACV,MAAM,GACJ,QAAQ,MAAM,CAAC,CAoB3B;IAGD;;;;OAIG;IACH,aAHW,YAAY,GAAC,YAAY;;QACvB,QAAQ;QAAE,cAAc,EAAE,MAAM,CAAA;KAAE,CAAC,CAkB/C;CACJ;iCA9JuD,wCAAwC;uBACzE,uBAAuB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"image_processing_clip.d.ts","sourceRoot":"","sources":["../../../src/models/clip/image_processing_clip.js"],"names":[],"mappings":"AAIA;CAA0D;AAC1D;CAAgE;+BAHzD,sCAAsC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
export class ConvNextImageProcessor extends ImageProcessor {
|
|
2
|
+
constructor(config: any);
|
|
3
|
+
/**
|
|
4
|
+
* Percentage of the image to crop. Only has an effect if this.size < 384.
|
|
5
|
+
*/
|
|
6
|
+
crop_pct: any;
|
|
7
|
+
resize(image: any): Promise<any>;
|
|
8
|
+
}
|
|
9
|
+
export class ConvNextFeatureExtractor extends ConvNextImageProcessor {
|
|
10
|
+
}
|
|
11
|
+
import { ImageProcessor } from "../../base/image_processors_utils.js";
|
|
12
|
+
//# sourceMappingURL=image_processing_convnext.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"image_processing_convnext.d.ts","sourceRoot":"","sources":["../../../src/models/convnext/image_processing_convnext.js"],"names":[],"mappings":"AAIA;IACI,yBAOC;IAJG;;OAEG;IACH,cAAmD;IAGvD,iCA4BC;CACJ;AACD;CAAwE;+BA1CjE,sCAAsC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"image_processing_deit.d.ts","sourceRoot":"","sources":["../../../src/models/deit/image_processing_deit.js"],"names":[],"mappings":"AAIA;CAA0D;AAC1D;CAAgE;+BAHzD,sCAAsC"}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @typedef {object} DetrFeatureExtractorResultProps
|
|
3
|
+
* @property {import('../../utils/tensor.js').Tensor} pixel_mask
|
|
4
|
+
* @typedef {import('../../base/image_processors_utils.js').ImageProcessorResult & DetrFeatureExtractorResultProps} DetrFeatureExtractorResult
|
|
5
|
+
*/
|
|
6
|
+
export class DetrImageProcessor extends ImageProcessor {
|
|
7
|
+
/**
|
|
8
|
+
* Calls the feature extraction process on an array of images, preprocesses
|
|
9
|
+
* each image, and concatenates the resulting features into a single Tensor.
|
|
10
|
+
* @param {import('../../utils/image.js').RawImage[]} images The image(s) to extract features from.
|
|
11
|
+
* @returns {Promise<DetrFeatureExtractorResult>} An object containing the concatenated pixel values of the preprocessed images.
|
|
12
|
+
*/
|
|
13
|
+
_call(images: import('../../utils/image.js').RawImage[]): Promise<DetrFeatureExtractorResult>;
|
|
14
|
+
post_process_object_detection(outputs: {
|
|
15
|
+
logits: import("../../utils/tensor.js").Tensor;
|
|
16
|
+
pred_boxes: import("../../utils/tensor.js").Tensor;
|
|
17
|
+
}, threshold?: number, target_sizes?: [number, number][], is_zero_shot?: boolean): any[];
|
|
18
|
+
post_process_panoptic_segmentation(outputs: any, threshold?: number, mask_threshold?: number, overlap_mask_area_threshold?: number, label_ids_to_fuse?: Set<number>, target_sizes?: [number, number][]): {
|
|
19
|
+
segmentation: import("../../utils/tensor.js").Tensor;
|
|
20
|
+
segments_info: {
|
|
21
|
+
id: number;
|
|
22
|
+
label_id: number;
|
|
23
|
+
score: number;
|
|
24
|
+
}[];
|
|
25
|
+
}[];
|
|
26
|
+
post_process_instance_segmentation(outputs: any, threshold?: number, target_sizes?: [number, number][]): {
|
|
27
|
+
segmentation: import("../../utils/tensor.js").Tensor;
|
|
28
|
+
segments_info: {
|
|
29
|
+
id: number;
|
|
30
|
+
label_id: number;
|
|
31
|
+
score: number;
|
|
32
|
+
}[];
|
|
33
|
+
}[];
|
|
34
|
+
}
|
|
35
|
+
export class DetrFeatureExtractor extends DetrImageProcessor {
|
|
36
|
+
}
|
|
37
|
+
export type DetrFeatureExtractorResultProps = {
|
|
38
|
+
pixel_mask: import('../../utils/tensor.js').Tensor;
|
|
39
|
+
};
|
|
40
|
+
export type DetrFeatureExtractorResult = import('../../base/image_processors_utils.js').ImageProcessorResult & DetrFeatureExtractorResultProps;
|
|
41
|
+
import { ImageProcessor } from "../../base/image_processors_utils.js";
|
|
42
|
+
//# sourceMappingURL=image_processing_detr.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"image_processing_detr.d.ts","sourceRoot":"","sources":["../../../src/models/detr/image_processing_detr.js"],"names":[],"mappings":"AAUA;;;;GAIG;AAEH;IACI;;;;;OAKG;IACH,cAHW,OAAO,sBAAsB,EAAE,QAAQ,EAAE,GACvC,QAAQ,0BAA0B,CAAC,CAY/C;IAmBwmB;;;6FAAu6F;IAA8yU;;;;;;;QAA4qG;IAAA;;;;;;;QAAktB;CAH/rjB;AAED;CAAgE;;gBAvClD,OAAO,uBAAuB,EAAE,MAAM;;yCACvC,OAAO,sCAAsC,EAAE,oBAAoB,GAAG,+BAA+B;+BAR3G,sCAAsC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
export class DonutImageProcessor extends ImageProcessor {
|
|
2
|
+
pad_image(pixelData: any, imgDims: any, padSize: any, options?: {}): [Float32Array, number[]];
|
|
3
|
+
}
|
|
4
|
+
export class DonutFeatureExtractor extends DonutImageProcessor {
|
|
5
|
+
}
|
|
6
|
+
import { ImageProcessor } from "../../base/image_processors_utils.js";
|
|
7
|
+
//# sourceMappingURL=image_processing_donut.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"image_processing_donut.d.ts","sourceRoot":"","sources":["../../../src/models/donut/image_processing_donut.js"],"names":[],"mappings":"AAIA;IACI,8FAuBC;CACJ;AACD;CAAkE;+BA5B3D,sCAAsC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"image_processing_dpt.d.ts","sourceRoot":"","sources":["../../../src/models/dpt/image_processing_dpt.js"],"names":[],"mappings":"AAIA;CAAyD;AACzD;CAA8D;+BAHvD,sCAAsC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"image_processing_efficientnet.d.ts","sourceRoot":"","sources":["../../../src/models/efficientnet/image_processing_efficientnet.js"],"names":[],"mappings":"AAIA;IACI,yBAMC;IAJG,iBAAkD;CAKzD;+BAVM,sCAAsC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
export * from "./audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js";
|
|
2
|
+
export * from "./clap/feature_extraction_clap.js";
|
|
3
|
+
export * from "./pyannote/feature_extraction_pyannote.js";
|
|
4
|
+
export * from "./seamless_m4t/feature_extraction_seamless_m4t.js";
|
|
5
|
+
export * from "./speecht5/feature_extraction_speecht5.js";
|
|
6
|
+
export * from "./wav2vec2/feature_extraction_wav2vec2.js";
|
|
7
|
+
export * from "./wespeaker/feature_extraction_wespeaker.js";
|
|
8
|
+
export * from "./whisper/feature_extraction_whisper.js";
|
|
9
|
+
export { ImageProcessor as ImageFeatureExtractor } from "../base/image_processors_utils.js";
|
|
10
|
+
//# sourceMappingURL=feature_extractors.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"feature_extractors.d.ts","sourceRoot":"","sources":["../../src/models/feature_extractors.js"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
export class Florence2Processor extends Processor {
|
|
2
|
+
static tokenizer_class: typeof AutoTokenizer;
|
|
3
|
+
static image_processor_class: typeof AutoImageProcessor;
|
|
4
|
+
constructor(config: any, components: any);
|
|
5
|
+
/** @type {Map<string, string>} */
|
|
6
|
+
tasks_answer_post_processing_type: Map<string, string>;
|
|
7
|
+
/** @type {Map<string, string>} */
|
|
8
|
+
task_prompts_without_inputs: Map<string, string>;
|
|
9
|
+
/** @type {Map<string, string>} */
|
|
10
|
+
task_prompts_with_input: Map<string, string>;
|
|
11
|
+
regexes: {
|
|
12
|
+
quad_boxes: RegExp;
|
|
13
|
+
bboxes: RegExp;
|
|
14
|
+
};
|
|
15
|
+
size_per_bin: number;
|
|
16
|
+
/**
|
|
17
|
+
* Helper function to construct prompts from input texts
|
|
18
|
+
* @param {string|string[]} text
|
|
19
|
+
* @returns {string[]}
|
|
20
|
+
*/
|
|
21
|
+
construct_prompts(text: string | string[]): string[];
|
|
22
|
+
/**
|
|
23
|
+
* Post-process the output of the model to each of the task outputs.
|
|
24
|
+
* @param {string} text The text to post-process.
|
|
25
|
+
* @param {string} task The task to post-process the text for.
|
|
26
|
+
* @param {[number, number]} image_size The size of the image. height x width.
|
|
27
|
+
*/
|
|
28
|
+
post_process_generation(text: string, task: string, image_size: [number, number]): {
|
|
29
|
+
[x: string]: string | {
|
|
30
|
+
[x: string]: any[];
|
|
31
|
+
labels: any[];
|
|
32
|
+
};
|
|
33
|
+
};
|
|
34
|
+
_call(images: any, text?: any, kwargs?: {}): Promise<any>;
|
|
35
|
+
}
|
|
36
|
+
import { Processor } from "../../base/processing_utils.js";
|
|
37
|
+
import { AutoTokenizer } from "../../tokenizers.js";
|
|
38
|
+
import { AutoImageProcessor } from "../auto/image_processing_auto.js";
|
|
39
|
+
//# sourceMappingURL=processing_florence2.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"processing_florence2.d.ts","sourceRoot":"","sources":["../../../src/models/florence2/processing_florence2.js"],"names":[],"mappings":"AAIA;IACI,6CAAsC;IACtC,wDAAiD;IAEjD,0CAuBC;IAdG,kCAAkC;IAClC,mCADW,IAAI,MAAM,EAAE,MAAM,CAAC,CAC2E;IAEzG,kCAAkC;IAClC,6BADW,IAAI,MAAM,EAAE,MAAM,CAAC,CAC+D;IAE7F,kCAAkC;IAClC,yBADW,IAAI,MAAM,EAAE,MAAM,CAAC,CACuD;IAErF;;;MAGC;IACD,qBAAwB;IAG5B;;;;OAIG;IACH,wBAHW,MAAM,GAAC,MAAM,EAAE,GACb,MAAM,EAAE,CA6BpB;IAED;;;;;OAKG;IACH,8BAJW,MAAM,QACN,MAAM,cACN,CAAC,MAAM,EAAE,MAAM,CAAC;;;;;MAsC1B;IAID,0DAaC;CACJ;0BA/HyB,gCAAgC;8BAE5B,qBAAqB;mCADhB,kCAAkC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"image_processing_glpn.d.ts","sourceRoot":"","sources":["../../../src/models/glpn/image_processing_glpn.js"],"names":[],"mappings":"AAIA;CAA4D;+BAFrD,sCAAsC"}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
export class Idefics3ImageProcessor extends ImageProcessor {
|
|
2
|
+
constructor(config: any);
|
|
3
|
+
do_image_splitting: any;
|
|
4
|
+
max_image_size: any;
|
|
5
|
+
/**
|
|
6
|
+
* @typedef {import('../../utils/image.js').RawImage} RawImage
|
|
7
|
+
* @typedef {import('../../utils/tensor.js').Tensor} Tensor
|
|
8
|
+
*/
|
|
9
|
+
/**
|
|
10
|
+
* Calculate size to resize images to, to be multiples of `vision_encoder_max_size` while preserving the aspect ratio.
|
|
11
|
+
* @param {Tensor} pixel_values Tensor of the image to resize.
|
|
12
|
+
* @param {number} vision_encoder_max_size Maximum size of the output image. If the image is larger than this size,
|
|
13
|
+
* it will be split into patches of this size, and the original image will be concatenated with the patches, resized to max_size.
|
|
14
|
+
*/
|
|
15
|
+
get_resize_for_vision_encoder(pixel_values: import("../../utils/tensor.js").Tensor, vision_encoder_max_size: number): {
|
|
16
|
+
height: number;
|
|
17
|
+
width: number;
|
|
18
|
+
};
|
|
19
|
+
/** @param {RawImage|RawImage[]|RawImage[][]} images */
|
|
20
|
+
_call(images: import("../../utils/image.js").RawImage | import("../../utils/image.js").RawImage[] | import("../../utils/image.js").RawImage[][], { do_image_splitting, return_row_col_info, }?: {
|
|
21
|
+
do_image_splitting?: any;
|
|
22
|
+
return_row_col_info?: boolean;
|
|
23
|
+
}): Promise<{
|
|
24
|
+
rows?: any[][];
|
|
25
|
+
cols?: any[][];
|
|
26
|
+
pixel_values: import("../../utils/tensor.js").Tensor;
|
|
27
|
+
pixel_attention_mask: import("../../utils/tensor.js").Tensor;
|
|
28
|
+
original_sizes: import("../../base/image_processors_utils.js").HeightWidth[];
|
|
29
|
+
reshaped_input_sizes: import("../../base/image_processors_utils.js").HeightWidth[];
|
|
30
|
+
}>;
|
|
31
|
+
split_image(pixel_values: any, { longest_edge }: {
|
|
32
|
+
longest_edge: any;
|
|
33
|
+
}): Promise<{
|
|
34
|
+
frames: any[];
|
|
35
|
+
num_splits_h: number;
|
|
36
|
+
num_splits_w: number;
|
|
37
|
+
}>;
|
|
38
|
+
}
|
|
39
|
+
import { ImageProcessor } from "../../base/image_processors_utils.js";
|
|
40
|
+
//# sourceMappingURL=image_processing_idefics3.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"image_processing_idefics3.d.ts","sourceRoot":"","sources":["../../../src/models/idefics3/image_processing_idefics3.js"],"names":[],"mappings":"AAOA;IACI,yBAKC;IAFG,wBAA2D;IAC3D,oBAA2C;IAG/C;;;OAGG;IAEH;;;;;OAKG;IACH,6GAHW,MAAM;;;MAiBhB;IAED,uDAAuD;IACvD;;;;;;;;;;OA2HC;IAED;;;;;;OAiDC;CACJ;+BAtNM,sCAAsC"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
export class Idefics3Processor extends Processor {
|
|
2
|
+
static image_processor_class: typeof AutoImageProcessor;
|
|
3
|
+
static tokenizer_class: typeof AutoTokenizer;
|
|
4
|
+
fake_image_token: string;
|
|
5
|
+
image_token: string;
|
|
6
|
+
global_img_token: string;
|
|
7
|
+
/**
|
|
8
|
+
*
|
|
9
|
+
* @param {string|string[]} text
|
|
10
|
+
* @param {RawImage|RawImage[]|RawImage[][]} images
|
|
11
|
+
* @returns {Promise<any>}
|
|
12
|
+
*/
|
|
13
|
+
_call(text: string | string[], images?: RawImage | RawImage[] | RawImage[][], options?: {}): Promise<any>;
|
|
14
|
+
}
|
|
15
|
+
import { Processor } from "../../base/processing_utils.js";
|
|
16
|
+
import { RawImage } from "../../utils/image.js";
|
|
17
|
+
import { AutoImageProcessor } from "../auto/image_processing_auto.js";
|
|
18
|
+
import { AutoTokenizer } from "../../tokenizers.js";
|
|
19
|
+
//# sourceMappingURL=processing_idefics3.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"processing_idefics3.d.ts","sourceRoot":"","sources":["../../../src/models/idefics3/processing_idefics3.js"],"names":[],"mappings":"AA6DA;IACI,wDAAiD;IACjD,6CAAsC;IAGtC,yBAA+C;IAC/C,oBAAwB;IACxB,yBAAkC;IAElC;;;;;OAKG;IACH,YAJW,MAAM,GAAC,MAAM,EAAE,WACf,QAAQ,GAAC,QAAQ,EAAE,GAAC,QAAQ,EAAE,EAAE,iBAC9B,QAAQ,GAAG,CAAC,CA4DxB;CACJ;0BAtIyB,gCAAgC;yBAGjC,sBAAsB;mCAFZ,kCAAkC;8BACvC,qBAAqB"}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
export * from "./beit/image_processing_beit.js";
|
|
2
|
+
export * from "./bit/image_processing_bit.js";
|
|
3
|
+
export * from "./chinese_clip/image_processing_chinese_clip.js";
|
|
4
|
+
export * from "./clip/image_processing_clip.js";
|
|
5
|
+
export * from "./convnext/image_processing_convnext.js";
|
|
6
|
+
export * from "./deit/image_processing_deit.js";
|
|
7
|
+
export * from "./detr/image_processing_detr.js";
|
|
8
|
+
export * from "./donut/image_processing_donut.js";
|
|
9
|
+
export * from "./dpt/image_processing_dpt.js";
|
|
10
|
+
export * from "./efficientnet/image_processing_efficientnet.js";
|
|
11
|
+
export * from "./glpn/image_processing_glpn.js";
|
|
12
|
+
export * from "./idefics3/image_processing_idefics3.js";
|
|
13
|
+
export * from "./janus/image_processing_janus.js";
|
|
14
|
+
export * from "./jina_clip/image_processing_jina_clip.js";
|
|
15
|
+
export * from "./llava_onevision/image_processing_llava_onevision.js";
|
|
16
|
+
export * from "./mask2former/image_processing_mask2former.js";
|
|
17
|
+
export * from "./maskformer/image_processing_maskformer.js";
|
|
18
|
+
export * from "./mobilenet_v1/image_processing_mobilenet_v1.js";
|
|
19
|
+
export * from "./mobilenet_v2/image_processing_mobilenet_v2.js";
|
|
20
|
+
export * from "./mobilenet_v3/image_processing_mobilenet_v3.js";
|
|
21
|
+
export * from "./mobilenet_v4/image_processing_mobilenet_v4.js";
|
|
22
|
+
export * from "./mobilevit/image_processing_mobilevit.js";
|
|
23
|
+
export * from "./nougat/image_processing_nougat.js";
|
|
24
|
+
export * from "./owlv2/image_processing_owlv2.js";
|
|
25
|
+
export * from "./owlvit/image_processing_owlvit.js";
|
|
26
|
+
export * from "./pvt/image_processing_pvt.js";
|
|
27
|
+
export * from "./qwen2_vl/image_processing_qwen2_vl.js";
|
|
28
|
+
export * from "./rt_detr/image_processing_rt_detr.js";
|
|
29
|
+
export * from "./sam/image_processing_sam.js";
|
|
30
|
+
export * from "./segformer/image_processing_segformer.js";
|
|
31
|
+
export * from "./siglip/image_processing_siglip.js";
|
|
32
|
+
export * from "./swin2sr/image_processing_swin2sr.js";
|
|
33
|
+
export * from "./vit/image_processing_vit.js";
|
|
34
|
+
export * from "./vitmatte/image_processing_vitmatte.js";
|
|
35
|
+
export * from "./vitpose/image_processing_vitpose.js";
|
|
36
|
+
export * from "./yolos/image_processing_yolos.js";
|
|
37
|
+
//# sourceMappingURL=image_processors.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"image_processors.d.ts","sourceRoot":"","sources":["../../src/models/image_processors.js"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
export class VLMImageProcessor extends ImageProcessor {
|
|
2
|
+
constructor(config: any);
|
|
3
|
+
constant_values: any;
|
|
4
|
+
pad_image(pixelData: any, imgDims: any, padSize: any, options: any): [Float32Array, number[]];
|
|
5
|
+
}
|
|
6
|
+
import { ImageProcessor } from "../../base/image_processors_utils.js";
|
|
7
|
+
//# sourceMappingURL=image_processing_janus.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"image_processing_janus.d.ts","sourceRoot":"","sources":["../../../src/models/janus/image_processing_janus.js"],"names":[],"mappings":"AAKA;IACI,yBAUC;IADG,qBAAqF;IAGzF,8FAMC;CACJ;+BAtBM,sCAAsC"}
|