@min-pack/tfjs-node 2.17.1 → 3.8.1-patch.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130)
  1. package/README.md +4 -2
  2. package/addon-node_modules/onnxruntime-node/bin/{napi-v3/linux/x64/libonnxruntime.so.1.14.0 → napi-v6/linux/arm64/libonnxruntime.so.1} +0 -0
  3. package/addon-node_modules/onnxruntime-node/bin/napi-v6/linux/arm64/onnxruntime_binding.node +0 -0
  4. package/addon-node_modules/onnxruntime-node/bin/napi-v6/linux/x64/libonnxruntime.so.1 +0 -0
  5. package/addon-node_modules/onnxruntime-node/bin/napi-v6/linux/x64/onnxruntime_binding.node +0 -0
  6. package/index.d.ts +2 -0
  7. package/index.js +7102 -3371
  8. package/package.json +1 -10
  9. package/tfjs-types/backends/onnx.d.ts +37 -0
  10. package/tfjs-types/base/feature_extraction_utils.d.ts +41 -0
  11. package/tfjs-types/base/image_processors_utils.d.ts +332 -0
  12. package/tfjs-types/base/processing_utils.d.ts +89 -0
  13. package/tfjs-types/configs.d.ts +93 -0
  14. package/tfjs-types/env.d.ts +112 -0
  15. package/tfjs-types/generation/configuration_utils.d.ts +326 -0
  16. package/tfjs-types/generation/logits_process.d.ts +364 -0
  17. package/tfjs-types/generation/logits_sampler.d.ts +51 -0
  18. package/tfjs-types/generation/parameters.d.ts +47 -0
  19. package/tfjs-types/generation/stopping_criteria.d.ts +81 -0
  20. package/tfjs-types/generation/streamers.d.ts +88 -0
  21. package/tfjs-types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts +25 -0
  22. package/tfjs-types/models/auto/feature_extraction_auto.d.ts +5 -0
  23. package/tfjs-types/models/auto/image_processing_auto.d.ts +5 -0
  24. package/tfjs-types/models/auto/processing_auto.d.ts +39 -0
  25. package/tfjs-types/models/beit/image_processing_beit.d.ts +4 -0
  26. package/tfjs-types/models/bit/image_processing_bit.d.ts +4 -0
  27. package/tfjs-types/models/chinese_clip/image_processing_chinese_clip.d.ts +4 -0
  28. package/tfjs-types/models/clap/feature_extraction_clap.d.ts +57 -0
  29. package/tfjs-types/models/clip/image_processing_clip.d.ts +6 -0
  30. package/tfjs-types/models/convnext/image_processing_convnext.d.ts +12 -0
  31. package/tfjs-types/models/dac/feature_extraction_dac.d.ts +4 -0
  32. package/tfjs-types/models/deit/image_processing_deit.d.ts +6 -0
  33. package/tfjs-types/models/detr/image_processing_detr.d.ts +42 -0
  34. package/tfjs-types/models/dinov3_vit/image_processing_dinov3_vit.d.ts +4 -0
  35. package/tfjs-types/models/donut/image_processing_donut.d.ts +7 -0
  36. package/tfjs-types/models/dpt/image_processing_dpt.d.ts +6 -0
  37. package/tfjs-types/models/efficientnet/image_processing_efficientnet.d.ts +6 -0
  38. package/tfjs-types/models/encodec/feature_extraction_encodec.d.ts +13 -0
  39. package/tfjs-types/models/feature_extractors.d.ts +16 -0
  40. package/tfjs-types/models/florence2/processing_florence2.d.ts +39 -0
  41. package/tfjs-types/models/gemma3n/feature_extraction_gemma3n.d.ts +35 -0
  42. package/tfjs-types/models/gemma3n/processing_gemma3n.d.ts +31 -0
  43. package/tfjs-types/models/glpn/image_processing_glpn.d.ts +4 -0
  44. package/tfjs-types/models/grounding_dino/image_processing_grounding_dino.d.ts +20 -0
  45. package/tfjs-types/models/grounding_dino/processing_grounding_dino.d.ts +27 -0
  46. package/tfjs-types/models/idefics3/image_processing_idefics3.d.ts +40 -0
  47. package/tfjs-types/models/idefics3/processing_idefics3.d.ts +19 -0
  48. package/tfjs-types/models/image_processors.d.ts +44 -0
  49. package/tfjs-types/models/janus/image_processing_janus.d.ts +7 -0
  50. package/tfjs-types/models/janus/processing_janus.d.ts +77 -0
  51. package/tfjs-types/models/jina_clip/image_processing_jina_clip.d.ts +5 -0
  52. package/tfjs-types/models/jina_clip/processing_jina_clip.d.ts +9 -0
  53. package/tfjs-types/models/llava/processing_llava.d.ts +12 -0
  54. package/tfjs-types/models/llava_onevision/image_processing_llava_onevision.d.ts +4 -0
  55. package/tfjs-types/models/mask2former/image_processing_mask2former.d.ts +4 -0
  56. package/tfjs-types/models/maskformer/image_processing_maskformer.d.ts +22 -0
  57. package/tfjs-types/models/mgp_str/processing_mgp_str.d.ts +64 -0
  58. package/tfjs-types/models/mobilenet_v1/image_processing_mobilenet_v1.d.ts +6 -0
  59. package/tfjs-types/models/mobilenet_v2/image_processing_mobilenet_v2.d.ts +6 -0
  60. package/tfjs-types/models/mobilenet_v3/image_processing_mobilenet_v3.d.ts +6 -0
  61. package/tfjs-types/models/mobilenet_v4/image_processing_mobilenet_v4.d.ts +6 -0
  62. package/tfjs-types/models/mobilevit/image_processing_mobilevit.d.ts +6 -0
  63. package/tfjs-types/models/moonshine/feature_extraction_moonshine.d.ts +13 -0
  64. package/tfjs-types/models/moonshine/processing_moonshine.d.ts +17 -0
  65. package/tfjs-types/models/nougat/image_processing_nougat.d.ts +4 -0
  66. package/tfjs-types/models/owlv2/image_processing_owlv2.d.ts +4 -0
  67. package/tfjs-types/models/owlvit/image_processing_owlvit.d.ts +10 -0
  68. package/tfjs-types/models/owlvit/processing_owlvit.d.ts +8 -0
  69. package/tfjs-types/models/paligemma/processing_paligemma.d.ts +12 -0
  70. package/tfjs-types/models/parakeet/feature_extraction_parakeet.d.ts +22 -0
  71. package/tfjs-types/models/phi3_v/image_processing_phi3_v.d.ts +17 -0
  72. package/tfjs-types/models/phi3_v/processing_phi3_v.d.ts +21 -0
  73. package/tfjs-types/models/pixtral/image_processing_pixtral.d.ts +4 -0
  74. package/tfjs-types/models/pixtral/processing_pixtral.d.ts +12 -0
  75. package/tfjs-types/models/processors.d.ts +25 -0
  76. package/tfjs-types/models/pvt/image_processing_pvt.d.ts +4 -0
  77. package/tfjs-types/models/pyannote/feature_extraction_pyannote.d.ts +31 -0
  78. package/tfjs-types/models/pyannote/processing_pyannote.d.ts +19 -0
  79. package/tfjs-types/models/qwen2_vl/image_processing_qwen2_vl.d.ts +11 -0
  80. package/tfjs-types/models/qwen2_vl/processing_qwen2_vl.d.ts +17 -0
  81. package/tfjs-types/models/rt_detr/image_processing_rt_detr.d.ts +8 -0
  82. package/tfjs-types/models/sam/image_processing_sam.d.ts +103 -0
  83. package/tfjs-types/models/sam/processing_sam.d.ts +9 -0
  84. package/tfjs-types/models/sam2/image_processing_sam2.d.ts +2 -0
  85. package/tfjs-types/models/sam2/processing_sam2.d.ts +6 -0
  86. package/tfjs-types/models/sam3/image_processing_sam3.d.ts +2 -0
  87. package/tfjs-types/models/sapiens/image_processing_sapiens.d.ts +10 -0
  88. package/tfjs-types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts +34 -0
  89. package/tfjs-types/models/segformer/image_processing_segformer.d.ts +10 -0
  90. package/tfjs-types/models/siglip/image_processing_siglip.d.ts +4 -0
  91. package/tfjs-types/models/smolvlm/image_processing_smolvlm.d.ts +2 -0
  92. package/tfjs-types/models/smolvlm/processing_smolvlm.d.ts +2 -0
  93. package/tfjs-types/models/snac/feature_extraction_snac.d.ts +4 -0
  94. package/tfjs-types/models/speecht5/feature_extraction_speecht5.d.ts +4 -0
  95. package/tfjs-types/models/speecht5/processing_speecht5.d.ts +14 -0
  96. package/tfjs-types/models/swin2sr/image_processing_swin2sr.d.ts +5 -0
  97. package/tfjs-types/models/ultravox/processing_ultravox.d.ts +16 -0
  98. package/tfjs-types/models/vit/image_processing_vit.d.ts +6 -0
  99. package/tfjs-types/models/vitmatte/image_processing_vitmatte.d.ts +12 -0
  100. package/tfjs-types/models/vitpose/image_processing_vitpose.d.ts +26 -0
  101. package/tfjs-types/models/voxtral/processing_voxtral.d.ts +16 -0
  102. package/tfjs-types/models/wav2vec2/feature_extraction_wav2vec2.d.ts +19 -0
  103. package/tfjs-types/models/wav2vec2/processing_wav2vec2.d.ts +14 -0
  104. package/tfjs-types/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.d.ts +14 -0
  105. package/tfjs-types/models/wespeaker/feature_extraction_wespeaker.d.ts +23 -0
  106. package/tfjs-types/models/whisper/common_whisper.d.ts +8 -0
  107. package/tfjs-types/models/whisper/feature_extraction_whisper.d.ts +23 -0
  108. package/tfjs-types/models/whisper/generation_whisper.d.ts +76 -0
  109. package/tfjs-types/models/whisper/processing_whisper.d.ts +17 -0
  110. package/tfjs-types/models/yolos/image_processing_yolos.d.ts +10 -0
  111. package/tfjs-types/models.d.ts +4396 -0
  112. package/tfjs-types/ops/registry.d.ts +13 -0
  113. package/tfjs-types/pipelines.d.ts +2433 -0
  114. package/tfjs-types/tokenizers.d.ts +1002 -0
  115. package/tfjs-types/transformers.d.ts +27 -0
  116. package/tfjs-types/utils/audio.d.ts +160 -0
  117. package/tfjs-types/utils/constants.d.ts +8 -0
  118. package/tfjs-types/utils/core.d.ts +231 -0
  119. package/tfjs-types/utils/data-structures.d.ts +294 -0
  120. package/tfjs-types/utils/devices.d.ts +18 -0
  121. package/tfjs-types/utils/dtypes.d.ts +20 -0
  122. package/tfjs-types/utils/generic.d.ts +11 -0
  123. package/tfjs-types/utils/hub.d.ts +175 -0
  124. package/tfjs-types/utils/image.d.ts +141 -0
  125. package/tfjs-types/utils/maths.d.ts +282 -0
  126. package/tfjs-types/utils/tensor.d.ts +490 -0
  127. package/tfjs-types/utils/video.d.ts +37 -0
  128. package/addon-node_modules/onnxruntime-node/bin/napi-v3/linux/arm64/libonnxruntime.so.1.14.0 +0 -0
  129. package/addon-node_modules/onnxruntime-node/bin/napi-v3/linux/arm64/onnxruntime_binding.node +0 -0
  130. package/addon-node_modules/onnxruntime-node/bin/napi-v3/linux/x64/onnxruntime_binding.node +0 -0
@@ -0,0 +1,39 @@
1
+ /**
2
+ * @typedef {import('../../base/processing_utils.js').PretrainedProcessorOptions} PretrainedProcessorOptions
3
+ */
4
+ /**
5
+ * Helper class which is used to instantiate pretrained processors with the `from_pretrained` function.
6
+ * The chosen processor class is determined by the type specified in the processor config.
7
+ *
8
+ * **Example:** Load a processor using `from_pretrained`.
9
+ * ```javascript
10
+ * let processor = await AutoProcessor.from_pretrained('openai/whisper-tiny.en');
11
+ * ```
12
+ *
13
+ * **Example:** Run an image through a processor.
14
+ * ```javascript
15
+ * let processor = await AutoProcessor.from_pretrained('Xenova/clip-vit-base-patch16');
16
+ * let image = await RawImage.read('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/football-match.jpg');
17
+ * let image_inputs = await processor(image);
18
+ * // {
19
+ * // "pixel_values": {
20
+ * // "dims": [ 1, 3, 224, 224 ],
21
+ * // "type": "float32",
22
+ * // "data": Float32Array [ -1.558687686920166, -1.558687686920166, -1.5440893173217773, ... ],
23
+ * // "size": 150528
24
+ * // },
25
+ * // "original_sizes": [
26
+ * // [ 533, 800 ]
27
+ * // ],
28
+ * // "reshaped_input_sizes": [
29
+ * // [ 224, 224 ]
30
+ * // ]
31
+ * // }
32
+ * ```
33
+ */
34
+ export class AutoProcessor {
35
+ static from_pretrained(pretrained_model_name_or_path: string, options?: import("../../base/processing_utils.js").PretrainedProcessorOptions): Promise<Processor>;
36
+ }
37
+ export type PretrainedProcessorOptions = import("../../base/processing_utils.js").PretrainedProcessorOptions;
38
+ import { Processor } from '../../base/processing_utils.js';
39
+ //# sourceMappingURL=processing_auto.d.ts.map
@@ -0,0 +1,4 @@
1
+ export class BeitFeatureExtractor extends ImageProcessor {
2
+ }
3
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
4
+ //# sourceMappingURL=image_processing_beit.d.ts.map
@@ -0,0 +1,4 @@
1
+ export class BitImageProcessor extends ImageProcessor {
2
+ }
3
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
4
+ //# sourceMappingURL=image_processing_bit.d.ts.map
@@ -0,0 +1,4 @@
1
+ export class ChineseCLIPFeatureExtractor extends ImageProcessor {
2
+ }
3
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
4
+ //# sourceMappingURL=image_processing_chinese_clip.d.ts.map
@@ -0,0 +1,57 @@
1
+ export class ClapFeatureExtractor extends FeatureExtractor {
2
+ constructor(config: any);
3
+ mel_filters: number[][];
4
+ mel_filters_slaney: number[][];
5
+ window: Float64Array<ArrayBufferLike>;
6
+ /**
7
+ * Extracts the mel spectrogram and prepares it for the mode based on the `truncation` and `padding` arguments.
8
+ *
9
+ * Four different paths are possible:
10
+ * - `truncation="fusion"` and the length of the waveform is greater than the max length: the mel spectrogram
11
+ * will be computed on the entire audio. 3 random crops and a downsampled version of the full mel spectrogram
12
+ * are then stacked together. They will later be used for `feature_fusion`.
13
+ * - `truncation="rand_trunc"` and the length of the waveform is smaller than the max length: the audio is
14
+ * padded based on `padding`.
15
+ * - `truncation="fusion"` and the length of the waveform is smaller than the max length: the audio is padded
16
+ * based on `padding`, and is repeated `4` times.
17
+ * - `truncation="rand_trunc"` and the length of the waveform is greater than the max length: the mel
18
+ * spectrogram will be computed on a random crop of the waveform.
19
+ *
20
+ * @param {Float32Array|Float64Array} waveform The input waveform.
21
+ * @param {number} max_length The maximum length of the waveform.
22
+ * @param {string} truncation The truncation strategy to use.
23
+ * @param {string} padding The padding strategy to use.
24
+ * @returns {Promise<Tensor>} An object containing the mel spectrogram data as a Float32Array, its dimensions as an array of numbers, and a boolean indicating whether the waveform was longer than the max length.
25
+ * @private
26
+ */
27
+ private _get_input_mel;
28
+ /**
29
+ * Compute the log-mel spectrogram of the provided `waveform` using the Hann window.
30
+ * In CLAP, two different filter banks are used depending on the truncation pattern:
31
+ * - `self.mel_filters`: they correspond to the default parameters of `torchaudio` which can be obtained from
32
+ * calling `torchaudio.transforms.MelSpectrogram().mel_scale.fb`. These filters are used when `truncation`
33
+ * is set to `"fusion"`.
34
+ * - `self.mel_filters_slaney`: they correspond to the default parameters of `librosa`, which uses
35
+ * `librosa.filters.mel` when computing the mel spectrogram. These filters were only used in the original
36
+ * implementation when the truncation mode is not `"fusion"`.
37
+ *
38
+ * @param {Float32Array|Float64Array} waveform The audio waveform to process.
39
+ * @param {number[][]} mel_filters The mel filters to use.
40
+ * @param {number} [max_length=null] The maximum number of frames to return.
41
+ * @returns {Promise<Tensor>} An object containing the log-Mel spectrogram data as a Float32Array and its dimensions as an array of numbers.
42
+ */
43
+ _extract_fbank_features(waveform: Float32Array | Float64Array, mel_filters: number[][], max_length?: number): Promise<Tensor>;
44
+ /**
45
+ * Asynchronously extracts features from a given audio using the provided configuration.
46
+ * @param {Float32Array|Float64Array} audio The audio data as a Float32Array/Float64Array.
47
+ * @returns {Promise<{ input_features: Tensor }>} A Promise resolving to an object containing the extracted input features as a Tensor.
48
+ */
49
+ _call(audio: Float32Array | Float64Array, { max_length, }?: {
50
+ max_length?: any;
51
+ }): Promise<{
52
+ input_features: Tensor;
53
+ }>;
54
+ }
55
+ import { FeatureExtractor } from '../../base/feature_extraction_utils.js';
56
+ import { Tensor } from '../../utils/tensor.js';
57
+ //# sourceMappingURL=feature_extraction_clap.d.ts.map
@@ -0,0 +1,6 @@
1
+ export class CLIPImageProcessor extends ImageProcessor {
2
+ }
3
+ export class CLIPFeatureExtractor extends CLIPImageProcessor {
4
+ }
5
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
6
+ //# sourceMappingURL=image_processing_clip.d.ts.map
@@ -0,0 +1,12 @@
1
+ export class ConvNextImageProcessor extends ImageProcessor {
2
+ constructor(config: any);
3
+ /**
4
+ * Percentage of the image to crop. Only has an effect if this.size < 384.
5
+ */
6
+ crop_pct: any;
7
+ resize(image: any): Promise<any>;
8
+ }
9
+ export class ConvNextFeatureExtractor extends ConvNextImageProcessor {
10
+ }
11
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
12
+ //# sourceMappingURL=image_processing_convnext.d.ts.map
@@ -0,0 +1,4 @@
1
+ export class DacFeatureExtractor extends EncodecFeatureExtractor {
2
+ }
3
+ import { EncodecFeatureExtractor } from '../encodec/feature_extraction_encodec.js';
4
+ //# sourceMappingURL=feature_extraction_dac.d.ts.map
@@ -0,0 +1,6 @@
1
+ export class DeiTImageProcessor extends ImageProcessor {
2
+ }
3
+ export class DeiTFeatureExtractor extends DeiTImageProcessor {
4
+ }
5
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
6
+ //# sourceMappingURL=image_processing_deit.d.ts.map
@@ -0,0 +1,42 @@
1
+ /**
2
+ * @typedef {object} DetrFeatureExtractorResultProps
3
+ * @property {import('../../utils/tensor.js').Tensor} pixel_mask
4
+ * @typedef {import('../../base/image_processors_utils.js').ImageProcessorResult & DetrFeatureExtractorResultProps} DetrFeatureExtractorResult
5
+ */
6
+ export class DetrImageProcessor extends ImageProcessor {
7
+ /**
8
+ * Calls the feature extraction process on an array of images, preprocesses
9
+ * each image, and concatenates the resulting features into a single Tensor.
10
+ * @param {import('../../utils/image.js').RawImage[]} images The image(s) to extract features from.
11
+ * @returns {Promise<DetrFeatureExtractorResult>} An object containing the concatenated pixel values of the preprocessed images.
12
+ */
13
+ _call(images: import("../../utils/image.js").RawImage[]): Promise<DetrFeatureExtractorResult>;
14
+ post_process_object_detection(outputs: {
15
+ logits: import("../../utils/tensor.js").Tensor;
16
+ pred_boxes: import("../../utils/tensor.js").Tensor;
17
+ }, threshold?: number, target_sizes?: [number, number][], is_zero_shot?: boolean): any[];
18
+ post_process_panoptic_segmentation(outputs: any, threshold?: number, mask_threshold?: number, overlap_mask_area_threshold?: number, label_ids_to_fuse?: Set<number>, target_sizes?: [number, number][]): Array<{
19
+ segmentation: import("../../utils/tensor.js").Tensor;
20
+ segments_info: Array<{
21
+ id: number;
22
+ label_id: number;
23
+ score: number;
24
+ }>;
25
+ }>;
26
+ post_process_instance_segmentation(outputs: any, threshold?: number, target_sizes?: [number, number][]): Array<{
27
+ segmentation: import("../../utils/tensor.js").Tensor;
28
+ segments_info: Array<{
29
+ id: number;
30
+ label_id: number;
31
+ score: number;
32
+ }>;
33
+ }>;
34
+ }
35
+ export class DetrFeatureExtractor extends DetrImageProcessor {
36
+ }
37
+ export type DetrFeatureExtractorResultProps = {
38
+ pixel_mask: import("../../utils/tensor.js").Tensor;
39
+ };
40
+ export type DetrFeatureExtractorResult = import("../../base/image_processors_utils.js").ImageProcessorResult & DetrFeatureExtractorResultProps;
41
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
42
+ //# sourceMappingURL=image_processing_detr.d.ts.map
@@ -0,0 +1,4 @@
1
+ export class DINOv3ViTImageProcessor extends ImageProcessor {
2
+ }
3
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
4
+ //# sourceMappingURL=image_processing_dinov3_vit.d.ts.map
@@ -0,0 +1,7 @@
1
+ export class DonutImageProcessor extends ImageProcessor {
2
+ pad_image(pixelData: any, imgDims: any, padSize: any, options?: {}): [Float32Array<ArrayBufferLike>, number[]];
3
+ }
4
+ export class DonutFeatureExtractor extends DonutImageProcessor {
5
+ }
6
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
7
+ //# sourceMappingURL=image_processing_donut.d.ts.map
@@ -0,0 +1,6 @@
1
+ export class DPTImageProcessor extends ImageProcessor {
2
+ }
3
+ export class DPTFeatureExtractor extends DPTImageProcessor {
4
+ }
5
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
6
+ //# sourceMappingURL=image_processing_dpt.d.ts.map
@@ -0,0 +1,6 @@
1
+ export class EfficientNetImageProcessor extends ImageProcessor {
2
+ constructor(config: any);
3
+ include_top: any;
4
+ }
5
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
6
+ //# sourceMappingURL=image_processing_efficientnet.d.ts.map
@@ -0,0 +1,13 @@
1
+ export class EncodecFeatureExtractor extends FeatureExtractor {
2
+ /**
3
+ * Asynchronously extracts input values from a given audio using the provided configuration.
4
+ * @param {Float32Array|Float64Array} audio The audio data as a Float32Array/Float64Array.
5
+ * @returns {Promise<{ input_values: Tensor; }>} The extracted input values.
6
+ */
7
+ _call(audio: Float32Array | Float64Array): Promise<{
8
+ input_values: Tensor;
9
+ }>;
10
+ }
11
+ import { FeatureExtractor } from '../../base/feature_extraction_utils.js';
12
+ import { Tensor } from '../../utils/tensor.js';
13
+ //# sourceMappingURL=feature_extraction_encodec.d.ts.map
@@ -0,0 +1,16 @@
1
+ export * from "./audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js";
2
+ export * from "./encodec/feature_extraction_encodec.js";
3
+ export * from "./clap/feature_extraction_clap.js";
4
+ export * from "./dac/feature_extraction_dac.js";
5
+ export * from "./gemma3n/feature_extraction_gemma3n.js";
6
+ export * from "./moonshine/feature_extraction_moonshine.js";
7
+ export * from "./parakeet/feature_extraction_parakeet.js";
8
+ export * from "./pyannote/feature_extraction_pyannote.js";
9
+ export * from "./seamless_m4t/feature_extraction_seamless_m4t.js";
10
+ export * from "./snac/feature_extraction_snac.js";
11
+ export * from "./speecht5/feature_extraction_speecht5.js";
12
+ export * from "./wav2vec2/feature_extraction_wav2vec2.js";
13
+ export * from "./wespeaker/feature_extraction_wespeaker.js";
14
+ export * from "./whisper/feature_extraction_whisper.js";
15
+ export { ImageProcessor as ImageFeatureExtractor } from "../base/image_processors_utils.js";
16
+ //# sourceMappingURL=feature_extractors.d.ts.map
@@ -0,0 +1,39 @@
1
+ export class Florence2Processor extends Processor {
2
+ static tokenizer_class: typeof AutoTokenizer;
3
+ static image_processor_class: typeof AutoImageProcessor;
4
+ constructor(config: any, components: any, chat_template: any);
5
+ /** @type {Map<string, string>} */
6
+ tasks_answer_post_processing_type: Map<string, string>;
7
+ /** @type {Map<string, string>} */
8
+ task_prompts_without_inputs: Map<string, string>;
9
+ /** @type {Map<string, string>} */
10
+ task_prompts_with_input: Map<string, string>;
11
+ regexes: {
12
+ quad_boxes: RegExp;
13
+ bboxes: RegExp;
14
+ };
15
+ size_per_bin: number;
16
+ /**
17
+ * Helper function to construct prompts from input texts
18
+ * @param {string|string[]} text
19
+ * @returns {string[]}
20
+ */
21
+ construct_prompts(text: string | string[]): string[];
22
+ /**
23
+ * Post-process the output of the model to each of the task outputs.
24
+ * @param {string} text The text to post-process.
25
+ * @param {string} task The task to post-process the text for.
26
+ * @param {[number, number]} image_size The size of the image. height x width.
27
+ */
28
+ post_process_generation(text: string, task: string, image_size: [number, number]): {
29
+ [task]: string | {
30
+ [x: string]: any[];
31
+ labels: any[];
32
+ };
33
+ };
34
+ _call(images: any, text?: any, kwargs?: {}): Promise<any>;
35
+ }
36
+ import { Processor } from "../../base/processing_utils.js";
37
+ import { AutoTokenizer } from "../../tokenizers.js";
38
+ import { AutoImageProcessor } from "../auto/image_processing_auto.js";
39
+ //# sourceMappingURL=processing_florence2.d.ts.map
@@ -0,0 +1,35 @@
1
+ export class Gemma3nAudioFeatureExtractor extends FeatureExtractor {
2
+ constructor(config: any);
3
+ mel_filters: number[][];
4
+ window: Float64Array<ArrayBufferLike>;
5
+ /**
6
+ * Computes the log-Mel spectrogram of the provided audio waveform.
7
+ * @param {Float32Array|Float64Array} waveform The audio waveform to process.
8
+ * @param {number} max_length The maximum number of frames to return.
9
+ * @returns {Promise<Tensor>} An object containing the log-Mel spectrogram data as a Float32Array and its dimensions as an array of numbers.
10
+ */
11
+ _extract_fbank_features(waveform: Float32Array | Float64Array, max_length: number): Promise<Tensor>;
12
+ /**
13
+ * Asynchronously extracts features from a given audio using the provided configuration.
14
+ * @param {Float32Array|Float64Array} audio The audio data as a Float32Array/Float64Array.
15
+ * @param {Object} options Optional parameters for feature extraction.
16
+ * @param {number} [options.max_length=480_000] If provided, defines the maximum length of the audio to allow.
17
+ * Audio longer than this will be truncated if `truncation=true`.
18
+ * @param {boolean} [options.truncation=true] Whether or not to truncate audio above `max_length`.
19
+ * @param {boolean} [options.padding=true] Whether to pad the sequence to a multiple of `pad_to_multiple_of`.
20
+ * @param {number} [options.pad_to_multiple_of=128] The number to pad the sequence to a multiple of.
21
+ * @returns {Promise<{ input_features: Tensor, input_features_mask: Tensor }>} A Promise resolving to an object containing the extracted input features and attention masks as Tensors.
22
+ */
23
+ _call(audio: Float32Array | Float64Array, { max_length, truncation, padding, pad_to_multiple_of, }?: {
24
+ max_length?: number;
25
+ truncation?: boolean;
26
+ padding?: boolean;
27
+ pad_to_multiple_of?: number;
28
+ }): Promise<{
29
+ input_features: Tensor;
30
+ input_features_mask: Tensor;
31
+ }>;
32
+ }
33
+ import { FeatureExtractor } from '../../base/feature_extraction_utils.js';
34
+ import { Tensor } from '../../utils/tensor.js';
35
+ //# sourceMappingURL=feature_extraction_gemma3n.d.ts.map
@@ -0,0 +1,31 @@
1
+ export class Gemma3nProcessor extends Processor {
2
+ static image_processor_class: typeof AutoImageProcessor;
3
+ static feature_extractor_class: typeof AutoFeatureExtractor;
4
+ static tokenizer_class: typeof AutoTokenizer;
5
+ constructor(config: any, components: any, chat_template: any);
6
+ audio_seq_length: any;
7
+ image_seq_length: any;
8
+ audio_token_id: any;
9
+ boa_token: any;
10
+ audio_token: any;
11
+ full_audio_sequence: string;
12
+ image_token_id: any;
13
+ boi_token: any;
14
+ image_token: any;
15
+ full_image_sequence: string;
16
+ /**
17
+ *
18
+ * @param {string|string[]} text
19
+ * @param {RawImage|RawImage[]|RawImage[][]} images
20
+ * @param {RawAudio|RawAudio[]|RawAudio[][]} audio
21
+ * @returns {Promise<any>}
22
+ */
23
+ _call(text: string | string[], images?: RawImage | RawImage[] | RawImage[][], audio?: RawAudio | RawAudio[] | RawAudio[][], options?: {}): Promise<any>;
24
+ }
25
+ import { Processor } from "../../base/processing_utils.js";
26
+ import { RawImage } from "../../utils/image.js";
27
+ import { RawAudio } from "../../utils/audio.js";
28
+ import { AutoImageProcessor } from "../auto/image_processing_auto.js";
29
+ import { AutoFeatureExtractor } from "../auto/feature_extraction_auto.js";
30
+ import { AutoTokenizer } from "../../tokenizers.js";
31
+ //# sourceMappingURL=processing_gemma3n.d.ts.map
@@ -0,0 +1,4 @@
1
+ export class GLPNFeatureExtractor extends ImageProcessor {
2
+ }
3
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
4
+ //# sourceMappingURL=image_processing_glpn.d.ts.map
@@ -0,0 +1,20 @@
1
+ /**
2
+ * @typedef {object} GroundingDinoFeatureExtractorResultProps
3
+ * @property {import('../../utils/tensor.js').Tensor} pixel_mask
4
+ * @typedef {import('../../base/image_processors_utils.js').ImageProcessorResult & GroundingDinoFeatureExtractorResultProps} GroundingDinoFeatureExtractorResult
5
+ */
6
+ export class GroundingDinoImageProcessor extends ImageProcessor {
7
+ /**
8
+ * Calls the feature extraction process on an array of images, preprocesses
9
+ * each image, and concatenates the resulting features into a single Tensor.
10
+ * @param {import('../../utils/image.js').RawImage[]} images The image(s) to extract features from.
11
+ * @returns {Promise<GroundingDinoFeatureExtractorResult>} An object containing the concatenated pixel values of the preprocessed images.
12
+ */
13
+ _call(images: import("../../utils/image.js").RawImage[]): Promise<GroundingDinoFeatureExtractorResult>;
14
+ }
15
+ export type GroundingDinoFeatureExtractorResultProps = {
16
+ pixel_mask: import("../../utils/tensor.js").Tensor;
17
+ };
18
+ export type GroundingDinoFeatureExtractorResult = import("../../base/image_processors_utils.js").ImageProcessorResult & GroundingDinoFeatureExtractorResultProps;
19
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
20
+ //# sourceMappingURL=image_processing_grounding_dino.d.ts.map
@@ -0,0 +1,27 @@
1
+ export class GroundingDinoProcessor extends Processor {
2
+ static tokenizer_class: typeof AutoTokenizer;
3
+ static image_processor_class: typeof AutoImageProcessor;
4
+ /**
5
+ * @typedef {import('../../utils/image.js').RawImage} RawImage
6
+ */
7
+ /**
8
+ *
9
+ * @param {RawImage|RawImage[]|RawImage[][]} images
10
+ * @param {string|string[]} text
11
+ * @returns {Promise<any>}
12
+ */
13
+ _call(images: import("../../utils/image.js").RawImage | import("../../utils/image.js").RawImage[] | import("../../utils/image.js").RawImage[][], text: string | string[], options?: {}): Promise<any>;
14
+ post_process_grounded_object_detection(outputs: any, input_ids: any, { box_threshold, text_threshold, target_sizes }?: {
15
+ box_threshold?: number;
16
+ text_threshold?: number;
17
+ target_sizes?: any;
18
+ }): {
19
+ scores: any[];
20
+ boxes: any[];
21
+ labels: string[];
22
+ }[];
23
+ }
24
+ import { Processor } from "../../base/processing_utils.js";
25
+ import { AutoTokenizer } from "../../tokenizers.js";
26
+ import { AutoImageProcessor } from "../auto/image_processing_auto.js";
27
+ //# sourceMappingURL=processing_grounding_dino.d.ts.map
@@ -0,0 +1,40 @@
1
+ export class Idefics3ImageProcessor extends ImageProcessor {
2
+ constructor(config: any);
3
+ do_image_splitting: any;
4
+ max_image_size: any;
5
+ /**
6
+ * @typedef {import('../../utils/image.js').RawImage} RawImage
7
+ * @typedef {import('../../utils/tensor.js').Tensor} Tensor
8
+ */
9
+ /**
10
+ * Calculate size to resize images to, to be multiples of `vision_encoder_max_size` while preserving the aspect ratio.
11
+ * @param {Tensor} pixel_values Tensor of the image to resize.
12
+ * @param {number} vision_encoder_max_size Maximum size of the output image. If the image is larger than this size,
13
+ * it will be split into patches of this size, and the original image will be concatenated with the patches, resized to max_size.
14
+ */
15
+ get_resize_for_vision_encoder(pixel_values: import("../../utils/tensor.js").Tensor, vision_encoder_max_size: number): {
16
+ height: number;
17
+ width: number;
18
+ };
19
+ /** @param {RawImage|RawImage[]|RawImage[][]} images */
20
+ _call(images: import("../../utils/image.js").RawImage | import("../../utils/image.js").RawImage[] | import("../../utils/image.js").RawImage[][], { do_image_splitting, return_row_col_info, }?: {
21
+ do_image_splitting?: any;
22
+ return_row_col_info?: boolean;
23
+ }): Promise<{
24
+ rows?: any[][];
25
+ cols?: any[][];
26
+ pixel_values: import("../../utils/tensor.js").Tensor;
27
+ pixel_attention_mask: import("../../utils/tensor.js").Tensor;
28
+ original_sizes: import("../../base/image_processors_utils.js").HeightWidth[];
29
+ reshaped_input_sizes: import("../../base/image_processors_utils.js").HeightWidth[];
30
+ }>;
31
+ split_image(pixel_values: any, { longest_edge }: {
32
+ longest_edge: any;
33
+ }): Promise<{
34
+ frames: any[];
35
+ num_splits_h: number;
36
+ num_splits_w: number;
37
+ }>;
38
+ }
39
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
40
+ //# sourceMappingURL=image_processing_idefics3.d.ts.map
@@ -0,0 +1,19 @@
1
+ export class Idefics3Processor extends Processor { // Processor declaration that pairs AutoImageProcessor with AutoTokenizer (see the static fields).
2
+ static image_processor_class: typeof AutoImageProcessor;
3
+ static tokenizer_class: typeof AutoTokenizer;
4
+ fake_image_token: string;
5
+ image_token: string;
6
+ global_img_token: string;
7
+ /**
8
+ * Processes the given text together with optional image input.
9
+ * @param {string|string[]} text A single string or an array of strings.
10
+ * @param {RawImage|RawImage[]|RawImage[][]} images A single image, an array of images, or a nested array of images.
11
+ * @returns {Promise<any>} The processed result (untyped in this declaration).
12
+ */
13
+ _call(text: string | string[], images?: RawImage | RawImage[] | RawImage[][], options?: {}): Promise<any>;
14
+ }
15
+ import { Processor } from "../../base/processing_utils.js";
16
+ import { RawImage } from "../../utils/image.js";
17
+ import { AutoImageProcessor } from "../auto/image_processing_auto.js";
18
+ import { AutoTokenizer } from "../../tokenizers.js";
19
+ //# sourceMappingURL=processing_idefics3.d.ts.map
@@ -0,0 +1,44 @@
1
+ export * from "./beit/image_processing_beit.js";
2
+ export * from "./bit/image_processing_bit.js";
3
+ export * from "./chinese_clip/image_processing_chinese_clip.js";
4
+ export * from "./clip/image_processing_clip.js";
5
+ export * from "./convnext/image_processing_convnext.js";
6
+ export * from "./deit/image_processing_deit.js";
7
+ export * from "./detr/image_processing_detr.js";
8
+ export * from "./dinov3_vit/image_processing_dinov3_vit.js";
9
+ export * from "./donut/image_processing_donut.js";
10
+ export * from "./dpt/image_processing_dpt.js";
11
+ export * from "./efficientnet/image_processing_efficientnet.js";
12
+ export * from "./glpn/image_processing_glpn.js";
13
+ export * from "./grounding_dino/image_processing_grounding_dino.js";
14
+ export * from "./idefics3/image_processing_idefics3.js";
15
+ export * from "./janus/image_processing_janus.js";
16
+ export * from "./jina_clip/image_processing_jina_clip.js";
17
+ export * from "./llava_onevision/image_processing_llava_onevision.js";
18
+ export * from "./mask2former/image_processing_mask2former.js";
19
+ export * from "./maskformer/image_processing_maskformer.js";
20
+ export * from "./mobilenet_v1/image_processing_mobilenet_v1.js";
21
+ export * from "./mobilenet_v2/image_processing_mobilenet_v2.js";
22
+ export * from "./mobilenet_v3/image_processing_mobilenet_v3.js";
23
+ export * from "./mobilenet_v4/image_processing_mobilenet_v4.js";
24
+ export * from "./mobilevit/image_processing_mobilevit.js";
25
+ export * from "./nougat/image_processing_nougat.js";
26
+ export * from "./owlv2/image_processing_owlv2.js";
27
+ export * from "./owlvit/image_processing_owlvit.js";
28
+ export * from "./phi3_v/image_processing_phi3_v.js";
29
+ export * from "./pixtral/image_processing_pixtral.js";
30
+ export * from "./pvt/image_processing_pvt.js";
31
+ export * from "./qwen2_vl/image_processing_qwen2_vl.js";
32
+ export * from "./rt_detr/image_processing_rt_detr.js";
33
+ export * from "./sam/image_processing_sam.js";
34
+ export * from "./sam2/image_processing_sam2.js";
35
+ export * from "./sam3/image_processing_sam3.js";
36
+ export * from "./segformer/image_processing_segformer.js";
37
+ export * from "./siglip/image_processing_siglip.js";
38
+ export * from "./smolvlm/image_processing_smolvlm.js";
39
+ export * from "./swin2sr/image_processing_swin2sr.js";
40
+ export * from "./vit/image_processing_vit.js";
41
+ export * from "./vitmatte/image_processing_vitmatte.js";
42
+ export * from "./vitpose/image_processing_vitpose.js";
43
+ export * from "./yolos/image_processing_yolos.js";
44
+ //# sourceMappingURL=image_processors.d.ts.map
@@ -0,0 +1,7 @@
1
+ export class VLMImageProcessor extends ImageProcessor { // ImageProcessor subclass that declares its own constructor, a constant_values field and pad_image.
2
+ constructor(config: any); // The config argument is untyped in this declaration.
3
+ constant_values: any; // NOTE(review): presumably the fill value used by pad_image; confirm against the implementation.
4
+ pad_image(pixelData: any, imgDims: any, padSize: any, options: any): [Float32Array<ArrayBufferLike>, number[]]; // Returns a 2-tuple: a Float32Array and a number[]. NOTE(review): presumably the padded pixel data and its dimensions; confirm.
5
+ }
6
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
7
+ //# sourceMappingURL=image_processing_janus.d.ts.map
@@ -0,0 +1,77 @@
1
+ export class VLChatProcessor extends Processor { // Processor declaration for chat conversations that may carry images; pairs AutoImageProcessor with AutoTokenizer (see the static fields).
2
+ static image_processor_class: typeof AutoImageProcessor;
3
+ static tokenizer_class: typeof AutoTokenizer;
4
+ constructor(config: any, components: any, chat_template: any); // All three arguments are untyped in this declaration.
5
+ image_tag: any;
6
+ image_start_tag: any;
7
+ image_end_tag: any;
8
+ num_image_tokens: any;
9
+ /**
10
+ * @typedef {Object} MultimodalMessageProperties Additional properties for multimodal messages.
11
+ * @property {(RawImage | string | URL)[]} [images] The images in the message.
12
+ * @typedef {(import('../../tokenizers.js').Message & MultimodalMessageProperties)[]} MultimodalConversation The conversation possibly containing multimodal inputs.
13
+ */
14
+ /**
15
+ * @typedef {Object} VLCChatProcessorResult The processed input.
16
+ * @property {Tensor} input_ids The input IDs.
17
+ * @property {Tensor} attention_mask The attention mask.
18
+ * @property {Tensor} images_seq_mask The image sequence mask.
19
+ * @property {Tensor} images_emb_mask The image embedding mask.
20
+ */
21
+ /**
22
+ * @param {MultimodalConversation} conversation The chat messages to process.
23
+ * @param {Object} options Additional options for processing.
24
+ * @param {RawImage|RawImage[]} [options.images] The images to process, if not set in the conversation.
25
+ * @param {string} [options.chat_template="default"] The chat template to use.
26
+ * @returns {Promise<VLCChatProcessorResult | VLCChatProcessorResult & import('../../base/image_processors_utils.js').ImageProcessorResult>} The processed input, either on its own or intersected with the image processor result.
27
+ */
28
+ _call(conversation: (import("../../tokenizers.js").Message & {
29
+ /**
30
+ * The images in the message.
31
+ */
32
+ images?: (RawImage | string | URL)[];
33
+ })[], { images, chat_template, }?: {
34
+ images?: RawImage | RawImage[];
35
+ chat_template?: string;
36
+ }): Promise<{ // First variant of the result union: the four tensors only.
37
+ /**
38
+ * The input IDs.
39
+ */
40
+ input_ids: Tensor;
41
+ /**
42
+ * The attention mask.
43
+ */
44
+ attention_mask: Tensor;
45
+ /**
46
+ * The image sequence mask.
47
+ */
48
+ images_seq_mask: Tensor;
49
+ /**
50
+ * The image embedding mask.
51
+ */
52
+ images_emb_mask: Tensor;
53
+ } | ({ // Second variant: the same four tensors, intersected with ImageProcessorResult below.
54
+ /**
55
+ * The input IDs.
56
+ */
57
+ input_ids: Tensor;
58
+ /**
59
+ * The attention mask.
60
+ */
61
+ attention_mask: Tensor;
62
+ /**
63
+ * The image sequence mask.
64
+ */
65
+ images_seq_mask: Tensor;
66
+ /**
67
+ * The image embedding mask.
68
+ */
69
+ images_emb_mask: Tensor;
70
+ } & import("../../base/image_processors_utils.js").ImageProcessorResult)>;
71
+ }
72
+ import { Processor } from "../../base/processing_utils.js";
73
+ import { RawImage } from "../../utils/image.js";
74
+ import { Tensor } from "../../utils/tensor.js";
75
+ import { AutoImageProcessor } from "../auto/image_processing_auto.js";
76
+ import { AutoTokenizer } from "../../tokenizers.js";
77
+ //# sourceMappingURL=processing_janus.d.ts.map
@@ -0,0 +1,5 @@
1
+ export class JinaCLIPImageProcessor extends ImageProcessor { // ImageProcessor subclass that declares only its own constructor.
2
+ constructor(config: any); // The config argument is untyped in this declaration.
3
+ }
4
+ import { ImageProcessor } from "../../base/image_processors_utils.js";
5
+ //# sourceMappingURL=image_processing_jina_clip.d.ts.map
@@ -0,0 +1,9 @@
1
+ export class JinaCLIPProcessor extends Processor { // Processor declaration that pairs AutoTokenizer with AutoImageProcessor (see the static fields).
2
+ static tokenizer_class: typeof AutoTokenizer;
3
+ static image_processor_class: typeof AutoImageProcessor;
4
+ _call(text?: any, images?: any, kwargs?: {}): Promise<any>; // Both text and images are optional in the signature; inputs and the resolved value are untyped here.
5
+ }
6
+ import { Processor } from "../../base/processing_utils.js";
7
+ import { AutoTokenizer } from "../../tokenizers.js";
8
+ import { AutoImageProcessor } from "../auto/image_processing_auto.js";
9
+ //# sourceMappingURL=processing_jina_clip.d.ts.map