@min-pack/tfjs-node 2.17.2 → 3.8.1-patch.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -2
- package/addon-node_modules/onnxruntime-node/bin/{napi-v3/linux/x64/libonnxruntime.so.1.14.0 → napi-v6/linux/arm64/libonnxruntime.so.1} +0 -0
- package/addon-node_modules/onnxruntime-node/bin/napi-v6/linux/arm64/onnxruntime_binding.node +0 -0
- package/addon-node_modules/onnxruntime-node/bin/napi-v6/linux/x64/libonnxruntime.so.1 +0 -0
- package/addon-node_modules/onnxruntime-node/bin/napi-v6/linux/x64/onnxruntime_binding.node +0 -0
- package/index.d.ts +2 -0
- package/index.js +7102 -3371
- package/package.json +1 -10
- package/tfjs-types/backends/onnx.d.ts +37 -0
- package/tfjs-types/base/feature_extraction_utils.d.ts +41 -0
- package/tfjs-types/base/image_processors_utils.d.ts +332 -0
- package/tfjs-types/base/processing_utils.d.ts +89 -0
- package/tfjs-types/configs.d.ts +93 -0
- package/tfjs-types/env.d.ts +112 -0
- package/tfjs-types/generation/configuration_utils.d.ts +326 -0
- package/tfjs-types/generation/logits_process.d.ts +364 -0
- package/tfjs-types/generation/logits_sampler.d.ts +51 -0
- package/tfjs-types/generation/parameters.d.ts +47 -0
- package/tfjs-types/generation/stopping_criteria.d.ts +81 -0
- package/tfjs-types/generation/streamers.d.ts +88 -0
- package/tfjs-types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts +25 -0
- package/tfjs-types/models/auto/feature_extraction_auto.d.ts +5 -0
- package/tfjs-types/models/auto/image_processing_auto.d.ts +5 -0
- package/tfjs-types/models/auto/processing_auto.d.ts +39 -0
- package/tfjs-types/models/beit/image_processing_beit.d.ts +4 -0
- package/tfjs-types/models/bit/image_processing_bit.d.ts +4 -0
- package/tfjs-types/models/chinese_clip/image_processing_chinese_clip.d.ts +4 -0
- package/tfjs-types/models/clap/feature_extraction_clap.d.ts +57 -0
- package/tfjs-types/models/clip/image_processing_clip.d.ts +6 -0
- package/tfjs-types/models/convnext/image_processing_convnext.d.ts +12 -0
- package/tfjs-types/models/dac/feature_extraction_dac.d.ts +4 -0
- package/tfjs-types/models/deit/image_processing_deit.d.ts +6 -0
- package/tfjs-types/models/detr/image_processing_detr.d.ts +42 -0
- package/tfjs-types/models/dinov3_vit/image_processing_dinov3_vit.d.ts +4 -0
- package/tfjs-types/models/donut/image_processing_donut.d.ts +7 -0
- package/tfjs-types/models/dpt/image_processing_dpt.d.ts +6 -0
- package/tfjs-types/models/efficientnet/image_processing_efficientnet.d.ts +6 -0
- package/tfjs-types/models/encodec/feature_extraction_encodec.d.ts +13 -0
- package/tfjs-types/models/feature_extractors.d.ts +16 -0
- package/tfjs-types/models/florence2/processing_florence2.d.ts +39 -0
- package/tfjs-types/models/gemma3n/feature_extraction_gemma3n.d.ts +35 -0
- package/tfjs-types/models/gemma3n/processing_gemma3n.d.ts +31 -0
- package/tfjs-types/models/glpn/image_processing_glpn.d.ts +4 -0
- package/tfjs-types/models/grounding_dino/image_processing_grounding_dino.d.ts +20 -0
- package/tfjs-types/models/grounding_dino/processing_grounding_dino.d.ts +27 -0
- package/tfjs-types/models/idefics3/image_processing_idefics3.d.ts +40 -0
- package/tfjs-types/models/idefics3/processing_idefics3.d.ts +19 -0
- package/tfjs-types/models/image_processors.d.ts +44 -0
- package/tfjs-types/models/janus/image_processing_janus.d.ts +7 -0
- package/tfjs-types/models/janus/processing_janus.d.ts +77 -0
- package/tfjs-types/models/jina_clip/image_processing_jina_clip.d.ts +5 -0
- package/tfjs-types/models/jina_clip/processing_jina_clip.d.ts +9 -0
- package/tfjs-types/models/llava/processing_llava.d.ts +12 -0
- package/tfjs-types/models/llava_onevision/image_processing_llava_onevision.d.ts +4 -0
- package/tfjs-types/models/mask2former/image_processing_mask2former.d.ts +4 -0
- package/tfjs-types/models/maskformer/image_processing_maskformer.d.ts +22 -0
- package/tfjs-types/models/mgp_str/processing_mgp_str.d.ts +64 -0
- package/tfjs-types/models/mobilenet_v1/image_processing_mobilenet_v1.d.ts +6 -0
- package/tfjs-types/models/mobilenet_v2/image_processing_mobilenet_v2.d.ts +6 -0
- package/tfjs-types/models/mobilenet_v3/image_processing_mobilenet_v3.d.ts +6 -0
- package/tfjs-types/models/mobilenet_v4/image_processing_mobilenet_v4.d.ts +6 -0
- package/tfjs-types/models/mobilevit/image_processing_mobilevit.d.ts +6 -0
- package/tfjs-types/models/moonshine/feature_extraction_moonshine.d.ts +13 -0
- package/tfjs-types/models/moonshine/processing_moonshine.d.ts +17 -0
- package/tfjs-types/models/nougat/image_processing_nougat.d.ts +4 -0
- package/tfjs-types/models/owlv2/image_processing_owlv2.d.ts +4 -0
- package/tfjs-types/models/owlvit/image_processing_owlvit.d.ts +10 -0
- package/tfjs-types/models/owlvit/processing_owlvit.d.ts +8 -0
- package/tfjs-types/models/paligemma/processing_paligemma.d.ts +12 -0
- package/tfjs-types/models/parakeet/feature_extraction_parakeet.d.ts +22 -0
- package/tfjs-types/models/phi3_v/image_processing_phi3_v.d.ts +17 -0
- package/tfjs-types/models/phi3_v/processing_phi3_v.d.ts +21 -0
- package/tfjs-types/models/pixtral/image_processing_pixtral.d.ts +4 -0
- package/tfjs-types/models/pixtral/processing_pixtral.d.ts +12 -0
- package/tfjs-types/models/processors.d.ts +25 -0
- package/tfjs-types/models/pvt/image_processing_pvt.d.ts +4 -0
- package/tfjs-types/models/pyannote/feature_extraction_pyannote.d.ts +31 -0
- package/tfjs-types/models/pyannote/processing_pyannote.d.ts +19 -0
- package/tfjs-types/models/qwen2_vl/image_processing_qwen2_vl.d.ts +11 -0
- package/tfjs-types/models/qwen2_vl/processing_qwen2_vl.d.ts +17 -0
- package/tfjs-types/models/rt_detr/image_processing_rt_detr.d.ts +8 -0
- package/tfjs-types/models/sam/image_processing_sam.d.ts +103 -0
- package/tfjs-types/models/sam/processing_sam.d.ts +9 -0
- package/tfjs-types/models/sam2/image_processing_sam2.d.ts +2 -0
- package/tfjs-types/models/sam2/processing_sam2.d.ts +6 -0
- package/tfjs-types/models/sam3/image_processing_sam3.d.ts +2 -0
- package/tfjs-types/models/sapiens/image_processing_sapiens.d.ts +10 -0
- package/tfjs-types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts +34 -0
- package/tfjs-types/models/segformer/image_processing_segformer.d.ts +10 -0
- package/tfjs-types/models/siglip/image_processing_siglip.d.ts +4 -0
- package/tfjs-types/models/smolvlm/image_processing_smolvlm.d.ts +2 -0
- package/tfjs-types/models/smolvlm/processing_smolvlm.d.ts +2 -0
- package/tfjs-types/models/snac/feature_extraction_snac.d.ts +4 -0
- package/tfjs-types/models/speecht5/feature_extraction_speecht5.d.ts +4 -0
- package/tfjs-types/models/speecht5/processing_speecht5.d.ts +14 -0
- package/tfjs-types/models/swin2sr/image_processing_swin2sr.d.ts +5 -0
- package/tfjs-types/models/ultravox/processing_ultravox.d.ts +16 -0
- package/tfjs-types/models/vit/image_processing_vit.d.ts +6 -0
- package/tfjs-types/models/vitmatte/image_processing_vitmatte.d.ts +12 -0
- package/tfjs-types/models/vitpose/image_processing_vitpose.d.ts +26 -0
- package/tfjs-types/models/voxtral/processing_voxtral.d.ts +16 -0
- package/tfjs-types/models/wav2vec2/feature_extraction_wav2vec2.d.ts +19 -0
- package/tfjs-types/models/wav2vec2/processing_wav2vec2.d.ts +14 -0
- package/tfjs-types/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.d.ts +14 -0
- package/tfjs-types/models/wespeaker/feature_extraction_wespeaker.d.ts +23 -0
- package/tfjs-types/models/whisper/common_whisper.d.ts +8 -0
- package/tfjs-types/models/whisper/feature_extraction_whisper.d.ts +23 -0
- package/tfjs-types/models/whisper/generation_whisper.d.ts +76 -0
- package/tfjs-types/models/whisper/processing_whisper.d.ts +17 -0
- package/tfjs-types/models/yolos/image_processing_yolos.d.ts +10 -0
- package/tfjs-types/models.d.ts +4396 -0
- package/tfjs-types/ops/registry.d.ts +13 -0
- package/tfjs-types/pipelines.d.ts +2433 -0
- package/tfjs-types/tokenizers.d.ts +1002 -0
- package/tfjs-types/transformers.d.ts +27 -0
- package/tfjs-types/utils/audio.d.ts +160 -0
- package/tfjs-types/utils/constants.d.ts +8 -0
- package/tfjs-types/utils/core.d.ts +231 -0
- package/tfjs-types/utils/data-structures.d.ts +294 -0
- package/tfjs-types/utils/devices.d.ts +18 -0
- package/tfjs-types/utils/dtypes.d.ts +20 -0
- package/tfjs-types/utils/generic.d.ts +11 -0
- package/tfjs-types/utils/hub.d.ts +175 -0
- package/tfjs-types/utils/image.d.ts +141 -0
- package/tfjs-types/utils/maths.d.ts +282 -0
- package/tfjs-types/utils/tensor.d.ts +490 -0
- package/tfjs-types/utils/video.d.ts +37 -0
- package/addon-node_modules/onnxruntime-node/bin/napi-v3/linux/arm64/libonnxruntime.so.1.14.0 +0 -0
- package/addon-node_modules/onnxruntime-node/bin/napi-v3/linux/arm64/onnxruntime_binding.node +0 -0
- package/addon-node_modules/onnxruntime-node/bin/napi-v3/linux/x64/onnxruntime_binding.node +0 -0
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
export class SegformerImageProcessor extends ImageProcessor {
|
|
2
|
+
post_process_semantic_segmentation(outputs: any, target_sizes?: [number, number][]): {
|
|
3
|
+
segmentation: import("../../transformers.js").Tensor;
|
|
4
|
+
labels: number[];
|
|
5
|
+
}[];
|
|
6
|
+
}
|
|
7
|
+
export class SegformerFeatureExtractor extends SegformerImageProcessor {
|
|
8
|
+
}
|
|
9
|
+
import { ImageProcessor } from "../../base/image_processors_utils.js";
|
|
10
|
+
//# sourceMappingURL=image_processing_segformer.d.ts.map
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
export class SpeechT5Processor extends Processor {
|
|
2
|
+
static tokenizer_class: typeof AutoTokenizer;
|
|
3
|
+
static feature_extractor_class: typeof AutoFeatureExtractor;
|
|
4
|
+
/**
|
|
5
|
+
* Calls the feature_extractor function with the given input.
|
|
6
|
+
* @param {any} input The input to extract features from.
|
|
7
|
+
* @returns {Promise<any>} A Promise that resolves with the extracted features.
|
|
8
|
+
*/
|
|
9
|
+
_call(input: any): Promise<any>;
|
|
10
|
+
}
|
|
11
|
+
import { Processor } from "../../base/processing_utils.js";
|
|
12
|
+
import { AutoTokenizer } from "../../tokenizers.js";
|
|
13
|
+
import { AutoFeatureExtractor } from "../auto/feature_extraction_auto.js";
|
|
14
|
+
//# sourceMappingURL=processing_speecht5.d.ts.map
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export class Swin2SRImageProcessor extends ImageProcessor {
|
|
2
|
+
pad_image(pixelData: any, imgDims: any, padSize: any, options?: {}): [Float32Array<ArrayBufferLike>, number[]];
|
|
3
|
+
}
|
|
4
|
+
import { ImageProcessor } from "../../base/image_processors_utils.js";
|
|
5
|
+
//# sourceMappingURL=image_processing_swin2sr.d.ts.map
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Represents an UltravoxProcessor that extracts features from an audio input.
|
|
3
|
+
*/
|
|
4
|
+
export class UltravoxProcessor extends Processor {
|
|
5
|
+
static tokenizer_class: typeof AutoTokenizer;
|
|
6
|
+
static feature_extractor_class: typeof AutoFeatureExtractor;
|
|
7
|
+
/**
|
|
8
|
+
* @param {string} text The text input to process.
|
|
9
|
+
* @param {Float32Array} audio The audio input to process.
|
|
10
|
+
*/
|
|
11
|
+
_call(text: string, audio?: Float32Array, kwargs?: {}): Promise<any>;
|
|
12
|
+
}
|
|
13
|
+
import { Processor } from "../../base/processing_utils.js";
|
|
14
|
+
import { AutoTokenizer } from "../../tokenizers.js";
|
|
15
|
+
import { AutoFeatureExtractor } from "../auto/feature_extraction_auto.js";
|
|
16
|
+
//# sourceMappingURL=processing_ultravox.d.ts.map
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
export class VitMatteImageProcessor extends ImageProcessor {
|
|
2
|
+
/**
|
|
3
|
+
* Calls the feature extraction process on an array of images, preprocesses
|
|
4
|
+
* each image, and concatenates the resulting features into a single Tensor.
|
|
5
|
+
* @param {import("../../utils/image.js").RawImage[]} images The image(s) to extract features from.
|
|
6
|
+
* @param {import("../../utils/image.js").RawImage[]} trimaps The trimap(s) to extract features from.
|
|
7
|
+
* @returns {Promise<import("../../base/image_processors_utils.js").ImageProcessorResult>} An object containing the concatenated pixel values of the preprocessed images.
|
|
8
|
+
*/
|
|
9
|
+
_call(images: import("../../utils/image.js").RawImage[], trimaps: import("../../utils/image.js").RawImage[]): Promise<import("../../base/image_processors_utils.js").ImageProcessorResult>;
|
|
10
|
+
}
|
|
11
|
+
import { ImageProcessor } from "../../base/image_processors_utils.js";
|
|
12
|
+
//# sourceMappingURL=image_processing_vitmatte.d.ts.map
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
export class VitPoseImageProcessor extends ImageProcessor {
|
|
2
|
+
/**
|
|
3
|
+
* Transform the heatmaps into keypoint predictions and transform them back to the image.
|
|
4
|
+
* NOTE: This is a naive implementation and does not include advanced post-processing techniques,
|
|
5
|
+
* so the results may not be as accurate as the original implementation.
|
|
6
|
+
* @param {import('../../utils/tensor.js').Tensor} outputs The model outputs.
|
|
7
|
+
* @param {[number, number, number, number][][]} boxes List or array of bounding boxes for each image.
|
|
8
|
+
* Each box should be a list of 4 floats representing the bounding box coordinates in COCO format (top_left_x, top_left_y, width, height).
|
|
9
|
+
* @returns {{
|
|
10
|
+
* bbox: [number, number, number, number],
|
|
11
|
+
* scores: number[],
|
|
12
|
+
* labels: number[],
|
|
13
|
+
* keypoints: [number, number][]
|
|
14
|
+
* }[][]} List of keypoints predictions for each image.
|
|
15
|
+
*/
|
|
16
|
+
post_process_pose_estimation(outputs: import("../../utils/tensor.js").Tensor, boxes: [number, number, number, number][][], { threshold, }?: {
|
|
17
|
+
threshold?: any;
|
|
18
|
+
}): {
|
|
19
|
+
bbox: [number, number, number, number];
|
|
20
|
+
scores: number[];
|
|
21
|
+
labels: number[];
|
|
22
|
+
keypoints: [number, number][];
|
|
23
|
+
}[][];
|
|
24
|
+
}
|
|
25
|
+
import { ImageProcessor } from "../../base/image_processors_utils.js";
|
|
26
|
+
//# sourceMappingURL=image_processing_vitpose.d.ts.map
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Represents a VoxtralProcessor that extracts features from an audio input.
|
|
3
|
+
*/
|
|
4
|
+
export class VoxtralProcessor extends Processor {
|
|
5
|
+
static tokenizer_class: typeof AutoTokenizer;
|
|
6
|
+
static feature_extractor_class: typeof AutoFeatureExtractor;
|
|
7
|
+
/**
|
|
8
|
+
* @param {string} text The text input to process.
|
|
9
|
+
* @param {Float32Array|Float32Array[]} audio The audio input(s) to process.
|
|
10
|
+
*/
|
|
11
|
+
_call(text: string, audio?: Float32Array | Float32Array[], kwargs?: {}): Promise<any>;
|
|
12
|
+
}
|
|
13
|
+
import { Processor } from "../../base/processing_utils.js";
|
|
14
|
+
import { AutoTokenizer } from "../../tokenizers.js";
|
|
15
|
+
import { AutoFeatureExtractor } from "../auto/feature_extraction_auto.js";
|
|
16
|
+
//# sourceMappingURL=processing_voxtral.d.ts.map
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
export class Wav2Vec2FeatureExtractor extends FeatureExtractor {
|
|
2
|
+
/**
|
|
3
|
+
* @param {Float32Array} input_values
|
|
4
|
+
* @returns {Float32Array}
|
|
5
|
+
*/
|
|
6
|
+
_zero_mean_unit_var_norm(input_values: Float32Array): Float32Array;
|
|
7
|
+
/**
|
|
8
|
+
* Asynchronously extracts features from a given audio using the provided configuration.
|
|
9
|
+
* @param {Float32Array|Float64Array} audio The audio data as a Float32Array/Float64Array.
|
|
10
|
+
* @returns {Promise<{ input_values: Tensor; attention_mask: Tensor }>} A Promise resolving to an object containing the extracted input features and attention mask as Tensors.
|
|
11
|
+
*/
|
|
12
|
+
_call(audio: Float32Array | Float64Array): Promise<{
|
|
13
|
+
input_values: Tensor;
|
|
14
|
+
attention_mask: Tensor;
|
|
15
|
+
}>;
|
|
16
|
+
}
|
|
17
|
+
import { FeatureExtractor } from "../../base/feature_extraction_utils.js";
|
|
18
|
+
import { Tensor } from "../../utils/tensor.js";
|
|
19
|
+
//# sourceMappingURL=feature_extraction_wav2vec2.d.ts.map
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
export class Wav2Vec2Processor extends Processor {
|
|
2
|
+
static tokenizer_class: typeof AutoTokenizer;
|
|
3
|
+
static feature_extractor_class: typeof AutoFeatureExtractor;
|
|
4
|
+
/**
|
|
5
|
+
* Calls the feature_extractor function with the given audio input.
|
|
6
|
+
* @param {any} audio The audio input to extract features from.
|
|
7
|
+
* @returns {Promise<any>} A Promise that resolves with the extracted features.
|
|
8
|
+
*/
|
|
9
|
+
_call(audio: any): Promise<any>;
|
|
10
|
+
}
|
|
11
|
+
import { Processor } from "../../base/processing_utils.js";
|
|
12
|
+
import { AutoTokenizer } from "../../tokenizers.js";
|
|
13
|
+
import { AutoFeatureExtractor } from "../auto/feature_extraction_auto.js";
|
|
14
|
+
//# sourceMappingURL=processing_wav2vec2.d.ts.map
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
export class Wav2Vec2ProcessorWithLM extends Processor {
|
|
2
|
+
static tokenizer_class: typeof AutoTokenizer;
|
|
3
|
+
static feature_extractor_class: typeof AutoFeatureExtractor;
|
|
4
|
+
/**
|
|
5
|
+
* Calls the feature_extractor function with the given audio input.
|
|
6
|
+
* @param {any} audio The audio input to extract features from.
|
|
7
|
+
* @returns {Promise<any>} A Promise that resolves with the extracted features.
|
|
8
|
+
*/
|
|
9
|
+
_call(audio: any): Promise<any>;
|
|
10
|
+
}
|
|
11
|
+
import { Processor } from "../../base/processing_utils.js";
|
|
12
|
+
import { AutoTokenizer } from "../../tokenizers.js";
|
|
13
|
+
import { AutoFeatureExtractor } from "../auto/feature_extraction_auto.js";
|
|
14
|
+
//# sourceMappingURL=processing_wav2vec2_with_lm.d.ts.map
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
export class WeSpeakerFeatureExtractor extends FeatureExtractor {
|
|
2
|
+
constructor(config: any);
|
|
3
|
+
mel_filters: number[][];
|
|
4
|
+
window: Float64Array<ArrayBufferLike>;
|
|
5
|
+
min_num_frames: any;
|
|
6
|
+
/**
|
|
7
|
+
* Computes the log-Mel spectrogram of the provided audio waveform.
|
|
8
|
+
* @param {Float32Array|Float64Array} waveform The audio waveform to process.
|
|
9
|
+
* @returns {Promise<Tensor>} A Promise resolving to the log-Mel spectrogram as a Tensor.
|
|
10
|
+
*/
|
|
11
|
+
_extract_fbank_features(waveform: Float32Array | Float64Array): Promise<Tensor>;
|
|
12
|
+
/**
|
|
13
|
+
* Asynchronously extracts features from a given audio using the provided configuration.
|
|
14
|
+
* @param {Float32Array|Float64Array} audio The audio data as a Float32Array/Float64Array.
|
|
15
|
+
* @returns {Promise<{ input_features: Tensor }>} A Promise resolving to an object containing the extracted input features as a Tensor.
|
|
16
|
+
*/
|
|
17
|
+
_call(audio: Float32Array | Float64Array): Promise<{
|
|
18
|
+
input_features: Tensor;
|
|
19
|
+
}>;
|
|
20
|
+
}
|
|
21
|
+
import { FeatureExtractor } from '../../base/feature_extraction_utils.js';
|
|
22
|
+
import { Tensor } from '../../utils/tensor.js';
|
|
23
|
+
//# sourceMappingURL=feature_extraction_wespeaker.d.ts.map
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @param {string} language The language name or code
|
|
3
|
+
* @returns {string} The language code
|
|
4
|
+
*/
|
|
5
|
+
export function whisper_language_to_code(language: string): string;
|
|
6
|
+
export const WHISPER_LANGUAGE_MAPPING: Map<any, any>;
|
|
7
|
+
export const WHISPER_TO_LANGUAGE_CODE_MAPPING: Map<any, any>;
|
|
8
|
+
//# sourceMappingURL=common_whisper.d.ts.map
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
export class WhisperFeatureExtractor extends FeatureExtractor {
|
|
2
|
+
constructor(config: any);
|
|
3
|
+
window: Float64Array<ArrayBufferLike>;
|
|
4
|
+
/**
|
|
5
|
+
* Computes the log-Mel spectrogram of the provided audio waveform.
|
|
6
|
+
* @param {Float32Array|Float64Array} waveform The audio waveform to process.
|
|
7
|
+
* @returns {Promise<Tensor>} A Promise resolving to the log-Mel spectrogram as a Tensor.
|
|
8
|
+
*/
|
|
9
|
+
_extract_fbank_features(waveform: Float32Array | Float64Array): Promise<Tensor>;
|
|
10
|
+
/**
|
|
11
|
+
* Asynchronously extracts features from a given audio using the provided configuration.
|
|
12
|
+
* @param {Float32Array|Float64Array} audio The audio data as a Float32Array/Float64Array.
|
|
13
|
+
* @returns {Promise<{ input_features: Tensor }>} A Promise resolving to an object containing the extracted input features as a Tensor.
|
|
14
|
+
*/
|
|
15
|
+
_call(audio: Float32Array | Float64Array, { max_length, }?: {
|
|
16
|
+
max_length?: any;
|
|
17
|
+
}): Promise<{
|
|
18
|
+
input_features: Tensor;
|
|
19
|
+
}>;
|
|
20
|
+
}
|
|
21
|
+
import { FeatureExtractor } from '../../base/feature_extraction_utils.js';
|
|
22
|
+
import { Tensor } from '../../utils/tensor.js';
|
|
23
|
+
//# sourceMappingURL=feature_extraction_whisper.d.ts.map
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
export class WhisperGenerationConfig extends GenerationConfig {
|
|
2
|
+
/**
|
|
3
|
+
* Whether to return the timestamps with the text. This enables the `WhisperTimestampsLogitsProcessor`.
|
|
4
|
+
* @type {boolean}
|
|
5
|
+
*/
|
|
6
|
+
return_timestamps: boolean;
|
|
7
|
+
/**
|
|
8
|
+
* Whether to return token-level timestamps
|
|
9
|
+
* with the text. This can be used with or without the `return_timestamps` option. To get word-level
|
|
10
|
+
* timestamps, use the tokenizer to group the tokens into words.
|
|
11
|
+
* @type {boolean}
|
|
12
|
+
*/
|
|
13
|
+
return_token_timestamps: boolean;
|
|
14
|
+
/**
|
|
15
|
+
* The number of audio frames available in this chunk. This is only used when generating word-level timestamps.
|
|
16
|
+
* @type {number}
|
|
17
|
+
*/
|
|
18
|
+
num_frames: number;
|
|
19
|
+
/**
|
|
20
|
+
* Alignment heads to predict word-level timestamps. This is a list of [layer, head] pairs that
|
|
21
|
+
* select the cross-attention heads that are highly correlated to word-level timing.
|
|
22
|
+
* @type {[number, number][]}
|
|
23
|
+
*/
|
|
24
|
+
alignment_heads: [number, number][];
|
|
25
|
+
/**
|
|
26
|
+
* Task to use for generation, either "translate" or "transcribe".
|
|
27
|
+
* @type {string}
|
|
28
|
+
*/
|
|
29
|
+
task: string;
|
|
30
|
+
/**
|
|
31
|
+
* Language token to use for generation, can be either in the form of `<|en|>`, `en` or `english`.
|
|
32
|
+
* You can find all the possible language tokens in the `model.generation_config.lang_to_id` dictionary.
|
|
33
|
+
* @type {string}
|
|
34
|
+
*/
|
|
35
|
+
language: string;
|
|
36
|
+
/**
|
|
37
|
+
* The id of the `"<|notimestamps|>"` token.
|
|
38
|
+
* @type {number}
|
|
39
|
+
*/
|
|
40
|
+
no_timestamps_token_id: number;
|
|
41
|
+
/**
|
|
42
|
+
* Rank-1 list of token IDs created by passing text to [`~WhisperProcessor.get_prompt_ids`] that is
|
|
43
|
+
* provided as a prompt to each chunk. This can be used to provide or "prompt-engineer" a context for
|
|
44
|
+
* transcription, e.g. custom vocabularies or proper nouns to make it more likely to predict those words
|
|
45
|
+
* correctly. It cannot be used in conjunction with `decoder_start_token_id` as it overwrites this value.
|
|
46
|
+
* @type {number[]}
|
|
47
|
+
*/
|
|
48
|
+
prompt_ids: number[];
|
|
49
|
+
/**
|
|
50
|
+
* Whether the model is multilingual or not.
|
|
51
|
+
* @type {boolean}
|
|
52
|
+
*/
|
|
53
|
+
is_multilingual: boolean;
|
|
54
|
+
/**
|
|
55
|
+
* (Optional) A mapping from language tokens to their corresponding IDs.
|
|
56
|
+
* Only required if the model is multilingual.
|
|
57
|
+
* @type {Record<string, number>|null}
|
|
58
|
+
*/
|
|
59
|
+
lang_to_id: Record<string, number> | null;
|
|
60
|
+
/**
|
|
61
|
+
* (Optional) A mapping from task tokens to their corresponding IDs.
|
|
62
|
+
* @type {Record<string, number>|null}
|
|
63
|
+
*/
|
|
64
|
+
task_to_id: Record<string, number> | null;
|
|
65
|
+
/**
|
|
66
|
+
* Used to set the maximum value of the initial timestamp. This is used to prevent the model from
|
|
67
|
+
* predicting timestamps that are too far in the future.
|
|
68
|
+
* @type {number}
|
|
69
|
+
*/
|
|
70
|
+
max_initial_timestamp_index: number;
|
|
71
|
+
}
|
|
72
|
+
export type WhisperGenerationFunctionParameters = import("../../generation/parameters.js").GenerationFunctionParameters & {
|
|
73
|
+
generation_config: WhisperGenerationConfig;
|
|
74
|
+
} & WhisperGenerationConfig;
|
|
75
|
+
import { GenerationConfig } from "../../generation/configuration_utils.js";
|
|
76
|
+
//# sourceMappingURL=generation_whisper.d.ts.map
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Represents a WhisperProcessor that extracts features from an audio input.
|
|
3
|
+
*/
|
|
4
|
+
export class WhisperProcessor extends Processor {
|
|
5
|
+
static tokenizer_class: typeof AutoTokenizer;
|
|
6
|
+
static feature_extractor_class: typeof AutoFeatureExtractor;
|
|
7
|
+
/**
|
|
8
|
+
* Calls the feature_extractor function with the given audio input.
|
|
9
|
+
* @param {any} audio The audio input to extract features from.
|
|
10
|
+
* @returns {Promise<any>} A Promise that resolves with the extracted features.
|
|
11
|
+
*/
|
|
12
|
+
_call(audio: any): Promise<any>;
|
|
13
|
+
}
|
|
14
|
+
import { Processor } from "../../base/processing_utils.js";
|
|
15
|
+
import { AutoTokenizer } from "../../tokenizers.js";
|
|
16
|
+
import { AutoFeatureExtractor } from "../auto/feature_extraction_auto.js";
|
|
17
|
+
//# sourceMappingURL=processing_whisper.d.ts.map
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
export class YolosImageProcessor extends ImageProcessor {
|
|
2
|
+
post_process_object_detection(outputs: {
|
|
3
|
+
logits: import("../../transformers.js").Tensor;
|
|
4
|
+
pred_boxes: import("../../transformers.js").Tensor;
|
|
5
|
+
}, threshold?: number, target_sizes?: [number, number][], is_zero_shot?: boolean): any[];
|
|
6
|
+
}
|
|
7
|
+
export class YolosFeatureExtractor extends YolosImageProcessor {
|
|
8
|
+
}
|
|
9
|
+
import { ImageProcessor } from "../../base/image_processors_utils.js";
|
|
10
|
+
//# sourceMappingURL=image_processing_yolos.d.ts.map
|