@huggingface/transformers 4.0.0-next.5 → 4.0.0-next.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -4
- package/dist/ort-wasm-simd-threaded.jsep.mjs +24 -24
- package/dist/transformers.js +2189 -1015
- package/dist/transformers.min.js +16 -16
- package/dist/transformers.node.cjs +2234 -1029
- package/dist/transformers.node.min.cjs +20 -20
- package/dist/transformers.node.min.mjs +20 -20
- package/dist/transformers.node.mjs +2194 -1017
- package/dist/transformers.web.js +2175 -1001
- package/dist/transformers.web.min.js +18 -18
- package/package.json +4 -4
- package/src/backends/onnx.js +77 -58
- package/src/backends/utils/cacheWasm.js +22 -43
- package/src/cache_utils.js +62 -0
- package/src/configs.js +32 -5
- package/src/env.js +36 -6
- package/src/image_processors_utils.js +3 -3
- package/src/models/auto/modeling_auto.js +14 -1
- package/src/models/chatterbox/modeling_chatterbox.js +1 -1
- package/src/models/detr/image_processing_detr.js +1 -1
- package/src/models/feature_extractors.js +2 -0
- package/src/models/gemma3n/modeling_gemma3n.js +2 -0
- package/src/models/granite_speech/feature_extraction_granite_speech.js +58 -0
- package/src/models/granite_speech/modeling_granite_speech.js +5 -0
- package/src/models/granite_speech/processing_granite_speech.js +62 -0
- package/src/models/grounding_dino/image_processing_grounding_dino.js +1 -1
- package/src/models/idefics3/modeling_idefics3.js +5 -32
- package/src/models/image_processors.js +1 -0
- package/src/models/lfm2_vl/image_processing_lfm2_vl.js +305 -0
- package/src/models/lfm2_vl/modeling_lfm2_vl.js +13 -0
- package/src/models/lfm2_vl/processing_lfm2_vl.js +77 -0
- package/src/models/llava/modeling_llava.js +1 -1
- package/src/models/mistral3/modeling_mistral3.js +2 -2
- package/src/models/modeling_utils.js +234 -292
- package/src/models/models.js +9 -0
- package/src/models/olmo_hybrid/modeling_olmo_hybrid.js +5 -0
- package/src/models/paligemma/modeling_paligemma.js +2 -25
- package/src/models/processors.js +3 -0
- package/src/models/qwen2_5_vl/modeling_qwen2_5_vl.js +5 -1
- package/src/models/qwen2_moe/modeling_qwen2_moe.js +5 -0
- package/src/models/qwen2_vl/image_processing_qwen2_vl.js +1 -41
- package/src/models/qwen2_vl/modeling_qwen2_vl.js +36 -3
- package/src/models/qwen3_5/modeling_qwen3_5.js +1 -0
- package/src/models/qwen3_5_moe/modeling_qwen3_5_moe.js +2 -1
- package/src/models/qwen3_moe/modeling_qwen3_moe.js +5 -0
- package/src/models/qwen3_next/modeling_qwen3_next.js +5 -0
- package/src/models/qwen3_vl/modeling_qwen3_vl.js +2 -1
- package/src/models/qwen3_vl_moe/modeling_qwen3_vl_moe.js +4 -0
- package/src/models/registry.js +39 -4
- package/src/models/sam/image_processing_sam.js +1 -1
- package/src/models/session.js +17 -6
- package/src/models/smolvlm/modeling_smolvlm.js +7 -0
- package/src/models/ultravox/modeling_ultravox.js +1 -3
- package/src/models/voxtral/modeling_voxtral.js +3 -0
- package/src/models/voxtral_realtime/feature_extraction_voxtral_realtime.js +71 -0
- package/src/models/voxtral_realtime/modeling_voxtral_realtime.js +239 -0
- package/src/models/voxtral_realtime/processing_voxtral_realtime.js +113 -0
- package/src/models/whisper/feature_extraction_whisper.js +2 -12
- package/src/pipelines/index.js +2 -84
- package/src/pipelines.js +40 -77
- package/src/transformers.js +2 -0
- package/src/utils/audio.js +18 -2
- package/src/utils/cache/CrossOriginStorageCache.js +251 -0
- package/src/utils/cache/FileCache.js +128 -0
- package/src/utils/cache/cross-origin-storage.d.ts +38 -0
- package/src/utils/cache.js +8 -3
- package/src/utils/hub/{files.js → FileResponse.js} +0 -105
- package/src/utils/hub/utils.js +35 -1
- package/src/utils/hub.js +6 -5
- package/src/utils/image.js +12 -13
- package/src/utils/lru_cache.js +67 -0
- package/src/utils/memoize_promise.js +45 -0
- package/src/utils/model_registry/ModelRegistry.js +70 -23
- package/src/utils/model_registry/get_file_metadata.js +14 -2
- package/src/utils/model_registry/get_model_files.js +63 -78
- package/src/utils/model_registry/get_pipeline_files.js +15 -24
- package/src/utils/model_registry/is_cached.js +81 -4
- package/src/utils/tensor.js +18 -2
- package/types/backends/onnx.d.ts.map +1 -1
- package/types/backends/utils/cacheWasm.d.ts +3 -17
- package/types/backends/utils/cacheWasm.d.ts.map +1 -1
- package/types/cache_utils.d.ts +29 -0
- package/types/cache_utils.d.ts.map +1 -0
- package/types/configs.d.ts.map +1 -1
- package/types/env.d.ts +18 -3
- package/types/env.d.ts.map +1 -1
- package/types/image_processors_utils.d.ts +17 -1
- package/types/image_processors_utils.d.ts.map +1 -1
- package/types/models/auto/modeling_auto.d.ts +6 -0
- package/types/models/auto/modeling_auto.d.ts.map +1 -1
- package/types/models/detr/image_processing_detr.d.ts +1 -1
- package/types/models/feature_extractors.d.ts +2 -0
- package/types/models/gemma3n/modeling_gemma3n.d.ts +2 -0
- package/types/models/gemma3n/modeling_gemma3n.d.ts.map +1 -1
- package/types/models/granite_speech/feature_extraction_granite_speech.d.ts +16 -0
- package/types/models/granite_speech/feature_extraction_granite_speech.d.ts.map +1 -0
- package/types/models/granite_speech/modeling_granite_speech.d.ts +4 -0
- package/types/models/granite_speech/modeling_granite_speech.d.ts.map +1 -0
- package/types/models/granite_speech/processing_granite_speech.d.ts +19 -0
- package/types/models/granite_speech/processing_granite_speech.d.ts.map +1 -0
- package/types/models/grounding_dino/image_processing_grounding_dino.d.ts +1 -1
- package/types/models/idefics3/modeling_idefics3.d.ts +2 -18
- package/types/models/idefics3/modeling_idefics3.d.ts.map +1 -1
- package/types/models/image_processors.d.ts +1 -0
- package/types/models/lfm2_vl/image_processing_lfm2_vl.d.ts +41 -0
- package/types/models/lfm2_vl/image_processing_lfm2_vl.d.ts.map +1 -0
- package/types/models/lfm2_vl/modeling_lfm2_vl.d.ts +4 -0
- package/types/models/lfm2_vl/modeling_lfm2_vl.d.ts.map +1 -0
- package/types/models/lfm2_vl/processing_lfm2_vl.d.ts +18 -0
- package/types/models/lfm2_vl/processing_lfm2_vl.d.ts.map +1 -0
- package/types/models/mistral3/modeling_mistral3.d.ts +2 -2
- package/types/models/mistral3/modeling_mistral3.d.ts.map +1 -1
- package/types/models/modeling_utils.d.ts +44 -24
- package/types/models/modeling_utils.d.ts.map +1 -1
- package/types/models/models.d.ts +9 -0
- package/types/models/olmo_hybrid/modeling_olmo_hybrid.d.ts +8 -0
- package/types/models/olmo_hybrid/modeling_olmo_hybrid.d.ts.map +1 -0
- package/types/models/paligemma/modeling_paligemma.d.ts +2 -8
- package/types/models/paligemma/modeling_paligemma.d.ts.map +1 -1
- package/types/models/processors.d.ts +3 -0
- package/types/models/qwen2_5_vl/modeling_qwen2_5_vl.d.ts +3 -0
- package/types/models/qwen2_5_vl/modeling_qwen2_5_vl.d.ts.map +1 -1
- package/types/models/qwen2_moe/modeling_qwen2_moe.d.ts +8 -0
- package/types/models/qwen2_moe/modeling_qwen2_moe.d.ts.map +1 -0
- package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts.map +1 -1
- package/types/models/qwen2_vl/modeling_qwen2_vl.d.ts +2 -0
- package/types/models/qwen2_vl/modeling_qwen2_vl.d.ts.map +1 -1
- package/types/models/qwen3_5/modeling_qwen3_5.d.ts +2 -0
- package/types/models/qwen3_5/modeling_qwen3_5.d.ts.map +1 -1
- package/types/models/qwen3_5_moe/modeling_qwen3_5_moe.d.ts +3 -0
- package/types/models/qwen3_5_moe/modeling_qwen3_5_moe.d.ts.map +1 -1
- package/types/models/qwen3_moe/modeling_qwen3_moe.d.ts +8 -0
- package/types/models/qwen3_moe/modeling_qwen3_moe.d.ts.map +1 -0
- package/types/models/qwen3_next/modeling_qwen3_next.d.ts +8 -0
- package/types/models/qwen3_next/modeling_qwen3_next.d.ts.map +1 -0
- package/types/models/qwen3_vl/modeling_qwen3_vl.d.ts +3 -0
- package/types/models/qwen3_vl/modeling_qwen3_vl.d.ts.map +1 -1
- package/types/models/qwen3_vl_moe/modeling_qwen3_vl_moe.d.ts +7 -0
- package/types/models/qwen3_vl_moe/modeling_qwen3_vl_moe.d.ts.map +1 -0
- package/types/models/registry.d.ts +2 -1
- package/types/models/registry.d.ts.map +1 -1
- package/types/models/sam/image_processing_sam.d.ts +1 -1
- package/types/models/session.d.ts +3 -2
- package/types/models/session.d.ts.map +1 -1
- package/types/models/smolvlm/modeling_smolvlm.d.ts +8 -0
- package/types/models/smolvlm/modeling_smolvlm.d.ts.map +1 -0
- package/types/models/ultravox/modeling_ultravox.d.ts +0 -2
- package/types/models/ultravox/modeling_ultravox.d.ts.map +1 -1
- package/types/models/voxtral/modeling_voxtral.d.ts +4 -0
- package/types/models/voxtral/modeling_voxtral.d.ts.map +1 -0
- package/types/models/voxtral_realtime/feature_extraction_voxtral_realtime.d.ts +28 -0
- package/types/models/voxtral_realtime/feature_extraction_voxtral_realtime.d.ts.map +1 -0
- package/types/models/voxtral_realtime/modeling_voxtral_realtime.d.ts +17 -0
- package/types/models/voxtral_realtime/modeling_voxtral_realtime.d.ts.map +1 -0
- package/types/models/voxtral_realtime/processing_voxtral_realtime.d.ts +44 -0
- package/types/models/voxtral_realtime/processing_voxtral_realtime.d.ts.map +1 -0
- package/types/models/whisper/feature_extraction_whisper.d.ts.map +1 -1
- package/types/pipelines/index.d.ts +0 -34
- package/types/pipelines/index.d.ts.map +1 -1
- package/types/pipelines.d.ts.map +1 -1
- package/types/transformers.d.ts +1 -0
- package/types/transformers.d.ts.map +1 -1
- package/types/utils/audio.d.ts +5 -2
- package/types/utils/audio.d.ts.map +1 -1
- package/types/utils/cache/CrossOriginStorageCache.d.ts +120 -0
- package/types/utils/cache/CrossOriginStorageCache.d.ts.map +1 -0
- package/types/utils/cache/FileCache.d.ts +39 -0
- package/types/utils/cache/FileCache.d.ts.map +1 -0
- package/types/utils/cache.d.ts +4 -4
- package/types/utils/cache.d.ts.map +1 -1
- package/types/utils/dtypes.d.ts +1 -1
- package/types/utils/hub/{files.d.ts → FileResponse.d.ts} +1 -38
- package/types/utils/hub/FileResponse.d.ts.map +1 -0
- package/types/utils/hub/utils.d.ts +17 -2
- package/types/utils/hub/utils.d.ts.map +1 -1
- package/types/utils/hub.d.ts +7 -7
- package/types/utils/hub.d.ts.map +1 -1
- package/types/utils/image.d.ts +1 -1
- package/types/utils/image.d.ts.map +1 -1
- package/types/utils/lru_cache.d.ts +38 -0
- package/types/utils/lru_cache.d.ts.map +1 -0
- package/types/utils/memoize_promise.d.ts +14 -0
- package/types/utils/memoize_promise.d.ts.map +1 -0
- package/types/utils/model_registry/ModelRegistry.d.ts +66 -6
- package/types/utils/model_registry/ModelRegistry.d.ts.map +1 -1
- package/types/utils/model_registry/get_file_metadata.d.ts.map +1 -1
- package/types/utils/model_registry/get_model_files.d.ts +1 -0
- package/types/utils/model_registry/get_model_files.d.ts.map +1 -1
- package/types/utils/model_registry/get_pipeline_files.d.ts +2 -1
- package/types/utils/model_registry/get_pipeline_files.d.ts.map +1 -1
- package/types/utils/model_registry/is_cached.d.ts +47 -4
- package/types/utils/model_registry/is_cached.d.ts.map +1 -1
- package/types/utils/tensor.d.ts.map +1 -1
- package/src/utils/data-structures.js +0 -572
- package/types/utils/data-structures.d.ts +0 -294
- package/types/utils/data-structures.d.ts.map +0 -1
- package/types/utils/hub/files.d.ts.map +0 -1
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"registry.d.ts","sourceRoot":"","sources":["../../src/models/registry.js"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"registry.d.ts","sourceRoot":"","sources":["../../src/models/registry.js"],"names":[],"mappings":"AAgLA,2EAKG;AAmDH,+EAUG;AAEH,oEAqEG;AA4CH,uEAIG;AAmKH,0EAwCE;AA2DF,4EAKG;AAMH,+CAAiF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA3bjF,mFAqBG;AAEH,gFAgBG;AA/CH,+EAAuG;AAEvG,4EAIG;AAgIH,qEAmBG;AAEH,8EAiBG;AAuCH,gFAuBG;AAkBH,8EAIG;AAEH,iFASG;AAEH,kFAGG;AApCH,4EAQG;AAEH,sFAIG;AAwBH,2EAKG;AAEH,+DAQG;AAEH,gFAQG;AAEH,yEAAsF;AAEtF,sFAKG;AArGH,uFAEG;AAqGH,yEAAiG;AAOjG,0EAAwG;AAExG,4EAQG;AAEH,6EAAuG;AAEvG,2EAAmG;AAInG,oFAIG;AA/JH,8EAkBG;AAEH,8EAKG"}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* @typedef {
|
|
2
|
+
* @typedef {Object} SamImageProcessorResult
|
|
3
3
|
* @property {Tensor} pixel_values
|
|
4
4
|
* @property {import("../../image_processors_utils.js").HeightWidth[]} original_sizes
|
|
5
5
|
* @property {import("../../image_processors_utils.js").HeightWidth[]} reshaped_input_sizes
|
|
@@ -4,11 +4,12 @@
|
|
|
4
4
|
* @param {string} pretrained_model_name_or_path The path to the directory containing the model file.
|
|
5
5
|
* @param {Record<string, string>} names The names of the model files to load.
|
|
6
6
|
* @param {import('../utils/hub.js').PretrainedModelOptions} options Additional options for loading the model.
|
|
7
|
-
* @param {string} [
|
|
7
|
+
* @param {Record<string, true>} [cache_sessions] A map from session name to `true`, indicating which
|
|
8
|
+
* sessions should have GPU-pinned KV cache outputs.
|
|
8
9
|
* @returns {Promise<Record<string, any>>} A Promise that resolves to a dictionary of InferenceSession objects.
|
|
9
10
|
* @private
|
|
10
11
|
*/
|
|
11
|
-
export function constructSessions(pretrained_model_name_or_path: string, names: Record<string, string>, options: import("../utils/hub.js").PretrainedModelOptions,
|
|
12
|
+
export function constructSessions(pretrained_model_name_or_path: string, names: Record<string, string>, options: import("../utils/hub.js").PretrainedModelOptions, cache_sessions?: Record<string, true>): Promise<Record<string, any>>;
|
|
12
13
|
/**
|
|
13
14
|
* Executes an InferenceSession using the specified inputs.
|
|
14
15
|
* NOTE: `inputs` must contain at least the input names of the model.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"session.d.ts","sourceRoot":"","sources":["../../src/models/session.js"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"session.d.ts","sourceRoot":"","sources":["../../src/models/session.js"],"names":[],"mappings":"AAkJA;;;;;;;;;;GAUG;AACH,iEARW,MAAM,SACN,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,WACtB,OAAO,iBAAiB,EAAE,sBAAsB,mBAChD,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,GAElB,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,CAoBxC;AAmBD;;;;;;;;;;GAUG;AACH,uDAHa,OAAO,KAAQ,CA+C3B"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* The SmolVLM Model with a language modeling head.
|
|
3
|
+
* It is made up of a SigLIP vision encoder, with a language modeling head on top.
|
|
4
|
+
*/
|
|
5
|
+
export class SmolVLMForConditionalGeneration extends Idefics3ForConditionalGeneration {
|
|
6
|
+
}
|
|
7
|
+
import { Idefics3ForConditionalGeneration } from '../idefics3/modeling_idefics3.js';
|
|
8
|
+
//# sourceMappingURL=modeling_smolvlm.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"modeling_smolvlm.d.ts","sourceRoot":"","sources":["../../../src/models/smolvlm/modeling_smolvlm.js"],"names":[],"mappings":"AAEA;;;GAGG;AACH;CAAwF;iDANvC,kCAAkC"}
|
|
@@ -6,7 +6,5 @@ export class UltravoxModel extends UltravoxPreTrainedModel {
|
|
|
6
6
|
attention_mask: any;
|
|
7
7
|
};
|
|
8
8
|
}
|
|
9
|
-
export class VoxtralForConditionalGeneration extends UltravoxModel {
|
|
10
|
-
}
|
|
11
9
|
import { PreTrainedModel } from '../modeling_utils.js';
|
|
12
10
|
//# sourceMappingURL=modeling_ultravox.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"modeling_ultravox.d.ts","sourceRoot":"","sources":["../../../src/models/ultravox/modeling_ultravox.js"],"names":[],"mappings":"AAEA;CAEC;AAED;IACI;;;MAUC;CACJ;
|
|
1
|
+
{"version":3,"file":"modeling_ultravox.d.ts","sourceRoot":"","sources":["../../../src/models/ultravox/modeling_ultravox.js"],"names":[],"mappings":"AAEA;CAEC;AAED;IACI;;;MAUC;CACJ;gCAlB4E,sBAAsB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"modeling_voxtral.d.ts","sourceRoot":"","sources":["../../../src/models/voxtral/modeling_voxtral.js"],"names":[],"mappings":"AAEA;CAAqE;8BAFvC,kCAAkC"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
export class VoxtralRealtimeFeatureExtractor extends FeatureExtractor {
|
|
2
|
+
constructor(config: any);
|
|
3
|
+
window: Float64Array<ArrayBufferLike>;
|
|
4
|
+
/**
|
|
5
|
+
* Computes the log-Mel spectrogram of the provided audio waveform.
|
|
6
|
+
* @param {Float32Array|Float64Array} waveform The audio waveform to process.
|
|
7
|
+
* @param {Object} [options]
|
|
8
|
+
* @param {boolean} [options.center=true] Whether to center-pad the waveform for STFT.
|
|
9
|
+
* @returns {Promise<import('../../utils/tensor.js').Tensor>} The log-Mel spectrogram tensor of shape [num_mel_bins, num_frames].
|
|
10
|
+
*/
|
|
11
|
+
_extract_fbank_features(waveform: Float32Array | Float64Array, { center }?: {
|
|
12
|
+
center?: boolean;
|
|
13
|
+
}): Promise<import("../../utils/tensor.js").Tensor>;
|
|
14
|
+
/**
|
|
15
|
+
* Extract mel spectrogram features from audio.
|
|
16
|
+
* @param {Float32Array|Float64Array} audio The audio data.
|
|
17
|
+
* @param {Object} [options]
|
|
18
|
+
* @param {boolean} [options.center=true] Whether to center-pad the waveform.
|
|
19
|
+
* @returns {Promise<{ input_features: import('../../utils/tensor.js').Tensor }>}
|
|
20
|
+
*/
|
|
21
|
+
_call(audio: Float32Array | Float64Array, { center }?: {
|
|
22
|
+
center?: boolean;
|
|
23
|
+
}): Promise<{
|
|
24
|
+
input_features: import("../../utils/tensor.js").Tensor;
|
|
25
|
+
}>;
|
|
26
|
+
}
|
|
27
|
+
import { FeatureExtractor } from '../../feature_extraction_utils.js';
|
|
28
|
+
//# sourceMappingURL=feature_extraction_voxtral_realtime.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"feature_extraction_voxtral_realtime.d.ts","sourceRoot":"","sources":["../../../src/models/voxtral_realtime/feature_extraction_voxtral_realtime.js"],"names":[],"mappings":"AAGA;IACI,yBAcC;IADG,sCAAwD;IAG5D;;;;;;OAMG;IACH,kCALW,YAAY,GAAC,YAAY,eAEjC;QAA0B,MAAM,GAAxB,OAAO;KACf,GAAU,OAAO,CAAC,OAAO,uBAAuB,EAAE,MAAM,CAAC,CA2B3D;IAED;;;;;;OAMG;IACH,aALW,YAAY,GAAC,YAAY,eAEjC;QAA0B,MAAM,GAAxB,OAAO;KACf,GAAU,OAAO,CAAC;QAAE,cAAc,EAAE,OAAO,uBAAuB,EAAE,MAAM,CAAA;KAAE,CAAC,CAU/E;CACJ;iCAtEuD,mCAAmC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
export class VoxtralRealtimePreTrainedModel extends PreTrainedModel {
|
|
2
|
+
}
|
|
3
|
+
export class VoxtralRealtimeForConditionalGeneration extends VoxtralRealtimePreTrainedModel {
|
|
4
|
+
forward({ input_ids, past_key_values, ...kwargs }: {
|
|
5
|
+
[x: string]: any;
|
|
6
|
+
input_ids: any;
|
|
7
|
+
past_key_values: any;
|
|
8
|
+
}): Promise<any>;
|
|
9
|
+
generate({ input_features, stopping_criteria: userStoppingCriteria, ...kwargs }: {
|
|
10
|
+
[x: string]: any;
|
|
11
|
+
input_features: any;
|
|
12
|
+
stopping_criteria: any;
|
|
13
|
+
}): Promise<Tensor | import("../modeling_outputs.js").ModelOutput>;
|
|
14
|
+
}
|
|
15
|
+
import { PreTrainedModel } from '../modeling_utils.js';
|
|
16
|
+
import { Tensor } from '../../utils/tensor.js';
|
|
17
|
+
//# sourceMappingURL=modeling_voxtral_realtime.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"modeling_voxtral_realtime.d.ts","sourceRoot":"","sources":["../../../src/models/voxtral_realtime/modeling_voxtral_realtime.js"],"names":[],"mappings":"AA+LA;CAEC;AAED;IACI;;;;qBAoBC;IAED;;;;uEAmBC;CACJ;gCA9O+B,sBAAsB;uBAGzB,uBAAuB"}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
export class VoxtralRealtimeProcessor extends Processor {
|
|
2
|
+
static tokenizer_class: typeof AutoTokenizer;
|
|
3
|
+
static feature_extractor_class: typeof AutoFeatureExtractor;
|
|
4
|
+
/** Number of mel frames in the first audio chunk. */
|
|
5
|
+
get num_mel_frames_first_audio_chunk(): number;
|
|
6
|
+
/** Number of raw audio samples in the first audio chunk. */
|
|
7
|
+
get num_samples_first_audio_chunk(): number;
|
|
8
|
+
/** Number of raw audio samples per subsequent audio chunk. */
|
|
9
|
+
get num_samples_per_audio_chunk(): any;
|
|
10
|
+
/** Number of right-pad tokens for non-streaming mode. */
|
|
11
|
+
get num_right_pad_tokens(): number;
|
|
12
|
+
/** Number of mel frames per text token. */
|
|
13
|
+
get audio_length_per_tok(): number;
|
|
14
|
+
/** Number of raw audio samples per token. */
|
|
15
|
+
get raw_audio_length_per_tok(): number;
|
|
16
|
+
/**
|
|
17
|
+
* Process audio input for VoxtralRealtime.
|
|
18
|
+
*
|
|
19
|
+
* In streaming mode with `is_first_audio_chunk=true`, the audio is left-padded
|
|
20
|
+
* with silence and mel features are extracted with `center=true`.
|
|
21
|
+
* Returns `{ input_ids, input_features }`.
|
|
22
|
+
*
|
|
23
|
+
* In streaming mode with `is_first_audio_chunk=false`, the audio chunk is
|
|
24
|
+
* processed with `center=false` and only `{ input_features }` is returned.
|
|
25
|
+
*
|
|
26
|
+
* In non-streaming mode, the audio is right-padded to ensure the model
|
|
27
|
+
* transcribes the full audio, then processed with `center=true`.
|
|
28
|
+
* Returns `{ input_features }`.
|
|
29
|
+
*
|
|
30
|
+
* @param {Float32Array|Float64Array} audio The audio waveform.
|
|
31
|
+
* @param {Object} [options]
|
|
32
|
+
* @param {boolean} [options.is_streaming=false] Whether processing in streaming mode.
|
|
33
|
+
* @param {boolean} [options.is_first_audio_chunk=true] Whether this is the first audio chunk.
|
|
34
|
+
* @returns {Promise<Object>}
|
|
35
|
+
*/
|
|
36
|
+
_call(audio: Float32Array | Float64Array, { is_streaming, is_first_audio_chunk }?: {
|
|
37
|
+
is_streaming?: boolean;
|
|
38
|
+
is_first_audio_chunk?: boolean;
|
|
39
|
+
}): Promise<any>;
|
|
40
|
+
}
|
|
41
|
+
import { Processor } from '../../processing_utils.js';
|
|
42
|
+
import { AutoTokenizer } from '../auto/tokenization_auto.js';
|
|
43
|
+
import { AutoFeatureExtractor } from '../auto/feature_extraction_auto.js';
|
|
44
|
+
//# sourceMappingURL=processing_voxtral_realtime.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"processing_voxtral_realtime.d.ts","sourceRoot":"","sources":["../../../src/models/voxtral_realtime/processing_voxtral_realtime.js"],"names":[],"mappings":"AAeA;IACI,6CAAuC;IACvC,4DAAsD;IAGtD,qDAAqD;IACrD,+CAEC;IAED,4DAA4D;IAC5D,4CAGC;IAED,8DAA8D;IAC9D,uCAGC;IAED,yDAAyD;IACzD,mCAEC;IAED,2CAA2C;IAC3C,mCAEC;IAED,6CAA6C;IAC7C,uCAEC;IAED;;;;;;;;;;;;;;;;;;;OAmBG;IACH,aANW,YAAY,GAAC,YAAY,2CAEjC;QAA0B,YAAY,GAA9B,OAAO;QACW,oBAAoB,GAAtC,OAAO;KACf,GAAU,OAAO,KAAQ,CAyC3B;CACJ;0BA9GyB,2BAA2B;8BADvB,8BAA8B;qCADvB,oCAAoC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"feature_extraction_whisper.d.ts","sourceRoot":"","sources":["../../../src/models/whisper/feature_extraction_whisper.js"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"feature_extraction_whisper.d.ts","sourceRoot":"","sources":["../../../src/models/whisper/feature_extraction_whisper.js"],"names":[],"mappings":"AAKA;IACI,yBAeC;IADG,sCAAwD;IAG5D;;;;OAIG;IACH,kCAHW,YAAY,GAAC,YAAY,GACvB,OAAO,CAAC,MAAM,CAAC,CAoB3B;IAED;;;;OAIG;IACH,aAHW,YAAY,GAAC,YAAY;;QACvB,OAAO,CAAC;QAAE,cAAc,EAAE,MAAM,CAAA;KAAE,CAAC,CA2B/C;CACJ;iCA/EuD,mCAAmC;uBACpE,uBAAuB"}
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
export const SUPPORTED_TASKS: Readonly<{
|
|
2
2
|
'text-classification': {
|
|
3
|
-
tokenizer: typeof AutoTokenizer;
|
|
4
3
|
pipeline: typeof TextClassificationPipeline;
|
|
5
4
|
model: typeof AutoModelForSequenceClassification;
|
|
6
5
|
default: {
|
|
@@ -9,7 +8,6 @@ export const SUPPORTED_TASKS: Readonly<{
|
|
|
9
8
|
type: string;
|
|
10
9
|
};
|
|
11
10
|
'token-classification': {
|
|
12
|
-
tokenizer: typeof AutoTokenizer;
|
|
13
11
|
pipeline: typeof TokenClassificationPipeline;
|
|
14
12
|
model: typeof AutoModelForTokenClassification;
|
|
15
13
|
default: {
|
|
@@ -18,7 +16,6 @@ export const SUPPORTED_TASKS: Readonly<{
|
|
|
18
16
|
type: string;
|
|
19
17
|
};
|
|
20
18
|
'question-answering': {
|
|
21
|
-
tokenizer: typeof AutoTokenizer;
|
|
22
19
|
pipeline: typeof QuestionAnsweringPipeline;
|
|
23
20
|
model: typeof AutoModelForQuestionAnswering;
|
|
24
21
|
default: {
|
|
@@ -27,7 +24,6 @@ export const SUPPORTED_TASKS: Readonly<{
|
|
|
27
24
|
type: string;
|
|
28
25
|
};
|
|
29
26
|
'fill-mask': {
|
|
30
|
-
tokenizer: typeof AutoTokenizer;
|
|
31
27
|
pipeline: typeof FillMaskPipeline;
|
|
32
28
|
model: typeof AutoModelForMaskedLM;
|
|
33
29
|
default: {
|
|
@@ -37,7 +33,6 @@ export const SUPPORTED_TASKS: Readonly<{
|
|
|
37
33
|
type: string;
|
|
38
34
|
};
|
|
39
35
|
summarization: {
|
|
40
|
-
tokenizer: typeof AutoTokenizer;
|
|
41
36
|
pipeline: typeof SummarizationPipeline;
|
|
42
37
|
model: typeof AutoModelForSeq2SeqLM;
|
|
43
38
|
default: {
|
|
@@ -46,7 +41,6 @@ export const SUPPORTED_TASKS: Readonly<{
|
|
|
46
41
|
type: string;
|
|
47
42
|
};
|
|
48
43
|
translation: {
|
|
49
|
-
tokenizer: typeof AutoTokenizer;
|
|
50
44
|
pipeline: typeof TranslationPipeline;
|
|
51
45
|
model: typeof AutoModelForSeq2SeqLM;
|
|
52
46
|
default: {
|
|
@@ -55,7 +49,6 @@ export const SUPPORTED_TASKS: Readonly<{
|
|
|
55
49
|
type: string;
|
|
56
50
|
};
|
|
57
51
|
'text2text-generation': {
|
|
58
|
-
tokenizer: typeof AutoTokenizer;
|
|
59
52
|
pipeline: typeof Text2TextGenerationPipeline;
|
|
60
53
|
model: typeof AutoModelForSeq2SeqLM;
|
|
61
54
|
default: {
|
|
@@ -64,7 +57,6 @@ export const SUPPORTED_TASKS: Readonly<{
|
|
|
64
57
|
type: string;
|
|
65
58
|
};
|
|
66
59
|
'text-generation': {
|
|
67
|
-
tokenizer: typeof AutoTokenizer;
|
|
68
60
|
pipeline: typeof TextGenerationPipeline;
|
|
69
61
|
model: typeof AutoModelForCausalLM;
|
|
70
62
|
default: {
|
|
@@ -74,7 +66,6 @@ export const SUPPORTED_TASKS: Readonly<{
|
|
|
74
66
|
type: string;
|
|
75
67
|
};
|
|
76
68
|
'zero-shot-classification': {
|
|
77
|
-
tokenizer: typeof AutoTokenizer;
|
|
78
69
|
pipeline: typeof ZeroShotClassificationPipeline;
|
|
79
70
|
model: typeof AutoModelForSequenceClassification;
|
|
80
71
|
default: {
|
|
@@ -85,37 +76,30 @@ export const SUPPORTED_TASKS: Readonly<{
|
|
|
85
76
|
'audio-classification': {
|
|
86
77
|
pipeline: typeof AudioClassificationPipeline;
|
|
87
78
|
model: typeof AutoModelForAudioClassification;
|
|
88
|
-
processor: typeof AutoProcessor;
|
|
89
79
|
default: {
|
|
90
80
|
model: string;
|
|
91
81
|
};
|
|
92
82
|
type: string;
|
|
93
83
|
};
|
|
94
84
|
'zero-shot-audio-classification': {
|
|
95
|
-
tokenizer: typeof AutoTokenizer;
|
|
96
85
|
pipeline: typeof ZeroShotAudioClassificationPipeline;
|
|
97
86
|
model: typeof AutoModel;
|
|
98
|
-
processor: typeof AutoProcessor;
|
|
99
87
|
default: {
|
|
100
88
|
model: string;
|
|
101
89
|
};
|
|
102
90
|
type: string;
|
|
103
91
|
};
|
|
104
92
|
'automatic-speech-recognition': {
|
|
105
|
-
tokenizer: typeof AutoTokenizer;
|
|
106
93
|
pipeline: typeof AutomaticSpeechRecognitionPipeline;
|
|
107
94
|
model: (typeof AutoModelForSpeechSeq2Seq)[];
|
|
108
|
-
processor: typeof AutoProcessor;
|
|
109
95
|
default: {
|
|
110
96
|
model: string;
|
|
111
97
|
};
|
|
112
98
|
type: string;
|
|
113
99
|
};
|
|
114
100
|
'text-to-audio': {
|
|
115
|
-
tokenizer: typeof AutoTokenizer;
|
|
116
101
|
pipeline: typeof TextToAudioPipeline;
|
|
117
102
|
model: (typeof AutoModelForTextToSpectrogram)[];
|
|
118
|
-
processor: (typeof AutoProcessor)[];
|
|
119
103
|
default: {
|
|
120
104
|
model: string;
|
|
121
105
|
dtype: string;
|
|
@@ -123,10 +107,8 @@ export const SUPPORTED_TASKS: Readonly<{
|
|
|
123
107
|
type: string;
|
|
124
108
|
};
|
|
125
109
|
'image-to-text': {
|
|
126
|
-
tokenizer: typeof AutoTokenizer;
|
|
127
110
|
pipeline: typeof ImageToTextPipeline;
|
|
128
111
|
model: typeof AutoModelForVision2Seq;
|
|
129
|
-
processor: typeof AutoProcessor;
|
|
130
112
|
default: {
|
|
131
113
|
model: string;
|
|
132
114
|
};
|
|
@@ -135,7 +117,6 @@ export const SUPPORTED_TASKS: Readonly<{
|
|
|
135
117
|
'image-classification': {
|
|
136
118
|
pipeline: typeof ImageClassificationPipeline;
|
|
137
119
|
model: typeof AutoModelForImageClassification;
|
|
138
|
-
processor: typeof AutoProcessor;
|
|
139
120
|
default: {
|
|
140
121
|
model: string;
|
|
141
122
|
};
|
|
@@ -144,7 +125,6 @@ export const SUPPORTED_TASKS: Readonly<{
|
|
|
144
125
|
'image-segmentation': {
|
|
145
126
|
pipeline: typeof ImageSegmentationPipeline;
|
|
146
127
|
model: (typeof AutoModelForImageSegmentation)[];
|
|
147
|
-
processor: typeof AutoProcessor;
|
|
148
128
|
default: {
|
|
149
129
|
model: string;
|
|
150
130
|
};
|
|
@@ -153,17 +133,14 @@ export const SUPPORTED_TASKS: Readonly<{
|
|
|
153
133
|
'background-removal': {
|
|
154
134
|
pipeline: typeof BackgroundRemovalPipeline;
|
|
155
135
|
model: (typeof AutoModelForImageSegmentation)[];
|
|
156
|
-
processor: typeof AutoProcessor;
|
|
157
136
|
default: {
|
|
158
137
|
model: string;
|
|
159
138
|
};
|
|
160
139
|
type: string;
|
|
161
140
|
};
|
|
162
141
|
'zero-shot-image-classification': {
|
|
163
|
-
tokenizer: typeof AutoTokenizer;
|
|
164
142
|
pipeline: typeof ZeroShotImageClassificationPipeline;
|
|
165
143
|
model: typeof AutoModel;
|
|
166
|
-
processor: typeof AutoProcessor;
|
|
167
144
|
default: {
|
|
168
145
|
model: string;
|
|
169
146
|
};
|
|
@@ -172,27 +149,22 @@ export const SUPPORTED_TASKS: Readonly<{
|
|
|
172
149
|
'object-detection': {
|
|
173
150
|
pipeline: typeof ObjectDetectionPipeline;
|
|
174
151
|
model: typeof AutoModelForObjectDetection;
|
|
175
|
-
processor: typeof AutoProcessor;
|
|
176
152
|
default: {
|
|
177
153
|
model: string;
|
|
178
154
|
};
|
|
179
155
|
type: string;
|
|
180
156
|
};
|
|
181
157
|
'zero-shot-object-detection': {
|
|
182
|
-
tokenizer: typeof AutoTokenizer;
|
|
183
158
|
pipeline: typeof ZeroShotObjectDetectionPipeline;
|
|
184
159
|
model: typeof AutoModelForZeroShotObjectDetection;
|
|
185
|
-
processor: typeof AutoProcessor;
|
|
186
160
|
default: {
|
|
187
161
|
model: string;
|
|
188
162
|
};
|
|
189
163
|
type: string;
|
|
190
164
|
};
|
|
191
165
|
'document-question-answering': {
|
|
192
|
-
tokenizer: typeof AutoTokenizer;
|
|
193
166
|
pipeline: typeof DocumentQuestionAnsweringPipeline;
|
|
194
167
|
model: typeof AutoModelForDocumentQuestionAnswering;
|
|
195
|
-
processor: typeof AutoProcessor;
|
|
196
168
|
default: {
|
|
197
169
|
model: string;
|
|
198
170
|
};
|
|
@@ -201,7 +173,6 @@ export const SUPPORTED_TASKS: Readonly<{
|
|
|
201
173
|
'image-to-image': {
|
|
202
174
|
pipeline: typeof ImageToImagePipeline;
|
|
203
175
|
model: typeof AutoModelForImageToImage;
|
|
204
|
-
processor: typeof AutoProcessor;
|
|
205
176
|
default: {
|
|
206
177
|
model: string;
|
|
207
178
|
};
|
|
@@ -210,14 +181,12 @@ export const SUPPORTED_TASKS: Readonly<{
|
|
|
210
181
|
'depth-estimation': {
|
|
211
182
|
pipeline: typeof DepthEstimationPipeline;
|
|
212
183
|
model: typeof AutoModelForDepthEstimation;
|
|
213
|
-
processor: typeof AutoProcessor;
|
|
214
184
|
default: {
|
|
215
185
|
model: string;
|
|
216
186
|
};
|
|
217
187
|
type: string;
|
|
218
188
|
};
|
|
219
189
|
'feature-extraction': {
|
|
220
|
-
tokenizer: typeof AutoTokenizer;
|
|
221
190
|
pipeline: typeof FeatureExtractionPipeline;
|
|
222
191
|
model: typeof AutoModel;
|
|
223
192
|
default: {
|
|
@@ -227,7 +196,6 @@ export const SUPPORTED_TASKS: Readonly<{
|
|
|
227
196
|
type: string;
|
|
228
197
|
};
|
|
229
198
|
'image-feature-extraction': {
|
|
230
|
-
processor: typeof AutoProcessor;
|
|
231
199
|
pipeline: typeof ImageFeatureExtractionPipeline;
|
|
232
200
|
model: (typeof AutoModel)[];
|
|
233
201
|
default: {
|
|
@@ -250,7 +218,6 @@ export type AliasType = keyof typeof TASK_ALIASES;
|
|
|
250
218
|
* All possible pipeline types.
|
|
251
219
|
*/
|
|
252
220
|
export type PipelineType = TaskType | AliasType;
|
|
253
|
-
import { AutoTokenizer } from '../models/auto/tokenization_auto.js';
|
|
254
221
|
import { TextClassificationPipeline } from './text-classification.js';
|
|
255
222
|
import { AutoModelForSequenceClassification } from '../models/auto/modeling_auto.js';
|
|
256
223
|
import { TokenClassificationPipeline } from './token-classification.js';
|
|
@@ -268,7 +235,6 @@ import { AutoModelForCausalLM } from '../models/auto/modeling_auto.js';
|
|
|
268
235
|
import { ZeroShotClassificationPipeline } from './zero-shot-classification.js';
|
|
269
236
|
import { AudioClassificationPipeline } from './audio-classification.js';
|
|
270
237
|
import { AutoModelForAudioClassification } from '../models/auto/modeling_auto.js';
|
|
271
|
-
import { AutoProcessor } from '../models/auto/processing_auto.js';
|
|
272
238
|
import { ZeroShotAudioClassificationPipeline } from './zero-shot-audio-classification.js';
|
|
273
239
|
import { AutoModel } from '../models/auto/modeling_auto.js';
|
|
274
240
|
import { AutomaticSpeechRecognitionPipeline } from './automatic-speech-recognition.js';
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/pipelines/index.js"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/pipelines/index.js"],"names":[],"mappings":"AA2DA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8MG;AAIH;;;;;;GASG;uBA+BU,MAAM,OAAO,eAAe;wBAC5B,MAAM,OAAO,YAAY;;;;2BACzB,QAAQ,GAAG,SAAS;2CAtRU,0BAA0B;mDAF9D,iCAAiC;4CAGI,2BAA2B;gDAHhE,iCAAiC;0CAIE,yBAAyB;8CAJ5D,iCAAiC;iCAKP,gBAAgB;qCAL1C,iCAAiC;sCAMF,oBAAoB;sCANnD,iCAAiC;oCAOJ,kBAAkB;4CACV,2BAA2B;uCAChC,sBAAsB;qCATtD,iCAAiC;+CAUO,+BAA+B;4CAClC,2BAA2B;gDAXhE,iCAAiC;oDAYY,qCAAqC;0BAZlF,iCAAiC;mDAaW,mCAAmC;0CAb/E,iCAAiC;oCAcJ,oBAAoB;8CAdjD,iCAAiC;oCAeJ,oBAAoB;uCAfjD,iCAAiC;4CAgBI,2BAA2B;gDAhBhE,iCAAiC;0CAiBE,yBAAyB;8CAjB5D,iCAAiC;0CAkBE,yBAAyB;oDACf,qCAAqC;wCACjD,uBAAuB;4CApBxD,iCAAiC;gDAqBQ,iCAAiC;oDArB1E,iCAAiC;kDAsBU,kCAAkC;sDAtB7E,iCAAiC;qCAuBH,qBAAqB;yCAvBnD,iCAAiC;wCAwBA,uBAAuB;4CAxBxD,iCAAiC;0CAyBE,yBAAyB;+CACpB,+BAA+B"}
|
package/types/pipelines.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pipelines.d.ts","sourceRoot":"","sources":["../src/pipelines.js"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"pipelines.d.ts","sourceRoot":"","sources":["../src/pipelines.js"],"names":[],"mappings":"AAsDA;;;;;;;GAOG;AAEH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,yBA7B4B,CAAC,SAAf,YAAa,QAChB,CAAC,UAuBD,MAAM,gKACN,OAAO,gBAAgB,EAAE,sBAAsB,GAC7C,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CA2IhC;uBAhLY,MAAM,OAAO,eAAe;wBAC5B,MAAM,OAAO,YAAY;;;;2BACzB,QAAQ,GAAG,SAAS;;;;6BACpB,GAAE,CAAC,IAAI,QAAQ,GAAG,YAAY,CAAC,CAAA,OAAO,eAAe,EAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,GAAC;;;;yBACtE,GAAE,CAAC,IAAI,SAAS,GAAG,YAAY,CAAC,CAAA,OAAO,eAAe;;;;;;MAAiB,CAAC,UAAU,CAAC,CAAC,GAAC;;;;uBACrF,cAAc,GAAG,UAAU;6BA2M3B,OAAO,0BAA0B,EAAE,cAAc;uCACjD,OAAO,oCAAoC,EAAE,wBAAwB;wCACrE,OAAO,qCAAqC,EAAE,yBAAyB;sCACvE,OAAO,mCAAmC,EAAE,uBAAuB;kCACnE,OAAO,8BAA8B,EAAE,mBAAmB;gCAC1D,OAAO,4BAA4B,EAAE,iBAAiB;wCACtD,OAAO,qCAAqC,EAAE,yBAAyB;mCACvE,OAAO,gCAAgC,EAAE,oBAAoB;yCAC7D,OAAO,gCAAgC,EAAE,0BAA0B;uCACnE,OAAO,gCAAgC,EAAE,wBAAwB;2CACjE,OAAO,yCAAyC,EAAE,4BAA4B;wCAC9E,OAAO,qCAAqC,EAAE,yBAAyB;gDACvE,OAAO,+CAA+C,EAAE,iCAAiC;+CACzF,OAAO,6CAA6C,EAAE,gCAAgC;gCACtF,OAAO,8BAA8B,EAAE,iBAAiB;wCACxD,OAAO,qCAAqC,EAAE,yBAAyB;sCACvE,OAAO,mCAAmC,EAAE,uBAAuB;gCACnE,OAAO,8BAA8B,EAAE,iBAAiB;oCACxD,OAAO,iCAAiC,EAAE,qBAAqB;4CAC/D,OAAO,2CAA2C,EAAE,6BAA6B;gDACjF,OAAO,+CAA+C,EAAE,iCAAiC;8CACzF,OAAO,4CAA4C,EAAE,+BAA+B;oCACpF,OAAO,iCAAiC,EAAE,qBAAqB;2CA3OrE,sBAAsB;4CAAtB,sBAAsB;0CAAtB,sBAAsB;iCAAtB,sBAAsB;sCAAtB,sBAAsB;oCAAtB,sBAAsB;4CAAtB,sBAAsB;uCAAtB,sBAAsB;+CAAtB,sBAAsB;4CAAtB,sBAAsB;oDAAtB,sBAAsB;mDAAtB,sBAAsB;oCAAtB,sBAAsB;oCAAtB,sBAAsB;4CAAtB,sBAAsB;0CAAtB,sBAAsB;0CAAtB,sBAAsB;oDAAtB,sBAAsB;wCAAtB,sBAAsB;gDAAtB,sBAAsB;kDAAtB,sBAAsB;qCAAtB,sBAAsB;wCAAtB,sBAAsB;0CAAtB,sBAAsB;+CAAtB,sBAAsB;gCAAtB,sBAAsB;6BAAtB,sBAAsB"}
|
package/types/transformers.d.ts
CHANGED
|
@@ -14,6 +14,7 @@ export * from "./generation/stopping_criteria.js";
|
|
|
14
14
|
export * from "./generation/logits_process.js";
|
|
15
15
|
export * from "./utils/tensor.js";
|
|
16
16
|
export { random } from "./utils/random.js";
|
|
17
|
+
export { DynamicCache } from "./cache_utils.js";
|
|
17
18
|
export { ModelRegistry } from "./utils/model_registry/ModelRegistry.js";
|
|
18
19
|
export type PretrainedModelOptions = import("./utils/hub.js").PretrainedModelOptions;
|
|
19
20
|
export type PretrainedProcessorOptions = import("./processing_utils.js").PretrainedProcessorOptions;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"transformers.d.ts","sourceRoot":"","sources":["../src/transformers.js"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"transformers.d.ts","sourceRoot":"","sources":["../src/transformers.js"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;qCA8Da,OAAO,gBAAgB,EAAE,sBAAsB;yCAC/C,OAAO,uBAAuB,EAAE,0BAA0B;sBAC1D,OAAO,yBAAyB,EAAE,OAAO;yCACzC,OAAO,yBAAyB,EAAE,0BAA0B;uBAC5D,OAAO,mBAAmB,EAAE,QAAQ;yBACpC,OAAO,oBAAoB,EAAE,UAAU;+BACvC,OAAO,iBAAiB,EAAE,gBAAgB;2BAC1C,OAAO,iBAAiB,EAAE,YAAY"}
|
package/types/utils/audio.d.ts
CHANGED
|
@@ -76,8 +76,10 @@ export function mel_filter_bank(num_frequency_bins: number, num_mel_filters: num
|
|
|
76
76
|
* If supplied, applies this filter bank to create a mel spectrogram.
|
|
77
77
|
* @param {number} [options.mel_floor=1e-10] Minimum value of mel frequency banks.
|
|
78
78
|
* @param {string} [options.log_mel=null] How to convert the spectrogram to log scale. Possible options are:
|
|
79
|
-
* `null` (don't convert), `"log"` (take the natural logarithm) `"log10"` (take the base-10 logarithm), `"dB"` (convert to decibels)
|
|
79
|
+
* `null` (don't convert), `"log"` (take the natural logarithm), `"log10"` (take the base-10 logarithm), `"dB"` (convert to decibels),
|
|
80
|
+
* `"log10_max_norm"` (take `log10`, then apply `(max(x, maxVal - 8) + 4) / 4` normalization, where `maxVal` is computed from data or given by `max_log_mel`).
|
|
80
81
|
* Can only be used when `power` is not `null`.
|
|
82
|
+
* @param {number} [options.max_log_mel=null] When `log_mel` is `"log10_max_norm"`, use this fixed value as the max instead of computing from data.
|
|
81
83
|
* @param {number} [options.reference=1.0] Sets the input spectrogram value that corresponds to 0 dB. For example, use `max(spectrogram)[0]` to set
|
|
82
84
|
* the loudest part to 0 dB. Must be greater than zero.
|
|
83
85
|
* @param {number} [options.min_value=1e-10] The spectrogram will be clipped to this minimum value before conversion to decibels, to avoid taking `log(0)`.
|
|
@@ -94,7 +96,7 @@ export function mel_filter_bank(num_frequency_bins: number, num_mel_filters: num
|
|
|
94
96
|
* @param {number} [options.mel_offset=0] Offset to add to the mel spectrogram to avoid taking the log of zero.
|
|
95
97
|
* @returns {Promise<Tensor>} Spectrogram of shape `(num_frequency_bins, length)` (regular spectrogram) or shape `(num_mel_filters, length)` (mel spectrogram).
|
|
96
98
|
*/
|
|
97
|
-
export function spectrogram(waveform: Float32Array | Float64Array, window: Float32Array | Float64Array, frame_length: number, hop_length: number, { fft_length, power, center, pad_mode, onesided, preemphasis, preemphasis_htk_flavor, mel_filters, mel_floor, log_mel, reference, min_value, db_range, remove_dc_offset, min_num_frames, max_num_frames, do_pad, transpose, mel_offset, }?: {
|
|
99
|
+
export function spectrogram(waveform: Float32Array | Float64Array, window: Float32Array | Float64Array, frame_length: number, hop_length: number, { fft_length, power, center, pad_mode, onesided, preemphasis, preemphasis_htk_flavor, mel_filters, mel_floor, log_mel, max_log_mel, reference, min_value, db_range, remove_dc_offset, min_num_frames, max_num_frames, do_pad, transpose, mel_offset, }?: {
|
|
98
100
|
fft_length?: number;
|
|
99
101
|
power?: number;
|
|
100
102
|
center?: boolean;
|
|
@@ -105,6 +107,7 @@ export function spectrogram(waveform: Float32Array | Float64Array, window: Float
|
|
|
105
107
|
mel_filters?: number[][];
|
|
106
108
|
mel_floor?: number;
|
|
107
109
|
log_mel?: string;
|
|
110
|
+
max_log_mel?: number;
|
|
108
111
|
reference?: number;
|
|
109
112
|
min_value?: number;
|
|
110
113
|
db_range?: number;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"audio.d.ts","sourceRoot":"","sources":["../../src/utils/audio.js"],"names":[],"mappings":"AAgBA;;;;;GAKG;AACH,gCAJW,MAAM,GAAC,GAAG,iBACV,MAAM,GACJ,OAAO,CAAC,YAAY,CAAC,CAsDjC;AA2BD;;;;;;GAMG;AACH,2BAHW,MAAM,GACJ,YAAY,CAIxB;AAED;;;;;;GAMG;AACH,2BAHW,MAAM,GACJ,YAAY,CAIxB;AAyGD;;;;;;;;;;;;;;;;;GAiBG;AACH,oDAbW,MAAM,mBAEN,MAAM,iBACN,MAAM,iBACN,MAAM,iBACN,MAAM,SACN,MAAM,GAAC,IAAI,cACX,MAAM,OAAO,oBAAoB,+BACjC,OAAO,GAEL,MAAM,EAAE,EAAE,CA4DtB;AAqHD
|
|
1
|
+
{"version":3,"file":"audio.d.ts","sourceRoot":"","sources":["../../src/utils/audio.js"],"names":[],"mappings":"AAgBA;;;;;GAKG;AACH,gCAJW,MAAM,GAAC,GAAG,iBACV,MAAM,GACJ,OAAO,CAAC,YAAY,CAAC,CAsDjC;AA2BD;;;;;;GAMG;AACH,2BAHW,MAAM,GACJ,YAAY,CAIxB;AAED;;;;;;GAMG;AACH,2BAHW,MAAM,GACJ,YAAY,CAIxB;AAyGD;;;;;;;;;;;;;;;;;GAiBG;AACH,oDAbW,MAAM,mBAEN,MAAM,iBACN,MAAM,iBACN,MAAM,iBACN,MAAM,SACN,MAAM,GAAC,IAAI,cACX,MAAM,OAAO,oBAAoB,+BACjC,OAAO,GAEL,MAAM,EAAE,EAAE,CA4DtB;AAqHD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAuDG;AACH,sCAzCW,YAAY,GAAC,YAAY,UACzB,YAAY,GAAC,YAAY,gBAEzB,MAAM,cACN,MAAM,2PAEd;IAAyB,UAAU,GAA3B,MAAM;IAEW,KAAK,GAAtB,MAAM;IACY,MAAM,GAAxB,OAAO;IAEU,QAAQ,GAAzB,MAAM;IAEY,QAAQ,GAA1B,OAAO;IAEU,WAAW,GAA5B,MAAM;IACY,sBAAsB,GAAxC,OAAO;IACc,WAAW,GAAhC,MAAM,EAAE,EAAE;IAEO,SAAS,GAA1B,MAAM;IACW,OAAO,GAAxB,MAAM;IAIW,WAAW,GAA5B,MAAM;IACW,SAAS,GAA1B,MAAM;IAEW,SAAS,GAA1B,MAAM;IAGW,QAAQ,GAAzB,MAAM;IAEY,gBAAgB,GAAlC,OAAO;IAEU,cAAc,GAA/B,MAAM;IACW,cAAc,GAA/B,MAAM;IACY,MAAM,GAAxB,OAAO;IACW,SAAS,GAA3B,OAAO;IACU,UAAU,GAA3B,MAAM;CACd,GAAU,OAAO,CAAC,MAAM,CAAC,CAsO3B;AAED;;;;;;;;;;GAUG;AACH,+CATW,MAAM,QACN,MAAM,uCAEd;IAA0B,QAAQ,GAA1B,OAAO;IACU,YAAY,GAA7B,MAAM;IAEY,MAAM,GAAxB,OAAO;CACf,GAAU,YAAY,CAmCxB;AAuDD;IACI;;;;OAIG;IACH,mBAHW,YAAY,GAAC,YAAY,EAAE,iBAC3B,MAAM,EAKhB;IAFG,uEAAkB;IAClB,sBAAkC;IAGtC;;;OAGG;IACH,YAFa,YAAY,CAsBxB;IAED;;;OAGG;IACH,UAFa,IAAI,CAQhB;IAED;;;;OAIG;IACH,WAHW,MAAM,GACJ,OAAO,CAAC,IAAI,CAAC,CAIzB;CACJ;;IAzsBQ,mBAAY,MAAM,UAAqD;IACrE,qBAAY,MAAM,UAAqD;IACtE,sBAAY,MAAM,0EAC+E;;uBAzI9E,aAAa"}
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* A cache implementation backed by the experimental `navigator.crossOriginStorage` API,
|
|
3
|
+
* which allows sharing cached files (identified by content hash) across origins.
|
|
4
|
+
*
|
|
5
|
+
* Implements {@link import('../cache.js').CacheInterface}.
|
|
6
|
+
*
|
|
7
|
+
* @see https://github.com/explainers-by-googlers/cross-origin-storage
|
|
8
|
+
*/
|
|
9
|
+
export class CrossOriginStorage {
|
|
10
|
+
/**
|
|
11
|
+
* Returns whether the `navigator.crossOriginStorage` API is available in the current environment.
|
|
12
|
+
* @returns {boolean}
|
|
13
|
+
*/
|
|
14
|
+
static isAvailable: () => boolean;
|
|
15
|
+
/**
|
|
16
|
+
* Returns (and lazily opens) the hash cache, reusing the same promise across concurrent callers.
|
|
17
|
+
* @returns {Promise<Cache>}
|
|
18
|
+
*/
|
|
19
|
+
_getHashCache: () => Promise<Cache>;
|
|
20
|
+
/**
|
|
21
|
+
* Looks up a cached response for the given URL by resolving its SHA-256 hash and requesting
|
|
22
|
+
* the corresponding file handle from cross-origin storage.
|
|
23
|
+
*
|
|
24
|
+
* Implements `CacheInterface.match`.
|
|
25
|
+
*
|
|
26
|
+
* @param {string} request The URL of the resource to look up.
|
|
27
|
+
* @returns {Promise<Response|undefined>} The cached `Response`, or `undefined` if not found.
|
|
28
|
+
*/
|
|
29
|
+
match: (request: string) => Promise<Response | undefined>;
|
|
30
|
+
/**
|
|
31
|
+
* Stores a response in cross-origin storage, keyed by its SHA-256 hash.
|
|
32
|
+
*
|
|
33
|
+
* For LFS-backed URLs the hash is resolved cheaply via `_getFileHash` (which checks
|
|
34
|
+
* `HASH_CACHE_NAME` first, then falls back to fetching the Git LFS pointer file)
|
|
35
|
+
* without reading the response body a second time.
|
|
36
|
+
*
|
|
37
|
+
* For non-LFS resources the hash is unknown upfront. In that case the body is consumed
|
|
38
|
+
* in the background: the stream is read to compute the content hash, the file is written
|
|
39
|
+
* into cross-origin storage, and the computed hash is persisted to `HASH_CACHE_NAME`
|
|
40
|
+
* so that future `match` calls can resolve the file without a network round-trip.
|
|
41
|
+
*
|
|
42
|
+
* Implements `CacheInterface.put`.
|
|
43
|
+
*
|
|
44
|
+
* @param {string} request The URL of the resource (used as the hash-cache key).
|
|
45
|
+
* @param {Response} response The response whose body will be written to the cache.
|
|
46
|
+
* @returns {Promise<void>}
|
|
47
|
+
*/
|
|
48
|
+
put: (request: string, response: Response) => Promise<void>;
|
|
49
|
+
/**
|
|
50
|
+
* Writes a blob into cross-origin storage using the given pre-computed hex hash string.
|
|
51
|
+
*
|
|
52
|
+
* @param {Blob} blob
|
|
53
|
+
* @param {string} hashHex Hex-encoded SHA-256 hash of `blob`.
|
|
54
|
+
* @returns {Promise<void>}
|
|
55
|
+
*/
|
|
56
|
+
_storeBlobInCOS: (blob: Blob, hashHex: string) => Promise<void>;
|
|
57
|
+
/**
|
|
58
|
+
* Background task for non-LFS resources: consumes `stream`, computes the SHA-256 hash
|
|
59
|
+
* of the resulting blob, stores it in cross-origin storage, and persists the computed
|
|
60
|
+
* hash to `HASH_CACHE_NAME` keyed by `request` so future `match` calls can resolve the
|
|
61
|
+
* file without a network round-trip.
|
|
62
|
+
*
|
|
63
|
+
* Called fire-and-forget from `put` — errors are swallowed so failures never surface to
|
|
64
|
+
* the caller.
|
|
65
|
+
*
|
|
66
|
+
* @param {string} request The original resource URL.
|
|
67
|
+
* @param {ReadableStream} stream The response body stream to consume.
|
|
68
|
+
* @returns {Promise<void>}
|
|
69
|
+
*/
|
|
70
|
+
_processAndStore: (request: string, stream: ReadableStream) => Promise<void>;
|
|
71
|
+
/**
|
|
72
|
+
* Deletes the cache entry for the given request.
|
|
73
|
+
*
|
|
74
|
+
* Removes the hash entry from `HASH_CACHE_NAME`. Note: cross-origin storage itself does not
|
|
75
|
+
* expose a delete API, so only the local hash mapping is removed. For non-LFS URLs this
|
|
76
|
+
* permanently prevents `match` from resolving the file. For LFS-backed URLs, `match` will
|
|
77
|
+
* re-fetch the LFS pointer file on the next call and repopulate the hash cache automatically.
|
|
78
|
+
*
|
|
79
|
+
* Implements `CacheInterface.delete`.
|
|
80
|
+
*
|
|
81
|
+
* @param {string} request
|
|
82
|
+
* @returns {Promise<boolean>} Resolves to `true` if the hash entry was deleted, `false` otherwise.
|
|
83
|
+
*/
|
|
84
|
+
delete: (request: string) => Promise<boolean>;
|
|
85
|
+
/**
|
|
86
|
+
* Resolves the SHA-256 hash for a given URL.
|
|
87
|
+
*
|
|
88
|
+
* Returns the cached hash immediately if one has been persisted to `HASH_CACHE_NAME`.
|
|
89
|
+
* Otherwise falls back to `_getLfsFileHash` to retrieve the hash from the Hugging Face
|
|
90
|
+
* LFS pointer file, persisting the result to `HASH_CACHE_NAME` for future lookups.
|
|
91
|
+
*
|
|
92
|
+
* Returns `null` if the hash cannot be determined (e.g. non-LFS URL with no cached entry).
|
|
93
|
+
*
|
|
94
|
+
* @param {string} url The resource URL to resolve a hash for.
|
|
95
|
+
* @returns {Promise<string|null>} The hex-encoded SHA-256 hash, or `null` if unavailable.
|
|
96
|
+
*/
|
|
97
|
+
_getFileHash: (url: string) => Promise<string | null>;
|
|
98
|
+
/**
|
|
99
|
+
* Attempts to retrieve the SHA-256 hash for a Hugging Face resource URL from its raw
|
|
100
|
+
* Git LFS pointer file.
|
|
101
|
+
*
|
|
102
|
+
* Only applicable to URLs containing `/resolve/` (i.e. Hugging Face resolved file URLs).
|
|
103
|
+
* The `/resolve/` segment is rewritten to `/raw/` to fetch the LFS pointer directly.
|
|
104
|
+
* Returns `null` for non-LFS URLs or when the network request fails.
|
|
105
|
+
*
|
|
106
|
+
* @see https://huggingface.co/docs/hub/en/storage-backends#xet
|
|
107
|
+
* @param {string} url The resolved Hugging Face URL of the resource.
|
|
108
|
+
* @returns {Promise<string|null>} The hex-encoded SHA-256 hash, or `null` if unavailable.
|
|
109
|
+
*/
|
|
110
|
+
_getLfsFileHash: (url: string) => Promise<string | null>;
|
|
111
|
+
/**
|
|
112
|
+
* Computes the SHA-256 hash of a `Blob`'s contents.
|
|
113
|
+
*
|
|
114
|
+
* @param {Blob} blob The blob to hash.
|
|
115
|
+
* @returns {Promise<string>} The lowercase hex-encoded SHA-256 hash.
|
|
116
|
+
*/
|
|
117
|
+
_getBlobHash: (blob: Blob) => Promise<string>;
|
|
118
|
+
#private;
|
|
119
|
+
}
|
|
120
|
+
//# sourceMappingURL=CrossOriginStorageCache.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"CrossOriginStorageCache.d.ts","sourceRoot":"","sources":["../../../src/utils/cache/CrossOriginStorageCache.js"],"names":[],"mappings":"AAiBA;;;;;;;GAOG;AACH;IAaI;;;OAGG;IACH,0BAFa,OAAO,CAE6E;IAbjG;;;OAGG;IACH,qBAFa,OAAO,CAAC,KAAK,CAAC,CAKzB;IAQF;;;;;;;;OAQG;IACH,QAAe,SAHJ,MAGW,KAFT,OAAO,CAAC,QAAQ,GAAC,SAAS,CAAC,CAkBtC;IAEF;;;;;;;;;;;;;;;;;OAiBG;IACH,MAAa,SAJF,MAIS,EAAE,UAHX,QAGmB,KAFjB,OAAO,CAAC,IAAI,CAAC,CAexB;IAEF;;;;;;OAMG;IACH,kBAAyB,MAJd,IAIkB,EAAE,SAHpB,MAG2B,KAFzB,OAAO,CAAC,IAAI,CAAC,CASxB;IAEF;;;;;;;;;;;;OAYG;IACH,mBAA0B,SAJf,MAIsB,EAAE,QAHxB,cAG8B,KAF5B,OAAO,CAAC,IAAI,CAAC,CAuBxB;IAEF;;;;;;;;;;;;OAYG;IACH,SAAgB,SAHL,MAGY,KAFV,OAAO,CAAC,OAAO,CAAC,CAS3B;IAEF;;;;;;;;;;;OAWG;IACH,eAAsB,KAHX,MAGc,KAFZ,OAAO,CAAC,MAAM,GAAC,IAAI,CAAC,CAoB/B;IAEF;;;;;;;;;;;OAWG;IACH,kBAAyB,KAHd,MAGiB,KAFf,OAAO,CAAC,MAAM,GAAC,IAAI,CAAC,CAgB/B;IAEF;;;;;OAKG;IACH,eAAsB,MAHX,IAGe,KAFb,OAAO,CAAC,MAAM,CAAC,CAO1B;;CACL"}
|