@huggingface/transformers 3.2.3 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -3
- package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
- package/dist/ort.bundle.min.mjs +2776 -0
- package/dist/transformers.cjs +792 -330
- package/dist/transformers.cjs.map +1 -1
- package/dist/transformers.js +1150 -656
- package/dist/transformers.js.map +1 -1
- package/dist/transformers.min.cjs +1 -1
- package/dist/transformers.min.cjs.map +1 -1
- package/dist/transformers.min.js +1 -1
- package/dist/transformers.min.js.map +1 -1
- package/dist/transformers.min.mjs +1 -1
- package/dist/transformers.min.mjs.map +1 -1
- package/dist/transformers.mjs +798 -331
- package/dist/transformers.mjs.map +1 -1
- package/package.json +3 -3
- package/src/base/feature_extraction_utils.js +9 -9
- package/src/base/image_processors_utils.js +12 -1
- package/src/base/processing_utils.js +24 -3
- package/src/configs.js +5 -0
- package/src/env.js +1 -2
- package/src/generation/streamers.js +5 -2
- package/src/models/auto/feature_extraction_auto.js +0 -16
- package/src/models/auto/processing_auto.js +0 -16
- package/src/models/convnext/image_processing_convnext.js +1 -0
- package/src/models/efficientnet/image_processing_efficientnet.js +1 -0
- package/src/models/florence2/processing_florence2.js +3 -0
- package/src/models/grounding_dino/image_processing_grounding_dino.js +29 -0
- package/src/models/grounding_dino/processing_grounding_dino.js +101 -0
- package/src/models/idefics3/image_processing_idefics3.js +2 -0
- package/src/models/image_processors.js +1 -0
- package/src/models/janus/image_processing_janus.js +1 -0
- package/src/models/mgp_str/processing_mgp_str.js +2 -0
- package/src/models/paligemma/processing_paligemma.js +1 -0
- package/src/models/phi3_v/processing_phi3_v.js +1 -1
- package/src/models/processors.js +3 -2
- package/src/models/pyannote/feature_extraction_pyannote.js +1 -0
- package/src/models/qwen2_vl/processing_qwen2_vl.js +1 -0
- package/src/models/seamless_m4t/feature_extraction_seamless_m4t.js +2 -2
- package/src/models/whisper/feature_extraction_whisper.js +1 -1
- package/src/models.js +72 -20
- package/src/ops/registry.js +10 -0
- package/src/pipelines.js +73 -23
- package/src/tokenizers.js +4 -7
- package/src/utils/audio.js +113 -1
- package/src/utils/core.js +26 -0
- package/src/utils/dtypes.js +2 -0
- package/src/utils/hub.js +1 -1
- package/src/utils/image.js +5 -18
- package/src/utils/maths.js +8 -6
- package/src/utils/tensor.js +134 -114
- package/types/base/feature_extraction_utils.d.ts +7 -7
- package/types/base/image_processors_utils.d.ts +7 -0
- package/types/base/image_processors_utils.d.ts.map +1 -1
- package/types/base/processing_utils.d.ts +25 -19
- package/types/base/processing_utils.d.ts.map +1 -1
- package/types/configs.d.ts.map +1 -1
- package/types/generation/parameters.d.ts +1 -1
- package/types/generation/streamers.d.ts +3 -1
- package/types/generation/streamers.d.ts.map +1 -1
- package/types/models/auto/feature_extraction_auto.d.ts.map +1 -1
- package/types/models/auto/image_processing_auto.d.ts.map +1 -1
- package/types/models/auto/processing_auto.d.ts.map +1 -1
- package/types/models/convnext/image_processing_convnext.d.ts.map +1 -1
- package/types/models/efficientnet/image_processing_efficientnet.d.ts.map +1 -1
- package/types/models/florence2/processing_florence2.d.ts.map +1 -1
- package/types/models/grounding_dino/image_processing_grounding_dino.d.ts +20 -0
- package/types/models/grounding_dino/image_processing_grounding_dino.d.ts.map +1 -0
- package/types/models/grounding_dino/processing_grounding_dino.d.ts +27 -0
- package/types/models/grounding_dino/processing_grounding_dino.d.ts.map +1 -0
- package/types/models/idefics3/image_processing_idefics3.d.ts.map +1 -1
- package/types/models/image_processors.d.ts +1 -0
- package/types/models/janus/image_processing_janus.d.ts.map +1 -1
- package/types/models/mgp_str/processing_mgp_str.d.ts.map +1 -1
- package/types/models/paligemma/processing_paligemma.d.ts.map +1 -1
- package/types/models/phi3_v/processing_phi3_v.d.ts +6 -2
- package/types/models/phi3_v/processing_phi3_v.d.ts.map +1 -1
- package/types/models/processors.d.ts +3 -2
- package/types/models/pyannote/feature_extraction_pyannote.d.ts.map +1 -1
- package/types/models/qwen2_vl/processing_qwen2_vl.d.ts.map +1 -1
- package/types/models/sapiens/image_processing_sapiens.d.ts +10 -0
- package/types/models/sapiens/image_processing_sapiens.d.ts.map +1 -0
- package/types/models/whisper/generation_whisper.d.ts +1 -1
- package/types/models/whisper/generation_whisper.d.ts.map +1 -1
- package/types/models.d.ts +40 -17
- package/types/models.d.ts.map +1 -1
- package/types/ops/registry.d.ts +1 -0
- package/types/ops/registry.d.ts.map +1 -1
- package/types/pipelines.d.ts +7 -12
- package/types/pipelines.d.ts.map +1 -1
- package/types/tokenizers.d.ts.map +1 -1
- package/types/tsconfig.tsbuildinfo +1 -0
- package/types/utils/audio.d.ts +25 -0
- package/types/utils/audio.d.ts.map +1 -1
- package/types/utils/core.d.ts +6 -0
- package/types/utils/core.d.ts.map +1 -1
- package/types/utils/dtypes.d.ts.map +1 -1
- package/types/utils/hub.d.ts +1 -1
- package/types/utils/hub.d.ts.map +1 -1
- package/types/utils/image.d.ts +3 -2
- package/types/utils/image.d.ts.map +1 -1
- package/types/utils/maths.d.ts +8 -6
- package/types/utils/maths.d.ts.map +1 -1
- package/types/utils/tensor.d.ts +22 -6
- package/types/utils/tensor.d.ts.map +1 -1
package/dist/transformers.cjs
CHANGED
|
@@ -4158,23 +4158,23 @@ class FeatureExtractor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_1__.Ca
|
|
|
4158
4158
|
}
|
|
4159
4159
|
|
|
4160
4160
|
/**
|
|
4161
|
-
* Instantiate one of the
|
|
4161
|
+
* Instantiate one of the feature extractor classes of the library from a pretrained model.
|
|
4162
4162
|
*
|
|
4163
|
-
* The
|
|
4164
|
-
*
|
|
4163
|
+
* The feature extractor class to instantiate is selected based on the `feature_extractor_type` property of
|
|
4164
|
+
* the config object (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
|
|
4165
4165
|
*
|
|
4166
4166
|
* @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
|
|
4167
|
-
* - A string, the *model id* of a pretrained
|
|
4167
|
+
* - A string, the *model id* of a pretrained feature_extractor hosted inside a model repo on huggingface.co.
|
|
4168
4168
|
* Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
|
|
4169
4169
|
* user or organization name, like `dbmdz/bert-base-german-cased`.
|
|
4170
|
-
* - A path to a *directory* containing
|
|
4171
|
-
* @param {import('../utils/hub.js').PretrainedOptions} options Additional options for loading the
|
|
4170
|
+
* - A path to a *directory* containing feature_extractor files, e.g., `./my_model_directory/`.
|
|
4171
|
+
* @param {import('../utils/hub.js').PretrainedOptions} options Additional options for loading the feature_extractor.
|
|
4172
4172
|
*
|
|
4173
|
-
* @returns {Promise<FeatureExtractor>} A new instance of the
|
|
4173
|
+
* @returns {Promise<FeatureExtractor>} A new instance of the Feature Extractor class.
|
|
4174
4174
|
*/
|
|
4175
4175
|
static async from_pretrained(pretrained_model_name_or_path, options) {
|
|
4176
|
-
const
|
|
4177
|
-
return new this(
|
|
4176
|
+
const config = await (0,_utils_hub_js__WEBPACK_IMPORTED_MODULE_2__.getModelJSON)(pretrained_model_name_or_path, _utils_constants_js__WEBPACK_IMPORTED_MODULE_0__.FEATURE_EXTRACTOR_NAME, true, options);
|
|
4177
|
+
return new this(config);
|
|
4178
4178
|
}
|
|
4179
4179
|
}
|
|
4180
4180
|
|
|
@@ -4207,6 +4207,7 @@ function validate_audio_inputs(audio, feature_extractor) {
|
|
|
4207
4207
|
__webpack_require__.r(__webpack_exports__);
|
|
4208
4208
|
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
4209
4209
|
/* harmony export */ ImageProcessor: () => (/* binding */ ImageProcessor),
|
|
4210
|
+
/* harmony export */ center_to_corners_format: () => (/* binding */ center_to_corners_format),
|
|
4210
4211
|
/* harmony export */ post_process_instance_segmentation: () => (/* binding */ post_process_instance_segmentation),
|
|
4211
4212
|
/* harmony export */ post_process_object_detection: () => (/* binding */ post_process_object_detection),
|
|
4212
4213
|
/* harmony export */ post_process_panoptic_segmentation: () => (/* binding */ post_process_panoptic_segmentation),
|
|
@@ -4825,14 +4826,20 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
|
|
|
4825
4826
|
this.do_thumbnail = config.do_thumbnail;
|
|
4826
4827
|
this.size = config.size ?? config.image_size;
|
|
4827
4828
|
this.do_resize = config.do_resize ?? (this.size !== undefined);
|
|
4829
|
+
// @ts-expect-error TS2339
|
|
4828
4830
|
this.size_divisibility = config.size_divisibility ?? config.size_divisor;
|
|
4829
4831
|
|
|
4830
4832
|
this.do_center_crop = config.do_center_crop;
|
|
4833
|
+
// @ts-expect-error TS2339
|
|
4831
4834
|
this.crop_size = config.crop_size;
|
|
4835
|
+
// @ts-expect-error TS2339
|
|
4832
4836
|
this.do_convert_rgb = config.do_convert_rgb ?? true;
|
|
4837
|
+
// @ts-expect-error TS2339
|
|
4833
4838
|
this.do_crop_margin = config.do_crop_margin;
|
|
4834
4839
|
|
|
4840
|
+
// @ts-expect-error TS2339
|
|
4835
4841
|
this.pad_size = config.pad_size;
|
|
4842
|
+
// @ts-expect-error TS2339
|
|
4836
4843
|
this.do_pad = config.do_pad;
|
|
4837
4844
|
|
|
4838
4845
|
if (this.do_pad && !this.pad_size && this.size && this.size.width !== undefined && this.size.height !== undefined) {
|
|
@@ -5041,6 +5048,7 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
|
|
|
5041
5048
|
// Support both formats for backwards compatibility
|
|
5042
5049
|
else if (Number.isInteger(size)) {
|
|
5043
5050
|
shortest_edge = size;
|
|
5051
|
+
// @ts-expect-error TS2339
|
|
5044
5052
|
longest_edge = this.config.max_size ?? shortest_edge;
|
|
5045
5053
|
|
|
5046
5054
|
} else if (size !== undefined) {
|
|
@@ -5109,6 +5117,7 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
|
|
|
5109
5117
|
} else if (size.min_pixels !== undefined && size.max_pixels !== undefined) {
|
|
5110
5118
|
// Custom resize logic for Qwen2-VL models
|
|
5111
5119
|
const { min_pixels, max_pixels } = size;
|
|
5120
|
+
// @ts-expect-error TS2339
|
|
5112
5121
|
const factor = this.config.patch_size * this.config.merge_size;
|
|
5113
5122
|
return smart_resize(srcHeight, srcWidth, factor, min_pixels, max_pixels);
|
|
5114
5123
|
} else {
|
|
@@ -5124,6 +5133,7 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
|
|
|
5124
5133
|
async resize(image) {
|
|
5125
5134
|
const [newWidth, newHeight] = this.get_resize_output_image_size(image, this.size);
|
|
5126
5135
|
return await image.resize(newWidth, newHeight, {
|
|
5136
|
+
// @ts-expect-error TS2322
|
|
5127
5137
|
resample: this.resample,
|
|
5128
5138
|
});
|
|
5129
5139
|
}
|
|
@@ -5174,6 +5184,7 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
|
|
|
5174
5184
|
|
|
5175
5185
|
// Resize the image using thumbnail method.
|
|
5176
5186
|
if (this.do_thumbnail) {
|
|
5187
|
+
// @ts-expect-error TS2345
|
|
5177
5188
|
image = await this.thumbnail(image, this.size, this.resample);
|
|
5178
5189
|
}
|
|
5179
5190
|
|
|
@@ -5198,6 +5209,7 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
|
|
|
5198
5209
|
// NOTE: All pixel-level manipulation (i.e., modifying `pixelData`)
|
|
5199
5210
|
// occurs with data in the hwc format (height, width, channels),
|
|
5200
5211
|
// to emulate the behavior of the original Python code (w/ numpy).
|
|
5212
|
+
/** @type {Float32Array} */
|
|
5201
5213
|
let pixelData = Float32Array.from(image.data);
|
|
5202
5214
|
let imgDims = [image.height, image.width, image.channels];
|
|
5203
5215
|
|
|
@@ -5356,6 +5368,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
5356
5368
|
/**
|
|
5357
5369
|
* @typedef {Object} ProcessorProperties Additional processor-specific properties.
|
|
5358
5370
|
* @typedef {import('../utils/hub.js').PretrainedOptions & ProcessorProperties} PretrainedProcessorOptions
|
|
5371
|
+
* @typedef {import('../tokenizers.js').PreTrainedTokenizer} PreTrainedTokenizer
|
|
5359
5372
|
*/
|
|
5360
5373
|
|
|
5361
5374
|
|
|
@@ -5389,7 +5402,7 @@ class Processor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_1__.Callable
|
|
|
5389
5402
|
}
|
|
5390
5403
|
|
|
5391
5404
|
/**
|
|
5392
|
-
* @returns {
|
|
5405
|
+
* @returns {PreTrainedTokenizer|undefined} The tokenizer of the processor, if it exists.
|
|
5393
5406
|
*/
|
|
5394
5407
|
get tokenizer() {
|
|
5395
5408
|
return this.components.tokenizer;
|
|
@@ -5402,6 +5415,11 @@ class Processor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_1__.Callable
|
|
|
5402
5415
|
return this.components.feature_extractor;
|
|
5403
5416
|
}
|
|
5404
5417
|
|
|
5418
|
+
/**
|
|
5419
|
+
* @param {Parameters<PreTrainedTokenizer['apply_chat_template']>[0]} messages
|
|
5420
|
+
* @param {Parameters<PreTrainedTokenizer['apply_chat_template']>[1]} options
|
|
5421
|
+
* @returns {ReturnType<PreTrainedTokenizer['apply_chat_template']>}
|
|
5422
|
+
*/
|
|
5405
5423
|
apply_chat_template(messages, options = {}) {
|
|
5406
5424
|
if (!this.tokenizer) {
|
|
5407
5425
|
throw new Error('Unable to apply chat template without a tokenizer.');
|
|
@@ -5412,6 +5430,10 @@ class Processor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_1__.Callable
|
|
|
5412
5430
|
});
|
|
5413
5431
|
}
|
|
5414
5432
|
|
|
5433
|
+
/**
|
|
5434
|
+
* @param {Parameters<PreTrainedTokenizer['batch_decode']>} args
|
|
5435
|
+
* @returns {ReturnType<PreTrainedTokenizer['batch_decode']>}
|
|
5436
|
+
*/
|
|
5415
5437
|
batch_decode(...args) {
|
|
5416
5438
|
if (!this.tokenizer) {
|
|
5417
5439
|
throw new Error('Unable to decode without a tokenizer.');
|
|
@@ -5419,6 +5441,17 @@ class Processor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_1__.Callable
|
|
|
5419
5441
|
return this.tokenizer.batch_decode(...args);
|
|
5420
5442
|
}
|
|
5421
5443
|
|
|
5444
|
+
/**
|
|
5445
|
+
* @param {Parameters<PreTrainedTokenizer['decode']>} args
|
|
5446
|
+
* @returns {ReturnType<PreTrainedTokenizer['decode']>}
|
|
5447
|
+
*/
|
|
5448
|
+
decode(...args) {
|
|
5449
|
+
if (!this.tokenizer) {
|
|
5450
|
+
throw new Error('Unable to decode without a tokenizer.');
|
|
5451
|
+
}
|
|
5452
|
+
return this.tokenizer.decode(...args);
|
|
5453
|
+
}
|
|
5454
|
+
|
|
5422
5455
|
|
|
5423
5456
|
/**
|
|
5424
5457
|
* Calls the feature_extractor function with the given input.
|
|
@@ -5439,8 +5472,8 @@ class Processor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_1__.Callable
|
|
|
5439
5472
|
/**
|
|
5440
5473
|
* Instantiate one of the processor classes of the library from a pretrained model.
|
|
5441
5474
|
*
|
|
5442
|
-
* The processor class to instantiate is selected based on the `
|
|
5443
|
-
* (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
|
|
5475
|
+
* The processor class to instantiate is selected based on the `image_processor_type` (or `feature_extractor_type`; legacy)
|
|
5476
|
+
* property of the config object (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
|
|
5444
5477
|
*
|
|
5445
5478
|
* @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
|
|
5446
5479
|
* - A string, the *model id* of a pretrained processor hosted inside a model repo on huggingface.co.
|
|
@@ -5560,15 +5593,19 @@ function getNormalizedConfig(config) {
|
|
|
5560
5593
|
case 'florence2':
|
|
5561
5594
|
case 'llava_onevision':
|
|
5562
5595
|
case 'idefics3':
|
|
5596
|
+
// @ts-expect-error TS2339
|
|
5563
5597
|
init_normalized_config = getNormalizedConfig(config.text_config);
|
|
5564
5598
|
break;
|
|
5565
5599
|
case 'moondream1':
|
|
5600
|
+
// @ts-expect-error TS2339
|
|
5566
5601
|
init_normalized_config = getNormalizedConfig(config.phi_config);
|
|
5567
5602
|
break;
|
|
5568
5603
|
case 'musicgen':
|
|
5604
|
+
// @ts-expect-error TS2339
|
|
5569
5605
|
init_normalized_config = getNormalizedConfig(config.decoder);
|
|
5570
5606
|
break;
|
|
5571
5607
|
case 'multi_modality':
|
|
5608
|
+
// @ts-expect-error TS2339
|
|
5572
5609
|
init_normalized_config = getNormalizedConfig(config.language_config);
|
|
5573
5610
|
break;
|
|
5574
5611
|
|
|
@@ -5689,6 +5726,7 @@ function getNormalizedConfig(config) {
|
|
|
5689
5726
|
break;
|
|
5690
5727
|
|
|
5691
5728
|
case 'vision-encoder-decoder':
|
|
5729
|
+
// @ts-expect-error TS2339
|
|
5692
5730
|
const decoderConfig = getNormalizedConfig(config.decoder);
|
|
5693
5731
|
|
|
5694
5732
|
const add_encoder_pkv = 'num_decoder_layers' in decoderConfig;
|
|
@@ -5932,7 +5970,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
5932
5970
|
|
|
5933
5971
|
|
|
5934
5972
|
|
|
5935
|
-
const VERSION = '3.2.3';
|
|
5973
|
+
const VERSION = '3.3.0';
|
|
5936
5974
|
|
|
5937
5975
|
// Check if various APIs are available (depends on environment)
|
|
5938
5976
|
const IS_BROWSER_ENV = typeof window !== "undefined" && typeof window.document !== "undefined";
|
|
@@ -6068,7 +6106,6 @@ function isEmpty(obj) {
|
|
|
6068
6106
|
}
|
|
6069
6107
|
|
|
6070
6108
|
|
|
6071
|
-
|
|
6072
6109
|
/***/ }),
|
|
6073
6110
|
|
|
6074
6111
|
/***/ "./src/generation/configuration_utils.js":
|
|
@@ -7683,6 +7720,7 @@ class TextStreamer extends BaseStreamer {
|
|
|
7683
7720
|
* @param {import('../tokenizers.js').PreTrainedTokenizer} tokenizer
|
|
7684
7721
|
* @param {Object} options
|
|
7685
7722
|
* @param {boolean} [options.skip_prompt=false] Whether to skip the prompt tokens
|
|
7723
|
+
* @param {boolean} [options.skip_special_tokens=true] Whether to skip special tokens when decoding
|
|
7686
7724
|
* @param {function(string): void} [options.callback_function=null] Function to call when a piece of text is ready to display
|
|
7687
7725
|
* @param {function(bigint[]): void} [options.token_callback_function=null] Function to call when a new token is generated
|
|
7688
7726
|
* @param {Object} [options.decode_kwargs={}] Additional keyword arguments to pass to the tokenizer's decode method
|
|
@@ -7691,6 +7729,7 @@ class TextStreamer extends BaseStreamer {
|
|
|
7691
7729
|
skip_prompt = false,
|
|
7692
7730
|
callback_function = null,
|
|
7693
7731
|
token_callback_function = null,
|
|
7732
|
+
skip_special_tokens = true,
|
|
7694
7733
|
decode_kwargs = {},
|
|
7695
7734
|
...kwargs
|
|
7696
7735
|
} = {}) {
|
|
@@ -7699,7 +7738,7 @@ class TextStreamer extends BaseStreamer {
|
|
|
7699
7738
|
this.skip_prompt = skip_prompt;
|
|
7700
7739
|
this.callback_function = callback_function ?? stdout_write;
|
|
7701
7740
|
this.token_callback_function = token_callback_function;
|
|
7702
|
-
this.decode_kwargs = { ...decode_kwargs, ...kwargs };
|
|
7741
|
+
this.decode_kwargs = { skip_special_tokens, ...decode_kwargs, ...kwargs };
|
|
7703
7742
|
|
|
7704
7743
|
// variables used in the streaming process
|
|
7705
7744
|
this.token_cache = [];
|
|
@@ -7815,9 +7854,10 @@ class WhisperTextStreamer extends TextStreamer {
|
|
|
7815
7854
|
} = {}) {
|
|
7816
7855
|
super(tokenizer, {
|
|
7817
7856
|
skip_prompt,
|
|
7857
|
+
skip_special_tokens,
|
|
7818
7858
|
callback_function,
|
|
7819
7859
|
token_callback_function,
|
|
7820
|
-
decode_kwargs
|
|
7860
|
+
decode_kwargs,
|
|
7821
7861
|
});
|
|
7822
7862
|
this.timestamp_begin = tokenizer.timestamp_begin;
|
|
7823
7863
|
|
|
@@ -8071,6 +8111,8 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
8071
8111
|
/* harmony export */ GraniteForCausalLM: () => (/* binding */ GraniteForCausalLM),
|
|
8072
8112
|
/* harmony export */ GraniteModel: () => (/* binding */ GraniteModel),
|
|
8073
8113
|
/* harmony export */ GranitePreTrainedModel: () => (/* binding */ GranitePreTrainedModel),
|
|
8114
|
+
/* harmony export */ GroundingDinoForObjectDetection: () => (/* binding */ GroundingDinoForObjectDetection),
|
|
8115
|
+
/* harmony export */ GroundingDinoPreTrainedModel: () => (/* binding */ GroundingDinoPreTrainedModel),
|
|
8074
8116
|
/* harmony export */ GroupViTModel: () => (/* binding */ GroupViTModel),
|
|
8075
8117
|
/* harmony export */ GroupViTPreTrainedModel: () => (/* binding */ GroupViTPreTrainedModel),
|
|
8076
8118
|
/* harmony export */ HieraForImageClassification: () => (/* binding */ HieraForImageClassification),
|
|
@@ -8279,6 +8321,8 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
8279
8321
|
/* harmony export */ Starcoder2ForCausalLM: () => (/* binding */ Starcoder2ForCausalLM),
|
|
8280
8322
|
/* harmony export */ Starcoder2Model: () => (/* binding */ Starcoder2Model),
|
|
8281
8323
|
/* harmony export */ Starcoder2PreTrainedModel: () => (/* binding */ Starcoder2PreTrainedModel),
|
|
8324
|
+
/* harmony export */ StyleTextToSpeech2Model: () => (/* binding */ StyleTextToSpeech2Model),
|
|
8325
|
+
/* harmony export */ StyleTextToSpeech2PreTrainedModel: () => (/* binding */ StyleTextToSpeech2PreTrainedModel),
|
|
8282
8326
|
/* harmony export */ Swin2SRForImageSuperResolution: () => (/* binding */ Swin2SRForImageSuperResolution),
|
|
8283
8327
|
/* harmony export */ Swin2SRModel: () => (/* binding */ Swin2SRModel),
|
|
8284
8328
|
/* harmony export */ Swin2SRPreTrainedModel: () => (/* binding */ Swin2SRPreTrainedModel),
|
|
@@ -8594,8 +8638,11 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
|
|
|
8594
8638
|
} else if (session_options.externalData !== undefined) {
|
|
8595
8639
|
externalDataPromises = session_options.externalData.map(async (ext) => {
|
|
8596
8640
|
// if the external data is a string, fetch the file and replace the string with its content
|
|
8641
|
+
// @ts-expect-error TS2339
|
|
8597
8642
|
if (typeof ext.data === "string") {
|
|
8643
|
+
// @ts-expect-error TS2339
|
|
8598
8644
|
const ext_buffer = await (0,_utils_hub_js__WEBPACK_IMPORTED_MODULE_5__.getModelFile)(pretrained_model_name_or_path, ext.data, true, options);
|
|
8645
|
+
// @ts-expect-error TS2698
|
|
8599
8646
|
return { ...ext, data: ext_buffer };
|
|
8600
8647
|
}
|
|
8601
8648
|
return ext;
|
|
@@ -8853,14 +8900,23 @@ async function encoderForward(self, model_inputs) {
|
|
|
8853
8900
|
encoderFeeds.inputs_embeds = await self.encode_text({ input_ids: model_inputs.input_ids });
|
|
8854
8901
|
}
|
|
8855
8902
|
if (session.inputNames.includes('token_type_ids') && !encoderFeeds.token_type_ids) {
|
|
8903
|
+
if (!encoderFeeds.input_ids) {
|
|
8904
|
+
throw new Error('Both `input_ids` and `token_type_ids` are missing in the model inputs.');
|
|
8905
|
+
}
|
|
8856
8906
|
// Assign default `token_type_ids` (all zeroes) to the `encoderFeeds` if the model expects it,
|
|
8857
8907
|
// but they weren't created by the tokenizer.
|
|
8858
|
-
encoderFeeds.token_type_ids =
|
|
8859
|
-
'int64',
|
|
8860
|
-
new BigInt64Array(encoderFeeds.input_ids.data.length),
|
|
8861
|
-
encoderFeeds.input_ids.dims
|
|
8862
|
-
)
|
|
8908
|
+
encoderFeeds.token_type_ids = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_9__.zeros_like)(encoderFeeds.input_ids);
|
|
8863
8909
|
}
|
|
8910
|
+
if (session.inputNames.includes('pixel_mask') && !encoderFeeds.pixel_mask) {
|
|
8911
|
+
if (!encoderFeeds.pixel_values) {
|
|
8912
|
+
throw new Error('Both `pixel_values` and `pixel_mask` are missing in the model inputs.');
|
|
8913
|
+
}
|
|
8914
|
+
// Assign default `pixel_mask` (all ones) to the `encoderFeeds` if the model expects it,
|
|
8915
|
+
// but they weren't created by the processor.
|
|
8916
|
+
const dims = encoderFeeds.pixel_values.dims;
|
|
8917
|
+
encoderFeeds.pixel_mask = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_9__.ones)([dims[0], dims[2], dims[3]]);
|
|
8918
|
+
}
|
|
8919
|
+
|
|
8864
8920
|
return await sessionRun(session, encoderFeeds);
|
|
8865
8921
|
}
|
|
8866
8922
|
|
|
@@ -9843,6 +9899,7 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
|
|
|
9843
9899
|
if (this.config.model_type === 'musicgen') {
|
|
9844
9900
|
// Custom logic (TODO: move to Musicgen class)
|
|
9845
9901
|
decoder_input_ids = Array.from({
|
|
9902
|
+
// @ts-expect-error TS2339
|
|
9846
9903
|
length: batch_size * this.config.decoder.num_codebooks
|
|
9847
9904
|
}, () => [decoder_start_token_id]);
|
|
9848
9905
|
|
|
@@ -10172,11 +10229,13 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
|
|
|
10172
10229
|
async encode_image({ pixel_values }) {
|
|
10173
10230
|
// image_inputs === { pixel_values }
|
|
10174
10231
|
const features = (await sessionRun(this.sessions['vision_encoder'], { pixel_values })).image_features;
|
|
10232
|
+
// @ts-expect-error TS2339
|
|
10175
10233
|
if (!this.config.num_image_tokens) {
|
|
10176
10234
|
console.warn(
|
|
10177
10235
|
'The number of image tokens was not set in the model configuration. ' +
|
|
10178
10236
|
`Setting it to the number of features detected by the vision encoder (${features.dims[1]}).`
|
|
10179
10237
|
)
|
|
10238
|
+
// @ts-expect-error TS2339
|
|
10180
10239
|
this.config.num_image_tokens = features.dims[1];
|
|
10181
10240
|
}
|
|
10182
10241
|
return features;
|
|
@@ -11604,6 +11663,7 @@ class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
|
|
|
11604
11663
|
|
|
11605
11664
|
if (generation_config.return_token_timestamps) {
|
|
11606
11665
|
outputs["token_timestamps"] = this._extract_token_timestamps(
|
|
11666
|
+
// @ts-expect-error TS2345
|
|
11607
11667
|
outputs,
|
|
11608
11668
|
generation_config.alignment_heads,
|
|
11609
11669
|
generation_config.num_frames,
|
|
@@ -11639,6 +11699,7 @@ class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
|
|
|
11639
11699
|
);
|
|
11640
11700
|
}
|
|
11641
11701
|
|
|
11702
|
+
// @ts-expect-error TS2339
|
|
11642
11703
|
let median_filter_width = this.config.median_filter_width;
|
|
11643
11704
|
if (median_filter_width === undefined) {
|
|
11644
11705
|
console.warn("Model config has no `median_filter_width`, using default value of 7.")
|
|
@@ -11649,6 +11710,7 @@ class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
|
|
|
11649
11710
|
const batch = generate_outputs.cross_attentions;
|
|
11650
11711
|
// Create a list with `decoder_layers` elements, each a tensor of shape
|
|
11651
11712
|
// (batch size, attention_heads, output length, input length).
|
|
11713
|
+
// @ts-expect-error TS2339
|
|
11652
11714
|
const cross_attentions = Array.from({ length: this.config.decoder_layers },
|
|
11653
11715
|
// Concatenate the cross attentions for each layer across sequence length dimension.
|
|
11654
11716
|
(_, i) => (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_9__.cat)(batch.map(x => x[i]), 2)
|
|
@@ -11792,6 +11854,7 @@ class LlavaForConditionalGeneration extends LlavaPreTrainedModel {
|
|
|
11792
11854
|
attention_mask,
|
|
11793
11855
|
}) {
|
|
11794
11856
|
|
|
11857
|
+
// @ts-expect-error TS2339
|
|
11795
11858
|
const image_token_index = this.config.image_token_index;
|
|
11796
11859
|
|
|
11797
11860
|
const idsList = input_ids.tolist();
|
|
@@ -12777,6 +12840,7 @@ class Qwen2VLForConditionalGeneration extends Qwen2VLPreTrainedModel {
|
|
|
12777
12840
|
const image_nums = vision_tokens.filter(x => x == image_token_id).length;
|
|
12778
12841
|
const video_nums = vision_tokens.filter(x => x == video_token_id).length;
|
|
12779
12842
|
|
|
12843
|
+
/** @type {number[][]} */
|
|
12780
12844
|
let llm_pos_ids_list = [];
|
|
12781
12845
|
let st = 0;
|
|
12782
12846
|
let remain_images = image_nums;
|
|
@@ -12846,6 +12910,7 @@ class Qwen2VLForConditionalGeneration extends Qwen2VLPreTrainedModel {
|
|
|
12846
12910
|
// NOTE: Each item in llm_pos_ids_list is an array of shape (3, text_len),
|
|
12847
12911
|
// meaning to perform concatenation along dim=1, we can do the following:
|
|
12848
12912
|
const num_items = llm_pos_ids_list.reduce((acc, x) => acc + x.length, 0);
|
|
12913
|
+
/** @type {number[]} */
|
|
12849
12914
|
const llm_positions = new Array(num_items);
|
|
12850
12915
|
let index = 0;
|
|
12851
12916
|
for (let x = 0; x < 3; ++x) {
|
|
@@ -12886,9 +12951,10 @@ class Qwen2VLForConditionalGeneration extends Qwen2VLPreTrainedModel {
|
|
|
12886
12951
|
{ length: 3 * data.length },
|
|
12887
12952
|
(_, i) => data[i % data.length]
|
|
12888
12953
|
);
|
|
12954
|
+
/** @type {bigint[]} */
|
|
12889
12955
|
const mrope_position_deltas = Array.from(
|
|
12890
12956
|
{ length: dims[0] },
|
|
12891
|
-
(_, i) => (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_11__.max)(data.subarray(dims[1] * i, dims[1] * (i + 1)))[0] +
|
|
12957
|
+
(_, i) => (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_11__.max)(data.subarray(dims[1] * i, dims[1] * (i + 1)))[0] + 1n + BigInt(dims[1])
|
|
12892
12958
|
);
|
|
12893
12959
|
|
|
12894
12960
|
return [
|
|
@@ -13459,7 +13525,7 @@ class DPTModel extends DPTPreTrainedModel { }
|
|
|
13459
13525
|
*
|
|
13460
13526
|
* **Example:** Depth estimation w/ `Xenova/dpt-hybrid-midas`.
|
|
13461
13527
|
* ```javascript
|
|
13462
|
-
* import { DPTForDepthEstimation, AutoProcessor, RawImage,
|
|
13528
|
+
* import { DPTForDepthEstimation, AutoProcessor, RawImage, interpolate_4d } from '@huggingface/transformers';
|
|
13463
13529
|
*
|
|
13464
13530
|
* // Load model and processor
|
|
13465
13531
|
* const model_id = 'Xenova/dpt-hybrid-midas';
|
|
@@ -13468,7 +13534,7 @@ class DPTModel extends DPTPreTrainedModel { }
|
|
|
13468
13534
|
*
|
|
13469
13535
|
* // Load image from URL
|
|
13470
13536
|
* const url = 'http://images.cocodataset.org/val2017/000000039769.jpg';
|
|
13471
|
-
* const image = await RawImage.
|
|
13537
|
+
* const image = await RawImage.read(url);
|
|
13472
13538
|
*
|
|
13473
13539
|
* // Prepare image for the model
|
|
13474
13540
|
* const inputs = await processor(image);
|
|
@@ -13477,10 +13543,15 @@ class DPTModel extends DPTPreTrainedModel { }
|
|
|
13477
13543
|
* const { predicted_depth } = await model(inputs);
|
|
13478
13544
|
*
|
|
13479
13545
|
* // Interpolate to original size
|
|
13480
|
-
* const prediction =
|
|
13546
|
+
* const prediction = (await interpolate_4d(predicted_depth.unsqueeze(1), {
|
|
13547
|
+
* size: image.size.reverse(),
|
|
13548
|
+
* mode: 'bilinear',
|
|
13549
|
+
* })).squeeze(1);
|
|
13481
13550
|
*
|
|
13482
13551
|
* // Visualize the prediction
|
|
13483
|
-
* const
|
|
13552
|
+
* const min = prediction.min().item();
|
|
13553
|
+
* const max = prediction.max().item();
|
|
13554
|
+
* const formatted = prediction.sub_(min).div_(max - min).mul_(255).to('uint8');
|
|
13484
13555
|
* const depth = RawImage.fromTensor(formatted);
|
|
13485
13556
|
* // RawImage {
|
|
13486
13557
|
* // data: Uint8Array(307200) [ 85, 85, 84, ... ],
|
|
@@ -13530,11 +13601,7 @@ class GLPNPreTrainedModel extends PreTrainedModel { }
|
|
|
13530
13601
|
class GLPNModel extends GLPNPreTrainedModel { }
|
|
13531
13602
|
|
|
13532
13603
|
/**
|
|
13533
|
-
*
|
|
13534
|
-
*
|
|
13535
|
-
* **Example:** Depth estimation w/ `Xenova/glpn-kitti`.
|
|
13536
|
-
* ```javascript
|
|
13537
|
-
* import { GLPNForDepthEstimation, AutoProcessor, RawImage, interpolate, max } from '@huggingface/transformers';
|
|
13604
|
+
* import { GLPNForDepthEstimation, AutoProcessor, RawImage, interpolate_4d } from '@huggingface/transformers';
|
|
13538
13605
|
*
|
|
13539
13606
|
* // Load model and processor
|
|
13540
13607
|
* const model_id = 'Xenova/glpn-kitti';
|
|
@@ -13543,7 +13610,7 @@ class GLPNModel extends GLPNPreTrainedModel { }
|
|
|
13543
13610
|
*
|
|
13544
13611
|
* // Load image from URL
|
|
13545
13612
|
* const url = 'http://images.cocodataset.org/val2017/000000039769.jpg';
|
|
13546
|
-
* const image = await RawImage.
|
|
13613
|
+
* const image = await RawImage.read(url);
|
|
13547
13614
|
*
|
|
13548
13615
|
* // Prepare image for the model
|
|
13549
13616
|
* const inputs = await processor(image);
|
|
@@ -13552,13 +13619,18 @@ class GLPNModel extends GLPNPreTrainedModel { }
|
|
|
13552
13619
|
* const { predicted_depth } = await model(inputs);
|
|
13553
13620
|
*
|
|
13554
13621
|
* // Interpolate to original size
|
|
13555
|
-
* const prediction =
|
|
13622
|
+
* const prediction = (await interpolate_4d(predicted_depth.unsqueeze(1), {
|
|
13623
|
+
* size: image.size.reverse(),
|
|
13624
|
+
* mode: 'bilinear',
|
|
13625
|
+
* })).squeeze(1);
|
|
13556
13626
|
*
|
|
13557
13627
|
* // Visualize the prediction
|
|
13558
|
-
* const
|
|
13628
|
+
* const min = prediction.min().item();
|
|
13629
|
+
* const max = prediction.max().item();
|
|
13630
|
+
* const formatted = prediction.sub_(min).div_(max - min).mul_(255).to('uint8');
|
|
13559
13631
|
* const depth = RawImage.fromTensor(formatted);
|
|
13560
13632
|
* // RawImage {
|
|
13561
|
-
* // data: Uint8Array(307200) [
|
|
13633
|
+
* // data: Uint8Array(307200) [ 85, 85, 84, ... ],
|
|
13562
13634
|
* // width: 640,
|
|
13563
13635
|
* // height: 480,
|
|
13564
13636
|
* // channels: 1
|
|
@@ -13733,6 +13805,8 @@ class Dinov2WithRegistersForImageClassification extends Dinov2WithRegistersPreTr
|
|
|
13733
13805
|
}
|
|
13734
13806
|
}
|
|
13735
13807
|
//////////////////////////////////////////////////
|
|
13808
|
+
class GroundingDinoPreTrainedModel extends PreTrainedModel { }
|
|
13809
|
+
class GroundingDinoForObjectDetection extends GroundingDinoPreTrainedModel { }
|
|
13736
13810
|
|
|
13737
13811
|
//////////////////////////////////////////////////
|
|
13738
13812
|
class YolosPreTrainedModel extends PreTrainedModel { }
|
|
@@ -14431,6 +14505,9 @@ class WavLMForAudioFrameClassification extends WavLMPreTrainedModel {
|
|
|
14431
14505
|
}
|
|
14432
14506
|
}
|
|
14433
14507
|
|
|
14508
|
+
class StyleTextToSpeech2PreTrainedModel extends PreTrainedModel { }
|
|
14509
|
+
class StyleTextToSpeech2Model extends StyleTextToSpeech2PreTrainedModel { }
|
|
14510
|
+
|
|
14434
14511
|
//////////////////////////////////////////////////
|
|
14435
14512
|
// SpeechT5 models
|
|
14436
14513
|
/**
|
|
@@ -14525,10 +14602,12 @@ class SpeechT5ForTextToSpeech extends SpeechT5PreTrainedModel {
|
|
|
14525
14602
|
|
|
14526
14603
|
const { encoder_outputs, encoder_attention_mask } = await encoderForward(this, model_inputs);
|
|
14527
14604
|
|
|
14605
|
+
// @ts-expect-error TS2339
|
|
14528
14606
|
const r = encoder_outputs.dims[1] / this.config.reduction_factor;
|
|
14529
14607
|
const maxlen = Math.floor(r * maxlenratio);
|
|
14530
14608
|
const minlen = Math.floor(r * minlenratio);
|
|
14531
14609
|
|
|
14610
|
+
// @ts-expect-error TS2339
|
|
14532
14611
|
const num_mel_bins = this.config.num_mel_bins;
|
|
14533
14612
|
|
|
14534
14613
|
let spectrogramParts = [];
|
|
@@ -14893,11 +14972,13 @@ class MusicgenForConditionalGeneration extends PreTrainedModel { // NOTE: not Mu
|
|
|
14893
14972
|
*/
|
|
14894
14973
|
_apply_and_filter_by_delay_pattern_mask(outputs) {
|
|
14895
14974
|
const [bs_x_codebooks, seqLength] = outputs.dims;
|
|
14975
|
+
// @ts-expect-error TS2339
|
|
14896
14976
|
const num_codebooks = this.config.decoder.num_codebooks;
|
|
14897
14977
|
const upperBound = (seqLength - num_codebooks);
|
|
14898
14978
|
|
|
14899
14979
|
let newDataSize = 0;
|
|
14900
14980
|
for (let i = 0; i < outputs.size; ++i) {
|
|
14981
|
+
// @ts-expect-error TS2339
|
|
14901
14982
|
if (outputs.data[i] === this.config.decoder.pad_token_id) {
|
|
14902
14983
|
continue;
|
|
14903
14984
|
}
|
|
@@ -14927,7 +15008,9 @@ class MusicgenForConditionalGeneration extends PreTrainedModel { // NOTE: not Mu
|
|
|
14927
15008
|
let clonedInputIds = structuredClone(input_ids);
|
|
14928
15009
|
for (let i = 0; i < clonedInputIds.length; ++i) {
|
|
14929
15010
|
for (let j = 0; j < clonedInputIds[i].length; ++j) {
|
|
15011
|
+
// @ts-expect-error TS2339
|
|
14930
15012
|
if ((i % this.config.decoder.num_codebooks) >= j) {
|
|
15013
|
+
// @ts-expect-error TS2339
|
|
14931
15014
|
clonedInputIds[i][j] = BigInt(this.config.decoder.pad_token_id);
|
|
14932
15015
|
}
|
|
14933
15016
|
}
|
|
@@ -15084,6 +15167,9 @@ class MultiModalityCausalLM extends MultiModalityPreTrainedModel {
|
|
|
15084
15167
|
'past_key_values',
|
|
15085
15168
|
];
|
|
15086
15169
|
|
|
15170
|
+
/**
|
|
15171
|
+
* @param {ConstructorParameters<typeof MultiModalityPreTrainedModel>} args
|
|
15172
|
+
*/
|
|
15087
15173
|
constructor(...args) {
|
|
15088
15174
|
super(...args);
|
|
15089
15175
|
|
|
@@ -15385,6 +15471,8 @@ const MODEL_MAPPING_NAMES_ENCODER_ONLY = new Map([
|
|
|
15385
15471
|
|
|
15386
15472
|
['maskformer', ['MaskFormerModel', MaskFormerModel]],
|
|
15387
15473
|
['mgp-str', ['MgpstrForSceneTextRecognition', MgpstrForSceneTextRecognition]],
|
|
15474
|
+
|
|
15475
|
+
['style_text_to_speech_2', ['StyleTextToSpeech2Model', StyleTextToSpeech2Model]],
|
|
15388
15476
|
]);
|
|
15389
15477
|
|
|
15390
15478
|
const MODEL_MAPPING_NAMES_ENCODER_DECODER = new Map([
|
|
@@ -15629,6 +15717,7 @@ const MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES = new Map([
|
|
|
15629
15717
|
const MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES = new Map([
|
|
15630
15718
|
['owlvit', ['OwlViTForObjectDetection', OwlViTForObjectDetection]],
|
|
15631
15719
|
['owlv2', ['Owlv2ForObjectDetection', Owlv2ForObjectDetection]],
|
|
15720
|
+
['grounding-dino', ['GroundingDinoForObjectDetection', GroundingDinoForObjectDetection]],
|
|
15632
15721
|
]);
|
|
15633
15722
|
|
|
15634
15723
|
const MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES = new Map([
|
|
@@ -16052,10 +16141,17 @@ class SequenceClassifierOutput extends ModelOutput {
|
|
|
16052
16141
|
/**
|
|
16053
16142
|
* @param {Object} output The output of the model.
|
|
16054
16143
|
* @param {Tensor} output.logits classification (or regression if config.num_labels==1) scores (before SoftMax).
|
|
16144
|
+
* @param {Record<string, Tensor>} [output.attentions] Object of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length, sequence_length)`.
|
|
16145
|
+
* Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
|
|
16055
16146
|
*/
|
|
16056
|
-
constructor({ logits }) {
|
|
16147
|
+
constructor({ logits, ...attentions }) {
|
|
16057
16148
|
super();
|
|
16058
16149
|
this.logits = logits;
|
|
16150
|
+
const attentions_list = Object.values(attentions);
|
|
16151
|
+
if (attentions_list.length > 0) {
|
|
16152
|
+
// Only set attentions if they are not empty
|
|
16153
|
+
this.attentions = attentions_list;
|
|
16154
|
+
}
|
|
16059
16155
|
}
|
|
16060
16156
|
}
|
|
16061
16157
|
|
|
@@ -16313,22 +16409,6 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
16313
16409
|
|
|
16314
16410
|
class AutoFeatureExtractor {
|
|
16315
16411
|
|
|
16316
|
-
/**
|
|
16317
|
-
* Instantiate one of the feature extractor classes of the library from a pretrained model.
|
|
16318
|
-
*
|
|
16319
|
-
* The processor class to instantiate is selected based on the `feature_extractor_type` property of
|
|
16320
|
-
* the config object (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
|
|
16321
|
-
*
|
|
16322
|
-
* @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
|
|
16323
|
-
* - A string, the *model id* of a pretrained processor hosted inside a model repo on huggingface.co.
|
|
16324
|
-
* Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
|
|
16325
|
-
* user or organization name, like `dbmdz/bert-base-german-cased`.
|
|
16326
|
-
* - A path to a *directory* containing processor files, e.g., `./my_model_directory/`.
|
|
16327
|
-
* @param {import('../../utils/hub.js').PretrainedOptions} options Additional options for loading the processor.
|
|
16328
|
-
*
|
|
16329
|
-
* @returns {Promise<AllFeatureExtractors.ImageProcessor>} A new instance of the Processor class.
|
|
16330
|
-
*/
|
|
16331
|
-
|
|
16332
16412
|
/** @type {typeof FeatureExtractor.from_pretrained} */
|
|
16333
16413
|
static async from_pretrained(pretrained_model_name_or_path, options={}) {
|
|
16334
16414
|
|
|
@@ -16457,22 +16537,6 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
16457
16537
|
*/
|
|
16458
16538
|
class AutoProcessor {
|
|
16459
16539
|
|
|
16460
|
-
/**
|
|
16461
|
-
* Instantiate one of the processor classes of the library from a pretrained model.
|
|
16462
|
-
*
|
|
16463
|
-
* The processor class to instantiate is selected based on the `image_processor_type` (or `feature_extractor_type`; legacy)
|
|
16464
|
-
* property of the config object (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
|
|
16465
|
-
*
|
|
16466
|
-
* @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
|
|
16467
|
-
* - A string, the *model id* of a pretrained processor hosted inside a model repo on huggingface.co.
|
|
16468
|
-
* Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
|
|
16469
|
-
* user or organization name, like `dbmdz/bert-base-german-cased`.
|
|
16470
|
-
* - A path to a *directory* containing processor files, e.g., `./my_model_directory/`.
|
|
16471
|
-
* @param {import('../../utils/hub.js').PretrainedOptions} options Additional options for loading the processor.
|
|
16472
|
-
*
|
|
16473
|
-
* @returns {Promise<Processor>} A new instance of the Processor class.
|
|
16474
|
-
*/
|
|
16475
|
-
|
|
16476
16540
|
/** @type {typeof Processor.from_pretrained} */
|
|
16477
16541
|
static async from_pretrained(pretrained_model_name_or_path, options={}) {
|
|
16478
16542
|
|
|
@@ -16796,6 +16860,7 @@ class ConvNextImageProcessor extends _base_image_processors_utils_js__WEBPACK_IM
|
|
|
16796
16860
|
/**
|
|
16797
16861
|
* Percentage of the image to crop. Only has an effect if this.size < 384.
|
|
16798
16862
|
*/
|
|
16863
|
+
// @ts-expect-error TS2339
|
|
16799
16864
|
this.crop_pct = this.config.crop_pct ?? (224 / 256);
|
|
16800
16865
|
}
|
|
16801
16866
|
|
|
@@ -17003,6 +17068,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
17003
17068
|
class EfficientNetImageProcessor extends _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_0__.ImageProcessor {
|
|
17004
17069
|
constructor(config) {
|
|
17005
17070
|
super(config);
|
|
17071
|
+
// @ts-expect-error TS2339
|
|
17006
17072
|
this.include_top = this.config.include_top ?? true;
|
|
17007
17073
|
if (this.include_top) {
|
|
17008
17074
|
this.image_std = this.image_std.map(x => x * x);
|
|
@@ -17086,8 +17152,11 @@ class Florence2Processor extends _base_processing_utils_js__WEBPACK_IMPORTED_MOD
|
|
|
17086
17152
|
super(config, components);
|
|
17087
17153
|
|
|
17088
17154
|
const {
|
|
17155
|
+
// @ts-expect-error TS2339
|
|
17089
17156
|
tasks_answer_post_processing_type,
|
|
17157
|
+
// @ts-expect-error TS2339
|
|
17090
17158
|
task_prompts_without_inputs,
|
|
17159
|
+
// @ts-expect-error TS2339
|
|
17091
17160
|
task_prompts_with_input,
|
|
17092
17161
|
} = this.image_processor.config;
|
|
17093
17162
|
|
|
@@ -17223,6 +17292,170 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
17223
17292
|
class GLPNFeatureExtractor extends _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_0__.ImageProcessor { }
|
|
17224
17293
|
|
|
17225
17294
|
|
|
17295
|
+
/***/ }),
|
|
17296
|
+
|
|
17297
|
+
/***/ "./src/models/grounding_dino/image_processing_grounding_dino.js":
|
|
17298
|
+
/*!**********************************************************************!*\
|
|
17299
|
+
!*** ./src/models/grounding_dino/image_processing_grounding_dino.js ***!
|
|
17300
|
+
\**********************************************************************/
|
|
17301
|
+
/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
|
|
17302
|
+
|
|
17303
|
+
"use strict";
|
|
17304
|
+
__webpack_require__.r(__webpack_exports__);
|
|
17305
|
+
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
17306
|
+
/* harmony export */ GroundingDinoImageProcessor: () => (/* binding */ GroundingDinoImageProcessor)
|
|
17307
|
+
/* harmony export */ });
|
|
17308
|
+
/* harmony import */ var _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/image_processors_utils.js */ "./src/base/image_processors_utils.js");
|
|
17309
|
+
/* harmony import */ var _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../../utils/tensor.js */ "./src/utils/tensor.js");
|
|
17310
|
+
|
|
17311
|
+
|
|
17312
|
+
|
|
17313
|
+
|
|
17314
|
+
|
|
17315
|
+
/**
|
|
17316
|
+
* @typedef {object} GroundingDinoFeatureExtractorResultProps
|
|
17317
|
+
* @property {import('../../utils/tensor.js').Tensor} pixel_mask
|
|
17318
|
+
* @typedef {import('../../base/image_processors_utils.js').ImageProcessorResult & GroundingDinoFeatureExtractorResultProps} GroundingDinoFeatureExtractorResult
|
|
17319
|
+
*/
|
|
17320
|
+
|
|
17321
|
+
class GroundingDinoImageProcessor extends _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_0__.ImageProcessor {
|
|
17322
|
+
/**
|
|
17323
|
+
* Calls the feature extraction process on an array of images, preprocesses
|
|
17324
|
+
* each image, and concatenates the resulting features into a single Tensor.
|
|
17325
|
+
* @param {import('../../utils/image.js').RawImage[]} images The image(s) to extract features from.
|
|
17326
|
+
* @returns {Promise<GroundingDinoFeatureExtractorResult>} An object containing the concatenated pixel values of the preprocessed images.
|
|
17327
|
+
*/
|
|
17328
|
+
async _call(images) {
|
|
17329
|
+
const result = await super._call(images);
|
|
17330
|
+
|
|
17331
|
+
const dims = result.pixel_values.dims;
|
|
17332
|
+
const pixel_mask = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.ones)([dims[0], dims[2], dims[3]]);
|
|
17333
|
+
|
|
17334
|
+
return { ...result, pixel_mask };
|
|
17335
|
+
}
|
|
17336
|
+
}
|
|
17337
|
+
|
|
17338
|
+
|
|
17339
|
+
/***/ }),
|
|
17340
|
+
|
|
17341
|
+
/***/ "./src/models/grounding_dino/processing_grounding_dino.js":
|
|
17342
|
+
/*!****************************************************************!*\
|
|
17343
|
+
!*** ./src/models/grounding_dino/processing_grounding_dino.js ***!
|
|
17344
|
+
\****************************************************************/
|
|
17345
|
+
/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
|
|
17346
|
+
|
|
17347
|
+
"use strict";
|
|
17348
|
+
__webpack_require__.r(__webpack_exports__);
|
|
17349
|
+
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
17350
|
+
/* harmony export */ GroundingDinoProcessor: () => (/* binding */ GroundingDinoProcessor)
|
|
17351
|
+
/* harmony export */ });
|
|
17352
|
+
/* harmony import */ var _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/processing_utils.js */ "./src/base/processing_utils.js");
|
|
17353
|
+
/* harmony import */ var _auto_image_processing_auto_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../auto/image_processing_auto.js */ "./src/models/auto/image_processing_auto.js");
|
|
17354
|
+
/* harmony import */ var _tokenizers_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../../tokenizers.js */ "./src/tokenizers.js");
|
|
17355
|
+
/* harmony import */ var _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ../../base/image_processors_utils.js */ "./src/base/image_processors_utils.js");
|
|
17356
|
+
|
|
17357
|
+
|
|
17358
|
+
|
|
17359
|
+
|
|
17360
|
+
|
|
17361
|
+
/**
|
|
17362
|
+
* Get token ids of phrases from posmaps and input_ids.
|
|
17363
|
+
* @param {import('../../utils/tensor.js').Tensor} posmaps A boolean tensor of unbatched text-thresholded logits related to the detected bounding boxes of shape `(hidden_size, )`.
|
|
17364
|
+
* @param {import('../../utils/tensor.js').Tensor} input_ids A tensor of token ids of shape `(sequence_length, )`.
|
|
17365
|
+
*/
|
|
17366
|
+
function get_phrases_from_posmap(posmaps, input_ids) {
|
|
17367
|
+
|
|
17368
|
+
const left_idx = 0;
|
|
17369
|
+
const right_idx = posmaps.dims.at(-1) - 1;
|
|
17370
|
+
|
|
17371
|
+
const posmaps_list = posmaps.tolist();
|
|
17372
|
+
posmaps_list.fill(false, 0, left_idx + 1);
|
|
17373
|
+
posmaps_list.fill(false, right_idx);
|
|
17374
|
+
|
|
17375
|
+
const input_ids_list = input_ids.tolist();
|
|
17376
|
+
return posmaps_list
|
|
17377
|
+
.map((val, idx) => val ? idx : null)
|
|
17378
|
+
.filter(idx => idx !== null)
|
|
17379
|
+
.map(i => input_ids_list[i]);
|
|
17380
|
+
}
|
|
17381
|
+
|
|
17382
|
+
class GroundingDinoProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__.Processor {
|
|
17383
|
+
static tokenizer_class = _tokenizers_js__WEBPACK_IMPORTED_MODULE_2__.AutoTokenizer
|
|
17384
|
+
static image_processor_class = _auto_image_processing_auto_js__WEBPACK_IMPORTED_MODULE_1__.AutoImageProcessor
|
|
17385
|
+
|
|
17386
|
+
/**
|
|
17387
|
+
* @typedef {import('../../utils/image.js').RawImage} RawImage
|
|
17388
|
+
*/
|
|
17389
|
+
/**
|
|
17390
|
+
*
|
|
17391
|
+
* @param {RawImage|RawImage[]|RawImage[][]} images
|
|
17392
|
+
* @param {string|string[]} text
|
|
17393
|
+
* @returns {Promise<any>}
|
|
17394
|
+
*/
|
|
17395
|
+
async _call(images, text, options = {}) {
|
|
17396
|
+
|
|
17397
|
+
const image_inputs = images ? await this.image_processor(images, options) : {};
|
|
17398
|
+
const text_inputs = text ? this.tokenizer(text, options) : {};
|
|
17399
|
+
|
|
17400
|
+
return {
|
|
17401
|
+
...text_inputs,
|
|
17402
|
+
...image_inputs,
|
|
17403
|
+
}
|
|
17404
|
+
}
|
|
17405
|
+
post_process_grounded_object_detection(outputs, input_ids, {
|
|
17406
|
+
box_threshold = 0.25,
|
|
17407
|
+
text_threshold = 0.25,
|
|
17408
|
+
target_sizes = null
|
|
17409
|
+
} = {}) {
|
|
17410
|
+
const { logits, pred_boxes } = outputs;
|
|
17411
|
+
const batch_size = logits.dims[0];
|
|
17412
|
+
|
|
17413
|
+
if (target_sizes !== null && target_sizes.length !== batch_size) {
|
|
17414
|
+
throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits")
|
|
17415
|
+
}
|
|
17416
|
+
const num_queries = logits.dims.at(1);
|
|
17417
|
+
|
|
17418
|
+
const probs = logits.sigmoid(); // (batch_size, num_queries, 256)
|
|
17419
|
+
const scores = probs.max(-1).tolist(); // (batch_size, num_queries)
|
|
17420
|
+
|
|
17421
|
+
// Convert to [x0, y0, x1, y1] format
|
|
17422
|
+
const boxes = pred_boxes.tolist() // (batch_size, num_queries, 4)
|
|
17423
|
+
.map(batch => batch.map(box => (0,_base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_3__.center_to_corners_format)(box)));
|
|
17424
|
+
|
|
17425
|
+
const results = [];
|
|
17426
|
+
for (let i = 0; i < batch_size; ++i) {
|
|
17427
|
+
const target_size = target_sizes !== null ? target_sizes[i] : null;
|
|
17428
|
+
|
|
17429
|
+
// Convert from relative [0, 1] to absolute [0, height] coordinates
|
|
17430
|
+
if (target_size !== null) {
|
|
17431
|
+
boxes[i] = boxes[i].map(box => box.map((x, j) => x * target_size[(j + 1) % 2]));
|
|
17432
|
+
}
|
|
17433
|
+
|
|
17434
|
+
const batch_scores = scores[i];
|
|
17435
|
+
const final_scores = [];
|
|
17436
|
+
const final_phrases = [];
|
|
17437
|
+
const final_boxes = [];
|
|
17438
|
+
for (let j = 0; j < num_queries; ++j) {
|
|
17439
|
+
const score = batch_scores[j];
|
|
17440
|
+
if (score <= box_threshold) {
|
|
17441
|
+
continue;
|
|
17442
|
+
}
|
|
17443
|
+
const box = boxes[i][j];
|
|
17444
|
+
const prob = probs[i][j];
|
|
17445
|
+
|
|
17446
|
+
final_scores.push(score);
|
|
17447
|
+
final_boxes.push(box);
|
|
17448
|
+
|
|
17449
|
+
const phrases = get_phrases_from_posmap(prob.gt(text_threshold), input_ids[i]);
|
|
17450
|
+
final_phrases.push(phrases);
|
|
17451
|
+
}
|
|
17452
|
+
results.push({ scores: final_scores, boxes: final_boxes, labels: this.batch_decode(final_phrases) });
|
|
17453
|
+
}
|
|
17454
|
+
return results;
|
|
17455
|
+
}
|
|
17456
|
+
}
|
|
17457
|
+
|
|
17458
|
+
|
|
17226
17459
|
/***/ }),
|
|
17227
17460
|
|
|
17228
17461
|
/***/ "./src/models/idefics3/image_processing_idefics3.js":
|
|
@@ -17384,6 +17617,8 @@ class Idefics3ImageProcessor extends _base_image_processors_utils_js__WEBPACK_IM
|
|
|
17384
17617
|
|
|
17385
17618
|
const start_offset = i * pixel_attention_mask_stride + num_patches * h * w;
|
|
17386
17619
|
const end_offset = (i + 1) * pixel_attention_mask_stride;
|
|
17620
|
+
|
|
17621
|
+
// @ts-expect-error
|
|
17387
17622
|
pixel_attention_mask_data.fill(false, start_offset, end_offset);
|
|
17388
17623
|
}
|
|
17389
17624
|
}
|
|
@@ -17652,42 +17887,43 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
17652
17887
|
/* harmony export */ DonutImageProcessor: () => (/* reexport safe */ _donut_image_processing_donut_js__WEBPACK_IMPORTED_MODULE_7__.DonutImageProcessor),
|
|
17653
17888
|
/* harmony export */ EfficientNetImageProcessor: () => (/* reexport safe */ _efficientnet_image_processing_efficientnet_js__WEBPACK_IMPORTED_MODULE_9__.EfficientNetImageProcessor),
|
|
17654
17889
|
/* harmony export */ GLPNFeatureExtractor: () => (/* reexport safe */ _glpn_image_processing_glpn_js__WEBPACK_IMPORTED_MODULE_10__.GLPNFeatureExtractor),
|
|
17655
|
-
/* harmony export */
|
|
17656
|
-
/* harmony export */
|
|
17657
|
-
/* harmony export */
|
|
17658
|
-
/* harmony export */
|
|
17659
|
-
/* harmony export */
|
|
17660
|
-
/* harmony export */
|
|
17661
|
-
/* harmony export */
|
|
17662
|
-
/* harmony export */
|
|
17663
|
-
/* harmony export */
|
|
17664
|
-
/* harmony export */
|
|
17665
|
-
/* harmony export */
|
|
17666
|
-
/* harmony export */
|
|
17667
|
-
/* harmony export */
|
|
17668
|
-
/* harmony export */
|
|
17669
|
-
/* harmony export */
|
|
17670
|
-
/* harmony export */
|
|
17671
|
-
/* harmony export */
|
|
17672
|
-
/* harmony export */
|
|
17673
|
-
/* harmony export */
|
|
17674
|
-
/* harmony export */
|
|
17675
|
-
/* harmony export */
|
|
17676
|
-
/* harmony export */
|
|
17677
|
-
/* harmony export */
|
|
17678
|
-
/* harmony export */
|
|
17679
|
-
/* harmony export */
|
|
17680
|
-
/* harmony export */
|
|
17681
|
-
/* harmony export */
|
|
17682
|
-
/* harmony export */
|
|
17683
|
-
/* harmony export */
|
|
17684
|
-
/* harmony export */
|
|
17685
|
-
/* harmony export */
|
|
17686
|
-
/* harmony export */
|
|
17687
|
-
/* harmony export */
|
|
17688
|
-
/* harmony export */
|
|
17689
|
-
/* harmony export */
|
|
17690
|
-
/* harmony export */
|
|
17890
|
+
/* harmony export */ GroundingDinoImageProcessor: () => (/* reexport safe */ _grounding_dino_image_processing_grounding_dino_js__WEBPACK_IMPORTED_MODULE_11__.GroundingDinoImageProcessor),
|
|
17891
|
+
/* harmony export */ Idefics3ImageProcessor: () => (/* reexport safe */ _idefics3_image_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_12__.Idefics3ImageProcessor),
|
|
17892
|
+
/* harmony export */ JinaCLIPImageProcessor: () => (/* reexport safe */ _jina_clip_image_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_14__.JinaCLIPImageProcessor),
|
|
17893
|
+
/* harmony export */ LlavaOnevisionImageProcessor: () => (/* reexport safe */ _llava_onevision_image_processing_llava_onevision_js__WEBPACK_IMPORTED_MODULE_15__.LlavaOnevisionImageProcessor),
|
|
17894
|
+
/* harmony export */ Mask2FormerImageProcessor: () => (/* reexport safe */ _mask2former_image_processing_mask2former_js__WEBPACK_IMPORTED_MODULE_16__.Mask2FormerImageProcessor),
|
|
17895
|
+
/* harmony export */ MaskFormerFeatureExtractor: () => (/* reexport safe */ _maskformer_image_processing_maskformer_js__WEBPACK_IMPORTED_MODULE_17__.MaskFormerFeatureExtractor),
|
|
17896
|
+
/* harmony export */ MaskFormerImageProcessor: () => (/* reexport safe */ _maskformer_image_processing_maskformer_js__WEBPACK_IMPORTED_MODULE_17__.MaskFormerImageProcessor),
|
|
17897
|
+
/* harmony export */ MobileNetV1FeatureExtractor: () => (/* reexport safe */ _mobilenet_v1_image_processing_mobilenet_v1_js__WEBPACK_IMPORTED_MODULE_18__.MobileNetV1FeatureExtractor),
|
|
17898
|
+
/* harmony export */ MobileNetV1ImageProcessor: () => (/* reexport safe */ _mobilenet_v1_image_processing_mobilenet_v1_js__WEBPACK_IMPORTED_MODULE_18__.MobileNetV1ImageProcessor),
|
|
17899
|
+
/* harmony export */ MobileNetV2FeatureExtractor: () => (/* reexport safe */ _mobilenet_v2_image_processing_mobilenet_v2_js__WEBPACK_IMPORTED_MODULE_19__.MobileNetV2FeatureExtractor),
|
|
17900
|
+
/* harmony export */ MobileNetV2ImageProcessor: () => (/* reexport safe */ _mobilenet_v2_image_processing_mobilenet_v2_js__WEBPACK_IMPORTED_MODULE_19__.MobileNetV2ImageProcessor),
|
|
17901
|
+
/* harmony export */ MobileNetV3FeatureExtractor: () => (/* reexport safe */ _mobilenet_v3_image_processing_mobilenet_v3_js__WEBPACK_IMPORTED_MODULE_20__.MobileNetV3FeatureExtractor),
|
|
17902
|
+
/* harmony export */ MobileNetV3ImageProcessor: () => (/* reexport safe */ _mobilenet_v3_image_processing_mobilenet_v3_js__WEBPACK_IMPORTED_MODULE_20__.MobileNetV3ImageProcessor),
|
|
17903
|
+
/* harmony export */ MobileNetV4FeatureExtractor: () => (/* reexport safe */ _mobilenet_v4_image_processing_mobilenet_v4_js__WEBPACK_IMPORTED_MODULE_21__.MobileNetV4FeatureExtractor),
|
|
17904
|
+
/* harmony export */ MobileNetV4ImageProcessor: () => (/* reexport safe */ _mobilenet_v4_image_processing_mobilenet_v4_js__WEBPACK_IMPORTED_MODULE_21__.MobileNetV4ImageProcessor),
|
|
17905
|
+
/* harmony export */ MobileViTFeatureExtractor: () => (/* reexport safe */ _mobilevit_image_processing_mobilevit_js__WEBPACK_IMPORTED_MODULE_22__.MobileViTFeatureExtractor),
|
|
17906
|
+
/* harmony export */ MobileViTImageProcessor: () => (/* reexport safe */ _mobilevit_image_processing_mobilevit_js__WEBPACK_IMPORTED_MODULE_22__.MobileViTImageProcessor),
|
|
17907
|
+
/* harmony export */ NougatImageProcessor: () => (/* reexport safe */ _nougat_image_processing_nougat_js__WEBPACK_IMPORTED_MODULE_23__.NougatImageProcessor),
|
|
17908
|
+
/* harmony export */ OwlViTFeatureExtractor: () => (/* reexport safe */ _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_25__.OwlViTFeatureExtractor),
|
|
17909
|
+
/* harmony export */ OwlViTImageProcessor: () => (/* reexport safe */ _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_25__.OwlViTImageProcessor),
|
|
17910
|
+
/* harmony export */ Owlv2ImageProcessor: () => (/* reexport safe */ _owlv2_image_processing_owlv2_js__WEBPACK_IMPORTED_MODULE_24__.Owlv2ImageProcessor),
|
|
17911
|
+
/* harmony export */ Phi3VImageProcessor: () => (/* reexport safe */ _phi3_v_image_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_26__.Phi3VImageProcessor),
|
|
17912
|
+
/* harmony export */ PvtImageProcessor: () => (/* reexport safe */ _pvt_image_processing_pvt_js__WEBPACK_IMPORTED_MODULE_27__.PvtImageProcessor),
|
|
17913
|
+
/* harmony export */ Qwen2VLImageProcessor: () => (/* reexport safe */ _qwen2_vl_image_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_28__.Qwen2VLImageProcessor),
|
|
17914
|
+
/* harmony export */ RTDetrImageProcessor: () => (/* reexport safe */ _rt_detr_image_processing_rt_detr_js__WEBPACK_IMPORTED_MODULE_29__.RTDetrImageProcessor),
|
|
17915
|
+
/* harmony export */ SamImageProcessor: () => (/* reexport safe */ _sam_image_processing_sam_js__WEBPACK_IMPORTED_MODULE_30__.SamImageProcessor),
|
|
17916
|
+
/* harmony export */ SegformerFeatureExtractor: () => (/* reexport safe */ _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_31__.SegformerFeatureExtractor),
|
|
17917
|
+
/* harmony export */ SegformerImageProcessor: () => (/* reexport safe */ _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_31__.SegformerImageProcessor),
|
|
17918
|
+
/* harmony export */ SiglipImageProcessor: () => (/* reexport safe */ _siglip_image_processing_siglip_js__WEBPACK_IMPORTED_MODULE_32__.SiglipImageProcessor),
|
|
17919
|
+
/* harmony export */ Swin2SRImageProcessor: () => (/* reexport safe */ _swin2sr_image_processing_swin2sr_js__WEBPACK_IMPORTED_MODULE_33__.Swin2SRImageProcessor),
|
|
17920
|
+
/* harmony export */ VLMImageProcessor: () => (/* reexport safe */ _janus_image_processing_janus_js__WEBPACK_IMPORTED_MODULE_13__.VLMImageProcessor),
|
|
17921
|
+
/* harmony export */ ViTFeatureExtractor: () => (/* reexport safe */ _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_34__.ViTFeatureExtractor),
|
|
17922
|
+
/* harmony export */ ViTImageProcessor: () => (/* reexport safe */ _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_34__.ViTImageProcessor),
|
|
17923
|
+
/* harmony export */ VitMatteImageProcessor: () => (/* reexport safe */ _vitmatte_image_processing_vitmatte_js__WEBPACK_IMPORTED_MODULE_35__.VitMatteImageProcessor),
|
|
17924
|
+
/* harmony export */ VitPoseImageProcessor: () => (/* reexport safe */ _vitpose_image_processing_vitpose_js__WEBPACK_IMPORTED_MODULE_36__.VitPoseImageProcessor),
|
|
17925
|
+
/* harmony export */ YolosFeatureExtractor: () => (/* reexport safe */ _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_37__.YolosFeatureExtractor),
|
|
17926
|
+
/* harmony export */ YolosImageProcessor: () => (/* reexport safe */ _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_37__.YolosImageProcessor)
|
|
17691
17927
|
/* harmony export */ });
|
|
17692
17928
|
/* harmony import */ var _beit_image_processing_beit_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./beit/image_processing_beit.js */ "./src/models/beit/image_processing_beit.js");
|
|
17693
17929
|
/* harmony import */ var _bit_image_processing_bit_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./bit/image_processing_bit.js */ "./src/models/bit/image_processing_bit.js");
|
|
@@ -17700,32 +17936,34 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
17700
17936
|
/* harmony import */ var _dpt_image_processing_dpt_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./dpt/image_processing_dpt.js */ "./src/models/dpt/image_processing_dpt.js");
|
|
17701
17937
|
/* harmony import */ var _efficientnet_image_processing_efficientnet_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./efficientnet/image_processing_efficientnet.js */ "./src/models/efficientnet/image_processing_efficientnet.js");
|
|
17702
17938
|
/* harmony import */ var _glpn_image_processing_glpn_js__WEBPACK_IMPORTED_MODULE_10__ = __webpack_require__(/*! ./glpn/image_processing_glpn.js */ "./src/models/glpn/image_processing_glpn.js");
|
|
17703
|
-
/* harmony import */ var
|
|
17704
|
-
/* harmony import */ var
|
|
17705
|
-
/* harmony import */ var
|
|
17706
|
-
/* harmony import */ var
|
|
17707
|
-
/* harmony import */ var
|
|
17708
|
-
/* harmony import */ var
|
|
17709
|
-
/* harmony import */ var
|
|
17710
|
-
/* harmony import */ var
|
|
17711
|
-
/* harmony import */ var
|
|
17712
|
-
/* harmony import */ var
|
|
17713
|
-
/* harmony import */ var
|
|
17714
|
-
/* harmony import */ var
|
|
17715
|
-
/* harmony import */ var
|
|
17716
|
-
/* harmony import */ var
|
|
17717
|
-
/* harmony import */ var
|
|
17718
|
-
/* harmony import */ var
|
|
17719
|
-
/* harmony import */ var
|
|
17720
|
-
/* harmony import */ var
|
|
17721
|
-
/* harmony import */ var
|
|
17722
|
-
/* harmony import */ var
|
|
17723
|
-
/* harmony import */ var
|
|
17724
|
-
/* harmony import */ var
|
|
17725
|
-
/* harmony import */ var
|
|
17726
|
-
/* harmony import */ var
|
|
17727
|
-
/* harmony import */ var
|
|
17728
|
-
/* harmony import */ var
|
|
17939
|
+
/* harmony import */ var _grounding_dino_image_processing_grounding_dino_js__WEBPACK_IMPORTED_MODULE_11__ = __webpack_require__(/*! ./grounding_dino/image_processing_grounding_dino.js */ "./src/models/grounding_dino/image_processing_grounding_dino.js");
|
|
17940
|
+
/* harmony import */ var _idefics3_image_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_12__ = __webpack_require__(/*! ./idefics3/image_processing_idefics3.js */ "./src/models/idefics3/image_processing_idefics3.js");
|
|
17941
|
+
/* harmony import */ var _janus_image_processing_janus_js__WEBPACK_IMPORTED_MODULE_13__ = __webpack_require__(/*! ./janus/image_processing_janus.js */ "./src/models/janus/image_processing_janus.js");
|
|
17942
|
+
/* harmony import */ var _jina_clip_image_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_14__ = __webpack_require__(/*! ./jina_clip/image_processing_jina_clip.js */ "./src/models/jina_clip/image_processing_jina_clip.js");
|
|
17943
|
+
/* harmony import */ var _llava_onevision_image_processing_llava_onevision_js__WEBPACK_IMPORTED_MODULE_15__ = __webpack_require__(/*! ./llava_onevision/image_processing_llava_onevision.js */ "./src/models/llava_onevision/image_processing_llava_onevision.js");
|
|
17944
|
+
/* harmony import */ var _mask2former_image_processing_mask2former_js__WEBPACK_IMPORTED_MODULE_16__ = __webpack_require__(/*! ./mask2former/image_processing_mask2former.js */ "./src/models/mask2former/image_processing_mask2former.js");
|
|
17945
|
+
/* harmony import */ var _maskformer_image_processing_maskformer_js__WEBPACK_IMPORTED_MODULE_17__ = __webpack_require__(/*! ./maskformer/image_processing_maskformer.js */ "./src/models/maskformer/image_processing_maskformer.js");
|
|
17946
|
+
/* harmony import */ var _mobilenet_v1_image_processing_mobilenet_v1_js__WEBPACK_IMPORTED_MODULE_18__ = __webpack_require__(/*! ./mobilenet_v1/image_processing_mobilenet_v1.js */ "./src/models/mobilenet_v1/image_processing_mobilenet_v1.js");
|
|
17947
|
+
/* harmony import */ var _mobilenet_v2_image_processing_mobilenet_v2_js__WEBPACK_IMPORTED_MODULE_19__ = __webpack_require__(/*! ./mobilenet_v2/image_processing_mobilenet_v2.js */ "./src/models/mobilenet_v2/image_processing_mobilenet_v2.js");
|
|
17948
|
+
/* harmony import */ var _mobilenet_v3_image_processing_mobilenet_v3_js__WEBPACK_IMPORTED_MODULE_20__ = __webpack_require__(/*! ./mobilenet_v3/image_processing_mobilenet_v3.js */ "./src/models/mobilenet_v3/image_processing_mobilenet_v3.js");
|
|
17949
|
+
/* harmony import */ var _mobilenet_v4_image_processing_mobilenet_v4_js__WEBPACK_IMPORTED_MODULE_21__ = __webpack_require__(/*! ./mobilenet_v4/image_processing_mobilenet_v4.js */ "./src/models/mobilenet_v4/image_processing_mobilenet_v4.js");
|
|
17950
|
+
/* harmony import */ var _mobilevit_image_processing_mobilevit_js__WEBPACK_IMPORTED_MODULE_22__ = __webpack_require__(/*! ./mobilevit/image_processing_mobilevit.js */ "./src/models/mobilevit/image_processing_mobilevit.js");
|
|
17951
|
+
/* harmony import */ var _nougat_image_processing_nougat_js__WEBPACK_IMPORTED_MODULE_23__ = __webpack_require__(/*! ./nougat/image_processing_nougat.js */ "./src/models/nougat/image_processing_nougat.js");
|
|
17952
|
+
/* harmony import */ var _owlv2_image_processing_owlv2_js__WEBPACK_IMPORTED_MODULE_24__ = __webpack_require__(/*! ./owlv2/image_processing_owlv2.js */ "./src/models/owlv2/image_processing_owlv2.js");
|
|
17953
|
+
/* harmony import */ var _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_25__ = __webpack_require__(/*! ./owlvit/image_processing_owlvit.js */ "./src/models/owlvit/image_processing_owlvit.js");
|
|
17954
|
+
/* harmony import */ var _phi3_v_image_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_26__ = __webpack_require__(/*! ./phi3_v/image_processing_phi3_v.js */ "./src/models/phi3_v/image_processing_phi3_v.js");
|
|
17955
|
+
/* harmony import */ var _pvt_image_processing_pvt_js__WEBPACK_IMPORTED_MODULE_27__ = __webpack_require__(/*! ./pvt/image_processing_pvt.js */ "./src/models/pvt/image_processing_pvt.js");
|
|
17956
|
+
/* harmony import */ var _qwen2_vl_image_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_28__ = __webpack_require__(/*! ./qwen2_vl/image_processing_qwen2_vl.js */ "./src/models/qwen2_vl/image_processing_qwen2_vl.js");
|
|
17957
|
+
/* harmony import */ var _rt_detr_image_processing_rt_detr_js__WEBPACK_IMPORTED_MODULE_29__ = __webpack_require__(/*! ./rt_detr/image_processing_rt_detr.js */ "./src/models/rt_detr/image_processing_rt_detr.js");
|
|
17958
|
+
/* harmony import */ var _sam_image_processing_sam_js__WEBPACK_IMPORTED_MODULE_30__ = __webpack_require__(/*! ./sam/image_processing_sam.js */ "./src/models/sam/image_processing_sam.js");
|
|
17959
|
+
/* harmony import */ var _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_31__ = __webpack_require__(/*! ./segformer/image_processing_segformer.js */ "./src/models/segformer/image_processing_segformer.js");
|
|
17960
|
+
/* harmony import */ var _siglip_image_processing_siglip_js__WEBPACK_IMPORTED_MODULE_32__ = __webpack_require__(/*! ./siglip/image_processing_siglip.js */ "./src/models/siglip/image_processing_siglip.js");
|
|
17961
|
+
/* harmony import */ var _swin2sr_image_processing_swin2sr_js__WEBPACK_IMPORTED_MODULE_33__ = __webpack_require__(/*! ./swin2sr/image_processing_swin2sr.js */ "./src/models/swin2sr/image_processing_swin2sr.js");
|
|
17962
|
+
/* harmony import */ var _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_34__ = __webpack_require__(/*! ./vit/image_processing_vit.js */ "./src/models/vit/image_processing_vit.js");
|
|
17963
|
+
/* harmony import */ var _vitmatte_image_processing_vitmatte_js__WEBPACK_IMPORTED_MODULE_35__ = __webpack_require__(/*! ./vitmatte/image_processing_vitmatte.js */ "./src/models/vitmatte/image_processing_vitmatte.js");
|
|
17964
|
+
/* harmony import */ var _vitpose_image_processing_vitpose_js__WEBPACK_IMPORTED_MODULE_36__ = __webpack_require__(/*! ./vitpose/image_processing_vitpose.js */ "./src/models/vitpose/image_processing_vitpose.js");
|
|
17965
|
+
/* harmony import */ var _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_37__ = __webpack_require__(/*! ./yolos/image_processing_yolos.js */ "./src/models/yolos/image_processing_yolos.js");
|
|
17966
|
+
|
|
17729
17967
|
|
|
17730
17968
|
|
|
17731
17969
|
|
|
@@ -17793,6 +18031,7 @@ class VLMImageProcessor extends _base_image_processors_utils_js__WEBPACK_IMPORTE
|
|
|
17793
18031
|
},
|
|
17794
18032
|
...config,
|
|
17795
18033
|
});
|
|
18034
|
+
// @ts-expect-error TS2339
|
|
17796
18035
|
this.constant_values = this.config.background_color.map(x => x * this.rescale_factor)
|
|
17797
18036
|
}
|
|
17798
18037
|
|
|
@@ -18241,6 +18480,8 @@ class MgpstrProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE
|
|
|
18241
18480
|
* - bpe_preds: The list of BPE decoded sentences.
|
|
18242
18481
|
* - wp_preds: The list of wp decoded sentences.
|
|
18243
18482
|
*/
|
|
18483
|
+
// @ts-expect-error The type of this method is not compatible with the one
|
|
18484
|
+
// in the base class. It might be a good idea to fix this.
|
|
18244
18485
|
batch_decode([char_logits, bpe_logits, wp_logits]) {
|
|
18245
18486
|
const [char_preds, char_scores] = this._decode_helper(char_logits, 'char');
|
|
18246
18487
|
const [bpe_preds, bpe_scores] = this._decode_helper(bpe_logits, 'bpe');
|
|
@@ -18634,6 +18875,7 @@ class PaliGemmaProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MOD
|
|
|
18634
18875
|
}
|
|
18635
18876
|
|
|
18636
18877
|
const bos_token = this.tokenizer.bos_token;
|
|
18878
|
+
// @ts-expect-error TS2339
|
|
18637
18879
|
const image_seq_length = this.image_processor.config.image_seq_length;
|
|
18638
18880
|
let input_strings;
|
|
18639
18881
|
if (text.some((t) => t.includes(IMAGE_TOKEN))) {
|
|
@@ -18886,7 +19128,7 @@ class Phi3VProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_
|
|
|
18886
19128
|
*
|
|
18887
19129
|
* @param {string|string[]} text
|
|
18888
19130
|
* @param {RawImage|RawImage[]} images
|
|
18889
|
-
* @param {
|
|
19131
|
+
* @param { { padding?: boolean, truncation?: boolean, num_crops?: number } | undefined } options
|
|
18890
19132
|
* @returns {Promise<any>}
|
|
18891
19133
|
*/
|
|
18892
19134
|
async _call(text, images = null, {
|
|
@@ -18937,36 +19179,39 @@ class Phi3VProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_
|
|
|
18937
19179
|
__webpack_require__.r(__webpack_exports__);
|
|
18938
19180
|
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
18939
19181
|
/* harmony export */ Florence2Processor: () => (/* reexport safe */ _florence2_processing_florence2_js__WEBPACK_IMPORTED_MODULE_0__.Florence2Processor),
|
|
18940
|
-
/* harmony export */
|
|
18941
|
-
/* harmony export */
|
|
18942
|
-
/* harmony export */
|
|
18943
|
-
/* harmony export */
|
|
18944
|
-
/* harmony export */
|
|
18945
|
-
/* harmony export */
|
|
18946
|
-
/* harmony export */
|
|
18947
|
-
/* harmony export */
|
|
18948
|
-
/* harmony export */
|
|
18949
|
-
/* harmony export */
|
|
18950
|
-
/* harmony export */
|
|
18951
|
-
/* harmony export */
|
|
18952
|
-
/* harmony export */
|
|
18953
|
-
/* harmony export */
|
|
19182
|
+
/* harmony export */ GroundingDinoProcessor: () => (/* reexport safe */ _grounding_dino_processing_grounding_dino_js__WEBPACK_IMPORTED_MODULE_1__.GroundingDinoProcessor),
|
|
19183
|
+
/* harmony export */ Idefics3Processor: () => (/* reexport safe */ _idefics3_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_2__.Idefics3Processor),
|
|
19184
|
+
/* harmony export */ JinaCLIPProcessor: () => (/* reexport safe */ _jina_clip_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_4__.JinaCLIPProcessor),
|
|
19185
|
+
/* harmony export */ MgpstrProcessor: () => (/* reexport safe */ _mgp_str_processing_mgp_str_js__WEBPACK_IMPORTED_MODULE_5__.MgpstrProcessor),
|
|
19186
|
+
/* harmony export */ MoonshineProcessor: () => (/* reexport safe */ _moonshine_processing_moonshine_js__WEBPACK_IMPORTED_MODULE_6__.MoonshineProcessor),
|
|
19187
|
+
/* harmony export */ OwlViTProcessor: () => (/* reexport safe */ _owlvit_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_7__.OwlViTProcessor),
|
|
19188
|
+
/* harmony export */ PaliGemmaProcessor: () => (/* reexport safe */ _paligemma_processing_paligemma_js__WEBPACK_IMPORTED_MODULE_9__.PaliGemmaProcessor),
|
|
19189
|
+
/* harmony export */ Phi3VProcessor: () => (/* reexport safe */ _phi3_v_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_8__.Phi3VProcessor),
|
|
19190
|
+
/* harmony export */ PyAnnoteProcessor: () => (/* reexport safe */ _pyannote_processing_pyannote_js__WEBPACK_IMPORTED_MODULE_10__.PyAnnoteProcessor),
|
|
19191
|
+
/* harmony export */ Qwen2VLProcessor: () => (/* reexport safe */ _qwen2_vl_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_11__.Qwen2VLProcessor),
|
|
19192
|
+
/* harmony export */ SamProcessor: () => (/* reexport safe */ _sam_processing_sam_js__WEBPACK_IMPORTED_MODULE_12__.SamProcessor),
|
|
19193
|
+
/* harmony export */ SpeechT5Processor: () => (/* reexport safe */ _speecht5_processing_speecht5_js__WEBPACK_IMPORTED_MODULE_13__.SpeechT5Processor),
|
|
19194
|
+
/* harmony export */ VLChatProcessor: () => (/* reexport safe */ _janus_processing_janus_js__WEBPACK_IMPORTED_MODULE_3__.VLChatProcessor),
|
|
19195
|
+
/* harmony export */ Wav2Vec2ProcessorWithLM: () => (/* reexport safe */ _wav2vec2_processing_wav2vec2_js__WEBPACK_IMPORTED_MODULE_14__.Wav2Vec2ProcessorWithLM),
|
|
19196
|
+
/* harmony export */ WhisperProcessor: () => (/* reexport safe */ _whisper_processing_whisper_js__WEBPACK_IMPORTED_MODULE_15__.WhisperProcessor)
|
|
18954
19197
|
/* harmony export */ });
|
|
18955
19198
|
/* harmony import */ var _florence2_processing_florence2_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./florence2/processing_florence2.js */ "./src/models/florence2/processing_florence2.js");
|
|
18956
|
-
/* harmony import */ var
|
|
18957
|
-
/* harmony import */ var
|
|
18958
|
-
/* harmony import */ var
|
|
18959
|
-
/* harmony import */ var
|
|
18960
|
-
/* harmony import */ var
|
|
18961
|
-
/* harmony import */ var
|
|
18962
|
-
/* harmony import */ var
|
|
18963
|
-
/* harmony import */ var
|
|
18964
|
-
/* harmony import */ var
|
|
18965
|
-
/* harmony import */ var
|
|
18966
|
-
/* harmony import */ var
|
|
18967
|
-
/* harmony import */ var
|
|
18968
|
-
/* harmony import */ var
|
|
18969
|
-
/* harmony import */ var
|
|
19199
|
+
/* harmony import */ var _grounding_dino_processing_grounding_dino_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./grounding_dino/processing_grounding_dino.js */ "./src/models/grounding_dino/processing_grounding_dino.js");
|
|
19200
|
+
/* harmony import */ var _idefics3_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./idefics3/processing_idefics3.js */ "./src/models/idefics3/processing_idefics3.js");
|
|
19201
|
+
/* harmony import */ var _janus_processing_janus_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ./janus/processing_janus.js */ "./src/models/janus/processing_janus.js");
|
|
19202
|
+
/* harmony import */ var _jina_clip_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ./jina_clip/processing_jina_clip.js */ "./src/models/jina_clip/processing_jina_clip.js");
|
|
19203
|
+
/* harmony import */ var _mgp_str_processing_mgp_str_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./mgp_str/processing_mgp_str.js */ "./src/models/mgp_str/processing_mgp_str.js");
|
|
19204
|
+
/* harmony import */ var _moonshine_processing_moonshine_js__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./moonshine/processing_moonshine.js */ "./src/models/moonshine/processing_moonshine.js");
|
|
19205
|
+
/* harmony import */ var _owlvit_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./owlvit/processing_owlvit.js */ "./src/models/owlvit/processing_owlvit.js");
|
|
19206
|
+
/* harmony import */ var _phi3_v_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./phi3_v/processing_phi3_v.js */ "./src/models/phi3_v/processing_phi3_v.js");
|
|
19207
|
+
/* harmony import */ var _paligemma_processing_paligemma_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./paligemma/processing_paligemma.js */ "./src/models/paligemma/processing_paligemma.js");
|
|
19208
|
+
/* harmony import */ var _pyannote_processing_pyannote_js__WEBPACK_IMPORTED_MODULE_10__ = __webpack_require__(/*! ./pyannote/processing_pyannote.js */ "./src/models/pyannote/processing_pyannote.js");
|
|
19209
|
+
/* harmony import */ var _qwen2_vl_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_11__ = __webpack_require__(/*! ./qwen2_vl/processing_qwen2_vl.js */ "./src/models/qwen2_vl/processing_qwen2_vl.js");
|
|
19210
|
+
/* harmony import */ var _sam_processing_sam_js__WEBPACK_IMPORTED_MODULE_12__ = __webpack_require__(/*! ./sam/processing_sam.js */ "./src/models/sam/processing_sam.js");
|
|
19211
|
+
/* harmony import */ var _speecht5_processing_speecht5_js__WEBPACK_IMPORTED_MODULE_13__ = __webpack_require__(/*! ./speecht5/processing_speecht5.js */ "./src/models/speecht5/processing_speecht5.js");
|
|
19212
|
+
/* harmony import */ var _wav2vec2_processing_wav2vec2_js__WEBPACK_IMPORTED_MODULE_14__ = __webpack_require__(/*! ./wav2vec2/processing_wav2vec2.js */ "./src/models/wav2vec2/processing_wav2vec2.js");
|
|
19213
|
+
/* harmony import */ var _whisper_processing_whisper_js__WEBPACK_IMPORTED_MODULE_15__ = __webpack_require__(/*! ./whisper/processing_whisper.js */ "./src/models/whisper/processing_whisper.js");
|
|
19214
|
+
|
|
18970
19215
|
|
|
18971
19216
|
|
|
18972
19217
|
|
|
@@ -19073,6 +19318,7 @@ class PyAnnoteFeatureExtractor extends _base_feature_extraction_utils_js__WEBPAC
|
|
|
19073
19318
|
|
|
19074
19319
|
let current_speaker = -1;
|
|
19075
19320
|
for (let i = 0; i < scores.length; ++i) {
|
|
19321
|
+
/** @type {number[]} */
|
|
19076
19322
|
const probabilities = (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_2__.softmax)(scores[i]);
|
|
19077
19323
|
const [score, id] = (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_2__.max)(probabilities);
|
|
19078
19324
|
const [start, end] = [i, i + 1];
|
|
@@ -19260,6 +19506,7 @@ class Qwen2VLProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODUL
|
|
|
19260
19506
|
}
|
|
19261
19507
|
|
|
19262
19508
|
if (image_grid_thw) {
|
|
19509
|
+
// @ts-expect-error TS2551
|
|
19263
19510
|
let merge_length = this.image_processor.config.merge_size ** 2;
|
|
19264
19511
|
let index = 0;
|
|
19265
19512
|
|
|
@@ -19751,8 +19998,8 @@ class SeamlessM4TFeatureExtractor extends _base_feature_extraction_utils_js__WEB
|
|
|
19751
19998
|
'int64',
|
|
19752
19999
|
new BigInt64Array(numPaddedFrames),
|
|
19753
20000
|
[1, numPaddedFrames],
|
|
19754
|
-
)
|
|
19755
|
-
padded_attention_mask.data.fill(1n, 0, num_frames);
|
|
20001
|
+
);
|
|
20002
|
+
/** @type {BigInt64Array} */ (padded_attention_mask.data).fill(1n, 0, num_frames);
|
|
19756
20003
|
}
|
|
19757
20004
|
}
|
|
19758
20005
|
}
|
|
@@ -20565,7 +20812,7 @@ class WhisperFeatureExtractor extends _base_feature_extraction_utils_js__WEBPACK
|
|
|
20565
20812
|
)
|
|
20566
20813
|
|
|
20567
20814
|
const data = features.data;
|
|
20568
|
-
const maxValue = (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_3__.max)(data)[0];
|
|
20815
|
+
const maxValue = (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_3__.max)(/** @type {Float32Array} */(data))[0];
|
|
20569
20816
|
|
|
20570
20817
|
for (let i = 0; i < data.length; ++i) {
|
|
20571
20818
|
data[i] = (Math.max(data[i], maxValue - 8.0) + 4.0) / 4.0;
|
|
@@ -20828,6 +21075,16 @@ class TensorOpRegistry {
|
|
|
20828
21075
|
// executionProviders: ['webgpu'],
|
|
20829
21076
|
};
|
|
20830
21077
|
|
|
21078
|
+
static get nearest_interpolate_4d() {
|
|
21079
|
+
if (!this._nearest_interpolate_4d) {
|
|
21080
|
+
this._nearest_interpolate_4d = wrap(
|
|
21081
|
+
[8, 10, 18, 0, 58, 129, 1, 10, 41, 10, 1, 120, 10, 0, 10, 0, 10, 1, 115, 18, 1, 121, 34, 6, 82, 101, 115, 105, 122, 101, 42, 18, 10, 4, 109, 111, 100, 101, 34, 7, 110, 101, 97, 114, 101, 115, 116, 160, 1, 3, 18, 1, 114, 90, 31, 10, 1, 120, 18, 26, 10, 24, 8, 1, 18, 20, 10, 3, 18, 1, 98, 10, 3, 18, 1, 99, 10, 3, 18, 1, 104, 10, 3, 18, 1, 119, 90, 15, 10, 1, 115, 18, 10, 10, 8, 8, 7, 18, 4, 10, 2, 8, 4, 98, 31, 10, 1, 121, 18, 26, 10, 24, 8, 1, 18, 20, 10, 3, 18, 1, 98, 10, 3, 18, 1, 99, 10, 3, 18, 1, 104, 10, 3, 18, 1, 119, 66, 2, 16, 21],
|
|
21082
|
+
this.session_options,
|
|
21083
|
+
'y',
|
|
21084
|
+
);
|
|
21085
|
+
}
|
|
21086
|
+
return this._nearest_interpolate_4d;
|
|
21087
|
+
}
|
|
20831
21088
|
static get bilinear_interpolate_4d() {
|
|
20832
21089
|
if (!this._bilinear_interpolate_4d) {
|
|
20833
21090
|
this._bilinear_interpolate_4d = wrap(
|
|
@@ -21202,6 +21459,7 @@ class TextClassificationPipeline extends (/** @type {new (options: TextPipelineC
|
|
|
21202
21459
|
|
|
21203
21460
|
// TODO: Use softmax tensor function
|
|
21204
21461
|
const function_to_apply =
|
|
21462
|
+
// @ts-expect-error TS2339
|
|
21205
21463
|
this.model.config.problem_type === 'multi_label_classification'
|
|
21206
21464
|
? batch => batch.sigmoid()
|
|
21207
21465
|
: batch => new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_8__.Tensor(
|
|
@@ -21210,6 +21468,7 @@ class TextClassificationPipeline extends (/** @type {new (options: TextPipelineC
|
|
|
21210
21468
|
batch.dims,
|
|
21211
21469
|
); // single_label_classification (default)
|
|
21212
21470
|
|
|
21471
|
+
// @ts-expect-error TS2339
|
|
21213
21472
|
const id2label = this.model.config.id2label;
|
|
21214
21473
|
|
|
21215
21474
|
const toReturn = [];
|
|
@@ -21312,6 +21571,7 @@ class TokenClassificationPipeline extends (/** @type {new (options: TextPipeline
|
|
|
21312
21571
|
const outputs = await this.model(model_inputs)
|
|
21313
21572
|
|
|
21314
21573
|
const logits = outputs.logits;
|
|
21574
|
+
// @ts-expect-error TS2339
|
|
21315
21575
|
const id2label = this.model.config.id2label;
|
|
21316
21576
|
|
|
21317
21577
|
const toReturn = [];
|
|
@@ -21651,11 +21911,14 @@ class Text2TextGenerationPipeline extends (/** @type {new (options: TextPipeline
|
|
|
21651
21911
|
|
|
21652
21912
|
|
|
21653
21913
|
// Add global prefix, if present
|
|
21914
|
+
// @ts-expect-error TS2339
|
|
21654
21915
|
if (this.model.config.prefix) {
|
|
21916
|
+
// @ts-expect-error TS2339
|
|
21655
21917
|
texts = texts.map(x => this.model.config.prefix + x)
|
|
21656
21918
|
}
|
|
21657
21919
|
|
|
21658
21920
|
// Handle task specific params:
|
|
21921
|
+
// @ts-expect-error TS2339
|
|
21659
21922
|
const task_specific_params = this.model.config.task_specific_params
|
|
21660
21923
|
if (task_specific_params && task_specific_params[this.task]) {
|
|
21661
21924
|
// Add prefixes, if present
|
|
@@ -22394,6 +22657,7 @@ class AudioClassificationPipeline extends (/** @type {new (options: AudioPipelin
|
|
|
22394
22657
|
const sampling_rate = this.processor.feature_extractor.config.sampling_rate;
|
|
22395
22658
|
const preparedAudios = await prepareAudios(audio, sampling_rate);
|
|
22396
22659
|
|
|
22660
|
+
// @ts-expect-error TS2339
|
|
22397
22661
|
const id2label = this.model.config.id2label;
|
|
22398
22662
|
|
|
22399
22663
|
const toReturn = [];
|
|
@@ -22704,6 +22968,7 @@ class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options: TextA
|
|
|
22704
22968
|
audio = [/** @type {AudioInput} */ (audio)];
|
|
22705
22969
|
}
|
|
22706
22970
|
|
|
22971
|
+
// @ts-expect-error TS2339
|
|
22707
22972
|
const time_precision = this.processor.feature_extractor.config.chunk_length / this.model.config.max_source_positions;
|
|
22708
22973
|
const hop_length = this.processor.feature_extractor.config.hop_length;
|
|
22709
22974
|
|
|
@@ -22769,7 +23034,9 @@ class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options: TextA
|
|
|
22769
23034
|
|
|
22770
23035
|
// TODO: Right now we only get top beam
|
|
22771
23036
|
if (return_timestamps === 'word') {
|
|
23037
|
+
// @ts-expect-error TS2339
|
|
22772
23038
|
chunk.tokens = data.sequences.tolist()[0];
|
|
23039
|
+
// @ts-expect-error TS2339
|
|
22773
23040
|
chunk.token_timestamps = data.token_timestamps.tolist()[0].map(
|
|
22774
23041
|
(/** @type {number} */ x) => (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_6__.round)(x, 2)
|
|
22775
23042
|
);
|
|
@@ -22814,7 +23081,7 @@ class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options: TextA
|
|
|
22814
23081
|
const max_new_tokens = Math.floor(aud.length / sampling_rate) * 6;
|
|
22815
23082
|
const outputs = await this.model.generate({ max_new_tokens, ...kwargs, ...inputs });
|
|
22816
23083
|
|
|
22817
|
-
const text = this.processor.batch_decode(outputs, { skip_special_tokens: true })[0];
|
|
23084
|
+
const text = this.processor.batch_decode(/** @type {Tensor} */(outputs), { skip_special_tokens: true })[0];
|
|
22818
23085
|
toReturn.push({ text });
|
|
22819
23086
|
}
|
|
22820
23087
|
return single ? toReturn[0] : toReturn;
|
|
@@ -22963,6 +23230,7 @@ class ImageClassificationPipeline extends (/** @type {new (options: ImagePipelin
|
|
|
22963
23230
|
const { pixel_values } = await this.processor(preparedImages);
|
|
22964
23231
|
const output = await this.model({ pixel_values });
|
|
22965
23232
|
|
|
23233
|
+
// @ts-expect-error TS2339
|
|
22966
23234
|
const id2label = this.model.config.id2label;
|
|
22967
23235
|
|
|
22968
23236
|
/** @type {ImageClassificationOutput[]} */
|
|
@@ -23077,6 +23345,7 @@ class ImageSegmentationPipeline extends (/** @type {new (options: ImagePipelineC
|
|
|
23077
23345
|
}
|
|
23078
23346
|
}
|
|
23079
23347
|
|
|
23348
|
+
// @ts-expect-error TS2339
|
|
23080
23349
|
const id2label = this.model.config.id2label;
|
|
23081
23350
|
|
|
23082
23351
|
/** @type {ImageSegmentationPipelineOutput[]} */
|
|
@@ -23303,6 +23572,7 @@ class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipelineCon
|
|
|
23303
23572
|
const processed = this.processor.image_processor.post_process_object_detection(output, threshold, imageSizes);
|
|
23304
23573
|
|
|
23305
23574
|
// Add labels
|
|
23575
|
+
// @ts-expect-error TS2339
|
|
23306
23576
|
const id2label = this.model.config.id2label;
|
|
23307
23577
|
|
|
23308
23578
|
// Format output
|
|
@@ -23447,13 +23717,35 @@ class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: TextImag
|
|
|
23447
23717
|
// Run model with both text and pixel inputs
|
|
23448
23718
|
const output = await this.model({ ...text_inputs, pixel_values });
|
|
23449
23719
|
|
|
23450
|
-
|
|
23451
|
-
|
|
23452
|
-
|
|
23453
|
-
|
|
23454
|
-
|
|
23455
|
-
|
|
23456
|
-
|
|
23720
|
+
let result;
|
|
23721
|
+
if('post_process_grounded_object_detection' in this.processor) {
|
|
23722
|
+
// @ts-ignore
|
|
23723
|
+
const processed = this.processor.post_process_grounded_object_detection(
|
|
23724
|
+
output,
|
|
23725
|
+
text_inputs.input_ids,
|
|
23726
|
+
{
|
|
23727
|
+
// TODO: support separate threshold values
|
|
23728
|
+
box_threshold: threshold,
|
|
23729
|
+
text_threshold: threshold,
|
|
23730
|
+
target_sizes: imageSize,
|
|
23731
|
+
},
|
|
23732
|
+
)[0];
|
|
23733
|
+
result = processed.boxes.map((box, i) => ({
|
|
23734
|
+
score: processed.scores[i],
|
|
23735
|
+
label: processed.labels[i],
|
|
23736
|
+
box: get_bounding_box(box, !percentage),
|
|
23737
|
+
}))
|
|
23738
|
+
} else {
|
|
23739
|
+
// @ts-ignore
|
|
23740
|
+
const processed = this.processor.image_processor.post_process_object_detection(output, threshold, imageSize, true)[0];
|
|
23741
|
+
result = processed.boxes.map((box, i) => ({
|
|
23742
|
+
score: processed.scores[i],
|
|
23743
|
+
label: candidate_labels[processed.classes[i]],
|
|
23744
|
+
box: get_bounding_box(box, !percentage),
|
|
23745
|
+
}))
|
|
23746
|
+
}
|
|
23747
|
+
result.sort((a, b) => b.score - a.score);
|
|
23748
|
+
|
|
23457
23749
|
if (top_k !== null) {
|
|
23458
23750
|
result = result.slice(0, top_k);
|
|
23459
23751
|
}
|
|
@@ -23522,6 +23814,7 @@ class DocumentQuestionAnsweringPipeline extends (/** @type {new (options: TextIm
|
|
|
23522
23814
|
// Run model
|
|
23523
23815
|
const output = await this.model.generate({
|
|
23524
23816
|
inputs: pixel_values,
|
|
23817
|
+
// @ts-expect-error TS2339
|
|
23525
23818
|
max_length: this.model.config.decoder.max_position_embeddings,
|
|
23526
23819
|
decoder_input_ids,
|
|
23527
23820
|
...generate_kwargs,
|
|
@@ -23572,7 +23865,7 @@ class DocumentQuestionAnsweringPipeline extends (/** @type {new (options: TextIm
|
|
|
23572
23865
|
* const synthesizer = await pipeline('text-to-speech', 'Xenova/speecht5_tts', { quantized: false });
|
|
23573
23866
|
* const speaker_embeddings = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin';
|
|
23574
23867
|
* const out = await synthesizer('Hello, my dog is cute', { speaker_embeddings });
|
|
23575
|
-
* // {
|
|
23868
|
+
* // RawAudio {
|
|
23576
23869
|
* // audio: Float32Array(26112) [-0.00005657337896991521, 0.00020583874720614403, ...],
|
|
23577
23870
|
* // sampling_rate: 16000
|
|
23578
23871
|
* // }
|
|
@@ -23592,7 +23885,7 @@ class DocumentQuestionAnsweringPipeline extends (/** @type {new (options: TextIm
|
|
|
23592
23885
|
* ```javascript
|
|
23593
23886
|
* const synthesizer = await pipeline('text-to-speech', 'Xenova/mms-tts-fra');
|
|
23594
23887
|
* const out = await synthesizer('Bonjour');
|
|
23595
|
-
* // {
|
|
23888
|
+
* // RawAudio {
|
|
23596
23889
|
* // audio: Float32Array(23808) [-0.00037693005288019776, 0.0003325853613205254, ...],
|
|
23597
23890
|
* // sampling_rate: 16000
|
|
23598
23891
|
* // }
|
|
@@ -23637,11 +23930,12 @@ class TextToAudioPipeline extends (/** @type {new (options: TextToAudioPipelineC
|
|
|
23637
23930
|
// Generate waveform
|
|
23638
23931
|
const { waveform } = await this.model(inputs);
|
|
23639
23932
|
|
|
23933
|
+
// @ts-expect-error TS2339
|
|
23640
23934
|
const sampling_rate = this.model.config.sampling_rate;
|
|
23641
|
-
return
|
|
23642
|
-
|
|
23935
|
+
return new _utils_audio_js__WEBPACK_IMPORTED_MODULE_7__.RawAudio(
|
|
23936
|
+
waveform.data,
|
|
23643
23937
|
sampling_rate,
|
|
23644
|
-
|
|
23938
|
+
)
|
|
23645
23939
|
}
|
|
23646
23940
|
|
|
23647
23941
|
async _call_text_to_spectrogram(text_inputs, { speaker_embeddings }) {
|
|
@@ -23681,10 +23975,10 @@ class TextToAudioPipeline extends (/** @type {new (options: TextToAudioPipelineC
|
|
|
23681
23975
|
const { waveform } = await this.model.generate_speech(input_ids, speaker_embeddings, { vocoder: this.vocoder });
|
|
23682
23976
|
|
|
23683
23977
|
const sampling_rate = this.processor.feature_extractor.config.sampling_rate;
|
|
23684
|
-
return
|
|
23685
|
-
|
|
23978
|
+
return new _utils_audio_js__WEBPACK_IMPORTED_MODULE_7__.RawAudio(
|
|
23979
|
+
waveform.data,
|
|
23686
23980
|
sampling_rate,
|
|
23687
|
-
|
|
23981
|
+
)
|
|
23688
23982
|
}
|
|
23689
23983
|
}
|
|
23690
23984
|
|
|
@@ -23794,11 +24088,23 @@ class DepthEstimationPipeline extends (/** @type {new (options: ImagePipelineCon
|
|
|
23794
24088
|
|
|
23795
24089
|
const toReturn = [];
|
|
23796
24090
|
for (let i = 0; i < preparedImages.length; ++i) {
|
|
23797
|
-
const
|
|
23798
|
-
const
|
|
24091
|
+
const batch = predicted_depth[i];
|
|
24092
|
+
const [height, width] = batch.dims.slice(-2);
|
|
24093
|
+
const [new_width, new_height] = preparedImages[i].size;
|
|
24094
|
+
|
|
24095
|
+
// Interpolate to original size
|
|
24096
|
+
const prediction = (await (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_8__.interpolate_4d)(batch.view(1, 1, height, width), {
|
|
24097
|
+
size: [new_height, new_width],
|
|
24098
|
+
mode: 'bilinear',
|
|
24099
|
+
})).view(new_height, new_width);
|
|
24100
|
+
|
|
24101
|
+
const minval = /** @type {number} */(prediction.min().item());
|
|
24102
|
+
const maxval = /** @type {number} */(prediction.max().item());
|
|
24103
|
+
const formatted = prediction.sub(minval).div_(maxval - minval).mul_(255).to('uint8').unsqueeze(0);
|
|
24104
|
+
const depth = _utils_image_js__WEBPACK_IMPORTED_MODULE_9__.RawImage.fromTensor(formatted);
|
|
23799
24105
|
toReturn.push({
|
|
23800
|
-
predicted_depth:
|
|
23801
|
-
depth
|
|
24106
|
+
predicted_depth: prediction,
|
|
24107
|
+
depth,
|
|
23802
24108
|
});
|
|
23803
24109
|
}
|
|
23804
24110
|
|
|
@@ -24278,6 +24584,7 @@ async function loadItems(mapping, model, pretrainedOptions) {
|
|
|
24278
24584
|
return result;
|
|
24279
24585
|
}
|
|
24280
24586
|
|
|
24587
|
+
|
|
24281
24588
|
/***/ }),
|
|
24282
24589
|
|
|
24283
24590
|
/***/ "./src/tokenizers.js":
|
|
@@ -24347,7 +24654,6 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
24347
24654
|
/* harmony import */ var _utils_data_structures_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./utils/data-structures.js */ "./src/utils/data-structures.js");
|
|
24348
24655
|
/* harmony import */ var _huggingface_jinja__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! @huggingface/jinja */ "./node_modules/@huggingface/jinja/dist/index.js");
|
|
24349
24656
|
/* harmony import */ var _models_whisper_common_whisper_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./models/whisper/common_whisper.js */ "./src/models/whisper/common_whisper.js");
|
|
24350
|
-
/* harmony import */ var _utils_constants_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./utils/constants.js */ "./src/utils/constants.js");
|
|
24351
24657
|
|
|
24352
24658
|
/**
|
|
24353
24659
|
* @file Tokenizers are used to prepare textual inputs for a model.
|
|
@@ -24384,7 +24690,6 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
24384
24690
|
|
|
24385
24691
|
|
|
24386
24692
|
|
|
24387
|
-
|
|
24388
24693
|
/**
|
|
24389
24694
|
* @typedef {Object} TokenizerProperties Additional tokenizer-specific properties.
|
|
24390
24695
|
* @property {boolean} [legacy=false] Whether or not the `legacy` behavior of the tokenizer should be used.
|
|
@@ -24868,7 +25173,7 @@ class Unigram extends TokenizerModel {
|
|
|
24868
25173
|
* Create a new Unigram tokenizer model.
|
|
24869
25174
|
* @param {Object} config The configuration object for the Unigram model.
|
|
24870
25175
|
* @param {number} config.unk_id The ID of the unknown token
|
|
24871
|
-
* @param {
|
|
25176
|
+
* @param {[string, number][]} config.vocab A 2D array representing a mapping of tokens to scores.
|
|
24872
25177
|
* @param {Object} moreConfig Additional configuration object for the Unigram model.
|
|
24873
25178
|
*/
|
|
24874
25179
|
constructor(config, moreConfig) {
|
|
@@ -24876,11 +25181,10 @@ class Unigram extends TokenizerModel {
|
|
|
24876
25181
|
|
|
24877
25182
|
const vocabSize = config.vocab.length;
|
|
24878
25183
|
this.vocab = new Array(vocabSize);
|
|
25184
|
+
/** @type {number[]} */
|
|
24879
25185
|
this.scores = new Array(vocabSize);
|
|
24880
25186
|
for (let i = 0; i < vocabSize; ++i) {
|
|
24881
|
-
|
|
24882
|
-
this.vocab[i] = piece[0];
|
|
24883
|
-
this.scores[i] = piece[1];
|
|
25187
|
+
[this.vocab[i], this.scores[i]] = config.vocab[i];
|
|
24884
25188
|
}
|
|
24885
25189
|
|
|
24886
25190
|
this.unk_token_id = config.unk_id;
|
|
@@ -28720,6 +29024,7 @@ class AutoTokenizer {
|
|
|
28720
29024
|
"use strict";
|
|
28721
29025
|
__webpack_require__.r(__webpack_exports__);
|
|
28722
29026
|
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
29027
|
+
/* harmony export */ RawAudio: () => (/* binding */ RawAudio),
|
|
28723
29028
|
/* harmony export */ hamming: () => (/* binding */ hamming),
|
|
28724
29029
|
/* harmony export */ hanning: () => (/* binding */ hanning),
|
|
28725
29030
|
/* harmony export */ mel_filter_bank: () => (/* binding */ mel_filter_bank),
|
|
@@ -28730,7 +29035,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
28730
29035
|
/* harmony import */ var _hub_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./hub.js */ "./src/utils/hub.js");
|
|
28731
29036
|
/* harmony import */ var _maths_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./maths.js */ "./src/utils/maths.js");
|
|
28732
29037
|
/* harmony import */ var _core_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./core.js */ "./src/utils/core.js");
|
|
28733
|
-
/* harmony import */ var
|
|
29038
|
+
/* harmony import */ var _env_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ../env.js */ "./src/env.js");
|
|
29039
|
+
/* harmony import */ var fs__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! fs */ "fs");
|
|
29040
|
+
/* harmony import */ var _tensor_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./tensor.js */ "./src/utils/tensor.js");
|
|
28734
29041
|
/**
|
|
28735
29042
|
* @file Helper module for audio processing.
|
|
28736
29043
|
*
|
|
@@ -28746,6 +29053,8 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
28746
29053
|
|
|
28747
29054
|
|
|
28748
29055
|
|
|
29056
|
+
|
|
29057
|
+
|
|
28749
29058
|
/**
|
|
28750
29059
|
* Helper function to read audio from a path/URL.
|
|
28751
29060
|
* @param {string|URL} url The path/URL to load the audio from.
|
|
@@ -29339,10 +29648,10 @@ async function spectrogram(
|
|
|
29339
29648
|
// - mel_filters.shape=(80, 201)
|
|
29340
29649
|
// - magnitudes.shape=(3000, 201) => magnitudes.T.shape=(201, 3000)
|
|
29341
29650
|
// - mel_spec.shape=(80, 3000)
|
|
29342
|
-
let mel_spec = await (0,
|
|
29651
|
+
let mel_spec = await (0,_tensor_js__WEBPACK_IMPORTED_MODULE_5__.matmul)(
|
|
29343
29652
|
// TODO: Make `mel_filters` a Tensor during initialization
|
|
29344
|
-
new
|
|
29345
|
-
new
|
|
29653
|
+
new _tensor_js__WEBPACK_IMPORTED_MODULE_5__.Tensor('float32', mel_filters.flat(), [num_mel_filters, num_frequency_bins]),
|
|
29654
|
+
new _tensor_js__WEBPACK_IMPORTED_MODULE_5__.Tensor('float32', transposedMagnitudeData, [num_frequency_bins, d1Max]),
|
|
29346
29655
|
);
|
|
29347
29656
|
if (transpose) {
|
|
29348
29657
|
mel_spec = mel_spec.transpose(1, 0);
|
|
@@ -29432,6 +29741,116 @@ function window_function(window_length, name, {
|
|
|
29432
29741
|
return window;
|
|
29433
29742
|
}
|
|
29434
29743
|
|
|
29744
|
+
/**
|
|
29745
|
+
* Encode audio data to a WAV file.
|
|
29746
|
+
* WAV file specs : https://en.wikipedia.org/wiki/WAV#WAV_File_header
|
|
29747
|
+
*
|
|
29748
|
+
* Adapted from https://www.npmjs.com/package/audiobuffer-to-wav
|
|
29749
|
+
* @param {Float32Array} samples The audio samples.
|
|
29750
|
+
* @param {number} rate The sample rate.
|
|
29751
|
+
* @returns {ArrayBuffer} The WAV audio buffer.
|
|
29752
|
+
*/
|
|
29753
|
+
function encodeWAV(samples, rate) {
|
|
29754
|
+
let offset = 44;
|
|
29755
|
+
const buffer = new ArrayBuffer(offset + samples.length * 4);
|
|
29756
|
+
const view = new DataView(buffer);
|
|
29757
|
+
|
|
29758
|
+
/* RIFF identifier */
|
|
29759
|
+
writeString(view, 0, "RIFF");
|
|
29760
|
+
/* RIFF chunk length */
|
|
29761
|
+
view.setUint32(4, 36 + samples.length * 4, true);
|
|
29762
|
+
/* RIFF type */
|
|
29763
|
+
writeString(view, 8, "WAVE");
|
|
29764
|
+
/* format chunk identifier */
|
|
29765
|
+
writeString(view, 12, "fmt ");
|
|
29766
|
+
/* format chunk length */
|
|
29767
|
+
view.setUint32(16, 16, true);
|
|
29768
|
+
/* sample format (raw) */
|
|
29769
|
+
view.setUint16(20, 3, true);
|
|
29770
|
+
/* channel count */
|
|
29771
|
+
view.setUint16(22, 1, true);
|
|
29772
|
+
/* sample rate */
|
|
29773
|
+
view.setUint32(24, rate, true);
|
|
29774
|
+
/* byte rate (sample rate * block align) */
|
|
29775
|
+
view.setUint32(28, rate * 4, true);
|
|
29776
|
+
/* block align (channel count * bytes per sample) */
|
|
29777
|
+
view.setUint16(32, 4, true);
|
|
29778
|
+
/* bits per sample */
|
|
29779
|
+
view.setUint16(34, 32, true);
|
|
29780
|
+
/* data chunk identifier */
|
|
29781
|
+
writeString(view, 36, "data");
|
|
29782
|
+
/* data chunk length */
|
|
29783
|
+
view.setUint32(40, samples.length * 4, true);
|
|
29784
|
+
|
|
29785
|
+
for (let i = 0; i < samples.length; ++i, offset += 4) {
|
|
29786
|
+
view.setFloat32(offset, samples[i], true);
|
|
29787
|
+
}
|
|
29788
|
+
|
|
29789
|
+
return buffer;
|
|
29790
|
+
}
|
|
29791
|
+
|
|
29792
|
+
function writeString(view, offset, string) {
|
|
29793
|
+
for (let i = 0; i < string.length; ++i) {
|
|
29794
|
+
view.setUint8(offset + i, string.charCodeAt(i));
|
|
29795
|
+
}
|
|
29796
|
+
}
|
|
29797
|
+
|
|
29798
|
+
|
|
29799
|
+
class RawAudio {
|
|
29800
|
+
|
|
29801
|
+
/**
|
|
29802
|
+
* Create a new `RawAudio` object.
|
|
29803
|
+
* @param {Float32Array} audio Audio data
|
|
29804
|
+
* @param {number} sampling_rate Sampling rate of the audio data
|
|
29805
|
+
*/
|
|
29806
|
+
constructor(audio, sampling_rate) {
|
|
29807
|
+
this.audio = audio
|
|
29808
|
+
this.sampling_rate = sampling_rate
|
|
29809
|
+
}
|
|
29810
|
+
|
|
29811
|
+
/**
|
|
29812
|
+
* Convert the audio to a wav file buffer.
|
|
29813
|
+
* @returns {ArrayBuffer} The WAV file.
|
|
29814
|
+
*/
|
|
29815
|
+
toWav() {
|
|
29816
|
+
return encodeWAV(this.audio, this.sampling_rate)
|
|
29817
|
+
}
|
|
29818
|
+
|
|
29819
|
+
/**
|
|
29820
|
+
* Convert the audio to a blob.
|
|
29821
|
+
* @returns {Blob}
|
|
29822
|
+
*/
|
|
29823
|
+
toBlob() {
|
|
29824
|
+
const wav = this.toWav();
|
|
29825
|
+
const blob = new Blob([wav], { type: 'audio/wav' });
|
|
29826
|
+
return blob;
|
|
29827
|
+
}
|
|
29828
|
+
|
|
29829
|
+
/**
|
|
29830
|
+
* Save the audio to a wav file.
|
|
29831
|
+
* @param {string} path
|
|
29832
|
+
*/
|
|
29833
|
+
async save(path) {
|
|
29834
|
+
let fn;
|
|
29835
|
+
|
|
29836
|
+
if (_env_js__WEBPACK_IMPORTED_MODULE_3__.apis.IS_BROWSER_ENV) {
|
|
29837
|
+
if (_env_js__WEBPACK_IMPORTED_MODULE_3__.apis.IS_WEBWORKER_ENV) {
|
|
29838
|
+
throw new Error('Unable to save a file from a Web Worker.')
|
|
29839
|
+
}
|
|
29840
|
+
fn = _core_js__WEBPACK_IMPORTED_MODULE_2__.saveBlob;
|
|
29841
|
+
} else if (_env_js__WEBPACK_IMPORTED_MODULE_3__.apis.IS_FS_AVAILABLE) {
|
|
29842
|
+
fn = async (/** @type {string} */ path, /** @type {Blob} */ blob) => {
|
|
29843
|
+
let buffer = await blob.arrayBuffer();
|
|
29844
|
+
fs__WEBPACK_IMPORTED_MODULE_4__.writeFileSync(path, Buffer.from(buffer));
|
|
29845
|
+
}
|
|
29846
|
+
} else {
|
|
29847
|
+
throw new Error('Unable to save because filesystem is disabled in this environment.')
|
|
29848
|
+
}
|
|
29849
|
+
|
|
29850
|
+
await fn(path, this.toBlob())
|
|
29851
|
+
}
|
|
29852
|
+
}
|
|
29853
|
+
|
|
29435
29854
|
|
|
29436
29855
|
/***/ }),
|
|
29437
29856
|
|
|
@@ -29487,7 +29906,8 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
29487
29906
|
/* harmony export */ pick: () => (/* binding */ pick),
|
|
29488
29907
|
/* harmony export */ pop: () => (/* binding */ pop),
|
|
29489
29908
|
/* harmony export */ product: () => (/* binding */ product),
|
|
29490
|
-
/* harmony export */ reverseDictionary: () => (/* binding */ reverseDictionary)
|
|
29909
|
+
/* harmony export */ reverseDictionary: () => (/* binding */ reverseDictionary),
|
|
29910
|
+
/* harmony export */ saveBlob: () => (/* binding */ saveBlob)
|
|
29491
29911
|
/* harmony export */ });
|
|
29492
29912
|
|
|
29493
29913
|
/**
|
|
@@ -29680,6 +30100,32 @@ function calculateReflectOffset(i, w) {
|
|
|
29680
30100
|
return Math.abs((i + w) % (2 * w) - w);
|
|
29681
30101
|
}
|
|
29682
30102
|
|
|
30103
|
+
/**
|
|
30104
|
+
* Save blob file on the web.
|
|
30105
|
+
* @param {string} path The path to save the blob to
|
|
30106
|
+
* @param {Blob} blob The blob to save
|
|
30107
|
+
*/
|
|
30108
|
+
function saveBlob(path, blob){
|
|
30109
|
+
// Convert the canvas content to a data URL
|
|
30110
|
+
const dataURL = URL.createObjectURL(blob);
|
|
30111
|
+
|
|
30112
|
+
// Create an anchor element with the data URL as the href attribute
|
|
30113
|
+
const downloadLink = document.createElement('a');
|
|
30114
|
+
downloadLink.href = dataURL;
|
|
30115
|
+
|
|
30116
|
+
// Set the download attribute to specify the desired filename for the downloaded image
|
|
30117
|
+
downloadLink.download = path;
|
|
30118
|
+
|
|
30119
|
+
// Trigger the download
|
|
30120
|
+
downloadLink.click();
|
|
30121
|
+
|
|
30122
|
+
// Clean up: remove the anchor element from the DOM
|
|
30123
|
+
downloadLink.remove();
|
|
30124
|
+
|
|
30125
|
+
// Revoke the Object URL to free up memory
|
|
30126
|
+
URL.revokeObjectURL(dataURL);
|
|
30127
|
+
}
|
|
30128
|
+
|
|
29683
30129
|
/**
|
|
29684
30130
|
*
|
|
29685
30131
|
* @param {Object} o
|
|
@@ -30243,6 +30689,8 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
30243
30689
|
/* harmony export */ });
|
|
30244
30690
|
/* harmony import */ var _env_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../env.js */ "./src/env.js");
|
|
30245
30691
|
/* harmony import */ var _devices_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./devices.js */ "./src/utils/devices.js");
|
|
30692
|
+
/// <reference types="@webgpu/types" />
|
|
30693
|
+
|
|
30246
30694
|
|
|
30247
30695
|
|
|
30248
30696
|
|
|
@@ -30498,7 +30946,7 @@ class FileResponse {
|
|
|
30498
30946
|
*/
|
|
30499
30947
|
async arrayBuffer() {
|
|
30500
30948
|
const data = await fs__WEBPACK_IMPORTED_MODULE_0__.promises.readFile(this.filePath);
|
|
30501
|
-
return data.buffer;
|
|
30949
|
+
return /** @type {ArrayBuffer} */ (data.buffer);
|
|
30502
30950
|
}
|
|
30503
30951
|
|
|
30504
30952
|
/**
|
|
@@ -31860,23 +32308,9 @@ class RawImage {
|
|
|
31860
32308
|
// Convert image to Blob
|
|
31861
32309
|
const blob = await this.toBlob(mime);
|
|
31862
32310
|
|
|
31863
|
-
|
|
31864
|
-
const dataURL = URL.createObjectURL(blob);
|
|
32311
|
+
(0,_core_js__WEBPACK_IMPORTED_MODULE_0__.saveBlob)(path, blob)
|
|
31865
32312
|
|
|
31866
|
-
|
|
31867
|
-
const downloadLink = document.createElement('a');
|
|
31868
|
-
downloadLink.href = dataURL;
|
|
31869
|
-
|
|
31870
|
-
// Set the download attribute to specify the desired filename for the downloaded image
|
|
31871
|
-
downloadLink.download = path;
|
|
31872
|
-
|
|
31873
|
-
// Trigger the download
|
|
31874
|
-
downloadLink.click();
|
|
31875
|
-
|
|
31876
|
-
// Clean up: remove the anchor element from the DOM
|
|
31877
|
-
downloadLink.remove();
|
|
31878
|
-
|
|
31879
|
-
} else if (!_env_js__WEBPACK_IMPORTED_MODULE_2__.env.useFS) {
|
|
32313
|
+
} else if (!_env_js__WEBPACK_IMPORTED_MODULE_2__.apis.IS_FS_AVAILABLE) {
|
|
31880
32314
|
throw new Error('Unable to save the image because filesystem is disabled in this environment.')
|
|
31881
32315
|
|
|
31882
32316
|
} else {
|
|
@@ -31906,6 +32340,7 @@ class RawImage {
|
|
|
31906
32340
|
const load_image = RawImage.read.bind(RawImage);
|
|
31907
32341
|
|
|
31908
32342
|
|
|
32343
|
+
|
|
31909
32344
|
/***/ }),
|
|
31910
32345
|
|
|
31911
32346
|
/***/ "./src/utils/maths.js":
|
|
@@ -32159,8 +32594,9 @@ function magnitude(arr) {
|
|
|
32159
32594
|
|
|
32160
32595
|
/**
|
|
32161
32596
|
* Returns the value and index of the minimum element in an array.
|
|
32162
|
-
* @
|
|
32163
|
-
* @
|
|
32597
|
+
* @template {number[]|bigint[]|AnyTypedArray} T
|
|
32598
|
+
* @param {T} arr array of numbers.
|
|
32599
|
+
* @returns {T extends bigint[]|BigTypedArray ? [bigint, number] : [number, number]} the value and index of the minimum element, of the form: [valueOfMin, indexOfMin]
|
|
32164
32600
|
* @throws {Error} If array is empty.
|
|
32165
32601
|
*/
|
|
32166
32602
|
function min(arr) {
|
|
@@ -32173,14 +32609,15 @@ function min(arr) {
|
|
|
32173
32609
|
indexOfMin = i;
|
|
32174
32610
|
}
|
|
32175
32611
|
}
|
|
32176
|
-
return [min, indexOfMin];
|
|
32612
|
+
return /** @type {T extends bigint[]|BigTypedArray ? [bigint, number] : [number, number]} */([min, indexOfMin]);
|
|
32177
32613
|
}
|
|
32178
32614
|
|
|
32179
32615
|
|
|
32180
32616
|
/**
|
|
32181
32617
|
* Returns the value and index of the maximum element in an array.
|
|
32182
|
-
* @
|
|
32183
|
-
* @
|
|
32618
|
+
* @template {number[]|bigint[]|AnyTypedArray} T
|
|
32619
|
+
* @param {T} arr array of numbers.
|
|
32620
|
+
* @returns {T extends bigint[]|BigTypedArray ? [bigint, number] : [number, number]} the value and index of the maximum element, of the form: [valueOfMax, indexOfMax]
|
|
32184
32621
|
* @throws {Error} If array is empty.
|
|
32185
32622
|
*/
|
|
32186
32623
|
function max(arr) {
|
|
@@ -32193,7 +32630,7 @@ function max(arr) {
|
|
|
32193
32630
|
indexOfMax = i;
|
|
32194
32631
|
}
|
|
32195
32632
|
}
|
|
32196
|
-
return [
|
|
32633
|
+
return /** @type {T extends bigint[]|BigTypedArray ? [bigint, number] : [number, number]} */([max, indexOfMax]);
|
|
32197
32634
|
}
|
|
32198
32635
|
|
|
32199
32636
|
function isPowerOfTwo(number) {
|
|
@@ -33491,8 +33928,6 @@ class Tensor {
|
|
|
33491
33928
|
return this.permute(...dims);
|
|
33492
33929
|
}
|
|
33493
33930
|
|
|
33494
|
-
// TODO add .max() and .min() methods
|
|
33495
|
-
|
|
33496
33931
|
/**
|
|
33497
33932
|
* Returns the sum of each row of the input tensor in the given dimension dim.
|
|
33498
33933
|
*
|
|
@@ -33521,55 +33956,22 @@ class Tensor {
|
|
|
33521
33956
|
}
|
|
33522
33957
|
|
|
33523
33958
|
const this_data = this.data;
|
|
33959
|
+
const fn = (a, b) => a + (b ** p);
|
|
33524
33960
|
|
|
33525
33961
|
if (dim === null) {
|
|
33526
33962
|
// @ts-ignore
|
|
33527
|
-
|
|
33963
|
+
const val = this_data.reduce(fn, 0) ** (1 / p);
|
|
33528
33964
|
return new Tensor(this.type, [val], []);
|
|
33529
33965
|
}
|
|
33530
33966
|
|
|
33531
|
-
|
|
33532
|
-
dim = safeIndex(dim, this.dims.length);
|
|
33533
|
-
|
|
33534
|
-
// Calculate the shape of the resulting array after summation
|
|
33535
|
-
const resultDims = this.dims.slice(); // Copy the original dimensions
|
|
33536
|
-
resultDims[dim] = 1; // Remove the specified axis
|
|
33537
|
-
|
|
33538
|
-
// Create a new array to store the accumulated values
|
|
33539
|
-
// @ts-ignore
|
|
33540
|
-
const result = new this_data.constructor(this_data.length / this.dims[dim]);
|
|
33541
|
-
|
|
33542
|
-
// Iterate over the data array
|
|
33543
|
-
for (let i = 0; i < this_data.length; ++i) {
|
|
33544
|
-
|
|
33545
|
-
// Calculate the index in the resulting array
|
|
33546
|
-
let resultIndex = 0;
|
|
33547
|
-
|
|
33548
|
-
for (let j = this.dims.length - 1, num = i, resultMultiplier = 1; j >= 0; --j) {
|
|
33549
|
-
const size = this.dims[j];
|
|
33550
|
-
if (j !== dim) {
|
|
33551
|
-
const index = num % size;
|
|
33552
|
-
resultIndex += index * resultMultiplier;
|
|
33553
|
-
resultMultiplier *= resultDims[j];
|
|
33554
|
-
}
|
|
33555
|
-
num = Math.floor(num / size);
|
|
33556
|
-
}
|
|
33557
|
-
|
|
33558
|
-
// Accumulate the value at the current index
|
|
33559
|
-
result[resultIndex] += (this_data[i]) ** p;
|
|
33560
|
-
}
|
|
33967
|
+
const [type, result, resultDims] = reduce_helper(fn, this, dim, keepdim);
|
|
33561
33968
|
|
|
33562
33969
|
if (p !== 1) {
|
|
33563
33970
|
for (let i = 0; i < result.length; ++i) {
|
|
33564
33971
|
result[i] = result[i] ** (1 / p);
|
|
33565
33972
|
}
|
|
33566
33973
|
}
|
|
33567
|
-
|
|
33568
|
-
if (!keepdim) {
|
|
33569
|
-
resultDims.splice(dim, 1);
|
|
33570
|
-
}
|
|
33571
|
-
|
|
33572
|
-
return new Tensor(this.type, result, resultDims);
|
|
33974
|
+
return new Tensor(type, result, resultDims);
|
|
33573
33975
|
}
|
|
33574
33976
|
|
|
33575
33977
|
/**
|
|
@@ -33632,7 +34034,7 @@ class Tensor {
|
|
|
33632
34034
|
* NOTE: The returned tensor shares the storage with the input tensor, so changing the contents of one will change the contents of the other.
|
|
33633
34035
|
* If you would like a copy, use `tensor.clone()` before squeezing.
|
|
33634
34036
|
*
|
|
33635
|
-
* @param {number} [dim=null] If given, the input will be squeezed only in the specified dimensions.
|
|
34037
|
+
* @param {number|number[]} [dim=null] If given, the input will be squeezed only in the specified dimensions.
|
|
33636
34038
|
* @returns {Tensor} The squeezed tensor
|
|
33637
34039
|
*/
|
|
33638
34040
|
squeeze(dim = null) {
|
|
@@ -33742,6 +34144,34 @@ class Tensor {
|
|
|
33742
34144
|
return this.clone().neg_();
|
|
33743
34145
|
}
|
|
33744
34146
|
|
|
34147
|
+
/**
|
|
34148
|
+
* Computes input > val element-wise.
|
|
34149
|
+
* @param {number} val The value to compare with.
|
|
34150
|
+
* @returns {Tensor} A boolean tensor that is `true` where input is greater than other and `false` elsewhere.
|
|
34151
|
+
*/
|
|
34152
|
+
gt(val) {
|
|
34153
|
+
const mask = new Uint8Array(this.data.length);
|
|
34154
|
+
const this_data = this.data;
|
|
34155
|
+
for (let i = 0; i < this_data.length; ++i) {
|
|
34156
|
+
mask[i] = this_data[i] > val ? 1 : 0;
|
|
34157
|
+
}
|
|
34158
|
+
return new Tensor('bool', mask, this.dims);
|
|
34159
|
+
}
|
|
34160
|
+
|
|
34161
|
+
/**
|
|
34162
|
+
* Computes input < val element-wise.
|
|
34163
|
+
* @param {number} val The value to compare with.
|
|
34164
|
+
* @returns {Tensor} A boolean tensor that is `true` where input is less than other and `false` elsewhere.
|
|
34165
|
+
*/
|
|
34166
|
+
lt(val) {
|
|
34167
|
+
const mask = new Uint8Array(this.data.length);
|
|
34168
|
+
const this_data = this.data;
|
|
34169
|
+
for (let i = 0; i < this_data.length; ++i) {
|
|
34170
|
+
mask[i] = this_data[i] < val ? 1 : 0;
|
|
34171
|
+
}
|
|
34172
|
+
return new Tensor('bool', mask, this.dims);
|
|
34173
|
+
}
|
|
34174
|
+
|
|
33745
34175
|
/**
|
|
33746
34176
|
* In-place version of @see {@link Tensor.clamp}
|
|
33747
34177
|
*/
|
|
@@ -33786,6 +34216,41 @@ class Tensor {
|
|
|
33786
34216
|
return mean(this, dim, keepdim);
|
|
33787
34217
|
}
|
|
33788
34218
|
|
|
34219
|
+
min(dim = null, keepdim = false) {
|
|
34220
|
+
if (dim === null) {
|
|
34221
|
+
// None to reduce over all dimensions.
|
|
34222
|
+
const val = (0,_maths_js__WEBPACK_IMPORTED_MODULE_0__.min)(this.data)[0];
|
|
34223
|
+
return new Tensor(this.type, [val], [/* scalar */]);
|
|
34224
|
+
}
|
|
34225
|
+
const [type, result, resultDims] = reduce_helper((a, b) => Math.min(a, b), this, dim, keepdim, Infinity);
|
|
34226
|
+
return new Tensor(type, result, resultDims);
|
|
34227
|
+
}
|
|
34228
|
+
|
|
34229
|
+
max(dim = null, keepdim = false) {
|
|
34230
|
+
if (dim === null) {
|
|
34231
|
+
// None to reduce over all dimensions.
|
|
34232
|
+
const val = (0,_maths_js__WEBPACK_IMPORTED_MODULE_0__.max)(this.data)[0];
|
|
34233
|
+
return new Tensor(this.type, [val], [/* scalar */]);
|
|
34234
|
+
}
|
|
34235
|
+
const [type, result, resultDims] = reduce_helper((a, b) => Math.max(a, b), this, dim, keepdim, -Infinity);
|
|
34236
|
+
return new Tensor(type, result, resultDims);
|
|
34237
|
+
}
|
|
34238
|
+
|
|
34239
|
+
argmin(dim = null, keepdim = false) {
|
|
34240
|
+
if (dim !== null) {
|
|
34241
|
+
throw new Error("`dim !== null` not yet implemented.");
|
|
34242
|
+
}
|
|
34243
|
+
const index = (0,_maths_js__WEBPACK_IMPORTED_MODULE_0__.min)(this.data)[1];
|
|
34244
|
+
return new Tensor('int64', [BigInt(index)], []);
|
|
34245
|
+
}
|
|
34246
|
+
argmax(dim = null, keepdim = false) {
|
|
34247
|
+
if (dim !== null) {
|
|
34248
|
+
throw new Error("`dim !== null` not yet implemented.");
|
|
34249
|
+
}
|
|
34250
|
+
const index = (0,_maths_js__WEBPACK_IMPORTED_MODULE_0__.max)(this.data)[1];
|
|
34251
|
+
return new Tensor('int64', [BigInt(index)], []);
|
|
34252
|
+
}
|
|
34253
|
+
|
|
33789
34254
|
/**
|
|
33790
34255
|
* Performs Tensor dtype conversion.
|
|
33791
34256
|
* @param {DataType} type The desired data type.
|
|
@@ -33919,7 +34384,7 @@ function interpolate(input, [out_height, out_width], mode = 'bilinear', align_co
|
|
|
33919
34384
|
* @param {Tensor} input the input tensor
|
|
33920
34385
|
* @param {Object} options the options for the interpolation
|
|
33921
34386
|
* @param {[number, number]|[number, number, number]|[number, number, number, number]} [options.size=null] output spatial size.
|
|
33922
|
-
* @param {"bilinear"|"bicubic"} [options.mode='bilinear'] algorithm used for upsampling
|
|
34387
|
+
* @param {"nearest"|"bilinear"|"bicubic"} [options.mode='bilinear'] algorithm used for upsampling
|
|
33923
34388
|
* @returns {Promise<Tensor>} The interpolated tensor.
|
|
33924
34389
|
*/
|
|
33925
34390
|
async function interpolate_4d(input, {
|
|
@@ -33949,7 +34414,9 @@ async function interpolate_4d(input, {
|
|
|
33949
34414
|
}
|
|
33950
34415
|
|
|
33951
34416
|
let op;
|
|
33952
|
-
if (mode === '
|
|
34417
|
+
if (mode === 'nearest') {
|
|
34418
|
+
op = await _ops_registry_js__WEBPACK_IMPORTED_MODULE_2__.TensorOpRegistry.nearest_interpolate_4d;
|
|
34419
|
+
} else if (mode === 'bilinear') {
|
|
33953
34420
|
op = await _ops_registry_js__WEBPACK_IMPORTED_MODULE_2__.TensorOpRegistry.bilinear_interpolate_4d;
|
|
33954
34421
|
} else if (mode === 'bicubic') {
|
|
33955
34422
|
op = await _ops_registry_js__WEBPACK_IMPORTED_MODULE_2__.TensorOpRegistry.bicubic_interpolate_4d;
|
|
@@ -33990,13 +34457,13 @@ async function rfft(x, a) {
|
|
|
33990
34457
|
* Returns the k largest elements of the given input tensor.
|
|
33991
34458
|
* Inspired by https://pytorch.org/docs/stable/generated/torch.topk.html
|
|
33992
34459
|
* @param {Tensor} x the input tensor
|
|
33993
|
-
* @param {number} k the k in "top-k"
|
|
34460
|
+
* @param {number} [k] the k in "top-k"
|
|
33994
34461
|
* @returns {Promise<[Tensor, Tensor]>} the output tuple of (Tensor, LongTensor) of top-k elements and their indices.
|
|
33995
34462
|
*/
|
|
33996
34463
|
async function topk(x, k) {
|
|
33997
34464
|
const op = await _ops_registry_js__WEBPACK_IMPORTED_MODULE_2__.TensorOpRegistry.top_k;
|
|
33998
34465
|
|
|
33999
|
-
if (k
|
|
34466
|
+
if (k == null) {
|
|
34000
34467
|
k = x.dims.at(-1);
|
|
34001
34468
|
} else {
|
|
34002
34469
|
k = Math.min(k, x.dims.at(-1));
|
|
@@ -34025,10 +34492,10 @@ const arrayToIndexTensor = (array) => new Tensor('int64', array, [array.length])
|
|
|
34025
34492
|
async function slice(data, starts, ends, axes, steps) {
|
|
34026
34493
|
const op = await _ops_registry_js__WEBPACK_IMPORTED_MODULE_2__.TensorOpRegistry.slice;
|
|
34027
34494
|
return await op({
|
|
34028
|
-
x: data,
|
|
34029
|
-
s: arrayToIndexTensor(starts),
|
|
34030
|
-
e: arrayToIndexTensor(ends),
|
|
34031
|
-
a: arrayToIndexTensor(axes),
|
|
34495
|
+
x: data,
|
|
34496
|
+
s: arrayToIndexTensor(starts),
|
|
34497
|
+
e: arrayToIndexTensor(ends),
|
|
34498
|
+
a: arrayToIndexTensor(axes),
|
|
34032
34499
|
t: arrayToIndexTensor(steps ?? new Array(axes.length).fill(1)),
|
|
34033
34500
|
});
|
|
34034
34501
|
}
|
|
@@ -34263,35 +34730,19 @@ function stack(tensors, dim = 0) {
|
|
|
34263
34730
|
|
|
34264
34731
|
|
|
34265
34732
|
/**
|
|
34266
|
-
*
|
|
34267
|
-
* @param {Tensor} input the input
|
|
34268
|
-
* @param {number|null} dim the dimension to reduce.
|
|
34269
|
-
* @param {number} correction difference between the sample size and sample degrees of freedom. Defaults to Bessel's correction, correction=1.
|
|
34733
|
+
* @param {(previousValue: any, currentValue: any, currentIndex?: number, resultIndex?: number) => any} callbackfn
|
|
34734
|
+
* @param {Tensor} input the input tensor.
|
|
34735
|
+
* @param {number|null} dim the dimension to reduce.
|
|
34270
34736
|
* @param {boolean} keepdim whether the output tensor has dim retained or not.
|
|
34271
|
-
* @returns {
|
|
34737
|
+
* @returns {[DataType, any, number[]]} The reduced tensor data.
|
|
34272
34738
|
*/
|
|
34273
|
-
function
|
|
34274
|
-
const inputData =
|
|
34739
|
+
function reduce_helper(callbackfn, input, dim = null, keepdim = false, initialValue = null) {
|
|
34740
|
+
const inputData = input.data;
|
|
34275
34741
|
const inputDims = input.dims;
|
|
34276
34742
|
|
|
34277
|
-
if (dim === null) {
|
|
34278
|
-
// None to reduce over all dimensions.
|
|
34279
|
-
const sum = inputData.reduce((a, b) => a + b, 0);
|
|
34280
|
-
const mean = sum / inputData.length;
|
|
34281
|
-
const std = Math.sqrt(inputData.reduce((a, b) => a + (b - mean) ** 2, 0) / (inputData.length - correction));
|
|
34282
|
-
|
|
34283
|
-
const meanTensor = new Tensor(input.type, [mean], [/* scalar */]);
|
|
34284
|
-
const stdTensor = new Tensor(input.type, [std], [/* scalar */]);
|
|
34285
|
-
|
|
34286
|
-
return [stdTensor, meanTensor];
|
|
34287
|
-
}
|
|
34288
|
-
|
|
34289
34743
|
// Negative indexing
|
|
34290
34744
|
dim = safeIndex(dim, inputDims.length);
|
|
34291
34745
|
|
|
34292
|
-
const meanTensor = mean(input, dim, keepdim);
|
|
34293
|
-
const meanTensorData = meanTensor.data;
|
|
34294
|
-
|
|
34295
34746
|
// Calculate the shape of the resulting array after summation
|
|
34296
34747
|
const resultDims = inputDims.slice(); // Copy the original dimensions
|
|
34297
34748
|
resultDims[dim] = 1; // Remove the specified axis
|
|
@@ -34299,6 +34750,9 @@ function std_mean(input, dim = null, correction = 1, keepdim = false) {
|
|
|
34299
34750
|
// Create a new array to store the accumulated values
|
|
34300
34751
|
// @ts-ignore
|
|
34301
34752
|
const result = new inputData.constructor(inputData.length / inputDims[dim]);
|
|
34753
|
+
if (initialValue !== null) {
|
|
34754
|
+
result.fill(initialValue);
|
|
34755
|
+
}
|
|
34302
34756
|
|
|
34303
34757
|
// Iterate over the data array
|
|
34304
34758
|
for (let i = 0; i < inputData.length; ++i) {
|
|
@@ -34317,23 +34771,55 @@ function std_mean(input, dim = null, correction = 1, keepdim = false) {
|
|
|
34317
34771
|
}
|
|
34318
34772
|
|
|
34319
34773
|
// Accumulate the value at the current index
|
|
34320
|
-
result[resultIndex]
|
|
34774
|
+
result[resultIndex] = callbackfn(result[resultIndex], inputData[i], i, resultIndex);
|
|
34321
34775
|
}
|
|
34322
34776
|
|
|
34323
|
-
|
|
34324
|
-
|
|
34777
|
+
if (!keepdim) resultDims.splice(dim, 1);
|
|
34778
|
+
|
|
34779
|
+
return [input.type, result, resultDims];
|
|
34780
|
+
}
|
|
34781
|
+
|
|
34782
|
+
|
|
34783
|
+
/**
|
|
34784
|
+
* Calculates the standard deviation and mean over the dimensions specified by dim. dim can be a single dimension or `null` to reduce over all dimensions.
|
|
34785
|
+
* @param {Tensor} input the input tenso
|
|
34786
|
+
* @param {number|null} dim the dimension to reduce. If None, all dimensions are reduced.
|
|
34787
|
+
* @param {number} correction difference between the sample size and sample degrees of freedom. Defaults to Bessel's correction, correction=1.
|
|
34788
|
+
* @param {boolean} keepdim whether the output tensor has dim retained or not.
|
|
34789
|
+
* @returns {Tensor[]} A tuple of (std, mean) tensors.
|
|
34790
|
+
*/
|
|
34791
|
+
function std_mean(input, dim = null, correction = 1, keepdim = false) {
|
|
34792
|
+
const inputData = /** @type {Float32Array} */(input.data);
|
|
34793
|
+
const inputDims = input.dims;
|
|
34794
|
+
|
|
34795
|
+
if (dim === null) {
|
|
34796
|
+
// None to reduce over all dimensions.
|
|
34797
|
+
const sum = inputData.reduce((a, b) => a + b, 0);
|
|
34798
|
+
const mean = sum / inputData.length;
|
|
34799
|
+
const std = Math.sqrt(inputData.reduce((a, b) => a + (b - mean) ** 2, 0) / (inputData.length - correction));
|
|
34800
|
+
|
|
34801
|
+
const meanTensor = new Tensor(input.type, [mean], [/* scalar */]);
|
|
34802
|
+
const stdTensor = new Tensor(input.type, [std], [/* scalar */]);
|
|
34803
|
+
|
|
34804
|
+
return [stdTensor, meanTensor];
|
|
34325
34805
|
}
|
|
34806
|
+
dim = safeIndex(dim, inputDims.length);
|
|
34807
|
+
const meanTensor = mean(input, dim, keepdim);
|
|
34808
|
+
const meanTensorData = meanTensor.data;
|
|
34326
34809
|
|
|
34327
|
-
|
|
34328
|
-
|
|
34810
|
+
// Compute squared sum
|
|
34811
|
+
const [type, result, resultDims] = reduce_helper((a, b, i, j) => a + (b - meanTensorData[j]) ** 2, input, dim, keepdim);
|
|
34812
|
+
|
|
34813
|
+
// Square root of the squared sum
|
|
34814
|
+
for (let i = 0; i < result.length; ++i) {
|
|
34815
|
+
result[i] = Math.sqrt(result[i] / (inputDims[dim] - correction));
|
|
34329
34816
|
}
|
|
34330
34817
|
|
|
34331
|
-
const stdTensor = new Tensor(
|
|
34818
|
+
const stdTensor = new Tensor(type, result, resultDims);
|
|
34332
34819
|
|
|
34333
34820
|
return [stdTensor, meanTensor];
|
|
34334
34821
|
}
|
|
34335
34822
|
|
|
34336
|
-
|
|
34337
34823
|
/**
|
|
34338
34824
|
* Returns the mean value of each row of the input tensor in the given dimension dim.
|
|
34339
34825
|
* @param {Tensor} input the input tensor.
|
|
@@ -34342,58 +34828,27 @@ function std_mean(input, dim = null, correction = 1, keepdim = false) {
|
|
|
34342
34828
|
* @returns {Tensor} A new tensor with means taken along the specified dimension.
|
|
34343
34829
|
*/
|
|
34344
34830
|
function mean(input, dim = null, keepdim = false) {
|
|
34831
|
+
const inputDims = input.dims;
|
|
34345
34832
|
const inputData = /** @type {Float32Array} */(input.data);
|
|
34346
34833
|
|
|
34347
34834
|
if (dim === null) {
|
|
34348
34835
|
// None to reduce over all dimensions.
|
|
34349
|
-
// @ts-ignore
|
|
34350
34836
|
const val = inputData.reduce((a, b) => a + b, 0);
|
|
34351
34837
|
return new Tensor(input.type, [val / inputData.length], [/* scalar */]);
|
|
34352
34838
|
}
|
|
34353
|
-
const inputDims = input.dims;
|
|
34354
|
-
|
|
34355
|
-
// Negative indexing
|
|
34356
34839
|
dim = safeIndex(dim, inputDims.length);
|
|
34357
34840
|
|
|
34358
|
-
//
|
|
34359
|
-
const resultDims =
|
|
34360
|
-
resultDims[dim] = 1; // Remove the specified axis
|
|
34361
|
-
|
|
34362
|
-
// Create a new array to store the accumulated values
|
|
34363
|
-
// @ts-ignore
|
|
34364
|
-
const result = new inputData.constructor(inputData.length / inputDims[dim]);
|
|
34365
|
-
|
|
34366
|
-
// Iterate over the data array
|
|
34367
|
-
for (let i = 0; i < inputData.length; ++i) {
|
|
34368
|
-
|
|
34369
|
-
// Calculate the index in the resulting array
|
|
34370
|
-
let resultIndex = 0;
|
|
34371
|
-
|
|
34372
|
-
for (let j = inputDims.length - 1, num = i, resultMultiplier = 1; j >= 0; --j) {
|
|
34373
|
-
const size = inputDims[j];
|
|
34374
|
-
if (j !== dim) {
|
|
34375
|
-
const index = num % size;
|
|
34376
|
-
resultIndex += index * resultMultiplier;
|
|
34377
|
-
resultMultiplier *= resultDims[j];
|
|
34378
|
-
}
|
|
34379
|
-
num = Math.floor(num / size);
|
|
34380
|
-
}
|
|
34381
|
-
|
|
34382
|
-
// Accumulate the value at the current index
|
|
34383
|
-
result[resultIndex] += inputData[i];
|
|
34384
|
-
}
|
|
34841
|
+
// Compute sum
|
|
34842
|
+
const [type, result, resultDims] = reduce_helper((a, b) => a + b, input, dim, keepdim);
|
|
34385
34843
|
|
|
34844
|
+
// Divide by number of elements in the dimension
|
|
34386
34845
|
if (inputDims[dim] !== 1) {
|
|
34387
34846
|
for (let i = 0; i < result.length; ++i) {
|
|
34388
|
-
result[i]
|
|
34847
|
+
result[i] /= inputDims[dim];
|
|
34389
34848
|
}
|
|
34390
34849
|
}
|
|
34391
34850
|
|
|
34392
|
-
|
|
34393
|
-
resultDims.splice(dim, 1);
|
|
34394
|
-
}
|
|
34395
|
-
|
|
34396
|
-
return new Tensor(input.type, result, resultDims);
|
|
34851
|
+
return new Tensor(type, result, resultDims);
|
|
34397
34852
|
}
|
|
34398
34853
|
|
|
34399
34854
|
|
|
@@ -34893,6 +35348,10 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
34893
35348
|
/* harmony export */ GraniteModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GraniteModel),
|
|
34894
35349
|
/* harmony export */ GranitePreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GranitePreTrainedModel),
|
|
34895
35350
|
/* harmony export */ Grok1Tokenizer: () => (/* reexport safe */ _tokenizers_js__WEBPACK_IMPORTED_MODULE_3__.Grok1Tokenizer),
|
|
35351
|
+
/* harmony export */ GroundingDinoForObjectDetection: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GroundingDinoForObjectDetection),
|
|
35352
|
+
/* harmony export */ GroundingDinoImageProcessor: () => (/* reexport safe */ _models_image_processors_js__WEBPACK_IMPORTED_MODULE_13__.GroundingDinoImageProcessor),
|
|
35353
|
+
/* harmony export */ GroundingDinoPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GroundingDinoPreTrainedModel),
|
|
35354
|
+
/* harmony export */ GroundingDinoProcessor: () => (/* reexport safe */ _models_processors_js__WEBPACK_IMPORTED_MODULE_16__.GroundingDinoProcessor),
|
|
34896
35355
|
/* harmony export */ GroupViTModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GroupViTModel),
|
|
34897
35356
|
/* harmony export */ GroupViTPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GroupViTPreTrainedModel),
|
|
34898
35357
|
/* harmony export */ HerbertTokenizer: () => (/* reexport safe */ _tokenizers_js__WEBPACK_IMPORTED_MODULE_3__.HerbertTokenizer),
|
|
@@ -35122,6 +35581,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
35122
35581
|
/* harmony export */ RTDetrModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RTDetrModel),
|
|
35123
35582
|
/* harmony export */ RTDetrObjectDetectionOutput: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RTDetrObjectDetectionOutput),
|
|
35124
35583
|
/* harmony export */ RTDetrPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RTDetrPreTrainedModel),
|
|
35584
|
+
/* harmony export */ RawAudio: () => (/* reexport safe */ _utils_audio_js__WEBPACK_IMPORTED_MODULE_5__.RawAudio),
|
|
35125
35585
|
/* harmony export */ RawImage: () => (/* reexport safe */ _utils_image_js__WEBPACK_IMPORTED_MODULE_6__.RawImage),
|
|
35126
35586
|
/* harmony export */ RepetitionPenaltyLogitsProcessor: () => (/* reexport safe */ _generation_logits_process_js__WEBPACK_IMPORTED_MODULE_20__.RepetitionPenaltyLogitsProcessor),
|
|
35127
35587
|
/* harmony export */ ResNetForImageClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ResNetForImageClassification),
|
|
@@ -35187,6 +35647,8 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
35187
35647
|
/* harmony export */ Starcoder2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Starcoder2PreTrainedModel),
|
|
35188
35648
|
/* harmony export */ StoppingCriteria: () => (/* reexport safe */ _generation_stopping_criteria_js__WEBPACK_IMPORTED_MODULE_19__.StoppingCriteria),
|
|
35189
35649
|
/* harmony export */ StoppingCriteriaList: () => (/* reexport safe */ _generation_stopping_criteria_js__WEBPACK_IMPORTED_MODULE_19__.StoppingCriteriaList),
|
|
35650
|
+
/* harmony export */ StyleTextToSpeech2Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.StyleTextToSpeech2Model),
|
|
35651
|
+
/* harmony export */ StyleTextToSpeech2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.StyleTextToSpeech2PreTrainedModel),
|
|
35190
35652
|
/* harmony export */ SummarizationPipeline: () => (/* reexport safe */ _pipelines_js__WEBPACK_IMPORTED_MODULE_1__.SummarizationPipeline),
|
|
35191
35653
|
/* harmony export */ SuppressTokensAtBeginLogitsProcessor: () => (/* reexport safe */ _generation_logits_process_js__WEBPACK_IMPORTED_MODULE_20__.SuppressTokensAtBeginLogitsProcessor),
|
|
35192
35654
|
/* harmony export */ Swin2SRForImageSuperResolution: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Swin2SRForImageSuperResolution),
|