@huggingface/transformers 3.2.2 → 3.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -2
- package/dist/transformers.cjs +252 -113
- package/dist/transformers.cjs.map +1 -1
- package/dist/transformers.js +256 -114
- package/dist/transformers.js.map +1 -1
- package/dist/transformers.min.cjs +1 -1
- package/dist/transformers.min.cjs.map +1 -1
- package/dist/transformers.min.js +1 -1
- package/dist/transformers.min.js.map +1 -1
- package/dist/transformers.min.mjs +1 -1
- package/dist/transformers.min.mjs.map +1 -1
- package/dist/transformers.mjs +256 -114
- package/dist/transformers.mjs.map +1 -1
- package/package.json +2 -2
- package/src/base/feature_extraction_utils.js +9 -9
- package/src/base/image_processors_utils.js +11 -0
- package/src/base/processing_utils.js +13 -3
- package/src/configs.js +5 -0
- package/src/env.js +1 -1
- package/src/models/auto/feature_extraction_auto.js +0 -16
- package/src/models/auto/processing_auto.js +0 -16
- package/src/models/convnext/image_processing_convnext.js +1 -0
- package/src/models/efficientnet/image_processing_efficientnet.js +1 -0
- package/src/models/florence2/processing_florence2.js +3 -0
- package/src/models/idefics3/image_processing_idefics3.js +2 -0
- package/src/models/janus/image_processing_janus.js +1 -0
- package/src/models/mgp_str/processing_mgp_str.js +2 -0
- package/src/models/paligemma/processing_paligemma.js +1 -0
- package/src/models/phi3_v/processing_phi3_v.js +1 -1
- package/src/models/pyannote/feature_extraction_pyannote.js +1 -0
- package/src/models/qwen2_vl/processing_qwen2_vl.js +1 -0
- package/src/models/seamless_m4t/feature_extraction_seamless_m4t.js +2 -2
- package/src/models/whisper/feature_extraction_whisper.js +1 -1
- package/src/models.js +93 -36
- package/src/ops/registry.js +10 -0
- package/src/pipelines.js +34 -7
- package/src/tokenizers.js +4 -7
- package/src/utils/dtypes.js +2 -0
- package/src/utils/hub.js +1 -1
- package/src/utils/maths.js +8 -6
- package/src/utils/tensor.js +42 -10
- package/types/base/feature_extraction_utils.d.ts +7 -7
- package/types/base/image_processors_utils.d.ts.map +1 -1
- package/types/base/processing_utils.d.ts +17 -19
- package/types/base/processing_utils.d.ts.map +1 -1
- package/types/configs.d.ts.map +1 -1
- package/types/generation/parameters.d.ts +1 -1
- package/types/models/auto/feature_extraction_auto.d.ts.map +1 -1
- package/types/models/auto/image_processing_auto.d.ts.map +1 -1
- package/types/models/auto/processing_auto.d.ts.map +1 -1
- package/types/models/convnext/image_processing_convnext.d.ts.map +1 -1
- package/types/models/efficientnet/image_processing_efficientnet.d.ts.map +1 -1
- package/types/models/florence2/processing_florence2.d.ts.map +1 -1
- package/types/models/idefics3/image_processing_idefics3.d.ts.map +1 -1
- package/types/models/janus/image_processing_janus.d.ts.map +1 -1
- package/types/models/mgp_str/processing_mgp_str.d.ts.map +1 -1
- package/types/models/paligemma/processing_paligemma.d.ts.map +1 -1
- package/types/models/phi3_v/processing_phi3_v.d.ts +6 -2
- package/types/models/phi3_v/processing_phi3_v.d.ts.map +1 -1
- package/types/models/pyannote/feature_extraction_pyannote.d.ts.map +1 -1
- package/types/models/qwen2_vl/processing_qwen2_vl.d.ts.map +1 -1
- package/types/models/sapiens/image_processing_sapiens.d.ts +10 -0
- package/types/models/sapiens/image_processing_sapiens.d.ts.map +1 -0
- package/types/models/whisper/generation_whisper.d.ts +1 -1
- package/types/models/whisper/generation_whisper.d.ts.map +1 -1
- package/types/models.d.ts +48 -17
- package/types/models.d.ts.map +1 -1
- package/types/ops/registry.d.ts +1 -0
- package/types/ops/registry.d.ts.map +1 -1
- package/types/pipelines.d.ts +2 -2
- package/types/pipelines.d.ts.map +1 -1
- package/types/tokenizers.d.ts.map +1 -1
- package/types/tsconfig.tsbuildinfo +1 -0
- package/types/utils/dtypes.d.ts.map +1 -1
- package/types/utils/hub.d.ts +1 -1
- package/types/utils/hub.d.ts.map +1 -1
- package/types/utils/image.d.ts +3 -2
- package/types/utils/image.d.ts.map +1 -1
- package/types/utils/maths.d.ts +8 -6
- package/types/utils/maths.d.ts.map +1 -1
- package/types/utils/tensor.d.ts +8 -4
- package/types/utils/tensor.d.ts.map +1 -1
package/dist/transformers.cjs
CHANGED
|
@@ -4158,23 +4158,23 @@ class FeatureExtractor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_1__.Ca
|
|
|
4158
4158
|
}
|
|
4159
4159
|
|
|
4160
4160
|
/**
|
|
4161
|
-
* Instantiate one of the
|
|
4161
|
+
* Instantiate one of the feature extractor classes of the library from a pretrained model.
|
|
4162
4162
|
*
|
|
4163
|
-
* The
|
|
4164
|
-
*
|
|
4163
|
+
* The feature extractor class to instantiate is selected based on the `feature_extractor_type` property of
|
|
4164
|
+
* the config object (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
|
|
4165
4165
|
*
|
|
4166
4166
|
* @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
|
|
4167
|
-
* - A string, the *model id* of a pretrained
|
|
4167
|
+
* - A string, the *model id* of a pretrained feature_extractor hosted inside a model repo on huggingface.co.
|
|
4168
4168
|
* Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
|
|
4169
4169
|
* user or organization name, like `dbmdz/bert-base-german-cased`.
|
|
4170
|
-
* - A path to a *directory* containing
|
|
4171
|
-
* @param {import('../utils/hub.js').PretrainedOptions} options Additional options for loading the
|
|
4170
|
+
* - A path to a *directory* containing feature_extractor files, e.g., `./my_model_directory/`.
|
|
4171
|
+
* @param {import('../utils/hub.js').PretrainedOptions} options Additional options for loading the feature_extractor.
|
|
4172
4172
|
*
|
|
4173
|
-
* @returns {Promise<FeatureExtractor>} A new instance of the
|
|
4173
|
+
* @returns {Promise<FeatureExtractor>} A new instance of the Feature Extractor class.
|
|
4174
4174
|
*/
|
|
4175
4175
|
static async from_pretrained(pretrained_model_name_or_path, options) {
|
|
4176
|
-
const
|
|
4177
|
-
return new this(
|
|
4176
|
+
const config = await (0,_utils_hub_js__WEBPACK_IMPORTED_MODULE_2__.getModelJSON)(pretrained_model_name_or_path, _utils_constants_js__WEBPACK_IMPORTED_MODULE_0__.FEATURE_EXTRACTOR_NAME, true, options);
|
|
4177
|
+
return new this(config);
|
|
4178
4178
|
}
|
|
4179
4179
|
}
|
|
4180
4180
|
|
|
@@ -4825,14 +4825,20 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
|
|
|
4825
4825
|
this.do_thumbnail = config.do_thumbnail;
|
|
4826
4826
|
this.size = config.size ?? config.image_size;
|
|
4827
4827
|
this.do_resize = config.do_resize ?? (this.size !== undefined);
|
|
4828
|
+
// @ts-expect-error TS2339
|
|
4828
4829
|
this.size_divisibility = config.size_divisibility ?? config.size_divisor;
|
|
4829
4830
|
|
|
4830
4831
|
this.do_center_crop = config.do_center_crop;
|
|
4832
|
+
// @ts-expect-error TS2339
|
|
4831
4833
|
this.crop_size = config.crop_size;
|
|
4834
|
+
// @ts-expect-error TS2339
|
|
4832
4835
|
this.do_convert_rgb = config.do_convert_rgb ?? true;
|
|
4836
|
+
// @ts-expect-error TS2339
|
|
4833
4837
|
this.do_crop_margin = config.do_crop_margin;
|
|
4834
4838
|
|
|
4839
|
+
// @ts-expect-error TS2339
|
|
4835
4840
|
this.pad_size = config.pad_size;
|
|
4841
|
+
// @ts-expect-error TS2339
|
|
4836
4842
|
this.do_pad = config.do_pad;
|
|
4837
4843
|
|
|
4838
4844
|
if (this.do_pad && !this.pad_size && this.size && this.size.width !== undefined && this.size.height !== undefined) {
|
|
@@ -5041,6 +5047,7 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
|
|
|
5041
5047
|
// Support both formats for backwards compatibility
|
|
5042
5048
|
else if (Number.isInteger(size)) {
|
|
5043
5049
|
shortest_edge = size;
|
|
5050
|
+
// @ts-expect-error TS2339
|
|
5044
5051
|
longest_edge = this.config.max_size ?? shortest_edge;
|
|
5045
5052
|
|
|
5046
5053
|
} else if (size !== undefined) {
|
|
@@ -5109,6 +5116,7 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
|
|
|
5109
5116
|
} else if (size.min_pixels !== undefined && size.max_pixels !== undefined) {
|
|
5110
5117
|
// Custom resize logic for Qwen2-VL models
|
|
5111
5118
|
const { min_pixels, max_pixels } = size;
|
|
5119
|
+
// @ts-expect-error TS2339
|
|
5112
5120
|
const factor = this.config.patch_size * this.config.merge_size;
|
|
5113
5121
|
return smart_resize(srcHeight, srcWidth, factor, min_pixels, max_pixels);
|
|
5114
5122
|
} else {
|
|
@@ -5124,6 +5132,7 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
|
|
|
5124
5132
|
async resize(image) {
|
|
5125
5133
|
const [newWidth, newHeight] = this.get_resize_output_image_size(image, this.size);
|
|
5126
5134
|
return await image.resize(newWidth, newHeight, {
|
|
5135
|
+
// @ts-expect-error TS2322
|
|
5127
5136
|
resample: this.resample,
|
|
5128
5137
|
});
|
|
5129
5138
|
}
|
|
@@ -5174,6 +5183,7 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
|
|
|
5174
5183
|
|
|
5175
5184
|
// Resize the image using thumbnail method.
|
|
5176
5185
|
if (this.do_thumbnail) {
|
|
5186
|
+
// @ts-expect-error TS2345
|
|
5177
5187
|
image = await this.thumbnail(image, this.size, this.resample);
|
|
5178
5188
|
}
|
|
5179
5189
|
|
|
@@ -5198,6 +5208,7 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
|
|
|
5198
5208
|
// NOTE: All pixel-level manipulation (i.e., modifying `pixelData`)
|
|
5199
5209
|
// occurs with data in the hwc format (height, width, channels),
|
|
5200
5210
|
// to emulate the behavior of the original Python code (w/ numpy).
|
|
5211
|
+
/** @type {Float32Array} */
|
|
5201
5212
|
let pixelData = Float32Array.from(image.data);
|
|
5202
5213
|
let imgDims = [image.height, image.width, image.channels];
|
|
5203
5214
|
|
|
@@ -5356,6 +5367,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
5356
5367
|
/**
|
|
5357
5368
|
* @typedef {Object} ProcessorProperties Additional processor-specific properties.
|
|
5358
5369
|
* @typedef {import('../utils/hub.js').PretrainedOptions & ProcessorProperties} PretrainedProcessorOptions
|
|
5370
|
+
* @typedef {import('../tokenizers.js').PreTrainedTokenizer} PreTrainedTokenizer
|
|
5359
5371
|
*/
|
|
5360
5372
|
|
|
5361
5373
|
|
|
@@ -5389,7 +5401,7 @@ class Processor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_1__.Callable
|
|
|
5389
5401
|
}
|
|
5390
5402
|
|
|
5391
5403
|
/**
|
|
5392
|
-
* @returns {
|
|
5404
|
+
* @returns {PreTrainedTokenizer|undefined} The tokenizer of the processor, if it exists.
|
|
5393
5405
|
*/
|
|
5394
5406
|
get tokenizer() {
|
|
5395
5407
|
return this.components.tokenizer;
|
|
@@ -5402,6 +5414,11 @@ class Processor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_1__.Callable
|
|
|
5402
5414
|
return this.components.feature_extractor;
|
|
5403
5415
|
}
|
|
5404
5416
|
|
|
5417
|
+
/**
|
|
5418
|
+
* @param {Parameters<PreTrainedTokenizer['apply_chat_template']>[0]} messages
|
|
5419
|
+
* @param {Parameters<PreTrainedTokenizer['apply_chat_template']>[1]} options
|
|
5420
|
+
* @returns {ReturnType<PreTrainedTokenizer['apply_chat_template']>}
|
|
5421
|
+
*/
|
|
5405
5422
|
apply_chat_template(messages, options = {}) {
|
|
5406
5423
|
if (!this.tokenizer) {
|
|
5407
5424
|
throw new Error('Unable to apply chat template without a tokenizer.');
|
|
@@ -5412,6 +5429,10 @@ class Processor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_1__.Callable
|
|
|
5412
5429
|
});
|
|
5413
5430
|
}
|
|
5414
5431
|
|
|
5432
|
+
/**
|
|
5433
|
+
* @param {Parameters<PreTrainedTokenizer['batch_decode']>} args
|
|
5434
|
+
* @returns {ReturnType<PreTrainedTokenizer['batch_decode']>}
|
|
5435
|
+
*/
|
|
5415
5436
|
batch_decode(...args) {
|
|
5416
5437
|
if (!this.tokenizer) {
|
|
5417
5438
|
throw new Error('Unable to decode without a tokenizer.');
|
|
@@ -5439,8 +5460,8 @@ class Processor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_1__.Callable
|
|
|
5439
5460
|
/**
|
|
5440
5461
|
* Instantiate one of the processor classes of the library from a pretrained model.
|
|
5441
5462
|
*
|
|
5442
|
-
* The processor class to instantiate is selected based on the `
|
|
5443
|
-
* (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
|
|
5463
|
+
* The processor class to instantiate is selected based on the `image_processor_type` (or `feature_extractor_type`; legacy)
|
|
5464
|
+
* property of the config object (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
|
|
5444
5465
|
*
|
|
5445
5466
|
* @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
|
|
5446
5467
|
* - A string, the *model id* of a pretrained processor hosted inside a model repo on huggingface.co.
|
|
@@ -5560,15 +5581,19 @@ function getNormalizedConfig(config) {
|
|
|
5560
5581
|
case 'florence2':
|
|
5561
5582
|
case 'llava_onevision':
|
|
5562
5583
|
case 'idefics3':
|
|
5584
|
+
// @ts-expect-error TS2339
|
|
5563
5585
|
init_normalized_config = getNormalizedConfig(config.text_config);
|
|
5564
5586
|
break;
|
|
5565
5587
|
case 'moondream1':
|
|
5588
|
+
// @ts-expect-error TS2339
|
|
5566
5589
|
init_normalized_config = getNormalizedConfig(config.phi_config);
|
|
5567
5590
|
break;
|
|
5568
5591
|
case 'musicgen':
|
|
5592
|
+
// @ts-expect-error TS2339
|
|
5569
5593
|
init_normalized_config = getNormalizedConfig(config.decoder);
|
|
5570
5594
|
break;
|
|
5571
5595
|
case 'multi_modality':
|
|
5596
|
+
// @ts-expect-error TS2339
|
|
5572
5597
|
init_normalized_config = getNormalizedConfig(config.language_config);
|
|
5573
5598
|
break;
|
|
5574
5599
|
|
|
@@ -5689,6 +5714,7 @@ function getNormalizedConfig(config) {
|
|
|
5689
5714
|
break;
|
|
5690
5715
|
|
|
5691
5716
|
case 'vision-encoder-decoder':
|
|
5717
|
+
// @ts-expect-error TS2339
|
|
5692
5718
|
const decoderConfig = getNormalizedConfig(config.decoder);
|
|
5693
5719
|
|
|
5694
5720
|
const add_encoder_pkv = 'num_decoder_layers' in decoderConfig;
|
|
@@ -5932,7 +5958,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
5932
5958
|
|
|
5933
5959
|
|
|
5934
5960
|
|
|
5935
|
-
const VERSION = '3.2.
|
|
5961
|
+
const VERSION = '3.2.4';
|
|
5936
5962
|
|
|
5937
5963
|
// Check if various APIs are available (depends on environment)
|
|
5938
5964
|
const IS_BROWSER_ENV = typeof window !== "undefined" && typeof window.document !== "undefined";
|
|
@@ -8008,6 +8034,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
8008
8034
|
/* harmony export */ Dinov2ForImageClassification: () => (/* binding */ Dinov2ForImageClassification),
|
|
8009
8035
|
/* harmony export */ Dinov2Model: () => (/* binding */ Dinov2Model),
|
|
8010
8036
|
/* harmony export */ Dinov2PreTrainedModel: () => (/* binding */ Dinov2PreTrainedModel),
|
|
8037
|
+
/* harmony export */ Dinov2WithRegistersForImageClassification: () => (/* binding */ Dinov2WithRegistersForImageClassification),
|
|
8038
|
+
/* harmony export */ Dinov2WithRegistersModel: () => (/* binding */ Dinov2WithRegistersModel),
|
|
8039
|
+
/* harmony export */ Dinov2WithRegistersPreTrainedModel: () => (/* binding */ Dinov2WithRegistersPreTrainedModel),
|
|
8011
8040
|
/* harmony export */ DistilBertForMaskedLM: () => (/* binding */ DistilBertForMaskedLM),
|
|
8012
8041
|
/* harmony export */ DistilBertForQuestionAnswering: () => (/* binding */ DistilBertForQuestionAnswering),
|
|
8013
8042
|
/* harmony export */ DistilBertForSequenceClassification: () => (/* binding */ DistilBertForSequenceClassification),
|
|
@@ -8591,8 +8620,11 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
|
|
|
8591
8620
|
} else if (session_options.externalData !== undefined) {
|
|
8592
8621
|
externalDataPromises = session_options.externalData.map(async (ext) => {
|
|
8593
8622
|
// if the external data is a string, fetch the file and replace the string with its content
|
|
8623
|
+
// @ts-expect-error TS2339
|
|
8594
8624
|
if (typeof ext.data === "string") {
|
|
8625
|
+
// @ts-expect-error TS2339
|
|
8595
8626
|
const ext_buffer = await (0,_utils_hub_js__WEBPACK_IMPORTED_MODULE_5__.getModelFile)(pretrained_model_name_or_path, ext.data, true, options);
|
|
8627
|
+
// @ts-expect-error TS2698
|
|
8596
8628
|
return { ...ext, data: ext_buffer };
|
|
8597
8629
|
}
|
|
8598
8630
|
return ext;
|
|
@@ -9840,6 +9872,7 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
|
|
|
9840
9872
|
if (this.config.model_type === 'musicgen') {
|
|
9841
9873
|
// Custom logic (TODO: move to Musicgen class)
|
|
9842
9874
|
decoder_input_ids = Array.from({
|
|
9875
|
+
// @ts-expect-error TS2339
|
|
9843
9876
|
length: batch_size * this.config.decoder.num_codebooks
|
|
9844
9877
|
}, () => [decoder_start_token_id]);
|
|
9845
9878
|
|
|
@@ -10169,11 +10202,13 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
|
|
|
10169
10202
|
async encode_image({ pixel_values }) {
|
|
10170
10203
|
// image_inputs === { pixel_values }
|
|
10171
10204
|
const features = (await sessionRun(this.sessions['vision_encoder'], { pixel_values })).image_features;
|
|
10205
|
+
// @ts-expect-error TS2339
|
|
10172
10206
|
if (!this.config.num_image_tokens) {
|
|
10173
10207
|
console.warn(
|
|
10174
10208
|
'The number of image tokens was not set in the model configuration. ' +
|
|
10175
10209
|
`Setting it to the number of features detected by the vision encoder (${features.dims[1]}).`
|
|
10176
10210
|
)
|
|
10211
|
+
// @ts-expect-error TS2339
|
|
10177
10212
|
this.config.num_image_tokens = features.dims[1];
|
|
10178
10213
|
}
|
|
10179
10214
|
return features;
|
|
@@ -11601,6 +11636,7 @@ class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
|
|
|
11601
11636
|
|
|
11602
11637
|
if (generation_config.return_token_timestamps) {
|
|
11603
11638
|
outputs["token_timestamps"] = this._extract_token_timestamps(
|
|
11639
|
+
// @ts-expect-error TS2345
|
|
11604
11640
|
outputs,
|
|
11605
11641
|
generation_config.alignment_heads,
|
|
11606
11642
|
generation_config.num_frames,
|
|
@@ -11636,6 +11672,7 @@ class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
|
|
|
11636
11672
|
);
|
|
11637
11673
|
}
|
|
11638
11674
|
|
|
11675
|
+
// @ts-expect-error TS2339
|
|
11639
11676
|
let median_filter_width = this.config.median_filter_width;
|
|
11640
11677
|
if (median_filter_width === undefined) {
|
|
11641
11678
|
console.warn("Model config has no `median_filter_width`, using default value of 7.")
|
|
@@ -11646,6 +11683,7 @@ class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
|
|
|
11646
11683
|
const batch = generate_outputs.cross_attentions;
|
|
11647
11684
|
// Create a list with `decoder_layers` elements, each a tensor of shape
|
|
11648
11685
|
// (batch size, attention_heads, output length, input length).
|
|
11686
|
+
// @ts-expect-error TS2339
|
|
11649
11687
|
const cross_attentions = Array.from({ length: this.config.decoder_layers },
|
|
11650
11688
|
// Concatenate the cross attentions for each layer across sequence length dimension.
|
|
11651
11689
|
(_, i) => (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_9__.cat)(batch.map(x => x[i]), 2)
|
|
@@ -11742,7 +11780,7 @@ class MoonshinePreTrainedModel extends PreTrainedModel {
|
|
|
11742
11780
|
*/
|
|
11743
11781
|
class MoonshineModel extends MoonshinePreTrainedModel { }
|
|
11744
11782
|
|
|
11745
|
-
class MoonshineForConditionalGeneration extends MoonshinePreTrainedModel { }
|
|
11783
|
+
class MoonshineForConditionalGeneration extends MoonshinePreTrainedModel { }
|
|
11746
11784
|
//////////////////////////////////////////////////
|
|
11747
11785
|
|
|
11748
11786
|
|
|
@@ -11789,6 +11827,7 @@ class LlavaForConditionalGeneration extends LlavaPreTrainedModel {
|
|
|
11789
11827
|
attention_mask,
|
|
11790
11828
|
}) {
|
|
11791
11829
|
|
|
11830
|
+
// @ts-expect-error TS2339
|
|
11792
11831
|
const image_token_index = this.config.image_token_index;
|
|
11793
11832
|
|
|
11794
11833
|
const idsList = input_ids.tolist();
|
|
@@ -12142,9 +12181,9 @@ class CLIPTextModel extends CLIPPreTrainedModel {
|
|
|
12142
12181
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
12143
12182
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
12144
12183
|
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
12145
|
-
// Update default model file name if not provided
|
|
12146
|
-
model_file_name: 'text_model',
|
|
12147
12184
|
...options,
|
|
12185
|
+
// Update default model file name if not provided
|
|
12186
|
+
model_file_name: options.model_file_name ?? 'text_model',
|
|
12148
12187
|
});
|
|
12149
12188
|
}
|
|
12150
12189
|
}
|
|
@@ -12179,9 +12218,9 @@ class CLIPTextModelWithProjection extends CLIPPreTrainedModel {
|
|
|
12179
12218
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
12180
12219
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
12181
12220
|
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
12182
|
-
// Update default model file name if not provided
|
|
12183
|
-
model_file_name: 'text_model',
|
|
12184
12221
|
...options,
|
|
12222
|
+
// Update default model file name if not provided
|
|
12223
|
+
model_file_name: options.model_file_name ?? 'text_model',
|
|
12185
12224
|
});
|
|
12186
12225
|
}
|
|
12187
12226
|
}
|
|
@@ -12193,9 +12232,9 @@ class CLIPVisionModel extends CLIPPreTrainedModel {
|
|
|
12193
12232
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
12194
12233
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
12195
12234
|
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
12196
|
-
// Update default model file name if not provided
|
|
12197
|
-
model_file_name: 'vision_model',
|
|
12198
12235
|
...options,
|
|
12236
|
+
// Update default model file name if not provided
|
|
12237
|
+
model_file_name: options.model_file_name ?? 'vision_model',
|
|
12199
12238
|
});
|
|
12200
12239
|
}
|
|
12201
12240
|
}
|
|
@@ -12230,9 +12269,9 @@ class CLIPVisionModelWithProjection extends CLIPPreTrainedModel {
|
|
|
12230
12269
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
12231
12270
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
12232
12271
|
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
12233
|
-
// Update default model file name if not provided
|
|
12234
|
-
model_file_name: 'vision_model',
|
|
12235
12272
|
...options,
|
|
12273
|
+
// Update default model file name if not provided
|
|
12274
|
+
model_file_name: options.model_file_name ?? 'vision_model',
|
|
12236
12275
|
});
|
|
12237
12276
|
}
|
|
12238
12277
|
}
|
|
@@ -12318,9 +12357,9 @@ class SiglipTextModel extends SiglipPreTrainedModel {
|
|
|
12318
12357
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
12319
12358
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
12320
12359
|
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
12321
|
-
// Update default model file name if not provided
|
|
12322
|
-
model_file_name: 'text_model',
|
|
12323
12360
|
...options,
|
|
12361
|
+
// Update default model file name if not provided
|
|
12362
|
+
model_file_name: options.model_file_name ?? 'text_model',
|
|
12324
12363
|
});
|
|
12325
12364
|
}
|
|
12326
12365
|
}
|
|
@@ -12355,9 +12394,9 @@ class SiglipVisionModel extends CLIPPreTrainedModel {
|
|
|
12355
12394
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
12356
12395
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
12357
12396
|
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
12358
|
-
// Update default model file name if not provided
|
|
12359
|
-
model_file_name: 'vision_model',
|
|
12360
12397
|
...options,
|
|
12398
|
+
// Update default model file name if not provided
|
|
12399
|
+
model_file_name: options.model_file_name ?? 'vision_model',
|
|
12361
12400
|
});
|
|
12362
12401
|
}
|
|
12363
12402
|
}
|
|
@@ -12414,9 +12453,9 @@ class JinaCLIPTextModel extends JinaCLIPPreTrainedModel {
|
|
|
12414
12453
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
12415
12454
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
12416
12455
|
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
12417
|
-
// Update default model file name if not provided
|
|
12418
|
-
model_file_name: 'text_model',
|
|
12419
12456
|
...options,
|
|
12457
|
+
// Update default model file name if not provided
|
|
12458
|
+
model_file_name: options.model_file_name ?? 'text_model',
|
|
12420
12459
|
});
|
|
12421
12460
|
}
|
|
12422
12461
|
}
|
|
@@ -12425,9 +12464,9 @@ class JinaCLIPVisionModel extends JinaCLIPPreTrainedModel {
|
|
|
12425
12464
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
12426
12465
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
12427
12466
|
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
12428
|
-
// Update default model file name if not provided
|
|
12429
|
-
model_file_name: 'vision_model',
|
|
12430
12467
|
...options,
|
|
12468
|
+
// Update default model file name if not provided
|
|
12469
|
+
model_file_name: options.model_file_name ?? 'vision_model',
|
|
12431
12470
|
});
|
|
12432
12471
|
}
|
|
12433
12472
|
}
|
|
@@ -12774,6 +12813,7 @@ class Qwen2VLForConditionalGeneration extends Qwen2VLPreTrainedModel {
|
|
|
12774
12813
|
const image_nums = vision_tokens.filter(x => x == image_token_id).length;
|
|
12775
12814
|
const video_nums = vision_tokens.filter(x => x == video_token_id).length;
|
|
12776
12815
|
|
|
12816
|
+
/** @type {number[][]} */
|
|
12777
12817
|
let llm_pos_ids_list = [];
|
|
12778
12818
|
let st = 0;
|
|
12779
12819
|
let remain_images = image_nums;
|
|
@@ -12843,6 +12883,7 @@ class Qwen2VLForConditionalGeneration extends Qwen2VLPreTrainedModel {
|
|
|
12843
12883
|
// NOTE: Each item in llm_pos_ids_list is an array of shape (3, text_len),
|
|
12844
12884
|
// meaning to perform concatenation along dim=1, we can do the following:
|
|
12845
12885
|
const num_items = llm_pos_ids_list.reduce((acc, x) => acc + x.length, 0);
|
|
12886
|
+
/** @type {number[]} */
|
|
12846
12887
|
const llm_positions = new Array(num_items);
|
|
12847
12888
|
let index = 0;
|
|
12848
12889
|
for (let x = 0; x < 3; ++x) {
|
|
@@ -12883,9 +12924,10 @@ class Qwen2VLForConditionalGeneration extends Qwen2VLPreTrainedModel {
|
|
|
12883
12924
|
{ length: 3 * data.length },
|
|
12884
12925
|
(_, i) => data[i % data.length]
|
|
12885
12926
|
);
|
|
12927
|
+
/** @type {bigint[]} */
|
|
12886
12928
|
const mrope_position_deltas = Array.from(
|
|
12887
12929
|
{ length: dims[0] },
|
|
12888
|
-
(_, i) => (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_11__.max)(data.subarray(dims[1] * i, dims[1] * (i + 1)))[0] +
|
|
12930
|
+
(_, i) => (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_11__.max)(data.subarray(dims[1] * i, dims[1] * (i + 1)))[0] + 1n + BigInt(dims[1])
|
|
12889
12931
|
);
|
|
12890
12932
|
|
|
12891
12933
|
return [
|
|
@@ -13456,7 +13498,7 @@ class DPTModel extends DPTPreTrainedModel { }
|
|
|
13456
13498
|
*
|
|
13457
13499
|
* **Example:** Depth estimation w/ `Xenova/dpt-hybrid-midas`.
|
|
13458
13500
|
* ```javascript
|
|
13459
|
-
* import { DPTForDepthEstimation, AutoProcessor, RawImage,
|
|
13501
|
+
* import { DPTForDepthEstimation, AutoProcessor, RawImage, interpolate_4d } from '@huggingface/transformers';
|
|
13460
13502
|
*
|
|
13461
13503
|
* // Load model and processor
|
|
13462
13504
|
* const model_id = 'Xenova/dpt-hybrid-midas';
|
|
@@ -13465,7 +13507,7 @@ class DPTModel extends DPTPreTrainedModel { }
|
|
|
13465
13507
|
*
|
|
13466
13508
|
* // Load image from URL
|
|
13467
13509
|
* const url = 'http://images.cocodataset.org/val2017/000000039769.jpg';
|
|
13468
|
-
* const image = await RawImage.
|
|
13510
|
+
* const image = await RawImage.read(url);
|
|
13469
13511
|
*
|
|
13470
13512
|
* // Prepare image for the model
|
|
13471
13513
|
* const inputs = await processor(image);
|
|
@@ -13474,10 +13516,15 @@ class DPTModel extends DPTPreTrainedModel { }
|
|
|
13474
13516
|
* const { predicted_depth } = await model(inputs);
|
|
13475
13517
|
*
|
|
13476
13518
|
* // Interpolate to original size
|
|
13477
|
-
* const prediction =
|
|
13519
|
+
* const prediction = (await interpolate_4d(predicted_depth.unsqueeze(1), {
|
|
13520
|
+
* size: image.size.reverse(),
|
|
13521
|
+
* mode: 'bilinear',
|
|
13522
|
+
* })).squeeze(1);
|
|
13478
13523
|
*
|
|
13479
13524
|
* // Visualize the prediction
|
|
13480
|
-
* const
|
|
13525
|
+
* const min = prediction.min().item();
|
|
13526
|
+
* const max = prediction.max().item();
|
|
13527
|
+
* const formatted = prediction.sub_(min).div_(max - min).mul_(255).to('uint8');
|
|
13481
13528
|
* const depth = RawImage.fromTensor(formatted);
|
|
13482
13529
|
* // RawImage {
|
|
13483
13530
|
* // data: Uint8Array(307200) [ 85, 85, 84, ... ],
|
|
@@ -13527,11 +13574,7 @@ class GLPNPreTrainedModel extends PreTrainedModel { }
|
|
|
13527
13574
|
class GLPNModel extends GLPNPreTrainedModel { }
|
|
13528
13575
|
|
|
13529
13576
|
/**
|
|
13530
|
-
*
|
|
13531
|
-
*
|
|
13532
|
-
* **Example:** Depth estimation w/ `Xenova/glpn-kitti`.
|
|
13533
|
-
* ```javascript
|
|
13534
|
-
* import { GLPNForDepthEstimation, AutoProcessor, RawImage, interpolate, max } from '@huggingface/transformers';
|
|
13577
|
+
* import { GLPNForDepthEstimation, AutoProcessor, RawImage, interpolate_4d } from '@huggingface/transformers';
|
|
13535
13578
|
*
|
|
13536
13579
|
* // Load model and processor
|
|
13537
13580
|
* const model_id = 'Xenova/glpn-kitti';
|
|
@@ -13540,7 +13583,7 @@ class GLPNModel extends GLPNPreTrainedModel { }
|
|
|
13540
13583
|
*
|
|
13541
13584
|
* // Load image from URL
|
|
13542
13585
|
* const url = 'http://images.cocodataset.org/val2017/000000039769.jpg';
|
|
13543
|
-
* const image = await RawImage.
|
|
13586
|
+
* const image = await RawImage.read(url);
|
|
13544
13587
|
*
|
|
13545
13588
|
* // Prepare image for the model
|
|
13546
13589
|
* const inputs = await processor(image);
|
|
@@ -13549,13 +13592,18 @@ class GLPNModel extends GLPNPreTrainedModel { }
|
|
|
13549
13592
|
* const { predicted_depth } = await model(inputs);
|
|
13550
13593
|
*
|
|
13551
13594
|
* // Interpolate to original size
|
|
13552
|
-
* const prediction =
|
|
13595
|
+
* const prediction = (await interpolate_4d(predicted_depth.unsqueeze(1), {
|
|
13596
|
+
* size: image.size.reverse(),
|
|
13597
|
+
* mode: 'bilinear',
|
|
13598
|
+
* })).squeeze(1);
|
|
13553
13599
|
*
|
|
13554
13600
|
* // Visualize the prediction
|
|
13555
|
-
* const
|
|
13601
|
+
* const min = prediction.min().item();
|
|
13602
|
+
* const max = prediction.max().item();
|
|
13603
|
+
* const formatted = prediction.sub_(min).div_(max - min).mul_(255).to('uint8');
|
|
13556
13604
|
* const depth = RawImage.fromTensor(formatted);
|
|
13557
13605
|
* // RawImage {
|
|
13558
|
-
* // data: Uint8Array(307200) [
|
|
13606
|
+
* // data: Uint8Array(307200) [ 85, 85, 84, ... ],
|
|
13559
13607
|
* // width: 640,
|
|
13560
13608
|
* // height: 480,
|
|
13561
13609
|
* // channels: 1
|
|
@@ -13710,6 +13758,26 @@ class Dinov2ForImageClassification extends Dinov2PreTrainedModel {
|
|
|
13710
13758
|
}
|
|
13711
13759
|
//////////////////////////////////////////////////
|
|
13712
13760
|
|
|
13761
|
+
//////////////////////////////////////////////////
|
|
13762
|
+
class Dinov2WithRegistersPreTrainedModel extends PreTrainedModel { }
|
|
13763
|
+
|
|
13764
|
+
/**
|
|
13765
|
+
* The bare Dinov2WithRegisters Model transformer outputting raw hidden-states without any specific head on top.
|
|
13766
|
+
*/
|
|
13767
|
+
class Dinov2WithRegistersModel extends Dinov2WithRegistersPreTrainedModel { }
|
|
13768
|
+
|
|
13769
|
+
/**
|
|
13770
|
+
* Dinov2WithRegisters Model transformer with an image classification head on top (a linear layer on top of the final hidden state of the [CLS] token) e.g. for ImageNet.
|
|
13771
|
+
*/
|
|
13772
|
+
class Dinov2WithRegistersForImageClassification extends Dinov2WithRegistersPreTrainedModel {
|
|
13773
|
+
/**
|
|
13774
|
+
* @param {any} model_inputs
|
|
13775
|
+
*/
|
|
13776
|
+
async _call(model_inputs) {
|
|
13777
|
+
return new SequenceClassifierOutput(await super._call(model_inputs));
|
|
13778
|
+
}
|
|
13779
|
+
}
|
|
13780
|
+
//////////////////////////////////////////////////
|
|
13713
13781
|
|
|
13714
13782
|
//////////////////////////////////////////////////
|
|
13715
13783
|
class YolosPreTrainedModel extends PreTrainedModel { }
|
|
@@ -14502,10 +14570,12 @@ class SpeechT5ForTextToSpeech extends SpeechT5PreTrainedModel {
|
|
|
14502
14570
|
|
|
14503
14571
|
const { encoder_outputs, encoder_attention_mask } = await encoderForward(this, model_inputs);
|
|
14504
14572
|
|
|
14573
|
+
// @ts-expect-error TS2339
|
|
14505
14574
|
const r = encoder_outputs.dims[1] / this.config.reduction_factor;
|
|
14506
14575
|
const maxlen = Math.floor(r * maxlenratio);
|
|
14507
14576
|
const minlen = Math.floor(r * minlenratio);
|
|
14508
14577
|
|
|
14578
|
+
// @ts-expect-error TS2339
|
|
14509
14579
|
const num_mel_bins = this.config.num_mel_bins;
|
|
14510
14580
|
|
|
14511
14581
|
let spectrogramParts = [];
|
|
@@ -14659,9 +14729,9 @@ class ClapTextModelWithProjection extends ClapPreTrainedModel {
|
|
|
14659
14729
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
14660
14730
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
14661
14731
|
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
14662
|
-
// Update default model file name if not provided
|
|
14663
|
-
model_file_name: 'text_model',
|
|
14664
14732
|
...options,
|
|
14733
|
+
// Update default model file name if not provided
|
|
14734
|
+
model_file_name: options.model_file_name ?? 'text_model',
|
|
14665
14735
|
});
|
|
14666
14736
|
}
|
|
14667
14737
|
}
|
|
@@ -14696,9 +14766,9 @@ class ClapAudioModelWithProjection extends ClapPreTrainedModel {
|
|
|
14696
14766
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
14697
14767
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
14698
14768
|
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
14699
|
-
// Update default model file name if not provided
|
|
14700
|
-
model_file_name: 'audio_model',
|
|
14701
14769
|
...options,
|
|
14770
|
+
// Update default model file name if not provided
|
|
14771
|
+
model_file_name: options.model_file_name ?? 'audio_model',
|
|
14702
14772
|
});
|
|
14703
14773
|
}
|
|
14704
14774
|
}
|
|
@@ -14870,11 +14940,13 @@ class MusicgenForConditionalGeneration extends PreTrainedModel { // NOTE: not Mu
|
|
|
14870
14940
|
*/
|
|
14871
14941
|
_apply_and_filter_by_delay_pattern_mask(outputs) {
|
|
14872
14942
|
const [bs_x_codebooks, seqLength] = outputs.dims;
|
|
14943
|
+
// @ts-expect-error TS2339
|
|
14873
14944
|
const num_codebooks = this.config.decoder.num_codebooks;
|
|
14874
14945
|
const upperBound = (seqLength - num_codebooks);
|
|
14875
14946
|
|
|
14876
14947
|
let newDataSize = 0;
|
|
14877
14948
|
for (let i = 0; i < outputs.size; ++i) {
|
|
14949
|
+
// @ts-expect-error TS2339
|
|
14878
14950
|
if (outputs.data[i] === this.config.decoder.pad_token_id) {
|
|
14879
14951
|
continue;
|
|
14880
14952
|
}
|
|
@@ -14904,7 +14976,9 @@ class MusicgenForConditionalGeneration extends PreTrainedModel { // NOTE: not Mu
|
|
|
14904
14976
|
let clonedInputIds = structuredClone(input_ids);
|
|
14905
14977
|
for (let i = 0; i < clonedInputIds.length; ++i) {
|
|
14906
14978
|
for (let j = 0; j < clonedInputIds[i].length; ++j) {
|
|
14979
|
+
// @ts-expect-error TS2339
|
|
14907
14980
|
if ((i % this.config.decoder.num_codebooks) >= j) {
|
|
14981
|
+
// @ts-expect-error TS2339
|
|
14908
14982
|
clonedInputIds[i][j] = BigInt(this.config.decoder.pad_token_id);
|
|
14909
14983
|
}
|
|
14910
14984
|
}
|
|
@@ -15061,6 +15135,9 @@ class MultiModalityCausalLM extends MultiModalityPreTrainedModel {
|
|
|
15061
15135
|
'past_key_values',
|
|
15062
15136
|
];
|
|
15063
15137
|
|
|
15138
|
+
/**
|
|
15139
|
+
* @param {ConstructorParameters<typeof MultiModalityPreTrainedModel>} args
|
|
15140
|
+
*/
|
|
15064
15141
|
constructor(...args) {
|
|
15065
15142
|
super(...args);
|
|
15066
15143
|
|
|
@@ -15339,6 +15416,7 @@ const MODEL_MAPPING_NAMES_ENCODER_ONLY = new Map([
|
|
|
15339
15416
|
['convnext', ['ConvNextModel', ConvNextModel]],
|
|
15340
15417
|
['convnextv2', ['ConvNextV2Model', ConvNextV2Model]],
|
|
15341
15418
|
['dinov2', ['Dinov2Model', Dinov2Model]],
|
|
15419
|
+
['dinov2_with_registers', ['Dinov2WithRegistersModel', Dinov2WithRegistersModel]],
|
|
15342
15420
|
['resnet', ['ResNetModel', ResNetModel]],
|
|
15343
15421
|
['swin', ['SwinModel', SwinModel]],
|
|
15344
15422
|
['swin2sr', ['Swin2SRModel', Swin2SRModel]],
|
|
@@ -15584,6 +15662,7 @@ const MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES = new Map([
|
|
|
15584
15662
|
['convnext', ['ConvNextForImageClassification', ConvNextForImageClassification]],
|
|
15585
15663
|
['convnextv2', ['ConvNextV2ForImageClassification', ConvNextV2ForImageClassification]],
|
|
15586
15664
|
['dinov2', ['Dinov2ForImageClassification', Dinov2ForImageClassification]],
|
|
15665
|
+
['dinov2_with_registers', ['Dinov2WithRegistersForImageClassification', Dinov2WithRegistersForImageClassification]],
|
|
15587
15666
|
['resnet', ['ResNetForImageClassification', ResNetForImageClassification]],
|
|
15588
15667
|
['swin', ['SwinForImageClassification', SwinForImageClassification]],
|
|
15589
15668
|
['segformer', ['SegformerForImageClassification', SegformerForImageClassification]],
|
|
@@ -16027,10 +16106,17 @@ class SequenceClassifierOutput extends ModelOutput {
|
|
|
16027
16106
|
/**
|
|
16028
16107
|
* @param {Object} output The output of the model.
|
|
16029
16108
|
* @param {Tensor} output.logits classification (or regression if config.num_labels==1) scores (before SoftMax).
|
|
16109
|
+
* @param {Record<string, Tensor>} [output.attentions] Object of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length, sequence_length)`.
|
|
16110
|
+
* Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
|
|
16030
16111
|
*/
|
|
16031
|
-
constructor({ logits }) {
|
|
16112
|
+
constructor({ logits, ...attentions }) {
|
|
16032
16113
|
super();
|
|
16033
16114
|
this.logits = logits;
|
|
16115
|
+
const attentions_list = Object.values(attentions);
|
|
16116
|
+
if (attentions_list.length > 0) {
|
|
16117
|
+
// Only set attentions if they are not empty
|
|
16118
|
+
this.attentions = attentions_list;
|
|
16119
|
+
}
|
|
16034
16120
|
}
|
|
16035
16121
|
}
|
|
16036
16122
|
|
|
@@ -16288,22 +16374,6 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
16288
16374
|
|
|
16289
16375
|
class AutoFeatureExtractor {
|
|
16290
16376
|
|
|
16291
|
-
/**
|
|
16292
|
-
* Instantiate one of the feature extractor classes of the library from a pretrained model.
|
|
16293
|
-
*
|
|
16294
|
-
* The processor class to instantiate is selected based on the `feature_extractor_type` property of
|
|
16295
|
-
* the config object (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
|
|
16296
|
-
*
|
|
16297
|
-
* @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
|
|
16298
|
-
* - A string, the *model id* of a pretrained processor hosted inside a model repo on huggingface.co.
|
|
16299
|
-
* Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
|
|
16300
|
-
* user or organization name, like `dbmdz/bert-base-german-cased`.
|
|
16301
|
-
* - A path to a *directory* containing processor files, e.g., `./my_model_directory/`.
|
|
16302
|
-
* @param {import('../../utils/hub.js').PretrainedOptions} options Additional options for loading the processor.
|
|
16303
|
-
*
|
|
16304
|
-
* @returns {Promise<AllFeatureExtractors.ImageProcessor>} A new instance of the Processor class.
|
|
16305
|
-
*/
|
|
16306
|
-
|
|
16307
16377
|
/** @type {typeof FeatureExtractor.from_pretrained} */
|
|
16308
16378
|
static async from_pretrained(pretrained_model_name_or_path, options={}) {
|
|
16309
16379
|
|
|
@@ -16432,22 +16502,6 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
16432
16502
|
*/
|
|
16433
16503
|
class AutoProcessor {
|
|
16434
16504
|
|
|
16435
|
-
/**
|
|
16436
|
-
* Instantiate one of the processor classes of the library from a pretrained model.
|
|
16437
|
-
*
|
|
16438
|
-
* The processor class to instantiate is selected based on the `image_processor_type` (or `feature_extractor_type`; legacy)
|
|
16439
|
-
* property of the config object (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
|
|
16440
|
-
*
|
|
16441
|
-
* @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
|
|
16442
|
-
* - A string, the *model id* of a pretrained processor hosted inside a model repo on huggingface.co.
|
|
16443
|
-
* Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
|
|
16444
|
-
* user or organization name, like `dbmdz/bert-base-german-cased`.
|
|
16445
|
-
* - A path to a *directory* containing processor files, e.g., `./my_model_directory/`.
|
|
16446
|
-
* @param {import('../../utils/hub.js').PretrainedOptions} options Additional options for loading the processor.
|
|
16447
|
-
*
|
|
16448
|
-
* @returns {Promise<Processor>} A new instance of the Processor class.
|
|
16449
|
-
*/
|
|
16450
|
-
|
|
16451
16505
|
/** @type {typeof Processor.from_pretrained} */
|
|
16452
16506
|
static async from_pretrained(pretrained_model_name_or_path, options={}) {
|
|
16453
16507
|
|
|
@@ -16771,6 +16825,7 @@ class ConvNextImageProcessor extends _base_image_processors_utils_js__WEBPACK_IM
|
|
|
16771
16825
|
/**
|
|
16772
16826
|
* Percentage of the image to crop. Only has an effect if this.size < 384.
|
|
16773
16827
|
*/
|
|
16828
|
+
// @ts-expect-error TS2339
|
|
16774
16829
|
this.crop_pct = this.config.crop_pct ?? (224 / 256);
|
|
16775
16830
|
}
|
|
16776
16831
|
|
|
@@ -16978,6 +17033,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
16978
17033
|
class EfficientNetImageProcessor extends _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_0__.ImageProcessor {
|
|
16979
17034
|
constructor(config) {
|
|
16980
17035
|
super(config);
|
|
17036
|
+
// @ts-expect-error TS2339
|
|
16981
17037
|
this.include_top = this.config.include_top ?? true;
|
|
16982
17038
|
if (this.include_top) {
|
|
16983
17039
|
this.image_std = this.image_std.map(x => x * x);
|
|
@@ -17061,8 +17117,11 @@ class Florence2Processor extends _base_processing_utils_js__WEBPACK_IMPORTED_MOD
|
|
|
17061
17117
|
super(config, components);
|
|
17062
17118
|
|
|
17063
17119
|
const {
|
|
17120
|
+
// @ts-expect-error TS2339
|
|
17064
17121
|
tasks_answer_post_processing_type,
|
|
17122
|
+
// @ts-expect-error TS2339
|
|
17065
17123
|
task_prompts_without_inputs,
|
|
17124
|
+
// @ts-expect-error TS2339
|
|
17066
17125
|
task_prompts_with_input,
|
|
17067
17126
|
} = this.image_processor.config;
|
|
17068
17127
|
|
|
@@ -17359,6 +17418,8 @@ class Idefics3ImageProcessor extends _base_image_processors_utils_js__WEBPACK_IM
|
|
|
17359
17418
|
|
|
17360
17419
|
const start_offset = i * pixel_attention_mask_stride + num_patches * h * w;
|
|
17361
17420
|
const end_offset = (i + 1) * pixel_attention_mask_stride;
|
|
17421
|
+
|
|
17422
|
+
// @ts-expect-error
|
|
17362
17423
|
pixel_attention_mask_data.fill(false, start_offset, end_offset);
|
|
17363
17424
|
}
|
|
17364
17425
|
}
|
|
@@ -17768,6 +17829,7 @@ class VLMImageProcessor extends _base_image_processors_utils_js__WEBPACK_IMPORTE
|
|
|
17768
17829
|
},
|
|
17769
17830
|
...config,
|
|
17770
17831
|
});
|
|
17832
|
+
// @ts-expect-error TS2339
|
|
17771
17833
|
this.constant_values = this.config.background_color.map(x => x * this.rescale_factor)
|
|
17772
17834
|
}
|
|
17773
17835
|
|
|
@@ -18216,6 +18278,8 @@ class MgpstrProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE
|
|
|
18216
18278
|
* - bpe_preds: The list of BPE decoded sentences.
|
|
18217
18279
|
* - wp_preds: The list of wp decoded sentences.
|
|
18218
18280
|
*/
|
|
18281
|
+
// @ts-expect-error The type of this method is not compatible with the one
|
|
18282
|
+
// in the base class. It might be a good idea to fix this.
|
|
18219
18283
|
batch_decode([char_logits, bpe_logits, wp_logits]) {
|
|
18220
18284
|
const [char_preds, char_scores] = this._decode_helper(char_logits, 'char');
|
|
18221
18285
|
const [bpe_preds, bpe_scores] = this._decode_helper(bpe_logits, 'bpe');
|
|
@@ -18609,6 +18673,7 @@ class PaliGemmaProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MOD
|
|
|
18609
18673
|
}
|
|
18610
18674
|
|
|
18611
18675
|
const bos_token = this.tokenizer.bos_token;
|
|
18676
|
+
// @ts-expect-error TS2339
|
|
18612
18677
|
const image_seq_length = this.image_processor.config.image_seq_length;
|
|
18613
18678
|
let input_strings;
|
|
18614
18679
|
if (text.some((t) => t.includes(IMAGE_TOKEN))) {
|
|
@@ -18861,7 +18926,7 @@ class Phi3VProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_
|
|
|
18861
18926
|
*
|
|
18862
18927
|
* @param {string|string[]} text
|
|
18863
18928
|
* @param {RawImage|RawImage[]} images
|
|
18864
|
-
* @param {
|
|
18929
|
+
* @param { { padding?: boolean, truncation?: boolean, num_crops?: number } | undefined } options
|
|
18865
18930
|
* @returns {Promise<any>}
|
|
18866
18931
|
*/
|
|
18867
18932
|
async _call(text, images = null, {
|
|
@@ -19048,6 +19113,7 @@ class PyAnnoteFeatureExtractor extends _base_feature_extraction_utils_js__WEBPAC
|
|
|
19048
19113
|
|
|
19049
19114
|
let current_speaker = -1;
|
|
19050
19115
|
for (let i = 0; i < scores.length; ++i) {
|
|
19116
|
+
/** @type {number[]} */
|
|
19051
19117
|
const probabilities = (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_2__.softmax)(scores[i]);
|
|
19052
19118
|
const [score, id] = (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_2__.max)(probabilities);
|
|
19053
19119
|
const [start, end] = [i, i + 1];
|
|
@@ -19235,6 +19301,7 @@ class Qwen2VLProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODUL
|
|
|
19235
19301
|
}
|
|
19236
19302
|
|
|
19237
19303
|
if (image_grid_thw) {
|
|
19304
|
+
// @ts-expect-error TS2551
|
|
19238
19305
|
let merge_length = this.image_processor.config.merge_size ** 2;
|
|
19239
19306
|
let index = 0;
|
|
19240
19307
|
|
|
@@ -19726,8 +19793,8 @@ class SeamlessM4TFeatureExtractor extends _base_feature_extraction_utils_js__WEB
|
|
|
19726
19793
|
'int64',
|
|
19727
19794
|
new BigInt64Array(numPaddedFrames),
|
|
19728
19795
|
[1, numPaddedFrames],
|
|
19729
|
-
)
|
|
19730
|
-
padded_attention_mask.data.fill(1n, 0, num_frames);
|
|
19796
|
+
);
|
|
19797
|
+
/** @type {BigInt64Array} */ (padded_attention_mask.data).fill(1n, 0, num_frames);
|
|
19731
19798
|
}
|
|
19732
19799
|
}
|
|
19733
19800
|
}
|
|
@@ -20540,7 +20607,7 @@ class WhisperFeatureExtractor extends _base_feature_extraction_utils_js__WEBPACK
|
|
|
20540
20607
|
)
|
|
20541
20608
|
|
|
20542
20609
|
const data = features.data;
|
|
20543
|
-
const maxValue = (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_3__.max)(data)[0];
|
|
20610
|
+
const maxValue = (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_3__.max)(/** @type {Float32Array} */(data))[0];
|
|
20544
20611
|
|
|
20545
20612
|
for (let i = 0; i < data.length; ++i) {
|
|
20546
20613
|
data[i] = (Math.max(data[i], maxValue - 8.0) + 4.0) / 4.0;
|
|
@@ -20803,6 +20870,16 @@ class TensorOpRegistry {
|
|
|
20803
20870
|
// executionProviders: ['webgpu'],
|
|
20804
20871
|
};
|
|
20805
20872
|
|
|
20873
|
+
static get nearest_interpolate_4d() {
|
|
20874
|
+
if (!this._nearest_interpolate_4d) {
|
|
20875
|
+
this._nearest_interpolate_4d = wrap(
|
|
20876
|
+
[8, 10, 18, 0, 58, 129, 1, 10, 41, 10, 1, 120, 10, 0, 10, 0, 10, 1, 115, 18, 1, 121, 34, 6, 82, 101, 115, 105, 122, 101, 42, 18, 10, 4, 109, 111, 100, 101, 34, 7, 110, 101, 97, 114, 101, 115, 116, 160, 1, 3, 18, 1, 114, 90, 31, 10, 1, 120, 18, 26, 10, 24, 8, 1, 18, 20, 10, 3, 18, 1, 98, 10, 3, 18, 1, 99, 10, 3, 18, 1, 104, 10, 3, 18, 1, 119, 90, 15, 10, 1, 115, 18, 10, 10, 8, 8, 7, 18, 4, 10, 2, 8, 4, 98, 31, 10, 1, 121, 18, 26, 10, 24, 8, 1, 18, 20, 10, 3, 18, 1, 98, 10, 3, 18, 1, 99, 10, 3, 18, 1, 104, 10, 3, 18, 1, 119, 66, 2, 16, 21],
|
|
20877
|
+
this.session_options,
|
|
20878
|
+
'y',
|
|
20879
|
+
);
|
|
20880
|
+
}
|
|
20881
|
+
return this._nearest_interpolate_4d;
|
|
20882
|
+
}
|
|
20806
20883
|
static get bilinear_interpolate_4d() {
|
|
20807
20884
|
if (!this._bilinear_interpolate_4d) {
|
|
20808
20885
|
this._bilinear_interpolate_4d = wrap(
|
|
@@ -21177,6 +21254,7 @@ class TextClassificationPipeline extends (/** @type {new (options: TextPipelineC
|
|
|
21177
21254
|
|
|
21178
21255
|
// TODO: Use softmax tensor function
|
|
21179
21256
|
const function_to_apply =
|
|
21257
|
+
// @ts-expect-error TS2339
|
|
21180
21258
|
this.model.config.problem_type === 'multi_label_classification'
|
|
21181
21259
|
? batch => batch.sigmoid()
|
|
21182
21260
|
: batch => new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_8__.Tensor(
|
|
@@ -21185,6 +21263,7 @@ class TextClassificationPipeline extends (/** @type {new (options: TextPipelineC
|
|
|
21185
21263
|
batch.dims,
|
|
21186
21264
|
); // single_label_classification (default)
|
|
21187
21265
|
|
|
21266
|
+
// @ts-expect-error TS2339
|
|
21188
21267
|
const id2label = this.model.config.id2label;
|
|
21189
21268
|
|
|
21190
21269
|
const toReturn = [];
|
|
@@ -21287,6 +21366,7 @@ class TokenClassificationPipeline extends (/** @type {new (options: TextPipeline
|
|
|
21287
21366
|
const outputs = await this.model(model_inputs)
|
|
21288
21367
|
|
|
21289
21368
|
const logits = outputs.logits;
|
|
21369
|
+
// @ts-expect-error TS2339
|
|
21290
21370
|
const id2label = this.model.config.id2label;
|
|
21291
21371
|
|
|
21292
21372
|
const toReturn = [];
|
|
@@ -21626,11 +21706,14 @@ class Text2TextGenerationPipeline extends (/** @type {new (options: TextPipeline
|
|
|
21626
21706
|
|
|
21627
21707
|
|
|
21628
21708
|
// Add global prefix, if present
|
|
21709
|
+
// @ts-expect-error TS2339
|
|
21629
21710
|
if (this.model.config.prefix) {
|
|
21711
|
+
// @ts-expect-error TS2339
|
|
21630
21712
|
texts = texts.map(x => this.model.config.prefix + x)
|
|
21631
21713
|
}
|
|
21632
21714
|
|
|
21633
21715
|
// Handle task specific params:
|
|
21716
|
+
// @ts-expect-error TS2339
|
|
21634
21717
|
const task_specific_params = this.model.config.task_specific_params
|
|
21635
21718
|
if (task_specific_params && task_specific_params[this.task]) {
|
|
21636
21719
|
// Add prefixes, if present
|
|
@@ -22369,6 +22452,7 @@ class AudioClassificationPipeline extends (/** @type {new (options: AudioPipelin
|
|
|
22369
22452
|
const sampling_rate = this.processor.feature_extractor.config.sampling_rate;
|
|
22370
22453
|
const preparedAudios = await prepareAudios(audio, sampling_rate);
|
|
22371
22454
|
|
|
22455
|
+
// @ts-expect-error TS2339
|
|
22372
22456
|
const id2label = this.model.config.id2label;
|
|
22373
22457
|
|
|
22374
22458
|
const toReturn = [];
|
|
@@ -22679,6 +22763,7 @@ class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options: TextA
|
|
|
22679
22763
|
audio = [/** @type {AudioInput} */ (audio)];
|
|
22680
22764
|
}
|
|
22681
22765
|
|
|
22766
|
+
// @ts-expect-error TS2339
|
|
22682
22767
|
const time_precision = this.processor.feature_extractor.config.chunk_length / this.model.config.max_source_positions;
|
|
22683
22768
|
const hop_length = this.processor.feature_extractor.config.hop_length;
|
|
22684
22769
|
|
|
@@ -22744,7 +22829,9 @@ class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options: TextA
|
|
|
22744
22829
|
|
|
22745
22830
|
// TODO: Right now we only get top beam
|
|
22746
22831
|
if (return_timestamps === 'word') {
|
|
22832
|
+
// @ts-expect-error TS2339
|
|
22747
22833
|
chunk.tokens = data.sequences.tolist()[0];
|
|
22834
|
+
// @ts-expect-error TS2339
|
|
22748
22835
|
chunk.token_timestamps = data.token_timestamps.tolist()[0].map(
|
|
22749
22836
|
(/** @type {number} */ x) => (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_6__.round)(x, 2)
|
|
22750
22837
|
);
|
|
@@ -22789,7 +22876,7 @@ class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options: TextA
|
|
|
22789
22876
|
const max_new_tokens = Math.floor(aud.length / sampling_rate) * 6;
|
|
22790
22877
|
const outputs = await this.model.generate({ max_new_tokens, ...kwargs, ...inputs });
|
|
22791
22878
|
|
|
22792
|
-
const text = this.processor.batch_decode(outputs, { skip_special_tokens: true })[0];
|
|
22879
|
+
const text = this.processor.batch_decode(/** @type {Tensor} */(outputs), { skip_special_tokens: true })[0];
|
|
22793
22880
|
toReturn.push({ text });
|
|
22794
22881
|
}
|
|
22795
22882
|
return single ? toReturn[0] : toReturn;
|
|
@@ -22938,6 +23025,7 @@ class ImageClassificationPipeline extends (/** @type {new (options: ImagePipelin
|
|
|
22938
23025
|
const { pixel_values } = await this.processor(preparedImages);
|
|
22939
23026
|
const output = await this.model({ pixel_values });
|
|
22940
23027
|
|
|
23028
|
+
// @ts-expect-error TS2339
|
|
22941
23029
|
const id2label = this.model.config.id2label;
|
|
22942
23030
|
|
|
22943
23031
|
/** @type {ImageClassificationOutput[]} */
|
|
@@ -23052,6 +23140,7 @@ class ImageSegmentationPipeline extends (/** @type {new (options: ImagePipelineC
|
|
|
23052
23140
|
}
|
|
23053
23141
|
}
|
|
23054
23142
|
|
|
23143
|
+
// @ts-expect-error TS2339
|
|
23055
23144
|
const id2label = this.model.config.id2label;
|
|
23056
23145
|
|
|
23057
23146
|
/** @type {ImageSegmentationPipelineOutput[]} */
|
|
@@ -23278,6 +23367,7 @@ class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipelineCon
|
|
|
23278
23367
|
const processed = this.processor.image_processor.post_process_object_detection(output, threshold, imageSizes);
|
|
23279
23368
|
|
|
23280
23369
|
// Add labels
|
|
23370
|
+
// @ts-expect-error TS2339
|
|
23281
23371
|
const id2label = this.model.config.id2label;
|
|
23282
23372
|
|
|
23283
23373
|
// Format output
|
|
@@ -23497,6 +23587,7 @@ class DocumentQuestionAnsweringPipeline extends (/** @type {new (options: TextIm
|
|
|
23497
23587
|
// Run model
|
|
23498
23588
|
const output = await this.model.generate({
|
|
23499
23589
|
inputs: pixel_values,
|
|
23590
|
+
// @ts-expect-error TS2339
|
|
23500
23591
|
max_length: this.model.config.decoder.max_position_embeddings,
|
|
23501
23592
|
decoder_input_ids,
|
|
23502
23593
|
...generate_kwargs,
|
|
@@ -23612,6 +23703,7 @@ class TextToAudioPipeline extends (/** @type {new (options: TextToAudioPipelineC
|
|
|
23612
23703
|
// Generate waveform
|
|
23613
23704
|
const { waveform } = await this.model(inputs);
|
|
23614
23705
|
|
|
23706
|
+
// @ts-expect-error TS2339
|
|
23615
23707
|
const sampling_rate = this.model.config.sampling_rate;
|
|
23616
23708
|
return {
|
|
23617
23709
|
audio: waveform.data,
|
|
@@ -23769,11 +23861,23 @@ class DepthEstimationPipeline extends (/** @type {new (options: ImagePipelineCon
|
|
|
23769
23861
|
|
|
23770
23862
|
const toReturn = [];
|
|
23771
23863
|
for (let i = 0; i < preparedImages.length; ++i) {
|
|
23772
|
-
const
|
|
23773
|
-
const
|
|
23864
|
+
const batch = predicted_depth[i];
|
|
23865
|
+
const [height, width] = batch.dims.slice(-2);
|
|
23866
|
+
const [new_width, new_height] = preparedImages[i].size;
|
|
23867
|
+
|
|
23868
|
+
// Interpolate to original size
|
|
23869
|
+
const prediction = (await (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_8__.interpolate_4d)(batch.view(1, 1, height, width), {
|
|
23870
|
+
size: [new_height, new_width],
|
|
23871
|
+
mode: 'bilinear',
|
|
23872
|
+
})).view(new_height, new_width);
|
|
23873
|
+
|
|
23874
|
+
const minval = /** @type {number} */(prediction.min().item());
|
|
23875
|
+
const maxval = /** @type {number} */(prediction.max().item());
|
|
23876
|
+
const formatted = prediction.sub(minval).div_(maxval - minval).mul_(255).to('uint8').unsqueeze(0);
|
|
23877
|
+
const depth = _utils_image_js__WEBPACK_IMPORTED_MODULE_9__.RawImage.fromTensor(formatted);
|
|
23774
23878
|
toReturn.push({
|
|
23775
|
-
predicted_depth:
|
|
23776
|
-
depth
|
|
23879
|
+
predicted_depth: prediction,
|
|
23880
|
+
depth,
|
|
23777
23881
|
});
|
|
23778
23882
|
}
|
|
23779
23883
|
|
|
@@ -24253,6 +24357,7 @@ async function loadItems(mapping, model, pretrainedOptions) {
|
|
|
24253
24357
|
return result;
|
|
24254
24358
|
}
|
|
24255
24359
|
|
|
24360
|
+
|
|
24256
24361
|
/***/ }),
|
|
24257
24362
|
|
|
24258
24363
|
/***/ "./src/tokenizers.js":
|
|
@@ -24322,7 +24427,6 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
24322
24427
|
/* harmony import */ var _utils_data_structures_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./utils/data-structures.js */ "./src/utils/data-structures.js");
|
|
24323
24428
|
/* harmony import */ var _huggingface_jinja__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! @huggingface/jinja */ "./node_modules/@huggingface/jinja/dist/index.js");
|
|
24324
24429
|
/* harmony import */ var _models_whisper_common_whisper_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./models/whisper/common_whisper.js */ "./src/models/whisper/common_whisper.js");
|
|
24325
|
-
/* harmony import */ var _utils_constants_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./utils/constants.js */ "./src/utils/constants.js");
|
|
24326
24430
|
|
|
24327
24431
|
/**
|
|
24328
24432
|
* @file Tokenizers are used to prepare textual inputs for a model.
|
|
@@ -24359,7 +24463,6 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
24359
24463
|
|
|
24360
24464
|
|
|
24361
24465
|
|
|
24362
|
-
|
|
24363
24466
|
/**
|
|
24364
24467
|
* @typedef {Object} TokenizerProperties Additional tokenizer-specific properties.
|
|
24365
24468
|
* @property {boolean} [legacy=false] Whether or not the `legacy` behavior of the tokenizer should be used.
|
|
@@ -24843,7 +24946,7 @@ class Unigram extends TokenizerModel {
|
|
|
24843
24946
|
* Create a new Unigram tokenizer model.
|
|
24844
24947
|
* @param {Object} config The configuration object for the Unigram model.
|
|
24845
24948
|
* @param {number} config.unk_id The ID of the unknown token
|
|
24846
|
-
* @param {
|
|
24949
|
+
* @param {[string, number][]} config.vocab A 2D array representing a mapping of tokens to scores.
|
|
24847
24950
|
* @param {Object} moreConfig Additional configuration object for the Unigram model.
|
|
24848
24951
|
*/
|
|
24849
24952
|
constructor(config, moreConfig) {
|
|
@@ -24851,11 +24954,10 @@ class Unigram extends TokenizerModel {
|
|
|
24851
24954
|
|
|
24852
24955
|
const vocabSize = config.vocab.length;
|
|
24853
24956
|
this.vocab = new Array(vocabSize);
|
|
24957
|
+
/** @type {number[]} */
|
|
24854
24958
|
this.scores = new Array(vocabSize);
|
|
24855
24959
|
for (let i = 0; i < vocabSize; ++i) {
|
|
24856
|
-
|
|
24857
|
-
this.vocab[i] = piece[0];
|
|
24858
|
-
this.scores[i] = piece[1];
|
|
24960
|
+
[this.vocab[i], this.scores[i]] = config.vocab[i];
|
|
24859
24961
|
}
|
|
24860
24962
|
|
|
24861
24963
|
this.unk_token_id = config.unk_id;
|
|
@@ -30218,6 +30320,8 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
30218
30320
|
/* harmony export */ });
|
|
30219
30321
|
/* harmony import */ var _env_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../env.js */ "./src/env.js");
|
|
30220
30322
|
/* harmony import */ var _devices_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./devices.js */ "./src/utils/devices.js");
|
|
30323
|
+
/// <reference types="@webgpu/types" />
|
|
30324
|
+
|
|
30221
30325
|
|
|
30222
30326
|
|
|
30223
30327
|
|
|
@@ -30473,7 +30577,7 @@ class FileResponse {
|
|
|
30473
30577
|
*/
|
|
30474
30578
|
async arrayBuffer() {
|
|
30475
30579
|
const data = await fs__WEBPACK_IMPORTED_MODULE_0__.promises.readFile(this.filePath);
|
|
30476
|
-
return data.buffer;
|
|
30580
|
+
return /** @type {ArrayBuffer} */ (data.buffer);
|
|
30477
30581
|
}
|
|
30478
30582
|
|
|
30479
30583
|
/**
|
|
@@ -32134,8 +32238,9 @@ function magnitude(arr) {
|
|
|
32134
32238
|
|
|
32135
32239
|
/**
|
|
32136
32240
|
* Returns the value and index of the minimum element in an array.
|
|
32137
|
-
* @
|
|
32138
|
-
* @
|
|
32241
|
+
* @template {number[]|bigint[]|AnyTypedArray} T
|
|
32242
|
+
* @param {T} arr array of numbers.
|
|
32243
|
+
* @returns {T extends bigint[]|BigTypedArray ? [bigint, number] : [number, number]} the value and index of the minimum element, of the form: [valueOfMin, indexOfMin]
|
|
32139
32244
|
* @throws {Error} If array is empty.
|
|
32140
32245
|
*/
|
|
32141
32246
|
function min(arr) {
|
|
@@ -32148,14 +32253,15 @@ function min(arr) {
|
|
|
32148
32253
|
indexOfMin = i;
|
|
32149
32254
|
}
|
|
32150
32255
|
}
|
|
32151
|
-
return [min, indexOfMin];
|
|
32256
|
+
return /** @type {T extends bigint[]|BigTypedArray ? [bigint, number] : [number, number]} */([min, indexOfMin]);
|
|
32152
32257
|
}
|
|
32153
32258
|
|
|
32154
32259
|
|
|
32155
32260
|
/**
|
|
32156
32261
|
* Returns the value and index of the maximum element in an array.
|
|
32157
|
-
* @
|
|
32158
|
-
* @
|
|
32262
|
+
* @template {number[]|bigint[]|AnyTypedArray} T
|
|
32263
|
+
* @param {T} arr array of numbers.
|
|
32264
|
+
* @returns {T extends bigint[]|BigTypedArray ? [bigint, number] : [number, number]} the value and index of the maximum element, of the form: [valueOfMax, indexOfMax]
|
|
32159
32265
|
* @throws {Error} If array is empty.
|
|
32160
32266
|
*/
|
|
32161
32267
|
function max(arr) {
|
|
@@ -32168,7 +32274,7 @@ function max(arr) {
|
|
|
32168
32274
|
indexOfMax = i;
|
|
32169
32275
|
}
|
|
32170
32276
|
}
|
|
32171
|
-
return [
|
|
32277
|
+
return /** @type {T extends bigint[]|BigTypedArray ? [bigint, number] : [number, number]} */([max, indexOfMax]);
|
|
32172
32278
|
}
|
|
32173
32279
|
|
|
32174
32280
|
function isPowerOfTwo(number) {
|
|
@@ -33466,8 +33572,6 @@ class Tensor {
|
|
|
33466
33572
|
return this.permute(...dims);
|
|
33467
33573
|
}
|
|
33468
33574
|
|
|
33469
|
-
// TODO add .max() and .min() methods
|
|
33470
|
-
|
|
33471
33575
|
/**
|
|
33472
33576
|
* Returns the sum of each row of the input tensor in the given dimension dim.
|
|
33473
33577
|
*
|
|
@@ -33761,6 +33865,36 @@ class Tensor {
|
|
|
33761
33865
|
return mean(this, dim, keepdim);
|
|
33762
33866
|
}
|
|
33763
33867
|
|
|
33868
|
+
min(dim = null, keepdim = false) {
|
|
33869
|
+
if (dim !== null) {
|
|
33870
|
+
throw new Error("`dim !== null` not yet implemented.");
|
|
33871
|
+
}
|
|
33872
|
+
const value = (0,_maths_js__WEBPACK_IMPORTED_MODULE_0__.min)(this.data)[0];
|
|
33873
|
+
return new Tensor(this.type, [value], []);
|
|
33874
|
+
}
|
|
33875
|
+
max(dim = null, keepdim = false) {
|
|
33876
|
+
if (dim !== null) {
|
|
33877
|
+
throw new Error("`dim !== null` not yet implemented.");
|
|
33878
|
+
}
|
|
33879
|
+
const value = (0,_maths_js__WEBPACK_IMPORTED_MODULE_0__.max)(this.data)[0];
|
|
33880
|
+
return new Tensor(this.type, [value], []);
|
|
33881
|
+
}
|
|
33882
|
+
|
|
33883
|
+
argmin(dim = null, keepdim = false) {
|
|
33884
|
+
if (dim !== null) {
|
|
33885
|
+
throw new Error("`dim !== null` not yet implemented.");
|
|
33886
|
+
}
|
|
33887
|
+
const index = (0,_maths_js__WEBPACK_IMPORTED_MODULE_0__.min)(this.data)[1];
|
|
33888
|
+
return new Tensor('int64', [BigInt(index)], []);
|
|
33889
|
+
}
|
|
33890
|
+
argmax(dim = null, keepdim = false) {
|
|
33891
|
+
if (dim !== null) {
|
|
33892
|
+
throw new Error("`dim !== null` not yet implemented.");
|
|
33893
|
+
}
|
|
33894
|
+
const index = (0,_maths_js__WEBPACK_IMPORTED_MODULE_0__.max)(this.data)[1];
|
|
33895
|
+
return new Tensor('int64', [BigInt(index)], []);
|
|
33896
|
+
}
|
|
33897
|
+
|
|
33764
33898
|
/**
|
|
33765
33899
|
* Performs Tensor dtype conversion.
|
|
33766
33900
|
* @param {DataType} type The desired data type.
|
|
@@ -33894,7 +34028,7 @@ function interpolate(input, [out_height, out_width], mode = 'bilinear', align_co
|
|
|
33894
34028
|
* @param {Tensor} input the input tensor
|
|
33895
34029
|
* @param {Object} options the options for the interpolation
|
|
33896
34030
|
* @param {[number, number]|[number, number, number]|[number, number, number, number]} [options.size=null] output spatial size.
|
|
33897
|
-
* @param {"bilinear"|"bicubic"} [options.mode='bilinear'] algorithm used for upsampling
|
|
34031
|
+
* @param {"nearest"|"bilinear"|"bicubic"} [options.mode='bilinear'] algorithm used for upsampling
|
|
33898
34032
|
* @returns {Promise<Tensor>} The interpolated tensor.
|
|
33899
34033
|
*/
|
|
33900
34034
|
async function interpolate_4d(input, {
|
|
@@ -33924,7 +34058,9 @@ async function interpolate_4d(input, {
|
|
|
33924
34058
|
}
|
|
33925
34059
|
|
|
33926
34060
|
let op;
|
|
33927
|
-
if (mode === '
|
|
34061
|
+
if (mode === 'nearest') {
|
|
34062
|
+
op = await _ops_registry_js__WEBPACK_IMPORTED_MODULE_2__.TensorOpRegistry.nearest_interpolate_4d;
|
|
34063
|
+
} else if (mode === 'bilinear') {
|
|
33928
34064
|
op = await _ops_registry_js__WEBPACK_IMPORTED_MODULE_2__.TensorOpRegistry.bilinear_interpolate_4d;
|
|
33929
34065
|
} else if (mode === 'bicubic') {
|
|
33930
34066
|
op = await _ops_registry_js__WEBPACK_IMPORTED_MODULE_2__.TensorOpRegistry.bicubic_interpolate_4d;
|
|
@@ -33965,13 +34101,13 @@ async function rfft(x, a) {
|
|
|
33965
34101
|
* Returns the k largest elements of the given input tensor.
|
|
33966
34102
|
* Inspired by https://pytorch.org/docs/stable/generated/torch.topk.html
|
|
33967
34103
|
* @param {Tensor} x the input tensor
|
|
33968
|
-
* @param {number} k the k in "top-k"
|
|
34104
|
+
* @param {number} [k] the k in "top-k"
|
|
33969
34105
|
* @returns {Promise<[Tensor, Tensor]>} the output tuple of (Tensor, LongTensor) of top-k elements and their indices.
|
|
33970
34106
|
*/
|
|
33971
34107
|
async function topk(x, k) {
|
|
33972
34108
|
const op = await _ops_registry_js__WEBPACK_IMPORTED_MODULE_2__.TensorOpRegistry.top_k;
|
|
33973
34109
|
|
|
33974
|
-
if (k
|
|
34110
|
+
if (k == null) {
|
|
33975
34111
|
k = x.dims.at(-1);
|
|
33976
34112
|
} else {
|
|
33977
34113
|
k = Math.min(k, x.dims.at(-1));
|
|
@@ -34000,10 +34136,10 @@ const arrayToIndexTensor = (array) => new Tensor('int64', array, [array.length])
|
|
|
34000
34136
|
async function slice(data, starts, ends, axes, steps) {
|
|
34001
34137
|
const op = await _ops_registry_js__WEBPACK_IMPORTED_MODULE_2__.TensorOpRegistry.slice;
|
|
34002
34138
|
return await op({
|
|
34003
|
-
x: data,
|
|
34004
|
-
s: arrayToIndexTensor(starts),
|
|
34005
|
-
e: arrayToIndexTensor(ends),
|
|
34006
|
-
a: arrayToIndexTensor(axes),
|
|
34139
|
+
x: data,
|
|
34140
|
+
s: arrayToIndexTensor(starts),
|
|
34141
|
+
e: arrayToIndexTensor(ends),
|
|
34142
|
+
a: arrayToIndexTensor(axes),
|
|
34007
34143
|
t: arrayToIndexTensor(steps ?? new Array(axes.length).fill(1)),
|
|
34008
34144
|
});
|
|
34009
34145
|
}
|
|
@@ -34784,6 +34920,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
34784
34920
|
/* harmony export */ Dinov2ForImageClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Dinov2ForImageClassification),
|
|
34785
34921
|
/* harmony export */ Dinov2Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Dinov2Model),
|
|
34786
34922
|
/* harmony export */ Dinov2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Dinov2PreTrainedModel),
|
|
34923
|
+
/* harmony export */ Dinov2WithRegistersForImageClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Dinov2WithRegistersForImageClassification),
|
|
34924
|
+
/* harmony export */ Dinov2WithRegistersModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Dinov2WithRegistersModel),
|
|
34925
|
+
/* harmony export */ Dinov2WithRegistersPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Dinov2WithRegistersPreTrainedModel),
|
|
34787
34926
|
/* harmony export */ DistilBertForMaskedLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.DistilBertForMaskedLM),
|
|
34788
34927
|
/* harmony export */ DistilBertForQuestionAnswering: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.DistilBertForQuestionAnswering),
|
|
34789
34928
|
/* harmony export */ DistilBertForSequenceClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.DistilBertForSequenceClassification),
|