@huggingface/transformers 3.1.1 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -4
- package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
- package/dist/transformers.cjs +1062 -183
- package/dist/transformers.cjs.map +1 -1
- package/dist/transformers.js +2239 -1232
- package/dist/transformers.js.map +1 -1
- package/dist/transformers.min.cjs +1 -358
- package/dist/transformers.min.cjs.map +1 -1
- package/dist/transformers.min.js +1 -421
- package/dist/transformers.min.js.map +1 -1
- package/dist/transformers.min.mjs +1 -358
- package/dist/transformers.min.mjs.map +1 -1
- package/dist/transformers.mjs +1082 -181
- package/dist/transformers.mjs.map +1 -1
- package/package.json +11 -16
- package/src/backends/onnx.js +2 -7
- package/src/base/image_processors_utils.js +3 -1
- package/src/configs.js +11 -2
- package/src/env.js +1 -1
- package/src/models/feature_extractors.js +1 -0
- package/src/models/idefics3/image_processing_idefics3.js +24 -13
- package/src/models/image_processors.js +1 -0
- package/src/models/moonshine/feature_extraction_moonshine.js +26 -0
- package/src/models/moonshine/processing_moonshine.js +20 -0
- package/src/models/paligemma/processing_paligemma.js +82 -0
- package/src/models/phi3_v/image_processing_phi3_v.js +163 -0
- package/src/models/phi3_v/processing_phi3_v.js +53 -0
- package/src/models/processors.js +3 -0
- package/src/models/pyannote/feature_extraction_pyannote.js +56 -0
- package/src/models/pyannote/processing_pyannote.js +7 -54
- package/src/models.js +233 -35
- package/src/ops/registry.js +11 -0
- package/src/pipelines.js +30 -0
- package/src/tokenizers.js +12 -1
- package/src/utils/core.js +39 -9
- package/src/utils/hub.js +8 -12
- package/src/utils/image.js +40 -0
- package/src/utils/tensor.js +51 -1
- package/types/backends/onnx.d.ts +2 -2
- package/types/backends/onnx.d.ts.map +1 -1
- package/types/base/feature_extraction_utils.d.ts +1 -1
- package/types/base/feature_extraction_utils.d.ts.map +1 -1
- package/types/base/image_processors_utils.d.ts +4 -4
- package/types/base/image_processors_utils.d.ts.map +1 -1
- package/types/base/processing_utils.d.ts +4 -4
- package/types/base/processing_utils.d.ts.map +1 -1
- package/types/configs.d.ts +7 -7
- package/types/configs.d.ts.map +1 -1
- package/types/env.d.ts +1 -1
- package/types/env.d.ts.map +1 -1
- package/types/generation/configuration_utils.d.ts +2 -2
- package/types/generation/logits_process.d.ts +2 -2
- package/types/generation/logits_process.d.ts.map +1 -1
- package/types/generation/logits_sampler.d.ts.map +1 -1
- package/types/generation/parameters.d.ts +5 -5
- package/types/generation/stopping_criteria.d.ts +1 -1
- package/types/generation/stopping_criteria.d.ts.map +1 -1
- package/types/generation/streamers.d.ts +2 -2
- package/types/generation/streamers.d.ts.map +1 -1
- package/types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts +1 -1
- package/types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts.map +1 -1
- package/types/models/auto/feature_extraction_auto.d.ts.map +1 -1
- package/types/models/auto/image_processing_auto.d.ts.map +1 -1
- package/types/models/auto/processing_auto.d.ts +1 -1
- package/types/models/auto/processing_auto.d.ts.map +1 -1
- package/types/models/clap/feature_extraction_clap.d.ts +1 -1
- package/types/models/clap/feature_extraction_clap.d.ts.map +1 -1
- package/types/models/detr/image_processing_detr.d.ts +11 -11
- package/types/models/detr/image_processing_detr.d.ts.map +1 -1
- package/types/models/donut/image_processing_donut.d.ts +1 -1
- package/types/models/donut/image_processing_donut.d.ts.map +1 -1
- package/types/models/feature_extractors.d.ts +1 -0
- package/types/models/florence2/processing_florence2.d.ts.map +1 -1
- package/types/models/idefics3/image_processing_idefics3.d.ts.map +1 -1
- package/types/models/idefics3/processing_idefics3.d.ts.map +1 -1
- package/types/models/image_processors.d.ts +1 -0
- package/types/models/janus/image_processing_janus.d.ts +1 -1
- package/types/models/janus/image_processing_janus.d.ts.map +1 -1
- package/types/models/janus/processing_janus.d.ts.map +1 -1
- package/types/models/maskformer/image_processing_maskformer.d.ts +8 -8
- package/types/models/maskformer/image_processing_maskformer.d.ts.map +1 -1
- package/types/models/mgp_str/processing_mgp_str.d.ts +2 -2
- package/types/models/mgp_str/processing_mgp_str.d.ts.map +1 -1
- package/types/models/moonshine/feature_extraction_moonshine.d.ts +13 -0
- package/types/models/moonshine/feature_extraction_moonshine.d.ts.map +1 -0
- package/types/models/moonshine/processing_moonshine.d.ts +17 -0
- package/types/models/moonshine/processing_moonshine.d.ts.map +1 -0
- package/types/models/owlvit/image_processing_owlvit.d.ts.map +1 -1
- package/types/models/paligemma/processing_paligemma.d.ts +12 -0
- package/types/models/paligemma/processing_paligemma.d.ts.map +1 -0
- package/types/models/phi3_v/image_processing_phi3_v.d.ts +17 -0
- package/types/models/phi3_v/image_processing_phi3_v.d.ts.map +1 -0
- package/types/models/phi3_v/processing_phi3_v.d.ts +17 -0
- package/types/models/phi3_v/processing_phi3_v.d.ts.map +1 -0
- package/types/models/processors.d.ts +3 -0
- package/types/models/pyannote/feature_extraction_pyannote.d.ts +18 -0
- package/types/models/pyannote/feature_extraction_pyannote.d.ts.map +1 -1
- package/types/models/pyannote/processing_pyannote.d.ts +4 -15
- package/types/models/pyannote/processing_pyannote.d.ts.map +1 -1
- package/types/models/qwen2_vl/processing_qwen2_vl.d.ts.map +1 -1
- package/types/models/rt_detr/image_processing_rt_detr.d.ts.map +1 -1
- package/types/models/sam/image_processing_sam.d.ts.map +1 -1
- package/types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts +1 -1
- package/types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts.map +1 -1
- package/types/models/segformer/image_processing_segformer.d.ts.map +1 -1
- package/types/models/speecht5/processing_speecht5.d.ts.map +1 -1
- package/types/models/swin2sr/image_processing_swin2sr.d.ts +1 -1
- package/types/models/swin2sr/image_processing_swin2sr.d.ts.map +1 -1
- package/types/models/vitmatte/image_processing_vitmatte.d.ts.map +1 -1
- package/types/models/vitpose/image_processing_vitpose.d.ts +1 -1
- package/types/models/vitpose/image_processing_vitpose.d.ts.map +1 -1
- package/types/models/wav2vec2/feature_extraction_wav2vec2.d.ts.map +1 -1
- package/types/models/wav2vec2/processing_wav2vec2.d.ts.map +1 -1
- package/types/models/wespeaker/feature_extraction_wespeaker.d.ts +1 -1
- package/types/models/wespeaker/feature_extraction_wespeaker.d.ts.map +1 -1
- package/types/models/whisper/feature_extraction_whisper.d.ts +1 -1
- package/types/models/whisper/feature_extraction_whisper.d.ts.map +1 -1
- package/types/models/whisper/generation_whisper.d.ts.map +1 -1
- package/types/models/whisper/processing_whisper.d.ts.map +1 -1
- package/types/models/yolos/image_processing_yolos.d.ts.map +1 -1
- package/types/models.d.ts +61 -5
- package/types/models.d.ts.map +1 -1
- package/types/ops/registry.d.ts +1 -0
- package/types/ops/registry.d.ts.map +1 -1
- package/types/pipelines.d.ts +31 -51
- package/types/pipelines.d.ts.map +1 -1
- package/types/tokenizers.d.ts +10 -6
- package/types/tokenizers.d.ts.map +1 -1
- package/types/utils/audio.d.ts.map +1 -1
- package/types/utils/constants.d.ts.map +1 -1
- package/types/utils/core.d.ts +87 -22
- package/types/utils/core.d.ts.map +1 -1
- package/types/utils/data-structures.d.ts.map +1 -1
- package/types/utils/devices.d.ts.map +1 -1
- package/types/utils/dtypes.d.ts.map +1 -1
- package/types/utils/generic.d.ts.map +1 -1
- package/types/utils/hub.d.ts +3 -3
- package/types/utils/hub.d.ts.map +1 -1
- package/types/utils/image.d.ts +10 -1
- package/types/utils/image.d.ts.map +1 -1
- package/types/utils/maths.d.ts +10 -10
- package/types/utils/maths.d.ts.map +1 -1
- package/types/utils/tensor.d.ts +22 -6
- package/types/utils/tensor.d.ts.map +1 -1
package/dist/transformers.cjs
CHANGED
|
@@ -56,10 +56,10 @@ module.exports = require("url");
|
|
|
56
56
|
|
|
57
57
|
/***/ }),
|
|
58
58
|
|
|
59
|
-
/***/ "?
|
|
60
|
-
|
|
61
|
-
!***
|
|
62
|
-
|
|
59
|
+
/***/ "?8b6b":
|
|
60
|
+
/*!*********************************!*\
|
|
61
|
+
!*** onnxruntime-web (ignored) ***!
|
|
62
|
+
\*********************************/
|
|
63
63
|
/***/ (() => {
|
|
64
64
|
|
|
65
65
|
/* (ignored) */
|
|
@@ -3896,7 +3896,7 @@ const version = '1.20.1';
|
|
|
3896
3896
|
|
|
3897
3897
|
"use strict";
|
|
3898
3898
|
var onnxruntime_node__WEBPACK_IMPORTED_MODULE_1___namespace_cache;
|
|
3899
|
-
var
|
|
3899
|
+
var onnxruntime_web__WEBPACK_IMPORTED_MODULE_2___namespace_cache;
|
|
3900
3900
|
__webpack_require__.r(__webpack_exports__);
|
|
3901
3901
|
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
3902
3902
|
/* harmony export */ Tensor: () => (/* reexport safe */ onnxruntime_common__WEBPACK_IMPORTED_MODULE_3__.Tensor),
|
|
@@ -3907,7 +3907,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
3907
3907
|
/* harmony export */ });
|
|
3908
3908
|
/* harmony import */ var _env_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../env.js */ "./src/env.js");
|
|
3909
3909
|
/* harmony import */ var onnxruntime_node__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! onnxruntime-node */ "onnxruntime-node");
|
|
3910
|
-
/* harmony import */ var
|
|
3910
|
+
/* harmony import */ var onnxruntime_web__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! onnxruntime-web */ "?8b6b");
|
|
3911
3911
|
/* harmony import */ var onnxruntime_common__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! onnxruntime-common */ "./node_modules/onnxruntime-common/dist/esm/index.js");
|
|
3912
3912
|
/**
|
|
3913
3913
|
* @file Handler file for choosing the correct version of ONNX Runtime, based on the environment.
|
|
@@ -3933,11 +3933,6 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
3933
3933
|
// In either case, we select the default export if it exists, otherwise we use the named export.
|
|
3934
3934
|
|
|
3935
3935
|
|
|
3936
|
-
// Use subpath-imports to ensure Node.js and browser interoperability.
|
|
3937
|
-
// See package.json and https://nodejs.org/api/packages.html#subpath-imports
|
|
3938
|
-
// for more information.
|
|
3939
|
-
// @ts-ignore
|
|
3940
|
-
|
|
3941
3936
|
|
|
3942
3937
|
|
|
3943
3938
|
|
|
@@ -3979,7 +3974,7 @@ if (ORT_SYMBOL in globalThis) {
|
|
|
3979
3974
|
} else if (_env_js__WEBPACK_IMPORTED_MODULE_0__.apis.IS_NODE_ENV) {
|
|
3980
3975
|
ONNX = onnxruntime_node__WEBPACK_IMPORTED_MODULE_1__ ?? /*#__PURE__*/ (onnxruntime_node__WEBPACK_IMPORTED_MODULE_1___namespace_cache || (onnxruntime_node__WEBPACK_IMPORTED_MODULE_1___namespace_cache = __webpack_require__.t(onnxruntime_node__WEBPACK_IMPORTED_MODULE_1__, 2)));
|
|
3981
3976
|
|
|
3982
|
-
// Updated as of ONNX Runtime 1.
|
|
3977
|
+
// Updated as of ONNX Runtime 1.20.1
|
|
3983
3978
|
// The following table lists the supported versions of ONNX Runtime Node.js binding provided with pre-built binaries.
|
|
3984
3979
|
// | EPs/Platforms | Windows x64 | Windows arm64 | Linux x64 | Linux arm64 | MacOS x64 | MacOS arm64 |
|
|
3985
3980
|
// | ------------- | ----------- | ------------- | ----------------- | ----------- | --------- | ----------- |
|
|
@@ -4002,7 +3997,7 @@ if (ORT_SYMBOL in globalThis) {
|
|
|
4002
3997
|
supportedDevices.push('cpu');
|
|
4003
3998
|
defaultDevices = ['cpu'];
|
|
4004
3999
|
} else {
|
|
4005
|
-
ONNX = /*#__PURE__*/ (
|
|
4000
|
+
ONNX = /*#__PURE__*/ (onnxruntime_web__WEBPACK_IMPORTED_MODULE_2___namespace_cache || (onnxruntime_web__WEBPACK_IMPORTED_MODULE_2___namespace_cache = __webpack_require__.t(onnxruntime_web__WEBPACK_IMPORTED_MODULE_2__, 2)));
|
|
4006
4001
|
|
|
4007
4002
|
if (_env_js__WEBPACK_IMPORTED_MODULE_0__.apis.IS_WEBNN_AVAILABLE) {
|
|
4008
4003
|
// TODO: Only push supported providers (depending on available hardware)
|
|
@@ -4925,7 +4920,7 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
|
|
|
4925
4920
|
* Pad the image by a certain amount.
|
|
4926
4921
|
* @param {Float32Array} pixelData The pixel data to pad.
|
|
4927
4922
|
* @param {number[]} imgDims The dimensions of the image (height, width, channels).
|
|
4928
|
-
* @param {{width:number; height:number}|number} padSize The dimensions of the padded image.
|
|
4923
|
+
* @param {{width:number; height:number}|number|'square'} padSize The dimensions of the padded image.
|
|
4929
4924
|
* @param {Object} options The options for padding.
|
|
4930
4925
|
* @param {'constant'|'symmetric'} [options.mode='constant'] The type of padding to add.
|
|
4931
4926
|
* @param {boolean} [options.center=false] Whether to center the image.
|
|
@@ -4943,6 +4938,8 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
|
|
|
4943
4938
|
if (typeof padSize === 'number') {
|
|
4944
4939
|
paddedImageWidth = padSize;
|
|
4945
4940
|
paddedImageHeight = padSize;
|
|
4941
|
+
} else if (padSize === 'square') {
|
|
4942
|
+
paddedImageWidth = paddedImageHeight = Math.max(imageHeight, imageWidth);
|
|
4946
4943
|
} else {
|
|
4947
4944
|
paddedImageWidth = padSize.width;
|
|
4948
4945
|
paddedImageHeight = padSize.height;
|
|
@@ -5588,8 +5585,6 @@ function getNormalizedConfig(config) {
|
|
|
5588
5585
|
case 'gpt_neox':
|
|
5589
5586
|
case 'stablelm':
|
|
5590
5587
|
case 'opt':
|
|
5591
|
-
case 'phi':
|
|
5592
|
-
case 'phi3':
|
|
5593
5588
|
case 'falcon':
|
|
5594
5589
|
mapping['num_heads'] = 'num_attention_heads';
|
|
5595
5590
|
mapping['num_layers'] = 'num_hidden_layers';
|
|
@@ -5597,6 +5592,7 @@ function getNormalizedConfig(config) {
|
|
|
5597
5592
|
break;
|
|
5598
5593
|
case 'llama':
|
|
5599
5594
|
case 'olmo':
|
|
5595
|
+
case 'olmo2':
|
|
5600
5596
|
case 'mobilellm':
|
|
5601
5597
|
case 'granite':
|
|
5602
5598
|
case 'cohere':
|
|
@@ -5604,6 +5600,9 @@ function getNormalizedConfig(config) {
|
|
|
5604
5600
|
case 'starcoder2':
|
|
5605
5601
|
case 'qwen2':
|
|
5606
5602
|
case 'qwen2_vl':
|
|
5603
|
+
case 'phi':
|
|
5604
|
+
case 'phi3':
|
|
5605
|
+
case 'phi3_v':
|
|
5607
5606
|
mapping['num_heads'] = 'num_key_value_heads';
|
|
5608
5607
|
mapping['num_layers'] = 'num_hidden_layers';
|
|
5609
5608
|
mapping['hidden_size'] = 'hidden_size';
|
|
@@ -5636,6 +5635,12 @@ function getNormalizedConfig(config) {
|
|
|
5636
5635
|
mapping['num_layers'] = 'n_layers';
|
|
5637
5636
|
mapping['hidden_size'] = 'd_model';
|
|
5638
5637
|
break;
|
|
5638
|
+
case 'exaone':
|
|
5639
|
+
mapping['num_heads'] = 'num_key_value_heads';
|
|
5640
|
+
mapping['num_layers'] = 'num_layers';
|
|
5641
|
+
mapping['dim_kv'] = 'head_dim';
|
|
5642
|
+
mapping['num_attention_heads'] = 'num_attention_heads';
|
|
5643
|
+
break;
|
|
5639
5644
|
|
|
5640
5645
|
// Encoder-decoder models
|
|
5641
5646
|
case 't5':
|
|
@@ -5677,6 +5682,7 @@ function getNormalizedConfig(config) {
|
|
|
5677
5682
|
mapping['encoder_hidden_size'] = mapping['decoder_hidden_size'] = 'd_model';
|
|
5678
5683
|
break;
|
|
5679
5684
|
case 'musicgen_decoder':
|
|
5685
|
+
case 'moonshine':
|
|
5680
5686
|
mapping['num_encoder_layers'] = mapping['num_decoder_layers'] = 'num_hidden_layers';
|
|
5681
5687
|
mapping['num_encoder_heads'] = mapping['num_decoder_heads'] = 'num_attention_heads';
|
|
5682
5688
|
mapping['encoder_hidden_size'] = mapping['decoder_hidden_size'] = 'hidden_size';
|
|
@@ -5926,7 +5932,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
5926
5932
|
|
|
5927
5933
|
|
|
5928
5934
|
|
|
5929
|
-
const VERSION = '3.
|
|
5935
|
+
const VERSION = '3.2.0';
|
|
5930
5936
|
|
|
5931
5937
|
// Check if various APIs are available (depends on environment)
|
|
5932
5938
|
const IS_BROWSER_ENV = typeof window !== "undefined" && typeof window.document !== "undefined";
|
|
@@ -8024,6 +8030,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
8024
8030
|
/* harmony export */ EsmForTokenClassification: () => (/* binding */ EsmForTokenClassification),
|
|
8025
8031
|
/* harmony export */ EsmModel: () => (/* binding */ EsmModel),
|
|
8026
8032
|
/* harmony export */ EsmPreTrainedModel: () => (/* binding */ EsmPreTrainedModel),
|
|
8033
|
+
/* harmony export */ ExaoneForCausalLM: () => (/* binding */ ExaoneForCausalLM),
|
|
8034
|
+
/* harmony export */ ExaoneModel: () => (/* binding */ ExaoneModel),
|
|
8035
|
+
/* harmony export */ ExaonePreTrainedModel: () => (/* binding */ ExaonePreTrainedModel),
|
|
8027
8036
|
/* harmony export */ FalconForCausalLM: () => (/* binding */ FalconForCausalLM),
|
|
8028
8037
|
/* harmony export */ FalconModel: () => (/* binding */ FalconModel),
|
|
8029
8038
|
/* harmony export */ FalconPreTrainedModel: () => (/* binding */ FalconPreTrainedModel),
|
|
@@ -8068,6 +8077,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
8068
8077
|
/* harmony export */ HubertForSequenceClassification: () => (/* binding */ HubertForSequenceClassification),
|
|
8069
8078
|
/* harmony export */ HubertModel: () => (/* binding */ HubertModel),
|
|
8070
8079
|
/* harmony export */ HubertPreTrainedModel: () => (/* binding */ HubertPreTrainedModel),
|
|
8080
|
+
/* harmony export */ IJepaForImageClassification: () => (/* binding */ IJepaForImageClassification),
|
|
8081
|
+
/* harmony export */ IJepaModel: () => (/* binding */ IJepaModel),
|
|
8082
|
+
/* harmony export */ IJepaPreTrainedModel: () => (/* binding */ IJepaPreTrainedModel),
|
|
8071
8083
|
/* harmony export */ Idefics3ForConditionalGeneration: () => (/* binding */ Idefics3ForConditionalGeneration),
|
|
8072
8084
|
/* harmony export */ Idefics3PreTrainedModel: () => (/* binding */ Idefics3PreTrainedModel),
|
|
8073
8085
|
/* harmony export */ ImageMattingOutput: () => (/* binding */ ImageMattingOutput),
|
|
@@ -8145,6 +8157,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
8145
8157
|
/* harmony export */ MobileViTV2PreTrainedModel: () => (/* binding */ MobileViTV2PreTrainedModel),
|
|
8146
8158
|
/* harmony export */ ModelOutput: () => (/* binding */ ModelOutput),
|
|
8147
8159
|
/* harmony export */ Moondream1ForConditionalGeneration: () => (/* binding */ Moondream1ForConditionalGeneration),
|
|
8160
|
+
/* harmony export */ MoonshineForConditionalGeneration: () => (/* binding */ MoonshineForConditionalGeneration),
|
|
8161
|
+
/* harmony export */ MoonshineModel: () => (/* binding */ MoonshineModel),
|
|
8162
|
+
/* harmony export */ MoonshinePreTrainedModel: () => (/* binding */ MoonshinePreTrainedModel),
|
|
8148
8163
|
/* harmony export */ MptForCausalLM: () => (/* binding */ MptForCausalLM),
|
|
8149
8164
|
/* harmony export */ MptModel: () => (/* binding */ MptModel),
|
|
8150
8165
|
/* harmony export */ MptPreTrainedModel: () => (/* binding */ MptPreTrainedModel),
|
|
@@ -8159,6 +8174,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
8159
8174
|
/* harmony export */ OPTForCausalLM: () => (/* binding */ OPTForCausalLM),
|
|
8160
8175
|
/* harmony export */ OPTModel: () => (/* binding */ OPTModel),
|
|
8161
8176
|
/* harmony export */ OPTPreTrainedModel: () => (/* binding */ OPTPreTrainedModel),
|
|
8177
|
+
/* harmony export */ Olmo2ForCausalLM: () => (/* binding */ Olmo2ForCausalLM),
|
|
8178
|
+
/* harmony export */ Olmo2Model: () => (/* binding */ Olmo2Model),
|
|
8179
|
+
/* harmony export */ Olmo2PreTrainedModel: () => (/* binding */ Olmo2PreTrainedModel),
|
|
8162
8180
|
/* harmony export */ OlmoForCausalLM: () => (/* binding */ OlmoForCausalLM),
|
|
8163
8181
|
/* harmony export */ OlmoModel: () => (/* binding */ OlmoModel),
|
|
8164
8182
|
/* harmony export */ OlmoPreTrainedModel: () => (/* binding */ OlmoPreTrainedModel),
|
|
@@ -8171,6 +8189,8 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
8171
8189
|
/* harmony export */ Owlv2ForObjectDetection: () => (/* binding */ Owlv2ForObjectDetection),
|
|
8172
8190
|
/* harmony export */ Owlv2Model: () => (/* binding */ Owlv2Model),
|
|
8173
8191
|
/* harmony export */ Owlv2PreTrainedModel: () => (/* binding */ Owlv2PreTrainedModel),
|
|
8192
|
+
/* harmony export */ PaliGemmaForConditionalGeneration: () => (/* binding */ PaliGemmaForConditionalGeneration),
|
|
8193
|
+
/* harmony export */ PaliGemmaPreTrainedModel: () => (/* binding */ PaliGemmaPreTrainedModel),
|
|
8174
8194
|
/* harmony export */ PatchTSMixerForPrediction: () => (/* binding */ PatchTSMixerForPrediction),
|
|
8175
8195
|
/* harmony export */ PatchTSMixerModel: () => (/* binding */ PatchTSMixerModel),
|
|
8176
8196
|
/* harmony export */ PatchTSMixerPreTrainedModel: () => (/* binding */ PatchTSMixerPreTrainedModel),
|
|
@@ -8180,6 +8200,8 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
8180
8200
|
/* harmony export */ Phi3ForCausalLM: () => (/* binding */ Phi3ForCausalLM),
|
|
8181
8201
|
/* harmony export */ Phi3Model: () => (/* binding */ Phi3Model),
|
|
8182
8202
|
/* harmony export */ Phi3PreTrainedModel: () => (/* binding */ Phi3PreTrainedModel),
|
|
8203
|
+
/* harmony export */ Phi3VForCausalLM: () => (/* binding */ Phi3VForCausalLM),
|
|
8204
|
+
/* harmony export */ Phi3VPreTrainedModel: () => (/* binding */ Phi3VPreTrainedModel),
|
|
8183
8205
|
/* harmony export */ PhiForCausalLM: () => (/* binding */ PhiForCausalLM),
|
|
8184
8206
|
/* harmony export */ PhiModel: () => (/* binding */ PhiModel),
|
|
8185
8207
|
/* harmony export */ PhiPreTrainedModel: () => (/* binding */ PhiPreTrainedModel),
|
|
@@ -8425,6 +8447,7 @@ const MODEL_TYPES = {
|
|
|
8425
8447
|
ImageTextToText: 6,
|
|
8426
8448
|
Musicgen: 7,
|
|
8427
8449
|
MultiModality: 8,
|
|
8450
|
+
Phi3V: 9,
|
|
8428
8451
|
}
|
|
8429
8452
|
//////////////////////////////////////////////////
|
|
8430
8453
|
|
|
@@ -8852,7 +8875,9 @@ async function decoderForward(self, model_inputs, is_encoder_decoder = false) {
|
|
|
8852
8875
|
new_model_inputs.use_cache_branch = boolTensor(!!past_key_values);
|
|
8853
8876
|
}
|
|
8854
8877
|
if (session.inputNames.includes('position_ids') && new_model_inputs.attention_mask && !new_model_inputs.position_ids) {
|
|
8855
|
-
|
|
8878
|
+
// NOTE: Handle a special case for paligemma models, where positions are 1-indexed
|
|
8879
|
+
const start_index = self.config.model_type === 'paligemma' ? 1 : 0;
|
|
8880
|
+
new_model_inputs.position_ids = createPositionIds(new_model_inputs, past_key_values, start_index);
|
|
8856
8881
|
}
|
|
8857
8882
|
|
|
8858
8883
|
// Unpack the `past_key_values` object into model inputs
|
|
@@ -8988,14 +9013,14 @@ async function imageTextToTextForward(self, {
|
|
|
8988
9013
|
* @param {Tensor} attention_mask
|
|
8989
9014
|
* @returns {{data: BigInt64Array, dims: number[]}}
|
|
8990
9015
|
*/
|
|
8991
|
-
function cumsum_masked_fill(attention_mask) {
|
|
9016
|
+
function cumsum_masked_fill(attention_mask, start_index = 0) {
|
|
8992
9017
|
const [bz, seq_len] = attention_mask.dims;
|
|
8993
9018
|
const attn_mask_data = attention_mask.data;
|
|
8994
9019
|
|
|
8995
9020
|
const data = new BigInt64Array(attn_mask_data.length);
|
|
8996
9021
|
for (let i = 0; i < bz; ++i) {
|
|
8997
9022
|
const start = i * seq_len;
|
|
8998
|
-
let sum = BigInt(
|
|
9023
|
+
let sum = BigInt(start_index);
|
|
8999
9024
|
for (let j = 0; j < seq_len; ++j) {
|
|
9000
9025
|
const index = start + j;
|
|
9001
9026
|
if (attn_mask_data[index] === 0n) {
|
|
@@ -9022,10 +9047,10 @@ function cumsum_masked_fill(attention_mask) {
|
|
|
9022
9047
|
* position_ids = position_ids[:, -input_ids.shape[1] :]
|
|
9023
9048
|
* ```
|
|
9024
9049
|
*/
|
|
9025
|
-
function createPositionIds(model_inputs, past_key_values = null) {
|
|
9050
|
+
function createPositionIds(model_inputs, past_key_values = null, start_index = 0) {
|
|
9026
9051
|
const { input_ids, inputs_embeds, attention_mask } = model_inputs;
|
|
9027
9052
|
|
|
9028
|
-
const { data, dims } = cumsum_masked_fill(attention_mask);
|
|
9053
|
+
const { data, dims } = cumsum_masked_fill(attention_mask, start_index);
|
|
9029
9054
|
let position_ids = new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_9__.Tensor('int64', data, dims);
|
|
9030
9055
|
if (past_key_values) {
|
|
9031
9056
|
const offset = -(input_ids ?? inputs_embeds).dims.at(1);
|
|
@@ -9198,6 +9223,10 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
|
|
|
9198
9223
|
this._forward = imageTextToTextForward;
|
|
9199
9224
|
this._prepare_inputs_for_generation = image_text_to_text_prepare_inputs_for_generation;
|
|
9200
9225
|
break;
|
|
9226
|
+
case MODEL_TYPES.Phi3V:
|
|
9227
|
+
this.can_generate = true;
|
|
9228
|
+
this._prepare_inputs_for_generation = image_text_to_text_prepare_inputs_for_generation;
|
|
9229
|
+
break;
|
|
9201
9230
|
|
|
9202
9231
|
case MODEL_TYPES.MultiModality:
|
|
9203
9232
|
this.can_generate = true;
|
|
@@ -9362,6 +9391,18 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
|
|
|
9362
9391
|
}, options),
|
|
9363
9392
|
]);
|
|
9364
9393
|
|
|
9394
|
+
} else if (modelType === MODEL_TYPES.Phi3V) {
|
|
9395
|
+
info = await Promise.all([
|
|
9396
|
+
constructSessions(pretrained_model_name_or_path, {
|
|
9397
|
+
prepare_inputs_embeds: 'prepare_inputs_embeds',
|
|
9398
|
+
model: 'model',
|
|
9399
|
+
vision_encoder: 'vision_encoder',
|
|
9400
|
+
}, options),
|
|
9401
|
+
getOptionalConfigs(pretrained_model_name_or_path, {
|
|
9402
|
+
generation_config: 'generation_config.json',
|
|
9403
|
+
}, options),
|
|
9404
|
+
]);
|
|
9405
|
+
|
|
9365
9406
|
} else { // should be MODEL_TYPES.EncoderOnly
|
|
9366
9407
|
if (modelType !== MODEL_TYPES.EncoderOnly) {
|
|
9367
9408
|
const type = modelName ?? config?.model_type;
|
|
@@ -11634,6 +11675,29 @@ class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
|
|
|
11634
11675
|
}
|
|
11635
11676
|
//////////////////////////////////////////////////
|
|
11636
11677
|
|
|
11678
|
+
|
|
11679
|
+
//////////////////////////////////////////////////
|
|
11680
|
+
// Moonshine models
|
|
11681
|
+
class MoonshinePreTrainedModel extends PreTrainedModel {
|
|
11682
|
+
|
|
11683
|
+
requires_attention_mask = false;
|
|
11684
|
+
main_input_name = 'input_values';
|
|
11685
|
+
forward_params = [
|
|
11686
|
+
'input_values',
|
|
11687
|
+
'decoder_input_ids',
|
|
11688
|
+
'past_key_values',
|
|
11689
|
+
];
|
|
11690
|
+
};
|
|
11691
|
+
|
|
11692
|
+
/**
|
|
11693
|
+
* MoonshineModel class for training Moonshine models without a language model head.
|
|
11694
|
+
*/
|
|
11695
|
+
class MoonshineModel extends MoonshinePreTrainedModel { }
|
|
11696
|
+
|
|
11697
|
+
class MoonshineForConditionalGeneration extends MoonshinePreTrainedModel { }
|
|
11698
|
+
//////////////////////////////////////////////////
|
|
11699
|
+
|
|
11700
|
+
|
|
11637
11701
|
//////////////////////////////////////////////////
|
|
11638
11702
|
/**
|
|
11639
11703
|
* Vision Encoder-Decoder model based on OpenAI's GPT architecture for image captioning and other vision tasks
|
|
@@ -11842,6 +11906,30 @@ class Florence2ForConditionalGeneration extends Florence2PreTrainedModel {
|
|
|
11842
11906
|
}
|
|
11843
11907
|
}
|
|
11844
11908
|
|
|
11909
|
+
class PaliGemmaPreTrainedModel extends PreTrainedModel {
|
|
11910
|
+
forward_params = [
|
|
11911
|
+
'input_ids',
|
|
11912
|
+
// 'inputs_embeds',
|
|
11913
|
+
'attention_mask',
|
|
11914
|
+
'pixel_values',
|
|
11915
|
+
'position_ids',
|
|
11916
|
+
'past_key_values',
|
|
11917
|
+
];
|
|
11918
|
+
}
|
|
11919
|
+
|
|
11920
|
+
class PaliGemmaForConditionalGeneration extends PaliGemmaPreTrainedModel {
|
|
11921
|
+
_merge_input_ids_with_image_features(kwargs) {
|
|
11922
|
+
const vision_hidden_size = kwargs.image_features.dims.at(-1);
|
|
11923
|
+
const reshaped_image_hidden_states = kwargs.image_features.view(-1, vision_hidden_size);
|
|
11924
|
+
|
|
11925
|
+
return default_merge_input_ids_with_image_features({
|
|
11926
|
+
// @ts-ignore
|
|
11927
|
+
image_token_id: this.config.image_token_index,
|
|
11928
|
+
...kwargs,
|
|
11929
|
+
image_features: reshaped_image_hidden_states,
|
|
11930
|
+
})
|
|
11931
|
+
}
|
|
11932
|
+
}
|
|
11845
11933
|
|
|
11846
11934
|
//////////////////////////////////////////////////
|
|
11847
11935
|
// Idefics3 Models
|
|
@@ -11880,6 +11968,77 @@ class Idefics3ForConditionalGeneration extends Idefics3PreTrainedModel {
|
|
|
11880
11968
|
}
|
|
11881
11969
|
//////////////////////////////////////////////////
|
|
11882
11970
|
|
|
11971
|
+
class Phi3VPreTrainedModel extends PreTrainedModel {
|
|
11972
|
+
forward_params = [
|
|
11973
|
+
'input_ids',
|
|
11974
|
+
'inputs_embeds',
|
|
11975
|
+
'attention_mask',
|
|
11976
|
+
'position_ids',
|
|
11977
|
+
'pixel_values',
|
|
11978
|
+
'image_sizes',
|
|
11979
|
+
'past_key_values',
|
|
11980
|
+
];
|
|
11981
|
+
}
|
|
11982
|
+
class Phi3VForCausalLM extends Phi3VPreTrainedModel {
|
|
11983
|
+
|
|
11984
|
+
async forward({
|
|
11985
|
+
// Produced by the tokenizer/processor:
|
|
11986
|
+
input_ids = null,
|
|
11987
|
+
attention_mask = null,
|
|
11988
|
+
pixel_values = null,
|
|
11989
|
+
image_sizes = null,
|
|
11990
|
+
|
|
11991
|
+
// Used during generation:
|
|
11992
|
+
position_ids = null,
|
|
11993
|
+
inputs_embeds = null,
|
|
11994
|
+
past_key_values = null,
|
|
11995
|
+
|
|
11996
|
+
// Generic generation parameters
|
|
11997
|
+
generation_config = null,
|
|
11998
|
+
logits_processor = null,
|
|
11999
|
+
|
|
12000
|
+
// TODO: needed?
|
|
12001
|
+
...kwargs
|
|
12002
|
+
}) {
|
|
12003
|
+
if (!inputs_embeds) {
|
|
12004
|
+
let image_features;
|
|
12005
|
+
if (pixel_values && input_ids.dims[1] !== 1) {
|
|
12006
|
+
if (!image_sizes) {
|
|
12007
|
+
throw new Error('`image_sizes` must be provided when `pixel_values` is provided.');
|
|
12008
|
+
}
|
|
12009
|
+
|
|
12010
|
+
// Encode the image
|
|
12011
|
+
({ image_features } = await sessionRun(this.sessions['vision_encoder'], {
|
|
12012
|
+
pixel_values,
|
|
12013
|
+
image_sizes,
|
|
12014
|
+
}));
|
|
12015
|
+
} else {
|
|
12016
|
+
const hidden_size = this.config.normalized_config.hidden_size;
|
|
12017
|
+
image_features = new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_9__.Tensor(
|
|
12018
|
+
'float32',
|
|
12019
|
+
[],
|
|
12020
|
+
[0, hidden_size],
|
|
12021
|
+
);
|
|
12022
|
+
}
|
|
12023
|
+
|
|
12024
|
+
({ inputs_embeds } = await sessionRun(this.sessions['prepare_inputs_embeds'], {
|
|
12025
|
+
input_ids,
|
|
12026
|
+
image_features,
|
|
12027
|
+
}));
|
|
12028
|
+
}
|
|
12029
|
+
|
|
12030
|
+
const outputs = await decoderForward(this, {
|
|
12031
|
+
inputs_embeds,
|
|
12032
|
+
past_key_values,
|
|
12033
|
+
attention_mask,
|
|
12034
|
+
position_ids,
|
|
12035
|
+
generation_config,
|
|
12036
|
+
logits_processor,
|
|
12037
|
+
}, false);
|
|
12038
|
+
return outputs;
|
|
12039
|
+
}
|
|
12040
|
+
}
|
|
12041
|
+
|
|
11883
12042
|
//////////////////////////////////////////////////
|
|
11884
12043
|
class CLIPPreTrainedModel extends PreTrainedModel { }
|
|
11885
12044
|
|
|
@@ -11934,9 +12093,11 @@ class CLIPModel extends CLIPPreTrainedModel { }
|
|
|
11934
12093
|
class CLIPTextModel extends CLIPPreTrainedModel {
|
|
11935
12094
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
11936
12095
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
11937
|
-
|
|
11938
|
-
|
|
11939
|
-
|
|
12096
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
12097
|
+
// Update default model file name if not provided
|
|
12098
|
+
model_file_name: 'text_model',
|
|
12099
|
+
...options,
|
|
12100
|
+
});
|
|
11940
12101
|
}
|
|
11941
12102
|
}
|
|
11942
12103
|
|
|
@@ -11969,9 +12130,11 @@ class CLIPTextModel extends CLIPPreTrainedModel {
|
|
|
11969
12130
|
class CLIPTextModelWithProjection extends CLIPPreTrainedModel {
|
|
11970
12131
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
11971
12132
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
11972
|
-
|
|
11973
|
-
|
|
11974
|
-
|
|
12133
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
12134
|
+
// Update default model file name if not provided
|
|
12135
|
+
model_file_name: 'text_model',
|
|
12136
|
+
...options,
|
|
12137
|
+
});
|
|
11975
12138
|
}
|
|
11976
12139
|
}
|
|
11977
12140
|
|
|
@@ -11981,9 +12144,11 @@ class CLIPTextModelWithProjection extends CLIPPreTrainedModel {
|
|
|
11981
12144
|
class CLIPVisionModel extends CLIPPreTrainedModel {
|
|
11982
12145
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
11983
12146
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
11984
|
-
|
|
11985
|
-
|
|
11986
|
-
|
|
12147
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
12148
|
+
// Update default model file name if not provided
|
|
12149
|
+
model_file_name: 'vision_model',
|
|
12150
|
+
...options,
|
|
12151
|
+
});
|
|
11987
12152
|
}
|
|
11988
12153
|
}
|
|
11989
12154
|
|
|
@@ -12016,9 +12181,11 @@ class CLIPVisionModel extends CLIPPreTrainedModel {
|
|
|
12016
12181
|
class CLIPVisionModelWithProjection extends CLIPPreTrainedModel {
|
|
12017
12182
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
12018
12183
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
12019
|
-
|
|
12020
|
-
|
|
12021
|
-
|
|
12184
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
12185
|
+
// Update default model file name if not provided
|
|
12186
|
+
model_file_name: 'vision_model',
|
|
12187
|
+
...options,
|
|
12188
|
+
});
|
|
12022
12189
|
}
|
|
12023
12190
|
}
|
|
12024
12191
|
//////////////////////////////////////////////////
|
|
@@ -12102,9 +12269,11 @@ class SiglipModel extends SiglipPreTrainedModel { }
|
|
|
12102
12269
|
class SiglipTextModel extends SiglipPreTrainedModel {
|
|
12103
12270
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
12104
12271
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
12105
|
-
|
|
12106
|
-
|
|
12107
|
-
|
|
12272
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
12273
|
+
// Update default model file name if not provided
|
|
12274
|
+
model_file_name: 'text_model',
|
|
12275
|
+
...options,
|
|
12276
|
+
});
|
|
12108
12277
|
}
|
|
12109
12278
|
}
|
|
12110
12279
|
|
|
@@ -12137,9 +12306,11 @@ class SiglipTextModel extends SiglipPreTrainedModel {
|
|
|
12137
12306
|
class SiglipVisionModel extends CLIPPreTrainedModel {
|
|
12138
12307
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
12139
12308
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
12140
|
-
|
|
12141
|
-
|
|
12142
|
-
|
|
12309
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
12310
|
+
// Update default model file name if not provided
|
|
12311
|
+
model_file_name: 'vision_model',
|
|
12312
|
+
...options,
|
|
12313
|
+
});
|
|
12143
12314
|
}
|
|
12144
12315
|
}
|
|
12145
12316
|
//////////////////////////////////////////////////
|
|
@@ -12194,18 +12365,22 @@ class JinaCLIPModel extends JinaCLIPPreTrainedModel {
|
|
|
12194
12365
|
class JinaCLIPTextModel extends JinaCLIPPreTrainedModel {
|
|
12195
12366
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
12196
12367
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
12197
|
-
|
|
12198
|
-
|
|
12199
|
-
|
|
12368
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
12369
|
+
// Update default model file name if not provided
|
|
12370
|
+
model_file_name: 'text_model',
|
|
12371
|
+
...options,
|
|
12372
|
+
});
|
|
12200
12373
|
}
|
|
12201
12374
|
}
|
|
12202
12375
|
|
|
12203
12376
|
class JinaCLIPVisionModel extends JinaCLIPPreTrainedModel {
|
|
12204
12377
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
12205
12378
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
12206
|
-
|
|
12207
|
-
|
|
12208
|
-
|
|
12379
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
12380
|
+
// Update default model file name if not provided
|
|
12381
|
+
model_file_name: 'vision_model',
|
|
12382
|
+
...options,
|
|
12383
|
+
});
|
|
12209
12384
|
}
|
|
12210
12385
|
}
|
|
12211
12386
|
//////////////////////////////////////////////////
|
|
@@ -12365,6 +12540,14 @@ class LlamaForCausalLM extends LlamaPreTrainedModel { }
|
|
|
12365
12540
|
//////////////////////////////////////////////////
|
|
12366
12541
|
|
|
12367
12542
|
|
|
12543
|
+
//////////////////////////////////////////////////
|
|
12544
|
+
// EXAONE models
|
|
12545
|
+
class ExaonePreTrainedModel extends PreTrainedModel { }
|
|
12546
|
+
class ExaoneModel extends ExaonePreTrainedModel { }
|
|
12547
|
+
class ExaoneForCausalLM extends ExaonePreTrainedModel { }
|
|
12548
|
+
//////////////////////////////////////////////////
|
|
12549
|
+
|
|
12550
|
+
|
|
12368
12551
|
//////////////////////////////////////////////////
|
|
12369
12552
|
// MobileLLM models
|
|
12370
12553
|
class MobileLLMPreTrainedModel extends PreTrainedModel { }
|
|
@@ -12380,6 +12563,13 @@ class OlmoModel extends OlmoPreTrainedModel { }
|
|
|
12380
12563
|
class OlmoForCausalLM extends OlmoPreTrainedModel { }
|
|
12381
12564
|
//////////////////////////////////////////////////
|
|
12382
12565
|
|
|
12566
|
+
//////////////////////////////////////////////////
|
|
12567
|
+
// OLMo2 models
|
|
12568
|
+
class Olmo2PreTrainedModel extends PreTrainedModel { }
|
|
12569
|
+
class Olmo2Model extends Olmo2PreTrainedModel { }
|
|
12570
|
+
class Olmo2ForCausalLM extends Olmo2PreTrainedModel { }
|
|
12571
|
+
//////////////////////////////////////////////////
|
|
12572
|
+
|
|
12383
12573
|
|
|
12384
12574
|
//////////////////////////////////////////////////
|
|
12385
12575
|
// Granite models
|
|
@@ -12796,6 +12986,20 @@ class ViTForImageClassification extends ViTPreTrainedModel {
|
|
|
12796
12986
|
//////////////////////////////////////////////////
|
|
12797
12987
|
|
|
12798
12988
|
|
|
12989
|
+
//////////////////////////////////////////////////
|
|
12990
|
+
class IJepaPreTrainedModel extends PreTrainedModel { }
|
|
12991
|
+
class IJepaModel extends IJepaPreTrainedModel { }
|
|
12992
|
+
class IJepaForImageClassification extends IJepaPreTrainedModel {
|
|
12993
|
+
/**
|
|
12994
|
+
* @param {any} model_inputs
|
|
12995
|
+
*/
|
|
12996
|
+
async _call(model_inputs) {
|
|
12997
|
+
return new SequenceClassifierOutput(await super._call(model_inputs));
|
|
12998
|
+
}
|
|
12999
|
+
}
|
|
13000
|
+
//////////////////////////////////////////////////
|
|
13001
|
+
|
|
13002
|
+
|
|
12799
13003
|
//////////////////////////////////////////////////
|
|
12800
13004
|
class VitPosePreTrainedModel extends PreTrainedModel { }
|
|
12801
13005
|
|
|
@@ -14406,9 +14610,11 @@ class ClapModel extends ClapPreTrainedModel { }
|
|
|
14406
14610
|
class ClapTextModelWithProjection extends ClapPreTrainedModel {
|
|
14407
14611
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
14408
14612
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
14409
|
-
|
|
14410
|
-
|
|
14411
|
-
|
|
14613
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
14614
|
+
// Update default model file name if not provided
|
|
14615
|
+
model_file_name: 'text_model',
|
|
14616
|
+
...options,
|
|
14617
|
+
});
|
|
14412
14618
|
}
|
|
14413
14619
|
}
|
|
14414
14620
|
|
|
@@ -14441,9 +14647,11 @@ class ClapTextModelWithProjection extends ClapPreTrainedModel {
|
|
|
14441
14647
|
class ClapAudioModelWithProjection extends ClapPreTrainedModel {
|
|
14442
14648
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
14443
14649
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
14444
|
-
|
|
14445
|
-
|
|
14446
|
-
|
|
14650
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
14651
|
+
// Update default model file name if not provided
|
|
14652
|
+
model_file_name: 'audio_model',
|
|
14653
|
+
...options,
|
|
14654
|
+
});
|
|
14447
14655
|
}
|
|
14448
14656
|
}
|
|
14449
14657
|
//////////////////////////////////////////////////
|
|
@@ -15066,6 +15274,7 @@ const MODEL_MAPPING_NAMES_ENCODER_ONLY = new Map([
|
|
|
15066
15274
|
['rt_detr', ['RTDetrModel', RTDetrModel]],
|
|
15067
15275
|
['table-transformer', ['TableTransformerModel', TableTransformerModel]],
|
|
15068
15276
|
['vit', ['ViTModel', ViTModel]],
|
|
15277
|
+
['ijepa', ['IJepaModel', IJepaModel]],
|
|
15069
15278
|
['pvt', ['PvtModel', PvtModel]],
|
|
15070
15279
|
['vit_msn', ['ViTMSNModel', ViTMSNModel]],
|
|
15071
15280
|
['vit_mae', ['ViTMAEModel', ViTMAEModel]],
|
|
@@ -15129,7 +15338,9 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
|
|
|
15129
15338
|
['gpt_neox', ['GPTNeoXModel', GPTNeoXModel]],
|
|
15130
15339
|
['codegen', ['CodeGenModel', CodeGenModel]],
|
|
15131
15340
|
['llama', ['LlamaModel', LlamaModel]],
|
|
15341
|
+
['exaone', ['ExaoneModel', ExaoneModel]],
|
|
15132
15342
|
['olmo', ['OlmoModel', OlmoModel]],
|
|
15343
|
+
['olmo2', ['Olmo2Model', Olmo2Model]],
|
|
15133
15344
|
['mobilellm', ['MobileLLMModel', MobileLLMModel]],
|
|
15134
15345
|
['granite', ['GraniteModel', GraniteModel]],
|
|
15135
15346
|
['cohere', ['CohereModel', CohereModel]],
|
|
@@ -15150,6 +15361,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
|
|
|
15150
15361
|
const MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES = new Map([
|
|
15151
15362
|
['speecht5', ['SpeechT5ForSpeechToText', SpeechT5ForSpeechToText]],
|
|
15152
15363
|
['whisper', ['WhisperForConditionalGeneration', WhisperForConditionalGeneration]],
|
|
15364
|
+
['moonshine', ['MoonshineForConditionalGeneration', MoonshineForConditionalGeneration]],
|
|
15153
15365
|
]);
|
|
15154
15366
|
|
|
15155
15367
|
const MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING_NAMES = new Map([
|
|
@@ -15220,7 +15432,9 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
|
|
|
15220
15432
|
['gpt_neox', ['GPTNeoXForCausalLM', GPTNeoXForCausalLM]],
|
|
15221
15433
|
['codegen', ['CodeGenForCausalLM', CodeGenForCausalLM]],
|
|
15222
15434
|
['llama', ['LlamaForCausalLM', LlamaForCausalLM]],
|
|
15435
|
+
['exaone', ['ExaoneForCausalLM', ExaoneForCausalLM]],
|
|
15223
15436
|
['olmo', ['OlmoForCausalLM', OlmoForCausalLM]],
|
|
15437
|
+
['olmo2', ['Olmo2ForCausalLM', Olmo2ForCausalLM]],
|
|
15224
15438
|
['mobilellm', ['MobileLLMForCausalLM', MobileLLMForCausalLM]],
|
|
15225
15439
|
['granite', ['GraniteForCausalLM', GraniteForCausalLM]],
|
|
15226
15440
|
['cohere', ['CohereForCausalLM', CohereForCausalLM]],
|
|
@@ -15238,6 +15452,9 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
|
|
|
15238
15452
|
['falcon', ['FalconForCausalLM', FalconForCausalLM]],
|
|
15239
15453
|
['trocr', ['TrOCRForCausalLM', TrOCRForCausalLM]],
|
|
15240
15454
|
['stablelm', ['StableLmForCausalLM', StableLmForCausalLM]],
|
|
15455
|
+
|
|
15456
|
+
// Also image-text-to-text
|
|
15457
|
+
['phi3_v', ['Phi3VForCausalLM', Phi3VForCausalLM]],
|
|
15241
15458
|
]);
|
|
15242
15459
|
|
|
15243
15460
|
const MODEL_FOR_MULTIMODALITY_MAPPING_NAMES = new Map([
|
|
@@ -15294,6 +15511,7 @@ const MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES = new Map([
|
|
|
15294
15511
|
['florence2', ['Florence2ForConditionalGeneration', Florence2ForConditionalGeneration]],
|
|
15295
15512
|
['qwen2-vl', ['Qwen2VLForConditionalGeneration', Qwen2VLForConditionalGeneration]],
|
|
15296
15513
|
['idefics3', ['Idefics3ForConditionalGeneration', Idefics3ForConditionalGeneration]],
|
|
15514
|
+
['paligemma', ['PaliGemmaForConditionalGeneration', PaliGemmaForConditionalGeneration]],
|
|
15297
15515
|
]);
|
|
15298
15516
|
|
|
15299
15517
|
const MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES = new Map([
|
|
@@ -15302,6 +15520,7 @@ const MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES = new Map([
|
|
|
15302
15520
|
|
|
15303
15521
|
const MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES = new Map([
|
|
15304
15522
|
['vit', ['ViTForImageClassification', ViTForImageClassification]],
|
|
15523
|
+
['ijepa', ['IJepaForImageClassification', IJepaForImageClassification]],
|
|
15305
15524
|
['pvt', ['PvtForImageClassification', PvtForImageClassification]],
|
|
15306
15525
|
['vit_msn', ['ViTMSNForImageClassification', ViTMSNForImageClassification]],
|
|
15307
15526
|
['fastvit', ['FastViTForImageClassification', FastViTForImageClassification]],
|
|
@@ -15473,6 +15692,7 @@ const CUSTOM_MAPPING = [
|
|
|
15473
15692
|
// OVERRIDE:
|
|
15474
15693
|
// TODO: Refactor to allow class to specify model
|
|
15475
15694
|
['MusicgenForConditionalGeneration', MusicgenForConditionalGeneration, MODEL_TYPES.Musicgen],
|
|
15695
|
+
['Phi3VForCausalLM', Phi3VForCausalLM, MODEL_TYPES.Phi3V],
|
|
15476
15696
|
|
|
15477
15697
|
['CLIPTextModelWithProjection', CLIPTextModelWithProjection, MODEL_TYPES.EncoderOnly],
|
|
15478
15698
|
['SiglipTextModel', SiglipTextModel, MODEL_TYPES.EncoderOnly],
|
|
@@ -16727,23 +16947,26 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
16727
16947
|
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
16728
16948
|
/* harmony export */ ASTFeatureExtractor: () => (/* reexport safe */ _audio_spectrogram_transformer_feature_extraction_audio_spectrogram_transformer_js__WEBPACK_IMPORTED_MODULE_0__.ASTFeatureExtractor),
|
|
16729
16949
|
/* harmony export */ ClapFeatureExtractor: () => (/* reexport safe */ _clap_feature_extraction_clap_js__WEBPACK_IMPORTED_MODULE_1__.ClapFeatureExtractor),
|
|
16730
|
-
/* harmony export */ ImageFeatureExtractor: () => (/* reexport safe */
|
|
16731
|
-
/* harmony export */
|
|
16732
|
-
/* harmony export */
|
|
16733
|
-
/* harmony export */
|
|
16734
|
-
/* harmony export */
|
|
16735
|
-
/* harmony export */
|
|
16736
|
-
/* harmony export */
|
|
16950
|
+
/* harmony export */ ImageFeatureExtractor: () => (/* reexport safe */ _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_9__.ImageProcessor),
|
|
16951
|
+
/* harmony export */ MoonshineFeatureExtractor: () => (/* reexport safe */ _moonshine_feature_extraction_moonshine_js__WEBPACK_IMPORTED_MODULE_2__.MoonshineFeatureExtractor),
|
|
16952
|
+
/* harmony export */ PyAnnoteFeatureExtractor: () => (/* reexport safe */ _pyannote_feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_3__.PyAnnoteFeatureExtractor),
|
|
16953
|
+
/* harmony export */ SeamlessM4TFeatureExtractor: () => (/* reexport safe */ _seamless_m4t_feature_extraction_seamless_m4t_js__WEBPACK_IMPORTED_MODULE_4__.SeamlessM4TFeatureExtractor),
|
|
16954
|
+
/* harmony export */ SpeechT5FeatureExtractor: () => (/* reexport safe */ _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_5__.SpeechT5FeatureExtractor),
|
|
16955
|
+
/* harmony export */ Wav2Vec2FeatureExtractor: () => (/* reexport safe */ _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_6__.Wav2Vec2FeatureExtractor),
|
|
16956
|
+
/* harmony export */ WeSpeakerFeatureExtractor: () => (/* reexport safe */ _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_7__.WeSpeakerFeatureExtractor),
|
|
16957
|
+
/* harmony export */ WhisperFeatureExtractor: () => (/* reexport safe */ _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_8__.WhisperFeatureExtractor)
|
|
16737
16958
|
/* harmony export */ });
|
|
16738
16959
|
/* harmony import */ var _audio_spectrogram_transformer_feature_extraction_audio_spectrogram_transformer_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js */ "./src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js");
|
|
16739
16960
|
/* harmony import */ var _clap_feature_extraction_clap_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./clap/feature_extraction_clap.js */ "./src/models/clap/feature_extraction_clap.js");
|
|
16740
|
-
/* harmony import */ var
|
|
16741
|
-
/* harmony import */ var
|
|
16742
|
-
/* harmony import */ var
|
|
16743
|
-
/* harmony import */ var
|
|
16744
|
-
/* harmony import */ var
|
|
16745
|
-
/* harmony import */ var
|
|
16746
|
-
/* harmony import */ var
|
|
16961
|
+
/* harmony import */ var _moonshine_feature_extraction_moonshine_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./moonshine/feature_extraction_moonshine.js */ "./src/models/moonshine/feature_extraction_moonshine.js");
|
|
16962
|
+
/* harmony import */ var _pyannote_feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ./pyannote/feature_extraction_pyannote.js */ "./src/models/pyannote/feature_extraction_pyannote.js");
|
|
16963
|
+
/* harmony import */ var _seamless_m4t_feature_extraction_seamless_m4t_js__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ./seamless_m4t/feature_extraction_seamless_m4t.js */ "./src/models/seamless_m4t/feature_extraction_seamless_m4t.js");
|
|
16964
|
+
/* harmony import */ var _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./speecht5/feature_extraction_speecht5.js */ "./src/models/speecht5/feature_extraction_speecht5.js");
|
|
16965
|
+
/* harmony import */ var _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./wav2vec2/feature_extraction_wav2vec2.js */ "./src/models/wav2vec2/feature_extraction_wav2vec2.js");
|
|
16966
|
+
/* harmony import */ var _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./wespeaker/feature_extraction_wespeaker.js */ "./src/models/wespeaker/feature_extraction_wespeaker.js");
|
|
16967
|
+
/* harmony import */ var _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./whisper/feature_extraction_whisper.js */ "./src/models/whisper/feature_extraction_whisper.js");
|
|
16968
|
+
/* harmony import */ var _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ../base/image_processors_utils.js */ "./src/base/image_processors_utils.js");
|
|
16969
|
+
|
|
16747
16970
|
|
|
16748
16971
|
|
|
16749
16972
|
|
|
@@ -17124,18 +17347,29 @@ class Idefics3ImageProcessor extends _base_image_processors_utils_js__WEBPACK_IM
|
|
|
17124
17347
|
const optimal_width = Math.ceil(width / num_splits_w);
|
|
17125
17348
|
|
|
17126
17349
|
// Iterate through each row and column
|
|
17127
|
-
for (let r = 0; r < num_splits_h; r
|
|
17128
|
-
for (let c = 0; c < num_splits_w; c
|
|
17129
|
-
|
|
17130
|
-
|
|
17131
|
-
|
|
17132
|
-
|
|
17133
|
-
|
|
17134
|
-
|
|
17135
|
-
|
|
17136
|
-
|
|
17137
|
-
//
|
|
17138
|
-
|
|
17350
|
+
for (let r = 0; r < num_splits_h; ++r) {
|
|
17351
|
+
for (let c = 0; c < num_splits_w; ++c) {
|
|
17352
|
+
let start_x, start_y, end_x, end_y;
|
|
17353
|
+
if (r === num_splits_h - 1) { // At bottom
|
|
17354
|
+
start_y = height - optimal_height;
|
|
17355
|
+
end_y = height;
|
|
17356
|
+
} else {
|
|
17357
|
+
start_y = r * optimal_height;
|
|
17358
|
+
end_y = (r + 1) * optimal_height;
|
|
17359
|
+
}
|
|
17360
|
+
if (c === num_splits_w - 1) { // At right
|
|
17361
|
+
start_x = width - optimal_width;
|
|
17362
|
+
end_x = width;
|
|
17363
|
+
} else {
|
|
17364
|
+
start_x = c * optimal_width;
|
|
17365
|
+
end_x = (c + 1) * optimal_width;
|
|
17366
|
+
}
|
|
17367
|
+
|
|
17368
|
+
const starts = [start_y, start_x];
|
|
17369
|
+
const ends = [end_y, end_x];
|
|
17370
|
+
|
|
17371
|
+
const patch = await (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.slice)(pixel_values, starts, ends, [2, 3]);
|
|
17372
|
+
frames.push(patch);
|
|
17139
17373
|
}
|
|
17140
17374
|
}
|
|
17141
17375
|
|
|
@@ -17361,21 +17595,22 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
17361
17595
|
/* harmony export */ OwlViTFeatureExtractor: () => (/* reexport safe */ _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_24__.OwlViTFeatureExtractor),
|
|
17362
17596
|
/* harmony export */ OwlViTImageProcessor: () => (/* reexport safe */ _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_24__.OwlViTImageProcessor),
|
|
17363
17597
|
/* harmony export */ Owlv2ImageProcessor: () => (/* reexport safe */ _owlv2_image_processing_owlv2_js__WEBPACK_IMPORTED_MODULE_23__.Owlv2ImageProcessor),
|
|
17364
|
-
/* harmony export */
|
|
17365
|
-
/* harmony export */
|
|
17366
|
-
/* harmony export */
|
|
17367
|
-
/* harmony export */
|
|
17368
|
-
/* harmony export */
|
|
17369
|
-
/* harmony export */
|
|
17370
|
-
/* harmony export */
|
|
17371
|
-
/* harmony export */
|
|
17598
|
+
/* harmony export */ Phi3VImageProcessor: () => (/* reexport safe */ _phi3_v_image_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_25__.Phi3VImageProcessor),
|
|
17599
|
+
/* harmony export */ PvtImageProcessor: () => (/* reexport safe */ _pvt_image_processing_pvt_js__WEBPACK_IMPORTED_MODULE_26__.PvtImageProcessor),
|
|
17600
|
+
/* harmony export */ Qwen2VLImageProcessor: () => (/* reexport safe */ _qwen2_vl_image_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_27__.Qwen2VLImageProcessor),
|
|
17601
|
+
/* harmony export */ RTDetrImageProcessor: () => (/* reexport safe */ _rt_detr_image_processing_rt_detr_js__WEBPACK_IMPORTED_MODULE_28__.RTDetrImageProcessor),
|
|
17602
|
+
/* harmony export */ SamImageProcessor: () => (/* reexport safe */ _sam_image_processing_sam_js__WEBPACK_IMPORTED_MODULE_29__.SamImageProcessor),
|
|
17603
|
+
/* harmony export */ SegformerFeatureExtractor: () => (/* reexport safe */ _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_30__.SegformerFeatureExtractor),
|
|
17604
|
+
/* harmony export */ SegformerImageProcessor: () => (/* reexport safe */ _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_30__.SegformerImageProcessor),
|
|
17605
|
+
/* harmony export */ SiglipImageProcessor: () => (/* reexport safe */ _siglip_image_processing_siglip_js__WEBPACK_IMPORTED_MODULE_31__.SiglipImageProcessor),
|
|
17606
|
+
/* harmony export */ Swin2SRImageProcessor: () => (/* reexport safe */ _swin2sr_image_processing_swin2sr_js__WEBPACK_IMPORTED_MODULE_32__.Swin2SRImageProcessor),
|
|
17372
17607
|
/* harmony export */ VLMImageProcessor: () => (/* reexport safe */ _janus_image_processing_janus_js__WEBPACK_IMPORTED_MODULE_12__.VLMImageProcessor),
|
|
17373
|
-
/* harmony export */ ViTFeatureExtractor: () => (/* reexport safe */
|
|
17374
|
-
/* harmony export */ ViTImageProcessor: () => (/* reexport safe */
|
|
17375
|
-
/* harmony export */ VitMatteImageProcessor: () => (/* reexport safe */
|
|
17376
|
-
/* harmony export */ VitPoseImageProcessor: () => (/* reexport safe */
|
|
17377
|
-
/* harmony export */ YolosFeatureExtractor: () => (/* reexport safe */
|
|
17378
|
-
/* harmony export */ YolosImageProcessor: () => (/* reexport safe */
|
|
17608
|
+
/* harmony export */ ViTFeatureExtractor: () => (/* reexport safe */ _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_33__.ViTFeatureExtractor),
|
|
17609
|
+
/* harmony export */ ViTImageProcessor: () => (/* reexport safe */ _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_33__.ViTImageProcessor),
|
|
17610
|
+
/* harmony export */ VitMatteImageProcessor: () => (/* reexport safe */ _vitmatte_image_processing_vitmatte_js__WEBPACK_IMPORTED_MODULE_34__.VitMatteImageProcessor),
|
|
17611
|
+
/* harmony export */ VitPoseImageProcessor: () => (/* reexport safe */ _vitpose_image_processing_vitpose_js__WEBPACK_IMPORTED_MODULE_35__.VitPoseImageProcessor),
|
|
17612
|
+
/* harmony export */ YolosFeatureExtractor: () => (/* reexport safe */ _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_36__.YolosFeatureExtractor),
|
|
17613
|
+
/* harmony export */ YolosImageProcessor: () => (/* reexport safe */ _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_36__.YolosImageProcessor)
|
|
17379
17614
|
/* harmony export */ });
|
|
17380
17615
|
/* harmony import */ var _beit_image_processing_beit_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./beit/image_processing_beit.js */ "./src/models/beit/image_processing_beit.js");
|
|
17381
17616
|
/* harmony import */ var _bit_image_processing_bit_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./bit/image_processing_bit.js */ "./src/models/bit/image_processing_bit.js");
|
|
@@ -17402,17 +17637,19 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
17402
17637
|
/* harmony import */ var _nougat_image_processing_nougat_js__WEBPACK_IMPORTED_MODULE_22__ = __webpack_require__(/*! ./nougat/image_processing_nougat.js */ "./src/models/nougat/image_processing_nougat.js");
|
|
17403
17638
|
/* harmony import */ var _owlv2_image_processing_owlv2_js__WEBPACK_IMPORTED_MODULE_23__ = __webpack_require__(/*! ./owlv2/image_processing_owlv2.js */ "./src/models/owlv2/image_processing_owlv2.js");
|
|
17404
17639
|
/* harmony import */ var _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_24__ = __webpack_require__(/*! ./owlvit/image_processing_owlvit.js */ "./src/models/owlvit/image_processing_owlvit.js");
|
|
17405
|
-
/* harmony import */ var
|
|
17406
|
-
/* harmony import */ var
|
|
17407
|
-
/* harmony import */ var
|
|
17408
|
-
/* harmony import */ var
|
|
17409
|
-
/* harmony import */ var
|
|
17410
|
-
/* harmony import */ var
|
|
17411
|
-
/* harmony import */ var
|
|
17412
|
-
/* harmony import */ var
|
|
17413
|
-
/* harmony import */ var
|
|
17414
|
-
/* harmony import */ var
|
|
17415
|
-
/* harmony import */ var
|
|
17640
|
+
/* harmony import */ var _phi3_v_image_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_25__ = __webpack_require__(/*! ./phi3_v/image_processing_phi3_v.js */ "./src/models/phi3_v/image_processing_phi3_v.js");
|
|
17641
|
+
/* harmony import */ var _pvt_image_processing_pvt_js__WEBPACK_IMPORTED_MODULE_26__ = __webpack_require__(/*! ./pvt/image_processing_pvt.js */ "./src/models/pvt/image_processing_pvt.js");
|
|
17642
|
+
/* harmony import */ var _qwen2_vl_image_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_27__ = __webpack_require__(/*! ./qwen2_vl/image_processing_qwen2_vl.js */ "./src/models/qwen2_vl/image_processing_qwen2_vl.js");
|
|
17643
|
+
/* harmony import */ var _rt_detr_image_processing_rt_detr_js__WEBPACK_IMPORTED_MODULE_28__ = __webpack_require__(/*! ./rt_detr/image_processing_rt_detr.js */ "./src/models/rt_detr/image_processing_rt_detr.js");
|
|
17644
|
+
/* harmony import */ var _sam_image_processing_sam_js__WEBPACK_IMPORTED_MODULE_29__ = __webpack_require__(/*! ./sam/image_processing_sam.js */ "./src/models/sam/image_processing_sam.js");
|
|
17645
|
+
/* harmony import */ var _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_30__ = __webpack_require__(/*! ./segformer/image_processing_segformer.js */ "./src/models/segformer/image_processing_segformer.js");
|
|
17646
|
+
/* harmony import */ var _siglip_image_processing_siglip_js__WEBPACK_IMPORTED_MODULE_31__ = __webpack_require__(/*! ./siglip/image_processing_siglip.js */ "./src/models/siglip/image_processing_siglip.js");
|
|
17647
|
+
/* harmony import */ var _swin2sr_image_processing_swin2sr_js__WEBPACK_IMPORTED_MODULE_32__ = __webpack_require__(/*! ./swin2sr/image_processing_swin2sr.js */ "./src/models/swin2sr/image_processing_swin2sr.js");
|
|
17648
|
+
/* harmony import */ var _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_33__ = __webpack_require__(/*! ./vit/image_processing_vit.js */ "./src/models/vit/image_processing_vit.js");
|
|
17649
|
+
/* harmony import */ var _vitmatte_image_processing_vitmatte_js__WEBPACK_IMPORTED_MODULE_34__ = __webpack_require__(/*! ./vitmatte/image_processing_vitmatte.js */ "./src/models/vitmatte/image_processing_vitmatte.js");
|
|
17650
|
+
/* harmony import */ var _vitpose_image_processing_vitpose_js__WEBPACK_IMPORTED_MODULE_35__ = __webpack_require__(/*! ./vitpose/image_processing_vitpose.js */ "./src/models/vitpose/image_processing_vitpose.js");
|
|
17651
|
+
/* harmony import */ var _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_36__ = __webpack_require__(/*! ./yolos/image_processing_yolos.js */ "./src/models/yolos/image_processing_yolos.js");
|
|
17652
|
+
|
|
17416
17653
|
|
|
17417
17654
|
|
|
17418
17655
|
|
|
@@ -18087,6 +18324,87 @@ class MobileViTImageProcessor extends _base_image_processors_utils_js__WEBPACK_I
|
|
|
18087
18324
|
class MobileViTFeatureExtractor extends MobileViTImageProcessor { }
|
|
18088
18325
|
|
|
18089
18326
|
|
|
18327
|
+
/***/ }),
|
|
18328
|
+
|
|
18329
|
+
/***/ "./src/models/moonshine/feature_extraction_moonshine.js":
|
|
18330
|
+
/*!**************************************************************!*\
|
|
18331
|
+
!*** ./src/models/moonshine/feature_extraction_moonshine.js ***!
|
|
18332
|
+
\**************************************************************/
|
|
18333
|
+
/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
|
|
18334
|
+
|
|
18335
|
+
"use strict";
|
|
18336
|
+
__webpack_require__.r(__webpack_exports__);
|
|
18337
|
+
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
18338
|
+
/* harmony export */ MoonshineFeatureExtractor: () => (/* binding */ MoonshineFeatureExtractor)
|
|
18339
|
+
/* harmony export */ });
|
|
18340
|
+
/* harmony import */ var _base_feature_extraction_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/feature_extraction_utils.js */ "./src/base/feature_extraction_utils.js");
|
|
18341
|
+
/* harmony import */ var _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../../utils/tensor.js */ "./src/utils/tensor.js");
|
|
18342
|
+
|
|
18343
|
+
|
|
18344
|
+
|
|
18345
|
+
|
|
18346
|
+
class MoonshineFeatureExtractor extends _base_feature_extraction_utils_js__WEBPACK_IMPORTED_MODULE_0__.FeatureExtractor {
|
|
18347
|
+
/**
|
|
18348
|
+
* Asynchronously extracts input values from a given audio using the provided configuration.
|
|
18349
|
+
* @param {Float32Array|Float64Array} audio The audio data as a Float32Array/Float64Array.
|
|
18350
|
+
* @returns {Promise<{ input_values: Tensor; }>} The extracted input values.
|
|
18351
|
+
*/
|
|
18352
|
+
async _call(audio) {
|
|
18353
|
+
(0,_base_feature_extraction_utils_js__WEBPACK_IMPORTED_MODULE_0__.validate_audio_inputs)(audio, 'MoonshineFeatureExtractor');
|
|
18354
|
+
|
|
18355
|
+
if (audio instanceof Float64Array) {
|
|
18356
|
+
audio = new Float32Array(audio);
|
|
18357
|
+
}
|
|
18358
|
+
|
|
18359
|
+
const shape = [
|
|
18360
|
+
1, /* batch_size */
|
|
18361
|
+
audio.length, /* num_samples */
|
|
18362
|
+
];
|
|
18363
|
+
return {
|
|
18364
|
+
input_values: new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.Tensor('float32', audio, shape),
|
|
18365
|
+
};
|
|
18366
|
+
}
|
|
18367
|
+
}
|
|
18368
|
+
|
|
18369
|
+
|
|
18370
|
+
/***/ }),
|
|
18371
|
+
|
|
18372
|
+
/***/ "./src/models/moonshine/processing_moonshine.js":
|
|
18373
|
+
/*!******************************************************!*\
|
|
18374
|
+
!*** ./src/models/moonshine/processing_moonshine.js ***!
|
|
18375
|
+
\******************************************************/
|
|
18376
|
+
/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
|
|
18377
|
+
|
|
18378
|
+
"use strict";
|
|
18379
|
+
__webpack_require__.r(__webpack_exports__);
|
|
18380
|
+
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
18381
|
+
/* harmony export */ MoonshineProcessor: () => (/* binding */ MoonshineProcessor)
|
|
18382
|
+
/* harmony export */ });
|
|
18383
|
+
/* harmony import */ var _auto_feature_extraction_auto_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../auto/feature_extraction_auto.js */ "./src/models/auto/feature_extraction_auto.js");
|
|
18384
|
+
/* harmony import */ var _tokenizers_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../../tokenizers.js */ "./src/tokenizers.js");
|
|
18385
|
+
/* harmony import */ var _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../../base/processing_utils.js */ "./src/base/processing_utils.js");
|
|
18386
|
+
|
|
18387
|
+
|
|
18388
|
+
|
|
18389
|
+
|
|
18390
|
+
/**
 * Processor for Moonshine models.
 *
 * Declares an `AutoFeatureExtractor` and an `AutoTokenizer` as its components;
 * calling the processor forwards the audio to the feature extractor.
 */
class MoonshineProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_2__.Processor {
    static feature_extractor_class = _auto_feature_extraction_auto_js__WEBPACK_IMPORTED_MODULE_0__.AutoFeatureExtractor;
    static tokenizer_class = _tokenizers_js__WEBPACK_IMPORTED_MODULE_1__.AutoTokenizer;

    /**
     * Runs the feature extractor on the supplied audio.
     * @param {any} audio The audio input to extract features from.
     * @returns {Promise<any>} Resolves with whatever the feature extractor produces.
     */
    async _call(audio) {
        const features = await this.feature_extractor(audio);
        return features;
    }
}
|
|
18406
|
+
|
|
18407
|
+
|
|
18090
18408
|
/***/ }),
|
|
18091
18409
|
|
|
18092
18410
|
/***/ "./src/models/nougat/image_processing_nougat.js":
|
|
@@ -18180,6 +18498,356 @@ class OwlViTProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE
|
|
|
18180
18498
|
}
|
|
18181
18499
|
|
|
18182
18500
|
|
|
18501
|
+
/***/ }),
|
|
18502
|
+
|
|
18503
|
+
/***/ "./src/models/paligemma/processing_paligemma.js":
|
|
18504
|
+
/*!******************************************************!*\
|
|
18505
|
+
!*** ./src/models/paligemma/processing_paligemma.js ***!
|
|
18506
|
+
\******************************************************/
|
|
18507
|
+
/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
|
|
18508
|
+
|
|
18509
|
+
"use strict";
|
|
18510
|
+
__webpack_require__.r(__webpack_exports__);
|
|
18511
|
+
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
18512
|
+
/* harmony export */ PaliGemmaProcessor: () => (/* binding */ PaliGemmaProcessor)
|
|
18513
|
+
/* harmony export */ });
|
|
18514
|
+
/* harmony import */ var _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/processing_utils.js */ "./src/base/processing_utils.js");
|
|
18515
|
+
/* harmony import */ var _auto_image_processing_auto_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../auto/image_processing_auto.js */ "./src/models/auto/image_processing_auto.js");
|
|
18516
|
+
/* harmony import */ var _tokenizers_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../../tokenizers.js */ "./src/tokenizers.js");
|
|
18517
|
+
|
|
18518
|
+
|
|
18519
|
+
|
|
18520
|
+
|
|
18521
|
+
const IMAGE_TOKEN = "<image>";

/**
 * Builds the model input string for a prompt that contains no image tokens:
 * the image token repeated `image_seq_len * num_images` times, followed by
 * the BOS token, the prompt, and a trailing newline.
 *
 * @param {string} prompt The text prompt.
 * @param {string} bos_token The beginning-of-sequence token to insert after the image tokens.
 * @param {number} image_seq_len Number of image tokens per image.
 * @param {string} image_token The image placeholder token.
 * @param {number} num_images Number of images.
 * @returns {string} The assembled input string.
 */
function build_string_from_input(prompt, bos_token, image_seq_len, image_token, num_images) {
    const total_image_tokens = image_seq_len * num_images;
    const image_prefix = image_token.repeat(total_image_tokens);
    // `concat` stringifies each argument the same way template interpolation does.
    return image_prefix.concat(bos_token, prompt, "\n");
}
|
|
18532
|
+
|
|
18533
|
+
/**
 * Processor for PaliGemma: prepares the text prompt(s) (expanding or
 * prepending image tokens and inserting the BOS token) and runs both the
 * tokenizer and the image processor.
 */
class PaliGemmaProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__.Processor {
    static tokenizer_class = _tokenizers_js__WEBPACK_IMPORTED_MODULE_2__.AutoTokenizer;
    static image_processor_class = _auto_image_processing_auto_js__WEBPACK_IMPORTED_MODULE_1__.AutoImageProcessor;
    static uses_processor_config = false;

    /**
     * @typedef {import('../../utils/image.js').RawImage} RawImage
     */

    /**
     * Builds model inputs from image(s) and an optional text prefix.
     *
     * `images` is required, `text` is optional. `kwargs` is forwarded unchanged
     * to both the tokenizer and the image processor.
     *
     * @param {RawImage|RawImage[]} images One image or an array of images.
     * @param {string|string[]|null} [text=null] Prompt(s); a falsy value is treated as an empty prompt.
     * @param {Object} [kwargs={}] Extra options for the tokenizer and image processor.
     * @returns {Promise<any>} The image-processor outputs merged with the tokenizer outputs
     * (tokenizer keys win on collision).
     */
    async _call(/** @type {RawImage|RawImage[]} */ images, text = null, kwargs = {}) {
        let prompts = text;
        if (!prompts) {
            console.warn(
                "You are using PaliGemma without a text prefix. It will perform as a picture-captioning model."
            );
            prompts = "";
        }

        const image_list = Array.isArray(images) ? images : [images];
        if (!Array.isArray(prompts)) {
            prompts = [prompts];
        }

        const bos_token = this.tokenizer.bos_token;
        const image_seq_length = this.image_processor.config.image_seq_length;

        const has_image_token = prompts.some((prompt) => prompt.includes(IMAGE_TOKEN));

        let input_strings;
        if (has_image_token) {
            // Expand every `<image>` placeholder to `image_seq_length` tokens and put
            // the BOS token right after the last image token (or at the start if
            // this particular prompt has none).
            const expanded_image_token = IMAGE_TOKEN.repeat(image_seq_length);
            input_strings = prompts.map((prompt) => {
                const expanded = prompt.replaceAll(IMAGE_TOKEN, expanded_image_token);
                const last_image_at = expanded.lastIndexOf(IMAGE_TOKEN);
                let bos_at = 0;
                if (last_image_at !== -1) {
                    bos_at = last_image_at + IMAGE_TOKEN.length;
                }
                const head = expanded.slice(0, bos_at);
                const tail = expanded.slice(bos_at);
                return head + bos_token + tail + "\n";
            });
        } else {
            console.warn(
                "You are passing both `text` and `images` to `PaliGemmaProcessor`. The processor expects special " +
                "image tokens in the text, as many tokens as there are images per each text. It is recommended to " +
                "add `<image>` tokens in the very beginning of your text. For this call, we will infer how many images " +
                "each text has and add special tokens."
            );

            const num_images = image_list.length;
            input_strings = prompts.map((prompt) => build_string_from_input(
                prompt,
                bos_token,
                image_seq_length,
                IMAGE_TOKEN,
                num_images,
            ));
        }

        // Tokenize first, then run the (awaited) image processor.
        const text_inputs = this.tokenizer(input_strings, kwargs);
        const image_inputs = await this.image_processor(image_list, kwargs);

        return { ...image_inputs, ...text_inputs };
    }
}
|
|
18599
|
+
|
|
18600
|
+
|
|
18601
|
+
/***/ }),
|
|
18602
|
+
|
|
18603
|
+
/***/ "./src/models/phi3_v/image_processing_phi3_v.js":
|
|
18604
|
+
/*!******************************************************!*\
|
|
18605
|
+
!*** ./src/models/phi3_v/image_processing_phi3_v.js ***!
|
|
18606
|
+
\******************************************************/
|
|
18607
|
+
/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
|
|
18608
|
+
|
|
18609
|
+
"use strict";
|
|
18610
|
+
__webpack_require__.r(__webpack_exports__);
|
|
18611
|
+
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
18612
|
+
/* harmony export */ Phi3VImageProcessor: () => (/* binding */ Phi3VImageProcessor)
|
|
18613
|
+
/* harmony export */ });
|
|
18614
|
+
/* harmony import */ var _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/image_processors_utils.js */ "./src/base/image_processors_utils.js");
|
|
18615
|
+
/* harmony import */ var _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../../utils/tensor.js */ "./src/utils/tensor.js");
|
|
18616
|
+
|
|
18617
|
+
|
|
18618
|
+
|
|
18619
|
+
const IMAGE_SIZE = 336; // side length used for the global image, every crop, and padding multiples
const SLICE_AXES = [2, 3]; // axes to slice on
const { ceil, floor, sqrt } = Math;

/**
 * Image processor for Phi3V.
 *
 * Resizes each image so that it fits within `num_crops` tiles of
 * `IMAGE_SIZE x IMAGE_SIZE`, pads it to a multiple of `IMAGE_SIZE` with white,
 * and returns a downscaled global view plus `num_crops` resized patches per image.
 */
class Phi3VImageProcessor extends _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_0__.ImageProcessor {
    /**
     * @param {Object} config Image processor configuration; `config.num_crops` is the default crop budget.
     */
    constructor(config) {
        // These options are always forced on, overriding whatever `config` contains.
        const forced_options = {
            do_normalize: true,
            do_pad: true,
            pad_size: 'custom',
            do_convert_rgb: true,
            do_resize: true, // Smart resizing "hd_transform"
        };
        super({ ...config, ...forced_options });

        this._num_crops = config.num_crops;
    }

    /**
     * Computes the number of image tokens for a (padded) image size.
     * @param {number} width Image width in pixels.
     * @param {number} height Image height in pixels.
     * @returns {number} The number of image tokens.
     */
    calc_num_image_tokens_from_image_size(width, height) {
        // @ts-expect-error
        const { num_img_tokens } = this.config;
        const rows = floor(height / IMAGE_SIZE);
        const cols = floor(width / IMAGE_SIZE);
        return floor((rows * cols + 1) * num_img_tokens + 1 + (rows + 1) * sqrt(num_img_tokens));
    }

    /**
     * Picks the resize target: the largest integer `scale` for which every
     * `s` in `1..scale` satisfies `s * ceil(s / aspect_ratio) <= this._num_crops`,
     * giving a width of `scale * IMAGE_SIZE` and a height that keeps the aspect ratio.
     * The `size` argument is not used.
     * @type {ImageProcessor['get_resize_output_image_size']}
     */
    get_resize_output_image_size(image, size) {
        const max_crops = this._num_crops;
        const [width, height] = image.size;
        const aspect_ratio = width / height;

        // Grow the scale while the next candidate still fits within the crop budget.
        let scale = 0;
        while ((scale + 1) * ceil((scale + 1) / aspect_ratio) <= max_crops) {
            scale += 1;
        }

        const new_w = floor(scale * IMAGE_SIZE);
        const new_h = floor(new_w / aspect_ratio);
        return [new_w, new_h];
    }

    /**
     * Phi3V uses a custom padding strategy:
     * - Pad to a multiple of 336
     * - Pad with white pixels
     * The `padSize` argument is ignored; caller-supplied `options` override the defaults set here.
     * @type {ImageProcessor['pad_image']}
     */
    pad_image(pixelData, imgDims, padSize, options = {}) {
        const round_up = (value) => IMAGE_SIZE * ceil(value / IMAGE_SIZE);

        const [imageHeight, imageWidth] = imgDims;
        const target_size = {
            width: round_up(imageWidth),
            height: round_up(imageHeight),
        };

        // NOTE: Since padding is done after normalization, we need to fill with the normalized values
        const constant_values = [0, 1, 2].map(
            (channel) => (1 - this.image_mean[channel]) / this.image_std[channel],
        );

        const pad_options = { center: true, constant_values, ...options };
        return super.pad_image(pixelData, imgDims, target_size, pad_options);
    }

    /**
     * Preprocesses one or more images.
     * @param {any} images A single image or an array of images.
     * @param {Object} [options]
     * @param {number|null} [options.num_crops=null] Number of crops per image; falls back to
     * `this.config.num_crops` when nullish. Must be a square number >= 4.
     * @returns {Promise<any>} `pixel_values`, `original_sizes`, `reshaped_input_sizes`,
     * `image_sizes` (int64 tensor of padded sizes) and `num_img_tokens` (one entry per image).
     * @throws {Error} If `num_crops` is not a square number >= 4.
     */
    async _call(images, {
        num_crops = null,
    } = {}) {
        // @ts-expect-error
        num_crops ??= this.config.num_crops;
        // Stored on the instance because `get_resize_output_image_size` reads it.
        this._num_crops = num_crops;
        if (num_crops < 4 || sqrt(num_crops) % 1 !== 0) {
            throw new Error("num_crops must be a square number >= 4");
        }

        const image_list = Array.isArray(images) ? images : [images];
        const num_images = image_list.length;
        const imageData = await Promise.all(image_list.map((image) => this.preprocess(image)));

        const original_sizes = imageData.map((item) => item.original_size);
        const reshaped_input_sizes = imageData.map((item) => item.reshaped_input_size);

        const resize_options = () => ({
            size: [IMAGE_SIZE, IMAGE_SIZE],
            mode: 'bicubic',
        });

        // Process each image in batch
        const all_pixel_values = [];
        for (const { pixel_values: image_tensor } of imageData) {
            image_tensor.unsqueeze_(0); // Easier processing as 4D tensor

            const [height, width] = image_tensor.dims.slice(-2);

            // Global image, resized to IMAGE_SIZE x IMAGE_SIZE
            const global_view = await (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.interpolate_4d)(image_tensor, resize_options());

            if (!(num_crops > 0)) {
                // Only use the global image
                // NOTE: Not currently supported in modelling code
                all_pixel_values.push(global_view);
                continue;
            }

            const grid = sqrt(num_crops);
            const patch_width = floor(width / grid);
            const patch_height = floor(height / grid);

            // The last row/column is anchored to the far edge so it always ends at the image border.
            const bounds = (index, patch_len, total_len) => (
                index === grid - 1
                    ? [total_len - patch_len, total_len]
                    : [index * patch_len, (index + 1) * patch_len]
            );

            const patches = [];
            for (let row = 0; row < grid; ++row) {
                for (let col = 0; col < grid; ++col) {
                    const [start_y, end_y] = bounds(row, patch_height, height);
                    const [start_x, end_x] = bounds(col, patch_width, width);

                    patches.push(
                        await (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.slice)(image_tensor, [start_y, start_x], [end_y, end_x], SLICE_AXES),
                    );
                }
            }

            const stacked_patches = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.cat)(patches, 0);
            const resized_patches = await (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.interpolate_4d)(stacked_patches, resize_options()); // [num_crops, 3, 336, 336]

            // Concatenate the global image with the patches
            all_pixel_values.push((0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.cat)([global_view, resized_patches], 0));
        }

        // [num_images, 1 + num_crops, num_channels=3, height, width]
        const pixel_values = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.stack)(all_pixel_values, 0);

        // Calculate padded image sizes
        const sizes = reshaped_input_sizes.map(
            (dims) => dims.map((dim) => IMAGE_SIZE * ceil(dim / IMAGE_SIZE)),
        );

        const image_sizes = new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.Tensor(
            'int64',
            sizes.flat(),
            [num_images, 2],
        );

        const num_img_tokens = sizes.map(
            ([padded_height, padded_width]) => this.calc_num_image_tokens_from_image_size(padded_width, padded_height),
        );

        return { pixel_values, original_sizes, reshaped_input_sizes, image_sizes, num_img_tokens };
    }
}
|
|
18777
|
+
|
|
18778
|
+
|
|
18779
|
+
/***/ }),
|
|
18780
|
+
|
|
18781
|
+
/***/ "./src/models/phi3_v/processing_phi3_v.js":
|
|
18782
|
+
/*!************************************************!*\
|
|
18783
|
+
!*** ./src/models/phi3_v/processing_phi3_v.js ***!
|
|
18784
|
+
\************************************************/
|
|
18785
|
+
/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
|
|
18786
|
+
|
|
18787
|
+
"use strict";
|
|
18788
|
+
__webpack_require__.r(__webpack_exports__);
|
|
18789
|
+
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
18790
|
+
/* harmony export */ Phi3VProcessor: () => (/* binding */ Phi3VProcessor)
|
|
18791
|
+
/* harmony export */ });
|
|
18792
|
+
/* harmony import */ var _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/processing_utils.js */ "./src/base/processing_utils.js");
|
|
18793
|
+
/* harmony import */ var _auto_image_processing_auto_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../auto/image_processing_auto.js */ "./src/models/auto/image_processing_auto.js");
|
|
18794
|
+
/* harmony import */ var _tokenizers_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../../tokenizers.js */ "./src/tokenizers.js");
|
|
18795
|
+
/* harmony import */ var _utils_image_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ../../utils/image.js */ "./src/utils/image.js");
|
|
18796
|
+
|
|
18797
|
+
|
|
18798
|
+
|
|
18799
|
+
|
|
18800
|
+
|
|
18801
|
+
const IMAGE_TOKEN = "<|image|>";
const IMAGE_TOKEN_PATTERN = /<\|image_\d+\|>/g;

/**
 * Processor for Phi3V: expands numbered image placeholders in the prompt(s),
 * tokenizes the text, and (when images are given) runs the image processor.
 */
class Phi3VProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__.Processor {
    static image_processor_class = _auto_image_processing_auto_js__WEBPACK_IMPORTED_MODULE_1__.AutoImageProcessor;
    static tokenizer_class = _tokenizers_js__WEBPACK_IMPORTED_MODULE_2__.AutoTokenizer;

    /**
     * Prepares text (and optionally image) inputs.
     *
     * @param {string|string[]} text Prompt(s); placeholders matching `<|image_N|>` are expanded.
     * @param {RawImage|RawImage[]} [images=null] Image(s); when falsy only the text is tokenized.
     * @param {Object} [options]
     * @param {boolean} [options.padding=true] Passed to the tokenizer when images are provided.
     * @param {boolean} [options.truncation=true] Passed to the tokenizer when images are provided.
     * @param {number|null} [options.num_crops=null] Passed to the image processor.
     * @returns {Promise<any>} Tokenizer outputs merged with image-processor outputs
     * (image-processor keys win on collision).
     */
    async _call(text, images = null, {
        padding = true,
        truncation = true,
        num_crops = null,
    } = {}) {
        const prompts = Array.isArray(text) ? text : [text];

        if (!images) {
            // Text-only: tokenize with the tokenizer's default options.
            const text_only_inputs = this.tokenizer(prompts);
            return { ...text_only_inputs };
        }

        const image_inputs = await this.image_processor(images, { num_crops });
        const { num_img_tokens } = image_inputs;

        // The original implementation adds a bos_token before the image tokens
        // TODO: Check if this affects performance, since it looks like a bug in the original implementation
        const expanded_prompts = prompts.map((prompt, index) => {
            // Every numbered placeholder in prompt `index` becomes `num_img_tokens[index]` image tokens.
            const image_tokens = IMAGE_TOKEN.repeat(num_img_tokens[index]);
            return prompt.split(IMAGE_TOKEN_PATTERN).join(image_tokens);
        });

        const text_inputs = this.tokenizer(expanded_prompts, { padding, truncation });

        // The model expects image tokens to be negative, so we negate the image token ids
        const [image_token_id] = this.tokenizer.model.convert_tokens_to_ids([IMAGE_TOKEN]);
        // NOTE(review): loose `==` is kept on purpose; presumably the ids are BigInt while
        // `image_token_id` is a Number, in which case `===` would never match — confirm.
        text_inputs.input_ids.map_((id) => {
            if (id == image_token_id) {
                return -id;
            }
            return id;
        });

        return {
            ...text_inputs,
            ...image_inputs,
        };
    }
}
|
|
18849
|
+
|
|
18850
|
+
|
|
18183
18851
|
/***/ }),
|
|
18184
18852
|
|
|
18185
18853
|
/***/ "./src/models/processors.js":
|
|
@@ -18192,30 +18860,39 @@ class OwlViTProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE
|
|
|
18192
18860
|
__webpack_require__.r(__webpack_exports__);
|
|
18193
18861
|
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
18194
18862
|
/* harmony export */ Florence2Processor: () => (/* reexport safe */ _florence2_processing_florence2_js__WEBPACK_IMPORTED_MODULE_0__.Florence2Processor),
|
|
18195
|
-
/* harmony export */ Idefics3Processor: () => (/* reexport safe */
|
|
18196
|
-
/* harmony export */ JinaCLIPProcessor: () => (/* reexport safe */
|
|
18863
|
+
/* harmony export */ Idefics3Processor: () => (/* reexport safe */ _idefics3_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_3__.Idefics3Processor),
|
|
18864
|
+
/* harmony export */ JinaCLIPProcessor: () => (/* reexport safe */ _jina_clip_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_5__.JinaCLIPProcessor),
|
|
18197
18865
|
/* harmony export */ MgpstrProcessor: () => (/* reexport safe */ _mgp_str_processing_mgp_str_js__WEBPACK_IMPORTED_MODULE_1__.MgpstrProcessor),
|
|
18198
|
-
/* harmony export */
|
|
18199
|
-
/* harmony export */
|
|
18200
|
-
/* harmony export */
|
|
18201
|
-
/* harmony export */
|
|
18202
|
-
/* harmony export */
|
|
18203
|
-
/* harmony export */
|
|
18204
|
-
/* harmony export */
|
|
18205
|
-
/* harmony export */
|
|
18866
|
+
/* harmony export */ MoonshineProcessor: () => (/* reexport safe */ _moonshine_processing_moonshine_js__WEBPACK_IMPORTED_MODULE_2__.MoonshineProcessor),
|
|
18867
|
+
/* harmony export */ OwlViTProcessor: () => (/* reexport safe */ _owlvit_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_6__.OwlViTProcessor),
|
|
18868
|
+
/* harmony export */ PaliGemmaProcessor: () => (/* reexport safe */ _paligemma_processing_paligemma_js__WEBPACK_IMPORTED_MODULE_8__.PaliGemmaProcessor),
|
|
18869
|
+
/* harmony export */ Phi3VProcessor: () => (/* reexport safe */ _phi3_v_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_7__.Phi3VProcessor),
|
|
18870
|
+
/* harmony export */ PyAnnoteProcessor: () => (/* reexport safe */ _pyannote_processing_pyannote_js__WEBPACK_IMPORTED_MODULE_9__.PyAnnoteProcessor),
|
|
18871
|
+
/* harmony export */ Qwen2VLProcessor: () => (/* reexport safe */ _qwen2_vl_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_10__.Qwen2VLProcessor),
|
|
18872
|
+
/* harmony export */ SamProcessor: () => (/* reexport safe */ _sam_processing_sam_js__WEBPACK_IMPORTED_MODULE_11__.SamProcessor),
|
|
18873
|
+
/* harmony export */ SpeechT5Processor: () => (/* reexport safe */ _speecht5_processing_speecht5_js__WEBPACK_IMPORTED_MODULE_12__.SpeechT5Processor),
|
|
18874
|
+
/* harmony export */ VLChatProcessor: () => (/* reexport safe */ _janus_processing_janus_js__WEBPACK_IMPORTED_MODULE_4__.VLChatProcessor),
|
|
18875
|
+
/* harmony export */ Wav2Vec2ProcessorWithLM: () => (/* reexport safe */ _wav2vec2_processing_wav2vec2_js__WEBPACK_IMPORTED_MODULE_13__.Wav2Vec2ProcessorWithLM),
|
|
18876
|
+
/* harmony export */ WhisperProcessor: () => (/* reexport safe */ _whisper_processing_whisper_js__WEBPACK_IMPORTED_MODULE_14__.WhisperProcessor)
|
|
18206
18877
|
/* harmony export */ });
|
|
18207
18878
|
/* harmony import */ var _florence2_processing_florence2_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./florence2/processing_florence2.js */ "./src/models/florence2/processing_florence2.js");
|
|
18208
18879
|
/* harmony import */ var _mgp_str_processing_mgp_str_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./mgp_str/processing_mgp_str.js */ "./src/models/mgp_str/processing_mgp_str.js");
|
|
18209
|
-
/* harmony import */ var
|
|
18210
|
-
/* harmony import */ var
|
|
18211
|
-
/* harmony import */ var
|
|
18212
|
-
/* harmony import */ var
|
|
18213
|
-
/* harmony import */ var
|
|
18214
|
-
/* harmony import */ var
|
|
18215
|
-
/* harmony import */ var
|
|
18216
|
-
/* harmony import */ var
|
|
18217
|
-
/* harmony import */ var
|
|
18218
|
-
/* harmony import */ var
|
|
18880
|
+
/* harmony import */ var _moonshine_processing_moonshine_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./moonshine/processing_moonshine.js */ "./src/models/moonshine/processing_moonshine.js");
|
|
18881
|
+
/* harmony import */ var _idefics3_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ./idefics3/processing_idefics3.js */ "./src/models/idefics3/processing_idefics3.js");
|
|
18882
|
+
/* harmony import */ var _janus_processing_janus_js__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ./janus/processing_janus.js */ "./src/models/janus/processing_janus.js");
|
|
18883
|
+
/* harmony import */ var _jina_clip_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./jina_clip/processing_jina_clip.js */ "./src/models/jina_clip/processing_jina_clip.js");
|
|
18884
|
+
/* harmony import */ var _owlvit_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./owlvit/processing_owlvit.js */ "./src/models/owlvit/processing_owlvit.js");
|
|
18885
|
+
/* harmony import */ var _phi3_v_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./phi3_v/processing_phi3_v.js */ "./src/models/phi3_v/processing_phi3_v.js");
|
|
18886
|
+
/* harmony import */ var _paligemma_processing_paligemma_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./paligemma/processing_paligemma.js */ "./src/models/paligemma/processing_paligemma.js");
|
|
18887
|
+
/* harmony import */ var _pyannote_processing_pyannote_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./pyannote/processing_pyannote.js */ "./src/models/pyannote/processing_pyannote.js");
|
|
18888
|
+
/* harmony import */ var _qwen2_vl_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_10__ = __webpack_require__(/*! ./qwen2_vl/processing_qwen2_vl.js */ "./src/models/qwen2_vl/processing_qwen2_vl.js");
|
|
18889
|
+
/* harmony import */ var _sam_processing_sam_js__WEBPACK_IMPORTED_MODULE_11__ = __webpack_require__(/*! ./sam/processing_sam.js */ "./src/models/sam/processing_sam.js");
|
|
18890
|
+
/* harmony import */ var _speecht5_processing_speecht5_js__WEBPACK_IMPORTED_MODULE_12__ = __webpack_require__(/*! ./speecht5/processing_speecht5.js */ "./src/models/speecht5/processing_speecht5.js");
|
|
18891
|
+
/* harmony import */ var _wav2vec2_processing_wav2vec2_js__WEBPACK_IMPORTED_MODULE_13__ = __webpack_require__(/*! ./wav2vec2/processing_wav2vec2.js */ "./src/models/wav2vec2/processing_wav2vec2.js");
|
|
18892
|
+
/* harmony import */ var _whisper_processing_whisper_js__WEBPACK_IMPORTED_MODULE_14__ = __webpack_require__(/*! ./whisper/processing_whisper.js */ "./src/models/whisper/processing_whisper.js");
|
|
18893
|
+
|
|
18894
|
+
|
|
18895
|
+
|
|
18219
18896
|
|
|
18220
18897
|
|
|
18221
18898
|
|
|
@@ -18264,6 +18941,8 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
18264
18941
|
/* harmony export */ });
|
|
18265
18942
|
/* harmony import */ var _base_feature_extraction_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/feature_extraction_utils.js */ "./src/base/feature_extraction_utils.js");
|
|
18266
18943
|
/* harmony import */ var _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../../utils/tensor.js */ "./src/utils/tensor.js");
|
|
18944
|
+
/* harmony import */ var _utils_maths_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../../utils/maths.js */ "./src/utils/maths.js");
|
|
18945
|
+
|
|
18267
18946
|
|
|
18268
18947
|
|
|
18269
18948
|
|
|
@@ -18291,41 +18970,6 @@ class PyAnnoteFeatureExtractor extends _base_feature_extraction_utils_js__WEBPAC
|
|
|
18291
18970
|
};
|
|
18292
18971
|
}
|
|
18293
18972
|
|
|
18294
|
-
}
|
|
18295
|
-
|
|
18296
|
-
|
|
18297
|
-
/***/ }),
|
|
18298
|
-
|
|
18299
|
-
/***/ "./src/models/pyannote/processing_pyannote.js":
|
|
18300
|
-
/*!****************************************************!*\
|
|
18301
|
-
!*** ./src/models/pyannote/processing_pyannote.js ***!
|
|
18302
|
-
\****************************************************/
|
|
18303
|
-
/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
|
|
18304
|
-
|
|
18305
|
-
"use strict";
|
|
18306
|
-
__webpack_require__.r(__webpack_exports__);
|
|
18307
|
-
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
18308
|
-
/* harmony export */ PyAnnoteProcessor: () => (/* binding */ PyAnnoteProcessor)
|
|
18309
|
-
/* harmony export */ });
|
|
18310
|
-
/* harmony import */ var _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/processing_utils.js */ "./src/base/processing_utils.js");
|
|
18311
|
-
/* harmony import */ var _auto_feature_extraction_auto_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../auto/feature_extraction_auto.js */ "./src/models/auto/feature_extraction_auto.js");
|
|
18312
|
-
/* harmony import */ var _utils_maths_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../../utils/maths.js */ "./src/utils/maths.js");
|
|
18313
|
-
|
|
18314
|
-
|
|
18315
|
-
|
|
18316
|
-
|
|
18317
|
-
class PyAnnoteProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__.Processor {
|
|
18318
|
-
static feature_extractor_class = _auto_feature_extraction_auto_js__WEBPACK_IMPORTED_MODULE_1__.AutoFeatureExtractor
|
|
18319
|
-
|
|
18320
|
-
/**
|
|
18321
|
-
* Calls the feature_extractor function with the given audio input.
|
|
18322
|
-
* @param {any} audio The audio input to extract features from.
|
|
18323
|
-
* @returns {Promise<any>} A Promise that resolves with the extracted features.
|
|
18324
|
-
*/
|
|
18325
|
-
async _call(audio) {
|
|
18326
|
-
return await this.feature_extractor(audio)
|
|
18327
|
-
}
|
|
18328
|
-
|
|
18329
18973
|
/**
|
|
18330
18974
|
* NOTE: Can return fractional values. `Math.ceil` will ensure correct value.
|
|
18331
18975
|
* @param {number} samples The number of frames in the audio.
|
|
@@ -18380,6 +19024,48 @@ class PyAnnoteProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODU
|
|
|
18380
19024
|
}
|
|
18381
19025
|
return results;
|
|
18382
19026
|
}
|
|
19027
|
+
|
|
19028
|
+
}
|
|
19029
|
+
|
|
19030
|
+
|
|
19031
|
+
/***/ }),
|
|
19032
|
+
|
|
19033
|
+
/***/ "./src/models/pyannote/processing_pyannote.js":
|
|
19034
|
+
/*!****************************************************!*\
|
|
19035
|
+
!*** ./src/models/pyannote/processing_pyannote.js ***!
|
|
19036
|
+
\****************************************************/
|
|
19037
|
+
/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
|
|
19038
|
+
|
|
19039
|
+
"use strict";
|
|
19040
|
+
__webpack_require__.r(__webpack_exports__);
|
|
19041
|
+
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
19042
|
+
/* harmony export */ PyAnnoteProcessor: () => (/* binding */ PyAnnoteProcessor)
|
|
19043
|
+
/* harmony export */ });
|
|
19044
|
+
/* harmony import */ var _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/processing_utils.js */ "./src/base/processing_utils.js");
|
|
19045
|
+
/* harmony import */ var _feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./feature_extraction_pyannote.js */ "./src/models/pyannote/feature_extraction_pyannote.js");
|
|
19046
|
+
|
|
19047
|
+
|
|
19048
|
+
|
|
19049
|
+
/**
 * Processor for PyAnnote models. Wraps a `PyAnnoteFeatureExtractor` and
 * forwards calls, post-processing and the sampling rate to it.
 */
class PyAnnoteProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__.Processor {
    static feature_extractor_class = _feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_1__.PyAnnoteFeatureExtractor;

    /**
     * Runs the feature extractor on the supplied audio.
     * @param {any} audio The audio input to extract features from.
     * @returns {Promise<any>} Resolves with whatever the feature extractor produces.
     */
    async _call(audio) {
        const features = await this.feature_extractor(audio);
        return features;
    }

    /**
     * Delegates to the feature extractor's `post_process_speaker_diarization`,
     * passing all arguments through unchanged.
     * @type {PyAnnoteFeatureExtractor['post_process_speaker_diarization']}
     */
    post_process_speaker_diarization(...args) {
        const extractor = /** @type {PyAnnoteFeatureExtractor} */(this.feature_extractor);
        return extractor.post_process_speaker_diarization(...args);
    }

    /**
     * The `sampling_rate` taken from the feature extractor's config.
     */
    get sampling_rate() {
        const { sampling_rate } = this.feature_extractor.config;
        return sampling_rate;
    }
}
|
|
18384
19070
|
|
|
18385
19071
|
|
|
@@ -20129,6 +20815,17 @@ class TensorOpRegistry {
|
|
|
20129
20815
|
}
|
|
20130
20816
|
return this._top_k;
|
|
20131
20817
|
}
|
|
20818
|
+
|
|
20819
|
+
static get slice() {
|
|
20820
|
+
if (!this._slice) {
|
|
20821
|
+
this._slice = wrap(
|
|
20822
|
+
[8, 7, 18, 0, 58, 96, 10, 25, 10, 1, 120, 10, 1, 115, 10, 1, 101, 10, 1, 97, 10, 1, 116, 18, 1, 121, 34, 5, 83, 108, 105, 99, 101, 18, 1, 114, 90, 9, 10, 1, 120, 18, 4, 10, 2, 8, 1, 90, 9, 10, 1, 115, 18, 4, 10, 2, 8, 7, 90, 9, 10, 1, 101, 18, 4, 10, 2, 8, 7, 90, 9, 10, 1, 97, 18, 4, 10, 2, 8, 7, 90, 9, 10, 1, 116, 18, 4, 10, 2, 8, 7, 98, 9, 10, 1, 121, 18, 4, 10, 2, 8, 1, 66, 2, 16, 13],
|
|
20823
|
+
this.session_options,
|
|
20824
|
+
'y',
|
|
20825
|
+
)
|
|
20826
|
+
}
|
|
20827
|
+
return this._slice;
|
|
20828
|
+
}
|
|
20132
20829
|
}
|
|
20133
20830
|
|
|
20134
20831
|
|
|
@@ -21862,6 +22559,8 @@ class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options: TextA
|
|
|
21862
22559
|
case 'unispeech-sat':
|
|
21863
22560
|
case 'hubert':
|
|
21864
22561
|
return this._call_wav2vec2(audio, kwargs)
|
|
22562
|
+
case 'moonshine':
|
|
22563
|
+
return this._call_moonshine(audio, kwargs)
|
|
21865
22564
|
default:
|
|
21866
22565
|
throw new Error(`AutomaticSpeechRecognitionPipeline does not support model type '${this.model.config.model_type}'.`)
|
|
21867
22566
|
}
|
|
@@ -22015,6 +22714,34 @@ class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options: TextA
|
|
|
22015
22714
|
}
|
|
22016
22715
|
return single ? toReturn[0] : toReturn;
|
|
22017
22716
|
}
|
|
22717
|
+
|
|
22718
|
+
/**
|
|
22719
|
+
* @type {AutomaticSpeechRecognitionPipelineCallback}
|
|
22720
|
+
* @private
|
|
22721
|
+
*/
|
|
22722
|
+
async _call_moonshine(audio, kwargs) {
|
|
22723
|
+
const single = !Array.isArray(audio);
|
|
22724
|
+
if (single) {
|
|
22725
|
+
audio = [/** @type {AudioInput} */ (audio)];
|
|
22726
|
+
}
|
|
22727
|
+
const sampling_rate = this.processor.feature_extractor.config.sampling_rate;
|
|
22728
|
+
const preparedAudios = await prepareAudios(audio, sampling_rate);
|
|
22729
|
+
const toReturn = [];
|
|
22730
|
+
for (const aud of preparedAudios) {
|
|
22731
|
+
const inputs = await this.processor(aud);
|
|
22732
|
+
|
|
22733
|
+
// According to the [paper](https://arxiv.org/pdf/2410.15608):
|
|
22734
|
+
// "We use greedy decoding, with a heuristic limit of 6 output tokens
|
|
22735
|
+
// per second of audio to avoid repeated output sequences."
|
|
22736
|
+
const max_new_tokens = Math.floor(aud.length / sampling_rate) * 6;
|
|
22737
|
+
const outputs = await this.model.generate({ max_new_tokens, ...kwargs, ...inputs });
|
|
22738
|
+
|
|
22739
|
+
const text = this.processor.batch_decode(outputs, { skip_special_tokens: true })[0];
|
|
22740
|
+
toReturn.push({ text });
|
|
22741
|
+
}
|
|
22742
|
+
return single ? toReturn[0] : toReturn;
|
|
22743
|
+
}
|
|
22744
|
+
|
|
22018
22745
|
}
|
|
22019
22746
|
|
|
22020
22747
|
/**
|
|
@@ -26133,6 +26860,12 @@ class PreTrainedTokenizer extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__
|
|
|
26133
26860
|
this.unk_token = this.getToken('unk_token');
|
|
26134
26861
|
this.unk_token_id = this.model.tokens_to_ids.get(this.unk_token);
|
|
26135
26862
|
|
|
26863
|
+
this.bos_token = this.getToken('bos_token');
|
|
26864
|
+
this.bos_token_id = this.model.tokens_to_ids.get(this.bos_token);
|
|
26865
|
+
|
|
26866
|
+
this.eos_token = this.getToken('eos_token');
|
|
26867
|
+
this.eos_token_id = this.model.tokens_to_ids.get(this.eos_token);
|
|
26868
|
+
|
|
26136
26869
|
this.model_max_length = tokenizerConfig.model_max_length;
|
|
26137
26870
|
|
|
26138
26871
|
/** @type {boolean} Whether or not to strip the text when tokenizing (removing excess spaces before and after the string). */
|
|
@@ -27105,6 +27838,11 @@ class WhisperTokenizer extends PreTrainedTokenizer {
|
|
|
27105
27838
|
let chunk = new_chunk();
|
|
27106
27839
|
let time_offset = 0.0;
|
|
27107
27840
|
const timestamp_begin = this.timestamp_begin;
|
|
27841
|
+
// Whisper timestamp tokens start from 0.00 and go to timestamp 30.00 in 0.02 increments.
|
|
27842
|
+
// We can calculate the last time stamp token as timestamp_begin plus the number of tokens
|
|
27843
|
+
// from 0.00 to 30.00, which is 1500.
|
|
27844
|
+
const total_timestamp_tokens = 1500; // (30.00 - 0.00) / 0.02
|
|
27845
|
+
const timestamp_end = timestamp_begin + total_timestamp_tokens;
|
|
27108
27846
|
|
|
27109
27847
|
let previous_tokens = [];
|
|
27110
27848
|
let previous_token_timestamps = [];
|
|
@@ -27192,7 +27930,7 @@ class WhisperTokenizer extends PreTrainedTokenizer {
|
|
|
27192
27930
|
} else {
|
|
27193
27931
|
// 2/ This is a regular special token, ignoring it
|
|
27194
27932
|
}
|
|
27195
|
-
} else if (token >= timestamp_begin) {
|
|
27933
|
+
} else if (token >= timestamp_begin && token <= timestamp_end) {
|
|
27196
27934
|
// 3/ Timestamp token
|
|
27197
27935
|
const time = (token - timestamp_begin) * time_precision + time_offset;
|
|
27198
27936
|
const rounded_time = (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_3__.round)(time, 2);
|
|
@@ -28684,15 +29422,45 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
28684
29422
|
*/
|
|
28685
29423
|
|
|
28686
29424
|
/**
|
|
28687
|
-
* @typedef {Object}
|
|
28688
|
-
* @property {'initiate'
|
|
28689
|
-
* @property {string} name
|
|
28690
|
-
*
|
|
28691
|
-
|
|
28692
|
-
|
|
28693
|
-
|
|
28694
|
-
* @
|
|
28695
|
-
* @property {
|
|
29425
|
+
* @typedef {Object} InitiateProgressInfo
|
|
29426
|
+
* @property {'initiate'} status
|
|
29427
|
+
* @property {string} name The model id or directory path.
|
|
29428
|
+
* @property {string} file The name of the file.
|
|
29429
|
+
*/
|
|
29430
|
+
|
|
29431
|
+
/**
|
|
29432
|
+
* @typedef {Object} DownloadProgressInfo
|
|
29433
|
+
* @property {'download'} status
|
|
29434
|
+
* @property {string} name The model id or directory path.
|
|
29435
|
+
* @property {string} file The name of the file.
|
|
29436
|
+
*/
|
|
29437
|
+
|
|
29438
|
+
/**
|
|
29439
|
+
* @typedef {Object} ProgressStatusInfo
|
|
29440
|
+
* @property {'progress'} status
|
|
29441
|
+
* @property {string} name The model id or directory path.
|
|
29442
|
+
* @property {string} file The name of the file.
|
|
29443
|
+
* @property {number} progress A number between 0 and 100.
|
|
29444
|
+
* @property {number} loaded The number of bytes loaded.
|
|
29445
|
+
* @property {number} total The total number of bytes to be loaded.
|
|
29446
|
+
*/
|
|
29447
|
+
|
|
29448
|
+
/**
|
|
29449
|
+
* @typedef {Object} DoneProgressInfo
|
|
29450
|
+
* @property {'done'} status
|
|
29451
|
+
* @property {string} name The model id or directory path.
|
|
29452
|
+
* @property {string} file The name of the file.
|
|
29453
|
+
*/
|
|
29454
|
+
|
|
29455
|
+
/**
|
|
29456
|
+
* @typedef {Object} ReadyProgressInfo
|
|
29457
|
+
* @property {'ready'} status
|
|
29458
|
+
* @property {string} task The loaded task.
|
|
29459
|
+
* @property {string} model The loaded model.
|
|
29460
|
+
*/
|
|
29461
|
+
|
|
29462
|
+
/**
|
|
29463
|
+
* @typedef {InitiateProgressInfo | DownloadProgressInfo | ProgressStatusInfo | DoneProgressInfo | ReadyProgressInfo} ProgressInfo
|
|
28696
29464
|
*/
|
|
28697
29465
|
|
|
28698
29466
|
/**
|
|
@@ -30035,13 +30803,6 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
|
|
|
30035
30803
|
file: filename
|
|
30036
30804
|
})
|
|
30037
30805
|
|
|
30038
|
-
/** @type {import('./core.js').ProgressInfo} */
|
|
30039
|
-
const progressInfo = {
|
|
30040
|
-
status: 'progress',
|
|
30041
|
-
name: path_or_repo_id,
|
|
30042
|
-
file: filename
|
|
30043
|
-
}
|
|
30044
|
-
|
|
30045
30806
|
/** @type {Uint8Array} */
|
|
30046
30807
|
let buffer;
|
|
30047
30808
|
|
|
@@ -30061,7 +30822,9 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
|
|
|
30061
30822
|
|
|
30062
30823
|
// For completeness, we still fire the final progress callback
|
|
30063
30824
|
(0,_core_js__WEBPACK_IMPORTED_MODULE_3__.dispatchCallback)(options.progress_callback, {
|
|
30064
|
-
|
|
30825
|
+
status: 'progress',
|
|
30826
|
+
name: path_or_repo_id,
|
|
30827
|
+
file: filename,
|
|
30065
30828
|
progress: 100,
|
|
30066
30829
|
loaded: buffer.length,
|
|
30067
30830
|
total: buffer.length,
|
|
@@ -30069,7 +30832,9 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
|
|
|
30069
30832
|
} else {
|
|
30070
30833
|
buffer = await readResponse(response, data => {
|
|
30071
30834
|
(0,_core_js__WEBPACK_IMPORTED_MODULE_3__.dispatchCallback)(options.progress_callback, {
|
|
30072
|
-
|
|
30835
|
+
status: 'progress',
|
|
30836
|
+
name: path_or_repo_id,
|
|
30837
|
+
file: filename,
|
|
30073
30838
|
...data,
|
|
30074
30839
|
})
|
|
30075
30840
|
})
|
|
@@ -30126,12 +30891,11 @@ async function getModelJSON(modelPath, fileName, fatal = true, options = {}) {
|
|
|
30126
30891
|
|
|
30127
30892
|
return JSON.parse(jsonData);
|
|
30128
30893
|
}
|
|
30129
|
-
|
|
30130
30894
|
/**
|
|
30131
30895
|
* Read and track progress when reading a Response object
|
|
30132
30896
|
*
|
|
30133
|
-
* @param {
|
|
30134
|
-
* @param {
|
|
30897
|
+
* @param {Response|FileResponse} response The Response object to read
|
|
30898
|
+
* @param {(data: {progress: number, loaded: number, total: number}) => void} progress_callback The function to call with progress updates
|
|
30135
30899
|
* @returns {Promise<Uint8Array>} A Promise that resolves with the Uint8Array buffer
|
|
30136
30900
|
*/
|
|
30137
30901
|
async function readResponse(response, progress_callback) {
|
|
@@ -30528,6 +31292,46 @@ class RawImage {
|
|
|
30528
31292
|
return this._update(newData, this.width, this.height, 4);
|
|
30529
31293
|
}
|
|
30530
31294
|
|
|
31295
|
+
/**
|
|
31296
|
+
* Apply an alpha mask to the image. Operates in place.
|
|
31297
|
+
* @param {RawImage} mask The mask to apply. It should have a single channel.
|
|
31298
|
+
* @returns {RawImage} The masked image.
|
|
31299
|
+
* @throws {Error} If the mask is not the same size as the image.
|
|
31300
|
+
* @throws {Error} If the image does not have 3 or 4 channels.
|
|
31301
|
+
* @throws {Error} If the mask is not a single channel.
|
|
31302
|
+
*/
|
|
31303
|
+
putAlpha(mask) {
|
|
31304
|
+
if (mask.width !== this.width || mask.height !== this.height) {
|
|
31305
|
+
throw new Error(`Expected mask size to be ${this.width}x${this.height}, but got ${mask.width}x${mask.height}`);
|
|
31306
|
+
}
|
|
31307
|
+
if (mask.channels !== 1) {
|
|
31308
|
+
throw new Error(`Expected mask to have 1 channel, but got ${mask.channels}`);
|
|
31309
|
+
}
|
|
31310
|
+
|
|
31311
|
+
const this_data = this.data;
|
|
31312
|
+
const mask_data = mask.data;
|
|
31313
|
+
const num_pixels = this.width * this.height;
|
|
31314
|
+
if (this.channels === 3) {
|
|
31315
|
+
// Convert to RGBA and simultaneously apply mask to alpha channel
|
|
31316
|
+
const newData = new Uint8ClampedArray(num_pixels * 4);
|
|
31317
|
+
for (let i = 0, in_offset = 0, out_offset = 0; i < num_pixels; ++i) {
|
|
31318
|
+
newData[out_offset++] = this_data[in_offset++];
|
|
31319
|
+
newData[out_offset++] = this_data[in_offset++];
|
|
31320
|
+
newData[out_offset++] = this_data[in_offset++];
|
|
31321
|
+
newData[out_offset++] = mask_data[i];
|
|
31322
|
+
}
|
|
31323
|
+
return this._update(newData, this.width, this.height, 4);
|
|
31324
|
+
|
|
31325
|
+
} else if (this.channels === 4) {
|
|
31326
|
+
// Apply mask to alpha channel in place
|
|
31327
|
+
for (let i = 0; i < num_pixels; ++i) {
|
|
31328
|
+
this_data[4 * i + 3] = mask_data[i];
|
|
31329
|
+
}
|
|
31330
|
+
return this;
|
|
31331
|
+
}
|
|
31332
|
+
throw new Error(`Expected image to have 3 or 4 channels, but got ${this.channels}`);
|
|
31333
|
+
}
|
|
31334
|
+
|
|
30531
31335
|
/**
|
|
30532
31336
|
* Resize the image to the given dimensions. This method uses the canvas API to perform the resizing.
|
|
30533
31337
|
* @param {number} width The width of the new image. `null` or `-1` will preserve the aspect ratio.
|
|
@@ -32136,7 +32940,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
32136
32940
|
/* harmony export */ ones_like: () => (/* binding */ ones_like),
|
|
32137
32941
|
/* harmony export */ permute: () => (/* binding */ permute),
|
|
32138
32942
|
/* harmony export */ quantize_embeddings: () => (/* binding */ quantize_embeddings),
|
|
32943
|
+
/* harmony export */ rand: () => (/* binding */ rand),
|
|
32139
32944
|
/* harmony export */ rfft: () => (/* binding */ rfft),
|
|
32945
|
+
/* harmony export */ slice: () => (/* binding */ slice),
|
|
32140
32946
|
/* harmony export */ stack: () => (/* binding */ stack),
|
|
32141
32947
|
/* harmony export */ std_mean: () => (/* binding */ std_mean),
|
|
32142
32948
|
/* harmony export */ topk: () => (/* binding */ topk),
|
|
@@ -32915,8 +33721,21 @@ class Tensor {
|
|
|
32915
33721
|
if (!DataTypeMap.hasOwnProperty(type)) {
|
|
32916
33722
|
throw new Error(`Unsupported type: ${type}`);
|
|
32917
33723
|
}
|
|
33724
|
+
|
|
33725
|
+
// Handle special cases where a mapping function is needed (e.g., where one type is a bigint and the other is a number)
|
|
33726
|
+
let map_fn;
|
|
33727
|
+
const is_source_bigint = ['int64', 'uint64'].includes(this.type);
|
|
33728
|
+
const is_dest_bigint = ['int64', 'uint64'].includes(type);
|
|
33729
|
+
if (is_source_bigint && !is_dest_bigint) {
|
|
33730
|
+
// TypeError: Cannot convert a BigInt value to a number
|
|
33731
|
+
map_fn = Number;
|
|
33732
|
+
} else if (!is_source_bigint && is_dest_bigint) {
|
|
33733
|
+
// TypeError: Cannot convert [x] to a BigInt
|
|
33734
|
+
map_fn = BigInt;
|
|
33735
|
+
}
|
|
33736
|
+
|
|
32918
33737
|
// @ts-ignore
|
|
32919
|
-
return new Tensor(type, DataTypeMap[type].from(this.data), this.dims);
|
|
33738
|
+
return new Tensor(type, DataTypeMap[type].from(this.data, map_fn), this.dims);
|
|
32920
33739
|
}
|
|
32921
33740
|
}
|
|
32922
33741
|
|
|
@@ -33114,6 +33933,29 @@ async function topk(x, k) {
|
|
|
33114
33933
|
});
|
|
33115
33934
|
}
|
|
33116
33935
|
|
|
33936
|
+
|
|
33937
|
+
const arrayToIndexTensor = (array) => new Tensor('int64', array, [array.length]);
|
|
33938
|
+
/**
|
|
33939
|
+
* Slice a multidimensional float32 tensor.
|
|
33940
|
+
* @param {Tensor} data Tensor of data to extract slices from.
|
|
33941
|
+
* @param {number[]} starts 1-D array of starting indices, one for each axis listed in `axes`.
|
|
33942
|
+
* @param {number[]} ends 1-D array of ending indices (exclusive), one for each axis listed in `axes`.
|
|
33943
|
+
* @param {number[]} axes 1-D array of the axes that `starts` and `ends` apply to.
|
|
33944
|
+
* @param {number[]} [steps] 1-D array of slice steps, one for each axis listed in `axes`. Defaults to a step of 1 for every axis.
|
|
33945
|
+
* @returns {Promise<Tensor>} Sliced data tensor.
|
|
33946
|
+
*/
|
|
33947
|
+
async function slice(data, starts, ends, axes, steps) {
|
|
33948
|
+
const op = await _ops_registry_js__WEBPACK_IMPORTED_MODULE_2__.TensorOpRegistry.slice;
|
|
33949
|
+
return await op({
|
|
33950
|
+
x: data,
|
|
33951
|
+
s: arrayToIndexTensor(starts),
|
|
33952
|
+
e: arrayToIndexTensor(ends),
|
|
33953
|
+
a: arrayToIndexTensor(axes),
|
|
33954
|
+
t: arrayToIndexTensor(steps ?? new Array(axes.length).fill(1)),
|
|
33955
|
+
});
|
|
33956
|
+
}
|
|
33957
|
+
|
|
33958
|
+
|
|
33117
33959
|
/**
|
|
33118
33960
|
* Perform mean pooling of the last hidden state followed by a normalization step.
|
|
33119
33961
|
* @param {Tensor} last_hidden_state Tensor of shape [batchSize, seqLength, embedDim]
|
|
@@ -33560,6 +34402,20 @@ function zeros_like(tensor) {
|
|
|
33560
34402
|
return zeros(tensor.dims);
|
|
33561
34403
|
}
|
|
33562
34404
|
|
|
34405
|
+
/**
|
|
34406
|
+
* Returns a tensor filled with random numbers from a uniform distribution on the interval [0, 1)
|
|
34407
|
+
* @param {number[]} size A sequence of integers defining the shape of the output tensor.
|
|
34408
|
+
* @returns {Tensor} The random tensor.
|
|
34409
|
+
*/
|
|
34410
|
+
function rand(size) {
|
|
34411
|
+
const length = size.reduce((a, b) => a * b, 1);
|
|
34412
|
+
return new Tensor(
|
|
34413
|
+
"float32",
|
|
34414
|
+
Float32Array.from({ length }, () => Math.random()),
|
|
34415
|
+
size,
|
|
34416
|
+
)
|
|
34417
|
+
}
|
|
34418
|
+
|
|
33563
34419
|
/**
|
|
33564
34420
|
* Quantizes the embeddings tensor to binary or unsigned binary precision.
|
|
33565
34421
|
* @param {Tensor} tensor The tensor to quantize.
|
|
@@ -33694,7 +34550,7 @@ function quantize_embeddings(tensor, precision) {
|
|
|
33694
34550
|
/******/
|
|
33695
34551
|
/************************************************************************/
|
|
33696
34552
|
var __webpack_exports__ = {};
|
|
33697
|
-
// This entry
|
|
34553
|
+
// This entry needs to be wrapped in an IIFE because it needs to be in strict mode.
|
|
33698
34554
|
(() => {
|
|
33699
34555
|
"use strict";
|
|
33700
34556
|
/*!*****************************!*\
|
|
@@ -33905,6 +34761,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
33905
34761
|
/* harmony export */ EsmModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.EsmModel),
|
|
33906
34762
|
/* harmony export */ EsmPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.EsmPreTrainedModel),
|
|
33907
34763
|
/* harmony export */ EsmTokenizer: () => (/* reexport safe */ _tokenizers_js__WEBPACK_IMPORTED_MODULE_3__.EsmTokenizer),
|
|
34764
|
+
/* harmony export */ ExaoneForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ExaoneForCausalLM),
|
|
34765
|
+
/* harmony export */ ExaoneModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ExaoneModel),
|
|
34766
|
+
/* harmony export */ ExaonePreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ExaonePreTrainedModel),
|
|
33908
34767
|
/* harmony export */ FFT: () => (/* reexport safe */ _utils_maths_js__WEBPACK_IMPORTED_MODULE_8__.FFT),
|
|
33909
34768
|
/* harmony export */ FalconForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.FalconForCausalLM),
|
|
33910
34769
|
/* harmony export */ FalconModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.FalconModel),
|
|
@@ -33963,6 +34822,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
33963
34822
|
/* harmony export */ HubertForSequenceClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.HubertForSequenceClassification),
|
|
33964
34823
|
/* harmony export */ HubertModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.HubertModel),
|
|
33965
34824
|
/* harmony export */ HubertPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.HubertPreTrainedModel),
|
|
34825
|
+
/* harmony export */ IJepaForImageClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.IJepaForImageClassification),
|
|
34826
|
+
/* harmony export */ IJepaModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.IJepaModel),
|
|
34827
|
+
/* harmony export */ IJepaPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.IJepaPreTrainedModel),
|
|
33966
34828
|
/* harmony export */ Idefics3ForConditionalGeneration: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Idefics3ForConditionalGeneration),
|
|
33967
34829
|
/* harmony export */ Idefics3ImageProcessor: () => (/* reexport safe */ _models_image_processors_js__WEBPACK_IMPORTED_MODULE_13__.Idefics3ImageProcessor),
|
|
33968
34830
|
/* harmony export */ Idefics3PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Idefics3PreTrainedModel),
|
|
@@ -34081,6 +34943,11 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
34081
34943
|
/* harmony export */ MobileViTV2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MobileViTV2PreTrainedModel),
|
|
34082
34944
|
/* harmony export */ ModelOutput: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ModelOutput),
|
|
34083
34945
|
/* harmony export */ Moondream1ForConditionalGeneration: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Moondream1ForConditionalGeneration),
|
|
34946
|
+
/* harmony export */ MoonshineFeatureExtractor: () => (/* reexport safe */ _models_feature_extractors_js__WEBPACK_IMPORTED_MODULE_10__.MoonshineFeatureExtractor),
|
|
34947
|
+
/* harmony export */ MoonshineForConditionalGeneration: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MoonshineForConditionalGeneration),
|
|
34948
|
+
/* harmony export */ MoonshineModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MoonshineModel),
|
|
34949
|
+
/* harmony export */ MoonshinePreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MoonshinePreTrainedModel),
|
|
34950
|
+
/* harmony export */ MoonshineProcessor: () => (/* reexport safe */ _models_processors_js__WEBPACK_IMPORTED_MODULE_16__.MoonshineProcessor),
|
|
34084
34951
|
/* harmony export */ MptForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MptForCausalLM),
|
|
34085
34952
|
/* harmony export */ MptModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MptModel),
|
|
34086
34953
|
/* harmony export */ MptPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MptPreTrainedModel),
|
|
@@ -34101,6 +34968,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
34101
34968
|
/* harmony export */ OPTModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OPTModel),
|
|
34102
34969
|
/* harmony export */ OPTPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OPTPreTrainedModel),
|
|
34103
34970
|
/* harmony export */ ObjectDetectionPipeline: () => (/* reexport safe */ _pipelines_js__WEBPACK_IMPORTED_MODULE_1__.ObjectDetectionPipeline),
|
|
34971
|
+
/* harmony export */ Olmo2ForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Olmo2ForCausalLM),
|
|
34972
|
+
/* harmony export */ Olmo2Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Olmo2Model),
|
|
34973
|
+
/* harmony export */ Olmo2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Olmo2PreTrainedModel),
|
|
34104
34974
|
/* harmony export */ OlmoForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OlmoForCausalLM),
|
|
34105
34975
|
/* harmony export */ OlmoModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OlmoModel),
|
|
34106
34976
|
/* harmony export */ OlmoPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OlmoPreTrainedModel),
|
|
@@ -34117,6 +34987,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
34117
34987
|
/* harmony export */ Owlv2ImageProcessor: () => (/* reexport safe */ _models_image_processors_js__WEBPACK_IMPORTED_MODULE_13__.Owlv2ImageProcessor),
|
|
34118
34988
|
/* harmony export */ Owlv2Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Owlv2Model),
|
|
34119
34989
|
/* harmony export */ Owlv2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Owlv2PreTrainedModel),
|
|
34990
|
+
/* harmony export */ PaliGemmaForConditionalGeneration: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PaliGemmaForConditionalGeneration),
|
|
34991
|
+
/* harmony export */ PaliGemmaPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PaliGemmaPreTrainedModel),
|
|
34992
|
+
/* harmony export */ PaliGemmaProcessor: () => (/* reexport safe */ _models_processors_js__WEBPACK_IMPORTED_MODULE_16__.PaliGemmaProcessor),
|
|
34120
34993
|
/* harmony export */ PatchTSMixerForPrediction: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PatchTSMixerForPrediction),
|
|
34121
34994
|
/* harmony export */ PatchTSMixerModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PatchTSMixerModel),
|
|
34122
34995
|
/* harmony export */ PatchTSMixerPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PatchTSMixerPreTrainedModel),
|
|
@@ -34126,6 +34999,10 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
34126
34999
|
/* harmony export */ Phi3ForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Phi3ForCausalLM),
|
|
34127
35000
|
/* harmony export */ Phi3Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Phi3Model),
|
|
34128
35001
|
/* harmony export */ Phi3PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Phi3PreTrainedModel),
|
|
35002
|
+
/* harmony export */ Phi3VForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Phi3VForCausalLM),
|
|
35003
|
+
/* harmony export */ Phi3VImageProcessor: () => (/* reexport safe */ _models_image_processors_js__WEBPACK_IMPORTED_MODULE_13__.Phi3VImageProcessor),
|
|
35004
|
+
/* harmony export */ Phi3VPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Phi3VPreTrainedModel),
|
|
35005
|
+
/* harmony export */ Phi3VProcessor: () => (/* reexport safe */ _models_processors_js__WEBPACK_IMPORTED_MODULE_16__.Phi3VProcessor),
|
|
34129
35006
|
/* harmony export */ PhiForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PhiForCausalLM),
|
|
34130
35007
|
/* harmony export */ PhiModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PhiModel),
|
|
34131
35008
|
/* harmony export */ PhiPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PhiPreTrainedModel),
|
|
@@ -34374,9 +35251,11 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
34374
35251
|
/* harmony export */ permute_data: () => (/* reexport safe */ _utils_maths_js__WEBPACK_IMPORTED_MODULE_8__.permute_data),
|
|
34375
35252
|
/* harmony export */ pipeline: () => (/* reexport safe */ _pipelines_js__WEBPACK_IMPORTED_MODULE_1__.pipeline),
|
|
34376
35253
|
/* harmony export */ quantize_embeddings: () => (/* reexport safe */ _utils_tensor_js__WEBPACK_IMPORTED_MODULE_7__.quantize_embeddings),
|
|
35254
|
+
/* harmony export */ rand: () => (/* reexport safe */ _utils_tensor_js__WEBPACK_IMPORTED_MODULE_7__.rand),
|
|
34377
35255
|
/* harmony export */ read_audio: () => (/* reexport safe */ _utils_audio_js__WEBPACK_IMPORTED_MODULE_5__.read_audio),
|
|
34378
35256
|
/* harmony export */ rfft: () => (/* reexport safe */ _utils_tensor_js__WEBPACK_IMPORTED_MODULE_7__.rfft),
|
|
34379
35257
|
/* harmony export */ round: () => (/* reexport safe */ _utils_maths_js__WEBPACK_IMPORTED_MODULE_8__.round),
|
|
35258
|
+
/* harmony export */ slice: () => (/* reexport safe */ _utils_tensor_js__WEBPACK_IMPORTED_MODULE_7__.slice),
|
|
34380
35259
|
/* harmony export */ softmax: () => (/* reexport safe */ _utils_maths_js__WEBPACK_IMPORTED_MODULE_8__.softmax),
|
|
34381
35260
|
/* harmony export */ spectrogram: () => (/* reexport safe */ _utils_audio_js__WEBPACK_IMPORTED_MODULE_5__.spectrogram),
|
|
34382
35261
|
/* harmony export */ stack: () => (/* reexport safe */ _utils_tensor_js__WEBPACK_IMPORTED_MODULE_7__.stack),
|
|
@@ -34452,7 +35331,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
34452
35331
|
})();
|
|
34453
35332
|
|
|
34454
35333
|
var __webpack_export_target__ = exports;
|
|
34455
|
-
for(var
|
|
35334
|
+
for(var __webpack_i__ in __webpack_exports__) __webpack_export_target__[__webpack_i__] = __webpack_exports__[__webpack_i__];
|
|
34456
35335
|
if(__webpack_exports__.__esModule) Object.defineProperty(__webpack_export_target__, "__esModule", { value: true });
|
|
34457
35336
|
/******/ })()
|
|
34458
35337
|
;
|