@huggingface/transformers 3.1.0 → 3.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -3
- package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
- package/dist/transformers.cjs +965 -195
- package/dist/transformers.cjs.map +1 -1
- package/dist/transformers.js +2251 -1360
- package/dist/transformers.js.map +1 -1
- package/dist/transformers.min.cjs +1 -352
- package/dist/transformers.min.cjs.map +1 -1
- package/dist/transformers.min.js +1 -415
- package/dist/transformers.min.js.map +1 -1
- package/dist/transformers.min.mjs +1 -352
- package/dist/transformers.min.mjs.map +1 -1
- package/dist/transformers.mjs +979 -194
- package/dist/transformers.mjs.map +1 -1
- package/package.json +11 -16
- package/src/backends/onnx.js +2 -7
- package/src/configs.js +3 -1
- package/src/env.js +6 -6
- package/src/generation/configuration_utils.js +7 -0
- package/src/generation/logits_process.js +22 -16
- package/src/generation/streamers.js +7 -2
- package/src/models/idefics3/image_processing_idefics3.js +219 -0
- package/src/models/idefics3/processing_idefics3.js +136 -0
- package/src/models/image_processors.js +1 -0
- package/src/models/paligemma/processing_paligemma.js +82 -0
- package/src/models/processors.js +2 -0
- package/src/models.js +169 -39
- package/src/tokenizers.js +12 -1
- package/src/utils/core.js +53 -9
- package/src/utils/dtypes.js +2 -1
- package/src/utils/hub.js +8 -12
- package/src/utils/image.js +59 -16
- package/src/utils/tensor.js +6 -1
- package/types/backends/onnx.d.ts +2 -2
- package/types/backends/onnx.d.ts.map +1 -1
- package/types/base/feature_extraction_utils.d.ts +1 -1
- package/types/base/feature_extraction_utils.d.ts.map +1 -1
- package/types/base/image_processors_utils.d.ts +2 -2
- package/types/base/image_processors_utils.d.ts.map +1 -1
- package/types/base/processing_utils.d.ts +4 -4
- package/types/base/processing_utils.d.ts.map +1 -1
- package/types/configs.d.ts +7 -7
- package/types/configs.d.ts.map +1 -1
- package/types/env.d.ts +2 -2
- package/types/env.d.ts.map +1 -1
- package/types/generation/configuration_utils.d.ts +7 -1
- package/types/generation/configuration_utils.d.ts.map +1 -1
- package/types/generation/logits_process.d.ts +32 -22
- package/types/generation/logits_process.d.ts.map +1 -1
- package/types/generation/logits_sampler.d.ts.map +1 -1
- package/types/generation/parameters.d.ts +5 -5
- package/types/generation/stopping_criteria.d.ts +1 -1
- package/types/generation/stopping_criteria.d.ts.map +1 -1
- package/types/generation/streamers.d.ts +15 -10
- package/types/generation/streamers.d.ts.map +1 -1
- package/types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts +1 -1
- package/types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts.map +1 -1
- package/types/models/auto/feature_extraction_auto.d.ts.map +1 -1
- package/types/models/auto/image_processing_auto.d.ts.map +1 -1
- package/types/models/auto/processing_auto.d.ts +1 -1
- package/types/models/auto/processing_auto.d.ts.map +1 -1
- package/types/models/clap/feature_extraction_clap.d.ts +1 -1
- package/types/models/clap/feature_extraction_clap.d.ts.map +1 -1
- package/types/models/detr/image_processing_detr.d.ts +11 -11
- package/types/models/detr/image_processing_detr.d.ts.map +1 -1
- package/types/models/donut/image_processing_donut.d.ts +1 -1
- package/types/models/donut/image_processing_donut.d.ts.map +1 -1
- package/types/models/florence2/processing_florence2.d.ts.map +1 -1
- package/types/models/idefics3/image_processing_idefics3.d.ts +40 -0
- package/types/models/idefics3/image_processing_idefics3.d.ts.map +1 -0
- package/types/models/idefics3/processing_idefics3.d.ts +19 -0
- package/types/models/idefics3/processing_idefics3.d.ts.map +1 -0
- package/types/models/image_processors.d.ts +1 -0
- package/types/models/janus/image_processing_janus.d.ts +1 -1
- package/types/models/janus/image_processing_janus.d.ts.map +1 -1
- package/types/models/janus/processing_janus.d.ts.map +1 -1
- package/types/models/maskformer/image_processing_maskformer.d.ts +8 -8
- package/types/models/maskformer/image_processing_maskformer.d.ts.map +1 -1
- package/types/models/mgp_str/processing_mgp_str.d.ts +2 -2
- package/types/models/mgp_str/processing_mgp_str.d.ts.map +1 -1
- package/types/models/owlvit/image_processing_owlvit.d.ts.map +1 -1
- package/types/models/paligemma/processing_paligemma.d.ts +12 -0
- package/types/models/paligemma/processing_paligemma.d.ts.map +1 -0
- package/types/models/processors.d.ts +2 -0
- package/types/models/pyannote/feature_extraction_pyannote.d.ts.map +1 -1
- package/types/models/pyannote/processing_pyannote.d.ts +1 -1
- package/types/models/pyannote/processing_pyannote.d.ts.map +1 -1
- package/types/models/qwen2_vl/processing_qwen2_vl.d.ts.map +1 -1
- package/types/models/rt_detr/image_processing_rt_detr.d.ts.map +1 -1
- package/types/models/sam/image_processing_sam.d.ts.map +1 -1
- package/types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts +1 -1
- package/types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts.map +1 -1
- package/types/models/segformer/image_processing_segformer.d.ts.map +1 -1
- package/types/models/speecht5/processing_speecht5.d.ts.map +1 -1
- package/types/models/swin2sr/image_processing_swin2sr.d.ts +1 -1
- package/types/models/swin2sr/image_processing_swin2sr.d.ts.map +1 -1
- package/types/models/vitmatte/image_processing_vitmatte.d.ts.map +1 -1
- package/types/models/vitpose/image_processing_vitpose.d.ts +1 -1
- package/types/models/vitpose/image_processing_vitpose.d.ts.map +1 -1
- package/types/models/wav2vec2/feature_extraction_wav2vec2.d.ts.map +1 -1
- package/types/models/wav2vec2/processing_wav2vec2.d.ts.map +1 -1
- package/types/models/wespeaker/feature_extraction_wespeaker.d.ts +1 -1
- package/types/models/wespeaker/feature_extraction_wespeaker.d.ts.map +1 -1
- package/types/models/whisper/feature_extraction_whisper.d.ts +1 -1
- package/types/models/whisper/feature_extraction_whisper.d.ts.map +1 -1
- package/types/models/whisper/generation_whisper.d.ts.map +1 -1
- package/types/models/whisper/processing_whisper.d.ts.map +1 -1
- package/types/models/yolos/image_processing_yolos.d.ts.map +1 -1
- package/types/models.d.ts +44 -10
- package/types/models.d.ts.map +1 -1
- package/types/ops/registry.d.ts.map +1 -1
- package/types/pipelines.d.ts +26 -51
- package/types/pipelines.d.ts.map +1 -1
- package/types/tokenizers.d.ts +10 -6
- package/types/tokenizers.d.ts.map +1 -1
- package/types/utils/audio.d.ts.map +1 -1
- package/types/utils/constants.d.ts.map +1 -1
- package/types/utils/core.d.ts +94 -22
- package/types/utils/core.d.ts.map +1 -1
- package/types/utils/data-structures.d.ts.map +1 -1
- package/types/utils/devices.d.ts.map +1 -1
- package/types/utils/dtypes.d.ts +3 -2
- package/types/utils/dtypes.d.ts.map +1 -1
- package/types/utils/generic.d.ts.map +1 -1
- package/types/utils/hub.d.ts +3 -3
- package/types/utils/hub.d.ts.map +1 -1
- package/types/utils/image.d.ts +14 -1
- package/types/utils/image.d.ts.map +1 -1
- package/types/utils/maths.d.ts +10 -10
- package/types/utils/maths.d.ts.map +1 -1
- package/types/utils/tensor.d.ts +10 -8
- package/types/utils/tensor.d.ts.map +1 -1
package/dist/transformers.cjs
CHANGED
|
@@ -56,10 +56,10 @@ module.exports = require("url");
|
|
|
56
56
|
|
|
57
57
|
/***/ }),
|
|
58
58
|
|
|
59
|
-
/***/ "?
|
|
60
|
-
|
|
61
|
-
!***
|
|
62
|
-
|
|
59
|
+
/***/ "?8b6b":
|
|
60
|
+
/*!*********************************!*\
|
|
61
|
+
!*** onnxruntime-web (ignored) ***!
|
|
62
|
+
\*********************************/
|
|
63
63
|
/***/ (() => {
|
|
64
64
|
|
|
65
65
|
/* (ignored) */
|
|
@@ -3896,7 +3896,7 @@ const version = '1.20.1';
|
|
|
3896
3896
|
|
|
3897
3897
|
"use strict";
|
|
3898
3898
|
var onnxruntime_node__WEBPACK_IMPORTED_MODULE_1___namespace_cache;
|
|
3899
|
-
var
|
|
3899
|
+
var onnxruntime_web__WEBPACK_IMPORTED_MODULE_2___namespace_cache;
|
|
3900
3900
|
__webpack_require__.r(__webpack_exports__);
|
|
3901
3901
|
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
3902
3902
|
/* harmony export */ Tensor: () => (/* reexport safe */ onnxruntime_common__WEBPACK_IMPORTED_MODULE_3__.Tensor),
|
|
@@ -3907,7 +3907,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
3907
3907
|
/* harmony export */ });
|
|
3908
3908
|
/* harmony import */ var _env_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../env.js */ "./src/env.js");
|
|
3909
3909
|
/* harmony import */ var onnxruntime_node__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! onnxruntime-node */ "onnxruntime-node");
|
|
3910
|
-
/* harmony import */ var
|
|
3910
|
+
/* harmony import */ var onnxruntime_web__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! onnxruntime-web */ "?8b6b");
|
|
3911
3911
|
/* harmony import */ var onnxruntime_common__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! onnxruntime-common */ "./node_modules/onnxruntime-common/dist/esm/index.js");
|
|
3912
3912
|
/**
|
|
3913
3913
|
* @file Handler file for choosing the correct version of ONNX Runtime, based on the environment.
|
|
@@ -3933,11 +3933,6 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
3933
3933
|
// In either case, we select the default export if it exists, otherwise we use the named export.
|
|
3934
3934
|
|
|
3935
3935
|
|
|
3936
|
-
// Use subpath-imports to ensure Node.js and browser interoperability.
|
|
3937
|
-
// See package.json and https://nodejs.org/api/packages.html#subpath-imports
|
|
3938
|
-
// for more information.
|
|
3939
|
-
// @ts-ignore
|
|
3940
|
-
|
|
3941
3936
|
|
|
3942
3937
|
|
|
3943
3938
|
|
|
@@ -3979,7 +3974,7 @@ if (ORT_SYMBOL in globalThis) {
|
|
|
3979
3974
|
} else if (_env_js__WEBPACK_IMPORTED_MODULE_0__.apis.IS_NODE_ENV) {
|
|
3980
3975
|
ONNX = onnxruntime_node__WEBPACK_IMPORTED_MODULE_1__ ?? /*#__PURE__*/ (onnxruntime_node__WEBPACK_IMPORTED_MODULE_1___namespace_cache || (onnxruntime_node__WEBPACK_IMPORTED_MODULE_1___namespace_cache = __webpack_require__.t(onnxruntime_node__WEBPACK_IMPORTED_MODULE_1__, 2)));
|
|
3981
3976
|
|
|
3982
|
-
// Updated as of ONNX Runtime 1.
|
|
3977
|
+
// Updated as of ONNX Runtime 1.20.1
|
|
3983
3978
|
// The following table lists the supported versions of ONNX Runtime Node.js binding provided with pre-built binaries.
|
|
3984
3979
|
// | EPs/Platforms | Windows x64 | Windows arm64 | Linux x64 | Linux arm64 | MacOS x64 | MacOS arm64 |
|
|
3985
3980
|
// | ------------- | ----------- | ------------- | ----------------- | ----------- | --------- | ----------- |
|
|
@@ -4002,7 +3997,7 @@ if (ORT_SYMBOL in globalThis) {
|
|
|
4002
3997
|
supportedDevices.push('cpu');
|
|
4003
3998
|
defaultDevices = ['cpu'];
|
|
4004
3999
|
} else {
|
|
4005
|
-
ONNX = /*#__PURE__*/ (
|
|
4000
|
+
ONNX = /*#__PURE__*/ (onnxruntime_web__WEBPACK_IMPORTED_MODULE_2___namespace_cache || (onnxruntime_web__WEBPACK_IMPORTED_MODULE_2___namespace_cache = __webpack_require__.t(onnxruntime_web__WEBPACK_IMPORTED_MODULE_2__, 2)));
|
|
4006
4001
|
|
|
4007
4002
|
if (_env_js__WEBPACK_IMPORTED_MODULE_0__.apis.IS_WEBNN_AVAILABLE) {
|
|
4008
4003
|
// TODO: Only push supported providers (depending on available hardware)
|
|
@@ -5562,6 +5557,7 @@ function getNormalizedConfig(config) {
|
|
|
5562
5557
|
case 'paligemma':
|
|
5563
5558
|
case 'florence2':
|
|
5564
5559
|
case 'llava_onevision':
|
|
5560
|
+
case 'idefics3':
|
|
5565
5561
|
init_normalized_config = getNormalizedConfig(config.text_config);
|
|
5566
5562
|
break;
|
|
5567
5563
|
case 'moondream1':
|
|
@@ -5596,6 +5592,7 @@ function getNormalizedConfig(config) {
|
|
|
5596
5592
|
break;
|
|
5597
5593
|
case 'llama':
|
|
5598
5594
|
case 'olmo':
|
|
5595
|
+
case 'olmo2':
|
|
5599
5596
|
case 'mobilellm':
|
|
5600
5597
|
case 'granite':
|
|
5601
5598
|
case 'cohere':
|
|
@@ -5875,7 +5872,7 @@ class AutoConfig {
|
|
|
5875
5872
|
* See https://onnxruntime.ai/docs/tutorials/web/env-flags-and-session-options.html#freedimensionoverrides
|
|
5876
5873
|
* for more information.
|
|
5877
5874
|
* @property {import('./utils/devices.js').DeviceType} [device] The default device to use for the model.
|
|
5878
|
-
* @property {import('./utils/dtypes.js').DataType} [dtype] The default data type to use for the model.
|
|
5875
|
+
* @property {import('./utils/dtypes.js').DataType|Record<string, import('./utils/dtypes.js').DataType>} [dtype] The default data type to use for the model.
|
|
5879
5876
|
* @property {boolean|Record<string, boolean>} [use_external_data_format=false] Whether to load the model using the external data format (used for models >= 2GB in size).
|
|
5880
5877
|
*/
|
|
5881
5878
|
|
|
@@ -5925,12 +5922,12 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
5925
5922
|
|
|
5926
5923
|
|
|
5927
5924
|
|
|
5928
|
-
const VERSION = '3.1.
|
|
5925
|
+
const VERSION = '3.1.2';
|
|
5929
5926
|
|
|
5930
5927
|
// Check if various APIs are available (depends on environment)
|
|
5931
|
-
const IS_BROWSER_ENV = typeof
|
|
5932
|
-
const IS_WEBWORKER_ENV =
|
|
5933
|
-
const IS_WEB_CACHE_AVAILABLE =
|
|
5928
|
+
const IS_BROWSER_ENV = typeof window !== "undefined" && typeof window.document !== "undefined";
|
|
5929
|
+
const IS_WEBWORKER_ENV = typeof self !== "undefined" && self.constructor?.name === 'DedicatedWorkerGlobalScope';
|
|
5930
|
+
const IS_WEB_CACHE_AVAILABLE = typeof self !== "undefined" && 'caches' in self;
|
|
5934
5931
|
const IS_WEBGPU_AVAILABLE = typeof navigator !== 'undefined' && 'gpu' in navigator;
|
|
5935
5932
|
const IS_WEBNN_AVAILABLE = typeof navigator !== 'undefined' && 'ml' in navigator;
|
|
5936
5933
|
|
|
@@ -5943,7 +5940,7 @@ const IS_PATH_AVAILABLE = !isEmpty(path__WEBPACK_IMPORTED_MODULE_1__);
|
|
|
5943
5940
|
* A read-only object containing information about the APIs available in the current environment.
|
|
5944
5941
|
*/
|
|
5945
5942
|
const apis = Object.freeze({
|
|
5946
|
-
/** Whether we are running in a browser environment */
|
|
5943
|
+
/** Whether we are running in a browser environment (and not a web worker) */
|
|
5947
5944
|
IS_BROWSER_ENV,
|
|
5948
5945
|
|
|
5949
5946
|
/** Whether we are running in a web worker environment */
|
|
@@ -6036,7 +6033,7 @@ const env = {
|
|
|
6036
6033
|
remoteHost: 'https://huggingface.co/',
|
|
6037
6034
|
remotePathTemplate: '{model}/resolve/{revision}/',
|
|
6038
6035
|
|
|
6039
|
-
allowLocalModels: !IS_BROWSER_ENV,
|
|
6036
|
+
allowLocalModels: !(IS_BROWSER_ENV || IS_WEBWORKER_ENV),
|
|
6040
6037
|
localModelPath: localModelPath,
|
|
6041
6038
|
useFS: IS_FS_AVAILABLE,
|
|
6042
6039
|
|
|
@@ -6337,6 +6334,13 @@ class GenerationConfig {
|
|
|
6337
6334
|
*/
|
|
6338
6335
|
suppress_tokens = null;
|
|
6339
6336
|
|
|
6337
|
+
/**
|
|
6338
|
+
* A streamer that will be used to stream the generation.
|
|
6339
|
+
* @type {import('./streamers.js').TextStreamer}
|
|
6340
|
+
* @default null
|
|
6341
|
+
*/
|
|
6342
|
+
streamer = null;
|
|
6343
|
+
|
|
6340
6344
|
/**
|
|
6341
6345
|
* A list of tokens that will be suppressed at the beginning of the generation.
|
|
6342
6346
|
* The `SuppressBeginTokens` logit processor will set their log probs to `-inf` so that they are not sampled.
|
|
@@ -6643,7 +6647,7 @@ class ForcedBOSTokenLogitsProcessor extends LogitsProcessor {
|
|
|
6643
6647
|
* Apply the BOS token forcing to the logits.
|
|
6644
6648
|
* @param {bigint[][]} input_ids The input IDs.
|
|
6645
6649
|
* @param {Tensor} logits The logits.
|
|
6646
|
-
* @returns {
|
|
6650
|
+
* @returns {Tensor} The logits with BOS token forcing.
|
|
6647
6651
|
*/
|
|
6648
6652
|
_call(input_ids, logits) {
|
|
6649
6653
|
for (let i = 0; i < input_ids.length; ++i) {
|
|
@@ -6713,7 +6717,7 @@ class SuppressTokensAtBeginLogitsProcessor extends LogitsProcessor {
|
|
|
6713
6717
|
* Apply the BOS token forcing to the logits.
|
|
6714
6718
|
* @param {bigint[][]} input_ids The input IDs.
|
|
6715
6719
|
* @param {Tensor} logits The logits.
|
|
6716
|
-
* @returns {
|
|
6720
|
+
* @returns {Tensor} The logits with BOS token forcing.
|
|
6717
6721
|
*/
|
|
6718
6722
|
_call(input_ids, logits) {
|
|
6719
6723
|
for (let i = 0; i < input_ids.length; ++i) {
|
|
@@ -6883,7 +6887,7 @@ class NoRepeatNGramLogitsProcessor extends LogitsProcessor {
|
|
|
6883
6887
|
* Apply the no-repeat-ngram processor to the logits.
|
|
6884
6888
|
* @param {bigint[][]} input_ids The input IDs.
|
|
6885
6889
|
* @param {Tensor} logits The logits.
|
|
6886
|
-
* @returns {
|
|
6890
|
+
* @returns {Tensor} The logits with no-repeat-ngram processing.
|
|
6887
6891
|
*/
|
|
6888
6892
|
_call(input_ids, logits) {
|
|
6889
6893
|
for (let i = 0; i < input_ids.length; ++i) {
|
|
@@ -6898,12 +6902,22 @@ class NoRepeatNGramLogitsProcessor extends LogitsProcessor {
|
|
|
6898
6902
|
}
|
|
6899
6903
|
|
|
6900
6904
|
/**
|
|
6901
|
-
* A logits processor that
|
|
6905
|
+
* A logits processor that prevents the repetition of previous tokens through a penalty.
|
|
6906
|
+
* This penalty is applied at most once per token. Note that, for decoder-only models like most LLMs,
|
|
6907
|
+
* the considered tokens include the prompt.
|
|
6908
|
+
*
|
|
6909
|
+
* In the original [paper](https://arxiv.org/pdf/1909.05858.pdf), the authors suggest the use of a
|
|
6910
|
+
* penalty of around 1.2 to achieve a good balance between truthful generation and lack of repetition.
|
|
6911
|
+
* To penalize and reduce repetition, use `penalty` values above 1.0, where a higher value penalizes
|
|
6912
|
+
* more strongly. To reward and encourage repetition, use `penalty` values between 0.0 and 1.0, where
|
|
6913
|
+
* a lower value rewards more strongly.
|
|
6902
6914
|
*/
|
|
6903
6915
|
class RepetitionPenaltyLogitsProcessor extends LogitsProcessor {
|
|
6904
6916
|
/**
|
|
6905
6917
|
* Create a RepetitionPenaltyLogitsProcessor.
|
|
6906
|
-
* @param {number} penalty The
|
|
6918
|
+
* @param {number} penalty The parameter for repetition penalty.
|
|
6919
|
+
* - 1.0 means no penalty. Above 1.0 penalizes previously generated tokens.
|
|
6920
|
+
* - Between 0.0 and 1.0 rewards previously generated tokens.
|
|
6907
6921
|
*/
|
|
6908
6922
|
constructor(penalty) {
|
|
6909
6923
|
super();
|
|
@@ -6914,16 +6928,12 @@ class RepetitionPenaltyLogitsProcessor extends LogitsProcessor {
|
|
|
6914
6928
|
* Apply the repetition penalty to the logits.
|
|
6915
6929
|
* @param {bigint[][]} input_ids The input IDs.
|
|
6916
6930
|
* @param {Tensor} logits The logits.
|
|
6917
|
-
* @returns {
|
|
6931
|
+
* @returns {Tensor} The logits with repetition penalty processing.
|
|
6918
6932
|
*/
|
|
6919
6933
|
_call(input_ids, logits) {
|
|
6920
|
-
// Modify the logits corresponding to each element in `input_ids`.
|
|
6921
|
-
// As a consequence, the logits corresponding to tokens that appear
|
|
6922
|
-
// many times in the output will be penalised more.
|
|
6923
|
-
|
|
6924
6934
|
for (let i = 0; i < input_ids.length; ++i) {
|
|
6925
6935
|
const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
|
|
6926
|
-
for (const input_id of input_ids[i]) {
|
|
6936
|
+
for (const input_id of new Set(input_ids[i])) {
|
|
6927
6937
|
const token = Number(input_id);
|
|
6928
6938
|
if (batch_logits_data[token] < 0) {
|
|
6929
6939
|
batch_logits_data[token] *= this.penalty;
|
|
@@ -6956,7 +6966,7 @@ class MinLengthLogitsProcessor extends LogitsProcessor {
|
|
|
6956
6966
|
* Apply logit processor.
|
|
6957
6967
|
* @param {bigint[][]} input_ids The input IDs.
|
|
6958
6968
|
* @param {Tensor} logits The logits.
|
|
6959
|
-
* @returns {
|
|
6969
|
+
* @returns {Tensor} The processed logits.
|
|
6960
6970
|
*/
|
|
6961
6971
|
_call(input_ids, logits) {
|
|
6962
6972
|
for (let i = 0; i < input_ids.length; ++i) {
|
|
@@ -6994,7 +7004,7 @@ class MinNewTokensLengthLogitsProcessor extends LogitsProcessor {
|
|
|
6994
7004
|
* Apply logit processor.
|
|
6995
7005
|
* @param {bigint[][]} input_ids The input IDs.
|
|
6996
7006
|
* @param {Tensor} logits The logits.
|
|
6997
|
-
* @returns {
|
|
7007
|
+
* @returns {Tensor} The processed logits.
|
|
6998
7008
|
*/
|
|
6999
7009
|
_call(input_ids, logits) {
|
|
7000
7010
|
for (let i = 0; i < input_ids.length; ++i) {
|
|
@@ -7027,7 +7037,7 @@ class NoBadWordsLogitsProcessor extends LogitsProcessor {
|
|
|
7027
7037
|
* Apply logit processor.
|
|
7028
7038
|
* @param {bigint[][]} input_ids The input IDs.
|
|
7029
7039
|
* @param {Tensor} logits The logits.
|
|
7030
|
-
* @returns {
|
|
7040
|
+
* @returns {Tensor} The processed logits.
|
|
7031
7041
|
*/
|
|
7032
7042
|
_call(input_ids, logits) {
|
|
7033
7043
|
for (let i = 0; i < input_ids.length; ++i) {
|
|
@@ -7088,7 +7098,7 @@ class ClassifierFreeGuidanceLogitsProcessor extends LogitsProcessor {
|
|
|
7088
7098
|
* Apply logit processor.
|
|
7089
7099
|
* @param {bigint[][]} input_ids The input IDs.
|
|
7090
7100
|
* @param {Tensor} logits The logits.
|
|
7091
|
-
* @returns {
|
|
7101
|
+
* @returns {Tensor} The processed logits.
|
|
7092
7102
|
*/
|
|
7093
7103
|
_call(input_ids, logits) {
|
|
7094
7104
|
if (logits.dims[0] !== 2 * input_ids.length) {
|
|
@@ -7142,7 +7152,7 @@ class TemperatureLogitsWarper extends LogitsWarper {
|
|
|
7142
7152
|
* Apply logit warper.
|
|
7143
7153
|
* @param {bigint[][]} input_ids The input IDs.
|
|
7144
7154
|
* @param {Tensor} logits The logits.
|
|
7145
|
-
* @returns {
|
|
7155
|
+
* @returns {Tensor} The processed logits.
|
|
7146
7156
|
*/
|
|
7147
7157
|
_call(input_ids, logits) {
|
|
7148
7158
|
const batch_logits_data = /** @type {Float32Array} */(logits.data);
|
|
@@ -7660,7 +7670,12 @@ const stdout_write = _env_js__WEBPACK_IMPORTED_MODULE_2__.apis.IS_PROCESS_AVAILA
|
|
|
7660
7670
|
class TextStreamer extends BaseStreamer {
|
|
7661
7671
|
/**
|
|
7662
7672
|
*
|
|
7663
|
-
* @param {import('../tokenizers.js').PreTrainedTokenizer} tokenizer
|
|
7673
|
+
* @param {import('../tokenizers.js').PreTrainedTokenizer} tokenizer
|
|
7674
|
+
* @param {Object} options
|
|
7675
|
+
* @param {boolean} [options.skip_prompt=false] Whether to skip the prompt tokens
|
|
7676
|
+
* @param {function(string): void} [options.callback_function=null] Function to call when a piece of text is ready to display
|
|
7677
|
+
* @param {function(bigint[]): void} [options.token_callback_function=null] Function to call when a new token is generated
|
|
7678
|
+
* @param {Object} [options.decode_kwargs={}] Additional keyword arguments to pass to the tokenizer's decode method
|
|
7664
7679
|
*/
|
|
7665
7680
|
constructor(tokenizer, {
|
|
7666
7681
|
skip_prompt = false,
|
|
@@ -7769,7 +7784,7 @@ class WhisperTextStreamer extends TextStreamer {
|
|
|
7769
7784
|
* @param {Object} options
|
|
7770
7785
|
* @param {boolean} [options.skip_prompt=false] Whether to skip the prompt tokens
|
|
7771
7786
|
* @param {function(string): void} [options.callback_function=null] Function to call when a piece of text is ready to display
|
|
7772
|
-
* @param {function(
|
|
7787
|
+
* @param {function(bigint[]): void} [options.token_callback_function=null] Function to call when a new token is generated
|
|
7773
7788
|
* @param {function(number): void} [options.on_chunk_start=null] Function to call when a new chunk starts
|
|
7774
7789
|
* @param {function(number): void} [options.on_chunk_end=null] Function to call when a chunk ends
|
|
7775
7790
|
* @param {function(): void} [options.on_finalize=null] Function to call when the stream is finalized
|
|
@@ -8049,6 +8064,11 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
8049
8064
|
/* harmony export */ HubertForSequenceClassification: () => (/* binding */ HubertForSequenceClassification),
|
|
8050
8065
|
/* harmony export */ HubertModel: () => (/* binding */ HubertModel),
|
|
8051
8066
|
/* harmony export */ HubertPreTrainedModel: () => (/* binding */ HubertPreTrainedModel),
|
|
8067
|
+
/* harmony export */ IJepaForImageClassification: () => (/* binding */ IJepaForImageClassification),
|
|
8068
|
+
/* harmony export */ IJepaModel: () => (/* binding */ IJepaModel),
|
|
8069
|
+
/* harmony export */ IJepaPreTrainedModel: () => (/* binding */ IJepaPreTrainedModel),
|
|
8070
|
+
/* harmony export */ Idefics3ForConditionalGeneration: () => (/* binding */ Idefics3ForConditionalGeneration),
|
|
8071
|
+
/* harmony export */ Idefics3PreTrainedModel: () => (/* binding */ Idefics3PreTrainedModel),
|
|
8052
8072
|
/* harmony export */ ImageMattingOutput: () => (/* binding */ ImageMattingOutput),
|
|
8053
8073
|
/* harmony export */ JAISLMHeadModel: () => (/* binding */ JAISLMHeadModel),
|
|
8054
8074
|
/* harmony export */ JAISModel: () => (/* binding */ JAISModel),
|
|
@@ -8138,6 +8158,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
8138
8158
|
/* harmony export */ OPTForCausalLM: () => (/* binding */ OPTForCausalLM),
|
|
8139
8159
|
/* harmony export */ OPTModel: () => (/* binding */ OPTModel),
|
|
8140
8160
|
/* harmony export */ OPTPreTrainedModel: () => (/* binding */ OPTPreTrainedModel),
|
|
8161
|
+
/* harmony export */ Olmo2ForCausalLM: () => (/* binding */ Olmo2ForCausalLM),
|
|
8162
|
+
/* harmony export */ Olmo2Model: () => (/* binding */ Olmo2Model),
|
|
8163
|
+
/* harmony export */ Olmo2PreTrainedModel: () => (/* binding */ Olmo2PreTrainedModel),
|
|
8141
8164
|
/* harmony export */ OlmoForCausalLM: () => (/* binding */ OlmoForCausalLM),
|
|
8142
8165
|
/* harmony export */ OlmoModel: () => (/* binding */ OlmoModel),
|
|
8143
8166
|
/* harmony export */ OlmoPreTrainedModel: () => (/* binding */ OlmoPreTrainedModel),
|
|
@@ -8150,6 +8173,8 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
8150
8173
|
/* harmony export */ Owlv2ForObjectDetection: () => (/* binding */ Owlv2ForObjectDetection),
|
|
8151
8174
|
/* harmony export */ Owlv2Model: () => (/* binding */ Owlv2Model),
|
|
8152
8175
|
/* harmony export */ Owlv2PreTrainedModel: () => (/* binding */ Owlv2PreTrainedModel),
|
|
8176
|
+
/* harmony export */ PaliGemmaForConditionalGeneration: () => (/* binding */ PaliGemmaForConditionalGeneration),
|
|
8177
|
+
/* harmony export */ PaliGemmaPreTrainedModel: () => (/* binding */ PaliGemmaPreTrainedModel),
|
|
8153
8178
|
/* harmony export */ PatchTSMixerForPrediction: () => (/* binding */ PatchTSMixerForPrediction),
|
|
8154
8179
|
/* harmony export */ PatchTSMixerModel: () => (/* binding */ PatchTSMixerModel),
|
|
8155
8180
|
/* harmony export */ PatchTSMixerPreTrainedModel: () => (/* binding */ PatchTSMixerPreTrainedModel),
|
|
@@ -8455,6 +8480,22 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
|
|
|
8455
8480
|
}
|
|
8456
8481
|
}
|
|
8457
8482
|
|
|
8483
|
+
if (dtype === _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DATA_TYPES.auto) {
|
|
8484
|
+
// Try to choose the auto dtype based on the custom config
|
|
8485
|
+
let config_dtype = custom_config.dtype;
|
|
8486
|
+
if (typeof config_dtype !== 'string') {
|
|
8487
|
+
config_dtype = config_dtype[fileName];
|
|
8488
|
+
}
|
|
8489
|
+
|
|
8490
|
+
if (config_dtype && config_dtype !== _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DATA_TYPES.auto && _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DATA_TYPES.hasOwnProperty(config_dtype)) {
|
|
8491
|
+
// Defined by the custom config, and is not "auto"
|
|
8492
|
+
dtype = config_dtype;
|
|
8493
|
+
} else {
|
|
8494
|
+
// Choose default dtype based on device, falling back to fp32
|
|
8495
|
+
dtype = _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DEFAULT_DEVICE_DTYPE_MAPPING[selectedDevice] ?? _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DATA_TYPES.fp32;
|
|
8496
|
+
}
|
|
8497
|
+
}
|
|
8498
|
+
|
|
8458
8499
|
const selectedDtype = /** @type {import("./utils/dtypes.js").DataType} */(dtype);
|
|
8459
8500
|
|
|
8460
8501
|
if (!_utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(selectedDtype)) {
|
|
@@ -8660,9 +8701,17 @@ async function sessionRun(session, inputs) {
|
|
|
8660
8701
|
output = replaceTensors(output);
|
|
8661
8702
|
return output;
|
|
8662
8703
|
} catch (e) {
|
|
8704
|
+
// Error messages can be long (nested) and uninformative. For this reason,
|
|
8705
|
+
// we apply minor formatting to show the most important information
|
|
8706
|
+
const formatted = Object.fromEntries(Object.entries(checkedInputs)
|
|
8707
|
+
.map(([k, { type, dims, data }]) => [k, {
|
|
8708
|
+
// Extract these properties from the underlying ORT tensor
|
|
8709
|
+
type, dims, data,
|
|
8710
|
+
}]));
|
|
8711
|
+
|
|
8663
8712
|
// This usually occurs when the inputs are of the wrong type.
|
|
8664
8713
|
console.error(`An error occurred during model execution: "${e}".`);
|
|
8665
|
-
console.error('Inputs given to model:',
|
|
8714
|
+
console.error('Inputs given to model:', formatted);
|
|
8666
8715
|
throw e;
|
|
8667
8716
|
}
|
|
8668
8717
|
}
|
|
@@ -8807,7 +8856,9 @@ async function decoderForward(self, model_inputs, is_encoder_decoder = false) {
|
|
|
8807
8856
|
new_model_inputs.use_cache_branch = boolTensor(!!past_key_values);
|
|
8808
8857
|
}
|
|
8809
8858
|
if (session.inputNames.includes('position_ids') && new_model_inputs.attention_mask && !new_model_inputs.position_ids) {
|
|
8810
|
-
|
|
8859
|
+
// NOTE: Handle a special case for paligemma models, where positions are 1-indexed
|
|
8860
|
+
const start_index = self.config.model_type === 'paligemma' ? 1 : 0;
|
|
8861
|
+
new_model_inputs.position_ids = createPositionIds(new_model_inputs, past_key_values, start_index);
|
|
8811
8862
|
}
|
|
8812
8863
|
|
|
8813
8864
|
// Unpack the `past_key_values` object into model inputs
|
|
@@ -8819,6 +8870,39 @@ async function decoderForward(self, model_inputs, is_encoder_decoder = false) {
|
|
|
8819
8870
|
}
|
|
8820
8871
|
|
|
8821
8872
|
|
|
8873
|
+
|
|
8874
|
+
function default_merge_input_ids_with_image_features({
|
|
8875
|
+
image_token_id,
|
|
8876
|
+
inputs_embeds,
|
|
8877
|
+
image_features,
|
|
8878
|
+
input_ids,
|
|
8879
|
+
attention_mask,
|
|
8880
|
+
}) {
|
|
8881
|
+
const image_tokens = input_ids.tolist().map(ids =>
|
|
8882
|
+
ids.reduce((acc, x, idx) => {
|
|
8883
|
+
if (x == image_token_id) acc.push(idx);
|
|
8884
|
+
return acc;
|
|
8885
|
+
}, [])
|
|
8886
|
+
);
|
|
8887
|
+
const n_image_tokens = image_tokens.reduce((acc, x) => acc + x.length, 0);
|
|
8888
|
+
const n_image_features = image_features.dims[0];
|
|
8889
|
+
if (n_image_tokens !== n_image_features) {
|
|
8890
|
+
throw new Error(`Image features and image tokens do not match: tokens: ${n_image_tokens}, features ${n_image_features}`);
|
|
8891
|
+
}
|
|
8892
|
+
|
|
8893
|
+
// Equivalent to performing a masked_scatter
|
|
8894
|
+
let img = 0;
|
|
8895
|
+
for (let i = 0; i < image_tokens.length; ++i) {
|
|
8896
|
+
const tokens = image_tokens[i];
|
|
8897
|
+
const embeds = inputs_embeds[i];
|
|
8898
|
+
for (let j = 0; j < tokens.length; ++j) {
|
|
8899
|
+
embeds[tokens[j]].data.set(image_features[img++].data)
|
|
8900
|
+
}
|
|
8901
|
+
}
|
|
8902
|
+
return { inputs_embeds, attention_mask }
|
|
8903
|
+
}
|
|
8904
|
+
|
|
8905
|
+
|
|
8822
8906
|
/**
|
|
8823
8907
|
* Forward pass of an image-text-to-text model.
|
|
8824
8908
|
* @param {Object} self The image-text-to-text model model.
|
|
@@ -8910,14 +8994,14 @@ async function imageTextToTextForward(self, {
|
|
|
8910
8994
|
* @param {Tensor} attention_mask
|
|
8911
8995
|
* @returns {{data: BigInt64Array, dims: number[]}}
|
|
8912
8996
|
*/
|
|
8913
|
-
function cumsum_masked_fill(attention_mask) {
|
|
8997
|
+
function cumsum_masked_fill(attention_mask, start_index = 0) {
|
|
8914
8998
|
const [bz, seq_len] = attention_mask.dims;
|
|
8915
8999
|
const attn_mask_data = attention_mask.data;
|
|
8916
9000
|
|
|
8917
9001
|
const data = new BigInt64Array(attn_mask_data.length);
|
|
8918
9002
|
for (let i = 0; i < bz; ++i) {
|
|
8919
9003
|
const start = i * seq_len;
|
|
8920
|
-
let sum = BigInt(
|
|
9004
|
+
let sum = BigInt(start_index);
|
|
8921
9005
|
for (let j = 0; j < seq_len; ++j) {
|
|
8922
9006
|
const index = start + j;
|
|
8923
9007
|
if (attn_mask_data[index] === 0n) {
|
|
@@ -8944,10 +9028,10 @@ function cumsum_masked_fill(attention_mask) {
|
|
|
8944
9028
|
* position_ids = position_ids[:, -input_ids.shape[1] :]
|
|
8945
9029
|
* ```
|
|
8946
9030
|
*/
|
|
8947
|
-
function createPositionIds(model_inputs, past_key_values = null) {
|
|
9031
|
+
function createPositionIds(model_inputs, past_key_values = null, start_index = 0) {
|
|
8948
9032
|
const { input_ids, inputs_embeds, attention_mask } = model_inputs;
|
|
8949
9033
|
|
|
8950
|
-
const { data, dims } = cumsum_masked_fill(attention_mask);
|
|
9034
|
+
const { data, dims } = cumsum_masked_fill(attention_mask, start_index);
|
|
8951
9035
|
let position_ids = new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_9__.Tensor('int64', data, dims);
|
|
8952
9036
|
if (past_key_values) {
|
|
8953
9037
|
const offset = -(input_ids ?? inputs_embeds).dims.at(1);
|
|
@@ -9286,7 +9370,10 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
|
|
|
9286
9370
|
|
|
9287
9371
|
} else { // should be MODEL_TYPES.EncoderOnly
|
|
9288
9372
|
if (modelType !== MODEL_TYPES.EncoderOnly) {
|
|
9289
|
-
|
|
9373
|
+
const type = modelName ?? config?.model_type;
|
|
9374
|
+
if (type !== 'custom') {
|
|
9375
|
+
console.warn(`Model type for '${type}' not found, assuming encoder-only architecture. Please report this at ${_utils_constants_js__WEBPACK_IMPORTED_MODULE_6__.GITHUB_ISSUE_URL}.`)
|
|
9376
|
+
}
|
|
9290
9377
|
}
|
|
9291
9378
|
info = await Promise.all([
|
|
9292
9379
|
constructSessions(pretrained_model_name_or_path, {
|
|
@@ -10030,7 +10117,7 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
|
|
|
10030
10117
|
const dtype = session?.config?.kv_cache_dtype ?? 'float32';
|
|
10031
10118
|
const empty = (dtype === 'float16') ? new Uint16Array() : [];
|
|
10032
10119
|
|
|
10033
|
-
const batch_size = (decoderFeeds[this.main_input_name] ?? decoderFeeds.attention_mask)
|
|
10120
|
+
const batch_size = (decoderFeeds[this.main_input_name] ?? decoderFeeds.attention_mask)?.dims?.[0] ?? 1;
|
|
10034
10121
|
const shapes = (0,_configs_js__WEBPACK_IMPORTED_MODULE_0__.getKeyValueShapes)(this.config, { batch_size });
|
|
10035
10122
|
|
|
10036
10123
|
for (const name in shapes) {
|
|
@@ -11577,8 +11664,8 @@ class VisionEncoderDecoderModel extends PreTrainedModel {
|
|
|
11577
11664
|
class LlavaPreTrainedModel extends PreTrainedModel {
|
|
11578
11665
|
forward_params = [
|
|
11579
11666
|
'input_ids',
|
|
11580
|
-
'pixel_values',
|
|
11581
11667
|
'attention_mask',
|
|
11668
|
+
'pixel_values',
|
|
11582
11669
|
'position_ids',
|
|
11583
11670
|
'past_key_values',
|
|
11584
11671
|
];
|
|
@@ -11760,6 +11847,70 @@ class Florence2ForConditionalGeneration extends Florence2PreTrainedModel {
|
|
|
11760
11847
|
return decoder_outputs;
|
|
11761
11848
|
}
|
|
11762
11849
|
}
|
|
11850
|
+
|
|
11851
|
+
/**
 * Base class for PaliGemma checkpoints. Only declares the input names fed to
 * the underlying ONNX session during the forward pass.
 */
class PaliGemmaPreTrainedModel extends PreTrainedModel {
    // Names matched against the ONNX graph inputs; order is significant.
    forward_params = [
        'input_ids',
        // 'inputs_embeds',
        'attention_mask',
        'pixel_values',
        'position_ids',
        'past_key_values',
    ];
}
|
|
11861
|
+
|
|
11862
|
+
/**
 * PaliGemma vision-language model for conditional generation.
 */
class PaliGemmaForConditionalGeneration extends PaliGemmaPreTrainedModel {
    /**
     * Flattens the vision-tower output to [num_patches, hidden_dim] and
     * delegates to the shared merge helper, using PaliGemma's configured
     * `image_token_index` as the placeholder token id.
     * @param {Object} kwargs Carries inputs_embeds, image_features, input_ids and attention_mask.
     */
    _merge_input_ids_with_image_features(kwargs) {
        const { image_features } = kwargs;
        const hidden_dim = image_features.dims.at(-1);
        // Collapse every leading dimension into one row per patch.
        const flattened_features = image_features.view(-1, hidden_dim);

        return default_merge_input_ids_with_image_features({
            // @ts-ignore
            image_token_id: this.config.image_token_index,
            ...kwargs,
            image_features: flattened_features,
        })
    }
}
|
|
11875
|
+
|
|
11876
|
+
//////////////////////////////////////////////////
|
|
11877
|
+
// Idefics3 Models
|
|
11878
|
+
/**
 * Base class for Idefics3 checkpoints. Declares the ONNX session inputs used
 * during the forward pass, including the per-patch pixel attention mask.
 */
class Idefics3PreTrainedModel extends PreTrainedModel {
    // Names matched against the ONNX graph inputs; order is significant.
    forward_params = [
        'input_ids',
        'attention_mask',
        'pixel_values',
        'pixel_attention_mask',
        'position_ids',
        'past_key_values',
    ];
}
|
|
11888
|
+
|
|
11889
|
+
/**
 * The Idefics3 model, which consists of a vision backbone and a language model.
 * (The previous comment said "LLAVA" — a copy-paste slip from the Llava class.)
 */
class Idefics3ForConditionalGeneration extends Idefics3PreTrainedModel {

    /**
     * Runs the dedicated vision-encoder ONNX session over the image patches.
     * @param {Object} inputs
     * @param {*} inputs.pixel_values Batched image patches.
     * @param {*} inputs.pixel_attention_mask Mask distinguishing real patches from padding.
     * @returns {Promise<*>} The `image_features` output of the vision encoder.
     */
    async encode_image({ pixel_values, pixel_attention_mask }) {
        const features = (await sessionRun(this.sessions['vision_encoder'], { pixel_values, pixel_attention_mask })).image_features;
        return features;
    }

    /**
     * Flattens vision features to [num_patches, hidden_dim] and scatters them
     * into the `<image>` placeholder positions via the shared merge helper.
     * @param {Object} kwargs Carries inputs_embeds, image_features, input_ids and attention_mask.
     */
    _merge_input_ids_with_image_features(kwargs) {
        const vision_hidden_size = kwargs.image_features.dims.at(-1);
        const reshaped_image_hidden_states = kwargs.image_features.view(-1, vision_hidden_size);

        return default_merge_input_ids_with_image_features({
            // @ts-ignore
            image_token_id: this.config.image_token_id,
            ...kwargs,
            image_features: reshaped_image_hidden_states,
        })
    }
}
|
|
11911
|
+
//////////////////////////////////////////////////
|
|
11912
|
+
|
|
11913
|
+
//////////////////////////////////////////////////
|
|
11763
11914
|
class CLIPPreTrainedModel extends PreTrainedModel { }
|
|
11764
11915
|
|
|
11765
11916
|
/**
|
|
@@ -12259,6 +12410,13 @@ class OlmoModel extends OlmoPreTrainedModel { }
|
|
|
12259
12410
|
class OlmoForCausalLM extends OlmoPreTrainedModel { }
|
|
12260
12411
|
//////////////////////////////////////////////////
|
|
12261
12412
|
|
|
12413
|
+
//////////////////////////////////////////////////
|
|
12414
|
+
// OLMo2 models
|
|
12415
|
+
// Base class for OLMo2 checkpoints; all behaviour is inherited.
class Olmo2PreTrainedModel extends PreTrainedModel { }
// Bare OLMo2 transformer outputting raw hidden states.
class Olmo2Model extends Olmo2PreTrainedModel { }
// OLMo2 with a language-modeling head for causal (next-token) generation.
class Olmo2ForCausalLM extends Olmo2PreTrainedModel { }
|
|
12418
|
+
//////////////////////////////////////////////////
|
|
12419
|
+
|
|
12262
12420
|
|
|
12263
12421
|
//////////////////////////////////////////////////
|
|
12264
12422
|
// Granite models
|
|
@@ -12553,36 +12711,12 @@ class Qwen2VLForConditionalGeneration extends Qwen2VLPreTrainedModel {
|
|
|
12553
12711
|
return features;
|
|
12554
12712
|
}
|
|
12555
12713
|
|
|
12556
|
-
_merge_input_ids_with_image_features({
|
|
12557
|
-
|
|
12558
|
-
|
|
12559
|
-
|
|
12560
|
-
|
|
12561
|
-
|
|
12562
|
-
// @ts-ignore
|
|
12563
|
-
const { image_token_id } = this.config;
|
|
12564
|
-
const image_tokens = input_ids.tolist().map(ids =>
|
|
12565
|
-
ids.reduce((acc, x, idx) => {
|
|
12566
|
-
if (x == image_token_id) acc.push(idx);
|
|
12567
|
-
return acc;
|
|
12568
|
-
}, [])
|
|
12569
|
-
);
|
|
12570
|
-
const n_image_tokens = image_tokens.reduce((acc, x) => acc + x.length, 0);
|
|
12571
|
-
const n_image_features = image_features.dims[0];
|
|
12572
|
-
if (n_image_tokens !== n_image_features) {
|
|
12573
|
-
throw new Error(`Image features and image tokens do not match: tokens: ${n_image_tokens}, features ${n_image_features}`);
|
|
12574
|
-
}
|
|
12575
|
-
|
|
12576
|
-
// Equivalent to performing a masked_scatter
|
|
12577
|
-
let img = 0;
|
|
12578
|
-
for (let i = 0; i < image_tokens.length; ++i) {
|
|
12579
|
-
const tokens = image_tokens[i];
|
|
12580
|
-
const embeds = inputs_embeds[i];
|
|
12581
|
-
for (let j = 0; j < tokens.length; ++j) {
|
|
12582
|
-
embeds[tokens[j]].data.set(image_features[img++].data)
|
|
12583
|
-
}
|
|
12584
|
-
}
|
|
12585
|
-
return { inputs_embeds, attention_mask }
|
|
12714
|
+
    /**
     * Delegates to the shared merge helper, supplying Qwen2-VL's configured
     * image placeholder token id. `kwargs` carries inputs_embeds,
     * image_features, input_ids and attention_mask through unchanged.
     * @param {Object} kwargs
     */
    _merge_input_ids_with_image_features(kwargs) {
        return default_merge_input_ids_with_image_features({
            // @ts-ignore
            image_token_id: this.config.image_token_id,
            ...kwargs
        })
    }
|
|
12586
12720
|
}
|
|
12587
12721
|
|
|
12588
12722
|
prepare_inputs_for_generation(input_ids, model_inputs, generation_config) {
|
|
@@ -12699,6 +12833,20 @@ class ViTForImageClassification extends ViTPreTrainedModel {
|
|
|
12699
12833
|
//////////////////////////////////////////////////
|
|
12700
12834
|
|
|
12701
12835
|
|
|
12836
|
+
//////////////////////////////////////////////////
|
|
12837
|
+
// Base class for I-JEPA checkpoints; all behaviour is inherited.
class IJepaPreTrainedModel extends PreTrainedModel { }
// Bare I-JEPA vision transformer outputting raw hidden states.
class IJepaModel extends IJepaPreTrainedModel { }
// I-JEPA with an image-classification head.
class IJepaForImageClassification extends IJepaPreTrainedModel {
    /**
     * Runs the base forward pass and wraps the result so callers receive
     * classification logits in a SequenceClassifierOutput.
     * @param {any} model_inputs
     */
    async _call(model_inputs) {
        return new SequenceClassifierOutput(await super._call(model_inputs));
    }
}
|
|
12847
|
+
//////////////////////////////////////////////////
|
|
12848
|
+
|
|
12849
|
+
|
|
12702
12850
|
//////////////////////////////////////////////////
|
|
12703
12851
|
class VitPosePreTrainedModel extends PreTrainedModel { }
|
|
12704
12852
|
|
|
@@ -14969,6 +15117,7 @@ const MODEL_MAPPING_NAMES_ENCODER_ONLY = new Map([
|
|
|
14969
15117
|
['rt_detr', ['RTDetrModel', RTDetrModel]],
|
|
14970
15118
|
['table-transformer', ['TableTransformerModel', TableTransformerModel]],
|
|
14971
15119
|
['vit', ['ViTModel', ViTModel]],
|
|
15120
|
+
['ijepa', ['IJepaModel', IJepaModel]],
|
|
14972
15121
|
['pvt', ['PvtModel', PvtModel]],
|
|
14973
15122
|
['vit_msn', ['ViTMSNModel', ViTMSNModel]],
|
|
14974
15123
|
['vit_mae', ['ViTMAEModel', ViTMAEModel]],
|
|
@@ -15033,6 +15182,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
|
|
|
15033
15182
|
['codegen', ['CodeGenModel', CodeGenModel]],
|
|
15034
15183
|
['llama', ['LlamaModel', LlamaModel]],
|
|
15035
15184
|
['olmo', ['OlmoModel', OlmoModel]],
|
|
15185
|
+
['olmo2', ['Olmo2Model', Olmo2Model]],
|
|
15036
15186
|
['mobilellm', ['MobileLLMModel', MobileLLMModel]],
|
|
15037
15187
|
['granite', ['GraniteModel', GraniteModel]],
|
|
15038
15188
|
['cohere', ['CohereModel', CohereModel]],
|
|
@@ -15124,6 +15274,7 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
|
|
|
15124
15274
|
['codegen', ['CodeGenForCausalLM', CodeGenForCausalLM]],
|
|
15125
15275
|
['llama', ['LlamaForCausalLM', LlamaForCausalLM]],
|
|
15126
15276
|
['olmo', ['OlmoForCausalLM', OlmoForCausalLM]],
|
|
15277
|
+
['olmo2', ['Olmo2ForCausalLM', Olmo2ForCausalLM]],
|
|
15127
15278
|
['mobilellm', ['MobileLLMForCausalLM', MobileLLMForCausalLM]],
|
|
15128
15279
|
['granite', ['GraniteForCausalLM', GraniteForCausalLM]],
|
|
15129
15280
|
['cohere', ['CohereForCausalLM', CohereForCausalLM]],
|
|
@@ -15187,6 +15338,7 @@ const MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES = new Map([
|
|
|
15187
15338
|
|
|
15188
15339
|
const MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES = new Map([
|
|
15189
15340
|
['vision-encoder-decoder', ['VisionEncoderDecoderModel', VisionEncoderDecoderModel]],
|
|
15341
|
+
['idefics3', ['Idefics3ForConditionalGeneration', Idefics3ForConditionalGeneration]],
|
|
15190
15342
|
]);
|
|
15191
15343
|
|
|
15192
15344
|
const MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES = new Map([
|
|
@@ -15195,6 +15347,8 @@ const MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES = new Map([
|
|
|
15195
15347
|
['moondream1', ['Moondream1ForConditionalGeneration', Moondream1ForConditionalGeneration]],
|
|
15196
15348
|
['florence2', ['Florence2ForConditionalGeneration', Florence2ForConditionalGeneration]],
|
|
15197
15349
|
['qwen2-vl', ['Qwen2VLForConditionalGeneration', Qwen2VLForConditionalGeneration]],
|
|
15350
|
+
['idefics3', ['Idefics3ForConditionalGeneration', Idefics3ForConditionalGeneration]],
|
|
15351
|
+
['paligemma', ['PaliGemmaForConditionalGeneration', PaliGemmaForConditionalGeneration]],
|
|
15198
15352
|
]);
|
|
15199
15353
|
|
|
15200
15354
|
const MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES = new Map([
|
|
@@ -15203,6 +15357,7 @@ const MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES = new Map([
|
|
|
15203
15357
|
|
|
15204
15358
|
const MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES = new Map([
|
|
15205
15359
|
['vit', ['ViTForImageClassification', ViTForImageClassification]],
|
|
15360
|
+
['ijepa', ['IJepaForImageClassification', IJepaForImageClassification]],
|
|
15206
15361
|
['pvt', ['PvtForImageClassification', PvtForImageClassification]],
|
|
15207
15362
|
['vit_msn', ['ViTMSNForImageClassification', ViTMSNForImageClassification]],
|
|
15208
15363
|
['fastvit', ['FastViTForImageClassification', FastViTForImageClassification]],
|
|
@@ -16824,6 +16979,396 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
16824
16979
|
class GLPNFeatureExtractor extends _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_0__.ImageProcessor { }
|
|
16825
16980
|
|
|
16826
16981
|
|
|
16982
|
+
/***/ }),
|
|
16983
|
+
|
|
16984
|
+
/***/ "./src/models/idefics3/image_processing_idefics3.js":
|
|
16985
|
+
/*!**********************************************************!*\
|
|
16986
|
+
!*** ./src/models/idefics3/image_processing_idefics3.js ***!
|
|
16987
|
+
\**********************************************************/
|
|
16988
|
+
/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
|
|
16989
|
+
|
|
16990
|
+
"use strict";
|
|
16991
|
+
__webpack_require__.r(__webpack_exports__);
|
|
16992
|
+
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
16993
|
+
/* harmony export */ Idefics3ImageProcessor: () => (/* binding */ Idefics3ImageProcessor)
|
|
16994
|
+
/* harmony export */ });
|
|
16995
|
+
/* harmony import */ var _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/image_processors_utils.js */ "./src/base/image_processors_utils.js");
|
|
16996
|
+
/* harmony import */ var _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../../utils/tensor.js */ "./src/utils/tensor.js");
|
|
16997
|
+
|
|
16998
|
+
|
|
16999
|
+
|
|
17000
|
+
|
|
17001
|
+
|
|
17002
|
+
/**
 * Image processor for Idefics3. Resizes each image to multiples of the vision
 * encoder's maximum edge, optionally splits large images into a grid of
 * patches plus a downscaled "global" image, and pads/stacks everything into
 * batched `pixel_values` with a matching `pixel_attention_mask`.
 */
class Idefics3ImageProcessor extends _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_0__.ImageProcessor {
    constructor(config) {
        super(config);

        // Whether large images are tiled into patches (default: true).
        this.do_image_splitting = config.do_image_splitting ?? true;
        // Expected to be of the form { longest_edge: number } — see `_call`.
        this.max_image_size = config.max_image_size;
    }

    /**
     * @typedef {import('../../utils/image.js').RawImage} RawImage
     * @typedef {import('../../utils/tensor.js').Tensor} Tensor
     */

    /**
     * Calculate size to resize images to, to be multiples of `vision_encoder_max_size` while preserving the aspect ratio.
     * @param {Tensor} pixel_values Tensor of the image to resize.
     * @param {number} vision_encoder_max_size Maximum size of the output image. If the image is larger than this size,
     * it will be split into patches of this size, and the original image will be concatenated with the patches, resized to max_size.
     */
    get_resize_for_vision_encoder(pixel_values, vision_encoder_max_size) {
        let [height, width] = pixel_values.dims.slice(-2);

        const aspect_ratio = width / height;
        // Round the longer edge up to a multiple first, then derive the other
        // edge from the aspect ratio and round that up as well.
        if (width >= height) {
            width = Math.ceil(width / vision_encoder_max_size) * vision_encoder_max_size;
            height = Math.floor(width / aspect_ratio);
            height = Math.ceil(height / vision_encoder_max_size) * vision_encoder_max_size;
        } else {
            height = Math.ceil(height / vision_encoder_max_size) * vision_encoder_max_size;
            width = Math.floor(height * aspect_ratio);
            width = Math.ceil(width / vision_encoder_max_size) * vision_encoder_max_size;
        }
        return { height, width };
    }

    /** @param {RawImage|RawImage[]|RawImage[][]} images */
    async _call(images, {
        do_image_splitting = null,
        return_row_col_info = false,
    } = {}) {

        // Normalize the input to a 2D batch: batch -> images per sample.
        /** @type {RawImage[][]} */
        let batched_2d_images;
        if (!Array.isArray(images)) {
            batched_2d_images = [[images]];
        } else {
            if (images.length === 0 || !images[0]) {
                throw new Error("No images provided.");
            }
            if (!Array.isArray(images[0])) {
                batched_2d_images = [/** @type {RawImage[]} */(images)];
            } else {
                batched_2d_images = /** @type {RawImage[][]} */(images);
            }
        }

        // List of tensors, each with shape [patches, channels, height, width]
        let all_pixel_values = [];
        // Per batch item: how many patch rows/cols each image was split into
        // (0/0 signals "not split" to the processor's prompt builder).
        let images_list_rows = [];
        let images_list_cols = [];

        const original_sizes = [];
        const reshaped_input_sizes = [];
        for (const image_batch of batched_2d_images) {

            let images_list = await Promise.all(image_batch.map(x => this.preprocess(x)));

            // Original sizes of images
            original_sizes.push(...images_list.map(x => x.original_size));

            // Reshaped sizes of images, before padding or cropping
            reshaped_input_sizes.push(...images_list.map(x => x.reshaped_input_size));

            // Convert images to 4D tensors for easier processing
            images_list.forEach(x => x.pixel_values.unsqueeze_(0));

            const { longest_edge } = this.max_image_size;

            /** @type {Tensor[]} */
            let images_tensor;
            if (do_image_splitting ?? this.do_image_splitting) {
                let image_rows = new Array(images_list.length);
                let image_cols = new Array(images_list.length);

                // We first resize both height and width of each image to the nearest max_image_size multiple, disregarding the aspect ratio
                images_tensor = await Promise.all(images_list.map(async (x, i) => {
                    const new_size = this.get_resize_for_vision_encoder(x.pixel_values, longest_edge);

                    const resized = await (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.interpolate_4d)(x.pixel_values, {
                        size: [new_size.height, new_size.width],
                    });

                    const { frames, num_splits_h, num_splits_w } = await this.split_image(resized, this.max_image_size);
                    image_rows[i] = num_splits_h;
                    image_cols[i] = num_splits_w;
                    // Stack patches (and the trailing global image) along dim 0.
                    return (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.cat)(frames, 0);
                }));

                images_list_rows.push(image_rows);
                images_list_cols.push(image_cols);

            } else {
                // No splitting: just resize every image to a square of the max edge.
                /** @type {[number, number]} */
                const size = [longest_edge, longest_edge];
                images_tensor = await Promise.all(
                    images_list.map(x => (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.interpolate_4d)(x.pixel_values, { size }))
                );

                images_list_rows.push(new Array(images_list.length).fill(0));
                images_list_cols.push(new Array(images_list.length).fill(0));
            }

            all_pixel_values.push((0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.cat)(images_tensor, 0));
        }

        const batch_size = all_pixel_values.length;
        // NOTE(review): assumes every batch item shares [c, h, w] with item 0 —
        // holds because all patches are resized to the same max_image_size.
        const [n, c, h, w] = all_pixel_values[0].dims;

        // Stack pixel values
        let pixel_values;
        let pixel_attention_mask;
        if (batch_size === 1) {
            // Single item: no padding needed, mask is all-true.
            pixel_values = all_pixel_values[0].unsqueeze_(0);
            pixel_attention_mask = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.full)([batch_size, n, h, w], true);
        } else {
            // Add padding (if necessary) to images with less patches than the maximum number of patches
            const max_num_patches = Math.max(...all_pixel_values.map(x => x.dims.at(0)));

            pixel_attention_mask = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.full)([batch_size, max_num_patches, h, w], true);
            const pixel_attention_mask_data = pixel_attention_mask.data;
            // Flat element count per batch item in the mask buffer.
            const pixel_attention_mask_stride = max_num_patches * h * w;
            for (let i = 0; i < batch_size; ++i) {
                const num_patches = all_pixel_values[i].dims[0];
                if (num_patches < max_num_patches) {
                    // Pad the pixel tensor with zero patches...
                    all_pixel_values[i] = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.cat)([
                        all_pixel_values[i],
                        (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.full)([max_num_patches - num_patches, c, h, w], 0),
                    ], 0);

                    // ...and mark the padded region false in the attention mask.
                    const start_offset = i * pixel_attention_mask_stride + num_patches * h * w;
                    const end_offset = (i + 1) * pixel_attention_mask_stride;
                    pixel_attention_mask_data.fill(false, start_offset, end_offset);
                }
            }
            pixel_values = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.stack)(all_pixel_values, 0);
        }

        return {
            pixel_values,
            pixel_attention_mask,

            original_sizes,
            reshaped_input_sizes,
            // rows/cols are only needed by the processor's prompt expansion.
            ...(
                return_row_col_info
                    ? { rows: images_list_rows, cols: images_list_cols }
                    : {}
            ),
        }
    }

    /**
     * Splits an image larger than `longest_edge` into a grid of crops, then
     * appends the (downscaled) original image as the final "global" frame.
     * @param {Tensor} pixel_values 4D image tensor.
     * @param {{longest_edge: number}} param1 Maximum edge length of each crop.
     * @returns {Promise<{frames: Tensor[], num_splits_h: number, num_splits_w: number}>}
     *   `num_splits_*` are 0 when no splitting occurred.
     */
    async split_image(pixel_values, { longest_edge }) {
        const max_height = longest_edge;
        const max_width = longest_edge;

        const frames = [];

        const [height, width] = pixel_values.dims.slice(-2);

        let num_splits_h = 0, num_splits_w = 0;

        if (height > max_height || width > max_width) {
            // Calculate the number of splits
            num_splits_h = Math.ceil(height / max_height);
            num_splits_w = Math.ceil(width / max_width);

            // Calculate the optimal width and height for the sub-images
            const optimal_height = Math.ceil(height / num_splits_h);
            const optimal_width = Math.ceil(width / num_splits_w);

            // Iterate through each row and column
            for (let r = 0; r < num_splits_h; r++) {
                for (let c = 0; c < num_splits_w; c++) {
                    // Calculate the starting point of the crop
                    const start_x = c * optimal_width;
                    const start_y = r * optimal_height;

                    // Calculate the ending point of the crop
                    const end_x = Math.min(start_x + optimal_width, width);
                    const end_y = Math.min(start_y + optimal_height, height);

                    // Crop the image
                    frames.push(pixel_values.slice(null, null, [start_y, end_y], [start_x, end_x]));
                }
            }

            // Resize the global image to match max dimensions for memory efficiency
            const global_image_height = max_height;
            const global_image_width = max_width;

            if (height !== global_image_height || width !== global_image_width) {
                pixel_values = await (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.interpolate_4d)(pixel_values, {
                    size: [global_image_height, global_image_width],
                })
            }
        }

        // The global (possibly downscaled) image is always the last frame.
        frames.push(pixel_values);

        return { frames, num_splits_h, num_splits_w };
    }
}
|
|
17214
|
+
|
|
17215
|
+
|
|
17216
|
+
/***/ }),
|
|
17217
|
+
|
|
17218
|
+
/***/ "./src/models/idefics3/processing_idefics3.js":
|
|
17219
|
+
/*!****************************************************!*\
|
|
17220
|
+
!*** ./src/models/idefics3/processing_idefics3.js ***!
|
|
17221
|
+
\****************************************************/
|
|
17222
|
+
/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
|
|
17223
|
+
|
|
17224
|
+
"use strict";
|
|
17225
|
+
__webpack_require__.r(__webpack_exports__);
|
|
17226
|
+
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
17227
|
+
/* harmony export */ Idefics3Processor: () => (/* binding */ Idefics3Processor)
|
|
17228
|
+
/* harmony export */ });
|
|
17229
|
+
/* harmony import */ var _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/processing_utils.js */ "./src/base/processing_utils.js");
|
|
17230
|
+
/* harmony import */ var _auto_image_processing_auto_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../auto/image_processing_auto.js */ "./src/models/auto/image_processing_auto.js");
|
|
17231
|
+
/* harmony import */ var _tokenizers_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../../tokenizers.js */ "./src/tokenizers.js");
|
|
17232
|
+
/* harmony import */ var _utils_image_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ../../utils/image.js */ "./src/utils/image.js");
|
|
17233
|
+
/* harmony import */ var _utils_core_js__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ../../utils/core.js */ "./src/utils/core.js");
|
|
17234
|
+
|
|
17235
|
+
|
|
17236
|
+
|
|
17237
|
+
|
|
17238
|
+
|
|
17239
|
+
|
|
17240
|
+
|
|
17241
|
+
/**
 * Prompt with expanded image tokens for when the image is split into patches:
 * one `<row_R_col_C>`-tagged run of image tokens per patch, a newline after
 * each patch row, then the global image block.
 * @private
 */
function _prompt_split_image(image_seq_len, image_rows, image_cols, fake_token_around_image, image_token, global_img_token) {
    const tile_tokens = image_token.repeat(image_seq_len);
    const row_texts = [];
    for (let r = 0; r < image_rows; ++r) {
        let row_text = "";
        for (let c = 0; c < image_cols; ++c) {
            row_text += fake_token_around_image + `<row_${r + 1}_col_${c + 1}>` + tile_tokens;
        }
        row_texts.push(row_text);
    }
    const grid_section = row_texts.map(t => t + "\n").join("");
    const global_section =
        `\n${fake_token_around_image}` +
        `${global_img_token}` +
        tile_tokens +
        `${fake_token_around_image}`;
    return grid_section + global_section;
}
|
|
17266
|
+
|
|
17267
|
+
/**
 * Prompt with expanded image tokens for a single (non-split) image:
 * the global image token followed by `image_seq_len` image tokens, wrapped
 * in the fake-token delimiter on both sides.
 * @private
 */
function _prompt_single_image(image_seq_len, fake_token_around_image, image_token, global_img_token) {
    const parts = [
        fake_token_around_image,
        global_img_token,
        image_token.repeat(image_seq_len),
        fake_token_around_image,
    ];
    return parts.join("");
}
|
|
17279
|
+
|
|
17280
|
+
/**
 * Builds the expanded image-token prompt for one image. A 0x0 grid means the
 * image was not split into patches, so the single-image form is used;
 * otherwise the per-patch grid form is produced.
 */
function get_image_prompt_string(image_rows, image_cols, image_seq_len, fake_token_around_image, image_token, global_img_token) {
    const was_split = image_rows !== 0 || image_cols !== 0;
    return was_split
        ? _prompt_split_image(
            image_seq_len, image_rows, image_cols, fake_token_around_image, image_token, global_img_token
        )
        : _prompt_single_image(
            image_seq_len,
            fake_token_around_image,
            image_token,
            global_img_token
        );
}
|
|
17293
|
+
|
|
17294
|
+
|
|
17295
|
+
/**
 * Processor for Idefics3: preprocesses images and expands every `<image>`
 * placeholder in the text into the full image-token sequence the model expects.
 */
class Idefics3Processor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__.Processor {
    static image_processor_class = _auto_image_processing_auto_js__WEBPACK_IMPORTED_MODULE_1__.AutoImageProcessor
    static tokenizer_class = _tokenizers_js__WEBPACK_IMPORTED_MODULE_2__.AutoTokenizer
    static uses_processor_config = true;

    // Special tokens used when expanding `<image>` placeholders.
    fake_image_token = "<fake_token_around_image>";
    image_token = "<image>";
    global_img_token = "<global-img>";

    /**
     * Tokenizes text and preprocesses images, replacing each `<image>`
     * placeholder with the expanded image-token prompt derived from how the
     * corresponding image was split.
     *
     * @param {string|string[]} text Prompt(s); must contain one `<image>` per image.
     * @param {RawImage|RawImage[]|RawImage[][]} images Optional image(s).
     * @returns {Promise<any>} Merged tokenizer and image-processor outputs.
     */
    async _call(text, images = null, options = {}) {
        // Row/col info is required to expand the image placeholders below.
        options.return_row_col_info ??= true;

        let image_inputs;

        if (images) {
            image_inputs = await this.image_processor(images, options);
        }

        // NOTE: We assume text is present
        if (!Array.isArray(text)) {
            text = [text];
        }

        // FIX: optional chaining — when `images` is null, `image_inputs` is
        // undefined and the previous `image_inputs.rows` threw a TypeError.
        const image_rows = image_inputs?.rows ?? [new Array(text.length).fill(0)];
        const image_cols = image_inputs?.cols ?? [new Array(text.length).fill(0)];

        const image_seq_len = this.config.image_seq_len;
        // NOTE(review): collected but currently unused — presumably intended
        // for validating the placeholder count against the number of images.
        const n_images_in_text = []
        const prompt_strings = [];
        for (let i = 0; i < text.length; ++i) {
            const sample = text[i];
            const sample_rows = image_rows[i];
            const sample_cols = image_cols[i];

            n_images_in_text.push((0,_utils_core_js__WEBPACK_IMPORTED_MODULE_4__.count)(sample, this.image_token));

            // Replace the image token with fake tokens around the expanded image token sequence of length `image_seq_len`
            const image_prompt_strings = sample_rows.map(
                (n_rows, j) => get_image_prompt_string(
                    n_rows,
                    sample_cols[j],
                    image_seq_len,
                    this.fake_image_token,
                    this.image_token,
                    this.global_img_token,
                )
            );

            const split_sample = sample.split(this.image_token);
            if (split_sample.length === 0) {
                throw new Error("The image token should be present in the text.");
            }

            // Place in the image prompt strings where the image tokens are
            let new_sample = split_sample[0];
            for (let j = 0; j < image_prompt_strings.length; ++j) {
                new_sample += image_prompt_strings[j] + split_sample[j + 1];
            }
            prompt_strings.push(new_sample);
        }

        const text_inputs = this.tokenizer(prompt_strings);

        // Spreading `image_inputs` is safe even when it is undefined.
        return {
            ...text_inputs,
            ...image_inputs,
        }
    }
}
|
|
17370
|
+
|
|
17371
|
+
|
|
16827
17372
|
/***/ }),
|
|
16828
17373
|
|
|
16829
17374
|
/***/ "./src/models/image_processors.js":
|
|
@@ -16852,40 +17397,41 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
16852
17397
|
/* harmony export */ DonutImageProcessor: () => (/* reexport safe */ _donut_image_processing_donut_js__WEBPACK_IMPORTED_MODULE_7__.DonutImageProcessor),
|
|
16853
17398
|
/* harmony export */ EfficientNetImageProcessor: () => (/* reexport safe */ _efficientnet_image_processing_efficientnet_js__WEBPACK_IMPORTED_MODULE_9__.EfficientNetImageProcessor),
|
|
16854
17399
|
/* harmony export */ GLPNFeatureExtractor: () => (/* reexport safe */ _glpn_image_processing_glpn_js__WEBPACK_IMPORTED_MODULE_10__.GLPNFeatureExtractor),
|
|
16855
|
-
/* harmony export */
|
|
16856
|
-
/* harmony export */
|
|
16857
|
-
/* harmony export */
|
|
16858
|
-
/* harmony export */
|
|
16859
|
-
/* harmony export */
|
|
16860
|
-
/* harmony export */
|
|
16861
|
-
/* harmony export */
|
|
16862
|
-
/* harmony export */
|
|
16863
|
-
/* harmony export */
|
|
16864
|
-
/* harmony export */
|
|
16865
|
-
/* harmony export */
|
|
16866
|
-
/* harmony export */
|
|
16867
|
-
/* harmony export */
|
|
16868
|
-
/* harmony export */
|
|
16869
|
-
/* harmony export */
|
|
16870
|
-
/* harmony export */
|
|
16871
|
-
/* harmony export */
|
|
16872
|
-
/* harmony export */
|
|
16873
|
-
/* harmony export */
|
|
16874
|
-
/* harmony export */
|
|
16875
|
-
/* harmony export */
|
|
16876
|
-
/* harmony export */
|
|
16877
|
-
/* harmony export */
|
|
16878
|
-
/* harmony export */
|
|
16879
|
-
/* harmony export */
|
|
16880
|
-
/* harmony export */
|
|
16881
|
-
/* harmony export */
|
|
16882
|
-
/* harmony export */
|
|
16883
|
-
/* harmony export */
|
|
16884
|
-
/* harmony export */
|
|
16885
|
-
/* harmony export */
|
|
16886
|
-
/* harmony export */
|
|
16887
|
-
/* harmony export */
|
|
16888
|
-
/* harmony export */
|
|
17400
|
+
/* harmony export */ Idefics3ImageProcessor: () => (/* reexport safe */ _idefics3_image_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_11__.Idefics3ImageProcessor),
|
|
17401
|
+
/* harmony export */ JinaCLIPImageProcessor: () => (/* reexport safe */ _jina_clip_image_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_13__.JinaCLIPImageProcessor),
|
|
17402
|
+
/* harmony export */ LlavaOnevisionImageProcessor: () => (/* reexport safe */ _llava_onevision_image_processing_llava_onevision_js__WEBPACK_IMPORTED_MODULE_14__.LlavaOnevisionImageProcessor),
|
|
17403
|
+
/* harmony export */ Mask2FormerImageProcessor: () => (/* reexport safe */ _mask2former_image_processing_mask2former_js__WEBPACK_IMPORTED_MODULE_15__.Mask2FormerImageProcessor),
|
|
17404
|
+
/* harmony export */ MaskFormerFeatureExtractor: () => (/* reexport safe */ _maskformer_image_processing_maskformer_js__WEBPACK_IMPORTED_MODULE_16__.MaskFormerFeatureExtractor),
|
|
17405
|
+
/* harmony export */ MaskFormerImageProcessor: () => (/* reexport safe */ _maskformer_image_processing_maskformer_js__WEBPACK_IMPORTED_MODULE_16__.MaskFormerImageProcessor),
|
|
17406
|
+
/* harmony export */ MobileNetV1FeatureExtractor: () => (/* reexport safe */ _mobilenet_v1_image_processing_mobilenet_v1_js__WEBPACK_IMPORTED_MODULE_17__.MobileNetV1FeatureExtractor),
|
|
17407
|
+
/* harmony export */ MobileNetV1ImageProcessor: () => (/* reexport safe */ _mobilenet_v1_image_processing_mobilenet_v1_js__WEBPACK_IMPORTED_MODULE_17__.MobileNetV1ImageProcessor),
|
|
17408
|
+
/* harmony export */ MobileNetV2FeatureExtractor: () => (/* reexport safe */ _mobilenet_v2_image_processing_mobilenet_v2_js__WEBPACK_IMPORTED_MODULE_18__.MobileNetV2FeatureExtractor),
|
|
17409
|
+
/* harmony export */ MobileNetV2ImageProcessor: () => (/* reexport safe */ _mobilenet_v2_image_processing_mobilenet_v2_js__WEBPACK_IMPORTED_MODULE_18__.MobileNetV2ImageProcessor),
|
|
17410
|
+
/* harmony export */ MobileNetV3FeatureExtractor: () => (/* reexport safe */ _mobilenet_v3_image_processing_mobilenet_v3_js__WEBPACK_IMPORTED_MODULE_19__.MobileNetV3FeatureExtractor),
|
|
17411
|
+
/* harmony export */ MobileNetV3ImageProcessor: () => (/* reexport safe */ _mobilenet_v3_image_processing_mobilenet_v3_js__WEBPACK_IMPORTED_MODULE_19__.MobileNetV3ImageProcessor),
|
|
17412
|
+
/* harmony export */ MobileNetV4FeatureExtractor: () => (/* reexport safe */ _mobilenet_v4_image_processing_mobilenet_v4_js__WEBPACK_IMPORTED_MODULE_20__.MobileNetV4FeatureExtractor),
|
|
17413
|
+
/* harmony export */ MobileNetV4ImageProcessor: () => (/* reexport safe */ _mobilenet_v4_image_processing_mobilenet_v4_js__WEBPACK_IMPORTED_MODULE_20__.MobileNetV4ImageProcessor),
|
|
17414
|
+
/* harmony export */ MobileViTFeatureExtractor: () => (/* reexport safe */ _mobilevit_image_processing_mobilevit_js__WEBPACK_IMPORTED_MODULE_21__.MobileViTFeatureExtractor),
|
|
17415
|
+
/* harmony export */ MobileViTImageProcessor: () => (/* reexport safe */ _mobilevit_image_processing_mobilevit_js__WEBPACK_IMPORTED_MODULE_21__.MobileViTImageProcessor),
|
|
17416
|
+
/* harmony export */ NougatImageProcessor: () => (/* reexport safe */ _nougat_image_processing_nougat_js__WEBPACK_IMPORTED_MODULE_22__.NougatImageProcessor),
|
|
17417
|
+
/* harmony export */ OwlViTFeatureExtractor: () => (/* reexport safe */ _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_24__.OwlViTFeatureExtractor),
|
|
17418
|
+
/* harmony export */ OwlViTImageProcessor: () => (/* reexport safe */ _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_24__.OwlViTImageProcessor),
|
|
17419
|
+
/* harmony export */ Owlv2ImageProcessor: () => (/* reexport safe */ _owlv2_image_processing_owlv2_js__WEBPACK_IMPORTED_MODULE_23__.Owlv2ImageProcessor),
|
|
17420
|
+
/* harmony export */ PvtImageProcessor: () => (/* reexport safe */ _pvt_image_processing_pvt_js__WEBPACK_IMPORTED_MODULE_25__.PvtImageProcessor),
|
|
17421
|
+
/* harmony export */ Qwen2VLImageProcessor: () => (/* reexport safe */ _qwen2_vl_image_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_26__.Qwen2VLImageProcessor),
|
|
17422
|
+
/* harmony export */ RTDetrImageProcessor: () => (/* reexport safe */ _rt_detr_image_processing_rt_detr_js__WEBPACK_IMPORTED_MODULE_27__.RTDetrImageProcessor),
|
|
17423
|
+
/* harmony export */ SamImageProcessor: () => (/* reexport safe */ _sam_image_processing_sam_js__WEBPACK_IMPORTED_MODULE_28__.SamImageProcessor),
|
|
17424
|
+
/* harmony export */ SegformerFeatureExtractor: () => (/* reexport safe */ _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_29__.SegformerFeatureExtractor),
|
|
17425
|
+
/* harmony export */ SegformerImageProcessor: () => (/* reexport safe */ _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_29__.SegformerImageProcessor),
|
|
17426
|
+
/* harmony export */ SiglipImageProcessor: () => (/* reexport safe */ _siglip_image_processing_siglip_js__WEBPACK_IMPORTED_MODULE_30__.SiglipImageProcessor),
|
|
17427
|
+
/* harmony export */ Swin2SRImageProcessor: () => (/* reexport safe */ _swin2sr_image_processing_swin2sr_js__WEBPACK_IMPORTED_MODULE_31__.Swin2SRImageProcessor),
|
|
17428
|
+
/* harmony export */ VLMImageProcessor: () => (/* reexport safe */ _janus_image_processing_janus_js__WEBPACK_IMPORTED_MODULE_12__.VLMImageProcessor),
|
|
17429
|
+
/* harmony export */ ViTFeatureExtractor: () => (/* reexport safe */ _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_32__.ViTFeatureExtractor),
|
|
17430
|
+
/* harmony export */ ViTImageProcessor: () => (/* reexport safe */ _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_32__.ViTImageProcessor),
|
|
17431
|
+
/* harmony export */ VitMatteImageProcessor: () => (/* reexport safe */ _vitmatte_image_processing_vitmatte_js__WEBPACK_IMPORTED_MODULE_33__.VitMatteImageProcessor),
|
|
17432
|
+
/* harmony export */ VitPoseImageProcessor: () => (/* reexport safe */ _vitpose_image_processing_vitpose_js__WEBPACK_IMPORTED_MODULE_34__.VitPoseImageProcessor),
|
|
17433
|
+
/* harmony export */ YolosFeatureExtractor: () => (/* reexport safe */ _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_35__.YolosFeatureExtractor),
|
|
17434
|
+
/* harmony export */ YolosImageProcessor: () => (/* reexport safe */ _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_35__.YolosImageProcessor)
|
|
16889
17435
|
/* harmony export */ });
|
|
16890
17436
|
/* harmony import */ var _beit_image_processing_beit_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./beit/image_processing_beit.js */ "./src/models/beit/image_processing_beit.js");
|
|
16891
17437
|
/* harmony import */ var _bit_image_processing_bit_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./bit/image_processing_bit.js */ "./src/models/bit/image_processing_bit.js");
|
|
@@ -16898,30 +17444,32 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
16898
17444
|
/* harmony import */ var _dpt_image_processing_dpt_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./dpt/image_processing_dpt.js */ "./src/models/dpt/image_processing_dpt.js");
|
|
16899
17445
|
/* harmony import */ var _efficientnet_image_processing_efficientnet_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./efficientnet/image_processing_efficientnet.js */ "./src/models/efficientnet/image_processing_efficientnet.js");
|
|
16900
17446
|
/* harmony import */ var _glpn_image_processing_glpn_js__WEBPACK_IMPORTED_MODULE_10__ = __webpack_require__(/*! ./glpn/image_processing_glpn.js */ "./src/models/glpn/image_processing_glpn.js");
|
|
16901
|
-
/* harmony import */ var
|
|
16902
|
-
/* harmony import */ var
|
|
16903
|
-
/* harmony import */ var
|
|
16904
|
-
/* harmony import */ var
|
|
16905
|
-
/* harmony import */ var
|
|
16906
|
-
/* harmony import */ var
|
|
16907
|
-
/* harmony import */ var
|
|
16908
|
-
/* harmony import */ var
|
|
16909
|
-
/* harmony import */ var
|
|
16910
|
-
/* harmony import */ var
|
|
16911
|
-
/* harmony import */ var
|
|
16912
|
-
/* harmony import */ var
|
|
16913
|
-
/* harmony import */ var
|
|
16914
|
-
/* harmony import */ var
|
|
16915
|
-
/* harmony import */ var
|
|
16916
|
-
/* harmony import */ var
|
|
16917
|
-
/* harmony import */ var
|
|
16918
|
-
/* harmony import */ var
|
|
16919
|
-
/* harmony import */ var
|
|
16920
|
-
/* harmony import */ var
|
|
16921
|
-
/* harmony import */ var
|
|
16922
|
-
/* harmony import */ var
|
|
16923
|
-
/* harmony import */ var
|
|
16924
|
-
/* harmony import */ var
|
|
17447
|
+
/* harmony import */ var _idefics3_image_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_11__ = __webpack_require__(/*! ./idefics3/image_processing_idefics3.js */ "./src/models/idefics3/image_processing_idefics3.js");
|
|
17448
|
+
/* harmony import */ var _janus_image_processing_janus_js__WEBPACK_IMPORTED_MODULE_12__ = __webpack_require__(/*! ./janus/image_processing_janus.js */ "./src/models/janus/image_processing_janus.js");
|
|
17449
|
+
/* harmony import */ var _jina_clip_image_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_13__ = __webpack_require__(/*! ./jina_clip/image_processing_jina_clip.js */ "./src/models/jina_clip/image_processing_jina_clip.js");
|
|
17450
|
+
/* harmony import */ var _llava_onevision_image_processing_llava_onevision_js__WEBPACK_IMPORTED_MODULE_14__ = __webpack_require__(/*! ./llava_onevision/image_processing_llava_onevision.js */ "./src/models/llava_onevision/image_processing_llava_onevision.js");
|
|
17451
|
+
/* harmony import */ var _mask2former_image_processing_mask2former_js__WEBPACK_IMPORTED_MODULE_15__ = __webpack_require__(/*! ./mask2former/image_processing_mask2former.js */ "./src/models/mask2former/image_processing_mask2former.js");
|
|
17452
|
+
/* harmony import */ var _maskformer_image_processing_maskformer_js__WEBPACK_IMPORTED_MODULE_16__ = __webpack_require__(/*! ./maskformer/image_processing_maskformer.js */ "./src/models/maskformer/image_processing_maskformer.js");
|
|
17453
|
+
/* harmony import */ var _mobilenet_v1_image_processing_mobilenet_v1_js__WEBPACK_IMPORTED_MODULE_17__ = __webpack_require__(/*! ./mobilenet_v1/image_processing_mobilenet_v1.js */ "./src/models/mobilenet_v1/image_processing_mobilenet_v1.js");
|
|
17454
|
+
/* harmony import */ var _mobilenet_v2_image_processing_mobilenet_v2_js__WEBPACK_IMPORTED_MODULE_18__ = __webpack_require__(/*! ./mobilenet_v2/image_processing_mobilenet_v2.js */ "./src/models/mobilenet_v2/image_processing_mobilenet_v2.js");
|
|
17455
|
+
/* harmony import */ var _mobilenet_v3_image_processing_mobilenet_v3_js__WEBPACK_IMPORTED_MODULE_19__ = __webpack_require__(/*! ./mobilenet_v3/image_processing_mobilenet_v3.js */ "./src/models/mobilenet_v3/image_processing_mobilenet_v3.js");
|
|
17456
|
+
/* harmony import */ var _mobilenet_v4_image_processing_mobilenet_v4_js__WEBPACK_IMPORTED_MODULE_20__ = __webpack_require__(/*! ./mobilenet_v4/image_processing_mobilenet_v4.js */ "./src/models/mobilenet_v4/image_processing_mobilenet_v4.js");
|
|
17457
|
+
/* harmony import */ var _mobilevit_image_processing_mobilevit_js__WEBPACK_IMPORTED_MODULE_21__ = __webpack_require__(/*! ./mobilevit/image_processing_mobilevit.js */ "./src/models/mobilevit/image_processing_mobilevit.js");
|
|
17458
|
+
/* harmony import */ var _nougat_image_processing_nougat_js__WEBPACK_IMPORTED_MODULE_22__ = __webpack_require__(/*! ./nougat/image_processing_nougat.js */ "./src/models/nougat/image_processing_nougat.js");
|
|
17459
|
+
/* harmony import */ var _owlv2_image_processing_owlv2_js__WEBPACK_IMPORTED_MODULE_23__ = __webpack_require__(/*! ./owlv2/image_processing_owlv2.js */ "./src/models/owlv2/image_processing_owlv2.js");
|
|
17460
|
+
/* harmony import */ var _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_24__ = __webpack_require__(/*! ./owlvit/image_processing_owlvit.js */ "./src/models/owlvit/image_processing_owlvit.js");
|
|
17461
|
+
/* harmony import */ var _pvt_image_processing_pvt_js__WEBPACK_IMPORTED_MODULE_25__ = __webpack_require__(/*! ./pvt/image_processing_pvt.js */ "./src/models/pvt/image_processing_pvt.js");
|
|
17462
|
+
/* harmony import */ var _qwen2_vl_image_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_26__ = __webpack_require__(/*! ./qwen2_vl/image_processing_qwen2_vl.js */ "./src/models/qwen2_vl/image_processing_qwen2_vl.js");
|
|
17463
|
+
/* harmony import */ var _rt_detr_image_processing_rt_detr_js__WEBPACK_IMPORTED_MODULE_27__ = __webpack_require__(/*! ./rt_detr/image_processing_rt_detr.js */ "./src/models/rt_detr/image_processing_rt_detr.js");
|
|
17464
|
+
/* harmony import */ var _sam_image_processing_sam_js__WEBPACK_IMPORTED_MODULE_28__ = __webpack_require__(/*! ./sam/image_processing_sam.js */ "./src/models/sam/image_processing_sam.js");
|
|
17465
|
+
/* harmony import */ var _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_29__ = __webpack_require__(/*! ./segformer/image_processing_segformer.js */ "./src/models/segformer/image_processing_segformer.js");
|
|
17466
|
+
/* harmony import */ var _siglip_image_processing_siglip_js__WEBPACK_IMPORTED_MODULE_30__ = __webpack_require__(/*! ./siglip/image_processing_siglip.js */ "./src/models/siglip/image_processing_siglip.js");
|
|
17467
|
+
/* harmony import */ var _swin2sr_image_processing_swin2sr_js__WEBPACK_IMPORTED_MODULE_31__ = __webpack_require__(/*! ./swin2sr/image_processing_swin2sr.js */ "./src/models/swin2sr/image_processing_swin2sr.js");
|
|
17468
|
+
/* harmony import */ var _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_32__ = __webpack_require__(/*! ./vit/image_processing_vit.js */ "./src/models/vit/image_processing_vit.js");
|
|
17469
|
+
/* harmony import */ var _vitmatte_image_processing_vitmatte_js__WEBPACK_IMPORTED_MODULE_33__ = __webpack_require__(/*! ./vitmatte/image_processing_vitmatte.js */ "./src/models/vitmatte/image_processing_vitmatte.js");
|
|
17470
|
+
/* harmony import */ var _vitpose_image_processing_vitpose_js__WEBPACK_IMPORTED_MODULE_34__ = __webpack_require__(/*! ./vitpose/image_processing_vitpose.js */ "./src/models/vitpose/image_processing_vitpose.js");
|
|
17471
|
+
/* harmony import */ var _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_35__ = __webpack_require__(/*! ./yolos/image_processing_yolos.js */ "./src/models/yolos/image_processing_yolos.js");
|
|
17472
|
+
|
|
16925
17473
|
|
|
16926
17474
|
|
|
16927
17475
|
|
|
@@ -17688,6 +18236,106 @@ class OwlViTProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE
|
|
|
17688
18236
|
}
|
|
17689
18237
|
|
|
17690
18238
|
|
|
18239
|
+
/***/ }),
|
|
18240
|
+
|
|
18241
|
+
/***/ "./src/models/paligemma/processing_paligemma.js":
|
|
18242
|
+
/*!******************************************************!*\
|
|
18243
|
+
!*** ./src/models/paligemma/processing_paligemma.js ***!
|
|
18244
|
+
\******************************************************/
|
|
18245
|
+
/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
|
|
18246
|
+
|
|
18247
|
+
"use strict";
|
|
18248
|
+
__webpack_require__.r(__webpack_exports__);
|
|
18249
|
+
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
18250
|
+
/* harmony export */ PaliGemmaProcessor: () => (/* binding */ PaliGemmaProcessor)
|
|
18251
|
+
/* harmony export */ });
|
|
18252
|
+
/* harmony import */ var _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/processing_utils.js */ "./src/base/processing_utils.js");
|
|
18253
|
+
/* harmony import */ var _auto_image_processing_auto_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../auto/image_processing_auto.js */ "./src/models/auto/image_processing_auto.js");
|
|
18254
|
+
/* harmony import */ var _tokenizers_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../../tokenizers.js */ "./src/tokenizers.js");
|
|
18255
|
+
|
|
18256
|
+
|
|
18257
|
+
|
|
18258
|
+
|
|
18259
|
+
const IMAGE_TOKEN = "<image>";
|
|
18260
|
+
|
|
18261
|
+
function build_string_from_input(
|
|
18262
|
+
prompt,
|
|
18263
|
+
bos_token,
|
|
18264
|
+
image_seq_len,
|
|
18265
|
+
image_token,
|
|
18266
|
+
num_images,
|
|
18267
|
+
) {
|
|
18268
|
+
return `${image_token.repeat(image_seq_len * num_images)}${bos_token}${prompt}\n`
|
|
18269
|
+
}
|
|
18270
|
+
|
|
18271
|
+
class PaliGemmaProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__.Processor {
|
|
18272
|
+
static tokenizer_class = _tokenizers_js__WEBPACK_IMPORTED_MODULE_2__.AutoTokenizer
|
|
18273
|
+
static image_processor_class = _auto_image_processing_auto_js__WEBPACK_IMPORTED_MODULE_1__.AutoImageProcessor
|
|
18274
|
+
static uses_processor_config = false;
|
|
18275
|
+
|
|
18276
|
+
/**
|
|
18277
|
+
* @typedef {import('../../utils/image.js').RawImage} RawImage
|
|
18278
|
+
*/
|
|
18279
|
+
|
|
18280
|
+
// `images` is required, `text` is optional
|
|
18281
|
+
async _call(/** @type {RawImage|RawImage[]} */ images, text = null, kwargs = {}) {
|
|
18282
|
+
if (!text) {
|
|
18283
|
+
console.warn(
|
|
18284
|
+
"You are using PaliGemma without a text prefix. It will perform as a picture-captioning model."
|
|
18285
|
+
)
|
|
18286
|
+
text = ""
|
|
18287
|
+
}
|
|
18288
|
+
|
|
18289
|
+
if (!Array.isArray(images)) {
|
|
18290
|
+
images = [images]
|
|
18291
|
+
}
|
|
18292
|
+
|
|
18293
|
+
if (!Array.isArray(text)) {
|
|
18294
|
+
text = [text]
|
|
18295
|
+
}
|
|
18296
|
+
|
|
18297
|
+
const bos_token = this.tokenizer.bos_token;
|
|
18298
|
+
const image_seq_length = this.image_processor.config.image_seq_length;
|
|
18299
|
+
let input_strings;
|
|
18300
|
+
if (text.some((t) => t.includes(IMAGE_TOKEN))) {
|
|
18301
|
+
input_strings = text.map(
|
|
18302
|
+
sample => {
|
|
18303
|
+
const expanded_sample = sample.replaceAll(IMAGE_TOKEN, IMAGE_TOKEN.repeat(image_seq_length));
|
|
18304
|
+
const bos_rfind_index = expanded_sample.lastIndexOf(IMAGE_TOKEN);
|
|
18305
|
+
const bos_index = bos_rfind_index === -1 ? 0 : bos_rfind_index + IMAGE_TOKEN.length;
|
|
18306
|
+
return expanded_sample.slice(0, bos_index) + bos_token + expanded_sample.slice(bos_index) + "\n";
|
|
18307
|
+
}
|
|
18308
|
+
)
|
|
18309
|
+
} else {
|
|
18310
|
+
console.warn(
|
|
18311
|
+
"You are passing both `text` and `images` to `PaliGemmaProcessor`. The processor expects special " +
|
|
18312
|
+
"image tokens in the text, as many tokens as there are images per each text. It is recommended to " +
|
|
18313
|
+
"add `<image>` tokens in the very beginning of your text. For this call, we will infer how many images " +
|
|
18314
|
+
"each text has and add special tokens."
|
|
18315
|
+
)
|
|
18316
|
+
|
|
18317
|
+
input_strings = text.map(
|
|
18318
|
+
sample => build_string_from_input(
|
|
18319
|
+
sample,
|
|
18320
|
+
bos_token,
|
|
18321
|
+
image_seq_length,
|
|
18322
|
+
IMAGE_TOKEN,
|
|
18323
|
+
images.length,
|
|
18324
|
+
)
|
|
18325
|
+
)
|
|
18326
|
+
}
|
|
18327
|
+
|
|
18328
|
+
const text_inputs = this.tokenizer(input_strings, kwargs);
|
|
18329
|
+
const image_inputs = await this.image_processor(images, kwargs);
|
|
18330
|
+
|
|
18331
|
+
return {
|
|
18332
|
+
...image_inputs,
|
|
18333
|
+
...text_inputs,
|
|
18334
|
+
}
|
|
18335
|
+
}
|
|
18336
|
+
}
|
|
18337
|
+
|
|
18338
|
+
|
|
17691
18339
|
/***/ }),
|
|
17692
18340
|
|
|
17693
18341
|
/***/ "./src/models/processors.js":
|
|
@@ -17700,28 +18348,34 @@ class OwlViTProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE
|
|
|
17700
18348
|
__webpack_require__.r(__webpack_exports__);
|
|
17701
18349
|
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
17702
18350
|
/* harmony export */ Florence2Processor: () => (/* reexport safe */ _florence2_processing_florence2_js__WEBPACK_IMPORTED_MODULE_0__.Florence2Processor),
|
|
17703
|
-
/* harmony export */
|
|
18351
|
+
/* harmony export */ Idefics3Processor: () => (/* reexport safe */ _idefics3_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_2__.Idefics3Processor),
|
|
18352
|
+
/* harmony export */ JinaCLIPProcessor: () => (/* reexport safe */ _jina_clip_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_4__.JinaCLIPProcessor),
|
|
17704
18353
|
/* harmony export */ MgpstrProcessor: () => (/* reexport safe */ _mgp_str_processing_mgp_str_js__WEBPACK_IMPORTED_MODULE_1__.MgpstrProcessor),
|
|
17705
|
-
/* harmony export */ OwlViTProcessor: () => (/* reexport safe */
|
|
17706
|
-
/* harmony export */
|
|
17707
|
-
/* harmony export */
|
|
17708
|
-
/* harmony export */
|
|
17709
|
-
/* harmony export */
|
|
17710
|
-
/* harmony export */
|
|
17711
|
-
/* harmony export */
|
|
17712
|
-
/* harmony export */
|
|
18354
|
+
/* harmony export */ OwlViTProcessor: () => (/* reexport safe */ _owlvit_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_5__.OwlViTProcessor),
|
|
18355
|
+
/* harmony export */ PaliGemmaProcessor: () => (/* reexport safe */ _paligemma_processing_paligemma_js__WEBPACK_IMPORTED_MODULE_6__.PaliGemmaProcessor),
|
|
18356
|
+
/* harmony export */ PyAnnoteProcessor: () => (/* reexport safe */ _pyannote_processing_pyannote_js__WEBPACK_IMPORTED_MODULE_7__.PyAnnoteProcessor),
|
|
18357
|
+
/* harmony export */ Qwen2VLProcessor: () => (/* reexport safe */ _qwen2_vl_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_8__.Qwen2VLProcessor),
|
|
18358
|
+
/* harmony export */ SamProcessor: () => (/* reexport safe */ _sam_processing_sam_js__WEBPACK_IMPORTED_MODULE_9__.SamProcessor),
|
|
18359
|
+
/* harmony export */ SpeechT5Processor: () => (/* reexport safe */ _speecht5_processing_speecht5_js__WEBPACK_IMPORTED_MODULE_10__.SpeechT5Processor),
|
|
18360
|
+
/* harmony export */ VLChatProcessor: () => (/* reexport safe */ _janus_processing_janus_js__WEBPACK_IMPORTED_MODULE_3__.VLChatProcessor),
|
|
18361
|
+
/* harmony export */ Wav2Vec2ProcessorWithLM: () => (/* reexport safe */ _wav2vec2_processing_wav2vec2_js__WEBPACK_IMPORTED_MODULE_11__.Wav2Vec2ProcessorWithLM),
|
|
18362
|
+
/* harmony export */ WhisperProcessor: () => (/* reexport safe */ _whisper_processing_whisper_js__WEBPACK_IMPORTED_MODULE_12__.WhisperProcessor)
|
|
17713
18363
|
/* harmony export */ });
|
|
17714
18364
|
/* harmony import */ var _florence2_processing_florence2_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./florence2/processing_florence2.js */ "./src/models/florence2/processing_florence2.js");
|
|
17715
18365
|
/* harmony import */ var _mgp_str_processing_mgp_str_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./mgp_str/processing_mgp_str.js */ "./src/models/mgp_str/processing_mgp_str.js");
|
|
17716
|
-
/* harmony import */ var
|
|
17717
|
-
/* harmony import */ var
|
|
17718
|
-
/* harmony import */ var
|
|
17719
|
-
/* harmony import */ var
|
|
17720
|
-
/* harmony import */ var
|
|
17721
|
-
/* harmony import */ var
|
|
17722
|
-
/* harmony import */ var
|
|
17723
|
-
/* harmony import */ var
|
|
17724
|
-
/* harmony import */ var
|
|
18366
|
+
/* harmony import */ var _idefics3_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./idefics3/processing_idefics3.js */ "./src/models/idefics3/processing_idefics3.js");
|
|
18367
|
+
/* harmony import */ var _janus_processing_janus_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ./janus/processing_janus.js */ "./src/models/janus/processing_janus.js");
|
|
18368
|
+
/* harmony import */ var _jina_clip_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ./jina_clip/processing_jina_clip.js */ "./src/models/jina_clip/processing_jina_clip.js");
|
|
18369
|
+
/* harmony import */ var _owlvit_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./owlvit/processing_owlvit.js */ "./src/models/owlvit/processing_owlvit.js");
|
|
18370
|
+
/* harmony import */ var _paligemma_processing_paligemma_js__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./paligemma/processing_paligemma.js */ "./src/models/paligemma/processing_paligemma.js");
|
|
18371
|
+
/* harmony import */ var _pyannote_processing_pyannote_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./pyannote/processing_pyannote.js */ "./src/models/pyannote/processing_pyannote.js");
|
|
18372
|
+
/* harmony import */ var _qwen2_vl_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./qwen2_vl/processing_qwen2_vl.js */ "./src/models/qwen2_vl/processing_qwen2_vl.js");
|
|
18373
|
+
/* harmony import */ var _sam_processing_sam_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./sam/processing_sam.js */ "./src/models/sam/processing_sam.js");
|
|
18374
|
+
/* harmony import */ var _speecht5_processing_speecht5_js__WEBPACK_IMPORTED_MODULE_10__ = __webpack_require__(/*! ./speecht5/processing_speecht5.js */ "./src/models/speecht5/processing_speecht5.js");
|
|
18375
|
+
/* harmony import */ var _wav2vec2_processing_wav2vec2_js__WEBPACK_IMPORTED_MODULE_11__ = __webpack_require__(/*! ./wav2vec2/processing_wav2vec2.js */ "./src/models/wav2vec2/processing_wav2vec2.js");
|
|
18376
|
+
/* harmony import */ var _whisper_processing_whisper_js__WEBPACK_IMPORTED_MODULE_12__ = __webpack_require__(/*! ./whisper/processing_whisper.js */ "./src/models/whisper/processing_whisper.js");
|
|
18377
|
+
|
|
18378
|
+
|
|
17725
18379
|
|
|
17726
18380
|
|
|
17727
18381
|
|
|
@@ -25638,6 +26292,12 @@ class PreTrainedTokenizer extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__
|
|
|
25638
26292
|
this.unk_token = this.getToken('unk_token');
|
|
25639
26293
|
this.unk_token_id = this.model.tokens_to_ids.get(this.unk_token);
|
|
25640
26294
|
|
|
26295
|
+
this.bos_token = this.getToken('bos_token');
|
|
26296
|
+
this.bos_token_id = this.model.tokens_to_ids.get(this.bos_token);
|
|
26297
|
+
|
|
26298
|
+
this.eos_token = this.getToken('eos_token');
|
|
26299
|
+
this.eos_token_id = this.model.tokens_to_ids.get(this.eos_token);
|
|
26300
|
+
|
|
25641
26301
|
this.model_max_length = tokenizerConfig.model_max_length;
|
|
25642
26302
|
|
|
25643
26303
|
/** @type {boolean} Whether or not to strip the text when tokenizing (removing excess spaces before and after the string). */
|
|
@@ -26610,6 +27270,11 @@ class WhisperTokenizer extends PreTrainedTokenizer {
|
|
|
26610
27270
|
let chunk = new_chunk();
|
|
26611
27271
|
let time_offset = 0.0;
|
|
26612
27272
|
const timestamp_begin = this.timestamp_begin;
|
|
27273
|
+
// Whisper timestamp tokens start from 0.00 and go to timestamp 30.00 in 0.02 increments.
|
|
27274
|
+
// We can calculate the last time stamp token as timestamp_begin plus the number of tokens
|
|
27275
|
+
// tokens from 0.00 to 30.00 which is 1500.
|
|
27276
|
+
const total_timestamp_tokens = 1500; // (30.00 - 0.00) / 0.02
|
|
27277
|
+
const timestamp_end = timestamp_begin + total_timestamp_tokens;
|
|
26613
27278
|
|
|
26614
27279
|
let previous_tokens = [];
|
|
26615
27280
|
let previous_token_timestamps = [];
|
|
@@ -26697,7 +27362,7 @@ class WhisperTokenizer extends PreTrainedTokenizer {
|
|
|
26697
27362
|
} else {
|
|
26698
27363
|
// 2/ This is a regular special token, ignoring it
|
|
26699
27364
|
}
|
|
26700
|
-
} else if (token >= timestamp_begin) {
|
|
27365
|
+
} else if (token >= timestamp_begin && token <= timestamp_end) {
|
|
26701
27366
|
// 3/ Timestamp token
|
|
26702
27367
|
const time = (token - timestamp_begin) * time_precision + time_offset;
|
|
26703
27368
|
const rounded_time = (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_3__.round)(time, 2);
|
|
@@ -28165,6 +28830,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
28165
28830
|
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
28166
28831
|
/* harmony export */ calculateDimensions: () => (/* binding */ calculateDimensions),
|
|
28167
28832
|
/* harmony export */ calculateReflectOffset: () => (/* binding */ calculateReflectOffset),
|
|
28833
|
+
/* harmony export */ count: () => (/* binding */ count),
|
|
28168
28834
|
/* harmony export */ dispatchCallback: () => (/* binding */ dispatchCallback),
|
|
28169
28835
|
/* harmony export */ escapeRegExp: () => (/* binding */ escapeRegExp),
|
|
28170
28836
|
/* harmony export */ isIntegralNumber: () => (/* binding */ isIntegralNumber),
|
|
@@ -28188,15 +28854,45 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
28188
28854
|
*/
|
|
28189
28855
|
|
|
28190
28856
|
/**
|
|
28191
|
-
* @typedef {Object}
|
|
28192
|
-
* @property {'initiate'
|
|
28193
|
-
* @property {string} name
|
|
28194
|
-
*
|
|
28195
|
-
|
|
28196
|
-
|
|
28197
|
-
|
|
28198
|
-
* @
|
|
28199
|
-
* @property {
|
|
28857
|
+
* @typedef {Object} InitiateProgressInfo
|
|
28858
|
+
* @property {'initiate'} status
|
|
28859
|
+
* @property {string} name The model id or directory path.
|
|
28860
|
+
* @property {string} file The name of the file.
|
|
28861
|
+
*/
|
|
28862
|
+
|
|
28863
|
+
/**
|
|
28864
|
+
* @typedef {Object} DownloadProgressInfo
|
|
28865
|
+
* @property {'download'} status
|
|
28866
|
+
* @property {string} name The model id or directory path.
|
|
28867
|
+
* @property {string} file The name of the file.
|
|
28868
|
+
*/
|
|
28869
|
+
|
|
28870
|
+
/**
|
|
28871
|
+
* @typedef {Object} ProgressStatusInfo
|
|
28872
|
+
* @property {'progress'} status
|
|
28873
|
+
* @property {string} name The model id or directory path.
|
|
28874
|
+
* @property {string} file The name of the file.
|
|
28875
|
+
* @property {number} progress A number between 0 and 100.
|
|
28876
|
+
* @property {number} loaded The number of bytes loaded.
|
|
28877
|
+
* @property {number} total The total number of bytes to be loaded.
|
|
28878
|
+
*/
|
|
28879
|
+
|
|
28880
|
+
/**
|
|
28881
|
+
* @typedef {Object} DoneProgressInfo
|
|
28882
|
+
* @property {'done'} status
|
|
28883
|
+
* @property {string} name The model id or directory path.
|
|
28884
|
+
* @property {string} file The name of the file.
|
|
28885
|
+
*/
|
|
28886
|
+
|
|
28887
|
+
/**
|
|
28888
|
+
* @typedef {Object} ReadyProgressInfo
|
|
28889
|
+
* @property {'ready'} status
|
|
28890
|
+
* @property {string} task The loaded task.
|
|
28891
|
+
* @property {string} model The loaded model.
|
|
28892
|
+
*/
|
|
28893
|
+
|
|
28894
|
+
/**
|
|
28895
|
+
* @typedef {InitiateProgressInfo | DownloadProgressInfo | ProgressStatusInfo | DoneProgressInfo | ReadyProgressInfo} ProgressInfo
|
|
28200
28896
|
*/
|
|
28201
28897
|
|
|
28202
28898
|
/**
|
|
@@ -28367,6 +29063,20 @@ function len(s) {
|
|
|
28367
29063
|
return length;
|
|
28368
29064
|
}
|
|
28369
29065
|
|
|
29066
|
+
/**
|
|
29067
|
+
* Count the occurrences of a value in an array or string.
|
|
29068
|
+
* This mimics the behavior of Python's `count` method.
|
|
29069
|
+
* @param {any[]|string} arr The array or string to search.
|
|
29070
|
+
* @param {any} value The value to count.
|
|
29071
|
+
*/
|
|
29072
|
+
function count(arr, value) {
|
|
29073
|
+
let count = 0;
|
|
29074
|
+
for (const v of arr) {
|
|
29075
|
+
if (v === value) ++count;
|
|
29076
|
+
}
|
|
29077
|
+
return count;
|
|
29078
|
+
}
|
|
29079
|
+
|
|
28370
29080
|
|
|
28371
29081
|
/***/ }),
|
|
28372
29082
|
|
|
@@ -28920,6 +29630,7 @@ const isWebGpuFp16Supported = (function () {
|
|
|
28920
29630
|
})();
|
|
28921
29631
|
|
|
28922
29632
|
const DATA_TYPES = Object.freeze({
|
|
29633
|
+
auto: 'auto', // Auto-detect based on environment
|
|
28923
29634
|
fp32: 'fp32',
|
|
28924
29635
|
fp16: 'fp16',
|
|
28925
29636
|
q8: 'q8',
|
|
@@ -28936,7 +29647,7 @@ const DEFAULT_DEVICE_DTYPE_MAPPING = Object.freeze({
|
|
|
28936
29647
|
[_devices_js__WEBPACK_IMPORTED_MODULE_1__.DEVICE_TYPES.wasm]: DATA_TYPES.q8,
|
|
28937
29648
|
});
|
|
28938
29649
|
|
|
28939
|
-
/** @type {Record<DataType, string>} */
|
|
29650
|
+
/** @type {Record<Exclude<DataType, "auto">, string>} */
|
|
28940
29651
|
const DEFAULT_DTYPE_SUFFIX_MAPPING = Object.freeze({
|
|
28941
29652
|
[DATA_TYPES.fp32]: '',
|
|
28942
29653
|
[DATA_TYPES.fp16]: '_fp16',
|
|
@@ -29524,13 +30235,6 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
|
|
|
29524
30235
|
file: filename
|
|
29525
30236
|
})
|
|
29526
30237
|
|
|
29527
|
-
/** @type {import('./core.js').ProgressInfo} */
|
|
29528
|
-
const progressInfo = {
|
|
29529
|
-
status: 'progress',
|
|
29530
|
-
name: path_or_repo_id,
|
|
29531
|
-
file: filename
|
|
29532
|
-
}
|
|
29533
|
-
|
|
29534
30238
|
/** @type {Uint8Array} */
|
|
29535
30239
|
let buffer;
|
|
29536
30240
|
|
|
@@ -29550,7 +30254,9 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
|
|
|
29550
30254
|
|
|
29551
30255
|
// For completeness, we still fire the final progress callback
|
|
29552
30256
|
(0,_core_js__WEBPACK_IMPORTED_MODULE_3__.dispatchCallback)(options.progress_callback, {
|
|
29553
|
-
|
|
30257
|
+
status: 'progress',
|
|
30258
|
+
name: path_or_repo_id,
|
|
30259
|
+
file: filename,
|
|
29554
30260
|
progress: 100,
|
|
29555
30261
|
loaded: buffer.length,
|
|
29556
30262
|
total: buffer.length,
|
|
@@ -29558,7 +30264,9 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
|
|
|
29558
30264
|
} else {
|
|
29559
30265
|
buffer = await readResponse(response, data => {
|
|
29560
30266
|
(0,_core_js__WEBPACK_IMPORTED_MODULE_3__.dispatchCallback)(options.progress_callback, {
|
|
29561
|
-
|
|
30267
|
+
status: 'progress',
|
|
30268
|
+
name: path_or_repo_id,
|
|
30269
|
+
file: filename,
|
|
29562
30270
|
...data,
|
|
29563
30271
|
})
|
|
29564
30272
|
})
|
|
@@ -29615,12 +30323,11 @@ async function getModelJSON(modelPath, fileName, fatal = true, options = {}) {
|
|
|
29615
30323
|
|
|
29616
30324
|
return JSON.parse(jsonData);
|
|
29617
30325
|
}
|
|
29618
|
-
|
|
29619
30326
|
/**
|
|
29620
30327
|
* Read and track progress when reading a Response object
|
|
29621
30328
|
*
|
|
29622
|
-
* @param {
|
|
29623
|
-
* @param {
|
|
30329
|
+
* @param {Response|FileResponse} response The Response object to read
|
|
30330
|
+
* @param {(data: {progress: number, loaded: number, total: number}) => void} progress_callback The function to call with progress updates
|
|
29624
30331
|
* @returns {Promise<Uint8Array>} A Promise that resolves with the Uint8Array buffer
|
|
29625
30332
|
*/
|
|
29626
30333
|
async function readResponse(response, progress_callback) {
|
|
@@ -29704,7 +30411,8 @@ function pathJoin(...parts) {
|
|
|
29704
30411
|
"use strict";
|
|
29705
30412
|
__webpack_require__.r(__webpack_exports__);
|
|
29706
30413
|
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
29707
|
-
/* harmony export */ RawImage: () => (/* binding */ RawImage)
|
|
30414
|
+
/* harmony export */ RawImage: () => (/* binding */ RawImage),
|
|
30415
|
+
/* harmony export */ load_image: () => (/* binding */ load_image)
|
|
29708
30416
|
/* harmony export */ });
|
|
29709
30417
|
/* harmony import */ var _core_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./core.js */ "./src/utils/core.js");
|
|
29710
30418
|
/* harmony import */ var _hub_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./hub.js */ "./src/utils/hub.js");
|
|
@@ -29729,13 +30437,11 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
29729
30437
|
// Will be empty (or not used) if running in browser or web-worker
|
|
29730
30438
|
|
|
29731
30439
|
|
|
29732
|
-
const BROWSER_ENV = typeof self !== 'undefined';
|
|
29733
|
-
const WEBWORKER_ENV = BROWSER_ENV && self.constructor.name === 'DedicatedWorkerGlobalScope';
|
|
29734
|
-
|
|
29735
30440
|
let createCanvasFunction;
|
|
29736
30441
|
let ImageDataClass;
|
|
29737
30442
|
let loadImageFunction;
|
|
29738
|
-
|
|
30443
|
+
const IS_BROWSER_OR_WEBWORKER = _env_js__WEBPACK_IMPORTED_MODULE_2__.apis.IS_BROWSER_ENV || _env_js__WEBPACK_IMPORTED_MODULE_2__.apis.IS_WEBWORKER_ENV;
|
|
30444
|
+
if (IS_BROWSER_OR_WEBWORKER) {
|
|
29739
30445
|
// Running in browser or web-worker
|
|
29740
30446
|
createCanvasFunction = (/** @type {number} */ width, /** @type {number} */ height) => {
|
|
29741
30447
|
if (!self.OffscreenCanvas) {
|
|
@@ -29845,7 +30551,7 @@ class RawImage {
|
|
|
29845
30551
|
* @returns {RawImage} The image object.
|
|
29846
30552
|
*/
|
|
29847
30553
|
static fromCanvas(canvas) {
|
|
29848
|
-
if (!
|
|
30554
|
+
if (!IS_BROWSER_OR_WEBWORKER) {
|
|
29849
30555
|
throw new Error('fromCanvas() is only supported in browser environments.')
|
|
29850
30556
|
}
|
|
29851
30557
|
|
|
@@ -29874,7 +30580,7 @@ class RawImage {
|
|
|
29874
30580
|
* @returns {Promise<RawImage>} The image object.
|
|
29875
30581
|
*/
|
|
29876
30582
|
static async fromBlob(blob) {
|
|
29877
|
-
if (
|
|
30583
|
+
if (IS_BROWSER_OR_WEBWORKER) {
|
|
29878
30584
|
// Running in environment with canvas
|
|
29879
30585
|
const img = await loadImageFunction(blob);
|
|
29880
30586
|
|
|
@@ -30018,6 +30724,46 @@ class RawImage {
|
|
|
30018
30724
|
return this._update(newData, this.width, this.height, 4);
|
|
30019
30725
|
}
|
|
30020
30726
|
|
|
30727
|
+
/**
|
|
30728
|
+
* Apply an alpha mask to the image. Operates in place.
|
|
30729
|
+
* @param {RawImage} mask The mask to apply. It should have a single channel.
|
|
30730
|
+
* @returns {RawImage} The masked image.
|
|
30731
|
+
* @throws {Error} If the mask is not the same size as the image.
|
|
30732
|
+
* @throws {Error} If the image does not have 4 channels.
|
|
30733
|
+
* @throws {Error} If the mask is not a single channel.
|
|
30734
|
+
*/
|
|
30735
|
+
putAlpha(mask) {
|
|
30736
|
+
if (mask.width !== this.width || mask.height !== this.height) {
|
|
30737
|
+
throw new Error(`Expected mask size to be ${this.width}x${this.height}, but got ${mask.width}x${mask.height}`);
|
|
30738
|
+
}
|
|
30739
|
+
if (mask.channels !== 1) {
|
|
30740
|
+
throw new Error(`Expected mask to have 1 channel, but got ${mask.channels}`);
|
|
30741
|
+
}
|
|
30742
|
+
|
|
30743
|
+
const this_data = this.data;
|
|
30744
|
+
const mask_data = mask.data;
|
|
30745
|
+
const num_pixels = this.width * this.height;
|
|
30746
|
+
if (this.channels === 3) {
|
|
30747
|
+
// Convert to RGBA and simultaneously apply mask to alpha channel
|
|
30748
|
+
const newData = new Uint8ClampedArray(num_pixels * 4);
|
|
30749
|
+
for (let i = 0, in_offset = 0, out_offset = 0; i < num_pixels; ++i) {
|
|
30750
|
+
newData[out_offset++] = this_data[in_offset++];
|
|
30751
|
+
newData[out_offset++] = this_data[in_offset++];
|
|
30752
|
+
newData[out_offset++] = this_data[in_offset++];
|
|
30753
|
+
newData[out_offset++] = mask_data[i];
|
|
30754
|
+
}
|
|
30755
|
+
return this._update(newData, this.width, this.height, 4);
|
|
30756
|
+
|
|
30757
|
+
} else if (this.channels === 4) {
|
|
30758
|
+
// Apply mask to alpha channel in place
|
|
30759
|
+
for (let i = 0; i < num_pixels; ++i) {
|
|
30760
|
+
this_data[4 * i + 3] = mask_data[i];
|
|
30761
|
+
}
|
|
30762
|
+
return this;
|
|
30763
|
+
}
|
|
30764
|
+
throw new Error(`Expected image to have 3 or 4 channels, but got ${this.channels}`);
|
|
30765
|
+
}
|
|
30766
|
+
|
|
30021
30767
|
/**
|
|
30022
30768
|
* Resize the image to the given dimensions. This method uses the canvas API to perform the resizing.
|
|
30023
30769
|
* @param {number} width The width of the new image. `null` or `-1` will preserve the aspect ratio.
|
|
@@ -30052,7 +30798,7 @@ class RawImage {
|
|
|
30052
30798
|
height = (width / this.width) * this.height;
|
|
30053
30799
|
}
|
|
30054
30800
|
|
|
30055
|
-
if (
|
|
30801
|
+
if (IS_BROWSER_OR_WEBWORKER) {
|
|
30056
30802
|
// TODO use `resample` in browser environment
|
|
30057
30803
|
|
|
30058
30804
|
// Store number of channels before resizing
|
|
@@ -30125,7 +30871,7 @@ class RawImage {
|
|
|
30125
30871
|
return this;
|
|
30126
30872
|
}
|
|
30127
30873
|
|
|
30128
|
-
if (
|
|
30874
|
+
if (IS_BROWSER_OR_WEBWORKER) {
|
|
30129
30875
|
// Store number of channels before padding
|
|
30130
30876
|
const numChannels = this.channels;
|
|
30131
30877
|
|
|
@@ -30174,7 +30920,7 @@ class RawImage {
|
|
|
30174
30920
|
const crop_width = x_max - x_min + 1;
|
|
30175
30921
|
const crop_height = y_max - y_min + 1;
|
|
30176
30922
|
|
|
30177
|
-
if (
|
|
30923
|
+
if (IS_BROWSER_OR_WEBWORKER) {
|
|
30178
30924
|
// Store number of channels before resizing
|
|
30179
30925
|
const numChannels = this.channels;
|
|
30180
30926
|
|
|
@@ -30222,7 +30968,7 @@ class RawImage {
|
|
|
30222
30968
|
const height_offset = (this.height - crop_height) / 2;
|
|
30223
30969
|
|
|
30224
30970
|
|
|
30225
|
-
if (
|
|
30971
|
+
if (IS_BROWSER_OR_WEBWORKER) {
|
|
30226
30972
|
// Store number of channels before resizing
|
|
30227
30973
|
const numChannels = this.channels;
|
|
30228
30974
|
|
|
@@ -30327,7 +31073,7 @@ class RawImage {
|
|
|
30327
31073
|
}
|
|
30328
31074
|
|
|
30329
31075
|
async toBlob(type = 'image/png', quality = 1) {
|
|
30330
|
-
if (!
|
|
31076
|
+
if (!IS_BROWSER_OR_WEBWORKER) {
|
|
30331
31077
|
throw new Error('toBlob() is only supported in browser environments.')
|
|
30332
31078
|
}
|
|
30333
31079
|
|
|
@@ -30353,7 +31099,7 @@ class RawImage {
|
|
|
30353
31099
|
}
|
|
30354
31100
|
|
|
30355
31101
|
toCanvas() {
|
|
30356
|
-
if (!
|
|
31102
|
+
if (!IS_BROWSER_OR_WEBWORKER) {
|
|
30357
31103
|
throw new Error('toCanvas() is only supported in browser environments.')
|
|
30358
31104
|
}
|
|
30359
31105
|
|
|
@@ -30457,8 +31203,8 @@ class RawImage {
|
|
|
30457
31203
|
*/
|
|
30458
31204
|
async save(path) {
|
|
30459
31205
|
|
|
30460
|
-
if (
|
|
30461
|
-
if (
|
|
31206
|
+
if (IS_BROWSER_OR_WEBWORKER) {
|
|
31207
|
+
if (_env_js__WEBPACK_IMPORTED_MODULE_2__.apis.IS_WEBWORKER_ENV) {
|
|
30462
31208
|
throw new Error('Unable to save an image from a Web Worker.')
|
|
30463
31209
|
}
|
|
30464
31210
|
|
|
@@ -30494,7 +31240,7 @@ class RawImage {
|
|
|
30494
31240
|
}
|
|
30495
31241
|
|
|
30496
31242
|
toSharp() {
|
|
30497
|
-
if (
|
|
31243
|
+
if (IS_BROWSER_OR_WEBWORKER) {
|
|
30498
31244
|
throw new Error('toSharp() is only supported in server-side environments.')
|
|
30499
31245
|
}
|
|
30500
31246
|
|
|
@@ -30508,6 +31254,11 @@ class RawImage {
|
|
|
30508
31254
|
}
|
|
30509
31255
|
}
|
|
30510
31256
|
|
|
31257
|
+
/**
|
|
31258
|
+
* Helper function to load an image from a URL, path, etc.
|
|
31259
|
+
*/
|
|
31260
|
+
const load_image = RawImage.read.bind(RawImage);
|
|
31261
|
+
|
|
30511
31262
|
|
|
30512
31263
|
/***/ }),
|
|
30513
31264
|
|
|
@@ -31660,6 +32411,8 @@ const DataTypeMap = Object.freeze({
|
|
|
31660
32411
|
int64: BigInt64Array,
|
|
31661
32412
|
uint64: BigUint64Array,
|
|
31662
32413
|
bool: Uint8Array,
|
|
32414
|
+
uint4: Uint8Array,
|
|
32415
|
+
int4: Int8Array,
|
|
31663
32416
|
});
|
|
31664
32417
|
|
|
31665
32418
|
/**
|
|
@@ -32981,7 +33734,7 @@ function fullHelper(size, fill_value, dtype, cls) {
|
|
|
32981
33734
|
/**
|
|
32982
33735
|
* Creates a tensor of size size filled with fill_value. The tensor's dtype is inferred from fill_value.
|
|
32983
33736
|
* @param {number[]} size A sequence of integers defining the shape of the output tensor.
|
|
32984
|
-
* @param {number|bigint} fill_value The value to fill the output tensor with.
|
|
33737
|
+
* @param {number|bigint|boolean} fill_value The value to fill the output tensor with.
|
|
32985
33738
|
* @returns {Tensor} The filled tensor.
|
|
32986
33739
|
*/
|
|
32987
33740
|
function full(size, fill_value) {
|
|
@@ -32993,6 +33746,9 @@ function full(size, fill_value) {
|
|
|
32993
33746
|
} else if (typeof fill_value === 'bigint') {
|
|
32994
33747
|
dtype = 'int64';
|
|
32995
33748
|
typedArrayCls = BigInt64Array;
|
|
33749
|
+
} else if (typeof fill_value === 'boolean') {
|
|
33750
|
+
dtype = 'bool';
|
|
33751
|
+
typedArrayCls = Uint8Array;
|
|
32996
33752
|
} else {
|
|
32997
33753
|
// TODO: support other dtypes
|
|
32998
33754
|
throw new Error(`Unsupported data type: ${typeof fill_value}`);
|
|
@@ -33174,7 +33930,7 @@ function quantize_embeddings(tensor, precision) {
|
|
|
33174
33930
|
/******/
|
|
33175
33931
|
/************************************************************************/
|
|
33176
33932
|
var __webpack_exports__ = {};
|
|
33177
|
-
// This entry
|
|
33933
|
+
// This entry needs to be wrapped in an IIFE because it needs to be in strict mode.
|
|
33178
33934
|
(() => {
|
|
33179
33935
|
"use strict";
|
|
33180
33936
|
/*!*****************************!*\
|
|
@@ -33443,6 +34199,13 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
33443
34199
|
/* harmony export */ HubertForSequenceClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.HubertForSequenceClassification),
|
|
33444
34200
|
/* harmony export */ HubertModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.HubertModel),
|
|
33445
34201
|
/* harmony export */ HubertPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.HubertPreTrainedModel),
|
|
34202
|
+
/* harmony export */ IJepaForImageClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.IJepaForImageClassification),
|
|
34203
|
+
/* harmony export */ IJepaModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.IJepaModel),
|
|
34204
|
+
/* harmony export */ IJepaPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.IJepaPreTrainedModel),
|
|
34205
|
+
/* harmony export */ Idefics3ForConditionalGeneration: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Idefics3ForConditionalGeneration),
|
|
34206
|
+
/* harmony export */ Idefics3ImageProcessor: () => (/* reexport safe */ _models_image_processors_js__WEBPACK_IMPORTED_MODULE_13__.Idefics3ImageProcessor),
|
|
34207
|
+
/* harmony export */ Idefics3PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Idefics3PreTrainedModel),
|
|
34208
|
+
/* harmony export */ Idefics3Processor: () => (/* reexport safe */ _models_processors_js__WEBPACK_IMPORTED_MODULE_16__.Idefics3Processor),
|
|
33446
34209
|
/* harmony export */ ImageClassificationPipeline: () => (/* reexport safe */ _pipelines_js__WEBPACK_IMPORTED_MODULE_1__.ImageClassificationPipeline),
|
|
33447
34210
|
/* harmony export */ ImageFeatureExtractionPipeline: () => (/* reexport safe */ _pipelines_js__WEBPACK_IMPORTED_MODULE_1__.ImageFeatureExtractionPipeline),
|
|
33448
34211
|
/* harmony export */ ImageFeatureExtractor: () => (/* reexport safe */ _models_feature_extractors_js__WEBPACK_IMPORTED_MODULE_10__.ImageFeatureExtractor),
|
|
@@ -33577,6 +34340,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
33577
34340
|
/* harmony export */ OPTModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OPTModel),
|
|
33578
34341
|
/* harmony export */ OPTPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OPTPreTrainedModel),
|
|
33579
34342
|
/* harmony export */ ObjectDetectionPipeline: () => (/* reexport safe */ _pipelines_js__WEBPACK_IMPORTED_MODULE_1__.ObjectDetectionPipeline),
|
|
34343
|
+
/* harmony export */ Olmo2ForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Olmo2ForCausalLM),
|
|
34344
|
+
/* harmony export */ Olmo2Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Olmo2Model),
|
|
34345
|
+
/* harmony export */ Olmo2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Olmo2PreTrainedModel),
|
|
33580
34346
|
/* harmony export */ OlmoForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OlmoForCausalLM),
|
|
33581
34347
|
/* harmony export */ OlmoModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OlmoModel),
|
|
33582
34348
|
/* harmony export */ OlmoPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OlmoPreTrainedModel),
|
|
@@ -33593,6 +34359,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
33593
34359
|
/* harmony export */ Owlv2ImageProcessor: () => (/* reexport safe */ _models_image_processors_js__WEBPACK_IMPORTED_MODULE_13__.Owlv2ImageProcessor),
|
|
33594
34360
|
/* harmony export */ Owlv2Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Owlv2Model),
|
|
33595
34361
|
/* harmony export */ Owlv2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Owlv2PreTrainedModel),
|
|
34362
|
+
/* harmony export */ PaliGemmaForConditionalGeneration: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PaliGemmaForConditionalGeneration),
|
|
34363
|
+
/* harmony export */ PaliGemmaPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PaliGemmaPreTrainedModel),
|
|
34364
|
+
/* harmony export */ PaliGemmaProcessor: () => (/* reexport safe */ _models_processors_js__WEBPACK_IMPORTED_MODULE_16__.PaliGemmaProcessor),
|
|
33596
34365
|
/* harmony export */ PatchTSMixerForPrediction: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PatchTSMixerForPrediction),
|
|
33597
34366
|
/* harmony export */ PatchTSMixerModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PatchTSMixerModel),
|
|
33598
34367
|
/* harmony export */ PatchTSMixerPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PatchTSMixerPreTrainedModel),
|
|
@@ -33834,6 +34603,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
33834
34603
|
/* harmony export */ interpolate_data: () => (/* reexport safe */ _utils_maths_js__WEBPACK_IMPORTED_MODULE_8__.interpolate_data),
|
|
33835
34604
|
/* harmony export */ is_chinese_char: () => (/* reexport safe */ _tokenizers_js__WEBPACK_IMPORTED_MODULE_3__.is_chinese_char),
|
|
33836
34605
|
/* harmony export */ layer_norm: () => (/* reexport safe */ _utils_tensor_js__WEBPACK_IMPORTED_MODULE_7__.layer_norm),
|
|
34606
|
+
/* harmony export */ load_image: () => (/* reexport safe */ _utils_image_js__WEBPACK_IMPORTED_MODULE_6__.load_image),
|
|
33837
34607
|
/* harmony export */ log_softmax: () => (/* reexport safe */ _utils_maths_js__WEBPACK_IMPORTED_MODULE_8__.log_softmax),
|
|
33838
34608
|
/* harmony export */ magnitude: () => (/* reexport safe */ _utils_maths_js__WEBPACK_IMPORTED_MODULE_8__.magnitude),
|
|
33839
34609
|
/* harmony export */ matmul: () => (/* reexport safe */ _utils_tensor_js__WEBPACK_IMPORTED_MODULE_7__.matmul),
|
|
@@ -33927,7 +34697,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
33927
34697
|
})();
|
|
33928
34698
|
|
|
33929
34699
|
var __webpack_export_target__ = exports;
|
|
33930
|
-
for(var
|
|
34700
|
+
for(var __webpack_i__ in __webpack_exports__) __webpack_export_target__[__webpack_i__] = __webpack_exports__[__webpack_i__];
|
|
33931
34701
|
if(__webpack_exports__.__esModule) Object.defineProperty(__webpack_export_target__, "__esModule", { value: true });
|
|
33932
34702
|
/******/ })()
|
|
33933
34703
|
;
|