@huggingface/transformers 3.1.0 → 3.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -2
- package/dist/transformers.cjs +678 -153
- package/dist/transformers.cjs.map +1 -1
- package/dist/transformers.js +682 -154
- package/dist/transformers.js.map +1 -1
- package/dist/transformers.min.cjs +24 -18
- package/dist/transformers.min.cjs.map +1 -1
- package/dist/transformers.min.js +19 -13
- package/dist/transformers.min.js.map +1 -1
- package/dist/transformers.min.mjs +30 -24
- package/dist/transformers.min.mjs.map +1 -1
- package/dist/transformers.mjs +682 -154
- package/dist/transformers.mjs.map +1 -1
- package/package.json +1 -1
- package/src/configs.js +2 -1
- package/src/env.js +6 -6
- package/src/generation/configuration_utils.js +7 -0
- package/src/generation/logits_process.js +22 -16
- package/src/generation/streamers.js +7 -2
- package/src/models/idefics3/image_processing_idefics3.js +219 -0
- package/src/models/idefics3/processing_idefics3.js +136 -0
- package/src/models/image_processors.js +1 -0
- package/src/models/processors.js +1 -0
- package/src/models.js +112 -34
- package/src/utils/core.js +14 -0
- package/src/utils/dtypes.js +2 -1
- package/src/utils/image.js +19 -16
- package/src/utils/tensor.js +6 -1
- package/types/configs.d.ts +1 -1
- package/types/configs.d.ts.map +1 -1
- package/types/env.d.ts +1 -1
- package/types/env.d.ts.map +1 -1
- package/types/generation/configuration_utils.d.ts +6 -0
- package/types/generation/configuration_utils.d.ts.map +1 -1
- package/types/generation/logits_process.d.ts +30 -20
- package/types/generation/logits_process.d.ts.map +1 -1
- package/types/generation/streamers.d.ts +13 -8
- package/types/generation/streamers.d.ts.map +1 -1
- package/types/models/idefics3/image_processing_idefics3.d.ts +40 -0
- package/types/models/idefics3/image_processing_idefics3.d.ts.map +1 -0
- package/types/models/idefics3/processing_idefics3.d.ts +19 -0
- package/types/models/idefics3/processing_idefics3.d.ts.map +1 -0
- package/types/models/image_processors.d.ts +1 -0
- package/types/models/processors.d.ts +1 -0
- package/types/models.d.ts +16 -6
- package/types/models.d.ts.map +1 -1
- package/types/utils/core.d.ts +7 -0
- package/types/utils/core.d.ts.map +1 -1
- package/types/utils/dtypes.d.ts +3 -2
- package/types/utils/dtypes.d.ts.map +1 -1
- package/types/utils/image.d.ts +4 -0
- package/types/utils/image.d.ts.map +1 -1
- package/types/utils/tensor.d.ts +5 -3
- package/types/utils/tensor.d.ts.map +1 -1
package/dist/transformers.cjs
CHANGED
|
@@ -5562,6 +5562,7 @@ function getNormalizedConfig(config) {
|
|
|
5562
5562
|
case 'paligemma':
|
|
5563
5563
|
case 'florence2':
|
|
5564
5564
|
case 'llava_onevision':
|
|
5565
|
+
case 'idefics3':
|
|
5565
5566
|
init_normalized_config = getNormalizedConfig(config.text_config);
|
|
5566
5567
|
break;
|
|
5567
5568
|
case 'moondream1':
|
|
@@ -5875,7 +5876,7 @@ class AutoConfig {
|
|
|
5875
5876
|
* See https://onnxruntime.ai/docs/tutorials/web/env-flags-and-session-options.html#freedimensionoverrides
|
|
5876
5877
|
* for more information.
|
|
5877
5878
|
* @property {import('./utils/devices.js').DeviceType} [device] The default device to use for the model.
|
|
5878
|
-
* @property {import('./utils/dtypes.js').DataType} [dtype] The default data type to use for the model.
|
|
5879
|
+
* @property {import('./utils/dtypes.js').DataType|Record<string, import('./utils/dtypes.js').DataType>} [dtype] The default data type to use for the model.
|
|
5879
5880
|
* @property {boolean|Record<string, boolean>} [use_external_data_format=false] Whether to load the model using the external data format (used for models >= 2GB in size).
|
|
5880
5881
|
*/
|
|
5881
5882
|
|
|
@@ -5925,12 +5926,12 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
5925
5926
|
|
|
5926
5927
|
|
|
5927
5928
|
|
|
5928
|
-
const VERSION = '3.1.
|
|
5929
|
+
const VERSION = '3.1.1';
|
|
5929
5930
|
|
|
5930
5931
|
// Check if various APIs are available (depends on environment)
|
|
5931
|
-
const IS_BROWSER_ENV = typeof
|
|
5932
|
-
const IS_WEBWORKER_ENV =
|
|
5933
|
-
const IS_WEB_CACHE_AVAILABLE =
|
|
5932
|
+
const IS_BROWSER_ENV = typeof window !== "undefined" && typeof window.document !== "undefined";
|
|
5933
|
+
const IS_WEBWORKER_ENV = typeof self !== "undefined" && self.constructor?.name === 'DedicatedWorkerGlobalScope';
|
|
5934
|
+
const IS_WEB_CACHE_AVAILABLE = typeof self !== "undefined" && 'caches' in self;
|
|
5934
5935
|
const IS_WEBGPU_AVAILABLE = typeof navigator !== 'undefined' && 'gpu' in navigator;
|
|
5935
5936
|
const IS_WEBNN_AVAILABLE = typeof navigator !== 'undefined' && 'ml' in navigator;
|
|
5936
5937
|
|
|
@@ -5943,7 +5944,7 @@ const IS_PATH_AVAILABLE = !isEmpty(path__WEBPACK_IMPORTED_MODULE_1__);
|
|
|
5943
5944
|
* A read-only object containing information about the APIs available in the current environment.
|
|
5944
5945
|
*/
|
|
5945
5946
|
const apis = Object.freeze({
|
|
5946
|
-
/** Whether we are running in a browser environment */
|
|
5947
|
+
/** Whether we are running in a browser environment (and not a web worker) */
|
|
5947
5948
|
IS_BROWSER_ENV,
|
|
5948
5949
|
|
|
5949
5950
|
/** Whether we are running in a web worker environment */
|
|
@@ -6036,7 +6037,7 @@ const env = {
|
|
|
6036
6037
|
remoteHost: 'https://huggingface.co/',
|
|
6037
6038
|
remotePathTemplate: '{model}/resolve/{revision}/',
|
|
6038
6039
|
|
|
6039
|
-
allowLocalModels: !IS_BROWSER_ENV,
|
|
6040
|
+
allowLocalModels: !(IS_BROWSER_ENV || IS_WEBWORKER_ENV),
|
|
6040
6041
|
localModelPath: localModelPath,
|
|
6041
6042
|
useFS: IS_FS_AVAILABLE,
|
|
6042
6043
|
|
|
@@ -6337,6 +6338,13 @@ class GenerationConfig {
|
|
|
6337
6338
|
*/
|
|
6338
6339
|
suppress_tokens = null;
|
|
6339
6340
|
|
|
6341
|
+
/**
|
|
6342
|
+
* A streamer that will be used to stream the generation.
|
|
6343
|
+
* @type {import('./streamers.js').TextStreamer}
|
|
6344
|
+
* @default null
|
|
6345
|
+
*/
|
|
6346
|
+
streamer = null;
|
|
6347
|
+
|
|
6340
6348
|
/**
|
|
6341
6349
|
* A list of tokens that will be suppressed at the beginning of the generation.
|
|
6342
6350
|
* The `SuppressBeginTokens` logit processor will set their log probs to `-inf` so that they are not sampled.
|
|
@@ -6643,7 +6651,7 @@ class ForcedBOSTokenLogitsProcessor extends LogitsProcessor {
|
|
|
6643
6651
|
* Apply the BOS token forcing to the logits.
|
|
6644
6652
|
* @param {bigint[][]} input_ids The input IDs.
|
|
6645
6653
|
* @param {Tensor} logits The logits.
|
|
6646
|
-
* @returns {
|
|
6654
|
+
* @returns {Tensor} The logits with BOS token forcing.
|
|
6647
6655
|
*/
|
|
6648
6656
|
_call(input_ids, logits) {
|
|
6649
6657
|
for (let i = 0; i < input_ids.length; ++i) {
|
|
@@ -6713,7 +6721,7 @@ class SuppressTokensAtBeginLogitsProcessor extends LogitsProcessor {
|
|
|
6713
6721
|
* Apply the BOS token forcing to the logits.
|
|
6714
6722
|
* @param {bigint[][]} input_ids The input IDs.
|
|
6715
6723
|
* @param {Tensor} logits The logits.
|
|
6716
|
-
* @returns {
|
|
6724
|
+
* @returns {Tensor} The logits with BOS token forcing.
|
|
6717
6725
|
*/
|
|
6718
6726
|
_call(input_ids, logits) {
|
|
6719
6727
|
for (let i = 0; i < input_ids.length; ++i) {
|
|
@@ -6883,7 +6891,7 @@ class NoRepeatNGramLogitsProcessor extends LogitsProcessor {
|
|
|
6883
6891
|
* Apply the no-repeat-ngram processor to the logits.
|
|
6884
6892
|
* @param {bigint[][]} input_ids The input IDs.
|
|
6885
6893
|
* @param {Tensor} logits The logits.
|
|
6886
|
-
* @returns {
|
|
6894
|
+
* @returns {Tensor} The logits with no-repeat-ngram processing.
|
|
6887
6895
|
*/
|
|
6888
6896
|
_call(input_ids, logits) {
|
|
6889
6897
|
for (let i = 0; i < input_ids.length; ++i) {
|
|
@@ -6898,12 +6906,22 @@ class NoRepeatNGramLogitsProcessor extends LogitsProcessor {
|
|
|
6898
6906
|
}
|
|
6899
6907
|
|
|
6900
6908
|
/**
|
|
6901
|
-
* A logits processor that
|
|
6909
|
+
* A logits processor that prevents the repetition of previous tokens through a penalty.
|
|
6910
|
+
* This penalty is applied at most once per token. Note that, for decoder-only models like most LLMs,
|
|
6911
|
+
* the considered tokens include the prompt.
|
|
6912
|
+
*
|
|
6913
|
+
* In the original [paper](https://arxiv.org/pdf/1909.05858.pdf), the authors suggest the use of a
|
|
6914
|
+
* penalty of around 1.2 to achieve a good balance between truthful generation and lack of repetition.
|
|
6915
|
+
* To penalize and reduce repetition, use `penalty` values above 1.0, where a higher value penalizes
|
|
6916
|
+
* more strongly. To reward and encourage repetition, use `penalty` values between 0.0 and 1.0, where
|
|
6917
|
+
* a lower value rewards more strongly.
|
|
6902
6918
|
*/
|
|
6903
6919
|
class RepetitionPenaltyLogitsProcessor extends LogitsProcessor {
|
|
6904
6920
|
/**
|
|
6905
6921
|
* Create a RepetitionPenaltyLogitsProcessor.
|
|
6906
|
-
* @param {number} penalty The
|
|
6922
|
+
* @param {number} penalty The parameter for repetition penalty.
|
|
6923
|
+
* - 1.0 means no penalty. Above 1.0 penalizes previously generated tokens.
|
|
6924
|
+
* - Between 0.0 and 1.0 rewards previously generated tokens.
|
|
6907
6925
|
*/
|
|
6908
6926
|
constructor(penalty) {
|
|
6909
6927
|
super();
|
|
@@ -6914,16 +6932,12 @@ class RepetitionPenaltyLogitsProcessor extends LogitsProcessor {
|
|
|
6914
6932
|
* Apply the repetition penalty to the logits.
|
|
6915
6933
|
* @param {bigint[][]} input_ids The input IDs.
|
|
6916
6934
|
* @param {Tensor} logits The logits.
|
|
6917
|
-
* @returns {
|
|
6935
|
+
* @returns {Tensor} The logits with repetition penalty processing.
|
|
6918
6936
|
*/
|
|
6919
6937
|
_call(input_ids, logits) {
|
|
6920
|
-
// Modify the logits corresponding to each element in `input_ids`.
|
|
6921
|
-
// As a consequence, the logits corresponding to tokens that appear
|
|
6922
|
-
// many times in the output will be penalised more.
|
|
6923
|
-
|
|
6924
6938
|
for (let i = 0; i < input_ids.length; ++i) {
|
|
6925
6939
|
const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
|
|
6926
|
-
for (const input_id of input_ids[i]) {
|
|
6940
|
+
for (const input_id of new Set(input_ids[i])) {
|
|
6927
6941
|
const token = Number(input_id);
|
|
6928
6942
|
if (batch_logits_data[token] < 0) {
|
|
6929
6943
|
batch_logits_data[token] *= this.penalty;
|
|
@@ -6956,7 +6970,7 @@ class MinLengthLogitsProcessor extends LogitsProcessor {
|
|
|
6956
6970
|
* Apply logit processor.
|
|
6957
6971
|
* @param {bigint[][]} input_ids The input IDs.
|
|
6958
6972
|
* @param {Tensor} logits The logits.
|
|
6959
|
-
* @returns {
|
|
6973
|
+
* @returns {Tensor} The processed logits.
|
|
6960
6974
|
*/
|
|
6961
6975
|
_call(input_ids, logits) {
|
|
6962
6976
|
for (let i = 0; i < input_ids.length; ++i) {
|
|
@@ -6994,7 +7008,7 @@ class MinNewTokensLengthLogitsProcessor extends LogitsProcessor {
|
|
|
6994
7008
|
* Apply logit processor.
|
|
6995
7009
|
* @param {bigint[][]} input_ids The input IDs.
|
|
6996
7010
|
* @param {Tensor} logits The logits.
|
|
6997
|
-
* @returns {
|
|
7011
|
+
* @returns {Tensor} The processed logits.
|
|
6998
7012
|
*/
|
|
6999
7013
|
_call(input_ids, logits) {
|
|
7000
7014
|
for (let i = 0; i < input_ids.length; ++i) {
|
|
@@ -7027,7 +7041,7 @@ class NoBadWordsLogitsProcessor extends LogitsProcessor {
|
|
|
7027
7041
|
* Apply logit processor.
|
|
7028
7042
|
* @param {bigint[][]} input_ids The input IDs.
|
|
7029
7043
|
* @param {Tensor} logits The logits.
|
|
7030
|
-
* @returns {
|
|
7044
|
+
* @returns {Tensor} The processed logits.
|
|
7031
7045
|
*/
|
|
7032
7046
|
_call(input_ids, logits) {
|
|
7033
7047
|
for (let i = 0; i < input_ids.length; ++i) {
|
|
@@ -7088,7 +7102,7 @@ class ClassifierFreeGuidanceLogitsProcessor extends LogitsProcessor {
|
|
|
7088
7102
|
* Apply logit processor.
|
|
7089
7103
|
* @param {bigint[][]} input_ids The input IDs.
|
|
7090
7104
|
* @param {Tensor} logits The logits.
|
|
7091
|
-
* @returns {
|
|
7105
|
+
* @returns {Tensor} The processed logits.
|
|
7092
7106
|
*/
|
|
7093
7107
|
_call(input_ids, logits) {
|
|
7094
7108
|
if (logits.dims[0] !== 2 * input_ids.length) {
|
|
@@ -7142,7 +7156,7 @@ class TemperatureLogitsWarper extends LogitsWarper {
|
|
|
7142
7156
|
* Apply logit warper.
|
|
7143
7157
|
* @param {bigint[][]} input_ids The input IDs.
|
|
7144
7158
|
* @param {Tensor} logits The logits.
|
|
7145
|
-
* @returns {
|
|
7159
|
+
* @returns {Tensor} The processed logits.
|
|
7146
7160
|
*/
|
|
7147
7161
|
_call(input_ids, logits) {
|
|
7148
7162
|
const batch_logits_data = /** @type {Float32Array} */(logits.data);
|
|
@@ -7660,7 +7674,12 @@ const stdout_write = _env_js__WEBPACK_IMPORTED_MODULE_2__.apis.IS_PROCESS_AVAILA
|
|
|
7660
7674
|
class TextStreamer extends BaseStreamer {
|
|
7661
7675
|
/**
|
|
7662
7676
|
*
|
|
7663
|
-
* @param {import('../tokenizers.js').PreTrainedTokenizer} tokenizer
|
|
7677
|
+
* @param {import('../tokenizers.js').PreTrainedTokenizer} tokenizer
|
|
7678
|
+
* @param {Object} options
|
|
7679
|
+
* @param {boolean} [options.skip_prompt=false] Whether to skip the prompt tokens
|
|
7680
|
+
* @param {function(string): void} [options.callback_function=null] Function to call when a piece of text is ready to display
|
|
7681
|
+
* @param {function(bigint[]): void} [options.token_callback_function=null] Function to call when a new token is generated
|
|
7682
|
+
* @param {Object} [options.decode_kwargs={}] Additional keyword arguments to pass to the tokenizer's decode method
|
|
7664
7683
|
*/
|
|
7665
7684
|
constructor(tokenizer, {
|
|
7666
7685
|
skip_prompt = false,
|
|
@@ -7769,7 +7788,7 @@ class WhisperTextStreamer extends TextStreamer {
|
|
|
7769
7788
|
* @param {Object} options
|
|
7770
7789
|
* @param {boolean} [options.skip_prompt=false] Whether to skip the prompt tokens
|
|
7771
7790
|
* @param {function(string): void} [options.callback_function=null] Function to call when a piece of text is ready to display
|
|
7772
|
-
* @param {function(
|
|
7791
|
+
* @param {function(bigint[]): void} [options.token_callback_function=null] Function to call when a new token is generated
|
|
7773
7792
|
* @param {function(number): void} [options.on_chunk_start=null] Function to call when a new chunk starts
|
|
7774
7793
|
* @param {function(number): void} [options.on_chunk_end=null] Function to call when a chunk ends
|
|
7775
7794
|
* @param {function(): void} [options.on_finalize=null] Function to call when the stream is finalized
|
|
@@ -8049,6 +8068,8 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
8049
8068
|
/* harmony export */ HubertForSequenceClassification: () => (/* binding */ HubertForSequenceClassification),
|
|
8050
8069
|
/* harmony export */ HubertModel: () => (/* binding */ HubertModel),
|
|
8051
8070
|
/* harmony export */ HubertPreTrainedModel: () => (/* binding */ HubertPreTrainedModel),
|
|
8071
|
+
/* harmony export */ Idefics3ForConditionalGeneration: () => (/* binding */ Idefics3ForConditionalGeneration),
|
|
8072
|
+
/* harmony export */ Idefics3PreTrainedModel: () => (/* binding */ Idefics3PreTrainedModel),
|
|
8052
8073
|
/* harmony export */ ImageMattingOutput: () => (/* binding */ ImageMattingOutput),
|
|
8053
8074
|
/* harmony export */ JAISLMHeadModel: () => (/* binding */ JAISLMHeadModel),
|
|
8054
8075
|
/* harmony export */ JAISModel: () => (/* binding */ JAISModel),
|
|
@@ -8455,6 +8476,22 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
|
|
|
8455
8476
|
}
|
|
8456
8477
|
}
|
|
8457
8478
|
|
|
8479
|
+
if (dtype === _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DATA_TYPES.auto) {
|
|
8480
|
+
// Try to choose the auto dtype based on the custom config
|
|
8481
|
+
let config_dtype = custom_config.dtype;
|
|
8482
|
+
if (typeof config_dtype !== 'string') {
|
|
8483
|
+
config_dtype = config_dtype[fileName];
|
|
8484
|
+
}
|
|
8485
|
+
|
|
8486
|
+
if (config_dtype && config_dtype !== _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DATA_TYPES.auto && _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DATA_TYPES.hasOwnProperty(config_dtype)) {
|
|
8487
|
+
// Defined by the custom config, and is not "auto"
|
|
8488
|
+
dtype = config_dtype;
|
|
8489
|
+
} else {
|
|
8490
|
+
// Choose default dtype based on device, falling back to fp32
|
|
8491
|
+
dtype = _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DEFAULT_DEVICE_DTYPE_MAPPING[selectedDevice] ?? _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DATA_TYPES.fp32;
|
|
8492
|
+
}
|
|
8493
|
+
}
|
|
8494
|
+
|
|
8458
8495
|
const selectedDtype = /** @type {import("./utils/dtypes.js").DataType} */(dtype);
|
|
8459
8496
|
|
|
8460
8497
|
if (!_utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(selectedDtype)) {
|
|
@@ -8660,9 +8697,17 @@ async function sessionRun(session, inputs) {
|
|
|
8660
8697
|
output = replaceTensors(output);
|
|
8661
8698
|
return output;
|
|
8662
8699
|
} catch (e) {
|
|
8700
|
+
// Error messages can be long (nested) and uninformative. For this reason,
|
|
8701
|
+
// we apply minor formatting to show the most important information
|
|
8702
|
+
const formatted = Object.fromEntries(Object.entries(checkedInputs)
|
|
8703
|
+
.map(([k, { type, dims, data }]) => [k, {
|
|
8704
|
+
// Extract these properties from the underlying ORT tensor
|
|
8705
|
+
type, dims, data,
|
|
8706
|
+
}]));
|
|
8707
|
+
|
|
8663
8708
|
// This usually occurs when the inputs are of the wrong type.
|
|
8664
8709
|
console.error(`An error occurred during model execution: "${e}".`);
|
|
8665
|
-
console.error('Inputs given to model:',
|
|
8710
|
+
console.error('Inputs given to model:', formatted);
|
|
8666
8711
|
throw e;
|
|
8667
8712
|
}
|
|
8668
8713
|
}
|
|
@@ -8819,6 +8864,39 @@ async function decoderForward(self, model_inputs, is_encoder_decoder = false) {
|
|
|
8819
8864
|
}
|
|
8820
8865
|
|
|
8821
8866
|
|
|
8867
|
+
|
|
8868
|
+
function default_merge_input_ids_with_image_features({
|
|
8869
|
+
image_token_id,
|
|
8870
|
+
inputs_embeds,
|
|
8871
|
+
image_features,
|
|
8872
|
+
input_ids,
|
|
8873
|
+
attention_mask,
|
|
8874
|
+
}) {
|
|
8875
|
+
const image_tokens = input_ids.tolist().map(ids =>
|
|
8876
|
+
ids.reduce((acc, x, idx) => {
|
|
8877
|
+
if (x == image_token_id) acc.push(idx);
|
|
8878
|
+
return acc;
|
|
8879
|
+
}, [])
|
|
8880
|
+
);
|
|
8881
|
+
const n_image_tokens = image_tokens.reduce((acc, x) => acc + x.length, 0);
|
|
8882
|
+
const n_image_features = image_features.dims[0];
|
|
8883
|
+
if (n_image_tokens !== n_image_features) {
|
|
8884
|
+
throw new Error(`Image features and image tokens do not match: tokens: ${n_image_tokens}, features ${n_image_features}`);
|
|
8885
|
+
}
|
|
8886
|
+
|
|
8887
|
+
// Equivalent to performing a masked_scatter
|
|
8888
|
+
let img = 0;
|
|
8889
|
+
for (let i = 0; i < image_tokens.length; ++i) {
|
|
8890
|
+
const tokens = image_tokens[i];
|
|
8891
|
+
const embeds = inputs_embeds[i];
|
|
8892
|
+
for (let j = 0; j < tokens.length; ++j) {
|
|
8893
|
+
embeds[tokens[j]].data.set(image_features[img++].data)
|
|
8894
|
+
}
|
|
8895
|
+
}
|
|
8896
|
+
return { inputs_embeds, attention_mask }
|
|
8897
|
+
}
|
|
8898
|
+
|
|
8899
|
+
|
|
8822
8900
|
/**
|
|
8823
8901
|
* Forward pass of an image-text-to-text model.
|
|
8824
8902
|
* @param {Object} self The image-text-to-text model model.
|
|
@@ -9286,7 +9364,10 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
|
|
|
9286
9364
|
|
|
9287
9365
|
} else { // should be MODEL_TYPES.EncoderOnly
|
|
9288
9366
|
if (modelType !== MODEL_TYPES.EncoderOnly) {
|
|
9289
|
-
|
|
9367
|
+
const type = modelName ?? config?.model_type;
|
|
9368
|
+
if (type !== 'custom') {
|
|
9369
|
+
console.warn(`Model type for '${type}' not found, assuming encoder-only architecture. Please report this at ${_utils_constants_js__WEBPACK_IMPORTED_MODULE_6__.GITHUB_ISSUE_URL}.`)
|
|
9370
|
+
}
|
|
9290
9371
|
}
|
|
9291
9372
|
info = await Promise.all([
|
|
9292
9373
|
constructSessions(pretrained_model_name_or_path, {
|
|
@@ -10030,7 +10111,7 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
|
|
|
10030
10111
|
const dtype = session?.config?.kv_cache_dtype ?? 'float32';
|
|
10031
10112
|
const empty = (dtype === 'float16') ? new Uint16Array() : [];
|
|
10032
10113
|
|
|
10033
|
-
const batch_size = (decoderFeeds[this.main_input_name] ?? decoderFeeds.attention_mask)
|
|
10114
|
+
const batch_size = (decoderFeeds[this.main_input_name] ?? decoderFeeds.attention_mask)?.dims?.[0] ?? 1;
|
|
10034
10115
|
const shapes = (0,_configs_js__WEBPACK_IMPORTED_MODULE_0__.getKeyValueShapes)(this.config, { batch_size });
|
|
10035
10116
|
|
|
10036
10117
|
for (const name in shapes) {
|
|
@@ -11577,8 +11658,8 @@ class VisionEncoderDecoderModel extends PreTrainedModel {
|
|
|
11577
11658
|
class LlavaPreTrainedModel extends PreTrainedModel {
|
|
11578
11659
|
forward_params = [
|
|
11579
11660
|
'input_ids',
|
|
11580
|
-
'pixel_values',
|
|
11581
11661
|
'attention_mask',
|
|
11662
|
+
'pixel_values',
|
|
11582
11663
|
'position_ids',
|
|
11583
11664
|
'past_key_values',
|
|
11584
11665
|
];
|
|
@@ -11760,6 +11841,46 @@ class Florence2ForConditionalGeneration extends Florence2PreTrainedModel {
|
|
|
11760
11841
|
return decoder_outputs;
|
|
11761
11842
|
}
|
|
11762
11843
|
}
|
|
11844
|
+
|
|
11845
|
+
|
|
11846
|
+
//////////////////////////////////////////////////
|
|
11847
|
+
// Idefics3 Models
|
|
11848
|
+
class Idefics3PreTrainedModel extends PreTrainedModel {
|
|
11849
|
+
forward_params = [
|
|
11850
|
+
'input_ids',
|
|
11851
|
+
'attention_mask',
|
|
11852
|
+
'pixel_values',
|
|
11853
|
+
'pixel_attention_mask',
|
|
11854
|
+
'position_ids',
|
|
11855
|
+
'past_key_values',
|
|
11856
|
+
];
|
|
11857
|
+
}
|
|
11858
|
+
|
|
11859
|
+
/**
|
|
11860
|
+
* The LLAVA model which consists of a vision backbone and a language model.
|
|
11861
|
+
*/
|
|
11862
|
+
class Idefics3ForConditionalGeneration extends Idefics3PreTrainedModel {
|
|
11863
|
+
|
|
11864
|
+
async encode_image({ pixel_values, pixel_attention_mask }) {
|
|
11865
|
+
const features = (await sessionRun(this.sessions['vision_encoder'], { pixel_values, pixel_attention_mask })).image_features;
|
|
11866
|
+
return features;
|
|
11867
|
+
}
|
|
11868
|
+
|
|
11869
|
+
_merge_input_ids_with_image_features(kwargs) {
|
|
11870
|
+
const vision_hidden_size = kwargs.image_features.dims.at(-1);
|
|
11871
|
+
const reshaped_image_hidden_states = kwargs.image_features.view(-1, vision_hidden_size);
|
|
11872
|
+
|
|
11873
|
+
return default_merge_input_ids_with_image_features({
|
|
11874
|
+
// @ts-ignore
|
|
11875
|
+
image_token_id: this.config.image_token_id,
|
|
11876
|
+
...kwargs,
|
|
11877
|
+
image_features: reshaped_image_hidden_states,
|
|
11878
|
+
})
|
|
11879
|
+
}
|
|
11880
|
+
}
|
|
11881
|
+
//////////////////////////////////////////////////
|
|
11882
|
+
|
|
11883
|
+
//////////////////////////////////////////////////
|
|
11763
11884
|
class CLIPPreTrainedModel extends PreTrainedModel { }
|
|
11764
11885
|
|
|
11765
11886
|
/**
|
|
@@ -12553,36 +12674,12 @@ class Qwen2VLForConditionalGeneration extends Qwen2VLPreTrainedModel {
|
|
|
12553
12674
|
return features;
|
|
12554
12675
|
}
|
|
12555
12676
|
|
|
12556
|
-
_merge_input_ids_with_image_features({
|
|
12557
|
-
|
|
12558
|
-
|
|
12559
|
-
|
|
12560
|
-
|
|
12561
|
-
|
|
12562
|
-
// @ts-ignore
|
|
12563
|
-
const { image_token_id } = this.config;
|
|
12564
|
-
const image_tokens = input_ids.tolist().map(ids =>
|
|
12565
|
-
ids.reduce((acc, x, idx) => {
|
|
12566
|
-
if (x == image_token_id) acc.push(idx);
|
|
12567
|
-
return acc;
|
|
12568
|
-
}, [])
|
|
12569
|
-
);
|
|
12570
|
-
const n_image_tokens = image_tokens.reduce((acc, x) => acc + x.length, 0);
|
|
12571
|
-
const n_image_features = image_features.dims[0];
|
|
12572
|
-
if (n_image_tokens !== n_image_features) {
|
|
12573
|
-
throw new Error(`Image features and image tokens do not match: tokens: ${n_image_tokens}, features ${n_image_features}`);
|
|
12574
|
-
}
|
|
12575
|
-
|
|
12576
|
-
// Equivalent to performing a masked_scatter
|
|
12577
|
-
let img = 0;
|
|
12578
|
-
for (let i = 0; i < image_tokens.length; ++i) {
|
|
12579
|
-
const tokens = image_tokens[i];
|
|
12580
|
-
const embeds = inputs_embeds[i];
|
|
12581
|
-
for (let j = 0; j < tokens.length; ++j) {
|
|
12582
|
-
embeds[tokens[j]].data.set(image_features[img++].data)
|
|
12583
|
-
}
|
|
12584
|
-
}
|
|
12585
|
-
return { inputs_embeds, attention_mask }
|
|
12677
|
+
_merge_input_ids_with_image_features(kwargs) {
|
|
12678
|
+
return default_merge_input_ids_with_image_features({
|
|
12679
|
+
// @ts-ignore
|
|
12680
|
+
image_token_id: this.config.image_token_id,
|
|
12681
|
+
...kwargs
|
|
12682
|
+
})
|
|
12586
12683
|
}
|
|
12587
12684
|
|
|
12588
12685
|
prepare_inputs_for_generation(input_ids, model_inputs, generation_config) {
|
|
@@ -15187,6 +15284,7 @@ const MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES = new Map([
|
|
|
15187
15284
|
|
|
15188
15285
|
const MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES = new Map([
|
|
15189
15286
|
['vision-encoder-decoder', ['VisionEncoderDecoderModel', VisionEncoderDecoderModel]],
|
|
15287
|
+
['idefics3', ['Idefics3ForConditionalGeneration', Idefics3ForConditionalGeneration]],
|
|
15190
15288
|
]);
|
|
15191
15289
|
|
|
15192
15290
|
const MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES = new Map([
|
|
@@ -15195,6 +15293,7 @@ const MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES = new Map([
|
|
|
15195
15293
|
['moondream1', ['Moondream1ForConditionalGeneration', Moondream1ForConditionalGeneration]],
|
|
15196
15294
|
['florence2', ['Florence2ForConditionalGeneration', Florence2ForConditionalGeneration]],
|
|
15197
15295
|
['qwen2-vl', ['Qwen2VLForConditionalGeneration', Qwen2VLForConditionalGeneration]],
|
|
15296
|
+
['idefics3', ['Idefics3ForConditionalGeneration', Idefics3ForConditionalGeneration]],
|
|
15198
15297
|
]);
|
|
15199
15298
|
|
|
15200
15299
|
const MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES = new Map([
|
|
@@ -16824,6 +16923,396 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
16824
16923
|
class GLPNFeatureExtractor extends _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_0__.ImageProcessor { }
|
|
16825
16924
|
|
|
16826
16925
|
|
|
16926
|
+
/***/ }),
|
|
16927
|
+
|
|
16928
|
+
/***/ "./src/models/idefics3/image_processing_idefics3.js":
|
|
16929
|
+
/*!**********************************************************!*\
|
|
16930
|
+
!*** ./src/models/idefics3/image_processing_idefics3.js ***!
|
|
16931
|
+
\**********************************************************/
|
|
16932
|
+
/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
|
|
16933
|
+
|
|
16934
|
+
"use strict";
|
|
16935
|
+
__webpack_require__.r(__webpack_exports__);
|
|
16936
|
+
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
16937
|
+
/* harmony export */ Idefics3ImageProcessor: () => (/* binding */ Idefics3ImageProcessor)
|
|
16938
|
+
/* harmony export */ });
|
|
16939
|
+
/* harmony import */ var _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/image_processors_utils.js */ "./src/base/image_processors_utils.js");
|
|
16940
|
+
/* harmony import */ var _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../../utils/tensor.js */ "./src/utils/tensor.js");
|
|
16941
|
+
|
|
16942
|
+
|
|
16943
|
+
|
|
16944
|
+
|
|
16945
|
+
|
|
16946
|
+
class Idefics3ImageProcessor extends _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_0__.ImageProcessor {
|
|
16947
|
+
constructor(config) {
|
|
16948
|
+
super(config);
|
|
16949
|
+
|
|
16950
|
+
this.do_image_splitting = config.do_image_splitting ?? true;
|
|
16951
|
+
this.max_image_size = config.max_image_size;
|
|
16952
|
+
}
|
|
16953
|
+
|
|
16954
|
+
/**
|
|
16955
|
+
* @typedef {import('../../utils/image.js').RawImage} RawImage
|
|
16956
|
+
* @typedef {import('../../utils/tensor.js').Tensor} Tensor
|
|
16957
|
+
*/
|
|
16958
|
+
|
|
16959
|
+
/**
|
|
16960
|
+
* Calculate size to resize images to, to be multiples of `vision_encoder_max_size` while preserving the aspect ratio.
|
|
16961
|
+
* @param {Tensor} pixel_values Tensor of the image to resize.
|
|
16962
|
+
* @param {number} vision_encoder_max_size Maximum size of the output image. If the image is larger than this size,
|
|
16963
|
+
* it will be split into patches of this size, and the original image will be concatenated with the patches, resized to max_size.
|
|
16964
|
+
*/
|
|
16965
|
+
get_resize_for_vision_encoder(pixel_values, vision_encoder_max_size) {
|
|
16966
|
+
let [height, width] = pixel_values.dims.slice(-2);
|
|
16967
|
+
|
|
16968
|
+
const aspect_ratio = width / height;
|
|
16969
|
+
if (width >= height) {
|
|
16970
|
+
width = Math.ceil(width / vision_encoder_max_size) * vision_encoder_max_size;
|
|
16971
|
+
height = Math.floor(width / aspect_ratio);
|
|
16972
|
+
height = Math.ceil(height / vision_encoder_max_size) * vision_encoder_max_size;
|
|
16973
|
+
} else {
|
|
16974
|
+
height = Math.ceil(height / vision_encoder_max_size) * vision_encoder_max_size;
|
|
16975
|
+
width = Math.floor(height * aspect_ratio);
|
|
16976
|
+
width = Math.ceil(width / vision_encoder_max_size) * vision_encoder_max_size;
|
|
16977
|
+
}
|
|
16978
|
+
return { height, width };
|
|
16979
|
+
}
|
|
16980
|
+
|
|
16981
|
+
/** @param {RawImage|RawImage[]|RawImage[][]} images */
|
|
16982
|
+
async _call(images, {
|
|
16983
|
+
do_image_splitting = null,
|
|
16984
|
+
return_row_col_info = false,
|
|
16985
|
+
} = {}) {
|
|
16986
|
+
|
|
16987
|
+
/** @type {RawImage[][]} */
|
|
16988
|
+
let batched_2d_images;
|
|
16989
|
+
if (!Array.isArray(images)) {
|
|
16990
|
+
batched_2d_images = [[images]];
|
|
16991
|
+
} else {
|
|
16992
|
+
if (images.length === 0 || !images[0]) {
|
|
16993
|
+
throw new Error("No images provided.");
|
|
16994
|
+
}
|
|
16995
|
+
if (!Array.isArray(images[0])) {
|
|
16996
|
+
batched_2d_images = [/** @type {RawImage[]} */(images)];
|
|
16997
|
+
} else {
|
|
16998
|
+
batched_2d_images = /** @type {RawImage[][]} */(images);
|
|
16999
|
+
}
|
|
17000
|
+
}
|
|
17001
|
+
|
|
17002
|
+
// List of tensors, each with shape [patches, channels, height, width]
|
|
17003
|
+
let all_pixel_values = [];
|
|
17004
|
+
let images_list_rows = [];
|
|
17005
|
+
let images_list_cols = [];
|
|
17006
|
+
|
|
17007
|
+
const original_sizes = [];
|
|
17008
|
+
const reshaped_input_sizes = [];
|
|
17009
|
+
for (const image_batch of batched_2d_images) {
|
|
17010
|
+
|
|
17011
|
+
let images_list = await Promise.all(image_batch.map(x => this.preprocess(x)));
|
|
17012
|
+
|
|
17013
|
+
// Original sizes of images
|
|
17014
|
+
original_sizes.push(...images_list.map(x => x.original_size));
|
|
17015
|
+
|
|
17016
|
+
// Reshaped sizes of images, before padding or cropping
|
|
17017
|
+
reshaped_input_sizes.push(...images_list.map(x => x.reshaped_input_size));
|
|
17018
|
+
|
|
17019
|
+
// Convert images to 4D tensors for easier processing
|
|
17020
|
+
images_list.forEach(x => x.pixel_values.unsqueeze_(0));
|
|
17021
|
+
|
|
17022
|
+
const { longest_edge } = this.max_image_size;
|
|
17023
|
+
|
|
17024
|
+
/** @type {Tensor[]} */
|
|
17025
|
+
let images_tensor;
|
|
17026
|
+
if (do_image_splitting ?? this.do_image_splitting) {
|
|
17027
|
+
let image_rows = new Array(images_list.length);
|
|
17028
|
+
let image_cols = new Array(images_list.length);
|
|
17029
|
+
|
|
17030
|
+
// We first resize both height and width of each image to the nearest max_image_size multiple, disregarding the aspect ratio
|
|
17031
|
+
images_tensor = await Promise.all(images_list.map(async (x, i) => {
|
|
17032
|
+
const new_size = this.get_resize_for_vision_encoder(x.pixel_values, longest_edge);
|
|
17033
|
+
|
|
17034
|
+
const resized = await (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.interpolate_4d)(x.pixel_values, {
|
|
17035
|
+
size: [new_size.height, new_size.width],
|
|
17036
|
+
});
|
|
17037
|
+
|
|
17038
|
+
const { frames, num_splits_h, num_splits_w } = await this.split_image(resized, this.max_image_size);
|
|
17039
|
+
image_rows[i] = num_splits_h;
|
|
17040
|
+
image_cols[i] = num_splits_w;
|
|
17041
|
+
return (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.cat)(frames, 0);
|
|
17042
|
+
}));
|
|
17043
|
+
|
|
17044
|
+
images_list_rows.push(image_rows);
|
|
17045
|
+
images_list_cols.push(image_cols);
|
|
17046
|
+
|
|
17047
|
+
} else {
|
|
17048
|
+
/** @type {[number, number]} */
|
|
17049
|
+
const size = [longest_edge, longest_edge];
|
|
17050
|
+
images_tensor = await Promise.all(
|
|
17051
|
+
images_list.map(x => (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.interpolate_4d)(x.pixel_values, { size }))
|
|
17052
|
+
);
|
|
17053
|
+
|
|
17054
|
+
images_list_rows.push(new Array(images_list.length).fill(0));
|
|
17055
|
+
images_list_cols.push(new Array(images_list.length).fill(0));
|
|
17056
|
+
}
|
|
17057
|
+
|
|
17058
|
+
all_pixel_values.push((0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.cat)(images_tensor, 0));
|
|
17059
|
+
}
|
|
17060
|
+
|
|
17061
|
+
const batch_size = all_pixel_values.length;
|
|
17062
|
+
const [n, c, h, w] = all_pixel_values[0].dims;
|
|
17063
|
+
|
|
17064
|
+
// Stack pixel values
|
|
17065
|
+
let pixel_values;
|
|
17066
|
+
let pixel_attention_mask;
|
|
17067
|
+
if (batch_size === 1) {
|
|
17068
|
+
pixel_values = all_pixel_values[0].unsqueeze_(0);
|
|
17069
|
+
pixel_attention_mask = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.full)([batch_size, n, h, w], true);
|
|
17070
|
+
} else {
|
|
17071
|
+
// Add padding (if necessary) to images with less patches than the maximum number of patches
|
|
17072
|
+
const max_num_patches = Math.max(...all_pixel_values.map(x => x.dims.at(0)));
|
|
17073
|
+
|
|
17074
|
+
pixel_attention_mask = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.full)([batch_size, max_num_patches, h, w], true);
|
|
17075
|
+
const pixel_attention_mask_data = pixel_attention_mask.data;
|
|
17076
|
+
const pixel_attention_mask_stride = max_num_patches * h * w;
|
|
17077
|
+
for (let i = 0; i < batch_size; ++i) {
|
|
17078
|
+
const num_patches = all_pixel_values[i].dims[0];
|
|
17079
|
+
if (num_patches < max_num_patches) {
|
|
17080
|
+
all_pixel_values[i] = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.cat)([
|
|
17081
|
+
all_pixel_values[i],
|
|
17082
|
+
(0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.full)([max_num_patches - num_patches, c, h, w], 0),
|
|
17083
|
+
], 0);
|
|
17084
|
+
|
|
17085
|
+
const start_offset = i * pixel_attention_mask_stride + num_patches * h * w;
|
|
17086
|
+
const end_offset = (i + 1) * pixel_attention_mask_stride;
|
|
17087
|
+
pixel_attention_mask_data.fill(false, start_offset, end_offset);
|
|
17088
|
+
}
|
|
17089
|
+
}
|
|
17090
|
+
pixel_values = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.stack)(all_pixel_values, 0);
|
|
17091
|
+
}
|
|
17092
|
+
|
|
17093
|
+
return {
|
|
17094
|
+
pixel_values,
|
|
17095
|
+
pixel_attention_mask,
|
|
17096
|
+
|
|
17097
|
+
original_sizes,
|
|
17098
|
+
reshaped_input_sizes,
|
|
17099
|
+
...(
|
|
17100
|
+
return_row_col_info
|
|
17101
|
+
? { rows: images_list_rows, cols: images_list_cols }
|
|
17102
|
+
: {}
|
|
17103
|
+
),
|
|
17104
|
+
}
|
|
17105
|
+
}
|
|
17106
|
+
|
|
17107
|
+
async split_image(pixel_values, { longest_edge }) {
|
|
17108
|
+
const max_height = longest_edge;
|
|
17109
|
+
const max_width = longest_edge;
|
|
17110
|
+
|
|
17111
|
+
const frames = [];
|
|
17112
|
+
|
|
17113
|
+
const [height, width] = pixel_values.dims.slice(-2);
|
|
17114
|
+
|
|
17115
|
+
let num_splits_h = 0, num_splits_w = 0;
|
|
17116
|
+
|
|
17117
|
+
if (height > max_height || width > max_width) {
|
|
17118
|
+
// Calculate the number of splits
|
|
17119
|
+
num_splits_h = Math.ceil(height / max_height);
|
|
17120
|
+
num_splits_w = Math.ceil(width / max_width);
|
|
17121
|
+
|
|
17122
|
+
// Calculate the optimal width and height for the sub-images
|
|
17123
|
+
const optimal_height = Math.ceil(height / num_splits_h);
|
|
17124
|
+
const optimal_width = Math.ceil(width / num_splits_w);
|
|
17125
|
+
|
|
17126
|
+
// Iterate through each row and column
|
|
17127
|
+
for (let r = 0; r < num_splits_h; r++) {
|
|
17128
|
+
for (let c = 0; c < num_splits_w; c++) {
|
|
17129
|
+
// Calculate the starting point of the crop
|
|
17130
|
+
const start_x = c * optimal_width;
|
|
17131
|
+
const start_y = r * optimal_height;
|
|
17132
|
+
|
|
17133
|
+
// Calculate the ending point of the crop
|
|
17134
|
+
const end_x = Math.min(start_x + optimal_width, width);
|
|
17135
|
+
const end_y = Math.min(start_y + optimal_height, height);
|
|
17136
|
+
|
|
17137
|
+
// Crop the image
|
|
17138
|
+
frames.push(pixel_values.slice(null, null, [start_y, end_y], [start_x, end_x]));
|
|
17139
|
+
}
|
|
17140
|
+
}
|
|
17141
|
+
|
|
17142
|
+
// Resize the global image to match max dimensions for memory efficiency
|
|
17143
|
+
const global_image_height = max_height;
|
|
17144
|
+
const global_image_width = max_width;
|
|
17145
|
+
|
|
17146
|
+
if (height !== global_image_height || width !== global_image_width) {
|
|
17147
|
+
pixel_values = await (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.interpolate_4d)(pixel_values, {
|
|
17148
|
+
size: [global_image_height, global_image_width],
|
|
17149
|
+
})
|
|
17150
|
+
}
|
|
17151
|
+
}
|
|
17152
|
+
|
|
17153
|
+
frames.push(pixel_values);
|
|
17154
|
+
|
|
17155
|
+
return { frames, num_splits_h, num_splits_w };
|
|
17156
|
+
}
|
|
17157
|
+
}
|
|
17158
|
+
|
|
17159
|
+
|
|
17160
|
+
/***/ }),
|
|
17161
|
+
|
|
17162
|
+
/***/ "./src/models/idefics3/processing_idefics3.js":
|
|
17163
|
+
/*!****************************************************!*\
|
|
17164
|
+
!*** ./src/models/idefics3/processing_idefics3.js ***!
|
|
17165
|
+
\****************************************************/
|
|
17166
|
+
/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
|
|
17167
|
+
|
|
17168
|
+
"use strict";
|
|
17169
|
+
__webpack_require__.r(__webpack_exports__);
|
|
17170
|
+
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
17171
|
+
/* harmony export */ Idefics3Processor: () => (/* binding */ Idefics3Processor)
|
|
17172
|
+
/* harmony export */ });
|
|
17173
|
+
/* harmony import */ var _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/processing_utils.js */ "./src/base/processing_utils.js");
|
|
17174
|
+
/* harmony import */ var _auto_image_processing_auto_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../auto/image_processing_auto.js */ "./src/models/auto/image_processing_auto.js");
|
|
17175
|
+
/* harmony import */ var _tokenizers_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../../tokenizers.js */ "./src/tokenizers.js");
|
|
17176
|
+
/* harmony import */ var _utils_image_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ../../utils/image.js */ "./src/utils/image.js");
|
|
17177
|
+
/* harmony import */ var _utils_core_js__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ../../utils/core.js */ "./src/utils/core.js");
|
|
17178
|
+
|
|
17179
|
+
|
|
17180
|
+
|
|
17181
|
+
|
|
17182
|
+
|
|
17183
|
+
|
|
17184
|
+
|
|
17185
|
+
/**
|
|
17186
|
+
* Prompt with expanded image tokens for when the image is split into patches.
|
|
17187
|
+
* @private
|
|
17188
|
+
*/
|
|
17189
|
+
/**
 * Prompt with expanded image tokens for when the image is split into patches.
 * Emits one `<fake><row_R_col_C><image>*seq_len` group per tile (row-major,
 * newline after each row), followed by the global-image group.
 * @private
 */
function _prompt_split_image(image_seq_len, image_rows, image_cols, fake_token_around_image, image_token, global_img_token) {
    const expanded_image = image_token.repeat(image_seq_len);
    const parts = [];
    for (let row = 1; row <= image_rows; ++row) {
        for (let col = 1; col <= image_cols; ++col) {
            parts.push(`${fake_token_around_image}<row_${row}_col_${col}>${expanded_image}`);
        }
        parts.push("\n");
    }
    // Trailing global-image group, separated from the tiles by a blank line.
    parts.push(`\n${fake_token_around_image}${global_img_token}${expanded_image}${fake_token_around_image}`);
    return parts.join("");
}
|
|
17210
|
+
|
|
17211
|
+
/**
|
|
17212
|
+
* Prompt with expanded image tokens for a single image.
|
|
17213
|
+
* @private
|
|
17214
|
+
*/
|
|
17215
|
+
/**
 * Prompt with expanded image tokens for a single (unsplit) image:
 * `<fake><global><image>*seq_len<fake>`.
 * @private
 */
function _prompt_single_image(image_seq_len, fake_token_around_image, image_token, global_img_token) {
    const parts = [
        fake_token_around_image,
        global_img_token,
        image_token.repeat(image_seq_len),
        fake_token_around_image,
    ];
    return parts.join("");
}
|
|
17223
|
+
|
|
17224
|
+
/**
 * Builds the expanded image prompt for one image: the split-image form when the
 * image was tiled (non-zero rows/cols), otherwise the single-image form.
 */
function get_image_prompt_string(image_rows, image_cols, image_seq_len, fake_token_around_image, image_token, global_img_token) {
    // rows === 0 && cols === 0 signals "not split" (see the image processor).
    const was_split = image_rows !== 0 || image_cols !== 0;
    return was_split
        ? _prompt_split_image(
            image_seq_len, image_rows, image_cols, fake_token_around_image, image_token, global_img_token
        )
        : _prompt_single_image(
            image_seq_len,
            fake_token_around_image,
            image_token,
            global_img_token
        );
}
|
|
17237
|
+
|
|
17238
|
+
|
|
17239
|
+
class Idefics3Processor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__.Processor {
    static image_processor_class = _auto_image_processing_auto_js__WEBPACK_IMPORTED_MODULE_1__.AutoImageProcessor
    static tokenizer_class = _tokenizers_js__WEBPACK_IMPORTED_MODULE_2__.AutoTokenizer
    static uses_processor_config = true;

    // Special tokens used when expanding `<image>` placeholders in the prompt.
    fake_image_token = "<fake_token_around_image>";
    image_token = "<image>";
    global_img_token = "<global-img>";

    /**
     * Preprocesses images (if any) and expands each `<image>` placeholder in the
     * text into the full image-token sequence, then tokenizes the result.
     * @param {string|string[]} text - Prompt(s); each `<image>` occurrence is expanded in place.
     * @param {RawImage|RawImage[]|RawImage[][]} images - Optional image(s); may be omitted for text-only input.
     * @param {Object} options - Forwarded to the image processor; `return_row_col_info` defaults to true.
     * @returns {Promise<any>} Merged tokenizer outputs and (when images were given) image-processor outputs.
     */
    async _call(text, images = null, options = {}) {
        options.return_row_col_info ??= true;

        let image_inputs;

        if (images) {
            image_inputs = await this.image_processor(images, options);
        }

        // NOTE: We assume text is present
        if (!Array.isArray(text)) {
            text = [text];
        }

        // FIX: use optional chaining — `image_inputs` is undefined on the
        // text-only path (images === null), so `image_inputs.rows` threw a
        // TypeError even though the signature explicitly allows `images = null`.
        const image_rows = image_inputs?.rows ?? [new Array(text.length).fill(0)];
        const image_cols = image_inputs?.cols ?? [new Array(text.length).fill(0)];

        const image_seq_len = this.config.image_seq_len;
        // Per-sample `<image>` token counts; kept for parity with the upstream
        // implementation (used there for validation) — currently informational.
        const n_images_in_text = [];
        const prompt_strings = [];
        for (let i = 0; i < text.length; ++i) {
            const sample = text[i];
            const sample_rows = image_rows[i];
            const sample_cols = image_cols[i];

            n_images_in_text.push((0,_utils_core_js__WEBPACK_IMPORTED_MODULE_4__.count)(sample, this.image_token));

            // Replace the image token with fake tokens around the expanded image token sequence of length `image_seq_len`
            const image_prompt_strings = sample_rows.map(
                (n_rows, j) => get_image_prompt_string(
                    n_rows,
                    sample_cols[j],
                    image_seq_len,
                    this.fake_image_token,
                    this.image_token,
                    this.global_img_token,
                )
            );

            const split_sample = sample.split(this.image_token);
            if (split_sample.length === 0) {
                throw new Error("The image token should be present in the text.");
            }

            // Re-interleave: insert each expanded image prompt where an `<image>` token was.
            // NOTE(review): assumes the number of `<image>` tokens matches the number of
            // images for this sample — mismatches are not validated here; confirm against callers.
            let new_sample = split_sample[0];
            for (let j = 0; j < image_prompt_strings.length; ++j) {
                new_sample += image_prompt_strings[j] + split_sample[j + 1];
            }
            prompt_strings.push(new_sample);
        }

        const text_inputs = this.tokenizer(prompt_strings);

        return {
            ...text_inputs,
            ...image_inputs,
        }
    }
}
|
|
17314
|
+
|
|
17315
|
+
|
|
16827
17316
|
/***/ }),
|
|
16828
17317
|
|
|
16829
17318
|
/***/ "./src/models/image_processors.js":
|
|
@@ -16852,40 +17341,41 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
16852
17341
|
/* harmony export */ DonutImageProcessor: () => (/* reexport safe */ _donut_image_processing_donut_js__WEBPACK_IMPORTED_MODULE_7__.DonutImageProcessor),
|
|
16853
17342
|
/* harmony export */ EfficientNetImageProcessor: () => (/* reexport safe */ _efficientnet_image_processing_efficientnet_js__WEBPACK_IMPORTED_MODULE_9__.EfficientNetImageProcessor),
|
|
16854
17343
|
/* harmony export */ GLPNFeatureExtractor: () => (/* reexport safe */ _glpn_image_processing_glpn_js__WEBPACK_IMPORTED_MODULE_10__.GLPNFeatureExtractor),
|
|
16855
|
-
/* harmony export */
|
|
16856
|
-
/* harmony export */
|
|
16857
|
-
/* harmony export */
|
|
16858
|
-
/* harmony export */
|
|
16859
|
-
/* harmony export */
|
|
16860
|
-
/* harmony export */
|
|
16861
|
-
/* harmony export */
|
|
16862
|
-
/* harmony export */
|
|
16863
|
-
/* harmony export */
|
|
16864
|
-
/* harmony export */
|
|
16865
|
-
/* harmony export */
|
|
16866
|
-
/* harmony export */
|
|
16867
|
-
/* harmony export */
|
|
16868
|
-
/* harmony export */
|
|
16869
|
-
/* harmony export */
|
|
16870
|
-
/* harmony export */
|
|
16871
|
-
/* harmony export */
|
|
16872
|
-
/* harmony export */
|
|
16873
|
-
/* harmony export */
|
|
16874
|
-
/* harmony export */
|
|
16875
|
-
/* harmony export */
|
|
16876
|
-
/* harmony export */
|
|
16877
|
-
/* harmony export */
|
|
16878
|
-
/* harmony export */
|
|
16879
|
-
/* harmony export */
|
|
16880
|
-
/* harmony export */
|
|
16881
|
-
/* harmony export */
|
|
16882
|
-
/* harmony export */
|
|
16883
|
-
/* harmony export */
|
|
16884
|
-
/* harmony export */
|
|
16885
|
-
/* harmony export */
|
|
16886
|
-
/* harmony export */
|
|
16887
|
-
/* harmony export */
|
|
16888
|
-
/* harmony export */
|
|
17344
|
+
/* harmony export */ Idefics3ImageProcessor: () => (/* reexport safe */ _idefics3_image_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_11__.Idefics3ImageProcessor),
|
|
17345
|
+
/* harmony export */ JinaCLIPImageProcessor: () => (/* reexport safe */ _jina_clip_image_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_13__.JinaCLIPImageProcessor),
|
|
17346
|
+
/* harmony export */ LlavaOnevisionImageProcessor: () => (/* reexport safe */ _llava_onevision_image_processing_llava_onevision_js__WEBPACK_IMPORTED_MODULE_14__.LlavaOnevisionImageProcessor),
|
|
17347
|
+
/* harmony export */ Mask2FormerImageProcessor: () => (/* reexport safe */ _mask2former_image_processing_mask2former_js__WEBPACK_IMPORTED_MODULE_15__.Mask2FormerImageProcessor),
|
|
17348
|
+
/* harmony export */ MaskFormerFeatureExtractor: () => (/* reexport safe */ _maskformer_image_processing_maskformer_js__WEBPACK_IMPORTED_MODULE_16__.MaskFormerFeatureExtractor),
|
|
17349
|
+
/* harmony export */ MaskFormerImageProcessor: () => (/* reexport safe */ _maskformer_image_processing_maskformer_js__WEBPACK_IMPORTED_MODULE_16__.MaskFormerImageProcessor),
|
|
17350
|
+
/* harmony export */ MobileNetV1FeatureExtractor: () => (/* reexport safe */ _mobilenet_v1_image_processing_mobilenet_v1_js__WEBPACK_IMPORTED_MODULE_17__.MobileNetV1FeatureExtractor),
|
|
17351
|
+
/* harmony export */ MobileNetV1ImageProcessor: () => (/* reexport safe */ _mobilenet_v1_image_processing_mobilenet_v1_js__WEBPACK_IMPORTED_MODULE_17__.MobileNetV1ImageProcessor),
|
|
17352
|
+
/* harmony export */ MobileNetV2FeatureExtractor: () => (/* reexport safe */ _mobilenet_v2_image_processing_mobilenet_v2_js__WEBPACK_IMPORTED_MODULE_18__.MobileNetV2FeatureExtractor),
|
|
17353
|
+
/* harmony export */ MobileNetV2ImageProcessor: () => (/* reexport safe */ _mobilenet_v2_image_processing_mobilenet_v2_js__WEBPACK_IMPORTED_MODULE_18__.MobileNetV2ImageProcessor),
|
|
17354
|
+
/* harmony export */ MobileNetV3FeatureExtractor: () => (/* reexport safe */ _mobilenet_v3_image_processing_mobilenet_v3_js__WEBPACK_IMPORTED_MODULE_19__.MobileNetV3FeatureExtractor),
|
|
17355
|
+
/* harmony export */ MobileNetV3ImageProcessor: () => (/* reexport safe */ _mobilenet_v3_image_processing_mobilenet_v3_js__WEBPACK_IMPORTED_MODULE_19__.MobileNetV3ImageProcessor),
|
|
17356
|
+
/* harmony export */ MobileNetV4FeatureExtractor: () => (/* reexport safe */ _mobilenet_v4_image_processing_mobilenet_v4_js__WEBPACK_IMPORTED_MODULE_20__.MobileNetV4FeatureExtractor),
|
|
17357
|
+
/* harmony export */ MobileNetV4ImageProcessor: () => (/* reexport safe */ _mobilenet_v4_image_processing_mobilenet_v4_js__WEBPACK_IMPORTED_MODULE_20__.MobileNetV4ImageProcessor),
|
|
17358
|
+
/* harmony export */ MobileViTFeatureExtractor: () => (/* reexport safe */ _mobilevit_image_processing_mobilevit_js__WEBPACK_IMPORTED_MODULE_21__.MobileViTFeatureExtractor),
|
|
17359
|
+
/* harmony export */ MobileViTImageProcessor: () => (/* reexport safe */ _mobilevit_image_processing_mobilevit_js__WEBPACK_IMPORTED_MODULE_21__.MobileViTImageProcessor),
|
|
17360
|
+
/* harmony export */ NougatImageProcessor: () => (/* reexport safe */ _nougat_image_processing_nougat_js__WEBPACK_IMPORTED_MODULE_22__.NougatImageProcessor),
|
|
17361
|
+
/* harmony export */ OwlViTFeatureExtractor: () => (/* reexport safe */ _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_24__.OwlViTFeatureExtractor),
|
|
17362
|
+
/* harmony export */ OwlViTImageProcessor: () => (/* reexport safe */ _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_24__.OwlViTImageProcessor),
|
|
17363
|
+
/* harmony export */ Owlv2ImageProcessor: () => (/* reexport safe */ _owlv2_image_processing_owlv2_js__WEBPACK_IMPORTED_MODULE_23__.Owlv2ImageProcessor),
|
|
17364
|
+
/* harmony export */ PvtImageProcessor: () => (/* reexport safe */ _pvt_image_processing_pvt_js__WEBPACK_IMPORTED_MODULE_25__.PvtImageProcessor),
|
|
17365
|
+
/* harmony export */ Qwen2VLImageProcessor: () => (/* reexport safe */ _qwen2_vl_image_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_26__.Qwen2VLImageProcessor),
|
|
17366
|
+
/* harmony export */ RTDetrImageProcessor: () => (/* reexport safe */ _rt_detr_image_processing_rt_detr_js__WEBPACK_IMPORTED_MODULE_27__.RTDetrImageProcessor),
|
|
17367
|
+
/* harmony export */ SamImageProcessor: () => (/* reexport safe */ _sam_image_processing_sam_js__WEBPACK_IMPORTED_MODULE_28__.SamImageProcessor),
|
|
17368
|
+
/* harmony export */ SegformerFeatureExtractor: () => (/* reexport safe */ _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_29__.SegformerFeatureExtractor),
|
|
17369
|
+
/* harmony export */ SegformerImageProcessor: () => (/* reexport safe */ _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_29__.SegformerImageProcessor),
|
|
17370
|
+
/* harmony export */ SiglipImageProcessor: () => (/* reexport safe */ _siglip_image_processing_siglip_js__WEBPACK_IMPORTED_MODULE_30__.SiglipImageProcessor),
|
|
17371
|
+
/* harmony export */ Swin2SRImageProcessor: () => (/* reexport safe */ _swin2sr_image_processing_swin2sr_js__WEBPACK_IMPORTED_MODULE_31__.Swin2SRImageProcessor),
|
|
17372
|
+
/* harmony export */ VLMImageProcessor: () => (/* reexport safe */ _janus_image_processing_janus_js__WEBPACK_IMPORTED_MODULE_12__.VLMImageProcessor),
|
|
17373
|
+
/* harmony export */ ViTFeatureExtractor: () => (/* reexport safe */ _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_32__.ViTFeatureExtractor),
|
|
17374
|
+
/* harmony export */ ViTImageProcessor: () => (/* reexport safe */ _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_32__.ViTImageProcessor),
|
|
17375
|
+
/* harmony export */ VitMatteImageProcessor: () => (/* reexport safe */ _vitmatte_image_processing_vitmatte_js__WEBPACK_IMPORTED_MODULE_33__.VitMatteImageProcessor),
|
|
17376
|
+
/* harmony export */ VitPoseImageProcessor: () => (/* reexport safe */ _vitpose_image_processing_vitpose_js__WEBPACK_IMPORTED_MODULE_34__.VitPoseImageProcessor),
|
|
17377
|
+
/* harmony export */ YolosFeatureExtractor: () => (/* reexport safe */ _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_35__.YolosFeatureExtractor),
|
|
17378
|
+
/* harmony export */ YolosImageProcessor: () => (/* reexport safe */ _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_35__.YolosImageProcessor)
|
|
16889
17379
|
/* harmony export */ });
|
|
16890
17380
|
/* harmony import */ var _beit_image_processing_beit_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./beit/image_processing_beit.js */ "./src/models/beit/image_processing_beit.js");
|
|
16891
17381
|
/* harmony import */ var _bit_image_processing_bit_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./bit/image_processing_bit.js */ "./src/models/bit/image_processing_bit.js");
|
|
@@ -16898,30 +17388,32 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
16898
17388
|
/* harmony import */ var _dpt_image_processing_dpt_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./dpt/image_processing_dpt.js */ "./src/models/dpt/image_processing_dpt.js");
|
|
16899
17389
|
/* harmony import */ var _efficientnet_image_processing_efficientnet_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./efficientnet/image_processing_efficientnet.js */ "./src/models/efficientnet/image_processing_efficientnet.js");
|
|
16900
17390
|
/* harmony import */ var _glpn_image_processing_glpn_js__WEBPACK_IMPORTED_MODULE_10__ = __webpack_require__(/*! ./glpn/image_processing_glpn.js */ "./src/models/glpn/image_processing_glpn.js");
|
|
16901
|
-
/* harmony import */ var
|
|
16902
|
-
/* harmony import */ var
|
|
16903
|
-
/* harmony import */ var
|
|
16904
|
-
/* harmony import */ var
|
|
16905
|
-
/* harmony import */ var
|
|
16906
|
-
/* harmony import */ var
|
|
16907
|
-
/* harmony import */ var
|
|
16908
|
-
/* harmony import */ var
|
|
16909
|
-
/* harmony import */ var
|
|
16910
|
-
/* harmony import */ var
|
|
16911
|
-
/* harmony import */ var
|
|
16912
|
-
/* harmony import */ var
|
|
16913
|
-
/* harmony import */ var
|
|
16914
|
-
/* harmony import */ var
|
|
16915
|
-
/* harmony import */ var
|
|
16916
|
-
/* harmony import */ var
|
|
16917
|
-
/* harmony import */ var
|
|
16918
|
-
/* harmony import */ var
|
|
16919
|
-
/* harmony import */ var
|
|
16920
|
-
/* harmony import */ var
|
|
16921
|
-
/* harmony import */ var
|
|
16922
|
-
/* harmony import */ var
|
|
16923
|
-
/* harmony import */ var
|
|
16924
|
-
/* harmony import */ var
|
|
17391
|
+
/* harmony import */ var _idefics3_image_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_11__ = __webpack_require__(/*! ./idefics3/image_processing_idefics3.js */ "./src/models/idefics3/image_processing_idefics3.js");
|
|
17392
|
+
/* harmony import */ var _janus_image_processing_janus_js__WEBPACK_IMPORTED_MODULE_12__ = __webpack_require__(/*! ./janus/image_processing_janus.js */ "./src/models/janus/image_processing_janus.js");
|
|
17393
|
+
/* harmony import */ var _jina_clip_image_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_13__ = __webpack_require__(/*! ./jina_clip/image_processing_jina_clip.js */ "./src/models/jina_clip/image_processing_jina_clip.js");
|
|
17394
|
+
/* harmony import */ var _llava_onevision_image_processing_llava_onevision_js__WEBPACK_IMPORTED_MODULE_14__ = __webpack_require__(/*! ./llava_onevision/image_processing_llava_onevision.js */ "./src/models/llava_onevision/image_processing_llava_onevision.js");
|
|
17395
|
+
/* harmony import */ var _mask2former_image_processing_mask2former_js__WEBPACK_IMPORTED_MODULE_15__ = __webpack_require__(/*! ./mask2former/image_processing_mask2former.js */ "./src/models/mask2former/image_processing_mask2former.js");
|
|
17396
|
+
/* harmony import */ var _maskformer_image_processing_maskformer_js__WEBPACK_IMPORTED_MODULE_16__ = __webpack_require__(/*! ./maskformer/image_processing_maskformer.js */ "./src/models/maskformer/image_processing_maskformer.js");
|
|
17397
|
+
/* harmony import */ var _mobilenet_v1_image_processing_mobilenet_v1_js__WEBPACK_IMPORTED_MODULE_17__ = __webpack_require__(/*! ./mobilenet_v1/image_processing_mobilenet_v1.js */ "./src/models/mobilenet_v1/image_processing_mobilenet_v1.js");
|
|
17398
|
+
/* harmony import */ var _mobilenet_v2_image_processing_mobilenet_v2_js__WEBPACK_IMPORTED_MODULE_18__ = __webpack_require__(/*! ./mobilenet_v2/image_processing_mobilenet_v2.js */ "./src/models/mobilenet_v2/image_processing_mobilenet_v2.js");
|
|
17399
|
+
/* harmony import */ var _mobilenet_v3_image_processing_mobilenet_v3_js__WEBPACK_IMPORTED_MODULE_19__ = __webpack_require__(/*! ./mobilenet_v3/image_processing_mobilenet_v3.js */ "./src/models/mobilenet_v3/image_processing_mobilenet_v3.js");
|
|
17400
|
+
/* harmony import */ var _mobilenet_v4_image_processing_mobilenet_v4_js__WEBPACK_IMPORTED_MODULE_20__ = __webpack_require__(/*! ./mobilenet_v4/image_processing_mobilenet_v4.js */ "./src/models/mobilenet_v4/image_processing_mobilenet_v4.js");
|
|
17401
|
+
/* harmony import */ var _mobilevit_image_processing_mobilevit_js__WEBPACK_IMPORTED_MODULE_21__ = __webpack_require__(/*! ./mobilevit/image_processing_mobilevit.js */ "./src/models/mobilevit/image_processing_mobilevit.js");
|
|
17402
|
+
/* harmony import */ var _nougat_image_processing_nougat_js__WEBPACK_IMPORTED_MODULE_22__ = __webpack_require__(/*! ./nougat/image_processing_nougat.js */ "./src/models/nougat/image_processing_nougat.js");
|
|
17403
|
+
/* harmony import */ var _owlv2_image_processing_owlv2_js__WEBPACK_IMPORTED_MODULE_23__ = __webpack_require__(/*! ./owlv2/image_processing_owlv2.js */ "./src/models/owlv2/image_processing_owlv2.js");
|
|
17404
|
+
/* harmony import */ var _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_24__ = __webpack_require__(/*! ./owlvit/image_processing_owlvit.js */ "./src/models/owlvit/image_processing_owlvit.js");
|
|
17405
|
+
/* harmony import */ var _pvt_image_processing_pvt_js__WEBPACK_IMPORTED_MODULE_25__ = __webpack_require__(/*! ./pvt/image_processing_pvt.js */ "./src/models/pvt/image_processing_pvt.js");
|
|
17406
|
+
/* harmony import */ var _qwen2_vl_image_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_26__ = __webpack_require__(/*! ./qwen2_vl/image_processing_qwen2_vl.js */ "./src/models/qwen2_vl/image_processing_qwen2_vl.js");
|
|
17407
|
+
/* harmony import */ var _rt_detr_image_processing_rt_detr_js__WEBPACK_IMPORTED_MODULE_27__ = __webpack_require__(/*! ./rt_detr/image_processing_rt_detr.js */ "./src/models/rt_detr/image_processing_rt_detr.js");
|
|
17408
|
+
/* harmony import */ var _sam_image_processing_sam_js__WEBPACK_IMPORTED_MODULE_28__ = __webpack_require__(/*! ./sam/image_processing_sam.js */ "./src/models/sam/image_processing_sam.js");
|
|
17409
|
+
/* harmony import */ var _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_29__ = __webpack_require__(/*! ./segformer/image_processing_segformer.js */ "./src/models/segformer/image_processing_segformer.js");
|
|
17410
|
+
/* harmony import */ var _siglip_image_processing_siglip_js__WEBPACK_IMPORTED_MODULE_30__ = __webpack_require__(/*! ./siglip/image_processing_siglip.js */ "./src/models/siglip/image_processing_siglip.js");
|
|
17411
|
+
/* harmony import */ var _swin2sr_image_processing_swin2sr_js__WEBPACK_IMPORTED_MODULE_31__ = __webpack_require__(/*! ./swin2sr/image_processing_swin2sr.js */ "./src/models/swin2sr/image_processing_swin2sr.js");
|
|
17412
|
+
/* harmony import */ var _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_32__ = __webpack_require__(/*! ./vit/image_processing_vit.js */ "./src/models/vit/image_processing_vit.js");
|
|
17413
|
+
/* harmony import */ var _vitmatte_image_processing_vitmatte_js__WEBPACK_IMPORTED_MODULE_33__ = __webpack_require__(/*! ./vitmatte/image_processing_vitmatte.js */ "./src/models/vitmatte/image_processing_vitmatte.js");
|
|
17414
|
+
/* harmony import */ var _vitpose_image_processing_vitpose_js__WEBPACK_IMPORTED_MODULE_34__ = __webpack_require__(/*! ./vitpose/image_processing_vitpose.js */ "./src/models/vitpose/image_processing_vitpose.js");
|
|
17415
|
+
/* harmony import */ var _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_35__ = __webpack_require__(/*! ./yolos/image_processing_yolos.js */ "./src/models/yolos/image_processing_yolos.js");
|
|
17416
|
+
|
|
16925
17417
|
|
|
16926
17418
|
|
|
16927
17419
|
|
|
@@ -17700,28 +18192,31 @@ class OwlViTProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE
|
|
|
17700
18192
|
__webpack_require__.r(__webpack_exports__);
|
|
17701
18193
|
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
17702
18194
|
/* harmony export */ Florence2Processor: () => (/* reexport safe */ _florence2_processing_florence2_js__WEBPACK_IMPORTED_MODULE_0__.Florence2Processor),
|
|
17703
|
-
/* harmony export */
|
|
18195
|
+
/* harmony export */ Idefics3Processor: () => (/* reexport safe */ _idefics3_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_2__.Idefics3Processor),
|
|
18196
|
+
/* harmony export */ JinaCLIPProcessor: () => (/* reexport safe */ _jina_clip_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_4__.JinaCLIPProcessor),
|
|
17704
18197
|
/* harmony export */ MgpstrProcessor: () => (/* reexport safe */ _mgp_str_processing_mgp_str_js__WEBPACK_IMPORTED_MODULE_1__.MgpstrProcessor),
|
|
17705
|
-
/* harmony export */ OwlViTProcessor: () => (/* reexport safe */
|
|
17706
|
-
/* harmony export */ PyAnnoteProcessor: () => (/* reexport safe */
|
|
17707
|
-
/* harmony export */ Qwen2VLProcessor: () => (/* reexport safe */
|
|
17708
|
-
/* harmony export */ SamProcessor: () => (/* reexport safe */
|
|
17709
|
-
/* harmony export */ SpeechT5Processor: () => (/* reexport safe */
|
|
17710
|
-
/* harmony export */ VLChatProcessor: () => (/* reexport safe */
|
|
17711
|
-
/* harmony export */ Wav2Vec2ProcessorWithLM: () => (/* reexport safe */
|
|
17712
|
-
/* harmony export */ WhisperProcessor: () => (/* reexport safe */
|
|
18198
|
+
/* harmony export */ OwlViTProcessor: () => (/* reexport safe */ _owlvit_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_5__.OwlViTProcessor),
|
|
18199
|
+
/* harmony export */ PyAnnoteProcessor: () => (/* reexport safe */ _pyannote_processing_pyannote_js__WEBPACK_IMPORTED_MODULE_6__.PyAnnoteProcessor),
|
|
18200
|
+
/* harmony export */ Qwen2VLProcessor: () => (/* reexport safe */ _qwen2_vl_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_7__.Qwen2VLProcessor),
|
|
18201
|
+
/* harmony export */ SamProcessor: () => (/* reexport safe */ _sam_processing_sam_js__WEBPACK_IMPORTED_MODULE_8__.SamProcessor),
|
|
18202
|
+
/* harmony export */ SpeechT5Processor: () => (/* reexport safe */ _speecht5_processing_speecht5_js__WEBPACK_IMPORTED_MODULE_9__.SpeechT5Processor),
|
|
18203
|
+
/* harmony export */ VLChatProcessor: () => (/* reexport safe */ _janus_processing_janus_js__WEBPACK_IMPORTED_MODULE_3__.VLChatProcessor),
|
|
18204
|
+
/* harmony export */ Wav2Vec2ProcessorWithLM: () => (/* reexport safe */ _wav2vec2_processing_wav2vec2_js__WEBPACK_IMPORTED_MODULE_10__.Wav2Vec2ProcessorWithLM),
|
|
18205
|
+
/* harmony export */ WhisperProcessor: () => (/* reexport safe */ _whisper_processing_whisper_js__WEBPACK_IMPORTED_MODULE_11__.WhisperProcessor)
|
|
17713
18206
|
/* harmony export */ });
|
|
17714
18207
|
/* harmony import */ var _florence2_processing_florence2_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./florence2/processing_florence2.js */ "./src/models/florence2/processing_florence2.js");
|
|
17715
18208
|
/* harmony import */ var _mgp_str_processing_mgp_str_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./mgp_str/processing_mgp_str.js */ "./src/models/mgp_str/processing_mgp_str.js");
|
|
17716
|
-
/* harmony import */ var
|
|
17717
|
-
/* harmony import */ var
|
|
17718
|
-
/* harmony import */ var
|
|
17719
|
-
/* harmony import */ var
|
|
17720
|
-
/* harmony import */ var
|
|
17721
|
-
/* harmony import */ var
|
|
17722
|
-
/* harmony import */ var
|
|
17723
|
-
/* harmony import */ var
|
|
17724
|
-
/* harmony import */ var
|
|
18209
|
+
/* harmony import */ var _idefics3_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./idefics3/processing_idefics3.js */ "./src/models/idefics3/processing_idefics3.js");
|
|
18210
|
+
/* harmony import */ var _janus_processing_janus_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ./janus/processing_janus.js */ "./src/models/janus/processing_janus.js");
|
|
18211
|
+
/* harmony import */ var _jina_clip_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ./jina_clip/processing_jina_clip.js */ "./src/models/jina_clip/processing_jina_clip.js");
|
|
18212
|
+
/* harmony import */ var _owlvit_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./owlvit/processing_owlvit.js */ "./src/models/owlvit/processing_owlvit.js");
|
|
18213
|
+
/* harmony import */ var _pyannote_processing_pyannote_js__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./pyannote/processing_pyannote.js */ "./src/models/pyannote/processing_pyannote.js");
|
|
18214
|
+
/* harmony import */ var _qwen2_vl_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./qwen2_vl/processing_qwen2_vl.js */ "./src/models/qwen2_vl/processing_qwen2_vl.js");
|
|
18215
|
+
/* harmony import */ var _sam_processing_sam_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./sam/processing_sam.js */ "./src/models/sam/processing_sam.js");
|
|
18216
|
+
/* harmony import */ var _speecht5_processing_speecht5_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./speecht5/processing_speecht5.js */ "./src/models/speecht5/processing_speecht5.js");
|
|
18217
|
+
/* harmony import */ var _wav2vec2_processing_wav2vec2_js__WEBPACK_IMPORTED_MODULE_10__ = __webpack_require__(/*! ./wav2vec2/processing_wav2vec2.js */ "./src/models/wav2vec2/processing_wav2vec2.js");
|
|
18218
|
+
/* harmony import */ var _whisper_processing_whisper_js__WEBPACK_IMPORTED_MODULE_11__ = __webpack_require__(/*! ./whisper/processing_whisper.js */ "./src/models/whisper/processing_whisper.js");
|
|
18219
|
+
|
|
17725
18220
|
|
|
17726
18221
|
|
|
17727
18222
|
|
|
@@ -28165,6 +28660,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
28165
28660
|
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
28166
28661
|
/* harmony export */ calculateDimensions: () => (/* binding */ calculateDimensions),
|
|
28167
28662
|
/* harmony export */ calculateReflectOffset: () => (/* binding */ calculateReflectOffset),
|
|
28663
|
+
/* harmony export */ count: () => (/* binding */ count),
|
|
28168
28664
|
/* harmony export */ dispatchCallback: () => (/* binding */ dispatchCallback),
|
|
28169
28665
|
/* harmony export */ escapeRegExp: () => (/* binding */ escapeRegExp),
|
|
28170
28666
|
/* harmony export */ isIntegralNumber: () => (/* binding */ isIntegralNumber),
|
|
@@ -28367,6 +28863,20 @@ function len(s) {
|
|
|
28367
28863
|
return length;
|
|
28368
28864
|
}
|
|
28369
28865
|
|
|
28866
|
+
/**
|
|
28867
|
+
* Count the occurrences of a value in an array or string.
|
|
28868
|
+
* This mimics the behavior of Python's `count` method.
|
|
28869
|
+
* @param {any[]|string} arr The array or string to search.
|
|
28870
|
+
* @param {any} value The value to count.
|
|
28871
|
+
*/
|
|
28872
|
+
function count(arr, value) {
|
|
28873
|
+
let count = 0;
|
|
28874
|
+
for (const v of arr) {
|
|
28875
|
+
if (v === value) ++count;
|
|
28876
|
+
}
|
|
28877
|
+
return count;
|
|
28878
|
+
}
|
|
28879
|
+
|
|
28370
28880
|
|
|
28371
28881
|
/***/ }),
|
|
28372
28882
|
|
|
@@ -28920,6 +29430,7 @@ const isWebGpuFp16Supported = (function () {
|
|
|
28920
29430
|
})();
|
|
28921
29431
|
|
|
28922
29432
|
const DATA_TYPES = Object.freeze({
|
|
29433
|
+
auto: 'auto', // Auto-detect based on environment
|
|
28923
29434
|
fp32: 'fp32',
|
|
28924
29435
|
fp16: 'fp16',
|
|
28925
29436
|
q8: 'q8',
|
|
@@ -28936,7 +29447,7 @@ const DEFAULT_DEVICE_DTYPE_MAPPING = Object.freeze({
|
|
|
28936
29447
|
[_devices_js__WEBPACK_IMPORTED_MODULE_1__.DEVICE_TYPES.wasm]: DATA_TYPES.q8,
|
|
28937
29448
|
});
|
|
28938
29449
|
|
|
28939
|
-
/** @type {Record<DataType, string>} */
|
|
29450
|
+
/** @type {Record<Exclude<DataType, "auto">, string>} */
|
|
28940
29451
|
const DEFAULT_DTYPE_SUFFIX_MAPPING = Object.freeze({
|
|
28941
29452
|
[DATA_TYPES.fp32]: '',
|
|
28942
29453
|
[DATA_TYPES.fp16]: '_fp16',
|
|
@@ -29704,7 +30215,8 @@ function pathJoin(...parts) {
|
|
|
29704
30215
|
"use strict";
|
|
29705
30216
|
__webpack_require__.r(__webpack_exports__);
|
|
29706
30217
|
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
29707
|
-
/* harmony export */ RawImage: () => (/* binding */ RawImage)
|
|
30218
|
+
/* harmony export */ RawImage: () => (/* binding */ RawImage),
|
|
30219
|
+
/* harmony export */ load_image: () => (/* binding */ load_image)
|
|
29708
30220
|
/* harmony export */ });
|
|
29709
30221
|
/* harmony import */ var _core_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./core.js */ "./src/utils/core.js");
|
|
29710
30222
|
/* harmony import */ var _hub_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./hub.js */ "./src/utils/hub.js");
|
|
@@ -29729,13 +30241,11 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
29729
30241
|
// Will be empty (or not used) if running in browser or web-worker
|
|
29730
30242
|
|
|
29731
30243
|
|
|
29732
|
-
const BROWSER_ENV = typeof self !== 'undefined';
|
|
29733
|
-
const WEBWORKER_ENV = BROWSER_ENV && self.constructor.name === 'DedicatedWorkerGlobalScope';
|
|
29734
|
-
|
|
29735
30244
|
let createCanvasFunction;
|
|
29736
30245
|
let ImageDataClass;
|
|
29737
30246
|
let loadImageFunction;
|
|
29738
|
-
|
|
30247
|
+
const IS_BROWSER_OR_WEBWORKER = _env_js__WEBPACK_IMPORTED_MODULE_2__.apis.IS_BROWSER_ENV || _env_js__WEBPACK_IMPORTED_MODULE_2__.apis.IS_WEBWORKER_ENV;
|
|
30248
|
+
if (IS_BROWSER_OR_WEBWORKER) {
|
|
29739
30249
|
// Running in browser or web-worker
|
|
29740
30250
|
createCanvasFunction = (/** @type {number} */ width, /** @type {number} */ height) => {
|
|
29741
30251
|
if (!self.OffscreenCanvas) {
|
|
@@ -29845,7 +30355,7 @@ class RawImage {
|
|
|
29845
30355
|
* @returns {RawImage} The image object.
|
|
29846
30356
|
*/
|
|
29847
30357
|
static fromCanvas(canvas) {
|
|
29848
|
-
if (!
|
|
30358
|
+
if (!IS_BROWSER_OR_WEBWORKER) {
|
|
29849
30359
|
throw new Error('fromCanvas() is only supported in browser environments.')
|
|
29850
30360
|
}
|
|
29851
30361
|
|
|
@@ -29874,7 +30384,7 @@ class RawImage {
|
|
|
29874
30384
|
* @returns {Promise<RawImage>} The image object.
|
|
29875
30385
|
*/
|
|
29876
30386
|
static async fromBlob(blob) {
|
|
29877
|
-
if (
|
|
30387
|
+
if (IS_BROWSER_OR_WEBWORKER) {
|
|
29878
30388
|
// Running in environment with canvas
|
|
29879
30389
|
const img = await loadImageFunction(blob);
|
|
29880
30390
|
|
|
@@ -30052,7 +30562,7 @@ class RawImage {
|
|
|
30052
30562
|
height = (width / this.width) * this.height;
|
|
30053
30563
|
}
|
|
30054
30564
|
|
|
30055
|
-
if (
|
|
30565
|
+
if (IS_BROWSER_OR_WEBWORKER) {
|
|
30056
30566
|
// TODO use `resample` in browser environment
|
|
30057
30567
|
|
|
30058
30568
|
// Store number of channels before resizing
|
|
@@ -30125,7 +30635,7 @@ class RawImage {
|
|
|
30125
30635
|
return this;
|
|
30126
30636
|
}
|
|
30127
30637
|
|
|
30128
|
-
if (
|
|
30638
|
+
if (IS_BROWSER_OR_WEBWORKER) {
|
|
30129
30639
|
// Store number of channels before padding
|
|
30130
30640
|
const numChannels = this.channels;
|
|
30131
30641
|
|
|
@@ -30174,7 +30684,7 @@ class RawImage {
|
|
|
30174
30684
|
const crop_width = x_max - x_min + 1;
|
|
30175
30685
|
const crop_height = y_max - y_min + 1;
|
|
30176
30686
|
|
|
30177
|
-
if (
|
|
30687
|
+
if (IS_BROWSER_OR_WEBWORKER) {
|
|
30178
30688
|
// Store number of channels before resizing
|
|
30179
30689
|
const numChannels = this.channels;
|
|
30180
30690
|
|
|
@@ -30222,7 +30732,7 @@ class RawImage {
|
|
|
30222
30732
|
const height_offset = (this.height - crop_height) / 2;
|
|
30223
30733
|
|
|
30224
30734
|
|
|
30225
|
-
if (
|
|
30735
|
+
if (IS_BROWSER_OR_WEBWORKER) {
|
|
30226
30736
|
// Store number of channels before resizing
|
|
30227
30737
|
const numChannels = this.channels;
|
|
30228
30738
|
|
|
@@ -30327,7 +30837,7 @@ class RawImage {
|
|
|
30327
30837
|
}
|
|
30328
30838
|
|
|
30329
30839
|
async toBlob(type = 'image/png', quality = 1) {
|
|
30330
|
-
if (!
|
|
30840
|
+
if (!IS_BROWSER_OR_WEBWORKER) {
|
|
30331
30841
|
throw new Error('toBlob() is only supported in browser environments.')
|
|
30332
30842
|
}
|
|
30333
30843
|
|
|
@@ -30353,7 +30863,7 @@ class RawImage {
|
|
|
30353
30863
|
}
|
|
30354
30864
|
|
|
30355
30865
|
toCanvas() {
|
|
30356
|
-
if (!
|
|
30866
|
+
if (!IS_BROWSER_OR_WEBWORKER) {
|
|
30357
30867
|
throw new Error('toCanvas() is only supported in browser environments.')
|
|
30358
30868
|
}
|
|
30359
30869
|
|
|
@@ -30457,8 +30967,8 @@ class RawImage {
|
|
|
30457
30967
|
*/
|
|
30458
30968
|
async save(path) {
|
|
30459
30969
|
|
|
30460
|
-
if (
|
|
30461
|
-
if (
|
|
30970
|
+
if (IS_BROWSER_OR_WEBWORKER) {
|
|
30971
|
+
if (_env_js__WEBPACK_IMPORTED_MODULE_2__.apis.IS_WEBWORKER_ENV) {
|
|
30462
30972
|
throw new Error('Unable to save an image from a Web Worker.')
|
|
30463
30973
|
}
|
|
30464
30974
|
|
|
@@ -30494,7 +31004,7 @@ class RawImage {
|
|
|
30494
31004
|
}
|
|
30495
31005
|
|
|
30496
31006
|
toSharp() {
|
|
30497
|
-
if (
|
|
31007
|
+
if (IS_BROWSER_OR_WEBWORKER) {
|
|
30498
31008
|
throw new Error('toSharp() is only supported in server-side environments.')
|
|
30499
31009
|
}
|
|
30500
31010
|
|
|
@@ -30508,6 +31018,11 @@ class RawImage {
|
|
|
30508
31018
|
}
|
|
30509
31019
|
}
|
|
30510
31020
|
|
|
31021
|
+
/**
|
|
31022
|
+
* Helper function to load an image from a URL, path, etc.
|
|
31023
|
+
*/
|
|
31024
|
+
const load_image = RawImage.read.bind(RawImage);
|
|
31025
|
+
|
|
30511
31026
|
|
|
30512
31027
|
/***/ }),
|
|
30513
31028
|
|
|
@@ -31660,6 +32175,8 @@ const DataTypeMap = Object.freeze({
|
|
|
31660
32175
|
int64: BigInt64Array,
|
|
31661
32176
|
uint64: BigUint64Array,
|
|
31662
32177
|
bool: Uint8Array,
|
|
32178
|
+
uint4: Uint8Array,
|
|
32179
|
+
int4: Int8Array,
|
|
31663
32180
|
});
|
|
31664
32181
|
|
|
31665
32182
|
/**
|
|
@@ -32981,7 +33498,7 @@ function fullHelper(size, fill_value, dtype, cls) {
|
|
|
32981
33498
|
/**
|
|
32982
33499
|
* Creates a tensor of size size filled with fill_value. The tensor's dtype is inferred from fill_value.
|
|
32983
33500
|
* @param {number[]} size A sequence of integers defining the shape of the output tensor.
|
|
32984
|
-
* @param {number|bigint} fill_value The value to fill the output tensor with.
|
|
33501
|
+
* @param {number|bigint|boolean} fill_value The value to fill the output tensor with.
|
|
32985
33502
|
* @returns {Tensor} The filled tensor.
|
|
32986
33503
|
*/
|
|
32987
33504
|
function full(size, fill_value) {
|
|
@@ -32993,6 +33510,9 @@ function full(size, fill_value) {
|
|
|
32993
33510
|
} else if (typeof fill_value === 'bigint') {
|
|
32994
33511
|
dtype = 'int64';
|
|
32995
33512
|
typedArrayCls = BigInt64Array;
|
|
33513
|
+
} else if (typeof fill_value === 'boolean') {
|
|
33514
|
+
dtype = 'bool';
|
|
33515
|
+
typedArrayCls = Uint8Array;
|
|
32996
33516
|
} else {
|
|
32997
33517
|
// TODO: support other dtypes
|
|
32998
33518
|
throw new Error(`Unsupported data type: ${typeof fill_value}`);
|
|
@@ -33443,6 +33963,10 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
33443
33963
|
/* harmony export */ HubertForSequenceClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.HubertForSequenceClassification),
|
|
33444
33964
|
/* harmony export */ HubertModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.HubertModel),
|
|
33445
33965
|
/* harmony export */ HubertPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.HubertPreTrainedModel),
|
|
33966
|
+
/* harmony export */ Idefics3ForConditionalGeneration: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Idefics3ForConditionalGeneration),
|
|
33967
|
+
/* harmony export */ Idefics3ImageProcessor: () => (/* reexport safe */ _models_image_processors_js__WEBPACK_IMPORTED_MODULE_13__.Idefics3ImageProcessor),
|
|
33968
|
+
/* harmony export */ Idefics3PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Idefics3PreTrainedModel),
|
|
33969
|
+
/* harmony export */ Idefics3Processor: () => (/* reexport safe */ _models_processors_js__WEBPACK_IMPORTED_MODULE_16__.Idefics3Processor),
|
|
33446
33970
|
/* harmony export */ ImageClassificationPipeline: () => (/* reexport safe */ _pipelines_js__WEBPACK_IMPORTED_MODULE_1__.ImageClassificationPipeline),
|
|
33447
33971
|
/* harmony export */ ImageFeatureExtractionPipeline: () => (/* reexport safe */ _pipelines_js__WEBPACK_IMPORTED_MODULE_1__.ImageFeatureExtractionPipeline),
|
|
33448
33972
|
/* harmony export */ ImageFeatureExtractor: () => (/* reexport safe */ _models_feature_extractors_js__WEBPACK_IMPORTED_MODULE_10__.ImageFeatureExtractor),
|
|
@@ -33834,6 +34358,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
33834
34358
|
/* harmony export */ interpolate_data: () => (/* reexport safe */ _utils_maths_js__WEBPACK_IMPORTED_MODULE_8__.interpolate_data),
|
|
33835
34359
|
/* harmony export */ is_chinese_char: () => (/* reexport safe */ _tokenizers_js__WEBPACK_IMPORTED_MODULE_3__.is_chinese_char),
|
|
33836
34360
|
/* harmony export */ layer_norm: () => (/* reexport safe */ _utils_tensor_js__WEBPACK_IMPORTED_MODULE_7__.layer_norm),
|
|
34361
|
+
/* harmony export */ load_image: () => (/* reexport safe */ _utils_image_js__WEBPACK_IMPORTED_MODULE_6__.load_image),
|
|
33837
34362
|
/* harmony export */ log_softmax: () => (/* reexport safe */ _utils_maths_js__WEBPACK_IMPORTED_MODULE_8__.log_softmax),
|
|
33838
34363
|
/* harmony export */ magnitude: () => (/* reexport safe */ _utils_maths_js__WEBPACK_IMPORTED_MODULE_8__.magnitude),
|
|
33839
34364
|
/* harmony export */ matmul: () => (/* reexport safe */ _utils_tensor_js__WEBPACK_IMPORTED_MODULE_7__.matmul),
|