@huggingface/transformers 3.0.0-alpha.14 → 3.0.0-alpha.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -6
- package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
- package/dist/transformers.cjs +571 -353
- package/dist/transformers.cjs.map +1 -1
- package/dist/transformers.js +1000 -735
- package/dist/transformers.js.map +1 -1
- package/dist/transformers.min.cjs +11 -11
- package/dist/transformers.min.cjs.map +1 -1
- package/dist/transformers.min.js +14 -14
- package/dist/transformers.min.js.map +1 -1
- package/dist/transformers.min.mjs +52 -52
- package/dist/transformers.min.mjs.map +1 -1
- package/dist/transformers.mjs +592 -354
- package/dist/transformers.mjs.map +1 -1
- package/package.json +4 -5
- package/src/env.js +4 -4
- package/src/models.js +108 -3
- package/src/pipelines.js +5 -4
- package/src/processors.js +313 -285
- package/src/tokenizers.js +89 -53
- package/src/utils/maths.js +13 -4
- package/types/models.d.ts +62 -0
- package/types/models.d.ts.map +1 -1
- package/types/pipelines.d.ts.map +1 -1
- package/types/processors.d.ts +42 -52
- package/types/processors.d.ts.map +1 -1
- package/types/tokenizers.d.ts +23 -1
- package/types/tokenizers.d.ts.map +1 -1
- package/types/utils/maths.d.ts.map +1 -1
package/dist/transformers.cjs
CHANGED
|
@@ -3779,7 +3779,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
3779
3779
|
// Licensed under the MIT License.
|
|
3780
3780
|
// This file is generated by /js/scripts/update-version.ts
|
|
3781
3781
|
// Do not modify file content manually.
|
|
3782
|
-
const version = '1.19.
|
|
3782
|
+
const version = '1.19.2';
|
|
3783
3783
|
//# sourceMappingURL=version.js.map
|
|
3784
3784
|
|
|
3785
3785
|
/***/ }),
|
|
@@ -4437,7 +4437,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
4437
4437
|
|
|
4438
4438
|
|
|
4439
4439
|
|
|
4440
|
-
const VERSION = '3.0.0-alpha.
|
|
4440
|
+
const VERSION = '3.0.0-alpha.15';
|
|
4441
4441
|
|
|
4442
4442
|
// Check if various APIs are available (depends on environment)
|
|
4443
4443
|
const IS_BROWSER_ENV = typeof self !== 'undefined';
|
|
@@ -4484,19 +4484,19 @@ const apis = Object.freeze({
|
|
|
4484
4484
|
});
|
|
4485
4485
|
|
|
4486
4486
|
const RUNNING_LOCALLY = IS_FS_AVAILABLE && IS_PATH_AVAILABLE;
|
|
4487
|
-
const
|
|
4487
|
+
const dirname__ = RUNNING_LOCALLY
|
|
4488
4488
|
? path__WEBPACK_IMPORTED_MODULE_1__.dirname(path__WEBPACK_IMPORTED_MODULE_1__.dirname(url__WEBPACK_IMPORTED_MODULE_2__.fileURLToPath("file:///workspaces/transformers.js/src/env.js")))
|
|
4489
4489
|
: './';
|
|
4490
4490
|
|
|
4491
4491
|
// Only used for environments with access to file system
|
|
4492
4492
|
const DEFAULT_CACHE_DIR = RUNNING_LOCALLY
|
|
4493
|
-
? path__WEBPACK_IMPORTED_MODULE_1__.join(
|
|
4493
|
+
? path__WEBPACK_IMPORTED_MODULE_1__.join(dirname__, '/.cache/')
|
|
4494
4494
|
: null;
|
|
4495
4495
|
|
|
4496
4496
|
// Set local model path, based on available APIs
|
|
4497
4497
|
const DEFAULT_LOCAL_MODEL_PATH = '/models/';
|
|
4498
4498
|
const localModelPath = RUNNING_LOCALLY
|
|
4499
|
-
? path__WEBPACK_IMPORTED_MODULE_1__.join(
|
|
4499
|
+
? path__WEBPACK_IMPORTED_MODULE_1__.join(dirname__, DEFAULT_LOCAL_MODEL_PATH)
|
|
4500
4500
|
: DEFAULT_LOCAL_MODEL_PATH;
|
|
4501
4501
|
|
|
4502
4502
|
/**
|
|
@@ -6382,6 +6382,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
6382
6382
|
/* harmony export */ AutoModelForTextToSpectrogram: () => (/* binding */ AutoModelForTextToSpectrogram),
|
|
6383
6383
|
/* harmony export */ AutoModelForTextToWaveform: () => (/* binding */ AutoModelForTextToWaveform),
|
|
6384
6384
|
/* harmony export */ AutoModelForTokenClassification: () => (/* binding */ AutoModelForTokenClassification),
|
|
6385
|
+
/* harmony export */ AutoModelForUniversalSegmentation: () => (/* binding */ AutoModelForUniversalSegmentation),
|
|
6385
6386
|
/* harmony export */ AutoModelForVision2Seq: () => (/* binding */ AutoModelForVision2Seq),
|
|
6386
6387
|
/* harmony export */ AutoModelForXVector: () => (/* binding */ AutoModelForXVector),
|
|
6387
6388
|
/* harmony export */ AutoModelForZeroShotObjectDetection: () => (/* binding */ AutoModelForZeroShotObjectDetection),
|
|
@@ -6413,7 +6414,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
6413
6414
|
/* harmony export */ CLIPSegForImageSegmentation: () => (/* binding */ CLIPSegForImageSegmentation),
|
|
6414
6415
|
/* harmony export */ CLIPSegModel: () => (/* binding */ CLIPSegModel),
|
|
6415
6416
|
/* harmony export */ CLIPSegPreTrainedModel: () => (/* binding */ CLIPSegPreTrainedModel),
|
|
6417
|
+
/* harmony export */ CLIPTextModel: () => (/* binding */ CLIPTextModel),
|
|
6416
6418
|
/* harmony export */ CLIPTextModelWithProjection: () => (/* binding */ CLIPTextModelWithProjection),
|
|
6419
|
+
/* harmony export */ CLIPVisionModel: () => (/* binding */ CLIPVisionModel),
|
|
6417
6420
|
/* harmony export */ CLIPVisionModelWithProjection: () => (/* binding */ CLIPVisionModelWithProjection),
|
|
6418
6421
|
/* harmony export */ CamembertForMaskedLM: () => (/* binding */ CamembertForMaskedLM),
|
|
6419
6422
|
/* harmony export */ CamembertForQuestionAnswering: () => (/* binding */ CamembertForQuestionAnswering),
|
|
@@ -6462,6 +6465,8 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
6462
6465
|
/* harmony export */ DebertaV2ForTokenClassification: () => (/* binding */ DebertaV2ForTokenClassification),
|
|
6463
6466
|
/* harmony export */ DebertaV2Model: () => (/* binding */ DebertaV2Model),
|
|
6464
6467
|
/* harmony export */ DebertaV2PreTrainedModel: () => (/* binding */ DebertaV2PreTrainedModel),
|
|
6468
|
+
/* harmony export */ DecisionTransformerModel: () => (/* binding */ DecisionTransformerModel),
|
|
6469
|
+
/* harmony export */ DecisionTransformerPreTrainedModel: () => (/* binding */ DecisionTransformerPreTrainedModel),
|
|
6465
6470
|
/* harmony export */ DeiTForImageClassification: () => (/* binding */ DeiTForImageClassification),
|
|
6466
6471
|
/* harmony export */ DeiTModel: () => (/* binding */ DeiTModel),
|
|
6467
6472
|
/* harmony export */ DeiTPreTrainedModel: () => (/* binding */ DeiTPreTrainedModel),
|
|
@@ -6530,6 +6535,8 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
6530
6535
|
/* harmony export */ GemmaForCausalLM: () => (/* binding */ GemmaForCausalLM),
|
|
6531
6536
|
/* harmony export */ GemmaModel: () => (/* binding */ GemmaModel),
|
|
6532
6537
|
/* harmony export */ GemmaPreTrainedModel: () => (/* binding */ GemmaPreTrainedModel),
|
|
6538
|
+
/* harmony export */ GroupViTModel: () => (/* binding */ GroupViTModel),
|
|
6539
|
+
/* harmony export */ GroupViTPreTrainedModel: () => (/* binding */ GroupViTPreTrainedModel),
|
|
6533
6540
|
/* harmony export */ HieraForImageClassification: () => (/* binding */ HieraForImageClassification),
|
|
6534
6541
|
/* harmony export */ HieraModel: () => (/* binding */ HieraModel),
|
|
6535
6542
|
/* harmony export */ HieraPreTrainedModel: () => (/* binding */ HieraPreTrainedModel),
|
|
@@ -6569,6 +6576,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
6569
6576
|
/* harmony export */ MarianMTModel: () => (/* binding */ MarianMTModel),
|
|
6570
6577
|
/* harmony export */ MarianModel: () => (/* binding */ MarianModel),
|
|
6571
6578
|
/* harmony export */ MarianPreTrainedModel: () => (/* binding */ MarianPreTrainedModel),
|
|
6579
|
+
/* harmony export */ MaskFormerForInstanceSegmentation: () => (/* binding */ MaskFormerForInstanceSegmentation),
|
|
6580
|
+
/* harmony export */ MaskFormerModel: () => (/* binding */ MaskFormerModel),
|
|
6581
|
+
/* harmony export */ MaskFormerPreTrainedModel: () => (/* binding */ MaskFormerPreTrainedModel),
|
|
6572
6582
|
/* harmony export */ MaskedLMOutput: () => (/* binding */ MaskedLMOutput),
|
|
6573
6583
|
/* harmony export */ MistralForCausalLM: () => (/* binding */ MistralForCausalLM),
|
|
6574
6584
|
/* harmony export */ MistralModel: () => (/* binding */ MistralModel),
|
|
@@ -6627,6 +6637,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
6627
6637
|
/* harmony export */ PhiPreTrainedModel: () => (/* binding */ PhiPreTrainedModel),
|
|
6628
6638
|
/* harmony export */ PreTrainedModel: () => (/* binding */ PreTrainedModel),
|
|
6629
6639
|
/* harmony export */ PretrainedMixin: () => (/* binding */ PretrainedMixin),
|
|
6640
|
+
/* harmony export */ PvtForImageClassification: () => (/* binding */ PvtForImageClassification),
|
|
6641
|
+
/* harmony export */ PvtModel: () => (/* binding */ PvtModel),
|
|
6642
|
+
/* harmony export */ PvtPreTrainedModel: () => (/* binding */ PvtPreTrainedModel),
|
|
6630
6643
|
/* harmony export */ PyAnnoteForAudioFrameClassification: () => (/* binding */ PyAnnoteForAudioFrameClassification),
|
|
6631
6644
|
/* harmony export */ PyAnnoteModel: () => (/* binding */ PyAnnoteModel),
|
|
6632
6645
|
/* harmony export */ PyAnnotePreTrainedModel: () => (/* binding */ PyAnnotePreTrainedModel),
|
|
@@ -6712,6 +6725,11 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
6712
6725
|
/* harmony export */ UniSpeechSatModel: () => (/* binding */ UniSpeechSatModel),
|
|
6713
6726
|
/* harmony export */ UniSpeechSatPreTrainedModel: () => (/* binding */ UniSpeechSatPreTrainedModel),
|
|
6714
6727
|
/* harmony export */ ViTForImageClassification: () => (/* binding */ ViTForImageClassification),
|
|
6728
|
+
/* harmony export */ ViTMAEModel: () => (/* binding */ ViTMAEModel),
|
|
6729
|
+
/* harmony export */ ViTMAEPreTrainedModel: () => (/* binding */ ViTMAEPreTrainedModel),
|
|
6730
|
+
/* harmony export */ ViTMSNForImageClassification: () => (/* binding */ ViTMSNForImageClassification),
|
|
6731
|
+
/* harmony export */ ViTMSNModel: () => (/* binding */ ViTMSNModel),
|
|
6732
|
+
/* harmony export */ ViTMSNPreTrainedModel: () => (/* binding */ ViTMSNPreTrainedModel),
|
|
6715
6733
|
/* harmony export */ ViTModel: () => (/* binding */ ViTModel),
|
|
6716
6734
|
/* harmony export */ ViTPreTrainedModel: () => (/* binding */ ViTPreTrainedModel),
|
|
6717
6735
|
/* harmony export */ VisionEncoderDecoderModel: () => (/* binding */ VisionEncoderDecoderModel),
|
|
@@ -10227,6 +10245,18 @@ class CLIPPreTrainedModel extends PreTrainedModel { }
|
|
|
10227
10245
|
*/
|
|
10228
10246
|
class CLIPModel extends CLIPPreTrainedModel { }
|
|
10229
10247
|
|
|
10248
|
+
/**
|
|
10249
|
+
* The text model from CLIP without any head or projection on top.
|
|
10250
|
+
*/
|
|
10251
|
+
class CLIPTextModel extends CLIPPreTrainedModel {
|
|
10252
|
+
/** @type {PreTrainedModel.from_pretrained} */
|
|
10253
|
+
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
10254
|
+
// Update default model file name if not provided
|
|
10255
|
+
options.model_file_name ??= 'text_model';
|
|
10256
|
+
return super.from_pretrained(pretrained_model_name_or_path, options);
|
|
10257
|
+
}
|
|
10258
|
+
}
|
|
10259
|
+
|
|
10230
10260
|
/**
|
|
10231
10261
|
* CLIP Text Model with a projection layer on top (a linear layer on top of the pooled output)
|
|
10232
10262
|
*
|
|
@@ -10254,7 +10284,6 @@ class CLIPModel extends CLIPPreTrainedModel { }
|
|
|
10254
10284
|
* ```
|
|
10255
10285
|
*/
|
|
10256
10286
|
class CLIPTextModelWithProjection extends CLIPPreTrainedModel {
|
|
10257
|
-
|
|
10258
10287
|
/** @type {PreTrainedModel.from_pretrained} */
|
|
10259
10288
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
10260
10289
|
// Update default model file name if not provided
|
|
@@ -10263,6 +10292,18 @@ class CLIPTextModelWithProjection extends CLIPPreTrainedModel {
|
|
|
10263
10292
|
}
|
|
10264
10293
|
}
|
|
10265
10294
|
|
|
10295
|
+
/**
|
|
10296
|
+
* The vision model from CLIP without any head or projection on top.
|
|
10297
|
+
*/
|
|
10298
|
+
class CLIPVisionModel extends CLIPPreTrainedModel {
|
|
10299
|
+
/** @type {PreTrainedModel.from_pretrained} */
|
|
10300
|
+
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
10301
|
+
// Update default model file name if not provided
|
|
10302
|
+
options.model_file_name ??= 'vision_model';
|
|
10303
|
+
return super.from_pretrained(pretrained_model_name_or_path, options);
|
|
10304
|
+
}
|
|
10305
|
+
}
|
|
10306
|
+
|
|
10266
10307
|
/**
|
|
10267
10308
|
* CLIP Vision Model with a projection layer on top (a linear layer on top of the pooled output)
|
|
10268
10309
|
*
|
|
@@ -10929,6 +10970,43 @@ class ViTForImageClassification extends ViTPreTrainedModel {
|
|
|
10929
10970
|
}
|
|
10930
10971
|
//////////////////////////////////////////////////
|
|
10931
10972
|
|
|
10973
|
+
//////////////////////////////////////////////////
|
|
10974
|
+
class PvtPreTrainedModel extends PreTrainedModel { }
|
|
10975
|
+
class PvtModel extends PvtPreTrainedModel { }
|
|
10976
|
+
class PvtForImageClassification extends PvtPreTrainedModel {
|
|
10977
|
+
/**
|
|
10978
|
+
* @param {any} model_inputs
|
|
10979
|
+
*/
|
|
10980
|
+
async _call(model_inputs) {
|
|
10981
|
+
return new SequenceClassifierOutput(await super._call(model_inputs));
|
|
10982
|
+
}
|
|
10983
|
+
}
|
|
10984
|
+
//////////////////////////////////////////////////
|
|
10985
|
+
|
|
10986
|
+
//////////////////////////////////////////////////
|
|
10987
|
+
class ViTMAEPreTrainedModel extends PreTrainedModel { }
|
|
10988
|
+
class ViTMAEModel extends ViTMAEPreTrainedModel { }
|
|
10989
|
+
//////////////////////////////////////////////////
|
|
10990
|
+
|
|
10991
|
+
|
|
10992
|
+
//////////////////////////////////////////////////
|
|
10993
|
+
class ViTMSNPreTrainedModel extends PreTrainedModel { }
|
|
10994
|
+
class ViTMSNModel extends ViTMSNPreTrainedModel { }
|
|
10995
|
+
class ViTMSNForImageClassification extends ViTMSNPreTrainedModel {
|
|
10996
|
+
/**
|
|
10997
|
+
* @param {any} model_inputs
|
|
10998
|
+
*/
|
|
10999
|
+
async _call(model_inputs) {
|
|
11000
|
+
return new SequenceClassifierOutput(await super._call(model_inputs));
|
|
11001
|
+
}
|
|
11002
|
+
}
|
|
11003
|
+
//////////////////////////////////////////////////
|
|
11004
|
+
|
|
11005
|
+
//////////////////////////////////////////////////
|
|
11006
|
+
class GroupViTPreTrainedModel extends PreTrainedModel { }
|
|
11007
|
+
class GroupViTModel extends GroupViTPreTrainedModel { }
|
|
11008
|
+
//////////////////////////////////////////////////
|
|
11009
|
+
|
|
10932
11010
|
|
|
10933
11011
|
//////////////////////////////////////////////////
|
|
10934
11012
|
class FastViTPreTrainedModel extends PreTrainedModel { }
|
|
@@ -11341,6 +11419,11 @@ class SapiensForDepthEstimation extends SapiensPreTrainedModel { }
|
|
|
11341
11419
|
class SapiensForNormalEstimation extends SapiensPreTrainedModel { }
|
|
11342
11420
|
//////////////////////////////////////////////////
|
|
11343
11421
|
|
|
11422
|
+
//////////////////////////////////////////////////
|
|
11423
|
+
class MaskFormerPreTrainedModel extends PreTrainedModel { }
|
|
11424
|
+
class MaskFormerModel extends MaskFormerPreTrainedModel { }
|
|
11425
|
+
class MaskFormerForInstanceSegmentation extends MaskFormerPreTrainedModel { }
|
|
11426
|
+
//////////////////////////////////////////////////
|
|
11344
11427
|
|
|
11345
11428
|
//////////////////////////////////////////////////
|
|
11346
11429
|
class GLPNPreTrainedModel extends PreTrainedModel { }
|
|
@@ -12863,6 +12946,7 @@ class MusicgenForConditionalGeneration extends PreTrainedModel { // NOTE: not Mu
|
|
|
12863
12946
|
return audio_values;
|
|
12864
12947
|
}
|
|
12865
12948
|
}
|
|
12949
|
+
//////////////////////////////////////////////////
|
|
12866
12950
|
|
|
12867
12951
|
//////////////////////////////////////////////////
|
|
12868
12952
|
// MobileNetV1 models
|
|
@@ -12956,6 +13040,17 @@ class MobileNetV4ForImageClassification extends MobileNetV4PreTrainedModel {
|
|
|
12956
13040
|
}
|
|
12957
13041
|
//////////////////////////////////////////////////
|
|
12958
13042
|
|
|
13043
|
+
//////////////////////////////////////////////////
|
|
13044
|
+
// Decision Transformer models
|
|
13045
|
+
class DecisionTransformerPreTrainedModel extends PreTrainedModel { }
|
|
13046
|
+
|
|
13047
|
+
/**
|
|
13048
|
+
* The model builds upon the GPT2 architecture to perform autoregressive prediction of actions in an offline RL setting.
|
|
13049
|
+
* Refer to the paper for more details: https://arxiv.org/abs/2106.01345
|
|
13050
|
+
*/
|
|
13051
|
+
class DecisionTransformerModel extends DecisionTransformerPreTrainedModel { }
|
|
13052
|
+
|
|
13053
|
+
//////////////////////////////////////////////////
|
|
12959
13054
|
|
|
12960
13055
|
//////////////////////////////////////////////////
|
|
12961
13056
|
// AutoModels, used to simplify construction of PreTrainedModels
|
|
@@ -12994,7 +13089,7 @@ class PretrainedMixin {
|
|
|
12994
13089
|
session_options = {},
|
|
12995
13090
|
} = {}) {
|
|
12996
13091
|
|
|
12997
|
-
|
|
13092
|
+
const options = {
|
|
12998
13093
|
progress_callback,
|
|
12999
13094
|
config,
|
|
13000
13095
|
cache_dir,
|
|
@@ -13013,7 +13108,7 @@ class PretrainedMixin {
|
|
|
13013
13108
|
throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: " + this.name);
|
|
13014
13109
|
}
|
|
13015
13110
|
|
|
13016
|
-
for (
|
|
13111
|
+
for (const MODEL_CLASS_MAPPING of this.MODEL_CLASS_MAPPINGS) {
|
|
13017
13112
|
const modelInfo = MODEL_CLASS_MAPPING.get(options.config.model_type);
|
|
13018
13113
|
if (!modelInfo) {
|
|
13019
13114
|
continue; // Item not found in this mapping
|
|
@@ -13068,6 +13163,10 @@ const MODEL_MAPPING_NAMES_ENCODER_ONLY = new Map([
|
|
|
13068
13163
|
['rt_detr', ['RTDetrModel', RTDetrModel]],
|
|
13069
13164
|
['table-transformer', ['TableTransformerModel', TableTransformerModel]],
|
|
13070
13165
|
['vit', ['ViTModel', ViTModel]],
|
|
13166
|
+
['pvt', ['PvtModel', PvtModel]],
|
|
13167
|
+
['vit_msn', ['ViTMSNModel', ViTMSNModel]],
|
|
13168
|
+
['vit_mae', ['ViTMAEModel', ViTMAEModel]],
|
|
13169
|
+
['groupvit', ['GroupViTModel', GroupViTModel]],
|
|
13071
13170
|
['fastvit', ['FastViTModel', FastViTModel]],
|
|
13072
13171
|
['mobilevit', ['MobileViTModel', MobileViTModel]],
|
|
13073
13172
|
['mobilevitv2', ['MobileViTV2Model', MobileViTV2Model]],
|
|
@@ -13090,10 +13189,14 @@ const MODEL_MAPPING_NAMES_ENCODER_ONLY = new Map([
|
|
|
13090
13189
|
['hifigan', ['SpeechT5HifiGan', SpeechT5HifiGan]],
|
|
13091
13190
|
['efficientnet', ['EfficientNetModel', EfficientNetModel]],
|
|
13092
13191
|
|
|
13192
|
+
['decision_transformer', ['DecisionTransformerModel', DecisionTransformerModel]],
|
|
13193
|
+
|
|
13093
13194
|
['mobilenet_v1', ['MobileNetV1Model', MobileNetV1Model]],
|
|
13094
13195
|
['mobilenet_v2', ['MobileNetV2Model', MobileNetV2Model]],
|
|
13095
13196
|
['mobilenet_v3', ['MobileNetV3Model', MobileNetV3Model]],
|
|
13096
13197
|
['mobilenet_v4', ['MobileNetV4Model', MobileNetV4Model]],
|
|
13198
|
+
|
|
13199
|
+
['maskformer', ['MaskFormerModel', MaskFormerModel]],
|
|
13097
13200
|
]);
|
|
13098
13201
|
|
|
13099
13202
|
const MODEL_MAPPING_NAMES_ENCODER_DECODER = new Map([
|
|
@@ -13278,6 +13381,8 @@ const MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES = new Map([
|
|
|
13278
13381
|
|
|
13279
13382
|
const MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES = new Map([
|
|
13280
13383
|
['vit', ['ViTForImageClassification', ViTForImageClassification]],
|
|
13384
|
+
['pvt', ['PvtForImageClassification', PvtForImageClassification]],
|
|
13385
|
+
['vit_msn', ['ViTMSNForImageClassification', ViTMSNForImageClassification]],
|
|
13281
13386
|
['fastvit', ['FastViTForImageClassification', FastViTForImageClassification]],
|
|
13282
13387
|
['mobilevit', ['MobileViTForImageClassification', MobileViTForImageClassification]],
|
|
13283
13388
|
['mobilevitv2', ['MobileViTV2ForImageClassification', MobileViTV2ForImageClassification]],
|
|
@@ -13310,6 +13415,7 @@ const MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES = new Map([
|
|
|
13310
13415
|
]);
|
|
13311
13416
|
|
|
13312
13417
|
const MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES = new Map([
|
|
13418
|
+
// TODO: Do not add new models here
|
|
13313
13419
|
['detr', ['DetrForSegmentation', DetrForSegmentation]],
|
|
13314
13420
|
['clipseg', ['CLIPSegForImageSegmentation', CLIPSegForImageSegmentation]],
|
|
13315
13421
|
]);
|
|
@@ -13319,6 +13425,11 @@ const MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES = new Map([
|
|
|
13319
13425
|
['sapiens', ['SapiensForSemanticSegmentation', SapiensForSemanticSegmentation]],
|
|
13320
13426
|
]);
|
|
13321
13427
|
|
|
13428
|
+
const MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES = new Map([
|
|
13429
|
+
['detr', ['DetrForSegmentation', DetrForSegmentation]],
|
|
13430
|
+
['maskformer', ['MaskFormerForInstanceSegmentation', MaskFormerForInstanceSegmentation]],
|
|
13431
|
+
]);
|
|
13432
|
+
|
|
13322
13433
|
const MODEL_FOR_MASK_GENERATION_MAPPING_NAMES = new Map([
|
|
13323
13434
|
['sam', ['SamModel', SamModel]],
|
|
13324
13435
|
]);
|
|
@@ -13394,6 +13505,7 @@ const MODEL_CLASS_TYPE_MAPPING = [
|
|
|
13394
13505
|
[MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES, MODEL_TYPES.ImageTextToText],
|
|
13395
13506
|
[MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
|
|
13396
13507
|
[MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
|
|
13508
|
+
[MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
|
|
13397
13509
|
[MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
|
|
13398
13510
|
[MODEL_FOR_IMAGE_MATTING_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
|
|
13399
13511
|
[MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
|
|
@@ -13596,6 +13708,17 @@ class AutoModelForSemanticSegmentation extends PretrainedMixin {
|
|
|
13596
13708
|
static MODEL_CLASS_MAPPINGS = [MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES];
|
|
13597
13709
|
}
|
|
13598
13710
|
|
|
13711
|
+
/**
|
|
13712
|
+
* Helper class which is used to instantiate pretrained universal image segmentation models with the `from_pretrained` function.
|
|
13713
|
+
* The chosen model class is determined by the type specified in the model config.
|
|
13714
|
+
*
|
|
13715
|
+
* @example
|
|
13716
|
+
* let model = await AutoModelForUniversalSegmentation.from_pretrained('hf-internal-testing/tiny-random-MaskFormerForInstanceSegmentation');
|
|
13717
|
+
*/
|
|
13718
|
+
class AutoModelForUniversalSegmentation extends PretrainedMixin {
|
|
13719
|
+
static MODEL_CLASS_MAPPINGS = [MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES];
|
|
13720
|
+
}
|
|
13721
|
+
|
|
13599
13722
|
/**
|
|
13600
13723
|
* Helper class which is used to instantiate pretrained object detection models with the `from_pretrained` function.
|
|
13601
13724
|
* The chosen model class is determined by the type specified in the model config.
|
|
@@ -17259,7 +17382,7 @@ const SUPPORTED_TASKS = Object.freeze({
|
|
|
17259
17382
|
"image-segmentation": {
|
|
17260
17383
|
// no tokenizer
|
|
17261
17384
|
"pipeline": ImageSegmentationPipeline,
|
|
17262
|
-
"model": [_models_js__WEBPACK_IMPORTED_MODULE_1__.AutoModelForImageSegmentation, _models_js__WEBPACK_IMPORTED_MODULE_1__.AutoModelForSemanticSegmentation],
|
|
17385
|
+
"model": [_models_js__WEBPACK_IMPORTED_MODULE_1__.AutoModelForImageSegmentation, _models_js__WEBPACK_IMPORTED_MODULE_1__.AutoModelForSemanticSegmentation, _models_js__WEBPACK_IMPORTED_MODULE_1__.AutoModelForUniversalSegmentation],
|
|
17263
17386
|
"processor": _processors_js__WEBPACK_IMPORTED_MODULE_2__.AutoProcessor,
|
|
17264
17387
|
"default": {
|
|
17265
17388
|
// TODO: replace with original
|
|
@@ -17501,7 +17624,7 @@ async function loadItems(mapping, model, pretrainedOptions) {
|
|
|
17501
17624
|
|
|
17502
17625
|
/**@type {Promise[]} */
|
|
17503
17626
|
const promises = [];
|
|
17504
|
-
for (
|
|
17627
|
+
for (const [name, cls] of mapping.entries()) {
|
|
17505
17628
|
if (!cls) continue;
|
|
17506
17629
|
|
|
17507
17630
|
/**@type {Promise} */
|
|
@@ -17509,7 +17632,7 @@ async function loadItems(mapping, model, pretrainedOptions) {
|
|
|
17509
17632
|
if (Array.isArray(cls)) {
|
|
17510
17633
|
promise = new Promise(async (resolve, reject) => {
|
|
17511
17634
|
let e;
|
|
17512
|
-
for (
|
|
17635
|
+
for (const c of cls) {
|
|
17513
17636
|
if (c === null) {
|
|
17514
17637
|
// If null, we resolve it immediately, meaning the relevant
|
|
17515
17638
|
// class was not found, but it is optional.
|
|
@@ -17547,7 +17670,7 @@ async function loadItems(mapping, model, pretrainedOptions) {
|
|
|
17547
17670
|
await Promise.all(promises);
|
|
17548
17671
|
|
|
17549
17672
|
// Then assign to result
|
|
17550
|
-
for (
|
|
17673
|
+
for (const [name, promise] of Object.entries(result)) {
|
|
17551
17674
|
result[name] = await promise;
|
|
17552
17675
|
}
|
|
17553
17676
|
|
|
@@ -17585,6 +17708,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
17585
17708
|
/* harmony export */ Florence2Processor: () => (/* binding */ Florence2Processor),
|
|
17586
17709
|
/* harmony export */ GLPNFeatureExtractor: () => (/* binding */ GLPNFeatureExtractor),
|
|
17587
17710
|
/* harmony export */ ImageFeatureExtractor: () => (/* binding */ ImageFeatureExtractor),
|
|
17711
|
+
/* harmony export */ MaskFormerFeatureExtractor: () => (/* binding */ MaskFormerFeatureExtractor),
|
|
17588
17712
|
/* harmony export */ MobileNetV1FeatureExtractor: () => (/* binding */ MobileNetV1FeatureExtractor),
|
|
17589
17713
|
/* harmony export */ MobileNetV2FeatureExtractor: () => (/* binding */ MobileNetV2FeatureExtractor),
|
|
17590
17714
|
/* harmony export */ MobileNetV3FeatureExtractor: () => (/* binding */ MobileNetV3FeatureExtractor),
|
|
@@ -17596,6 +17720,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
17596
17720
|
/* harmony export */ OwlViTProcessor: () => (/* binding */ OwlViTProcessor),
|
|
17597
17721
|
/* harmony export */ Owlv2ImageProcessor: () => (/* binding */ Owlv2ImageProcessor),
|
|
17598
17722
|
/* harmony export */ Processor: () => (/* binding */ Processor),
|
|
17723
|
+
/* harmony export */ PvtImageProcessor: () => (/* binding */ PvtImageProcessor),
|
|
17599
17724
|
/* harmony export */ PyAnnoteFeatureExtractor: () => (/* binding */ PyAnnoteFeatureExtractor),
|
|
17600
17725
|
/* harmony export */ PyAnnoteProcessor: () => (/* binding */ PyAnnoteProcessor),
|
|
17601
17726
|
/* harmony export */ RTDetrImageProcessor: () => (/* binding */ RTDetrImageProcessor),
|
|
@@ -17684,7 +17809,7 @@ function center_to_corners_format([centerX, centerY, width, height]) {
|
|
|
17684
17809
|
* @param {Tensor} outputs.logits The logits
|
|
17685
17810
|
* @param {Tensor} outputs.pred_boxes The predicted boxes.
|
|
17686
17811
|
* @param {number} [threshold=0.5] The threshold to use for the scores.
|
|
17687
|
-
* @param {number
|
|
17812
|
+
* @param {[number, number][]} [target_sizes=null] The sizes of the original images.
|
|
17688
17813
|
* @param {boolean} [is_zero_shot=false] Whether zero-shot object detection was performed.
|
|
17689
17814
|
* @return {Object[]} An array of objects containing the post-processed outputs.
|
|
17690
17815
|
* @private
|
|
@@ -17765,7 +17890,7 @@ function post_process_object_detection(outputs, threshold = 0.5, target_sizes =
|
|
|
17765
17890
|
/**
|
|
17766
17891
|
* Post-processes the outputs of the model (for semantic segmentation).
|
|
17767
17892
|
* @param {*} outputs Raw outputs of the model.
|
|
17768
|
-
* @param {number
|
|
17893
|
+
* @param {[number, number][]} [target_sizes=null] List of tuples corresponding to the requested final size
|
|
17769
17894
|
* (height, width) of each prediction. If unset, predictions will not be resized.
|
|
17770
17895
|
* @returns {{segmentation: Tensor; labels: number[]}[]} The semantic segmentation maps.
|
|
17771
17896
|
*/
|
|
@@ -17825,6 +17950,300 @@ function post_process_semantic_segmentation(outputs, target_sizes = null) {
|
|
|
17825
17950
|
return toReturn;
|
|
17826
17951
|
}
|
|
17827
17952
|
|
|
17953
|
+
|
|
17954
|
+
/**
|
|
17955
|
+
* Binarize the given masks using `object_mask_threshold`, it returns the associated values of `masks`, `scores` and `labels`.
|
|
17956
|
+
* @param {Tensor} class_logits The class logits.
|
|
17957
|
+
* @param {Tensor} mask_logits The mask logits.
|
|
17958
|
+
* @param {number} object_mask_threshold A number between 0 and 1 used to binarize the masks.
|
|
17959
|
+
* @param {number} num_labels The number of labels.
|
|
17960
|
+
* @returns {[Tensor[], number[], number[]]} The binarized masks, the scores, and the labels.
|
|
17961
|
+
* @private
|
|
17962
|
+
*/
|
|
17963
|
+
function remove_low_and_no_objects(class_logits, mask_logits, object_mask_threshold, num_labels) {
|
|
17964
|
+
|
|
17965
|
+
const mask_probs_item = [];
|
|
17966
|
+
const pred_scores_item = [];
|
|
17967
|
+
const pred_labels_item = [];
|
|
17968
|
+
|
|
17969
|
+
for (let j = 0; j < class_logits.dims[0]; ++j) {
|
|
17970
|
+
const cls = class_logits[j];
|
|
17971
|
+
const mask = mask_logits[j];
|
|
17972
|
+
|
|
17973
|
+
const pred_label = (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_3__.max)(cls.data)[1];
|
|
17974
|
+
if (pred_label === num_labels) {
|
|
17975
|
+
// Is the background, so we ignore it
|
|
17976
|
+
continue;
|
|
17977
|
+
}
|
|
17978
|
+
|
|
17979
|
+
const scores = (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_3__.softmax)(cls.data);
|
|
17980
|
+
const pred_score = scores[pred_label];
|
|
17981
|
+
if (pred_score > object_mask_threshold) {
|
|
17982
|
+
mask_probs_item.push(mask);
|
|
17983
|
+
pred_scores_item.push(pred_score);
|
|
17984
|
+
pred_labels_item.push(pred_label);
|
|
17985
|
+
}
|
|
17986
|
+
}
|
|
17987
|
+
|
|
17988
|
+
return [mask_probs_item, pred_scores_item, pred_labels_item];
|
|
17989
|
+
}
|
|
17990
|
+
|
|
17991
|
+
/**
 * Checks whether the segment belonging to mask `k` is valid or not.
 *
 * A segment is kept when at least one pixel is labelled `k`, at least one pixel of
 * mask `k` reaches `mask_threshold`, and the ratio between those two pixel counts is
 * strictly greater than `overlap_mask_area_threshold`.
 *
 * @param {Int32Array} mask_labels For each pixel, the index of the mask that was selected for it.
 * @param {Tensor[]} mask_probs Probabilities for each pixel in the masks (only `.data` is read).
 * @param {number} k Index into `mask_probs` of the mask being checked (this is compared against
 * the entries of `mask_labels`; it is not a class id).
 * @param {number} mask_threshold The mask threshold.
 * @param {number} overlap_mask_area_threshold The overlap mask area threshold.
 * @returns {[boolean, number[]]} Whether the segment is valid or not, and the indices of the pixels labelled `k`.
 * @private
 */
function check_segment_validity(
    mask_labels,
    mask_probs,
    k,
    mask_threshold = 0.5,
    overlap_mask_area_threshold = 0.8
) {
    // mask_k is a 1D array of pixel indices, indicating where the label is equal to k
    const mask_k = [];
    // Number of pixels of mask k whose probability reaches the threshold
    let original_area = 0;

    const mask_probs_k_data = mask_probs[k].data;

    // Compute the area of all the stuff in query k
    for (let i = 0; i < mask_labels.length; ++i) {
        if (mask_labels[i] === k) {
            mask_k.push(i);
        }

        if (mask_probs_k_data[i] >= mask_threshold) {
            ++original_area;
        }
    }

    // The labelled area is simply the number of collected indices
    const mask_k_area = mask_k.length;

    // Eliminate disconnected tiny segments: the ratio check only runs when both areas
    // are non-zero, which also avoids dividing by zero.
    const mask_exists = mask_k_area > 0
        && original_area > 0
        && (mask_k_area / original_area) > overlap_mask_area_threshold;

    return [mask_exists, mask_k];
}
|
|
18037
|
+
|
|
18038
|
+
/**
 * Builds the segmentation map and the list of segment descriptors for one batch item.
 *
 * NOTE: `mask_probs` is modified: when `target_size` is not null each entry is replaced by
 * its interpolated version, and every mask's data is multiplied in place by its score.
 *
 * @param {Tensor[]} mask_probs The mask probabilities.
 * @param {number[]} pred_scores The predicted scores.
 * @param {number[]} pred_labels The predicted labels.
 * @param {number} mask_threshold The mask threshold.
 * @param {number} overlap_mask_area_threshold The overlap mask area threshold.
 * @param {Set<number>} label_ids_to_fuse The label ids to fuse (currently unused, see TODO below).
 * @param {number[]} target_size The target size of the image.
 * @returns {[Tensor, Array<{id: number, label_id: number, score: number}>]} The computed segments.
 * @private
 */
function compute_segments(
    mask_probs,
    pred_scores,
    pred_labels,
    mask_threshold,
    overlap_mask_area_threshold,
    label_ids_to_fuse = null,
    target_size = null,
) {
    const [height, width] = target_size ?? mask_probs[0].dims;

    // Output map; pixels that end up in no segment keep the initial value 0.
    const segmentation = new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_4__.Tensor(
        'int32',
        new Int32Array(height * width),
        [height, width]
    );

    // 1. Resize every mask to the requested size (if any)
    if (target_size !== null) {
        for (const [idx, mask] of mask_probs.entries()) {
            mask_probs[idx] = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_4__.interpolate)(mask, target_size, 'bilinear', false);
        }
    }

    // 2. Scale each mask by its prediction score (in place) and remember, per pixel,
    //    which mask has the highest weighted value.
    const num_pixels = mask_probs[0].data.length;
    const mask_labels = new Int32Array(num_pixels);
    const best_scores = new Float32Array(num_pixels);

    for (const [mask_idx, mask] of mask_probs.entries()) {
        const weight = pred_scores[mask_idx];
        const data = mask.data;

        for (let p = 0; p < data.length; ++p) {
            data[p] *= weight;
            const weighted = data[p];
            if (weighted > best_scores[p]) {
                mask_labels[p] = mask_idx;
                best_scores[p] = weighted;
            }
        }
    }

    // 3. Turn every sufficiently large mask into a segment.
    // TODO: fusing of instances (`label_ids_to_fuse`) is not implemented yet, so every
    // accepted mask gets a fresh segment id.
    const segments = [];
    const segmentation_data = segmentation.data;
    let current_segment_id = 0;

    for (const [k, pred_class] of pred_labels.entries()) {
        // Check if mask exists and large enough to be a segment
        const [mask_exists, mask_k] = check_segment_validity(
            mask_labels,
            mask_probs,
            k,
            mask_threshold,
            overlap_mask_area_threshold
        );

        if (mask_exists) {
            ++current_segment_id;

            // Add current object segment to final segmentation map
            for (const pixel of mask_k) {
                segmentation_data[pixel] = current_segment_id;
            }

            segments.push({
                id: current_segment_id,
                label_id: pred_class,
                score: pred_scores[k],
            });
        }
    }

    return [segmentation, segments];
}
|
|
18150
|
+
|
|
18151
|
+
|
|
18152
|
+
/**
 * Post-process the model output to generate the final panoptic segmentation.
 *
 * Class logits are read from `outputs.class_queries_logits` (falling back to `outputs.logits`)
 * and mask logits from `outputs.masks_queries_logits` (falling back to `outputs.pred_masks`).
 * For a batch item where no prediction is kept, the returned segmentation is filled with -1
 * and `segments_info` is empty.
 *
 * @param {*} outputs The model output to post process
 * @param {number} [threshold=0.5] The probability score threshold to keep predicted instance masks.
 * @param {number} [mask_threshold=0.5] Threshold to use when turning the predicted masks into binary values.
 * @param {number} [overlap_mask_area_threshold=0.8] The overlap mask area threshold to merge or discard small disconnected parts within each binary instance mask.
 * @param {Set<number>} [label_ids_to_fuse=null] The labels in this state will have all their instances be fused together.
 * @param {[number, number][]} [target_sizes=null] The target sizes to resize the masks to.
 * @returns {Array<{ segmentation: Tensor, segments_info: Array<{id: number, label_id: number, score: number}>}>}
 */
function post_process_panoptic_segmentation(
    outputs,
    threshold = 0.5,
    mask_threshold = 0.5,
    overlap_mask_area_threshold = 0.8,
    label_ids_to_fuse = null,
    target_sizes = null,
) {
    let fuse_ids = label_ids_to_fuse;
    if (fuse_ids === null) {
        console.warn("`label_ids_to_fuse` unset. No instance will be fused.");
        fuse_ids = new Set();
    }

    const class_queries_logits = outputs.class_queries_logits ?? outputs.logits; // [batch_size, num_queries, num_classes+1]
    const masks_queries_logits = outputs.masks_queries_logits ?? outputs.pred_masks; // [batch_size, num_queries, height, width]

    const mask_probs = masks_queries_logits.sigmoid(); // [batch_size, num_queries, height, width]

    const logits_dims = class_queries_logits.dims;
    const batch_size = logits_dims[0];
    const num_labels = logits_dims[2] - 1; // Remove last class (background)

    if (target_sizes !== null && target_sizes.length !== batch_size) {
        throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits");
    }

    const results = [];
    for (let b = 0; b < batch_size; ++b) {
        const target_size = target_sizes !== null ? target_sizes[b] : null;

        const item_class_logits = class_queries_logits[b];
        const item_mask_probs = mask_probs[b];

        const [kept_masks, kept_scores, kept_labels] = remove_low_and_no_objects(
            item_class_logits, item_mask_probs, threshold, num_labels,
        );

        if (kept_labels.length === 0) {
            // No mask found: emit a map filled with -1 and no segment information
            const [height, width] = target_size ?? item_mask_probs.dims.slice(-2);

            results.push({
                segmentation: new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_4__.Tensor(
                    'int32',
                    new Int32Array(height * width).fill(-1),
                    [height, width]
                ),
                segments_info: [],
            });
        } else {
            // Get segmentation map and segment information of batch item
            const [segmentation, segments_info] = compute_segments(
                kept_masks,
                kept_scores,
                kept_labels,
                mask_threshold,
                overlap_mask_area_threshold,
                fuse_ids,
                target_size,
            );

            results.push({ segmentation, segments_info });
        }
    }

    return results;
}
|
|
18232
|
+
|
|
18233
|
+
|
|
18234
|
+
/**
 * Post-processes the outputs of the model (for instance segmentation).
 *
 * NOTE: This is a placeholder; it is not implemented yet and always throws.
 *
 * @param {*} outputs Raw outputs of the model.
 * @param {number} [threshold=0.5] The probability score threshold to keep predicted instance masks.
 * @param {[number, number][]} [target_sizes=null] List of tuples corresponding to the requested final size
 * (height, width) of each prediction. If unset, predictions will not be resized.
 * @returns {Array<{ segmentation: Tensor, segments_info: Array<{id: number, label_id: number, score: number}>}>}
 * @throws {Error} Always, with the message 'Not implemented yet'.
 */
function post_process_instance_segmentation(outputs, threshold = 0.5, target_sizes = null) {
    throw new Error('Not implemented yet');
}
|
|
18246
|
+
|
|
17828
18247
|
/**
|
|
17829
18248
|
* Named tuple to indicate the order we are using is (height x width), even though
|
|
17830
18249
|
* the Graphics’ industry standard is (width x height).
|
|
@@ -18413,6 +18832,7 @@ class SegformerFeatureExtractor extends ImageFeatureExtractor {
|
|
|
18413
18832
|
return post_process_semantic_segmentation(...args);
|
|
18414
18833
|
}
|
|
18415
18834
|
}
|
|
18835
|
+
/** Empty subclass: inherits all behavior from ImageFeatureExtractor without overriding anything. */
class PvtImageProcessor extends ImageFeatureExtractor { }
|
|
18416
18836
|
/** Empty subclass: inherits all behavior from ImageFeatureExtractor without overriding anything. */
class DPTFeatureExtractor extends ImageFeatureExtractor { }
|
|
18417
18837
|
/** Empty subclass of DPTFeatureExtractor (not of ImageFeatureExtractor directly); adds no members of its own. */
class DPTImageProcessor extends DPTFeatureExtractor { } // NOTE: extends DPTFeatureExtractor
|
|
18418
18838
|
/** Empty subclass: inherits all behavior from ImageFeatureExtractor without overriding anything. */
class BitImageProcessor extends ImageFeatureExtractor { }
|
|
@@ -18552,302 +18972,32 @@ class DetrFeatureExtractor extends ImageFeatureExtractor {
|
|
|
18552
18972
|
// TODO support different mask sizes (not just 64x64)
|
|
18553
18973
|
// Currently, just fill pixel mask with 1s
|
|
18554
18974
|
const maskSize = [result.pixel_values.dims[0], 64, 64];
|
|
18555
|
-
const pixel_mask =
|
|
18556
|
-
'int64',
|
|
18557
|
-
new BigInt64Array(maskSize.reduce((a, b) => a * b)).fill(1n),
|
|
18558
|
-
maskSize
|
|
18559
|
-
);
|
|
18975
|
+
const pixel_mask = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_4__.full)(maskSize, 1n);
|
|
18560
18976
|
|
|
18561
18977
|
return { ...result, pixel_mask };
|
|
18562
18978
|
}
|
|
18563
18979
|
|
|
18564
|
-
/**
|
|
18565
|
-
* Post-processes the outputs of the model (for object detection).
|
|
18566
|
-
* @param {Object} outputs The outputs of the model that must be post-processed
|
|
18567
|
-
* @param {Tensor} outputs.logits The logits
|
|
18568
|
-
* @param {Tensor} outputs.pred_boxes The predicted boxes.
|
|
18569
|
-
* @return {Object[]} An array of objects containing the post-processed outputs.
|
|
18570
|
-
*/
|
|
18571
|
-
|
|
18572
18980
|
/** @type {typeof post_process_object_detection} */
|
|
18573
18981
|
post_process_object_detection(...args) {
|
|
18574
18982
|
return post_process_object_detection(...args);
|
|
18575
18983
|
}
|
|
18576
18984
|
|
|
18577
|
-
/**
|
|
18578
|
-
|
|
18579
|
-
|
|
18580
|
-
* @param {Tensor} mask_logits The mask logits.
|
|
18581
|
-
* @param {number} object_mask_threshold A number between 0 and 1 used to binarize the masks.
|
|
18582
|
-
* @param {number} num_labels The number of labels.
|
|
18583
|
-
* @returns {[Tensor[], number[], number[]]} The binarized masks, the scores, and the labels.
|
|
18584
|
-
*/
|
|
18585
|
-
remove_low_and_no_objects(class_logits, mask_logits, object_mask_threshold, num_labels) {
|
|
18586
|
-
|
|
18587
|
-
let mask_probs_item = [];
|
|
18588
|
-
let pred_scores_item = [];
|
|
18589
|
-
let pred_labels_item = [];
|
|
18590
|
-
|
|
18591
|
-
for (let j = 0; j < class_logits.dims[0]; ++j) {
|
|
18592
|
-
let cls = class_logits[j];
|
|
18593
|
-
let mask = mask_logits[j];
|
|
18594
|
-
|
|
18595
|
-
let pred_label = (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_3__.max)(cls.data)[1];
|
|
18596
|
-
if (pred_label === num_labels) {
|
|
18597
|
-
// Is the background, so we ignore it
|
|
18598
|
-
continue;
|
|
18599
|
-
}
|
|
18600
|
-
|
|
18601
|
-
let scores = (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_3__.softmax)(cls.data);
|
|
18602
|
-
let pred_score = scores[pred_label];
|
|
18603
|
-
if (pred_score > object_mask_threshold) {
|
|
18604
|
-
mask_probs_item.push(mask);
|
|
18605
|
-
pred_scores_item.push(pred_score);
|
|
18606
|
-
pred_labels_item.push(pred_label);
|
|
18607
|
-
}
|
|
18608
|
-
}
|
|
18609
|
-
|
|
18610
|
-
return [mask_probs_item, pred_scores_item, pred_labels_item];
|
|
18611
|
-
|
|
18612
|
-
}
|
|
18613
|
-
|
|
18614
|
-
/**
|
|
18615
|
-
* Checks whether the segment is valid or not.
|
|
18616
|
-
* @param {Int32Array} mask_labels Labels for each pixel in the mask.
|
|
18617
|
-
* @param {Tensor[]} mask_probs Probabilities for each pixel in the masks.
|
|
18618
|
-
* @param {number} k The class id of the segment.
|
|
18619
|
-
* @param {number} mask_threshold The mask threshold.
|
|
18620
|
-
* @param {number} overlap_mask_area_threshold The overlap mask area threshold.
|
|
18621
|
-
* @returns {[boolean, number[]]} Whether the segment is valid or not, and the indices of the valid labels.
|
|
18622
|
-
*/
|
|
18623
|
-
check_segment_validity(
|
|
18624
|
-
mask_labels,
|
|
18625
|
-
mask_probs,
|
|
18626
|
-
k,
|
|
18627
|
-
mask_threshold = 0.5,
|
|
18628
|
-
overlap_mask_area_threshold = 0.8
|
|
18629
|
-
) {
|
|
18630
|
-
// mask_k is a 1D array of indices, indicating where the mask is equal to k
|
|
18631
|
-
let mask_k = [];
|
|
18632
|
-
let mask_k_area = 0;
|
|
18633
|
-
let original_area = 0;
|
|
18634
|
-
|
|
18635
|
-
const mask_probs_k_data = mask_probs[k].data;
|
|
18636
|
-
|
|
18637
|
-
// Compute the area of all the stuff in query k
|
|
18638
|
-
for (let i = 0; i < mask_labels.length; ++i) {
|
|
18639
|
-
if (mask_labels[i] === k) {
|
|
18640
|
-
mask_k.push(i);
|
|
18641
|
-
++mask_k_area;
|
|
18642
|
-
}
|
|
18643
|
-
|
|
18644
|
-
if (mask_probs_k_data[i] >= mask_threshold) {
|
|
18645
|
-
++original_area;
|
|
18646
|
-
}
|
|
18647
|
-
}
|
|
18648
|
-
let mask_exists = mask_k_area > 0 && original_area > 0;
|
|
18649
|
-
|
|
18650
|
-
// Eliminate disconnected tiny segments
|
|
18651
|
-
if (mask_exists) {
|
|
18652
|
-
// Perform additional check
|
|
18653
|
-
let area_ratio = mask_k_area / original_area;
|
|
18654
|
-
mask_exists = area_ratio > overlap_mask_area_threshold;
|
|
18655
|
-
}
|
|
18656
|
-
|
|
18657
|
-
return [mask_exists, mask_k]
|
|
18985
|
+
/** @type {typeof post_process_panoptic_segmentation} */
|
|
18986
|
+
post_process_panoptic_segmentation(...args) {
|
|
18987
|
+
return post_process_panoptic_segmentation(...args);
|
|
18658
18988
|
}
|
|
18659
18989
|
|
|
18660
|
-
|
|
18661
|
-
|
|
18662
|
-
|
|
18663
|
-
* @param {number[]} pred_scores The predicted scores.
|
|
18664
|
-
* @param {number[]} pred_labels The predicted labels.
|
|
18665
|
-
* @param {number} mask_threshold The mask threshold.
|
|
18666
|
-
* @param {number} overlap_mask_area_threshold The overlap mask area threshold.
|
|
18667
|
-
* @param {Set<number>} label_ids_to_fuse The label ids to fuse.
|
|
18668
|
-
* @param {number[]} target_size The target size of the image.
|
|
18669
|
-
* @returns {[Tensor, Array<{id: number, label_id: number, score: number}>]} The computed segments.
|
|
18670
|
-
*/
|
|
18671
|
-
compute_segments(
|
|
18672
|
-
mask_probs,
|
|
18673
|
-
pred_scores,
|
|
18674
|
-
pred_labels,
|
|
18675
|
-
mask_threshold,
|
|
18676
|
-
overlap_mask_area_threshold,
|
|
18677
|
-
label_ids_to_fuse = null,
|
|
18678
|
-
target_size = null,
|
|
18679
|
-
) {
|
|
18680
|
-
let [height, width] = target_size ?? mask_probs[0].dims;
|
|
18681
|
-
|
|
18682
|
-
let segmentation = new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_4__.Tensor(
|
|
18683
|
-
'int32',
|
|
18684
|
-
new Int32Array(height * width),
|
|
18685
|
-
[height, width]
|
|
18686
|
-
);
|
|
18687
|
-
let segments = [];
|
|
18688
|
-
|
|
18689
|
-
// 1. If target_size is not null, we need to resize the masks to the target size
|
|
18690
|
-
if (target_size !== null) {
|
|
18691
|
-
// resize the masks to the target size
|
|
18692
|
-
for (let i = 0; i < mask_probs.length; ++i) {
|
|
18693
|
-
mask_probs[i] = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_4__.interpolate)(mask_probs[i], target_size, 'bilinear', false);
|
|
18694
|
-
}
|
|
18695
|
-
}
|
|
18696
|
-
|
|
18697
|
-
// 2. Weigh each mask by its prediction score
|
|
18698
|
-
// NOTE: `mask_probs` is updated in-place
|
|
18699
|
-
//
|
|
18700
|
-
// Temporary storage for the best label/scores for each pixel ([height, width]):
|
|
18701
|
-
let mask_labels = new Int32Array(mask_probs[0].data.length);
|
|
18702
|
-
let bestScores = new Float32Array(mask_probs[0].data.length);
|
|
18703
|
-
|
|
18704
|
-
for (let i = 0; i < mask_probs.length; ++i) {
|
|
18705
|
-
let score = pred_scores[i];
|
|
18706
|
-
|
|
18707
|
-
const mask_probs_i_data = mask_probs[i].data;
|
|
18708
|
-
|
|
18709
|
-
for (let j = 0; j < mask_probs_i_data.length; ++j) {
|
|
18710
|
-
mask_probs_i_data[j] *= score
|
|
18711
|
-
if (mask_probs_i_data[j] > bestScores[j]) {
|
|
18712
|
-
mask_labels[j] = i;
|
|
18713
|
-
bestScores[j] = mask_probs_i_data[j];
|
|
18714
|
-
}
|
|
18715
|
-
}
|
|
18716
|
-
}
|
|
18717
|
-
|
|
18718
|
-
let current_segment_id = 0;
|
|
18719
|
-
|
|
18720
|
-
// let stuff_memory_list = {}
|
|
18721
|
-
const segmentation_data = segmentation.data;
|
|
18722
|
-
for (let k = 0; k < pred_labels.length; ++k) {
|
|
18723
|
-
let pred_class = pred_labels[k];
|
|
18724
|
-
|
|
18725
|
-
// TODO add `should_fuse`
|
|
18726
|
-
// let should_fuse = pred_class in label_ids_to_fuse
|
|
18727
|
-
|
|
18728
|
-
// Check if mask exists and large enough to be a segment
|
|
18729
|
-
let [mask_exists, mask_k] = this.check_segment_validity(
|
|
18730
|
-
mask_labels,
|
|
18731
|
-
mask_probs,
|
|
18732
|
-
k,
|
|
18733
|
-
mask_threshold,
|
|
18734
|
-
overlap_mask_area_threshold
|
|
18735
|
-
)
|
|
18736
|
-
|
|
18737
|
-
if (!mask_exists) {
|
|
18738
|
-
// Nothing to see here
|
|
18739
|
-
continue;
|
|
18740
|
-
}
|
|
18741
|
-
|
|
18742
|
-
// TODO
|
|
18743
|
-
// if (pred_class in stuff_memory_list) {
|
|
18744
|
-
// current_segment_id = stuff_memory_list[pred_class]
|
|
18745
|
-
// } else {
|
|
18746
|
-
// current_segment_id += 1;
|
|
18747
|
-
// }
|
|
18748
|
-
++current_segment_id;
|
|
18749
|
-
|
|
18750
|
-
|
|
18751
|
-
// Add current object segment to final segmentation map
|
|
18752
|
-
for (let index of mask_k) {
|
|
18753
|
-
segmentation_data[index] = current_segment_id;
|
|
18754
|
-
}
|
|
18755
|
-
|
|
18756
|
-
segments.push({
|
|
18757
|
-
id: current_segment_id,
|
|
18758
|
-
label_id: pred_class,
|
|
18759
|
-
// was_fused: should_fuse, TODO
|
|
18760
|
-
score: pred_scores[k],
|
|
18761
|
-
})
|
|
18762
|
-
|
|
18763
|
-
// TODO
|
|
18764
|
-
// if(should_fuse){
|
|
18765
|
-
// stuff_memory_list[pred_class] = current_segment_id
|
|
18766
|
-
// }
|
|
18767
|
-
}
|
|
18768
|
-
|
|
18769
|
-
return [segmentation, segments];
|
|
18990
|
+
post_process_instance_segmentation() {
|
|
18991
|
+
// TODO
|
|
18992
|
+
throw Error("Not implemented yet");
|
|
18770
18993
|
}
|
|
18994
|
+
}
|
|
18771
18995
|
|
|
18772
|
-
|
|
18773
|
-
* Post-process the model output to generate the final panoptic segmentation.
|
|
18774
|
-
* @param {*} outputs The model output to post process
|
|
18775
|
-
* @param {number} [threshold=0.5] The probability score threshold to keep predicted instance masks.
|
|
18776
|
-
* @param {number} [mask_threshold=0.5] Threshold to use when turning the predicted masks into binary values.
|
|
18777
|
-
* @param {number} [overlap_mask_area_threshold=0.8] The overlap mask area threshold to merge or discard small disconnected parts within each binary instance mask.
|
|
18778
|
-
* @param {Set<number>} [label_ids_to_fuse=null] The labels in this state will have all their instances be fused together.
|
|
18779
|
-
* @param {number[][]} [target_sizes=null] The target sizes to resize the masks to.
|
|
18780
|
-
* @returns {Array<{ segmentation: Tensor, segments_info: Array<{id: number, label_id: number, score: number}>}>}
|
|
18781
|
-
*/
|
|
18782
|
-
post_process_panoptic_segmentation(
|
|
18783
|
-
outputs,
|
|
18784
|
-
threshold = 0.5,
|
|
18785
|
-
mask_threshold = 0.5,
|
|
18786
|
-
overlap_mask_area_threshold = 0.8,
|
|
18787
|
-
label_ids_to_fuse = null,
|
|
18788
|
-
target_sizes = null,
|
|
18789
|
-
) {
|
|
18790
|
-
if (label_ids_to_fuse === null) {
|
|
18791
|
-
console.warn("`label_ids_to_fuse` unset. No instance will be fused.")
|
|
18792
|
-
label_ids_to_fuse = new Set();
|
|
18793
|
-
}
|
|
18794
|
-
|
|
18795
|
-
const class_queries_logits = outputs.logits; // [batch_size, num_queries, num_classes+1]
|
|
18796
|
-
const masks_queries_logits = outputs.pred_masks; // [batch_size, num_queries, height, width]
|
|
18797
|
-
|
|
18798
|
-
const mask_probs = masks_queries_logits.sigmoid() // [batch_size, num_queries, height, width]
|
|
18799
|
-
|
|
18800
|
-
let [batch_size, num_queries, num_labels] = class_queries_logits.dims;
|
|
18801
|
-
num_labels -= 1; // Remove last class (background)
|
|
18802
|
-
|
|
18803
|
-
if (target_sizes !== null && target_sizes.length !== batch_size) {
|
|
18804
|
-
throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits")
|
|
18805
|
-
}
|
|
18806
|
-
|
|
18807
|
-
let toReturn = [];
|
|
18808
|
-
for (let i = 0; i < batch_size; ++i) {
|
|
18809
|
-
let target_size = target_sizes !== null ? target_sizes[i] : null;
|
|
18810
|
-
|
|
18811
|
-
let class_logits = class_queries_logits[i];
|
|
18812
|
-
let mask_logits = mask_probs[i];
|
|
18813
|
-
|
|
18814
|
-
let [mask_probs_item, pred_scores_item, pred_labels_item] = this.remove_low_and_no_objects(class_logits, mask_logits, threshold, num_labels);
|
|
18815
|
-
|
|
18816
|
-
if (pred_labels_item.length === 0) {
|
|
18817
|
-
// No mask found
|
|
18818
|
-
let [height, width] = target_size ?? mask_logits.dims.slice(-2);
|
|
18819
|
-
|
|
18820
|
-
let segmentation = new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_4__.Tensor(
|
|
18821
|
-
'int32',
|
|
18822
|
-
new Int32Array(height * width).fill(-1),
|
|
18823
|
-
[height, width]
|
|
18824
|
-
)
|
|
18825
|
-
toReturn.push({
|
|
18826
|
-
segmentation: segmentation,
|
|
18827
|
-
segments_info: []
|
|
18828
|
-
});
|
|
18829
|
-
continue;
|
|
18830
|
-
}
|
|
18831
|
-
|
|
18832
|
-
|
|
18833
|
-
// Get segmentation map and segment information of batch item
|
|
18834
|
-
let [segmentation, segments] = this.compute_segments(
|
|
18835
|
-
mask_probs_item,
|
|
18836
|
-
pred_scores_item,
|
|
18837
|
-
pred_labels_item,
|
|
18838
|
-
mask_threshold,
|
|
18839
|
-
overlap_mask_area_threshold,
|
|
18840
|
-
label_ids_to_fuse,
|
|
18841
|
-
target_size,
|
|
18842
|
-
)
|
|
18843
|
-
|
|
18844
|
-
toReturn.push({
|
|
18845
|
-
segmentation: segmentation,
|
|
18846
|
-
segments_info: segments
|
|
18847
|
-
})
|
|
18848
|
-
}
|
|
18996
|
+
class MaskFormerFeatureExtractor extends ImageFeatureExtractor {
|
|
18849
18997
|
|
|
18850
|
-
|
|
18998
|
+
/** @type {typeof post_process_panoptic_segmentation} */
|
|
18999
|
+
post_process_panoptic_segmentation(...args) {
|
|
19000
|
+
return post_process_panoptic_segmentation(...args);
|
|
18851
19001
|
}
|
|
18852
19002
|
|
|
18853
19003
|
post_process_instance_segmentation() {
|
|
@@ -18856,6 +19006,7 @@ class DetrFeatureExtractor extends ImageFeatureExtractor {
|
|
|
18856
19006
|
}
|
|
18857
19007
|
}
|
|
18858
19008
|
|
|
19009
|
+
|
|
18859
19010
|
class YolosFeatureExtractor extends ImageFeatureExtractor {
|
|
18860
19011
|
/** @type {typeof post_process_object_detection} */
|
|
18861
19012
|
post_process_object_detection(...args) {
|
|
@@ -20145,11 +20296,13 @@ class AutoProcessor {
|
|
|
20145
20296
|
BitImageProcessor,
|
|
20146
20297
|
DPTImageProcessor,
|
|
20147
20298
|
DPTFeatureExtractor,
|
|
20299
|
+
PvtImageProcessor,
|
|
20148
20300
|
GLPNFeatureExtractor,
|
|
20149
20301
|
BeitFeatureExtractor,
|
|
20150
20302
|
DeiTFeatureExtractor,
|
|
20151
20303
|
DetrFeatureExtractor,
|
|
20152
20304
|
RTDetrImageProcessor,
|
|
20305
|
+
MaskFormerFeatureExtractor,
|
|
20153
20306
|
YolosFeatureExtractor,
|
|
20154
20307
|
DonutFeatureExtractor,
|
|
20155
20308
|
NougatImageProcessor,
|
|
@@ -20527,23 +20680,26 @@ function is_chinese_char(cp) {
|
|
|
20527
20680
|
}
|
|
20528
20681
|
|
|
20529
20682
|
/**
|
|
20530
|
-
* Helper function to fuse consecutive
|
|
20531
|
-
* @param {string[]} arr The input
|
|
20532
|
-
* @param {any}
|
|
20533
|
-
* @param {
|
|
20683
|
+
* Helper function to fuse consecutive unknown tokens.
|
|
20684
|
+
* @param {string[]} arr The list of input tokens
|
|
20685
|
+
* @param {Map<string, any>} tokens_to_ids The mapping from tokens to token ids.
|
|
20686
|
+
* @param {number} unk_token_id The value to fuse on.
|
|
20687
|
+
* @private
|
|
20534
20688
|
*/
|
|
20535
|
-
function
|
|
20689
|
+
function fuse_unk(arr, tokens_to_ids, unk_token_id) {
|
|
20536
20690
|
const fused = [];
|
|
20537
20691
|
let i = 0;
|
|
20538
20692
|
while (i < arr.length) {
|
|
20539
20693
|
fused.push(arr[i])
|
|
20540
|
-
if ((
|
|
20694
|
+
if ((tokens_to_ids.get(arr[i]) ?? unk_token_id) !== unk_token_id) {
|
|
20541
20695
|
++i;
|
|
20542
20696
|
continue;
|
|
20543
20697
|
}
|
|
20544
20698
|
|
|
20545
|
-
while (i < arr.length && (
|
|
20546
|
-
|
|
20699
|
+
while (++i < arr.length && (tokens_to_ids.get(arr[i]) ?? unk_token_id) === unk_token_id) {
|
|
20700
|
+
if (tokens_to_ids.get(fused.at(-1)) !== unk_token_id) {
|
|
20701
|
+
fused[fused.length - 1] += arr[i];
|
|
20702
|
+
}
|
|
20547
20703
|
}
|
|
20548
20704
|
}
|
|
20549
20705
|
|
|
@@ -20660,15 +20816,15 @@ class TokenizerModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
|
|
|
20660
20816
|
/**
|
|
20661
20817
|
* Internal function to call the TokenizerModel instance.
|
|
20662
20818
|
* @param {string[]} tokens The tokens to encode.
|
|
20663
|
-
* @returns {string[]} The encoded
|
|
20819
|
+
* @returns {string[]} The encoded tokens.
|
|
20664
20820
|
*/
|
|
20665
20821
|
_call(tokens) {
|
|
20666
|
-
|
|
20822
|
+
tokens = this.encode(tokens);
|
|
20667
20823
|
if (this.fuse_unk) {
|
|
20668
20824
|
// Fuse unknown tokens
|
|
20669
|
-
|
|
20825
|
+
tokens = fuse_unk(tokens, this.tokens_to_ids, this.unk_token_id);
|
|
20670
20826
|
}
|
|
20671
|
-
return
|
|
20827
|
+
return tokens;
|
|
20672
20828
|
}
|
|
20673
20829
|
|
|
20674
20830
|
/**
|
|
@@ -21166,15 +21322,19 @@ class BPE extends TokenizerModel {
|
|
|
21166
21322
|
for (const t of bpe_token_list) {
|
|
21167
21323
|
if (this.tokens_to_ids.has(t)) {
|
|
21168
21324
|
outputTokens.push(t);
|
|
21169
|
-
} else {
|
|
21170
|
-
|
|
21171
|
-
|
|
21172
|
-
|
|
21173
|
-
|
|
21174
|
-
|
|
21325
|
+
} else if (this.byte_fallback) {
|
|
21326
|
+
const byteTokens = Array.from(this.text_encoder.encode(t))
|
|
21327
|
+
.map(x => `<0x${x.toString(16).toUpperCase().padStart(2, '0')}>`);
|
|
21328
|
+
if (byteTokens.every(x => this.tokens_to_ids.has(x))) {
|
|
21329
|
+
// Ensure the byte tokens are actually in the vocabulary, otherwise
|
|
21330
|
+
// we fall back to the unknown token. For more information, see
|
|
21331
|
+
// https://github.com/huggingface/transformers/issues/28096.
|
|
21332
|
+
outputTokens.push(...byteTokens);
|
|
21175
21333
|
} else {
|
|
21176
21334
|
outputTokens.push(this.unk_token);
|
|
21177
21335
|
}
|
|
21336
|
+
} else {
|
|
21337
|
+
outputTokens.push(this.unk_token);
|
|
21178
21338
|
}
|
|
21179
21339
|
}
|
|
21180
21340
|
}
|
|
@@ -22846,11 +23006,10 @@ class PreTrainedTokenizer extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__
|
|
|
22846
23006
|
this.decoder.end_of_word_suffix = this.model.end_of_word_suffix;
|
|
22847
23007
|
}
|
|
22848
23008
|
|
|
22849
|
-
|
|
22850
23009
|
this.added_tokens_regex = this.added_tokens.length > 0 ? new RegExp(
|
|
22851
|
-
this.added_tokens
|
|
23010
|
+
this.added_tokens.slice()
|
|
22852
23011
|
// Sort by length (desc) to avoid early partial matches
|
|
22853
|
-
.
|
|
23012
|
+
.sort((a, b) => b.content.length - a.content.length)
|
|
22854
23013
|
.map(x => `${x.lstrip ? '\\s*' : ''}(${(0,_utils_core_js__WEBPACK_IMPORTED_MODULE_1__.escapeRegExp)(x.content)})${x.rstrip ? '\\s*' : ''}`)
|
|
22855
23014
|
.join('|')
|
|
22856
23015
|
) : null;
|
|
@@ -23348,6 +23507,67 @@ class PreTrainedTokenizer extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__
|
|
|
23348
23507
|
|
|
23349
23508
|
return decoded;
|
|
23350
23509
|
}
|
|
23510
|
+
|
|
23511
|
+
/**
|
|
23512
|
+
* Retrieve the chat template string used for tokenizing chat messages. This template is used
|
|
23513
|
+
* internally by the `apply_chat_template` method and can also be used externally to retrieve the model's chat
|
|
23514
|
+
* template for better generation tracking.
|
|
23515
|
+
*
|
|
23516
|
+
* @param {Object} options An optional object containing the following properties:
|
|
23517
|
+
* @param {string} [options.chat_template=null]
|
|
23518
|
+
* A Jinja template or the name of a template to use for this conversion.
|
|
23519
|
+
* It is usually not necessary to pass anything to this argument,
|
|
23520
|
+
* as the model's template will be used by default.
|
|
23521
|
+
* @param {Object[]} [options.tools=null]
|
|
23522
|
+
* A list of tools (callable functions) that will be accessible to the model. If the template does not
|
|
23523
|
+
* support function calling, this argument will have no effect. Each tool should be passed as a JSON Schema,
|
|
23524
|
+
* giving the name, description and argument types for the tool. See our
|
|
23525
|
+
* [chat templating guide](https://huggingface.co/docs/transformers/main/en/chat_templating#automated-function-conversion-for-tool-use)
|
|
23526
|
+
* for more information.
|
|
23527
|
+
* @returns {string} The chat template string.
|
|
23528
|
+
*/
|
|
23529
|
+
get_chat_template({
|
|
23530
|
+
chat_template = null,
|
|
23531
|
+
tools = null,
|
|
23532
|
+
} = {}) {
|
|
23533
|
+
|
|
23534
|
+
// First, handle the cases when the model has a dict of multiple templates
|
|
23535
|
+
if (this.chat_template && typeof this.chat_template === 'object') {
|
|
23536
|
+
const template_dict = this.chat_template;
|
|
23537
|
+
|
|
23538
|
+
if (chat_template !== null && Object.hasOwn(template_dict, chat_template)) {
|
|
23539
|
+
// The user can pass the name of a template to the chat template argument instead of an entire template
|
|
23540
|
+
chat_template = template_dict[chat_template];
|
|
23541
|
+
} else if (chat_template === null) {
|
|
23542
|
+
if (tools !== null && 'tool_use' in template_dict) {
|
|
23543
|
+
chat_template = template_dict['tool_use'];
|
|
23544
|
+
} else if ('default' in template_dict) {
|
|
23545
|
+
chat_template = template_dict['default'];
|
|
23546
|
+
} else {
|
|
23547
|
+
throw Error(
|
|
23548
|
+
`This model has multiple chat templates with no default specified! Please either pass a chat ` +
|
|
23549
|
+
`template or the name of the template you wish to use to the 'chat_template' argument. Available ` +
|
|
23550
|
+
`template names are ${Object.keys(template_dict).sort()}.`
|
|
23551
|
+
)
|
|
23552
|
+
}
|
|
23553
|
+
}
|
|
23554
|
+
} else if (chat_template === null) {
|
|
23555
|
+
// These are the cases when the model has a single template
|
|
23556
|
+
// priority: `chat_template` argument > `tokenizer.chat_template`
|
|
23557
|
+
if (this.chat_template) {
|
|
23558
|
+
chat_template = this.chat_template;
|
|
23559
|
+
} else {
|
|
23560
|
+
throw Error(
|
|
23561
|
+
"Cannot use apply_chat_template() because tokenizer.chat_template is not set and no template " +
|
|
23562
|
+
"argument was passed! For information about writing templates and setting the " +
|
|
23563
|
+
"tokenizer.chat_template attribute, please see the documentation at " +
|
|
23564
|
+
"https://huggingface.co/docs/transformers/main/en/chat_templating"
|
|
23565
|
+
)
|
|
23566
|
+
}
|
|
23567
|
+
}
|
|
23568
|
+
return chat_template;
|
|
23569
|
+
}
|
|
23570
|
+
|
|
23351
23571
|
/**
|
|
23352
23572
|
* Converts a list of message objects with `"role"` and `"content"` keys to a list of token
|
|
23353
23573
|
* ids. This method is intended for use with chat models, and will read the tokenizer's chat_template attribute to
|
|
@@ -23421,39 +23641,8 @@ class PreTrainedTokenizer extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__
|
|
|
23421
23641
|
...kwargs
|
|
23422
23642
|
} = {}) {
|
|
23423
23643
|
|
|
23424
|
-
|
|
23425
|
-
if (
|
|
23426
|
-
(this.chat_template && typeof this.chat_template === 'object')
|
|
23427
|
-
|| this.chat_template === null
|
|
23428
|
-
) {
|
|
23429
|
-
const template_dict = this.chat_template;
|
|
23644
|
+
chat_template = this.get_chat_template({ chat_template, tools });
|
|
23430
23645
|
|
|
23431
|
-
if (chat_template !== null && Object.hasOwn(template_dict, chat_template)) {
|
|
23432
|
-
// The user can pass the name of a template to the chat template argument instead of an entire template
|
|
23433
|
-
chat_template = template_dict[chat_template];
|
|
23434
|
-
} else if (chat_template === null && 'default' in template_dict) {
|
|
23435
|
-
chat_template = template_dict['default'];
|
|
23436
|
-
} else if (chat_template === null) {
|
|
23437
|
-
throw Error(
|
|
23438
|
-
`This model has multiple chat templates with no default specified! Please either pass a chat ` +
|
|
23439
|
-
`template or the name of the template you wish to use to the 'chat_template' argument. Available ` +
|
|
23440
|
-
`template names are ${Object.keys(template_dict).sort()}.`
|
|
23441
|
-
)
|
|
23442
|
-
}
|
|
23443
|
-
} else {
|
|
23444
|
-
// These are the cases when the model has a single template
|
|
23445
|
-
// priority: `chat_template` argument > `tokenizer.chat_template`
|
|
23446
|
-
if (this.chat_template) {
|
|
23447
|
-
chat_template = this.chat_template;
|
|
23448
|
-
} else {
|
|
23449
|
-
throw Error(
|
|
23450
|
-
"Cannot use apply_chat_template() because tokenizer.chat_template is not set and no template " +
|
|
23451
|
-
"argument was passed! For information about writing templates and setting the " +
|
|
23452
|
-
"tokenizer.chat_template attribute, please see the documentation at " +
|
|
23453
|
-
"https://huggingface.co/docs/transformers/main/en/chat_templating"
|
|
23454
|
-
)
|
|
23455
|
-
}
|
|
23456
|
-
}
|
|
23457
23646
|
if (typeof chat_template !== 'string') {
|
|
23458
23647
|
throw Error(`chat_template must be a string, but got ${typeof chat_template}`);
|
|
23459
23648
|
}
|
|
@@ -27871,11 +28060,20 @@ function softmax(arr) {
|
|
|
27871
28060
|
* @returns {T} The resulting log_softmax array.
|
|
27872
28061
|
*/
|
|
27873
28062
|
function log_softmax(arr) {
|
|
27874
|
-
// Compute the
|
|
27875
|
-
const
|
|
28063
|
+
// Compute the maximum value in the array
|
|
28064
|
+
const maxVal = max(arr)[0];
|
|
28065
|
+
|
|
28066
|
+
// Compute the sum of the exponentials
|
|
28067
|
+
let sumExps = 0;
|
|
28068
|
+
for(let i = 0; i < arr.length; ++i) {
|
|
28069
|
+
sumExps += Math.exp(arr[i] - maxVal);
|
|
28070
|
+
}
|
|
28071
|
+
|
|
28072
|
+
// Compute the log of the sum
|
|
28073
|
+
const logSum = Math.log(sumExps);
|
|
27876
28074
|
|
|
27877
|
-
//
|
|
27878
|
-
const logSoftmaxArr =
|
|
28075
|
+
// Compute the softmax values
|
|
28076
|
+
const logSoftmaxArr = arr.map(x => x - maxVal - logSum);
|
|
27879
28077
|
|
|
27880
28078
|
return /** @type {T} */(logSoftmaxArr);
|
|
27881
28079
|
}
|
|
@@ -30329,6 +30527,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
30329
30527
|
/* harmony export */ AutoModelForTextToSpectrogram: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.AutoModelForTextToSpectrogram),
|
|
30330
30528
|
/* harmony export */ AutoModelForTextToWaveform: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.AutoModelForTextToWaveform),
|
|
30331
30529
|
/* harmony export */ AutoModelForTokenClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.AutoModelForTokenClassification),
|
|
30530
|
+
/* harmony export */ AutoModelForUniversalSegmentation: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.AutoModelForUniversalSegmentation),
|
|
30332
30531
|
/* harmony export */ AutoModelForVision2Seq: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.AutoModelForVision2Seq),
|
|
30333
30532
|
/* harmony export */ AutoModelForXVector: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.AutoModelForXVector),
|
|
30334
30533
|
/* harmony export */ AutoModelForZeroShotObjectDetection: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.AutoModelForZeroShotObjectDetection),
|
|
@@ -30373,8 +30572,10 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
30373
30572
|
/* harmony export */ CLIPSegForImageSegmentation: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.CLIPSegForImageSegmentation),
|
|
30374
30573
|
/* harmony export */ CLIPSegModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.CLIPSegModel),
|
|
30375
30574
|
/* harmony export */ CLIPSegPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.CLIPSegPreTrainedModel),
|
|
30575
|
+
/* harmony export */ CLIPTextModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.CLIPTextModel),
|
|
30376
30576
|
/* harmony export */ CLIPTextModelWithProjection: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.CLIPTextModelWithProjection),
|
|
30377
30577
|
/* harmony export */ CLIPTokenizer: () => (/* reexport safe */ _tokenizers_js__WEBPACK_IMPORTED_MODULE_3__.CLIPTokenizer),
|
|
30578
|
+
/* harmony export */ CLIPVisionModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.CLIPVisionModel),
|
|
30378
30579
|
/* harmony export */ CLIPVisionModelWithProjection: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.CLIPVisionModelWithProjection),
|
|
30379
30580
|
/* harmony export */ CamembertForMaskedLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.CamembertForMaskedLM),
|
|
30380
30581
|
/* harmony export */ CamembertForQuestionAnswering: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.CamembertForQuestionAnswering),
|
|
@@ -30436,6 +30637,8 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
30436
30637
|
/* harmony export */ DebertaV2Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.DebertaV2Model),
|
|
30437
30638
|
/* harmony export */ DebertaV2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.DebertaV2PreTrainedModel),
|
|
30438
30639
|
/* harmony export */ DebertaV2Tokenizer: () => (/* reexport safe */ _tokenizers_js__WEBPACK_IMPORTED_MODULE_3__.DebertaV2Tokenizer),
|
|
30640
|
+
/* harmony export */ DecisionTransformerModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.DecisionTransformerModel),
|
|
30641
|
+
/* harmony export */ DecisionTransformerPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.DecisionTransformerPreTrainedModel),
|
|
30439
30642
|
/* harmony export */ DeiTFeatureExtractor: () => (/* reexport safe */ _processors_js__WEBPACK_IMPORTED_MODULE_4__.DeiTFeatureExtractor),
|
|
30440
30643
|
/* harmony export */ DeiTForImageClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.DeiTForImageClassification),
|
|
30441
30644
|
/* harmony export */ DeiTModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.DeiTModel),
|
|
@@ -30525,6 +30728,8 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
30525
30728
|
/* harmony export */ GemmaPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GemmaPreTrainedModel),
|
|
30526
30729
|
/* harmony export */ GemmaTokenizer: () => (/* reexport safe */ _tokenizers_js__WEBPACK_IMPORTED_MODULE_3__.GemmaTokenizer),
|
|
30527
30730
|
/* harmony export */ Grok1Tokenizer: () => (/* reexport safe */ _tokenizers_js__WEBPACK_IMPORTED_MODULE_3__.Grok1Tokenizer),
|
|
30731
|
+
/* harmony export */ GroupViTModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GroupViTModel),
|
|
30732
|
+
/* harmony export */ GroupViTPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GroupViTPreTrainedModel),
|
|
30528
30733
|
/* harmony export */ HerbertTokenizer: () => (/* reexport safe */ _tokenizers_js__WEBPACK_IMPORTED_MODULE_3__.HerbertTokenizer),
|
|
30529
30734
|
/* harmony export */ HieraForImageClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.HieraForImageClassification),
|
|
30530
30735
|
/* harmony export */ HieraModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.HieraModel),
|
|
@@ -30578,6 +30783,10 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
30578
30783
|
/* harmony export */ MarianModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MarianModel),
|
|
30579
30784
|
/* harmony export */ MarianPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MarianPreTrainedModel),
|
|
30580
30785
|
/* harmony export */ MarianTokenizer: () => (/* reexport safe */ _tokenizers_js__WEBPACK_IMPORTED_MODULE_3__.MarianTokenizer),
|
|
30786
|
+
/* harmony export */ MaskFormerFeatureExtractor: () => (/* reexport safe */ _processors_js__WEBPACK_IMPORTED_MODULE_4__.MaskFormerFeatureExtractor),
|
|
30787
|
+
/* harmony export */ MaskFormerForInstanceSegmentation: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MaskFormerForInstanceSegmentation),
|
|
30788
|
+
/* harmony export */ MaskFormerModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MaskFormerModel),
|
|
30789
|
+
/* harmony export */ MaskFormerPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MaskFormerPreTrainedModel),
|
|
30581
30790
|
/* harmony export */ MaskedLMOutput: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MaskedLMOutput),
|
|
30582
30791
|
/* harmony export */ MaxLengthCriteria: () => (/* reexport safe */ _generation_stopping_criteria_js__WEBPACK_IMPORTED_MODULE_11__.MaxLengthCriteria),
|
|
30583
30792
|
/* harmony export */ MistralForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MistralForCausalLM),
|
|
@@ -30655,6 +30864,10 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
30655
30864
|
/* harmony export */ PretrainedConfig: () => (/* reexport safe */ _configs_js__WEBPACK_IMPORTED_MODULE_5__.PretrainedConfig),
|
|
30656
30865
|
/* harmony export */ PretrainedMixin: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PretrainedMixin),
|
|
30657
30866
|
/* harmony export */ Processor: () => (/* reexport safe */ _processors_js__WEBPACK_IMPORTED_MODULE_4__.Processor),
|
|
30867
|
+
/* harmony export */ PvtForImageClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PvtForImageClassification),
|
|
30868
|
+
/* harmony export */ PvtImageProcessor: () => (/* reexport safe */ _processors_js__WEBPACK_IMPORTED_MODULE_4__.PvtImageProcessor),
|
|
30869
|
+
/* harmony export */ PvtModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PvtModel),
|
|
30870
|
+
/* harmony export */ PvtPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PvtPreTrainedModel),
|
|
30658
30871
|
/* harmony export */ PyAnnoteFeatureExtractor: () => (/* reexport safe */ _processors_js__WEBPACK_IMPORTED_MODULE_4__.PyAnnoteFeatureExtractor),
|
|
30659
30872
|
/* harmony export */ PyAnnoteForAudioFrameClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PyAnnoteForAudioFrameClassification),
|
|
30660
30873
|
/* harmony export */ PyAnnoteModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PyAnnoteModel),
|
|
@@ -30775,6 +30988,11 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
30775
30988
|
/* harmony export */ ViTFeatureExtractor: () => (/* reexport safe */ _processors_js__WEBPACK_IMPORTED_MODULE_4__.ViTFeatureExtractor),
|
|
30776
30989
|
/* harmony export */ ViTForImageClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ViTForImageClassification),
|
|
30777
30990
|
/* harmony export */ ViTImageProcessor: () => (/* reexport safe */ _processors_js__WEBPACK_IMPORTED_MODULE_4__.ViTImageProcessor),
|
|
30991
|
+
/* harmony export */ ViTMAEModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ViTMAEModel),
|
|
30992
|
+
/* harmony export */ ViTMAEPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ViTMAEPreTrainedModel),
|
|
30993
|
+
/* harmony export */ ViTMSNForImageClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ViTMSNForImageClassification),
|
|
30994
|
+
/* harmony export */ ViTMSNModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ViTMSNModel),
|
|
30995
|
+
/* harmony export */ ViTMSNPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ViTMSNPreTrainedModel),
|
|
30778
30996
|
/* harmony export */ ViTModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ViTModel),
|
|
30779
30997
|
/* harmony export */ ViTPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ViTPreTrainedModel),
|
|
30780
30998
|
/* harmony export */ VisionEncoderDecoderModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.VisionEncoderDecoderModel),
|