@huggingface/transformers 3.0.0-alpha.13 → 3.0.0-alpha.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -6
- package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
- package/dist/transformers.cjs +613 -363
- package/dist/transformers.cjs.map +1 -1
- package/dist/transformers.js +1045 -745
- package/dist/transformers.js.map +1 -1
- package/dist/transformers.min.cjs +12 -12
- package/dist/transformers.min.cjs.map +1 -1
- package/dist/transformers.min.js +15 -15
- package/dist/transformers.min.js.map +1 -1
- package/dist/transformers.min.mjs +52 -52
- package/dist/transformers.min.mjs.map +1 -1
- package/dist/transformers.mjs +637 -364
- package/dist/transformers.mjs.map +1 -1
- package/package.json +4 -5
- package/src/env.js +4 -4
- package/src/generation/logits_process.js +3 -3
- package/src/models.js +124 -3
- package/src/ops/registry.js +14 -3
- package/src/pipelines.js +5 -4
- package/src/processors.js +315 -288
- package/src/tokenizers.js +89 -53
- package/src/utils/maths.js +14 -5
- package/types/models.d.ts +72 -0
- package/types/models.d.ts.map +1 -1
- package/types/ops/registry.d.ts +6 -6
- package/types/ops/registry.d.ts.map +1 -1
- package/types/pipelines.d.ts.map +1 -1
- package/types/processors.d.ts +42 -52
- package/types/processors.d.ts.map +1 -1
- package/types/tokenizers.d.ts +23 -1
- package/types/tokenizers.d.ts.map +1 -1
- package/types/utils/maths.d.ts +2 -2
- package/types/utils/maths.d.ts.map +1 -1
package/dist/transformers.cjs
CHANGED
|
@@ -3779,7 +3779,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
3779
3779
|
// Licensed under the MIT License.
|
|
3780
3780
|
// This file is generated by /js/scripts/update-version.ts
|
|
3781
3781
|
// Do not modify file content manually.
|
|
3782
|
-
const version = '1.19.
|
|
3782
|
+
const version = '1.19.2';
|
|
3783
3783
|
//# sourceMappingURL=version.js.map
|
|
3784
3784
|
|
|
3785
3785
|
/***/ }),
|
|
@@ -4437,7 +4437,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
4437
4437
|
|
|
4438
4438
|
|
|
4439
4439
|
|
|
4440
|
-
const VERSION = '3.0.0-alpha.
|
|
4440
|
+
const VERSION = '3.0.0-alpha.15';
|
|
4441
4441
|
|
|
4442
4442
|
// Check if various APIs are available (depends on environment)
|
|
4443
4443
|
const IS_BROWSER_ENV = typeof self !== 'undefined';
|
|
@@ -4484,19 +4484,19 @@ const apis = Object.freeze({
|
|
|
4484
4484
|
});
|
|
4485
4485
|
|
|
4486
4486
|
const RUNNING_LOCALLY = IS_FS_AVAILABLE && IS_PATH_AVAILABLE;
|
|
4487
|
-
const
|
|
4487
|
+
const dirname__ = RUNNING_LOCALLY
|
|
4488
4488
|
? path__WEBPACK_IMPORTED_MODULE_1__.dirname(path__WEBPACK_IMPORTED_MODULE_1__.dirname(url__WEBPACK_IMPORTED_MODULE_2__.fileURLToPath("file:///workspaces/transformers.js/src/env.js")))
|
|
4489
4489
|
: './';
|
|
4490
4490
|
|
|
4491
4491
|
// Only used for environments with access to file system
|
|
4492
4492
|
const DEFAULT_CACHE_DIR = RUNNING_LOCALLY
|
|
4493
|
-
? path__WEBPACK_IMPORTED_MODULE_1__.join(
|
|
4493
|
+
? path__WEBPACK_IMPORTED_MODULE_1__.join(dirname__, '/.cache/')
|
|
4494
4494
|
: null;
|
|
4495
4495
|
|
|
4496
4496
|
// Set local model path, based on available APIs
|
|
4497
4497
|
const DEFAULT_LOCAL_MODEL_PATH = '/models/';
|
|
4498
4498
|
const localModelPath = RUNNING_LOCALLY
|
|
4499
|
-
? path__WEBPACK_IMPORTED_MODULE_1__.join(
|
|
4499
|
+
? path__WEBPACK_IMPORTED_MODULE_1__.join(dirname__, DEFAULT_LOCAL_MODEL_PATH)
|
|
4500
4500
|
: DEFAULT_LOCAL_MODEL_PATH;
|
|
4501
4501
|
|
|
4502
4502
|
/**
|
|
@@ -5533,18 +5533,18 @@ class NoBadWordsLogitsProcessor extends LogitsProcessor {
|
|
|
5533
5533
|
_call(input_ids, logits) {
|
|
5534
5534
|
for (let i = 0; i < input_ids.length; ++i) {
|
|
5535
5535
|
const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
|
|
5536
|
-
|
|
5536
|
+
const ids = input_ids[i];
|
|
5537
5537
|
for (const bad_word_ids of this.bad_words_ids) {
|
|
5538
5538
|
// Whether to modify the logits of the last token in the bad word id sequence
|
|
5539
5539
|
let mark = true;
|
|
5540
5540
|
|
|
5541
5541
|
// For each bad word in the list, if the current sequence of input ids ends with this sequence (excluding the last),
|
|
5542
5542
|
// then we set the logits of the last bad word id to -Infinity.
|
|
5543
|
-
for (let
|
|
5543
|
+
for (let j = 1; j <= bad_word_ids.length - 1 && bad_word_ids.length < ids.length; ++j) {
|
|
5544
5544
|
|
|
5545
5545
|
// NOTE: We use != instead of !== to compare bigint and number
|
|
5546
5546
|
// @ts-ignore
|
|
5547
|
-
if (bad_word_ids.at(-
|
|
5547
|
+
if (bad_word_ids.at(-j - 1) != ids.at(-j)) {
|
|
5548
5548
|
// We have found a mismatch
|
|
5549
5549
|
mark = false;
|
|
5550
5550
|
break;
|
|
@@ -6382,6 +6382,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
6382
6382
|
/* harmony export */ AutoModelForTextToSpectrogram: () => (/* binding */ AutoModelForTextToSpectrogram),
|
|
6383
6383
|
/* harmony export */ AutoModelForTextToWaveform: () => (/* binding */ AutoModelForTextToWaveform),
|
|
6384
6384
|
/* harmony export */ AutoModelForTokenClassification: () => (/* binding */ AutoModelForTokenClassification),
|
|
6385
|
+
/* harmony export */ AutoModelForUniversalSegmentation: () => (/* binding */ AutoModelForUniversalSegmentation),
|
|
6385
6386
|
/* harmony export */ AutoModelForVision2Seq: () => (/* binding */ AutoModelForVision2Seq),
|
|
6386
6387
|
/* harmony export */ AutoModelForXVector: () => (/* binding */ AutoModelForXVector),
|
|
6387
6388
|
/* harmony export */ AutoModelForZeroShotObjectDetection: () => (/* binding */ AutoModelForZeroShotObjectDetection),
|
|
@@ -6413,7 +6414,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
6413
6414
|
/* harmony export */ CLIPSegForImageSegmentation: () => (/* binding */ CLIPSegForImageSegmentation),
|
|
6414
6415
|
/* harmony export */ CLIPSegModel: () => (/* binding */ CLIPSegModel),
|
|
6415
6416
|
/* harmony export */ CLIPSegPreTrainedModel: () => (/* binding */ CLIPSegPreTrainedModel),
|
|
6417
|
+
/* harmony export */ CLIPTextModel: () => (/* binding */ CLIPTextModel),
|
|
6416
6418
|
/* harmony export */ CLIPTextModelWithProjection: () => (/* binding */ CLIPTextModelWithProjection),
|
|
6419
|
+
/* harmony export */ CLIPVisionModel: () => (/* binding */ CLIPVisionModel),
|
|
6417
6420
|
/* harmony export */ CLIPVisionModelWithProjection: () => (/* binding */ CLIPVisionModelWithProjection),
|
|
6418
6421
|
/* harmony export */ CamembertForMaskedLM: () => (/* binding */ CamembertForMaskedLM),
|
|
6419
6422
|
/* harmony export */ CamembertForQuestionAnswering: () => (/* binding */ CamembertForQuestionAnswering),
|
|
@@ -6462,6 +6465,8 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
6462
6465
|
/* harmony export */ DebertaV2ForTokenClassification: () => (/* binding */ DebertaV2ForTokenClassification),
|
|
6463
6466
|
/* harmony export */ DebertaV2Model: () => (/* binding */ DebertaV2Model),
|
|
6464
6467
|
/* harmony export */ DebertaV2PreTrainedModel: () => (/* binding */ DebertaV2PreTrainedModel),
|
|
6468
|
+
/* harmony export */ DecisionTransformerModel: () => (/* binding */ DecisionTransformerModel),
|
|
6469
|
+
/* harmony export */ DecisionTransformerPreTrainedModel: () => (/* binding */ DecisionTransformerPreTrainedModel),
|
|
6465
6470
|
/* harmony export */ DeiTForImageClassification: () => (/* binding */ DeiTForImageClassification),
|
|
6466
6471
|
/* harmony export */ DeiTModel: () => (/* binding */ DeiTModel),
|
|
6467
6472
|
/* harmony export */ DeiTPreTrainedModel: () => (/* binding */ DeiTPreTrainedModel),
|
|
@@ -6530,6 +6535,11 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
6530
6535
|
/* harmony export */ GemmaForCausalLM: () => (/* binding */ GemmaForCausalLM),
|
|
6531
6536
|
/* harmony export */ GemmaModel: () => (/* binding */ GemmaModel),
|
|
6532
6537
|
/* harmony export */ GemmaPreTrainedModel: () => (/* binding */ GemmaPreTrainedModel),
|
|
6538
|
+
/* harmony export */ GroupViTModel: () => (/* binding */ GroupViTModel),
|
|
6539
|
+
/* harmony export */ GroupViTPreTrainedModel: () => (/* binding */ GroupViTPreTrainedModel),
|
|
6540
|
+
/* harmony export */ HieraForImageClassification: () => (/* binding */ HieraForImageClassification),
|
|
6541
|
+
/* harmony export */ HieraModel: () => (/* binding */ HieraModel),
|
|
6542
|
+
/* harmony export */ HieraPreTrainedModel: () => (/* binding */ HieraPreTrainedModel),
|
|
6533
6543
|
/* harmony export */ HubertForCTC: () => (/* binding */ HubertForCTC),
|
|
6534
6544
|
/* harmony export */ HubertForSequenceClassification: () => (/* binding */ HubertForSequenceClassification),
|
|
6535
6545
|
/* harmony export */ HubertModel: () => (/* binding */ HubertModel),
|
|
@@ -6566,6 +6576,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
6566
6576
|
/* harmony export */ MarianMTModel: () => (/* binding */ MarianMTModel),
|
|
6567
6577
|
/* harmony export */ MarianModel: () => (/* binding */ MarianModel),
|
|
6568
6578
|
/* harmony export */ MarianPreTrainedModel: () => (/* binding */ MarianPreTrainedModel),
|
|
6579
|
+
/* harmony export */ MaskFormerForInstanceSegmentation: () => (/* binding */ MaskFormerForInstanceSegmentation),
|
|
6580
|
+
/* harmony export */ MaskFormerModel: () => (/* binding */ MaskFormerModel),
|
|
6581
|
+
/* harmony export */ MaskFormerPreTrainedModel: () => (/* binding */ MaskFormerPreTrainedModel),
|
|
6569
6582
|
/* harmony export */ MaskedLMOutput: () => (/* binding */ MaskedLMOutput),
|
|
6570
6583
|
/* harmony export */ MistralForCausalLM: () => (/* binding */ MistralForCausalLM),
|
|
6571
6584
|
/* harmony export */ MistralModel: () => (/* binding */ MistralModel),
|
|
@@ -6624,6 +6637,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
6624
6637
|
/* harmony export */ PhiPreTrainedModel: () => (/* binding */ PhiPreTrainedModel),
|
|
6625
6638
|
/* harmony export */ PreTrainedModel: () => (/* binding */ PreTrainedModel),
|
|
6626
6639
|
/* harmony export */ PretrainedMixin: () => (/* binding */ PretrainedMixin),
|
|
6640
|
+
/* harmony export */ PvtForImageClassification: () => (/* binding */ PvtForImageClassification),
|
|
6641
|
+
/* harmony export */ PvtModel: () => (/* binding */ PvtModel),
|
|
6642
|
+
/* harmony export */ PvtPreTrainedModel: () => (/* binding */ PvtPreTrainedModel),
|
|
6627
6643
|
/* harmony export */ PyAnnoteForAudioFrameClassification: () => (/* binding */ PyAnnoteForAudioFrameClassification),
|
|
6628
6644
|
/* harmony export */ PyAnnoteModel: () => (/* binding */ PyAnnoteModel),
|
|
6629
6645
|
/* harmony export */ PyAnnotePreTrainedModel: () => (/* binding */ PyAnnotePreTrainedModel),
|
|
@@ -6709,6 +6725,11 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
6709
6725
|
/* harmony export */ UniSpeechSatModel: () => (/* binding */ UniSpeechSatModel),
|
|
6710
6726
|
/* harmony export */ UniSpeechSatPreTrainedModel: () => (/* binding */ UniSpeechSatPreTrainedModel),
|
|
6711
6727
|
/* harmony export */ ViTForImageClassification: () => (/* binding */ ViTForImageClassification),
|
|
6728
|
+
/* harmony export */ ViTMAEModel: () => (/* binding */ ViTMAEModel),
|
|
6729
|
+
/* harmony export */ ViTMAEPreTrainedModel: () => (/* binding */ ViTMAEPreTrainedModel),
|
|
6730
|
+
/* harmony export */ ViTMSNForImageClassification: () => (/* binding */ ViTMSNForImageClassification),
|
|
6731
|
+
/* harmony export */ ViTMSNModel: () => (/* binding */ ViTMSNModel),
|
|
6732
|
+
/* harmony export */ ViTMSNPreTrainedModel: () => (/* binding */ ViTMSNPreTrainedModel),
|
|
6712
6733
|
/* harmony export */ ViTModel: () => (/* binding */ ViTModel),
|
|
6713
6734
|
/* harmony export */ ViTPreTrainedModel: () => (/* binding */ ViTPreTrainedModel),
|
|
6714
6735
|
/* harmony export */ VisionEncoderDecoderModel: () => (/* binding */ VisionEncoderDecoderModel),
|
|
@@ -6958,6 +6979,7 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
|
|
|
6958
6979
|
});
|
|
6959
6980
|
if (Object.keys(shapes).length > 0 && !(0,_backends_onnx_js__WEBPACK_IMPORTED_MODULE_1__.isONNXProxy)()) {
|
|
6960
6981
|
// Only set preferredOutputLocation if shapes are present and we aren't proxying ONNX
|
|
6982
|
+
/** @type {Record<string, import('onnxruntime-common').Tensor.DataLocation>} */
|
|
6961
6983
|
const preferredOutputLocation = {};
|
|
6962
6984
|
for (const key in shapes) {
|
|
6963
6985
|
preferredOutputLocation[key] = 'gpu-buffer';
|
|
@@ -10223,6 +10245,18 @@ class CLIPPreTrainedModel extends PreTrainedModel { }
|
|
|
10223
10245
|
*/
|
|
10224
10246
|
class CLIPModel extends CLIPPreTrainedModel { }
|
|
10225
10247
|
|
|
10248
|
+
/**
|
|
10249
|
+
* The text model from CLIP without any head or projection on top.
|
|
10250
|
+
*/
|
|
10251
|
+
class CLIPTextModel extends CLIPPreTrainedModel {
|
|
10252
|
+
/** @type {PreTrainedModel.from_pretrained} */
|
|
10253
|
+
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
10254
|
+
// Update default model file name if not provided
|
|
10255
|
+
options.model_file_name ??= 'text_model';
|
|
10256
|
+
return super.from_pretrained(pretrained_model_name_or_path, options);
|
|
10257
|
+
}
|
|
10258
|
+
}
|
|
10259
|
+
|
|
10226
10260
|
/**
|
|
10227
10261
|
* CLIP Text Model with a projection layer on top (a linear layer on top of the pooled output)
|
|
10228
10262
|
*
|
|
@@ -10250,7 +10284,6 @@ class CLIPModel extends CLIPPreTrainedModel { }
|
|
|
10250
10284
|
* ```
|
|
10251
10285
|
*/
|
|
10252
10286
|
class CLIPTextModelWithProjection extends CLIPPreTrainedModel {
|
|
10253
|
-
|
|
10254
10287
|
/** @type {PreTrainedModel.from_pretrained} */
|
|
10255
10288
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
10256
10289
|
// Update default model file name if not provided
|
|
@@ -10259,6 +10292,18 @@ class CLIPTextModelWithProjection extends CLIPPreTrainedModel {
|
|
|
10259
10292
|
}
|
|
10260
10293
|
}
|
|
10261
10294
|
|
|
10295
|
+
/**
|
|
10296
|
+
* The vision model from CLIP without any head or projection on top.
|
|
10297
|
+
*/
|
|
10298
|
+
class CLIPVisionModel extends CLIPPreTrainedModel {
|
|
10299
|
+
/** @type {PreTrainedModel.from_pretrained} */
|
|
10300
|
+
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
10301
|
+
// Update default model file name if not provided
|
|
10302
|
+
options.model_file_name ??= 'vision_model';
|
|
10303
|
+
return super.from_pretrained(pretrained_model_name_or_path, options);
|
|
10304
|
+
}
|
|
10305
|
+
}
|
|
10306
|
+
|
|
10262
10307
|
/**
|
|
10263
10308
|
* CLIP Vision Model with a projection layer on top (a linear layer on top of the pooled output)
|
|
10264
10309
|
*
|
|
@@ -10925,6 +10970,43 @@ class ViTForImageClassification extends ViTPreTrainedModel {
|
|
|
10925
10970
|
}
|
|
10926
10971
|
//////////////////////////////////////////////////
|
|
10927
10972
|
|
|
10973
|
+
//////////////////////////////////////////////////
|
|
10974
|
+
class PvtPreTrainedModel extends PreTrainedModel { }
|
|
10975
|
+
class PvtModel extends PvtPreTrainedModel { }
|
|
10976
|
+
class PvtForImageClassification extends PvtPreTrainedModel {
|
|
10977
|
+
/**
|
|
10978
|
+
* @param {any} model_inputs
|
|
10979
|
+
*/
|
|
10980
|
+
async _call(model_inputs) {
|
|
10981
|
+
return new SequenceClassifierOutput(await super._call(model_inputs));
|
|
10982
|
+
}
|
|
10983
|
+
}
|
|
10984
|
+
//////////////////////////////////////////////////
|
|
10985
|
+
|
|
10986
|
+
//////////////////////////////////////////////////
|
|
10987
|
+
class ViTMAEPreTrainedModel extends PreTrainedModel { }
|
|
10988
|
+
class ViTMAEModel extends ViTMAEPreTrainedModel { }
|
|
10989
|
+
//////////////////////////////////////////////////
|
|
10990
|
+
|
|
10991
|
+
|
|
10992
|
+
//////////////////////////////////////////////////
|
|
10993
|
+
class ViTMSNPreTrainedModel extends PreTrainedModel { }
|
|
10994
|
+
class ViTMSNModel extends ViTMSNPreTrainedModel { }
|
|
10995
|
+
class ViTMSNForImageClassification extends ViTMSNPreTrainedModel {
|
|
10996
|
+
/**
|
|
10997
|
+
* @param {any} model_inputs
|
|
10998
|
+
*/
|
|
10999
|
+
async _call(model_inputs) {
|
|
11000
|
+
return new SequenceClassifierOutput(await super._call(model_inputs));
|
|
11001
|
+
}
|
|
11002
|
+
}
|
|
11003
|
+
//////////////////////////////////////////////////
|
|
11004
|
+
|
|
11005
|
+
//////////////////////////////////////////////////
|
|
11006
|
+
class GroupViTPreTrainedModel extends PreTrainedModel { }
|
|
11007
|
+
class GroupViTModel extends GroupViTPreTrainedModel { }
|
|
11008
|
+
//////////////////////////////////////////////////
|
|
11009
|
+
|
|
10928
11010
|
|
|
10929
11011
|
//////////////////////////////////////////////////
|
|
10930
11012
|
class FastViTPreTrainedModel extends PreTrainedModel { }
|
|
@@ -11178,6 +11260,19 @@ class DeiTForImageClassification extends DeiTPreTrainedModel {
|
|
|
11178
11260
|
}
|
|
11179
11261
|
//////////////////////////////////////////////////
|
|
11180
11262
|
|
|
11263
|
+
//////////////////////////////////////////////////
|
|
11264
|
+
class HieraPreTrainedModel extends PreTrainedModel { }
|
|
11265
|
+
class HieraModel extends HieraPreTrainedModel { }
|
|
11266
|
+
class HieraForImageClassification extends HieraPreTrainedModel {
|
|
11267
|
+
/**
|
|
11268
|
+
* @param {any} model_inputs
|
|
11269
|
+
*/
|
|
11270
|
+
async _call(model_inputs) {
|
|
11271
|
+
return new SequenceClassifierOutput(await super._call(model_inputs));
|
|
11272
|
+
}
|
|
11273
|
+
}
|
|
11274
|
+
//////////////////////////////////////////////////
|
|
11275
|
+
|
|
11181
11276
|
|
|
11182
11277
|
//////////////////////////////////////////////////
|
|
11183
11278
|
/**
|
|
@@ -11324,6 +11419,11 @@ class SapiensForDepthEstimation extends SapiensPreTrainedModel { }
|
|
|
11324
11419
|
class SapiensForNormalEstimation extends SapiensPreTrainedModel { }
|
|
11325
11420
|
//////////////////////////////////////////////////
|
|
11326
11421
|
|
|
11422
|
+
//////////////////////////////////////////////////
|
|
11423
|
+
class MaskFormerPreTrainedModel extends PreTrainedModel { }
|
|
11424
|
+
class MaskFormerModel extends MaskFormerPreTrainedModel { }
|
|
11425
|
+
class MaskFormerForInstanceSegmentation extends MaskFormerPreTrainedModel { }
|
|
11426
|
+
//////////////////////////////////////////////////
|
|
11327
11427
|
|
|
11328
11428
|
//////////////////////////////////////////////////
|
|
11329
11429
|
class GLPNPreTrainedModel extends PreTrainedModel { }
|
|
@@ -12846,6 +12946,7 @@ class MusicgenForConditionalGeneration extends PreTrainedModel { // NOTE: not Mu
|
|
|
12846
12946
|
return audio_values;
|
|
12847
12947
|
}
|
|
12848
12948
|
}
|
|
12949
|
+
//////////////////////////////////////////////////
|
|
12849
12950
|
|
|
12850
12951
|
//////////////////////////////////////////////////
|
|
12851
12952
|
// MobileNetV1 models
|
|
@@ -12939,6 +13040,17 @@ class MobileNetV4ForImageClassification extends MobileNetV4PreTrainedModel {
|
|
|
12939
13040
|
}
|
|
12940
13041
|
//////////////////////////////////////////////////
|
|
12941
13042
|
|
|
13043
|
+
//////////////////////////////////////////////////
|
|
13044
|
+
// Decision Transformer models
|
|
13045
|
+
class DecisionTransformerPreTrainedModel extends PreTrainedModel { }
|
|
13046
|
+
|
|
13047
|
+
/**
|
|
13048
|
+
* The model builds upon the GPT2 architecture to perform autoregressive prediction of actions in an offline RL setting.
|
|
13049
|
+
* Refer to the paper for more details: https://arxiv.org/abs/2106.01345
|
|
13050
|
+
*/
|
|
13051
|
+
class DecisionTransformerModel extends DecisionTransformerPreTrainedModel { }
|
|
13052
|
+
|
|
13053
|
+
//////////////////////////////////////////////////
|
|
12942
13054
|
|
|
12943
13055
|
//////////////////////////////////////////////////
|
|
12944
13056
|
// AutoModels, used to simplify construction of PreTrainedModels
|
|
@@ -12977,7 +13089,7 @@ class PretrainedMixin {
|
|
|
12977
13089
|
session_options = {},
|
|
12978
13090
|
} = {}) {
|
|
12979
13091
|
|
|
12980
|
-
|
|
13092
|
+
const options = {
|
|
12981
13093
|
progress_callback,
|
|
12982
13094
|
config,
|
|
12983
13095
|
cache_dir,
|
|
@@ -12996,7 +13108,7 @@ class PretrainedMixin {
|
|
|
12996
13108
|
throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: " + this.name);
|
|
12997
13109
|
}
|
|
12998
13110
|
|
|
12999
|
-
for (
|
|
13111
|
+
for (const MODEL_CLASS_MAPPING of this.MODEL_CLASS_MAPPINGS) {
|
|
13000
13112
|
const modelInfo = MODEL_CLASS_MAPPING.get(options.config.model_type);
|
|
13001
13113
|
if (!modelInfo) {
|
|
13002
13114
|
continue; // Item not found in this mapping
|
|
@@ -13051,6 +13163,10 @@ const MODEL_MAPPING_NAMES_ENCODER_ONLY = new Map([
|
|
|
13051
13163
|
['rt_detr', ['RTDetrModel', RTDetrModel]],
|
|
13052
13164
|
['table-transformer', ['TableTransformerModel', TableTransformerModel]],
|
|
13053
13165
|
['vit', ['ViTModel', ViTModel]],
|
|
13166
|
+
['pvt', ['PvtModel', PvtModel]],
|
|
13167
|
+
['vit_msn', ['ViTMSNModel', ViTMSNModel]],
|
|
13168
|
+
['vit_mae', ['ViTMAEModel', ViTMAEModel]],
|
|
13169
|
+
['groupvit', ['GroupViTModel', GroupViTModel]],
|
|
13054
13170
|
['fastvit', ['FastViTModel', FastViTModel]],
|
|
13055
13171
|
['mobilevit', ['MobileViTModel', MobileViTModel]],
|
|
13056
13172
|
['mobilevitv2', ['MobileViTV2Model', MobileViTV2Model]],
|
|
@@ -13058,6 +13174,7 @@ const MODEL_MAPPING_NAMES_ENCODER_ONLY = new Map([
|
|
|
13058
13174
|
['owlv2', ['Owlv2Model', Owlv2Model]],
|
|
13059
13175
|
['beit', ['BeitModel', BeitModel]],
|
|
13060
13176
|
['deit', ['DeiTModel', DeiTModel]],
|
|
13177
|
+
['hiera', ['HieraModel', HieraModel]],
|
|
13061
13178
|
['convnext', ['ConvNextModel', ConvNextModel]],
|
|
13062
13179
|
['convnextv2', ['ConvNextV2Model', ConvNextV2Model]],
|
|
13063
13180
|
['dinov2', ['Dinov2Model', Dinov2Model]],
|
|
@@ -13072,10 +13189,14 @@ const MODEL_MAPPING_NAMES_ENCODER_ONLY = new Map([
|
|
|
13072
13189
|
['hifigan', ['SpeechT5HifiGan', SpeechT5HifiGan]],
|
|
13073
13190
|
['efficientnet', ['EfficientNetModel', EfficientNetModel]],
|
|
13074
13191
|
|
|
13192
|
+
['decision_transformer', ['DecisionTransformerModel', DecisionTransformerModel]],
|
|
13193
|
+
|
|
13075
13194
|
['mobilenet_v1', ['MobileNetV1Model', MobileNetV1Model]],
|
|
13076
13195
|
['mobilenet_v2', ['MobileNetV2Model', MobileNetV2Model]],
|
|
13077
13196
|
['mobilenet_v3', ['MobileNetV3Model', MobileNetV3Model]],
|
|
13078
13197
|
['mobilenet_v4', ['MobileNetV4Model', MobileNetV4Model]],
|
|
13198
|
+
|
|
13199
|
+
['maskformer', ['MaskFormerModel', MaskFormerModel]],
|
|
13079
13200
|
]);
|
|
13080
13201
|
|
|
13081
13202
|
const MODEL_MAPPING_NAMES_ENCODER_DECODER = new Map([
|
|
@@ -13260,11 +13381,14 @@ const MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES = new Map([
|
|
|
13260
13381
|
|
|
13261
13382
|
const MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES = new Map([
|
|
13262
13383
|
['vit', ['ViTForImageClassification', ViTForImageClassification]],
|
|
13384
|
+
['pvt', ['PvtForImageClassification', PvtForImageClassification]],
|
|
13385
|
+
['vit_msn', ['ViTMSNForImageClassification', ViTMSNForImageClassification]],
|
|
13263
13386
|
['fastvit', ['FastViTForImageClassification', FastViTForImageClassification]],
|
|
13264
13387
|
['mobilevit', ['MobileViTForImageClassification', MobileViTForImageClassification]],
|
|
13265
13388
|
['mobilevitv2', ['MobileViTV2ForImageClassification', MobileViTV2ForImageClassification]],
|
|
13266
13389
|
['beit', ['BeitForImageClassification', BeitForImageClassification]],
|
|
13267
13390
|
['deit', ['DeiTForImageClassification', DeiTForImageClassification]],
|
|
13391
|
+
['hiera', ['HieraForImageClassification', HieraForImageClassification]],
|
|
13268
13392
|
['convnext', ['ConvNextForImageClassification', ConvNextForImageClassification]],
|
|
13269
13393
|
['convnextv2', ['ConvNextV2ForImageClassification', ConvNextV2ForImageClassification]],
|
|
13270
13394
|
['dinov2', ['Dinov2ForImageClassification', Dinov2ForImageClassification]],
|
|
@@ -13291,6 +13415,7 @@ const MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES = new Map([
|
|
|
13291
13415
|
]);
|
|
13292
13416
|
|
|
13293
13417
|
const MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES = new Map([
|
|
13418
|
+
// TODO: Do not add new models here
|
|
13294
13419
|
['detr', ['DetrForSegmentation', DetrForSegmentation]],
|
|
13295
13420
|
['clipseg', ['CLIPSegForImageSegmentation', CLIPSegForImageSegmentation]],
|
|
13296
13421
|
]);
|
|
@@ -13300,6 +13425,11 @@ const MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES = new Map([
|
|
|
13300
13425
|
['sapiens', ['SapiensForSemanticSegmentation', SapiensForSemanticSegmentation]],
|
|
13301
13426
|
]);
|
|
13302
13427
|
|
|
13428
|
+
const MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES = new Map([
|
|
13429
|
+
['detr', ['DetrForSegmentation', DetrForSegmentation]],
|
|
13430
|
+
['maskformer', ['MaskFormerForInstanceSegmentation', MaskFormerForInstanceSegmentation]],
|
|
13431
|
+
]);
|
|
13432
|
+
|
|
13303
13433
|
const MODEL_FOR_MASK_GENERATION_MAPPING_NAMES = new Map([
|
|
13304
13434
|
['sam', ['SamModel', SamModel]],
|
|
13305
13435
|
]);
|
|
@@ -13375,6 +13505,7 @@ const MODEL_CLASS_TYPE_MAPPING = [
|
|
|
13375
13505
|
[MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES, MODEL_TYPES.ImageTextToText],
|
|
13376
13506
|
[MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
|
|
13377
13507
|
[MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
|
|
13508
|
+
[MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
|
|
13378
13509
|
[MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
|
|
13379
13510
|
[MODEL_FOR_IMAGE_MATTING_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
|
|
13380
13511
|
[MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
|
|
@@ -13577,6 +13708,17 @@ class AutoModelForSemanticSegmentation extends PretrainedMixin {
|
|
|
13577
13708
|
static MODEL_CLASS_MAPPINGS = [MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES];
|
|
13578
13709
|
}
|
|
13579
13710
|
|
|
13711
|
+
/**
|
|
13712
|
+
* Helper class which is used to instantiate pretrained universal image segmentation models with the `from_pretrained` function.
|
|
13713
|
+
* The chosen model class is determined by the type specified in the model config.
|
|
13714
|
+
*
|
|
13715
|
+
* @example
|
|
13716
|
+
* let model = await AutoModelForUniversalSegmentation.from_pretrained('hf-internal-testing/tiny-random-MaskFormerForInstanceSegmentation');
|
|
13717
|
+
*/
|
|
13718
|
+
class AutoModelForUniversalSegmentation extends PretrainedMixin {
|
|
13719
|
+
static MODEL_CLASS_MAPPINGS = [MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES];
|
|
13720
|
+
}
|
|
13721
|
+
|
|
13580
13722
|
/**
|
|
13581
13723
|
* Helper class which is used to instantiate pretrained object detection models with the `from_pretrained` function.
|
|
13582
13724
|
* The chosen model class is determined by the type specified in the model config.
|
|
@@ -14092,20 +14234,31 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
14092
14234
|
|
|
14093
14235
|
|
|
14094
14236
|
|
|
14237
|
+
/**
|
|
14238
|
+
* Asynchronously creates a wrapper function for running an ONNX inference session.
|
|
14239
|
+
*
|
|
14240
|
+
* @param {number[]} session_bytes The session data in bytes.
|
|
14241
|
+
* @param {import('onnxruntime-common').InferenceSession.SessionOptions} session_options The options for the ONNX session.
|
|
14242
|
+
* @template {string | [string] | string[]} T
|
|
14243
|
+
* @param {T} names The name(s) of the output tensor(s).
|
|
14244
|
+
*
|
|
14245
|
+
* @returns {Promise<function(Record<string, Tensor>): Promise<T extends string ? Tensor : T extends string[] ? { [K in keyof T]: Tensor } : never>>}
|
|
14246
|
+
* The wrapper function for running the ONNX inference session.
|
|
14247
|
+
*/
|
|
14095
14248
|
const wrap = async (session_bytes, session_options, names) => {
|
|
14096
14249
|
const session = await (0,_backends_onnx_js__WEBPACK_IMPORTED_MODULE_0__.createInferenceSession)(
|
|
14097
14250
|
new Uint8Array(session_bytes), session_options,
|
|
14098
14251
|
);
|
|
14099
|
-
return async (inputs) => {
|
|
14252
|
+
return /** @type {any} */(async (/** @type {Record<string, Tensor>} */ inputs) => {
|
|
14100
14253
|
const ortFeed = Object.fromEntries(Object.entries(inputs).map(([k, v]) => [k, v.ort_tensor]));
|
|
14101
14254
|
const outputs = await session.run(ortFeed);
|
|
14102
14255
|
|
|
14103
14256
|
if (Array.isArray(names)) {
|
|
14104
14257
|
return names.map((n) => new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.Tensor(outputs[n]));
|
|
14105
14258
|
} else {
|
|
14106
|
-
return new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.Tensor(outputs[names]);
|
|
14259
|
+
return new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.Tensor(outputs[/** @type {string} */(names)]);
|
|
14107
14260
|
}
|
|
14108
|
-
}
|
|
14261
|
+
})
|
|
14109
14262
|
}
|
|
14110
14263
|
|
|
14111
14264
|
// In-memory registry of initialized ONNX operators
|
|
@@ -17229,7 +17382,7 @@ const SUPPORTED_TASKS = Object.freeze({
|
|
|
17229
17382
|
"image-segmentation": {
|
|
17230
17383
|
// no tokenizer
|
|
17231
17384
|
"pipeline": ImageSegmentationPipeline,
|
|
17232
|
-
"model": [_models_js__WEBPACK_IMPORTED_MODULE_1__.AutoModelForImageSegmentation, _models_js__WEBPACK_IMPORTED_MODULE_1__.AutoModelForSemanticSegmentation],
|
|
17385
|
+
"model": [_models_js__WEBPACK_IMPORTED_MODULE_1__.AutoModelForImageSegmentation, _models_js__WEBPACK_IMPORTED_MODULE_1__.AutoModelForSemanticSegmentation, _models_js__WEBPACK_IMPORTED_MODULE_1__.AutoModelForUniversalSegmentation],
|
|
17233
17386
|
"processor": _processors_js__WEBPACK_IMPORTED_MODULE_2__.AutoProcessor,
|
|
17234
17387
|
"default": {
|
|
17235
17388
|
// TODO: replace with original
|
|
@@ -17471,7 +17624,7 @@ async function loadItems(mapping, model, pretrainedOptions) {
|
|
|
17471
17624
|
|
|
17472
17625
|
/**@type {Promise[]} */
|
|
17473
17626
|
const promises = [];
|
|
17474
|
-
for (
|
|
17627
|
+
for (const [name, cls] of mapping.entries()) {
|
|
17475
17628
|
if (!cls) continue;
|
|
17476
17629
|
|
|
17477
17630
|
/**@type {Promise} */
|
|
@@ -17479,7 +17632,7 @@ async function loadItems(mapping, model, pretrainedOptions) {
|
|
|
17479
17632
|
if (Array.isArray(cls)) {
|
|
17480
17633
|
promise = new Promise(async (resolve, reject) => {
|
|
17481
17634
|
let e;
|
|
17482
|
-
for (
|
|
17635
|
+
for (const c of cls) {
|
|
17483
17636
|
if (c === null) {
|
|
17484
17637
|
// If null, we resolve it immediately, meaning the relevant
|
|
17485
17638
|
// class was not found, but it is optional.
|
|
@@ -17517,7 +17670,7 @@ async function loadItems(mapping, model, pretrainedOptions) {
|
|
|
17517
17670
|
await Promise.all(promises);
|
|
17518
17671
|
|
|
17519
17672
|
// Then assign to result
|
|
17520
|
-
for (
|
|
17673
|
+
for (const [name, promise] of Object.entries(result)) {
|
|
17521
17674
|
result[name] = await promise;
|
|
17522
17675
|
}
|
|
17523
17676
|
|
|
@@ -17555,6 +17708,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
17555
17708
|
/* harmony export */ Florence2Processor: () => (/* binding */ Florence2Processor),
|
|
17556
17709
|
/* harmony export */ GLPNFeatureExtractor: () => (/* binding */ GLPNFeatureExtractor),
|
|
17557
17710
|
/* harmony export */ ImageFeatureExtractor: () => (/* binding */ ImageFeatureExtractor),
|
|
17711
|
+
/* harmony export */ MaskFormerFeatureExtractor: () => (/* binding */ MaskFormerFeatureExtractor),
|
|
17558
17712
|
/* harmony export */ MobileNetV1FeatureExtractor: () => (/* binding */ MobileNetV1FeatureExtractor),
|
|
17559
17713
|
/* harmony export */ MobileNetV2FeatureExtractor: () => (/* binding */ MobileNetV2FeatureExtractor),
|
|
17560
17714
|
/* harmony export */ MobileNetV3FeatureExtractor: () => (/* binding */ MobileNetV3FeatureExtractor),
|
|
@@ -17566,6 +17720,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
17566
17720
|
/* harmony export */ OwlViTProcessor: () => (/* binding */ OwlViTProcessor),
|
|
17567
17721
|
/* harmony export */ Owlv2ImageProcessor: () => (/* binding */ Owlv2ImageProcessor),
|
|
17568
17722
|
/* harmony export */ Processor: () => (/* binding */ Processor),
|
|
17723
|
+
/* harmony export */ PvtImageProcessor: () => (/* binding */ PvtImageProcessor),
|
|
17569
17724
|
/* harmony export */ PyAnnoteFeatureExtractor: () => (/* binding */ PyAnnoteFeatureExtractor),
|
|
17570
17725
|
/* harmony export */ PyAnnoteProcessor: () => (/* binding */ PyAnnoteProcessor),
|
|
17571
17726
|
/* harmony export */ RTDetrImageProcessor: () => (/* binding */ RTDetrImageProcessor),
|
|
@@ -17654,7 +17809,7 @@ function center_to_corners_format([centerX, centerY, width, height]) {
|
|
|
17654
17809
|
* @param {Tensor} outputs.logits The logits
|
|
17655
17810
|
* @param {Tensor} outputs.pred_boxes The predicted boxes.
|
|
17656
17811
|
* @param {number} [threshold=0.5] The threshold to use for the scores.
|
|
17657
|
-
* @param {number
|
|
17812
|
+
* @param {[number, number][]} [target_sizes=null] The sizes of the original images.
|
|
17658
17813
|
* @param {boolean} [is_zero_shot=false] Whether zero-shot object detection was performed.
|
|
17659
17814
|
* @return {Object[]} An array of objects containing the post-processed outputs.
|
|
17660
17815
|
* @private
|
|
@@ -17735,7 +17890,7 @@ function post_process_object_detection(outputs, threshold = 0.5, target_sizes =
|
|
|
17735
17890
|
/**
|
|
17736
17891
|
* Post-processes the outputs of the model (for semantic segmentation).
|
|
17737
17892
|
* @param {*} outputs Raw outputs of the model.
|
|
17738
|
-
* @param {number
|
|
17893
|
+
* @param {[number, number][]} [target_sizes=null] List of tuples corresponding to the requested final size
|
|
17739
17894
|
* (height, width) of each prediction. If unset, predictions will not be resized.
|
|
17740
17895
|
* @returns {{segmentation: Tensor; labels: number[]}[]} The semantic segmentation maps.
|
|
17741
17896
|
*/
|
|
@@ -17783,9 +17938,8 @@ function post_process_semantic_segmentation(outputs, target_sizes = null) {
|
|
|
17783
17938
|
// Store which objects have labels
|
|
17784
17939
|
// This is much more efficient that creating a set of the final values
|
|
17785
17940
|
const hasLabel = new Array(data.dims[0]);
|
|
17786
|
-
|
|
17787
|
-
|
|
17788
|
-
const index = out[j];
|
|
17941
|
+
for (let j = 0; j < segmentation_data.length; ++j) {
|
|
17942
|
+
const index = segmentation_data[j];
|
|
17789
17943
|
hasLabel[index] = index;
|
|
17790
17944
|
}
|
|
17791
17945
|
/** @type {number[]} The unique list of labels that were detected */
|
|
@@ -17796,6 +17950,300 @@ function post_process_semantic_segmentation(outputs, target_sizes = null) {
|
|
|
17796
17950
|
return toReturn;
|
|
17797
17951
|
}
|
|
17798
17952
|
|
|
17953
|
+
|
|
17954
|
+
/**
|
|
17955
|
+
* Binarize the given masks using `object_mask_threshold`, it returns the associated values of `masks`, `scores` and `labels`.
|
|
17956
|
+
* @param {Tensor} class_logits The class logits.
|
|
17957
|
+
* @param {Tensor} mask_logits The mask logits.
|
|
17958
|
+
* @param {number} object_mask_threshold A number between 0 and 1 used to binarize the masks.
|
|
17959
|
+
* @param {number} num_labels The number of labels.
|
|
17960
|
+
* @returns {[Tensor[], number[], number[]]} The binarized masks, the scores, and the labels.
|
|
17961
|
+
* @private
|
|
17962
|
+
*/
|
|
17963
|
+
function remove_low_and_no_objects(class_logits, mask_logits, object_mask_threshold, num_labels) {
|
|
17964
|
+
|
|
17965
|
+
const mask_probs_item = [];
|
|
17966
|
+
const pred_scores_item = [];
|
|
17967
|
+
const pred_labels_item = [];
|
|
17968
|
+
|
|
17969
|
+
for (let j = 0; j < class_logits.dims[0]; ++j) {
|
|
17970
|
+
const cls = class_logits[j];
|
|
17971
|
+
const mask = mask_logits[j];
|
|
17972
|
+
|
|
17973
|
+
const pred_label = (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_3__.max)(cls.data)[1];
|
|
17974
|
+
if (pred_label === num_labels) {
|
|
17975
|
+
// Is the background, so we ignore it
|
|
17976
|
+
continue;
|
|
17977
|
+
}
|
|
17978
|
+
|
|
17979
|
+
const scores = (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_3__.softmax)(cls.data);
|
|
17980
|
+
const pred_score = scores[pred_label];
|
|
17981
|
+
if (pred_score > object_mask_threshold) {
|
|
17982
|
+
mask_probs_item.push(mask);
|
|
17983
|
+
pred_scores_item.push(pred_score);
|
|
17984
|
+
pred_labels_item.push(pred_label);
|
|
17985
|
+
}
|
|
17986
|
+
}
|
|
17987
|
+
|
|
17988
|
+
return [mask_probs_item, pred_scores_item, pred_labels_item];
|
|
17989
|
+
}
|
|
17990
|
+
|
|
17991
|
+
/**
|
|
17992
|
+
* Checks whether the segment is valid or not.
|
|
17993
|
+
* @param {Int32Array} mask_labels Labels for each pixel in the mask.
|
|
17994
|
+
* @param {Tensor[]} mask_probs Probabilities for each pixel in the masks.
|
|
17995
|
+
* @param {number} k The class id of the segment.
|
|
17996
|
+
* @param {number} mask_threshold The mask threshold.
|
|
17997
|
+
* @param {number} overlap_mask_area_threshold The overlap mask area threshold.
|
|
17998
|
+
* @returns {[boolean, number[]]} Whether the segment is valid or not, and the indices of the valid labels.
|
|
17999
|
+
* @private
|
|
18000
|
+
*/
|
|
18001
|
+
function check_segment_validity(
|
|
18002
|
+
mask_labels,
|
|
18003
|
+
mask_probs,
|
|
18004
|
+
k,
|
|
18005
|
+
mask_threshold = 0.5,
|
|
18006
|
+
overlap_mask_area_threshold = 0.8
|
|
18007
|
+
) {
|
|
18008
|
+
// mask_k is a 1D array of indices, indicating where the mask is equal to k
|
|
18009
|
+
const mask_k = [];
|
|
18010
|
+
let mask_k_area = 0;
|
|
18011
|
+
let original_area = 0;
|
|
18012
|
+
|
|
18013
|
+
const mask_probs_k_data = mask_probs[k].data;
|
|
18014
|
+
|
|
18015
|
+
// Compute the area of all the stuff in query k
|
|
18016
|
+
for (let i = 0; i < mask_labels.length; ++i) {
|
|
18017
|
+
if (mask_labels[i] === k) {
|
|
18018
|
+
mask_k.push(i);
|
|
18019
|
+
++mask_k_area;
|
|
18020
|
+
}
|
|
18021
|
+
|
|
18022
|
+
if (mask_probs_k_data[i] >= mask_threshold) {
|
|
18023
|
+
++original_area;
|
|
18024
|
+
}
|
|
18025
|
+
}
|
|
18026
|
+
let mask_exists = mask_k_area > 0 && original_area > 0;
|
|
18027
|
+
|
|
18028
|
+
// Eliminate disconnected tiny segments
|
|
18029
|
+
if (mask_exists) {
|
|
18030
|
+
// Perform additional check
|
|
18031
|
+
let area_ratio = mask_k_area / original_area;
|
|
18032
|
+
mask_exists = area_ratio > overlap_mask_area_threshold;
|
|
18033
|
+
}
|
|
18034
|
+
|
|
18035
|
+
return [mask_exists, mask_k]
|
|
18036
|
+
}
|
|
18037
|
+
|
|
18038
|
+
/**
|
|
18039
|
+
* Computes the segments.
|
|
18040
|
+
* @param {Tensor[]} mask_probs The mask probabilities.
|
|
18041
|
+
* @param {number[]} pred_scores The predicted scores.
|
|
18042
|
+
* @param {number[]} pred_labels The predicted labels.
|
|
18043
|
+
* @param {number} mask_threshold The mask threshold.
|
|
18044
|
+
* @param {number} overlap_mask_area_threshold The overlap mask area threshold.
|
|
18045
|
+
* @param {Set<number>} label_ids_to_fuse The label ids to fuse.
|
|
18046
|
+
* @param {number[]} target_size The target size of the image.
|
|
18047
|
+
* @returns {[Tensor, Array<{id: number, label_id: number, score: number}>]} The computed segments.
|
|
18048
|
+
* @private
|
|
18049
|
+
*/
|
|
18050
|
+
function compute_segments(
|
|
18051
|
+
mask_probs,
|
|
18052
|
+
pred_scores,
|
|
18053
|
+
pred_labels,
|
|
18054
|
+
mask_threshold,
|
|
18055
|
+
overlap_mask_area_threshold,
|
|
18056
|
+
label_ids_to_fuse = null,
|
|
18057
|
+
target_size = null,
|
|
18058
|
+
) {
|
|
18059
|
+
const [height, width] = target_size ?? mask_probs[0].dims;
|
|
18060
|
+
|
|
18061
|
+
const segmentation = new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_4__.Tensor(
|
|
18062
|
+
'int32',
|
|
18063
|
+
new Int32Array(height * width),
|
|
18064
|
+
[height, width]
|
|
18065
|
+
);
|
|
18066
|
+
const segments = [];
|
|
18067
|
+
|
|
18068
|
+
// 1. If target_size is not null, we need to resize the masks to the target size
|
|
18069
|
+
if (target_size !== null) {
|
|
18070
|
+
// resize the masks to the target size
|
|
18071
|
+
for (let i = 0; i < mask_probs.length; ++i) {
|
|
18072
|
+
mask_probs[i] = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_4__.interpolate)(mask_probs[i], target_size, 'bilinear', false);
|
|
18073
|
+
}
|
|
18074
|
+
}
|
|
18075
|
+
|
|
18076
|
+
// 2. Weigh each mask by its prediction score
|
|
18077
|
+
// NOTE: `mask_probs` is updated in-place
|
|
18078
|
+
//
|
|
18079
|
+
// Temporary storage for the best label/scores for each pixel ([height, width]):
|
|
18080
|
+
const mask_labels = new Int32Array(mask_probs[0].data.length);
|
|
18081
|
+
const bestScores = new Float32Array(mask_probs[0].data.length);
|
|
18082
|
+
|
|
18083
|
+
for (let i = 0; i < mask_probs.length; ++i) {
|
|
18084
|
+
let score = pred_scores[i];
|
|
18085
|
+
|
|
18086
|
+
const mask_probs_i_data = mask_probs[i].data;
|
|
18087
|
+
|
|
18088
|
+
for (let j = 0; j < mask_probs_i_data.length; ++j) {
|
|
18089
|
+
mask_probs_i_data[j] *= score
|
|
18090
|
+
if (mask_probs_i_data[j] > bestScores[j]) {
|
|
18091
|
+
mask_labels[j] = i;
|
|
18092
|
+
bestScores[j] = mask_probs_i_data[j];
|
|
18093
|
+
}
|
|
18094
|
+
}
|
|
18095
|
+
}
|
|
18096
|
+
|
|
18097
|
+
let current_segment_id = 0;
|
|
18098
|
+
|
|
18099
|
+
// let stuff_memory_list = {}
|
|
18100
|
+
const segmentation_data = segmentation.data;
|
|
18101
|
+
for (let k = 0; k < pred_labels.length; ++k) {
|
|
18102
|
+
const pred_class = pred_labels[k];
|
|
18103
|
+
|
|
18104
|
+
// TODO add `should_fuse`
|
|
18105
|
+
// let should_fuse = pred_class in label_ids_to_fuse
|
|
18106
|
+
|
|
18107
|
+
// Check if mask exists and large enough to be a segment
|
|
18108
|
+
const [mask_exists, mask_k] = check_segment_validity(
|
|
18109
|
+
mask_labels,
|
|
18110
|
+
mask_probs,
|
|
18111
|
+
k,
|
|
18112
|
+
mask_threshold,
|
|
18113
|
+
overlap_mask_area_threshold
|
|
18114
|
+
)
|
|
18115
|
+
|
|
18116
|
+
if (!mask_exists) {
|
|
18117
|
+
// Nothing to see here
|
|
18118
|
+
continue;
|
|
18119
|
+
}
|
|
18120
|
+
|
|
18121
|
+
// TODO
|
|
18122
|
+
// if (pred_class in stuff_memory_list) {
|
|
18123
|
+
// current_segment_id = stuff_memory_list[pred_class]
|
|
18124
|
+
// } else {
|
|
18125
|
+
// current_segment_id += 1;
|
|
18126
|
+
// }
|
|
18127
|
+
++current_segment_id;
|
|
18128
|
+
|
|
18129
|
+
|
|
18130
|
+
// Add current object segment to final segmentation map
|
|
18131
|
+
for (const index of mask_k) {
|
|
18132
|
+
segmentation_data[index] = current_segment_id;
|
|
18133
|
+
}
|
|
18134
|
+
|
|
18135
|
+
segments.push({
|
|
18136
|
+
id: current_segment_id,
|
|
18137
|
+
label_id: pred_class,
|
|
18138
|
+
// was_fused: should_fuse, TODO
|
|
18139
|
+
score: pred_scores[k],
|
|
18140
|
+
})
|
|
18141
|
+
|
|
18142
|
+
// TODO
|
|
18143
|
+
// if(should_fuse){
|
|
18144
|
+
// stuff_memory_list[pred_class] = current_segment_id
|
|
18145
|
+
// }
|
|
18146
|
+
}
|
|
18147
|
+
|
|
18148
|
+
return [segmentation, segments];
|
|
18149
|
+
}
|
|
18150
|
+
|
|
18151
|
+
|
|
18152
|
+
/**
|
|
18153
|
+
* Post-process the model output to generate the final panoptic segmentation.
|
|
18154
|
+
* @param {*} outputs The model output to post process
|
|
18155
|
+
* @param {number} [threshold=0.5] The probability score threshold to keep predicted instance masks.
|
|
18156
|
+
* @param {number} [mask_threshold=0.5] Threshold to use when turning the predicted masks into binary values.
|
|
18157
|
+
* @param {number} [overlap_mask_area_threshold=0.8] The overlap mask area threshold to merge or discard small disconnected parts within each binary instance mask.
|
|
18158
|
+
* @param {Set<number>} [label_ids_to_fuse=null] The labels in this state will have all their instances be fused together.
|
|
18159
|
+
* @param {[number, number][]} [target_sizes=null] The target sizes to resize the masks to.
|
|
18160
|
+
* @returns {Array<{ segmentation: Tensor, segments_info: Array<{id: number, label_id: number, score: number}>}>}
|
|
18161
|
+
*/
|
|
18162
|
+
function post_process_panoptic_segmentation(
|
|
18163
|
+
outputs,
|
|
18164
|
+
threshold = 0.5,
|
|
18165
|
+
mask_threshold = 0.5,
|
|
18166
|
+
overlap_mask_area_threshold = 0.8,
|
|
18167
|
+
label_ids_to_fuse = null,
|
|
18168
|
+
target_sizes = null,
|
|
18169
|
+
) {
|
|
18170
|
+
if (label_ids_to_fuse === null) {
|
|
18171
|
+
console.warn("`label_ids_to_fuse` unset. No instance will be fused.")
|
|
18172
|
+
label_ids_to_fuse = new Set();
|
|
18173
|
+
}
|
|
18174
|
+
|
|
18175
|
+
const class_queries_logits = outputs.class_queries_logits ?? outputs.logits; // [batch_size, num_queries, num_classes+1]
|
|
18176
|
+
const masks_queries_logits = outputs.masks_queries_logits ?? outputs.pred_masks; // [batch_size, num_queries, height, width]
|
|
18177
|
+
|
|
18178
|
+
const mask_probs = masks_queries_logits.sigmoid() // [batch_size, num_queries, height, width]
|
|
18179
|
+
|
|
18180
|
+
let [batch_size, num_queries, num_labels] = class_queries_logits.dims;
|
|
18181
|
+
num_labels -= 1; // Remove last class (background)
|
|
18182
|
+
|
|
18183
|
+
if (target_sizes !== null && target_sizes.length !== batch_size) {
|
|
18184
|
+
throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits")
|
|
18185
|
+
}
|
|
18186
|
+
|
|
18187
|
+
let toReturn = [];
|
|
18188
|
+
for (let i = 0; i < batch_size; ++i) {
|
|
18189
|
+
let target_size = target_sizes !== null ? target_sizes[i] : null;
|
|
18190
|
+
|
|
18191
|
+
let class_logits = class_queries_logits[i];
|
|
18192
|
+
let mask_logits = mask_probs[i];
|
|
18193
|
+
|
|
18194
|
+
let [mask_probs_item, pred_scores_item, pred_labels_item] = remove_low_and_no_objects(class_logits, mask_logits, threshold, num_labels);
|
|
18195
|
+
|
|
18196
|
+
if (pred_labels_item.length === 0) {
|
|
18197
|
+
// No mask found
|
|
18198
|
+
let [height, width] = target_size ?? mask_logits.dims.slice(-2);
|
|
18199
|
+
|
|
18200
|
+
let segmentation = new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_4__.Tensor(
|
|
18201
|
+
'int32',
|
|
18202
|
+
new Int32Array(height * width).fill(-1),
|
|
18203
|
+
[height, width]
|
|
18204
|
+
)
|
|
18205
|
+
toReturn.push({
|
|
18206
|
+
segmentation: segmentation,
|
|
18207
|
+
segments_info: []
|
|
18208
|
+
});
|
|
18209
|
+
continue;
|
|
18210
|
+
}
|
|
18211
|
+
|
|
18212
|
+
|
|
18213
|
+
// Get segmentation map and segment information of batch item
|
|
18214
|
+
let [segmentation, segments] = compute_segments(
|
|
18215
|
+
mask_probs_item,
|
|
18216
|
+
pred_scores_item,
|
|
18217
|
+
pred_labels_item,
|
|
18218
|
+
mask_threshold,
|
|
18219
|
+
overlap_mask_area_threshold,
|
|
18220
|
+
label_ids_to_fuse,
|
|
18221
|
+
target_size,
|
|
18222
|
+
)
|
|
18223
|
+
|
|
18224
|
+
toReturn.push({
|
|
18225
|
+
segmentation: segmentation,
|
|
18226
|
+
segments_info: segments
|
|
18227
|
+
})
|
|
18228
|
+
}
|
|
18229
|
+
|
|
18230
|
+
return toReturn;
|
|
18231
|
+
}
|
|
18232
|
+
|
|
18233
|
+
|
|
18234
|
+
/**
|
|
18235
|
+
* Post-processes the outputs of the model (for instance segmentation).
|
|
18236
|
+
* @param {*} outputs Raw outputs of the model.
|
|
18237
|
+
* @param {number} [threshold=0.5] The probability score threshold to keep predicted instance masks.
|
|
18238
|
+
* @param {[number, number][]} [target_sizes=null] List of tuples corresponding to the requested final size
|
|
18239
|
+
* (height, width) of each prediction. If unset, predictions will not be resized.
|
|
18240
|
+
* @returns {Array<{ segmentation: Tensor, segments_info: Array<{id: number, label_id: number, score: number}>}>}
|
|
18241
|
+
*/
|
|
18242
|
+
function post_process_instance_segmentation(outputs, threshold = 0.5, target_sizes = null) {
|
|
18243
|
+
throw new Error('Not implemented yet');
|
|
18244
|
+
return [];
|
|
18245
|
+
}
|
|
18246
|
+
|
|
17799
18247
|
/**
|
|
17800
18248
|
* Named tuple to indicate the order we are using is (height x width), even though
|
|
17801
18249
|
* the Graphics’ industry standard is (width x height).
|
|
@@ -18384,6 +18832,7 @@ class SegformerFeatureExtractor extends ImageFeatureExtractor {
|
|
|
18384
18832
|
return post_process_semantic_segmentation(...args);
|
|
18385
18833
|
}
|
|
18386
18834
|
}
|
|
18835
|
+
class PvtImageProcessor extends ImageFeatureExtractor { }
|
|
18387
18836
|
class DPTFeatureExtractor extends ImageFeatureExtractor { }
|
|
18388
18837
|
class DPTImageProcessor extends DPTFeatureExtractor { } // NOTE: extends DPTFeatureExtractor
|
|
18389
18838
|
class BitImageProcessor extends ImageFeatureExtractor { }
|
|
@@ -18523,302 +18972,32 @@ class DetrFeatureExtractor extends ImageFeatureExtractor {
|
|
|
18523
18972
|
// TODO support different mask sizes (not just 64x64)
|
|
18524
18973
|
// Currently, just fill pixel mask with 1s
|
|
18525
18974
|
const maskSize = [result.pixel_values.dims[0], 64, 64];
|
|
18526
|
-
const pixel_mask =
|
|
18527
|
-
'int64',
|
|
18528
|
-
new BigInt64Array(maskSize.reduce((a, b) => a * b)).fill(1n),
|
|
18529
|
-
maskSize
|
|
18530
|
-
);
|
|
18975
|
+
const pixel_mask = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_4__.full)(maskSize, 1n);
|
|
18531
18976
|
|
|
18532
18977
|
return { ...result, pixel_mask };
|
|
18533
18978
|
}
|
|
18534
18979
|
|
|
18535
|
-
/**
|
|
18536
|
-
* Post-processes the outputs of the model (for object detection).
|
|
18537
|
-
* @param {Object} outputs The outputs of the model that must be post-processed
|
|
18538
|
-
* @param {Tensor} outputs.logits The logits
|
|
18539
|
-
* @param {Tensor} outputs.pred_boxes The predicted boxes.
|
|
18540
|
-
* @return {Object[]} An array of objects containing the post-processed outputs.
|
|
18541
|
-
*/
|
|
18542
|
-
|
|
18543
18980
|
/** @type {typeof post_process_object_detection} */
|
|
18544
18981
|
post_process_object_detection(...args) {
|
|
18545
18982
|
return post_process_object_detection(...args);
|
|
18546
18983
|
}
|
|
18547
18984
|
|
|
18548
|
-
/**
|
|
18549
|
-
|
|
18550
|
-
|
|
18551
|
-
* @param {Tensor} mask_logits The mask logits.
|
|
18552
|
-
* @param {number} object_mask_threshold A number between 0 and 1 used to binarize the masks.
|
|
18553
|
-
* @param {number} num_labels The number of labels.
|
|
18554
|
-
* @returns {[Tensor[], number[], number[]]} The binarized masks, the scores, and the labels.
|
|
18555
|
-
*/
|
|
18556
|
-
remove_low_and_no_objects(class_logits, mask_logits, object_mask_threshold, num_labels) {
|
|
18557
|
-
|
|
18558
|
-
let mask_probs_item = [];
|
|
18559
|
-
let pred_scores_item = [];
|
|
18560
|
-
let pred_labels_item = [];
|
|
18561
|
-
|
|
18562
|
-
for (let j = 0; j < class_logits.dims[0]; ++j) {
|
|
18563
|
-
let cls = class_logits[j];
|
|
18564
|
-
let mask = mask_logits[j];
|
|
18565
|
-
|
|
18566
|
-
let pred_label = (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_3__.max)(cls.data)[1];
|
|
18567
|
-
if (pred_label === num_labels) {
|
|
18568
|
-
// Is the background, so we ignore it
|
|
18569
|
-
continue;
|
|
18570
|
-
}
|
|
18571
|
-
|
|
18572
|
-
let scores = (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_3__.softmax)(cls.data);
|
|
18573
|
-
let pred_score = scores[pred_label];
|
|
18574
|
-
if (pred_score > object_mask_threshold) {
|
|
18575
|
-
mask_probs_item.push(mask);
|
|
18576
|
-
pred_scores_item.push(pred_score);
|
|
18577
|
-
pred_labels_item.push(pred_label);
|
|
18578
|
-
}
|
|
18579
|
-
}
|
|
18580
|
-
|
|
18581
|
-
return [mask_probs_item, pred_scores_item, pred_labels_item];
|
|
18582
|
-
|
|
18985
|
+
/** @type {typeof post_process_panoptic_segmentation} */
|
|
18986
|
+
post_process_panoptic_segmentation(...args) {
|
|
18987
|
+
return post_process_panoptic_segmentation(...args);
|
|
18583
18988
|
}
|
|
18584
18989
|
|
|
18585
|
-
|
|
18586
|
-
|
|
18587
|
-
|
|
18588
|
-
* @param {Tensor[]} mask_probs Probabilities for each pixel in the masks.
|
|
18589
|
-
* @param {number} k The class id of the segment.
|
|
18590
|
-
* @param {number} mask_threshold The mask threshold.
|
|
18591
|
-
* @param {number} overlap_mask_area_threshold The overlap mask area threshold.
|
|
18592
|
-
* @returns {[boolean, number[]]} Whether the segment is valid or not, and the indices of the valid labels.
|
|
18593
|
-
*/
|
|
18594
|
-
check_segment_validity(
|
|
18595
|
-
mask_labels,
|
|
18596
|
-
mask_probs,
|
|
18597
|
-
k,
|
|
18598
|
-
mask_threshold = 0.5,
|
|
18599
|
-
overlap_mask_area_threshold = 0.8
|
|
18600
|
-
) {
|
|
18601
|
-
// mask_k is a 1D array of indices, indicating where the mask is equal to k
|
|
18602
|
-
let mask_k = [];
|
|
18603
|
-
let mask_k_area = 0;
|
|
18604
|
-
let original_area = 0;
|
|
18605
|
-
|
|
18606
|
-
const mask_probs_k_data = mask_probs[k].data;
|
|
18607
|
-
|
|
18608
|
-
// Compute the area of all the stuff in query k
|
|
18609
|
-
for (let i = 0; i < mask_labels.length; ++i) {
|
|
18610
|
-
if (mask_labels[i] === k) {
|
|
18611
|
-
mask_k.push(i);
|
|
18612
|
-
++mask_k_area;
|
|
18613
|
-
}
|
|
18614
|
-
|
|
18615
|
-
if (mask_probs_k_data[i] >= mask_threshold) {
|
|
18616
|
-
++original_area;
|
|
18617
|
-
}
|
|
18618
|
-
}
|
|
18619
|
-
let mask_exists = mask_k_area > 0 && original_area > 0;
|
|
18620
|
-
|
|
18621
|
-
// Eliminate disconnected tiny segments
|
|
18622
|
-
if (mask_exists) {
|
|
18623
|
-
// Perform additional check
|
|
18624
|
-
let area_ratio = mask_k_area / original_area;
|
|
18625
|
-
mask_exists = area_ratio > overlap_mask_area_threshold;
|
|
18626
|
-
}
|
|
18627
|
-
|
|
18628
|
-
return [mask_exists, mask_k]
|
|
18629
|
-
}
|
|
18630
|
-
|
|
18631
|
-
/**
|
|
18632
|
-
* Computes the segments.
|
|
18633
|
-
* @param {Tensor[]} mask_probs The mask probabilities.
|
|
18634
|
-
* @param {number[]} pred_scores The predicted scores.
|
|
18635
|
-
* @param {number[]} pred_labels The predicted labels.
|
|
18636
|
-
* @param {number} mask_threshold The mask threshold.
|
|
18637
|
-
* @param {number} overlap_mask_area_threshold The overlap mask area threshold.
|
|
18638
|
-
* @param {Set<number>} label_ids_to_fuse The label ids to fuse.
|
|
18639
|
-
* @param {number[]} target_size The target size of the image.
|
|
18640
|
-
* @returns {[Tensor, Array<{id: number, label_id: number, score: number}>]} The computed segments.
|
|
18641
|
-
*/
|
|
18642
|
-
compute_segments(
|
|
18643
|
-
mask_probs,
|
|
18644
|
-
pred_scores,
|
|
18645
|
-
pred_labels,
|
|
18646
|
-
mask_threshold,
|
|
18647
|
-
overlap_mask_area_threshold,
|
|
18648
|
-
label_ids_to_fuse = null,
|
|
18649
|
-
target_size = null,
|
|
18650
|
-
) {
|
|
18651
|
-
let [height, width] = target_size ?? mask_probs[0].dims;
|
|
18652
|
-
|
|
18653
|
-
let segmentation = new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_4__.Tensor(
|
|
18654
|
-
'int32',
|
|
18655
|
-
new Int32Array(height * width),
|
|
18656
|
-
[height, width]
|
|
18657
|
-
);
|
|
18658
|
-
let segments = [];
|
|
18659
|
-
|
|
18660
|
-
// 1. If target_size is not null, we need to resize the masks to the target size
|
|
18661
|
-
if (target_size !== null) {
|
|
18662
|
-
// resize the masks to the target size
|
|
18663
|
-
for (let i = 0; i < mask_probs.length; ++i) {
|
|
18664
|
-
mask_probs[i] = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_4__.interpolate)(mask_probs[i], target_size, 'bilinear', false);
|
|
18665
|
-
}
|
|
18666
|
-
}
|
|
18667
|
-
|
|
18668
|
-
// 2. Weigh each mask by its prediction score
|
|
18669
|
-
// NOTE: `mask_probs` is updated in-place
|
|
18670
|
-
//
|
|
18671
|
-
// Temporary storage for the best label/scores for each pixel ([height, width]):
|
|
18672
|
-
let mask_labels = new Int32Array(mask_probs[0].data.length);
|
|
18673
|
-
let bestScores = new Float32Array(mask_probs[0].data.length);
|
|
18674
|
-
|
|
18675
|
-
for (let i = 0; i < mask_probs.length; ++i) {
|
|
18676
|
-
let score = pred_scores[i];
|
|
18677
|
-
|
|
18678
|
-
const mask_probs_i_data = mask_probs[i].data;
|
|
18679
|
-
|
|
18680
|
-
for (let j = 0; j < mask_probs_i_data.length; ++j) {
|
|
18681
|
-
mask_probs_i_data[j] *= score
|
|
18682
|
-
if (mask_probs_i_data[j] > bestScores[j]) {
|
|
18683
|
-
mask_labels[j] = i;
|
|
18684
|
-
bestScores[j] = mask_probs_i_data[j];
|
|
18685
|
-
}
|
|
18686
|
-
}
|
|
18687
|
-
}
|
|
18688
|
-
|
|
18689
|
-
let current_segment_id = 0;
|
|
18690
|
-
|
|
18691
|
-
// let stuff_memory_list = {}
|
|
18692
|
-
const segmentation_data = segmentation.data;
|
|
18693
|
-
for (let k = 0; k < pred_labels.length; ++k) {
|
|
18694
|
-
let pred_class = pred_labels[k];
|
|
18695
|
-
|
|
18696
|
-
// TODO add `should_fuse`
|
|
18697
|
-
// let should_fuse = pred_class in label_ids_to_fuse
|
|
18698
|
-
|
|
18699
|
-
// Check if mask exists and large enough to be a segment
|
|
18700
|
-
let [mask_exists, mask_k] = this.check_segment_validity(
|
|
18701
|
-
mask_labels,
|
|
18702
|
-
mask_probs,
|
|
18703
|
-
k,
|
|
18704
|
-
mask_threshold,
|
|
18705
|
-
overlap_mask_area_threshold
|
|
18706
|
-
)
|
|
18707
|
-
|
|
18708
|
-
if (!mask_exists) {
|
|
18709
|
-
// Nothing to see here
|
|
18710
|
-
continue;
|
|
18711
|
-
}
|
|
18712
|
-
|
|
18713
|
-
// TODO
|
|
18714
|
-
// if (pred_class in stuff_memory_list) {
|
|
18715
|
-
// current_segment_id = stuff_memory_list[pred_class]
|
|
18716
|
-
// } else {
|
|
18717
|
-
// current_segment_id += 1;
|
|
18718
|
-
// }
|
|
18719
|
-
++current_segment_id;
|
|
18720
|
-
|
|
18721
|
-
|
|
18722
|
-
// Add current object segment to final segmentation map
|
|
18723
|
-
for (let index of mask_k) {
|
|
18724
|
-
segmentation_data[index] = current_segment_id;
|
|
18725
|
-
}
|
|
18726
|
-
|
|
18727
|
-
segments.push({
|
|
18728
|
-
id: current_segment_id,
|
|
18729
|
-
label_id: pred_class,
|
|
18730
|
-
// was_fused: should_fuse, TODO
|
|
18731
|
-
score: pred_scores[k],
|
|
18732
|
-
})
|
|
18733
|
-
|
|
18734
|
-
// TODO
|
|
18735
|
-
// if(should_fuse){
|
|
18736
|
-
// stuff_memory_list[pred_class] = current_segment_id
|
|
18737
|
-
// }
|
|
18738
|
-
}
|
|
18739
|
-
|
|
18740
|
-
return [segmentation, segments];
|
|
18990
|
+
post_process_instance_segmentation() {
|
|
18991
|
+
// TODO
|
|
18992
|
+
throw Error("Not implemented yet");
|
|
18741
18993
|
}
|
|
18994
|
+
}
|
|
18742
18995
|
|
|
18743
|
-
|
|
18744
|
-
* Post-process the model output to generate the final panoptic segmentation.
|
|
18745
|
-
* @param {*} outputs The model output to post process
|
|
18746
|
-
* @param {number} [threshold=0.5] The probability score threshold to keep predicted instance masks.
|
|
18747
|
-
* @param {number} [mask_threshold=0.5] Threshold to use when turning the predicted masks into binary values.
|
|
18748
|
-
* @param {number} [overlap_mask_area_threshold=0.8] The overlap mask area threshold to merge or discard small disconnected parts within each binary instance mask.
|
|
18749
|
-
* @param {Set<number>} [label_ids_to_fuse=null] The labels in this state will have all their instances be fused together.
|
|
18750
|
-
* @param {number[][]} [target_sizes=null] The target sizes to resize the masks to.
|
|
18751
|
-
* @returns {Array<{ segmentation: Tensor, segments_info: Array<{id: number, label_id: number, score: number}>}>}
|
|
18752
|
-
*/
|
|
18753
|
-
post_process_panoptic_segmentation(
|
|
18754
|
-
outputs,
|
|
18755
|
-
threshold = 0.5,
|
|
18756
|
-
mask_threshold = 0.5,
|
|
18757
|
-
overlap_mask_area_threshold = 0.8,
|
|
18758
|
-
label_ids_to_fuse = null,
|
|
18759
|
-
target_sizes = null,
|
|
18760
|
-
) {
|
|
18761
|
-
if (label_ids_to_fuse === null) {
|
|
18762
|
-
console.warn("`label_ids_to_fuse` unset. No instance will be fused.")
|
|
18763
|
-
label_ids_to_fuse = new Set();
|
|
18764
|
-
}
|
|
18765
|
-
|
|
18766
|
-
const class_queries_logits = outputs.logits; // [batch_size, num_queries, num_classes+1]
|
|
18767
|
-
const masks_queries_logits = outputs.pred_masks; // [batch_size, num_queries, height, width]
|
|
18768
|
-
|
|
18769
|
-
const mask_probs = masks_queries_logits.sigmoid() // [batch_size, num_queries, height, width]
|
|
18770
|
-
|
|
18771
|
-
let [batch_size, num_queries, num_labels] = class_queries_logits.dims;
|
|
18772
|
-
num_labels -= 1; // Remove last class (background)
|
|
18773
|
-
|
|
18774
|
-
if (target_sizes !== null && target_sizes.length !== batch_size) {
|
|
18775
|
-
throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits")
|
|
18776
|
-
}
|
|
18777
|
-
|
|
18778
|
-
let toReturn = [];
|
|
18779
|
-
for (let i = 0; i < batch_size; ++i) {
|
|
18780
|
-
let target_size = target_sizes !== null ? target_sizes[i] : null;
|
|
18781
|
-
|
|
18782
|
-
let class_logits = class_queries_logits[i];
|
|
18783
|
-
let mask_logits = mask_probs[i];
|
|
18784
|
-
|
|
18785
|
-
let [mask_probs_item, pred_scores_item, pred_labels_item] = this.remove_low_and_no_objects(class_logits, mask_logits, threshold, num_labels);
|
|
18786
|
-
|
|
18787
|
-
if (pred_labels_item.length === 0) {
|
|
18788
|
-
// No mask found
|
|
18789
|
-
let [height, width] = target_size ?? mask_logits.dims.slice(-2);
|
|
18790
|
-
|
|
18791
|
-
let segmentation = new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_4__.Tensor(
|
|
18792
|
-
'int32',
|
|
18793
|
-
new Int32Array(height * width).fill(-1),
|
|
18794
|
-
[height, width]
|
|
18795
|
-
)
|
|
18796
|
-
toReturn.push({
|
|
18797
|
-
segmentation: segmentation,
|
|
18798
|
-
segments_info: []
|
|
18799
|
-
});
|
|
18800
|
-
continue;
|
|
18801
|
-
}
|
|
18802
|
-
|
|
18803
|
-
|
|
18804
|
-
// Get segmentation map and segment information of batch item
|
|
18805
|
-
let [segmentation, segments] = this.compute_segments(
|
|
18806
|
-
mask_probs_item,
|
|
18807
|
-
pred_scores_item,
|
|
18808
|
-
pred_labels_item,
|
|
18809
|
-
mask_threshold,
|
|
18810
|
-
overlap_mask_area_threshold,
|
|
18811
|
-
label_ids_to_fuse,
|
|
18812
|
-
target_size,
|
|
18813
|
-
)
|
|
18814
|
-
|
|
18815
|
-
toReturn.push({
|
|
18816
|
-
segmentation: segmentation,
|
|
18817
|
-
segments_info: segments
|
|
18818
|
-
})
|
|
18819
|
-
}
|
|
18996
|
+
class MaskFormerFeatureExtractor extends ImageFeatureExtractor {
|
|
18820
18997
|
|
|
18821
|
-
|
|
18998
|
+
/** @type {typeof post_process_panoptic_segmentation} */
|
|
18999
|
+
post_process_panoptic_segmentation(...args) {
|
|
19000
|
+
return post_process_panoptic_segmentation(...args);
|
|
18822
19001
|
}
|
|
18823
19002
|
|
|
18824
19003
|
post_process_instance_segmentation() {
|
|
@@ -18827,6 +19006,7 @@ class DetrFeatureExtractor extends ImageFeatureExtractor {
|
|
|
18827
19006
|
}
|
|
18828
19007
|
}
|
|
18829
19008
|
|
|
19009
|
+
|
|
18830
19010
|
class YolosFeatureExtractor extends ImageFeatureExtractor {
|
|
18831
19011
|
/** @type {typeof post_process_object_detection} */
|
|
18832
19012
|
post_process_object_detection(...args) {
|
|
@@ -20116,11 +20296,13 @@ class AutoProcessor {
|
|
|
20116
20296
|
BitImageProcessor,
|
|
20117
20297
|
DPTImageProcessor,
|
|
20118
20298
|
DPTFeatureExtractor,
|
|
20299
|
+
PvtImageProcessor,
|
|
20119
20300
|
GLPNFeatureExtractor,
|
|
20120
20301
|
BeitFeatureExtractor,
|
|
20121
20302
|
DeiTFeatureExtractor,
|
|
20122
20303
|
DetrFeatureExtractor,
|
|
20123
20304
|
RTDetrImageProcessor,
|
|
20305
|
+
MaskFormerFeatureExtractor,
|
|
20124
20306
|
YolosFeatureExtractor,
|
|
20125
20307
|
DonutFeatureExtractor,
|
|
20126
20308
|
NougatImageProcessor,
|
|
@@ -20498,23 +20680,26 @@ function is_chinese_char(cp) {
|
|
|
20498
20680
|
}
|
|
20499
20681
|
|
|
20500
20682
|
/**
|
|
20501
|
-
* Helper function to fuse consecutive
|
|
20502
|
-
* @param {string[]} arr The input
|
|
20503
|
-
* @param {any}
|
|
20504
|
-
* @param {
|
|
20683
|
+
* Helper function to fuse consecutive unknown tokens.
|
|
20684
|
+
* @param {string[]} arr The list of input tokens
|
|
20685
|
+
* @param {Map<string, any>} tokens_to_ids The mapping from tokens to token ids.
|
|
20686
|
+
* @param {number} unk_token_id The value to fuse on.
|
|
20687
|
+
* @private
|
|
20505
20688
|
*/
|
|
20506
|
-
function
|
|
20689
|
+
function fuse_unk(arr, tokens_to_ids, unk_token_id) {
|
|
20507
20690
|
const fused = [];
|
|
20508
20691
|
let i = 0;
|
|
20509
20692
|
while (i < arr.length) {
|
|
20510
20693
|
fused.push(arr[i])
|
|
20511
|
-
if ((
|
|
20694
|
+
if ((tokens_to_ids.get(arr[i]) ?? unk_token_id) !== unk_token_id) {
|
|
20512
20695
|
++i;
|
|
20513
20696
|
continue;
|
|
20514
20697
|
}
|
|
20515
20698
|
|
|
20516
|
-
while (i < arr.length && (
|
|
20517
|
-
|
|
20699
|
+
while (++i < arr.length && (tokens_to_ids.get(arr[i]) ?? unk_token_id) === unk_token_id) {
|
|
20700
|
+
if (tokens_to_ids.get(fused.at(-1)) !== unk_token_id) {
|
|
20701
|
+
fused[fused.length - 1] += arr[i];
|
|
20702
|
+
}
|
|
20518
20703
|
}
|
|
20519
20704
|
}
|
|
20520
20705
|
|
|
@@ -20631,15 +20816,15 @@ class TokenizerModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
|
|
|
20631
20816
|
/**
|
|
20632
20817
|
* Internal function to call the TokenizerModel instance.
|
|
20633
20818
|
* @param {string[]} tokens The tokens to encode.
|
|
20634
|
-
* @returns {string[]} The encoded
|
|
20819
|
+
* @returns {string[]} The encoded tokens.
|
|
20635
20820
|
*/
|
|
20636
20821
|
_call(tokens) {
|
|
20637
|
-
|
|
20822
|
+
tokens = this.encode(tokens);
|
|
20638
20823
|
if (this.fuse_unk) {
|
|
20639
20824
|
// Fuse unknown tokens
|
|
20640
|
-
|
|
20825
|
+
tokens = fuse_unk(tokens, this.tokens_to_ids, this.unk_token_id);
|
|
20641
20826
|
}
|
|
20642
|
-
return
|
|
20827
|
+
return tokens;
|
|
20643
20828
|
}
|
|
20644
20829
|
|
|
20645
20830
|
/**
|
|
@@ -21137,15 +21322,19 @@ class BPE extends TokenizerModel {
|
|
|
21137
21322
|
for (const t of bpe_token_list) {
|
|
21138
21323
|
if (this.tokens_to_ids.has(t)) {
|
|
21139
21324
|
outputTokens.push(t);
|
|
21140
|
-
} else {
|
|
21141
|
-
|
|
21142
|
-
|
|
21143
|
-
|
|
21144
|
-
|
|
21145
|
-
|
|
21325
|
+
} else if (this.byte_fallback) {
|
|
21326
|
+
const byteTokens = Array.from(this.text_encoder.encode(t))
|
|
21327
|
+
.map(x => `<0x${x.toString(16).toUpperCase().padStart(2, '0')}>`);
|
|
21328
|
+
if (byteTokens.every(x => this.tokens_to_ids.has(x))) {
|
|
21329
|
+
// Ensure the byte tokens are actually in the vocabulary, otherwise
|
|
21330
|
+
// we fall back to the unknown token. For more information, see
|
|
21331
|
+
// https://github.com/huggingface/transformers/issues/28096.
|
|
21332
|
+
outputTokens.push(...byteTokens);
|
|
21146
21333
|
} else {
|
|
21147
21334
|
outputTokens.push(this.unk_token);
|
|
21148
21335
|
}
|
|
21336
|
+
} else {
|
|
21337
|
+
outputTokens.push(this.unk_token);
|
|
21149
21338
|
}
|
|
21150
21339
|
}
|
|
21151
21340
|
}
|
|
@@ -22817,11 +23006,10 @@ class PreTrainedTokenizer extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__
|
|
|
22817
23006
|
this.decoder.end_of_word_suffix = this.model.end_of_word_suffix;
|
|
22818
23007
|
}
|
|
22819
23008
|
|
|
22820
|
-
|
|
22821
23009
|
this.added_tokens_regex = this.added_tokens.length > 0 ? new RegExp(
|
|
22822
|
-
this.added_tokens
|
|
23010
|
+
this.added_tokens.slice()
|
|
22823
23011
|
// Sort by length (desc) to avoid early partial matches
|
|
22824
|
-
.
|
|
23012
|
+
.sort((a, b) => b.content.length - a.content.length)
|
|
22825
23013
|
.map(x => `${x.lstrip ? '\\s*' : ''}(${(0,_utils_core_js__WEBPACK_IMPORTED_MODULE_1__.escapeRegExp)(x.content)})${x.rstrip ? '\\s*' : ''}`)
|
|
22826
23014
|
.join('|')
|
|
22827
23015
|
) : null;
|
|
@@ -23319,6 +23507,67 @@ class PreTrainedTokenizer extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__
|
|
|
23319
23507
|
|
|
23320
23508
|
return decoded;
|
|
23321
23509
|
}
|
|
23510
|
+
|
|
23511
|
+
/**
|
|
23512
|
+
* Retrieve the chat template string used for tokenizing chat messages. This template is used
|
|
23513
|
+
* internally by the `apply_chat_template` method and can also be used externally to retrieve the model's chat
|
|
23514
|
+
* template for better generation tracking.
|
|
23515
|
+
*
|
|
23516
|
+
* @param {Object} options An optional object containing the following properties:
|
|
23517
|
+
* @param {string} [options.chat_template=null]
|
|
23518
|
+
* A Jinja template or the name of a template to use for this conversion.
|
|
23519
|
+
* It is usually not necessary to pass anything to this argument,
|
|
23520
|
+
* as the model's template will be used by default.
|
|
23521
|
+
* @param {Object[]} [options.tools=null]
|
|
23522
|
+
* A list of tools (callable functions) that will be accessible to the model. If the template does not
|
|
23523
|
+
* support function calling, this argument will have no effect. Each tool should be passed as a JSON Schema,
|
|
23524
|
+
* giving the name, description and argument types for the tool. See our
|
|
23525
|
+
* [chat templating guide](https://huggingface.co/docs/transformers/main/en/chat_templating#automated-function-conversion-for-tool-use)
|
|
23526
|
+
* for more information.
|
|
23527
|
+
* @returns {string} The chat template string.
|
|
23528
|
+
*/
|
|
23529
|
+
get_chat_template({
|
|
23530
|
+
chat_template = null,
|
|
23531
|
+
tools = null,
|
|
23532
|
+
} = {}) {
|
|
23533
|
+
|
|
23534
|
+
// First, handle the cases when the model has a dict of multiple templates
|
|
23535
|
+
if (this.chat_template && typeof this.chat_template === 'object') {
|
|
23536
|
+
const template_dict = this.chat_template;
|
|
23537
|
+
|
|
23538
|
+
if (chat_template !== null && Object.hasOwn(template_dict, chat_template)) {
|
|
23539
|
+
// The user can pass the name of a template to the chat template argument instead of an entire template
|
|
23540
|
+
chat_template = template_dict[chat_template];
|
|
23541
|
+
} else if (chat_template === null) {
|
|
23542
|
+
if (tools !== null && 'tool_use' in template_dict) {
|
|
23543
|
+
chat_template = template_dict['tool_use'];
|
|
23544
|
+
} else if ('default' in template_dict) {
|
|
23545
|
+
chat_template = template_dict['default'];
|
|
23546
|
+
} else {
|
|
23547
|
+
throw Error(
|
|
23548
|
+
`This model has multiple chat templates with no default specified! Please either pass a chat ` +
|
|
23549
|
+
`template or the name of the template you wish to use to the 'chat_template' argument. Available ` +
|
|
23550
|
+
`template names are ${Object.keys(template_dict).sort()}.`
|
|
23551
|
+
)
|
|
23552
|
+
}
|
|
23553
|
+
}
|
|
23554
|
+
} else if (chat_template === null) {
|
|
23555
|
+
// These are the cases when the model has a single template
|
|
23556
|
+
// priority: `chat_template` argument > `tokenizer.chat_template`
|
|
23557
|
+
if (this.chat_template) {
|
|
23558
|
+
chat_template = this.chat_template;
|
|
23559
|
+
} else {
|
|
23560
|
+
throw Error(
|
|
23561
|
+
"Cannot use apply_chat_template() because tokenizer.chat_template is not set and no template " +
|
|
23562
|
+
"argument was passed! For information about writing templates and setting the " +
|
|
23563
|
+
"tokenizer.chat_template attribute, please see the documentation at " +
|
|
23564
|
+
"https://huggingface.co/docs/transformers/main/en/chat_templating"
|
|
23565
|
+
)
|
|
23566
|
+
}
|
|
23567
|
+
}
|
|
23568
|
+
return chat_template;
|
|
23569
|
+
}
|
|
23570
|
+
|
|
23322
23571
|
/**
|
|
23323
23572
|
* Converts a list of message objects with `"role"` and `"content"` keys to a list of token
|
|
23324
23573
|
* ids. This method is intended for use with chat models, and will read the tokenizer's chat_template attribute to
|
|
@@ -23392,39 +23641,8 @@ class PreTrainedTokenizer extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__
|
|
|
23392
23641
|
...kwargs
|
|
23393
23642
|
} = {}) {
|
|
23394
23643
|
|
|
23395
|
-
|
|
23396
|
-
if (
|
|
23397
|
-
(this.chat_template && typeof this.chat_template === 'object')
|
|
23398
|
-
|| this.chat_template === null
|
|
23399
|
-
) {
|
|
23400
|
-
const template_dict = this.chat_template;
|
|
23644
|
+
chat_template = this.get_chat_template({ chat_template, tools });
|
|
23401
23645
|
|
|
23402
|
-
if (chat_template !== null && Object.hasOwn(template_dict, chat_template)) {
|
|
23403
|
-
// The user can pass the name of a template to the chat template argument instead of an entire template
|
|
23404
|
-
chat_template = template_dict[chat_template];
|
|
23405
|
-
} else if (chat_template === null && 'default' in template_dict) {
|
|
23406
|
-
chat_template = template_dict['default'];
|
|
23407
|
-
} else if (chat_template === null) {
|
|
23408
|
-
throw Error(
|
|
23409
|
-
`This model has multiple chat templates with no default specified! Please either pass a chat ` +
|
|
23410
|
-
`template or the name of the template you wish to use to the 'chat_template' argument. Available ` +
|
|
23411
|
-
`template names are ${Object.keys(template_dict).sort()}.`
|
|
23412
|
-
)
|
|
23413
|
-
}
|
|
23414
|
-
} else {
|
|
23415
|
-
// These are the cases when the model has a single template
|
|
23416
|
-
// priority: `chat_template` argument > `tokenizer.chat_template`
|
|
23417
|
-
if (this.chat_template) {
|
|
23418
|
-
chat_template = this.chat_template;
|
|
23419
|
-
} else {
|
|
23420
|
-
throw Error(
|
|
23421
|
-
"Cannot use apply_chat_template() because tokenizer.chat_template is not set and no template " +
|
|
23422
|
-
"argument was passed! For information about writing templates and setting the " +
|
|
23423
|
-
"tokenizer.chat_template attribute, please see the documentation at " +
|
|
23424
|
-
"https://huggingface.co/docs/transformers/main/en/chat_templating"
|
|
23425
|
-
)
|
|
23426
|
-
}
|
|
23427
|
-
}
|
|
23428
23646
|
if (typeof chat_template !== 'string') {
|
|
23429
23647
|
throw Error(`chat_template must be a string, but got ${typeof chat_template}`);
|
|
23430
23648
|
}
|
|
@@ -27842,11 +28060,20 @@ function softmax(arr) {
|
|
|
27842
28060
|
* @returns {T} The resulting log_softmax array.
|
|
27843
28061
|
*/
|
|
27844
28062
|
function log_softmax(arr) {
|
|
27845
|
-
// Compute the
|
|
27846
|
-
const
|
|
28063
|
+
// Compute the maximum value in the array
|
|
28064
|
+
const maxVal = max(arr)[0];
|
|
28065
|
+
|
|
28066
|
+
// Compute the sum of the exponentials
|
|
28067
|
+
let sumExps = 0;
|
|
28068
|
+
for(let i = 0; i < arr.length; ++i) {
|
|
28069
|
+
sumExps += Math.exp(arr[i] - maxVal);
|
|
28070
|
+
}
|
|
28071
|
+
|
|
28072
|
+
// Compute the log of the sum
|
|
28073
|
+
const logSum = Math.log(sumExps);
|
|
27847
28074
|
|
|
27848
|
-
//
|
|
27849
|
-
const logSoftmaxArr =
|
|
28075
|
+
// Compute the softmax values
|
|
28076
|
+
const logSoftmaxArr = arr.map(x => x - maxVal - logSum);
|
|
27850
28077
|
|
|
27851
28078
|
return /** @type {T} */(logSoftmaxArr);
|
|
27852
28079
|
}
|
|
@@ -27901,7 +28128,7 @@ function magnitude(arr) {
|
|
|
27901
28128
|
/**
|
|
27902
28129
|
* Returns the value and index of the minimum element in an array.
|
|
27903
28130
|
* @param {number[]|TypedArray} arr array of numbers.
|
|
27904
|
-
* @returns {number
|
|
28131
|
+
* @returns {[number, number]} the value and index of the minimum element, of the form: [valueOfMin, indexOfMin]
|
|
27905
28132
|
* @throws {Error} If array is empty.
|
|
27906
28133
|
*/
|
|
27907
28134
|
function min(arr) {
|
|
@@ -30300,6 +30527,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
30300
30527
|
/* harmony export */ AutoModelForTextToSpectrogram: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.AutoModelForTextToSpectrogram),
|
|
30301
30528
|
/* harmony export */ AutoModelForTextToWaveform: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.AutoModelForTextToWaveform),
|
|
30302
30529
|
/* harmony export */ AutoModelForTokenClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.AutoModelForTokenClassification),
|
|
30530
|
+
/* harmony export */ AutoModelForUniversalSegmentation: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.AutoModelForUniversalSegmentation),
|
|
30303
30531
|
/* harmony export */ AutoModelForVision2Seq: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.AutoModelForVision2Seq),
|
|
30304
30532
|
/* harmony export */ AutoModelForXVector: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.AutoModelForXVector),
|
|
30305
30533
|
/* harmony export */ AutoModelForZeroShotObjectDetection: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.AutoModelForZeroShotObjectDetection),
|
|
@@ -30344,8 +30572,10 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
30344
30572
|
/* harmony export */ CLIPSegForImageSegmentation: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.CLIPSegForImageSegmentation),
|
|
30345
30573
|
/* harmony export */ CLIPSegModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.CLIPSegModel),
|
|
30346
30574
|
/* harmony export */ CLIPSegPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.CLIPSegPreTrainedModel),
|
|
30575
|
+
/* harmony export */ CLIPTextModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.CLIPTextModel),
|
|
30347
30576
|
/* harmony export */ CLIPTextModelWithProjection: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.CLIPTextModelWithProjection),
|
|
30348
30577
|
/* harmony export */ CLIPTokenizer: () => (/* reexport safe */ _tokenizers_js__WEBPACK_IMPORTED_MODULE_3__.CLIPTokenizer),
|
|
30578
|
+
/* harmony export */ CLIPVisionModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.CLIPVisionModel),
|
|
30349
30579
|
/* harmony export */ CLIPVisionModelWithProjection: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.CLIPVisionModelWithProjection),
|
|
30350
30580
|
/* harmony export */ CamembertForMaskedLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.CamembertForMaskedLM),
|
|
30351
30581
|
/* harmony export */ CamembertForQuestionAnswering: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.CamembertForQuestionAnswering),
|
|
@@ -30407,6 +30637,8 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
30407
30637
|
/* harmony export */ DebertaV2Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.DebertaV2Model),
|
|
30408
30638
|
/* harmony export */ DebertaV2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.DebertaV2PreTrainedModel),
|
|
30409
30639
|
/* harmony export */ DebertaV2Tokenizer: () => (/* reexport safe */ _tokenizers_js__WEBPACK_IMPORTED_MODULE_3__.DebertaV2Tokenizer),
|
|
30640
|
+
/* harmony export */ DecisionTransformerModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.DecisionTransformerModel),
|
|
30641
|
+
/* harmony export */ DecisionTransformerPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.DecisionTransformerPreTrainedModel),
|
|
30410
30642
|
/* harmony export */ DeiTFeatureExtractor: () => (/* reexport safe */ _processors_js__WEBPACK_IMPORTED_MODULE_4__.DeiTFeatureExtractor),
|
|
30411
30643
|
/* harmony export */ DeiTForImageClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.DeiTForImageClassification),
|
|
30412
30644
|
/* harmony export */ DeiTModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.DeiTModel),
|
|
@@ -30496,7 +30728,12 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
30496
30728
|
/* harmony export */ GemmaPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GemmaPreTrainedModel),
|
|
30497
30729
|
/* harmony export */ GemmaTokenizer: () => (/* reexport safe */ _tokenizers_js__WEBPACK_IMPORTED_MODULE_3__.GemmaTokenizer),
|
|
30498
30730
|
/* harmony export */ Grok1Tokenizer: () => (/* reexport safe */ _tokenizers_js__WEBPACK_IMPORTED_MODULE_3__.Grok1Tokenizer),
|
|
30731
|
+
/* harmony export */ GroupViTModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GroupViTModel),
|
|
30732
|
+
/* harmony export */ GroupViTPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GroupViTPreTrainedModel),
|
|
30499
30733
|
/* harmony export */ HerbertTokenizer: () => (/* reexport safe */ _tokenizers_js__WEBPACK_IMPORTED_MODULE_3__.HerbertTokenizer),
|
|
30734
|
+
/* harmony export */ HieraForImageClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.HieraForImageClassification),
|
|
30735
|
+
/* harmony export */ HieraModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.HieraModel),
|
|
30736
|
+
/* harmony export */ HieraPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.HieraPreTrainedModel),
|
|
30500
30737
|
/* harmony export */ HubertForCTC: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.HubertForCTC),
|
|
30501
30738
|
/* harmony export */ HubertForSequenceClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.HubertForSequenceClassification),
|
|
30502
30739
|
/* harmony export */ HubertModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.HubertModel),
|
|
@@ -30546,6 +30783,10 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
30546
30783
|
/* harmony export */ MarianModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MarianModel),
|
|
30547
30784
|
/* harmony export */ MarianPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MarianPreTrainedModel),
|
|
30548
30785
|
/* harmony export */ MarianTokenizer: () => (/* reexport safe */ _tokenizers_js__WEBPACK_IMPORTED_MODULE_3__.MarianTokenizer),
|
|
30786
|
+
/* harmony export */ MaskFormerFeatureExtractor: () => (/* reexport safe */ _processors_js__WEBPACK_IMPORTED_MODULE_4__.MaskFormerFeatureExtractor),
|
|
30787
|
+
/* harmony export */ MaskFormerForInstanceSegmentation: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MaskFormerForInstanceSegmentation),
|
|
30788
|
+
/* harmony export */ MaskFormerModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MaskFormerModel),
|
|
30789
|
+
/* harmony export */ MaskFormerPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MaskFormerPreTrainedModel),
|
|
30549
30790
|
/* harmony export */ MaskedLMOutput: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MaskedLMOutput),
|
|
30550
30791
|
/* harmony export */ MaxLengthCriteria: () => (/* reexport safe */ _generation_stopping_criteria_js__WEBPACK_IMPORTED_MODULE_11__.MaxLengthCriteria),
|
|
30551
30792
|
/* harmony export */ MistralForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MistralForCausalLM),
|
|
@@ -30623,6 +30864,10 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
30623
30864
|
/* harmony export */ PretrainedConfig: () => (/* reexport safe */ _configs_js__WEBPACK_IMPORTED_MODULE_5__.PretrainedConfig),
|
|
30624
30865
|
/* harmony export */ PretrainedMixin: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PretrainedMixin),
|
|
30625
30866
|
/* harmony export */ Processor: () => (/* reexport safe */ _processors_js__WEBPACK_IMPORTED_MODULE_4__.Processor),
|
|
30867
|
+
/* harmony export */ PvtForImageClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PvtForImageClassification),
|
|
30868
|
+
/* harmony export */ PvtImageProcessor: () => (/* reexport safe */ _processors_js__WEBPACK_IMPORTED_MODULE_4__.PvtImageProcessor),
|
|
30869
|
+
/* harmony export */ PvtModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PvtModel),
|
|
30870
|
+
/* harmony export */ PvtPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PvtPreTrainedModel),
|
|
30626
30871
|
/* harmony export */ PyAnnoteFeatureExtractor: () => (/* reexport safe */ _processors_js__WEBPACK_IMPORTED_MODULE_4__.PyAnnoteFeatureExtractor),
|
|
30627
30872
|
/* harmony export */ PyAnnoteForAudioFrameClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PyAnnoteForAudioFrameClassification),
|
|
30628
30873
|
/* harmony export */ PyAnnoteModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PyAnnoteModel),
|
|
@@ -30743,6 +30988,11 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
30743
30988
|
/* harmony export */ ViTFeatureExtractor: () => (/* reexport safe */ _processors_js__WEBPACK_IMPORTED_MODULE_4__.ViTFeatureExtractor),
|
|
30744
30989
|
/* harmony export */ ViTForImageClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ViTForImageClassification),
|
|
30745
30990
|
/* harmony export */ ViTImageProcessor: () => (/* reexport safe */ _processors_js__WEBPACK_IMPORTED_MODULE_4__.ViTImageProcessor),
|
|
30991
|
+
/* harmony export */ ViTMAEModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ViTMAEModel),
|
|
30992
|
+
/* harmony export */ ViTMAEPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ViTMAEPreTrainedModel),
|
|
30993
|
+
/* harmony export */ ViTMSNForImageClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ViTMSNForImageClassification),
|
|
30994
|
+
/* harmony export */ ViTMSNModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ViTMSNModel),
|
|
30995
|
+
/* harmony export */ ViTMSNPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ViTMSNPreTrainedModel),
|
|
30746
30996
|
/* harmony export */ ViTModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ViTModel),
|
|
30747
30997
|
/* harmony export */ ViTPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ViTPreTrainedModel),
|
|
30748
30998
|
/* harmony export */ VisionEncoderDecoderModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.VisionEncoderDecoderModel),
|