@huggingface/transformers 3.0.1 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -4
- package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
- package/dist/transformers.cjs +16607 -13472
- package/dist/transformers.cjs.map +1 -1
- package/dist/transformers.js +16601 -13451
- package/dist/transformers.js.map +1 -1
- package/dist/transformers.min.cjs +238 -52
- package/dist/transformers.min.cjs.map +1 -1
- package/dist/transformers.min.js +229 -43
- package/dist/transformers.min.js.map +1 -1
- package/dist/transformers.min.mjs +240 -54
- package/dist/transformers.min.mjs.map +1 -1
- package/dist/transformers.mjs +16017 -12878
- package/dist/transformers.mjs.map +1 -1
- package/package.json +7 -7
- package/src/base/feature_extraction_utils.js +54 -0
- package/src/base/image_processors_utils.js +1089 -0
- package/src/base/processing_utils.js +145 -0
- package/src/configs.js +15 -3
- package/src/env.js +15 -4
- package/src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js +90 -0
- package/src/models/auto/feature_extraction_auto.js +41 -0
- package/src/models/auto/image_processing_auto.js +29 -0
- package/src/models/auto/processing_auto.js +100 -0
- package/src/models/beit/image_processing_beit.js +5 -0
- package/src/models/bit/image_processing_bit.js +5 -0
- package/src/models/chinese_clip/image_processing_chinese_clip.js +5 -0
- package/src/models/clap/feature_extraction_clap.js +159 -0
- package/src/models/clip/image_processing_clip.js +6 -0
- package/src/models/convnext/image_processing_convnext.js +45 -0
- package/src/models/deit/image_processing_deit.js +6 -0
- package/src/models/detr/image_processing_detr.js +52 -0
- package/src/models/donut/image_processing_donut.js +31 -0
- package/src/models/dpt/image_processing_dpt.js +6 -0
- package/src/models/efficientnet/image_processing_efficientnet.js +13 -0
- package/src/models/feature_extractors.js +12 -0
- package/src/models/florence2/processing_florence2.js +128 -0
- package/src/models/glpn/image_processing_glpn.js +5 -0
- package/src/models/image_processors.js +36 -0
- package/src/models/janus/image_processing_janus.js +26 -0
- package/src/models/janus/processing_janus.js +123 -0
- package/src/models/jina_clip/image_processing_jina_clip.js +26 -0
- package/src/models/jina_clip/processing_jina_clip.js +24 -0
- package/src/models/llava_onevision/image_processing_llava_onevision.js +5 -0
- package/src/models/mask2former/image_processing_mask2former.js +5 -0
- package/src/models/maskformer/image_processing_maskformer.js +18 -0
- package/src/models/mgp_str/processing_mgp_str.js +170 -0
- package/src/models/mobilenet_v1/image_processing_mobilenet_v1.js +7 -0
- package/src/models/mobilenet_v2/image_processing_mobilenet_v2.js +7 -0
- package/src/models/mobilenet_v3/image_processing_mobilenet_v3.js +7 -0
- package/src/models/mobilenet_v4/image_processing_mobilenet_v4.js +7 -0
- package/src/models/mobilevit/image_processing_mobilevit.js +6 -0
- package/src/models/nougat/image_processing_nougat.js +5 -0
- package/src/models/owlv2/image_processing_owlv2.js +5 -0
- package/src/models/owlvit/image_processing_owlvit.js +12 -0
- package/src/models/owlvit/processing_owlvit.js +7 -0
- package/src/models/processors.js +11 -0
- package/src/models/pvt/image_processing_pvt.js +5 -0
- package/src/models/pyannote/feature_extraction_pyannote.js +28 -0
- package/src/models/pyannote/processing_pyannote.js +71 -0
- package/src/models/qwen2_vl/image_processing_qwen2_vl.js +52 -0
- package/src/models/qwen2_vl/processing_qwen2_vl.js +52 -0
- package/src/models/rt_detr/image_processing_rt_detr.js +12 -0
- package/src/models/sam/image_processing_sam.js +242 -0
- package/src/models/sam/processing_sam.js +20 -0
- package/src/models/sapiens/image_processing_sapiens.js +13 -0
- package/src/models/seamless_m4t/feature_extraction_seamless_m4t.js +180 -0
- package/src/models/segformer/image_processing_segformer.js +13 -0
- package/src/models/siglip/image_processing_siglip.js +5 -0
- package/src/models/speecht5/feature_extraction_speecht5.js +4 -0
- package/src/models/speecht5/processing_speecht5.js +17 -0
- package/src/models/swin2sr/image_processing_swin2sr.js +24 -0
- package/src/models/vit/image_processing_vit.js +7 -0
- package/src/models/vitmatte/image_processing_vitmatte.js +50 -0
- package/src/models/vitpose/image_processing_vitpose.js +89 -0
- package/src/models/wav2vec2/feature_extraction_wav2vec2.js +44 -0
- package/src/models/wav2vec2/processing_wav2vec2.js +15 -0
- package/src/models/wespeaker/feature_extraction_wespeaker.js +100 -0
- package/src/models/whisper/feature_extraction_whisper.js +84 -0
- package/src/models/whisper/processing_whisper.js +21 -0
- package/src/models/yolos/image_processing_yolos.js +12 -0
- package/src/models.js +695 -32
- package/src/pipelines.js +8 -8
- package/src/tokenizers.js +5 -0
- package/src/transformers.js +15 -2
- package/src/utils/constants.js +8 -1
- package/src/utils/core.js +37 -9
- package/src/utils/hub.js +2 -1
- package/src/utils/image.js +68 -17
- package/src/utils/tensor.js +33 -1
- package/types/base/feature_extraction_utils.d.ts +41 -0
- package/types/base/feature_extraction_utils.d.ts.map +1 -0
- package/types/base/image_processors_utils.d.ts +323 -0
- package/types/base/image_processors_utils.d.ts.map +1 -0
- package/types/base/processing_utils.d.ts +80 -0
- package/types/base/processing_utils.d.ts.map +1 -0
- package/types/configs.d.ts +4 -1
- package/types/configs.d.ts.map +1 -1
- package/types/env.d.ts.map +1 -1
- package/types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts +25 -0
- package/types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts.map +1 -0
- package/types/models/auto/feature_extraction_auto.d.ts +5 -0
- package/types/models/auto/feature_extraction_auto.d.ts.map +1 -0
- package/types/models/auto/image_processing_auto.d.ts +5 -0
- package/types/models/auto/image_processing_auto.d.ts.map +1 -0
- package/types/models/auto/processing_auto.d.ts +35 -0
- package/types/models/auto/processing_auto.d.ts.map +1 -0
- package/types/models/beit/image_processing_beit.d.ts +4 -0
- package/types/models/beit/image_processing_beit.d.ts.map +1 -0
- package/types/models/bit/image_processing_bit.d.ts +4 -0
- package/types/models/bit/image_processing_bit.d.ts.map +1 -0
- package/types/models/chinese_clip/image_processing_chinese_clip.d.ts +4 -0
- package/types/models/chinese_clip/image_processing_chinese_clip.d.ts.map +1 -0
- package/types/models/clap/feature_extraction_clap.d.ts +57 -0
- package/types/models/clap/feature_extraction_clap.d.ts.map +1 -0
- package/types/models/clip/image_processing_clip.d.ts +6 -0
- package/types/models/clip/image_processing_clip.d.ts.map +1 -0
- package/types/models/convnext/image_processing_convnext.d.ts +12 -0
- package/types/models/convnext/image_processing_convnext.d.ts.map +1 -0
- package/types/models/deit/image_processing_deit.d.ts +6 -0
- package/types/models/deit/image_processing_deit.d.ts.map +1 -0
- package/types/models/detr/image_processing_detr.d.ts +42 -0
- package/types/models/detr/image_processing_detr.d.ts.map +1 -0
- package/types/models/donut/image_processing_donut.d.ts +7 -0
- package/types/models/donut/image_processing_donut.d.ts.map +1 -0
- package/types/models/dpt/image_processing_dpt.d.ts +6 -0
- package/types/models/dpt/image_processing_dpt.d.ts.map +1 -0
- package/types/models/efficientnet/image_processing_efficientnet.d.ts +6 -0
- package/types/models/efficientnet/image_processing_efficientnet.d.ts.map +1 -0
- package/types/models/feature_extractors.d.ts +10 -0
- package/types/models/feature_extractors.d.ts.map +1 -0
- package/types/models/florence2/processing_florence2.d.ts +39 -0
- package/types/models/florence2/processing_florence2.d.ts.map +1 -0
- package/types/models/glpn/image_processing_glpn.d.ts +4 -0
- package/types/models/glpn/image_processing_glpn.d.ts.map +1 -0
- package/types/models/image_processors.d.ts +36 -0
- package/types/models/image_processors.d.ts.map +1 -0
- package/types/models/janus/image_processing_janus.d.ts +7 -0
- package/types/models/janus/image_processing_janus.d.ts.map +1 -0
- package/types/models/janus/processing_janus.d.ts +77 -0
- package/types/models/janus/processing_janus.d.ts.map +1 -0
- package/types/models/jina_clip/image_processing_jina_clip.d.ts +5 -0
- package/types/models/jina_clip/image_processing_jina_clip.d.ts.map +1 -0
- package/types/models/jina_clip/processing_jina_clip.d.ts +9 -0
- package/types/models/jina_clip/processing_jina_clip.d.ts.map +1 -0
- package/types/models/llava_onevision/image_processing_llava_onevision.d.ts +4 -0
- package/types/models/llava_onevision/image_processing_llava_onevision.d.ts.map +1 -0
- package/types/models/mask2former/image_processing_mask2former.d.ts +4 -0
- package/types/models/mask2former/image_processing_mask2former.d.ts.map +1 -0
- package/types/models/maskformer/image_processing_maskformer.d.ts +22 -0
- package/types/models/maskformer/image_processing_maskformer.d.ts.map +1 -0
- package/types/models/mgp_str/processing_mgp_str.d.ts +64 -0
- package/types/models/mgp_str/processing_mgp_str.d.ts.map +1 -0
- package/types/models/mobilenet_v1/image_processing_mobilenet_v1.d.ts +6 -0
- package/types/models/mobilenet_v1/image_processing_mobilenet_v1.d.ts.map +1 -0
- package/types/models/mobilenet_v2/image_processing_mobilenet_v2.d.ts +6 -0
- package/types/models/mobilenet_v2/image_processing_mobilenet_v2.d.ts.map +1 -0
- package/types/models/mobilenet_v3/image_processing_mobilenet_v3.d.ts +6 -0
- package/types/models/mobilenet_v3/image_processing_mobilenet_v3.d.ts.map +1 -0
- package/types/models/mobilenet_v4/image_processing_mobilenet_v4.d.ts +6 -0
- package/types/models/mobilenet_v4/image_processing_mobilenet_v4.d.ts.map +1 -0
- package/types/models/mobilevit/image_processing_mobilevit.d.ts +6 -0
- package/types/models/mobilevit/image_processing_mobilevit.d.ts.map +1 -0
- package/types/models/nougat/image_processing_nougat.d.ts +4 -0
- package/types/models/nougat/image_processing_nougat.d.ts.map +1 -0
- package/types/models/owlv2/image_processing_owlv2.d.ts +4 -0
- package/types/models/owlv2/image_processing_owlv2.d.ts.map +1 -0
- package/types/models/owlvit/image_processing_owlvit.d.ts +10 -0
- package/types/models/owlvit/image_processing_owlvit.d.ts.map +1 -0
- package/types/models/owlvit/processing_owlvit.d.ts +8 -0
- package/types/models/owlvit/processing_owlvit.d.ts.map +1 -0
- package/types/models/processors.d.ts +12 -0
- package/types/models/processors.d.ts.map +1 -0
- package/types/models/pvt/image_processing_pvt.d.ts +4 -0
- package/types/models/pvt/image_processing_pvt.d.ts.map +1 -0
- package/types/models/pyannote/feature_extraction_pyannote.d.ts +13 -0
- package/types/models/pyannote/feature_extraction_pyannote.d.ts.map +1 -0
- package/types/models/pyannote/processing_pyannote.d.ts +30 -0
- package/types/models/pyannote/processing_pyannote.d.ts.map +1 -0
- package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts +11 -0
- package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts.map +1 -0
- package/types/models/qwen2_vl/processing_qwen2_vl.d.ts +17 -0
- package/types/models/qwen2_vl/processing_qwen2_vl.d.ts.map +1 -0
- package/types/models/rt_detr/image_processing_rt_detr.d.ts +8 -0
- package/types/models/rt_detr/image_processing_rt_detr.d.ts.map +1 -0
- package/types/models/sam/image_processing_sam.d.ts +103 -0
- package/types/models/sam/image_processing_sam.d.ts.map +1 -0
- package/types/models/sam/processing_sam.d.ts +9 -0
- package/types/models/sam/processing_sam.d.ts.map +1 -0
- package/types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts +34 -0
- package/types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts.map +1 -0
- package/types/models/segformer/image_processing_segformer.d.ts +10 -0
- package/types/models/segformer/image_processing_segformer.d.ts.map +1 -0
- package/types/models/siglip/image_processing_siglip.d.ts +4 -0
- package/types/models/siglip/image_processing_siglip.d.ts.map +1 -0
- package/types/models/speecht5/feature_extraction_speecht5.d.ts +4 -0
- package/types/models/speecht5/feature_extraction_speecht5.d.ts.map +1 -0
- package/types/models/speecht5/processing_speecht5.d.ts +14 -0
- package/types/models/speecht5/processing_speecht5.d.ts.map +1 -0
- package/types/models/swin2sr/image_processing_swin2sr.d.ts +5 -0
- package/types/models/swin2sr/image_processing_swin2sr.d.ts.map +1 -0
- package/types/models/vit/image_processing_vit.d.ts +6 -0
- package/types/models/vit/image_processing_vit.d.ts.map +1 -0
- package/types/models/vitmatte/image_processing_vitmatte.d.ts +12 -0
- package/types/models/vitmatte/image_processing_vitmatte.d.ts.map +1 -0
- package/types/models/vitpose/image_processing_vitpose.d.ts +26 -0
- package/types/models/vitpose/image_processing_vitpose.d.ts.map +1 -0
- package/types/models/wav2vec2/feature_extraction_wav2vec2.d.ts +19 -0
- package/types/models/wav2vec2/feature_extraction_wav2vec2.d.ts.map +1 -0
- package/types/models/wav2vec2/processing_wav2vec2.d.ts +12 -0
- package/types/models/wav2vec2/processing_wav2vec2.d.ts.map +1 -0
- package/types/models/wespeaker/feature_extraction_wespeaker.d.ts +23 -0
- package/types/models/wespeaker/feature_extraction_wespeaker.d.ts.map +1 -0
- package/types/models/whisper/feature_extraction_whisper.d.ts +21 -0
- package/types/models/whisper/feature_extraction_whisper.d.ts.map +1 -0
- package/types/models/whisper/processing_whisper.d.ts +17 -0
- package/types/models/whisper/processing_whisper.d.ts.map +1 -0
- package/types/models/yolos/image_processing_yolos.d.ts +10 -0
- package/types/models/yolos/image_processing_yolos.d.ts.map +1 -0
- package/types/models.d.ts +152 -0
- package/types/models.d.ts.map +1 -1
- package/types/pipelines.d.ts +2 -3
- package/types/pipelines.d.ts.map +1 -1
- package/types/tokenizers.d.ts +3 -0
- package/types/tokenizers.d.ts.map +1 -1
- package/types/transformers.d.ts +10 -1
- package/types/utils/constants.d.ts +6 -0
- package/types/utils/constants.d.ts.map +1 -1
- package/types/utils/core.d.ts +58 -3
- package/types/utils/core.d.ts.map +1 -1
- package/types/utils/hub.d.ts +1 -1
- package/types/utils/hub.d.ts.map +1 -1
- package/types/utils/image.d.ts +10 -2
- package/types/utils/image.d.ts.map +1 -1
- package/types/utils/tensor.d.ts +34 -1
- package/types/utils/tensor.d.ts.map +1 -1
- package/src/processors.js +0 -2655
- package/types/processors.d.ts +0 -924
- package/types/processors.d.ts.map +0 -1
package/src/pipelines.js
CHANGED
|
@@ -45,8 +45,10 @@ import {
|
|
|
45
45
|
} from './models.js';
|
|
46
46
|
import {
|
|
47
47
|
AutoProcessor,
|
|
48
|
-
|
|
49
|
-
|
|
48
|
+
} from './models/auto/processing_auto.js';
|
|
49
|
+
import {
|
|
50
|
+
Processor,
|
|
51
|
+
} from './base/processing_utils.js';
|
|
50
52
|
|
|
51
53
|
import {
|
|
52
54
|
Callable,
|
|
@@ -54,7 +56,6 @@ import {
|
|
|
54
56
|
|
|
55
57
|
import {
|
|
56
58
|
dispatchCallback,
|
|
57
|
-
pop,
|
|
58
59
|
product,
|
|
59
60
|
} from './utils/core.js';
|
|
60
61
|
import {
|
|
@@ -158,7 +159,6 @@ function get_bounding_box(box, asInteger) {
|
|
|
158
159
|
/**
|
|
159
160
|
* The Pipeline class is the class from which all pipelines inherit.
|
|
160
161
|
* Refer to this class for methods shared across different pipelines.
|
|
161
|
-
* @extends Callable
|
|
162
162
|
*/
|
|
163
163
|
export class Pipeline extends Callable {
|
|
164
164
|
/**
|
|
@@ -2131,8 +2131,8 @@ export class ImageSegmentationPipeline extends (/** @type {new (options: ImagePi
|
|
|
2131
2131
|
fn = this.subtasks_mapping[subtask];
|
|
2132
2132
|
} else {
|
|
2133
2133
|
for (let [task, func] of Object.entries(this.subtasks_mapping)) {
|
|
2134
|
-
if (func in this.processor.feature_extractor) {
|
|
2135
|
-
fn = this.processor.feature_extractor[func].bind(this.processor.feature_extractor);
|
|
2134
|
+
if (func in this.processor.image_processor) {
|
|
2135
|
+
fn = this.processor.image_processor[func].bind(this.processor.image_processor);
|
|
2136
2136
|
subtask = task;
|
|
2137
2137
|
break;
|
|
2138
2138
|
}
|
|
@@ -2362,7 +2362,7 @@ export class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipe
|
|
|
2362
2362
|
const output = await this.model({ pixel_values, pixel_mask });
|
|
2363
2363
|
|
|
2364
2364
|
// @ts-ignore
|
|
2365
|
-
const processed = this.processor.feature_extractor.post_process_object_detection(output, threshold, imageSizes);
|
|
2365
|
+
const processed = this.processor.image_processor.post_process_object_detection(output, threshold, imageSizes);
|
|
2366
2366
|
|
|
2367
2367
|
// Add labels
|
|
2368
2368
|
const id2label = this.model.config.id2label;
|
|
@@ -2510,7 +2510,7 @@ export class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: T
|
|
|
2510
2510
|
const output = await this.model({ ...text_inputs, pixel_values });
|
|
2511
2511
|
|
|
2512
2512
|
// @ts-ignore
|
|
2513
|
-
const processed = this.processor.feature_extractor.post_process_object_detection(output, threshold, imageSize, true)[0];
|
|
2513
|
+
const processed = this.processor.image_processor.post_process_object_detection(output, threshold, imageSize, true)[0];
|
|
2514
2514
|
let result = processed.boxes.map((box, i) => ({
|
|
2515
2515
|
score: processed.scores[i],
|
|
2516
2516
|
label: candidate_labels[processed.classes[i]],
|
package/src/tokenizers.js
CHANGED
|
@@ -1518,6 +1518,8 @@ class SplitPreTokenizer extends PreTokenizer {
|
|
|
1518
1518
|
|
|
1519
1519
|
if (this.config.invert) {
|
|
1520
1520
|
return text.match(this.pattern) || [];
|
|
1521
|
+
} else if (this.config.behavior?.toLowerCase() === 'removed') {
|
|
1522
|
+
return text.split(this.pattern).filter(x => x);
|
|
1521
1523
|
} else {
|
|
1522
1524
|
return regexSplit(text, this.pattern);
|
|
1523
1525
|
}
|
|
@@ -4255,6 +4257,8 @@ export class VitsTokenizer extends PreTrainedTokenizer {
|
|
|
4255
4257
|
|
|
4256
4258
|
export class CohereTokenizer extends PreTrainedTokenizer { }
|
|
4257
4259
|
|
|
4260
|
+
export class MgpstrTokenizer extends PreTrainedTokenizer { }
|
|
4261
|
+
|
|
4258
4262
|
/**
|
|
4259
4263
|
* Helper class which is used to instantiate pretrained tokenizers with the `from_pretrained` function.
|
|
4260
4264
|
* The chosen tokenizer class is determined by the type specified in the tokenizer config.
|
|
@@ -4308,6 +4312,7 @@ export class AutoTokenizer {
|
|
|
4308
4312
|
GemmaTokenizer,
|
|
4309
4313
|
Grok1Tokenizer,
|
|
4310
4314
|
CohereTokenizer,
|
|
4315
|
+
MgpstrTokenizer,
|
|
4311
4316
|
|
|
4312
4317
|
// Base case:
|
|
4313
4318
|
PreTrainedTokenizer,
|
package/src/transformers.js
CHANGED
|
@@ -12,10 +12,10 @@
|
|
|
12
12
|
*/
|
|
13
13
|
|
|
14
14
|
export { env } from './env.js';
|
|
15
|
+
|
|
15
16
|
export * from './pipelines.js';
|
|
16
17
|
export * from './models.js';
|
|
17
18
|
export * from './tokenizers.js';
|
|
18
|
-
export * from './processors.js';
|
|
19
19
|
export * from './configs.js';
|
|
20
20
|
|
|
21
21
|
export * from './utils/audio.js';
|
|
@@ -23,6 +23,19 @@ export * from './utils/image.js';
|
|
|
23
23
|
export * from './utils/tensor.js';
|
|
24
24
|
export * from './utils/maths.js';
|
|
25
25
|
|
|
26
|
+
|
|
27
|
+
export { FeatureExtractor } from './base/feature_extraction_utils.js';
|
|
28
|
+
export * from './models/feature_extractors.js';
|
|
29
|
+
export * from './models/auto/feature_extraction_auto.js';
|
|
30
|
+
|
|
31
|
+
export { ImageProcessor } from './base/image_processors_utils.js';
|
|
32
|
+
export * from './models/image_processors.js';
|
|
33
|
+
export * from './models/auto/image_processing_auto.js';
|
|
34
|
+
|
|
35
|
+
export { Processor } from './base/processing_utils.js';
|
|
36
|
+
export * from './models/processors.js';
|
|
37
|
+
export * from './models/auto/processing_auto.js';
|
|
38
|
+
|
|
26
39
|
export * from './generation/streamers.js';
|
|
27
40
|
export * from './generation/stopping_criteria.js';
|
|
28
|
-
|
|
41
|
+
export * from './generation/logits_process.js';
|
package/src/utils/constants.js
CHANGED
|
@@ -1,2 +1,9 @@
|
|
|
1
1
|
|
|
2
|
-
export const GITHUB_ISSUE_URL = 'https://github.com/huggingface/transformers.js/issues/new/choose';
|
|
2
|
+
export const GITHUB_ISSUE_URL = 'https://github.com/huggingface/transformers.js/issues/new/choose';
|
|
3
|
+
|
|
4
|
+
export const CONFIG_NAME = "config.json"
|
|
5
|
+
export const FEATURE_EXTRACTOR_NAME = "preprocessor_config.json"
|
|
6
|
+
export const IMAGE_PROCESSOR_NAME = FEATURE_EXTRACTOR_NAME
|
|
7
|
+
export const PROCESSOR_NAME = "processor_config.json"
|
|
8
|
+
export const CHAT_TEMPLATE_NAME = "chat_template.json"
|
|
9
|
+
export const GENERATION_CONFIG_NAME = "generation_config.json"
|
package/src/utils/core.js
CHANGED
|
@@ -1,18 +1,37 @@
|
|
|
1
1
|
|
|
2
2
|
/**
|
|
3
3
|
* @file Core utility functions/classes for Transformers.js.
|
|
4
|
-
*
|
|
4
|
+
*
|
|
5
5
|
* These are only used internally, meaning an end-user shouldn't
|
|
6
6
|
* need to access anything here.
|
|
7
|
-
*
|
|
7
|
+
*
|
|
8
8
|
* @module utils/core
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
|
+
/**
|
|
12
|
+
* @typedef {Object} ProgressInfo
|
|
13
|
+
* @property {'initiate' | 'download' | 'progress' | 'done'} status The status of the progress item.
|
|
14
|
+
* @property {string} name This can be either:
|
|
15
|
+
* - a string, the *model id* of a model repo on huggingface.co.
|
|
16
|
+
* - a path to a *directory* potentially containing the file.
|
|
17
|
+
* @property {string} file The name of the file
|
|
18
|
+
* @property {number} [progress] A number between 0 and 100. Only available for the 'progress' status.
|
|
19
|
+
* @property {number} [loaded] The number of bytes loaded. Only available for the 'progress' status.
|
|
20
|
+
* @property {number} [total] The total number of bytes to be loaded. Only available for the 'progress' status.
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* A callback function that is called with progress information.
|
|
25
|
+
* @callback ProgressCallback
|
|
26
|
+
* @param {ProgressInfo} progressInfo
|
|
27
|
+
* @returns {void}
|
|
28
|
+
*/
|
|
29
|
+
|
|
11
30
|
/**
|
|
12
31
|
* Helper function to dispatch progress callbacks.
|
|
13
32
|
*
|
|
14
|
-
* @param {Function} progress_callback The progress callback function to dispatch.
|
|
15
|
-
* @param {any} data The data to pass to the progress callback function.
|
|
33
|
+
* @param {ProgressCallback | null | undefined} progress_callback The progress callback function to dispatch.
|
|
34
|
+
* @param {ProgressInfo} data The data to pass to the progress callback function.
|
|
16
35
|
* @returns {void}
|
|
17
36
|
* @private
|
|
18
37
|
*/
|
|
@@ -46,7 +65,7 @@ export function escapeRegExp(string) {
|
|
|
46
65
|
* Check if a value is a typed array.
|
|
47
66
|
* @param {*} val The value to check.
|
|
48
67
|
* @returns {boolean} True if the value is a `TypedArray`, false otherwise.
|
|
49
|
-
*
|
|
68
|
+
*
|
|
50
69
|
* Adapted from https://stackoverflow.com/a/71091338/13989043
|
|
51
70
|
*/
|
|
52
71
|
export function isTypedArray(val) {
|
|
@@ -63,6 +82,15 @@ export function isIntegralNumber(x) {
|
|
|
63
82
|
return Number.isInteger(x) || typeof x === 'bigint'
|
|
64
83
|
}
|
|
65
84
|
|
|
85
|
+
/**
|
|
86
|
+
* Determine if a provided width or height is nullish.
|
|
87
|
+
* @param {*} x The value to check.
|
|
88
|
+
* @returns {boolean} True if the value is `null`, `undefined` or `-1`, false otherwise.
|
|
89
|
+
*/
|
|
90
|
+
export function isNullishDimension(x) {
|
|
91
|
+
return x === null || x === undefined || x === -1;
|
|
92
|
+
}
|
|
93
|
+
|
|
66
94
|
/**
|
|
67
95
|
* Calculates the dimensions of a nested array.
|
|
68
96
|
*
|
|
@@ -132,9 +160,9 @@ export function calculateReflectOffset(i, w) {
|
|
|
132
160
|
}
|
|
133
161
|
|
|
134
162
|
/**
|
|
135
|
-
*
|
|
136
|
-
* @param {Object} o
|
|
137
|
-
* @param {string[]} props
|
|
163
|
+
*
|
|
164
|
+
* @param {Object} o
|
|
165
|
+
* @param {string[]} props
|
|
138
166
|
* @returns {Object}
|
|
139
167
|
*/
|
|
140
168
|
export function pick(o, props) {
|
|
@@ -151,7 +179,7 @@ export function pick(o, props) {
|
|
|
151
179
|
/**
|
|
152
180
|
* Calculate the length of a string, taking multi-byte characters into account.
|
|
153
181
|
* This mimics the behavior of Python's `len` function.
|
|
154
|
-
* @param {string} s The string to calculate the length of.
|
|
182
|
+
* @param {string} s The string to calculate the length of.
|
|
155
183
|
* @returns {number} The length of the string.
|
|
156
184
|
*/
|
|
157
185
|
export function len(s) {
|
package/src/utils/hub.js
CHANGED
|
@@ -13,7 +13,7 @@ import { dispatchCallback } from './core.js';
|
|
|
13
13
|
|
|
14
14
|
/**
|
|
15
15
|
* @typedef {Object} PretrainedOptions Options for loading a pretrained model.
|
|
16
|
-
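* @property {function} [progress_callback=null] If specified, this function will be called during model construction, to provide the user with progress updates.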
|
|
16
|
+
* @property {import('./core.js').ProgressCallback} [progress_callback=null] If specified, this function will be called during model construction, to provide the user with progress updates.
|
|
17
17
|
* @property {import('../configs.js').PretrainedConfig} [config=null] Configuration for the model to use instead of an automatically loaded configuration. Configuration can be automatically loaded when:
|
|
18
18
|
* - The model is a model provided by the library (loaded with the *model id* string of a pretrained model).
|
|
19
19
|
* - The model is loaded by supplying a local directory as `pretrained_model_name_or_path` and a configuration JSON file named *config.json* is found in the directory.
|
|
@@ -504,6 +504,7 @@ export async function getModelFile(path_or_repo_id, filename, fatal = true, opti
|
|
|
504
504
|
file: filename
|
|
505
505
|
})
|
|
506
506
|
|
|
507
|
+
/** @type {import('./core.js').ProgressInfo} */
|
|
507
508
|
const progressInfo = {
|
|
508
509
|
status: 'progress',
|
|
509
510
|
name: path_or_repo_id,
|
package/src/utils/image.js
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
|
|
2
2
|
/**
|
|
3
|
-
* @file Helper module for image processing.
|
|
4
|
-
*
|
|
5
|
-
* These functions and classes are only used internally,
|
|
3
|
+
* @file Helper module for image processing.
|
|
4
|
+
*
|
|
5
|
+
* These functions and classes are only used internally,
|
|
6
6
|
* meaning an end-user shouldn't need to access anything here.
|
|
7
|
-
*
|
|
7
|
+
*
|
|
8
8
|
* @module utils/image
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
|
+
import { isNullishDimension } from './core.js';
|
|
11
12
|
import { getFile } from './hub.js';
|
|
12
13
|
import { env } from '../env.js';
|
|
13
14
|
import { Tensor } from './tensor.js';
|
|
@@ -91,7 +92,7 @@ export class RawImage {
|
|
|
91
92
|
this.channels = channels;
|
|
92
93
|
}
|
|
93
94
|
|
|
94
|
-
/**
|
|
95
|
+
/**
|
|
95
96
|
* Returns the size of the image (width, height).
|
|
96
97
|
* @returns {[number, number]} The size of the image (width, height).
|
|
97
98
|
*/
|
|
@@ -101,9 +102,9 @@ export class RawImage {
|
|
|
101
102
|
|
|
102
103
|
/**
|
|
103
104
|
* Helper method for reading an image from a variety of input types.
|
|
104
|
-
* @param {RawImage|string|URL} input
|
|
105
|
+
* @param {RawImage|string|URL} input
|
|
105
106
|
* @returns The image object.
|
|
106
|
-
*
|
|
107
|
+
*
|
|
107
108
|
* **Example:** Read image from a URL.
|
|
108
109
|
* ```javascript
|
|
109
110
|
* let image = await RawImage.read('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/football-match.jpg');
|
|
@@ -181,7 +182,7 @@ export class RawImage {
|
|
|
181
182
|
|
|
182
183
|
/**
|
|
183
184
|
* Helper method to create a new Image from a tensor
|
|
184
|
-
* @param {Tensor} tensor
|
|
185
|
+
* @param {Tensor} tensor
|
|
185
186
|
*/
|
|
186
187
|
static fromTensor(tensor, channel_format = 'CHW') {
|
|
187
188
|
if (tensor.dims.length !== 3) {
|
|
@@ -306,8 +307,8 @@ export class RawImage {
|
|
|
306
307
|
|
|
307
308
|
/**
|
|
308
309
|
* Resize the image to the given dimensions. This method uses the canvas API to perform the resizing.
|
|
309
|
-
* @param {number} width The width of the new image.
|
|
310
|
-
* @param {number} height The height of the new image.
|
|
310
|
+
* @param {number} width The width of the new image. `null` or `-1` will preserve the aspect ratio.
|
|
311
|
+
* @param {number} height The height of the new image. `null` or `-1` will preserve the aspect ratio.
|
|
311
312
|
* @param {Object} options Additional options for resizing.
|
|
312
313
|
* @param {0|1|2|3|4|5|string} [options.resample] The resampling method to use.
|
|
313
314
|
* @returns {Promise<RawImage>} `this` to support chaining.
|
|
@@ -316,9 +317,28 @@ export class RawImage {
|
|
|
316
317
|
resample = 2,
|
|
317
318
|
} = {}) {
|
|
318
319
|
|
|
320
|
+
// Do nothing if the image already has the desired size
|
|
321
|
+
if (this.width === width && this.height === height) {
|
|
322
|
+
return this;
|
|
323
|
+
}
|
|
324
|
+
|
|
319
325
|
// Ensure resample method is a string
|
|
320
326
|
let resampleMethod = RESAMPLING_MAPPING[resample] ?? resample;
|
|
321
327
|
|
|
328
|
+
// Calculate width / height to maintain aspect ratio, in the event that
|
|
329
|
+
// the user passed a null value in.
|
|
330
|
+
// This allows users to pass in something like `resize(320, null)` to
|
|
331
|
+
// resize to 320 width, but maintain aspect ratio.
|
|
332
|
+
const nullish_width = isNullishDimension(width);
|
|
333
|
+
const nullish_height = isNullishDimension(height);
|
|
334
|
+
if (nullish_width && nullish_height) {
|
|
335
|
+
return this;
|
|
336
|
+
} else if (nullish_width) {
|
|
337
|
+
width = (height / this.height) * this.width;
|
|
338
|
+
} else if (nullish_height) {
|
|
339
|
+
height = (width / this.width) * this.height;
|
|
340
|
+
}
|
|
341
|
+
|
|
322
342
|
if (BROWSER_ENV) {
|
|
323
343
|
// TODO use `resample` in browser environment
|
|
324
344
|
|
|
@@ -355,7 +375,7 @@ export class RawImage {
|
|
|
355
375
|
case 'nearest':
|
|
356
376
|
case 'bilinear':
|
|
357
377
|
case 'bicubic':
|
|
358
|
-
// Perform resizing using affine transform.
|
|
378
|
+
// Perform resizing using affine transform.
|
|
359
379
|
// This matches how the python Pillow library does it.
|
|
360
380
|
img = img.affine([width / this.width, 0, 0, height / this.height], {
|
|
361
381
|
interpolator: resampleMethod
|
|
@@ -368,7 +388,7 @@ export class RawImage {
|
|
|
368
388
|
img = img.resize({
|
|
369
389
|
width, height,
|
|
370
390
|
fit: 'fill',
|
|
371
|
-
kernel: 'lanczos3', // PIL Lanczos uses a kernel size of 3
|
|
391
|
+
kernel: 'lanczos3', // PIL Lanczos uses a kernel size of 3
|
|
372
392
|
});
|
|
373
393
|
break;
|
|
374
394
|
|
|
@@ -408,13 +428,14 @@ export class RawImage {
|
|
|
408
428
|
// Draw image to context, padding in the process
|
|
409
429
|
ctx.drawImage(canvas,
|
|
410
430
|
0, 0, this.width, this.height,
|
|
411
|
-
left, top,
|
|
431
|
+
left, top, this.width, this.height
|
|
412
432
|
);
|
|
413
433
|
|
|
414
434
|
// Create image from the padded data
|
|
415
435
|
const paddedImage = new RawImage(
|
|
416
436
|
ctx.getImageData(0, 0, newWidth, newHeight).data,
|
|
417
|
-
newWidth, newHeight, 4
|
|
437
|
+
newWidth, newHeight, 4
|
|
438
|
+
);
|
|
418
439
|
|
|
419
440
|
// Convert back so that image has the same number of channels as before
|
|
420
441
|
return paddedImage.convert(numChannels);
|
|
@@ -447,7 +468,7 @@ export class RawImage {
|
|
|
447
468
|
// Create canvas object for this image
|
|
448
469
|
const canvas = this.toCanvas();
|
|
449
470
|
|
|
450
|
-
// Create a new canvas of the desired size. This is needed since if the
|
|
471
|
+
// Create a new canvas of the desired size. This is needed since if the
|
|
451
472
|
// image is too small, we need to pad it with black pixels.
|
|
452
473
|
const ctx = createCanvasFunction(crop_width, crop_height).getContext('2d');
|
|
453
474
|
|
|
@@ -495,7 +516,7 @@ export class RawImage {
|
|
|
495
516
|
// Create canvas object for this image
|
|
496
517
|
const canvas = this.toCanvas();
|
|
497
518
|
|
|
498
|
-
// Create a new canvas of the desired size. This is needed since if the
|
|
519
|
+
// Create a new canvas of the desired size. This is needed since if the
|
|
499
520
|
// image is too small, we need to pad it with black pixels.
|
|
500
521
|
const ctx = createCanvasFunction(crop_width, crop_height).getContext('2d');
|
|
501
522
|
|
|
@@ -637,6 +658,36 @@ export class RawImage {
|
|
|
637
658
|
return clonedCanvas;
|
|
638
659
|
}
|
|
639
660
|
|
|
661
|
+
/**
|
|
662
|
+
* Split this image into individual bands. This method returns an array of individual image bands from an image.
|
|
663
|
+
* For example, splitting an "RGB" image creates three new images each containing a copy of one of the original bands (red, green, blue).
|
|
664
|
+
*
|
|
665
|
+
* Inspired by PIL's `Image.split()` [function](https://pillow.readthedocs.io/en/latest/reference/Image.html#PIL.Image.Image.split).
|
|
666
|
+
* @returns {RawImage[]} An array containing bands.
|
|
667
|
+
*/
|
|
668
|
+
split() {
|
|
669
|
+
const { data, width, height, channels } = this;
|
|
670
|
+
|
|
671
|
+
/** @type {typeof Uint8Array | typeof Uint8ClampedArray} */
|
|
672
|
+
const data_type = /** @type {any} */(data.constructor);
|
|
673
|
+
const per_channel_length = data.length / channels;
|
|
674
|
+
|
|
675
|
+
// Pre-allocate buffers for each channel
|
|
676
|
+
const split_data = Array.from(
|
|
677
|
+
{ length: channels },
|
|
678
|
+
() => new data_type(per_channel_length),
|
|
679
|
+
);
|
|
680
|
+
|
|
681
|
+
// Write pixel data
|
|
682
|
+
for (let i = 0; i < per_channel_length; ++i) {
|
|
683
|
+
const data_offset = channels * i;
|
|
684
|
+
for (let j = 0; j < channels; ++j) {
|
|
685
|
+
split_data[j][i] = data[data_offset + j];
|
|
686
|
+
}
|
|
687
|
+
}
|
|
688
|
+
return split_data.map((data) => new RawImage(data, width, height, 1));
|
|
689
|
+
}
|
|
690
|
+
|
|
640
691
|
/**
|
|
641
692
|
* Helper method to update the image data.
|
|
642
693
|
* @param {Uint8ClampedArray} data The new image data.
|
|
@@ -742,4 +793,4 @@ export class RawImage {
|
|
|
742
793
|
}
|
|
743
794
|
});
|
|
744
795
|
}
|
|
745
|
-
}
|
|
796
|
+
}
|
package/src/utils/tensor.js
CHANGED
|
@@ -340,10 +340,43 @@ export class Tensor {
|
|
|
340
340
|
return this;
|
|
341
341
|
}
|
|
342
342
|
|
|
343
|
+
/**
|
|
344
|
+
* Creates a deep copy of the current Tensor.
|
|
345
|
+
* @returns {Tensor} A new Tensor with the same type, data, and dimensions as the original.
|
|
346
|
+
*/
|
|
343
347
|
clone() {
|
|
344
348
|
return new Tensor(this.type, this.data.slice(), this.dims.slice());
|
|
345
349
|
}
|
|
346
350
|
|
|
351
|
+
/**
|
|
352
|
+
* Performs a slice operation on the Tensor along specified dimensions.
|
|
353
|
+
*
|
|
354
|
+
* Consider a Tensor that has a dimension of [4, 7]:
|
|
355
|
+
* ```
|
|
356
|
+
* [ 1, 2, 3, 4, 5, 6, 7]
|
|
357
|
+
* [ 8, 9, 10, 11, 12, 13, 14]
|
|
358
|
+
* [15, 16, 17, 18, 19, 20, 21]
|
|
359
|
+
* [22, 23, 24, 25, 26, 27, 28]
|
|
360
|
+
* ```
|
|
361
|
+
* We can slice along both dimensions (rows and columns); for instance, in this
|
|
362
|
+
* case we can start at the second element and stop before the last element,
|
|
363
|
+
* like this:
|
|
364
|
+
* ```
|
|
365
|
+
* tensor.slice([1, -1], [1, -1]);
|
|
366
|
+
* ```
|
|
367
|
+
* which would return:
|
|
368
|
+
* ```
|
|
369
|
+
* [ 9, 10, 11, 12, 13 ]
|
|
370
|
+
* [ 16, 17, 18, 19, 20 ]
|
|
371
|
+
* ```
|
|
372
|
+
*
|
|
373
|
+
* @param {...(number|number[]|null)} slices The slice specifications for each dimension.
|
|
374
|
+
* - If a number is given, then a single element is selected.
|
|
375
|
+
* - If an array of two numbers is given, then a range of elements [start, end (exclusive)] is selected.
|
|
376
|
+
* - If null is given, then the entire dimension is selected.
|
|
377
|
+
* @returns {Tensor} A new Tensor containing the selected elements.
|
|
378
|
+
* @throws {Error} If the slice input is invalid.
|
|
379
|
+
*/
|
|
347
380
|
slice(...slices) {
|
|
348
381
|
// This allows for slicing with ranges and numbers
|
|
349
382
|
const newTensorDims = [];
|
|
@@ -413,7 +446,6 @@ export class Tensor {
|
|
|
413
446
|
data[i] = this_data[originalIndex];
|
|
414
447
|
}
|
|
415
448
|
return new Tensor(this.type, data, newTensorDims);
|
|
416
|
-
|
|
417
449
|
}
|
|
418
450
|
|
|
419
451
|
/**
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Helper function to validate audio inputs.
|
|
3
|
+
* @param {any} audio The audio data.
|
|
4
|
+
* @param {string} feature_extractor The name of the feature extractor.
|
|
5
|
+
* @private
|
|
6
|
+
*/
|
|
7
|
+
export function validate_audio_inputs(audio: any, feature_extractor: string): void;
|
|
8
|
+
declare const FeatureExtractor_base: new () => {
|
|
9
|
+
(...args: any[]): any;
|
|
10
|
+
_call(...args: any[]): any;
|
|
11
|
+
};
|
|
12
|
+
/**
|
|
13
|
+
* Base class for feature extractors.
|
|
14
|
+
*/
|
|
15
|
+
export class FeatureExtractor extends FeatureExtractor_base {
|
|
16
|
+
/**
|
|
17
|
+
* Instantiate one of the processor classes of the library from a pretrained model.
|
|
18
|
+
*
|
|
19
|
+
* The processor class to instantiate is selected based on the `image_processor_type` (or `feature_extractor_type`; legacy)
|
|
20
|
+
* property of the config object (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
|
|
21
|
+
*
|
|
22
|
+
* @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
|
|
23
|
+
* - A string, the *model id* of a pretrained processor hosted inside a model repo on huggingface.co.
|
|
24
|
+
* Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
|
|
25
|
+
* user or organization name, like `dbmdz/bert-base-german-cased`.
|
|
26
|
+
* - A path to a *directory* containing processor files, e.g., `./my_model_directory/`.
|
|
27
|
+
* @param {import('../utils/hub.js').PretrainedOptions} options Additional options for loading the processor.
|
|
28
|
+
*
|
|
29
|
+
* @returns {Promise<FeatureExtractor>} A new instance of the FeatureExtractor class.
|
|
30
|
+
*/
|
|
31
|
+
static from_pretrained(pretrained_model_name_or_path: string, options: import('../utils/hub.js').PretrainedOptions): Promise<FeatureExtractor>;
|
|
32
|
+
/**
|
|
33
|
+
* Constructs a new FeatureExtractor instance.
|
|
34
|
+
*
|
|
35
|
+
* @param {Object} config The configuration for the feature extractor.
|
|
36
|
+
*/
|
|
37
|
+
constructor(config: any);
|
|
38
|
+
config: any;
|
|
39
|
+
}
|
|
40
|
+
export {};
|
|
41
|
+
//# sourceMappingURL=feature_extraction_utils.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"feature_extraction_utils.d.ts","sourceRoot":"","sources":["../../src/base/feature_extraction_utils.js"],"names":[],"mappings":"AAwCA;;;;;GAKG;AACH,6CAJW,GAAG,qBACH,MAAM,QAUhB;;;;;AAjDD;;GAEG;AACH;IAWI;;;;;;;;;;;;;;OAcG;IACH,sDATW,MAAM,WAKN,OAAO,iBAAiB,EAAE,iBAAiB,GAEzC,QAAQ,gBAAgB,CAAC,CAKrC;IA5BD;;;;OAIG;IACH,yBAGC;IADG,YAAoB;CAsB3B"}
|