@huggingface/transformers 3.0.0 → 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -32
- package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
- package/dist/transformers.cjs +83 -30
- package/dist/transformers.cjs.map +1 -1
- package/dist/transformers.js +462 -413
- package/dist/transformers.js.map +1 -1
- package/dist/transformers.min.cjs +6 -6
- package/dist/transformers.min.cjs.map +1 -1
- package/dist/transformers.min.js +39 -39
- package/dist/transformers.min.js.map +1 -1
- package/dist/transformers.min.mjs +9 -9
- package/dist/transformers.min.mjs.map +1 -1
- package/dist/transformers.mjs +91 -31
- package/dist/transformers.mjs.map +1 -1
- package/package.json +5 -5
- package/src/configs.js +2 -0
- package/src/env.js +15 -4
- package/src/models.js +50 -25
- package/src/pipelines.js +0 -1
- package/src/processors.js +2 -0
- package/types/configs.d.ts.map +1 -1
- package/types/env.d.ts.map +1 -1
- package/types/models.d.ts +12 -0
- package/types/models.d.ts.map +1 -1
- package/types/pipelines.d.ts.map +1 -1
- package/types/processors.d.ts +3 -0
- package/types/processors.d.ts.map +1 -1
package/README.md
CHANGED
|
@@ -11,25 +11,19 @@
|
|
|
11
11
|
</p>
|
|
12
12
|
|
|
13
13
|
<p align="center">
|
|
14
|
-
<a href="https://www.npmjs.com/package/@huggingface/transformers">
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
<a href="https://
|
|
18
|
-
|
|
19
|
-
</a>
|
|
20
|
-
<a href="https://www.jsdelivr.com/package/npm/@huggingface/transformers">
|
|
21
|
-
<img alt="jsDelivr Hits" src="https://img.shields.io/jsdelivr/npm/hw/@huggingface/transformers">
|
|
22
|
-
</a>
|
|
23
|
-
<a href="https://github.com/huggingface/transformers.js/blob/main/LICENSE">
|
|
24
|
-
<img alt="License" src="https://img.shields.io/github/license/huggingface/transformers.js?color=blue">
|
|
25
|
-
</a>
|
|
26
|
-
<a href="https://huggingface.co/docs/transformers.js/index">
|
|
27
|
-
<img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers.js/index.svg?down_color=red&down_message=offline&up_message=online">
|
|
28
|
-
</a>
|
|
14
|
+
<a href="https://www.npmjs.com/package/@huggingface/transformers"><img alt="NPM" src="https://img.shields.io/npm/v/@huggingface/transformers"></a>
|
|
15
|
+
<a href="https://www.npmjs.com/package/@huggingface/transformers"><img alt="NPM Downloads" src="https://img.shields.io/npm/dw/@huggingface/transformers"></a>
|
|
16
|
+
<a href="https://www.jsdelivr.com/package/npm/@huggingface/transformers"><img alt="jsDelivr Hits" src="https://img.shields.io/jsdelivr/npm/hw/@huggingface/transformers"></a>
|
|
17
|
+
<a href="https://github.com/huggingface/transformers.js/blob/main/LICENSE"><img alt="License" src="https://img.shields.io/github/license/huggingface/transformers.js?color=blue"></a>
|
|
18
|
+
<a href="https://huggingface.co/docs/transformers.js/index"><img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers.js/index.svg?down_color=red&down_message=offline&up_message=online"></a>
|
|
29
19
|
</p>
|
|
30
20
|
|
|
31
21
|
|
|
32
|
-
|
|
22
|
+
<h3 align="center">
|
|
23
|
+
<p>State-of-the-art Machine Learning for the Web</p>
|
|
24
|
+
</h3>
|
|
25
|
+
|
|
26
|
+
Run 🤗 Transformers directly in your browser, with no need for a server!
|
|
33
27
|
|
|
34
28
|
Transformers.js is designed to be functionally equivalent to Hugging Face's [transformers](https://github.com/huggingface/transformers) python library, meaning you can run the same pretrained models using a very similar API. These models support common tasks in different modalities, such as:
|
|
35
29
|
- 📝 **Natural Language Processing**: text classification, named entity recognition, question answering, language modeling, summarization, translation, multiple choice, and text generation.
|
|
@@ -42,6 +36,22 @@ Transformers.js uses [ONNX Runtime](https://onnxruntime.ai/) to run models in th
|
|
|
42
36
|
For more information, check out the full [documentation](https://huggingface.co/docs/transformers.js).
|
|
43
37
|
|
|
44
38
|
|
|
39
|
+
## Installation
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
To install via [NPM](https://www.npmjs.com/package/@huggingface/transformers), run:
|
|
43
|
+
```bash
|
|
44
|
+
npm i @huggingface/transformers
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Alternatively, you can use it in vanilla JS, without any bundler, by using a CDN or static hosting. For example, using [ES Modules](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Modules), you can import the library with:
|
|
48
|
+
```html
|
|
49
|
+
<script type="module">
|
|
50
|
+
import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.2';
|
|
51
|
+
</script>
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
|
|
45
55
|
## Quick tour
|
|
46
56
|
|
|
47
57
|
|
|
@@ -72,9 +82,9 @@ out = pipe('I love transformers!')
|
|
|
72
82
|
import { pipeline } from '@huggingface/transformers';
|
|
73
83
|
|
|
74
84
|
// Allocate a pipeline for sentiment-analysis
|
|
75
|
-
|
|
85
|
+
const pipe = await pipeline('sentiment-analysis');
|
|
76
86
|
|
|
77
|
-
|
|
87
|
+
const out = await pipe('I love transformers!');
|
|
78
88
|
// [{'label': 'POSITIVE', 'score': 0.999817686}]
|
|
79
89
|
```
|
|
80
90
|
|
|
@@ -86,29 +96,40 @@ let out = await pipe('I love transformers!');
|
|
|
86
96
|
You can also use a different model by specifying the model id or path as the second argument to the `pipeline` function. For example:
|
|
87
97
|
```javascript
|
|
88
98
|
// Use a different model for sentiment-analysis
|
|
89
|
-
|
|
99
|
+
const pipe = await pipeline('sentiment-analysis', 'Xenova/bert-base-multilingual-uncased-sentiment');
|
|
90
100
|
```
|
|
91
101
|
|
|
102
|
+
By default, when running in the browser, the model will be run on your CPU (via WASM). If you would like
|
|
103
|
+
to run the model on your GPU (via WebGPU), you can do this by setting `device: 'webgpu'`, for example:
|
|
104
|
+
```javascript
|
|
105
|
+
// Run the model on WebGPU
|
|
106
|
+
const pipe = await pipeline('sentiment-analysis', 'Xenova/distilbert-base-uncased-finetuned-sst-2-english', {
|
|
107
|
+
device: 'webgpu',
|
|
108
|
+
});
|
|
109
|
+
```
|
|
92
110
|
|
|
93
|
-
|
|
94
|
-
|
|
111
|
+
For more information, check out the [WebGPU guide](https://huggingface.co/docs/transformers.js/guides/webgpu).
|
|
95
112
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
```
|
|
113
|
+
> [!WARNING]
|
|
114
|
+
> The WebGPU API is still experimental in many browsers, so if you run into any issues,
|
|
115
|
+
> please file a [bug report](https://github.com/huggingface/transformers.js/issues/new?title=%5BWebGPU%5D%20Error%20running%20MODEL_ID_GOES_HERE&assignees=&labels=bug,webgpu&projects=&template=1_bug-report.yml).
|
|
100
116
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
117
|
+
In resource-constrained environments, such as web browsers, it is advisable to use a quantized version of
|
|
118
|
+
the model to lower bandwidth and optimize performance. This can be achieved by adjusting the `dtype` option,
|
|
119
|
+
which allows you to select the appropriate data type for your model. While the available options may vary
|
|
120
|
+
depending on the specific model, typical choices include `"fp32"` (default for WebGPU), `"fp16"`, `"q8"`
|
|
121
|
+
(default for WASM), and `"q4"`. For more information, check out the [quantization guide](https://huggingface.co/docs/transformers.js/guides/dtypes).
|
|
122
|
+
```javascript
|
|
123
|
+
// Run the model at 4-bit quantization
|
|
124
|
+
const pipe = await pipeline('sentiment-analysis', 'Xenova/distilbert-base-uncased-finetuned-sst-2-english', {
|
|
125
|
+
dtype: 'q4',
|
|
126
|
+
});
|
|
106
127
|
```
|
|
107
128
|
|
|
108
129
|
|
|
109
130
|
## Examples
|
|
110
131
|
|
|
111
|
-
Want to jump straight in? Get started with one of our sample applications/templates
|
|
132
|
+
Want to jump straight in? Get started with one of our sample applications/templates, which can be found [here](https://github.com/huggingface/transformers.js-examples).
|
|
112
133
|
|
|
113
134
|
| Name | Description | Links |
|
|
114
135
|
|-------------------|----------------------------------|-------------------------------|
|
|
@@ -134,7 +155,7 @@ Check out the Transformers.js [template](https://huggingface.co/new-space?templa
|
|
|
134
155
|
|
|
135
156
|
|
|
136
157
|
|
|
137
|
-
By default, Transformers.js uses [hosted pretrained models](https://huggingface.co/models?library=transformers.js) and [precompiled WASM binaries](https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0/dist/), which should work out-of-the-box. You can customize this as follows:
|
|
158
|
+
By default, Transformers.js uses [hosted pretrained models](https://huggingface.co/models?library=transformers.js) and [precompiled WASM binaries](https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.2/dist/), which should work out-of-the-box. You can customize this as follows:
|
|
138
159
|
|
|
139
160
|
### Settings
|
|
140
161
|
|
|
@@ -331,6 +352,7 @@ You can refine your search by selecting the task you're interested in (e.g., [te
|
|
|
331
352
|
1. **[MMS](https://huggingface.co/docs/transformers/model_doc/mms)** (from Facebook) released with the paper [Scaling Speech Technology to 1,000+ Languages](https://arxiv.org/abs/2305.13516) by Vineel Pratap, Andros Tjandra, Bowen Shi, Paden Tomasello, Arun Babu, Sayani Kundu, Ali Elkahky, Zhaoheng Ni, Apoorv Vyas, Maryam Fazel-Zarandi, Alexei Baevski, Yossi Adi, Xiaohui Zhang, Wei-Ning Hsu, Alexis Conneau, Michael Auli.
|
|
332
353
|
1. **[MobileBERT](https://huggingface.co/docs/transformers/model_doc/mobilebert)** (from CMU/Google Brain) released with the paper [MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices](https://arxiv.org/abs/2004.02984) by Zhiqing Sun, Hongkun Yu, Xiaodan Song, Renjie Liu, Yiming Yang, and Denny Zhou.
|
|
333
354
|
1. **MobileCLIP** (from Apple) released with the paper [MobileCLIP: Fast Image-Text Models through Multi-Modal Reinforced Training](https://arxiv.org/abs/2311.17049) by Pavan Kumar Anasosalu Vasu, Hadi Pouransari, Fartash Faghri, Raviteja Vemulapalli, Oncel Tuzel.
|
|
355
|
+
1. **MobileLLM** (from Meta) released with the paper [MobileLLM: Optimizing Sub-billion Parameter Language Models for On-Device Use Cases](https://arxiv.org/abs/2402.14905) by Zechun Liu, Changsheng Zhao, Forrest Iandola, Chen Lai, Yuandong Tian, Igor Fedorov, Yunyang Xiong, Ernie Chang, Yangyang Shi, Raghuraman Krishnamoorthi, Liangzhen Lai, Vikas Chandra.
|
|
334
356
|
1. **[MobileNetV1](https://huggingface.co/docs/transformers/model_doc/mobilenet_v1)** (from Google Inc.) released with the paper [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications](https://arxiv.org/abs/1704.04861) by Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, Hartwig Adam.
|
|
335
357
|
1. **[MobileNetV2](https://huggingface.co/docs/transformers/model_doc/mobilenet_v2)** (from Google Inc.) released with the paper [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) by Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen.
|
|
336
358
|
1. **MobileNetV3** (from Google Inc.) released with the paper [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244) by Andrew Howard, Mark Sandler, Grace Chu, Liang-Chieh Chen, Bo Chen, Mingxing Tan, Weijun Wang, Yukun Zhu, Ruoming Pang, Vijay Vasudevan, Quoc V. Le, Hartwig Adam.
|
|
@@ -343,6 +365,7 @@ You can refine your search by selecting the task you're interested in (e.g., [te
|
|
|
343
365
|
1. **[MT5](https://huggingface.co/docs/transformers/model_doc/mt5)** (from Google AI) released with the paper [mT5: A massively multilingual pre-trained text-to-text transformer](https://arxiv.org/abs/2010.11934) by Linting Xue, Noah Constant, Adam Roberts, Mihir Kale, Rami Al-Rfou, Aditya Siddhant, Aditya Barua, Colin Raffel.
|
|
344
366
|
1. **[NLLB](https://huggingface.co/docs/transformers/model_doc/nllb)** (from Meta) released with the paper [No Language Left Behind: Scaling Human-Centered Machine Translation](https://arxiv.org/abs/2207.04672) by the NLLB team.
|
|
345
367
|
1. **[Nougat](https://huggingface.co/docs/transformers/model_doc/nougat)** (from Meta AI) released with the paper [Nougat: Neural Optical Understanding for Academic Documents](https://arxiv.org/abs/2308.13418) by Lukas Blecher, Guillem Cucurull, Thomas Scialom, Robert Stojnic.
|
|
368
|
+
1. **[OLMo](https://huggingface.co/docs/transformers/master/model_doc/olmo)** (from AI2) released with the paper [OLMo: Accelerating the Science of Language Models](https://arxiv.org/abs/2402.00838) by Dirk Groeneveld, Iz Beltagy, Pete Walsh, Akshita Bhagia, Rodney Kinney, Oyvind Tafjord, Ananya Harsh Jha, Hamish Ivison, Ian Magnusson, Yizhong Wang, Shane Arora, David Atkinson, Russell Authur, Khyathi Raghavi Chandu, Arman Cohan, Jennifer Dumas, Yanai Elazar, Yuling Gu, Jack Hessel, Tushar Khot, William Merrill, Jacob Morrison, Niklas Muennighoff, Aakanksha Naik, Crystal Nam, Matthew E. Peters, Valentina Pyatkin, Abhilasha Ravichander, Dustin Schwenk, Saurabh Shah, Will Smith, Emma Strubell, Nishant Subramani, Mitchell Wortsman, Pradeep Dasigi, Nathan Lambert, Kyle Richardson, Luke Zettlemoyer, Jesse Dodge, Kyle Lo, Luca Soldaini, Noah A. Smith, Hannaneh Hajishirzi.
|
|
346
369
|
1. **OpenELM** (from Apple) released with the paper [OpenELM: An Efficient Language Model Family with Open-source Training and Inference Framework](https://arxiv.org/abs/2404.14619) by Sachin Mehta, Mohammad Hossein Sekhavat, Qingqing Cao, Maxwell Horton, Yanzi Jin, Chenfan Sun, Iman Mirzadeh, Mahyar Najibi, Dmitry Belenko, Peter Zatloukal, Mohammad Rastegari.
|
|
347
370
|
1. **[OPT](https://huggingface.co/docs/transformers/master/model_doc/opt)** (from Meta AI) released with the paper [OPT: Open Pre-trained Transformer Language Models](https://arxiv.org/abs/2205.01068) by Susan Zhang, Stephen Roller, Naman Goyal, Mikel Artetxe, Moya Chen, Shuohui Chen et al.
|
|
348
371
|
1. **[OWL-ViT](https://huggingface.co/docs/transformers/model_doc/owlvit)** (from Google AI) released with the paper [Simple Open-Vocabulary Object Detection with Vision Transformers](https://arxiv.org/abs/2205.06230) by Matthias Minderer, Alexey Gritsenko, Austin Stone, Maxim Neumann, Dirk Weissenborn, Alexey Dosovitskiy, Aravindh Mahendran, Anurag Arnab, Mostafa Dehghani, Zhuoran Shen, Xiao Wang, Xiaohua Zhai, Thomas Kipf, and Neil Houlsby.
|
|
Binary file
|
package/dist/transformers.cjs
CHANGED
|
@@ -4131,6 +4131,8 @@ function getNormalizedConfig(config) {
|
|
|
4131
4131
|
mapping['hidden_size'] = 'hidden_size';
|
|
4132
4132
|
break;
|
|
4133
4133
|
case 'llama':
|
|
4134
|
+
case 'olmo':
|
|
4135
|
+
case 'mobilellm':
|
|
4134
4136
|
case 'granite':
|
|
4135
4137
|
case 'cohere':
|
|
4136
4138
|
case 'mistral':
|
|
@@ -4460,7 +4462,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
4460
4462
|
|
|
4461
4463
|
|
|
4462
4464
|
|
|
4463
|
-
const VERSION = '3.0.0';
|
|
4465
|
+
const VERSION = '3.0.2';
|
|
4464
4466
|
|
|
4465
4467
|
// Check if various APIs are available (depends on environment)
|
|
4466
4468
|
const IS_BROWSER_ENV = typeof self !== 'undefined';
|
|
@@ -4507,9 +4509,20 @@ const apis = Object.freeze({
|
|
|
4507
4509
|
});
|
|
4508
4510
|
|
|
4509
4511
|
const RUNNING_LOCALLY = IS_FS_AVAILABLE && IS_PATH_AVAILABLE;
|
|
4510
|
-
|
|
4511
|
-
|
|
4512
|
-
|
|
4512
|
+
|
|
4513
|
+
let dirname__ = './';
|
|
4514
|
+
if (RUNNING_LOCALLY) {
|
|
4515
|
+
// NOTE: We wrap `import.meta` in a call to `Object` to prevent Webpack from trying to bundle it in CommonJS.
|
|
4516
|
+
// Although we get the warning: "Accessing import.meta directly is unsupported (only property access or destructuring is supported)",
|
|
4517
|
+
// it is safe to ignore since the bundled value (`{}`) isn't used for CommonJS environments (we use __dirname instead).
|
|
4518
|
+
const _import_meta_url = Object(({})).url;
|
|
4519
|
+
|
|
4520
|
+
if (_import_meta_url) {
|
|
4521
|
+
dirname__ = path__WEBPACK_IMPORTED_MODULE_1__.dirname(path__WEBPACK_IMPORTED_MODULE_1__.dirname(url__WEBPACK_IMPORTED_MODULE_2__.fileURLToPath(_import_meta_url))) // ESM
|
|
4522
|
+
} else if (typeof __dirname !== 'undefined') {
|
|
4523
|
+
dirname__ = path__WEBPACK_IMPORTED_MODULE_1__.dirname(__dirname) // CommonJS
|
|
4524
|
+
}
|
|
4525
|
+
}
|
|
4513
4526
|
|
|
4514
4527
|
// Only used for environments with access to file system
|
|
4515
4528
|
const DEFAULT_CACHE_DIR = RUNNING_LOCALLY
|
|
@@ -6616,6 +6629,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
6616
6629
|
/* harmony export */ MobileBertForSequenceClassification: () => (/* binding */ MobileBertForSequenceClassification),
|
|
6617
6630
|
/* harmony export */ MobileBertModel: () => (/* binding */ MobileBertModel),
|
|
6618
6631
|
/* harmony export */ MobileBertPreTrainedModel: () => (/* binding */ MobileBertPreTrainedModel),
|
|
6632
|
+
/* harmony export */ MobileLLMForCausalLM: () => (/* binding */ MobileLLMForCausalLM),
|
|
6633
|
+
/* harmony export */ MobileLLMModel: () => (/* binding */ MobileLLMModel),
|
|
6634
|
+
/* harmony export */ MobileLLMPreTrainedModel: () => (/* binding */ MobileLLMPreTrainedModel),
|
|
6619
6635
|
/* harmony export */ MobileNetV1ForImageClassification: () => (/* binding */ MobileNetV1ForImageClassification),
|
|
6620
6636
|
/* harmony export */ MobileNetV1Model: () => (/* binding */ MobileNetV1Model),
|
|
6621
6637
|
/* harmony export */ MobileNetV1PreTrainedModel: () => (/* binding */ MobileNetV1PreTrainedModel),
|
|
@@ -6648,6 +6664,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
6648
6664
|
/* harmony export */ OPTForCausalLM: () => (/* binding */ OPTForCausalLM),
|
|
6649
6665
|
/* harmony export */ OPTModel: () => (/* binding */ OPTModel),
|
|
6650
6666
|
/* harmony export */ OPTPreTrainedModel: () => (/* binding */ OPTPreTrainedModel),
|
|
6667
|
+
/* harmony export */ OlmoForCausalLM: () => (/* binding */ OlmoForCausalLM),
|
|
6668
|
+
/* harmony export */ OlmoModel: () => (/* binding */ OlmoModel),
|
|
6669
|
+
/* harmony export */ OlmoPreTrainedModel: () => (/* binding */ OlmoPreTrainedModel),
|
|
6651
6670
|
/* harmony export */ OpenELMForCausalLM: () => (/* binding */ OpenELMForCausalLM),
|
|
6652
6671
|
/* harmony export */ OpenELMModel: () => (/* binding */ OpenELMModel),
|
|
6653
6672
|
/* harmony export */ OpenELMPreTrainedModel: () => (/* binding */ OpenELMPreTrainedModel),
|
|
@@ -7181,7 +7200,7 @@ function replaceTensors(obj) {
|
|
|
7181
7200
|
|
|
7182
7201
|
/**
|
|
7183
7202
|
* Converts an array or Tensor of integers to an int64 Tensor.
|
|
7184
|
-
* @param {
|
|
7203
|
+
* @param {any[]|Tensor} items The input integers to be converted.
|
|
7185
7204
|
* @returns {Tensor} The int64 Tensor with the converted values.
|
|
7186
7205
|
* @throws {Error} If the input array is empty or the input is a batched Tensor and not all sequences have the same length.
|
|
7187
7206
|
* @private
|
|
@@ -8104,35 +8123,37 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
|
|
|
8104
8123
|
let { decoder_input_ids, ...model_inputs } = model_kwargs;
|
|
8105
8124
|
|
|
8106
8125
|
// Prepare input ids if the user has not defined `decoder_input_ids` manually.
|
|
8107
|
-
if (!decoder_input_ids) {
|
|
8108
|
-
|
|
8109
|
-
|
|
8110
|
-
|
|
8111
|
-
|
|
8112
|
-
|
|
8113
|
-
|
|
8114
|
-
|
|
8115
|
-
|
|
8116
|
-
|
|
8117
|
-
if (decoder_start_token_id
|
|
8118
|
-
|
|
8119
|
-
|
|
8120
|
-
|
|
8126
|
+
if (!(decoder_input_ids instanceof _utils_tensor_js__WEBPACK_IMPORTED_MODULE_9__.Tensor)) {
|
|
8127
|
+
if (!decoder_input_ids) {
|
|
8128
|
+
decoder_start_token_id ??= bos_token_id;
|
|
8129
|
+
|
|
8130
|
+
if (this.config.model_type === 'musicgen') {
|
|
8131
|
+
// Custom logic (TODO: move to Musicgen class)
|
|
8132
|
+
decoder_input_ids = Array.from({
|
|
8133
|
+
length: batch_size * this.config.decoder.num_codebooks
|
|
8134
|
+
}, () => [decoder_start_token_id]);
|
|
8135
|
+
|
|
8136
|
+
} else if (Array.isArray(decoder_start_token_id)) {
|
|
8137
|
+
if (decoder_start_token_id.length !== batch_size) {
|
|
8138
|
+
throw new Error(
|
|
8139
|
+
`\`decoder_start_token_id\` expcted to have length ${batch_size} but got ${decoder_start_token_id.length}`
|
|
8140
|
+
)
|
|
8141
|
+
}
|
|
8142
|
+
decoder_input_ids = decoder_start_token_id;
|
|
8143
|
+
} else {
|
|
8144
|
+
decoder_input_ids = Array.from({
|
|
8145
|
+
length: batch_size,
|
|
8146
|
+
}, () => [decoder_start_token_id]);
|
|
8121
8147
|
}
|
|
8122
|
-
|
|
8123
|
-
|
|
8148
|
+
} else if (!Array.isArray(decoder_input_ids[0])) {
|
|
8149
|
+
// Correct batch size
|
|
8124
8150
|
decoder_input_ids = Array.from({
|
|
8125
8151
|
length: batch_size,
|
|
8126
|
-
}, () =>
|
|
8152
|
+
}, () => decoder_input_ids);
|
|
8127
8153
|
}
|
|
8128
|
-
|
|
8129
|
-
// Correct batch size
|
|
8130
|
-
decoder_input_ids = Array.from({
|
|
8131
|
-
length: batch_size,
|
|
8132
|
-
}, () => decoder_input_ids);
|
|
8154
|
+
decoder_input_ids = toI64Tensor(decoder_input_ids);
|
|
8133
8155
|
}
|
|
8134
8156
|
|
|
8135
|
-
decoder_input_ids = toI64Tensor(decoder_input_ids);
|
|
8136
8157
|
model_kwargs['decoder_attention_mask'] = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_9__.ones_like)(decoder_input_ids);
|
|
8137
8158
|
|
|
8138
8159
|
return { input_ids: decoder_input_ids, model_inputs };
|
|
@@ -9955,8 +9976,11 @@ class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
|
|
|
9955
9976
|
class VisionEncoderDecoderModel extends PreTrainedModel {
|
|
9956
9977
|
main_input_name = 'pixel_values';
|
|
9957
9978
|
forward_params = [
|
|
9979
|
+
// Encoder inputs
|
|
9958
9980
|
'pixel_values',
|
|
9959
|
-
|
|
9981
|
+
|
|
9982
|
+
// Decoder inputs
|
|
9983
|
+
'decoder_input_ids',
|
|
9960
9984
|
'encoder_hidden_states',
|
|
9961
9985
|
'past_key_values',
|
|
9962
9986
|
];
|
|
@@ -10575,6 +10599,22 @@ class LlamaForCausalLM extends LlamaPreTrainedModel { }
|
|
|
10575
10599
|
//////////////////////////////////////////////////
|
|
10576
10600
|
|
|
10577
10601
|
|
|
10602
|
+
//////////////////////////////////////////////////
|
|
10603
|
+
// MobileLLM models
|
|
10604
|
+
class MobileLLMPreTrainedModel extends PreTrainedModel { }
|
|
10605
|
+
class MobileLLMModel extends MobileLLMPreTrainedModel { }
|
|
10606
|
+
class MobileLLMForCausalLM extends MobileLLMPreTrainedModel { }
|
|
10607
|
+
//////////////////////////////////////////////////
|
|
10608
|
+
|
|
10609
|
+
|
|
10610
|
+
//////////////////////////////////////////////////
|
|
10611
|
+
// OLMo models
|
|
10612
|
+
class OlmoPreTrainedModel extends PreTrainedModel { }
|
|
10613
|
+
class OlmoModel extends OlmoPreTrainedModel { }
|
|
10614
|
+
class OlmoForCausalLM extends OlmoPreTrainedModel { }
|
|
10615
|
+
//////////////////////////////////////////////////
|
|
10616
|
+
|
|
10617
|
+
|
|
10578
10618
|
//////////////////////////////////////////////////
|
|
10579
10619
|
// Granite models
|
|
10580
10620
|
class GranitePreTrainedModel extends PreTrainedModel { }
|
|
@@ -12890,6 +12930,8 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
|
|
|
12890
12930
|
['gpt_neox', ['GPTNeoXModel', GPTNeoXModel]],
|
|
12891
12931
|
['codegen', ['CodeGenModel', CodeGenModel]],
|
|
12892
12932
|
['llama', ['LlamaModel', LlamaModel]],
|
|
12933
|
+
['olmo', ['OlmoModel', OlmoModel]],
|
|
12934
|
+
['mobilellm', ['MobileLLMModel', MobileLLMModel]],
|
|
12893
12935
|
['granite', ['GraniteModel', GraniteModel]],
|
|
12894
12936
|
['cohere', ['CohereModel', CohereModel]],
|
|
12895
12937
|
['gemma', ['GemmaModel', GemmaModel]],
|
|
@@ -12979,6 +13021,8 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
|
|
|
12979
13021
|
['gpt_neox', ['GPTNeoXForCausalLM', GPTNeoXForCausalLM]],
|
|
12980
13022
|
['codegen', ['CodeGenForCausalLM', CodeGenForCausalLM]],
|
|
12981
13023
|
['llama', ['LlamaForCausalLM', LlamaForCausalLM]],
|
|
13024
|
+
['olmo', ['OlmoForCausalLM', OlmoForCausalLM]],
|
|
13025
|
+
['mobilellm', ['MobileLLMForCausalLM', MobileLLMForCausalLM]],
|
|
12982
13026
|
['granite', ['GraniteForCausalLM', GraniteForCausalLM]],
|
|
12983
13027
|
['cohere', ['CohereForCausalLM', CohereForCausalLM]],
|
|
12984
13028
|
['gemma', ['GemmaForCausalLM', GemmaForCausalLM]],
|
|
@@ -16572,7 +16616,6 @@ class DocumentQuestionAnsweringPipeline extends (/** @type {new (options: TextIm
|
|
|
16572
16616
|
|
|
16573
16617
|
/** @type {DocumentQuestionAnsweringPipelineCallback} */
|
|
16574
16618
|
async _call(image, question, generate_kwargs = {}) {
|
|
16575
|
-
throw new Error('This pipeline is not yet supported in Transformers.js v3.'); // TODO: Remove when implemented
|
|
16576
16619
|
|
|
16577
16620
|
// NOTE: For now, we only support a batch size of 1
|
|
16578
16621
|
|
|
@@ -17373,6 +17416,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
17373
17416
|
/* harmony export */ DeiTFeatureExtractor: () => (/* binding */ DeiTFeatureExtractor),
|
|
17374
17417
|
/* harmony export */ DetrFeatureExtractor: () => (/* binding */ DetrFeatureExtractor),
|
|
17375
17418
|
/* harmony export */ DonutFeatureExtractor: () => (/* binding */ DonutFeatureExtractor),
|
|
17419
|
+
/* harmony export */ DonutImageProcessor: () => (/* binding */ DonutImageProcessor),
|
|
17376
17420
|
/* harmony export */ EfficientNetImageProcessor: () => (/* binding */ EfficientNetImageProcessor),
|
|
17377
17421
|
/* harmony export */ FeatureExtractor: () => (/* binding */ FeatureExtractor),
|
|
17378
17422
|
/* harmony export */ Florence2Processor: () => (/* binding */ Florence2Processor),
|
|
@@ -18615,6 +18659,7 @@ class DonutFeatureExtractor extends ImageFeatureExtractor {
|
|
|
18615
18659
|
});
|
|
18616
18660
|
}
|
|
18617
18661
|
}
|
|
18662
|
+
class DonutImageProcessor extends DonutFeatureExtractor { } // NOTE extends DonutFeatureExtractor
|
|
18618
18663
|
class NougatImageProcessor extends DonutFeatureExtractor { } // NOTE extends DonutFeatureExtractor
|
|
18619
18664
|
|
|
18620
18665
|
/**
|
|
@@ -19975,6 +20020,7 @@ class AutoProcessor {
|
|
|
19975
20020
|
MaskFormerFeatureExtractor,
|
|
19976
20021
|
YolosFeatureExtractor,
|
|
19977
20022
|
DonutFeatureExtractor,
|
|
20023
|
+
DonutImageProcessor,
|
|
19978
20024
|
NougatImageProcessor,
|
|
19979
20025
|
EfficientNetImageProcessor,
|
|
19980
20026
|
|
|
@@ -30304,6 +30350,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
30304
30350
|
/* harmony export */ DistilBertTokenizer: () => (/* reexport safe */ _tokenizers_js__WEBPACK_IMPORTED_MODULE_3__.DistilBertTokenizer),
|
|
30305
30351
|
/* harmony export */ DocumentQuestionAnsweringPipeline: () => (/* reexport safe */ _pipelines_js__WEBPACK_IMPORTED_MODULE_1__.DocumentQuestionAnsweringPipeline),
|
|
30306
30352
|
/* harmony export */ DonutFeatureExtractor: () => (/* reexport safe */ _processors_js__WEBPACK_IMPORTED_MODULE_4__.DonutFeatureExtractor),
|
|
30353
|
+
/* harmony export */ DonutImageProcessor: () => (/* reexport safe */ _processors_js__WEBPACK_IMPORTED_MODULE_4__.DonutImageProcessor),
|
|
30307
30354
|
/* harmony export */ DonutSwinModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.DonutSwinModel),
|
|
30308
30355
|
/* harmony export */ DonutSwinPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.DonutSwinPreTrainedModel),
|
|
30309
30356
|
/* harmony export */ EfficientNetForImageClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.EfficientNetForImageClassification),
|
|
@@ -30440,6 +30487,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
30440
30487
|
/* harmony export */ MobileBertModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MobileBertModel),
|
|
30441
30488
|
/* harmony export */ MobileBertPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MobileBertPreTrainedModel),
|
|
30442
30489
|
/* harmony export */ MobileBertTokenizer: () => (/* reexport safe */ _tokenizers_js__WEBPACK_IMPORTED_MODULE_3__.MobileBertTokenizer),
|
|
30490
|
+
/* harmony export */ MobileLLMForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MobileLLMForCausalLM),
|
|
30491
|
+
/* harmony export */ MobileLLMModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MobileLLMModel),
|
|
30492
|
+
/* harmony export */ MobileLLMPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MobileLLMPreTrainedModel),
|
|
30443
30493
|
/* harmony export */ MobileNetV1FeatureExtractor: () => (/* reexport safe */ _processors_js__WEBPACK_IMPORTED_MODULE_4__.MobileNetV1FeatureExtractor),
|
|
30444
30494
|
/* harmony export */ MobileNetV1ForImageClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MobileNetV1ForImageClassification),
|
|
30445
30495
|
/* harmony export */ MobileNetV1Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MobileNetV1Model),
|
|
@@ -30482,6 +30532,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
30482
30532
|
/* harmony export */ OPTModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OPTModel),
|
|
30483
30533
|
/* harmony export */ OPTPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OPTPreTrainedModel),
|
|
30484
30534
|
/* harmony export */ ObjectDetectionPipeline: () => (/* reexport safe */ _pipelines_js__WEBPACK_IMPORTED_MODULE_1__.ObjectDetectionPipeline),
|
|
30535
|
+
/* harmony export */ OlmoForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OlmoForCausalLM),
|
|
30536
|
+
/* harmony export */ OlmoModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OlmoModel),
|
|
30537
|
+
/* harmony export */ OlmoPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OlmoPreTrainedModel),
|
|
30485
30538
|
/* harmony export */ OpenELMForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OpenELMForCausalLM),
|
|
30486
30539
|
/* harmony export */ OpenELMModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OpenELMModel),
|
|
30487
30540
|
/* harmony export */ OpenELMPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OpenELMPreTrainedModel),
|