@huggingface/transformers 3.0.0-alpha.17 → 3.0.0-alpha.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
- package/dist/transformers.cjs +12 -10
- package/dist/transformers.cjs.map +1 -1
- package/dist/transformers.js +419 -417
- package/dist/transformers.js.map +1 -1
- package/dist/transformers.min.cjs +8 -8
- package/dist/transformers.min.cjs.map +1 -1
- package/dist/transformers.min.js +37 -37
- package/dist/transformers.min.js.map +1 -1
- package/dist/transformers.min.mjs +3 -3
- package/dist/transformers.min.mjs.map +1 -1
- package/dist/transformers.mjs +12 -10
- package/dist/transformers.mjs.map +1 -1
- package/package.json +2 -2
- package/src/env.js +1 -1
- package/src/models.js +0 -3
- package/src/tokenizers.js +11 -6
- package/types/models.d.ts.map +1 -1
- package/types/tokenizers.d.ts.map +1 -1
package/README.md
CHANGED
|
@@ -101,7 +101,7 @@ npm i @huggingface/transformers
|
|
|
101
101
|
Alternatively, you can use it in vanilla JS, without any bundler, by using a CDN or static hosting. For example, using [ES Modules](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Modules), you can import the library with:
|
|
102
102
|
```html
|
|
103
103
|
<script type="module">
|
|
104
|
-
import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.17';
|
|
104
|
+
import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.19';
|
|
105
105
|
</script>
|
|
106
106
|
```
|
|
107
107
|
|
|
@@ -134,7 +134,7 @@ Check out the Transformers.js [template](https://huggingface.co/new-space?templa
|
|
|
134
134
|
|
|
135
135
|
|
|
136
136
|
|
|
137
|
-
By default, Transformers.js uses [hosted pretrained models](https://huggingface.co/models?library=transformers.js) and [precompiled WASM binaries](https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.17/dist/), which should work out-of-the-box. You can customize this as follows:
|
|
137
|
+
By default, Transformers.js uses [hosted pretrained models](https://huggingface.co/models?library=transformers.js) and [precompiled WASM binaries](https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.19/dist/), which should work out-of-the-box. You can customize this as follows:
|
|
138
138
|
|
|
139
139
|
### Settings
|
|
140
140
|
|
|
Binary file
|
package/dist/transformers.cjs
CHANGED
|
@@ -4449,7 +4449,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
4449
4449
|
|
|
4450
4450
|
|
|
4451
4451
|
|
|
4452
|
-
const VERSION = '3.0.0-alpha.17';
|
|
4452
|
+
const VERSION = '3.0.0-alpha.19';
|
|
4453
4453
|
|
|
4454
4454
|
// Check if various APIs are available (depends on environment)
|
|
4455
4455
|
const IS_BROWSER_ENV = typeof self !== 'undefined';
|
|
@@ -7005,9 +7005,6 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
|
|
|
7005
7005
|
/** @type {Record<string, import('onnxruntime-common').Tensor.DataLocation>} */
|
|
7006
7006
|
const preferredOutputLocation = {};
|
|
7007
7007
|
for (const key in shapes) {
|
|
7008
|
-
// TODO: For now, we keep encoder outputs on the CPU
|
|
7009
|
-
// (otherwise, this causes a memory leak or throws an error "Error: previous buffer is not registered")
|
|
7010
|
-
if (key.includes('encoder')) continue;
|
|
7011
7008
|
preferredOutputLocation[key] = 'gpu-buffer';
|
|
7012
7009
|
}
|
|
7013
7010
|
session_options.preferredOutputLocation = preferredOutputLocation;
|
|
@@ -21114,7 +21111,7 @@ class BPE extends TokenizerModel {
|
|
|
21114
21111
|
* Create a BPE instance.
|
|
21115
21112
|
* @param {Object} config The configuration object for BPE.
|
|
21116
21113
|
* @param {Object} config.vocab A mapping of tokens to ids.
|
|
21117
|
-
* @param {string[]} config.merges An array of BPE merges as strings.
|
|
21114
|
+
* @param {string[]|[string, string][]} config.merges An array of BPE merges as strings.
|
|
21118
21115
|
* @param {string} config.unk_token The unknown token used for out of vocabulary words.
|
|
21119
21116
|
* @param {string} config.end_of_word_suffix The suffix to place at the end of each word.
|
|
21120
21117
|
* @param {string} [config.continuing_subword_suffix] The suffix to insert between words.
|
|
@@ -21124,8 +21121,6 @@ class BPE extends TokenizerModel {
|
|
|
21124
21121
|
constructor(config) {
|
|
21125
21122
|
super(config);
|
|
21126
21123
|
|
|
21127
|
-
this.BPE_SPLIT_TOKEN = ' ';
|
|
21128
|
-
|
|
21129
21124
|
/** @type {Map<string, number>} */
|
|
21130
21125
|
this.tokens_to_ids = objectToMap(config.vocab);
|
|
21131
21126
|
|
|
@@ -21137,8 +21132,15 @@ class BPE extends TokenizerModel {
|
|
|
21137
21132
|
this.vocab[value] = key;
|
|
21138
21133
|
}
|
|
21139
21134
|
|
|
21140
|
-
|
|
21141
|
-
|
|
21135
|
+
// Tokenizers >= 0.20.0 serializes BPE merges as a [string, string][] instead of a string[],
|
|
21136
|
+
// which resolves the ambiguity for merges containing spaces.
|
|
21137
|
+
const use_new_merge_format = Array.isArray(config.merges[0]);
|
|
21138
|
+
|
|
21139
|
+
/** @type {[string, string][]} */
|
|
21140
|
+
this.merges = use_new_merge_format
|
|
21141
|
+
? /** @type {[string, string][]} */(config.merges)
|
|
21142
|
+
: (/** @type {string[]} */(config.merges)).map(x => /** @type {[string, string]} */(x.split(' ', 2)));
|
|
21143
|
+
this.bpe_ranks = new Map(this.merges.map((x, i) => [JSON.stringify(x), i]));
|
|
21142
21144
|
|
|
21143
21145
|
this.end_of_word_suffix = config.end_of_word_suffix;
|
|
21144
21146
|
|
|
@@ -21298,7 +21300,7 @@ class BPE extends TokenizerModel {
|
|
|
21298
21300
|
// `score` is a measure of the merge priority: lower means higher priority
|
|
21299
21301
|
// We use the BPE rank as a measure of priority (i.e., the local of the merge in the merges list)
|
|
21300
21302
|
// We also add a fractional component to the score to break ties (with the earlier character having higher priority)
|
|
21301
|
-
const rank = this.bpe_ranks.get(node.token + this.BPE_SPLIT_TOKEN + node.next.token);
|
|
21303
|
+
const rank = this.bpe_ranks.get(JSON.stringify([node.token, node.next.token]));
|
|
21302
21304
|
if (rank !== undefined) {
|
|
21303
21305
|
node.score = rank + node.bias;
|
|
21304
21306
|
queue.push(node);
|