@huggingface/transformers 3.0.0-alpha.17 → 3.0.0-alpha.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
- package/dist/transformers.cjs +12 -7
- package/dist/transformers.cjs.map +1 -1
- package/dist/transformers.js +419 -414
- package/dist/transformers.js.map +1 -1
- package/dist/transformers.min.cjs +8 -8
- package/dist/transformers.min.cjs.map +1 -1
- package/dist/transformers.min.js +37 -37
- package/dist/transformers.min.js.map +1 -1
- package/dist/transformers.min.mjs +2 -2
- package/dist/transformers.min.mjs.map +1 -1
- package/dist/transformers.mjs +12 -7
- package/dist/transformers.mjs.map +1 -1
- package/package.json +2 -2
- package/src/env.js +1 -1
- package/src/tokenizers.js +11 -6
- package/types/tokenizers.d.ts.map +1 -1
package/README.md
CHANGED
|
@@ -101,7 +101,7 @@ npm i @huggingface/transformers
|
|
|
101
101
|
Alternatively, you can use it in vanilla JS, without any bundler, by using a CDN or static hosting. For example, using [ES Modules](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Modules), you can import the library with:
|
|
102
102
|
```html
|
|
103
103
|
<script type="module">
|
|
104
|
-
import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.17';
|
|
104
|
+
import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.18';
|
|
105
105
|
</script>
|
|
106
106
|
```
|
|
107
107
|
|
|
@@ -134,7 +134,7 @@ Check out the Transformers.js [template](https://huggingface.co/new-space?templa
|
|
|
134
134
|
|
|
135
135
|
|
|
136
136
|
|
|
137
|
-
By default, Transformers.js uses [hosted pretrained models](https://huggingface.co/models?library=transformers.js) and [precompiled WASM binaries](https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.17/dist/), which should work out-of-the-box. You can customize this as follows:
|
|
137
|
+
By default, Transformers.js uses [hosted pretrained models](https://huggingface.co/models?library=transformers.js) and [precompiled WASM binaries](https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.18/dist/), which should work out-of-the-box. You can customize this as follows:
|
|
138
138
|
|
|
139
139
|
### Settings
|
|
140
140
|
|
|
Binary file
|
package/dist/transformers.cjs
CHANGED
|
@@ -4449,7 +4449,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
4449
4449
|
|
|
4450
4450
|
|
|
4451
4451
|
|
|
4452
|
-
const VERSION = '3.0.0-alpha.17';
|
|
4452
|
+
const VERSION = '3.0.0-alpha.18';
|
|
4453
4453
|
|
|
4454
4454
|
// Check if various APIs are available (depends on environment)
|
|
4455
4455
|
const IS_BROWSER_ENV = typeof self !== 'undefined';
|
|
@@ -21114,7 +21114,7 @@ class BPE extends TokenizerModel {
|
|
|
21114
21114
|
* Create a BPE instance.
|
|
21115
21115
|
* @param {Object} config The configuration object for BPE.
|
|
21116
21116
|
* @param {Object} config.vocab A mapping of tokens to ids.
|
|
21117
|
-
* @param {string[]} config.merges An array of BPE merges as strings.
|
|
21117
|
+
* @param {string[]|[string, string][]} config.merges An array of BPE merges as strings.
|
|
21118
21118
|
* @param {string} config.unk_token The unknown token used for out of vocabulary words.
|
|
21119
21119
|
* @param {string} config.end_of_word_suffix The suffix to place at the end of each word.
|
|
21120
21120
|
* @param {string} [config.continuing_subword_suffix] The suffix to insert between words.
|
|
@@ -21124,8 +21124,6 @@ class BPE extends TokenizerModel {
|
|
|
21124
21124
|
constructor(config) {
|
|
21125
21125
|
super(config);
|
|
21126
21126
|
|
|
21127
|
-
this.BPE_SPLIT_TOKEN = ' ';
|
|
21128
|
-
|
|
21129
21127
|
/** @type {Map<string, number>} */
|
|
21130
21128
|
this.tokens_to_ids = objectToMap(config.vocab);
|
|
21131
21129
|
|
|
@@ -21137,8 +21135,15 @@ class BPE extends TokenizerModel {
|
|
|
21137
21135
|
this.vocab[value] = key;
|
|
21138
21136
|
}
|
|
21139
21137
|
|
|
21140
|
-
|
|
21141
|
-
|
|
21138
|
+
// Tokenizers >= 0.20.0 serializes BPE merges as a [string, string][] instead of a string[],
|
|
21139
|
+
// which resolves the ambiguity for merges containing spaces.
|
|
21140
|
+
const use_new_merge_format = Array.isArray(config.merges[0]);
|
|
21141
|
+
|
|
21142
|
+
/** @type {[string, string][]} */
|
|
21143
|
+
this.merges = use_new_merge_format
|
|
21144
|
+
? /** @type {[string, string][]} */(config.merges)
|
|
21145
|
+
: (/** @type {string[]} */(config.merges)).map(x => /** @type {[string, string]} */(x.split(' ', 2)));
|
|
21146
|
+
this.bpe_ranks = new Map(this.merges.map((x, i) => [JSON.stringify(x), i]));
|
|
21142
21147
|
|
|
21143
21148
|
this.end_of_word_suffix = config.end_of_word_suffix;
|
|
21144
21149
|
|
|
@@ -21298,7 +21303,7 @@ class BPE extends TokenizerModel {
|
|
|
21298
21303
|
// `score` is a measure of the merge priority: lower means higher priority
|
|
21299
21304
|
// We use the BPE rank as a measure of priority (i.e., the local of the merge in the merges list)
|
|
21300
21305
|
// We also add a fractional component to the score to break ties (with the earlier character having higher priority)
|
|
21301
|
-
const rank = this.bpe_ranks.get(node.token + this.BPE_SPLIT_TOKEN + node.next.token);
|
|
21306
|
+
const rank = this.bpe_ranks.get(JSON.stringify([node.token, node.next.token]));
|
|
21302
21307
|
if (rank !== undefined) {
|
|
21303
21308
|
node.score = rank + node.bias;
|
|
21304
21309
|
queue.push(node);
|