@huggingface/transformers 3.0.0-alpha.17 → 3.0.0-alpha.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
- package/dist/transformers.cjs +12 -7
- package/dist/transformers.cjs.map +1 -1
- package/dist/transformers.js +419 -414
- package/dist/transformers.js.map +1 -1
- package/dist/transformers.min.cjs +8 -8
- package/dist/transformers.min.cjs.map +1 -1
- package/dist/transformers.min.js +37 -37
- package/dist/transformers.min.js.map +1 -1
- package/dist/transformers.min.mjs +2 -2
- package/dist/transformers.min.mjs.map +1 -1
- package/dist/transformers.mjs +12 -7
- package/dist/transformers.mjs.map +1 -1
- package/package.json +2 -2
- package/src/env.js +1 -1
- package/src/tokenizers.js +11 -6
- package/types/tokenizers.d.ts.map +1 -1
package/dist/transformers.mjs
CHANGED
|
@@ -4422,7 +4422,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
4422
4422
|
|
|
4423
4423
|
|
|
4424
4424
|
|
|
4425
|
-
const VERSION = '3.0.0-alpha.17';
|
|
4425
|
+
const VERSION = '3.0.0-alpha.18';
|
|
4426
4426
|
|
|
4427
4427
|
// Check if various APIs are available (depends on environment)
|
|
4428
4428
|
const IS_BROWSER_ENV = typeof self !== 'undefined';
|
|
@@ -21075,7 +21075,7 @@ class BPE extends TokenizerModel {
|
|
|
21075
21075
|
* Create a BPE instance.
|
|
21076
21076
|
* @param {Object} config The configuration object for BPE.
|
|
21077
21077
|
* @param {Object} config.vocab A mapping of tokens to ids.
|
|
21078
|
-
* @param {string[]} config.merges An array of BPE merges as strings.
|
|
21078
|
+
* @param {string[]|[string, string][]} config.merges An array of BPE merges as strings.
|
|
21079
21079
|
* @param {string} config.unk_token The unknown token used for out of vocabulary words.
|
|
21080
21080
|
* @param {string} config.end_of_word_suffix The suffix to place at the end of each word.
|
|
21081
21081
|
* @param {string} [config.continuing_subword_suffix] The suffix to insert between words.
|
|
@@ -21085,8 +21085,6 @@ class BPE extends TokenizerModel {
|
|
|
21085
21085
|
constructor(config) {
|
|
21086
21086
|
super(config);
|
|
21087
21087
|
|
|
21088
|
-
this.BPE_SPLIT_TOKEN = ' ';
|
|
21089
|
-
|
|
21090
21088
|
/** @type {Map<string, number>} */
|
|
21091
21089
|
this.tokens_to_ids = objectToMap(config.vocab);
|
|
21092
21090
|
|
|
@@ -21098,8 +21096,15 @@ class BPE extends TokenizerModel {
|
|
|
21098
21096
|
this.vocab[value] = key;
|
|
21099
21097
|
}
|
|
21100
21098
|
|
|
21101
|
-
this.bpe_ranks = new Map(config.merges.map((x, i) => [x, i]));
|
|
21102
|
-
this.merges = config.merges.map(x => x.split(this.BPE_SPLIT_TOKEN));
|
|
21099
|
+
// Tokenizers >= 0.20.0 serializes BPE merges as a [string, string][] instead of a string[],
|
|
21100
|
+
// which resolves the ambiguity for merges containing spaces.
|
|
21101
|
+
const use_new_merge_format = Array.isArray(config.merges[0]);
|
|
21102
|
+
|
|
21103
|
+
/** @type {[string, string][]} */
|
|
21104
|
+
this.merges = use_new_merge_format
|
|
21105
|
+
? /** @type {[string, string][]} */(config.merges)
|
|
21106
|
+
: (/** @type {string[]} */(config.merges)).map(x => /** @type {[string, string]} */(x.split(' ', 2)));
|
|
21107
|
+
this.bpe_ranks = new Map(this.merges.map((x, i) => [JSON.stringify(x), i]));
|
|
21103
21108
|
|
|
21104
21109
|
this.end_of_word_suffix = config.end_of_word_suffix;
|
|
21105
21110
|
|
|
@@ -21259,7 +21264,7 @@ class BPE extends TokenizerModel {
|
|
|
21259
21264
|
// `score` is a measure of the merge priority: lower means higher priority
|
|
21260
21265
|
// We use the BPE rank as a measure of priority (i.e., the local of the merge in the merges list)
|
|
21261
21266
|
// We also add a fractional component to the score to break ties (with the earlier character having higher priority)
|
|
21262
|
-
const rank = this.bpe_ranks.get(node.token + this.BPE_SPLIT_TOKEN + node.next.token);
|
|
21267
|
+
const rank = this.bpe_ranks.get(JSON.stringify([node.token, node.next.token]));
|
|
21263
21268
|
if (rank !== undefined) {
|
|
21264
21269
|
node.score = rank + node.bias;
|
|
21265
21270
|
queue.push(node);
|