npm - @huggingface/transformers - Versions diffs - 3.0.0-alpha.17 → 3.0.0-alpha.18 - Mend

@huggingface/transformers 3.0.0-alpha.17 → 3.0.0-alpha.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/README.md +2 -2
package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
package/dist/transformers.cjs +12 -7
package/dist/transformers.cjs.map +1 -1
package/dist/transformers.js +419 -414
package/dist/transformers.js.map +1 -1
package/dist/transformers.min.cjs +8 -8
package/dist/transformers.min.cjs.map +1 -1
package/dist/transformers.min.js +37 -37
package/dist/transformers.min.js.map +1 -1
package/dist/transformers.min.mjs +2 -2
package/dist/transformers.min.mjs.map +1 -1
package/dist/transformers.mjs +12 -7
package/dist/transformers.mjs.map +1 -1
package/package.json +2 -2
package/src/env.js +1 -1
package/src/tokenizers.js +11 -6
package/types/tokenizers.d.ts.map +1 -1

package/README.md CHANGED Viewed

@@ -101,7 +101,7 @@ npm i @huggingface/transformers
 Alternatively, you can use it in vanilla JS, without any bundler, by using a CDN or static hosting. For example, using [ES Modules](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Modules), you can import the library with:
 ```html
 <script type="module">
-    import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.17';
+    import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.18';
 </script>
 ```
@@ -134,7 +134,7 @@ Check out the Transformers.js [template](https://huggingface.co/new-space?templa
-By default, Transformers.js uses [hosted pretrained models](https://huggingface.co/models?library=transformers.js) and [precompiled WASM binaries](https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.17/dist/), which should work out-of-the-box. You can customize this as follows:
+By default, Transformers.js uses [hosted pretrained models](https://huggingface.co/models?library=transformers.js) and [precompiled WASM binaries](https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.18/dist/), which should work out-of-the-box. You can customize this as follows:
 ### Settings

package/dist/ort-wasm-simd-threaded.jsep.wasm CHANGED Viewed

Binary file

package/dist/transformers.cjs CHANGED Viewed

@@ -4449,7 +4449,7 @@ __webpack_require__.r(__webpack_exports__);
-const VERSION = '3.0.0-alpha.17';
+const VERSION = '3.0.0-alpha.18';
 // Check if various APIs are available (depends on environment)
 const IS_BROWSER_ENV = typeof self !== 'undefined';
@@ -21114,7 +21114,7 @@ class BPE extends TokenizerModel {
      * Create a BPE instance.
      * @param {Object} config The configuration object for BPE.
      * @param {Object} config.vocab A mapping of tokens to ids.
-     * @param {string[]} config.merges An array of BPE merges as strings.
+     * @param {string[]|[string, string][]} config.merges An array of BPE merges, either as space-separated strings or as [string, string] pairs.
      * @param {string} config.unk_token The unknown token used for out of vocabulary words.
      * @param {string} config.end_of_word_suffix The suffix to place at the end of each word.
      * @param {string} [config.continuing_subword_suffix] The suffix to insert between words.
@@ -21124,8 +21124,6 @@ class BPE extends TokenizerModel {
     constructor(config) {
         super(config);
-        this.BPE_SPLIT_TOKEN = ' ';
         /** @type {Map<string, number>} */
         this.tokens_to_ids = objectToMap(config.vocab);
@@ -21137,8 +21135,15 @@ class BPE extends TokenizerModel {
             this.vocab[value] = key;
         }
-        this.bpe_ranks = new Map(config.merges.map((x, i) => [x, i]));
-        this.merges = config.merges.map(x => x.split(this.BPE_SPLIT_TOKEN));
+        // Tokenizers >= 0.20.0 serializes BPE merges as a [string, string][] instead of a string[],
+        // which resolves the ambiguity for merges containing spaces.
+        const use_new_merge_format = Array.isArray(config.merges[0]);
+        /** @type {[string, string][]} */
+        this.merges = use_new_merge_format
+            ? /** @type {[string, string][]} */(config.merges)
+            : (/** @type {string[]} */(config.merges)).map(x => /** @type {[string, string]} */(x.split(' ', 2)));
+        this.bpe_ranks = new Map(this.merges.map((x, i) => [JSON.stringify(x), i]));
         this.end_of_word_suffix = config.end_of_word_suffix;
@@ -21298,7 +21303,7 @@ class BPE extends TokenizerModel {
         // `score` is a measure of the merge priority: lower means higher priority
         // We use the BPE rank as a measure of priority (i.e., the location of the merge in the merges list)
         // We also add a fractional component to the score to break ties (with the earlier character having higher priority)
-        const rank = this.bpe_ranks.get(node.token + this.BPE_SPLIT_TOKEN + node.next.token);
+        const rank = this.bpe_ranks.get(JSON.stringify([node.token, node.next.token]));
         if (rank !== undefined) {
             node.score = rank + node.bias;
             queue.push(node);