npm - @huggingface/transformers - Versions diffs - 3.0.0-alpha.2 → 3.0.0-alpha.20 - Mend

@huggingface/transformers 3.0.0-alpha.2 → 3.0.0-alpha.20

This diff shows the differences between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (62)

package/README.md +19 -9
package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
package/dist/transformers.cjs +2402 -2039
package/dist/transformers.cjs.map +1 -1
package/dist/transformers.js +3423 -2999
package/dist/transformers.js.map +1 -1
package/dist/transformers.min.cjs +37 -43
package/dist/transformers.min.cjs.map +1 -1
package/dist/transformers.min.js +39 -40
package/dist/transformers.min.js.map +1 -1
package/dist/transformers.min.mjs +63 -70
package/dist/transformers.min.mjs.map +1 -1
package/dist/transformers.mjs +2452 -2063
package/dist/transformers.mjs.map +1 -1
package/package.json +23 -13
package/src/backends/onnx.js +98 -36
package/src/configs.js +18 -4
package/src/env.js +9 -9
package/src/generation/logits_process.js +40 -37
package/src/generation/streamers.js +3 -3
package/src/models.js +238 -74
package/src/ops/registry.js +14 -3
package/src/pipelines.js +5 -4
package/src/processors.js +390 -351
package/src/tokenizers.js +139 -174
package/src/utils/core.js +12 -0
package/src/utils/data-structures.js +13 -11
package/src/utils/devices.js +15 -4
package/src/utils/dtypes.js +1 -3
package/src/utils/hub.js +18 -17
package/src/utils/maths.js +14 -5
package/src/utils/tensor.js +23 -0
package/types/backends/onnx.d.ts +6 -5
package/types/backends/onnx.d.ts.map +1 -1
package/types/configs.d.ts +29 -3
package/types/configs.d.ts.map +1 -1
package/types/env.d.ts +6 -2
package/types/env.d.ts.map +1 -1
package/types/generation/logits_process.d.ts.map +1 -1
package/types/models.d.ts +108 -2
package/types/models.d.ts.map +1 -1
package/types/ops/registry.d.ts +6 -6
package/types/ops/registry.d.ts.map +1 -1
package/types/pipelines.d.ts.map +1 -1
package/types/processors.d.ts +55 -51
package/types/processors.d.ts.map +1 -1
package/types/tokenizers.d.ts +23 -32
package/types/tokenizers.d.ts.map +1 -1
package/types/utils/core.d.ts +7 -0
package/types/utils/core.d.ts.map +1 -1
package/types/utils/data-structures.d.ts +6 -6
package/types/utils/data-structures.d.ts.map +1 -1
package/types/utils/devices.d.ts +11 -1
package/types/utils/devices.d.ts.map +1 -1
package/types/utils/dtypes.d.ts +0 -3
package/types/utils/dtypes.d.ts.map +1 -1
package/types/utils/hub.d.ts +2 -41
package/types/utils/hub.d.ts.map +1 -1
package/types/utils/maths.d.ts +2 -2
package/types/utils/maths.d.ts.map +1 -1
package/types/utils/tensor.d.ts +13 -1
package/types/utils/tensor.d.ts.map +1 -1

package/package.json CHANGED Viewed

@@ -1,16 +1,31 @@
 {
   "name": "@huggingface/transformers",
-  "version": "3.0.0-alpha.2",
+  "version": "3.0.0-alpha.20",
   "description": "State-of-the-art Machine Learning for the web. Run 🤗 Transformers directly in your browser, with no need for a server!",
   "main": "./src/transformers.js",
   "types": "./types/transformers.d.ts",
   "type": "module",
   "exports": {
     "node": {
-      "import": "./dist/transformers.mjs",
-      "require": "./dist/transformers.cjs"
+      "import": {
+        "types": "./types/transformers.d.ts",
+        "default": "./dist/transformers.mjs"
+      },
+      "require": {
+        "types": "./types/transformers.d.ts",
+        "default": "./dist/transformers.cjs"
+      }
     },
-    "default": "./dist/transformers.js"
+    "default": {
+      "types": "./types/transformers.d.ts",
+      "default": "./dist/transformers.js"
+    }
+  },
+  "imports": {
+    "#onnxruntime-webgpu": {
+      "node": "onnxruntime-web",
+      "default": "onnxruntime-web/webgpu"
+    }
   },
   "scripts": {
     "format": "prettier --write .",
@@ -18,8 +33,7 @@
     "typegen": "tsc ./src/transformers.js --allowJs --declaration --emitDeclarationOnly --declarationMap --outDir types",
     "dev": "webpack serve --no-client-overlay",
     "build": "webpack && npm run typegen",
-    "generate-tests": "python -m tests.generate_tests",
-    "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js --verbose --maxConcurrency 1",
+    "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js --verbose",
     "readme": "python ./docs/scripts/build_readme.py",
     "docs-api": "node ./docs/scripts/generate.js",
     "docs-preview": "doc-builder preview transformers.js ./docs/source/ --not_python_module",
@@ -48,9 +62,9 @@
   "homepage": "https://github.com/xenova/transformers.js#readme",
   "dependencies": {
     "@huggingface/jinja": "^0.3.0",
-    "onnxruntime-node": "1.18.0",
-    "onnxruntime-web": "1.19.0-dev.20240804-ee2fe87e2d",
-    "sharp": "^0.33.2"
+    "onnxruntime-node": "1.19.2",
+    "onnxruntime-web": "1.20.0-dev.20240928-1bda91fc57",
+    "sharp": "^0.33.5"
   },
   "devDependencies": {
     "@types/jest": "^29.5.1",
@@ -66,10 +80,6 @@
     "webpack-cli": "^5.0.2",
     "webpack-dev-server": "^4.13.3"
   },
-  "overrides": {
-    "semver": "^7.6.3",
-    "protobufjs": "^7.2.6"
-  },
   "files": [
     "src",
     "dist",

package/src/backends/onnx.js CHANGED Viewed

@@ -21,27 +21,89 @@ import { env, apis } from '../env.js';
 // NOTE: Import order matters here. We need to import `onnxruntime-node` before `onnxruntime-web`.
 // In either case, we select the default export if it exists, otherwise we use the named export.
 import * as ONNX_NODE from 'onnxruntime-node';
-import * as ONNX_WEB from 'onnxruntime-web/webgpu';
+// Use subpath-imports to ensure Node.js and browser interoperability.
+// See package.json and https://nodejs.org/api/packages.html#subpath-imports
+// for more information.
+// @ts-ignore
+import * as ONNX_WEB from '#onnxruntime-webgpu';
 export { Tensor } from 'onnxruntime-common';
-/** @type {import('../utils/devices.js').DeviceType[]} */
-const supportedExecutionProviders = [];
+/**
+ * @typedef {import('onnxruntime-common').InferenceSession.ExecutionProviderConfig} ONNXExecutionProviders
+ */
+/** @type {Record<import("../utils/devices.js").DeviceType, ONNXExecutionProviders>} */
+const DEVICE_TO_EXECUTION_PROVIDER_MAPPING = Object.freeze({
+    auto: null, // Auto-detect based on device and environment
+    gpu: null, // Auto-detect GPU
+    cpu: 'cpu', // CPU
+    wasm: 'wasm', // WebAssembly
+    webgpu: 'webgpu', // WebGPU
+    cuda: 'cuda', // CUDA
+    dml: 'dml', // DirectML
+    webnn: { name: 'webnn', deviceType: 'cpu' }, // WebNN (default)
+    'webnn-npu': { name: 'webnn', deviceType: 'npu' }, // WebNN NPU
+    'webnn-gpu': { name: 'webnn', deviceType: 'gpu' }, // WebNN GPU
+    'webnn-cpu': { name: 'webnn', deviceType: 'cpu' }, // WebNN CPU
+});
+/**
+ * The list of supported devices, sorted by priority/performance.
+ * @type {import("../utils/devices.js").DeviceType[]}
+ */
+const supportedDevices = [];
-/** @type {import('../utils/devices.js').DeviceType[]} */
-let defaultExecutionProviders;
+/** @type {ONNXExecutionProviders[]} */
+let defaultDevices;
 let ONNX;
-if (apis.IS_NODE_ENV) {
+const ORT_SYMBOL = Symbol.for('onnxruntime');
+if (ORT_SYMBOL in globalThis) {
+  // If the JS runtime exposes their own ONNX runtime, use it
+  ONNX = globalThis[ORT_SYMBOL];
+} else if (apis.IS_NODE_ENV) {
     ONNX = ONNX_NODE.default ?? ONNX_NODE;
-    supportedExecutionProviders.push('cpu');
-    defaultExecutionProviders = ['cpu'];
+    // Updated as of ONNX Runtime 1.18.0
+    // The following table lists the supported versions of ONNX Runtime Node.js binding provided with pre-built binaries.
+    // | EPs/Platforms | Windows x64 | Windows arm64 | Linux x64         | Linux arm64 | MacOS x64 | MacOS arm64 |
+    // | ------------- | ----------- | ------------- | ----------------- | ----------- | --------- | ----------- |
+    // | CPU           | ✔️          | ✔️            | ✔️                | ✔️          | ✔️        | ✔️          |
+    // | DirectML      | ✔️          | ✔️            | ❌                | ❌          | ❌        | ❌          |
+    // | CUDA          | ❌          | ❌            | ✔️ (CUDA v11.8)   | ❌          | ❌        | ❌          |
+    switch (process.platform) {
+        case 'win32': // Windows x64 and Windows arm64
+            supportedDevices.push('dml');
+            break;
+        case 'linux': // Linux x64 and Linux arm64
+            if (process.arch === 'x64') {
+                supportedDevices.push('cuda');
+            }
+            break;
+        case 'darwin': // MacOS x64 and MacOS arm64
+            break;
+    }
+    supportedDevices.push('cpu');
+    defaultDevices = ['cpu'];
 } else {
     ONNX = ONNX_WEB;
+    if (apis.IS_WEBNN_AVAILABLE) {
+        // TODO: Only push supported providers (depending on available hardware)
+        supportedDevices.push('webnn-npu', 'webnn-gpu', 'webnn-cpu', 'webnn');
+    }
     if (apis.IS_WEBGPU_AVAILABLE) {
-        supportedExecutionProviders.push('webgpu');
+        supportedDevices.push('webgpu');
     }
-    supportedExecutionProviders.push('wasm');
-    defaultExecutionProviders = ['wasm'];
+    supportedDevices.push('wasm');
+    defaultDevices = ['wasm'];
 }
 // @ts-ignore
@@ -49,19 +111,28 @@ const InferenceSession = ONNX.InferenceSession;
 /**
  * Map a device to the execution providers to use for the given device.
- * @param {import("../utils/devices.js").DeviceType} [device=null] (Optional) The device to run the inference on.
- * @returns {import("../utils/devices.js").DeviceType[]} The execution providers to use for the given device.
+ * @param {import("../utils/devices.js").DeviceType|"auto"|null} [device=null] (Optional) The device to run the inference on.
+ * @returns {ONNXExecutionProviders[]} The execution providers to use for the given device.
  */
-export function deviceToExecutionProviders(device) {
-    // TODO: Use mapping from device to execution providers for overloaded devices (e.g., 'gpu' or 'cpu').
-    let executionProviders = defaultExecutionProviders;
-    if (device) { // User has specified a device
-        if (!supportedExecutionProviders.includes(device)) {
-            throw new Error(`Unsupported device: "${device}". Should be one of: ${supportedExecutionProviders.join(', ')}.`)
-        }
-        executionProviders = [device];
+export function deviceToExecutionProviders(device = null) {
+    // Use the default execution providers if the user hasn't specified anything
+    if (!device) return defaultDevices;
+    // Handle overloaded cases
+    switch (device) {
+        case "auto":
+            return supportedDevices;
+        case "gpu":
+            return supportedDevices.filter(x =>
+                ["webgpu", "cuda", "dml", "webnn-gpu"].includes(x),
+            );
     }
-    return executionProviders;
+    if (supportedDevices.includes(device)) {
+        return [DEVICE_TO_EXECUTION_PROVIDER_MAPPING[device] ?? device];
+    }
+    throw new Error(`Unsupported device: "${device}". Should be one of: ${supportedDevices.join(', ')}.`)
 }
@@ -76,7 +147,7 @@ let wasmInitPromise = null;
 /**
  * Create an ONNX inference session.
  * @param {Uint8Array} buffer The ONNX model buffer.
- * @param {Object} session_options ONNX inference session options.
+ * @param {import('onnxruntime-common').InferenceSession.SessionOptions} session_options ONNX inference session options.
  * @returns {Promise<import('onnxruntime-common').InferenceSession>} The ONNX inference session.
  */
 export async function createInferenceSession(buffer, session_options) {
@@ -100,6 +171,7 @@ export function isONNXTensor(x) {
     return x instanceof ONNX.Tensor;
 }
+/** @type {import('onnxruntime-common').Env} */
 // @ts-ignore
 const ONNX_ENV = ONNX?.env;
 if (ONNX_ENV?.wasm) {
@@ -114,24 +186,14 @@ if (ONNX_ENV?.wasm) {
     // TODO: Add support for loading WASM files from cached buffer when we upgrade to onnxruntime-web@1.19.0
     // https://github.com/microsoft/onnxruntime/pull/21534
-    // Proxy the WASM backend to prevent the UI from freezing
-    // NOTE: This is only needed when running in a non-worker browser environment.
-    ONNX_ENV.wasm.proxy = !apis.IS_WEBWORKER_ENV;
+    // Users may wish to proxy the WASM backend to prevent the UI from freezing,
+    // However, this is not necessary when using WebGPU, so we default to false.
+    ONNX_ENV.wasm.proxy = false;
     // https://developer.mozilla.org/en-US/docs/Web/API/crossOriginIsolated
     if (typeof crossOriginIsolated === 'undefined' || !crossOriginIsolated) {
         ONNX_ENV.wasm.numThreads = 1;
     }
-    // Running in a browser-environment
-    // TODO: Check if 1.17.1 fixes this issue.
-    // SIMD for WebAssembly does not operate correctly in some recent versions of iOS (16.4.x).
-    // As a temporary fix, we disable it for now.
-    // For more information, see: https://github.com/microsoft/onnxruntime/issues/15644
-    const isIOS = typeof navigator !== 'undefined' && /iP(hone|od|ad).+16_4.+AppleWebKit/.test(navigator.userAgent);
-    if (isIOS) {
-        ONNX_ENV.wasm.simd = false;
-    }
 }
 if (ONNX_ENV?.webgpu) {

package/src/configs.js CHANGED Viewed

@@ -73,6 +73,7 @@ function getNormalizedConfig(config) {
         // Decoder-only models
         case 'gpt2':
         case 'gptj':
+        case 'jais':
         case 'codegen':
         case 'gpt_bigcode':
             mapping['num_heads'] = 'n_head';
@@ -295,16 +296,23 @@ export function getKeyValueShapes(config, {
 export class PretrainedConfig {
     // NOTE: Typo in original
+    /** @type {string|null} */
+    model_type = null;
+    /** @type {boolean} */
+    is_encoder_decoder = false;
+    /** @type {number} */
     max_position_embeddings;
+    /** @type {TransformersJSConfig} */
+    'transformers.js_config';
     /**
      * Create a new PreTrainedTokenizer instance.
      * @param {Object} configJSON The JSON of the config.
      */
     constructor(configJSON) {
-        this.model_type = null;
-        this.is_encoder_decoder = false;
         Object.assign(this, configJSON);
         this.normalized_config = getNormalizedConfig(this);
     }
@@ -356,5 +364,11 @@ export class AutoConfig {
 /**
  * Transformers.js-specific configuration, possibly present in config.json under the key `transformers.js_config`.
  * @typedef {Object} TransformersJSConfig
- * @property {import('./transformers.js').DataType} [kv_cache_dtype]
+ * @property {import('./utils/tensor.js').DataType} [kv_cache_dtype] The data type of the key-value cache.
+ * @property {Record<string, number>} [free_dimension_overrides] Override the free dimensions of the model.
+ * See https://onnxruntime.ai/docs/tutorials/web/env-flags-and-session-options.html#freedimensionoverrides
+ * for more information.
+ * @property {import('./utils/devices.js').DeviceType} [device] The default device to use for the model.
+ * @property {import('./utils/dtypes.js').DataType} [dtype] The default data type to use for the model.
+ * @property {boolean|Record<string, boolean>} [use_external_data_format=false] Whether to load the model using the external data format (used for models >= 2GB in size).
  */

package/src/env.js CHANGED Viewed

@@ -26,13 +26,14 @@ import fs from 'fs';
 import path from 'path';
 import url from 'url';
-const VERSION = '3.0.0-alpha.2';
+const VERSION = '3.0.0-alpha.20';
 // Check if various APIs are available (depends on environment)
 const IS_BROWSER_ENV = typeof self !== 'undefined';
 const IS_WEBWORKER_ENV = IS_BROWSER_ENV && self.constructor.name === 'DedicatedWorkerGlobalScope';
 const IS_WEB_CACHE_AVAILABLE = IS_BROWSER_ENV && 'caches' in self;
 const IS_WEBGPU_AVAILABLE = typeof navigator !== 'undefined' && 'gpu' in navigator;
+const IS_WEBNN_AVAILABLE = typeof navigator !== 'undefined' && 'ml' in navigator;
 const IS_PROCESS_AVAILABLE = typeof process !== 'undefined';
 const IS_NODE_ENV = IS_PROCESS_AVAILABLE && process?.release?.name === 'node';
@@ -55,6 +56,9 @@ export const apis = Object.freeze({
     /** Whether the WebGPU API is available */
     IS_WEBGPU_AVAILABLE,
+    /** Whether the WebNN API is available */
+    IS_WEBNN_AVAILABLE,
     /** Whether the Node.js process API is available */
     IS_PROCESS_AVAILABLE,
@@ -69,26 +73,26 @@ export const apis = Object.freeze({
 });
 const RUNNING_LOCALLY = IS_FS_AVAILABLE && IS_PATH_AVAILABLE;
-const __dirname = RUNNING_LOCALLY
+const dirname__ = RUNNING_LOCALLY
     ? path.dirname(path.dirname(url.fileURLToPath(import.meta.url)))
     : './';
 // Only used for environments with access to file system
 const DEFAULT_CACHE_DIR = RUNNING_LOCALLY
-    ? path.join(__dirname, '/.cache/')
+    ? path.join(dirname__, '/.cache/')
     : null;
 // Set local model path, based on available APIs
 const DEFAULT_LOCAL_MODEL_PATH = '/models/';
 const localModelPath = RUNNING_LOCALLY
-    ? path.join(__dirname, DEFAULT_LOCAL_MODEL_PATH)
+    ? path.join(dirname__, DEFAULT_LOCAL_MODEL_PATH)
     : DEFAULT_LOCAL_MODEL_PATH;
 /**
  * Global variable given visible to users to control execution. This provides users a simple way to configure Transformers.js.
  * @typedef {Object} TransformersEnvironment
  * @property {string} version This version of Transformers.js.
- * @property {Object} backends Expose environment variables of different backends,
+ * @property {{onnx: Partial<import('onnxruntime-common').Env>}} backends Expose environment variables of different backends,
  * allowing users to set these variables if they want to.
  * @property {boolean} allowRemoteModels Whether to allow loading of remote files, defaults to `true`.
  * If set to `false`, it will have the same effect as setting `local_files_only=true` when loading pipelines, models, tokenizers, processors, etc.
@@ -115,12 +119,8 @@ export const env = {
     backends: {
         // onnxruntime-web/onnxruntime-node
         onnx: {},
-        // TensorFlow.js
-        tfjs: {},
     },
     /////////////////// Model settings ///////////////////
     allowRemoteModels: true,
     remoteHost: 'https://huggingface.co/',

package/src/generation/logits_process.js CHANGED Viewed

@@ -156,9 +156,9 @@ export class ForcedBOSTokenLogitsProcessor extends LogitsProcessor {
     _call(input_ids, logits) {
         for (let i = 0; i < input_ids.length; ++i) {
             if (input_ids[i].length === 1) {
-                const batch_logits = logits[i];
-                batch_logits.data.fill(-Infinity);
-                batch_logits.data[this.bos_token_id] = 0;
+                const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
+                batch_logits_data.fill(-Infinity);
+                batch_logits_data[this.bos_token_id] = 0;
             }
         }
         return logits;
@@ -189,11 +189,10 @@ export class ForcedEOSTokenLogitsProcessor extends LogitsProcessor {
     _call(input_ids, logits) {
         for (let i = 0; i < input_ids.length; ++i) {
             if (input_ids[i].length === this.max_length - 1) {
-                const batch_logits = logits[i];
-                batch_logits.data.fill(-Infinity);
+                const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
+                batch_logits_data.fill(-Infinity);
                 for (const eos_token of this.eos_token_id) {
-                    batch_logits.data[eos_token] = 0;
+                    batch_logits_data[eos_token] = 0;
                 }
             }
         }
@@ -227,9 +226,9 @@ export class SuppressTokensAtBeginLogitsProcessor extends LogitsProcessor {
     _call(input_ids, logits) {
         for (let i = 0; i < input_ids.length; ++i) {
             if (input_ids[i].length === this.begin_index) {
-                const batch_logits = logits[i];
+                const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
                 for (const token_id of this.begin_suppress_tokens) {
-                    batch_logits.data[token_id] = -Infinity;
+                    batch_logits_data[token_id] = -Infinity;
                 }
             }
         }
@@ -271,15 +270,14 @@ export class WhisperTimeStampLogitsProcessor extends LogitsProcessor {
      */
     _call(input_ids, logits) {
         for (let i = 0; i < input_ids.length; ++i) {
-            const batch_logits = logits[i];
-            const logitsData = /** @type {Float32Array} */(batch_logits.data);
+            const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
             // suppress <|notimestamps|> which is handled by without_timestamps
-            logitsData[this.no_timestamps_token_id] = -Infinity;
+            batch_logits_data[this.no_timestamps_token_id] = -Infinity;
             if (input_ids[i].length === this.begin_index - 1) {
-                logitsData.fill(-Infinity);
-                logitsData[this.timestamp_begin] = 0;
+                batch_logits_data.fill(-Infinity);
+                batch_logits_data[this.timestamp_begin] = 0;
                 continue;
             }
@@ -290,25 +288,25 @@ export class WhisperTimeStampLogitsProcessor extends LogitsProcessor {
             if (last_was_timestamp) {
                 if (penultimate_was_timestamp) { // has to be non-timestamp
-                    logitsData.subarray(this.timestamp_begin).fill(-Infinity);
+                    batch_logits_data.subarray(this.timestamp_begin).fill(-Infinity);
                 } else { // cannot be normal text tokens
-                    logitsData.subarray(0, this.eos_token_id).fill(-Infinity);
+                    batch_logits_data.subarray(0, this.eos_token_id).fill(-Infinity);
                 }
             }
             // apply the `max_initial_timestamp` option
             if (input_ids[i].length === this.begin_index && this.max_initial_timestamp_index !== null) {
                 const last_allowed = this.timestamp_begin + this.max_initial_timestamp_index;
-                logitsData.subarray(last_allowed + 1).fill(-Infinity);
+                batch_logits_data.subarray(last_allowed + 1).fill(-Infinity);
             }
             // if sum of probability over timestamps is above any other token, sample timestamp
-            const logprobs = log_softmax(logitsData);
+            const logprobs = log_softmax(batch_logits_data);
             const timestamp_logprob = Math.log(logprobs.subarray(this.timestamp_begin).map(Math.exp).reduce((a, b) => a + b));
             const max_text_token_logprob = max(logprobs.subarray(0, this.timestamp_begin))[0];
             if (timestamp_logprob > max_text_token_logprob) {
-                logitsData.subarray(0, this.timestamp_begin).fill(-Infinity);
+                batch_logits_data.subarray(0, this.timestamp_begin).fill(-Infinity);
             }
         }
@@ -397,10 +395,10 @@ export class NoRepeatNGramLogitsProcessor extends LogitsProcessor {
      */
     _call(input_ids, logits) {
         for (let i = 0; i < input_ids.length; ++i) {
-            const batch_logits = logits[i];
+            const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
             const bannedTokens = this.calcBannedNgramTokens(input_ids[i]);
             for (const token of bannedTokens) {
-                batch_logits.data[token] = -Infinity;
+                batch_logits_data[token] = -Infinity;
             }
         }
         return logits;
@@ -432,13 +430,13 @@ export class RepetitionPenaltyLogitsProcessor extends LogitsProcessor {
         // many times in the output will be penalised more.
         for (let i = 0; i < input_ids.length; ++i) {
-            const batch_logits = logits[i];
+            const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
             for (const input_id of input_ids[i]) {
-                if (batch_logits.data[input_id] < 0) {
-                    batch_logits.data[input_id] *= this.penalty;
+                const token = Number(input_id);
+                if (batch_logits_data[token] < 0) {
+                    batch_logits_data[token] *= this.penalty;
                 } else {
-                    batch_logits.data[input_id] /= this.penalty;
+                    batch_logits_data[token] /= this.penalty;
                 }
             }
         }
@@ -471,9 +469,10 @@ export class MinLengthLogitsProcessor extends LogitsProcessor {
     _call(input_ids, logits) {
         for (let i = 0; i < input_ids.length; ++i) {
             if (input_ids[i].length < this.min_length) {
-                const batch_logits = logits[i];
+                const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
                 for (const eos_token of this.eos_token_id) {
-                    batch_logits.data[eos_token] = -Infinity;
+                    batch_logits_data[eos_token] = -Infinity;
                 }
             }
         }
@@ -509,9 +508,10 @@ export class MinNewTokensLengthLogitsProcessor extends LogitsProcessor {
         for (let i = 0; i < input_ids.length; ++i) {
             const new_tokens_length = input_ids[i].length - this.prompt_length_to_skip;
             if (new_tokens_length < this.min_new_tokens) {
-                const batch_logits = logits[i];
+                const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
                 for (const eos_token of this.eos_token_id) {
-                    batch_logits[eos_token] = -Infinity;
+                    batch_logits_data[eos_token] = -Infinity;
                 }
             }
         }
@@ -539,23 +539,26 @@ export class NoBadWordsLogitsProcessor extends LogitsProcessor {
      */
     _call(input_ids, logits) {
         for (let i = 0; i < input_ids.length; ++i) {
-            const batch_logits = logits[i];
+            const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
+            const ids = input_ids[i];
             for (const bad_word_ids of this.bad_words_ids) {
                 // Whether to modify the logits of the last token in the bad word id sequence
                 let mark = true;
                 // For each bad word in the list, if the current sequence of input ids ends with this sequence (excluding the last),
                 // then we set the logits of the last bad word id to -Infinity.
-                for (let i = 1; i <= bad_word_ids.length - 1 && bad_word_ids.length < input_ids[i].length; ++i) {
+                for (let j = 1; j <= bad_word_ids.length - 1 && bad_word_ids.length < ids.length; ++j) {
-                    if (bad_word_ids.at(-i - 1) !== Number(input_ids[i].at(-i))) {
+                    // NOTE: We use != instead of !== to compare bigint and number
+                    // @ts-ignore
+                    if (bad_word_ids.at(-j - 1) != ids.at(-j)) {
                         // We have found a mismatch
                         mark = false;
                         break;
                     }
                 }
                 if (mark) {
-                    batch_logits[bad_word_ids.at(-1)] = -Infinity;
+                    batch_logits_data[bad_word_ids.at(-1)] = -Infinity;
                 }
             }
         }
@@ -650,9 +653,9 @@ export class TemperatureLogitsWarper extends LogitsWarper {
      * @returns {Object} The processed logits.
      */
     _call(input_ids, logits) {
-        const logitsData = /** @type {Float32Array} */(logits.data);
-        for (let i = 0; i < logitsData.length; ++i) {
-            logitsData[i] /= this.temperature;
+        const batch_logits_data = /** @type {Float32Array} */(logits.data);
+        for (let i = 0; i < batch_logits_data.length; ++i) {
+            batch_logits_data[i] /= this.temperature;
         }
         return logits;
     }

package/src/generation/streamers.js CHANGED Viewed

@@ -65,14 +65,14 @@ export class TextStreamer extends BaseStreamer {
             throw Error('TextStreamer only supports batch size of 1');
         }
-        const tokens = value[0];
-        this.token_callback_function?.(tokens)
         if (this.skip_prompt && this.next_tokens_are_prompt) {
             this.next_tokens_are_prompt = false;
             return;
         }
+        const tokens = value[0];
+        this.token_callback_function?.(tokens)
         // Add the new token to the cache and decodes the entire thing.
         this.token_cache = mergeArrays(this.token_cache, tokens);
         const text = this.tokenizer.decode(this.token_cache, this.decode_kwargs);