npm - @huggingface/transformers - Versions diffs - 4.0.0-next.1 → 4.0.0-next.10 - Mend

@huggingface/transformers 4.0.0-next.1 → 4.0.0-next.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (364) hide show

package/src/configs.js CHANGED Viewed

@@ -68,12 +68,20 @@ function getNormalizedConfig(config) {
         case 'florence2':
         case 'llava_onevision':
         case 'idefics3':
+        case 'granite_speech':
         case 'ultravox':
         case 'voxtral':
+        case 'voxtral_realtime':
         case 'smolvlm':
         case 'gemma3n':
+        case 'lfm2_vl':
         case 'chatterbox':
+        case 'lighton_ocr':
+        case 'glm_ocr':
         case 'mistral3':
+        case 'qwen2_5_vl':
+        case 'qwen3_vl':
+        case 'qwen3_vl_moe':
             // @ts-expect-error TS2339
             init_normalized_config = getNormalizedConfig(config.text_config);
             break;
@@ -115,6 +123,7 @@ function getNormalizedConfig(config) {
         case 'nanochat':
         case 'apertus':
         case 'arcee':
+        case 'afmoe':
         case 'lfm2':
         case 'lfm2_moe':
         case 'smollm3':
@@ -125,10 +134,19 @@ function getNormalizedConfig(config) {
         case 'granite':
         case 'granitemoehybrid':
         case 'cohere':
+        case 'cohere2':
         case 'mistral':
+        case 'voxtral_realtime_text':
+        case 'voxtral_realtime_encoder':
         case 'starcoder2':
         case 'qwen2':
+        case 'qwen2_moe':
         case 'qwen2_vl':
+        case 'qwen2_vl_text':
+        case 'qwen2_5_vl_text':
+        case 'qwen3_moe':
+        case 'qwen3_vl_text':
+        case 'qwen3_vl_moe_text':
         case 'phi':
         case 'phi3':
         case 'phi3_v':
@@ -140,6 +158,8 @@ function getNormalizedConfig(config) {
             mapping['dim_kv'] = 'head_dim';
             break;
         case 'qwen3':
+        case 'solar_open':
+        case 'glm_ocr_text':
         case 'gemma':
         case 'gemma2':
         case 'vaultgemma':
@@ -150,6 +170,7 @@ function getNormalizedConfig(config) {
         case 'ernie4_5':
         case 'hunyuan_v1_dense':
         case 'falcon_h1':
+        case 'nemotron_h':
         case 'ministral':
         case 'ministral3':
             mapping['num_heads'] = 'num_key_value_heads';
@@ -184,6 +205,9 @@ function getNormalizedConfig(config) {
             mapping['num_attention_heads'] = 'num_attention_heads';
             break;
         case 'youtu':
+        case 'deepseek_v3':
+        case 'glm_moe_dsa':
+        case 'mistral4':
             mapping['num_heads'] = 'num_key_value_heads';
             mapping['num_layers'] = 'num_hidden_layers';
             mapping['dim_kv'] = 'qk_head_dim';
@@ -242,6 +266,24 @@ function getNormalizedConfig(config) {
             mapping['num_encoder_heads'] = 'encoder_num_key_value_heads';
             mapping['encoder_hidden_size'] = mapping['decoder_hidden_size'] = 'hidden_size';
             break;
+        case 'cohere_asr':
+            mapping['num_decoder_layers'] = 'num_hidden_layers';
+            mapping['num_decoder_heads'] = 'num_key_value_heads';
+            mapping['decoder_hidden_size'] = 'hidden_size';
+            mapping['decoder_dim_kv'] = 'head_dim';
+            const {
+                num_hidden_layers: num_encoder_layers,
+                num_attention_heads: num_encoder_heads,
+                hidden_size: encoder_hidden_size,
+            } = /** @type {any} */ (config).encoder_config;
+            init_normalized_config = {
+                num_encoder_layers,
+                num_encoder_heads,
+                encoder_hidden_size,
+                // @ts-expect-error TS2339
+                encoder_dim_kv: config.head_dim,
+            };
+            break;
         case 'vision-encoder-decoder':
             // @ts-expect-error TS2339
             const decoderConfig = getNormalizedConfig(config.decoder);
@@ -283,17 +325,20 @@ function getNormalizedConfig(config) {
  * @returns {Record<string, number[]>}
  */
 export function getCacheShapes(config, options) {
+    if (!(config instanceof PretrainedConfig)) {
+        config = new PretrainedConfig(config);
+    }
+    const batch_size = options?.batch_size ?? 1;
     if (['lfm2', 'lfm2_moe'].includes(config.model_type)) {
         const pkv_prefix = options?.prefix ?? 'past_key_values';
         const conv_prefix = pkv_prefix === 'present' ? 'present' : 'past';
-        // Custom caching mechanism for LFM2
         /** @type {Record<string, number[]>} */
         const cache_values = {};
-        // @ts-expect-error TS2339
-        const { layer_types, num_attention_heads, num_key_value_heads, hidden_size, conv_L_cache } = config;
+        const { layer_types, num_attention_heads, num_key_value_heads, hidden_size, conv_L_cache } =
+            /** @type {any} */ (config);
         const head_dim = hidden_size / num_attention_heads;
-        const batch_size = options?.batch_size ?? 1;
         for (let i = 0; i < layer_types.length; ++i) {
             if (layer_types[i] === 'full_attention') {
                 for (const kv of ['key', 'value']) {
@@ -306,44 +351,99 @@ export function getCacheShapes(config, options) {
             }
         }
         return cache_values;
-    } else if (['granitemoehybrid', 'falcon_h1'].includes(config.model_type)) {
+    } else if (['granitemoehybrid', 'falcon_h1', 'nemotron_h'].includes(config.model_type)) {
         const pkv_prefix = options?.prefix ?? 'past_key_values';
         const conv_prefix = pkv_prefix === 'present' ? 'present' : 'past';
+        const c = /** @type {any} */ (config);
+        // Normalize config field names across model types
+        const layer_types = c.layer_types ?? c.layers_block_type;
+        const num_layers = c.num_hidden_layers ?? layer_types?.length;
+        const num_key_value_heads = c.num_key_value_heads;
+        const head_dim = c.head_dim ?? c.hidden_size / c.num_attention_heads;
+        const mamba_n_heads = c.mamba_n_heads ?? c.mamba_num_heads;
+        const mamba_d_head = c.mamba_d_head ?? c.mamba_head_dim;
+        const mamba_d_state = c.mamba_d_state ?? c.ssm_state_size;
+        const mamba_n_groups = c.mamba_n_groups ?? c.n_groups;
+        const mamba_d_conv = c.mamba_d_conv ?? c.conv_kernel;
+        const mamba_d_ssm =
+            c.mamba_d_ssm ?? (c.mamba_expand ? c.mamba_expand * c.hidden_size : mamba_n_heads * mamba_d_head);
+        const conv_d_inner = mamba_d_ssm + 2 * mamba_n_groups * mamba_d_state;
         /** @type {Record<string, number[]>} */
         const cache_values = {};
+        for (let i = 0; i < num_layers; ++i) {
+            if (!layer_types || layer_types[i] === 'mamba') {
+                cache_values[`${conv_prefix}_conv.${i}`] = [batch_size, conv_d_inner, mamba_d_conv];
+                cache_values[`${conv_prefix}_ssm.${i}`] = [batch_size, mamba_n_heads, mamba_d_head, mamba_d_state];
+            }
+            if (!layer_types || layer_types[i] === 'attention') {
+                for (const kv of ['key', 'value']) {
+                    cache_values[`${pkv_prefix}.${i}.${kv}`] = [batch_size, num_key_value_heads, 0, head_dim];
+                }
+            }
+        }
+        return cache_values;
+    } else if (['qwen3_next', 'qwen3_5_text', 'qwen3_5_moe_text', 'olmo_hybrid'].includes(config.model_type)) {
+        const pkv_prefix = options?.prefix ?? 'past_key_values';
+        const conv_prefix = pkv_prefix === 'present' ? 'present' : 'past';
+        /** @type {Record<string, number[]>} */
+        const cache_values = {};
         const {
+            head_dim,
             layer_types,
-            num_hidden_layers,
             num_attention_heads,
             num_key_value_heads,
             hidden_size,
-            mamba_d_conv,
-            mamba_n_heads,
-            mamba_d_head,
-            mamba_d_state,
-            mamba_n_groups,
-            mamba_expand,
-            mamba_d_ssm,
+            linear_num_value_heads,
+            linear_num_key_heads,
+            linear_key_head_dim,
+            linear_value_head_dim,
+            linear_conv_kernel_dim,
         } = /** @type {any} */ (config);
-        const head_dim = hidden_size / num_attention_heads;
-        const batch_size = options?.batch_size ?? 1;
-        const conv_d_inner = (mamba_d_ssm ?? mamba_expand * hidden_size) + 2 * mamba_n_groups * mamba_d_state;
-        for (let i = 0; i < num_hidden_layers; ++i) {
-            if (!layer_types || layer_types[i] === 'mamba') {
-                cache_values[`${conv_prefix}_conv.${i}`] = [batch_size, conv_d_inner, mamba_d_conv];
-                cache_values[`${conv_prefix}_ssm.${i}`] = [batch_size, mamba_n_heads, mamba_d_head, mamba_d_state];
-            }
-            if (!layer_types || layer_types[i] === 'attention') {
+        const key_dim = linear_key_head_dim * linear_num_key_heads;
+        const value_dim = linear_value_head_dim * linear_num_value_heads;
+        const final_head_dim = head_dim ?? hidden_size / num_attention_heads;
+        for (let i = 0; i < layer_types.length; ++i) {
+            if (layer_types[i] === 'full_attention') {
                 for (const kv of ['key', 'value']) {
-                    cache_values[`${pkv_prefix}.${i}.${kv}`] = [batch_size, num_key_value_heads, 0, head_dim];
+                    cache_values[`${pkv_prefix}.${i}.${kv}`] = [batch_size, num_key_value_heads, 0, final_head_dim];
                 }
+            } else if (layer_types[i] === 'linear_attention') {
+                if (config.model_type === 'olmo_hybrid') {
+                    cache_values[`${conv_prefix}_conv.${i}.key`] = [batch_size, key_dim, linear_conv_kernel_dim];
+                    cache_values[`${conv_prefix}_conv.${i}.value`] = [batch_size, value_dim, linear_conv_kernel_dim];
+                    cache_values[`${conv_prefix}_conv.${i}.query`] = [batch_size, key_dim, linear_conv_kernel_dim];
+                } else {
+                    const conv_dim = key_dim * 2 + value_dim;
+                    cache_values[`${conv_prefix}_conv.${i}`] = [batch_size, conv_dim, linear_conv_kernel_dim];
+                }
+                cache_values[`${conv_prefix}_recurrent.${i}`] = [
+                    batch_size,
+                    linear_num_value_heads,
+                    linear_key_head_dim,
+                    linear_value_head_dim,
+                ];
+            } else {
+                throw new Error(`Unsupported layer type: ${layer_types[i]}`);
             }
         }
         return cache_values;
+    } else if (['lfm2_vl', 'qwen3_5', 'qwen3_5_moe', 'voxtral_realtime'].includes(config.model_type)) {
+        let subConfig;
+        if (config.model_type === 'voxtral_realtime' && options?.session_name === 'audio_encoder') {
+            subConfig = /** @type {any} */ (config).audio_config;
+        } else {
+            subConfig = /** @type {any} */ (config).text_config;
+        }
+        return getCacheShapes(subConfig, options);
     }
     return getKeyValueShapes(config, options);
 }

package/src/env.js CHANGED Viewed

@@ -26,27 +26,43 @@ import fs from 'node:fs';
 import path from 'node:path';
 import url from 'node:url';
-const VERSION = '4.0.0-next.1';
+const VERSION = '4.0.0-next.10';
+const HAS_SELF = typeof self !== 'undefined';
-const IS_PROCESS_AVAILABLE = typeof process !== 'undefined';
-const IS_NODE_ENV = IS_PROCESS_AVAILABLE && process?.release?.name === 'node';
 const IS_FS_AVAILABLE = !isEmpty(fs);
 const IS_PATH_AVAILABLE = !isEmpty(path);
+const IS_WEB_CACHE_AVAILABLE = HAS_SELF && 'caches' in self;
 // Runtime detection
 const IS_DENO_RUNTIME = typeof globalThis.Deno !== 'undefined';
 const IS_BUN_RUNTIME = typeof globalThis.Bun !== 'undefined';
+const IS_DENO_WEB_RUNTIME = IS_DENO_RUNTIME && IS_WEB_CACHE_AVAILABLE && !IS_FS_AVAILABLE;
+const IS_PROCESS_AVAILABLE = typeof process !== 'undefined';
+const IS_NODE_ENV = IS_PROCESS_AVAILABLE && process?.release?.name === 'node' && !IS_DENO_WEB_RUNTIME;
 // Check if various APIs are available (depends on environment)
 const IS_BROWSER_ENV = typeof window !== 'undefined' && typeof window.document !== 'undefined';
 const IS_WEBWORKER_ENV =
-    typeof self !== 'undefined' &&
+    HAS_SELF &&
     ['DedicatedWorkerGlobalScope', 'ServiceWorkerGlobalScope', 'SharedWorkerGlobalScope'].includes(
         self.constructor?.name,
     );
-const IS_WEB_CACHE_AVAILABLE = typeof self !== 'undefined' && 'caches' in self;
+const IS_WEB_ENV = IS_BROWSER_ENV || IS_WEBWORKER_ENV || IS_DENO_WEB_RUNTIME;
 const IS_WEBGPU_AVAILABLE = IS_NODE_ENV || (typeof navigator !== 'undefined' && 'gpu' in navigator);
 const IS_WEBNN_AVAILABLE = typeof navigator !== 'undefined' && 'ml' in navigator;
+const IS_CRYPTO_AVAILABLE = typeof crypto !== 'undefined' && typeof crypto.getRandomValues === 'function';
+const IS_CHROME_AVAILABLE =
+    // @ts-ignore - chrome may not exist in all environments
+    typeof chrome !== 'undefined' && typeof chrome.runtime !== 'undefined' && typeof chrome.runtime.id === 'string';
+const IS_SERVICE_WORKER_ENV =
+    // @ts-ignore - ServiceWorkerGlobalScope may not exist in all environments
+    typeof ServiceWorkerGlobalScope !== 'undefined' && HAS_SELF && self instanceof ServiceWorkerGlobalScope;
 /**
  * Check if the current environment is Safari browser.
@@ -86,6 +102,15 @@ export const apis = Object.freeze({
     /** Whether we are running in a web worker environment */
     IS_WEBWORKER_ENV,
+    /** Whether we are running in a web-like environment (browser, web worker, or Deno web runtime) */
+    IS_WEB_ENV,
+    /** Whether we are running in a service worker environment */
+    IS_SERVICE_WORKER_ENV,
+    /** Whether we are running in Deno's web runtime (CDN imports, Cache API available, no filesystem) */
+    IS_DENO_WEB_RUNTIME,
     /** Whether the Cache API is available */
     IS_WEB_CACHE_AVAILABLE,
@@ -109,6 +134,12 @@ export const apis = Object.freeze({
     /** Whether the path API is available */
     IS_PATH_AVAILABLE,
+    /** Whether the crypto API is available */
+    IS_CRYPTO_AVAILABLE,
+    /** Whether the Chrome runtime API is available */
+    IS_CHROME_AVAILABLE,
 });
 const RUNNING_LOCALLY = IS_FS_AVAILABLE && IS_PATH_AVAILABLE;
@@ -134,12 +165,48 @@ const DEFAULT_CACHE_DIR = RUNNING_LOCALLY ? path.join(dirname__, '/.cache/') : n
 const DEFAULT_LOCAL_MODEL_PATH = '/models/';
 const localModelPath = RUNNING_LOCALLY ? path.join(dirname__, DEFAULT_LOCAL_MODEL_PATH) : DEFAULT_LOCAL_MODEL_PATH;
+// Ensure default fetch is called with the correct receiver in browser environments.
+const DEFAULT_FETCH = typeof globalThis.fetch === 'function' ? globalThis.fetch.bind(globalThis) : undefined;
+/**
+ * Log levels for controlling output verbosity.
+ *
+ * Each level is represented by a number, where lower numbers are more verbose: a given level shows its own messages plus those of all higher-numbered levels.
+ * Use these values to set `env.logLevel`.
+ *
+ * @example
+ * import { env, LogLevel } from '@huggingface/transformers';
+ *
+ * // Set log level to show only errors
+ * env.logLevel = LogLevel.ERROR;
+ *
+ * // Set log level to show errors, warnings, and info
+ * env.logLevel = LogLevel.INFO;
+ *
+ * // Disable all logging
+ * env.logLevel = LogLevel.NONE;
+ *
+ */
+export const LogLevel = Object.freeze({
+    /** All messages including debug output (value: 10) */
+    DEBUG: 10,
+    /** Errors, warnings, and info messages (value: 20) */
+    INFO: 20,
+    /** Errors and warnings (value: 30) */
+    WARNING: 30,
+    /** Only error messages (value: 40) */
+    ERROR: 40,
+    /** No logging output (value: 50) */
+    NONE: 50,
+});
 /**
  * Global variable made visible to users to control execution. This provides users a simple way to configure Transformers.js.
  * @typedef {Object} TransformersEnvironment
  * @property {string} version This version of Transformers.js.
- * @property {{onnx: Partial<import('onnxruntime-common').Env>}} backends Expose environment variables of different backends,
+ * @property {{onnx: Partial<import('onnxruntime-common').Env> & { setLogLevel?: (logLevel: number) => void }}} backends Expose environment variables of different backends,
  * allowing users to set these variables if they want to.
+ * @property {number} logLevel The logging level. Use LogLevel enum values. Defaults to LogLevel.WARNING.
  * @property {boolean} allowRemoteModels Whether to allow loading of remote files, defaults to `true`.
  * If set to `false`, it will have the same effect as setting `local_files_only=true` when loading pipelines, models, tokenizers, processors, etc.
  * @property {string} remoteHost Host URL to load models from. Defaults to the Hugging Face Hub.
@@ -154,12 +221,18 @@ const localModelPath = RUNNING_LOCALLY ? path.join(dirname__, DEFAULT_LOCAL_MODE
  * @property {boolean} useCustomCache Whether to use a custom cache system (defined by `customCache`), defaults to `false`.
  * @property {import('./utils/cache.js').CacheInterface|null} customCache The custom cache to use. Defaults to `null`. Note: this must be an object which
  * implements the `match` and `put` functions of the Web Cache API. For more information, see https://developer.mozilla.org/en-US/docs/Web/API/Cache.
- * @property {boolean} useWasmCache Whether to pre-load and cache WASM binaries for ONNX Runtime. Defaults to `true` when cache is available.
- * This can improve performance by avoiding repeated downloads of WASM files. Note: Only the WASM binary is cached.
- * The MJS loader file still requires network access unless you use a Service Worker.
+ * @property {boolean} useWasmCache Whether to pre-load and cache WASM binaries and the WASM factory (.mjs) for ONNX Runtime.
+ * Defaults to `true` when cache is available. This can improve performance and enables offline usage by avoiding repeated downloads.
  * @property {string} cacheKey The cache key to use for storing models and WASM binaries. Defaults to 'transformers-cache'.
+ * @property {boolean} experimental_useCrossOriginStorage Whether to use the Cross-Origin Storage API to cache model files
+ * across origins, allowing different sites to share the same cached model weights. Defaults to `false`.
+ * Requires the Cross-Origin Storage Chrome extension: {@link https://chromewebstore.google.com/detail/cross-origin-storage/denpnpcgjgikjpoglpjefakmdcbmlgih}.
+ * The `experimental_` prefix indicates that the underlying browser API is not yet standardised and may change or be
+ * removed without a major version bump. For more information, see {@link https://github.com/WICG/cross-origin-storage}.
+ * @property {(input: string | URL, init?: any) => Promise<any>} fetch The fetch function to use. Defaults to `fetch`.
  */
+let logLevel = LogLevel.WARNING; // Default log level
 /** @type {TransformersEnvironment} */
 export const env = {
     version: VERSION,
@@ -171,17 +244,27 @@ export const env = {
         onnx: {},
     },
+    /////////////////// Logging settings ///////////////////
+    get logLevel() {
+        return logLevel;
+    },
+    set logLevel(level) {
+        logLevel = level;
+        // invoke hook to set ONNX Runtime log level when Transformers.js log level changes
+        env.backends.onnx?.setLogLevel?.(level);
+    },
     /////////////////// Model settings ///////////////////
     allowRemoteModels: true,
     remoteHost: 'https://huggingface.co/',
     remotePathTemplate: '{model}/resolve/{revision}/',
-    allowLocalModels: !(IS_BROWSER_ENV || IS_WEBWORKER_ENV),
+    allowLocalModels: !(IS_BROWSER_ENV || IS_WEBWORKER_ENV || IS_DENO_WEB_RUNTIME), // Default to true for non-web environments, false for web environments
     localModelPath: localModelPath,
     useFS: IS_FS_AVAILABLE,
     /////////////////// Cache settings ///////////////////
-    useBrowserCache: IS_WEB_CACHE_AVAILABLE && !IS_DENO_RUNTIME,
+    useBrowserCache: IS_WEB_CACHE_AVAILABLE,
     useFSCache: IS_FS_AVAILABLE,
     cacheDir: DEFAULT_CACHE_DIR,
@@ -191,6 +274,12 @@ export const env = {
     useWasmCache: IS_WEB_CACHE_AVAILABLE || IS_FS_AVAILABLE,
     cacheKey: 'transformers-cache',
+    experimental_useCrossOriginStorage: false,
+    /////////////////// Custom fetch /////////////////////
+    fetch: DEFAULT_FETCH,
     //////////////////////////////////////////////////////
 };

package/src/generation/logits_sampler.js CHANGED Viewed

@@ -6,6 +6,7 @@ import { Callable } from '../utils/generic.js';
 import { Tensor, topk } from '../utils/tensor.js';
 import { max, softmax } from '../utils/maths.js';
+import { _weightedIndex } from '../utils/random.js';
 import { GenerationConfig } from '../generation/configuration_utils.js';
 /**
@@ -64,24 +65,11 @@ export class LogitsSampler extends Callable {
     /**
      * Selects an item randomly based on the specified probabilities.
-     * @param {import("../transformers.js").DataArray} probabilities An array of probabilities to use for selection.
+     * @param {Float32Array} probabilities An array of probabilities to use for selection.
      * @returns {number} The index of the selected item.
      */
     randomSelect(probabilities) {
-        // Return index of chosen item
-        let sumProbabilities = 0;
-        for (let i = 0; i < probabilities.length; ++i) {
-            sumProbabilities += probabilities[i];
-        }
-        let r = Math.random() * sumProbabilities;
-        for (let i = 0; i < probabilities.length; ++i) {
-            r -= probabilities[i];
-            if (r <= 0) {
-                return i;
-            }
-        }
-        return 0; // return first (most probable) as a fallback
+        return _weightedIndex(probabilities);
     }
     /**

package/src/generation/parameters.js CHANGED Viewed

@@ -21,7 +21,7 @@
  * Custom logits processors that complement the default logits processors built from arguments and
  * generation config. If a logit processor is passed that is already created with the arguments or a
  * generation config an error is thrown. This feature is intended for advanced users.
- * @property {import('./stopping_criteria.js').StoppingCriteriaList} [stopping_criteria=null] (`StoppingCriteriaList`, *optional*):
+ * @property {import('./stopping_criteria.js').StoppingCriteria|import('./stopping_criteria.js').StoppingCriteria[]|import('./stopping_criteria.js').StoppingCriteriaList} [stopping_criteria=null] (`StoppingCriteriaList`, *optional*):
  * Custom stopping criteria that complements the default stopping criteria built from arguments and a
  * generation config. If a stopping criteria is passed that is already created with the arguments or a
  * generation config an error is thrown. This feature is intended for advanced users.

package/src/generation/streamers.js CHANGED Viewed

@@ -70,6 +70,9 @@ export class TextStreamer extends BaseStreamer {
         this.token_cache = [];
         this.print_len = 0;
         this.next_tokens_are_prompt = true;
+        // Track special token IDs for special handling during streaming.
+        this.special_ids = new Set(this.tokenizer.all_special_ids.map(BigInt));
     }
     /**
@@ -90,6 +93,24 @@ export class TextStreamer extends BaseStreamer {
         const tokens = value[0];
         this.token_callback_function?.(tokens);
+        // Handle special tokens: flush any existing text, then print or skip them
+        if (tokens.length === 1 && this.special_ids.has(tokens[0])) {
+            if (this.decode_kwargs.skip_special_tokens) return;
+            // Flush any existing cached text first
+            if (this.token_cache.length > 0) {
+                const text = this.tokenizer.decode(this.token_cache, this.decode_kwargs);
+                const printable_text = text.slice(this.print_len);
+                this.on_finalized_text(printable_text, false);
+                this.token_cache = [];
+                this.print_len = 0;
+            }
+            // Print the special token immediately
+            const special_text = this.tokenizer.decode(tokens, this.decode_kwargs);
+            this.on_finalized_text(special_text, false);
+            return;
+        }
         // Add the new token to the cache and decode the entire thing.
         this.token_cache = mergeArrays(this.token_cache, tokens);
         const text = this.tokenizer.decode(this.token_cache, this.decode_kwargs);

package/src/image_processors_utils.js CHANGED Viewed

@@ -5,6 +5,7 @@ import { RawImage } from './utils/image.js';
 import { calculateReflectOffset } from './utils/core.js';
 import { getModelJSON } from './utils/hub.js';
 import { IMAGE_PROCESSOR_NAME } from './utils/constants.js';
+import { logger } from './utils/logger.js';
 /**
  * Named tuple to indicate the order we are using is (height x width),
@@ -13,7 +14,7 @@ import { IMAGE_PROCESSOR_NAME } from './utils/constants.js';
  */
 /**
- * @typedef {object} ImageProcessorResult
+ * @typedef {Object} ImageProcessorResult
  * @property {Tensor} pixel_values The pixel values of the batched preprocessed images.
  * @property {HeightWidth[]} original_sizes Array of two-dimensional tuples like [[480, 640]].
  * @property {HeightWidth[]} reshaped_input_sizes Array of two-dimensional tuples like [[1000, 1330]].
@@ -403,13 +404,24 @@ function compute_segments(
  * @param {number} [factor=28] The factor to use for resizing.
  * @param {number} [min_pixels=56*56] The minimum number of pixels.
  * @param {number} [max_pixels=14*14*4*1280] The maximum number of pixels.
- * @returns {[number, number]} The new height and width of the image.
+ * @param {number} [temporal_factor=1] The temporal factor to include in the pixel budget (e.g. temporal_patch_size for video/3D models).
+ * @returns {[number, number]} The new width and height of the image.
  * @throws {Error} If the aspect ratio (longer side divided by shorter side) is greater than 200.
  */
-function smart_resize(height, width, factor = 28, min_pixels = 56 * 56, max_pixels = 14 * 14 * 4 * 1280) {
+export function smart_resize(
+    height,
+    width,
+    factor = 28,
+    min_pixels = 56 * 56,
+    max_pixels = 14 * 14 * 4 * 1280,
+    temporal_factor = 1,
+) {
     if (height < factor || width < factor) {
-        throw new Error(`height:${height} or width:${width} must be larger than factor:${factor}`);
-    } else if (Math.max(height, width) / Math.min(height, width) > 200) {
+        const scale = Math.max(factor / height, factor / width);
+        height = Math.round(height * scale);
+        width = Math.round(width * scale);
+    }
+    if (Math.max(height, width) / Math.min(height, width) > 200) {
         throw new Error(
             `absolute aspect ratio must be smaller than 200, got ${Math.max(height, width) / Math.min(height, width)}`,
         );
@@ -418,17 +430,17 @@ function smart_resize(height, width, factor = 28, min_pixels = 56 * 56, max_pixe
     let h_bar = Math.round(height / factor) * factor;
     let w_bar = Math.round(width / factor) * factor;
-    if (h_bar * w_bar > max_pixels) {
-        const beta = Math.sqrt((height * width) / max_pixels);
-        h_bar = Math.floor(height / beta / factor) * factor;
-        w_bar = Math.floor(width / beta / factor) * factor;
-    } else if (h_bar * w_bar < min_pixels) {
-        const beta = Math.sqrt(min_pixels / (height * width));
+    if (temporal_factor * h_bar * w_bar > max_pixels) {
+        const beta = Math.sqrt((temporal_factor * height * width) / max_pixels);
+        h_bar = Math.max(factor, Math.floor(height / beta / factor) * factor);
+        w_bar = Math.max(factor, Math.floor(width / beta / factor) * factor);
+    } else if (temporal_factor * h_bar * w_bar < min_pixels) {
+        const beta = Math.sqrt(min_pixels / (temporal_factor * height * width));
         h_bar = Math.ceil((height * beta) / factor) * factor;
         w_bar = Math.ceil((width * beta) / factor) * factor;
     }
-    return [h_bar, w_bar];
+    return [w_bar, h_bar];
 }
 /**
@@ -450,7 +462,7 @@ export function post_process_panoptic_segmentation(
     target_sizes = null,
 ) {
     if (label_ids_to_fuse === null) {
-        console.warn('`label_ids_to_fuse` unset. No instance will be fused.');
+        logger.warn('`label_ids_to_fuse` unset. No instance will be fused.');
         label_ids_to_fuse = new Set();
     }
@@ -592,6 +604,7 @@ export class ImageProcessor extends Callable {
         if (
             this.do_pad &&
             !this.pad_size &&
+            !this.size_divisibility &&
             this.size &&
             this.size.width !== undefined &&
             this.size.height !== undefined
@@ -864,11 +877,6 @@ export class ImageProcessor extends Callable {
             return [newWidth, newHeight];
         } else if (this.size_divisibility !== undefined) {
             return enforce_size_divisibility([srcWidth, srcHeight], this.size_divisibility);
-        } else if (this.min_pixels !== undefined && this.max_pixels !== undefined) {
-            // Custom resize logic for Qwen2-VL models
-            // @ts-expect-error TS2339
-            const factor = this.config.patch_size * this.config.merge_size;
-            return smart_resize(srcHeight, srcWidth, factor, this.min_pixels, this.max_pixels);
         } else {
             throw new Error(
                 `Could not resize image due to unsupported \`this.size\` option in config: ${JSON.stringify(size)}`,
@@ -890,7 +898,7 @@ export class ImageProcessor extends Callable {
     }
     /**
-     * @typedef {object} PreprocessedImage
+     * @typedef {Object} PreprocessedImage
      * @property {HeightWidth} original_size The original size of the image.
      * @property {HeightWidth} reshaped_input_size The reshaped input size of the image.
      * @property {Tensor} pixel_values The pixel values of the preprocessed image.
@@ -1000,10 +1008,8 @@ export class ImageProcessor extends Callable {
                 const padded = this.pad_image(pixelData, [image.height, image.width, image.channels], this.pad_size);
                 [pixelData, imgDims] = padded; // Update pixel data and image dimensions
             } else if (this.size_divisibility) {
-                const [paddedWidth, paddedHeight] = enforce_size_divisibility(
-                    [imgDims[1], imgDims[0]],
-                    this.size_divisibility,
-                );
+                const paddedWidth = Math.ceil(imgDims[1] / this.size_divisibility) * this.size_divisibility;
+                const paddedHeight = Math.ceil(imgDims[0] / this.size_divisibility) * this.size_divisibility;
                 [pixelData, imgDims] = this.pad_image(pixelData, imgDims, { width: paddedWidth, height: paddedHeight });
             }
         }

package/src/models/afmoe/modeling_afmoe.js ADDED Viewed

@@ -0,0 +1,5 @@
+import { PreTrainedModel } from '../modeling_utils.js';
+export class AfmoePreTrainedModel extends PreTrainedModel {}
+export class AfmoeModel extends AfmoePreTrainedModel {}
+export class AfmoeForCausalLM extends AfmoePreTrainedModel {}

package/src/models/auto/image_processing_auto.js CHANGED Viewed

@@ -2,6 +2,7 @@ import { getModelJSON } from '../../utils/hub.js';
 import { ImageProcessor } from '../../image_processors_utils.js';
 import * as AllImageProcessors from '../image_processors.js';
 import { GITHUB_ISSUE_URL, IMAGE_PROCESSOR_NAME } from '../../utils/constants.js';
+import { logger } from '../../utils/logger.js';
 export class AutoImageProcessor {
     /** @type {typeof ImageProcessor.from_pretrained} */
@@ -20,7 +21,7 @@ export class AutoImageProcessor {
         if (!image_processor_class) {
             if (key !== undefined) {
                 // Only log a warning if the class is not found and the key is set.
-                console.warn(
+                logger.warn(
                     `Image processor type '${key}' not found, assuming base ImageProcessor. Please report this at ${GITHUB_ISSUE_URL}.`,
                 );
             }