npm - @huggingface/transformers - Versions diffs - 4.0.0-next.3 → 4.0.0-next.5 - Mend

@huggingface/transformers 4.0.0-next.3 → 4.0.0-next.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (168) hide show

package/README.md +10 -4
package/dist/ort-wasm-simd-threaded.jsep.mjs +28 -28
package/dist/transformers.js +3109 -2099
package/dist/transformers.min.js +17 -19
package/dist/transformers.node.cjs +3100 -2060
package/dist/transformers.node.min.cjs +19 -21
package/dist/transformers.node.min.mjs +19 -21
package/dist/transformers.node.mjs +3085 -2060
package/dist/transformers.web.js +1312 -276
package/dist/transformers.web.min.js +15 -15
package/package.json +4 -4
package/src/backends/onnx.js +66 -10
package/src/backends/utils/cacheWasm.js +9 -6
package/src/configs.js +52 -3
package/src/env.js +66 -7
package/src/generation/logits_sampler.js +3 -15
package/src/image_processors_utils.js +2 -6
package/src/models/afmoe/modeling_afmoe.js +5 -0
package/src/models/auto/image_processing_auto.js +2 -1
package/src/models/auto/modeling_auto.js +2 -1
package/src/models/auto/tokenization_auto.js +2 -1
package/src/models/clap/feature_extraction_clap.js +2 -1
package/src/models/cohere2/modeling_cohere2.js +5 -0
package/src/models/marian/tokenization_marian.js +3 -2
package/src/models/modeling_utils.js +14 -4
package/src/models/models.js +6 -0
package/src/models/paligemma/processing_paligemma.js +3 -2
package/src/models/processors.js +2 -0
package/src/models/qwen2_5_vl/modeling_qwen2_5_vl.js +5 -0
package/src/models/qwen2_5_vl/processing_qwen2_5_vl.js +3 -0
package/src/models/qwen2_vl/image_processing_qwen2_vl.js +54 -0
package/src/models/qwen2_vl/modeling_qwen2_vl.js +8 -2
package/src/models/qwen3_5/modeling_qwen3_5.js +3 -0
package/src/models/qwen3_5_moe/modeling_qwen3_5_moe.js +3 -0
package/src/models/qwen3_vl/modeling_qwen3_vl.js +3 -0
package/src/models/qwen3_vl/processing_qwen3_vl.js +3 -0
package/src/models/registry.js +9 -1
package/src/models/session.js +16 -50
package/src/models/whisper/feature_extraction_whisper.js +2 -1
package/src/models/whisper/modeling_whisper.js +6 -5
package/src/models/xlm/tokenization_xlm.js +2 -1
package/src/pipelines/automatic-speech-recognition.js +3 -2
package/src/pipelines/index.js +395 -0
package/src/pipelines/text-generation.js +4 -0
package/src/pipelines/text-to-audio.js +4 -2
package/src/pipelines/zero-shot-classification.js +3 -2
package/src/pipelines.js +104 -356
package/src/tokenization_utils.js +42 -21
package/src/transformers.js +8 -1
package/src/utils/audio.js +2 -1
package/src/utils/cache.js +4 -1
package/src/utils/core.js +23 -1
package/src/utils/devices.js +22 -0
package/src/utils/dtypes.js +55 -0
package/src/utils/hub/files.js +17 -2
package/src/utils/hub/utils.js +10 -4
package/src/utils/hub.js +57 -17
package/src/utils/image.js +2 -1
package/src/utils/logger.js +67 -0
package/src/utils/model-loader.js +35 -17
package/src/utils/model_registry/ModelRegistry.js +299 -0
package/src/utils/model_registry/clear_cache.js +128 -0
package/src/utils/model_registry/get_file_metadata.js +149 -0
package/src/utils/model_registry/get_files.js +42 -0
package/src/utils/model_registry/get_model_files.js +182 -0
package/src/utils/model_registry/get_pipeline_files.js +53 -0
package/src/utils/model_registry/get_processor_files.js +20 -0
package/src/utils/model_registry/get_tokenizer_files.js +21 -0
package/src/utils/model_registry/is_cached.js +92 -0
package/src/utils/random.js +225 -0
package/src/utils/tensor.js +8 -21
package/src/utils/video.js +2 -2
package/types/backends/onnx.d.ts.map +1 -1
package/types/backends/utils/cacheWasm.d.ts.map +1 -1
package/types/configs.d.ts.map +1 -1
package/types/env.d.ts +42 -24
package/types/env.d.ts.map +1 -1
package/types/generation/logits_sampler.d.ts +2 -2
package/types/generation/logits_sampler.d.ts.map +1 -1
package/types/image_processors_utils.d.ts.map +1 -1
package/types/models/afmoe/modeling_afmoe.d.ts +8 -0
package/types/models/afmoe/modeling_afmoe.d.ts.map +1 -0
package/types/models/auto/image_processing_auto.d.ts.map +1 -1
package/types/models/auto/modeling_auto.d.ts.map +1 -1
package/types/models/auto/tokenization_auto.d.ts.map +1 -1
package/types/models/clap/feature_extraction_clap.d.ts.map +1 -1
package/types/models/cohere2/modeling_cohere2.d.ts +8 -0
package/types/models/cohere2/modeling_cohere2.d.ts.map +1 -0
package/types/models/marian/tokenization_marian.d.ts.map +1 -1
package/types/models/modeling_utils.d.ts.map +1 -1
package/types/models/models.d.ts +6 -0
package/types/models/paligemma/processing_paligemma.d.ts.map +1 -1
package/types/models/processors.d.ts +2 -0
package/types/models/qwen2_5_vl/modeling_qwen2_5_vl.d.ts +4 -0
package/types/models/qwen2_5_vl/modeling_qwen2_5_vl.d.ts.map +1 -0
package/types/models/qwen2_5_vl/processing_qwen2_5_vl.d.ts +4 -0
package/types/models/qwen2_5_vl/processing_qwen2_5_vl.d.ts.map +1 -0
package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts +3 -0
package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts.map +1 -1
package/types/models/qwen2_vl/modeling_qwen2_vl.d.ts +1 -0
package/types/models/qwen2_vl/modeling_qwen2_vl.d.ts.map +1 -1
package/types/models/qwen3_5/modeling_qwen3_5.d.ts +4 -0
package/types/models/qwen3_5/modeling_qwen3_5.d.ts.map +1 -0
package/types/models/qwen3_5_moe/modeling_qwen3_5_moe.d.ts +4 -0
package/types/models/qwen3_5_moe/modeling_qwen3_5_moe.d.ts.map +1 -0
package/types/models/qwen3_vl/modeling_qwen3_vl.d.ts +4 -0
package/types/models/qwen3_vl/modeling_qwen3_vl.d.ts.map +1 -0
package/types/models/qwen3_vl/processing_qwen3_vl.d.ts +4 -0
package/types/models/qwen3_vl/processing_qwen3_vl.d.ts.map +1 -0
package/types/models/registry.d.ts.map +1 -1
package/types/models/session.d.ts.map +1 -1
package/types/models/whisper/feature_extraction_whisper.d.ts.map +1 -1
package/types/models/whisper/modeling_whisper.d.ts.map +1 -1
package/types/models/xlm/tokenization_xlm.d.ts.map +1 -1
package/types/pipelines/automatic-speech-recognition.d.ts.map +1 -1
package/types/pipelines/index.d.ts +299 -0
package/types/pipelines/index.d.ts.map +1 -0
package/types/pipelines/text-generation.d.ts +5 -1
package/types/pipelines/text-generation.d.ts.map +1 -1
package/types/pipelines/text-to-audio.d.ts.map +1 -1
package/types/pipelines/zero-shot-classification.d.ts.map +1 -1
package/types/pipelines.d.ts +50 -291
package/types/pipelines.d.ts.map +1 -1
package/types/tokenization_utils.d.ts +44 -26
package/types/tokenization_utils.d.ts.map +1 -1
package/types/transformers.d.ts +6 -1
package/types/transformers.d.ts.map +1 -1
package/types/utils/audio.d.ts.map +1 -1
package/types/utils/cache.d.ts +6 -0
package/types/utils/cache.d.ts.map +1 -1
package/types/utils/core.d.ts +59 -2
package/types/utils/core.d.ts.map +1 -1
package/types/utils/devices.d.ts +15 -0
package/types/utils/devices.d.ts.map +1 -1
package/types/utils/dtypes.d.ts +16 -0
package/types/utils/dtypes.d.ts.map +1 -1
package/types/utils/hub/files.d.ts +6 -0
package/types/utils/hub/files.d.ts.map +1 -1
package/types/utils/hub/utils.d.ts +2 -1
package/types/utils/hub/utils.d.ts.map +1 -1
package/types/utils/hub.d.ts +29 -0
package/types/utils/hub.d.ts.map +1 -1
package/types/utils/image.d.ts.map +1 -1
package/types/utils/logger.d.ts +28 -0
package/types/utils/logger.d.ts.map +1 -0
package/types/utils/model-loader.d.ts +15 -0
package/types/utils/model-loader.d.ts.map +1 -1
package/types/utils/model_registry/ModelRegistry.d.ts +211 -0
package/types/utils/model_registry/ModelRegistry.d.ts.map +1 -0
package/types/utils/model_registry/clear_cache.d.ts +74 -0
package/types/utils/model_registry/clear_cache.d.ts.map +1 -0
package/types/utils/model_registry/get_file_metadata.d.ts +20 -0
package/types/utils/model_registry/get_file_metadata.d.ts.map +1 -0
package/types/utils/model_registry/get_files.d.ts +23 -0
package/types/utils/model_registry/get_files.d.ts.map +1 -0
package/types/utils/model_registry/get_model_files.d.ts +22 -0
package/types/utils/model_registry/get_model_files.d.ts.map +1 -0
package/types/utils/model_registry/get_pipeline_files.d.ts +21 -0
package/types/utils/model_registry/get_pipeline_files.d.ts.map +1 -0
package/types/utils/model_registry/get_processor_files.d.ts +9 -0
package/types/utils/model_registry/get_processor_files.d.ts.map +1 -0
package/types/utils/model_registry/get_tokenizer_files.d.ts +9 -0
package/types/utils/model_registry/get_tokenizer_files.d.ts.map +1 -0
package/types/utils/model_registry/is_cached.d.ts +62 -0
package/types/utils/model_registry/is_cached.d.ts.map +1 -0
package/types/utils/random.d.ts +86 -0
package/types/utils/random.d.ts.map +1 -0
package/types/utils/tensor.d.ts.map +1 -1

package/src/utils/model_registry/get_model_files.js ADDED Viewed

@@ -0,0 +1,182 @@
+import { DEFAULT_DTYPE_SUFFIX_MAPPING, selectDtype } from '../dtypes.js';
+import { selectDevice } from '../devices.js';
+import { resolveExternalDataFormat, getExternalDataChunkNames } from '../model-loader.js';
+import { MODEL_TYPES, MODEL_TYPE_MAPPING } from '../../models/modeling_utils.js';
+import { AutoConfig } from '../../configs.js';
+import { GITHUB_ISSUE_URL } from '../constants.js';
+import { logger } from '../logger.js';
+/**
+ * Returns the list of files that will be loaded for a model based on its configuration.
+ *
+ * This function reads configuration from the model's config.json on the hub.
+ * If dtype/device are not specified in the config, you can provide them to match
+ * what the pipeline will actually use.
+ *
+ * Layout of the result:
+ *  - `config.json` is always the first entry.
+ *  - Each ONNX component is listed as `onnx/<name><dtype suffix>.onnx`, immediately followed by
+ *    the external-data chunk files resolved for it (if any).
+ *  - `generation_config.json` is appended by the branches below that push it.
+ *
+ * An explicit `dtype`/`device` option takes precedence over the value stored in the
+ * `transformers.js_config` section of the config.
+ *
+ * The model type is looked up in `MODEL_TYPE_MAPPING`, first by each entry of
+ * `config.architectures`, then by `config.model_type`. If neither is found, a warning is logged
+ * and the model is treated as `MODEL_TYPES.EncoderOnly` (a single model file).
+ *
+ * @param {string} modelId The model id (e.g., "onnx-community/granite-4.0-350m-ONNX-web")
+ * @param {Object} [options] Optional parameters
+ * @param {import('../../configs.js').PretrainedConfig} [options.config=null] Pre-loaded model config (optional, will be fetched if not provided)
+ * @param {import('../dtypes.js').DataType|Record<string, import('../dtypes.js').DataType>} [options.dtype=null] Override dtype (use this if passing dtype to pipeline)
+ * @param {import('../devices.js').DeviceType|Record<string, import('../devices.js').DeviceType>} [options.device=null] Override device (use this if passing device to pipeline)
+ * @param {string} [options.model_file_name=null] Override the model file name (excluding .onnx suffix).
+ *   Only used by the DecoderOnly, DecoderOnlyWithoutHead and EncoderOnly/fallback branches.
+ * @returns {Promise<string[]>} Array of file paths that will be loaded
+ */
+export async function get_model_files(
+    modelId,
+    { config = null, dtype: overrideDtype = null, device: overrideDevice = null, model_file_name = null } = {},
+) {
+    config = await AutoConfig.from_pretrained(modelId, { config }); // a pre-loaded config is handed through to the loader
+    const files = [
+        // Add config.json (always loaded)
+        'config.json',
+    ];
+    const custom_config = config['transformers.js_config'] ?? {};
+    const use_external_data_format = custom_config.use_external_data_format;
+    const subfolder = 'onnx'; // Always 'onnx' as per the default in from_pretrained
+    const rawDevice = overrideDevice ?? custom_config.device; // explicit override wins over the config value
+    let dtype = overrideDtype ?? custom_config.dtype; // explicit override wins over the config value
+    // Infer model type from config
+    let modelType;
+    // @ts-ignore - architectures is set via Object.assign in PretrainedConfig constructor
+    const architectures = /** @type {string[]} */ (config.architectures || []);
+    // Try to find a known architecture in MODEL_TYPE_MAPPING (first match wins)
+    // This ensures we use the same logic as from_pretrained()
+    let foundInMapping = false;
+    for (const arch of architectures) {
+        const mappedType = MODEL_TYPE_MAPPING.get(arch);
+        if (mappedType !== undefined) {
+            modelType = mappedType;
+            foundInMapping = true;
+            break;
+        }
+    }
+    // If not found by architecture, try model_type (handles custom models with no architectures)
+    if (!foundInMapping && config.model_type) {
+        const mappedType = MODEL_TYPE_MAPPING.get(config.model_type);
+        if (mappedType !== undefined) {
+            modelType = mappedType;
+            foundInMapping = true;
+        }
+    }
+    // Fall back to EncoderOnly if not found in mapping
+    if (!foundInMapping) {
+        const archList = architectures.length > 0 ? architectures.join(', ') : '(none)';
+        logger.warn(
+            `[get_model_files] Architecture(s) not found in MODEL_TYPE_MAPPING: [${archList}] ` +
+                `for model type '${config.model_type}'. Falling back to EncoderOnly (single model.onnx file). ` +
+                `If you encounter issues, please report at: ${GITHUB_ISSUE_URL}`,
+        );
+        // Always fallback to EncoderOnly (single model.onnx file)
+        // Other model types (Vision2Seq, Musicgen, etc.) require specific file structures
+        // and should be properly registered in MODEL_TYPE_MAPPING if they are valid.
+        modelType = MODEL_TYPES.EncoderOnly;
+    }
+    const add_model_file = (fileName, baseName = null) => { // fileName is passed to selectDevice/selectDtype; baseName (defaults to fileName) forms the path
+        baseName = baseName ?? fileName;
+        const selectedDevice = selectDevice(rawDevice, fileName);
+        const selectedDtype = selectDtype(dtype, fileName, selectedDevice);
+        const suffix = DEFAULT_DTYPE_SUFFIX_MAPPING[selectedDtype] ?? ''; // no suffix when the dtype has no mapping entry
+        const fullName = `${baseName}${suffix}.onnx`;
+        const fullPath = subfolder ? `${subfolder}/${fullName}` : fullName;
+        files.push(fullPath);
+        // Check for external data files; their paths are pushed right after the model file itself
+        const num_chunks = resolveExternalDataFormat(use_external_data_format, fullName, fileName);
+        for (const dataFileName of getExternalDataChunkNames(fullName, num_chunks)) {
+            const dataFilePath = subfolder ? `${subfolder}/${dataFileName}` : dataFileName;
+            files.push(dataFilePath);
+        }
+    };
+    // model_file_name overrides the default ONNX file name for single-model architectures
+    // (encoder-only, decoder-only). Multi-component models use fixed names.
+    const singleModelName = model_file_name ?? 'model';
+    // Add model files based on model type; the push order below is the order of the returned list
+    if (modelType === MODEL_TYPES.DecoderOnly) {
+        add_model_file('model', singleModelName);
+        files.push('generation_config.json');
+    } else if (modelType === MODEL_TYPES.DecoderOnlyWithoutHead) {
+        add_model_file('model', singleModelName);
+        // Do not load generation_config.json for models without generation head
+    } else if (modelType === MODEL_TYPES.Seq2Seq || modelType === MODEL_TYPES.Vision2Seq) {
+        add_model_file('model', 'encoder_model');
+        add_model_file('decoder_model_merged');
+        // Note: generation_config.json is only loaded for generation models (e.g., T5ForConditionalGeneration)
+        // not for base models (e.g., T5Model). Since we can't determine the specific class here,
+        // we include it as it's loaded for most use cases.
+        files.push('generation_config.json');
+    } else if (modelType === MODEL_TYPES.MaskGeneration) {
+        add_model_file('model', 'vision_encoder');
+        add_model_file('prompt_encoder_mask_decoder');
+    } else if (modelType === MODEL_TYPES.EncoderDecoder) {
+        add_model_file('model', 'encoder_model');
+        add_model_file('decoder_model_merged');
+    } else if (modelType === MODEL_TYPES.ImageTextToText) {
+        add_model_file('embed_tokens');
+        add_model_file('vision_encoder');
+        add_model_file('decoder_model_merged');
+        if (config.is_encoder_decoder) {
+            add_model_file('model', 'encoder_model');
+        }
+        files.push('generation_config.json');
+    } else if (modelType === MODEL_TYPES.AudioTextToText) {
+        add_model_file('embed_tokens');
+        add_model_file('audio_encoder');
+        add_model_file('decoder_model_merged');
+        files.push('generation_config.json');
+    } else if (modelType === MODEL_TYPES.ImageAudioTextToText) {
+        add_model_file('embed_tokens');
+        add_model_file('audio_encoder');
+        add_model_file('vision_encoder');
+        add_model_file('decoder_model_merged');
+        files.push('generation_config.json');
+    } else if (modelType === MODEL_TYPES.Musicgen) {
+        add_model_file('model', 'text_encoder');
+        add_model_file('decoder_model_merged');
+        add_model_file('encodec_decode');
+        files.push('generation_config.json');
+    } else if (modelType === MODEL_TYPES.MultiModality) {
+        add_model_file('prepare_inputs_embeds');
+        add_model_file('model', 'language_model');
+        add_model_file('lm_head');
+        add_model_file('gen_head');
+        add_model_file('gen_img_embeds');
+        add_model_file('image_decode');
+        files.push('generation_config.json');
+    } else if (modelType === MODEL_TYPES.Phi3V) {
+        add_model_file('prepare_inputs_embeds');
+        add_model_file('model');
+        add_model_file('vision_encoder');
+        files.push('generation_config.json');
+    } else if (modelType === MODEL_TYPES.Chatterbox) {
+        add_model_file('embed_tokens');
+        add_model_file('speech_encoder');
+        add_model_file('model', 'language_model');
+        add_model_file('conditional_decoder');
+        files.push('generation_config.json');
+    } else if (modelType === MODEL_TYPES.AutoEncoder) {
+        add_model_file('encoder_model');
+        add_model_file('decoder_model');
+    } else if (modelType === MODEL_TYPES.Supertonic) {
+        add_model_file('text_encoder');
+        add_model_file('latent_denoiser');
+        add_model_file('voice_decoder');
+    } else {
+        // MODEL_TYPES.EncoderOnly or unknown
+        add_model_file('model', singleModelName);
+    }
+    return files;
+}

package/src/utils/model_registry/get_pipeline_files.js ADDED Viewed

@@ -0,0 +1,53 @@
+import { get_files } from './get_files.js';
+import { SUPPORTED_TASKS, TASK_ALIASES } from '../../pipelines/index.js';
+/**
+ * Extract component requirements from SUPPORTED_TASKS
+ * @private
+ * @param {string} task
+ * @returns {{tokenizer: boolean, processor: boolean}}
+ */
+function get_task_components(task) {
+    const taskConfig = SUPPORTED_TASKS[task];
+    if (!taskConfig) {
+        return null;
+    }
+    return {
+        tokenizer: !!taskConfig.tokenizer,
+        processor: !!taskConfig.processor,
+    };
+}
+/**
+ * Get all files needed for a specific pipeline task.
+ * Automatically determines which components (tokenizer, processor) are needed based on the task.
+ *
+ * @param {string} task - The pipeline task (e.g., "text-generation", "image-classification")
+ * @param {string} modelId - The model id (e.g., "Xenova/bert-base-uncased")
+ * @param {Object} [options] - Optional parameters
+ * @param {import('../../configs.js').PretrainedConfig} [options.config=null] - Pre-loaded config
+ * @param {import('../dtypes.js').DataType|Record<string, import('../dtypes.js').DataType>} [options.dtype=null] - Override dtype
+ * @param {import('../devices.js').DeviceType|Record<string, import('../devices.js').DeviceType>} [options.device=null] - Override device
+ * @param {string} [options.model_file_name=null] - Override the model file name (excluding .onnx suffix)
+ * @returns {Promise<string[]>} Array of file paths that will be loaded
+ * @throws {Error} If the task is not supported
+ */
+export async function get_pipeline_files(task, modelId, options = {}) {
+    // Apply task aliases
+    task = TASK_ALIASES[task] ?? task;
+    // Get component requirements for this task from SUPPORTED_TASKS
+    const components = get_task_components(task);
+    if (!components) {
+        throw new Error(
+            `Unsupported pipeline task: ${task}. Must be one of [${Object.keys(SUPPORTED_TASKS).join(', ')}]`,
+        );
+    }
+    // Get files with appropriate component flags
+    return get_files(modelId, {
+        ...options,
+        include_tokenizer: components.tokenizer,
+        include_processor: components.processor,
+    });
+}

package/src/utils/model_registry/get_processor_files.js ADDED Viewed

@@ -0,0 +1,20 @@
+import { IMAGE_PROCESSOR_NAME } from '../constants.js';
+import { get_file_metadata } from './get_file_metadata.js';
+/**
+ * Returns the list of processor files that will be loaded for a model.
+ * Auto-detects if the model has a processor by checking if preprocessor_config.json exists.
+ *
+ * @param {string} modelId The model id (e.g., "Xenova/detr-resnet-50")
+ * @returns {Promise<string[]>} Array of processor file names (empty if no processor)
+ */
+export async function get_processor_files(modelId) {
+    if (!modelId) {
+        throw new Error('modelId is required');
+    }
+    // Check if preprocessor_config.json exists
+    const metadata = await get_file_metadata(modelId, IMAGE_PROCESSOR_NAME, {});
+    return metadata.exists ? [IMAGE_PROCESSOR_NAME] : [];
+}

package/src/utils/model_registry/get_tokenizer_files.js ADDED Viewed

@@ -0,0 +1,21 @@
+import { get_file_metadata } from './get_file_metadata.js';
+/**
+ * Returns the list of files that will be loaded for a tokenizer.
+ * Automatically detects whether the model has tokenizer files.
+ *
+ * @param {string} modelId The model id to check for tokenizer files
+ * @returns {Promise<string[]>} An array of file names that will be loaded
+ */
+export async function get_tokenizer_files(modelId) {
+    if (!modelId) {
+        throw new Error('modelId is required for get_tokenizer_files');
+    }
+    const metadata = await get_file_metadata(modelId, 'tokenizer_config.json', {});
+    if (metadata.exists) {
+        return ['tokenizer.json', 'tokenizer_config.json'];
+    }
+    return [];
+}

package/src/utils/model_registry/is_cached.js ADDED Viewed

@@ -0,0 +1,92 @@
+import { getCache } from '../cache.js';
+import { buildResourcePaths, checkCachedResource } from '../hub.js';
+import { get_files } from './get_files.js';
+import { get_pipeline_files } from './get_pipeline_files.js';
+/**
+ * @typedef {Object} FileCacheStatus
+ * @property {string} file - The file path
+ * @property {boolean} cached - Whether the file is cached
+ */
+/**
+ * @typedef {Object} CacheCheckResult
+ * @property {boolean} allCached - Whether all files are cached
+ * @property {FileCacheStatus[]} files - Array of files with their cache status
+ */
+/**
+ * Internal helper to check cache status for a list of files
+ * @private
+ * @param {string} modelId - The model id
+ * @param {string[]} files - List of file paths to check
+ * @param {Object} options - Options including cache_dir
+ * @returns {Promise<CacheCheckResult>}
+ */
+async function check_files_cache(modelId, files, options = {}) {
+    const cache = await getCache(options?.cache_dir);
+    if (!cache) {
+        const fileStatuses = files.map((filename) => ({ file: filename, cached: false }));
+        // No cache available, all files considered not cached
+        return { allCached: false, files: fileStatuses };
+    }
+    const fileStatuses = await Promise.all(
+        files.map(async (filename) => {
+            const { localPath, proposedCacheKey } = buildResourcePaths(modelId, filename, options, cache);
+            const cached = await checkCachedResource(cache, localPath, proposedCacheKey);
+            return { file: filename, cached: !!cached };
+        }),
+    );
+    return { allCached: fileStatuses.every((f) => f.cached), files: fileStatuses };
+}
+/**
+ * Checks if all files for a given model are already cached.
+ * Automatically determines which files are needed using get_files().
+ *
+ * @param {string} modelId The model id (e.g., "Xenova/gpt2")
+ * @param {Object} [options] Optional parameters
+ * @param {string} [options.cache_dir] Custom cache directory
+ * @param {string} [options.revision] Model revision (default: 'main')
+ * @param {import('../../configs.js').PretrainedConfig} [options.config] Pre-loaded config
+ * @param {import('../dtypes.js').DataType|Record<string, import('../dtypes.js').DataType>} [options.dtype] Override dtype
+ * @param {import('../devices.js').DeviceType|Record<string, import('../devices.js').DeviceType>} [options.device] Override device
+ * @returns {Promise<CacheCheckResult>} Object with allCached boolean and files array with cache status
+ */
+export async function is_cached(modelId, options = {}) {
+    if (!modelId) {
+        throw new Error('modelId is required');
+    }
+    const files = await get_files(modelId, options);
+    return await check_files_cache(modelId, files, options);
+}
+/**
+ * Checks if all files for a specific pipeline task are already cached.
+ * Automatically determines which components are needed based on the task.
+ *
+ * @param {string} task - The pipeline task (e.g., "text-generation", "image-classification")
+ * @param {string} modelId - The model id (e.g., "Xenova/gpt2")
+ * @param {Object} [options] - Optional parameters
+ * @param {string} [options.cache_dir] - Custom cache directory
+ * @param {string} [options.revision] - Model revision (default: 'main')
+ * @param {import('../../configs.js').PretrainedConfig} [options.config] - Pre-loaded config
+ * @param {import('../dtypes.js').DataType|Record<string, import('../dtypes.js').DataType>} [options.dtype] - Override dtype
+ * @param {import('../devices.js').DeviceType|Record<string, import('../devices.js').DeviceType>} [options.device] - Override device
+ * @returns {Promise<CacheCheckResult>} Object with allCached boolean and files array with cache status
+ */
+export async function is_pipeline_cached(task, modelId, options = {}) {
+    if (!task) {
+        throw new Error('task is required');
+    }
+    if (!modelId) {
+        throw new Error('modelId is required');
+    }
+    const files = await get_pipeline_files(task, modelId, options);
+    return await check_files_cache(modelId, files, options);
+}

package/src/utils/random.js ADDED Viewed

@@ -0,0 +1,225 @@
+/**
+ * Let there be order amidst the chaos.
+ *
+ * This file implements Mersenne Twister 19937, matching Python's `random` module exactly for reproducibility.
+ *
+ * ```javascript
+ * import { random } from '@huggingface/transformers';
+ *
+ * random.seed(42);
+ * random.random();           // 0.6394267984578837  (matches Python)
+ * random.gauss(0, 1);        // normal-distributed value
+ * random.choices(['a','b'], [3, 1]);  // weighted pick
+ *
+ * const arr = [1, 2, 3, 4, 5];
+ * random.shuffle(arr);       // in-place Fisher-Yates shuffle
+ *
+ * // Use a separate instance to avoid affecting the global state:
+ * const rng = new random.Random(42);
+ * rng.random();              // 0.6394267984578837  (same seed, independent state)
+ * ```
+ *
+ * **Note on Reproducibility:**
+ * Similarly to the [Python random](https://docs.python.org/3/library/random.html#notes-on-reproducibility)
+ * module, it is useful to be able to reproduce the sequences given by a pseudo-random number generator.
+ * By reusing a seed value, the same sequence should be reproducible from run to run as long as multiple
+ * threads or asynchronous operations are not running concurrently.
+ *
+ * @module utils/random
+ */
+import { apis } from '../env.js';
+/**
+ * Mersenne Twister 19937 PRNG, matching Python's `random.Random` class exactly.
+ *
+ * Each instance has its own independent state, so seeding one instance does not
+ * affect any other instance or the global helper functions.
+ *
+ * @example
+ * const rng1 = new Random(42);
+ * const rng2 = new Random(42);
+ * rng1.random() === rng2.random(); // true (same seed, independent state)
+ */
+export class Random {
+    constructor(seed) {
+        this._mt = new Uint32Array(624);
+        this._idx = 625;
+        this._gauss_next = null;
+        this._random_fn = this.random.bind(this);
+        this.seed(seed);
+    }
+    /**
+     * Seeds this instance's PRNG.
+     *
+     * When called with a number, initializes the state deterministically from that value.
+     * When called with no arguments (or `undefined`/`null`), seeds from OS entropy
+     * via `crypto.getRandomValues`, matching Python's `random.seed()` behaviour.
+     *
+     * @param {number} [n] The seed value. Omit to seed from OS entropy.
+     */
+    seed(n) {
+        if (n === undefined || n === null) {
+            if (apis.IS_CRYPTO_AVAILABLE) {
+                const buf = new Uint32Array(1);
+                crypto.getRandomValues(buf);
+                n = buf[0];
+            } else {
+                n = Date.now() >>> 0;
+            }
+        }
+        const mt = this._mt;
+        const u = (a, b) => Math.imul(a, b) >>> 0,
+            key = [];
+        for (let v = n || 0; v > 0; v = Math.floor(v / 0x100000000)) key.push(v & 0xffffffff);
+        if (!key.length) key.push(0);
+        mt[0] = 19650218;
+        for (let k = 1; k < 624; ++k) mt[k] = (u(1812433253, mt[k - 1] ^ (mt[k - 1] >>> 30)) + k) >>> 0;
+        let i = 1,
+            j = 0;
+        for (let k = Math.max(624, key.length); k > 0; --k, ++i, ++j) {
+            if (i >= 624) {
+                mt[0] = mt[623];
+                i = 1;
+            }
+            if (j >= key.length) j = 0;
+            mt[i] = ((mt[i] ^ u(mt[i - 1] ^ (mt[i - 1] >>> 30), 1664525)) + key[j] + j) >>> 0;
+        }
+        for (let k = 623; k > 0; --k, ++i) {
+            if (i >= 624) {
+                mt[0] = mt[623];
+                i = 1;
+            }
+            mt[i] = ((mt[i] ^ u(mt[i - 1] ^ (mt[i - 1] >>> 30), 1566083941)) - i) >>> 0;
+        }
+        mt[0] = 0x80000000;
+        this._idx = 624;
+        this._gauss_next = null;
+    }
+    /**
+     * Generates a random unsigned 32-bit integer.
+     *
+     * Performs the "twist" step when the state buffer is exhausted,
+     * then applies the standard MT19937 tempering transform.
+     *
+     * @returns {number} A random integer in the range [0, 2^32 - 1].
+     */
+    _int32() {
+        const mt = this._mt;
+        if (this._idx >= 624) {
+            for (let k = 0; k < 624; ++k) {
+                // twist
+                const y = (mt[k] & 0x80000000) | (mt[(k + 1) % 624] & 0x7fffffff);
+                mt[k] = (mt[(k + 397) % 624] ^ (y >>> 1) ^ (y & 1 ? 0x9908b0df : 0)) >>> 0;
+            }
+            this._idx = 0;
+        }
+        let y = mt[this._idx++];
+        y ^= y >>> 11;
+        y ^= (y << 7) & 0x9d2c5680;
+        y ^= (y << 15) & 0xefc60000;
+        y ^= y >>> 18;
+        return y >>> 0;
+    }
+    /**
+     * Generates a random floating-point number in the half-open interval [0, 1).
+     *
+     * Combines two 32-bit integers (using 53 bits of precision) to produce
+     * a uniformly distributed double, matching Python's `random.random()`.
+     *
+     * @returns {number} A random float in [0, 1).
+     */
+    random() {
+        return ((this._int32() >>> 5) * 67108864.0 + (this._int32() >>> 6)) / 9007199254740992.0;
+    }
+    /**
+     * Generates a random number from a Gaussian (normal) distribution.
+     *
+     * Uses the Box-Muller transform with a cached spare value,
+     * matching Python's `random.gauss()` output for the same seed.
+     *
+     * @param {number} [mu=0] The mean of the distribution.
+     * @param {number} [sigma=1] The standard deviation of the distribution.
+     * @returns {number} A normally distributed random value.
+     */
+    gauss(mu = 0, sigma = 1) {
+        let z = this._gauss_next;
+        this._gauss_next = null;
+        if (z === null) {
+            const x2pi = this.random() * 2 * Math.PI,
+                g2rad = Math.sqrt(-2 * Math.log(1 - this.random()));
+            z = Math.cos(x2pi) * g2rad;
+            this._gauss_next = Math.sin(x2pi) * g2rad;
+        }
+        return mu + z * sigma;
+    }
+    /**
+     * Shuffles an array in-place using the Fisher-Yates algorithm.
+     *
+     * Uses rejection sampling via `getrandbits`-style bit masking to ensure
+     * a uniform distribution, matching Python's `random.shuffle()`.
+     *
+     * @param {any[]} arr The array to shuffle in-place.
+     */
+    shuffle(arr) {
+        for (let i = arr.length - 1; i > 0; --i) {
+            const k = 32 - Math.clz32(i + 1);
+            let r = this._int32() >>> (32 - k);
+            while (r > i) r = this._int32() >>> (32 - k);
+            const t = arr[i];
+            arr[i] = arr[r];
+            arr[r] = t;
+        }
+    }
+    /**
+     * Selects a single element from a weighted population.
+     *
+     * Matches Python's `random.choices(population, weights=weights, k=1)[0]`
+     *
+     * @param {any[]} population The array of items to choose from.
+     * @param {number[]} weights An array of non-negative weights, one per population element.
+     * @returns {*} A single randomly selected element from the population.
+     */
+    choices(population, weights) {
+        return population[_weightedIndexWith(this._random_fn, weights)];
+    }
+}
+/**
+ * Returns a random index into `weights`, where each index's probability
+ * is proportional to its weight. Uses a linear scan: O(n) time, O(1) memory.
+ *
+ * @param {() => number} randomFn A function returning a uniform random float in [0, 1).
+ * @param {ArrayLike<number>} weights Non-negative weights.
+ * @returns {number} A randomly selected index in `[0, weights.length)`.
+ */
+function _weightedIndexWith(randomFn, weights) {
+    let sum = 0;
+    for (let i = 0; i < weights.length; ++i) sum += weights[i];
+    let x = randomFn() * sum;
+    for (let i = 0; i < weights.length; ++i) {
+        x -= weights[i];
+        if (x < 0) return i;
+    }
+    return weights.length - 1; // floating-point guard
+}
+// Global default instance: mirrors the module-level functions in Python's `random` module.
+// It is constructed without a seed, so `Random#seed` falls back to its entropy/time branch.
+const _default = new Random();
+export const random = Object.freeze({
+    Random,
+    seed: _default.seed.bind(_default), // every helper below is bound to the shared `_default` state
+    random: _default.random.bind(_default),
+    gauss: _default.gauss.bind(_default),
+    shuffle: _default.shuffle.bind(_default),
+    choices: _default.choices.bind(_default),
+});
+// Private helper function, used by LogitsSampler, but not exported as part of the public API.
+// It draws from the global instance through `random.random`, so `random.seed()` affects it.
+export const _weightedIndex = (weights) => _weightedIndexWith(random.random, weights);

package/src/utils/tensor.js CHANGED Viewed

@@ -15,6 +15,8 @@ import { TensorOpRegistry } from '../ops/registry.js';
 import { DataTypeMap } from './dtypes.js';
+import { random } from './random.js';
 /**
  * @typedef {keyof typeof DataTypeMap} DataType
  * @typedef {import('./maths.js').AnyTypedArray | any[]} DataArray
@@ -1591,7 +1593,7 @@ export function rand(size) {
     const length = size.reduce((a, b) => a * b, 1);
     return new Tensor(
         'float32',
-        Float32Array.from({ length }, () => Math.random()),
+        Float32Array.from({ length }, () => random.random()),
         size,
     );
 }
@@ -1603,26 +1605,11 @@ export function rand(size) {
  */
 export function randn(size) {
     const length = size.reduce((a, b) => a * b, 1);
-    const data = new Float32Array(length);
-    for (let i = 0; i < length; i += 2) {
-        // Box-Muller transform
-        const u = 1 - Math.random(); // Avoids log(0)
-        const v = Math.random();
-        const mag = Math.sqrt(-2.0 * Math.log(u));
-        const angle = 2.0 * Math.PI * v;
-        // Assign the first value
-        data[i] = mag * Math.cos(angle);
-        // Assign the second value (if valid index)
-        if (i + 1 < length) {
-            data[i + 1] = mag * Math.sin(angle);
-        }
-    }
-    return new Tensor('float32', data, size);
+    return new Tensor(
+        'float32',
+        Float32Array.from({ length }, () => random.gauss()), // one gauss() call per element on the shared `random` object; presumably standard normal with default arguments (confirm in random.js)
+        size,
+    );
 }
 /**

package/src/utils/video.js CHANGED Viewed

@@ -1,5 +1,5 @@
 import { RawImage } from './image.js';
-import { apis } from '../env.js';
+import { env, apis } from '../env.js';
 export class RawVideoFrame {
     /**
@@ -79,7 +79,7 @@ export async function load_video(src, { num_frames = null, fps = null } = {}) {
     if (video.seekable.start(0) === video.seekable.end(0)) {
         // Fallback: Download entire video if not seekable
-        const response = await fetch(video.src);
+        const response = await env.fetch(video.src);
         const blob = await response.blob();
         video.src = URL.createObjectURL(blob);
         await new Promise((resolve) => (video.onloadedmetadata = resolve));

package/types/backends/onnx.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"onnx.d.ts","sourceRoot":"","sources":["../../src/backends/onnx.js"],"names":[],"mappings":"~~AAkHA~~;;;;GAIG;AACH,oDAHW,OAAO,qBAAqB,EAAE,UAAU,GAAC,MAAM,GAAC,IAAI,GAClD,sBAAsB,EAAE,CAmBpC;AAoFD;;;;;;GAMG;AACH,uDALW,UAAU,GAAC,MAAM,mBACjB,OAAO,oBAAoB,EAAE,gBAAgB,CAAC,cAAc,wBAE1D,OAAO,CAAC,OAAO,oBAAoB,EAAE,gBAAgB,GAAG;IAAE,MAAM,MAAQ;CAAE,CAAC,~~CAavF~~;AASD;;;;;GAKG;AACH,6CAJW,OAAO,oBAAoB,EAAE,gBAAgB,WAC7C,MAAM,CAAC,MAAM,EAAE,OAAO,oBAAoB,EAAE,MAAM,CAAC,GACjD,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,oBAAoB,EAAE,MAAM,CAAC,CAAC,CAMxE;AAED;;;;GAIG;AACH,gCAHW,GAAG,GACD,OAAO,CAInB;~~AAsCD~~;;;GAGG;AACH,+BAFa,OAAO,CAKnB;;~~qCA3RY~~,OAAO,oBAAoB,EAAE,gBAAgB,CAAC,uBAAuB"}
1	+ {"version":3,"file":"onnx.d.ts","sourceRoot":"","sources":["../../src/backends/onnx.js"],"names":[],"mappings":"AA0JA;;;;GAIG;AACH,oDAHW,OAAO,qBAAqB,EAAE,UAAU,GAAC,MAAM,GAAC,IAAI,GAClD,sBAAsB,EAAE,CAmBpC;AAoFD;;;;;;GAMG;AACH,uDALW,UAAU,GAAC,MAAM,mBACjB,OAAO,oBAAoB,EAAE,gBAAgB,CAAC,cAAc,wBAE1D,OAAO,CAAC,OAAO,oBAAoB,EAAE,gBAAgB,GAAG;IAAE,MAAM,MAAQ;CAAE,CAAC,CAcvF;AASD;;;;;GAKG;AACH,6CAJW,OAAO,oBAAoB,EAAE,gBAAgB,WAC7C,MAAM,CAAC,MAAM,EAAE,OAAO,oBAAoB,EAAE,MAAM,CAAC,GACjD,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,oBAAoB,EAAE,MAAM,CAAC,CAAC,CAMxE;AAED;;;;GAIG;AACH,gCAHW,GAAG,GACD,OAAO,CAInB;AAqCD;;;GAGG;AACH,+BAFa,OAAO,CAKnB;;qCAlUY,OAAO,oBAAoB,EAAE,gBAAgB,CAAC,uBAAuB"}

package/types/backends/utils/cacheWasm.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"cacheWasm.d.ts","sourceRoot":"","sources":["../../../src/backends/utils/cacheWasm.js"],"names":[],"mappings":"~~AA8CA~~;;;;GAIG;AAEH,wCAJW,MAAM,GACJ,OAAO,CAAC,WAAW,GAAC,IAAI,CAAC,CAarC;AAED;;;;GAIG;AACH,wCAHW,MAAM,GACJ,OAAO,CAAC,MAAM,GAAC,IAAI,CAAC,~~CAiBhC~~;AAED;;;;;GAKG;AACH,+BAHW,MAAM,GACJ,OAAO,CAInB;AAED;;;;;;GAMG;AACH,mCAHW,MAAM,GACJ,MAAM,CAiBlB"}
1	+ {"version":3,"file":"cacheWasm.d.ts","sourceRoot":"","sources":["../../../src/backends/utils/cacheWasm.js"],"names":[],"mappings":"AAgDA;;;;GAIG;AAEH,wCAJW,MAAM,GACJ,OAAO,CAAC,WAAW,GAAC,IAAI,CAAC,CAarC;AAED;;;;GAIG;AACH,wCAHW,MAAM,GACJ,OAAO,CAAC,MAAM,GAAC,IAAI,CAAC,CAkBhC;AAED;;;;;GAKG;AACH,+BAHW,MAAM,GACJ,OAAO,CAInB;AAED;;;;;;GAMG;AACH,mCAHW,MAAM,GACJ,MAAM,CAiBlB"}