@huggingface/transformers 3.0.2 → 3.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -4
- package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
- package/dist/transformers.cjs +16655 -13040
- package/dist/transformers.cjs.map +1 -1
- package/dist/transformers.js +17095 -13468
- package/dist/transformers.js.map +1 -1
- package/dist/transformers.min.cjs +244 -52
- package/dist/transformers.min.cjs.map +1 -1
- package/dist/transformers.min.js +235 -43
- package/dist/transformers.min.js.map +1 -1
- package/dist/transformers.min.mjs +246 -54
- package/dist/transformers.min.mjs.map +1 -1
- package/dist/transformers.mjs +16818 -13202
- package/dist/transformers.mjs.map +1 -1
- package/package.json +4 -4
- package/src/base/feature_extraction_utils.js +54 -0
- package/src/base/image_processors_utils.js +1089 -0
- package/src/base/processing_utils.js +145 -0
- package/src/configs.js +15 -4
- package/src/env.js +6 -6
- package/src/generation/configuration_utils.js +7 -0
- package/src/generation/logits_process.js +22 -16
- package/src/generation/streamers.js +7 -2
- package/src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js +90 -0
- package/src/models/auto/feature_extraction_auto.js +41 -0
- package/src/models/auto/image_processing_auto.js +29 -0
- package/src/models/auto/processing_auto.js +100 -0
- package/src/models/beit/image_processing_beit.js +5 -0
- package/src/models/bit/image_processing_bit.js +5 -0
- package/src/models/chinese_clip/image_processing_chinese_clip.js +5 -0
- package/src/models/clap/feature_extraction_clap.js +159 -0
- package/src/models/clip/image_processing_clip.js +6 -0
- package/src/models/convnext/image_processing_convnext.js +45 -0
- package/src/models/deit/image_processing_deit.js +6 -0
- package/src/models/detr/image_processing_detr.js +52 -0
- package/src/models/donut/image_processing_donut.js +31 -0
- package/src/models/dpt/image_processing_dpt.js +6 -0
- package/src/models/efficientnet/image_processing_efficientnet.js +13 -0
- package/src/models/feature_extractors.js +12 -0
- package/src/models/florence2/processing_florence2.js +128 -0
- package/src/models/glpn/image_processing_glpn.js +5 -0
- package/src/models/idefics3/image_processing_idefics3.js +219 -0
- package/src/models/idefics3/processing_idefics3.js +136 -0
- package/src/models/image_processors.js +37 -0
- package/src/models/janus/image_processing_janus.js +26 -0
- package/src/models/janus/processing_janus.js +123 -0
- package/src/models/jina_clip/image_processing_jina_clip.js +26 -0
- package/src/models/jina_clip/processing_jina_clip.js +24 -0
- package/src/models/llava_onevision/image_processing_llava_onevision.js +5 -0
- package/src/models/mask2former/image_processing_mask2former.js +5 -0
- package/src/models/maskformer/image_processing_maskformer.js +18 -0
- package/src/models/mgp_str/processing_mgp_str.js +170 -0
- package/src/models/mobilenet_v1/image_processing_mobilenet_v1.js +7 -0
- package/src/models/mobilenet_v2/image_processing_mobilenet_v2.js +7 -0
- package/src/models/mobilenet_v3/image_processing_mobilenet_v3.js +7 -0
- package/src/models/mobilenet_v4/image_processing_mobilenet_v4.js +7 -0
- package/src/models/mobilevit/image_processing_mobilevit.js +6 -0
- package/src/models/nougat/image_processing_nougat.js +5 -0
- package/src/models/owlv2/image_processing_owlv2.js +5 -0
- package/src/models/owlvit/image_processing_owlvit.js +12 -0
- package/src/models/owlvit/processing_owlvit.js +7 -0
- package/src/models/processors.js +12 -0
- package/src/models/pvt/image_processing_pvt.js +5 -0
- package/src/models/pyannote/feature_extraction_pyannote.js +28 -0
- package/src/models/pyannote/processing_pyannote.js +71 -0
- package/src/models/qwen2_vl/image_processing_qwen2_vl.js +52 -0
- package/src/models/qwen2_vl/processing_qwen2_vl.js +52 -0
- package/src/models/rt_detr/image_processing_rt_detr.js +12 -0
- package/src/models/sam/image_processing_sam.js +242 -0
- package/src/models/sam/processing_sam.js +20 -0
- package/src/models/sapiens/image_processing_sapiens.js +13 -0
- package/src/models/seamless_m4t/feature_extraction_seamless_m4t.js +180 -0
- package/src/models/segformer/image_processing_segformer.js +13 -0
- package/src/models/siglip/image_processing_siglip.js +5 -0
- package/src/models/speecht5/feature_extraction_speecht5.js +4 -0
- package/src/models/speecht5/processing_speecht5.js +17 -0
- package/src/models/swin2sr/image_processing_swin2sr.js +24 -0
- package/src/models/vit/image_processing_vit.js +7 -0
- package/src/models/vitmatte/image_processing_vitmatte.js +50 -0
- package/src/models/vitpose/image_processing_vitpose.js +89 -0
- package/src/models/wav2vec2/feature_extraction_wav2vec2.js +44 -0
- package/src/models/wav2vec2/processing_wav2vec2.js +15 -0
- package/src/models/wespeaker/feature_extraction_wespeaker.js +100 -0
- package/src/models/whisper/feature_extraction_whisper.js +84 -0
- package/src/models/whisper/processing_whisper.js +21 -0
- package/src/models/yolos/image_processing_yolos.js +12 -0
- package/src/models.js +755 -34
- package/src/pipelines.js +8 -8
- package/src/tokenizers.js +5 -0
- package/src/transformers.js +15 -2
- package/src/utils/constants.js +8 -1
- package/src/utils/core.js +51 -9
- package/src/utils/dtypes.js +2 -1
- package/src/utils/hub.js +2 -1
- package/src/utils/image.js +87 -33
- package/src/utils/tensor.js +39 -2
- package/types/base/feature_extraction_utils.d.ts +41 -0
- package/types/base/feature_extraction_utils.d.ts.map +1 -0
- package/types/base/image_processors_utils.d.ts +323 -0
- package/types/base/image_processors_utils.d.ts.map +1 -0
- package/types/base/processing_utils.d.ts +80 -0
- package/types/base/processing_utils.d.ts.map +1 -0
- package/types/configs.d.ts +5 -2
- package/types/configs.d.ts.map +1 -1
- package/types/env.d.ts +1 -1
- package/types/env.d.ts.map +1 -1
- package/types/generation/configuration_utils.d.ts +6 -0
- package/types/generation/configuration_utils.d.ts.map +1 -1
- package/types/generation/logits_process.d.ts +30 -20
- package/types/generation/logits_process.d.ts.map +1 -1
- package/types/generation/streamers.d.ts +13 -8
- package/types/generation/streamers.d.ts.map +1 -1
- package/types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts +25 -0
- package/types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts.map +1 -0
- package/types/models/auto/feature_extraction_auto.d.ts +5 -0
- package/types/models/auto/feature_extraction_auto.d.ts.map +1 -0
- package/types/models/auto/image_processing_auto.d.ts +5 -0
- package/types/models/auto/image_processing_auto.d.ts.map +1 -0
- package/types/models/auto/processing_auto.d.ts +35 -0
- package/types/models/auto/processing_auto.d.ts.map +1 -0
- package/types/models/beit/image_processing_beit.d.ts +4 -0
- package/types/models/beit/image_processing_beit.d.ts.map +1 -0
- package/types/models/bit/image_processing_bit.d.ts +4 -0
- package/types/models/bit/image_processing_bit.d.ts.map +1 -0
- package/types/models/chinese_clip/image_processing_chinese_clip.d.ts +4 -0
- package/types/models/chinese_clip/image_processing_chinese_clip.d.ts.map +1 -0
- package/types/models/clap/feature_extraction_clap.d.ts +57 -0
- package/types/models/clap/feature_extraction_clap.d.ts.map +1 -0
- package/types/models/clip/image_processing_clip.d.ts +6 -0
- package/types/models/clip/image_processing_clip.d.ts.map +1 -0
- package/types/models/convnext/image_processing_convnext.d.ts +12 -0
- package/types/models/convnext/image_processing_convnext.d.ts.map +1 -0
- package/types/models/deit/image_processing_deit.d.ts +6 -0
- package/types/models/deit/image_processing_deit.d.ts.map +1 -0
- package/types/models/detr/image_processing_detr.d.ts +42 -0
- package/types/models/detr/image_processing_detr.d.ts.map +1 -0
- package/types/models/donut/image_processing_donut.d.ts +7 -0
- package/types/models/donut/image_processing_donut.d.ts.map +1 -0
- package/types/models/dpt/image_processing_dpt.d.ts +6 -0
- package/types/models/dpt/image_processing_dpt.d.ts.map +1 -0
- package/types/models/efficientnet/image_processing_efficientnet.d.ts +6 -0
- package/types/models/efficientnet/image_processing_efficientnet.d.ts.map +1 -0
- package/types/models/feature_extractors.d.ts +10 -0
- package/types/models/feature_extractors.d.ts.map +1 -0
- package/types/models/florence2/processing_florence2.d.ts +39 -0
- package/types/models/florence2/processing_florence2.d.ts.map +1 -0
- package/types/models/glpn/image_processing_glpn.d.ts +4 -0
- package/types/models/glpn/image_processing_glpn.d.ts.map +1 -0
- package/types/models/idefics3/image_processing_idefics3.d.ts +40 -0
- package/types/models/idefics3/image_processing_idefics3.d.ts.map +1 -0
- package/types/models/idefics3/processing_idefics3.d.ts +19 -0
- package/types/models/idefics3/processing_idefics3.d.ts.map +1 -0
- package/types/models/image_processors.d.ts +37 -0
- package/types/models/image_processors.d.ts.map +1 -0
- package/types/models/janus/image_processing_janus.d.ts +7 -0
- package/types/models/janus/image_processing_janus.d.ts.map +1 -0
- package/types/models/janus/processing_janus.d.ts +77 -0
- package/types/models/janus/processing_janus.d.ts.map +1 -0
- package/types/models/jina_clip/image_processing_jina_clip.d.ts +5 -0
- package/types/models/jina_clip/image_processing_jina_clip.d.ts.map +1 -0
- package/types/models/jina_clip/processing_jina_clip.d.ts +9 -0
- package/types/models/jina_clip/processing_jina_clip.d.ts.map +1 -0
- package/types/models/llava_onevision/image_processing_llava_onevision.d.ts +4 -0
- package/types/models/llava_onevision/image_processing_llava_onevision.d.ts.map +1 -0
- package/types/models/mask2former/image_processing_mask2former.d.ts +4 -0
- package/types/models/mask2former/image_processing_mask2former.d.ts.map +1 -0
- package/types/models/maskformer/image_processing_maskformer.d.ts +22 -0
- package/types/models/maskformer/image_processing_maskformer.d.ts.map +1 -0
- package/types/models/mgp_str/processing_mgp_str.d.ts +64 -0
- package/types/models/mgp_str/processing_mgp_str.d.ts.map +1 -0
- package/types/models/mobilenet_v1/image_processing_mobilenet_v1.d.ts +6 -0
- package/types/models/mobilenet_v1/image_processing_mobilenet_v1.d.ts.map +1 -0
- package/types/models/mobilenet_v2/image_processing_mobilenet_v2.d.ts +6 -0
- package/types/models/mobilenet_v2/image_processing_mobilenet_v2.d.ts.map +1 -0
- package/types/models/mobilenet_v3/image_processing_mobilenet_v3.d.ts +6 -0
- package/types/models/mobilenet_v3/image_processing_mobilenet_v3.d.ts.map +1 -0
- package/types/models/mobilenet_v4/image_processing_mobilenet_v4.d.ts +6 -0
- package/types/models/mobilenet_v4/image_processing_mobilenet_v4.d.ts.map +1 -0
- package/types/models/mobilevit/image_processing_mobilevit.d.ts +6 -0
- package/types/models/mobilevit/image_processing_mobilevit.d.ts.map +1 -0
- package/types/models/nougat/image_processing_nougat.d.ts +4 -0
- package/types/models/nougat/image_processing_nougat.d.ts.map +1 -0
- package/types/models/owlv2/image_processing_owlv2.d.ts +4 -0
- package/types/models/owlv2/image_processing_owlv2.d.ts.map +1 -0
- package/types/models/owlvit/image_processing_owlvit.d.ts +10 -0
- package/types/models/owlvit/image_processing_owlvit.d.ts.map +1 -0
- package/types/models/owlvit/processing_owlvit.d.ts +8 -0
- package/types/models/owlvit/processing_owlvit.d.ts.map +1 -0
- package/types/models/processors.d.ts +13 -0
- package/types/models/processors.d.ts.map +1 -0
- package/types/models/pvt/image_processing_pvt.d.ts +4 -0
- package/types/models/pvt/image_processing_pvt.d.ts.map +1 -0
- package/types/models/pyannote/feature_extraction_pyannote.d.ts +13 -0
- package/types/models/pyannote/feature_extraction_pyannote.d.ts.map +1 -0
- package/types/models/pyannote/processing_pyannote.d.ts +30 -0
- package/types/models/pyannote/processing_pyannote.d.ts.map +1 -0
- package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts +11 -0
- package/types/models/qwen2_vl/image_processing_qwen2_vl.d.ts.map +1 -0
- package/types/models/qwen2_vl/processing_qwen2_vl.d.ts +17 -0
- package/types/models/qwen2_vl/processing_qwen2_vl.d.ts.map +1 -0
- package/types/models/rt_detr/image_processing_rt_detr.d.ts +8 -0
- package/types/models/rt_detr/image_processing_rt_detr.d.ts.map +1 -0
- package/types/models/sam/image_processing_sam.d.ts +103 -0
- package/types/models/sam/image_processing_sam.d.ts.map +1 -0
- package/types/models/sam/processing_sam.d.ts +9 -0
- package/types/models/sam/processing_sam.d.ts.map +1 -0
- package/types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts +34 -0
- package/types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts.map +1 -0
- package/types/models/segformer/image_processing_segformer.d.ts +10 -0
- package/types/models/segformer/image_processing_segformer.d.ts.map +1 -0
- package/types/models/siglip/image_processing_siglip.d.ts +4 -0
- package/types/models/siglip/image_processing_siglip.d.ts.map +1 -0
- package/types/models/speecht5/feature_extraction_speecht5.d.ts +4 -0
- package/types/models/speecht5/feature_extraction_speecht5.d.ts.map +1 -0
- package/types/models/speecht5/processing_speecht5.d.ts +14 -0
- package/types/models/speecht5/processing_speecht5.d.ts.map +1 -0
- package/types/models/swin2sr/image_processing_swin2sr.d.ts +5 -0
- package/types/models/swin2sr/image_processing_swin2sr.d.ts.map +1 -0
- package/types/models/vit/image_processing_vit.d.ts +6 -0
- package/types/models/vit/image_processing_vit.d.ts.map +1 -0
- package/types/models/vitmatte/image_processing_vitmatte.d.ts +12 -0
- package/types/models/vitmatte/image_processing_vitmatte.d.ts.map +1 -0
- package/types/models/vitpose/image_processing_vitpose.d.ts +26 -0
- package/types/models/vitpose/image_processing_vitpose.d.ts.map +1 -0
- package/types/models/wav2vec2/feature_extraction_wav2vec2.d.ts +19 -0
- package/types/models/wav2vec2/feature_extraction_wav2vec2.d.ts.map +1 -0
- package/types/models/wav2vec2/processing_wav2vec2.d.ts +12 -0
- package/types/models/wav2vec2/processing_wav2vec2.d.ts.map +1 -0
- package/types/models/wespeaker/feature_extraction_wespeaker.d.ts +23 -0
- package/types/models/wespeaker/feature_extraction_wespeaker.d.ts.map +1 -0
- package/types/models/whisper/feature_extraction_whisper.d.ts +21 -0
- package/types/models/whisper/feature_extraction_whisper.d.ts.map +1 -0
- package/types/models/whisper/processing_whisper.d.ts +17 -0
- package/types/models/whisper/processing_whisper.d.ts.map +1 -0
- package/types/models/yolos/image_processing_yolos.d.ts +10 -0
- package/types/models/yolos/image_processing_yolos.d.ts.map +1 -0
- package/types/models.d.ts +150 -0
- package/types/models.d.ts.map +1 -1
- package/types/pipelines.d.ts +2 -3
- package/types/pipelines.d.ts.map +1 -1
- package/types/tokenizers.d.ts +3 -0
- package/types/tokenizers.d.ts.map +1 -1
- package/types/transformers.d.ts +10 -1
- package/types/utils/constants.d.ts +6 -0
- package/types/utils/constants.d.ts.map +1 -1
- package/types/utils/core.d.ts +65 -3
- package/types/utils/core.d.ts.map +1 -1
- package/types/utils/dtypes.d.ts +3 -2
- package/types/utils/dtypes.d.ts.map +1 -1
- package/types/utils/hub.d.ts +1 -1
- package/types/utils/hub.d.ts.map +1 -1
- package/types/utils/image.d.ts +14 -2
- package/types/utils/image.d.ts.map +1 -1
- package/types/utils/tensor.d.ts +39 -4
- package/types/utils/tensor.d.ts.map +1 -1
- package/src/processors.js +0 -2655
- package/types/processors.d.ts +0 -924
- package/types/processors.d.ts.map +0 -1
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
|
|
2
|
+
/**
|
|
3
|
+
* @file Processors are used to prepare inputs (e.g., text, image or audio) for a model.
|
|
4
|
+
*
|
|
5
|
+
* **Example:** Using a `WhisperProcessor` to prepare an audio input for a model.
|
|
6
|
+
* ```javascript
|
|
7
|
+
* import { AutoProcessor, read_audio } from '@huggingface/transformers';
|
|
8
|
+
*
|
|
9
|
+
* const processor = await AutoProcessor.from_pretrained('openai/whisper-tiny.en');
|
|
10
|
+
* const audio = await read_audio('https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac', 16000);
|
|
11
|
+
* const { input_features } = await processor(audio);
|
|
12
|
+
* // Tensor {
|
|
13
|
+
* // data: Float32Array(240000) [0.4752984642982483, 0.5597258806228638, 0.56434166431427, ...],
|
|
14
|
+
* // dims: [1, 80, 3000],
|
|
15
|
+
* // type: 'float32',
|
|
16
|
+
* // size: 240000,
|
|
17
|
+
* // }
|
|
18
|
+
* ```
|
|
19
|
+
*
|
|
20
|
+
* @module processors
|
|
21
|
+
*/
|
|
22
|
+
import { PROCESSOR_NAME } from '../utils/constants.js';
|
|
23
|
+
import {
|
|
24
|
+
Callable,
|
|
25
|
+
} from '../utils/generic.js';
|
|
26
|
+
import { getModelJSON } from '../utils/hub.js';
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* @typedef {Object} ProcessorProperties Additional processor-specific properties.
|
|
30
|
+
* @typedef {import('../utils/hub.js').PretrainedOptions & ProcessorProperties} PretrainedProcessorOptions
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Represents a Processor that extracts features from an input.
|
|
36
|
+
*/
|
|
37
|
+
export class Processor extends Callable {
|
|
38
|
+
static classes = [
|
|
39
|
+
'image_processor_class',
|
|
40
|
+
'tokenizer_class',
|
|
41
|
+
'feature_extractor_class',
|
|
42
|
+
]
|
|
43
|
+
static uses_processor_config = false;
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* Creates a new Processor with the given components
|
|
47
|
+
* @param {Object} config
|
|
48
|
+
* @param {Record<string, Object>} components
|
|
49
|
+
*/
|
|
50
|
+
constructor(config, components) {
|
|
51
|
+
super();
|
|
52
|
+
this.config = config;
|
|
53
|
+
this.components = components;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
/**
|
|
57
|
+
* @returns {import('./image_processors_utils.js').ImageProcessor|undefined} The image processor of the processor, if it exists.
|
|
58
|
+
*/
|
|
59
|
+
get image_processor() {
|
|
60
|
+
return this.components.image_processor;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
/**
|
|
64
|
+
* @returns {import('../tokenizers.js').PreTrainedTokenizer|undefined} The tokenizer of the processor, if it exists.
|
|
65
|
+
*/
|
|
66
|
+
get tokenizer() {
|
|
67
|
+
return this.components.tokenizer;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
/**
|
|
71
|
+
* @returns {import('./feature_extraction_utils.js').FeatureExtractor|undefined} The feature extractor of the processor, if it exists.
|
|
72
|
+
*/
|
|
73
|
+
get feature_extractor() {
|
|
74
|
+
return this.components.feature_extractor;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
apply_chat_template(messages, options = {}) {
|
|
78
|
+
if (!this.tokenizer) {
|
|
79
|
+
throw new Error('Unable to apply chat template without a tokenizer.');
|
|
80
|
+
}
|
|
81
|
+
return this.tokenizer.apply_chat_template(messages, {
|
|
82
|
+
tokenize: false, // default to false
|
|
83
|
+
...options,
|
|
84
|
+
});
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
batch_decode(...args) {
|
|
88
|
+
if (!this.tokenizer) {
|
|
89
|
+
throw new Error('Unable to decode without a tokenizer.');
|
|
90
|
+
}
|
|
91
|
+
return this.tokenizer.batch_decode(...args);
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
/**
|
|
96
|
+
* Calls the feature_extractor function with the given input.
|
|
97
|
+
* @param {any} input The input to extract features from.
|
|
98
|
+
* @param {...any} args Additional arguments.
|
|
99
|
+
* @returns {Promise<any>} A Promise that resolves with the extracted features.
|
|
100
|
+
*/
|
|
101
|
+
async _call(input, ...args) {
|
|
102
|
+
for (const item of [this.image_processor, this.feature_extractor, this.tokenizer]) {
|
|
103
|
+
if (item) {
|
|
104
|
+
return item(input, ...args);
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
throw new Error('No image processor, feature extractor, or tokenizer found.');
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
/**
|
|
112
|
+
* Instantiate one of the processor classes of the library from a pretrained model.
|
|
113
|
+
*
|
|
114
|
+
* The processor class to instantiate is selected based on the `feature_extractor_type` property of the config object
|
|
115
|
+
* (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
|
|
116
|
+
*
|
|
117
|
+
* @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
|
|
118
|
+
* - A string, the *model id* of a pretrained processor hosted inside a model repo on huggingface.co.
|
|
119
|
+
* Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
|
|
120
|
+
* user or organization name, like `dbmdz/bert-base-german-cased`.
|
|
121
|
+
* - A path to a *directory* containing processor files, e.g., `./my_model_directory/`.
|
|
122
|
+
* @param {PretrainedProcessorOptions} options Additional options for loading the processor.
|
|
123
|
+
*
|
|
124
|
+
* @returns {Promise<Processor>} A new instance of the Processor class.
|
|
125
|
+
*/
|
|
126
|
+
static async from_pretrained(pretrained_model_name_or_path, options) {
|
|
127
|
+
|
|
128
|
+
const [config, components] = await Promise.all([
|
|
129
|
+
// TODO:
|
|
130
|
+
this.uses_processor_config
|
|
131
|
+
? getModelJSON(pretrained_model_name_or_path, PROCESSOR_NAME, true, options)
|
|
132
|
+
: {},
|
|
133
|
+
Promise.all(
|
|
134
|
+
this.classes
|
|
135
|
+
.filter((cls) => cls in this)
|
|
136
|
+
.map(async (cls) => {
|
|
137
|
+
const component = await this[cls].from_pretrained(pretrained_model_name_or_path, options);
|
|
138
|
+
return [cls.replace(/_class$/, ''), component];
|
|
139
|
+
})
|
|
140
|
+
).then(Object.fromEntries)
|
|
141
|
+
]);
|
|
142
|
+
|
|
143
|
+
return new this(config, components);
|
|
144
|
+
}
|
|
145
|
+
}
|
package/src/configs.js
CHANGED
|
@@ -36,6 +36,13 @@ import {
|
|
|
36
36
|
* @typedef {import('./utils/hub.js').PretrainedOptions} PretrainedOptions
|
|
37
37
|
*/
|
|
38
38
|
|
|
39
|
+
/**
|
|
40
|
+
* @typedef {import('./utils/core.js').ProgressCallback} ProgressCallback
|
|
41
|
+
*/
|
|
42
|
+
|
|
43
|
+
/**
|
|
44
|
+
* @typedef {import('./utils/core.js').ProgressInfo} ProgressInfo
|
|
45
|
+
*/
|
|
39
46
|
|
|
40
47
|
/**
|
|
41
48
|
* Loads a config from the specified path.
|
|
@@ -61,6 +68,8 @@ function getNormalizedConfig(config) {
|
|
|
61
68
|
case 'llava':
|
|
62
69
|
case 'paligemma':
|
|
63
70
|
case 'florence2':
|
|
71
|
+
case 'llava_onevision':
|
|
72
|
+
case 'idefics3':
|
|
64
73
|
init_normalized_config = getNormalizedConfig(config.text_config);
|
|
65
74
|
break;
|
|
66
75
|
case 'moondream1':
|
|
@@ -69,6 +78,9 @@ function getNormalizedConfig(config) {
|
|
|
69
78
|
case 'musicgen':
|
|
70
79
|
init_normalized_config = getNormalizedConfig(config.decoder);
|
|
71
80
|
break;
|
|
81
|
+
case 'multi_modality':
|
|
82
|
+
init_normalized_config = getNormalizedConfig(config.language_config);
|
|
83
|
+
break;
|
|
72
84
|
|
|
73
85
|
// Decoder-only models
|
|
74
86
|
case 'gpt2':
|
|
@@ -98,6 +110,7 @@ function getNormalizedConfig(config) {
|
|
|
98
110
|
case 'mistral':
|
|
99
111
|
case 'starcoder2':
|
|
100
112
|
case 'qwen2':
|
|
113
|
+
case 'qwen2_vl':
|
|
101
114
|
mapping['num_heads'] = 'num_key_value_heads';
|
|
102
115
|
mapping['num_layers'] = 'num_hidden_layers';
|
|
103
116
|
mapping['hidden_size'] = 'hidden_size';
|
|
@@ -218,14 +231,12 @@ function getNormalizedConfig(config) {
|
|
|
218
231
|
*/
|
|
219
232
|
export function getKeyValueShapes(config, {
|
|
220
233
|
prefix = 'past_key_values',
|
|
234
|
+
batch_size=1,
|
|
221
235
|
} = {}) {
|
|
222
236
|
/** @type {Record<string, number[]>} */
|
|
223
237
|
const decoderFeeds = {};
|
|
224
238
|
const normalized_config = config.normalized_config;
|
|
225
239
|
|
|
226
|
-
// TODO support batches (i.e., batch_size > 1)
|
|
227
|
-
const batch_size = 1;
|
|
228
|
-
|
|
229
240
|
if (normalized_config.is_encoder_decoder && (
|
|
230
241
|
'num_encoder_heads' in normalized_config && 'num_decoder_heads' in normalized_config
|
|
231
242
|
)) {
|
|
@@ -372,6 +383,6 @@ export class AutoConfig {
|
|
|
372
383
|
* See https://onnxruntime.ai/docs/tutorials/web/env-flags-and-session-options.html#freedimensionoverrides
|
|
373
384
|
* for more information.
|
|
374
385
|
* @property {import('./utils/devices.js').DeviceType} [device] The default device to use for the model.
|
|
375
|
-
* @property {import('./utils/dtypes.js').DataType} [dtype] The default data type to use for the model.
|
|
386
|
+
* @property {import('./utils/dtypes.js').DataType|Record<string, import('./utils/dtypes.js').DataType>} [dtype] The default data type to use for the model.
|
|
376
387
|
* @property {boolean|Record<string, boolean>} [use_external_data_format=false] Whether to load the model using the external data format (used for models >= 2GB in size).
|
|
377
388
|
*/
|
package/src/env.js
CHANGED
|
@@ -26,12 +26,12 @@ import fs from 'fs';
|
|
|
26
26
|
import path from 'path';
|
|
27
27
|
import url from 'url';
|
|
28
28
|
|
|
29
|
-
const VERSION = '3.
|
|
29
|
+
const VERSION = '3.1.1';
|
|
30
30
|
|
|
31
31
|
// Check if various APIs are available (depends on environment)
|
|
32
|
-
const IS_BROWSER_ENV = typeof
|
|
33
|
-
const IS_WEBWORKER_ENV =
|
|
34
|
-
const IS_WEB_CACHE_AVAILABLE =
|
|
32
|
+
const IS_BROWSER_ENV = typeof window !== "undefined" && typeof window.document !== "undefined";
|
|
33
|
+
const IS_WEBWORKER_ENV = typeof self !== "undefined" && self.constructor?.name === 'DedicatedWorkerGlobalScope';
|
|
34
|
+
const IS_WEB_CACHE_AVAILABLE = typeof self !== "undefined" && 'caches' in self;
|
|
35
35
|
const IS_WEBGPU_AVAILABLE = typeof navigator !== 'undefined' && 'gpu' in navigator;
|
|
36
36
|
const IS_WEBNN_AVAILABLE = typeof navigator !== 'undefined' && 'ml' in navigator;
|
|
37
37
|
|
|
@@ -44,7 +44,7 @@ const IS_PATH_AVAILABLE = !isEmpty(path);
|
|
|
44
44
|
* A read-only object containing information about the APIs available in the current environment.
|
|
45
45
|
*/
|
|
46
46
|
export const apis = Object.freeze({
|
|
47
|
-
/** Whether we are running in a browser environment */
|
|
47
|
+
/** Whether we are running in a browser environment (and not a web worker) */
|
|
48
48
|
IS_BROWSER_ENV,
|
|
49
49
|
|
|
50
50
|
/** Whether we are running in a web worker environment */
|
|
@@ -137,7 +137,7 @@ export const env = {
|
|
|
137
137
|
remoteHost: 'https://huggingface.co/',
|
|
138
138
|
remotePathTemplate: '{model}/resolve/{revision}/',
|
|
139
139
|
|
|
140
|
-
allowLocalModels: !IS_BROWSER_ENV,
|
|
140
|
+
allowLocalModels: !(IS_BROWSER_ENV || IS_WEBWORKER_ENV),
|
|
141
141
|
localModelPath: localModelPath,
|
|
142
142
|
useFS: IS_FS_AVAILABLE,
|
|
143
143
|
|
|
@@ -259,6 +259,13 @@ export class GenerationConfig {
|
|
|
259
259
|
*/
|
|
260
260
|
suppress_tokens = null;
|
|
261
261
|
|
|
262
|
+
/**
|
|
263
|
+
* A streamer that will be used to stream the generation.
|
|
264
|
+
* @type {import('./streamers.js').TextStreamer}
|
|
265
|
+
* @default null
|
|
266
|
+
*/
|
|
267
|
+
streamer = null;
|
|
268
|
+
|
|
262
269
|
/**
|
|
263
270
|
* A list of tokens that will be suppressed at the beginning of the generation.
|
|
264
271
|
* The `SuppressBeginTokens` logit processor will set their log probs to `-inf` so that they are not sampled.
|
|
@@ -151,7 +151,7 @@ export class ForcedBOSTokenLogitsProcessor extends LogitsProcessor {
|
|
|
151
151
|
* Apply the BOS token forcing to the logits.
|
|
152
152
|
* @param {bigint[][]} input_ids The input IDs.
|
|
153
153
|
* @param {Tensor} logits The logits.
|
|
154
|
-
* @returns {
|
|
154
|
+
* @returns {Tensor} The logits with BOS token forcing.
|
|
155
155
|
*/
|
|
156
156
|
_call(input_ids, logits) {
|
|
157
157
|
for (let i = 0; i < input_ids.length; ++i) {
|
|
@@ -221,7 +221,7 @@ export class SuppressTokensAtBeginLogitsProcessor extends LogitsProcessor {
|
|
|
221
221
|
* Apply the BOS token forcing to the logits.
|
|
222
222
|
* @param {bigint[][]} input_ids The input IDs.
|
|
223
223
|
* @param {Tensor} logits The logits.
|
|
224
|
-
* @returns {
|
|
224
|
+
* @returns {Tensor} The logits with BOS token forcing.
|
|
225
225
|
*/
|
|
226
226
|
_call(input_ids, logits) {
|
|
227
227
|
for (let i = 0; i < input_ids.length; ++i) {
|
|
@@ -391,7 +391,7 @@ export class NoRepeatNGramLogitsProcessor extends LogitsProcessor {
|
|
|
391
391
|
* Apply the no-repeat-ngram processor to the logits.
|
|
392
392
|
* @param {bigint[][]} input_ids The input IDs.
|
|
393
393
|
* @param {Tensor} logits The logits.
|
|
394
|
-
* @returns {
|
|
394
|
+
* @returns {Tensor} The logits with no-repeat-ngram processing.
|
|
395
395
|
*/
|
|
396
396
|
_call(input_ids, logits) {
|
|
397
397
|
for (let i = 0; i < input_ids.length; ++i) {
|
|
@@ -406,12 +406,22 @@ export class NoRepeatNGramLogitsProcessor extends LogitsProcessor {
|
|
|
406
406
|
}
|
|
407
407
|
|
|
408
408
|
/**
|
|
409
|
-
* A logits processor that
|
|
409
|
+
* A logits processor that prevents the repetition of previous tokens through a penalty.
|
|
410
|
+
* This penalty is applied at most once per token. Note that, for decoder-only models like most LLMs,
|
|
411
|
+
* the considered tokens include the prompt.
|
|
412
|
+
*
|
|
413
|
+
* In the original [paper](https://arxiv.org/pdf/1909.05858.pdf), the authors suggest the use of a
|
|
414
|
+
* penalty of around 1.2 to achieve a good balance between truthful generation and lack of repetition.
|
|
415
|
+
* To penalize and reduce repetition, use `penalty` values above 1.0, where a higher value penalizes
|
|
416
|
+
* more strongly. To reward and encourage repetition, use `penalty` values between 0.0 and 1.0, where
|
|
417
|
+
* a lower value rewards more strongly.
|
|
410
418
|
*/
|
|
411
419
|
export class RepetitionPenaltyLogitsProcessor extends LogitsProcessor {
|
|
412
420
|
/**
|
|
413
421
|
* Create a RepetitionPenaltyLogitsProcessor.
|
|
414
|
-
* @param {number} penalty The
|
|
422
|
+
* @param {number} penalty The parameter for repetition penalty.
|
|
423
|
+
* - 1.0 means no penalty. Above 1.0 penalizes previously generated tokens.
|
|
424
|
+
* - Between 0.0 and 1.0 rewards previously generated tokens.
|
|
415
425
|
*/
|
|
416
426
|
constructor(penalty) {
|
|
417
427
|
super();
|
|
@@ -422,16 +432,12 @@ export class RepetitionPenaltyLogitsProcessor extends LogitsProcessor {
|
|
|
422
432
|
* Apply the repetition penalty to the logits.
|
|
423
433
|
* @param {bigint[][]} input_ids The input IDs.
|
|
424
434
|
* @param {Tensor} logits The logits.
|
|
425
|
-
* @returns {
|
|
435
|
+
* @returns {Tensor} The logits with repetition penalty processing.
|
|
426
436
|
*/
|
|
427
437
|
_call(input_ids, logits) {
|
|
428
|
-
// Modify the logits corresponding to each element in `input_ids`.
|
|
429
|
-
// As a consequence, the logits corresponding to tokens that appear
|
|
430
|
-
// many times in the output will be penalised more.
|
|
431
|
-
|
|
432
438
|
for (let i = 0; i < input_ids.length; ++i) {
|
|
433
439
|
const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
|
|
434
|
-
for (const input_id of input_ids[i]) {
|
|
440
|
+
for (const input_id of new Set(input_ids[i])) {
|
|
435
441
|
const token = Number(input_id);
|
|
436
442
|
if (batch_logits_data[token] < 0) {
|
|
437
443
|
batch_logits_data[token] *= this.penalty;
|
|
@@ -464,7 +470,7 @@ export class MinLengthLogitsProcessor extends LogitsProcessor {
|
|
|
464
470
|
* Apply logit processor.
|
|
465
471
|
* @param {bigint[][]} input_ids The input IDs.
|
|
466
472
|
* @param {Tensor} logits The logits.
|
|
467
|
-
* @returns {
|
|
473
|
+
* @returns {Tensor} The processed logits.
|
|
468
474
|
*/
|
|
469
475
|
_call(input_ids, logits) {
|
|
470
476
|
for (let i = 0; i < input_ids.length; ++i) {
|
|
@@ -502,7 +508,7 @@ export class MinNewTokensLengthLogitsProcessor extends LogitsProcessor {
|
|
|
502
508
|
* Apply logit processor.
|
|
503
509
|
* @param {bigint[][]} input_ids The input IDs.
|
|
504
510
|
* @param {Tensor} logits The logits.
|
|
505
|
-
* @returns {
|
|
511
|
+
* @returns {Tensor} The processed logits.
|
|
506
512
|
*/
|
|
507
513
|
_call(input_ids, logits) {
|
|
508
514
|
for (let i = 0; i < input_ids.length; ++i) {
|
|
@@ -535,7 +541,7 @@ export class NoBadWordsLogitsProcessor extends LogitsProcessor {
|
|
|
535
541
|
* Apply logit processor.
|
|
536
542
|
* @param {bigint[][]} input_ids The input IDs.
|
|
537
543
|
* @param {Tensor} logits The logits.
|
|
538
|
-
* @returns {
|
|
544
|
+
* @returns {Tensor} The processed logits.
|
|
539
545
|
*/
|
|
540
546
|
_call(input_ids, logits) {
|
|
541
547
|
for (let i = 0; i < input_ids.length; ++i) {
|
|
@@ -596,7 +602,7 @@ export class ClassifierFreeGuidanceLogitsProcessor extends LogitsProcessor {
|
|
|
596
602
|
* Apply logit processor.
|
|
597
603
|
* @param {bigint[][]} input_ids The input IDs.
|
|
598
604
|
* @param {Tensor} logits The logits.
|
|
599
|
-
* @returns {
|
|
605
|
+
* @returns {Tensor} The processed logits.
|
|
600
606
|
*/
|
|
601
607
|
_call(input_ids, logits) {
|
|
602
608
|
if (logits.dims[0] !== 2 * input_ids.length) {
|
|
@@ -650,7 +656,7 @@ export class TemperatureLogitsWarper extends LogitsWarper {
|
|
|
650
656
|
* Apply logit warper.
|
|
651
657
|
* @param {bigint[][]} input_ids The input IDs.
|
|
652
658
|
* @param {Tensor} logits The logits.
|
|
653
|
-
* @returns {
|
|
659
|
+
* @returns {Tensor} The processed logits.
|
|
654
660
|
*/
|
|
655
661
|
_call(input_ids, logits) {
|
|
656
662
|
const batch_logits_data = /** @type {Float32Array} */(logits.data);
|
|
@@ -34,7 +34,12 @@ const stdout_write = apis.IS_PROCESS_AVAILABLE
|
|
|
34
34
|
export class TextStreamer extends BaseStreamer {
|
|
35
35
|
/**
|
|
36
36
|
*
|
|
37
|
-
* @param {import('../tokenizers.js').PreTrainedTokenizer} tokenizer
|
|
37
|
+
* @param {import('../tokenizers.js').PreTrainedTokenizer} tokenizer
|
|
38
|
+
* @param {Object} options
|
|
39
|
+
* @param {boolean} [options.skip_prompt=false] Whether to skip the prompt tokens
|
|
40
|
+
* @param {function(string): void} [options.callback_function=null] Function to call when a piece of text is ready to display
|
|
41
|
+
* @param {function(bigint[]): void} [options.token_callback_function=null] Function to call when a new token is generated
|
|
42
|
+
* @param {Object} [options.decode_kwargs={}] Additional keyword arguments to pass to the tokenizer's decode method
|
|
38
43
|
*/
|
|
39
44
|
constructor(tokenizer, {
|
|
40
45
|
skip_prompt = false,
|
|
@@ -143,7 +148,7 @@ export class WhisperTextStreamer extends TextStreamer {
|
|
|
143
148
|
* @param {Object} options
|
|
144
149
|
* @param {boolean} [options.skip_prompt=false] Whether to skip the prompt tokens
|
|
145
150
|
* @param {function(string): void} [options.callback_function=null] Function to call when a piece of text is ready to display
|
|
146
|
-
* @param {function(
|
|
151
|
+
* @param {function(bigint[]): void} [options.token_callback_function=null] Function to call when a new token is generated
|
|
147
152
|
* @param {function(number): void} [options.on_chunk_start=null] Function to call when a new chunk starts
|
|
148
153
|
* @param {function(number): void} [options.on_chunk_end=null] Function to call when a chunk ends
|
|
149
154
|
* @param {function(): void} [options.on_finalize=null] Function to call when the stream is finalized
|
package/src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import { FeatureExtractor, validate_audio_inputs } from '../../base/feature_extraction_utils.js';
|
|
2
|
+
import { Tensor } from '../../utils/tensor.js';
|
|
3
|
+
import { mel_filter_bank, spectrogram, window_function } from '../../utils/audio.js';
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
export class ASTFeatureExtractor extends FeatureExtractor {

    /**
     * Feature extractor for the Audio Spectrogram Transformer (AST).
     * Converts a raw mono waveform into a normalized log-Mel spectrogram tensor.
     *
     * @param {Object} config The feature extractor configuration. This class reads
     * `sampling_rate`, `num_mel_bins`, `max_length`, `do_normalize`, `mean`, and `std`.
     */
    constructor(config) {
        super(config);

        const sampling_rate = this.config.sampling_rate;
        // Kaldi-style triangular Mel filter bank covering 20 Hz up to the Nyquist frequency.
        const mel_filters = mel_filter_bank(
            256, // num_frequency_bins
            this.config.num_mel_bins, // num_mel_filters
            20, // min_frequency
            Math.floor(sampling_rate / 2), // max_frequency
            sampling_rate, // sampling_rate
            null, // norm
            "kaldi", // mel_scale
            true, // triangularize_in_mel_space
        );

        // Do padding: append a trailing zero to each filter — presumably to match the
        // fft_length/2 + 1 = 257 frequency bins produced by the 512-point FFT below
        // (the bank was built for 256 bins). TODO confirm against the Python reference.
        for (let i = 0; i < mel_filters.length; ++i) {
            mel_filters[i].push(0);
        }
        this.mel_filters = mel_filters;

        // Non-periodic (symmetric) Hann window of 400 samples, matching frame_length.
        this.window = window_function(400, 'hann', {
            periodic: false,
        })

        // Normalization statistics applied in `_call` when `do_normalize` is set.
        this.mean = this.config.mean;
        this.std = this.config.std;
    }

    /**
     * Computes the log-Mel spectrogram of the provided audio waveform.
     * @param {Float32Array|Float64Array} waveform The audio waveform to process.
     * @param {number} max_length The maximum number of frames to return.
     * @returns {Promise<Tensor>} A Promise resolving to the log-Mel spectrogram Tensor.
     */
    async _extract_fbank_features(waveform, max_length) {
        // NOTE: We don't pad/truncate since that is passed in as `max_num_frames`
        return spectrogram(
            waveform,
            this.window, // window
            400, // frame_length
            160, // hop_length
            {
                fft_length: 512,
                power: 2.0,
                center: false,
                preemphasis: 0.97,
                mel_filters: this.mel_filters,
                log_mel: 'log',
                mel_floor: 1.192092955078125e-07,
                remove_dc_offset: true,

                // Custom
                max_num_frames: max_length,
                transpose: true,
            }
        )
    }


    /**
     * Asynchronously extracts features from a given audio using the provided configuration.
     * @param {Float32Array|Float64Array} audio The audio data as a Float32Array/Float64Array.
     * @returns {Promise<{ input_values: Tensor }>} A Promise resolving to an object containing
     * the extracted input features as a Tensor with a leading batch dimension of 1.
     */
    async _call(audio) {
        validate_audio_inputs(audio, 'ASTFeatureExtractor');

        const features = await this._extract_fbank_features(audio, this.config.max_length);
        if (this.config.do_normalize) {
            // Normalize the input audio spectrogram to have mean=0, std=0.5
            // (dividing by 2*std maps one standard deviation to 0.5).
            const denom = this.std * 2;
            const features_data = features.data;
            for (let i = 0; i < features_data.length; ++i) {
                features_data[i] = (features_data[i] - this.mean) / denom;
            }
        }

        // Add the batch dimension in-place before returning.
        return {
            input_values: features.unsqueeze_(0)
        };
    }
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
|
|
2
|
+
import { FEATURE_EXTRACTOR_NAME, GITHUB_ISSUE_URL } from '../../utils/constants.js';
|
|
3
|
+
import { getModelJSON } from '../../utils/hub.js';
|
|
4
|
+
import { FeatureExtractor } from '../../base/feature_extraction_utils.js';
|
|
5
|
+
import * as AllFeatureExtractors from '../feature_extractors.js';
|
|
6
|
+
|
|
7
|
+
export class AutoFeatureExtractor {

    /**
     * Instantiate one of the feature extractor classes of the library from a pretrained model.
     *
     * The feature extractor class to instantiate is selected based on the `feature_extractor_type` property of
     * the config object (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
     *
     * @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
     * - A string, the *model id* of a pretrained processor hosted inside a model repo on huggingface.co.
     *   Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
     *   user or organization name, like `dbmdz/bert-base-german-cased`.
     * - A path to a *directory* containing processor files, e.g., `./my_model_directory/`.
     * @param {import('../../utils/hub.js').PretrainedOptions} options Additional options for loading the processor.
     *
     * @returns {Promise<FeatureExtractor>} A new instance of the resolved FeatureExtractor subclass.
     * @throws {Error} If `feature_extractor_type` does not name a known feature extractor class.
     */

    /** @type {typeof FeatureExtractor.from_pretrained} */
    static async from_pretrained(pretrained_model_name_or_path, options={}) {

        const preprocessorConfig = await getModelJSON(pretrained_model_name_or_path, FEATURE_EXTRACTOR_NAME, true, options);

        // Determine feature extractor class by looking the config's type name up
        // in the registry of all known feature extractors.
        const key = preprocessorConfig.feature_extractor_type;
        const feature_extractor_class = AllFeatureExtractors[key];

        if (!feature_extractor_class) {
            throw new Error(`Unknown feature_extractor_type: '${key}'. Please report this at ${GITHUB_ISSUE_URL}.`);
        }

        // Instantiate feature extractor
        return new feature_extractor_class(preprocessorConfig);
    }
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
|
|
2
|
+
import { GITHUB_ISSUE_URL, IMAGE_PROCESSOR_NAME } from '../../utils/constants.js';
|
|
3
|
+
import { getModelJSON } from '../../utils/hub.js';
|
|
4
|
+
import { ImageProcessor } from '../../base/image_processors_utils.js';
|
|
5
|
+
import * as AllImageProcessors from '../image_processors.js';
|
|
6
|
+
|
|
7
|
+
export class AutoImageProcessor {

    /**
     * Instantiate an image processor from a pretrained model's preprocessor config.
     *
     * The class is chosen from the `image_processor_type` config property, falling back to
     * `feature_extractor_type` (legacy configs). If the named type is not in the registry,
     * the base `ImageProcessor` is used (with a warning when a type was actually specified).
     *
     * @param {string} pretrained_model_name_or_path The model id or local directory of the pretrained model.
     * @param {import('../../utils/hub.js').PretrainedOptions} options Additional options for loading.
     * @returns {Promise<ImageProcessor>} A new image processor instance.
     */
    /** @type {typeof ImageProcessor.from_pretrained} */
    static async from_pretrained(pretrained_model_name_or_path, options={}) {

        const preprocessorConfig = await getModelJSON(pretrained_model_name_or_path, IMAGE_PROCESSOR_NAME, true, options);

        // Resolve the processor class name; older configs used `feature_extractor_type`.
        const type = preprocessorConfig.image_processor_type ?? preprocessorConfig.feature_extractor_type;
        const resolved = AllImageProcessors[type];

        if (resolved) {
            return new resolved(preprocessorConfig);
        }

        if (type !== undefined) {
            // A type was specified but is unknown to us — warn before falling back,
            // so the missing class can be reported upstream.
            console.warn(`Image processor type '${type}' not found, assuming base ImageProcessor. Please report this at ${GITHUB_ISSUE_URL}.`)
        }

        // Fall back to the generic base implementation.
        return new ImageProcessor(preprocessorConfig);
    }
}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
import { IMAGE_PROCESSOR_NAME } from '../../utils/constants.js';
|
|
4
|
+
import { getModelJSON } from '../../utils/hub.js';
|
|
5
|
+
import { Processor } from '../../base/processing_utils.js';
|
|
6
|
+
|
|
7
|
+
import * as AllProcessors from '../processors.js';
|
|
8
|
+
import * as AllImageProcessors from '../image_processors.js';
|
|
9
|
+
import * as AllFeatureExtractors from '../feature_extractors.js';
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* Helper class which is used to instantiate pretrained processors with the `from_pretrained` function.
|
|
13
|
+
* The chosen processor class is determined by the type specified in the processor config.
|
|
14
|
+
*
|
|
15
|
+
* **Example:** Load a processor using `from_pretrained`.
|
|
16
|
+
* ```javascript
|
|
17
|
+
* let processor = await AutoProcessor.from_pretrained('openai/whisper-tiny.en');
|
|
18
|
+
* ```
|
|
19
|
+
*
|
|
20
|
+
* **Example:** Run an image through a processor.
|
|
21
|
+
* ```javascript
|
|
22
|
+
* let processor = await AutoProcessor.from_pretrained('Xenova/clip-vit-base-patch16');
|
|
23
|
+
* let image = await RawImage.read('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/football-match.jpg');
|
|
24
|
+
* let image_inputs = await processor(image);
|
|
25
|
+
* // {
|
|
26
|
+
* // "pixel_values": {
|
|
27
|
+
* // "dims": [ 1, 3, 224, 224 ],
|
|
28
|
+
* // "type": "float32",
|
|
29
|
+
* // "data": Float32Array [ -1.558687686920166, -1.558687686920166, -1.5440893173217773, ... ],
|
|
30
|
+
* // "size": 150528
|
|
31
|
+
* // },
|
|
32
|
+
* // "original_sizes": [
|
|
33
|
+
* // [ 533, 800 ]
|
|
34
|
+
* // ],
|
|
35
|
+
* // "reshaped_input_sizes": [
|
|
36
|
+
* // [ 224, 224 ]
|
|
37
|
+
* // ]
|
|
38
|
+
* // }
|
|
39
|
+
* ```
|
|
40
|
+
*/
|
|
41
|
+
export class AutoProcessor {

    /**
     * Instantiate one of the processor classes of the library from a pretrained model.
     *
     * The processor class to instantiate is selected based on the `image_processor_type` (or `feature_extractor_type`; legacy)
     * property of the config object (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
     *
     * @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
     * - A string, the *model id* of a pretrained processor hosted inside a model repo on huggingface.co.
     *   Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
     *   user or organization name, like `dbmdz/bert-base-german-cased`.
     * - A path to a *directory* containing processor files, e.g., `./my_model_directory/`.
     * @param {import('../../utils/hub.js').PretrainedOptions} options Additional options for loading the processor.
     *
     * @returns {Promise<Processor>} A new instance of the Processor class.
     * @throws {Error} If the config names no known processor, image processor, or feature extractor type.
     */
    /** @type {typeof Processor.from_pretrained} */
    static async from_pretrained(pretrained_model_name_or_path, options={}) {

        // TODO: first check for processor.json
        const preprocessorConfig = await getModelJSON(pretrained_model_name_or_path, IMAGE_PROCESSOR_NAME, true, options);

        const { image_processor_type, feature_extractor_type, processor_class } = preprocessorConfig;

        // If the config names a dedicated processor class we know about, delegate to it entirely.
        if (processor_class && AllProcessors[processor_class]) {
            return AllProcessors[processor_class].from_pretrained(pretrained_model_name_or_path, options);
        }

        if (!image_processor_type && !feature_extractor_type) {
            throw new Error('No `image_processor_type` or `feature_extractor_type` found in the config.');
        }

        // Otherwise, assemble a generic Processor from the individual components.
        const components = {};
        if (image_processor_type) {
            const ip_class = AllImageProcessors[image_processor_type];
            if (!ip_class) {
                throw new Error(`Unknown image_processor_type: '${image_processor_type}'.`);
            }
            components.image_processor = new ip_class(preprocessorConfig);
        }

        if (feature_extractor_type) {
            // Legacy configs sometimes declared image processors under `feature_extractor_type`,
            // so check the image-processor registry first (this may override the entry above).
            const legacy_ip_class = AllImageProcessors[feature_extractor_type];
            if (legacy_ip_class) {
                components.image_processor = new legacy_ip_class(preprocessorConfig);
            } else {
                const fe_class = AllFeatureExtractors[feature_extractor_type];
                if (!fe_class) {
                    throw new Error(`Unknown feature_extractor_type: '${feature_extractor_type}'.`);
                }
                components.feature_extractor = new fe_class(preprocessorConfig);
            }
        }

        // No processor-level config of its own — only the components carry state.
        return new Processor({}, components);
    }
}
|