@huggingface/transformers 3.3.3 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. package/README.md +9 -3
  2. package/dist/ort-wasm-simd-threaded.jsep.mjs +124 -115
  3. package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
  4. package/dist/transformers.js +2480 -1457
  5. package/dist/transformers.js.map +1 -1
  6. package/dist/transformers.min.js +1 -1
  7. package/dist/transformers.min.js.map +1 -1
  8. package/dist/{transformers.cjs → transformers.node.cjs} +1412 -2395
  9. package/dist/transformers.node.cjs.map +1 -0
  10. package/dist/transformers.node.min.cjs +2 -0
  11. package/dist/transformers.node.min.cjs.map +1 -0
  12. package/dist/transformers.node.min.mjs +2 -0
  13. package/dist/transformers.node.min.mjs.map +1 -0
  14. package/dist/{transformers.mjs → transformers.node.mjs} +1440 -2375
  15. package/dist/transformers.node.mjs.map +1 -0
  16. package/dist/transformers.web.js +35713 -0
  17. package/dist/transformers.web.js.map +1 -0
  18. package/dist/transformers.web.min.js +2 -0
  19. package/dist/transformers.web.min.js.map +1 -0
  20. package/package.json +6 -6
  21. package/src/backends/onnx.js +14 -15
  22. package/src/configs.js +4 -1
  23. package/src/env.js +1 -1
  24. package/src/generation/streamers.js +4 -3
  25. package/src/models/dac/feature_extraction_dac.js +3 -0
  26. package/src/models/encodec/feature_extraction_encodec.js +32 -0
  27. package/src/models/feature_extractors.js +2 -0
  28. package/src/models/idefics3/image_processing_idefics3.js +1 -1
  29. package/src/models/image_processors.js +1 -0
  30. package/src/models/processors.js +2 -0
  31. package/src/models/smolvlm/image_processing_smolvlm.js +2 -0
  32. package/src/models/smolvlm/processing_smolvlm.js +2 -0
  33. package/src/models/ultravox/processing_ultravox.js +54 -0
  34. package/src/models/whisper/common_whisper.js +7 -1
  35. package/src/models/whisper/feature_extraction_whisper.js +18 -10
  36. package/src/models.js +456 -76
  37. package/src/pipelines.js +111 -7
  38. package/src/tokenizers.js +42 -28
  39. package/src/transformers.js +1 -0
  40. package/src/utils/audio.js +2 -0
  41. package/src/utils/hub.js +140 -80
  42. package/src/utils/maths.js +1 -1
  43. package/src/utils/tensor.js +6 -3
  44. package/src/utils/video.js +128 -0
  45. package/types/backends/onnx.d.ts +2 -2
  46. package/types/backends/onnx.d.ts.map +1 -1
  47. package/types/configs.d.ts +1 -1
  48. package/types/configs.d.ts.map +1 -1
  49. package/types/generation/streamers.d.ts.map +1 -1
  50. package/types/models/dac/feature_extraction_dac.d.ts +4 -0
  51. package/types/models/dac/feature_extraction_dac.d.ts.map +1 -0
  52. package/types/models/encodec/feature_extraction_encodec.d.ts +13 -0
  53. package/types/models/encodec/feature_extraction_encodec.d.ts.map +1 -0
  54. package/types/models/feature_extractors.d.ts +2 -0
  55. package/types/models/florence2/processing_florence2.d.ts +1 -1
  56. package/types/models/florence2/processing_florence2.d.ts.map +1 -1
  57. package/types/models/image_processors.d.ts +1 -0
  58. package/types/models/processors.d.ts +2 -0
  59. package/types/models/smolvlm/image_processing_smolvlm.d.ts +2 -0
  60. package/types/models/smolvlm/image_processing_smolvlm.d.ts.map +1 -0
  61. package/types/models/smolvlm/processing_smolvlm.d.ts +2 -0
  62. package/types/models/smolvlm/processing_smolvlm.d.ts.map +1 -0
  63. package/types/models/ultravox/processing_ultravox.d.ts +16 -0
  64. package/types/models/ultravox/processing_ultravox.d.ts.map +1 -0
  65. package/types/models/whisper/common_whisper.d.ts.map +1 -1
  66. package/types/models/whisper/feature_extraction_whisper.d.ts +3 -1
  67. package/types/models/whisper/feature_extraction_whisper.d.ts.map +1 -1
  68. package/types/models.d.ts +132 -4
  69. package/types/models.d.ts.map +1 -1
  70. package/types/pipelines.d.ts +50 -4
  71. package/types/pipelines.d.ts.map +1 -1
  72. package/types/tokenizers.d.ts.map +1 -1
  73. package/types/transformers.d.ts +1 -0
  74. package/types/tsconfig.tsbuildinfo +1 -1
  75. package/types/utils/audio.d.ts.map +1 -1
  76. package/types/utils/hub.d.ts +19 -7
  77. package/types/utils/hub.d.ts.map +1 -1
  78. package/types/utils/maths.d.ts +2 -2
  79. package/types/utils/maths.d.ts.map +1 -1
  80. package/types/utils/tensor.d.ts +17 -18
  81. package/types/utils/tensor.d.ts.map +1 -1
  82. package/types/utils/video.d.ts +37 -0
  83. package/types/utils/video.d.ts.map +1 -0
  84. package/dist/transformers.cjs.map +0 -1
  85. package/dist/transformers.min.cjs +0 -2
  86. package/dist/transformers.min.cjs.map +0 -1
  87. package/dist/transformers.min.mjs +0 -2
  88. package/dist/transformers.min.mjs.map +0 -1
  89. package/dist/transformers.mjs.map +0 -1
package/src/pipelines.js CHANGED
@@ -1730,6 +1730,7 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
1730
1730
  async _call(audio, kwargs = {}) {
1731
1731
  switch (this.model.config.model_type) {
1732
1732
  case 'whisper':
1733
+ case 'lite-whisper':
1733
1734
  return this._call_whisper(audio, kwargs)
1734
1735
  case 'wav2vec2':
1735
1736
  case 'wav2vec2-bert':
@@ -2095,7 +2096,7 @@ export class ImageClassificationPipeline extends (/** @type {new (options: Image
2095
2096
 
2096
2097
  /**
2097
2098
  * @typedef {Object} ImageSegmentationPipelineOutput
2098
- * @property {string} label The label of the segment.
2099
+ * @property {string|null} label The label of the segment.
2099
2100
  * @property {number|null} score The score of the segment.
2100
2101
  * @property {RawImage} mask The mask of the segment.
2101
2102
  *
@@ -2165,14 +2166,30 @@ export class ImageSegmentationPipeline extends (/** @type {new (options: ImagePi
2165
2166
  const preparedImages = await prepareImages(images);
2166
2167
  const imageSizes = preparedImages.map(x => [x.height, x.width]);
2167
2168
 
2168
- const { pixel_values, pixel_mask } = await this.processor(preparedImages);
2169
- const output = await this.model({ pixel_values, pixel_mask });
2169
+ const inputs = await this.processor(preparedImages);
2170
+
2171
+ const { inputNames, outputNames } = this.model.sessions['model'];
2172
+ if (!inputNames.includes('pixel_values')) {
2173
+ if (inputNames.length !== 1) {
2174
+ throw Error(`Expected a single input name, but got ${inputNames.length} inputs: ${inputNames}.`);
2175
+ }
2176
+
2177
+ const newName = inputNames[0];
2178
+ if (newName in inputs) {
2179
+ throw Error(`Input name ${newName} already exists in the inputs.`);
2180
+ }
2181
+ // To ensure compatibility with certain background-removal models,
2182
+ // we may need to perform a mapping of input to output names
2183
+ inputs[newName] = inputs.pixel_values;
2184
+ }
2185
+
2186
+ const output = await this.model(inputs);
2170
2187
 
2171
2188
  let fn = null;
2172
2189
  if (subtask !== null) {
2173
2190
  fn = this.subtasks_mapping[subtask];
2174
- } else {
2175
- for (let [task, func] of Object.entries(this.subtasks_mapping)) {
2191
+ } else if (this.processor.image_processor) {
2192
+ for (const [task, func] of Object.entries(this.subtasks_mapping)) {
2176
2193
  if (func in this.processor.image_processor) {
2177
2194
  fn = this.processor.image_processor[func].bind(this.processor.image_processor);
2178
2195
  subtask = task;
@@ -2186,7 +2203,23 @@ export class ImageSegmentationPipeline extends (/** @type {new (options: ImagePi
2186
2203
 
2187
2204
  /** @type {ImageSegmentationPipelineOutput[]} */
2188
2205
  const annotation = [];
2189
- if (subtask === 'panoptic' || subtask === 'instance') {
2206
+ if (!subtask) {
2207
+ // Perform standard image segmentation
2208
+ const result = output[outputNames[0]];
2209
+ for (let i = 0; i < imageSizes.length; ++i) {
2210
+ const size = imageSizes[i];
2211
+ const item = result[i];
2212
+ if (item.data.some(x => x < 0 || x > 1)) {
2213
+ item.sigmoid_();
2214
+ }
2215
+ const mask = await RawImage.fromTensor(item.mul_(255).to('uint8')).resize(size[1], size[0]);
2216
+ annotation.push({
2217
+ label: null,
2218
+ score: null,
2219
+ mask
2220
+ });
2221
+ }
2222
+ } else if (subtask === 'panoptic' || subtask === 'instance') {
2190
2223
  const processed = fn(
2191
2224
  output,
2192
2225
  threshold,
@@ -2242,6 +2275,63 @@ export class ImageSegmentationPipeline extends (/** @type {new (options: ImagePi
2242
2275
  }
2243
2276
  }
2244
2277
 
2278
+
2279
+ /**
2280
+ * @typedef {Object} BackgroundRemovalPipelineOptions Parameters specific to background removal pipelines.
2281
+ *
2282
+ * @callback BackgroundRemovalPipelineCallback Segment the input images.
2283
+ * @param {ImagePipelineInputs} images The input images.
2284
+ * @param {BackgroundRemovalPipelineOptions} [options] The options to use for background removal.
2285
+ * @returns {Promise<RawImage[]>} The images with the background removed.
2286
+ *
2287
+ * @typedef {ImagePipelineConstructorArgs & BackgroundRemovalPipelineCallback & Disposable} BackgroundRemovalPipelineType
2288
+ */
2289
+
2290
+ /**
2291
+ * Background removal pipeline using certain `AutoModelForXXXSegmentation`.
2292
+ * This pipeline removes the backgrounds of images.
2293
+ *
2294
+ * **Example:** Perform background removal with `Xenova/modnet`.
2295
+ * ```javascript
2296
+ * const segmenter = await pipeline('background-removal', 'Xenova/modnet');
2297
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/portrait-of-woman_small.jpg';
2298
+ * const output = await segmenter(url);
2299
+ * // [
2300
+ * // RawImage { data: Uint8ClampedArray(648000) [ ... ], width: 360, height: 450, channels: 4 }
2301
+ * // ]
2302
+ * ```
2303
+ */
2304
+ export class BackgroundRemovalPipeline extends (/** @type {new (options: ImagePipelineConstructorArgs) => ImageSegmentationPipelineType} */ (ImageSegmentationPipeline)) {
2305
+ /**
2306
+ * Create a new BackgroundRemovalPipeline.
2307
+ * @param {ImagePipelineConstructorArgs} options An object used to instantiate the pipeline.
2308
+ */
2309
+ constructor(options) {
2310
+ super(options);
2311
+ }
2312
+
2313
+ /** @type {BackgroundRemovalPipelineCallback} */
2314
+ async _call(images, options = {}) {
2315
+ const isBatched = Array.isArray(images);
2316
+
2317
+ if (isBatched && images.length !== 1) {
2318
+ throw Error("Background removal pipeline currently only supports a batch size of 1.");
2319
+ }
2320
+
2321
+ const preparedImages = await prepareImages(images);
2322
+
2323
+ // @ts-expect-error TS2339
2324
+ const masks = await super._call(images, options);
2325
+ const result = preparedImages.map((img, i) => {
2326
+ const cloned = img.clone();
2327
+ cloned.putAlpha(masks[i].mask);
2328
+ return cloned;
2329
+ });
2330
+
2331
+ return result;
2332
+ }
2333
+ }
2334
+
2245
2335
  /**
2246
2336
  * @typedef {Object} ZeroShotImageClassificationOutput
2247
2337
  * @property {string} label The label identified by the model. It is one of the suggested `candidate_label`.
@@ -2554,7 +2644,7 @@ export class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: T
2554
2644
  const output = await this.model({ ...text_inputs, pixel_values });
2555
2645
 
2556
2646
  let result;
2557
- if('post_process_grounded_object_detection' in this.processor) {
2647
+ if ('post_process_grounded_object_detection' in this.processor) {
2558
2648
  // @ts-ignore
2559
2649
  const processed = this.processor.post_process_grounded_object_detection(
2560
2650
  output,
@@ -3134,6 +3224,16 @@ const SUPPORTED_TASKS = Object.freeze({
3134
3224
  },
3135
3225
  "type": "multimodal",
3136
3226
  },
3227
+ "background-removal": {
3228
+ // no tokenizer
3229
+ "pipeline": BackgroundRemovalPipeline,
3230
+ "model": [AutoModelForImageSegmentation, AutoModelForSemanticSegmentation, AutoModelForUniversalSegmentation],
3231
+ "processor": AutoProcessor,
3232
+ "default": {
3233
+ "model": "Xenova/modnet",
3234
+ },
3235
+ "type": "image",
3236
+ },
3137
3237
 
3138
3238
  "zero-shot-image-classification": {
3139
3239
  "tokenizer": AutoTokenizer,
@@ -3299,6 +3399,8 @@ export async function pipeline(
3299
3399
  revision = 'main',
3300
3400
  device = null,
3301
3401
  dtype = null,
3402
+ subfolder = 'onnx',
3403
+ use_external_data_format = null,
3302
3404
  model_file_name = null,
3303
3405
  session_options = {},
3304
3406
  } = {}
@@ -3329,6 +3431,8 @@ export async function pipeline(
3329
3431
  revision,
3330
3432
  device,
3331
3433
  dtype,
3434
+ subfolder,
3435
+ use_external_data_format,
3332
3436
  model_file_name,
3333
3437
  session_options,
3334
3438
  }
package/src/tokenizers.js CHANGED
@@ -995,6 +995,8 @@ class Normalizer extends Callable {
995
995
  return new Replace(config);
996
996
  case 'NFC':
997
997
  return new NFC(config);
998
+ case 'NFD':
999
+ return new NFD(config);
998
1000
  case 'NFKC':
999
1001
  return new NFKC(config);
1000
1002
  case 'NFKD':
@@ -1053,50 +1055,62 @@ class Replace extends Normalizer {
1053
1055
  }
1054
1056
 
1055
1057
  /**
1056
- * A normalizer that applies Unicode normalization form C (NFC) to the input text.
1058
+ * A normalizer that applies Unicode normalization to the input text.
1057
1059
  * @extends Normalizer
1060
+ * @abstract
1058
1061
  */
1059
- class NFC extends Normalizer {
1062
+ class UnicodeNormalizer extends Normalizer {
1063
+ /**
1064
+ * @type {string} The Unicode normalization form to apply.
1065
+ * Should be one of: 'NFC', 'NFD', 'NFKC', or 'NFKD'.
1066
+ */
1067
+ form = undefined;
1068
+
1060
1069
  /**
1061
- * Normalize the input text by applying Unicode normalization form C (NFC).
1070
+ * Normalize the input text by applying Unicode normalization.
1062
1071
  * @param {string} text The input text to be normalized.
1063
1072
  * @returns {string} The normalized text.
1064
1073
  */
1065
1074
  normalize(text) {
1066
- text = text.normalize('NFC')
1075
+ text = text.normalize(this.form)
1067
1076
  return text;
1068
1077
  }
1069
1078
  }
1070
1079
 
1071
1080
  /**
1072
- * NFKC Normalizer.
1073
- * @extends Normalizer
1081
+ * A normalizer that applies Unicode normalization form C (NFC) to the input text.
1082
+ * Canonical Decomposition, followed by Canonical Composition.
1083
+ * @extends UnicodeNormalizer
1074
1084
  */
1075
- class NFKC extends Normalizer {
1076
- /**
1077
- * Normalize text using NFKC normalization.
1078
- * @param {string} text The text to be normalized.
1079
- * @returns {string} The normalized text.
1080
- */
1081
- normalize(text) {
1082
- text = text.normalize('NFKC')
1083
- return text;
1084
- }
1085
+ class NFC extends UnicodeNormalizer {
1086
+ form = 'NFC';
1085
1087
  }
1088
+
1086
1089
  /**
1087
- * NFKD Normalizer.
1088
- * @extends Normalizer
1090
+ * A normalizer that applies Unicode normalization form D (NFD) to the input text.
1091
+ * Canonical Decomposition.
1092
+ * @extends UnicodeNormalizer
1089
1093
  */
1090
- class NFKD extends Normalizer {
1091
- /**
1092
- * Normalize text using NFKD normalization.
1093
- * @param {string} text The text to be normalized.
1094
- * @returns {string} The normalized text.
1095
- */
1096
- normalize(text) {
1097
- text = text.normalize('NFKD')
1098
- return text;
1099
- }
1094
+ class NFD extends UnicodeNormalizer {
1095
+ form = 'NFD';
1096
+ }
1097
+
1098
+ /**
1099
+ * A normalizer that applies Unicode normalization form KC (NFKC) to the input text.
1100
+ * Compatibility Decomposition, followed by Canonical Composition.
1101
+ * @extends UnicodeNormalizer
1102
+ */
1103
+ class NFKC extends UnicodeNormalizer {
1104
+ form = 'NFKC';
1105
+ }
1106
+
1107
+ /**
1108
+ * A normalizer that applies Unicode normalization form KD (NFKD) to the input text.
1109
+ * Compatibility Decomposition.
1110
+ * @extends UnicodeNormalizer
1111
+ */
1112
+ class NFKD extends UnicodeNormalizer {
1113
+ form = 'NFKD';
1100
1114
  }
1101
1115
 
1102
1116
  /**
@@ -20,6 +20,7 @@ export * from './configs.js';
20
20
 
21
21
  export * from './utils/audio.js';
22
22
  export * from './utils/image.js';
23
+ export * from './utils/video.js';
23
24
  export * from './utils/tensor.js';
24
25
  export * from './utils/maths.js';
25
26
 
@@ -150,6 +150,7 @@ function hertz_to_mel(freq, mel_scale = "htk") {
150
150
  throw new Error('mel_scale should be one of "htk", "slaney" or "kaldi".');
151
151
  }
152
152
 
153
+ // @ts-expect-error ts(2322)
153
154
  return typeof freq === 'number' ? fn(freq) : freq.map(x => fn(x));
154
155
  }
155
156
 
@@ -173,6 +174,7 @@ function mel_to_hertz(mels, mel_scale = "htk") {
173
174
  throw new Error('mel_scale should be one of "htk", "slaney" or "kaldi".');
174
175
  }
175
176
 
177
+ // @ts-expect-error ts(2322)
176
178
  return typeof mels === 'number' ? fn(mels) : mels.map(x => fn(x));
177
179
  }
178
180