@huggingface/transformers 3.2.3 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/README.md +5 -3
  2. package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
  3. package/dist/ort.bundle.min.mjs +2776 -0
  4. package/dist/transformers.cjs +792 -330
  5. package/dist/transformers.cjs.map +1 -1
  6. package/dist/transformers.js +1150 -656
  7. package/dist/transformers.js.map +1 -1
  8. package/dist/transformers.min.cjs +1 -1
  9. package/dist/transformers.min.cjs.map +1 -1
  10. package/dist/transformers.min.js +1 -1
  11. package/dist/transformers.min.js.map +1 -1
  12. package/dist/transformers.min.mjs +1 -1
  13. package/dist/transformers.min.mjs.map +1 -1
  14. package/dist/transformers.mjs +798 -331
  15. package/dist/transformers.mjs.map +1 -1
  16. package/package.json +3 -3
  17. package/src/base/feature_extraction_utils.js +9 -9
  18. package/src/base/image_processors_utils.js +12 -1
  19. package/src/base/processing_utils.js +24 -3
  20. package/src/configs.js +5 -0
  21. package/src/env.js +1 -2
  22. package/src/generation/streamers.js +5 -2
  23. package/src/models/auto/feature_extraction_auto.js +0 -16
  24. package/src/models/auto/processing_auto.js +0 -16
  25. package/src/models/convnext/image_processing_convnext.js +1 -0
  26. package/src/models/efficientnet/image_processing_efficientnet.js +1 -0
  27. package/src/models/florence2/processing_florence2.js +3 -0
  28. package/src/models/grounding_dino/image_processing_grounding_dino.js +29 -0
  29. package/src/models/grounding_dino/processing_grounding_dino.js +101 -0
  30. package/src/models/idefics3/image_processing_idefics3.js +2 -0
  31. package/src/models/image_processors.js +1 -0
  32. package/src/models/janus/image_processing_janus.js +1 -0
  33. package/src/models/mgp_str/processing_mgp_str.js +2 -0
  34. package/src/models/paligemma/processing_paligemma.js +1 -0
  35. package/src/models/phi3_v/processing_phi3_v.js +1 -1
  36. package/src/models/processors.js +3 -2
  37. package/src/models/pyannote/feature_extraction_pyannote.js +1 -0
  38. package/src/models/qwen2_vl/processing_qwen2_vl.js +1 -0
  39. package/src/models/seamless_m4t/feature_extraction_seamless_m4t.js +2 -2
  40. package/src/models/whisper/feature_extraction_whisper.js +1 -1
  41. package/src/models.js +72 -20
  42. package/src/ops/registry.js +10 -0
  43. package/src/pipelines.js +73 -23
  44. package/src/tokenizers.js +4 -7
  45. package/src/utils/audio.js +113 -1
  46. package/src/utils/core.js +26 -0
  47. package/src/utils/dtypes.js +2 -0
  48. package/src/utils/hub.js +1 -1
  49. package/src/utils/image.js +5 -18
  50. package/src/utils/maths.js +8 -6
  51. package/src/utils/tensor.js +134 -114
  52. package/types/base/feature_extraction_utils.d.ts +7 -7
  53. package/types/base/image_processors_utils.d.ts +7 -0
  54. package/types/base/image_processors_utils.d.ts.map +1 -1
  55. package/types/base/processing_utils.d.ts +25 -19
  56. package/types/base/processing_utils.d.ts.map +1 -1
  57. package/types/configs.d.ts.map +1 -1
  58. package/types/generation/parameters.d.ts +1 -1
  59. package/types/generation/streamers.d.ts +3 -1
  60. package/types/generation/streamers.d.ts.map +1 -1
  61. package/types/models/auto/feature_extraction_auto.d.ts.map +1 -1
  62. package/types/models/auto/image_processing_auto.d.ts.map +1 -1
  63. package/types/models/auto/processing_auto.d.ts.map +1 -1
  64. package/types/models/convnext/image_processing_convnext.d.ts.map +1 -1
  65. package/types/models/efficientnet/image_processing_efficientnet.d.ts.map +1 -1
  66. package/types/models/florence2/processing_florence2.d.ts.map +1 -1
  67. package/types/models/grounding_dino/image_processing_grounding_dino.d.ts +20 -0
  68. package/types/models/grounding_dino/image_processing_grounding_dino.d.ts.map +1 -0
  69. package/types/models/grounding_dino/processing_grounding_dino.d.ts +27 -0
  70. package/types/models/grounding_dino/processing_grounding_dino.d.ts.map +1 -0
  71. package/types/models/idefics3/image_processing_idefics3.d.ts.map +1 -1
  72. package/types/models/image_processors.d.ts +1 -0
  73. package/types/models/janus/image_processing_janus.d.ts.map +1 -1
  74. package/types/models/mgp_str/processing_mgp_str.d.ts.map +1 -1
  75. package/types/models/paligemma/processing_paligemma.d.ts.map +1 -1
  76. package/types/models/phi3_v/processing_phi3_v.d.ts +6 -2
  77. package/types/models/phi3_v/processing_phi3_v.d.ts.map +1 -1
  78. package/types/models/processors.d.ts +3 -2
  79. package/types/models/pyannote/feature_extraction_pyannote.d.ts.map +1 -1
  80. package/types/models/qwen2_vl/processing_qwen2_vl.d.ts.map +1 -1
  81. package/types/models/sapiens/image_processing_sapiens.d.ts +10 -0
  82. package/types/models/sapiens/image_processing_sapiens.d.ts.map +1 -0
  83. package/types/models/whisper/generation_whisper.d.ts +1 -1
  84. package/types/models/whisper/generation_whisper.d.ts.map +1 -1
  85. package/types/models.d.ts +40 -17
  86. package/types/models.d.ts.map +1 -1
  87. package/types/ops/registry.d.ts +1 -0
  88. package/types/ops/registry.d.ts.map +1 -1
  89. package/types/pipelines.d.ts +7 -12
  90. package/types/pipelines.d.ts.map +1 -1
  91. package/types/tokenizers.d.ts.map +1 -1
  92. package/types/tsconfig.tsbuildinfo +1 -0
  93. package/types/utils/audio.d.ts +25 -0
  94. package/types/utils/audio.d.ts.map +1 -1
  95. package/types/utils/core.d.ts +6 -0
  96. package/types/utils/core.d.ts.map +1 -1
  97. package/types/utils/dtypes.d.ts.map +1 -1
  98. package/types/utils/hub.d.ts +1 -1
  99. package/types/utils/hub.d.ts.map +1 -1
  100. package/types/utils/image.d.ts +3 -2
  101. package/types/utils/image.d.ts.map +1 -1
  102. package/types/utils/maths.d.ts +8 -6
  103. package/types/utils/maths.d.ts.map +1 -1
  104. package/types/utils/tensor.d.ts +22 -6
  105. package/types/utils/tensor.d.ts.map +1 -1
@@ -12,8 +12,10 @@ import {
12
12
  } from './hub.js';
13
13
  import { FFT, max } from './maths.js';
14
14
  import {
15
- calculateReflectOffset,
15
+ calculateReflectOffset, saveBlob,
16
16
  } from './core.js';
17
+ import { apis } from '../env.js';
18
+ import fs from 'fs';
17
19
  import { Tensor, matmul } from './tensor.js';
18
20
 
19
21
 
@@ -702,3 +704,113 @@ export function window_function(window_length, name, {
702
704
 
703
705
  return window;
704
706
  }
707
+
708
+ /**
709
+ * Encode audio data to a WAV file.
710
+ * WAV file specs : https://en.wikipedia.org/wiki/WAV#WAV_File_header
711
+ *
712
+ * Adapted from https://www.npmjs.com/package/audiobuffer-to-wav
713
+ * @param {Float32Array} samples The audio samples.
714
+ * @param {number} rate The sample rate.
715
+ * @returns {ArrayBuffer} The WAV audio buffer.
716
+ */
717
+ function encodeWAV(samples, rate) {
718
+ let offset = 44;
719
+ const buffer = new ArrayBuffer(offset + samples.length * 4);
720
+ const view = new DataView(buffer);
721
+
722
+ /* RIFF identifier */
723
+ writeString(view, 0, "RIFF");
724
+ /* RIFF chunk length */
725
+ view.setUint32(4, 36 + samples.length * 4, true);
726
+ /* RIFF type */
727
+ writeString(view, 8, "WAVE");
728
+ /* format chunk identifier */
729
+ writeString(view, 12, "fmt ");
730
+ /* format chunk length */
731
+ view.setUint32(16, 16, true);
732
+ /* sample format (raw) */
733
+ view.setUint16(20, 3, true);
734
+ /* channel count */
735
+ view.setUint16(22, 1, true);
736
+ /* sample rate */
737
+ view.setUint32(24, rate, true);
738
+ /* byte rate (sample rate * block align) */
739
+ view.setUint32(28, rate * 4, true);
740
+ /* block align (channel count * bytes per sample) */
741
+ view.setUint16(32, 4, true);
742
+ /* bits per sample */
743
+ view.setUint16(34, 32, true);
744
+ /* data chunk identifier */
745
+ writeString(view, 36, "data");
746
+ /* data chunk length */
747
+ view.setUint32(40, samples.length * 4, true);
748
+
749
+ for (let i = 0; i < samples.length; ++i, offset += 4) {
750
+ view.setFloat32(offset, samples[i], true);
751
+ }
752
+
753
+ return buffer;
754
+ }
755
+
756
+ function writeString(view, offset, string) {
757
+ for (let i = 0; i < string.length; ++i) {
758
+ view.setUint8(offset + i, string.charCodeAt(i));
759
+ }
760
+ }
761
+
762
+
/**
 * Container pairing raw audio samples with their sampling rate, with helpers
 * to serialise the audio as WAV data.
 */
export class RawAudio {

    /**
     * Create a new `RawAudio` object.
     * @param {Float32Array} audio Audio data
     * @param {number} sampling_rate Sampling rate of the audio data
     */
    constructor(audio, sampling_rate) {
        this.audio = audio;
        this.sampling_rate = sampling_rate;
    }

    /**
     * Convert the audio to a wav file buffer.
     * @returns {ArrayBuffer} The WAV file.
     */
    toWav() {
        return encodeWAV(this.audio, this.sampling_rate);
    }

    /**
     * Convert the audio to a blob.
     * @returns {Blob} A blob of type `audio/wav` wrapping the WAV buffer.
     */
    toBlob() {
        return new Blob([this.toWav()], { type: 'audio/wav' });
    }

    /**
     * Save the audio to a wav file.
     *
     * In a browser environment the file is handed to `saveBlob`; when a
     * filesystem is available the WAV bytes are written to `path`.
     *
     * @param {string} path Where to save the file.
     * @returns {Promise<void>}
     * @throws {Error} When called from a Web Worker, or when neither a browser
     * environment nor a filesystem is available.
     */
    async save(path) {
        if (apis.IS_BROWSER_ENV) {
            if (apis.IS_WEBWORKER_ENV) {
                throw new Error('Unable to save a file from a Web Worker.')
            }
            saveBlob(path, this.toBlob());
        } else if (apis.IS_FS_AVAILABLE) {
            // Write the WAV bytes directly: no Blob round-trip, and the async
            // fs API keeps the event loop free while the file is written.
            await fs.promises.writeFile(path, new Uint8Array(this.toWav()));
        } else {
            throw new Error('Unable to save because filesystem is disabled in this environment.')
        }
    }
}
package/src/utils/core.js CHANGED
@@ -189,6 +189,32 @@ export function calculateReflectOffset(i, w) {
189
189
  return Math.abs((i + w) % (2 * w) - w);
190
190
  }
191
191
 
/**
 * Save blob file on the web.
 *
 * Creates a temporary object URL for the blob, assigns it to an anchor
 * element together with a `download` name, and clicks that anchor
 * programmatically.
 *
 * @param {string} path The path to save the blob to
 * @param {Blob} blob The blob to save
 */
export function saveBlob(path, blob) {
    // Temporary object URL that refers to the blob's contents
    const objectURL = URL.createObjectURL(blob);

    try {
        // The anchor is never attached to the document, so there is nothing
        // to remove from the DOM afterwards.
        const downloadLink = document.createElement('a');
        downloadLink.href = objectURL;

        // The `download` attribute carries the desired filename
        downloadLink.download = path;

        // Trigger the download
        downloadLink.click();
    } finally {
        // Always release the object URL, even if triggering the download threw
        URL.revokeObjectURL(objectURL);
    }
}
217
+
192
218
  /**
193
219
  *
194
220
  * @param {Object} o
@@ -1,3 +1,5 @@
1
+ /// <reference types="@webgpu/types" />
2
+
1
3
  import { apis } from "../env.js";
2
4
 
3
5
  import { DEVICE_TYPES } from "./devices.js";
package/src/utils/hub.js CHANGED
@@ -121,7 +121,7 @@ class FileResponse {
121
121
  */
122
122
  async arrayBuffer() {
123
123
  const data = await fs.promises.readFile(this.filePath);
124
- return data.buffer;
124
+ return /** @type {ArrayBuffer} */ (data.buffer);
125
125
  }
126
126
 
127
127
  /**
@@ -8,9 +8,9 @@
8
8
  * @module utils/image
9
9
  */
10
10
 
11
- import { isNullishDimension } from './core.js';
11
+ import { isNullishDimension, saveBlob } from './core.js';
12
12
  import { getFile } from './hub.js';
13
- import { env, apis } from '../env.js';
13
+ import { apis } from '../env.js';
14
14
  import { Tensor } from './tensor.js';
15
15
 
16
16
  // Will be empty (or not used) if running in browser or web-worker
@@ -793,23 +793,9 @@ export class RawImage {
793
793
  // Convert image to Blob
794
794
  const blob = await this.toBlob(mime);
795
795
 
796
- // Convert the canvas content to a data URL
797
- const dataURL = URL.createObjectURL(blob);
796
+ saveBlob(path, blob)
798
797
 
799
- // Create an anchor element with the data URL as the href attribute
800
- const downloadLink = document.createElement('a');
801
- downloadLink.href = dataURL;
802
-
803
- // Set the download attribute to specify the desired filename for the downloaded image
804
- downloadLink.download = path;
805
-
806
- // Trigger the download
807
- downloadLink.click();
808
-
809
- // Clean up: remove the anchor element from the DOM
810
- downloadLink.remove();
811
-
812
- } else if (!env.useFS) {
798
+ } else if (!apis.IS_FS_AVAILABLE) {
813
799
  throw new Error('Unable to save the image because filesystem is disabled in this environment.')
814
800
 
815
801
  } else {
@@ -837,3 +823,4 @@ export class RawImage {
837
823
  * Helper function to load an image from a URL, path, etc.
838
824
  */
839
825
  export const load_image = RawImage.read.bind(RawImage);
826
+
@@ -225,8 +225,9 @@ export function magnitude(arr) {
225
225
 
226
226
  /**
227
227
  * Returns the value and index of the minimum element in an array.
228
- * @param {number[]|TypedArray} arr array of numbers.
229
- * @returns {[number, number]} the value and index of the minimum element, of the form: [valueOfMin, indexOfMin]
228
+ * @template {number[]|bigint[]|AnyTypedArray} T
229
+ * @param {T} arr array of numbers.
230
+ * @returns {T extends bigint[]|BigTypedArray ? [bigint, number] : [number, number]} the value and index of the minimum element, of the form: [valueOfMin, indexOfMin]
230
231
  * @throws {Error} If array is empty.
231
232
  */
232
233
  export function min(arr) {
@@ -239,14 +240,15 @@ export function min(arr) {
239
240
  indexOfMin = i;
240
241
  }
241
242
  }
242
- return [min, indexOfMin];
243
+ return /** @type {T extends bigint[]|BigTypedArray ? [bigint, number] : [number, number]} */([min, indexOfMin]);
243
244
  }
244
245
 
245
246
 
246
247
  /**
247
248
  * Returns the value and index of the maximum element in an array.
248
- * @param {number[]|AnyTypedArray} arr array of numbers.
249
- * @returns {[number, number]} the value and index of the maximum element, of the form: [valueOfMax, indexOfMax]
249
+ * @template {number[]|bigint[]|AnyTypedArray} T
250
+ * @param {T} arr array of numbers.
251
+ * @returns {T extends bigint[]|BigTypedArray ? [bigint, number] : [number, number]} the value and index of the maximum element, of the form: [valueOfMax, indexOfMax]
250
252
  * @throws {Error} If array is empty.
251
253
  */
252
254
  export function max(arr) {
@@ -259,7 +261,7 @@ export function max(arr) {
259
261
  indexOfMax = i;
260
262
  }
261
263
  }
262
- return [Number(max), indexOfMax];
264
+ return /** @type {T extends bigint[]|BigTypedArray ? [bigint, number] : [number, number]} */([max, indexOfMax]);
263
265
  }
264
266
 
265
267
  function isPowerOfTwo(number) {
@@ -9,6 +9,8 @@
9
9
 
10
10
  import {
11
11
  interpolate_data,
12
+ max,
13
+ min,
12
14
  permute_data
13
15
  } from './maths.js';
14
16
 
@@ -464,8 +466,6 @@ export class Tensor {
464
466
  return this.permute(...dims);
465
467
  }
466
468
 
467
- // TODO add .max() and .min() methods
468
-
469
469
  /**
470
470
  * Returns the sum of each row of the input tensor in the given dimension dim.
471
471
  *
@@ -494,55 +494,22 @@ export class Tensor {
494
494
  }
495
495
 
496
496
  const this_data = this.data;
497
+ const fn = (a, b) => a + (b ** p);
497
498
 
498
499
  if (dim === null) {
499
500
  // @ts-ignore
500
- let val = this_data.reduce((a, b) => a + (b ** p), 0) ** (1 / p);
501
+ const val = this_data.reduce(fn, 0) ** (1 / p);
501
502
  return new Tensor(this.type, [val], []);
502
503
  }
503
504
 
504
- // Negative indexing
505
- dim = safeIndex(dim, this.dims.length);
506
-
507
- // Calculate the shape of the resulting array after summation
508
- const resultDims = this.dims.slice(); // Copy the original dimensions
509
- resultDims[dim] = 1; // Remove the specified axis
510
-
511
- // Create a new array to store the accumulated values
512
- // @ts-ignore
513
- const result = new this_data.constructor(this_data.length / this.dims[dim]);
514
-
515
- // Iterate over the data array
516
- for (let i = 0; i < this_data.length; ++i) {
517
-
518
- // Calculate the index in the resulting array
519
- let resultIndex = 0;
520
-
521
- for (let j = this.dims.length - 1, num = i, resultMultiplier = 1; j >= 0; --j) {
522
- const size = this.dims[j];
523
- if (j !== dim) {
524
- const index = num % size;
525
- resultIndex += index * resultMultiplier;
526
- resultMultiplier *= resultDims[j];
527
- }
528
- num = Math.floor(num / size);
529
- }
530
-
531
- // Accumulate the value at the current index
532
- result[resultIndex] += (this_data[i]) ** p;
533
- }
505
+ const [type, result, resultDims] = reduce_helper(fn, this, dim, keepdim);
534
506
 
535
507
  if (p !== 1) {
536
508
  for (let i = 0; i < result.length; ++i) {
537
509
  result[i] = result[i] ** (1 / p);
538
510
  }
539
511
  }
540
-
541
- if (!keepdim) {
542
- resultDims.splice(dim, 1);
543
- }
544
-
545
- return new Tensor(this.type, result, resultDims);
512
+ return new Tensor(type, result, resultDims);
546
513
  }
547
514
 
548
515
  /**
@@ -605,7 +572,7 @@ export class Tensor {
605
572
  * NOTE: The returned tensor shares the storage with the input tensor, so changing the contents of one will change the contents of the other.
606
573
  * If you would like a copy, use `tensor.clone()` before squeezing.
607
574
  *
608
- * @param {number} [dim=null] If given, the input will be squeezed only in the specified dimensions.
575
+ * @param {number|number[]} [dim=null] If given, the input will be squeezed only in the specified dimensions.
609
576
  * @returns {Tensor} The squeezed tensor
610
577
  */
611
578
  squeeze(dim = null) {
@@ -715,6 +682,34 @@ export class Tensor {
715
682
  return this.clone().neg_();
716
683
  }
717
684
 
685
+ /**
686
+ * Computes input > val element-wise.
687
+ * @param {number} val The value to compare with.
688
+ * @returns {Tensor} A boolean tensor that is `true` where input is greater than other and `false` elsewhere.
689
+ */
690
+ gt(val) {
691
+ const mask = new Uint8Array(this.data.length);
692
+ const this_data = this.data;
693
+ for (let i = 0; i < this_data.length; ++i) {
694
+ mask[i] = this_data[i] > val ? 1 : 0;
695
+ }
696
+ return new Tensor('bool', mask, this.dims);
697
+ }
698
+
699
+ /**
700
+ * Computes input < val element-wise.
701
+ * @param {number} val The value to compare with.
702
+ * @returns {Tensor} A boolean tensor that is `true` where input is less than other and `false` elsewhere.
703
+ */
704
+ lt(val) {
705
+ const mask = new Uint8Array(this.data.length);
706
+ const this_data = this.data;
707
+ for (let i = 0; i < this_data.length; ++i) {
708
+ mask[i] = this_data[i] < val ? 1 : 0;
709
+ }
710
+ return new Tensor('bool', mask, this.dims);
711
+ }
712
+
718
713
  /**
719
714
  * In-place version of @see {@link Tensor.clamp}
720
715
  */
@@ -759,6 +754,41 @@ export class Tensor {
759
754
  return mean(this, dim, keepdim);
760
755
  }
761
756
 
757
+ min(dim = null, keepdim = false) {
758
+ if (dim === null) {
759
+ // None to reduce over all dimensions.
760
+ const val = min(this.data)[0];
761
+ return new Tensor(this.type, [val], [/* scalar */]);
762
+ }
763
+ const [type, result, resultDims] = reduce_helper((a, b) => Math.min(a, b), this, dim, keepdim, Infinity);
764
+ return new Tensor(type, result, resultDims);
765
+ }
766
+
767
+ max(dim = null, keepdim = false) {
768
+ if (dim === null) {
769
+ // None to reduce over all dimensions.
770
+ const val = max(this.data)[0];
771
+ return new Tensor(this.type, [val], [/* scalar */]);
772
+ }
773
+ const [type, result, resultDims] = reduce_helper((a, b) => Math.max(a, b), this, dim, keepdim, -Infinity);
774
+ return new Tensor(type, result, resultDims);
775
+ }
776
+
777
+ argmin(dim = null, keepdim = false) {
778
+ if (dim !== null) {
779
+ throw new Error("`dim !== null` not yet implemented.");
780
+ }
781
+ const index = min(this.data)[1];
782
+ return new Tensor('int64', [BigInt(index)], []);
783
+ }
784
+ argmax(dim = null, keepdim = false) {
785
+ if (dim !== null) {
786
+ throw new Error("`dim !== null` not yet implemented.");
787
+ }
788
+ const index = max(this.data)[1];
789
+ return new Tensor('int64', [BigInt(index)], []);
790
+ }
791
+
762
792
  /**
763
793
  * Performs Tensor dtype conversion.
764
794
  * @param {DataType} type The desired data type.
@@ -892,7 +922,7 @@ export function interpolate(input, [out_height, out_width], mode = 'bilinear', a
892
922
  * @param {Tensor} input the input tensor
893
923
  * @param {Object} options the options for the interpolation
894
924
  * @param {[number, number]|[number, number, number]|[number, number, number, number]} [options.size=null] output spatial size.
895
- * @param {"bilinear"|"bicubic"} [options.mode='bilinear'] algorithm used for upsampling
925
+ * @param {"nearest"|"bilinear"|"bicubic"} [options.mode='bilinear'] algorithm used for upsampling
896
926
  * @returns {Promise<Tensor>} The interpolated tensor.
897
927
  */
898
928
  export async function interpolate_4d(input, {
@@ -922,7 +952,9 @@ export async function interpolate_4d(input, {
922
952
  }
923
953
 
924
954
  let op;
925
- if (mode === 'bilinear') {
955
+ if (mode === 'nearest') {
956
+ op = await TensorOpRegistry.nearest_interpolate_4d;
957
+ } else if (mode === 'bilinear') {
926
958
  op = await TensorOpRegistry.bilinear_interpolate_4d;
927
959
  } else if (mode === 'bicubic') {
928
960
  op = await TensorOpRegistry.bicubic_interpolate_4d;
@@ -963,13 +995,13 @@ export async function rfft(x, a) {
963
995
  * Returns the k largest elements of the given input tensor.
964
996
  * Inspired by https://pytorch.org/docs/stable/generated/torch.topk.html
965
997
  * @param {Tensor} x the input tensor
966
- * @param {number} k the k in "top-k"
998
+ * @param {number} [k] the k in "top-k"
967
999
  * @returns {Promise<[Tensor, Tensor]>} the output tuple of (Tensor, LongTensor) of top-k elements and their indices.
968
1000
  */
969
1001
  export async function topk(x, k) {
970
1002
  const op = await TensorOpRegistry.top_k;
971
1003
 
972
- if (k === null) {
1004
+ if (k == null) {
973
1005
  k = x.dims.at(-1);
974
1006
  } else {
975
1007
  k = Math.min(k, x.dims.at(-1));
@@ -998,10 +1030,10 @@ const arrayToIndexTensor = (array) => new Tensor('int64', array, [array.length])
998
1030
  export async function slice(data, starts, ends, axes, steps) {
999
1031
  const op = await TensorOpRegistry.slice;
1000
1032
  return await op({
1001
- x: data,
1002
- s: arrayToIndexTensor(starts),
1003
- e: arrayToIndexTensor(ends),
1004
- a: arrayToIndexTensor(axes),
1033
+ x: data,
1034
+ s: arrayToIndexTensor(starts),
1035
+ e: arrayToIndexTensor(ends),
1036
+ a: arrayToIndexTensor(axes),
1005
1037
  t: arrayToIndexTensor(steps ?? new Array(axes.length).fill(1)),
1006
1038
  });
1007
1039
  }
@@ -1236,35 +1268,19 @@ export function stack(tensors, dim = 0) {
1236
1268
 
1237
1269
 
1238
1270
  /**
1239
- * Calculates the standard deviation and mean over the dimensions specified by dim. dim can be a single dimension or `null` to reduce over all dimensions.
1240
- * @param {Tensor} input the input tenso
1241
- * @param {number|null} dim the dimension to reduce. If None, all dimensions are reduced.
1242
- * @param {number} correction difference between the sample size and sample degrees of freedom. Defaults to Bessel's correction, correction=1.
1271
+ * @param {(previousValue: any, currentValue: any, currentIndex?: number, resultIndex?: number) => any} callbackfn
1272
+ * @param {Tensor} input the input tensor.
1273
+ * @param {number|null} dim the dimension to reduce.
1243
1274
  * @param {boolean} keepdim whether the output tensor has dim retained or not.
1244
- * @returns {Tensor[]} A tuple of (std, mean) tensors.
1275
+ * @returns {[DataType, any, number[]]} The reduced tensor data.
1245
1276
  */
1246
- export function std_mean(input, dim = null, correction = 1, keepdim = false) {
1247
- const inputData = /** @type {Float32Array} */(input.data);
1277
+ function reduce_helper(callbackfn, input, dim = null, keepdim = false, initialValue = null) {
1278
+ const inputData = input.data;
1248
1279
  const inputDims = input.dims;
1249
1280
 
1250
- if (dim === null) {
1251
- // None to reduce over all dimensions.
1252
- const sum = inputData.reduce((a, b) => a + b, 0);
1253
- const mean = sum / inputData.length;
1254
- const std = Math.sqrt(inputData.reduce((a, b) => a + (b - mean) ** 2, 0) / (inputData.length - correction));
1255
-
1256
- const meanTensor = new Tensor(input.type, [mean], [/* scalar */]);
1257
- const stdTensor = new Tensor(input.type, [std], [/* scalar */]);
1258
-
1259
- return [stdTensor, meanTensor];
1260
- }
1261
-
1262
1281
  // Negative indexing
1263
1282
  dim = safeIndex(dim, inputDims.length);
1264
1283
 
1265
- const meanTensor = mean(input, dim, keepdim);
1266
- const meanTensorData = meanTensor.data;
1267
-
1268
1284
  // Calculate the shape of the resulting array after summation
1269
1285
  const resultDims = inputDims.slice(); // Copy the original dimensions
1270
1286
  resultDims[dim] = 1; // Remove the specified axis
@@ -1272,6 +1288,9 @@ export function std_mean(input, dim = null, correction = 1, keepdim = false) {
1272
1288
  // Create a new array to store the accumulated values
1273
1289
  // @ts-ignore
1274
1290
  const result = new inputData.constructor(inputData.length / inputDims[dim]);
1291
+ if (initialValue !== null) {
1292
+ result.fill(initialValue);
1293
+ }
1275
1294
 
1276
1295
  // Iterate over the data array
1277
1296
  for (let i = 0; i < inputData.length; ++i) {
@@ -1290,23 +1309,55 @@ export function std_mean(input, dim = null, correction = 1, keepdim = false) {
1290
1309
  }
1291
1310
 
1292
1311
  // Accumulate the value at the current index
1293
- result[resultIndex] += (inputData[i] - meanTensorData[resultIndex]) ** 2;
1312
+ result[resultIndex] = callbackfn(result[resultIndex], inputData[i], i, resultIndex);
1294
1313
  }
1295
1314
 
1296
- for (let i = 0; i < result.length; ++i) {
1297
- result[i] = Math.sqrt(result[i] / (inputDims[dim] - correction));
1315
+ if (!keepdim) resultDims.splice(dim, 1);
1316
+
1317
+ return [input.type, result, resultDims];
1318
+ }
1319
+
1320
+
1321
+ /**
1322
+ * Calculates the standard deviation and mean over the dimensions specified by dim. dim can be a single dimension or `null` to reduce over all dimensions.
1323
+ * @param {Tensor} input the input tensor
1324
+ * @param {number|null} dim the dimension to reduce. If None, all dimensions are reduced.
1325
+ * @param {number} correction difference between the sample size and sample degrees of freedom. Defaults to Bessel's correction, correction=1.
1326
+ * @param {boolean} keepdim whether the output tensor has dim retained or not.
1327
+ * @returns {Tensor[]} A tuple of (std, mean) tensors.
1328
+ */
1329
+ export function std_mean(input, dim = null, correction = 1, keepdim = false) {
1330
+ const inputData = /** @type {Float32Array} */(input.data);
1331
+ const inputDims = input.dims;
1332
+
1333
+ if (dim === null) {
1334
+ // None to reduce over all dimensions.
1335
+ const sum = inputData.reduce((a, b) => a + b, 0);
1336
+ const mean = sum / inputData.length;
1337
+ const std = Math.sqrt(inputData.reduce((a, b) => a + (b - mean) ** 2, 0) / (inputData.length - correction));
1338
+
1339
+ const meanTensor = new Tensor(input.type, [mean], [/* scalar */]);
1340
+ const stdTensor = new Tensor(input.type, [std], [/* scalar */]);
1341
+
1342
+ return [stdTensor, meanTensor];
1298
1343
  }
1344
+ dim = safeIndex(dim, inputDims.length);
1345
+ const meanTensor = mean(input, dim, keepdim);
1346
+ const meanTensorData = meanTensor.data;
1347
+
1348
+ // Compute squared sum
1349
+ const [type, result, resultDims] = reduce_helper((a, b, i, j) => a + (b - meanTensorData[j]) ** 2, input, dim, keepdim);
1299
1350
 
1300
- if (!keepdim) {
1301
- resultDims.splice(dim, 1);
1351
+ // Square root of the squared sum
1352
+ for (let i = 0; i < result.length; ++i) {
1353
+ result[i] = Math.sqrt(result[i] / (inputDims[dim] - correction));
1302
1354
  }
1303
1355
 
1304
- const stdTensor = new Tensor(input.type, result, resultDims);
1356
+ const stdTensor = new Tensor(type, result, resultDims);
1305
1357
 
1306
1358
  return [stdTensor, meanTensor];
1307
1359
  }
1308
1360
 
1309
-
1310
1361
  /**
1311
1362
  * Returns the mean value of each row of the input tensor in the given dimension dim.
1312
1363
  * @param {Tensor} input the input tensor.
@@ -1315,58 +1366,27 @@ export function std_mean(input, dim = null, correction = 1, keepdim = false) {
1315
1366
  * @returns {Tensor} A new tensor with means taken along the specified dimension.
1316
1367
  */
1317
1368
  export function mean(input, dim = null, keepdim = false) {
1369
+ const inputDims = input.dims;
1318
1370
  const inputData = /** @type {Float32Array} */(input.data);
1319
1371
 
1320
1372
  if (dim === null) {
1321
1373
  // None to reduce over all dimensions.
1322
- // @ts-ignore
1323
1374
  const val = inputData.reduce((a, b) => a + b, 0);
1324
1375
  return new Tensor(input.type, [val / inputData.length], [/* scalar */]);
1325
1376
  }
1326
- const inputDims = input.dims;
1327
-
1328
- // Negative indexing
1329
1377
  dim = safeIndex(dim, inputDims.length);
1330
1378
 
1331
- // Calculate the shape of the resulting array after summation
1332
- const resultDims = inputDims.slice(); // Copy the original dimensions
1333
- resultDims[dim] = 1; // Remove the specified axis
1334
-
1335
- // Create a new array to store the accumulated values
1336
- // @ts-ignore
1337
- const result = new inputData.constructor(inputData.length / inputDims[dim]);
1338
-
1339
- // Iterate over the data array
1340
- for (let i = 0; i < inputData.length; ++i) {
1341
-
1342
- // Calculate the index in the resulting array
1343
- let resultIndex = 0;
1344
-
1345
- for (let j = inputDims.length - 1, num = i, resultMultiplier = 1; j >= 0; --j) {
1346
- const size = inputDims[j];
1347
- if (j !== dim) {
1348
- const index = num % size;
1349
- resultIndex += index * resultMultiplier;
1350
- resultMultiplier *= resultDims[j];
1351
- }
1352
- num = Math.floor(num / size);
1353
- }
1354
-
1355
- // Accumulate the value at the current index
1356
- result[resultIndex] += inputData[i];
1357
- }
1379
+ // Compute sum
1380
+ const [type, result, resultDims] = reduce_helper((a, b) => a + b, input, dim, keepdim);
1358
1381
 
1382
+ // Divide by number of elements in the dimension
1359
1383
  if (inputDims[dim] !== 1) {
1360
1384
  for (let i = 0; i < result.length; ++i) {
1361
- result[i] = result[i] / inputDims[dim];
1385
+ result[i] /= inputDims[dim];
1362
1386
  }
1363
1387
  }
1364
1388
 
1365
- if (!keepdim) {
1366
- resultDims.splice(dim, 1);
1367
- }
1368
-
1369
- return new Tensor(input.type, result, resultDims);
1389
+ return new Tensor(type, result, resultDims);
1370
1390
  }
1371
1391
 
1372
1392
 
@@ -14,19 +14,19 @@ declare const FeatureExtractor_base: new () => {
14
14
  */
15
15
  export class FeatureExtractor extends FeatureExtractor_base {
16
16
  /**
17
- * Instantiate one of the processor classes of the library from a pretrained model.
17
+ * Instantiate one of the feature extractor classes of the library from a pretrained model.
18
18
  *
19
- * The processor class to instantiate is selected based on the `image_processor_type` (or `feature_extractor_type`; legacy)
20
- * property of the config object (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
19
+ * The feature extractor class to instantiate is selected based on the `feature_extractor_type` property of
20
+ * the config object (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
21
21
  *
22
22
  * @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
23
- * - A string, the *model id* of a pretrained processor hosted inside a model repo on huggingface.co.
23
+ * - A string, the *model id* of a pretrained feature_extractor hosted inside a model repo on huggingface.co.
24
24
  * Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
25
25
  * user or organization name, like `dbmdz/bert-base-german-cased`.
26
- * - A path to a *directory* containing processor files, e.g., `./my_model_directory/`.
27
- * @param {import('../utils/hub.js').PretrainedOptions} options Additional options for loading the processor.
26
+ * - A path to a *directory* containing feature_extractor files, e.g., `./my_model_directory/`.
27
+ * @param {import('../utils/hub.js').PretrainedOptions} options Additional options for loading the feature_extractor.
28
28
  *
29
- * @returns {Promise<FeatureExtractor>} A new instance of the Processor class.
29
+ * @returns {Promise<FeatureExtractor>} A new instance of the Feature Extractor class.
30
30
  */
31
31
  static from_pretrained(pretrained_model_name_or_path: string, options: import("../utils/hub.js").PretrainedOptions): Promise<FeatureExtractor>;
32
32
  /**