@huggingface/transformers 3.0.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/LICENSE +202 -0
  2. package/README.md +376 -0
  3. package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
  4. package/dist/transformers.cjs +30741 -0
  5. package/dist/transformers.cjs.map +1 -0
  6. package/dist/transformers.js +33858 -0
  7. package/dist/transformers.js.map +1 -0
  8. package/dist/transformers.min.cjs +173 -0
  9. package/dist/transformers.min.cjs.map +1 -0
  10. package/dist/transformers.min.js +231 -0
  11. package/dist/transformers.min.js.map +1 -0
  12. package/package.json +92 -0
  13. package/src/backends/onnx.js +151 -0
  14. package/src/configs.js +360 -0
  15. package/src/env.js +152 -0
  16. package/src/generation/configuration_utils.js +381 -0
  17. package/src/generation/logits_process.js +716 -0
  18. package/src/generation/logits_sampler.js +204 -0
  19. package/src/generation/parameters.js +35 -0
  20. package/src/generation/stopping_criteria.js +156 -0
  21. package/src/generation/streamers.js +212 -0
  22. package/src/models/whisper/common_whisper.js +151 -0
  23. package/src/models/whisper/generation_whisper.js +89 -0
  24. package/src/models.js +7028 -0
  25. package/src/ops/registry.js +92 -0
  26. package/src/pipelines.js +3341 -0
  27. package/src/processors.js +2614 -0
  28. package/src/tokenizers.js +4395 -0
  29. package/src/transformers.js +28 -0
  30. package/src/utils/audio.js +704 -0
  31. package/src/utils/constants.js +2 -0
  32. package/src/utils/core.js +149 -0
  33. package/src/utils/data-structures.js +445 -0
  34. package/src/utils/devices.js +11 -0
  35. package/src/utils/dtypes.js +62 -0
  36. package/src/utils/generic.js +35 -0
  37. package/src/utils/hub.js +671 -0
  38. package/src/utils/image.js +745 -0
  39. package/src/utils/maths.js +1050 -0
  40. package/src/utils/tensor.js +1378 -0
  41. package/types/backends/onnx.d.ts +26 -0
  42. package/types/backends/onnx.d.ts.map +1 -0
  43. package/types/configs.d.ts +59 -0
  44. package/types/configs.d.ts.map +1 -0
  45. package/types/env.d.ts +106 -0
  46. package/types/env.d.ts.map +1 -0
  47. package/types/generation/configuration_utils.d.ts +320 -0
  48. package/types/generation/configuration_utils.d.ts.map +1 -0
  49. package/types/generation/logits_process.d.ts +354 -0
  50. package/types/generation/logits_process.d.ts.map +1 -0
  51. package/types/generation/logits_sampler.d.ts +51 -0
  52. package/types/generation/logits_sampler.d.ts.map +1 -0
  53. package/types/generation/parameters.d.ts +47 -0
  54. package/types/generation/parameters.d.ts.map +1 -0
  55. package/types/generation/stopping_criteria.d.ts +81 -0
  56. package/types/generation/stopping_criteria.d.ts.map +1 -0
  57. package/types/generation/streamers.d.ts +81 -0
  58. package/types/generation/streamers.d.ts.map +1 -0
  59. package/types/models/whisper/common_whisper.d.ts +8 -0
  60. package/types/models/whisper/common_whisper.d.ts.map +1 -0
  61. package/types/models/whisper/generation_whisper.d.ts +76 -0
  62. package/types/models/whisper/generation_whisper.d.ts.map +1 -0
  63. package/types/models.d.ts +3845 -0
  64. package/types/models.d.ts.map +1 -0
  65. package/types/ops/registry.d.ts +11 -0
  66. package/types/ops/registry.d.ts.map +1 -0
  67. package/types/pipelines.d.ts +2403 -0
  68. package/types/pipelines.d.ts.map +1 -0
  69. package/types/processors.d.ts +917 -0
  70. package/types/processors.d.ts.map +1 -0
  71. package/types/tokenizers.d.ts +999 -0
  72. package/types/tokenizers.d.ts.map +1 -0
  73. package/types/transformers.d.ts +13 -0
  74. package/types/transformers.d.ts.map +1 -0
  75. package/types/utils/audio.d.ts +130 -0
  76. package/types/utils/audio.d.ts.map +1 -0
  77. package/types/utils/constants.d.ts +2 -0
  78. package/types/utils/constants.d.ts.map +1 -0
  79. package/types/utils/core.d.ts +91 -0
  80. package/types/utils/core.d.ts.map +1 -0
  81. package/types/utils/data-structures.d.ts +236 -0
  82. package/types/utils/data-structures.d.ts.map +1 -0
  83. package/types/utils/devices.d.ts +8 -0
  84. package/types/utils/devices.d.ts.map +1 -0
  85. package/types/utils/dtypes.d.ts +22 -0
  86. package/types/utils/dtypes.d.ts.map +1 -0
  87. package/types/utils/generic.d.ts +11 -0
  88. package/types/utils/generic.d.ts.map +1 -0
  89. package/types/utils/hub.d.ts +191 -0
  90. package/types/utils/hub.d.ts.map +1 -0
  91. package/types/utils/image.d.ts +119 -0
  92. package/types/utils/image.d.ts.map +1 -0
  93. package/types/utils/maths.d.ts +280 -0
  94. package/types/utils/maths.d.ts.map +1 -0
  95. package/types/utils/tensor.d.ts +392 -0
  96. package/types/utils/tensor.d.ts.map +1 -0
@@ -0,0 +1,1050 @@
1
+
2
+ /**
3
+ * @file Helper module for mathematical processing.
4
+ *
5
+ * These functions and classes are only used internally,
6
+ * meaning an end-user shouldn't need to access anything here.
7
+ *
8
+ * @module utils/maths
9
+ */
10
+
11
+ /**
12
+ * @typedef {Int8Array | Uint8Array | Uint8ClampedArray | Int16Array | Uint16Array | Int32Array | Uint32Array | Float32Array | Float64Array} TypedArray
13
+ * @typedef {BigInt64Array | BigUint64Array} BigTypedArray
14
+ * @typedef {TypedArray | BigTypedArray} AnyTypedArray
15
+ */
16
+
17
/**
 * Resizes channel-first image data using bilinear interpolation.
 *
 * The second argument is the input shape `[in_channels, in_height, in_width]` and the
 * third argument is the target size `[out_height, out_width]`. Pixel `(c, y, x)` of the
 * input is read from `input[c * in_height * in_width + y * in_width + x]`, and the output
 * uses the same layout. Source coordinates use half-pixel centres and are clamped to the
 * image bounds.
 *
 * NOTE: `mode` and `align_corners` are accepted but currently ignored.
 *
 * @param {TypedArray} input
 */
export function interpolate_data(input, [in_channels, in_height, in_width], [out_height, out_width], mode = 'bilinear', align_corners = false) {
    // TODO use mode and align_corners

    // Ratio between output and input sizes along each axis
    const scaleX = out_width / in_width;
    const scaleY = out_height / in_height;

    // Number of elements in a single channel plane (input / output)
    const srcPlane = in_height * in_width;
    const dstPlane = out_height * out_width;

    // Allocate the result with the same array type as the input
    // @ts-ignore
    const resized = new input.constructor(dstPlane * in_channels);

    for (let row = 0; row < out_height; ++row) {
        // Everything that depends only on the output row is computed once per row
        const srcY = (row + 0.5) / scaleY - 0.5;
        const floorY = Math.floor(srcY);
        const bottom = Math.min(floorY + 1, in_height - 1);
        const top = Math.max(floorY, 0);
        const fracY = srcY - top;

        const topRowStart = top * in_width;
        const bottomRowStart = bottom * in_width;
        const dstRowStart = row * out_width;

        for (let col = 0; col < out_width; ++col) {
            const srcX = (col + 0.5) / scaleX - 0.5;
            const floorX = Math.floor(srcX);
            const right = Math.min(floorX + 1, in_width - 1);
            const left = Math.max(floorX, 0);
            const fracX = srcX - left;

            // Bilinear weights of the four neighbouring input pixels
            const wTopLeft = (1 - fracX) * (1 - fracY);
            const wTopRight = fracX * (1 - fracY);
            const wBottomLeft = (1 - fracX) * fracY;
            const wBottomRight = fracX * fracY;

            // Offsets of those neighbours inside one channel plane
            const iTopLeft = topRowStart + left;
            const iTopRight = topRowStart + right;
            const iBottomLeft = bottomRowStart + left;
            const iBottomRight = bottomRowStart + right;

            const dstOffset = dstRowStart + col;

            for (let c = 0; c < in_channels; ++c) {
                const srcBase = c * srcPlane;

                resized[c * dstPlane + dstOffset] =
                    wTopLeft * input[srcBase + iTopLeft] +
                    wTopRight * input[srcBase + iTopRight] +
                    wBottomLeft * input[srcBase + iBottomLeft] +
                    wBottomRight * input[srcBase + iBottomRight];
            }
        }
    }

    return resized;
}
88
+
89
+
90
/**
 * Reorders the axes of a flat, row-major `AnyTypedArray` without wrapping it in a tensor.
 * @template {AnyTypedArray} T
 * @param {T} array Flat data whose logical shape is `dims`.
 * @param {number[]} dims Shape of `array` before permuting.
 * @param {number[]} axes For every output axis, the index of the input axis it is taken from.
 * @returns {[T, number[]]} The permuted array and the new shape.
 */
export function permute_data(array, dims, axes) {
    const rank = axes.length;

    // Shape of the result and the row-major stride of each of its axes
    const shape = new Array(rank);
    const stride = new Array(rank);

    let running = 1;
    for (let d = rank - 1; d >= 0; --d) {
        stride[d] = running;
        shape[d] = dims[axes[d]];
        running *= shape[d];
    }

    // For each input axis, the output stride it maps onto
    const invStride = axes.map((_, d) => stride[axes.indexOf(d)]);

    // Same array type and length as the input
    // @ts-ignore
    const permutedData = new array.constructor(array.length);

    for (let src = 0; src < array.length; ++src) {
        // Decompose the flat input index into per-axis coordinates (last axis first)
        // and accumulate the matching flat output index.
        let dst = 0;
        let remainder = src;
        for (let d = dims.length - 1; d >= 0; --d) {
            const extent = dims[d];
            dst += (remainder % extent) * invStride[d];
            remainder = Math.floor(remainder / extent);
        }
        permutedData[dst] = array[src];
    }

    return [permutedData, shape];
}
129
+
130
+
131
/**
 * Applies the softmax function to an array of numbers.
 * The maximum value is subtracted before exponentiating so that `Math.exp` cannot overflow.
 * @template {TypedArray|number[]} T
 * @param {T} arr The array of numbers to compute the softmax of.
 * @returns {T} The softmax array (same array type as `arr`).
 */
export function softmax(arr) {
    // Largest input value, used as the shift
    const peak = max(arr)[0];

    // Shifted exponentials
    const exponentials = arr.map(value => Math.exp(value - peak));

    // Normalisation constant
    // @ts-ignore
    const total = exponentials.reduce((sum, e) => sum + e, 0);

    return /** @type {T} */(exponentials.map(e => e / total));
}
153
+
154
/**
 * Calculates the logarithm of the softmax function for the input array.
 *
 * The result is computed with the log-sum-exp identity
 * `log_softmax(x_i) = x_i - max(x) - log(sum_j exp(x_j - max(x)))`
 * rather than as `log(softmax(x))`. Taking the logarithm of an already-normalised
 * probability returns `-Infinity` whenever that probability underflows to 0.
 *
 * @template {TypedArray|number[]} T
 * @param {T} arr The input array to calculate the log_softmax function for.
 * @returns {T} The resulting log_softmax array (same array type as `arr`).
 * @throws {Error} If array is empty.
 */
export function log_softmax(arr) {
    if (arr.length === 0) throw new Error('Array must not be empty');

    // Largest input value; subtracting it keeps every exponent <= 0 (no overflow).
    let maxVal = arr[0];
    for (let i = 1; i < arr.length; ++i) {
        if (arr[i] > maxVal) maxVal = arr[i];
    }

    // Sum of shifted exponentials, accumulated in double precision.
    // The maximum element contributes exp(0) = 1, so the sum is >= 1 for finite inputs.
    let sumExps = 0;
    for (let i = 0; i < arr.length; ++i) {
        sumExps += Math.exp(arr[i] - maxVal);
    }
    const logSumExps = Math.log(sumExps);

    const logSoftmaxArr = arr.map(x => x - maxVal - logSumExps);

    return /** @type {T} */(logSoftmaxArr);
}
169
+
170
/**
 * Computes the dot product of two arrays by summing the element-wise products.
 * Iteration is driven by the length of `arr1`.
 * @param {number[]} arr1 The first array.
 * @param {number[]} arr2 The second array.
 * @returns {number} The dot product of arr1 and arr2.
 */
export function dot(arr1, arr2) {
    let total = 0;
    let index = 0;
    while (index < arr1.length) {
        total += arr1[index] * arr2[index];
        ++index;
    }
    return total;
}
183
+
184
/**
 * Computes the cosine similarity between two arrays: their dot product divided by
 * the product of their magnitudes.
 *
 * @param {number[]} arr1 The first array.
 * @param {number[]} arr2 The second array.
 * @returns {number} The cosine similarity between the two arrays.
 */
export function cos_sim(arr1, arr2) {
    const numerator = dot(arr1, arr2);
    const denominator = magnitude(arr1) * magnitude(arr2);
    return numerator / denominator;
}
206
+
207
/**
 * Computes the Euclidean length of an array: the square root of the sum of squares.
 * @param {number[]} arr The array to calculate the magnitude of.
 * @returns {number} The magnitude of the array.
 */
export function magnitude(arr) {
    const sumOfSquares = arr.reduce((total, value) => total + value * value, 0);
    return Math.sqrt(sumOfSquares);
}
215
+
216
+
217
/**
 * Returns the value and index of the minimum element in an array.
 * On ties, the index of the first occurrence is returned.
 *
 * Mirrors `max`: BigInt typed arrays are accepted and the returned value is always
 * converted with `Number(...)`, so the result is a `[number, number]` pair.
 *
 * @param {number[]|AnyTypedArray} arr array of numbers.
 * @returns {[number, number]} the value and index of the minimum element, of the form: [valueOfMin, indexOfMin]
 * @throws {Error} If array is empty.
 */
export function min(arr) {
    if (arr.length === 0) throw Error('Array must not be empty');
    let smallest = arr[0];
    let indexOfMin = 0;
    for (let i = 1; i < arr.length; ++i) {
        if (arr[i] < smallest) {
            smallest = arr[i];
            indexOfMin = i;
        }
    }
    // Number(...) is the identity for numbers and converts bigint elements.
    return [Number(smallest), indexOfMin];
}
235
+
236
+
237
/**
 * Finds the largest element of an array together with its position.
 * On ties, the index of the first occurrence is returned. The value is passed through
 * `Number(...)`, so bigint elements come back as numbers.
 * @param {number[]|AnyTypedArray} arr array of numbers.
 * @returns {[number, number]} the value and index of the maximum element, of the form: [valueOfMax, indexOfMax]
 * @throws {Error} If array is empty.
 */
export function max(arr) {
    if (arr.length === 0) throw Error('Array must not be empty');

    let largest = arr[0];
    let indexOfMax = 0;
    for (let i = 1; i < arr.length; ++i) {
        const candidate = arr[i];
        if (candidate > largest) {
            largest = candidate;
            indexOfMax = i;
        }
    }
    return [Number(largest), indexOfMax];
}
255
+
256
/**
 * Checks whether a value is a positive integer power of two (1, 2, 4, 8, ...).
 *
 * The check is done arithmetically instead of with `n & (n - 1)`: bitwise operators
 * first convert their operands to 32-bit integers, which makes non-integers such as 2.5
 * and large values such as 3 * 2**31 look like powers of two.
 *
 * @param {number} number The value to test.
 * @returns {boolean} `true` if `number` is a power of two, `false` otherwise.
 */
function isPowerOfTwo(number) {
    if (!Number.isSafeInteger(number) || number <= 0) return false;

    // Strip factors of two; a power of two reduces to exactly 1 (at most 53 iterations).
    let remaining = number;
    while (remaining % 2 === 0) {
        remaining /= 2;
    }
    return remaining === 1;
}
260
+
261
+ /**
262
+ * Implementation of Radix-4 FFT.
263
+ *
264
+ * P2FFT class provides functionality for performing Fast Fourier Transform on arrays
265
+ * which are a power of two in length.
+ *
+ * Complex data is stored interleaved in a `Float64Array` as [re0, im0, re1, im1, ...],
+ * so a transform of `size` points works on buffers of `2 * size` floats.
+ *
266
+ * Code adapted from https://www.npmjs.com/package/fft.js
267
+ */
268
+ class P2FFT {
269
+ /**
270
+ * @param {number} size The size of the input array. Must be a power of two larger than 1.
271
+ * @throws {Error} FFT size must be a power of two larger than 1.
272
+ */
273
+ constructor(size) {
274
+ this.size = size | 0; // convert to a 32-bit signed integer
275
+ if (this.size <= 1 || !isPowerOfTwo(this.size))
276
+ throw new Error('FFT size must be a power of two larger than 1');
277
+
278
+ this._csize = size << 1; // number of floats in an interleaved complex buffer (2 * size)
279
+
280
+ this.table = new Float64Array(this.size * 2); // twiddle factors: [cos, -sin] pairs for angle PI * i / size (even i)
281
+ for (let i = 0; i < this.table.length; i += 2) {
282
+ const angle = Math.PI * i / this.size;
283
+ this.table[i] = Math.cos(angle);
284
+ this.table[i + 1] = -Math.sin(angle);
285
+ }
286
+
287
+ // Find size's power of two
288
+ let power = 0;
289
+ for (let t = 1; this.size > t; t <<= 1)
290
+ ++power;
291
+
292
+ // Calculate initial step's width:
293
+ // * If we are full radix-4, it is 2x smaller to give initial len=8
294
+ // * Otherwise it is the same as `power` to give len=4
295
+ this._width = power % 2 === 0 ? power - 1 : power;
296
+
297
+ // Pre-compute bit-reversal patterns (the index is reversed two bits at a time)
298
+ this._bitrev = new Int32Array(1 << this._width);
299
+ for (let j = 0; j < this._bitrev.length; ++j) {
300
+ this._bitrev[j] = 0;
301
+ for (let shift = 0; shift < this._width; shift += 2) {
302
+ const revShift = this._width - shift - 2;
303
+ this._bitrev[j] |= ((j >>> shift) & 3) << revShift;
304
+ }
305
+ }
306
+ }
307
+
308
+ /**
309
+ * Create a zero-filled complex number array with size `2 * size`
310
+ *
311
+ * @returns {Float64Array} A complex number array with size `2 * size`
312
+ */
313
+ createComplexArray() {
314
+ return new Float64Array(this._csize);
315
+ }
316
+
317
+ /**
318
+ * Converts a complex number representation stored in a Float64Array to an array of real numbers.
+ * Only the real parts (the values at even indices) are copied; the imaginary parts are dropped.
319
+ *
320
+ * @param {Float64Array} complex The complex number representation to be converted.
321
+ * @param {number[]} [storage] An optional array to store the result in.
322
+ * @returns {number[]} An array of real numbers representing the input complex number representation.
323
+ */
324
+ fromComplexArray(complex, storage) {
325
+ const res = storage || new Array(complex.length >>> 1);
326
+ for (let i = 0; i < complex.length; i += 2)
327
+ res[i >>> 1] = complex[i];
328
+ return res;
329
+ }
330
+
331
+ /**
332
+ * Convert a real-valued input array to a complex-valued output array.
+ * Each input value becomes the real part of one complex entry; every imaginary part is set to 0.
333
+ * @param {Float64Array} input The real-valued input array.
334
+ * @param {Float64Array} [storage] Optional buffer to store the output array.
335
+ * @returns {Float64Array} The complex-valued output array.
336
+ */
337
+ toComplexArray(input, storage) {
338
+ const res = storage || this.createComplexArray();
339
+ for (let i = 0; i < res.length; i += 2) {
340
+ res[i] = input[i >>> 1];
341
+ res[i + 1] = 0;
342
+ }
343
+ return res;
344
+ }
345
+
346
+ /**
347
+ * Performs a Fast Fourier Transform (FFT) on the given input data and stores the result in the output buffer.
348
+ *
349
+ * @param {Float64Array} out The output buffer to store the result.
350
+ * @param {Float64Array} data The input data to transform.
351
+ *
352
+ * @throws {Error} Input and output buffers must be different.
353
+ *
354
+ * @returns {void}
355
+ */
356
+ transform(out, data) {
357
+ if (out === data)
358
+ throw new Error('Input and output buffers must be different');
359
+
360
+ this._transform4(out, data, 1 /* inv = 1: forward direction */);
361
+ }
362
+
363
+ /**
364
+ * Performs a real-valued forward FFT on the given input buffer and stores the result in the given output buffer.
365
+ * The input buffer must contain real values only, while the output buffer will contain complex values. The input and
366
+ * output buffers must be different.
367
+ *
368
+ * @param {Float64Array} out The output buffer.
369
+ * @param {Float64Array} data The input buffer containing real values.
370
+ *
371
+ * @throws {Error} If the input and output buffers are the same.
372
+ */
373
+ realTransform(out, data) {
374
+ if (out === data)
375
+ throw new Error('Input and output buffers must be different');
376
+
377
+ this._realTransform4(out, data, 1 /* inv = 1: forward direction */);
378
+ }
379
+
380
+ /**
381
+ * Performs an inverse FFT transformation on the given `data` array, and stores the result in `out`.
382
+ * The `out` array must be a different buffer than the `data` array. The `out` array will contain the
383
+ * result of the transformation, with every value divided by `size`. The `data` array will not be modified.
384
+ *
385
+ * @param {Float64Array} out The output buffer for the transformed data.
386
+ * @param {Float64Array} data The input data to transform.
387
+ * @throws {Error} If `out` and `data` refer to the same buffer.
388
+ * @returns {void}
389
+ */
390
+ inverseTransform(out, data) {
391
+ if (out === data)
392
+ throw new Error('Input and output buffers must be different');
393
+
394
+ this._transform4(out, data, -1 /* inv = -1: inverse direction */);
395
+ for (let i = 0; i < out.length; ++i)
396
+ out[i] /= this.size;
397
+ }
398
+
399
+ /**
400
+ * Performs a radix-4 implementation of a discrete Fourier transform on a given set of data.
401
+ *
402
+ * @param {Float64Array} out The output buffer for the transformed data.
403
+ * @param {Float64Array} data The input buffer of data to be transformed.
404
+ * @param {number} inv Direction flag: 1 (passed by `transform`) or -1 (passed by `inverseTransform`).
+ * It multiplies the imaginary part of every twiddle factor and the `T3` terms.
405
+ * @returns {void}
406
+ */
407
+ _transform4(out, data, inv) {
408
+ // radix-4 implementation
409
+
410
+ const size = this._csize;
411
+
412
+ // Initial step (permute and transform)
413
+ const width = this._width;
414
+ let step = 1 << width;
415
+ let len = (size / step) << 1;
416
+
417
+ let outOff;
418
+ let t;
419
+ const bitrev = this._bitrev;
420
+ if (len === 4) {
421
+ for (outOff = 0, t = 0; outOff < size; outOff += len, ++t) {
422
+ const off = bitrev[t];
423
+ this._singleTransform2(data, out, outOff, off, step);
424
+ }
425
+ } else {
426
+ // len === 8
427
+ for (outOff = 0, t = 0; outOff < size; outOff += len, ++t) {
428
+ const off = bitrev[t];
429
+ this._singleTransform4(data, out, outOff, off, step, inv);
430
+ }
431
+ }
432
+
433
+ // Loop through steps in decreasing order
434
+ const table = this.table;
435
+ for (step >>= 2; step >= 2; step >>= 2) {
436
+ len = (size / step) << 1;
437
+ const quarterLen = len >>> 2;
438
+
439
+ // Loop through offsets in the data
440
+ for (outOff = 0; outOff < size; outOff += len) {
441
+ // Full case
442
+ const limit = outOff + quarterLen - 1;
443
+ for (let i = outOff, k = 0; i < limit; i += 2, k += step) {
444
+ const A = i;
445
+ const B = A + quarterLen;
446
+ const C = B + quarterLen;
447
+ const D = C + quarterLen;
448
+
449
+ // Original values
450
+ const Ar = out[A];
451
+ const Ai = out[A + 1];
452
+ const Br = out[B];
453
+ const Bi = out[B + 1];
454
+ const Cr = out[C];
455
+ const Ci = out[C + 1];
456
+ const Dr = out[D];
457
+ const Di = out[D + 1];
458
+
459
+ const tableBr = table[k];
460
+ const tableBi = inv * table[k + 1];
461
+ const MBr = Br * tableBr - Bi * tableBi;
462
+ const MBi = Br * tableBi + Bi * tableBr;
463
+
464
+ const tableCr = table[2 * k];
465
+ const tableCi = inv * table[2 * k + 1];
466
+ const MCr = Cr * tableCr - Ci * tableCi;
467
+ const MCi = Cr * tableCi + Ci * tableCr;
468
+
469
+ const tableDr = table[3 * k];
470
+ const tableDi = inv * table[3 * k + 1];
471
+ const MDr = Dr * tableDr - Di * tableDi;
472
+ const MDi = Dr * tableDi + Di * tableDr;
473
+
474
+ // Pre-Final values
475
+ const T0r = Ar + MCr;
476
+ const T0i = Ai + MCi;
477
+ const T1r = Ar - MCr;
478
+ const T1i = Ai - MCi;
479
+ const T2r = MBr + MDr;
480
+ const T2i = MBi + MDi;
481
+ const T3r = inv * (MBr - MDr);
482
+ const T3i = inv * (MBi - MDi);
483
+
484
+ // Final values
485
+ out[A] = T0r + T2r;
486
+ out[A + 1] = T0i + T2i;
487
+ out[B] = T1r + T3i;
488
+ out[B + 1] = T1i - T3r;
489
+ out[C] = T0r - T2r;
490
+ out[C + 1] = T0i - T2i;
491
+ out[D] = T1r - T3i;
492
+ out[D + 1] = T1i + T3r;
493
+ }
494
+ }
495
+ }
496
+ }
497
+
498
+ /**
499
+ * Performs a radix-2 implementation of a discrete Fourier transform on a given set of data.
+ * Reads two complex values from `data` (at `off` and `off + step`) and writes their sum and
+ * difference as two complex values to `out`, starting at `outOff`.
500
+ *
501
+ * @param {Float64Array} data The input buffer of data to be transformed.
502
+ * @param {Float64Array} out The output buffer for the transformed data.
503
+ * @param {number} outOff The offset at which to write the output data.
504
+ * @param {number} off The offset at which to begin reading the input data.
505
+ * @param {number} step The step size for indexing the input data.
506
+ * @returns {void}
507
+ */
508
+ _singleTransform2(data, out, outOff, off, step) {
509
+ // radix-2 implementation
510
+ // NOTE: Only called for len=4
511
+
512
+ const evenR = data[off];
513
+ const evenI = data[off + 1];
514
+ const oddR = data[off + step];
515
+ const oddI = data[off + step + 1];
516
+
517
+ out[outOff] = evenR + oddR;
518
+ out[outOff + 1] = evenI + oddI;
519
+ out[outOff + 2] = evenR - oddR;
520
+ out[outOff + 3] = evenI - oddI;
521
+ }
522
+
523
+ /**
524
+ * Performs a single radix-4 butterfly: reads four complex values from `data`
+ * (starting at `off`, `step` floats apart) and writes four complex values
+ * (8 floats) to `out`, starting at `outOff`.
525
+ *
526
+ * @param {Float64Array} data Input data array
527
+ * @param {Float64Array} out Output data array
528
+ * @param {number} outOff Index of output array to start writing from
529
+ * @param {number} off Index of input array to start reading from
530
+ * @param {number} step Step size between elements in input array
531
+ * @param {number} inv Direction flag (1 or -1) forwarded from `_transform4`
532
+ *
533
+ * @returns {void}
534
+ */
535
+ _singleTransform4(data, out, outOff, off, step, inv) {
536
+ // radix-4
537
+ // NOTE: Only called for len=8
538
+ const step2 = step * 2;
539
+ const step3 = step * 3;
540
+
541
+ // Original values
542
+ const Ar = data[off];
543
+ const Ai = data[off + 1];
544
+ const Br = data[off + step];
545
+ const Bi = data[off + step + 1];
546
+ const Cr = data[off + step2];
547
+ const Ci = data[off + step2 + 1];
548
+ const Dr = data[off + step3];
549
+ const Di = data[off + step3 + 1];
550
+
551
+ // Pre-Final values
552
+ const T0r = Ar + Cr;
553
+ const T0i = Ai + Ci;
554
+ const T1r = Ar - Cr;
555
+ const T1i = Ai - Ci;
556
+ const T2r = Br + Dr;
557
+ const T2i = Bi + Di;
558
+ const T3r = inv * (Br - Dr);
559
+ const T3i = inv * (Bi - Di);
560
+
561
+ // Final values
562
+ out[outOff] = T0r + T2r;
563
+ out[outOff + 1] = T0i + T2i;
564
+ out[outOff + 2] = T1r + T3i;
565
+ out[outOff + 3] = T1i - T3r;
566
+ out[outOff + 4] = T0r - T2r;
567
+ out[outOff + 5] = T0i - T2i;
568
+ out[outOff + 6] = T1r - T3i;
569
+ out[outOff + 7] = T1i + T3r;
570
+ }
571
+
572
+ /**
573
+ * Real input radix-4 implementation
+ * `data` holds one float per sample, while `out` is an interleaved complex buffer.
+ * After the butterfly passes, the upper half of `out` is filled with the complex
+ * conjugates of the lower half.
574
+ * @param {Float64Array} out Output array for the transformed data
575
+ * @param {Float64Array} data Input array of real data to be transformed
576
+ * @param {number} inv Direction flag multiplied into the twiddle factors; `realTransform` passes 1
577
+ */
578
+ _realTransform4(out, data, inv) {
579
+ // Real input radix-4 implementation
580
+ const size = this._csize;
581
+
582
+ // Initial step (permute and transform)
583
+ const width = this._width;
584
+ let step = 1 << width;
585
+ let len = (size / step) << 1;
586
+
587
+ let outOff;
588
+ let t;
589
+ const bitrev = this._bitrev;
590
+ if (len === 4) {
591
+ for (outOff = 0, t = 0; outOff < size; outOff += len, ++t) {
592
+ const off = bitrev[t];
593
+ this._singleRealTransform2(data, out, outOff, off >>> 1, step >>> 1); // halved: `data` has one float per sample
594
+ }
595
+ } else {
596
+ // len === 8
597
+ for (outOff = 0, t = 0; outOff < size; outOff += len, ++t) {
598
+ const off = bitrev[t];
599
+ this._singleRealTransform4(data, out, outOff, off >>> 1, step >>> 1, inv); // halved: `data` has one float per sample
600
+ }
601
+ }
602
+
603
+ // Loop through steps in decreasing order
604
+ const table = this.table;
605
+ for (step >>= 2; step >= 2; step >>= 2) {
606
+ len = (size / step) << 1;
607
+ const halfLen = len >>> 1;
608
+ const quarterLen = halfLen >>> 1;
609
+ const hquarterLen = quarterLen >>> 1;
610
+
611
+ // Loop through offsets in the data
612
+ for (outOff = 0; outOff < size; outOff += len) {
613
+ for (let i = 0, k = 0; i <= hquarterLen; i += 2, k += step) {
614
+ const A = outOff + i;
615
+ const B = A + quarterLen;
616
+ const C = B + quarterLen;
617
+ const D = C + quarterLen;
618
+
619
+ // Original values
620
+ const Ar = out[A];
621
+ const Ai = out[A + 1];
622
+ const Br = out[B];
623
+ const Bi = out[B + 1];
624
+ const Cr = out[C];
625
+ const Ci = out[C + 1];
626
+ const Dr = out[D];
627
+ const Di = out[D + 1];
628
+
629
+ // Middle values
630
+ const MAr = Ar;
631
+ const MAi = Ai;
632
+
633
+ const tableBr = table[k];
634
+ const tableBi = inv * table[k + 1];
635
+ const MBr = Br * tableBr - Bi * tableBi;
636
+ const MBi = Br * tableBi + Bi * tableBr;
637
+
638
+ const tableCr = table[2 * k];
639
+ const tableCi = inv * table[2 * k + 1];
640
+ const MCr = Cr * tableCr - Ci * tableCi;
641
+ const MCi = Cr * tableCi + Ci * tableCr;
642
+
643
+ const tableDr = table[3 * k];
644
+ const tableDi = inv * table[3 * k + 1];
645
+ const MDr = Dr * tableDr - Di * tableDi;
646
+ const MDi = Dr * tableDi + Di * tableDr;
647
+
648
+ // Pre-Final values
649
+ const T0r = MAr + MCr;
650
+ const T0i = MAi + MCi;
651
+ const T1r = MAr - MCr;
652
+ const T1i = MAi - MCi;
653
+ const T2r = MBr + MDr;
654
+ const T2i = MBi + MDi;
655
+ const T3r = inv * (MBr - MDr);
656
+ const T3i = inv * (MBi - MDi);
657
+
658
+ // Final values
659
+ out[A] = T0r + T2r;
660
+ out[A + 1] = T0i + T2i;
661
+ out[B] = T1r + T3i;
662
+ out[B + 1] = T1i - T3r;
663
+
664
+ // Output final middle point
665
+ if (i === 0) {
666
+ out[C] = T0r - T2r;
667
+ out[C + 1] = T0i - T2i;
668
+ continue;
669
+ }
670
+
671
+ // Do not overwrite ourselves
672
+ if (i === hquarterLen)
673
+ continue;
674
+
675
+ const SA = outOff + quarterLen - i; // mirrored position, counted back from the end of the first quarter
676
+ const SB = outOff + halfLen - i; // mirrored position, counted back from the end of the first half
677
+
678
+ out[SA] = T1r - inv * T3i;
679
+ out[SA + 1] = -T1i - inv * T3r;
680
+ out[SB] = T0r - inv * T2r;
681
+ out[SB + 1] = -T0i + inv * T2i;
682
+ }
683
+ }
684
+ }
685
+
686
+ // Complete the spectrum by adding its mirrored negative frequency components.
687
+ const half = size >>> 1;
688
+ for (let i = 2; i < half; i += 2) {
689
+ out[size - i] = out[i];
690
+ out[size - i + 1] = -out[i + 1]; // negated imaginary part, i.e. the complex conjugate
691
+ }
692
+ }
693
+
694
+ /**
695
+ * Performs a single real input radix-2 transformation on the provided data
+ * Reads two real values from `data` and writes two complex values (imaginary parts 0) to `out`.
696
+ *
697
+ * @param {Float64Array} data The input data array
698
+ * @param {Float64Array} out The output data array
699
+ * @param {number} outOff The output offset
700
+ * @param {number} off The input offset
701
+ * @param {number} step The step
702
+ *
703
+ * @returns {void}
704
+ */
705
+ _singleRealTransform2(data, out, outOff, off, step) {
706
+ // radix-2 implementation
707
+ // NOTE: Only called for len=4
708
+
709
+ const evenR = data[off];
710
+ const oddR = data[off + step];
711
+
712
+ out[outOff] = evenR + oddR;
713
+ out[outOff + 1] = 0;
714
+ out[outOff + 2] = evenR - oddR;
715
+ out[outOff + 3] = 0;
716
+ }
717
+
718
+ /**
719
+ * Computes a single real-valued transform using radix-4 algorithm.
720
+ * This method is only called for len=8.
+ * Reads four real values from `data` and writes four complex values (8 floats) to `out`.
721
+ *
722
+ * @param {Float64Array} data The input data array.
723
+ * @param {Float64Array} out The output data array.
724
+ * @param {number} outOff The offset into the output array.
725
+ * @param {number} off The offset into the input array.
726
+ * @param {number} step The step size for the input array.
727
+ * @param {number} inv Direction flag (1 or -1) forwarded from `_realTransform4`.
728
+ */
729
+ _singleRealTransform4(data, out, outOff, off, step, inv) {
730
+ // radix-4
731
+ // NOTE: Only called for len=8
732
+ const step2 = step * 2;
733
+ const step3 = step * 3;
734
+
735
+ // Original values
736
+ const Ar = data[off];
737
+ const Br = data[off + step];
738
+ const Cr = data[off + step2];
739
+ const Dr = data[off + step3];
740
+
741
+ // Pre-Final values
742
+ const T0r = Ar + Cr;
743
+ const T1r = Ar - Cr;
744
+ const T2r = Br + Dr;
745
+ const T3r = inv * (Br - Dr);
746
+
747
+ // Final values
748
+ out[outOff] = T0r + T2r;
749
+ out[outOff + 1] = 0;
750
+ out[outOff + 2] = T1r;
751
+ out[outOff + 3] = -T3r;
752
+ out[outOff + 4] = T0r - T2r;
753
+ out[outOff + 5] = 0;
754
+ out[outOff + 6] = T1r;
755
+ out[outOff + 7] = T3r;
756
+ }
757
+ }
758
+
759
/**
 * NP2FFT class provides functionality for performing Fast Fourier Transform on arrays
 * which are not a power of two in length. In such cases, the chirp-z transform is used.
 *
 * Complex data is stored interleaved as [re0, im0, re1, im1, ...]. The transform is
 * evaluated with a forward and an inverse power-of-two FFT of `bufferSize` floats.
 *
 * For more information, see: https://math.stackexchange.com/questions/77118/non-power-of-2-ffts/77156#77156
 */
class NP2FFT {

    /**
     * Constructs a new NP2FFT object.
     * @param {number} fft_length The length of the FFT
     */
    constructor(fft_length) {
        // Helper variables (both are counts of floats, i.e. twice a count of complex values)
        const a = 2 * (fft_length - 1);
        const b = 2 * (2 * fft_length - 1);
        const nextP2 = 2 ** (Math.ceil(Math.log2(b)));
        this.bufferSize = nextP2;
        this._a = a;

        // Define buffers
        // Compute chirp for transform
        const chirp = new Float64Array(b);
        const ichirp = new Float64Array(nextP2); // conjugated chirp, zero-padded up to nextP2 floats
        this._chirpBuffer = new Float64Array(nextP2);
        this._buffer1 = new Float64Array(nextP2);
        this._buffer2 = new Float64Array(nextP2);
        this._outBuffer1 = new Float64Array(nextP2);
        this._outBuffer2 = new Float64Array(nextP2);

        // Compute complex exponentiation
        const theta = -2 * Math.PI / fft_length;
        const baseR = Math.cos(theta);
        const baseI = Math.sin(theta);

        // Precompute helper for chirp-z transform
        for (let i = 0; i < (b >> 1); ++i) {
            // Compute complex power:
            const e = (i + 1 - fft_length) ** 2 / 2.0;

            // Compute the modulus and argument of the result
            const result_mod = Math.sqrt(baseR ** 2 + baseI ** 2) ** e;
            const result_arg = e * Math.atan2(baseI, baseR);

            // Convert the result back to rectangular form
            // and assign to chirp and ichirp
            const i2 = 2 * i;
            chirp[i2] = result_mod * Math.cos(result_arg);
            chirp[i2 + 1] = result_mod * Math.sin(result_arg);

            // conjugate
            ichirp[i2] = chirp[i2];
            ichirp[i2 + 1] = -chirp[i2 + 1];
        }

        // View onto the last `b - a` (= 2 * fft_length) floats of the chirp
        this._slicedChirpBuffer = chirp.subarray(a, b);

        // create object to perform Fast Fourier Transforms
        // with `nextP2` complex numbers
        this._f = new P2FFT(nextP2 >> 1);
        this._f.transform(this._chirpBuffer, ichirp);
    }

    /**
     * Shared implementation of `transform` and `realTransform`.
     *
     * @param {Float64Array} output Receives `2 * fft_length` floats (interleaved complex result).
     * Entries beyond that are left untouched.
     * @param {Float64Array} input The input data: one float per sample when `real` is true,
     * interleaved complex values otherwise.
     * @param {boolean} real Whether `input` contains real values only.
     * @returns {void}
     */
    _transform(output, input, real) {
        const ib1 = this._buffer1;
        const ib2 = this._buffer2;
        const ob2 = this._outBuffer1;
        const ob3 = this._outBuffer2;
        const cb = this._chirpBuffer;
        const sb = this._slicedChirpBuffer;
        const a = this._a;

        // Step 1: multiply the input by the chirp. Only the first `sb.length` floats of
        // `ib1` are written; the remainder stays zero and acts as padding.
        if (real) {
            // Real multiplication
            for (let j = 0; j < sb.length; j += 2) {
                const j2 = j + 1;
                const j3 = j >> 1;

                const a_real = input[j3];
                ib1[j] = a_real * sb[j];
                ib1[j2] = a_real * sb[j2];
            }
        } else {
            // Complex multiplication
            for (let j = 0; j < sb.length; j += 2) {
                const j2 = j + 1;
                ib1[j] = input[j] * sb[j] - input[j2] * sb[j2];
                ib1[j2] = input[j] * sb[j2] + input[j2] * sb[j];
            }
        }
        this._f.transform(ob2, ib1);

        // Step 2: pointwise complex multiplication with the transformed conjugate chirp,
        // followed by an inverse FFT.
        for (let j = 0; j < cb.length; j += 2) {
            const j2 = j + 1;

            ib2[j] = ob2[j] * cb[j] - ob2[j2] * cb[j2];
            ib2[j2] = ob2[j] * cb[j2] + ob2[j2] * cb[j];
        }
        this._f.inverseTransform(ob3, ib2);

        // Step 3: multiply the result (read at float offset `a`) by the chirp again.
        // Only `sb.length` floats are valid: iterating up to `ob3.length` would read past
        // the end of `sb` and write NaN into the tail of `output`.
        for (let j = 0; j < sb.length; j += 2) {
            const a_real = ob3[j + a];
            const a_imag = ob3[j + a + 1];
            const b_real = sb[j];
            const b_imag = sb[j + 1];

            output[j] = a_real * b_real - a_imag * b_imag;
            output[j + 1] = a_real * b_imag + a_imag * b_real;
        }
    }

    /**
     * Transforms interleaved complex input data.
     * @param {Float64Array} output The output buffer.
     * @param {Float64Array} input The interleaved complex input data.
     * @returns {void}
     */
    transform(output, input) {
        this._transform(output, input, false);
    }

    /**
     * Transforms real-valued input data.
     * @param {Float64Array} output The output buffer.
     * @param {Float64Array} input The real-valued input data.
     * @returns {void}
     */
    realTransform(output, input) {
        this._transform(output, input, true);
    }
}
877
+
878
/**
 * Front end that selects an FFT implementation based on the requested length:
 * `P2FFT` when the length is a power of two, `NP2FFT` otherwise.
 */
export class FFT {
    /**
     * @param {number} fft_length The length of the FFT
     */
    constructor(fft_length) {
        const powerOfTwo = isPowerOfTwo(fft_length);

        this.fft_length = fft_length;
        this.isPowerOfTwo = powerOfTwo;

        if (!powerOfTwo) {
            // The chirp-z implementation reports the buffer size it needs.
            const chirpZ = new NP2FFT(fft_length);
            this.fft = chirpZ;
            this.outputBufferSize = chirpZ.bufferSize;
            return;
        }

        // Interleaved complex output: two floats per bin.
        this.fft = new P2FFT(fft_length);
        this.outputBufferSize = 2 * fft_length;
    }

    /**
     * Forwards to the selected implementation's `realTransform`.
     * @param {*} out Output buffer, passed through unchanged.
     * @param {*} input Input buffer, passed through unchanged.
     */
    realTransform(out, input) {
        const { fft } = this;
        fft.realTransform(out, input);
    }

    /**
     * Forwards to the selected implementation's `transform`.
     * @param {*} out Output buffer, passed through unchanged.
     * @param {*} input Input buffer, passed through unchanged.
     */
    transform(out, input) {
        const { fft } = this;
        fft.transform(out, input);
    }
}
899
+
900
+
901
/**
 * Performs median filter on the provided data. Padding is done by mirroring the data
 * around its first and last elements (the edge elements themselves are not repeated).
 * The reflection is periodic, so windows wider than the data are handled as well.
 *
 * @param {AnyTypedArray} data The input array. Plain arrays of numbers are also accepted.
 * @param {number} windowSize The window size. Must be a positive odd number.
 * @returns {AnyTypedArray} A new array of the same type and length as `data`.
 * @throws {Error} If `windowSize` is even or not positive.
 */
export function medianFilter(data, windowSize) {

    if (windowSize % 2 === 0 || windowSize <= 0) {
        throw new Error('Window size must be a positive odd number');
    }

    const length = data.length;

    // @ts-ignore
    const outputArray = new data.constructor(length);

    // @ts-ignore
    const buffer = new data.constructor(windowSize); // Reusable array for storing values

    const halfWindowSize = Math.floor(windowSize / 2);

    // Mirroring without repeating the edges is periodic with this period.
    const period = 2 * (length - 1);

    // Typed arrays sort numerically by default; plain arrays would sort as strings.
    const isPlainArray = Array.isArray(data);

    for (let i = 0; i < length; ++i) {
        let valuesIndex = 0;

        for (let j = -halfWindowSize; j <= halfWindowSize; ++j) {
            let index = i + j;
            if (index < 0 || index >= length) {
                if (period === 0) {
                    // Single-element input: every mirrored position is that element.
                    index = 0;
                } else {
                    // Fold the index into [0, period), then mirror the upper half back.
                    index = ((index % period) + period) % period;
                    if (index >= length) {
                        index = period - index;
                    }
                }
            }

            buffer[valuesIndex++] = data[index];
        }

        if (isPlainArray) {
            buffer.sort((x, y) => x - y);
        } else {
            buffer.sort();
        }
        outputArray[i] = buffer[halfWindowSize];
    }

    return outputArray;
}
940
+
941
/**
 * Rounds a number to a fixed number of decimal places by scaling it up by a
 * power of ten, rounding to the nearest integer, and scaling back down.
 * @param {number} num The number to round
 * @param {number} decimals The number of decimals
 * @returns {number} The rounded number
 */
export function round(num, decimals) {
    const scale = 10 ** decimals;
    const scaled = Math.round(num * scale);
    return scaled / scale;
}
951
+
952
/**
 * Rounds a number to the nearest integer, resolving exact ties (fractional part of 0.5)
 * towards the nearest even integer. Also known as "bankers' rounding"; this is the default
 * rounding mode in python. For example: 1.5 rounds to 2 and 2.5 rounds to 2.
 *
 * @param {number} x The number to round
 * @returns {number} The rounded number
 */
export function bankers_round(x) {
    const nearest = Math.round(x);
    const isTie = Math.abs(x) % 1 === 0.5;

    // Non-ties, and ties that already landed on an even integer, need no correction.
    if (!isTie || nearest % 2 === 0) {
        return nearest;
    }

    // `Math.round` resolves ties upwards, so an odd result sits one above the even neighbour.
    return nearest - 1;
}
965
+
966
+
967
/**
 * Measures similarity between two temporal sequences (e.g., input audio and output tokens
 * to generate token-level timestamps).
 *
 * Fills a cumulative cost table over `matrix` and then walks back from the bottom-right
 * corner to the origin, recording the visited (row, column) pairs.
 *
 * @param {number[][]} matrix Cost matrix with one row per output step and one column per input step.
 * @returns {number[][]} A pair `[text_indices, time_indices]`: the row and column indices along the path, in forward order.
 */
export function dynamic_time_warping(matrix) {
    const output_length = matrix.length;
    const input_length = matrix[0].length;

    // Both tables carry one extra leading row and column for the boundary conditions.
    const rows = output_length + 1;
    const cols = input_length + 1;

    const cost = [];
    const trace = [];
    for (let r = 0; r < rows; ++r) {
        cost.push(new Array(cols).fill(Infinity));
        trace.push(new Array(cols).fill(-1));
    }
    cost[0][0] = 0;

    for (let col = 1; col < cols; ++col) {
        for (let row = 1; row < rows; ++row) {
            const diagonal = cost[row - 1][col - 1];
            const up = cost[row - 1][col];
            const left = cost[row][col - 1];

            // Default to the "left" move; it also wins every tie.
            let step = 2;
            let best = left;
            if (diagonal < up && diagonal < left) {
                step = 0;
                best = diagonal;
            } else if (up < diagonal && up < left) {
                step = 1;
                best = up;
            }

            cost[row][col] = matrix[row - 1][col - 1] + best;
            trace[row][col] = step;
        }
    }

    // Boundaries: the first row can only move left, the first column only up.
    trace[0].fill(2); // trace[0, :] = 2
    for (const traceRow of trace) { // trace[:, 0] = 1
        traceRow[0] = 1;
    }

    // backtrace
    const text_indices = [];
    const time_indices = [];
    let row = output_length;
    let col = input_length;
    while (row > 0 || col > 0) {
        text_indices.push(row - 1);
        time_indices.push(col - 1);

        const step = trace[row][col];
        if (step === 0) {
            --row;
            --col;
        } else if (step === 1) {
            --row;
        } else if (step === 2) {
            --col;
        } else {
            throw new Error(
                `Internal error in dynamic time warping. Unexpected trace[${row}, ${col}]. Please file a bug report.`
            );
        }
    }

    // The path was collected from the end towards the start.
    text_indices.reverse();
    time_indices.reverse();

    return [text_indices, time_indices];
}