@huggingface/transformers 3.3.3 → 3.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. package/README.md +13 -3
  2. package/dist/ort-wasm-simd-threaded.jsep.mjs +124 -115
  3. package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
  4. package/dist/transformers.js +2778 -1592
  5. package/dist/transformers.js.map +1 -1
  6. package/dist/transformers.min.js +1 -1
  7. package/dist/transformers.min.js.map +1 -1
  8. package/dist/{transformers.cjs → transformers.node.cjs} +1699 -2530
  9. package/dist/transformers.node.cjs.map +1 -0
  10. package/dist/transformers.node.min.cjs +2 -0
  11. package/dist/transformers.node.min.cjs.map +1 -0
  12. package/dist/transformers.node.min.mjs +2 -0
  13. package/dist/transformers.node.min.mjs.map +1 -0
  14. package/dist/{transformers.mjs → transformers.node.mjs} +1738 -2510
  15. package/dist/transformers.node.mjs.map +1 -0
  16. package/dist/transformers.web.js +35876 -0
  17. package/dist/transformers.web.js.map +1 -0
  18. package/dist/transformers.web.min.js +2 -0
  19. package/dist/transformers.web.min.js.map +1 -0
  20. package/package.json +6 -6
  21. package/src/backends/onnx.js +14 -15
  22. package/src/configs.js +6 -1
  23. package/src/env.js +1 -1
  24. package/src/generation/streamers.js +4 -3
  25. package/src/models/dac/feature_extraction_dac.js +3 -0
  26. package/src/models/encodec/feature_extraction_encodec.js +32 -0
  27. package/src/models/feature_extractors.js +3 -0
  28. package/src/models/idefics3/image_processing_idefics3.js +1 -1
  29. package/src/models/image_processors.js +1 -0
  30. package/src/models/processors.js +2 -0
  31. package/src/models/smolvlm/image_processing_smolvlm.js +2 -0
  32. package/src/models/smolvlm/processing_smolvlm.js +2 -0
  33. package/src/models/snac/feature_extraction_snac.js +3 -0
  34. package/src/models/ultravox/processing_ultravox.js +54 -0
  35. package/src/models/whisper/common_whisper.js +7 -1
  36. package/src/models/whisper/feature_extraction_whisper.js +18 -10
  37. package/src/models.js +546 -78
  38. package/src/pipelines.js +246 -137
  39. package/src/tokenizers.js +42 -28
  40. package/src/transformers.js +1 -0
  41. package/src/utils/audio.js +2 -0
  42. package/src/utils/hub.js +140 -80
  43. package/src/utils/image.js +9 -1
  44. package/src/utils/maths.js +1 -1
  45. package/src/utils/tensor.js +12 -5
  46. package/src/utils/video.js +128 -0
  47. package/types/backends/onnx.d.ts +2 -2
  48. package/types/backends/onnx.d.ts.map +1 -1
  49. package/types/configs.d.ts +1 -1
  50. package/types/configs.d.ts.map +1 -1
  51. package/types/generation/streamers.d.ts.map +1 -1
  52. package/types/models/dac/feature_extraction_dac.d.ts +4 -0
  53. package/types/models/dac/feature_extraction_dac.d.ts.map +1 -0
  54. package/types/models/encodec/feature_extraction_encodec.d.ts +13 -0
  55. package/types/models/encodec/feature_extraction_encodec.d.ts.map +1 -0
  56. package/types/models/feature_extractors.d.ts +3 -0
  57. package/types/models/florence2/processing_florence2.d.ts +1 -1
  58. package/types/models/florence2/processing_florence2.d.ts.map +1 -1
  59. package/types/models/image_processors.d.ts +1 -0
  60. package/types/models/processors.d.ts +2 -0
  61. package/types/models/smolvlm/image_processing_smolvlm.d.ts +2 -0
  62. package/types/models/smolvlm/image_processing_smolvlm.d.ts.map +1 -0
  63. package/types/models/smolvlm/processing_smolvlm.d.ts +2 -0
  64. package/types/models/smolvlm/processing_smolvlm.d.ts.map +1 -0
  65. package/types/models/snac/feature_extraction_snac.d.ts +4 -0
  66. package/types/models/snac/feature_extraction_snac.d.ts.map +1 -0
  67. package/types/models/ultravox/processing_ultravox.d.ts +16 -0
  68. package/types/models/ultravox/processing_ultravox.d.ts.map +1 -0
  69. package/types/models/whisper/common_whisper.d.ts.map +1 -1
  70. package/types/models/whisper/feature_extraction_whisper.d.ts +3 -1
  71. package/types/models/whisper/feature_extraction_whisper.d.ts.map +1 -1
  72. package/types/models.d.ts +180 -4
  73. package/types/models.d.ts.map +1 -1
  74. package/types/pipelines.d.ts +51 -5
  75. package/types/pipelines.d.ts.map +1 -1
  76. package/types/tokenizers.d.ts.map +1 -1
  77. package/types/transformers.d.ts +1 -0
  78. package/types/tsconfig.tsbuildinfo +1 -1
  79. package/types/utils/audio.d.ts.map +1 -1
  80. package/types/utils/hub.d.ts +19 -7
  81. package/types/utils/hub.d.ts.map +1 -1
  82. package/types/utils/image.d.ts +2 -2
  83. package/types/utils/image.d.ts.map +1 -1
  84. package/types/utils/maths.d.ts +2 -2
  85. package/types/utils/maths.d.ts.map +1 -1
  86. package/types/utils/tensor.d.ts +17 -18
  87. package/types/utils/tensor.d.ts.map +1 -1
  88. package/types/utils/video.d.ts +37 -0
  89. package/types/utils/video.d.ts.map +1 -0
  90. package/dist/transformers.cjs.map +0 -1
  91. package/dist/transformers.min.cjs +0 -2
  92. package/dist/transformers.min.cjs.map +0 -1
  93. package/dist/transformers.min.mjs +0 -2
  94. package/dist/transformers.min.mjs.map +0 -1
  95. package/dist/transformers.mjs.map +0 -1
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@huggingface/transformers",
3
- "version": "3.3.3",
3
+ "version": "3.4.1",
4
4
  "description": "State-of-the-art Machine Learning for the web. Run 🤗 Transformers directly in your browser, with no need for a server!",
5
5
  "main": "./src/transformers.js",
6
6
  "types": "./types/transformers.d.ts",
@@ -9,16 +9,16 @@
9
9
  "node": {
10
10
  "import": {
11
11
  "types": "./types/transformers.d.ts",
12
- "default": "./dist/transformers.mjs"
12
+ "default": "./dist/transformers.node.mjs"
13
13
  },
14
14
  "require": {
15
15
  "types": "./types/transformers.d.ts",
16
- "default": "./dist/transformers.cjs"
16
+ "default": "./dist/transformers.node.cjs"
17
17
  }
18
18
  },
19
19
  "default": {
20
20
  "types": "./types/transformers.d.ts",
21
- "default": "./dist/transformers.js"
21
+ "default": "./dist/transformers.web.js"
22
22
  }
23
23
  },
24
24
  "scripts": {
@@ -57,7 +57,7 @@
57
57
  "dependencies": {
58
58
  "@huggingface/jinja": "^0.3.3",
59
59
  "onnxruntime-node": "1.20.1",
60
- "onnxruntime-web": "1.21.0-dev.20250206-d981b153d3",
60
+ "onnxruntime-web": "1.22.0-dev.20250306-ccf8fdd9ea",
61
61
  "sharp": "^0.33.5"
62
62
  },
63
63
  "devDependencies": {
@@ -69,7 +69,7 @@
69
69
  "jest-environment-node": "^30.0.0-alpha.6",
70
70
  "jsdoc-to-markdown": "^9.1.1",
71
71
  "prettier": "3.4.2",
72
- "typescript": "^5.7.2",
72
+ "typescript": "^5.8.2",
73
73
  "wavefile": "11.0.0",
74
74
  "webpack": "^5.97.1",
75
75
  "webpack-cli": "^5.1.4",
package/src/backends/onnx.js CHANGED
@@ -57,8 +57,8 @@ let ONNX;
57
57
  const ORT_SYMBOL = Symbol.for('onnxruntime');
58
58
 
59
59
  if (ORT_SYMBOL in globalThis) {
60
- // If the JS runtime exposes their own ONNX runtime, use it
61
- ONNX = globalThis[ORT_SYMBOL];
60
+ // If the JS runtime exposes their own ONNX runtime, use it
61
+ ONNX = globalThis[ORT_SYMBOL];
62
62
 
63
63
  } else if (apis.IS_NODE_ENV) {
64
64
  ONNX = ONNX_NODE.default ?? ONNX_NODE;
@@ -141,19 +141,19 @@ let wasmInitPromise = null;
141
141
 
142
142
  /**
143
143
  * Create an ONNX inference session.
144
- * @param {Uint8Array} buffer The ONNX model buffer.
144
+ * @param {Uint8Array|string} buffer_or_path The ONNX model buffer or path.
145
145
  * @param {import('onnxruntime-common').InferenceSession.SessionOptions} session_options ONNX inference session options.
146
146
  * @param {Object} session_config ONNX inference session configuration.
147
147
  * @returns {Promise<import('onnxruntime-common').InferenceSession & { config: Object}>} The ONNX inference session.
148
148
  */
149
- export async function createInferenceSession(buffer, session_options, session_config) {
149
+ export async function createInferenceSession(buffer_or_path, session_options, session_config) {
150
150
  if (wasmInitPromise) {
151
151
  // A previous session has already initialized the WASM runtime
152
152
  // so we wait for it to resolve before creating this new session.
153
153
  await wasmInitPromise;
154
154
  }
155
155
 
156
- const sessionPromise = InferenceSession.create(buffer, session_options);
156
+ const sessionPromise = InferenceSession.create(buffer_or_path, session_options);
157
157
  wasmInitPromise ??= sessionPromise;
158
158
  const session = await sessionPromise;
159
159
  session.config = session_config;
@@ -175,11 +175,15 @@ const ONNX_ENV = ONNX?.env;
175
175
  if (ONNX_ENV?.wasm) {
176
176
  // Initialize wasm backend with suitable default settings.
177
177
 
178
- // (Optional) Set path to wasm files. This is needed when running in a web worker.
179
- // https://onnxruntime.ai/docs/api/js/interfaces/Env.WebAssemblyFlags.html#wasmPaths
180
- // We use remote wasm files by default to make it easier for newer users.
181
- // In practice, users should probably self-host the necessary .wasm files.
182
- ONNX_ENV.wasm.wasmPaths = `https://cdn.jsdelivr.net/npm/@huggingface/transformers@${env.version}/dist/`;
178
+ // (Optional) Set path to wasm files. This will override the default path search behavior of onnxruntime-web.
179
+ // By default, we only do this if we are not in a service worker and the wasmPaths are not already set.
180
+ if (
181
+ // @ts-ignore Cannot find name 'ServiceWorkerGlobalScope'.ts(2304)
182
+ !(typeof ServiceWorkerGlobalScope !== 'undefined' && self instanceof ServiceWorkerGlobalScope)
183
+ && !ONNX_ENV.wasm.wasmPaths
184
+ ) {
185
+ ONNX_ENV.wasm.wasmPaths = `https://cdn.jsdelivr.net/npm/@huggingface/transformers@${env.version}/dist/`;
186
+ }
183
187
 
184
188
  // TODO: Add support for loading WASM files from cached buffer when we upgrade to onnxruntime-web@1.19.0
185
189
  // https://github.com/microsoft/onnxruntime/pull/21534
@@ -187,11 +191,6 @@ if (ONNX_ENV?.wasm) {
187
191
  // Users may wish to proxy the WASM backend to prevent the UI from freezing,
188
192
  // However, this is not necessary when using WebGPU, so we default to false.
189
193
  ONNX_ENV.wasm.proxy = false;
190
-
191
- // https://developer.mozilla.org/en-US/docs/Web/API/crossOriginIsolated
192
- if (typeof crossOriginIsolated === 'undefined' || !crossOriginIsolated) {
193
- ONNX_ENV.wasm.numThreads = 1;
194
- }
195
194
  }
196
195
 
197
196
  if (ONNX_ENV?.webgpu) {
package/src/configs.js CHANGED
@@ -67,9 +67,12 @@ function getNormalizedConfig(config) {
67
67
  // Sub-configs
68
68
  case 'llava':
69
69
  case 'paligemma':
70
+ case 'gemma3':
70
71
  case 'florence2':
71
72
  case 'llava_onevision':
72
73
  case 'idefics3':
74
+ case 'ultravox':
75
+ case 'smolvlm':
73
76
  // @ts-expect-error TS2339
74
77
  init_normalized_config = getNormalizedConfig(config.text_config);
75
78
  break;
@@ -124,6 +127,7 @@ function getNormalizedConfig(config) {
124
127
  break;
125
128
  case 'gemma':
126
129
  case 'gemma2':
130
+ case 'gemma3_text':
127
131
  case 'glm':
128
132
  case 'helium':
129
133
  mapping['num_heads'] = 'num_key_value_heads';
@@ -173,6 +177,7 @@ function getNormalizedConfig(config) {
173
177
  case 'mbart':
174
178
  case 'marian':
175
179
  case 'whisper':
180
+ case 'lite-whisper':
176
181
  case 'm2m_100':
177
182
  case 'blenderbot':
178
183
  case 'blenderbot-small':
@@ -405,5 +410,5 @@ export class AutoConfig {
405
410
  * for more information.
406
411
  * @property {import('./utils/devices.js').DeviceType} [device] The default device to use for the model.
407
412
  * @property {import('./utils/dtypes.js').DataType|Record<string, import('./utils/dtypes.js').DataType>} [dtype] The default data type to use for the model.
408
- * @property {boolean|Record<string, boolean>} [use_external_data_format=false] Whether to load the model using the external data format (used for models >= 2GB in size).
413
+ * @property {import('./utils/hub.js').ExternalData|Record<string, import('./utils/hub.js').ExternalData>} [use_external_data_format=false] Whether to load the model using the external data format (used for models >= 2GB in size).
409
414
  */
package/src/env.js CHANGED
@@ -26,7 +26,7 @@ import fs from 'fs';
26
26
  import path from 'path';
27
27
  import url from 'url';
28
28
 
29
- const VERSION = '3.3.3';
29
+ const VERSION = '3.4.1';
30
30
 
31
31
  // Check if various APIs are available (depends on environment)
32
32
  const IS_BROWSER_ENV = typeof window !== "undefined" && typeof window.document !== "undefined";
package/src/generation/streamers.js CHANGED
@@ -72,9 +72,10 @@ export class TextStreamer extends BaseStreamer {
72
72
  throw Error('TextStreamer only supports batch size of 1');
73
73
  }
74
74
 
75
- if (this.skip_prompt && this.next_tokens_are_prompt) {
75
+ const is_prompt = this.next_tokens_are_prompt;
76
+ if (is_prompt) {
76
77
  this.next_tokens_are_prompt = false;
77
- return;
78
+ if (this.skip_prompt) return;
78
79
  }
79
80
 
80
81
  const tokens = value[0];
@@ -85,7 +86,7 @@ export class TextStreamer extends BaseStreamer {
85
86
  const text = this.tokenizer.decode(this.token_cache, this.decode_kwargs);
86
87
 
87
88
  let printable_text;
88
- if (text.endsWith('\n')) {
89
+ if (is_prompt || text.endsWith('\n')) {
89
90
  // After the symbol for a new line, we flush the cache.
90
91
  printable_text = text.slice(this.print_len);
91
92
  this.token_cache = [];
package/src/models/dac/feature_extraction_dac.js ADDED
@@ -0,0 +1,3 @@
1
+ import { EncodecFeatureExtractor } from '../encodec/feature_extraction_encodec.js';
2
+
3
+ export class DacFeatureExtractor extends EncodecFeatureExtractor { }
package/src/models/encodec/feature_extraction_encodec.js ADDED
@@ -0,0 +1,32 @@
1
+ import { FeatureExtractor, validate_audio_inputs } from '../../base/feature_extraction_utils.js';
2
+ import { Tensor } from '../../utils/tensor.js';
3
+
4
+
5
+ export class EncodecFeatureExtractor extends FeatureExtractor {
6
+ /**
7
+ * Asynchronously extracts input values from a given audio using the provided configuration.
8
+ * @param {Float32Array|Float64Array} audio The audio data as a Float32Array/Float64Array.
9
+ * @returns {Promise<{ input_values: Tensor; }>} The extracted input values.
10
+ */
11
+ async _call(audio) {
12
+ validate_audio_inputs(audio, 'EncodecFeatureExtractor');
13
+
14
+ if (audio instanceof Float64Array) {
15
+ audio = new Float32Array(audio);
16
+ }
17
+
18
+ const num_channels = this.config.feature_size;
19
+ if (audio.length % num_channels !== 0) {
20
+ throw new Error(`The length of the audio data must be a multiple of the number of channels (${num_channels}).`);
21
+ }
22
+
23
+ const shape = [
24
+ 1, /* batch_size */
25
+ num_channels, /* num_channels */
26
+ audio.length / num_channels, /* num_samples */
27
+ ];
28
+ return {
29
+ input_values: new Tensor('float32', audio, shape),
30
+ };
31
+ }
32
+ }
package/src/models/feature_extractors.js CHANGED
@@ -1,9 +1,12 @@
1
1
 
2
2
  export * from './audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js';
3
+ export * from './encodec/feature_extraction_encodec.js';
3
4
  export * from './clap/feature_extraction_clap.js';
5
+ export * from './dac/feature_extraction_dac.js';
4
6
  export * from './moonshine/feature_extraction_moonshine.js';
5
7
  export * from './pyannote/feature_extraction_pyannote.js';
6
8
  export * from './seamless_m4t/feature_extraction_seamless_m4t.js';
9
+ export * from './snac/feature_extraction_snac.js';
7
10
  export * from './speecht5/feature_extraction_speecht5.js';
8
11
  export * from './wav2vec2/feature_extraction_wav2vec2.js';
9
12
  export * from './wespeaker/feature_extraction_wespeaker.js';
package/src/models/idefics3/image_processing_idefics3.js CHANGED
@@ -147,7 +147,7 @@ export class Idefics3ImageProcessor extends ImageProcessor {
147
147
  const start_offset = i * pixel_attention_mask_stride + num_patches * h * w;
148
148
  const end_offset = (i + 1) * pixel_attention_mask_stride;
149
149
 
150
- // @ts-expect-error
150
+ // @ts-ignore
151
151
  pixel_attention_mask_data.fill(false, start_offset, end_offset);
152
152
  }
153
153
  }
package/src/models/image_processors.js CHANGED
@@ -32,6 +32,7 @@ export * from './rt_detr/image_processing_rt_detr.js'
32
32
  export * from './sam/image_processing_sam.js'
33
33
  export * from './segformer/image_processing_segformer.js'
34
34
  export * from './siglip/image_processing_siglip.js'
35
+ export * from './smolvlm/image_processing_smolvlm.js'
35
36
  export * from './swin2sr/image_processing_swin2sr.js'
36
37
  export * from './vit/image_processing_vit.js'
37
38
  export * from './vitmatte/image_processing_vitmatte.js'
package/src/models/processors.js CHANGED
@@ -11,7 +11,9 @@ export * from './paligemma/processing_paligemma.js';
11
11
  export * from './pyannote/processing_pyannote.js';
12
12
  export * from './qwen2_vl/processing_qwen2_vl.js';
13
13
  export * from './sam/processing_sam.js';
14
+ export * from './smolvlm/processing_smolvlm.js';
14
15
  export * from './speecht5/processing_speecht5.js';
16
+ export * from './ultravox/processing_ultravox.js';
15
17
  export * from './wav2vec2/processing_wav2vec2.js';
16
18
  export * from './wav2vec2_with_lm/processing_wav2vec2_with_lm.js';
17
19
  export * from './whisper/processing_whisper.js';
package/src/models/smolvlm/image_processing_smolvlm.js ADDED
@@ -0,0 +1,2 @@
1
+
2
+ export { Idefics3ImageProcessor as SmolVLMImageProcessor } from "../idefics3/image_processing_idefics3.js";
package/src/models/smolvlm/processing_smolvlm.js ADDED
@@ -0,0 +1,2 @@
1
+
2
+ export { Idefics3Processor as SmolVLMProcessor } from "../idefics3/processing_idefics3.js";
package/src/models/snac/feature_extraction_snac.js ADDED
@@ -0,0 +1,3 @@
1
+ import { DacFeatureExtractor } from '../dac/feature_extraction_dac.js';
2
+
3
+ export class SnacFeatureExtractor extends DacFeatureExtractor { }
package/src/models/ultravox/processing_ultravox.js ADDED
@@ -0,0 +1,54 @@
1
+ import { AutoFeatureExtractor } from "../auto/feature_extraction_auto.js"
2
+ import { AutoTokenizer } from "../../tokenizers.js"
3
+ import { Processor } from "../../base/processing_utils.js"
4
+
5
+ /**
6
+ * Represents a UltravoxProcessor that extracts features from an audio input.
7
+ */
8
+ export class UltravoxProcessor extends Processor {
9
+ static tokenizer_class = AutoTokenizer
10
+ static feature_extractor_class = AutoFeatureExtractor
11
+ static uses_processor_config = true;
12
+
13
+ /**
14
+ * @param {string} text The text input to process.
15
+ * @param {Float32Array} audio The audio input to process.
16
+ */
17
+ async _call(text, audio = null, kwargs = {}) {
18
+ // TODO: Support batched inputs
19
+ if (Array.isArray(text)) {
20
+ throw new Error("Batched inputs are not supported yet.");
21
+ }
22
+
23
+ let audio_inputs = {};
24
+ if (audio) {
25
+ const audio_len = audio.length;
26
+ const { input_features } = await this.feature_extractor(audio, {
27
+ ...kwargs,
28
+ max_length: audio_len,
29
+ });
30
+ const nb_encoder_frames = Math.round(audio_len / this.config.encoder_ds_factor + 1e-4);
31
+
32
+ // NOTE: The python version appears to have an off-by-one error.
33
+ const audio_embed_frames = 1 + Math.ceil(nb_encoder_frames / this.config.stack_factor);
34
+ audio_inputs["audio_token_len"] = [audio_embed_frames];
35
+ audio_inputs["audio_values"] = input_features;
36
+
37
+ const image_token = this.config.audio_placeholder;
38
+ if (!text.includes(image_token)) {
39
+ throw new Error(`The input text does not contain the image token ${image_token}.`);
40
+ }
41
+ text = text.replaceAll(image_token, image_token.repeat(audio_embed_frames));
42
+ }
43
+
44
+ const text_inputs = this.tokenizer(text, {
45
+ add_special_tokens: false,
46
+ ...kwargs,
47
+ });
48
+
49
+ return {
50
+ ...text_inputs,
51
+ ...audio_inputs,
52
+ }
53
+ }
54
+ }
package/src/models/whisper/common_whisper.js CHANGED
@@ -135,6 +135,12 @@ export function whisper_language_to_code(language) {
135
135
  if (language_code === undefined) {
136
136
  // User provided something that is not a language name
137
137
 
138
+ // Perhaps the user passed the special token itself
139
+ const language_special_token = language.match(/^<\|([a-z]{2})\|>$/);
140
+ if (language_special_token) {
141
+ language = language_special_token[1];
142
+ }
143
+
138
144
  if (WHISPER_LANGUAGE_MAPPING.has(language)) {
139
145
  // User provided the language code directly (e.g., "en")
140
146
  language_code = language;
@@ -144,7 +150,7 @@ export function whisper_language_to_code(language) {
144
150
  const is_language_code = language.length === 2;
145
151
  const langs = is_language_code ? WHISPER_LANGUAGE_MAPPING.keys() : WHISPER_LANGUAGE_MAPPING.values();
146
152
 
147
- throw new Error(`Language "${language}" is not supported. Must be one of: ${JSON.stringify(langs)}`);
153
+ throw new Error(`Language "${language}" is not supported. Must be one of: ${JSON.stringify(Array.from(langs))}`);
148
154
  }
149
155
  }
150
156
  return language_code;
package/src/models/whisper/feature_extraction_whisper.js CHANGED
@@ -39,7 +39,10 @@ export class WhisperFeatureExtractor extends FeatureExtractor {
39
39
  log_mel: 'log10',
40
40
 
41
41
  // Custom
42
- max_num_frames: this.config.nb_max_frames, // 3000
42
+ max_num_frames: Math.min(
43
+ Math.floor(waveform.length / this.config.hop_length),
44
+ this.config.nb_max_frames, // 3000
45
+ )
43
46
  }
44
47
  )
45
48
 
@@ -58,20 +61,25 @@ export class WhisperFeatureExtractor extends FeatureExtractor {
58
61
  * @param {Float32Array|Float64Array} audio The audio data as a Float32Array/Float64Array.
59
62
  * @returns {Promise<{ input_features: Tensor }>} A Promise resolving to an object containing the extracted input features as a Tensor.
60
63
  */
61
- async _call(audio) {
64
+ async _call(audio, {
65
+ max_length = null,
66
+ } = {}) {
62
67
  validate_audio_inputs(audio, 'WhisperFeatureExtractor');
63
68
 
64
69
  let waveform;
65
- if (audio.length > this.config.n_samples) {
66
- console.warn(
67
- "Attempting to extract features for audio longer than 30 seconds. " +
68
- "If using a pipeline to extract transcript from a long audio clip, " +
69
- "remember to specify `chunk_length_s` and/or `stride_length_s`."
70
- );
71
- waveform = audio.slice(0, this.config.n_samples);
70
+ const length = max_length ?? this.config.n_samples;
71
+ if (audio.length > length) {
72
+ if (audio.length > this.config.n_samples) {
73
+ console.warn(
74
+ "Attempting to extract features for audio longer than 30 seconds. " +
75
+ "If using a pipeline to extract transcript from a long audio clip, " +
76
+ "remember to specify `chunk_length_s` and/or `stride_length_s`."
77
+ );
78
+ }
79
+ waveform = audio.slice(0, length);
72
80
  } else {
73
81
  // pad with zeros
74
- waveform = new Float32Array(this.config.n_samples);
82
+ waveform = new Float32Array(length);
75
83
  waveform.set(audio);
76
84
  }
77
85