@mediapipe/tasks-audio 0.1.0-alpha-14 → 0.1.0-alpha-15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/audio.d.ts +456 -0
  2. package/package.json +1 -1
package/audio.d.ts ADDED
@@ -0,0 +1,456 @@
1
+ /** Performs audio classification. */
2
+ export declare class AudioClassifier extends AudioTaskRunner<AudioClassifierResult[]> {
3
+ /**
4
+ * Initializes the Wasm runtime and creates a new audio classifier from the
5
+ * provided options.
6
+ * @param wasmFileset A configuration object that provides the location of the
7
+ * Wasm binary and its loader.
8
+ * @param audioClassifierOptions The options for the audio classifier. Note
9
+ * that either a path to the model asset or a model buffer needs to be
10
+ * provided (via `baseOptions`).
11
+ */
12
+ static createFromOptions(wasmFileset: WasmFileset, audioClassifierOptions: AudioClassifierOptions): Promise<AudioClassifier>;
13
+ /**
14
+ * Initializes the Wasm runtime and creates a new audio classifier based on
15
+ * the provided model asset buffer.
16
+ * @param wasmFileset A configuration object that provides the location of the
17
+ * Wasm binary and its loader.
18
+ * @param modelAssetBuffer A binary representation of the model.
19
+ */
20
+ static createFromModelBuffer(wasmFileset: WasmFileset, modelAssetBuffer: Uint8Array): Promise<AudioClassifier>;
21
+ /**
22
+ * Initializes the Wasm runtime and creates a new audio classifier based on
23
+ * the path to the model asset.
24
+ * @param wasmFileset A configuration object that provides the location of the
25
+ * Wasm binary and its loader.
26
+ * @param modelAssetPath The path to the model asset.
27
+ */
28
+ static createFromModelPath(wasmFileset: WasmFileset, modelAssetPath: string): Promise<AudioClassifier>;
29
+ private constructor();
30
+ /**
31
+ * Sets new options for the audio classifier.
32
+ *
33
+ * Calling `setOptions()` with a subset of options only affects those options.
34
+ * You can reset an option back to its default value by explicitly setting it
35
+ * to `undefined`.
36
+ *
37
+ * @param options The options for the audio classifier.
38
+ */
39
+ setOptions(options: AudioClassifierOptions): Promise<void>;
40
+ /**
41
+ * Performs audio classification on the provided audio clip and waits
42
+ * synchronously for the response.
43
+ *
44
+ * @param audioData An array of raw audio capture data, like from a call to
45
+ * `getChannelData()` on an AudioBuffer.
46
+ * @param sampleRate The sample rate in Hz of the provided audio data. If not
47
+ * set, defaults to the sample rate set via `setDefaultSampleRate()` or
48
+ * `48000` if no custom default was set.
49
+ * @return The classification results of the audio data
50
+ */
51
+ classify(audioData: Float32Array, sampleRate?: number): AudioClassifierResult[];
52
+ }
53
+
54
+ /** Options to configure the MediaPipe Audio Classifier Task */
55
+ export declare interface AudioClassifierOptions extends ClassifierOptions, TaskRunnerOptions {
56
+ }
57
+
58
+ /** Classification results of a model. */
59
+ export declare interface AudioClassifierResult {
60
+ /** The classification results for each head of the model. */
61
+ classifications: Classifications[];
62
+ /**
63
+ * The optional timestamp (in milliseconds) of the start of the chunk of data
64
+ * corresponding to these results.
65
+ *
66
+ * This is only used for classification on time series (e.g. audio
67
+ * classification). In these use cases, the amount of data to process might
68
+ * exceed the maximum size that the model can process: to solve this, the
69
+ * input data is split into multiple chunks starting at different timestamps.
70
+ */
71
+ timestampMs?: number;
72
+ }
73
+
74
+ /** Performs embedding extraction on audio. */
75
+ export declare class AudioEmbedder extends AudioTaskRunner<AudioEmbedderResult[]> {
76
+ /**
77
+ * Initializes the Wasm runtime and creates a new audio embedder from the
78
+ * provided options.
79
+ * @param wasmFileset A configuration object that provides the location of the
80
+ * Wasm binary and its loader.
81
+ * @param audioEmbedderOptions The options for the audio embedder. Note that
82
+ * either a path to the TFLite model or the model itself needs to be
83
+ * provided (via `baseOptions`).
84
+ */
85
+ static createFromOptions(wasmFileset: WasmFileset, audioEmbedderOptions: AudioEmbedderOptions): Promise<AudioEmbedder>;
86
+ /**
87
+ * Initializes the Wasm runtime and creates a new audio embedder based on the
88
+ * provided model asset buffer.
89
+ * @param wasmFileset A configuration object that provides the location of the
90
+ * Wasm binary and its loader.
91
+ * @param modelAssetBuffer A binary representation of the TFLite model.
92
+ */
93
+ static createFromModelBuffer(wasmFileset: WasmFileset, modelAssetBuffer: Uint8Array): Promise<AudioEmbedder>;
94
+ /**
95
+ * Initializes the Wasm runtime and creates a new audio embedder based on the
96
+ * path to the model asset.
97
+ * @param wasmFileset A configuration object that provides the location of the
98
+ * Wasm binary and its loader.
99
+ * @param modelAssetPath The path to the TFLite model.
100
+ */
101
+ static createFromModelPath(wasmFileset: WasmFileset, modelAssetPath: string): Promise<AudioEmbedder>;
102
+ private constructor();
103
+ /**
104
+ * Sets new options for the audio embedder.
105
+ *
106
+ * Calling `setOptions()` with a subset of options only affects those options.
107
+ * You can reset an option back to its default value by explicitly setting it
108
+ * to `undefined`.
109
+ *
110
+ * @param options The options for the audio embedder.
111
+ */
112
+ setOptions(options: AudioEmbedderOptions): Promise<void>;
113
+ /**
114
+ * Performs embedding extraction on the provided audio clip and waits
115
+ * synchronously for the response.
116
+ *
117
+ * @param audioData An array of raw audio capture data, like from a call to
118
+ * `getChannelData()` on an AudioBuffer.
119
+ * @param sampleRate The sample rate in Hz of the provided audio data. If not
120
+ * set, defaults to the sample rate set via `setDefaultSampleRate()` or
121
+ * `48000` if no custom default was set.
122
+ * @return The embedding results of the audio data
123
+ */
124
+ embed(audioData: Float32Array, sampleRate?: number): AudioEmbedderResult[];
125
+ }
126
+
127
+ /** Options to configure the MediaPipe Audio Embedder Task */
128
+ export declare interface AudioEmbedderOptions extends EmbedderOptions, TaskRunnerOptions {
129
+ }
130
+
131
+ /** Embedding results for a given embedder model. */
132
+ export declare interface AudioEmbedderResult {
133
+ /**
134
+ * The embedding results for each model head, i.e. one for each output tensor.
135
+ */
136
+ embeddings: Embedding[];
137
+ /**
138
+ * The optional timestamp (in milliseconds) of the start of the chunk of
139
+ * data corresponding to these results.
140
+ *
141
+ * This is only used for embedding extraction on time series (e.g. audio
142
+ * embedding). In these use cases, the amount of data to process might
143
+ * exceed the maximum size that the model can process: to solve this, the
144
+ * input data is split into multiple chunks starting at different timestamps.
145
+ */
146
+ timestampMs?: number;
147
+ }
148
+
149
+ /** Base class for all MediaPipe Audio Tasks. */
150
+ declare abstract class AudioTaskRunner<T> extends TaskRunner {
151
+ /**
152
+ * Sets the sample rate for API calls that omit an explicit sample rate.
153
+ * `48000` is used as a default if this method is not called.
154
+ *
155
+ * @param sampleRate A sample rate (e.g. `44100`).
156
+ */
157
+ setDefaultSampleRate(sampleRate: number): void;
158
+ }
159
+
160
+ /**
161
+ * Copyright 2022 The MediaPipe Authors.
162
+ *
163
+ * Licensed under the Apache License, Version 2.0 (the "License");
164
+ * you may not use this file except in compliance with the License.
165
+ * You may obtain a copy of the License at
166
+ *
167
+ * http://www.apache.org/licenses/LICENSE-2.0
168
+ *
169
+ * Unless required by applicable law or agreed to in writing, software
170
+ * distributed under the License is distributed on an "AS IS" BASIS,
171
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
172
+ * See the License for the specific language governing permissions and
173
+ * limitations under the License.
174
+ */
175
+ /** Options to configure MediaPipe model loading and processing. */
176
+ declare interface BaseOptions_2 {
177
+ /**
178
+ * The model path to the model asset file. Only one of `modelAssetPath` or
179
+ * `modelAssetBuffer` can be set.
180
+ */
181
+ modelAssetPath?: string | undefined;
182
+ /**
183
+ * A buffer containing the model asset. Only one of `modelAssetPath` or
184
+ * `modelAssetBuffer` can be set.
185
+ */
186
+ modelAssetBuffer?: Uint8Array | undefined;
187
+ /** Overrides the default backend to use for the provided model. */
188
+ delegate?: "CPU" | "GPU" | undefined;
189
+ }
190
+
191
+ /**
192
+ * Copyright 2022 The MediaPipe Authors.
193
+ *
194
+ * Licensed under the Apache License, Version 2.0 (the "License");
195
+ * you may not use this file except in compliance with the License.
196
+ * You may obtain a copy of the License at
197
+ *
198
+ * http://www.apache.org/licenses/LICENSE-2.0
199
+ *
200
+ * Unless required by applicable law or agreed to in writing, software
201
+ * distributed under the License is distributed on an "AS IS" BASIS,
202
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
203
+ * See the License for the specific language governing permissions and
204
+ * limitations under the License.
205
+ */
206
+ /** A classification category. */
207
+ export declare interface Category {
208
+ /** The probability score of this label category. */
209
+ score: number;
210
+ /** The index of the category in the corresponding label file. */
211
+ index: number;
212
+ /**
213
+ * The label of this category object. Defaults to an empty string if there is
214
+ * no category.
215
+ */
216
+ categoryName: string;
217
+ /**
218
+ * The display name of the label, which may be translated for different
219
+ * locales. For example, a label, "apple", may be translated into Spanish for
220
+ * display purpose, so that the `displayName` is "manzana". Defaults to an
221
+ * empty string if there is no display name.
222
+ */
223
+ displayName: string;
224
+ }
225
+
226
+ /** Classification results for a given classifier head. */
227
+ export declare interface Classifications {
228
+ /**
229
+ * The array of predicted categories, usually sorted by descending scores,
230
+ * e.g., from high to low probability.
231
+ */
232
+ categories: Category[];
233
+ /**
234
+ * The index of the classifier head these categories refer to. This is
235
+ * useful for multi-head models.
236
+ */
237
+ headIndex: number;
238
+ /**
239
+ * The name of the classifier head, which is the corresponding tensor
240
+ * metadata name. Defaults to an empty string if there is no such metadata.
241
+ */
242
+ headName: string;
243
+ }
244
+
245
+ /**
246
+ * Copyright 2022 The MediaPipe Authors.
247
+ *
248
+ * Licensed under the Apache License, Version 2.0 (the "License");
249
+ * you may not use this file except in compliance with the License.
250
+ * You may obtain a copy of the License at
251
+ *
252
+ * http://www.apache.org/licenses/LICENSE-2.0
253
+ *
254
+ * Unless required by applicable law or agreed to in writing, software
255
+ * distributed under the License is distributed on an "AS IS" BASIS,
256
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
257
+ * See the License for the specific language governing permissions and
258
+ * limitations under the License.
259
+ */
260
+ /** Options to configure a MediaPipe Classifier Task. */
261
+ declare interface ClassifierOptions {
262
+ /**
263
+ * The locale to use for display names specified through the TFLite Model
264
+ * Metadata, if any. Defaults to English.
265
+ */
266
+ displayNamesLocale?: string | undefined;
267
+ /** The maximum number of top-scored classification results to return. */
268
+ maxResults?: number | undefined;
269
+ /**
270
+ * Overrides the value provided in the model metadata. Results below this
271
+ * value are rejected.
272
+ */
273
+ scoreThreshold?: number | undefined;
274
+ /**
275
+ * Allowlist of category names. If non-empty, classification results whose category
276
+ * name is not in this set will be filtered out. Duplicate or unknown category
277
+ * names are ignored. Mutually exclusive with `categoryDenylist`.
278
+ */
279
+ categoryAllowlist?: string[] | undefined;
280
+ /**
281
+ * Denylist of category names. If non-empty, classification results whose category
282
+ * name is in this set will be filtered out. Duplicate or unknown category
283
+ * names are ignored. Mutually exclusive with `categoryAllowlist`.
284
+ */
285
+ categoryDenylist?: string[] | undefined;
286
+ }
287
+
288
+ /**
289
+ * Copyright 2022 The MediaPipe Authors.
290
+ *
291
+ * Licensed under the Apache License, Version 2.0 (the "License");
292
+ * you may not use this file except in compliance with the License.
293
+ * You may obtain a copy of the License at
294
+ *
295
+ * http://www.apache.org/licenses/LICENSE-2.0
296
+ *
297
+ * Unless required by applicable law or agreed to in writing, software
298
+ * distributed under the License is distributed on an "AS IS" BASIS,
299
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
300
+ * See the License for the specific language governing permissions and
301
+ * limitations under the License.
302
+ */
303
+ /** Options to configure a MediaPipe Embedder Task */
304
+ declare interface EmbedderOptions {
305
+ /**
306
+ * Whether to normalize the returned feature vector with L2 norm. Use this
307
+ * option only if the model does not already contain a native L2_NORMALIZATION
308
+ * TF Lite Op. In most cases, this is already the case and L2 norm is thus
309
+ * achieved through TF Lite inference.
310
+ */
311
+ l2Normalize?: boolean | undefined;
312
+ /**
313
+ * Whether the returned embedding should be quantized to bytes via scalar
314
+ * quantization. Embeddings are implicitly assumed to be unit-norm and
315
+ * therefore any dimension is guaranteed to have a value in [-1.0, 1.0]. Use
316
+ * the `l2Normalize` option if this is not the case.
317
+ */
318
+ quantize?: boolean | undefined;
319
+ }
320
+
321
+ /**
322
+ * Copyright 2022 The MediaPipe Authors.
323
+ *
324
+ * Licensed under the Apache License, Version 2.0 (the "License");
325
+ * you may not use this file except in compliance with the License.
326
+ * You may obtain a copy of the License at
327
+ *
328
+ * http://www.apache.org/licenses/LICENSE-2.0
329
+ *
330
+ * Unless required by applicable law or agreed to in writing, software
331
+ * distributed under the License is distributed on an "AS IS" BASIS,
332
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
333
+ * See the License for the specific language governing permissions and
334
+ * limitations under the License.
335
+ */
336
+ /**
337
+ * List of embeddings with an optional timestamp.
338
+ *
339
+ * One and only one of the two 'floatEmbedding' and 'quantizedEmbedding' will
340
+ * contain data, based on whether or not the embedder was configured to perform
341
+ * scalar quantization.
342
+ */
343
+ export declare interface Embedding {
344
+ /**
345
+ * Floating-point embedding. Empty if the embedder was configured to perform
346
+ * scalar-quantization.
347
+ */
348
+ floatEmbedding?: number[];
349
+ /**
350
+ * Scalar-quantized embedding. Empty if the embedder was not configured to
351
+ * perform scalar quantization.
352
+ */
353
+ quantizedEmbedding?: Uint8Array;
354
+ /**
355
+ * The index of the embedder head this embedding refers to. This is
356
+ * useful for multi-head models.
357
+ */
358
+ headIndex: number;
359
+ /**
360
+ * The name of the embedder head, which is the corresponding tensor
361
+ * metadata name.
362
+ */
363
+ headName: string;
364
+ }
365
+
366
+ /**
367
+ * Resolves the files required for the MediaPipe Task APIs.
368
+ *
369
+ * This class verifies whether SIMD is supported in the current environment and
370
+ * loads the SIMD files only if support is detected. The returned filesets
371
+ * require that the Wasm files are published without renaming. If this is not
372
+ * possible, you can invoke the MediaPipe Tasks APIs using a manually created
373
+ * `WasmFileset`.
374
+ */
375
+ export declare class FilesetResolver {
376
+ /**
377
+ * Returns whether SIMD is supported in the current environment.
378
+ *
379
+ * If your environment requires custom locations for the MediaPipe Wasm files,
380
+ * you can use `isSimdSupported()` to decide whether to load the SIMD-based
381
+ * assets.
382
+ *
383
+ * @return Whether SIMD support was detected in the current environment.
384
+ */
385
+ static isSimdSupported(): Promise<boolean>;
386
+ /**
387
+ * Creates a fileset for the MediaPipe Audio tasks.
388
+ *
389
+ * @param basePath An optional base path to specify the directory the Wasm
390
+ * files should be loaded from. If not specified, the Wasm files are
391
+ * loaded from the host's root directory.
392
+ * @return A `WasmFileset` that can be used to initialize MediaPipe Audio
393
+ * tasks.
394
+ */
395
+ static forAudioTasks(basePath?: string): Promise<WasmFileset>;
396
+ /**
397
+ * Creates a fileset for the MediaPipe Text tasks.
398
+ *
399
+ * @param basePath An optional base path to specify the directory the Wasm
400
+ * files should be loaded from. If not specified, the Wasm files are
401
+ * loaded from the host's root directory.
402
+ * @return A `WasmFileset` that can be used to initialize MediaPipe Text
403
+ * tasks.
404
+ */
405
+ static forTextTasks(basePath?: string): Promise<WasmFileset>;
406
+ /**
407
+ * Creates a fileset for the MediaPipe Vision tasks.
408
+ *
409
+ * @param basePath An optional base path to specify the directory the Wasm
410
+ * files should be loaded from. If not specified, the Wasm files are
411
+ * loaded from the host's root directory.
412
+ * @return A `WasmFileset` that can be used to initialize MediaPipe Vision
413
+ * tasks.
414
+ */
415
+ static forVisionTasks(basePath?: string): Promise<WasmFileset>;
416
+ }
417
+
418
+ /** Base class for all MediaPipe Tasks. */
419
+ declare abstract class TaskRunner {
420
+ protected constructor();
421
+ /** Configures the task with custom options. */
422
+ abstract setOptions(options: TaskRunnerOptions): Promise<void>;
423
+ /** Closes and cleans up the resources held by this task. */
424
+ close(): void;
425
+ }
426
+
427
+ /** Options to configure MediaPipe Tasks in general. */
428
+ declare interface TaskRunnerOptions {
429
+ /** Options to configure the loading of the model assets. */
430
+ baseOptions?: BaseOptions_2;
431
+ }
432
+
433
+ /**
434
+ * Copyright 2022 The MediaPipe Authors.
435
+ *
436
+ * Licensed under the Apache License, Version 2.0 (the "License");
437
+ * you may not use this file except in compliance with the License.
438
+ * You may obtain a copy of the License at
439
+ *
440
+ * http://www.apache.org/licenses/LICENSE-2.0
441
+ *
442
+ * Unless required by applicable law or agreed to in writing, software
443
+ * distributed under the License is distributed on an "AS IS" BASIS,
444
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
445
+ * See the License for the specific language governing permissions and
446
+ * limitations under the License.
447
+ */
448
+ /** An object containing the locations of the Wasm assets */
449
+ declare interface WasmFileset {
450
+ /** The path to the Wasm loader script. */
451
+ wasmLoaderPath: string;
452
+ /** The path to the Wasm binary. */
453
+ wasmBinaryPath: string;
454
+ }
455
+
456
+ export { }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mediapipe/tasks-audio",
3
- "version": "0.1.0-alpha-14",
3
+ "version": "0.1.0-alpha-15",
4
4
  "description": "MediaPipe Audio Tasks",
5
5
  "main": "audio_bundle.js",
6
6
  "author": "mediapipe@google.com",