nuxt-module-essentia 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -1,4 +1,9 @@
|
|
|
1
1
|
# nuxt-module-essentia
|
|
2
|
+

|
|
3
|
+
[GitHub stars](https://github.com/nikitakashin/nuxt-module-essentia/stargazers)
|
|
4
|
+

|
|
5
|
+
[Nuxt](https://nuxt.com)
|
|
6
|
+
[Nuxt](https://nuxt.com)
|
|
2
7
|
|
|
3
8
|
Nuxt модуль для интеграции Essentia.js WASM библиотеки анализа аудио.
|
|
4
9
|
Работает на Nuxt 3 и 4
|
|
@@ -4,7 +4,7 @@ export interface KeyBpmResult {
|
|
|
4
4
|
scale: string;
|
|
5
5
|
bpm: number;
|
|
6
6
|
}
|
|
7
|
-
export declare const useAudioAnalizer: () => {
|
|
7
|
+
export declare const useAudioAnalizer: (colors?: string[]) => {
|
|
8
8
|
getKeyMoodAndBpm: () => void;
|
|
9
9
|
keyBpmResults: Ref<{}, {}>;
|
|
10
10
|
moodResults: Ref<{}, {}>;
|
package/package.json
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
import { ref, type Ref } from "vue";
|
|
2
|
-
import type {
|
|
2
|
+
import type {
|
|
3
|
+
EssentiaJS,
|
|
4
|
+
EssentiaVector,
|
|
5
|
+
KeyData,
|
|
6
|
+
BpmData,
|
|
7
|
+
} from "../types/essentia";
|
|
3
8
|
|
|
4
9
|
export interface KeyBpmResult {
|
|
5
10
|
key: string;
|
|
@@ -7,7 +12,7 @@ export interface KeyBpmResult {
|
|
|
7
12
|
bpm: number;
|
|
8
13
|
}
|
|
9
14
|
|
|
10
|
-
export const useAudioAnalizer = () => {
|
|
15
|
+
export const useAudioAnalizer = (colors?: string[]) => {
|
|
11
16
|
if (!import.meta.client) {
|
|
12
17
|
return {
|
|
13
18
|
getKeyMoodAndBpm: () => {},
|
|
@@ -17,37 +22,47 @@ export const useAudioAnalizer = () => {
|
|
|
17
22
|
};
|
|
18
23
|
}
|
|
19
24
|
|
|
25
|
+
const defaultColors = [
|
|
26
|
+
"light-blue-lighten-2",
|
|
27
|
+
"light-blue-lighten-1",
|
|
28
|
+
"light-blue-darken-1",
|
|
29
|
+
"light-blue-darken-2",
|
|
30
|
+
"light-blue-darken-3",
|
|
31
|
+
];
|
|
32
|
+
|
|
33
|
+
const moodColors = colors && colors.length === 5 ? colors : defaultColors;
|
|
34
|
+
|
|
20
35
|
const DEFAULT_MOOD_VALUE = [
|
|
21
36
|
{
|
|
22
|
-
color:
|
|
23
|
-
icon: "
|
|
37
|
+
color: moodColors[0],
|
|
38
|
+
icon: "�",
|
|
24
39
|
title: "Танцевальный",
|
|
25
40
|
key: "danceability",
|
|
26
41
|
value: 0,
|
|
27
42
|
},
|
|
28
43
|
{
|
|
29
|
-
color:
|
|
30
|
-
icon: "
|
|
44
|
+
color: moodColors[1],
|
|
45
|
+
icon: "�",
|
|
31
46
|
title: "Радостный",
|
|
32
47
|
key: "mood_happy",
|
|
33
48
|
value: 0,
|
|
34
49
|
},
|
|
35
50
|
{
|
|
36
|
-
color:
|
|
37
|
-
icon: "
|
|
51
|
+
color: moodColors[2],
|
|
52
|
+
icon: "�",
|
|
38
53
|
title: "Грустный",
|
|
39
54
|
key: "mood_sad",
|
|
40
55
|
value: 0,
|
|
41
56
|
},
|
|
42
57
|
{
|
|
43
|
-
color:
|
|
58
|
+
color: moodColors[3],
|
|
44
59
|
icon: "😌",
|
|
45
60
|
title: "Расслабляющий",
|
|
46
61
|
key: "mood_relaxed",
|
|
47
62
|
value: 0,
|
|
48
63
|
},
|
|
49
64
|
{
|
|
50
|
-
color:
|
|
65
|
+
color: moodColors[4],
|
|
51
66
|
icon: "😤",
|
|
52
67
|
title: "Агрессивный",
|
|
53
68
|
key: "mood_aggressive",
|
|
@@ -100,10 +115,11 @@ export const useAudioAnalizer = () => {
|
|
|
100
115
|
document.head.appendChild(script);
|
|
101
116
|
}
|
|
102
117
|
|
|
103
|
-
|
|
104
118
|
function createInferenceWorker() {
|
|
105
119
|
inferenceWorker = new Worker(`${basePath}workers/inference.js`);
|
|
106
|
-
inferenceWorker.onmessage = function listenToWorker(
|
|
120
|
+
inferenceWorker.onmessage = function listenToWorker(
|
|
121
|
+
msg: MessageEvent<{ predictions?: Record<string, number> }>,
|
|
122
|
+
) {
|
|
107
123
|
if (msg.data.predictions) {
|
|
108
124
|
const preds = msg.data.predictions;
|
|
109
125
|
|
|
@@ -115,21 +131,24 @@ export const useAudioAnalizer = () => {
|
|
|
115
131
|
}
|
|
116
132
|
|
|
117
133
|
function createFeatureExtractionWorker() {
|
|
118
|
-
featureExtractionWorker = new Worker(
|
|
134
|
+
featureExtractionWorker = new Worker(
|
|
135
|
+
`${basePath}workers/featureExtraction.js`,
|
|
136
|
+
);
|
|
119
137
|
featureExtractionWorker.postMessage({
|
|
120
138
|
init: true,
|
|
121
139
|
});
|
|
122
|
-
featureExtractionWorker.onmessage =
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
140
|
+
featureExtractionWorker.onmessage =
|
|
141
|
+
function listenToFeatureExtractionWorker(
|
|
142
|
+
msg: MessageEvent<{ embeddings?: Float32Array }>,
|
|
143
|
+
) {
|
|
144
|
+
// feed to models
|
|
145
|
+
if (msg.data.embeddings) {
|
|
146
|
+
// send features off to each of the models
|
|
147
|
+
inferenceWorker.postMessage({
|
|
148
|
+
embeddings: msg.data.embeddings,
|
|
149
|
+
});
|
|
150
|
+
}
|
|
151
|
+
};
|
|
133
152
|
}
|
|
134
153
|
|
|
135
154
|
function monomix(buffer: AudioBuffer) {
|
|
@@ -159,7 +178,10 @@ export const useAudioAnalizer = () => {
|
|
|
159
178
|
|
|
160
179
|
if (trim) {
|
|
161
180
|
const discardSamples = Math.floor(0.1 * audioIn.length); // discard 10% on beginning and end
|
|
162
|
-
audioIn = audioIn.subarray(
|
|
181
|
+
audioIn = audioIn.subarray(
|
|
182
|
+
discardSamples,
|
|
183
|
+
audioIn.length - discardSamples,
|
|
184
|
+
); // create new view of buffer without beginning and end
|
|
163
185
|
}
|
|
164
186
|
|
|
165
187
|
const ratioSampleLength = Math.ceil(audioIn.length * keepRatio);
|
|
@@ -167,7 +189,9 @@ export const useAudioAnalizer = () => {
|
|
|
167
189
|
const numPatchesToKeep = Math.ceil(ratioSampleLength / patchSampleLength);
|
|
168
190
|
|
|
169
191
|
// space patchesToKeep evenly
|
|
170
|
-
const skipSize = Math.floor(
|
|
192
|
+
const skipSize = Math.floor(
|
|
193
|
+
(audioIn.length - ratioSampleLength) / (numPatchesToKeep - 1),
|
|
194
|
+
);
|
|
171
195
|
|
|
172
196
|
let audioOut = [];
|
|
173
197
|
let startIndex = 0;
|
|
@@ -181,7 +205,11 @@ export const useAudioAnalizer = () => {
|
|
|
181
205
|
return Float32Array.from(audioOut);
|
|
182
206
|
}
|
|
183
207
|
|
|
184
|
-
function downsampleArray(
|
|
208
|
+
function downsampleArray(
|
|
209
|
+
audioIn: Float32Array,
|
|
210
|
+
sampleRateIn: number,
|
|
211
|
+
sampleRateOut: number,
|
|
212
|
+
) {
|
|
185
213
|
if (sampleRateOut === sampleRateIn) {
|
|
186
214
|
return audioIn;
|
|
187
215
|
}
|
|
@@ -195,7 +223,11 @@ export const useAudioAnalizer = () => {
|
|
|
195
223
|
let nextOffsetAudioIn = Math.round((offsetResult + 1) * sampleRateRatio);
|
|
196
224
|
let accum = 0,
|
|
197
225
|
count = 0;
|
|
198
|
-
for (
|
|
226
|
+
for (
|
|
227
|
+
let i = offsetAudioIn;
|
|
228
|
+
i < nextOffsetAudioIn && i < audioIn.length;
|
|
229
|
+
i++
|
|
230
|
+
) {
|
|
199
231
|
// @ts-ignore
|
|
200
232
|
accum += audioIn[i];
|
|
201
233
|
count++;
|
|
@@ -212,10 +244,15 @@ export const useAudioAnalizer = () => {
|
|
|
212
244
|
if (arr.length === 0) return 0;
|
|
213
245
|
const sorted = [...arr].sort((a, b) => a - b);
|
|
214
246
|
const mid = Math.floor(sorted.length / 2);
|
|
215
|
-
return sorted.length % 2 === 0
|
|
247
|
+
return sorted.length % 2 === 0
|
|
248
|
+
? (sorted[mid - 1] + sorted[mid]) / 2
|
|
249
|
+
: sorted[mid];
|
|
216
250
|
}
|
|
217
251
|
|
|
218
|
-
function estimateTuningFrequency(
|
|
252
|
+
function estimateTuningFrequency(
|
|
253
|
+
vectorSignal: EssentiaVector,
|
|
254
|
+
sampleRate = 16000,
|
|
255
|
+
): number {
|
|
219
256
|
// Параметры для pitch-экстракции
|
|
220
257
|
const frameSize = 2048;
|
|
221
258
|
const hopSize = 512;
|
|
@@ -237,7 +274,7 @@ export const useAudioAnalizer = () => {
|
|
|
237
274
|
hopSize,
|
|
238
275
|
minFreq,
|
|
239
276
|
maxFreq,
|
|
240
|
-
silenceThreshold
|
|
277
|
+
silenceThreshold,
|
|
241
278
|
);
|
|
242
279
|
|
|
243
280
|
const centsDeviations = [];
|
|
@@ -262,7 +299,9 @@ export const useAudioAnalizer = () => {
|
|
|
262
299
|
|
|
263
300
|
// Если мало данных — возвращаем 440 по умолчанию
|
|
264
301
|
if (centsDeviations.length < 10) {
|
|
265
|
-
console.warn(
|
|
302
|
+
console.warn(
|
|
303
|
+
"Недостаточно надёжных данных для оценки строя. Используется A=440.",
|
|
304
|
+
);
|
|
266
305
|
return 440;
|
|
267
306
|
}
|
|
268
307
|
|
|
@@ -279,11 +318,16 @@ export const useAudioAnalizer = () => {
|
|
|
279
318
|
// downmix to mono, and downsample to 16kHz sr for essentia tensorflow models
|
|
280
319
|
return downsampleArray(mono, audioBuffer.sampleRate, 16000);
|
|
281
320
|
} else {
|
|
282
|
-
throw new TypeError(
|
|
321
|
+
throw new TypeError(
|
|
322
|
+
"Input to audio preprocessing is not of type AudioBuffer",
|
|
323
|
+
);
|
|
283
324
|
}
|
|
284
325
|
}
|
|
285
326
|
|
|
286
|
-
function computeKeyBPM(audioSignal: Float32Array): {
|
|
327
|
+
function computeKeyBPM(audioSignal: Float32Array): {
|
|
328
|
+
keyData: KeyData;
|
|
329
|
+
bpm: number;
|
|
330
|
+
} {
|
|
287
331
|
if (!essentia) {
|
|
288
332
|
throw new Error("Essentia not initialized");
|
|
289
333
|
}
|
|
@@ -309,7 +353,7 @@ export const useAudioAnalizer = () => {
|
|
|
309
353
|
0.0001,
|
|
310
354
|
440,
|
|
311
355
|
"cosine",
|
|
312
|
-
"hann"
|
|
356
|
+
"hann",
|
|
313
357
|
);
|
|
314
358
|
const bpmData = essentia.PercivalBpmEstimator(
|
|
315
359
|
vectorSignal,
|
|
@@ -319,7 +363,7 @@ export const useAudioAnalizer = () => {
|
|
|
319
363
|
128,
|
|
320
364
|
210,
|
|
321
365
|
50,
|
|
322
|
-
16000
|
|
366
|
+
16000,
|
|
323
367
|
);
|
|
324
368
|
|
|
325
369
|
return {
|
|
@@ -330,35 +374,40 @@ export const useAudioAnalizer = () => {
|
|
|
330
374
|
|
|
331
375
|
function processFile(arrayBuffer: ArrayBuffer) {
|
|
332
376
|
audioCtx.resume().then(() => {
|
|
333
|
-
audioCtx
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
377
|
+
audioCtx
|
|
378
|
+
.decodeAudioData(arrayBuffer)
|
|
379
|
+
.then(async function handleDecodedAudio(audioBuffer) {
|
|
380
|
+
const prepocessedAudio = preprocess(audioBuffer);
|
|
381
|
+
await audioCtx.suspend();
|
|
382
|
+
|
|
383
|
+
if (essentia) {
|
|
384
|
+
essentiaAnalysis = computeKeyBPM(prepocessedAudio);
|
|
385
|
+
|
|
386
|
+
const bpmValue =
|
|
387
|
+
essentiaAnalysis.bpm <= 69
|
|
388
|
+
? essentiaAnalysis.bpm * 2
|
|
389
|
+
: essentiaAnalysis.bpm;
|
|
390
|
+
|
|
391
|
+
keyBpmResults.value = {
|
|
392
|
+
key: essentiaAnalysis.keyData.key,
|
|
393
|
+
scale: essentiaAnalysis.keyData.scale,
|
|
394
|
+
bpm: parseFloat(bpmValue.toFixed(2)),
|
|
395
|
+
};
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
// reduce amount of audio to analyse
|
|
399
|
+
let audioData = shortenAudio(prepocessedAudio, KEEP_PERCENTAGE, true); // <-- TRIMMED start/end
|
|
400
|
+
|
|
401
|
+
// send for feature extraction
|
|
402
|
+
if (featureExtractionWorker) {
|
|
403
|
+
featureExtractionWorker.postMessage(
|
|
404
|
+
{
|
|
405
|
+
audio: audioData.buffer,
|
|
406
|
+
},
|
|
407
|
+
[audioData.buffer],
|
|
408
|
+
);
|
|
409
|
+
}
|
|
410
|
+
});
|
|
362
411
|
});
|
|
363
412
|
}
|
|
364
413
|
|
|
@@ -381,6 +430,6 @@ export const useAudioAnalizer = () => {
|
|
|
381
430
|
resetMoodResults,
|
|
382
431
|
essentia,
|
|
383
432
|
essentiaAnalysis,
|
|
384
|
-
featureExtractionWorker
|
|
433
|
+
featureExtractionWorker,
|
|
385
434
|
};
|
|
386
435
|
};
|
|
@@ -9,7 +9,7 @@ const extractor = new EssentiaModel.EssentiaTFInputExtractor(EssentiaWASM, "musi
|
|
|
9
9
|
let modelStart = 0;
|
|
10
10
|
|
|
11
11
|
let model;
|
|
12
|
-
let modelURL = "
|
|
12
|
+
let modelURL = "../models/msd-musicnn-1/model.json";
|
|
13
13
|
let modelLoaded = false;
|
|
14
14
|
let modelReady = false;
|
|
15
15
|
|
|
@@ -59,7 +59,6 @@ async function initTensorflowWASM() {
|
|
|
59
59
|
if (defaultBackend != "wasm") {
|
|
60
60
|
return;
|
|
61
61
|
importScripts("./lib/tf-backend-wasm-3.5.0.js");
|
|
62
|
-
// importScripts('https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-backend-wasm/dist/tf-backend-wasm.js');
|
|
63
62
|
tf.setBackend("wasm");
|
|
64
63
|
tf.ready()
|
|
65
64
|
.then(() => {
|