@camstack/addon-pipeline 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/audio-analyzer/index.js +141 -51
- package/dist/audio-analyzer/index.mjs +142 -51
- package/dist/decoder-nodeav/index.js +1 -1
- package/dist/decoder-nodeav/index.mjs +1 -1
- package/dist/detection-pipeline/index.js +56 -321
- package/dist/detection-pipeline/index.mjs +56 -321
- package/dist/stream-broker/{_virtual_mf-localSharedImportMap___mfe_internal__addon_stream_broker_widgets-Bak8zYXf.mjs → _virtual_mf-localSharedImportMap___mfe_internal__addon_stream_broker_widgets-qX99--rF.mjs} +3 -3
- package/dist/stream-broker/{hostInit-CYCw2DW3.mjs → hostInit-Bx41KdYV.mjs} +3 -3
- package/dist/stream-broker/index.js +2 -2
- package/dist/stream-broker/index.mjs +2 -2
- package/dist/stream-broker/remoteEntry.js +1 -1
- package/package.json +2 -7
- package/python/requirements-audio.txt +5 -0
- package/python/yamnet_audio.py +113 -0
- package/dist/constants-B_b0a-6h.mjs +0 -3119
- package/dist/constants-D65v6yp6.js +0 -5963
|
@@ -13,43 +13,69 @@ let _camstack_core = require("@camstack/core");
|
|
|
13
13
|
/**
|
|
14
14
|
* Create the appropriate audio pipeline.
|
|
15
15
|
*
|
|
16
|
-
* - 'yamnet-onnx': Cross-platform YAMNet ONNX (requires
|
|
16
|
+
* - 'yamnet-onnx': Cross-platform YAMNet ONNX via embedded Python (requires
|
|
17
|
+
* model download + `options.pythonPath`)
|
|
17
18
|
* - 'apple-soundanalysis': macOS 12+ Apple SoundAnalysis (zero model download, Neural Engine)
|
|
18
19
|
* - undefined: auto-detect (Apple SA on macOS, YAMNet on Linux)
|
|
19
20
|
*/
|
|
20
21
|
async function createAudioPipeline(modelsDir, logger, options) {
|
|
21
22
|
if ((options?.backend ?? (process.platform === "darwin" ? "apple-soundanalysis" : "yamnet-onnx")) === "apple-soundanalysis") return new AppleSoundAnalysisPipeline(logger);
|
|
22
|
-
|
|
23
|
+
if (!options?.pythonPath) throw new Error("YAMNet audio backend requires the embedded Python interpreter — pass options.pythonPath (ctx.deps.ensurePython()).");
|
|
24
|
+
return new YamnetPythonPipeline(modelsDir, logger, options.pythonPath, options.installPythonRequirements);
|
|
23
25
|
}
|
|
24
26
|
/**
|
|
25
27
|
* Canonical model URLs on the camstack HuggingFace mirror. Mirrors the
|
|
26
28
|
* convention every detection model follows (single point of truth =
|
|
27
29
|
* `HF_BASE_URL` from `@camstack/types`); the auto-download path uses
|
|
28
30
|
* `downloadFile` from `@camstack/core`, the SAME helper detection-
|
|
29
|
-
* pipeline uses to materialise its YOLO/face/plate models.
|
|
30
|
-
* model on disk → fetch from HF; cached file → no-op.
|
|
31
|
+
* pipeline uses to materialise its YOLO/face/plate models.
|
|
31
32
|
*
|
|
32
|
-
* Repo layout follows the detection-pipeline pattern:
|
|
33
33
|
* {domain}/{family}/{format}/{filename}
|
|
34
34
|
* For YAMNet that's `audioClassification/yamnet/onnx/camstack-yamnet.onnx`,
|
|
35
|
-
* with the labels JSON
|
|
36
|
-
* because they're format-agnostic (same 521 AudioSet class names whether
|
|
37
|
-
* the runtime is ONNX, OpenVINO, or TF).
|
|
35
|
+
* with the labels JSON one level up (`audioClassification/yamnet/`).
|
|
38
36
|
*/
|
|
39
37
|
var YAMNET_MODEL_URL = `${require_dist.HF_BASE_URL}/audioClassification/yamnet/onnx/camstack-yamnet.onnx`;
|
|
40
38
|
var YAMNET_LABELS_URL = `${require_dist.HF_BASE_URL}/audioClassification/yamnet/camstack-yamnet-labels.json`;
|
|
41
|
-
|
|
39
|
+
/**
|
|
40
|
+
* Locate the addon's bundled `python/` dir (holds `yamnet_audio.py` +
|
|
41
|
+
* `requirements-audio.txt`). Mirrors detection-pipeline's resolver: the
|
|
42
|
+
* published package first, then `__dirname`-relative candidates for the
|
|
43
|
+
* in-tree dev build. This file compiles to `dist/audio-analyzer/index.*`,
|
|
44
|
+
* so the bundle's `python/` is `../../python`.
|
|
45
|
+
*/
|
|
46
|
+
function resolveAudioPythonDir() {
|
|
47
|
+
const candidates = [];
|
|
48
|
+
try {
|
|
49
|
+
const pkgPath = require.resolve("@camstack/addon-pipeline/package.json");
|
|
50
|
+
candidates.push(node_path.join(node_path.dirname(pkgPath), "python"));
|
|
51
|
+
} catch {}
|
|
52
|
+
candidates.push(node_path.join(__dirname, "../../python"), node_path.join(__dirname, "../python"), node_path.join(__dirname, "../../../python"));
|
|
53
|
+
for (const c of candidates) if (node_fs.existsSync(node_path.join(c, "yamnet_audio.py"))) return c;
|
|
54
|
+
throw new Error(`audio-analyzer: python/ dir (yamnet_audio.py) not found. Searched:\n${candidates.join("\n")}`);
|
|
55
|
+
}
|
|
56
|
+
/**
|
|
57
|
+
* YAMNet ONNX inference via a persistent embedded-Python subprocess
|
|
58
|
+
* (`yamnet_audio.py`). Replaces the former onnxruntime-node path so the hub
|
|
59
|
+
* ships no Node ONNX runtime. Wire protocol = length-prefixed frames
|
|
60
|
+
* ([4B LE length][payload]), matching the Apple SoundAnalysis CLI: outbound
|
|
61
|
+
* payload is the raw float32 16 kHz-mono waveform, inbound is a JSON result.
|
|
62
|
+
*/
|
|
63
|
+
var YamnetPythonPipeline = class {
|
|
42
64
|
modelsDir;
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
labels = [];
|
|
65
|
+
pythonPath;
|
|
66
|
+
installPythonRequirements;
|
|
46
67
|
log;
|
|
47
|
-
|
|
68
|
+
process = null;
|
|
69
|
+
receiveBuffer = Buffer.alloc(0);
|
|
70
|
+
pendingResolve = null;
|
|
71
|
+
pendingReject = null;
|
|
72
|
+
constructor(modelsDir, logger, pythonPath, installPythonRequirements) {
|
|
48
73
|
this.modelsDir = modelsDir;
|
|
74
|
+
this.pythonPath = pythonPath;
|
|
75
|
+
this.installPythonRequirements = installPythonRequirements;
|
|
49
76
|
this.log = logger;
|
|
50
77
|
}
|
|
51
78
|
async initialize() {
|
|
52
|
-
const ort = await import("onnxruntime-node");
|
|
53
79
|
const modelPath = node_path.join(this.modelsDir, "camstack-yamnet.onnx");
|
|
54
80
|
const labelsPath = node_path.join(this.modelsDir, "camstack-yamnet-labels.json");
|
|
55
81
|
if (!node_fs.existsSync(modelPath)) {
|
|
@@ -67,49 +93,101 @@ var YamnetOnnxPipeline = class {
|
|
|
67
93
|
} });
|
|
68
94
|
await (0, _camstack_core.downloadFile)(YAMNET_LABELS_URL, labelsPath);
|
|
69
95
|
}
|
|
70
|
-
|
|
71
|
-
this.
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
this.
|
|
96
|
+
const pythonDir = resolveAudioPythonDir();
|
|
97
|
+
if (this.installPythonRequirements) await this.installPythonRequirements(node_path.join(pythonDir, "requirements-audio.txt"));
|
|
98
|
+
const scriptPath = node_path.join(pythonDir, "yamnet_audio.py");
|
|
99
|
+
const { spawn } = await import("node:child_process");
|
|
100
|
+
this.process = spawn(this.pythonPath, [
|
|
101
|
+
scriptPath,
|
|
102
|
+
"--model",
|
|
103
|
+
modelPath,
|
|
104
|
+
"--labels",
|
|
105
|
+
labelsPath
|
|
106
|
+
], { stdio: [
|
|
107
|
+
"pipe",
|
|
108
|
+
"pipe",
|
|
109
|
+
"pipe"
|
|
110
|
+
] });
|
|
111
|
+
this.process.stderr?.on("data", (chunk) => {
|
|
112
|
+
const text = chunk.toString().trim();
|
|
113
|
+
if (text) this.log.warn(text);
|
|
114
|
+
});
|
|
115
|
+
this.process.on("error", (err) => {
|
|
116
|
+
this.log.error("YAMNet Python process error", { meta: { error: err.message } });
|
|
117
|
+
this.pendingReject?.(err);
|
|
118
|
+
this.pendingReject = null;
|
|
119
|
+
this.pendingResolve = null;
|
|
120
|
+
});
|
|
121
|
+
this.process.on("exit", (code) => {
|
|
122
|
+
if (code !== 0 && code !== null) {
|
|
123
|
+
this.log.error("YAMNet Python process exited", { meta: { code } });
|
|
124
|
+
const err = /* @__PURE__ */ new Error(`YAMNet Python: process exited with code ${code}`);
|
|
125
|
+
this.pendingReject?.(err);
|
|
126
|
+
this.pendingReject = null;
|
|
127
|
+
this.pendingResolve = null;
|
|
128
|
+
}
|
|
129
|
+
});
|
|
130
|
+
this.process.stdout.on("data", (chunk) => {
|
|
131
|
+
this.receiveBuffer = Buffer.concat([this.receiveBuffer, chunk]);
|
|
132
|
+
this.tryReceive();
|
|
133
|
+
});
|
|
134
|
+
const ready = await this.receiveMessage();
|
|
135
|
+
if (ready["status"] !== "ready") throw new Error(`YAMNet Python: unexpected init response: ${JSON.stringify(ready)}`);
|
|
136
|
+
this.log.info(`YAMNet Python pipeline initialized (${String(ready["labels"] ?? "?")} labels)`);
|
|
75
137
|
}
|
|
76
138
|
async classify(chunk) {
|
|
77
|
-
if (!this.
|
|
78
|
-
const start = Date.now();
|
|
79
|
-
const ort = await import("onnxruntime-node");
|
|
139
|
+
if (!this.process?.stdin) throw new Error("YAMNet Python: process not initialized");
|
|
80
140
|
const waveform = chunk.sampleRate === 16e3 && chunk.channels === 1 ? chunk.data : resampleMono16k(chunk);
|
|
81
|
-
const
|
|
82
|
-
const
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
const
|
|
86
|
-
const numClasses = 521;
|
|
87
|
-
const numFrames = scores.length / numClasses;
|
|
88
|
-
const avgScores = new Float32Array(numClasses);
|
|
89
|
-
for (let f = 0; f < numFrames; f++) for (let c = 0; c < numClasses; c++) avgScores[c] += scores[f * numClasses + c];
|
|
90
|
-
for (let c = 0; c < numClasses; c++) avgScores[c] = avgScores[c] / numFrames;
|
|
91
|
-
const minScore = .05;
|
|
92
|
-
const classifications = [];
|
|
93
|
-
for (let c = 0; c < numClasses; c++) {
|
|
94
|
-
const score = avgScores[c];
|
|
95
|
-
if (score >= minScore) {
|
|
96
|
-
const label = c < this.labels.length ? this.labels[c] : String(c);
|
|
97
|
-
classifications.push({
|
|
98
|
-
className: label,
|
|
99
|
-
score: Math.round(score * 1e3) / 1e3
|
|
100
|
-
});
|
|
101
|
-
}
|
|
102
|
-
}
|
|
103
|
-
classifications.sort((a, b) => b.score - a.score);
|
|
141
|
+
const audioBuffer = Buffer.from(waveform.buffer, waveform.byteOffset, waveform.byteLength);
|
|
142
|
+
const lengthBuf = Buffer.allocUnsafe(4);
|
|
143
|
+
lengthBuf.writeUInt32LE(audioBuffer.length, 0);
|
|
144
|
+
this.process.stdin.write(Buffer.concat([lengthBuf, audioBuffer]));
|
|
145
|
+
const result = await this.receiveMessage();
|
|
104
146
|
return {
|
|
105
|
-
classifications: classifications
|
|
106
|
-
inferenceMs:
|
|
147
|
+
classifications: result["classifications"] ?? [],
|
|
148
|
+
inferenceMs: result["inferenceMs"] ?? 0
|
|
107
149
|
};
|
|
108
150
|
}
|
|
109
151
|
async dispose() {
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
152
|
+
const proc = this.process;
|
|
153
|
+
if (!proc) return;
|
|
154
|
+
this.process = null;
|
|
155
|
+
proc.stdin?.end();
|
|
156
|
+
proc.kill("SIGTERM");
|
|
157
|
+
if (!await new Promise((resolve) => {
|
|
158
|
+
const timer = setTimeout(() => resolve(false), 5e3);
|
|
159
|
+
proc.once("exit", () => {
|
|
160
|
+
clearTimeout(timer);
|
|
161
|
+
resolve(true);
|
|
162
|
+
});
|
|
163
|
+
})) {
|
|
164
|
+
try {
|
|
165
|
+
proc.kill("SIGKILL");
|
|
166
|
+
} catch {}
|
|
167
|
+
this.log.warn("YAMNet Python process did not exit gracefully — sent SIGKILL");
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
receiveMessage() {
|
|
171
|
+
return new Promise((resolve, reject) => {
|
|
172
|
+
this.pendingResolve = resolve;
|
|
173
|
+
this.pendingReject = reject;
|
|
174
|
+
});
|
|
175
|
+
}
|
|
176
|
+
tryReceive() {
|
|
177
|
+
if (this.receiveBuffer.length < 4) return;
|
|
178
|
+
const length = this.receiveBuffer.readUInt32LE(0);
|
|
179
|
+
if (this.receiveBuffer.length < 4 + length) return;
|
|
180
|
+
const jsonBytes = this.receiveBuffer.subarray(4, 4 + length);
|
|
181
|
+
this.receiveBuffer = this.receiveBuffer.subarray(4 + length);
|
|
182
|
+
const resolve = this.pendingResolve;
|
|
183
|
+
const reject = this.pendingReject;
|
|
184
|
+
this.pendingResolve = null;
|
|
185
|
+
this.pendingReject = null;
|
|
186
|
+
if (!resolve) return;
|
|
187
|
+
try {
|
|
188
|
+
resolve(JSON.parse(jsonBytes.toString("utf8")));
|
|
189
|
+
} catch (err) {
|
|
190
|
+
reject?.(err instanceof Error ? err : new Error(String(err)));
|
|
113
191
|
}
|
|
114
192
|
}
|
|
115
193
|
};
|
|
@@ -429,6 +507,14 @@ var AudioAnalyzerProvider = class {
|
|
|
429
507
|
});
|
|
430
508
|
}
|
|
431
509
|
this.classifyCallCount++;
|
|
510
|
+
const meaningful = result.labels.filter((l) => l.score >= .15 && l.className.toLowerCase() !== "silence");
|
|
511
|
+
if (meaningful.length > 0) this.log.info("audio classification", {
|
|
512
|
+
tags: chunk.deviceId !== void 0 ? { deviceId: chunk.deviceId } : void 0,
|
|
513
|
+
meta: {
|
|
514
|
+
top: meaningful.slice(0, 4).map((l) => `${l.className}(${(l.score * 100).toFixed(0)}%)`).join(", "),
|
|
515
|
+
inferenceMs: result.inferenceMs
|
|
516
|
+
}
|
|
517
|
+
});
|
|
432
518
|
if (result.inferenceMs > 0) {
|
|
433
519
|
const minConf = settings.minConfidence;
|
|
434
520
|
const allowedSet = settings.allowedClasses.length > 0 ? new Set(settings.allowedClasses.map((c) => c.toLowerCase())) : null;
|
|
@@ -683,7 +769,11 @@ var AudioAnalyzerAddon = class extends require_dist.BaseAddon {
|
|
|
683
769
|
effectiveBackend: backend,
|
|
684
770
|
selectedModel: this.config.selectedAudioModel || null
|
|
685
771
|
} });
|
|
686
|
-
const p = await createAudioPipeline(modelsDir, logger, {
|
|
772
|
+
const p = await createAudioPipeline(modelsDir, logger, {
|
|
773
|
+
backend,
|
|
774
|
+
pythonPath: backend === "yamnet-onnx" ? await this.ctx.deps.ensurePython() ?? void 0 : void 0,
|
|
775
|
+
installPythonRequirements: (f) => this.ctx.deps.installPythonRequirements(f)
|
|
776
|
+
});
|
|
687
777
|
await p.initialize();
|
|
688
778
|
this.pipeline = p;
|
|
689
779
|
if (!this.config.probedBestAudioBackend) this.reprobeAudioEngine().catch((err) => {
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { t as __require } from "../chunk-BdkLduGY.mjs";
|
|
1
2
|
import { L as mapAudioLabelToMacro, P as hydrateSchema, S as audioAnalyzerCapability, c as DEFAULT_AUDIO_ANALYZER_CONFIG, i as BaseAddon, j as errMsg, m as HF_BASE_URL, n as AUDIO_BACKEND_CHOICES, x as audioAnalysisCapability } from "../dist-C5jnNl0n.mjs";
|
|
2
3
|
import * as fs from "node:fs";
|
|
3
4
|
import * as path$1 from "node:path";
|
|
@@ -6,43 +7,69 @@ import { downloadFile } from "@camstack/core";
|
|
|
6
7
|
/**
|
|
7
8
|
* Create the appropriate audio pipeline.
|
|
8
9
|
*
|
|
9
|
-
* - 'yamnet-onnx': Cross-platform YAMNet ONNX (requires
|
|
10
|
+
* - 'yamnet-onnx': Cross-platform YAMNet ONNX via embedded Python (requires
|
|
11
|
+
* model download + `options.pythonPath`)
|
|
10
12
|
* - 'apple-soundanalysis': macOS 12+ Apple SoundAnalysis (zero model download, Neural Engine)
|
|
11
13
|
* - undefined: auto-detect (Apple SA on macOS, YAMNet on Linux)
|
|
12
14
|
*/
|
|
13
15
|
async function createAudioPipeline(modelsDir, logger, options) {
|
|
14
16
|
if ((options?.backend ?? (process.platform === "darwin" ? "apple-soundanalysis" : "yamnet-onnx")) === "apple-soundanalysis") return new AppleSoundAnalysisPipeline(logger);
|
|
15
|
-
|
|
17
|
+
if (!options?.pythonPath) throw new Error("YAMNet audio backend requires the embedded Python interpreter — pass options.pythonPath (ctx.deps.ensurePython()).");
|
|
18
|
+
return new YamnetPythonPipeline(modelsDir, logger, options.pythonPath, options.installPythonRequirements);
|
|
16
19
|
}
|
|
17
20
|
/**
|
|
18
21
|
* Canonical model URLs on the camstack HuggingFace mirror. Mirrors the
|
|
19
22
|
* convention every detection model follows (single point of truth =
|
|
20
23
|
* `HF_BASE_URL` from `@camstack/types`); the auto-download path uses
|
|
21
24
|
* `downloadFile` from `@camstack/core`, the SAME helper detection-
|
|
22
|
-
* pipeline uses to materialise its YOLO/face/plate models.
|
|
23
|
-
* model on disk → fetch from HF; cached file → no-op.
|
|
25
|
+
* pipeline uses to materialise its YOLO/face/plate models.
|
|
24
26
|
*
|
|
25
|
-
* Repo layout follows the detection-pipeline pattern:
|
|
26
27
|
* {domain}/{family}/{format}/{filename}
|
|
27
28
|
* For YAMNet that's `audioClassification/yamnet/onnx/camstack-yamnet.onnx`,
|
|
28
|
-
* with the labels JSON
|
|
29
|
-
* because they're format-agnostic (same 521 AudioSet class names whether
|
|
30
|
-
* the runtime is ONNX, OpenVINO, or TF).
|
|
29
|
+
* with the labels JSON one level up (`audioClassification/yamnet/`).
|
|
31
30
|
*/
|
|
32
31
|
var YAMNET_MODEL_URL = `${HF_BASE_URL}/audioClassification/yamnet/onnx/camstack-yamnet.onnx`;
|
|
33
32
|
var YAMNET_LABELS_URL = `${HF_BASE_URL}/audioClassification/yamnet/camstack-yamnet-labels.json`;
|
|
34
|
-
|
|
33
|
+
/**
|
|
34
|
+
* Locate the addon's bundled `python/` dir (holds `yamnet_audio.py` +
|
|
35
|
+
* `requirements-audio.txt`). Mirrors detection-pipeline's resolver: the
|
|
36
|
+
* published package first, then `__dirname`-relative candidates for the
|
|
37
|
+
* in-tree dev build. This file compiles to `dist/audio-analyzer/index.*`,
|
|
38
|
+
* so the bundle's `python/` is `../../python`.
|
|
39
|
+
*/
|
|
40
|
+
function resolveAudioPythonDir() {
|
|
41
|
+
const candidates = [];
|
|
42
|
+
try {
|
|
43
|
+
const pkgPath = __require.resolve("@camstack/addon-pipeline/package.json");
|
|
44
|
+
candidates.push(path$1.join(path$1.dirname(pkgPath), "python"));
|
|
45
|
+
} catch {}
|
|
46
|
+
candidates.push(path$1.join(__dirname, "../../python"), path$1.join(__dirname, "../python"), path$1.join(__dirname, "../../../python"));
|
|
47
|
+
for (const c of candidates) if (fs.existsSync(path$1.join(c, "yamnet_audio.py"))) return c;
|
|
48
|
+
throw new Error(`audio-analyzer: python/ dir (yamnet_audio.py) not found. Searched:\n${candidates.join("\n")}`);
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* YAMNet ONNX inference via a persistent embedded-Python subprocess
|
|
52
|
+
* (`yamnet_audio.py`). Replaces the former onnxruntime-node path so the hub
|
|
53
|
+
* ships no Node ONNX runtime. Wire protocol = length-prefixed frames
|
|
54
|
+
* ([4B LE length][payload]), matching the Apple SoundAnalysis CLI: outbound
|
|
55
|
+
* payload is the raw float32 16 kHz-mono waveform, inbound is a JSON result.
|
|
56
|
+
*/
|
|
57
|
+
var YamnetPythonPipeline = class {
|
|
35
58
|
modelsDir;
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
labels = [];
|
|
59
|
+
pythonPath;
|
|
60
|
+
installPythonRequirements;
|
|
39
61
|
log;
|
|
40
|
-
|
|
62
|
+
process = null;
|
|
63
|
+
receiveBuffer = Buffer.alloc(0);
|
|
64
|
+
pendingResolve = null;
|
|
65
|
+
pendingReject = null;
|
|
66
|
+
constructor(modelsDir, logger, pythonPath, installPythonRequirements) {
|
|
41
67
|
this.modelsDir = modelsDir;
|
|
68
|
+
this.pythonPath = pythonPath;
|
|
69
|
+
this.installPythonRequirements = installPythonRequirements;
|
|
42
70
|
this.log = logger;
|
|
43
71
|
}
|
|
44
72
|
async initialize() {
|
|
45
|
-
const ort = await import("onnxruntime-node");
|
|
46
73
|
const modelPath = path$1.join(this.modelsDir, "camstack-yamnet.onnx");
|
|
47
74
|
const labelsPath = path$1.join(this.modelsDir, "camstack-yamnet-labels.json");
|
|
48
75
|
if (!fs.existsSync(modelPath)) {
|
|
@@ -60,49 +87,101 @@ var YamnetOnnxPipeline = class {
|
|
|
60
87
|
} });
|
|
61
88
|
await downloadFile(YAMNET_LABELS_URL, labelsPath);
|
|
62
89
|
}
|
|
63
|
-
|
|
64
|
-
this.
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
this.
|
|
90
|
+
const pythonDir = resolveAudioPythonDir();
|
|
91
|
+
if (this.installPythonRequirements) await this.installPythonRequirements(path$1.join(pythonDir, "requirements-audio.txt"));
|
|
92
|
+
const scriptPath = path$1.join(pythonDir, "yamnet_audio.py");
|
|
93
|
+
const { spawn } = await import("node:child_process");
|
|
94
|
+
this.process = spawn(this.pythonPath, [
|
|
95
|
+
scriptPath,
|
|
96
|
+
"--model",
|
|
97
|
+
modelPath,
|
|
98
|
+
"--labels",
|
|
99
|
+
labelsPath
|
|
100
|
+
], { stdio: [
|
|
101
|
+
"pipe",
|
|
102
|
+
"pipe",
|
|
103
|
+
"pipe"
|
|
104
|
+
] });
|
|
105
|
+
this.process.stderr?.on("data", (chunk) => {
|
|
106
|
+
const text = chunk.toString().trim();
|
|
107
|
+
if (text) this.log.warn(text);
|
|
108
|
+
});
|
|
109
|
+
this.process.on("error", (err) => {
|
|
110
|
+
this.log.error("YAMNet Python process error", { meta: { error: err.message } });
|
|
111
|
+
this.pendingReject?.(err);
|
|
112
|
+
this.pendingReject = null;
|
|
113
|
+
this.pendingResolve = null;
|
|
114
|
+
});
|
|
115
|
+
this.process.on("exit", (code) => {
|
|
116
|
+
if (code !== 0 && code !== null) {
|
|
117
|
+
this.log.error("YAMNet Python process exited", { meta: { code } });
|
|
118
|
+
const err = /* @__PURE__ */ new Error(`YAMNet Python: process exited with code ${code}`);
|
|
119
|
+
this.pendingReject?.(err);
|
|
120
|
+
this.pendingReject = null;
|
|
121
|
+
this.pendingResolve = null;
|
|
122
|
+
}
|
|
123
|
+
});
|
|
124
|
+
this.process.stdout.on("data", (chunk) => {
|
|
125
|
+
this.receiveBuffer = Buffer.concat([this.receiveBuffer, chunk]);
|
|
126
|
+
this.tryReceive();
|
|
127
|
+
});
|
|
128
|
+
const ready = await this.receiveMessage();
|
|
129
|
+
if (ready["status"] !== "ready") throw new Error(`YAMNet Python: unexpected init response: ${JSON.stringify(ready)}`);
|
|
130
|
+
this.log.info(`YAMNet Python pipeline initialized (${String(ready["labels"] ?? "?")} labels)`);
|
|
68
131
|
}
|
|
69
132
|
async classify(chunk) {
|
|
70
|
-
if (!this.
|
|
71
|
-
const start = Date.now();
|
|
72
|
-
const ort = await import("onnxruntime-node");
|
|
133
|
+
if (!this.process?.stdin) throw new Error("YAMNet Python: process not initialized");
|
|
73
134
|
const waveform = chunk.sampleRate === 16e3 && chunk.channels === 1 ? chunk.data : resampleMono16k(chunk);
|
|
74
|
-
const
|
|
75
|
-
const
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
const
|
|
79
|
-
const numClasses = 521;
|
|
80
|
-
const numFrames = scores.length / numClasses;
|
|
81
|
-
const avgScores = new Float32Array(numClasses);
|
|
82
|
-
for (let f = 0; f < numFrames; f++) for (let c = 0; c < numClasses; c++) avgScores[c] += scores[f * numClasses + c];
|
|
83
|
-
for (let c = 0; c < numClasses; c++) avgScores[c] = avgScores[c] / numFrames;
|
|
84
|
-
const minScore = .05;
|
|
85
|
-
const classifications = [];
|
|
86
|
-
for (let c = 0; c < numClasses; c++) {
|
|
87
|
-
const score = avgScores[c];
|
|
88
|
-
if (score >= minScore) {
|
|
89
|
-
const label = c < this.labels.length ? this.labels[c] : String(c);
|
|
90
|
-
classifications.push({
|
|
91
|
-
className: label,
|
|
92
|
-
score: Math.round(score * 1e3) / 1e3
|
|
93
|
-
});
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
|
-
classifications.sort((a, b) => b.score - a.score);
|
|
135
|
+
const audioBuffer = Buffer.from(waveform.buffer, waveform.byteOffset, waveform.byteLength);
|
|
136
|
+
const lengthBuf = Buffer.allocUnsafe(4);
|
|
137
|
+
lengthBuf.writeUInt32LE(audioBuffer.length, 0);
|
|
138
|
+
this.process.stdin.write(Buffer.concat([lengthBuf, audioBuffer]));
|
|
139
|
+
const result = await this.receiveMessage();
|
|
97
140
|
return {
|
|
98
|
-
classifications: classifications
|
|
99
|
-
inferenceMs:
|
|
141
|
+
classifications: result["classifications"] ?? [],
|
|
142
|
+
inferenceMs: result["inferenceMs"] ?? 0
|
|
100
143
|
};
|
|
101
144
|
}
|
|
102
145
|
async dispose() {
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
146
|
+
const proc = this.process;
|
|
147
|
+
if (!proc) return;
|
|
148
|
+
this.process = null;
|
|
149
|
+
proc.stdin?.end();
|
|
150
|
+
proc.kill("SIGTERM");
|
|
151
|
+
if (!await new Promise((resolve) => {
|
|
152
|
+
const timer = setTimeout(() => resolve(false), 5e3);
|
|
153
|
+
proc.once("exit", () => {
|
|
154
|
+
clearTimeout(timer);
|
|
155
|
+
resolve(true);
|
|
156
|
+
});
|
|
157
|
+
})) {
|
|
158
|
+
try {
|
|
159
|
+
proc.kill("SIGKILL");
|
|
160
|
+
} catch {}
|
|
161
|
+
this.log.warn("YAMNet Python process did not exit gracefully — sent SIGKILL");
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
receiveMessage() {
|
|
165
|
+
return new Promise((resolve, reject) => {
|
|
166
|
+
this.pendingResolve = resolve;
|
|
167
|
+
this.pendingReject = reject;
|
|
168
|
+
});
|
|
169
|
+
}
|
|
170
|
+
tryReceive() {
|
|
171
|
+
if (this.receiveBuffer.length < 4) return;
|
|
172
|
+
const length = this.receiveBuffer.readUInt32LE(0);
|
|
173
|
+
if (this.receiveBuffer.length < 4 + length) return;
|
|
174
|
+
const jsonBytes = this.receiveBuffer.subarray(4, 4 + length);
|
|
175
|
+
this.receiveBuffer = this.receiveBuffer.subarray(4 + length);
|
|
176
|
+
const resolve = this.pendingResolve;
|
|
177
|
+
const reject = this.pendingReject;
|
|
178
|
+
this.pendingResolve = null;
|
|
179
|
+
this.pendingReject = null;
|
|
180
|
+
if (!resolve) return;
|
|
181
|
+
try {
|
|
182
|
+
resolve(JSON.parse(jsonBytes.toString("utf8")));
|
|
183
|
+
} catch (err) {
|
|
184
|
+
reject?.(err instanceof Error ? err : new Error(String(err)));
|
|
106
185
|
}
|
|
107
186
|
}
|
|
108
187
|
};
|
|
@@ -422,6 +501,14 @@ var AudioAnalyzerProvider = class {
|
|
|
422
501
|
});
|
|
423
502
|
}
|
|
424
503
|
this.classifyCallCount++;
|
|
504
|
+
const meaningful = result.labels.filter((l) => l.score >= .15 && l.className.toLowerCase() !== "silence");
|
|
505
|
+
if (meaningful.length > 0) this.log.info("audio classification", {
|
|
506
|
+
tags: chunk.deviceId !== void 0 ? { deviceId: chunk.deviceId } : void 0,
|
|
507
|
+
meta: {
|
|
508
|
+
top: meaningful.slice(0, 4).map((l) => `${l.className}(${(l.score * 100).toFixed(0)}%)`).join(", "),
|
|
509
|
+
inferenceMs: result.inferenceMs
|
|
510
|
+
}
|
|
511
|
+
});
|
|
425
512
|
if (result.inferenceMs > 0) {
|
|
426
513
|
const minConf = settings.minConfidence;
|
|
427
514
|
const allowedSet = settings.allowedClasses.length > 0 ? new Set(settings.allowedClasses.map((c) => c.toLowerCase())) : null;
|
|
@@ -676,7 +763,11 @@ var AudioAnalyzerAddon = class extends BaseAddon {
|
|
|
676
763
|
effectiveBackend: backend,
|
|
677
764
|
selectedModel: this.config.selectedAudioModel || null
|
|
678
765
|
} });
|
|
679
|
-
const p = await createAudioPipeline(modelsDir, logger, {
|
|
766
|
+
const p = await createAudioPipeline(modelsDir, logger, {
|
|
767
|
+
backend,
|
|
768
|
+
pythonPath: backend === "yamnet-onnx" ? await this.ctx.deps.ensurePython() ?? void 0 : void 0,
|
|
769
|
+
installPythonRequirements: (f) => this.ctx.deps.installPythonRequirements(f)
|
|
770
|
+
});
|
|
680
771
|
await p.initialize();
|
|
681
772
|
this.pipeline = p;
|
|
682
773
|
if (!this.config.probedBestAudioBackend) this.reprobeAudioEngine().catch((err) => {
|
|
@@ -302,7 +302,7 @@ async function getNodeAv() {
|
|
|
302
302
|
return _nav;
|
|
303
303
|
}
|
|
304
304
|
async function getConstants() {
|
|
305
|
-
if (!_consts) _consts = await
|
|
305
|
+
if (!_consts) _consts = await import("node-av/constants");
|
|
306
306
|
return _consts;
|
|
307
307
|
}
|
|
308
308
|
async function getSharp() {
|
|
@@ -298,7 +298,7 @@ async function getNodeAv() {
|
|
|
298
298
|
return _nav;
|
|
299
299
|
}
|
|
300
300
|
async function getConstants() {
|
|
301
|
-
if (!_consts) _consts = await import("
|
|
301
|
+
if (!_consts) _consts = await import("node-av/constants");
|
|
302
302
|
return _consts;
|
|
303
303
|
}
|
|
304
304
|
async function getSharp() {
|