@camstack/addon-pipeline 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@camstack/addon-pipeline",
3
- "version": "1.0.0",
3
+ "version": "1.0.1",
4
4
  "description": "CamStack Pipeline bundle — runner, detection, motion, decoders, audio + stream broker. Multi-entry npm package shipping 7 addons under a single bundle.",
5
5
  "keywords": [
6
6
  "camstack",
@@ -69,9 +69,7 @@
69
69
  "description": "Detection, motion, decoders, audio + stream broker."
70
70
  },
71
71
  "nativeDependencies": {
72
- "node-av": "^5.2.4",
73
- "onnxruntime-node": "^1.24.3",
74
- "sharp": "^0.34.0"
72
+ "node-av": "^5.2.4"
75
73
  },
76
74
  "addons": [
77
75
  {
@@ -296,7 +294,6 @@
296
294
  "@camstack/types": "^0.1.0",
297
295
  "react": ">=18",
298
296
  "react-dom": ">=18",
299
- "sharp": "^0.34.0",
300
297
  "werift": "^0.22.9"
301
298
  },
302
299
  "dependencies": {
@@ -304,8 +301,6 @@
304
301
  "lucide-react": "^0.511.0",
305
302
  "mp4box": "0.5.4",
306
303
  "node-av": "^5.2.4",
307
- "onnxruntime-node": "^1.24.3",
308
- "sharp": "^0.35.2",
309
304
  "zod": "^4.3.6"
310
305
  },
311
306
  "devDependencies": {
@@ -0,0 +1,5 @@
1
+ # YAMNet audio classification (yamnet_audio.py) — runs in the embedded
2
+ # portable Python. Installed lazily via ctx.deps.installPythonRequirements
3
+ # the first time the cross-platform audio backend boots.
4
+ numpy>=1.26,<3
5
+ onnxruntime>=1.20,<2
@@ -0,0 +1,113 @@
1
+ #!/usr/bin/env python3
2
+ """YAMNet audio classification subprocess.
3
+
4
+ Runs YAMNet ONNX inference via the embedded portable Python's onnxruntime,
5
+ replacing the former Node `onnxruntime-node` path. Persistent process: reads
6
+ length-prefixed float32 waveforms (16 kHz mono) from stdin, writes
7
+ length-prefixed JSON results to stdout. Mirrors the Apple SoundAnalysis Swift
8
+ CLI wire protocol so the Node side (YamnetPythonPipeline) reuses one receive
9
+ loop.
10
+
11
+ Wire protocol (both directions): [4B little-endian length][payload]
12
+ stdin payload = raw float32 waveform bytes (16 kHz mono)
13
+ stdout payload = JSON {"classifications":[{"className","score"}], "inferenceMs"}
14
+ Startup: emits {"status":"ready"} once the model is loaded. Diagnostics go to
15
+ stderr (stdout is the binary framing channel only).
16
+ """
17
+ from __future__ import annotations
18
+
19
+ import argparse
20
+ import json
21
+ import struct
22
+ import sys
23
+ import time
24
+
25
+ import numpy as np
26
+ import onnxruntime as ort
27
+
28
+ # Match the former Node YamnetOnnxPipeline thresholds.
29
+ MIN_SCORE = 0.05
30
+ TOP_K = 10
31
+
32
+
33
def write_msg(obj: dict) -> None:
    """Emit *obj* on stdout as a single length-prefixed JSON frame.

    The frame is a 4-byte little-endian unsigned length followed by the
    UTF-8 encoded JSON text. stdout is flushed after each frame so the
    reader on the other end of the pipe is never left waiting on a buffer.
    """
    payload = json.dumps(obj).encode("utf-8")
    header = struct.pack("<I", len(payload))
    out = sys.stdout.buffer
    for part in (header, payload):
        out.write(part)
    out.flush()
38
+
39
+
40
+ def read_exact(n: int) -> bytes | None:
41
+ buf = bytearray()
42
+ while len(buf) < n:
43
+ chunk = sys.stdin.buffer.read(n - len(buf))
44
+ if not chunk:
45
+ return None
46
+ buf.extend(chunk)
47
+ return bytes(buf)
48
+
49
+
50
def _load_labels(path: str) -> list:
    """Load the class-label list from a JSON file, best-effort.

    Returns an empty list when *path* is empty, the file cannot be read or
    parsed, or the JSON document is not an array. In the failure cases a
    diagnostic is printed to stderr; callers then fall back to using the
    class index as the label.
    """
    if not path:
        return []
    try:
        with open(path, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except Exception as exc:  # noqa: BLE001 — labels are best-effort
        print(f"yamnet_audio: failed to read labels: {exc}", file=sys.stderr)
        return []
    if not isinstance(loaded, list):
        # Indexing a dict/str by class id would fail on every chunk later.
        print(
            "yamnet_audio: failed to read labels: expected a JSON array, "
            f"got {type(loaded).__name__}",
            file=sys.stderr,
        )
        return []
    return loaded


def _top_classifications(scores, labels, min_score, top_k) -> list:
    """Reduce raw model scores to the top-scoring classifications.

    *scores* is treated as ``[..., num_classes]``: every leading axis is
    averaged away, classes whose mean is below *min_score* are dropped, and
    the remaining ones are returned highest-first, truncated to *top_k*.
    Ordering uses the unrounded mean (ties keep class-index order); the
    reported score is rounded to 3 decimals. Classes without an entry in
    *labels* are named by their index.

    Raises ``ValueError`` if *scores* is a scalar (no class axis).
    """
    arr = np.asarray(scores, dtype=np.float32)
    if arr.ndim == 0:
        raise ValueError("model output has no class axis")
    if arr.size == 0:
        return []
    # Collapse 1-D, 2-D and higher-rank outputs to [frames, num_classes].
    per_frame = arr.reshape(-1, arr.shape[-1])
    avg = per_frame.mean(axis=0)
    keep = np.flatnonzero(avg >= min_score)  # NaN compares False, so it is dropped
    order = keep[np.argsort(-avg[keep], kind="stable")][:top_k]
    return [
        {
            "className": labels[i] if i < len(labels) else str(i),
            "score": round(float(avg[i]), 3),
        }
        for i in order.tolist()
    ]


def main() -> None:
    """Run the persistent classification loop.

    Parses ``--model`` (required) and ``--labels`` (optional JSON file),
    creates a CPU ONNX Runtime session, emits a ``{"status": "ready"}`` frame,
    then serves length-prefixed float32 waveforms from stdin until EOF. Each
    request gets exactly one reply frame; a failure while processing a chunk
    is reported in that reply's ``"error"`` field instead of ending the loop.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--model", required=True)
    ap.add_argument("--labels", default="")
    args = ap.parse_args()

    labels = _load_labels(args.labels)

    session = ort.InferenceSession(args.model, providers=["CPUExecutionProvider"])
    input_name = session.get_inputs()[0].name
    output_name = session.get_outputs()[0].name

    write_msg({"status": "ready", "labels": len(labels)})

    while True:
        header = read_exact(4)
        if header is None:
            break
        (length,) = struct.unpack("<I", header)
        payload = read_exact(length)
        if payload is None:
            break

        # perf_counter is monotonic, so the duration cannot go negative or
        # jump if the system clock is adjusted mid-inference.
        start = time.perf_counter()
        error = None
        try:
            waveform = np.frombuffer(payload, dtype=np.float32)
            outputs = session.run([output_name], {input_name: waveform})
            classifications = _top_classifications(
                outputs[0], labels, MIN_SCORE, TOP_K
            )
        except Exception as exc:  # noqa: BLE001 — never kill the loop on one chunk
            classifications = []
            error = str(exc)

        reply = {
            "classifications": classifications,
            "inferenceMs": int((time.perf_counter() - start) * 1000),
        }
        if error is not None:
            reply["error"] = error
        # Written outside the try so a failing stdout is not retried by the
        # per-chunk error handler.
        write_msg(reply)


if __name__ == "__main__":
    main()