quietplace 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Aliaksei Kaliutau
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,224 @@
1
+ Metadata-Version: 2.4
2
+ Name: quietplace
3
+ Version: 0.2.2
4
+ Summary: Semantic noise suppression demo controlled by Gemma/Gemma-compatible models served through vLLM.
5
+ Author: Aliaksei Kaliutau
6
+ License: MIT
7
+ Requires-Python: >=3.10
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: numpy>=1.26
11
+ Requires-Dist: scipy>=1.11
12
+ Requires-Dist: soundfile>=0.12
13
+ Requires-Dist: pandas>=2.1
14
+ Requires-Dist: matplotlib>=3.8
15
+ Requires-Dist: openai>=1.40
16
+ Requires-Dist: torch>=2.2
17
+ Requires-Dist: transformers>=4.43
18
+ Requires-Dist: tqdm>=4.66
19
+ Provides-Extra: serve
20
+ Requires-Dist: vllm>=0.6; extra == "serve"
21
+ Provides-Extra: dev
22
+ Requires-Dist: build>=1.2; extra == "dev"
23
+ Requires-Dist: twine>=5.1; extra == "dev"
24
+ Requires-Dist: pytest>=8.2; extra == "dev"
25
+ Provides-Extra: test
26
+ Requires-Dist: pytest>=8.0; extra == "test"
27
+ Dynamic: license-file
28
+
29
+ # QuietPlace vLLM
30
+
31
+ QuietPlace is a package for safety-aware semantic noise suppression.
32
+ The package keeps the AST event classifier, deterministic DSP executor, metrics, and plotting code as a reusable Python library,
33
+ then uses a Jupyter notebook as the public demo surface.
34
+
35
+ Gemma/Gemma-compatible inference is called through a **vLLM OpenAI-compatible server**.
36
+
37
+ ## Chapter 0 — Install the package
38
+
39
+ 1. Clone the repository
40
+
41
+ ```bash
42
+ git clone https://github.com/akaliutau/quietplace.git
43
+ cd quietplace
44
+ ```
45
+
46
+ 2. Create and activate a Conda environment
47
+
48
+ ```bash
49
+ conda create -n quietplace python=3.12 -y
50
+ conda activate quietplace
51
+ ```
52
+
53
+ 3. Install dependencies
54
+
55
+ ```bash
56
+ pip install -r requirements.txt
57
+ ```
58
+
59
+ ## Chapter 1 — Put the trained AST classifier in the hard-coded location
60
+
61
+ The demo assumes the AST classifier is already trained and exported with `save_pretrained()`.
62
+
63
+ Hard-coded pointer:
64
+
65
+ ```text
66
+ /kaggle/input/quietplace-policy-ast/quietplace_policy_ast
67
+ ```
68
+
69
+ This is defined in:
70
+
71
+ ```text
72
+ quietplace/config.py
73
+ ```
74
+
75
+ Expected contents:
76
+
77
+ ```text
78
+ config.json
79
+ model.safetensors or pytorch_model.bin
80
+ preprocessor_config.json
81
+ policy_map.json # optional but recommended
82
+ ```
83
+
84
+ ## Chapter 2 — Start vLLM instead of Ollama
85
+
86
+ Example local server:
87
+
88
+ ```bash
89
+ vllm serve google/gemma-4-e2b-it \
90
+ --host 0.0.0.0 \
91
+ --port 8000 \
92
+ --dtype auto \
93
+ --api-key EMPTY
94
+ ```
95
+
96
+ The package calls:
97
+
98
+ ```text
99
+ http://localhost:8000/v1/chat/completions
100
+ ```
101
+
102
+ via the official `openai` Python client. If your vLLM model/tool parser does not support native tool calls,
103
+ the controller falls back to JSON parsing, and the deterministic DSP executor still runs only whitelisted actions.
104
+
105
+ ## Chapter 3 — Create a realistic demo input
106
+
107
+ Best competition input: combine held-out dataset clips so the demo has known ground-truth segments.
108
+
109
+ ```bash
110
+ quietplace make-demo-scene \
111
+ --traffic-path /datasets/fsd50k/traffic_heldout.wav \
112
+ --siren-path /datasets/urbansound8k/fold10/siren_heldout.wav \
113
+ --speech-path /data/recorded_help.wav \
114
+ --out data/demo_scene
115
+ ```
116
+
117
+ No dataset files available? Use the deterministic synthetic fallback:
118
+
119
+ ```bash
120
+ quietplace make-demo-scene --out data/demo_scene
121
+ ```
122
+
123
+ Outputs:
124
+
125
+ ```text
126
+ data/demo_scene/quietplace_realistic_demo_scene.wav
127
+ data/demo_scene/events_demo.json
128
+ data/demo_scene/scene_manifest.json
129
+ ```
130
+
131
+ ## Chapter 4 — Run the package pipeline
132
+
133
+ With the generated event table:
134
+
135
+ ```bash
136
+ quietplace run data/demo_scene/quietplace_realistic_demo_scene.wav \
137
+ --events-json data/demo_scene/events_demo.json \
138
+ --backend auto \
139
+ --vllm-base-url http://localhost:8000/v1 \
140
+ --vllm-api-key EMPTY \
141
+ --vllm-model google/gemma-4-e2b-it \
142
+ --out runs/quietplace_demo
143
+ ```
144
+
145
+ To force vLLM and fail if the server is down:
146
+
147
+ ```bash
148
+ quietplace run data/demo_scene/quietplace_realistic_demo_scene.wav \
149
+ --events-json data/demo_scene/events_demo.json \
150
+ --backend vllm \
151
+ --out runs/quietplace_demo
152
+ ```
153
+
154
+ Outputs:
155
+
156
+ ```text
157
+ runs/quietplace_demo/original.wav
158
+ runs/quietplace_demo/suppressed.wav
159
+ runs/quietplace_demo/events.json
160
+ runs/quietplace_demo/controller_policy.json
161
+ runs/quietplace_demo/features.json
162
+ runs/quietplace_demo/segment_metrics.csv
163
+ runs/quietplace_demo/bandpower_metrics.csv
164
+ runs/quietplace_demo/metrics_summary.json
165
+ ```
166
+
167
+ ## Chapter 5 — Generate charts for the video/writeup
168
+
169
+ ```bash
170
+ quietplace charts data/demo_scene/quietplace_realistic_demo_scene.wav \
171
+ --events-json data/demo_scene/events_demo.json \
172
+ --backend auto \
173
+ --out runs/quietplace_demo \
174
+ --charts-out runs/quietplace_demo/charts
175
+ ```
176
+
177
+ Charts:
178
+
179
+ ```text
180
+ 01_before_after_spectrograms.png
181
+ 02_waveform_ab.png
182
+ 03_segment_metrics.png
183
+ 04_bandpower_metrics.png
184
+ ```
185
+
186
+ ## Chapter 6 — Create writeup chapters from the run
187
+
188
+ ```bash
189
+ quietplace chapters \
190
+ --run-dir runs/quietplace_demo \
191
+ --out reports/chapters
192
+ ```
193
+
194
+ This creates draft markdown chapters:
195
+
196
+ ```text
197
+ reports/chapters/01_problem.md
198
+ reports/chapters/02_architecture.md
199
+ reports/chapters/03_results.md
200
+ reports/chapters/04_demo_script.md
201
+ ```
202
+
203
+ Use these as the structure for the Kaggle writeup and video narration.
204
+
205
+
206
+ ## Project structure
207
+
208
+ ```text
209
+ quietplace/audio.py # audio IO, event schema, spectral features
210
+ quietplace/classifier.py # hard-coded AST classifier wrapper
211
+ quietplace/controller.py # vLLM/OpenAI-compatible policy controller
212
+ quietplace/dsp.py # deterministic suppressor executor
213
+ quietplace/metrics.py # calm-zone and preservation metrics
214
+ quietplace/viz.py # notebook charts
215
+ quietplace/demo_scene.py # realistic scene builder / synthetic fallback
216
+ quietplace/pipeline.py # end-to-end orchestration
217
+ quietplace/cli.py # commands used above
218
+ notebooks/00_quietplace_vllm_demo.ipynb
219
+ ```
220
+
221
+ ## Build and publish the package
222
+
223
+ See the details in [publish.md](publish.md).
224
+
@@ -0,0 +1,196 @@
1
+ # QuietPlace vLLM
2
+
3
+ QuietPlace is a package for safety-aware semantic noise suppression.
4
+ The package keeps the AST event classifier, deterministic DSP executor, metrics, and plotting code as a reusable Python library,
5
+ then uses a Jupyter notebook as the public demo surface.
6
+
7
+ Gemma/Gemma-compatible inference is called through a **vLLM OpenAI-compatible server**.
8
+
9
+ ## Chapter 0 — Install the package
10
+
11
+ 1. Clone the repository
12
+
13
+ ```bash
14
+ git clone https://github.com/akaliutau/quietplace.git
15
+ cd quietplace
16
+ ```
17
+
18
+ 2. Create and activate a Conda environment
19
+
20
+ ```bash
21
+ conda create -n quietplace python=3.12 -y
22
+ conda activate quietplace
23
+ ```
24
+
25
+ 3. Install dependencies
26
+
27
+ ```bash
28
+ pip install -r requirements.txt
29
+ ```
30
+
31
+ ## Chapter 1 — Put the trained AST classifier in the hard-coded location
32
+
33
+ The demo assumes the AST classifier is already trained and exported with `save_pretrained()`.
34
+
35
+ Hard-coded pointer:
36
+
37
+ ```text
38
+ /kaggle/input/quietplace-policy-ast/quietplace_policy_ast
39
+ ```
40
+
41
+ This is defined in:
42
+
43
+ ```text
44
+ quietplace/config.py
45
+ ```
46
+
47
+ Expected contents:
48
+
49
+ ```text
50
+ config.json
51
+ model.safetensors or pytorch_model.bin
52
+ preprocessor_config.json
53
+ policy_map.json # optional but recommended
54
+ ```
55
+
56
+ ## Chapter 2 — Start vLLM instead of Ollama
57
+
58
+ Example local server:
59
+
60
+ ```bash
61
+ vllm serve google/gemma-4-e2b-it \
62
+ --host 0.0.0.0 \
63
+ --port 8000 \
64
+ --dtype auto \
65
+ --api-key EMPTY
66
+ ```
67
+
68
+ The package calls:
69
+
70
+ ```text
71
+ http://localhost:8000/v1/chat/completions
72
+ ```
73
+
74
+ via the official `openai` Python client. If your vLLM model/tool parser does not support native tool calls,
75
+ the controller falls back to JSON parsing, and the deterministic DSP executor still runs only whitelisted actions.
76
+
77
+ ## Chapter 3 — Create a realistic demo input
78
+
79
+ Best competition input: combine held-out dataset clips so the demo has known ground-truth segments.
80
+
81
+ ```bash
82
+ quietplace make-demo-scene \
83
+ --traffic-path /datasets/fsd50k/traffic_heldout.wav \
84
+ --siren-path /datasets/urbansound8k/fold10/siren_heldout.wav \
85
+ --speech-path /data/recorded_help.wav \
86
+ --out data/demo_scene
87
+ ```
88
+
89
+ No dataset files available? Use the deterministic synthetic fallback:
90
+
91
+ ```bash
92
+ quietplace make-demo-scene --out data/demo_scene
93
+ ```
94
+
95
+ Outputs:
96
+
97
+ ```text
98
+ data/demo_scene/quietplace_realistic_demo_scene.wav
99
+ data/demo_scene/events_demo.json
100
+ data/demo_scene/scene_manifest.json
101
+ ```
102
+
103
+ ## Chapter 4 — Run the package pipeline
104
+
105
+ With the generated event table:
106
+
107
+ ```bash
108
+ quietplace run data/demo_scene/quietplace_realistic_demo_scene.wav \
109
+ --events-json data/demo_scene/events_demo.json \
110
+ --backend auto \
111
+ --vllm-base-url http://localhost:8000/v1 \
112
+ --vllm-api-key EMPTY \
113
+ --vllm-model google/gemma-4-e2b-it \
114
+ --out runs/quietplace_demo
115
+ ```
116
+
117
+ To force vLLM and fail if the server is down:
118
+
119
+ ```bash
120
+ quietplace run data/demo_scene/quietplace_realistic_demo_scene.wav \
121
+ --events-json data/demo_scene/events_demo.json \
122
+ --backend vllm \
123
+ --out runs/quietplace_demo
124
+ ```
125
+
126
+ Outputs:
127
+
128
+ ```text
129
+ runs/quietplace_demo/original.wav
130
+ runs/quietplace_demo/suppressed.wav
131
+ runs/quietplace_demo/events.json
132
+ runs/quietplace_demo/controller_policy.json
133
+ runs/quietplace_demo/features.json
134
+ runs/quietplace_demo/segment_metrics.csv
135
+ runs/quietplace_demo/bandpower_metrics.csv
136
+ runs/quietplace_demo/metrics_summary.json
137
+ ```
138
+
139
+ ## Chapter 5 — Generate charts for the video/writeup
140
+
141
+ ```bash
142
+ quietplace charts data/demo_scene/quietplace_realistic_demo_scene.wav \
143
+ --events-json data/demo_scene/events_demo.json \
144
+ --backend auto \
145
+ --out runs/quietplace_demo \
146
+ --charts-out runs/quietplace_demo/charts
147
+ ```
148
+
149
+ Charts:
150
+
151
+ ```text
152
+ 01_before_after_spectrograms.png
153
+ 02_waveform_ab.png
154
+ 03_segment_metrics.png
155
+ 04_bandpower_metrics.png
156
+ ```
157
+
158
+ ## Chapter 6 — Create writeup chapters from the run
159
+
160
+ ```bash
161
+ quietplace chapters \
162
+ --run-dir runs/quietplace_demo \
163
+ --out reports/chapters
164
+ ```
165
+
166
+ This creates draft markdown chapters:
167
+
168
+ ```text
169
+ reports/chapters/01_problem.md
170
+ reports/chapters/02_architecture.md
171
+ reports/chapters/03_results.md
172
+ reports/chapters/04_demo_script.md
173
+ ```
174
+
175
+ Use these as the structure for the Kaggle writeup and video narration.
176
+
177
+
178
+ ## Project structure
179
+
180
+ ```text
181
+ quietplace/audio.py # audio IO, event schema, spectral features
182
+ quietplace/classifier.py # hard-coded AST classifier wrapper
183
+ quietplace/controller.py # vLLM/OpenAI-compatible policy controller
184
+ quietplace/dsp.py # deterministic suppressor executor
185
+ quietplace/metrics.py # calm-zone and preservation metrics
186
+ quietplace/viz.py # notebook charts
187
+ quietplace/demo_scene.py # realistic scene builder / synthetic fallback
188
+ quietplace/pipeline.py # end-to-end orchestration
189
+ quietplace/cli.py # commands used above
190
+ notebooks/00_quietplace_vllm_demo.ipynb
191
+ ```
192
+
193
+ ## Build and publish the package
194
+
195
+ See the details in [publish.md](publish.md).
196
+
@@ -0,0 +1,46 @@
1
+ [build-system]
2
+ requires = ["setuptools>=69", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "quietplace"
7
+ version = "0.2.2"
8
+ description = "Semantic noise suppression demo controlled by Gemma/Gemma-compatible models served through vLLM."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "Aliaksei Kaliutau" }]
13
+ dependencies = [
14
+ "numpy>=1.26",
15
+ "scipy>=1.11",
16
+ "soundfile>=0.12",
17
+ "pandas>=2.1",
18
+ "matplotlib>=3.8",
19
+ "openai>=1.40",
20
+ "torch>=2.2",
21
+ "transformers>=4.43",
22
+ "tqdm>=4.66",
23
+ ]
24
+
25
+ [project.optional-dependencies]
26
+ serve = [
27
+ "vllm>=0.6",
28
+ ]
29
+ dev = [
30
+ "build>=1.2",
31
+ "twine>=5.1",
32
+ "pytest>=8.2",
33
+ ]
34
+ test = [
35
+ "pytest>=8.0",
36
+ ]
37
+
38
+ [project.scripts]
39
+ quietplace = "quietplace.cli:main"
40
+
41
+ [tool.setuptools.packages.find]
42
+ include = ["quietplace*"]
43
+
44
+ [tool.pytest.ini_options]
45
+ pythonpath = [".", "quietplace"]
46
+ testpaths = ["tests"]
@@ -0,0 +1,7 @@
1
"""quietplace: notebook-first semantic noise suppression."""

from .audio import Event
from .pipeline import run_pipeline

__all__ = ["Event", "run_pipeline"]
# Keep in sync with ``version`` in pyproject.toml (this release is 0.2.2).
__version__ = "0.2.2"
@@ -0,0 +1,188 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import math
5
+ from dataclasses import asdict, dataclass
6
+ from pathlib import Path
7
+ from typing import Any, Iterable
8
+
9
+ import numpy as np
10
+ import soundfile as sf
11
+ from scipy import signal
12
+
13
+ from .config import PROTECTED_LABELS, SR, SUPPRESSIBLE_LABELS
14
+
15
+
16
+ @dataclass(slots=True)
17
+ class Event:
18
+ label: str
19
+ start: float
20
+ end: float
21
+ confidence: float
22
+ policy_hint: str = "uncertain"
23
+ source: str = "classifier"
24
+
25
+ def as_dict(self) -> dict[str, Any]:
26
+ return asdict(self)
27
+
28
+ @classmethod
29
+ def from_dict(cls, row: dict[str, Any]) -> "Event":
30
+ return cls(
31
+ label=str(row.get("label", "unknown")),
32
+ start=float(row.get("start", 0.0)),
33
+ end=float(row.get("end", 0.0)),
34
+ confidence=float(row.get("confidence", 0.0)),
35
+ policy_hint=infer_policy(str(row.get("label", "unknown")), str(row.get("policy_hint", row.get("policy", "uncertain")))),
36
+ source=str(row.get("source", "json")),
37
+ )
38
+
39
+
40
def infer_policy(label: str, fallback: str = "uncertain") -> str:
    """Choose a policy string for ``label``.

    The label is normalized first. Membership in ``PROTECTED_LABELS`` yields
    ``"preserve"`` and membership in ``SUPPRESSIBLE_LABELS`` yields
    ``"suppress"``; the protected check is made first. Otherwise ``fallback``
    is returned when it is one of ``"preserve"``, ``"suppress"`` or
    ``"uncertain"``, and ``"uncertain"`` in every other case.
    """
    key = normalize_label(label)
    if key in PROTECTED_LABELS:
        return "preserve"
    if key in SUPPRESSIBLE_LABELS:
        return "suppress"
    if fallback in {"preserve", "suppress", "uncertain"}:
        return fallback
    return "uncertain"
47
+
48
+
49
def normalize_label(label: str) -> str:
    """Canonicalize a label: trimmed, lower-cased, words joined by ``_``.

    Slashes and hyphens are treated as word separators, and any run of
    whitespace collapses to a single underscore.
    """
    text = str(label).strip().lower()
    for separator in ("/", "-"):
        text = text.replace(separator, " ")
    words = text.split()
    return "_".join(words)
51
+
52
+
53
def normalize(y: np.ndarray, peak: float = 0.98) -> np.ndarray:
    """Flatten ``y`` to float32 and scale it so its largest magnitude is ``peak``.

    Empty input and input whose largest magnitude is at most ``1e-9`` are
    returned unscaled (still flattened and float32) to avoid dividing by
    zero.
    """
    samples = np.asarray(y, dtype=np.float32).reshape(-1)
    if samples.size == 0:
        return samples.astype(np.float32)
    largest = float(np.max(np.abs(samples)))
    if largest <= 1e-9:
        return samples.astype(np.float32)
    scaled = samples / largest * float(peak)
    return scaled.astype(np.float32)
59
+
60
+
61
def to_mono(y: np.ndarray) -> np.ndarray:
    """Return ``y`` as float32, averaging over axis 1 unless it is already 1-D."""
    samples = np.asarray(y, dtype=np.float32)
    if samples.ndim != 1:
        # Multi-channel layout: collapse the second axis by averaging.
        return samples.mean(axis=1).astype(np.float32)
    return samples
66
+
67
+
68
def resample_audio(y: np.ndarray, sr: int, target_sr: int = SR) -> np.ndarray:
    """Resample ``y`` from ``sr`` to ``target_sr`` and return float32 samples.

    When both rates are equal the samples are only cast to float32.
    Otherwise polyphase resampling is used, with the up/down factors reduced
    by the greatest common divisor of the two rates.
    """
    source_rate = int(sr)
    wanted_rate = int(target_sr)
    if source_rate == wanted_rate:
        return y.astype(np.float32)
    divisor = math.gcd(source_rate, wanted_rate)
    resampled = signal.resample_poly(y, wanted_rate // divisor, source_rate // divisor)
    return resampled.astype(np.float32)
75
+
76
+
77
def load_audio(path: str | Path, sr: int = SR, peak: float | None = None) -> tuple[np.ndarray, int]:
    """Read an audio file as mono float32 samples at rate ``sr``.

    The file is read with ``soundfile``, mixed down with ``to_mono`` and
    resampled with ``resample_audio``. Peak normalization is applied only
    when ``peak`` is given. Returns ``(samples, sr)``.
    """
    raw, source_sr = sf.read(str(path), always_2d=False)
    samples = resample_audio(to_mono(raw), int(source_sr), sr)
    if peak is None:
        return samples.astype(np.float32), sr
    return normalize(samples, peak=peak).astype(np.float32), sr
84
+
85
+
86
def write_audio(path: str | Path, y: np.ndarray, sr: int = SR) -> None:
    """Write ``y`` as float32 samples to ``path``, creating parent directories."""
    parent_dir = Path(path).parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    samples = np.asarray(y, dtype=np.float32)
    sf.write(str(path), samples, sr)
89
+
90
+
91
def fit_audio(y: np.ndarray, n_samples: int) -> np.ndarray:
    """Flatten ``y`` to float32 and force its length to ``n_samples``.

    Longer input is truncated; shorter input is zero-padded at the end.
    """
    samples = np.asarray(y, dtype=np.float32).reshape(-1)
    shortfall = n_samples - samples.size
    if shortfall <= 0:
        return samples[:n_samples]
    return np.pad(samples, (0, shortfall)).astype(np.float32)
96
+
97
+
98
def paste_clip(base: np.ndarray, clip: np.ndarray, sr: int, start_sec: float, gain: float = 1.0) -> np.ndarray:
    """Return a float32 copy of ``base`` with ``clip`` mixed in at ``start_sec``.

    Args:
        base: Signal to mix into; it is copied, never modified in place.
        clip: Samples to add. Any array-like is accepted (lists and tuples
            included).
        sr: Sample rate used to convert ``start_sec`` to a sample index.
        start_sec: Insertion time; negative values are clamped to the start
            of ``base``.
        gain: Multiplier applied to ``clip`` before it is added.

    Returns:
        The mixed signal. The part of ``clip`` that would run past the end of
        ``base`` is dropped, and a start at or beyond the end leaves the copy
        unchanged.
    """
    out = np.asarray(base, dtype=np.float32).copy()
    # Coerce so that plain Python sequences can be scaled by ``gain``; without
    # this, ``float * list`` raises TypeError.
    clip = np.asarray(clip)
    start = max(0, int(round(float(start_sec) * sr)))
    if start >= len(out):
        return out
    end = min(len(out), start + len(clip))
    out[start:end] += float(gain) * clip[: end - start]
    return out
106
+
107
+
108
def read_events_json(path: str | Path) -> list[Event]:
    """Load events from a JSON file.

    The file may hold either a list of event rows or an object with an
    ``"events"`` key containing that list. Each row is converted with
    ``Event.from_dict``.

    The file is decoded as UTF-8 explicitly, so the result does not depend on
    the platform's locale encoding.
    """
    data = json.loads(Path(path).read_text(encoding="utf-8"))
    if isinstance(data, dict) and "events" in data:
        data = data["events"]
    return [Event.from_dict(row) for row in data]
113
+
114
+
115
def write_events_json(path: str | Path, events: Iterable[Event]) -> None:
    """Serialize ``events`` to ``path`` as an indented JSON list.

    Parent directories are created when missing; each event contributes the
    dictionary returned by its ``as_dict`` method.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    rows = [event.as_dict() for event in events]
    target.write_text(json.dumps(rows, indent=2))
118
+
119
+
120
def dominant_freqs(y: np.ndarray, sr: int = SR, max_count: int = 6) -> list[float]:
    """Return up to ``max_count`` dominant frequencies between 40 and 1000 Hz.

    A Welch power spectral density is computed and restricted to 40-1000 Hz.
    Peaks must be roughly 35 Hz apart and have a prominence of at least 2 %
    of the strongest in-band bin. If no peak qualifies, the strongest in-band
    bins are used instead.

    Args:
        y: Input samples.
        sr: Sample rate passed to the Welch estimator.
        max_count: Maximum number of frequencies to return.

    Returns:
        Frequencies in Hz, rounded to one decimal and sorted ascending.
        Empty when the input has fewer than 64 samples, when ``max_count`` is
        not positive, or when no spectrum bin falls inside the band.
    """
    y = np.asarray(y, dtype=np.float32)
    # ``[-max_count:]`` with max_count == 0 would select every bin, so bail
    # out early for non-positive counts.
    if y.size < 64 or max_count <= 0:
        return []
    # Never request a segment longer than the signal: SciPy would warn and
    # clamp it to the signal length anyway.
    nperseg = min(4096, len(y))
    freqs, psd = signal.welch(y, fs=sr, nperseg=nperseg)
    band = (freqs >= 40) & (freqs <= 1000)
    if not np.any(band):
        return []
    bf = freqs[band]
    bp = psd[band]
    # Minimum peak spacing, converted from ~35 Hz to a number of bins.
    distance = max(1, int(35 / (bf[1] - bf[0]))) if len(bf) > 1 else 1
    peaks, _ = signal.find_peaks(bp, distance=distance, prominence=np.max(bp) * 0.02)
    # With no qualifying peak, fall back to ranking every in-band bin.
    candidates = peaks if len(peaks) else np.arange(len(bp))
    strongest = candidates[np.argsort(bp[candidates])[-max_count:]]
    return sorted(float(round(bf[i], 1)) for i in strongest)
139
+
140
+
141
def band_energy(y: np.ndarray, sr: int, low_hz: float, high_hz: float) -> float:
    """Sum the Welch power spectral density of ``y`` over ``[low_hz, high_hz]``.

    Both band edges are inclusive. Inputs with fewer than 64 samples yield
    ``0.0``; otherwise a ``1e-12`` floor is added to the sum.
    """
    samples = np.asarray(y, dtype=np.float32)
    if samples.size < 64:
        return 0.0
    segment_length = min(4096, len(samples))
    freqs, psd = signal.welch(samples, fs=sr, nperseg=segment_length)
    lower, upper = float(low_hz), float(high_hz)
    in_band = (freqs >= lower) & (freqs <= upper)
    return float(psd[in_band].sum() + 1e-12)
148
+
149
+
150
def spectral_features(y: np.ndarray, sr: int = SR) -> dict[str, Any]:
    """Summarize ``y`` as a small dictionary of level and spectrum features.

    Returns a dict with these keys:
        duration_sec: ``len(y) / sr`` rounded to 3 decimals.
        rms: Root-mean-square level (with a ``1e-12`` floor under the root).
        peak: Largest absolute sample value.
        low_band_energy_share_40_300hz, mid_band_energy_share_300_2000hz,
        high_band_energy_share_2000hz_plus: Fraction of the total Welch PSD
            sum in each band. Content below 40 Hz is in none of them, so the
            three shares need not add up to 1.
        dominant_freqs_hz: Result of ``dominant_freqs``.

    Empty input returns the same keys with zero values and an empty frequency
    list, so consumers always see one schema.
    """
    y = np.asarray(y, dtype=np.float32)
    if y.size == 0:
        return {
            "duration_sec": 0.0,
            "rms": 0.0,
            "peak": 0.0,
            "low_band_energy_share_40_300hz": 0.0,
            "mid_band_energy_share_300_2000hz": 0.0,
            "high_band_energy_share_2000hz_plus": 0.0,
            "dominant_freqs_hz": [],
        }
    freqs, psd = signal.welch(y, fs=sr, nperseg=min(4096, len(y)))
    # Small floor keeps the share ratios finite for an all-zero spectrum.
    total = float(psd.sum() + 1e-12)
    low = float(psd[(freqs >= 40) & (freqs <= 300)].sum() / total)
    mid = float(psd[(freqs > 300) & (freqs <= 2000)].sum() / total)
    high = float(psd[freqs > 2000].sum() / total)
    rms = float(np.sqrt(np.mean(y * y) + 1e-12))
    return {
        "duration_sec": round(len(y) / sr, 3),
        "rms": round(rms, 6),
        "peak": round(float(np.max(np.abs(y))), 6),
        "low_band_energy_share_40_300hz": round(low, 4),
        "mid_band_energy_share_300_2000hz": round(mid, 4),
        "high_band_energy_share_2000hz_plus": round(high, 4),
        "dominant_freqs_hz": dominant_freqs(y, sr),
    }
169
+
170
+
171
def protected_mask(n_samples: int, sr: int, protected: Iterable[tuple[float, float] | list[float]]) -> np.ndarray:
    """Return a boolean mask of length ``n_samples`` that is True inside ``protected``.

    Each item of ``protected`` is a ``(start, end)`` pair that is multiplied
    by ``sr`` to obtain sample indices. The interval-to-mask conversion is the
    same one ``segment_mask`` performs, so this function delegates to it
    instead of keeping a second copy of the logic.
    """
    return segment_mask(n_samples, sr, protected)
179
+
180
+
181
+ def segment_mask(n_samples: int, sr: int, segments: Iterable[tuple[float, float] | list[float]]) -> np.ndarray:
182
+ mask = np.zeros(int(n_samples), dtype=bool)
183
+ for start, end in segments:
184
+ a = max(0, min(int(n_samples), int(round(float(start) * sr))))
185
+ b = max(0, min(int(n_samples), int(round(float(end) * sr))))
186
+ if b > a:
187
+ mask[a:b] = True
188
+ return mask