pysilero-vad 1.0.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,60 @@
1
+ import logging
2
+ from pathlib import Path
3
+ from typing import Final, Union
4
+
5
+ import numpy as np
6
+ import onnxruntime
7
+
8
# Sample rate, in Hz, that is passed to the model as its "sr" input.
_RATE: Final = 16000
# Divisor used to scale 16-bit PCM samples to floats (largest positive int16).
_MAX_WAV: Final = 32767
# Directory containing this module; the bundled model is resolved relative to it.
_DIR = Path(__file__).parent
_DEFAULT_ONNX_PATH = _DIR / "models" / "silero_vad.onnx"

# Use a module-scoped logger rather than the root logger, so that anything
# logged or configured through it stays attributable to this package.
_LOGGER = logging.getLogger(__name__)
14
+
15
+
16
class SileroVoiceActivityDetector:
    """Detects speech/silence using Silero VAD.

    https://github.com/snakers4/silero-vad

    The detector is stateful: the ``h`` and ``c`` tensors returned by the
    model on each call are stored and fed back in on the next call. Use
    :meth:`reset` to clear them before processing an unrelated audio stream.
    """

    # Shape of each state tensor ("h" and "c") fed to the model.
    _STATE_SHAPE = (2, 1, 64)

    def __init__(self, onnx_path: Union[str, Path] = _DEFAULT_ONNX_PATH) -> None:
        """Load the ONNX model and initialise the model state.

        Args:
            onnx_path: Path to the Silero VAD ONNX model. Defaults to the
                model file located next to this module.
        """
        opts = onnxruntime.SessionOptions()
        # Restrict onnxruntime to a single thread for both kinds of parallelism.
        opts.inter_op_num_threads = 1
        opts.intra_op_num_threads = 1

        self.session = onnxruntime.InferenceSession(
            str(onnx_path), providers=["CPUExecutionProvider"], sess_options=opts
        )

        # Single source of truth for the initial state.
        self.reset()

    def reset(self) -> None:
        """Reset state."""
        # Allocate directly as float32 instead of float64 followed by a cast.
        self._h = np.zeros(self._STATE_SHAPE, dtype=np.float32)
        self._c = np.zeros(self._STATE_SHAPE, dtype=np.float32)

    def __call__(self, audio: bytes) -> float:
        """Return probability of speech in audio [0-1].

        Audio must be 16 kHz 16-bit mono PCM.

        Args:
            audio: Raw PCM bytes; interpreted as native-endian int16 samples.

        Returns:
            The model output for this chunk as a Python ``float``.

        Raises:
            ValueError: If ``len(audio)`` is not a multiple of 2 (raised by
                ``np.frombuffer``).
        """
        # Scale int16 samples to float32 by dividing by _MAX_WAV.
        samples = np.frombuffer(audio, dtype=np.int16).astype(np.float32) / _MAX_WAV

        ort_inputs = {
            # Add batch dimension
            "input": np.expand_dims(samples, 0),
            "h": self._h,
            "c": self._c,
            "sr": np.array(_RATE, dtype=np.int64),
        }
        # Keep the returned state tensors so the next call continues from here.
        out, self._h, self._c = self.session.run(None, ort_inputs)

        # Convert to a real Python float to honour the annotated return type
        # (the squeezed model output is a NumPy array, not a float).
        return float(out.squeeze())
Binary file
pysilero_vad/py.typed ADDED
File without changes
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Michael Hansen
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,43 @@
1
+ Metadata-Version: 2.1
2
+ Name: pysilero-vad
3
+ Version: 1.0.0
4
+ Summary: Pre-packaged voice activity detector using silero-vad
5
+ Home-page: http://github.com/rhasspy/pysilero-vad
6
+ Author: Michael Hansen
7
+ Author-email: mike@rhasspy.org
8
+ License: MIT
9
+ Keywords: voice activity vad
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3.7
15
+ Classifier: Programming Language :: Python :: 3.8
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE.md
21
+ Requires-Dist: onnxruntime <2,>=1.10.0
22
+ Requires-Dist: numpy <1.26
23
+
24
+ # pySilero VAD
25
+
26
+ A pre-packaged voice activity detector using [silero-vad](https://github.com/snakers4/silero-vad).
27
+
28
+ ``` sh
29
+ pip install pysilero-vad
30
+ ```
31
+
32
+ ``` python
33
+ from pysilero_vad import SileroVoiceActivityDetector
34
+
35
+ vad = SileroVoiceActivityDetector()
36
+
37
+ # Audio must be 16 kHz, 16-bit mono PCM
38
+ if vad(audio_bytes) >= 0.5:
39
+ print("Speech")
40
+ else:
41
+ print("Silence")
42
+ ```
43
+
@@ -0,0 +1,8 @@
1
+ pysilero_vad/__init__.py,sha256=k0kb-HkhJwqD_O5YsOAQhV0Zbk7gnAy5XRTy2iiLQXY,1708
2
+ pysilero_vad/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ pysilero_vad/models/silero_vad.onnx,sha256=o16_Uv085fFGmyo2FY26dhvEe5c-ozgrMYbKFbH1ryg,1807522
4
+ pysilero_vad-1.0.0.dist-info/LICENSE.md,sha256=E3RtUJ105V6iJl--8gS7fNv4SoMVsCB-mIMmy1Q4cCg,1071
5
+ pysilero_vad-1.0.0.dist-info/METADATA,sha256=-0F8V6kxyed4OmFwVuJkSZ9lbvnvnmEny0INFxmnHbQ,1219
6
+ pysilero_vad-1.0.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
7
+ pysilero_vad-1.0.0.dist-info/top_level.txt,sha256=QQlOVbq_uDMukkVxjBFRi8eOwSrzJDrbP8YY1MCeMIs,13
8
+ pysilero_vad-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: bdist_wheel (0.42.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ pysilero_vad