pysilero-vad 2.0.1__tar.gz → 2.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pysilero_vad-2.0.1 → pysilero_vad-2.1.0}/PKG-INFO +4 -3
- {pysilero_vad-2.0.1 → pysilero_vad-2.1.0}/pyproject.toml +5 -2
- {pysilero_vad-2.0.1 → pysilero_vad-2.1.0}/pysilero_vad/__init__.py +11 -0
- {pysilero_vad-2.0.1 → pysilero_vad-2.1.0}/pysilero_vad.egg-info/PKG-INFO +4 -3
- {pysilero_vad-2.0.1 → pysilero_vad-2.1.0}/pysilero_vad.egg-info/requires.txt +1 -1
- {pysilero_vad-2.0.1 → pysilero_vad-2.1.0}/tests/test_vad.py +17 -1
- {pysilero_vad-2.0.1 → pysilero_vad-2.1.0}/LICENSE.md +0 -0
- {pysilero_vad-2.0.1 → pysilero_vad-2.1.0}/README.md +0 -0
- {pysilero_vad-2.0.1 → pysilero_vad-2.1.0}/pysilero_vad/models/silero_vad.onnx +0 -0
- {pysilero_vad-2.0.1 → pysilero_vad-2.1.0}/pysilero_vad/py.typed +0 -0
- {pysilero_vad-2.0.1 → pysilero_vad-2.1.0}/pysilero_vad.egg-info/SOURCES.txt +0 -0
- {pysilero_vad-2.0.1 → pysilero_vad-2.1.0}/pysilero_vad.egg-info/dependency_links.txt +0 -0
- {pysilero_vad-2.0.1 → pysilero_vad-2.1.0}/pysilero_vad.egg-info/top_level.txt +0 -0
- {pysilero_vad-2.0.1 → pysilero_vad-2.1.0}/pysilero_vad.egg-info/zip-safe +0 -0
- {pysilero_vad-2.0.1 → pysilero_vad-2.1.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: pysilero_vad
|
3
|
-
Version: 2.0.1
|
3
|
+
Version: 2.1.0
|
4
4
|
Summary: Pre-packaged voice activity detector using silero-vad
|
5
5
|
Author-email: Michael Hansen <mike@rhasspy.org>
|
6
6
|
License: MIT
|
@@ -21,7 +21,8 @@ Requires-Python: >=3.8.0
|
|
21
21
|
Description-Content-Type: text/markdown
|
22
22
|
License-File: LICENSE.md
|
23
23
|
Requires-Dist: onnxruntime<2,>=1.18.0
|
24
|
-
Requires-Dist: numpy
|
24
|
+
Requires-Dist: numpy
|
25
|
+
Dynamic: license-file
|
25
26
|
|
26
27
|
# pySilero VAD
|
27
28
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "pysilero_vad"
|
7
|
-
version = "2.0.1"
|
7
|
+
version = "2.1.0"
|
8
8
|
license = {text = "MIT"}
|
9
9
|
description = "Pre-packaged voice activity detector using silero-vad"
|
10
10
|
readme = "README.md"
|
@@ -27,7 +27,7 @@ classifiers = [
|
|
27
27
|
requires-python = ">=3.8.0"
|
28
28
|
dependencies = [
|
29
29
|
"onnxruntime>=1.18.0,<2",
|
30
|
-
"numpy
|
30
|
+
"numpy",
|
31
31
|
]
|
32
32
|
|
33
33
|
[project.urls]
|
@@ -41,3 +41,6 @@ include-package-data = true
|
|
41
41
|
[tool.setuptools.packages.find]
|
42
42
|
include = ["pysilero_vad"]
|
43
43
|
exclude = ["tests", "tests.*"]
|
44
|
+
|
45
|
+
[tool.setuptools.package-data]
|
46
|
+
pysilero_vad = ["models/silero_vad.onnx"]
|
@@ -73,6 +73,17 @@ class SileroVoiceActivityDetector:
|
|
73
73
|
|
74
74
|
audio_array = np.frombuffer(audio, dtype=np.int16).astype(np.float32) / _MAX_WAV
|
75
75
|
|
76
|
+
return self.process_array(audio_array)
|
77
|
+
|
78
|
+
def process_array(self, audio_array: np.ndarray) -> float:
|
79
|
+
"""Return probability of speech [0-1] in a single audio chunk.
|
80
|
+
|
81
|
+
Audio *must* be 512 float samples [0-1] of 16Khz mono.
|
82
|
+
"""
|
83
|
+
if len(audio_array) != _CHUNK_SAMPLES:
|
84
|
+
# Window size is fixed at 512 samples in v5
|
85
|
+
raise InvalidChunkSizeError
|
86
|
+
|
76
87
|
# Add batch dimension and context
|
77
88
|
audio_array = np.concatenate(
|
78
89
|
(self._context, audio_array[np.newaxis, :]), axis=1
|
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: pysilero_vad
|
3
|
-
Version: 2.0.1
|
3
|
+
Version: 2.1.0
|
4
4
|
Summary: Pre-packaged voice activity detector using silero-vad
|
5
5
|
Author-email: Michael Hansen <mike@rhasspy.org>
|
6
6
|
License: MIT
|
@@ -21,7 +21,8 @@ Requires-Python: >=3.8.0
|
|
21
21
|
Description-Content-Type: text/markdown
|
22
22
|
License-File: LICENSE.md
|
23
23
|
Requires-Dist: onnxruntime<2,>=1.18.0
|
24
|
-
Requires-Dist: numpy
|
24
|
+
Requires-Dist: numpy
|
25
|
+
Dynamic: license-file
|
25
26
|
|
26
27
|
# pySilero VAD
|
27
28
|
|
@@ -1,2 +1,2 @@
|
|
1
1
|
onnxruntime<2,>=1.18.0
|
2
|
-
numpy
|
2
|
+
numpy
|
@@ -2,8 +2,10 @@ import wave
|
|
2
2
|
from pathlib import Path
|
3
3
|
from typing import Union
|
4
4
|
|
5
|
+
import numpy as np
|
5
6
|
import pytest
|
6
|
-
|
7
|
+
|
8
|
+
from pysilero_vad import InvalidChunkSizeError, SileroVoiceActivityDetector
|
7
9
|
|
8
10
|
_DIR = Path(__file__).parent
|
9
11
|
|
@@ -29,12 +31,16 @@ def test_speech() -> None:
|
|
29
31
|
vad = SileroVoiceActivityDetector()
|
30
32
|
assert any(p >= 0.5 for p in vad.process_chunks(_load_wav(_DIR / "speech.wav")))
|
31
33
|
|
34
|
+
|
32
35
|
def test_invalid_chunk_size() -> None:
|
33
36
|
"""Test that chunk size must be 512 samples."""
|
34
37
|
vad = SileroVoiceActivityDetector()
|
35
38
|
|
36
39
|
# Should work
|
37
40
|
vad(bytes(SileroVoiceActivityDetector.chunk_bytes()))
|
41
|
+
vad.process_array(
|
42
|
+
np.zeros(SileroVoiceActivityDetector.chunk_samples(), dtype=np.float32)
|
43
|
+
)
|
38
44
|
|
39
45
|
# Should fail
|
40
46
|
with pytest.raises(InvalidChunkSizeError):
|
@@ -42,3 +48,13 @@ def test_invalid_chunk_size() -> None:
|
|
42
48
|
|
43
49
|
with pytest.raises(InvalidChunkSizeError):
|
44
50
|
vad(bytes(SileroVoiceActivityDetector.chunk_bytes() // 2))
|
51
|
+
|
52
|
+
with pytest.raises(InvalidChunkSizeError):
|
53
|
+
vad.process_array(
|
54
|
+
np.zeros(SileroVoiceActivityDetector.chunk_samples() * 2, dtype=np.float32)
|
55
|
+
)
|
56
|
+
|
57
|
+
with pytest.raises(InvalidChunkSizeError):
|
58
|
+
vad.process_array(
|
59
|
+
np.zeros(SileroVoiceActivityDetector.chunk_samples() // 2, dtype=np.float32)
|
60
|
+
)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|