pysilero-vad 2.0.0__tar.gz → 2.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pysilero_vad-2.0.0 → pysilero_vad-2.1.0}/PKG-INFO +16 -10
- {pysilero_vad-2.0.0 → pysilero_vad-2.1.0}/README.md +4 -1
- pysilero_vad-2.1.0/pyproject.toml +46 -0
- {pysilero_vad-2.0.0 → pysilero_vad-2.1.0}/pysilero_vad/__init__.py +11 -0
- {pysilero_vad-2.0.0 → pysilero_vad-2.1.0}/pysilero_vad.egg-info/PKG-INFO +17 -11
- {pysilero_vad-2.0.0 → pysilero_vad-2.1.0}/pysilero_vad.egg-info/SOURCES.txt +2 -3
- {pysilero_vad-2.0.0 → pysilero_vad-2.1.0}/pysilero_vad.egg-info/requires.txt +1 -1
- pysilero_vad-2.1.0/pysilero_vad.egg-info/zip-safe +1 -0
- {pysilero_vad-2.0.0 → pysilero_vad-2.1.0}/tests/test_vad.py +17 -1
- pysilero_vad-2.0.0/MANIFEST.in +0 -2
- pysilero_vad-2.0.0/requirements.txt +0 -2
- pysilero_vad-2.0.0/setup.py +0 -53
- {pysilero_vad-2.0.0 → pysilero_vad-2.1.0}/LICENSE.md +0 -0
- {pysilero_vad-2.0.0 → pysilero_vad-2.1.0}/pysilero_vad/models/silero_vad.onnx +0 -0
- {pysilero_vad-2.0.0 → pysilero_vad-2.1.0}/pysilero_vad/py.typed +0 -0
- {pysilero_vad-2.0.0 → pysilero_vad-2.1.0}/pysilero_vad.egg-info/dependency_links.txt +0 -0
- {pysilero_vad-2.0.0 → pysilero_vad-2.1.0}/pysilero_vad.egg-info/top_level.txt +0 -0
- {pysilero_vad-2.0.0 → pysilero_vad-2.1.0}/setup.cfg +0 -0
@@ -1,25 +1,28 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: pysilero_vad
|
3
|
-
Version: 2.0.0
|
3
|
+
Version: 2.1.0
|
4
4
|
Summary: Pre-packaged voice activity detector using silero-vad
|
5
|
-
|
6
|
-
Author: Michael Hansen
|
7
|
-
Author-email: mike@rhasspy.org
|
5
|
+
Author-email: Michael Hansen <mike@rhasspy.org>
|
8
6
|
License: MIT
|
9
|
-
|
7
|
+
Project-URL: Source Code, http://github.com/rhasspy/pysilero-vad
|
8
|
+
Keywords: voice,activity,vad
|
9
|
+
Platform: any
|
10
10
|
Classifier: Development Status :: 3 - Alpha
|
11
11
|
Classifier: Intended Audience :: Developers
|
12
|
-
Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
|
12
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
13
13
|
Classifier: License :: OSI Approved :: MIT License
|
14
|
-
Classifier: Programming Language :: Python :: 3.7
|
15
14
|
Classifier: Programming Language :: Python :: 3.8
|
16
15
|
Classifier: Programming Language :: Python :: 3.9
|
17
16
|
Classifier: Programming Language :: Python :: 3.10
|
18
17
|
Classifier: Programming Language :: Python :: 3.11
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
20
|
+
Requires-Python: >=3.8.0
|
19
21
|
Description-Content-Type: text/markdown
|
20
22
|
License-File: LICENSE.md
|
21
23
|
Requires-Dist: onnxruntime<2,>=1.18.0
|
22
|
-
Requires-Dist: numpy
|
24
|
+
Requires-Dist: numpy
|
25
|
+
Dynamic: license-file
|
23
26
|
|
24
27
|
# pySilero VAD
|
25
28
|
|
@@ -34,7 +37,10 @@ from pysilero_vad import SileroVoiceActivityDetector
|
|
34
37
|
|
35
38
|
vad = SileroVoiceActivityDetector()
|
36
39
|
|
37
|
-
# Audio must be 16Khz, 16-bit mono PCM
|
40
|
+
# Audio must be 16Khz, 16-bit mono PCM with correct chunk size
|
41
|
+
# See also: vad.chunk_samples()
|
42
|
+
assert len(audio_bytes) == vad.chunk_bytes()
|
43
|
+
|
38
44
|
if vad(audio_bytes) >= 0.5:
|
39
45
|
print("Speech")
|
40
46
|
else:
|
@@ -11,7 +11,10 @@ from pysilero_vad import SileroVoiceActivityDetector
|
|
11
11
|
|
12
12
|
vad = SileroVoiceActivityDetector()
|
13
13
|
|
14
|
-
# Audio must be 16Khz, 16-bit mono PCM
|
14
|
+
# Audio must be 16Khz, 16-bit mono PCM with correct chunk size
|
15
|
+
# See also: vad.chunk_samples()
|
16
|
+
assert len(audio_bytes) == vad.chunk_bytes()
|
17
|
+
|
15
18
|
if vad(audio_bytes) >= 0.5:
|
16
19
|
print("Speech")
|
17
20
|
else:
|
@@ -0,0 +1,46 @@
|
|
1
|
+
[build-system]
|
2
|
+
requires = ["setuptools>=62.3"]
|
3
|
+
build-backend = "setuptools.build_meta"
|
4
|
+
|
5
|
+
[project]
|
6
|
+
name = "pysilero_vad"
|
7
|
+
version = "2.1.0"
|
8
|
+
license = {text = "MIT"}
|
9
|
+
description = "Pre-packaged voice activity detector using silero-vad"
|
10
|
+
readme = "README.md"
|
11
|
+
authors = [
|
12
|
+
{name = "Michael Hansen", email = "mike@rhasspy.org"}
|
13
|
+
]
|
14
|
+
keywords = ["voice", "activity", "vad"]
|
15
|
+
classifiers = [
|
16
|
+
"Development Status :: 3 - Alpha",
|
17
|
+
"Intended Audience :: Developers",
|
18
|
+
"Topic :: Text Processing :: Linguistic",
|
19
|
+
"License :: OSI Approved :: MIT License",
|
20
|
+
"Programming Language :: Python :: 3.8",
|
21
|
+
"Programming Language :: Python :: 3.9",
|
22
|
+
"Programming Language :: Python :: 3.10",
|
23
|
+
"Programming Language :: Python :: 3.11",
|
24
|
+
"Programming Language :: Python :: 3.12",
|
25
|
+
"Programming Language :: Python :: 3.13",
|
26
|
+
]
|
27
|
+
requires-python = ">=3.8.0"
|
28
|
+
dependencies = [
|
29
|
+
"onnxruntime>=1.18.0,<2",
|
30
|
+
"numpy",
|
31
|
+
]
|
32
|
+
|
33
|
+
[project.urls]
|
34
|
+
"Source Code" = "http://github.com/rhasspy/pysilero-vad"
|
35
|
+
|
36
|
+
[tool.setuptools]
|
37
|
+
platforms = ["any"]
|
38
|
+
zip-safe = true
|
39
|
+
include-package-data = true
|
40
|
+
|
41
|
+
[tool.setuptools.packages.find]
|
42
|
+
include = ["pysilero_vad"]
|
43
|
+
exclude = ["tests", "tests.*"]
|
44
|
+
|
45
|
+
[tool.setuptools.package-data]
|
46
|
+
pysilero_vad = ["models/silero_vad.onnx"]
|
@@ -73,6 +73,17 @@ class SileroVoiceActivityDetector:
|
|
73
73
|
|
74
74
|
audio_array = np.frombuffer(audio, dtype=np.int16).astype(np.float32) / _MAX_WAV
|
75
75
|
|
76
|
+
return self.process_array(audio_array)
|
77
|
+
|
78
|
+
def process_array(self, audio_array: np.ndarray) -> float:
|
79
|
+
"""Return probability of speech [0-1] in a single audio chunk.
|
80
|
+
|
81
|
+
Audio *must* be 512 float samples [0-1] of 16Khz mono.
|
82
|
+
"""
|
83
|
+
if len(audio_array) != _CHUNK_SAMPLES:
|
84
|
+
# Window size is fixed at 512 samples in v5
|
85
|
+
raise InvalidChunkSizeError
|
86
|
+
|
76
87
|
# Add batch dimension and context
|
77
88
|
audio_array = np.concatenate(
|
78
89
|
(self._context, audio_array[np.newaxis, :]), axis=1
|
@@ -1,25 +1,28 @@
|
|
1
|
-
Metadata-Version: 2.
|
2
|
-
Name:
|
3
|
-
Version: 2.0.0
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: pysilero_vad
|
3
|
+
Version: 2.1.0
|
4
4
|
Summary: Pre-packaged voice activity detector using silero-vad
|
5
|
-
|
6
|
-
Author: Michael Hansen
|
7
|
-
Author-email: mike@rhasspy.org
|
5
|
+
Author-email: Michael Hansen <mike@rhasspy.org>
|
8
6
|
License: MIT
|
9
|
-
|
7
|
+
Project-URL: Source Code, http://github.com/rhasspy/pysilero-vad
|
8
|
+
Keywords: voice,activity,vad
|
9
|
+
Platform: any
|
10
10
|
Classifier: Development Status :: 3 - Alpha
|
11
11
|
Classifier: Intended Audience :: Developers
|
12
|
-
Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
|
12
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
13
13
|
Classifier: License :: OSI Approved :: MIT License
|
14
|
-
Classifier: Programming Language :: Python :: 3.7
|
15
14
|
Classifier: Programming Language :: Python :: 3.8
|
16
15
|
Classifier: Programming Language :: Python :: 3.9
|
17
16
|
Classifier: Programming Language :: Python :: 3.10
|
18
17
|
Classifier: Programming Language :: Python :: 3.11
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
20
|
+
Requires-Python: >=3.8.0
|
19
21
|
Description-Content-Type: text/markdown
|
20
22
|
License-File: LICENSE.md
|
21
23
|
Requires-Dist: onnxruntime<2,>=1.18.0
|
22
|
-
Requires-Dist: numpy
|
24
|
+
Requires-Dist: numpy
|
25
|
+
Dynamic: license-file
|
23
26
|
|
24
27
|
# pySilero VAD
|
25
28
|
|
@@ -34,7 +37,10 @@ from pysilero_vad import SileroVoiceActivityDetector
|
|
34
37
|
|
35
38
|
vad = SileroVoiceActivityDetector()
|
36
39
|
|
37
|
-
# Audio must be 16Khz, 16-bit mono PCM
|
40
|
+
# Audio must be 16Khz, 16-bit mono PCM with correct chunk size
|
41
|
+
# See also: vad.chunk_samples()
|
42
|
+
assert len(audio_bytes) == vad.chunk_bytes()
|
43
|
+
|
38
44
|
if vad(audio_bytes) >= 0.5:
|
39
45
|
print("Speech")
|
40
46
|
else:
|
@@ -1,9 +1,7 @@
|
|
1
1
|
LICENSE.md
|
2
|
-
MANIFEST.in
|
3
2
|
README.md
|
4
|
-
|
3
|
+
pyproject.toml
|
5
4
|
setup.cfg
|
6
|
-
setup.py
|
7
5
|
pysilero_vad/__init__.py
|
8
6
|
pysilero_vad/py.typed
|
9
7
|
pysilero_vad.egg-info/PKG-INFO
|
@@ -11,5 +9,6 @@ pysilero_vad.egg-info/SOURCES.txt
|
|
11
9
|
pysilero_vad.egg-info/dependency_links.txt
|
12
10
|
pysilero_vad.egg-info/requires.txt
|
13
11
|
pysilero_vad.egg-info/top_level.txt
|
12
|
+
pysilero_vad.egg-info/zip-safe
|
14
13
|
pysilero_vad/models/silero_vad.onnx
|
15
14
|
tests/test_vad.py
|
@@ -1,2 +1,2 @@
|
|
1
1
|
onnxruntime<2,>=1.18.0
|
2
|
-
numpy
|
2
|
+
numpy
|
@@ -0,0 +1 @@
|
|
1
|
+
|
@@ -2,8 +2,10 @@ import wave
|
|
2
2
|
from pathlib import Path
|
3
3
|
from typing import Union
|
4
4
|
|
5
|
+
import numpy as np
|
5
6
|
import pytest
|
6
|
-
|
7
|
+
|
8
|
+
from pysilero_vad import InvalidChunkSizeError, SileroVoiceActivityDetector
|
7
9
|
|
8
10
|
_DIR = Path(__file__).parent
|
9
11
|
|
@@ -29,12 +31,16 @@ def test_speech() -> None:
|
|
29
31
|
vad = SileroVoiceActivityDetector()
|
30
32
|
assert any(p >= 0.5 for p in vad.process_chunks(_load_wav(_DIR / "speech.wav")))
|
31
33
|
|
34
|
+
|
32
35
|
def test_invalid_chunk_size() -> None:
|
33
36
|
"""Test that chunk size must be 512 samples."""
|
34
37
|
vad = SileroVoiceActivityDetector()
|
35
38
|
|
36
39
|
# Should work
|
37
40
|
vad(bytes(SileroVoiceActivityDetector.chunk_bytes()))
|
41
|
+
vad.process_array(
|
42
|
+
np.zeros(SileroVoiceActivityDetector.chunk_samples(), dtype=np.float32)
|
43
|
+
)
|
38
44
|
|
39
45
|
# Should fail
|
40
46
|
with pytest.raises(InvalidChunkSizeError):
|
@@ -42,3 +48,13 @@ def test_invalid_chunk_size() -> None:
|
|
42
48
|
|
43
49
|
with pytest.raises(InvalidChunkSizeError):
|
44
50
|
vad(bytes(SileroVoiceActivityDetector.chunk_bytes() // 2))
|
51
|
+
|
52
|
+
with pytest.raises(InvalidChunkSizeError):
|
53
|
+
vad.process_array(
|
54
|
+
np.zeros(SileroVoiceActivityDetector.chunk_samples() * 2, dtype=np.float32)
|
55
|
+
)
|
56
|
+
|
57
|
+
with pytest.raises(InvalidChunkSizeError):
|
58
|
+
vad.process_array(
|
59
|
+
np.zeros(SileroVoiceActivityDetector.chunk_samples() // 2, dtype=np.float32)
|
60
|
+
)
|
pysilero_vad-2.0.0/MANIFEST.in
DELETED
pysilero_vad-2.0.0/setup.py
DELETED
@@ -1,53 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
from pathlib import Path
|
3
|
-
|
4
|
-
import setuptools
|
5
|
-
from setuptools import setup
|
6
|
-
|
7
|
-
this_dir = Path(__file__).parent
|
8
|
-
module_dir = this_dir / "pysilero_vad"
|
9
|
-
|
10
|
-
# -----------------------------------------------------------------------------
|
11
|
-
|
12
|
-
# Load README in as long description
|
13
|
-
long_description: str = ""
|
14
|
-
readme_path = this_dir / "README.md"
|
15
|
-
if readme_path.is_file():
|
16
|
-
long_description = readme_path.read_text(encoding="utf-8")
|
17
|
-
|
18
|
-
requirements = []
|
19
|
-
requirements_path = this_dir / "requirements.txt"
|
20
|
-
if requirements_path.is_file():
|
21
|
-
with open(requirements_path, "r", encoding="utf-8") as requirements_file:
|
22
|
-
requirements = requirements_file.read().splitlines()
|
23
|
-
|
24
|
-
# -----------------------------------------------------------------------------
|
25
|
-
|
26
|
-
setup(
|
27
|
-
name="pysilero_vad",
|
28
|
-
version="2.0.0",
|
29
|
-
description="Pre-packaged voice activity detector using silero-vad",
|
30
|
-
long_description=long_description,
|
31
|
-
long_description_content_type="text/markdown",
|
32
|
-
url="http://github.com/rhasspy/pysilero-vad",
|
33
|
-
author="Michael Hansen",
|
34
|
-
author_email="mike@rhasspy.org",
|
35
|
-
license="MIT",
|
36
|
-
packages=setuptools.find_packages(),
|
37
|
-
package_data={
|
38
|
-
"pysilero_vad": ["py.typed", "models/silero_vad.onnx"],
|
39
|
-
},
|
40
|
-
install_requires=requirements,
|
41
|
-
classifiers=[
|
42
|
-
"Development Status :: 3 - Alpha",
|
43
|
-
"Intended Audience :: Developers",
|
44
|
-
"Topic :: Multimedia :: Sound/Audio :: Speech",
|
45
|
-
"License :: OSI Approved :: MIT License",
|
46
|
-
"Programming Language :: Python :: 3.7",
|
47
|
-
"Programming Language :: Python :: 3.8",
|
48
|
-
"Programming Language :: Python :: 3.9",
|
49
|
-
"Programming Language :: Python :: 3.10",
|
50
|
-
"Programming Language :: Python :: 3.11",
|
51
|
-
],
|
52
|
-
keywords="voice activity vad",
|
53
|
-
)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|