pysilero-vad 2.0.0__tar.gz → 2.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,25 +1,28 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: pysilero_vad
3
- Version: 2.0.0
3
+ Version: 2.1.0
4
4
  Summary: Pre-packaged voice activity detector using silero-vad
5
- Home-page: http://github.com/rhasspy/pysilero-vad
6
- Author: Michael Hansen
7
- Author-email: mike@rhasspy.org
5
+ Author-email: Michael Hansen <mike@rhasspy.org>
8
6
  License: MIT
9
- Keywords: voice activity vad
7
+ Project-URL: Source Code, http://github.com/rhasspy/pysilero-vad
8
+ Keywords: voice,activity,vad
9
+ Platform: any
10
10
  Classifier: Development Status :: 3 - Alpha
11
11
  Classifier: Intended Audience :: Developers
12
- Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
12
+ Classifier: Topic :: Text Processing :: Linguistic
13
13
  Classifier: License :: OSI Approved :: MIT License
14
- Classifier: Programming Language :: Python :: 3.7
15
14
  Classifier: Programming Language :: Python :: 3.8
16
15
  Classifier: Programming Language :: Python :: 3.9
17
16
  Classifier: Programming Language :: Python :: 3.10
18
17
  Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Requires-Python: >=3.8.0
19
21
  Description-Content-Type: text/markdown
20
22
  License-File: LICENSE.md
21
23
  Requires-Dist: onnxruntime<2,>=1.18.0
22
- Requires-Dist: numpy<2
24
+ Requires-Dist: numpy
25
+ Dynamic: license-file
23
26
 
24
27
  # pySilero VAD
25
28
 
@@ -34,7 +37,10 @@ from pysilero_vad import SileroVoiceActivityDetector
34
37
 
35
38
  vad = SileroVoiceActivityDetector()
36
39
 
37
- # Audio must be 16Khz, 16-bit mono PCM
40
+ # Audio must be 16 kHz, 16-bit mono PCM with correct chunk size
41
+ # See also: vad.chunk_samples()
42
+ assert len(audio_bytes) == vad.chunk_bytes()
43
+
38
44
  if vad(audio_bytes) >= 0.5:
39
45
  print("Speech")
40
46
  else:
@@ -11,7 +11,10 @@ from pysilero_vad import SileroVoiceActivityDetector
11
11
 
12
12
  vad = SileroVoiceActivityDetector()
13
13
 
14
- # Audio must be 16Khz, 16-bit mono PCM
14
+ # Audio must be 16 kHz, 16-bit mono PCM with correct chunk size
15
+ # See also: vad.chunk_samples()
16
+ assert len(audio_bytes) == vad.chunk_bytes()
17
+
15
18
  if vad(audio_bytes) >= 0.5:
16
19
  print("Speech")
17
20
  else:
@@ -0,0 +1,46 @@
1
+ [build-system]
2
+ requires = ["setuptools>=62.3"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "pysilero_vad"
7
+ version = "2.1.0"
8
+ license = {text = "MIT"}
9
+ description = "Pre-packaged voice activity detector using silero-vad"
10
+ readme = "README.md"
11
+ authors = [
12
+ {name = "Michael Hansen", email = "mike@rhasspy.org"}
13
+ ]
14
+ keywords = ["voice", "activity", "vad"]
15
+ classifiers = [
16
+ "Development Status :: 3 - Alpha",
17
+ "Intended Audience :: Developers",
18
+ "Topic :: Text Processing :: Linguistic",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3.8",
21
+ "Programming Language :: Python :: 3.9",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Programming Language :: Python :: 3.13",
26
+ ]
27
+ requires-python = ">=3.8.0"
28
+ dependencies = [
29
+ "onnxruntime>=1.18.0,<2",
30
+ "numpy",
31
+ ]
32
+
33
+ [project.urls]
34
+ "Source Code" = "http://github.com/rhasspy/pysilero-vad"
35
+
36
+ [tool.setuptools]
37
+ platforms = ["any"]
38
+ zip-safe = true
39
+ include-package-data = true
40
+
41
+ [tool.setuptools.packages.find]
42
+ include = ["pysilero_vad"]
43
+ exclude = ["tests", "tests.*"]
44
+
45
+ [tool.setuptools.package-data]
46
+ pysilero_vad = ["models/silero_vad.onnx"]
@@ -73,6 +73,17 @@ class SileroVoiceActivityDetector:
73
73
 
74
74
  audio_array = np.frombuffer(audio, dtype=np.int16).astype(np.float32) / _MAX_WAV
75
75
 
76
+ return self.process_array(audio_array)
77
+
78
+ def process_array(self, audio_array: np.ndarray) -> float:
79
+ """Return probability of speech [0-1] in a single audio chunk.
80
+
81
+ Audio *must* be 512 float samples in the range [-1, 1] of 16 kHz mono.
82
+ """
83
+ if len(audio_array) != _CHUNK_SAMPLES:
84
+ # Window size is fixed at 512 samples in v5
85
+ raise InvalidChunkSizeError
86
+
76
87
  # Add batch dimension and context
77
88
  audio_array = np.concatenate(
78
89
  (self._context, audio_array[np.newaxis, :]), axis=1
@@ -1,25 +1,28 @@
1
- Metadata-Version: 2.1
2
- Name: pysilero-vad
3
- Version: 2.0.0
1
+ Metadata-Version: 2.4
2
+ Name: pysilero_vad
3
+ Version: 2.1.0
4
4
  Summary: Pre-packaged voice activity detector using silero-vad
5
- Home-page: http://github.com/rhasspy/pysilero-vad
6
- Author: Michael Hansen
7
- Author-email: mike@rhasspy.org
5
+ Author-email: Michael Hansen <mike@rhasspy.org>
8
6
  License: MIT
9
- Keywords: voice activity vad
7
+ Project-URL: Source Code, http://github.com/rhasspy/pysilero-vad
8
+ Keywords: voice,activity,vad
9
+ Platform: any
10
10
  Classifier: Development Status :: 3 - Alpha
11
11
  Classifier: Intended Audience :: Developers
12
- Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
12
+ Classifier: Topic :: Text Processing :: Linguistic
13
13
  Classifier: License :: OSI Approved :: MIT License
14
- Classifier: Programming Language :: Python :: 3.7
15
14
  Classifier: Programming Language :: Python :: 3.8
16
15
  Classifier: Programming Language :: Python :: 3.9
17
16
  Classifier: Programming Language :: Python :: 3.10
18
17
  Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Requires-Python: >=3.8.0
19
21
  Description-Content-Type: text/markdown
20
22
  License-File: LICENSE.md
21
23
  Requires-Dist: onnxruntime<2,>=1.18.0
22
- Requires-Dist: numpy<2
24
+ Requires-Dist: numpy
25
+ Dynamic: license-file
23
26
 
24
27
  # pySilero VAD
25
28
 
@@ -34,7 +37,10 @@ from pysilero_vad import SileroVoiceActivityDetector
34
37
 
35
38
  vad = SileroVoiceActivityDetector()
36
39
 
37
- # Audio must be 16Khz, 16-bit mono PCM
40
+ # Audio must be 16 kHz, 16-bit mono PCM with correct chunk size
41
+ # See also: vad.chunk_samples()
42
+ assert len(audio_bytes) == vad.chunk_bytes()
43
+
38
44
  if vad(audio_bytes) >= 0.5:
39
45
  print("Speech")
40
46
  else:
@@ -1,9 +1,7 @@
1
1
  LICENSE.md
2
- MANIFEST.in
3
2
  README.md
4
- requirements.txt
3
+ pyproject.toml
5
4
  setup.cfg
6
- setup.py
7
5
  pysilero_vad/__init__.py
8
6
  pysilero_vad/py.typed
9
7
  pysilero_vad.egg-info/PKG-INFO
@@ -11,5 +9,6 @@ pysilero_vad.egg-info/SOURCES.txt
11
9
  pysilero_vad.egg-info/dependency_links.txt
12
10
  pysilero_vad.egg-info/requires.txt
13
11
  pysilero_vad.egg-info/top_level.txt
12
+ pysilero_vad.egg-info/zip-safe
14
13
  pysilero_vad/models/silero_vad.onnx
15
14
  tests/test_vad.py
@@ -1,2 +1,2 @@
1
1
  onnxruntime<2,>=1.18.0
2
- numpy<2
2
+ numpy
@@ -2,8 +2,10 @@ import wave
2
2
  from pathlib import Path
3
3
  from typing import Union
4
4
 
5
+ import numpy as np
5
6
  import pytest
6
- from pysilero_vad import SileroVoiceActivityDetector, InvalidChunkSizeError
7
+
8
+ from pysilero_vad import InvalidChunkSizeError, SileroVoiceActivityDetector
7
9
 
8
10
  _DIR = Path(__file__).parent
9
11
 
@@ -29,12 +31,16 @@ def test_speech() -> None:
29
31
  vad = SileroVoiceActivityDetector()
30
32
  assert any(p >= 0.5 for p in vad.process_chunks(_load_wav(_DIR / "speech.wav")))
31
33
 
34
+
32
35
  def test_invalid_chunk_size() -> None:
33
36
  """Test that chunk size must be 512 samples."""
34
37
  vad = SileroVoiceActivityDetector()
35
38
 
36
39
  # Should work
37
40
  vad(bytes(SileroVoiceActivityDetector.chunk_bytes()))
41
+ vad.process_array(
42
+ np.zeros(SileroVoiceActivityDetector.chunk_samples(), dtype=np.float32)
43
+ )
38
44
 
39
45
  # Should fail
40
46
  with pytest.raises(InvalidChunkSizeError):
@@ -42,3 +48,13 @@ def test_invalid_chunk_size() -> None:
42
48
 
43
49
  with pytest.raises(InvalidChunkSizeError):
44
50
  vad(bytes(SileroVoiceActivityDetector.chunk_bytes() // 2))
51
+
52
+ with pytest.raises(InvalidChunkSizeError):
53
+ vad.process_array(
54
+ np.zeros(SileroVoiceActivityDetector.chunk_samples() * 2, dtype=np.float32)
55
+ )
56
+
57
+ with pytest.raises(InvalidChunkSizeError):
58
+ vad.process_array(
59
+ np.zeros(SileroVoiceActivityDetector.chunk_samples() // 2, dtype=np.float32)
60
+ )
@@ -1,2 +0,0 @@
1
- include requirements.txt
2
- include pysilero_vad/models/*.onnx
@@ -1,2 +0,0 @@
1
- onnxruntime>=1.18.0,<2
2
- numpy<2
@@ -1,53 +0,0 @@
1
- #!/usr/bin/env python3
2
- from pathlib import Path
3
-
4
- import setuptools
5
- from setuptools import setup
6
-
7
- this_dir = Path(__file__).parent
8
- module_dir = this_dir / "pysilero_vad"
9
-
10
- # -----------------------------------------------------------------------------
11
-
12
- # Load README in as long description
13
- long_description: str = ""
14
- readme_path = this_dir / "README.md"
15
- if readme_path.is_file():
16
- long_description = readme_path.read_text(encoding="utf-8")
17
-
18
- requirements = []
19
- requirements_path = this_dir / "requirements.txt"
20
- if requirements_path.is_file():
21
- with open(requirements_path, "r", encoding="utf-8") as requirements_file:
22
- requirements = requirements_file.read().splitlines()
23
-
24
- # -----------------------------------------------------------------------------
25
-
26
- setup(
27
- name="pysilero_vad",
28
- version="2.0.0",
29
- description="Pre-packaged voice activity detector using silero-vad",
30
- long_description=long_description,
31
- long_description_content_type="text/markdown",
32
- url="http://github.com/rhasspy/pysilero-vad",
33
- author="Michael Hansen",
34
- author_email="mike@rhasspy.org",
35
- license="MIT",
36
- packages=setuptools.find_packages(),
37
- package_data={
38
- "pysilero_vad": ["py.typed", "models/silero_vad.onnx"],
39
- },
40
- install_requires=requirements,
41
- classifiers=[
42
- "Development Status :: 3 - Alpha",
43
- "Intended Audience :: Developers",
44
- "Topic :: Multimedia :: Sound/Audio :: Speech",
45
- "License :: OSI Approved :: MIT License",
46
- "Programming Language :: Python :: 3.7",
47
- "Programming Language :: Python :: 3.8",
48
- "Programming Language :: Python :: 3.9",
49
- "Programming Language :: Python :: 3.10",
50
- "Programming Language :: Python :: 3.11",
51
- ],
52
- keywords="voice activity vad",
53
- )
File without changes
File without changes