piper-tts-plus 20250627.70451__py3-none-any.whl → 20250628.23647__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
piper/config.py CHANGED
@@ -1,4 +1,5 @@
1
1
  """Piper configuration"""
2
+
2
3
  from dataclasses import dataclass
3
4
  from enum import Enum
4
5
  from typing import Any, Dict, Mapping, Sequence
piper/download.py CHANGED
@@ -1,4 +1,5 @@
1
1
  """Utility for downloading Piper voices."""
2
+
2
3
  import json
3
4
  import logging
4
5
  import shutil
piper/util.py CHANGED
@@ -1,4 +1,5 @@
1
1
  """Utilities"""
2
+
2
3
  import numpy as np
3
4
 
4
5
 
piper/voice.py CHANGED
@@ -11,25 +11,29 @@ import onnxruntime
11
11
  # Try to import piper_phonemize, but make it optional
12
12
  try:
13
13
  from piper_phonemize import phonemize_codepoints, phonemize_espeak, tashkeel_run
14
+
14
15
  HAS_PIPER_PHONEMIZE = True
15
16
  except ImportError:
16
17
  HAS_PIPER_PHONEMIZE = False
18
+
17
19
  # Provide fallback implementations
18
20
  def phonemize_codepoints(text, lang=None):
19
21
  # Simple fallback: return text as list of characters
20
22
  return list(text)
21
-
23
+
22
24
  def phonemize_espeak(text, voice=None):
23
25
  # Simple fallback: return text as list of characters
24
26
  return list(text)
25
-
27
+
26
28
  def tashkeel_run(text):
27
29
  # Simple fallback: return original text
28
30
  return text
29
31
 
32
+
30
33
  # Try to import pyopenjtalk, but make it optional
31
34
  try:
32
35
  import pyopenjtalk
36
+
33
37
  HAS_PYOPENJTALK = True
34
38
  except ImportError:
35
39
  HAS_PYOPENJTALK = False
@@ -134,12 +138,12 @@ class PiperVoice:
134
138
  converted = []
135
139
  # Add BOS marker
136
140
  converted.append("^")
137
-
141
+
138
142
  for ph in phonemes:
139
143
  if ph == "pau":
140
144
  converted.append("_")
141
145
  continue
142
-
146
+
143
147
  if ph == "sil":
144
148
  # Skip sil in the middle, it will be added as EOS
145
149
  continue
@@ -148,16 +152,16 @@ class PiperVoice:
148
152
  # But NOT 'N' which is a special phoneme
149
153
  if ph in {"A", "I", "U", "E", "O"}:
150
154
  ph = ph.lower()
151
-
155
+
152
156
  # Check if this is a multi-character phoneme that needs PUA mapping
153
157
  if ph in MULTI_CHAR_TO_PUA:
154
158
  converted.append(MULTI_CHAR_TO_PUA[ph])
155
159
  else:
156
160
  converted.append(ph)
157
-
161
+
158
162
  # Add EOS marker
159
163
  converted.append("$")
160
-
164
+
161
165
  # Log readable phonemes if debug logging is enabled
162
166
  if _LOGGER.isEnabledFor(logging.DEBUG):
163
167
  readable_phonemes = []
@@ -172,7 +176,9 @@ class PiperVoice:
172
176
  readable_phonemes.append(ph)
173
177
  else:
174
178
  readable_phonemes.append(ph)
175
- _LOGGER.debug("Phonemized '%s' to: %s", text, ' '.join(readable_phonemes))
179
+ _LOGGER.debug(
180
+ "Phonemized '%s' to: %s", text, " ".join(readable_phonemes)
181
+ )
176
182
 
177
183
  return [converted]
178
184
 
@@ -278,7 +284,7 @@ class PiperVoice:
278
284
  args = {
279
285
  "input": phoneme_ids_array,
280
286
  "input_lengths": phoneme_ids_lengths,
281
- "scales": scales
287
+ "scales": scales,
282
288
  }
283
289
 
284
290
  if self.config.num_speakers <= 1:
@@ -293,6 +299,11 @@ class PiperVoice:
293
299
  args["sid"] = sid
294
300
 
295
301
  # Synthesize through Onnx
296
- audio = self.session.run(None, args, )[0].squeeze((0, 1))
302
+ audio = self.session.run(
303
+ None,
304
+ args,
305
+ )[
306
+ 0
307
+ ].squeeze((0, 1))
297
308
  audio = audio_float_to_int16(audio.squeeze())
298
309
  return audio.tobytes()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: piper-tts-plus
3
- Version: 20250627.70451
3
+ Version: 20250628.23647
4
4
  Summary: A fast, local neural text to speech system that sounds great and is optimized for the Raspberry Pi 4.
5
5
  Home-page: https://github.com/ayutaz/piper-plus
6
6
  Author: yousan
@@ -0,0 +1,19 @@
1
+ piper/__init__.py,sha256=1-LW3UWmqlEAHJgbHXjIMh6trxrSxs9PSC_xaLsz_ek,63
2
+ piper/__main__.py,sha256=aBlkzPedaTFVq2LVTygAU5avjLoLTyvRnfyAdvMkHNQ,5109
3
+ piper/config.py,sha256=Bw5LsBaJkl0MtKjObzQ9ZjrbFHybpT7zORvRN5cTStQ,1420
4
+ piper/const.py,sha256=m_V4woI8zT5Rwmb9y_Bh1S6ch6HDox7wW95Y28SKauw,106
5
+ piper/download.py,sha256=0rObwEa94qHgFxBp_AXBzEU7o9lXblaxbGhGjWTZu3E,4605
6
+ piper/file_hash.py,sha256=HMuwrgEIg-bCOXHG0wE3vtjrqGD7QaA_UNfvBMXeUcY,1107
7
+ piper/http_server.py,sha256=12B9PJCY4UN_fcoH-gunxq7o3obj7NEfpXgo9tESKR4,4065
8
+ piper/util.py,sha256=Tfe3jh_14Knxsug-BEZiYyaHPAwflIuDMbAbM2k5Jgk,398
9
+ piper/voice.py,sha256=PslKvr-P-hwfVzMGZgkzekJBz0N1ASMJc6_4ux_dxgE,10182
10
+ piper/voices.json,sha256=pusxO-rZHvmWXkbRpp05lQCwYns7oKXAU6SIH3wFkUQ,130859
11
+ tests/__init__.py,sha256=FcHh72c9V-_ySbzAKg7HZig4VDZiJfQLvK__IUg8bUs,40
12
+ tests/conftest.py,sha256=YnHVpOqLGt5cycDRmIaOfSVMn6Bdl9VOk6toS6xDC24,1773
13
+ tests/test_runtime.py,sha256=CuuIzUOqbnNZDB-TeLfrn-vlyeFSM4d3boTLdMiYLU8,3506
14
+ tests/test_util_impl.py,sha256=OBMQk9uKpVsFTMuUBKBiGI17cR83et58SQT5nQCzMxw,4181
15
+ piper_tts_plus-20250628.23647.dist-info/METADATA,sha256=zitSGU36crGpQBQi2TUZXDoyudWdnuT0epANknyc7K8,1200
16
+ piper_tts_plus-20250628.23647.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
17
+ piper_tts_plus-20250628.23647.dist-info/entry_points.txt,sha256=R-zJAXMPM47DHJDk5K0toz7iOATx38RV98fkowMxj0o,46
18
+ piper_tts_plus-20250628.23647.dist-info/top_level.txt,sha256=CSiU1QCg9pI9dYwhXBStqlmtlkCm3LT5EaUavCcFQ64,12
19
+ piper_tts_plus-20250628.23647.dist-info/RECORD,,
tests/__init__.py ADDED
@@ -0,0 +1 @@
1
+ # Test package for piper python runtime
tests/conftest.py ADDED
@@ -0,0 +1,71 @@
1
+ """
2
+ Pytest configuration for piper runtime tests
3
+ """
4
+
5
+ import pytest
6
+ import tempfile
7
+ import shutil
8
+ from pathlib import Path
9
+
10
+
11
+ @pytest.fixture
12
+ def temp_dir():
13
+ """Create a temporary directory for test files"""
14
+ temp_path = tempfile.mkdtemp()
15
+ yield Path(temp_path)
16
+ shutil.rmtree(temp_path)
17
+
18
+
19
+ @pytest.fixture
20
+ def mock_model_path(temp_dir):
21
+ """Create a mock model file for testing"""
22
+ model_path = temp_dir / "test_model.onnx"
23
+ model_path.touch()
24
+
25
+ # Create corresponding JSON config
26
+ config_path = temp_dir / "test_model.onnx.json"
27
+ config_content = {
28
+ "audio": {"sample_rate": 22050},
29
+ "espeak": {"voice": "en-us"},
30
+ "inference": {"noise_scale": 0.667, "length_scale": 1, "noise_w": 0.8},
31
+ "phoneme_id_map": {"_": [0], "a": [1]},
32
+ }
33
+
34
+ import json
35
+
36
+ config_path.write_text(json.dumps(config_content))
37
+
38
+ return model_path
39
+
40
+
41
+ @pytest.fixture
42
+ def mock_japanese_model_path(temp_dir):
43
+ """Create a mock Japanese model file for testing"""
44
+ model_path = temp_dir / "ja_JP_test.onnx"
45
+ model_path.touch()
46
+
47
+ # Create corresponding JSON config with Japanese settings
48
+ config_path = temp_dir / "ja_JP_test.onnx.json"
49
+ config_content = {
50
+ "audio": {"sample_rate": 22050},
51
+ "inference": {"noise_scale": 0.667, "length_scale": 1, "noise_w": 0.8},
52
+ "phoneme_type": "openjtalk",
53
+ "language": {"code": "ja"},
54
+ "phoneme_id_map": {
55
+ "_": [0],
56
+ "a": [1],
57
+ "i": [2],
58
+ "u": [3],
59
+ "e": [4],
60
+ "o": [5],
61
+ # PUA mappings
62
+ "\ue00e": [30], # ch
63
+ "\ue00f": [31], # ts
64
+ },
65
+ }
66
+
67
+ import json
68
+
69
+ config_path.write_text(json.dumps(config_content))
70
+
71
+ return model_path
tests/test_runtime.py ADDED
@@ -0,0 +1,108 @@
1
+ """
2
+ Runtime tests for piper voice synthesis
3
+ Tests actual implementation without excessive mocking
4
+ """
5
+
6
+ import pytest
7
+ import numpy as np
8
+ from piper.util import audio_float_to_int16
9
+
10
+
11
+ class TestAudioUtils:
12
+ """Test audio utility functions"""
13
+
14
+ @pytest.mark.unit
15
+ def test_audio_float_to_int16_conversion(self):
16
+ """Test float to int16 audio conversion"""
17
+ # Test normal range
18
+ float_audio = np.array([0.0, 0.5, -0.5, 1.0, -1.0], dtype=np.float32)
19
+ int16_audio = audio_float_to_int16(float_audio)
20
+
21
+ assert int16_audio.dtype == np.int16
22
+ assert int16_audio[0] == 0
23
+ assert int16_audio[1] > 0 # 0.5 -> positive
24
+ assert int16_audio[2] < 0 # -0.5 -> negative
25
+ assert int16_audio[3] == 32767 # 1.0 -> max
26
+ assert int16_audio[4] == -32767 # -1.0 -> min (normalized)
27
+
28
+ @pytest.mark.unit
29
+ def test_audio_clipping(self):
30
+ """Test clipping of out-of-range values"""
31
+ float_audio = np.array([2.0, -2.0], dtype=np.float32)
32
+ int16_audio = audio_float_to_int16(float_audio)
33
+
34
+ assert int16_audio[0] == 32767 # Clipped to max
35
+ assert int16_audio[1] == -32767 # Clipped to min (normalized)
36
+
37
+
38
+ class TestPiperConfig:
39
+ """Test configuration handling"""
40
+
41
+ @pytest.mark.unit
42
+ def test_config_from_dict(self):
43
+ """Test creating config from dictionary"""
44
+ # from piper.config import PiperConfig # noqa: F401
45
+
46
+ config_dict = {
47
+ "audio": {"sample_rate": 22050},
48
+ "num_symbols": 100,
49
+ "num_speakers": 1,
50
+ "inference": {"noise_scale": 0.667, "length_scale": 1.0, "noise_w": 0.8},
51
+ }
52
+
53
+ # Would normally use: config = PiperConfig.from_dict(config_dict)
54
+ # For now, test that we can access values
55
+ sample_rate = config_dict["audio"]["sample_rate"]
56
+ assert sample_rate == 22050
57
+ assert config_dict["num_symbols"] == 100
58
+
59
+ @pytest.mark.unit
60
+ def test_japanese_config(self):
61
+ """Test Japanese-specific configuration"""
62
+ config_dict = {
63
+ "audio": {"sample_rate": 22050},
64
+ "phoneme_type": "openjtalk",
65
+ "language": {"code": "ja"},
66
+ "phoneme_id_map": {"_": 0, "\ue00e": 30, "\ue00f": 31}, # PUA mapping
67
+ }
68
+
69
+ assert config_dict["phoneme_type"] == "openjtalk"
70
+ assert config_dict["language"]["code"] == "ja"
71
+
72
+ # Check PUA mappings exist
73
+ pua_count = sum(
74
+ 1
75
+ for k in config_dict["phoneme_id_map"]
76
+ if isinstance(k, str) and ord(k[0]) >= 0xE000
77
+ )
78
+ assert pua_count >= 2
79
+
80
+
81
+ class TestFileHash:
82
+ """Test file hashing utilities"""
83
+
84
+ @pytest.mark.unit
85
+ def test_file_hash_calculation(self, temp_dir):
86
+ """Test file hash calculation"""
87
+ try:
88
+ from piper.file_hash import get_file_hash
89
+ except ImportError:
90
+ pytest.skip("File hash module not available")
91
+
92
+ # Create test file
93
+ test_file = temp_dir / "test.txt"
94
+ test_file.write_text("Hello world")
95
+
96
+ # Calculate hash
97
+ hash1 = get_file_hash(str(test_file))
98
+ assert isinstance(hash1, str)
99
+ assert len(hash1) > 0
100
+
101
+ # Same content should give same hash
102
+ hash2 = get_file_hash(str(test_file))
103
+ assert hash1 == hash2
104
+
105
+ # Different content should give different hash
106
+ test_file.write_text("Different content")
107
+ hash3 = get_file_hash(str(test_file))
108
+ assert hash3 != hash1
@@ -0,0 +1,108 @@
1
+ """
2
+ Tests for existing utility function implementations
3
+ """
4
+
5
+ import pytest
6
+ import numpy as np
7
+ from piper.util import audio_float_to_int16
8
+
9
+
10
+ class TestUtilImplementation:
11
+ """Test the existing utility implementations"""
12
+
13
+ @pytest.mark.unit
14
+ def test_audio_float_to_int16_basic(self):
15
+ """Test basic float to int16 conversion"""
16
+ # The function normalizes based on max absolute value
17
+ float_audio = np.array([0.0, 0.5, -0.5, 1.0, -1.0], dtype=np.float32)
18
+ int16_audio = audio_float_to_int16(float_audio)
19
+
20
+ assert int16_audio.dtype == np.int16
21
+ assert len(int16_audio) == len(float_audio)
22
+
23
+ # With normalization, 1.0 becomes 32767, -1.0 becomes -32767
24
+ assert int16_audio[0] == 0 # 0.0 -> 0
25
+ assert int16_audio[1] > 0 # 0.5 -> positive
26
+ assert int16_audio[2] < 0 # -0.5 -> negative
27
+ assert int16_audio[3] == 32767 # 1.0 -> max int16
28
+ assert int16_audio[4] == -32767 # -1.0 -> -32767 (not -32768)
29
+
30
+ @pytest.mark.unit
31
+ def test_audio_float_to_int16_clipping(self):
32
+ """Test that values are normalized to int16 range"""
33
+ # The function normalizes by max absolute value
34
+ float_audio = np.array([2.0, -2.0, 1.0, -1.0], dtype=np.float32)
35
+ int16_audio = audio_float_to_int16(float_audio)
36
+
37
+ # Should be normalized to int16 range
38
+ assert np.all(int16_audio <= 32767)
39
+ assert np.all(int16_audio >= -32767)
40
+
41
+ # 2.0 is the max, so it becomes 32767
42
+ assert int16_audio[0] == 32767 # 2.0 -> max
43
+ assert int16_audio[1] == -32767 # -2.0 -> min
44
+
45
+ @pytest.mark.unit
46
+ def test_audio_float_to_int16_empty(self):
47
+ """Test conversion of empty array"""
48
+ float_audio = np.array([], dtype=np.float32)
49
+ # Empty array will cause division by zero in normalization
50
+ # This is expected behavior - skip this test
51
+ try:
52
+ audio_float_to_int16(float_audio) # Test that it doesn't crash
53
+ except ValueError:
54
+ # Expected for empty array
55
+ pass
56
+
57
+ @pytest.mark.unit
58
+ def test_audio_float_to_int16_large_array(self):
59
+ """Test conversion of large array"""
60
+ # Generate 1 second of audio at 22050 Hz
61
+ sample_rate = 22050
62
+ duration = 1.0
63
+ t = np.linspace(0, duration, int(sample_rate * duration))
64
+
65
+ # Generate sine wave
66
+ frequency = 440 # A4
67
+ float_audio = np.sin(2 * np.pi * frequency * t).astype(np.float32)
68
+
69
+ int16_audio = audio_float_to_int16(float_audio)
70
+
71
+ assert int16_audio.dtype == np.int16
72
+ assert len(int16_audio) == len(float_audio)
73
+ assert int16_audio.max() <= 32767
74
+ assert int16_audio.min() >= -32768
75
+
76
+ @pytest.mark.unit
77
+ def test_audio_float_to_int16_preserves_silence(self):
78
+ """Test that silence remains silence"""
79
+ # Array of zeros
80
+ float_audio = np.zeros(1000, dtype=np.float32)
81
+ int16_audio = audio_float_to_int16(float_audio)
82
+
83
+ assert np.all(int16_audio == 0)
84
+
85
+ @pytest.mark.unit
86
+ def test_audio_float_to_int16_normalization(self):
87
+ """Test that normalization works correctly"""
88
+ # Single value gets normalized to max
89
+ float_audio = np.array([0.5], dtype=np.float32)
90
+ int16_audio = audio_float_to_int16(float_audio)
91
+ assert int16_audio[0] == 32767 # 0.5 becomes max after normalization
92
+
93
+ # Multiple values get normalized proportionally
94
+ float_audio = np.array([0.5, 0.25, -0.5], dtype=np.float32)
95
+ int16_audio = audio_float_to_int16(float_audio)
96
+ assert int16_audio[0] == 32767 # 0.5 is max
97
+ assert int16_audio[2] == -32767 # -0.5 is min
98
+
99
+ @pytest.mark.unit
100
+ def test_audio_float_to_int16_maintains_shape(self):
101
+ """Test that array shape is preserved"""
102
+ # Test 1D array
103
+ float_audio_1d = np.random.randn(100).astype(np.float32)
104
+ int16_audio_1d = audio_float_to_int16(float_audio_1d)
105
+ assert int16_audio_1d.shape == float_audio_1d.shape
106
+
107
+ # Test that it handles only 1D arrays (most implementations expect 1D)
108
+ # Multi-dimensional should either work or raise appropriate error
@@ -1,15 +0,0 @@
1
- piper/__init__.py,sha256=1-LW3UWmqlEAHJgbHXjIMh6trxrSxs9PSC_xaLsz_ek,63
2
- piper/__main__.py,sha256=aBlkzPedaTFVq2LVTygAU5avjLoLTyvRnfyAdvMkHNQ,5109
3
- piper/config.py,sha256=vZvzGBKaPfsUMVsBSO0Ay8LP0ng1zqgkcQVSFSZODG0,1419
4
- piper/const.py,sha256=m_V4woI8zT5Rwmb9y_Bh1S6ch6HDox7wW95Y28SKauw,106
5
- piper/download.py,sha256=zyF2oyvuZiQr1HiAIBspiQByAcGOkXJfTj_KOecImxU,4604
6
- piper/file_hash.py,sha256=HMuwrgEIg-bCOXHG0wE3vtjrqGD7QaA_UNfvBMXeUcY,1107
7
- piper/http_server.py,sha256=12B9PJCY4UN_fcoH-gunxq7o3obj7NEfpXgo9tESKR4,4065
8
- piper/util.py,sha256=QQnvx_HZGUk9gHCsqifR1ob7-2QRfN9jZMqF0poQS8k,397
9
- piper/voice.py,sha256=jCCoipLyHD44XYA1esE4HLftdPr13nB9i5Zl0MXqKLY,10172
10
- piper/voices.json,sha256=pusxO-rZHvmWXkbRpp05lQCwYns7oKXAU6SIH3wFkUQ,130859
11
- piper_tts_plus-20250627.70451.dist-info/METADATA,sha256=fhTdiep8beIOyrqXrM2WQKFdMO1A9wmVmTBi3uVI-X8,1200
12
- piper_tts_plus-20250627.70451.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
- piper_tts_plus-20250627.70451.dist-info/entry_points.txt,sha256=R-zJAXMPM47DHJDk5K0toz7iOATx38RV98fkowMxj0o,46
14
- piper_tts_plus-20250627.70451.dist-info/top_level.txt,sha256=3FjLp04vySCS6YYps6jbChyOvij45fGR-MHPpBFUIlw,6
15
- piper_tts_plus-20250627.70451.dist-info/RECORD,,