piper-tts-plus 0.0.0.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- piper_tts_plus-0.0.0.dev0/MANIFEST.in +2 -0
- piper_tts_plus-0.0.0.dev0/PKG-INFO +32 -0
- piper_tts_plus-0.0.0.dev0/piper/__init__.py +5 -0
- piper_tts_plus-0.0.0.dev0/piper/__main__.py +159 -0
- piper_tts_plus-0.0.0.dev0/piper/config.py +54 -0
- piper_tts_plus-0.0.0.dev0/piper/const.py +5 -0
- piper_tts_plus-0.0.0.dev0/piper/download.py +139 -0
- piper_tts_plus-0.0.0.dev0/piper/file_hash.py +46 -0
- piper_tts_plus-0.0.0.dev0/piper/http_server.py +127 -0
- piper_tts_plus-0.0.0.dev0/piper/util.py +12 -0
- piper_tts_plus-0.0.0.dev0/piper/voice.py +216 -0
- piper_tts_plus-0.0.0.dev0/piper/voices.json +4222 -0
- piper_tts_plus-0.0.0.dev0/piper_tts_plus.egg-info/PKG-INFO +32 -0
- piper_tts_plus-0.0.0.dev0/piper_tts_plus.egg-info/SOURCES.txt +20 -0
- piper_tts_plus-0.0.0.dev0/piper_tts_plus.egg-info/dependency_links.txt +1 -0
- piper_tts_plus-0.0.0.dev0/piper_tts_plus.egg-info/entry_points.txt +2 -0
- piper_tts_plus-0.0.0.dev0/piper_tts_plus.egg-info/requires.txt +8 -0
- piper_tts_plus-0.0.0.dev0/piper_tts_plus.egg-info/top_level.txt +1 -0
- piper_tts_plus-0.0.0.dev0/requirements.txt +2 -0
- piper_tts_plus-0.0.0.dev0/setup.cfg +21 -0
- piper_tts_plus-0.0.0.dev0/setup.py +48 -0
@@ -0,0 +1,32 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: piper-tts-plus
|
3
|
+
Version: 0.0.0.dev0
|
4
|
+
Summary: A fast, local neural text to speech system that sounds great and is optimized for the Raspberry Pi 4.
|
5
|
+
Home-page: https://github.com/ayutaz/piper-plus
|
6
|
+
Author: yousan
|
7
|
+
Author-email: rabbitcats77@gmail.com
|
8
|
+
License: MIT
|
9
|
+
Keywords: piper japanese and other languages tts
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
11
|
+
Classifier: Intended Audience :: Developers
|
12
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
14
|
+
Classifier: Programming Language :: Python :: 3.7
|
15
|
+
Classifier: Programming Language :: Python :: 3.8
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
18
|
+
Requires-Dist: piper-phonemize~=1.1.0
|
19
|
+
Requires-Dist: onnxruntime<2,>=1.11.0
|
20
|
+
Provides-Extra: gpu
|
21
|
+
Requires-Dist: onnxruntime-gpu<2,>=1.11.0; extra == "gpu"
|
22
|
+
Provides-Extra: http
|
23
|
+
Requires-Dist: flask<4,>=3; extra == "http"
|
24
|
+
Dynamic: author
|
25
|
+
Dynamic: author-email
|
26
|
+
Dynamic: classifier
|
27
|
+
Dynamic: home-page
|
28
|
+
Dynamic: keywords
|
29
|
+
Dynamic: license
|
30
|
+
Dynamic: provides-extra
|
31
|
+
Dynamic: requires-dist
|
32
|
+
Dynamic: summary
|
@@ -0,0 +1,159 @@
|
|
1
|
+
import argparse
|
2
|
+
import logging
|
3
|
+
import sys
|
4
|
+
import time
|
5
|
+
import wave
|
6
|
+
from pathlib import Path
|
7
|
+
from typing import Any, Dict
|
8
|
+
|
9
|
+
from . import PiperVoice
|
10
|
+
from .download import ensure_voice_exists, find_voice, get_voices
|
11
|
+
|
12
|
+
# Path of this module and the directory that contains it.
_FILE = Path(__file__)
_DIR = _FILE.parent
# Logger named after this file's stem (the file name without its suffix).
_LOGGER = logging.getLogger(_FILE.stem)
|
15
|
+
|
16
|
+
|
17
|
+
def _build_cli_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the command-line synthesizer."""
    cli = argparse.ArgumentParser()
    cli.add_argument("-m", "--model", required=True, help="Path to Onnx model file")
    cli.add_argument("-c", "--config", help="Path to model config file")

    # Output destinations
    cli.add_argument(
        "-f",
        "--output-file",
        "--output_file",
        help="Path to output WAV file (default: stdout)",
    )
    cli.add_argument(
        "-d",
        "--output-dir",
        "--output_dir",
        help="Path to output directory (default: cwd)",
    )
    cli.add_argument(
        "--output-raw",
        "--output_raw",
        action="store_true",
        help="Stream raw audio to stdout",
    )

    # Synthesis parameters; the float options share the same shape.
    cli.add_argument("-s", "--speaker", type=int, help="Id of speaker (default: 0)")
    float_options = (
        ("--length-scale", "--length_scale", "Phoneme length"),
        ("--noise-scale", "--noise_scale", "Generator noise"),
        ("--noise-w", "--noise_w", "Phoneme width noise"),
    )
    for dashed, underscored, help_text in float_options:
        cli.add_argument(dashed, underscored, type=float, help=help_text)

    cli.add_argument("--cuda", action="store_true", help="Use GPU")
    cli.add_argument(
        "--sentence-silence",
        "--sentence_silence",
        type=float,
        default=0.0,
        help="Seconds of silence after each sentence",
    )

    # Voice lookup / download locations
    cli.add_argument(
        "--data-dir",
        "--data_dir",
        action="append",
        default=[str(Path.cwd())],
        help="Data directory to check for downloaded models (default: current directory)",
    )
    cli.add_argument(
        "--download-dir",
        "--download_dir",
        help="Directory to download voices into (default: first data dir)",
    )
    cli.add_argument(
        "--update-voices",
        action="store_true",
        help="Download latest voices.json during startup",
    )
    cli.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    return cli


def _iter_input_lines(stream):
    """Lazily yield the stripped, non-empty lines of ``stream``."""
    for raw_line in stream:
        stripped = raw_line.strip()
        if stripped:
            yield stripped


def main() -> None:
    """Synthesize speech for text read from stdin.

    Depending on the arguments, audio is streamed raw to stdout (one input
    line at a time), written as one WAV file per input line into an output
    directory, or written as a single WAV (file or stdout) for the whole input.
    When the ``--model`` path does not exist it is treated as a voice name
    that is looked up in the voices index and downloaded if necessary.
    """
    args = _build_cli_parser().parse_args()
    log_level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(level=log_level)
    _LOGGER.debug(args)

    # Downloads go to the first data directory unless told otherwise.
    args.download_dir = args.download_dir or args.data_dir[0]

    if not Path(args.model).exists():
        # Not a file on disk: resolve it as a voice name.
        voices_info = get_voices(args.download_dir, update_voices=args.update_voices)

        # Old voice names are kept as aliases of the current entries.
        aliases_info: Dict[str, Any] = {
            alias: {"_is_alias": True, **info}
            for info in voices_info.values()
            for alias in info.get("aliases", [])
        }
        voices_info.update(aliases_info)

        ensure_voice_exists(args.model, args.data_dir, args.download_dir, voices_info)
        args.model, args.config = find_voice(args.model, args.data_dir)

    voice = PiperVoice.load(args.model, config_path=args.config, use_cuda=args.cuda)
    synthesize_args = dict(
        speaker_id=args.speaker,
        length_scale=args.length_scale,
        noise_scale=args.noise_scale,
        noise_w=args.noise_w,
        sentence_silence=args.sentence_silence,
    )

    if args.output_raw:
        # Emit raw audio chunks to stdout as soon as they are produced.
        raw_out = sys.stdout.buffer
        for line in _iter_input_lines(sys.stdin):
            for audio_bytes in voice.synthesize_stream_raw(line, **synthesize_args):
                raw_out.write(audio_bytes)
                raw_out.flush()
        return

    if args.output_dir:
        # One WAV file per input line, named after a monotonic timestamp.
        output_dir = Path(args.output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        for line in _iter_input_lines(sys.stdin):
            wav_path = output_dir / f"{time.monotonic_ns()}.wav"
            with wave.open(str(wav_path), "wb") as wav_file:
                voice.synthesize(line, wav_file, **synthesize_args)

            _LOGGER.info("Wrote %s", wav_path)
        return

    # Single WAV for the entire input; "-" or no file means stdout.
    text = sys.stdin.read()
    use_stdout = (not args.output_file) or (args.output_file == "-")
    wav_target = sys.stdout.buffer if use_stdout else args.output_file
    with wave.open(wav_target, "wb") as wav_file:
        voice.synthesize(text, wav_file, **synthesize_args)
|
156
|
+
|
157
|
+
|
158
|
+
# Run the command-line interface when this module is executed directly.
if __name__ == "__main__":
    main()
|
@@ -0,0 +1,54 @@
|
|
1
|
+
"""Piper configuration"""
|
2
|
+
from dataclasses import dataclass
|
3
|
+
from enum import Enum
|
4
|
+
from typing import Any, Dict, Mapping, Sequence
|
5
|
+
|
6
|
+
|
7
|
+
class PhonemeType(str, Enum):
    """String-valued enum of the phoneme types a voice config may declare."""

    # NOTE(review): member meanings are inferred from their names only;
    # confirm how each one is handled where voices are phonemized.
    ESPEAK = "espeak"
    TEXT = "text"
    OPENJTALK = "openjtalk"
|
11
|
+
|
12
|
+
|
13
|
+
@dataclass
class PiperConfig:
    """Settings of a Piper voice, as read from its JSON configuration."""

    # Number of phonemes
    num_symbols: int

    # Number of speakers
    num_speakers: int

    # Sample rate of output audio
    sample_rate: int

    # Name of espeak-ng voice or alphabet
    espeak_voice: str

    # Default inference settings
    length_scale: float
    noise_scale: float
    noise_w: float

    # Phoneme -> [id,]
    phoneme_id_map: Mapping[str, Sequence[int]]

    # One of the PhonemeType values
    phoneme_type: PhonemeType

    @staticmethod
    def from_dict(config: Dict[str, Any]) -> "PiperConfig":
        """Build a PiperConfig from a parsed voice configuration dictionary.

        The ``inference`` section is optional and falls back to
        noise_scale=0.667, length_scale=1.0 and noise_w=0.8. A missing
        ``phoneme_type`` defaults to ``PhonemeType.ESPEAK``. All other keys
        read here are required and raise ``KeyError`` when absent.
        """
        inference = config.get("inference", {})

        # Lookups are kept in the same order as the constructor arguments so
        # the first missing key reported stays the same.
        num_symbols = config["num_symbols"]
        num_speakers = config["num_speakers"]
        sample_rate = config["audio"]["sample_rate"]
        noise_scale = inference.get("noise_scale", 0.667)
        length_scale = inference.get("length_scale", 1.0)
        noise_w = inference.get("noise_w", 0.8)
        espeak_voice = config["espeak"]["voice"]
        phoneme_id_map = config["phoneme_id_map"]
        phoneme_type = PhonemeType(config.get("phoneme_type", PhonemeType.ESPEAK))

        return PiperConfig(
            num_symbols=num_symbols,
            num_speakers=num_speakers,
            sample_rate=sample_rate,
            noise_scale=noise_scale,
            length_scale=length_scale,
            noise_w=noise_w,
            espeak_voice=espeak_voice,
            phoneme_id_map=phoneme_id_map,
            phoneme_type=phoneme_type,
        )
|
@@ -0,0 +1,139 @@
|
|
1
|
+
"""Utility for downloading Piper voices."""
|
2
|
+
import json
|
3
|
+
import logging
|
4
|
+
import shutil
|
5
|
+
from pathlib import Path
|
6
|
+
from typing import Any, Dict, Iterable, Set, Tuple, Union
|
7
|
+
from urllib.request import urlopen
|
8
|
+
|
9
|
+
from .file_hash import get_file_hash
|
10
|
+
|
11
|
+
# URL template for remote voice files; "{file}" is filled in with the
# repository-relative file path (or "voices.json" for the index).
URL_FORMAT = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/{file}"

# Directory containing this module (where the embedded voices.json is looked up).
_DIR = Path(__file__).parent
_LOGGER = logging.getLogger(__name__)

# File names that are neither verified nor downloaded for a voice.
_SKIP_FILES = {"MODEL_CARD"}
|
17
|
+
|
18
|
+
|
19
|
+
class VoiceNotFoundError(Exception):
    """Raised when a requested voice name is not present in the voices index."""
|
21
|
+
|
22
|
+
|
23
|
+
def get_voices(
    download_dir: Union[str, Path], update_voices: bool = False
) -> Dict[str, Any]:
    """Load the voices index from a downloaded or embedded ``voices.json``.

    Args:
        download_dir: Directory that holds (or will receive) a downloaded
            ``voices.json``. It is created when an update is requested.
        update_voices: When true, fetch the latest ``voices.json`` into
            ``download_dir`` before loading.

    Returns:
        The parsed JSON content of the selected ``voices.json``.

    Raises:
        OSError: If the download or reading the file fails
            (``urllib.error.URLError`` is an ``OSError`` subclass).
        json.JSONDecodeError: If the selected file is not valid JSON.
    """
    download_dir = Path(download_dir)
    voices_download = download_dir / "voices.json"

    if update_voices:
        # Download latest voices.json
        voices_url = URL_FORMAT.format(file="voices.json")
        _LOGGER.debug("Downloading %s to %s", voices_url, voices_download)

        # The download directory may not exist yet (e.g. a fresh --download-dir).
        download_dir.mkdir(parents=True, exist_ok=True)

        # Write to a temporary sibling and swap it in only after a complete
        # transfer. Otherwise an interrupted download would leave a truncated
        # voices.json that is preferred over the embedded one on every run.
        voices_partial = voices_download.with_name(voices_download.name + ".part")
        try:
            with urlopen(voices_url) as response, open(
                voices_partial, "wb"
            ) as download_file:
                shutil.copyfileobj(response, download_file)

            voices_partial.replace(voices_download)
        finally:
            # Only present here if the download or the swap failed.
            if voices_partial.exists():
                voices_partial.unlink()

    # Prefer downloaded file to embedded
    voices_embedded = _DIR / "voices.json"
    voices_path = voices_download if voices_download.exists() else voices_embedded

    _LOGGER.debug("Loading %s", voices_path)
    with open(voices_path, "r", encoding="utf-8") as voices_file:
        return json.load(voices_file)
|
46
|
+
|
47
|
+
|
48
|
+
def _find_invalid_voice_files(data_dir: Path, voice_files: Dict[str, Any]) -> Set[str]:
    """Return the voice file paths that are missing or corrupt in ``data_dir``.

    A file is considered invalid when it does not exist, or when its size or
    MD5 digest differs from the ``size_bytes`` / ``md5_digest`` recorded for it.
    Files whose name is in ``_SKIP_FILES`` are ignored.
    """
    invalid_files: Set[str] = set()

    for file_path, file_info in voice_files.items():
        file_name = Path(file_path).name
        if file_name in _SKIP_FILES:
            continue

        data_file_path = data_dir / file_name
        _LOGGER.debug("Checking %s", data_file_path)
        if not data_file_path.exists():
            _LOGGER.debug("Missing %s", data_file_path)
            invalid_files.add(file_path)
            continue

        # Compare the size first so the hash is only computed when needed.
        expected_size = file_info["size_bytes"]
        actual_size = data_file_path.stat().st_size
        if expected_size != actual_size:
            _LOGGER.warning(
                "Wrong size (expected=%s, actual=%s) for %s",
                expected_size,
                actual_size,
                data_file_path,
            )
            invalid_files.add(file_path)
            continue

        expected_hash = file_info["md5_digest"]
        actual_hash = get_file_hash(data_file_path)
        if expected_hash != actual_hash:
            _LOGGER.warning(
                "Wrong hash (expected=%s, actual=%s) for %s",
                expected_hash,
                actual_hash,
                data_file_path,
            )
            invalid_files.add(file_path)

    return invalid_files


def ensure_voice_exists(
    name: str,
    data_dirs: Iterable[Union[str, Path]],
    download_dir: Union[str, Path],
    voices_info: Dict[str, Any],
) -> None:
    """Make sure the files of voice ``name`` are available, downloading if needed.

    Each data directory is checked for the voice's files (existence, size and
    MD5 digest). If one directory already holds a complete, valid copy,
    nothing is downloaded. Otherwise every file found missing or corrupt in
    any checked directory is downloaded into ``download_dir``.

    Args:
        name: Key of the voice in ``voices_info``.
        data_dirs: Directories to search for existing voice files.
        download_dir: Directory that receives downloaded files.
        voices_info: Voices index; ``voices_info[name]["files"]`` maps each
            remote file path to its ``size_bytes`` and ``md5_digest``.

    Raises:
        VoiceNotFoundError: If ``name`` is not in ``voices_info``.
        ValueError: If the voice lists no files at all.
    """
    assert data_dirs, "No data dirs"
    if name not in voices_info:
        raise VoiceNotFoundError(name)

    voice_info = voices_info[name]
    voice_files = voice_info["files"]
    files_to_download: Set[str] = set()

    for data_dir in data_dirs:
        invalid_files = _find_invalid_voice_files(Path(data_dir), voice_files)
        if not invalid_files:
            # This directory has a complete, valid copy. Files missing from
            # other data directories must not trigger a redundant download.
            files_to_download.clear()
            break

        files_to_download.update(invalid_files)

    if (not voice_files) and (not files_to_download):
        raise ValueError(f"Unable to find or download voice: {name}")

    # Download missing files
    download_dir = Path(download_dir)

    # Sorted so the download order is deterministic. Skipped files never
    # enter the set, so no further filtering is needed here.
    for file_path in sorted(files_to_download):
        file_name = Path(file_path).name
        file_url = URL_FORMAT.format(file=file_path)
        download_file_path = download_dir / file_name
        download_file_path.parent.mkdir(parents=True, exist_ok=True)

        _LOGGER.debug("Downloading %s to %s", file_url, download_file_path)
        with urlopen(file_url) as response, open(
            download_file_path, "wb"
        ) as download_file:
            shutil.copyfileobj(response, download_file)

        _LOGGER.info("Downloaded %s (%s)", download_file_path, file_url)
|
128
|
+
|
129
|
+
|
130
|
+
def find_voice(name: str, data_dirs: Iterable[Union[str, Path]]) -> Tuple[Path, Path]:
    """Locate the model and config files of voice ``name``.

    Returns the ``<name>.onnx`` / ``<name>.onnx.json`` pair from the first
    data directory that contains both files.

    Raises:
        ValueError: If no data directory contains both files.
    """
    for candidate_dir in map(Path, data_dirs):
        voice_paths = (
            candidate_dir / f"{name}.onnx",
            candidate_dir / f"{name}.onnx.json",
        )
        if all(voice_path.exists() for voice_path in voice_paths):
            return voice_paths

    raise ValueError(f"Missing files for voice {name}")
|
@@ -0,0 +1,46 @@
|
|
1
|
+
import argparse
|
2
|
+
import hashlib
|
3
|
+
import json
|
4
|
+
import sys
|
5
|
+
from pathlib import Path
|
6
|
+
from typing import Union
|
7
|
+
|
8
|
+
|
9
|
+
def get_file_hash(path: Union[str, Path], bytes_per_chunk: int = 8192) -> str:
    """Return the hexadecimal MD5 digest of a file, reading it in chunks."""
    digest = hashlib.md5()
    with open(path, "rb") as stream:
        # iter() with a sentinel keeps calling read() until it returns b"".
        for block in iter(lambda: stream.read(bytes_per_chunk), b""):
            digest.update(block)

    return digest.hexdigest()
|
19
|
+
|
20
|
+
|
21
|
+
# -----------------------------------------------------------------------------
|
22
|
+
|
23
|
+
|
24
|
+
def main():
    """Print a JSON object mapping each given file path to its MD5 digest.

    With ``--dir``, the paths used as keys are made relative to that directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("file", nargs="+")
    parser.add_argument("--dir", help="Parent directory")
    args = parser.parse_args()

    parent_dir = Path(args.dir) if args.dir else None

    hashes = {}
    for file_arg in args.file:
        file_path = Path(file_arg)
        # Hash before relativizing: the digest needs the path as given.
        digest = get_file_hash(file_path)
        key_path = file_path if parent_dir is None else file_path.relative_to(parent_dir)
        hashes[str(key_path)] = digest

    json.dump(hashes, sys.stdout)
|
43
|
+
|
44
|
+
|
45
|
+
# Run the hashing command-line tool when this module is executed directly.
if __name__ == "__main__":
    main()
|
@@ -0,0 +1,127 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
import argparse
|
3
|
+
import io
|
4
|
+
import logging
|
5
|
+
import wave
|
6
|
+
from pathlib import Path
|
7
|
+
from typing import Any, Dict
|
8
|
+
|
9
|
+
from flask import Flask, request
|
10
|
+
|
11
|
+
from . import PiperVoice
|
12
|
+
from .download import ensure_voice_exists, find_voice, get_voices
|
13
|
+
|
14
|
+
# getLogger() without a name returns the root logger.
_LOGGER = logging.getLogger()
|
15
|
+
|
16
|
+
|
17
|
+
def main() -> None:
    """Run an HTTP server that synthesizes speech for submitted text.

    The voice is loaded once at startup. When the ``--model`` path does not
    exist it is treated as a voice name that is looked up in the voices index
    and downloaded if necessary. The server answers on ``/``: a POST request
    carries the text as its UTF-8 body, a GET request as the ``text`` query
    parameter. The response is a WAV file, or HTTP 400 when no text is given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", default="0.0.0.0", help="HTTP server host")
    parser.add_argument("--port", type=int, default=5000, help="HTTP server port")
    #
    parser.add_argument("-m", "--model", required=True, help="Path to Onnx model file")
    parser.add_argument("-c", "--config", help="Path to model config file")
    #
    parser.add_argument("-s", "--speaker", type=int, help="Id of speaker (default: 0)")
    parser.add_argument(
        "--length-scale", "--length_scale", type=float, help="Phoneme length"
    )
    parser.add_argument(
        "--noise-scale", "--noise_scale", type=float, help="Generator noise"
    )
    parser.add_argument(
        "--noise-w", "--noise_w", type=float, help="Phoneme width noise"
    )
    #
    parser.add_argument("--cuda", action="store_true", help="Use GPU")
    #
    parser.add_argument(
        "--sentence-silence",
        "--sentence_silence",
        type=float,
        default=0.0,
        help="Seconds of silence after each sentence",
    )
    #
    parser.add_argument(
        "--data-dir",
        "--data_dir",
        action="append",
        default=[str(Path.cwd())],
        help="Data directory to check for downloaded models (default: current directory)",
    )
    parser.add_argument(
        "--download-dir",
        "--download_dir",
        help="Directory to download voices into (default: first data dir)",
    )
    #
    parser.add_argument(
        "--update-voices",
        action="store_true",
        help="Download latest voices.json during startup",
    )
    #
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
    _LOGGER.debug(args)

    if not args.download_dir:
        # Download to first data directory by default
        args.download_dir = args.data_dir[0]

    # Download voice if file doesn't exist
    model_path = Path(args.model)
    if not model_path.exists():
        # Load voice info
        voices_info = get_voices(args.download_dir, update_voices=args.update_voices)

        # Resolve aliases for backwards compatibility with old voice names
        aliases_info: Dict[str, Any] = {}
        for voice_info in voices_info.values():
            for voice_alias in voice_info.get("aliases", []):
                aliases_info[voice_alias] = {"_is_alias": True, **voice_info}

        voices_info.update(aliases_info)
        ensure_voice_exists(args.model, args.data_dir, args.download_dir, voices_info)
        args.model, args.config = find_voice(args.model, args.data_dir)

    # Load voice
    voice = PiperVoice.load(args.model, config_path=args.config, use_cuda=args.cuda)
    synthesize_args = {
        "speaker_id": args.speaker,
        "length_scale": args.length_scale,
        "noise_scale": args.noise_scale,
        "noise_w": args.noise_w,
        "sentence_silence": args.sentence_silence,
    }

    # Create web server
    app = Flask(__name__)

    @app.route("/", methods=["GET", "POST"])
    def app_synthesize():
        """Synthesize the request's text and return it as a WAV response."""
        if request.method == "POST":
            text = request.data.decode("utf-8")
        else:
            text = request.args.get("text", "")

        text = text.strip()
        if not text:
            # Missing text is a client error. Raising here would surface as
            # an HTTP 500 instead.
            return "No text provided", 400

        _LOGGER.debug("Synthesizing text: %s", text)
        with io.BytesIO() as wav_io:
            with wave.open(wav_io, "wb") as wav_file:
                voice.synthesize(text, wav_file, **synthesize_args)

            wav_bytes = wav_io.getvalue()

        # Declare the payload type; a bare bytes return would be served with
        # Flask's default HTML content type.
        return wav_bytes, 200, {"Content-Type": "audio/wav"}

    app.run(host=args.host, port=args.port)
|
124
|
+
|
125
|
+
|
126
|
+
# Start the HTTP server when this module is executed directly.
if __name__ == "__main__":
    main()
|
@@ -0,0 +1,12 @@
|
|
1
|
+
"""Utilities"""
|
2
|
+
import numpy as np
|
3
|
+
|
4
|
+
|
5
|
+
def audio_float_to_int16(
    audio: np.ndarray, max_wav_value: float = 32767.0
) -> np.ndarray:
    """Normalize audio to its peak and convert it to int16.

    The samples are scaled so the largest absolute value maps to
    ``max_wav_value``. The peak is floored at 0.01 so that near-silent audio
    is not amplified without bound. Values are then clipped to
    ``[-max_wav_value, max_wav_value]`` and cast to ``int16``.

    Args:
        audio: Floating-point audio samples.
        max_wav_value: Magnitude the peak sample is scaled to.

    Returns:
        The normalized samples as an ``int16`` array. Empty input yields an
        empty ``int16`` array.
    """
    if np.size(audio) == 0:
        # np.max raises ValueError on a zero-size array; there is nothing
        # to normalize, so only the dtype conversion applies.
        return np.asarray(audio).astype("int16")

    peak = max(0.01, np.max(np.abs(audio)))
    audio_norm = audio * (max_wav_value / peak)
    audio_norm = np.clip(audio_norm, -max_wav_value, max_wav_value)
    audio_norm = audio_norm.astype("int16")
    return audio_norm
|