phoonnx-0.0.0-py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- phoonnx/__init__.py +0 -0
- phoonnx/config.py +490 -0
- phoonnx/locale/ca/phonetic_spellings.txt +2 -0
- phoonnx/locale/en/phonetic_spellings.txt +1 -0
- phoonnx/locale/gl/phonetic_spellings.txt +2 -0
- phoonnx/locale/pt/phonetic_spellings.txt +2 -0
- phoonnx/phoneme_ids.py +453 -0
- phoonnx/phonemizers/__init__.py +45 -0
- phoonnx/phonemizers/ar.py +42 -0
- phoonnx/phonemizers/base.py +216 -0
- phoonnx/phonemizers/en.py +250 -0
- phoonnx/phonemizers/fa.py +46 -0
- phoonnx/phonemizers/gl.py +142 -0
- phoonnx/phonemizers/he.py +67 -0
- phoonnx/phonemizers/ja.py +119 -0
- phoonnx/phonemizers/ko.py +97 -0
- phoonnx/phonemizers/mul.py +606 -0
- phoonnx/phonemizers/vi.py +44 -0
- phoonnx/phonemizers/zh.py +308 -0
- phoonnx/thirdparty/__init__.py +0 -0
- phoonnx/thirdparty/arpa2ipa.py +249 -0
- phoonnx/thirdparty/cotovia/cotovia_aarch64 +0 -0
- phoonnx/thirdparty/cotovia/cotovia_x86_64 +0 -0
- phoonnx/thirdparty/hangul2ipa.py +783 -0
- phoonnx/thirdparty/ko_tables/aspiration.csv +20 -0
- phoonnx/thirdparty/ko_tables/assimilation.csv +31 -0
- phoonnx/thirdparty/ko_tables/double_coda.csv +17 -0
- phoonnx/thirdparty/ko_tables/hanja.tsv +8525 -0
- phoonnx/thirdparty/ko_tables/ipa.csv +22 -0
- phoonnx/thirdparty/ko_tables/neutralization.csv +11 -0
- phoonnx/thirdparty/ko_tables/tensification.csv +56 -0
- phoonnx/thirdparty/ko_tables/yale.csv +22 -0
- phoonnx/thirdparty/kog2p/__init__.py +385 -0
- phoonnx/thirdparty/kog2p/rulebook.txt +212 -0
- phoonnx/thirdparty/mantoq/__init__.py +67 -0
- phoonnx/thirdparty/mantoq/buck/__init__.py +0 -0
- phoonnx/thirdparty/mantoq/buck/phonetise_buckwalter.py +569 -0
- phoonnx/thirdparty/mantoq/buck/symbols.py +64 -0
- phoonnx/thirdparty/mantoq/buck/tokenization.py +105 -0
- phoonnx/thirdparty/mantoq/num2words.py +37 -0
- phoonnx/thirdparty/mantoq/pyarabic/__init__.py +12 -0
- phoonnx/thirdparty/mantoq/pyarabic/arabrepr.py +64 -0
- phoonnx/thirdparty/mantoq/pyarabic/araby.py +1647 -0
- phoonnx/thirdparty/mantoq/pyarabic/named_const.py +227 -0
- phoonnx/thirdparty/mantoq/pyarabic/normalize.py +161 -0
- phoonnx/thirdparty/mantoq/pyarabic/number.py +826 -0
- phoonnx/thirdparty/mantoq/pyarabic/number_const.py +1704 -0
- phoonnx/thirdparty/mantoq/pyarabic/stack.py +52 -0
- phoonnx/thirdparty/mantoq/pyarabic/trans.py +517 -0
- phoonnx/thirdparty/mantoq/unicode_symbol2label.py +4173 -0
- phoonnx/thirdparty/tashkeel/LICENSE +22 -0
- phoonnx/thirdparty/tashkeel/SOURCE +1 -0
- phoonnx/thirdparty/tashkeel/__init__.py +212 -0
- phoonnx/thirdparty/tashkeel/hint_id_map.json +18 -0
- phoonnx/thirdparty/tashkeel/input_id_map.json +56 -0
- phoonnx/thirdparty/tashkeel/model.onnx +0 -0
- phoonnx/thirdparty/tashkeel/target_id_map.json +17 -0
- phoonnx/thirdparty/zh_num.py +238 -0
- phoonnx/util.py +705 -0
- phoonnx/version.py +6 -0
- phoonnx/voice.py +521 -0
- phoonnx-0.0.0.dist-info/METADATA +255 -0
- phoonnx-0.0.0.dist-info/RECORD +86 -0
- phoonnx-0.0.0.dist-info/WHEEL +5 -0
- phoonnx-0.0.0.dist-info/top_level.txt +2 -0
- phoonnx_train/__main__.py +151 -0
- phoonnx_train/export_onnx.py +109 -0
- phoonnx_train/norm_audio/__init__.py +92 -0
- phoonnx_train/norm_audio/trim.py +54 -0
- phoonnx_train/norm_audio/vad.py +54 -0
- phoonnx_train/preprocess.py +420 -0
- phoonnx_train/vits/__init__.py +0 -0
- phoonnx_train/vits/attentions.py +427 -0
- phoonnx_train/vits/commons.py +147 -0
- phoonnx_train/vits/config.py +330 -0
- phoonnx_train/vits/dataset.py +214 -0
- phoonnx_train/vits/lightning.py +352 -0
- phoonnx_train/vits/losses.py +58 -0
- phoonnx_train/vits/mel_processing.py +139 -0
- phoonnx_train/vits/models.py +732 -0
- phoonnx_train/vits/modules.py +527 -0
- phoonnx_train/vits/monotonic_align/__init__.py +20 -0
- phoonnx_train/vits/monotonic_align/setup.py +13 -0
- phoonnx_train/vits/transforms.py +212 -0
- phoonnx_train/vits/utils.py +16 -0
- phoonnx_train/vits/wavfile.py +860 -0
phoonnx_train/vits/transforms.py
@@ -0,0 +1,212 @@
```python
import numpy as np
import torch
from torch.nn import functional as F

DEFAULT_MIN_BIN_WIDTH = 1e-3
DEFAULT_MIN_BIN_HEIGHT = 1e-3
DEFAULT_MIN_DERIVATIVE = 1e-3


def piecewise_rational_quadratic_transform(
    inputs,
    unnormalized_widths,
    unnormalized_heights,
    unnormalized_derivatives,
    inverse=False,
    tails=None,
    tail_bound=1.0,
    min_bin_width=DEFAULT_MIN_BIN_WIDTH,
    min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
    min_derivative=DEFAULT_MIN_DERIVATIVE,
):

    if tails is None:
        spline_fn = rational_quadratic_spline
        spline_kwargs = {}
    else:
        spline_fn = unconstrained_rational_quadratic_spline
        spline_kwargs = {"tails": tails, "tail_bound": tail_bound}

    outputs, logabsdet = spline_fn(
        inputs=inputs,
        unnormalized_widths=unnormalized_widths,
        unnormalized_heights=unnormalized_heights,
        unnormalized_derivatives=unnormalized_derivatives,
        inverse=inverse,
        min_bin_width=min_bin_width,
        min_bin_height=min_bin_height,
        min_derivative=min_derivative,
        **spline_kwargs
    )
    return outputs, logabsdet


def searchsorted(bin_locations, inputs, eps=1e-6):
    # bin_locations[..., -1] += eps
    bin_locations[..., bin_locations.size(-1) - 1] += eps
    return torch.sum(inputs[..., None] >= bin_locations, dim=-1) - 1


def unconstrained_rational_quadratic_spline(
    inputs,
    unnormalized_widths,
    unnormalized_heights,
    unnormalized_derivatives,
    inverse=False,
    tails="linear",
    tail_bound=1.0,
    min_bin_width=DEFAULT_MIN_BIN_WIDTH,
    min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
    min_derivative=DEFAULT_MIN_DERIVATIVE,
):
    inside_interval_mask = (inputs >= -tail_bound) & (inputs <= tail_bound)
    outside_interval_mask = ~inside_interval_mask

    outputs = torch.zeros_like(inputs)
    logabsdet = torch.zeros_like(inputs)

    if tails == "linear":
        unnormalized_derivatives = F.pad(unnormalized_derivatives, pad=(1, 1))
        constant = np.log(np.exp(1 - min_derivative) - 1)
        unnormalized_derivatives[..., 0] = constant
        # unnormalized_derivatives[..., -1] = constant
        unnormalized_derivatives[..., unnormalized_derivatives.size(-1) - 1] = constant

        outputs[outside_interval_mask] = inputs[outside_interval_mask]
        logabsdet[outside_interval_mask] = 0
    else:
        raise RuntimeError("{} tails are not implemented.".format(tails))

    (
        outputs[inside_interval_mask],
        logabsdet[inside_interval_mask],
    ) = rational_quadratic_spline(
        inputs=inputs[inside_interval_mask],
        unnormalized_widths=unnormalized_widths[inside_interval_mask, :],
        unnormalized_heights=unnormalized_heights[inside_interval_mask, :],
        unnormalized_derivatives=unnormalized_derivatives[inside_interval_mask, :],
        inverse=inverse,
        left=-tail_bound,
        right=tail_bound,
        bottom=-tail_bound,
        top=tail_bound,
        min_bin_width=min_bin_width,
        min_bin_height=min_bin_height,
        min_derivative=min_derivative,
    )

    return outputs, logabsdet


def rational_quadratic_spline(
    inputs,
    unnormalized_widths,
    unnormalized_heights,
    unnormalized_derivatives,
    inverse=False,
    left=0.0,
    right=1.0,
    bottom=0.0,
    top=1.0,
    min_bin_width=DEFAULT_MIN_BIN_WIDTH,
    min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
    min_derivative=DEFAULT_MIN_DERIVATIVE,
):
    # if torch.min(inputs) < left or torch.max(inputs) > right:
    #     raise ValueError("Input to a transform is not within its domain")

    num_bins = unnormalized_widths.shape[-1]

    # if min_bin_width * num_bins > 1.0:
    #     raise ValueError("Minimal bin width too large for the number of bins")
    # if min_bin_height * num_bins > 1.0:
    #     raise ValueError("Minimal bin height too large for the number of bins")

    widths = F.softmax(unnormalized_widths, dim=-1)
    widths = min_bin_width + (1 - min_bin_width * num_bins) * widths
    cumwidths = torch.cumsum(widths, dim=-1)
    cumwidths = F.pad(cumwidths, pad=(1, 0), mode="constant", value=0.0)
    cumwidths = (right - left) * cumwidths + left
    cumwidths[..., 0] = left
    # cumwidths[..., -1] = right
    cumwidths[..., cumwidths.size(-1) - 1] = right
    widths = cumwidths[..., 1:] - cumwidths[..., :-1]

    derivatives = min_derivative + F.softplus(unnormalized_derivatives)

    heights = F.softmax(unnormalized_heights, dim=-1)
    heights = min_bin_height + (1 - min_bin_height * num_bins) * heights
    cumheights = torch.cumsum(heights, dim=-1)
    cumheights = F.pad(cumheights, pad=(1, 0), mode="constant", value=0.0)
    cumheights = (top - bottom) * cumheights + bottom
    cumheights[..., 0] = bottom
    # cumheights[..., -1] = top
    cumheights[..., cumheights.size(-1) - 1] = top
    heights = cumheights[..., 1:] - cumheights[..., :-1]

    if inverse:
        bin_idx = searchsorted(cumheights, inputs)[..., None]
    else:
        bin_idx = searchsorted(cumwidths, inputs)[..., None]

    input_cumwidths = cumwidths.gather(-1, bin_idx)[..., 0]
    input_bin_widths = widths.gather(-1, bin_idx)[..., 0]

    input_cumheights = cumheights.gather(-1, bin_idx)[..., 0]
    delta = heights / widths
    input_delta = delta.gather(-1, bin_idx)[..., 0]

    input_derivatives = derivatives.gather(-1, bin_idx)[..., 0]
    input_derivatives_plus_one = derivatives[..., 1:].gather(-1, bin_idx)[..., 0]

    input_heights = heights.gather(-1, bin_idx)[..., 0]

    if inverse:
        a = (inputs - input_cumheights) * (
            input_derivatives + input_derivatives_plus_one - 2 * input_delta
        ) + input_heights * (input_delta - input_derivatives)
        b = input_heights * input_derivatives - (inputs - input_cumheights) * (
            input_derivatives + input_derivatives_plus_one - 2 * input_delta
        )
        c = -input_delta * (inputs - input_cumheights)

        discriminant = b.pow(2) - 4 * a * c
        assert (discriminant >= 0).all(), discriminant

        root = (2 * c) / (-b - torch.sqrt(discriminant))
        outputs = root * input_bin_widths + input_cumwidths

        theta_one_minus_theta = root * (1 - root)
        denominator = input_delta + (
            (input_derivatives + input_derivatives_plus_one - 2 * input_delta)
            * theta_one_minus_theta
        )
        derivative_numerator = input_delta.pow(2) * (
            input_derivatives_plus_one * root.pow(2)
            + 2 * input_delta * theta_one_minus_theta
            + input_derivatives * (1 - root).pow(2)
        )
        logabsdet = torch.log(derivative_numerator) - 2 * torch.log(denominator)

        return outputs, -logabsdet

    theta = (inputs - input_cumwidths) / input_bin_widths
    theta_one_minus_theta = theta * (1 - theta)

    numerator = input_heights * (
        input_delta * theta.pow(2) + input_derivatives * theta_one_minus_theta
    )
    denominator = input_delta + (
        (input_derivatives + input_derivatives_plus_one - 2 * input_delta)
        * theta_one_minus_theta
    )
    outputs = input_cumheights + numerator / denominator

    derivative_numerator = input_delta.pow(2) * (
        input_derivatives_plus_one * theta.pow(2)
        + 2 * input_delta * theta_one_minus_theta
        + input_derivatives * (1 - theta).pow(2)
    )
    logabsdet = torch.log(derivative_numerator) - 2 * torch.log(denominator)

    return outputs, logabsdet
```
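For orientation: this file implements the monotonic rational-quadratic spline transform used by VITS-style normalizing flows (following Durkan et al., Neural Spline Flows). The negative-index assignments from the upstream code are commented out and replaced with explicit `size(-1) - 1` indexing, presumably to keep the module ONNX-exportable. Below is a minimal usage sketch, not taken from phoonnx itself; the tensor shapes are invented for illustration:

```python
import torch
from phoonnx_train.vits.transforms import piecewise_rational_quadratic_transform

# Hypothetical shapes: batch of 2, 5 channels, 10 spline bins.
num_bins = 10
inputs = torch.rand(2, 5) * 2 - 1  # values in [-1, 1]
unnormalized_widths = torch.randn(2, 5, num_bins)
unnormalized_heights = torch.randn(2, 5, num_bins)
# With tails="linear", the function pads the derivatives by one on each side,
# so num_bins - 1 values are expected here.
unnormalized_derivatives = torch.randn(2, 5, num_bins - 1)

outputs, logabsdet = piecewise_rational_quadratic_transform(
    inputs,
    unnormalized_widths,
    unnormalized_heights,
    unnormalized_derivatives,
    inverse=False,
    tails="linear",
    tail_bound=1.0,
)

# The transform is invertible: running it again with inverse=True on its own
# output recovers the input (up to float precision).
recovered, inv_logabsdet = piecewise_rational_quadratic_transform(
    outputs,
    unnormalized_widths,
    unnormalized_heights,
    unnormalized_derivatives,
    inverse=True,
    tails="linear",
    tail_bound=1.0,
)
assert torch.allclose(recovered, inputs, atol=1e-4)
```

This invertibility is what makes the spline usable as a flow layer: the forward pass yields `logabsdet` for the likelihood during training, and the inverse pass is used at synthesis time. Inputs outside `[-tail_bound, tail_bound]` pass through unchanged with zero log-determinant.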
phoonnx_train/vits/utils.py
@@ -0,0 +1,16 @@
```python
import numpy as np
import torch


def to_gpu(x: torch.Tensor) -> torch.Tensor:
    return x.contiguous().cuda(non_blocking=True)


def audio_float_to_int16(
    audio: np.ndarray, max_wav_value: float = 32767.0
) -> np.ndarray:
    """Normalize audio and convert to int16 range"""
    audio_norm = audio * (max_wav_value / max(0.01, np.max(np.abs(audio))))
    audio_norm = np.clip(audio_norm, -max_wav_value, max_wav_value)
    audio_norm = audio_norm.astype("int16")
    return audio_norm
```
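A quick sketch of how `audio_float_to_int16` might be used to turn float model output into WAV-ready PCM; the sine-wave input here is invented for the example, not taken from phoonnx:

```python
import numpy as np
from phoonnx_train.vits.utils import audio_float_to_int16

# Hypothetical input: one second of a 440 Hz sine at 22.05 kHz, float in [-1, 1].
sample_rate = 22050
t = np.linspace(0.0, 1.0, sample_rate, endpoint=False)
audio = 0.5 * np.sin(2 * np.pi * 440.0 * t)

pcm = audio_float_to_int16(audio)
print(pcm.dtype, pcm.min(), pcm.max())  # int16, peaks near +/-32767 after normalization
```

Note the `max(0.01, ...)` guard: it avoids division by zero on silent input and caps the applied gain, so clips whose peak is below 0.01 are not boosted all the way to full scale. `to_gpu` is a thin wrapper over `contiguous().cuda(non_blocking=True)` and requires a CUDA-capable PyTorch build.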