phoonnx-0.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. phoonnx/__init__.py +0 -0
  2. phoonnx/config.py +490 -0
  3. phoonnx/locale/ca/phonetic_spellings.txt +2 -0
  4. phoonnx/locale/en/phonetic_spellings.txt +1 -0
  5. phoonnx/locale/gl/phonetic_spellings.txt +2 -0
  6. phoonnx/locale/pt/phonetic_spellings.txt +2 -0
  7. phoonnx/phoneme_ids.py +453 -0
  8. phoonnx/phonemizers/__init__.py +45 -0
  9. phoonnx/phonemizers/ar.py +42 -0
  10. phoonnx/phonemizers/base.py +216 -0
  11. phoonnx/phonemizers/en.py +250 -0
  12. phoonnx/phonemizers/fa.py +46 -0
  13. phoonnx/phonemizers/gl.py +142 -0
  14. phoonnx/phonemizers/he.py +67 -0
  15. phoonnx/phonemizers/ja.py +119 -0
  16. phoonnx/phonemizers/ko.py +97 -0
  17. phoonnx/phonemizers/mul.py +606 -0
  18. phoonnx/phonemizers/vi.py +44 -0
  19. phoonnx/phonemizers/zh.py +308 -0
  20. phoonnx/thirdparty/__init__.py +0 -0
  21. phoonnx/thirdparty/arpa2ipa.py +249 -0
  22. phoonnx/thirdparty/cotovia/cotovia_aarch64 +0 -0
  23. phoonnx/thirdparty/cotovia/cotovia_x86_64 +0 -0
  24. phoonnx/thirdparty/hangul2ipa.py +783 -0
  25. phoonnx/thirdparty/ko_tables/aspiration.csv +20 -0
  26. phoonnx/thirdparty/ko_tables/assimilation.csv +31 -0
  27. phoonnx/thirdparty/ko_tables/double_coda.csv +17 -0
  28. phoonnx/thirdparty/ko_tables/hanja.tsv +8525 -0
  29. phoonnx/thirdparty/ko_tables/ipa.csv +22 -0
  30. phoonnx/thirdparty/ko_tables/neutralization.csv +11 -0
  31. phoonnx/thirdparty/ko_tables/tensification.csv +56 -0
  32. phoonnx/thirdparty/ko_tables/yale.csv +22 -0
  33. phoonnx/thirdparty/kog2p/__init__.py +385 -0
  34. phoonnx/thirdparty/kog2p/rulebook.txt +212 -0
  35. phoonnx/thirdparty/mantoq/__init__.py +67 -0
  36. phoonnx/thirdparty/mantoq/buck/__init__.py +0 -0
  37. phoonnx/thirdparty/mantoq/buck/phonetise_buckwalter.py +569 -0
  38. phoonnx/thirdparty/mantoq/buck/symbols.py +64 -0
  39. phoonnx/thirdparty/mantoq/buck/tokenization.py +105 -0
  40. phoonnx/thirdparty/mantoq/num2words.py +37 -0
  41. phoonnx/thirdparty/mantoq/pyarabic/__init__.py +12 -0
  42. phoonnx/thirdparty/mantoq/pyarabic/arabrepr.py +64 -0
  43. phoonnx/thirdparty/mantoq/pyarabic/araby.py +1647 -0
  44. phoonnx/thirdparty/mantoq/pyarabic/named_const.py +227 -0
  45. phoonnx/thirdparty/mantoq/pyarabic/normalize.py +161 -0
  46. phoonnx/thirdparty/mantoq/pyarabic/number.py +826 -0
  47. phoonnx/thirdparty/mantoq/pyarabic/number_const.py +1704 -0
  48. phoonnx/thirdparty/mantoq/pyarabic/stack.py +52 -0
  49. phoonnx/thirdparty/mantoq/pyarabic/trans.py +517 -0
  50. phoonnx/thirdparty/mantoq/unicode_symbol2label.py +4173 -0
  51. phoonnx/thirdparty/tashkeel/LICENSE +22 -0
  52. phoonnx/thirdparty/tashkeel/SOURCE +1 -0
  53. phoonnx/thirdparty/tashkeel/__init__.py +212 -0
  54. phoonnx/thirdparty/tashkeel/hint_id_map.json +18 -0
  55. phoonnx/thirdparty/tashkeel/input_id_map.json +56 -0
  56. phoonnx/thirdparty/tashkeel/model.onnx +0 -0
  57. phoonnx/thirdparty/tashkeel/target_id_map.json +17 -0
  58. phoonnx/thirdparty/zh_num.py +238 -0
  59. phoonnx/util.py +705 -0
  60. phoonnx/version.py +6 -0
  61. phoonnx/voice.py +521 -0
  62. phoonnx-0.0.0.dist-info/METADATA +255 -0
  63. phoonnx-0.0.0.dist-info/RECORD +86 -0
  64. phoonnx-0.0.0.dist-info/WHEEL +5 -0
  65. phoonnx-0.0.0.dist-info/top_level.txt +2 -0
  66. phoonnx_train/__main__.py +151 -0
  67. phoonnx_train/export_onnx.py +109 -0
  68. phoonnx_train/norm_audio/__init__.py +92 -0
  69. phoonnx_train/norm_audio/trim.py +54 -0
  70. phoonnx_train/norm_audio/vad.py +54 -0
  71. phoonnx_train/preprocess.py +420 -0
  72. phoonnx_train/vits/__init__.py +0 -0
  73. phoonnx_train/vits/attentions.py +427 -0
  74. phoonnx_train/vits/commons.py +147 -0
  75. phoonnx_train/vits/config.py +330 -0
  76. phoonnx_train/vits/dataset.py +214 -0
  77. phoonnx_train/vits/lightning.py +352 -0
  78. phoonnx_train/vits/losses.py +58 -0
  79. phoonnx_train/vits/mel_processing.py +139 -0
  80. phoonnx_train/vits/models.py +732 -0
  81. phoonnx_train/vits/modules.py +527 -0
  82. phoonnx_train/vits/monotonic_align/__init__.py +20 -0
  83. phoonnx_train/vits/monotonic_align/setup.py +13 -0
  84. phoonnx_train/vits/transforms.py +212 -0
  85. phoonnx_train/vits/utils.py +16 -0
  86. phoonnx_train/vits/wavfile.py +860 -0
phoonnx_train/vits/transforms.py
@@ -0,0 +1,212 @@
+ import numpy as np
+ import torch
+ from torch.nn import functional as F
+
+ DEFAULT_MIN_BIN_WIDTH = 1e-3
+ DEFAULT_MIN_BIN_HEIGHT = 1e-3
+ DEFAULT_MIN_DERIVATIVE = 1e-3
+
+
+ def piecewise_rational_quadratic_transform(
+     inputs,
+     unnormalized_widths,
+     unnormalized_heights,
+     unnormalized_derivatives,
+     inverse=False,
+     tails=None,
+     tail_bound=1.0,
+     min_bin_width=DEFAULT_MIN_BIN_WIDTH,
+     min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
+     min_derivative=DEFAULT_MIN_DERIVATIVE,
+ ):
+
+     if tails is None:
+         spline_fn = rational_quadratic_spline
+         spline_kwargs = {}
+     else:
+         spline_fn = unconstrained_rational_quadratic_spline
+         spline_kwargs = {"tails": tails, "tail_bound": tail_bound}
+
+     outputs, logabsdet = spline_fn(
+         inputs=inputs,
+         unnormalized_widths=unnormalized_widths,
+         unnormalized_heights=unnormalized_heights,
+         unnormalized_derivatives=unnormalized_derivatives,
+         inverse=inverse,
+         min_bin_width=min_bin_width,
+         min_bin_height=min_bin_height,
+         min_derivative=min_derivative,
+         **spline_kwargs
+     )
+     return outputs, logabsdet
+
+
+ def searchsorted(bin_locations, inputs, eps=1e-6):
+     # bin_locations[..., -1] += eps
+     bin_locations[..., bin_locations.size(-1) - 1] += eps
+     return torch.sum(inputs[..., None] >= bin_locations, dim=-1) - 1
+
+
+ def unconstrained_rational_quadratic_spline(
+     inputs,
+     unnormalized_widths,
+     unnormalized_heights,
+     unnormalized_derivatives,
+     inverse=False,
+     tails="linear",
+     tail_bound=1.0,
+     min_bin_width=DEFAULT_MIN_BIN_WIDTH,
+     min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
+     min_derivative=DEFAULT_MIN_DERIVATIVE,
+ ):
+     inside_interval_mask = (inputs >= -tail_bound) & (inputs <= tail_bound)
+     outside_interval_mask = ~inside_interval_mask
+
+     outputs = torch.zeros_like(inputs)
+     logabsdet = torch.zeros_like(inputs)
+
+     if tails == "linear":
+         unnormalized_derivatives = F.pad(unnormalized_derivatives, pad=(1, 1))
+         constant = np.log(np.exp(1 - min_derivative) - 1)
+         unnormalized_derivatives[..., 0] = constant
+         # unnormalized_derivatives[..., -1] = constant
+         unnormalized_derivatives[..., unnormalized_derivatives.size(-1) - 1] = constant
+
+         outputs[outside_interval_mask] = inputs[outside_interval_mask]
+         logabsdet[outside_interval_mask] = 0
+     else:
+         raise RuntimeError("{} tails are not implemented.".format(tails))
+
+     (
+         outputs[inside_interval_mask],
+         logabsdet[inside_interval_mask],
+     ) = rational_quadratic_spline(
+         inputs=inputs[inside_interval_mask],
+         unnormalized_widths=unnormalized_widths[inside_interval_mask, :],
+         unnormalized_heights=unnormalized_heights[inside_interval_mask, :],
+         unnormalized_derivatives=unnormalized_derivatives[inside_interval_mask, :],
+         inverse=inverse,
+         left=-tail_bound,
+         right=tail_bound,
+         bottom=-tail_bound,
+         top=tail_bound,
+         min_bin_width=min_bin_width,
+         min_bin_height=min_bin_height,
+         min_derivative=min_derivative,
+     )
+
+     return outputs, logabsdet
+
+
+ def rational_quadratic_spline(
+     inputs,
+     unnormalized_widths,
+     unnormalized_heights,
+     unnormalized_derivatives,
+     inverse=False,
+     left=0.0,
+     right=1.0,
+     bottom=0.0,
+     top=1.0,
+     min_bin_width=DEFAULT_MIN_BIN_WIDTH,
+     min_bin_height=DEFAULT_MIN_BIN_HEIGHT,
+     min_derivative=DEFAULT_MIN_DERIVATIVE,
+ ):
+     # if torch.min(inputs) < left or torch.max(inputs) > right:
+     #     raise ValueError("Input to a transform is not within its domain")
+
+     num_bins = unnormalized_widths.shape[-1]
+
+     # if min_bin_width * num_bins > 1.0:
+     #     raise ValueError("Minimal bin width too large for the number of bins")
+     # if min_bin_height * num_bins > 1.0:
+     #     raise ValueError("Minimal bin height too large for the number of bins")
+
+     widths = F.softmax(unnormalized_widths, dim=-1)
+     widths = min_bin_width + (1 - min_bin_width * num_bins) * widths
+     cumwidths = torch.cumsum(widths, dim=-1)
+     cumwidths = F.pad(cumwidths, pad=(1, 0), mode="constant", value=0.0)
+     cumwidths = (right - left) * cumwidths + left
+     cumwidths[..., 0] = left
+     # cumwidths[..., -1] = right
+     cumwidths[..., cumwidths.size(-1) - 1] = right
+     widths = cumwidths[..., 1:] - cumwidths[..., :-1]
+
+     derivatives = min_derivative + F.softplus(unnormalized_derivatives)
+
+     heights = F.softmax(unnormalized_heights, dim=-1)
+     heights = min_bin_height + (1 - min_bin_height * num_bins) * heights
+     cumheights = torch.cumsum(heights, dim=-1)
+     cumheights = F.pad(cumheights, pad=(1, 0), mode="constant", value=0.0)
+     cumheights = (top - bottom) * cumheights + bottom
+     cumheights[..., 0] = bottom
+     # cumheights[..., -1] = top
+     cumheights[..., cumheights.size(-1) - 1] = top
+     heights = cumheights[..., 1:] - cumheights[..., :-1]
+
+     if inverse:
+         bin_idx = searchsorted(cumheights, inputs)[..., None]
+     else:
+         bin_idx = searchsorted(cumwidths, inputs)[..., None]
+
+     input_cumwidths = cumwidths.gather(-1, bin_idx)[..., 0]
+     input_bin_widths = widths.gather(-1, bin_idx)[..., 0]
+
+     input_cumheights = cumheights.gather(-1, bin_idx)[..., 0]
+     delta = heights / widths
+     input_delta = delta.gather(-1, bin_idx)[..., 0]
+
+     input_derivatives = derivatives.gather(-1, bin_idx)[..., 0]
+     input_derivatives_plus_one = derivatives[..., 1:].gather(-1, bin_idx)[..., 0]
+
+     input_heights = heights.gather(-1, bin_idx)[..., 0]
+
+     if inverse:
+         a = (inputs - input_cumheights) * (
+             input_derivatives + input_derivatives_plus_one - 2 * input_delta
+         ) + input_heights * (input_delta - input_derivatives)
+         b = input_heights * input_derivatives - (inputs - input_cumheights) * (
+             input_derivatives + input_derivatives_plus_one - 2 * input_delta
+         )
+         c = -input_delta * (inputs - input_cumheights)
+
+         discriminant = b.pow(2) - 4 * a * c
+         assert (discriminant >= 0).all(), discriminant
+
+         root = (2 * c) / (-b - torch.sqrt(discriminant))
+         outputs = root * input_bin_widths + input_cumwidths
+
+         theta_one_minus_theta = root * (1 - root)
+         denominator = input_delta + (
+             (input_derivatives + input_derivatives_plus_one - 2 * input_delta)
+             * theta_one_minus_theta
+         )
+         derivative_numerator = input_delta.pow(2) * (
+             input_derivatives_plus_one * root.pow(2)
+             + 2 * input_delta * theta_one_minus_theta
+             + input_derivatives * (1 - root).pow(2)
+         )
+         logabsdet = torch.log(derivative_numerator) - 2 * torch.log(denominator)
+
+         return outputs, -logabsdet
+
+     theta = (inputs - input_cumwidths) / input_bin_widths
+     theta_one_minus_theta = theta * (1 - theta)
+
+     numerator = input_heights * (
+         input_delta * theta.pow(2) + input_derivatives * theta_one_minus_theta
+     )
+     denominator = input_delta + (
+         (input_derivatives + input_derivatives_plus_one - 2 * input_delta)
+         * theta_one_minus_theta
+     )
+     outputs = input_cumheights + numerator / denominator
+
+     derivative_numerator = input_delta.pow(2) * (
+         input_derivatives_plus_one * theta.pow(2)
+         + 2 * input_delta * theta_one_minus_theta
+         + input_derivatives * (1 - theta).pow(2)
+     )
+     logabsdet = torch.log(derivative_numerator) - 2 * torch.log(denominator)
+
+     return outputs, logabsdet
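
This hunk is the standard piecewise rational-quadratic spline used by the VITS flow layers. Note the `size(-1) - 1` indexing, which replaces the negative indexing kept in the comments, presumably for ONNX-export compatibility (the wheel ships `phoonnx_train/export_onnx.py`). A minimal round-trip sketch, assuming the wheel is installed so the module imports as `phoonnx_train.vits.transforms`:

    import torch
    from phoonnx_train.vits.transforms import piecewise_rational_quadratic_transform

    torch.manual_seed(0)
    batch, num_bins = 4, 10
    x = torch.rand(batch) * 2 - 1          # inputs inside [-tail_bound, tail_bound]
    uw = torch.randn(batch, num_bins)      # unnormalized bin widths
    uh = torch.randn(batch, num_bins)      # unnormalized bin heights
    ud = torch.randn(batch, num_bins - 1)  # interior knot derivatives; "linear" tails pad both ends

    y, logdet = piecewise_rational_quadratic_transform(x, uw, uh, ud, tails="linear")
    x2, logdet_inv = piecewise_rational_quadratic_transform(
        y, uw, uh, ud, inverse=True, tails="linear"
    )

    assert torch.allclose(x, x2, atol=1e-4)                # inverse undoes the forward map
    assert torch.allclose(logdet, -logdet_inv, atol=1e-4)  # log-determinants cancel

Forward and inverse share the same bin parameters, so the mapping round-trips exactly and the log-determinants cancel, which is what the flow's change-of-variables computation relies on.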
phoonnx_train/vits/utils.py
@@ -0,0 +1,16 @@
+ import numpy as np
+ import torch
+
+
+ def to_gpu(x: torch.Tensor) -> torch.Tensor:
+     return x.contiguous().cuda(non_blocking=True)
+
+
+ def audio_float_to_int16(
+     audio: np.ndarray, max_wav_value: float = 32767.0
+ ) -> np.ndarray:
+     """Normalize audio and convert to int16 range"""
+     audio_norm = audio * (max_wav_value / max(0.01, np.max(np.abs(audio))))
+     audio_norm = np.clip(audio_norm, -max_wav_value, max_wav_value)
+     audio_norm = audio_norm.astype("int16")
+     return audio_norm
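
`audio_float_to_int16` peak-normalizes a float waveform to `max_wav_value`, with a 0.01 floor on the measured peak so near-silent clips are not amplified into noise, then clips and casts to int16. A short usage sketch (the test tone is illustrative only):

    import numpy as np
    from phoonnx_train.vits.utils import audio_float_to_int16

    # one second of a 440 Hz tone at 22.05 kHz, peak amplitude 0.5
    t = np.linspace(0, 1, 22050, endpoint=False)
    audio = 0.5 * np.sin(2 * np.pi * 440 * t).astype(np.float32)

    pcm = audio_float_to_int16(audio)
    print(pcm.dtype, pcm.min(), pcm.max())  # int16, roughly -32767 .. 32767 after normalization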