phoonnx-0.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. phoonnx/__init__.py +0 -0
  2. phoonnx/config.py +490 -0
  3. phoonnx/locale/ca/phonetic_spellings.txt +2 -0
  4. phoonnx/locale/en/phonetic_spellings.txt +1 -0
  5. phoonnx/locale/gl/phonetic_spellings.txt +2 -0
  6. phoonnx/locale/pt/phonetic_spellings.txt +2 -0
  7. phoonnx/phoneme_ids.py +453 -0
  8. phoonnx/phonemizers/__init__.py +45 -0
  9. phoonnx/phonemizers/ar.py +42 -0
  10. phoonnx/phonemizers/base.py +216 -0
  11. phoonnx/phonemizers/en.py +250 -0
  12. phoonnx/phonemizers/fa.py +46 -0
  13. phoonnx/phonemizers/gl.py +142 -0
  14. phoonnx/phonemizers/he.py +67 -0
  15. phoonnx/phonemizers/ja.py +119 -0
  16. phoonnx/phonemizers/ko.py +97 -0
  17. phoonnx/phonemizers/mul.py +606 -0
  18. phoonnx/phonemizers/vi.py +44 -0
  19. phoonnx/phonemizers/zh.py +308 -0
  20. phoonnx/thirdparty/__init__.py +0 -0
  21. phoonnx/thirdparty/arpa2ipa.py +249 -0
  22. phoonnx/thirdparty/cotovia/cotovia_aarch64 +0 -0
  23. phoonnx/thirdparty/cotovia/cotovia_x86_64 +0 -0
  24. phoonnx/thirdparty/hangul2ipa.py +783 -0
  25. phoonnx/thirdparty/ko_tables/aspiration.csv +20 -0
  26. phoonnx/thirdparty/ko_tables/assimilation.csv +31 -0
  27. phoonnx/thirdparty/ko_tables/double_coda.csv +17 -0
  28. phoonnx/thirdparty/ko_tables/hanja.tsv +8525 -0
  29. phoonnx/thirdparty/ko_tables/ipa.csv +22 -0
  30. phoonnx/thirdparty/ko_tables/neutralization.csv +11 -0
  31. phoonnx/thirdparty/ko_tables/tensification.csv +56 -0
  32. phoonnx/thirdparty/ko_tables/yale.csv +22 -0
  33. phoonnx/thirdparty/kog2p/__init__.py +385 -0
  34. phoonnx/thirdparty/kog2p/rulebook.txt +212 -0
  35. phoonnx/thirdparty/mantoq/__init__.py +67 -0
  36. phoonnx/thirdparty/mantoq/buck/__init__.py +0 -0
  37. phoonnx/thirdparty/mantoq/buck/phonetise_buckwalter.py +569 -0
  38. phoonnx/thirdparty/mantoq/buck/symbols.py +64 -0
  39. phoonnx/thirdparty/mantoq/buck/tokenization.py +105 -0
  40. phoonnx/thirdparty/mantoq/num2words.py +37 -0
  41. phoonnx/thirdparty/mantoq/pyarabic/__init__.py +12 -0
  42. phoonnx/thirdparty/mantoq/pyarabic/arabrepr.py +64 -0
  43. phoonnx/thirdparty/mantoq/pyarabic/araby.py +1647 -0
  44. phoonnx/thirdparty/mantoq/pyarabic/named_const.py +227 -0
  45. phoonnx/thirdparty/mantoq/pyarabic/normalize.py +161 -0
  46. phoonnx/thirdparty/mantoq/pyarabic/number.py +826 -0
  47. phoonnx/thirdparty/mantoq/pyarabic/number_const.py +1704 -0
  48. phoonnx/thirdparty/mantoq/pyarabic/stack.py +52 -0
  49. phoonnx/thirdparty/mantoq/pyarabic/trans.py +517 -0
  50. phoonnx/thirdparty/mantoq/unicode_symbol2label.py +4173 -0
  51. phoonnx/thirdparty/tashkeel/LICENSE +22 -0
  52. phoonnx/thirdparty/tashkeel/SOURCE +1 -0
  53. phoonnx/thirdparty/tashkeel/__init__.py +212 -0
  54. phoonnx/thirdparty/tashkeel/hint_id_map.json +18 -0
  55. phoonnx/thirdparty/tashkeel/input_id_map.json +56 -0
  56. phoonnx/thirdparty/tashkeel/model.onnx +0 -0
  57. phoonnx/thirdparty/tashkeel/target_id_map.json +17 -0
  58. phoonnx/thirdparty/zh_num.py +238 -0
  59. phoonnx/util.py +705 -0
  60. phoonnx/version.py +6 -0
  61. phoonnx/voice.py +521 -0
  62. phoonnx-0.0.0.dist-info/METADATA +255 -0
  63. phoonnx-0.0.0.dist-info/RECORD +86 -0
  64. phoonnx-0.0.0.dist-info/WHEEL +5 -0
  65. phoonnx-0.0.0.dist-info/top_level.txt +2 -0
  66. phoonnx_train/__main__.py +151 -0
  67. phoonnx_train/export_onnx.py +109 -0
  68. phoonnx_train/norm_audio/__init__.py +92 -0
  69. phoonnx_train/norm_audio/trim.py +54 -0
  70. phoonnx_train/norm_audio/vad.py +54 -0
  71. phoonnx_train/preprocess.py +420 -0
  72. phoonnx_train/vits/__init__.py +0 -0
  73. phoonnx_train/vits/attentions.py +427 -0
  74. phoonnx_train/vits/commons.py +147 -0
  75. phoonnx_train/vits/config.py +330 -0
  76. phoonnx_train/vits/dataset.py +214 -0
  77. phoonnx_train/vits/lightning.py +352 -0
  78. phoonnx_train/vits/losses.py +58 -0
  79. phoonnx_train/vits/mel_processing.py +139 -0
  80. phoonnx_train/vits/models.py +732 -0
  81. phoonnx_train/vits/modules.py +527 -0
  82. phoonnx_train/vits/monotonic_align/__init__.py +20 -0
  83. phoonnx_train/vits/monotonic_align/setup.py +13 -0
  84. phoonnx_train/vits/transforms.py +212 -0
  85. phoonnx_train/vits/utils.py +16 -0
  86. phoonnx_train/vits/wavfile.py +860 -0
phoonnx_train/vits/modules.py
@@ -0,0 +1,527 @@
+ import math
+ import typing
+
+ import torch
+ from torch import nn
+ from torch.nn import Conv1d
+ from torch.nn import functional as F
+ from torch.nn.utils import remove_weight_norm, weight_norm
+
+ from .commons import fused_add_tanh_sigmoid_multiply, get_padding, init_weights
+ from .transforms import piecewise_rational_quadratic_transform
+
+
+ class LayerNorm(nn.Module):
+     def __init__(self, channels: int, eps: float = 1e-5):
+         super().__init__()
+         self.channels = channels
+         self.eps = eps
+
+         self.gamma = nn.Parameter(torch.ones(channels))
+         self.beta = nn.Parameter(torch.zeros(channels))
+
+     def forward(self, x):
+         x = x.transpose(1, -1)
+         x = F.layer_norm(x, (self.channels,), self.gamma, self.beta, self.eps)
+         return x.transpose(1, -1)
+
+
+ class ConvReluNorm(nn.Module):
+     def __init__(
+         self,
+         in_channels: int,
+         hidden_channels: int,
+         out_channels: int,
+         kernel_size: int,
+         n_layers: int,
+         p_dropout: float,
+     ):
+         super().__init__()
+         self.in_channels = in_channels
+         self.hidden_channels = hidden_channels
+         self.out_channels = out_channels
+         self.kernel_size = kernel_size
+         self.n_layers = n_layers
+         self.p_dropout = p_dropout
+         assert n_layers > 1, "Number of layers should be larger than 1."
+
+         self.conv_layers = nn.ModuleList()
+         self.norm_layers = nn.ModuleList()
+         self.conv_layers.append(
+             nn.Conv1d(
+                 in_channels, hidden_channels, kernel_size, padding=kernel_size // 2
+             )
+         )
+         self.norm_layers.append(LayerNorm(hidden_channels))
+         self.relu_drop = nn.Sequential(nn.ReLU(), nn.Dropout(p_dropout))
+         for _ in range(n_layers - 1):
+             self.conv_layers.append(
+                 nn.Conv1d(
+                     hidden_channels,
+                     hidden_channels,
+                     kernel_size,
+                     padding=kernel_size // 2,
+                 )
+             )
+             self.norm_layers.append(LayerNorm(hidden_channels))
+         self.proj = nn.Conv1d(hidden_channels, out_channels, 1)
+         self.proj.weight.data.zero_()
+         self.proj.bias.data.zero_()
+
+     def forward(self, x, x_mask):
+         x_org = x
+         for i in range(self.n_layers):
+             x = self.conv_layers[i](x * x_mask)
+             x = self.norm_layers[i](x)
+             x = self.relu_drop(x)
+         x = x_org + self.proj(x)
+         return x * x_mask
+
+
+ class DDSConv(nn.Module):
+     """
+     Dilated and Depth-Separable Convolution
+     """
+
+     def __init__(
+         self, channels: int, kernel_size: int, n_layers: int, p_dropout: float = 0.0
+     ):
+         super().__init__()
+         self.channels = channels
+         self.kernel_size = kernel_size
+         self.n_layers = n_layers
+         self.p_dropout = p_dropout
+
+         self.drop = nn.Dropout(p_dropout)
+         self.convs_sep = nn.ModuleList()
+         self.convs_1x1 = nn.ModuleList()
+         self.norms_1 = nn.ModuleList()
+         self.norms_2 = nn.ModuleList()
+         for i in range(n_layers):
+             dilation = kernel_size**i
+             padding = (kernel_size * dilation - dilation) // 2
+             self.convs_sep.append(
+                 nn.Conv1d(
+                     channels,
+                     channels,
+                     kernel_size,
+                     groups=channels,
+                     dilation=dilation,
+                     padding=padding,
+                 )
+             )
+             self.convs_1x1.append(nn.Conv1d(channels, channels, 1))
+             self.norms_1.append(LayerNorm(channels))
+             self.norms_2.append(LayerNorm(channels))
+
+     def forward(self, x, x_mask, g=None):
+         if g is not None:
+             x = x + g
+         for i in range(self.n_layers):
+             y = self.convs_sep[i](x * x_mask)
+             y = self.norms_1[i](y)
+             y = F.gelu(y)
+             y = self.convs_1x1[i](y)
+             y = self.norms_2[i](y)
+             y = F.gelu(y)
+             y = self.drop(y)
+             x = x + y
+         return x * x_mask
+
+
+ class WN(torch.nn.Module):
+     def __init__(
+         self,
+         hidden_channels: int,
+         kernel_size: int,
+         dilation_rate: int,
+         n_layers: int,
+         gin_channels: int = 0,
+         p_dropout: float = 0,
+     ):
+         super().__init__()
+         assert kernel_size % 2 == 1
+         self.hidden_channels = hidden_channels
+         self.kernel_size = kernel_size
+         self.dilation_rate = dilation_rate
+         self.n_layers = n_layers
+         self.gin_channels = gin_channels
+         self.p_dropout = p_dropout
+
+         self.in_layers = torch.nn.ModuleList()
+         self.res_skip_layers = torch.nn.ModuleList()
+         self.drop = nn.Dropout(p_dropout)
+
+         if gin_channels != 0:
+             cond_layer = torch.nn.Conv1d(
+                 gin_channels, 2 * hidden_channels * n_layers, 1
+             )
+             self.cond_layer = torch.nn.utils.weight_norm(cond_layer, name="weight")
+
+         for i in range(n_layers):
+             dilation = dilation_rate**i
+             padding = int((kernel_size * dilation - dilation) / 2)
+             in_layer = torch.nn.Conv1d(
+                 hidden_channels,
+                 2 * hidden_channels,
+                 kernel_size,
+                 dilation=dilation,
+                 padding=padding,
+             )
+             in_layer = torch.nn.utils.weight_norm(in_layer, name="weight")
+             self.in_layers.append(in_layer)
+
+             # last one is not necessary
+             if i < n_layers - 1:
+                 res_skip_channels = 2 * hidden_channels
+             else:
+                 res_skip_channels = hidden_channels
+
+             res_skip_layer = torch.nn.Conv1d(hidden_channels, res_skip_channels, 1)
+             res_skip_layer = torch.nn.utils.weight_norm(res_skip_layer, name="weight")
+             self.res_skip_layers.append(res_skip_layer)
+
+     def forward(self, x, x_mask, g=None, **kwargs):
+         output = torch.zeros_like(x)
+         n_channels_tensor = torch.IntTensor([self.hidden_channels])
+
+         if g is not None:
+             g = self.cond_layer(g)
+
+         for i in range(self.n_layers):
+             x_in = self.in_layers[i](x)
+             if g is not None:
+                 cond_offset = i * 2 * self.hidden_channels
+                 g_l = g[:, cond_offset : cond_offset + 2 * self.hidden_channels, :]
+             else:
+                 g_l = torch.zeros_like(x_in)
+
+             acts = fused_add_tanh_sigmoid_multiply(x_in, g_l, n_channels_tensor)
+             acts = self.drop(acts)
+
+             res_skip_acts = self.res_skip_layers[i](acts)
+             if i < self.n_layers - 1:
+                 res_acts = res_skip_acts[:, : self.hidden_channels, :]
+                 x = (x + res_acts) * x_mask
+                 output = output + res_skip_acts[:, self.hidden_channels :, :]
+             else:
+                 output = output + res_skip_acts
+         return output * x_mask
+
+     def remove_weight_norm(self):
+         if self.gin_channels != 0:
+             torch.nn.utils.remove_weight_norm(self.cond_layer)
+         for l in self.in_layers:
+             torch.nn.utils.remove_weight_norm(l)
+         for l in self.res_skip_layers:
+             torch.nn.utils.remove_weight_norm(l)
+
+
+ class ResBlock1(torch.nn.Module):
+     def __init__(
+         self,
+         channels: int,
+         kernel_size: int = 3,
+         dilation: typing.Tuple[int] = (1, 3, 5),
+     ):
+         super(ResBlock1, self).__init__()
+         self.LRELU_SLOPE = 0.1
+         self.convs1 = nn.ModuleList(
+             [
+                 weight_norm(
+                     Conv1d(
+                         channels,
+                         channels,
+                         kernel_size,
+                         1,
+                         dilation=dilation[0],
+                         padding=get_padding(kernel_size, dilation[0]),
+                     )
+                 ),
+                 weight_norm(
+                     Conv1d(
+                         channels,
+                         channels,
+                         kernel_size,
+                         1,
+                         dilation=dilation[1],
+                         padding=get_padding(kernel_size, dilation[1]),
+                     )
+                 ),
+                 weight_norm(
+                     Conv1d(
+                         channels,
+                         channels,
+                         kernel_size,
+                         1,
+                         dilation=dilation[2],
+                         padding=get_padding(kernel_size, dilation[2]),
+                     )
+                 ),
+             ]
+         )
+         self.convs1.apply(init_weights)
+
+         self.convs2 = nn.ModuleList(
+             [
+                 weight_norm(
+                     Conv1d(
+                         channels,
+                         channels,
+                         kernel_size,
+                         1,
+                         dilation=1,
+                         padding=get_padding(kernel_size, 1),
+                     )
+                 ),
+                 weight_norm(
+                     Conv1d(
+                         channels,
+                         channels,
+                         kernel_size,
+                         1,
+                         dilation=1,
+                         padding=get_padding(kernel_size, 1),
+                     )
+                 ),
+                 weight_norm(
+                     Conv1d(
+                         channels,
+                         channels,
+                         kernel_size,
+                         1,
+                         dilation=1,
+                         padding=get_padding(kernel_size, 1),
+                     )
+                 ),
+             ]
+         )
+         self.convs2.apply(init_weights)
+
+     def forward(self, x, x_mask=None):
+         for c1, c2 in zip(self.convs1, self.convs2):
+             xt = F.leaky_relu(x, self.LRELU_SLOPE)
+             if x_mask is not None:
+                 xt = xt * x_mask
+             xt = c1(xt)
+             xt = F.leaky_relu(xt, self.LRELU_SLOPE)
+             if x_mask is not None:
+                 xt = xt * x_mask
+             xt = c2(xt)
+             x = xt + x
+         if x_mask is not None:
+             x = x * x_mask
+         return x
+
+     def remove_weight_norm(self):
+         for l in self.convs1:
+             remove_weight_norm(l)
+         for l in self.convs2:
+             remove_weight_norm(l)
+
+
+ class ResBlock2(torch.nn.Module):
+     def __init__(
+         self, channels: int, kernel_size: int = 3, dilation: typing.Tuple[int] = (1, 3)
+     ):
+         super(ResBlock2, self).__init__()
+         self.LRELU_SLOPE = 0.1
+         self.convs = nn.ModuleList(
+             [
+                 weight_norm(
+                     Conv1d(
+                         channels,
+                         channels,
+                         kernel_size,
+                         1,
+                         dilation=dilation[0],
+                         padding=get_padding(kernel_size, dilation[0]),
+                     )
+                 ),
+                 weight_norm(
+                     Conv1d(
+                         channels,
+                         channels,
+                         kernel_size,
+                         1,
+                         dilation=dilation[1],
+                         padding=get_padding(kernel_size, dilation[1]),
+                     )
+                 ),
+             ]
+         )
+         self.convs.apply(init_weights)
+
+     def forward(self, x, x_mask=None):
+         for c in self.convs:
+             xt = F.leaky_relu(x, self.LRELU_SLOPE)
+             if x_mask is not None:
+                 xt = xt * x_mask
+             xt = c(xt)
+             x = xt + x
+         if x_mask is not None:
+             x = x * x_mask
+         return x
+
+     def remove_weight_norm(self):
+         for l in self.convs:
+             remove_weight_norm(l)
+
+
+ class Log(nn.Module):
+     def forward(
+         self, x: torch.Tensor, x_mask: torch.Tensor, reverse: bool = False, **kwargs
+     ):
+         if not reverse:
+             y = torch.log(torch.clamp_min(x, 1e-5)) * x_mask
+             logdet = torch.sum(-y, [1, 2])
+             return y, logdet
+         else:
+             x = torch.exp(x) * x_mask
+             return x
+
+
+ class Flip(nn.Module):
+     def forward(self, x: torch.Tensor, *args, reverse: bool = False, **kwargs):
+         x = torch.flip(x, [1])
+         if not reverse:
+             logdet = torch.zeros(x.size(0)).type_as(x)
+             return x, logdet
+         else:
+             return x
+
+
+ class ElementwiseAffine(nn.Module):
+     def __init__(self, channels: int):
+         super().__init__()
+         self.channels = channels
+         self.m = nn.Parameter(torch.zeros(channels, 1))
+         self.logs = nn.Parameter(torch.zeros(channels, 1))
+
+     def forward(self, x, x_mask, reverse=False, **kwargs):
+         if not reverse:
+             y = self.m + torch.exp(self.logs) * x
+             y = y * x_mask
+             logdet = torch.sum(self.logs * x_mask, [1, 2])
+             return y, logdet
+         else:
+             x = (x - self.m) * torch.exp(-self.logs) * x_mask
+             return x
+
+
+ class ResidualCouplingLayer(nn.Module):
+     def __init__(
+         self,
+         channels: int,
+         hidden_channels: int,
+         kernel_size: int,
+         dilation_rate: int,
+         n_layers: int,
+         p_dropout: float = 0,
+         gin_channels: int = 0,
+         mean_only: bool = False,
+     ):
+         assert channels % 2 == 0, "channels should be divisible by 2"
+         super().__init__()
+         self.channels = channels
+         self.hidden_channels = hidden_channels
+         self.kernel_size = kernel_size
+         self.dilation_rate = dilation_rate
+         self.n_layers = n_layers
+         self.half_channels = channels // 2
+         self.mean_only = mean_only
+
+         self.pre = nn.Conv1d(self.half_channels, hidden_channels, 1)
+         self.enc = WN(
+             hidden_channels,
+             kernel_size,
+             dilation_rate,
+             n_layers,
+             p_dropout=p_dropout,
+             gin_channels=gin_channels,
+         )
+         self.post = nn.Conv1d(hidden_channels, self.half_channels * (2 - mean_only), 1)
+         self.post.weight.data.zero_()
+         self.post.bias.data.zero_()
+
+     def forward(self, x, x_mask, g=None, reverse=False):
+         x0, x1 = torch.split(x, [self.half_channels] * 2, 1)
+         h = self.pre(x0) * x_mask
+         h = self.enc(h, x_mask, g=g)
+         stats = self.post(h) * x_mask
+         if not self.mean_only:
+             m, logs = torch.split(stats, [self.half_channels] * 2, 1)
+         else:
+             m = stats
+             logs = torch.zeros_like(m)
+
+         if not reverse:
+             x1 = m + x1 * torch.exp(logs) * x_mask
+             x = torch.cat([x0, x1], 1)
+             logdet = torch.sum(logs, [1, 2])
+             return x, logdet
+         else:
+             x1 = (x1 - m) * torch.exp(-logs) * x_mask
+             x = torch.cat([x0, x1], 1)
+             return x
+
+
+ class ConvFlow(nn.Module):
+     def __init__(
+         self,
+         in_channels: int,
+         filter_channels: int,
+         kernel_size: int,
+         n_layers: int,
+         num_bins: int = 10,
+         tail_bound: float = 5.0,
+     ):
+         super().__init__()
+         self.in_channels = in_channels
+         self.filter_channels = filter_channels
+         self.kernel_size = kernel_size
+         self.n_layers = n_layers
+         self.num_bins = num_bins
+         self.tail_bound = tail_bound
+         self.half_channels = in_channels // 2
+
+         self.pre = nn.Conv1d(self.half_channels, filter_channels, 1)
+         self.convs = DDSConv(filter_channels, kernel_size, n_layers, p_dropout=0.0)
+         self.proj = nn.Conv1d(
+             filter_channels, self.half_channels * (num_bins * 3 - 1), 1
+         )
+         self.proj.weight.data.zero_()
+         self.proj.bias.data.zero_()
+
+     def forward(self, x, x_mask, g=None, reverse=False):
+         x0, x1 = torch.split(x, [self.half_channels] * 2, 1)
+         h = self.pre(x0)
+         h = self.convs(h, x_mask, g=g)
+         h = self.proj(h) * x_mask
+
+         b, c, t = x0.shape
+         h = h.reshape(b, c, -1, t).permute(0, 1, 3, 2)  # [b, cx?, t] -> [b, c, t, ?]
+
+         unnormalized_widths = h[..., : self.num_bins] / math.sqrt(self.filter_channels)
+         unnormalized_heights = h[..., self.num_bins : 2 * self.num_bins] / math.sqrt(
+             self.filter_channels
+         )
+         unnormalized_derivatives = h[..., 2 * self.num_bins :]
+
+         x1, logabsdet = piecewise_rational_quadratic_transform(
+             x1,
+             unnormalized_widths,
+             unnormalized_heights,
+             unnormalized_derivatives,
+             inverse=reverse,
+             tails="linear",
+             tail_bound=self.tail_bound,
+         )
+
+         x = torch.cat([x0, x1], 1) * x_mask
+
+         logdet = torch.sum(logabsdet * x_mask, [1, 2])
+         if not reverse:
+             return x, logdet
+         else:
+             return x
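
The flow blocks above (Log, Flip, ElementwiseAffine, ResidualCouplingLayer, ConvFlow) share essentially the same calling convention: forward(x, x_mask, ...) returns (y, logdet) in the forward direction and the inverted tensor when reverse=True. A minimal smoke-test sketch (not part of this diff, assuming the wheel is installed so that phoonnx_train.vits.modules is importable):

import torch

from phoonnx_train.vits.modules import ResidualCouplingLayer

# x: [batch, channels, time]; x_mask: [batch, 1, time] marking valid frames
x = torch.randn(2, 4, 50)
x_mask = torch.ones(2, 1, 50)

layer = ResidualCouplingLayer(
    channels=4, hidden_channels=8, kernel_size=5, dilation_rate=1, n_layers=2
)

z, logdet = layer(x, x_mask)                # forward: returns (z, log|det J|)
x_rec = layer(z, x_mask, reverse=True)      # reverse: inverts the coupling
print(torch.allclose(x, x_rec, atol=1e-5))  # True -- the coupling is invertible
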
phoonnx_train/vits/monotonic_align/__init__.py
@@ -0,0 +1,20 @@
+ import numpy as np
+ import torch
+
+ from .monotonic_align.core import maximum_path_c
+
+
+ def maximum_path(neg_cent, mask):
+     """Cython optimized version.
+     neg_cent: [b, t_t, t_s]
+     mask: [b, t_t, t_s]
+     """
+     device = neg_cent.device
+     dtype = neg_cent.dtype
+     neg_cent = neg_cent.data.cpu().numpy().astype(np.float32)
+     path = np.zeros(neg_cent.shape, dtype=np.int32)
+
+     t_t_max = mask.sum(1)[:, 0].data.cpu().numpy().astype(np.int32)
+     t_s_max = mask.sum(2)[:, 0].data.cpu().numpy().astype(np.int32)
+     maximum_path_c(path, neg_cent, t_t_max, t_s_max)
+     return torch.from_numpy(path).to(device=device, dtype=dtype)
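
maximum_path performs the hard monotonic-alignment search used during VITS training; the docstring gives the expected [b, t_t, t_s] shapes. A hedged usage sketch (not part of this diff), which only runs once the Cython core extension imported above has been compiled:

import torch

from phoonnx_train.vits.monotonic_align import maximum_path

b, t_t, t_s = 1, 6, 4                # batch, t_t, t_s (t_t >= t_s assumed, e.g. spectrogram frames vs. text tokens)
neg_cent = torch.randn(b, t_t, t_s)  # alignment scores (negative cross-entropy, higher = better match)
mask = torch.ones(b, t_t, t_s)       # 1 = valid position (no padding in this toy case)

path = maximum_path(neg_cent, mask)  # 0/1 alignment path, same shape, returned in neg_cent's dtype
print(path.shape)                    # torch.Size([1, 6, 4])
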
phoonnx_train/vits/monotonic_align/setup.py
@@ -0,0 +1,13 @@
+ from distutils.core import setup
+ from pathlib import Path
+
+ import numpy
+ from Cython.Build import cythonize
+
+ _DIR = Path(__file__).parent
+
+ setup(
+     name="monotonic_align",
+     ext_modules=cythonize(str(_DIR / "core.pyx")),
+     include_dirs=[numpy.get_include()],
+ )
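
This setup script compiles core.pyx, which does not appear in the 86-file listing above, nor does a prebuilt core extension; yet monotonic_align/__init__.py imports .monotonic_align.core at load time. In a source checkout the extension is typically built in place with the standard Cython/distutils workflow (e.g. python setup.py build_ext --inplace run from the monotonic_align directory), presumably producing the module that satisfies that import. On Python 3.12+, where distutils has been removed, setuptools' setup works as a drop-in replacement for the same script.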