PyPI - phoonnx - Versions diffs - 0.0.0__py3-none-any.whl - Mend

phoonnx 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86) hide show

phoonnx/__init__.py +0 -0
phoonnx/config.py +490 -0
phoonnx/locale/ca/phonetic_spellings.txt +2 -0
phoonnx/locale/en/phonetic_spellings.txt +1 -0
phoonnx/locale/gl/phonetic_spellings.txt +2 -0
phoonnx/locale/pt/phonetic_spellings.txt +2 -0
phoonnx/phoneme_ids.py +453 -0
phoonnx/phonemizers/__init__.py +45 -0
phoonnx/phonemizers/ar.py +42 -0
phoonnx/phonemizers/base.py +216 -0
phoonnx/phonemizers/en.py +250 -0
phoonnx/phonemizers/fa.py +46 -0
phoonnx/phonemizers/gl.py +142 -0
phoonnx/phonemizers/he.py +67 -0
phoonnx/phonemizers/ja.py +119 -0
phoonnx/phonemizers/ko.py +97 -0
phoonnx/phonemizers/mul.py +606 -0
phoonnx/phonemizers/vi.py +44 -0
phoonnx/phonemizers/zh.py +308 -0
phoonnx/thirdparty/__init__.py +0 -0
phoonnx/thirdparty/arpa2ipa.py +249 -0
phoonnx/thirdparty/cotovia/cotovia_aarch64 +0 -0
phoonnx/thirdparty/cotovia/cotovia_x86_64 +0 -0
phoonnx/thirdparty/hangul2ipa.py +783 -0
phoonnx/thirdparty/ko_tables/aspiration.csv +20 -0
phoonnx/thirdparty/ko_tables/assimilation.csv +31 -0
phoonnx/thirdparty/ko_tables/double_coda.csv +17 -0
phoonnx/thirdparty/ko_tables/hanja.tsv +8525 -0
phoonnx/thirdparty/ko_tables/ipa.csv +22 -0
phoonnx/thirdparty/ko_tables/neutralization.csv +11 -0
phoonnx/thirdparty/ko_tables/tensification.csv +56 -0
phoonnx/thirdparty/ko_tables/yale.csv +22 -0
phoonnx/thirdparty/kog2p/__init__.py +385 -0
phoonnx/thirdparty/kog2p/rulebook.txt +212 -0
phoonnx/thirdparty/mantoq/__init__.py +67 -0
phoonnx/thirdparty/mantoq/buck/__init__.py +0 -0
phoonnx/thirdparty/mantoq/buck/phonetise_buckwalter.py +569 -0
phoonnx/thirdparty/mantoq/buck/symbols.py +64 -0
phoonnx/thirdparty/mantoq/buck/tokenization.py +105 -0
phoonnx/thirdparty/mantoq/num2words.py +37 -0
phoonnx/thirdparty/mantoq/pyarabic/__init__.py +12 -0
phoonnx/thirdparty/mantoq/pyarabic/arabrepr.py +64 -0
phoonnx/thirdparty/mantoq/pyarabic/araby.py +1647 -0
phoonnx/thirdparty/mantoq/pyarabic/named_const.py +227 -0
phoonnx/thirdparty/mantoq/pyarabic/normalize.py +161 -0
phoonnx/thirdparty/mantoq/pyarabic/number.py +826 -0
phoonnx/thirdparty/mantoq/pyarabic/number_const.py +1704 -0
phoonnx/thirdparty/mantoq/pyarabic/stack.py +52 -0
phoonnx/thirdparty/mantoq/pyarabic/trans.py +517 -0
phoonnx/thirdparty/mantoq/unicode_symbol2label.py +4173 -0
phoonnx/thirdparty/tashkeel/LICENSE +22 -0
phoonnx/thirdparty/tashkeel/SOURCE +1 -0
phoonnx/thirdparty/tashkeel/__init__.py +212 -0
phoonnx/thirdparty/tashkeel/hint_id_map.json +18 -0
phoonnx/thirdparty/tashkeel/input_id_map.json +56 -0
phoonnx/thirdparty/tashkeel/model.onnx +0 -0
phoonnx/thirdparty/tashkeel/target_id_map.json +17 -0
phoonnx/thirdparty/zh_num.py +238 -0
phoonnx/util.py +705 -0
phoonnx/version.py +6 -0
phoonnx/voice.py +521 -0
phoonnx-0.0.0.dist-info/METADATA +255 -0
phoonnx-0.0.0.dist-info/RECORD +86 -0
phoonnx-0.0.0.dist-info/WHEEL +5 -0
phoonnx-0.0.0.dist-info/top_level.txt +2 -0
phoonnx_train/__main__.py +151 -0
phoonnx_train/export_onnx.py +109 -0
phoonnx_train/norm_audio/__init__.py +92 -0
phoonnx_train/norm_audio/trim.py +54 -0
phoonnx_train/norm_audio/vad.py +54 -0
phoonnx_train/preprocess.py +420 -0
phoonnx_train/vits/__init__.py +0 -0
phoonnx_train/vits/attentions.py +427 -0
phoonnx_train/vits/commons.py +147 -0
phoonnx_train/vits/config.py +330 -0
phoonnx_train/vits/dataset.py +214 -0
phoonnx_train/vits/lightning.py +352 -0
phoonnx_train/vits/losses.py +58 -0
phoonnx_train/vits/mel_processing.py +139 -0
phoonnx_train/vits/models.py +732 -0
phoonnx_train/vits/modules.py +527 -0
phoonnx_train/vits/monotonic_align/__init__.py +20 -0
phoonnx_train/vits/monotonic_align/setup.py +13 -0
phoonnx_train/vits/transforms.py +212 -0
phoonnx_train/vits/utils.py +16 -0
phoonnx_train/vits/wavfile.py +860 -0

phoonnx_train/vits/modules.py ADDED Viewed

@@ -0,0 +1,527 @@
+import math
+import typing
+import torch
+from torch import nn
+from torch.nn import Conv1d
+from torch.nn import functional as F
+from torch.nn.utils import remove_weight_norm, weight_norm
+from .commons import fused_add_tanh_sigmoid_multiply, get_padding, init_weights
+from .transforms import piecewise_rational_quadratic_transform
+class LayerNorm(nn.Module):
+    """Layer normalization applied along dimension 1 of the input.
+    The input is transposed so that dimension 1 becomes the last one,
+    normalized with a learnable scale (``gamma``) and shift (``beta``) of
+    length ``channels``, and then transposed back.
+    """
+    def __init__(self, channels: int, eps: float = 1e-5):
+        super().__init__()
+        self.channels, self.eps = channels, eps
+        # Scale starts at one and shift at zero.
+        self.gamma = nn.Parameter(torch.ones(channels))
+        self.beta = nn.Parameter(torch.zeros(channels))
+    def forward(self, x):
+        """Normalize ``x`` over dimension 1, which must have size ``channels``."""
+        channels_last = x.transpose(1, -1)
+        normed = F.layer_norm(
+            channels_last,
+            normalized_shape=(self.channels,),
+            weight=self.gamma,
+            bias=self.beta,
+            eps=self.eps,
+        )
+        return normed.transpose(1, -1)
+class ConvReluNorm(nn.Module):
+    """Stack of Conv1d -> LayerNorm -> ReLU -> Dropout layers with a residual output.
+    The first layer maps ``in_channels`` to ``hidden_channels``; the remaining
+    ``n_layers - 1`` layers keep ``hidden_channels``. A final 1x1 convolution
+    projects to ``out_channels`` and its result is added to the original input.
+    The projection is zero-initialized, so a freshly built module returns the
+    masked input unchanged.
+    Args:
+        in_channels: Channel count of the input.
+        hidden_channels: Channel count used inside the stack.
+        out_channels: Channel count produced by the final projection. It is
+            added to the input, so it has to be addable to ``in_channels``
+            channels (presumably equal -- confirm against callers).
+        kernel_size: Kernel size of every convolution in the stack; padding is
+            ``kernel_size // 2``.
+        n_layers: Number of conv/norm layers; must be at least 1.
+        p_dropout: Dropout probability applied after each ReLU.
+    """
+    def __init__(
+        self,
+        in_channels: int,
+        hidden_channels: int,
+        out_channels: int,
+        kernel_size: int,
+        n_layers: int,
+        p_dropout: float,
+    ):
+        super().__init__()
+        self.in_channels = in_channels
+        self.hidden_channels = hidden_channels
+        self.out_channels = out_channels
+        self.kernel_size = kernel_size
+        self.n_layers = n_layers
+        self.p_dropout = p_dropout
+        # The previous check (``n_layers > 1``) contradicted its own message.
+        # A single layer is a valid configuration here: the loop below simply
+        # adds nothing and ``forward`` iterates once.
+        assert n_layers > 0, "Number of layers should be larger than 0."
+        self.conv_layers = nn.ModuleList()
+        self.norm_layers = nn.ModuleList()
+        # First layer changes the channel count from in_channels to hidden_channels.
+        self.conv_layers.append(
+            nn.Conv1d(
+                in_channels, hidden_channels, kernel_size, padding=kernel_size // 2
+            )
+        )
+        self.norm_layers.append(LayerNorm(hidden_channels))
+        self.relu_drop = nn.Sequential(nn.ReLU(), nn.Dropout(p_dropout))
+        # Remaining layers operate at hidden_channels throughout.
+        for _ in range(n_layers - 1):
+            self.conv_layers.append(
+                nn.Conv1d(
+                    hidden_channels,
+                    hidden_channels,
+                    kernel_size,
+                    padding=kernel_size // 2,
+                )
+            )
+            self.norm_layers.append(LayerNorm(hidden_channels))
+        self.proj = nn.Conv1d(hidden_channels, out_channels, 1)
+        # Zero-init the projection so the residual branch starts at zero.
+        self.proj.weight.data.zero_()
+        self.proj.bias.data.zero_()
+    def forward(self, x, x_mask):
+        """Run the stack on ``x`` and return ``(x + proj(stack(x))) * x_mask``.
+        ``x_mask`` is multiplied into the input of every convolution and into
+        the final result.
+        """
+        x_org = x
+        for conv, norm in zip(self.conv_layers, self.norm_layers):
+            x = conv(x * x_mask)
+            x = norm(x)
+            x = self.relu_drop(x)
+        x = x_org + self.proj(x)
+        return x * x_mask
+class DDSConv(nn.Module):
+    """
+    Dilated and Depth-Separable Convolution.
+    Each of the ``n_layers`` layers applies a depthwise (``groups=channels``)
+    dilated convolution followed by a 1x1 convolution, each followed by
+    LayerNorm and GELU, and adds the result to its input. Layer ``i`` uses
+    dilation ``kernel_size**i``.
+    """
+    def __init__(
+        self, channels: int, kernel_size: int, n_layers: int, p_dropout: float = 0.0
+    ):
+        super().__init__()
+        self.channels = channels
+        self.kernel_size = kernel_size
+        self.n_layers = n_layers
+        self.p_dropout = p_dropout
+        self.drop = nn.Dropout(p_dropout)
+        self.convs_sep = nn.ModuleList()
+        self.convs_1x1 = nn.ModuleList()
+        self.norms_1 = nn.ModuleList()
+        self.norms_2 = nn.ModuleList()
+        for layer_idx in range(n_layers):
+            rate = kernel_size**layer_idx
+            depthwise = nn.Conv1d(
+                channels,
+                channels,
+                kernel_size,
+                groups=channels,
+                dilation=rate,
+                padding=(kernel_size * rate - rate) // 2,
+            )
+            self.convs_sep.append(depthwise)
+            self.convs_1x1.append(nn.Conv1d(channels, channels, 1))
+            self.norms_1.append(LayerNorm(channels))
+            self.norms_2.append(LayerNorm(channels))
+    def forward(self, x, x_mask, g=None):
+        """Apply all layers to ``x``; ``g``, when given, is added to ``x`` first.
+        ``x_mask`` is multiplied into the input of every depthwise convolution
+        and into the returned tensor.
+        """
+        if g is not None:
+            x = x + g
+        layers = zip(self.convs_sep, self.convs_1x1, self.norms_1, self.norms_2)
+        for depthwise, pointwise, norm_a, norm_b in layers:
+            branch = F.gelu(norm_a(depthwise(x * x_mask)))
+            branch = F.gelu(norm_b(pointwise(branch)))
+            x = x + self.drop(branch)
+        return x * x_mask
+class WN(torch.nn.Module):
+    """Stack of dilated, weight-normalized Conv1d layers with residual and skip outputs.
+    Layer ``i`` uses dilation ``dilation_rate**i``. Every layer except the last
+    produces ``2 * hidden_channels`` channels, split into a residual update for
+    ``x`` and a contribution to the accumulated output; the last layer only
+    contributes to the output. When ``gin_channels`` is non-zero, a 1x1
+    convolution maps the conditioning input ``g`` to one slice of
+    ``2 * hidden_channels`` channels per layer.
+    Args:
+        hidden_channels: Channel count of the input and of the output.
+        kernel_size: Kernel size of the dilated convolutions; must be odd.
+        dilation_rate: Base of the per-layer dilation.
+        n_layers: Number of layers in the stack.
+        gin_channels: Channel count of the conditioning input, 0 for none.
+        p_dropout: Dropout probability applied to each layer's activations.
+    """
+    def __init__(
+        self,
+        hidden_channels: int,
+        kernel_size: int,
+        dilation_rate: int,
+        n_layers: int,
+        gin_channels: int = 0,
+        p_dropout: float = 0,
+    ):
+        super().__init__()
+        assert kernel_size % 2 == 1
+        self.hidden_channels = hidden_channels
+        # Stored as the plain int; a stray trailing comma previously turned
+        # this attribute into a one-element tuple.
+        self.kernel_size = kernel_size
+        self.dilation_rate = dilation_rate
+        self.n_layers = n_layers
+        self.gin_channels = gin_channels
+        self.p_dropout = p_dropout
+        self.in_layers = torch.nn.ModuleList()
+        self.res_skip_layers = torch.nn.ModuleList()
+        self.drop = nn.Dropout(p_dropout)
+        if gin_channels != 0:
+            # One projection yields the conditioning slices for all layers.
+            cond_layer = torch.nn.Conv1d(
+                gin_channels, 2 * hidden_channels * n_layers, 1
+            )
+            self.cond_layer = torch.nn.utils.weight_norm(cond_layer, name="weight")
+        for i in range(n_layers):
+            dilation = dilation_rate**i
+            padding = int((kernel_size * dilation - dilation) / 2)
+            in_layer = torch.nn.Conv1d(
+                hidden_channels,
+                2 * hidden_channels,
+                kernel_size,
+                dilation=dilation,
+                padding=padding,
+            )
+            in_layer = torch.nn.utils.weight_norm(in_layer, name="weight")
+            self.in_layers.append(in_layer)
+            # The last layer has no following layer to feed, so it needs only
+            # the skip half of the residual/skip output.
+            if i < n_layers - 1:
+                res_skip_channels = 2 * hidden_channels
+            else:
+                res_skip_channels = hidden_channels
+            res_skip_layer = torch.nn.Conv1d(hidden_channels, res_skip_channels, 1)
+            res_skip_layer = torch.nn.utils.weight_norm(res_skip_layer, name="weight")
+            self.res_skip_layers.append(res_skip_layer)
+    def forward(self, x, x_mask, g=None, **kwargs):
+        """Run the stack and return the masked sum of all skip contributions.
+        Args:
+            x: Input with ``hidden_channels`` channels along dimension 1.
+            x_mask: Mask multiplied into the running ``x`` and the output.
+            g: Optional conditioning input; requires ``gin_channels != 0``
+                because it is passed through ``self.cond_layer``.
+            **kwargs: Accepted and ignored.
+        """
+        output = torch.zeros_like(x)
+        n_channels_tensor = torch.IntTensor([self.hidden_channels])
+        if g is not None:
+            g = self.cond_layer(g)
+        for i in range(self.n_layers):
+            x_in = self.in_layers[i](x)
+            if g is not None:
+                # Pick this layer's slice of the shared conditioning projection.
+                cond_offset = i * 2 * self.hidden_channels
+                g_l = g[:, cond_offset : cond_offset + 2 * self.hidden_channels, :]
+            else:
+                g_l = torch.zeros_like(x_in)
+            # Helper imported from .commons; combines the layer input with its
+            # conditioning slice.
+            acts = fused_add_tanh_sigmoid_multiply(x_in, g_l, n_channels_tensor)
+            acts = self.drop(acts)
+            res_skip_acts = self.res_skip_layers[i](acts)
+            if i < self.n_layers - 1:
+                # First half updates the running input, second half is the skip.
+                res_acts = res_skip_acts[:, : self.hidden_channels, :]
+                x = (x + res_acts) * x_mask
+                output = output + res_skip_acts[:, self.hidden_channels :, :]
+            else:
+                output = output + res_skip_acts
+        return output * x_mask
+    def remove_weight_norm(self):
+        """Strip weight normalization from every convolution in the stack."""
+        if self.gin_channels != 0:
+            torch.nn.utils.remove_weight_norm(self.cond_layer)
+        for layer in self.in_layers:
+            torch.nn.utils.remove_weight_norm(layer)
+        for layer in self.res_skip_layers:
+            torch.nn.utils.remove_weight_norm(layer)
+class ResBlock1(torch.nn.Module):
+    """Residual block of three (dilated conv, undilated conv) pairs.
+    Each pair applies leaky ReLU before both convolutions and adds the result
+    to its input. The first convolution of pair ``k`` uses ``dilation[k]``; the
+    second always uses dilation 1. All convolutions are weight-normalized and
+    initialized with ``init_weights``.
+    """
+    def __init__(
+        self,
+        channels: int,
+        kernel_size: int = 3,
+        dilation: typing.Tuple[int] = (1, 3, 5),
+    ):
+        super().__init__()
+        self.LRELU_SLOPE = 0.1
+        def make_conv(rate):
+            # Stride-1 channel-preserving conv, padded via get_padding.
+            return weight_norm(
+                Conv1d(
+                    channels,
+                    channels,
+                    kernel_size,
+                    1,
+                    dilation=rate,
+                    padding=get_padding(kernel_size, rate),
+                )
+            )
+        # Exactly the first three dilation entries are used.
+        self.convs1 = nn.ModuleList([make_conv(dilation[k]) for k in range(3)])
+        self.convs1.apply(init_weights)
+        self.convs2 = nn.ModuleList([make_conv(1) for _ in range(3)])
+        self.convs2.apply(init_weights)
+    def forward(self, x, x_mask=None):
+        """Apply the three residual pairs; ``x_mask``, if given, is multiplied
+        in before every convolution and into the returned tensor."""
+        def masked(tensor):
+            return tensor if x_mask is None else tensor * x_mask
+        slope = self.LRELU_SLOPE
+        for dilated, plain in zip(self.convs1, self.convs2):
+            hidden = dilated(masked(F.leaky_relu(x, slope)))
+            hidden = plain(masked(F.leaky_relu(hidden, slope)))
+            x = hidden + x
+        return masked(x)
+    def remove_weight_norm(self):
+        """Strip weight normalization from all six convolutions."""
+        for conv in self.convs1:
+            remove_weight_norm(conv)
+        for conv in self.convs2:
+            remove_weight_norm(conv)
+class ResBlock2(torch.nn.Module):
+    """Residual block of two dilated convolutions.
+    Each convolution is preceded by a leaky ReLU and its output is added to
+    its input. Convolution ``k`` uses ``dilation[k]``; both are
+    weight-normalized and initialized with ``init_weights``.
+    """
+    def __init__(
+        self, channels: int, kernel_size: int = 3, dilation: typing.Tuple[int] = (1, 3)
+    ):
+        super().__init__()
+        self.LRELU_SLOPE = 0.1
+        def make_conv(rate):
+            # Stride-1 channel-preserving conv, padded via get_padding.
+            return weight_norm(
+                Conv1d(
+                    channels,
+                    channels,
+                    kernel_size,
+                    1,
+                    dilation=rate,
+                    padding=get_padding(kernel_size, rate),
+                )
+            )
+        # Exactly the first two dilation entries are used.
+        self.convs = nn.ModuleList([make_conv(dilation[k]) for k in range(2)])
+        self.convs.apply(init_weights)
+    def forward(self, x, x_mask=None):
+        """Apply both residual convolutions; ``x_mask``, if given, is multiplied
+        in before every convolution and into the returned tensor."""
+        def masked(tensor):
+            return tensor if x_mask is None else tensor * x_mask
+        for conv in self.convs:
+            hidden = conv(masked(F.leaky_relu(x, self.LRELU_SLOPE)))
+            x = hidden + x
+        return masked(x)
+    def remove_weight_norm(self):
+        """Strip weight normalization from both convolutions."""
+        for conv in self.convs:
+            remove_weight_norm(conv)
+class Log(nn.Module):
+    """Element-wise logarithm as an invertible layer."""
+    def forward(
+        self, x: torch.Tensor, x_mask: torch.Tensor, reverse: bool = False, **kwargs
+    ):
+        """Return ``(log(x) * x_mask, logdet)``, or ``exp(x) * x_mask`` in reverse.
+        In the forward direction ``x`` is clamped to at least ``1e-5`` before
+        the logarithm and ``logdet`` is the sum of ``-y`` over dimensions 1
+        and 2. Extra keyword arguments are ignored.
+        """
+        if reverse:
+            return torch.exp(x) * x_mask
+        y = torch.log(torch.clamp_min(x, 1e-5)) * x_mask
+        return y, torch.sum(-y, [1, 2])
+class Flip(nn.Module):
+    """Reverses the order of entries along dimension 1."""
+    def forward(self, x: torch.Tensor, *args, reverse: bool = False, **kwargs):
+        """Return ``(flipped, logdet)``, or only ``flipped`` when ``reverse`` is set.
+        ``logdet`` is a zero vector with one entry per element of dimension 0,
+        in the dtype and device of ``x``. Extra arguments are ignored.
+        """
+        flipped = torch.flip(x, [1])
+        if reverse:
+            return flipped
+        return flipped, torch.zeros(flipped.size(0)).type_as(flipped)
+class ElementwiseAffine(nn.Module):
+    """Learnable per-channel affine transform ``m + exp(logs) * x``.
+    Both parameters have shape ``(channels, 1)`` and start at zero, so a
+    freshly built layer returns its masked input unchanged.
+    """
+    def __init__(self, channels: int):
+        super().__init__()
+        self.channels = channels
+        self.m = nn.Parameter(torch.zeros(channels, 1))
+        self.logs = nn.Parameter(torch.zeros(channels, 1))
+    def forward(self, x, x_mask, reverse=False, **kwargs):
+        """Return ``(y, logdet)`` going forward, or the inverted input in reverse.
+        ``logdet`` is the sum of ``logs * x_mask`` over dimensions 1 and 2.
+        Extra keyword arguments are ignored.
+        """
+        if reverse:
+            return (x - self.m) * torch.exp(-self.logs) * x_mask
+        scaled = self.m + torch.exp(self.logs) * x
+        logdet = torch.sum(self.logs * x_mask, [1, 2])
+        return scaled * x_mask, logdet
+class ResidualCouplingLayer(nn.Module):
+    """Coupling layer that transforms the second half of the channels.
+    The first half passes through unchanged and feeds a ``pre`` 1x1
+    convolution, a ``WN`` stack and a zero-initialized ``post`` 1x1
+    convolution, which produce a shift ``m`` and (unless ``mean_only``) a
+    log-scale ``logs`` for the second half. Because ``post`` starts at zero,
+    ``m`` and ``logs`` are zero for a freshly built layer.
+    """
+    def __init__(
+        self,
+        channels: int,
+        hidden_channels: int,
+        kernel_size: int,
+        dilation_rate: int,
+        n_layers: int,
+        p_dropout: float = 0,
+        gin_channels: int = 0,
+        mean_only: bool = False,
+    ):
+        assert channels % 2 == 0, "channels should be divisible by 2"
+        super().__init__()
+        half = channels // 2
+        self.channels = channels
+        self.hidden_channels = hidden_channels
+        self.kernel_size = kernel_size
+        self.dilation_rate = dilation_rate
+        self.n_layers = n_layers
+        self.half_channels = half
+        self.mean_only = mean_only
+        self.pre = nn.Conv1d(half, hidden_channels, 1)
+        self.enc = WN(
+            hidden_channels,
+            kernel_size,
+            dilation_rate,
+            n_layers,
+            p_dropout=p_dropout,
+            gin_channels=gin_channels,
+        )
+        # ``post`` emits shift and log-scale, or the shift alone when mean_only.
+        self.post = nn.Conv1d(hidden_channels, half * (2 - mean_only), 1)
+        self.post.weight.data.zero_()
+        self.post.bias.data.zero_()
+    def forward(self, x, x_mask, g=None, reverse=False):
+        """Return ``(x, logdet)`` going forward, or the inverted ``x`` in reverse.
+        ``g`` is forwarded to the ``WN`` stack as conditioning input.
+        """
+        halves = [self.half_channels, self.half_channels]
+        x0, x1 = torch.split(x, halves, 1)
+        hidden = self.enc(self.pre(x0) * x_mask, x_mask, g=g)
+        stats = self.post(hidden) * x_mask
+        if self.mean_only:
+            m = stats
+            logs = torch.zeros_like(m)
+        else:
+            m, logs = torch.split(stats, halves, 1)
+        if reverse:
+            x1 = (x1 - m) * torch.exp(-logs) * x_mask
+            return torch.cat([x0, x1], 1)
+        x1 = m + x1 * torch.exp(logs) * x_mask
+        logdet = torch.sum(logs, [1, 2])
+        return torch.cat([x0, x1], 1), logdet
+class ConvFlow(nn.Module):
+    """Coupling layer that transforms the second half of the channels with
+    ``piecewise_rational_quadratic_transform``.
+    The first half passes through unchanged and feeds a ``pre`` 1x1
+    convolution, a ``DDSConv`` stack and a zero-initialized ``proj`` 1x1
+    convolution. ``proj`` emits ``num_bins * 3 - 1`` values per transformed
+    channel, which are split into unnormalized widths, heights and
+    derivatives for the transform.
+    """
+    def __init__(
+        self,
+        in_channels: int,
+        filter_channels: int,
+        kernel_size: int,
+        n_layers: int,
+        num_bins: int = 10,
+        tail_bound: float = 5.0,
+    ):
+        super().__init__()
+        half = in_channels // 2
+        self.in_channels = in_channels
+        self.filter_channels = filter_channels
+        self.kernel_size = kernel_size
+        self.n_layers = n_layers
+        self.num_bins = num_bins
+        self.tail_bound = tail_bound
+        self.half_channels = half
+        self.pre = nn.Conv1d(half, filter_channels, 1)
+        self.convs = DDSConv(filter_channels, kernel_size, n_layers, p_dropout=0.0)
+        self.proj = nn.Conv1d(filter_channels, half * (num_bins * 3 - 1), 1)
+        self.proj.weight.data.zero_()
+        self.proj.bias.data.zero_()
+    def forward(self, x, x_mask, g=None, reverse=False):
+        """Return ``(x, logdet)`` going forward, or the inverted ``x`` in reverse.
+        ``g`` is forwarded to the ``DDSConv`` stack.
+        """
+        x0, x1 = torch.split(x, [self.half_channels, self.half_channels], 1)
+        params = self.convs(self.pre(x0), x_mask, g=g)
+        params = self.proj(params) * x_mask
+        batch, chans, steps = x0.shape
+        # [b, c * k, t] -> [b, c, t, k], with k = num_bins * 3 - 1 per channel.
+        params = params.reshape(batch, chans, -1, steps).permute(0, 1, 3, 2)
+        bins = self.num_bins
+        scale = math.sqrt(self.filter_channels)
+        unnormalized_widths = params[..., :bins] / scale
+        unnormalized_heights = params[..., bins : 2 * bins] / scale
+        unnormalized_derivatives = params[..., 2 * bins :]
+        x1, logabsdet = piecewise_rational_quadratic_transform(
+            x1,
+            unnormalized_widths,
+            unnormalized_heights,
+            unnormalized_derivatives,
+            inverse=reverse,
+            tails="linear",
+            tail_bound=self.tail_bound,
+        )
+        out = torch.cat([x0, x1], 1) * x_mask
+        # Computed on both paths, as before; only the forward path returns it.
+        logdet = torch.sum(logabsdet * x_mask, [1, 2])
+        if reverse:
+            return out
+        return out, logdet

phoonnx_train/vits/monotonic_align/__init__.py ADDED Viewed

@@ -0,0 +1,20 @@
+import numpy as np
+import torch
+from .monotonic_align.core import maximum_path_c
+def maximum_path(neg_cent, mask):
+    """Cython optimized version.
+    Copies the inputs to CPU NumPy arrays, runs the compiled
+    ``maximum_path_c`` routine and returns the resulting path as a tensor on
+    the device and in the dtype of ``neg_cent``.
+    neg_cent: [b, t_t, t_s]
+    mask: [b, t_t, t_s]
+    """
+    out_device, out_dtype = neg_cent.device, neg_cent.dtype
+    scores = neg_cent.data.cpu().numpy().astype(np.float32)
+    path = np.zeros(scores.shape, dtype=np.int32)
+    def extent(axis):
+        # Per-batch count of mask entries along ``axis``, read at index 0
+        # of the remaining axis.
+        return mask.sum(axis)[:, 0].data.cpu().numpy().astype(np.int32)
+    t_t_max = extent(1)
+    t_s_max = extent(2)
+    # The return value is ignored; the routine is expected to fill ``path``
+    # in place.
+    maximum_path_c(path, scores, t_t_max, t_s_max)
+    return torch.from_numpy(path).to(device=out_device, dtype=out_dtype)

phoonnx_train/vits/monotonic_align/setup.py ADDED Viewed

@@ -0,0 +1,13 @@
+from distutils.core import setup
+from pathlib import Path
+import numpy
+from Cython.Build import cythonize
+# Directory containing this setup script; used to locate the Cython source.
+_DIR = Path(__file__).parent
+# Build the ``monotonic_align`` extension by cythonizing ``core.pyx`` found
+# next to this file, with NumPy's header directory on the include path.
+# NOTE(review): ``distutils`` (imported above for ``setup``) was removed from
+# the standard library in Python 3.12 -- confirm which Python versions this
+# build script has to support.
+setup(
+    name="monotonic_align",
+    ext_modules=cythonize(str(_DIR / "core.pyx")),
+    include_dirs=[numpy.get_include()],
+)