PyPI - phoonnx - Versions diffs - 0.0.0__py3-none-any.whl - Mend

phoonnx 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86) hide show

phoonnx/__init__.py +0 -0
phoonnx/config.py +490 -0
phoonnx/locale/ca/phonetic_spellings.txt +2 -0
phoonnx/locale/en/phonetic_spellings.txt +1 -0
phoonnx/locale/gl/phonetic_spellings.txt +2 -0
phoonnx/locale/pt/phonetic_spellings.txt +2 -0
phoonnx/phoneme_ids.py +453 -0
phoonnx/phonemizers/__init__.py +45 -0
phoonnx/phonemizers/ar.py +42 -0
phoonnx/phonemizers/base.py +216 -0
phoonnx/phonemizers/en.py +250 -0
phoonnx/phonemizers/fa.py +46 -0
phoonnx/phonemizers/gl.py +142 -0
phoonnx/phonemizers/he.py +67 -0
phoonnx/phonemizers/ja.py +119 -0
phoonnx/phonemizers/ko.py +97 -0
phoonnx/phonemizers/mul.py +606 -0
phoonnx/phonemizers/vi.py +44 -0
phoonnx/phonemizers/zh.py +308 -0
phoonnx/thirdparty/__init__.py +0 -0
phoonnx/thirdparty/arpa2ipa.py +249 -0
phoonnx/thirdparty/cotovia/cotovia_aarch64 +0 -0
phoonnx/thirdparty/cotovia/cotovia_x86_64 +0 -0
phoonnx/thirdparty/hangul2ipa.py +783 -0
phoonnx/thirdparty/ko_tables/aspiration.csv +20 -0
phoonnx/thirdparty/ko_tables/assimilation.csv +31 -0
phoonnx/thirdparty/ko_tables/double_coda.csv +17 -0
phoonnx/thirdparty/ko_tables/hanja.tsv +8525 -0
phoonnx/thirdparty/ko_tables/ipa.csv +22 -0
phoonnx/thirdparty/ko_tables/neutralization.csv +11 -0
phoonnx/thirdparty/ko_tables/tensification.csv +56 -0
phoonnx/thirdparty/ko_tables/yale.csv +22 -0
phoonnx/thirdparty/kog2p/__init__.py +385 -0
phoonnx/thirdparty/kog2p/rulebook.txt +212 -0
phoonnx/thirdparty/mantoq/__init__.py +67 -0
phoonnx/thirdparty/mantoq/buck/__init__.py +0 -0
phoonnx/thirdparty/mantoq/buck/phonetise_buckwalter.py +569 -0
phoonnx/thirdparty/mantoq/buck/symbols.py +64 -0
phoonnx/thirdparty/mantoq/buck/tokenization.py +105 -0
phoonnx/thirdparty/mantoq/num2words.py +37 -0
phoonnx/thirdparty/mantoq/pyarabic/__init__.py +12 -0
phoonnx/thirdparty/mantoq/pyarabic/arabrepr.py +64 -0
phoonnx/thirdparty/mantoq/pyarabic/araby.py +1647 -0
phoonnx/thirdparty/mantoq/pyarabic/named_const.py +227 -0
phoonnx/thirdparty/mantoq/pyarabic/normalize.py +161 -0
phoonnx/thirdparty/mantoq/pyarabic/number.py +826 -0
phoonnx/thirdparty/mantoq/pyarabic/number_const.py +1704 -0
phoonnx/thirdparty/mantoq/pyarabic/stack.py +52 -0
phoonnx/thirdparty/mantoq/pyarabic/trans.py +517 -0
phoonnx/thirdparty/mantoq/unicode_symbol2label.py +4173 -0
phoonnx/thirdparty/tashkeel/LICENSE +22 -0
phoonnx/thirdparty/tashkeel/SOURCE +1 -0
phoonnx/thirdparty/tashkeel/__init__.py +212 -0
phoonnx/thirdparty/tashkeel/hint_id_map.json +18 -0
phoonnx/thirdparty/tashkeel/input_id_map.json +56 -0
phoonnx/thirdparty/tashkeel/model.onnx +0 -0
phoonnx/thirdparty/tashkeel/target_id_map.json +17 -0
phoonnx/thirdparty/zh_num.py +238 -0
phoonnx/util.py +705 -0
phoonnx/version.py +6 -0
phoonnx/voice.py +521 -0
phoonnx-0.0.0.dist-info/METADATA +255 -0
phoonnx-0.0.0.dist-info/RECORD +86 -0
phoonnx-0.0.0.dist-info/WHEEL +5 -0
phoonnx-0.0.0.dist-info/top_level.txt +2 -0
phoonnx_train/__main__.py +151 -0
phoonnx_train/export_onnx.py +109 -0
phoonnx_train/norm_audio/__init__.py +92 -0
phoonnx_train/norm_audio/trim.py +54 -0
phoonnx_train/norm_audio/vad.py +54 -0
phoonnx_train/preprocess.py +420 -0
phoonnx_train/vits/__init__.py +0 -0
phoonnx_train/vits/attentions.py +427 -0
phoonnx_train/vits/commons.py +147 -0
phoonnx_train/vits/config.py +330 -0
phoonnx_train/vits/dataset.py +214 -0
phoonnx_train/vits/lightning.py +352 -0
phoonnx_train/vits/losses.py +58 -0
phoonnx_train/vits/mel_processing.py +139 -0
phoonnx_train/vits/models.py +732 -0
phoonnx_train/vits/modules.py +527 -0
phoonnx_train/vits/monotonic_align/__init__.py +20 -0
phoonnx_train/vits/monotonic_align/setup.py +13 -0
phoonnx_train/vits/transforms.py +212 -0
phoonnx_train/vits/utils.py +16 -0
phoonnx_train/vits/wavfile.py +860 -0

phoonnx_train/vits/wavfile.py ADDED Viewed

@@ -0,0 +1,860 @@
+"""
+Module to read / write wav files using NumPy arrays
+Functions
+---------
+`read`: Return the sample rate (in samples/sec) and data from a WAV file.
+`write`: Write a NumPy array as a WAV file.
+"""
+import io
+import struct
+import sys
+import warnings
+from enum import IntEnum
+import numpy
+__all__ = ["WavFileWarning", "read", "write"]
+class WavFileWarning(UserWarning):
+    pass
+class WAVE_FORMAT(IntEnum):
+    """
+    WAVE form wFormatTag IDs
+    Complete list is in mmreg.h in Windows 10 SDK.  ALAC and OPUS are the
+    newest additions, in v10.0.14393 2016-07
+    """
+    UNKNOWN = 0x0000
+    PCM = 0x0001
+    ADPCM = 0x0002
+    IEEE_FLOAT = 0x0003
+    VSELP = 0x0004
+    IBM_CVSD = 0x0005
+    ALAW = 0x0006
+    MULAW = 0x0007
+    DTS = 0x0008
+    DRM = 0x0009
+    WMAVOICE9 = 0x000A
+    WMAVOICE10 = 0x000B
+    OKI_ADPCM = 0x0010
+    DVI_ADPCM = 0x0011
+    IMA_ADPCM = 0x0011  # Duplicate
+    MEDIASPACE_ADPCM = 0x0012
+    SIERRA_ADPCM = 0x0013
+    G723_ADPCM = 0x0014
+    DIGISTD = 0x0015
+    DIGIFIX = 0x0016
+    DIALOGIC_OKI_ADPCM = 0x0017
+    MEDIAVISION_ADPCM = 0x0018
+    CU_CODEC = 0x0019
+    HP_DYN_VOICE = 0x001A
+    YAMAHA_ADPCM = 0x0020
+    SONARC = 0x0021
+    DSPGROUP_TRUESPEECH = 0x0022
+    ECHOSC1 = 0x0023
+    AUDIOFILE_AF36 = 0x0024
+    APTX = 0x0025
+    AUDIOFILE_AF10 = 0x0026
+    PROSODY_1612 = 0x0027
+    LRC = 0x0028
+    DOLBY_AC2 = 0x0030
+    GSM610 = 0x0031
+    MSNAUDIO = 0x0032
+    ANTEX_ADPCME = 0x0033
+    CONTROL_RES_VQLPC = 0x0034
+    DIGIREAL = 0x0035
+    DIGIADPCM = 0x0036
+    CONTROL_RES_CR10 = 0x0037
+    NMS_VBXADPCM = 0x0038
+    CS_IMAADPCM = 0x0039
+    ECHOSC3 = 0x003A
+    ROCKWELL_ADPCM = 0x003B
+    ROCKWELL_DIGITALK = 0x003C
+    XEBEC = 0x003D
+    G721_ADPCM = 0x0040
+    G728_CELP = 0x0041
+    MSG723 = 0x0042
+    INTEL_G723_1 = 0x0043
+    INTEL_G729 = 0x0044
+    SHARP_G726 = 0x0045
+    MPEG = 0x0050
+    RT24 = 0x0052
+    PAC = 0x0053
+    MPEGLAYER3 = 0x0055
+    LUCENT_G723 = 0x0059
+    CIRRUS = 0x0060
+    ESPCM = 0x0061
+    VOXWARE = 0x0062
+    CANOPUS_ATRAC = 0x0063
+    G726_ADPCM = 0x0064
+    G722_ADPCM = 0x0065
+    DSAT = 0x0066
+    DSAT_DISPLAY = 0x0067
+    VOXWARE_BYTE_ALIGNED = 0x0069
+    VOXWARE_AC8 = 0x0070
+    VOXWARE_AC10 = 0x0071
+    VOXWARE_AC16 = 0x0072
+    VOXWARE_AC20 = 0x0073
+    VOXWARE_RT24 = 0x0074
+    VOXWARE_RT29 = 0x0075
+    VOXWARE_RT29HW = 0x0076
+    VOXWARE_VR12 = 0x0077
+    VOXWARE_VR18 = 0x0078
+    VOXWARE_TQ40 = 0x0079
+    VOXWARE_SC3 = 0x007A
+    VOXWARE_SC3_1 = 0x007B
+    SOFTSOUND = 0x0080
+    VOXWARE_TQ60 = 0x0081
+    MSRT24 = 0x0082
+    G729A = 0x0083
+    MVI_MVI2 = 0x0084
+    DF_G726 = 0x0085
+    DF_GSM610 = 0x0086
+    ISIAUDIO = 0x0088
+    ONLIVE = 0x0089
+    MULTITUDE_FT_SX20 = 0x008A
+    INFOCOM_ITS_G721_ADPCM = 0x008B
+    CONVEDIA_G729 = 0x008C
+    CONGRUENCY = 0x008D
+    SBC24 = 0x0091
+    DOLBY_AC3_SPDIF = 0x0092
+    MEDIASONIC_G723 = 0x0093
+    PROSODY_8KBPS = 0x0094
+    ZYXEL_ADPCM = 0x0097
+    PHILIPS_LPCBB = 0x0098
+    PACKED = 0x0099
+    MALDEN_PHONYTALK = 0x00A0
+    RACAL_RECORDER_GSM = 0x00A1
+    RACAL_RECORDER_G720_A = 0x00A2
+    RACAL_RECORDER_G723_1 = 0x00A3
+    RACAL_RECORDER_TETRA_ACELP = 0x00A4
+    NEC_AAC = 0x00B0
+    RAW_AAC1 = 0x00FF
+    RHETOREX_ADPCM = 0x0100
+    IRAT = 0x0101
+    VIVO_G723 = 0x0111
+    VIVO_SIREN = 0x0112
+    PHILIPS_CELP = 0x0120
+    PHILIPS_GRUNDIG = 0x0121
+    DIGITAL_G723 = 0x0123
+    SANYO_LD_ADPCM = 0x0125
+    SIPROLAB_ACEPLNET = 0x0130
+    SIPROLAB_ACELP4800 = 0x0131
+    SIPROLAB_ACELP8V3 = 0x0132
+    SIPROLAB_G729 = 0x0133
+    SIPROLAB_G729A = 0x0134
+    SIPROLAB_KELVIN = 0x0135
+    VOICEAGE_AMR = 0x0136
+    G726ADPCM = 0x0140
+    DICTAPHONE_CELP68 = 0x0141
+    DICTAPHONE_CELP54 = 0x0142
+    QUALCOMM_PUREVOICE = 0x0150
+    QUALCOMM_HALFRATE = 0x0151
+    TUBGSM = 0x0155
+    MSAUDIO1 = 0x0160
+    WMAUDIO2 = 0x0161
+    WMAUDIO3 = 0x0162
+    WMAUDIO_LOSSLESS = 0x0163
+    WMASPDIF = 0x0164
+    UNISYS_NAP_ADPCM = 0x0170
+    UNISYS_NAP_ULAW = 0x0171
+    UNISYS_NAP_ALAW = 0x0172
+    UNISYS_NAP_16K = 0x0173
+    SYCOM_ACM_SYC008 = 0x0174
+    SYCOM_ACM_SYC701_G726L = 0x0175
+    SYCOM_ACM_SYC701_CELP54 = 0x0176
+    SYCOM_ACM_SYC701_CELP68 = 0x0177
+    KNOWLEDGE_ADVENTURE_ADPCM = 0x0178
+    FRAUNHOFER_IIS_MPEG2_AAC = 0x0180
+    DTS_DS = 0x0190
+    CREATIVE_ADPCM = 0x0200
+    CREATIVE_FASTSPEECH8 = 0x0202
+    CREATIVE_FASTSPEECH10 = 0x0203
+    UHER_ADPCM = 0x0210
+    ULEAD_DV_AUDIO = 0x0215
+    ULEAD_DV_AUDIO_1 = 0x0216
+    QUARTERDECK = 0x0220
+    ILINK_VC = 0x0230
+    RAW_SPORT = 0x0240
+    ESST_AC3 = 0x0241
+    GENERIC_PASSTHRU = 0x0249
+    IPI_HSX = 0x0250
+    IPI_RPELP = 0x0251
+    CS2 = 0x0260
+    SONY_SCX = 0x0270
+    SONY_SCY = 0x0271
+    SONY_ATRAC3 = 0x0272
+    SONY_SPC = 0x0273
+    TELUM_AUDIO = 0x0280
+    TELUM_IA_AUDIO = 0x0281
+    NORCOM_VOICE_SYSTEMS_ADPCM = 0x0285
+    FM_TOWNS_SND = 0x0300
+    MICRONAS = 0x0350
+    MICRONAS_CELP833 = 0x0351
+    BTV_DIGITAL = 0x0400
+    INTEL_MUSIC_CODER = 0x0401
+    INDEO_AUDIO = 0x0402
+    QDESIGN_MUSIC = 0x0450
+    ON2_VP7_AUDIO = 0x0500
+    ON2_VP6_AUDIO = 0x0501
+    VME_VMPCM = 0x0680
+    TPC = 0x0681
+    LIGHTWAVE_LOSSLESS = 0x08AE
+    OLIGSM = 0x1000
+    OLIADPCM = 0x1001
+    OLICELP = 0x1002
+    OLISBC = 0x1003
+    OLIOPR = 0x1004
+    LH_CODEC = 0x1100
+    LH_CODEC_CELP = 0x1101
+    LH_CODEC_SBC8 = 0x1102
+    LH_CODEC_SBC12 = 0x1103
+    LH_CODEC_SBC16 = 0x1104
+    NORRIS = 0x1400
+    ISIAUDIO_2 = 0x1401
+    SOUNDSPACE_MUSICOMPRESS = 0x1500
+    MPEG_ADTS_AAC = 0x1600
+    MPEG_RAW_AAC = 0x1601
+    MPEG_LOAS = 0x1602
+    NOKIA_MPEG_ADTS_AAC = 0x1608
+    NOKIA_MPEG_RAW_AAC = 0x1609
+    VODAFONE_MPEG_ADTS_AAC = 0x160A
+    VODAFONE_MPEG_RAW_AAC = 0x160B
+    MPEG_HEAAC = 0x1610
+    VOXWARE_RT24_SPEECH = 0x181C
+    SONICFOUNDRY_LOSSLESS = 0x1971
+    INNINGS_TELECOM_ADPCM = 0x1979
+    LUCENT_SX8300P = 0x1C07
+    LUCENT_SX5363S = 0x1C0C
+    CUSEEME = 0x1F03
+    NTCSOFT_ALF2CM_ACM = 0x1FC4
+    DVM = 0x2000
+    DTS2 = 0x2001
+    MAKEAVIS = 0x3313
+    DIVIO_MPEG4_AAC = 0x4143
+    NOKIA_ADAPTIVE_MULTIRATE = 0x4201
+    DIVIO_G726 = 0x4243
+    LEAD_SPEECH = 0x434C
+    LEAD_VORBIS = 0x564C
+    WAVPACK_AUDIO = 0x5756
+    OGG_VORBIS_MODE_1 = 0x674F
+    OGG_VORBIS_MODE_2 = 0x6750
+    OGG_VORBIS_MODE_3 = 0x6751
+    OGG_VORBIS_MODE_1_PLUS = 0x676F
+    OGG_VORBIS_MODE_2_PLUS = 0x6770
+    OGG_VORBIS_MODE_3_PLUS = 0x6771
+    ALAC = 0x6C61
+    _3COM_NBX = 0x7000  # Can't have leading digit
+    OPUS = 0x704F
+    FAAD_AAC = 0x706D
+    AMR_NB = 0x7361
+    AMR_WB = 0x7362
+    AMR_WP = 0x7363
+    GSM_AMR_CBR = 0x7A21
+    GSM_AMR_VBR_SID = 0x7A22
+    COMVERSE_INFOSYS_G723_1 = 0xA100
+    COMVERSE_INFOSYS_AVQSBC = 0xA101
+    COMVERSE_INFOSYS_SBC = 0xA102
+    SYMBOL_G729_A = 0xA103
+    VOICEAGE_AMR_WB = 0xA104
+    INGENIENT_G726 = 0xA105
+    MPEG4_AAC = 0xA106
+    ENCORE_G726 = 0xA107
+    ZOLL_ASAO = 0xA108
+    SPEEX_VOICE = 0xA109
+    VIANIX_MASC = 0xA10A
+    WM9_SPECTRUM_ANALYZER = 0xA10B
+    WMF_SPECTRUM_ANAYZER = 0xA10C
+    GSM_610 = 0xA10D
+    GSM_620 = 0xA10E
+    GSM_660 = 0xA10F
+    GSM_690 = 0xA110
+    GSM_ADAPTIVE_MULTIRATE_WB = 0xA111
+    POLYCOM_G722 = 0xA112
+    POLYCOM_G728 = 0xA113
+    POLYCOM_G729_A = 0xA114
+    POLYCOM_SIREN = 0xA115
+    GLOBAL_IP_ILBC = 0xA116
+    RADIOTIME_TIME_SHIFT_RADIO = 0xA117
+    NICE_ACA = 0xA118
+    NICE_ADPCM = 0xA119
+    VOCORD_G721 = 0xA11A
+    VOCORD_G726 = 0xA11B
+    VOCORD_G722_1 = 0xA11C
+    VOCORD_G728 = 0xA11D
+    VOCORD_G729 = 0xA11E
+    VOCORD_G729_A = 0xA11F
+    VOCORD_G723_1 = 0xA120
+    VOCORD_LBC = 0xA121
+    NICE_G728 = 0xA122
+    FRACE_TELECOM_G729 = 0xA123
+    CODIAN = 0xA124
+    FLAC = 0xF1AC
+    EXTENSIBLE = 0xFFFE
+    DEVELOPMENT = 0xFFFF
+KNOWN_WAVE_FORMATS = {WAVE_FORMAT.PCM, WAVE_FORMAT.IEEE_FLOAT}
+def _raise_bad_format(format_tag):
+    try:
+        format_name = WAVE_FORMAT(format_tag).name
+    except ValueError:
+        format_name = f"{format_tag:#06x}"
+    raise ValueError(
+        f"Unknown wave file format: {format_name}. Supported "
+        "formats: " + ", ".join(x.name for x in KNOWN_WAVE_FORMATS)
+    )
+def _read_fmt_chunk(fid, is_big_endian):
+    """
+    Returns
+    -------
+    size : int
+        size of format subchunk in bytes (minus 8 for "fmt " and itself)
+    format_tag : int
+        PCM, float, or compressed format
+    channels : int
+        number of channels
+    fs : int
+        sampling frequency in samples per second
+    bytes_per_second : int
+        overall byte rate for the file
+    block_align : int
+        bytes per sample, including all channels
+    bit_depth : int
+        bits per sample
+    Notes
+    -----
+    Assumes file pointer is immediately after the 'fmt ' id
+    """
+    if is_big_endian:
+        fmt = ">"
+    else:
+        fmt = "<"
+    size = struct.unpack(fmt + "I", fid.read(4))[0]
+    if size < 16:
+        raise ValueError("Binary structure of wave file is not compliant")
+    res = struct.unpack(fmt + "HHIIHH", fid.read(16))
+    bytes_read = 16
+    format_tag, channels, fs, bytes_per_second, block_align, bit_depth = res
+    if format_tag == WAVE_FORMAT.EXTENSIBLE and size >= (16 + 2):
+        ext_chunk_size = struct.unpack(fmt + "H", fid.read(2))[0]
+        bytes_read += 2
+        if ext_chunk_size >= 22:
+            extensible_chunk_data = fid.read(22)
+            bytes_read += 22
+            raw_guid = extensible_chunk_data[2 + 4 : 2 + 4 + 16]
+            # GUID template {XXXXXXXX-0000-0010-8000-00AA00389B71} (RFC-2361)
+            # MS GUID byte order: first three groups are native byte order,
+            # rest is Big Endian
+            if is_big_endian:
+                tail = b"\x00\x00\x00\x10\x80\x00\x00\xAA\x00\x38\x9B\x71"
+            else:
+                tail = b"\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71"
+            if raw_guid.endswith(tail):
+                format_tag = struct.unpack(fmt + "I", raw_guid[:4])[0]
+        else:
+            raise ValueError("Binary structure of wave file is not compliant")
+    if format_tag not in KNOWN_WAVE_FORMATS:
+        _raise_bad_format(format_tag)
+    # move file pointer to next chunk
+    if size > bytes_read:
+        fid.read(size - bytes_read)
+    # fmt should always be 16, 18 or 40, but handle it just in case
+    _handle_pad_byte(fid, size)
+    return (size, format_tag, channels, fs, bytes_per_second, block_align, bit_depth)
+def _read_data_chunk(
+    fid, format_tag, channels, bit_depth, is_big_endian, block_align, mmap=False
+):
+    """
+    Notes
+    -----
+    Assumes file pointer is immediately after the 'data' id
+    It's possible to not use all available bits in a container, or to store
+    samples in a container bigger than necessary, so bytes_per_sample uses
+    the actual reported container size (nBlockAlign / nChannels).  Real-world
+    examples:
+    Adobe Audition's "24-bit packed int (type 1, 20-bit)"
+        nChannels = 2, nBlockAlign = 6, wBitsPerSample = 20
+    http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Samples/AFsp/M1F1-int12-AFsp.wav
+    is:
+        nChannels = 2, nBlockAlign = 4, wBitsPerSample = 12
+    http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Docs/multichaudP.pdf
+    gives an example of:
+        nChannels = 2, nBlockAlign = 8, wBitsPerSample = 20
+    """
+    if is_big_endian:
+        fmt = ">"
+    else:
+        fmt = "<"
+    # Size of the data subchunk in bytes
+    size = struct.unpack(fmt + "I", fid.read(4))[0]
+    # Number of bytes per sample (sample container size)
+    bytes_per_sample = block_align // channels
+    n_samples = size // bytes_per_sample
+    if format_tag == WAVE_FORMAT.PCM:
+        if 1 <= bit_depth <= 8:
+            dtype = "u1"  # WAV of 8-bit integer or less are unsigned
+        elif bytes_per_sample in {3, 5, 6, 7}:
+            # No compatible dtype.  Load as raw bytes for reshaping later.
+            dtype = "V1"
+        elif bit_depth <= 64:
+            # Remaining bit depths can map directly to signed numpy dtypes
+            dtype = f"{fmt}i{bytes_per_sample}"
+        else:
+            raise ValueError(
+                "Unsupported bit depth: the WAV file "
+                f"has {bit_depth}-bit integer data."
+            )
+    elif format_tag == WAVE_FORMAT.IEEE_FLOAT:
+        if bit_depth in {32, 64}:
+            dtype = f"{fmt}f{bytes_per_sample}"
+        else:
+            raise ValueError(
+                "Unsupported bit depth: the WAV file "
+                f"has {bit_depth}-bit floating-point data."
+            )
+    else:
+        _raise_bad_format(format_tag)
+    start = fid.tell()
+    if not mmap:
+        try:
+            count = size if dtype == "V1" else n_samples
+            data = numpy.fromfile(fid, dtype=dtype, count=count)
+        except io.UnsupportedOperation:  # not a C-like file
+            fid.seek(start, 0)  # just in case it seeked, though it shouldn't
+            data = numpy.frombuffer(fid.read(size), dtype=dtype)
+        if dtype == "V1":
+            # Rearrange raw bytes into smallest compatible numpy dtype
+            dt = f"{fmt}i4" if bytes_per_sample == 3 else f"{fmt}i8"
+            a = numpy.zeros(
+                (len(data) // bytes_per_sample, numpy.dtype(dt).itemsize), dtype="V1"
+            )
+            if is_big_endian:
+                a[:, :bytes_per_sample] = data.reshape((-1, bytes_per_sample))
+            else:
+                a[:, -bytes_per_sample:] = data.reshape((-1, bytes_per_sample))
+            data = a.view(dt).reshape(a.shape[:-1])
+    else:
+        if bytes_per_sample in {1, 2, 4, 8}:
+            start = fid.tell()
+            data = numpy.memmap(
+                fid, dtype=dtype, mode="c", offset=start, shape=(n_samples,)
+            )
+            fid.seek(start + size)
+        else:
+            raise ValueError(
+                "mmap=True not compatible with "
+                f"{bytes_per_sample}-byte container size."
+            )
+    _handle_pad_byte(fid, size)
+    if channels > 1:
+        data = data.reshape(-1, channels)
+    return data
+def _skip_unknown_chunk(fid, is_big_endian):
+    if is_big_endian:
+        fmt = ">I"
+    else:
+        fmt = "<I"
+    data = fid.read(4)
+    # call unpack() and seek() only if we have really read data from file
+    # otherwise empty read at the end of the file would trigger
+    # unnecessary exception at unpack() call
+    # in case data equals somehow to 0, there is no need for seek() anyway
+    if data:
+        size = struct.unpack(fmt, data)[0]
+        fid.seek(size, 1)
+        _handle_pad_byte(fid, size)
+def _read_riff_chunk(fid):
+    str1 = fid.read(4)  # File signature
+    if str1 == b"RIFF":
+        is_big_endian = False
+        fmt = "<I"
+    elif str1 == b"RIFX":
+        is_big_endian = True
+        fmt = ">I"
+    else:
+        # There are also .wav files with "FFIR" or "XFIR" signatures?
+        raise ValueError(
+            f"File format {repr(str1)} not understood. Only "
+            "'RIFF' and 'RIFX' supported."
+        )
+    # Size of entire file
+    file_size = struct.unpack(fmt, fid.read(4))[0] + 8
+    str2 = fid.read(4)
+    if str2 != b"WAVE":
+        raise ValueError(f"Not a WAV file. RIFF form type is {repr(str2)}.")
+    return file_size, is_big_endian
+def _handle_pad_byte(fid, size):
+    # "If the chunk size is an odd number of bytes, a pad byte with value zero
+    # is written after ckData." So we need to seek past this after each chunk.
+    if size % 2:
+        fid.seek(1, 1)
+def read(filename, mmap=False):
+    """
+    Open a WAV file.
+    Return the sample rate (in samples/sec) and data from an LPCM WAV file.
+    Parameters
+    ----------
+    filename : string or open file handle
+        Input WAV file.
+    mmap : bool, optional
+        Whether to read data as memory-mapped (default: False).  Not compatible
+        with some bit depths; see Notes.  Only to be used on real files.
+        .. versionadded:: 0.12.0
+    Returns
+    -------
+    rate : int
+        Sample rate of WAV file.
+    data : numpy array
+        Data read from WAV file. Data-type is determined from the file;
+        see Notes.  Data is 1-D for 1-channel WAV, or 2-D of shape
+        (Nsamples, Nchannels) otherwise. If a file-like input without a
+        C-like file descriptor (e.g., :class:`python:io.BytesIO`) is
+        passed, this will not be writeable.
+    Notes
+    -----
+    Common data types: [1]_
+    =====================  ===========  ===========  =============
+         WAV format            Min          Max       NumPy dtype
+    =====================  ===========  ===========  =============
+    32-bit floating-point  -1.0         +1.0         float32
+    32-bit integer PCM     -2147483648  +2147483647  int32
+    24-bit integer PCM     -2147483648  +2147483392  int32
+    16-bit integer PCM     -32768       +32767       int16
+    8-bit integer PCM      0            255          uint8
+    =====================  ===========  ===========  =============
+    WAV files can specify arbitrary bit depth, and this function supports
+    reading any integer PCM depth from 1 to 64 bits.  Data is returned in the
+    smallest compatible numpy int type, in left-justified format.  8-bit and
+    lower is unsigned, while 9-bit and higher is signed.
+    For example, 24-bit data will be stored as int32, with the MSB of the
+    24-bit data stored at the MSB of the int32, and typically the least
+    significant byte is 0x00.  (However, if a file actually contains data past
+    its specified bit depth, those bits will be read and output, too. [2]_)
+    This bit justification and sign matches WAV's native internal format, which
+    allows memory mapping of WAV files that use 1, 2, 4, or 8 bytes per sample
+    (so 24-bit files cannot be memory-mapped, but 32-bit can).
+    IEEE float PCM in 32- or 64-bit format is supported, with or without mmap.
+    Values exceeding [-1, +1] are not clipped.
+    Non-linear PCM (mu-law, A-law) is not supported.
+    References
+    ----------
+    .. [1] IBM Corporation and Microsoft Corporation, "Multimedia Programming
+       Interface and Data Specifications 1.0", section "Data Format of the
+       Samples", August 1991
+       http://www.tactilemedia.com/info/MCI_Control_Info.html
+    .. [2] Adobe Systems Incorporated, "Adobe Audition 3 User Guide", section
+       "Audio file formats: 24-bit Packed Int (type 1, 20-bit)", 2007
+    Examples
+    --------
+    >>> from os.path import dirname, join as pjoin
+    >>> from scipy.io import wavfile
+    >>> import scipy.io
+    Get the filename for an example .wav file from the tests/data directory.
+    >>> data_dir = pjoin(dirname(scipy.io.__file__), 'tests', 'data')
+    >>> wav_fname = pjoin(data_dir, 'test-44100Hz-2ch-32bit-float-be.wav')
+    Load the .wav file contents.
+    >>> samplerate, data = wavfile.read(wav_fname)
+    >>> print(f"number of channels = {data.shape[1]}")
+    number of channels = 2
+    >>> length = data.shape[0] / samplerate
+    >>> print(f"length = {length}s")
+    length = 0.01s
+    Plot the waveform.
+    >>> import matplotlib.pyplot as plt
+    >>> import numpy as np
+    >>> time = np.linspace(0., length, data.shape[0])
+    >>> plt.plot(time, data[:, 0], label="Left channel")
+    >>> plt.plot(time, data[:, 1], label="Right channel")
+    >>> plt.legend()
+    >>> plt.xlabel("Time [s]")
+    >>> plt.ylabel("Amplitude")
+    >>> plt.show()
+    """
+    if hasattr(filename, "read"):
+        fid = filename
+        mmap = False
+    else:
+        # pylint: disable=consider-using-with
+        fid = open(filename, "rb")
+    try:
+        file_size, is_big_endian = _read_riff_chunk(fid)
+        fmt_chunk_received = False
+        data_chunk_received = False
+        while fid.tell() < file_size:
+            # read the next chunk
+            chunk_id = fid.read(4)
+            if not chunk_id:
+                if data_chunk_received:
+                    # End of file but data successfully read
+                    warnings.warn(
+                        f"Reached EOF prematurely; finished at {fid.tell()} bytes, "
+                        "expected {file_size} bytes from header.",
+                        WavFileWarning,
+                        stacklevel=2,
+                    )
+                    break
+                raise ValueError("Unexpected end of file.")
+            if len(chunk_id) < 4:
+                msg = f"Incomplete chunk ID: {repr(chunk_id)}"
+                # If we have the data, ignore the broken chunk
+                if fmt_chunk_received and data_chunk_received:
+                    warnings.warn(msg + ", ignoring it.", WavFileWarning, stacklevel=2)
+                else:
+                    raise ValueError(msg)
+            if chunk_id == b"fmt ":
+                fmt_chunk_received = True
+                fmt_chunk = _read_fmt_chunk(fid, is_big_endian)
+                format_tag, channels, fs = fmt_chunk[1:4]
+                bit_depth = fmt_chunk[6]
+                block_align = fmt_chunk[5]
+            elif chunk_id == b"fact":
+                _skip_unknown_chunk(fid, is_big_endian)
+            elif chunk_id == b"data":
+                data_chunk_received = True
+                if not fmt_chunk_received:
+                    raise ValueError("No fmt chunk before data")
+                data = _read_data_chunk(
+                    fid,
+                    format_tag,
+                    channels,
+                    bit_depth,
+                    is_big_endian,
+                    block_align,
+                    mmap,
+                )
+            elif chunk_id == b"LIST":
+                # Someday this could be handled properly but for now skip it
+                _skip_unknown_chunk(fid, is_big_endian)
+            elif chunk_id in {b"JUNK", b"Fake"}:
+                # Skip alignment chunks without warning
+                _skip_unknown_chunk(fid, is_big_endian)
+            else:
+                warnings.warn(
+                    "Chunk (non-data) not understood, skipping it.",
+                    WavFileWarning,
+                    stacklevel=2,
+                )
+                _skip_unknown_chunk(fid, is_big_endian)
+    finally:
+        if not hasattr(filename, "read"):
+            fid.close()
+        else:
+            fid.seek(0)
+    return fs, data
+def write(filename, rate, data):
+    """
+    Write a NumPy array as a WAV file.
+    Parameters
+    ----------
+    filename : string or open file handle
+        Output wav file.
+    rate : int
+        The sample rate (in samples/sec).
+    data : ndarray
+        A 1-D or 2-D NumPy array of either integer or float data-type.
+    Notes
+    -----
+    * Writes a simple uncompressed WAV file.
+    * To write multiple-channels, use a 2-D array of shape
+      (Nsamples, Nchannels).
+    * The bits-per-sample and PCM/float will be determined by the data-type.
+    Common data types: [1]_
+    =====================  ===========  ===========  =============
+         WAV format            Min          Max       NumPy dtype
+    =====================  ===========  ===========  =============
+    32-bit floating-point  -1.0         +1.0         float32
+    32-bit PCM             -2147483648  +2147483647  int32
+    16-bit PCM             -32768       +32767       int16
+    8-bit PCM              0            255          uint8
+    =====================  ===========  ===========  =============
+    Note that 8-bit PCM is unsigned.
+    References
+    ----------
+    .. [1] IBM Corporation and Microsoft Corporation, "Multimedia Programming
+       Interface and Data Specifications 1.0", section "Data Format of the
+       Samples", August 1991
+       http://www.tactilemedia.com/info/MCI_Control_Info.html
+    Examples
+    --------
+    Create a 100Hz sine wave, sampled at 44100Hz.
+    Write to 16-bit PCM, Mono.
+    >>> from scipy.io.wavfile import write
+    >>> samplerate = 44100; fs = 100
+    >>> t = np.linspace(0., 1., samplerate)
+    >>> amplitude = np.iinfo(np.int16).max
+    >>> data = amplitude * np.sin(2. * np.pi * fs * t)
+    >>> write("example.wav", samplerate, data.astype(np.int16))
+    """
+    if hasattr(filename, "write"):
+        fid = filename
+    else:
+        # pylint: disable=consider-using-with
+        fid = open(filename, "wb")
+    fs = rate
+    try:
+        dkind = data.dtype.kind
+        if not (
+            dkind == "i" or dkind == "f" or (dkind == "u" and data.dtype.itemsize == 1)
+        ):
+            raise ValueError(f"Unsupported data type '{data.dtype}'")
+        header_data = b""
+        header_data += b"RIFF"
+        header_data += b"\x00\x00\x00\x00"
+        header_data += b"WAVE"
+        # fmt chunk
+        header_data += b"fmt "
+        if dkind == "f":
+            format_tag = WAVE_FORMAT.IEEE_FLOAT
+        else:
+            format_tag = WAVE_FORMAT.PCM
+        if data.ndim == 1:
+            channels = 1
+        else:
+            channels = data.shape[1]
+        bit_depth = data.dtype.itemsize * 8
+        bytes_per_second = fs * (bit_depth // 8) * channels
+        block_align = channels * (bit_depth // 8)
+        fmt_chunk_data = struct.pack(
+            "<HHIIHH",
+            format_tag,
+            channels,
+            fs,
+            bytes_per_second,
+            block_align,
+            bit_depth,
+        )
+        if not (dkind in ("i", "u")):
+            # add cbSize field for non-PCM files
+            fmt_chunk_data += b"\x00\x00"
+        header_data += struct.pack("<I", len(fmt_chunk_data))
+        header_data += fmt_chunk_data
+        # fact chunk (non-PCM files)
+        if not (dkind in ("i", "u")):
+            header_data += b"fact"
+            header_data += struct.pack("<II", 4, data.shape[0])
+        # check data size (needs to be immediately before the data chunk)
+        if ((len(header_data) - 4 - 4) + (4 + 4 + data.nbytes)) > 0xFFFFFFFF:
+            raise ValueError("Data exceeds wave file size limit")
+        fid.write(header_data)
+        # data chunk
+        fid.write(b"data")
+        fid.write(struct.pack("<I", data.nbytes))
+        if data.dtype.byteorder == ">" or (
+            data.dtype.byteorder == "=" and sys.byteorder == "big"
+        ):
+            data = data.byteswap()
+        _array_tofile(fid, data)
+        # Determine file size and place it in correct
+        #  position at start of the file.
+        size = fid.tell()
+        fid.seek(4)
+        fid.write(struct.pack("<I", size - 8))
+    finally:
+        if not hasattr(filename, "write"):
+            fid.close()
+        else:
+            fid.seek(0)
+def _array_tofile(fid, data):
+    # ravel gives a c-contiguous buffer
+    fid.write(data.ravel().view("b").data)