hunterHearsPy 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hunterHearsPy/__init__.py +36 -0
- hunterHearsPy/amplitude.py +170 -0
- hunterHearsPy/autoRevert.py +75 -0
- hunterHearsPy/clippingArrays.py +143 -0
- hunterHearsPy/ioAudio.py +663 -0
- hunterHearsPy/py.typed +0 -0
- hunterHearsPy/theTypes.py +118 -0
- hunterHearsPy/windowingFunctions.py +211 -0
- hunterHearsPy/windowingFunctionsTensor.py +168 -0
- hunterhearspy-1.0.3.dist-info/METADATA +115 -0
- hunterhearspy-1.0.3.dist-info/RECORD +13 -0
- hunterhearspy-1.0.3.dist-info/WHEEL +4 -0
- hunterhearspy-1.0.3.dist-info/licenses/LICENSE +407 -0
hunterHearsPy/ioAudio.py
ADDED
|
@@ -0,0 +1,663 @@
|
|
|
1
|
+
"""Read, write, resample, and transform audio waveforms between time and frequency domains.
|
|
2
|
+
|
|
3
|
+
You can use this module to load audio files into NumPy arrays, resample waveforms, convert
|
|
4
|
+
between waveforms and spectrograms using the Short-Time Fourier Transform, and write
|
|
5
|
+
waveforms back to WAV files. All audio is normalized to stereo, 32-bit float `Waveform`
|
|
6
|
+
arrays shaped `(channels, samples)`. All spectrograms are complex 64-bit float
|
|
7
|
+
`Spectrogram` arrays shaped `(channels, frequencies, frames)`.
|
|
8
|
+
|
|
9
|
+
Contents
|
|
10
|
+
--------
|
|
11
|
+
Functions
|
|
12
|
+
getWaveformMetadata
|
|
13
|
+
Retrieve metadata for a collection of audio waveform files.
|
|
14
|
+
loadSpectrograms
|
|
15
|
+
Load spectrograms from a list of audio files.
|
|
16
|
+
loadWaveforms
|
|
17
|
+
Load a list of audio files into a single stacked NumPy array.
|
|
18
|
+
readAudioFile
|
|
19
|
+
Read an audio file and return stereo waveform data as a NumPy array.
|
|
20
|
+
resampleWaveform
|
|
21
|
+
Resample a waveform array to a target sample rate.
|
|
22
|
+
spectrogramToWAV
|
|
23
|
+
Write a complex spectrogram to a WAV file.
|
|
24
|
+
stft
|
|
25
|
+
Perform Short-Time Fourier Transform or its inverse on waveform or spectrogram data.
|
|
26
|
+
waveformSpectrogramWaveform
|
|
27
|
+
Decorate a spectrogram-processing callable to accept and return waveforms.
|
|
28
|
+
writeWAV
|
|
29
|
+
Write a waveform array to a WAV file.
|
|
30
|
+
|
|
31
|
+
"""
|
|
32
|
+
from __future__ import annotations
|
|
33
|
+
|
|
34
|
+
from hunterHearsPy import (
|
|
35
|
+
ArraySpectrograms, ArrayWaveforms, halfsine, ParametersShortTimeFFT, ParametersSTFT, ParametersUniversal, Spectrogram, Waveform,
|
|
36
|
+
WaveformMetadata, WindowingFunction)
|
|
37
|
+
from hunterMakesPy.filesystemToolkit import makeDirectorySafely
|
|
38
|
+
from math import ceil as ceiling, log2 as log_base2
|
|
39
|
+
from multiprocessing import set_start_method as multiprocessing_set_start_method
|
|
40
|
+
from numpy import complex64, dtype, float32, floating, ndarray
|
|
41
|
+
from scipy.signal import ShortTimeFFT
|
|
42
|
+
from tqdm.auto import tqdm
|
|
43
|
+
from typing import Any, BinaryIO, cast, Literal, overload, TYPE_CHECKING
|
|
44
|
+
import numpy
|
|
45
|
+
import resampy
|
|
46
|
+
import soundfile
|
|
47
|
+
|
|
48
|
+
if TYPE_CHECKING:
|
|
49
|
+
from collections.abc import Callable, Sequence
|
|
50
|
+
from os import PathLike
|
|
51
|
+
|
|
52
|
+
if __name__ == '__main__':
    multiprocessing_set_start_method('spawn')

# Design coordinated, user-overridable universal parameter defaults for audio functions
# https://github.com/hunterhogan/hunterHearsPy/issues/5
universalDtypeWaveform = float32
"""Module-wide NumPy dtype for waveform arrays; sets element size and numeric precision."""
universalDtypeSpectrogram = complex64
"""Module-wide NumPy dtype for spectrogram arrays; `complex64` stores two 32-bit floats per element."""
parametersShortTimeFFTUniversal: ParametersShortTimeFFT = {'fft_mode': 'onesided'}
"""Module-wide keyword parameters passed to `scipy.signal.ShortTimeFFT` on construction."""
parametersSTFTUniversal: ParametersSTFT = {'padding': 'even', 'axis': -1}
"""Module-wide keyword parameters passed to `ShortTimeFFT.stft` on each forward transform."""

lengthWindowingFunctionDEFAULT = 1024
"""Default length in samples of the windowing function used when no override is provided."""
windowingFunctionCallableDEFAULT = halfsine
"""Default callable that constructs a `WindowingFunction` array from a length in samples."""
parametersDEFAULT = ParametersUniversal(
    lengthFFT=2048,
    lengthHop=512,
    lengthWindowingFunction=lengthWindowingFunctionDEFAULT,
    sampleRate=44100,
    windowingFunction=windowingFunctionCallableDEFAULT(lengthWindowingFunctionDEFAULT),
)
"""Factory `ParametersUniversal` applied when `setParametersUniversal` is `None`."""

setParametersUniversal: ParametersUniversal | None = None
"""Override `ParametersUniversal` for all module functions; when `None`, `parametersDEFAULT` is used."""

windowingFunctionCallableUniversal = windowingFunctionCallableDEFAULT
"""Active callable for constructing windowing functions; overridable at module level."""

# Bind `parametersUniversal` unconditionally. Binding it only inside an `if` would leave the
# name undefined whenever an override is supplied, and every function in this module reads it.
parametersUniversal: ParametersUniversal = parametersDEFAULT if setParametersUniversal is None else setParametersUniversal
"""Active `ParametersUniversal` used by all functions when no per-call override is provided."""
|
|
87
|
+
|
|
88
|
+
def getWaveformMetadata(listPathFilenames: Sequence[str | PathLike[str]], sampleRate: float) -> dict[int, WaveformMetadata]:
    """Measure every audio file in `listPathFilenames` and describe each one in a `WaveformMetadata` record.

    You can use this function to learn how long each file is, in samples at `sampleRate`,
    before allocating arrays for a batch. Every file is read through `readAudioFile`, so the
    measured length is the length after any resampling that `readAudioFile` performs. The
    padding fields `samplesLeading` and `samplesTrailing` start at `0`; callers may adjust
    them afterwards.

    Parameters
    ----------
    listPathFilenames : Sequence[str | PathLike[str]]
        Paths of the audio files to measure.
    sampleRate : float
        Sample rate, in Hz, handed to `readAudioFile` when measuring each file.

    Returns
    -------
    dictionaryWaveformMetadata : dict[int, WaveformMetadata]
        One record per file, keyed by the file's position in `listPathFilenames`. Each record
        holds `pathFilename` (converted with `str`), `lengthWaveform`, `samplesLeading=0`, and
        `samplesTrailing=0`.

    File Reading Progress
    ---------------------
    The iteration over `listPathFilenames` is wrapped in `tqdm` [1], which reports progress
    while the files are read.

    References
    ----------
    [1] tqdm — fast, extensible progress bar for Python and CLI
        https://tqdm.github.io/

    """
    # `readAudioFile` returns `(channels, samples)`, so the sample count is the last axis.
    indexAxisSamples: int = -1
    return {
        position: WaveformMetadata(
            pathFilename=str(pathAudio),
            lengthWaveform=readAudioFile(pathAudio, sampleRate).shape[indexAxisSamples],
            samplesLeading=0,
            samplesTrailing=0,
        )
        for position, pathAudio in enumerate(tqdm(listPathFilenames))
    }
|
|
137
|
+
|
|
138
|
+
def readAudioFile(pathFilename: str | PathLike[Any] | BinaryIO, sampleRate: float | None = None) -> Waveform:
    """Read an audio file and return waveform data shaped `(channels, samples)`.

    You can use this function to load any audio file that `soundfile` [1] supports, from a
    path or from an open binary stream. The data is read as `float32`, resampled with
    `resampleWaveform` when `sampleRate` differs from the file's native rate, and transposed
    from `soundfile`'s `(samples, channels)` layout to `(channels, samples)`.

    Mono sources are duplicated into two identical channels. Sources that already have two
    or more channels keep their channel count.

    Parameters
    ----------
    pathFilename : str | PathLike[Any] | BinaryIO
        Path to the audio file, or a binary stream that `soundfile` [1] can read. Paths are
        normalized with `os.fspath`; any other object is passed to `soundfile` untouched.
    sampleRate : float | None = None
        Target sample rate of the returned `Waveform` in Hz. When `None`, the value of
        `parametersUniversal['sampleRate']` is used.

    Returns
    -------
    waveform : Waveform
        Audio data shaped `(channels, samples)` with dtype `universalDtypeWaveform`.

    Raises
    ------
    FileNotFoundError
        When `soundfile` raises `LibsndfileError` with a message containing `'System error'`,
        which this function treats as a missing file.
    soundfile.LibsndfileError
        For every other `soundfile` failure, such as an unsupported or corrupted file.

    References
    ----------
    [1] soundfile — audio library based on libsndfile
        https://python-soundfile.readthedocs.io/en/0.12.1/

    """
    import os

    if sampleRate is None:
        sampleRate = parametersUniversal['sampleRate']

    # Only path-like input is converted. Calling `str()` on an open stream would produce its
    # repr (for example "<_io.BytesIO object at 0x...>"), which `soundfile` would then try to
    # open as a filename.
    fileSource = os.fspath(pathFilename) if isinstance(pathFilename, (str, os.PathLike)) else pathFilename

    try:
        with soundfile.SoundFile(fileSource) as readSoundFile:
            sampleRateSource: int = readSoundFile.samplerate
            waveform: Waveform = readSoundFile.read(dtype='float32', always_2d=True).astype(universalDtypeWaveform)
    except soundfile.LibsndfileError as ERRORmessage:
        if 'System error' in str(ERRORmessage):
            message = f"File not found: {pathFilename}"
            raise FileNotFoundError(message) from ERRORmessage
        raise

    # GitHub #3 Implement semantic axes for audio data
    # `always_2d=True` makes `soundfile` return `(samples, channels)`.
    axisTime = 0
    axisChannels = 1
    waveform = cast('Waveform', resampleWaveform(waveform, sampleRateDesired=sampleRate, sampleRateSource=sampleRateSource, axisTime=axisTime))
    # TODO In my audio ecosystem, must I force a minimum of 2 channels, or can I merely force an axis for time, even if the axis is length=1?
    if waveform.shape[axisChannels] == 1:
        waveform = cast('Waveform', numpy.repeat(waveform, 2, axis=axisChannels))
    # Swap to the module-wide `(channels, samples)` layout.
    return cast('Waveform', numpy.transpose(waveform, axes=(axisChannels, axisTime)))
|
|
194
|
+
|
|
195
|
+
def resampleWaveform(waveform: ndarray[tuple[int, ...], dtype[floating[Any]]], sampleRateDesired: float, sampleRateSource: float, axisTime: int = -1) -> ndarray[tuple[int, ...], dtype[floating[Any]]]:
    """Resample a waveform array to a target sample rate using `resampy` [1].

    You can use this function to change the sample rate of any floating-point NumPy array [2].
    Both rates are rounded to the nearest integer first. When the rounded rates are equal,
    `resampleWaveform` returns `waveform` itself without invoking `resampy`; otherwise it
    passes `waveform` to `resampy.resample` [1] along `axisTime`.

    Parameters
    ----------
    waveform : ndarray[tuple[int, ...], dtype[floating[Any]]]
        Input audio data as any floating-point NumPy array [2].
    sampleRateDesired : float
        Target sample rate in Hz.
    sampleRateSource : float
        Original sample rate of `waveform` in Hz.
    axisTime : int = -1
        Axis along which resampling is performed. Negative values index from the last axis.

    Returns
    -------
    waveformResampled : ndarray[tuple[int, ...], dtype[floating[Any]]]
        Waveform resampled to `sampleRateDesired`, or the unmodified `waveform` object when
        the rounded rates are equal.

    Sample Rate Rounding
    --------------------
    Rounding happens before the equality check, so two rates that `resampy` would receive as
    the same integer (for example `44100` and `44100.2`) skip the resampling pass entirely
    instead of being filtered through a 1:1 resample.

    References
    ----------
    [1] resampy — efficient signal resampling
        https://resampy.readthedocs.io/en/stable/

    [2] numpy.ndarray
        https://numpy.org/doc/stable/reference/index.html

    """
    sampleRateDesired = round(sampleRateDesired)
    sampleRateSource = round(sampleRateSource)
    if sampleRateSource == sampleRateDesired:
        return waveform
    waveformResampled: ndarray[tuple[int, ...], dtype[floating[Any]]] = resampy.resample(waveform, sampleRateSource, sampleRateDesired, axis=axisTime)
    return waveformResampled
|
|
240
|
+
|
|
241
|
+
def loadWaveforms(listPathFilenames: Sequence[str | PathLike[str]], sampleRateTarget: float | None = None) -> ArrayWaveforms:
    """Load a list of audio files into a single stacked NumPy array.

    You can use this function to batch-load multiple audio files into one `ArrayWaveforms`
    array. Each file is read with `readAudioFile` at `sampleRateTarget` and copied into a
    zero-initialized array, so every waveform shorter than the longest one is followed by
    zeros.

    Parameters
    ----------
    listPathFilenames : Sequence[str | PathLike[str]]
        Paths to the audio files. Must contain at least one path.
    sampleRateTarget : float | None = None
        Target sample rate in Hz. When `None`, `parametersUniversal['sampleRate']` is used.

    Returns
    -------
    arrayWaveforms : ArrayWaveforms
        Array shaped `(2, samplesTotalMaximum, countFiles)` with dtype
        `universalDtypeWaveform`, where `samplesTotalMaximum` is the largest
        `lengthWaveform + samplesLeading + samplesTrailing` across all files.

    Raises
    ------
    ValueError
        When `listPathFilenames` is empty.

    Zero-Padding
    ------------
    Each waveform is written starting at `samplesLeading` from its `WaveformMetadata` record
    and spans `lengthWaveform` samples; everything else stays zero. `getWaveformMetadata`
    initializes `samplesLeading` to `0`, so waveforms currently start at the first sample.

    Notes
    -----
    Every file is read twice: once by `getWaveformMetadata` to measure it and once here to
    copy its samples.

    """
    if sampleRateTarget is None:
        sampleRateTarget = parametersUniversal['sampleRate']

    countWaveforms: int = len(listPathFilenames)
    if countWaveforms == 0:
        # Fail early with a clear message instead of letting `max()` complain about an empty sequence.
        message = "listPathFilenames must contain at least one path"
        raise ValueError(message)

    # GitHub #3 Implement semantic axes for audio data
    axisOrderMapping: dict[str, int] = {'indexingAxis': -1, 'axisTime': -2, 'axisChannels': 0}
    countAxes: int = len(axisOrderMapping)
    countChannels: int = 2

    dictionaryWaveformMetadata: dict[int, WaveformMetadata] = getWaveformMetadata(listPathFilenames, sampleRateTarget)
    samplesTotalMaximum = max(entry['lengthWaveform'] + entry['samplesLeading'] + entry['samplesTrailing'] for entry in dictionaryWaveformMetadata.values())

    axesSizes: dict[str, int] = {'indexingAxis': countWaveforms, 'axisTime': samplesTotalMaximum, 'axisChannels': countChannels}

    # Translate each semantic axis (possibly a negative index) into its position in the shape.
    listShapeIndexToSize: list[int] = [0] * countAxes
    for keyName, axisSize in axesSizes.items():
        listShapeIndexToSize[axisOrderMapping[keyName] % countAxes] = axisSize
    tupleShapeArray: tuple[int, int, int] = cast('tuple[int, int, int]', tuple(listShapeIndexToSize))

    arrayWaveforms: ArrayWaveforms = numpy.zeros(tupleShapeArray, dtype=universalDtypeWaveform)

    for index, metadata in dictionaryWaveformMetadata.items():
        waveform: Waveform = readAudioFile(metadata['pathFilename'], sampleRateTarget)
        # GitHub #4 Add padding logic to `loadWaveforms` and `loadSpectrograms`
        indexStart: int = metadata['samplesLeading']
        indexStop: int = indexStart + metadata['lengthWaveform']
        arrayWaveforms[:, indexStart:indexStop, index] = waveform

    return arrayWaveforms
|
|
310
|
+
|
|
311
|
+
def writeWAV(pathFilename: str | PathLike[Any] | BinaryIO, waveform: Waveform, sampleRate: float | None = None) -> None:
    """Write a waveform array to a 32-bit float WAV file.

    You can use this function to save a `Waveform` or any compatible NumPy array. `writeWAV`
    first calls `makeDirectorySafely` from `hunterMakesPy` with `pathFilename`, then writes
    the transposed array with `soundfile.write` [1].

    Parameters
    ----------
    pathFilename : str | PathLike[Any] | BinaryIO
        Destination path for the WAV file, or a binary stream.
    waveform : Waveform
        Audio data shaped `(channels, samples)` or `(samples,)`. The array is transposed
        before writing because `soundfile` expects `(samples, channels)`.
    sampleRate : float | None = None
        Sample rate of `waveform` in Hz. When `None`, `parametersUniversal['sampleRate']` is
        used. The value is rounded to the nearest integer before writing.

    File Overwrite and Format
    -------------------------
    The file is always written with `format='WAV'` and `subtype='FLOAT'`.
    NOTE(review): `soundfile.write` is expected to replace an existing file at
    `pathFilename` without prompting — confirm against the `soundfile` documentation.

    References
    ----------
    [1] soundfile — audio library based on libsndfile
        https://python-soundfile.readthedocs.io/en/0.12.1/

    """
    if sampleRate is None:
        sampleRate = parametersUniversal['sampleRate']
    makeDirectorySafely(pathFilename)
    # Round rather than truncate: a computed rate such as 44099.999... must become 44100,
    # matching the rounding that `resampleWaveform` applies to sample rates.
    soundfile.write(file=pathFilename, data=waveform.T, samplerate=round(sampleRate), subtype='FLOAT', format='WAV')
|
|
347
|
+
|
|
348
|
+
@overload # stft 1 ndarray
|
|
349
|
+
def stft(arrayTarget: Waveform, *, sampleRate: float | None = None, lengthHop: int | None = None, windowingFunction: WindowingFunction | None = None, lengthWindowingFunction: int | None = None, lengthFFT: int | None = None, inverse: Literal[False] = False, lengthWaveform: None = None, indexingAxis: None = None) -> Spectrogram: ...
|
|
350
|
+
|
|
351
|
+
@overload # stft many ndarray
|
|
352
|
+
def stft(arrayTarget: ArrayWaveforms, *, sampleRate: float | None = None, lengthHop: int | None = None, windowingFunction: WindowingFunction | None = None, lengthWindowingFunction: int | None = None, lengthFFT: int | None = None, inverse: Literal[False] = False, lengthWaveform: None = None, indexingAxis: int = -1) -> ArraySpectrograms: ...
|
|
353
|
+
|
|
354
|
+
@overload # istft 1 ndarray
|
|
355
|
+
def stft(arrayTarget: Spectrogram, *, sampleRate: float | None = None, lengthHop: int | None = None, windowingFunction: WindowingFunction | None = None, lengthWindowingFunction: int | None = None, lengthFFT: int | None = None, inverse: Literal[True], lengthWaveform: int, indexingAxis: None = None) -> Waveform: ...
|
|
356
|
+
|
|
357
|
+
@overload # istft many ndarray
|
|
358
|
+
def stft(arrayTarget: ArraySpectrograms, *, sampleRate: float | None = None, lengthHop: int | None = None, windowingFunction: WindowingFunction | None = None, lengthWindowingFunction: int | None = None, lengthFFT: int | None = None, inverse: Literal[True], lengthWaveform: int, indexingAxis: int = -1) -> ArrayWaveforms: ...
|
|
359
|
+
|
|
360
|
+
def stft(arrayTarget: Waveform | ArrayWaveforms | Spectrogram | ArraySpectrograms
|
|
361
|
+
, *
|
|
362
|
+
, sampleRate: float | None = None
|
|
363
|
+
, lengthHop: int | None = None
|
|
364
|
+
, windowingFunction: WindowingFunction | None = None
|
|
365
|
+
, lengthWindowingFunction: int | None = None
|
|
366
|
+
, lengthFFT: int | None = None
|
|
367
|
+
, inverse: bool = False
|
|
368
|
+
, lengthWaveform: int | None = None
|
|
369
|
+
, indexingAxis: int | None = None
|
|
370
|
+
) -> Waveform | ArrayWaveforms | Spectrogram | ArraySpectrograms:
|
|
371
|
+
"""Perform Short-Time Fourier Transform or its inverse on waveform or spectrogram data.
|
|
372
|
+
|
|
373
|
+
You can use this function to convert a `Waveform` [1] to a `Spectrogram` [2] or reverse
|
|
374
|
+
the transformation with `inverse=True`. Pass `ArrayWaveforms` [3] or
|
|
375
|
+
`ArraySpectrograms` [4] with an `indexingAxis` to transform a batch of signals at once.
|
|
376
|
+
All transform behavior is governed by `scipy.signal.ShortTimeFFT` [5].
|
|
377
|
+
|
|
378
|
+
Four overloads determine the return type from `arrayTarget` and `inverse`:
|
|
379
|
+
- `Waveform` [1] → `Spectrogram` [2] (single forward transform)
|
|
380
|
+
- `ArrayWaveforms` [3] → `ArraySpectrograms` [4] (batch forward transform)
|
|
381
|
+
- `Spectrogram` [2] → `Waveform` [1] (single inverse transform)
|
|
382
|
+
- `ArraySpectrograms` [4] → `ArrayWaveforms` [3] (batch inverse transform)
|
|
383
|
+
|
|
384
|
+
Parameters
|
|
385
|
+
----------
|
|
386
|
+
arrayTarget : Waveform | ArrayWaveforms | Spectrogram | ArraySpectrograms
|
|
387
|
+
Input array for transformation.
|
|
388
|
+
sampleRate : float | None = None
|
|
389
|
+
Sample rate of the signal in Hz. Defaults to `44100` when `None`.
|
|
390
|
+
lengthHop : int | None = None
|
|
391
|
+
Number of samples between successive analysis frames. Defaults to `512` when `None`.
|
|
392
|
+
windowingFunction : WindowingFunction | None = None
|
|
393
|
+
Windowing function array [6]. When `None`, `windowingFunctionCallableUniversal` is
|
|
394
|
+
called with `lengthWindowingFunction`, or the universal default is used.
|
|
395
|
+
lengthWindowingFunction : int | None = None
|
|
396
|
+
Length of the windowing function in samples. Used only when `windowingFunction` is
|
|
397
|
+
`None`. Defaults to `1024` when `None`.
|
|
398
|
+
lengthFFT : int | None = None
|
|
399
|
+
Length of the FFT in samples. Defaults to `2048` or the next power of two ≥
|
|
400
|
+
`lengthWindowingFunction` when `None`.
|
|
401
|
+
inverse : bool = False
|
|
402
|
+
When `True`, perform inverse STFT. When `False`, perform forward STFT.
|
|
403
|
+
lengthWaveform : int | None = None
|
|
404
|
+
Required output length in samples for inverse transform. Must be provided when
|
|
405
|
+
`inverse` is `True`.
|
|
406
|
+
indexingAxis : int | None = None
|
|
407
|
+
Axis along which multiple signals are stacked. Use `None` for single-signal input.
|
|
408
|
+
|
|
409
|
+
Returns
|
|
410
|
+
-------
|
|
411
|
+
arrayTransformed : Waveform | ArrayWaveforms | Spectrogram | ArraySpectrograms
|
|
412
|
+
Transformed signal or batch of signals. Return type mirrors `arrayTarget` with
|
|
413
|
+
forward and inverse swapped.
|
|
414
|
+
|
|
415
|
+
Raises
|
|
416
|
+
------
|
|
417
|
+
ValueError
|
|
418
|
+
When `inverse` is `True` and `lengthWaveform` is not provided.
|
|
419
|
+
|
|
420
|
+
References
|
|
421
|
+
----------
|
|
422
|
+
[1] `Waveform`
|
|
423
|
+
|
|
424
|
+
[2] `Spectrogram`
|
|
425
|
+
|
|
426
|
+
[3] `ArrayWaveforms`
|
|
427
|
+
|
|
428
|
+
[4] `ArraySpectrograms`
|
|
429
|
+
|
|
430
|
+
[5] scipy.signal.ShortTimeFFT
|
|
431
|
+
https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.ShortTimeFFT.html
|
|
432
|
+
|
|
433
|
+
[6] `WindowingFunction`
|
|
434
|
+
|
|
435
|
+
"""
|
|
436
|
+
if sampleRate is None:
|
|
437
|
+
sampleRate = parametersUniversal['sampleRate']
|
|
438
|
+
if lengthHop is None:
|
|
439
|
+
lengthHop = parametersUniversal['lengthHop']
|
|
440
|
+
|
|
441
|
+
if windowingFunction is None:
|
|
442
|
+
if lengthWindowingFunction is not None and windowingFunctionCallableUniversal: # pyright: ignore[reportUnnecessaryComparison]
|
|
443
|
+
windowingFunction = windowingFunctionCallableUniversal(lengthWindowingFunction)
|
|
444
|
+
else:
|
|
445
|
+
windowingFunction = parametersUniversal['windowingFunction']
|
|
446
|
+
if lengthFFT is None:
|
|
447
|
+
lengthFFTSherpa = parametersUniversal['lengthFFT']
|
|
448
|
+
if lengthFFTSherpa >= windowingFunction.size:
|
|
449
|
+
lengthFFT = lengthFFTSherpa
|
|
450
|
+
|
|
451
|
+
if lengthFFT is None:
|
|
452
|
+
lengthWindowingFunction = windowingFunction.size
|
|
453
|
+
lengthFFT = 2 ** ceiling(log_base2(lengthWindowingFunction))
|
|
454
|
+
|
|
455
|
+
if inverse and not lengthWaveform:
|
|
456
|
+
message = "lengthWaveform must be specified for inverse transform"
|
|
457
|
+
raise ValueError(message)
|
|
458
|
+
|
|
459
|
+
stftWorkhorse = ShortTimeFFT(win=windowingFunction, hop=lengthHop, fs=sampleRate, mfft=lengthFFT, **parametersShortTimeFFTUniversal)
|
|
460
|
+
|
|
461
|
+
def doTransformation(arrayInput: Waveform | Spectrogram, lengthWaveform: int | None, inverse: bool) -> Waveform | Spectrogram: # noqa: FBT001
|
|
462
|
+
if inverse:
|
|
463
|
+
return cast('Waveform', stftWorkhorse.istft(S=arrayInput, k1=lengthWaveform))
|
|
464
|
+
return cast('Spectrogram', stftWorkhorse.stft(x=arrayInput, **parametersSTFTUniversal))
|
|
465
|
+
|
|
466
|
+
if indexingAxis is None:
|
|
467
|
+
singleton: Waveform | Spectrogram = cast('Waveform | Spectrogram', arrayTarget)
|
|
468
|
+
return doTransformation(singleton, lengthWaveform=lengthWaveform, inverse=inverse)
|
|
469
|
+
else:
|
|
470
|
+
arrayTARGET: ArrayWaveforms | ArraySpectrograms = cast('ArrayWaveforms | ArraySpectrograms', numpy.moveaxis(arrayTarget, indexingAxis, -1))
|
|
471
|
+
index = 0
|
|
472
|
+
arrayTransformed: ArrayWaveforms | ArraySpectrograms = cast('ArrayWaveforms | ArraySpectrograms', numpy.tile(doTransformation(cast('Waveform | Spectrogram', arrayTARGET[..., index]), lengthWaveform, inverse)[..., numpy.newaxis], arrayTARGET.shape[-1]))
|
|
473
|
+
|
|
474
|
+
for index in range(1, arrayTARGET.shape[-1]):
|
|
475
|
+
arrayTransformed[..., index] = doTransformation(cast('Waveform | Spectrogram', arrayTARGET[..., index]), lengthWaveform, inverse)
|
|
476
|
+
|
|
477
|
+
return numpy.moveaxis(arrayTransformed, -1, indexingAxis)
|
|
478
|
+
|
|
479
|
+
def _getSpectrogram(waveform: Waveform, metadata: WaveformMetadata, sampleRateTarget: float, **parametersSTFT: Any) -> Spectrogram:
    """Copy one audio file into a caller-provided waveform buffer and compute the buffer's spectrogram.

    `loadSpectrograms` uses this subroutine so that every file is transformed from a buffer
    of the same shape, which makes every spectrogram the same shape. The audio read from
    `metadata['pathFilename']` is written into `waveform` starting at
    `metadata['samplesLeading']` and spanning `metadata['lengthWaveform']` samples; the rest
    of the buffer is left as the caller supplied it.

    Parameters
    ----------
    waveform : Waveform
        Buffer that receives the audio data. It is modified in place, so the caller must pass
        a separate copy for each file to keep data from accumulating across files.
    metadata : WaveformMetadata
        Record providing `pathFilename`, `lengthWaveform`, and `samplesLeading` for the file.
    sampleRateTarget : float
        Sample rate passed to `readAudioFile` and to `stft`.
    **parametersSTFT : Any
        Keyword parameters forwarded to `stft`.

    Returns
    -------
    spectrogram : Spectrogram
        Result of `stft` applied to `waveform` after the audio file was copied into it.

    """
    # GitHub #4 Add padding logic to `loadWaveforms` and `loadSpectrograms`
    # Place the audio after the leading padding, exactly as `loadWaveforms` does. The slice is
    # as wide as the audio itself, so non-zero padding cannot cause a shape mismatch.
    indexStart: int = metadata['samplesLeading']
    indexStop: int = indexStart + metadata['lengthWaveform']
    waveform[:, indexStart:indexStop] = readAudioFile(metadata['pathFilename'], sampleRateTarget)
    return stft(waveform, sampleRate=sampleRateTarget, **parametersSTFT)
|
|
516
|
+
|
|
517
|
+
def loadSpectrograms(listPathFilenames: Sequence[str | PathLike[str]], sampleRateTarget: float | None = None, **parametersSTFT: Any) -> tuple[ArraySpectrograms, dict[int, WaveformMetadata]]:
    """Load spectrograms from a list of audio files.

    You can use this function to batch-convert audio files to spectrograms in a single call.
    `loadSpectrograms` measures every file with `getWaveformMetadata`, copies each file into
    a zero-filled buffer as long as the longest file, computes `stft` for each buffer, and
    stacks the results along a new last axis.

    Parameters
    ----------
    listPathFilenames : Sequence[str | PathLike[str]]
        Paths to the audio files. Must contain at least one path.
    sampleRateTarget : float | None = None
        Target sample rate in Hz. When `None`, `parametersUniversal['sampleRate']` is used.
    **parametersSTFT : Any
        Keyword parameters forwarded to `stft`, such as `lengthWindowingFunction` and
        `lengthHop`.

    Returns
    -------
    tupleSpectrogramsMetadata : tuple[ArraySpectrograms, dict[int, WaveformMetadata]]
        The first element is an array whose shape is the shape of one spectrogram followed by
        the number of files, with dtype `universalDtypeSpectrogram`. The second element is the
        dictionary returned by `getWaveformMetadata`, keyed by file position.

    Raises
    ------
    ValueError
        When `listPathFilenames` is empty.

    File Reading Progress
    ---------------------
    `tqdm` [1] wraps the spectrogram loop, and `getWaveformMetadata` shows its own progress
    bar while measuring the files.

    References
    ----------
    [1] tqdm — fast, extensible progress bar for Python and CLI
        https://tqdm.github.io/

    """
    if sampleRateTarget is None:
        sampleRateTarget = parametersUniversal['sampleRate']

    if len(listPathFilenames) == 0:
        # Fail early with a clear message instead of letting `max()` complain about an empty sequence.
        message = "listPathFilenames must contain at least one path"
        raise ValueError(message)

    dictionaryWaveformMetadata: dict[int, WaveformMetadata] = getWaveformMetadata(listPathFilenames, sampleRateTarget)

    samplesTotalMaximum: int = max(entry['lengthWaveform'] + entry['samplesLeading'] + entry['samplesTrailing'] for entry in dictionaryWaveformMetadata.values())
    countChannels = 2
    waveformTemplate: Waveform = numpy.zeros(shape=(countChannels, samplesTotalMaximum), dtype=universalDtypeWaveform)
    # Transform the silent template once to learn the shape of every spectrogram.
    spectrogramTemplate: Spectrogram = stft(waveformTemplate, sampleRate=sampleRateTarget, **parametersSTFT)

    arraySpectrograms: ArraySpectrograms = numpy.zeros(shape=(*spectrogramTemplate.shape, len(dictionaryWaveformMetadata)), dtype=universalDtypeSpectrogram)

    # TODO The files are processed sequentially; a process pool was considered but is not implemented.
    for index, metadata in tqdm(dictionaryWaveformMetadata.items()):
        # Each file gets its own copy of the template so samples never leak between files.
        arraySpectrograms[..., index] = _getSpectrogram(waveformTemplate.copy(), metadata, sampleRateTarget, **parametersSTFT)

    return arraySpectrograms, dictionaryWaveformMetadata
|
|
585
|
+
|
|
586
|
+
def spectrogramToWAV(spectrogram: Spectrogram, pathFilename: str | PathLike[Any] | BinaryIO, lengthWaveform: int, sampleRate: float | None = None, **parametersSTFT: Any) -> None:
    """Reconstruct a waveform from a complex spectrogram and save it as a WAV file.

    You can use this function when you hold a `Spectrogram` [1] and want the audio on
    disk. `spectrogramToWAV` calls `stft` with `inverse=True` to produce a `Waveform` [2]
    and then hands that `Waveform` [2] to `writeWAV`.

    Parameters
    ----------
    spectrogram : Spectrogram
        Complex spectrogram to convert back to a waveform.
    pathFilename : str | PathLike[Any] | BinaryIO
        Destination path for the WAV file, or a binary stream.
    lengthWaveform : int
        Number of samples requested from the inverse transform. The value is forwarded
        to `stft` as `lengthWaveform`.
    sampleRate : float | None = None
        Sample rate in Hz, passed both to `stft` and to `writeWAV`. When `None`,
        `parametersUniversal['sampleRate']` is used.
    **parametersSTFT : Any
        Additional keyword parameters forwarded unchanged to `stft`, such as
        `lengthWindowingFunction` and `lengthHop`. Because `inverse`, `lengthWaveform`,
        and `sampleRate` are already passed explicitly, repeating any of them here
        raises `TypeError`.

    File Overwrite and Format
    -------------------------
    See `writeWAV` for file overwrite behavior and output format details.

    References
    ----------
    [1] `Spectrogram`

    [2] `Waveform`

    """
    # Resolve the effective rate once so the inverse transform and the writer agree.
    sampleRateOutput = parametersUniversal['sampleRate'] if sampleRate is None else sampleRate

    waveformReconstructed: Waveform = stft(
        spectrogram,
        inverse=True,
        lengthWaveform=lengthWaveform,
        sampleRate=sampleRateOutput,
        **parametersSTFT,
    )
    writeWAV(pathFilename, waveformReconstructed, sampleRateOutput)
|
|
624
|
+
|
|
625
|
+
def waveformSpectrogramWaveform(callableNeedsSpectrogram: Callable[[Spectrogram], Spectrogram]) -> Callable[[Waveform], Waveform]:
    """Decorate a spectrogram-processing callable to accept and return waveforms.

    You can use this function as a decorator when you have a function that transforms
    `Spectrogram` [1] data and you want a version that operates directly on `Waveform` [2]
    data. The returned function applies `stft` to convert the input `Waveform` [2] to a
    `Spectrogram` [1], calls `callableNeedsSpectrogram`, then applies inverse `stft` to
    convert the result back to a `Waveform` [2], requesting the input's sample count as
    `lengthWaveform`.

    Parameters
    ----------
    callableNeedsSpectrogram : Callable[[Spectrogram], Spectrogram]
        A function that accepts and returns a `Spectrogram` [1].

    Returns
    -------
    stft_istft : Callable[[Waveform], Waveform]
        A function that accepts a `Waveform` [2], converts it to a `Spectrogram` [1],
        applies `callableNeedsSpectrogram`, and returns the reconstructed `Waveform` [2].
        The returned function carries the `__module__`, `__name__`, `__qualname__`, and
        `__doc__` of `callableNeedsSpectrogram`, and exposes the original callable as
        `__wrapped__`.

    Time Axis Assumption
    --------------------
    The inner function `stft_istft` assumes the time axis of the input `Waveform` [2] is
    the last axis (`-1`). This matches the `(channels, samples)` shape convention.

    STFT Parameters
    ---------------
    The forward transform is called as `stft(waveform)` with no further arguments, and the
    inverse transform receives only `inverse=True`, `indexingAxis=None`, and
    `lengthWaveform`. Every other setting is whatever `stft` uses by default.

    Metadata Preservation
    ---------------------
    `functools.wraps` [3] copies the identifying attributes so a decorated function keeps
    its own name and docstring instead of reporting `stft_istft`. `__annotations__` is
    deliberately not copied: the wrapper accepts and returns `Waveform` [2], not
    `Spectrogram` [1], so the wrapper's own annotations are the accurate ones.

    References
    ----------
    [1] `Spectrogram`

    [2] `Waveform`

    [3] functools.wraps - Python standard library
        https://docs.python.org/3/library/functools.html#functools.wraps

    """
    # Function-scope import keeps this block self-contained; `functools` is standard library.
    from functools import wraps

    # Identity metadata only; see "Metadata Preservation" for why annotations are excluded.
    attributesIdentity = ('__module__', '__name__', '__qualname__', '__doc__')

    @wraps(callableNeedsSpectrogram, assigned=attributesIdentity)
    def stft_istft(waveform: Waveform) -> Waveform:
        axisTime = -1
        arrayTarget = stft(waveform)
        spectrogram = callableNeedsSpectrogram(arrayTarget)
        # Request the input's sample count so the output length matches the input length.
        return stft(spectrogram, inverse=True, indexingAxis=None, lengthWaveform=waveform.shape[axisTime])

    return stft_istft
|
hunterHearsPy/py.typed
ADDED
|
File without changes
|