hunterHearsPy 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,663 @@
1
+ """Read, write, resample, and transform audio waveforms between time and frequency domains.
2
+
3
+ You can use this module to load audio files into NumPy arrays, resample waveforms, convert
4
+ between waveforms and spectrograms using the Short-Time Fourier Transform, and write
5
+ waveforms back to WAV files. All audio is normalized to stereo, 32-bit float `Waveform`
6
+ arrays shaped `(channels, samples)`. All spectrograms are complex 64-bit float
7
+ `Spectrogram` arrays shaped `(channels, frequencies, frames)`.
8
+
9
+ Contents
10
+ --------
11
+ Functions
12
+ getWaveformMetadata
13
+ Retrieve metadata for a collection of audio waveform files.
14
+ loadSpectrograms
15
+ Load spectrograms from a list of audio files.
16
+ loadWaveforms
17
+ Load a list of audio files into a single stacked NumPy array.
18
+ readAudioFile
19
+ Read an audio file and return stereo waveform data as a NumPy array.
20
+ resampleWaveform
21
+ Resample a waveform array to a target sample rate.
22
+ spectrogramToWAV
23
+ Write a complex spectrogram to a WAV file.
24
+ stft
25
+ Perform Short-Time Fourier Transform or its inverse on waveform or spectrogram data.
26
+ waveformSpectrogramWaveform
27
+ Decorate a spectrogram-processing callable to accept and return waveforms.
28
+ writeWAV
29
+ Write a waveform array to a WAV file.
30
+
31
+ """
32
+ from __future__ import annotations
33
+
34
+ from hunterHearsPy import (
35
+ ArraySpectrograms, ArrayWaveforms, halfsine, ParametersShortTimeFFT, ParametersSTFT, ParametersUniversal, Spectrogram, Waveform,
36
+ WaveformMetadata, WindowingFunction)
37
+ from hunterMakesPy.filesystemToolkit import makeDirectorySafely
38
+ from math import ceil as ceiling, log2 as log_base2
39
+ from multiprocessing import set_start_method as multiprocessing_set_start_method
40
+ from numpy import complex64, dtype, float32, floating, ndarray
41
+ from scipy.signal import ShortTimeFFT
42
+ from tqdm.auto import tqdm
43
+ from typing import Any, BinaryIO, cast, Literal, overload, TYPE_CHECKING
44
+ import numpy
45
+ import resampy
46
+ import soundfile
47
+
48
+ if TYPE_CHECKING:
49
+ from collections.abc import Callable, Sequence
50
+ from os import PathLike
51
+
52
if __name__ == '__main__':
    # Runs only when this file is executed as a script; a plain import never changes the start method.
    multiprocessing_set_start_method('spawn')

# Design coordinated, user-overridable universal parameter defaults for audio functions
# https://github.com/hunterhogan/hunterHearsPy/issues/5
universalDtypeWaveform = float32
"""Module-wide NumPy dtype applied to waveform arrays."""
universalDtypeSpectrogram = complex64
"""Module-wide NumPy dtype used when allocating spectrogram arrays."""
parametersShortTimeFFTUniversal: ParametersShortTimeFFT = {'fft_mode': 'onesided'}
"""Module-wide keyword parameters passed to `scipy.signal.ShortTimeFFT` on construction."""
parametersSTFTUniversal: ParametersSTFT = {'padding': 'even', 'axis': -1}
"""Module-wide keyword parameters passed to `ShortTimeFFT.stft` on each forward transform."""

lengthWindowingFunctionDEFAULT = 1024
"""Default length in samples of the windowing function used when no override is provided."""
windowingFunctionCallableDEFAULT = halfsine
"""Default callable that constructs a `WindowingFunction` array from a length in samples."""
parametersDEFAULT = ParametersUniversal(
    lengthFFT=2048,
    lengthHop=512,
    lengthWindowingFunction=lengthWindowingFunctionDEFAULT,
    sampleRate=44100,
    windowingFunction=windowingFunctionCallableDEFAULT(lengthWindowingFunctionDEFAULT),
)
"""Factory `ParametersUniversal` applied when `setParametersUniversal` is `None`."""

setParametersUniversal: ParametersUniversal | None = None
"""Override `ParametersUniversal` for all module functions; when `None`, `parametersDEFAULT` is used."""

windowingFunctionCallableUniversal = windowingFunctionCallableDEFAULT
"""Active callable for constructing windowing functions; overridable at module level."""

# Bind `parametersUniversal` on every path: a truthy override wins, anything falsy falls back
# to the factory defaults. A bare `if not ...:` would leave the name undefined for an override.
parametersUniversal: ParametersUniversal = setParametersUniversal or parametersDEFAULT
"""Active `ParametersUniversal` used by all functions when no per-call override is provided."""
87
+
88
def getWaveformMetadata(listPathFilenames: Sequence[str | PathLike[str]], sampleRate: float) -> dict[int, WaveformMetadata]:
    """Measure the length of every audio file and describe it in a `WaveformMetadata` record.

    You can use this function to learn how many samples each file holds at `sampleRate`
    before allocating arrays for a batch. Each file is decoded with `readAudioFile`, so the
    reported length is the sample count after any resampling to `sampleRate`.

    Parameters
    ----------
    listPathFilenames : Sequence[str | PathLike[str]]
        Paths to the audio files, in the order the records should be indexed.
    sampleRate : float
        Sample rate in Hz at which each file is read and measured.

    Returns
    -------
    dictionaryWaveformMetadata : dict[int, WaveformMetadata]
        One record per file, keyed by the file's position in `listPathFilenames`. Each record
        holds `pathFilename` as a `str`, the measured `lengthWaveform`, and `samplesLeading`
        and `samplesTrailing` both set to `0`.

    File Reading Progress
    ---------------------
    The iteration over `listPathFilenames` is wrapped in `tqdm` [1], which reports progress
    while the files are read.

    References
    ----------
    [1] tqdm — fast, extensible progress bar for Python and CLI
        https://tqdm.github.io/

    """
    indexAxisSamples: int = -1

    def describeFile(pathFilenameTarget: str | PathLike[str]) -> WaveformMetadata:
        # The whole file is decoded only to learn its length along the samples axis.
        countSamples = readAudioFile(pathFilenameTarget, sampleRate).shape[indexAxisSamples]
        return WaveformMetadata(
            pathFilename=str(pathFilenameTarget),
            lengthWaveform=countSamples,
            samplesLeading=0,
            samplesTrailing=0,
        )

    return {position: describeFile(pathFilenameTarget) for position, pathFilenameTarget in enumerate(tqdm(listPathFilenames))}
137
+
138
def readAudioFile(pathFilename: str | PathLike[Any] | BinaryIO, sampleRate: float | None = None) -> Waveform:
    """Read an audio file and return its samples as a `(channels, samples)` NumPy array.

    You can use this function to load any audio source that `soundfile` [1] can open. The
    samples are read as `float32`, resampled with `resampleWaveform` when the file's native
    rate differs from `sampleRate`, and returned with the channel axis first. A mono source
    is duplicated into two identical channels; a source with more than two channels keeps
    its native channel count.

    Parameters
    ----------
    pathFilename : str | PathLike[Any] | BinaryIO
        Path to the audio file, or an open binary stream. A stream is handed to `soundfile`
        as-is; every other value is converted with `str`.
    sampleRate : float | None = None
        Sample rate in Hz of the returned `Waveform`. When `None`,
        `parametersUniversal['sampleRate']` is used.

    Returns
    -------
    waveform : Waveform
        Audio data shaped `(channels, samples)` with dtype `universalDtypeWaveform`.

    Raises
    ------
    FileNotFoundError
        When `soundfile` reports a `LibsndfileError` whose message contains `'System error'`.
    soundfile.LibsndfileError
        For every other failure reported by `soundfile`, re-raised unchanged.

    References
    ----------
    [1] soundfile — audio library based on libsndfile
        https://python-soundfile.readthedocs.io/en/0.12.1/

    """
    if sampleRate is None:
        sampleRate = parametersUniversal['sampleRate']

    # `str()` on a stream yields its repr, not a usable path, so only non-stream values are converted.
    fileSource = pathFilename if hasattr(pathFilename, 'read') else str(pathFilename)

    try:
        with soundfile.SoundFile(fileSource) as readSoundFile:
            sampleRateSource: int = readSoundFile.samplerate
            waveform: Waveform = readSoundFile.read(dtype='float32', always_2d=True).astype(universalDtypeWaveform)
    except soundfile.LibsndfileError as ERRORmessage:
        if 'System error' in str(ERRORmessage):
            message = f"File not found: {pathFilename}"
            raise FileNotFoundError(message) from ERRORmessage
        raise

    # GitHub #3 Implement semantic axes for audio data
    # `always_2d=True` makes `soundfile` return `(samples, channels)`.
    axisTime = 0
    axisChannels = 1
    waveform = cast('Waveform', resampleWaveform(waveform, sampleRateDesired=sampleRate, sampleRateSource=sampleRateSource, axisTime=axisTime))
    # TODO In my audio ecosystem, must I force a minimum of 2 channels, or can I merely force an axis for time, even if the axis is length=1?
    if waveform.shape[axisChannels] == 1:
        waveform = cast('Waveform', numpy.repeat(waveform, 2, axis=axisChannels))
    # Swap to the module-wide `(channels, samples)` layout.
    return cast('Waveform', numpy.transpose(waveform, axes=(axisChannels, axisTime)))
194
+
195
def resampleWaveform(waveform: ndarray[tuple[int, ...], dtype[floating[Any]]], sampleRateDesired: float, sampleRateSource: float, axisTime: int = -1) -> ndarray[tuple[int, ...], dtype[floating[Any]]]:
    """Resample a waveform array to a target sample rate using `resampy` [1].

    You can use this function to change the sample rate of a floating-point NumPy array.
    Both rates are rounded to the nearest integer first. When the rounded rates are equal,
    the input array is returned as-is (the same object, not a copy) and `resampy` is not
    called.

    Parameters
    ----------
    waveform : ndarray[tuple[int, ...], dtype[floating[Any]]]
        Input audio data.
    sampleRateDesired : float
        Target sample rate in Hz.
    sampleRateSource : float
        Sample rate of `waveform` in Hz.
    axisTime : int = -1
        Axis of `waveform` along which resampling is performed.

    Returns
    -------
    waveformResampled : ndarray[tuple[int, ...], dtype[floating[Any]]]
        `waveform` resampled to the rounded `sampleRateDesired`, or `waveform` itself when
        the rounded rates match.

    Sample Rate Rounding
    --------------------
    The comparison is made on the rounded rates, so two rates that differ only by a
    fraction of a hertz are treated as equal and never trigger a ratio-1 resampling pass.

    References
    ----------
    [1] resampy — efficient signal resampling
        https://resampy.readthedocs.io/en/stable/

    """
    sampleRateDesiredRounded: int = round(sampleRateDesired)
    sampleRateSourceRounded: int = round(sampleRateSource)
    if sampleRateSourceRounded == sampleRateDesiredRounded:
        return waveform
    waveformResampled: ndarray[tuple[int, ...], dtype[floating[Any]]] = resampy.resample(waveform, sampleRateSourceRounded, sampleRateDesiredRounded, axis=axisTime)
    return waveformResampled
240
+
241
def loadWaveforms(listPathFilenames: Sequence[str | PathLike[str]], sampleRateTarget: float | None = None) -> ArrayWaveforms:
    """Load a list of audio files into a single stacked NumPy array.

    You can use this function to batch-load audio files into one zero-initialized array.
    Every file is read with `readAudioFile` at `sampleRateTarget` and copied into its own
    slot along the last axis; samples outside the copied span stay zero, so shorter files
    are zero-padded on the trailing edge.

    Parameters
    ----------
    listPathFilenames : Sequence[str | PathLike[str]]
        Paths to the audio files. Must contain at least one path.
    sampleRateTarget : float | None = None
        Target sample rate in Hz. When `None`, `parametersUniversal['sampleRate']` is used.

    Returns
    -------
    arrayWaveforms : ArrayWaveforms
        Array shaped `(2, samplesTotalMaximum, countFiles)` with dtype
        `universalDtypeWaveform`, where `samplesTotalMaximum` is the largest value of
        `lengthWaveform + samplesLeading + samplesTrailing` across all files.

    Raises
    ------
    ValueError
        When `listPathFilenames` is empty.

    Files Are Read Twice
    --------------------
    `getWaveformMetadata` decodes every file once to measure it, and this function decodes
    every file a second time to copy the samples.

    """
    if sampleRateTarget is None:
        sampleRateTarget = parametersUniversal['sampleRate']

    countWaveforms: int = len(listPathFilenames)
    if countWaveforms == 0:
        # Without this guard the failure surfaces later as an opaque `max()` error.
        message = "listPathFilenames must contain at least one path"
        raise ValueError(message)

    countChannels: int = 2
    dictionaryWaveformMetadata: dict[int, WaveformMetadata] = getWaveformMetadata(listPathFilenames, sampleRateTarget)
    samplesTotalMaximum: int = max(entry['lengthWaveform'] + entry['samplesLeading'] + entry['samplesTrailing'] for entry in dictionaryWaveformMetadata.values())

    # GitHub #3 Implement semantic axes for audio data
    # Axis order is fixed for now: channels first, time second, file index last.
    tupleShapeArray: tuple[int, int, int] = (countChannels, samplesTotalMaximum, countWaveforms)
    arrayWaveforms: ArrayWaveforms = numpy.zeros(tupleShapeArray, dtype=universalDtypeWaveform)

    for index, metadata in dictionaryWaveformMetadata.items():
        waveform: Waveform = readAudioFile(metadata['pathFilename'], sampleRateTarget)
        # GitHub #4 Add padding logic to `loadWaveforms` and `loadSpectrograms`
        indexStart: int = metadata['samplesLeading']
        indexStop: int = indexStart + metadata['lengthWaveform']
        arrayWaveforms[:, indexStart:indexStop, index] = waveform

    return arrayWaveforms
310
+
311
def writeWAV(pathFilename: str | PathLike[Any] | BinaryIO, waveform: Waveform, sampleRate: float | None = None) -> None:
    """Write a waveform array to a 32-bit float WAV file.

    You can use this function to save a `Waveform` with `soundfile.write` [1]. Before
    writing, `pathFilename` is passed to `makeDirectorySafely` from `hunterMakesPy`.

    Parameters
    ----------
    pathFilename : str | PathLike[Any] | BinaryIO
        Destination for the WAV data.
    waveform : Waveform
        Audio data with the samples axis last; the array is transposed before writing.
    sampleRate : float | None = None
        Sample rate of `waveform` in Hz. When `None`, `parametersUniversal['sampleRate']`
        is used. The value is rounded to the nearest integer for the WAV header.

    Sample Rate Rounding
    --------------------
    The header rate is rounded, not truncated, so it agrees with the rounding that
    `resampleWaveform` applies when audio is read.

    References
    ----------
    [1] soundfile — audio library based on libsndfile
        https://python-soundfile.readthedocs.io/en/0.12.1/

    """
    if sampleRate is None:
        sampleRate = parametersUniversal['sampleRate']
    # NOTE(review): presumably creates missing parent directories and tolerates streams — confirm against hunterMakesPy.
    makeDirectorySafely(pathFilename)
    soundfile.write(file=pathFilename, data=waveform.T, samplerate=round(sampleRate), subtype='FLOAT', format='WAV')
347
+
348
@overload # stft 1 ndarray
def stft(arrayTarget: Waveform, *, sampleRate: float | None = None, lengthHop: int | None = None, windowingFunction: WindowingFunction | None = None, lengthWindowingFunction: int | None = None, lengthFFT: int | None = None, inverse: Literal[False] = False, lengthWaveform: None = None, indexingAxis: None = None) -> Spectrogram: ...

@overload # stft many ndarray
def stft(arrayTarget: ArrayWaveforms, *, sampleRate: float | None = None, lengthHop: int | None = None, windowingFunction: WindowingFunction | None = None, lengthWindowingFunction: int | None = None, lengthFFT: int | None = None, inverse: Literal[False] = False, lengthWaveform: None = None, indexingAxis: int = -1) -> ArraySpectrograms: ...

@overload # istft 1 ndarray
def stft(arrayTarget: Spectrogram, *, sampleRate: float | None = None, lengthHop: int | None = None, windowingFunction: WindowingFunction | None = None, lengthWindowingFunction: int | None = None, lengthFFT: int | None = None, inverse: Literal[True], lengthWaveform: int, indexingAxis: None = None) -> Waveform: ...

@overload # istft many ndarray
def stft(arrayTarget: ArraySpectrograms, *, sampleRate: float | None = None, lengthHop: int | None = None, windowingFunction: WindowingFunction | None = None, lengthWindowingFunction: int | None = None, lengthFFT: int | None = None, inverse: Literal[True], lengthWaveform: int, indexingAxis: int = -1) -> ArrayWaveforms: ...

def stft(arrayTarget: Waveform | ArrayWaveforms | Spectrogram | ArraySpectrograms
        , *
        , sampleRate: float | None = None
        , lengthHop: int | None = None
        , windowingFunction: WindowingFunction | None = None
        , lengthWindowingFunction: int | None = None
        , lengthFFT: int | None = None
        , inverse: bool = False
        , lengthWaveform: int | None = None
        , indexingAxis: int | None = None
        ) -> Waveform | ArrayWaveforms | Spectrogram | ArraySpectrograms:
    """Perform Short-Time Fourier Transform or its inverse on waveform or spectrogram data.

    You can use this function to convert a waveform to a spectrogram, or to reverse the
    transformation with `inverse=True`. The work is done by `scipy.signal.ShortTimeFFT` [1].
    Pass `indexingAxis` to transform a batch: the signals stacked along that axis are
    transformed one at a time and re-stacked along the same axis.

    Parameters
    ----------
    arrayTarget : Waveform | ArrayWaveforms | Spectrogram | ArraySpectrograms
        Input array for transformation.
    sampleRate : float | None = None
        Sample rate of the signal in Hz. When `None`, `parametersUniversal['sampleRate']`
        is used.
    lengthHop : int | None = None
        Number of samples between successive frames. When `None`,
        `parametersUniversal['lengthHop']` is used.
    windowingFunction : WindowingFunction | None = None
        Windowing function array. When `None` and `lengthWindowingFunction` is given,
        `windowingFunctionCallableUniversal(lengthWindowingFunction)` builds the window;
        otherwise `parametersUniversal['windowingFunction']` is used.
    lengthWindowingFunction : int | None = None
        Length in samples of the window to construct. Used only when `windowingFunction`
        is `None`.
    lengthFFT : int | None = None
        Length of the FFT in samples. When `None` and no `windowingFunction` was passed,
        `parametersUniversal['lengthFFT']` is used if it is at least the window length.
        In every other `None` case the smallest power of two that is at least the window
        length is used.
    inverse : bool = False
        When `True`, perform the inverse transform; otherwise the forward transform.
    lengthWaveform : int | None = None
        Output length in samples for the inverse transform. Required when `inverse` is
        `True`.
    indexingAxis : int | None = None
        Axis along which multiple signals are stacked. `None` transforms `arrayTarget` as
        a single signal.

    Returns
    -------
    arrayTransformed : Waveform | ArrayWaveforms | Spectrogram | ArraySpectrograms
        Transformed signal, or batch of signals stacked along `indexingAxis`.

    Raises
    ------
    ValueError
        When `inverse` is `True` and `lengthWaveform` is `None` or `0`.

    Batch Default
    -------------
    The batch overloads advertise `indexingAxis=-1`, but the runtime default is `None`.
    Pass `indexingAxis` explicitly when `arrayTarget` is a batch.

    References
    ----------
    [1] scipy.signal.ShortTimeFFT
        https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.ShortTimeFFT.html

    """
    if sampleRate is None:
        sampleRate = parametersUniversal['sampleRate']
    if lengthHop is None:
        lengthHop = parametersUniversal['lengthHop']

    # NOTE(review): the nesting of the `lengthFFT` fallback under `windowingFunction is None`
    # was reconstructed from a view that dropped indentation — confirm against the repository.
    if windowingFunction is None:
        if lengthWindowingFunction is not None and windowingFunctionCallableUniversal: # pyright: ignore[reportUnnecessaryComparison]
            windowingFunction = windowingFunctionCallableUniversal(lengthWindowingFunction)
        else:
            windowingFunction = parametersUniversal['windowingFunction']
        if lengthFFT is None:
            lengthFFTSherpa = parametersUniversal['lengthFFT']
            # The universal FFT length is only usable when it is at least as long as the window.
            if lengthFFTSherpa >= windowingFunction.size:
                lengthFFT = lengthFFTSherpa

    if lengthFFT is None:
        lengthWindowingFunction = windowingFunction.size
        lengthFFT = 2 ** ceiling(log_base2(lengthWindowingFunction))

    if inverse and not lengthWaveform:
        message = "lengthWaveform must be specified for inverse transform"
        raise ValueError(message)

    stftWorkhorse = ShortTimeFFT(win=windowingFunction, hop=lengthHop, fs=sampleRate, mfft=lengthFFT, **parametersShortTimeFFTUniversal)

    def doTransformation(arrayInput: Waveform | Spectrogram, lengthWaveform: int | None, inverse: bool) -> Waveform | Spectrogram: # noqa: FBT001
        if inverse:
            return cast('Waveform', stftWorkhorse.istft(S=arrayInput, k1=lengthWaveform))
        return cast('Spectrogram', stftWorkhorse.stft(x=arrayInput, **parametersSTFTUniversal))

    if indexingAxis is None:
        singleton: Waveform | Spectrogram = cast('Waveform | Spectrogram', arrayTarget)
        return doTransformation(singleton, lengthWaveform=lengthWaveform, inverse=inverse)

    # Put the stacking axis last so each signal is `arrayTARGET[..., index]`.
    arrayTARGET: ArrayWaveforms | ArraySpectrograms = cast('ArrayWaveforms | ArraySpectrograms', numpy.moveaxis(arrayTarget, indexingAxis, -1))
    countSignals: int = arrayTARGET.shape[-1]

    # The first transform fixes the output shape and dtype; allocate once and fill, so the
    # first result is not copied into every slot before being overwritten.
    transformedFirst = doTransformation(cast('Waveform | Spectrogram', arrayTARGET[..., 0]), lengthWaveform, inverse)
    arrayTransformed: ArrayWaveforms | ArraySpectrograms = cast('ArrayWaveforms | ArraySpectrograms', numpy.empty((*transformedFirst.shape, countSignals), dtype=transformedFirst.dtype))
    arrayTransformed[..., 0] = transformedFirst

    for index in range(1, countSignals):
        arrayTransformed[..., index] = doTransformation(cast('Waveform | Spectrogram', arrayTARGET[..., index]), lengthWaveform, inverse)

    return numpy.moveaxis(arrayTransformed, -1, indexingAxis)
478
+
479
def _getSpectrogram(waveform: Waveform, metadata: WaveformMetadata, sampleRateTarget: float, **parametersSTFT: Any) -> Spectrogram:
    """Copy one audio file into a caller-provided buffer and compute the buffer's spectrogram.

    I use this shared subroutine inside `loadSpectrograms`. The file named by
    `metadata['pathFilename']` is read with `readAudioFile` and written into `waveform`
    starting at `metadata['samplesLeading']`; the rest of the buffer is left untouched.
    The whole buffer is then passed to `stft`.

    Parameters
    ----------
    waveform : Waveform
        Buffer that receives the audio data and is modified in place. Pass a separate
        copy for each file so that samples from one file do not remain in the buffer for
        the next.
    metadata : WaveformMetadata
        Record supplying `pathFilename`, `lengthWaveform`, and `samplesLeading`.
    sampleRateTarget : float
        Sample rate passed to `readAudioFile` and to `stft`.
    **parametersSTFT : Any
        Keyword parameters forwarded to `stft`.

    Returns
    -------
    spectrogram : Spectrogram
        Result of `stft` on the filled buffer.

    """
    # All waveforms have the same shape so that all spectrograms have the same shape.
    # GitHub #4 Add padding logic to `loadWaveforms` and `loadSpectrograms`
    # The destination span is exactly the file's own length, offset by the leading padding,
    # matching the placement in `loadWaveforms`. Leading and trailing padding stay as the
    # caller initialized them.
    indexStart: int = metadata['samplesLeading']
    indexStop: int = indexStart + metadata['lengthWaveform']
    waveform[:, indexStart:indexStop] = readAudioFile(metadata['pathFilename'], sampleRateTarget)
    return stft(waveform, sampleRate=sampleRateTarget, **parametersSTFT)
516
+
517
def loadSpectrograms(listPathFilenames: Sequence[str | PathLike[str]], sampleRateTarget: float | None = None, **parametersSTFT: Any) -> tuple[ArraySpectrograms, dict[int, WaveformMetadata]]:
    """Load spectrograms from a list of audio files.

    You can use this function to batch-convert audio files to spectrograms in one call.
    Every file is placed into a zero-filled buffer of the same length, transformed with
    `stft`, and stored in its own slot along the last axis of the returned array.

    Parameters
    ----------
    listPathFilenames : Sequence[str | PathLike[str]]
        Paths to the audio files. Must contain at least one path.
    sampleRateTarget : float | None = None
        Target sample rate in Hz. When `None`, `parametersUniversal['sampleRate']` is used.
    **parametersSTFT : Any
        Keyword parameters forwarded to `stft`, such as `lengthWindowingFunction` and
        `lengthHop`.

    Returns
    -------
    tupleSpectrogramsMetadata : tuple[ArraySpectrograms, dict[int, WaveformMetadata]]
        The first element is an array with dtype `universalDtypeSpectrogram` whose shape
        is the shape of one spectrogram followed by the number of files. The second
        element is the dictionary returned by `getWaveformMetadata`.

    Raises
    ------
    ValueError
        When `listPathFilenames` is empty.

    File Reading Progress
    ---------------------
    Both the measuring pass in `getWaveformMetadata` and the transform loop here are
    wrapped in `tqdm` [1]. Every file is decoded once in each pass.

    References
    ----------
    [1] tqdm — fast, extensible progress bar for Python and CLI
        https://tqdm.github.io/

    """
    if sampleRateTarget is None:
        sampleRateTarget = parametersUniversal['sampleRate']

    if len(listPathFilenames) == 0:
        # Without this guard the failure surfaces later as an opaque `max()` error.
        message = "listPathFilenames must contain at least one path"
        raise ValueError(message)

    dictionaryWaveformMetadata: dict[int, WaveformMetadata] = getWaveformMetadata(listPathFilenames, sampleRateTarget)

    samplesTotalMaximum: int = max(entry['lengthWaveform'] + entry['samplesLeading'] + entry['samplesTrailing'] for entry in dictionaryWaveformMetadata.values())
    countChannels = 2
    waveformTemplate: Waveform = numpy.zeros(shape=(countChannels, samplesTotalMaximum), dtype=universalDtypeWaveform)
    # Transforming the silent template reveals the spectrogram shape shared by every file.
    spectrogramTemplate: Spectrogram = stft(waveformTemplate, sampleRate=sampleRateTarget, **parametersSTFT)

    arraySpectrograms: ArraySpectrograms = numpy.zeros(shape=(*spectrogramTemplate.shape, len(dictionaryWaveformMetadata)), dtype=universalDtypeSpectrogram)

    # NOTE: the files are processed sequentially. Each iteration receives its own copy of
    # the template, so the loop body has no shared mutable state if it is parallelized later.
    for index, metadata in tqdm(dictionaryWaveformMetadata.items()):
        arraySpectrograms[..., index] = _getSpectrogram(waveformTemplate.copy(), metadata, sampleRateTarget, **parametersSTFT)

    return arraySpectrograms, dictionaryWaveformMetadata
585
+
586
def spectrogramToWAV(spectrogram: Spectrogram, pathFilename: str | PathLike[Any] | BinaryIO, lengthWaveform: int, sampleRate: float | None = None, **parametersSTFT: Any) -> None:
    """Reconstruct a waveform from a spectrogram and write the waveform to a WAV file.

    You can use this function to go from a `Spectrogram` straight to a file. The
    spectrogram is passed to `stft` with `inverse=True`, and the resulting `Waveform` is
    handed to `writeWAV` with the same sample rate.

    Parameters
    ----------
    spectrogram : Spectrogram
        Spectrogram to convert back to a waveform.
    pathFilename : str | PathLike[Any] | BinaryIO
        Destination passed to `writeWAV`.
    lengthWaveform : int
        Number of samples in the reconstructed waveform; forwarded to `stft`.
    sampleRate : float | None = None
        Sample rate in Hz used for both the inverse transform and the WAV file. When
        `None`, `parametersUniversal['sampleRate']` is used.
    **parametersSTFT : Any
        Keyword parameters forwarded to `stft`, such as `lengthWindowingFunction` and
        `lengthHop`.

    File Overwrite and Format
    -------------------------
    See `writeWAV` for how the file is written.

    """
    sampleRateResolved: float = parametersUniversal['sampleRate'] if sampleRate is None else sampleRate
    waveformReconstructed: Waveform = stft(
        spectrogram,
        sampleRate=sampleRateResolved,
        inverse=True,
        lengthWaveform=lengthWaveform,
        **parametersSTFT,
    )
    writeWAV(pathFilename, waveformReconstructed, sampleRateResolved)
624
+
625
def waveformSpectrogramWaveform(callableNeedsSpectrogram: Callable[[Spectrogram], Spectrogram]) -> Callable[[Waveform], Waveform]:
    """Wrap a spectrogram-processing callable so that it accepts and returns waveforms.

    You can use this function as a decorator on a function that transforms a
    `Spectrogram`. The wrapper runs `stft` on the incoming `Waveform`, gives the result to
    `callableNeedsSpectrogram`, and runs inverse `stft` on what comes back, requesting an
    output as long as the input's last axis.

    Parameters
    ----------
    callableNeedsSpectrogram : Callable[[Spectrogram], Spectrogram]
        A function that accepts and returns a `Spectrogram`.

    Returns
    -------
    stft_istft : Callable[[Waveform], Waveform]
        The wrapper described above. Both transforms use the module's default `stft`
        parameters because no overrides are passed.

    Time Axis Assumption
    --------------------
    The wrapper reads the output length from the last axis (`-1`) of the input, matching
    the `(channels, samples)` convention.

    """
    def stft_istft(waveform: Waveform) -> Waveform:
        spectrogramProcessed = callableNeedsSpectrogram(stft(waveform))
        # Read the length only now, after processing, from the samples axis of the input.
        lengthWaveformInput = waveform.shape[-1]
        return stft(spectrogramProcessed, inverse=True, indexingAxis=None, lengthWaveform=lengthWaveformInput)

    return stft_istft
hunterHearsPy/py.typed ADDED
File without changes