hunterHearsPy 1.0.3__tar.gz → 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hunterhearspy-1.0.3 → hunterhearspy-1.1.0}/PKG-INFO +78 -1
- hunterhearspy-1.1.0/README.md +150 -0
- {hunterhearspy-1.0.3 → hunterhearspy-1.1.0}/pyproject.toml +25 -6
- hunterhearspy-1.1.0/src/hunterHearsPy/__init__.py +51 -0
- hunterhearspy-1.1.0/src/hunterHearsPy/_arrays.py +109 -0
- hunterhearspy-1.1.0/src/hunterHearsPy/_fft.py +127 -0
- hunterhearspy-1.1.0/src/hunterHearsPy/_io.py +151 -0
- hunterhearspy-1.1.0/src/hunterHearsPy/_resample.py +43 -0
- {hunterhearspy-1.0.3 → hunterhearspy-1.1.0}/src/hunterHearsPy/amplitude.py +99 -16
- {hunterhearspy-1.0.3 → hunterhearspy-1.1.0}/src/hunterHearsPy/autoRevert.py +5 -5
- {hunterhearspy-1.0.3 → hunterhearspy-1.1.0}/src/hunterHearsPy/clippingArrays.py +11 -9
- hunterhearspy-1.1.0/src/hunterHearsPy/dataBaskets.py +34 -0
- hunterhearspy-1.1.0/src/hunterHearsPy/theSSOT.py +101 -0
- hunterhearspy-1.1.0/src/hunterHearsPy/theTypes.py +101 -0
- hunterhearspy-1.0.3/README.md +0 -78
- hunterhearspy-1.0.3/src/hunterHearsPy/__init__.py +0 -36
- hunterhearspy-1.0.3/src/hunterHearsPy/ioAudio.py +0 -663
- hunterhearspy-1.0.3/src/hunterHearsPy/theTypes.py +0 -118
- {hunterhearspy-1.0.3 → hunterhearspy-1.1.0}/LICENSE +0 -0
- {hunterhearspy-1.0.3 → hunterhearspy-1.1.0}/src/hunterHearsPy/py.typed +0 -0
- {hunterhearspy-1.0.3 → hunterhearspy-1.1.0}/src/hunterHearsPy/windowingFunctions.py +0 -0
- {hunterhearspy-1.0.3 → hunterhearspy-1.1.0}/src/hunterHearsPy/windowingFunctionsTensor.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hunterHearsPy
|
|
3
|
-
Version: 1.0.3
|
|
3
|
+
Version: 1.1.0
|
|
4
4
|
Summary: Audio processing.
|
|
5
5
|
Keywords:
|
|
6
6
|
Author: Hunter Hogan
|
|
@@ -14,6 +14,9 @@ Classifier: Intended Audience :: Science/Research
|
|
|
14
14
|
Classifier: Natural Language :: English
|
|
15
15
|
Classifier: Operating System :: OS Independent
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
20
|
Classifier: Programming Language :: Python :: 3.14
|
|
18
21
|
Classifier: Typing :: Typed
|
|
19
22
|
Requires-Dist: huntermakespy>=0.7.0
|
|
@@ -22,6 +25,8 @@ Requires-Dist: resampy
|
|
|
22
25
|
Requires-Dist: scipy
|
|
23
26
|
Requires-Dist: soundfile>=0.14.0
|
|
24
27
|
Requires-Dist: tqdm
|
|
28
|
+
Requires-Dist: typing-extensions>=4.0.0
|
|
29
|
+
Requires-Dist: z0z-tools>=2.1.0
|
|
25
30
|
Requires-Dist: torch ; extra == 'torch'
|
|
26
31
|
Maintainer: Hunter Hogan
|
|
27
32
|
Maintainer-email: Hunter Hogan <HunterHogan@pm.me>
|
|
@@ -101,6 +106,78 @@ processor = waveformSpectrogramWaveform(boost_low_frequencies)
|
|
|
101
106
|
processed_waveform = processor(original_waveform)
|
|
102
107
|
```
|
|
103
108
|
|
|
109
|
+
## Development
|
|
110
|
+
|
|
111
|
+
I want:
|
|
112
|
+
|
|
113
|
+
- consistent waveforms,
|
|
114
|
+
- consistent spectrograms, and
|
|
115
|
+
- efficient, reliable transformations.
|
|
116
|
+
|
|
117
|
+
Therefore, I want one package, this package, to manage those objectives with a single source of truth for configurable universal settings.
|
|
118
|
+
|
|
119
|
+
- I need good default universal settings.
|
|
120
|
+
- I need an easy way to change a universal setting.
|
|
121
|
+
- I want _ad hoc_ overrides for some or all universal settings.
|
|
122
|
+
|
|
123
|
+
I don't know a good way to implement user-configurable universal settings. Therefore, as I normally do, I will use my HARDCODED system as a placeholder.
|
|
124
|
+
|
|
125
|
+
### Semiotics
|
|
126
|
+
|
|
127
|
+
- channel, rarely channels.
|
|
128
|
+
- time is more generic than samples and often preferred.
|
|
129
|
+
- array is generic.
|
|
130
|
+
- when talking about a NumPy `ndarray`, write `ndarray` not array.
|
|
131
|
+
- `sampleRate` is giving me problems in the "ingest" functions.
|
|
132
|
+
- Regularly review these semiotics until the system is clear.
|
|
133
|
+
- Most of the time, `sampleRate` is descriptive: this is the sample rate. See `writeWAV`.
|
|
134
|
+
- In `readAudioFile`, the parameter is prescriptive: make this the sample rate. Even worse, the
|
|
135
|
+
attribute of the current sample rate is `readSoundFile.samplerate`, which is descriptive, of course.
|
|
136
|
+
- However, `def resampleWaveform(waveform, sampleRateDesired: float, sampleRateSource: float):` has
|
|
137
|
+
excellent semiotics because it does not use `sampleRate`.
|
|
138
|
+
|
|
139
|
+
### Preferred packages
|
|
140
|
+
|
|
141
|
+
- NumPy
|
|
142
|
+
- scipy
|
|
143
|
+
- hunterMakesPy
|
|
144
|
+
- tqdm (for status messages)
|
|
145
|
+
- cytoolz via the [Z0Z_tools](https://github.com/hunterhogan/Z0Z_tools) package (until it finds a forever home)
|
|
146
|
+
- more_itertools
|
|
147
|
+
- `astToolkit`
|
|
148
|
+
- `pytest`
|
|
149
|
+
|
|
150
|
+
### Probably won't need
|
|
151
|
+
|
|
152
|
+
- `analyzeAudio`
|
|
153
|
+
- `gmpy2`
|
|
154
|
+
- `numba`
|
|
155
|
+
- `platformdirs`
|
|
156
|
+
- `sympy`
|
|
157
|
+
- `torch-einops-kit`
|
|
158
|
+
|
|
159
|
+
### Not using `PyTorch` for "business" logic
|
|
160
|
+
|
|
161
|
+
- But, I must have `torch` compatibility.
|
|
162
|
+
- At a minimum, a transformation to and from `torch` that the user must call.
|
|
163
|
+
- `astToolkit` easily creates real `torch` APIs and identifiers, however, for the `windowingFunctions` module in `windowingFunctionsTensor`.
|
|
164
|
+
|
|
165
|
+
### Not using librosa
|
|
166
|
+
|
|
167
|
+
- Dependency bloated.
|
|
168
|
+
- Slow.
|
|
169
|
+
- Less precise than I want.
|
|
170
|
+
- I generally dislike the API and identifiers.
|
|
171
|
+
|
|
172
|
+
### Disfavored packages
|
|
173
|
+
|
|
174
|
+
- `pandas`
|
|
175
|
+
|
|
176
|
+
### More vectorization
|
|
177
|
+
|
|
178
|
+
- Few or no `for` loops.
|
|
179
|
+
- Few or no `for` object comprehensions.
|
|
180
|
+
|
|
104
181
|
## Installation
|
|
105
182
|
|
|
106
183
|
```bash
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
# hunterHearsPy
|
|
2
|
+
|
|
3
|
+
A comprehensive collection of Python utilities for audio processing.
|
|
4
|
+
|
|
5
|
+
## Audio Processing Made Simple
|
|
6
|
+
|
|
7
|
+
### Load and Save Audio Files
|
|
8
|
+
|
|
9
|
+
Read audio files with automatic stereo conversion and sample rate control:
|
|
10
|
+
|
|
11
|
+
```python
|
|
12
|
+
from hunterHearsPy import readAudioFile, writeWAV
|
|
13
|
+
|
|
14
|
+
# Load audio with sample rate conversion
|
|
15
|
+
waveform = readAudioFile('input.wav', sampleRate=44100)
|
|
16
|
+
|
|
17
|
+
# Save in WAV format (always 32-bit float)
|
|
18
|
+
writeWAV('output.wav', waveform)
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
### Process Multiple Audio Files at Once
|
|
22
|
+
|
|
23
|
+
Load and process batches of audio files:
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
from hunterHearsPy import loadWaveforms
|
|
27
|
+
|
|
28
|
+
# Load multiple files with consistent formatting
|
|
29
|
+
array_waveforms = loadWaveforms(['file1.wav', 'file2.wav', 'file3.wav'])
|
|
30
|
+
|
|
31
|
+
# The result is a unified array with shape (channels, samples, file_count)
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
### Work with Spectrograms
|
|
35
|
+
|
|
36
|
+
Convert between waveforms and spectrograms:
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
from hunterHearsPy import stft, halfsine
|
|
40
|
+
|
|
41
|
+
# Create a spectrogram with a half-sine window
|
|
42
|
+
spectrogram = stft(waveform, windowingFunction=halfsine(1024))
|
|
43
|
+
|
|
44
|
+
# Convert back to a waveform
|
|
45
|
+
reconstructed = stft(spectrogram, inverse=True, lengthWaveform=original_length)
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### Process Audio in the Frequency Domain
|
|
49
|
+
|
|
50
|
+
Create functions that operate on spectrograms:
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
from hunterHearsPy import waveformSpectrogramWaveform
|
|
54
|
+
|
|
55
|
+
def boost_low_frequencies(spectrogram):
|
|
56
|
+
# Boost frequencies below 500 Hz
|
|
57
|
+
spectrogram[:, :10, :] *= 2.0
|
|
58
|
+
return spectrogram
|
|
59
|
+
|
|
60
|
+
# Create a processor that handles the STFT/ISTFT automatically
|
|
61
|
+
processor = waveformSpectrogramWaveform(boost_low_frequencies)
|
|
62
|
+
|
|
63
|
+
# Apply the processor to a waveform
|
|
64
|
+
processed_waveform = processor(original_waveform)
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Development
|
|
68
|
+
|
|
69
|
+
I want:
|
|
70
|
+
|
|
71
|
+
- consistent waveforms,
|
|
72
|
+
- consistent spectrograms, and
|
|
73
|
+
- efficient, reliable transformations.
|
|
74
|
+
|
|
75
|
+
Therefore, I want one package, this package, to manage those objectives with a single source of truth for configurable universal settings.
|
|
76
|
+
|
|
77
|
+
- I need good default universal settings.
|
|
78
|
+
- I need an easy way to change a universal setting.
|
|
79
|
+
- I want _ad hoc_ overrides for some or all universal settings.
|
|
80
|
+
|
|
81
|
+
I don't know a good way to implement user-configurable universal settings. Therefore, as I normally do, I will use my HARDCODED system as a placeholder.
|
|
82
|
+
|
|
83
|
+
### Semiotics
|
|
84
|
+
|
|
85
|
+
- channel, rarely channels.
|
|
86
|
+
- time is more generic than samples and often preferred.
|
|
87
|
+
- array is generic.
|
|
88
|
+
- when talking about a NumPy `ndarray`, write `ndarray` not array.
|
|
89
|
+
- `sampleRate` is giving me problems in the "ingest" functions.
|
|
90
|
+
- Regularly review these semiotics until the system is clear.
|
|
91
|
+
- Most of the time, `sampleRate` is descriptive: this is the sample rate. See `writeWAV`.
|
|
92
|
+
- In `readAudioFile`, the parameter is prescriptive: make this the sample rate. Even worse, the
|
|
93
|
+
attribute of the current sample rate is `readSoundFile.samplerate`, which is descriptive, of course.
|
|
94
|
+
- However, `def resampleWaveform(waveform, sampleRateDesired: float, sampleRateSource: float):` has
|
|
95
|
+
excellent semiotics because it does not use `sampleRate`.
|
|
96
|
+
|
|
97
|
+
### Preferred packages
|
|
98
|
+
|
|
99
|
+
- NumPy
|
|
100
|
+
- scipy
|
|
101
|
+
- hunterMakesPy
|
|
102
|
+
- tqdm (for status messages)
|
|
103
|
+
- cytoolz via the [Z0Z_tools](https://github.com/hunterhogan/Z0Z_tools) package (until it finds a forever home)
|
|
104
|
+
- more_itertools
|
|
105
|
+
- `astToolkit`
|
|
106
|
+
- `pytest`
|
|
107
|
+
|
|
108
|
+
### Probably won't need
|
|
109
|
+
|
|
110
|
+
- `analyzeAudio`
|
|
111
|
+
- `gmpy2`
|
|
112
|
+
- `numba`
|
|
113
|
+
- `platformdirs`
|
|
114
|
+
- `sympy`
|
|
115
|
+
- `torch-einops-kit`
|
|
116
|
+
|
|
117
|
+
### Not using `PyTorch` for "business" logic
|
|
118
|
+
|
|
119
|
+
- But, I must have `torch` compatibility.
|
|
120
|
+
- At a minimum, a transformation to and from `torch` that the user must call.
|
|
121
|
+
- `astToolkit` easily creates real `torch` APIs and identifiers, however, for the `windowingFunctions` module in `windowingFunctionsTensor`.
|
|
122
|
+
|
|
123
|
+
### Not using librosa
|
|
124
|
+
|
|
125
|
+
- Dependency bloated.
|
|
126
|
+
- Slow.
|
|
127
|
+
- Less precise than I want.
|
|
128
|
+
- I generally dislike the API and identifiers.
|
|
129
|
+
|
|
130
|
+
### Disfavored packages
|
|
131
|
+
|
|
132
|
+
- `pandas`
|
|
133
|
+
|
|
134
|
+
### More vectorization
|
|
135
|
+
|
|
136
|
+
- Few or no `for` loops.
|
|
137
|
+
- Few or no `for` object comprehensions.
|
|
138
|
+
|
|
139
|
+
## Installation
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
pip install hunterHearsPy
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
## My recovery
|
|
146
|
+
|
|
147
|
+
[HunterThinks.com/support](https://HunterThinks.com/support)
|
|
148
|
+
[YouTube: @HunterHogan](https://www.youtube.com/@HunterHogan)
|
|
149
|
+
|
|
150
|
+
[CC BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "hunterHearsPy"
|
|
3
|
-
version = "1.0.3"
|
|
3
|
+
version = "1.1.0"
|
|
4
4
|
description = "Audio processing."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.10"
|
|
@@ -17,6 +17,9 @@ classifiers = [
|
|
|
17
17
|
"Natural Language :: English",
|
|
18
18
|
"Operating System :: OS Independent",
|
|
19
19
|
"Programming Language :: Python :: 3.10",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Programming Language :: Python :: 3.13",
|
|
20
23
|
"Programming Language :: Python :: 3.14",
|
|
21
24
|
"Typing :: Typed",
|
|
22
25
|
]
|
|
@@ -27,6 +30,8 @@ dependencies = [
|
|
|
27
30
|
"scipy",
|
|
28
31
|
"soundfile>=0.14.0",
|
|
29
32
|
"tqdm",
|
|
33
|
+
"typing-extensions>=4.0.0",
|
|
34
|
+
"Z0Z_tools>=2.1.0",
|
|
30
35
|
]
|
|
31
36
|
optional-dependencies = { torch = ["torch"] }
|
|
32
37
|
|
|
@@ -39,8 +44,13 @@ Issues = "https://github.com/hunterhogan/hunterHearsPy/issues"
|
|
|
39
44
|
Repository = "https://github.com/hunterhogan/hunterHearsPy.git"
|
|
40
45
|
|
|
41
46
|
[dependency-groups]
|
|
42
|
-
dev = [
|
|
43
|
-
|
|
47
|
+
dev = [
|
|
48
|
+
"astToolkit",
|
|
49
|
+
"pytest-cov",
|
|
50
|
+
"scipy-stubs",
|
|
51
|
+
"types-resampy",
|
|
52
|
+
]
|
|
53
|
+
pytorch = ["torch==2.10.0"]
|
|
44
54
|
testing = ["pytest", "pytest-mock", "pytest-xdist", "torch"]
|
|
45
55
|
|
|
46
56
|
[build-system]
|
|
@@ -54,7 +64,7 @@ run = { branch = true, concurrency = ["multiprocessing"], data_file = "tests/cov
|
|
|
54
64
|
], parallel = true, source = ["."] }
|
|
55
65
|
|
|
56
66
|
[tool.pytest]
|
|
57
|
-
addopts = ["--color=auto", "-n 4"]
|
|
67
|
+
addopts = ["--color=auto", "-n 4", "--tb=line"]
|
|
58
68
|
log_auto_indent = "On"
|
|
59
69
|
pythonpath = ["src"]
|
|
60
70
|
testpaths = ["tests"]
|
|
@@ -68,10 +78,19 @@ build-backend = { module-name = "hunterHearsPy", module-root = "src" }
|
|
|
68
78
|
default-groups = ["dev", "testing"]
|
|
69
79
|
|
|
70
80
|
[[tool.uv.index]]
|
|
81
|
+
# NOTE I use this to install torch+cuda on my computer that doesn't have cuda. 2026-06-13
|
|
82
|
+
# `uv sync --group pytorch` OR `uv sync --all-groups` will trigger the following.
|
|
83
|
+
# dependency-groups.pytorch amends the dependencies: `torch`, limitation `==2.10.0`, group = "pytorch".
|
|
84
|
+
# At this point, the default source, pypi, is the highest priority on the `torch` sources list. (LIFO)
|
|
85
|
+
# tool.uv.sources sees '`torch`, group = "pytorch"' so pushes "indexPytorch" on top of the `torch` sources list.
|
|
86
|
+
# tool.uv.index translates "indexPytorch" to the url of a package index.
|
|
87
|
+
# uv queries the package index for all `torch` that match all limitations.
|
|
88
|
+
# If the package index returns any matches, uv will install the "best" match.
|
|
89
|
+
# Because the package index only has cuda 12.8 versions, uv installs torch+cu128.
|
|
90
|
+
# I picked torch==2.10.0 + cuda 12.8 because that is the pre-installed package on Google Colab T4.
|
|
71
91
|
explicit = true
|
|
72
92
|
name = "indexPytorch"
|
|
73
93
|
url = "https://download.pytorch.org/whl/cu128"
|
|
74
94
|
|
|
75
95
|
[tool.uv.sources]
|
|
76
|
-
torch = {
|
|
77
|
-
torchaudio = { index = "indexPytorch", group = "pytorch" }
|
|
96
|
+
torch = { group = "pytorch", index = "indexPytorch" }
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# ruff: noqa: D104
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from hunterHearsPy.theTypes import (
|
|
5
|
+
ArraySpectrograms as ArraySpectrograms, ArrayWaveforms as ArrayWaveforms, ArrayWaveformsFloating as ArrayWaveformsFloating,
|
|
6
|
+
ArrayWaveformsShape as ArrayWaveformsShape, callableReturnsNDArray as callableReturnsNDArray, E733TH4X0R as E733TH4X0R,
|
|
7
|
+
FileDescriptorOrPath as FileDescriptorOrPath, NormalizationReverter as NormalizationReverter, OptionsAlign as OptionsAlign,
|
|
8
|
+
Parameters_stft as Parameters_stft, ParametersShortTimeFFT as ParametersShortTimeFFT, Spectrogram as Spectrogram,
|
|
9
|
+
SpectrogramDtype as SpectrogramDtype, Waveform as Waveform, WaveformAxes as WaveformAxes, WaveformDtype as WaveformDtype,
|
|
10
|
+
WaveformFloating as WaveformFloating, WaveformFloatingDtype as WaveformFloatingDtype, WaveformMetadata as WaveformMetadata,
|
|
11
|
+
WindowingFunction as WindowingFunction, WindowingFunctionDtype as WindowingFunctionDtype, 个 as 个, 形floating as 形floating,
|
|
12
|
+
形ndarray as 形ndarray, 形Shape as 形Shape)
|
|
13
|
+
|
|
14
|
+
# isort: split
|
|
15
|
+
from hunterHearsPy.windowingFunctions import cosineWings as cosineWings, equalPower as equalPower, halfsine as halfsine, tukey as tukey
|
|
16
|
+
|
|
17
|
+
# isort: split
|
|
18
|
+
from contextlib import suppress
|
|
19
|
+
|
|
20
|
+
with suppress(ModuleNotFoundError): # noqa: RUF067
|
|
21
|
+
from hunterHearsPy.windowingFunctionsTensor import (
|
|
22
|
+
cosineWingsTensor as cosineWingsTensor, equalPowerTensor as equalPowerTensor, halfsineTensor as halfsineTensor,
|
|
23
|
+
tukeyTensor as tukeyTensor)
|
|
24
|
+
|
|
25
|
+
# isort: split
|
|
26
|
+
from hunterHearsPy.theSSOT import getAxis as getAxis, setting as setting
|
|
27
|
+
|
|
28
|
+
# isort: split
|
|
29
|
+
from hunterHearsPy.dataBaskets import Translator as Translator
|
|
30
|
+
|
|
31
|
+
# isort: split
|
|
32
|
+
from hunterHearsPy._resample import resampleWaveform as resampleWaveform
|
|
33
|
+
from hunterHearsPy.amplitude import (
|
|
34
|
+
amplitudeIntegerToFloating as amplitudeIntegerToFloating, amplitudeToSoundfile as amplitudeToSoundfile,
|
|
35
|
+
normalizeArrayWaveforms as normalizeArrayWaveforms, normalizeWaveform as normalizeWaveform)
|
|
36
|
+
|
|
37
|
+
# isort: split
|
|
38
|
+
from hunterHearsPy._fft import stft as stft, waveformSpectrogramWaveform as waveformSpectrogramWaveform
|
|
39
|
+
|
|
40
|
+
# isort: split
|
|
41
|
+
from hunterHearsPy._io import readAudioFile as readAudioFile, spectrogramToWAV as spectrogramToWAV, writeWAV as writeWAV
|
|
42
|
+
|
|
43
|
+
# isort: split
|
|
44
|
+
from hunterHearsPy.clippingArrays import applyHardLimit as applyHardLimit, applyHardLimitComplexValued as applyHardLimitComplexValued
|
|
45
|
+
|
|
46
|
+
# isort: split
|
|
47
|
+
from hunterHearsPy.autoRevert import moveToAxisOfOperation as moveToAxisOfOperation
|
|
48
|
+
|
|
49
|
+
# isort: split
|
|
50
|
+
from hunterHearsPy._arrays import (
|
|
51
|
+
getWaveformMetadata as getWaveformMetadata, loadSpectrograms as loadSpectrograms, loadWaveforms as loadWaveforms)
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# ruff: noqa: DOC201
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
5
|
+
from hunterHearsPy import ArrayWaveformsShape, getAxis, readAudioFile, setting, stft, WaveformAxes, WaveformMetadata
|
|
6
|
+
from hunterMakesPy.parseParameters import defineConcurrencyLimit
|
|
7
|
+
from tqdm.auto import tqdm
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
import numpy
|
|
10
|
+
import sys
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from collections.abc import Sequence
|
|
14
|
+
from hunterHearsPy import ArraySpectrograms, ArrayWaveforms, FileDescriptorOrPath, OptionsAlign, Waveform
|
|
15
|
+
from numpy.typing import DTypeLike
|
|
16
|
+
from soundfile import dtype_str as Options_dtype_str
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
def getWaveformMetadata(
    listPathFilenames: Sequence[FileDescriptorOrPath], sampleRate: float, align: OptionsAlign
) -> tuple[dict[int, WaveformMetadata], dict[str, WaveformAxes]]:
    """Collect per-file shape and placement metadata for a group of audio files.

    Each file is read once with `readAudioFile` at `sampleRate` to learn its channel count and its
    length. The largest channel count and the largest length become the sizes of the `'channel'` and
    `'time'` axes, and the number of files becomes the size of the `'indexing'` axis.

    Parameters
    ----------
    listPathFilenames : Sequence[FileDescriptorOrPath]
        The audio files to inspect. If the sequence is empty, a message is written to `sys.stderr`
        and every axis size is zero.
    sampleRate : float
        Passed to `readAudioFile` as its second argument for every file.
    align : OptionsAlign
        Selects how much of each waveform's padding is placed before the waveform: `'start'` places
        all of it before, `'center'` places half (rounded down) before, and any other value places
        none before.

    Returns
    -------
    dictionaryWaveformMetadata : dict[int, WaveformMetadata]
        One entry per file, keyed by the position of the file in `listPathFilenames`.
    axis : dict[str, WaveformAxes]
        The mapping from `getAxis`, with the `'channel'`, `'indexing'`, and `'time'` entries
        replaced by entries that carry the computed sizes.
    """
    axis: dict[str, WaveformAxes] = getAxis()
    dictionaryWaveformMetadata: dict[int, WaveformMetadata] = {}

    countFiles: int = len(listPathFilenames)
    if countFiles == 0:
        sys.stderr.write(f'I received `{len(listPathFilenames) = }`, so `arrayWaveforms` will have zero-sized axes.' + '\n')

    # Read every file once, in order, only to learn its shape.
    for index, pathFilename in enumerate(tqdm(listPathFilenames, desc='Preparing combined array', leave=False)):
        shapeWaveform = readAudioFile(pathFilename, sampleRate).shape
        channels, lengthWaveform = shapeWaveform
        dictionaryWaveformMetadata[index] = WaveformMetadata(
            channels=channels,
            lengthWaveform=lengthWaveform,
            pathFilename=pathFilename,
            samplesStart=0,
            samplesStop=0,
        )

    # The combined array must be large enough for the widest and the longest waveform.
    channelMaximum: int = max((metadata['channels'] for metadata in dictionaryWaveformMetadata.values()), default=0)
    lengthMaximum: int = max((metadata['lengthWaveform'] for metadata in dictionaryWaveformMetadata.values()), default=0)

    for name, size in (('channel', channelMaximum), ('indexing', countFiles), ('time', lengthMaximum)):
        axis[name] = WaveformAxes(number=axis[name].number, size=size)

    # Fraction of the padding that goes before the waveform.
    # NOTE(review): `'start'` puts ALL padding before the waveform, so the waveform ends at the end
    # of the time axis — confirm this is the intended meaning of `'start'`.
    if align == 'start':
        multiplicandSamplesStart: float = 1
    elif align == 'center':
        multiplicandSamplesStart = 0.5
    else:
        multiplicandSamplesStart = 0

    for metadata in dictionaryWaveformMetadata.values():
        samplesPadding: int = axis['time'].size - metadata['lengthWaveform']
        # TODO document that if `samplesPadding` is odd, the extra pad-sample is added to samplesStop.
        samplesStart: int = int(samplesPadding * multiplicandSamplesStart)
        metadata['samplesStart'] = samplesStart
        metadata['samplesStop'] = samplesStart + metadata['lengthWaveform']

    return dictionaryWaveformMetadata, axis
|
|
59
|
+
|
|
60
|
+
def loadWaveforms(listPathFilenames: Sequence[FileDescriptorOrPath], *, CPUlimit: bool | float | int | None = None, **keywordArguments: Any) -> ArrayWaveforms:
    """Read several audio files into one zero-padded NumPy `ndarray`.

    Parameters
    ----------
    listPathFilenames : Sequence[FileDescriptorOrPath]
        The audio files to read.
    CPUlimit : bool | float | int | None = None
        Passed to `defineConcurrencyLimit` to choose the number of worker threads.
    **keywordArguments : Any
        Optional overrides. The keys read here are `'align'`, `'dtype'`, `'dtype_str'`, and
        `'sampleRateDesired'`; each falls back to the corresponding attribute of `setting`.

    Returns
    -------
    arrayWaveforms : ArrayWaveforms
        An `ndarray` of zeros into which each waveform is written at the time offsets computed by
        `getWaveformMetadata`.
    """
    overrides = keywordArguments.get
    align: OptionsAlign = overrides('align', setting.align)
    dtype: DTypeLike = overrides('dtype', setting.dtypeWaveform)
    dtype_str: Options_dtype_str = overrides('dtype_str', setting.dtype_str)
    sampleRateDesired: float = overrides('sampleRateDesired', setting.sampleRate)

    countWorkers: int = defineConcurrencyLimit(limit=CPUlimit)

    dictionaryWaveformMetadata, axis = getWaveformMetadata(listPathFilenames, sampleRateDesired, align)

    # The shape follows the axis order defined by the SSOT, `axis`.
    shapeCombined = ArrayWaveformsShape(*(entry.size for entry in sorted(axis.values())))
    arrayWaveforms: ArrayWaveforms = numpy.zeros(shapeCombined, dtype)
    # TODO frustrating! ^^^ in the lines above, the axis order is entirely based on the SSOT, `axis`,
    # but IMMEDIATELY below, the axis order is hardcoded!

    def workhorse(item: tuple[int, WaveformMetadata]) -> None:
        """Read one file and write the waveform into its slot of `arrayWaveforms`."""
        index, metadata = item
        waveform = readAudioFile(metadata['pathFilename'], sampleRateDesired, dtype_str)
        arrayWaveforms[:, metadata['samplesStart'] : metadata['samplesStop'], index] = waveform.astype(dtype, copy=False)

    with ThreadPoolExecutor(max_workers=countWorkers) as threadManager:
        # Consuming the iterator waits for every worker and re-raises any exception from a worker.
        for _ignored in tqdm(threadManager.map(workhorse, dictionaryWaveformMetadata.items()), total=len(dictionaryWaveformMetadata)):
            pass

    return arrayWaveforms
|
|
84
|
+
|
|
85
|
+
def loadSpectrograms(listPathFilenames: Sequence[FileDescriptorOrPath], *, CPUlimit: bool | float | int | None = None, **keywordArguments: Any) -> tuple[ArraySpectrograms, dict[int, WaveformMetadata]]:
    """Read several audio files and compute one spectrogram per file.

    Every waveform is written into a zero-filled buffer of a common shape before `stft` is applied,
    so every spectrogram has the same shape.

    Parameters
    ----------
    listPathFilenames : Sequence[FileDescriptorOrPath]
        The audio files to read.
    CPUlimit : bool | float | int | None = None
        Passed to `defineConcurrencyLimit` to choose the number of worker threads.
    **keywordArguments : Any
        Optional overrides. The keys read here are `'align'`, `'dtype'`, `'dtype_str'`,
        `'dtypeWaveform'`, and `'sampleRateDesired'`; each falls back to an attribute of `setting`.
        All keyword arguments are also forwarded to `stft`.

    Returns
    -------
    arraySpectrograms : ArraySpectrograms
        The spectrograms, indexed by file along the last axis.
    dictionaryWaveformMetadata : dict[int, WaveformMetadata]
        The metadata computed by `getWaveformMetadata`.
    """
    overrides = keywordArguments.get
    align: OptionsAlign = overrides('align', setting.align)
    dtype: DTypeLike = overrides('dtype', setting.dtypeSpectrogram)
    dtype_str: Options_dtype_str = overrides('dtype_str', setting.dtype_str)
    dtypeWaveform: DTypeLike = overrides('dtypeWaveform', setting.dtypeWaveform)
    sampleRateDesired: float = overrides('sampleRateDesired', setting.sampleRate)

    countWorkers: int = defineConcurrencyLimit(limit=CPUlimit)

    dictionaryWaveformMetadata, axis = getWaveformMetadata(listPathFilenames, sampleRateDesired, align)
    countSpectrograms: int = len(dictionaryWaveformMetadata)

    # A silent waveform of the common shape: the template for each padded waveform, and the probe
    # that reveals the shape of one spectrogram.
    waveformZeros: Waveform = numpy.zeros((axis['channel'].size, axis['time'].size), dtypeWaveform)
    shapeSpectrogram = stft(waveformZeros, **keywordArguments).shape
    arraySpectrograms: ArraySpectrograms = numpy.zeros(shape=(*shapeSpectrogram, countSpectrograms), dtype=dtype)

    def workhorse(item: tuple[int, WaveformMetadata]) -> None:
        """Read one file, pad the waveform with zeros, and store the spectrogram."""
        index, metadata = item
        waveformPadded: Waveform = waveformZeros.copy()
        waveformRead = readAudioFile(metadata['pathFilename'], sampleRateDesired, dtype_str)
        waveformPadded[:, metadata['samplesStart'] : metadata['samplesStop']] = waveformRead.astype(dtypeWaveform, copy=False)
        # TODO Think about numpy.pad.mode waveform = numpy.pad(waveform, ((0, 0), (metadata['samplesStart'], waveform.shape[1] - metadata['samplesStop'])), mode=mode)
        arraySpectrograms[..., index] = stft(waveformPadded, **keywordArguments)

    with ThreadPoolExecutor(max_workers=countWorkers) as threadManager:
        # Consuming the iterator waits for every worker and re-raises any exception from a worker.
        for _ignored in tqdm(threadManager.map(workhorse, dictionaryWaveformMetadata.items()), desc='Loading spectrograms', total=countSpectrograms):
            pass

    return arraySpectrograms, dictionaryWaveformMetadata
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from humpy_cytoolz.dicttoolz import keyfilter, merge
|
|
4
|
+
from hunterHearsPy import amplitudeIntegerToFloating, Parameters_stft, ParametersShortTimeFFT, setting, Translator
|
|
5
|
+
from scipy.signal import ShortTimeFFT
|
|
6
|
+
from typing import overload, TYPE_CHECKING
|
|
7
|
+
from typing_extensions import Unpack
|
|
8
|
+
import numpy
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from collections.abc import Callable
|
|
12
|
+
from hunterHearsPy import ArraySpectrograms, ArrayWaveforms, ArrayWaveformsFloating, Spectrogram, Waveform, WaveformFloating
|
|
13
|
+
from pathlib import PurePath
|
|
14
|
+
from scipy.signal._short_time_fft import _PadType
|
|
15
|
+
|
|
16
|
+
@overload # stft 1 ndarray
def stft(arrayTarget: Waveform, *, lengthWaveform: int = 0, indexingAxis: int = -1, **keywordArguments: Unpack[Parameters_stft]) -> Spectrogram: ...
@overload # stft many ndarray
def stft(arrayTarget: ArrayWaveforms, *, lengthWaveform: int = 0, indexingAxis: int = -1, **keywordArguments: Unpack[Parameters_stft]) -> ArraySpectrograms: ...
@overload # istft 1 ndarray
def stft(arrayTarget: Spectrogram, *, lengthWaveform: int, indexingAxis: int = -1, **keywordArguments: Unpack[Parameters_stft]) -> Waveform: ...
@overload # istft many ndarray
def stft(arrayTarget: ArraySpectrograms, *, lengthWaveform: int, indexingAxis: int = -1, **keywordArguments: Unpack[Parameters_stft]) -> ArrayWaveforms: ...
def stft(arrayTarget: Waveform | ArrayWaveforms | Spectrogram | ArraySpectrograms
        , *
        , lengthWaveform: int = 0
        , indexingAxis: int = -1
        , **keywordArguments: Unpack[Parameters_stft]
        ) -> Waveform | ArrayWaveforms | Spectrogram | ArraySpectrograms:
    """Compute the short-time Fourier transform, or its inverse, of one array or a group of arrays.

    The direction is chosen from the dtype and the number of dimensions of `arrayTarget`:

    - 2 dimensions, integer or real floating dtype: forward transform of one waveform.
    - 3 dimensions, integer or real floating dtype: forward transform of each waveform along
      `indexingAxis`.
    - 3 dimensions, complex dtype: inverse transform of one spectrogram.
    - 4 dimensions, complex dtype: inverse transform of each spectrogram along `indexingAxis`.
    - Anything else: `arrayTarget` is returned unchanged.

    Parameters
    ----------
    arrayTarget : Waveform | ArrayWaveforms | Spectrogram | ArraySpectrograms
        The data to transform. Integer arrays with 2 or 3 dimensions are converted with
        `amplitudeIntegerToFloating` before the forward transform.
    lengthWaveform : int = 0
        The length of the reconstructed waveform; passed to `ShortTimeFFT.istft` as `k1`. Required
        (at least 1) when `arrayTarget` has a complex dtype.
    indexingAxis : int = -1
        The axis that indexes the individual waveforms or spectrograms in a group.
    **keywordArguments : Unpack[Parameters_stft]
        Overrides for the `ShortTimeFFT` parameters in `setting.ShortTimeFFT`; keys that are not in
        `setting.ShortTimeFFT` are ignored for that purpose. `'padding'`, if present, overrides
        `setting.padding` for the forward transform.

    Returns
    -------
    arrayTransformed : Waveform | ArrayWaveforms | Spectrogram | ArraySpectrograms
        The transformed data. After a forward transform, if the indexing axis has size 1, that axis
        is removed.

    Raises
    ------
    ValueError
        If `arrayTarget` has a complex dtype and `lengthWaveform` is less than 1. `arrayTarget` is
        first saved with `saveOnError` and the path is included in the message.
    """
    # Convert integer amplitudes only for the shapes the forward transform handles. For any other
    # integer array, bind `arrayFloating` anyway so the code below reaches the final `else` and
    # returns `arrayTarget` unchanged instead of raising `UnboundLocalError`.
    if numpy.issubdtype(arrayTarget.dtype, numpy.integer) and arrayTarget.ndim in (2, 3):
        arrayFloating = amplitudeIntegerToFloating(arrayTarget)
    else:
        arrayFloating = arrayTarget

    if numpy.issubdtype(arrayTarget.dtype, numpy.complexfloating) and (lengthWaveform < 1):
        # Imported here, not at module level; NOTE(review): presumably to avoid a circular import — confirm.
        from hunterHearsPy._io import saveOnError  # noqa: PLC0415
        pathFilename: PurePath = saveOnError(arrayTarget)
        message: str = (
            "I did not receive `lengthWaveform`, so I could not perform the inverse STFT. "
            "I saved `arrayTarget` to a file in this computer's temporary directory so you might recover the data. "
            f"{arrayTarget.shape = }, {arrayTarget.dtype = }\n"
            f"{pathFilename = }"
        )
        raise ValueError(message)

    # Start from the universal settings, let the caller's keyword arguments win, then drop every key
    # that is not a `setting.ShortTimeFFT` key.
    parametersShortTimeFFT = Translator(**ParametersShortTimeFFT(keyfilter(setting.ShortTimeFFT.keys().__contains__, merge(setting.ShortTimeFFT, keywordArguments))))  # pyright: ignore[reportArgumentType] # ty:ignore[invalid-argument-type]

    padding: _PadType = keywordArguments.get('padding', setting.padding)

    # NOTE(review): `.e733T` presumably holds the parameters under the names `ShortTimeFFT` expects — confirm in `Translator`.
    workhorseSTFT: ShortTimeFFT = ShortTimeFFT(**parametersShortTimeFFT.e733T)

    def mushroom(waveform: WaveformFloating) -> Spectrogram:
        """Forward transform of one waveform."""
        return workhorseSTFT.stft(x=waveform, padding=padding)

    def turtleShell(spectrogram: Spectrogram, lengthWaveform: int) -> WaveformFloating:
        """Inverse transform of one spectrogram, ending at sample `lengthWaveform`."""
        return workhorseSTFT.istft(S=spectrogram, k1=lengthWaveform)

    if arrayFloating.ndim == 2:
        # Treat one waveform as a group of one so the group logic below handles both cases.
        arrayFloating = numpy.expand_dims(arrayFloating, indexingAxis)
    elif (arrayTarget.ndim == 3) and (numpy.issubdtype(arrayTarget.dtype, numpy.complexfloating)):
        spectrogram: Spectrogram = arrayTarget
        return turtleShell(spectrogram, lengthWaveform)

    if (arrayFloating.ndim == 3) and (numpy.issubdtype(arrayFloating.dtype, numpy.floating)):
        arrayWaveforms: ArrayWaveformsFloating = numpy.moveaxis(arrayFloating, indexingAxis, -1)
        # Transform the first waveform to learn the spectrogram shape and dtype, replicate it to
        # allocate the output, then overwrite the remaining slots.
        index = 0
        arraySpectrograms: ArraySpectrograms = numpy.tile(mushroom(arrayWaveforms[..., index])[..., numpy.newaxis], arrayWaveforms.shape[-1])
        for index in range(1, arrayWaveforms.shape[-1]):
            arraySpectrograms[..., index] = mushroom(arrayWaveforms[..., index])
        arraySpectrograms = numpy.moveaxis(arraySpectrograms, -1, indexingAxis)
        if arraySpectrograms.shape[indexingAxis] == 1:
            arraySpectrograms = numpy.squeeze(arraySpectrograms, indexingAxis)
        return arraySpectrograms

    elif (arrayTarget.ndim == 4) and (numpy.issubdtype(arrayTarget.dtype, numpy.complexfloating)):
        arrayTARGET: ArraySpectrograms = numpy.moveaxis(arrayTarget, indexingAxis, -1)
        # Same allocate-by-replication approach as the forward transform.
        index = 0
        arrayTransformed: ArrayWaveforms = numpy.tile(turtleShell(arrayTARGET[..., index], lengthWaveform)[..., numpy.newaxis], arrayTARGET.shape[-1])
        for index in range(1, arrayTARGET.shape[-1]):
            arrayTransformed[..., index] = turtleShell(arrayTARGET[..., index], lengthWaveform)
        return numpy.moveaxis(arrayTransformed, -1, indexingAxis)
    else:
        return arrayTarget
|
|
88
|
+
|
|
89
|
+
def waveformSpectrogramWaveform(callableNeedsSpectrogram: Callable[[Spectrogram], Spectrogram]) -> Callable[[Waveform], Waveform]:
    """Wrap a spectrogram-processing callable so that it accepts and returns waveforms.

    You can use this function as a decorator when you have a function that transforms a `Spectrogram`
    [1] and you want a version that operates directly on a `Waveform` [2]. The returned function
    calls `stft` on the `Waveform` [2], passes the result to `callableNeedsSpectrogram`, and calls
    `stft` again with `lengthWaveform` set to the length of the input `Waveform` [2] to get a
    `Waveform` [2] back.

    Parameters
    ----------
    callableNeedsSpectrogram : Callable[[Spectrogram], Spectrogram]
        A function that accepts and returns a `Spectrogram` [1].

    Returns
    -------
    stft_istft : Callable[[Waveform], Waveform]
        A function that accepts a `Waveform` [2] and returns the `Waveform` [2] reconstructed from
        the `Spectrogram` [1] that `callableNeedsSpectrogram` returned.

    Time Axis Assumption
    --------------------
    The inner function `stft_istft` reads the length of the input `Waveform` [2] from the last axis
    (`-1`). This matches the `(channels, samples)` shape convention.

    References
    ----------
    [1] `Spectrogram`

    [2] `Waveform`

    """
    def stft_istft(waveform: Waveform) -> Waveform:
        """Transform `waveform`, apply `callableNeedsSpectrogram`, and transform back."""
        spectrogramProcessed = callableNeedsSpectrogram(stft(waveform))
        # The time axis is the last axis, so its size is the length to reconstruct.
        return stft(spectrogramProcessed, lengthWaveform=waveform.shape[-1])

    return stft_istft
|