analyzeAudio 0.0.11__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analyzeAudio/__init__.py +5 -5
- analyzeAudio/analyzersUseFilename.py +144 -144
- analyzeAudio/analyzersUseSpectrogram.py +14 -12
- analyzeAudio/analyzersUseTensor.py +4 -4
- analyzeAudio/analyzersUseWaveform.py +11 -10
- analyzeAudio/audioAspectsRegistry.py +166 -167
- analyzeAudio/pythonator.py +78 -77
- analyzeaudio-0.0.12.dist-info/LICENSE +407 -0
- {analyzeAudio-0.0.11.dist-info → analyzeaudio-0.0.12.dist-info}/METADATA +24 -29
- analyzeaudio-0.0.12.dist-info/RECORD +16 -0
- {analyzeAudio-0.0.11.dist-info → analyzeaudio-0.0.12.dist-info}/WHEEL +1 -1
- analyzeaudio-0.0.12.dist-info/entry_points.txt +2 -0
- tests/test_audioAspectsRegistry.py +0 -1
- tests/test_other.py +7 -10
- analyzeAudio-0.0.11.dist-info/RECORD +0 -14
- {analyzeAudio-0.0.11.dist-info → analyzeaudio-0.0.12.dist-info}/top_level.txt +0 -0
analyzeAudio/__init__.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
from .audioAspectsRegistry import registrationAudioAspect, cacheAudioAnalyzers, analyzeAudioFile, \
|
|
2
|
-
|
|
2
|
+
analyzeAudioListPathFilenames, getListAvailableAudioAspects, audioAspects
|
|
3
3
|
|
|
4
4
|
__all__ = [
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
5
|
+
'analyzeAudioFile',
|
|
6
|
+
'analyzeAudioListPathFilenames',
|
|
7
|
+
'audioAspects',
|
|
8
|
+
'getListAvailableAudioAspects',
|
|
9
9
|
]
|
|
10
10
|
|
|
11
11
|
from . import analyzersUseFilename
|
|
analyzeAudio/analyzersUseFilename.py
CHANGED
|
@@ -1,234 +1,234 @@
|
|
|
1
1
|
from .pythonator import pythonizeFFprobe
|
|
2
2
|
from analyzeAudio import registrationAudioAspect, cacheAudioAnalyzers
|
|
3
|
+
from os import PathLike
|
|
3
4
|
from statistics import mean
|
|
4
|
-
from typing import Any,
|
|
5
|
+
from typing import Any, cast
|
|
5
6
|
import cachetools
|
|
6
7
|
import numpy
|
|
7
|
-
import os
|
|
8
8
|
import pathlib
|
|
9
9
|
import re as regex
|
|
10
10
|
import subprocess
|
|
11
11
|
|
|
12
12
|
@registrationAudioAspect('SI-SDR mean')
|
|
13
|
-
def getSI_SDRmean(pathFilenameAlpha:
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
13
|
+
def getSI_SDRmean(pathFilenameAlpha: str | PathLike[Any], pathFilenameBeta: str | PathLike[Any]) -> float | None:
|
|
14
|
+
"""
|
|
15
|
+
Calculate the mean Scale-Invariant Signal-to-Distortion Ratio (SI-SDR) between two audio files.
|
|
16
|
+
This function uses FFmpeg to compute the SI-SDR between two audio files specified by their paths.
|
|
17
|
+
The SI-SDR values are extracted from the FFmpeg output and their mean is calculated.
|
|
18
|
+
Parameters:
|
|
19
|
+
pathFilenameAlpha: Path to the first audio file.
|
|
20
|
+
pathFilenameBeta: Path to the second audio file.
|
|
21
|
+
Returns:
|
|
22
|
+
SI_SDRmean: The mean SI-SDR value in decibels (dB).
|
|
23
|
+
Raises:
|
|
24
|
+
subprocess.CalledProcessError: If the FFmpeg command fails.
|
|
25
|
+
ValueError: If no SI-SDR values are found in the FFmpeg output.
|
|
26
|
+
"""
|
|
27
|
+
commandLineFFmpeg = [
|
|
28
|
+
'ffmpeg', '-hide_banner', '-loglevel', '32',
|
|
29
|
+
'-i', f'{str(pathlib.Path(pathFilenameAlpha))}', '-i', f'{str(pathlib.Path(pathFilenameBeta))}',
|
|
30
|
+
'-filter_complex', '[0][1]asisdr', '-f', 'null', '-'
|
|
31
|
+
]
|
|
32
|
+
systemProcessFFmpeg = subprocess.run(commandLineFFmpeg, check=True, stderr=subprocess.PIPE)
|
|
33
|
+
|
|
34
|
+
stderrFFmpeg = systemProcessFFmpeg.stderr.decode()
|
|
35
|
+
|
|
36
|
+
regexSI_SDR = regex.compile(r"^\[Parsed_asisdr_.* (.*) dB", regex.MULTILINE)
|
|
37
|
+
|
|
38
|
+
listMatchesSI_SDR = regexSI_SDR.findall(stderrFFmpeg)
|
|
39
|
+
SI_SDRmean = mean(float(match) for match in listMatchesSI_SDR)
|
|
40
|
+
return SI_SDRmean
|
|
41
41
|
|
|
42
42
|
@cachetools.cached(cache=cacheAudioAnalyzers)
|
|
43
|
-
def ffprobeShotgunAndCache(pathFilename:
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
43
|
+
def ffprobeShotgunAndCache(pathFilename: str | PathLike[Any]) -> dict[str, float]:
|
|
44
|
+
# for lavfi amovie/movie, the colons after driveLetter letters need to be escaped twice.
|
|
45
|
+
pFn = pathlib.PureWindowsPath(pathFilename)
|
|
46
|
+
lavfiPathFilename = pFn.drive.replace(":", "\\\\:")+pathlib.PureWindowsPath(pFn.root,pFn.relative_to(pFn.anchor)).as_posix()
|
|
47
|
+
|
|
48
|
+
filterChain: list[str] = []
|
|
49
|
+
filterChain += ["astats=metadata=1:measure_perchannel=Crest_factor+Zero_crossings_rate+Dynamic_range:measure_overall=all"]
|
|
50
|
+
filterChain += ["aspectralstats"]
|
|
51
|
+
filterChain += ["ebur128=metadata=1:framelog=quiet"]
|
|
52
|
+
|
|
53
|
+
entriesFFprobe = ["frame_tags"]
|
|
54
|
+
|
|
55
|
+
commandLineFFprobe = [
|
|
56
|
+
"ffprobe", "-hide_banner",
|
|
57
|
+
"-f", "lavfi", f"amovie={lavfiPathFilename},{','.join(filterChain)}",
|
|
58
|
+
"-show_entries", ':'.join(entriesFFprobe),
|
|
59
|
+
"-output_format", "json=compact=1",
|
|
60
|
+
]
|
|
61
|
+
|
|
62
|
+
systemProcessFFprobe = subprocess.Popen(commandLineFFprobe, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
|
63
|
+
stdoutFFprobe, _DISCARDstderr = systemProcessFFprobe.communicate()
|
|
64
|
+
FFprobeStructured = pythonizeFFprobe(stdoutFFprobe.decode('utf-8'))[-1]
|
|
65
|
+
|
|
66
|
+
dictionaryAspectsAnalyzed: dict[str, float] = {}
|
|
67
|
+
if 'aspectralstats' in FFprobeStructured:
|
|
68
|
+
for keyName in FFprobeStructured['aspectralstats']:
|
|
69
|
+
dictionaryAspectsAnalyzed[keyName] = numpy.mean(FFprobeStructured['aspectralstats'][keyName]).astype(float)
|
|
70
|
+
if 'r128' in FFprobeStructured:
|
|
71
|
+
for keyName in FFprobeStructured['r128']:
|
|
72
|
+
dictionaryAspectsAnalyzed[keyName] = FFprobeStructured['r128'][keyName][-1]
|
|
73
|
+
if 'astats' in FFprobeStructured:
|
|
74
|
+
for keyName, arrayFeatureValues in cast(dict[str, numpy.ndarray[Any, Any]], FFprobeStructured['astats']).items():
|
|
75
|
+
dictionaryAspectsAnalyzed[keyName.split('.')[-1]] = numpy.mean(arrayFeatureValues[..., -1:]).astype(float)
|
|
76
|
+
|
|
77
|
+
return dictionaryAspectsAnalyzed
|
|
78
78
|
|
|
79
79
|
@registrationAudioAspect('Zero-crossings rate')
|
|
80
|
-
def analyzeZero_crossings_rate(pathFilename:
|
|
81
|
-
|
|
80
|
+
def analyzeZero_crossings_rate(pathFilename: str | PathLike[Any]) -> float | None:
|
|
81
|
+
return ffprobeShotgunAndCache(pathFilename).get('Zero_crossings_rate')
|
|
82
82
|
|
|
83
83
|
@registrationAudioAspect('DC offset')
|
|
84
|
-
def analyzeDCoffset(pathFilename:
|
|
85
|
-
|
|
84
|
+
def analyzeDCoffset(pathFilename: str | PathLike[Any]) -> float | None:
|
|
85
|
+
return ffprobeShotgunAndCache(pathFilename).get('DC_offset')
|
|
86
86
|
|
|
87
87
|
@registrationAudioAspect('Dynamic range')
|
|
88
|
-
def analyzeDynamicRange(pathFilename:
|
|
89
|
-
|
|
88
|
+
def analyzeDynamicRange(pathFilename: str | PathLike[Any]) -> float | None:
|
|
89
|
+
return ffprobeShotgunAndCache(pathFilename).get('Dynamic_range')
|
|
90
90
|
|
|
91
91
|
@registrationAudioAspect('Signal entropy')
|
|
92
|
-
def analyzeSignalEntropy(pathFilename:
|
|
93
|
-
|
|
92
|
+
def analyzeSignalEntropy(pathFilename: str | PathLike[Any]) -> float | None:
|
|
93
|
+
return ffprobeShotgunAndCache(pathFilename).get('Entropy')
|
|
94
94
|
|
|
95
95
|
@registrationAudioAspect('Duration-samples')
|
|
96
|
-
def analyzeNumber_of_samples(pathFilename:
|
|
97
|
-
|
|
96
|
+
def analyzeNumber_of_samples(pathFilename: str | PathLike[Any]) -> float | None:
|
|
97
|
+
return ffprobeShotgunAndCache(pathFilename).get('Number_of_samples')
|
|
98
98
|
|
|
99
99
|
@registrationAudioAspect('Peak dB')
|
|
100
|
-
def analyzePeak_level(pathFilename:
|
|
101
|
-
|
|
100
|
+
def analyzePeak_level(pathFilename: str | PathLike[Any]) -> float | None:
|
|
101
|
+
return ffprobeShotgunAndCache(pathFilename).get('Peak_level')
|
|
102
102
|
|
|
103
103
|
@registrationAudioAspect('RMS total')
|
|
104
|
-
def analyzeRMS_level(pathFilename:
|
|
105
|
-
|
|
104
|
+
def analyzeRMS_level(pathFilename: str | PathLike[Any]) -> float | None:
|
|
105
|
+
return ffprobeShotgunAndCache(pathFilename).get('RMS_level')
|
|
106
106
|
|
|
107
107
|
@registrationAudioAspect('Crest factor')
|
|
108
|
-
def analyzeCrest_factor(pathFilename:
|
|
109
|
-
|
|
108
|
+
def analyzeCrest_factor(pathFilename: str | PathLike[Any]) -> float | None:
|
|
109
|
+
return ffprobeShotgunAndCache(pathFilename).get('Crest_factor')
|
|
110
110
|
|
|
111
111
|
@registrationAudioAspect('RMS peak')
|
|
112
|
-
def analyzeRMS_peak(pathFilename:
|
|
113
|
-
|
|
112
|
+
def analyzeRMS_peak(pathFilename: str | PathLike[Any]) -> float | None:
|
|
113
|
+
return ffprobeShotgunAndCache(pathFilename).get('RMS_peak')
|
|
114
114
|
|
|
115
115
|
@registrationAudioAspect('LUFS integrated')
|
|
116
|
-
def analyzeLUFSintegrated(pathFilename:
|
|
117
|
-
|
|
116
|
+
def analyzeLUFSintegrated(pathFilename: str | PathLike[Any]) -> float | None:
|
|
117
|
+
return ffprobeShotgunAndCache(pathFilename).get('I')
|
|
118
118
|
|
|
119
119
|
@registrationAudioAspect('LUFS loudness range')
|
|
120
|
-
def analyzeLRA(pathFilename:
|
|
121
|
-
|
|
120
|
+
def analyzeLRA(pathFilename: str | PathLike[Any]) -> float | None:
|
|
121
|
+
return ffprobeShotgunAndCache(pathFilename).get('LRA')
|
|
122
122
|
|
|
123
123
|
@registrationAudioAspect('LUFS low')
|
|
124
|
-
def analyzeLUFSlow(pathFilename:
|
|
125
|
-
|
|
124
|
+
def analyzeLUFSlow(pathFilename: str | PathLike[Any]) -> float | None:
|
|
125
|
+
return ffprobeShotgunAndCache(pathFilename).get('LRA.low')
|
|
126
126
|
|
|
127
127
|
@registrationAudioAspect('LUFS high')
|
|
128
|
-
def analyzeLUFShigh(pathFilename:
|
|
129
|
-
|
|
128
|
+
def analyzeLUFShigh(pathFilename: str | PathLike[Any]) -> float | None:
|
|
129
|
+
return ffprobeShotgunAndCache(pathFilename).get('LRA.high')
|
|
130
130
|
|
|
131
131
|
@registrationAudioAspect('Spectral mean')
|
|
132
|
-
def analyzeMean(pathFilename:
|
|
133
|
-
|
|
132
|
+
def analyzeMean(pathFilename: str | PathLike[Any]) -> float | None:
|
|
133
|
+
return ffprobeShotgunAndCache(pathFilename).get('mean')
|
|
134
134
|
|
|
135
135
|
@registrationAudioAspect('Spectral variance')
|
|
136
|
-
def analyzeVariance(pathFilename:
|
|
137
|
-
|
|
136
|
+
def analyzeVariance(pathFilename: str | PathLike[Any]) -> float | None:
|
|
137
|
+
return ffprobeShotgunAndCache(pathFilename).get('variance')
|
|
138
138
|
|
|
139
139
|
@registrationAudioAspect('Spectral centroid')
|
|
140
|
-
def analyzeCentroid(pathFilename:
|
|
141
|
-
|
|
140
|
+
def analyzeCentroid(pathFilename: str | PathLike[Any]) -> float | None:
|
|
141
|
+
return ffprobeShotgunAndCache(pathFilename).get('centroid')
|
|
142
142
|
|
|
143
143
|
@registrationAudioAspect('Spectral spread')
|
|
144
|
-
def analyzeSpread(pathFilename:
|
|
145
|
-
|
|
144
|
+
def analyzeSpread(pathFilename: str | PathLike[Any]) -> float | None:
|
|
145
|
+
return ffprobeShotgunAndCache(pathFilename).get('spread')
|
|
146
146
|
|
|
147
147
|
@registrationAudioAspect('Spectral skewness')
|
|
148
|
-
def analyzeSkewness(pathFilename:
|
|
149
|
-
|
|
148
|
+
def analyzeSkewness(pathFilename: str | PathLike[Any]) -> float | None:
|
|
149
|
+
return ffprobeShotgunAndCache(pathFilename).get('skewness')
|
|
150
150
|
|
|
151
151
|
@registrationAudioAspect('Spectral kurtosis')
|
|
152
|
-
def analyzeKurtosis(pathFilename:
|
|
153
|
-
|
|
152
|
+
def analyzeKurtosis(pathFilename: str | PathLike[Any]) -> float | None:
|
|
153
|
+
return ffprobeShotgunAndCache(pathFilename).get('kurtosis')
|
|
154
154
|
|
|
155
155
|
@registrationAudioAspect('Spectral entropy')
|
|
156
|
-
def analyzeSpectralEntropy(pathFilename:
|
|
157
|
-
|
|
156
|
+
def analyzeSpectralEntropy(pathFilename: str | PathLike[Any]) -> float | None:
|
|
157
|
+
return ffprobeShotgunAndCache(pathFilename).get('entropy')
|
|
158
158
|
|
|
159
159
|
@registrationAudioAspect('Spectral flatness')
|
|
160
|
-
def analyzeFlatness(pathFilename:
|
|
161
|
-
|
|
160
|
+
def analyzeFlatness(pathFilename: str | PathLike[Any]) -> float | None:
|
|
161
|
+
return ffprobeShotgunAndCache(pathFilename).get('flatness')
|
|
162
162
|
|
|
163
163
|
@registrationAudioAspect('Spectral crest')
|
|
164
|
-
def analyzeCrest(pathFilename:
|
|
165
|
-
|
|
164
|
+
def analyzeCrest(pathFilename: str | PathLike[Any]) -> float | None:
|
|
165
|
+
return ffprobeShotgunAndCache(pathFilename).get('crest')
|
|
166
166
|
|
|
167
167
|
@registrationAudioAspect('Spectral flux')
|
|
168
|
-
def analyzeFlux(pathFilename:
|
|
169
|
-
|
|
168
|
+
def analyzeFlux(pathFilename: str | PathLike[Any]) -> float | None:
|
|
169
|
+
return ffprobeShotgunAndCache(pathFilename).get('flux')
|
|
170
170
|
|
|
171
171
|
@registrationAudioAspect('Spectral slope')
|
|
172
|
-
def analyzeSlope(pathFilename:
|
|
173
|
-
|
|
172
|
+
def analyzeSlope(pathFilename: str | PathLike[Any]) -> float | None:
|
|
173
|
+
return ffprobeShotgunAndCache(pathFilename).get('slope')
|
|
174
174
|
|
|
175
175
|
@registrationAudioAspect('Spectral decrease')
|
|
176
|
-
def analyzeDecrease(pathFilename:
|
|
177
|
-
|
|
176
|
+
def analyzeDecrease(pathFilename: str | PathLike[Any]) -> float | None:
|
|
177
|
+
return ffprobeShotgunAndCache(pathFilename).get('decrease')
|
|
178
178
|
|
|
179
179
|
@registrationAudioAspect('Spectral rolloff')
|
|
180
|
-
def analyzeRolloff(pathFilename:
|
|
181
|
-
|
|
180
|
+
def analyzeRolloff(pathFilename: str | PathLike[Any]) -> float | None:
|
|
181
|
+
return ffprobeShotgunAndCache(pathFilename).get('rolloff')
|
|
182
182
|
|
|
183
183
|
@registrationAudioAspect('Abs_Peak_count')
|
|
184
|
-
def analyzeAbs_Peak_count(pathFilename:
|
|
185
|
-
|
|
186
|
-
|
|
184
|
+
def analyzeAbs_Peak_count(pathFilename: str | PathLike[Any]) -> float | None:
|
|
185
|
+
print('Abs_Peak_count', pathFilename)
|
|
186
|
+
return ffprobeShotgunAndCache(pathFilename).get('Abs_Peak_count')
|
|
187
187
|
|
|
188
188
|
@registrationAudioAspect('Bit_depth')
|
|
189
|
-
def analyzeBit_depth(pathFilename:
|
|
190
|
-
|
|
189
|
+
def analyzeBit_depth(pathFilename: str | PathLike[Any]) -> float | None:
|
|
190
|
+
return ffprobeShotgunAndCache(pathFilename).get('Bit_depth')
|
|
191
191
|
|
|
192
192
|
@registrationAudioAspect('Flat_factor')
|
|
193
|
-
def analyzeFlat_factor(pathFilename:
|
|
194
|
-
|
|
193
|
+
def analyzeFlat_factor(pathFilename: str | PathLike[Any]) -> float | None:
|
|
194
|
+
return ffprobeShotgunAndCache(pathFilename).get('Flat_factor')
|
|
195
195
|
|
|
196
196
|
@registrationAudioAspect('Max_difference')
|
|
197
|
-
def analyzeMax_difference(pathFilename:
|
|
198
|
-
|
|
197
|
+
def analyzeMax_difference(pathFilename: str | PathLike[Any]) -> float | None:
|
|
198
|
+
return ffprobeShotgunAndCache(pathFilename).get('Max_difference')
|
|
199
199
|
|
|
200
200
|
@registrationAudioAspect('Max_level')
|
|
201
|
-
def analyzeMax_level(pathFilename:
|
|
202
|
-
|
|
201
|
+
def analyzeMax_level(pathFilename: str | PathLike[Any]) -> float | None:
|
|
202
|
+
return ffprobeShotgunAndCache(pathFilename).get('Max_level')
|
|
203
203
|
|
|
204
204
|
@registrationAudioAspect('Mean_difference')
|
|
205
|
-
def analyzeMean_difference(pathFilename:
|
|
206
|
-
|
|
205
|
+
def analyzeMean_difference(pathFilename: str | PathLike[Any]) -> float | None:
|
|
206
|
+
return ffprobeShotgunAndCache(pathFilename).get('Mean_difference')
|
|
207
207
|
|
|
208
208
|
@registrationAudioAspect('Min_difference')
|
|
209
|
-
def analyzeMin_difference(pathFilename:
|
|
210
|
-
|
|
209
|
+
def analyzeMin_difference(pathFilename: str | PathLike[Any]) -> float | None:
|
|
210
|
+
return ffprobeShotgunAndCache(pathFilename).get('Min_difference')
|
|
211
211
|
|
|
212
212
|
@registrationAudioAspect('Min_level')
|
|
213
|
-
def analyzeMin_level(pathFilename:
|
|
214
|
-
|
|
213
|
+
def analyzeMin_level(pathFilename: str | PathLike[Any]) -> float | None:
|
|
214
|
+
return ffprobeShotgunAndCache(pathFilename).get('Min_level')
|
|
215
215
|
|
|
216
216
|
@registrationAudioAspect('Noise_floor')
|
|
217
|
-
def analyzeNoise_floor(pathFilename:
|
|
218
|
-
|
|
217
|
+
def analyzeNoise_floor(pathFilename: str | PathLike[Any]) -> float | None:
|
|
218
|
+
return ffprobeShotgunAndCache(pathFilename).get('Noise_floor')
|
|
219
219
|
|
|
220
220
|
@registrationAudioAspect('Noise_floor_count')
|
|
221
|
-
def analyzeNoise_floor_count(pathFilename:
|
|
222
|
-
|
|
221
|
+
def analyzeNoise_floor_count(pathFilename: str | PathLike[Any]) -> float | None:
|
|
222
|
+
return ffprobeShotgunAndCache(pathFilename).get('Noise_floor_count')
|
|
223
223
|
|
|
224
224
|
@registrationAudioAspect('Peak_count')
|
|
225
|
-
def analyzePeak_count(pathFilename:
|
|
226
|
-
|
|
225
|
+
def analyzePeak_count(pathFilename: str | PathLike[Any]) -> float | None:
|
|
226
|
+
return ffprobeShotgunAndCache(pathFilename).get('Peak_count')
|
|
227
227
|
|
|
228
228
|
@registrationAudioAspect('RMS_difference')
|
|
229
|
-
def analyzeRMS_difference(pathFilename:
|
|
230
|
-
|
|
229
|
+
def analyzeRMS_difference(pathFilename: str | PathLike[Any]) -> float | None:
|
|
230
|
+
return ffprobeShotgunAndCache(pathFilename).get('RMS_difference')
|
|
231
231
|
|
|
232
232
|
@registrationAudioAspect('RMS_trough')
|
|
233
|
-
def analyzeRMS_trough(pathFilename:
|
|
234
|
-
|
|
233
|
+
def analyzeRMS_trough(pathFilename: str | PathLike[Any]) -> float | None:
|
|
234
|
+
return ffprobeShotgunAndCache(pathFilename).get('RMS_trough')
|
|
analyzeAudio/analyzersUseSpectrogram.py
CHANGED
|
@@ -3,26 +3,28 @@ from typing import Any
|
|
|
3
3
|
import cachetools
|
|
4
4
|
import librosa
|
|
5
5
|
import numpy
|
|
6
|
+
from optype.numpy import AnyFloatingDType, ToArray3D, ToFloat3D
|
|
7
|
+
from numpy import dtype, floating
|
|
6
8
|
|
|
7
9
|
@registrationAudioAspect('Chromagram')
|
|
8
|
-
def analyzeChromagram(spectrogramPower: numpy.ndarray, sampleRate: int, **keywordArguments: Any) -> numpy.ndarray:
|
|
9
|
-
|
|
10
|
+
def analyzeChromagram(spectrogramPower: numpy.ndarray[Any, dtype[floating[Any]]], sampleRate: int, **keywordArguments: Any) -> numpy.ndarray:
|
|
11
|
+
return librosa.feature.chroma_stft(S=spectrogramPower, sr=sampleRate, **keywordArguments)
|
|
10
12
|
|
|
11
13
|
@registrationAudioAspect('Spectral Contrast')
|
|
12
|
-
def analyzeSpectralContrast(spectrogramMagnitude: numpy.ndarray, **keywordArguments: Any) -> numpy.ndarray:
|
|
13
|
-
|
|
14
|
+
def analyzeSpectralContrast(spectrogramMagnitude: numpy.ndarray[Any, dtype[floating[Any]]], **keywordArguments: Any) -> numpy.ndarray:
|
|
15
|
+
return librosa.feature.spectral_contrast(S=spectrogramMagnitude, **keywordArguments)
|
|
14
16
|
|
|
15
17
|
@registrationAudioAspect('Spectral Bandwidth')
|
|
16
|
-
def analyzeSpectralBandwidth(spectrogramMagnitude: numpy.ndarray, **keywordArguments: Any) -> numpy.ndarray:
|
|
17
|
-
|
|
18
|
-
|
|
18
|
+
def analyzeSpectralBandwidth(spectrogramMagnitude: numpy.ndarray[Any, dtype[floating[Any]]], **keywordArguments: Any) -> numpy.ndarray:
|
|
19
|
+
centroid = audioAspects['Spectral Centroid']['analyzer'](spectrogramMagnitude)
|
|
20
|
+
return librosa.feature.spectral_bandwidth(S=spectrogramMagnitude, centroid=centroid, **keywordArguments)
|
|
19
21
|
|
|
20
22
|
@cachetools.cached(cache=cacheAudioAnalyzers)
|
|
21
23
|
@registrationAudioAspect('Spectral Centroid')
|
|
22
|
-
def analyzeSpectralCentroid(spectrogramMagnitude: numpy.ndarray, **keywordArguments: Any) -> numpy.ndarray:
|
|
23
|
-
|
|
24
|
+
def analyzeSpectralCentroid(spectrogramMagnitude: numpy.ndarray[Any, dtype[floating[Any]]], **keywordArguments: Any) -> numpy.ndarray:
|
|
25
|
+
return librosa.feature.spectral_centroid(S=spectrogramMagnitude, **keywordArguments)
|
|
24
26
|
|
|
25
27
|
@registrationAudioAspect('Spectral Flatness')
|
|
26
|
-
def analyzeSpectralFlatness(spectrogramMagnitude: numpy.ndarray, **keywordArguments: Any) -> numpy.ndarray:
|
|
27
|
-
|
|
28
|
-
|
|
28
|
+
def analyzeSpectralFlatness(spectrogramMagnitude: numpy.ndarray[Any, dtype[floating[Any]]], **keywordArguments: Any) -> numpy.ndarray:
|
|
29
|
+
spectralFlatness = librosa.feature.spectral_flatness(S=spectrogramMagnitude, **keywordArguments)
|
|
30
|
+
return 20 * numpy.log10(spectralFlatness, where=(spectralFlatness != 0)) # dB
|
|
analyzeAudio/analyzersUseTensor.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
from analyzeAudio import registrationAudioAspect
|
|
2
2
|
from torchmetrics.functional.audio.srmr import speech_reverberation_modulation_energy_ratio
|
|
3
|
-
from typing import Any
|
|
3
|
+
from typing import Any
|
|
4
4
|
import numpy
|
|
5
5
|
import torch
|
|
6
6
|
|
|
7
7
|
@registrationAudioAspect('SRMR')
|
|
8
|
-
def analyzeSRMR(tensorAudio: torch.Tensor, sampleRate: int, pytorchOnCPU:
|
|
9
|
-
|
|
10
|
-
|
|
8
|
+
def analyzeSRMR(tensorAudio: torch.Tensor, sampleRate: int, pytorchOnCPU: bool | None, **keywordArguments: Any) -> numpy.ndarray:
|
|
9
|
+
keywordArguments['fast'] = keywordArguments.get('fast') or pytorchOnCPU or None
|
|
10
|
+
return torch.Tensor.numpy(speech_reverberation_modulation_energy_ratio(tensorAudio, sampleRate, **keywordArguments))
|
|
analyzeAudio/analyzersUseWaveform.py
CHANGED
|
@@ -2,24 +2,25 @@ from analyzeAudio import registrationAudioAspect, audioAspects, cacheAudioAnalyz
|
|
|
2
2
|
from typing import Any
|
|
3
3
|
import librosa
|
|
4
4
|
import numpy
|
|
5
|
+
from optype.numpy import ToArray2D, AnyFloatingDType
|
|
5
6
|
import cachetools
|
|
6
7
|
|
|
7
8
|
@cachetools.cached(cache=cacheAudioAnalyzers)
|
|
8
9
|
@registrationAudioAspect('Tempogram')
|
|
9
|
-
def analyzeTempogram(waveform:
|
|
10
|
-
|
|
10
|
+
def analyzeTempogram(waveform: ToArray2D[AnyFloatingDType], sampleRate: int, **keywordArguments: Any) -> numpy.ndarray:
|
|
11
|
+
return librosa.feature.tempogram(y=waveform, sr=sampleRate, **keywordArguments)
|
|
11
12
|
|
|
12
13
|
# "RMS value from audio samples is faster ... However, ... spectrogram ... more accurate ... because ... windowed"
|
|
13
14
|
@registrationAudioAspect('RMS from waveform')
|
|
14
|
-
def analyzeRMS(waveform:
|
|
15
|
-
|
|
16
|
-
|
|
15
|
+
def analyzeRMS(waveform: ToArray2D[AnyFloatingDType], **keywordArguments: Any) -> numpy.ndarray:
|
|
16
|
+
arrayRMS = librosa.feature.rms(y=waveform, **keywordArguments)
|
|
17
|
+
return 20 * numpy.log10(arrayRMS, where=(arrayRMS != 0)) # dB
|
|
17
18
|
|
|
18
19
|
@registrationAudioAspect('Tempo')
|
|
19
|
-
def analyzeTempo(waveform:
|
|
20
|
-
|
|
21
|
-
|
|
20
|
+
def analyzeTempo(waveform: ToArray2D[AnyFloatingDType], sampleRate: int, **keywordArguments: Any) -> numpy.ndarray:
|
|
21
|
+
tempogram = audioAspects['Tempogram']['analyzer'](waveform, sampleRate)
|
|
22
|
+
return librosa.feature.tempo(y=waveform, sr=sampleRate, tg=tempogram, **keywordArguments)
|
|
22
23
|
|
|
23
24
|
@registrationAudioAspect('Zero-crossing rate') # This is distinct from 'Zero-crossings rate'
|
|
24
|
-
def analyzeZeroCrossingRate(waveform:
|
|
25
|
-
|
|
25
|
+
def analyzeZeroCrossingRate(waveform: ToArray2D[AnyFloatingDType], **keywordArguments: Any) -> numpy.ndarray:
|
|
26
|
+
return librosa.feature.zero_crossing_rate(y=waveform, **keywordArguments)
|