analyzeAudio 0.0.11__py3-none-any.whl → 0.0.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analyzeAudio/__init__.py +5 -5
- analyzeAudio/analyzersUseFilename.py +149 -145
- analyzeAudio/analyzersUseSpectrogram.py +14 -12
- analyzeAudio/analyzersUseTensor.py +4 -4
- analyzeAudio/analyzersUseWaveform.py +11 -10
- analyzeAudio/audioAspectsRegistry.py +166 -167
- analyzeAudio/pythonator.py +113 -77
- analyzeaudio-0.0.13.dist-info/LICENSE +407 -0
- analyzeaudio-0.0.13.dist-info/METADATA +181 -0
- analyzeaudio-0.0.13.dist-info/RECORD +16 -0
- {analyzeAudio-0.0.11.dist-info → analyzeaudio-0.0.13.dist-info}/WHEEL +1 -1
- analyzeaudio-0.0.13.dist-info/entry_points.txt +2 -0
- tests/test_audioAspectsRegistry.py +0 -1
- tests/test_other.py +7 -10
- analyzeAudio-0.0.11.dist-info/METADATA +0 -108
- analyzeAudio-0.0.11.dist-info/RECORD +0 -14
- {analyzeAudio-0.0.11.dist-info → analyzeaudio-0.0.13.dist-info}/top_level.txt +0 -0
analyzeAudio/__init__.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
from .audioAspectsRegistry import registrationAudioAspect, cacheAudioAnalyzers, analyzeAudioFile, \
|
|
2
|
-
|
|
2
|
+
analyzeAudioListPathFilenames, getListAvailableAudioAspects, audioAspects
|
|
3
3
|
|
|
4
4
|
__all__ = [
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
5
|
+
'analyzeAudioFile',
|
|
6
|
+
'analyzeAudioListPathFilenames',
|
|
7
|
+
'audioAspects',
|
|
8
|
+
'getListAvailableAudioAspects',
|
|
9
9
|
]
|
|
10
10
|
|
|
11
11
|
from . import analyzersUseFilename
|
|
@@ -1,234 +1,238 @@
|
|
|
1
1
|
from .pythonator import pythonizeFFprobe
|
|
2
2
|
from analyzeAudio import registrationAudioAspect, cacheAudioAnalyzers
|
|
3
|
+
from os import PathLike
|
|
3
4
|
from statistics import mean
|
|
4
|
-
from typing import Any,
|
|
5
|
+
from typing import Any, cast
|
|
5
6
|
import cachetools
|
|
6
7
|
import numpy
|
|
7
|
-
import os
|
|
8
8
|
import pathlib
|
|
9
9
|
import re as regex
|
|
10
10
|
import subprocess
|
|
11
11
|
|
|
12
12
|
@registrationAudioAspect('SI-SDR mean')
|
|
13
|
-
def getSI_SDRmean(pathFilenameAlpha:
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
13
|
+
def getSI_SDRmean(pathFilenameAlpha: str | PathLike[Any], pathFilenameBeta: str | PathLike[Any]) -> float | None:
|
|
14
|
+
"""
|
|
15
|
+
Calculate the mean Scale-Invariant Signal-to-Distortion Ratio (SI-SDR) between two audio files.
|
|
16
|
+
This function uses FFmpeg to compute the SI-SDR between two audio files specified by their paths.
|
|
17
|
+
The SI-SDR values are extracted from the FFmpeg output and their mean is calculated.
|
|
18
|
+
Parameters:
|
|
19
|
+
pathFilenameAlpha: Path to the first audio file.
|
|
20
|
+
pathFilenameBeta: Path to the second audio file.
|
|
21
|
+
Returns:
|
|
22
|
+
SI_SDRmean: The mean SI-SDR value in decibels (dB).
|
|
23
|
+
Raises:
|
|
24
|
+
subprocess.CalledProcessError: If the FFmpeg command fails.
|
|
25
|
+
ValueError: If no SI-SDR values are found in the FFmpeg output.
|
|
26
|
+
"""
|
|
27
|
+
commandLineFFmpeg = [
|
|
28
|
+
'ffmpeg', '-hide_banner', '-loglevel', '32',
|
|
29
|
+
'-i', f'{str(pathlib.Path(pathFilenameAlpha))}', '-i', f'{str(pathlib.Path(pathFilenameBeta))}',
|
|
30
|
+
'-filter_complex', '[0][1]asisdr', '-f', 'null', '-'
|
|
31
|
+
]
|
|
32
|
+
systemProcessFFmpeg = subprocess.run(commandLineFFmpeg, check=True, stderr=subprocess.PIPE)
|
|
33
|
+
|
|
34
|
+
stderrFFmpeg = systemProcessFFmpeg.stderr.decode()
|
|
35
|
+
|
|
36
|
+
regexSI_SDR = regex.compile(r"^\[Parsed_asisdr_.* (.*) dB", regex.MULTILINE)
|
|
37
|
+
|
|
38
|
+
listMatchesSI_SDR = regexSI_SDR.findall(stderrFFmpeg)
|
|
39
|
+
SI_SDRmean = mean(float(match) for match in listMatchesSI_SDR)
|
|
40
|
+
return SI_SDRmean
|
|
41
41
|
|
|
42
42
|
@cachetools.cached(cache=cacheAudioAnalyzers)
|
|
43
|
-
def ffprobeShotgunAndCache(pathFilename:
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
43
|
+
def ffprobeShotgunAndCache(pathFilename: str | PathLike[Any]) -> dict[str, float | numpy.ndarray]:
|
|
44
|
+
# for lavfi amovie/movie, the colons after driveLetter letters need to be escaped twice.
|
|
45
|
+
pFn = pathlib.PureWindowsPath(pathFilename)
|
|
46
|
+
lavfiPathFilename = pFn.drive.replace(":", "\\\\:")+pathlib.PureWindowsPath(pFn.root,pFn.relative_to(pFn.anchor)).as_posix()
|
|
47
|
+
|
|
48
|
+
filterChain: list[str] = []
|
|
49
|
+
filterChain += ["astats=metadata=1:measure_perchannel=Crest_factor+Zero_crossings_rate+Dynamic_range:measure_overall=all"]
|
|
50
|
+
filterChain += ["aspectralstats"]
|
|
51
|
+
filterChain += ["ebur128=metadata=1:framelog=quiet"]
|
|
52
|
+
|
|
53
|
+
entriesFFprobe = ["frame_tags"]
|
|
54
|
+
|
|
55
|
+
commandLineFFprobe = [
|
|
56
|
+
"ffprobe", "-hide_banner",
|
|
57
|
+
"-f", "lavfi", f"amovie={lavfiPathFilename},{','.join(filterChain)}",
|
|
58
|
+
"-show_entries", ':'.join(entriesFFprobe),
|
|
59
|
+
"-output_format", "json=compact=1",
|
|
60
|
+
]
|
|
61
|
+
|
|
62
|
+
systemProcessFFprobe = subprocess.Popen(commandLineFFprobe, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
|
63
|
+
stdoutFFprobe, _DISCARDstderr = systemProcessFFprobe.communicate()
|
|
64
|
+
FFprobeStructured = pythonizeFFprobe(stdoutFFprobe.decode('utf-8'))[-1]
|
|
65
|
+
|
|
66
|
+
dictionaryAspectsAnalyzed: dict[str, float | numpy.ndarray] = {}
|
|
67
|
+
if 'aspectralstats' in FFprobeStructured:
|
|
68
|
+
for keyName in FFprobeStructured['aspectralstats']:
|
|
69
|
+
# No matter how many channels, each keyName is `numpy.ndarray[tuple[int, int], numpy.dtype[numpy.float64]]`
|
|
70
|
+
# where `tuple[int, int]` is (channel, frame)
|
|
71
|
+
# NOTE (as of this writing) `registrar` can only understand the generic class `numpy.ndarray` and not more specific typing
|
|
72
|
+
dictionaryAspectsAnalyzed[keyName] = FFprobeStructured['aspectralstats'][keyName]
|
|
73
|
+
# dictionaryAspectsAnalyzed[keyName] = numpy.mean(FFprobeStructured['aspectralstats'][keyName]).astype(float)
|
|
74
|
+
if 'r128' in FFprobeStructured:
|
|
75
|
+
for keyName in FFprobeStructured['r128']:
|
|
76
|
+
dictionaryAspectsAnalyzed[keyName] = FFprobeStructured['r128'][keyName][-1]
|
|
77
|
+
if 'astats' in FFprobeStructured:
|
|
78
|
+
for keyName, arrayFeatureValues in cast(dict[str, numpy.ndarray[Any, Any]], FFprobeStructured['astats']).items():
|
|
79
|
+
dictionaryAspectsAnalyzed[keyName.split('.')[-1]] = numpy.mean(arrayFeatureValues[..., -1:None]).astype(float)
|
|
80
|
+
|
|
81
|
+
return dictionaryAspectsAnalyzed
|
|
78
82
|
|
|
79
83
|
@registrationAudioAspect('Zero-crossings rate')
|
|
80
|
-
def analyzeZero_crossings_rate(pathFilename:
|
|
81
|
-
|
|
84
|
+
def analyzeZero_crossings_rate(pathFilename: str | PathLike[Any]) -> float | None:
|
|
85
|
+
return ffprobeShotgunAndCache(pathFilename).get('Zero_crossings_rate')
|
|
82
86
|
|
|
83
87
|
@registrationAudioAspect('DC offset')
|
|
84
|
-
def analyzeDCoffset(pathFilename:
|
|
85
|
-
|
|
88
|
+
def analyzeDCoffset(pathFilename: str | PathLike[Any]) -> float | None:
|
|
89
|
+
return ffprobeShotgunAndCache(pathFilename).get('DC_offset')
|
|
86
90
|
|
|
87
91
|
@registrationAudioAspect('Dynamic range')
|
|
88
|
-
def analyzeDynamicRange(pathFilename:
|
|
89
|
-
|
|
92
|
+
def analyzeDynamicRange(pathFilename: str | PathLike[Any]) -> float | None:
|
|
93
|
+
return ffprobeShotgunAndCache(pathFilename).get('Dynamic_range')
|
|
90
94
|
|
|
91
95
|
@registrationAudioAspect('Signal entropy')
|
|
92
|
-
def analyzeSignalEntropy(pathFilename:
|
|
93
|
-
|
|
96
|
+
def analyzeSignalEntropy(pathFilename: str | PathLike[Any]) -> float | None:
|
|
97
|
+
return ffprobeShotgunAndCache(pathFilename).get('Entropy')
|
|
94
98
|
|
|
95
99
|
@registrationAudioAspect('Duration-samples')
|
|
96
|
-
def analyzeNumber_of_samples(pathFilename:
|
|
97
|
-
|
|
100
|
+
def analyzeNumber_of_samples(pathFilename: str | PathLike[Any]) -> float | None:
|
|
101
|
+
return ffprobeShotgunAndCache(pathFilename).get('Number_of_samples')
|
|
98
102
|
|
|
99
103
|
@registrationAudioAspect('Peak dB')
|
|
100
|
-
def analyzePeak_level(pathFilename:
|
|
101
|
-
|
|
104
|
+
def analyzePeak_level(pathFilename: str | PathLike[Any]) -> float | None:
|
|
105
|
+
return ffprobeShotgunAndCache(pathFilename).get('Peak_level')
|
|
102
106
|
|
|
103
107
|
@registrationAudioAspect('RMS total')
|
|
104
|
-
def analyzeRMS_level(pathFilename:
|
|
105
|
-
|
|
108
|
+
def analyzeRMS_level(pathFilename: str | PathLike[Any]) -> float | None:
|
|
109
|
+
return ffprobeShotgunAndCache(pathFilename).get('RMS_level')
|
|
106
110
|
|
|
107
111
|
@registrationAudioAspect('Crest factor')
|
|
108
|
-
def analyzeCrest_factor(pathFilename:
|
|
109
|
-
|
|
112
|
+
def analyzeCrest_factor(pathFilename: str | PathLike[Any]) -> float | None:
|
|
113
|
+
return ffprobeShotgunAndCache(pathFilename).get('Crest_factor')
|
|
110
114
|
|
|
111
115
|
@registrationAudioAspect('RMS peak')
|
|
112
|
-
def analyzeRMS_peak(pathFilename:
|
|
113
|
-
|
|
116
|
+
def analyzeRMS_peak(pathFilename: str | PathLike[Any]) -> float | None:
|
|
117
|
+
return ffprobeShotgunAndCache(pathFilename).get('RMS_peak')
|
|
114
118
|
|
|
115
119
|
@registrationAudioAspect('LUFS integrated')
|
|
116
|
-
def analyzeLUFSintegrated(pathFilename:
|
|
117
|
-
|
|
120
|
+
def analyzeLUFSintegrated(pathFilename: str | PathLike[Any]) -> float | None:
|
|
121
|
+
return ffprobeShotgunAndCache(pathFilename).get('I')
|
|
118
122
|
|
|
119
123
|
@registrationAudioAspect('LUFS loudness range')
|
|
120
|
-
def analyzeLRA(pathFilename:
|
|
121
|
-
|
|
124
|
+
def analyzeLRA(pathFilename: str | PathLike[Any]) -> float | None:
|
|
125
|
+
return ffprobeShotgunAndCache(pathFilename).get('LRA')
|
|
122
126
|
|
|
123
127
|
@registrationAudioAspect('LUFS low')
|
|
124
|
-
def analyzeLUFSlow(pathFilename:
|
|
125
|
-
|
|
128
|
+
def analyzeLUFSlow(pathFilename: str | PathLike[Any]) -> float | None:
|
|
129
|
+
return ffprobeShotgunAndCache(pathFilename).get('LRA.low')
|
|
126
130
|
|
|
127
131
|
@registrationAudioAspect('LUFS high')
|
|
128
|
-
def analyzeLUFShigh(pathFilename:
|
|
129
|
-
|
|
132
|
+
def analyzeLUFShigh(pathFilename: str | PathLike[Any]) -> float | None:
|
|
133
|
+
return ffprobeShotgunAndCache(pathFilename).get('LRA.high')
|
|
130
134
|
|
|
131
|
-
@registrationAudioAspect('
|
|
132
|
-
def analyzeMean(pathFilename:
|
|
133
|
-
|
|
135
|
+
@registrationAudioAspect('Power spectral density')
|
|
136
|
+
def analyzeMean(pathFilename: str | PathLike[Any]) -> numpy.ndarray:
|
|
137
|
+
return ffprobeShotgunAndCache(pathFilename).get('mean')
|
|
134
138
|
|
|
135
139
|
@registrationAudioAspect('Spectral variance')
|
|
136
|
-
def analyzeVariance(pathFilename:
|
|
137
|
-
|
|
140
|
+
def analyzeVariance(pathFilename: str | PathLike[Any]) -> numpy.ndarray:
|
|
141
|
+
return ffprobeShotgunAndCache(pathFilename).get('variance')
|
|
138
142
|
|
|
139
143
|
@registrationAudioAspect('Spectral centroid')
|
|
140
|
-
def analyzeCentroid(pathFilename:
|
|
141
|
-
|
|
144
|
+
def analyzeCentroid(pathFilename: str | PathLike[Any]) -> numpy.ndarray:
|
|
145
|
+
return ffprobeShotgunAndCache(pathFilename).get('centroid')
|
|
142
146
|
|
|
143
147
|
@registrationAudioAspect('Spectral spread')
|
|
144
|
-
def analyzeSpread(pathFilename:
|
|
145
|
-
|
|
148
|
+
def analyzeSpread(pathFilename: str | PathLike[Any]) -> numpy.ndarray:
|
|
149
|
+
return ffprobeShotgunAndCache(pathFilename).get('spread')
|
|
146
150
|
|
|
147
151
|
@registrationAudioAspect('Spectral skewness')
|
|
148
|
-
def analyzeSkewness(pathFilename:
|
|
149
|
-
|
|
152
|
+
def analyzeSkewness(pathFilename: str | PathLike[Any]) -> numpy.ndarray:
|
|
153
|
+
return ffprobeShotgunAndCache(pathFilename).get('skewness')
|
|
150
154
|
|
|
151
155
|
@registrationAudioAspect('Spectral kurtosis')
|
|
152
|
-
def analyzeKurtosis(pathFilename:
|
|
153
|
-
|
|
156
|
+
def analyzeKurtosis(pathFilename: str | PathLike[Any]) -> numpy.ndarray:
|
|
157
|
+
return ffprobeShotgunAndCache(pathFilename).get('kurtosis')
|
|
154
158
|
|
|
155
159
|
@registrationAudioAspect('Spectral entropy')
|
|
156
|
-
def analyzeSpectralEntropy(pathFilename:
|
|
157
|
-
|
|
160
|
+
def analyzeSpectralEntropy(pathFilename: str | PathLike[Any]) -> numpy.ndarray:
|
|
161
|
+
return ffprobeShotgunAndCache(pathFilename).get('entropy')
|
|
158
162
|
|
|
159
163
|
@registrationAudioAspect('Spectral flatness')
|
|
160
|
-
def analyzeFlatness(pathFilename:
|
|
161
|
-
|
|
164
|
+
def analyzeFlatness(pathFilename: str | PathLike[Any]) -> numpy.ndarray:
|
|
165
|
+
return ffprobeShotgunAndCache(pathFilename).get('flatness')
|
|
162
166
|
|
|
163
167
|
@registrationAudioAspect('Spectral crest')
|
|
164
|
-
def analyzeCrest(pathFilename:
|
|
165
|
-
|
|
168
|
+
def analyzeCrest(pathFilename: str | PathLike[Any]) -> numpy.ndarray:
|
|
169
|
+
return ffprobeShotgunAndCache(pathFilename).get('crest')
|
|
166
170
|
|
|
167
171
|
@registrationAudioAspect('Spectral flux')
|
|
168
|
-
def analyzeFlux(pathFilename:
|
|
169
|
-
|
|
172
|
+
def analyzeFlux(pathFilename: str | PathLike[Any]) -> numpy.ndarray:
|
|
173
|
+
return ffprobeShotgunAndCache(pathFilename).get('flux')
|
|
170
174
|
|
|
171
175
|
@registrationAudioAspect('Spectral slope')
|
|
172
|
-
def analyzeSlope(pathFilename:
|
|
173
|
-
|
|
176
|
+
def analyzeSlope(pathFilename: str | PathLike[Any]) -> numpy.ndarray:
|
|
177
|
+
return ffprobeShotgunAndCache(pathFilename).get('slope')
|
|
174
178
|
|
|
175
179
|
@registrationAudioAspect('Spectral decrease')
|
|
176
|
-
def analyzeDecrease(pathFilename:
|
|
177
|
-
|
|
180
|
+
def analyzeDecrease(pathFilename: str | PathLike[Any]) -> numpy.ndarray:
|
|
181
|
+
return ffprobeShotgunAndCache(pathFilename).get('decrease')
|
|
178
182
|
|
|
179
183
|
@registrationAudioAspect('Spectral rolloff')
|
|
180
|
-
def analyzeRolloff(pathFilename:
|
|
181
|
-
|
|
184
|
+
def analyzeRolloff(pathFilename: str | PathLike[Any]) -> numpy.ndarray:
|
|
185
|
+
return ffprobeShotgunAndCache(pathFilename).get('rolloff')
|
|
182
186
|
|
|
183
187
|
@registrationAudioAspect('Abs_Peak_count')
|
|
184
|
-
def analyzeAbs_Peak_count(pathFilename:
|
|
185
|
-
|
|
186
|
-
|
|
188
|
+
def analyzeAbs_Peak_count(pathFilename: str | PathLike[Any]) -> float | None:
|
|
189
|
+
print('Abs_Peak_count', pathFilename)
|
|
190
|
+
return ffprobeShotgunAndCache(pathFilename).get('Abs_Peak_count')
|
|
187
191
|
|
|
188
192
|
@registrationAudioAspect('Bit_depth')
|
|
189
|
-
def analyzeBit_depth(pathFilename:
|
|
190
|
-
|
|
193
|
+
def analyzeBit_depth(pathFilename: str | PathLike[Any]) -> float | None:
|
|
194
|
+
return ffprobeShotgunAndCache(pathFilename).get('Bit_depth')
|
|
191
195
|
|
|
192
196
|
@registrationAudioAspect('Flat_factor')
|
|
193
|
-
def analyzeFlat_factor(pathFilename:
|
|
194
|
-
|
|
197
|
+
def analyzeFlat_factor(pathFilename: str | PathLike[Any]) -> float | None:
|
|
198
|
+
return ffprobeShotgunAndCache(pathFilename).get('Flat_factor')
|
|
195
199
|
|
|
196
200
|
@registrationAudioAspect('Max_difference')
|
|
197
|
-
def analyzeMax_difference(pathFilename:
|
|
198
|
-
|
|
201
|
+
def analyzeMax_difference(pathFilename: str | PathLike[Any]) -> float | None:
|
|
202
|
+
return ffprobeShotgunAndCache(pathFilename).get('Max_difference')
|
|
199
203
|
|
|
200
204
|
@registrationAudioAspect('Max_level')
|
|
201
|
-
def analyzeMax_level(pathFilename:
|
|
202
|
-
|
|
205
|
+
def analyzeMax_level(pathFilename: str | PathLike[Any]) -> float | None:
|
|
206
|
+
return ffprobeShotgunAndCache(pathFilename).get('Max_level')
|
|
203
207
|
|
|
204
208
|
@registrationAudioAspect('Mean_difference')
|
|
205
|
-
def analyzeMean_difference(pathFilename:
|
|
206
|
-
|
|
209
|
+
def analyzeMean_difference(pathFilename: str | PathLike[Any]) -> float | None:
|
|
210
|
+
return ffprobeShotgunAndCache(pathFilename).get('Mean_difference')
|
|
207
211
|
|
|
208
212
|
@registrationAudioAspect('Min_difference')
|
|
209
|
-
def analyzeMin_difference(pathFilename:
|
|
210
|
-
|
|
213
|
+
def analyzeMin_difference(pathFilename: str | PathLike[Any]) -> float | None:
|
|
214
|
+
return ffprobeShotgunAndCache(pathFilename).get('Min_difference')
|
|
211
215
|
|
|
212
216
|
@registrationAudioAspect('Min_level')
|
|
213
|
-
def analyzeMin_level(pathFilename:
|
|
214
|
-
|
|
217
|
+
def analyzeMin_level(pathFilename: str | PathLike[Any]) -> float | None:
|
|
218
|
+
return ffprobeShotgunAndCache(pathFilename).get('Min_level')
|
|
215
219
|
|
|
216
220
|
@registrationAudioAspect('Noise_floor')
|
|
217
|
-
def analyzeNoise_floor(pathFilename:
|
|
218
|
-
|
|
221
|
+
def analyzeNoise_floor(pathFilename: str | PathLike[Any]) -> float | None:
|
|
222
|
+
return ffprobeShotgunAndCache(pathFilename).get('Noise_floor')
|
|
219
223
|
|
|
220
224
|
@registrationAudioAspect('Noise_floor_count')
|
|
221
|
-
def analyzeNoise_floor_count(pathFilename:
|
|
222
|
-
|
|
225
|
+
def analyzeNoise_floor_count(pathFilename: str | PathLike[Any]) -> float | None:
|
|
226
|
+
return ffprobeShotgunAndCache(pathFilename).get('Noise_floor_count')
|
|
223
227
|
|
|
224
228
|
@registrationAudioAspect('Peak_count')
|
|
225
|
-
def analyzePeak_count(pathFilename:
|
|
226
|
-
|
|
229
|
+
def analyzePeak_count(pathFilename: str | PathLike[Any]) -> float | None:
|
|
230
|
+
return ffprobeShotgunAndCache(pathFilename).get('Peak_count')
|
|
227
231
|
|
|
228
232
|
@registrationAudioAspect('RMS_difference')
|
|
229
|
-
def analyzeRMS_difference(pathFilename:
|
|
230
|
-
|
|
233
|
+
def analyzeRMS_difference(pathFilename: str | PathLike[Any]) -> float | None:
|
|
234
|
+
return ffprobeShotgunAndCache(pathFilename).get('RMS_difference')
|
|
231
235
|
|
|
232
236
|
@registrationAudioAspect('RMS_trough')
|
|
233
|
-
def analyzeRMS_trough(pathFilename:
|
|
234
|
-
|
|
237
|
+
def analyzeRMS_trough(pathFilename: str | PathLike[Any]) -> float | None:
|
|
238
|
+
return ffprobeShotgunAndCache(pathFilename).get('RMS_trough')
|
|
@@ -3,26 +3,28 @@ from typing import Any
|
|
|
3
3
|
import cachetools
|
|
4
4
|
import librosa
|
|
5
5
|
import numpy
|
|
6
|
+
from optype.numpy import AnyFloatingDType, ToArray3D, ToFloat3D
|
|
7
|
+
from numpy import dtype, floating
|
|
6
8
|
|
|
7
9
|
@registrationAudioAspect('Chromagram')
|
|
8
|
-
def analyzeChromagram(spectrogramPower: numpy.ndarray, sampleRate: int, **keywordArguments: Any) -> numpy.ndarray:
|
|
9
|
-
|
|
10
|
+
def analyzeChromagram(spectrogramPower: numpy.ndarray[Any, dtype[floating[Any]]], sampleRate: int, **keywordArguments: Any) -> numpy.ndarray:
|
|
11
|
+
return librosa.feature.chroma_stft(S=spectrogramPower, sr=sampleRate, **keywordArguments)
|
|
10
12
|
|
|
11
13
|
@registrationAudioAspect('Spectral Contrast')
|
|
12
|
-
def analyzeSpectralContrast(spectrogramMagnitude: numpy.ndarray, **keywordArguments: Any) -> numpy.ndarray:
|
|
13
|
-
|
|
14
|
+
def analyzeSpectralContrast(spectrogramMagnitude: numpy.ndarray[Any, dtype[floating[Any]]], **keywordArguments: Any) -> numpy.ndarray:
|
|
15
|
+
return librosa.feature.spectral_contrast(S=spectrogramMagnitude, **keywordArguments)
|
|
14
16
|
|
|
15
17
|
@registrationAudioAspect('Spectral Bandwidth')
|
|
16
|
-
def analyzeSpectralBandwidth(spectrogramMagnitude: numpy.ndarray, **keywordArguments: Any) -> numpy.ndarray:
|
|
17
|
-
|
|
18
|
-
|
|
18
|
+
def analyzeSpectralBandwidth(spectrogramMagnitude: numpy.ndarray[Any, dtype[floating[Any]]], **keywordArguments: Any) -> numpy.ndarray:
|
|
19
|
+
centroid = audioAspects['Spectral Centroid']['analyzer'](spectrogramMagnitude)
|
|
20
|
+
return librosa.feature.spectral_bandwidth(S=spectrogramMagnitude, centroid=centroid, **keywordArguments)
|
|
19
21
|
|
|
20
22
|
@cachetools.cached(cache=cacheAudioAnalyzers)
|
|
21
23
|
@registrationAudioAspect('Spectral Centroid')
|
|
22
|
-
def analyzeSpectralCentroid(spectrogramMagnitude: numpy.ndarray, **keywordArguments: Any) -> numpy.ndarray:
|
|
23
|
-
|
|
24
|
+
def analyzeSpectralCentroid(spectrogramMagnitude: numpy.ndarray[Any, dtype[floating[Any]]], **keywordArguments: Any) -> numpy.ndarray:
|
|
25
|
+
return librosa.feature.spectral_centroid(S=spectrogramMagnitude, **keywordArguments)
|
|
24
26
|
|
|
25
27
|
@registrationAudioAspect('Spectral Flatness')
|
|
26
|
-
def analyzeSpectralFlatness(spectrogramMagnitude: numpy.ndarray, **keywordArguments: Any) -> numpy.ndarray:
|
|
27
|
-
|
|
28
|
-
|
|
28
|
+
def analyzeSpectralFlatness(spectrogramMagnitude: numpy.ndarray[Any, dtype[floating[Any]]], **keywordArguments: Any) -> numpy.ndarray:
|
|
29
|
+
spectralFlatness = librosa.feature.spectral_flatness(S=spectrogramMagnitude, **keywordArguments)
|
|
30
|
+
return 20 * numpy.log10(spectralFlatness, where=(spectralFlatness != 0)) # dB
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
from analyzeAudio import registrationAudioAspect
|
|
2
2
|
from torchmetrics.functional.audio.srmr import speech_reverberation_modulation_energy_ratio
|
|
3
|
-
from typing import Any
|
|
3
|
+
from typing import Any
|
|
4
4
|
import numpy
|
|
5
5
|
import torch
|
|
6
6
|
|
|
7
7
|
@registrationAudioAspect('SRMR')
|
|
8
|
-
def analyzeSRMR(tensorAudio: torch.Tensor, sampleRate: int, pytorchOnCPU:
|
|
9
|
-
|
|
10
|
-
|
|
8
|
+
def analyzeSRMR(tensorAudio: torch.Tensor, sampleRate: int, pytorchOnCPU: bool | None, **keywordArguments: Any) -> numpy.ndarray:
|
|
9
|
+
keywordArguments['fast'] = keywordArguments.get('fast') or pytorchOnCPU or None
|
|
10
|
+
return torch.Tensor.numpy(speech_reverberation_modulation_energy_ratio(tensorAudio, sampleRate, **keywordArguments))
|
|
@@ -2,24 +2,25 @@ from analyzeAudio import registrationAudioAspect, audioAspects, cacheAudioAnalyz
|
|
|
2
2
|
from typing import Any
|
|
3
3
|
import librosa
|
|
4
4
|
import numpy
|
|
5
|
+
from optype.numpy import ToArray2D, AnyFloatingDType
|
|
5
6
|
import cachetools
|
|
6
7
|
|
|
7
8
|
@cachetools.cached(cache=cacheAudioAnalyzers)
|
|
8
9
|
@registrationAudioAspect('Tempogram')
|
|
9
|
-
def analyzeTempogram(waveform:
|
|
10
|
-
|
|
10
|
+
def analyzeTempogram(waveform: ToArray2D[AnyFloatingDType], sampleRate: int, **keywordArguments: Any) -> numpy.ndarray:
|
|
11
|
+
return librosa.feature.tempogram(y=waveform, sr=sampleRate, **keywordArguments)
|
|
11
12
|
|
|
12
13
|
# "RMS value from audio samples is faster ... However, ... spectrogram ... more accurate ... because ... windowed"
|
|
13
14
|
@registrationAudioAspect('RMS from waveform')
|
|
14
|
-
def analyzeRMS(waveform:
|
|
15
|
-
|
|
16
|
-
|
|
15
|
+
def analyzeRMS(waveform: ToArray2D[AnyFloatingDType], **keywordArguments: Any) -> numpy.ndarray:
|
|
16
|
+
arrayRMS = librosa.feature.rms(y=waveform, **keywordArguments)
|
|
17
|
+
return 20 * numpy.log10(arrayRMS, where=(arrayRMS != 0)) # dB
|
|
17
18
|
|
|
18
19
|
@registrationAudioAspect('Tempo')
|
|
19
|
-
def analyzeTempo(waveform:
|
|
20
|
-
|
|
21
|
-
|
|
20
|
+
def analyzeTempo(waveform: ToArray2D[AnyFloatingDType], sampleRate: int, **keywordArguments: Any) -> numpy.ndarray:
|
|
21
|
+
tempogram = audioAspects['Tempogram']['analyzer'](waveform, sampleRate)
|
|
22
|
+
return librosa.feature.tempo(y=waveform, sr=sampleRate, tg=tempogram, **keywordArguments)
|
|
22
23
|
|
|
23
24
|
@registrationAudioAspect('Zero-crossing rate') # This is distinct from 'Zero-crossings rate'
|
|
24
|
-
def analyzeZeroCrossingRate(waveform:
|
|
25
|
-
|
|
25
|
+
def analyzeZeroCrossingRate(waveform: ToArray2D[AnyFloatingDType], **keywordArguments: Any) -> numpy.ndarray:
|
|
26
|
+
return librosa.feature.zero_crossing_rate(y=waveform, **keywordArguments)
|