nkululeko 0.94.3__py3-none-any.whl → 0.95.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/augmenting/resampler.py +5 -2
- nkululeko/autopredict/ap_emotion.py +36 -0
- nkululeko/autopredict/ap_text.py +45 -0
- nkululeko/autopredict/tests/__init__.py +0 -0
- nkululeko/autopredict/tests/test_whisper_transcriber.py +122 -0
- nkululeko/autopredict/whisper_transcriber.py +81 -0
- nkululeko/balance.py +222 -0
- nkululeko/constants.py +1 -1
- nkululeko/experiment.py +53 -3
- nkululeko/explore.py +32 -13
- nkululeko/feat_extract/feats_analyser.py +45 -17
- nkululeko/feat_extract/feats_emotion2vec.py +51 -26
- nkululeko/feat_extract/feats_praat.py +3 -3
- nkululeko/feat_extract/feats_praat_core.py +769 -0
- nkululeko/feat_extract/tests/__init__.py +1 -0
- nkululeko/feat_extract/tests/test_feats_opensmile.py +162 -0
- nkululeko/feat_extract/tests/test_feats_praat_core.py +507 -0
- nkululeko/glob_conf.py +9 -0
- nkululeko/modelrunner.py +15 -39
- nkululeko/models/model.py +4 -42
- nkululeko/models/model_tuned.py +416 -84
- nkululeko/models/model_xgb.py +148 -2
- nkululeko/models/tests/test_model_knn.py +49 -0
- nkululeko/models/tests/test_model_mlp.py +153 -0
- nkululeko/models/tests/test_model_xgb.py +33 -0
- nkululeko/nkululeko.py +0 -9
- nkululeko/plots.py +25 -19
- nkululeko/predict.py +8 -6
- nkululeko/reporting/report.py +7 -5
- nkululeko/reporting/reporter.py +20 -5
- nkululeko/test_predictor.py +7 -1
- nkululeko/tests/__init__.py +1 -0
- nkululeko/tests/test_balancing.py +270 -0
- nkululeko/utils/util.py +38 -6
- {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/METADATA +1 -1
- {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/RECORD +40 -27
- nkululeko/feat_extract/feats_opensmile copy.py +0 -93
- nkululeko/feat_extract/feinberg_praat.py +0 -628
- {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/WHEEL +0 -0
- {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/entry_points.txt +0 -0
- {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/licenses/LICENSE +0 -0
- {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/top_level.txt +0 -0
@@ -1,628 +0,0 @@
|
|
1
|
-
"""This is a copy of David R. Feinberg's Praat scripts.
|
2
|
-
https://github.com/drfeinberg/PraatScripts
|
3
|
-
taken June 23rd 2022.
|
4
|
-
"""
|
5
|
-
|
6
|
-
#!/usr/bin/env python3
|
7
|
-
import math
|
8
|
-
import statistics
|
9
|
-
|
10
|
-
import audiofile
|
11
|
-
import numpy as np
|
12
|
-
import pandas as pd
|
13
|
-
import parselmouth
|
14
|
-
from parselmouth.praat import call
|
15
|
-
from scipy.stats.mstats import zscore
|
16
|
-
from sklearn.decomposition import PCA
|
17
|
-
from tqdm import tqdm
|
18
|
-
|
19
|
-
# This is the function to measure source acoustics using default male parameters.
|
20
|
-
|
21
|
-
|
22
|
-
def measure_pitch(voice_id, f0min, f0max, unit):
    """Measure duration, pitch, HNR, jitter and shimmer of one sound.

    This is the function to measure source acoustics using default male
    parameters (Feinberg's Praat scripts, via parselmouth).

    Args:
        voice_id: anything ``parselmouth.Sound`` accepts (file path or an
            existing Sound object).
        f0min: pitch floor in Hz for the pitch / pulse analyses.
        f0max: pitch ceiling in Hz.
        unit: unit string passed to the Praat pitch queries (e.g. "Hertz").

    Returns:
        Tuple of 15 floats: (duration, mean_f0, stdev_f0, hnr,
        local_jitter, localabsolute_jitter, rap_jitter, ppq5_jitter,
        ddp_jitter, local_shimmer, localdb_shimmer, apq3_shimmer,
        aqpq5_shimmer, apq11_shimmer, dda_shimmer).
    """
    sound = parselmouth.Sound(voice_id)  # read the sound
    duration = call(sound, "Get total duration")  # duration
    pitch = call(sound, "To Pitch", 0.0, f0min, f0max)  # create a praat pitch object
    mean_f0 = call(pitch, "Get mean", 0, 0, unit)  # get mean pitch
    stdev_f0 = call(
        pitch, "Get standard deviation", 0, 0, unit
    )  # get standard deviation
    # Harmonics-to-noise ratio from the cross-correlation harmonicity object.
    harmonicity = call(sound, "To Harmonicity (cc)", 0.01, f0min, 0.1, 1.0)
    hnr = call(harmonicity, "Get mean", 0, 0)
    # Glottal pulses; basis for all jitter/shimmer measures below.
    point_process = call(sound, "To PointProcess (periodic, cc)", f0min, f0max)
    local_jitter = call(point_process, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3)
    localabsolute_jitter = call(
        point_process, "Get jitter (local, absolute)", 0, 0, 0.0001, 0.02, 1.3
    )
    rap_jitter = call(point_process, "Get jitter (rap)", 0, 0, 0.0001, 0.02, 1.3)
    ppq5_jitter = call(point_process, "Get jitter (ppq5)", 0, 0, 0.0001, 0.02, 1.3)
    ddp_jitter = call(point_process, "Get jitter (ddp)", 0, 0, 0.0001, 0.02, 1.3)
    # Shimmer variants use both the sound and the pulses.
    local_shimmer = call(
        [sound, point_process],
        "Get shimmer (local)",
        0,
        0,
        0.0001,
        0.02,
        1.3,
        1.6,
    )
    localdb_shimmer = call(
        [sound, point_process],
        "Get shimmer (local_dB)",
        0,
        0,
        0.0001,
        0.02,
        1.3,
        1.6,
    )
    apq3_shimmer = call(
        [sound, point_process],
        "Get shimmer (apq3)",
        0,
        0,
        0.0001,
        0.02,
        1.3,
        1.6,
    )
    aqpq5_shimmer = call(
        [sound, point_process],
        "Get shimmer (apq5)",
        0,
        0,
        0.0001,
        0.02,
        1.3,
        1.6,
    )
    apq11_shimmer = call(
        [sound, point_process],
        "Get shimmer (apq11)",
        0,
        0,
        0.0001,
        0.02,
        1.3,
        1.6,
    )
    dda_shimmer = call(
        [sound, point_process], "Get shimmer (dda)", 0, 0, 0.0001, 0.02, 1.3, 1.6
    )

    return (
        duration,
        mean_f0,
        stdev_f0,
        hnr,
        local_jitter,
        localabsolute_jitter,
        rap_jitter,
        ppq5_jitter,
        ddp_jitter,
        local_shimmer,
        localdb_shimmer,
        apq3_shimmer,
        aqpq5_shimmer,
        apq11_shimmer,
        dda_shimmer,
    )
|
111
|
-
|
112
|
-
|
113
|
-
# ## This function measures formants at each glottal pulse
|
114
|
-
#
|
115
|
-
# Puts, D. A., Apicella, C. L., & Cárdenas, R. A. (2012). Masculine voices signal men's threat potential in forager and industrial societies. Proceedings of the Royal Society of London B: Biological Sciences, 279(1728), 601-609.
|
116
|
-
#
|
117
|
-
# Adapted from: DOI 10.17605/OSF.IO/K2BHS
|
118
|
-
# This function measures formants using Formant Position formula
|
119
|
-
# def measureFormants(sound, wave_file, f0min,f0max):
|
120
|
-
def measure_formants(sound, f0min, f0max):
    """Measure mean and median of the first four formants at glottal pulses.

    Puts, D. A., Apicella, C. L., & Cárdenas, R. A. (2012). Masculine voices
    signal men's threat potential in forager and industrial societies.
    Proceedings of the Royal Society of London B, 279(1728), 601-609.
    Adapted from: DOI 10.17605/OSF.IO/K2BHS

    Args:
        sound: anything ``parselmouth.Sound`` accepts (path or Sound object).
        f0min: pitch floor in Hz used to locate glottal pulses.
        f0max: pitch ceiling in Hz.

    Returns:
        Tuple of 8 floats: (f1_mean, f2_mean, f3_mean, f4_mean,
        f1_median, f2_median, f3_median, f4_median).

    Raises:
        statistics.StatisticsError: if no pulse yields a valid formant value
            (all measurements NaN), because mean/median of an empty list.
    """
    sound = parselmouth.Sound(sound)  # read the sound
    point_process = call(sound, "To PointProcess (periodic, cc)", f0min, f0max)

    formants = call(sound, "To Formant (burg)", 0.0025, 5, 5000, 0.025, 50)
    num_points = call(point_process, "Get number of points")

    # Measure formants only at glottal pulses; Praat point indices are
    # 1-based, hence range(1, num_points + 1) instead of the original
    # `point += 1` hack inside a 0-based loop.
    formant_values = {formant: [] for formant in (1, 2, 3, 4)}
    for point in range(1, num_points + 1):
        t = call(point_process, "Get time from index", point)
        for formant in (1, 2, 3, 4):
            value = call(formants, "Get value at time", formant, t, "Hertz", "Linear")
            # "Get value at time" returns NaN outside the analysis range;
            # skip those so the statistics below see only valid samples.
            if not math.isnan(value):
                formant_values[formant].append(value)

    # calculate mean formants across pulses
    f1_mean = statistics.mean(formant_values[1])
    f2_mean = statistics.mean(formant_values[2])
    f3_mean = statistics.mean(formant_values[3])
    f4_mean = statistics.mean(formant_values[4])

    # calculate median formants across pulses, this is what is used in all
    # subsequent calculations; you can use mean if you want, just edit the
    # downstream code to replace median with mean
    f1_median = statistics.median(formant_values[1])
    f2_median = statistics.median(formant_values[2])
    f3_median = statistics.median(formant_values[3])
    f4_median = statistics.median(formant_values[4])

    return (
        f1_mean,
        f2_mean,
        f3_mean,
        f4_mean,
        f1_median,
        f2_median,
        f3_median,
        f4_median,
    )
|
174
|
-
|
175
|
-
|
176
|
-
# ## This function runs a 2-factor Principle Components Analysis (PCA) on Jitter and Shimmer
|
177
|
-
|
178
|
-
|
179
|
-
def run_pca(df):
    """Run a 2-factor Principal Components Analysis on jitter and shimmer.

    Args:
        df: DataFrame containing the eleven jitter/shimmer columns produced
            by ``measure_pitch`` (localJitter ... ddaShimmer).

    Returns:
        DataFrame with two columns, "JitterPCA" and "ShimmerPCA", one row
        per input row (a single [0, 0] row if PCA is not possible, e.g.
        with fewer than two samples).
    """
    measures = [
        "localJitter",
        "localabsoluteJitter",
        "rapJitter",
        "ppq5Jitter",
        "ddpJitter",
        "localShimmer",
        "localdbShimmer",
        "apq3Shimmer",
        "apq5Shimmer",
        "apq11Shimmer",
        "ddaShimmer",
    ]
    x = df.loc[:, measures].values

    # BUGFIX: the original only tested the FIRST row (np.isnan(x[0])), so
    # NaNs appearing in any later row were never replaced and would crash
    # or poison the PCA. Check (and replace) NaNs over the whole matrix.
    nan_mask = np.isnan(x)
    if nan_mask.any():
        print(f"Warning: {np.count_nonzero(nan_mask)} Nans in x, replacing" " with 0")
        x[nan_mask] = 0

    # PCA with two components: one jitter-dominated, one shimmer-dominated.
    pca = PCA(n_components=2)
    try:
        principal_components = pca.fit_transform(x)
        if np.any(np.isnan(principal_components)):
            print("pc is nan")
            print(f"count: {np.count_nonzero(np.isnan(principal_components))}")
            print(principal_components)
            principal_components = np.nan_to_num(principal_components)
    except ValueError:
        # fit_transform needs at least 2 samples for 2 components.
        print("need more than one file for pca")
        principal_components = [[0, 0]]
    principal_df = pd.DataFrame(
        data=principal_components, columns=["JitterPCA", "ShimmerPCA"]
    )
    return principal_df
|
224
|
-
|
225
|
-
|
226
|
-
# ## This block of code runs the above functions on all of the '.wav' files in the /audio folder
|
227
|
-
|
228
|
-
|
229
|
-
def compute_features(file_index):
    """Extract Praat acoustic features for every segment in *file_index*.

    Runs pitch/jitter/shimmer, formant, vocal-tract-length and speech-rate
    measurements on all '.wav' segments referenced by the index.

    Args:
        file_index: audformat-style segmented index; iterating
            ``file_index.to_list()`` yields (wave_file, start, end) tuples
            where start/end are timedelta-like (have ``total_seconds()``).

    Returns:
        DataFrame with one row per segment: duration, F0 stats, HNR,
        jitter/shimmer (+ their PCA), formant means/medians, the derived
        vocal-tract estimates (pF, fdisp, avgFormant, mff, fitch_vtl,
        delta_f, vtl_delta_f) joined with the speech-rate features.
        Segments that fail analysis get NaN in the acoustic columns.
    """
    # create lists to put the results
    duration_list = []
    mean_f0_list = []
    sd_f0_list = []
    hnr_list = []
    local_jitter_list = []
    localabsolute_jitter_list = []
    rap_jitter_list = []
    ppq5_jitter_list = []
    ddp_jitter_list = []
    local_shimmer_list = []
    localdb_shimmer_list = []
    apq3_shimmer_list = []
    aqpq5_shimmer_list = []
    apq11_shimmer_list = []
    dda_shimmer_list = []
    f1_mean_list = []
    f2_mean_list = []
    f3_mean_list = []
    f4_mean_list = []
    f1_median_list = []
    f2_median_list = []
    f3_median_list = []
    f4_median_list = []
    # Go through all the wave files in the folder and measure all the acoustics
    for idx, (wave_file, start, end) in enumerate(tqdm(file_index.to_list())):
        signal, sampling_rate = audiofile.read(
            wave_file,
            offset=start.total_seconds(),
            duration=(end - start).total_seconds(),
            always_2d=True,
        )
        # BUGFIX: default all measures to NaN *before* the try block. The
        # original only printed on error and then appended variables that
        # were stale from the previous file — or undefined (NameError) if
        # the very first file failed. NaN defaults keep one row per
        # segment, aligned with file_index.
        pitch_results = (math.nan,) * 15
        formant_results = (math.nan,) * 8
        try:
            sound = parselmouth.Sound(values=signal, sampling_frequency=sampling_rate)
            pitch_results = measure_pitch(sound, 75, 300, "Hertz")
            formant_results = measure_formants(sound, 75, 300)
        except (statistics.StatisticsError, parselmouth.PraatError) as errors:
            print(f"error on file {wave_file}: {errors}")
        (
            duration,
            mean_f0,
            stdev_f0,
            hnr,
            local_jitter,
            localabsolute_jitter,
            rap_jitter,
            ppq5_jitter,
            ddp_jitter,
            local_shimmer,
            localdb_shimmer,
            apq3_shimmer,
            aqpq5_shimmer,
            apq11_shimmer,
            dda_shimmer,
        ) = pitch_results
        (
            f1_mean,
            f2_mean,
            f3_mean,
            f4_mean,
            f1_median,
            f2_median,
            f3_median,
            f4_median,
        ) = formant_results

        duration_list.append(duration)  # make duration list
        mean_f0_list.append(mean_f0)  # make a mean F0 list
        sd_f0_list.append(stdev_f0)  # make a sd F0 list
        hnr_list.append(hnr)  # add HNR data

        # add raw jitter and shimmer measures
        local_jitter_list.append(local_jitter)
        localabsolute_jitter_list.append(localabsolute_jitter)
        rap_jitter_list.append(rap_jitter)
        ppq5_jitter_list.append(ppq5_jitter)
        ddp_jitter_list.append(ddp_jitter)
        local_shimmer_list.append(local_shimmer)
        localdb_shimmer_list.append(localdb_shimmer)
        apq3_shimmer_list.append(apq3_shimmer)
        aqpq5_shimmer_list.append(aqpq5_shimmer)
        apq11_shimmer_list.append(apq11_shimmer)
        dda_shimmer_list.append(dda_shimmer)

        # add the formant data
        f1_mean_list.append(f1_mean)
        f2_mean_list.append(f2_mean)
        f3_mean_list.append(f3_mean)
        f4_mean_list.append(f4_mean)
        f1_median_list.append(f1_median)
        f2_median_list.append(f2_median)
        f3_median_list.append(f3_median)
        f4_median_list.append(f4_median)
    # Collect all generated data into a Pandas data frame.
    df = pd.DataFrame(
        np.column_stack(
            [
                duration_list,
                mean_f0_list,
                sd_f0_list,
                hnr_list,
                local_jitter_list,
                localabsolute_jitter_list,
                rap_jitter_list,
                ppq5_jitter_list,
                ddp_jitter_list,
                local_shimmer_list,
                localdb_shimmer_list,
                apq3_shimmer_list,
                aqpq5_shimmer_list,
                apq11_shimmer_list,
                dda_shimmer_list,
                f1_mean_list,
                f2_mean_list,
                f3_mean_list,
                f4_mean_list,
                f1_median_list,
                f2_median_list,
                f3_median_list,
                f4_median_list,
            ]
        ),
        columns=[
            "duration",
            "meanF0Hz",
            "stdevF0Hz",
            "HNR",
            "localJitter",
            "localabsoluteJitter",
            "rapJitter",
            "ppq5Jitter",
            "ddpJitter",
            "localShimmer",
            "localdbShimmer",
            "apq3Shimmer",
            "apq5Shimmer",
            "apq11Shimmer",
            "ddaShimmer",
            "f1_mean",
            "f2_mean",
            "f3_mean",
            "f4_mean",
            "f1_median",
            "f2_median",
            "f3_median",
            "f4_median",
        ],
    )

    # add pca data
    pca_data = run_pca(df)  # Run jitter and shimmer PCA
    df = pd.concat([df, pca_data], axis=1)  # Add PCA data

    # ### Formant position
    # Puts, D. A., Apicella, C. L., & Cárdenas, R. A. (2012). Masculine voices
    # signal men's threat potential in forager and industrial societies.
    df["pF"] = (
        zscore(df.f1_median)
        + zscore(df.f2_median)
        + zscore(df.f3_median)
        + zscore(df.f4_median)
    ) / 4

    # ### Formant Dispersion
    # Fitch, W. T. (1997). Vocal tract length and formant frequency dispersion
    # correlate with body size in rhesus macaques. JASA, 102(2), 1213-1222.
    df["fdisp"] = (df["f4_median"] - df["f1_median"]) / 3

    # ### Fn (Average Formant)
    # Pisanski, K., & Rendall, D. (2011). The prioritization of voice
    # fundamental frequency or formants in listeners' assessments of speaker
    # size, masculinity, and attractiveness. JASA, 129(4), 2201-2212.
    df["avgFormant"] = (
        df["f1_median"] + df["f2_median"] + df["f3_median"] + df["f4_median"]
    ) / 4

    # ### MFF (geometric mean formant frequency)
    # Smith, D. R., & Patterson, R. D. (2005). The interaction of glottal-pulse
    # rate and vocal-tract length in judgements of speaker size, sex, and age.
    df["mff"] = (
        df["f1_median"] * df["f2_median"] * df["f3_median"] * df["f4_median"]
    ) ** 0.25

    # ### Fitch VTL (vocal tract length estimate; 35000 cm/s speed of sound)
    # Fitch, W. T. (1997), as above.
    df["fitch_vtl"] = (
        (1 * (35000 / (4 * df["f1_median"])))
        + (3 * (35000 / (4 * df["f2_median"])))
        + (5 * (35000 / (4 * df["f3_median"])))
        + (7 * (35000 / (4 * df["f4_median"])))
    ) / 4

    # ### Delta F (formant spacing via linear regression through the origin)
    # Reby, D., & McComb, K. (2003). Anatomical constraints generate honesty:
    # acoustic cues to age and weight in the roars of red deer stags.
    xysum = (
        (0.5 * df["f1_median"])
        + (1.5 * df["f2_median"])
        + (2.5 * df["f3_median"])
        + (3.5 * df["f4_median"])
    )
    xsquaredsum = (0.5**2) + (1.5**2) + (2.5**2) + (3.5**2)
    df["delta_f"] = xysum / xsquaredsum

    # ### VTL(Delta F), Reby & McComb (2003), as above.
    df["vtl_delta_f"] = 35000 / (2 * df["delta_f"])

    print("Now extracting speech rate parameters...")

    df_speechrate = get_speech_rate(file_index)
    print("")

    return df.join(df_speechrate)
|
455
|
-
|
456
|
-
|
457
|
-
"""
|
458
|
-
Speech rate script taken from https://github.com/drfeinberg/PraatScripts
|
459
|
-
on 25/05/23
|
460
|
-
"""
|
461
|
-
|
462
|
-
|
463
|
-
def get_speech_rate(file_index):
    """Compute speech-rate features for every segment in *file_index*.

    Speech rate script taken from https://github.com/drfeinberg/PraatScripts
    on 25/05/23.

    Args:
        file_index: audformat-style segmented index; iterating
            ``file_index.to_list()`` yields (wave_file, start, end) tuples
            where start/end are timedelta-like (have ``total_seconds()``).

    Returns:
        DataFrame with one row per successfully analysed segment and the
        columns produced by ``speech_rate`` (nsyll, npause, dur_s,
        phonationtime_s, speechrate_nsyll_dur,
        articulation_rate_nsyll_phonationtime, ASD_speakingtime_nsyll).
        Failing segments are skipped (so the frame may be shorter than
        the index).
    """
    # NOTE: the original defined an unused `cols` list here — removed.
    datalist = []
    for idx, (wave_file, start, end) in enumerate(tqdm(file_index.to_list())):
        signal, sampling_rate = audiofile.read(
            wave_file,
            offset=start.total_seconds(),
            duration=(end - start).total_seconds(),
            always_2d=True,
        )
        try:
            sound = parselmouth.Sound(values=signal, sampling_frequency=sampling_rate)
            speechrate_dictionary = speech_rate(sound)
            datalist.append(speechrate_dictionary)
        except (IndexError, parselmouth.PraatError) as err:
            # IndexError: no intensity peaks found; PraatError: analysis
            # failed inside Praat. Both cases skip the segment.
            print(f"error extracting speech-rate on file {wave_file}: {err}")
    df = pd.DataFrame(datalist)
    return df
|
492
|
-
|
493
|
-
|
494
|
-
def speech_rate(sound):
    """Estimate syllable nuclei and speech-rate measures for one sound.

    De Jong / Feinberg syllable-nuclei algorithm: find intensity peaks
    above a silence threshold, keep peaks preceded by a sufficient
    intensity dip, keep only voiced ones, and derive rate statistics.

    Args:
        sound: a ``parselmouth.Sound`` object.

    Returns:
        dict with keys nsyll, npause, dur_s, phonationtime_s,
        speechrate_nsyll_dur, articulation_rate_nsyll_phonationtime,
        ASD_speakingtime_nsyll.

    Raises:
        IndexError: if no intensity peak exceeds the threshold
            (``timepeaks[0]`` on an empty list); callers catch this.
    """
    # Tunable constants of the algorithm.
    silencedb = -25  # silence threshold relative to the 99% intensity quantile (dB)
    mindip = 2  # minimum intensity dip (dB) between two syllable peaks
    minpause = 0.3  # minimum pause duration (s)
    originaldur = sound.get_total_duration()
    intensity = sound.to_intensity(50)
    start = call(intensity, "Get time from frame number", 1)
    nframes = call(intensity, "Get number of frames")
    end = call(intensity, "Get time from frame number", nframes)
    min_intensity = call(intensity, "Get minimum", 0, 0, "Parabolic")
    max_intensity = call(intensity, "Get maximum", 0, 0, "Parabolic")

    # get .99 quantile to get maximum (without influence of non-speech sound bursts)
    max_99_intensity = call(intensity, "Get quantile", 0, 0, 0.99)

    # estimate Intensity threshold
    threshold = max_99_intensity + silencedb
    threshold2 = max_intensity - max_99_intensity
    threshold3 = silencedb - threshold2
    if threshold < min_intensity:
        threshold = min_intensity

    # get pauses (silences) and speakingtime
    textgrid = call(
        intensity,
        "To TextGrid (silences)",
        threshold3,
        minpause,
        0.1,
        "silent",
        "sounding",
    )
    silencetier = call(textgrid, "Extract tier", 1)
    silencetable = call(silencetier, "Down to TableOfReal", "sounding")
    npauses = call(silencetable, "Get number of rows")
    speakingtot = 0
    # Sum the durations of all "sounding" intervals (Praat rows are 1-based).
    for ipause in range(npauses):
        pause = ipause + 1
        beginsound = call(silencetable, "Get value", pause, 1)
        endsound = call(silencetable, "Get value", pause, 2)
        speakingdur = endsound - beginsound
        speakingtot += speakingdur

    intensity_matrix = call(intensity, "Down to Matrix")
    # sndintid = sound_from_intensity_matrix
    sound_from_intensity_matrix = call(intensity_matrix, "To Sound (slice)", 1)
    # use total duration, not end time, to find out duration of intdur (intensity_duration)
    # in order to allow nonzero starting times.
    intensity_duration = call(sound_from_intensity_matrix, "Get total duration")
    intensity_max = call(sound_from_intensity_matrix, "Get maximum", 0, 0, "Parabolic")
    point_process = call(
        sound_from_intensity_matrix,
        "To PointProcess (extrema)",
        "Left",
        "yes",
        "no",
        "Sinc70",
    )
    # estimate peak positions (all peaks)
    numpeaks = call(point_process, "Get number of points")
    t = [call(point_process, "Get time from index", i + 1) for i in range(numpeaks)]

    # fill array with intensity values, keeping only peaks above threshold
    timepeaks = []
    peakcount = 0
    intensities = []
    for i in range(numpeaks):
        value = call(sound_from_intensity_matrix, "Get value at time", t[i], "Cubic")
        if value > threshold:
            peakcount += 1
            intensities.append(value)
            timepeaks.append(t[i])

    # fill array with valid peaks: only intensity values if preceding
    # dip in intensity is greater than mindip
    validpeakcount = 0
    currenttime = timepeaks[0]  # raises IndexError if no peaks — caught by caller
    currentint = intensities[0]
    validtime = []

    for p in range(peakcount - 1):
        following = p + 1
        followingtime = timepeaks[p + 1]
        dip = call(intensity, "Get minimum", currenttime, timepeaks[p + 1], "None")
        diffint = abs(currentint - dip)
        if diffint > mindip:
            validpeakcount += 1
            validtime.append(timepeaks[p])
        currenttime = timepeaks[following]
        currentint = call(intensity, "Get value at time", timepeaks[following], "Cubic")

    # Look for only voiced parts
    pitch = sound.to_pitch_ac(0.02, 30, 4, False, 0.03, 0.25, 0.01, 0.35, 0.25, 450)
    voicedcount = 0
    voicedpeak = []

    # A valid peak counts as a syllable nucleus only if it is voiced
    # (pitch defined) and lies inside a "sounding" interval.
    for time in range(validpeakcount):
        querytime = validtime[time]
        whichinterval = call(textgrid, "Get interval at time", 1, querytime)
        whichlabel = call(textgrid, "Get label of interval", 1, whichinterval)
        value = pitch.get_value_at_time(querytime)
        if not math.isnan(value):
            if whichlabel == "sounding":
                voicedcount += 1
                voicedpeak.append(validtime[time])

    # calculate time correction due to shift in time for Sound object versus
    # intensity object
    timecorrection = originaldur / intensity_duration

    # Insert voiced peaks in TextGrid
    call(textgrid, "Insert point tier", 1, "syllables")
    for i in range(len(voicedpeak)):
        position = voicedpeak[i] * timecorrection
        call(textgrid, "Insert point", 1, position, "")

    # return results
    speakingrate = voicedcount / originaldur
    articulationrate = voicedcount / speakingtot
    npause = npauses - 1
    try:
        asd = speakingtot / voicedcount
    except ZeroDivisionError:
        asd = 0
        print("caught zero division")
    speechrate_dictionary = {
        "nsyll": voicedcount,
        "npause": npause,
        "dur_s": originaldur,
        "phonationtime_s": intensity_duration,
        "speechrate_nsyll_dur": speakingrate,
        "articulation_rate_nsyll_phonationtime": articulationrate,
        "ASD_speakingtime_nsyll": asd,
    }
    return speechrate_dictionary
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|