nkululeko 0.94.3__py3-none-any.whl → 0.95.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. nkululeko/augmenting/resampler.py +5 -2
  2. nkululeko/autopredict/ap_emotion.py +36 -0
  3. nkululeko/autopredict/ap_text.py +45 -0
  4. nkululeko/autopredict/tests/__init__.py +0 -0
  5. nkululeko/autopredict/tests/test_whisper_transcriber.py +122 -0
  6. nkululeko/autopredict/whisper_transcriber.py +81 -0
  7. nkululeko/balance.py +222 -0
  8. nkululeko/constants.py +1 -1
  9. nkululeko/experiment.py +53 -3
  10. nkululeko/explore.py +32 -13
  11. nkululeko/feat_extract/feats_analyser.py +45 -17
  12. nkululeko/feat_extract/feats_emotion2vec.py +51 -26
  13. nkululeko/feat_extract/feats_praat.py +3 -3
  14. nkululeko/feat_extract/feats_praat_core.py +769 -0
  15. nkululeko/feat_extract/tests/__init__.py +1 -0
  16. nkululeko/feat_extract/tests/test_feats_opensmile.py +162 -0
  17. nkululeko/feat_extract/tests/test_feats_praat_core.py +507 -0
  18. nkululeko/glob_conf.py +9 -0
  19. nkululeko/modelrunner.py +15 -39
  20. nkululeko/models/model.py +4 -42
  21. nkululeko/models/model_tuned.py +416 -84
  22. nkululeko/models/model_xgb.py +148 -2
  23. nkululeko/models/tests/test_model_knn.py +49 -0
  24. nkululeko/models/tests/test_model_mlp.py +153 -0
  25. nkululeko/models/tests/test_model_xgb.py +33 -0
  26. nkululeko/nkululeko.py +0 -9
  27. nkululeko/plots.py +25 -19
  28. nkululeko/predict.py +8 -6
  29. nkululeko/reporting/report.py +7 -5
  30. nkululeko/reporting/reporter.py +20 -5
  31. nkululeko/test_predictor.py +7 -1
  32. nkululeko/tests/__init__.py +1 -0
  33. nkululeko/tests/test_balancing.py +270 -0
  34. nkululeko/utils/util.py +38 -6
  35. {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/METADATA +1 -1
  36. {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/RECORD +40 -27
  37. nkululeko/feat_extract/feats_opensmile copy.py +0 -93
  38. nkululeko/feat_extract/feinberg_praat.py +0 -628
  39. {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/WHEEL +0 -0
  40. {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/entry_points.txt +0 -0
  41. {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/licenses/LICENSE +0 -0
  42. {nkululeko-0.94.3.dist-info → nkululeko-0.95.1.dist-info}/top_level.txt +0 -0
@@ -1,628 +0,0 @@
1
- """This is a copy of David R. Feinberg's Praat scripts.
2
- https://github.com/drfeinberg/PraatScripts
3
- taken June 23rd 2022.
4
- """
5
-
6
- #!/usr/bin/env python3
7
- import math
8
- import statistics
9
-
10
- import audiofile
11
- import numpy as np
12
- import pandas as pd
13
- import parselmouth
14
- from parselmouth.praat import call
15
- from scipy.stats.mstats import zscore
16
- from sklearn.decomposition import PCA
17
- from tqdm import tqdm
18
-
19
- # This is the function to measure source acoustics using default male parameters.
20
-
21
-
22
def measure_pitch(voice_id, f0min, f0max, unit):
    """Measure source acoustics of one recording with Praat defaults.

    Args:
        voice_id: anything parselmouth.Sound accepts (path or Sound).
        f0min: pitch floor in Hz for the pitch / pulse analyses.
        f0max: pitch ceiling in Hz.
        unit: Praat unit string for the F0 statistics (e.g. "Hertz").

    Returns:
        A 15-tuple: total duration, mean F0, F0 standard deviation,
        harmonics-to-noise ratio, five jitter measures (local,
        local-absolute, rap, ppq5, ddp) and six shimmer measures
        (local, local-dB, apq3, apq5, apq11, dda).
    """
    snd = parselmouth.Sound(voice_id)
    duration = call(snd, "Get total duration")
    pitch_obj = call(snd, "To Pitch", 0.0, f0min, f0max)
    mean_f0 = call(pitch_obj, "Get mean", 0, 0, unit)
    stdev_f0 = call(pitch_obj, "Get standard deviation", 0, 0, unit)
    harmonicity = call(snd, "To Harmonicity (cc)", 0.01, f0min, 0.1, 1.0)
    hnr = call(harmonicity, "Get mean", 0, 0)
    pulses = call(snd, "To PointProcess (periodic, cc)", f0min, f0max)

    # All jitter commands share the same Praat analysis window.
    jit = (0, 0, 0.0001, 0.02, 1.3)
    local_jitter = call(pulses, "Get jitter (local)", *jit)
    localabsolute_jitter = call(pulses, "Get jitter (local, absolute)", *jit)
    rap_jitter = call(pulses, "Get jitter (rap)", *jit)
    ppq5_jitter = call(pulses, "Get jitter (ppq5)", *jit)
    ddp_jitter = call(pulses, "Get jitter (ddp)", *jit)

    # Shimmer commands take the same window plus a maximum amplitude factor.
    shim = (0, 0, 0.0001, 0.02, 1.3, 1.6)
    local_shimmer = call([snd, pulses], "Get shimmer (local)", *shim)
    localdb_shimmer = call([snd, pulses], "Get shimmer (local_dB)", *shim)
    apq3_shimmer = call([snd, pulses], "Get shimmer (apq3)", *shim)
    aqpq5_shimmer = call([snd, pulses], "Get shimmer (apq5)", *shim)
    apq11_shimmer = call([snd, pulses], "Get shimmer (apq11)", *shim)
    dda_shimmer = call([snd, pulses], "Get shimmer (dda)", *shim)

    return (
        duration,
        mean_f0,
        stdev_f0,
        hnr,
        local_jitter,
        localabsolute_jitter,
        rap_jitter,
        ppq5_jitter,
        ddp_jitter,
        local_shimmer,
        localdb_shimmer,
        apq3_shimmer,
        aqpq5_shimmer,
        apq11_shimmer,
        dda_shimmer,
    )
111
-
112
-
113
- # ## This function measures formants at each glottal pulse
114
- #
115
- # Puts, D. A., Apicella, C. L., & Cárdenas, R. A. (2012). Masculine voices signal men's threat potential in forager and industrial societies. Proceedings of the Royal Society of London B: Biological Sciences, 279(1728), 601-609.
116
- #
117
- # Adapted from: DOI 10.17605/OSF.IO/K2BHS
118
- # This function measures formants using Formant Position formula
119
- # def measureFormants(sound, wave_file, f0min,f0max):
120
def measure_formants(sound, f0min, f0max):
    """Measure F1-F4 at each glottal pulse and summarize them.

    Adapted from DOI 10.17605/OSF.IO/K2BHS (Puts et al., 2012).

    Args:
        sound: anything parselmouth.Sound accepts (path or Sound).
        f0min: pitch floor in Hz for the pulse extraction.
        f0max: pitch ceiling in Hz.

    Returns:
        An 8-tuple (f1_mean, f2_mean, f3_mean, f4_mean,
        f1_median, f2_median, f3_median, f4_median) in Hz.

    Raises:
        statistics.StatisticsError: if no voiced pulses yield a valid
            formant value (empty track) — callers catch this.
    """
    sound = parselmouth.Sound(sound)  # read the sound
    point_process = call(sound, "To PointProcess (periodic, cc)", f0min, f0max)

    formants = call(sound, "To Formant (burg)", 0.0025, 5, 5000, 0.025, 50)
    num_points = call(point_process, "Get number of points")

    # One track per formant number; Praat point indices are 1-based.
    tracks = {1: [], 2: [], 3: [], 4: []}
    for point in range(1, num_points + 1):
        t = call(point_process, "Get time from index", point)
        for fnum, track in tracks.items():
            value = call(formants, "Get value at time", fnum, t, "Hertz", "Linear")
            # Praat returns NaN where the formant is undefined; the old
            # code detected this with str(value) != "nan".
            if not math.isnan(value):
                track.append(value)

    # Mean formants across pulses.
    f1_mean = statistics.mean(tracks[1])
    f2_mean = statistics.mean(tracks[2])
    f3_mean = statistics.mean(tracks[3])
    f4_mean = statistics.mean(tracks[4])

    # Median formants across pulses; these feed all subsequent
    # vocal-tract-length calculations downstream.
    f1_median = statistics.median(tracks[1])
    f2_median = statistics.median(tracks[2])
    f3_median = statistics.median(tracks[3])
    f4_median = statistics.median(tracks[4])

    return (
        f1_mean,
        f2_mean,
        f3_mean,
        f4_mean,
        f1_median,
        f2_median,
        f3_median,
        f4_median,
    )
174
-
175
-
176
- # ## This function runs a 2-factor Principle Components Analysis (PCA) on Jitter and Shimmer
177
-
178
-
179
def run_pca(df):
    """Run a 2-component PCA over the jitter and shimmer columns of *df*.

    Args:
        df: DataFrame containing the eleven jitter/shimmer columns
            produced by compute_features.

    Returns:
        DataFrame with columns "JitterPCA" and "ShimmerPCA"; a single
        all-zero row when there are too few samples to fit the PCA.
    """
    measures = [
        "localJitter",
        "localabsoluteJitter",
        "rapJitter",
        "ppq5Jitter",
        "ddpJitter",
        "localShimmer",
        "localdbShimmer",
        "apq3Shimmer",
        "apq5Shimmer",
        "apq11Shimmer",
        "ddaShimmer",
    ]
    x = df.loc[:, measures].values

    # BUG FIX: the old check looked only at the first row (x[0]), so
    # NaNs in later rows slipped through and broke the PCA fit.
    nan_mask = np.isnan(x)
    if nan_mask.any():
        print(f"Warning: {np.count_nonzero(nan_mask)} Nans in x, replacing" " with 0")
        x[nan_mask] = 0

    # PCA
    pca = PCA(n_components=2)
    try:
        principal_components = pca.fit_transform(x)
        if np.any(np.isnan(principal_components)):
            print("pc is nan")
            print(f"count: {np.count_nonzero(np.isnan(principal_components))}")
            print(principal_components)
            principal_components = np.nan_to_num(principal_components)
    except ValueError:
        # Fitting two components needs more than one sample.
        print("need more than one file for pca")
        principal_components = [[0, 0]]
    principal_df = pd.DataFrame(
        data=principal_components, columns=["JitterPCA", "ShimmerPCA"]
    )
    return principal_df
224
-
225
-
226
- # ## This block of code runs the above functions on all of the '.wav' files in the /audio folder
227
-
228
-
229
def compute_features(file_index):
    """Extract Praat acoustic features for every segment in *file_index*.

    Args:
        file_index: segmented index (file, start, end) whose
            ``to_list()`` yields (wave_file, start, end) tuples with
            pandas Timedelta offsets.

    Returns:
        DataFrame with one row per segment: duration, F0 statistics,
        HNR, jitter/shimmer, formant means/medians, jitter/shimmer PCA
        components, vocal-tract-length estimates and speech-rate
        measures.
    """
    columns = [
        "duration",
        "meanF0Hz",
        "stdevF0Hz",
        "HNR",
        "localJitter",
        "localabsoluteJitter",
        "rapJitter",
        "ppq5Jitter",
        "ddpJitter",
        "localShimmer",
        "localdbShimmer",
        "apq3Shimmer",
        "apq5Shimmer",
        "apq11Shimmer",
        "ddaShimmer",
        "f1_mean",
        "f2_mean",
        "f3_mean",
        "f4_mean",
        "f1_median",
        "f2_median",
        "f3_median",
        "f4_median",
    ]
    rows = []
    # Go through all the wave files in the index and measure the acoustics.
    for wave_file, start, end in tqdm(file_index.to_list()):
        signal, sampling_rate = audiofile.read(
            wave_file,
            offset=start.total_seconds(),
            duration=(end - start).total_seconds(),
            always_2d=True,
        )
        try:
            sound = parselmouth.Sound(values=signal, sampling_frequency=sampling_rate)
            pitch_measures = measure_pitch(sound, 75, 300, "Hertz")
            formant_measures = measure_formants(sound, 75, 300)
            rows.append(list(pitch_measures) + list(formant_measures))
        except (statistics.StatisticsError, parselmouth.PraatError) as errors:
            # BUG FIX: the append calls used to live outside this try,
            # so a failed file re-appended the previous file's values
            # (or raised NameError on the very first file). Append a
            # NaN row instead so row count stays aligned with the index.
            print(f"error on file {wave_file}: {errors}")
            rows.append([np.nan] * len(columns))

    df = pd.DataFrame(rows, columns=columns)

    # Add jitter/shimmer PCA components.
    pca_data = run_pca(df)
    df = pd.concat([df, pca_data], axis=1)

    # Formant position (Puts, Apicella & Cárdenas, 2012).
    df["pF"] = (
        zscore(df.f1_median)
        + zscore(df.f2_median)
        + zscore(df.f3_median)
        + zscore(df.f4_median)
    ) / 4

    # Formant dispersion (Fitch, 1997).
    df["fdisp"] = (df["f4_median"] - df["f1_median"]) / 3

    # Fn, the average formant (Pisanski & Rendall, 2011).
    df["avgFormant"] = (
        df["f1_median"] + df["f2_median"] + df["f3_median"] + df["f4_median"]
    ) / 4

    # MFF, geometric mean formant frequency (Smith & Patterson, 2005).
    df["mff"] = (
        df["f1_median"] * df["f2_median"] * df["f3_median"] * df["f4_median"]
    ) ** 0.25

    # Fitch vocal-tract-length estimate (Fitch, 1997); 35000 cm/s is
    # the speed of sound used by the quarter-wavelength model.
    df["fitch_vtl"] = (
        (1 * (35000 / (4 * df["f1_median"])))
        + (3 * (35000 / (4 * df["f2_median"])))
        + (5 * (35000 / (4 * df["f3_median"])))
        + (7 * (35000 / (4 * df["f4_median"])))
    ) / 4

    # Delta F, formant spacing by regression (Reby & McComb, 2003).
    xysum = (
        (0.5 * df["f1_median"])
        + (1.5 * df["f2_median"])
        + (2.5 * df["f3_median"])
        + (3.5 * df["f4_median"])
    )
    xsquaredsum = (0.5**2) + (1.5**2) + (2.5**2) + (3.5**2)
    df["delta_f"] = xysum / xsquaredsum

    # VTL from delta F (Reby & McComb, 2003).
    df["vtl_delta_f"] = 35000 / (2 * df["delta_f"])

    print("Now extracting speech rate parameters...")

    df_speechrate = get_speech_rate(file_index)
    print("")

    return df.join(df_speechrate)
455
-
456
-
457
- """
458
- Speech rate script taken from https://github.com/drfeinberg/PraatScripts
459
- on 25/05/23
460
- """
461
-
462
-
463
def get_speech_rate(file_index):
    """Compute speech-rate measures for every segment in *file_index*.

    Args:
        file_index: segmented index whose ``to_list()`` yields
            (wave_file, start, end) tuples with Timedelta offsets.

    Returns:
        DataFrame with one row per successfully processed segment
        (failed segments are logged and skipped), columns as produced
        by speech_rate: nsyll, npause, dur_s, phonationtime_s,
        speechrate_nsyll_dur, articulation_rate_nsyll_phonationtime,
        ASD_speakingtime_nsyll.
    """
    # Dead code removed: a `cols` list was built here but never used —
    # the DataFrame columns come from the dict keys returned by
    # speech_rate.
    datalist = []
    for wave_file, start, end in tqdm(file_index.to_list()):
        signal, sampling_rate = audiofile.read(
            wave_file,
            offset=start.total_seconds(),
            duration=(end - start).total_seconds(),
            always_2d=True,
        )
        try:
            sound = parselmouth.Sound(values=signal, sampling_frequency=sampling_rate)
            datalist.append(speech_rate(sound))
        except (IndexError, parselmouth.PraatError) as err:
            # Both error types were previously handled identically in
            # separate clauses; best-effort: log and skip the segment.
            print(f"error extracting speech-rate on file {wave_file}: {err}")
    return pd.DataFrame(datalist)
492
-
493
-
494
def speech_rate(sound):
    """Estimate syllable nuclei and speech-rate measures for *sound*.

    De Jong/Wempe-style syllable-nucleus detection via intensity peaks,
    as distributed in drfeinberg/PraatScripts: find intensity peaks
    above a silence threshold, keep only peaks preceded by a sufficient
    intensity dip, then keep only those that are voiced and inside a
    "sounding" interval.

    Args:
        sound: a parselmouth.Sound object.

    Returns:
        dict with keys nsyll, npause, dur_s, phonationtime_s,
        speechrate_nsyll_dur, articulation_rate_nsyll_phonationtime,
        ASD_speakingtime_nsyll.

    Raises:
        IndexError: if no intensity peak exceeds the threshold
            (timepeaks[0] below) — the caller catches this.
    """
    # Detection parameters: silence threshold (dB below the 99th
    # intensity percentile), minimum intensity dip between peaks,
    # minimum pause duration in seconds.
    silencedb = -25
    mindip = 2
    minpause = 0.3
    originaldur = sound.get_total_duration()
    intensity = sound.to_intensity(50)
    start = call(intensity, "Get time from frame number", 1)
    nframes = call(intensity, "Get number of frames")
    end = call(intensity, "Get time from frame number", nframes)
    min_intensity = call(intensity, "Get minimum", 0, 0, "Parabolic")
    max_intensity = call(intensity, "Get maximum", 0, 0, "Parabolic")

    # get .99 quantile to get maximum (without influence of non-speech sound bursts)
    max_99_intensity = call(intensity, "Get quantile", 0, 0, 0.99)

    # estimate Intensity threshold
    threshold = max_99_intensity + silencedb
    threshold2 = max_intensity - max_99_intensity
    threshold3 = silencedb - threshold2
    if threshold < min_intensity:
        threshold = min_intensity

    # get pauses (silences) and speakingtime
    textgrid = call(
        intensity,
        "To TextGrid (silences)",
        threshold3,
        minpause,
        0.1,
        "silent",
        "sounding",
    )
    silencetier = call(textgrid, "Extract tier", 1)
    silencetable = call(silencetier, "Down to TableOfReal", "sounding")
    npauses = call(silencetable, "Get number of rows")
    # Total speaking time = sum of all "sounding" interval durations.
    speakingtot = 0
    for ipause in range(npauses):
        pause = ipause + 1  # Praat rows are 1-based
        beginsound = call(silencetable, "Get value", pause, 1)
        endsound = call(silencetable, "Get value", pause, 2)
        speakingdur = endsound - beginsound
        speakingtot += speakingdur

    intensity_matrix = call(intensity, "Down to Matrix")
    # sndintid = sound_from_intensity_matrix
    sound_from_intensity_matrix = call(intensity_matrix, "To Sound (slice)", 1)
    # use total duration, not end time, to find out duration of intdur (intensity_duration)
    # in order to allow nonzero starting times.
    intensity_duration = call(sound_from_intensity_matrix, "Get total duration")
    intensity_max = call(sound_from_intensity_matrix, "Get maximum", 0, 0, "Parabolic")
    point_process = call(
        sound_from_intensity_matrix,
        "To PointProcess (extrema)",
        "Left",
        "yes",
        "no",
        "Sinc70",
    )
    # estimate peak positions (all peaks)
    numpeaks = call(point_process, "Get number of points")
    t = [call(point_process, "Get time from index", i + 1) for i in range(numpeaks)]

    # fill array with intensity values
    timepeaks = []
    peakcount = 0
    intensities = []
    for i in range(numpeaks):
        value = call(sound_from_intensity_matrix, "Get value at time", t[i], "Cubic")
        if value > threshold:
            peakcount += 1
            intensities.append(value)
            timepeaks.append(t[i])

    # fill array with valid peaks: only intensity values if preceding
    # dip in intensity is greater than mindip
    # NOTE: timepeaks[0] raises IndexError when no peak passed the
    # threshold above; callers treat that as "no speech found".
    validpeakcount = 0
    currenttime = timepeaks[0]
    currentint = intensities[0]
    validtime = []

    for p in range(peakcount - 1):
        following = p + 1
        followingtime = timepeaks[p + 1]
        dip = call(intensity, "Get minimum", currenttime, timepeaks[p + 1], "None")
        diffint = abs(currentint - dip)
        if diffint > mindip:
            validpeakcount += 1
            validtime.append(timepeaks[p])
        currenttime = timepeaks[following]
        currentint = call(intensity, "Get value at time", timepeaks[following], "Cubic")

    # Look for only voiced parts
    pitch = sound.to_pitch_ac(0.02, 30, 4, False, 0.03, 0.25, 0.01, 0.35, 0.25, 450)
    voicedcount = 0
    voicedpeak = []

    for time in range(validpeakcount):
        querytime = validtime[time]
        whichinterval = call(textgrid, "Get interval at time", 1, querytime)
        whichlabel = call(textgrid, "Get label of interval", 1, whichinterval)
        value = pitch.get_value_at_time(querytime)
        # keep a peak only if it is voiced (pitch defined) AND inside a
        # "sounding" interval of the silence TextGrid
        if not math.isnan(value):
            if whichlabel == "sounding":
                voicedcount += 1
                voicedpeak.append(validtime[time])

    # calculate time correction due to shift in time for Sound object versus
    # intensity object
    timecorrection = originaldur / intensity_duration

    # Insert voiced peaks in TextGrid
    call(textgrid, "Insert point tier", 1, "syllables")
    for i in range(len(voicedpeak)):
        position = voicedpeak[i] * timecorrection
        call(textgrid, "Insert point", 1, position, "")

    # return results
    speakingrate = voicedcount / originaldur
    articulationrate = voicedcount / speakingtot
    npause = npauses - 1
    try:
        asd = speakingtot / voicedcount
    except ZeroDivisionError:
        asd = 0
        print("caught zero division")
    speechrate_dictionary = {
        "nsyll": voicedcount,
        "npause": npause,
        "dur_s": originaldur,
        "phonationtime_s": intensity_duration,
        "speechrate_nsyll_dur": speakingrate,
        "articulation_rate_nsyll_phonationtime": articulationrate,
        "ASD_speakingtime_nsyll": asd,
    }
    return speechrate_dictionary