nkululeko 0.59.1__py3-none-any.whl → 0.61.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Files changed (64)
  1. nkululeko/constants.py +1 -1
  2. nkululeko/experiment.py +43 -43
  3. nkululeko/feature_extractor.py +101 -58
  4. nkululeko/modelrunner.py +14 -14
  5. nkululeko/plots.py +11 -0
  6. nkululeko/segment.py +23 -27
  7. nkululeko/test_predictor.py +1 -1
  8. {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/METADATA +13 -1
  9. nkululeko-0.61.0.dist-info/RECORD +31 -0
  10. {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/WHEEL +1 -1
  11. nkululeko/ap_age.py +0 -31
  12. nkululeko/ap_arousal.py +0 -30
  13. nkululeko/ap_dominance.py +0 -29
  14. nkululeko/ap_gender.py +0 -29
  15. nkululeko/ap_mos.py +0 -35
  16. nkululeko/ap_pesq.py +0 -35
  17. nkululeko/ap_sdr.py +0 -36
  18. nkululeko/ap_snr.py +0 -35
  19. nkululeko/ap_stoi.py +0 -34
  20. nkululeko/ap_valence.py +0 -30
  21. nkululeko/augmenter.py +0 -64
  22. nkululeko/dataset.py +0 -415
  23. nkululeko/dataset_csv.py +0 -49
  24. nkululeko/dataset_ravdess.py +0 -19
  25. nkululeko/estimate_snr.py +0 -89
  26. nkululeko/feats_agender.py +0 -63
  27. nkululeko/feats_agender_agender.py +0 -65
  28. nkululeko/feats_analyser.py +0 -87
  29. nkululeko/feats_audmodel.py +0 -63
  30. nkululeko/feats_audmodel_dim.py +0 -63
  31. nkululeko/feats_clap.py +0 -74
  32. nkululeko/feats_import.py +0 -44
  33. nkululeko/feats_mld.py +0 -47
  34. nkululeko/feats_mos.py +0 -92
  35. nkululeko/feats_opensmile.py +0 -84
  36. nkululeko/feats_oxbow.py +0 -87
  37. nkululeko/feats_praat.py +0 -72
  38. nkululeko/feats_snr.py +0 -63
  39. nkululeko/feats_squim.py +0 -99
  40. nkululeko/feats_trill.py +0 -74
  41. nkululeko/feats_wav2vec2.py +0 -94
  42. nkululeko/featureset.py +0 -41
  43. nkululeko/feinberg_praat.py +0 -430
  44. nkululeko/loss_ccc.py +0 -28
  45. nkululeko/loss_softf1loss.py +0 -40
  46. nkululeko/model.py +0 -256
  47. nkululeko/model_bayes.py +0 -14
  48. nkululeko/model_cnn.py +0 -118
  49. nkululeko/model_gmm.py +0 -16
  50. nkululeko/model_knn.py +0 -16
  51. nkululeko/model_knn_reg.py +0 -16
  52. nkululeko/model_mlp.py +0 -175
  53. nkululeko/model_mlp_regression.py +0 -197
  54. nkululeko/model_svm.py +0 -18
  55. nkululeko/model_svr.py +0 -18
  56. nkululeko/model_tree.py +0 -14
  57. nkululeko/model_tree_reg.py +0 -14
  58. nkululeko/model_xgb.py +0 -12
  59. nkululeko/model_xgr.py +0 -12
  60. nkululeko/randomsplicer.py +0 -76
  61. nkululeko/randomsplicing.py +0 -74
  62. nkululeko-0.59.1.dist-info/RECORD +0 -82
  63. {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/LICENSE +0 -0
  64. {nkululeko-0.59.1.dist-info → nkululeko-0.61.0.dist-info}/top_level.txt +0 -0
nkululeko/feinberg_praat.py DELETED
@@ -1,430 +0,0 @@
- """
- This is a copy of David R. Feinberg's Praat scripts
- https://github.com/drfeinberg/PraatScripts
- taken June 23rd 2022
- """
-
- #!/usr/bin/env python3
- import numpy as np
- import pandas as pd
- import math
- import parselmouth
- import statistics
- from nkululeko.util import Util
- import audiofile
- from parselmouth.praat import call
- from scipy.stats.mstats import zscore
- from sklearn.decomposition import PCA
- from sklearn.preprocessing import StandardScaler
-
-
- # This is the function to measure source acoustics using default male parameters.
-
- def measurePitch(voiceID, f0min, f0max, unit):
-     sound = parselmouth.Sound(voiceID)  # read the sound
-     duration = call(sound, "Get total duration")  # duration
-     pitch = call(sound, "To Pitch", 0.0, f0min, f0max)  # create a praat pitch object
-     meanF0 = call(pitch, "Get mean", 0, 0, unit)  # get mean pitch
-     stdevF0 = call(pitch, "Get standard deviation", 0, 0, unit)  # get standard deviation
-     harmonicity = call(sound, "To Harmonicity (cc)", 0.01, f0min, 0.1, 1.0)
-     hnr = call(harmonicity, "Get mean", 0, 0)
-     pointProcess = call(sound, "To PointProcess (periodic, cc)", f0min, f0max)
-     localJitter = call(pointProcess, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3)
-     localabsoluteJitter = call(pointProcess, "Get jitter (local, absolute)", 0, 0, 0.0001, 0.02, 1.3)
-     rapJitter = call(pointProcess, "Get jitter (rap)", 0, 0, 0.0001, 0.02, 1.3)
-     ppq5Jitter = call(pointProcess, "Get jitter (ppq5)", 0, 0, 0.0001, 0.02, 1.3)
-     ddpJitter = call(pointProcess, "Get jitter (ddp)", 0, 0, 0.0001, 0.02, 1.3)
-     localShimmer = call([sound, pointProcess], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
-     localdbShimmer = call([sound, pointProcess], "Get shimmer (local_dB)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
-     apq3Shimmer = call([sound, pointProcess], "Get shimmer (apq3)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
-     aqpq5Shimmer = call([sound, pointProcess], "Get shimmer (apq5)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
-     apq11Shimmer = call([sound, pointProcess], "Get shimmer (apq11)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
-     ddaShimmer = call([sound, pointProcess], "Get shimmer (dda)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
-
-     return duration, meanF0, stdevF0, hnr, localJitter, localabsoluteJitter, rapJitter, ppq5Jitter, ddpJitter, localShimmer, localdbShimmer, apq3Shimmer, aqpq5Shimmer, apq11Shimmer, ddaShimmer
-
-
- # ## This function measures formants at each glottal pulse
- #
- # Puts, D. A., Apicella, C. L., & Cárdenas, R. A. (2012). Masculine voices signal men's threat potential in forager and industrial societies. Proceedings of the Royal Society of London B: Biological Sciences, 279(1728), 601-609.
- #
- # Adapted from: DOI 10.17605/OSF.IO/K2BHS
- # This function measures formants using the Formant Position formula
- # def measureFormants(sound, wave_file, f0min, f0max):
- def measureFormants(sound, f0min, f0max):
-     sound = parselmouth.Sound(sound)  # read the sound
-     # pitch = call(sound, "To Pitch (cc)", 0, f0min, 15, 'no', 0.03, 0.45, 0.01, 0.35, 0.14, f0max)
-     pointProcess = call(sound, "To PointProcess (periodic, cc)", f0min, f0max)
-
-     formants = call(sound, "To Formant (burg)", 0.0025, 5, 5000, 0.025, 50)
-     numPoints = call(pointProcess, "Get number of points")
-
-     f1_list = []
-     f2_list = []
-     f3_list = []
-     f4_list = []
-
-     # Measure formants only at glottal pulses
-     for point in range(0, numPoints):
-         point += 1
-         t = call(pointProcess, "Get time from index", point)
-         f1 = call(formants, "Get value at time", 1, t, 'Hertz', 'Linear')
-         f2 = call(formants, "Get value at time", 2, t, 'Hertz', 'Linear')
-         f3 = call(formants, "Get value at time", 3, t, 'Hertz', 'Linear')
-         f4 = call(formants, "Get value at time", 4, t, 'Hertz', 'Linear')
-         f1_list.append(f1)
-         f2_list.append(f2)
-         f3_list.append(f3)
-         f4_list.append(f4)
-
-     f1_list = [f1 for f1 in f1_list if str(f1) != 'nan']
-     f2_list = [f2 for f2 in f2_list if str(f2) != 'nan']
-     f3_list = [f3 for f3 in f3_list if str(f3) != 'nan']
-     f4_list = [f4 for f4 in f4_list if str(f4) != 'nan']
-
-     # calculate mean formants across pulses
-     f1_mean = statistics.mean(f1_list)
-     f2_mean = statistics.mean(f2_list)
-     f3_mean = statistics.mean(f3_list)
-     f4_mean = statistics.mean(f4_list)
-
-     # calculate median formants across pulses; the medians are used in all subsequent calculations
-     # you can use mean if you want, just edit the code in the boxes below to replace median with mean
-     f1_median = statistics.median(f1_list)
-     f2_median = statistics.median(f2_list)
-     f3_median = statistics.median(f3_list)
-     f4_median = statistics.median(f4_list)
-
-     return f1_mean, f2_mean, f3_mean, f4_mean, f1_median, f2_median, f3_median, f4_median
-
-
- # ## This function runs a 2-factor Principal Components Analysis (PCA) on Jitter and Shimmer
-
- def runPCA(df):
-     # z-score the Jitter and Shimmer measurements
-     measures = ['localJitter', 'localabsoluteJitter', 'rapJitter', 'ppq5Jitter', 'ddpJitter',
-                 'localShimmer', 'localdbShimmer', 'apq3Shimmer', 'apq5Shimmer', 'apq11Shimmer', 'ddaShimmer']
-     x = df.loc[:, measures].values
-     # f = open('x.pickle', 'wb')
-     # pickle.dump(x, f)
-     # f.close()
-
-     x = StandardScaler().fit_transform(x)
-     if np.any(np.isnan(x)):
-         print(f'Warning: {np.count_nonzero(np.isnan(x))} NaNs in x, replacing with 0')
-         x[np.isnan(x)] = 0
-     if np.any(np.isinf(x)):
-         print(f'Warning: {np.count_nonzero(np.isinf(x))} infinite values in x')
-
-     # PCA
-     pca = PCA(n_components=2)
-     principalComponents = pca.fit_transform(x)
-     if np.any(np.isnan(principalComponents)):
-         print('pc is nan')
-         print(f'count: {np.count_nonzero(np.isnan(principalComponents))}')
-         print(principalComponents)
-         principalComponents = np.nan_to_num(principalComponents)
-
-     principalDf = pd.DataFrame(data=principalComponents, columns=['JitterPCA', 'ShimmerPCA'])
-
-     return principalDf
-
-
- # ## This block of code runs the above functions on all of the '.wav' files in the /audio folder
-
- def compute_features(file_index):
-     # create lists to put the results
-     file_list = []
-     duration_list = []
-     mean_F0_list = []
-     sd_F0_list = []
-     hnr_list = []
-     localJitter_list = []
-     localabsoluteJitter_list = []
-     rapJitter_list = []
-     ppq5Jitter_list = []
-     ddpJitter_list = []
-     localShimmer_list = []
-     localdbShimmer_list = []
-     apq3Shimmer_list = []
-     aqpq5Shimmer_list = []
-     apq11Shimmer_list = []
-     ddaShimmer_list = []
-     f1_mean_list = []
-     f2_mean_list = []
-     f3_mean_list = []
-     f4_mean_list = []
-     f1_median_list = []
-     f2_median_list = []
-     f3_median_list = []
-     f4_median_list = []
-     # Go through all the wave files in the folder and measure all the acoustics
-     # for i, wave_file in enumerate(file_list):
-     for idx, (wave_file, start, end) in enumerate(file_index.to_list()):
-         signal, sampling_rate = audiofile.read(wave_file, offset=start.total_seconds(), duration=(end - start).total_seconds(), always_2d=True)
-         sound = parselmouth.Sound(values=signal, sampling_frequency=sampling_rate)
-         if idx % 10 == 0:
-             print(f'praat: extracting file {idx} of {len(file_index.to_list())}')
-         # sound = parselmouth.Sound(wave_file)
-         (duration, meanF0, stdevF0, hnr, localJitter, localabsoluteJitter, rapJitter, ppq5Jitter, ddpJitter,
-          localShimmer, localdbShimmer, apq3Shimmer, aqpq5Shimmer, apq11Shimmer, ddaShimmer) = measurePitch(
-             sound, 75, 300, "Hertz")
-         (f1_mean, f2_mean, f3_mean, f4_mean, f1_median, f2_median, f3_median, f4_median) = measureFormants(
-             sound, 75, 300)
-         # file_list.append(wave_file)  # make an ID list
-         duration_list.append(duration)  # make duration list
-         mean_F0_list.append(meanF0)  # make a mean F0 list
-         sd_F0_list.append(stdevF0)  # make a sd F0 list
-         hnr_list.append(hnr)  # add HNR data
-
-         # add raw jitter and shimmer measures
-         localJitter_list.append(localJitter)
-         localabsoluteJitter_list.append(localabsoluteJitter)
-         rapJitter_list.append(rapJitter)
-         ppq5Jitter_list.append(ppq5Jitter)
-         ddpJitter_list.append(ddpJitter)
-         localShimmer_list.append(localShimmer)
-         localdbShimmer_list.append(localdbShimmer)
-         apq3Shimmer_list.append(apq3Shimmer)
-         aqpq5Shimmer_list.append(aqpq5Shimmer)
-         apq11Shimmer_list.append(apq11Shimmer)
-         ddaShimmer_list.append(ddaShimmer)
-
-         # add the formant data
-         f1_mean_list.append(f1_mean)
-         f2_mean_list.append(f2_mean)
-         f3_mean_list.append(f3_mean)
-         f4_mean_list.append(f4_mean)
-         f1_median_list.append(f1_median)
-         f2_median_list.append(f2_median)
-         f3_median_list.append(f3_median)
-         f4_median_list.append(f4_median)
-
-     # ## This block of code adds all of the data we just generated to a Pandas data frame
-     # Add the data to Pandas
-     df = pd.DataFrame(np.column_stack([duration_list, mean_F0_list, sd_F0_list, hnr_list,
-                                        localJitter_list, localabsoluteJitter_list, rapJitter_list,
-                                        ppq5Jitter_list, ddpJitter_list, localShimmer_list,
-                                        localdbShimmer_list, apq3Shimmer_list, aqpq5Shimmer_list,
-                                        apq11Shimmer_list, ddaShimmer_list, f1_mean_list,
-                                        f2_mean_list, f3_mean_list, f4_mean_list,
-                                        f1_median_list, f2_median_list, f3_median_list,
-                                        f4_median_list]),
-                       columns=['duration', 'meanF0Hz', 'stdevF0Hz', 'HNR',
-                                'localJitter', 'localabsoluteJitter', 'rapJitter',
-                                'ppq5Jitter', 'ddpJitter', 'localShimmer',
-                                'localdbShimmer', 'apq3Shimmer', 'apq5Shimmer',
-                                'apq11Shimmer', 'ddaShimmer', 'f1_mean', 'f2_mean',
-                                'f3_mean', 'f4_mean', 'f1_median',
-                                'f2_median', 'f3_median', 'f4_median'])
-
-     # add pca data
-     pcaData = runPCA(df)  # Run jitter and shimmer PCA
-     df = pd.concat([df, pcaData], axis=1)  # Add PCA data
-     # reload the data so it's all numbers
-     df.to_csv("processed_results.csv", index=False)
-     df = pd.read_csv('processed_results.csv', header=0)
-     # df.sort_values('voiceID').head(20)
-     # ## Next we calculate the vocal-tract length estimates
-
-     # ### Formant position
-     # Puts, D. A., Apicella, C. L., & Cárdenas, R. A. (2012). Masculine voices signal men's threat potential in forager and industrial societies. Proceedings of the Royal Society of London B: Biological Sciences, 279(1728), 601-609.
-
-     df['pF'] = (zscore(df.f1_median) + zscore(df.f2_median) + zscore(df.f3_median) + zscore(df.f4_median)) / 4
-
-     # ### Formant Dispersion
-     # Fitch, W. T. (1997). Vocal tract length and formant frequency dispersion correlate with body size in rhesus macaques. The Journal of the Acoustical Society of America, 102(2), 1213-1222.
-
-     df['fdisp'] = (df['f4_median'] - df['f1_median']) / 3
-
-     # ### Fn (Average Formant)
-     # Pisanski, K., & Rendall, D. (2011). The prioritization of voice fundamental frequency or formants in listeners' assessments of speaker size, masculinity, and attractiveness. The Journal of the Acoustical Society of America, 129(4), 2201-2212.
-
-     df['avgFormant'] = (df['f1_median'] + df['f2_median'] + df['f3_median'] + df['f4_median']) / 4
-
-     # ### MFF
-     # Smith, D. R., & Patterson, R. D. (2005). The interaction of glottal-pulse rate and vocal-tract length in judgements of speaker size, sex, and age. The Journal of the Acoustical Society of America, 118(5), 3177-3186.
-
-     df['mff'] = (df['f1_median'] * df['f2_median'] * df['f3_median'] * df['f4_median']) ** 0.25
-
-     # ### Fitch VTL
-     # Fitch, W. T. (1997). Vocal tract length and formant frequency dispersion correlate with body size in rhesus macaques. The Journal of the Acoustical Society of America, 102(2), 1213-1222.
-
-     # reload the data again
-     # df.to_csv("processed_results.csv", index=False)
-     # df = pd.read_csv('processed_results.csv', header=0)
-
-     df['fitch_vtl'] = ((1 * (35000 / (4 * df['f1_median']))) +
-                        (3 * (35000 / (4 * df['f2_median']))) +
-                        (5 * (35000 / (4 * df['f3_median']))) +
-                        (7 * (35000 / (4 * df['f4_median'])))) / 4
-
-     # ### $\Delta$F
-     # Reby, D., & McComb, K. (2003). Anatomical constraints generate honesty: acoustic cues to age and weight in the roars of red deer stags. Animal Behaviour, 65, 519-530.
-
-     xysum = (0.5 * df['f1_median']) + (1.5 * df['f2_median']) + (2.5 * df['f3_median']) + (3.5 * df['f4_median'])
-     xsquaredsum = (0.5 ** 2) + (1.5 ** 2) + (2.5 ** 2) + (3.5 ** 2)
-     df['delta_f'] = xysum / xsquaredsum
-
-     # ### VTL($\Delta$F)
-     # Reby, D., & McComb, K. (2003). Anatomical constraints generate honesty: acoustic cues to age and weight in the roars of red deer stags. Animal Behaviour, 65, 519-530.
-
-     df['vtl_delta_f'] = 35000 / (2 * df['delta_f'])
-
-     print('Now extracting speech rate parameters...')
-
-     df_speechrate = get_speech_rate(file_index)
-     print('')
-
-     return df.join(df_speechrate)
-
-
- """
- Speech rate script taken from https://github.com/drfeinberg/PraatScripts
- on 25/05/23
- """
-
- def get_speech_rate(file_index):
-     cols = ['nsyll', 'npause', 'dur(s)', 'phonationtime(s)', 'speechrate(nsyll / dur)', 'articulation '
-             'rate(nsyll / phonationtime)', 'ASD(speakingtime / nsyll)']
-     datalist = []
-     for idx, (wave_file, start, end) in enumerate(file_index.to_list()):
-         signal, sampling_rate = audiofile.read(wave_file, offset=start.total_seconds(), duration=(end - start).total_seconds(), always_2d=True)
-         sound = parselmouth.Sound(values=signal, sampling_frequency=sampling_rate)
-         # print(f'processing {file}')
-         speechrate_dictionary = speech_rate(sound)
-         datalist.append(speechrate_dictionary)
-         if idx % 10 == 0:
-             print('.', end=' ')
-     df = pd.DataFrame(datalist)
-     return df
-
- def speech_rate(sound):
-     silencedb = -25
-     mindip = 2
-     minpause = 0.3
-     originaldur = sound.get_total_duration()
-     intensity = sound.to_intensity(50)
-     start = call(intensity, "Get time from frame number", 1)
-     nframes = call(intensity, "Get number of frames")
-     end = call(intensity, "Get time from frame number", nframes)
-     min_intensity = call(intensity, "Get minimum", 0, 0, "Parabolic")
-     max_intensity = call(intensity, "Get maximum", 0, 0, "Parabolic")
-
-     # get .99 quantile to get maximum (without influence of non-speech sound bursts)
-     max_99_intensity = call(intensity, "Get quantile", 0, 0, 0.99)
-
-     # estimate Intensity threshold
-     threshold = max_99_intensity + silencedb
-     threshold2 = max_intensity - max_99_intensity
-     threshold3 = silencedb - threshold2
-     if threshold < min_intensity:
-         threshold = min_intensity
-
-     # get pauses (silences) and speakingtime
-     textgrid = call(intensity, "To TextGrid (silences)", threshold3, minpause, 0.1, "silent", "sounding")
-     silencetier = call(textgrid, "Extract tier", 1)
-     silencetable = call(silencetier, "Down to TableOfReal", "sounding")
-     npauses = call(silencetable, "Get number of rows")
-     speakingtot = 0
-     for ipause in range(npauses):
-         pause = ipause + 1
-         beginsound = call(silencetable, "Get value", pause, 1)
-         endsound = call(silencetable, "Get value", pause, 2)
-         speakingdur = endsound - beginsound
-         speakingtot += speakingdur
-
-     intensity_matrix = call(intensity, "Down to Matrix")
-     # sndintid = sound_from_intensity_matrix
-     sound_from_intensity_matrix = call(intensity_matrix, "To Sound (slice)", 1)
-     # use total duration, not end time, to find out duration of intdur (intensity_duration)
-     # in order to allow nonzero starting times.
-     intensity_duration = call(sound_from_intensity_matrix, "Get total duration")
-     intensity_max = call(sound_from_intensity_matrix, "Get maximum", 0, 0, "Parabolic")
-     point_process = call(sound_from_intensity_matrix, "To PointProcess (extrema)", "Left", "yes", "no", "Sinc70")
-     # estimate peak positions (all peaks)
-     numpeaks = call(point_process, "Get number of points")
-     t = [call(point_process, "Get time from index", i + 1) for i in range(numpeaks)]
-
-     # fill array with intensity values
-     timepeaks = []
-     peakcount = 0
-     intensities = []
-     for i in range(numpeaks):
-         value = call(sound_from_intensity_matrix, "Get value at time", t[i], "Cubic")
-         if value > threshold:
-             peakcount += 1
-             intensities.append(value)
-             timepeaks.append(t[i])
-
-     # fill array with valid peaks: only intensity values if preceding
-     # dip in intensity is greater than mindip
-     validpeakcount = 0
-     currenttime = timepeaks[0]
-     currentint = intensities[0]
-     validtime = []
-
-     for p in range(peakcount - 1):
-         following = p + 1
-         followingtime = timepeaks[p + 1]
-         dip = call(intensity, "Get minimum", currenttime, timepeaks[p + 1], "None")
-         diffint = abs(currentint - dip)
-         if diffint > mindip:
-             validpeakcount += 1
-             validtime.append(timepeaks[p])
-         currenttime = timepeaks[following]
-         currentint = call(intensity, "Get value at time", timepeaks[following], "Cubic")
-
-     # Look for only voiced parts
-     pitch = sound.to_pitch_ac(0.02, 30, 4, False, 0.03, 0.25, 0.01, 0.35, 0.25, 450)
-     voicedcount = 0
-     voicedpeak = []
-
-     for time in range(validpeakcount):
-         querytime = validtime[time]
-         whichinterval = call(textgrid, "Get interval at time", 1, querytime)
-         whichlabel = call(textgrid, "Get label of interval", 1, whichinterval)
-         value = pitch.get_value_at_time(querytime)
-         if not math.isnan(value):
-             if whichlabel == "sounding":
-                 voicedcount += 1
-                 voicedpeak.append(validtime[time])
-
-     # calculate time correction due to shift in time for Sound object versus
-     # intensity object
-     timecorrection = originaldur / intensity_duration
-
-     # Insert voiced peaks in TextGrid
-     call(textgrid, "Insert point tier", 1, "syllables")
-     for i in range(len(voicedpeak)):
-         position = (voicedpeak[i] * timecorrection)
-         call(textgrid, "Insert point", 1, position, "")
-
-     # return results
-     speakingrate = voicedcount / originaldur
-     articulationrate = voicedcount / speakingtot
-     npause = npauses - 1
-     try:
-         asd = speakingtot / voicedcount
-     except ZeroDivisionError:
-         asd = 0
-         print('caught zero division')
-     speechrate_dictionary = {'nsyll': voicedcount,
-                              'npause': npause,
-                              'dur(s)': originaldur,
-                              'phonationtime(s)': intensity_duration,
-                              'speechrate(nsyll / dur)': speakingrate,
-                              "articulation rate(nsyll / phonationtime)": articulationrate,
-                              "ASD(speakingtime / nsyll)": asd}
-     return speechrate_dictionary
-
nkululeko/loss_ccc.py DELETED
@@ -1,28 +0,0 @@
- # loss_concordance_cor_coeff.py
-
- import torch
-
- class ConcordanceCorCoeff(torch.nn.Module):
-
-     def __init__(self):
-         super().__init__()
-         self.mean = torch.mean
-         self.var = torch.var
-         self.sum = torch.sum
-         self.sqrt = torch.sqrt
-         self.std = torch.std
-
-     def forward(self, prediction, ground_truth):
-         mean_gt = self.mean(ground_truth, 0)
-         mean_pred = self.mean(prediction, 0)
-         var_gt = self.var(ground_truth, 0)
-         var_pred = self.var(prediction, 0)
-         v_pred = prediction - mean_pred
-         v_gt = ground_truth - mean_gt
-         cor = self.sum(v_pred * v_gt) / (self.sqrt(self.sum(v_pred ** 2)) * self.sqrt(self.sum(v_gt ** 2)))
-         sd_gt = self.std(ground_truth)
-         sd_pred = self.std(prediction)
-         numerator = 2 * cor * sd_gt * sd_pred
-         denominator = var_gt + var_pred + (mean_gt - mean_pred) ** 2
-         ccc = numerator / denominator
-         return 1 - ccc
nkululeko/loss_softf1loss.py DELETED
@@ -1,40 +0,0 @@
- import torch
- from torch.nn.functional import one_hot, softmax
-
- class SoftF1Loss(torch.nn.Module):
-
-     '''differentiable F1 loss, adapted from
-     https://gist.github.com/SuperShinyEyes/dcc68a08ff8b615442e3bc6a9b55a354
-
-     written by Uwe Reichel'''
-
-     def __init__(self, weight=None, epsilon=1e-7, num_classes=-1):
-         super().__init__()
-         self.weight = weight
-         self.epsilon = epsilon
-         self.num_classes = num_classes
-
-     def forward(self, y_pred, y_true):
-         assert y_pred.ndim == 2
-         assert y_true.ndim == 1
-
-         y_true = one_hot(y_true, self.num_classes).to(torch.float32)
-         y_pred = softmax(y_pred, dim=1)
-
-         tp = (y_true * y_pred).sum(dim=0).to(torch.float32)
-         fp = ((1 - y_true) * y_pred).sum(dim=0).to(torch.float32)
-         fn = (y_true * (1 - y_pred)).sum(dim=0).to(torch.float32)
-
-         precision = tp / (tp + fp + self.epsilon)
-         recall = tp / (tp + fn + self.epsilon)
-
-         f1 = 2 * (precision * recall) / (precision + recall + self.epsilon)
-         f1 = f1.clamp(min=self.epsilon, max=1 - self.epsilon)
-
-         # unweighted mean F1 loss
-         if self.weight is None:
-             return 1 - f1.mean()
-
-         # weighted mean F1 loss
-         wm = torch.sum(f1 * self.weight) / torch.sum(self.weight)
-         return 1 - wm