lingualabpy 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lingualabpy/__init__.py CHANGED
@@ -4,12 +4,17 @@
4
4
  """lingualabpy"""
5
5
  from __future__ import annotations
6
6
 
7
- __version__ = "0.0.3"
7
+ __version__ = "0.0.5"
8
8
 
9
9
  default_config = {
10
10
  "participant_col": "participant_id",
11
11
  "participant_label": "IE",
12
12
  "clinician_label": "IV",
13
+ "f0_bounds": {
14
+ "female": [100.0, 600.0],
15
+ "male": [75.0, 300.0],
16
+ },
17
+ "unit_frequency": "Hertz",
13
18
  }
14
19
 
15
20
  from lingualabpy.io import read_audio, read_docx, read_json, write_json, read_textgrid
@@ -0,0 +1,85 @@
1
+ from collections import defaultdict
2
+ import numpy as np
3
+ from parselmouth import Sound
4
+ from parselmouth.praat import call
5
+
6
+ from lingualabpy.tools.data import UnchangeableDict
7
+
8
+
9
def measure_pitch(
    sound: Sound, f0min: float, f0max: float, unit: str
) -> UnchangeableDict:
    """Measure duration, pitch, HNR, jitter and shimmer of a sound.

    Args:
        sound: Parselmouth sound to analyse.
        f0min: Lower pitch bound handed to the Praat pitch, harmonicity and
            point-process analyses.
        f0max: Upper pitch bound handed to the Praat pitch and point-process
            analyses.
        unit: Unit handed to the Praat pitch queries ("Get mean" and
            "Get standard deviation").

    Returns:
        An ``UnchangeableDict`` holding ``duration``, ``f0_mean``, ``F0_std``,
        ``hnr``, five ``jitter_*`` entries and six ``shimmer_*`` entries, in
        that insertion order.
    """
    # Praat objects shared by the measurements below.
    pitch = call(sound, "To Pitch", 0.0, f0min, f0max)
    harmonicity = call(sound, "To Harmonicity (cc)", 0.01, f0min, 0.1, 1.0)
    point_process = call(sound, "To PointProcess (periodic, cc)", f0min, f0max)

    metrics = UnchangeableDict()

    metrics["duration"] = call(sound, "Get total duration")
    metrics["f0_mean"] = call(pitch, "Get mean", 0, 0, unit)
    # NOTE: the capital "F" is kept on purpose; the key ends up in the JSON
    # output and renaming it would break existing consumers.
    metrics["F0_std"] = call(pitch, "Get standard deviation", 0, 0, unit)
    metrics["hnr"] = call(harmonicity, "Get mean", 0, 0)

    # Jitter: metric-name suffix -> variant name inside the Praat command.
    # "local, absolute" is the only variant whose two spellings differ.
    jitter_variants = {
        "local": "local",
        "local_absolute": "local, absolute",
        "rap": "rap",
        "ppq5": "ppq5",
        "ddp": "ddp",
    }
    for suffix, praat_variant in jitter_variants.items():
        metrics[f"jitter_{suffix}"] = call(
            point_process, f"Get jitter ({praat_variant})", 0, 0, 0.0001, 0.02, 1.3
        )

    # Shimmer: the Praat variant name doubles as the metric-name suffix.
    for shimmer_type in ("local", "local_dB", "apq3", "apq5", "apq11", "dda"):
        metrics[f"shimmer_{shimmer_type}"] = call(
            [sound, point_process],
            f"Get shimmer ({shimmer_type})",
            0,
            0,
            0.0001,
            0.02,
            1.3,
            1.6,
        )

    return metrics
51
+
52
+
53
def measure_formants(
    sound: Sound, f0min: float, f0max: float, unit: str
) -> UnchangeableDict:
    """Measure the first four formants at each glottal pulse.

    Puts, D. A., Apicella, C. L., & Cárdenas, R. A. (2012). Masculine voices
    signal men's threat potential in forager and industrial societies.
    Proceedings of the Royal Society of London B: Biological Sciences,
    279(1728), 601-609.

    Adapted from: DOI 10.17605/OSF.IO/K2BHS

    Args:
        sound: Parselmouth sound to analyse.
        f0min: Lower pitch bound handed to the Praat point-process analysis.
        f0max: Upper pitch bound handed to the Praat point-process analysis.
        unit: Unit handed to the Praat formant query "Get value at time".

    Returns:
        An ``UnchangeableDict`` with ``formants_<n>_mean`` and
        ``formants_<n>_median`` for n in 1..4. An entry is NaN when no pulse
        produced a defined value for that formant.
    """
    # Praat objects shared by the measurements below.
    point_process = call(sound, "To PointProcess (periodic, cc)", f0min, f0max)
    formants = call(sound, "To Formant (burg)", 0.0025, 5, 5000, 0.025, 50)
    number_of_points = call(point_process, "Get number of points")

    formant_numbers = (1, 2, 3, 4)

    # Sample every formant at each glottal pulse, discarding undefined (NaN)
    # values so they cannot poison the statistics.
    values_by_formant = {number: [] for number in formant_numbers}
    for index in range(1, number_of_points + 1):
        time = call(point_process, "Get time from index", index)
        for number in formant_numbers:
            value = call(formants, "Get value at time", number, time, unit, "Linear")
            if not np.isnan(value):
                values_by_formant[number].append(value)

    # Mean and median across pulses; the median is what is used in all
    # subsequent calculations.
    metrics = UnchangeableDict()
    for number in formant_numbers:
        values = values_by_formant[number]
        # Guard the empty case explicitly: numpy would also return NaN but
        # emits a RuntimeWarning while doing so.
        metrics[f"formants_{number}_mean"] = np.mean(values) if values else np.nan
        metrics[f"formants_{number}_median"] = (
            np.median(values) if values else np.nan
        )

    return metrics
@@ -0,0 +1,59 @@
1
+ import click
2
+ from parselmouth import Sound
3
+ from pathlib import Path
4
+
5
+ from lingualabpy import default_config, write_json
6
+ from lingualabpy.audio.metrics import measure_pitch, measure_formants
7
+
8
+
9
@click.command()
@click.option(
    "--sex",
    type=click.Choice(["female", "male"]),
    help=f"Set f0min and f0max for praat analysis. {default_config['f0_bounds']}",
)
@click.option(
    "--f0min",
    type=float,
    help="Define f0min for praat analysis. Not required if sex is specify",
)
@click.option(
    "--f0max",
    type=float,
    help="Define f0max for praat analysis. Not required if sex is specify",
)
@click.option(
    "--unit_frequency",
    default=default_config["unit_frequency"],
    show_default=True,
)
@click.option("--participant_id", "-p", default=None, help="")
@click.option("--output_json", default=None, help="")
@click.argument("audiofile", nargs=1, type=click.Path(exists=True))
def main(sex, f0min, f0max, unit_frequency, participant_id, output_json, audiofile):
    """Compute pitch and formant metrics for AUDIOFILE and write them as JSON.

    The pitch bounds come from --sex when it is given (it then takes
    precedence over --f0min/--f0max); otherwise both --f0min and --f0max
    are required. Without --output_json the result is written to
    "<audiofile stem>_metric-audio.json" in the current directory.
    """
    if sex:
        # The configured bounds for the chosen sex override any explicit values.
        f0min, f0max = default_config["f0_bounds"][sex]
    elif not f0min or not f0max:
        raise click.UsageError(
            "'--f0min' and '--f0max' are required if '--sex' is not specified"
        )

    sound = Sound(audiofile)
    metrics = measure_pitch(sound, f0min, f0max, unit_frequency)
    # UnchangeableDict.update raises if both measurements share a key.
    metrics.update(measure_formants(sound, f0min, f0max, unit_frequency))

    if participant_id:
        metrics["participant_id"] = participant_id

    audiofile = Path(audiofile)
    metrics["filename"] = audiofile.name

    if not output_json:
        output_json = audiofile.stem + "_metric-audio.json"

    # Convert to a plain dict before handing the metrics to the JSON writer.
    write_json(dict(metrics), output_json)
@@ -3,7 +3,7 @@ import click
3
3
  from lingualabpy import default_config, read_audio, read_textgrid
4
4
  from lingualabpy.audio.triming import extract_audio
5
5
  from lingualabpy.text.textgrid import extract_intervals
6
- from lingualabpy.tools.interval import intervals_masking
6
+ from lingualabpy.tools.interval import intervals_masking, interval_to_list
7
7
 
8
8
 
9
9
  @click.command()
@@ -17,21 +17,32 @@ from lingualabpy.tools.interval import intervals_masking
17
17
  default=default_config["clinician_label"],
18
18
  show_default=True,
19
19
  )
20
+ @click.option("--remove_overlap", is_flag=True, show_default=True)
20
21
  @click.argument("textgrid", nargs=1, type=click.Path(exists=True))
21
22
  @click.argument("audiofile", nargs=1, type=click.Path(exists=True))
22
23
  @click.argument("output", nargs=1)
23
- def main(participant_label, clinician_label, textgrid, audiofile, output):
24
+ def main(
25
+ participant_label, clinician_label, remove_overlap, textgrid, audiofile, output
26
+ ):
24
27
  """Doc"""
25
28
  grid = read_textgrid(textgrid)
26
- participant_intervals, clinician_intervals = extract_intervals(
27
- grid, [participant_label, clinician_label]
28
- )
29
- participant_intervals_clean = intervals_masking(
30
- participant_intervals, clinician_intervals
31
- )
29
+
30
+ try:
31
+ participant_intervals, clinician_intervals = extract_intervals(
32
+ grid, [participant_label, clinician_label]
33
+ )
34
+ except Exception as e:
35
+ raise Exception(f"Failed to extract intervals for {textgrid}", repr(e))
36
+
37
+ if remove_overlap:
38
+ participant_intervals = intervals_masking(
39
+ participant_intervals, clinician_intervals
40
+ )
41
+ else:
42
+ participant_intervals = map(interval_to_list, participant_intervals)
32
43
 
33
44
  audio = read_audio(audiofile)
34
45
 
35
- audio_clean = extract_audio(audio, participant_intervals_clean)
46
+ audio_clean = extract_audio(audio, participant_intervals)
36
47
 
37
- audio_clean.export(output)
48
+ audio_clean.export(output, format="wav")
lingualabpy/io.py CHANGED
@@ -33,10 +33,17 @@ def read_json(json_path: str) -> Union[list, dict]:
33
33
def write_json(data: Union[list, dict], json_path: str) -> None:
    """Write *data* to *json_path* as JSON indented with four spaces.

    Args:
        data: JSON-serialisable list or dictionary.
        json_path: Destination file; it is created or overwritten.
    """
    # An explicit encoding keeps the output independent of the platform locale.
    with open(json_path, "w", encoding="utf-8") as file:
        json.dump(data, file, indent=4)
37
37
 
38
38
 
39
39
  # .TextGrid files
40
40
def read_textgrid(textgrid_path: str) -> TextGrid:
    """Read a TextGrid file and clean the text of every interval.

    For each interval of each tier, backslash escape sequences in the text
    are decoded and leading/trailing spaces, newlines, carriage returns and
    tabs are stripped.

    Args:
        textgrid_path: Path of the .TextGrid file to load.

    Returns:
        The loaded ``TextGrid`` with the interval texts cleaned in place.
    """
    textgrid = TextGrid(textgrid_path)
    # Cleaning of the interval text
    for intervals in textgrid.values():
        for interval in intervals:
            # "unicode_escape" decodes bytes as Latin-1, so the text must be
            # encoded as Latin-1 too: a plain UTF-8 encode() would turn every
            # non-ASCII character into mojibake ("é" -> "Ã©").
            # "backslashreplace" writes characters outside Latin-1 as \uXXXX
            # escapes, which the decode step restores.
            interval.text = (
                interval.text.encode("latin-1", "backslashreplace")
                .decode("unicode_escape")
                .strip(" \n\r\t")
            )
    return textgrid
@@ -0,0 +1,536 @@
1
+ # PRAAT SCRIPT FILLED PAUSES
2
+ # Set Syllable Boundaries at -6 dB around points in the tier "Nuclei" (as set by the
3
+ # script "SyllableNucleiv3.praat"), compute a number of global (speaker specific) and
4
+ # local (syllable specific) parameters for automatic detection of Filled Pauses.
5
+ #
6
+ # Optionally, save the local parameters for all syllables in a table.
7
+ #
8
+ # J J A Pacilly, 1-nov-2019, for Nivja de Jong, on behalf of:
9
+ # British Council, Aptis Research Grants
10
+ # J J A Pacilly, 14-feb-2020, retain max. similarity with version for internal use
11
+ #
12
+ # Note that this script is used by "SyllableNucleiv3.Praat", but it can also
13
+ # be used as a standalone script with a selected Sound and Textgrid object
14
+ # as long as this TextGrid contains a pointTier with the name "Nuclei".
15
+ #
16
+ # The settings and thresholds in this script are trained and tested on
17
+ # Dutch and English L2 data. See [LINK?] the ARAG report by De Jong and
18
+ # Pacilly (2019) for more information.
19
+ #
20
+ # Copyright (C) 2019 - J J A Pacilly & N H de Jong, LUCL - Universiteit Leiden
21
+ #
22
+ # This program is free software: you can redistribute it and/or modify
23
+ # it under the terms of the GNU General Public License as published by
24
+ # the Free Software Foundation, either version 3 of the License, or
25
+ # (at your option) any later version.
26
+ #
27
+ # This program is distributed in the hope that it will be useful,
28
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
29
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
30
+ # See the GNU General Public License for more details.
31
+ #
32
+ # You should have received a copy of the GNU General Public License
33
+ # along with this program. If not, see http://www.gnu.org/licenses/
34
+
35
+ form Detect Filled Pauses
36
+ optionmenu Language 1
37
+ option English
38
+ # option Mandarin (not yet implemented)
39
+ # option Spanish (not yet implemented)
40
+ option Dutch
41
+ real Filled_Pause_threshold 1.00 ; cut-off higher/lower
42
+ boolean Save_Table 0
43
+ endform
44
+
45
+ idSnd = selected ("Sound")
46
+ name$ = selected$("Sound")
47
+ idTG = selected ("TextGrid")
48
+
49
+ @setSB: idSnd, idTG ; set/replace tier(s) and define vectors for initial analysis
50
+
51
+ if nrSyllables
52
+ @doGlobalAnalyses: idSnd ; do global analysis of ALL syllables identified by tier Nuclei
53
+ @sdF0: idSnd ; fills the arrays dF0[], dqF0[] and sdF0[]
54
+ @replaceUndefinedF0: 0 ; replace Undefined values by mean
55
+ @sdFmt: idSnd ; fills the arrays dF1-3[], dqF1-3[] and sdF1-3[]
56
+ @processData: idTG, name$, "Auto" ; create Auto table, set labels
57
+ idTableAuto = processData.idTable
58
+ endif
59
+
60
+ selectObject: idSnd, idTG
61
+ if idTableAuto
62
+ plusObject: idTableAuto
63
+ endif
64
+
65
+
66
+ procedure setSB: .idSnd, .idTG
67
+
68
+ # For testing, allow successive runs of this script
69
+
70
+ selectObject: .idTG
71
+ .tierNuclei = 0
72
+ .tierPhrases = 0
73
+ .tierAuto = 0
74
+ .nrTiers = Get number of tiers
75
+ for .tier to .nrTiers
76
+ .name$ = Get tier name: .tier
77
+ if .name$ == "Nuclei"
78
+ .tierNuclei = .tier
79
+ elif .name$ == "Phrases"
80
+ .tierPhrases = .tier
81
+ elif .name$ == "DFauto" + " ('language$')"
82
+ Remove tier: .tier
83
+ Insert interval tier: .tier, "DFauto" + " ('language$')"
84
+ .tierAuto = .tier
85
+ elif left$(.name$, 5) == "DFman" or left$(.name$, 5) == "dfMan"
86
+ .tierMan = .tier
87
+ endif
88
+ endfor
89
+
90
+ if .tierNuclei == 0
91
+ exitScript: "No tier ""Nuclei"" found, please run ""SyllableNuclei.praat"" first."
92
+ endif
93
+
94
+ nrSyllables = Get number of points: .tierNuclei
95
+ d# = zero#(nrSyllables)
96
+
97
+ tNuc[0] = Get start time
98
+ tNuc[nrSyllables+1] = Get end time
99
+ for syllable to nrSyllables
100
+ tNuc[syllable] = Get time of point: .tierNuclei, syllable
101
+ if .tierPhrases
102
+ .iPhrase = Get interval at time: .tierPhrases, tNuc[syllable]
103
+ .tFromPhrase[syllable] = Get start time of interval: .tierPhrases, .iPhrase
104
+ .tToPhrase[syllable] = Get end time of interval: .tierPhrases, .iPhrase
105
+ endif
106
+ endfor
107
+
108
+ # Get minimum Intensity *between* Nuclei
109
+
110
+ selectObject: .idSnd
111
+ .idInt = To Intensity: 100, 0, "yes"
112
+ nrFrames = Get number of frames
113
+ for syllable to nrSyllables+1
114
+ tSBMin[syllable] = Get time of minimum: tNuc[syllable-1], tNuc[syllable], "Parabolic"
115
+ endfor
116
+ tMeanSyllable = (tSBMin[nrSyllables+1] - tSBMin[1]) / nrSyllables
117
+
118
+ # Find -6 dB *around* Nuclei but avoid that these boundaries cross
119
+ # the 'minimum Intensity boundaries' (yields twice as much intervals)
120
+
121
+ for syllable to nrSyllables
122
+ frNuc = Get frame number from time: tNuc[syllable]
123
+ frNuc = round(frNuc)
124
+ dBNuc = Get value in frame: frNuc
125
+
126
+ frFrom = frNuc
127
+ repeat
128
+ frFrom -= 1
129
+ dBL = Get value in frame: frFrom
130
+ tL = Get time from frame number: frFrom
131
+ until dBL < dBNuc - 6 or tL < tSBMin[syllable] or frFrom < 2
132
+ tFrom[syllable] = Get time from frame number: frFrom
133
+
134
+ frTo = frNuc
135
+ repeat
136
+ frTo += 1
137
+ dBR = Get value in frame: frTo
138
+ tR = Get time from frame number: frTo
139
+ until dBR < dBNuc - 6 or tR > tSBMin[syllable+1] or frTo > nrFrames-1
140
+ tTo[syllable] = Get time from frame number: frTo
141
+
142
+ d6Org[syllable] = tTo[syllable] - tFrom[syllable]
143
+
144
+ if .tierPhrases
145
+ tFrom[syllable] = max(.tFromPhrase[syllable], tFrom[syllable])
146
+ tTo [syllable] = min(.tToPhrase [syllable], tTo [syllable])
147
+ endif
148
+ endfor
149
+
150
+ # only the boundaries *around* Nuclei are being used
151
+
152
+ selectObject: .idTG
153
+ if not .tierAuto
154
+ Insert interval tier: .nrTiers + 1, "DFauto" + " ('language$')"
155
+ .tierAuto = .nrTiers + 1
156
+ endif
157
+
158
+ for syllable to nrSyllables
159
+ # Insert boundary: .tierMin, tSBMin[syllable]
160
+
161
+ if tFrom[syllable] > tSBMin[syllable]
162
+ Insert boundary: .tierAuto, tFrom[syllable]
163
+ ts[syllable] = tFrom[syllable]
164
+ else
165
+ Insert boundary: .tierAuto, tSBMin[syllable]+0.00005
166
+ ts[syllable] = tSBMin[syllable]
167
+ endif
168
+ if tTo[syllable] < tSBMin[syllable+1]
169
+ Insert boundary: .tierAuto, tTo[syllable]
170
+ te[syllable] = tTo[syllable]
171
+ else
172
+ Insert boundary: .tierAuto, tSBMin[syllable+1]-0.00005
173
+ te[syllable] = tSBMin[syllable+1]
174
+ endif
175
+
176
+ d [syllable] = te[syllable] - ts[syllable]
177
+ d#[syllable] = te[syllable] - ts[syllable]
178
+
179
+ endfor
180
+ # Insert boundary: .tierMin, tSBMin[nrSyllables+1]
181
+ removeObject: .idInt
182
+ endproc
183
+
184
+ procedure doGlobalAnalyses: .idSnd
185
+
186
+ # Concatenate all Syllables
187
+
188
+ .id# = zero#(nrSyllables)
189
+ for syllable to nrSyllables
190
+ selectObject: .idSnd
191
+ .id#[syllable] = Extract part: ts[syllable], te[syllable], "rectangular", 1, "no"
192
+ endfor
193
+ selectObject: .id#
194
+ .idSndTmp = Concatenate with overlap: 0.01
195
+ removeObject: .id#
196
+
197
+ # Perform Initial Global Pitch analysis to determine Global Quantile
198
+ selectObject: .idSndTmp
199
+ .idPTmpInit = noprogress To Pitch (ac): 0.02, 30, 4, "no", 0.03, 0.25, 0.01, 0.35, 0.25, 450
200
+ .qGlobF0Init = Get quantile: 0, 0, 0.5, "Hertz"
201
+
202
+ # Perform Global Pitch and Formant analysis
203
+ selectObject: .idSndTmp
204
+ .idPTmp = noprogress To Pitch (ac): 0.02, 30, 4, "no", 0.03, 0.25, 0.01, 0.35, 0.25, 2.5 * .qGlobF0Init
205
+ qGlobF0 = Get quantile: 0, 0, 0.5, "semitones re 100 Hz"
206
+
207
+ selectObject: .idSndTmp
208
+ .idFmtTmp = noprogress To Formant (burg): 0, 4, 4000 + 4 * (.qGlobF0Init - 100), 0.025, 50
209
+ qGlobF1 = Get quantile: 1, 0, 0, "bark", 0.5
210
+ qGlobF2 = Get quantile: 2, 0, 0, "bark", 0.5
211
+ qGlobF3 = Get quantile: 3, 0, 0, "bark", 0.5
212
+ # appendFileLine: "FilledPauses.txt", name$, tab$, "qGlobF0: (",
213
+ #.. fixed$(.qGlobF0Init, 1), "/", fixed$(qGlobF0, 1), ")"
214
+
215
+ removeObject: .idSndTmp, .idPTmpInit, .idPTmp, .idFmtTmp
216
+ endproc
217
+
218
+ procedure sdF0: .idSnd
219
+ selectObject: .idSnd
220
+
221
+ f0# = zero#(nrSyllables)
222
+ dF0# = zero#(nrSyllables)
223
+ dqF0# = zero#(nrSyllables)
224
+ sdF0# = zero#(nrSyllables)
225
+
226
+ .idF0 = noprogress To Pitch (ac): 0.02, 30, 4, "no", 0.03, 0.25, 0.01, 0.35, 0.25, 2.5 * doGlobalAnalyses.qGlobF0Init
227
+
228
+ for syllable to nrSyllables
229
+ q50F0 = Get quantile: ts[syllable], te[syllable], 0.50, "semitones re 100 Hz"
230
+ f0 [syllable] = q50F0
231
+ f0#[syllable] = f0 [syllable]
232
+
233
+ dF0 [syllable] = qGlobF0 - q50F0
234
+ dF0#[syllable] = dF0[syllable]
235
+
236
+ q95F0 = Get quantile: ts[syllable], te[syllable], 0.95, "semitones re 100 Hz"
237
+ q05F0 = Get quantile: ts[syllable], te[syllable], 0.05, "semitones re 100 Hz"
238
+ dqF0 [syllable] = q95F0 - q05F0
239
+ dqF0#[syllable] = dqF0 [syllable]
240
+
241
+ sdF0 [syllable] = Get standard deviation: ts[syllable], te[syllable], "semitones"
242
+ sdF0#[syllable] = sdF0 [syllable]
243
+ endfor
244
+ removeObject: .idF0
245
+ endproc
246
+
247
+ procedure replaceUndefinedF0: .dummy
248
+ # small amounts (< 10%) of undefined F0 values are replaced by MEAN without warning
249
+ nrUndef# = zero#(4)
250
+ total# = zero#(4)
251
+ for syllable to nrSyllables
252
+ if f0#[syllable] == undefined
253
+ nrUndef#[1] = nrUndef#[1] + 1
254
+ listUndefined[1, nrUndef#[1]] = syllable
255
+ else
256
+ total#[1] = total#[1] + f0#[syllable]
257
+ endif
258
+ if dF0#[syllable] == undefined
259
+ nrUndef#[2] = nrUndef#[2] + 1
260
+ listUndefined[2, nrUndef#[2]] = syllable
261
+ else
262
+ total#[2] = total#[2] + dF0#[syllable]
263
+ endif
264
+ if dqF0#[syllable] == undefined
265
+ nrUndef#[3] = nrUndef#[3] + 1
266
+ listUndefined[3, nrUndef#[3]] = syllable
267
+ else
268
+ total#[3] = total#[3] + dqF0#[syllable]
269
+ endif
270
+ if sdF0#[syllable] == undefined
271
+ nrUndef#[4] = nrUndef#[4] + 1
272
+ listUndefined[4, nrUndef#[4]] = syllable
273
+ else
274
+ total#[4] = total#[4] + sdF0#[syllable]
275
+ endif
276
+ endfor
277
+ mean__F0 = total#[1] / (nrSyllables - nrUndef#[1])
278
+ mean_dF0 = total#[2] / (nrSyllables - nrUndef#[2])
279
+ meandqF0 = total#[3] / (nrSyllables - nrUndef#[3])
280
+ meansdF0 = total#[4] / (nrSyllables - nrUndef#[4])
281
+ for syllable to nrUndef#[1]
282
+ f0#[listUndefined[1, syllable]] = mean__F0
283
+ if syllable == 1 and nrUndef#[1] > nrSyllables / 10
284
+ appendInfoLine: "Warning: replaced ", nrUndef#[1], "/'nrSyllables' F0 values by mean ('mean__F0:3') in 'name$'."
285
+ endif
286
+ endfor
287
+ for syllable to nrUndef#[2]
288
+ dF0#[listUndefined[2, syllable]] = mean_dF0
289
+ if syllable == 1 and nrUndef#[2] > nrSyllables / 10
290
+ appendInfoLine: "Warning: replaced ", nrUndef#[2], "/'nrSyllables' dF0 values by mean ('mean_dF0:3') in 'name$'."
291
+ endif
292
+ endfor
293
+ for syllable to nrUndef#[3]
294
+ dqF0#[listUndefined[3, syllable]] = meandqF0
295
+ if syllable == 1 and nrUndef#[3] > nrSyllables / 10
296
+ appendInfoLine: "Warning: replaced ", nrUndef#[3], "/'nrSyllables' dqF0 values by mean ('meandqF0:3') in 'name$'."
297
+ endif
298
+ endfor
299
+ for syllable to nrUndef#[4]
300
+ sdF0#[listUndefined[4, syllable]] = meansdF0
301
+ if syllable == 1 and nrUndef#[4] > nrSyllables / 10
302
+ appendInfoLine: "Warning: replaced ", nrUndef#[4], "/'nrSyllables' sdF0 values by mean ('meansdF0:3') in 'name$'."
303
+ endif
304
+ endfor
305
+ endproc
306
+
307
+ procedure sdFmt: .idSnd
308
+ selectObject: .idSnd
309
+ .idFmt = noprogress To Formant (burg): 0, 4, 4000 + 4 * (doGlobalAnalyses.qGlobF0Init - 100), 0.025, 50
310
+
311
+ f1# = zero#(nrSyllables)
312
+ f2# = zero#(nrSyllables)
313
+ f3# = zero#(nrSyllables)
314
+ dF1# = zero#(nrSyllables)
315
+ dF2# = zero#(nrSyllables)
316
+ dF3# = zero#(nrSyllables)
317
+ dqF1# = zero#(nrSyllables)
318
+ dqF2# = zero#(nrSyllables)
319
+ dqF3# = zero#(nrSyllables)
320
+ sdF1# = zero#(nrSyllables)
321
+ sdF2# = zero#(nrSyllables)
322
+ sdF3# = zero#(nrSyllables)
323
+
324
+ for syllable to nrSyllables
325
+ fs = Get frame number from time: ts[syllable]
326
+ fs = round(fs)
327
+ if fs < 1 ; are these frame numbers reliable ?!?
328
+ fs = 1
329
+ endif
330
+ fe = Get frame number from time: te[syllable]
331
+ fe = round(fe)
332
+ f1 [syllable] = Get quantile: 1, ts[syllable], te[syllable], "bark", 0.5
333
+ f2 [syllable] = Get quantile: 2, ts[syllable], te[syllable], "bark", 0.5
334
+ f3 [syllable] = Get quantile: 3, ts[syllable], te[syllable], "bark", 0.5
335
+ f1#[syllable] = f1[syllable]
336
+ f2#[syllable] = f2[syllable]
337
+ f3#[syllable] = f3[syllable]
338
+
339
+ dF1[syllable] = 0
340
+ dF2[syllable] = 0
341
+ dF3[syllable] = 0
342
+ for frame from fs to fe
343
+ t = Get time from frame number: frame
344
+ lF1 = Get value at time: 1, t, "bark", "Linear"
345
+ lF2 = Get value at time: 2, t, "bark", "Linear"
346
+ lF3 = Get value at time: 3, t, "bark", "Linear"
347
+ if lF1 <> undefined
348
+ dF1[syllable] += abs(qGlobF1 - lF1)
349
+ endif
350
+ if lF2 <> undefined
351
+ dF2[syllable] += abs(qGlobF2 - lF2)
352
+ endif
353
+ if lF3 <> undefined
354
+ dF3[syllable] += abs(qGlobF3 - lF3)
355
+ endif
356
+ endfor
357
+ dF1 [syllable] /= (fe-fs+1)
358
+ dF2 [syllable] /= (fe-fs+1)
359
+ dF3 [syllable] /= (fe-fs+1)
360
+ dF1#[syllable] = dF1[syllable]
361
+ dF2#[syllable] = dF2[syllable]
362
+ dF3#[syllable] = dF3[syllable]
363
+
364
+ q95F1 = Get quantile: 1, ts[syllable], te[syllable], "bark", 0.95
365
+ q05F1 = Get quantile: 1, ts[syllable], te[syllable], "bark", 0.05
366
+ q95F2 = Get quantile: 2, ts[syllable], te[syllable], "bark", 0.95
367
+ q05F2 = Get quantile: 2, ts[syllable], te[syllable], "bark", 0.05
368
+ q95F3 = Get quantile: 3, ts[syllable], te[syllable], "bark", 0.95
369
+ q05F3 = Get quantile: 3, ts[syllable], te[syllable], "bark", 0.05
370
+ dqF1 [syllable] = q95F1 - q05F1
371
+ dqF2 [syllable] = q95F2 - q05F2
372
+ dqF3 [syllable] = q95F3 - q05F3
373
+ dqF1#[syllable] = dqF1[syllable]
374
+ dqF2#[syllable] = dqF2[syllable]
375
+ dqF3#[syllable] = dqF3[syllable]
376
+
377
+ sdF1 [syllable] = Get standard deviation: 1, ts[syllable], te[syllable], "bark"
378
+ sdF2 [syllable] = Get standard deviation: 2, ts[syllable], te[syllable], "bark"
379
+ sdF3 [syllable] = Get standard deviation: 3, ts[syllable], te[syllable], "bark"
380
+ sdF1#[syllable] = sdF1[syllable]
381
+ sdF2#[syllable] = sdF2[syllable]
382
+ sdF3#[syllable] = sdF3[syllable]
383
+ endfor
384
+ removeObject: .idFmt
385
+ endproc
386
+
387
+ procedure processData: .idTG, .name$, .type$
388
+
389
+ if save_Table
390
+ .idTable = Create Table with column names: .name$, 0, "type ts dur durz F0 F0z F1 F1z F2 F2z F3 F3z dF0 dF0z dF1 dF1z dF2 dF2z dF3 dF3z
391
+ ... dqF0 dqF0z dqF1 dqF1z dqF2 dqF2z dqF3 dqF3z sdF0 sdF0z sdF1 sdF1z sdF2 sdF2z sdF3 sdF3z score"
392
+ else
393
+ .idTable = 0
394
+ endif
395
+
396
+ if .type$ == "Auto"
397
+ mean_d = mean( d#)
398
+ sd_d = stdev( d#)
399
+ mean_F0 = mean( f0#)
400
+ sd_F0 = stdev( f0#)
401
+ mean_F1 = mean( f1#)
402
+ sd_F1 = stdev( f1#)
403
+ mean_F2 = mean( f2#)
404
+ sd_F2 = stdev( f2#)
405
+ mean_F3 = mean( f3#)
406
+ sd_F3 = stdev( f3#)
407
+ mean_dF0 = mean( dF0#)
408
+ sd_dF0 = stdev( dF0#)
409
+ mean_dF1 = mean( dF1#)
410
+ sd_dF1 = stdev( dF1#)
411
+ mean_dF2 = mean( dF2#)
412
+ sd_dF2 = stdev( dF2#)
413
+ mean_dF3 = mean( dF3#)
414
+ sd_dF3 = stdev( dF3#)
415
+ mean_dqF0 = mean(dqF0#)
416
+ sd_dqF0 = stdev(dqF0#)
417
+ mean_dqF1 = mean(dqF1#)
418
+ sd_dqF1 = stdev(dqF1#)
419
+ mean_dqF2 = mean(dqF2#)
420
+ sd_dqF2 = stdev(dqF2#)
421
+ mean_dqF3 = mean(dqF3#)
422
+ sd_dqF3 = stdev(dqF3#)
423
+ mean_sdF0 = mean(sdF0#)
424
+ sd_sdF0 = stdev(sdF0#)
425
+ mean_sdF1 = mean(sdF1#)
426
+ sd_sdF1 = stdev(sdF1#)
427
+ mean_sdF2 = mean(sdF2#)
428
+ sd_sdF2 = stdev(sdF2#)
429
+ mean_sdF3 = mean(sdF3#)
430
+ sd_sdF3 = stdev(sdF3#)
431
+ endif
432
+
433
+ # z-transform data
434
+
435
+ dz# = ( d# - mean_d ) / sd_d
436
+ f0z# = ( f0# - mean_F0 ) / sd_F0
437
+ f1z# = ( f1# - mean_F1 ) / sd_F1
438
+ f2z# = ( f2# - mean_F2 ) / sd_F2
439
+ f3z# = ( f3# - mean_F3 ) / sd_F3
440
+ dF0z# = ( dF0# - mean_dF0 ) / sd_dF0
441
+ dF1z# = ( dF1# - mean_dF1 ) / sd_dF1
442
+ dF2z# = ( dF2# - mean_dF2 ) / sd_dF2
443
+ dF3z# = ( dF3# - mean_dF3 ) / sd_dF3
444
+ dqF0z# = (dqF0# - mean_dqF0) / sd_dqF0
445
+ dqF1z# = (dqF1# - mean_dqF1) / sd_dqF1
446
+ dqF2z# = (dqF2# - mean_dqF2) / sd_dqF2
447
+ dqF3z# = (dqF3# - mean_dqF3) / sd_dqF3
448
+ sdF0z# = (sdF0# - mean_sdF0) / sd_sdF0
449
+ sdF1z# = (sdF1# - mean_sdF1) / sd_sdF1
450
+ sdF2z# = (sdF2# - mean_sdF2) / sd_sdF2
451
+ sdF3z# = (sdF3# - mean_sdF3) / sd_sdF3
452
+
453
+ for syllable to nrSyllables
454
+ selectObject: .idTG
455
+
456
+ scoreUK = 4.73 * sqrt( d[syllable]) - 0.29 * f0z#[syllable]
457
+ ... - 0.32 * sqrt(sdF1[syllable]) - 0.10 * sqrt(dF1[syllable])
458
+ ... - 1.38 * sqrt(sdF2[syllable]) - 0.80 * sqrt(dF2[syllable])
459
+ ... - 0.20 * (f2[syllable] - f1[syllable])
460
+ ... + 0.31 * f3[syllable]
461
+
462
+ scoreNL = 8.62 * sqrt( d[syllable]) - 0.36 * f0z#[syllable]
463
+ ... - 0.72 * sqrt(dF1[syllable])
464
+ ... - 1.36 * sqrt(sdF2[syllable]) - 1.62 * sqrt(dF2[syllable])
465
+ ... - 1.02 * sqrt(sdF3[syllable])
466
+ ... - 0.11 * (f2[syllable] - f1[syllable])
467
+ ... + 0.21 * f3[syllable]
468
+
469
+ lbl2$ = Get label of interval: setSB.tierAuto, 2*syllable
470
+
471
+ if language$ == "English"
472
+ score = scoreUK
473
+ if score > 3.4942 * filled_Pause_threshold
474
+ lbl2$ += "fp"
475
+ endif
476
+ elif language$ == "Dutch"
477
+ score = scoreNL
478
+ if score > 2.7094 * filled_Pause_threshold
479
+ lbl2$ += "fp"
480
+ endif
481
+ else
482
+ exitScript: "Language not supported."
483
+ endif
484
+
485
+ if .type$ == "Auto"
486
+ type$ [syllable] = lbl2$
487
+ Set interval text: setSB.tierAuto, 2*syllable, lbl2$
488
+ # Set interval text: setSB.tierAuto, 2*syllable, fixed$(score, 3)
489
+ endif
490
+
491
+ if save_Table
492
+ selectObject: .idTable
493
+ Append row
494
+ row = Get number of rows
495
+
496
+ Set string value: row, "type" , type$ [syllable]
497
+ Set string value: row, "ts", fixed$(ts [syllable], 3)
498
+ Set string value: row, "dur" , fixed$( d [syllable], 3)
499
+ Set string value: row, "durz", fixed$( dz# [syllable], 3)
500
+ Set string value: row, "F0" , fixed$( f0 [syllable], 3)
501
+ Set string value: row, "F0z", fixed$( f0z#[syllable], 3)
502
+ Set string value: row, "F1" , fixed$( f1 [syllable], 3)
503
+ Set string value: row, "F1z", fixed$( f1z#[syllable], 3)
504
+ Set string value: row, "F2" , fixed$( f2 [syllable], 3)
505
+ Set string value: row, "F2z", fixed$( f2z#[syllable], 3)
506
+ Set string value: row, "F3" , fixed$( f3 [syllable], 3)
507
+ Set string value: row, "F3z", fixed$( f3z#[syllable], 3)
508
+ Set string value: row, "dF0" , fixed$( dF0 [syllable], 3)
509
+ Set string value: row, "dF0z", fixed$( dF0z#[syllable], 3)
510
+ Set string value: row, "dF1" , fixed$( dF1 [syllable], 3)
511
+ Set string value: row, "dF1z", fixed$( dF1z#[syllable], 3)
512
+ Set string value: row, "dF2" , fixed$( dF2 [syllable], 3)
513
+ Set string value: row, "dF2z", fixed$( dF2z#[syllable], 3)
514
+ Set string value: row, "dF3" , fixed$( dF3 [syllable], 3)
515
+ Set string value: row, "dF3z", fixed$( dF3z#[syllable], 3)
516
+ Set string value: row, "dqF0" , fixed$(dqF0 [syllable], 3)
517
+ Set string value: row, "dqF0z", fixed$(dqF0z#[syllable], 3)
518
+ Set string value: row, "dqF1" , fixed$(dqF1 [syllable], 3)
519
+ Set string value: row, "dqF1z", fixed$(dqF1z#[syllable], 3)
520
+ Set string value: row, "dqF2" , fixed$(dqF2 [syllable], 3)
521
+ Set string value: row, "dqF2z", fixed$(dqF2z#[syllable], 3)
522
+ Set string value: row, "dqF3" , fixed$(dqF3 [syllable], 3)
523
+ Set string value: row, "dqF3z", fixed$(dqF3z#[syllable], 3)
524
+ Set string value: row, "sdF0" , fixed$(sdF0 [syllable], 3)
525
+ Set string value: row, "sdF0z", fixed$(sdF0z#[syllable], 3)
526
+ Set string value: row, "sdF1" , fixed$(sdF1 [syllable], 3)
527
+ Set string value: row, "sdF1z", fixed$(sdF1z#[syllable], 3)
528
+ Set string value: row, "sdF2" , fixed$(sdF2 [syllable], 3)
529
+ Set string value: row, "sdF2z", fixed$(sdF2z#[syllable], 3)
530
+ Set string value: row, "sdF3" , fixed$(sdF3 [syllable], 3)
531
+ Set string value: row, "sdF3z", fixed$(sdF3z#[syllable], 3)
532
+ Set string value: row, "score", fixed$(score, 3)
533
+ endif
534
+ endfor
535
+ endproc
536
+
@@ -23,10 +23,6 @@ def extract_intervals(textgrid: TextGrid, speakers: list[str]) -> list[list[Inte
23
23
  for speaker in speakers:
24
24
  speaker_intervals = []
25
25
  for interval in textgrid[speaker]:
26
- # Cleaning of the interval text
27
- interval.text = (
28
- interval.text.encode().decode("unicode_escape").strip(" \n\r\t")
29
- )
30
26
  if interval.text:
31
27
  speaker_intervals.append(interval)
32
28
  speakers_intervals.append(speaker_intervals)
lingualabpy/tools/data.py CHANGED
@@ -1,8 +1,20 @@
1
+ from collections import UserDict
1
2
  from pandas import DataFrame
2
3
 
3
4
  from typing import Any, Dict, List
4
5
 
5
6
 
7
class UnchangeableDict(UserDict):
    """A dictionary that accepts new keys but never overwrites existing ones.

    Because ``UserDict`` routes ``update()`` and the constructor through
    ``__setitem__``, those entry points are protected as well.
    """

    def __setitem__(self, key: Any, item: Any) -> None:
        """Store *item* under *key*.

        Raises:
            ValueError: If *key* is already present.
        """
        if key in self.data:
            raise ValueError(f"duplicate key '{key}' found")
        super().__setitem__(key, item)
16
+
17
+
6
18
  def merge_participants_to_df(
7
19
  data_participants: List[Dict[Any, Any]],
8
20
  participant_col: str,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lingualabpy
3
- Version: 0.0.3
3
+ Version: 0.0.5
4
4
  Summary: Tools and utilities from the LINGUA laboratory
5
5
  Author-email: Christophe Bedetti <christophe.bedetti@umontreal.ca>
6
6
  Requires-Python: >=3.8.1
@@ -0,0 +1,23 @@
1
+ lingualabpy/__init__.py,sha256=_oiqa1RV-lhkXP60vELtzeIEbRJWn9DagQRqNSDzl-s,642
2
+ lingualabpy/io.py,sha256=r3Y781XKAea_T3V2L8aJlxoOEQcYCsdd7c-f-8NcEeI,1155
3
+ lingualabpy/audio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ lingualabpy/audio/metrics.py,sha256=u5FlADmqeYQOSpqhY2l1l8CSC4tfBV6cWp3g6Hri6bE,3502
5
+ lingualabpy/audio/triming.py,sha256=6CY9pH43KFGAPj8Nw34y1YnlOb8gxGLU1btcuRy-Hgc,288
6
+ lingualabpy/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ lingualabpy/cli/audio_metrics.py,sha256=N_kIKmro5bjFEKjHPNfmuY4G5TXQB9qegbc55pUch6Q,1803
8
+ lingualabpy/cli/audio_triming.py,sha256=H62FTRmwhha-qAXoZ450TNNf_3sHg69GATmbMHQBoCM,1566
9
+ lingualabpy/cli/docx2json.py,sha256=Bj5f89B76NtA7Xx71xXGnSucrDEyaH9mUFifQo0wfn4,590
10
+ lingualabpy/cli/jsons2csv.py,sha256=_AcIXiQUCF5SsKqMg6WjTr8fhbuflaJNFrCP91ccSYs,596
11
+ lingualabpy/resources/FilledPauses.praat,sha256=8tY0tAcG71KwnNhawgqyZ7vT__Udf5mSaVutH1eetd8,19623
12
+ lingualabpy/resources/syllablenucleiv3.praat,sha256=aZYRGB2iLGpYxL3ma-UMutQEyP_NvbbbF1yKhGvVcsA,35818
13
+ lingualabpy/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ lingualabpy/text/parser.py,sha256=qZqhzi-6UHdbsXEWi5IMxsDK5Tsosb3pdSo67hcA6To,913
15
+ lingualabpy/text/textgrid.py,sha256=nTVj3UOCJRsdybxAW0M2jlDq0eMMRYSSo6GdXYkH2wU,1459
16
+ lingualabpy/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
+ lingualabpy/tools/data.py,sha256=FU0_3TaeAZNCu1WpNIOkVBV3bYiEhI1rPw_l8q8z0gk,1523
18
+ lingualabpy/tools/interval.py,sha256=50lzbMTNHF26mPRG50mykCUQE3pdyRjPWMwsskwy0tg,2060
19
+ lingualabpy-0.0.5.dist-info/entry_points.txt,sha256=IXEsa7Cgqjph5bkKSBMXZIBVP4ocrRaSh13dFPBwBmE,247
20
+ lingualabpy-0.0.5.dist-info/LICENSE,sha256=s3hbMsmwGq2XFcxpMD3oHc8GSUeXAmPVXJbn7SYXdos,1095
21
+ lingualabpy-0.0.5.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
22
+ lingualabpy-0.0.5.dist-info/METADATA,sha256=ocw3j6BfTe0NSSKORBs-JjU5KacofzOi81fYRbZ47F4,1703
23
+ lingualabpy-0.0.5.dist-info/RECORD,,
@@ -1,4 +1,5 @@
1
1
  [console_scripts]
2
+ lingualabpy_audio_metrics=lingualabpy.cli.audio_metrics:main
2
3
  lingualabpy_audio_triming=lingualabpy.cli.audio_triming:main
3
4
  lingualabpy_docx2json=lingualabpy.cli.docx2json:main
4
5
  lingualabpy_jsons2csv=lingualabpy.cli.jsons2csv:main
@@ -1,19 +0,0 @@
1
- lingualabpy/__init__.py,sha256=Klpz9mrtYXzZ3eSXg7ciwak9mAkgKVC5G1w0uaMn7Q8,515
2
- lingualabpy/io.py,sha256=TF8eSuX_xfGWWbQ1C0TLnia7HS1Vexn0RqKMvCzHGnE,878
3
- lingualabpy/audio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- lingualabpy/audio/triming.py,sha256=6CY9pH43KFGAPj8Nw34y1YnlOb8gxGLU1btcuRy-Hgc,288
5
- lingualabpy/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- lingualabpy/cli/audio_triming.py,sha256=pAsLv2IuAKLoj8jBHB-SR5mZ7Jb0w26m41-Cya4VvoU,1194
7
- lingualabpy/cli/docx2json.py,sha256=Bj5f89B76NtA7Xx71xXGnSucrDEyaH9mUFifQo0wfn4,590
8
- lingualabpy/cli/jsons2csv.py,sha256=_AcIXiQUCF5SsKqMg6WjTr8fhbuflaJNFrCP91ccSYs,596
9
- lingualabpy/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- lingualabpy/text/parser.py,sha256=qZqhzi-6UHdbsXEWi5IMxsDK5Tsosb3pdSo67hcA6To,913
11
- lingualabpy/text/textgrid.py,sha256=LXdDAY4aEl3Q998Uq28fz0gryFj3KWq1j0RsuWOlEC0,1632
12
- lingualabpy/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- lingualabpy/tools/data.py,sha256=FTjxbckza65vZ_MWEO5wi4mDXpJ2u9KkiEA3-HGfOt8,1106
14
- lingualabpy/tools/interval.py,sha256=50lzbMTNHF26mPRG50mykCUQE3pdyRjPWMwsskwy0tg,2060
15
- lingualabpy-0.0.3.dist-info/entry_points.txt,sha256=QvnRy1hJXRGGbVQgS-u--5Rgs7rPBmgWC9K1iaxS5gQ,186
16
- lingualabpy-0.0.3.dist-info/LICENSE,sha256=s3hbMsmwGq2XFcxpMD3oHc8GSUeXAmPVXJbn7SYXdos,1095
17
- lingualabpy-0.0.3.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
18
- lingualabpy-0.0.3.dist-info/METADATA,sha256=JI1jTk5UA5CeLCcLO6HCC157WmeUW0aUTm0hKZaEXm8,1703
19
- lingualabpy-0.0.3.dist-info/RECORD,,