lingualabpy 0.0.4__tar.gz → 0.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. {lingualabpy-0.0.4 → lingualabpy-0.0.5}/PKG-INFO +1 -1
  2. lingualabpy-0.0.5/pyproject.toml +55 -0
  3. {lingualabpy-0.0.4 → lingualabpy-0.0.5}/src/lingualabpy/__init__.py +1 -1
  4. {lingualabpy-0.0.4 → lingualabpy-0.0.5}/src/lingualabpy/cli/audio_metrics.py +5 -3
  5. {lingualabpy-0.0.4 → lingualabpy-0.0.5}/src/lingualabpy/cli/audio_triming.py +13 -7
  6. {lingualabpy-0.0.4 → lingualabpy-0.0.5}/src/lingualabpy/io.py +8 -1
  7. lingualabpy-0.0.5/src/lingualabpy/resources/FilledPauses.praat +536 -0
  8. lingualabpy-0.0.5/src/lingualabpy/resources/syllablenucleiv3.praat +0 -0
  9. {lingualabpy-0.0.4 → lingualabpy-0.0.5}/src/lingualabpy/text/textgrid.py +0 -4
  10. lingualabpy-0.0.4/pyproject.toml +0 -55
  11. {lingualabpy-0.0.4 → lingualabpy-0.0.5}/LICENSE +0 -0
  12. {lingualabpy-0.0.4 → lingualabpy-0.0.5}/README.md +0 -0
  13. {lingualabpy-0.0.4 → lingualabpy-0.0.5}/src/lingualabpy/audio/__init__.py +0 -0
  14. {lingualabpy-0.0.4 → lingualabpy-0.0.5}/src/lingualabpy/audio/metrics.py +0 -0
  15. {lingualabpy-0.0.4 → lingualabpy-0.0.5}/src/lingualabpy/audio/triming.py +0 -0
  16. {lingualabpy-0.0.4 → lingualabpy-0.0.5}/src/lingualabpy/cli/__init__.py +0 -0
  17. {lingualabpy-0.0.4 → lingualabpy-0.0.5}/src/lingualabpy/cli/docx2json.py +0 -0
  18. {lingualabpy-0.0.4 → lingualabpy-0.0.5}/src/lingualabpy/cli/jsons2csv.py +0 -0
  19. {lingualabpy-0.0.4 → lingualabpy-0.0.5}/src/lingualabpy/text/__init__.py +0 -0
  20. {lingualabpy-0.0.4 → lingualabpy-0.0.5}/src/lingualabpy/text/parser.py +0 -0
  21. {lingualabpy-0.0.4 → lingualabpy-0.0.5}/src/lingualabpy/tools/__init__.py +0 -0
  22. {lingualabpy-0.0.4 → lingualabpy-0.0.5}/src/lingualabpy/tools/data.py +0 -0
  23. {lingualabpy-0.0.4 → lingualabpy-0.0.5}/src/lingualabpy/tools/interval.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lingualabpy
3
- Version: 0.0.4
3
+ Version: 0.0.5
4
4
  Summary: Tools and utilities from the LINGUA laboratory
5
5
  Author-email: Christophe Bedetti <christophe.bedetti@umontreal.ca>
6
6
  Requires-Python: >=3.8.1
@@ -0,0 +1,55 @@
1
+ [build-system]
2
+ requires = ["flit_core >=3.2,<4"]
3
+ build-backend = "flit_core.buildapi"
4
+
5
+ [project]
6
+ name = "lingualabpy"
7
+ authors = [
8
+ { name = "Christophe Bedetti", email = "christophe.bedetti@umontreal.ca" },
9
+ ]
10
+ license = { file = "LICENSE" }
11
+ description = "Tools and utilities from the LINGUA laboratory"
12
+ readme = "README.md"
13
+ classifiers = [
14
+ "Development Status :: 3 - Alpha",
15
+ "Intended Audience :: Developers",
16
+ "Intended Audience :: Science/Research",
17
+ "Topic :: Scientific/Engineering",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Operating System :: POSIX :: Linux",
20
+ "Operating System :: MacOS :: MacOS X",
21
+ "Operating System :: Microsoft :: Windows",
22
+ "Programming Language :: Python :: 3 :: Only",
23
+ "Programming Language :: Python :: 3.8",
24
+ "Programming Language :: Python :: 3.9",
25
+ "Programming Language :: Python :: 3.10",
26
+ "Programming Language :: Python :: 3.11",
27
+ ]
28
+ requires-python = ">=3.8.1"
29
+ dynamic = ["version"]
30
+
31
+ dependencies = ["click", "pandas", "praat-parselmouth", "praat-textgrids", "pydub", "python-docx"]
32
+
33
+ [project.optional-dependencies]
34
+ test = ["pytest", "pytest-cov"]
35
+ doc = []
36
+ lint = ["black"]
37
+ feature = []
38
+ dev = ["lingualabpy[test, doc, lint, feature]"]
39
+
40
+ [project.scripts]
41
+ lingualabpy_audio_metrics = "lingualabpy.cli.audio_metrics:main"
42
+ lingualabpy_audio_triming = "lingualabpy.cli.audio_triming:main"
43
+ lingualabpy_docx2json = "lingualabpy.cli.docx2json:main"
44
+ lingualabpy_jsons2csv = "lingualabpy.cli.jsons2csv:main"
45
+
46
+ [project.urls]
47
+ Documentation = "https://github.com/lingualab/lingualabpy"
48
+ Source = "https://github.com/lingualab/lingualabpy"
49
+ Tracker = "https://github.com/lingualab/lingualabpy/issues"
50
+
51
+ [tool.flit.module]
52
+ name = "lingualabpy"
53
+
54
+ [tool.pytest.ini_options]
55
+ addopts = "--cov=src --cov-report=html"
@@ -4,7 +4,7 @@
4
4
  """lingualabpy"""
5
5
  from __future__ import annotations
6
6
 
7
- __version__ = "0.0.4"
7
+ __version__ = "0.0.5"
8
8
 
9
9
  default_config = {
10
10
  "participant_col": "participant_id",
@@ -48,10 +48,12 @@ def main(sex, f0min, f0max, unit_frequency, participant_id, output_json, audiofi
48
48
 
49
49
  if participant_id:
50
50
  metrics["participant_id"] = participant_id
51
- else:
52
- metrics["participant_id"] = audiofile_stem.split("_")[0]
51
+
52
+ audiofile = Path(audiofile)
53
+
54
+ metrics["filename"] = audiofile.name
53
55
 
54
56
  if not output_json:
55
- output_json = audiofile_stem + "_metric-audio.json"
57
+ output_json = audiofile.stem + "_metric-audio.json"
56
58
 
57
59
  write_json(dict(metrics), output_json)
@@ -3,7 +3,7 @@ import click
3
3
  from lingualabpy import default_config, read_audio, read_textgrid
4
4
  from lingualabpy.audio.triming import extract_audio
5
5
  from lingualabpy.text.textgrid import extract_intervals
6
- from lingualabpy.tools.interval import intervals_masking
6
+ from lingualabpy.tools.interval import intervals_masking, interval_to_list
7
7
 
8
8
 
9
9
  @click.command()
@@ -17,10 +17,13 @@ from lingualabpy.tools.interval import intervals_masking
17
17
  default=default_config["clinician_label"],
18
18
  show_default=True,
19
19
  )
20
+ @click.option("--remove_overlap", is_flag=True, show_default=True)
20
21
  @click.argument("textgrid", nargs=1, type=click.Path(exists=True))
21
22
  @click.argument("audiofile", nargs=1, type=click.Path(exists=True))
22
23
  @click.argument("output", nargs=1)
23
- def main(participant_label, clinician_label, textgrid, audiofile, output):
24
+ def main(
25
+ participant_label, clinician_label, remove_overlap, textgrid, audiofile, output
26
+ ):
24
27
  """Doc"""
25
28
  grid = read_textgrid(textgrid)
26
29
 
@@ -31,12 +34,15 @@ def main(participant_label, clinician_label, textgrid, audiofile, output):
31
34
  except Exception as e:
32
35
  raise Exception(f"Failed to extract intervals for {textgrid}", repr(e))
33
36
 
34
- participant_intervals_clean = intervals_masking(
35
- participant_intervals, clinician_intervals
36
- )
37
+ if remove_overlap:
38
+ participant_intervals = intervals_masking(
39
+ participant_intervals, clinician_intervals
40
+ )
41
+ else:
42
+ participant_intervals = map(interval_to_list, participant_intervals)
37
43
 
38
44
  audio = read_audio(audiofile)
39
45
 
40
- audio_clean = extract_audio(audio, participant_intervals_clean)
46
+ audio_clean = extract_audio(audio, participant_intervals)
41
47
 
42
- audio_clean.export(output)
48
+ audio_clean.export(output, format="wav")
@@ -39,4 +39,11 @@ def write_json(data: Union[list, dict], json_path: str) -> None:
39
39
  # .TextGrid files
40
40
  def read_textgrid(textgrid_path: str) -> TextGrid:
41
41
  """"""
42
- return TextGrid(textgrid_path)
42
+ textgrid = TextGrid(textgrid_path)
43
+ # Cleaning of the interval text
44
+ for intervals in textgrid.values():
45
+ for interval in intervals:
46
+ interval.text = (
47
+ interval.text.encode().decode("unicode_escape").strip(" \n\r\t")
48
+ )
49
+ return textgrid
@@ -0,0 +1,536 @@
1
+ # PRAAT SCRIPT FILLED PAUSES
2
+ # Set Syllable Boundaries at -6 dB around points in the tier "Nuclei" (as set by the
3
+ # script "SyllableNucleiv3.praat"), compute a number of global (speaker specific) and
4
+ # local (syllable specific) parameters for automatic detection of Filled Pauses.
5
+ #
6
+ # Optionally, save the local parameters for all syllables in a table.
7
+ #
8
+ # J J A Pacilly, 1-nov-2019, for Nivja de Jong, on behalf of:
9
+ # British Council, Aptis Research Grants
10
+ # J J A Pacilly, 14-feb-2020, retain max. similarity with version for internal use
11
+ #
12
+ # Note that this script is used by "SyllableNucleiv3.praat", but it can also
13
+ # be used as a standalone script with a selected Sound and Textgrid object
14
+ # as long as this TextGrid contains a pointTier with the name "Nuclei".
15
+ #
16
+ # The settings and thresholds in this script are trained and tested on
17
+ # Dutch and English L2 data. See the ARAG report by De Jong and
18
+ # Pacilly (2019) for more information.
19
+ #
20
+ # Copyright (C) 2019 - J J A Pacilly & N H de Jong, LUCL - Universiteit Leiden
21
+ #
22
+ # This program is free software: you can redistribute it and/or modify
23
+ # it under the terms of the GNU General Public License as published by
24
+ # the Free Software Foundation, either version 3 of the License, or
25
+ # (at your option) any later version.
26
+ #
27
+ # This program is distributed in the hope that it will be useful,
28
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
29
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
30
+ # See the GNU General Public License for more details.
31
+ #
32
+ # You should have received a copy of the GNU General Public License
33
+ # along with this program. If not, see http://www.gnu.org/licenses/
34
+
35
+ form Detect Filled Pauses
36
+ optionmenu Language 1
37
+ option English
38
+ # option Mandarin (not yet implemented)
39
+ # option Spanish (not yet implemented)
40
+ option Dutch
41
+ real Filled_Pause_threshold 1.00 ; cut-off higher/lower
42
+ boolean Save_Table 0
43
+ endform
44
+
45
+ idSnd = selected ("Sound")
46
+ name$ = selected$("Sound")
47
+ idTG = selected ("TextGrid")
48
+
49
+ @setSB: idSnd, idTG ; set/replace tier(s) and define vectors for initial analysis
50
+
51
+ if nrSyllables
52
+ @doGlobalAnalyses: idSnd ; do global analysis of ALL syllables identified by tier Nuclei
53
+ @sdF0: idSnd ; fills the arrays dF0[], dqF0[] and sdF0[]
54
+ @replaceUndefinedF0: 0 ; replace Undefined values by mean
55
+ @sdFmt: idSnd ; fills the arrays dF1-3[], dqF1-3[] and sdF1-3[]
56
+ @processData: idTG, name$, "Auto" ; create Auto table, set labels
57
+ idTableAuto = processData.idTable
58
+ endif
59
+
60
+ selectObject: idSnd, idTG
61
+ if idTableAuto
62
+ plusObject: idTableAuto
63
+ endif
64
+
65
+
66
+ procedure setSB: .idSnd, .idTG
67
+
68
+ # For testing, allow successive runs of this script
69
+
70
+ selectObject: .idTG
71
+ .tierNuclei = 0
72
+ .tierPhrases = 0
73
+ .tierAuto = 0
74
+ .nrTiers = Get number of tiers
75
+ for .tier to .nrTiers
76
+ .name$ = Get tier name: .tier
77
+ if .name$ == "Nuclei"
78
+ .tierNuclei = .tier
79
+ elif .name$ == "Phrases"
80
+ .tierPhrases = .tier
81
+ elif .name$ == "DFauto" + " ('language$')"
82
+ Remove tier: .tier
83
+ Insert interval tier: .tier, "DFauto" + " ('language$')"
84
+ .tierAuto = .tier
85
+ elif left$(.name$, 5) == "DFman" or left$(.name$, 5) == "dfMan"
86
+ .tierMan = .tier
87
+ endif
88
+ endfor
89
+
90
+ if .tierNuclei == 0
91
+ exitScript: "No tier ""Nuclei"" found, please run ""SyllableNuclei.praat"" first."
92
+ endif
93
+
94
+ nrSyllables = Get number of points: .tierNuclei
95
+ d# = zero#(nrSyllables)
96
+
97
+ tNuc[0] = Get start time
98
+ tNuc[nrSyllables+1] = Get end time
99
+ for syllable to nrSyllables
100
+ tNuc[syllable] = Get time of point: .tierNuclei, syllable
101
+ if .tierPhrases
102
+ .iPhrase = Get interval at time: .tierPhrases, tNuc[syllable]
103
+ .tFromPhrase[syllable] = Get start time of interval: .tierPhrases, .iPhrase
104
+ .tToPhrase[syllable] = Get end time of interval: .tierPhrases, .iPhrase
105
+ endif
106
+ endfor
107
+
108
+ # Get minimum Intensity *between* Nuclei
109
+
110
+ selectObject: .idSnd
111
+ .idInt = To Intensity: 100, 0, "yes"
112
+ nrFrames = Get number of frames
113
+ for syllable to nrSyllables+1
114
+ tSBMin[syllable] = Get time of minimum: tNuc[syllable-1], tNuc[syllable], "Parabolic"
115
+ endfor
116
+ tMeanSyllable = (tSBMin[nrSyllables+1] - tSBMin[1]) / nrSyllables
117
+
118
+ # Find -6 dB *around* Nuclei but avoid that these boundaries cross
119
+ # the 'minimum Intensity boundaries' (yields twice as many intervals)
120
+
121
+ for syllable to nrSyllables
122
+ frNuc = Get frame number from time: tNuc[syllable]
123
+ frNuc = round(frNuc)
124
+ dBNuc = Get value in frame: frNuc
125
+
126
+ frFrom = frNuc
127
+ repeat
128
+ frFrom -= 1
129
+ dBL = Get value in frame: frFrom
130
+ tL = Get time from frame number: frFrom
131
+ until dBL < dBNuc - 6 or tL < tSBMin[syllable] or frFrom < 2
132
+ tFrom[syllable] = Get time from frame number: frFrom
133
+
134
+ frTo = frNuc
135
+ repeat
136
+ frTo += 1
137
+ dBR = Get value in frame: frTo
138
+ tR = Get time from frame number: frTo
139
+ until dBR < dBNuc - 6 or tR > tSBMin[syllable+1] or frTo > nrFrames-1
140
+ tTo[syllable] = Get time from frame number: frTo
141
+
142
+ d6Org[syllable] = tTo[syllable] - tFrom[syllable]
143
+
144
+ if .tierPhrases
145
+ tFrom[syllable] = max(.tFromPhrase[syllable], tFrom[syllable])
146
+ tTo [syllable] = min(.tToPhrase [syllable], tTo [syllable])
147
+ endif
148
+ endfor
149
+
150
+ # only the boundaries *around* Nuclei are being used
151
+
152
+ selectObject: .idTG
153
+ if not .tierAuto
154
+ Insert interval tier: .nrTiers + 1, "DFauto" + " ('language$')"
155
+ .tierAuto = .nrTiers + 1
156
+ endif
157
+
158
+ for syllable to nrSyllables
159
+ # Insert boundary: .tierMin, tSBMin[syllable]
160
+
161
+ if tFrom[syllable] > tSBMin[syllable]
162
+ Insert boundary: .tierAuto, tFrom[syllable]
163
+ ts[syllable] = tFrom[syllable]
164
+ else
165
+ Insert boundary: .tierAuto, tSBMin[syllable]+0.00005
166
+ ts[syllable] = tSBMin[syllable]
167
+ endif
168
+ if tTo[syllable] < tSBMin[syllable+1]
169
+ Insert boundary: .tierAuto, tTo[syllable]
170
+ te[syllable] = tTo[syllable]
171
+ else
172
+ Insert boundary: .tierAuto, tSBMin[syllable+1]-0.00005
173
+ te[syllable] = tSBMin[syllable+1]
174
+ endif
175
+
176
+ d [syllable] = te[syllable] - ts[syllable]
177
+ d#[syllable] = te[syllable] - ts[syllable]
178
+
179
+ endfor
180
+ # Insert boundary: .tierMin, tSBMin[nrSyllables+1]
181
+ removeObject: .idInt
182
+ endproc
183
+
184
+ procedure doGlobalAnalyses: .idSnd
185
+
186
+ # Concatenate all Syllables
187
+
188
+ .id# = zero#(nrSyllables)
189
+ for syllable to nrSyllables
190
+ selectObject: .idSnd
191
+ .id#[syllable] = Extract part: ts[syllable], te[syllable], "rectangular", 1, "no"
192
+ endfor
193
+ selectObject: .id#
194
+ .idSndTmp = Concatenate with overlap: 0.01
195
+ removeObject: .id#
196
+
197
+ # Perform Initial Global Pitch analysis to determine Global Quantile
198
+ selectObject: .idSndTmp
199
+ .idPTmpInit = noprogress To Pitch (ac): 0.02, 30, 4, "no", 0.03, 0.25, 0.01, 0.35, 0.25, 450
200
+ .qGlobF0Init = Get quantile: 0, 0, 0.5, "Hertz"
201
+
202
+ # Perform Global Pitch and Formant analysis
203
+ selectObject: .idSndTmp
204
+ .idPTmp = noprogress To Pitch (ac): 0.02, 30, 4, "no", 0.03, 0.25, 0.01, 0.35, 0.25, 2.5 * .qGlobF0Init
205
+ qGlobF0 = Get quantile: 0, 0, 0.5, "semitones re 100 Hz"
206
+
207
+ selectObject: .idSndTmp
208
+ .idFmtTmp = noprogress To Formant (burg): 0, 4, 4000 + 4 * (.qGlobF0Init - 100), 0.025, 50
209
+ qGlobF1 = Get quantile: 1, 0, 0, "bark", 0.5
210
+ qGlobF2 = Get quantile: 2, 0, 0, "bark", 0.5
211
+ qGlobF3 = Get quantile: 3, 0, 0, "bark", 0.5
212
+ # appendFileLine: "FilledPauses.txt", name$, tab$, "qGlobF0: (",
213
+ #.. fixed$(.qGlobF0Init, 1), "/", fixed$(qGlobF0, 1), ")"
214
+
215
+ removeObject: .idSndTmp, .idPTmpInit, .idPTmp, .idFmtTmp
216
+ endproc
217
+
218
+ procedure sdF0: .idSnd
219
+ selectObject: .idSnd
220
+
221
+ f0# = zero#(nrSyllables)
222
+ dF0# = zero#(nrSyllables)
223
+ dqF0# = zero#(nrSyllables)
224
+ sdF0# = zero#(nrSyllables)
225
+
226
+ .idF0 = noprogress To Pitch (ac): 0.02, 30, 4, "no", 0.03, 0.25, 0.01, 0.35, 0.25, 2.5 * doGlobalAnalyses.qGlobF0Init
227
+
228
+ for syllable to nrSyllables
229
+ q50F0 = Get quantile: ts[syllable], te[syllable], 0.50, "semitones re 100 Hz"
230
+ f0 [syllable] = q50F0
231
+ f0#[syllable] = f0 [syllable]
232
+
233
+ dF0 [syllable] = qGlobF0 - q50F0
234
+ dF0#[syllable] = dF0[syllable]
235
+
236
+ q95F0 = Get quantile: ts[syllable], te[syllable], 0.95, "semitones re 100 Hz"
237
+ q05F0 = Get quantile: ts[syllable], te[syllable], 0.05, "semitones re 100 Hz"
238
+ dqF0 [syllable] = q95F0 - q05F0
239
+ dqF0#[syllable] = dqF0 [syllable]
240
+
241
+ sdF0 [syllable] = Get standard deviation: ts[syllable], te[syllable], "semitones"
242
+ sdF0#[syllable] = sdF0 [syllable]
243
+ endfor
244
+ removeObject: .idF0
245
+ endproc
246
+
247
+ procedure replaceUndefinedF0: .dummy
248
+ # small amounts (< 10%) of undefined F0 values are replaced by MEAN without warning
249
+ nrUndef# = zero#(4)
250
+ total# = zero#(4)
251
+ for syllable to nrSyllables
252
+ if f0#[syllable] == undefined
253
+ nrUndef#[1] = nrUndef#[1] + 1
254
+ listUndefined[1, nrUndef#[1]] = syllable
255
+ else
256
+ total#[1] = total#[1] + f0#[syllable]
257
+ endif
258
+ if dF0#[syllable] == undefined
259
+ nrUndef#[2] = nrUndef#[2] + 1
260
+ listUndefined[2, nrUndef#[2]] = syllable
261
+ else
262
+ total#[2] = total#[2] + dF0#[syllable]
263
+ endif
264
+ if dqF0#[syllable] == undefined
265
+ nrUndef#[3] = nrUndef#[3] + 1
266
+ listUndefined[3, nrUndef#[3]] = syllable
267
+ else
268
+ total#[3] = total#[3] + dqF0#[syllable]
269
+ endif
270
+ if sdF0#[syllable] == undefined
271
+ nrUndef#[4] = nrUndef#[4] + 1
272
+ listUndefined[4, nrUndef#[4]] = syllable
273
+ else
274
+ total#[4] = total#[4] + sdF0#[syllable]
275
+ endif
276
+ endfor
277
+ mean__F0 = total#[1] / (nrSyllables - nrUndef#[1])
278
+ mean_dF0 = total#[2] / (nrSyllables - nrUndef#[2])
279
+ meandqF0 = total#[3] / (nrSyllables - nrUndef#[3])
280
+ meansdF0 = total#[4] / (nrSyllables - nrUndef#[4])
281
+ for syllable to nrUndef#[1]
282
+ f0#[listUndefined[1, syllable]] = mean__F0
283
+ if syllable == 1 and nrUndef#[1] > nrSyllables / 10
284
+ appendInfoLine: "Warning: replaced ", nrUndef#[1], "/'nrSyllables' F0 values by mean ('mean__F0:3') in 'name$'."
285
+ endif
286
+ endfor
287
+ for syllable to nrUndef#[2]
288
+ dF0#[listUndefined[2, syllable]] = mean_dF0
289
+ if syllable == 1 and nrUndef#[2] > nrSyllables / 10
290
+ appendInfoLine: "Warning: replaced ", nrUndef#[2], "/'nrSyllables' dF0 values by mean ('mean_dF0:3') in 'name$'."
291
+ endif
292
+ endfor
293
+ for syllable to nrUndef#[3]
294
+ dqF0#[listUndefined[3, syllable]] = meandqF0
295
+ if syllable == 1 and nrUndef#[3] > nrSyllables / 10
296
+ appendInfoLine: "Warning: replaced ", nrUndef#[3], "/'nrSyllables' dqF0 values by mean ('meandqF0:3') in 'name$'."
297
+ endif
298
+ endfor
299
+ for syllable to nrUndef#[4]
300
+ sdF0#[listUndefined[4, syllable]] = meansdF0
301
+ if syllable == 1 and nrUndef#[4] > nrSyllables / 10
302
+ appendInfoLine: "Warning: replaced ", nrUndef#[4], "/'nrSyllables' sdF0 values by mean ('meansdF0:3') in 'name$'."
303
+ endif
304
+ endfor
305
+ endproc
306
+
307
+ procedure sdFmt: .idSnd
308
+ selectObject: .idSnd
309
+ .idFmt = noprogress To Formant (burg): 0, 4, 4000 + 4 * (doGlobalAnalyses.qGlobF0Init - 100), 0.025, 50
310
+
311
+ f1# = zero#(nrSyllables)
312
+ f2# = zero#(nrSyllables)
313
+ f3# = zero#(nrSyllables)
314
+ dF1# = zero#(nrSyllables)
315
+ dF2# = zero#(nrSyllables)
316
+ dF3# = zero#(nrSyllables)
317
+ dqF1# = zero#(nrSyllables)
318
+ dqF2# = zero#(nrSyllables)
319
+ dqF3# = zero#(nrSyllables)
320
+ sdF1# = zero#(nrSyllables)
321
+ sdF2# = zero#(nrSyllables)
322
+ sdF3# = zero#(nrSyllables)
323
+
324
+ for syllable to nrSyllables
325
+ fs = Get frame number from time: ts[syllable]
326
+ fs = round(fs)
327
+ if fs < 1 ; are these frame numbers reliable ?!?
328
+ fs = 1
329
+ endif
330
+ fe = Get frame number from time: te[syllable]
331
+ fe = round(fe)
332
+ f1 [syllable] = Get quantile: 1, ts[syllable], te[syllable], "bark", 0.5
333
+ f2 [syllable] = Get quantile: 2, ts[syllable], te[syllable], "bark", 0.5
334
+ f3 [syllable] = Get quantile: 3, ts[syllable], te[syllable], "bark", 0.5
335
+ f1#[syllable] = f1[syllable]
336
+ f2#[syllable] = f2[syllable]
337
+ f3#[syllable] = f3[syllable]
338
+
339
+ dF1[syllable] = 0
340
+ dF2[syllable] = 0
341
+ dF3[syllable] = 0
342
+ for frame from fs to fe
343
+ t = Get time from frame number: frame
344
+ lF1 = Get value at time: 1, t, "bark", "Linear"
345
+ lF2 = Get value at time: 2, t, "bark", "Linear"
346
+ lF3 = Get value at time: 3, t, "bark", "Linear"
347
+ if lF1 <> undefined
348
+ dF1[syllable] += abs(qGlobF1 - lF1)
349
+ endif
350
+ if lF2 <> undefined
351
+ dF2[syllable] += abs(qGlobF2 - lF2)
352
+ endif
353
+ if lF3 <> undefined
354
+ dF3[syllable] += abs(qGlobF3 - lF3)
355
+ endif
356
+ endfor
357
+ dF1 [syllable] /= (fe-fs+1)
358
+ dF2 [syllable] /= (fe-fs+1)
359
+ dF3 [syllable] /= (fe-fs+1)
360
+ dF1#[syllable] = dF1[syllable]
361
+ dF2#[syllable] = dF2[syllable]
362
+ dF3#[syllable] = dF3[syllable]
363
+
364
+ q95F1 = Get quantile: 1, ts[syllable], te[syllable], "bark", 0.95
365
+ q05F1 = Get quantile: 1, ts[syllable], te[syllable], "bark", 0.05
366
+ q95F2 = Get quantile: 2, ts[syllable], te[syllable], "bark", 0.95
367
+ q05F2 = Get quantile: 2, ts[syllable], te[syllable], "bark", 0.05
368
+ q95F3 = Get quantile: 3, ts[syllable], te[syllable], "bark", 0.95
369
+ q05F3 = Get quantile: 3, ts[syllable], te[syllable], "bark", 0.05
370
+ dqF1 [syllable] = q95F1 - q05F1
371
+ dqF2 [syllable] = q95F2 - q05F2
372
+ dqF3 [syllable] = q95F3 - q05F3
373
+ dqF1#[syllable] = dqF1[syllable]
374
+ dqF2#[syllable] = dqF2[syllable]
375
+ dqF3#[syllable] = dqF3[syllable]
376
+
377
+ sdF1 [syllable] = Get standard deviation: 1, ts[syllable], te[syllable], "bark"
378
+ sdF2 [syllable] = Get standard deviation: 2, ts[syllable], te[syllable], "bark"
379
+ sdF3 [syllable] = Get standard deviation: 3, ts[syllable], te[syllable], "bark"
380
+ sdF1#[syllable] = sdF1[syllable]
381
+ sdF2#[syllable] = sdF2[syllable]
382
+ sdF3#[syllable] = sdF3[syllable]
383
+ endfor
384
+ removeObject: .idFmt
385
+ endproc
386
+
387
+ procedure processData: .idTG, .name$, .type$
388
+
389
+ if save_Table
390
+ .idTable = Create Table with column names: .name$, 0, "type ts dur durz F0 F0z F1 F1z F2 F2z F3 F3z dF0 dF0z dF1 dF1z dF2 dF2z dF3 dF3z
391
+ ... dqF0 dqF0z dqF1 dqF1z dqF2 dqF2z dqF3 dqF3z sdF0 sdF0z sdF1 sdF1z sdF2 sdF2z sdF3 sdF3z score"
392
+ else
393
+ .idTable = 0
394
+ endif
395
+
396
+ if .type$ == "Auto"
397
+ mean_d = mean( d#)
398
+ sd_d = stdev( d#)
399
+ mean_F0 = mean( f0#)
400
+ sd_F0 = stdev( f0#)
401
+ mean_F1 = mean( f1#)
402
+ sd_F1 = stdev( f1#)
403
+ mean_F2 = mean( f2#)
404
+ sd_F2 = stdev( f2#)
405
+ mean_F3 = mean( f3#)
406
+ sd_F3 = stdev( f3#)
407
+ mean_dF0 = mean( dF0#)
408
+ sd_dF0 = stdev( dF0#)
409
+ mean_dF1 = mean( dF1#)
410
+ sd_dF1 = stdev( dF1#)
411
+ mean_dF2 = mean( dF2#)
412
+ sd_dF2 = stdev( dF2#)
413
+ mean_dF3 = mean( dF3#)
414
+ sd_dF3 = stdev( dF3#)
415
+ mean_dqF0 = mean(dqF0#)
416
+ sd_dqF0 = stdev(dqF0#)
417
+ mean_dqF1 = mean(dqF1#)
418
+ sd_dqF1 = stdev(dqF1#)
419
+ mean_dqF2 = mean(dqF2#)
420
+ sd_dqF2 = stdev(dqF2#)
421
+ mean_dqF3 = mean(dqF3#)
422
+ sd_dqF3 = stdev(dqF3#)
423
+ mean_sdF0 = mean(sdF0#)
424
+ sd_sdF0 = stdev(sdF0#)
425
+ mean_sdF1 = mean(sdF1#)
426
+ sd_sdF1 = stdev(sdF1#)
427
+ mean_sdF2 = mean(sdF2#)
428
+ sd_sdF2 = stdev(sdF2#)
429
+ mean_sdF3 = mean(sdF3#)
430
+ sd_sdF3 = stdev(sdF3#)
431
+ endif
432
+
433
+ # z-transform data
434
+
435
+ dz# = ( d# - mean_d ) / sd_d
436
+ f0z# = ( f0# - mean_F0 ) / sd_F0
437
+ f1z# = ( f1# - mean_F1 ) / sd_F1
438
+ f2z# = ( f2# - mean_F2 ) / sd_F2
439
+ f3z# = ( f3# - mean_F3 ) / sd_F3
440
+ dF0z# = ( dF0# - mean_dF0 ) / sd_dF0
441
+ dF1z# = ( dF1# - mean_dF1 ) / sd_dF1
442
+ dF2z# = ( dF2# - mean_dF2 ) / sd_dF2
443
+ dF3z# = ( dF3# - mean_dF3 ) / sd_dF3
444
+ dqF0z# = (dqF0# - mean_dqF0) / sd_dqF0
445
+ dqF1z# = (dqF1# - mean_dqF1) / sd_dqF1
446
+ dqF2z# = (dqF2# - mean_dqF2) / sd_dqF2
447
+ dqF3z# = (dqF3# - mean_dqF3) / sd_dqF3
448
+ sdF0z# = (sdF0# - mean_sdF0) / sd_sdF0
449
+ sdF1z# = (sdF1# - mean_sdF1) / sd_sdF1
450
+ sdF2z# = (sdF2# - mean_sdF2) / sd_sdF2
451
+ sdF3z# = (sdF3# - mean_sdF3) / sd_sdF3
452
+
453
+ for syllable to nrSyllables
454
+ selectObject: .idTG
455
+
456
+ scoreUK = 4.73 * sqrt( d[syllable]) - 0.29 * f0z#[syllable]
457
+ ... - 0.32 * sqrt(sdF1[syllable]) - 0.10 * sqrt(dF1[syllable])
458
+ ... - 1.38 * sqrt(sdF2[syllable]) - 0.80 * sqrt(dF2[syllable])
459
+ ... - 0.20 * (f2[syllable] - f1[syllable])
460
+ ... + 0.31 * f3[syllable]
461
+
462
+ scoreNL = 8.62 * sqrt( d[syllable]) - 0.36 * f0z#[syllable]
463
+ ... - 0.72 * sqrt(dF1[syllable])
464
+ ... - 1.36 * sqrt(sdF2[syllable]) - 1.62 * sqrt(dF2[syllable])
465
+ ... - 1.02 * sqrt(sdF3[syllable])
466
+ ... - 0.11 * (f2[syllable] - f1[syllable])
467
+ ... + 0.21 * f3[syllable]
468
+
469
+ lbl2$ = Get label of interval: setSB.tierAuto, 2*syllable
470
+
471
+ if language$ == "English"
472
+ score = scoreUK
473
+ if score > 3.4942 * filled_Pause_threshold
474
+ lbl2$ += "fp"
475
+ endif
476
+ elif language$ == "Dutch"
477
+ score = scoreNL
478
+ if score > 2.7094 * filled_Pause_threshold
479
+ lbl2$ += "fp"
480
+ endif
481
+ else
482
+ exitScript: "Language not supported."
483
+ endif
484
+
485
+ if .type$ == "Auto"
486
+ type$ [syllable] = lbl2$
487
+ Set interval text: setSB.tierAuto, 2*syllable, lbl2$
488
+ # Set interval text: setSB.tierAuto, 2*syllable, fixed$(score, 3)
489
+ endif
490
+
491
+ if save_Table
492
+ selectObject: .idTable
493
+ Append row
494
+ row = Get number of rows
495
+
496
+ Set string value: row, "type" , type$ [syllable]
497
+ Set string value: row, "ts", fixed$(ts [syllable], 3)
498
+ Set string value: row, "dur" , fixed$( d [syllable], 3)
499
+ Set string value: row, "durz", fixed$( dz# [syllable], 3)
500
+ Set string value: row, "F0" , fixed$( f0 [syllable], 3)
501
+ Set string value: row, "F0z", fixed$( f0z#[syllable], 3)
502
+ Set string value: row, "F1" , fixed$( f1 [syllable], 3)
503
+ Set string value: row, "F1z", fixed$( f1z#[syllable], 3)
504
+ Set string value: row, "F2" , fixed$( f2 [syllable], 3)
505
+ Set string value: row, "F2z", fixed$( f2z#[syllable], 3)
506
+ Set string value: row, "F3" , fixed$( f3 [syllable], 3)
507
+ Set string value: row, "F3z", fixed$( f3z#[syllable], 3)
508
+ Set string value: row, "dF0" , fixed$( dF0 [syllable], 3)
509
+ Set string value: row, "dF0z", fixed$( dF0z#[syllable], 3)
510
+ Set string value: row, "dF1" , fixed$( dF1 [syllable], 3)
511
+ Set string value: row, "dF1z", fixed$( dF1z#[syllable], 3)
512
+ Set string value: row, "dF2" , fixed$( dF2 [syllable], 3)
513
+ Set string value: row, "dF2z", fixed$( dF2z#[syllable], 3)
514
+ Set string value: row, "dF3" , fixed$( dF3 [syllable], 3)
515
+ Set string value: row, "dF3z", fixed$( dF3z#[syllable], 3)
516
+ Set string value: row, "dqF0" , fixed$(dqF0 [syllable], 3)
517
+ Set string value: row, "dqF0z", fixed$(dqF0z#[syllable], 3)
518
+ Set string value: row, "dqF1" , fixed$(dqF1 [syllable], 3)
519
+ Set string value: row, "dqF1z", fixed$(dqF1z#[syllable], 3)
520
+ Set string value: row, "dqF2" , fixed$(dqF2 [syllable], 3)
521
+ Set string value: row, "dqF2z", fixed$(dqF2z#[syllable], 3)
522
+ Set string value: row, "dqF3" , fixed$(dqF3 [syllable], 3)
523
+ Set string value: row, "dqF3z", fixed$(dqF3z#[syllable], 3)
524
+ Set string value: row, "sdF0" , fixed$(sdF0 [syllable], 3)
525
+ Set string value: row, "sdF0z", fixed$(sdF0z#[syllable], 3)
526
+ Set string value: row, "sdF1" , fixed$(sdF1 [syllable], 3)
527
+ Set string value: row, "sdF1z", fixed$(sdF1z#[syllable], 3)
528
+ Set string value: row, "sdF2" , fixed$(sdF2 [syllable], 3)
529
+ Set string value: row, "sdF2z", fixed$(sdF2z#[syllable], 3)
530
+ Set string value: row, "sdF3" , fixed$(sdF3 [syllable], 3)
531
+ Set string value: row, "sdF3z", fixed$(sdF3z#[syllable], 3)
532
+ Set string value: row, "score", fixed$(score, 3)
533
+ endif
534
+ endfor
535
+ endproc
536
+
@@ -23,10 +23,6 @@ def extract_intervals(textgrid: TextGrid, speakers: list[str]) -> list[list[Inte
23
23
  for speaker in speakers:
24
24
  speaker_intervals = []
25
25
  for interval in textgrid[speaker]:
26
- # Cleaning of the interval text
27
- interval.text = (
28
- interval.text.encode().decode("unicode_escape").strip(" \n\r\t")
29
- )
30
26
  if interval.text:
31
27
  speaker_intervals.append(interval)
32
28
  speakers_intervals.append(speaker_intervals)
@@ -1,55 +0,0 @@
1
- [build-system]
2
- requires = ['flit_core >=3.2,<4']
3
- build-backend = 'flit_core.buildapi'
4
-
5
- [project]
6
- name = 'lingualabpy'
7
- authors = [
8
- { name = 'Christophe Bedetti', email = 'christophe.bedetti@umontreal.ca' },
9
- ]
10
- license = { file = 'LICENSE' }
11
- description = 'Tools and utilities from the LINGUA laboratory'
12
- readme = 'README.md'
13
- classifiers = [
14
- 'Development Status :: 3 - Alpha',
15
- 'Intended Audience :: Developers',
16
- 'Intended Audience :: Science/Research',
17
- 'Topic :: Scientific/Engineering',
18
- 'License :: OSI Approved :: MIT License',
19
- 'Operating System :: POSIX :: Linux',
20
- 'Operating System :: MacOS :: MacOS X',
21
- 'Operating System :: Microsoft :: Windows',
22
- 'Programming Language :: Python :: 3 :: Only',
23
- 'Programming Language :: Python :: 3.8',
24
- 'Programming Language :: Python :: 3.9',
25
- 'Programming Language :: Python :: 3.10',
26
- 'Programming Language :: Python :: 3.11',
27
- ]
28
- requires-python = '>=3.8.1'
29
- dynamic = ['version']
30
-
31
- dependencies = ['click', 'pandas', 'praat-parselmouth', 'praat-textgrids', 'pydub', 'python-docx']
32
-
33
- [project.optional-dependencies]
34
- test = ['pytest', 'pytest-cov']
35
- doc = []
36
- lint = ['black']
37
- feature = []
38
- dev = ['lingualabpy[test, doc, lint, feature]']
39
-
40
- [project.scripts]
41
- lingualabpy_audio_metrics = 'lingualabpy.cli.audio_metrics:main'
42
- lingualabpy_audio_triming = 'lingualabpy.cli.audio_triming:main'
43
- lingualabpy_docx2json = 'lingualabpy.cli.docx2json:main'
44
- lingualabpy_jsons2csv = 'lingualabpy.cli.jsons2csv:main'
45
-
46
- [project.urls]
47
- Documentation = 'https://github.com/lingualab/lingualabpy'
48
- Source = 'https://github.com/lingualab/lingualabpy'
49
- Tracker = 'https://github.com/lingualab/lingualabpy/issues'
50
-
51
- [tool.flit.module]
52
- name = 'lingualabpy'
53
-
54
- [tool.pytest.ini_options]
55
- addopts = "--cov=src --cov-report=html"
File without changes
File without changes