PyPI - lingualabpy - Versions diffs - 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl - Mend

lingualabpy 0.0.3py3-none-any.whl → 0.0.5py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

lingualabpy/__init__.py +6 -1
lingualabpy/audio/metrics.py +85 -0
lingualabpy/cli/audio_metrics.py +59 -0
lingualabpy/cli/audio_triming.py +21 -10
lingualabpy/io.py +9 -2
lingualabpy/resources/FilledPauses.praat +536 -0
lingualabpy/resources/syllablenucleiv3.praat +0 -0
lingualabpy/text/textgrid.py +0 -4
lingualabpy/tools/data.py +12 -0
{lingualabpy-0.0.3.dist-info → lingualabpy-0.0.5.dist-info}/METADATA +1 -1
lingualabpy-0.0.5.dist-info/RECORD +23 -0
{lingualabpy-0.0.3.dist-info → lingualabpy-0.0.5.dist-info}/entry_points.txt +1 -0
lingualabpy-0.0.3.dist-info/RECORD +0 -19
{lingualabpy-0.0.3.dist-info → lingualabpy-0.0.5.dist-info}/LICENSE +0 -0
{lingualabpy-0.0.3.dist-info → lingualabpy-0.0.5.dist-info}/WHEEL +0 -0

lingualabpy/__init__.py CHANGED Viewed

@@ -4,12 +4,17 @@
 """lingualabpy"""
 from __future__ import annotations
-__version__ = "0.0.3"
+__version__ = "0.0.5"
 default_config = {
     "participant_col": "participant_id",
     "participant_label": "IE",
     "clinician_label": "IV",
+    "f0_bounds": {
+        "female": [100.0, 600.0],
+        "male": [75.0, 300.0],
+    },
+    "unit_frequency": "Hertz",
 }
 from lingualabpy.io import read_audio, read_docx, read_json, write_json, read_textgrid

lingualabpy/audio/metrics.py ADDED Viewed

@@ -0,0 +1,85 @@
+from collections import defaultdict
+import numpy as np
+from parselmouth import Sound
+from parselmouth.praat import call
+from lingualabpy.tools.data import UnchangeableDict
+def measure_pitch(
+    sound: Sound, f0min: float, f0max: float, unit: str
+) -> UnchangeableDict:
+    """
+    Measure duration, pitch, HNR, jitter and shimmer of a sound.
+    Args:
+        sound: the parselmouth ``Sound`` to analyse.
+        f0min: lower pitch bound handed to the Praat "To Pitch",
+            "To Harmonicity (cc)" and "To PointProcess (periodic, cc)" commands.
+        f0max: upper pitch bound handed to the Praat "To Pitch" and
+            "To PointProcess (periodic, cc)" commands.
+        unit: frequency unit handed to the pitch "Get mean" and
+            "Get standard deviation" queries.
+    Returns:
+        An ``UnchangeableDict`` holding, in this order: ``duration``, ``f0_mean``,
+        ``F0_std``, ``hnr``, five ``jitter_*`` values and six ``shimmer_*`` values.
+    """
+    # Praat objects shared by the queries below
+    pitch = call(sound, "To Pitch", 0.0, f0min, f0max)
+    harmonicity = call(sound, "To Harmonicity (cc)", 0.01, f0min, 0.1, 1.0)
+    point_process = call(sound, "To PointProcess (periodic, cc)", f0min, f0max)
+    # Write-once container: a metric name computed twice raises instead of
+    # silently overwriting the first value
+    metrics = UnchangeableDict()
+    metrics["duration"] = call(sound, "Get total duration")
+    metrics["f0_mean"] = call(pitch, "Get mean", 0, 0, unit)
+    # NOTE(review): the capital "F" differs from "f0_mean"; kept because the key
+    # is part of the emitted output
+    metrics["F0_std"] = call(pitch, "Get standard deviation", 0, 0, unit)
+    metrics["hnr"] = call(harmonicity, "Get mean", 0, 0)
+    # Jitter: each variant is described by the words of its Praat command, so the
+    # metric name ("_"-joined) and the command (", "-joined) come from one table
+    jitter_variants = (
+        ("local",),
+        ("local", "absolute"),
+        ("rap",),
+        ("ppq5",),
+        ("ddp",),
+    )
+    for variant in jitter_variants:
+        metrics[f"jitter_{'_'.join(variant)}"] = call(
+            point_process, f"Get jitter ({', '.join(variant)})", 0, 0, 0.0001, 0.02, 1.3
+        )
+    # Shimmer: queried on the sound together with its point process
+    for shimmer_type in ("local", "local_dB", "apq3", "apq5", "apq11", "dda"):
+        metrics[f"shimmer_{shimmer_type}"] = call(
+            [sound, point_process],
+            f"Get shimmer ({shimmer_type})",
+            0,
+            0,
+            0.0001,
+            0.02,
+            1.3,
+            1.6,
+        )
+    return metrics
+def measure_formants(
+    sound: Sound, f0min: float, f0max: float, unit: str
+) -> UnchangeableDict:
+    """
+    Measure the first four formants at each glottal pulse of a sound.
+    Puts, D. A., Apicella, C. L., & Cárdenas, R. A. (2012). Masculine voices signal men's threat potential in forager and industrial societies. Proceedings of the Royal Society of London B: Biological Sciences, 279(1728), 601-609.
+    Adapted from: DOI 10.17605/OSF.IO/K2BHS
+    Args:
+        sound: the parselmouth ``Sound`` to analyse.
+        f0min: lower pitch bound handed to "To PointProcess (periodic, cc)".
+        f0max: upper pitch bound handed to "To PointProcess (periodic, cc)".
+        unit: unit handed to the formant "Get value at time" query.
+    Returns:
+        An ``UnchangeableDict`` with ``formants_<n>_mean`` and
+        ``formants_<n>_median`` for n = 1..4. A formant with no defined value
+        at any pulse yields the numpy mean/median of an empty list.
+    """
+    # Praat objects shared by the queries below
+    point_process = call(sound, "To PointProcess (periodic, cc)", f0min, f0max)
+    formants = call(sound, "To Formant (burg)", 0.0025, 5, 5000, 0.025, 50)
+    number_of_points = call(point_process, "Get number of points")
+    formant_numbers = (1, 2, 3, 4)
+    # Sample every formant at every glottal pulse; Praat indices start at 1
+    values_by_formant = defaultdict(list)
+    for index in range(1, number_of_points + 1):
+        time = call(point_process, "Get time from index", index)
+        for formant_number in formant_numbers:
+            value = call(
+                formants, "Get value at time", formant_number, time, unit, "Linear"
+            )
+            # Undefined Praat values come back as NaN and are left out
+            if not np.isnan(value):
+                values_by_formant[formant_number].append(value)
+    # Summarise across pulses; the median is what is used in all subsequent calculations
+    metrics = UnchangeableDict()
+    for formant_number in formant_numbers:
+        values = values_by_formant[formant_number]
+        metrics[f"formants_{formant_number}_mean"] = np.mean(values)
+        metrics[f"formants_{formant_number}_median"] = np.median(values)
+    return metrics

lingualabpy/cli/audio_metrics.py ADDED Viewed

@@ -0,0 +1,59 @@
+import click
+from parselmouth import Sound
+from pathlib import Path
+from lingualabpy import default_config, write_json
+from lingualabpy.audio.metrics import measure_pitch, measure_formants
+@click.command()
+@click.option(
+    "--sex",
+    type=click.Choice(["female", "male"]),
+    help=f"Set f0min and f0max for praat analysis. {default_config['f0_bounds']}",
+)
+@click.option(
+    "--f0min",
+    type=float,
+    help="Define f0min for praat analysis. Not required if sex is specify",
+)
+@click.option(
+    "--f0max",
+    type=float,
+    help="Define f0max for praat analysis. Not required if sex is specify",
+)
+@click.option(
+    "--unit_frequency",
+    default=default_config["unit_frequency"],
+    show_default=True,
+)
+@click.option("--participant_id", "-p", default=None, help="")
+@click.option("--output_json", default=None, help="")
+@click.argument("audiofile", nargs=1, type=click.Path(exists=True))
+def main(sex, f0min, f0max, unit_frequency, participant_id, output_json, audiofile):
+    """Compute pitch and formant metrics of AUDIOFILE and write them to a JSON file."""
+    # "--sex" takes precedence: its preset bounds replace any explicit
+    # "--f0min"/"--f0max" values
+    if sex:
+        f0min, f0max = default_config["f0_bounds"][sex]
+    elif not f0min or not f0max:
+        raise click.UsageError(
+            "'--f0min' and '--f0max' are required if '--sex' is not specified"
+        )
+    sound = Sound(audiofile)
+    # measure_pitch returns a write-once dict, so a metric name produced by both
+    # measurements would raise here instead of being overwritten
+    metrics = measure_pitch(sound, f0min, f0max, unit_frequency)
+    metrics.update(measure_formants(sound, f0min, f0max, unit_frequency))
+    if participant_id:
+        metrics["participant_id"] = participant_id
+    audiofile = Path(audiofile)
+    metrics["filename"] = audiofile.name
+    # Default output name is built from the audio file stem, without a directory part
+    if not output_json:
+        output_json = audiofile.stem + "_metric-audio.json"
+    write_json(dict(metrics), output_json)

lingualabpy/cli/audio_triming.py CHANGED Viewed

@@ -3,7 +3,7 @@ import click
 from lingualabpy import default_config, read_audio, read_textgrid
 from lingualabpy.audio.triming import extract_audio
 from lingualabpy.text.textgrid import extract_intervals
-from lingualabpy.tools.interval import intervals_masking
+from lingualabpy.tools.interval import intervals_masking, interval_to_list
 @click.command()
@@ -17,21 +17,32 @@ from lingualabpy.tools.interval import intervals_masking
     default=default_config["clinician_label"],
     show_default=True,
 )
+@click.option("--remove_overlap", is_flag=True, show_default=True)
 @click.argument("textgrid", nargs=1, type=click.Path(exists=True))
 @click.argument("audiofile", nargs=1, type=click.Path(exists=True))
 @click.argument("output", nargs=1)
-def main(participant_label, clinician_label, textgrid, audiofile, output):
+def main(
+    participant_label, clinician_label, remove_overlap, textgrid, audiofile, output
+):
     """Doc"""
     grid = read_textgrid(textgrid)
-    participant_intervals, clinician_intervals = extract_intervals(
-        grid, [participant_label, clinician_label]
-    )
-    participant_intervals_clean = intervals_masking(
-        participant_intervals, clinician_intervals
-    )
+    try:
+        participant_intervals, clinician_intervals = extract_intervals(
+            grid, [participant_label, clinician_label]
+        )
+    except Exception as e:
+        raise Exception(f"Failed to extract intervals for {textgrid}", repr(e))
+    if remove_overlap:
+        participant_intervals = intervals_masking(
+            participant_intervals, clinician_intervals
+        )
+    else:
+        participant_intervals = map(interval_to_list, participant_intervals)
     audio = read_audio(audiofile)
-    audio_clean = extract_audio(audio, participant_intervals_clean)
+    audio_clean = extract_audio(audio, participant_intervals)
-    audio_clean.export(output)
+    audio_clean.export(output, format="wav")

lingualabpy/io.py CHANGED Viewed

@@ -33,10 +33,17 @@ def read_json(json_path: str) -> Union[list, dict]:
 def write_json(data: Union[list, dict], json_path: str) -> None:
     """"""
     with open(json_path, "w") as file:
-        json.dump(data, file)
+        json.dump(data, file, indent=4)
 # .TextGrid files
 def read_textgrid(textgrid_path: str) -> TextGrid:
     """"""
-    return TextGrid(textgrid_path)
+    textgrid = TextGrid(textgrid_path)
+    # Cleaning of the interval text
+    for intervals in textgrid.values():
+        for interval in intervals:
+            interval.text = (
+                interval.text.encode().decode("unicode_escape").strip(" \n\r\t")
+            )
+    return textgrid

lingualabpy/resources/FilledPauses.praat ADDED Viewed

@@ -0,0 +1,536 @@
+#	PRAAT SCRIPT FILLED PAUSES
+#	Set Syllable Boundaries at -6 dB around points in the tier "Nuclei" (as set by the
+#	script "SyllableNucleiv3.praat"), compute a number of global (speaker specific) and
+#	local (syllable specific) parameters for automatic detection of Filled Pauses.
+#
+#	Optionally, save the local parameters for all syllables in a table.
+#
+#	J J A Pacilly,  1-nov-2019, for Nivja de Jong, on behalf of:
+#	  British Council, Aptis Research Grants
+#	J J A Pacilly, 14-feb-2020, retain max. similarity with version for internal use
+#
+#	Note that this script is used by "SyllableNucleiv3.Praat", but it can also
+#	be used as a standalone script with a selected Sound and Textgrid object
+#	as long as this TextGrid contains a pointTier with the name "Nuclei".
+#
+#	The settings and thresholds in this script are trained and tested on
+#	Dutch and English L2 data. See [LINK?] the ARAG report by De Jong and
+#	Pacilly (2019) for more information.
+#
+#	Copyright (C) 2019 - J J A Pacilly & N H de Jong, LUCL - Universiteit Leiden
+#
+#	This program is free software: you can redistribute it and/or modify
+#	it under the terms of the GNU General Public License as published by
+#	the Free Software Foundation, either version 3 of the License, or
+#	(at your option) any later version.
+#
+#	This program is distributed in the hope that it will be useful,
+#	but WITHOUT ANY WARRANTY; without even the implied warranty of
+#	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#	See the GNU General Public License for more details.
+#
+#	You should have received a copy of the GNU General Public License
+#	along with this program.  If not, see http://www.gnu.org/licenses/
+form Detect Filled Pauses
+  optionmenu Language 1
+    option English
+#   option Mandarin (not yet implemented)
+#   option Spanish  (not yet implemented)
+    option Dutch
+  real Filled_Pause_threshold 1.00  ; cut-off higher/lower
+  boolean Save_Table 0
+  endform
+# The script works on the currently selected Sound and TextGrid objects
+idSnd = selected ("Sound")
+name$ = selected$("Sound")
+idTG  = selected ("TextGrid")
+# Default to "no table": idTableAuto is otherwise only assigned inside the
+# "if nrSyllables" branch, yet it is tested unconditionally further down
+idTableAuto = 0
+@setSB: idSnd, idTG			; set/replace tier(s) and define vectors for initial analysis
+if nrSyllables
+  @doGlobalAnalyses: idSnd		; do global analysis of ALL syllables identified by tier Nuclei
+  @sdF0:  idSnd				; fills the arrays dF0[],   dqF0[]   and sdF0[]
+  @replaceUndefinedF0: 0		; replace Undefined values by mean
+  @sdFmt: idSnd				; fills the arrays dF1-3[], dqF1-3[] and sdF1-3[]
+  @processData: idTG, name$, "Auto"	; create Auto table, set labels
+  idTableAuto = processData.idTable
+  endif
+# Leave the Sound and TextGrid selected, plus the table when one was created
+selectObject: idSnd, idTG
+if idTableAuto
+  plusObject: idTableAuto
+  endif
+# setSB: locate the tiers, read the syllable nuclei and insert syllable boundaries.
+# Arguments: .idSnd = Sound object id, .idTG = TextGrid object id.
+# Sets the script-wide values nrSyllables, nrFrames, tMeanSyllable, the vector d#
+# and the arrays tNuc[], tSBMin[], tFrom[], tTo[], d6Org[], ts[], te[], d[].
+# .tierAuto (read elsewhere as setSB.tierAuto) is the number of the "DFauto" tier.
+procedure setSB: .idSnd, .idTG
+# For testing, allow successive runs of this script
+  selectObject: .idTG
+  .tierNuclei  = 0
+  .tierPhrases = 0
+  .tierAuto    = 0
+  .nrTiers = Get number of tiers
+# Scan all tiers by name; an existing "DFauto (<language>)" tier is emptied by
+# removing it and inserting a fresh interval tier at the same position
+  for .tier to .nrTiers
+    .name$ = Get tier name: .tier
+    if   .name$ == "Nuclei"
+      .tierNuclei = .tier
+    elif .name$ == "Phrases"
+      .tierPhrases = .tier
+    elif .name$ == "DFauto" + " ('language$')"
+      Remove tier: .tier
+      Insert interval tier: .tier, "DFauto" + " ('language$')"
+      .tierAuto = .tier
+    elif left$(.name$, 5) == "DFman" or left$(.name$, 5) == "dfMan"
+      .tierMan = .tier
+      endif
+    endfor
+  if .tierNuclei == 0
+    exitScript: "No tier ""Nuclei"" found, please run ""SyllableNuclei.praat"" first."
+    endif
+  nrSyllables = Get number of points: .tierNuclei
+  d#   = zero#(nrSyllables)
+# tNuc[0] and tNuc[nrSyllables+1] hold the TextGrid start and end time, so that
+# every nucleus has a neighbour on both sides
+  tNuc[0]             = Get start time
+  tNuc[nrSyllables+1] = Get end time
+  for syllable to nrSyllables
+    tNuc[syllable] = Get time of point: .tierNuclei, syllable
+    if .tierPhrases
+      .iPhrase = Get interval at time: .tierPhrases, tNuc[syllable]
+      .tFromPhrase[syllable] = Get start time of interval: .tierPhrases, .iPhrase
+      .tToPhrase[syllable]   = Get end time of interval: .tierPhrases, .iPhrase
+      endif
+    endfor
+# Get minimum Intensity *between* Nuclei
+  selectObject: .idSnd
+  .idInt   = To Intensity: 100, 0, "yes"
+  nrFrames = Get number of frames
+# tSBMin[s] is the time of the intensity minimum between nucleus s-1 and nucleus s
+  for syllable to nrSyllables+1
+    tSBMin[syllable] = Get time of minimum: tNuc[syllable-1], tNuc[syllable], "Parabolic"
+    endfor
+  tMeanSyllable = (tSBMin[nrSyllables+1] - tSBMin[1]) / nrSyllables
+# Find -6 dB *around* Nuclei but avoid that these boundaries cross
+# the 'minimum Intensity boundaries' (yields twice as much intervals)
+  for syllable to nrSyllables
+    frNuc = Get frame number from time: tNuc[syllable]
+    frNuc = round(frNuc)
+    dBNuc = Get value in frame: frNuc
+# Walk left one frame at a time until the intensity is more than 6 dB below the
+# nucleus, the left intensity minimum is passed, or the first frames are reached
+    frFrom = frNuc
+    repeat
+      frFrom -= 1
+      dBL = Get value in frame: frFrom
+      tL  = Get time from frame number: frFrom
+      until dBL < dBNuc - 6 or tL < tSBMin[syllable] or frFrom < 2
+    tFrom[syllable] = Get time from frame number: frFrom
+# Same walk to the right, bounded by the next intensity minimum and the last frames
+    frTo = frNuc
+    repeat
+      frTo += 1
+      dBR = Get value in frame: frTo
+      tR  = Get time from frame number: frTo
+      until dBR < dBNuc - 6 or tR > tSBMin[syllable+1] or frTo > nrFrames-1
+    tTo[syllable] = Get time from frame number: frTo
+    d6Org[syllable] = tTo[syllable] - tFrom[syllable]
+# When a "Phrases" tier exists, clip the syllable to the phrase containing its nucleus
+    if .tierPhrases
+      tFrom[syllable] = max(.tFromPhrase[syllable], tFrom[syllable])
+      tTo  [syllable] = min(.tToPhrase  [syllable], tTo  [syllable])
+      endif
+    endfor
+# only the boundaries *around* Nuclei are being used
+  selectObject: .idTG
+  if not .tierAuto
+    Insert interval tier: .nrTiers + 1, "DFauto" + " ('language$')"
+    .tierAuto = .nrTiers + 1
+    endif
+# Insert two boundaries per syllable; ts[] / te[] record its start and end time.
+# When the -6 dB point lies at or beyond the intensity minimum, the boundary is
+# placed 0.00005 s inside the minimum while ts[] / te[] store the minimum itself
+  for syllable to nrSyllables
+#   Insert boundary: .tierMin, tSBMin[syllable]
+    if tFrom[syllable] > tSBMin[syllable]
+      Insert boundary: .tierAuto, tFrom[syllable]
+      ts[syllable] = tFrom[syllable]
+    else
+      Insert boundary: .tierAuto, tSBMin[syllable]+0.00005
+      ts[syllable] = tSBMin[syllable]
+      endif
+    if tTo[syllable] < tSBMin[syllable+1]
+      Insert boundary: .tierAuto, tTo[syllable]
+      te[syllable] = tTo[syllable]
+    else
+      Insert boundary: .tierAuto, tSBMin[syllable+1]-0.00005
+      te[syllable] = tSBMin[syllable+1]
+      endif
+# The syllable duration is stored both in the array d[] and in the vector d#
+    d [syllable] = te[syllable] - ts[syllable]
+    d#[syllable] = te[syllable] - ts[syllable]
+    endfor
+# Insert boundary: .tierMin, tSBMin[nrSyllables+1]
+  removeObject: .idInt
+  endproc
+# doGlobalAnalyses: compute speaker-level medians over all syllables together.
+# Argument: .idSnd = Sound object id. Reads nrSyllables, ts[] and te[].
+# Sets the script-wide values qGlobF0 (semitones re 100 Hz) and qGlobF1, qGlobF2,
+# qGlobF3 (bark), plus .qGlobF0Init (Hertz), which other procedures read as
+# doGlobalAnalyses.qGlobF0Init.
+procedure doGlobalAnalyses: .idSnd
+# Concatenate all Syllables
+  .id# = zero#(nrSyllables)
+  for syllable to nrSyllables
+    selectObject: .idSnd
+    .id#[syllable] = Extract part: ts[syllable], te[syllable], "rectangular", 1, "no"
+    endfor
+  selectObject: .id#
+  .idSndTmp = Concatenate with overlap: 0.01
+  removeObject: .id#
+# Perform Initial Global Pitch analysis to determine Global Quantile
+  selectObject: .idSndTmp
+  .idPTmpInit  = noprogress To Pitch (ac): 0.02, 30, 4, "no", 0.03, 0.25, 0.01, 0.35, 0.25, 450
+  .qGlobF0Init = Get quantile: 0, 0, 0.5, "Hertz"
+# Perform Global Pitch and Formant analysis
+# The second pitch pass replaces the last argument (450 above) by 2.5 times the
+# median F0 found in the first pass
+  selectObject: .idSndTmp
+  .idPTmp = noprogress To Pitch (ac): 0.02, 30, 4, "no", 0.03, 0.25, 0.01, 0.35, 0.25, 2.5 * .qGlobF0Init
+  qGlobF0 = Get quantile: 0, 0, 0.5, "semitones re 100 Hz"
+# The third formant argument is derived from the median F0: 4000 + 4 * (median - 100)
+  selectObject: .idSndTmp
+  .idFmtTmp = noprogress To Formant (burg): 0, 4, 4000 + 4 * (.qGlobF0Init - 100), 0.025, 50
+  qGlobF1 = Get quantile: 1, 0, 0, "bark", 0.5
+  qGlobF2 = Get quantile: 2, 0, 0, "bark", 0.5
+  qGlobF3 = Get quantile: 3, 0, 0, "bark", 0.5
+# appendFileLine: "FilledPauses.txt", name$, tab$, "qGlobF0: (",
+#..	fixed$(.qGlobF0Init, 1), "/", fixed$(qGlobF0, 1), ")"
+# Remove every temporary object created by this procedure
+  removeObject: .idSndTmp, .idPTmpInit, .idPTmp, .idFmtTmp
+  endproc
+# sdF0: compute per-syllable pitch statistics on the original Sound.
+# Argument: .idSnd = Sound object id. Reads nrSyllables, ts[], te[], qGlobF0 and
+# doGlobalAnalyses.qGlobF0Init, so doGlobalAnalyses must have run first.
+# Each statistic is stored twice: in an array (name[syllable]) and in a vector (name#).
+procedure sdF0: .idSnd
+  selectObject: .idSnd
+    f0# = zero#(nrSyllables)
+   dF0# = zero#(nrSyllables)
+  dqF0# = zero#(nrSyllables)
+  sdF0# = zero#(nrSyllables)
+  .idF0   = noprogress To Pitch (ac): 0.02, 30, 4, "no", 0.03, 0.25, 0.01, 0.35, 0.25, 2.5 * doGlobalAnalyses.qGlobF0Init
+  for syllable to nrSyllables
+# f0: median pitch of the syllable; dF0: global median minus syllable median
+    q50F0 = Get quantile:  ts[syllable], te[syllable], 0.50, "semitones re 100 Hz"
+    f0 [syllable] = q50F0
+    f0#[syllable] = f0 [syllable]
+    dF0 [syllable] = qGlobF0 - q50F0
+    dF0#[syllable] = dF0[syllable]
+# dqF0: spread between the 95% and 5% quantiles within the syllable
+    q95F0 = Get quantile:  ts[syllable], te[syllable], 0.95, "semitones re 100 Hz"
+    q05F0 = Get quantile:  ts[syllable], te[syllable], 0.05, "semitones re 100 Hz"
+    dqF0 [syllable] = q95F0 - q05F0
+    dqF0#[syllable] = dqF0 [syllable]
+# sdF0: standard deviation of pitch within the syllable, in semitones
+    sdF0 [syllable] = Get standard deviation: ts[syllable], te[syllable], "semitones"
+    sdF0#[syllable] = sdF0 [syllable]
+    endfor
+  removeObject: .idF0
+  endproc
+# replaceUndefinedF0: replace undefined entries of the vectors f0#, dF0#, dqF0#
+# and sdF0# by the mean of the defined entries of the same vector.
+# Argument: .dummy is not used in the body.
+# Only the vectors (name#) are patched; the arrays f0[], dF0[], dqF0[] and sdF0[]
+# are left as they were.
+procedure replaceUndefinedF0: .dummy
+# small amounts (< 10%) of undefined F0 values are replaced by MEAN without warning
+# Slots 1..4 of nrUndef# and total# stand for f0#, dF0#, dqF0# and sdF0#
+  nrUndef# = zero#(4)
+  total#   = zero#(4)
+# First pass: per vector, count the undefined syllables, remember their indices
+# in listUndefined[slot, k] and sum the defined values
+  for syllable to nrSyllables
+    if f0#[syllable] == undefined
+      nrUndef#[1] = nrUndef#[1] + 1
+      listUndefined[1, nrUndef#[1]] = syllable
+    else
+      total#[1] = total#[1] + f0#[syllable]
+      endif
+    if dF0#[syllable] == undefined
+      nrUndef#[2] = nrUndef#[2] + 1
+      listUndefined[2, nrUndef#[2]] = syllable
+    else
+      total#[2] = total#[2] + dF0#[syllable]
+      endif
+    if dqF0#[syllable] == undefined
+      nrUndef#[3] = nrUndef#[3] + 1
+      listUndefined[3, nrUndef#[3]] = syllable
+    else
+      total#[3] = total#[3] + dqF0#[syllable]
+      endif
+    if sdF0#[syllable] == undefined
+      nrUndef#[4] = nrUndef#[4] + 1
+      listUndefined[4, nrUndef#[4]] = syllable
+    else
+      total#[4] = total#[4] + sdF0#[syllable]
+      endif
+    endfor
+# Means over the defined values only
+# NOTE(review): the divisor is zero when every syllable is undefined - confirm
+# this case is acceptable
+  mean__F0 = total#[1] / (nrSyllables - nrUndef#[1])
+  mean_dF0 = total#[2] / (nrSyllables - nrUndef#[2])
+  meandqF0 = total#[3] / (nrSyllables - nrUndef#[3])
+  meansdF0 = total#[4] / (nrSyllables - nrUndef#[4])
+# Second pass: write the mean into each remembered index. The loop variable
+# "syllable" counts entries of listUndefined here, not syllables. The warning is
+# printed once per vector (first iteration) and only when more than a tenth of
+# the syllables were undefined
+  for syllable to nrUndef#[1]
+      f0#[listUndefined[1, syllable]] = mean__F0
+    if syllable == 1 and nrUndef#[1] > nrSyllables / 10
+      appendInfoLine: "Warning: replaced ", nrUndef#[1], "/'nrSyllables' F0 values by mean ('mean__F0:3') in 'name$'."
+      endif
+    endfor
+  for syllable to nrUndef#[2]
+     dF0#[listUndefined[2, syllable]] = mean_dF0
+    if syllable == 1 and nrUndef#[2] > nrSyllables / 10
+      appendInfoLine: "Warning: replaced ", nrUndef#[2], "/'nrSyllables' dF0 values by mean ('mean_dF0:3') in 'name$'."
+      endif
+    endfor
+  for syllable to nrUndef#[3]
+    dqF0#[listUndefined[3, syllable]] = meandqF0
+    if syllable == 1 and nrUndef#[3] > nrSyllables / 10
+      appendInfoLine: "Warning: replaced ", nrUndef#[3], "/'nrSyllables' dqF0 values by mean ('meandqF0:3') in 'name$'."
+      endif
+    endfor
+  for syllable to nrUndef#[4]
+    sdF0#[listUndefined[4, syllable]] = meansdF0
+    if syllable == 1 and nrUndef#[4] > nrSyllables / 10
+      appendInfoLine: "Warning: replaced ", nrUndef#[4], "/'nrSyllables' sdF0 values by mean ('meansdF0:3') in 'name$'."
+      endif
+    endfor
+  endproc
+# sdFmt: compute per-syllable statistics of formants 1-3, all in bark.
+# Argument: .idSnd = Sound object id. Reads nrSyllables, ts[], te[], qGlobF1-3 and
+# doGlobalAnalyses.qGlobF0Init, so doGlobalAnalyses must have run first.
+# Each statistic is stored twice: in an array (name[syllable]) and in a vector (name#).
+procedure sdFmt: .idSnd
+  selectObject: .idSnd
+  .idFmt   = noprogress To Formant (burg): 0, 4, 4000 + 4 * (doGlobalAnalyses.qGlobF0Init - 100), 0.025, 50
+    f1# = zero#(nrSyllables)
+    f2# = zero#(nrSyllables)
+    f3# = zero#(nrSyllables)
+   dF1# = zero#(nrSyllables)
+   dF2# = zero#(nrSyllables)
+   dF3# = zero#(nrSyllables)
+  dqF1# = zero#(nrSyllables)
+  dqF2# = zero#(nrSyllables)
+  dqF3# = zero#(nrSyllables)
+  sdF1# = zero#(nrSyllables)
+  sdF2# = zero#(nrSyllables)
+  sdF3# = zero#(nrSyllables)
+  for syllable to nrSyllables
+# Frame range [fs, fe] covering the syllable; only the lower end is clamped (to 1)
+    fs = Get frame number from time: ts[syllable]
+    fs = round(fs)
+    if fs < 1						; are these frame numbers reliable ?!?
+      fs = 1
+      endif
+    fe = Get frame number from time: te[syllable]
+    fe = round(fe)
+# f1..f3: median formant values within the syllable
+    f1 [syllable] = Get quantile: 1, ts[syllable], te[syllable], "bark", 0.5
+    f2 [syllable] = Get quantile: 2, ts[syllable], te[syllable], "bark", 0.5
+    f3 [syllable] = Get quantile: 3, ts[syllable], te[syllable], "bark", 0.5
+    f1#[syllable] = f1[syllable]
+    f2#[syllable] = f2[syllable]
+    f3#[syllable] = f3[syllable]
+# dF1..dF3: absolute distance to the global median, summed over the frames
+    dF1[syllable] = 0
+    dF2[syllable] = 0
+    dF3[syllable] = 0
+    for frame from fs to fe
+      t   = Get time from frame number: frame
+      lF1 = Get value at time: 1, t, "bark", "Linear"
+      lF2 = Get value at time: 2, t, "bark", "Linear"
+      lF3 = Get value at time: 3, t, "bark", "Linear"
+      if lF1 <> undefined
+        dF1[syllable] += abs(qGlobF1 - lF1)
+        endif
+      if lF2 <> undefined
+        dF2[syllable] += abs(qGlobF2 - lF2)
+        endif
+      if lF3 <> undefined
+        dF3[syllable] += abs(qGlobF3 - lF3)
+        endif
+      endfor
+# The sum is divided by the full frame count, which also counts frames whose
+# value was undefined and therefore skipped above
+    dF1 [syllable] /= (fe-fs+1)
+    dF2 [syllable] /= (fe-fs+1)
+    dF3 [syllable] /= (fe-fs+1)
+    dF1#[syllable]  = dF1[syllable]
+    dF2#[syllable]  = dF2[syllable]
+    dF3#[syllable]  = dF3[syllable]
+# dqF1..dqF3: spread between the 95% and 5% quantiles within the syllable
+    q95F1 = Get quantile: 1, ts[syllable], te[syllable], "bark", 0.95
+    q05F1 = Get quantile: 1, ts[syllable], te[syllable], "bark", 0.05
+    q95F2 = Get quantile: 2, ts[syllable], te[syllable], "bark", 0.95
+    q05F2 = Get quantile: 2, ts[syllable], te[syllable], "bark", 0.05
+    q95F3 = Get quantile: 3, ts[syllable], te[syllable], "bark", 0.95
+    q05F3 = Get quantile: 3, ts[syllable], te[syllable], "bark", 0.05
+    dqF1 [syllable] = q95F1 - q05F1
+    dqF2 [syllable] = q95F2 - q05F2
+    dqF3 [syllable] = q95F3 - q05F3
+    dqF1#[syllable] = dqF1[syllable]
+    dqF2#[syllable] = dqF2[syllable]
+    dqF3#[syllable] = dqF3[syllable]
+# sdF1..sdF3: standard deviation of each formant within the syllable
+    sdF1 [syllable] = Get standard deviation: 1, ts[syllable], te[syllable], "bark"
+    sdF2 [syllable] = Get standard deviation: 2, ts[syllable], te[syllable], "bark"
+    sdF3 [syllable] = Get standard deviation: 3, ts[syllable], te[syllable], "bark"
+    sdF1#[syllable] = sdF1[syllable]
+    sdF2#[syllable] = sdF2[syllable]
+    sdF3#[syllable] = sdF3[syllable]
+    endfor
+  removeObject: .idFmt
+  endproc
+# processData: score every syllable, label filled pauses in the "DFauto" tier and
+# optionally save all per-syllable parameters in a Table.
+# Arguments: .idTG = TextGrid object id, .name$ = name given to the Table,
+# .type$ = analysis type (the visible script only passes "Auto").
+# Reads the vectors and arrays filled by setSB, sdF0, replaceUndefinedF0 and
+# sdFmt, plus the form values language$, filled_Pause_threshold and save_Table.
+# .idTable is the id of the created Table, or 0 when save_Table is off.
+procedure processData: .idTG, .name$, .type$
+  if save_Table
+    .idTable = Create Table with column names: .name$, 0, "type ts dur durz F0 F0z F1 F1z F2 F2z F3 F3z dF0 dF0z dF1 dF1z dF2 dF2z dF3 dF3z
+...		dqF0 dqF0z dqF1 dqF1z dqF2 dqF2z dqF3 dqF3z sdF0 sdF0z sdF1 sdF1z sdF2 sdF2z sdF3 sdF3z score"
+  else
+    .idTable = 0
+    endif
+# Mean and standard deviation of every parameter over all syllables.
+# NOTE(review): these are assigned only for type "Auto", but the z-transform
+# below uses them unconditionally - confirm no other type is ever passed
+  if .type$ == "Auto"
+    mean_d    =  mean(   d#)
+      sd_d    = stdev(   d#)
+     mean_F0  =  mean(  f0#)
+       sd_F0  = stdev(  f0#)
+     mean_F1  =  mean(  f1#)
+       sd_F1  = stdev(  f1#)
+     mean_F2  =  mean(  f2#)
+       sd_F2  = stdev(  f2#)
+     mean_F3  =  mean(  f3#)
+       sd_F3  = stdev(  f3#)
+    mean_dF0  =  mean( dF0#)
+      sd_dF0  = stdev( dF0#)
+    mean_dF1  =  mean( dF1#)
+      sd_dF1  = stdev( dF1#)
+    mean_dF2  =  mean( dF2#)
+      sd_dF2  = stdev( dF2#)
+    mean_dF3  =  mean( dF3#)
+      sd_dF3  = stdev( dF3#)
+    mean_dqF0 =  mean(dqF0#)
+      sd_dqF0 = stdev(dqF0#)
+    mean_dqF1 =  mean(dqF1#)
+      sd_dqF1 = stdev(dqF1#)
+    mean_dqF2 =  mean(dqF2#)
+      sd_dqF2 = stdev(dqF2#)
+    mean_dqF3 =  mean(dqF3#)
+      sd_dqF3 = stdev(dqF3#)
+    mean_sdF0 =  mean(sdF0#)
+      sd_sdF0 = stdev(sdF0#)
+    mean_sdF1 =  mean(sdF1#)
+      sd_sdF1 = stdev(sdF1#)
+    mean_sdF2 =  mean(sdF2#)
+      sd_sdF2 = stdev(sdF2#)
+    mean_sdF3 =  mean(sdF3#)
+      sd_sdF3 = stdev(sdF3#)
+  endif
+# z-transform data
+  dz#    = ( d#   - mean_d   ) / sd_d
+   f0z#  = (  f0# - mean_F0  ) / sd_F0
+   f1z#  = (  f1# - mean_F1  ) / sd_F1
+   f2z#  = (  f2# - mean_F2  ) / sd_F2
+   f3z#  = (  f3# - mean_F3  ) / sd_F3
+  dF0z#  = ( dF0# - mean_dF0 ) / sd_dF0
+  dF1z#  = ( dF1# - mean_dF1 ) / sd_dF1
+  dF2z#  = ( dF2# - mean_dF2 ) / sd_dF2
+  dF3z#  = ( dF3# - mean_dF3 ) / sd_dF3
+  dqF0z# = (dqF0# - mean_dqF0) / sd_dqF0
+  dqF1z# = (dqF1# - mean_dqF1) / sd_dqF1
+  dqF2z# = (dqF2# - mean_dqF2) / sd_dqF2
+  dqF3z# = (dqF3# - mean_dqF3) / sd_dqF3
+  sdF0z# = (sdF0# - mean_sdF0) / sd_sdF0
+  sdF1z# = (sdF1# - mean_sdF1) / sd_sdF1
+  sdF2z# = (sdF2# - mean_sdF2) / sd_sdF2
+  sdF3z# = (sdF3# - mean_sdF3) / sd_sdF3
+  for syllable to nrSyllables
+    selectObject: .idTG
+# Both language scores are computed for every syllable; only the one matching
+# language$ is used below
+    scoreUK =  4.73 * sqrt(   d[syllable]) - 0.29 * f0z#[syllable]
+...          - 0.32 * sqrt(sdF1[syllable]) - 0.10 * sqrt(dF1[syllable])
+...          - 1.38 * sqrt(sdF2[syllable]) - 0.80 * sqrt(dF2[syllable])
+...          - 0.20 * (f2[syllable] - f1[syllable])
+...          + 0.31 *  f3[syllable]
+    scoreNL =  8.62 * sqrt(   d[syllable]) - 0.36 * f0z#[syllable]
+...                                        - 0.72 * sqrt(dF1[syllable])
+...          - 1.36 * sqrt(sdF2[syllable]) - 1.62 * sqrt(dF2[syllable])
+...          - 1.02 * sqrt(sdF3[syllable])
+...          - 0.11 * (f2[syllable] - f1[syllable])
+...          + 0.21 *  f3[syllable]
+# The syllable is read from interval 2*syllable of the "DFauto" tier
+# (setSB inserts two boundaries per syllable)
+    lbl2$ = Get label of interval: setSB.tierAuto, 2*syllable
+# "fp" is appended to the label when the score exceeds the language-specific
+# cut-off scaled by the form value filled_Pause_threshold
+    if   language$ == "English"
+      score = scoreUK
+      if  score > 3.4942 * filled_Pause_threshold
+        lbl2$ += "fp"
+        endif
+    elif language$ == "Dutch"
+      score = scoreNL
+      if score > 2.7094 * filled_Pause_threshold
+        lbl2$ += "fp"
+        endif
+    else
+      exitScript: "Language not supported."
+      endif
+    if .type$ == "Auto"
+      type$ [syllable] = lbl2$
+      Set interval text: setSB.tierAuto,  2*syllable, lbl2$
+#     Set interval text: setSB.tierAuto,  2*syllable, fixed$(score, 3)
+      endif
+# One Table row per syllable; every numeric value is written as a string with
+# 3 decimals
+    if save_Table
+      selectObject: .idTable
+      Append row
+      row = Get number of rows
+      Set string value: row, "type" ,          type$ [syllable]
+      Set string value: row,    "ts",   fixed$(ts    [syllable], 3)
+      Set string value: row,  "dur" ,   fixed$( d    [syllable], 3)
+      Set string value: row,  "durz",   fixed$( dz#  [syllable], 3)
+      Set string value: row,   "F0" ,   fixed$(  f0  [syllable], 3)
+      Set string value: row,   "F0z",   fixed$(  f0z#[syllable], 3)
+      Set string value: row,   "F1" ,   fixed$(  f1  [syllable], 3)
+      Set string value: row,   "F1z",   fixed$(  f1z#[syllable], 3)
+      Set string value: row,   "F2" ,   fixed$(  f2  [syllable], 3)
+      Set string value: row,   "F2z",   fixed$(  f2z#[syllable], 3)
+      Set string value: row,   "F3" ,   fixed$(  f3  [syllable], 3)
+      Set string value: row,   "F3z",   fixed$(  f3z#[syllable], 3)
+      Set string value: row,  "dF0" ,   fixed$( dF0  [syllable], 3)
+      Set string value: row,  "dF0z",   fixed$( dF0z#[syllable], 3)
+      Set string value: row,  "dF1" ,   fixed$( dF1  [syllable], 3)
+      Set string value: row,  "dF1z",   fixed$( dF1z#[syllable], 3)
+      Set string value: row,  "dF2" ,   fixed$( dF2  [syllable], 3)
+      Set string value: row,  "dF2z",   fixed$( dF2z#[syllable], 3)
+      Set string value: row,  "dF3" ,   fixed$( dF3  [syllable], 3)
+      Set string value: row,  "dF3z",   fixed$( dF3z#[syllable], 3)
+      Set string value: row, "dqF0" ,   fixed$(dqF0  [syllable], 3)
+      Set string value: row, "dqF0z",   fixed$(dqF0z#[syllable], 3)
+      Set string value: row, "dqF1" ,   fixed$(dqF1  [syllable], 3)
+      Set string value: row, "dqF1z",   fixed$(dqF1z#[syllable], 3)
+      Set string value: row, "dqF2" ,   fixed$(dqF2  [syllable], 3)
+      Set string value: row, "dqF2z",   fixed$(dqF2z#[syllable], 3)
+      Set string value: row, "dqF3" ,   fixed$(dqF3  [syllable], 3)
+      Set string value: row, "dqF3z",   fixed$(dqF3z#[syllable], 3)
+      Set string value: row, "sdF0" ,   fixed$(sdF0  [syllable], 3)
+      Set string value: row, "sdF0z",   fixed$(sdF0z#[syllable], 3)
+      Set string value: row, "sdF1" ,   fixed$(sdF1  [syllable], 3)
+      Set string value: row, "sdF1z",   fixed$(sdF1z#[syllable], 3)
+      Set string value: row, "sdF2" ,   fixed$(sdF2  [syllable], 3)
+      Set string value: row, "sdF2z",   fixed$(sdF2z#[syllable], 3)
+      Set string value: row, "sdF3" ,   fixed$(sdF3  [syllable], 3)
+      Set string value: row, "sdF3z",   fixed$(sdF3z#[syllable], 3)
+      Set string value: row, "score",   fixed$(score,            3)
+      endif
+    endfor
+  endproc

lingualabpy/resources/syllablenucleiv3.praat ADDED Viewed

Binary file

lingualabpy/text/textgrid.py CHANGED Viewed

@@ -23,10 +23,6 @@ def extract_intervals(textgrid: TextGrid, speakers: list[str]) -> list[list[Inte
     for speaker in speakers:
         speaker_intervals = []
         for interval in textgrid[speaker]:
-            # Cleaning of the interval text
-            interval.text = (
-                interval.text.encode().decode("unicode_escape").strip(" \n\r\t")
-            )
             if interval.text:
                 speaker_intervals.append(interval)
         speakers_intervals.append(speaker_intervals)

lingualabpy/tools/data.py CHANGED Viewed

@@ -1,8 +1,20 @@
+from collections import UserDict
 from pandas import DataFrame
 from typing import Any, Dict, List
+class UnchangeableDict(UserDict):
+    """Write-once mapping: a key can be inserted, but never assigned a second time.
+    Assigning to a key that is already present raises ``ValueError``.
+    """
+    def __setitem__(self, key: Any, item: Any) -> None:
+        # Guard clause: refuse the write as soon as the key is known to be stored
+        if key in self.data:
+            raise ValueError(f"duplicate key '{key}' found")
+        super().__setitem__(key, item)
 def merge_participants_to_df(
     data_participants: List[Dict[Any, Any]],
     participant_col: str,

{lingualabpy-0.0.3.dist-info → lingualabpy-0.0.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lingualabpy
-Version: 0.0.3
+Version: 0.0.5
 Summary: Tools and utilities from the LINGUA laboratory
 Author-email: Christophe Bedetti <christophe.bedetti@umontreal.ca>
 Requires-Python: >=3.8.1

lingualabpy-0.0.5.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,23 @@
+lingualabpy/__init__.py,sha256=_oiqa1RV-lhkXP60vELtzeIEbRJWn9DagQRqNSDzl-s,642
+lingualabpy/io.py,sha256=r3Y781XKAea_T3V2L8aJlxoOEQcYCsdd7c-f-8NcEeI,1155
+lingualabpy/audio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+lingualabpy/audio/metrics.py,sha256=u5FlADmqeYQOSpqhY2l1l8CSC4tfBV6cWp3g6Hri6bE,3502
+lingualabpy/audio/triming.py,sha256=6CY9pH43KFGAPj8Nw34y1YnlOb8gxGLU1btcuRy-Hgc,288
+lingualabpy/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+lingualabpy/cli/audio_metrics.py,sha256=N_kIKmro5bjFEKjHPNfmuY4G5TXQB9qegbc55pUch6Q,1803
+lingualabpy/cli/audio_triming.py,sha256=H62FTRmwhha-qAXoZ450TNNf_3sHg69GATmbMHQBoCM,1566
+lingualabpy/cli/docx2json.py,sha256=Bj5f89B76NtA7Xx71xXGnSucrDEyaH9mUFifQo0wfn4,590
+lingualabpy/cli/jsons2csv.py,sha256=_AcIXiQUCF5SsKqMg6WjTr8fhbuflaJNFrCP91ccSYs,596
+lingualabpy/resources/FilledPauses.praat,sha256=8tY0tAcG71KwnNhawgqyZ7vT__Udf5mSaVutH1eetd8,19623
+lingualabpy/resources/syllablenucleiv3.praat,sha256=aZYRGB2iLGpYxL3ma-UMutQEyP_NvbbbF1yKhGvVcsA,35818
+lingualabpy/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+lingualabpy/text/parser.py,sha256=qZqhzi-6UHdbsXEWi5IMxsDK5Tsosb3pdSo67hcA6To,913
+lingualabpy/text/textgrid.py,sha256=nTVj3UOCJRsdybxAW0M2jlDq0eMMRYSSo6GdXYkH2wU,1459
+lingualabpy/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+lingualabpy/tools/data.py,sha256=FU0_3TaeAZNCu1WpNIOkVBV3bYiEhI1rPw_l8q8z0gk,1523
+lingualabpy/tools/interval.py,sha256=50lzbMTNHF26mPRG50mykCUQE3pdyRjPWMwsskwy0tg,2060
+lingualabpy-0.0.5.dist-info/entry_points.txt,sha256=IXEsa7Cgqjph5bkKSBMXZIBVP4ocrRaSh13dFPBwBmE,247
+lingualabpy-0.0.5.dist-info/LICENSE,sha256=s3hbMsmwGq2XFcxpMD3oHc8GSUeXAmPVXJbn7SYXdos,1095
+lingualabpy-0.0.5.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
+lingualabpy-0.0.5.dist-info/METADATA,sha256=ocw3j6BfTe0NSSKORBs-JjU5KacofzOi81fYRbZ47F4,1703
+lingualabpy-0.0.5.dist-info/RECORD,,

{lingualabpy-0.0.3.dist-info → lingualabpy-0.0.5.dist-info}/entry_points.txt RENAMED Viewed

@@ -1,4 +1,5 @@
 [console_scripts]
+lingualabpy_audio_metrics=lingualabpy.cli.audio_metrics:main
 lingualabpy_audio_triming=lingualabpy.cli.audio_triming:main
 lingualabpy_docx2json=lingualabpy.cli.docx2json:main
 lingualabpy_jsons2csv=lingualabpy.cli.jsons2csv:main

lingualabpy-0.0.3.dist-info/RECORD DELETED Viewed

@@ -1,19 +0,0 @@
-lingualabpy/__init__.py,sha256=Klpz9mrtYXzZ3eSXg7ciwak9mAkgKVC5G1w0uaMn7Q8,515
-lingualabpy/io.py,sha256=TF8eSuX_xfGWWbQ1C0TLnia7HS1Vexn0RqKMvCzHGnE,878
-lingualabpy/audio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lingualabpy/audio/triming.py,sha256=6CY9pH43KFGAPj8Nw34y1YnlOb8gxGLU1btcuRy-Hgc,288
-lingualabpy/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lingualabpy/cli/audio_triming.py,sha256=pAsLv2IuAKLoj8jBHB-SR5mZ7Jb0w26m41-Cya4VvoU,1194
-lingualabpy/cli/docx2json.py,sha256=Bj5f89B76NtA7Xx71xXGnSucrDEyaH9mUFifQo0wfn4,590
-lingualabpy/cli/jsons2csv.py,sha256=_AcIXiQUCF5SsKqMg6WjTr8fhbuflaJNFrCP91ccSYs,596
-lingualabpy/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lingualabpy/text/parser.py,sha256=qZqhzi-6UHdbsXEWi5IMxsDK5Tsosb3pdSo67hcA6To,913
-lingualabpy/text/textgrid.py,sha256=LXdDAY4aEl3Q998Uq28fz0gryFj3KWq1j0RsuWOlEC0,1632
-lingualabpy/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lingualabpy/tools/data.py,sha256=FTjxbckza65vZ_MWEO5wi4mDXpJ2u9KkiEA3-HGfOt8,1106
-lingualabpy/tools/interval.py,sha256=50lzbMTNHF26mPRG50mykCUQE3pdyRjPWMwsskwy0tg,2060
-lingualabpy-0.0.3.dist-info/entry_points.txt,sha256=QvnRy1hJXRGGbVQgS-u--5Rgs7rPBmgWC9K1iaxS5gQ,186
-lingualabpy-0.0.3.dist-info/LICENSE,sha256=s3hbMsmwGq2XFcxpMD3oHc8GSUeXAmPVXJbn7SYXdos,1095
-lingualabpy-0.0.3.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
-lingualabpy-0.0.3.dist-info/METADATA,sha256=JI1jTk5UA5CeLCcLO6HCC157WmeUW0aUTm0hKZaEXm8,1703
-lingualabpy-0.0.3.dist-info/RECORD,,

{lingualabpy-0.0.3.dist-info → lingualabpy-0.0.5.dist-info}/LICENSE RENAMED Viewed

File without changes

{lingualabpy-0.0.3.dist-info → lingualabpy-0.0.5.dist-info}/WHEEL RENAMED Viewed

File without changes

lingualabpy 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl

lingualabpy 0.0.3py3-none-any.whl → 0.0.5py3-none-any.whl