lingualabpy 0.0.6__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff shows the content of publicly released package versions as published to their respective public registries, and is provided for informational purposes only.
@@ -1,55 +1,55 @@ (file name not captured in this diff; the old and new sides of this hunk are identical, shown once below, presumably a whitespace- or line-ending-only change)
import cv2
import os
import click
from parselmouth import Sound
import matplotlib.pyplot as plt
from pathlib import Path

from lingualabpy.plot import draw_pitch, draw_spectrogram


@click.command()
@click.option("--output", default=None, help="")
@click.argument("audiofile", nargs=1, type=click.Path(exists=True))
def main(audiofile, output):
    if not output:
        output = Path(audiofile).stem + ".png"

    sound = Sound(audiofile)

    pitch = sound.to_pitch()

    # If desired, pre-emphasize the sound fragment before calculating the spectrogram
    pre_emphasized_snd = sound.copy()
    pre_emphasized_snd.pre_emphasize()
    spectrogram = pre_emphasized_snd.to_spectrogram(
        window_length=0.03, maximum_frequency=8000
    )

    # amplitude figure
    tmp_amplitude_png = "tmp_amplitude.png"
    amplitude = plt.figure()
    plt.plot(sound.xs(), sound.values.T)
    plt.xlim([sound.xmin, sound.xmax])
    plt.xlabel("time [s]")
    plt.ylabel("amplitude")
    amplitude.set_figwidth(sound.xmax / 4)
    plt.savefig(tmp_amplitude_png)

    # spectro pitch figure
    tmp_spectro_pitch_png = "tmp_spectro_pitch.png"
    spectro_pitch = plt.figure()
    draw_spectrogram(spectrogram)
    plt.twinx()
    draw_pitch(pitch)
    plt.xlim([sound.xmin, sound.xmax])
    spectro_pitch.set_figwidth(sound.xmax / 4)
    plt.savefig(tmp_spectro_pitch_png)

    # concatenation
    fig_concat = cv2.vconcat(
        [cv2.imread(tmp_amplitude_png), cv2.imread(tmp_spectro_pitch_png)]
    )
    cv2.imwrite(output, fig_concat)
    os.remove(tmp_amplitude_png)
    os.remove(tmp_spectro_pitch_png)
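The command above renders two temporary PNGs (the waveform, then the spectrogram with the pitch contour overlaid on a twin axis) and stacks them vertically with OpenCV. A minimal usage sketch via click's test runner; the module path in the import is an assumption, not confirmed by the diff:

from click.testing import CliRunner

from lingualabpy.figure import main  # hypothetical module path

runner = CliRunner()
result = runner.invoke(main, ["--output", "speech.png", "speech.wav"])
assert result.exit_code == 0  # speech.png now holds the two panels stacked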
lingualabpy/io.py CHANGED
@@ -1,49 +1,49 @@ (the old and new sides of this hunk are identical, shown once below, presumably a whitespace- or line-ending-only change)
"""
Module contains tools for processing files
"""

import json
from docx import Document
from textgrids import TextGrid
from pydub import AudioSegment

from typing import Union


# audio files
def read_audio(sound_path: str) -> AudioSegment:
    """"""
    return AudioSegment.from_file(sound_path)


# .docx files
def read_docx(docx_path: str) -> Document:
    """"""
    return Document(docx_path)


# .json files
def read_json(json_path: str) -> Union[list, dict]:
    """"""
    with open(json_path, "r") as file:
        content = json.load(file)
    return content


def write_json(data: Union[list, dict], json_path: str) -> None:
    """"""
    with open(json_path, "w") as file:
        json.dump(data, file, indent=4)


# .TextGrid files
def read_textgrid(textgrid_path: str) -> TextGrid:
    """"""
    textgrid = TextGrid(textgrid_path)
    # Cleaning of the interval text
    for intervals in textgrid.values():
        for interval in intervals:
            interval.text = (
                interval.text.encode().decode("unicode_escape").strip(" \n\r\t")
            )
    return textgrid
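Since write_json pretty-prints with indent=4 and read_json parses it back, a round trip is a quick sanity check. A minimal sketch using only the helpers shown above:

from lingualabpy.io import read_json, write_json

data = {"participant_id": "sub-01", "language": "fr"}
write_json(data, "participant.json")  # serialized with indent=4
assert read_json("participant.json") == data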
New file added (file name not captured in this diff)
@@ -0,0 +1,151 @@
+ """
+ This module processes resting-state fMRI data from the HCP-Young-Adult-2025 release.
+ It extracts timeseries from brain regions using an atlas, computes the connectome,
+ and saves the results along with the masker report.
+ """
+
+ import re
+ import click
+ import numpy as np
+ import pandas as pd
+ from pathlib import Path
+
+ from nilearn.maskers import NiftiLabelsMasker
+ from nilearn.connectome import ConnectivityMeasure
+
+
+ class Connectome:
+
+     path = None
+     brainmask = None
+     output_folder = None
+     timeseries = None
+     report = None
+     relmat = None
+     relmat_z = None
+
+     def make_output_folder(self):
+         self.output_folder.mkdir(parents=True, exist_ok=True)
+
+     def save_timeseries(self, timeseries):
+         pd.DataFrame(timeseries).to_csv(self.timeseries, sep="\t", index=False)
+
+     def save_report(self, masker):
+         masker.generate_report().save_as_html(self.report)
+
+     def save_connectome(self, connectome):
+         pd.DataFrame(connectome).to_csv(self.relmat, sep="\t", index=False)
+
+     def save_connectome_fisher_z(self, connectome):
+         pd.DataFrame(connectome).to_csv(self.relmat_z, sep="\t", index=False)
+
+
+ class ConnectomeHcp2025(Connectome):
+
+     HCP_2025_PATTERN = re.compile(
+         r"^.*/(?P<participant_id>[0-9]{6})/MNINonLinear/Results/"
+         r"rfMRI_REST(?P<run>[12])_(?P<pe>LR|RL)/"
+         r"rfMRI_REST(?P=run)_(?P=pe)_hp2000_clean_rclean_tclean\.nii\.gz$"
+     )
+
+     def __init__(self, path: Path, output: Path):
+         # Check if the path is from the HCP-Young-Adult-2025 release
+         hcp_match = self.HCP_2025_PATTERN.match(path.as_posix())
+         if not hcp_match:
+             raise ValueError(
+                 f"Invalid HCP-Young-Adult-2025 rs-fMRI path:\n {path}\n"
+                 "Expected: <participant_id>/MNINonLinear/Results/rfMRI_REST{1,2}_{LR,RL}/rfMRI_REST{1,2}_{LR,RL}_hp2000_clean_rclean_tclean.nii.gz"
+             )
+
+         # Helper variables to build filenames
+         output = Path(output)
+         pid = f"sub-{hcp_match.group('participant_id')}"
+         run = f"run-{hcp_match.group('pe')}{hcp_match.group('run')}"
+         basename = f"{pid}_task-rest_{run}_seg-SENSAAS"
+
+         # HCP-Young-Adult-2025 input
+         self.path = path
+         self.brainmask = path.parent / "brainmask_fs.2.nii.gz"
+
+         # Define output filenames
+         self.output_folder = output / pid / "func"
+         self.timeseries = self.output_folder / f"{basename}_timeseries.tsv"
+         self.report = self.output_folder / f"{basename}_report.html"
+         self.relmat = (
+             self.output_folder / f"{basename}_meas-PearsonCorrelation_relmat.tsv"
+         )
+         self.relmat_z = self.output_folder / f"{basename}_meas-FisherZ_relmat.tsv"
+
+
+ @click.command()
+ @click.option(
+     "--output", type=click.Path(), default="results", help="Directory to save outputs"
+ )
+ @click.option(
+     "--smoothing_fwhm",
+     type=float,
+     default=5.0,
+     help="full-width at half maximum in millimeters of the spatial smoothing to apply to the signal",
+ )
+ @click.option(
+     "--kind",
+     type=str,
+     default="correlation",
+     help="kind of functional connectivity matrices",
+ )
+ @click.argument("atlas_path", nargs=1, type=click.Path(exists=True))
+ @click.argument("lut_path", nargs=1, type=click.Path(exists=True))
+ @click.argument("rs_path", nargs=1, type=click.Path(exists=True))
+ def main(atlas_path, lut_path, rs_path, output, smoothing_fwhm, kind):
+     """Process resting-state fMRI from the HCP-Young-Adult-2025 release to extract connectome.
+
+     1. Validates input resting-state fMRI data structure
+
+     2. Extracts timeseries using an atlas
+
+     3. Computes Pearson correlations and fisher_z connectomes
+
+     4. Saves timeseries, connectomes, and visualization report
+
+     Args:
+
+         atlas_path (str): Path to atlas NIfTI file defining brain regions
+
+         lut_path (str): Path to lookup table file for atlas labels
+
+         rs_path (str): Path to resting-state fMRI NIfTI file
+
+         output (str): Path to save results
+
+         smoothing_fwhm (float): full-width at half maximum in millimeters of the spatial smoothing to apply to the signal
+
+         kind (str): kind of functional connectivity matrices
+     """
+     resting_state = ConnectomeHcp2025(path=Path(rs_path), output=Path(output))
+
+     atlas_masker = NiftiLabelsMasker(
+         labels_img=atlas_path,
+         lut=lut_path,
+         mask_img=resting_state.brainmask,
+         smoothing_fwhm=smoothing_fwhm,
+         standardize="zscore_sample",
+         t_r=0.72,
+     )
+
+     correlation_measure = ConnectivityMeasure(
+         kind=kind,
+         standardize=False,
+         vectorize=False,
+     )
+
+     # Extract timeseries and connectomes
+     timeseries = atlas_masker.fit_transform(resting_state.path)
+     connectome = correlation_measure.fit_transform([timeseries])[0]
+     connectome_fisher_z = np.arctanh(np.clip(connectome, -0.999999, 0.999999))
+
+     # Save results
+     resting_state.make_output_folder()
+     resting_state.save_timeseries(timeseries)
+     resting_state.save_report(atlas_masker)
+     resting_state.save_connectome(connectome)
+     resting_state.save_connectome_fisher_z(connectome_fisher_z)
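The Fisher z step near the end clips correlations just inside (-1, 1) before applying arctanh, since arctanh diverges at ±1 and the diagonal of a correlation matrix is exactly 1. A small sketch of that arithmetic:

import numpy as np

r = np.array([[1.0, 0.5], [0.5, 1.0]])
z = np.arctanh(np.clip(r, -0.999999, 0.999999))
print(z[0, 1])  # arctanh(0.5) ≈ 0.5493
print(z[0, 0])  # finite (≈ 7.25) on the clipped diagonal, rather than inf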
lingualabpy/plot.py CHANGED
@@ -1,23 +1,23 @@ (the old and new sides of this hunk are identical, shown once below, presumably a whitespace- or line-ending-only change)
import numpy as np
import matplotlib.pyplot as plt


def draw_spectrogram(spectrogram, dynamic_range=70):
    X, Y = spectrogram.x_grid(), spectrogram.y_grid()
    sg_db = 10 * np.log10(spectrogram.values)
    plt.pcolormesh(X, Y, sg_db, vmin=sg_db.max() - dynamic_range, cmap="afmhot")
    plt.ylim([spectrogram.ymin, spectrogram.ymax])
    plt.xlabel("time [s]")
    plt.ylabel("frequency [Hz]")


def draw_pitch(pitch):
    # Extract selected pitch contour, and
    # replace unvoiced samples by NaN to not plot
    pitch_values = pitch.selected_array["frequency"]
    pitch_values[pitch_values == 0] = np.nan
    plt.plot(pitch.xs(), pitch_values, "o", markersize=5, color="w")
    plt.plot(pitch.xs(), pitch_values, "o", markersize=2)
    plt.grid(False)
    plt.ylim(0, pitch.ceiling)
    plt.ylabel("fundamental frequency [Hz]")
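In draw_spectrogram, power values are converted to dB and vmin pins the colormap floor at dynamic_range decibels below the peak, so all quieter bins render at the darkest color. A minimal numeric sketch of that floor with the default dynamic_range of 70:

import numpy as np

power = np.array([1e-2, 1e-5, 1e-12])
sg_db = 10 * np.log10(power)    # [-20., -50., -120.] dB
vmin = sg_db.max() - 70         # colormap floor at -90 dB
print(np.maximum(sg_db, vmin))  # [-20., -50., -90.]: the -120 dB bin is floored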
@@ -1,35 +1,35 @@ (file name not captured in this diff; the old and new sides of this hunk are identical, shown once below, presumably a whitespace- or line-ending-only change)
import re
from docx import Document
from collections import defaultdict


def parse_waywithwords(document: Document) -> dict:
    """"""
    waywithwords = {
        "IV": "interviewer",
        "IE": "interviewee",
    }

    results = defaultdict(list)

    for para in document.paragraphs:
        try:
            content = para.text.split()
            speaker = content[0]
            transcription = " ".join(content[1:])
        except:
            speaker = None

        if (
            speaker in waywithwords.keys()
            and not transcription.lower() in waywithwords.values()
        ):
            results[waywithwords[speaker]].append(transcription)

        elif re.findall(r"[0-9][0-9]:[0-5][0-9]:[0-5][0-9]", para.text):
            results["time"].append(para.text)

        else:
            results["remainder"].append(para.text)

    return results
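parse_waywithwords routes each paragraph by its leading speaker code (IV or IE), by an hh:mm:ss timestamp, or into a remainder bucket. A hypothetical round trip built in memory with python-docx; the parser's import path is an assumption:

from docx import Document

from lingualabpy.parse import parse_waywithwords  # hypothetical module path

doc = Document()
doc.add_paragraph("IV How are you today?")
doc.add_paragraph("IE Fine, thanks.")
doc.add_paragraph("00:01:23")

results = parse_waywithwords(doc)
# results["interviewer"] == ["How are you today?"]
# results["interviewee"] == ["Fine, thanks."]
# results["time"] == ["00:01:23"]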
@@ -1,41 +1,41 @@ (file name not captured in this diff; the old and new sides of this hunk are identical, shown once below, presumably a whitespace- or line-ending-only change)
import re
from textgrids import TextGrid, Interval
import warnings


def extract_intervals(textgrid: TextGrid, speakers: list[str]) -> list[list[Interval]]:
    """"""
    # Check if speakers are in the textgrid tiers
    tiers = set(textgrid.keys())
    if not set(speakers).issubset(tiers):
        raise ValueError(
            f"Some speaker(s) '{speakers}' are not a tier in the TextGrid '{tiers}'"
        )

    # Check if there is other speaker in the textgrid
    if not set(speakers) == tiers:
        warnings.warn(
            f"TextGrid '{tiers}' have more speakers than specify '{speakers}'"
        )

    # Extraction of intervals with text value
    speakers_intervals = []
    for speaker in speakers:
        speaker_intervals = []
        for interval in textgrid[speaker]:
            if interval.text:
                speaker_intervals.append(interval)
        speakers_intervals.append(speaker_intervals)

    # Checking if all intervals are correctly labeled
    def interval_qc(intervals, label):
        labels = set([_.text for _ in intervals])
        if not (len(labels) == 1 and labels.pop() == label):
            raise Exception(
                f"TextGrid was not labeled correctly, current label(s) '{labels}', should be '{label}'."
            )

    for intervals, speaker in zip(speakers_intervals, speakers):
        interval_qc(intervals, speaker)

    return speakers_intervals
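The inner interval_qc enforces that every retained interval on a speaker's tier carries exactly that speaker's label. An illustrative-only sketch of the rule, with SimpleNamespace standing in for textgrids.Interval:

from types import SimpleNamespace

intervals = [SimpleNamespace(text="spk1"), SimpleNamespace(text="spk1")]
labels = {i.text for i in intervals}
assert len(labels) == 1 and labels.pop() == "spk1"  # passes the QC rule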
lingualabpy/tools/data.py CHANGED
@@ -1,41 +1,41 @@ (the old and new sides of this hunk are identical, shown once below, presumably a whitespace- or line-ending-only change)
from collections import UserDict
from pandas import DataFrame

from typing import Any, Dict, List


class UnchangeableDict(UserDict):
    """A dictionary in which you can add new keys but not modify them in the future."""

    def __setitem__(self, key: Any, item: Any) -> None:
        try:
            self.__getitem__(key)
            raise ValueError("duplicate key '{}' found".format(key))
        except KeyError:
            return super().__setitem__(key, item)


def merge_participants_to_df(
    data_participants: List[Dict[Any, Any]],
    participant_col: str,
) -> DataFrame:
    # Check if all data have a `participant_col` key
    participant_col_checks = [_.get(participant_col) for _ in data_participants]
    if not all(participant_col_checks):
        raise Exception(
            f"One of the samples does not contain the '{participant_col}' information."
        )

    # Check if there are no duplicates in the data
    df_raw = DataFrame.from_dict(data_participants)
    df_melt = df_raw.melt(id_vars=[participant_col]).dropna()
    df_for_test = df_melt.drop(columns="value")
    duplicates = df_for_test[df_for_test.duplicated()]

    if duplicates.empty:
        return df_melt.pivot(index=participant_col, columns="variable")["value"]
    else:
        error_msg = "There are duplicates in your data "
        for participant_id, variable in duplicates.values:
            error_msg += f"\n{participant_id}: {variable}"
        raise Exception(error_msg)
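A brief sketch of the two helpers above: UnchangeableDict rejects reassignment of an existing key, and merge_participants_to_df pivots per-participant records into a participant-by-variable table, raising when a (participant, variable) pair occurs twice:

from lingualabpy.tools.data import UnchangeableDict, merge_participants_to_df

d = UnchangeableDict()
d["f0"] = 120.0
# d["f0"] = 130.0  # would raise ValueError: duplicate key 'f0' found

records = [
    {"participant_id": "sub-01", "f0": 120.0},
    {"participant_id": "sub-02", "f0": 95.5},
]
df = merge_participants_to_df(records, participant_col="participant_id")
print(df)  # one row per participant, one column per variable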