lingualabpy 0.0.5__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lingualabpy/io.py CHANGED
@@ -1,49 +1,49 @@
1
- """
2
- Module contains tools for processing files
3
- """
4
-
5
- import json
6
- from docx import Document
7
- from textgrids import TextGrid
8
- from pydub import AudioSegment
9
-
10
- from typing import Union
11
-
12
-
13
- # audio files
14
- def read_audio(sound_path: str) -> AudioSegment:
15
- """"""
16
- return AudioSegment.from_file(sound_path)
17
-
18
-
19
- # .docx files
20
- def read_docx(docx_path: str) -> Document:
21
- """"""
22
- return Document(docx_path)
23
-
24
-
25
- # .json files
26
- def read_json(json_path: str) -> Union[list, dict]:
27
- """"""
28
- with open(json_path, "r") as file:
29
- content = json.load(file)
30
- return content
31
-
32
-
33
- def write_json(data: Union[list, dict], json_path: str) -> None:
34
- """"""
35
- with open(json_path, "w") as file:
36
- json.dump(data, file, indent=4)
37
-
38
-
39
- # .TextGrid files
40
- def read_textgrid(textgrid_path: str) -> TextGrid:
41
- """"""
42
- textgrid = TextGrid(textgrid_path)
43
- # Cleaning of the interval text
44
- for intervals in textgrid.values():
45
- for interval in intervals:
46
- interval.text = (
47
- interval.text.encode().decode("unicode_escape").strip(" \n\r\t")
48
- )
49
- return textgrid
1
+ """
2
+ Module contains tools for processing files
3
+ """
4
+
5
+ import json
6
+ from docx import Document
7
+ from textgrids import TextGrid
8
+ from pydub import AudioSegment
9
+
10
+ from typing import Union
11
+
12
+
13
# audio files
def read_audio(sound_path: str) -> AudioSegment:
    """Load the audio file at *sound_path* into a pydub ``AudioSegment``.

    The format is inferred by pydub (no explicit ``format=`` argument), so any
    container supported by the installed ffmpeg backend can be read.
    """
    return AudioSegment.from_file(sound_path)
17
+
18
+
19
# .docx files
def read_docx(docx_path: str) -> Document:
    """Open the Word document at *docx_path* and return the python-docx ``Document``."""
    return Document(docx_path)
23
+
24
+
25
# .json files
def read_json(json_path: str) -> Union[list, dict]:
    """Load and return the JSON content stored at *json_path*."""
    with open(json_path, "r") as handle:
        return json.load(handle)
31
+
32
+
33
def write_json(data: Union[list, dict], json_path: str) -> None:
    """Serialize *data* to *json_path* as pretty-printed JSON (4-space indent)."""
    serialized = json.dumps(data, indent=4)
    with open(json_path, "w") as handle:
        handle.write(serialized)
37
+
38
+
39
# .TextGrid files
def read_textgrid(textgrid_path: str) -> TextGrid:
    """Parse a Praat .TextGrid file and normalise the text of every interval.

    Each interval's text is round-tripped through ``unicode_escape`` decoding
    (so literal escape sequences stored in the grid become real characters)
    and stripped of surrounding whitespace.
    """
    textgrid = TextGrid(textgrid_path)
    # Cleaning of the interval text
    for intervals in textgrid.values():
        for interval in intervals:
            interval.text = (
                # NOTE(review): encode()/decode("unicode_escape") assumes the
                # text is ASCII-safe; non-ASCII characters would be mangled by
                # the implicit latin-1 round-trip — confirm inputs are escaped
                # ASCII.
                interval.text.encode().decode("unicode_escape").strip(" \n\r\t")
            )
    return textgrid
File without changes
@@ -0,0 +1,143 @@
1
+ """
2
+ This module processes resting-state fMRI data from the HCP-Young-Adult-2025 release.
3
+ It extracts timeseries from brain regions using an atlas, computes the connectome,
4
+ and saves the results along with the masker report.
5
+ """
6
+
7
+ import re
8
+ import click
9
+ import numpy as np
10
+ import pandas as pd
11
+ from pathlib import Path
12
+
13
+ from nilearn.maskers import NiftiLabelsMasker
14
+ from nilearn.connectome import ConnectivityMeasure
15
+
16
+
17
class Connectome:
    """Holds connectome-related file locations plus small save helpers.

    All attributes default to ``None`` at class level and are populated by
    subclasses (see ``ConnectomeHcp2025``).
    """

    # Input fMRI path and its brain mask.
    path = None
    brainmask = None
    # Output locations, filled in by subclasses.
    output_folder = None
    timeseries = None
    report = None
    relmat = None
    relmat_z = None

    def _save_tsv(self, table, destination):
        # Shared writer: every tabular artefact is a TSV without an index.
        pd.DataFrame(table).to_csv(destination, sep="\t", index=False)

    def make_output_folder(self):
        """Create the output directory (and any missing parents)."""
        self.output_folder.mkdir(parents=True, exist_ok=True)

    def save_timeseries(self, timeseries):
        """Write the extracted timeseries to ``self.timeseries`` as TSV."""
        self._save_tsv(timeseries, self.timeseries)

    def save_report(self, masker):
        """Render the masker's HTML report to ``self.report``."""
        masker.generate_report().save_as_html(self.report)

    def save_connectome(self, connectome):
        """Write the correlation connectome to ``self.relmat`` as TSV."""
        self._save_tsv(connectome, self.relmat)

    def save_connectome_fisher_z(self, connectome):
        """Write the Fisher-z connectome to ``self.relmat_z`` as TSV."""
        self._save_tsv(connectome, self.relmat_z)
41
+
42
+
43
class ConnectomeHcp2025(Connectome):
    """Connectome file layout for the HCP-Young-Adult-2025 release.

    Validates that the resting-state path follows the release's directory
    convention and derives all output filenames from it.
    """

    # Matches .../<participant_id>/MNINonLinear/Results/rfMRI_REST{1,2}_{LR,RL}/
    #   rfMRI_REST{1,2}_{LR,RL}_hp2000_clean_rclean_tclean.nii.gz
    # The leading `(?:.*/)?` makes any directory prefix optional, so relative
    # paths that start directly at <participant_id> — as the error message
    # below documents — are accepted too (previously `^.*/` required a slash
    # before the participant id and rejected them).
    HCP_2025_PATTERN = re.compile(
        r"^(?:.*/)?(?P<participant_id>[0-9]{6})/MNINonLinear/Results/"
        r"rfMRI_REST(?P<run>[12])_(?P<pe>LR|RL)/"
        r"rfMRI_REST(?P=run)_(?P=pe)_hp2000_clean_rclean_tclean\.nii\.gz$"
    )

    def __init__(self, path: Path, output: Path):
        """Derive input/output locations from an HCP-2025 rs-fMRI path.

        Args:
            path: resting-state NIfTI path following the release convention.
            output: root directory under which results are written.

        Raises:
            ValueError: if *path* does not match ``HCP_2025_PATTERN``.
        """
        # Check if the path is from the HCP-Young-Adult-2025 release
        hcp_match = self.HCP_2025_PATTERN.match(path.as_posix())
        if not hcp_match:
            raise ValueError(
                f"Invalid HCP-Young-Adult-2025 rs-fMRI path:\n {path}\n"
                "Expected: <participant_id>/MNINonLinear/Results/rfMRI_REST{1,2}_{LR,RL}/rfMRI_REST{1,2}_{LR,RL}_hp2000_clean_rclean_tclean.nii.gz"
            )

        # Helper variables to build filenames
        output = Path(output)
        pid = f"sub-{hcp_match.group('participant_id')}"
        run = f"run-{hcp_match.group('pe')}{hcp_match.group('run')}"
        basename = f"{pid}_task-rest_{run}_seg-SENSAAS"

        # HCP-Young-Adult-2025 input
        self.path = path
        # NOTE(review): assumes the 2 mm brain mask ships next to the run —
        # confirm against the release layout.
        self.brainmask = path.parent / "brainmask_fs.2.nii.gz"

        # Define output filenames
        self.output_folder = output / pid / "func"
        self.timeseries = self.output_folder / f"{basename}_timeseries.tsv"
        self.report = self.output_folder / f"{basename}_report.html"
        self.relmat = (
            self.output_folder / f"{basename}_meas-PearsonCorrelation_relmat.tsv"
        )
        self.relmat_z = self.output_folder / f"{basename}_meas-FisherZ_relmat.tsv"
78
+
79
+
80
@click.command()
@click.option(
    "--output", type=click.Path(), default="results", help="Directory to save outputs"
)
@click.option(
    "--smoothing_fwhm",
    type=float,
    default=5.0,
    help="full-width at half maximum in millimeters of the spatial smoothing to apply to the signal",
)
@click.argument("atlas_path", nargs=1, type=click.Path(exists=True))
@click.argument("lut_path", nargs=1, type=click.Path(exists=True))
@click.argument("rs_path", nargs=1, type=click.Path(exists=True))
def main(atlas_path, lut_path, rs_path, output, smoothing_fwhm):
    """Process resting-state fMRI from the HCP-Young-Adult-2025 release to extract connectome.

    1. Validates input resting-state fMRI data structure

    2. Extracts timeseries using an atlas

    3. Computes Pearson correlations and fisher_z connectomes

    4. Saves timeseries, connectomes, and visualization report

    Args:

    atlas_path (str): Path to atlas NIfTI file defining brain regions

    lut_path (str): Path to lookup table file for atlas labels

    rs_path (str): Path to resting-state fMRI NIfTI file

    output (str): Path to save results

    smoothing_fwhm (float): full-width at half maximum in millimeters of the spatial smoothing to apply to the signal
    """
    # Validates the path layout and derives every output filename from it.
    resting_state = ConnectomeHcp2025(path=Path(rs_path), output=Path(output))

    # One averaged signal per atlas label, restricted to the run's brain mask.
    atlas_masker = NiftiLabelsMasker(
        labels_img=atlas_path,
        lut=lut_path,
        mask_img=resting_state.brainmask,
        smoothing_fwhm=smoothing_fwhm,
        standardize="zscore_sample",
        # NOTE(review): 0.72 s is presumably the repetition time of HCP-YA
        # rfMRI runs — confirm against the release's acquisition parameters.
        t_r=0.72,
    )

    correlation_measure = ConnectivityMeasure(
        kind="correlation",
        standardize=False,  # signals were already z-scored by the masker
        vectorize=False,  # keep the full square matrix, not the flat upper triangle
    )

    # Extract timeseries and connectomes
    timeseries = atlas_masker.fit_transform(resting_state.path)
    # ConnectivityMeasure expects a list of subjects; take the single result.
    connectome = correlation_measure.fit_transform([timeseries])[0]
    # Clip just inside (-1, 1) so arctanh never produces ±inf (the correlation
    # matrix diagonal is exactly 1).
    connectome_fisher_z = np.arctanh(np.clip(connectome, -0.999999, 0.999999))

    # Save results
    resting_state.make_output_folder()
    resting_state.save_timeseries(timeseries)
    resting_state.save_report(atlas_masker)
    resting_state.save_connectome(connectome)
    resting_state.save_connectome_fisher_z(connectome_fisher_z)
lingualabpy/plot.py ADDED
@@ -0,0 +1,23 @@
1
+ import numpy as np
2
+ import matplotlib.pyplot as plt
3
+
4
+
5
def draw_spectrogram(spectrogram, dynamic_range=70):
    """Draw *spectrogram* on the current matplotlib axes.

    *spectrogram* must expose ``x_grid``/``y_grid``/``values``/``ymin``/``ymax``
    (presumably a parselmouth/Praat spectrogram — confirm with callers).
    Values more than *dynamic_range* dB below the maximum are clipped to the
    bottom of the colormap.
    """
    X, Y = spectrogram.x_grid(), spectrogram.y_grid()
    # Convert power values to decibels.
    sg_db = 10 * np.log10(spectrogram.values)
    # vmin pins the colour floor dynamic_range dB below the loudest bin.
    plt.pcolormesh(X, Y, sg_db, vmin=sg_db.max() - dynamic_range, cmap="afmhot")
    plt.ylim([spectrogram.ymin, spectrogram.ymax])
    plt.xlabel("time [s]")
    plt.ylabel("frequency [Hz]")
12
+
13
+
14
def draw_pitch(pitch):
    """Overlay the pitch contour of *pitch* on the current matplotlib axes.

    *pitch* must expose ``selected_array``, ``xs()`` and ``ceiling``
    (presumably a parselmouth ``Pitch`` — confirm with callers). Unvoiced
    frames (frequency == 0) are replaced by NaN so matplotlib leaves gaps
    instead of drawing them at 0 Hz.
    """
    # Extract selected pitch contour, and
    # replace unvoiced samples by NaN to not plot.
    # Copy first: indexing a structured array field returns a VIEW into the
    # Pitch object's data, so writing NaN without the copy would silently
    # corrupt the caller's pitch values.
    pitch_values = pitch.selected_array["frequency"].copy()
    pitch_values[pitch_values == 0] = np.nan
    # Larger white markers underneath act as a halo so the contour stays
    # readable on top of a spectrogram.
    plt.plot(pitch.xs(), pitch_values, "o", markersize=5, color="w")
    plt.plot(pitch.xs(), pitch_values, "o", markersize=2)
    plt.grid(False)
    plt.ylim(0, pitch.ceiling)
    plt.ylabel("fundamental frequency [Hz]")
@@ -1,35 +1,35 @@
1
- import re
2
- from docx import Document
3
- from collections import defaultdict
4
-
5
-
6
- def parse_waywithwords(document: Document) -> dict:
7
- """"""
8
- waywithwords = {
9
- "IV": "interviewer",
10
- "IE": "interviewee",
11
- }
12
-
13
- results = defaultdict(list)
14
-
15
- for para in document.paragraphs:
16
- try:
17
- content = para.text.split()
18
- speaker = content[0]
19
- transcription = " ".join(content[1:])
20
- except:
21
- speaker = None
22
-
23
- if (
24
- speaker in waywithwords.keys()
25
- and not transcription.lower() in waywithwords.values()
26
- ):
27
- results[waywithwords[speaker]].append(transcription)
28
-
29
- elif re.findall(r"[0-9][0-9]:[0-5][0-9]:[0-5][0-9]", para.text):
30
- results["time"].append(para.text)
31
-
32
- else:
33
- results["remainder"].append(para.text)
34
-
35
- return results
1
+ import re
2
+ from docx import Document
3
+ from collections import defaultdict
4
+
5
+
6
def parse_waywithwords(document: "Document") -> dict:
    """Sort the paragraphs of a Way With Words transcript into speaker turns.

    Args:
        document: a python-docx ``Document`` (annotation is a string so the
            dependency is only needed at call time) whose paragraphs start
            with a speaker code.

    Returns:
        dict with keys ``"interviewer"``/``"interviewee"`` (transcriptions for
        the IV/IE speaker codes), ``"time"`` (paragraphs containing an
        HH:MM:SS timestamp) and ``"remainder"`` (everything else, including
        empty paragraphs and lines that merely repeat the speaker role).
    """
    # Way With Words speaker codes -> result keys.
    waywithwords = {
        "IV": "interviewer",
        "IE": "interviewee",
    }

    results = defaultdict(list)

    for para in document.paragraphs:
        try:
            content = para.text.split()
            speaker = content[0]
            transcription = " ".join(content[1:])
        # Only content[0] can fail here (empty paragraph). The original bare
        # `except:` also swallowed KeyboardInterrupt/SystemExit.
        except IndexError:
            speaker = None

        if (
            speaker in waywithwords
            and transcription.lower() not in waywithwords.values()
        ):
            results[waywithwords[speaker]].append(transcription)

        elif re.findall(r"[0-9][0-9]:[0-5][0-9]:[0-5][0-9]", para.text):
            results["time"].append(para.text)

        else:
            results["remainder"].append(para.text)

    return results
@@ -1,41 +1,41 @@
1
- import re
2
- from textgrids import TextGrid, Interval
3
- import warnings
4
-
5
-
6
- def extract_intervals(textgrid: TextGrid, speakers: list[str]) -> list[list[Interval]]:
7
- """"""
8
- # Check if speakers are in the textgrid tiers
9
- tiers = set(textgrid.keys())
10
- if not set(speakers).issubset(tiers):
11
- raise ValueError(
12
- f"Some speaker(s) '{speakers}' are not a tier in the TextGrid '{tiers}'"
13
- )
14
-
15
- # Check if there is other speaker in the textgrid
16
- if not set(speakers) == tiers:
17
- warnings.warn(
18
- f"TextGrid '{tiers}' have more speakers than specify '{speakers}'"
19
- )
20
-
21
- # Extraction of intervals with text value
22
- speakers_intervals = []
23
- for speaker in speakers:
24
- speaker_intervals = []
25
- for interval in textgrid[speaker]:
26
- if interval.text:
27
- speaker_intervals.append(interval)
28
- speakers_intervals.append(speaker_intervals)
29
-
30
- # Checking if all intervals are correctly labeled
31
- def interval_qc(intervals, label):
32
- labels = set([_.text for _ in intervals])
33
- if not (len(labels) == 1 and labels.pop() == label):
34
- raise Exception(
35
- f"TextGrid was not labeled correctly, current label(s) '{labels}', should be '{label}'."
36
- )
37
-
38
- for intervals, speaker in zip(speakers_intervals, speakers):
39
- interval_qc(intervals, speaker)
40
-
41
- return speakers_intervals
1
+ import re
2
+ from textgrids import TextGrid, Interval
3
+ import warnings
4
+
5
+
6
def extract_intervals(
    textgrid: "TextGrid", speakers: list[str]
) -> "list[list[Interval]]":
    """Collect the non-empty intervals of each speaker tier of *textgrid*.

    Args:
        textgrid: TextGrid whose keys are tier (speaker) names.
        speakers: tiers to extract, in the order results are returned.

    Returns:
        One list of labelled intervals per requested speaker.

    Raises:
        ValueError: if a requested speaker has no tier, or if a speaker's
            non-empty intervals carry a label other than the speaker's name.

    Warns (``UserWarning``) when the TextGrid has tiers beyond *speakers*.
    """
    # Check if speakers are in the textgrid tiers
    tiers = set(textgrid.keys())
    if not set(speakers).issubset(tiers):
        raise ValueError(
            f"Some speaker(s) '{speakers}' are not a tier in the TextGrid '{tiers}'"
        )

    # Check if there is other speaker in the textgrid
    if set(speakers) != tiers:
        warnings.warn(
            f"TextGrid '{tiers}' have more speakers than specify '{speakers}'"
        )

    # Extraction of intervals with text value
    speakers_intervals = [
        [interval for interval in textgrid[speaker] if interval.text]
        for speaker in speakers
    ]

    # Checking if all intervals are correctly labeled
    def interval_qc(intervals, label):
        labels = {interval.text for interval in intervals}
        if not (len(labels) == 1 and labels.pop() == label):
            # ValueError for consistency with the tier check above (was a
            # bare Exception; still caught by callers catching Exception).
            raise ValueError(
                f"TextGrid was not labeled correctly, current label(s) '{labels}', should be '{label}'."
            )

    for intervals, speaker in zip(speakers_intervals, speakers):
        interval_qc(intervals, speaker)

    return speakers_intervals
lingualabpy/tools/data.py CHANGED
@@ -1,41 +1,41 @@
1
- from collections import UserDict
2
- from pandas import DataFrame
3
-
4
- from typing import Any, Dict, List
5
-
6
-
7
- class UnchangeableDict(UserDict):
8
- """A dictionary in which you can add new keys but not modify them in the future."""
9
-
10
- def __setitem__(self, key: Any, item: Any) -> None:
11
- try:
12
- self.__getitem__(key)
13
- raise ValueError("duplicate key '{}' found".format(key))
14
- except KeyError:
15
- return super().__setitem__(key, item)
16
-
17
-
18
- def merge_participants_to_df(
19
- data_participants: List[Dict[Any, Any]],
20
- participant_col: str,
21
- ) -> DataFrame:
22
- # Check if all data have a `participant_col` key
23
- participant_col_checks = [_.get(participant_col) for _ in data_participants]
24
- if not all(participant_col_checks):
25
- raise Exception(
26
- f"One of the samples does not contain the '{participant_col}' information."
27
- )
28
-
29
- # Check if there are no duplicates in the data
30
- df_raw = DataFrame.from_dict(data_participants)
31
- df_melt = df_raw.melt(id_vars=[participant_col]).dropna()
32
- df_for_test = df_melt.drop(columns="value")
33
- duplicates = df_for_test[df_for_test.duplicated()]
34
-
35
- if duplicates.empty:
36
- return df_melt.pivot(index=participant_col, columns="variable")["value"]
37
- else:
38
- error_msg = "There are duplicates in your data "
39
- for participant_id, variable in duplicates.values:
40
- error_msg += f"\n{participant_id}: {variable}"
41
- raise Exception(error_msg)
1
+ from collections import UserDict
2
+ from pandas import DataFrame
3
+
4
+ from typing import Any, Dict, List
5
+
6
+
7
class UnchangeableDict(UserDict):
    """A dictionary in which you can add new keys but not modify them in the future."""

    def __setitem__(self, key: Any, item: Any) -> None:
        # Refuse to overwrite: a key that is already present must stay frozen.
        if key in self:
            raise ValueError(f"duplicate key '{key}' found")
        super().__setitem__(key, item)
16
+
17
+
18
def merge_participants_to_df(
    data_participants: List[Dict[Any, Any]],
    participant_col: str,
) -> DataFrame:
    """Merge per-participant dicts into one participant-by-variable DataFrame.

    Args:
        data_participants: one dict of measurements per sample; every dict
            must carry a truthy *participant_col* entry.
        participant_col: key identifying the participant in each dict.

    Returns:
        DataFrame indexed by participant with one column per variable.

    Raises:
        Exception: if a sample lacks *participant_col*, or if the same
            (participant, variable) pair appears more than once.
    """
    # Every sample must identify its participant.
    if not all(sample.get(participant_col) for sample in data_participants):
        raise Exception(
            f"One of the samples does not contain the '{participant_col}' information."
        )

    # Long format: one row per (participant, variable, value) observation.
    long_format = (
        DataFrame.from_dict(data_participants)
        .melt(id_vars=[participant_col])
        .dropna()
    )

    # A repeated (participant, variable) pair would make the pivot ambiguous.
    pair_columns = long_format.drop(columns="value")
    duplicates = pair_columns[pair_columns.duplicated()]
    if not duplicates.empty:
        error_msg = "There are duplicates in your data "
        for participant_id, variable in duplicates.values:
            error_msg += f"\n{participant_id}: {variable}"
        raise Exception(error_msg)

    return long_format.pivot(index=participant_col, columns="variable")["value"]
@@ -1,59 +1,59 @@
1
- from textgrids import Interval
2
-
3
-
4
- def interval_to_list(interval: Interval) -> list[float]:
5
- """"""
6
- return [interval.xmin, interval.xmax]
7
-
8
-
9
- def is_overlap(interval0: Interval, interval1: Interval) -> bool:
10
- """Check if two intervals overlap"""
11
- return interval0.xmin <= interval1.xmax and interval1.xmin <= interval0.xmax
12
-
13
-
14
- def remove_overlap(interval: Interval, interval_to_remove: Interval) -> list[Interval]:
15
- """"""
16
- # Return interval as a list if there is no overlap
17
- if not is_overlap(interval, interval_to_remove):
18
- return [interval]
19
-
20
- else:
21
- updated_intervals = []
22
-
23
- # If the start of the interval is before the start of the interval to be removed,
24
- # add the non-overlapping part to the result.
25
- if interval.xmin < interval_to_remove.xmin:
26
- updated_intervals.append(
27
- Interval(xmin=interval.xmin, xmax=interval_to_remove.xmin)
28
- )
29
-
30
- # If the end of the interval is after the end of the interval to be removed,
31
- # add the non-overlapping part to the result.
32
- if interval.xmax > interval_to_remove.xmax:
33
- updated_intervals.append(
34
- Interval(xmin=interval_to_remove.xmax, xmax=interval.xmax)
35
- )
36
-
37
- return updated_intervals
38
-
39
-
40
- def intervals_masking(
41
- intervals: list[Interval], intervals_mask: list[Interval]
42
- ) -> list[list[float]]:
43
- """"""
44
- # Each intervals mask will be remove from all the intervals
45
- for interval_to_remove in intervals_mask:
46
- new_intervals = []
47
- for interval in intervals:
48
-
49
- # if the start of the interval is after the end of the mask
50
- # we can just add the interval they are sorted
51
- if interval.xmin > interval_to_remove.xmax:
52
- new_intervals.append(interval)
53
-
54
- else:
55
- new_intervals += remove_overlap(interval, interval_to_remove)
56
-
57
- intervals = new_intervals
58
-
59
- return [interval_to_list(_) for _ in intervals]
1
+ from textgrids import Interval
2
+
3
+
4
def interval_to_list(interval: Interval) -> list[float]:
    """Return the boundaries of *interval* as a plain ``[xmin, xmax]`` list."""
    return [interval.xmin, interval.xmax]
+
8
+
9
def is_overlap(interval0: Interval, interval1: Interval) -> bool:
    """Check if two intervals overlap (touching endpoints count as overlap)."""
    # Disjoint iff one interval ends strictly before the other starts.
    return not (interval1.xmax < interval0.xmin or interval0.xmax < interval1.xmin)
12
+
13
+
14
def remove_overlap(interval: Interval, interval_to_remove: Interval) -> list[Interval]:
    """Subtract *interval_to_remove* from *interval*.

    Returns the 0, 1 or 2 leftover pieces of *interval*; when the two do not
    overlap, *interval* itself is returned unchanged inside a list.
    """
    # Overlap test inlined from is_overlap: touching endpoints overlap.
    overlapping = (
        interval.xmin <= interval_to_remove.xmax
        and interval_to_remove.xmin <= interval.xmax
    )
    if not overlapping:
        return [interval]

    leftovers = []

    # Piece of the interval sticking out before the removed span.
    if interval.xmin < interval_to_remove.xmin:
        leftovers.append(Interval(xmin=interval.xmin, xmax=interval_to_remove.xmin))

    # Piece of the interval sticking out after the removed span.
    if interval.xmax > interval_to_remove.xmax:
        leftovers.append(Interval(xmin=interval_to_remove.xmax, xmax=interval.xmax))

    return leftovers
38
+
39
+
40
def intervals_masking(
    intervals: list[Interval], intervals_mask: list[Interval]
) -> list[list[float]]:
    """Subtract every mask interval from *intervals*.

    Returns the surviving spans as ``[xmin, xmax]`` pairs. Both input lists
    are assumed sorted by time (the early-keep shortcut below relies on it)
    — TODO confirm with callers.
    """
    # Apply the masks one at a time, rebuilding the interval list after each.
    for mask in intervals_mask:
        survivors = []
        for candidate in intervals:

            # Candidate starts after the mask ends: kept as-is (inputs are
            # sorted, so this mask cannot touch it).
            if candidate.xmin > mask.xmax:
                survivors.append(candidate)

            else:
                survivors.extend(remove_overlap(candidate, mask))

        intervals = survivors

    return [interval_to_list(span) for span in intervals]