lingualabpy 0.0.2__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lingualabpy/__init__.py +1 -1
- lingualabpy/cli/jsons2csv.py +1 -1
- lingualabpy/text/textgrid.py +26 -2
- lingualabpy/tools/data.py +3 -3
- {lingualabpy-0.0.2.dist-info → lingualabpy-0.0.3.dist-info}/METADATA +6 -6
- {lingualabpy-0.0.2.dist-info → lingualabpy-0.0.3.dist-info}/RECORD +9 -9
- {lingualabpy-0.0.2.dist-info → lingualabpy-0.0.3.dist-info}/LICENSE +0 -0
- {lingualabpy-0.0.2.dist-info → lingualabpy-0.0.3.dist-info}/WHEEL +0 -0
- {lingualabpy-0.0.2.dist-info → lingualabpy-0.0.3.dist-info}/entry_points.txt +0 -0
lingualabpy/__init__.py
CHANGED
lingualabpy/cli/jsons2csv.py
CHANGED
lingualabpy/text/textgrid.py
CHANGED
|
@@ -1,11 +1,35 @@
|
|
|
1
|
+
import re
|
|
1
2
|
from textgrids import TextGrid, Interval
|
|
3
|
+
import warnings
|
|
2
4
|
|
|
3
5
|
|
|
4
|
-
def extract_intervals(
|
|
6
|
+
def extract_intervals(textgrid: TextGrid, speakers: list[str]) -> list[list[Interval]]:
|
|
5
7
|
""""""
|
|
8
|
+
# Check if speakers are in the textgrid tiers
|
|
9
|
+
tiers = set(textgrid.keys())
|
|
10
|
+
if not set(speakers).issubset(tiers):
|
|
11
|
+
raise ValueError(
|
|
12
|
+
f"Some speaker(s) '{speakers}' are not a tier in the TextGrid '{tiers}'"
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
# Check if there is other speaker in the textgrid
|
|
16
|
+
if not set(speakers) == tiers:
|
|
17
|
+
warnings.warn(
|
|
18
|
+
f"TextGrid '{tiers}' have more speakers than specify '{speakers}'"
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# Extraction of intervals with text value
|
|
6
22
|
speakers_intervals = []
|
|
7
23
|
for speaker in speakers:
|
|
8
|
-
|
|
24
|
+
speaker_intervals = []
|
|
25
|
+
for interval in textgrid[speaker]:
|
|
26
|
+
# Cleaning of the interval text
|
|
27
|
+
interval.text = (
|
|
28
|
+
interval.text.encode().decode("unicode_escape").strip(" \n\r\t")
|
|
29
|
+
)
|
|
30
|
+
if interval.text:
|
|
31
|
+
speaker_intervals.append(interval)
|
|
32
|
+
speakers_intervals.append(speaker_intervals)
|
|
9
33
|
|
|
10
34
|
# Checking if all intervals are correctly labeled
|
|
11
35
|
def interval_qc(intervals, label):
|
lingualabpy/tools/data.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
from pandas import DataFrame
|
|
2
2
|
|
|
3
3
|
from typing import Any, Dict, List
|
|
4
4
|
|
|
@@ -6,7 +6,7 @@ from typing import Any, Dict, List
|
|
|
6
6
|
def merge_participants_to_df(
|
|
7
7
|
data_participants: List[Dict[Any, Any]],
|
|
8
8
|
participant_col: str,
|
|
9
|
-
) ->
|
|
9
|
+
) -> DataFrame:
|
|
10
10
|
# Check if all data have a `participant_col` key
|
|
11
11
|
participant_col_checks = [_.get(participant_col) for _ in data_participants]
|
|
12
12
|
if not all(participant_col_checks):
|
|
@@ -15,7 +15,7 @@ def merge_participants_to_df(
|
|
|
15
15
|
)
|
|
16
16
|
|
|
17
17
|
# Check if there are no duplicates in the data
|
|
18
|
-
df_raw =
|
|
18
|
+
df_raw = DataFrame.from_dict(data_participants)
|
|
19
19
|
df_melt = df_raw.melt(id_vars=[participant_col]).dropna()
|
|
20
20
|
df_for_test = df_melt.drop(columns="value")
|
|
21
21
|
duplicates = df_for_test[df_for_test.duplicated()]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: lingualabpy
|
|
3
|
-
Version: 0.0.2
|
|
3
|
+
Version: 0.0.3
|
|
4
4
|
Summary: Tools and utilities from the LINGUA laboratory
|
|
5
5
|
Author-email: Christophe Bedetti <christophe.bedetti@umontreal.ca>
|
|
6
6
|
Requires-Python: >=3.8.1
|
|
@@ -24,18 +24,18 @@ Requires-Dist: praat-parselmouth
|
|
|
24
24
|
Requires-Dist: praat-textgrids
|
|
25
25
|
Requires-Dist: pydub
|
|
26
26
|
Requires-Dist: python-docx
|
|
27
|
-
Requires-Dist: lingualabpy[
|
|
27
|
+
Requires-Dist: lingualabpy[test, doc, lint, feature] ; extra == "dev"
|
|
28
28
|
Requires-Dist: black ; extra == "lint"
|
|
29
|
-
Requires-Dist: pytest ; extra == "
|
|
30
|
-
Requires-Dist: pytest-cov ; extra == "
|
|
29
|
+
Requires-Dist: pytest ; extra == "test"
|
|
30
|
+
Requires-Dist: pytest-cov ; extra == "test"
|
|
31
31
|
Project-URL: Documentation, https://github.com/lingualab/lingualabpy
|
|
32
32
|
Project-URL: Source, https://github.com/lingualab/lingualabpy
|
|
33
33
|
Project-URL: Tracker, https://github.com/lingualab/lingualabpy/issues
|
|
34
34
|
Provides-Extra: dev
|
|
35
|
-
Provides-Extra:
|
|
35
|
+
Provides-Extra: doc
|
|
36
36
|
Provides-Extra: feature
|
|
37
37
|
Provides-Extra: lint
|
|
38
|
-
Provides-Extra:
|
|
38
|
+
Provides-Extra: test
|
|
39
39
|
|
|
40
40
|
# lingualabpy
|
|
41
41
|
|
|
@@ -1,19 +1,19 @@
|
|
|
1
|
-
lingualabpy/__init__.py,sha256=
|
|
1
|
+
lingualabpy/__init__.py,sha256=Klpz9mrtYXzZ3eSXg7ciwak9mAkgKVC5G1w0uaMn7Q8,515
|
|
2
2
|
lingualabpy/io.py,sha256=TF8eSuX_xfGWWbQ1C0TLnia7HS1Vexn0RqKMvCzHGnE,878
|
|
3
3
|
lingualabpy/audio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
lingualabpy/audio/triming.py,sha256=6CY9pH43KFGAPj8Nw34y1YnlOb8gxGLU1btcuRy-Hgc,288
|
|
5
5
|
lingualabpy/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
lingualabpy/cli/audio_triming.py,sha256=pAsLv2IuAKLoj8jBHB-SR5mZ7Jb0w26m41-Cya4VvoU,1194
|
|
7
7
|
lingualabpy/cli/docx2json.py,sha256=Bj5f89B76NtA7Xx71xXGnSucrDEyaH9mUFifQo0wfn4,590
|
|
8
|
-
lingualabpy/cli/jsons2csv.py,sha256=
|
|
8
|
+
lingualabpy/cli/jsons2csv.py,sha256=_AcIXiQUCF5SsKqMg6WjTr8fhbuflaJNFrCP91ccSYs,596
|
|
9
9
|
lingualabpy/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
10
|
lingualabpy/text/parser.py,sha256=qZqhzi-6UHdbsXEWi5IMxsDK5Tsosb3pdSo67hcA6To,913
|
|
11
|
-
lingualabpy/text/textgrid.py,sha256=
|
|
11
|
+
lingualabpy/text/textgrid.py,sha256=LXdDAY4aEl3Q998Uq28fz0gryFj3KWq1j0RsuWOlEC0,1632
|
|
12
12
|
lingualabpy/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
-
lingualabpy/tools/data.py,sha256=
|
|
13
|
+
lingualabpy/tools/data.py,sha256=FTjxbckza65vZ_MWEO5wi4mDXpJ2u9KkiEA3-HGfOt8,1106
|
|
14
14
|
lingualabpy/tools/interval.py,sha256=50lzbMTNHF26mPRG50mykCUQE3pdyRjPWMwsskwy0tg,2060
|
|
15
|
-
lingualabpy-0.0.2.dist-info/entry_points.txt,sha256=QvnRy1hJXRGGbVQgS-u--5Rgs7rPBmgWC9K1iaxS5gQ,186
|
|
16
|
-
lingualabpy-0.0.2.dist-info/LICENSE,sha256=s3hbMsmwGq2XFcxpMD3oHc8GSUeXAmPVXJbn7SYXdos,1095
|
|
17
|
-
lingualabpy-0.0.2.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
|
18
|
-
lingualabpy-0.0.
|
|
19
|
-
lingualabpy-0.0.2.dist-info/RECORD,,
|
|
15
|
+
lingualabpy-0.0.3.dist-info/entry_points.txt,sha256=QvnRy1hJXRGGbVQgS-u--5Rgs7rPBmgWC9K1iaxS5gQ,186
|
|
16
|
+
lingualabpy-0.0.3.dist-info/LICENSE,sha256=s3hbMsmwGq2XFcxpMD3oHc8GSUeXAmPVXJbn7SYXdos,1095
|
|
17
|
+
lingualabpy-0.0.3.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
|
18
|
+
lingualabpy-0.0.3.dist-info/METADATA,sha256=JI1jTk5UA5CeLCcLO6HCC157WmeUW0aUTm0hKZaEXm8,1703
|
|
19
|
+
lingualabpy-0.0.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|