pelican-nlp 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pelican_nlp/Nils_backup/__init__.py +0 -0
- pelican_nlp/Nils_backup/extract_acoustic_features.py +274 -0
- pelican_nlp/Nils_backup/fluency/__init__.py +0 -0
- pelican_nlp/Nils_backup/fluency/aggregate_fluency_results.py +186 -0
- pelican_nlp/Nils_backup/fluency/behavioral_data.py +42 -0
- pelican_nlp/Nils_backup/fluency/check_duplicates.py +169 -0
- pelican_nlp/Nils_backup/fluency/coherence.py +653 -0
- pelican_nlp/Nils_backup/fluency/config.py +231 -0
- pelican_nlp/Nils_backup/fluency/main.py +182 -0
- pelican_nlp/Nils_backup/fluency/optimality_without_tsa.py +466 -0
- pelican_nlp/Nils_backup/fluency/plot_fluency.py +573 -0
- pelican_nlp/Nils_backup/fluency/plotting_utils.py +170 -0
- pelican_nlp/Nils_backup/fluency/questionnaires_data.py +43 -0
- pelican_nlp/Nils_backup/fluency/stats_fluency.py +930 -0
- pelican_nlp/Nils_backup/fluency/utils.py +41 -0
- pelican_nlp/Nils_backup/speaker_diarization_Nils.py +328 -0
- pelican_nlp/Nils_backup/transcription/__init__.py +0 -0
- pelican_nlp/Nils_backup/transcription/annotation_tool.py +1001 -0
- pelican_nlp/Nils_backup/transcription/annotation_tool_boundaries.py +1122 -0
- pelican_nlp/Nils_backup/transcription/annotation_tool_sandbox.py +985 -0
- pelican_nlp/Nils_backup/transcription/output/holmes_control_nova_all_outputs.json +7948 -0
- pelican_nlp/Nils_backup/transcription/test.json +1 -0
- pelican_nlp/Nils_backup/transcription/transcribe_audio.py +314 -0
- pelican_nlp/Nils_backup/transcription/transcribe_audio_chunked.py +695 -0
- pelican_nlp/Nils_backup/transcription/transcription.py +801 -0
- pelican_nlp/Nils_backup/transcription/transcription_gui.py +955 -0
- pelican_nlp/Nils_backup/transcription/word_boundaries.py +190 -0
- pelican_nlp/Silvia_files/Opensmile/opensmile_feature_extraction.py +66 -0
- pelican_nlp/Silvia_files/prosogram/prosogram.py +104 -0
- pelican_nlp/__init__.py +1 -1
- pelican_nlp/_version.py +1 -0
- pelican_nlp/configuration_files/config_audio.yml +150 -0
- pelican_nlp/configuration_files/config_discourse.yml +104 -0
- pelican_nlp/configuration_files/config_fluency.yml +108 -0
- pelican_nlp/configuration_files/config_general.yml +131 -0
- pelican_nlp/configuration_files/config_morteza.yml +103 -0
- pelican_nlp/praat/__init__.py +29 -0
- {pelican_nlp-0.1.0.dist-info → pelican_nlp-0.1.2.dist-info}/METADATA +14 -21
- pelican_nlp-0.1.2.dist-info/RECORD +75 -0
- pelican_nlp-0.1.0.dist-info/RECORD +0 -39
- {pelican_nlp-0.1.0.dist-info → pelican_nlp-0.1.2.dist-info}/WHEEL +0 -0
- {pelican_nlp-0.1.0.dist-info → pelican_nlp-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {pelican_nlp-0.1.0.dist-info → pelican_nlp-0.1.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,108 @@
|
|
1
|
+
# Configuration file for fluency task
|
2
|
+
# =======================================
|
3
|
+
input_file: "text" #or 'audio'
|
4
|
+
fluency_task: &fluency_flag true
|
5
|
+
#========================================
|
6
|
+
|
7
|
+
#general configurations; always adapt
|
8
|
+
PATH_TO_PROJECT_FOLDER: "/home/yvespauli/PycharmProjects/FluencyTest"
|
9
|
+
language: "german"
|
10
|
+
multiple_sessions: &session_flag false
|
11
|
+
|
12
|
+
corpus_names: #names of fluency tasks (e.g. "animals", "clothes")
|
13
|
+
- "animals"
|
14
|
+
- "clothes"
|
15
|
+
- "food"
|
16
|
+
|
17
|
+
#Specify linguistic metrics to extract
|
18
|
+
metric_to_extract: 'embeddings' #Possible options: 'embeddings', 'logits'
|
19
|
+
output_document_information: true
|
20
|
+
#====================================================================
|
21
|
+
|
22
|
+
#Optional configurations; change according to preference. However, the default settings are recommended
|
23
|
+
cleaning_options:
|
24
|
+
general_cleaning: true
|
25
|
+
#Options for fluency tasks
|
26
|
+
fluency_task: *fluency_flag
|
27
|
+
word_splitter: ';' #default split is ','; set a different word_splitter if necessary
|
28
|
+
remove_hyphens: true
|
29
|
+
remove_duplicates: false
|
30
|
+
lowercase: false
|
31
|
+
#Optional cleaning
|
32
|
+
remove_brackets_and_bracketcontent: false #default 'false'
|
33
|
+
remove_timestamps: false #default 'false'
|
34
|
+
timestamp_pattern_example: null #e.g. "#00:00:23-00#"
|
35
|
+
remove_punctuation: false #Careful: if set to true, the word_splitter character might be removed
|
36
|
+
|
37
|
+
options_embeddings:
|
38
|
+
tokenization_method: "whitespace" #or "model"
|
39
|
+
model_name: "fastText" #e.g. "fastText", "xlm-roberta-base"
|
40
|
+
pytorch_based_model: false
|
41
|
+
method: "model_instance"
|
42
|
+
max_length: null
|
43
|
+
clean_embedding_tokens: true
|
44
|
+
|
45
|
+
semantic-similarity: true
|
46
|
+
distance-from-randomness: false
|
47
|
+
|
48
|
+
options_dis_from_randomness:
|
49
|
+
window_size: 8
|
50
|
+
min_len: null
|
51
|
+
bootstrap: 10000
|
52
|
+
shuffle_mode: 'include0_includeN'
|
53
|
+
parallel_computing: false #not yet set up
|
54
|
+
|
55
|
+
options_semantic-similarity:
|
56
|
+
window_sizes: #'all' or window size as integer
|
57
|
+
- 2
|
58
|
+
- 8
|
59
|
+
#==================================================================
|
60
|
+
|
61
|
+
#Extra configurations;
|
62
|
+
task_name: "fluency"
|
63
|
+
create_aggregation_of_results: true
|
64
|
+
|
65
|
+
pipeline_options:
|
66
|
+
quality_check: false
|
67
|
+
clean_text: true
|
68
|
+
tokenize_text: false
|
69
|
+
normalize_text: false
|
70
|
+
|
71
|
+
general_cleaning_options:
|
72
|
+
strip_whitespace: true
|
73
|
+
merge_multiple_whitespaces: true
|
74
|
+
remove_whitespace_before_punctuation: true
|
75
|
+
merge_newline_characters: true
|
76
|
+
remove_backslashes: true
|
77
|
+
|
78
|
+
has_multiple_sections: false
|
79
|
+
has_section_titles: false
|
80
|
+
section_identification: null
|
81
|
+
number_of_sections: 1
|
82
|
+
number_of_speakers: 1
|
83
|
+
discourse: false
|
84
|
+
|
85
|
+
document_information_output:
|
86
|
+
parameters:
|
87
|
+
- subject_ID
|
88
|
+
- fluency_word_count
|
89
|
+
- fluency_duplicate_count
|
90
|
+
|
91
|
+
#================================================================
|
92
|
+
|
93
|
+
#Detail configurations; Changes optional, mostly used for quality checking / error handling
|
94
|
+
recompute_everything: true
|
95
|
+
number_of_subjects: null
|
96
|
+
|
97
|
+
# Filename components configuration
|
98
|
+
filename_components:
|
99
|
+
subject: true # mandatory
|
100
|
+
session: *session_flag
|
101
|
+
task: true # mandatory
|
102
|
+
task_addition: false
|
103
|
+
corpus: true # typically true for fluency tasks (e.g., "animals", "clothes")
|
104
|
+
metric: true
|
105
|
+
additional_tags: []
|
106
|
+
|
107
|
+
|
108
|
+
|
@@ -0,0 +1,131 @@
|
|
1
|
+
# Master Configuration File
|
2
|
+
# ========================
|
3
|
+
|
4
|
+
# Basic Settings
|
5
|
+
# -------------
|
6
|
+
input_file: "text" # Options: 'text' or 'audio'
|
7
|
+
PATH_TO_PROJECT_FOLDER: "/home/yvespauli/PycharmProjects/FluencyTest"
|
8
|
+
language: "german" # Options: 'german', 'english'
|
9
|
+
recompute_everything: true # If false, reuses previously computed results
|
10
|
+
|
11
|
+
# Task Configuration
|
12
|
+
# -----------------
|
13
|
+
task_name: "fluency" # Options: 'fluency', 'interview'
|
14
|
+
fluency_task: &fluency_flag true # Flag for fluency-specific settings
|
15
|
+
discourse: &discourse_flag false # Flag for discourse-specific settings
|
16
|
+
corpus_names: # List of task corpora
|
17
|
+
- "animals"
|
18
|
+
|
19
|
+
# Session and Subject Settings
|
20
|
+
# --------------------------
|
21
|
+
multiple_sessions: false
|
22
|
+
number_of_subjects: null # If null, auto-detected
|
23
|
+
number_of_speakers: 1
|
24
|
+
subject_speakertag: null # Speaker tag for subject (e.g., "B")
|
25
|
+
|
26
|
+
# Document Structure
|
27
|
+
# ----------------
|
28
|
+
has_multiple_sections: false
|
29
|
+
has_section_titles: false
|
30
|
+
section_identification: null # e.g., "Section:"
|
31
|
+
number_of_sections: 1 # If null, auto-detected
|
32
|
+
|
33
|
+
# Processing Pipeline
|
34
|
+
# -----------------
|
35
|
+
pipeline_options:
|
36
|
+
quality_check: false
|
37
|
+
clean_text: true
|
38
|
+
tokenize_text: false
|
39
|
+
normalize_text: false
|
40
|
+
|
41
|
+
# Metric Extraction
|
42
|
+
# ---------------
|
43
|
+
metric_to_extract: "embeddings" # Options: 'embeddings', 'logits'
|
44
|
+
extract_logits: null
|
45
|
+
extract_embeddings: true
|
46
|
+
|
47
|
+
# Cleaning Options
|
48
|
+
# --------------
|
49
|
+
cleaning_options:
|
50
|
+
general_cleaning: true
|
51
|
+
remove_punctuation: false
|
52
|
+
lowercase: true
|
53
|
+
remove_brackets_and_bracketcontent: false
|
54
|
+
remove_timestamps: false
|
55
|
+
timestamp_pattern_example: null # e.g., "#00:00:23-00#"
|
56
|
+
# Fluency-specific options
|
57
|
+
fluency_task: *fluency_flag
|
58
|
+
word_splitter: ';'
|
59
|
+
remove_hyphens: true
|
60
|
+
remove_duplicates: true
|
61
|
+
|
62
|
+
general_cleaning_options:
|
63
|
+
strip_whitespace: true
|
64
|
+
merge_multiple_whitespaces: true
|
65
|
+
remove_whitespace_before_punctuation: true
|
66
|
+
merge_newline_characters: true
|
67
|
+
remove_backslashes: true
|
68
|
+
|
69
|
+
# Embedding Options
|
70
|
+
# ---------------
|
71
|
+
options_embeddings:
|
72
|
+
tokenization_method: "whitespace" # Options: 'whitespace', 'model'
|
73
|
+
model_name: "fastText" # Options: 'fastText', 'xlm-roberta-base'
|
74
|
+
pytorch_based_model: false
|
75
|
+
method: "model_instance"
|
76
|
+
max_length: 512
|
77
|
+
clean_embedding_tokens: true
|
78
|
+
remove_punctuation: false
|
79
|
+
lowercase: false
|
80
|
+
keep_speakertags: false
|
81
|
+
semantic-similarity: true
|
82
|
+
window_size: null
|
83
|
+
clean_tokens: true
|
84
|
+
divergence_from_optimality: false
|
85
|
+
output_options:
|
86
|
+
exclude_special_tokens: true
|
87
|
+
remove_'_'_character: true
|
88
|
+
remove_speaker_labels: true
|
89
|
+
remove_punctuation_and_symbols: true
|
90
|
+
remove_brackets_and_content: true
|
91
|
+
|
92
|
+
# Logits Options
|
93
|
+
# -------------
|
94
|
+
options_logits:
|
95
|
+
chunk_size: 128
|
96
|
+
overlap_size: 64
|
97
|
+
tokenization_method: "model"
|
98
|
+
model_name: "DiscoResearch/Llama3-German-8B-32k"
|
99
|
+
remove_punctuation: true
|
100
|
+
lowercase: true
|
101
|
+
keep_speakertags: true
|
102
|
+
|
103
|
+
# Analysis Options
|
104
|
+
# --------------
|
105
|
+
options_semantic-similarity:
|
106
|
+
window_sizes: # 'all' or window size as integer
|
107
|
+
- 2
|
108
|
+
- 8
|
109
|
+
|
110
|
+
options_dis_from_randomness:
|
111
|
+
window_size: 8
|
112
|
+
min_len: null
|
113
|
+
bootstrap: 10000
|
114
|
+
shuffle_mode: 'include0_includeN'
|
115
|
+
parallel_computing: false
|
116
|
+
|
117
|
+
# Normalization Options
|
118
|
+
# -------------------
|
119
|
+
normalization_options:
|
120
|
+
method: "lemmatization" # Options: 'lemmatization', 'stemming'
|
121
|
+
|
122
|
+
# Filename Configuration
|
123
|
+
# --------------------
|
124
|
+
filename_components:
|
125
|
+
subject: true # mandatory
|
126
|
+
session: false
|
127
|
+
task: true # mandatory
|
128
|
+
task_addition: false
|
129
|
+
corpus: true
|
130
|
+
metric: true
|
131
|
+
additional_tags: []
|
@@ -0,0 +1,103 @@
|
|
1
|
+
# Configuration file: variable parameters
|
2
|
+
# =======================================
|
3
|
+
input_file: "text" #or 'audio'
|
4
|
+
discourse: &discourse_flag true
|
5
|
+
|
6
|
+
#PATH_TO_PROJECT_FOLDER: "/home/yvespauli/PycharmProjects/KetamineStudy/KetamineStudy_ProjectFolder/" # Set default to home directory, e.g., '/home/usr/...'
|
7
|
+
PATH_TO_PROJECT_FOLDER: "/home/yvespauli/PycharmProjects/Morteza/"
|
8
|
+
language: "german" # Possibly add options for German and English
|
9
|
+
|
10
|
+
task_name: "interview" # Give name of task used for creation of the input file (e.g., ['fluency', 'interview'])
|
11
|
+
corpus_names:
|
12
|
+
- "schizophrenia"
|
13
|
+
|
14
|
+
|
15
|
+
number_of_subjects: null # Specify number of subjects; if 'null', number of subjects is automatically detected
|
16
|
+
multiple_sessions: false # Set to True if multiple sessions per subject
|
17
|
+
|
18
|
+
recompute_everything: true #If set to 'false' pelican-nlp will try to reuse previously computed results stored on your drive
|
19
|
+
|
20
|
+
has_multiple_sections: false #evaluated independently
|
21
|
+
has_section_titles: false
|
22
|
+
section_identification: null #e.g. "Section:", 'null' if file does not have multiple sections, use pattern that is unlikely to appear in rest of transcript
|
23
|
+
number_of_sections: null #if 'null' number of sections automatically detected, however, specifying number recommended if known.
|
24
|
+
|
25
|
+
number_of_speakers: 3
|
26
|
+
subject_speakertag: "B"
|
27
|
+
|
28
|
+
metric_to_extract: "embeddings"
|
29
|
+
extract_logits: false
|
30
|
+
extract_embeddings: true
|
31
|
+
|
32
|
+
pipeline_options:
|
33
|
+
quality_check: false
|
34
|
+
clean_text: true
|
35
|
+
tokenize_text: false
|
36
|
+
normalize_text: false
|
37
|
+
|
38
|
+
tokenization: "wordLevel" # Options: 'characterLevel', 'subWordLevel'
|
39
|
+
|
40
|
+
# Options for extract_logits
|
41
|
+
chunk_size: null
|
42
|
+
overlap_size: null
|
43
|
+
|
44
|
+
# Options for extract_embeddings
|
45
|
+
window_sizes: [2]
|
46
|
+
metric_function: cosine_similarity
|
47
|
+
aggregation_functions: mean_of_means
|
48
|
+
|
49
|
+
|
50
|
+
fluency_task: &fluency_flag false
|
51
|
+
cleaning_options:
|
52
|
+
general_cleaning: true # General cleaning options used for most text preprocessing, default: True.
|
53
|
+
remove_brackets_and_bracketcontent: true
|
54
|
+
remove_timestamps: true
|
55
|
+
timestamp_pattern_example: "#00:00:19-0#"
|
56
|
+
#Options for fluency tasks
|
57
|
+
fluency_task: *fluency_flag
|
58
|
+
word_splitter: null
|
59
|
+
remove_hyphens: null
|
60
|
+
remove_duplicates: null
|
61
|
+
|
62
|
+
general_cleaning_options:
|
63
|
+
strip_whitespace: true
|
64
|
+
merge_multiple_whitespaces: true
|
65
|
+
remove_whitespace_before_punctuation: true
|
66
|
+
merge_newline_characters: true
|
67
|
+
remove_backslashes: true
|
68
|
+
|
69
|
+
tokenization_options_logits:
|
70
|
+
method: "model_instance" # Options: model_instance, regex, nltk, etc.
|
71
|
+
model_name: "DiscoResearch/Llama3-German-8B-32k" # Replace with your model instance name
|
72
|
+
remove_punctuation: true
|
73
|
+
lowercase: true
|
74
|
+
|
75
|
+
options_embeddings:
|
76
|
+
tokenization_method: "model_roberta" #or "whitespace", "model"
|
77
|
+
max_length: 512 #max sequence length
|
78
|
+
model_name: "xlm-roberta-base" #e.g. "fastText", "xlm-roberta-base"
|
79
|
+
pytorch_based_model: true
|
80
|
+
method: "model_instance"
|
81
|
+
remove_punctuation: false
|
82
|
+
lowercase: false
|
83
|
+
keep_speakertags: true
|
84
|
+
clean_embedding_tokens: true
|
85
|
+
output_options:
|
86
|
+
exclude_special_tokens: true
|
87
|
+
remove_'_'_character: true
|
88
|
+
remove_speaker_labels: true
|
89
|
+
remove_punctuation_and_symbols: true
|
90
|
+
remove_brackets_and_content: true
|
91
|
+
|
92
|
+
window_size: null
|
93
|
+
|
94
|
+
semantic-similarity: false
|
95
|
+
distance-from-randomness: false
|
96
|
+
|
97
|
+
normalization_options:
|
98
|
+
method: "lemmatization" #Options: lemmatization or stemming
|
99
|
+
|
100
|
+
create_aggregation_of_results: false
|
101
|
+
output_document_information: false
|
102
|
+
|
103
|
+
|
@@ -0,0 +1,29 @@
|
|
1
|
+
import os
|
2
|
+
|
3
|
+
# Get the directory where the Praat scripts are stored
|
4
|
+
PRAAT_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
|
5
|
+
|
6
|
+
# Define paths to individual scripts
|
7
|
+
PROSOMAIN_SCRIPT = os.path.join(PRAAT_SCRIPTS_DIR, 'prosomain.praat')
|
8
|
+
PROSOGRAM_SCRIPT = os.path.join(PRAAT_SCRIPTS_DIR, 'prosogram.praat')
|
9
|
+
PROSOPLOT_SCRIPT = os.path.join(PRAAT_SCRIPTS_DIR, 'prosoplot.praat')
|
10
|
+
SEGMENT_SCRIPT = os.path.join(PRAAT_SCRIPTS_DIR, 'segment.praat')
|
11
|
+
STYLIZE_SCRIPT = os.path.join(PRAAT_SCRIPTS_DIR, 'stylize.praat')
|
12
|
+
POLYTONIA_SCRIPT = os.path.join(PRAAT_SCRIPTS_DIR, 'polytonia.praat')
|
13
|
+
UTIL_SCRIPT = os.path.join(PRAAT_SCRIPTS_DIR, 'util.praat')
|
14
|
+
EPS_CONV_SCRIPT = os.path.join(PRAAT_SCRIPTS_DIR, 'eps_conv.praat')
|
15
|
+
SETUP_SCRIPT = os.path.join(PRAAT_SCRIPTS_DIR, 'setup.praat')
|
16
|
+
|
17
|
+
# Export all script paths
|
18
|
+
__all__ = [
|
19
|
+
'PRAAT_SCRIPTS_DIR',
|
20
|
+
'PROSOMAIN_SCRIPT',
|
21
|
+
'PROSOGRAM_SCRIPT',
|
22
|
+
'PROSOPLOT_SCRIPT',
|
23
|
+
'SEGMENT_SCRIPT',
|
24
|
+
'STYLIZE_SCRIPT',
|
25
|
+
'POLYTONIA_SCRIPT',
|
26
|
+
'UTIL_SCRIPT',
|
27
|
+
'EPS_CONV_SCRIPT',
|
28
|
+
'SETUP_SCRIPT'
|
29
|
+
]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: pelican_nlp
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.2
|
4
4
|
Summary: Preprocessing and Extraction of Linguistic Information for Computational Analysis
|
5
5
|
Author-email: Yves Pauli <yves.pauli@gmail.com>
|
6
6
|
License-Expression: CC-BY-NC-4.0
|
@@ -51,7 +51,7 @@ PELICAN_nlp
|
|
51
51
|
PELICAN_nlp stands for "Preprocessing and Extraction of Linguistic Information for Computational Analysis - Natural Language Processing". This package enables the creation of standardized and reproducible language processing pipelines, extracting linguistic features from various tasks like discourse, fluency, and image descriptions.
|
52
52
|
|
53
53
|
.. image:: https://img.shields.io/pypi/v/pelican-nlp.svg
|
54
|
-
:target: https://pypi.org/project/
|
54
|
+
:target: https://pypi.org/project/pelican-nlp/
|
55
55
|
:alt: PyPI version
|
56
56
|
|
57
57
|
.. image:: https://img.shields.io/github/license/ypauli/PELICAN-nlp.svg
|
@@ -59,7 +59,7 @@ PELICAN_nlp stands for "Preprocessing and Extraction of Linguistic Information f
|
|
59
59
|
:alt: License
|
60
60
|
|
61
61
|
.. image:: https://img.shields.io/pypi/pyversions/pelican-nlp.svg
|
62
|
-
:target: https://pypi.org/project/
|
62
|
+
:target: https://pypi.org/project/pelican-nlp/
|
63
63
|
:alt: Supported Python Versions
|
64
64
|
|
65
65
|
Installation
|
@@ -75,13 +75,16 @@ For the latest development version:
|
|
75
75
|
|
76
76
|
.. code-block:: bash
|
77
77
|
|
78
|
-
pip install
|
78
|
+
pip install git+https://github.com/ypauli/PELICAN-nlp.git
|
79
79
|
|
80
80
|
Usage
|
81
81
|
=====
|
82
82
|
|
83
83
|
To use the pelican_nlp package:
|
84
84
|
|
85
|
+
Adapt your configuration file to your needs.
|
86
|
+
ALWAYS change the specified project folder location.
|
87
|
+
|
85
88
|
.. code-block:: python
|
86
89
|
|
87
90
|
from pelican_nlp.main import Pelican
|
@@ -94,7 +97,7 @@ For reliable operation, data must be stored in the *Language Processing Data Str
|
|
94
97
|
|
95
98
|
Text and audio files should follow this naming convention:
|
96
99
|
|
97
|
-
|
100
|
+
[subjectID]_[sessionID]_[task]_[task-supplement]_[corpus].[extension]
|
98
101
|
|
99
102
|
- subjectID: ID of subject (e.g., sub-01), mandatory
|
100
103
|
- sessionID: ID of session (e.g., ses-01), if available
|
@@ -104,7 +107,8 @@ subjectID_sessionID_task_task-supplement_corpus.extension
|
|
104
107
|
- extension: file extension (e.g., txt / pdf / docx / rtf), mandatory
|
105
108
|
|
106
109
|
Example filenames:
|
107
|
-
|
110
|
+
|
111
|
+
- sub-01_interview_schizophrenia.rtf
|
108
112
|
- sub-03_ses-02_fluency_semantic_animals.docx
|
109
113
|
|
110
114
|
To optimize performance, close other programs and limit GPU usage during language processing.
|
@@ -121,26 +125,15 @@ Features
|
|
121
125
|
Examples
|
122
126
|
========
|
123
127
|
|
124
|
-
|
125
|
-
|
126
|
-
.. code-block:: python
|
127
|
-
|
128
|
-
from package_name import SomeClass
|
129
|
-
|
130
|
-
configuration_file = "config_fluency.yml"
|
131
|
-
pelican.run(configuration_file)
|
132
|
-
|
133
|
-
*Link to config_fluency.yml*
|
134
|
-
|
135
|
-
Sample folder for data collection of the semantic fluency task:
|
136
|
-
*Link to sample_folder*
|
128
|
+
You can find example setups in the `examples/ <https://github.com/ypauli/PELICAN-nlp/tree/main/examples>`_ folder.
|
129
|
+
ALWAYS change the path to the project folder specified in the configuration file to your specific project location.
|
137
130
|
|
138
131
|
Contributing
|
139
132
|
============
|
140
133
|
|
141
|
-
Contributions are welcome! Please check out the `contributing guide <https://github.com/ypauli/PELICAN/blob/main/CONTRIBUTING.md>`_.
|
134
|
+
Contributions are welcome! Please check out the `contributing guide <https://github.com/ypauli/PELICAN-nlp/blob/main/CONTRIBUTING.md>`_.
|
142
135
|
|
143
136
|
License
|
144
137
|
=======
|
145
138
|
|
146
|
-
This project is licensed under Attribution-NonCommercial 4.0 International. See the `LICENSE <https://github.com/ypauli/PELICAN/blob/main/LICENSE>`_ file for details.
|
139
|
+
This project is licensed under Attribution-NonCommercial 4.0 International. See the `LICENSE <https://github.com/ypauli/PELICAN-nlp/blob/main/LICENSE>`_ file for details.
|
@@ -0,0 +1,75 @@
|
|
1
|
+
pelican_nlp/__init__.py,sha256=TD5xjKeXXAH6nUWG-6igbClgovi5r8RIEqI_ix1QeYo,204
|
2
|
+
pelican_nlp/_version.py,sha256=K5SiDdEGYMpdqXThrqwTqECJJBOQNTQDrnpc2K5mzKs,21
|
3
|
+
pelican_nlp/main.py,sha256=xKUqqA3sh9kbk07lKA_poILIU1c8oIeaSsVqPOPY5Tk,7596
|
4
|
+
pelican_nlp/Nils_backup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
pelican_nlp/Nils_backup/extract_acoustic_features.py,sha256=eSP8lXxbZ15YE1HqxGtma9uWOcSN-fI-ig-NwQ9eOA8,10771
|
6
|
+
pelican_nlp/Nils_backup/speaker_diarization_Nils.py,sha256=3RIhjKihu4Z1rruMt9KESFE2lqesfzIpRr7rLummUEo,10219
|
7
|
+
pelican_nlp/Nils_backup/fluency/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
|
+
pelican_nlp/Nils_backup/fluency/aggregate_fluency_results.py,sha256=VVsKR8_Epck-jk-uT6zNV-QO7EiM342MUzzHmVhOSdo,6392
|
9
|
+
pelican_nlp/Nils_backup/fluency/behavioral_data.py,sha256=TJRpBhOh9JMdoL6OcNwhlChe_sNTFQlAhVKl2ml0X0w,1181
|
10
|
+
pelican_nlp/Nils_backup/fluency/check_duplicates.py,sha256=XjfF7NEkilNmPdU0yOVug7xqsc6JbRu-HYO54FZQ8hg,6126
|
11
|
+
pelican_nlp/Nils_backup/fluency/coherence.py,sha256=JGv-3RWwwYboEDZep2mQMuNivZNjV_H5ZrjwY2JHS10,21437
|
12
|
+
pelican_nlp/Nils_backup/fluency/config.py,sha256=Ef9NdLcpCe6XH690plV5FBM_KEjoZR0wy9uYCdAFo78,9233
|
13
|
+
pelican_nlp/Nils_backup/fluency/main.py,sha256=zMDTeNRj971xlMGSb7UOz-l0uvXG3kEeX4U06R_Vbv8,5910
|
14
|
+
pelican_nlp/Nils_backup/fluency/optimality_without_tsa.py,sha256=ZmuQY25n7nVClYjF7j17M4kW0PbXzKCVvTVlDvZ_xa0,15065
|
15
|
+
pelican_nlp/Nils_backup/fluency/plot_fluency.py,sha256=4SrnLhGPG0u-ycW9ryxEX02o3qasQiG_aMxMDpfNbcE,20749
|
16
|
+
pelican_nlp/Nils_backup/fluency/plotting_utils.py,sha256=d0G9qSfBfrfnUCAvM_Su8xOH0lLGwq5KmLBC5sUbx0g,4946
|
17
|
+
pelican_nlp/Nils_backup/fluency/questionnaires_data.py,sha256=xKACAI078si__TiOGahiAvo0nz_UCiJrTV1oEsWPU8A,1175
|
18
|
+
pelican_nlp/Nils_backup/fluency/stats_fluency.py,sha256=HXGMzSGjXJAmw_MnHU5wM16nWSgPkZCWCSYtKaSChPQ,42981
|
19
|
+
pelican_nlp/Nils_backup/fluency/utils.py,sha256=yF7TS_HhUscb6ZgNnk61WJ4qBJLHAJUCt53UWwfIc0U,1340
|
20
|
+
pelican_nlp/Nils_backup/transcription/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
21
|
+
pelican_nlp/Nils_backup/transcription/annotation_tool.py,sha256=O528LXdvs4TkzD201szzHOrTBCZsJa51gr-6iRddGmg,40185
|
22
|
+
pelican_nlp/Nils_backup/transcription/annotation_tool_boundaries.py,sha256=4NnGMWuORKqNp0YFnkD90BuyaPRpo64W2kKCboE9oFE,45384
|
23
|
+
pelican_nlp/Nils_backup/transcription/annotation_tool_sandbox.py,sha256=UHyKmUtVTeBgdBCDVCntqJW9gQN2p2GgKIng6E0LKiw,40405
|
24
|
+
pelican_nlp/Nils_backup/transcription/test.json,sha256=T1PNoYwrqgwDVLtfmj7L5e0Sq02OEbqHPC8RFhICuUU,2
|
25
|
+
pelican_nlp/Nils_backup/transcription/transcribe_audio.py,sha256=uJUXtE6uTXg34FB3f_WQ4WeuikPcPJdlpVrw2Rf0P7M,12600
|
26
|
+
pelican_nlp/Nils_backup/transcription/transcribe_audio_chunked.py,sha256=PZUJ9Cnu96Chhi-MQmaoEd4ximdCAwAdReBzPrHPlZE,27644
|
27
|
+
pelican_nlp/Nils_backup/transcription/transcription.py,sha256=aoIH8vsMh5rAJ1_j44gN6yBxcG-7AoGklVlMPSNnfKU,33031
|
28
|
+
pelican_nlp/Nils_backup/transcription/transcription_gui.py,sha256=HtVEWZyU6_w-viUT4KCf55ZnQY0VxaII5zds1CUqlv8,38482
|
29
|
+
pelican_nlp/Nils_backup/transcription/word_boundaries.py,sha256=n6erYFVgDWLkgMnSNxcTJvJV7Lh557EjWTtEgbwrZVo,6976
|
30
|
+
pelican_nlp/Nils_backup/transcription/output/holmes_control_nova_all_outputs.json,sha256=Eo2pseyXGWSRLs44fDBIAUU7d57gXFXcq4A58iuLoVo,212326
|
31
|
+
pelican_nlp/Silvia_files/Opensmile/opensmile_feature_extraction.py,sha256=sJsthRqJI8bfH38H-OwyQdxXCBIrXEdFm3qSARamYIw,2093
|
32
|
+
pelican_nlp/Silvia_files/prosogram/prosogram.py,sha256=ndjtTSgVzEchPEcRNb9jAHuiKRJYXI2C3Y__Deyc1rU,3324
|
33
|
+
pelican_nlp/configuration_files/config_audio.yml,sha256=aUneGp32RkBQD2xxgYw7J4djWatfWEjTm7Z8UeO49ec,3868
|
34
|
+
pelican_nlp/configuration_files/config_discourse.yml,sha256=WrZk5J2xWMQQPOu25BsqIOM5CrYcAhxCxoMcQVKbDIU,3661
|
35
|
+
pelican_nlp/configuration_files/config_fluency.yml,sha256=nBTGJXnbj8IhWsJGwP4ZutZCeIu2ybHUepG7RAWA1y0,3060
|
36
|
+
pelican_nlp/configuration_files/config_general.yml,sha256=dOBiqOhw0VgV0LZ1boYJhhjCsnTaYBk6qoCTai-fk-o,3474
|
37
|
+
pelican_nlp/configuration_files/config_morteza.yml,sha256=T378fxvBY9hERVGsnXroDFCy8Zh5PIq4dyer2b5AiDY,3376
|
38
|
+
pelican_nlp/core/__init__.py,sha256=whJc5dWsGsKn2IAw-D4BvCvUKW1sVtWYE1WJIuUr5uI,165
|
39
|
+
pelican_nlp/core/audio_document.py,sha256=hhSJNgeqSYa6_uws2ho66agHhAdHuKN3EIEdIsIcXKg,586
|
40
|
+
pelican_nlp/core/corpus.py,sha256=6pDRmeO0XoHylhjLE4Fi5Tc3HCMQJ-Xk0YRzEfz5Z1Y,15168
|
41
|
+
pelican_nlp/core/document.py,sha256=j2HP5FX6cfmXHo7OWVFCX6cMsDyqsOmNlnGNNNfCm2c,8467
|
42
|
+
pelican_nlp/core/subject.py,sha256=-pi3jDzb2zLiG8JNAi9i-9Jd-VtsPxDO4ShQci2QSMg,1059
|
43
|
+
pelican_nlp/extraction/__init__.py,sha256=hfqFiaKpQBS6cwRm9Yd7MpOcV60_xJmwuQ2Kegary5k,84
|
44
|
+
pelican_nlp/extraction/acoustic_feature_extraction.py,sha256=6Csrr6uotarhuAzxYlGFAil9K4PLUqa9vWw607peRoA,2319
|
45
|
+
pelican_nlp/extraction/distance_from_randomness.py,sha256=yikZ3GK2dqpzuNFPVsjuUK0lo6kHOIoIhKPaVrGXRMQ,3365
|
46
|
+
pelican_nlp/extraction/extract_embeddings.py,sha256=e5bcNlskd7f-JkWtfd7YutGV5bqcURKrAkETRyTx93Q,2457
|
47
|
+
pelican_nlp/extraction/extract_logits.py,sha256=Lc7Es86T8mlSvLMhiDHpFdCc0kCZ9fNr3-VFnOyeybs,3869
|
48
|
+
pelican_nlp/extraction/language_model.py,sha256=4tHJZIRCEeHVTwEf2jmOtu-zDGkdXiDjKmlpuxDuLiw,2929
|
49
|
+
pelican_nlp/extraction/semantic_similarity.py,sha256=QhY5CAOAorxEo3UBWPlMegFvbySF0KH6j4j3m2I3_NY,2552
|
50
|
+
pelican_nlp/extraction/test_documents/test_features.csv,sha256=LR_3m4vIm-YWKw5gI5ziswhS-NF9VhKv14c2udLxtJU,488482
|
51
|
+
pelican_nlp/extraction/test_documents/wallace_1.15_3.txt,sha256=ShXxOHUZzGPNUqIcOn6-OYkarzNtTC22V05a_Xpvtlw,3731
|
52
|
+
pelican_nlp/extraction/test_documents/wallace_1.1_3.txt,sha256=gs5REE10myK3Nm9JBOV8hjqKcMRkrl7BasuK7HSBe5M,3695
|
53
|
+
pelican_nlp/extraction/test_documents/wallace_1_4.txt,sha256=95Z7gS92KERCocrbOAFbJntf5QoE-6p0GL67XQEffqI,3963
|
54
|
+
pelican_nlp/metrics_statistics/embeddings_metrics_statistics.py,sha256=svXXyLEA62mLa0KUfSiOSFFMjYk17K7BJbxUoLf0l9w,1468
|
55
|
+
pelican_nlp/praat/__init__.py,sha256=uSEaUZ2nw7lH0twbRJL5BltJTJpopj5XCVhIbeM42bg,1035
|
56
|
+
pelican_nlp/preprocessing/LPDS.py,sha256=4UWkMMSrdU-nWVi8eKiWQSGD7f7lemB42aI0fFn6ZLU,4097
|
57
|
+
pelican_nlp/preprocessing/__init__.py,sha256=ZYgOUlKPXmltYez3urPZmsAWRWSEqZ3_l_gN2aqd15s,293
|
58
|
+
pelican_nlp/preprocessing/pipeline.py,sha256=t2zJAvZRO12MdAKQgm8XZxfZND7_8gFtzHF9Rq2L2aE,1796
|
59
|
+
pelican_nlp/preprocessing/speaker_diarization.py,sha256=N6dZCa2AHHGw__g9e-ZUyZM_In0-nzFOkZ44cBnoKLk,1122
|
60
|
+
pelican_nlp/preprocessing/text_cleaner.py,sha256=QKqxwoRR8dnuBYiY-PXK1kB7744TVUcUMJb7dbKvXGk,7512
|
61
|
+
pelican_nlp/preprocessing/text_importer.py,sha256=FtSyJjFXDxVle7Jpyw6EqCLDbLTCRxqVQi9ymWWtPB4,1356
|
62
|
+
pelican_nlp/preprocessing/text_normalizer.py,sha256=huo5VFqJ0p2jq-ud1047XvMu1qNeaiuG879SF3zkJoM,894
|
63
|
+
pelican_nlp/preprocessing/text_tokenizer.py,sha256=h875bXr0YuMrLh4HtQUvpHmASScddtkQXGaF9mm7uwU,1642
|
64
|
+
pelican_nlp/sample_configuration_files/config_discourse.yml,sha256=xVHIUpSORV6iR0nEvuess6rfiAvuGEkqmaMWD_6kyFE,3618
|
65
|
+
pelican_nlp/sample_configuration_files/config_fluency.yml,sha256=oQ6Y2BhRLExEMpS3VRH2pFrGHi788L66aSYUm05nV_A,3038
|
66
|
+
pelican_nlp/sample_configuration_files/config_general.yml,sha256=UuGnZUa-SVmioE9NmXWOMKuv3uG5mNjIuXgA6-Y0JS0,3440
|
67
|
+
pelican_nlp/utils/__init__.py,sha256=q1tGdOOj5UPRC2mGhoMUh8p4cbFCkkbD21bQaOVvFao,189
|
68
|
+
pelican_nlp/utils/csv_functions.py,sha256=hsG73gm3Up9sAerp6gIxuNHaeP1vJj6HSh7ggVm1SSo,7272
|
69
|
+
pelican_nlp/utils/sample_usage.py,sha256=W__OVMjWND-ZtxxRhfGJDHwbVpGlB-anXDxyA5P4cME,353
|
70
|
+
pelican_nlp/utils/setup_functions.py,sha256=s0QcarswU8qeFBcEQNIYC1ooaD-xwRiTJn--yPEId8E,3612
|
71
|
+
pelican_nlp-0.1.2.dist-info/licenses/LICENSE,sha256=m3jshBZIXKiBX6qhmhtJcLTVJ1N6BEkQGIflneXvpYg,19336
|
72
|
+
pelican_nlp-0.1.2.dist-info/METADATA,sha256=Iuz8Y4HbCCFdcCYvXlGjANQBGd9Zf1Ez6tjD9nnVLuw,5001
|
73
|
+
pelican_nlp-0.1.2.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
74
|
+
pelican_nlp-0.1.2.dist-info/top_level.txt,sha256=F0qlyqy5FCd3sTS_npUYPeLKN9_BZq6wD4qo9pI0xbg,12
|
75
|
+
pelican_nlp-0.1.2.dist-info/RECORD,,
|
@@ -1,39 +0,0 @@
|
|
1
|
-
pelican_nlp/__init__.py,sha256=yLyG5Amt7nuHQMHz0tuGUVsGBtgVmXT0LMeRo3an-OU,192
|
2
|
-
pelican_nlp/main.py,sha256=xKUqqA3sh9kbk07lKA_poILIU1c8oIeaSsVqPOPY5Tk,7596
|
3
|
-
pelican_nlp/core/__init__.py,sha256=whJc5dWsGsKn2IAw-D4BvCvUKW1sVtWYE1WJIuUr5uI,165
|
4
|
-
pelican_nlp/core/audio_document.py,sha256=hhSJNgeqSYa6_uws2ho66agHhAdHuKN3EIEdIsIcXKg,586
|
5
|
-
pelican_nlp/core/corpus.py,sha256=6pDRmeO0XoHylhjLE4Fi5Tc3HCMQJ-Xk0YRzEfz5Z1Y,15168
|
6
|
-
pelican_nlp/core/document.py,sha256=j2HP5FX6cfmXHo7OWVFCX6cMsDyqsOmNlnGNNNfCm2c,8467
|
7
|
-
pelican_nlp/core/subject.py,sha256=-pi3jDzb2zLiG8JNAi9i-9Jd-VtsPxDO4ShQci2QSMg,1059
|
8
|
-
pelican_nlp/extraction/__init__.py,sha256=hfqFiaKpQBS6cwRm9Yd7MpOcV60_xJmwuQ2Kegary5k,84
|
9
|
-
pelican_nlp/extraction/acoustic_feature_extraction.py,sha256=6Csrr6uotarhuAzxYlGFAil9K4PLUqa9vWw607peRoA,2319
|
10
|
-
pelican_nlp/extraction/distance_from_randomness.py,sha256=yikZ3GK2dqpzuNFPVsjuUK0lo6kHOIoIhKPaVrGXRMQ,3365
|
11
|
-
pelican_nlp/extraction/extract_embeddings.py,sha256=e5bcNlskd7f-JkWtfd7YutGV5bqcURKrAkETRyTx93Q,2457
|
12
|
-
pelican_nlp/extraction/extract_logits.py,sha256=Lc7Es86T8mlSvLMhiDHpFdCc0kCZ9fNr3-VFnOyeybs,3869
|
13
|
-
pelican_nlp/extraction/language_model.py,sha256=4tHJZIRCEeHVTwEf2jmOtu-zDGkdXiDjKmlpuxDuLiw,2929
|
14
|
-
pelican_nlp/extraction/semantic_similarity.py,sha256=QhY5CAOAorxEo3UBWPlMegFvbySF0KH6j4j3m2I3_NY,2552
|
15
|
-
pelican_nlp/extraction/test_documents/test_features.csv,sha256=LR_3m4vIm-YWKw5gI5ziswhS-NF9VhKv14c2udLxtJU,488482
|
16
|
-
pelican_nlp/extraction/test_documents/wallace_1.15_3.txt,sha256=ShXxOHUZzGPNUqIcOn6-OYkarzNtTC22V05a_Xpvtlw,3731
|
17
|
-
pelican_nlp/extraction/test_documents/wallace_1.1_3.txt,sha256=gs5REE10myK3Nm9JBOV8hjqKcMRkrl7BasuK7HSBe5M,3695
|
18
|
-
pelican_nlp/extraction/test_documents/wallace_1_4.txt,sha256=95Z7gS92KERCocrbOAFbJntf5QoE-6p0GL67XQEffqI,3963
|
19
|
-
pelican_nlp/metrics_statistics/embeddings_metrics_statistics.py,sha256=svXXyLEA62mLa0KUfSiOSFFMjYk17K7BJbxUoLf0l9w,1468
|
20
|
-
pelican_nlp/preprocessing/LPDS.py,sha256=4UWkMMSrdU-nWVi8eKiWQSGD7f7lemB42aI0fFn6ZLU,4097
|
21
|
-
pelican_nlp/preprocessing/__init__.py,sha256=ZYgOUlKPXmltYez3urPZmsAWRWSEqZ3_l_gN2aqd15s,293
|
22
|
-
pelican_nlp/preprocessing/pipeline.py,sha256=t2zJAvZRO12MdAKQgm8XZxfZND7_8gFtzHF9Rq2L2aE,1796
|
23
|
-
pelican_nlp/preprocessing/speaker_diarization.py,sha256=N6dZCa2AHHGw__g9e-ZUyZM_In0-nzFOkZ44cBnoKLk,1122
|
24
|
-
pelican_nlp/preprocessing/text_cleaner.py,sha256=QKqxwoRR8dnuBYiY-PXK1kB7744TVUcUMJb7dbKvXGk,7512
|
25
|
-
pelican_nlp/preprocessing/text_importer.py,sha256=FtSyJjFXDxVle7Jpyw6EqCLDbLTCRxqVQi9ymWWtPB4,1356
|
26
|
-
pelican_nlp/preprocessing/text_normalizer.py,sha256=huo5VFqJ0p2jq-ud1047XvMu1qNeaiuG879SF3zkJoM,894
|
27
|
-
pelican_nlp/preprocessing/text_tokenizer.py,sha256=h875bXr0YuMrLh4HtQUvpHmASScddtkQXGaF9mm7uwU,1642
|
28
|
-
pelican_nlp/sample_configuration_files/config_discourse.yml,sha256=xVHIUpSORV6iR0nEvuess6rfiAvuGEkqmaMWD_6kyFE,3618
|
29
|
-
pelican_nlp/sample_configuration_files/config_fluency.yml,sha256=oQ6Y2BhRLExEMpS3VRH2pFrGHi788L66aSYUm05nV_A,3038
|
30
|
-
pelican_nlp/sample_configuration_files/config_general.yml,sha256=UuGnZUa-SVmioE9NmXWOMKuv3uG5mNjIuXgA6-Y0JS0,3440
|
31
|
-
pelican_nlp/utils/__init__.py,sha256=q1tGdOOj5UPRC2mGhoMUh8p4cbFCkkbD21bQaOVvFao,189
|
32
|
-
pelican_nlp/utils/csv_functions.py,sha256=hsG73gm3Up9sAerp6gIxuNHaeP1vJj6HSh7ggVm1SSo,7272
|
33
|
-
pelican_nlp/utils/sample_usage.py,sha256=W__OVMjWND-ZtxxRhfGJDHwbVpGlB-anXDxyA5P4cME,353
|
34
|
-
pelican_nlp/utils/setup_functions.py,sha256=s0QcarswU8qeFBcEQNIYC1ooaD-xwRiTJn--yPEId8E,3612
|
35
|
-
pelican_nlp-0.1.0.dist-info/licenses/LICENSE,sha256=m3jshBZIXKiBX6qhmhtJcLTVJ1N6BEkQGIflneXvpYg,19336
|
36
|
-
pelican_nlp-0.1.0.dist-info/METADATA,sha256=kIWgpFUOeQC1c-DYvSPoN82OXBgV7TJtPLUGLNC5KDs,4947
|
37
|
-
pelican_nlp-0.1.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
38
|
-
pelican_nlp-0.1.0.dist-info/top_level.txt,sha256=F0qlyqy5FCd3sTS_npUYPeLKN9_BZq6wD4qo9pI0xbg,12
|
39
|
-
pelican_nlp-0.1.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|