pelican-nlp 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pelican_nlp/Nils_backup/__init__.py +0 -0
- pelican_nlp/Nils_backup/extract_acoustic_features.py +274 -0
- pelican_nlp/Nils_backup/fluency/__init__.py +0 -0
- pelican_nlp/Nils_backup/fluency/aggregate_fluency_results.py +186 -0
- pelican_nlp/Nils_backup/fluency/behavioral_data.py +42 -0
- pelican_nlp/Nils_backup/fluency/check_duplicates.py +169 -0
- pelican_nlp/Nils_backup/fluency/coherence.py +653 -0
- pelican_nlp/Nils_backup/fluency/config.py +231 -0
- pelican_nlp/Nils_backup/fluency/main.py +182 -0
- pelican_nlp/Nils_backup/fluency/optimality_without_tsa.py +466 -0
- pelican_nlp/Nils_backup/fluency/plot_fluency.py +573 -0
- pelican_nlp/Nils_backup/fluency/plotting_utils.py +170 -0
- pelican_nlp/Nils_backup/fluency/questionnaires_data.py +43 -0
- pelican_nlp/Nils_backup/fluency/stats_fluency.py +930 -0
- pelican_nlp/Nils_backup/fluency/utils.py +41 -0
- pelican_nlp/Nils_backup/speaker_diarization_Nils.py +328 -0
- pelican_nlp/Nils_backup/transcription/__init__.py +0 -0
- pelican_nlp/Nils_backup/transcription/annotation_tool.py +1001 -0
- pelican_nlp/Nils_backup/transcription/annotation_tool_boundaries.py +1122 -0
- pelican_nlp/Nils_backup/transcription/annotation_tool_sandbox.py +985 -0
- pelican_nlp/Nils_backup/transcription/output/holmes_control_nova_all_outputs.json +7948 -0
- pelican_nlp/Nils_backup/transcription/test.json +1 -0
- pelican_nlp/Nils_backup/transcription/transcribe_audio.py +314 -0
- pelican_nlp/Nils_backup/transcription/transcribe_audio_chunked.py +695 -0
- pelican_nlp/Nils_backup/transcription/transcription.py +801 -0
- pelican_nlp/Nils_backup/transcription/transcription_gui.py +955 -0
- pelican_nlp/Nils_backup/transcription/word_boundaries.py +190 -0
- pelican_nlp/Silvia_files/Opensmile/opensmile_feature_extraction.py +66 -0
- pelican_nlp/Silvia_files/prosogram/prosogram.py +104 -0
- pelican_nlp/__init__.py +1 -1
- pelican_nlp/_version.py +1 -0
- pelican_nlp/configuration_files/config_audio.yml +150 -0
- pelican_nlp/configuration_files/config_discourse.yml +104 -0
- pelican_nlp/configuration_files/config_fluency.yml +108 -0
- pelican_nlp/configuration_files/config_general.yml +131 -0
- pelican_nlp/configuration_files/config_morteza.yml +103 -0
- pelican_nlp/praat/__init__.py +29 -0
- {pelican_nlp-0.1.1.dist-info → pelican_nlp-0.1.3.dist-info}/METADATA +15 -14
- pelican_nlp-0.1.3.dist-info/RECORD +75 -0
- pelican_nlp-0.1.1.dist-info/RECORD +0 -39
- {pelican_nlp-0.1.1.dist-info → pelican_nlp-0.1.3.dist-info}/WHEEL +0 -0
- {pelican_nlp-0.1.1.dist-info → pelican_nlp-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {pelican_nlp-0.1.1.dist-info → pelican_nlp-0.1.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,108 @@
|
|
1
|
+
# Configuration file for fluency task
|
2
|
+
# =======================================
|
3
|
+
input_file: "text" #or 'audio'
|
4
|
+
fluency_task: &fluency_flag true
|
5
|
+
#========================================
|
6
|
+
|
7
|
+
#general configurations; always adapt
|
8
|
+
PATH_TO_PROJECT_FOLDER: "/home/yvespauli/PycharmProjects/FluencyTest"
|
9
|
+
language: "german"
|
10
|
+
multiple_sessions: &session_flag false
|
11
|
+
|
12
|
+
corpus_names: #names of fluency tasks (e.g. "animals", "clothes")
|
13
|
+
- "animals"
|
14
|
+
- "clothes"
|
15
|
+
- "food"
|
16
|
+
|
17
|
+
#Specify linguistic metrics to extract
|
18
|
+
metric_to_extract: 'embeddings' #Possible options: 'embeddings', 'logits'
|
19
|
+
output_document_information: true
|
20
|
+
#====================================================================
|
21
|
+
|
22
|
+
#Optional configurations; change as preferred, but the default settings are recommended
|
23
|
+
cleaning_options:
|
24
|
+
general_cleaning: true
|
25
|
+
#Options for fluency tasks
|
26
|
+
fluency_task: *fluency_flag
|
27
|
+
word_splitter: ';' #default splitter is ','; set a different word_splitter if necessary
|
28
|
+
remove_hyphens: true
|
29
|
+
remove_duplicates: false
|
30
|
+
lowercase: false
|
31
|
+
#Optional cleaning
|
32
|
+
remove_brackets_and_bracketcontent: false #default 'false'
|
33
|
+
remove_timestamps: false #default 'false'
|
34
|
+
timestamp_pattern_example: null #e.g. "#00:00:23-00#"
|
35
|
+
remove_punctuation: false #Careful: if set to true, the word_splitter character might be removed
|
36
|
+
|
37
|
+
options_embeddings:
|
38
|
+
tokenization_method: "whitespace" #or "model"
|
39
|
+
model_name: "fastText" #e.g. "fastText", "xlm-roberta-base"
|
40
|
+
pytorch_based_model: false
|
41
|
+
method: "model_instance"
|
42
|
+
max_length: null
|
43
|
+
clean_embedding_tokens: true
|
44
|
+
|
45
|
+
semantic-similarity: true
|
46
|
+
distance-from-randomness: false
|
47
|
+
|
48
|
+
options_dis_from_randomness:
|
49
|
+
window_size: 8
|
50
|
+
min_len: null
|
51
|
+
bootstrap: 10000
|
52
|
+
shuffle_mode: 'include0_includeN'
|
53
|
+
parallel_computing: false #not yet set up
|
54
|
+
|
55
|
+
options_semantic-similarity:
|
56
|
+
window_sizes: #'all' or window size as integer
|
57
|
+
- 2
|
58
|
+
- 8
|
59
|
+
#==================================================================
|
60
|
+
|
61
|
+
#Extra configurations;
|
62
|
+
task_name: "fluency"
|
63
|
+
create_aggregation_of_results: true
|
64
|
+
|
65
|
+
pipeline_options:
|
66
|
+
quality_check: false
|
67
|
+
clean_text: true
|
68
|
+
tokenize_text: false
|
69
|
+
normalize_text: false
|
70
|
+
|
71
|
+
general_cleaning_options:
|
72
|
+
strip_whitespace: true
|
73
|
+
merge_multiple_whitespaces: true
|
74
|
+
remove_whitespace_before_punctuation: true
|
75
|
+
merge_newline_characters: true
|
76
|
+
remove_backslashes: true
|
77
|
+
|
78
|
+
has_multiple_sections: false
|
79
|
+
has_section_titles: false
|
80
|
+
section_identification: null
|
81
|
+
number_of_sections: 1
|
82
|
+
number_of_speakers: 1
|
83
|
+
discourse: false
|
84
|
+
|
85
|
+
document_information_output:
|
86
|
+
parameters:
|
87
|
+
- subject_ID
|
88
|
+
- fluency_word_count
|
89
|
+
- fluency_duplicate_count
|
90
|
+
|
91
|
+
#================================================================
|
92
|
+
|
93
|
+
#Detail configurations; Changes optional, mostly used for quality checking / error handling
|
94
|
+
recompute_everything: true
|
95
|
+
number_of_subjects: null
|
96
|
+
|
97
|
+
# Filename components configuration
|
98
|
+
filename_components:
|
99
|
+
subject: true # mandatory
|
100
|
+
session: *session_flag
|
101
|
+
task: true # mandatory
|
102
|
+
task_addition: false
|
103
|
+
corpus: true # typically true for fluency tasks (e.g., "animals", "clothes")
|
104
|
+
metric: true
|
105
|
+
additional_tags: []
|
106
|
+
|
107
|
+
|
108
|
+
|
@@ -0,0 +1,131 @@
|
|
1
|
+
# Master Configuration File
|
2
|
+
# ========================
|
3
|
+
|
4
|
+
# Basic Settings
|
5
|
+
# -------------
|
6
|
+
input_file: "text" # Options: 'text' or 'audio'
|
7
|
+
PATH_TO_PROJECT_FOLDER: "/home/yvespauli/PycharmProjects/FluencyTest"
|
8
|
+
language: "german" # Options: 'german', 'english'
|
9
|
+
recompute_everything: true # If false, reuses previously computed results
|
10
|
+
|
11
|
+
# Task Configuration
|
12
|
+
# -----------------
|
13
|
+
task_name: "fluency" # Options: 'fluency', 'interview'
|
14
|
+
fluency_task: &fluency_flag true # Flag for fluency-specific settings
|
15
|
+
discourse: &discourse_flag false # Flag for discourse-specific settings
|
16
|
+
corpus_names: # List of task corpora
|
17
|
+
- "animals"
|
18
|
+
|
19
|
+
# Session and Subject Settings
|
20
|
+
# --------------------------
|
21
|
+
multiple_sessions: false
|
22
|
+
number_of_subjects: null # If null, auto-detected
|
23
|
+
number_of_speakers: 1
|
24
|
+
subject_speakertag: null # Speaker tag for subject (e.g., "B")
|
25
|
+
|
26
|
+
# Document Structure
|
27
|
+
# ----------------
|
28
|
+
has_multiple_sections: false
|
29
|
+
has_section_titles: false
|
30
|
+
section_identification: null # e.g., "Section:"
|
31
|
+
number_of_sections: 1 # If null, auto-detected
|
32
|
+
|
33
|
+
# Processing Pipeline
|
34
|
+
# -----------------
|
35
|
+
pipeline_options:
|
36
|
+
quality_check: false
|
37
|
+
clean_text: true
|
38
|
+
tokenize_text: false
|
39
|
+
normalize_text: false
|
40
|
+
|
41
|
+
# Metric Extraction
|
42
|
+
# ---------------
|
43
|
+
metric_to_extract: "embeddings" # Options: 'embeddings', 'logits'
|
44
|
+
extract_logits: null
|
45
|
+
extract_embeddings: true
|
46
|
+
|
47
|
+
# Cleaning Options
|
48
|
+
# --------------
|
49
|
+
cleaning_options:
|
50
|
+
general_cleaning: true
|
51
|
+
remove_punctuation: false
|
52
|
+
lowercase: true
|
53
|
+
remove_brackets_and_bracketcontent: false
|
54
|
+
remove_timestamps: false
|
55
|
+
timestamp_pattern_example: null # e.g., "#00:00:23-00#"
|
56
|
+
# Fluency-specific options
|
57
|
+
fluency_task: *fluency_flag
|
58
|
+
word_splitter: ';'
|
59
|
+
remove_hyphens: true
|
60
|
+
remove_duplicates: true
|
61
|
+
|
62
|
+
general_cleaning_options:
|
63
|
+
strip_whitespace: true
|
64
|
+
merge_multiple_whitespaces: true
|
65
|
+
remove_whitespace_before_punctuation: true
|
66
|
+
merge_newline_characters: true
|
67
|
+
remove_backslashes: true
|
68
|
+
|
69
|
+
# Embedding Options
|
70
|
+
# ---------------
|
71
|
+
options_embeddings:
|
72
|
+
tokenization_method: "whitespace" # Options: 'whitespace', 'model'
|
73
|
+
model_name: "fastText" # Options: 'fastText', 'xlm-roberta-base'
|
74
|
+
pytorch_based_model: false
|
75
|
+
method: "model_instance"
|
76
|
+
max_length: 512
|
77
|
+
clean_embedding_tokens: true
|
78
|
+
remove_punctuation: false
|
79
|
+
lowercase: false
|
80
|
+
keep_speakertags: false
|
81
|
+
semantic-similarity: true
|
82
|
+
window_size: null
|
83
|
+
clean_tokens: true
|
84
|
+
divergence_from_optimality: false
|
85
|
+
output_options:
|
86
|
+
exclude_special_tokens: true
|
87
|
+
remove_'_'_character: true
|
88
|
+
remove_speaker_labels: true
|
89
|
+
remove_punctuation_and_symbols: true
|
90
|
+
remove_brackets_and_content: true
|
91
|
+
|
92
|
+
# Logits Options
|
93
|
+
# -------------
|
94
|
+
options_logits:
|
95
|
+
chunk_size: 128
|
96
|
+
overlap_size: 64
|
97
|
+
tokenization_method: "model"
|
98
|
+
model_name: "DiscoResearch/Llama3-German-8B-32k"
|
99
|
+
remove_punctuation: true
|
100
|
+
lowercase: true
|
101
|
+
keep_speakertags: true
|
102
|
+
|
103
|
+
# Analysis Options
|
104
|
+
# --------------
|
105
|
+
options_semantic-similarity:
|
106
|
+
window_sizes: # 'all' or window size as integer
|
107
|
+
- 2
|
108
|
+
- 8
|
109
|
+
|
110
|
+
options_dis_from_randomness:
|
111
|
+
window_size: 8
|
112
|
+
min_len: null
|
113
|
+
bootstrap: 10000
|
114
|
+
shuffle_mode: 'include0_includeN'
|
115
|
+
parallel_computing: false
|
116
|
+
|
117
|
+
# Normalization Options
|
118
|
+
# -------------------
|
119
|
+
normalization_options:
|
120
|
+
method: "lemmatization" # Options: 'lemmatization', 'stemming'
|
121
|
+
|
122
|
+
# Filename Configuration
|
123
|
+
# --------------------
|
124
|
+
filename_components:
|
125
|
+
subject: true # mandatory
|
126
|
+
session: false
|
127
|
+
task: true # mandatory
|
128
|
+
task_addition: false
|
129
|
+
corpus: true
|
130
|
+
metric: true
|
131
|
+
additional_tags: []
|
@@ -0,0 +1,103 @@
|
|
1
|
+
# Configuration file: variable parameters
|
2
|
+
# =======================================
|
3
|
+
input_file: "text" #or 'audio'
|
4
|
+
discourse: &discourse_flag true
|
5
|
+
|
6
|
+
#PATH_TO_PROJECT_FOLDER: "/home/yvespauli/PycharmProjects/KetamineStudy/KetamineStudy_ProjectFolder/" # Set default to home directory, e.g., '/home/usr/...'
|
7
|
+
PATH_TO_PROJECT_FOLDER: "/home/yvespauli/PycharmProjects/Morteza/"
|
8
|
+
language: "german" # Possibly add options for German and English
|
9
|
+
|
10
|
+
task_name: "interview" # Give name of task used for creation of the input file (e.g., ['fluency', 'interview'])
|
11
|
+
corpus_names:
|
12
|
+
- "schizophrenia"
|
13
|
+
|
14
|
+
|
15
|
+
number_of_subjects: null # Specify number of subjects; if 'null', number of subjects is automatically detected
|
16
|
+
multiple_sessions: false # Set to True if multiple sessions per subject
|
17
|
+
|
18
|
+
recompute_everything: true #If set to 'false' pelican-nlp will try to reuse previously computed results stored on your drive
|
19
|
+
|
20
|
+
has_multiple_sections: false #evaluated independently
|
21
|
+
has_section_titles: false
|
22
|
+
section_identification: null #e.g. "Section:", 'null' if file does not have multiple sections, use pattern that is unlikely to appear in rest of transcript
|
23
|
+
number_of_sections: null #if 'null' number of sections automatically detected, however, specifying number recommended if known.
|
24
|
+
|
25
|
+
number_of_speakers: 3
|
26
|
+
subject_speakertag: "B"
|
27
|
+
|
28
|
+
metric_to_extract: "embeddings"
|
29
|
+
extract_logits: false
|
30
|
+
extract_embeddings: true
|
31
|
+
|
32
|
+
pipeline_options:
|
33
|
+
quality_check: false
|
34
|
+
clean_text: true
|
35
|
+
tokenize_text: false
|
36
|
+
normalize_text: false
|
37
|
+
|
38
|
+
tokenization: "wordLevel" # Options: 'characterLevel', 'subWordLevel'
|
39
|
+
|
40
|
+
# Options for extract_logits
|
41
|
+
chunk_size: null
|
42
|
+
overlap_size: null
|
43
|
+
|
44
|
+
# Options for extract_embeddings
|
45
|
+
window_sizes: [2]
|
46
|
+
metric_function: cosine_similarity
|
47
|
+
aggregation_functions: mean_of_means
|
48
|
+
|
49
|
+
|
50
|
+
fluency_task: &fluency_flag false
|
51
|
+
cleaning_options:
|
52
|
+
general_cleaning: true # General cleaning options used for most text preprocessing, default: True.
|
53
|
+
remove_brackets_and_bracketcontent: true
|
54
|
+
remove_timestamps: true
|
55
|
+
timestamp_pattern_example: "#00:00:19-0#"
|
56
|
+
#Options for fluency tasks
|
57
|
+
fluency_task: *fluency_flag
|
58
|
+
word_splitter: null
|
59
|
+
remove_hyphens: null
|
60
|
+
remove_duplicates: null
|
61
|
+
|
62
|
+
general_cleaning_options:
|
63
|
+
strip_whitespace: true
|
64
|
+
merge_multiple_whitespaces: true
|
65
|
+
remove_whitespace_before_punctuation: true
|
66
|
+
merge_newline_characters: true
|
67
|
+
remove_backslashes: true
|
68
|
+
|
69
|
+
tokenization_options_logits:
|
70
|
+
method: "model_instance" # Options: model_instance, regex, nltk, etc.
|
71
|
+
model_name: "DiscoResearch/Llama3-German-8B-32k" # Replace with your model instance name
|
72
|
+
remove_punctuation: true
|
73
|
+
lowercase: true
|
74
|
+
|
75
|
+
options_embeddings:
|
76
|
+
tokenization_method: "model_roberta" #or "whitespace", "model"
|
77
|
+
max_length: 512 #max sequence length
|
78
|
+
model_name: "xlm-roberta-base" #e.g. "fastText", "xlm-roberta-base"
|
79
|
+
pytorch_based_model: true
|
80
|
+
method: "model_instance"
|
81
|
+
remove_punctuation: false
|
82
|
+
lowercase: false
|
83
|
+
keep_speakertags: true
|
84
|
+
clean_embedding_tokens: true
|
85
|
+
output_options:
|
86
|
+
exclude_special_tokens: true
|
87
|
+
remove_'_'_character: true
|
88
|
+
remove_speaker_labels: true
|
89
|
+
remove_punctuation_and_symbols: true
|
90
|
+
remove_brackets_and_content: true
|
91
|
+
|
92
|
+
window_size: null
|
93
|
+
|
94
|
+
semantic-similarity: false
|
95
|
+
distance-from-randomness: false
|
96
|
+
|
97
|
+
normalization_options:
|
98
|
+
method: "lemmatization" #Options: lemmatization or stemming
|
99
|
+
|
100
|
+
create_aggregation_of_results: false
|
101
|
+
output_document_information: false
|
102
|
+
|
103
|
+
|
@@ -0,0 +1,29 @@
|
|
1
|
+
import os
|
2
|
+
|
3
|
+
# Get the directory where the Praat scripts are stored
|
4
|
+
PRAAT_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
|
5
|
+
|
6
|
+
# Define paths to individual scripts
|
7
|
+
PROSOMAIN_SCRIPT = os.path.join(PRAAT_SCRIPTS_DIR, 'prosomain.praat')
|
8
|
+
PROSOGRAM_SCRIPT = os.path.join(PRAAT_SCRIPTS_DIR, 'prosogram.praat')
|
9
|
+
PROSOPLOT_SCRIPT = os.path.join(PRAAT_SCRIPTS_DIR, 'prosoplot.praat')
|
10
|
+
SEGMENT_SCRIPT = os.path.join(PRAAT_SCRIPTS_DIR, 'segment.praat')
|
11
|
+
STYLIZE_SCRIPT = os.path.join(PRAAT_SCRIPTS_DIR, 'stylize.praat')
|
12
|
+
POLYTONIA_SCRIPT = os.path.join(PRAAT_SCRIPTS_DIR, 'polytonia.praat')
|
13
|
+
UTIL_SCRIPT = os.path.join(PRAAT_SCRIPTS_DIR, 'util.praat')
|
14
|
+
EPS_CONV_SCRIPT = os.path.join(PRAAT_SCRIPTS_DIR, 'eps_conv.praat')
|
15
|
+
SETUP_SCRIPT = os.path.join(PRAAT_SCRIPTS_DIR, 'setup.praat')
|
16
|
+
|
17
|
+
# Export all script paths
|
18
|
+
__all__ = [
|
19
|
+
'PRAAT_SCRIPTS_DIR',
|
20
|
+
'PROSOMAIN_SCRIPT',
|
21
|
+
'PROSOGRAM_SCRIPT',
|
22
|
+
'PROSOPLOT_SCRIPT',
|
23
|
+
'SEGMENT_SCRIPT',
|
24
|
+
'STYLIZE_SCRIPT',
|
25
|
+
'POLYTONIA_SCRIPT',
|
26
|
+
'UTIL_SCRIPT',
|
27
|
+
'EPS_CONV_SCRIPT',
|
28
|
+
'SETUP_SCRIPT'
|
29
|
+
]
|
@@ -1,13 +1,13 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: pelican_nlp
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.3
|
4
4
|
Summary: Preprocessing and Extraction of Linguistic Information for Computational Analysis
|
5
5
|
Author-email: Yves Pauli <yves.pauli@gmail.com>
|
6
6
|
License-Expression: CC-BY-NC-4.0
|
7
|
-
Project-URL: Homepage, https://github.com/ypauli/
|
8
|
-
Project-URL: Repository, https://github.com/ypauli/
|
9
|
-
Project-URL: Documentation, https://github.com/ypauli/
|
10
|
-
Project-URL: Bug Tracker, https://github.com/ypauli/
|
7
|
+
Project-URL: Homepage, https://github.com/ypauli/pelican_nlp
|
8
|
+
Project-URL: Repository, https://github.com/ypauli/pelican_nlp
|
9
|
+
Project-URL: Documentation, https://github.com/ypauli/pelican_nlp#readme
|
10
|
+
Project-URL: Bug Tracker, https://github.com/ypauli/pelican_nlp/issues
|
11
11
|
Keywords: nlp,linguistics,preprocessing,language-processing,text-analysis
|
12
12
|
Classifier: Development Status :: 1 - Planning
|
13
13
|
Classifier: Intended Audience :: Science/Research
|
@@ -48,18 +48,18 @@ Dynamic: license-file
|
|
48
48
|
PELICAN_nlp
|
49
49
|
====================================
|
50
50
|
|
51
|
-
|
51
|
+
pelican_nlp stands for "Preprocessing and Extraction of Linguistic Information for Computational Analysis - Natural Language Processing". This package enables the creation of standardized and reproducible language processing pipelines, extracting linguistic features from various tasks like discourse, fluency, and image descriptions.
|
52
52
|
|
53
53
|
.. image:: https://img.shields.io/pypi/v/package-name.svg
|
54
|
-
:target: https://pypi.org/project/
|
54
|
+
:target: https://pypi.org/project/pelican_nlp/
|
55
55
|
:alt: PyPI version
|
56
56
|
|
57
57
|
.. image:: https://img.shields.io/github/license/username/package-name.svg
|
58
|
-
:target: https://github.com/ypauli/
|
58
|
+
:target: https://github.com/ypauli/pelican_nlp/blob/main/LICENSE
|
59
59
|
:alt: License
|
60
60
|
|
61
61
|
.. image:: https://img.shields.io/pypi/pyversions/package-name.svg
|
62
|
-
:target: https://pypi.org/project/
|
62
|
+
:target: https://pypi.org/project/pelican_nlp/
|
63
63
|
:alt: Supported Python Versions
|
64
64
|
|
65
65
|
Installation
|
@@ -75,7 +75,7 @@ For the latest development version:
|
|
75
75
|
|
76
76
|
.. code-block:: bash
|
77
77
|
|
78
|
-
pip install
|
78
|
+
pip install https://github.com/ypauli/pelican_nlp/releases/tag/v0.1.2-alpha
|
79
79
|
|
80
80
|
Usage
|
81
81
|
=====
|
@@ -107,7 +107,8 @@ Text and audio files should follow this naming convention:
|
|
107
107
|
- extension: file extension (e.g., txt / pdf / docx / rtf), mandatory
|
108
108
|
|
109
109
|
Example filenames:
|
110
|
-
|
110
|
+
|
111
|
+
- sub-01_interview_schizophrenia.rtf
|
111
112
|
- sub-03_ses-02_fluency_semantic_animals.docx
|
112
113
|
|
113
114
|
To optimize performance, close other programs and limit GPU usage during language processing.
|
@@ -124,15 +125,15 @@ Features
|
|
124
125
|
Examples
|
125
126
|
========
|
126
127
|
|
127
|
-
You can find example setups in the [`examples/`](https://github.com/ypauli/
|
128
|
+
You can find example setups in the `examples/ <https://github.com/ypauli/pelican_nlp/examples>`_ folder.
|
128
129
|
ALWAYS change the path to the project folder specified in the configuration file to your specific project location.
|
129
130
|
|
130
131
|
Contributing
|
131
132
|
============
|
132
133
|
|
133
|
-
Contributions are welcome! Please check out the `contributing guide <https://github.com/ypauli/
|
134
|
+
Contributions are welcome! Please check out the `contributing guide <https://github.com/ypauli/pelican_nlp/blob/main/CONTRIBUTING.md>`_.
|
134
135
|
|
135
136
|
License
|
136
137
|
=======
|
137
138
|
|
138
|
-
This project is licensed under Attribution-NonCommercial 4.0 International. See the `LICENSE <https://github.com/ypauli/
|
139
|
+
This project is licensed under Attribution-NonCommercial 4.0 International. See the `LICENSE <https://github.com/ypauli/pelican_nlp/blob/main/LICENSE>`_ file for details.
|
@@ -0,0 +1,75 @@
|
|
1
|
+
pelican_nlp/__init__.py,sha256=TD5xjKeXXAH6nUWG-6igbClgovi5r8RIEqI_ix1QeYo,204
|
2
|
+
pelican_nlp/_version.py,sha256=R5TtpJu7Qu6sOarfDpp-5Oyy8Pi2Ir3VewCvsCQiAgo,21
|
3
|
+
pelican_nlp/main.py,sha256=xKUqqA3sh9kbk07lKA_poILIU1c8oIeaSsVqPOPY5Tk,7596
|
4
|
+
pelican_nlp/Nils_backup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
pelican_nlp/Nils_backup/extract_acoustic_features.py,sha256=eSP8lXxbZ15YE1HqxGtma9uWOcSN-fI-ig-NwQ9eOA8,10771
|
6
|
+
pelican_nlp/Nils_backup/speaker_diarization_Nils.py,sha256=3RIhjKihu4Z1rruMt9KESFE2lqesfzIpRr7rLummUEo,10219
|
7
|
+
pelican_nlp/Nils_backup/fluency/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
|
+
pelican_nlp/Nils_backup/fluency/aggregate_fluency_results.py,sha256=VVsKR8_Epck-jk-uT6zNV-QO7EiM342MUzzHmVhOSdo,6392
|
9
|
+
pelican_nlp/Nils_backup/fluency/behavioral_data.py,sha256=TJRpBhOh9JMdoL6OcNwhlChe_sNTFQlAhVKl2ml0X0w,1181
|
10
|
+
pelican_nlp/Nils_backup/fluency/check_duplicates.py,sha256=XjfF7NEkilNmPdU0yOVug7xqsc6JbRu-HYO54FZQ8hg,6126
|
11
|
+
pelican_nlp/Nils_backup/fluency/coherence.py,sha256=JGv-3RWwwYboEDZep2mQMuNivZNjV_H5ZrjwY2JHS10,21437
|
12
|
+
pelican_nlp/Nils_backup/fluency/config.py,sha256=Ef9NdLcpCe6XH690plV5FBM_KEjoZR0wy9uYCdAFo78,9233
|
13
|
+
pelican_nlp/Nils_backup/fluency/main.py,sha256=zMDTeNRj971xlMGSb7UOz-l0uvXG3kEeX4U06R_Vbv8,5910
|
14
|
+
pelican_nlp/Nils_backup/fluency/optimality_without_tsa.py,sha256=ZmuQY25n7nVClYjF7j17M4kW0PbXzKCVvTVlDvZ_xa0,15065
|
15
|
+
pelican_nlp/Nils_backup/fluency/plot_fluency.py,sha256=4SrnLhGPG0u-ycW9ryxEX02o3qasQiG_aMxMDpfNbcE,20749
|
16
|
+
pelican_nlp/Nils_backup/fluency/plotting_utils.py,sha256=d0G9qSfBfrfnUCAvM_Su8xOH0lLGwq5KmLBC5sUbx0g,4946
|
17
|
+
pelican_nlp/Nils_backup/fluency/questionnaires_data.py,sha256=xKACAI078si__TiOGahiAvo0nz_UCiJrTV1oEsWPU8A,1175
|
18
|
+
pelican_nlp/Nils_backup/fluency/stats_fluency.py,sha256=HXGMzSGjXJAmw_MnHU5wM16nWSgPkZCWCSYtKaSChPQ,42981
|
19
|
+
pelican_nlp/Nils_backup/fluency/utils.py,sha256=yF7TS_HhUscb6ZgNnk61WJ4qBJLHAJUCt53UWwfIc0U,1340
|
20
|
+
pelican_nlp/Nils_backup/transcription/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
21
|
+
pelican_nlp/Nils_backup/transcription/annotation_tool.py,sha256=O528LXdvs4TkzD201szzHOrTBCZsJa51gr-6iRddGmg,40185
|
22
|
+
pelican_nlp/Nils_backup/transcription/annotation_tool_boundaries.py,sha256=4NnGMWuORKqNp0YFnkD90BuyaPRpo64W2kKCboE9oFE,45384
|
23
|
+
pelican_nlp/Nils_backup/transcription/annotation_tool_sandbox.py,sha256=UHyKmUtVTeBgdBCDVCntqJW9gQN2p2GgKIng6E0LKiw,40405
|
24
|
+
pelican_nlp/Nils_backup/transcription/test.json,sha256=T1PNoYwrqgwDVLtfmj7L5e0Sq02OEbqHPC8RFhICuUU,2
|
25
|
+
pelican_nlp/Nils_backup/transcription/transcribe_audio.py,sha256=uJUXtE6uTXg34FB3f_WQ4WeuikPcPJdlpVrw2Rf0P7M,12600
|
26
|
+
pelican_nlp/Nils_backup/transcription/transcribe_audio_chunked.py,sha256=PZUJ9Cnu96Chhi-MQmaoEd4ximdCAwAdReBzPrHPlZE,27644
|
27
|
+
pelican_nlp/Nils_backup/transcription/transcription.py,sha256=aoIH8vsMh5rAJ1_j44gN6yBxcG-7AoGklVlMPSNnfKU,33031
|
28
|
+
pelican_nlp/Nils_backup/transcription/transcription_gui.py,sha256=HtVEWZyU6_w-viUT4KCf55ZnQY0VxaII5zds1CUqlv8,38482
|
29
|
+
pelican_nlp/Nils_backup/transcription/word_boundaries.py,sha256=n6erYFVgDWLkgMnSNxcTJvJV7Lh557EjWTtEgbwrZVo,6976
|
30
|
+
pelican_nlp/Nils_backup/transcription/output/holmes_control_nova_all_outputs.json,sha256=Eo2pseyXGWSRLs44fDBIAUU7d57gXFXcq4A58iuLoVo,212326
|
31
|
+
pelican_nlp/Silvia_files/Opensmile/opensmile_feature_extraction.py,sha256=sJsthRqJI8bfH38H-OwyQdxXCBIrXEdFm3qSARamYIw,2093
|
32
|
+
pelican_nlp/Silvia_files/prosogram/prosogram.py,sha256=ndjtTSgVzEchPEcRNb9jAHuiKRJYXI2C3Y__Deyc1rU,3324
|
33
|
+
pelican_nlp/configuration_files/config_audio.yml,sha256=aUneGp32RkBQD2xxgYw7J4djWatfWEjTm7Z8UeO49ec,3868
|
34
|
+
pelican_nlp/configuration_files/config_discourse.yml,sha256=WrZk5J2xWMQQPOu25BsqIOM5CrYcAhxCxoMcQVKbDIU,3661
|
35
|
+
pelican_nlp/configuration_files/config_fluency.yml,sha256=nBTGJXnbj8IhWsJGwP4ZutZCeIu2ybHUepG7RAWA1y0,3060
|
36
|
+
pelican_nlp/configuration_files/config_general.yml,sha256=dOBiqOhw0VgV0LZ1boYJhhjCsnTaYBk6qoCTai-fk-o,3474
|
37
|
+
pelican_nlp/configuration_files/config_morteza.yml,sha256=T378fxvBY9hERVGsnXroDFCy8Zh5PIq4dyer2b5AiDY,3376
|
38
|
+
pelican_nlp/core/__init__.py,sha256=whJc5dWsGsKn2IAw-D4BvCvUKW1sVtWYE1WJIuUr5uI,165
|
39
|
+
pelican_nlp/core/audio_document.py,sha256=hhSJNgeqSYa6_uws2ho66agHhAdHuKN3EIEdIsIcXKg,586
|
40
|
+
pelican_nlp/core/corpus.py,sha256=6pDRmeO0XoHylhjLE4Fi5Tc3HCMQJ-Xk0YRzEfz5Z1Y,15168
|
41
|
+
pelican_nlp/core/document.py,sha256=j2HP5FX6cfmXHo7OWVFCX6cMsDyqsOmNlnGNNNfCm2c,8467
|
42
|
+
pelican_nlp/core/subject.py,sha256=-pi3jDzb2zLiG8JNAi9i-9Jd-VtsPxDO4ShQci2QSMg,1059
|
43
|
+
pelican_nlp/extraction/__init__.py,sha256=hfqFiaKpQBS6cwRm9Yd7MpOcV60_xJmwuQ2Kegary5k,84
|
44
|
+
pelican_nlp/extraction/acoustic_feature_extraction.py,sha256=6Csrr6uotarhuAzxYlGFAil9K4PLUqa9vWw607peRoA,2319
|
45
|
+
pelican_nlp/extraction/distance_from_randomness.py,sha256=yikZ3GK2dqpzuNFPVsjuUK0lo6kHOIoIhKPaVrGXRMQ,3365
|
46
|
+
pelican_nlp/extraction/extract_embeddings.py,sha256=e5bcNlskd7f-JkWtfd7YutGV5bqcURKrAkETRyTx93Q,2457
|
47
|
+
pelican_nlp/extraction/extract_logits.py,sha256=Lc7Es86T8mlSvLMhiDHpFdCc0kCZ9fNr3-VFnOyeybs,3869
|
48
|
+
pelican_nlp/extraction/language_model.py,sha256=4tHJZIRCEeHVTwEf2jmOtu-zDGkdXiDjKmlpuxDuLiw,2929
|
49
|
+
pelican_nlp/extraction/semantic_similarity.py,sha256=QhY5CAOAorxEo3UBWPlMegFvbySF0KH6j4j3m2I3_NY,2552
|
50
|
+
pelican_nlp/extraction/test_documents/test_features.csv,sha256=LR_3m4vIm-YWKw5gI5ziswhS-NF9VhKv14c2udLxtJU,488482
|
51
|
+
pelican_nlp/extraction/test_documents/wallace_1.15_3.txt,sha256=ShXxOHUZzGPNUqIcOn6-OYkarzNtTC22V05a_Xpvtlw,3731
|
52
|
+
pelican_nlp/extraction/test_documents/wallace_1.1_3.txt,sha256=gs5REE10myK3Nm9JBOV8hjqKcMRkrl7BasuK7HSBe5M,3695
|
53
|
+
pelican_nlp/extraction/test_documents/wallace_1_4.txt,sha256=95Z7gS92KERCocrbOAFbJntf5QoE-6p0GL67XQEffqI,3963
|
54
|
+
pelican_nlp/metrics_statistics/embeddings_metrics_statistics.py,sha256=svXXyLEA62mLa0KUfSiOSFFMjYk17K7BJbxUoLf0l9w,1468
|
55
|
+
pelican_nlp/praat/__init__.py,sha256=uSEaUZ2nw7lH0twbRJL5BltJTJpopj5XCVhIbeM42bg,1035
|
56
|
+
pelican_nlp/preprocessing/LPDS.py,sha256=4UWkMMSrdU-nWVi8eKiWQSGD7f7lemB42aI0fFn6ZLU,4097
|
57
|
+
pelican_nlp/preprocessing/__init__.py,sha256=ZYgOUlKPXmltYez3urPZmsAWRWSEqZ3_l_gN2aqd15s,293
|
58
|
+
pelican_nlp/preprocessing/pipeline.py,sha256=t2zJAvZRO12MdAKQgm8XZxfZND7_8gFtzHF9Rq2L2aE,1796
|
59
|
+
pelican_nlp/preprocessing/speaker_diarization.py,sha256=N6dZCa2AHHGw__g9e-ZUyZM_In0-nzFOkZ44cBnoKLk,1122
|
60
|
+
pelican_nlp/preprocessing/text_cleaner.py,sha256=QKqxwoRR8dnuBYiY-PXK1kB7744TVUcUMJb7dbKvXGk,7512
|
61
|
+
pelican_nlp/preprocessing/text_importer.py,sha256=FtSyJjFXDxVle7Jpyw6EqCLDbLTCRxqVQi9ymWWtPB4,1356
|
62
|
+
pelican_nlp/preprocessing/text_normalizer.py,sha256=huo5VFqJ0p2jq-ud1047XvMu1qNeaiuG879SF3zkJoM,894
|
63
|
+
pelican_nlp/preprocessing/text_tokenizer.py,sha256=h875bXr0YuMrLh4HtQUvpHmASScddtkQXGaF9mm7uwU,1642
|
64
|
+
pelican_nlp/sample_configuration_files/config_discourse.yml,sha256=xVHIUpSORV6iR0nEvuess6rfiAvuGEkqmaMWD_6kyFE,3618
|
65
|
+
pelican_nlp/sample_configuration_files/config_fluency.yml,sha256=oQ6Y2BhRLExEMpS3VRH2pFrGHi788L66aSYUm05nV_A,3038
|
66
|
+
pelican_nlp/sample_configuration_files/config_general.yml,sha256=UuGnZUa-SVmioE9NmXWOMKuv3uG5mNjIuXgA6-Y0JS0,3440
|
67
|
+
pelican_nlp/utils/__init__.py,sha256=q1tGdOOj5UPRC2mGhoMUh8p4cbFCkkbD21bQaOVvFao,189
|
68
|
+
pelican_nlp/utils/csv_functions.py,sha256=hsG73gm3Up9sAerp6gIxuNHaeP1vJj6HSh7ggVm1SSo,7272
|
69
|
+
pelican_nlp/utils/sample_usage.py,sha256=W__OVMjWND-ZtxxRhfGJDHwbVpGlB-anXDxyA5P4cME,353
|
70
|
+
pelican_nlp/utils/setup_functions.py,sha256=s0QcarswU8qeFBcEQNIYC1ooaD-xwRiTJn--yPEId8E,3612
|
71
|
+
pelican_nlp-0.1.3.dist-info/licenses/LICENSE,sha256=m3jshBZIXKiBX6qhmhtJcLTVJ1N6BEkQGIflneXvpYg,19336
|
72
|
+
pelican_nlp-0.1.3.dist-info/METADATA,sha256=U_PWoZdXS5KVZUXUokNZBWQikMj-8VfOKyh-6nscySE,4998
|
73
|
+
pelican_nlp-0.1.3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
74
|
+
pelican_nlp-0.1.3.dist-info/top_level.txt,sha256=F0qlyqy5FCd3sTS_npUYPeLKN9_BZq6wD4qo9pI0xbg,12
|
75
|
+
pelican_nlp-0.1.3.dist-info/RECORD,,
|
@@ -1,39 +0,0 @@
|
|
1
|
-
pelican_nlp/__init__.py,sha256=opumkxDDKxGZgYyNkkUiu8kbOrBixP8zP_bhljYYktc,192
|
2
|
-
pelican_nlp/main.py,sha256=xKUqqA3sh9kbk07lKA_poILIU1c8oIeaSsVqPOPY5Tk,7596
|
3
|
-
pelican_nlp/core/__init__.py,sha256=whJc5dWsGsKn2IAw-D4BvCvUKW1sVtWYE1WJIuUr5uI,165
|
4
|
-
pelican_nlp/core/audio_document.py,sha256=hhSJNgeqSYa6_uws2ho66agHhAdHuKN3EIEdIsIcXKg,586
|
5
|
-
pelican_nlp/core/corpus.py,sha256=6pDRmeO0XoHylhjLE4Fi5Tc3HCMQJ-Xk0YRzEfz5Z1Y,15168
|
6
|
-
pelican_nlp/core/document.py,sha256=j2HP5FX6cfmXHo7OWVFCX6cMsDyqsOmNlnGNNNfCm2c,8467
|
7
|
-
pelican_nlp/core/subject.py,sha256=-pi3jDzb2zLiG8JNAi9i-9Jd-VtsPxDO4ShQci2QSMg,1059
|
8
|
-
pelican_nlp/extraction/__init__.py,sha256=hfqFiaKpQBS6cwRm9Yd7MpOcV60_xJmwuQ2Kegary5k,84
|
9
|
-
pelican_nlp/extraction/acoustic_feature_extraction.py,sha256=6Csrr6uotarhuAzxYlGFAil9K4PLUqa9vWw607peRoA,2319
|
10
|
-
pelican_nlp/extraction/distance_from_randomness.py,sha256=yikZ3GK2dqpzuNFPVsjuUK0lo6kHOIoIhKPaVrGXRMQ,3365
|
11
|
-
pelican_nlp/extraction/extract_embeddings.py,sha256=e5bcNlskd7f-JkWtfd7YutGV5bqcURKrAkETRyTx93Q,2457
|
12
|
-
pelican_nlp/extraction/extract_logits.py,sha256=Lc7Es86T8mlSvLMhiDHpFdCc0kCZ9fNr3-VFnOyeybs,3869
|
13
|
-
pelican_nlp/extraction/language_model.py,sha256=4tHJZIRCEeHVTwEf2jmOtu-zDGkdXiDjKmlpuxDuLiw,2929
|
14
|
-
pelican_nlp/extraction/semantic_similarity.py,sha256=QhY5CAOAorxEo3UBWPlMegFvbySF0KH6j4j3m2I3_NY,2552
|
15
|
-
pelican_nlp/extraction/test_documents/test_features.csv,sha256=LR_3m4vIm-YWKw5gI5ziswhS-NF9VhKv14c2udLxtJU,488482
|
16
|
-
pelican_nlp/extraction/test_documents/wallace_1.15_3.txt,sha256=ShXxOHUZzGPNUqIcOn6-OYkarzNtTC22V05a_Xpvtlw,3731
|
17
|
-
pelican_nlp/extraction/test_documents/wallace_1.1_3.txt,sha256=gs5REE10myK3Nm9JBOV8hjqKcMRkrl7BasuK7HSBe5M,3695
|
18
|
-
pelican_nlp/extraction/test_documents/wallace_1_4.txt,sha256=95Z7gS92KERCocrbOAFbJntf5QoE-6p0GL67XQEffqI,3963
|
19
|
-
pelican_nlp/metrics_statistics/embeddings_metrics_statistics.py,sha256=svXXyLEA62mLa0KUfSiOSFFMjYk17K7BJbxUoLf0l9w,1468
|
20
|
-
pelican_nlp/preprocessing/LPDS.py,sha256=4UWkMMSrdU-nWVi8eKiWQSGD7f7lemB42aI0fFn6ZLU,4097
|
21
|
-
pelican_nlp/preprocessing/__init__.py,sha256=ZYgOUlKPXmltYez3urPZmsAWRWSEqZ3_l_gN2aqd15s,293
|
22
|
-
pelican_nlp/preprocessing/pipeline.py,sha256=t2zJAvZRO12MdAKQgm8XZxfZND7_8gFtzHF9Rq2L2aE,1796
|
23
|
-
pelican_nlp/preprocessing/speaker_diarization.py,sha256=N6dZCa2AHHGw__g9e-ZUyZM_In0-nzFOkZ44cBnoKLk,1122
|
24
|
-
pelican_nlp/preprocessing/text_cleaner.py,sha256=QKqxwoRR8dnuBYiY-PXK1kB7744TVUcUMJb7dbKvXGk,7512
|
25
|
-
pelican_nlp/preprocessing/text_importer.py,sha256=FtSyJjFXDxVle7Jpyw6EqCLDbLTCRxqVQi9ymWWtPB4,1356
|
26
|
-
pelican_nlp/preprocessing/text_normalizer.py,sha256=huo5VFqJ0p2jq-ud1047XvMu1qNeaiuG879SF3zkJoM,894
|
27
|
-
pelican_nlp/preprocessing/text_tokenizer.py,sha256=h875bXr0YuMrLh4HtQUvpHmASScddtkQXGaF9mm7uwU,1642
|
28
|
-
pelican_nlp/sample_configuration_files/config_discourse.yml,sha256=xVHIUpSORV6iR0nEvuess6rfiAvuGEkqmaMWD_6kyFE,3618
|
29
|
-
pelican_nlp/sample_configuration_files/config_fluency.yml,sha256=oQ6Y2BhRLExEMpS3VRH2pFrGHi788L66aSYUm05nV_A,3038
|
30
|
-
pelican_nlp/sample_configuration_files/config_general.yml,sha256=UuGnZUa-SVmioE9NmXWOMKuv3uG5mNjIuXgA6-Y0JS0,3440
|
31
|
-
pelican_nlp/utils/__init__.py,sha256=q1tGdOOj5UPRC2mGhoMUh8p4cbFCkkbD21bQaOVvFao,189
|
32
|
-
pelican_nlp/utils/csv_functions.py,sha256=hsG73gm3Up9sAerp6gIxuNHaeP1vJj6HSh7ggVm1SSo,7272
|
33
|
-
pelican_nlp/utils/sample_usage.py,sha256=W__OVMjWND-ZtxxRhfGJDHwbVpGlB-anXDxyA5P4cME,353
|
34
|
-
pelican_nlp/utils/setup_functions.py,sha256=s0QcarswU8qeFBcEQNIYC1ooaD-xwRiTJn--yPEId8E,3612
|
35
|
-
pelican_nlp-0.1.1.dist-info/licenses/LICENSE,sha256=m3jshBZIXKiBX6qhmhtJcLTVJ1N6BEkQGIflneXvpYg,19336
|
36
|
-
pelican_nlp-0.1.1.dist-info/METADATA,sha256=WV78gC4295adtWepMzCQm1yCkUa1LIBm9vkz9YFx3Dk,4986
|
37
|
-
pelican_nlp-0.1.1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
38
|
-
pelican_nlp-0.1.1.dist-info/top_level.txt,sha256=F0qlyqy5FCd3sTS_npUYPeLKN9_BZq6wD4qo9pI0xbg,12
|
39
|
-
pelican_nlp-0.1.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|