pelican-nlp 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pelican_nlp/Nils_backup/__init__.py +0 -0
- pelican_nlp/Nils_backup/extract_acoustic_features.py +274 -0
- pelican_nlp/Nils_backup/fluency/__init__.py +0 -0
- pelican_nlp/Nils_backup/fluency/aggregate_fluency_results.py +186 -0
- pelican_nlp/Nils_backup/fluency/behavioral_data.py +42 -0
- pelican_nlp/Nils_backup/fluency/check_duplicates.py +169 -0
- pelican_nlp/Nils_backup/fluency/coherence.py +653 -0
- pelican_nlp/Nils_backup/fluency/config.py +231 -0
- pelican_nlp/Nils_backup/fluency/main.py +182 -0
- pelican_nlp/Nils_backup/fluency/optimality_without_tsa.py +466 -0
- pelican_nlp/Nils_backup/fluency/plot_fluency.py +573 -0
- pelican_nlp/Nils_backup/fluency/plotting_utils.py +170 -0
- pelican_nlp/Nils_backup/fluency/questionnaires_data.py +43 -0
- pelican_nlp/Nils_backup/fluency/stats_fluency.py +930 -0
- pelican_nlp/Nils_backup/fluency/utils.py +41 -0
- pelican_nlp/Nils_backup/speaker_diarization_Nils.py +328 -0
- pelican_nlp/Nils_backup/transcription/__init__.py +0 -0
- pelican_nlp/Nils_backup/transcription/annotation_tool.py +1001 -0
- pelican_nlp/Nils_backup/transcription/annotation_tool_boundaries.py +1122 -0
- pelican_nlp/Nils_backup/transcription/annotation_tool_sandbox.py +985 -0
- pelican_nlp/Nils_backup/transcription/output/holmes_control_nova_all_outputs.json +7948 -0
- pelican_nlp/Nils_backup/transcription/test.json +1 -0
- pelican_nlp/Nils_backup/transcription/transcribe_audio.py +314 -0
- pelican_nlp/Nils_backup/transcription/transcribe_audio_chunked.py +695 -0
- pelican_nlp/Nils_backup/transcription/transcription.py +801 -0
- pelican_nlp/Nils_backup/transcription/transcription_gui.py +955 -0
- pelican_nlp/Nils_backup/transcription/word_boundaries.py +190 -0
- pelican_nlp/Silvia_files/Opensmile/opensmile_feature_extraction.py +66 -0
- pelican_nlp/Silvia_files/prosogram/prosogram.py +104 -0
- pelican_nlp/__init__.py +1 -1
- pelican_nlp/_version.py +1 -0
- pelican_nlp/configuration_files/config_audio.yml +150 -0
- pelican_nlp/configuration_files/config_discourse.yml +104 -0
- pelican_nlp/configuration_files/config_fluency.yml +108 -0
- pelican_nlp/configuration_files/config_general.yml +131 -0
- pelican_nlp/configuration_files/config_morteza.yml +103 -0
- pelican_nlp/praat/__init__.py +29 -0
- {pelican_nlp-0.1.1.dist-info → pelican_nlp-0.1.2.dist-info}/METADATA +4 -3
- pelican_nlp-0.1.2.dist-info/RECORD +75 -0
- pelican_nlp-0.1.1.dist-info/RECORD +0 -39
- {pelican_nlp-0.1.1.dist-info → pelican_nlp-0.1.2.dist-info}/WHEEL +0 -0
- {pelican_nlp-0.1.1.dist-info → pelican_nlp-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {pelican_nlp-0.1.1.dist-info → pelican_nlp-0.1.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,170 @@
|
|
1
|
+
"""
|
2
|
+
Shared utilities and constants for plotting functions.
|
3
|
+
"""
|
4
|
+
|
5
|
+
# Greyscale palette keyed by participant-group label. Several labels share a
# shade (e.g. "Patients"/"Psychosis", "HS"/"High Sczt", "LS"/"Low Sczt").
# NOTE(review): GROUP_DICT also produces the label "Healthy Controls", which
# has no entry here -- confirm whether callers need one.
COLORS = dict(
    [
        ("Patients", "#242424"),
        ("Psychosis", "#242424"),
        ("HS", "#6d6d6d"),
        ("LS", "#b6b6b6"),
        ("Low Sczt", "#b6b6b6"),
        ("High Sczt", "#6d6d6d"),
    ]
)
|
14
|
+
|
15
|
+
# Plot colour for each outcome label. The same five-colour cycle is reused
# across the thematic groups below, so colours are only unique within a group.
OUTCOME_COLORS = {
    # Demographics
    "Age": "#2b2b2b",
    "Education": "#cc78bc",
    "Male Gender": "#949494",
    "German Native": "sienna",
    "Other Native": "slategrey",
    # Cognition
    "Working Memory": "#2b2b2b",
    "Psychomotor Speed": "#cc78bc",
    "Negative Inhibition": "#949494",
    # Symptom scales (MSS / PANSS)
    "MSS Pos": "sienna",
    "MSS Neg": "slategrey",
    "PANSS Gen": "#ca9161",
    "MSS Dis": "#ca9161",
    "PANSS Pos": "sienna",
    "PANSS Neg": "slategrey",
}
|
32
|
+
|
33
|
+
# Canonical display order of outcome labels, so every plot lists them the
# same way. The groups are unpacked in place; the resulting list is flat.
SORTED_OUTCOMES = [
    # Demographics
    *("Age", "Education", "Male Gender", "German Native", "Other Native"),
    # Cognition
    *("Working Memory", "Psychomotor Speed", "Negative Inhibition"),
    # Symptom scales
    *("MSS Pos", "MSS Neg", "MSS Dis", "PANSS Pos", "PANSS Neg", "PANSS Gen"),
    # Illness course and medication
    *(
        "Age of Disease Onset",
        "Duration of Untreated Illness",
        "Duration of Antipsychotic Treatment",
        "Risperidone Equivalent",
    ),
]
|
54
|
+
|
55
|
+
# Maps the short group codes found in the data to the display labels used
# in figures.
GROUP_DICT = dict(
    ls="Low Sczt",
    hs="High Sczt",
    control="Healthy Controls",
    patient="Psychosis",
)
|
62
|
+
|
63
|
+
# Maps raw variable/column names to the human-readable labels shown in plots.
NAMES = {
    # Regression terms and demographics
    "const": "Constant",
    "gender_male": "Male Gender",
    "age": "Age",
    "education": "Education",
    "age_onset": "Age of Disease Onset",
    "antipsy_duration": "Duration of Antipsychotic Treatment",
    "duration_untreated": "Duration of Untreated Illness",
    "first_language_German": "German Native",
    "first_language_Other": "Other Native",
    "total_risp_eq": "Risperidone Equivalent",
    # Semantic coherence per window size
    "semantic_coherence_0_mean_of_window_means": "Average Sim",
    "semantic_coherence_2_mean_of_window_means": "Coherence 2",
    "semantic_coherence_5_mean_of_window_means": "Coherence 5",
    "semantic_coherence_8_mean_of_window_means": "Coherence 8",
    "semantic_coherence_16_mean_of_window_means": "Coherence 16",
    "semantic_coherence_32_mean_of_window_means": "Coherence 32",
    # Raw string: the label is a literal backslash followed by "# Items"
    # (presumably so "#" survives LaTeX text rendering -- confirm). Written
    # as "\#" in a normal string it is an invalid escape sequence, which
    # newer Python versions warn about and will eventually reject.
    "number_tokens": r"\# Items",
    # Optimality z-scores per window size
    "z_Real_semantic_include0_includeN_5": "Optimality 5",
    "z_Real_semantic_include0_includeN_8": "Optimality 8",
    "z_Real_semantic_include0_includeN_16": "Optimality 16",
    "z_Real_semantic_include0_includeN_32": "Optimality 32",
    # Symptom scales
    "panss_p_total": "PANSS Pos",
    "panss_g_total": "PANSS Gen",
    "panss_n_total": "PANSS Neg",
    "panss_total": "PANSS Total",
    "mss_sum": "MSS",
    "mss_pos_sum": "MSS Pos",
    "mss_neg_sum": "MSS Neg",
    "mss_dis_sum": "MSS Dis",
    "tlc_mean": "TLC Mean",
    # Cognitive tests
    "stroop_psychomotor": "Psychomotor Speed",
    "stroop_attention": "Selective Attention",
    "stroop_inhibition": "Negative Inhibition",
    "working_memory": "Working Memory",
    # Other scores
    "panss_sim_total": "PANSS Similarities",
    "tangentiality_mean": "Tangentiality Mean",
    "derailment_mean": "Derailment Mean",
    # Sampling parameters and metric-family prefix
    "top_p": "Probability Mass Cutoff",
    "temperature": "Temperature",
    "semantic_coherence_": "Semantic Similarity Metric",
}
|
106
|
+
|
107
|
+
# Column names of the key metrics used in the analysis.
METRICS = [
    # Windowed semantic-coherence summaries (window sizes 2 and 8)
    "semantic_coherence_2_mean_of_window_means",
    "semantic_coherence_8_mean_of_window_means",
] + [
    # Labelled "Optimality 8" and "\# Items" in NAMES
    "z_Real_semantic_include0_includeN_8",
    "number_tokens",
]
|
114
|
+
|
115
|
+
# Column names of the cognitive variables.
COG_VAR = ["working_memory", "stroop_inhibition"]
|
120
|
+
|
121
|
+
def format_p_value(p_value: float) -> str:
    """
    Format a p-value according to APA style.

    Values below .001 are reported as ``"p < .001"``. Values below .01 are
    shown with three decimals, everything else with two. The leading zero is
    dropped and trailing zeros are kept (``0.5`` -> ``"p = .50"``).

    Args:
        p_value: The p-value to format.

    Returns:
        A string representing the formatted p-value. NaN input yields
        ``"p = nan"``.
    """
    import math

    # NaN compares False against every threshold; report it explicitly
    # instead of slicing the text "nan".
    if math.isnan(p_value):
        return "p = nan"

    if p_value < 0.001:
        return "p < .001"

    decimals = 3 if p_value < 0.01 else 2
    # Fixed-point formatting keeps trailing zeros, which str(round(...)) drops.
    formatted = f"{p_value:.{decimals}f}"

    # Drop the leading zero only; a value that rounds to 1 stays "1.00".
    if formatted.startswith("0"):
        formatted = formatted[1:]

    return f"p = {formatted}"
|
137
|
+
|
138
|
+
def set_size(width: "float | str" = 750, fraction: float = 1, subplots: tuple = (1, 1)) -> tuple:
    """
    Set figure dimensions to avoid scaling in LaTeX.

    The height is derived from the width using the golden ratio, scaled by
    the subplot grid's rows-to-columns ratio.

    Args:
        width: Document width in points, or the name of a predefined
            document type (``"thesis"`` or ``"beamer"``).
        fraction: Fraction of the width which you wish the figure to occupy.
        subplots: The number of rows and columns of subplots, as
            ``(rows, columns)``.

    Returns:
        Dimensions of figure in inches, as ``(width, height)``.

    Raises:
        ValueError: If ``width`` is a string that is not a known preset.
    """
    # Predefined document widths in points.
    preset_widths_pt = {"thesis": 426.79135, "beamer": 307.28987}

    if isinstance(width, str):
        # Fail early with a clear message instead of an obscure
        # "can't multiply sequence" TypeError further down.
        if width not in preset_widths_pt:
            raise ValueError(
                f"Unknown width preset {width!r}; "
                f"expected one of {sorted(preset_widths_pt)} or a number of points."
            )
        width_pt = preset_widths_pt[width]
    else:
        width_pt = width

    n_rows, n_cols = subplots[0], subplots[1]

    # 72.27 points per inch, as used by the conversion in this function.
    inches_per_pt = 1 / 72.27
    # Golden ratio to set aesthetic figure height.
    golden_ratio = (5**0.5 - 1) / 2

    fig_width_in = width_pt * fraction * inches_per_pt
    fig_height_in = fig_width_in * golden_ratio * (n_rows / n_cols)

    return (fig_width_in, fig_height_in)
|
@@ -0,0 +1,43 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
Process VELAS questionnaire data.
|
5
|
+
|
6
|
+
This script:
|
7
|
+
1. Loads questionnaire data from master file
|
8
|
+
2. Selects relevant columns
|
9
|
+
3. Outputs selected questionnaire scores
|
10
|
+
"""
|
11
|
+
import pandas as pd
|
12
|
+
from utils import ensure_output_dir
|
13
|
+
from config import QUESTIONNAIRES_CONFIG
|
14
|
+
|
15
|
+
def load_questionnaire_data(questionnaire_path: str) -> pd.DataFrame:
    """
    Read the master questionnaire file into a DataFrame.

    The file is parsed as CSV with pandas' default settings.

    Args:
        questionnaire_path: Path to master questionnaire data

    Returns:
        DataFrame with questionnaire data
    """
    questionnaire_df = pd.read_csv(questionnaire_path)
    return questionnaire_df
|
26
|
+
|
27
|
+
def save_questionnaire_data(df: pd.DataFrame, output_path: str) -> None:
    """
    Save processed questionnaire data to CSV.

    Only the columns listed under ``QUESTIONNAIRES_CONFIG["columns_to_save"]``
    are written, and the DataFrame index is omitted.

    Args:
        df: DataFrame holding the questionnaire data.
        output_path: Destination path of the CSV file.
    """
    # Presumably creates the destination directory if needed -- see utils.
    ensure_output_dir(output_path)

    selected_columns = QUESTIONNAIRES_CONFIG["columns_to_save"]
    selected_df = df[selected_columns]
    selected_df.to_csv(output_path, index=False)
|
31
|
+
|
32
|
+
def main():
    """Load the questionnaire data and save the selected columns.

    Input and output locations are read from ``QUESTIONNAIRES_CONFIG["paths"]``.
    """
    io_paths = QUESTIONNAIRES_CONFIG["paths"]

    # Load from the configured input, then write the configured output.
    save_questionnaire_data(
        load_questionnaire_data(io_paths["input"]),
        io_paths["output"],
    )

    print("Questionnaire data processed successfully!")


if __name__ == "__main__":
    main()
|