pelican-nlp 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. pelican_nlp/Nils_backup/__init__.py +0 -0
  2. pelican_nlp/Nils_backup/extract_acoustic_features.py +274 -0
  3. pelican_nlp/Nils_backup/fluency/__init__.py +0 -0
  4. pelican_nlp/Nils_backup/fluency/aggregate_fluency_results.py +186 -0
  5. pelican_nlp/Nils_backup/fluency/behavioral_data.py +42 -0
  6. pelican_nlp/Nils_backup/fluency/check_duplicates.py +169 -0
  7. pelican_nlp/Nils_backup/fluency/coherence.py +653 -0
  8. pelican_nlp/Nils_backup/fluency/config.py +231 -0
  9. pelican_nlp/Nils_backup/fluency/main.py +182 -0
  10. pelican_nlp/Nils_backup/fluency/optimality_without_tsa.py +466 -0
  11. pelican_nlp/Nils_backup/fluency/plot_fluency.py +573 -0
  12. pelican_nlp/Nils_backup/fluency/plotting_utils.py +170 -0
  13. pelican_nlp/Nils_backup/fluency/questionnaires_data.py +43 -0
  14. pelican_nlp/Nils_backup/fluency/stats_fluency.py +930 -0
  15. pelican_nlp/Nils_backup/fluency/utils.py +41 -0
  16. pelican_nlp/Nils_backup/speaker_diarization_Nils.py +328 -0
  17. pelican_nlp/Nils_backup/transcription/__init__.py +0 -0
  18. pelican_nlp/Nils_backup/transcription/annotation_tool.py +1001 -0
  19. pelican_nlp/Nils_backup/transcription/annotation_tool_boundaries.py +1122 -0
  20. pelican_nlp/Nils_backup/transcription/annotation_tool_sandbox.py +985 -0
  21. pelican_nlp/Nils_backup/transcription/output/holmes_control_nova_all_outputs.json +7948 -0
  22. pelican_nlp/Nils_backup/transcription/test.json +1 -0
  23. pelican_nlp/Nils_backup/transcription/transcribe_audio.py +314 -0
  24. pelican_nlp/Nils_backup/transcription/transcribe_audio_chunked.py +695 -0
  25. pelican_nlp/Nils_backup/transcription/transcription.py +801 -0
  26. pelican_nlp/Nils_backup/transcription/transcription_gui.py +955 -0
  27. pelican_nlp/Nils_backup/transcription/word_boundaries.py +190 -0
  28. pelican_nlp/Silvia_files/Opensmile/opensmile_feature_extraction.py +66 -0
  29. pelican_nlp/Silvia_files/prosogram/prosogram.py +104 -0
  30. pelican_nlp/__init__.py +1 -1
  31. pelican_nlp/_version.py +1 -0
  32. pelican_nlp/configuration_files/config_audio.yml +150 -0
  33. pelican_nlp/configuration_files/config_discourse.yml +104 -0
  34. pelican_nlp/configuration_files/config_fluency.yml +108 -0
  35. pelican_nlp/configuration_files/config_general.yml +131 -0
  36. pelican_nlp/configuration_files/config_morteza.yml +103 -0
  37. pelican_nlp/praat/__init__.py +29 -0
  38. {pelican_nlp-0.1.1.dist-info → pelican_nlp-0.1.2.dist-info}/METADATA +4 -3
  39. pelican_nlp-0.1.2.dist-info/RECORD +75 -0
  40. pelican_nlp-0.1.1.dist-info/RECORD +0 -39
  41. {pelican_nlp-0.1.1.dist-info → pelican_nlp-0.1.2.dist-info}/WHEEL +0 -0
  42. {pelican_nlp-0.1.1.dist-info → pelican_nlp-0.1.2.dist-info}/licenses/LICENSE +0 -0
  43. {pelican_nlp-0.1.1.dist-info → pelican_nlp-0.1.2.dist-info}/top_level.txt +0 -0
pelican_nlp/Nils_backup/fluency/plotting_utils.py
@@ -0,0 +1,170 @@
+ """
+ Shared utilities and constants for plotting functions.
+ """
+
+ # Colors for different groups
+ COLORS = {
+     "Patients": '#242424',
+     "Psychosis": '#242424',
+     "HS": '#6d6d6d',
+     "LS": '#b6b6b6',
+     "Low Sczt": '#b6b6b6',
+     "High Sczt": '#6d6d6d',
+ }
+
+ # Colors for different outcomes
+ OUTCOME_COLORS = {
+     "Age": '#2b2b2b',
+     "Education": '#cc78bc',
+     "Male Gender": '#949494',
+     "German Native": 'sienna',
+     "Other Native": 'slategrey',
+     'Working Memory': '#2b2b2b',
+     'Psychomotor Speed': '#cc78bc',
+     'Negative Inhibition': '#949494',
+     'MSS Pos': 'sienna',
+     'MSS Neg': 'slategrey',
+     'PANSS Gen': '#ca9161',
+     'MSS Dis': '#ca9161',
+     'PANSS Pos': 'sienna',
+     'PANSS Neg': 'slategrey'
+ }
+
+ # Ordered list of outcomes for consistent plotting
+ SORTED_OUTCOMES = [
+     'Age',
+     'Education',
+     'Male Gender',
+     "German Native",
+     "Other Native",
+     'Working Memory',
+     'Psychomotor Speed',
+     'Negative Inhibition',
+     'MSS Pos',
+     'MSS Neg',
+     'MSS Dis',
+     'PANSS Pos',
+     'PANSS Neg',
+     'PANSS Gen',
+     'Age of Disease Onset',
+     'Duration of Untreated Illness',
+     'Duration of Antipsychotic Treatment',
+     "Risperidone Equivalent",
+ ]
+
+ # Group dictionary for consistent naming
+ GROUP_DICT = {
+     "ls": "Low Sczt",
+     "hs": "High Sczt",
+     "control": "Healthy Controls",
+     "patient": "Psychosis"
+ }
+
+ # Variable name mapping for plots
+ NAMES = {
+     "const": "Constant",
+     "gender_male": "Male Gender",
+     "age": "Age",
+     "education": "Education",
+     "age_onset": "Age of Disease Onset",
+     "antipsy_duration": "Duration of Antipsychotic Treatment",
+     "duration_untreated": "Duration of Untreated Illness",
+     "first_language_German": "German Native",
+     "first_language_Other": "Other Native",
+     "total_risp_eq": "Risperidone Equivalent",
+     "semantic_coherence_0_mean_of_window_means": "Average Sim",
+     "semantic_coherence_2_mean_of_window_means": "Coherence 2",
+     "semantic_coherence_5_mean_of_window_means": "Coherence 5",
+     "semantic_coherence_8_mean_of_window_means": "Coherence 8",
+     "semantic_coherence_16_mean_of_window_means": "Coherence 16",
+     "semantic_coherence_32_mean_of_window_means": "Coherence 32",
+     "number_tokens": "\# Items",
+     "z_Real_semantic_include0_includeN_5": "Optimality 5",
+     "z_Real_semantic_include0_includeN_8": "Optimality 8",
+     "z_Real_semantic_include0_includeN_16": "Optimality 16",
+     "z_Real_semantic_include0_includeN_32": "Optimality 32",
+     "panss_p_total": "PANSS Pos",
+     "panss_g_total": "PANSS Gen",
+     "panss_n_total": "PANSS Neg",
+     "panss_total": "PANSS Total",
+     "mss_sum": "MSS",
+     "mss_pos_sum": "MSS Pos",
+     "mss_neg_sum": "MSS Neg",
+     "mss_dis_sum": "MSS Dis",
+     "tlc_mean": "TLC Mean",
+     "stroop_psychomotor": "Psychomotor Speed",
+     "stroop_attention": "Selective Attention",
+     "stroop_inhibition": "Negative Inhibition",
+     "working_memory": "Working Memory",
+     'panss_sim_total': "PANSS Similarities",
+     'tangentiality_mean': "Tangentiality Mean",
+     'derailment_mean': "Derailment Mean",
+     "top_p": "Probability Mass Cutoff",
+     "temperature": "Temperature",
+     "semantic_coherence_": "Semantic Similarity Metric"
+ }
+
+ # Key metrics for analysis
+ METRICS = [
+     "semantic_coherence_2_mean_of_window_means",
+     "semantic_coherence_8_mean_of_window_means",
+     "z_Real_semantic_include0_includeN_8",
+     "number_tokens"
+ ]
+
+ # Cognitive variables
+ COG_VAR = [
+     'working_memory',
+     'stroop_inhibition'
+ ]
+
+ def format_p_value(p_value: float) -> str:
+     """
+     Format a p-value according to APA style.
+
+     Args:
+         p_value: The p-value to format.
+
+     Returns:
+         A string representing the formatted p-value.
+     """
+     if p_value < 0.001:
+         return "p < .001"
+     elif p_value < 0.01:
+         return f"p = .{str(round(p_value, 3))[2:]}"
+     else:
+         return f"p = .{str(round(p_value, 2))[2:]}"
+
+ def set_size(width: float = 750, fraction: float = 1, subplots: tuple = (1, 1)) -> tuple:
+     """
+     Set figure dimensions to avoid scaling in LaTeX.
+
+     Args:
+         width: Document width in points, or string of predefined document type
+         fraction: Fraction of the width which you wish the figure to occupy
+         subplots: The number of rows and columns of subplots.
+
+     Returns:
+         Dimensions of figure in inches
+     """
+     if width == "thesis":
+         width_pt = 426.79135
+     elif width == "beamer":
+         width_pt = 307.28987
+     else:
+         width_pt = width
+
+     # Width of figure (in pts)
+     fig_width_pt = width_pt * fraction
+     # Convert from pt to inches
+     inches_per_pt = 1 / 72.27
+
+     # Golden ratio to set aesthetic figure height
+     golden_ratio = (5**0.5 - 1) / 2
+
+     # Figure width in inches
+     fig_width_in = fig_width_pt * inches_per_pt
+     # Figure height in inches
+     fig_height_in = fig_width_in * golden_ratio * (subplots[0] / subplots[1])
+
+     return (fig_width_in, fig_height_in)
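
For reference, a minimal usage sketch (not part of the package) showing how the constants and helpers added in plotting_utils.py could be combined with matplotlib. The import path assumes the module ships in the installed 0.1.2 wheel exactly as listed above, and the bar values are placeholders.

import matplotlib.pyplot as plt
from pelican_nlp.Nils_backup.fluency.plotting_utils import (
    COLORS,
    format_p_value,
    set_size,
)

# Size a half-width, single-panel figure for a 750 pt wide document.
fig, ax = plt.subplots(figsize=set_size(width=750, fraction=0.5))

# Placeholder group means, colored with the shared group palette.
ax.bar(["Low Sczt", "High Sczt"], [0.42, 0.37],
       color=[COLORS["Low Sczt"], COLORS["High Sczt"]])

# APA-style p-value in the title, e.g. format_p_value(0.0342) -> "p = .03".
ax.set_title(f"Group difference ({format_p_value(0.0342)})")
fig.savefig("group_difference.png", dpi=300)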
pelican_nlp/Nils_backup/fluency/questionnaires_data.py
@@ -0,0 +1,43 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ """
+ Process VELAS questionnaire data.
+
+ This script:
+ 1. Loads questionnaire data from master file
+ 2. Selects relevant columns
+ 3. Outputs selected questionnaire scores
+ """
+ import pandas as pd
+ from utils import ensure_output_dir
+ from config import QUESTIONNAIRES_CONFIG
+
+ def load_questionnaire_data(questionnaire_path: str) -> pd.DataFrame:
+     """
+     Load questionnaire data from master file.
+
+     Args:
+         questionnaire_path: Path to master questionnaire data
+
+     Returns:
+         DataFrame with questionnaire data
+     """
+     return pd.read_csv(questionnaire_path)
+
+ def save_questionnaire_data(df: pd.DataFrame, output_path: str) -> None:
+     """Save processed questionnaire data to CSV."""
+     ensure_output_dir(output_path)
+     df[QUESTIONNAIRES_CONFIG["columns_to_save"]].to_csv(output_path, index=False)
+
+ def main():
+     # Get paths from config
+     paths = QUESTIONNAIRES_CONFIG["paths"]
+
+     # Process data
+     df = load_questionnaire_data(paths["input"])
+     save_questionnaire_data(df, paths["output"])
+
+     print("Questionnaire data processed successfully!")
+
+ if __name__ == "__main__":
+     main()
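
The script above imports ensure_output_dir from utils and QUESTIONNAIRES_CONFIG from config; both files are added in this release (entries 15 and 8 in the list) but their contents are not shown in this diff. The following is a hypothetical sketch of the shapes those objects would need, inferred only from how questionnaires_data.py uses them; the helper signature, key names, paths, and column names are illustrative assumptions, not the package's actual definitions.

import os

def ensure_output_dir(output_path: str) -> None:
    # Create the parent directory of the output file if it does not already exist.
    os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)

QUESTIONNAIRES_CONFIG = {
    # Placeholder paths; the real values live in the (unshown) config.py.
    "paths": {
        "input": "data/questionnaires_master.csv",
        "output": "derivatives/questionnaire_scores.csv",
    },
    # Columns kept by save_questionnaire_data(); these names are illustrative.
    "columns_to_save": ["subject_id", "group", "mss_pos_sum", "mss_neg_sum"],
}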