DynamiSpectra 1.0.4__py3-none-any.whl → 1.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dynamispectra/FractionSS.py +100 -0
- dynamispectra/Hbond.py +219 -0
- dynamispectra/PCA.py +118 -0
- dynamispectra/RMSD.py +218 -0
- dynamispectra/RMSF.py +216 -0
- dynamispectra/Rg.py +218 -0
- dynamispectra/SASA.py +219 -0
- dynamispectra/SecondaryStructure.py +201 -0
- dynamispectra/__init__.py +21 -0
- dynamispectra/cly.py +0 -0
- dynamispectra/ligand_density.py +55 -0
- dynamispectra/main.py +4 -0
- dynamispectra/saltbridge.py +171 -0
- {dynamispectra-1.0.4.dist-info → dynamispectra-1.0.6.dist-info}/METADATA +1 -1
- dynamispectra-1.0.6.dist-info/RECORD +18 -0
- {dynamispectra-1.0.4.dist-info → dynamispectra-1.0.6.dist-info}/WHEEL +1 -1
- dynamispectra-1.0.6.dist-info/top_level.txt +1 -0
- dynamispectra-1.0.4.dist-info/RECORD +0 -5
- dynamispectra-1.0.4.dist-info/top_level.txt +0 -1
- {dynamispectra-1.0.4.dist-info → dynamispectra-1.0.6.dist-info}/entry_points.txt +0 -0
dynamispectra/RMSF.py
ADDED
@@ -0,0 +1,216 @@
|
|
1
|
+
import numpy as np
|
2
|
+
import matplotlib.pyplot as plt
|
3
|
+
from scipy.stats import gaussian_kde
|
4
|
+
import os
|
5
|
+
|
6
|
+
def read_rmsf(file):
    """
    Reads the first two numeric columns of RMSF data from a .xvg file.

    Blank lines and lines whose first non-blank character is '#', '@' or ';'
    are skipped. Lines with fewer than two fields are ignored. Lines whose
    first two fields are not numeric are reported on stdout and skipped.

    Parameters:
    -----------
    file : str
        Path to the .xvg file.

    Returns:
    --------
    times : numpy.ndarray or None
        Values of the first column. The name is historical: plot_rmsf
        labels this axis 'Residue'.
    rmsfs : numpy.ndarray or None
        Values of the second column (RMSF).

    Both values are None when the file cannot be opened or decoded, or when
    it contains no valid data line; the reason is printed.
    """
    print(f"Reading file: {file}")

    times = []
    rmsfs = []
    try:
        with open(file, 'r') as f:
            for raw_line in f:
                line = raw_line.strip()
                # Strip before testing the prefix so that indented comment
                # lines are treated as comments, not reported as bad data.
                if not line or line.startswith(('#', '@', ';')):
                    continue

                values = line.split()
                if len(values) < 2:
                    continue

                try:
                    time, rmsf = map(float, values[:2])
                except ValueError:
                    # Non-numeric line: report it and keep reading.
                    print(f"Error processing line: {line}")
                    continue

                times.append(time)
                rmsfs.append(rmsf)

        if not times:
            raise ValueError(f"File {file} does not contain valid data.")
    except (OSError, ValueError) as e:
        # Best-effort contract: I/O, decoding and "no data" failures are
        # reported and signalled with (None, None) instead of propagating.
        print(f"Error reading file {file}: {e}")
        return None, None

    return np.array(times), np.array(rmsfs)
|
61
|
+
|
62
|
+
def check_simulation_times(*time_arrays):
    r"""
    Checks if simulation times are consistent across files.

    Every array is compared with the first one.

    Parameters:
    -----------
    \*time_arrays : list of numpy.ndarray
        Arrays of times to compare.

    Raises:
    -------
    ValueError
        If an array is None, has a different shape from the first array, or
        differs from it beyond np.allclose's default tolerances.
    """
    if not time_arrays:
        return

    for i, times in enumerate(time_arrays, start=1):
        if times is None:
            raise ValueError(f"No time data available for file {i}")

    reference = np.asarray(time_arrays[0])
    for i, times in enumerate(time_arrays[1:], start=2):
        current = np.asarray(times)
        # Compare shapes explicitly: np.allclose broadcasts its operands, so
        # a length-1 array would otherwise be accepted against any length,
        # and other length mismatches would surface as a broadcasting error.
        if current.shape != reference.shape or not np.allclose(reference, current):
            raise ValueError(f"Simulation times do not match between file 1 and file {i}")
|
74
|
+
|
75
|
+
def plot_rmsf(time_simulation1, mean_simulation1, std_simulation1,
              time_simulation2, mean_simulation2, std_simulation2,
              time_simulation3, mean_simulation3, std_simulation3,
              output_folder, xlim=(1, 42)):
    """
    Generates the RMSF plot with mean and standard deviation for the groups provided.

    Each group is drawn as a line (mean) with a shaded band (mean +/- std).
    A group is skipped when its time_simulationN argument is None.

    Parameters:
    -----------
    time_simulationN, mean_simulationN, std_simulationN : numpy.ndarray or None
        X values, mean and standard deviation of group N (N = 1, 2, 3).
    output_folder : str
        Folder where 'rmsf_plot.tiff' and 'rmsf_plot.png' are written; it is
        created if it does not exist.
    xlim : tuple or None, optional
        (left, right) limits of the x-axis. Defaults to (1, 42), the range
        that used to be hard-coded. Pass None to keep matplotlib's automatic
        limits, or use None for one side to leave that side automatic.
    """
    series = (
        (time_simulation1, mean_simulation1, std_simulation1, 'Simulation 1', '#333333'),
        (time_simulation2, mean_simulation2, std_simulation2, 'Simulation 2', '#6A9EDA'),
        (time_simulation3, mean_simulation3, std_simulation3, 'Simulation 3', '#54b36a'),
    )

    fig, ax = plt.subplots(figsize=(7, 6))
    try:
        has_data = False
        for x, mean, std, label, color in series:
            if x is None:
                continue
            ax.plot(x, mean, label=label, color=color, linewidth=2)
            ax.fill_between(x, mean - std, mean + std, color=color, alpha=0.2)
            has_data = True

        # Configure the RMSF plot
        ax.set_xlabel('Residue', fontsize=12)
        ax.set_ylabel('RMSF (nm)', fontsize=12)
        ax.set_title('', fontsize=14)
        if has_data:
            # A legend without labelled artists only triggers a warning.
            ax.legend(frameon=False, loc='upper right', fontsize=10)
        ax.tick_params(axis='both', which='major', labelsize=10)
        ax.grid(False)
        if xlim is not None:
            ax.set_xlim(*xlim)
        fig.tight_layout()

        # Save the RMSF plot in TIFF and PNG formats
        os.makedirs(output_folder, exist_ok=True)
        fig.savefig(os.path.join(output_folder, 'rmsf_plot.tiff'), format='tiff', dpi=300)
        fig.savefig(os.path.join(output_folder, 'rmsf_plot.png'), format='png', dpi=300)

        # Show the RMSF plot
        plt.show()
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
|
122
|
+
|
123
|
+
def plot_density(mean_simulation1, mean_simulation2, mean_simulation3, output_folder,
                 yticks=(0, 2, 4, 6, 8, 10)):
    """
    Generates the density plot for the groups provided.

    For each group that is not None, a Gaussian kernel density estimate of
    its values is evaluated on 1000 points between 0 and the group's maximum
    and drawn as a filled curve.

    Parameters:
    -----------
    mean_simulation1, mean_simulation2, mean_simulation3 : numpy.ndarray or None
        Mean RMSF values of each group; None skips the group.
    output_folder : str
        Folder where 'density_plot.tiff' and 'density_plot.png' are written;
        it is created if it does not exist.
    yticks : sequence or None, optional
        Tick positions of the y-axis. Defaults to the previously hard-coded
        (0, 2, 4, 6, 8, 10). Pass None to keep matplotlib's automatic ticks.
    """
    series = (
        (mean_simulation1, 'Simulation 1', '#333333', 0.5),
        (mean_simulation2, 'Simulation 2', '#6A9EDA', 0.6),
        (mean_simulation3, 'Simulation 3', '#54b36a', 0.5),
    )

    fig, ax = plt.subplots(figsize=(6, 6))
    try:
        has_data = False
        # Add KDE (Kernel Density Estimation) for each dataset (if provided)
        for values, label, color, alpha in series:
            if values is None:
                continue
            kde = gaussian_kde(values)
            x_vals = np.linspace(0, np.max(values), 1000)
            ax.fill_between(x_vals, kde(x_vals), color=color, alpha=alpha, label=label)
            has_data = True

        # Configure the density plot
        ax.set_xlabel('RMSF (nm)', fontsize=12)
        ax.set_ylabel('Density', fontsize=12)
        ax.set_title('', fontsize=14)
        if has_data:
            # A legend without labelled artists only triggers a warning.
            ax.legend(frameon=False, loc='upper left', fontsize=10)
        ax.tick_params(axis='both', which='major', labelsize=10)
        ax.grid(False)
        if yticks is not None:
            ax.set_yticks(list(yticks))
        fig.tight_layout()

        # Create the folder here as well, so the function also works when it
        # is called without a previous plot_rmsf call.
        os.makedirs(output_folder, exist_ok=True)
        fig.savefig(os.path.join(output_folder, 'density_plot.tiff'), format='tiff', dpi=300)
        fig.savefig(os.path.join(output_folder, 'density_plot.png'), format='png', dpi=300)

        # Show the density plot
        plt.show()
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
|
165
|
+
|
166
|
+
def rmsf_analysis(output_folder, *simulation_files_groups):
    r"""
    Main function to generate RMSF analysis and plots.

    For every non-empty group, the files are read with read_rmsf, their first
    columns are checked for consistency, and the mean and standard deviation
    across files are computed. The results are passed to plot_rmsf and
    plot_density.

    Parameters:
    -----------
    output_folder : str
        Output folder to save the plots.
    \*simulation_files_groups : list of str
        List of paths to .xvg files for each simulation group.
        You can pass 1, 2, or 3 groups. Empty groups are ignored.

    Raises:
    -------
    ValueError
        If no non-empty group or more than three groups are given, if a file
        cannot be read, or if the files of a group are inconsistent.
    """
    # Validate the number of groups before reading any file.
    groups = [group for group in simulation_files_groups if group]
    if not groups:
        raise ValueError("You must provide at least one group of simulation files.")
    if len(groups) > 3:
        raise ValueError(
            f"At most three groups of simulation files are supported, but {len(groups)} were provided."
        )

    # Helper function to process a group of files
    def process_group(file_paths):
        times = []
        rmsfs = []
        for file in file_paths:
            time, rmsf = read_rmsf(file)
            if time is None or rmsf is None:
                # read_rmsf signals failure with (None, None); stop here with
                # a clear message instead of failing later inside numpy.
                raise ValueError(f"Could not read RMSF data from file {file}")
            times.append(time)
            rmsfs.append(rmsf)
        check_simulation_times(*times)  # Check if times are consistent
        return times[0], np.mean(rmsfs, axis=0), np.std(rmsfs, axis=0)

    results = [process_group(group) for group in groups]
    # Pad to exactly three (x, mean, std) triples; absent groups reach the
    # plotting functions as None.
    results += [(None, None, None)] * (3 - len(results))
    (time1, mean1, std1), (time2, mean2, std2), (time3, mean3, std3) = results

    plot_rmsf(time1, mean1, std1,
              time2, mean2, std2,
              time3, mean3, std3, output_folder)
    plot_density(mean1, mean2, mean3, output_folder)
|
dynamispectra/Rg.py
ADDED
@@ -0,0 +1,218 @@
|
|
1
|
+
import numpy as np
|
2
|
+
import matplotlib.pyplot as plt
|
3
|
+
from scipy.stats import gaussian_kde
|
4
|
+
import os
|
5
|
+
|
6
|
+
def read_rg(file):
    """
    Reads Rg data from a .xvg file.

    Blank lines and lines whose first non-blank character is '#', '@' or ';'
    are skipped. Lines with fewer than two fields are ignored. Lines whose
    first two fields are not numeric are reported on stdout and skipped.

    Parameters:
    -----------
    file : str
        Path to the .xvg file.

    Returns:
    --------
    times : numpy.ndarray or None
        First column divided by 1000 (assumes the file stores time in
        picoseconds, giving nanoseconds).
    rg_values : numpy.ndarray or None
        Values of the second column (Rg).

    Both values are None when the file cannot be opened or decoded, or when
    it contains no valid data line; the reason is printed.
    """
    print(f"Reading file: {file}")

    times = []
    rg_values = []
    try:
        with open(file, 'r') as f:
            for raw_line in f:
                line = raw_line.strip()
                # Strip before testing the prefix so that indented comment
                # lines are treated as comments, not reported as bad data.
                if not line or line.startswith(('#', '@', ';')):
                    continue

                line_values = line.split()
                # A line with a single field used to raise IndexError, which
                # discarded the whole file; ignore such lines instead.
                if len(line_values) < 2:
                    continue

                try:
                    time = float(line_values[0])      # Time
                    rg_total = float(line_values[1])  # Rg value
                except ValueError:
                    # Non-numeric line: report it and keep reading.
                    print(f"Error processing line: {line}")
                    continue

                times.append(time / 1000)  # Convert time to nanoseconds
                rg_values.append(rg_total)

        if not times:
            raise ValueError(f"File {file} does not contain valid data.")
    except (OSError, ValueError) as e:
        # Best-effort contract: I/O, decoding and "no data" failures are
        # reported and signalled with (None, None) instead of propagating.
        print(f"Error reading file {file}: {e}")
        return None, None

    return np.array(times), np.array(rg_values)
|
60
|
+
|
61
|
+
def check_simulation_times(*time_arrays):
    r"""
    Checks if simulation times are consistent across files.

    Every array is compared with the first one.

    Parameters:
    -----------
    \*time_arrays : list of numpy.ndarray
        Arrays of times to compare.

    Raises:
    -------
    ValueError
        If an array is None, has a different shape from the first array, or
        differs from it beyond np.allclose's default tolerances.
    """
    if not time_arrays:
        return

    for i, times in enumerate(time_arrays, start=1):
        if times is None:
            raise ValueError(f"No time data available for file {i}")

    reference = np.asarray(time_arrays[0])
    for i, times in enumerate(time_arrays[1:], start=2):
        current = np.asarray(times)
        # Compare shapes explicitly: np.allclose broadcasts its operands, so
        # a length-1 array would otherwise be accepted against any length,
        # and other length mismatches would surface as a broadcasting error.
        if current.shape != reference.shape or not np.allclose(reference, current):
            raise ValueError(f"Simulation times do not match between file 1 and file {i}")
|
73
|
+
|
74
|
+
def plot_rg(time_simulation1, mean_simulation1, std_simulation1,
            time_simulation2, mean_simulation2, std_simulation2,
            time_simulation3, mean_simulation3, std_simulation3,
            output_folder, xlim=(0, 100)):
    """
    Generates the Rg plot with mean and standard deviation for the groups provided.

    Each group is drawn as a line (mean) with a shaded band (mean +/- std).
    A group is skipped when its time_simulationN argument is None.

    Parameters:
    -----------
    time_simulationN, mean_simulationN, std_simulationN : numpy.ndarray or None
        Times, mean and standard deviation of group N (N = 1, 2, 3).
    output_folder : str
        Folder where 'rg_plot.tiff' and 'rg_plot.png' are written; it is
        created if it does not exist.
    xlim : tuple or None, optional
        (left, right) limits of the x-axis. Defaults to (0, 100), the range
        that used to be hard-coded. Pass None to keep matplotlib's automatic
        limits, or use None for one side to leave that side automatic.
    """
    series = (
        (time_simulation1, mean_simulation1, std_simulation1, 'Simulation 1', '#333333'),
        (time_simulation2, mean_simulation2, std_simulation2, 'Simulation 2', '#6A9EDA'),
        (time_simulation3, mean_simulation3, std_simulation3, 'Simulation 3', '#54b36a'),
    )

    fig, ax = plt.subplots(figsize=(7, 6))
    try:
        has_data = False
        for x, mean, std, label, color in series:
            if x is None:
                continue
            ax.plot(x, mean, label=label, color=color, linewidth=2)
            ax.fill_between(x, mean - std, mean + std, color=color, alpha=0.2)
            has_data = True

        # Configure the Rg plot
        ax.set_xlabel('Time (ns)', fontsize=12)
        ax.set_ylabel('Rg (nm)', fontsize=12)
        ax.set_title('', fontsize=14)
        if has_data:
            # A legend without labelled artists only triggers a warning.
            ax.legend(frameon=False, loc='upper right', fontsize=10)
        ax.tick_params(axis='both', which='major', labelsize=10)
        ax.grid(False)
        if xlim is not None:
            ax.set_xlim(*xlim)

        # Adjust layout
        fig.tight_layout()

        # Save the Rg plot in TIFF and PNG formats
        os.makedirs(output_folder, exist_ok=True)
        fig.savefig(os.path.join(output_folder, 'rg_plot.tiff'), format='tiff', dpi=300)
        fig.savefig(os.path.join(output_folder, 'rg_plot.png'), format='png', dpi=300)

        # Show the Rg plot
        plt.show()
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
|
125
|
+
|
126
|
+
def plot_density(mean_simulation1, mean_simulation2, mean_simulation3, output_folder):
    """
    Generates the density plot for the groups provided.

    For each group that is not None, a Gaussian kernel density estimate of
    its values is evaluated on 1000 points between 0 and the group's maximum
    and drawn as a filled curve.

    Parameters:
    -----------
    mean_simulation1, mean_simulation2, mean_simulation3 : numpy.ndarray or None
        Mean Rg values of each group; None skips the group.
    output_folder : str
        Folder where 'density_plot.tiff' and 'density_plot.png' are written;
        it is created if it does not exist.
    """
    series = (
        (mean_simulation1, 'Simulation 1', '#333333', 0.5),
        (mean_simulation2, 'Simulation 2', '#6A9EDA', 0.6),
        (mean_simulation3, 'Simulation 3', '#54b36a', 0.5),
    )

    fig, ax = plt.subplots(figsize=(6, 6))
    try:
        has_data = False
        # Add KDE (Kernel Density Estimation) for each dataset (if provided)
        for values, label, color, alpha in series:
            if values is None:
                continue
            kde = gaussian_kde(values)
            x_vals = np.linspace(0, np.max(values), 1000)
            ax.fill_between(x_vals, kde(x_vals), color=color, alpha=alpha, label=label)
            has_data = True

        # Configure the density plot
        ax.set_xlabel('Rg (nm)', fontsize=12)
        ax.set_ylabel('Density', fontsize=12)
        ax.set_title('', fontsize=14)
        if has_data:
            # A legend without labelled artists only triggers a warning.
            ax.legend(frameon=False, loc='upper left', fontsize=10)
        ax.tick_params(axis='both', which='major', labelsize=10)
        ax.grid(False)
        fig.tight_layout()

        # Create the folder here as well, so the function also works when it
        # is called without a previous plot_rg call.
        os.makedirs(output_folder, exist_ok=True)
        fig.savefig(os.path.join(output_folder, 'density_plot.tiff'), format='tiff', dpi=300)
        fig.savefig(os.path.join(output_folder, 'density_plot.png'), format='png', dpi=300)

        # Show the density plot
        plt.show()
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
|
167
|
+
|
168
|
+
def rg_analysis(output_folder, *simulation_files_groups):
    r"""
    Main function to generate Rg analysis and plots.

    For every non-empty group, the files are read with read_rg, their times
    are checked for consistency, and the mean and standard deviation across
    files are computed. The results are passed to plot_rg and plot_density.

    Parameters:
    -----------
    output_folder : str
        Output folder to save the plots.
    \*simulation_files_groups : list of str
        List of paths to .xvg files for each simulation group.
        You can pass 1, 2, or 3 groups. Empty groups are ignored.

    Raises:
    -------
    ValueError
        If no non-empty group or more than three groups are given, if a file
        cannot be read, or if the files of a group are inconsistent.
    """
    # Validate the number of groups before reading any file.
    groups = [group for group in simulation_files_groups if group]
    if not groups:
        raise ValueError("You must provide at least one group of simulation files.")
    if len(groups) > 3:
        raise ValueError(
            f"At most three groups of simulation files are supported, but {len(groups)} were provided."
        )

    # Helper function to process a group of files
    def process_group(file_paths):
        times = []
        rg_values = []
        for file in file_paths:
            time, rg = read_rg(file)
            if time is None or rg is None:
                # read_rg signals failure with (None, None); stop here with a
                # clear message instead of failing later inside numpy.
                raise ValueError(f"Could not read Rg data from file {file}")
            times.append(time)
            rg_values.append(rg)
        check_simulation_times(*times)  # Check if times are consistent
        return times[0], np.mean(rg_values, axis=0), np.std(rg_values, axis=0)

    results = [process_group(group) for group in groups]
    # Pad to exactly three (time, mean, std) triples; absent groups reach the
    # plotting functions as None.
    results += [(None, None, None)] * (3 - len(results))
    (time1, mean1, std1), (time2, mean2, std2), (time3, mean3, std3) = results

    plot_rg(time1, mean1, std1,
            time2, mean2, std2,
            time3, mean3, std3, output_folder)
    plot_density(mean1, mean2, mean3, output_folder)
|
dynamispectra/SASA.py
ADDED
@@ -0,0 +1,219 @@
|
|
1
|
+
import numpy as np
|
2
|
+
import matplotlib.pyplot as plt
|
3
|
+
from scipy.stats import gaussian_kde
|
4
|
+
import os
|
5
|
+
|
6
|
+
def read_sasa(file):
    """
    Reads SASA data from a .xvg file.

    Blank lines and lines whose first non-blank character is '#', '@' or ';'
    are skipped. Lines with fewer than two fields are ignored. Lines whose
    first two fields are not numeric are reported on stdout and skipped.

    Parameters:
    -----------
    file : str
        Path to the .xvg file.

    Returns:
    --------
    times : numpy.ndarray or None
        First column divided by 1000 (assumes the file stores time in
        picoseconds, giving nanoseconds).
    sasas : numpy.ndarray or None
        Values of the second column (SASA).

    Both values are None when the file cannot be opened or decoded, or when
    it contains no valid data line; the reason is printed.
    """
    print(f"Reading file: {file}")

    times = []
    sasas = []
    try:
        with open(file, 'r') as f:
            for raw_line in f:
                line = raw_line.strip()
                # Strip before testing the prefix so that indented comment
                # lines are treated as comments, not reported as bad data.
                if not line or line.startswith(('#', '@', ';')):
                    continue

                values = line.split()
                if len(values) < 2:
                    continue

                try:
                    time, sasa = map(float, values[:2])
                except ValueError:
                    # Non-numeric line: report it and keep reading.
                    print(f"Error processing line: {line}")
                    continue

                times.append(time / 1000)  # Convert time to nanoseconds
                sasas.append(sasa)

        if not times:
            raise ValueError(f"File {file} does not contain valid data.")
    except (OSError, ValueError) as e:
        # Best-effort contract: I/O, decoding and "no data" failures are
        # reported and signalled with (None, None) instead of propagating.
        print(f"Error reading file {file}: {e}")
        return None, None

    return np.array(times), np.array(sasas)
|
61
|
+
|
62
|
+
def check_simulation_times(*time_arrays):
    r"""
    Checks if simulation times are consistent across files.

    Every array is compared with the first one.

    Parameters:
    -----------
    \*time_arrays : list of numpy.ndarray
        Arrays of times to compare.

    Raises:
    -------
    ValueError
        If an array is None, has a different shape from the first array, or
        differs from it beyond np.allclose's default tolerances.
    """
    if not time_arrays:
        return

    for i, times in enumerate(time_arrays, start=1):
        if times is None:
            raise ValueError(f"No time data available for file {i}")

    reference = np.asarray(time_arrays[0])
    for i, times in enumerate(time_arrays[1:], start=2):
        current = np.asarray(times)
        # Compare shapes explicitly: np.allclose broadcasts its operands, so
        # a length-1 array would otherwise be accepted against any length,
        # and other length mismatches would surface as a broadcasting error.
        if current.shape != reference.shape or not np.allclose(reference, current):
            raise ValueError(f"Simulation times do not match between file 1 and file {i}")
|
74
|
+
|
75
|
+
def plot_sasa(time_simulation1, mean_simulation1, std_simulation1,
              time_simulation2, mean_simulation2, std_simulation2,
              time_simulation3, mean_simulation3, std_simulation3,
              output_folder, xlim=(0, 100)):
    """
    Generates the SASA plot with mean and standard deviation for the groups provided.

    Each group is drawn as a line (mean) with a shaded band (mean +/- std).
    A group is skipped when its time_simulationN argument is None.

    Parameters:
    -----------
    time_simulationN, mean_simulationN, std_simulationN : numpy.ndarray or None
        Times, mean and standard deviation of group N (N = 1, 2, 3).
    output_folder : str
        Folder where 'sasa_plot.tiff' and 'sasa_plot.png' are written; it is
        created if it does not exist.
    xlim : tuple or None, optional
        (left, right) limits of the x-axis. Defaults to (0, 100), the range
        that used to be hard-coded. Pass None to keep matplotlib's automatic
        limits, or use None for one side to leave that side automatic.
    """
    series = (
        (time_simulation1, mean_simulation1, std_simulation1, 'Simulation 1', '#333333'),
        (time_simulation2, mean_simulation2, std_simulation2, 'Simulation 2', '#6A9EDA'),
        (time_simulation3, mean_simulation3, std_simulation3, 'Simulation 3', '#54b36a'),
    )

    fig, ax = plt.subplots(figsize=(7, 6))
    try:
        has_data = False
        for x, mean, std, label, color in series:
            if x is None:
                continue
            ax.plot(x, mean, label=label, color=color, linewidth=2)
            ax.fill_between(x, mean - std, mean + std, color=color, alpha=0.2)
            has_data = True

        # Configure the SASA plot
        ax.set_xlabel('Time (ns)', fontsize=12)
        ax.set_ylabel('SASA (nm²)', fontsize=12)
        ax.set_title('', fontsize=14)
        if has_data:
            # A legend without labelled artists only triggers a warning.
            ax.legend(frameon=False, loc='upper right', fontsize=10)
        ax.tick_params(axis='both', which='major', labelsize=10)
        ax.grid(False)
        if xlim is not None:
            ax.set_xlim(*xlim)

        # Adjust layout
        fig.tight_layout()

        # Save the SASA plot in TIFF and PNG formats
        os.makedirs(output_folder, exist_ok=True)
        fig.savefig(os.path.join(output_folder, 'sasa_plot.tiff'), format='tiff', dpi=300)
        fig.savefig(os.path.join(output_folder, 'sasa_plot.png'), format='png', dpi=300)

        # Show the SASA plot
        plt.show()
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
|
126
|
+
|
127
|
+
def plot_density(mean_simulation1, mean_simulation2, mean_simulation3, output_folder):
    """
    Generates the density plot for the groups provided.

    For each group that is not None, a Gaussian kernel density estimate of
    its values is evaluated on 1000 points between 0 and the group's maximum
    and drawn as a filled curve.

    Parameters:
    -----------
    mean_simulation1, mean_simulation2, mean_simulation3 : numpy.ndarray or None
        Mean SASA values of each group; None skips the group.
    output_folder : str
        Folder where 'density_plot.tiff' and 'density_plot.png' are written;
        it is created if it does not exist.
    """
    series = (
        (mean_simulation1, 'Simulation 1', '#333333', 0.5),
        (mean_simulation2, 'Simulation 2', '#6A9EDA', 0.6),
        (mean_simulation3, 'Simulation 3', '#54b36a', 0.5),
    )

    fig, ax = plt.subplots(figsize=(6, 6))
    try:
        has_data = False
        # Add KDE (Kernel Density Estimation) for each dataset (if provided)
        for values, label, color, alpha in series:
            if values is None:
                continue
            kde = gaussian_kde(values)
            x_vals = np.linspace(0, np.max(values), 1000)
            ax.fill_between(x_vals, kde(x_vals), color=color, alpha=alpha, label=label)
            has_data = True

        # Configure the density plot
        ax.set_xlabel('SASA (nm²)', fontsize=12)
        ax.set_ylabel('Density', fontsize=12)
        ax.set_title('', fontsize=14)
        if has_data:
            # A legend without labelled artists only triggers a warning.
            ax.legend(frameon=False, loc='upper left', fontsize=10)
        ax.tick_params(axis='both', which='major', labelsize=10)
        ax.grid(False)
        fig.tight_layout()

        # Create the folder here as well, so the function also works when it
        # is called without a previous plot_sasa call.
        os.makedirs(output_folder, exist_ok=True)
        fig.savefig(os.path.join(output_folder, 'density_plot.tiff'), format='tiff', dpi=300)
        fig.savefig(os.path.join(output_folder, 'density_plot.png'), format='png', dpi=300)

        # Show the density plot
        plt.show()
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
|
168
|
+
|
169
|
+
def sasa_analysis(output_folder, *simulation_files_groups):
    r"""
    Main function to generate SASA analysis and plots.

    For every non-empty group, the files are read with read_sasa, their times
    are checked for consistency, and the mean and standard deviation across
    files are computed. The results are passed to plot_sasa and plot_density.

    Parameters:
    -----------
    output_folder : str
        Output folder to save the plots.
    \*simulation_files_groups : list of str
        List of paths to .xvg files for each simulation group.
        You can pass 1, 2, or 3 groups. Empty groups are ignored.

    Raises:
    -------
    ValueError
        If no non-empty group or more than three groups are given, if a file
        cannot be read, or if the files of a group are inconsistent.
    """
    # Validate the number of groups before reading any file.
    groups = [group for group in simulation_files_groups if group]
    if not groups:
        raise ValueError("You must provide at least one group of simulation files.")
    if len(groups) > 3:
        raise ValueError(
            f"At most three groups of simulation files are supported, but {len(groups)} were provided."
        )

    # Helper function to process a group of files
    def process_group(file_paths):
        times = []
        sasas = []
        for file in file_paths:
            time, sasa = read_sasa(file)
            if time is None or sasa is None:
                # read_sasa signals failure with (None, None); stop here with
                # a clear message instead of failing later inside numpy.
                raise ValueError(f"Could not read SASA data from file {file}")
            times.append(time)
            sasas.append(sasa)
        check_simulation_times(*times)  # Check if times are consistent
        return times[0], np.mean(sasas, axis=0), np.std(sasas, axis=0)

    results = [process_group(group) for group in groups]
    # Pad to exactly three (time, mean, std) triples; absent groups reach the
    # plotting functions as None.
    results += [(None, None, None)] * (3 - len(results))
    (time1, mean1, std1), (time2, mean2, std2), (time3, mean3, std3) = results

    plot_sasa(time1, mean1, std1,
              time2, mean2, std2,
              time3, mean3, std3, output_folder)
    plot_density(mean1, mean2, mean3, output_folder)
|