DynamiSpectra 1.0.4__py3-none-any.whl → 1.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dynamispectra/FractionSS.py +100 -0
- dynamispectra/Hbond.py +219 -0
- dynamispectra/PCA.py +118 -0
- dynamispectra/RMSD.py +218 -0
- dynamispectra/RMSF.py +216 -0
- dynamispectra/Rg.py +218 -0
- dynamispectra/SASA.py +219 -0
- dynamispectra/SecondaryStructure.py +201 -0
- dynamispectra/__init__.py +19 -0
- dynamispectra/cly.py +0 -0
- dynamispectra/main.py +4 -0
- {dynamispectra-1.0.4.dist-info → dynamispectra-1.0.5.dist-info}/METADATA +1 -1
- dynamispectra-1.0.5.dist-info/RECORD +16 -0
- dynamispectra-1.0.5.dist-info/top_level.txt +1 -0
- dynamispectra-1.0.4.dist-info/RECORD +0 -5
- dynamispectra-1.0.4.dist-info/top_level.txt +0 -1
- {dynamispectra-1.0.4.dist-info → dynamispectra-1.0.5.dist-info}/WHEEL +0 -0
- {dynamispectra-1.0.4.dist-info → dynamispectra-1.0.5.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,100 @@
|
|
1
|
+
# FractionSS.py
|
2
|
+
import pandas as pd
|
3
|
+
import matplotlib.pyplot as plt
|
4
|
+
|
5
|
+
# State mapping
|
6
|
+
# Integer code assigned to each one-letter secondary-structure symbol.
# Two symbols ('~' and 'B') share the code -1, i.e. both are treated as
# "undefined structure".
STATE_MAPPING = {
    "H": 1,    # α-Helix
    "E": 2,    # β-Sheet
    "C": 0,    # Loop/Coil
    "T": 3,    # Turn
    "S": 4,    # Bend
    "G": 5,    # 3-Helix
    "~": -1,   # Undefined structure
    "B": -1,   # Treated as undefined structure
}
|
16
|
+
|
17
|
+
def load_data(file_path):
    """
    Read a header-less CSV file into a DataFrame and print a preview.

    Parameters:
    -----------
    file_path : str
        Path handed to ``pandas.read_csv`` (read with ``header=None``).

    Returns:
    --------
    pandas.DataFrame or None
        The parsed table, or None when loading raised an exception (the
        error is printed instead of being propagated).
    """
    try:
        table = pd.read_csv(file_path, header=None)
        print("First few rows of the file:")
        print(table.head())
    except Exception as error:
        # Best-effort loader: report the failure and signal it with None.
        print(f"Error loading the file: {error}")
        return None
    return table
|
27
|
+
|
28
|
+
def calculate_fractions(df):
    """
    Calculate the per-row fraction of each secondary-structure class.

    Each row of ``df`` is treated as one frame whose column ``0`` holds a
    string with one character per residue.  For every row the occurrences
    of 'H', 'E', 'C', 'T', 'S' and 'G' are counted and divided by the
    string length.  Other characters count towards the length only, so
    the six fractions need not sum to 1.

    Parameters:
    -----------
    df : pandas.DataFrame
        Table whose column ``0`` contains the structure strings.

    Returns:
    --------
    results_df : pandas.DataFrame
        One row per input row with the columns "Time" (the row index of
        ``df``), "Helix Fraction", "Sheet Fraction", "Coil Fraction",
        "Turn Fraction", "Bend Fraction" and "3-Helix Fraction".
        Rows whose entry is empty or not a string (e.g. NaN) get NaN
        fractions instead of raising.
    """
    # (output column, one-letter code) pairs, in output column order.
    classes = (
        ("Helix Fraction", "H"),
        ("Sheet Fraction", "E"),
        ("Coil Fraction", "C"),
        ("Turn Fraction", "T"),
        ("Bend Fraction", "S"),
        ("3-Helix Fraction", "G"),
    )

    columns = {"Time": []}
    for name, _ in classes:
        columns[name] = []

    for index, row in df.iterrows():
        sequence = row[0]
        columns["Time"].append(index)

        # A fraction is undefined without residues: guard against empty
        # strings (division by zero) and missing values (no len()).
        has_residues = isinstance(sequence, str) and len(sequence) > 0
        for name, code in classes:
            if has_residues:
                columns[name].append(sequence.count(code) / len(sequence))
            else:
                columns[name].append(float("nan"))

    results_df = pd.DataFrame(columns)

    print("First few rows of the results:")
    print(results_df.head())
    return results_df
|
71
|
+
|
72
|
+
def plot_results(results_df, title, output_png, output_tiff,
                 xlim=(0, 10000), ylim=(0, 0.85)):
    """
    Plot the secondary-structure fractions per frame and save the figure.

    Parameters:
    -----------
    results_df : pandas.DataFrame
        Table with a "Time" column and the columns "Helix Fraction",
        "Sheet Fraction", "Coil Fraction", "Turn Fraction",
        "Bend Fraction" and "3-Helix Fraction".
    title : str
        Title of the plot.
    output_png : str
        Path of the first saved image (300 dpi).
    output_tiff : str
        Path of the second saved image (300 dpi).
    xlim : tuple or None, optional
        (min, max) of the x axis.  Defaults to the previously hard-coded
        (0, 10000); pass None to let matplotlib autoscale.
    ylim : tuple or None, optional
        (min, max) of the y axis.  Defaults to the previously hard-coded
        (0, 0.85); pass None to let matplotlib autoscale.
    """
    # (column, legend label, line colour) for each plotted curve.
    series = (
        ("Helix Fraction", "α-Helix", "#6A9EDA"),
        ("Sheet Fraction", "β-Sheet", "#f2444d"),
        ("Coil Fraction", "Loop/Coil", "#4bab44"),
        ("Turn Fraction", "Turn", "#fc9e19"),
        ("Bend Fraction", "Bend", "#54b36a"),
        ("3-Helix Fraction", "3-Helix", "#c9824f"),
    )

    plt.figure(figsize=(7, 6))
    for column, label, color in series:
        plt.plot(results_df["Time"], results_df[column], label=label, color=color, linewidth=2)

    plt.xlabel("Frames")
    plt.ylabel("Fraction of Residues")
    plt.title(title)
    # Legend sits below the axes, one row with all six entries.
    plt.legend(loc="lower center", bbox_to_anchor=(0.5, -0.2), ncol=6, frameon=False, markerscale=2, handlelength=2, handleheight=2)
    plt.grid(False)
    if xlim is not None:
        plt.xlim(*xlim)
    if ylim is not None:
        plt.ylim(*ylim)

    # bbox_inches='tight' keeps the legend outside the axes in the image.
    plt.savefig(output_png, dpi=300, bbox_inches='tight')
    plt.savefig(output_tiff, dpi=300, bbox_inches='tight')
    plt.show()
|
94
|
+
|
95
|
+
def fractions_ss_analysis(file_path, output_png, output_tiff, title):
    """
    Run the secondary-structure fraction workflow: load, compute, plot.

    Parameters:
    -----------
    file_path : str
        Path of the input file passed to ``load_data``.
    output_png : str
        Path of the first image written by ``plot_results``.
    output_tiff : str
        Path of the second image written by ``plot_results``.
    title : str
        Title of the plot.
    """
    df = load_data(file_path)
    if df is None:
        # load_data has already printed the reason; nothing to plot.
        return
    plot_results(calculate_fractions(df), title, output_png, output_tiff)
|
dynamispectra/Hbond.py
ADDED
@@ -0,0 +1,219 @@
|
|
1
|
+
import numpy as np
|
2
|
+
import matplotlib.pyplot as plt
|
3
|
+
from scipy.stats import gaussian_kde
|
4
|
+
import os
|
5
|
+
|
6
|
+
def read_hbond(file):
    """
    Reads hbond data from a .xvg file.

    Blank lines and lines starting with '#', '@' or ';' (after removing
    surrounding whitespace) are skipped.  From every other line the first
    two whitespace-separated values are parsed as floats; lines with
    fewer than two values are ignored and lines that cannot be parsed are
    reported and skipped.

    Parameters:
    -----------
    file : str
        Path to the .xvg file.

    Returns:
    --------
    times : numpy.ndarray or None
        Array of the first-column values (simulation times).
    hbonds : numpy.ndarray or None
        Array of the second-column values (hbond values).

    Both are None when the file cannot be read or holds no valid data;
    the error is printed instead of being raised.
    """
    try:
        print(f"Reading file: {file}")

        times = []
        hbonds = []

        with open(file, 'r') as f:
            for raw_line in f:
                # Strip before testing the prefix so that indented
                # comment lines are skipped instead of reaching float().
                line = raw_line.strip()
                if not line or line.startswith(('#', '@', ';')):
                    continue

                values = line.split()
                if len(values) < 2:
                    continue

                try:
                    time, hbond = map(float, values[:2])
                except ValueError:
                    # Report and skip lines that are not numeric.
                    print(f"Error processing line: {line}")
                    continue

                times.append(time)
                hbonds.append(hbond)

        if not times:
            raise ValueError(f"File {file} does not contain valid data.")

        return np.array(times), np.array(hbonds)

    except Exception as e:
        # Deliberate best-effort boundary: callers receive (None, None).
        print(f"Error reading file {file}: {e}")
        return None, None
|
61
|
+
|
62
|
+
def check_simulation_times(*time_arrays):
    r"""
    Checks if simulation times are consistent across files.

    The first array is the reference; every other array must have the
    same shape and be element-wise close to it (``numpy.allclose``).

    Parameters:
    -----------
    \*time_arrays : list of numpy.ndarray
        Arrays of times to compare.

    Raises:
    -------
    ValueError
        If an array taking part in a comparison is None, or if an array
        differs from the first one in shape or in values.
    """
    for i in range(1, len(time_arrays)):
        reference, candidate = time_arrays[0], time_arrays[i]
        if reference is None or candidate is None:
            raise ValueError(f"Simulation times are missing for file 1 or file {i+1}")

        # Compare shapes explicitly: np.allclose broadcasts, so a
        # length-1 array would otherwise "match" a longer one.
        same_shape = np.shape(reference) == np.shape(candidate)
        if not same_shape or not np.allclose(reference, candidate):
            raise ValueError(f"Simulation times do not match between file 1 and file {i+1}")
|
74
|
+
|
75
|
+
def plot_hbond(time_simulation1, mean_simulation1, std_simulation1,
               time_simulation2, mean_simulation2, std_simulation2,
               time_simulation3, mean_simulation3, std_simulation3,
               output_folder):
    """
    Draw the mean hbond curve with a ±std band for up to three groups.

    A group is drawn only when its ``time_simulationN`` is not None.  The
    figure is written to ``output_folder`` as 'hbond_plot.tiff' and
    'hbond_plot.png' (300 dpi) and then shown.

    Parameters:
    -----------
    time_simulationN, mean_simulationN, std_simulationN : array-like or None
        x values, mean values and standard deviations of group N.
    output_folder : str
        Folder for the saved images; created if it does not exist.
    """
    plt.figure(figsize=(7, 6))
    plt.plot()

    # One entry per group: data triple plus its legend label and colour.
    groups = (
        (time_simulation1, mean_simulation1, std_simulation1, 'Simulation 1', '#333333'),
        (time_simulation2, mean_simulation2, std_simulation2, 'Simulation 2', '#6A9EDA'),
        (time_simulation3, mean_simulation3, std_simulation3, 'Simulation 3', '#54b36a'),
    )
    for x_values, mean_values, std_values, legend_label, colour in groups:
        if x_values is None:
            continue
        plt.plot(x_values, mean_values, label=legend_label, color=colour, linewidth=2)
        plt.fill_between(x_values, mean_values - std_values, mean_values + std_values, color=colour, alpha=0.2)

    # Axis labels, legend and tick styling.
    plt.xlabel('Time (ns)', fontsize=12)
    plt.ylabel('H-bonds', fontsize=12)
    plt.title('', fontsize=14)
    plt.legend(frameon=False, loc='upper right', fontsize=10)
    plt.tick_params(axis='both', which='major', labelsize=10)
    plt.grid(False)

    # The x axis is fixed to the range 0..100.
    plt.xlim(left=0)
    plt.xlim(right=100)

    plt.tight_layout()

    # Make sure the target folder exists, then write both image formats.
    os.makedirs(output_folder, exist_ok=True)
    tiff_path = os.path.join(output_folder, 'hbond_plot.tiff')
    plt.savefig(tiff_path, format='tiff', dpi=300)
    png_path = os.path.join(output_folder, 'hbond_plot.png')
    plt.savefig(png_path, format='png', dpi=300)

    plt.show()
|
126
|
+
|
127
|
+
def plot_density(mean_simulation1, mean_simulation2, mean_simulation3, output_folder):
    """
    Generates the density plot for the groups provided.

    For every group that is not None a Gaussian kernel density estimate
    of its values is evaluated on 1000 points between 0 and the group's
    maximum and drawn as a filled area.  The figure is written to
    ``output_folder`` as 'density_plot.tiff' and 'density_plot.png'
    (300 dpi) and then shown.

    NOTE: the file names are fixed, so a later call with the same
    ``output_folder`` overwrites the earlier density plots.

    Parameters:
    -----------
    mean_simulation1, mean_simulation2, mean_simulation3 : array-like or None
        Values of each group (pass None to omit a group).
    output_folder : str
        Folder for the saved images; created if it does not exist.
    """
    plt.figure(figsize=(6, 6))

    # (values, legend label, colour, opacity) for each group.
    groups = (
        (mean_simulation1, 'Simulation 1', '#333333', 0.5),
        (mean_simulation2, 'Simulation 2', '#6A9EDA', 0.6),
        (mean_simulation3, 'Simulation 3', '#54b36a', 0.5),
    )
    for values, label, color, alpha in groups:
        if values is None:
            continue
        kde = gaussian_kde(values)
        x_vals = np.linspace(0, max(values), 1000)
        plt.fill_between(x_vals, kde(x_vals), color=color, alpha=alpha, label=label)

    # Configure the density plot
    plt.xlabel('H-bonds', fontsize=12)
    plt.ylabel('Density', fontsize=12)
    plt.title('', fontsize=14)
    plt.legend(frameon=False, loc='upper left', fontsize=10)
    plt.tick_params(axis='both', which='major', labelsize=10)
    plt.grid(False)
    plt.tight_layout()

    # Create the folder here as well so the function also works when it
    # is called on its own, before any other plot has created it.
    os.makedirs(output_folder, exist_ok=True)

    plt.savefig(os.path.join(output_folder, 'density_plot.tiff'), format='tiff', dpi=300)
    plt.savefig(os.path.join(output_folder, 'density_plot.png'), format='png', dpi=300)

    plt.show()
|
168
|
+
|
169
|
+
def hbond_analysis(output_folder, *simulation_files_groups):
    r"""
    Main function to generate hbond analysis and plots.

    Every non-empty group is read file by file, the files' times are
    checked for consistency, and the element-wise mean and standard
    deviation over the group's files are plotted (time series and
    density).

    Parameters:
    -----------
    output_folder : str
        Output folder to save the plots.
    \*simulation_files_groups : list of str
        List of paths to .xvg files for each simulation group.
        You can pass 1, 2, or 3 groups; empty groups are ignored.

    Raises:
    -------
    ValueError
        If no non-empty group or more than three groups are given, if a
        file cannot be read, or if the times within a group differ.
    """
    # Validate the number of groups before doing any file I/O.
    groups = [group for group in simulation_files_groups if group]
    if not groups:
        raise ValueError("You must provide at least one group of simulation files.")
    if len(groups) > 3:
        raise ValueError(f"You can provide at most 3 groups of simulation files, got {len(groups)}.")

    # Helper function to process a group of files
    def process_group(file_paths):
        times = []
        hbonds = []
        for file in file_paths:
            time, hbond = read_hbond(file)
            if time is None or hbond is None:
                # read_hbond signals failure with (None, None); stop with
                # a clear message instead of failing later inside numpy.
                raise ValueError(f"Could not read hbond data from file {file}")
            times.append(time)
            hbonds.append(hbond)
        check_simulation_times(*times)  # Check if times are consistent
        mean_hbond = np.mean(hbonds, axis=0)  # Mean over the files
        std_hbond = np.std(hbonds, axis=0)  # Standard deviation over the files
        return times[0], mean_hbond, std_hbond

    results = [process_group(group) for group in groups]

    # Pad to three entries; the plot functions skip groups that are None.
    results += [(None, None, None)] * (3 - len(results))
    (time1, mean1, std1), (time2, mean2, std2), (time3, mean3, std3) = results

    plot_hbond(time1, mean1, std1,
               time2, mean2, std2,
               time3, mean3, std3, output_folder)
    plot_density(mean1, mean2, mean3, output_folder)
|
dynamispectra/PCA.py
ADDED
@@ -0,0 +1,118 @@
|
|
1
|
+
import numpy as np
|
2
|
+
import matplotlib.pyplot as plt
|
3
|
+
import os
|
4
|
+
|
5
|
+
def read_xvg(file_path):
    """
    Reads data from a .xvg file.

    Blank lines and lines starting with '#', '@' or ';' (after removing
    surrounding whitespace) are skipped.  The first two columns of every
    other line are parsed as floats.

    Parameters:
    -----------
    file_path : str
        Path to the .xvg file.

    Returns:
    --------
    data : numpy.ndarray
        Array of shape (n, 2) with the first two columns of the file
        (used by the caller as PC1 and PC2); shape (0, 2) when the file
        has no data lines.
    """
    data = []
    with open(file_path, "r") as file:
        for raw_line in file:
            # Strip first so blank and indented comment lines are skipped
            # instead of failing in the column access below.
            line = raw_line.strip()
            if not line or line.startswith(("#", "@", ";")):
                continue
            values = line.split()
            data.append([float(values[0]), float(values[1])])  # PC1 and PC2
    # reshape keeps the result two-dimensional even without data lines.
    return np.array(data, dtype=float).reshape(-1, 2)
|
26
|
+
|
27
|
+
def read_eigenvalues(eigenval_path):
    """
    Reads eigenvalues from a .xvg file.

    Blank lines and lines starting with '#', '@' or ';' (after removing
    surrounding whitespace) are skipped.  The second column of every
    other line is parsed as a float.

    Parameters:
    -----------
    eigenval_path : str
        Path to the .xvg file containing eigenvalues.

    Returns:
    --------
    eigenvalues : numpy.ndarray
        Array of eigenvalues, in file order.
    """
    eigenvalues = []
    with open(eigenval_path, "r") as file:
        for raw_line in file:
            # Strip first so blank and indented comment lines are skipped
            # instead of failing in the column access below.
            line = raw_line.strip()
            if not line or line.startswith(("#", "@", ";")):
                continue
            # Extract the second column (eigenvalues)
            eigenvalues.append(float(line.split()[1]))
    return np.array(eigenvalues)
|
49
|
+
|
50
|
+
def plot_pca(pca_data, eigenvalues, output_folder, title="PCA"):
    """
    Draw the PC1-vs-PC2 scatter plot and save it as 'pca_plot.png'.

    The axis labels show the share of the first and second eigenvalue in
    the sum of all eigenvalues, as a percentage.  Points are coloured by
    their row position in ``pca_data`` (evenly spaced between 0 and 1).

    Parameters:
    -----------
    pca_data : numpy.ndarray
        Two-dimensional array; column 0 is plotted on x, column 1 on y.
    eigenvalues : numpy.ndarray
        Array of eigenvalues (the first two are used for the labels).
    output_folder : str
        Folder for the saved image; created if it does not exist.
    title : str, optional
        Title of the plot.
    """
    # Explained variance of the first two components, in percent.
    total_variance = np.sum(eigenvalues)
    pc1_variance, pc2_variance = (
        (eigenvalues[component] / total_variance) * 100 for component in (0, 1)
    )

    plt.figure(figsize=(7, 6))
    point_order = np.linspace(0, 1, len(pca_data))  # colour gradient along the rows
    scatter = plt.scatter(
        pca_data[:, 0],
        pca_data[:, 1],
        c=point_order,
        cmap="viridis",
        alpha=0.8,
        edgecolors='k',
        linewidths=0.8,
    )

    plt.xlabel(f"PC1 ({pc1_variance:.2f}%)")
    plt.ylabel(f"PC2 ({pc2_variance:.2f}%)")
    plt.title(title)

    plt.colorbar(scatter, label="Simulation times")
    plt.grid(False)

    # Write the PNG (300 dpi) into the output folder, creating it first.
    os.makedirs(output_folder, exist_ok=True)
    plt.savefig(os.path.join(output_folder, 'pca_plot.png'), dpi=300)

    plt.show()
|
95
|
+
|
96
|
+
def pca_analysis(pca_file_path, eigenval_path, output_folder, title="PCA"):
    """
    Read the PCA projection and the eigenvalues, then draw the PCA plot.

    Parameters:
    -----------
    pca_file_path : str
        Path to the .xvg file containing PCA data.
    eigenval_path : str
        Path to the .xvg file containing eigenvalues.
    output_folder : str
        Output folder to save the plot.
    title : str, optional
        Title of the plot.
    """
    # The projection file is read first, then the eigenvalue file.
    projection = read_xvg(pca_file_path)
    spectrum = read_eigenvalues(eigenval_path)
    plot_pca(projection, spectrum, output_folder, title)
|
dynamispectra/RMSD.py
ADDED
@@ -0,0 +1,218 @@
|
|
1
|
+
import numpy as np
|
2
|
+
import matplotlib.pyplot as plt
|
3
|
+
from scipy.stats import gaussian_kde
|
4
|
+
import os
|
5
|
+
|
6
|
+
def read_rmsd(file):
    """
    Reads RMSD data from a .xvg file.

    Blank lines and lines starting with '#', '@' or ';' (after removing
    surrounding whitespace) are skipped.  From every other line the first
    two whitespace-separated values are parsed as floats; lines with
    fewer than two values are ignored and lines that cannot be parsed are
    reported and skipped.

    Parameters:
    -----------
    file : str
        Path to the .xvg file.

    Returns:
    --------
    times : numpy.ndarray or None
        Array of the first-column values (simulation times).
    rmsd : numpy.ndarray or None
        Array of the second-column values (RMSD values).

    Both are None when the file cannot be read or holds no valid data;
    the error is printed instead of being raised.
    """
    try:
        print(f"Reading file: {file}")

        times = []
        rmsd = []

        with open(file, 'r') as f:
            for raw_line in f:
                # Strip before testing the prefix so that indented
                # comment lines are skipped instead of reaching float().
                line = raw_line.strip()
                if not line or line.startswith(('#', '@', ';')):
                    continue

                values = line.split()
                if len(values) < 2:
                    continue

                try:
                    time, rmsd_val = map(float, values[:2])
                except ValueError:
                    # Report and skip lines that are not numeric.
                    print(f"Error processing line: {line}")
                    continue

                times.append(time)
                rmsd.append(rmsd_val)

        if not times:
            raise ValueError(f"File {file} does not contain valid data.")

        return np.array(times), np.array(rmsd)

    except Exception as e:
        # Deliberate best-effort boundary: callers receive (None, None).
        print(f"Error reading file {file}: {e}")
        return None, None
|
61
|
+
|
62
|
+
def check_simulation_times(*time_arrays):
    r"""
    Checks if simulation times are consistent across files.

    The first array is the reference; every other array must have the
    same shape and be element-wise close to it (``numpy.allclose``).

    Parameters:
    -----------
    \*time_arrays : list of numpy.ndarray
        Arrays of times to compare.

    Raises:
    -------
    ValueError
        If an array taking part in a comparison is None, or if an array
        differs from the first one in shape or in values.
    """
    for i in range(1, len(time_arrays)):
        reference, candidate = time_arrays[0], time_arrays[i]
        if reference is None or candidate is None:
            raise ValueError(f"Simulation times are missing for file 1 or file {i+1}")

        # Compare shapes explicitly: np.allclose broadcasts, so a
        # length-1 array would otherwise "match" a longer one.
        same_shape = np.shape(reference) == np.shape(candidate)
        if not same_shape or not np.allclose(reference, candidate):
            raise ValueError(f"Simulation times do not match between file 1 and file {i+1}")
|
74
|
+
|
75
|
+
def plot_rmsd(time_simulation1, mean_simulation1, std_simulation1,
              time_simulation2, mean_simulation2, std_simulation2,
              time_simulation3, mean_simulation3, std_simulation3,
              output_folder):
    """
    Draw the mean RMSD curve with a ±std band for up to three groups.

    A group is drawn only when its ``time_simulationN`` is not None.  The
    figure is written to ``output_folder`` as 'rmsd_plot.tiff' and
    'rmsd_plot.png' (300 dpi) and then shown.

    Parameters:
    -----------
    time_simulationN, mean_simulationN, std_simulationN : array-like or None
        x values, mean values and standard deviations of group N.
    output_folder : str
        Folder for the saved images; created if it does not exist.
    """
    plt.figure(figsize=(7, 6))
    plt.plot()

    # One entry per group: data triple plus its legend label and colour.
    groups = (
        (time_simulation1, mean_simulation1, std_simulation1, 'Simulation 1', '#333333'),
        (time_simulation2, mean_simulation2, std_simulation2, 'Simulation 2', '#6A9EDA'),
        (time_simulation3, mean_simulation3, std_simulation3, 'Simulation 3', '#54b36a'),
    )
    for x_values, mean_values, std_values, legend_label, colour in groups:
        if x_values is None:
            continue
        plt.plot(x_values, mean_values, label=legend_label, color=colour, linewidth=2)
        plt.fill_between(x_values, mean_values - std_values, mean_values + std_values, color=colour, alpha=0.2)

    # Axis labels, legend and tick styling.
    plt.xlabel('Time (ns)', fontsize=12)
    plt.ylabel('RMSD (nm)', fontsize=12)
    plt.title('', fontsize=14)
    plt.legend(frameon=False, loc='lower right', fontsize=10)
    plt.tick_params(axis='both', which='major', labelsize=10)
    plt.grid(False)

    # The x axis is fixed to the range 0..100.
    plt.xlim(left=0)
    plt.xlim(right=100)

    plt.tight_layout()

    # Make sure the target folder exists, then write both image formats.
    os.makedirs(output_folder, exist_ok=True)
    tiff_path = os.path.join(output_folder, 'rmsd_plot.tiff')
    plt.savefig(tiff_path, format='tiff', dpi=300)
    png_path = os.path.join(output_folder, 'rmsd_plot.png')
    plt.savefig(png_path, format='png', dpi=300)

    plt.show()
|
125
|
+
|
126
|
+
def plot_density(mean_simulation1, mean_simulation2, mean_simulation3, output_folder):
    """
    Generates the density plot for the groups provided.

    For every group that is not None a Gaussian kernel density estimate
    of its values is evaluated on 1000 points between 0 and the group's
    maximum and drawn as a filled area.  The figure is written to
    ``output_folder`` as 'density_plot.tiff' and 'density_plot.png'
    (300 dpi) and then shown.

    NOTE: the file names are fixed, so a later call with the same
    ``output_folder`` overwrites the earlier density plots.

    Parameters:
    -----------
    mean_simulation1, mean_simulation2, mean_simulation3 : array-like or None
        Values of each group (pass None to omit a group).
    output_folder : str
        Folder for the saved images; created if it does not exist.
    """
    plt.figure(figsize=(6, 6))

    # (values, legend label, colour, opacity) for each group.
    groups = (
        (mean_simulation1, 'Simulation 1', '#333333', 0.5),
        (mean_simulation2, 'Simulation 2', '#6A9EDA', 0.6),
        (mean_simulation3, 'Simulation 3', '#54b36a', 0.5),
    )
    for values, label, color, alpha in groups:
        if values is None:
            continue
        kde = gaussian_kde(values)
        x_vals = np.linspace(0, max(values), 1000)
        plt.fill_between(x_vals, kde(x_vals), color=color, alpha=alpha, label=label)

    # Configure the density plot
    plt.xlabel('RMSD (nm)', fontsize=12)
    plt.ylabel('Density', fontsize=12)
    plt.title('', fontsize=14)
    plt.legend(frameon=False, loc='upper left', fontsize=10)
    plt.tick_params(axis='both', which='major', labelsize=10)
    plt.grid(False)
    plt.tight_layout()

    # Create the folder here as well so the function also works when it
    # is called on its own, before any other plot has created it.
    os.makedirs(output_folder, exist_ok=True)

    plt.savefig(os.path.join(output_folder, 'density_plot.tiff'), format='tiff', dpi=300)
    plt.savefig(os.path.join(output_folder, 'density_plot.png'), format='png', dpi=300)

    plt.show()
|
167
|
+
|
168
|
+
def rmsd_analysis(output_folder, *simulation_files_groups):
    r"""
    Main function to generate RMSD analysis and plots.

    Every non-empty group is read file by file, the files' times are
    checked for consistency, and the element-wise mean and standard
    deviation over the group's files are plotted (time series and
    density).

    Parameters:
    -----------
    output_folder : str
        Output folder to save the plots.
    \*simulation_files_groups : list of str
        List of paths to .xvg files for each simulation group.
        You can pass 1, 2, or 3 groups; empty groups are ignored.

    Raises:
    -------
    ValueError
        If no non-empty group or more than three groups are given, if a
        file cannot be read, or if the times within a group differ.
    """
    # Validate the number of groups before doing any file I/O.
    groups = [group for group in simulation_files_groups if group]
    if not groups:
        raise ValueError("You must provide at least one group of simulation files.")
    if len(groups) > 3:
        raise ValueError(f"You can provide at most 3 groups of simulation files, got {len(groups)}.")

    # Helper function to process a group of files
    def process_group(file_paths):
        times = []
        rmsd = []
        for file in file_paths:
            time, rmsd_val = read_rmsd(file)
            if time is None or rmsd_val is None:
                # read_rmsd signals failure with (None, None); stop with
                # a clear message instead of failing later inside numpy.
                raise ValueError(f"Could not read RMSD data from file {file}")
            times.append(time)
            rmsd.append(rmsd_val)
        check_simulation_times(*times)  # Check if times are consistent
        mean_rmsd = np.mean(rmsd, axis=0)  # Mean over the files
        std_rmsd = np.std(rmsd, axis=0)  # Standard deviation over the files
        return times[0], mean_rmsd, std_rmsd

    results = [process_group(group) for group in groups]

    # Pad to three entries; the plot functions skip groups that are None.
    results += [(None, None, None)] * (3 - len(results))
    (time1, mean1, std1), (time2, mean2, std2), (time3, mean3, std3) = results

    plot_rmsd(time1, mean1, std1,
              time2, mean2, std2,
              time3, mean3, std3, output_folder)
    plot_density(mean1, mean2, mean3, output_folder)
|