DynamiSpectra 1.0.4__py3-none-any.whl → 1.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,100 @@
1
+ # analysis.py
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+
5
+ # State mapping
6
# Integer code assigned to each one-letter secondary-structure symbol.
# '~' and 'B' deliberately share -1: both are treated as undefined structure.
STATE_MAPPING = dict(
    [
        ('H', 1),    # α-Helix
        ('E', 2),    # β-Sheet
        ('C', 0),    # Loop/Coil
        ('T', 3),    # Turn
        ('S', 4),    # Bend
        ('G', 5),    # 3-Helix
        ('~', -1),   # Undefined structure
        ('B', -1),   # Treated as undefined structure
    ]
)
16
+
17
def load_data(file_path):
    """
    Read a headerless CSV file into a DataFrame and print a preview.

    Parameters:
    -----------
    file_path : str
        Path to the file; it is parsed with ``pd.read_csv(..., header=None)``.

    Returns:
    --------
    pandas.DataFrame or None
        The loaded table, or None when any exception occurred while
        loading or previewing (the error is printed, not raised).
    """
    try:
        table = pd.read_csv(file_path, header=None)
        # One print call with a newline separator emits the same two lines
        # as printing the caption and the preview separately.
        print("First few rows of the file:", table.head(), sep="\n")
        return table
    except Exception as e:
        print(f"Error loading the file: {e}")
        return None
27
+
28
def calculate_fractions(df):
    """
    Calculate, for every row, the fraction of residues in each conformation.

    Each row's first column (label 0) is read as a string of one-letter
    codes. The fraction for a conformation is the number of occurrences of
    its code divided by the length of that string.

    Parameters:
    -----------
    df : pandas.DataFrame
        Table whose column 0 holds one sequence string per row.

    Returns:
    --------
    results_df : pandas.DataFrame
        Columns "Time" (the row index of ``df``), "Helix Fraction",
        "Sheet Fraction", "Coil Fraction", "Turn Fraction",
        "Bend Fraction" and "3-Helix Fraction".
    """
    # Output column -> one-letter code counted in each sequence.
    code_by_column = {
        "Helix Fraction": "H",
        "Sheet Fraction": "E",
        "Coil Fraction": "C",
        "Turn Fraction": "T",
        "Bend Fraction": "S",
        "3-Helix Fraction": "G",
    }

    columns = {"Time": []}
    columns.update((name, []) for name in code_by_column)

    for index, row in df.iterrows():
        cell = row[0]
        if isinstance(cell, str):
            sequence = cell
        elif pd.isna(cell):
            # A missing cell is an empty frame, not the literal text "nan".
            sequence = ""
        else:
            sequence = str(cell)
        total_residues = len(sequence)

        columns["Time"].append(index)
        for name, code in code_by_column.items():
            # An empty sequence has no residues: report 0.0 rather than
            # dividing by zero.
            fraction = sequence.count(code) / total_residues if total_residues else 0.0
            columns[name].append(fraction)

    results_df = pd.DataFrame(columns)

    print("First few rows of the results:")
    print(results_df.head())
    return results_df
71
+
72
def plot_results(results_df, title, output_png, output_tiff, xlim=(0, 10000), ylim=(0, 0.85)):
    """
    Plot every conformation fraction against the frame number and save it.

    Parameters:
    -----------
    results_df : pandas.DataFrame
        Table with a "Time" column and the six "... Fraction" columns
        plotted below.
    title : str
        Title of the plot.
    output_png : str
        Path the figure is saved to first (300 dpi).
    output_tiff : str
        Path the figure is saved to second (300 dpi).
    xlim : tuple of (float, float) or None, optional
        X-axis limits. Defaults to (0, 10000), the previously hard-coded
        range. Pass None to keep matplotlib's automatic limits.
    ylim : tuple of (float, float) or None, optional
        Y-axis limits. Defaults to (0, 0.85), the previously hard-coded
        range. Pass None to keep matplotlib's automatic limits.
    """
    # (column, legend label, line colour) for each curve, in drawing order.
    series = (
        ("Helix Fraction", "α-Helix", "#6A9EDA"),
        ("Sheet Fraction", "β-Sheet", "#f2444d"),
        ("Coil Fraction", "Loop/Coil", "#4bab44"),
        ("Turn Fraction", "Turn", "#fc9e19"),
        ("Bend Fraction", "Bend", "#54b36a"),
        ("3-Helix Fraction", "3-Helix", "#c9824f"),
    )

    plt.figure(figsize=(7, 6))
    for column, label, color in series:
        plt.plot(results_df["Time"], results_df[column], label=label, color=color, linewidth=2)

    plt.xlabel("Frames")
    plt.ylabel("Fraction of Residues")
    plt.title(title)
    # The legend sits below the axes; bbox_inches='tight' keeps it in the saved files.
    plt.legend(loc="lower center", bbox_to_anchor=(0.5, -0.2), ncol=6, frameon=False, markerscale=2, handlelength=2, handleheight=2)
    plt.grid(False)
    if xlim is not None:
        plt.xlim(*xlim)
    if ylim is not None:
        plt.ylim(*ylim)

    plt.savefig(output_png, dpi=300, bbox_inches='tight')
    plt.savefig(output_tiff, dpi=300, bbox_inches='tight')
    plt.show()
94
+
95
def fractions_ss_analysis(file_path, output_png, output_tiff, title):
    """
    Run the secondary-structure analysis: load, compute fractions, plot.

    Parameters:
    -----------
    file_path : str
        Path of the data file handed to ``load_data``.
    output_png : str
        Path of the PNG file handed to ``plot_results``.
    output_tiff : str
        Path of the TIFF file handed to ``plot_results``.
    title : str
        Title of the plot.

    Nothing is computed or plotted when ``load_data`` returns None.
    """
    df = load_data(file_path)
    if df is None:
        return
    plot_results(calculate_fractions(df), title, output_png, output_tiff)
dynamispectra/Hbond.py ADDED
@@ -0,0 +1,219 @@
1
+ import numpy as np
2
+ import matplotlib.pyplot as plt
3
+ from scipy.stats import gaussian_kde
4
+ import os
5
+
6
def read_hbond(file):
    """
    Parse the first two numeric columns of a .xvg file.

    Lines starting with '#', '@' or ';' and blank lines are skipped.
    Lines with fewer than two fields are ignored silently; lines whose
    first two fields are not numbers are reported and ignored.

    Parameters:
    -----------
    file : str
        Path to the .xvg file.

    Returns:
    --------
    times : numpy.ndarray or None
        First column of every parsed line.
    hbonds : numpy.ndarray or None
        Second column of every parsed line.

    Both values are None when the file cannot be read or holds no valid
    data; the error is printed instead of raised.
    """
    try:
        print(f"Reading file: {file}")

        samples = []
        with open(file, 'r') as handle:
            for raw in handle:
                is_comment = raw.startswith(('#', '@', ';'))
                if is_comment or not raw.strip():
                    continue

                fields = raw.split()
                if len(fields) < 2:
                    continue

                try:
                    # Both conversions must succeed before the pair is stored.
                    samples.append((float(fields[0]), float(fields[1])))
                except ValueError:
                    print(f"Error processing line: {raw.strip()}")

        if not samples:
            raise ValueError(f"File {file} does not contain valid data.")

        times = np.array([pair[0] for pair in samples])
        hbonds = np.array([pair[1] for pair in samples])
        return times, hbonds

    except Exception as e:
        print(f"Error reading file {file}: {e}")
        return None, None
61
+
62
def check_simulation_times(*time_arrays):
    r"""
    Checks if simulation times are consistent across files.

    Every array is compared with the first one. The shapes are compared
    before the values because ``np.allclose`` broadcasts its arguments, so
    a length-1 array could otherwise be accepted against a longer one.

    Parameters:
    -----------
    \*time_arrays : list of numpy.ndarray
        Arrays of times to compare. With zero or one array there is
        nothing to compare and the function returns without checking.

    Raises:
    -------
    ValueError
        If a compared array is None, or if its shape or values differ
        from those of the first array.
    """
    for position, candidate in enumerate(time_arrays[1:], start=2):
        reference = time_arrays[0]
        if reference is None:
            raise ValueError("Simulation times are missing for file 1")
        if candidate is None:
            raise ValueError(f"Simulation times are missing for file {position}")

        reference = np.asarray(reference)
        candidate = np.asarray(candidate)
        if reference.shape != candidate.shape or not np.allclose(reference, candidate):
            raise ValueError(f"Simulation times do not match between file 1 and file {position}")
74
+
75
def plot_hbond(time_simulation1, mean_simulation1, std_simulation1,
               time_simulation2, mean_simulation2, std_simulation2,
               time_simulation3, mean_simulation3, std_simulation3,
               output_folder, x_max=100):
    """
    Generates the hbond plot with mean and standard deviation for the groups provided.

    Parameters:
    -----------
    time_simulationN, mean_simulationN, std_simulationN : numpy.ndarray or None
        Times, mean values and standard deviations of group N (1 to 3).
        A group whose time array is None is not drawn.
    output_folder : str
        Folder the plot is saved to as 'hbond_plot.tiff' and
        'hbond_plot.png'; it is created if it does not exist.
    x_max : float or None, optional
        Upper x-axis limit. Defaults to 100, the previously hard-coded
        value. Pass None to let matplotlib choose it from the data.
    """
    # (legend label, colour, times, mean, std) for each possible group.
    groups = (
        ('Simulation 1', '#333333', time_simulation1, mean_simulation1, std_simulation1),
        ('Simulation 2', '#6A9EDA', time_simulation2, mean_simulation2, std_simulation2),
        ('Simulation 3', '#54b36a', time_simulation3, mean_simulation3, std_simulation3),
    )

    plt.figure(figsize=(7, 6))

    for label, color, times, mean, std in groups:
        if times is None:
            continue
        plt.plot(times, mean, label=label, color=color, linewidth=2)
        # Shaded band: mean ± one standard deviation.
        plt.fill_between(times, mean - std, mean + std, color=color, alpha=0.2)

    # Configure the hbond plot
    plt.xlabel('Time (ns)', fontsize=12)
    plt.ylabel('H-bonds', fontsize=12)
    plt.title('', fontsize=14)
    plt.legend(frameon=False, loc='upper right', fontsize=10)
    plt.tick_params(axis='both', which='major', labelsize=10)
    plt.grid(False)

    # The x-axis always starts at 0; the upper limit is configurable.
    plt.xlim(left=0)
    if x_max is not None:
        plt.xlim(right=x_max)

    plt.tight_layout()

    # Save the hbond plot in TIFF and PNG formats
    os.makedirs(output_folder, exist_ok=True)
    plt.savefig(os.path.join(output_folder, 'hbond_plot.tiff'), format='tiff', dpi=300)
    plt.savefig(os.path.join(output_folder, 'hbond_plot.png'), format='png', dpi=300)

    plt.show()
126
+
127
def plot_density(mean_simulation1, mean_simulation2, mean_simulation3, output_folder):
    """
    Generates the density plot for the groups provided.

    A Gaussian kernel density estimate is drawn for every group that is not
    None, evaluated on 1000 points between 0 and that group's maximum.

    Parameters:
    -----------
    mean_simulation1, mean_simulation2, mean_simulation3 : numpy.ndarray or None
        Values of each group; a None group is skipped.
    output_folder : str
        Folder the plot is saved to as 'density_plot.tiff' and
        'density_plot.png'; it is created if it does not exist.
    """
    # (legend label, colour, fill opacity, values) for each possible group.
    groups = (
        ('Simulation 1', '#333333', 0.5, mean_simulation1),
        ('Simulation 2', '#6A9EDA', 0.6, mean_simulation2),
        ('Simulation 3', '#54b36a', 0.5, mean_simulation3),
    )

    plt.figure(figsize=(6, 6))

    for label, color, alpha, values in groups:
        if values is None:
            continue
        kde = gaussian_kde(values)
        x_vals = np.linspace(0, np.max(values), 1000)
        plt.fill_between(x_vals, kde(x_vals), color=color, alpha=alpha, label=label)

    # Configure the density plot
    plt.xlabel('H-bonds', fontsize=12)
    plt.ylabel('Density', fontsize=12)
    plt.title('', fontsize=14)
    plt.legend(frameon=False, loc='upper left', fontsize=10)
    plt.tick_params(axis='both', which='major', labelsize=10)
    plt.grid(False)
    plt.tight_layout()

    # Create the folder here as well so the function also works when it is
    # called on its own, before any other plot has created the folder.
    os.makedirs(output_folder, exist_ok=True)
    plt.savefig(os.path.join(output_folder, 'density_plot.tiff'), format='tiff', dpi=300)
    plt.savefig(os.path.join(output_folder, 'density_plot.png'), format='png', dpi=300)

    plt.show()
168
+
169
def hbond_analysis(output_folder, *simulation_files_groups):
    r"""
    Main function to generate hbond analysis and plots.

    Each non-empty group is read file by file, its time axes are checked for
    consistency, and the mean and standard deviation across its files are
    plotted together with a density plot of the means.

    Parameters:
    -----------
    output_folder : str
        Output folder to save the plots.
    \*simulation_files_groups : list of str
        List of paths to .xvg files for each simulation group.
        You can pass 1, 2, or 3 groups. Empty groups are ignored.

    Raises:
    -------
    ValueError
        If no non-empty group is given, if more than 3 non-empty groups
        are given, or if a file cannot be read.
    """
    groups = [group for group in simulation_files_groups if group]

    # Validate the group count before reading anything so a wrong call
    # fails immediately and with an accurate message.
    if not groups:
        raise ValueError("You must provide at least one group of simulation files.")
    if len(groups) > 3:
        raise ValueError(f"At most 3 groups of simulation files are supported, got {len(groups)}.")

    # Helper function to process a group of files
    def process_group(file_paths):
        times = []
        hbonds = []
        for file in file_paths:
            time, hbond = read_hbond(file)
            if time is None or hbond is None:
                # read_hbond reports failures by returning None; stop here
                # instead of failing later inside numpy with an unclear error.
                raise ValueError(f"Could not read valid data from file: {file}")
            times.append(time)
            hbonds.append(hbond)
        check_simulation_times(*times)  # Check if times are consistent
        mean_hbond = np.mean(hbonds, axis=0)
        std_hbond = np.std(hbonds, axis=0)
        return times[0], mean_hbond, std_hbond

    results = [process_group(group) for group in groups]

    # Pad with empty slots so the plotting functions always get three groups.
    padded = results + [(None, None, None)] * (3 - len(results))

    plot_hbond(*padded[0], *padded[1], *padded[2], output_folder)
    plot_density(padded[0][1], padded[1][1], padded[2][1], output_folder)
dynamispectra/PCA.py ADDED
@@ -0,0 +1,118 @@
1
+ import numpy as np
2
+ import matplotlib.pyplot as plt
3
+ import os
4
+
5
def read_xvg(file_path):
    """
    Reads data from a .xvg file.

    Blank lines and lines whose first non-blank character is '#' or '@'
    are skipped; the first two columns of every other line are kept.

    Parameters:
    -----------
    file_path : str
        Path to the .xvg file.

    Returns:
    --------
    data : numpy.ndarray
        Array of shape (n_lines, 2) with the first two columns (PC1 and
        PC2). A file without data lines gives an array of shape (0, 2).

    Raises:
    -------
    IndexError
        If a data line has fewer than two columns.
    ValueError
        If one of the first two columns is not a number.
    """
    data = []
    with open(file_path, "r") as file:
        for line in file:
            stripped = line.strip()
            # Blank lines (e.g. a trailing newline) carry no data; headers
            # and metadata are ignored even when they are indented.
            if not stripped or stripped.startswith(("#", "@")):
                continue
            values = stripped.split()
            data.append([float(values[0]), float(values[1])])  # PC1 and PC2
    # reshape keeps the result two-dimensional even when no line was read.
    return np.array(data, dtype=float).reshape(-1, 2)
26
+
27
def read_eigenvalues(eigenval_path):
    """
    Reads eigenvalues from a .xvg file.

    Blank lines and lines whose first non-blank character is '#' or '@'
    are skipped; the second column of every other line is kept.

    Parameters:
    -----------
    eigenval_path : str
        Path to the .xvg file containing eigenvalues.

    Returns:
    --------
    eigenvalues : numpy.ndarray
        Array of eigenvalues, in file order.

    Raises:
    -------
    IndexError
        If a data line has fewer than two columns.
    ValueError
        If the second column is not a number.
    """
    eigenvalues = []
    with open(eigenval_path, "r") as file:
        for line in file:
            stripped = line.strip()
            # Blank lines would otherwise fail on the column lookup below.
            if not stripped or stripped.startswith(("#", "@")):
                continue
            # Extract the second column (eigenvalues)
            eigenvalues.append(float(stripped.split()[1]))
    return np.array(eigenvalues, dtype=float)
49
+
50
def plot_pca(pca_data, eigenvalues, output_folder, title="PCA"):
    """
    Draw the PC1-versus-PC2 scatter plot and save it as a PNG file.

    Parameters:
    -----------
    pca_data : numpy.ndarray
        Two-dimensional array; column 0 goes on the x-axis and column 1
        on the y-axis.
    eigenvalues : numpy.ndarray
        Eigenvalues; the first two, divided by the sum of all of them,
        give the percentages shown in the axis labels.
    output_folder : str
        Folder that receives 'pca_plot.png'; it is created if needed.
    title : str, optional
        Title of the plot.
    """
    # Share of the total variance carried by each of the first two components.
    variance_sum = np.sum(eigenvalues)
    pc1_percent, pc2_percent = ((eigenvalues[k] / variance_sum) * 100 for k in (0, 1))

    # Points are coloured by their position in the array (0 = first, 1 = last).
    point_order = np.linspace(0, 1, len(pca_data))

    plt.figure(figsize=(7, 6))
    points = plt.scatter(
        pca_data[:, 0],
        pca_data[:, 1],
        c=point_order,
        cmap="viridis",
        alpha=0.8,
        edgecolors='k',
        linewidths=0.8,
    )

    plt.xlabel(f"PC1 ({pc1_percent:.2f}%)")
    plt.ylabel(f"PC2 ({pc2_percent:.2f}%)")
    plt.title(title)
    plt.colorbar(points, label="Simulation times")
    plt.grid(False)

    # Make sure the destination exists, then write the figure at 300 DPI.
    os.makedirs(output_folder, exist_ok=True)
    plt.savefig(os.path.join(output_folder, 'pca_plot.png'), dpi=300)

    plt.show()
95
+
96
def pca_analysis(pca_file_path, eigenval_path, output_folder, title="PCA"):
    """
    Read the PCA projection and eigenvalue files and plot the result.

    Parameters:
    -----------
    pca_file_path : str
        Path to the .xvg file containing PCA data.
    eigenval_path : str
        Path to the .xvg file containing eigenvalues.
    output_folder : str
        Output folder to save the plot.
    title : str, optional
        Title of the plot.
    """
    # Arguments are evaluated left to right: projection first, then eigenvalues.
    plot_pca(
        read_xvg(pca_file_path),
        read_eigenvalues(eigenval_path),
        output_folder,
        title,
    )
dynamispectra/RMSD.py ADDED
@@ -0,0 +1,218 @@
1
+ import numpy as np
2
+ import matplotlib.pyplot as plt
3
+ from scipy.stats import gaussian_kde
4
+ import os
5
+
6
def read_rmsd(file):
    """
    Parse the first two numeric columns of a .xvg file.

    Lines starting with '#', '@' or ';' and blank lines are skipped.
    Lines with fewer than two fields are ignored silently; lines whose
    first two fields are not numbers are reported and ignored.

    Parameters:
    -----------
    file : str
        Path to the .xvg file.

    Returns:
    --------
    times : numpy.ndarray or None
        First column of every parsed line.
    rmsd : numpy.ndarray or None
        Second column of every parsed line.

    Both values are None when the file cannot be read or holds no valid
    data; the error is printed instead of raised.
    """
    try:
        print(f"Reading file: {file}")

        samples = []
        with open(file, 'r') as handle:
            for raw in handle:
                is_comment = raw.startswith(('#', '@', ';'))
                if is_comment or not raw.strip():
                    continue

                fields = raw.split()
                if len(fields) < 2:
                    continue

                try:
                    # Both conversions must succeed before the pair is stored.
                    samples.append((float(fields[0]), float(fields[1])))
                except ValueError:
                    print(f"Error processing line: {raw.strip()}")

        if not samples:
            raise ValueError(f"File {file} does not contain valid data.")

        times = np.array([pair[0] for pair in samples])
        rmsd = np.array([pair[1] for pair in samples])
        return times, rmsd

    except Exception as e:
        print(f"Error reading file {file}: {e}")
        return None, None
61
+
62
def check_simulation_times(*time_arrays):
    r"""
    Checks if simulation times are consistent across files.

    Every array is compared with the first one. The shapes are compared
    before the values because ``np.allclose`` broadcasts its arguments, so
    a length-1 array could otherwise be accepted against a longer one.

    Parameters:
    -----------
    \*time_arrays : list of numpy.ndarray
        Arrays of times to compare. With zero or one array there is
        nothing to compare and the function returns without checking.

    Raises:
    -------
    ValueError
        If a compared array is None, or if its shape or values differ
        from those of the first array.
    """
    for position, candidate in enumerate(time_arrays[1:], start=2):
        reference = time_arrays[0]
        if reference is None:
            raise ValueError("Simulation times are missing for file 1")
        if candidate is None:
            raise ValueError(f"Simulation times are missing for file {position}")

        reference = np.asarray(reference)
        candidate = np.asarray(candidate)
        if reference.shape != candidate.shape or not np.allclose(reference, candidate):
            raise ValueError(f"Simulation times do not match between file 1 and file {position}")
74
+
75
def plot_rmsd(time_simulation1, mean_simulation1, std_simulation1,
              time_simulation2, mean_simulation2, std_simulation2,
              time_simulation3, mean_simulation3, std_simulation3,
              output_folder, x_max=100):
    """
    Generates the RMSD plot with mean and standard deviation for the groups provided.

    Parameters:
    -----------
    time_simulationN, mean_simulationN, std_simulationN : numpy.ndarray or None
        Times, mean values and standard deviations of group N (1 to 3).
        A group whose time array is None is not drawn.
    output_folder : str
        Folder the plot is saved to as 'rmsd_plot.tiff' and
        'rmsd_plot.png'; it is created if it does not exist.
    x_max : float or None, optional
        Upper x-axis limit. Defaults to 100, the previously hard-coded
        value. Pass None to let matplotlib choose it from the data.
    """
    # (legend label, colour, times, mean, std) for each possible group.
    groups = (
        ('Simulation 1', '#333333', time_simulation1, mean_simulation1, std_simulation1),
        ('Simulation 2', '#6A9EDA', time_simulation2, mean_simulation2, std_simulation2),
        ('Simulation 3', '#54b36a', time_simulation3, mean_simulation3, std_simulation3),
    )

    plt.figure(figsize=(7, 6))

    for label, color, times, mean, std in groups:
        if times is None:
            continue
        plt.plot(times, mean, label=label, color=color, linewidth=2)
        # Shaded band: mean ± one standard deviation.
        plt.fill_between(times, mean - std, mean + std, color=color, alpha=0.2)

    # Configure the RMSD plot
    plt.xlabel('Time (ns)', fontsize=12)
    plt.ylabel('RMSD (nm)', fontsize=12)
    plt.title('', fontsize=14)
    plt.legend(frameon=False, loc='lower right', fontsize=10)
    plt.tick_params(axis='both', which='major', labelsize=10)
    plt.grid(False)

    # The x-axis always starts at 0; the upper limit is configurable.
    plt.xlim(left=0)
    if x_max is not None:
        plt.xlim(right=x_max)

    plt.tight_layout()

    # Save the RMSD plot in TIFF and PNG formats
    os.makedirs(output_folder, exist_ok=True)
    plt.savefig(os.path.join(output_folder, 'rmsd_plot.tiff'), format='tiff', dpi=300)
    plt.savefig(os.path.join(output_folder, 'rmsd_plot.png'), format='png', dpi=300)

    plt.show()
125
+
126
def plot_density(mean_simulation1, mean_simulation2, mean_simulation3, output_folder):
    """
    Generates the density plot for the groups provided.

    A Gaussian kernel density estimate is drawn for every group that is not
    None, evaluated on 1000 points between 0 and that group's maximum.

    Parameters:
    -----------
    mean_simulation1, mean_simulation2, mean_simulation3 : numpy.ndarray or None
        Values of each group; a None group is skipped.
    output_folder : str
        Folder the plot is saved to as 'density_plot.tiff' and
        'density_plot.png'; it is created if it does not exist.
    """
    # (legend label, colour, fill opacity, values) for each possible group.
    groups = (
        ('Simulation 1', '#333333', 0.5, mean_simulation1),
        ('Simulation 2', '#6A9EDA', 0.6, mean_simulation2),
        ('Simulation 3', '#54b36a', 0.5, mean_simulation3),
    )

    plt.figure(figsize=(6, 6))

    for label, color, alpha, values in groups:
        if values is None:
            continue
        kde = gaussian_kde(values)
        x_vals = np.linspace(0, np.max(values), 1000)
        plt.fill_between(x_vals, kde(x_vals), color=color, alpha=alpha, label=label)

    # Configure the density plot
    plt.xlabel('RMSD (nm)', fontsize=12)
    plt.ylabel('Density', fontsize=12)
    plt.title('', fontsize=14)
    plt.legend(frameon=False, loc='upper left', fontsize=10)
    plt.tick_params(axis='both', which='major', labelsize=10)
    plt.grid(False)
    plt.tight_layout()

    # Create the folder here as well so the function also works when it is
    # called on its own, before any other plot has created the folder.
    os.makedirs(output_folder, exist_ok=True)
    plt.savefig(os.path.join(output_folder, 'density_plot.tiff'), format='tiff', dpi=300)
    plt.savefig(os.path.join(output_folder, 'density_plot.png'), format='png', dpi=300)

    plt.show()
167
+
168
def rmsd_analysis(output_folder, *simulation_files_groups):
    r"""
    Main function to generate RMSD analysis and plots.

    Each non-empty group is read file by file, its time axes are checked for
    consistency, and the mean and standard deviation across its files are
    plotted together with a density plot of the means.

    Parameters:
    -----------
    output_folder : str
        Output folder to save the plots.
    \*simulation_files_groups : list of str
        List of paths to .xvg files for each simulation group.
        You can pass 1, 2, or 3 groups. Empty groups are ignored.

    Raises:
    -------
    ValueError
        If no non-empty group is given, if more than 3 non-empty groups
        are given, or if a file cannot be read.
    """
    groups = [group for group in simulation_files_groups if group]

    # Validate the group count before reading anything so a wrong call
    # fails immediately and with an accurate message.
    if not groups:
        raise ValueError("You must provide at least one group of simulation files.")
    if len(groups) > 3:
        raise ValueError(f"At most 3 groups of simulation files are supported, got {len(groups)}.")

    # Helper function to process a group of files
    def process_group(file_paths):
        times = []
        rmsd = []
        for file in file_paths:
            time, rmsd_val = read_rmsd(file)
            if time is None or rmsd_val is None:
                # read_rmsd reports failures by returning None; stop here
                # instead of failing later inside numpy with an unclear error.
                raise ValueError(f"Could not read valid data from file: {file}")
            times.append(time)
            rmsd.append(rmsd_val)
        check_simulation_times(*times)  # Check if times are consistent
        mean_rmsd = np.mean(rmsd, axis=0)
        std_rmsd = np.std(rmsd, axis=0)
        return times[0], mean_rmsd, std_rmsd

    results = [process_group(group) for group in groups]

    # Pad with empty slots so the plotting functions always get three groups.
    padded = results + [(None, None, None)] * (3 - len(results))

    plot_rmsd(*padded[0], *padded[1], *padded[2], output_folder)
    plot_density(padded[0][1], padded[1][1], padded[2][1], output_folder)