crisp-ase 1.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- CRISP/__init__.py +99 -0
- CRISP/_version.py +1 -0
- CRISP/cli.py +41 -0
- CRISP/data_analysis/__init__.py +38 -0
- CRISP/data_analysis/clustering.py +838 -0
- CRISP/data_analysis/contact_coordination.py +915 -0
- CRISP/data_analysis/h_bond.py +772 -0
- CRISP/data_analysis/msd.py +1199 -0
- CRISP/data_analysis/prdf.py +404 -0
- CRISP/data_analysis/volumetric_atomic_density.py +527 -0
- CRISP/py.typed +1 -0
- CRISP/simulation_utility/__init__.py +31 -0
- CRISP/simulation_utility/atomic_indices.py +155 -0
- CRISP/simulation_utility/atomic_traj_linemap.py +278 -0
- CRISP/simulation_utility/error_analysis.py +254 -0
- CRISP/simulation_utility/interatomic_distances.py +200 -0
- CRISP/simulation_utility/subsampling.py +241 -0
- CRISP/tests/DataAnalysis/__init__.py +1 -0
- CRISP/tests/DataAnalysis/test_clustering_extended.py +212 -0
- CRISP/tests/DataAnalysis/test_contact_coordination.py +184 -0
- CRISP/tests/DataAnalysis/test_contact_coordination_extended.py +465 -0
- CRISP/tests/DataAnalysis/test_h_bond_complete.py +326 -0
- CRISP/tests/DataAnalysis/test_h_bond_extended.py +322 -0
- CRISP/tests/DataAnalysis/test_msd_complete.py +305 -0
- CRISP/tests/DataAnalysis/test_msd_extended.py +522 -0
- CRISP/tests/DataAnalysis/test_prdf.py +206 -0
- CRISP/tests/DataAnalysis/test_volumetric_atomic_density.py +463 -0
- CRISP/tests/SimulationUtility/__init__.py +1 -0
- CRISP/tests/SimulationUtility/test_atomic_traj_linemap.py +101 -0
- CRISP/tests/SimulationUtility/test_atomic_traj_linemap_extended.py +469 -0
- CRISP/tests/SimulationUtility/test_error_analysis_extended.py +151 -0
- CRISP/tests/SimulationUtility/test_interatomic_distances.py +223 -0
- CRISP/tests/SimulationUtility/test_subsampling.py +365 -0
- CRISP/tests/__init__.py +1 -0
- CRISP/tests/test_CRISP.py +28 -0
- CRISP/tests/test_cli.py +87 -0
- CRISP/tests/test_crisp_comprehensive.py +679 -0
- crisp_ase-1.1.2.dist-info/METADATA +141 -0
- crisp_ase-1.1.2.dist-info/RECORD +42 -0
- crisp_ase-1.1.2.dist-info/WHEEL +5 -0
- crisp_ase-1.1.2.dist-info/entry_points.txt +2 -0
- crisp_ase-1.1.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,772 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CRISP/data_analysis/h_bond.py
|
|
3
|
+
|
|
4
|
+
This script performs hydrogen bond analysis on molecular dynamics trajectory data.
|
|
5
|
+
"""
|
|
6
|
+
from ase.io import read
|
|
7
|
+
import numpy as np
|
|
8
|
+
import csv
|
|
9
|
+
from joblib import Parallel, delayed
|
|
10
|
+
import argparse
|
|
11
|
+
import os
|
|
12
|
+
from typing import Union, List, Optional, Tuple, Any, Dict
|
|
13
|
+
import matplotlib.pyplot as plt
|
|
14
|
+
from ase.data import vdw_radii, atomic_numbers, chemical_symbols
|
|
15
|
+
import seaborn as sns
|
|
16
|
+
import itertools
|
|
17
|
+
import pandas as pd
|
|
18
|
+
import networkx as nx
|
|
19
|
+
import plotly.graph_objects as go
|
|
20
|
+
import plotly.io as pio
|
|
21
|
+
|
|
22
|
+
# Plotly renderer used by ``fig.show()``; set once at import time.
pio.renderers.default = 'notebook'

# Public API exported by ``from CRISP.data_analysis.h_bond import *``.
__all__ = ['indices', 'count_hydrogen_bonds', 'aggregate_data', 'hydrogen_bonds']
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def indices(atoms, ind: Union[str, List[Union[int, str]]]) -> np.ndarray:
    """
    Extract atom indices from an ASE Atoms object based on the input specifier.

    Parameters
    ----------
    atoms : ase.Atoms
        ASE Atoms object containing atomic structure
    ind : Union[str, List[Union[int, str]]]
        Index specifier, can be:
        - "all" or None: all atoms
        - string ending with ".npy": load indices from NumPy file
        - integer (Python or NumPy) or list/tuple/array of integers:
          direct atom indices
        - string or list/tuple/array of strings: chemical symbols to select

    Returns
    -------
    np.ndarray
        Array of selected indices

    Raises
    ------
    ValueError
        If the index type is invalid
    """
    # Check for None / "all" without comparing array-like input to a string,
    # which would produce an element-wise (ambiguous) result.
    if ind is None or (isinstance(ind, str) and ind == "all"):
        return np.arange(len(atoms))

    if isinstance(ind, str) and ind.endswith(".npy"):
        # NOTE(security): allow_pickle=True can execute arbitrary code while
        # loading; only pass index files that come from a trusted source.
        return np.load(ind, allow_pickle=True)

    # Normalise every accepted container to a plain list.
    if isinstance(ind, np.ndarray):
        ind = ind.ravel().tolist()
    elif isinstance(ind, tuple):
        ind = list(ind)
    elif not isinstance(ind, list):
        ind = [ind]

    # NumPy integer scalars are not instances of ``int``; accept both.
    if any(isinstance(item, (int, np.integer)) for item in ind):
        return np.array(ind)

    if any(isinstance(item, str) for item in ind):
        # Build the symbol array once instead of once per requested symbol.
        symbols = np.array(atoms.get_chemical_symbols())
        return np.concatenate([np.where(symbols == symbol)[0] for symbol in ind])

    raise ValueError("Invalid index type")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def count_hydrogen_bonds(
    atoms,
    acceptor_atoms: Optional[List[str]] = None,
    angle_cutoff: float = 120,
    h_bond_cutoff: float = 2.4,
    bond_cutoff: float = 1.6,
    mic: bool = True,
    single_h_bond: bool = False
) -> Tuple[Dict[int, List], int]:
    """Count hydrogen bonds in an atomic structure.

    Parameters
    ----------
    atoms : ase.Atoms
        ASE Atoms object containing atomic structure
    acceptor_atoms : List[str], optional
        List of donor/acceptor atom symbols (default: ["N", "O", "F"])
    angle_cutoff : float, optional
        Minimum donor-hydrogen-acceptor angle in degrees (default: 120)
    h_bond_cutoff : float, optional
        Maximum hydrogen-acceptor distance in Angstroms (default: 2.4)
    bond_cutoff : float, optional
        Maximum distance for covalent bond in Angstroms (default: 1.6)
    mic : bool, optional
        Use minimum image convention (default: True)
    single_h_bond : bool, optional
        If True, count every hydrogen at most once, i.e. return the number
        of hydrogens that take part in at least one hydrogen bond; if False,
        count every hydrogen-acceptor pair (default: False)

    Returns
    -------
    Tuple[Dict[int, List], int]
        Dictionary keyed by hydrogen index and the hydrogen bond count.
        For each hydrogen the list is empty when its nearest listed atom is
        not within ``bond_cutoff``; otherwise the first entry is
        ``[donor, distance]`` and each further entry is
        ``[acceptor, distance, angle]``.
    """
    if acceptor_atoms is None:
        acceptor_atoms = ["N", "O", "F"]

    indices_hydrogen = indices(atoms, "H")
    indices_acceptor = indices(atoms, acceptor_atoms)

    dm = atoms.get_all_distances(mic=mic)
    np.fill_diagonal(dm, np.inf)

    # Hydrogen (rows) x acceptor-type atom (columns) distance sub-matrix.
    sub_dm = dm[indices_hydrogen, :][:, indices_acceptor]

    # Evaluate the cutoff mask once and reuse the row/column hits.
    rows, cols = np.where(sub_dm < h_bond_cutoff)
    hb_hyd = indices_hydrogen[rows]
    hb_acc = indices_acceptor[cols]
    distances = sub_dm[rows, cols]

    hydrogen_dict = {}
    for hydrogen, acceptor, distance in zip(hb_hyd, hb_acc, distances):
        hydrogen_dict.setdefault(hydrogen, []).append([acceptor, distance])

    for hydrogen, bonds in hydrogen_dict.items():
        # Nearest neighbour first: it is treated as the covalent donor.
        bonds.sort(key=lambda entry: entry[1])

        if bonds[0][1] < bond_cutoff:
            donor = bonds[0][0]
            filtered_bonds = [bonds[0]]
            for candidate in bonds[1:]:
                angle = atoms.get_angle(donor, hydrogen, candidate[0], mic=mic)
                if angle >= angle_cutoff:
                    candidate.append(angle)
                    filtered_bonds.append(candidate)
            hydrogen_dict[hydrogen] = filtered_bonds
        else:
            # Hydrogen is not covalently bound to any listed atom type.
            hydrogen_dict[hydrogen] = []

    # Make sure every hydrogen has an entry, even without any neighbour.
    for idx in indices_hydrogen:
        if idx not in hydrogen_dict:
            hydrogen_dict[idx] = []

    if single_h_bond:
        num_hydrogen_bonds = sum(1 for bonds in hydrogen_dict.values() if len(bonds) > 1)
    else:
        num_hydrogen_bonds = sum(len(bonds[1:]) for bonds in hydrogen_dict.values())

    return hydrogen_dict, num_hydrogen_bonds
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def aggregate_data(
    data: "pd.DataFrame",
    index_map: Dict[int, int],
    N: int
) -> Tuple[np.ndarray, "nx.Graph", List]:
    """
    Aggregates hydrogen bond data to create correlation matrix and network graph.

    Parameters
    ----------
    data : pandas.DataFrame
        DataFrame containing hydrogen bond data; the columns 'Frame',
        'Donor' and 'Acceptor' are read
    index_map : dict
        Mapping from atom indices to array indices
    N : int
        Number of atoms to include in correlation matrix

    Returns
    -------
    Tuple[np.ndarray, nx.Graph, List]
        Correlation matrix (symmetric, integer bond counts), NetworkX graph
        (node attribute 'size' = number of bonds the atom takes part in,
        edge attribute 'weight' = number of occurrences of the pair), and
        list of all donor/acceptor pairs found in ``data``
    """
    node_frequency = dict.fromkeys(index_map, 0)
    edge_weight = {}
    all_pairs = []

    for _, group in data.groupby('Frame'):
        pairs = group[['Donor', 'Acceptor']].values
        all_pairs.extend(pairs)

        for donor, acceptor in pairs:
            # Pairs involving atoms outside the index map are ignored.
            if donor in index_map and acceptor in index_map:
                node_frequency[donor] += 1
                node_frequency[acceptor] += 1
                # Undirected bond: (a, b) and (b, a) share one edge.
                edge = tuple(sorted([donor, acceptor]))
                edge_weight[edge] = edge_weight.get(edge, 0) + 1

    G = nx.Graph()
    for node, freq in node_frequency.items():
        G.add_node(node, size=freq)

    corr_matrix = np.zeros((N, N), dtype=int)
    for (donor, acceptor), weight in edge_weight.items():
        G.add_edge(donor, acceptor, weight=weight)

        donor_idx = index_map[donor]
        acceptor_idx = index_map[acceptor]
        corr_matrix[donor_idx, acceptor_idx] = weight
        corr_matrix[acceptor_idx, donor_idx] = weight

    return corr_matrix, G, all_pairs
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def process_frame(data, donor_acceptor_indices, frame_index, index_map, N):
    """
    Processes data for a specific frame to create correlation matrix and network graph.

    Parameters
    ----------
    data : pandas.DataFrame
        DataFrame containing hydrogen bond data; the columns 'Frame',
        'Donor', 'Acceptor' and (optionally) 'Distance' are read
    donor_acceptor_indices : np.ndarray
        Array of donor/acceptor atom indices
    frame_index : int
        Frame index to process
    index_map : dict
        Mapping from atom indices to array indices
    N : int
        Number of atoms to include in correlation matrix

    Returns
    -------
    Tuple[np.ndarray, nx.Graph, List]
        Correlation matrix (symmetric; mean distance per bonded pair, 0 where
        there is no bond), NetworkX graph with one edge per bonded pair
        (edge attributes 'weight' = 1 and 'distance'), and the array of
        donor/acceptor pairs of the frame
    """
    frame_data = data[data['Frame'] == frame_index]

    has_distance = 'Distance' in frame_data.columns

    pairs = frame_data[['Donor', 'Acceptor']].values
    # Without a 'Distance' column every bond gets a placeholder distance of
    # 1.0 so that it still shows up in the matrix and the graph.
    distances = frame_data['Distance'].values if has_distance else [1.0] * len(pairs)

    # Accumulate sums and counts so repeated pairs yield a true mean.
    dist_sum = np.zeros((N, N), dtype=float)
    dist_count = np.zeros((N, N), dtype=int)

    for (donor, acceptor), raw_distance in zip(pairs, distances):
        if donor not in index_map or acceptor not in index_map:
            continue

        # Skip missing / unparsable distances ("N/A", NaN, ...) instead of
        # letting them poison the matrix cell.
        try:
            distance = float(raw_distance)
        except (TypeError, ValueError):
            continue
        if not np.isfinite(distance) or distance <= 0:
            continue

        donor_idx = index_map[donor]
        acceptor_idx = index_map[acceptor]

        dist_sum[donor_idx, acceptor_idx] += distance
        dist_count[donor_idx, acceptor_idx] += 1
        if donor_idx != acceptor_idx:
            dist_sum[acceptor_idx, donor_idx] += distance
            dist_count[acceptor_idx, donor_idx] += 1

    corr_matrix = np.divide(
        dist_sum, dist_count,
        out=np.zeros((N, N), dtype=float),
        where=dist_count > 0,
    )

    G = nx.Graph()
    # The matrix is symmetric, so the upper triangle lists every edge once.
    for i, j in zip(*np.nonzero(np.triu(corr_matrix) > 0)):
        G.add_edge(donor_acceptor_indices[i], donor_acceptor_indices[j],
                   weight=1,  # default weight of 1 for edge thickness
                   distance=corr_matrix[i, j])  # Store the actual distance

    return corr_matrix, G, pairs
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def visualize_hydrogen_bonds_matrix(corr_matrix, donor_acceptor_indices=None, frame_index=None, average=False, output_dir=None):
    """
    Visualizes the hydrogen bond correlation matrix using Matplotlib.

    Parameters
    ----------
    corr_matrix : np.ndarray
        Correlation matrix of hydrogen bonds
    donor_acceptor_indices : np.ndarray, optional
        Array of donor/acceptor atom indices used as tick labels; when
        omitted, seaborn's automatic tick labels are used
    frame_index : int, optional
        Frame index for title
    average : bool, optional
        Whether this is an average across frames
    output_dir : str, optional
        Directory to save output file; when omitted the figure is only shown

    Returns
    -------
    None
    """
    plt.figure(figsize=(10, 8))

    # Integer matrices (bond counts) are annotated as integers, everything
    # else with two decimals.
    fmt = "d" if np.issubdtype(corr_matrix.dtype, np.integer) else ".2f"

    # seaborn cannot take None as tick labels; fall back to its "auto" mode.
    tick_labels = "auto" if donor_acceptor_indices is None else donor_acceptor_indices

    sns.heatmap(corr_matrix, annot=True, fmt=fmt, cmap='viridis',
                xticklabels=tick_labels, yticklabels=tick_labels)

    if average:
        plt.title("Average Hydrogen Bond Correlation Matrix Across All Frames")
        filename = "hbond_correlation_matrix_average.png"
    else:
        plt.title(f"Hydrogen Bond Correlation Matrix for Frame {frame_index}")
        filename = f"hbond_correlation_matrix_frame_{frame_index}.png"

    plt.xlabel("Atom Index")
    plt.ylabel("Atom Index")

    if output_dir:
        plt.savefig(os.path.join(output_dir, filename), bbox_inches='tight')

    plt.show()
    # Always release the figure, also when nothing was saved.
    plt.close()

    if output_dir:
        print(f"Correlation matrix saved as '{os.path.join(output_dir, filename)}'")
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
def visualize_hydrogen_bonds_plotly(G, donor_acceptor_indices=None, frame_index=None, average=False, output_dir=None):
    """
    Draw the hydrogen bond network as an interactive Plotly figure.

    Parameters
    ----------
    G : nx.Graph
        NetworkX graph of hydrogen bonds. Node attribute 'size' and edge
        attributes 'weight' and 'distance' are used when present.
    donor_acceptor_indices : np.ndarray, optional
        Array of donor/acceptor atom indices (not used by this function)
    frame_index : int, optional
        Frame index for title and file name
    average : bool, optional
        Whether this is an average across frames
    output_dir : str, optional
        Directory to save the HTML file; when omitted the figure is shown

    Returns
    -------
    None
    """
    # Fixed seed keeps the spring layout reproducible between calls.
    layout = nx.spring_layout(G, seed=42)
    graph_nodes = list(G.nodes())

    # One line trace per bond so each edge carries its own hover label.
    bond_traces = []
    for u, v in G.edges():
        attributes = G[u][v]

        distance = attributes.get('distance', None)
        if distance is None or distance == "N/A":
            distance_label = "N/A"
        else:
            distance_label = f"{distance:.2f} Å"
        bond_label = f'Bond {u}-{v}: {distance_label}'

        (x_start, y_start), (x_end, y_end) = layout[u], layout[v]
        bond_traces.append(go.Scatter(
            x=[x_start, x_end],
            y=[y_start, y_end],
            mode='lines',
            line=dict(width=attributes.get('weight', 1) * 0.5, color='Magenta'),
            name=bond_label,
            hoverinfo='text',
            text=bond_label
        ))

    # Marker size follows the node's 'size' attribute, colour its degree.
    atom_trace = go.Scatter(
        x=[layout[node][0] for node in graph_nodes],
        y=[layout[node][1] for node in graph_nodes],
        mode='markers',
        marker=dict(
            size=[G.nodes[node].get('size', 20) for node in graph_nodes],
            color=[G.degree(node) for node in graph_nodes],
            colorscale='Viridis',
            colorbar=dict(
                thickness=15,
                title=dict(text='Node Connections', side='right'),
                xanchor='left'
            )
        ),
        text=[str(node) for node in graph_nodes],
        textposition='top center',
        name='Donor/Acceptor Atoms'
    )

    if average:
        title = "Average Hydrogen Bond Network Across All Frames"
    else:
        title = f"Hydrogen Bond Network for Frame {frame_index}"

    hidden_axis = dict(showgrid=False, zeroline=False, showticklabels=False)
    caption = dict(
        text="Network graph visualization of hydrogen bonds",
        showarrow=False,
        xref="paper",
        yref="paper",
        x=0.005,
        y=-0.002
    )

    fig = go.Figure(
        data=bond_traces + [atom_trace],
        layout=go.Layout(
            title=dict(text=f'<br>{title}', font=dict(size=16)),
            showlegend=False,
            hovermode='closest',
            margin=dict(b=20, l=5, r=5, t=40),
            annotations=[caption],
            xaxis=dict(hidden_axis),
            yaxis=dict(hidden_axis)
        )
    )

    if not output_dir:
        fig.show()
        return

    if average:
        filename = "hbond_network_average.html"
    else:
        filename = f"hbond_network_frame_{frame_index}.html"
    fig.write_html(os.path.join(output_dir, filename))
    print(f"Figure saved as '{os.path.join(output_dir, filename)}'")
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
def visualize_hydrogen_bonds_matrix_plotly(corr_matrix, donor_acceptor_indices=None, frame_index=None, average=False, output_dir=None):
    """
    Visualizes the hydrogen bond correlation matrix using Plotly.

    Parameters
    ----------
    corr_matrix : np.ndarray
        Correlation matrix of hydrogen bonds
    donor_acceptor_indices : np.ndarray, optional
        Array of donor/acceptor atom indices used as axis labels; when
        omitted, the matrix positions 0..len(corr_matrix)-1 are used
    frame_index : int, optional
        Frame index for title
    average : bool, optional
        Whether this is an average across frames
    output_dir : str, optional
        Directory to save output file; when omitted the figure is shown

    Returns
    -------
    None
    """
    # The default of None cannot be indexed; label by matrix position instead.
    if donor_acceptor_indices is None:
        donor_acceptor_indices = np.arange(len(corr_matrix))

    labels = [str(idx) for idx in donor_acceptor_indices]

    # Format hover text based on matrix values: floats get two decimals,
    # anything else (e.g. integer counts) is shown as is.
    hover_text = [
        [
            f"Donor: {labels[i]}<br>Acceptor: {labels[j]}<br>Value: {val:.2f}"
            if isinstance(val, float)
            else f"Donor: {labels[i]}<br>Acceptor: {labels[j]}<br>Value: {val}"
            for j, val in enumerate(row)
        ]
        for i, row in enumerate(corr_matrix)
    ]

    # NOTE(review): assumes the averaged matrix holds bond counts and the
    # per-frame matrix holds distances, as produced by this module's
    # aggregate_data / process_frame -- confirm for other callers.
    colorbar_title = 'No. of H-Bond' if average else 'Distance [Å]'

    fig = go.Figure(data=go.Heatmap(
        z=corr_matrix,
        x=labels,
        y=labels,
        colorscale='Viridis',
        text=hover_text,
        hoverinfo='text',
        colorbar=dict(title=colorbar_title)
    ))

    title = "Average Hydrogen Bond Correlation Matrix Across All Frames" if average else f"Hydrogen Bond Correlation Matrix for Frame {frame_index}"

    fig.update_layout(
        title=dict(text=title, font=dict(size=16)),
        xaxis=dict(title='Atom Index', tickfont=dict(size=10)),
        yaxis=dict(title='Atom Index', tickfont=dict(size=10)),
        width=900,
        height=800,
        autosize=True
    )

    if output_dir:
        filename = "hbond_correlation_matrix_average.html" if average else f"hbond_correlation_matrix_frame_{frame_index}.html"
        fig.write_html(os.path.join(output_dir, filename))
        print(f"Interactive correlation matrix saved as '{os.path.join(output_dir, filename)}'")
    else:
        fig.show()
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
def visualize_hydrogen_bonds(csv_file, indices_path, frame_index=None, average=False, output_dir=None):
    """
    Plot the hydrogen bond matrix and network from a donor-acceptor CSV file.

    Parameters
    ----------
    csv_file : str
        Path to CSV file with hydrogen bond data
    indices_path : str
        Path to NumPy array file with donor/acceptor indices
    frame_index : int, optional
        Frame index to visualize (required if average=False)
    average : bool, optional
        Whether to visualize the aggregate over all frames
    output_dir : str, optional
        Directory to save output files

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``average`` is False and no ``frame_index`` is given
    """
    bond_table = pd.read_csv(csv_file)
    atom_indices = np.load(indices_path)

    # Atom index -> row/column position in the correlation matrix.
    index_map = {atom: position for position, atom in enumerate(atom_indices)}
    n_atoms = len(atom_indices)

    if not average and frame_index is None:
        raise ValueError("frame_index must be provided when average=False")

    if average:
        corr_matrix, graph, _ = aggregate_data(bond_table, index_map, n_atoms)
        plot_options = dict(average=True, output_dir=output_dir)
    else:
        corr_matrix, graph, _ = process_frame(bond_table, atom_indices, frame_index, index_map, n_atoms)
        plot_options = dict(frame_index=frame_index, output_dir=output_dir)

    visualize_hydrogen_bonds_matrix_plotly(corr_matrix, donor_acceptor_indices=atom_indices, **plot_options)
    visualize_hydrogen_bonds_plotly(graph, **plot_options)
|
|
530
|
+
|
|
531
|
+
|
|
532
|
+
def _donor_acceptor_distances(trajectory, data_dict, mic):
    """Return {frame: {hydrogen: {acceptor: donor-acceptor distance}}} for bonded hydrogens."""
    result = {}
    # data_dict preserves the trajectory order, so both can be walked together.
    for frame_atoms, (frame, frame_bonds) in zip(trajectory, data_dict.items()):
        dm = frame_atoms.get_all_distances(mic=mic)
        per_hydrogen = {}
        for hydrogen, bonds in frame_bonds.items():
            if len(bonds) > 1:
                donor = bonds[0][0]
                # Direct donor-acceptor distance (e.g., O-O distance in water)
                per_hydrogen[hydrogen] = {
                    entry[0]: float(dm[donor, entry[0]]) for entry in bonds[1:]
                }
        result[frame] = per_hydrogen
    return result


def _write_bond_details_csv(path, data_dict, donor_acceptor_distances):
    """Write one CSV row per hydrogen and frame with donor, acceptors, distances and angles."""
    fieldnames = [
        'Frame', 'Hydrogen', 'Donor', 'Acceptor(s)',
        'Donor-Hydrogen Distance', 'Hydrogen-Acceptor(s) Distance(s)',
        'Donor-Acceptor(s) Distance(s)', 'Angle(A-H-D)'
    ]
    with open(path, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for frame, frame_bonds in data_dict.items():
            for hydrogen, bonds in frame_bonds.items():
                donor = bonds[0][0] if bonds else ""
                donor_hydrogen_dist = bonds[0][1] if bonds else ""

                if len(bonds) > 1:
                    h_bond_entries = bonds[1:]
                    # Convert to built-in numbers: lists are written via
                    # str(), and NumPy >= 2 scalars would otherwise appear
                    # as "np.int64(5)" / "np.float64(1.8)" in the file.
                    acceptors = [int(entry[0]) for entry in h_bond_entries]
                    acceptors_hydrogen_dist = [float(entry[1]) for entry in h_bond_entries]
                    angles = [float(entry[2]) for entry in h_bond_entries]
                    donor_acceptor_dist = [
                        donor_acceptor_distances[frame][hydrogen].get(entry[0], "N/A")
                        for entry in h_bond_entries
                    ]
                else:
                    acceptors = ""
                    acceptors_hydrogen_dist = ""
                    donor_acceptor_dist = ""
                    angles = ""

                writer.writerow({
                    'Frame': frame,
                    'Hydrogen': hydrogen,
                    'Donor': donor,
                    'Acceptor(s)': acceptors,
                    'Donor-Hydrogen Distance': donor_hydrogen_dist,
                    'Hydrogen-Acceptor(s) Distance(s)': acceptors_hydrogen_dist,
                    'Donor-Acceptor(s) Distance(s)': donor_acceptor_dist,
                    'Angle(A-H-D)': angles,
                })


def _write_network_csv(path, data_dict, donor_acceptor_distances):
    """Write one CSV row per donor-acceptor bond and return the set of atoms involved."""
    unique_atoms = set()
    with open(path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Frame', 'Donor', 'Acceptor', 'Distance'])
        for frame, frame_bonds in data_dict.items():
            for hydrogen, bonds in frame_bonds.items():
                if len(bonds) <= 1:
                    continue
                donor = bonds[0][0]
                unique_atoms.add(donor)
                for entry in bonds[1:]:
                    acceptor = entry[0]
                    unique_atoms.add(acceptor)
                    # Include the donor-acceptor distance in the network file
                    distance = donor_acceptor_distances[frame][hydrogen].get(acceptor, "N/A")
                    writer.writerow([frame, donor, acceptor, distance])
    return unique_atoms


def hydrogen_bonds(traj_path, frame_skip=10, acceptor_atoms=None, angle_cutoff=120,
                   h_bond_cutoff=2.4, bond_cutoff=1.6, mic=True, single_h_bond=False,
                   output_dir="./", time_step=None, plot_count=False, plot_heatmap=False,
                   plot_graph_frame=True, plot_graph_average=False, indices_path=None,
                   graph_frame_index=0):
    """
    Analyze hydrogen bonds in a molecular dynamics trajectory.

    Writes 'hydrogen_bonds_{frame_skip}skips.csv' (per-hydrogen details) and
    'total_hydrogen_bonds_per_frame_{frame_skip}skips.csv' to ``output_dir``
    and, when graph plotting is enabled, 'hydrogen_bonds_network.csv'.

    Parameters
    ----------
    traj_path : str
        Path to trajectory file
    frame_skip : int, optional
        Read every ``frame_skip``-th frame of the trajectory (default: 10)
    acceptor_atoms : List[str], optional
        List of element symbols that can be acceptors
        (default: None, meaning ["N","O","F"])
    angle_cutoff : float, optional
        Minimum angle in degrees for hydrogen bond (default: 120)
    h_bond_cutoff : float, optional
        Maximum distance in Å for hydrogen bond (default: 2.4)
    bond_cutoff : float, optional
        Maximum distance in Å for covalent bond (default: 1.6)
    mic : bool, optional
        Whether to use minimum image convention (default: True)
    single_h_bond : bool, optional
        Whether to count each hydrogen at most once (default: False)
    output_dir : str, optional
        Directory to save output files (default: "./")
    time_step : float, optional
        Simulation time step for plotting; the count plot is only produced
        when this is given (default: None)
    plot_count : bool, optional
        Whether to plot hydrogen bond count (default: False)
    plot_heatmap : bool, optional
        Whether to plot 2D histogram (default: False)
    plot_graph_frame : bool, optional
        Whether to plot interactive hydrogen bond network graph for specific frame (default: True)
    plot_graph_average : bool, optional
        Whether to plot interactive average hydrogen bond network graph (default: False)
    indices_path : str, optional
        Path to NumPy array with donor/acceptor atom indices for graph plotting.
        If None, the unique indices will be automatically extracted and saved (default: None)
    graph_frame_index : int, optional
        Frame index to use for graph visualization; frames are labelled by
        their index in the original trajectory, i.e. multiples of
        ``frame_skip`` (default: 0)

    Returns
    -------
    List[int]
        List of hydrogen bond counts per frame
    """
    # Resolve the default here instead of using a mutable default argument.
    if acceptor_atoms is None:
        acceptor_atoms = ["N", "O", "F"]

    os.makedirs(output_dir, exist_ok=True)

    output_filename = os.path.join(output_dir, f'hydrogen_bonds_{frame_skip}skips.csv')
    total_bonds_filename = os.path.join(output_dir, f'total_hydrogen_bonds_per_frame_{frame_skip}skips.csv')

    trajectory = read(traj_path, index=f"::{frame_skip}")

    all_data = Parallel(n_jobs=-1)(
        delayed(count_hydrogen_bonds)(
            atoms, acceptor_atoms=acceptor_atoms, angle_cutoff=angle_cutoff,
            h_bond_cutoff=h_bond_cutoff, bond_cutoff=bond_cutoff,
            mic=mic,  # honour the caller's choice (was hard-coded to True)
            single_h_bond=single_h_bond
        ) for atoms in trajectory
    )

    h_bonds_per_frame = [num_bonds for _, num_bonds in all_data]
    # Key every frame by its index in the original (unskipped) trajectory.
    data_dict = {i * frame_skip: frame_dict for i, (frame_dict, _) in enumerate(all_data)}

    # Donor-acceptor distances for each frame
    donor_acceptor_distances = _donor_acceptor_distances(trajectory, data_dict, mic)

    _write_bond_details_csv(output_filename, data_dict, donor_acceptor_distances)

    with open(total_bonds_filename, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['Frame', 'Total Hydrogen Bonds'])
        for frame_idx, num_bonds in enumerate(h_bonds_per_frame):
            writer.writerow([frame_idx * frame_skip, num_bonds])

    if time_step is not None and plot_count:
        plot_hydrogen_count(h_bonds_per_frame, frame_skip, time_step, output_dir)

    if plot_heatmap:
        plot_2Dheatmap(data_dict, output_dir)

    if plot_graph_frame or plot_graph_average:
        network_csv = os.path.join(output_dir, "hydrogen_bonds_network.csv")
        unique_atoms = _write_network_csv(network_csv, data_dict, donor_acceptor_distances)

        if indices_path is None:
            indices_path = os.path.join(output_dir, "donor_acceptor_indices.npy")
            unique_atoms_array = np.array(sorted(unique_atoms), dtype=int)
            np.save(indices_path, unique_atoms_array)
            print(f"Generated and saved {len(unique_atoms_array)} unique donor/acceptor atom indices to {indices_path}")

        if plot_graph_frame:
            print(f"Generating hydrogen bond network visualizations for frame {graph_frame_index}...")
            visualize_hydrogen_bonds(network_csv, indices_path,
                                     frame_index=graph_frame_index, average=False,
                                     output_dir=output_dir)

        if plot_graph_average:
            print("Generating average hydrogen bond network visualization...")
            visualize_hydrogen_bonds(network_csv, indices_path,
                                     average=True, output_dir=output_dir)

    return h_bonds_per_frame
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
def plot_hydrogen_count(h_bonds_per_frame, frame_skip, time_step, output_dir):
    """
    Plot the number of hydrogen bonds against simulation time.

    The curve and a dashed line at its mean are drawn, saved as
    'h_bond_count.png' in ``output_dir`` and shown.

    Parameters
    ----------
    h_bonds_per_frame : List[int]
        Number of hydrogen bonds per frame
    frame_skip : int
        Number of frames skipped in trajectory analysis
    time_step : float
        Time step between frames in fs
    output_dir : str
        Directory to save output file

    Returns
    -------
    None
    """
    # Frame position -> time; the division by 1000 matches the "[ps]" label.
    n_frames = len(h_bonds_per_frame)
    time_axis = np.arange(n_frames) * time_step * frame_skip / 1000
    mean_count = np.mean(h_bonds_per_frame)

    fig, axis = plt.subplots(figsize=(8, 6))
    axis.plot(time_axis, h_bonds_per_frame, '-', color="blue", label="H-bond count")
    axis.axhline(mean_count, linestyle="--", color="blue", label=f"Mean: {mean_count:.2f}")
    axis.set_xlabel("Time [ps]", fontsize=12)
    axis.set_ylabel("Count", fontsize=12)
    plt.title("Hydrogen bond count", fontsize=14)
    fig.legend(loc="center right", bbox_to_anchor=(1.1, 0.5))

    filename = os.path.join(output_dir, "h_bond_count.png")
    plt.savefig(filename, bbox_inches="tight")
    plt.show()
    plt.close()
    print(f"Hydrogen bond count plot saved to '{filename}'")
|
|
745
|
+
|
|
746
|
+
|
|
747
|
+
def plot_2Dheatmap(data_dict, output_dir):
    """
    Plot a 2D histogram of hydrogen bond angles versus distances.

    Parameters
    ----------
    data_dict : dict
        Mapping frame -> {hydrogen: bond list}. For every hydrogen, all
        entries after the first are read as ``[acceptor, distance, angle]``.
    output_dir : str
        Directory in which 'h_bond_structure.png' is saved

    Returns
    -------
    None
    """
    angle_values = []
    distance_values = []

    for frame_bonds in data_dict.values():
        for bonds in frame_bonds.values():
            # The first entry is skipped; lists with fewer than two entries
            # therefore contribute nothing.
            for entry in bonds[1:]:
                distance_values.append(entry[1])
                angle_values.append(entry[2])

    histogram = plt.hist2d(angle_values, distance_values, bins=30, cmap="viridis")

    # Element 3 of hist2d's return value is the image used for the colorbar.
    plt.colorbar(histogram[3], label="Count")

    plt.xlabel("Donor-Hydrogen-Acceptor Angle [°]",fontsize=12)
    plt.ylabel("Acceptor-Hydrogen Distance [Å]",fontsize=12)
    plt.title("2D Histogram of H-bonds parameters",fontsize=14)

    filename = os.path.join(output_dir, "h_bond_structure.png")
    plt.savefig(filename, bbox_inches="tight")
    plt.show()
    plt.close()
    print(f"H-bond structure 2D histogram saved to '{filename}'")
|