crisp-ase 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. CRISP/__init__.py +99 -0
  2. CRISP/_version.py +1 -0
  3. CRISP/cli.py +41 -0
  4. CRISP/data_analysis/__init__.py +38 -0
  5. CRISP/data_analysis/clustering.py +838 -0
  6. CRISP/data_analysis/contact_coordination.py +915 -0
  7. CRISP/data_analysis/h_bond.py +772 -0
  8. CRISP/data_analysis/msd.py +1199 -0
  9. CRISP/data_analysis/prdf.py +404 -0
  10. CRISP/data_analysis/volumetric_atomic_density.py +527 -0
  11. CRISP/py.typed +1 -0
  12. CRISP/simulation_utility/__init__.py +31 -0
  13. CRISP/simulation_utility/atomic_indices.py +155 -0
  14. CRISP/simulation_utility/atomic_traj_linemap.py +278 -0
  15. CRISP/simulation_utility/error_analysis.py +254 -0
  16. CRISP/simulation_utility/interatomic_distances.py +200 -0
  17. CRISP/simulation_utility/subsampling.py +241 -0
  18. CRISP/tests/DataAnalysis/__init__.py +1 -0
  19. CRISP/tests/DataAnalysis/test_clustering_extended.py +212 -0
  20. CRISP/tests/DataAnalysis/test_contact_coordination.py +184 -0
  21. CRISP/tests/DataAnalysis/test_contact_coordination_extended.py +465 -0
  22. CRISP/tests/DataAnalysis/test_h_bond_complete.py +326 -0
  23. CRISP/tests/DataAnalysis/test_h_bond_extended.py +322 -0
  24. CRISP/tests/DataAnalysis/test_msd_complete.py +305 -0
  25. CRISP/tests/DataAnalysis/test_msd_extended.py +522 -0
  26. CRISP/tests/DataAnalysis/test_prdf.py +206 -0
  27. CRISP/tests/DataAnalysis/test_volumetric_atomic_density.py +463 -0
  28. CRISP/tests/SimulationUtility/__init__.py +1 -0
  29. CRISP/tests/SimulationUtility/test_atomic_traj_linemap.py +101 -0
  30. CRISP/tests/SimulationUtility/test_atomic_traj_linemap_extended.py +469 -0
  31. CRISP/tests/SimulationUtility/test_error_analysis_extended.py +151 -0
  32. CRISP/tests/SimulationUtility/test_interatomic_distances.py +223 -0
  33. CRISP/tests/SimulationUtility/test_subsampling.py +365 -0
  34. CRISP/tests/__init__.py +1 -0
  35. CRISP/tests/test_CRISP.py +28 -0
  36. CRISP/tests/test_cli.py +87 -0
  37. CRISP/tests/test_crisp_comprehensive.py +679 -0
  38. crisp_ase-1.1.2.dist-info/METADATA +141 -0
  39. crisp_ase-1.1.2.dist-info/RECORD +42 -0
  40. crisp_ase-1.1.2.dist-info/WHEEL +5 -0
  41. crisp_ase-1.1.2.dist-info/entry_points.txt +2 -0
  42. crisp_ase-1.1.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,772 @@
1
+ """
2
+ CRISP/data_analysis/h_bond.py
3
+
4
+ This script performs hydrogen bond analysis on molecular dynamics trajectory data.
5
+ """
6
+ from ase.io import read
7
+ import numpy as np
8
+ import csv
9
+ from joblib import Parallel, delayed
10
+ import argparse
11
+ import os
12
+ from typing import Union, List, Optional, Tuple, Any, Dict
13
+ import matplotlib.pyplot as plt
14
+ from ase.data import vdw_radii, atomic_numbers, chemical_symbols
15
+ import seaborn as sns
16
+ import itertools
17
+ import pandas as pd
18
+ import networkx as nx
19
+ import plotly.graph_objects as go
20
+ import plotly.io as pio
21
+
22
# Plotly renderer used whenever the visualisation helpers in this module call
# ``fig.show()``. This is a process-wide Plotly setting, applied on import.
pio.renderers.default = 'notebook'

# Public API of this module.
__all__ = ['indices', 'count_hydrogen_bonds', 'aggregate_data', 'hydrogen_bonds']
26
+
27
+
28
def indices(atoms, ind: Union[str, List[Union[int, str]]]) -> np.ndarray:
    """
    Extract atom indices from an ASE Atoms object based on the input specifier.

    Parameters
    ----------
    atoms : ase.Atoms
        ASE Atoms object containing atomic structure. Only ``len(atoms)`` and
        ``atoms.get_chemical_symbols()`` are used.
    ind : Union[str, List[Union[int, str]]]
        Index specifier, can be:
        - "all" or None: all atoms
        - string ending with ".npy": load indices from NumPy file
        - integer (Python or NumPy), or list/tuple/array of integers:
          direct atom indices
        - string or list/tuple of strings: chemical symbols to select

    Returns
    -------
    np.ndarray
        Array of selected indices. For chemical symbols the indices are
        grouped per symbol, in the order the symbols were given.

    Raises
    ------
    ValueError
        If the index type is invalid (including an empty list)
    """
    # Test ``None`` and the string type first so that array-like specifiers
    # never reach an element-wise ``== "all"`` comparison.
    if ind is None or (isinstance(ind, str) and ind == "all"):
        return np.arange(len(atoms))

    if isinstance(ind, str) and ind.endswith(".npy"):
        # NOTE(security): allow_pickle=True can execute arbitrary code when the
        # file comes from an untrusted source. It is kept for backward
        # compatibility with object arrays; only load files you trust.
        return np.load(ind, allow_pickle=True)

    # Normalise every accepted container (and bare scalars) to a plain list.
    if isinstance(ind, np.ndarray):
        ind = np.atleast_1d(ind).tolist()
    elif isinstance(ind, tuple):
        ind = list(ind)
    elif not isinstance(ind, list):
        ind = [ind]

    # NumPy integer scalars are not instances of ``int``, so check both.
    if any(isinstance(item, (int, np.integer)) for item in ind):
        return np.array(ind)

    if any(isinstance(item, str) for item in ind):
        # Build the symbol array once instead of once per requested symbol.
        symbols = np.array(atoms.get_chemical_symbols())
        return np.concatenate([np.where(symbols == symbol)[0] for symbol in ind])

    raise ValueError("Invalid index type")
72
+
73
+
74
def count_hydrogen_bonds(
    atoms,
    acceptor_atoms: List[str] = None,
    angle_cutoff: float = 120,
    h_bond_cutoff: float = 2.4,
    bond_cutoff: float = 1.6,
    mic: bool = True,
    single_h_bond: bool = False
) -> Tuple[Dict[int, List], int]:
    """Find and count hydrogen bonds in a single structure.

    For every hydrogen, all acceptor-type atoms closer than ``h_bond_cutoff``
    are collected and sorted by distance. The nearest one is treated as the
    covalently bound donor if it lies within ``bond_cutoff``; otherwise the
    hydrogen is recorded without any bonds. Each remaining neighbour is kept
    as a hydrogen bond when the donor-hydrogen-neighbour angle is at least
    ``angle_cutoff``.

    Parameters
    ----------
    atoms : ase.Atoms
        Structure to analyse
    acceptor_atoms : List[str], optional
        Element symbols considered as donors/acceptors (default: ["N", "O", "F"])
    angle_cutoff : float, optional
        Minimum donor-H-acceptor angle in degrees (default: 120)
    h_bond_cutoff : float, optional
        Maximum H...acceptor distance in Angstroms (default: 2.4)
    bond_cutoff : float, optional
        Maximum donor-H covalent distance in Angstroms (default: 1.6)
    mic : bool, optional
        Use the minimum image convention for distances and angles (default: True)
    single_h_bond : bool, optional
        If True, count each hydrogen that takes part in at least one hydrogen
        bond once; if False, count every hydrogen bond (default: False)

    Returns
    -------
    Tuple[Dict[int, List], int]
        A dictionary with one entry per hydrogen index. The value is either an
        empty list or ``[[donor, d_DH], [acceptor, d_HA, angle], ...]``.
        The second element is the hydrogen bond count.
    """
    if acceptor_atoms is None:
        acceptor_atoms = ["N", "O", "F"]

    h_idx = indices(atoms, "H")
    acc_idx = indices(atoms, acceptor_atoms)

    dist_matrix = atoms.get_all_distances(mic=mic)
    np.fill_diagonal(dist_matrix, np.inf)
    h_to_acc = dist_matrix[h_idx, :][:, acc_idx]

    # Locate all (hydrogen, acceptor) pairs within the H-bond cutoff once.
    rows, cols = np.where(h_to_acc < h_bond_cutoff)

    neighbours = {}
    for h_atom, acc_atom, separation in zip(h_idx[rows], acc_idx[cols], h_to_acc[rows, cols]):
        neighbours.setdefault(h_atom, []).append([acc_atom, separation])

    hydrogen_dict = {}
    for h_atom, entries in neighbours.items():
        entries.sort(key=lambda pair: pair[1])
        donor_entry = entries[0]

        # The nearest neighbour must be a covalent partner, else no donor.
        if not (donor_entry[1] < bond_cutoff):
            hydrogen_dict[h_atom] = []
            continue

        kept = [donor_entry]
        for candidate in entries[1:]:
            angle = atoms.get_angle(donor_entry[0], h_atom, candidate[0], mic=mic)
            if angle >= angle_cutoff:
                candidate.append(angle)
                kept.append(candidate)
        hydrogen_dict[h_atom] = kept

    # Hydrogens without any neighbour in range still get an (empty) entry.
    for h_atom in h_idx:
        hydrogen_dict.setdefault(h_atom, [])

    if single_h_bond:
        num_hydrogen_bonds = sum(1 for entries in hydrogen_dict.values() if len(entries) > 1)
    else:
        num_hydrogen_bonds = sum(len(entries[1:]) for entries in hydrogen_dict.values())

    return hydrogen_dict, num_hydrogen_bonds
154
+
155
+
156
def aggregate_data(
    data: "pd.DataFrame",
    index_map: Dict[int, int],
    N: int
) -> Tuple[np.ndarray, "nx.Graph", List]:
    """
    Aggregate hydrogen bond data over all frames into a count matrix and graph.

    Parameters
    ----------
    data : pandas.DataFrame
        Hydrogen bond table with at least the columns 'Frame', 'Donor' and
        'Acceptor' (one row per donor-acceptor pair)
    index_map : dict
        Mapping from atom indices to row/column positions in the matrix
    N : int
        Size of the (N x N) correlation matrix

    Returns
    -------
    Tuple[np.ndarray, nx.Graph, List]
        - Symmetric integer matrix whose entries count how often each pair
          occurs across all frames
        - NetworkX graph with one node per key of ``index_map`` (attribute
          ``size`` = number of bonds the atom takes part in) and one edge per
          bonded pair (attribute ``weight`` = occurrence count)
        - List of all donor/acceptor pairs, frame by frame, including pairs
          whose atoms are not in ``index_map``
    """
    node_frequency = {node: 0 for node in index_map}
    edge_weight = {}
    all_pairs = []

    # groupby keeps the pairs ordered by (sorted) frame number.
    for _, group in data.groupby('Frame'):
        pairs = group[['Donor', 'Acceptor']].values
        all_pairs.extend(pairs)

        for donor, acceptor in pairs:
            # Only pairs where both atoms are tracked contribute to the stats.
            if donor in index_map and acceptor in index_map:
                node_frequency[donor] += 1
                node_frequency[acceptor] += 1
                # Sort so that (a, b) and (b, a) count as the same edge.
                edge = tuple(sorted([donor, acceptor]))
                edge_weight[edge] = edge_weight.get(edge, 0) + 1

    G = nx.Graph()
    for node, freq in node_frequency.items():
        G.add_node(node, size=freq)

    corr_matrix = np.zeros((N, N), dtype=int)
    for (donor, acceptor), weight in edge_weight.items():
        G.add_edge(donor, acceptor, weight=weight)
        donor_idx = index_map[donor]
        acceptor_idx = index_map[acceptor]
        corr_matrix[donor_idx, acceptor_idx] = weight
        corr_matrix[acceptor_idx, donor_idx] = weight

    return corr_matrix, G, all_pairs
226
+
227
+
228
def process_frame(data, donor_acceptor_indices, frame_index, index_map, N):
    """
    Build the distance matrix and bond graph for one frame.

    Parameters
    ----------
    data : pandas.DataFrame
        Hydrogen bond table with columns 'Frame', 'Donor', 'Acceptor' and,
        optionally, 'Distance'
    donor_acceptor_indices : np.ndarray
        Atom indices labelling the rows/columns of the matrix
    frame_index : int
        Value of the 'Frame' column to select
    index_map : dict
        Mapping from atom indices to row/column positions in the matrix
    N : int
        Size of the (N x N) matrix

    Returns
    -------
    Tuple[np.ndarray, nx.Graph, np.ndarray]
        Symmetric float matrix of donor-acceptor distances, a NetworkX graph
        with one edge per positive matrix entry (``weight`` = 1,
        ``distance`` = matrix value), and the raw donor/acceptor pairs.

    Notes
    -----
    NOTE(review): when the table has no 'Distance' column every stored value
    is 0, so the matrix stays empty and the graph gets no edges.
    """
    rows_for_frame = data[data['Frame'] == frame_index]
    distance_known = 'Distance' in rows_for_frame.columns

    pairs = rows_for_frame[['Donor', 'Acceptor']].values
    if distance_known:
        raw_distances = rows_for_frame['Distance'].values
    else:
        raw_distances = [1.0] * len(pairs)

    corr_matrix = np.zeros((N, N), dtype=float)

    for position, (donor, acceptor) in enumerate(pairs):
        # Ignore pairs involving atoms that are not part of the matrix.
        if donor not in index_map or acceptor not in index_map:
            continue

        row = index_map[donor]
        col = index_map[acceptor]

        if distance_known and raw_distances[position] != "N/A":
            bond_length = float(raw_distances[position])
        else:
            bond_length = 0

        if corr_matrix[row, col] == 0:
            # First occurrence of this pair: store the distance as is.
            corr_matrix[row, col] = bond_length
            corr_matrix[col, row] = bond_length
        else:
            # Repeated pair: blend the stored value with the new distance.
            corr_matrix[row, col] = (corr_matrix[row, col] + bond_length) / 2
            corr_matrix[col, row] = (corr_matrix[col, row] + bond_length) / 2

    G = nx.Graph()
    n_labels = len(donor_acceptor_indices)
    for row, col in itertools.product(range(n_labels), repeat=2):
        if corr_matrix[row, col] > 0:
            G.add_edge(
                donor_acceptor_indices[row],
                donor_acceptor_indices[col],
                weight=1,  # constant weight, used for edge thickness
                distance=corr_matrix[row, col],  # actual donor-acceptor distance
            )

    return corr_matrix, G, pairs
283
+
284
+
285
def visualize_hydrogen_bonds_matrix(corr_matrix, donor_acceptor_indices=None, frame_index=None, average=False, output_dir=None):
    """
    Visualizes the hydrogen bond correlation matrix using Matplotlib.

    Parameters
    ----------
    corr_matrix : np.ndarray
        Correlation matrix of hydrogen bonds. Integer matrices are annotated
        as integers, all others with two decimals.
    donor_acceptor_indices : np.ndarray, optional
        Array of donor/acceptor atom indices used as tick labels. If None,
        seaborn's automatic tick labels are used.
    frame_index : int, optional
        Frame index for title and file name (used when ``average`` is False)
    average : bool, optional
        Whether this is an average across frames
    output_dir : str, optional
        Directory to save the PNG file. If not given, the figure is only shown.

    Returns
    -------
    None
    """
    plt.figure(figsize=(10, 8))

    fmt = "d" if np.issubdtype(corr_matrix.dtype, np.integer) else ".2f"

    # seaborn cannot handle ``None`` as tick labels; "auto" is its own default.
    tick_labels = "auto" if donor_acceptor_indices is None else donor_acceptor_indices

    sns.heatmap(corr_matrix, annot=True, fmt=fmt, cmap='viridis',
                xticklabels=tick_labels, yticklabels=tick_labels)

    if average:
        plt.title("Average Hydrogen Bond Correlation Matrix Across All Frames")
        filename = "hbond_correlation_matrix_average.png"
    else:
        plt.title(f"Hydrogen Bond Correlation Matrix for Frame {frame_index}")
        filename = f"hbond_correlation_matrix_frame_{frame_index}.png"

    plt.xlabel("Atom Index")
    plt.ylabel("Atom Index")

    if output_dir:
        target = os.path.join(output_dir, filename)
        plt.savefig(target, bbox_inches='tight')
        plt.show()
        plt.close()
        print(f"Correlation matrix saved as '{target}'")
    else:
        plt.show()
333
+
334
+
335
def visualize_hydrogen_bonds_plotly(G, donor_acceptor_indices=None, frame_index=None, average=False, output_dir=None):
    """
    Draw the hydrogen bond network as an interactive Plotly figure.

    Parameters
    ----------
    G : nx.Graph
        Hydrogen bond graph. Optional node attribute ``size`` sets the marker
        size (default 20); optional edge attributes ``weight`` and
        ``distance`` set the line width and the hover label.
    donor_acceptor_indices : np.ndarray, optional
        Accepted for signature symmetry with the other helpers; not used here
    frame_index : int, optional
        Frame index for title and file name (used when ``average`` is False)
    average : bool, optional
        Whether the graph represents all frames
    output_dir : str, optional
        Directory to write the HTML file to. If not given, the figure is shown.

    Returns
    -------
    None
    """
    # Fixed seed keeps the spring layout reproducible between calls.
    layout_positions = nx.spring_layout(G, seed=42)

    nodes = list(G.nodes())
    marker_sizes = [G.nodes[n].get('size', 20) for n in nodes]
    marker_colors = [G.degree(n) for n in nodes]
    node_x = [layout_positions[n][0] for n in nodes]
    node_y = [layout_positions[n][1] for n in nodes]

    # One line trace per edge so each bond carries its own hover label.
    edge_traces = []
    for start, end, attrs in G.edges(data=True):
        bond_length = attrs.get('distance', None)
        if bond_length is None or bond_length == "N/A":
            length_label = "N/A"
        else:
            length_label = f"{bond_length:.2f} Å"

        x0, y0 = layout_positions[start]
        x1, y1 = layout_positions[end]
        edge_info = f'Bond {start}-{end}: {length_label}'

        edge_traces.append(go.Scatter(
            x=[x0, x1],
            y=[y0, y1],
            mode='lines',
            line=dict(width=attrs.get('weight', 1) * 0.5, color='Magenta'),
            name=edge_info,
            hoverinfo='text',
            text=edge_info
        ))

    colorbar_cfg = dict(
        thickness=15,
        title=dict(text='Node Connections', side='right'),
        xanchor='left'
    )
    node_trace = go.Scatter(
        x=node_x, y=node_y, mode='markers',
        marker=dict(
            size=marker_sizes,
            color=marker_colors,
            colorscale='Viridis',
            colorbar=colorbar_cfg
        ),
        text=[str(n) for n in nodes],
        textposition='top center',
        name='Donor/Acceptor Atoms'
    )

    if average:
        title = "Average Hydrogen Bond Network Across All Frames"
    else:
        title = f"Hydrogen Bond Network for Frame {frame_index}"

    hidden_axis = dict(showgrid=False, zeroline=False, showticklabels=False)
    caption = dict(
        text="Network graph visualization of hydrogen bonds",
        showarrow=False,
        xref="paper",
        yref="paper",
        x=0.005,
        y=-0.002
    )
    fig = go.Figure(
        data=edge_traces + [node_trace],
        layout=go.Layout(
            title=dict(text=f'<br>{title}', font=dict(size=16)),
            showlegend=False,
            hovermode='closest',
            margin=dict(b=20, l=5, r=5, t=40),
            annotations=[caption],
            xaxis=dict(hidden_axis),
            yaxis=dict(hidden_axis)
        )
    )

    if not output_dir:
        fig.show()
        return

    if average:
        filename = "hbond_network_average.html"
    else:
        filename = f"hbond_network_frame_{frame_index}.html"
    fig.write_html(os.path.join(output_dir, filename))
    print(f"Figure saved as '{os.path.join(output_dir, filename)}'")
436
+
437
+
438
def visualize_hydrogen_bonds_matrix_plotly(corr_matrix, donor_acceptor_indices=None, frame_index=None, average=False, output_dir=None):
    """
    Visualizes the hydrogen bond correlation matrix using Plotly.

    Parameters
    ----------
    corr_matrix : np.ndarray
        Correlation matrix of hydrogen bonds. In this module it holds bond
        counts when ``average`` is True (see ``aggregate_data``) and
        donor-acceptor distances otherwise (see ``process_frame``).
    donor_acceptor_indices : np.ndarray, optional
        Array of donor/acceptor atom indices used as axis labels. If None,
        the positional indices 0..len(corr_matrix)-1 are used.
    frame_index : int, optional
        Frame index for title and file name (used when ``average`` is False)
    average : bool, optional
        Whether this is an average across frames
    output_dir : str, optional
        Directory to save the HTML file. If not given, the figure is shown.

    Returns
    -------
    None
    """
    # Fall back to positional labels so the documented default (None) works.
    if donor_acceptor_indices is None:
        labels = list(range(len(corr_matrix)))
    else:
        labels = donor_acceptor_indices

    def _hover(row_idx, col_idx, value):
        # Floats are shown with two decimals, everything else verbatim.
        shown = f"{value:.2f}" if isinstance(value, float) else f"{value}"
        return f"Donor: {labels[row_idx]}<br>Acceptor: {labels[col_idx]}<br>Value: {shown}"

    hover_text = [
        [_hover(i, j, val) for j, val in enumerate(row)]
        for i, row in enumerate(corr_matrix)
    ]

    # Per-frame matrices store distances, so only the aggregated matrix may be
    # labelled as a bond count.
    colorbar_title = 'No. of H-Bond' if average else 'Distance (Å)'

    axis_labels = [str(idx) for idx in labels]
    fig = go.Figure(data=go.Heatmap(
        z=corr_matrix,
        x=axis_labels,
        y=axis_labels,
        colorscale='Viridis',
        text=hover_text,
        hoverinfo='text',
        colorbar=dict(title=colorbar_title)
    ))

    if average:
        title = "Average Hydrogen Bond Correlation Matrix Across All Frames"
    else:
        title = f"Hydrogen Bond Correlation Matrix for Frame {frame_index}"

    fig.update_layout(
        title=dict(text=title, font=dict(size=16)),
        xaxis=dict(title='Atom Index', tickfont=dict(size=10)),
        yaxis=dict(title='Atom Index', tickfont=dict(size=10)),
        width=900,
        height=800,
        autosize=True
    )

    if output_dir:
        if average:
            filename = "hbond_correlation_matrix_average.html"
        else:
            filename = f"hbond_correlation_matrix_frame_{frame_index}.html"
        target = os.path.join(output_dir, filename)
        fig.write_html(target)
        print(f"Interactive correlation matrix saved as '{target}'")
    else:
        fig.show()
491
+
492
+
493
def visualize_hydrogen_bonds(csv_file, indices_path, frame_index=None, average=False, output_dir=None):
    """
    Plot the hydrogen bond matrix and network stored in a donor-acceptor CSV.

    Parameters
    ----------
    csv_file : str
        CSV file with the hydrogen bond pairs (read with ``pandas.read_csv``)
    indices_path : str
        ``.npy`` file with the donor/acceptor atom indices
    frame_index : int, optional
        Frame to visualize; mandatory unless ``average`` is True
    average : bool, optional
        If True, aggregate all frames instead of plotting a single one
    output_dir : str, optional
        Directory for the generated HTML files; figures are shown if omitted

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``average`` is False and no ``frame_index`` is given
    """
    bond_table = pd.read_csv(csv_file)
    donor_acceptor_indices = np.load(indices_path)

    # Position of every atom index inside the matrix.
    index_map = {atom: position for position, atom in enumerate(donor_acceptor_indices)}
    N = len(donor_acceptor_indices)

    if average:
        corr_matrix, G, _ = aggregate_data(bond_table, index_map, N)
        visualize_hydrogen_bonds_matrix_plotly(
            corr_matrix,
            donor_acceptor_indices=donor_acceptor_indices,
            average=True,
            output_dir=output_dir,
        )
        visualize_hydrogen_bonds_plotly(G, average=True, output_dir=output_dir)
        return

    if frame_index is None:
        raise ValueError("frame_index must be provided when average=False")

    corr_matrix, G, _ = process_frame(bond_table, donor_acceptor_indices, frame_index, index_map, N)
    visualize_hydrogen_bonds_matrix_plotly(
        corr_matrix,
        donor_acceptor_indices=donor_acceptor_indices,
        frame_index=frame_index,
        output_dir=output_dir,
    )
    visualize_hydrogen_bonds_plotly(G, frame_index=frame_index, output_dir=output_dir)
530
+
531
+
532
def hydrogen_bonds(traj_path, frame_skip=10, acceptor_atoms=["N","O","F"], angle_cutoff=120,
                   h_bond_cutoff=2.4, bond_cutoff=1.6, mic=True, single_h_bond=False,
                   output_dir="./", time_step=None, plot_count=False, plot_heatmap=False,
                   plot_graph_frame=True, plot_graph_average=False, indices_path=None,
                   graph_frame_index=0):
    """
    Analyze hydrogen bonds in a molecular dynamics trajectory.

    Every ``frame_skip``-th frame is analysed with ``count_hydrogen_bonds``.
    The per-hydrogen results go to ``hydrogen_bonds_{frame_skip}skips.csv``
    and the per-frame totals to
    ``total_hydrogen_bonds_per_frame_{frame_skip}skips.csv``; both are written
    to ``output_dir``, along with any requested plots.

    Parameters
    ----------
    traj_path : str
        Path to trajectory file
    frame_skip : int, optional
        Stride used when reading the trajectory, i.e. every ``frame_skip``-th
        frame is analysed (default: 10)
    acceptor_atoms : List[str], optional
        List of element symbols that can be acceptors (default: ["N","O","F"]).
        The list is only passed on, never modified.
    angle_cutoff : float, optional
        Minimum angle in degrees for hydrogen bond (default: 120)
    h_bond_cutoff : float, optional
        Maximum distance in Å for hydrogen bond (default: 2.4)
    bond_cutoff : float, optional
        Maximum distance in Å for covalent bond (default: 1.6)
    mic : bool, optional
        Whether to use minimum image convention, both for the hydrogen bond
        search and for the donor-acceptor distances (default: True)
    single_h_bond : bool, optional
        Passed to ``count_hydrogen_bonds``: if True, each hydrogen with at
        least one hydrogen bond is counted once (default: False)
    output_dir : str, optional
        Directory to save output files; created if missing (default: "./")
    time_step : float, optional
        Simulation time step for plotting (default: None)
    plot_count : bool, optional
        Whether to plot hydrogen bond count; only done when ``time_step`` is
        given (default: False)
    plot_heatmap : bool, optional
        Whether to plot 2D histogram (default: False)
    plot_graph_frame : bool, optional
        Whether to plot interactive hydrogen bond network graph for specific frame (default: True)
    plot_graph_average : bool, optional
        Whether to plot interactive average hydrogen bond network graph (default: False)
    indices_path : str, optional
        Path to NumPy array with donor/acceptor atom indices for graph plotting.
        If None, the unique indices will be automatically extracted and saved (default: None)
    graph_frame_index : int, optional
        Frame index to use for graph visualization; must match a value in the
        'Frame' column, i.e. a multiple of ``frame_skip`` (default: 0)

    Returns
    -------
    List[int]
        List of hydrogen bond counts per frame
    """
    os.makedirs(output_dir, exist_ok=True)

    output_filename = os.path.join(output_dir, f'hydrogen_bonds_{frame_skip}skips.csv')
    total_bonds_filename = os.path.join(output_dir, f'total_hydrogen_bonds_per_frame_{frame_skip}skips.csv')

    trajectory = read(traj_path, index=f"::{frame_skip}")

    # Forward the caller's ``mic`` so the bond search uses the same periodic
    # treatment as the donor-acceptor distances computed below.
    all_data = Parallel(n_jobs=-1)(
        delayed(count_hydrogen_bonds)(
            atoms, acceptor_atoms=acceptor_atoms, angle_cutoff=angle_cutoff,
            h_bond_cutoff=h_bond_cutoff, bond_cutoff=bond_cutoff, mic=mic,
            single_h_bond=single_h_bond
        ) for atoms in trajectory
    )

    h_bonds_per_frame = [num_bonds for _, num_bonds in all_data]
    # Key every frame's result by its frame number in the original trajectory.
    data_dict = {i * frame_skip: frame_dict for i, (frame_dict, _) in enumerate(all_data)}

    # Donor-acceptor distances for each frame:
    # {frame: {hydrogen: {acceptor: distance}}}
    donor_acceptor_distances = {}
    for frame_atoms, (frame, frame_bonds) in zip(trajectory, data_dict.items()):
        dm = frame_atoms.get_all_distances(mic=mic)
        per_frame = {}
        for hydrogen, bonds in frame_bonds.items():
            # Entry 0 is the donor; everything after it is a hydrogen bond.
            if len(bonds) > 1:
                donor = bonds[0][0]
                # Direct donor-acceptor distance (e.g., O-O distance in water)
                per_frame[hydrogen] = {entry[0]: dm[donor, entry[0]] for entry in bonds[1:]}
        donor_acceptor_distances[frame] = per_frame

    with open(output_filename, 'w', newline='') as csvfile:
        fieldnames = [
            'Frame', 'Hydrogen', 'Donor', 'Acceptor(s)',
            'Donor-Hydrogen Distance', 'Hydrogen-Acceptor(s) Distance(s)',
            'Donor-Acceptor(s) Distance(s)', 'Angle(A-H-D)'
        ]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for frame, frame_bonds in data_dict.items():
            for hydrogen, bonds in frame_bonds.items():
                donor = bonds[0][0] if len(bonds) > 0 else ""
                donor_hydrogen_dist = bonds[0][1] if len(bonds) > 0 else ""

                if len(bonds) > 1:
                    h_bond_entries = bonds[1:]
                    acceptors = [entry[0] for entry in h_bond_entries]
                    acceptors_hydrogen_dist = [entry[1] for entry in h_bond_entries]
                    angles = [entry[2] for entry in h_bond_entries]
                    donor_acceptor_dist = [
                        donor_acceptor_distances[frame][hydrogen].get(acc, "N/A")
                        for acc in acceptors
                    ]
                else:
                    acceptors = ""
                    acceptors_hydrogen_dist = ""
                    donor_acceptor_dist = ""
                    angles = ""

                writer.writerow({
                    'Frame': frame,
                    'Hydrogen': hydrogen,
                    'Donor': donor,
                    'Acceptor(s)': acceptors,
                    'Donor-Hydrogen Distance': donor_hydrogen_dist,
                    'Hydrogen-Acceptor(s) Distance(s)': acceptors_hydrogen_dist,
                    'Donor-Acceptor(s) Distance(s)': donor_acceptor_dist,
                    'Angle(A-H-D)': angles,
                })

    with open(total_bonds_filename, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['Frame', 'Total Hydrogen Bonds'])
        for frame_idx, num_bonds in enumerate(h_bonds_per_frame):
            writer.writerow([frame_idx * frame_skip, num_bonds])

    if time_step is not None and plot_count:
        plot_hydrogen_count(h_bonds_per_frame, frame_skip, time_step, output_dir)

    if plot_heatmap:
        plot_2Dheatmap(data_dict, output_dir)

    if plot_graph_frame or plot_graph_average:
        network_csv = os.path.join(output_dir, "hydrogen_bonds_network.csv")

        # Every atom that acts as donor or acceptor in any analysed frame.
        unique_atoms = set()

        with open(network_csv, 'w', newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['Frame', 'Donor', 'Acceptor', 'Distance'])
            for frame, frame_bonds in data_dict.items():
                for hydrogen, bonds in frame_bonds.items():
                    if len(bonds) > 1:
                        donor = bonds[0][0]
                        unique_atoms.add(donor)

                        for entry in bonds[1:]:
                            acceptor = entry[0]
                            unique_atoms.add(acceptor)

                            # Include the donor-acceptor distance in the network file
                            distance = donor_acceptor_distances[frame][hydrogen].get(acceptor, "N/A")
                            writer.writerow([frame, donor, acceptor, distance])

        if indices_path is None:
            indices_path = os.path.join(output_dir, "donor_acceptor_indices.npy")
            unique_atoms_array = np.array(sorted(unique_atoms), dtype=int)
            np.save(indices_path, unique_atoms_array)
            print(f"Generated and saved {len(unique_atoms_array)} unique donor/acceptor atom indices to {indices_path}")

        if plot_graph_frame:
            print(f"Generating hydrogen bond network visualizations for frame {graph_frame_index}...")
            visualize_hydrogen_bonds(network_csv, indices_path,
                                     frame_index=graph_frame_index, average=False,
                                     output_dir=output_dir)

        if plot_graph_average:
            print("Generating average hydrogen bond network visualization...")
            visualize_hydrogen_bonds(network_csv, indices_path,
                                     average=True, output_dir=output_dir)

    return h_bonds_per_frame
710
+
711
+
712
def plot_hydrogen_count(h_bonds_per_frame, frame_skip, time_step, output_dir):
    """
    Plot the number of hydrogen bonds against simulation time.

    The curve and a dashed line at its mean are drawn, saved as
    ``h_bond_count.png`` in ``output_dir`` and shown.

    Parameters
    ----------
    h_bonds_per_frame : List[int]
        Hydrogen bond count of every analysed frame
    frame_skip : int
        Stride between analysed frames
    time_step : float
        Time step between trajectory frames in fs (the axis is converted to ps)
    output_dir : str
        Directory the PNG file is written to

    Returns
    -------
    None
    """
    # Frame number -> time; the division by 1000 converts fs to ps.
    time_axis = np.arange(len(h_bonds_per_frame)) * time_step * frame_skip / 1000
    mean_count = np.mean(h_bonds_per_frame)

    fig, ax1 = plt.subplots(figsize=(8, 6))
    ax1.plot(time_axis, h_bonds_per_frame, '-', color="blue", label="H-bond count")
    ax1.axhline(mean_count, linestyle="--", color="blue", label=f"Mean: {mean_count:.2f}")
    ax1.set_xlabel("Time [ps]", fontsize=12)
    ax1.set_ylabel("Count", fontsize=12)
    plt.title("Hydrogen bond count", fontsize=14)
    # Legend sits outside the axes, to the right of the plot.
    fig.legend(loc="center right", bbox_to_anchor=(1.1, 0.5))

    filename = os.path.join(output_dir, "h_bond_count.png")
    plt.savefig(filename, bbox_inches="tight")
    plt.show()
    plt.close()
    print(f"Hydrogen bond count plot saved to '{filename}'")
745
+
746
+
747
def plot_2Dheatmap(data_dict, output_dir):
    """
    Plot a 2D histogram of hydrogen bond angles versus H...acceptor distances.

    Parameters
    ----------
    data_dict : dict
        Mapping ``frame -> {hydrogen: bonds}`` where ``bonds`` is a list whose
        first entry is the donor and whose further entries are
        ``[acceptor, distance, angle]`` hydrogen bonds
    output_dir : str
        Directory the file ``h_bond_structure.png`` is written to

    Returns
    -------
    None
    """
    angles_list = []
    dist_list = []

    # Collect distance and angle of every hydrogen bond in every frame.
    for frame_bonds in data_dict.values():
        for bonds in frame_bonds.values():
            if len(bonds) > 1:
                for entry in bonds[1:]:
                    dist_list.append(entry[1])
                    angles_list.append(entry[2])

    # hist2d returns (counts, xedges, yedges, image); the image feeds the colorbar.
    _, _, _, mesh = plt.hist2d(angles_list, dist_list, bins=30, cmap="viridis")
    plt.colorbar(mesh, label="Count")

    plt.xlabel("Donor-Hydrogen-Acceptor Angle [°]", fontsize=12)
    plt.ylabel("Acceptor-Hydrogen Distance [Å]", fontsize=12)
    plt.title("2D Histogram of H-bonds parameters", fontsize=14)

    filename = os.path.join(output_dir, "h_bond_structure.png")
    plt.savefig(filename, bbox_inches="tight")
    plt.show()
    plt.close()
    print(f"H-bond structure 2D histogram saved to '{filename}'")