hillclimber 0.1.0a1__py3-none-any.whl → 0.1.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hillclimber might be problematic. Click here for more details.

@@ -0,0 +1,636 @@
1
+ """Analysis utilities for metadynamics simulations.
2
+
3
+ This module provides tools for analyzing metadynamics simulations,
4
+ including free energy surface reconstruction and other post-processing tasks.
5
+ """
6
+
7
+ import os
8
+ import re
9
+ import shutil
10
+ import subprocess
11
+ import typing as t
12
+ from pathlib import Path
13
+
14
+ import numpy as np
15
+
16
+
17
+ def _validate_multi_cv_params(
18
+ min_bounds: float | list[float] | None = None,
19
+ max_bounds: float | list[float] | None = None,
20
+ bin: int | list[int] | None = None,
21
+ spacing: float | list[float] | None = None,
22
+ sigma: float | list[float] | None = None,
23
+ idw: str | list[str] | None = None,
24
+ ) -> None:
25
+ """Validate that multi-CV parameters have consistent dimensions.
26
+
27
+ Parameters
28
+ ----------
29
+ min_bounds, max_bounds, bin, spacing, sigma, idw
30
+ Parameters from sum_hills that can be lists for multi-CV cases.
31
+
32
+ Raises
33
+ ------
34
+ ValueError
35
+ If list parameters have inconsistent lengths.
36
+ """
37
+ # Collect all list parameters and their lengths
38
+ list_params: dict[str, int] = {}
39
+
40
+ params_to_check = {
41
+ "min_bounds": min_bounds,
42
+ "max_bounds": max_bounds,
43
+ "bin": bin,
44
+ "spacing": spacing,
45
+ "sigma": sigma,
46
+ "idw": idw,
47
+ }
48
+
49
+ for name, value in params_to_check.items():
50
+ if isinstance(value, (list, tuple)):
51
+ list_params[name] = len(value)
52
+
53
+ # If no list parameters, nothing to validate (single CV case)
54
+ if not list_params:
55
+ return
56
+
57
+ # Check that all list parameters have the same length
58
+ lengths = set(list_params.values())
59
+ if len(lengths) > 1:
60
+ # Build a detailed error message
61
+ param_details = ", ".join(
62
+ f"{name}={length}" for name, length in list_params.items()
63
+ )
64
+ raise ValueError(
65
+ f"Inconsistent number of CVs in parameters. "
66
+ f"All list parameters must have the same length. "
67
+ f"Got: {param_details}"
68
+ )
69
+
70
+
71
+ def sum_hills(
72
+ hills_file: str | Path,
73
+ plumed_bin_path: str | Path | None = None,
74
+ # Boolean flags
75
+ negbias: bool = False,
76
+ nohistory: bool = False,
77
+ mintozero: bool = False,
78
+ # File/histogram options
79
+ histo: str | Path | None = None,
80
+ # Grid parameters
81
+ stride: int | None = None,
82
+ min_bounds: float | list[float] | None = None,
83
+ max_bounds: float | list[float] | None = None,
84
+ bin: int | list[int] | None = None,
85
+ spacing: float | list[float] | None = None,
86
+ # Variable selection
87
+ idw: str | list[str] | None = None,
88
+ # Output options
89
+ outfile: str | Path | None = None,
90
+ outhisto: str | Path | None = None,
91
+ # Integration parameters
92
+ kt: float | None = None,
93
+ sigma: float | list[float] | None = None,
94
+ # Format
95
+ fmt: str | None = None,
96
+ # Additional options
97
+ verbose: bool = True,
98
+ check: bool = True,
99
+ ) -> subprocess.CompletedProcess:
100
+ """Run PLUMED sum_hills to reconstruct free energy surfaces from metadynamics.
101
+
102
+ This function wraps the PLUMED ``sum_hills`` command-line tool, which analyzes
103
+ HILLS files from metadynamics simulations to reconstruct the free energy surface.
104
+
105
+ Parameters
106
+ ----------
107
+ hills_file : str or Path
108
+ Path to the HILLS file to analyze. This file is generated during
109
+ metadynamics simulations and contains the deposited Gaussian hills.
110
+ plumed_bin_path : str or Path, optional
111
+ Path to the PLUMED installation directory (containing ``bin/`` and ``lib/``
112
+ subdirectories). If None, searches for ``plumed`` in the system PATH.
113
+ When a full installation path is provided, the function will properly set
114
+ LD_LIBRARY_PATH to include the PLUMED libraries.
115
+ negbias : bool, default=False
116
+ Print the negative bias instead of the free energy.
117
+ nohistory : bool, default=False
118
+ To be used with ``stride``: splits the bias/histogram without previous history.
119
+ mintozero : bool, default=False
120
+ Translate all minimum values in bias/histogram to zero.
121
+ histo : str or Path, optional
122
+ Name of the file for histogram (a COLVAR/HILLS file is good).
123
+ stride : int, optional
124
+ Stride for integrating hills file. Default is 0 (never integrate).
125
+ min_bounds : float or list[float], optional
126
+ Lower bounds for the grid. For multi-dimensional CVs, provide a list with
127
+ one value per CV (e.g., ``[-3.14, -3.14]`` for two torsion angles).
128
+ max_bounds : float or list[float], optional
129
+ Upper bounds for the grid. For multi-dimensional CVs, provide a list with
130
+ one value per CV (e.g., ``[3.14, 3.14]`` for two torsion angles).
131
+ bin : int or list[int], optional
132
+ Number of bins for the grid. For multi-dimensional CVs, provide a list with
133
+ one value per CV (e.g., ``[250, 250]`` for two CVs with 250 bins each).
134
+ spacing : float or list[float], optional
135
+ Grid spacing, alternative to the number of bins. For multi-dimensional CVs,
136
+ provide a list with one value per CV.
137
+ idw : str or list[str], optional
138
+ Variables to be used for the free-energy/histogram. For multi-dimensional CVs,
139
+ provide a list with one variable name per CV (e.g., ``['phi', 'psi']``).
140
+ outfile : str or Path, optional
141
+ Output file for sum_hills. Default is ``fes.dat``.
142
+ outhisto : str or Path, optional
143
+ Output file for the histogram.
144
+ kt : float, optional
145
+ Temperature in energy units (kJ/mol) for integrating out variables.
146
+ sigma : float or list[float], optional
147
+ Sigma for binning (only needed when doing histogram). For multi-dimensional CVs,
148
+ provide a list with one value per CV.
149
+ fmt : str, optional
150
+ Output format specification.
151
+ verbose : bool, default=True
152
+ Print command output to stdout/stderr.
153
+ check : bool, default=True
154
+ Raise exception if command fails.
155
+
156
+ Returns
157
+ -------
158
+ subprocess.CompletedProcess
159
+ The completed process object from subprocess.run.
160
+
161
+ Raises
162
+ ------
163
+ FileNotFoundError
164
+ If the HILLS file, PLUMED executable, or PLUMED installation directory
165
+ cannot be found.
166
+ ValueError
167
+ If list-based parameters (``bin``, ``min_bounds``, ``max_bounds``, etc.)
168
+ have inconsistent lengths when using multiple CVs.
169
+ subprocess.CalledProcessError
170
+ If the PLUMED command fails and ``check=True``.
171
+
172
+ Examples
173
+ --------
174
+ Basic usage to reconstruct a 1D free energy surface:
175
+
176
+ >>> import hillclimber as hc
177
+ >>> hc.sum_hills("HILLS")
178
+
179
+ With custom grid resolution and output file:
180
+
181
+ >>> hc.sum_hills(
182
+ ... "HILLS",
183
+ ... bin=1000,
184
+ ... outfile="custom_fes.dat"
185
+ ... )
186
+
187
+ For a 2D free energy surface with explicit bounds:
188
+
189
+ >>> hc.sum_hills(
190
+ ... "HILLS",
191
+ ... bin=[100, 100],
192
+ ... min_bounds=[0.0, 0.0],
193
+ ... max_bounds=[10.0, 10.0],
194
+ ... outfile="fes_2d.dat"
195
+ ... )
196
+
197
+ For protein backbone torsion angles (phi and psi):
198
+
199
+ >>> hc.sum_hills(
200
+ ... "HILLS",
201
+ ... bin=[250, 250],
202
+ ... min_bounds=[-3.14, -3.14],
203
+ ... max_bounds=[3.14, 3.14],
204
+ ... idw=["phi", "psi"],
205
+ ... outfile="ramachandran.dat"
206
+ ... )
207
+
208
+ Resources
209
+ ---------
210
+ - https://www.plumed.org/doc-master/user-doc/html/sum_hills.html
211
+
212
+ Notes
213
+ -----
214
+ The HILLS file is automatically generated during metadynamics simulations
215
+ when using the METAD action. Each line in the file represents a deposited
216
+ Gaussian hill with its position, width (sigma), and height.
217
+
218
+ The free energy surface is reconstructed by summing all deposited hills:
219
+ F(s) = -V(s) where V(s) is the bias potential.
220
+
221
+ **Multi-CV Consistency:**
222
+ When using multiple collective variables (CVs), all list-based parameters
223
+ must have the same length. For example, if analyzing two CVs (phi and psi),
224
+ then ``bin``, ``min_bounds``, ``max_bounds``, and ``idw`` (if provided as lists)
225
+ must all have exactly 2 elements. The function will raise a ``ValueError``
226
+ if inconsistent list lengths are detected.
227
+ """
228
+ # Convert to Path object
229
+ hills_file = Path(hills_file)
230
+
231
+ # Verify HILLS file exists
232
+ if not hills_file.exists():
233
+ raise FileNotFoundError(f"HILLS file not found: {hills_file}")
234
+
235
+ # Find PLUMED executable and set up environment
236
+ env = os.environ.copy()
237
+
238
+ if plumed_bin_path is None:
239
+ # Try to find plumed in system PATH
240
+ plumed_exec = shutil.which("plumed")
241
+ if plumed_exec is None:
242
+ raise FileNotFoundError(
243
+ "PLUMED executable not found in system PATH. "
244
+ "Please install PLUMED or specify the installation path with plumed_bin_path="
245
+ )
246
+ else:
247
+ # Use provided PLUMED installation path
248
+ plumed_bin_path = Path(plumed_bin_path)
249
+ plumed_exec = plumed_bin_path / "bin" / "plumed"
250
+ lib_path = plumed_bin_path / "lib"
251
+
252
+ # Verify paths exist
253
+ if not plumed_exec.exists():
254
+ raise FileNotFoundError(
255
+ f"PLUMED executable not found at: {plumed_exec}\n"
256
+ f"Make sure plumed_bin_path points to the PLUMED installation directory "
257
+ f"containing bin/ and lib/ subdirectories."
258
+ )
259
+ if not lib_path.exists():
260
+ raise FileNotFoundError(f"PLUMED lib directory not found: {lib_path}")
261
+
262
+ # Set LD_LIBRARY_PATH for PLUMED libraries
263
+ current_ld_path = env.get("LD_LIBRARY_PATH", "")
264
+ if current_ld_path:
265
+ env["LD_LIBRARY_PATH"] = f"{lib_path}:{current_ld_path}"
266
+ else:
267
+ env["LD_LIBRARY_PATH"] = str(lib_path)
268
+
269
+ plumed_exec = str(plumed_exec)
270
+
271
+ # Validate multi-CV parameter consistency
272
+ _validate_multi_cv_params(
273
+ min_bounds=min_bounds,
274
+ max_bounds=max_bounds,
275
+ bin=bin,
276
+ spacing=spacing,
277
+ sigma=sigma,
278
+ idw=idw,
279
+ )
280
+
281
+ # Build command
282
+ cmd_parts = [plumed_exec, "sum_hills"]
283
+
284
+ # Add hills file
285
+ cmd_parts.extend(["--hills", str(hills_file)])
286
+
287
+ # Add boolean flags
288
+ if negbias:
289
+ cmd_parts.append("--negbias")
290
+ if nohistory:
291
+ cmd_parts.append("--nohistory")
292
+ if mintozero:
293
+ cmd_parts.append("--mintozero")
294
+
295
+ # Helper function to format list parameters
296
+ def format_param(value: t.Any) -> str:
297
+ if isinstance(value, (list, tuple)):
298
+ return ",".join(str(v) for v in value)
299
+ return str(value)
300
+
301
+ # Add optional parameters
302
+ if histo is not None:
303
+ cmd_parts.extend(["--histo", str(histo)])
304
+ if stride is not None:
305
+ cmd_parts.extend(["--stride", str(stride)])
306
+ if min_bounds is not None:
307
+ cmd_parts.extend(["--min", format_param(min_bounds)])
308
+ if max_bounds is not None:
309
+ cmd_parts.extend(["--max", format_param(max_bounds)])
310
+ if bin is not None:
311
+ cmd_parts.extend(["--bin", format_param(bin)])
312
+ if spacing is not None:
313
+ cmd_parts.extend(["--spacing", format_param(spacing)])
314
+ if idw is not None:
315
+ cmd_parts.extend(["--idw", format_param(idw)])
316
+ if outfile is not None:
317
+ cmd_parts.extend(["--outfile", str(outfile)])
318
+ if outhisto is not None:
319
+ cmd_parts.extend(["--outhisto", str(outhisto)])
320
+ if kt is not None:
321
+ cmd_parts.extend(["--kt", str(kt)])
322
+ if sigma is not None:
323
+ cmd_parts.extend(["--sigma", format_param(sigma)])
324
+ if fmt is not None:
325
+ cmd_parts.extend(["--fmt", str(fmt)])
326
+
327
+ # Run command
328
+ if verbose:
329
+ print(f"Running: {' '.join(cmd_parts)}")
330
+
331
+ result = subprocess.run(
332
+ cmd_parts,
333
+ env=env,
334
+ capture_output=not verbose,
335
+ text=True,
336
+ check=check,
337
+ )
338
+
339
+ if verbose and result.returncode == 0:
340
+ print("sum_hills completed successfully")
341
+
342
+ return result
343
+
344
+
345
+ def read_colvar(
346
+ colvar_file: str | Path,
347
+ ) -> dict[str, np.ndarray]:
348
+ """Read a PLUMED COLVAR file and parse its contents.
349
+
350
+ This function reads a COLVAR file produced by PLUMED, extracts the field names
351
+ from the header (which starts with ``#! FIELDS``), and returns the data as a
352
+ dictionary mapping field names to numpy arrays.
353
+
354
+ Parameters
355
+ ----------
356
+ colvar_file : str or Path
357
+ Path to the COLVAR file to read.
358
+
359
+ Returns
360
+ -------
361
+ dict[str, np.ndarray]
362
+ Dictionary mapping field names to 1D numpy arrays containing the data.
363
+ Keys correspond to the fields specified in the COLVAR header.
364
+
365
+ Raises
366
+ ------
367
+ FileNotFoundError
368
+ If the COLVAR file does not exist.
369
+ ValueError
370
+ If the COLVAR file does not contain a valid ``#! FIELDS`` header.
371
+
372
+ Examples
373
+ --------
374
+ >>> import hillclimber as hc
375
+ >>> data = hc.read_colvar("COLVAR")
376
+ >>> print(data.keys())
377
+ dict_keys(['time', 'phi', 'psi'])
378
+ >>> print(data['time'][:5])
379
+ [0. 1. 2. 3. 4.]
380
+
381
+ Notes
382
+ -----
383
+ The COLVAR file format from PLUMED starts with a header line:
384
+ ``#! FIELDS time cv1 cv2 ...``
385
+
386
+ All subsequent lines starting with ``#`` are treated as comments and ignored.
387
+ Data lines are parsed as whitespace-separated numeric values.
388
+
389
+ Resources
390
+ ---------
391
+ - https://www.plumed.org/doc-master/user-doc/html/colvar.html
392
+ """
393
+ colvar_file = Path(colvar_file)
394
+
395
+ if not colvar_file.exists():
396
+ raise FileNotFoundError(f"COLVAR file not found: {colvar_file}")
397
+
398
+ # Read the file
399
+ with open(colvar_file, "r") as f:
400
+ lines = f.readlines()
401
+
402
+ # Find and parse the header
403
+ field_names: list[str] | None = None
404
+ for line in lines:
405
+ if line.startswith("#! FIELDS"):
406
+ # Extract field names from the header
407
+ # Format: "#! FIELDS time phi psi ..."
408
+ fields_match = re.match(r"#!\s*FIELDS\s+(.+)", line)
409
+ if fields_match:
410
+ field_names = fields_match.group(1).split()
411
+ break
412
+
413
+ if field_names is None:
414
+ raise ValueError(
415
+ f"COLVAR file {colvar_file} does not contain a valid '#! FIELDS' header"
416
+ )
417
+
418
+ # Parse data lines (skip comments)
419
+ data_lines = []
420
+ for line in lines:
421
+ # Skip comments and empty lines
422
+ if line.startswith("#") or not line.strip():
423
+ continue
424
+ # Parse numeric data
425
+ values = line.split()
426
+ if len(values) == len(field_names):
427
+ data_lines.append([float(v) for v in values])
428
+
429
+ # Convert to numpy array
430
+ data_array = np.array(data_lines)
431
+
432
+ # Create dictionary mapping field names to columns
433
+ result = {name: data_array[:, i] for i, name in enumerate(field_names)}
434
+
435
+ return result
436
+
437
+
438
def plot_cv_time_series(
    colvar_file: str | Path,
    cv_names: list[str] | None = None,
    time_unit: str = "ps",
    exclude_patterns: list[str] | None = None,
    figsize: tuple[float, float] = (8, 5),
    kde_width: str = "25%",
    colors: list[str] | None = None,
    alpha: float = 0.5,
    marker: str = "x",
    marker_size: float = 10,
) -> tuple[t.Any, t.Any]:
    """Scatter-plot collective variables against time with a side KDE panel.

    The COLVAR file is loaded with ``read_colvar``. Each selected CV is drawn
    as a scatter series over the ``time`` column on the main axis, and its
    kernel density estimate is drawn on a second axis attached to the right of
    the main one, sharing its y-axis.

    Parameters
    ----------
    colvar_file : str or Path
        Path to the COLVAR file to plot.
    cv_names : list[str], optional
        Names of the fields to plot. If None, every field that does not match
        an exclusion pattern is plotted.
    time_unit : str, default='ps'
        Unit shown in the x-axis label.
    exclude_patterns : list[str], optional
        Extra regular expressions (applied with ``re.match``) naming fields to
        leave out of auto-detection. They are added to the built-in patterns
        ``^time$``, ``^sigma_.*$``, ``^height$`` and ``^biasf$``. Only used
        when ``cv_names`` is None.
    figsize : tuple[float, float], default=(8, 5)
        Figure size passed to ``plt.subplots``.
    kde_width : str, default='25%'
        Size of the KDE axis, passed to ``append_axes``.
    colors : list[str], optional
        Colors assigned to the CVs in order, cycling when there are more CVs
        than colors. If None, the ``tab10`` colormap colors are used.
    alpha : float, default=0.5
        Transparency of the scatter points.
    marker : str, default='x'
        Marker style of the scatter points.
    marker_size : float, default=10
        Size of the scatter markers.

    Returns
    -------
    fig : matplotlib.figure.Figure
        The figure that was created.
    axes : tuple
        ``(main_axis, kde_axis)``.

    Raises
    ------
    ImportError
        If matplotlib or seaborn cannot be imported.
    FileNotFoundError
        If the COLVAR file does not exist.
    ValueError
        If auto-detection leaves no field to plot, if a requested CV is not a
        field of the file, or if the file has no ``time`` field.

    Examples
    --------
    Auto-detected CVs:

    >>> import hillclimber as hc
    >>> fig, axes = hc.plot_cv_time_series("COLVAR")

    Explicit CV selection:

    >>> fig, axes = hc.plot_cv_time_series("COLVAR", cv_names=["phi", "psi"])

    Custom appearance:

    >>> fig, axes = hc.plot_cv_time_series(
    ...     "COLVAR",
    ...     figsize=(10, 6),
    ...     colors=["blue", "red"],
    ...     alpha=0.7
    ... )

    Notes
    -----
    matplotlib and seaborn are imported lazily inside the function. Calling it
    applies the seaborn ``whitegrid`` style via ``sns.set``.

    Resources
    ---------
    - https://www.plumed.org/doc-master/user-doc/html/colvar.html
    """
    try:
        import matplotlib.pyplot as plt
        import seaborn as sns
        from mpl_toolkits.axes_grid1 import make_axes_locatable
    except ImportError as e:
        raise ImportError(
            "matplotlib and seaborn are required for plotting. "
            "Install them with: pip install matplotlib seaborn"
        ) from e

    data = read_colvar(colvar_file)

    if cv_names is None:
        # Built-in metadata fields first, then any caller-supplied patterns
        extra_patterns = [] if exclude_patterns is None else list(exclude_patterns)
        skip_patterns = [
            r"^time$",
            r"^sigma_.*$",
            r"^height$",
            r"^biasf$",
            *extra_patterns,
        ]

        # Keep every field that matches none of the exclusion patterns
        cv_names = [
            field
            for field in data
            if not any(re.match(pattern, field) for pattern in skip_patterns)
        ]
        if not cv_names:
            raise ValueError(
                "No CVs detected in COLVAR file. "
                "All fields were excluded by the exclusion patterns."
            )

    unknown = [name for name in cv_names if name not in data]
    if unknown:
        raise ValueError(
            f"CVs not found in COLVAR file: {unknown}. "
            f"Available fields: {list(data.keys())}"
        )

    if "time" not in data:
        raise ValueError("COLVAR file must contain a 'time' field")
    time = data["time"]

    if colors is None:
        colors = plt.cm.tab10.colors  # type: ignore

    sns.set(style="whitegrid")

    fig, ax = plt.subplots(figsize=figsize)

    # Pair each CV with its color once; the palette wraps around when exhausted
    palette_size_source = colors
    series = [
        (name, palette_size_source[index % len(palette_size_source)])
        for index, name in enumerate(cv_names)
    ]

    # Main panel: one scatter series per CV
    for name, shade in series:
        ax.scatter(
            time,
            data[name],
            c=[shade],
            label=name,
            marker=marker,
            s=marker_size,
            alpha=alpha,
        )

    ax.set_xlabel(f"Time / {time_unit}")
    ax.set_ylabel("CV value")
    ax.legend()

    # Side panel: attach a y-sharing axis to the right of the main one
    ax_kde = make_axes_locatable(ax).append_axes(
        "right", size=kde_width, pad=0.1, sharey=ax
    )

    for name, shade in series:
        sns.kdeplot(
            y=data[name],
            ax=ax_kde,
            color=shade,
            fill=True,
            alpha=0.3,
            linewidth=1.5,
            label=name,
        )

    # The shared y-axis already carries the tick labels on the main panel
    ax_kde.set_xlabel("Density")
    ax_kde.yaxis.set_tick_params(labelleft=False)

    plt.tight_layout()

    return fig, (ax, ax_kde)