chemparseplot 0.0.3__tar.gz → 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/.gitignore +1 -0
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/PKG-INFO +8 -3
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/_version.py +2 -2
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/chemparseplot/__init__.py +1 -1
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/chemparseplot/parse/eon/minimization.py +1 -1
- chemparseplot-1.0.1/chemparseplot/parse/eon/neb.py +388 -0
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/chemparseplot/parse/eon/saddle_search.py +10 -8
- chemparseplot-1.0.1/chemparseplot/parse/file_.py +12 -0
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/chemparseplot/parse/orca/neb/interp.py +1 -1
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/chemparseplot/parse/sella/saddle_search.py +1 -1
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/chemparseplot/plot/geomscan.py +1 -1
- chemparseplot-1.0.1/chemparseplot/plot/neb.py +541 -0
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/chemparseplot/plot/structs.py +1 -1
- chemparseplot-1.0.1/chemparseplot/plot/theme.py +152 -0
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/chemparseplot/util.py +1 -1
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/pyproject.toml +16 -31
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/tests/parse/orca/test_interp.py +1 -1
- chemparseplot-0.0.3/chemparseplot/analyze/dist.py +0 -130
- chemparseplot-0.0.3/chemparseplot/analyze/use_ira.py +0 -73
- chemparseplot-0.0.3/chemparseplot/basetypes.py +0 -90
- chemparseplot-0.0.3/chemparseplot/plot/_aids.py +0 -9
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/LICENSE +0 -0
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/chemparseplot/parse/__init__.py +0 -0
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/chemparseplot/parse/converter.py +0 -0
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/chemparseplot/parse/eon/gprd.py +0 -0
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/chemparseplot/parse/orca/__init__.py +0 -0
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/chemparseplot/parse/orca/geomscan.py +0 -0
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/chemparseplot/parse/patterns.py +0 -0
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/chemparseplot/plot/__init__.py +0 -0
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/chemparseplot/units.py +0 -0
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/readme.md +0 -0
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/tests/parse/orca/test_geomscan.py +0 -0
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/tests/parse/test_converter.py +0 -0
- {chemparseplot-0.0.3 → chemparseplot-1.0.1}/tests/parse/test_patterns.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: chemparseplot
|
|
3
|
-
Version:
|
|
3
|
+
Version: 1.0.1
|
|
4
4
|
Summary: Parsers and plotting tools for computational chemistry
|
|
5
5
|
Project-URL: Documentation, https://github.com/HaoZeke/chemparseplot#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/HaoZeke/chemparseplot/issues
|
|
@@ -12,14 +12,14 @@ Keywords: compchem,parser,plot
|
|
|
12
12
|
Classifier: Development Status :: 4 - Beta
|
|
13
13
|
Classifier: Programming Language :: Python
|
|
14
14
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
16
15
|
Classifier: Programming Language :: Python :: 3.10
|
|
17
16
|
Classifier: Programming Language :: Python :: 3.11
|
|
18
17
|
Classifier: Programming Language :: Python :: 3.12
|
|
19
18
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
20
|
-
Requires-Python: >=3.
|
|
19
|
+
Requires-Python: >=3.10
|
|
21
20
|
Requires-Dist: numpy>=1.26.2
|
|
22
21
|
Requires-Dist: pint>=0.22
|
|
22
|
+
Requires-Dist: rgpycrumbs>=1.0.0
|
|
23
23
|
Provides-Extra: doc
|
|
24
24
|
Requires-Dist: mdit-py-plugins>=0.3.4; extra == 'doc'
|
|
25
25
|
Requires-Dist: myst-nb>=1; extra == 'doc'
|
|
@@ -31,9 +31,14 @@ Requires-Dist: sphinx-sitemap>=2.5.1; extra == 'doc'
|
|
|
31
31
|
Requires-Dist: sphinx-togglebutton>=0.3.2; extra == 'doc'
|
|
32
32
|
Requires-Dist: sphinx>=7.2.6; extra == 'doc'
|
|
33
33
|
Requires-Dist: sphinxcontrib-apidoc>=0.4; extra == 'doc'
|
|
34
|
+
Provides-Extra: lint
|
|
35
|
+
Requires-Dist: ruff>=0.1.6; extra == 'lint'
|
|
34
36
|
Provides-Extra: plot
|
|
35
37
|
Requires-Dist: cmcrameri>=1.7; extra == 'plot'
|
|
36
38
|
Requires-Dist: matplotlib>=3.8.2; extra == 'plot'
|
|
39
|
+
Provides-Extra: test
|
|
40
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == 'test'
|
|
41
|
+
Requires-Dist: pytest>=7.4.3; extra == 'test'
|
|
37
42
|
Description-Content-Type: text/markdown
|
|
38
43
|
|
|
39
44
|
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '
|
|
32
|
-
__version_tuple__ = version_tuple = (
|
|
31
|
+
__version__ = version = '1.0.1'
|
|
32
|
+
__version_tuple__ = version_tuple = (1, 0, 1)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -8,7 +8,7 @@ def min_e_result(eresp: Path) -> dict:
|
|
|
8
8
|
"""Reads and parses the results.dat file.
|
|
9
9
|
|
|
10
10
|
Args:
|
|
11
|
-
eresp: Path to the
|
|
11
|
+
eresp: Path to the eOn results directory.
|
|
12
12
|
|
|
13
13
|
Returns:
|
|
14
14
|
A dictionary containing the parsed data from results.dat, or None if the file
|
|
@@ -0,0 +1,388 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from collections.abc import Callable
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
import polars as pl
|
|
7
|
+
from ase import Atoms
|
|
8
|
+
from ase.io import read as ase_read
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
from rgpycrumbs._aux import _import_from_parent_env
|
|
12
|
+
from rgpycrumbs.geom.api.alignment import (
|
|
13
|
+
calculate_rmsd_from_ref,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
ira_mod = _import_from_parent_env("ira_mod")
|
|
17
|
+
except ImportError:
|
|
18
|
+
ira_mod = None
|
|
19
|
+
|
|
20
|
+
log = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def calculate_landscape_coords(
    atoms_list: list[Atoms], ira_instance, ira_kmax: float
) -> tuple[np.ndarray, np.ndarray]:
    """
    Calculates 2D landscape coordinates (RMSD-R, RMSD-P) for a path.

    The first structure of ``atoms_list`` is used as the reference for RMSD-R
    and the last structure as the reference for RMSD-P.

    :param atoms_list: List of ASE Atoms objects representing the path.
    :param ira_instance: An instantiated IRA object (or None).
    :param ira_kmax: kmax factor for IRA.
    :return: A tuple of (rmsd_r, rmsd_p) arrays.
    :raises IndexError: If ``atoms_list`` is empty.
    """
    # Log through the module logger (not the root logger) so the record carries
    # this module's name and follows the same configuration as the other
    # functions in this file.
    log.info("Calculating landscape coordinates (RMSD-R, RMSD-P)...")
    rmsd_r = calculate_rmsd_from_ref(
        atoms_list, ira_instance, ref_atom=atoms_list[0], ira_kmax=ira_kmax
    )
    rmsd_p = calculate_rmsd_from_ref(
        atoms_list, ira_instance, ref_atom=atoms_list[-1], ira_kmax=ira_kmax
    )
    return rmsd_r, rmsd_p
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _validate_data_atoms_match(z_data, atoms, dat_file_name):
|
|
45
|
+
"""Checks if data points count matches structure count."""
|
|
46
|
+
if len(z_data) != len(atoms):
|
|
47
|
+
errmsg = (
|
|
48
|
+
f"Structure count ({len(atoms)}) != data point count "
|
|
49
|
+
f"({len(z_data)}) in {dat_file_name}"
|
|
50
|
+
)
|
|
51
|
+
log.error(errmsg)
|
|
52
|
+
raise ValueError(errmsg)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def load_or_compute_data(
    cache_file: Path | None,
    force_recompute: bool,
    validation_check: Callable[[pl.DataFrame], None],
    computation_callback: Callable[[], pl.DataFrame],
    context_name: str,
) -> pl.DataFrame:
    """Return data from a parquet cache, or compute it and refresh the cache.

    The cache is used only when ``cache_file`` is set, exists, and
    ``force_recompute`` is false. A cached frame is passed to
    ``validation_check``; any exception raised while reading or validating is
    logged as a warning and the data is recomputed instead.

    After ``computation_callback`` runs, the result is written to
    ``cache_file`` when one was given. A failed write is logged as an error
    and does not prevent the result from being returned.

    ``context_name`` only appears in log messages.
    """
    if cache_file and cache_file.exists() and not force_recompute:
        log.info(f"Loading cached {context_name} data from {cache_file}...")
        try:
            cached = pl.read_parquet(cache_file)
            validation_check(cached)
            log.info(f"Loaded {cached.height} rows from cache.")
        except Exception as e:
            # Best effort: an unreadable or stale cache falls through to a
            # fresh computation below.
            log.warning(f"Cache load failed or invalid: {e}. Recomputing...")
        else:
            return cached

    log.info(f"Computing {context_name} data...")
    fresh = computation_callback()

    if not cache_file:
        return fresh

    log.info(f"Saving {context_name} cache to {cache_file}...")
    try:
        fresh.write_parquet(cache_file)
    except Exception as e:
        log.error(f"Failed to write cache file: {e}")
    return fresh
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def load_structures_and_calculate_additional_rmsd(
    con_file: Path,
    additional_con: list[tuple[Path, str]],
    ira_kmax: float,
    sp_file: Path | None = None,
):
    """Load the main trajectory and place extra structures relative to it.

    Every extra structure (the optional saddle point and each entry of
    ``additional_con``) gets two RMSD values: one against the first image of
    the main trajectory and one against its last image.

    :param con_file: File holding the main trajectory; all images are read.
    :param additional_con: ``(path, label)`` pairs; an empty or
        whitespace-only label is replaced by the file's stem.
    :param ira_kmax: kmax factor forwarded to the RMSD calculation.
    :param sp_file: Optional saddle point file; ignored when it does not exist.
    :return: ``(atoms_list, additional_atoms_data, sp_data)`` where
        ``additional_atoms_data`` is a list of ``(atoms, rmsd_r, rmsd_p, label)``
        tuples and ``sp_data`` is ``None`` or a dict with keys ``"atoms"``,
        ``"r"`` and ``"p"``.
    """
    log.info(f"Reading structures from {con_file}")
    atoms_list = ase_read(con_file, index=":")
    log.info(f"Loaded {len(atoms_list)} structures.")

    ira_instance = ira_mod.IRA() if ira_mod else None

    def _rmsd_to_endpoints(structure):
        """Return (RMSD to first image, RMSD to last image) for one structure."""
        to_first = calculate_rmsd_from_ref(
            [structure],
            ira_instance,
            ref_atom=atoms_list[0],
            ira_kmax=ira_kmax,
        )[0]
        to_last = calculate_rmsd_from_ref(
            [structure],
            ira_instance,
            ref_atom=atoms_list[-1],
            ira_kmax=ira_kmax,
        )[0]
        return to_first, to_last

    # --- Explicit Saddle Point Loading ---
    sp_data = None
    if sp_file and sp_file.exists():
        log.info(f"Loading explicit saddle point from {sp_file}")
        sp_atoms = ase_read(sp_file)
        sp_rmsd_r, sp_rmsd_p = _rmsd_to_endpoints(sp_atoms)
        sp_data = {"atoms": sp_atoms, "r": sp_rmsd_r, "p": sp_rmsd_p}

    # --- Additional Structures Loading ---
    additional_atoms_data = []
    for add_file, add_label in additional_con or ():
        # Fall back to the file stem when no usable label was supplied.
        has_label = bool(add_label) and add_label.strip() != ""
        label = add_label if has_label else add_file.stem

        log.info(f"Processing additional structure: {label}")
        extra_atoms = ase_read(add_file)
        extra_rmsd_r, extra_rmsd_p = _rmsd_to_endpoints(extra_atoms)
        additional_atoms_data.append((extra_atoms, extra_rmsd_r, extra_rmsd_p, label))

    return atoms_list, additional_atoms_data, sp_data
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _process_single_path_step(
    dat_file,
    con_file,
    y_data_column,
    ira_instance,
    ira_kmax,
    step_idx,
    ref_atoms=None,
    prod_atoms=None,
):
    """Turn one .dat/.con pair into a DataFrame with one row per image.

    The .dat table is read with its first row skipped and transposed so that
    indexing selects a column. Column ``y_data_column`` becomes ``z``; column
    index 3 is used as the scalar that scales the synthetic gradients (the
    original code calls it ``f_para`` -- presumably the force parallel to the
    path, to be confirmed against the .dat format).

    When ``ref_atoms`` / ``prod_atoms`` are not given, the first and last
    images of ``con_file`` serve as the RMSD references.

    The returned frame has the columns ``r``, ``p``, ``grad_r``, ``grad_p``,
    ``z`` and ``step``.
    """
    columns = np.loadtxt(dat_file, skiprows=1).T
    z_values = columns[y_data_column]
    images = ase_read(con_file, index=":")
    f_para = columns[3]

    _validate_data_atoms_match(z_values, images, dat_file.name)

    # Self-contained NEB: endpoints come from this path. Augmentation mode:
    # the caller supplies the endpoints of the main path.
    if ref_atoms is None:
        reactant = images[0]
    else:
        reactant = ref_atoms
    if prod_atoms is None:
        product = images[-1]
    else:
        product = prod_atoms

    rmsd_r = calculate_rmsd_from_ref(
        images, ira_instance, ref_atom=reactant, ira_kmax=ira_kmax
    )
    rmsd_p = calculate_rmsd_from_ref(
        images, ira_instance, ref_atom=product, ira_kmax=ira_kmax
    )

    # --- Calculate Synthetic 2D Gradients ---
    # Unit tangent of the path in (r, p) space, scaled by -f_para.
    d_r = np.gradient(rmsd_r)
    d_p = np.gradient(rmsd_p)
    seg_len = np.sqrt(d_r**2 + d_p**2)
    # Avoid division by zero where consecutive images coincide in (r, p).
    safe_len = np.where(seg_len == 0, 1.0, seg_len)
    tangent_r = d_r / safe_len
    tangent_p = d_p / safe_len

    frame_columns = {
        "r": rmsd_r,
        "p": rmsd_p,
        "grad_r": -f_para * tangent_r,
        "grad_p": -f_para * tangent_p,
        "z": z_values,
        "step": int(step_idx),
    }
    return pl.DataFrame(frame_columns)
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def aggregate_neb_landscape_data(
    all_dat_paths: list[Path],
    all_con_paths: list[Path],
    y_data_column: int,
    ira_instance,  # Can be None
    cache_file: Path | None = None,
    force_recompute: bool = False,
    ira_kmax: float = 1.8,
    # Caching augmentation
    augment_dat: str | None = None,
    augment_con: str | None = None,
    ref_atoms: Atoms | None = None,
    prod_atoms: Atoms | None = None,
) -> pl.DataFrame:
    """Aggregates data from multiple NEB steps for landscape visualization.

    Each ``(dat, con)`` pair is processed into per-image rows and all rows are
    concatenated. The result is cached via ``load_or_compute_data``.

    :param all_dat_paths: One .dat file per NEB step.
    :param all_con_paths: One .con file per NEB step, paired by position with
        ``all_dat_paths``; on a length mismatch both lists are truncated to
        the shorter one and a warning is logged.
    :param y_data_column: Column index of the .dat table stored as ``z``.
    :param ira_instance: IRA object; when None and ``ira_mod`` is available a
        new one is created.
    :param cache_file: Optional parquet cache location.
    :param force_recompute: Ignore an existing cache.
    :param ira_kmax: kmax factor for IRA.
    :param augment_dat: Glob pattern of extra .dat files (augmentation).
    :param augment_con: Glob pattern of extra .con files (augmentation).
    :param ref_atoms: Reference structure the augmentation data is measured
        against (RMSD-R).
    :param prod_atoms: Product structure the augmentation data is measured
        against (RMSD-P).
    :return: DataFrame with columns r, p, grad_r, grad_p, z and step.
    :raises RuntimeError: If a recomputation yields no data at all.
    """

    # Init IRA if not passed
    if ira_instance is None and ira_mod is not None:
        ira_instance = ira_mod.IRA()

    def validate_landscape_cache(df: pl.DataFrame):
        """Reject caches that lack the landscape or gradient columns."""
        if "p" not in df.columns:
            raise ValueError("Cache missing 'p' column.")
        if "grad_r" not in df.columns:
            raise ValueError("Cache missing gradient columns (outdated).")

    def compute_landscape_data() -> pl.DataFrame:
        """Process augmentation data (if requested) and every NEB step."""
        all_dfs = []

        # --- Load Augmentation Data (Inside Cache Block) ---
        # The structures are tested with `is not None`, matching
        # _process_single_path_step, rather than relying on the truth value
        # of an Atoms object.
        augmentation_requested = (
            bool(augment_dat)
            and bool(augment_con)
            and ref_atoms is not None
            and prod_atoms is not None
        )
        if augmentation_requested:
            log.info(f"Loading augmentation data for cache: {augment_dat}")
            df_aug = load_augmenting_neb_data(
                augment_dat,
                augment_con,
                ref_atoms=ref_atoms,
                prod_atoms=prod_atoms,
                y_data_column=y_data_column,
                ira_kmax=ira_kmax,
            )
            if not df_aug.is_empty():
                all_dfs.append(df_aug)

        # Synchronization check
        paths_dat = all_dat_paths
        paths_con = all_con_paths
        if len(paths_dat) != len(paths_con):
            log.warning(f"Mismatch: {len(paths_dat)} dat vs {len(paths_con)} con.")
            min_len = min(len(paths_dat), len(paths_con))
            paths_dat = paths_dat[:min_len]
            paths_con = paths_con[:min_len]

        for step_idx, (dat_file, con_file_step) in enumerate(
            zip(paths_dat, paths_con, strict=True)
        ):
            try:
                df_step = _process_single_path_step(
                    dat_file,
                    con_file_step,
                    y_data_column,
                    ira_instance,
                    ira_kmax,
                    step_idx,
                )
                all_dfs.append(df_step)
            except Exception as e:
                # Best effort: one unreadable step must not abort the
                # whole aggregation.
                log.warning(f"Failed to process step {step_idx} ({dat_file.name}): {e}")
                continue

        if not all_dfs:
            rerr = "No data could be aggregated."
            raise RuntimeError(rerr)

        return pl.concat(all_dfs)

    return load_or_compute_data(
        cache_file=cache_file,
        force_recompute=force_recompute,
        validation_check=validate_landscape_cache,
        computation_callback=compute_landscape_data,
        context_name="Landscape",
    )
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def load_augmenting_neb_data(
    dat_pattern: str,
    con_pattern: str,
    ref_atoms: Atoms,
    prod_atoms: Atoms,
    y_data_column: int,
    ira_kmax: float,
) -> pl.DataFrame:
    """
    Loads external NEB paths (dat+con) to augment the landscape fit.
    Forces projection onto the MAIN path's R/P coordinates.

    :param dat_pattern: Glob pattern matching the external .dat files.
    :param con_pattern: Glob pattern matching the external .con files.
    :param ref_atoms: Reference structure of the main path (RMSD-R).
    :param prod_atoms: Product structure of the main path (RMSD-P).
    :param y_data_column: Column index of the .dat table stored as ``z``.
    :param ira_kmax: kmax factor for IRA.
    :return: Concatenated rows of all pairs that could be processed, each
        tagged with ``step == -1``; an empty DataFrame when either pattern
        matches nothing or every pair fails.
    """
    from chemparseplot.parse.file_ import find_file_paths

    dat_paths = find_file_paths(dat_pattern)
    con_paths = find_file_paths(con_pattern)

    if not dat_paths or not con_paths:
        log.warning("Augmentation patterns did not match files.")
        return pl.DataFrame()

    # Sync lengths. Files are paired by sorted position, so a count mismatch
    # means some files are dropped -- report it instead of truncating silently.
    min_len = min(len(dat_paths), len(con_paths))
    if len(dat_paths) != len(con_paths):
        log.warning(
            f"Augmentation mismatch: {len(dat_paths)} dat vs {len(con_paths)} con."
        )
    dat_paths = dat_paths[:min_len]
    con_paths = con_paths[:min_len]

    log.info(f"Augmenting with {min_len} external paths...")

    all_dfs = []
    ira_instance = ira_mod.IRA() if ira_mod else None

    for d, c in zip(dat_paths, con_paths, strict=True):
        try:
            # Step -1 indicates 'background/augmented' data
            df = _process_single_path_step(
                d,
                c,
                y_data_column,
                ira_instance,
                ira_kmax,
                -1,
                ref_atoms=ref_atoms,
                prod_atoms=prod_atoms,
            )
            all_dfs.append(df)
        except Exception as e:
            # Best effort: skip pairs that cannot be read or processed.
            log.warning(f"Failed to load augmentation pair {d.name}: {e}")

    return pl.concat(all_dfs) if all_dfs else pl.DataFrame()
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def compute_profile_rmsd(
    atoms_list: list[Atoms],
    cache_file: Path | None,
    force_recompute: bool,
    ira_kmax: float,
) -> pl.DataFrame:
    """Return the RMSD of every image to the first image as a one-column frame.

    The result has a single column ``r`` and goes through the parquet cache
    handled by ``load_or_compute_data``. A cached frame is rejected when it
    has a ``p`` column or when its row count differs from ``len(atoms_list)``.
    """

    def _check_cached_profile(cached: pl.DataFrame):
        """Raise ValueError when the cached frame does not fit this profile."""
        if "p" in cached.columns:
            raise ValueError("Cache contains 'p' column (looks like landscape data).")
        n_structures = len(atoms_list)
        if cached.height != n_structures:
            raise ValueError(
                f"Size mismatch: {cached.height} vs {n_structures} structures."
            )

    def _rmsd_to_first_image() -> pl.DataFrame:
        """Compute the ``r`` column from scratch."""
        aligner = ira_mod.IRA() if ira_mod else None
        distances = calculate_rmsd_from_ref(
            atoms_list, aligner, ref_atom=atoms_list[0], ira_kmax=ira_kmax
        )
        return pl.DataFrame({"r": distances})

    return load_or_compute_data(
        cache_file=cache_file,
        force_recompute=force_recompute,
        validation_check=_check_cached_profile,
        computation_callback=_rmsd_to_first_image,
        context_name="Profile RMSD",
    )
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def estimate_rbf_smoothing(df: pl.DataFrame) -> float:
    """
    Estimates a smoothing parameter for RBF interpolation.

    Rows are sorted by ``step`` and then ``r``. Within each step the
    differences of ``r`` and ``p`` between neighbouring rows give a Euclidean
    distance, and the median of all those distances is returned. Rows
    containing nulls (such as the first row of each step, which has no
    predecessor) are dropped before the median is taken.

    Returns 0.0 when no median is available or when it is zero.
    """
    ordered = df.sort(["step", "r"])

    # Neighbour differences, restarted for every step.
    with_deltas = ordered.with_columns(
        pl.col("r").diff().over("step").alias("dr"),
        pl.col("p").diff().over("step").alias("dp"),
    )
    squared = pl.col("dr") ** 2 + pl.col("dp") ** 2
    with_dist = with_deltas.with_columns(squared.sqrt().alias("dist")).drop_nulls()

    median_spacing = with_dist["dist"].median()

    no_spacing = median_spacing is None or median_spacing == 0
    return 0.0 if no_spacing else median_spacing
|
|
@@ -10,7 +10,7 @@ from rgpycrumbs.parsers.bless import BLESS_LOG
|
|
|
10
10
|
from rgpycrumbs.parsers.common import _NUM
|
|
11
11
|
from rgpycrumbs.search.helpers import tail
|
|
12
12
|
|
|
13
|
-
from
|
|
13
|
+
from rgpycrumbs.basetypes import DimerOpt, MolGeom, SaddleMeasure, SpinID
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class EONSaddleStatus(Enum):
|
|
@@ -72,7 +72,7 @@ def _read_results_dat(eresp: Path) -> dict:
|
|
|
72
72
|
"""Reads and parses the results.dat file.
|
|
73
73
|
|
|
74
74
|
Args:
|
|
75
|
-
eresp: Path to the
|
|
75
|
+
eresp: Path to the eOn results directory.
|
|
76
76
|
|
|
77
77
|
Returns:
|
|
78
78
|
A dictionary containing the parsed data from results.dat, or None if the file
|
|
@@ -103,10 +103,10 @@ def _read_results_dat(eresp: Path) -> dict:
|
|
|
103
103
|
|
|
104
104
|
|
|
105
105
|
def _find_log_file(eresp: Path) -> Path | None:
|
|
106
|
-
"""Finds the most recent, valid log file within the
|
|
106
|
+
"""Finds the most recent, valid log file within the eOn results directory.
|
|
107
107
|
|
|
108
108
|
Args:
|
|
109
|
-
eresp: Path to the
|
|
109
|
+
eresp: Path to the eOn results directory.
|
|
110
110
|
|
|
111
111
|
Returns:
|
|
112
112
|
Path to the chosen log file, or None if no suitable log file is found.
|
|
@@ -182,7 +182,7 @@ def _extract_saddle_info(
|
|
|
182
182
|
|
|
183
183
|
Args:
|
|
184
184
|
log_data: A list of strings representing the lines of the log file.
|
|
185
|
-
eresp: Path to the
|
|
185
|
+
eresp: Path to the eOn results directory.
|
|
186
186
|
is_gprd: Boolean flag indicating whether the GPRD method was used.
|
|
187
187
|
|
|
188
188
|
Returns:
|
|
@@ -206,12 +206,14 @@ def _extract_saddle_info(
|
|
|
206
206
|
saddle_fmax = np.abs(np.max(saddle.forces))
|
|
207
207
|
elif not is_gprd:
|
|
208
208
|
try:
|
|
209
|
+
# Expected header: Step, Step Size, Delta E, ||Force||
|
|
210
|
+
# ||Force|| is the 5th element (index 4)
|
|
209
211
|
saddle_fmax = float(
|
|
210
212
|
(eresp / "client_spdlog.log")
|
|
211
213
|
.read_text()
|
|
212
214
|
.strip()
|
|
213
215
|
.split("\n")[-5:][0]
|
|
214
|
-
.split()[
|
|
216
|
+
.split()[4]
|
|
215
217
|
)
|
|
216
218
|
except (FileNotFoundError, IndexError):
|
|
217
219
|
saddle_fmax = 0.0
|
|
@@ -243,10 +245,10 @@ def _get_methods(eresp: Path) -> DimerOpt:
|
|
|
243
245
|
|
|
244
246
|
|
|
245
247
|
def parse_eon_saddle(eresp: Path, rloc: "SpinID") -> "SaddleMeasure":
|
|
246
|
-
"""Parses
|
|
248
|
+
"""Parses eOn saddle point search results from a directory.
|
|
247
249
|
|
|
248
250
|
Args:
|
|
249
|
-
eresp: Path to the directory containing
|
|
251
|
+
eresp: Path to the directory containing eOn results.
|
|
250
252
|
rloc: A SpinID object.
|
|
251
253
|
|
|
252
254
|
Returns:
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import glob
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
log = logging.getLogger(__name__)


def find_file_paths(file_pattern: str) -> list[Path]:
    """Return the files matching a glob pattern as a sorted list of Paths.

    The pattern is expanded with ``glob.glob``; the matches are converted to
    ``Path`` objects and sorted by ``Path`` ordering. The pattern and the
    number of matches are logged at INFO level. An unmatched pattern yields
    an empty list.
    """
    log.info(f"Searching for files with pattern: '{file_pattern}'")
    matches = [Path(hit) for hit in glob.glob(file_pattern)]
    matches.sort()
    log.info(f"Found {len(matches)} file(s).")
    return matches
|
|
@@ -14,7 +14,7 @@ import re
|
|
|
14
14
|
|
|
15
15
|
import chemparseplot.parse.converter as conv
|
|
16
16
|
import chemparseplot.parse.patterns as pat
|
|
17
|
-
from
|
|
17
|
+
from rgpycrumbs.basetypes import nebiter, nebpath
|
|
18
18
|
from chemparseplot.units import Q_
|
|
19
19
|
|
|
20
20
|
# fmt: off
|