reaxkit-1.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reaxkit/__init__.py +0 -0
- reaxkit/analysis/__init__.py +0 -0
- reaxkit/analysis/composed/RDF_analyzer.py +560 -0
- reaxkit/analysis/composed/__init__.py +0 -0
- reaxkit/analysis/composed/connectivity_analyzer.py +706 -0
- reaxkit/analysis/composed/coordination_analyzer.py +144 -0
- reaxkit/analysis/composed/electrostatics_analyzer.py +687 -0
- reaxkit/analysis/per_file/__init__.py +0 -0
- reaxkit/analysis/per_file/control_analyzer.py +165 -0
- reaxkit/analysis/per_file/eregime_analyzer.py +108 -0
- reaxkit/analysis/per_file/ffield_analyzer.py +305 -0
- reaxkit/analysis/per_file/fort13_analyzer.py +79 -0
- reaxkit/analysis/per_file/fort57_analyzer.py +106 -0
- reaxkit/analysis/per_file/fort73_analyzer.py +61 -0
- reaxkit/analysis/per_file/fort74_analyzer.py +65 -0
- reaxkit/analysis/per_file/fort76_analyzer.py +191 -0
- reaxkit/analysis/per_file/fort78_analyzer.py +154 -0
- reaxkit/analysis/per_file/fort79_analyzer.py +83 -0
- reaxkit/analysis/per_file/fort7_analyzer.py +393 -0
- reaxkit/analysis/per_file/fort99_analyzer.py +411 -0
- reaxkit/analysis/per_file/molfra_analyzer.py +359 -0
- reaxkit/analysis/per_file/params_analyzer.py +258 -0
- reaxkit/analysis/per_file/summary_analyzer.py +84 -0
- reaxkit/analysis/per_file/trainset_analyzer.py +84 -0
- reaxkit/analysis/per_file/vels_analyzer.py +95 -0
- reaxkit/analysis/per_file/xmolout_analyzer.py +528 -0
- reaxkit/cli.py +181 -0
- reaxkit/count_loc.py +276 -0
- reaxkit/data/alias.yaml +89 -0
- reaxkit/data/constants.yaml +27 -0
- reaxkit/data/reaxff_input_files_contents.yaml +186 -0
- reaxkit/data/reaxff_output_files_contents.yaml +301 -0
- reaxkit/data/units.yaml +38 -0
- reaxkit/help/__init__.py +0 -0
- reaxkit/help/help_index_loader.py +531 -0
- reaxkit/help/introspection_utils.py +131 -0
- reaxkit/io/__init__.py +0 -0
- reaxkit/io/base_handler.py +165 -0
- reaxkit/io/generators/__init__.py +0 -0
- reaxkit/io/generators/control_generator.py +123 -0
- reaxkit/io/generators/eregime_generator.py +341 -0
- reaxkit/io/generators/geo_generator.py +967 -0
- reaxkit/io/generators/trainset_generator.py +1758 -0
- reaxkit/io/generators/tregime_generator.py +113 -0
- reaxkit/io/generators/vregime_generator.py +164 -0
- reaxkit/io/generators/xmolout_generator.py +304 -0
- reaxkit/io/handlers/__init__.py +0 -0
- reaxkit/io/handlers/control_handler.py +209 -0
- reaxkit/io/handlers/eregime_handler.py +122 -0
- reaxkit/io/handlers/ffield_handler.py +812 -0
- reaxkit/io/handlers/fort13_handler.py +123 -0
- reaxkit/io/handlers/fort57_handler.py +143 -0
- reaxkit/io/handlers/fort73_handler.py +145 -0
- reaxkit/io/handlers/fort74_handler.py +155 -0
- reaxkit/io/handlers/fort76_handler.py +195 -0
- reaxkit/io/handlers/fort78_handler.py +142 -0
- reaxkit/io/handlers/fort79_handler.py +227 -0
- reaxkit/io/handlers/fort7_handler.py +264 -0
- reaxkit/io/handlers/fort99_handler.py +128 -0
- reaxkit/io/handlers/geo_handler.py +224 -0
- reaxkit/io/handlers/molfra_handler.py +184 -0
- reaxkit/io/handlers/params_handler.py +137 -0
- reaxkit/io/handlers/summary_handler.py +135 -0
- reaxkit/io/handlers/trainset_handler.py +658 -0
- reaxkit/io/handlers/vels_handler.py +293 -0
- reaxkit/io/handlers/xmolout_handler.py +174 -0
- reaxkit/utils/__init__.py +0 -0
- reaxkit/utils/alias.py +219 -0
- reaxkit/utils/cache.py +77 -0
- reaxkit/utils/constants.py +75 -0
- reaxkit/utils/equation_of_states.py +96 -0
- reaxkit/utils/exceptions.py +27 -0
- reaxkit/utils/frame_utils.py +175 -0
- reaxkit/utils/log.py +43 -0
- reaxkit/utils/media/__init__.py +0 -0
- reaxkit/utils/media/convert.py +90 -0
- reaxkit/utils/media/make_video.py +91 -0
- reaxkit/utils/media/plotter.py +812 -0
- reaxkit/utils/numerical/__init__.py +0 -0
- reaxkit/utils/numerical/extrema_finder.py +96 -0
- reaxkit/utils/numerical/moving_average.py +103 -0
- reaxkit/utils/numerical/numerical_calcs.py +75 -0
- reaxkit/utils/numerical/signal_ops.py +135 -0
- reaxkit/utils/path.py +55 -0
- reaxkit/utils/units.py +104 -0
- reaxkit/webui/__init__.py +0 -0
- reaxkit/webui/app.py +0 -0
- reaxkit/webui/components.py +0 -0
- reaxkit/webui/layouts.py +0 -0
- reaxkit/webui/utils.py +0 -0
- reaxkit/workflows/__init__.py +0 -0
- reaxkit/workflows/composed/__init__.py +0 -0
- reaxkit/workflows/composed/coordination_workflow.py +393 -0
- reaxkit/workflows/composed/electrostatics_workflow.py +587 -0
- reaxkit/workflows/composed/xmolout_fort7_workflow.py +343 -0
- reaxkit/workflows/meta/__init__.py +0 -0
- reaxkit/workflows/meta/help_workflow.py +136 -0
- reaxkit/workflows/meta/introspection_workflow.py +235 -0
- reaxkit/workflows/meta/make_video_workflow.py +61 -0
- reaxkit/workflows/meta/plotter_workflow.py +601 -0
- reaxkit/workflows/per_file/__init__.py +0 -0
- reaxkit/workflows/per_file/control_workflow.py +110 -0
- reaxkit/workflows/per_file/eregime_workflow.py +267 -0
- reaxkit/workflows/per_file/ffield_workflow.py +390 -0
- reaxkit/workflows/per_file/fort13_workflow.py +86 -0
- reaxkit/workflows/per_file/fort57_workflow.py +137 -0
- reaxkit/workflows/per_file/fort73_workflow.py +151 -0
- reaxkit/workflows/per_file/fort74_workflow.py +88 -0
- reaxkit/workflows/per_file/fort76_workflow.py +188 -0
- reaxkit/workflows/per_file/fort78_workflow.py +135 -0
- reaxkit/workflows/per_file/fort79_workflow.py +314 -0
- reaxkit/workflows/per_file/fort7_workflow.py +592 -0
- reaxkit/workflows/per_file/fort83_workflow.py +60 -0
- reaxkit/workflows/per_file/fort99_workflow.py +223 -0
- reaxkit/workflows/per_file/geo_workflow.py +554 -0
- reaxkit/workflows/per_file/molfra_workflow.py +577 -0
- reaxkit/workflows/per_file/params_workflow.py +135 -0
- reaxkit/workflows/per_file/summary_workflow.py +161 -0
- reaxkit/workflows/per_file/trainset_workflow.py +356 -0
- reaxkit/workflows/per_file/tregime_workflow.py +79 -0
- reaxkit/workflows/per_file/vels_workflow.py +309 -0
- reaxkit/workflows/per_file/vregime_workflow.py +75 -0
- reaxkit/workflows/per_file/xmolout_workflow.py +678 -0
- reaxkit-1.0.0.dist-info/METADATA +128 -0
- reaxkit-1.0.0.dist-info/RECORD +130 -0
- reaxkit-1.0.0.dist-info/WHEEL +5 -0
- reaxkit-1.0.0.dist-info/entry_points.txt +2 -0
- reaxkit-1.0.0.dist-info/licenses/AUTHORS.md +20 -0
- reaxkit-1.0.0.dist-info/licenses/LICENSE +21 -0
- reaxkit-1.0.0.dist-info/top_level.txt +1 -0
reaxkit/io/handlers/vels_handler.py
ADDED
@@ -0,0 +1,293 @@
"""
ReaxFF atomic velocities and accelerations (vels / moldyn.vel / molsav) handler.

This module provides a handler for parsing ReaxFF velocity-related output
files, which store per-atom coordinates, velocities, accelerations, and
optional lattice and temperature information for a single MD step.

Typical use cases include:

- extracting atomic velocities or accelerations for analysis
- correlating kinematics with structural or energetic data
- visualizing velocity and acceleration fields
"""


from __future__ import annotations

from pathlib import Path
from typing import Any, Dict, Tuple

import pandas as pd

from reaxkit.io.base_handler import BaseHandler


class VelsHandler(BaseHandler):
    """
    Parser for ReaxFF atomic kinematics output files
    (``vels``, ``moldyn.vel``, ``molsav``).

    This class parses velocity-style ReaxFF outputs and exposes atomic
    coordinates, velocities, accelerations, and related metadata as
    section-specific tables.

    Parsed Data
    -----------
    Summary table
        The main ``dataframe()`` is intentionally empty.
        All meaningful data is stored in section tables.

    Section tables
        Accessible via ``sections`` or ``section_df(name)``, with one
        table per section:

        - ``Atom coordinates``:
          One row per atom, with columns:
          ["atom_index", "x", "y", "z", "symbol"]

        - ``Atom velocities``:
          One row per atom, with columns:
          ["atom_index", "vx", "vy", "vz"]

        - ``Atom accelerations``:
          One row per atom, with columns:
          ["atom_index", "ax", "ay", "az"]

        - ``Previous atom accelerations``:
          One row per atom, with columns:
          ["atom_index", "ax", "ay", "az"]
          (empty if not present in the file)

    Metadata
        Returned by ``metadata()``, containing (when available):
        {
            "lattice_parameters": {
                "a": float, "b": float, "c": float,
                "alpha": float, "beta": float, "gamma": float
            },
            "md_temperature_K": float
        }

    Notes
    -----
    - All numeric parsing supports Fortran ``D``/``d`` exponents.
    - The number of atoms is inferred from the ``Atom coordinates`` header
      and reused for all subsequent sections.
    - Sections encountered out of order or with truncated data raise
      explicit parsing errors.
    - This handler represents a single MD snapshot and is not frame-based;
      ``n_frames()`` always returns 0.
    """
    SECTION_COORDS = "Atom coordinates"
    SECTION_VELS = "Atom velocities"
    SECTION_ACCELS = "Atom accelerations"
    SECTION_PREV_ACCELS = "Previous atom accelerations"

    def __init__(self, file_path: str | Path = "vels") -> None:
        super().__init__(file_path)
        self._sections: Dict[str, pd.DataFrame] = {}

    @property
    def sections(self) -> Dict[str, pd.DataFrame]:
        if not self._parsed:
            self.parse()
        return self._sections

    def section_df(self, name: str) -> pd.DataFrame:
        if not self._parsed:
            self.parse()
        return self._sections[name]

    def _parse(self) -> Tuple[pd.DataFrame, Dict[str, Any]]:
        lines = self.path.read_text().splitlines()
        meta: Dict[str, Any] = {}
        sections: Dict[str, pd.DataFrame] = {}

        def next_nonempty(idx: int) -> int:
            while idx < len(lines) and not lines[idx].strip():
                idx += 1
            return idx

        def floats_from_line(s: str, n: int) -> list[float]:
            # IMPORTANT: handle Fortran D exponents like 0.20D+01
            s = s.replace("D", "E").replace("d", "E")
            out: list[float] = []
            for tok in s.replace(",", " ").split():
                tok2 = tok.replace("D", "E").replace("d", "E")
                try:
                    out.append(float(tok2))
                except ValueError:
                    pass
                if len(out) == n:
                    break
            return out

        def first_int_in_line(s: str) -> int | None:
            for tok in s.split():
                try:
                    return int(tok)
                except ValueError:
                    continue
            return None

        def parse_lattice(idx: int) -> tuple[dict[str, float], int]:
            idx = next_nonempty(idx)
            abc = floats_from_line(lines[idx], 3)

            idx += 1
            idx = next_nonempty(idx)
            ang = floats_from_line(lines[idx], 3)

            if len(abc) != 3 or len(ang) != 3:
                raise ValueError("Could not parse lattice parameters (expected two lines: 3 + 3 floats).")

            lat = {"a": abc[0], "b": abc[1], "c": abc[2], "alpha": ang[0], "beta": ang[1], "gamma": ang[2]}
            return lat, idx + 1

        def parse_coords(idx: int, n_atoms: int, section_name: str) -> tuple[pd.DataFrame, int]:
            idx = next_nonempty(idx)
            rows = []
            for a in range(1, n_atoms + 1):
                idx = next_nonempty(idx)
                if idx >= len(lines):
                    raise ValueError(
                        f"[vels] Truncated section: '{section_name}'. "
                        f"Expected {n_atoms} atom lines, but file ended at atom {a - 1}."
                    )

                s = lines[idx].strip()
                low = s.lower()
                if (
                    "md-temperature" in low
                    or "atom velocities" in low
                    or ("atom accelerations" in low)
                    or "lattice parameters" in low
                ):
                    raise ValueError(
                        f"[vels] Truncated section: '{section_name}'. "
                        f"Expected {n_atoms} atom lines, but only found {a - 1}. "
                        f"Next header encountered early at line {idx + 1}: {s!r}"
                    )

                xyz = floats_from_line(s, 3)
                if len(xyz) != 3:
                    raise ValueError(
                        f"[vels] Bad numeric line in section '{section_name}' at atom {a}. "
                        f"Line {idx + 1}: {s!r}"
                    )

                symbol = s.split()[-1] if s.split() else ""
                rows.append({"atom_index": a, "x": xyz[0], "y": xyz[1], "z": xyz[2], "symbol": symbol})
                idx += 1

            return pd.DataFrame(rows), idx

        def parse_xyz3(
            idx: int, n_atoms: int, c1: str, c2: str, c3: str, section_name: str
        ) -> tuple[pd.DataFrame, int]:
            idx = next_nonempty(idx)
            rows = []

            for a in range(1, n_atoms + 1):
                idx = next_nonempty(idx)
                if idx >= len(lines):
                    raise ValueError(
                        f"[vels] Truncated section: '{section_name}'. "
                        f"Expected {n_atoms} atom lines, but file ended at atom {a - 1}."
                    )

                s = lines[idx].strip()
                low = s.lower()

                # If we accidentally hit the next header, the section is shortened/truncated.
                if (
                    "md-temperature" in low
                    or "atom coordinates" in low
                    or "atom velocities" in low
                    or ("atom accelerations" in low)
                    or "lattice parameters" in low
                ):
                    raise ValueError(
                        f"[vels] Truncated section: '{section_name}'. "
                        f"Expected {n_atoms} atom lines, but only found {a - 1}. "
                        f"Next header encountered early at line {idx + 1}: {s!r}"
                    )

                v = floats_from_line(s, 3)
                if len(v) != 3:
                    raise ValueError(
                        f"[vels] Bad numeric line in section '{section_name}' at atom {a}. "
                        f"Line {idx + 1}: {s!r}"
                    )

                rows.append({"atom_index": a, c1: v[0], c2: v[1], c3: v[2]})
                idx += 1

            return pd.DataFrame(rows), idx

        def parse_temperature(idx: int) -> tuple[float, int]:
            idx = next_nonempty(idx)
            v = floats_from_line(lines[idx], 1)
            if not v:
                raise ValueError(f"Could not parse MD-temperature from line: {lines[idx]!r}")
            return float(v[0]), idx + 1

        i = 0
        n_atoms: int | None = None
        prev_acc_present = False

        while i < len(lines):
            s = lines[i].strip()
            low = s.lower()

            if not s:
                i += 1
                continue

            if "lattice parameters" in low:
                lat, i = parse_lattice(i + 1)
                meta["lattice_parameters"] = lat
                continue

            if "atom coordinates" in low:
                n_atoms = first_int_in_line(s)
                if n_atoms is None:
                    raise ValueError("Could not read number of atoms from 'Atom coordinates' header.")
                df, i = parse_coords(i + 1, n_atoms, self.SECTION_COORDS)
                sections[self.SECTION_COORDS] = df
                continue

            if "atom velocities" in low:
                if n_atoms is None:
                    raise ValueError("Found velocities before coordinates; cannot infer number of atoms.")
                df, i = parse_xyz3(i + 1, n_atoms, "vx", "vy", "vz", self.SECTION_VELS)
                sections[self.SECTION_VELS] = df
                continue

            if "atom accelerations" in low and "previous" not in low:
                if n_atoms is None:
                    raise ValueError("Found accelerations before coordinates; cannot infer number of atoms.")
                df, i = parse_xyz3(i + 1, n_atoms, "ax", "ay", "az", self.SECTION_ACCELS)
                sections[self.SECTION_ACCELS] = df
                continue

            if "previous atom accelerations" in low:
                if n_atoms is None:
                    raise ValueError("Found previous accelerations before coordinates; cannot infer number of atoms.")
                df, i = parse_xyz3(i + 1, n_atoms, "ax", "ay", "az", self.SECTION_PREV_ACCELS)
                sections[self.SECTION_PREV_ACCELS] = df
                prev_acc_present = True
                continue

            if "md-temperature" in low or "md temperature" in low:
                t, i = parse_temperature(i + 1)
                meta["md_temperature_K"] = t
                continue

            i += 1

        if not prev_acc_present:
            sections[self.SECTION_PREV_ACCELS] = pd.DataFrame(columns=["atom_index", "ax", "ay", "az"])

        self._sections = sections
        return pd.DataFrame(), meta
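A minimal usage sketch for the handler above, assuming a ReaxFF "vels" file in the working directory; the section names and column layout follow the class docstring, and the derived speed column is illustrative:

# Sketch only: assumes a parseable "vels" file is present.
from reaxkit.io.handlers.vels_handler import VelsHandler

h = VelsHandler("vels")
coords = h.section_df(VelsHandler.SECTION_COORDS)  # atom_index, x, y, z, symbol
vels = h.section_df(VelsHandler.SECTION_VELS)      # atom_index, vx, vy, vz

# Per-atom speed magnitude from the three velocity components.
speed = (vels[["vx", "vy", "vz"]] ** 2).sum(axis=1) ** 0.5
print(speed.head())
print(h.metadata().get("md_temperature_K"))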
reaxkit/io/handlers/xmolout_handler.py
ADDED
@@ -0,0 +1,174 @@
"""
ReaxFF trajectory output (xmolout) handler.

This module provides a handler for parsing ReaxFF ``xmolout`` files,
which store atomic trajectories from MD runs or MM minimizations.

``xmolout`` files contain repeated coordinate frames with associated
cell parameters and energies and are commonly used for visualization
and structural analysis.
"""


from __future__ import annotations
from pathlib import Path
from typing import List, Optional, Iterator, Dict, Any
import pandas as pd
from reaxkit.io.base_handler import BaseHandler

class XmoloutHandler(BaseHandler):
    """
    Parser for ReaxFF trajectory output files (``xmolout``).

    This class parses ``xmolout`` files and exposes both a per-frame
    summary table and per-frame atomic coordinate tables.

    Parsed Data
    -----------
    Summary table
        One row per frame, returned by ``dataframe()``, with columns:
        ["num_of_atoms", "iter", "E_pot",
         "a", "b", "c", "alpha", "beta", "gamma"]

        Duplicate iteration indices are removed by keeping the last
        occurrence.

    Per-frame atom tables
        Stored in ``self._frames``, one table per frame, where each table
        has at least the columns:
        ["atom_type", "x", "y", "z"]

        Any additional per-atom columns present in the file are preserved
        per frame. If their names are not provided explicitly, they are
        auto-named as ``unknown_1``, ``unknown_2``, …

    Metadata
        Returned by ``metadata()``, containing:
        ["simulation_name", "n_atoms", "n_frames", "has_time"]

    Notes
    -----
    - Frames are inferred from the repeating ``#atoms → header → atoms`` pattern.
    - The number of atoms is assumed constant across all frames.
    - This handler supports lightweight frame access via ``frame(i)``
      and streaming access via ``iter_frames(step=...)``.
    """

    def __init__(self, file_path: str | Path = "xmolout", *, extra_atom_cols: Optional[list[str]] = None):
        super().__init__(file_path)
        self._frames: List[pd.DataFrame] = []  # list of per-frame atom tables
        self._n_atoms: Optional[int] = None
        self.simulation_name: str = ""
        self._extra_atom_cols = list(extra_atom_cols) if extra_atom_cols else None

    # ---- FileHandler requirement
    def _parse(self) -> tuple[pd.DataFrame, dict[str, Any]]:
        sim_rows: List[list] = []
        frames: List[pd.DataFrame] = []

        sim_cols = ["num_of_atoms", "iter", "E_pot", "a", "b", "c", "alpha", "beta", "gamma"]
        base_atom_cols = ["atom_type", "x", "y", "z"]

        with open(self.path, "r") as fh:
            atom_buf: List[list] = []
            atom_count = 0
            current_atom_cols: Optional[List[str]] = None
            n_atoms: Optional[int] = None

            for line in fh:
                vals = line.strip().split()
                if not vals:
                    continue

                # #atoms line
                if len(vals) == 1 and vals[0].isdigit():
                    n_atoms = int(vals[0])
                    self._n_atoms = n_atoms
                    sim_rows.append([n_atoms])  # placeholder row; will complete after header line
                    atom_buf, atom_count = [], 0
                    current_atom_cols = None
                    continue

                # header line (name iter E a b c alpha beta gamma)
                if len(vals) == 9 and self._n_atoms and vals[1].lstrip("-").isdigit():
                    if not self.simulation_name:
                        self.simulation_name = vals[0]
                    row = [self._n_atoms, int(vals[1])] + list(map(float, vals[2:]))
                    sim_rows[-1] = row
                    continue

                # atom coordinates (optionally with extra columns)
                if self._n_atoms and len(vals) >= 4:
                    # lazily determine expected columns for this frame
                    if current_atom_cols is None:
                        n_extras = max(0, len(vals) - 4)
                        if self._extra_atom_cols:
                            names = list(self._extra_atom_cols)[:n_extras]
                            if len(names) < n_extras:
                                names += [f"unknown_{i+1}" for i in range(n_extras - len(names))]
                        else:
                            names = [f"unknown_{i+1}" for i in range(n_extras)]
                        current_atom_cols = base_atom_cols + names

                    base = [vals[0]] + list(map(float, vals[1:4]))
                    expected_extras = len(current_atom_cols) - 4
                    extras_vals = [float(x) for x in vals[4:4+expected_extras]]
                    # pad if fewer provided
                    while len(extras_vals) < expected_extras:
                        extras_vals.append(float('nan'))
                    atom_buf.append(base + extras_vals)
                    atom_count += 1

                    if atom_count == self._n_atoms:
                        frames.append(pd.DataFrame(atom_buf, columns=current_atom_cols))
                        atom_buf, atom_count = [], 0
                        current_atom_cols = None

        # Build per-frame summary table
        df = pd.DataFrame(sim_rows, columns=sim_cols)

        # Deduplicate by iter (keep last)
        if not df.empty and "iter" in df.columns:
            keep_idx = df.drop_duplicates("iter", keep="last").index
            frames = [frames[i] for i in keep_idx if i < len(frames)]
            df = df.loc[keep_idx].reset_index(drop=True)

        # Save frames
        self._frames = frames

        meta: Dict[str, Any] = {
            "simulation_name": self.simulation_name,
            "n_atoms": self._n_atoms,
            "n_frames": len(self._frames),
            "has_time": False,
        }
        return df, meta

    # ---- Explicit, file-specific accessors (no generic get())
    def n_frames(self) -> int:
        return int(self.metadata().get("n_frames", 0))

    def n_atoms(self) -> Optional[int]:
        return self._n_atoms

    def frame(self, i: int) -> Dict[str, Any]:
        """Return a lightweight frame dict: coords + atom_types + iter for frame i."""
        df = self.dataframe()
        if i < 0 or i >= len(self._frames):
            raise IndexError(f"frame index {i} out of range [0, {len(self._frames) - 1}]")

        frame_df = self._frames[i]
        coords = frame_df[["x", "y", "z"]].to_numpy(dtype=float)
        atom_types = frame_df["atom_type"].astype(str).tolist()

        row = df.iloc[i]
        return {
            "index": i,
            "iter": int(row["iter"]) if "iter" in df.columns else i,
            "coords": coords,
            "atom_types": atom_types,
        }

    def iter_frames(self, step: int = 1) -> Iterator[Dict[str, Any]]:
        for i in range(0, self.n_frames(), max(1, int(step))):
            yield self.frame(i)
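A minimal usage sketch for the trajectory handler above, assuming an "xmolout" file in the working directory; "q" is an illustrative name for an extra per-atom column, not one the format guarantees:

# Sketch only: assumes a parseable "xmolout" trajectory is present.
from reaxkit.io.handlers.xmolout_handler import XmoloutHandler

traj = XmoloutHandler("xmolout", extra_atom_cols=["q"])
summary = traj.dataframe()                 # one row per frame
print(summary[["iter", "E_pot"]].head())

# Stream every 10th frame instead of materializing all of them at once.
for fr in traj.iter_frames(step=10):
    centroid = fr["coords"].mean(axis=0)   # mean x, y, z over all atoms
    print(fr["iter"], centroid)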
reaxkit/utils/alias.py
ADDED
@@ -0,0 +1,219 @@
"""
Alias resolution utilities for tolerant column and key matching.

This module provides functions for resolving canonical ReaxKit keys
(e.g., ``iter``, ``time``, ``D``) against the actual column names present
in parsed DataFrames, using a packaged alias map.

The canonical→alias definitions are stored in ``reaxkit/data/alias.yaml``.
"""

from __future__ import annotations

from typing import Dict, List, Iterable, Optional
from functools import lru_cache

# You can load this via importlib.resources so it works after pip install.
# Requires: alias.yaml included as package data.
import yaml
import importlib.resources as ir


@lru_cache(maxsize=1)
def load_default_alias_map() -> Dict[str, List[str]]:
    """
    Load the packaged canonical→aliases mapping.

    The alias map is read from ``reaxkit/data/alias.yaml`` and cached after
    the first call.

    Returns
    -------
    dict[str, list[str]]
        Mapping of canonical keys to accepted alias strings.

    Raises
    ------
    FileNotFoundError
        If the packaged ``alias.yaml`` cannot be found.
    """
    # reaxkit.data is NOT a package; we read by file location within package resources.
    # If you later make data/ a package, you can switch to ir.files("reaxkit.data").
    pkg = "reaxkit"
    rel = "data/alias.yaml"

    try:
        with ir.files(pkg).joinpath(rel).open("r", encoding="utf-8") as f:
            doc = yaml.safe_load(f) or {}
    except FileNotFoundError as e:
        raise FileNotFoundError(
            f"Could not find packaged alias map at '{pkg}/{rel}'. "
            "Make sure alias.yaml is included as package data."
        ) from e

    aliases = doc.get("aliases") or {}
    # Normalize to Dict[str, List[str]]
    out: Dict[str, List[str]] = {}
    for k, v in aliases.items():
        if v is None:
            out[str(k)] = []
        elif isinstance(v, list):
            out[str(k)] = [str(x) for x in v]
        else:
            out[str(k)] = [str(v)]
    return out


def resolve_alias_from_columns(
    cols: Iterable[str],
    canonical: str,
    aliases: Optional[Dict[str, List[str]]] = None,
) -> Optional[str]:
    """
    Resolve a canonical key to the matching column name in a column list.

    Matching is case-insensitive and falls back to simple heuristics when an
    exact alias match is not found.

    Parameters
    ----------
    cols : iterable of str
        Available column names (e.g., DataFrame columns).
    canonical : str
        Canonical key to resolve (e.g., ``"iter"``, ``"time"``, ``"D"``).
    aliases : dict[str, list[str]], optional
        Canonical→aliases mapping to use. If not provided, the packaged map
        from ``alias.yaml`` is loaded.

    Returns
    -------
    str or None
        The matching column name if found, otherwise ``None``.

    Examples
    --------
    >>> resolve_alias_from_columns(df.columns, "time")
    """
    if cols is None:
        return None

    orig_cols = list(cols)
    lower_map = {c.lower(): c for c in orig_cols}
    aliases = aliases or load_default_alias_map()

    candidates = [canonical]
    if canonical in aliases:
        candidates.extend(aliases[canonical])

    # Exact (case-insensitive)
    for cand in candidates:
        hit = lower_map.get(str(cand).lower())
        if hit is not None:
            return hit

    # Heuristics on canonical (startswith/contains)
    cname = str(canonical).lower()
    for c in orig_cols:
        cl = c.lower()
        if cl.startswith(cname) or cname in cl:
            return c

    return None


def _resolve_alias(source, canonical: str) -> str:
    """
    Resolve a canonical key from a DataFrame-like source.

    Notes
    -----
    This is a compatibility helper that accepts a handler (with ``.dataframe()``),
    a pandas DataFrame, or an iterable of column names.
    """
    try:
        cols = list(source.dataframe().columns)  # type: ignore[attr-defined]
    except Exception:
        try:
            cols = list(getattr(source, "columns"))
        except Exception:
            cols = list(source)  # assume iterable of str

    hit = resolve_alias_from_columns(cols, canonical)
    if hit is None:
        raise KeyError(
            f"Could not resolve alias '{canonical}'. Available columns: {list(cols)}"
        )
    return hit


def _available_keys_from_columns(cols: Iterable[str]) -> List[str]:
    """
    List canonical keys that are usable for a given column set.

    The returned list includes:
    - raw columns already present in ``cols``
    - canonical keys whose aliases resolve against ``cols``

    Parameters
    ----------
    cols : iterable of str
        Available column names.

    Returns
    -------
    list[str]
        Sorted list of usable keys for lookup and CLI choices.

    Examples
    --------
    >>> _available_keys_from_columns(df.columns)
    """
    amap = load_default_alias_map()
    cols_set = set(cols)
    keys = set(cols_set)
    for alias, cands in amap.items():
        if any(c in cols_set for c in cands) or alias in cols_set:
            keys.add(alias)
    return sorted(keys)


# Re-export for callers that already import these names
available_keys = _available_keys_from_columns


def normalize_choice(value: str, domain: str = "xaxis") -> str:
    """
    Normalize a user-provided keyword to its canonical alias key.

    This is intended for tolerant CLI inputs where users may provide
    any alias defined in ``alias.yaml`` (e.g., ``Time(fs)`` → ``time``).

    Parameters
    ----------
    value : str
        User-provided keyword or alias.
    domain : str, optional
        Reserved for future domain-specific normalization rules.

    Returns
    -------
    str
        Canonical key if an alias match is found; otherwise the normalized
        input string.

    Examples
    --------
    >>> normalize_choice("Time(fs)")
    >>> normalize_choice("frm")
    """
    v = (value or "").strip().lower()
    if not v:
        return v

    amap = load_default_alias_map()
    for canonical, aliases in amap.items():
        all_names = [canonical.lower()] + [a.lower() for a in aliases]
        if v in all_names:
            return canonical

    return v
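A minimal usage sketch for the alias utilities above; the column names are illustrative, and exact matches depend on what reaxkit/data/alias.yaml defines:

# Sketch only: "Iteration", "Time(fs)", and "E_pot" are made-up column names.
import pandas as pd
from reaxkit.utils.alias import resolve_alias_from_columns, normalize_choice

df = pd.DataFrame(columns=["Iteration", "Time(fs)", "E_pot"])

# Resolve the canonical key "time" against whatever the table actually calls it;
# here the substring heuristic alone already matches "Time(fs)".
print(resolve_alias_from_columns(df.columns, "time"))  # -> "Time(fs)"

# Normalize a tolerant CLI input back to its canonical key (falls back to the
# lower-cased input when alias.yaml defines no match).
print(normalize_choice("Time(fs)"))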