reaxkit 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reaxkit/__init__.py +0 -0
- reaxkit/analysis/__init__.py +0 -0
- reaxkit/analysis/composed/RDF_analyzer.py +560 -0
- reaxkit/analysis/composed/__init__.py +0 -0
- reaxkit/analysis/composed/connectivity_analyzer.py +706 -0
- reaxkit/analysis/composed/coordination_analyzer.py +144 -0
- reaxkit/analysis/composed/electrostatics_analyzer.py +687 -0
- reaxkit/analysis/per_file/__init__.py +0 -0
- reaxkit/analysis/per_file/control_analyzer.py +165 -0
- reaxkit/analysis/per_file/eregime_analyzer.py +108 -0
- reaxkit/analysis/per_file/ffield_analyzer.py +305 -0
- reaxkit/analysis/per_file/fort13_analyzer.py +79 -0
- reaxkit/analysis/per_file/fort57_analyzer.py +106 -0
- reaxkit/analysis/per_file/fort73_analyzer.py +61 -0
- reaxkit/analysis/per_file/fort74_analyzer.py +65 -0
- reaxkit/analysis/per_file/fort76_analyzer.py +191 -0
- reaxkit/analysis/per_file/fort78_analyzer.py +154 -0
- reaxkit/analysis/per_file/fort79_analyzer.py +83 -0
- reaxkit/analysis/per_file/fort7_analyzer.py +393 -0
- reaxkit/analysis/per_file/fort99_analyzer.py +411 -0
- reaxkit/analysis/per_file/molfra_analyzer.py +359 -0
- reaxkit/analysis/per_file/params_analyzer.py +258 -0
- reaxkit/analysis/per_file/summary_analyzer.py +84 -0
- reaxkit/analysis/per_file/trainset_analyzer.py +84 -0
- reaxkit/analysis/per_file/vels_analyzer.py +95 -0
- reaxkit/analysis/per_file/xmolout_analyzer.py +528 -0
- reaxkit/cli.py +181 -0
- reaxkit/count_loc.py +276 -0
- reaxkit/data/alias.yaml +89 -0
- reaxkit/data/constants.yaml +27 -0
- reaxkit/data/reaxff_input_files_contents.yaml +186 -0
- reaxkit/data/reaxff_output_files_contents.yaml +301 -0
- reaxkit/data/units.yaml +38 -0
- reaxkit/help/__init__.py +0 -0
- reaxkit/help/help_index_loader.py +531 -0
- reaxkit/help/introspection_utils.py +131 -0
- reaxkit/io/__init__.py +0 -0
- reaxkit/io/base_handler.py +165 -0
- reaxkit/io/generators/__init__.py +0 -0
- reaxkit/io/generators/control_generator.py +123 -0
- reaxkit/io/generators/eregime_generator.py +341 -0
- reaxkit/io/generators/geo_generator.py +967 -0
- reaxkit/io/generators/trainset_generator.py +1758 -0
- reaxkit/io/generators/tregime_generator.py +113 -0
- reaxkit/io/generators/vregime_generator.py +164 -0
- reaxkit/io/generators/xmolout_generator.py +304 -0
- reaxkit/io/handlers/__init__.py +0 -0
- reaxkit/io/handlers/control_handler.py +209 -0
- reaxkit/io/handlers/eregime_handler.py +122 -0
- reaxkit/io/handlers/ffield_handler.py +812 -0
- reaxkit/io/handlers/fort13_handler.py +123 -0
- reaxkit/io/handlers/fort57_handler.py +143 -0
- reaxkit/io/handlers/fort73_handler.py +145 -0
- reaxkit/io/handlers/fort74_handler.py +155 -0
- reaxkit/io/handlers/fort76_handler.py +195 -0
- reaxkit/io/handlers/fort78_handler.py +142 -0
- reaxkit/io/handlers/fort79_handler.py +227 -0
- reaxkit/io/handlers/fort7_handler.py +264 -0
- reaxkit/io/handlers/fort99_handler.py +128 -0
- reaxkit/io/handlers/geo_handler.py +224 -0
- reaxkit/io/handlers/molfra_handler.py +184 -0
- reaxkit/io/handlers/params_handler.py +137 -0
- reaxkit/io/handlers/summary_handler.py +135 -0
- reaxkit/io/handlers/trainset_handler.py +658 -0
- reaxkit/io/handlers/vels_handler.py +293 -0
- reaxkit/io/handlers/xmolout_handler.py +174 -0
- reaxkit/utils/__init__.py +0 -0
- reaxkit/utils/alias.py +219 -0
- reaxkit/utils/cache.py +77 -0
- reaxkit/utils/constants.py +75 -0
- reaxkit/utils/equation_of_states.py +96 -0
- reaxkit/utils/exceptions.py +27 -0
- reaxkit/utils/frame_utils.py +175 -0
- reaxkit/utils/log.py +43 -0
- reaxkit/utils/media/__init__.py +0 -0
- reaxkit/utils/media/convert.py +90 -0
- reaxkit/utils/media/make_video.py +91 -0
- reaxkit/utils/media/plotter.py +812 -0
- reaxkit/utils/numerical/__init__.py +0 -0
- reaxkit/utils/numerical/extrema_finder.py +96 -0
- reaxkit/utils/numerical/moving_average.py +103 -0
- reaxkit/utils/numerical/numerical_calcs.py +75 -0
- reaxkit/utils/numerical/signal_ops.py +135 -0
- reaxkit/utils/path.py +55 -0
- reaxkit/utils/units.py +104 -0
- reaxkit/webui/__init__.py +0 -0
- reaxkit/webui/app.py +0 -0
- reaxkit/webui/components.py +0 -0
- reaxkit/webui/layouts.py +0 -0
- reaxkit/webui/utils.py +0 -0
- reaxkit/workflows/__init__.py +0 -0
- reaxkit/workflows/composed/__init__.py +0 -0
- reaxkit/workflows/composed/coordination_workflow.py +393 -0
- reaxkit/workflows/composed/electrostatics_workflow.py +587 -0
- reaxkit/workflows/composed/xmolout_fort7_workflow.py +343 -0
- reaxkit/workflows/meta/__init__.py +0 -0
- reaxkit/workflows/meta/help_workflow.py +136 -0
- reaxkit/workflows/meta/introspection_workflow.py +235 -0
- reaxkit/workflows/meta/make_video_workflow.py +61 -0
- reaxkit/workflows/meta/plotter_workflow.py +601 -0
- reaxkit/workflows/per_file/__init__.py +0 -0
- reaxkit/workflows/per_file/control_workflow.py +110 -0
- reaxkit/workflows/per_file/eregime_workflow.py +267 -0
- reaxkit/workflows/per_file/ffield_workflow.py +390 -0
- reaxkit/workflows/per_file/fort13_workflow.py +86 -0
- reaxkit/workflows/per_file/fort57_workflow.py +137 -0
- reaxkit/workflows/per_file/fort73_workflow.py +151 -0
- reaxkit/workflows/per_file/fort74_workflow.py +88 -0
- reaxkit/workflows/per_file/fort76_workflow.py +188 -0
- reaxkit/workflows/per_file/fort78_workflow.py +135 -0
- reaxkit/workflows/per_file/fort79_workflow.py +314 -0
- reaxkit/workflows/per_file/fort7_workflow.py +592 -0
- reaxkit/workflows/per_file/fort83_workflow.py +60 -0
- reaxkit/workflows/per_file/fort99_workflow.py +223 -0
- reaxkit/workflows/per_file/geo_workflow.py +554 -0
- reaxkit/workflows/per_file/molfra_workflow.py +577 -0
- reaxkit/workflows/per_file/params_workflow.py +135 -0
- reaxkit/workflows/per_file/summary_workflow.py +161 -0
- reaxkit/workflows/per_file/trainset_workflow.py +356 -0
- reaxkit/workflows/per_file/tregime_workflow.py +79 -0
- reaxkit/workflows/per_file/vels_workflow.py +309 -0
- reaxkit/workflows/per_file/vregime_workflow.py +75 -0
- reaxkit/workflows/per_file/xmolout_workflow.py +678 -0
- reaxkit-1.0.0.dist-info/METADATA +128 -0
- reaxkit-1.0.0.dist-info/RECORD +130 -0
- reaxkit-1.0.0.dist-info/WHEEL +5 -0
- reaxkit-1.0.0.dist-info/entry_points.txt +2 -0
- reaxkit-1.0.0.dist-info/licenses/AUTHORS.md +20 -0
- reaxkit-1.0.0.dist-info/licenses/LICENSE +21 -0
- reaxkit-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ReaxFF connectivity (fort.7) file handler.
|
|
3
|
+
|
|
4
|
+
This module provides a handler for parsing ReaxFF ``fort.7`` files,
|
|
5
|
+
which store per-iteration atom connectivity, bond-order information,
|
|
6
|
+
and system-wide totals.
|
|
7
|
+
|
|
8
|
+
Typical use cases include:
|
|
9
|
+
|
|
10
|
+
- extracting per-atom bond-order features
|
|
11
|
+
- computing coordination statistics
|
|
12
|
+
- building molecule- and structure-level descriptors
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import List, Dict, Any, Optional
|
|
19
|
+
import pandas as pd
|
|
20
|
+
|
|
21
|
+
from reaxkit.io.base_handler import BaseHandler
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Fort7Handler(BaseHandler):
    """
    Parser for ReaxFF connectivity output files (``fort.7``).

    This class parses ReaxFF ``fort.7`` files and exposes both
    iteration-level summaries and per-iteration atom connectivity
    tables as structured tabular data.

    Parsed Data
    -----------
    Summary table
        One row per iteration, returned by ``dataframe()``, with columns:
        ["iter", "num_of_atoms", "num_of_bonds",
         "total_BO", "total_LP", "total_BO_uncorrected", "total_charge"]

    Per-frame atom tables
        Stored in ``self._frames``, one table per iteration, where each
        frame is a ``pandas.DataFrame`` with columns:
        ["atom_num", "atom_type_num", "atom_cnn1..nb", "molecule_num",
         "BO1..nb", "sum_BOs", "num_LPs", "partial_charge", ...]

        Here, ``nb`` denotes the number of bonded neighbors in that frame,
        leading to variable-length connectivity and bond-order columns.

    Metadata
        Returned by ``metadata()``, containing:
        ["n_frames", "n_records", "simulation_name"]

    Notes
    -----
    - Duplicate iterations are resolved by keeping the last occurrence.
    - Connectivity and bond-order columns are inferred from the header.
    - Extra, file-dependent columns are preserved as ``unknown*`` fields.
    - Summary rows, frames and totals are always stored together, so a
      header that is not followed by atom lines yields an empty frame
      instead of shifting later frames out of alignment.
    - Frame accessors (``n_frames``, ``n_atoms``, ``frame``,
      ``iter_frames``) trigger parsing on first use.
    """

    # Names given to the totals columns when a frame carries exactly four values.
    _TOTAL_COLUMNS = ["total_BO", "total_LP", "total_BO_uncorrected", "total_charge"]

    def __init__(self, file_path: str | Path = "fort.7"):
        """Initialize a handler for a ReaxFF ``fort.7`` connectivity file.

        Works on
        --------
        Fort7Handler — ``fort.7``

        Parameters
        ----------
        file_path : str or pathlib.Path, optional
            Path to the ``fort.7`` file to be parsed.

        Returns
        -------
        None
            Initializes the handler without parsing the file.
        """
        super().__init__(file_path)
        self._frames: List[pd.DataFrame] = []
        self._sim_name: Optional[str] = None
        # Flipped by ``_parse``; lets the frame accessors know whether the
        # per-frame tables still have to be loaded.
        self._frames_loaded: bool = False

    # -------------------------------------------------------
    # Parsing helpers
    # -------------------------------------------------------

    @staticmethod
    def _read_header(values: List[str]) -> Optional[tuple[int, str, int, int]]:
        """Interpret a tokenized line as a frame header.

        A header has exactly six tokens; tokens 0, 3 and 5 are the atom
        count, the iteration number and the bond count, token 1 is the
        simulation name, and tokens 2 and 4 are ignored.

        Returns
        -------
        tuple or None
            ``(n_atoms, sim_name, iteration, n_bonds)``, or ``None`` when the
            line is not a header. An atom line of a frame with zero bonds
            also has six tokens; it is told apart here because its integer
            fields do not parse.
        """
        if len(values) != 6:
            return None
        try:
            return int(values[0]), values[1], int(values[3]), int(values[5])
        except ValueError:
            return None

    @staticmethod
    def _atom_columns(nb: int, width: int) -> List[str]:
        """Build the atom-table column names for ``nb`` bonds per atom.

        ``width`` is the number of values actually present on an atom line;
        anything beyond the known layout is named ``unknown1``, ``unknown2``, ...
        """
        columns = (
            ["atom_num", "atom_type_num"]
            + [f"atom_cnn{i}" for i in range(1, nb + 1)]
            + ["molecule_num"]
            + [f"BO{i}" for i in range(1, nb + 1)]
            + ["sum_BOs", "num_LPs", "partial_charge"]
        )
        extra = max(0, width - len(columns))
        columns += [f"unknown{i}" for i in range(1, extra + 1)]
        return columns

    def _parse(self) -> tuple[pd.DataFrame, dict[str, Any]]:
        """Read the file and build the summary table and per-frame tables.

        Returns
        -------
        tuple
            ``(summary_dataframe, metadata_dict)``. The per-frame atom tables
            are stored on ``self._frames`` as a side effect.

        Raises
        ------
        ValueError
            If an atom line appears before any frame header, or if numeric
            fields cannot be converted.
        """
        sim_rows: List[List[int]] = []
        frames: List[pd.DataFrame] = []
        totals: List[List[float]] = []

        header: Optional[tuple[int, int, int]] = None  # (iter, n_atoms, n_bonds)
        atom_rows: List[List[float | int]] = []
        frame_totals: List[float] = []
        sim_name: str = ""

        def store_frame(
            head: tuple[int, int, int],
            rows: List[List[float | int]],
            tots: List[float],
        ) -> None:
            # Append to all three lists in one place so that index i always
            # refers to the same iteration in sim_rows, frames and totals.
            iteration, n_atoms, nb = head
            width = len(rows[0]) if rows else 0
            frames.append(pd.DataFrame(rows, columns=self._atom_columns(nb, width)))
            totals.append(list(tots) if tots else [float("nan")] * 4)
            sim_rows.append([iteration, n_atoms, nb])

        with open(self.path, "r") as fh:
            for raw in fh:
                values = raw.split()
                if not values:
                    continue

                new_header = self._read_header(values)

                # Header: close the previous frame and start a new one
                if new_header is not None:
                    if header is not None:
                        store_frame(header, atom_rows, frame_totals)
                    atom_rows.clear()
                    frame_totals.clear()
                    n_atoms, sim_name, iteration, nb = new_header
                    header = (iteration, n_atoms, nb)

                # Totals
                elif len(values) < 6:
                    frame_totals.extend(map(float, values))

                # Atom line
                else:
                    if header is None:
                        raise ValueError(
                            f"{self.path}: atom line found before any frame header"
                        )
                    nb = header[2]
                    # atom_num, atom_type_num, nb neighbor ids and
                    # molecule_num are integers; everything after is float.
                    int_part = list(map(int, values[0: nb + 3]))
                    float_part = list(map(float, values[nb + 3:]))
                    atom_rows.append(int_part + float_part)

        # Final iter
        if header is not None:
            store_frame(header, atom_rows, frame_totals)

        # Summary dataframe
        sim_df = pd.DataFrame(sim_rows, columns=["iter", "num_of_atoms", "num_of_bonds"])
        if totals:
            # Pad ragged totals so one odd frame cannot break construction.
            width = max(len(row) for row in totals)
            padded = [row + [float("nan")] * (width - len(row)) for row in totals]
            if width == 4:
                total_cols = list(self._TOTAL_COLUMNS)
            else:
                total_cols = [f"total_val{i}" for i in range(1, width + 1)]
            totals_df = pd.DataFrame(padded, columns=total_cols)
            sim_df = pd.concat([sim_df, totals_df], axis=1)

        # Deduplicate (keep the last occurrence of each iteration)
        if not sim_df.empty:
            keep_idx = sim_df.drop_duplicates("iter", keep="last").index
            frames = [frames[i] for i in keep_idx]
            sim_df = sim_df.loc[keep_idx].reset_index(drop=True)

        self._frames = frames
        self._sim_name = sim_name
        self._frames_loaded = True

        meta: Dict[str, Any] = {
            "n_frames": len(frames),
            "n_records": len(sim_df),
            "simulation_name": sim_name,
        }

        return sim_df, meta

    # -------------------------------------------------------
    # Frame utilities (match XmoloutHandler API)
    # -------------------------------------------------------

    def _ensure_frames(self) -> List[pd.DataFrame]:
        """Return the per-frame tables, parsing the file first if needed."""
        if not getattr(self, "_frames_loaded", False):
            # ``dataframe()`` is the public entry point that runs ``_parse``;
            # the flag keeps this to a single call once parsing has happened.
            self.dataframe()
        return getattr(self, "_frames", [])

    def n_frames(self) -> int:
        """
        Return the number of frames parsed from the ``fort.7`` file.

        Works on
        --------
        Fort7Handler — ``fort.7``

        Returns
        -------
        int
            Number of parsed frames (iterations).
        """
        return len(self._ensure_frames())

    def n_atoms(self, frame: int = 0) -> int:
        """
        Return the number of atoms in a given frame.

        Works on
        --------
        Fort7Handler — ``fort.7``

        Parameters
        ----------
        frame : int, optional
            Frame index to query.

        Returns
        -------
        int
            Number of atoms in the selected frame, or 0 when the file
            contains no frames.

        Raises
        ------
        IndexError
            If frames exist but ``frame`` is out of range.
        """
        frames = self._ensure_frames()
        if not frames:
            return 0
        return len(frames[int(frame)])

    def frame(self, i: int):
        """Return a single frame as an atom-level connectivity table.

        Works on
        --------
        Fort7Handler — ``fort.7``

        Parameters
        ----------
        i : int
            Frame index to retrieve.

        Returns
        -------
        pandas.DataFrame
            Atom-level table for the selected frame, including connectivity
            and bond-order columns.

        Raises
        ------
        IndexError
            If ``i`` is out of range.

        Examples
        --------
        >>> h = Fort7Handler("fort.7")
        >>> df = h.frame(0)
        """
        return self._ensure_frames()[int(i)]

    def iter_frames(self, step: int = 1):
        """Iterate over atom-level frames with optional subsampling.

        Works on
        --------
        Fort7Handler — ``fort.7``

        Parameters
        ----------
        step : int, optional
            Step size for subsampling frames (default: 1). Values below 1
            are treated as 1.

        Yields
        ------
        pandas.DataFrame
            Atom-level connectivity table for each yielded frame.

        Examples
        --------
        >>> h = Fort7Handler("fort.7")
        >>> for frame in h.iter_frames(step=10):
        ...     print(len(frame))
        """
        frames = self._ensure_frames()
        stride = max(1, int(step))
        for idx in range(0, len(frames), stride):
            yield frames[idx]
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ReaxFF training set error report (fort.99) handler.
|
|
3
|
+
|
|
4
|
+
This module provides a handler for parsing ReaxFF ``fort.99`` files,
|
|
5
|
+
which summarize force-field training errors by category and target
|
|
6
|
+
during parameter optimization runs.
|
|
7
|
+
|
|
8
|
+
Typical use cases include:
|
|
9
|
+
|
|
10
|
+
- analyzing training set contributions to total error
|
|
11
|
+
- inspecting charge, geometry, and energy fitting quality
|
|
12
|
+
- building diagnostics for force-field parameterization workflows
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import List, Dict, Any
|
|
19
|
+
import re
|
|
20
|
+
import pandas as pd
|
|
21
|
+
|
|
22
|
+
from reaxkit.io.base_handler import BaseHandler
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class Fort99Handler(BaseHandler):
    """
    Parser for ReaxFF training set error reports (``fort.99``).

    This class parses ``fort.99`` files and exposes individual training
    targets and their contributions to the total force-field error as
    a structured tabular dataset.

    Parsed Data
    -----------
    Summary table
        One row per training target, returned by ``dataframe()``,
        with columns:
        ["lineno", "section", "title",
         "ffield_value", "qm_value", "weight",
         "error", "total_ff_error"]

        The ``section`` column categorizes each target as one of:
        ["CHARGE", "HEATFO", "GEOMETRY", "CELL PARAMETERS", "ENERGY", None].

    Metadata
        Returned by ``metadata()``, containing:
        ["n_records", "n_frames"]

    Notes
    -----
    - The last five numeric values on each line are interpreted as
      (FF value, QM/reference value, weight, error, total error).
    - Section categories are inferred heuristically from the title text.
    - Unrecognized entries are retained with ``section=None``.
    - This handler is not frame-based; the ``n_frames`` metadata entry
      is always 0.
    - The table always carries the full set of columns, even when no
      line of the file could be parsed.
    """

    # Column order of the table returned by ``dataframe()``.
    _COLUMNS = (
        "lineno",
        "section",
        "title",
        "ffield_value",
        "qm_value",
        "weight",
        "error",
        "total_ff_error",
    )

    # float like -17.8000, 1.54, 1.0e-03, etc.
    _FLOAT_RE = re.compile(r"[+-]?(?:\d+\.\d*|\d*\.\d+|\d+)(?:[eE][+-]?\d+)?")

    def __init__(self, file_path: str | Path = "fort.99"):
        """Initialize a handler for a ReaxFF ``fort.99`` error report.

        Parameters
        ----------
        file_path : str or pathlib.Path, optional
            Path to the ``fort.99`` file to be parsed.
        """
        super().__init__(file_path)

    @staticmethod
    def _classify_section(title: str) -> str | None:
        """Map a target title to its section name using keyword matching.

        The checks are ordered; the first keyword found in the lower-cased
        title wins. Returns ``None`` when no keyword matches.
        """
        tl = title.lower()
        if "charge" in tl:
            return "CHARGE"
        if "heat" in tl:
            return "HEATFO"
        if ("bond" in tl) or ("angle" in tl):
            return "GEOMETRY"
        if ("a:" in tl) or ("b:" in tl) or ("c:" in tl):
            return "CELL PARAMETERS"
        if "energy" in tl:
            return "ENERGY"
        return None

    def _parse(self) -> tuple[pd.DataFrame, dict[str, Any]]:
        """Read the file and build the table of training targets.

        Lines with fewer than five numbers, or with no text before the
        last five numbers, are skipped.

        Returns
        -------
        tuple
            ``(dataframe, metadata_dict)``.
        """
        rows: List[Dict[str, Any]] = []

        with open(self.path, "r") as fh:
            for lineno, raw in enumerate(fh, start=1):
                line = raw.rstrip("\n")
                if not line.strip():
                    continue

                # Find all floats; a data line needs at least five of them
                matches = list(self._FLOAT_RE.finditer(line))
                if len(matches) < 5:
                    continue

                # The last 5 numbers are the value columns
                last5 = matches[-5:]
                ffield_val, qm_val, weight, err, tot_err = (
                    float(m.group()) for m in last5
                )

                # Title is everything before the first value column
                title = line[: last5[0].start()].strip()
                if not title:
                    continue

                # -------- SECTION detection --------
                section = self._classify_section(title)
                if section is None:
                    print(f"Unrecognized fort.99 entry at line {lineno}: {title}")

                # Save row
                rows.append(
                    {
                        "lineno": lineno,
                        "section": section,
                        "title": title,
                        "ffield_value": ffield_val,
                        "qm_value": qm_val,
                        "weight": weight,
                        "error": err,
                        "total_ff_error": tot_err,
                    }
                )

        # Passing the columns explicitly keeps the schema stable when no
        # row was parsed (an empty list alone gives a column-less table).
        df = pd.DataFrame(rows, columns=list(self._COLUMNS))

        # fort.99 has no per-frame data
        self._frames = []
        meta: Dict[str, Any] = {
            "n_records": len(df),
            "n_frames": 0,
        }
        return df, meta
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ReaxFF geometry structure (geo) file handler.
|
|
3
|
+
|
|
4
|
+
This module provides a handler for parsing ReaxFF ``.geo`` structure
|
|
5
|
+
files in XTLGRF format, which define atomic coordinates, optional
|
|
6
|
+
periodic cell parameters, and descriptive metadata for a system.
|
|
7
|
+
|
|
8
|
+
Typical use cases include:
|
|
9
|
+
|
|
10
|
+
- loading initial or relaxed geometries
|
|
11
|
+
- extracting atomic coordinates for analysis or visualization
|
|
12
|
+
- accessing unit cell parameters for periodic systems
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import List, Optional, Dict, Any
|
|
19
|
+
|
|
20
|
+
import pandas as pd
|
|
21
|
+
|
|
22
|
+
from reaxkit.io.base_handler import BaseHandler
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class GeoHandler(BaseHandler):
    """
    Parser for ReaxFF geometry structure files (``.geo`` / XTLGRF format).

    This class parses ``.geo`` files and exposes atomic coordinates and
    associated structural metadata as structured Python objects.

    Parsed Data
    -----------
    Atom table
        One row per atom, returned by ``dataframe()``, with columns:
        ["atom_id", "atom_type", "x", "y", "z"]

    Metadata
        Returned by ``metadata()``, containing:
        {
            "descriptor": str | None,        # from DESCRP line
            "remark": str | None,            # concatenated REMARK lines
            "cell_lengths": {                # from CRYSTX (a, b, c)
                "a": float,
                "b": float,
                "c": float,
            } | None,
            "cell_angles": {                 # from CRYSTX (alpha, beta, gamma)
                "alpha": float,
                "beta": float,
                "gamma": float,
            } | None,
            "n_atoms": int,
        }

    Notes
    -----
    - Only ``ATOM`` and ``HETATM`` records are parsed into the atom table.
    - Atom records are split on whitespace; a record needs at least the
      keyword, id, type and three coordinates to be accepted.
    - Cell parameters are optional and may be absent for non-periodic systems.
    - Non-structural lines (e.g. ``XTLGRF``, ``FORMAT``) are ignored.
    - This handler is not frame-based; the file represents a single structure.
    """

    def __init__(self, file_path: str | Path = "geo"):
        """Initialize a handler for a ReaxFF ``.geo`` structure file.

        Parameters
        ----------
        file_path : str or pathlib.Path, optional
            Path to the geometry file to be parsed.
        """
        super().__init__(file_path)
        self._n_atoms: Optional[int] = None

    # ------------------------------------------------------------------
    # Core parser
    # ------------------------------------------------------------------
    def _parse(self) -> tuple[pd.DataFrame, dict[str, Any]]:
        """Read the file and build the atom table and metadata.

        Malformed ``CRYSTX`` and atom records are skipped rather than
        raising, so a partially damaged file still yields what is readable.

        Returns
        -------
        tuple
            ``(atom_dataframe, metadata_dict)``.
        """
        atoms: List[Dict[str, Any]] = []

        descriptor: Optional[str] = None
        remark_parts: List[str] = []
        cell_lengths: Optional[Dict[str, float]] = None
        cell_angles: Optional[Dict[str, float]] = None

        with open(self.path, "r") as fh:
            for raw in fh:
                line = raw.rstrip("\n")
                if not line.strip():
                    continue

                # Descriptor: everything after the 6-character keyword
                if line.startswith("DESCRP"):
                    descriptor = line[6:].strip() or None
                    continue

                # Remark (optional, possibly multiple lines)
                if line.startswith("REMARK"):
                    text = line[6:].strip()
                    if text:
                        remark_parts.append(text)
                    continue

                # Cell / periodic box
                if line.startswith("CRYSTX"):
                    # Expected: CRYSTX a b c alpha beta gamma
                    parts = line.split()
                    if len(parts) >= 7:
                        try:
                            a, b, c = map(float, parts[1:4])
                            alpha, beta, gamma = map(float, parts[4:7])
                        except ValueError:
                            # If parsing fails, leave the cell as None
                            pass
                        else:
                            cell_lengths = {"a": a, "b": b, "c": c}
                            cell_angles = {
                                "alpha": alpha,
                                "beta": beta,
                                "gamma": gamma,
                            }
                    continue

                # Atom records: HETATM or ATOM
                if line.startswith(("HETATM", "ATOM")):
                    parts = line.split()
                    # Fields used:
                    # 0: "HETATM" / "ATOM"
                    # 1: atom_id (int)
                    # 2: atom_type (str, e.g., N, Al, O_w, ...)
                    # 3: x
                    # 4: y
                    # 5: z
                    # 6+: repeated atom type and extra fields (ignored)
                    if len(parts) < 6:
                        # Too short to contain id, type, and coordinates
                        continue

                    try:
                        atom_id = int(parts[1])
                        x, y, z = map(float, parts[3:6])
                    except ValueError:
                        # Unexpected format or unparseable coordinates, skip
                        continue

                    atoms.append(
                        {
                            "atom_id": atom_id,
                            "atom_type": parts[2],
                            "x": x,
                            "y": y,
                            "z": z,
                        }
                    )

                # Other lines (XTLGRF, FORMAT, etc.) are ignored

        df = pd.DataFrame(atoms, columns=["atom_id", "atom_type", "x", "y", "z"])
        n_atoms = len(df)
        self._n_atoms = n_atoms

        meta: Dict[str, Any] = {
            "descriptor": descriptor,
            # None (not "") when no REMARK text was found, matching descriptor
            "remark": " ".join(remark_parts) or None,
            "cell_lengths": cell_lengths,
            "cell_angles": cell_angles,
            "n_atoms": n_atoms,
        }
        return df, meta

    # ------------------------------------------------------------------
    # Convenience accessors
    # ------------------------------------------------------------------
    def n_atoms(self) -> int:
        """Return the number of atoms in the .geo file."""
        if self._n_atoms is None:
            self._n_atoms = int(self.metadata().get("n_atoms", len(self.dataframe())))
        return self._n_atoms

    def cell(self) -> Dict[str, Optional[float]]:
        """
        Return a flat dict with cell parameters:

            {
                "a": ...,
                "b": ...,
                "c": ...,
                "alpha": ...,
                "beta": ...,
                "gamma": ...,
            }

        Values may be None if CRYSTX was missing or malformed.
        """
        meta = self.metadata()
        lengths = meta.get("cell_lengths") or {}
        angles = meta.get("cell_angles") or {}

        return {
            "a": lengths.get("a"),
            "b": lengths.get("b"),
            "c": lengths.get("c"),
            "alpha": angles.get("alpha"),
            "beta": angles.get("beta"),
            "gamma": angles.get("gamma"),
        }

    def coordinates(self) -> pd.DataFrame:
        """
        Return a copy of the atom table (id, type, x, y, z).

        This is just a convenience wrapper around .dataframe()
        to make the intent explicit.
        """
        return self.dataframe().copy()
|