reaxkit 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reaxkit/__init__.py +0 -0
- reaxkit/analysis/__init__.py +0 -0
- reaxkit/analysis/composed/RDF_analyzer.py +560 -0
- reaxkit/analysis/composed/__init__.py +0 -0
- reaxkit/analysis/composed/connectivity_analyzer.py +706 -0
- reaxkit/analysis/composed/coordination_analyzer.py +144 -0
- reaxkit/analysis/composed/electrostatics_analyzer.py +687 -0
- reaxkit/analysis/per_file/__init__.py +0 -0
- reaxkit/analysis/per_file/control_analyzer.py +165 -0
- reaxkit/analysis/per_file/eregime_analyzer.py +108 -0
- reaxkit/analysis/per_file/ffield_analyzer.py +305 -0
- reaxkit/analysis/per_file/fort13_analyzer.py +79 -0
- reaxkit/analysis/per_file/fort57_analyzer.py +106 -0
- reaxkit/analysis/per_file/fort73_analyzer.py +61 -0
- reaxkit/analysis/per_file/fort74_analyzer.py +65 -0
- reaxkit/analysis/per_file/fort76_analyzer.py +191 -0
- reaxkit/analysis/per_file/fort78_analyzer.py +154 -0
- reaxkit/analysis/per_file/fort79_analyzer.py +83 -0
- reaxkit/analysis/per_file/fort7_analyzer.py +393 -0
- reaxkit/analysis/per_file/fort99_analyzer.py +411 -0
- reaxkit/analysis/per_file/molfra_analyzer.py +359 -0
- reaxkit/analysis/per_file/params_analyzer.py +258 -0
- reaxkit/analysis/per_file/summary_analyzer.py +84 -0
- reaxkit/analysis/per_file/trainset_analyzer.py +84 -0
- reaxkit/analysis/per_file/vels_analyzer.py +95 -0
- reaxkit/analysis/per_file/xmolout_analyzer.py +528 -0
- reaxkit/cli.py +181 -0
- reaxkit/count_loc.py +276 -0
- reaxkit/data/alias.yaml +89 -0
- reaxkit/data/constants.yaml +27 -0
- reaxkit/data/reaxff_input_files_contents.yaml +186 -0
- reaxkit/data/reaxff_output_files_contents.yaml +301 -0
- reaxkit/data/units.yaml +38 -0
- reaxkit/help/__init__.py +0 -0
- reaxkit/help/help_index_loader.py +531 -0
- reaxkit/help/introspection_utils.py +131 -0
- reaxkit/io/__init__.py +0 -0
- reaxkit/io/base_handler.py +165 -0
- reaxkit/io/generators/__init__.py +0 -0
- reaxkit/io/generators/control_generator.py +123 -0
- reaxkit/io/generators/eregime_generator.py +341 -0
- reaxkit/io/generators/geo_generator.py +967 -0
- reaxkit/io/generators/trainset_generator.py +1758 -0
- reaxkit/io/generators/tregime_generator.py +113 -0
- reaxkit/io/generators/vregime_generator.py +164 -0
- reaxkit/io/generators/xmolout_generator.py +304 -0
- reaxkit/io/handlers/__init__.py +0 -0
- reaxkit/io/handlers/control_handler.py +209 -0
- reaxkit/io/handlers/eregime_handler.py +122 -0
- reaxkit/io/handlers/ffield_handler.py +812 -0
- reaxkit/io/handlers/fort13_handler.py +123 -0
- reaxkit/io/handlers/fort57_handler.py +143 -0
- reaxkit/io/handlers/fort73_handler.py +145 -0
- reaxkit/io/handlers/fort74_handler.py +155 -0
- reaxkit/io/handlers/fort76_handler.py +195 -0
- reaxkit/io/handlers/fort78_handler.py +142 -0
- reaxkit/io/handlers/fort79_handler.py +227 -0
- reaxkit/io/handlers/fort7_handler.py +264 -0
- reaxkit/io/handlers/fort99_handler.py +128 -0
- reaxkit/io/handlers/geo_handler.py +224 -0
- reaxkit/io/handlers/molfra_handler.py +184 -0
- reaxkit/io/handlers/params_handler.py +137 -0
- reaxkit/io/handlers/summary_handler.py +135 -0
- reaxkit/io/handlers/trainset_handler.py +658 -0
- reaxkit/io/handlers/vels_handler.py +293 -0
- reaxkit/io/handlers/xmolout_handler.py +174 -0
- reaxkit/utils/__init__.py +0 -0
- reaxkit/utils/alias.py +219 -0
- reaxkit/utils/cache.py +77 -0
- reaxkit/utils/constants.py +75 -0
- reaxkit/utils/equation_of_states.py +96 -0
- reaxkit/utils/exceptions.py +27 -0
- reaxkit/utils/frame_utils.py +175 -0
- reaxkit/utils/log.py +43 -0
- reaxkit/utils/media/__init__.py +0 -0
- reaxkit/utils/media/convert.py +90 -0
- reaxkit/utils/media/make_video.py +91 -0
- reaxkit/utils/media/plotter.py +812 -0
- reaxkit/utils/numerical/__init__.py +0 -0
- reaxkit/utils/numerical/extrema_finder.py +96 -0
- reaxkit/utils/numerical/moving_average.py +103 -0
- reaxkit/utils/numerical/numerical_calcs.py +75 -0
- reaxkit/utils/numerical/signal_ops.py +135 -0
- reaxkit/utils/path.py +55 -0
- reaxkit/utils/units.py +104 -0
- reaxkit/webui/__init__.py +0 -0
- reaxkit/webui/app.py +0 -0
- reaxkit/webui/components.py +0 -0
- reaxkit/webui/layouts.py +0 -0
- reaxkit/webui/utils.py +0 -0
- reaxkit/workflows/__init__.py +0 -0
- reaxkit/workflows/composed/__init__.py +0 -0
- reaxkit/workflows/composed/coordination_workflow.py +393 -0
- reaxkit/workflows/composed/electrostatics_workflow.py +587 -0
- reaxkit/workflows/composed/xmolout_fort7_workflow.py +343 -0
- reaxkit/workflows/meta/__init__.py +0 -0
- reaxkit/workflows/meta/help_workflow.py +136 -0
- reaxkit/workflows/meta/introspection_workflow.py +235 -0
- reaxkit/workflows/meta/make_video_workflow.py +61 -0
- reaxkit/workflows/meta/plotter_workflow.py +601 -0
- reaxkit/workflows/per_file/__init__.py +0 -0
- reaxkit/workflows/per_file/control_workflow.py +110 -0
- reaxkit/workflows/per_file/eregime_workflow.py +267 -0
- reaxkit/workflows/per_file/ffield_workflow.py +390 -0
- reaxkit/workflows/per_file/fort13_workflow.py +86 -0
- reaxkit/workflows/per_file/fort57_workflow.py +137 -0
- reaxkit/workflows/per_file/fort73_workflow.py +151 -0
- reaxkit/workflows/per_file/fort74_workflow.py +88 -0
- reaxkit/workflows/per_file/fort76_workflow.py +188 -0
- reaxkit/workflows/per_file/fort78_workflow.py +135 -0
- reaxkit/workflows/per_file/fort79_workflow.py +314 -0
- reaxkit/workflows/per_file/fort7_workflow.py +592 -0
- reaxkit/workflows/per_file/fort83_workflow.py +60 -0
- reaxkit/workflows/per_file/fort99_workflow.py +223 -0
- reaxkit/workflows/per_file/geo_workflow.py +554 -0
- reaxkit/workflows/per_file/molfra_workflow.py +577 -0
- reaxkit/workflows/per_file/params_workflow.py +135 -0
- reaxkit/workflows/per_file/summary_workflow.py +161 -0
- reaxkit/workflows/per_file/trainset_workflow.py +356 -0
- reaxkit/workflows/per_file/tregime_workflow.py +79 -0
- reaxkit/workflows/per_file/vels_workflow.py +309 -0
- reaxkit/workflows/per_file/vregime_workflow.py +75 -0
- reaxkit/workflows/per_file/xmolout_workflow.py +678 -0
- reaxkit-1.0.0.dist-info/METADATA +128 -0
- reaxkit-1.0.0.dist-info/RECORD +130 -0
- reaxkit-1.0.0.dist-info/WHEEL +5 -0
- reaxkit-1.0.0.dist-info/entry_points.txt +2 -0
- reaxkit-1.0.0.dist-info/licenses/AUTHORS.md +20 -0
- reaxkit-1.0.0.dist-info/licenses/LICENSE +21 -0
- reaxkit-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ReaxFF molecular fragment analysis (molfra.out) handler.
|
|
3
|
+
|
|
4
|
+
This module provides a handler for parsing ReaxFF ``molfra.out`` and
|
|
5
|
+
``molfra_ig.out`` files, which report molecule/fragment compositions
|
|
6
|
+
and their frequencies as a function of simulation iteration.
|
|
7
|
+
|
|
8
|
+
Typical use cases include:
|
|
9
|
+
|
|
10
|
+
- tracking molecular species formation and decay
|
|
11
|
+
- monitoring reaction pathways and fragment distributions
|
|
12
|
+
- computing molecule counts and system-level mass summaries
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import Dict, Any, List, Iterator, Optional
|
|
19
|
+
import pandas as pd
|
|
20
|
+
|
|
21
|
+
from reaxkit.io.base_handler import BaseHandler
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class MolFraHandler(BaseHandler):
    """
    Parser for ReaxFF molecular fragment output files
    (``molfra.out``, ``molfra_ig.out``).

    This class parses molecular fragment frequency data and exposes both
    per-molecule and per-iteration summary information as structured
    tabular datasets.

    Parsed Data
    -----------
    Molecule table
        One row per (iteration, molecular species), returned by
        ``dataframe()``, with columns:
        ["iter", "molecular_formula", "freq", "molecular_mass"]

    Totals table
        One row per iteration, accessible via ``totals()``, with columns:
        ["iter", "total_molecules", "total_atoms", "total_molecular_mass"]

    Metadata
        Returned by ``metadata()``, containing:
        ["n_records", "n_iters", "iter_min", "iter_max",
         "molecular_formulas"]

    Notes
    -----
    - Molecular species are identified by their chemical formula strings.
    - Frequency values represent counts per iteration.
    - Totals are parsed from summary blocks following molecule listings.
    - This handler is iteration-based rather than frame-based, but exposes
      a minimal frame-like API for consistency.
    """

    def __init__(self, file_path: str | Path = "molfra.out"):
        super().__init__(file_path)
        self._n_records: Optional[int] = None
        self._types: Optional[List[str]] = None

    # ---- Core parser
    def _parse(self) -> tuple[pd.DataFrame, Dict[str, Any]]:
        """
        Parse molfra.out into two dataframes:
          1. Molecule occurrences per iter
          2. Totals (number of molecules, atoms, system molecular_mass) per iter

        Returns
        -------
        df_mol : DataFrame
            Columns: iter, molecular_formula, freq, molecular_mass.
        meta : dict
            Keys: n_records, n_iters, iter_min, iter_max, molecular_formulas.
        """
        # Fixed: annotations previously used the builtin ``any`` instead of
        # ``typing.Any``.
        mol_rows: list[dict[str, Any]] = []
        total_rows: list[dict[str, Any]] = []

        with open(self.path, "r") as fh:
            current_iter: Optional[int] = None
            current_totals: dict[str, Any] = {}

            for line in fh:
                line = line.strip()
                if not line or line.startswith("Bond order"):
                    continue

                # Header
                if line.startswith("Iteration"):
                    continue

                # Totals block: "Total system ..." is the last of the three
                # summary lines, so it flushes the accumulated record.
                if line.startswith("Total number of molecules"):
                    current_totals["total_molecules"] = int(line.split()[-1])
                    continue
                if line.startswith("Total number of atoms"):
                    current_totals["total_atoms"] = int(line.split()[-1])
                    continue
                if line.startswith("Total system"):
                    current_totals["total_molecular_mass"] = float(line.split()[-1])
                    if current_iter is not None and current_totals:
                        current_totals["iter"] = current_iter
                        total_rows.append(current_totals)
                        current_totals = {}
                    continue

                # Molecule lines: "<iter> <freq> x <formula> ... <mass>"
                parts = line.split()
                if len(parts) >= 5 and "x" in parts:
                    try:
                        # Renamed from ``iter`` to avoid shadowing the builtin.
                        iteration = int(parts[0])
                        freq = int(parts[1])
                        molecular_mass = float(parts[-1])
                        x_index = parts.index("x")
                        molecular_formula = parts[x_index + 1]
                        current_iter = iteration
                    except (ValueError, IndexError):
                        continue
                    mol_rows.append({
                        "iter": iteration,
                        "molecular_formula": molecular_formula,
                        "freq": freq,
                        "molecular_mass": molecular_mass,
                    })

        # Build dataframes.  Guard the empty-file case, which previously
        # raised KeyError on sort_values() / the metadata column lookups.
        mol_columns = ["iter", "molecular_formula", "freq", "molecular_mass"]
        tot_columns = ["total_molecules", "total_atoms", "total_molecular_mass", "iter"]
        df_mol = pd.DataFrame(mol_rows, columns=mol_columns)
        if total_rows:
            df_tot = pd.DataFrame(total_rows).sort_values("iter").reset_index(drop=True)
        else:
            df_tot = pd.DataFrame(columns=tot_columns)

        if df_mol.empty:
            meta: Dict[str, Any] = {
                "n_records": 0,
                "n_iters": 0,
                "iter_min": None,
                "iter_max": None,
                "molecular_formulas": [],
            }
        else:
            meta = {
                "n_records": len(df_mol),
                "n_iters": df_mol["iter"].nunique(),
                "iter_min": df_mol["iter"].min(),
                "iter_max": df_mol["iter"].max(),
                "molecular_formulas": sorted(df_mol["molecular_formula"].unique().tolist()),
            }

        # Store both
        self._df_totals = df_tot
        self._n_records = meta["n_records"]
        self._types = meta["molecular_formulas"]
        return df_mol, meta

    # ---- Convenience accessors (file-specific)
    def n_records(self) -> int:
        """Number of parsed (iteration, molecule) records."""
        return int(self.metadata().get("n_records", 0))

    def molecular_formulas(self) -> List[str]:
        """Sorted list of distinct molecular formula strings seen in the file."""
        return list(self.metadata().get("molecular_formulas", []))

    def by_type(self, mtype: str) -> pd.DataFrame:
        """Return rows for a given molecule type (chemical formula string)."""
        df = self.dataframe()
        return df[df["molecular_formula"] == mtype].reset_index(drop=True)

    def totals(self) -> pd.DataFrame:
        """Return total molecules/atoms/molecular_mass per iter."""
        if hasattr(self, "_df_totals"):
            return self._df_totals.copy()
        else:
            raise AttributeError("Totals dataframe not parsed or unavailable.")

    # ---- Frame-oriented API (kept minimal for template parity)
    def n_frames(self) -> int:
        """molfra.out is not frame-based; expose unique iters instead."""
        return int(self.metadata().get("n_iters", 0))

    def frame(self, i: int) -> Dict[str, Any]:
        """
        Return a per-iter 'frame' view:
            { 'iter': <int>, 'freqs': DataFrame[molecular_formula, freq] }

        Raises
        ------
        IndexError
            If no data is loaded, or ``i`` is outside [0, n_iters).
        """
        df = self.dataframe()
        if df.empty:
            raise IndexError("No data loaded.")
        iters = sorted(df["iter"].unique())
        if i < 0 or i >= len(iters):
            raise IndexError(f"Frame index {i} out of range (0..{len(iters)-1}).")
        it = iters[i]
        sub = (
            df.loc[df["iter"] == it, ["molecular_formula", "freq"]]
            .sort_values("molecular_formula")
            .reset_index(drop=True)
        )
        return {"iter": it, "freqs": sub}

    def iter_frames(self, step: int = 1) -> Iterator[Dict[str, Any]]:
        """Yield frame views for every ``step``-th unique iteration."""
        for i in range(0, self.n_frames(), step):
            yield self.frame(i)
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ReaxFF parameter search definition (params) handler.
|
|
3
|
+
|
|
4
|
+
This module provides a handler for parsing ReaxFF ``params`` files,
|
|
5
|
+
which define parameter indices, search intervals, bounds, and optional
|
|
6
|
+
inline comments used during force-field optimization.
|
|
7
|
+
|
|
8
|
+
Typical use cases include:
|
|
9
|
+
|
|
10
|
+
- inspecting parameter search spaces
|
|
11
|
+
- linking optimization parameters to force-field sections
|
|
12
|
+
- building interpretable training and sensitivity analyses
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import List, Dict, Any
|
|
20
|
+
|
|
21
|
+
import pandas as pd
|
|
22
|
+
|
|
23
|
+
from reaxkit.io.base_handler import BaseHandler
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ParamsHandler(BaseHandler):
    """
    Parser for ReaxFF parameter search definition files (``params``).

    Each non-comment line of a ``params`` file describes one optimizable
    force-field parameter: where it lives in the force field, its search
    interval, and its allowed bounds.  The handler exposes those entries
    as a single tabular dataset.

    Parsed Data
    -----------
    Summary table
        One row per parameter entry, returned by ``dataframe()``, with columns:
        ["ff_section", "ff_section_line", "ff_parameter",
         "search_interval", "min_value", "max_value", "inline_comment"]

        Column meaning relative to the force-field file:
        - ``ff_section``: force-field section identifier
          (1–7 → general, atom, bond, off-diagonal, angle, torsion, h-bond)
        - ``ff_section_line``: line index within the corresponding section
        - ``ff_parameter``: parameter index within that line

    Metadata
        Returned by ``metadata()``, containing:
        ["n_records", "n_frames"]

    Notes
    -----
    - Inline comments following ``!`` are preserved verbatim.
    - Lines with incorrect token counts raise a parsing error.
    - This handler is not frame-based; ``n_frames()`` always returns 0.
    """

    COLUMNS = [
        "ff_section",
        "ff_section_line",
        "ff_parameter",
        "search_interval",
        "min_value",
        "max_value",
        "inline_comment",
    ]

    def __init__(self, file_path: str | Path = "params.in"):
        super().__init__(file_path)

    def _parse(self) -> tuple[pd.DataFrame, dict[str, Any]]:
        """
        Parse the params file into a tabular summary.

        Returns
        -------
        df : DataFrame
            Columns: ff_section, ff_section_line, ff_parameter,
            search_interval, min_value, max_value, inline_comment.
        meta : dict
            Keys: n_records, n_frames.

        Raises
        ------
        ValueError
            If a data line does not contain exactly 6 numeric tokens.
        """
        records: List[Dict[str, Any]] = []

        with open(self.path, "r") as src:
            for raw in src:
                stripped = raw.strip()

                # Blank lines and whole-line comments carry no data.
                if not stripped or stripped[0] in "!#":
                    continue

                # Everything after the first "!" is an inline comment.
                data_part, marker, trailing = stripped.partition("!")
                comment_text = trailing.strip() if marker else ""

                fields = data_part.split()
                if not fields:
                    continue

                # A valid entry is exactly:
                # ff_section ff_section_line ff_parameter search_interval min_value max_value
                if len(fields) != 6:
                    raise ValueError(
                        f"Expected 6 tokens in params line, got {len(fields)}: {raw!r}"
                    )

                sec, sec_line, param = (int(t) for t in fields[:3])
                interval, lo, hi = (float(t) for t in fields[3:])

                records.append(
                    {
                        "ff_section": sec,
                        "ff_section_line": sec_line,
                        "ff_parameter": param,
                        "search_interval": interval,
                        "min_value": lo,
                        "max_value": hi,
                        "inline_comment": comment_text,
                    }
                )

        frame = pd.DataFrame(records, columns=self.COLUMNS)

        # params files carry no per-frame data.
        self._frames = []

        summary: Dict[str, Any] = {"n_records": len(frame), "n_frames": 0}
        return frame, summary
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ReaxFF simulation summary (summary.txt) handler.
|
|
3
|
+
|
|
4
|
+
This module provides a handler for parsing ReaxFF ``summary.txt`` files,
|
|
5
|
+
which report per-iteration thermodynamic and system-level quantities
|
|
6
|
+
during MD or minimization runs.
|
|
7
|
+
|
|
8
|
+
Typical use cases include:
|
|
9
|
+
|
|
10
|
+
- tracking energy, temperature, pressure, and density versus iteration
|
|
11
|
+
- extracting time-series data for plotting or analysis
|
|
12
|
+
- validating simulation stability and convergence
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import Dict, Any, Tuple, List
|
|
19
|
+
import pandas as pd
|
|
20
|
+
from io import StringIO
|
|
21
|
+
|
|
22
|
+
from reaxkit.io.base_handler import BaseHandler
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class SummaryHandler(BaseHandler):
    """
    Parser for ReaxFF simulation summary files (``summary.txt``).

    The file is a scalar-per-iteration time series: one whitespace-delimited
    numeric row per iteration, preceded by banner/header text that this
    handler discards.

    Parsed Data
    -----------
    Summary table
        One row per iteration, returned by ``dataframe()``.  Column names
        are assigned from the detected column count:

        - 8 columns:
          ["iter", "nmol", "time", "E_pot", "V", "T", "P", "D"]
        - 9 columns:
          ["iter", "nmol", "time", "E_pot", "V", "T", "P", "D", "elap_time"]

    Metadata
        Returned by ``metadata()``, containing:
        ["n_records", "columns", "has_time", "source_file"]

    Notes
    -----
    - Banner and header lines starting with ``REAX`` or ``Iteration`` are ignored.
    - Rows are parsed as whitespace-delimited numeric data with no in-file header.
    - Duplicate iteration indices are resolved by keeping the last entry.
    """

    def __init__(self, file_path: str | Path = "summary.txt") -> None:
        super().__init__(file_path)

    @staticmethod
    def _canonical_names(ncols: int) -> List[str]:
        """Map a detected column count (8 or 9) to canonical column names."""
        base = ["iter", "nmol", "time", "E_pot", "V", "T", "P", "D"]
        if ncols == 8:
            return base
        if ncols == 9:
            return base + ["elap_time"]
        raise ValueError(
            f"Unsupported number of columns in summary data: {ncols}. "
            f"Expected 8 or 9."
        )

    def _parse(self) -> Tuple[pd.DataFrame, Dict[str, Any]]:
        """
        Parse summary.txt into a numeric per-iteration table.

        Returns
        -------
        df : DataFrame
            One row per iteration with canonical column names.
        meta : dict
            Keys: n_records, columns, has_time, source_file.

        Raises
        ------
        FileNotFoundError
            If the summary file does not exist.
        ValueError
            If no data rows remain after filtering, or the column count
            is not 8 or 9.
        """
        path = Path(self.path)
        if not path.exists():
            raise FileNotFoundError(f"Summary file not found: {path}")

        with path.open("r") as src:
            all_lines = src.readlines()

        # Keep only the numeric data rows; drop banners, headers, and any
        # trailing comment/warning lines that do not begin with a digit.
        kept: List[str] = []
        for raw in all_lines:
            text = raw.strip()
            if not text:
                continue
            if text.lower().startswith(("reax", "iteration")):
                continue
            if not text[0].isdigit():
                continue
            kept.append(raw)

        if not kept:
            raise ValueError(
                "No data lines found after removing 'REAX…' and 'Iteration…' headers."
            )

        # Whitespace-delimited, headerless numeric block.
        table = pd.read_csv(
            StringIO("".join(kept).strip()),
            sep=r"\s+",
            header=None,
            engine="python",
        )
        table.columns = self._canonical_names(table.shape[1])

        # Cleanup: drop fully-empty rows, then coerce every column numeric.
        table = table.dropna(how="all").reset_index(drop=True)
        table = table.apply(pd.to_numeric, errors="coerce")

        if "iter" in table.columns:
            table = (
                table.dropna(subset=["iter"])
                .drop_duplicates("iter", keep="last")
                .reset_index(drop=True)
            )

        info: Dict[str, Any] = {
            "n_records": int(len(table)),
            "columns": list(table.columns),
            "has_time": "time" in table.columns,
            "source_file": str(path),
        }
        return table, info

    # Convenience accessors on canonical schema
    def fields(self) -> List[str]:
        """Return the column names of the parsed summary table."""
        return list(self.dataframe().columns)

    def has_times(self) -> bool:
        """Return True when the parsed table has a 'time' column."""
        return "time" in self.dataframe().columns

    def iterations(self) -> pd.Series:
        """Return the 'iter' column; raise KeyError if it is absent."""
        frame = self.dataframe()
        if "iter" not in frame.columns:
            raise KeyError("'iter' column not found in summary.")
        return frame["iter"]