reaxkit 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reaxkit/__init__.py +0 -0
- reaxkit/analysis/__init__.py +0 -0
- reaxkit/analysis/composed/RDF_analyzer.py +560 -0
- reaxkit/analysis/composed/__init__.py +0 -0
- reaxkit/analysis/composed/connectivity_analyzer.py +706 -0
- reaxkit/analysis/composed/coordination_analyzer.py +144 -0
- reaxkit/analysis/composed/electrostatics_analyzer.py +687 -0
- reaxkit/analysis/per_file/__init__.py +0 -0
- reaxkit/analysis/per_file/control_analyzer.py +165 -0
- reaxkit/analysis/per_file/eregime_analyzer.py +108 -0
- reaxkit/analysis/per_file/ffield_analyzer.py +305 -0
- reaxkit/analysis/per_file/fort13_analyzer.py +79 -0
- reaxkit/analysis/per_file/fort57_analyzer.py +106 -0
- reaxkit/analysis/per_file/fort73_analyzer.py +61 -0
- reaxkit/analysis/per_file/fort74_analyzer.py +65 -0
- reaxkit/analysis/per_file/fort76_analyzer.py +191 -0
- reaxkit/analysis/per_file/fort78_analyzer.py +154 -0
- reaxkit/analysis/per_file/fort79_analyzer.py +83 -0
- reaxkit/analysis/per_file/fort7_analyzer.py +393 -0
- reaxkit/analysis/per_file/fort99_analyzer.py +411 -0
- reaxkit/analysis/per_file/molfra_analyzer.py +359 -0
- reaxkit/analysis/per_file/params_analyzer.py +258 -0
- reaxkit/analysis/per_file/summary_analyzer.py +84 -0
- reaxkit/analysis/per_file/trainset_analyzer.py +84 -0
- reaxkit/analysis/per_file/vels_analyzer.py +95 -0
- reaxkit/analysis/per_file/xmolout_analyzer.py +528 -0
- reaxkit/cli.py +181 -0
- reaxkit/count_loc.py +276 -0
- reaxkit/data/alias.yaml +89 -0
- reaxkit/data/constants.yaml +27 -0
- reaxkit/data/reaxff_input_files_contents.yaml +186 -0
- reaxkit/data/reaxff_output_files_contents.yaml +301 -0
- reaxkit/data/units.yaml +38 -0
- reaxkit/help/__init__.py +0 -0
- reaxkit/help/help_index_loader.py +531 -0
- reaxkit/help/introspection_utils.py +131 -0
- reaxkit/io/__init__.py +0 -0
- reaxkit/io/base_handler.py +165 -0
- reaxkit/io/generators/__init__.py +0 -0
- reaxkit/io/generators/control_generator.py +123 -0
- reaxkit/io/generators/eregime_generator.py +341 -0
- reaxkit/io/generators/geo_generator.py +967 -0
- reaxkit/io/generators/trainset_generator.py +1758 -0
- reaxkit/io/generators/tregime_generator.py +113 -0
- reaxkit/io/generators/vregime_generator.py +164 -0
- reaxkit/io/generators/xmolout_generator.py +304 -0
- reaxkit/io/handlers/__init__.py +0 -0
- reaxkit/io/handlers/control_handler.py +209 -0
- reaxkit/io/handlers/eregime_handler.py +122 -0
- reaxkit/io/handlers/ffield_handler.py +812 -0
- reaxkit/io/handlers/fort13_handler.py +123 -0
- reaxkit/io/handlers/fort57_handler.py +143 -0
- reaxkit/io/handlers/fort73_handler.py +145 -0
- reaxkit/io/handlers/fort74_handler.py +155 -0
- reaxkit/io/handlers/fort76_handler.py +195 -0
- reaxkit/io/handlers/fort78_handler.py +142 -0
- reaxkit/io/handlers/fort79_handler.py +227 -0
- reaxkit/io/handlers/fort7_handler.py +264 -0
- reaxkit/io/handlers/fort99_handler.py +128 -0
- reaxkit/io/handlers/geo_handler.py +224 -0
- reaxkit/io/handlers/molfra_handler.py +184 -0
- reaxkit/io/handlers/params_handler.py +137 -0
- reaxkit/io/handlers/summary_handler.py +135 -0
- reaxkit/io/handlers/trainset_handler.py +658 -0
- reaxkit/io/handlers/vels_handler.py +293 -0
- reaxkit/io/handlers/xmolout_handler.py +174 -0
- reaxkit/utils/__init__.py +0 -0
- reaxkit/utils/alias.py +219 -0
- reaxkit/utils/cache.py +77 -0
- reaxkit/utils/constants.py +75 -0
- reaxkit/utils/equation_of_states.py +96 -0
- reaxkit/utils/exceptions.py +27 -0
- reaxkit/utils/frame_utils.py +175 -0
- reaxkit/utils/log.py +43 -0
- reaxkit/utils/media/__init__.py +0 -0
- reaxkit/utils/media/convert.py +90 -0
- reaxkit/utils/media/make_video.py +91 -0
- reaxkit/utils/media/plotter.py +812 -0
- reaxkit/utils/numerical/__init__.py +0 -0
- reaxkit/utils/numerical/extrema_finder.py +96 -0
- reaxkit/utils/numerical/moving_average.py +103 -0
- reaxkit/utils/numerical/numerical_calcs.py +75 -0
- reaxkit/utils/numerical/signal_ops.py +135 -0
- reaxkit/utils/path.py +55 -0
- reaxkit/utils/units.py +104 -0
- reaxkit/webui/__init__.py +0 -0
- reaxkit/webui/app.py +0 -0
- reaxkit/webui/components.py +0 -0
- reaxkit/webui/layouts.py +0 -0
- reaxkit/webui/utils.py +0 -0
- reaxkit/workflows/__init__.py +0 -0
- reaxkit/workflows/composed/__init__.py +0 -0
- reaxkit/workflows/composed/coordination_workflow.py +393 -0
- reaxkit/workflows/composed/electrostatics_workflow.py +587 -0
- reaxkit/workflows/composed/xmolout_fort7_workflow.py +343 -0
- reaxkit/workflows/meta/__init__.py +0 -0
- reaxkit/workflows/meta/help_workflow.py +136 -0
- reaxkit/workflows/meta/introspection_workflow.py +235 -0
- reaxkit/workflows/meta/make_video_workflow.py +61 -0
- reaxkit/workflows/meta/plotter_workflow.py +601 -0
- reaxkit/workflows/per_file/__init__.py +0 -0
- reaxkit/workflows/per_file/control_workflow.py +110 -0
- reaxkit/workflows/per_file/eregime_workflow.py +267 -0
- reaxkit/workflows/per_file/ffield_workflow.py +390 -0
- reaxkit/workflows/per_file/fort13_workflow.py +86 -0
- reaxkit/workflows/per_file/fort57_workflow.py +137 -0
- reaxkit/workflows/per_file/fort73_workflow.py +151 -0
- reaxkit/workflows/per_file/fort74_workflow.py +88 -0
- reaxkit/workflows/per_file/fort76_workflow.py +188 -0
- reaxkit/workflows/per_file/fort78_workflow.py +135 -0
- reaxkit/workflows/per_file/fort79_workflow.py +314 -0
- reaxkit/workflows/per_file/fort7_workflow.py +592 -0
- reaxkit/workflows/per_file/fort83_workflow.py +60 -0
- reaxkit/workflows/per_file/fort99_workflow.py +223 -0
- reaxkit/workflows/per_file/geo_workflow.py +554 -0
- reaxkit/workflows/per_file/molfra_workflow.py +577 -0
- reaxkit/workflows/per_file/params_workflow.py +135 -0
- reaxkit/workflows/per_file/summary_workflow.py +161 -0
- reaxkit/workflows/per_file/trainset_workflow.py +356 -0
- reaxkit/workflows/per_file/tregime_workflow.py +79 -0
- reaxkit/workflows/per_file/vels_workflow.py +309 -0
- reaxkit/workflows/per_file/vregime_workflow.py +75 -0
- reaxkit/workflows/per_file/xmolout_workflow.py +678 -0
- reaxkit-1.0.0.dist-info/METADATA +128 -0
- reaxkit-1.0.0.dist-info/RECORD +130 -0
- reaxkit-1.0.0.dist-info/WHEEL +5 -0
- reaxkit-1.0.0.dist-info/entry_points.txt +2 -0
- reaxkit-1.0.0.dist-info/licenses/AUTHORS.md +20 -0
- reaxkit-1.0.0.dist-info/licenses/LICENSE +21 -0
- reaxkit-1.0.0.dist-info/top_level.txt +1 -0
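
The listing above maps directly onto import paths. A minimal sketch, assuming only the handler modules whose diffs are shown below (Fort13Handler and Fort73Handler) and the dataframe() accessor their docstrings document; nothing else in the listing is exercised here:

# Handlers under reaxkit.io.handlers parse raw ReaxFF output files into pandas DataFrames.
from reaxkit.io.handlers.fort13_handler import Fort13Handler
from reaxkit.io.handlers.fort73_handler import Fort73Handler

errors = Fort13Handler("fort.13").dataframe()    # force-field optimization error per epoch
energies = Fort73Handler("fort.73").dataframe()  # per-iteration energy log (header-defined columns)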
reaxkit/io/handlers/fort13_handler.py
@@ -0,0 +1,123 @@
+"""
+ReaxFF force-field optimization error (fort.13) handler.
+
+This module provides a handler for parsing ReaxFF ``fort.13`` files,
+which store the total force-field error values produced during
+ReaxFF parameter optimization runs.
+
+Typical use cases include:
+
+- tracking optimization convergence
+- comparing force-field parameter sets
+- plotting total error versus optimization epoch
+"""
+
+
+from __future__ import annotations
+from pathlib import Path
+from typing import List, Dict, Any, Iterator, Optional
+import pandas as pd
+
+from reaxkit.io.base_handler import BaseHandler
+
+
+class Fort13Handler(BaseHandler):
+    """
+    Parser for ReaxFF force-field optimization output files (``fort.13``).
+
+    This class parses ``fort.13`` files and exposes total force-field
+    error values as a simple, iteration-indexed time series.
+
+    Parsed Data
+    -----------
+    Summary table
+        One row per optimization epoch, returned by ``dataframe()``,
+        with columns:
+        ["epoch", "total_ff_error"]
+
+    Metadata
+        Returned by ``metadata()``, containing:
+        ["n_records", "min_error", "max_error", "mean_error"]
+
+    Notes
+    -----
+    - Epoch indices are inferred from line order in the file.
+    - Non-numeric or empty lines are ignored.
+    - This handler represents a single-scalar-per-iteration data source.
+    """
+
+    def __init__(self, file_path: str | Path = "fort.13"):
+        super().__init__(file_path)
+        self._n_records: Optional[int] = None
+
+    def _parse(self) -> tuple[pd.DataFrame, dict[str, Any]]:
+        """Parse fort.13 file into a summary DataFrame."""
+        sim_rows: List[list] = []
+        with open(self.path, "r") as fh:
+            for idx, line in enumerate(fh, start=1):
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    error_value = float(line)
+                except ValueError:
+                    continue
+                sim_rows.append([idx, error_value])
+
+        df = pd.DataFrame(sim_rows, columns=["epoch", "total_ff_error"])
+
+        meta: Dict[str, Any] = {
+            "n_records": len(df),
+            "min_error": df["total_ff_error"].min() if not df.empty else None,
+            "max_error": df["total_ff_error"].max() if not df.empty else None,
+            "mean_error": df["total_ff_error"].mean() if not df.empty else None,
+        }
+
+        self._n_records = meta["n_records"]
+        return df, meta
+
+    # ---- Accessors ----
+    def n_records(self) -> int:
+        """
+        Return the number of optimization epochs recorded in the file.
+
+        Works on
+        --------
+        Fort13Handler — ``fort.13``
+
+        Returns
+        -------
+        int
+            Number of parsed optimization epochs.
+        """
+        return int(self.metadata().get("n_records", 0))
+
+    def iter_errors(self, step: int = 1) -> Iterator[Dict[str, Any]]:
+        """Iterate over total force-field error values with optional subsampling.
+
+        Works on
+        --------
+        Fort13Handler — ``fort.13``
+
+        Parameters
+        ----------
+        step : int, optional
+            Step size for subsampling epochs (default: 1).
+
+        Yields
+        ------
+        dict
+            Dictionary with keys: ``epoch`` and ``total_ff_error``.
+
+        Examples
+        --------
+        >>> h = Fort13Handler("fort.13")
+        >>> for row in h.iter_errors(step=10):
+        ...     print(row["epoch"], row["total_ff_error"])
+        """
+        df = self.dataframe()
+        for i in range(0, len(df), step):
+            yield {
+                "epoch": int(df.iloc[i]["epoch"]),
+                "total_ff_error": float(df.iloc[i]["total_ff_error"]),
+            }
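
A minimal usage sketch for the handler above, assuming only the dataframe()/metadata() interface documented in its docstring; matplotlib is an external assumption, not part of reaxkit:

from reaxkit.io.handlers.fort13_handler import Fort13Handler
import matplotlib.pyplot as plt  # assumed plotting backend, not provided by reaxkit

h = Fort13Handler("fort.13")
df = h.dataframe()                 # columns: ["epoch", "total_ff_error"]
print(h.metadata())                # n_records, min_error, max_error, mean_error

# plot total error versus optimization epoch (the use case named in the docstring)
plt.plot(df["epoch"], df["total_ff_error"])
plt.xlabel("epoch")
plt.ylabel("total force-field error")
plt.show()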
reaxkit/io/handlers/fort57_handler.py
@@ -0,0 +1,143 @@
+"""
+ReaxFF geometry-optimization summary (fort.57) handler.
+
+This module provides a handler for parsing ReaxFF ``fort.57`` files,
+which store per-iteration summary information during geometry
+optimization or relaxation runs.
+
+Typical use cases include:
+
+- monitoring convergence via RMS gradient values
+- tracking potential energy and temperature evolution
+- comparing relaxation behavior across geometries
+"""
+
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+import pandas as pd
+
+from reaxkit.io.base_handler import BaseHandler
+
+
+class Fort57Handler(BaseHandler):
+    """
+    Parser for ReaxFF geometry-optimization output files (``fort.57``).
+
+    This class parses ``fort.57`` files and exposes per-iteration
+    optimization summaries as a structured time series.
+
+    Parsed Data
+    -----------
+    Summary table
+        One row per iteration, returned by ``dataframe()``, with columns:
+        ["iter", "E_pot", "T", "T_set", "RMSG", "nfc"]
+
+    Metadata
+        Returned by ``metadata()``, containing:
+        ["geo_descriptor", "n_records", "n_frames"]
+
+    The ``geo_descriptor`` is taken from the first line of the file
+    and typically describes the optimized geometry.
+
+    Notes
+    -----
+    - Header and non-numeric lines are ignored automatically.
+    - Duplicate iteration indices are resolved by keeping the last entry.
+    - This handler represents a scalar-per-iteration time-series file.
+    """
+
+    _COLS = ["iter", "E_pot", "T", "T_set", "RMSG", "nfc"]
+
+    def __init__(self, file_path: str | Path = "fort.57"):
+        super().__init__(file_path)
+        self._geo_descriptor: str = ""
+
+    def _parse(self) -> tuple[pd.DataFrame, dict[str, Any]]:
+        with open(self.path, "r") as fh:
+            lines = fh.readlines()
+
+        if not lines:
+            df = pd.DataFrame(columns=self._COLS)
+            meta: Dict[str, Any] = {"geo_descriptor": "", "n_records": 0}
+            return df, meta
+
+        # 1) metadata line
+        self._geo_descriptor = lines[0].strip()
+
+        # 2) parse numeric rows
+        rows: List[List[Any]] = []
+        for raw in lines[1:]:
+            s = raw.strip()
+            if not s:
+                continue
+
+            toks = s.split()
+
+            # Skip headers / non-data lines (e.g., "Iter.", "Epot", etc.)
+            # Data rows should have 6 tokens and start with an integer iter.
+            if len(toks) < 6:
+                continue
+            if not toks[0].lstrip("+-").isdigit():
+                continue
+
+            try:
+                it = int(toks[0])
+                epot = float(toks[1])
+                temp = float(toks[2])
+                tset = float(toks[3])
+                rmsg = float(toks[4])
+                nfc = int(float(toks[5]))  # sometimes written like "-1" but be tolerant
+            except Exception:
+                continue
+
+            rows.append([it, epot, temp, tset, rmsg, nfc])
+
+        df = pd.DataFrame(rows, columns=self._COLS)
+
+        # 3) clean: drop duplicate iters (keep last)
+        if not df.empty:
+            keep_idx = df.drop_duplicates("iter", keep="last").index
+            df = df.loc[keep_idx].sort_values("iter").reset_index(drop=True)
+
+        meta = {
+            "geo_descriptor": self._geo_descriptor,
+            "n_records": int(len(df)),
+            "n_frames": int(len(df)),  # for consistency with other handlers
+        }
+        return df, meta
+
+    # ---- convenience accessors ----
+    @property
+    def geo_descriptor(self) -> str:
+        """
+        Return the geometry descriptor extracted from the file header.
+
+        Works on
+        --------
+        Fort57Handler — ``fort.57``
+
+        Returns
+        -------
+        str
+            Geometry descriptor string (typically describing the optimized structure).
+        """
+        return self._geo_descriptor or str(self.metadata().get("geo_descriptor", ""))
+
+    def n_frames(self) -> int:
+        """
+        Return the number of optimization iterations recorded in the file.
+
+        Works on
+        --------
+        Fort57Handler — ``fort.57``
+
+        Returns
+        -------
+        int
+            Number of parsed iterations (frames).
+        """
+        return int(self.metadata().get("n_frames", 0))
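
A minimal usage sketch for Fort57Handler, assuming only the dataframe(), geo_descriptor, and n_frames() members shown above; the RMSG threshold is illustrative, not a reaxkit default:

from reaxkit.io.handlers.fort57_handler import Fort57Handler

h = Fort57Handler("fort.57")
df = h.dataframe()                 # ["iter", "E_pot", "T", "T_set", "RMSG", "nfc"]
print(h.geo_descriptor, h.n_frames())

# crude convergence check on the final RMS gradient (threshold chosen for illustration only)
if not df.empty:
    final_rmsg = df["RMSG"].iloc[-1]
    print("final RMSG:", final_rmsg, "converged:", bool(final_rmsg < 1.0))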
reaxkit/io/handlers/fort73_handler.py
@@ -0,0 +1,145 @@
+"""
+ReaxFF energy time-series log handler.
+
+This module provides a unified handler for parsing ReaxFF energy
+time-series output files that share a common tabular format, including
+``fort.73``, ``energylog``, and ``fort.58``.
+
+These files report per-iteration energetic quantities and are commonly
+used to monitor MD stability, energy conservation, and convergence.
+"""
+
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+import pandas as pd
+
+from reaxkit.io.base_handler import BaseHandler
+
+
+class Fort73Handler(BaseHandler):
+    """
+    Parser for ReaxFF energy time-series output files
+    (``fort.73``, ``energylog``, ``fort.58``).
+
+    This class parses per-iteration energy logs and exposes them as a
+    single, normalized tabular time series, handling line wrapping and
+    missing values automatically.
+
+    Parsed Data
+    -----------
+    Summary table
+        One row per iteration, returned by ``dataframe()``, with columns
+        taken directly from the file header (e.g. ``Iter.``, ``E_pot``,
+        ``E_kin``, ``E_tot``, ``Eelec``, …).
+
+        The iteration column is normalized to ``iter`` when present.
+
+    Metadata
+        Returned by ``metadata()``, containing:
+        ["n_records", "columns", "source_file"]
+
+    Notes
+    -----
+    - Continuation lines (e.g. in ``fort.58``) are appended to the
+      preceding row.
+    - Rows with fewer values than header columns are padded with ``NaN``.
+    - Extra tokens beyond the header length are truncated safely.
+    - This handler represents a scalar-per-iteration time-series file.
+    """
+
+    def __init__(self, file_path: str | Path = "fort.73"):
+        super().__init__(file_path)
+
+    @staticmethod
+    def _is_int_token(tok: str) -> bool:
+        # Iteration index appears as an integer token
+        # (allow leading +/-, though usually non-negative)
+        if not tok:
+            return False
+        if tok[0] in "+-":
+            tok = tok[1:]
+        return tok.isdigit()
+
+    def _parse(self) -> tuple[pd.DataFrame, Dict[str, Any]]:
+        cols: List[str] = []
+        rows: List[List[Optional[str]]] = []
+
+        current: List[Optional[str]] = []
+        in_table = False
+
+        def _flush_current() -> None:
+            nonlocal current
+            if not cols or not current:
+                current = []
+                return
+            # pad missing trailing values (energylog case)
+            if len(current) < len(cols):
+                current = current + [None] * (len(cols) - len(current))
+            # truncate any accidental extra tokens
+            rows.append(current[: len(cols)])
+            current = []
+
+        with open(self.path, "r") as fh:
+            for line in fh:
+                s = line.strip()
+
+                # skip blanks/separators
+                if not s or "----" in s:
+                    continue
+
+                # header
+                if s.startswith("Iter."):
+                    cols = s.split()
+                    in_table = True
+                    current = []
+                    continue
+
+                if not in_table:
+                    continue
+
+                parts = s.split()
+                if not parts:
+                    continue
+
+                # New row starts when first token is an integer iteration index.
+                if self._is_int_token(parts[0]):
+                    # finish previous row (even if incomplete)
+                    _flush_current()
+                    current = parts[:]  # start new row buffer
+                else:
+                    # continuation line (fort.58 style): append to current row
+                    if not current:
+                        # orphan continuation; ignore
+                        continue
+                    current.extend(parts)
+
+                # If we already have a full row, flush it (handles cases where
+                # continuation makes it complete before next Iter appears).
+                if cols and len(current) >= len(cols):
+                    _flush_current()
+
+        # flush last buffered row at EOF
+        _flush_current()
+
+        df = pd.DataFrame(rows, columns=cols)
+
+        # Convert numeric columns safely (None/garbage -> NaN)
+        for c in df.columns:
+            df[c] = pd.to_numeric(df[c], errors="coerce")
+
+        if "Iter." in df.columns:
+            df.rename(columns={"Iter.": "iter"}, inplace=True)
+
+        meta: Dict[str, Any] = {
+            "n_records": len(df),
+            "columns": df.columns.tolist(),
+            # optional debugging hints:
+            "source_file": str(self.path),
+        }
+
+        self._frames = []  # Not used in this handler
+        return df, meta
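
A minimal usage sketch for Fort73Handler, assuming only the API above; column names come from the file header at parse time, so E_tot is checked for rather than assumed:

from reaxkit.io.handlers.fort73_handler import Fort73Handler

h = Fort73Handler("fort.73")       # the docstring states the same format covers "energylog" and "fort.58"
df = h.dataframe()
print(h.metadata()["columns"])     # header-defined columns, with "Iter." normalized to "iter"

# report total-energy drift over the run if the file provides an E_tot column
if "E_tot" in df.columns and len(df) > 1:
    print("E_tot drift:", df["E_tot"].iloc[-1] - df["E_tot"].iloc[0])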
reaxkit/io/handlers/fort74_handler.py
@@ -0,0 +1,155 @@
+"""
+ReaxFF structure summary (fort.74) handler.
+
+This module provides a handler for parsing ReaxFF ``fort.74`` files,
+which report per-structure energetic and thermodynamic summary
+quantities produced during ReaxFF runs or force-field training.
+
+Typical use cases include:
+
+- extracting formation energies and volumes
+- comparing multiple structures or configurations
+- building datasets for bulk-property analysis
+"""
+
+
+from __future__ import annotations
+from pathlib import Path
+from typing import Any, Dict, List, Tuple
+
+import pandas as pd
+
+from reaxkit.io.base_handler import BaseHandler
+
+
+def _safe_float(s: str) -> float | None:
+    try:
+        return float(s)
+    except (ValueError, TypeError):
+        return None
+
+
+def _safe_int(s: str) -> int | None:
+    try:
+        # sometimes written as "0", "0.0", etc.
+        return int(float(s))
+    except (ValueError, TypeError):
+        return None
+
+
+class Fort74Handler(BaseHandler):
+    """
+    Parser for ReaxFF structure summary files (``fort.74``).
+
+    This class parses ``fort.74`` files and exposes one summary record
+    per structure or configuration as a tabular dataset.
+
+    Parsed Data
+    -----------
+    Summary table
+        One row per structure, returned by ``dataframe()``, with columns:
+        ["identifier", "Emin", "iter", "Hf", "V", "D"]
+
+        All columns except ``identifier`` are optional and may contain
+        ``NaN`` when the corresponding quantity is not present in the file.
+
+    Metadata
+        Returned by ``metadata()``, containing:
+        ["n_records", "n_frames"]
+
+    Notes
+    -----
+    - The ``identifier`` is taken from the first token of each line.
+    - Field labels (e.g. ``Emin:``, ``Iter.:``, ``Hf:``, ``Vol:``, ``Dens:``)
+      are detected dynamically and may appear in any order.
+    - This handler is not frame-based; ``n_frames()`` always returns 0.
+    """
+
+    def __init__(self, file_path: str | Path = "fort.74"):
+        super().__init__(file_path)
+
+    def _parse(self) -> Tuple[pd.DataFrame, Dict[str, Any]]:
+        rows: List[Dict[str, Any]] = []
+
+        with open(self.path, "r") as fh:
+            for line in fh:
+                line = line.strip()
+                if not line:
+                    continue
+
+                tokens = line.split()
+                if not tokens:
+                    continue
+
+                # identifier is always the first token
+                identifier = tokens[0]
+
+                Emin = None
+                Iter = None
+                Hf = None
+                Vol = None
+                Dens = None
+
+                # Walk through tokens and look for known labels like "Emin:", "Iter.:", etc.
+                i = 1
+                n = len(tokens)
+                while i < n:
+                    t = tokens[i]
+
+                    # Emin:
+                    if t == "Emin:" and i + 1 < n:
+                        Emin = _safe_float(tokens[i + 1])
+                        i += 2
+                        continue
+
+                    # Iter.: or Iter:
+                    if (t == "Iter.:" or t == "Iter:") and i + 1 < n:
+                        Iter = _safe_int(tokens[i + 1])
+                        i += 2
+                        continue
+
+                    # Hf: or Heatfo:
+                    if (t == "Hf:" or t == "Heatfo:") and i + 1 < n:
+                        Hf = _safe_float(tokens[i + 1])
+                        i += 2
+                        continue
+
+                    # Vol:
+                    if t == "Vol:" and i + 1 < n:
+                        Vol = _safe_float(tokens[i + 1])
+                        i += 2
+                        continue
+
+                    # Dens: or Dens):
+                    if (t == "Dens:" or t == "Dens):") and i + 1 < n:
+                        Dens = _safe_float(tokens[i + 1])
+                        i += 2
+                        continue
+
+                    # anything else → skip
+                    i += 1
+
+                rows.append(
+                    {
+                        "identifier": identifier,
+                        "Emin": Emin,
+                        "iter": Iter,
+                        "Hf": Hf,
+                        "V": Vol,
+                        "D": Dens,
+                    }
+                )
+
+        df = pd.DataFrame(rows, columns=["identifier", "Emin", "iter", "Hf", "V", "D"])
+
+        self._frames = []
+        meta = {"n_records": len(df), "n_frames": 0}
+
+        return df, meta
+
+    # fort.74 is effectively a single "table", not frame-based
+    def n_frames(self) -> int:
+        return 0
+
+    def frame(self, i: int) -> Dict[str, Any]:
+        raise IndexError("fort.74 has no per-frame data")
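
A minimal usage sketch for Fort74Handler, assuming only the dataframe() output documented above (one row per structure, with optional fields left as NaN):

from reaxkit.io.handlers.fort74_handler import Fort74Handler

h = Fort74Handler("fort.74")
df = h.dataframe()                 # ["identifier", "Emin", "iter", "Hf", "V", "D"]

# keep only structures that report both a heat of formation and a volume
subset = df.dropna(subset=["Hf", "V"])[["identifier", "Hf", "V"]]
print(subset.to_string(index=False))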