reaxkit 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reaxkit/__init__.py +0 -0
- reaxkit/analysis/__init__.py +0 -0
- reaxkit/analysis/composed/RDF_analyzer.py +560 -0
- reaxkit/analysis/composed/__init__.py +0 -0
- reaxkit/analysis/composed/connectivity_analyzer.py +706 -0
- reaxkit/analysis/composed/coordination_analyzer.py +144 -0
- reaxkit/analysis/composed/electrostatics_analyzer.py +687 -0
- reaxkit/analysis/per_file/__init__.py +0 -0
- reaxkit/analysis/per_file/control_analyzer.py +165 -0
- reaxkit/analysis/per_file/eregime_analyzer.py +108 -0
- reaxkit/analysis/per_file/ffield_analyzer.py +305 -0
- reaxkit/analysis/per_file/fort13_analyzer.py +79 -0
- reaxkit/analysis/per_file/fort57_analyzer.py +106 -0
- reaxkit/analysis/per_file/fort73_analyzer.py +61 -0
- reaxkit/analysis/per_file/fort74_analyzer.py +65 -0
- reaxkit/analysis/per_file/fort76_analyzer.py +191 -0
- reaxkit/analysis/per_file/fort78_analyzer.py +154 -0
- reaxkit/analysis/per_file/fort79_analyzer.py +83 -0
- reaxkit/analysis/per_file/fort7_analyzer.py +393 -0
- reaxkit/analysis/per_file/fort99_analyzer.py +411 -0
- reaxkit/analysis/per_file/molfra_analyzer.py +359 -0
- reaxkit/analysis/per_file/params_analyzer.py +258 -0
- reaxkit/analysis/per_file/summary_analyzer.py +84 -0
- reaxkit/analysis/per_file/trainset_analyzer.py +84 -0
- reaxkit/analysis/per_file/vels_analyzer.py +95 -0
- reaxkit/analysis/per_file/xmolout_analyzer.py +528 -0
- reaxkit/cli.py +181 -0
- reaxkit/count_loc.py +276 -0
- reaxkit/data/alias.yaml +89 -0
- reaxkit/data/constants.yaml +27 -0
- reaxkit/data/reaxff_input_files_contents.yaml +186 -0
- reaxkit/data/reaxff_output_files_contents.yaml +301 -0
- reaxkit/data/units.yaml +38 -0
- reaxkit/help/__init__.py +0 -0
- reaxkit/help/help_index_loader.py +531 -0
- reaxkit/help/introspection_utils.py +131 -0
- reaxkit/io/__init__.py +0 -0
- reaxkit/io/base_handler.py +165 -0
- reaxkit/io/generators/__init__.py +0 -0
- reaxkit/io/generators/control_generator.py +123 -0
- reaxkit/io/generators/eregime_generator.py +341 -0
- reaxkit/io/generators/geo_generator.py +967 -0
- reaxkit/io/generators/trainset_generator.py +1758 -0
- reaxkit/io/generators/tregime_generator.py +113 -0
- reaxkit/io/generators/vregime_generator.py +164 -0
- reaxkit/io/generators/xmolout_generator.py +304 -0
- reaxkit/io/handlers/__init__.py +0 -0
- reaxkit/io/handlers/control_handler.py +209 -0
- reaxkit/io/handlers/eregime_handler.py +122 -0
- reaxkit/io/handlers/ffield_handler.py +812 -0
- reaxkit/io/handlers/fort13_handler.py +123 -0
- reaxkit/io/handlers/fort57_handler.py +143 -0
- reaxkit/io/handlers/fort73_handler.py +145 -0
- reaxkit/io/handlers/fort74_handler.py +155 -0
- reaxkit/io/handlers/fort76_handler.py +195 -0
- reaxkit/io/handlers/fort78_handler.py +142 -0
- reaxkit/io/handlers/fort79_handler.py +227 -0
- reaxkit/io/handlers/fort7_handler.py +264 -0
- reaxkit/io/handlers/fort99_handler.py +128 -0
- reaxkit/io/handlers/geo_handler.py +224 -0
- reaxkit/io/handlers/molfra_handler.py +184 -0
- reaxkit/io/handlers/params_handler.py +137 -0
- reaxkit/io/handlers/summary_handler.py +135 -0
- reaxkit/io/handlers/trainset_handler.py +658 -0
- reaxkit/io/handlers/vels_handler.py +293 -0
- reaxkit/io/handlers/xmolout_handler.py +174 -0
- reaxkit/utils/__init__.py +0 -0
- reaxkit/utils/alias.py +219 -0
- reaxkit/utils/cache.py +77 -0
- reaxkit/utils/constants.py +75 -0
- reaxkit/utils/equation_of_states.py +96 -0
- reaxkit/utils/exceptions.py +27 -0
- reaxkit/utils/frame_utils.py +175 -0
- reaxkit/utils/log.py +43 -0
- reaxkit/utils/media/__init__.py +0 -0
- reaxkit/utils/media/convert.py +90 -0
- reaxkit/utils/media/make_video.py +91 -0
- reaxkit/utils/media/plotter.py +812 -0
- reaxkit/utils/numerical/__init__.py +0 -0
- reaxkit/utils/numerical/extrema_finder.py +96 -0
- reaxkit/utils/numerical/moving_average.py +103 -0
- reaxkit/utils/numerical/numerical_calcs.py +75 -0
- reaxkit/utils/numerical/signal_ops.py +135 -0
- reaxkit/utils/path.py +55 -0
- reaxkit/utils/units.py +104 -0
- reaxkit/webui/__init__.py +0 -0
- reaxkit/webui/app.py +0 -0
- reaxkit/webui/components.py +0 -0
- reaxkit/webui/layouts.py +0 -0
- reaxkit/webui/utils.py +0 -0
- reaxkit/workflows/__init__.py +0 -0
- reaxkit/workflows/composed/__init__.py +0 -0
- reaxkit/workflows/composed/coordination_workflow.py +393 -0
- reaxkit/workflows/composed/electrostatics_workflow.py +587 -0
- reaxkit/workflows/composed/xmolout_fort7_workflow.py +343 -0
- reaxkit/workflows/meta/__init__.py +0 -0
- reaxkit/workflows/meta/help_workflow.py +136 -0
- reaxkit/workflows/meta/introspection_workflow.py +235 -0
- reaxkit/workflows/meta/make_video_workflow.py +61 -0
- reaxkit/workflows/meta/plotter_workflow.py +601 -0
- reaxkit/workflows/per_file/__init__.py +0 -0
- reaxkit/workflows/per_file/control_workflow.py +110 -0
- reaxkit/workflows/per_file/eregime_workflow.py +267 -0
- reaxkit/workflows/per_file/ffield_workflow.py +390 -0
- reaxkit/workflows/per_file/fort13_workflow.py +86 -0
- reaxkit/workflows/per_file/fort57_workflow.py +137 -0
- reaxkit/workflows/per_file/fort73_workflow.py +151 -0
- reaxkit/workflows/per_file/fort74_workflow.py +88 -0
- reaxkit/workflows/per_file/fort76_workflow.py +188 -0
- reaxkit/workflows/per_file/fort78_workflow.py +135 -0
- reaxkit/workflows/per_file/fort79_workflow.py +314 -0
- reaxkit/workflows/per_file/fort7_workflow.py +592 -0
- reaxkit/workflows/per_file/fort83_workflow.py +60 -0
- reaxkit/workflows/per_file/fort99_workflow.py +223 -0
- reaxkit/workflows/per_file/geo_workflow.py +554 -0
- reaxkit/workflows/per_file/molfra_workflow.py +577 -0
- reaxkit/workflows/per_file/params_workflow.py +135 -0
- reaxkit/workflows/per_file/summary_workflow.py +161 -0
- reaxkit/workflows/per_file/trainset_workflow.py +356 -0
- reaxkit/workflows/per_file/tregime_workflow.py +79 -0
- reaxkit/workflows/per_file/vels_workflow.py +309 -0
- reaxkit/workflows/per_file/vregime_workflow.py +75 -0
- reaxkit/workflows/per_file/xmolout_workflow.py +678 -0
- reaxkit-1.0.0.dist-info/METADATA +128 -0
- reaxkit-1.0.0.dist-info/RECORD +130 -0
- reaxkit-1.0.0.dist-info/WHEEL +5 -0
- reaxkit-1.0.0.dist-info/entry_points.txt +2 -0
- reaxkit-1.0.0.dist-info/licenses/AUTHORS.md +20 -0
- reaxkit-1.0.0.dist-info/licenses/LICENSE +21 -0
- reaxkit-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ReaxFF restraint monitor (fort.76) handler.
|
|
3
|
+
|
|
4
|
+
This module provides a handler for parsing ReaxFF ``fort.76`` files,
|
|
5
|
+
which record per-iteration restraint energies and target/actual values
|
|
6
|
+
for distance or coordinate restraints applied during simulations.
|
|
7
|
+
|
|
8
|
+
Typical use cases include:
|
|
9
|
+
|
|
10
|
+
- monitoring restraint convergence
|
|
11
|
+
- comparing target vs actual restraint values
|
|
12
|
+
- debugging constrained MD or minimization runs
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Any, Dict, Iterator, List, Optional
|
|
20
|
+
|
|
21
|
+
import pandas as pd
|
|
22
|
+
|
|
23
|
+
from reaxkit.io.base_handler import BaseHandler
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class Fort76Handler(BaseHandler):
    """
    Parser for ReaxFF restraint monitor files (``fort.76``).

    This class parses ``fort.76`` files and exposes per-iteration
    restraint information as a structured, iteration-indexed table.

    Parsed Data
    -----------
    Summary table
        One row per iteration, returned by ``dataframe()``, with columns:

        - Base columns:
            ["iter", "E_res", "E_pot"]

        - Restraint columns (repeated per restraint):
            ["r1_target", "r1_actual",
             "r2_target", "r2_actual", ...]

        The number of restraints is inferred automatically from the file.

    Metadata
        Returned by ``metadata()``, containing:
            ["n_records", "n_frames", "n_restraints", "restraint_cols"]

    Notes
    -----
    - Supports an arbitrary number of restraints per iteration.
    - Header, comment, and malformed lines are skipped robustly.
    - Duplicate iteration indices are resolved by keeping the last entry.
    - This handler represents one row per iteration (frame-like semantics).
    """

    def __init__(self, file_path: str | Path = "fort.76"):
        super().__init__(file_path)
        # Kept for template consistency with other handlers; fort.76 has no
        # extra per-frame tables (the summary table is one row per iteration).
        self._frames: List[pd.DataFrame] = []
        self._n_records: Optional[int] = None

    @staticmethod
    def _is_float(token: str) -> bool:
        """Return True if *token* can be converted to a float."""
        try:
            float(token)
            return True
        except Exception:
            return False

    def _parse(self) -> tuple[pd.DataFrame, dict[str, Any]]:
        """Parse ``fort.76`` into a summary DataFrame and a metadata dict.

        Returns
        -------
        tuple[pd.DataFrame, dict[str, Any]]
            The iteration-indexed summary table and its metadata
            (``n_records``, ``n_frames``, ``n_restraints``, ``restraint_cols``).
        """
        rows: List[List[float]] = []
        n_restraints_max = 0

        with open(self.path, "r") as fh:
            for line in fh:
                s = line.strip()
                if not s:
                    continue
                # Common comment styles
                if s.startswith(("#", "!", "//")):
                    continue

                parts = s.split()
                if len(parts) < 3:
                    continue

                # If the line doesn't look numeric, treat as header and skip
                if not (self._is_float(parts[0]) and self._is_float(parts[1]) and self._is_float(parts[2])):
                    continue

                # Convert tokens to floats (iter will be cast to int later)
                try:
                    vals = [float(x) for x in parts]
                except Exception:
                    continue

                # Determine restraint pairs beyond (iter, E_res, E_pot)
                extra = len(vals) - 3
                if extra < 0:
                    continue

                # If odd number of extra columns, ignore this malformed row
                if extra % 2 != 0:
                    continue

                n_restraints = extra // 2
                n_restraints_max = max(n_restraints_max, n_restraints)

                rows.append(vals)

        # If nothing parsed, return empty but well-defined DF
        base_cols = ["iter", "E_res", "E_pot"]
        if not rows:
            df_empty = pd.DataFrame(columns=base_cols)
            meta = {
                "n_records": 0,
                "n_frames": 0,
                "n_restraints": 0,
                "restraint_cols": [],
            }
            self._frames = []
            return df_empty, meta

        # Pad rows so all have same number of restraint columns (in case file changes mid-run)
        target_len = 3 + 2 * n_restraints_max
        for r in rows:
            if len(r) < target_len:
                r.extend([float("nan")] * (target_len - len(r)))

        # Build columns dynamically
        cols = list(base_cols)
        for i in range(1, n_restraints_max + 1):
            cols.append(f"r{i}_target")
            cols.append(f"r{i}_actual")

        df = pd.DataFrame(rows, columns=cols)

        # Types: every accepted row had a float-convertible first token, so
        # "iter" is never NaN here.  The previous implementation used
        # ``astype(int, errors="ignore")``, which is deprecated in
        # pandas >= 2.1; emulate the "ignore" behavior explicitly instead.
        try:
            df["iter"] = df["iter"].astype(int)
        except (ValueError, TypeError, OverflowError):
            # Leave "iter" as float if an exotic value (e.g. inf) blocks the cast.
            pass

        # Clean: drop duplicate iterations (keep last), consistent with the
        # other iteration-indexed handlers (e.g. Fort78Handler).
        if not df.empty:
            df = df.drop_duplicates("iter", keep="last").reset_index(drop=True)

        self._frames = []  # fort.76 is already 1-row-per-iter; no extra per-frame tables needed

        meta: Dict[str, Any] = {
            "n_records": int(len(df)),
            "n_frames": int(len(df)),
            "n_restraints": int(n_restraints_max),
            "restraint_cols": [c for c in df.columns if c.startswith("r")],
        }
        return df, meta

    # ---- File-specific accessors
    def n_frames(self) -> int:
        """Number of parsed iterations (1 row per iteration)."""
        return int(self.metadata().get("n_frames", len(self.dataframe())))

    def n_restraints(self) -> int:
        """Maximum number of restraints observed in any parsed row."""
        return int(self.metadata().get("n_restraints", 0))

    def frame(self, i: int) -> Dict[str, Any]:
        """
        Return a normalized per-row structure.

        restraints is a list of dicts:
            [{"index": 1, "target": ..., "actual": ...}, ...]
        """
        df = self.dataframe()
        row = df.iloc[i]

        restraints: List[Dict[str, Any]] = []
        n = self.n_restraints()
        for k in range(1, n + 1):
            tgt = row.get(f"r{k}_target")
            act = row.get(f"r{k}_actual")
            # Skip if both are NaN (e.g., padded)
            if pd.isna(tgt) and pd.isna(act):
                continue
            restraints.append({"index": k, "target": tgt, "actual": act})

        return {
            "index": int(i),
            "iter": int(row.get("iter")),
            "E_res": row.get("E_res"),
            "E_pot": row.get("E_pot"),
            "restraints": restraints,
        }

    def iter_frames(self, step: int = 1) -> Iterator[Dict[str, Any]]:
        """Yield ``frame(i)`` for every ``step``-th parsed iteration."""
        for i in range(0, self.n_frames(), max(1, int(step))):
            yield self.frame(i)
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ReaxFF electric-field output (fort.78) handler.
|
|
3
|
+
|
|
4
|
+
This module provides a handler for parsing ReaxFF ``fort.78`` files,
|
|
5
|
+
which report per-iteration electric-field components and magnitudes
|
|
6
|
+
during simulations with applied external fields.
|
|
7
|
+
|
|
8
|
+
Typical use cases include:
|
|
9
|
+
|
|
10
|
+
- analyzing applied electric-field schedules
|
|
11
|
+
- correlating field strength with polarization or dipole response
|
|
12
|
+
- plotting field components versus iteration
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import Dict, Any
|
|
19
|
+
import pandas as pd
|
|
20
|
+
|
|
21
|
+
from reaxkit.io.base_handler import BaseHandler
|
|
22
|
+
|
|
23
|
+
# Canonical names as requested
|
|
24
|
+
_CANONICAL_5 = [
|
|
25
|
+
"iter",
|
|
26
|
+
"field_x", "field_y", "field_z",
|
|
27
|
+
"E_field",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
_CANONICAL_8 = [
|
|
31
|
+
"iter",
|
|
32
|
+
"field_x", "field_y", "field_z",
|
|
33
|
+
"E_field_x", "E_field_y", "E_field_z",
|
|
34
|
+
"E_field",
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
_NUMERIC_CANONICAL = set(_CANONICAL_8) # superset of _CANONICAL_5
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class Fort78Handler(BaseHandler):
    """
    Parser for ReaxFF electric-field output files (``fort.78``).

    This class parses ``fort.78`` files and exposes electric-field
    quantities as a tidy, iteration-indexed table with canonical
    column names.

    Parsed Data
    -----------
    Summary table
        One row per iteration, returned by ``dataframe()``, with columns:

        - When 5 columns are present:
            ["iter", "field_x", "field_y", "field_z", "E_field"]

        - When 8 columns are present:
            ["iter", "field_x", "field_y", "field_z",
             "E_field_x", "E_field_y", "E_field_z", "E_field"]

        - For other column counts:
            ["Col1", "Col2", ..., "ColN"]

    Metadata
        Returned by ``metadata()``, containing:
            ["source", "n_rows", "has_time", "columns"]

    Notes
    -----
    - Header presence is detected automatically.
    - Canonical column names are enforced when column counts match
      known ``fort.78`` formats.
    - Duplicate iteration indices are resolved by keeping the last entry.
    - An empty file yields an empty table with the 5-column canonical
      layout instead of raising.
    - This handler represents a scalar-per-iteration time-series file.
    """

    def __init__(self, file_path: str | Path = "fort.78"):
        super().__init__(file_path)
        # Row count of the last parse; 0 until _parse() runs.
        self._n_rows: int = 0

    def _parse(self) -> tuple[pd.DataFrame, dict[str, Any]]:
        """Parse ``fort.78`` into a summary DataFrame and a metadata dict."""
        path = Path(self.path)

        # Peek first line to decide if it's a header or data.
        # NOTE: all() over an empty token list is True, so a blank first line
        # falls through to the "no header" branch; read_csv skips blank lines.
        with open(path, "r") as fh:
            first_line = fh.readline().strip()
        first_tokens = first_line.split()
        is_numeric_row = all(self._is_number(tok) for tok in first_tokens)

        # Read file using whitespace separator (no deprecation warning).
        # A completely empty file makes read_csv raise EmptyDataError;
        # return a well-defined empty canonical table instead of crashing.
        try:
            if is_numeric_row:
                # No header present
                df = pd.read_csv(path, sep=r"\s+", header=None, engine="python")
            else:
                # Header present -> parse with header row, then overwrite with canonical names
                df = pd.read_csv(path, sep=r"\s+", header=0, engine="python")
        except pd.errors.EmptyDataError:
            df = pd.DataFrame(columns=_CANONICAL_5)

        ncols = df.shape[1]

        # Force canonical names when column count matches 5 or 8
        if ncols == 5:
            df.columns = _CANONICAL_5
        elif ncols == 8:
            df.columns = _CANONICAL_8
        else:
            # Fallback: numbered names (still parsed)
            df.columns = [f"Col{i+1}" for i in range(ncols)]

        # Coerce numeric for known numeric columns that are present
        for col in _NUMERIC_CANONICAL:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors="coerce")

        # Clean 'iter' column if present.  Coerce first, then drop the rows
        # that failed coercion; the previous extra dropna-before-coercion was
        # redundant and caused column assignment on a dropna() slice.
        if "iter" in df.columns:
            df["iter"] = pd.to_numeric(df["iter"], errors="coerce")
            df = df.dropna(subset=["iter"])
            try:
                df["iter"] = df["iter"].astype(int)
            except Exception:
                # Keep float iterations rather than failing the whole parse.
                pass
            df = df.drop_duplicates("iter", keep="last").reset_index(drop=True)

        self._n_rows = len(df)
        meta: Dict[str, Any] = {
            "source": "fort.78",
            "n_rows": self._n_rows,
            "has_time": False,
            "columns": list(df.columns),
        }
        return df, meta

    def n_rows(self) -> int:
        """Number of rows in the parsed table (0 before parsing)."""
        return self._n_rows

    @staticmethod
    def _is_number(tok: str) -> bool:
        """True if *tok* parses as a float after Fortran 'D' exponent normalization."""
        try:
            float(tok.replace("D", "E").replace("d", "E"))
            return True
        except ValueError:
            return False
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ReaxFF parameter optimization diagnostics (fort.79) handler.
|
|
3
|
+
|
|
4
|
+
This module provides a handler for parsing ReaxFF ``fort.79`` files,
|
|
5
|
+
which report detailed diagnostics from force-field parameter
|
|
6
|
+
optimization, including trial parameter values, error differences,
|
|
7
|
+
and parabolic fits used during optimization steps.
|
|
8
|
+
|
|
9
|
+
Typical use cases include:
|
|
10
|
+
|
|
11
|
+
- inspecting parameter update behavior during training
|
|
12
|
+
- analyzing parabolic fits and estimated optima
|
|
13
|
+
- debugging unstable or poorly conditioned parameter updates
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import List, Dict, Any, Optional, Iterator
|
|
20
|
+
import re
|
|
21
|
+
import math
|
|
22
|
+
import pandas as pd
|
|
23
|
+
|
|
24
|
+
from reaxkit.io.base_handler import BaseHandler
|
|
25
|
+
|
|
26
|
+
# ----------------------------------------------------------------------
# Regex that matches:
#  - Proper Fortran float with optional D/E exponent: 1.234D+05, 3.21e-3, 0.5
#  - Malformed bare-exponent tokens we want to capture then mark as NaN: 0.2408814586-316
#    (We capture them so field counts stay correct; _f() will convert these to NaN.)
# The "[+-]\d+" alternative is what captures the bare-exponent form: a sign
# and digits glued directly to the mantissa with no D/E marker.
# ----------------------------------------------------------------------
_FNUM = r"[+-]?\d+\.\d+(?:[DdEe][+-]?\d+|[+-]\d+)?"
_FVAL_RE = re.compile(_FNUM)
|
|
34
|
+
|
|
35
|
+
def _f(s: str) -> float:
|
|
36
|
+
"""
|
|
37
|
+
Convert a numeric token to float.
|
|
38
|
+
- Valid Fortran floats 'D'/'d' → 'E'
|
|
39
|
+
- Bare-exponent malformed tokens (e.g., '0.24088-316') → NaN
|
|
40
|
+
- Any conversion error → NaN
|
|
41
|
+
"""
|
|
42
|
+
try:
|
|
43
|
+
clean = s.strip()
|
|
44
|
+
# Bare exponent without D/E ('0.240...-316') → NaN by policy
|
|
45
|
+
if re.fullmatch(r"[+-]?\d+\.\d+[+-]\d+", clean):
|
|
46
|
+
return float("nan")
|
|
47
|
+
# Normal path: Fortran 'D' → 'E'
|
|
48
|
+
clean = clean.replace("D", "E").replace("d", "E")
|
|
49
|
+
return float(clean)
|
|
50
|
+
except Exception:
|
|
51
|
+
return float("nan")
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class Fort79Handler(BaseHandler):
    """
    Parser for ReaxFF parameter optimization diagnostic files (``fort.79``).

    This class parses ``fort.79`` files and exposes per-parameter
    optimization diagnostics as a structured tabular dataset.

    Parsed Data
    -----------
    Summary table
        One row per optimized parameter, returned by ``dataframe()``,
        with columns:
            ["identifier",
             "value1", "value2", "value3",
             "diff1", "diff2", "diff3",
             "a", "b", "c",
             "parabol_min", "parabol_min_diff",
             "value4", "diff4"]

        Here, ``value1..value3`` and ``diff1..diff3`` correspond to the
        trial parameter values and their associated error differences
        used to construct a parabolic fit.

    Metadata
        Returned by ``metadata()``, containing:
            ["n_records"]

    Notes
    -----
    - Numeric values may span multiple lines and are reconstructed
      robustly across wrapped output.
    - Fortran ``D`` exponents are supported; malformed bare-exponent
      tokens are converted to ``NaN`` by design.
    - This handler is not frame-based; ``n_frames()`` always returns 0.
    """

    def __init__(self, file_path: str | Path = "fort.79"):
        super().__init__(file_path)
        # Kept for template consistency with other handlers; fort.79 has no
        # per-frame tables.
        self._frames: List[pd.DataFrame] = []
        self._n_records: Optional[int] = None

    def _parse(self) -> tuple[pd.DataFrame, Dict[str, Any]]:
        """Parse ``fort.79`` into a per-parameter DataFrame and metadata.

        The file is scanned for "Values used for parameter ..." record
        headers; each record is a fixed sequence of lines whose numeric
        payload may wrap onto a following line, so the cursor ``i`` is
        advanced one expected line at a time and "bumped" past any extra
        wrapped lines actually consumed.
        """
        rows: List[Dict[str, Any]] = []

        with open(self.path, "r", encoding="utf-8", errors="ignore") as fh:
            lines = fh.readlines()

        i, n = 0, len(lines)
        while i < n:
            line = lines[i]
            if line.strip().startswith("Values used for parameter"):
                # Everything after the word "parameter" identifies the record.
                ident = line.split("parameter", 1)[1].strip()

                # ---- three "Values used..." numbers (may wrap) ----
                v1 = v2 = v3 = math.nan
                i += 1
                if i < n:
                    vals = _FVAL_RE.findall(lines[i])
                    if len(vals) < 3 and i + 1 < n:
                        vals += _FVAL_RE.findall(lines[i + 1])
                        # only advance extra line if we actually used it
                        i += 1 if len(vals) >= 3 else 0
                    if len(vals) >= 3:
                        v1, v2, v3 = map(_f, vals[:3])

                # ---- "Differences found" + three diffs (may wrap) ----
                d1 = d2 = d3 = math.nan
                i += 1
                if i < n and lines[i].strip().startswith("Differences found"):
                    i += 1
                    if i < n:
                        diffs = _FVAL_RE.findall(lines[i])
                        if len(diffs) < 3 and i + 1 < n:
                            diffs += _FVAL_RE.findall(lines[i + 1])
                            # only advance extra line if we actually used it
                            i += 1 if len(diffs) >= 3 else 0
                        if len(diffs) >= 3:
                            d1, d2, d3 = map(_f, diffs[:3])

                # ---- "Parabol: a= ... b= ... c= ..." (may wrap) ----
                a = b = c = math.nan
                i += 1
                if i < n:
                    par_chunk = lines[i]
                    # try to pull from next lines if needed
                    if i + 1 < n:
                        par_chunk_try = par_chunk + " " + lines[i + 1]
                    else:
                        par_chunk_try = par_chunk
                    if i + 2 < n:
                        par_chunk_try2 = par_chunk_try + " " + lines[i + 2]
                    else:
                        par_chunk_try2 = par_chunk_try

                    # Try 1-line, 2-line, 3-line match; ``bump`` records how
                    # many extra (wrapped) lines the successful match spanned.
                    m = re.search(r"a=\s*(" + _FNUM + r")\s*b=\s*(" + _FNUM + r")\s*c=\s*(" + _FNUM + r")", par_chunk)
                    bump = 0
                    if not m:
                        m = re.search(r"a=\s*(" + _FNUM + r")\s*b=\s*(" + _FNUM + r")\s*c=\s*(" + _FNUM + r")", par_chunk_try)
                        bump = 1 if m else 0
                    if not m:
                        m = re.search(r"a=\s*(" + _FNUM + r")\s*b=\s*(" + _FNUM + r")\s*c=\s*(" + _FNUM + r")", par_chunk_try2)
                        bump = 2 if m else 0
                    if m:
                        a, b, c = map(_f, m.groups())
                    i += bump  # consume extra lines used

                # ---- "Minimum of the parabol ..." ----
                parabol_min = math.nan
                i += 1
                if i < n:
                    mins = _FVAL_RE.findall(lines[i])
                    if mins:
                        parabol_min = _f(mins[0])

                # ---- "Difference belonging to minimum of parabol ..." ----
                parabol_min_diff = math.nan
                i += 1
                if i < n:
                    mins2 = _FVAL_RE.findall(lines[i])
                    if mins2:
                        parabol_min_diff = _f(mins2[0])

                # ---- "New parameter value ..." ----
                value4 = math.nan
                i += 1
                if i < n:
                    news = _FVAL_RE.findall(lines[i])
                    if news:
                        value4 = _f(news[0])

                # ---- "Difference belonging to new parameter value ..." ----
                diff4 = math.nan
                i += 1
                if i < n:
                    news2 = _FVAL_RE.findall(lines[i])
                    if news2:
                        diff4 = _f(news2[0])

                rows.append(
                    {
                        "identifier": ident,
                        "value1": v1, "value2": v2, "value3": v3,
                        "diff1": d1, "diff2": d2, "diff3": d3,
                        "a": a, "b": b, "c": c,
                        "parabol_min": parabol_min,
                        "parabol_min_diff": parabol_min_diff,
                        "value4": value4,
                        "diff4": diff4,
                    }
                )
            i += 1

        # Fixed column order so an empty file still yields a well-formed table.
        df = pd.DataFrame(
            rows,
            columns=[
                "identifier",
                "value1", "value2", "value3",
                "diff1", "diff2", "diff3",
                "a", "b", "c",
                "parabol_min", "parabol_min_diff",
                "value4", "diff4",
            ],
        )

        meta: Dict[str, Any] = {"n_records": int(len(df))}
        self._frames = []
        return df, meta

    def n_frames(self) -> int:
        """Always 0: fort.79 is a per-parameter table, not a frame series."""
        return 0

    def iter_frames(self, step: int = 1) -> Iterator[Dict[str, Any]]:
        """Empty generator: this handler has no frames to iterate."""
        # The unreachable yield makes this function a generator that yields
        # nothing, keeping the iterator interface of sibling handlers.
        if False:
            yield {}
|