reaxkit-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130)
  1. reaxkit/__init__.py +0 -0
  2. reaxkit/analysis/__init__.py +0 -0
  3. reaxkit/analysis/composed/RDF_analyzer.py +560 -0
  4. reaxkit/analysis/composed/__init__.py +0 -0
  5. reaxkit/analysis/composed/connectivity_analyzer.py +706 -0
  6. reaxkit/analysis/composed/coordination_analyzer.py +144 -0
  7. reaxkit/analysis/composed/electrostatics_analyzer.py +687 -0
  8. reaxkit/analysis/per_file/__init__.py +0 -0
  9. reaxkit/analysis/per_file/control_analyzer.py +165 -0
  10. reaxkit/analysis/per_file/eregime_analyzer.py +108 -0
  11. reaxkit/analysis/per_file/ffield_analyzer.py +305 -0
  12. reaxkit/analysis/per_file/fort13_analyzer.py +79 -0
  13. reaxkit/analysis/per_file/fort57_analyzer.py +106 -0
  14. reaxkit/analysis/per_file/fort73_analyzer.py +61 -0
  15. reaxkit/analysis/per_file/fort74_analyzer.py +65 -0
  16. reaxkit/analysis/per_file/fort76_analyzer.py +191 -0
  17. reaxkit/analysis/per_file/fort78_analyzer.py +154 -0
  18. reaxkit/analysis/per_file/fort79_analyzer.py +83 -0
  19. reaxkit/analysis/per_file/fort7_analyzer.py +393 -0
  20. reaxkit/analysis/per_file/fort99_analyzer.py +411 -0
  21. reaxkit/analysis/per_file/molfra_analyzer.py +359 -0
  22. reaxkit/analysis/per_file/params_analyzer.py +258 -0
  23. reaxkit/analysis/per_file/summary_analyzer.py +84 -0
  24. reaxkit/analysis/per_file/trainset_analyzer.py +84 -0
  25. reaxkit/analysis/per_file/vels_analyzer.py +95 -0
  26. reaxkit/analysis/per_file/xmolout_analyzer.py +528 -0
  27. reaxkit/cli.py +181 -0
  28. reaxkit/count_loc.py +276 -0
  29. reaxkit/data/alias.yaml +89 -0
  30. reaxkit/data/constants.yaml +27 -0
  31. reaxkit/data/reaxff_input_files_contents.yaml +186 -0
  32. reaxkit/data/reaxff_output_files_contents.yaml +301 -0
  33. reaxkit/data/units.yaml +38 -0
  34. reaxkit/help/__init__.py +0 -0
  35. reaxkit/help/help_index_loader.py +531 -0
  36. reaxkit/help/introspection_utils.py +131 -0
  37. reaxkit/io/__init__.py +0 -0
  38. reaxkit/io/base_handler.py +165 -0
  39. reaxkit/io/generators/__init__.py +0 -0
  40. reaxkit/io/generators/control_generator.py +123 -0
  41. reaxkit/io/generators/eregime_generator.py +341 -0
  42. reaxkit/io/generators/geo_generator.py +967 -0
  43. reaxkit/io/generators/trainset_generator.py +1758 -0
  44. reaxkit/io/generators/tregime_generator.py +113 -0
  45. reaxkit/io/generators/vregime_generator.py +164 -0
  46. reaxkit/io/generators/xmolout_generator.py +304 -0
  47. reaxkit/io/handlers/__init__.py +0 -0
  48. reaxkit/io/handlers/control_handler.py +209 -0
  49. reaxkit/io/handlers/eregime_handler.py +122 -0
  50. reaxkit/io/handlers/ffield_handler.py +812 -0
  51. reaxkit/io/handlers/fort13_handler.py +123 -0
  52. reaxkit/io/handlers/fort57_handler.py +143 -0
  53. reaxkit/io/handlers/fort73_handler.py +145 -0
  54. reaxkit/io/handlers/fort74_handler.py +155 -0
  55. reaxkit/io/handlers/fort76_handler.py +195 -0
  56. reaxkit/io/handlers/fort78_handler.py +142 -0
  57. reaxkit/io/handlers/fort79_handler.py +227 -0
  58. reaxkit/io/handlers/fort7_handler.py +264 -0
  59. reaxkit/io/handlers/fort99_handler.py +128 -0
  60. reaxkit/io/handlers/geo_handler.py +224 -0
  61. reaxkit/io/handlers/molfra_handler.py +184 -0
  62. reaxkit/io/handlers/params_handler.py +137 -0
  63. reaxkit/io/handlers/summary_handler.py +135 -0
  64. reaxkit/io/handlers/trainset_handler.py +658 -0
  65. reaxkit/io/handlers/vels_handler.py +293 -0
  66. reaxkit/io/handlers/xmolout_handler.py +174 -0
  67. reaxkit/utils/__init__.py +0 -0
  68. reaxkit/utils/alias.py +219 -0
  69. reaxkit/utils/cache.py +77 -0
  70. reaxkit/utils/constants.py +75 -0
  71. reaxkit/utils/equation_of_states.py +96 -0
  72. reaxkit/utils/exceptions.py +27 -0
  73. reaxkit/utils/frame_utils.py +175 -0
  74. reaxkit/utils/log.py +43 -0
  75. reaxkit/utils/media/__init__.py +0 -0
  76. reaxkit/utils/media/convert.py +90 -0
  77. reaxkit/utils/media/make_video.py +91 -0
  78. reaxkit/utils/media/plotter.py +812 -0
  79. reaxkit/utils/numerical/__init__.py +0 -0
  80. reaxkit/utils/numerical/extrema_finder.py +96 -0
  81. reaxkit/utils/numerical/moving_average.py +103 -0
  82. reaxkit/utils/numerical/numerical_calcs.py +75 -0
  83. reaxkit/utils/numerical/signal_ops.py +135 -0
  84. reaxkit/utils/path.py +55 -0
  85. reaxkit/utils/units.py +104 -0
  86. reaxkit/webui/__init__.py +0 -0
  87. reaxkit/webui/app.py +0 -0
  88. reaxkit/webui/components.py +0 -0
  89. reaxkit/webui/layouts.py +0 -0
  90. reaxkit/webui/utils.py +0 -0
  91. reaxkit/workflows/__init__.py +0 -0
  92. reaxkit/workflows/composed/__init__.py +0 -0
  93. reaxkit/workflows/composed/coordination_workflow.py +393 -0
  94. reaxkit/workflows/composed/electrostatics_workflow.py +587 -0
  95. reaxkit/workflows/composed/xmolout_fort7_workflow.py +343 -0
  96. reaxkit/workflows/meta/__init__.py +0 -0
  97. reaxkit/workflows/meta/help_workflow.py +136 -0
  98. reaxkit/workflows/meta/introspection_workflow.py +235 -0
  99. reaxkit/workflows/meta/make_video_workflow.py +61 -0
  100. reaxkit/workflows/meta/plotter_workflow.py +601 -0
  101. reaxkit/workflows/per_file/__init__.py +0 -0
  102. reaxkit/workflows/per_file/control_workflow.py +110 -0
  103. reaxkit/workflows/per_file/eregime_workflow.py +267 -0
  104. reaxkit/workflows/per_file/ffield_workflow.py +390 -0
  105. reaxkit/workflows/per_file/fort13_workflow.py +86 -0
  106. reaxkit/workflows/per_file/fort57_workflow.py +137 -0
  107. reaxkit/workflows/per_file/fort73_workflow.py +151 -0
  108. reaxkit/workflows/per_file/fort74_workflow.py +88 -0
  109. reaxkit/workflows/per_file/fort76_workflow.py +188 -0
  110. reaxkit/workflows/per_file/fort78_workflow.py +135 -0
  111. reaxkit/workflows/per_file/fort79_workflow.py +314 -0
  112. reaxkit/workflows/per_file/fort7_workflow.py +592 -0
  113. reaxkit/workflows/per_file/fort83_workflow.py +60 -0
  114. reaxkit/workflows/per_file/fort99_workflow.py +223 -0
  115. reaxkit/workflows/per_file/geo_workflow.py +554 -0
  116. reaxkit/workflows/per_file/molfra_workflow.py +577 -0
  117. reaxkit/workflows/per_file/params_workflow.py +135 -0
  118. reaxkit/workflows/per_file/summary_workflow.py +161 -0
  119. reaxkit/workflows/per_file/trainset_workflow.py +356 -0
  120. reaxkit/workflows/per_file/tregime_workflow.py +79 -0
  121. reaxkit/workflows/per_file/vels_workflow.py +309 -0
  122. reaxkit/workflows/per_file/vregime_workflow.py +75 -0
  123. reaxkit/workflows/per_file/xmolout_workflow.py +678 -0
  124. reaxkit-1.0.0.dist-info/METADATA +128 -0
  125. reaxkit-1.0.0.dist-info/RECORD +130 -0
  126. reaxkit-1.0.0.dist-info/WHEEL +5 -0
  127. reaxkit-1.0.0.dist-info/entry_points.txt +2 -0
  128. reaxkit-1.0.0.dist-info/licenses/AUTHORS.md +20 -0
  129. reaxkit-1.0.0.dist-info/licenses/LICENSE +21 -0
  130. reaxkit-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,184 @@
1
+ """
2
+ ReaxFF molecular fragment analysis (molfra.out) handler.
3
+
4
+ This module provides a handler for parsing ReaxFF ``molfra.out`` and
5
+ ``molfra_ig.out`` files, which report molecule/fragment compositions
6
+ and their frequencies as a function of simulation iteration.
7
+
8
+ Typical use cases include:
9
+
10
+ - tracking molecular species formation and decay
11
+ - monitoring reaction pathways and fragment distributions
12
+ - computing molecule counts and system-level mass summaries
13
+ """
14
+
15
+
16
+ from __future__ import annotations
17
+ from pathlib import Path
18
+ from typing import Dict, Any, List, Iterator, Optional
19
+ import pandas as pd
20
+
21
+ from reaxkit.io.base_handler import BaseHandler
22
+
23
+
24
class MolFraHandler(BaseHandler):
    """
    Parser for ReaxFF molecular fragment output files
    (``molfra.out``, ``molfra_ig.out``).

    This class parses molecular fragment frequency data and exposes both
    per-molecule and per-iteration summary information as structured
    tabular datasets.

    Parsed Data
    -----------
    Molecule table
        One row per (iteration, molecular species), returned by
        ``dataframe()``, with columns:
        ["iter", "molecular_formula", "freq", "molecular_mass"]

    Totals table
        One row per iteration, accessible via ``totals()``, with columns:
        ["iter", "total_molecules", "total_atoms", "total_molecular_mass"]

    Metadata
        Returned by ``metadata()``, containing:
        ["n_records", "n_iters", "iter_min", "iter_max",
         "molecular_formulas"]

    Notes
    -----
    - Molecular species are identified by their chemical formula strings.
    - Frequency values represent counts per iteration.
    - Totals are parsed from summary blocks following molecule listings.
    - This handler is iteration-based rather than frame-based, but exposes
      a minimal frame-like API for consistency.
    """

    MOL_COLUMNS = ["iter", "molecular_formula", "freq", "molecular_mass"]
    TOTAL_COLUMNS = ["total_molecules", "total_atoms", "total_molecular_mass", "iter"]

    def __init__(self, file_path: str | Path = "molfra.out"):
        super().__init__(file_path)
        # Caches populated by _parse(); None until the file has been read.
        self._n_records: Optional[int] = None
        self._types: Optional[List[str]] = None

    # ---- Core parser
    def _parse(self) -> tuple[pd.DataFrame, Dict[str, Any]]:
        """
        Parse molfra.out into two dataframes:
        1. Molecule occurrences per iter
        2. Totals (number of molecules, atoms, system molecular_mass) per iter

        Returns
        -------
        df_mol : DataFrame
            One row per (iteration, molecular species).
        meta : dict
            Keys: n_records, n_iters, iter_min, iter_max, molecular_formulas.
            For an empty file, counts are 0, extrema are None, and the
            formula list is empty (the original raised KeyError here).
        """
        # NOTE: was ``list[dict[str, any]]`` — ``any`` is the builtin
        # function, not a type; use typing.Any.
        mol_rows: List[Dict[str, Any]] = []
        total_rows: List[Dict[str, Any]] = []

        with open(self.path, "r") as fh:
            current_iter: Optional[int] = None
            current_totals: Dict[str, Any] = {}

            for line in fh:
                line = line.strip()
                if not line or line.startswith("Bond order"):
                    continue

                # Header line ("Iteration ...") carries no data.
                if line.startswith("Iteration"):
                    continue

                # Summary ("totals") lines that follow each molecule block.
                if line.startswith("Total number of molecules"):
                    current_totals["total_molecules"] = int(line.split()[-1])
                    continue
                if line.startswith("Total number of atoms"):
                    current_totals["total_atoms"] = int(line.split()[-1])
                    continue
                if line.startswith("Total system"):
                    current_totals["total_molecular_mass"] = float(line.split()[-1])
                    # "Total system ..." closes a summary block: flush the
                    # accumulated totals for the most recent iteration.
                    if current_iter is not None and current_totals:
                        current_totals["iter"] = current_iter
                        total_rows.append(current_totals)
                    # Reset unconditionally so a partial block can never
                    # leak into the next iteration's totals.
                    current_totals = {}
                    continue

                # Molecule lines, e.g. "100  3 x H2O  18.015"
                parts = line.split()
                if len(parts) >= 5 and "x" in parts:
                    try:
                        # Renamed from ``iter`` to avoid shadowing the builtin.
                        iteration = int(parts[0])
                        freq = int(parts[1])
                        molecular_mass = float(parts[-1])
                        x_index = parts.index("x")
                        molecular_formula = parts[x_index + 1]
                        current_iter = iteration
                    except (ValueError, IndexError):
                        # Malformed molecule line: skip rather than abort.
                        continue
                    mol_rows.append({
                        "iter": iteration,
                        "molecular_formula": molecular_formula,
                        "freq": freq,
                        "molecular_mass": molecular_mass,
                    })

        # Build dataframes; explicit columns keep the schema stable even
        # when no rows were parsed.
        df_mol = pd.DataFrame(mol_rows, columns=self.MOL_COLUMNS)
        if total_rows:
            df_tot = pd.DataFrame(total_rows).sort_values("iter").reset_index(drop=True)
        else:
            df_tot = pd.DataFrame(columns=self.TOTAL_COLUMNS)

        has_data = not df_mol.empty
        meta = {
            "n_records": len(df_mol),
            "n_iters": int(df_mol["iter"].nunique()) if has_data else 0,
            "iter_min": df_mol["iter"].min() if has_data else None,
            "iter_max": df_mol["iter"].max() if has_data else None,
            "molecular_formulas": (
                sorted(df_mol["molecular_formula"].unique().tolist()) if has_data else []
            ),
        }

        # Store both tables; totals() serves the secondary frame.
        self._df_totals = df_tot
        self._n_records = meta["n_records"]
        self._types = meta["molecular_formulas"]
        return df_mol, meta

    # ---- Convenience accessors (file-specific)
    def n_records(self) -> int:
        """Return the number of parsed (iteration, species) rows."""
        return int(self.metadata().get("n_records", 0))

    def molecular_formulas(self) -> List[str]:
        """Return the sorted list of distinct molecular formulas."""
        return list(self.metadata().get("molecular_formulas", []))

    def by_type(self, mtype: str) -> pd.DataFrame:
        """Return rows for a given molecule type (formula string)."""
        df = self.dataframe()
        return df[df["molecular_formula"] == mtype].reset_index(drop=True)

    def totals(self) -> pd.DataFrame:
        """Return total molecules/atoms/molecular_mass per iter."""
        if hasattr(self, "_df_totals"):
            # Copy so callers cannot mutate the cached table.
            return self._df_totals.copy()
        raise AttributeError("Totals dataframe not parsed or unavailable.")

    # ---- Frame-oriented API (kept minimal for template parity)
    def n_frames(self) -> int:
        """molfra.out is not frame-based; expose unique iters instead."""
        return int(self.metadata().get("n_iters", 0))

    def frame(self, i: int) -> Dict[str, Any]:
        """
        Return a per-iter 'frame' view:
            { 'iter': <int>, 'freqs': DataFrame[molecular_formula, freq] }

        Raises
        ------
        IndexError
            If no data is loaded or ``i`` is out of range.
        """
        df = self.dataframe()
        if df.empty:
            raise IndexError("No data loaded.")
        iters = sorted(df["iter"].unique())
        if i < 0 or i >= len(iters):
            raise IndexError(f"Frame index {i} out of range (0..{len(iters)-1}).")
        it = iters[i]
        sub = (
            df.loc[df["iter"] == it, ["molecular_formula", "freq"]]
            .sort_values("molecular_formula")
            .reset_index(drop=True)
        )
        return {"iter": it, "freqs": sub}

    def iter_frames(self, step: int = 1) -> Iterator[Dict[str, Any]]:
        """Yield frame views for every ``step``-th unique iteration."""
        for i in range(0, self.n_frames(), step):
            yield self.frame(i)
@@ -0,0 +1,137 @@
1
+ """
2
+ ReaxFF parameter search definition (params) handler.
3
+
4
+ This module provides a handler for parsing ReaxFF ``params`` files,
5
+ which define parameter indices, search intervals, bounds, and optional
6
+ inline comments used during force-field optimization.
7
+
8
+ Typical use cases include:
9
+
10
+ - inspecting parameter search spaces
11
+ - linking optimization parameters to force-field sections
12
+ - building interpretable training and sensitivity analyses
13
+ """
14
+
15
+
16
+ from __future__ import annotations
17
+
18
+ from pathlib import Path
19
+ from typing import List, Dict, Any
20
+
21
+ import pandas as pd
22
+
23
+ from reaxkit.io.base_handler import BaseHandler
24
+
25
+
26
class ParamsHandler(BaseHandler):
    """
    Parser for ReaxFF parameter search definition files (``params``).

    This class parses ``params`` files and exposes parameter search
    definitions as a structured tabular dataset suitable for training,
    optimization, and diagnostics workflows.

    Parsed Data
    -----------
    Summary table
        One row per parameter entry, returned by ``dataframe()``, with columns:
        ["ff_section", "ff_section_line", "ff_parameter",
         "search_interval", "min_value", "max_value", "inline_comment"]

        The columns map to ReaxFF force-field definitions as follows:
        - ``ff_section``: force-field section identifier
          (1–7 → general, atom, bond, off-diagonal, angle, torsion, h-bond)
        - ``ff_section_line``: line index within the corresponding section
        - ``ff_parameter``: parameter index within that line

    Metadata
        Returned by ``metadata()``, containing:
        ["n_records", "n_frames"]

    Notes
    -----
    - Inline comments following ``!`` are preserved verbatim.
    - Lines with incorrect token counts raise a parsing error.
    - This handler is not frame-based; ``n_frames()`` always returns 0.
    """

    COLUMNS = [
        "ff_section",
        "ff_section_line",
        "ff_parameter",
        "search_interval",
        "min_value",
        "max_value",
        "inline_comment",
    ]

    # Converters for the six numeric tokens, in COLUMNS order.
    _CASTS = (int, int, int, float, float, float)

    def __init__(self, file_path: str | Path = "params.in"):
        super().__init__(file_path)

    def _parse(self) -> tuple[pd.DataFrame, dict[str, Any]]:
        """
        Implementation of TemplateHandler._parse for params files.

        Returns
        -------
        df : DataFrame
            With columns: ff_section, ff_section_line, ff_parameter,
            search_interval, min_value, max_value, inline_comment.
        meta : dict
            Metadata with keys: n_records, n_frames.

        Raises
        ------
        ValueError
            If a non-comment line does not contain exactly 6 tokens.
        """
        records: List[Dict[str, Any]] = []

        with open(self.path, "r") as handle:
            for raw_line in handle:
                stripped = raw_line.strip()

                # Empty lines and full-line comments carry no data.
                if not stripped or stripped.startswith(("!", "#")):
                    continue

                # Everything after the first "!" is an inline comment.
                data_part, marker, trailing = stripped.partition("!")
                note = trailing.strip() if marker else ""

                pieces = data_part.split()
                if not pieces:
                    continue

                # Exactly 6 numeric tokens are expected:
                # ff_section ff_section_line ff_parameter search_interval min_value max_value
                if len(pieces) != 6:
                    raise ValueError(
                        f"Expected 6 tokens in params line, got {len(pieces)}: {raw_line!r}"
                    )

                entry: Dict[str, Any] = {
                    name: cast(token)
                    for name, cast, token in zip(self.COLUMNS, self._CASTS, pieces)
                }
                entry["inline_comment"] = note
                records.append(entry)

        df = pd.DataFrame(records, columns=self.COLUMNS)

        # This file type has no per-frame data.
        self._frames = []

        meta: Dict[str, Any] = {
            "n_records": len(df),
            "n_frames": 0,
        }
        return df, meta
@@ -0,0 +1,135 @@
1
+ """
2
+ ReaxFF simulation summary (summary.txt) handler.
3
+
4
+ This module provides a handler for parsing ReaxFF ``summary.txt`` files,
5
+ which report per-iteration thermodynamic and system-level quantities
6
+ during MD or minimization runs.
7
+
8
+ Typical use cases include:
9
+
10
+ - tracking energy, temperature, pressure, and density versus iteration
11
+ - extracting time-series data for plotting or analysis
12
+ - validating simulation stability and convergence
13
+ """
14
+
15
+
16
+ from __future__ import annotations
17
+ from pathlib import Path
18
+ from typing import Dict, Any, Tuple, List
19
+ import pandas as pd
20
+ from io import StringIO
21
+
22
+ from reaxkit.io.base_handler import BaseHandler
23
+
24
+
25
class SummaryHandler(BaseHandler):
    """
    Parser for ReaxFF simulation summary files (``summary.txt``).

    This class parses ``summary.txt`` outputs and exposes per-iteration
    simulation summaries as a canonical, numeric time series.

    Parsed Data
    -----------
    Summary table
        One row per iteration, returned by ``dataframe()``, with columns
        determined by the detected column count:

        - 8 columns:
          ["iter", "nmol", "time", "E_pot", "V", "T", "P", "D"]

        - 9 columns:
          ["iter", "nmol", "time", "E_pot", "V", "T", "P", "D", "elap_time"]

    Metadata
        Returned by ``metadata()``, containing:
        ["n_records", "columns", "has_time", "source_file"]

    Notes
    -----
    - Banner and header lines starting with ``REAX`` or ``Iteration`` are ignored.
    - Rows are parsed as whitespace-delimited numeric data with no in-file header.
    - Duplicate iteration indices are resolved by keeping the last entry.
    - This handler represents a scalar-per-iteration time-series file.
    """

    def __init__(self, file_path: str | Path = "summary.txt") -> None:
        super().__init__(file_path)

    @staticmethod
    def _canonical_names(ncols: int) -> List[str]:
        """Map a detected column count (8 or 9) to canonical column names."""
        base = ["iter", "nmol", "time", "E_pot", "V", "T", "P", "D"]
        if ncols == 8:
            return base
        if ncols == 9:
            return base + ["elap_time"]
        raise ValueError(
            f"Unsupported number of columns in summary data: {ncols}. Expected 8 or 9."
        )

    def _parse(self) -> Tuple[pd.DataFrame, Dict[str, Any]]:
        """
        Parse the summary file into a numeric per-iteration DataFrame.

        Raises
        ------
        FileNotFoundError
            If the summary file does not exist.
        ValueError
            If no data rows remain after header filtering, or the column
            count is not 8 or 9.
        """
        source = Path(self.path)
        if not source.exists():
            raise FileNotFoundError(f"Summary file not found: {source}")

        def _is_data(line: str) -> bool:
            """True for whitespace-delimited numeric rows; False for
            blanks, 'REAX…' banners, 'Iteration…' headers, and trailing
            comment/warning lines (which do not start with a digit)."""
            text = line.strip()
            if not text:
                return False
            if text.lower().startswith(("reax", "iteration")):
                return False
            return text[0].isdigit()

        with source.open("r") as handle:
            kept = [line for line in handle if _is_data(line)]

        if not kept:
            raise ValueError(
                "No data lines found after removing 'REAX…' and 'Iteration…' headers."
            )

        # Whitespace-delimited numeric table with no in-file header.
        table = pd.read_csv(
            StringIO("".join(kept).strip()),
            sep=r"\s+",
            header=None,
            engine="python",
        )
        table.columns = self._canonical_names(table.shape[1])

        # Coerce everything to numeric; unparseable cells become NaN.
        table = table.dropna(how="all").reset_index(drop=True)
        table = table.apply(pd.to_numeric, errors="coerce")

        if "iter" in table.columns:
            # Drop rows without an iteration index, then keep only the
            # last occurrence of each duplicate iteration.
            table = (
                table.dropna(subset=["iter"])
                .drop_duplicates("iter", keep="last")
                .reset_index(drop=True)
            )

        meta: Dict[str, Any] = {
            "n_records": int(len(table)),
            "columns": list(table.columns),
            "has_time": "time" in table.columns,
            "source_file": str(source),
        }
        return table, meta

    # Convenience accessors on canonical schema
    def fields(self) -> List[str]:
        """Return the column names of the parsed summary table."""
        return list(self.dataframe().columns)

    def has_times(self) -> bool:
        """Return True when a 'time' column is present."""
        return "time" in self.dataframe().columns

    def iterations(self) -> pd.Series:
        """Return the iteration index column as a Series."""
        frame = self.dataframe()
        if "iter" not in frame.columns:
            raise KeyError("'iter' column not found in summary.")
        return frame["iter"]