reaxkit 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. reaxkit/__init__.py +0 -0
  2. reaxkit/analysis/__init__.py +0 -0
  3. reaxkit/analysis/composed/RDF_analyzer.py +560 -0
  4. reaxkit/analysis/composed/__init__.py +0 -0
  5. reaxkit/analysis/composed/connectivity_analyzer.py +706 -0
  6. reaxkit/analysis/composed/coordination_analyzer.py +144 -0
  7. reaxkit/analysis/composed/electrostatics_analyzer.py +687 -0
  8. reaxkit/analysis/per_file/__init__.py +0 -0
  9. reaxkit/analysis/per_file/control_analyzer.py +165 -0
  10. reaxkit/analysis/per_file/eregime_analyzer.py +108 -0
  11. reaxkit/analysis/per_file/ffield_analyzer.py +305 -0
  12. reaxkit/analysis/per_file/fort13_analyzer.py +79 -0
  13. reaxkit/analysis/per_file/fort57_analyzer.py +106 -0
  14. reaxkit/analysis/per_file/fort73_analyzer.py +61 -0
  15. reaxkit/analysis/per_file/fort74_analyzer.py +65 -0
  16. reaxkit/analysis/per_file/fort76_analyzer.py +191 -0
  17. reaxkit/analysis/per_file/fort78_analyzer.py +154 -0
  18. reaxkit/analysis/per_file/fort79_analyzer.py +83 -0
  19. reaxkit/analysis/per_file/fort7_analyzer.py +393 -0
  20. reaxkit/analysis/per_file/fort99_analyzer.py +411 -0
  21. reaxkit/analysis/per_file/molfra_analyzer.py +359 -0
  22. reaxkit/analysis/per_file/params_analyzer.py +258 -0
  23. reaxkit/analysis/per_file/summary_analyzer.py +84 -0
  24. reaxkit/analysis/per_file/trainset_analyzer.py +84 -0
  25. reaxkit/analysis/per_file/vels_analyzer.py +95 -0
  26. reaxkit/analysis/per_file/xmolout_analyzer.py +528 -0
  27. reaxkit/cli.py +181 -0
  28. reaxkit/count_loc.py +276 -0
  29. reaxkit/data/alias.yaml +89 -0
  30. reaxkit/data/constants.yaml +27 -0
  31. reaxkit/data/reaxff_input_files_contents.yaml +186 -0
  32. reaxkit/data/reaxff_output_files_contents.yaml +301 -0
  33. reaxkit/data/units.yaml +38 -0
  34. reaxkit/help/__init__.py +0 -0
  35. reaxkit/help/help_index_loader.py +531 -0
  36. reaxkit/help/introspection_utils.py +131 -0
  37. reaxkit/io/__init__.py +0 -0
  38. reaxkit/io/base_handler.py +165 -0
  39. reaxkit/io/generators/__init__.py +0 -0
  40. reaxkit/io/generators/control_generator.py +123 -0
  41. reaxkit/io/generators/eregime_generator.py +341 -0
  42. reaxkit/io/generators/geo_generator.py +967 -0
  43. reaxkit/io/generators/trainset_generator.py +1758 -0
  44. reaxkit/io/generators/tregime_generator.py +113 -0
  45. reaxkit/io/generators/vregime_generator.py +164 -0
  46. reaxkit/io/generators/xmolout_generator.py +304 -0
  47. reaxkit/io/handlers/__init__.py +0 -0
  48. reaxkit/io/handlers/control_handler.py +209 -0
  49. reaxkit/io/handlers/eregime_handler.py +122 -0
  50. reaxkit/io/handlers/ffield_handler.py +812 -0
  51. reaxkit/io/handlers/fort13_handler.py +123 -0
  52. reaxkit/io/handlers/fort57_handler.py +143 -0
  53. reaxkit/io/handlers/fort73_handler.py +145 -0
  54. reaxkit/io/handlers/fort74_handler.py +155 -0
  55. reaxkit/io/handlers/fort76_handler.py +195 -0
  56. reaxkit/io/handlers/fort78_handler.py +142 -0
  57. reaxkit/io/handlers/fort79_handler.py +227 -0
  58. reaxkit/io/handlers/fort7_handler.py +264 -0
  59. reaxkit/io/handlers/fort99_handler.py +128 -0
  60. reaxkit/io/handlers/geo_handler.py +224 -0
  61. reaxkit/io/handlers/molfra_handler.py +184 -0
  62. reaxkit/io/handlers/params_handler.py +137 -0
  63. reaxkit/io/handlers/summary_handler.py +135 -0
  64. reaxkit/io/handlers/trainset_handler.py +658 -0
  65. reaxkit/io/handlers/vels_handler.py +293 -0
  66. reaxkit/io/handlers/xmolout_handler.py +174 -0
  67. reaxkit/utils/__init__.py +0 -0
  68. reaxkit/utils/alias.py +219 -0
  69. reaxkit/utils/cache.py +77 -0
  70. reaxkit/utils/constants.py +75 -0
  71. reaxkit/utils/equation_of_states.py +96 -0
  72. reaxkit/utils/exceptions.py +27 -0
  73. reaxkit/utils/frame_utils.py +175 -0
  74. reaxkit/utils/log.py +43 -0
  75. reaxkit/utils/media/__init__.py +0 -0
  76. reaxkit/utils/media/convert.py +90 -0
  77. reaxkit/utils/media/make_video.py +91 -0
  78. reaxkit/utils/media/plotter.py +812 -0
  79. reaxkit/utils/numerical/__init__.py +0 -0
  80. reaxkit/utils/numerical/extrema_finder.py +96 -0
  81. reaxkit/utils/numerical/moving_average.py +103 -0
  82. reaxkit/utils/numerical/numerical_calcs.py +75 -0
  83. reaxkit/utils/numerical/signal_ops.py +135 -0
  84. reaxkit/utils/path.py +55 -0
  85. reaxkit/utils/units.py +104 -0
  86. reaxkit/webui/__init__.py +0 -0
  87. reaxkit/webui/app.py +0 -0
  88. reaxkit/webui/components.py +0 -0
  89. reaxkit/webui/layouts.py +0 -0
  90. reaxkit/webui/utils.py +0 -0
  91. reaxkit/workflows/__init__.py +0 -0
  92. reaxkit/workflows/composed/__init__.py +0 -0
  93. reaxkit/workflows/composed/coordination_workflow.py +393 -0
  94. reaxkit/workflows/composed/electrostatics_workflow.py +587 -0
  95. reaxkit/workflows/composed/xmolout_fort7_workflow.py +343 -0
  96. reaxkit/workflows/meta/__init__.py +0 -0
  97. reaxkit/workflows/meta/help_workflow.py +136 -0
  98. reaxkit/workflows/meta/introspection_workflow.py +235 -0
  99. reaxkit/workflows/meta/make_video_workflow.py +61 -0
  100. reaxkit/workflows/meta/plotter_workflow.py +601 -0
  101. reaxkit/workflows/per_file/__init__.py +0 -0
  102. reaxkit/workflows/per_file/control_workflow.py +110 -0
  103. reaxkit/workflows/per_file/eregime_workflow.py +267 -0
  104. reaxkit/workflows/per_file/ffield_workflow.py +390 -0
  105. reaxkit/workflows/per_file/fort13_workflow.py +86 -0
  106. reaxkit/workflows/per_file/fort57_workflow.py +137 -0
  107. reaxkit/workflows/per_file/fort73_workflow.py +151 -0
  108. reaxkit/workflows/per_file/fort74_workflow.py +88 -0
  109. reaxkit/workflows/per_file/fort76_workflow.py +188 -0
  110. reaxkit/workflows/per_file/fort78_workflow.py +135 -0
  111. reaxkit/workflows/per_file/fort79_workflow.py +314 -0
  112. reaxkit/workflows/per_file/fort7_workflow.py +592 -0
  113. reaxkit/workflows/per_file/fort83_workflow.py +60 -0
  114. reaxkit/workflows/per_file/fort99_workflow.py +223 -0
  115. reaxkit/workflows/per_file/geo_workflow.py +554 -0
  116. reaxkit/workflows/per_file/molfra_workflow.py +577 -0
  117. reaxkit/workflows/per_file/params_workflow.py +135 -0
  118. reaxkit/workflows/per_file/summary_workflow.py +161 -0
  119. reaxkit/workflows/per_file/trainset_workflow.py +356 -0
  120. reaxkit/workflows/per_file/tregime_workflow.py +79 -0
  121. reaxkit/workflows/per_file/vels_workflow.py +309 -0
  122. reaxkit/workflows/per_file/vregime_workflow.py +75 -0
  123. reaxkit/workflows/per_file/xmolout_workflow.py +678 -0
  124. reaxkit-1.0.0.dist-info/METADATA +128 -0
  125. reaxkit-1.0.0.dist-info/RECORD +130 -0
  126. reaxkit-1.0.0.dist-info/WHEEL +5 -0
  127. reaxkit-1.0.0.dist-info/entry_points.txt +2 -0
  128. reaxkit-1.0.0.dist-info/licenses/AUTHORS.md +20 -0
  129. reaxkit-1.0.0.dist-info/licenses/LICENSE +21 -0
  130. reaxkit-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,293 @@
1
+ """
2
+ ReaxFF atomic velocities and accelerations (vels / moldyn.vel / molsav) handler.
3
+
4
+ This module provides a handler for parsing ReaxFF velocity-related output
5
+ files, which store per-atom coordinates, velocities, accelerations, and
6
+ optional lattice and temperature information for a single MD step.
7
+
8
+ Typical use cases include:
9
+
10
+ - extracting atomic velocities or accelerations for analysis
11
+ - correlating kinematics with structural or energetic data
12
+ - visualizing velocity and acceleration fields
13
+ """
14
+
15
+
16
+ from __future__ import annotations
17
+
18
+ from pathlib import Path
19
+ from typing import Any, Dict, Tuple
20
+
21
+ import pandas as pd
22
+
23
+ from reaxkit.io.base_handler import BaseHandler
24
+
25
+
26
class VelsHandler(BaseHandler):
    """
    Parser for ReaxFF atomic kinematics output files
    (``vels``, ``moldyn.vel``, ``molsav``).

    This class parses velocity-style ReaxFF outputs and exposes atomic
    coordinates, velocities, accelerations, and related metadata as
    section-specific tables.

    Parsed Data
    -----------
    Summary table
        The main ``dataframe()`` is intentionally empty.
        All meaningful data is stored in section tables.

    Section tables
        Accessible via ``sections`` or ``section_df(name)``, with one
        table per section:

        - ``Atom coordinates``:
            One row per atom, with columns:
            ["atom_index", "x", "y", "z", "symbol"]

        - ``Atom velocities``:
            One row per atom, with columns:
            ["atom_index", "vx", "vy", "vz"]

        - ``Atom accelerations``:
            One row per atom, with columns:
            ["atom_index", "ax", "ay", "az"]

        - ``Previous atom accelerations``:
            One row per atom, with columns:
            ["atom_index", "ax", "ay", "az"]
            (empty if not present in the file)

    Metadata
        Returned by ``metadata()``, containing (when available):
        {
            "lattice_parameters": {
                "a": float, "b": float, "c": float,
                "alpha": float, "beta": float, "gamma": float
            },
            "md_temperature_K": float
        }

    Notes
    -----
    - All numeric parsing supports Fortran ``D``/``d`` exponents.
    - The number of atoms is inferred from the ``Atom coordinates`` header
      and reused for all subsequent sections.
    - Sections encountered out of order or with truncated data raise
      explicit parsing errors.
    - This handler represents a single MD snapshot and is not frame-based;
      ``n_frames()`` always returns 0.
    """
    # Canonical section names used as keys in the ``sections`` mapping.
    SECTION_COORDS = "Atom coordinates"
    SECTION_VELS = "Atom velocities"
    SECTION_ACCELS = "Atom accelerations"
    SECTION_PREV_ACCELS = "Previous atom accelerations"

    def __init__(self, file_path: str | Path = "vels") -> None:
        """Initialize the handler for *file_path* (defaults to ``vels``)."""
        super().__init__(file_path)
        # Populated by _parse(); maps section name -> per-atom DataFrame.
        self._sections: Dict[str, pd.DataFrame] = {}

    @property
    def sections(self) -> Dict[str, pd.DataFrame]:
        """Return all section tables, parsing the file lazily on first access."""
        if not self._parsed:
            self.parse()
        return self._sections

    def section_df(self, name: str) -> pd.DataFrame:
        """Return a single section table by name; raises ``KeyError`` if absent."""
        if not self._parsed:
            self.parse()
        return self._sections[name]

    def _parse(self) -> Tuple[pd.DataFrame, Dict[str, Any]]:
        """
        Parse the whole file in a single pass.

        Returns an (intentionally empty) summary DataFrame plus a metadata
        dict; the per-section tables are stored on ``self._sections``.
        """
        lines = self.path.read_text().splitlines()
        meta: Dict[str, Any] = {}
        sections: Dict[str, pd.DataFrame] = {}

        def next_nonempty(idx: int) -> int:
            # Advance past blank lines; may return len(lines) at EOF.
            while idx < len(lines) and not lines[idx].strip():
                idx += 1
            return idx

        def floats_from_line(s: str, n: int) -> list[float]:
            # IMPORTANT: handle Fortran D exponents like 0.20D+01
            s = s.replace("D", "E").replace("d", "E")
            out: list[float] = []
            for tok in s.replace(",", " ").split():
                tok2 = tok.replace("D", "E").replace("d", "E")
                try:
                    out.append(float(tok2))
                except ValueError:
                    # Non-numeric tokens (e.g. element symbols) are skipped.
                    pass
                if len(out) == n:
                    break
            return out

        def first_int_in_line(s: str) -> int | None:
            # Used to pull the atom count out of a section header line.
            for tok in s.split():
                try:
                    return int(tok)
                except ValueError:
                    continue
            return None

        def parse_lattice(idx: int) -> tuple[dict[str, float], int]:
            # Expect two data lines: a b c, then alpha beta gamma.
            idx = next_nonempty(idx)
            abc = floats_from_line(lines[idx], 3)

            idx += 1
            idx = next_nonempty(idx)
            ang = floats_from_line(lines[idx], 3)

            if len(abc) != 3 or len(ang) != 3:
                raise ValueError("Could not parse lattice parameters (expected two lines: 3 + 3 floats).")

            lat = {"a": abc[0], "b": abc[1], "c": abc[2], "alpha": ang[0], "beta": ang[1], "gamma": ang[2]}
            return lat, idx + 1

        def parse_coords(idx: int, n_atoms: int, section_name: str) -> tuple[pd.DataFrame, int]:
            # Read n_atoms lines of "x y z symbol"; returns (table, next index).
            idx = next_nonempty(idx)
            rows = []
            for a in range(1, n_atoms + 1):
                idx = next_nonempty(idx)
                if idx >= len(lines):
                    raise ValueError(
                        f"[vels] Truncated section: '{section_name}'. "
                        f"Expected {n_atoms} atom lines, but file ended at atom {a - 1}."
                    )

                s = lines[idx].strip()
                low = s.lower()
                # Hitting the next section header early means this one is short.
                if (
                    "md-temperature" in low
                    or "atom velocities" in low
                    or ("atom accelerations" in low)
                    or "lattice parameters" in low
                ):
                    raise ValueError(
                        f"[vels] Truncated section: '{section_name}'. "
                        f"Expected {n_atoms} atom lines, but only found {a - 1}. "
                        f"Next header encountered early at line {idx + 1}: {s!r}"
                    )

                xyz = floats_from_line(s, 3)
                if len(xyz) != 3:
                    raise ValueError(
                        f"[vels] Bad numeric line in section '{section_name}' at atom {a}. "
                        f"Line {idx + 1}: {s!r}"
                    )

                # Element symbol is taken from the last token on the raw line.
                symbol = s.split()[-1] if s.split() else ""
                rows.append({"atom_index": a, "x": xyz[0], "y": xyz[1], "z": xyz[2], "symbol": symbol})
                idx += 1

            return pd.DataFrame(rows), idx

        def parse_xyz3(idx: int, n_atoms: int, c1: str, c2: str, c3: str, section_name: str) -> tuple[
            pd.DataFrame, int]:
            # Read n_atoms lines of three floats into columns c1/c2/c3
            # (shared by velocities and both acceleration sections).
            idx = next_nonempty(idx)
            rows = []

            for a in range(1, n_atoms + 1):
                idx = next_nonempty(idx)
                if idx >= len(lines):
                    raise ValueError(
                        f"[vels] Truncated section: '{section_name}'. "
                        f"Expected {n_atoms} atom lines, but file ended at atom {a - 1}."
                    )

                s = lines[idx].strip()
                low = s.lower()

                # If we accidentally hit the next header, the section is shortened/truncated.
                if (
                    "md-temperature" in low
                    or "atom coordinates" in low
                    or "atom velocities" in low
                    or ("atom accelerations" in low)
                    or "lattice parameters" in low
                ):
                    raise ValueError(
                        f"[vels] Truncated section: '{section_name}'. "
                        f"Expected {n_atoms} atom lines, but only found {a - 1}. "
                        f"Next header encountered early at line {idx + 1}: {s!r}"
                    )

                v = floats_from_line(s, 3)
                if len(v) != 3:
                    raise ValueError(
                        f"[vels] Bad numeric line in section '{section_name}' at atom {a}. "
                        f"Line {idx + 1}: {s!r}"
                    )

                rows.append({"atom_index": a, c1: v[0], c2: v[1], c3: v[2]})
                idx += 1

            return pd.DataFrame(rows), idx

        def parse_temperature(idx: int) -> tuple[float, int]:
            # Single float on the line following the MD-temperature header.
            idx = next_nonempty(idx)
            v = floats_from_line(lines[idx], 1)
            if not v:
                raise ValueError(f"Could not parse MD-temperature from line: {lines[idx]!r}")
            return float(v[0]), idx + 1

        # ---- Main scan: dispatch on section headers, one pass over the file.
        i = 0
        n_atoms: int | None = None
        prev_acc_present = False

        while i < len(lines):
            s = lines[i].strip()
            low = s.lower()

            if not s:
                i += 1
                continue

            if "lattice parameters" in low:
                lat, i = parse_lattice(i + 1)
                meta["lattice_parameters"] = lat
                continue

            if "atom coordinates" in low:
                # The coordinates header carries the atom count, which is
                # reused for every subsequent section.
                n_atoms = first_int_in_line(s)
                if n_atoms is None:
                    raise ValueError("Could not read number of atoms from 'Atom coordinates' header.")
                df, i = parse_coords(i + 1, n_atoms, self.SECTION_COORDS)
                sections[self.SECTION_COORDS] = df
                continue

            if "atom velocities" in low:
                if n_atoms is None:
                    raise ValueError("Found velocities before coordinates; cannot infer number of atoms.")
                df, i = parse_xyz3(i + 1, n_atoms, "vx", "vy", "vz", self.SECTION_VELS)
                sections[self.SECTION_VELS] = df
                continue

            # NOTE: "previous" is excluded here so the plain-accelerations
            # branch does not also match "Previous atom accelerations".
            if "atom accelerations" in low and "previous" not in low:
                if n_atoms is None:
                    raise ValueError("Found accelerations before coordinates; cannot infer number of atoms.")
                df, i = parse_xyz3(i + 1, n_atoms, "ax", "ay", "az", self.SECTION_ACCELS)
                sections[self.SECTION_ACCELS] = df
                continue

            if "previous atom accelerations" in low:
                if n_atoms is None:
                    raise ValueError("Found previous accelerations before coordinates; cannot infer number of atoms.")
                df, i = parse_xyz3(i + 1, n_atoms, "ax", "ay", "az", self.SECTION_PREV_ACCELS)
                sections[self.SECTION_PREV_ACCELS] = df
                prev_acc_present = True
                continue

            if "md-temperature" in low or "md temperature" in low:
                t, i = parse_temperature(i + 1)
                meta["md_temperature_K"] = t
                continue

            # Unrecognized line: skip it.
            i += 1

        # Guarantee the previous-accelerations section always exists,
        # even when the file did not contain it.
        if not prev_acc_present:
            sections[self.SECTION_PREV_ACCELS] = pd.DataFrame(columns=["atom_index", "ax", "ay", "az"])

        self._sections = sections
        # Summary DataFrame is intentionally empty; all data lives in sections.
        return pd.DataFrame(), meta
@@ -0,0 +1,174 @@
1
+ """
2
+ ReaxFF trajectory output (xmolout) handler.
3
+
4
+ This module provides a handler for parsing ReaxFF ``xmolout`` files,
5
+ which store atomic trajectories from MD runs or MM minimizations.
6
+
7
+ ``xmolout`` files contain repeated coordinate frames with associated
8
+ cell parameters and energies and are commonly used for visualization
9
+ and structural analysis.
10
+ """
11
+
12
+
13
+ from __future__ import annotations
14
+ from pathlib import Path
15
+ from typing import List, Optional, Iterator, Dict, Any
16
+ import pandas as pd
17
+ from reaxkit.io.base_handler import BaseHandler
18
+
19
class XmoloutHandler(BaseHandler):
    """
    Parser for ReaxFF trajectory output files (``xmolout``).

    This class parses ``xmolout`` files and exposes both a per-frame
    summary table and per-frame atomic coordinate tables.

    Parsed Data
    -----------
    Summary table
        One row per frame, returned by ``dataframe()``, with columns:
        ["num_of_atoms", "iter", "E_pot",
         "a", "b", "c", "alpha", "beta", "gamma"]

        Duplicate iteration indices are removed by keeping the last
        occurrence.

    Per-frame atom tables
        Stored in ``self._frames``, one table per frame, where each table
        has at least the columns:
        ["atom_type", "x", "y", "z"]

        Any additional per-atom columns present in the file are preserved
        per frame. If their names are not provided explicitly, they are
        auto-named as ``unknown_1``, ``unknown_2``, …

    Metadata
        Returned by ``metadata()``, containing:
        ["simulation_name", "n_atoms", "n_frames", "has_time"]

    Notes
    -----
    - Frames are inferred from the repeating ``#atoms → header → atoms`` pattern.
    - The number of atoms is assumed constant across all frames.
    - This handler supports lightweight frame access via ``frame(i)``
      and streaming access via ``iter_frames(step=...)``.
    """

    def __init__(self, file_path: str | Path = "xmolout", *, extra_atom_cols: Optional[list[str]] = None):
        """
        Initialize the handler.

        Parameters
        ----------
        file_path : str or Path, optional
            Path to the ``xmolout`` file (defaults to ``"xmolout"``).
        extra_atom_cols : list of str, optional
            Names for any per-atom columns beyond ``atom_type, x, y, z``;
            unnamed extras fall back to ``unknown_N``.
        """
        super().__init__(file_path)
        self._frames: List[pd.DataFrame] = []  # list of per-frame atom tables
        self._n_atoms: Optional[int] = None
        self.simulation_name: str = ""
        # Copy to avoid sharing the caller's list; None means "auto-name".
        self._extra_atom_cols = list(extra_atom_cols) if extra_atom_cols else None

    # ---- FileHandler requirement
    def _parse(self) -> tuple[pd.DataFrame, dict[str, Any]]:
        """
        Stream the file line by line, classifying each line as an atom-count
        line, a frame header, or an atom row, and accumulate per-frame tables
        plus one summary row per frame.
        """
        sim_rows: List[list] = []
        frames: List[pd.DataFrame] = []

        sim_cols = ["num_of_atoms", "iter", "E_pot", "a", "b", "c", "alpha", "beta", "gamma"]
        base_atom_cols = ["atom_type", "x", "y", "z"]

        with open(self.path, "r") as fh:
            atom_buf: List[list] = []          # rows accumulated for the current frame
            atom_count = 0                     # atoms seen so far in the current frame
            current_atom_cols: Optional[List[str]] = None  # column names for this frame
            n_atoms: Optional[int] = None

            for line in fh:
                vals = line.strip().split()
                if not vals:
                    continue

                # #atoms line
                if len(vals) == 1 and vals[0].isdigit():
                    n_atoms = int(vals[0])
                    self._n_atoms = n_atoms
                    # NOTE(review): a trailing '#atoms' line never followed by a
                    # header leaves this 1-element placeholder in sim_rows, which
                    # would mismatch sim_cols below — confirm files always pair them.
                    sim_rows.append([n_atoms])  # placeholder row; will complete after header line
                    atom_buf, atom_count = [], 0
                    current_atom_cols = None
                    continue

                # header line (name iter E a b c alpha beta gamma)
                if len(vals) == 9 and self._n_atoms and vals[1].lstrip("-").isdigit():
                    if not self.simulation_name:
                        self.simulation_name = vals[0]
                    row = [self._n_atoms, int(vals[1])] + list(map(float, vals[2:]))
                    sim_rows[-1] = row  # replace the placeholder added above
                    continue

                # atom coordinates (optionally with extra columns)
                if self._n_atoms and len(vals) >= 4:
                    # lazily determine expected columns for this frame
                    if current_atom_cols is None:
                        n_extras = max(0, len(vals) - 4)
                        if self._extra_atom_cols:
                            names = list(self._extra_atom_cols)[:n_extras]
                            if len(names) < n_extras:
                                # More extras in the file than names given: auto-name the rest.
                                names += [f"unknown_{i+1}" for i in range(n_extras - len(names))]
                        else:
                            names = [f"unknown_{i+1}" for i in range(n_extras)]
                        current_atom_cols = base_atom_cols + names

                    base = [vals[0]] + list(map(float, vals[1:4]))
                    expected_extras = len(current_atom_cols) - 4
                    extras_vals = [float(x) for x in vals[4:4+expected_extras]]
                    # pad if fewer provided
                    while len(extras_vals) < expected_extras:
                        extras_vals.append(float('nan'))
                    atom_buf.append(base + extras_vals)
                    atom_count += 1

                    # Frame complete: materialize the table and reset buffers.
                    if atom_count == self._n_atoms:
                        frames.append(pd.DataFrame(atom_buf, columns=current_atom_cols))
                        atom_buf, atom_count = [], 0
                        current_atom_cols = None

        # Build per-frame summary table
        df = pd.DataFrame(sim_rows, columns=sim_cols)

        # Deduplicate by iter (keep last)
        if not df.empty and "iter" in df.columns:
            keep_idx = df.drop_duplicates("iter", keep="last").index
            # Keep the atom tables aligned with the surviving summary rows.
            frames = [frames[i] for i in keep_idx if i < len(frames)]
            df = df.loc[keep_idx].reset_index(drop=True)

        # Save frames
        self._frames = frames

        meta: Dict[str, Any] = {
            "simulation_name": self.simulation_name,
            "n_atoms": self._n_atoms,
            "n_frames": len(self._frames),
            "has_time": False,
        }
        return df, meta

    # ---- Explicit, file-specific accessors (no generic get())
    def n_frames(self) -> int:
        """Return the number of parsed frames (0 before/without parsing)."""
        return int(self.metadata().get("n_frames", 0))

    def n_atoms(self) -> Optional[int]:
        """Return the atom count, or ``None`` if the file has not been parsed."""
        return self._n_atoms

    def frame(self, i: int) -> Dict[str, Any]:
        """Return a lightweight frame dict: coords + atom_types + iter for frame i."""
        # dataframe() triggers parsing if needed, so _frames is populated here.
        df = self.dataframe()
        if i < 0 or i >= len(self._frames):
            raise IndexError(f"frame index {i} out of range [0, {len(self._frames) - 1}]")

        frame_df = self._frames[i]
        coords = frame_df[["x", "y", "z"]].to_numpy(dtype=float)
        atom_types = frame_df["atom_type"].astype(str).tolist()

        row = df.iloc[i]
        return {
            "index": i,
            "iter": int(row["iter"]) if "iter" in df.columns else i,
            "coords": coords,
            "atom_types": atom_types,
        }

    def iter_frames(self, step: int = 1) -> Iterator[Dict[str, Any]]:
        """Yield frame dicts for every ``step``-th frame (step is clamped to >= 1)."""
        for i in range(0, self.n_frames(), max(1, int(step))):
            yield self.frame(i)
File without changes
reaxkit/utils/alias.py ADDED
@@ -0,0 +1,219 @@
1
+ """
2
+ Alias resolution utilities for tolerant column and key matching.
3
+
4
+ This module provides functions for resolving canonical ReaxKit keys
5
+ (e.g., ``iter``, ``time``, ``D``) against the actual column names present
6
+ in parsed DataFrames, using a packaged alias map.
7
+
8
+ The canonical→alias definitions are stored in ``reaxkit/data/alias.yaml``.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from typing import Dict, List, Iterable, Optional
14
+ from functools import lru_cache
15
+
16
+ # You can load this via importlib.resources so it works after pip install.
17
+ # Requires: alias.yaml included as package data.
18
+ import yaml
19
+ import importlib.resources as ir
20
+
21
+
22
@lru_cache(maxsize=1)
def load_default_alias_map() -> Dict[str, List[str]]:
    """
    Load the packaged canonical→aliases mapping.

    Reads ``reaxkit/data/alias.yaml`` via ``importlib.resources`` and caches
    the normalized result after the first call.

    Returns
    -------
    dict[str, list[str]]
        Mapping of canonical keys to accepted alias strings.

    Raises
    ------
    FileNotFoundError
        If the packaged ``alias.yaml`` cannot be found.
    """
    # reaxkit.data is NOT a package; we read by file location within package resources.
    # If you later make data/ a package, you can switch to ir.files("reaxkit.data").
    pkg = "reaxkit"
    rel = "data/alias.yaml"

    try:
        resource = ir.files(pkg).joinpath(rel)
        with resource.open("r", encoding="utf-8") as f:
            parsed = yaml.safe_load(f) or {}
    except FileNotFoundError as e:
        raise FileNotFoundError(
            f"Could not find packaged alias map at '{pkg}/{rel}'. "
            "Make sure alias.yaml is included as package data."
        ) from e

    raw = parsed.get("aliases") or {}

    # Normalize every value into a list of strings, keyed by stringified key.
    normalized: Dict[str, List[str]] = {}
    for key, value in raw.items():
        if value is None:
            entries: List[str] = []
        elif isinstance(value, list):
            entries = [str(item) for item in value]
        else:
            entries = [str(value)]
        normalized[str(key)] = entries
    return normalized
65
+
66
+
67
def resolve_alias_from_columns(
    cols: Iterable[str],
    canonical: str,
    aliases: Optional[Dict[str, List[str]]] = None,
) -> Optional[str]:
    """
    Resolve a canonical key to the matching column name in a column list.

    Matching is case-insensitive; when no exact alias match exists, a simple
    prefix/substring heuristic on the canonical key is tried as a fallback.

    Parameters
    ----------
    cols : iterable of str
        Available column names (e.g., DataFrame columns).
    canonical : str
        Canonical key to resolve (e.g., ``"iter"``, ``"time"``, ``"D"``).
    aliases : dict[str, list[str]], optional
        Canonical→aliases mapping to use. If not provided, the packaged map
        from ``alias.yaml`` is loaded.

    Returns
    -------
    str or None
        The matching column name if found, otherwise ``None``.

    Examples
    --------
    >>> resolve_alias_from_columns(df.columns, "time")
    """
    if cols is None:
        return None

    available = list(cols)
    # Lowercased name -> original spelling, for case-insensitive lookups.
    by_lower = {name.lower(): name for name in available}
    alias_map = aliases or load_default_alias_map()

    # Try the canonical name first, then each of its declared aliases.
    names_to_try = [canonical, *alias_map.get(canonical, [])]

    # Pass 1: exact match, ignoring case.
    for candidate in names_to_try:
        resolved = by_lower.get(str(candidate).lower())
        if resolved is not None:
            return resolved

    # Pass 2: heuristic fallback — prefix or substring match on the canonical key.
    needle = str(canonical).lower()
    for name in available:
        lowered = name.lower()
        if lowered.startswith(needle) or needle in lowered:
            return name

    return None
122
+
123
+
124
def _resolve_alias(source, canonical: str) -> str:
    """
    Resolve a canonical key from a DataFrame-like source.

    Notes
    -----
    Compatibility helper: *source* may be a handler exposing ``.dataframe()``,
    a pandas DataFrame (with ``.columns``), or a plain iterable of column
    names. Raises ``KeyError`` when no column matches.
    """
    # Extract columns by trying the richest interface first:
    # handler.dataframe().columns -> source.columns -> iter(source).
    try:
        columns = list(source.dataframe().columns)  # type: ignore[attr-defined]
    except Exception:
        try:
            columns = list(getattr(source, "columns"))
        except Exception:
            columns = list(source)  # assume iterable of str

    resolved = resolve_alias_from_columns(columns, canonical)
    if resolved is None:
        raise KeyError(
            f"Could not resolve alias '{canonical}'. Available columns: {list(columns)}"
        )
    return resolved
147
+
148
+
149
def _available_keys_from_columns(cols: Iterable[str]) -> List[str]:
    """
    List canonical keys that are usable for a given column set.

    The returned list includes:
    - raw columns already present in ``cols``
    - canonical keys whose aliases resolve against ``cols``

    Parameters
    ----------
    cols : iterable of str
        Available column names.

    Returns
    -------
    list[str]
        Sorted list of usable keys for lookup and CLI choices.

    Examples
    --------
    >>> _available_keys_from_columns(df.columns)
    """
    alias_map = load_default_alias_map()
    present = set(cols)

    # Start with the raw columns themselves, then add every canonical key
    # that is either present directly or reachable through one of its aliases.
    usable = set(present)
    for canonical, candidates in alias_map.items():
        if canonical in present or any(name in present for name in candidates):
            usable.add(canonical)
    return sorted(usable)
178
+
179
+
180
+ # Re-export for callers that already import these names
181
+ available_keys = _available_keys_from_columns
182
+
183
+
184
def normalize_choice(value: str, domain: str = "xaxis") -> str:
    """
    Normalize a user-provided keyword to its canonical alias key.

    This is intended for tolerant CLI inputs where users may provide
    any alias defined in ``alias.yaml`` (e.g., ``Time(fs)`` → ``time``).

    Parameters
    ----------
    value : str
        User-provided keyword or alias.
    domain : str, optional
        Reserved for future domain-specific normalization rules.

    Returns
    -------
    str
        Canonical key if an alias match is found; otherwise the normalized
        input string.

    Examples
    --------
    >>> normalize_choice("Time(fs)")
    >>> normalize_choice("frm")
    """
    needle = (value or "").strip().lower()
    if not needle:
        # Empty/whitespace-only input normalizes to the empty string.
        return needle

    # Return the first canonical key whose name or aliases match (case-insensitive).
    for canonical, alias_list in load_default_alias_map().items():
        candidates = {canonical.lower()}
        candidates.update(alias.lower() for alias in alias_list)
        if needle in candidates:
            return canonical

    # No alias matched: hand back the normalized input unchanged.
    return needle