reaxkit 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. reaxkit/__init__.py +0 -0
  2. reaxkit/analysis/__init__.py +0 -0
  3. reaxkit/analysis/composed/RDF_analyzer.py +560 -0
  4. reaxkit/analysis/composed/__init__.py +0 -0
  5. reaxkit/analysis/composed/connectivity_analyzer.py +706 -0
  6. reaxkit/analysis/composed/coordination_analyzer.py +144 -0
  7. reaxkit/analysis/composed/electrostatics_analyzer.py +687 -0
  8. reaxkit/analysis/per_file/__init__.py +0 -0
  9. reaxkit/analysis/per_file/control_analyzer.py +165 -0
  10. reaxkit/analysis/per_file/eregime_analyzer.py +108 -0
  11. reaxkit/analysis/per_file/ffield_analyzer.py +305 -0
  12. reaxkit/analysis/per_file/fort13_analyzer.py +79 -0
  13. reaxkit/analysis/per_file/fort57_analyzer.py +106 -0
  14. reaxkit/analysis/per_file/fort73_analyzer.py +61 -0
  15. reaxkit/analysis/per_file/fort74_analyzer.py +65 -0
  16. reaxkit/analysis/per_file/fort76_analyzer.py +191 -0
  17. reaxkit/analysis/per_file/fort78_analyzer.py +154 -0
  18. reaxkit/analysis/per_file/fort79_analyzer.py +83 -0
  19. reaxkit/analysis/per_file/fort7_analyzer.py +393 -0
  20. reaxkit/analysis/per_file/fort99_analyzer.py +411 -0
  21. reaxkit/analysis/per_file/molfra_analyzer.py +359 -0
  22. reaxkit/analysis/per_file/params_analyzer.py +258 -0
  23. reaxkit/analysis/per_file/summary_analyzer.py +84 -0
  24. reaxkit/analysis/per_file/trainset_analyzer.py +84 -0
  25. reaxkit/analysis/per_file/vels_analyzer.py +95 -0
  26. reaxkit/analysis/per_file/xmolout_analyzer.py +528 -0
  27. reaxkit/cli.py +181 -0
  28. reaxkit/count_loc.py +276 -0
  29. reaxkit/data/alias.yaml +89 -0
  30. reaxkit/data/constants.yaml +27 -0
  31. reaxkit/data/reaxff_input_files_contents.yaml +186 -0
  32. reaxkit/data/reaxff_output_files_contents.yaml +301 -0
  33. reaxkit/data/units.yaml +38 -0
  34. reaxkit/help/__init__.py +0 -0
  35. reaxkit/help/help_index_loader.py +531 -0
  36. reaxkit/help/introspection_utils.py +131 -0
  37. reaxkit/io/__init__.py +0 -0
  38. reaxkit/io/base_handler.py +165 -0
  39. reaxkit/io/generators/__init__.py +0 -0
  40. reaxkit/io/generators/control_generator.py +123 -0
  41. reaxkit/io/generators/eregime_generator.py +341 -0
  42. reaxkit/io/generators/geo_generator.py +967 -0
  43. reaxkit/io/generators/trainset_generator.py +1758 -0
  44. reaxkit/io/generators/tregime_generator.py +113 -0
  45. reaxkit/io/generators/vregime_generator.py +164 -0
  46. reaxkit/io/generators/xmolout_generator.py +304 -0
  47. reaxkit/io/handlers/__init__.py +0 -0
  48. reaxkit/io/handlers/control_handler.py +209 -0
  49. reaxkit/io/handlers/eregime_handler.py +122 -0
  50. reaxkit/io/handlers/ffield_handler.py +812 -0
  51. reaxkit/io/handlers/fort13_handler.py +123 -0
  52. reaxkit/io/handlers/fort57_handler.py +143 -0
  53. reaxkit/io/handlers/fort73_handler.py +145 -0
  54. reaxkit/io/handlers/fort74_handler.py +155 -0
  55. reaxkit/io/handlers/fort76_handler.py +195 -0
  56. reaxkit/io/handlers/fort78_handler.py +142 -0
  57. reaxkit/io/handlers/fort79_handler.py +227 -0
  58. reaxkit/io/handlers/fort7_handler.py +264 -0
  59. reaxkit/io/handlers/fort99_handler.py +128 -0
  60. reaxkit/io/handlers/geo_handler.py +224 -0
  61. reaxkit/io/handlers/molfra_handler.py +184 -0
  62. reaxkit/io/handlers/params_handler.py +137 -0
  63. reaxkit/io/handlers/summary_handler.py +135 -0
  64. reaxkit/io/handlers/trainset_handler.py +658 -0
  65. reaxkit/io/handlers/vels_handler.py +293 -0
  66. reaxkit/io/handlers/xmolout_handler.py +174 -0
  67. reaxkit/utils/__init__.py +0 -0
  68. reaxkit/utils/alias.py +219 -0
  69. reaxkit/utils/cache.py +77 -0
  70. reaxkit/utils/constants.py +75 -0
  71. reaxkit/utils/equation_of_states.py +96 -0
  72. reaxkit/utils/exceptions.py +27 -0
  73. reaxkit/utils/frame_utils.py +175 -0
  74. reaxkit/utils/log.py +43 -0
  75. reaxkit/utils/media/__init__.py +0 -0
  76. reaxkit/utils/media/convert.py +90 -0
  77. reaxkit/utils/media/make_video.py +91 -0
  78. reaxkit/utils/media/plotter.py +812 -0
  79. reaxkit/utils/numerical/__init__.py +0 -0
  80. reaxkit/utils/numerical/extrema_finder.py +96 -0
  81. reaxkit/utils/numerical/moving_average.py +103 -0
  82. reaxkit/utils/numerical/numerical_calcs.py +75 -0
  83. reaxkit/utils/numerical/signal_ops.py +135 -0
  84. reaxkit/utils/path.py +55 -0
  85. reaxkit/utils/units.py +104 -0
  86. reaxkit/webui/__init__.py +0 -0
  87. reaxkit/webui/app.py +0 -0
  88. reaxkit/webui/components.py +0 -0
  89. reaxkit/webui/layouts.py +0 -0
  90. reaxkit/webui/utils.py +0 -0
  91. reaxkit/workflows/__init__.py +0 -0
  92. reaxkit/workflows/composed/__init__.py +0 -0
  93. reaxkit/workflows/composed/coordination_workflow.py +393 -0
  94. reaxkit/workflows/composed/electrostatics_workflow.py +587 -0
  95. reaxkit/workflows/composed/xmolout_fort7_workflow.py +343 -0
  96. reaxkit/workflows/meta/__init__.py +0 -0
  97. reaxkit/workflows/meta/help_workflow.py +136 -0
  98. reaxkit/workflows/meta/introspection_workflow.py +235 -0
  99. reaxkit/workflows/meta/make_video_workflow.py +61 -0
  100. reaxkit/workflows/meta/plotter_workflow.py +601 -0
  101. reaxkit/workflows/per_file/__init__.py +0 -0
  102. reaxkit/workflows/per_file/control_workflow.py +110 -0
  103. reaxkit/workflows/per_file/eregime_workflow.py +267 -0
  104. reaxkit/workflows/per_file/ffield_workflow.py +390 -0
  105. reaxkit/workflows/per_file/fort13_workflow.py +86 -0
  106. reaxkit/workflows/per_file/fort57_workflow.py +137 -0
  107. reaxkit/workflows/per_file/fort73_workflow.py +151 -0
  108. reaxkit/workflows/per_file/fort74_workflow.py +88 -0
  109. reaxkit/workflows/per_file/fort76_workflow.py +188 -0
  110. reaxkit/workflows/per_file/fort78_workflow.py +135 -0
  111. reaxkit/workflows/per_file/fort79_workflow.py +314 -0
  112. reaxkit/workflows/per_file/fort7_workflow.py +592 -0
  113. reaxkit/workflows/per_file/fort83_workflow.py +60 -0
  114. reaxkit/workflows/per_file/fort99_workflow.py +223 -0
  115. reaxkit/workflows/per_file/geo_workflow.py +554 -0
  116. reaxkit/workflows/per_file/molfra_workflow.py +577 -0
  117. reaxkit/workflows/per_file/params_workflow.py +135 -0
  118. reaxkit/workflows/per_file/summary_workflow.py +161 -0
  119. reaxkit/workflows/per_file/trainset_workflow.py +356 -0
  120. reaxkit/workflows/per_file/tregime_workflow.py +79 -0
  121. reaxkit/workflows/per_file/vels_workflow.py +309 -0
  122. reaxkit/workflows/per_file/vregime_workflow.py +75 -0
  123. reaxkit/workflows/per_file/xmolout_workflow.py +678 -0
  124. reaxkit-1.0.0.dist-info/METADATA +128 -0
  125. reaxkit-1.0.0.dist-info/RECORD +130 -0
  126. reaxkit-1.0.0.dist-info/WHEEL +5 -0
  127. reaxkit-1.0.0.dist-info/entry_points.txt +2 -0
  128. reaxkit-1.0.0.dist-info/licenses/AUTHORS.md +20 -0
  129. reaxkit-1.0.0.dist-info/licenses/LICENSE +21 -0
  130. reaxkit-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,264 @@
1
+ """
2
+ ReaxFF connectivity (fort.7) file handler.
3
+
4
+ This module provides a handler for parsing ReaxFF ``fort.7`` files,
5
+ which store per-iteration atom connectivity, bond-order information,
6
+ and system-wide totals.
7
+
8
+ Typical use cases include:
9
+
10
+ - extracting per-atom bond-order features
11
+ - computing coordination statistics
12
+ - building molecule- and structure-level descriptors
13
+ """
14
+
15
+
16
+ from __future__ import annotations
17
+ from pathlib import Path
18
+ from typing import List, Dict, Any, Optional
19
+ import pandas as pd
20
+
21
+ from reaxkit.io.base_handler import BaseHandler
22
+
23
+
24
class Fort7Handler(BaseHandler):
    """
    Parser for ReaxFF connectivity output files (``fort.7``).

    This class parses ReaxFF ``fort.7`` files and exposes both
    iteration-level summaries and per-iteration atom connectivity
    tables as structured tabular data.

    Parsed Data
    -----------
    Summary table
        One row per iteration, returned by ``dataframe()``, with columns:
        ["iter", "num_of_atoms", "num_of_bonds",
         "total_BO", "total_LP", "total_BO_uncorrected", "total_charge"]

    Per-frame atom tables
        Stored in ``self._frames``, one table per iteration, where each
        frame is a ``pandas.DataFrame`` with columns:
        ["atom_num", "atom_type_num", "atom_cnn1..nb", "molecule_num",
         "BO1..nb", "sum_BOs", "num_LPs", "partial_charge", ...]

        Here, ``nb`` denotes the number of bonded neighbors in that frame,
        leading to variable-length connectivity and bond-order columns.

    Metadata
        Returned by ``metadata()``, containing:
        ["n_frames", "n_records", "simulation_name"]

    Notes
    -----
    - Duplicate iterations are resolved by keeping the last occurrence.
    - Connectivity and bond-order columns are inferred from the header.
    - Extra, file-dependent columns are preserved as ``unknown*`` fields.
    - An iteration header that is not followed by any atom line (for
      example in a file truncated mid-write) is dropped, so the summary
      table and the per-frame tables always have the same length.
    """

    def __init__(self, file_path: str | Path = "fort.7"):
        """Initialize a handler for a ReaxFF ``fort.7`` connectivity file.

        Works on
        --------
        Fort7Handler — ``fort.7``

        Parameters
        ----------
        file_path : str or pathlib.Path, optional
            Path to the ``fort.7`` file to be parsed.

        Returns
        -------
        None
            Initializes the handler without parsing the file.
        """
        super().__init__(file_path)
        self._frames: List[pd.DataFrame] = []
        self._sim_name: Optional[str] = None

    @staticmethod
    def _atom_columns(nb: int, width: int) -> List[str]:
        """Build the column names of one per-frame atom table.

        Parameters
        ----------
        nb : int
            Number of neighbor slots announced by the iteration header.
        width : int
            Number of values actually present on an atom line.

        Returns
        -------
        list of str
            The fixed columns followed by ``unknown1..k`` for every value
            beyond the ``2 * nb + 6`` named ones.
        """
        cols = ["atom_num", "atom_type_num"]
        cols += [f"atom_cnn{i}" for i in range(1, nb + 1)]
        cols.append("molecule_num")
        cols += [f"BO{i}" for i in range(1, nb + 1)]
        cols += ["sum_BOs", "num_LPs", "partial_charge"]
        # An empty range when the line is not wider than the named columns.
        cols += [f"unknown{i}" for i in range(1, width - len(cols) + 1)]
        return cols

    def _parse(self) -> tuple[pd.DataFrame, dict[str, Any]]:
        """Read ``self.path`` and build the summary table and frame list.

        Lines are classified by their whitespace-separated token count:
        exactly 6 tokens is an iteration header, fewer than 6 is a totals
        line, and more than 6 is an atom line.

        Returns
        -------
        tuple of (pandas.DataFrame, dict)
            The per-iteration summary table and the metadata dictionary
            with keys ``n_frames``, ``n_records`` and ``simulation_name``.

        Raises
        ------
        ValueError
            If an atom line is encountered before any iteration header,
            or if a token cannot be converted to the expected number type.
        """
        summary_rows: List[List[Any]] = []
        frames: List[pd.DataFrame] = []
        totals: List[List[float]] = []

        # State of the iteration currently being read.
        header: Optional[tuple[int, int, int]] = None  # (iter, n_atoms, nb)
        atom_rows: List[List[float | int]] = []
        frame_totals: List[float] = []
        sim_name: str = ""

        def _flush_iteration() -> None:
            # The summary row, the frame and the totals are recorded together
            # so the three lists can never drift out of alignment.
            if header is None or not atom_rows:
                return
            iteration, n_atoms, nb = header
            summary_rows.append([iteration, n_atoms, nb])
            frames.append(
                pd.DataFrame(
                    atom_rows,
                    columns=self._atom_columns(nb, len(atom_rows[0])),
                )
            )
            totals.append(
                list(frame_totals) if frame_totals else [float("nan")] * 4
            )

        with open(self.path, "r") as fh:
            for raw in fh:
                values = raw.split()
                if not values:
                    continue

                n_values = len(values)

                if n_values == 6:
                    # Header: close the previous iteration and start afresh.
                    _flush_iteration()
                    atom_rows.clear()
                    frame_totals.clear()
                    sim_name = values[1]
                    header = (int(values[3]), int(values[0]), int(values[5]))

                elif n_values < 6:
                    # Totals line(s) belonging to the current iteration.
                    frame_totals.extend(map(float, values))

                else:
                    # Atom line: the first nb + 3 tokens are integers
                    # (atom number, type, nb neighbors, molecule number),
                    # the remaining ones are floats.
                    if header is None:
                        raise ValueError(
                            f"{self.path}: atom line found before any "
                            "iteration header"
                        )
                    split_at = header[2] + 3
                    atom_rows.append(
                        [int(v) for v in values[:split_at]]
                        + [float(v) for v in values[split_at:]]
                    )

        # The last iteration has no following header to trigger the flush.
        _flush_iteration()

        # Summary dataframe
        sim_df = pd.DataFrame(
            summary_rows, columns=["iter", "num_of_atoms", "num_of_bonds"]
        )
        n_totals = len(totals[0]) if totals else 0
        if n_totals == 4:
            total_cols = [
                "total_BO", "total_LP", "total_BO_uncorrected", "total_charge"
            ]
        else:
            total_cols = [f"total_val{i}" for i in range(1, n_totals + 1)]
        totals_df = pd.DataFrame(totals, columns=total_cols)
        if not totals_df.empty:
            sim_df = pd.concat([sim_df, totals_df], axis=1)

        # Deduplicate: keep the last occurrence of every iteration number
        # in both the summary table and the frame list.
        if not sim_df.empty:
            keep_idx = sim_df.drop_duplicates("iter", keep="last").index
            frames = [frames[i] for i in keep_idx]
            sim_df = sim_df.loc[keep_idx].reset_index(drop=True)

        self._frames = frames
        self._sim_name = sim_name

        meta: Dict[str, Any] = {
            "n_frames": len(frames),
            "n_records": len(sim_df),
            "simulation_name": sim_name,
        }

        return sim_df, meta

    # -------------------------------------------------------
    # Frame utilities (match XmoloutHandler API)
    # -------------------------------------------------------

    def n_frames(self) -> int:
        """
        Return the number of frames parsed from the ``fort.7`` file.

        Works on
        --------
        Fort7Handler — ``fort.7``

        Returns
        -------
        int
            Number of frames currently stored in ``self._frames``
            (0 if the attribute is missing or the list is empty).
        """
        return len(getattr(self, "_frames", ()))

    def n_atoms(self, frame: int = 0) -> int:
        """
        Return the number of atoms in a given frame.

        Works on
        --------
        Fort7Handler — ``fort.7``

        Parameters
        ----------
        frame : int, optional
            Frame index to query.

        Returns
        -------
        int
            Number of rows in the selected frame table, or 0 when no
            frames are stored.

        Raises
        ------
        IndexError
            If frames are stored but ``frame`` is out of range.
        """
        frames = getattr(self, "_frames", None)
        if not frames:
            return 0
        return len(frames[int(frame)])

    def frame(self, i: int) -> pd.DataFrame:
        """Return a single frame as an atom-level connectivity table.

        Works on
        --------
        Fort7Handler — ``fort.7``

        Parameters
        ----------
        i : int
            Frame index to retrieve.

        Returns
        -------
        pandas.DataFrame
            Atom-level table for the selected frame, including connectivity
            and bond-order columns.

        Raises
        ------
        RuntimeError
            If the handler has no ``_frames`` attribute.
        IndexError
            If ``i`` is out of range for the stored frames.

        Examples
        --------
        >>> h = Fort7Handler("fort.7")
        >>> df = h.frame(0)
        """
        if not hasattr(self, "_frames"):
            raise RuntimeError("fort.7 has not been parsed yet.")
        return self._frames[int(i)]

    def iter_frames(self, step: int = 1):
        """Iterate over atom-level frames with optional subsampling.

        Works on
        --------
        Fort7Handler — ``fort.7``

        Parameters
        ----------
        step : int, optional
            Step size for subsampling frames (default: 1). Values below 1
            are treated as 1.

        Yields
        ------
        pandas.DataFrame
            Atom-level connectivity table for each yielded frame.

        Examples
        --------
        >>> h = Fort7Handler("fort.7")
        >>> for frame in h.iter_frames(step=10):
        ...     print(len(frame))
        """
        if not hasattr(self, "_frames"):
            return
        stride = max(1, int(step))
        yield from self._frames[::stride]
@@ -0,0 +1,128 @@
1
+ """
2
+ ReaxFF training set error report (fort.99) handler.
3
+
4
+ This module provides a handler for parsing ReaxFF ``fort.99`` files,
5
+ which summarize force-field training errors by category and target
6
+ during parameter optimization runs.
7
+
8
+ Typical use cases include:
9
+
10
+ - analyzing training set contributions to total error
11
+ - inspecting charge, geometry, and energy fitting quality
12
+ - building diagnostics for force-field parameterization workflows
13
+ """
14
+
15
+
16
+ from __future__ import annotations
17
+ from pathlib import Path
18
+ from typing import List, Dict, Any
19
+ import re
20
+ import pandas as pd
21
+
22
+ from reaxkit.io.base_handler import BaseHandler
23
+
24
+
25
class Fort99Handler(BaseHandler):
    """
    Parser for ReaxFF training set error reports (``fort.99``).

    This class parses ``fort.99`` files and exposes individual training
    targets and their contributions to the total force-field error as
    a structured tabular dataset.

    Parsed Data
    -----------
    Summary table
        One row per training target, returned by ``dataframe()``,
        with columns:
        ["lineno", "section", "title",
         "ffield_value", "qm_value", "weight",
         "error", "total_ff_error"]

        The ``section`` column categorizes each target as one of:
        ["CHARGE", "HEATFO", "GEOMETRY", "CELL PARAMETERS", "ENERGY", None].

    Metadata
        Returned by ``metadata()``, containing:
        ["n_records", "n_frames"]

    Notes
    -----
    - The last five numeric values on each line are interpreted as
      (FF value, QM/reference value, weight, error, total error).
    - Section categories are inferred heuristically from the title text.
    - Unrecognized entries are retained with ``section=None``.
    - The table always carries the columns listed above, even when no
      line of the file could be parsed.
    - This handler is not frame-based; the ``n_frames`` metadata entry
      is always 0.
    """

    # Output schema, in column order.
    _COLUMNS = (
        "lineno",
        "section",
        "title",
        "ffield_value",
        "qm_value",
        "weight",
        "error",
        "total_ff_error",
    )

    # float like -17.8000, 1.54, 1.0e-03, etc.
    _FLOAT_RE = re.compile(r"[+-]?(?:\d+\.\d*|\d*\.\d+|\d+)(?:[eE][+-]?\d+)?")

    # (section, keywords) pairs tested in order against the lower-cased
    # title; the first pair with any keyword present wins.
    _SECTION_KEYWORDS = (
        ("CHARGE", ("charge",)),
        ("HEATFO", ("heat",)),
        ("GEOMETRY", ("bond", "angle")),
        ("CELL PARAMETERS", ("a:", "b:", "c:")),
        ("ENERGY", ("energy",)),
    )

    def __init__(self, file_path: str | Path = "fort.99"):
        """Initialize a handler for a ReaxFF ``fort.99`` error report.

        Parameters
        ----------
        file_path : str or pathlib.Path, optional
            Path to the ``fort.99`` file to be parsed.
        """
        super().__init__(file_path)

    @classmethod
    def _classify(cls, title: str):
        """Return the section name matching ``title``, or None if unknown."""
        lowered = title.lower()
        for section, keywords in cls._SECTION_KEYWORDS:
            if any(keyword in lowered for keyword in keywords):
                return section
        return None

    def _parse(self) -> tuple[pd.DataFrame, dict[str, Any]]:
        """Read ``self.path`` and build the training-target table.

        A line is kept only if it contains at least five numbers and some
        non-blank text before the fifth-from-last number; that text becomes
        the ``title``. Every other line is skipped silently.

        Returns
        -------
        tuple of (pandas.DataFrame, dict)
            The table described in the class docstring and a metadata
            dictionary with keys ``n_records`` and ``n_frames``.
        """
        records: List[Dict[str, Any]] = []

        with open(self.path, "r") as fh:
            for lineno, raw in enumerate(fh, start=1):
                line = raw.rstrip("\n")
                if not line.strip():
                    continue

                # Locate every number; only the trailing five are data.
                matches = list(self._FLOAT_RE.finditer(line))
                if len(matches) < 5:
                    continue
                tail = matches[-5:]

                # Everything left of the first data column is the title.
                title = line[: tail[0].start()].strip()
                if not title:
                    continue

                ffield_val, qm_val, weight, err, tot_err = (
                    float(m.group()) for m in tail
                )

                section = self._classify(title)
                if section is None:
                    # Unknown entries are reported but still recorded.
                    print(f"Unrecognized fort.99 entry at line {lineno}: {title}")

                records.append(
                    {
                        "lineno": lineno,
                        "section": section,
                        "title": title,
                        "ffield_value": ffield_val,
                        "qm_value": qm_val,
                        "weight": weight,
                        "error": err,
                        "total_ff_error": tot_err,
                    }
                )

        # Passing the column list keeps the schema stable for empty input.
        df = pd.DataFrame(records, columns=list(self._COLUMNS))

        # fort.99 has no per-frame data
        self._frames = []
        meta: Dict[str, Any] = {
            "n_records": len(df),
            "n_frames": 0,
        }
        return df, meta
@@ -0,0 +1,224 @@
1
+ """
2
+ ReaxFF geometry structure (geo) file handler.
3
+
4
+ This module provides a handler for parsing ReaxFF ``.geo`` structure
5
+ files in XTLGRF format, which define atomic coordinates, optional
6
+ periodic cell parameters, and descriptive metadata for a system.
7
+
8
+ Typical use cases include:
9
+
10
+ - loading initial or relaxed geometries
11
+ - extracting atomic coordinates for analysis or visualization
12
+ - accessing unit cell parameters for periodic systems
13
+ """
14
+
15
+
16
+ from __future__ import annotations
17
+ from pathlib import Path
18
+ from typing import List, Optional, Dict, Any
19
+
20
+ import pandas as pd
21
+
22
+ from reaxkit.io.base_handler import BaseHandler
23
+
24
+
25
class GeoHandler(BaseHandler):
    """
    Parser for ReaxFF geometry structure files (``.geo`` / XTLGRF format).

    This class parses ``.geo`` files and exposes atomic coordinates and
    associated structural metadata as structured Python objects.

    Parsed Data
    -----------
    Atom table
        One row per atom, returned by ``dataframe()``, with columns:
        ["atom_id", "atom_type", "x", "y", "z"]

    Metadata
        Returned by ``metadata()``, containing:
        {
            "descriptor": str | None,   # from DESCRP line
            "remark": str | None,       # concatenated REMARK lines
            "cell_lengths": {           # from CRYSTX (a, b, c)
                "a": float,
                "b": float,
                "c": float,
            } | None,
            "cell_angles": {            # from CRYSTX (alpha, beta, gamma)
                "alpha": float,
                "beta": float,
                "gamma": float,
            } | None,
            "n_atoms": int,
        }

    Notes
    -----
    - Only ``ATOM`` and ``HETATM`` records are parsed into the atom table.
    - An atom record needs at least six whitespace-separated fields
      (keyword, id, type, x, y, z); anything after them is ignored.
    - Cell parameters are optional and may be absent for non-periodic systems.
    - Non-structural lines (e.g. ``XTLGRF``, ``FORMAT``) are ignored.
    - This handler is not frame-based; the file represents a single structure.
    """

    def __init__(self, file_path: str | Path = "geo"):
        """Initialize a handler for a ReaxFF ``.geo`` structure file.

        Parameters
        ----------
        file_path : str or pathlib.Path, optional
            Path to the geometry file to be parsed.
        """
        super().__init__(file_path)
        self._n_atoms: Optional[int] = None

    # ------------------------------------------------------------------
    # Core parser
    # ------------------------------------------------------------------
    def _parse(self) -> tuple[pd.DataFrame, dict[str, Any]]:
        """Read ``self.path`` and build the atom table and metadata.

        Records are recognised by the keyword at the very start of the
        line. Malformed ``CRYSTX`` and atom records are skipped rather
        than raising.

        Returns
        -------
        tuple of (pandas.DataFrame, dict)
            The atom table and the metadata dictionary described in the
            class docstring.
        """
        atoms: List[Dict[str, Any]] = []

        descriptor: Optional[str] = None
        remark: Optional[str] = None
        cell_lengths: Optional[Dict[str, float]] = None
        cell_angles: Optional[Dict[str, float]] = None

        with open(self.path, "r") as fh:
            for raw in fh:
                line = raw.rstrip("\n")
                if not line.strip():
                    continue

                # Descriptor: everything after the 6-character keyword.
                if line.startswith("DESCRP"):
                    descriptor = line[6:].strip() or None
                    continue

                # Remark (optional, possibly multiple lines): successive
                # lines are joined with a single space.
                if line.startswith("REMARK"):
                    text = line[6:].strip()
                    remark = f"{remark} {text}".strip() if remark else text
                    continue

                # Cell / periodic box.
                # Expected: CRYSTX a b c alpha beta gamma
                if line.startswith("CRYSTX"):
                    fields = line.split()
                    if len(fields) >= 7:
                        try:
                            a, b, c, alpha, beta, gamma = map(float, fields[1:7])
                        except ValueError:
                            # Unparseable numbers: keep the previous values
                            # (None unless an earlier CRYSTX line succeeded).
                            pass
                        else:
                            cell_lengths = {"a": a, "b": b, "c": c}
                            cell_angles = {
                                "alpha": alpha,
                                "beta": beta,
                                "gamma": gamma,
                            }
                    continue

                # Atom records: HETATM or ATOM
                if line.startswith(("HETATM", "ATOM")):
                    fields = line.split()
                    # Fields used:
                    #   0: "HETATM" / "ATOM"
                    #   1: atom_id (int)
                    #   2: atom_type (str, e.g., N, Al, O_w, ...)
                    #   3-5: x, y, z
                    # Later fields (repeated atom type, extras) are ignored,
                    # so six fields are enough for a complete record.
                    if len(fields) < 6:
                        continue

                    try:
                        atom_id = int(fields[1])
                        x, y, z = map(float, fields[3:6])
                    except ValueError:
                        # Unexpected id or coordinate format: skip the line.
                        continue

                    atoms.append(
                        {
                            "atom_id": atom_id,
                            "atom_type": fields[2],
                            "x": x,
                            "y": y,
                            "z": z,
                        }
                    )

                # Other lines (XTLGRF, FORMAT, etc.) are ignored

        df = pd.DataFrame(atoms, columns=["atom_id", "atom_type", "x", "y", "z"])
        n_atoms = len(df)
        self._n_atoms = n_atoms

        meta: Dict[str, Any] = {
            "descriptor": descriptor,
            "remark": remark,
            "cell_lengths": cell_lengths,
            "cell_angles": cell_angles,
            "n_atoms": n_atoms,
        }
        return df, meta

    # ------------------------------------------------------------------
    # Convenience accessors
    # ------------------------------------------------------------------
    def n_atoms(self) -> int:
        """Return the number of atoms in the .geo file.

        The value cached by ``_parse`` is used when available; otherwise it
        is read from ``metadata()`` (falling back to the table length) and
        cached.
        """
        if self._n_atoms is None:
            self._n_atoms = int(self.metadata().get("n_atoms", len(self.dataframe())))
        return self._n_atoms

    def cell(self) -> Dict[str, Optional[float]]:
        """
        Return a flat dict with cell parameters:

            {
                "a": ...,
                "b": ...,
                "c": ...,
                "alpha": ...,
                "beta": ...,
                "gamma": ...,
            }

        Values may be None if CRYSTX was missing or malformed.
        """
        meta = self.metadata()
        # Missing or None entries become empty dicts so .get() yields None.
        lengths = meta.get("cell_lengths") or {}
        angles = meta.get("cell_angles") or {}

        flat: Dict[str, Optional[float]] = {
            key: lengths.get(key) for key in ("a", "b", "c")
        }
        flat.update({key: angles.get(key) for key in ("alpha", "beta", "gamma")})
        return flat

    def coordinates(self) -> pd.DataFrame:
        """
        Return a copy of the atom table (id, type, x, y, z).

        This is just a convenience wrapper around .dataframe()
        to make the intent explicit.
        """
        return self.dataframe().copy()