reaxkit 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. reaxkit/__init__.py +0 -0
  2. reaxkit/analysis/__init__.py +0 -0
  3. reaxkit/analysis/composed/RDF_analyzer.py +560 -0
  4. reaxkit/analysis/composed/__init__.py +0 -0
  5. reaxkit/analysis/composed/connectivity_analyzer.py +706 -0
  6. reaxkit/analysis/composed/coordination_analyzer.py +144 -0
  7. reaxkit/analysis/composed/electrostatics_analyzer.py +687 -0
  8. reaxkit/analysis/per_file/__init__.py +0 -0
  9. reaxkit/analysis/per_file/control_analyzer.py +165 -0
  10. reaxkit/analysis/per_file/eregime_analyzer.py +108 -0
  11. reaxkit/analysis/per_file/ffield_analyzer.py +305 -0
  12. reaxkit/analysis/per_file/fort13_analyzer.py +79 -0
  13. reaxkit/analysis/per_file/fort57_analyzer.py +106 -0
  14. reaxkit/analysis/per_file/fort73_analyzer.py +61 -0
  15. reaxkit/analysis/per_file/fort74_analyzer.py +65 -0
  16. reaxkit/analysis/per_file/fort76_analyzer.py +191 -0
  17. reaxkit/analysis/per_file/fort78_analyzer.py +154 -0
  18. reaxkit/analysis/per_file/fort79_analyzer.py +83 -0
  19. reaxkit/analysis/per_file/fort7_analyzer.py +393 -0
  20. reaxkit/analysis/per_file/fort99_analyzer.py +411 -0
  21. reaxkit/analysis/per_file/molfra_analyzer.py +359 -0
  22. reaxkit/analysis/per_file/params_analyzer.py +258 -0
  23. reaxkit/analysis/per_file/summary_analyzer.py +84 -0
  24. reaxkit/analysis/per_file/trainset_analyzer.py +84 -0
  25. reaxkit/analysis/per_file/vels_analyzer.py +95 -0
  26. reaxkit/analysis/per_file/xmolout_analyzer.py +528 -0
  27. reaxkit/cli.py +181 -0
  28. reaxkit/count_loc.py +276 -0
  29. reaxkit/data/alias.yaml +89 -0
  30. reaxkit/data/constants.yaml +27 -0
  31. reaxkit/data/reaxff_input_files_contents.yaml +186 -0
  32. reaxkit/data/reaxff_output_files_contents.yaml +301 -0
  33. reaxkit/data/units.yaml +38 -0
  34. reaxkit/help/__init__.py +0 -0
  35. reaxkit/help/help_index_loader.py +531 -0
  36. reaxkit/help/introspection_utils.py +131 -0
  37. reaxkit/io/__init__.py +0 -0
  38. reaxkit/io/base_handler.py +165 -0
  39. reaxkit/io/generators/__init__.py +0 -0
  40. reaxkit/io/generators/control_generator.py +123 -0
  41. reaxkit/io/generators/eregime_generator.py +341 -0
  42. reaxkit/io/generators/geo_generator.py +967 -0
  43. reaxkit/io/generators/trainset_generator.py +1758 -0
  44. reaxkit/io/generators/tregime_generator.py +113 -0
  45. reaxkit/io/generators/vregime_generator.py +164 -0
  46. reaxkit/io/generators/xmolout_generator.py +304 -0
  47. reaxkit/io/handlers/__init__.py +0 -0
  48. reaxkit/io/handlers/control_handler.py +209 -0
  49. reaxkit/io/handlers/eregime_handler.py +122 -0
  50. reaxkit/io/handlers/ffield_handler.py +812 -0
  51. reaxkit/io/handlers/fort13_handler.py +123 -0
  52. reaxkit/io/handlers/fort57_handler.py +143 -0
  53. reaxkit/io/handlers/fort73_handler.py +145 -0
  54. reaxkit/io/handlers/fort74_handler.py +155 -0
  55. reaxkit/io/handlers/fort76_handler.py +195 -0
  56. reaxkit/io/handlers/fort78_handler.py +142 -0
  57. reaxkit/io/handlers/fort79_handler.py +227 -0
  58. reaxkit/io/handlers/fort7_handler.py +264 -0
  59. reaxkit/io/handlers/fort99_handler.py +128 -0
  60. reaxkit/io/handlers/geo_handler.py +224 -0
  61. reaxkit/io/handlers/molfra_handler.py +184 -0
  62. reaxkit/io/handlers/params_handler.py +137 -0
  63. reaxkit/io/handlers/summary_handler.py +135 -0
  64. reaxkit/io/handlers/trainset_handler.py +658 -0
  65. reaxkit/io/handlers/vels_handler.py +293 -0
  66. reaxkit/io/handlers/xmolout_handler.py +174 -0
  67. reaxkit/utils/__init__.py +0 -0
  68. reaxkit/utils/alias.py +219 -0
  69. reaxkit/utils/cache.py +77 -0
  70. reaxkit/utils/constants.py +75 -0
  71. reaxkit/utils/equation_of_states.py +96 -0
  72. reaxkit/utils/exceptions.py +27 -0
  73. reaxkit/utils/frame_utils.py +175 -0
  74. reaxkit/utils/log.py +43 -0
  75. reaxkit/utils/media/__init__.py +0 -0
  76. reaxkit/utils/media/convert.py +90 -0
  77. reaxkit/utils/media/make_video.py +91 -0
  78. reaxkit/utils/media/plotter.py +812 -0
  79. reaxkit/utils/numerical/__init__.py +0 -0
  80. reaxkit/utils/numerical/extrema_finder.py +96 -0
  81. reaxkit/utils/numerical/moving_average.py +103 -0
  82. reaxkit/utils/numerical/numerical_calcs.py +75 -0
  83. reaxkit/utils/numerical/signal_ops.py +135 -0
  84. reaxkit/utils/path.py +55 -0
  85. reaxkit/utils/units.py +104 -0
  86. reaxkit/webui/__init__.py +0 -0
  87. reaxkit/webui/app.py +0 -0
  88. reaxkit/webui/components.py +0 -0
  89. reaxkit/webui/layouts.py +0 -0
  90. reaxkit/webui/utils.py +0 -0
  91. reaxkit/workflows/__init__.py +0 -0
  92. reaxkit/workflows/composed/__init__.py +0 -0
  93. reaxkit/workflows/composed/coordination_workflow.py +393 -0
  94. reaxkit/workflows/composed/electrostatics_workflow.py +587 -0
  95. reaxkit/workflows/composed/xmolout_fort7_workflow.py +343 -0
  96. reaxkit/workflows/meta/__init__.py +0 -0
  97. reaxkit/workflows/meta/help_workflow.py +136 -0
  98. reaxkit/workflows/meta/introspection_workflow.py +235 -0
  99. reaxkit/workflows/meta/make_video_workflow.py +61 -0
  100. reaxkit/workflows/meta/plotter_workflow.py +601 -0
  101. reaxkit/workflows/per_file/__init__.py +0 -0
  102. reaxkit/workflows/per_file/control_workflow.py +110 -0
  103. reaxkit/workflows/per_file/eregime_workflow.py +267 -0
  104. reaxkit/workflows/per_file/ffield_workflow.py +390 -0
  105. reaxkit/workflows/per_file/fort13_workflow.py +86 -0
  106. reaxkit/workflows/per_file/fort57_workflow.py +137 -0
  107. reaxkit/workflows/per_file/fort73_workflow.py +151 -0
  108. reaxkit/workflows/per_file/fort74_workflow.py +88 -0
  109. reaxkit/workflows/per_file/fort76_workflow.py +188 -0
  110. reaxkit/workflows/per_file/fort78_workflow.py +135 -0
  111. reaxkit/workflows/per_file/fort79_workflow.py +314 -0
  112. reaxkit/workflows/per_file/fort7_workflow.py +592 -0
  113. reaxkit/workflows/per_file/fort83_workflow.py +60 -0
  114. reaxkit/workflows/per_file/fort99_workflow.py +223 -0
  115. reaxkit/workflows/per_file/geo_workflow.py +554 -0
  116. reaxkit/workflows/per_file/molfra_workflow.py +577 -0
  117. reaxkit/workflows/per_file/params_workflow.py +135 -0
  118. reaxkit/workflows/per_file/summary_workflow.py +161 -0
  119. reaxkit/workflows/per_file/trainset_workflow.py +356 -0
  120. reaxkit/workflows/per_file/tregime_workflow.py +79 -0
  121. reaxkit/workflows/per_file/vels_workflow.py +309 -0
  122. reaxkit/workflows/per_file/vregime_workflow.py +75 -0
  123. reaxkit/workflows/per_file/xmolout_workflow.py +678 -0
  124. reaxkit-1.0.0.dist-info/METADATA +128 -0
  125. reaxkit-1.0.0.dist-info/RECORD +130 -0
  126. reaxkit-1.0.0.dist-info/WHEEL +5 -0
  127. reaxkit-1.0.0.dist-info/entry_points.txt +2 -0
  128. reaxkit-1.0.0.dist-info/licenses/AUTHORS.md +20 -0
  129. reaxkit-1.0.0.dist-info/licenses/LICENSE +21 -0
  130. reaxkit-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,359 @@
1
+ """
2
+ molfra (molecular fragment) analysis utilities.
3
+
4
+ This module provides molecule-level and system-level analysis tools
5
+ for ReaxFF ``molfra.out`` and ``molfra_ig.out`` files via ``MolFraHandler``.
6
+
7
+ Typical use cases include:
8
+
9
+ - tracking molecular species counts over time
10
+ - converting molecule occurrence tables between wide and long formats
11
+ - extracting system totals (molecules, atoms, mass) versus iteration or time
12
+ - identifying and characterizing the largest (slab) molecule in the system
13
+ """
14
+
15
+
16
+ from __future__ import annotations
17
+ import re
18
+ import pandas as pd
19
+ from typing import Optional, Iterable, Dict, Sequence
20
+
21
+ from reaxkit.io.handlers.molfra_handler import MolFraHandler
22
+ from reaxkit.utils.media.convert import convert_xaxis
23
+
24
+ # =======================
25
+ # Molecule-level analysis
26
+ # =======================
27
def get_molfra_data_wide_format(
    handler: MolFraHandler,
    *,
    molecules: Optional[Iterable[str]] = None,
    iters: Optional[Sequence[int]] = None,
    by_index: bool = False,
    fill_value: int = 0,
) -> pd.DataFrame:
    """
    Return molecule occurrence counts across iterations (wide format).

    Works on
    --------
    MolFraHandler — ``molfra.out`` / ``molfra_ig.out``

    Parameters
    ----------
    handler : MolFraHandler
        Parsed molecular fragment handler. Its ``dataframe()`` must provide the
        columns ``iter``, ``molecular_formula`` and ``freq``.
    molecules : iterable of str, optional
        Molecular formulas to include (e.g. ``"H2O"``, ``"CO2"``).
        If None, all detected molecules are included. Any iterable is accepted,
        including one-shot generators; repeated names are collapsed (first
        occurrence wins) and a bare string is treated as a single formula.
    iters : sequence of int, optional
        Iteration numbers to include.
    by_index : bool, default=False
        If True, interpret ``iters`` as indices into the sorted unique
        iterations that remain after the ``molecules`` filter has been applied.
        Indices outside ``0 .. n-1`` are ignored.
    fill_value : int, default=0
        Value used when a requested molecule is absent at an iteration.

    Returns
    -------
    pandas.DataFrame
        Wide table with columns:
        ``iter`` and one column per molecule containing occurrence counts.
        When ``molecules`` is given, the molecule columns follow its order.

    Examples
    --------
    >>> df = get_molfra_data_wide_format(h, molecules=["H2O", "OH"], iters=[0, 100])
    """
    # ``molecules`` is consumed several times below (filter, column back-fill,
    # final column order), so materialise it exactly once. ``dict.fromkeys``
    # removes duplicates while keeping the caller's order.
    if molecules is None:
        wanted = None
    elif isinstance(molecules, str):
        wanted = [molecules]
    else:
        wanted = list(dict.fromkeys(molecules))

    df = handler.dataframe()
    if df.empty:
        return pd.DataFrame(columns=["iter"] + (wanted or []))

    # Filter molecules if requested
    if wanted is not None:
        df = df[df["molecular_formula"].isin(wanted)]

    # Frame selection
    if iters is not None:
        if by_index:
            uniq = sorted(df["iter"].unique().tolist())
            chosen = {uniq[i] for i in iters if 0 <= i < len(uniq)}
        else:
            chosen = set(iters)
        df = df[df["iter"].isin(chosen)]

    # Pivot: one row per iteration, one column per molecular formula
    pivot = (
        df.pivot_table(
            index="iter",
            columns="molecular_formula",
            values="freq",
            aggfunc="max",
            fill_value=fill_value,
        )
        .sort_index()
        .reset_index()
    )

    if wanted is None:
        return pivot[["iter"] + [c for c in pivot.columns if c != "iter"]]

    # Requested molecules that never occur still get a column
    for m in wanted:
        if m not in pivot.columns:
            pivot[m] = fill_value
    return pivot[["iter"] + wanted]
107
+
108
+
109
def get_molfra_data_long_format(
    handler: MolFraHandler,
    *,
    molecules: Optional[Iterable[str]] = None,
    iters: Optional[Sequence[int]] = None,
    by_index: bool = False,
    fill_value: int = 0,
) -> pd.DataFrame:
    """Return molecule occurrence counts across iterations (long format).

    The wide table produced by :func:`get_molfra_data_wide_format` is melted
    so that every (iteration, molecule) pair becomes one row.

    Works on
    --------
    MolFraHandler — ``molfra.out`` / ``molfra_ig.out``

    Parameters
    ----------
    handler : MolFraHandler
        Parsed molecular fragment handler.
    molecules, iters, by_index, fill_value
        Forwarded unchanged to :func:`get_molfra_data_wide_format`.

    Returns
    -------
    pandas.DataFrame
        Long-form table with columns:
        ``iter``, ``molecular_formula``, ``freq``, sorted by ``iter`` and
        then ``molecular_formula``. An empty wide table yields an empty frame
        with those three columns.

    Examples
    --------
    >>> df = get_molfra_data_long_format(h, molecules=["H2O"])
    """
    table = get_molfra_data_wide_format(
        handler,
        molecules=molecules,
        iters=iters,
        by_index=by_index,
        fill_value=fill_value,
    )
    if table.empty:
        return pd.DataFrame(columns=["iter", "molecular_formula", "freq"])

    # Unpivot: every molecule column becomes (molecular_formula, freq) rows
    melted = table.melt(id_vars="iter", var_name="molecular_formula", value_name="freq")
    ordered = melted.sort_values(["iter", "molecular_formula"])
    return ordered.reset_index(drop=True)
156
+
157
+
158
def _qualifying_types(
    handler: MolFraHandler,
    *,
    threshold: int = 3,
    exclude_types: Optional[Iterable[str]] = ("Pt",),
) -> list[str]:
    """Return the sorted molecule types whose maximum ``freq`` is >= ``threshold``.

    Molecule types listed in ``exclude_types`` are removed before the maximum
    is taken. Used to filter out molecules that only ever appear in small
    numbers. An empty handler dataframe yields an empty list.
    """
    records = handler.dataframe()
    if records.empty:
        return []

    if exclude_types:
        skipped = set(exclude_types)
        records = records[~records["molecular_formula"].isin(skipped)]

    # Highest count each molecule type ever reaches
    peak = records.groupby("molecular_formula")["freq"].max()
    return sorted(name for name, top in peak.items() if top >= threshold)
173
+
174
+
175
+ # ====================
176
+ # Totals-level analysis
177
+ # ====================
178
def get_molfra_totals_vs_axis(
    handler: MolFraHandler,
    *,
    xaxis: str = "iter",
    control_file: str = "control",
    quantities: Optional[Iterable[str]] = ("total_molecules", "total_atoms", "total_molecular_mass"),
) -> pd.DataFrame:
    """Return system-level totals versus a chosen x-axis.

    Works on
    --------
    MolFraHandler — ``molfra.out`` / ``molfra_ig.out``

    Parameters
    ----------
    handler : MolFraHandler
        Parsed molecular fragment handler exposing a ``_df_totals`` dataframe
        with an ``iter`` column.
    xaxis : {"iter", "frame", "time"}, default="iter"
        X-axis to use; passed to ``convert_xaxis`` together with the
        iteration values.
    control_file : str, default="control"
        Path to the ReaxFF control file, passed to ``convert_xaxis``.
    quantities : iterable of str, optional
        Totals to include (e.g. ``total_molecules``, ``total_atoms``,
        ``total_molecular_mass``). Names that are not columns of the totals
        dataframe are silently skipped.

    Returns
    -------
    pandas.DataFrame
        Table with the x-axis column first, followed by the requested
        quantities. The x-axis column name is the label returned by
        ``convert_xaxis``, lower-cased, with spaces turned into underscores and
        parentheses removed. For ``xaxis="iter"`` the ``iter`` column is renamed
        to that label; for any other axis the converted values are inserted as
        a new leading column and ``iter`` is kept. An empty totals dataframe
        yields an empty ``DataFrame``.

    Raises
    ------
    AttributeError
        If ``handler`` has no ``_df_totals`` attribute.

    Examples
    --------
    >>> df = get_molfra_totals_vs_axis(h, xaxis="time")
    """
    if not hasattr(handler, "_df_totals"):
        raise AttributeError("Totals dataframe not found. Parse handler with updated version first.")

    totals = handler._df_totals.copy()
    if totals.empty:
        return pd.DataFrame()

    x_vals, xlabel = convert_xaxis(totals["iter"].to_numpy(), xaxis, control_file=control_file)

    # Turn the axis label into a column-friendly name, e.g. "time_ps"
    xcol = xlabel.strip().lower()
    for old, new in ((" ", "_"), ("(", ""), (")", "")):
        xcol = xcol.replace(old, new)

    selected = [name for name in (quantities or []) if name in totals.columns]
    out = totals[["iter"] + selected].copy()

    if xaxis == "iter":
        out.rename(columns={"iter": xcol}, inplace=True)
    else:
        out.insert(0, xcol, x_vals)
    return out
234
+
235
+
236
+ # ============================================================================================
237
+ # the molecule type whose individual molecular mass is the highest = main slab
238
+ # ============================================================================================
239
def largest_molecule_by_individual_mass(
    handler: MolFraHandler,
) -> pd.DataFrame:
    """Identify the molecule type with the largest individual mass at each iteration.

    This is typically the main slab or backbone molecule.

    Works on
    --------
    MolFraHandler — ``molfra.out`` / ``molfra_ig.out``

    Parameters
    ----------
    handler : MolFraHandler
        Parsed molecular fragment handler. Its ``dataframe()`` must provide the
        columns ``iter``, ``molecular_formula`` and ``molecular_mass``.

    Returns
    -------
    pandas.DataFrame
        Table with columns:
        ``iter``, ``molecular_formula``, ``molecular_mass``,
        one row per iteration, sorted by ``iter``. The same three columns are
        returned (with no rows) when there is nothing to report. Rows whose
        ``molecular_mass`` is missing are ignored.

    Examples
    --------
    >>> df = largest_molecule_by_individual_mass(h)
    """
    out_cols = ["iter", "molecular_formula", "molecular_mass"]

    df = handler.dataframe()
    if df.empty:
        return pd.DataFrame(columns=out_cols)

    # idxmax returns index *labels*; a fresh RangeIndex guarantees that each
    # label identifies exactly one row in the .loc lookup below.
    df = df.reset_index(drop=True)

    # A row without a mass can never be the heaviest one, and an iteration
    # holding only missing masses has no well-defined maximum.
    df = df.dropna(subset=["molecular_mass"])
    if df.empty:
        return pd.DataFrame(columns=out_cols)

    # For each iter, select the row with the highest molecular mass
    idx = df.groupby("iter")["molecular_mass"].idxmax()
    return df.loc[idx, out_cols].sort_values("iter").reset_index(drop=True)
274
+
275
+
276
def atoms_in_the_largest_molecule_wide_format(handler: MolFraHandler) -> pd.DataFrame:
    """Return per-element atom counts for the largest molecule at each iteration (wide format).

    The formula of the heaviest molecule per iteration (see
    :func:`largest_molecule_by_individual_mass`) is split into element symbols
    and counts. A symbol without an explicit count contributes one atom, so
    ``OH`` gives ``O=1, H=1`` and ``H2O`` gives ``H=2, O=1``. A symbol that
    occurs more than once in a formula has its counts summed.

    Works on
    --------
    MolFraHandler — ``molfra.out`` / ``molfra_ig.out``

    Parameters
    ----------
    handler : MolFraHandler
        Parsed molecular fragment handler.

    Returns
    -------
    pandas.DataFrame
        Wide table with columns:
        ``iter`` and one column per element symbol (e.g. ``Al``, ``N``, ``O``),
        containing integer atom counts. Element columns are in alphabetical
        order, and an element absent at an iteration is reported as 0.
        If there is no largest-molecule data, an empty frame with only the
        ``iter`` column is returned.

    Examples
    --------
    >>> df = atoms_in_the_largest_molecule_wide_format(h)
    """
    # Get largest molecule per iter
    df_largest = largest_molecule_by_individual_mass(handler)
    if df_largest.empty:
        return pd.DataFrame(columns=["iter"])

    # Element symbol followed by an optional count; an empty count means 1
    token = re.compile(r"([A-Z][a-z]*)(\d*)")

    rows = []
    for it, formula in zip(df_largest["iter"], df_largest["molecular_formula"]):
        # per-iter element->count
        elem_counts: Dict[str, int] = {"iter": int(it)}
        for elem, cnt in token.findall(str(formula)):
            elem_counts[elem] = elem_counts.get(elem, 0) + (int(cnt) if cnt else 1)
        rows.append(elem_counts)

    wide = pd.DataFrame(rows).sort_values("iter").reset_index(drop=True)

    # Element symbols start with an upper-case letter, so they can never
    # collide with the "iter" column.
    elem_cols = sorted(c for c in wide.columns if c != "iter")

    # An element missing from one iteration's formula shows up as NaN after the
    # frame is assembled; report it as an integer 0 instead.
    if elem_cols:
        wide = wide.fillna({c: 0 for c in elem_cols}).astype({c: int for c in elem_cols})

    # Order columns: iter first, then alphabetical elements
    return wide[["iter"] + elem_cols]
330
+
331
+
332
def atoms_in_the_largest_molecule_long_format(handler: MolFraHandler) -> pd.DataFrame:
    """Return per-element atom counts for the largest molecule at each iteration (long format).

    The wide table from :func:`atoms_in_the_largest_molecule_wide_format` is
    melted so that every (iteration, element) pair becomes one row.

    Works on
    --------
    MolFraHandler — ``molfra.out`` / ``molfra_ig.out``

    Parameters
    ----------
    handler : MolFraHandler
        Parsed molecular fragment handler.

    Returns
    -------
    pandas.DataFrame
        Long-form table with columns:
        ``iter``, ``element``, ``freq``, sorted by ``iter`` and then
        ``element``. An empty wide table yields an empty frame with those
        three columns.

    Examples
    --------
    >>> df = atoms_in_the_largest_molecule_long_format(h)
    """
    table = atoms_in_the_largest_molecule_wide_format(handler)
    if table.empty:
        return pd.DataFrame(columns=["iter", "element", "freq"])

    # Unpivot: every element column becomes (element, freq) rows
    melted = table.melt(id_vars="iter", var_name="element", value_name="freq")
    ordered = melted.sort_values(["iter", "element"])
    return ordered.reset_index(drop=True)
359
+
@@ -0,0 +1,258 @@
1
+ """
2
+ params (tunable-parameter list) analysis utilities.
3
+
4
+ This module provides helpers for working with ReaxFF ``params`` files via
5
+ ``ParamsHandler``, and optionally interpreting each params entry as a pointer
6
+ into the corresponding ``ffield`` section via ``FFieldHandler``.
7
+
8
+ Typical use cases include:
9
+
10
+ - loading params tables with optional duplicate removal and sorting
11
+ - translating (ff_section, ff_section_line, ff_parameter) into an ffield parameter name/value
12
+ - attaching human-readable interaction labels (e.g., C-H, C-C-C) when available
13
+ """
14
+
15
+
16
+ from __future__ import annotations
17
+
18
+ import pandas as pd
19
+ from typing import Dict, List, Tuple
20
+
21
+ from reaxkit.io.handlers.params_handler import ParamsHandler
22
+ from reaxkit.io.handlers.ffield_handler import FFieldHandler
23
+ from reaxkit.analysis.per_file.ffield_analyzer import interpret_one_section
24
+
25
def get_params_data(
    handler: ParamsHandler,
    *,
    sort_by: str | None = None,
    ascending: bool = True,
    drop_duplicate: bool = True
) -> pd.DataFrame:
    """
    Retrieve params entries as a DataFrame with optional sorting and de-duplication.

    Works on
    --------
    ParamsHandler — ``params`` / ``params.in``

    Parameters
    ----------
    handler : ParamsHandler
        Parsed params handler.
    sort_by : str, optional
        Column name to sort by (e.g. ``ff_section``, ``min_value``, ``max_value``).
        If None (or empty), rows keep the order of the handler's dataframe.
    ascending : bool, default=True
        Sort order when ``sort_by`` is specified.
    drop_duplicate : bool, default=True
        If True, drop duplicate rows by ``(ff_section, ff_section_line, ff_parameter)``,
        keeping the first occurrence. De-duplication happens before sorting.

    Returns
    -------
    pandas.DataFrame
        A copy of the params table with columns such as:
        ``ff_section``, ``ff_section_line``, ``ff_parameter``,
        ``search_interval``, ``min_value``, ``max_value``, ``inline_comment``.

    Raises
    ------
    ValueError
        If ``sort_by`` is given but is not a column of the table.

    Examples
    --------
    >>> from reaxkit.io.handlers.params_handler import ParamsHandler
    >>> from reaxkit.analysis.per_file.params_analyzer import get_params_data
    >>> h = ParamsHandler("params")
    >>> df = get_params_data(h, drop_duplicate=True)
    """
    table = handler.dataframe().copy()

    if drop_duplicate:
        # These three fields together identify one tunable ffield entry
        pointer_cols = ["ff_section", "ff_section_line", "ff_parameter"]
        table = table.drop_duplicates(subset=pointer_cols, keep="first")

    if not sort_by:
        return table

    if sort_by not in table.columns:
        raise ValueError(
            f"'sort_by' must be one of {list(table.columns)}, got {sort_by!r}"
        )
    return table.sort_values(by=sort_by, ascending=ascending)
82
+
83
+ ###############################################################################
84
+ # An interpreter that translates a params line like
85
+ # 3 49 1 1.0000 45.0 180.0
86
+ # into: bond data (because ff_section = 3),
87
+ # line number 49 in that section,
88
+ # the first parameter in that line.
89
+ # These are all based on the ffield data.
90
+ ###############################################################################
91
+
92
# Maps the params ``ff_section`` number to a pair of
# (canonical ffield section key, friendly section name).
# Section numbers follow the order of the tuple below, starting at 1.
_SECTION_NUM_MAP: Dict[int, Tuple[str, str]] = dict(
    enumerate(
        (
            (FFieldHandler.SECTION_GENERAL, "general"),
            (FFieldHandler.SECTION_ATOM, "atom"),
            (FFieldHandler.SECTION_BOND, "bond"),
            (FFieldHandler.SECTION_OFF_DIAGONAL, "off_diagonal"),
            (FFieldHandler.SECTION_ANGLE, "angle"),
            (FFieldHandler.SECTION_TORSION, "torsion"),
            (FFieldHandler.SECTION_HBOND, "hbond"),
        ),
        start=1,
    )
)
102
+
103
+
104
# Identity ("index") columns of each ffield section dataframe. These name the
# row and are NOT tunable parameters; every remaining column, taken in the
# dataframe's own column order, is treated as a parameter column.
_SECTION_INDEX_COLS: Dict[str, List[str]] = {
    FFieldHandler.SECTION_GENERAL: [],
    FFieldHandler.SECTION_ATOM: ["symbol"],  # adjust if your atom df has other identity cols
    # Multi-body sections are keyed by the indices of the atoms involved
    FFieldHandler.SECTION_BOND: list("ij"),
    FFieldHandler.SECTION_OFF_DIAGONAL: list("ij"),
    FFieldHandler.SECTION_ANGLE: list("ijk"),
    FFieldHandler.SECTION_TORSION: list("ijkl"),
    FFieldHandler.SECTION_HBOND: list("ijk"),
}
115
+
116
+
117
def _param_columns_for_section(sec_df: pd.DataFrame, section_key: str) -> List[str]:
    """
    List the parameter columns of an ffield section dataframe, in column order.

    A column counts as a parameter when it is not one of the identity columns
    registered for ``section_key`` in ``_SECTION_INDEX_COLS``. A section key
    without an entry there has no identity columns, so all of its columns are
    returned.
    """
    identity = set(_SECTION_INDEX_COLS.get(section_key, []))
    params: List[str] = []
    for name in sec_df.columns:
        if name in identity:
            continue
        params.append(name)
    return params
124
+
125
+
126
def interpret_params(
    params_handler: ParamsHandler,
    ffield_handler: FFieldHandler,
    *,
    add_term: bool = True,
    sep: str = "-",
) -> pd.DataFrame:
    """
    Interpret each params row as a pointer into the corresponding ffield section.

    Each params entry points to an ffield value using:

    - ``ff_section``: section number (1..7 → general, atom, bond, off-diagonal, angle, torsion, hbond)
    - ``ff_section_line``: 1-based row number within that ffield section
    - ``ff_parameter``: 1-based index of the tunable parameter within that row

    Works on
    --------
    ParamsHandler + FFieldHandler — ``params`` + ``ffield``

    Parameters
    ----------
    params_handler : ParamsHandler
        Parsed params handler.
    ffield_handler : FFieldHandler
        Parsed ffield handler.
    add_term : bool, default=True
        If True, include a human-readable interaction label (``term``) for
        multi-body sections when available (e.g. ``C-H``, ``C-C-C``).
    sep : str, default="-"
        Separator used for building ``term`` labels.

    Returns
    -------
    pandas.DataFrame
        Interpreted params table including the original params fields plus:
        - ``ffield_section_key`` and ``ffield_section_name``
        - ``ffield_row_index`` (0-based row index)
        - ``ffield_param_name`` (parameter column name in ffield)
        - ``ffield_value`` (current value from ffield)
        - ``term`` (optional interaction label, ``None`` when not available)

        The columns are always present, even when the params table is empty.

    Raises
    ------
    ValueError
        If a row has an ``ff_section`` that is not a known section number.
    IndexError
        If ``ff_section_line`` or ``ff_parameter`` points outside the
        referenced ffield section.

    Examples
    --------
    >>> from reaxkit.io.handlers.params_handler import ParamsHandler
    >>> from reaxkit.io.handlers.ffield_handler import FFieldHandler
    >>> from reaxkit.analysis.per_file.params_analyzer import interpret_params
    >>> p = ParamsHandler("params")
    >>> f = FFieldHandler("ffield")
    >>> df = interpret_params(p, f, add_term=True)
    """
    out_columns = [
        # original params fields
        "ff_section",
        "ff_section_line",
        "ff_parameter",
        "search_interval",
        "min_value",
        "max_value",
        "inline_comment",
        # interpreted fields
        "ffield_section_key",
        "ffield_section_name",
        "ffield_row_index",
        "ffield_param_name",
        "ffield_value",
        "term",
    ]

    # Sections for which an interaction label ("term") is requested
    term_sections = {
        FFieldHandler.SECTION_BOND,
        FFieldHandler.SECTION_OFF_DIAGONAL,
        FFieldHandler.SECTION_ANGLE,
        FFieldHandler.SECTION_TORSION,
        FFieldHandler.SECTION_HBOND,
    }

    p = params_handler.dataframe()

    out_rows: List[Dict[str, object]] = []

    # section_key -> (section df, ordered tunable-parameter columns).
    # Both depend only on the section, so they are built once per section
    # rather than once per params row.
    sec_cache: Dict[str, Tuple[pd.DataFrame, List[str]]] = {}

    for r in p.itertuples(index=False):
        sec_num = int(r.ff_section)
        line_1b = int(r.ff_section_line)
        par_1b = int(r.ff_parameter)

        if sec_num not in _SECTION_NUM_MAP:
            raise ValueError(f"Unknown ff_section={sec_num}. Expected 1..7.")

        section_key, section_name = _SECTION_NUM_MAP[sec_num]

        if section_key not in sec_cache:
            if add_term and section_key in term_sections:
                # adds i_symbol/j_symbol/... and 'term'
                loaded = interpret_one_section(ffield_handler, section=section_name, sep=sep)
            else:
                loaded = ffield_handler.section_df(section_key).copy()

            # The helper columns added for labelling ("*_symbol", "term") are
            # not listed in _SECTION_INDEX_COLS, so drop them explicitly: a
            # params pointer must only ever resolve to a real parameter.
            tunable = [
                c
                for c in _param_columns_for_section(loaded, section_key)
                if not (c.endswith("_symbol") or c == "term")
            ]
            sec_cache[section_key] = (loaded, tunable)

        sec_df, param_cols = sec_cache[section_key]

        row_idx = line_1b - 1
        if row_idx < 0 or row_idx >= len(sec_df):
            raise IndexError(
                f"params points to {section_name} line {line_1b}, "
                f"but section has {len(sec_df)} rows."
            )

        par_idx = par_1b - 1
        if par_idx < 0 or par_idx >= len(param_cols):
            raise IndexError(
                f"params points to {section_name} parameter {par_1b}, "
                f"but only {len(param_cols)} parameter columns exist: {param_cols}"
            )

        param_name = param_cols[par_idx]
        row = sec_df.iloc[row_idx]

        out_rows.append(
            {
                # original params fields
                "ff_section": sec_num,
                "ff_section_line": line_1b,
                "ff_parameter": par_1b,
                "search_interval": r.search_interval,
                "min_value": r.min_value,
                "max_value": r.max_value,
                "inline_comment": r.inline_comment,

                # interpreted fields
                "ffield_section_key": section_key,
                "ffield_section_name": section_name,
                "ffield_row_index": row_idx,  # 0-based
                "ffield_param_name": param_name,
                "ffield_value": row[param_name],
                "term": row.get("term") if add_term else None,
            }
        )

    # Passing the column list keeps the schema stable for an empty params table
    return pd.DataFrame(out_rows, columns=out_columns)
@@ -0,0 +1,84 @@
1
+ """
2
+ summary.txt analysis utilities.
3
+
4
+ This module provides helper functions for accessing scalar, per-frame
5
+ summary quantities written by ReaxFF into ``summary.txt`` files via
6
+ ``SummaryHandler``.
7
+
8
+ Typical use cases include:
9
+
10
+ - extracting a single summary quantity (e.g. potential energy) as a time series
11
+ - selecting subsets of frames for post-processing or plotting
12
+ - working with canonical column names and legacy aliases transparently
13
+ """
14
+
15
+
16
+ from __future__ import annotations
17
+ from typing import Optional, Sequence, Union
18
+ import pandas as pd
19
+
20
+ from reaxkit.io.handlers.summary_handler import SummaryHandler
21
+ from reaxkit.utils.alias import _resolve_alias, available_keys, normalize_choice
22
+
23
+ __all__ = ["get_summary_data"]
24
+
25
+
26
def get_summary_data(
    handler: SummaryHandler,
    feature: str,
    frames: Optional[Union[slice, Sequence[int]]] = None,
) -> pd.Series:
    """Extract a single summary quantity from ``summary.txt`` as a pandas Series.

    Canonical column names (e.g. ``E_pot``) and legacy aliases
    (e.g. ``Epot(kcal/mol)``) are both supported. If alias resolution fails,
    ``feature`` is matched case-insensitively (after stripping whitespace)
    against the dataframe's column names.

    Works on
    --------
    SummaryHandler — ``summary.txt``

    Parameters
    ----------
    handler : SummaryHandler
        Parsed ``summary.txt`` handler.
    feature : str
        Name or alias of the summary quantity to extract.
    frames : slice or sequence of int, optional
        Positional row selection applied with ``Series.iloc``.
        If None, all rows are returned.

    Returns
    -------
    pandas.Series
        A copy of the requested column, restricted to ``frames`` if given.

    Raises
    ------
    KeyError
        If ``feature`` can be resolved neither as an alias nor as a
        case-insensitive column name.

    Examples
    --------
    >>> from reaxkit.io.handlers.summary_handler import SummaryHandler
    >>> from reaxkit.analysis.per_file.summary_analyzer import get_summary_data
    >>> h = SummaryHandler("summary.txt")
    >>> epot = get_summary_data(h, "E_pot")
    >>> epot_head = get_summary_data(h, "Epot(kcal/mol)", frames=slice(0, 10))
    """
    # Legacy spelling -> canonical name (e.g. "Epot(kcal/mol)" -> "E_pot")
    canonical = normalize_choice(feature)

    try:
        col = _resolve_alias(handler, canonical)
    except KeyError:
        # Last resort: compare against the actual column names, ignoring case
        by_lower = {name.lower(): name for name in handler.dataframe().columns}
        wanted = feature.strip().lower()
        if wanted not in by_lower:
            raise KeyError(
                f"Column '{feature}' not found. "
                f"Try one of: {available_keys(handler.dataframe().columns)}"
            )
        col = by_lower[wanted]

    series = handler.dataframe()[col]
    if frames is None:
        return series.copy()

    # A slice is used as-is; any other sequence is turned into a position list
    positions = frames if isinstance(frames, slice) else list(frames)
    return series.iloc[positions].copy()