reaxkit 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reaxkit/__init__.py +0 -0
- reaxkit/analysis/__init__.py +0 -0
- reaxkit/analysis/composed/RDF_analyzer.py +560 -0
- reaxkit/analysis/composed/__init__.py +0 -0
- reaxkit/analysis/composed/connectivity_analyzer.py +706 -0
- reaxkit/analysis/composed/coordination_analyzer.py +144 -0
- reaxkit/analysis/composed/electrostatics_analyzer.py +687 -0
- reaxkit/analysis/per_file/__init__.py +0 -0
- reaxkit/analysis/per_file/control_analyzer.py +165 -0
- reaxkit/analysis/per_file/eregime_analyzer.py +108 -0
- reaxkit/analysis/per_file/ffield_analyzer.py +305 -0
- reaxkit/analysis/per_file/fort13_analyzer.py +79 -0
- reaxkit/analysis/per_file/fort57_analyzer.py +106 -0
- reaxkit/analysis/per_file/fort73_analyzer.py +61 -0
- reaxkit/analysis/per_file/fort74_analyzer.py +65 -0
- reaxkit/analysis/per_file/fort76_analyzer.py +191 -0
- reaxkit/analysis/per_file/fort78_analyzer.py +154 -0
- reaxkit/analysis/per_file/fort79_analyzer.py +83 -0
- reaxkit/analysis/per_file/fort7_analyzer.py +393 -0
- reaxkit/analysis/per_file/fort99_analyzer.py +411 -0
- reaxkit/analysis/per_file/molfra_analyzer.py +359 -0
- reaxkit/analysis/per_file/params_analyzer.py +258 -0
- reaxkit/analysis/per_file/summary_analyzer.py +84 -0
- reaxkit/analysis/per_file/trainset_analyzer.py +84 -0
- reaxkit/analysis/per_file/vels_analyzer.py +95 -0
- reaxkit/analysis/per_file/xmolout_analyzer.py +528 -0
- reaxkit/cli.py +181 -0
- reaxkit/count_loc.py +276 -0
- reaxkit/data/alias.yaml +89 -0
- reaxkit/data/constants.yaml +27 -0
- reaxkit/data/reaxff_input_files_contents.yaml +186 -0
- reaxkit/data/reaxff_output_files_contents.yaml +301 -0
- reaxkit/data/units.yaml +38 -0
- reaxkit/help/__init__.py +0 -0
- reaxkit/help/help_index_loader.py +531 -0
- reaxkit/help/introspection_utils.py +131 -0
- reaxkit/io/__init__.py +0 -0
- reaxkit/io/base_handler.py +165 -0
- reaxkit/io/generators/__init__.py +0 -0
- reaxkit/io/generators/control_generator.py +123 -0
- reaxkit/io/generators/eregime_generator.py +341 -0
- reaxkit/io/generators/geo_generator.py +967 -0
- reaxkit/io/generators/trainset_generator.py +1758 -0
- reaxkit/io/generators/tregime_generator.py +113 -0
- reaxkit/io/generators/vregime_generator.py +164 -0
- reaxkit/io/generators/xmolout_generator.py +304 -0
- reaxkit/io/handlers/__init__.py +0 -0
- reaxkit/io/handlers/control_handler.py +209 -0
- reaxkit/io/handlers/eregime_handler.py +122 -0
- reaxkit/io/handlers/ffield_handler.py +812 -0
- reaxkit/io/handlers/fort13_handler.py +123 -0
- reaxkit/io/handlers/fort57_handler.py +143 -0
- reaxkit/io/handlers/fort73_handler.py +145 -0
- reaxkit/io/handlers/fort74_handler.py +155 -0
- reaxkit/io/handlers/fort76_handler.py +195 -0
- reaxkit/io/handlers/fort78_handler.py +142 -0
- reaxkit/io/handlers/fort79_handler.py +227 -0
- reaxkit/io/handlers/fort7_handler.py +264 -0
- reaxkit/io/handlers/fort99_handler.py +128 -0
- reaxkit/io/handlers/geo_handler.py +224 -0
- reaxkit/io/handlers/molfra_handler.py +184 -0
- reaxkit/io/handlers/params_handler.py +137 -0
- reaxkit/io/handlers/summary_handler.py +135 -0
- reaxkit/io/handlers/trainset_handler.py +658 -0
- reaxkit/io/handlers/vels_handler.py +293 -0
- reaxkit/io/handlers/xmolout_handler.py +174 -0
- reaxkit/utils/__init__.py +0 -0
- reaxkit/utils/alias.py +219 -0
- reaxkit/utils/cache.py +77 -0
- reaxkit/utils/constants.py +75 -0
- reaxkit/utils/equation_of_states.py +96 -0
- reaxkit/utils/exceptions.py +27 -0
- reaxkit/utils/frame_utils.py +175 -0
- reaxkit/utils/log.py +43 -0
- reaxkit/utils/media/__init__.py +0 -0
- reaxkit/utils/media/convert.py +90 -0
- reaxkit/utils/media/make_video.py +91 -0
- reaxkit/utils/media/plotter.py +812 -0
- reaxkit/utils/numerical/__init__.py +0 -0
- reaxkit/utils/numerical/extrema_finder.py +96 -0
- reaxkit/utils/numerical/moving_average.py +103 -0
- reaxkit/utils/numerical/numerical_calcs.py +75 -0
- reaxkit/utils/numerical/signal_ops.py +135 -0
- reaxkit/utils/path.py +55 -0
- reaxkit/utils/units.py +104 -0
- reaxkit/webui/__init__.py +0 -0
- reaxkit/webui/app.py +0 -0
- reaxkit/webui/components.py +0 -0
- reaxkit/webui/layouts.py +0 -0
- reaxkit/webui/utils.py +0 -0
- reaxkit/workflows/__init__.py +0 -0
- reaxkit/workflows/composed/__init__.py +0 -0
- reaxkit/workflows/composed/coordination_workflow.py +393 -0
- reaxkit/workflows/composed/electrostatics_workflow.py +587 -0
- reaxkit/workflows/composed/xmolout_fort7_workflow.py +343 -0
- reaxkit/workflows/meta/__init__.py +0 -0
- reaxkit/workflows/meta/help_workflow.py +136 -0
- reaxkit/workflows/meta/introspection_workflow.py +235 -0
- reaxkit/workflows/meta/make_video_workflow.py +61 -0
- reaxkit/workflows/meta/plotter_workflow.py +601 -0
- reaxkit/workflows/per_file/__init__.py +0 -0
- reaxkit/workflows/per_file/control_workflow.py +110 -0
- reaxkit/workflows/per_file/eregime_workflow.py +267 -0
- reaxkit/workflows/per_file/ffield_workflow.py +390 -0
- reaxkit/workflows/per_file/fort13_workflow.py +86 -0
- reaxkit/workflows/per_file/fort57_workflow.py +137 -0
- reaxkit/workflows/per_file/fort73_workflow.py +151 -0
- reaxkit/workflows/per_file/fort74_workflow.py +88 -0
- reaxkit/workflows/per_file/fort76_workflow.py +188 -0
- reaxkit/workflows/per_file/fort78_workflow.py +135 -0
- reaxkit/workflows/per_file/fort79_workflow.py +314 -0
- reaxkit/workflows/per_file/fort7_workflow.py +592 -0
- reaxkit/workflows/per_file/fort83_workflow.py +60 -0
- reaxkit/workflows/per_file/fort99_workflow.py +223 -0
- reaxkit/workflows/per_file/geo_workflow.py +554 -0
- reaxkit/workflows/per_file/molfra_workflow.py +577 -0
- reaxkit/workflows/per_file/params_workflow.py +135 -0
- reaxkit/workflows/per_file/summary_workflow.py +161 -0
- reaxkit/workflows/per_file/trainset_workflow.py +356 -0
- reaxkit/workflows/per_file/tregime_workflow.py +79 -0
- reaxkit/workflows/per_file/vels_workflow.py +309 -0
- reaxkit/workflows/per_file/vregime_workflow.py +75 -0
- reaxkit/workflows/per_file/xmolout_workflow.py +678 -0
- reaxkit-1.0.0.dist-info/METADATA +128 -0
- reaxkit-1.0.0.dist-info/RECORD +130 -0
- reaxkit-1.0.0.dist-info/WHEEL +5 -0
- reaxkit-1.0.0.dist-info/entry_points.txt +2 -0
- reaxkit-1.0.0.dist-info/licenses/AUTHORS.md +20 -0
- reaxkit-1.0.0.dist-info/licenses/LICENSE +21 -0
- reaxkit-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,359 @@
|
|
|
1
|
+
"""
|
|
2
|
+
molfra (molecular fragment) analysis utilities.
|
|
3
|
+
|
|
4
|
+
This module provides molecule-level and system-level analysis tools
|
|
5
|
+
for ReaxFF ``molfra.out`` and ``molfra_ig.out`` files via ``MolFraHandler``.
|
|
6
|
+
|
|
7
|
+
Typical use cases include:
|
|
8
|
+
|
|
9
|
+
- tracking molecular species counts over time
|
|
10
|
+
- converting molecule occurrence tables between wide and long formats
|
|
11
|
+
- extracting system totals (molecules, atoms, mass) versus iteration or time
|
|
12
|
+
- identifying and characterizing the largest (slab) molecule in the system
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
import re
|
|
18
|
+
import pandas as pd
|
|
19
|
+
from typing import Optional, Iterable, Dict, Sequence
|
|
20
|
+
|
|
21
|
+
from reaxkit.io.handlers.molfra_handler import MolFraHandler
|
|
22
|
+
from reaxkit.utils.media.convert import convert_xaxis
|
|
23
|
+
|
|
24
|
+
# =======================
|
|
25
|
+
# Molecule-level analysis
|
|
26
|
+
# =======================
|
|
27
|
+
def get_molfra_data_wide_format(
    handler: MolFraHandler,
    *,
    molecules: Optional[Iterable[str]] = None,
    iters: Optional[Sequence[int]] = None,
    by_index: bool = False,
    fill_value: int = 0,
) -> pd.DataFrame:
    """
    Return molecule occurrence counts across iterations (wide format).

    Works on
    --------
    MolFraHandler — ``molfra.out`` / ``molfra_ig.out``

    Parameters
    ----------
    handler : MolFraHandler
        Parsed molecular fragment handler. Its ``dataframe()`` must provide
        the columns ``iter``, ``molecular_formula`` and ``freq``.
    molecules : iterable of str, optional
        Molecular formulas to include (e.g. ``"H2O"``, ``"CO2"``).
        If None, all detected molecules are included. Any iterable is
        accepted, including one-shot generators.
    iters : sequence of int, optional
        Iteration numbers to include.
    by_index : bool, default=False
        If True, interpret ``iters`` as indices into the sorted list of all
        unique iterations in the file (independent of ``molecules``).
        Out-of-range and negative indices are ignored.
    fill_value : int, default=0
        Value used when a requested molecule is absent at an iteration.

    Returns
    -------
    pandas.DataFrame
        Wide table with columns:
        ``iter`` and one column per molecule containing occurrence counts.
        Every selected iteration gets a row, even when none of the requested
        molecules occurs in it (those cells hold ``fill_value``).

    Examples
    --------
    >>> df = get_molfra_data_wide_format(h, molecules=["H2O", "OH"], iters=[0, 100])
    """
    # Materialize once: ``molecules`` is used several times below and may be
    # a generator that can only be consumed a single time.
    wanted = list(molecules) if molecules is not None else None

    df = handler.dataframe()
    if df.empty:
        return pd.DataFrame(columns=["iter"] + (wanted or []))

    # Resolve the iteration selection against the FULL iteration list so that
    # index-based selection does not shift when a molecule filter is active.
    all_iters = sorted(df["iter"].unique().tolist())
    if iters is None:
        selected = all_iters
    elif by_index:
        positions = sorted({i for i in iters if 0 <= i < len(all_iters)})
        selected = [all_iters[i] for i in positions]
    else:
        requested = set(iters)
        selected = [it for it in all_iters if it in requested]

    keep = df["iter"].isin(selected)
    if wanted is not None:
        keep &= df["molecular_formula"].isin(wanted)
    counts = df[keep]

    if counts.empty:
        # Nothing to pivot; start from a frame that only carries the index.
        pivot = pd.DataFrame(index=pd.Index(selected, name="iter"))
    else:
        pivot = counts.pivot_table(
            index="iter",
            columns="molecular_formula",
            values="freq",
            aggfunc="max",
            fill_value=fill_value,
        )

    # Re-add iterations in which none of the requested molecules occurred.
    pivot = pivot.reindex(
        index=pd.Index(selected, name="iter"), fill_value=fill_value
    ).reset_index()

    if wanted is not None:
        # Ensure every requested molecule has a column, in the requested order.
        for formula in wanted:
            if formula not in pivot.columns:
                pivot[formula] = fill_value
        return pivot[["iter"] + wanted]

    return pivot[["iter"] + [c for c in pivot.columns if c != "iter"]]
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def get_molfra_data_long_format(
    handler: MolFraHandler,
    *,
    molecules: Optional[Iterable[str]] = None,
    iters: Optional[Sequence[int]] = None,
    by_index: bool = False,
    fill_value: int = 0,
) -> pd.DataFrame:
    """Return molecule occurrence counts across iterations (long format).

    The wide table is built by :func:`get_molfra_data_wide_format` and then
    melted so that every (iteration, molecule) pair becomes one row.

    Works on
    --------
    MolFraHandler — ``molfra.out`` / ``molfra_ig.out``

    Parameters
    ----------
    handler : MolFraHandler
        Parsed molecular fragment handler.
    molecules, iters, by_index, fill_value
        Forwarded unchanged to :func:`get_molfra_data_wide_format`; see that
        function for their meaning.

    Returns
    -------
    pandas.DataFrame
        Long-form table with columns:
        ``iter``, ``molecular_formula``, ``freq``,
        sorted by ``iter`` and then ``molecular_formula``.

    Examples
    --------
    >>> df = get_molfra_data_long_format(h, molecules=["H2O"])
    """
    selection = {
        "molecules": molecules,
        "iters": iters,
        "by_index": by_index,
        "fill_value": fill_value,
    }
    table = get_molfra_data_wide_format(handler, **selection)

    if table.empty:
        return pd.DataFrame(columns=["iter", "molecular_formula", "freq"])

    melted = table.melt(id_vars="iter", var_name="molecular_formula", value_name="freq")
    ordered = melted.sort_values(["iter", "molecular_formula"])
    return ordered.reset_index(drop=True)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _qualifying_types(
    handler: MolFraHandler,
    *,
    threshold: int = 3,
    exclude_types: Optional[Iterable[str]] = ("Pt",),
) -> list[str]:
    """Return the sorted molecule formulas whose peak ``freq`` is >= ``threshold``.

    Formulas listed in ``exclude_types`` are removed before the peak count is
    computed. An empty handler dataframe yields an empty list. This filters
    out molecules that only ever appear in small numbers.
    """
    records = handler.dataframe()
    if records.empty:
        return []

    if exclude_types:
        banned = set(exclude_types)
        records = records[~records["molecular_formula"].isin(banned)]

    # Highest count each formula ever reaches over all iterations.
    peak = records.groupby("molecular_formula")["freq"].max()
    frequent = peak.loc[peak >= threshold]
    return sorted(frequent.index.tolist())
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
# ====================
|
|
176
|
+
# Totals-level analysis
|
|
177
|
+
# ====================
|
|
178
|
+
def get_molfra_totals_vs_axis(
    handler: MolFraHandler,
    *,
    xaxis: str = "iter",
    control_file: str = "control",
    quantities: Optional[Iterable[str]] = ("total_molecules", "total_atoms", "total_molecular_mass"),
) -> pd.DataFrame:
    """Return system-level totals versus a chosen x-axis.

    Works on
    --------
    MolFraHandler — ``molfra.out`` / ``molfra_ig.out``

    Parameters
    ----------
    handler : MolFraHandler
        Parsed molecular fragment handler exposing a ``_df_totals`` dataframe
        with an ``iter`` column.
    xaxis : str, default="iter"
        X-axis to use; passed to ``convert_xaxis`` together with the
        iteration numbers.
    control_file : str, default="control"
        Control file path forwarded to ``convert_xaxis``.
    quantities : iterable of str, optional
        Totals to include (e.g. ``total_molecules``, ``total_atoms``,
        ``total_molecular_mass``). Names that are not columns of the totals
        table are silently skipped.

    Returns
    -------
    pandas.DataFrame
        Empty frame when the totals table is empty. Otherwise, for
        ``xaxis="iter"`` the ``iter`` column is renamed to the normalized
        x-label; for any other axis the converted x-values are inserted as
        the first column and ``iter`` is kept. The requested quantities
        follow.

    Raises
    ------
    AttributeError
        If ``handler`` has no ``_df_totals`` attribute.

    Examples
    --------
    >>> df = get_molfra_totals_vs_axis(h, xaxis="time")
    """
    if not hasattr(handler, "_df_totals"):
        raise AttributeError("Totals dataframe not found. Parse handler with updated version first.")

    totals = handler._df_totals.copy()
    if totals.empty:
        return pd.DataFrame()

    x_vals, xlabel = convert_xaxis(totals["iter"].to_numpy(), xaxis, control_file=control_file)

    # Turn the axis label into a column-friendly name, e.g. "time_ps".
    xcol = xlabel.strip().lower().replace(" ", "_")
    for bracket in ("(", ")"):
        xcol = xcol.replace(bracket, "")

    selected = [name for name in (quantities or []) if name in totals.columns]
    out = totals[["iter", *selected]].copy()

    if xaxis == "iter":
        out = out.rename(columns={"iter": xcol})
    else:
        out.insert(0, xcol, x_vals)
    return out
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
# ============================================================================================
|
|
237
|
+
# the molecule type whose individual molecular mass is the highest = main slab
|
|
238
|
+
# ============================================================================================
|
|
239
|
+
def largest_molecule_by_individual_mass(
    handler: MolFraHandler,
) -> pd.DataFrame:
    """Identify the molecule type with the largest individual mass at each iteration.

    This is typically the main slab or backbone molecule.

    Works on
    --------
    MolFraHandler — ``molfra.out`` / ``molfra_ig.out``

    Parameters
    ----------
    handler : MolFraHandler
        Parsed molecular fragment handler. Its ``dataframe()`` must provide
        the columns ``iter``, ``molecular_formula`` and ``molecular_mass``.

    Returns
    -------
    pandas.DataFrame
        Table with columns:
        ``iter``, ``molecular_formula``, ``molecular_mass``,
        one row per iteration, sorted by ``iter``. The same three columns are
        returned (with no rows) when there is nothing to rank.

    Examples
    --------
    >>> df = largest_molecule_by_individual_mass(h)
    """
    columns = ["iter", "molecular_formula", "molecular_mass"]

    df = handler.dataframe()
    if not df.empty:
        # Rows without a mass cannot be ranked; an iteration whose masses are
        # all missing would otherwise make ``idxmax`` / ``loc`` fail.
        df = df.dropna(subset=["molecular_mass"])
    if df.empty:
        # Same schema as the populated result so callers can rely on it.
        return pd.DataFrame(columns=columns)

    # Row label of the heaviest molecule within each iteration.
    heaviest = df.groupby("iter")["molecular_mass"].idxmax()
    return df.loc[heaviest, columns].sort_values("iter").reset_index(drop=True)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def atoms_in_the_largest_molecule_wide_format(handler: MolFraHandler) -> pd.DataFrame:
    """Return per-element atom counts for the largest molecule at each iteration (wide format).

    The largest molecule per iteration is taken from
    :func:`largest_molecule_by_individual_mass`; its formula string is then
    split into element symbols and counts.

    Works on
    --------
    MolFraHandler — ``molfra.out`` / ``molfra_ig.out``

    Parameters
    ----------
    handler : MolFraHandler
        Parsed molecular fragment handler.

    Returns
    -------
    pandas.DataFrame
        Wide table with columns:
        ``iter`` and one column per element symbol (e.g. ``Al``, ``N``, ``O``),
        containing integer atom counts. An element that does not occur in the
        largest molecule of a given iteration is reported as 0. When no
        largest molecule is available the frame only has the ``iter`` column.

    Examples
    --------
    >>> df = atoms_in_the_largest_molecule_wide_format(h)
    """
    df_largest = largest_molecule_by_individual_mass(handler)
    if df_largest.empty:
        return pd.DataFrame(columns=["iter"])

    # An element symbol followed by an OPTIONAL count: in "H2O" the oxygen has
    # no digit and stands for a single atom.
    element_re = re.compile(r"([A-Z][a-z]*)(\d*)")

    rows = []
    for it, formula in zip(df_largest["iter"], df_largest["molecular_formula"]):
        elem_counts: Dict[str, int] = {"iter": int(it)}
        for elem, cnt in element_re.findall(str(formula)):
            # Accumulate so that a symbol repeated in the formula is summed.
            elem_counts[elem] = elem_counts.get(elem, 0) + (int(cnt) if cnt else 1)
        rows.append(elem_counts)

    wide = pd.DataFrame(rows).sort_values("iter").reset_index(drop=True)

    # Elements missing from some iterations come out of the constructor as
    # NaN; report them as 0 and keep the counts integral.
    wide = wide.fillna(0).astype(int)

    # Order columns: iter first, then alphabetical elements.
    cols = ["iter"] + sorted(c for c in wide.columns if c != "iter")
    return wide[cols]
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def atoms_in_the_largest_molecule_long_format(handler: MolFraHandler) -> pd.DataFrame:
    """Return per-element atom counts for the largest molecule at each iteration (long format).

    This is the melted form of
    :func:`atoms_in_the_largest_molecule_wide_format`.

    Works on
    --------
    MolFraHandler — ``molfra.out`` / ``molfra_ig.out``

    Parameters
    ----------
    handler : MolFraHandler
        Parsed molecular fragment handler.

    Returns
    -------
    pandas.DataFrame
        Long-form table with columns:
        ``iter``, ``element``, ``freq``,
        sorted by ``iter`` and then ``element``.

    Examples
    --------
    >>> df = atoms_in_the_largest_molecule_long_format(h)
    """
    per_element = atoms_in_the_largest_molecule_wide_format(handler)
    if per_element.empty:
        return pd.DataFrame(columns=["iter", "element", "freq"])

    tidy = per_element.melt(id_vars="iter", var_name="element", value_name="freq")
    tidy = tidy.sort_values(["iter", "element"])
    return tidy.reset_index(drop=True)
|
|
359
|
+
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
"""
|
|
2
|
+
params (tunable-parameter list) analysis utilities.
|
|
3
|
+
|
|
4
|
+
This module provides helpers for working with ReaxFF ``params`` files via
|
|
5
|
+
``ParamsHandler``, and optionally interpreting each params entry as a pointer
|
|
6
|
+
into the corresponding ``ffield`` section via ``FFieldHandler``.
|
|
7
|
+
|
|
8
|
+
Typical use cases include:
|
|
9
|
+
|
|
10
|
+
- loading params tables with optional duplicate removal and sorting
|
|
11
|
+
- translating (ff_section, ff_section_line, ff_parameter) into an ffield parameter name/value
|
|
12
|
+
- attaching human-readable interaction labels (e.g., C-H, C-C-C) when available
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import pandas as pd
|
|
19
|
+
from typing import Dict, List, Tuple
|
|
20
|
+
|
|
21
|
+
from reaxkit.io.handlers.params_handler import ParamsHandler
|
|
22
|
+
from reaxkit.io.handlers.ffield_handler import FFieldHandler
|
|
23
|
+
from reaxkit.analysis.per_file.ffield_analyzer import interpret_one_section
|
|
24
|
+
|
|
25
|
+
def get_params_data(
    handler: ParamsHandler,
    *,
    sort_by: str | None = None,
    ascending: bool = True,
    drop_duplicate: bool = True
) -> pd.DataFrame:
    """
    Return the params table, optionally de-duplicated and sorted.

    Works on
    --------
    ParamsHandler — ``params`` / ``params.in``

    Parameters
    ----------
    handler : ParamsHandler
        Parsed params handler.
    sort_by : str, optional
        Column name to sort by (e.g. ``ff_section``, ``min_value``, ``max_value``).
        If None (or empty), the row order of the handler table is kept.
    ascending : bool, default=True
        Sort order when ``sort_by`` is specified.
    drop_duplicate : bool, default=True
        If True, drop duplicate rows by ``(ff_section, ff_section_line, ff_parameter)``,
        keeping the first occurrence.

    Returns
    -------
    pandas.DataFrame
        A copy of the handler table (the handler's own frame is not modified)
        with columns such as:
        ``ff_section``, ``ff_section_line``, ``ff_parameter``,
        ``search_interval``, ``min_value``, ``max_value``, ``inline_comment``.

    Raises
    ------
    ValueError
        If ``sort_by`` is given but is not a column of the table.

    Examples
    --------
    >>> from reaxkit.io.handlers.params_handler import ParamsHandler
    >>> from reaxkit.analysis.per_file.params_analyzer import get_params_data
    >>> h = ParamsHandler("params")
    >>> df = get_params_data(h, drop_duplicate=True)
    """
    table = handler.dataframe().copy()

    if drop_duplicate:
        pointer_cols = ["ff_section", "ff_section_line", "ff_parameter"]
        table = table.drop_duplicates(subset=pointer_cols, keep="first")

    if not sort_by:
        return table

    if sort_by not in table.columns:
        raise ValueError(
            f"'sort_by' must be one of {list(table.columns)}, got {sort_by!r}"
        )
    return table.sort_values(by=sort_by, ascending=ascending)
|
|
82
|
+
|
|
83
|
+
###############################################################################
|
|
84
|
+
# An interpreter that translates a params line like
|
|
85
|
+
# 3 49 1 1.0000 45.0 180.0
|
|
86
|
+
# into: bond data (because ff_section = 3),
|
|
87
|
+
# line number 49 in that section,
|
|
88
|
+
# the first parameter in that line.
|
|
89
|
+
# These are all based on the ffield data.
|
|
90
|
+
###############################################################################
|
|
91
|
+
|
|
92
|
+
# ff_section number → canonical ffield section key + friendly name
# Keys are the integers found in the ``ff_section`` column of a params row;
# each value is a pair (FFieldHandler section constant, short section name).
# The short name is what gets passed as ``section=`` to interpret_one_section.
_SECTION_NUM_MAP: Dict[int, Tuple[str, str]] = {
    1: (FFieldHandler.SECTION_GENERAL, "general"),
    2: (FFieldHandler.SECTION_ATOM, "atom"),
    3: (FFieldHandler.SECTION_BOND, "bond"),
    4: (FFieldHandler.SECTION_OFF_DIAGONAL, "off_diagonal"),
    5: (FFieldHandler.SECTION_ANGLE, "angle"),
    6: (FFieldHandler.SECTION_TORSION, "torsion"),
    7: (FFieldHandler.SECTION_HBOND, "hbond"),
}


# Which columns in each section are "index/identity" (NOT tunable parameters)
# Everything else (in original df column order) is treated as "parameter columns".
# Keyed by the same FFieldHandler section constants used in _SECTION_NUM_MAP.
_SECTION_INDEX_COLS: Dict[str, List[str]] = {
    FFieldHandler.SECTION_GENERAL: [],
    FFieldHandler.SECTION_ATOM: ["symbol"],  # adjust if your atom df has other identity cols
    FFieldHandler.SECTION_BOND: ["i", "j"],
    FFieldHandler.SECTION_OFF_DIAGONAL: ["i", "j"],
    FFieldHandler.SECTION_ANGLE: ["i", "j", "k"],
    FFieldHandler.SECTION_TORSION: ["i", "j", "k", "l"],
    FFieldHandler.SECTION_HBOND: ["i", "j", "k"],
}
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _param_columns_for_section(sec_df: pd.DataFrame, section_key: str) -> List[str]:
    """
    List the tunable-parameter columns of an ffield section dataframe.

    Columns registered for ``section_key`` in ``_SECTION_INDEX_COLS`` are
    identity columns and are skipped; every remaining column is returned in
    the dataframe's own column order. A section key without an entry in
    ``_SECTION_INDEX_COLS`` has no identity columns.
    """
    identity = frozenset(_SECTION_INDEX_COLS.get(section_key, []))
    tunable: List[str] = []
    for column in sec_df.columns:
        if column in identity:
            continue
        tunable.append(column)
    return tunable
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def interpret_params(
    params_handler: ParamsHandler,
    ffield_handler: FFieldHandler,
    *,
    add_term: bool = True,
    sep: str = "-",
) -> pd.DataFrame:
    """
    Resolve every params row to the ffield value it points at.

    A params entry addresses an ffield value through three numbers:

    - ``ff_section``: section number (1..7 → general, atom, bond, off-diagonal, angle, torsion, hbond)
    - ``ff_section_line``: 1-based row number within that ffield section
    - ``ff_parameter``: 1-based index of the tunable parameter within that row

    Works on
    --------
    ParamsHandler + FFieldHandler — ``params`` + ``ffield``

    Parameters
    ----------
    params_handler : ParamsHandler
        Parsed params handler.
    ffield_handler : FFieldHandler
        Parsed ffield handler.
    add_term : bool, default=True
        If True, bond / off-diagonal / angle / torsion / hbond sections are
        loaded through ``interpret_one_section`` and the row's ``term`` value
        (when that column exists) is copied to the output. If False, ``term``
        is always None.
    sep : str, default="-"
        Separator forwarded to ``interpret_one_section``.

    Returns
    -------
    pandas.DataFrame
        One row per params entry with the original params fields
        (``ff_section``, ``ff_section_line``, ``ff_parameter``,
        ``search_interval``, ``min_value``, ``max_value``, ``inline_comment``) plus:

        - ``ffield_section_key`` and ``ffield_section_name``
        - ``ffield_row_index`` (0-based row index)
        - ``ffield_param_name`` (parameter column name in ffield)
        - ``ffield_value`` (current value from ffield)
        - ``term`` (optional interaction label)

    Raises
    ------
    ValueError
        If a row's ``ff_section`` is not a key of ``_SECTION_NUM_MAP``.
    IndexError
        If ``ff_section_line`` or ``ff_parameter`` points outside the section
        table or its parameter columns.

    Examples
    --------
    >>> from reaxkit.io.handlers.params_handler import ParamsHandler
    >>> from reaxkit.io.handlers.ffield_handler import FFieldHandler
    >>> from reaxkit.analysis.per_file.params_analyzer import interpret_params
    >>> p = ParamsHandler("params")
    >>> f = FFieldHandler("ffield")
    >>> df = interpret_params(p, f, add_term=True)
    """
    params_df = params_handler.dataframe().copy()

    # Sections that are reloaded through interpret_one_section when add_term is set.
    labelled_sections = {
        FFieldHandler.SECTION_BOND,
        FFieldHandler.SECTION_OFF_DIAGONAL,
        FFieldHandler.SECTION_ANGLE,
        FFieldHandler.SECTION_TORSION,
        FFieldHandler.SECTION_HBOND,
    }

    # Each ffield section table is loaded at most once per call.
    section_tables: Dict[str, pd.DataFrame] = {}
    interpreted: List[Dict[str, object]] = []

    for entry in params_df.itertuples(index=False):
        sec_num = int(entry.ff_section)
        line_1b = int(entry.ff_section_line)
        par_1b = int(entry.ff_parameter)

        mapping = _SECTION_NUM_MAP.get(sec_num)
        if mapping is None:
            raise ValueError(f"Unknown ff_section={sec_num}. Expected 1..7.")
        section_key, section_name = mapping

        sec_df = section_tables.get(section_key)
        if sec_df is None:
            sec_df = ffield_handler.section_df(section_key).copy()
            if add_term and section_key in labelled_sections:
                # adds i_symbol/j_symbol/... and 'term'
                sec_df = interpret_one_section(ffield_handler, section=section_name, sep=sep)
            section_tables[section_key] = sec_df

        row_idx = line_1b - 1
        if not 0 <= row_idx < len(sec_df):
            raise IndexError(
                f"params points to {section_name} line {line_1b}, "
                f"but section has {len(sec_df)} rows."
            )

        # The label columns ("term", "*_symbol") are not listed as identity
        # columns, so they are removed here to keep the 1-based parameter
        # index aligned with the numeric ffield columns only.
        param_cols = [
            c
            for c in _param_columns_for_section(sec_df, section_key)
            if not (c.endswith("_symbol") or c == "term")
        ]

        par_idx = par_1b - 1
        if not 0 <= par_idx < len(param_cols):
            raise IndexError(
                f"params points to {section_name} parameter {par_1b}, "
                f"but only {len(param_cols)} parameter columns exist: {param_cols}"
            )

        param_name = param_cols[par_idx]
        ff_row = sec_df.iloc[row_idx]
        ff_value = ff_row[param_name]
        label = ff_row.get("term") if add_term else None

        record: Dict[str, object] = {
            # original params fields
            "ff_section": sec_num,
            "ff_section_line": line_1b,
            "ff_parameter": par_1b,
            "search_interval": entry.search_interval,
            "min_value": entry.min_value,
            "max_value": entry.max_value,
            "inline_comment": entry.inline_comment,
            # interpreted fields
            "ffield_section_key": section_key,
            "ffield_section_name": section_name,
            "ffield_row_index": row_idx,  # 0-based
            "ffield_param_name": param_name,
            "ffield_value": ff_value,
            "term": label,
        }
        interpreted.append(record)

    return pd.DataFrame(interpreted)
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""
|
|
2
|
+
summary.txt analysis utilities.
|
|
3
|
+
|
|
4
|
+
This module provides helper functions for accessing scalar, per-frame
|
|
5
|
+
summary quantities written by ReaxFF into ``summary.txt`` files via
|
|
6
|
+
``SummaryHandler``.
|
|
7
|
+
|
|
8
|
+
Typical use cases include:
|
|
9
|
+
|
|
10
|
+
- extracting a single summary quantity (e.g. potential energy) as a time series
|
|
11
|
+
- selecting subsets of frames for post-processing or plotting
|
|
12
|
+
- working with canonical column names and legacy aliases transparently
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
from typing import Optional, Sequence, Union
|
|
18
|
+
import pandas as pd
|
|
19
|
+
|
|
20
|
+
from reaxkit.io.handlers.summary_handler import SummaryHandler
|
|
21
|
+
from reaxkit.utils.alias import _resolve_alias, available_keys, normalize_choice
|
|
22
|
+
|
|
23
|
+
# Names exported by ``from ... import *``; the alias helpers imported above are not re-exported.
__all__ = ["get_summary_data"]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def get_summary_data(
    handler: SummaryHandler,
    feature: str,
    frames: Optional[Union[slice, Sequence[int]]] = None,
) -> pd.Series:
    """Extract a single summary quantity from ``summary.txt`` as a pandas Series.

    Canonical column names (e.g. ``E_pot``) and legacy aliases
    (e.g. ``Epot(kcal/mol)``) are both supported: the name is first passed
    through ``normalize_choice`` and ``_resolve_alias``; if that lookup raises
    ``KeyError``, a case-insensitive match of ``feature`` against the
    dataframe columns is tried.

    Works on
    --------
    SummaryHandler — ``summary.txt``

    Parameters
    ----------
    handler : SummaryHandler
        Parsed ``summary.txt`` handler.
    feature : str
        Name or alias of the summary quantity to extract.
    frames : slice or sequence of int, optional
        Positional (``iloc``) frame selection. If None, all frames are returned.

    Returns
    -------
    pandas.Series
        Copy of the requested column, restricted to ``frames`` when given.

    Raises
    ------
    KeyError
        If ``feature`` cannot be resolved to a dataframe column.

    Examples
    --------
    >>> from reaxkit.io.handlers.summary_handler import SummaryHandler
    >>> from reaxkit.analysis.per_file.summary_analyzer import get_summary_data
    >>> h = SummaryHandler("summary.txt")
    >>> epot = get_summary_data(h, "E_pot")
    >>> epot_head = get_summary_data(h, "Epot(kcal/mol)", frames=slice(0, 10))
    """
    # Map legacy -> canonical (e.g., "Epot(kcal/mol)" -> "E_pot")
    canonical = normalize_choice(feature)

    try:
        col = _resolve_alias(handler, canonical)
    except KeyError:
        # Fallback: compare the raw name with the columns, ignoring case.
        by_lower = {name.lower(): name for name in handler.dataframe().columns}
        col = by_lower.get(feature.strip().lower())
        if col is None:
            raise KeyError(
                f"Column '{feature}' not found. "
                f"Try one of: {available_keys(handler.dataframe().columns)}"
            )

    series = handler.dataframe()[col]
    if frames is None:
        picked = series
    elif isinstance(frames, slice):
        picked = series.iloc[frames]
    else:
        picked = series.iloc[list(frames)]
    return picked.copy()
|