reaxkit 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reaxkit/__init__.py +0 -0
- reaxkit/analysis/__init__.py +0 -0
- reaxkit/analysis/composed/RDF_analyzer.py +560 -0
- reaxkit/analysis/composed/__init__.py +0 -0
- reaxkit/analysis/composed/connectivity_analyzer.py +706 -0
- reaxkit/analysis/composed/coordination_analyzer.py +144 -0
- reaxkit/analysis/composed/electrostatics_analyzer.py +687 -0
- reaxkit/analysis/per_file/__init__.py +0 -0
- reaxkit/analysis/per_file/control_analyzer.py +165 -0
- reaxkit/analysis/per_file/eregime_analyzer.py +108 -0
- reaxkit/analysis/per_file/ffield_analyzer.py +305 -0
- reaxkit/analysis/per_file/fort13_analyzer.py +79 -0
- reaxkit/analysis/per_file/fort57_analyzer.py +106 -0
- reaxkit/analysis/per_file/fort73_analyzer.py +61 -0
- reaxkit/analysis/per_file/fort74_analyzer.py +65 -0
- reaxkit/analysis/per_file/fort76_analyzer.py +191 -0
- reaxkit/analysis/per_file/fort78_analyzer.py +154 -0
- reaxkit/analysis/per_file/fort79_analyzer.py +83 -0
- reaxkit/analysis/per_file/fort7_analyzer.py +393 -0
- reaxkit/analysis/per_file/fort99_analyzer.py +411 -0
- reaxkit/analysis/per_file/molfra_analyzer.py +359 -0
- reaxkit/analysis/per_file/params_analyzer.py +258 -0
- reaxkit/analysis/per_file/summary_analyzer.py +84 -0
- reaxkit/analysis/per_file/trainset_analyzer.py +84 -0
- reaxkit/analysis/per_file/vels_analyzer.py +95 -0
- reaxkit/analysis/per_file/xmolout_analyzer.py +528 -0
- reaxkit/cli.py +181 -0
- reaxkit/count_loc.py +276 -0
- reaxkit/data/alias.yaml +89 -0
- reaxkit/data/constants.yaml +27 -0
- reaxkit/data/reaxff_input_files_contents.yaml +186 -0
- reaxkit/data/reaxff_output_files_contents.yaml +301 -0
- reaxkit/data/units.yaml +38 -0
- reaxkit/help/__init__.py +0 -0
- reaxkit/help/help_index_loader.py +531 -0
- reaxkit/help/introspection_utils.py +131 -0
- reaxkit/io/__init__.py +0 -0
- reaxkit/io/base_handler.py +165 -0
- reaxkit/io/generators/__init__.py +0 -0
- reaxkit/io/generators/control_generator.py +123 -0
- reaxkit/io/generators/eregime_generator.py +341 -0
- reaxkit/io/generators/geo_generator.py +967 -0
- reaxkit/io/generators/trainset_generator.py +1758 -0
- reaxkit/io/generators/tregime_generator.py +113 -0
- reaxkit/io/generators/vregime_generator.py +164 -0
- reaxkit/io/generators/xmolout_generator.py +304 -0
- reaxkit/io/handlers/__init__.py +0 -0
- reaxkit/io/handlers/control_handler.py +209 -0
- reaxkit/io/handlers/eregime_handler.py +122 -0
- reaxkit/io/handlers/ffield_handler.py +812 -0
- reaxkit/io/handlers/fort13_handler.py +123 -0
- reaxkit/io/handlers/fort57_handler.py +143 -0
- reaxkit/io/handlers/fort73_handler.py +145 -0
- reaxkit/io/handlers/fort74_handler.py +155 -0
- reaxkit/io/handlers/fort76_handler.py +195 -0
- reaxkit/io/handlers/fort78_handler.py +142 -0
- reaxkit/io/handlers/fort79_handler.py +227 -0
- reaxkit/io/handlers/fort7_handler.py +264 -0
- reaxkit/io/handlers/fort99_handler.py +128 -0
- reaxkit/io/handlers/geo_handler.py +224 -0
- reaxkit/io/handlers/molfra_handler.py +184 -0
- reaxkit/io/handlers/params_handler.py +137 -0
- reaxkit/io/handlers/summary_handler.py +135 -0
- reaxkit/io/handlers/trainset_handler.py +658 -0
- reaxkit/io/handlers/vels_handler.py +293 -0
- reaxkit/io/handlers/xmolout_handler.py +174 -0
- reaxkit/utils/__init__.py +0 -0
- reaxkit/utils/alias.py +219 -0
- reaxkit/utils/cache.py +77 -0
- reaxkit/utils/constants.py +75 -0
- reaxkit/utils/equation_of_states.py +96 -0
- reaxkit/utils/exceptions.py +27 -0
- reaxkit/utils/frame_utils.py +175 -0
- reaxkit/utils/log.py +43 -0
- reaxkit/utils/media/__init__.py +0 -0
- reaxkit/utils/media/convert.py +90 -0
- reaxkit/utils/media/make_video.py +91 -0
- reaxkit/utils/media/plotter.py +812 -0
- reaxkit/utils/numerical/__init__.py +0 -0
- reaxkit/utils/numerical/extrema_finder.py +96 -0
- reaxkit/utils/numerical/moving_average.py +103 -0
- reaxkit/utils/numerical/numerical_calcs.py +75 -0
- reaxkit/utils/numerical/signal_ops.py +135 -0
- reaxkit/utils/path.py +55 -0
- reaxkit/utils/units.py +104 -0
- reaxkit/webui/__init__.py +0 -0
- reaxkit/webui/app.py +0 -0
- reaxkit/webui/components.py +0 -0
- reaxkit/webui/layouts.py +0 -0
- reaxkit/webui/utils.py +0 -0
- reaxkit/workflows/__init__.py +0 -0
- reaxkit/workflows/composed/__init__.py +0 -0
- reaxkit/workflows/composed/coordination_workflow.py +393 -0
- reaxkit/workflows/composed/electrostatics_workflow.py +587 -0
- reaxkit/workflows/composed/xmolout_fort7_workflow.py +343 -0
- reaxkit/workflows/meta/__init__.py +0 -0
- reaxkit/workflows/meta/help_workflow.py +136 -0
- reaxkit/workflows/meta/introspection_workflow.py +235 -0
- reaxkit/workflows/meta/make_video_workflow.py +61 -0
- reaxkit/workflows/meta/plotter_workflow.py +601 -0
- reaxkit/workflows/per_file/__init__.py +0 -0
- reaxkit/workflows/per_file/control_workflow.py +110 -0
- reaxkit/workflows/per_file/eregime_workflow.py +267 -0
- reaxkit/workflows/per_file/ffield_workflow.py +390 -0
- reaxkit/workflows/per_file/fort13_workflow.py +86 -0
- reaxkit/workflows/per_file/fort57_workflow.py +137 -0
- reaxkit/workflows/per_file/fort73_workflow.py +151 -0
- reaxkit/workflows/per_file/fort74_workflow.py +88 -0
- reaxkit/workflows/per_file/fort76_workflow.py +188 -0
- reaxkit/workflows/per_file/fort78_workflow.py +135 -0
- reaxkit/workflows/per_file/fort79_workflow.py +314 -0
- reaxkit/workflows/per_file/fort7_workflow.py +592 -0
- reaxkit/workflows/per_file/fort83_workflow.py +60 -0
- reaxkit/workflows/per_file/fort99_workflow.py +223 -0
- reaxkit/workflows/per_file/geo_workflow.py +554 -0
- reaxkit/workflows/per_file/molfra_workflow.py +577 -0
- reaxkit/workflows/per_file/params_workflow.py +135 -0
- reaxkit/workflows/per_file/summary_workflow.py +161 -0
- reaxkit/workflows/per_file/trainset_workflow.py +356 -0
- reaxkit/workflows/per_file/tregime_workflow.py +79 -0
- reaxkit/workflows/per_file/vels_workflow.py +309 -0
- reaxkit/workflows/per_file/vregime_workflow.py +75 -0
- reaxkit/workflows/per_file/xmolout_workflow.py +678 -0
- reaxkit-1.0.0.dist-info/METADATA +128 -0
- reaxkit-1.0.0.dist-info/RECORD +130 -0
- reaxkit-1.0.0.dist-info/WHEEL +5 -0
- reaxkit-1.0.0.dist-info/entry_points.txt +2 -0
- reaxkit-1.0.0.dist-info/licenses/AUTHORS.md +20 -0
- reaxkit-1.0.0.dist-info/licenses/LICENSE +21 -0
- reaxkit-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Params-file workflow for ReaxKit.
|
|
3
|
+
|
|
4
|
+
This workflow provides tools for inspecting and exporting ReaxFF `params` files,
|
|
5
|
+
which define optimization parameters and search intervals used during
|
|
6
|
+
force-field training.
|
|
7
|
+
|
|
8
|
+
It supports:
|
|
9
|
+
- Loading the raw params table with optional duplicate removal and flexible
|
|
10
|
+
column-based sorting.
|
|
11
|
+
- Interpreting params entries by resolving their references into the
|
|
12
|
+
corresponding sections and rows of the `ffield` file.
|
|
13
|
+
- Optionally constructing human-readable chemical terms (e.g. C–C–H) during
|
|
14
|
+
interpretation for improved readability.
|
|
15
|
+
- Exporting processed params data to CSV for downstream analysis, auditing,
|
|
16
|
+
or force-field development workflows.
|
|
17
|
+
|
|
18
|
+
The workflow is designed to bridge low-level optimization parameter definitions
|
|
19
|
+
with interpretable force-field context in a reproducible, CLI-driven manner.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import argparse
|
|
25
|
+
|
|
26
|
+
from reaxkit.io.handlers.params_handler import ParamsHandler
|
|
27
|
+
from reaxkit.io.handlers.ffield_handler import FFieldHandler
|
|
28
|
+
|
|
29
|
+
from reaxkit.analysis.per_file.params_analyzer import get_params_data, interpret_params
|
|
30
|
+
from reaxkit.utils.path import resolve_output_path
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _task_get(args: argparse.Namespace) -> int:
    """Load the params table (raw or interpreted against ffield) and export/preview it.

    Returns 0 on success; exits via SystemExit when an unknown sort column
    is requested.
    """
    handler = ParamsHandler(args.file)
    drop_dupes = not args.keep_duplicates

    if args.interpret:
        # Interpretation resolves params pointers into ffield, so it needs
        # the ffield handler as well.
        table = interpret_params(
            params_handler=handler,
            ffield_handler=FFieldHandler(args.ffield),
            add_term=(not args.no_term),
        )

        # Mirror the raw path's default duplicate removal.
        if drop_dupes:
            table = table.drop_duplicates(
                subset=["ff_section", "ff_section_line", "ff_parameter"],
                keep="first",
            )
    else:
        # Raw table; any sorting is applied uniformly below when requested.
        table = get_params_data(
            handler,
            sort_by=None,
            ascending=True,
            drop_duplicate=drop_dupes,
        )

    # Optional user-requested sorting (no sorting by default).
    if args.sort_by:
        if args.sort_by not in table.columns:
            raise SystemExit(
                f"❌ sort-by column '{args.sort_by}' not found. Available: {', '.join(table.columns)}"
            )
        table = table.sort_values(by=args.sort_by, ascending=(not args.descending))

    # Either export to CSV or print a short preview.
    if args.export:
        out = resolve_output_path(args.export, workflow="params")
        table.to_csv(out, index=False)
        print(f"[Done] Exported the requested data to {out}")
    else:
        print(table.head(20).to_string(index=False))

    return 0
|
|
76
|
+
|
|
77
|
+
#####################################################################################
|
|
78
|
+
|
|
79
|
+
def _add_common_params_io_args(p: argparse.ArgumentParser) -> None:
|
|
80
|
+
# Core IO
|
|
81
|
+
p.add_argument("--file", default="params", help="Path to params file.")
|
|
82
|
+
p.add_argument("--export", default=None, help="Path to export CSV data.")
|
|
83
|
+
|
|
84
|
+
# Default behavior requested:
|
|
85
|
+
# - remove duplicates by default
|
|
86
|
+
# - no sorting by default
|
|
87
|
+
p.add_argument(
|
|
88
|
+
"--keep-duplicates",
|
|
89
|
+
action="store_true",
|
|
90
|
+
help="If set, do NOT drop duplicates (default drops duplicates).",
|
|
91
|
+
)
|
|
92
|
+
p.add_argument(
|
|
93
|
+
"--sort-by",
|
|
94
|
+
default=None,
|
|
95
|
+
help="Optional column name to sort by (default: no sorting).",
|
|
96
|
+
)
|
|
97
|
+
p.add_argument(
|
|
98
|
+
"--descending",
|
|
99
|
+
action="store_true",
|
|
100
|
+
help="If set, sort in descending order (only if --sort-by is used).",
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
def register_tasks(subparsers: argparse._SubParsersAction) -> None:
    """Register the params 'get' task on *subparsers*."""
    examples = (
        "Examples:\n"
        " reaxkit params get --export params.csv\n"
        "\n"
        "Interpreted params:\n"
        " reaxkit params get --interpret --export params_interpreted.csv\n"
    )
    parser = subparsers.add_parser(
        "get",
        help="Load params table (optionally interpret pointers into ffield)",
        description=examples,
        formatter_class=argparse.RawTextHelpFormatter,
    )

    _add_common_params_io_args(parser)

    # Interpretation-specific flags.
    parser.add_argument(
        "--interpret",
        action="store_true",
        help="If set, interpret params pointers into the ffield (adds section/row/param/value/term columns).",
    )
    parser.add_argument(
        "--ffield",
        default="ffield",
        help="Path to ffield file (required when --interpret is set).",
    )
    parser.add_argument(
        "--no-term",
        action="store_true",
        help="If set, do not build readable term (e.g., C-C-H) during interpretation.",
    )

    parser.set_defaults(_run=_task_get)
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
"""
|
|
2
|
+
summary.txt analysis workflow for ReaxKit.
|
|
3
|
+
|
|
4
|
+
This workflow provides tools for reading, analyzing, and visualizing data from
|
|
5
|
+
ReaxFF `summary.txt` files, which contain per-iteration thermodynamic and
|
|
6
|
+
simulation summary quantities.
|
|
7
|
+
|
|
8
|
+
It supports:
|
|
9
|
+
- Extracting a selected summary column (with alias support) as a function of
|
|
10
|
+
iteration, frame index, or physical time.
|
|
11
|
+
- Converting the x-axis between iteration, frame, and time using control-file
|
|
12
|
+
metadata.
|
|
13
|
+
- Selecting subsets of frames for focused analysis.
|
|
14
|
+
- Plotting summary quantities, saving figures, or exporting the processed data
|
|
15
|
+
to CSV using standardized output paths.
|
|
16
|
+
|
|
17
|
+
The workflow is designed for quick inspection and post-processing of ReaxFF
|
|
18
|
+
summary outputs, enabling reproducible analysis of thermodynamic and
|
|
19
|
+
simulation-wide properties.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
import argparse
|
|
25
|
+
from typing import Optional, Sequence, Union
|
|
26
|
+
import pandas as pd
|
|
27
|
+
from reaxkit.utils.units import unit_for
|
|
28
|
+
from reaxkit.utils.media.plotter import single_plot
|
|
29
|
+
from reaxkit.utils.media.convert import convert_xaxis
|
|
30
|
+
from reaxkit.utils.frame_utils import parse_frames, select_frames
|
|
31
|
+
from reaxkit.utils.path import resolve_output_path
|
|
32
|
+
from reaxkit.io.handlers.summary_handler import SummaryHandler
|
|
33
|
+
from reaxkit.utils.alias import available_keys
|
|
34
|
+
from reaxkit.analysis.per_file.summary_analyzer import get_summary_data
|
|
35
|
+
|
|
36
|
+
FramesT = Optional[Union[slice, Sequence[int]]]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _summary_get_task(args: argparse.Namespace) -> int:
    """Extract one summary column vs. iter/frame/time; plot, save, and/or export it.

    Raises:
        KeyError: if the parsed summary data lacks an 'iter' column, or if the
            requested y column cannot be resolved (the analyzer's message
            already lists the available keys).

    Returns:
        0 on success.
    """
    handler = SummaryHandler(args.file)
    df = handler.dataframe().copy()

    # --- X axis: convert from 'iter' using convert_xaxis ---
    if "iter" not in df.columns:
        raise KeyError("Expected 'iter' column in parsed summary data.")
    xvals, xlabel = convert_xaxis(df["iter"].to_numpy(), args.xaxis)

    # --- Y axis: analyzer-level helper handles aliases + fallbacks.
    # Its KeyError already names the available keys, so let it propagate
    # (the previous try/except that just re-raised added nothing).
    y_series = get_summary_data(handler, args.yaxis)

    # Name of the resolved column (canonical or actual df column).
    ycol = y_series.name or args.yaxis

    # Build working DataFrame with aligned index.
    work = pd.DataFrame(
        {
            "x": pd.Series(xvals, index=df.index),
            "y": y_series,
        }
    )

    # --- Frame selection ---
    frames = parse_frames(args.frames)
    work = select_frames(work, frames)

    workflow_name = args.kind

    # --- Export CSV ---
    if args.export:
        out = resolve_output_path(args.export, workflow_name)
        work.rename(columns={"x": xlabel, "y": ycol}).to_csv(out, index=False)
        print(f'[Done] successfully saved the data in {out}')

    # --- Plot: shared settings for save-to-file and interactive display ---
    u = unit_for(args.yaxis) or unit_for(ycol)
    plot_kwargs = dict(
        title=f"{ycol} vs {xlabel}",
        xlabel=xlabel,
        ylabel=f"{ycol} ({u})" if u else ycol,
    )

    # Save figure without showing it.
    if args.save:
        out = resolve_output_path(args.save, workflow_name)
        single_plot(work["x"], work["y"], save=out, **plot_kwargs)

    # Show the figure interactively.
    if args.plot:
        single_plot(work["x"], work["y"], save=None, **plot_kwargs)

    # --- No action fallback ---
    if not args.plot and not args.save and not args.export:
        print("ℹ️ No action selected. Use one or more of --plot, --save, --export.")
        print("Available keys:", ", ".join(available_keys(df.columns)))

    return 0
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _wire_get_flags(p: argparse.ArgumentParser) -> None:
    """Attach the summary 'get' flags to parser *p* and wire its runner."""
    specs = [
        ("--file", dict(default="summary.txt", help="Path to summary file")),
        (
            "--xaxis",
            dict(
                default="time",
                choices=["time", "iter", "frame"],
                help="X-axis domain (default: time)",
            ),
        ),
        (
            "--yaxis",
            dict(
                required=True,
                help="Y-axis feature/column (aliases allowed, e.g., 'E_potential' → 'E_pot')",
            ),
        ),
        (
            "--frames",
            dict(
                default=None,
                help="Frames to select: 'start:stop[:step]' or 'i,j,k' (default: all)",
            ),
        ),
        ("--plot", dict(action="store_true", help="Show the plot interactively.")),
        (
            "--save",
            dict(
                default=None,
                help="Save the plot to a file (without showing). Provide a path.",
            ),
        ),
        (
            "--export",
            dict(
                default=None,
                help="Export the data to CSV. Provide a path.",
            ),
        ),
    ]
    for flag, kwargs in specs:
        p.add_argument(flag, **kwargs)
    p.set_defaults(_run=_summary_get_task)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def register_tasks(subparsers: argparse._SubParsersAction) -> None:
    """
    Register 'summary' tasks. get can be used for example to plot potential energy vs time (auto-scaled fs/ps/ns).
    """
    examples = (
        "Examples:\n"
        " reaxkit summary get --yaxis E_pot --xaxis time --plot\n"
        " reaxkit summary get --file summary.txt --yaxis T --xaxis iter "
        "--frames 0:400:5 --save summary_T_vs_iter.png --export summary_T_vs_iter.csv"
    )
    parser = subparsers.add_parser(
        "get",
        help="Extract a column and optionally plot/save/export it.",
        description=examples,
        formatter_class=argparse.RawTextHelpFormatter,
    )
    _wire_get_flags(parser)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
|
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Trainset workflow for ReaxKit.
|
|
3
|
+
|
|
4
|
+
This workflow provides tools for inspecting, categorizing, generating, and
|
|
5
|
+
exporting ReaxFF trainset files used in force-field training and validation.
|
|
6
|
+
|
|
7
|
+
It supports:
|
|
8
|
+
- Reading an existing trainset file and exporting individual sections
|
|
9
|
+
(e.g. charge, heat of formation, geometry, cell parameters, energy)
|
|
10
|
+
as CSV tables for inspection or downstream analysis.
|
|
11
|
+
- Extracting and listing unique group comments (categories) defined in
|
|
12
|
+
trainset sections, with optional sorting and CSV export.
|
|
13
|
+
- Generating a template trainset settings YAML file populated with
|
|
14
|
+
default values for elastic and structural targets.
|
|
15
|
+
- Generating complete elastic-energy trainsets and associated tables
|
|
16
|
+
from either:
|
|
17
|
+
• a user-provided YAML settings file, or
|
|
18
|
+
• Materials Project data via a material ID and API key.
|
|
19
|
+
- Optionally generating and post-processing strained geometry files
|
|
20
|
+
associated with elastic trainset construction.
|
|
21
|
+
|
|
22
|
+
The workflow is designed to bridge high-level training specifications
|
|
23
|
+
(YAML, Materials Project data) with concrete ReaxFF trainset inputs in a
|
|
24
|
+
reproducible, CLI-driven manner.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import os
|
|
31
|
+
import argparse
|
|
32
|
+
from pathlib import Path
|
|
33
|
+
from typing import Any, Dict
|
|
34
|
+
|
|
35
|
+
from reaxkit.io.handlers.trainset_handler import TrainsetHandler
|
|
36
|
+
from reaxkit.analysis.per_file.trainset_analyzer import get_trainset_group_comments
|
|
37
|
+
from reaxkit.utils.path import resolve_output_path
|
|
38
|
+
from reaxkit.io.generators.trainset_generator import (
|
|
39
|
+
write_trainset_settings_yaml,
|
|
40
|
+
generate_trainset_from_yaml,
|
|
41
|
+
generate_trainset_settings_yaml_from_mp_simple,
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
# ----------------------------------------------------------------------
|
|
45
|
+
# Task 1: reaxkit trainset get --file ... --section ...
|
|
46
|
+
# ----------------------------------------------------------------------
|
|
47
|
+
def _get_task(args: argparse.Namespace) -> int:
    """Export trainset section DataFrames as CSV files.

    Returns 0 on success, 1 when a specific requested section is missing.
    """
    handler = TrainsetHandler(args.file)
    meta: Dict[str, Any] = handler.metadata()
    tables: Dict[str, Any] = meta.get("tables", {})

    # Output directory: --export if given, else the default folder.
    outdir = Path(args.export) if args.export else Path("trainset_analysis")
    outdir.mkdir(parents=True, exist_ok=True)

    wanted = args.section.lower()

    if wanted == "all":
        items = list(tables.items())
    else:
        try:
            frame = handler.section(wanted)
        except KeyError:
            print(f"[Error] Section '{wanted}' not found in trainset.")
            return 1

        # Normalize the two spellings of the cell-parameters section.
        canon = wanted.upper()
        if canon in ("CELL", "CELL PARAMETERS"):
            canon = "CELL_PARAMETERS"

        items = [(canon, frame)]

    stem = Path(args.file).stem

    if not items:
        print("[Info] No sections found in trainset.")
        return 0

    # One CSV per non-empty section: <stem>_<section>.csv
    for sec_name, frame in items:
        if frame is None or frame.empty:
            print(f"[Skip] Section {sec_name} is empty or not parsed.")
            continue

        outpath = outdir / f"{stem}_{sec_name.lower()}.csv"
        frame.to_csv(outpath, index=False)
        print(f"[Done] Exported section '{sec_name}' to {outpath}")

    return 0
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# ----------------------------------------------------------------------
|
|
102
|
+
# Task 2: reaxkit trainset category --file ... --section ...
|
|
103
|
+
# ----------------------------------------------------------------------
|
|
104
|
+
def _category_task(args: argparse.Namespace) -> int:
    """Print or export unique group comments (categories) for trainset sections."""
    handler = TrainsetHandler(args.file)
    # Resulting columns: section, group_comment
    cats = get_trainset_group_comments(handler, sort=args.sort)

    if cats.empty:
        print("[Info] No categories found in trainset.")
        return 0

    wanted = args.section.lower()

    # Narrow to one section unless 'all' was requested.
    if wanted != "all":
        cats = cats[cats["section"].str.lower() == wanted]
        if cats.empty:
            print(f"[Info] No categories found for section '{wanted}'.")
            return 0

    workflow_name = args.kind

    # Export takes precedence over printing.
    if args.export:
        outpath = resolve_output_path(args.export, workflow_name)
        cats.to_csv(outpath, index=False)
        print(f"[Done] Exported categories to: {outpath}")
        return 0

    # Otherwise print one "<section> <group_comment>" line per row.
    for _, row in cats.iterrows():
        print(f"{row['section']} {row['group_comment']}")

    return 0
|
|
140
|
+
|
|
141
|
+
# ----------------------------------------------------------------------
|
|
142
|
+
# Task 3: reaxkit trainset gen-settings --out ...
|
|
143
|
+
# ----------------------------------------------------------------------
|
|
144
|
+
def _gen_settings_task(args: argparse.Namespace) -> int:
    """
    Generate a sample trainset settings YAML using default values.

    Where the generated file is stored:
    - The YAML is written to: reaxkit_generated_inputs/<--out>. The --out value
      is joined onto the reaxkit_generated_inputs/ directory (created if
      missing); resolve_output_path is NOT used here.
      (The previous docstring incorrectly claimed the file landed under
      reaxkit_outputs/trainset/.)

    Returns:
        0 on success.
    """
    # All generated inputs live under a fixed local directory.
    base_dir = Path("reaxkit_generated_inputs")
    base_dir.mkdir(parents=True, exist_ok=True)

    out_yaml = base_dir / args.out

    # Delegates the actual YAML content (default values) to the generator.
    write_trainset_settings_yaml(out_path=str(out_yaml))

    print(f"[Done] Wrote sample settings YAML to: {out_yaml}")
    return 0
|
|
160
|
+
|
|
161
|
+
# ----------------------------------------------------------------------
|
|
162
|
+
# Task 4: reaxkit trainset generate --yaml ... OR --mp-id ... --api-key ...
|
|
163
|
+
# ----------------------------------------------------------------------
|
|
164
|
+
def _generate_task(args: argparse.Namespace) -> int:
    """
    Generate elastic-energy trainset + tables (and optional geo if geo.enable=true in YAML).

    Two modes:
      A) Use an existing YAML file: --yaml trainset_settings.yaml
      B) Build YAML from Materials Project: --mp-id mp-XXXX --api-key <KEY> [--bulk-mode vrh]

    Where the generated files are stored:
      - Elastic-energy trainset + tables are written to: <resolved --out-dir> (typically under reaxkit_outputs/trainset/).
      - Geo outputs (if geo.enable=true) are written under the YAML folder (trainset_generator writes geo to yaml_path.parent).

    Returns:
        0 on success, 2 when MP-mode arguments are missing.
    """
    workflow_name = args.kind

    yaml_path = args.yaml

    # -------------------------
    # Mode B: build YAML from MP
    # -------------------------
    if not yaml_path:
        if not args.mp_id:
            print("❌ You must provide either --yaml <settings.yaml> OR --mp-id <mp-####>.")
            return 2

        api_key = args.api_key or os.getenv("MP_API_KEY")
        # MP-mode outputs get a distinct suffix so they never collide with
        # YAML-mode outputs.
        args.out_dir = f"{args.out_dir}_mp"

        if not api_key:
            print("❌ Missing Materials Project API key. Provide --api-key or set MP_API_KEY env var.")
            return 2

        # Where to write the generated YAML (and associated structure files)
        out_yaml = resolve_output_path(args.out_yaml, workflow_name)
        out_yaml_p = Path(out_yaml)
        out_yaml_p.parent.mkdir(parents=True, exist_ok=True)

        structure_dir = args.structure_dir or str(out_yaml_p.parent / "downloaded_structures")
        Path(structure_dir).mkdir(parents=True, exist_ok=True)

        res = generate_trainset_settings_yaml_from_mp_simple(
            mp_id=args.mp_id,
            out_yaml=out_yaml,
            structure_dir=structure_dir,
            bulk_mode=args.bulk_mode,
            api_key=api_key,
            verbose=bool(args.verbose),
        )

        yaml_path = res["yaml"]
        print("\n[Done] Generated settings from Materials Project:")
        print(f" YAML: {res['yaml']}")
        print(f" CIF: {res['cif']}")
        print(f" XYZ: {res['xyz']}\n")

    else:
        args.out_dir = f"{args.out_dir}_yaml"

    # -------------------------
    # Run YAML -> trainset + tables (+ optional geo)
    # -------------------------
    out_dir = resolve_output_path(args.out_dir, workflow_name)
    Path(out_dir).mkdir(parents=True, exist_ok=True)

    generate_trainset_from_yaml(yaml_path=yaml_path, out_dir=out_dir)

    print(f"[Done] Elastic-energy trainset + tables written to: {out_dir}")
    print(f"[Info] Geo outputs (if enabled in YAML) are written under the same folder in two separate sub-folders:\n"
          f" geo_strained and xyz_strained which contain .bgf and .xyz files, respectively.")

    # ------------------------------------------------------------------
    # Concatenate all strained geo (.bgf) files into one
    # ------------------------------------------------------------------
    geo_dir = Path(out_dir) / "geo_strained"
    all_geo_file = geo_dir / "all_trainset_geo.bgf"

    if geo_dir.exists():
        # BUGFIX: exclude a pre-existing combined file from the glob so that
        # re-running the task does not fold the previous concatenation back
        # into the new one.
        bgf_files = sorted(p for p in geo_dir.glob("*.bgf") if p != all_geo_file)

        if bgf_files:
            with open(all_geo_file, "w") as fout:
                for bgf in bgf_files:
                    # Delimit each source file so the combined file stays auditable.
                    fout.write(f"# ===== BEGIN {bgf.name} =====\n")
                    with open(bgf, "r") as fin:
                        fout.write(fin.read())
                    fout.write(f"\n# ===== END {bgf.name} =====\n\n")

            print(
                f"[Post] All strained geometry (.bgf) files were concatenated into: all_trainset_geo.bgf"
            )
        else:
            print("[Post] geo_strained folder exists but contains no .bgf files.")
    else:
        print("[Post] No geo_strained folder found; skipping geometry concatenation.")

    return 0
|
|
260
|
+
|
|
261
|
+
# ----------------------------------------------------------------------
|
|
262
|
+
# Register tasks with the CLI
|
|
263
|
+
# ----------------------------------------------------------------------
|
|
264
|
+
|
|
265
|
+
def register_tasks(subparsers: argparse._SubParsersAction) -> None:
    """Register the trainset tasks: get, category, gen-settings, generate."""
    # ---- get ----
    p_get = subparsers.add_parser(
        "get",
        help="Save trainset sections as CSV files. \n",
        description=(
            "Examples:\n"
            " reaxkit trainset get --section all --export reaxkit_outputs/trainset\n"
        ),
        formatter_class=argparse.RawTextHelpFormatter,
    )
    p_get.add_argument("--file", default="trainset.in", help="Path to trainset/fort.99 file")
    p_get.add_argument("--section", default="all",
                       help="Section to export: all, charge, heatfo, geometry, cell_parameters, energy")
    p_get.add_argument("--export", help="Directory to save CSVs into (default: trainset_analysis/)")
    p_get.set_defaults(_run=_get_task)

    # ---- category ----
    p_cat = subparsers.add_parser(
        "category",
        help="List or export unique trainset categories (group comments) || ",
        description=(
            "Examples:\n"
            " reaxkit trainset category --section all --export trainset_categories.csv\n"
            " reaxkit trainset category --section all --sort\n"
            " reaxkit trainset category --section energy --export energy_categories.csv\n"
        ),
        formatter_class=argparse.RawTextHelpFormatter,
    )
    p_cat.add_argument("--file", default="trainset.in", help="Path to trainset/fort.99 file")
    p_cat.add_argument("--section", default="all",
                       help="Section to analyze: all, charge, heatfo, geometry, cell_parameters, energy",
                       )
    p_cat.add_argument("--export", help="Optional CSV file to write categories into (e.g. trainset_categories.csv)")
    p_cat.add_argument("--sort", action="store_true", help="Sort labels alphabetically (default: off)")
    p_cat.set_defaults(_run=_category_task)

    # ------------------------------------------------------------------
    # gen-settings
    # ------------------------------------------------------------------
    p_gens = subparsers.add_parser(
        "gen-settings",
        help="Generate a sample trainset settings YAML (default values).",
        description=(
            "Examples:\n"
            " reaxkit trainset gen-settings\n"
            " reaxkit trainset gen-settings --out reaxkit_outputs/trainset/trainset_settings.yaml\n"
        ),
        formatter_class=argparse.RawTextHelpFormatter,
    )
    p_gens.add_argument(
        "--out",
        default="trainset_settings.yaml",
        help="Output YAML filename/path (resolved under reaxkit_outputs/trainset/ if relative).",
    )
    p_gens.set_defaults(_run=_gen_settings_task)

    # ------------------------------------------------------------------
    # generate
    # ------------------------------------------------------------------
    p_gen = subparsers.add_parser(
        "generate",
        help="Generate elastic-energy trainset + tables (and optional geo) from YAML or Materials Project.",
        description=(
            "YAML mode:\n"
            " reaxkit trainset generate --yaml trainset_settings.yaml\n"
            "\n"
            "Materials Project mode:\n"
            " reaxkit trainset generate --mp-id mp-661 --api-key YOUR_KEY\n"
        ),
        formatter_class=argparse.RawTextHelpFormatter,
    )

    # Mode A
    p_gen.add_argument("--yaml", default=None, help="Path to an existing trainset_settings.yaml file.")

    # Mode B
    p_gen.add_argument("--mp-id", default=None, help="Materials Project material id (e.g., mp-661).")
    p_gen.add_argument("--api-key", default=None, help="Materials Project API key (or set MP_API_KEY env var).")
    # BUGFIX: the help text previously claimed "(default: vrh)" while the
    # actual default is "voigt"; the message now matches the default.
    p_gen.add_argument("--bulk-mode", default="voigt", choices=["voigt", "reuss", "vrh"],
                       help="Which MP bulk modulus to use (default: voigt).")
    p_gen.add_argument("--out-yaml", default="reaxkit_generated_inputs/trainset_mp/trainset_settings_mp.yaml",
                       help="Where to write the generated YAML in MP mode (resolved under outputs if relative).")
    p_gen.add_argument("--structure-dir", default=None,
                       help="Directory to write MP-downloaded structure files (default: next to out-yaml).")
    p_gen.add_argument("--verbose", action="store_true", help="Verbose MP fetching/logging.")

    # Common output
    p_gen.add_argument("--out-dir", default="reaxkit_generated_inputs/trainset",
                       help="Directory to write elastic-energy trainset + tables (resolved under outputs if relative).")

    p_gen.set_defaults(_run=_generate_task)
|