reaxkit 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. reaxkit/__init__.py +0 -0
  2. reaxkit/analysis/__init__.py +0 -0
  3. reaxkit/analysis/composed/RDF_analyzer.py +560 -0
  4. reaxkit/analysis/composed/__init__.py +0 -0
  5. reaxkit/analysis/composed/connectivity_analyzer.py +706 -0
  6. reaxkit/analysis/composed/coordination_analyzer.py +144 -0
  7. reaxkit/analysis/composed/electrostatics_analyzer.py +687 -0
  8. reaxkit/analysis/per_file/__init__.py +0 -0
  9. reaxkit/analysis/per_file/control_analyzer.py +165 -0
  10. reaxkit/analysis/per_file/eregime_analyzer.py +108 -0
  11. reaxkit/analysis/per_file/ffield_analyzer.py +305 -0
  12. reaxkit/analysis/per_file/fort13_analyzer.py +79 -0
  13. reaxkit/analysis/per_file/fort57_analyzer.py +106 -0
  14. reaxkit/analysis/per_file/fort73_analyzer.py +61 -0
  15. reaxkit/analysis/per_file/fort74_analyzer.py +65 -0
  16. reaxkit/analysis/per_file/fort76_analyzer.py +191 -0
  17. reaxkit/analysis/per_file/fort78_analyzer.py +154 -0
  18. reaxkit/analysis/per_file/fort79_analyzer.py +83 -0
  19. reaxkit/analysis/per_file/fort7_analyzer.py +393 -0
  20. reaxkit/analysis/per_file/fort99_analyzer.py +411 -0
  21. reaxkit/analysis/per_file/molfra_analyzer.py +359 -0
  22. reaxkit/analysis/per_file/params_analyzer.py +258 -0
  23. reaxkit/analysis/per_file/summary_analyzer.py +84 -0
  24. reaxkit/analysis/per_file/trainset_analyzer.py +84 -0
  25. reaxkit/analysis/per_file/vels_analyzer.py +95 -0
  26. reaxkit/analysis/per_file/xmolout_analyzer.py +528 -0
  27. reaxkit/cli.py +181 -0
  28. reaxkit/count_loc.py +276 -0
  29. reaxkit/data/alias.yaml +89 -0
  30. reaxkit/data/constants.yaml +27 -0
  31. reaxkit/data/reaxff_input_files_contents.yaml +186 -0
  32. reaxkit/data/reaxff_output_files_contents.yaml +301 -0
  33. reaxkit/data/units.yaml +38 -0
  34. reaxkit/help/__init__.py +0 -0
  35. reaxkit/help/help_index_loader.py +531 -0
  36. reaxkit/help/introspection_utils.py +131 -0
  37. reaxkit/io/__init__.py +0 -0
  38. reaxkit/io/base_handler.py +165 -0
  39. reaxkit/io/generators/__init__.py +0 -0
  40. reaxkit/io/generators/control_generator.py +123 -0
  41. reaxkit/io/generators/eregime_generator.py +341 -0
  42. reaxkit/io/generators/geo_generator.py +967 -0
  43. reaxkit/io/generators/trainset_generator.py +1758 -0
  44. reaxkit/io/generators/tregime_generator.py +113 -0
  45. reaxkit/io/generators/vregime_generator.py +164 -0
  46. reaxkit/io/generators/xmolout_generator.py +304 -0
  47. reaxkit/io/handlers/__init__.py +0 -0
  48. reaxkit/io/handlers/control_handler.py +209 -0
  49. reaxkit/io/handlers/eregime_handler.py +122 -0
  50. reaxkit/io/handlers/ffield_handler.py +812 -0
  51. reaxkit/io/handlers/fort13_handler.py +123 -0
  52. reaxkit/io/handlers/fort57_handler.py +143 -0
  53. reaxkit/io/handlers/fort73_handler.py +145 -0
  54. reaxkit/io/handlers/fort74_handler.py +155 -0
  55. reaxkit/io/handlers/fort76_handler.py +195 -0
  56. reaxkit/io/handlers/fort78_handler.py +142 -0
  57. reaxkit/io/handlers/fort79_handler.py +227 -0
  58. reaxkit/io/handlers/fort7_handler.py +264 -0
  59. reaxkit/io/handlers/fort99_handler.py +128 -0
  60. reaxkit/io/handlers/geo_handler.py +224 -0
  61. reaxkit/io/handlers/molfra_handler.py +184 -0
  62. reaxkit/io/handlers/params_handler.py +137 -0
  63. reaxkit/io/handlers/summary_handler.py +135 -0
  64. reaxkit/io/handlers/trainset_handler.py +658 -0
  65. reaxkit/io/handlers/vels_handler.py +293 -0
  66. reaxkit/io/handlers/xmolout_handler.py +174 -0
  67. reaxkit/utils/__init__.py +0 -0
  68. reaxkit/utils/alias.py +219 -0
  69. reaxkit/utils/cache.py +77 -0
  70. reaxkit/utils/constants.py +75 -0
  71. reaxkit/utils/equation_of_states.py +96 -0
  72. reaxkit/utils/exceptions.py +27 -0
  73. reaxkit/utils/frame_utils.py +175 -0
  74. reaxkit/utils/log.py +43 -0
  75. reaxkit/utils/media/__init__.py +0 -0
  76. reaxkit/utils/media/convert.py +90 -0
  77. reaxkit/utils/media/make_video.py +91 -0
  78. reaxkit/utils/media/plotter.py +812 -0
  79. reaxkit/utils/numerical/__init__.py +0 -0
  80. reaxkit/utils/numerical/extrema_finder.py +96 -0
  81. reaxkit/utils/numerical/moving_average.py +103 -0
  82. reaxkit/utils/numerical/numerical_calcs.py +75 -0
  83. reaxkit/utils/numerical/signal_ops.py +135 -0
  84. reaxkit/utils/path.py +55 -0
  85. reaxkit/utils/units.py +104 -0
  86. reaxkit/webui/__init__.py +0 -0
  87. reaxkit/webui/app.py +0 -0
  88. reaxkit/webui/components.py +0 -0
  89. reaxkit/webui/layouts.py +0 -0
  90. reaxkit/webui/utils.py +0 -0
  91. reaxkit/workflows/__init__.py +0 -0
  92. reaxkit/workflows/composed/__init__.py +0 -0
  93. reaxkit/workflows/composed/coordination_workflow.py +393 -0
  94. reaxkit/workflows/composed/electrostatics_workflow.py +587 -0
  95. reaxkit/workflows/composed/xmolout_fort7_workflow.py +343 -0
  96. reaxkit/workflows/meta/__init__.py +0 -0
  97. reaxkit/workflows/meta/help_workflow.py +136 -0
  98. reaxkit/workflows/meta/introspection_workflow.py +235 -0
  99. reaxkit/workflows/meta/make_video_workflow.py +61 -0
  100. reaxkit/workflows/meta/plotter_workflow.py +601 -0
  101. reaxkit/workflows/per_file/__init__.py +0 -0
  102. reaxkit/workflows/per_file/control_workflow.py +110 -0
  103. reaxkit/workflows/per_file/eregime_workflow.py +267 -0
  104. reaxkit/workflows/per_file/ffield_workflow.py +390 -0
  105. reaxkit/workflows/per_file/fort13_workflow.py +86 -0
  106. reaxkit/workflows/per_file/fort57_workflow.py +137 -0
  107. reaxkit/workflows/per_file/fort73_workflow.py +151 -0
  108. reaxkit/workflows/per_file/fort74_workflow.py +88 -0
  109. reaxkit/workflows/per_file/fort76_workflow.py +188 -0
  110. reaxkit/workflows/per_file/fort78_workflow.py +135 -0
  111. reaxkit/workflows/per_file/fort79_workflow.py +314 -0
  112. reaxkit/workflows/per_file/fort7_workflow.py +592 -0
  113. reaxkit/workflows/per_file/fort83_workflow.py +60 -0
  114. reaxkit/workflows/per_file/fort99_workflow.py +223 -0
  115. reaxkit/workflows/per_file/geo_workflow.py +554 -0
  116. reaxkit/workflows/per_file/molfra_workflow.py +577 -0
  117. reaxkit/workflows/per_file/params_workflow.py +135 -0
  118. reaxkit/workflows/per_file/summary_workflow.py +161 -0
  119. reaxkit/workflows/per_file/trainset_workflow.py +356 -0
  120. reaxkit/workflows/per_file/tregime_workflow.py +79 -0
  121. reaxkit/workflows/per_file/vels_workflow.py +309 -0
  122. reaxkit/workflows/per_file/vregime_workflow.py +75 -0
  123. reaxkit/workflows/per_file/xmolout_workflow.py +678 -0
  124. reaxkit-1.0.0.dist-info/METADATA +128 -0
  125. reaxkit-1.0.0.dist-info/RECORD +130 -0
  126. reaxkit-1.0.0.dist-info/WHEEL +5 -0
  127. reaxkit-1.0.0.dist-info/entry_points.txt +2 -0
  128. reaxkit-1.0.0.dist-info/licenses/AUTHORS.md +20 -0
  129. reaxkit-1.0.0.dist-info/licenses/LICENSE +21 -0
  130. reaxkit-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,135 @@
1
+ """
2
+ Params-file workflow for ReaxKit.
3
+
4
+ This workflow provides tools for inspecting and exporting ReaxFF `params` files,
5
+ which define optimization parameters and search intervals used during
6
+ force-field training.
7
+
8
+ It supports:
9
+ - Loading the raw params table with optional duplicate removal and flexible
10
+ column-based sorting.
11
+ - Interpreting params entries by resolving their references into the
12
+ corresponding sections and rows of the `ffield` file.
13
+ - Optionally constructing human-readable chemical terms (e.g. C–C–H) during
14
+ interpretation for improved readability.
15
+ - Exporting processed params data to CSV for downstream analysis, auditing,
16
+ or force-field development workflows.
17
+
18
+ The workflow is designed to bridge low-level optimization parameter definitions
19
+ with interpretable force-field context in a reproducible, CLI-driven manner.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import argparse
25
+
26
+ from reaxkit.io.handlers.params_handler import ParamsHandler
27
+ from reaxkit.io.handlers.ffield_handler import FFieldHandler
28
+
29
+ from reaxkit.analysis.per_file.params_analyzer import get_params_data, interpret_params
30
+ from reaxkit.utils.path import resolve_output_path
31
+
32
+
33
def _task_get(args: argparse.Namespace) -> int:
    """
    Load the params table (raw or interpreted) and export or preview it.

    The raw table comes from ``get_params_data``; with ``--interpret`` the
    table comes from ``interpret_params`` using the ffield given by
    ``--ffield``. Duplicates are dropped unless ``--keep-duplicates`` is set.
    The result is optionally sorted by ``--sort-by`` and then either written
    to CSV (``--export``) or previewed (first 20 rows) on stdout.

    Returns 0 on success; exits via ``SystemExit`` when the requested sort
    column is not present in the table.
    """
    handler = ParamsHandler(args.file)
    drop_dupes = not args.keep_duplicates

    if not args.interpret:
        # Raw table: sorting is deliberately left to the step below.
        df = get_params_data(
            handler,
            sort_by=None,
            ascending=True,
            drop_duplicate=drop_dupes,
        )
    else:
        # Interpretation needs the force field the params entries point into.
        df = interpret_params(
            params_handler=handler,
            ffield_handler=FFieldHandler(args.ffield),
            add_term=(not args.no_term),
        )
        # Mirror the raw path's default de-duplication.
        if drop_dupes:
            df = df.drop_duplicates(
                subset=["ff_section", "ff_section_line", "ff_parameter"],
                keep="first",
            )

    # Sorting only happens on explicit request.
    if args.sort_by:
        if args.sort_by not in df.columns:
            raise SystemExit(
                f"❌ sort-by column '{args.sort_by}' not found. Available: {', '.join(df.columns)}"
            )
        df = df.sort_values(by=args.sort_by, ascending=(not args.descending))

    # Without an export target, show a short preview instead.
    if not args.export:
        print(df.head(20).to_string(index=False))
        return 0

    out = resolve_output_path(args.export, workflow="params")
    df.to_csv(out, index=False)
    print(f"[Done] Exported the requested data to {out}")
    return 0
76
+
77
+ #####################################################################################
78
+
79
def _add_common_params_io_args(p: argparse.ArgumentParser) -> None:
    """
    Register the shared params I/O and table-shaping flags on parser *p*.

    Adds ``--file`` and ``--export`` plus the duplicate/sorting switches.
    Defaults: duplicates are dropped and no sorting is applied.
    """
    # Flags are declared as (name, keyword-arguments) pairs and registered in
    # this order, which is also the order they appear in ``--help``.
    flag_table = (
        ("--file", {"default": "params", "help": "Path to params file."}),
        ("--export", {"default": None, "help": "Path to export CSV data."}),
        (
            "--keep-duplicates",
            {
                "action": "store_true",
                "help": "If set, do NOT drop duplicates (default drops duplicates).",
            },
        ),
        (
            "--sort-by",
            {
                "default": None,
                "help": "Optional column name to sort by (default: no sorting).",
            },
        ),
        (
            "--descending",
            {
                "action": "store_true",
                "help": "If set, sort in descending order (only if --sort-by is used).",
            },
        ),
    )
    for flag_name, flag_options in flag_table:
        p.add_argument(flag_name, **flag_options)
102
+
103
def register_tasks(subparsers: argparse._SubParsersAction) -> None:
    """
    Register the params workflow's ``get`` task on *subparsers*.

    The sub-parser receives the common params I/O flags, the interpretation
    flags (``--interpret``, ``--ffield``, ``--no-term``) and is bound to
    ``_task_get`` through the ``_run`` default.
    """
    usage_text = (
        "Examples:\n"
        " reaxkit params get --export params.csv\n"
        "\n"
        "Interpreted params:\n"
        " reaxkit params get --interpret --export params_interpreted.csv\n"
    )
    get_parser = subparsers.add_parser(
        "get",
        help="Load params table (optionally interpret pointers into ffield)",
        description=usage_text,
        formatter_class=argparse.RawTextHelpFormatter,
    )

    _add_common_params_io_args(get_parser)

    # Interpretation-specific flags, registered in help-display order.
    interpretation_flags = (
        (
            "--interpret",
            {
                "action": "store_true",
                "help": "If set, interpret params pointers into the ffield (adds section/row/param/value/term columns).",
            },
        ),
        (
            "--ffield",
            {
                "default": "ffield",
                "help": "Path to ffield file (required when --interpret is set).",
            },
        ),
        (
            "--no-term",
            {
                "action": "store_true",
                "help": "If set, do not build readable term (e.g., C-C-H) during interpretation.",
            },
        ),
    )
    for flag_name, flag_options in interpretation_flags:
        get_parser.add_argument(flag_name, **flag_options)

    get_parser.set_defaults(_run=_task_get)
@@ -0,0 +1,161 @@
1
+ """
2
+ summary.txt analysis workflow for ReaxKit.
3
+
4
+ This workflow provides tools for reading, analyzing, and visualizing data from
5
+ ReaxFF `summary.txt` files, which contain per-iteration thermodynamic and
6
+ simulation summary quantities.
7
+
8
+ It supports:
9
+ - Extracting a selected summary column (with alias support) as a function of
10
+ iteration, frame index, or physical time.
11
+ - Converting the x-axis between iteration, frame, and time using control-file
12
+ metadata.
13
+ - Selecting subsets of frames for focused analysis.
14
+ - Plotting summary quantities, saving figures, or exporting the processed data
15
+ to CSV using standardized output paths.
16
+
17
+ The workflow is designed for quick inspection and post-processing of ReaxFF
18
+ summary outputs, enabling reproducible analysis of thermodynamic and
19
+ simulation-wide properties.
20
+ """
21
+
22
+
23
+ from __future__ import annotations
24
+ import argparse
25
+ from typing import Optional, Sequence, Union
26
+ import pandas as pd
27
+ from reaxkit.utils.units import unit_for
28
+ from reaxkit.utils.media.plotter import single_plot
29
+ from reaxkit.utils.media.convert import convert_xaxis
30
+ from reaxkit.utils.frame_utils import parse_frames, select_frames
31
+ from reaxkit.utils.path import resolve_output_path
32
+ from reaxkit.io.handlers.summary_handler import SummaryHandler
33
+ from reaxkit.utils.alias import available_keys
34
+ from reaxkit.analysis.per_file.summary_analyzer import get_summary_data
35
+
36
+ FramesT = Optional[Union[slice, Sequence[int]]]
37
+
38
+
39
def _summary_get_task(args: argparse.Namespace) -> int:
    """
    Extract one summary column against the chosen x-axis and plot/save/export it.

    Reads the summary file given by ``args.file``, converts its ``iter``
    column to the axis requested by ``args.xaxis``, resolves ``args.yaxis``
    through ``get_summary_data``, applies the ``args.frames`` selection and
    then performs every requested action (``--export``, ``--save``,
    ``--plot``). When no action is requested, a hint and the available keys
    are printed instead.

    Returns:
        0 on completion.

    Raises:
        KeyError: if the parsed summary has no ``iter`` column, or if
            ``get_summary_data`` cannot resolve ``args.yaxis``.
    """
    handler = SummaryHandler(args.file)
    df = handler.dataframe().copy()

    # --- X axis: convert from 'iter' using convert_xaxis ---
    if "iter" not in df.columns:
        raise KeyError("Expected 'iter' column in parsed summary data.")
    xvals, xlabel = convert_xaxis(df["iter"].to_numpy(), args.xaxis)

    # --- Y axis: analyzer-level helper resolves the requested column ---
    # A KeyError raised here is allowed to propagate unchanged; catching it
    # only to re-raise it added nothing.
    y_series = get_summary_data(handler, args.yaxis)

    # Name of the resolved column (falls back to the user-supplied name).
    ycol = y_series.name or args.yaxis

    # Working frame; x is given df's index so it lines up with y_series.
    work = pd.DataFrame(
        {
            "x": pd.Series(xvals, index=df.index),
            "y": y_series,
        }
    )

    # --- Frame selection ---
    work = select_frames(work, parse_frames(args.frames))

    workflow_name = args.kind

    # --- Export CSV ---
    if args.export:
        out = resolve_output_path(args.export, workflow_name)
        work.rename(columns={"x": xlabel, "y": ycol}).to_csv(out, index=False)
        print(f'[Done] successfully saved the data in {out}')

    # Title and labels are shared by the saved and the interactive figure,
    # so they are built once.
    u = unit_for(args.yaxis) or unit_for(ycol)
    plot_labels = {
        "title": f"{ycol} vs {xlabel}",
        "xlabel": xlabel,
        "ylabel": f"{ycol} ({u})" if u else ycol,
    }

    # --- Save figure (no show) ---
    if args.save:
        out = resolve_output_path(args.save, workflow_name)
        single_plot(work["x"], work["y"], save=out, **plot_labels)

    # --- Plot interactively ---
    if args.plot:
        single_plot(work["x"], work["y"], save=None, **plot_labels)

    # --- No action fallback ---
    if not args.plot and not args.save and not args.export:
        print("ℹ️ No action selected. Use one or more of --plot, --save, --export.")
        print("Available keys:", ", ".join(available_keys(df.columns)))

    return 0
109
+
110
+
111
def _wire_get_flags(p: argparse.ArgumentParser) -> None:
    """
    Register the flags of the summary ``get`` task on parser *p*.

    Covers input file, x/y axis selection, frame selection and the three
    output actions (``--plot``, ``--save``, ``--export``), then binds the
    parser to ``_summary_get_task`` through the ``_run`` default.
    """
    add_flag = p.add_argument

    # Input and axis selection.
    add_flag("--file", default="summary.txt", help="Path to summary file")
    add_flag(
        "--xaxis",
        default="time",
        choices=["time", "iter", "frame"],
        help="X-axis domain (default: time)",
    )
    add_flag(
        "--yaxis",
        required=True,
        help="Y-axis feature/column (aliases allowed, e.g., 'E_potential' → 'E_pot')",
    )
    add_flag(
        "--frames",
        default=None,
        help="Frames to select: 'start:stop[:step]' or 'i,j,k' (default: all)",
    )

    # Output actions; any combination may be given.
    add_flag("--plot", action="store_true", help="Show the plot interactively.")
    add_flag(
        "--save",
        default=None,
        help="Save the plot to a file (without showing). Provide a path.",
    )
    add_flag(
        "--export",
        default=None,
        help="Export the data to CSV. Provide a path.",
    )

    p.set_defaults(_run=_summary_get_task)
141
+
142
+
143
def register_tasks(subparsers: argparse._SubParsersAction) -> None:
    """
    Register the summary workflow's ``get`` task on *subparsers*.

    ``get`` extracts one summary column and can plot, save or export it,
    for example potential energy versus time.
    """
    example_lines = (
        "Examples:\n"
        " reaxkit summary get --yaxis E_pot --xaxis time --plot\n"
        " reaxkit summary get --file summary.txt --yaxis T --xaxis iter "
        "--frames 0:400:5 --save summary_T_vs_iter.png --export summary_T_vs_iter.csv"
    )
    get_parser = subparsers.add_parser(
        "get",
        help="Extract a column and optionally plot/save/export it.",
        description=example_lines,
        formatter_class=argparse.RawTextHelpFormatter,
    )
    _wire_get_flags(get_parser)
159
+
160
+
161
+
@@ -0,0 +1,356 @@
1
+ """
2
+ Trainset workflow for ReaxKit.
3
+
4
+ This workflow provides tools for inspecting, categorizing, generating, and
5
+ exporting ReaxFF trainset files used in force-field training and validation.
6
+
7
+ It supports:
8
+ - Reading an existing trainset file and exporting individual sections
9
+ (e.g. charge, heat of formation, geometry, cell parameters, energy)
10
+ as CSV tables for inspection or downstream analysis.
11
+ - Extracting and listing unique group comments (categories) defined in
12
+ trainset sections, with optional sorting and CSV export.
13
+ - Generating a template trainset settings YAML file populated with
14
+ default values for elastic and structural targets.
15
+ - Generating complete elastic-energy trainsets and associated tables
16
+ from either:
17
+ • a user-provided YAML settings file, or
18
+ • Materials Project data via a material ID and API key.
19
+ - Optionally generating and post-processing strained geometry files
20
+ associated with elastic trainset construction.
21
+
22
+ The workflow is designed to bridge high-level training specifications
23
+ (YAML, Materials Project data) with concrete ReaxFF trainset inputs in a
24
+ reproducible, CLI-driven manner.
25
+ """
26
+
27
+
28
+ from __future__ import annotations
29
+
30
+ import os
31
+ import argparse
32
+ from pathlib import Path
33
+ from typing import Any, Dict
34
+
35
+ from reaxkit.io.handlers.trainset_handler import TrainsetHandler
36
+ from reaxkit.analysis.per_file.trainset_analyzer import get_trainset_group_comments
37
+ from reaxkit.utils.path import resolve_output_path
38
+ from reaxkit.io.generators.trainset_generator import (
39
+ write_trainset_settings_yaml,
40
+ generate_trainset_from_yaml,
41
+ generate_trainset_settings_yaml_from_mp_simple,
42
+ )
43
+
44
+ # ----------------------------------------------------------------------
45
+ # Task 1: reaxkit trainset get --file ... --section ...
46
+ # ----------------------------------------------------------------------
47
def _get_task(args: argparse.Namespace) -> int:
    """
    Export trainset section tables as CSV files.

    The output directory is ``args.export`` when given, otherwise
    ``trainset_analysis``; it is created if missing. With ``--section all``
    every table listed in the handler metadata is exported; otherwise only
    the requested section. Files are named ``<trainset stem>_<section>.csv``.

    Returns 0 on success (including when nothing was found) and 1 when the
    requested section does not exist.
    """
    handler = TrainsetHandler(args.file)
    tables = handler.metadata().get("tables", {})

    # Resolve and create the destination directory up front.
    outdir = Path(args.export) if args.export else Path("trainset_analysis")
    outdir.mkdir(parents=True, exist_ok=True)

    section = args.section.lower()

    if section != "all":
        try:
            single_df = handler.section(section)
        except KeyError:
            print(f"[Error] Section '{section}' not found in trainset.")
            return 1

        # Both spellings of the cell section map onto one canonical name.
        upper_name = section.upper()
        if upper_name in ("CELL", "CELL PARAMETERS"):
            upper_name = "CELL_PARAMETERS"
        items = [(upper_name, single_df)]
    else:
        items = list(tables.items())

    stem = Path(args.file).stem

    if not items:
        print("[Info] No sections found in trainset.")
        return 0

    for sec_name, df in items:
        if df is not None and not df.empty:
            outpath = outdir / f"{stem}_{sec_name.lower()}.csv"
            df.to_csv(outpath, index=False)
            print(f"[Done] Exported section '{sec_name}' to {outpath}")
        else:
            print(f"[Skip] Section {sec_name} is empty or not parsed.")

    return 0
99
+
100
+
101
+ # ----------------------------------------------------------------------
102
+ # Task 2: reaxkit trainset category --file ... --section ...
103
+ # ----------------------------------------------------------------------
104
def _category_task(args: argparse.Namespace) -> int:
    """
    List or export the unique group comments (categories) of a trainset.

    The table from ``get_trainset_group_comments`` is optionally narrowed to
    ``args.section`` (case-insensitive; ``all`` keeps everything). With
    ``--export`` it is written to CSV, otherwise each row is printed as
    ``<section> <group_comment>``.

    Always returns 0.
    """
    df = get_trainset_group_comments(TrainsetHandler(args.file), sort=args.sort)

    if df.empty:
        print("[Info] No categories found in trainset.")
        return 0

    section = args.section.lower()
    wants_single_section = section != "all"

    if wants_single_section:
        section_mask = df["section"].str.lower() == section
        df = df[section_mask]
        if df.empty:
            print(f"[Info] No categories found for section '{section}'.")
            return 0

    workflow_name = args.kind

    # Without an export target, print the categories and finish.
    if not args.export:
        for _, row in df.iterrows():
            print(f"{row['section']} {row['group_comment']}")
        return 0

    outpath = resolve_output_path(args.export, workflow_name)
    df.to_csv(outpath, index=False)
    print(f"[Done] Exported categories to: {outpath}")
    return 0
140
+
141
+ # ----------------------------------------------------------------------
142
+ # Task 3: reaxkit trainset gen-settings --out ...
143
+ # ----------------------------------------------------------------------
144
def _gen_settings_task(args: argparse.Namespace) -> int:
    """
    Generate a sample trainset settings YAML using default values.

    Where the generated file is stored:
    - A relative ``--out`` is placed under ``reaxkit_generated_inputs/``.
    - An absolute ``--out`` is used as given (an absolute right-hand operand
      replaces the left-hand path in a pathlib join).

    The target's parent directory is created when missing, so ``--out`` may
    contain sub-directories.

    Returns:
        0 after the file has been written.
    """
    out_yaml = Path("reaxkit_generated_inputs") / args.out

    # Create the directory the file actually lands in, not just the base
    # folder; otherwise an --out such as "sub/settings.yaml" points into a
    # directory that does not exist yet.
    out_yaml.parent.mkdir(parents=True, exist_ok=True)

    write_trainset_settings_yaml(out_path=str(out_yaml))

    print(f"[Done] Wrote sample settings YAML to: {out_yaml}")
    return 0
160
+
161
+ # ----------------------------------------------------------------------
162
+ # Task 4: reaxkit trainset generate --yaml ... OR --mp-id ... --api-key ...
163
+ # ----------------------------------------------------------------------
164
def _concatenate_strained_geo(geo_dir: Path, all_geo_file: Path) -> None:
    """Merge the ``*.bgf`` files of *geo_dir* into *all_geo_file* and report the outcome."""
    if not geo_dir.exists():
        print("[Post] No geo_strained folder found; skipping geometry concatenation.")
        return

    # The aggregate lives in the same folder and matches "*.bgf"; leave it out
    # so a re-run does not read the file it is currently overwriting.
    bgf_files = sorted(
        candidate
        for candidate in geo_dir.glob("*.bgf")
        if candidate.name != all_geo_file.name
    )
    if not bgf_files:
        print("[Post] geo_strained folder exists but contains no .bgf files.")
        return

    with open(all_geo_file, "w") as fout:
        for bgf in bgf_files:
            fout.write(f"# ===== BEGIN {bgf.name} =====\n")
            with open(bgf, "r") as fin:
                fout.write(fin.read())
            fout.write(f"\n# ===== END {bgf.name} =====\n\n")

    print(
        "[Post] All strained geometry (.bgf) files were concatenated into: all_trainset_geo.bgf"
    )


def _generate_task(args: argparse.Namespace) -> int:
    """
    Generate elastic-energy trainset + tables (and optional geo if geo.enable=true in YAML).

    Two modes:
    A) Use an existing YAML file: --yaml trainset_settings.yaml
    B) Build YAML from Materials Project: --mp-id mp-XXXX --api-key <KEY> [--bulk-mode vrh]

    Where the generated files are stored:
    - Trainset + tables go to the resolved ``--out-dir``, suffixed with
      ``_yaml`` (mode A) or ``_mp`` (mode B).
    - In mode B the generated YAML goes to the resolved ``--out-yaml`` and
      structure files to ``--structure-dir`` (default: a
      ``downloaded_structures`` folder next to that YAML).
    - After generation, ``*.bgf`` files found in ``<out_dir>/geo_strained``
      are concatenated into ``all_trainset_geo.bgf`` in that folder.
      NOTE(review): this assumes the generator writes strained geometries
      under ``out_dir`` — confirm against trainset_generator.

    ``args`` is not modified.

    Returns:
        0 on success, 2 when neither ``--yaml`` nor ``--mp-id`` is given or
        when the Materials Project API key is missing.
    """
    workflow_name = args.kind
    yaml_path = args.yaml

    if yaml_path:
        # Mode A: existing YAML.
        out_dir_arg = f"{args.out_dir}_yaml"
    else:
        # Mode B: build the YAML from Materials Project data first.
        if not args.mp_id:
            print("❌ You must provide either --yaml <settings.yaml> OR --mp-id <mp-####>.")
            return 2

        api_key = args.api_key or os.getenv("MP_API_KEY")
        if not api_key:
            print("❌ Missing Materials Project API key. Provide --api-key or set MP_API_KEY env var.")
            return 2

        # Kept in a local so a reused namespace does not accumulate suffixes.
        out_dir_arg = f"{args.out_dir}_mp"

        # Where to write the generated YAML (and associated structure files)
        out_yaml = resolve_output_path(args.out_yaml, workflow_name)
        out_yaml_p = Path(out_yaml)
        out_yaml_p.parent.mkdir(parents=True, exist_ok=True)

        structure_dir = args.structure_dir or str(out_yaml_p.parent / "downloaded_structures")
        Path(structure_dir).mkdir(parents=True, exist_ok=True)

        res = generate_trainset_settings_yaml_from_mp_simple(
            mp_id=args.mp_id,
            out_yaml=out_yaml,
            structure_dir=structure_dir,
            bulk_mode=args.bulk_mode,
            api_key=api_key,
            verbose=bool(args.verbose),
        )

        yaml_path = res["yaml"]
        print("\n[Done] Generated settings from Materials Project:")
        print(f" YAML: {res['yaml']}")
        print(f" CIF: {res['cif']}")
        print(f" XYZ: {res['xyz']}\n")

    # -------------------------
    # Run YAML -> trainset + tables (+ optional geo)
    # -------------------------
    out_dir = resolve_output_path(out_dir_arg, workflow_name)
    Path(out_dir).mkdir(parents=True, exist_ok=True)

    generate_trainset_from_yaml(yaml_path=yaml_path, out_dir=out_dir)

    print(f"[Done] Elastic-energy trainset + tables written to: {out_dir}")
    print(f"[Info] Geo outputs (if enabled in YAML) are written under the same folder in two separate sub-folders:\n"
          f" geo_strained and xyz_strained which contain .bgf and .xyz files, respectively.")

    # Concatenate all strained geo (.bgf) files into one.
    geo_dir = Path(out_dir) / "geo_strained"
    _concatenate_strained_geo(geo_dir, geo_dir / "all_trainset_geo.bgf")

    return 0
260
+
261
+ # ----------------------------------------------------------------------
262
+ # Register tasks with the CLI
263
+ # ----------------------------------------------------------------------
264
+
265
def register_tasks(subparsers: argparse._SubParsersAction) -> None:
    """
    Register the trainset workflow tasks on *subparsers*.

    Tasks and their handlers:
    - ``get``          -> ``_get_task``
    - ``category``     -> ``_category_task``
    - ``gen-settings`` -> ``_gen_settings_task``
    - ``generate``     -> ``_generate_task``

    Each sub-parser is bound to its handler through the ``_run`` default.
    """
    # ---- get ----
    p_get = subparsers.add_parser(
        "get",
        help="Save trainset sections as CSV files. \n",
        description=(
            "Examples:\n"
            " reaxkit trainset get --section all --export reaxkit_outputs/trainset\n"
        ),
        formatter_class=argparse.RawTextHelpFormatter,
    )
    p_get.add_argument("--file", default="trainset.in", help="Path to trainset/fort.99 file")
    p_get.add_argument("--section", default="all",
                       help="Section to export: all, charge, heatfo, geometry, cell_parameters, energy")
    p_get.add_argument("--export", help="Directory to save CSVs into (default: trainset_analysis/)")
    p_get.set_defaults(_run=_get_task)

    # ---- category ----
    p_cat = subparsers.add_parser(
        "category",
        help="List or export unique trainset categories (group comments) || ",
        description=(
            "Examples:\n"
            " reaxkit trainset category --section all --export trainset_categories.csv\n"
            " reaxkit trainset category --section all --sort\n"
            " reaxkit trainset category --section energy --export energy_categories.csv\n"
        ),
        formatter_class=argparse.RawTextHelpFormatter,
    )
    p_cat.add_argument("--file", default="trainset.in", help="Path to trainset/fort.99 file")
    p_cat.add_argument("--section", default="all",
                       help="Section to analyze: all, charge, heatfo, geometry, cell_parameters, energy",
                       )
    p_cat.add_argument("--export", help="Optional CSV file to write categories into (e.g. trainset_categories.csv)")
    p_cat.add_argument("--sort", action="store_true", help="Sort labels alphabetically (default: off)")
    p_cat.set_defaults(_run=_category_task)

    # ------------------------------------------------------------------
    # gen-settings
    # ------------------------------------------------------------------
    p_gens = subparsers.add_parser(
        "gen-settings",
        help="Generate a sample trainset settings YAML (default values).",
        description=(
            "Examples:\n"
            " reaxkit trainset gen-settings\n"
            " reaxkit trainset gen-settings --out my_trainset_settings.yaml\n"
        ),
        formatter_class=argparse.RawTextHelpFormatter,
    )
    # The handler joins --out onto reaxkit_generated_inputs/, so the help text
    # names that folder.
    p_gens.add_argument(
        "--out",
        default="trainset_settings.yaml",
        help="Output YAML filename/path (written under reaxkit_generated_inputs/ if relative).",
    )
    p_gens.set_defaults(_run=_gen_settings_task)

    # ------------------------------------------------------------------
    # generate
    # ------------------------------------------------------------------
    p_gen = subparsers.add_parser(
        "generate",
        help="Generate elastic-energy trainset + tables (and optional geo) from YAML or Materials Project.",
        description=(
            "YAML mode:\n"
            " reaxkit trainset generate --yaml trainset_settings.yaml\n"
            "\n"
            "Materials Project mode:\n"
            " reaxkit trainset generate --mp-id mp-661 --api-key YOUR_KEY\n"
        ),
        formatter_class=argparse.RawTextHelpFormatter,
    )

    # Mode A
    p_gen.add_argument("--yaml", default=None, help="Path to an existing trainset_settings.yaml file.")

    # Mode B
    p_gen.add_argument("--mp-id", default=None, help="Materials Project material id (e.g., mp-661).")
    p_gen.add_argument("--api-key", default=None, help="Materials Project API key (or set MP_API_KEY env var).")
    # Help text states the default actually configured here ("voigt").
    p_gen.add_argument("--bulk-mode", default="voigt", choices=["voigt", "reuss", "vrh"],
                       help="Which MP bulk modulus to use (default: voigt).")
    p_gen.add_argument("--out-yaml", default="reaxkit_generated_inputs/trainset_mp/trainset_settings_mp.yaml",
                       help="Where to write the generated YAML in MP mode (resolved under outputs if relative).")
    p_gen.add_argument("--structure-dir", default=None,
                       help="Directory to write MP-downloaded structure files (default: next to out-yaml).")
    p_gen.add_argument("--verbose", action="store_true", help="Verbose MP fetching/logging.")

    # Common output
    p_gen.add_argument("--out-dir", default="reaxkit_generated_inputs/trainset",
                       help="Directory to write elastic-energy trainset + tables (resolved under outputs if relative).")

    p_gen.set_defaults(_run=_generate_task)