pymdkit 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,224 @@
1
+ """
2
+ Calculate ionic diffusivity and conductivity for all groups from GPUMD MD data.
3
+
4
+ (Originally compute_msd_all_groups.py. Refactored to expose add_arguments/run
5
+ for the pymdkit dispatcher while remaining runnable standalone.)
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import argparse
11
+ from pathlib import Path
12
+ from typing import Dict, Tuple
13
+
14
+ import numpy as np
15
+ import scipy.constants as consts
16
+
17
# Sub-command name and one-line help text for this module; presumably read by
# the pymdkit dispatcher when it registers sub-commands (confirm against the
# dispatcher, which is not part of this file).
COMMAND = "msd"
HELP = "Diffusivity & conductivity for all groups from GPUMD MSD data."
19
+
20
+
21
class MDAnalyzer:
    """Analyze GPUMD output for ionic diffusivity and conductivity.

    Attributes:
        diffuse_ion: Element symbol of the mobile ion.
        ion_charge: Charge number of the mobile ion.
        temperature: Mean temperature (K) over the last run; ``None`` until
            :meth:`load_thermo_data` has been called.
        volume: Mean cell volume (A^3) over the last run; ``None`` until
            :meth:`load_thermo_data` has been called.
        groups_info: ``{group_id: {"element": str, "count": int}}`` filled by
            :meth:`parse_model_xyz`.
        runs_info: One settings dict per ``run`` line found in ``run.in``.
    """

    def __init__(self, diffuse_ion: str = "Li", ion_charge: int = 1):
        """Store the mobile-ion settings and parse ``run.in`` from the CWD."""
        self.diffuse_ion = diffuse_ion
        self.ion_charge = ion_charge
        self.temperature = None
        self.volume = None
        self.groups_info: Dict[int, Dict] = {}
        # A missing run.in yields an empty list; every slice then covers all data.
        self.runs_info = self._parse_run_file()

    @staticmethod
    def _parse_run_file(filepath: str = "run.in") -> list:
        """Collect the settings in effect at each ``run`` line of *filepath*.

        Returns:
            An empty list when *filepath* does not exist; otherwise one dict
            per ``run`` line with keys ``steps``, ``dump_interval``,
            ``dump_msd`` (falls back to ``dump_interval`` when no ``dump_msd``
            line was seen or its value is 0) and ``time_step``.
        """
        path = Path(filepath)
        if not path.exists():
            return []

        runs = []
        # Settings are sticky: a `run` line captures the values last seen above it.
        dump_interval, dump_msd, time_step = 10, None, 1.0
        for line in path.read_text(encoding="utf-8").splitlines():
            parts = line.split()
            if not parts:
                continue
            key = parts[0]
            if key == "dump_thermo":
                dump_interval = int(parts[1])
            elif key == "dump_msd":
                dump_msd = int(parts[1])
            elif key == "time_step":
                time_step = float(parts[1])
            elif key == "run":
                runs.append({
                    "steps": int(parts[1]),
                    "dump_interval": dump_interval,
                    "dump_msd": dump_msd or dump_interval,
                    "time_step": time_step,
                })
        return runs

    def _last_run_slice(self, n_points: int, interval: int) -> slice:
        """Index slice covering only the last run's data.

        With no parsed runs the slice spans all *n_points* rows; otherwise it
        keeps the trailing ``steps // interval`` rows of the last run.
        """
        if not self.runs_info:
            return slice(0, n_points)
        points_in_last = self.runs_info[-1]["steps"] // interval
        return slice(max(0, n_points - points_in_last), n_points)

    @staticmethod
    def _calculate_volume(data: np.ndarray) -> np.ndarray:
        """Return the per-row cell volume from a 2-D thermo.out array.

        Raises:
            ValueError: If the array has neither 12 nor 18 columns.
        """
        n_cols = data.shape[1]
        if n_cols == 12:  # orthogonal box: lx, ly, lz in cols 9-11
            return data[:, 9] * data[:, 10] * data[:, 11]
        if n_cols == 18:  # triclinic box: 3x3 matrix in cols 9-17
            box = data[:, 9:18].reshape(-1, 3, 3)
            # |a . (b x c)| evaluated row by row.
            return np.abs(np.einsum("...i,...i->...", box[:, 0], np.cross(box[:, 1], box[:, 2])))
        raise ValueError(f"Unsupported thermo.out format with {n_cols} columns")

    def load_thermo_data(self, filepath: str = "thermo.out") -> Tuple[float, float]:
        """Set and return the mean ``(temperature, volume)`` of the last run.

        Temperature is read from column 0; the volume comes from
        :meth:`_calculate_volume`.
        """
        # atleast_2d keeps a single-row file two-dimensional.
        data = np.atleast_2d(np.loadtxt(Path(filepath)))
        dump_interval = self.runs_info[-1]["dump_interval"] if self.runs_info else 1
        sl = self._last_run_slice(len(data), dump_interval)

        self.temperature = float(np.mean(data[sl, 0]))
        self.volume = float(np.mean(self._calculate_volume(data)[sl]))
        return self.temperature, self.volume

    def parse_model_xyz(self, filepath: str = "model.xyz") -> Dict[int, Dict]:
        """Build {group_id: {'element', 'count'}} from the extended XYZ.

        The first two lines are skipped and rows with fewer than five tokens
        are ignored. The element is the first token and the group id the fifth
        (assumes a per-atom layout of ``element x y z group`` -- TODO confirm
        against the files actually used).

        Raises:
            ValueError: If one group id is used by more than one element.
        """
        lines = Path(filepath).read_text(encoding="utf-8").splitlines()[2:]

        groups: Dict[int, Dict] = {}
        for line in lines:
            parts = line.split()
            if len(parts) < 5:
                continue
            element, gid = parts[0], int(parts[4])
            if gid not in groups:
                groups[gid] = {"element": element, "count": 0}
            elif groups[gid]["element"] != element:
                raise ValueError(
                    f"Inconsistent element in group {gid}: {element} vs {groups[gid]['element']}"
                )
            groups[gid]["count"] += 1

        self.groups_info = groups
        return groups

    def load_msd_data_all_groups(self, filepath: str = "msd.out") -> Dict[int, Tuple[np.ndarray, np.ndarray]]:
        """Return {group_id: (times, [MSD_x, MSD_y, MSD_z, MSD_total])}.

        Only the rows of the last run are kept and times are shifted to start
        at zero. Column blocks are assigned to the sorted group ids from
        :attr:`groups_info`, so :meth:`parse_model_xyz` must run first.

        Raises:
            ValueError: If no rows remain for the last run, or if the number
                of column blocks does not match the number of known groups.
        """
        # atleast_2d keeps a single-row file two-dimensional, matching
        # load_thermo_data; without it the column indexing below would fail.
        data = np.atleast_2d(np.loadtxt(Path(filepath)))
        dump_msd = self.runs_info[-1]["dump_msd"] if self.runs_info else 1
        data = data[self._last_run_slice(len(data), dump_msd)]
        if data.size == 0:
            raise ValueError(f"No MSD data available for the last run in {filepath}")

        times = data[:, 0] - data[0, 0]
        # Each group occupies 6 columns: 3 MSD components + 3 SDC components
        num_groups = (data.shape[1] - 1) // 6
        group_ids = sorted(self.groups_info.keys())
        if len(group_ids) != num_groups:
            raise ValueError("Mismatch between number of groups in model.xyz and msd.out")

        msd_dict = {}
        for i, gid in enumerate(group_ids):
            xyz = data[:, 1 + i * 6 : 4 + i * 6]
            total = xyz.sum(axis=1, keepdims=True)
            msd_dict[gid] = (times, np.hstack([xyz, total]))
        return msd_dict

    @staticmethod
    def calculate_diffusivity(times: np.ndarray, msd_data: np.ndarray,
                              fit_start: float = 0.4, fit_end: float = 1.0) -> np.ndarray:
        """Linear fit of MSD on [fit_start, fit_end] window. Returns D in cm^2/s.

        *fit_start* and *fit_end* are fractions of the number of time points.

        Raises:
            ValueError: If the window selects fewer than two points, which
                cannot determine a slope.
        """
        n = len(times)
        sl = slice(int(n * fit_start), int(n * fit_end))
        n_fit = len(times[sl])
        if n_fit < 2:
            # polyfit would either fail obscurely (empty window) or return a
            # meaningless slope with only a warning (single point).
            raise ValueError(
                f"MSD fit window [{fit_start}, {fit_end}] selects {n_fit} of {n} "
                "points; at least 2 are required for a linear fit"
            )
        slopes = np.polyfit(times[sl], msd_data[sl], 1)[0]
        # x, y, z use 1D Einstein relation (slope/2); total uses 3D (slope/6)
        divisors = np.array([2.0, 2.0, 2.0, 6.0])
        return slopes / divisors * 1e-4  # A^2/ps -> cm^2/s

    def calculate_conductivity(self, diffusivities: np.ndarray, num_ions: int, charge: int) -> np.ndarray:
        """Nernst-Einstein conductivity in mS/cm.

        Requires :attr:`volume` and :attr:`temperature` to have been set by
        :meth:`load_thermo_data`.
        """
        vol_cm3 = self.volume * 1e-24  # A^3 -> cm^3
        # sigma = n * q^2 * D / (V * k_B * T); factor 1000 converts S/cm -> mS/cm
        factor = num_ions * charge**2 * consts.e**2 / (vol_cm3 * consts.k * self.temperature)
        return diffusivities * factor * 1000

    def _format_summary(self, element: str, group_id: int, num_ions: int,
                        D: np.ndarray, sigma: np.ndarray = None) -> str:
        """Render the plain-text report for one group.

        The conductivity section is included only when *sigma* is given.
        """
        labels = ["x", "y", "z", "total"]
        lines = [
            "=" * 70,
            "MD ANALYSIS RESULTS",
            "=" * 70,
            f"Element: {element}",
            f"Group ID: {group_id}",
            f"Number of atoms: {num_ions}",
            f"Temperature: {self.temperature:.2f} K",
            f"Volume: {self.volume:.2f} A^3",
            "-" * 70,
            "DIFFUSIVITY (cm^2/s):",
        ]
        lines += [f" D_{lab:<6} = {d:.2e}" for lab, d in zip(labels, D)]
        if sigma is not None:
            lines += [
                "-" * 70,
                f"IONIC CONDUCTIVITY (mS/cm) - {element}^{self.ion_charge:+d}:",
            ]
            lines += [f" sigma_{lab:<6} = {s:.2f}" for lab, s in zip(labels, sigma)]
        lines.append("=" * 70)
        return "\n".join(lines)

    def save_results(self, element: str, group_id: int, num_ions: int,
                     D: np.ndarray, sigma: np.ndarray = None, output_dir: str = "."):
        """Write the summary to ``group_<id>_<element>_results.txt`` in *output_dir*."""
        path = Path(output_dir) / f"group_{group_id}_{element}_results.txt"
        path.write_text(self._format_summary(element, group_id, num_ions, D, sigma), encoding="utf-8")

    @staticmethod
    def save_msd_data(times: np.ndarray, msd_data: np.ndarray, element: str,
                      group_id: int, output_dir: str = "."):
        """Write times and MSD columns to ``group_<id>_<element>_msd.txt``."""
        path = Path(output_dir) / f"group_{group_id}_{element}_msd.txt"
        header = "time(ps) MSD_x(A^2) MSD_y(A^2) MSD_z(A^2) MSD_total(A^2)"
        # comments="" writes the header line without a leading '# '.
        np.savetxt(path, np.column_stack([times, msd_data]),
                   fmt="%.6f", header=header, comments="", encoding="utf-8")
182
+
183
+
184
def add_arguments(parser: argparse.ArgumentParser) -> None:
    """Register the MSD analysis options on *parser*.

    The parser's formatter is switched to one that shows defaults in --help.
    """
    parser.formatter_class = argparse.ArgumentDefaultsHelpFormatter

    # (flag, keyword arguments) pairs, registered in this order.
    option_specs = (
        ("--msd_file", {"default": "msd.out"}),
        ("--thermo_file", {"default": "thermo.out"}),
        ("--xyz_file", {"default": "model.xyz"}),
        ("--fit_start", {"type": float, "default": 0.4,
                         "help": "Start fraction for MSD fit"}),
        ("--fit_end", {"type": float, "default": 1.0,
                       "help": "End fraction for MSD fit"}),
        ("--diffuse_ion", {"default": "Li"}),
        ("--ion_charge", {"type": int, "default": 1}),
        ("--output_dir", {"default": "."}),
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
194
+
195
+
196
def run(args) -> int:
    """Run the MSD analysis for every group and write per-group output files.

    Creates ``args.output_dir`` if needed, loads thermo/model/MSD data, and
    for each group saves the MSD table and a results summary. Conductivity is
    computed only for groups whose element equals the analyzer's diffusing ion.
    Always returns 0.
    """
    Path(args.output_dir).mkdir(parents=True, exist_ok=True)

    analyzer = MDAnalyzer(diffuse_ion=args.diffuse_ion, ion_charge=args.ion_charge)
    analyzer.load_thermo_data(args.thermo_file)
    analyzer.parse_model_xyz(args.xyz_file)
    msd_by_group = analyzer.load_msd_data_all_groups(args.msd_file)

    for group_id in sorted(analyzer.groups_info.keys()):
        if group_id in msd_by_group:
            group = analyzer.groups_info[group_id]
            element = group["element"]
            num_ions = group["count"]
            times, msd_data = msd_by_group[group_id]

            analyzer.save_msd_data(times, msd_data, element, group_id, args.output_dir)
            D = analyzer.calculate_diffusivity(times, msd_data, args.fit_start, args.fit_end)

            # Only the mobile species gets a conductivity section.
            if element == analyzer.diffuse_ion:
                sigma = analyzer.calculate_conductivity(D, num_ions, analyzer.ion_charge)
            else:
                sigma = None
            analyzer.save_results(element, group_id, num_ions, D, sigma, args.output_dir)

    return 0
219
+
220
+
221
# Standalone entry point: build a parser from this module's own options and
# exit with run()'s return code.
if __name__ == "__main__":
    _parser = argparse.ArgumentParser(description=__doc__)
    add_arguments(_parser)
    raise SystemExit(run(_parser.parse_args()))
@@ -0,0 +1,149 @@
1
+ """
2
+ compute_rmsd.py
3
+
4
+ Compute RMSD between crystal structures with pymatgen's StructureMatcher.
5
+
6
+ Usage:
7
+ pymdkit rmsd file1 file2 # RMSD between two structure files
8
+ pymdkit rmsd folder/ # RMSD for every structure pair in a folder
9
+
10
+ Behaviour:
11
+ * Two file arguments: compute the RMSD of file1 vs file2.
12
+ * One folder argument: read every structure file in the folder and compute
13
+ the RMSD of each unique pair.
14
+
15
+ Output:
16
+ rmsd.txt - the RMSD result(s).
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import argparse
22
+ import re
23
+ import warnings
24
+ from itertools import combinations
25
+ from pathlib import Path
26
+
27
+ from pymatgen.analysis.structure_matcher import StructureMatcher
28
+ from pymatgen.core import Structure
29
+
30
# Suppress DeprecationWarnings whose message mentions
# pymatgen.core.structure_matcher.
warnings.filterwarnings("ignore",
                        message=r".*pymatgen\.core\.structure_matcher.*",
                        category=DeprecationWarning)

# Sub-command name and one-line help text (the module docstring shows the
# command being invoked as `pymdkit rmsd ...`).
COMMAND = "rmsd"
HELP = "Compute RMSD between two structure files, or all pairs in a folder."

# Name of the report file written by _write_report().
REPORT_FILE = "rmsd.txt"

# Keyword arguments passed to StructureMatcher in run().
SM_KWARGS = dict(ltol=0.2, stol=0.3, angle_tol=5.0,
                 primitive_cell=False, scale=True, allow_subset=False)

# Recognised structure-file extensions / names for folder mode.
# Suffixes are compared lower-cased; names are compared exactly.
STRUCT_SUFFIXES = {".cif", ".vasp", ".poscar", ".contcar", ".xyz", ".json"}
STRUCT_NAMES = {"POSCAR", "CONTCAR"}
45
+
46
+
47
def folder_index(name: str) -> int:
    """Return the integer spelled by the leading digits of *name*.

    Names that do not begin with a digit get the sentinel ``10 ** 9`` so they
    sort after every numbered name.
    """
    leading_digits = re.match(r"^(\d+)", name)
    if leading_digits is None:
        return 10 ** 9
    return int(leading_digits.group(1))
50
+
51
+
52
def is_structure_file(p: Path) -> bool:
    """Tell whether *p* is an existing file recognised as a structure file.

    A file qualifies when its lower-cased suffix is in ``STRUCT_SUFFIXES`` or
    its exact name is in ``STRUCT_NAMES``.
    """
    if not p.is_file():
        return False
    if p.suffix.lower() in STRUCT_SUFFIXES:
        return True
    return p.name in STRUCT_NAMES
55
+
56
+
57
def load_structure(path: Path):
    """Read *path* with ``Structure.from_file``; return ``None`` on any failure.

    Reading is best-effort: a failure is reported on stdout and does not
    abort the caller.
    """
    try:
        structure = Structure.from_file(str(path))
    except Exception as exc:
        print(f" [warn] could not read {path}: {exc}")
        structure = None
    return structure
63
+
64
+
65
def compute_rmsd(s_i, s_j, matcher):
    """Return the RMSD between two structures, or ``None`` if unavailable.

    Args:
        s_i, s_j: Structures to compare; ``None`` (e.g. a failed load) makes
            the result ``None`` without calling the matcher.
        matcher: Object providing ``get_rms_dist(s_i, s_j)``.

    Returns:
        The first element of the matcher's result, or ``None`` when either
        structure is missing, the matcher returns ``None``, or the matcher
        raises.
    """
    if s_i is None or s_j is None:
        return None
    try:
        res = matcher.get_rms_dist(s_i, s_j)
    except Exception as exc:
        # Stay best-effort, but report the failure so it is not mistaken for
        # a plain "no match".
        print(f" [warn] RMSD calculation failed: {exc}")
        return None
    return None if res is None else res[0]
73
+
74
+
75
def add_arguments(parser):
    """Register the positional ``paths`` argument (one or more values)."""
    positional_options = {
        "nargs": "+",
        "help": "Two structure files, or a single folder.",
    }
    parser.add_argument("paths", **positional_options)
78
+
79
+
80
def _write_report(root, header_lines, rows):
    """Write the RMSD table to ``root / REPORT_FILE``.

    Args:
        root: Directory (``Path``) that receives the report.
        header_lines: Lines placed above the table.
        rows: list of (name_i, name_j, rmsd_or_None); a ``None`` RMSD is
            shown as ``N/A``.
    """
    lines = list(header_lines)
    lines.append("-" * 80)
    lines.append(f"{'Structure A':<34} {'Structure B':<34} {'RMSD':>10}")
    lines.append("-" * 80)
    for a, b, r in rows:
        r_str = "N/A" if r is None else f"{r:.6f}"
        lines.append(f"{a:<34} {b:<34} {r_str:>10}")
    # An explicit encoding keeps the report identical across platforms and
    # lets non-ASCII file names be written where the locale default could not.
    (root / REPORT_FILE).write_text("\n".join(lines) + "\n", encoding="utf-8")
90
+
91
+
92
def run(args):
    """Compute RMSD for two files or for every file pair in one folder.

    The report is always written to the current working directory. Returns 0
    after handling either mode (including a folder with fewer than two
    structure files) and 1, after printing a usage line, when the arguments
    match neither mode.
    """
    matcher = StructureMatcher(**SM_KWARGS)
    report_dir = Path(".").resolve()
    targets = [Path(raw) for raw in args.paths]
    banner = "=" * 80

    # --- folder mode -------------------------------------------------------
    if len(targets) == 1 and targets[0].is_dir():
        folder = targets[0]
        files = [entry for entry in folder.iterdir() if is_structure_file(entry)]
        files.sort(key=lambda entry: (folder_index(entry.name), entry.name))
        if len(files) < 2:
            print(f"Need at least 2 structure files in {folder}/ "
                  f"(found {len(files)}). Nothing to do.")
            return 0

        # Load every structure once, then compare each unique pair.
        loaded = {}
        for entry in files:
            loaded[entry.name] = load_structure(entry)
        names = [entry.name for entry in files]
        rows = [
            (first, second, compute_rmsd(loaded[first], loaded[second], matcher))
            for first, second in combinations(names, 2)
        ]

        header = [
            banner,
            "Pairwise RMSD Report",
            f"Folder : {folder}",
            f"Files : {len(files)}",
            f"Pairs : {len(rows)}",
            banner,
            "",
        ]
        _write_report(report_dir, header, rows)
        print(f"Wrote {REPORT_FILE} ({len(rows)} pair(s) from {folder}/).")
        return 0

    # --- two-file mode -----------------------------------------------------
    if len(targets) == 2 and targets[0].is_file() and targets[1].is_file():
        f1, f2 = targets
        first_structure = load_structure(f1)
        second_structure = load_structure(f2)
        rmsd = compute_rmsd(first_structure, second_structure, matcher)
        header = [banner, "RMSD Report", banner, ""]
        _write_report(report_dir, header, [(f1.name, f2.name, rmsd)])
        if rmsd is None:
            r_str = "N/A (no match)"
        else:
            r_str = f"{rmsd:.6f}"
        print(f"RMSD({f1.name}, {f2.name}) = {r_str}")
        print(f"Wrote {REPORT_FILE}.")
        return 0

    print("Usage: pymdkit rmsd file1 file2 OR pymdkit rmsd folder/")
    return 1
144
+
145
+
146
# Standalone entry point: parse this module's own arguments and exit with
# run()'s return code (0 on success, 1 on a usage error).
if __name__ == "__main__":
    _p = argparse.ArgumentParser(description=__doc__)
    add_arguments(_p)
    raise SystemExit(run(_p.parse_args()))
@@ -0,0 +1,106 @@
1
+ """
2
+ Gather CONTCAR files from VASP job folders into one folder.
3
+
4
+ Scans the sub-folders of the current directory (like `ehull`) and copies each
5
+ folder's CONTCAR into an output folder, renamed ``<folder-name>.vasp``. For
6
+ example the CONTCAR in ``1-Li3YCl6-164/`` becomes ``1-Li3YCl6-164.vasp``.
7
+
8
+ gather-contcar -of vasp-opted
9
+ gather-contcar -of vasp-opted -ehull 0.028 # only E_hull < 0.028 eV/atom
10
+
11
+ With -ehull, the ehull.txt report in the current directory (produced by
12
+ `pymdkit ehull`) is read and only structures whose E_hull is below the given
13
+ threshold (eV/atom) are gathered.
14
+ """
15
+
16
+ import argparse
17
+ import shutil
18
+ from pathlib import Path
19
+
20
+ try:
21
+ from . import _fileio
22
+ except ImportError: # running as a standalone script
23
+ import _fileio
24
+
25
# Sub-command name and one-line help text for this module; presumably read by
# the pymdkit dispatcher (confirm against the dispatcher, not shown here).
COMMAND = "gather-contcar"
HELP = "Gather CONTCAR files from VASP job folders into one folder (renamed .vasp)."
27
+
28
+
29
def parse_ehull_file(path):
    """Return {folder_name: e_hull} from an ehull.txt report.

    Data rows are ``<folder> <formula> <system> <FinalE> <E_form> <E_hull>``;
    the folder is the first token and E_hull the last. Lines with fewer than
    two tokens, and header / separator lines (whose last token is not a
    number), are ignored. A folder listed more than once keeps its last value.
    """
    hull_by_folder = {}
    report_text = Path(path).read_text()
    for row in report_text.splitlines():
        tokens = row.split()
        if len(tokens) >= 2:
            try:
                e_hull = float(tokens[-1])
            except ValueError:
                # Not a data row.
                continue
            hull_by_folder[tokens[0]] = e_hull
    return hull_by_folder
46
+
47
+
48
def add_arguments(parser):
    """Register the gather-contcar options on *parser*.

    ``-of/--output-folder`` is required; ``-ehull/--ehull`` is an optional
    float threshold; ``--entry-path`` and ``--ehull-file`` have defaults.
    """
    # (flags, keyword arguments) pairs, registered in this order.
    option_specs = (
        (("-of", "--output-folder"), {
            "dest": "output_folder",
            "required": True,
            "metavar": "DIR",
            "help": "Folder to create and fill with <name>.vasp files.",
        }),
        (("-ehull", "--ehull"), {
            "dest": "ehull",
            "type": float,
            "default": None,
            "metavar": "EV",
            "help": "Only gather folders whose E_hull (from the report) "
                    "is below this value in eV/atom.",
        }),
        (("--entry-path",), {
            "default": ".",
            "help": "Directory to scan for VASP job folders (default: .).",
        }),
        (("--ehull-file",), {
            "default": "ehull.txt",
            "help": "E_hull report read with -ehull (default: ehull.txt).",
        }),
    )
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)
60
+
61
+
62
def run(args):
    """Copy each job folder's CONTCAR into the output folder as ``<name>.vasp``.

    When ``args.ehull`` is given, the E_hull report is read first and only
    folders whose value is below the threshold are copied; folders with a
    CONTCAR but no report entry are counted and skipped. Exits via
    ``SystemExit`` if the report is required but missing. Returns 0.
    """
    root = Path(args.entry_path)
    out_dir = Path(args.output_folder)
    threshold = args.ehull

    ehull_map = None
    if threshold is not None:
        report = Path(args.ehull_file)
        if not report.is_file():
            raise SystemExit(f"Error: {report} not found (needed for -ehull).")
        ehull_map = parse_ehull_file(report)

    out_dir.mkdir(parents=True, exist_ok=True)
    gathered = 0
    no_ehull = []
    for sub in _fileio.subfolders(root):
        contcar = sub / "CONTCAR"
        if not contcar.is_file():
            continue
        name = sub.name

        tag = ""
        if ehull_map is not None:
            val = ehull_map.get(name)
            if val is None:
                no_ehull.append(name)
                continue
            # Written as a negated "<" so a NaN value is skipped as well.
            if not (val < threshold):
                continue
            tag = f" (E_hull={val:.3f})"

        dest = out_dir / f"{name}.vasp"
        shutil.copy2(contcar, dest)
        print(f"{contcar} -> {dest}{tag}")
        gathered += 1

    if no_ehull:
        print(f"Note: {len(no_ehull)} folder(s) had a CONTCAR but no E_hull entry "
              f"in {args.ehull_file}; skipped.")
    print(f"Done: gathered {gathered} CONTCAR file(s) into {out_dir}/")
    return 0
101
+
102
+
103
# Standalone entry point: parse this module's own arguments and exit with
# run()'s return code.
if __name__ == "__main__":
    _p = argparse.ArgumentParser(description=__doc__)
    add_arguments(_p)
    raise SystemExit(run(_p.parse_args()))
@@ -0,0 +1,141 @@
1
+ """
2
+ Build scf-converged.xyz from per-job VASP single-point SCF runs.
3
+
4
+ Scans every sub-folder of the entry path that contains an OUTCAR (regardless of
5
+ how the folders are named). For each such folder the geometry / energy / forces
6
+ / stress are read from OUTCAR, and Config_type is copied from a sibling
7
+ ``<folder>/<folder>.xyz`` (or the single .xyz in the folder) if present. Folders
8
+ that did not reach EDIFF are skipped. Output is ordered by the first integer in
9
+ each folder name (so frame_2 precedes frame_10), then alphabetically.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import argparse
15
+ import os
16
+ import re
17
+ from pathlib import Path
18
+
19
# Sub-command name and one-line help text for this module; presumably read by
# the pymdkit dispatcher (confirm against the dispatcher, not shown here).
COMMAND = "outcar2xyz"
HELP = "Collect SCF-converged VASP job folders into one extxyz file."
21
+
22
+
23
def folder_sort_key(name):
    """Build a sort key of (first integer in *name*, *name*).

    Names containing no digits use ``inf`` as the number, so they sort after
    all numbered names and then alphabetically among themselves.
    """
    first_number = re.search(r"\d+", name)
    if first_number is None:
        return (float("inf"), name)
    return (int(first_number.group()), name)
27
+
28
+
29
def scf_converged(outcar_path):
    """Return True if the OUTCAR reports that EDIFF was reached.

    The file is scanned line by line rather than read whole, so large OUTCAR
    files are not held in memory, and undecodable bytes are replaced instead
    of raising (the marker itself is plain ASCII).

    Returns:
        True when the marker text occurs in the file; False when it does not
        or when the file cannot be opened or read.
    """
    marker = "aborting loop because EDIFF is reached"
    try:
        with open(outcar_path, encoding="utf-8", errors="replace") as f:
            return any(marker in line for line in f)
    except OSError:
        return False
35
+
36
+
37
def read_config_type(folder):
    """Extract Config_type=... from a sibling extxyz in *folder*, if any.

    ``<folder>/<foldername>.xyz`` is preferred; otherwise the folder must
    contain exactly one ``.xyz`` file. The value is taken from the file's
    second line and may be quoted or unquoted, so values containing
    | ( ) , = are preserved intact. Returns ``None`` when no usable file or
    no ``Config_type`` entry is found.
    """
    folder = Path(folder)
    xyz_path = folder / f"{folder.name}.xyz"
    if not xyz_path.exists():
        # Fall back to the folder's only .xyz file; anything else is ambiguous.
        matches = sorted(folder.glob("*.xyz"))
        if len(matches) != 1:
            return None
        xyz_path = matches[0]
        if not xyz_path.exists():
            return None

    try:
        with open(xyz_path) as f:
            f.readline()  # atom-count line
            comment = f.readline()
    except OSError:
        return None

    found = re.search(r'Config_type=("([^"]*)"|(\S+))', comment)
    if found is None:
        return None
    quoted, bare = found.group(2), found.group(3)
    return bare if quoted is None else quoted
61
+
62
+
63
def find_job_folders(entry_path):
    """Names of sub-folders of *entry_path* that contain an OUTCAR.

    The names are ordered with ``folder_sort_key``.
    """
    job_names = []
    for child in Path(entry_path).iterdir():
        if child.is_dir() and (child / "OUTCAR").exists():
            job_names.append(child.name)
    job_names.sort(key=folder_sort_key)
    return job_names
69
+
70
+
71
def add_arguments(parser: argparse.ArgumentParser) -> None:
    """Register the outcar2xyz options on *parser*.

    The parser's formatter is switched to one that shows defaults in --help.
    """
    parser.formatter_class = argparse.ArgumentDefaultsHelpFormatter

    # (flag, default, help) triples, registered in this order.
    option_specs = (
        ("--entry-path", ".", "Directory to scan for VASP job sub-folders."),
        ("--output", "scf-converged.xyz", "Output extxyz trajectory file."),
        ("--summary", "summary.txt", "Summary text file to write."),
    )
    for flag, default, help_text in option_specs:
        parser.add_argument(flag, default=default, help=help_text)
79
+
80
+
81
def run(args) -> int:
    """Collect SCF-converged jobs into one extxyz file and write a summary.

    Each job folder under ``args.entry_path`` is skipped when its OUTCAR lacks
    the convergence marker or cannot be read; otherwise the structure is kept,
    with ``Config_type`` attached when one is found. The output and summary
    files are placed inside the entry path. Always returns 0.
    """
    # ase is imported lazily so `pymdkit --help` works without ase installed.
    from ase.io import read, write

    root = Path(args.entry_path)

    def under_root(filename):
        # Use the bare file name when scanning ".", else join it onto root.
        if str(root) == ".":
            return filename
        return os.path.join(str(root), filename)

    job_dirs = find_job_folders(root)
    print(f"Found {len(job_dirs)} VASP job folder(s) under {root.resolve()}")

    structures = []
    for name in job_dirs:
        job_dir = root / name
        outcar = job_dir / "OUTCAR"

        if not scf_converged(outcar):
            print(f"{name}: SCF not converged -> skipping")
            continue

        try:
            atoms = read(str(outcar), format="vasp-out")
        except Exception as e:  # noqa: BLE001 - mirror original behavior
            print(f"{name}: error reading OUTCAR ({e}) -> skipping")
            continue

        config_type = read_config_type(job_dir)
        if config_type is None:
            print(f"{name}: warning, Config_type not found")
        else:
            atoms.info["Config_type"] = config_type

        structures.append(atoms)
        print(f"{name}: OK")

    output = under_root(args.output)
    if not structures:
        print("\nNo converged structures found")
    else:
        write(output, structures, format="extxyz")
        print(f"\nWrote {len(structures)} structures to {output}")

    total, kept = len(job_dirs), len(structures)
    ratio = (kept / total * 100) if total else 0.0
    summary = "\n".join([
        f"Total job folders processed: {total}",
        f"Structures with SCF converged: {kept}",
        f"Selection ratio: {ratio:.2f}%",
    ])
    print("\n" + summary)

    with open(under_root(args.summary), "w") as f:
        f.write(summary)

    return 0
136
+
137
+
138
# Standalone entry point: parse this module's own arguments and exit with
# run()'s return code.
if __name__ == "__main__":
    _parser = argparse.ArgumentParser(description=__doc__)
    add_arguments(_parser)
    raise SystemExit(run(_parser.parse_args()))