pymdkit 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pymdkit/__init__.py +3 -0
- pymdkit/commands/__init__.py +1 -0
- pymdkit/commands/_fileio.py +96 -0
- pymdkit/commands/_vaspset.py +169 -0
- pymdkit/commands/add_groups.py +77 -0
- pymdkit/commands/compute_ehull.py +230 -0
- pymdkit/commands/compute_msd_all_groups.py +224 -0
- pymdkit/commands/compute_rmsd.py +149 -0
- pymdkit/commands/gather_contcar.py +106 -0
- pymdkit/commands/outcar2xyz.py +141 -0
- pymdkit/commands/select_candidate.py +119 -0
- pymdkit/commands/stru2xyz.py +100 -0
- pymdkit/commands/supercell.py +164 -0
- pymdkit/commands/symmetrize.py +271 -0
- pymdkit/commands/vasp_relax.py +62 -0
- pymdkit/commands/vasp_static.py +59 -0
- pymdkit/pymdkit_main.py +115 -0
- pymdkit-1.0.0.dist-info/METADATA +201 -0
- pymdkit-1.0.0.dist-info/RECORD +23 -0
- pymdkit-1.0.0.dist-info/WHEEL +5 -0
- pymdkit-1.0.0.dist-info/entry_points.txt +2 -0
- pymdkit-1.0.0.dist-info/licenses/LICENSE +674 -0
- pymdkit-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Calculate ionic diffusivity and conductivity for all groups from GPUMD MD data.
|
|
3
|
+
|
|
4
|
+
(Originally compute_msd_all_groups.py. Refactored to expose add_arguments/run
|
|
5
|
+
for the pymdkit dispatcher while remaining runnable standalone.)
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import argparse
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Dict, Tuple
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
import scipy.constants as consts
|
|
16
|
+
|
|
17
|
+
COMMAND = "msd"
|
|
18
|
+
HELP = "Diffusivity & conductivity for all groups from GPUMD MSD data."
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class MDAnalyzer:
    """Analyze GPUMD output for ionic diffusivity and conductivity.

    Intended call order: ``load_thermo_data`` -> ``parse_model_xyz`` ->
    ``load_msd_data_all_groups`` -> ``calculate_diffusivity`` /
    ``calculate_conductivity`` -> ``save_results`` / ``save_msd_data``.
    """

    def __init__(self, diffuse_ion: str = "Li", ion_charge: int = 1):
        """Store the mobile-ion settings and parse ``run.in`` from the cwd.

        Args:
            diffuse_ion: Element symbol for which conductivity is reported.
            ion_charge: Charge number of that ion (used as q in sigma).
        """
        self.diffuse_ion = diffuse_ion
        self.ion_charge = ion_charge
        # Both are set by load_thermo_data(); None until then.
        self.temperature = None
        self.volume = None
        # Set by parse_model_xyz(): {group_id: {"element": str, "count": int}}.
        self.groups_info: Dict[int, Dict] = {}
        # One dict per "run" line of ./run.in; [] when the file is absent.
        self.runs_info = self._parse_run_file()

    @staticmethod
    def _parse_run_file(filepath: str = "run.in") -> list:
        """Return one settings dict per ``run`` line found in *filepath*.

        Each dict holds ``steps``, ``dump_interval``, ``dump_msd`` and
        ``time_step`` as they were in effect when the ``run`` keyword was
        reached. Returns ``[]`` if the file does not exist.
        """
        path = Path(filepath)
        if not path.exists():
            return []

        runs = []
        dump_interval, dump_msd, time_step = 10, None, 1.0
        for line in path.read_text(encoding="utf-8").splitlines():
            parts = line.split()
            if not parts:
                continue
            key = parts[0]
            if key == "dump_thermo":
                dump_interval = int(parts[1])
            elif key == "dump_msd":
                dump_msd = int(parts[1])
            elif key == "time_step":
                time_step = float(parts[1])
            elif key == "run":
                runs.append({
                    "steps": int(parts[1]),
                    "dump_interval": dump_interval,
                    # Fall back to the thermo interval when no MSD interval was given.
                    "dump_msd": dump_msd or dump_interval,
                    "time_step": time_step,
                })
        return runs

    def _last_run_slice(self, n_points: int, interval: int) -> slice:
        """Index slice covering only the last run's data.

        With no parsed runs the slice spans all *n_points* rows; otherwise it
        keeps the trailing ``steps // interval`` rows of the last run.
        """
        if not self.runs_info:
            return slice(0, n_points)
        points_in_last = self.runs_info[-1]["steps"] // interval
        return slice(max(0, n_points - points_in_last), n_points)

    @staticmethod
    def _calculate_volume(data: np.ndarray) -> np.ndarray:
        """Return the per-row cell volume from a 2-D thermo array.

        Raises:
            ValueError: if the array has neither 12 nor 18 columns.
        """
        n_cols = data.shape[1]
        if n_cols == 12:  # orthogonal box: lx, ly, lz in cols 9-11
            return data[:, 9] * data[:, 10] * data[:, 11]
        if n_cols == 18:  # triclinic box: 3x3 matrix in cols 9-17
            box = data[:, 9:18].reshape(-1, 3, 3)
            # |a . (b x c)| evaluated row by row.
            return np.abs(np.einsum("...i,...i->...", box[:, 0], np.cross(box[:, 1], box[:, 2])))
        raise ValueError(f"Unsupported thermo.out format with {n_cols} columns")

    def load_thermo_data(self, filepath: str = "thermo.out") -> Tuple[float, float]:
        """Average temperature (column 0) and volume over the last run.

        Stores the results on ``self.temperature`` / ``self.volume`` and
        returns them as ``(temperature, volume)``.
        """
        data = np.atleast_2d(np.loadtxt(Path(filepath)))
        dump_interval = self.runs_info[-1]["dump_interval"] if self.runs_info else 1
        sl = self._last_run_slice(len(data), dump_interval)

        self.temperature = float(np.mean(data[sl, 0]))
        self.volume = float(np.mean(self._calculate_volume(data)[sl]))
        return self.temperature, self.volume

    def parse_model_xyz(self, filepath: str = "model.xyz") -> Dict[int, Dict]:
        """Build {group_id: {'element', 'count'}} from the extended XYZ.

        The first two lines are skipped; on every remaining line with at least
        five tokens, token 0 is the element and token 4 the integer group id.

        Raises:
            ValueError: if one group id is used by two different elements.
        """
        lines = Path(filepath).read_text(encoding="utf-8").splitlines()[2:]

        groups: Dict[int, Dict] = {}
        for line in lines:
            parts = line.split()
            if len(parts) < 5:
                continue
            element, gid = parts[0], int(parts[4])
            if gid not in groups:
                groups[gid] = {"element": element, "count": 0}
            elif groups[gid]["element"] != element:
                raise ValueError(
                    f"Inconsistent element in group {gid}: {element} vs {groups[gid]['element']}"
                )
            groups[gid]["count"] += 1

        self.groups_info = groups
        return groups

    def load_msd_data_all_groups(self, filepath: str = "msd.out") -> Dict[int, Tuple[np.ndarray, np.ndarray]]:
        """Return {group_id: (times, [MSD_x, MSD_y, MSD_z, MSD_total])}.

        Only the rows belonging to the last run are kept, and times are
        shifted so the first kept row is at zero. Groups are matched to column
        blocks in ascending group-id order.

        Raises:
            ValueError: if the file holds no usable rows, or if its number of
                column blocks differs from the number of groups known from
                ``parse_model_xyz``.
        """
        # atleast_2d keeps a single-row file two-dimensional, mirroring
        # load_thermo_data; without it the column indexing below fails.
        data = np.atleast_2d(np.loadtxt(Path(filepath)))
        dump_msd = self.runs_info[-1]["dump_msd"] if self.runs_info else 1
        data = data[self._last_run_slice(len(data), dump_msd)]
        if data.shape[0] == 0 or data.shape[1] == 0:
            raise ValueError(f"No MSD data rows available in {filepath}")

        times = data[:, 0] - data[0, 0]
        # Each group occupies 6 columns: 3 MSD components + 3 SDC components
        num_groups = (data.shape[1] - 1) // 6
        group_ids = sorted(self.groups_info.keys())
        if len(group_ids) != num_groups:
            raise ValueError("Mismatch between number of groups in model.xyz and msd.out")

        msd_dict = {}
        for i, gid in enumerate(group_ids):
            xyz = data[:, 1 + i * 6 : 4 + i * 6]
            total = xyz.sum(axis=1, keepdims=True)
            msd_dict[gid] = (times, np.hstack([xyz, total]))
        return msd_dict

    @staticmethod
    def calculate_diffusivity(times: np.ndarray, msd_data: np.ndarray,
                              fit_start: float = 0.4, fit_end: float = 1.0) -> np.ndarray:
        """Linear fit of MSD on [fit_start, fit_end] window. Returns D in cm^2/s.

        *fit_start* / *fit_end* are fractions of the number of samples. The
        result has one entry per MSD column (x, y, z, total).

        Raises:
            ValueError: if the window holds fewer than two samples, since a
                straight line cannot be fitted to it.
        """
        n = len(times)
        sl = slice(int(n * fit_start), int(n * fit_end))
        n_fit = len(times[sl])
        if n_fit < 2:
            raise ValueError(
                f"MSD fit window [{fit_start}, {fit_end}] contains {n_fit} point(s); "
                "at least 2 are required"
            )
        slopes = np.polyfit(times[sl], msd_data[sl], 1)[0]
        # x, y, z use 1D Einstein relation (slope/2); total uses 3D (slope/6)
        divisors = np.array([2.0, 2.0, 2.0, 6.0])
        return slopes / divisors * 1e-4  # A^2/ps -> cm^2/s

    def calculate_conductivity(self, diffusivities: np.ndarray, num_ions: int, charge: int) -> np.ndarray:
        """Nernst-Einstein conductivity in mS/cm.

        Raises:
            RuntimeError: if ``load_thermo_data`` has not populated the
                temperature and volume yet.
        """
        if self.volume is None or self.temperature is None:
            raise RuntimeError("load_thermo_data() must be called before calculate_conductivity()")
        vol_cm3 = self.volume * 1e-24
        # sigma = n * q^2 * D / (V * k_B * T); factor 1000 converts S/cm -> mS/cm
        factor = num_ions * charge**2 * consts.e**2 / (vol_cm3 * consts.k * self.temperature)
        return diffusivities * factor * 1000

    def _format_summary(self, element: str, group_id: int, num_ions: int,
                        D: np.ndarray, sigma: np.ndarray | None = None) -> str:
        """Render the text report for one group; *sigma* section is optional."""
        labels = ["x", "y", "z", "total"]
        lines = [
            "=" * 70,
            "MD ANALYSIS RESULTS",
            "=" * 70,
            f"Element: {element}",
            f"Group ID: {group_id}",
            f"Number of atoms: {num_ions}",
            f"Temperature: {self.temperature:.2f} K",
            f"Volume: {self.volume:.2f} A^3",
            "-" * 70,
            "DIFFUSIVITY (cm^2/s):",
        ]
        lines += [f" D_{lab:<6} = {d:.2e}" for lab, d in zip(labels, D)]
        if sigma is not None:
            lines += [
                "-" * 70,
                f"IONIC CONDUCTIVITY (mS/cm) - {element}^{self.ion_charge:+d}:",
            ]
            lines += [f" sigma_{lab:<6} = {s:.2f}" for lab, s in zip(labels, sigma)]
        lines.append("=" * 70)
        return "\n".join(lines)

    def save_results(self, element: str, group_id: int, num_ions: int,
                     D: np.ndarray, sigma: np.ndarray | None = None, output_dir: str = "."):
        """Write the summary to ``group_<id>_<element>_results.txt`` in *output_dir*."""
        path = Path(output_dir) / f"group_{group_id}_{element}_results.txt"
        path.write_text(self._format_summary(element, group_id, num_ions, D, sigma), encoding="utf-8")

    @staticmethod
    def save_msd_data(times: np.ndarray, msd_data: np.ndarray, element: str,
                      group_id: int, output_dir: str = "."):
        """Write times and MSD columns to ``group_<id>_<element>_msd.txt``."""
        path = Path(output_dir) / f"group_{group_id}_{element}_msd.txt"
        header = "time(ps) MSD_x(A^2) MSD_y(A^2) MSD_z(A^2) MSD_total(A^2)"
        np.savetxt(path, np.column_stack([times, msd_data]),
                   fmt="%.6f", header=header, comments="", encoding="utf-8")
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def add_arguments(parser: argparse.ArgumentParser) -> None:
    """Register the options of the ``msd`` command on *parser*.

    Also switches the parser to ``ArgumentDefaultsHelpFormatter``.
    """
    parser.formatter_class = argparse.ArgumentDefaultsHelpFormatter

    # Input files, each a plain string option with a default.
    input_files = (
        ("--msd_file", "msd.out"),
        ("--thermo_file", "thermo.out"),
        ("--xyz_file", "model.xyz"),
    )
    for flag, default in input_files:
        parser.add_argument(flag, default=default)

    # Fit window, expressed as fractions of the MSD samples.
    fit_window = (
        ("--fit_start", 0.4, "Start fraction for MSD fit"),
        ("--fit_end", 1.0, "End fraction for MSD fit"),
    )
    for flag, default, text in fit_window:
        parser.add_argument(flag, type=float, default=default, help=text)

    parser.add_argument("--diffuse_ion", default="Li")
    parser.add_argument("--ion_charge", type=int, default=1)
    parser.add_argument("--output_dir", default=".")
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def run(args) -> int:
    """Run the MSD analysis for every group and write per-group output files.

    For each group present in both ``model.xyz`` and ``msd.out`` the MSD table
    and a results summary are written to ``args.output_dir``. Conductivity is
    only computed for groups whose element equals ``args.diffuse_ion``.
    Returns 0.
    """
    Path(args.output_dir).mkdir(parents=True, exist_ok=True)

    analyzer = MDAnalyzer(diffuse_ion=args.diffuse_ion, ion_charge=args.ion_charge)
    analyzer.load_thermo_data(args.thermo_file)
    analyzer.parse_model_xyz(args.xyz_file)
    per_group_msd = analyzer.load_msd_data_all_groups(args.msd_file)

    # Group ids are unique dict keys, so sorting the items orders by id only.
    for group_id, group in sorted(analyzer.groups_info.items()):
        if group_id not in per_group_msd:
            continue
        element = group["element"]
        num_ions = group["count"]
        times, msd_data = per_group_msd[group_id]

        analyzer.save_msd_data(times, msd_data, element, group_id, args.output_dir)
        D = analyzer.calculate_diffusivity(times, msd_data, args.fit_start, args.fit_end)

        if element == analyzer.diffuse_ion:
            sigma = analyzer.calculate_conductivity(D, num_ions, analyzer.ion_charge)
        else:
            sigma = None
        analyzer.save_results(element, group_id, num_ions, D, sigma, args.output_dir)

    return 0
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
if __name__ == "__main__":
    # Standalone entry point: build a parser, register options, run, and
    # propagate run()'s return value as the process exit status.
    _parser = argparse.ArgumentParser(description=__doc__)
    add_arguments(_parser)
    _status = run(_parser.parse_args())
    raise SystemExit(_status)
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"""
|
|
2
|
+
compute_rmsd.py
|
|
3
|
+
|
|
4
|
+
Compute RMSD between crystal structures with pymatgen's StructureMatcher.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
pymdkit rmsd file1 file2 # RMSD between two structure files
|
|
8
|
+
pymdkit rmsd folder/ # RMSD for every structure pair in a folder
|
|
9
|
+
|
|
10
|
+
Behaviour:
|
|
11
|
+
* Two file arguments: compute the RMSD of file1 vs file2.
|
|
12
|
+
* One folder argument: read every structure file in the folder and compute
|
|
13
|
+
the RMSD of each unique pair.
|
|
14
|
+
|
|
15
|
+
Output:
|
|
16
|
+
rmsd.txt - the RMSD result(s).
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import argparse
|
|
22
|
+
import re
|
|
23
|
+
import warnings
|
|
24
|
+
from itertools import combinations
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
|
|
27
|
+
from pymatgen.analysis.structure_matcher import StructureMatcher
|
|
28
|
+
from pymatgen.core import Structure
|
|
29
|
+
|
|
30
|
+
warnings.filterwarnings("ignore",
|
|
31
|
+
message=r".*pymatgen\.core\.structure_matcher.*",
|
|
32
|
+
category=DeprecationWarning)
|
|
33
|
+
|
|
34
|
+
COMMAND = "rmsd"
|
|
35
|
+
HELP = "Compute RMSD between two structure files, or all pairs in a folder."
|
|
36
|
+
|
|
37
|
+
REPORT_FILE = "rmsd.txt"
|
|
38
|
+
|
|
39
|
+
SM_KWARGS = dict(ltol=0.2, stol=0.3, angle_tol=5.0,
|
|
40
|
+
primitive_cell=False, scale=True, allow_subset=False)
|
|
41
|
+
|
|
42
|
+
# Recognised structure-file extensions / names for folder mode.
|
|
43
|
+
STRUCT_SUFFIXES = {".cif", ".vasp", ".poscar", ".contcar", ".xyz", ".json"}
|
|
44
|
+
STRUCT_NAMES = {"POSCAR", "CONTCAR"}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def folder_index(name: str) -> int:
    """Return the integer that *name* starts with, or 10**9 if it has none.

    The large fallback makes names without a leading number sort last.
    """
    leading_digits = re.match(r"^(\d+)", name)
    if leading_digits is None:
        return 10 ** 9
    return int(leading_digits.group(1))
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def is_structure_file(p: Path) -> bool:
    """Tell whether *p* is a regular file recognised as a structure file.

    A file qualifies when its exact name is in ``STRUCT_NAMES`` or its
    lower-cased suffix is in ``STRUCT_SUFFIXES``.
    """
    if not p.is_file():
        return False
    if p.suffix.lower() in STRUCT_SUFFIXES:
        return True
    return p.name in STRUCT_NAMES
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def load_structure(path: Path):
    """Load *path* with ``Structure.from_file``; return ``None`` on any failure.

    Failures are reported on stdout rather than raised, so one unreadable
    file does not abort the caller.
    """
    filename = str(path)
    try:
        structure = Structure.from_file(filename)
    except Exception as exc:
        print(f" [warn] could not read {path}: {exc}")
        structure = None
    return structure
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def compute_rmsd(s_i, s_j, matcher):
    """Return the RMS distance between two structures, or ``None``.

    ``None`` is returned when either structure is ``None``, when
    ``matcher.get_rms_dist`` returns ``None``, or when it raises. In the last
    case a warning is printed so that a failed calculation is not silently
    reported the same way as "no match".

    Args:
        s_i, s_j: Structures to compare (``None`` if loading failed).
        matcher: Object providing ``get_rms_dist(s_i, s_j)``; the first item
            of its result is returned.
    """
    if s_i is None or s_j is None:
        return None
    try:
        res = matcher.get_rms_dist(s_i, s_j)
    except Exception as exc:
        # Best effort: keep processing other pairs, but leave a trace.
        print(f" [warn] RMSD calculation failed: {exc}")
        return None
    return None if res is None else res[0]
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def add_arguments(parser):
    """Register the positional ``paths`` argument (one or more values)."""
    positional = {
        "nargs": "+",
        "help": "Two structure files, or a single folder.",
    }
    parser.add_argument("paths", **positional)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _write_report(root, header_lines, rows):
    """Write the RMSD table to ``root / REPORT_FILE``.

    Args:
        root: Directory (a ``Path``) that receives the report.
        header_lines: Lines placed before the table.
        rows: list of (name_i, name_j, rmsd_or_None); ``None`` is shown as
            ``N/A``.
    """
    lines = list(header_lines)
    lines.append("-" * 80)
    lines.append(f"{'Structure A':<34} {'Structure B':<34} {'RMSD':>10}")
    lines.append("-" * 80)
    for a, b, r in rows:
        r_str = "N/A" if r is None else f"{r:.6f}"
        lines.append(f"{a:<34} {b:<34} {r_str:>10}")
    # Explicit UTF-8 so structure names with non-ASCII characters can be
    # written regardless of the platform's default encoding.
    (root / REPORT_FILE).write_text("\n".join(lines) + "\n", encoding="utf-8")
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def run(args):
    """Compute RMSD for a folder of structures or for two structure files.

    The report is written to ``REPORT_FILE`` in the current working
    directory. Returns 0 when a mode was handled (including the case of a
    folder with fewer than two structure files) and 1 after printing the
    usage line for any other argument combination.
    """
    matcher = StructureMatcher(**SM_KWARGS)
    report_dir = Path(".").resolve()
    targets = [Path(raw) for raw in args.paths]
    rule = "=" * 80

    # --- folder mode: every unique pair of structure files ------------------
    if len(targets) == 1 and targets[0].is_dir():
        folder = targets[0]
        candidates = [entry for entry in folder.iterdir() if is_structure_file(entry)]
        candidates.sort(key=lambda entry: (folder_index(entry.name), entry.name))
        if len(candidates) < 2:
            print(f"Need at least 2 structure files in {folder}/ "
                  f"(found {len(candidates)}). Nothing to do.")
            return 0

        # Load each file once, then reuse it for all pairs it takes part in.
        loaded = {entry.name: load_structure(entry) for entry in candidates}
        names = [entry.name for entry in candidates]
        rows = [
            (first, second, compute_rmsd(loaded[first], loaded[second], matcher))
            for first, second in combinations(names, 2)
        ]

        header = [
            rule,
            "Pairwise RMSD Report",
            f"Folder : {folder}",
            f"Files : {len(candidates)}",
            f"Pairs : {len(rows)}",
            rule,
            "",
        ]
        _write_report(report_dir, header, rows)
        print(f"Wrote {REPORT_FILE} ({len(rows)} pair(s) from {folder}/).")
        return 0

    # --- anything that is not exactly two existing files is a usage error ---
    if len(targets) != 2 or not all(target.is_file() for target in targets):
        print("Usage: pymdkit rmsd file1 file2 OR pymdkit rmsd folder/")
        return 1

    # --- two-file mode --------------------------------------------------------
    f1, f2 = targets
    rmsd = compute_rmsd(load_structure(f1), load_structure(f2), matcher)
    header = [rule, "RMSD Report", rule, ""]
    _write_report(report_dir, header, [(f1.name, f2.name, rmsd)])
    if rmsd is None:
        r_str = "N/A (no match)"
    else:
        r_str = f"{rmsd:.6f}"
    print(f"RMSD({f1.name}, {f2.name}) = {r_str}")
    print(f"Wrote {REPORT_FILE}.")
    return 0
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
if __name__ == "__main__":
    # Standalone entry point; run()'s return value becomes the exit status.
    _p = argparse.ArgumentParser(description=__doc__)
    add_arguments(_p)
    _status = run(_p.parse_args())
    raise SystemExit(_status)
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Gather CONTCAR files from VASP job folders into one folder.
|
|
3
|
+
|
|
4
|
+
Scans the sub-folders of the current directory (like `ehull`) and copies each
|
|
5
|
+
folder's CONTCAR into an output folder, renamed ``<folder-name>.vasp``. For
|
|
6
|
+
example the CONTCAR in ``1-Li3YCl6-164/`` becomes ``1-Li3YCl6-164.vasp``.
|
|
7
|
+
|
|
8
|
+
gather-contcar -of vasp-opted
|
|
9
|
+
gather-contcar -of vasp-opted -ehull 0.028 # only E_hull < 0.028 eV/atom
|
|
10
|
+
|
|
11
|
+
With -ehull, the ehull.txt report in the current directory (produced by
|
|
12
|
+
`pymdkit ehull`) is read and only structures whose E_hull is below the given
|
|
13
|
+
threshold (eV/atom) are gathered.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import argparse
|
|
17
|
+
import shutil
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
|
|
20
|
+
try:
|
|
21
|
+
from . import _fileio
|
|
22
|
+
except ImportError: # running as a standalone script
|
|
23
|
+
import _fileio
|
|
24
|
+
|
|
25
|
+
COMMAND = "gather-contcar"
|
|
26
|
+
HELP = "Gather CONTCAR files from VASP job folders into one folder (renamed .vasp)."
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def parse_ehull_file(path):
    """Map folder name -> E_hull for every data row of an ehull.txt report.

    A data row looks like
    ``<folder> <formula> <system> <FinalE> <E_form> <E_hull>``: the first
    token is taken as the folder and the last as E_hull. Lines with fewer
    than two tokens, and lines whose last token does not parse as a float
    (headers, separators), are skipped.
    """
    hull_by_folder = {}
    for raw_line in Path(path).read_text().splitlines():
        tokens = raw_line.split()
        if len(tokens) >= 2:
            try:
                energy = float(tokens[-1])
            except ValueError:
                # Not a data row.
                continue
            hull_by_folder[tokens[0]] = energy
    return hull_by_folder
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def add_arguments(parser):
    """Register the ``gather-contcar`` options on *parser*."""
    option_table = [
        (
            ("-of", "--output-folder"),
            dict(dest="output_folder", required=True, metavar="DIR",
                 help="Folder to create and fill with <name>.vasp files."),
        ),
        (
            ("-ehull", "--ehull"),
            dict(dest="ehull", type=float, default=None, metavar="EV",
                 help="Only gather folders whose E_hull (from the report) is below this value in eV/atom."),
        ),
        (
            ("--entry-path",),
            dict(default=".",
                 help="Directory to scan for VASP job folders (default: .)."),
        ),
        (
            ("--ehull-file",),
            dict(default="ehull.txt",
                 help="E_hull report read with -ehull (default: ehull.txt)."),
        ),
    ]
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def run(args):
    """Copy each job folder's CONTCAR into the output folder as ``<name>.vasp``.

    When ``args.ehull`` is given, the report ``args.ehull_file`` must exist
    (otherwise the process exits with an error message) and only folders
    whose E_hull is strictly below the threshold are copied; folders missing
    from the report are counted and mentioned at the end. Returns 0.
    """
    scan_root = Path(args.entry_path)
    target_dir = Path(args.output_folder)
    threshold = args.ehull

    hull_energies = None
    if threshold is not None:
        report = Path(args.ehull_file)
        if not report.is_file():
            raise SystemExit(f"Error: {report} not found (needed for -ehull).")
        hull_energies = parse_ehull_file(report)

    target_dir.mkdir(parents=True, exist_ok=True)
    copied = 0
    missing_from_report = []
    for job_dir in _fileio.subfolders(scan_root):
        source = job_dir / "CONTCAR"
        if not source.is_file():
            continue
        job = job_dir.name

        suffix = ""
        if hull_energies is not None:
            energy = hull_energies.get(job)
            if energy is None:
                missing_from_report.append(job)
                continue
            # Written as a negated "<" so a NaN energy is rejected as well.
            if not (energy < threshold):
                continue
            suffix = f" (E_hull={energy:.3f})"

        dest = target_dir / f"{job}.vasp"
        shutil.copy2(source, dest)
        print(f"{source} -> {dest}{suffix}")
        copied += 1

    if missing_from_report:
        print(f"Note: {len(missing_from_report)} folder(s) had a CONTCAR but no E_hull entry "
              f"in {args.ehull_file}; skipped.")
    print(f"Done: gathered {copied} CONTCAR file(s) into {target_dir}/")
    return 0
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
if __name__ == "__main__":
    # Standalone entry point; run()'s return value becomes the exit status.
    _p = argparse.ArgumentParser(description=__doc__)
    add_arguments(_p)
    _status = run(_p.parse_args())
    raise SystemExit(_status)
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Build scf-converged.xyz from per-job VASP single-point SCF runs.
|
|
3
|
+
|
|
4
|
+
Scans every sub-folder of the entry path that contains an OUTCAR (regardless of
|
|
5
|
+
how the folders are named). For each such folder the geometry / energy / forces
|
|
6
|
+
/ stress are read from OUTCAR, and Config_type is copied from a sibling
|
|
7
|
+
``<folder>/<folder>.xyz`` (or the single .xyz in the folder) if present. Folders
|
|
8
|
+
that did not reach EDIFF are skipped. Output is ordered by the first integer in
|
|
9
|
+
each folder name (so frame_2 precedes frame_10), then alphabetically.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import argparse
|
|
15
|
+
import os
|
|
16
|
+
import re
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
COMMAND = "outcar2xyz"
|
|
20
|
+
HELP = "Collect SCF-converged VASP job folders into one extxyz file."
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def folder_sort_key(name):
    """Return ``(first integer in name, name)`` as a sort key.

    Names without any digits get ``inf`` as the first element, which places
    them after all numbered names.
    """
    first_number = re.search(r"\d+", name)
    if first_number is None:
        return (float("inf"), name)
    return (int(first_number.group()), name)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def scf_converged(outcar_path):
    """Return True if the OUTCAR contains the "EDIFF is reached" message.

    The file is scanned line by line and the scan stops at the first hit, so
    a large OUTCAR is never held in memory as a whole. Bytes that cannot be
    decoded are replaced instead of raising, because a decode error is not an
    ``OSError`` and would otherwise abort the caller. Returns False when the
    file cannot be opened or read.
    """
    marker = "aborting loop because EDIFF is reached"
    try:
        with open(outcar_path, errors="replace") as f:
            return any(marker in line for line in f)
    except OSError:
        return False
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def read_config_type(folder):
    """Extract Config_type=... from a sibling extxyz in *folder*, if any.

    Looks first for ``<folder>/<foldername>.xyz`` (the original convention),
    then for any single .xyz file in the folder. Handles quoted and unquoted
    forms so values containing | ( ) , = are preserved intact.

    Only the second line of the file (the comment line) is inspected.
    Returns the value without surrounding quotes, or ``None`` when no
    suitable file exists, the file cannot be read, or the line carries no
    ``Config_type=`` entry.
    """
    folder = Path(folder)
    candidate = folder / f"{folder.name}.xyz"
    if not candidate.exists():
        xyzs = sorted(folder.glob("*.xyz"))
        # Several unrelated .xyz files are ambiguous, so none is used.
        candidate = xyzs[0] if len(xyzs) == 1 else None
    if candidate is None or not candidate.exists():
        return None
    try:
        # errors="replace": a stray undecodable byte must not raise, since a
        # decode error is not an OSError and would escape the handler below.
        with open(candidate, errors="replace") as f:
            f.readline()  # atom-count line
            comment = f.readline()
    except OSError:
        return None
    m = re.search(r'Config_type=("([^"]*)"|(\S+))', comment)
    if not m:
        return None
    return m.group(2) if m.group(2) is not None else m.group(3)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def find_job_folders(entry_path):
    """List names of *entry_path* sub-folders holding an OUTCAR, sorted by index.

    Ordering is delegated to ``folder_sort_key``.
    """
    job_names = []
    for child in Path(entry_path).iterdir():
        if not child.is_dir():
            continue
        if (child / "OUTCAR").exists():
            job_names.append(child.name)
    job_names.sort(key=folder_sort_key)
    return job_names
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def add_arguments(parser: argparse.ArgumentParser) -> None:
    """Register the ``outcar2xyz`` options on *parser*.

    Also switches the parser to ``ArgumentDefaultsHelpFormatter``.
    """
    parser.formatter_class = argparse.ArgumentDefaultsHelpFormatter

    # (flag, default, help text) - all three are plain string options.
    string_options = (
        ("--entry-path", ".", "Directory to scan for VASP job sub-folders."),
        ("--output", "scf-converged.xyz", "Output extxyz trajectory file."),
        ("--summary", "summary.txt", "Summary text file to write."),
    )
    for flag, default, text in string_options:
        parser.add_argument(flag, default=default, help=text)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def run(args) -> int:
    """Collect SCF-converged job folders into one extxyz file plus a summary.

    Folders whose OUTCAR lacks the convergence message, or whose OUTCAR
    cannot be read by ase, are reported and skipped. The trajectory is only
    written when at least one structure was kept; the summary file is always
    written. Returns 0.
    """
    # ase is imported lazily so `pymdkit --help` works without ase installed.
    from ase.io import read, write

    root = Path(args.entry_path)
    root_text = str(root)

    def _under_root(filename):
        # Output names are placed under the entry path unless that is ".".
        if root_text != ".":
            return os.path.join(root_text, filename)
        return filename

    job_dirs = find_job_folders(root)
    print(f"Found {len(job_dirs)} VASP job folder(s) under {root.resolve()}")

    structures = []
    for name in job_dirs:
        job_dir = root / name
        outcar = job_dir / "OUTCAR"

        if not scf_converged(outcar):
            print(f"{name}: SCF not converged -> skipping")
            continue

        try:
            atoms = read(str(outcar), format="vasp-out")
        except Exception as e:  # noqa: BLE001 - mirror original behavior
            print(f"{name}: error reading OUTCAR ({e}) -> skipping")
            continue

        config_type = read_config_type(job_dir)
        if config_type is None:
            print(f"{name}: warning, Config_type not found")
        else:
            atoms.info["Config_type"] = config_type

        structures.append(atoms)
        print(f"{name}: OK")

    output = _under_root(args.output)
    if not structures:
        print("\nNo converged structures found")
    else:
        write(output, structures, format="extxyz")
        print(f"\nWrote {len(structures)} structures to {output}")

    total = len(job_dirs)
    kept = len(structures)
    ratio = (kept / total * 100) if total else 0.0
    summary = "\n".join([
        f"Total job folders processed: {total}",
        f"Structures with SCF converged: {kept}",
        f"Selection ratio: {ratio:.2f}%",
    ])
    print("\n" + summary)

    with open(_under_root(args.summary), "w") as f:
        f.write(summary)

    return 0
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
if __name__ == "__main__":
    # Standalone entry point; run()'s return value becomes the exit status.
    _parser = argparse.ArgumentParser(description=__doc__)
    add_arguments(_parser)
    _status = run(_parser.parse_args())
    raise SystemExit(_status)
|