pymdkit 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,119 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ select_candidate.py -- select candidate structures from a NEP training set
4
+ based on per-structure energy error against DFT references.
5
+
6
+ Inputs : energy_train.out (col0 = NEP, col1 = DFT, eV/atom),
7
+ train.xyz (extended-XYZ trajectory, full-batch order).
8
+ Outputs : energy_rmse.txt (always)
9
+ candidate.xyz / candidate_energy.txt
10
+ accurate.xyz / accurate_energy.txt
11
+ Energy txt format: 2 columns (DFT, NEP), %.8f.
12
+ xyz files retain Config_type via the ASE extxyz writer.
13
+
14
+ Selection is decided by the total RMSE:
15
+ * total RMSE < --rmse-low -> all frames are accurate.
16
+ * --rmse-low <= RMSE <= --rmse-high -> the worst int(N*ratio) frames by
17
+ |energy error| become candidates (ratio defaults to 0.5, i.e. 50%).
18
+ * total RMSE > --rmse-high -> all frames are candidates.
19
+ """
20
+
21
+ import shutil
22
+ import argparse
23
+ import numpy as np
24
+ from ase.io import read, write
25
+
26
# Default edges of the total-RMSE band used to pick the selection mode.
RMSE_LOW = 5.0    # meV/atom
RMSE_HIGH = 10.0  # meV/atom

# Command name and one-line help text for this module
# (presumably read by the package's CLI dispatcher -- confirm).
COMMAND = "select-candidate"
HELP = "Split a NEP training set into candidate/accurate sets by energy error."
31
+
32
+
33
def write_energy_table(path, dft_vals, nep_vals):
    """Save energies to *path* as a two-column text table.

    Column 0 holds ``dft_vals`` and column 1 holds ``nep_vals``; every value
    is formatted with ``%.8f``.
    """
    table = np.column_stack((dft_vals, nep_vals))
    np.savetxt(path, table, fmt='%.8f')
35
+
36
+
37
def rmse_meV(nep, dft):
    """Return the root-mean-square of ``nep - dft`` multiplied by 1000.

    The callers in this module pass energies in eV/atom, so the result is in
    meV/atom.
    """
    residual = nep - dft
    mean_square = np.mean(residual ** 2)
    return float(np.sqrt(mean_square)) * 1000.0
39
+
40
+
41
def write_rmse_file(path, **labelled_values):
    """Write one ``RMSE (<label>): <value> meV/atom`` line per keyword.

    Lines appear in the order the keyword arguments were passed; values are
    formatted with two decimals.
    """
    with open(path, 'w') as handle:
        handle.writelines(
            f"RMSE ({label}): {value:.2f} meV/atom\n"
            for label, value in labelled_values.items()
        )
45
+
46
+
47
def add_arguments(parser):
    """Register the select-candidate command-line options on *parser*."""
    option = parser.add_argument

    # Input files.
    option("--energy", default="energy_train.out",
           help="Energy file: col0=NEP, col1=DFT, eV/atom (default: energy_train.out).")
    option("--xyz", default="train.xyz",
           help="Training trajectory in extxyz (default: train.xyz).")

    # Selection controls.
    option("-r", "--ratio", type=float, default=0.5,
           help="Fraction selected as candidates in the middle RMSE band, "
                "int(N*ratio) worst by energy error (default: 0.5 = 50%%).")
    option("--rmse-low", type=float, default=RMSE_LOW,
           help="Below this total RMSE (meV/atom) all frames are 'accurate'.")
    option("--rmse-high", type=float, default=RMSE_HIGH,
           help="Above this total RMSE (meV/atom) all frames are 'candidate'.")
59
+
60
+
61
def _emit_split(frames, nep, dft, cand_idx, acc_idx, total_rmse):
    """Write the outputs for a candidate/accurate split and print a summary.

    ``energy_rmse.txt`` is always written. For each non-empty index set the
    matching ``<label>.xyz`` (extxyz) and ``<label>_energy.txt`` (DFT, NEP)
    files are written; an empty set produces no files for that label.
    """
    groups = (("candidate", cand_idx), ("accurate", acc_idx))

    # RMSE summary: the total first, then each non-empty subset.
    summary = {"total": total_rmse}
    for label, idx in groups:
        if len(idx):
            summary[label] = rmse_meV(nep[idx], dft[idx])
    write_rmse_file('energy_rmse.txt', **summary)

    # Structures and energy tables, candidate before accurate.
    for label, idx in groups:
        if not len(idx):
            continue
        write(f'{label}.xyz', [frames[i] for i in idx], format='extxyz')
        write_energy_table(f'{label}_energy.txt', dft[idx], nep[idx])

    acc_rmse = summary.get("accurate")
    if acc_rmse is None:
        acc_note = ""
    else:
        acc_note = f" (accurate RMSE {acc_rmse:.2f} meV/atom)"
    print(f"Total: {len(frames)} frames, RMSE {total_rmse:.2f} meV/atom")
    print(f" candidate: {len(cand_idx)} | accurate: {len(acc_idx)}" + acc_note)
82
+
83
+
84
def run(args):
    """Classify every frame as candidate or accurate and write the outputs.

    Reads the NEP/DFT energies from ``args.energy`` and the frames from
    ``args.xyz``, chooses the split from the total RMSE relative to
    ``args.rmse_low`` / ``args.rmse_high``, and passes the resulting index
    sets to ``_emit_split``.

    Returns:
        0 on success.

    Raises:
        SystemExit: if ``--ratio`` is outside [0, 1], if the energy file has
            no data or fewer than two columns, or if its row count differs
            from the number of frames in the trajectory.
    """
    if not (0.0 <= args.ratio <= 1.0):
        raise SystemExit(f"Error: --ratio must be in [0, 1], got {args.ratio}")

    # ndmin=2 keeps a one-row file two-dimensional, so the column slices
    # below also work for a training set with a single structure.
    energy = np.loadtxt(args.energy, ndmin=2)
    if energy.size == 0 or energy.shape[1] < 2:
        raise SystemExit(
            f"Error: {args.energy} must contain at least two columns (NEP, DFT).")
    nep, dft = energy[:, 0], energy[:, 1]
    frames = read(args.xyz, index=':')
    n = len(frames)

    # Energies and frames are paired by position; a length mismatch would
    # otherwise fail late or silently attach errors to the wrong frames.
    if len(nep) != n:
        raise SystemExit(
            f"Error: {args.energy} has {len(nep)} row(s) but {args.xyz} has "
            f"{n} frame(s); they must match one-to-one.")

    abs_err_meV = np.abs(nep - dft) * 1000.0
    total_rmse = rmse_meV(nep, dft)
    order_desc = np.argsort(-abs_err_meV, kind='stable')  # worst error first

    if total_rmse < args.rmse_low:
        print(f"Total RMSE {total_rmse:.2f} < {args.rmse_low} meV/atom -> all accurate.")
        cand_idx = np.array([], dtype=int)
        acc_idx = np.arange(n)
    elif total_rmse > args.rmse_high:
        print(f"Total RMSE {total_rmse:.2f} > {args.rmse_high} meV/atom -> all candidate.")
        cand_idx = np.arange(n)
        acc_idx = np.array([], dtype=int)
    else:
        n_candidate = int(n * args.ratio)
        print(f"Total RMSE {total_rmse:.2f} in [{args.rmse_low}, {args.rmse_high}] "
              f"meV/atom -> worst {args.ratio:.0%} = {n_candidate}/{n} as candidate.")
        # Re-sort both index sets so the outputs keep the trajectory order.
        cand_idx = np.sort(order_desc[:n_candidate])
        acc_idx = np.sort(order_desc[n_candidate:])

    _emit_split(frames, nep, dft, cand_idx, acc_idx, total_rmse)
    return 0
114
+
115
+
116
if __name__ == '__main__':
    # Script entry point: build the parser, register the options, and exit
    # with the status code returned by run().
    _p = argparse.ArgumentParser(description=__doc__)
    add_arguments(_p)
    _args = _p.parse_args()
    raise SystemExit(run(_args))
@@ -0,0 +1,100 @@
1
+ """
2
+ Convert structure files of any ASE-readable format into extxyz.
3
+
4
+ Single file: stru2xyz -i file1.vasp -o file1.xyz
5
+ Whole folder: stru2xyz -if vasp-opted -of extxyz-opted
6
+ Scan mode: stru2xyz
7
+ -> scans sub-folders of the current dir (like `ehull`) for
8
+ structure files (.vasp/.cif/...) and writes <name>.xyz next to
9
+ each one, in the same sub-folder.
10
+ -> with -o model.xyz, every converted file is named model.xyz.
11
+
12
+ Output is structure-only extxyz: the Lattice is written with 8-decimal
13
+ precision and any momenta/velocities are dropped.
14
+ """
15
+
16
+ import argparse
17
+ import re
18
+ from pathlib import Path
19
+
20
+ from ase.io import read, write
21
+
22
+ try:
23
+ from . import _fileio
24
+ except ImportError: # running as a standalone script
25
+ import _fileio
26
+
27
# Command name and one-line help text for this module
# (presumably read by the package's CLI dispatcher -- confirm).
COMMAND = "stru2xyz"
HELP = "Convert any structure format(s) to extxyz (file, folder, or scan)."

# Extensions picked up in scan mode: every known structure extension except
# the xyz ones, so files that are already xyz are not reprocessed.
_SCAN_EXTS = _fileio.STRUCT_EXTS - {".xyz", ".extxyz"}
32
+
33
+
34
+ def _reformat_lattice(text):
35
+ """Rewrite every Lattice="..." value with fixed 8-decimal precision."""
36
+ def repl(m):
37
+ nums = " ".join(f"{float(x):.8f}" for x in m.group(1).split())
38
+ return f'Lattice="{nums}"'
39
+ return re.sub(r'Lattice="([^"]*)"', repl, text)
40
+
41
+
42
def convert(in_path, out_path):
    """Convert one structure file to a structure-only extxyz file.

    The parent directory of *out_path* is created if needed. The structure
    read from *in_path* has its ``momenta`` array removed before writing, and
    the written file is then rewritten with its Lattice at 8-decimal
    precision. One ``<in> -> <out>`` line is printed.
    """
    target = Path(out_path)
    target.parent.mkdir(parents=True, exist_ok=True)

    structure = read(str(in_path))
    # Drop momenta/velocities (e.g. carried in from a CONTCAR/POSCAR) so the
    # output holds the structure only.
    structure.arrays.pop("momenta", None)
    write(str(target), structure, format="extxyz")

    # ASE has no lattice-precision option, so post-process the written text.
    written = target.read_text()
    target.write_text(_reformat_lattice(written))
    print(f"{in_path} -> {target}")
53
+
54
+
55
def add_arguments(parser):
    """Register the shared input/output options on *parser*.

    The option definitions themselves come from ``_fileio.add_io_arguments``;
    only the help text of the single-output option is specific to stru2xyz.
    """
    output_help = ("Output .xyz file (single mode), or output name "
                   "to use everywhere in scan mode (e.g. model.xyz).")
    _fileio.add_io_arguments(parser, single_output_help=output_help)
60
+
61
+
62
def run(args):
    """Convert structures to extxyz in single-file, folder, or scan mode.

    The mode follows from the arguments: ``args.input`` selects single-file
    mode, otherwise ``args.input_folder`` selects folder mode, otherwise the
    sub-folders of the current directory are scanned. Always returns 0.
    """
    # Single file: default output is the input with a .xyz suffix.
    if args.input:
        if args.output:
            out = Path(args.output)
        else:
            out = Path(args.input).with_suffix(".xyz")
        convert(args.input, out)
        return 0

    # Whole folder -> output folder (default: "<input folder>-xyz").
    if args.input_folder:
        in_dir = Path(args.input_folder)
        if args.output_folder:
            out_dir = Path(args.output_folder)
        else:
            out_dir = in_dir.with_name(in_dir.name + "-xyz")
        converted = 0
        for f in _fileio.structure_files(in_dir):
            convert(f, out_dir / (f.stem + ".xyz"))
            converted += 1
        print(f"Done: converted {converted} file(s) -> {out_dir}/")
        return 0

    # Scan mode: walk sub-folders and write each result next to its source.
    converted = 0
    for sub in _fileio.subfolders("."):
        for f in sorted(sub.iterdir()):
            if _fileio.is_structure(f, _SCAN_EXTS):
                # With -o, every converted file in every sub-folder gets
                # that same name.
                out_name = args.output or (f.stem + ".xyz")
                convert(f, sub / out_name)
                converted += 1
    if converted:
        print(f"Done: converted {converted} file(s) across sub-folders.")
    else:
        print("No structure files found in sub-folders. Nothing to do.")
    return 0
95
+
96
+
97
if __name__ == "__main__":
    # Script entry point: build the parser, register the options, and exit
    # with the status code returned by run().
    _p = argparse.ArgumentParser(description=__doc__)
    add_arguments(_p)
    _args = _p.parse_args()
    raise SystemExit(run(_args))
@@ -0,0 +1,164 @@
1
+ """
2
+ Build a supercell whose cell lengths stay within a maximum (-max-abc).
3
+
4
+ For each lattice vector the structure is repeated as many times as possible
5
+ without its length exceeding ``-max-abc`` (in Angstrom):
6
+ ``n_i = max(1, floor(max_abc / |a_i|))``.
7
+
8
+ Single file: supercell -i example.vasp -o out.vasp -max-abc 20
9
+ Whole folder: supercell -if vasp-opted -of out -max-abc 20
10
+ Per-folder: supercell -if vasp-opted -max-abc 20 -individual
11
+ -> creates ./<name>/<name>.<ext> for every structure in vasp-opted
12
+ GPUMD setup: supercell -if extxyz-vasp-opted -max-abc 24 -individual \\
13
+ -temp 500 600 -md-if input-files
14
+ -> ./<name>/model.xyz plus one ./<name>/<T>/ folder per
15
+ temperature, each seeded with model.xyz + the shared GPUMD
16
+ input files (target_t in run.in replaced by the temperature).
17
+ """
18
+
19
+ import argparse
20
+ import shutil
21
+ from pathlib import Path
22
+
23
+ from ase.io import read, write
24
+
25
+ try:
26
+ from . import _fileio
27
+ except ImportError: # running as a standalone script
28
+ import _fileio
29
+
30
# Command name and one-line help text for this module
# (presumably read by the package's CLI dispatcher -- confirm).
COMMAND = "supercell"
HELP = "Build a supercell with cell lengths capped at -max-abc Angstrom."
32
+
33
+
34
def repetitions_for(atoms, max_abc):
    """Return the repeat count for each cell vector of *atoms*.

    Each count is ``floor(max_abc / length)`` but never below 1; a
    non-positive length also gives 1. The result is a list with one entry
    per value returned by ``atoms.cell.lengths()``.
    """
    return [
        max(1, int(max_abc // length)) if length > 0 else 1
        for length in atoms.cell.lengths()
    ]
40
+
41
+
42
def make_supercell(in_path, out_path, max_abc):
    """Read a structure, repeat it up to *max_abc*, and write the supercell.

    The repeat counts come from ``repetitions_for``. The parent directory of
    *out_path* is created if needed, and the output format is left for ASE to
    infer from *out_path*. One summary line is printed.

    Returns:
        The repeated structure that was written.
    """
    atoms = read(str(in_path))
    reps = repetitions_for(atoms, max_abc)
    sc = atoms.repeat(reps)

    out_path = Path(out_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    write(str(out_path), sc)

    a, b, c = sc.cell.lengths()
    # Separate the three counts with "x": printed back-to-back they are
    # ambiguous as soon as one of them has two digits (e.g. "1211").
    na, nb, nc = reps
    print(f"{in_path} ({len(atoms)} atoms) {na}x{nb}x{nc} "
          f"-> {out_path} ({len(sc)} atoms, abc={a:.2f},{b:.2f},{c:.2f})")
    return sc
53
+
54
+
55
def setup_temp_folders(struct_folder, temps, md_input_folder):
    """Create one job folder per temperature inside *struct_folder*.

    For every entry of *temps* a sub-folder named after it is created, then
    ``model.xyz`` from *struct_folder* and every regular file found directly
    in *md_input_folder* are copied into it. If the folder then holds a
    ``run.in``, each ``target_t`` in that file is replaced by the
    temperature. A missing input folder or ``run.in`` only prints a warning.
    """
    shared_inputs = Path(md_input_folder)
    model_src = struct_folder / "model.xyz"

    for temp in temps:
        job_dir = struct_folder / str(temp)
        job_dir.mkdir(parents=True, exist_ok=True)
        shutil.copy2(model_src, job_dir / "model.xyz")

        # Shared input files: top-level regular files only, no recursion.
        if not shared_inputs.is_dir():
            print(f" [WARN] md input folder '{shared_inputs}' not found")
        else:
            for entry in shared_inputs.iterdir():
                if entry.is_file():
                    shutil.copy2(entry, job_dir / entry.name)

        # Only the copy inside the job folder is edited; the shared run.in
        # keeps its placeholder for the next temperature.
        run_in = job_dir / "run.in"
        if not run_in.is_file():
            print(f" [WARN] run.in not found in {job_dir}")
        else:
            template = run_in.read_text()
            run_in.write_text(template.replace("target_t", str(temp)))
        print(f" temp folder: {job_dir} (target_t -> {temp})")
79
+
80
+
81
+ def _folder_name(stem):
82
+ """Folder name for a structure: drop a trailing '-opted' if present."""
83
+ return stem[:-6] if stem.endswith("-opted") else stem
84
+
85
+
86
def add_arguments(parser):
    """Register the supercell command-line options on *parser*.

    The shared input/output options come from ``_fileio.add_io_arguments``;
    the options added here control the size cap and the per-structure and
    GPUMD layouts.
    """
    _fileio.add_io_arguments(
        parser, single_output_help="Output file for single-file mode.")

    option = parser.add_argument

    # Size cap (required).
    option("-max-abc", "--max-abc", dest="max_abc", type=float,
           required=True, metavar="L",
           help="Maximum cell length a/b/c in Angstrom.")

    # Output layout.
    option("-individual", "--individual", action="store_true",
           help="Folder mode: write ./<name>/<name>.<ext> per structure "
                "(into --output-folder if given, else current dir).")

    # GPUMD job setup.
    option("-temp", "--temp", dest="temp", nargs="+", type=int,
           default=[], metavar="T",
           help="Temperatures (K). For each structure, write "
                "./<name>/model.xyz and one ./<name>/<T>/ GPUMD job "
                "folder per temperature.")
    option("-md-if", "--md-input-folder", dest="md_input_folder",
           default="input-files", metavar="DIR",
           help="Folder with shared GPUMD input files (run.in, "
                "sub_gpumd*, potentials; default: input-files).")
104
+
105
+
106
+ def _run_gpumd(args):
107
+ """GPUMD setup: ./<name>/model.xyz + per-temperature job folders."""
108
+ if not args.input_folder:
109
+ raise SystemExit("Error: -temp requires -if/--input-folder.")
110
+ count = 0
111
+ for f in _fileio.structure_files(args.input_folder):
112
+ folder = Path(_folder_name(f.stem))
113
+ folder.mkdir(parents=True, exist_ok=True)
114
+ make_supercell(f, folder / "model.xyz", args.max_abc)
115
+ setup_temp_folders(folder, args.temp, args.md_input_folder)
116
+ count += 1
117
+ print(f"Done: set up {count} structure(s) with temperatures {args.temp}.")
118
+ return 0
119
+
120
+
121
def run(args):
    """Build supercells in GPUMD, per-structure, single-file, or batch mode.

    Modes are checked in this order: ``args.temp`` (GPUMD setup),
    ``args.individual`` (one sub-folder per structure), ``args.input``
    (single file), ``args.input_folder`` (batch). Returns 0.

    Raises:
        SystemExit: if ``-individual`` is used without an input folder, or
            if neither an input file nor an input folder was given.
    """
    # GPUMD per-temperature setup (model.xyz + ./<name>/<T>/ folders).
    if args.temp:
        return _run_gpumd(args)

    # Per-structure folders: <parent>/<stem>/<original file name>.
    if args.individual:
        if not args.input_folder:
            raise SystemExit("Error: -individual requires -if/--input-folder.")
        parent = Path(args.output_folder or ".")
        built = 0
        for f in _fileio.structure_files(args.input_folder):
            make_supercell(f, parent / f.stem / f.name, args.max_abc)
            built += 1
        print(f"Done: {built} supercell(s) in per-structure folders.")
        return 0

    # Single file: default output is "<stem>-sc<suffix>" next to the input,
    # so the input's file extension is preserved.
    if args.input:
        if args.output:
            out = Path(args.output)
        else:
            src = Path(args.input)
            out = src.with_name(src.stem + "-sc" + src.suffix)
        make_supercell(args.input, out, args.max_abc)
        return 0

    if not args.input_folder:
        raise SystemExit("Error: provide -i (single file) or -if (folder).")

    # Batch: same file names, written to the output folder
    # (default: "<input folder>-out").
    in_dir = Path(args.input_folder)
    if args.output_folder:
        out_dir = Path(args.output_folder)
    else:
        out_dir = in_dir.with_name(in_dir.name + "-out")
    built = 0
    for f in _fileio.structure_files(in_dir):
        make_supercell(f, out_dir / f.name, args.max_abc)
        built += 1
    print(f"Done: built {built} supercell(s).")
    return 0
159
+
160
+
161
if __name__ == "__main__":
    # Script entry point: build the parser, register the options, and exit
    # with the status code returned by run().
    _p = argparse.ArgumentParser(description=__doc__)
    add_arguments(_p)
    _args = _p.parse_args()
    raise SystemExit(run(_args))
@@ -0,0 +1,271 @@
1
+ """
2
+ symmetrize.py
3
+
4
+ Import space-group symmetry into a structure (or a folder of structures) using
5
+ pyxtal.util.symmetrize + pyxtal, and write the idealized, standardized cell to a
6
+ CIF in the legacy block layout.
7
+
8
+ Usage:
9
+ pymdkit symmetrize file --symprec 0.01 --add_oxidation yes -o file-symm.cif
10
+ pymdkit symmetrize folder --symprec 0.01 --add_oxidation no -o folder-symm
11
+
12
+ Behaviour:
13
+ * file input : symmetrize the single structure; write one CIF
14
+ (default output: <stem>-symm.cif).
15
+ * folder input: symmetrize every structure file in the folder; write one CIF
16
+ per structure into the output directory
17
+ (default output: <folder>-symm/).
18
+ * --add_oxidation yes : include the _atom_type_oxidation_number loop.
19
+ * --add_oxidation no : omit the oxidation-state section entirely.
20
+
21
+ The written CIF uses the same block order as upet-opt-rm-duplicate.py:
22
+ cell -> symmetry ops -> (oxidation states) -> atom sites.
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import argparse
28
+ import warnings
29
+ from pathlib import Path
30
+
31
+ import numpy as np
32
+
33
# Ignore RuntimeWarnings whose message starts with "logm result may be
# inaccurate". The filter is installed before the ase / pymatgen / pyxtal
# imports that follow.
warnings.filterwarnings(
    "ignore",
    message=r"logm result may be inaccurate.*",
    category=RuntimeWarning,
)
36
+
37
+ from ase.io import read as ase_read
38
+
39
+ from pymatgen.core import Structure, Lattice, Element
40
+ from pyxtal import pyxtal
41
+ from pyxtal.util import symmetrize # replaces SpacegroupAnalyzer
42
+
43
# Command name and one-line help text for this module.
COMMAND = "symmetrize"
HELP = "Import space-group symmetry into a structure file (or folder) -> CIF."

# Default symmetry precision (the --symprec default).
SYMPREC = 0.01

# Recognised structure-file extensions / names for folder mode.
STRUCT_SUFFIXES = {".cif", ".vasp", ".poscar", ".contcar", ".xyz", ".json"}
STRUCT_NAMES = {"POSCAR", "CONTCAR"}

# One fixed oxidation state per element symbol, written into the CIF
# oxidation-state loop. Elements missing from this table are written as 0.
DEFAULT_OXIDATION_STATES = {
    # Groups 1 and 2.
    "Li": 1, "Na": 1, "K": 1, "Rb": 1, "Cs": 1,
    "Be": 2, "Mg": 2, "Ca": 2, "Sr": 2, "Ba": 2,
    # Sc, Y and the lanthanides.
    "Sc": 3, "Y": 3, "La": 3, "Ce": 3, "Pr": 3, "Nd": 3, "Sm": 3,
    "Eu": 3, "Gd": 3, "Tb": 3, "Dy": 3, "Ho": 3, "Er": 3, "Tm": 3,
    "Yb": 3, "Lu": 3,
    # Transition metals.
    "Ti": 4, "Zr": 4, "Hf": 4,
    "V": 5, "Nb": 5, "Ta": 5,
    "Cr": 3, "Mo": 6, "W": 6,
    "Mn": 2, "Fe": 3, "Co": 2, "Ni": 2, "Cu": 2, "Zn": 2,
    # p-block metals and metalloids.
    "Al": 3, "Ga": 3, "In": 3, "Tl": 1,
    "Si": 4, "Ge": 4, "Sn": 4, "Pb": 2,
    "Sb": 3, "Bi": 3,
    # Light elements and anion formers.
    "H": 1, "B": 3, "C": 4, "N": -3,
    "O": -2, "S": -2, "Se": -2, "Te": -2,
    "F": -1, "Cl": -1, "Br": -1, "I": -1,
}
69
+
70
+
71
+ def _fmt_ox(v):
72
+ v = int(v)
73
+ return f"+{v}" if v > 0 else (str(v) if v < 0 else "0")
74
+
75
+
76
+ # ====================== Symmetrize + load into pyxtal ========================
77
def build_pyxtal_from_atoms(atoms, symprec=SYMPREC):
    """Convert ASE Atoms into a pyxtal structure, symmetrizing when possible.

    The cell is first symmetrized with ``pyxtal.util.symmetrize`` and loaded
    through ``from_seed``. If either step raises, the raw cell is loaded
    instead (typically P1).

    Returns:
        ``(structure, is_fallback, reason)``: ``is_fallback`` is True when
        the raw cell was used, and ``reason`` then holds the text of the
        exception that triggered the fallback (empty string otherwise).
    """
    cell = np.array(atoms.get_cell(), dtype=float)
    frac_coords = np.array(atoms.get_scaled_positions(wrap=True), dtype=float)
    species = list(atoms.get_chemical_symbols())
    pm_struct = Structure(Lattice(cell), species, frac_coords,
                          coords_are_cartesian=False)

    try:
        # symmetrize is unpacked as (refined structure, hall number); only
        # the structure is used. symprec is passed as its ``tol``.
        refined, _hall = symmetrize(pm_struct, tol=symprec)
        crystal = pyxtal()
        crystal.from_seed(refined, tol=symprec)
    except Exception as e:
        # Deliberate best-effort: any failure falls back to the raw cell.
        raw = pyxtal()
        raw.from_seed(pm_struct, tol=symprec)
        return raw, True, str(e)
    return crystal, False, ""
97
+
98
+
99
+ # ============================== CIF writer ==================================
100
def write_cif_legacy(struct, filepath, header, oxidation_states, add_oxidation=True):
    """Write *struct* to *filepath* as a CIF in the legacy block layout.

    Block order: cell and space group -> symmetry operations ->
    (oxidation states) -> atom sites.

    Args:
        struct: pyxtal structure; its ``lattice``, ``group`` and
            ``atom_sites`` are read.
        filepath: destination file, overwritten if present.
        header: name written after ``data_``.
        oxidation_states: mapping of element symbol to oxidation state;
            symbols missing from it are written as 0.
        add_oxidation: when False the oxidation-state loop is left out.
    """
    a, b, c, alpha, beta, gamma = struct.lattice.get_para(degree=True)
    vol = struct.lattice.volume
    hm = struct.group.symbol.replace("_", "").replace(" ", "")
    sg_num = struct.group.number

    def _site_row(site):
        # (Z, Wyckoff letter, symbol, multiplicity, x, y, z), with the
        # fractional coordinates wrapped into [0, 1).
        specie = site.specie
        if isinstance(specie, str):
            sym = specie
        else:
            sym = getattr(specie, "symbol", str(specie))
        x, y, z = (np.asarray(site.position, dtype=float) % 1.0).tolist()
        return (Element(sym).Z, str(site.wp.letter),
                sym, int(site.wp.multiplicity), x, y, z)

    # Sites ordered by atomic number, then Wyckoff letter.
    rows = sorted((_site_row(site) for site in struct.atom_sites),
                  key=lambda r: (r[0], r[1]))

    # Labels are the symbol plus a per-element running index (Li1, Li2, ...).
    seen = {}
    labeled = []
    for _z, wyck, sym, mult, x, y, zc in rows:
        seen[sym] = seen.get(sym, 0) + 1
        labeled.append((f"{sym}{seen[sym]}", sym, mult, wyck, x, y, zc))

    # Symmetry operations of the general position, written without spaces.
    symops = [op.as_xyz_str().replace(" ", "") for op in struct.group[0]]

    # Cell / space-group items; values start at column 34.
    cell_items = (
        ("_cell_length_a", f"{a:.6f}"),
        ("_cell_length_b", f"{b:.6f}"),
        ("_cell_length_c", f"{c:.6f}"),
        ("_cell_angle_alpha", f"{alpha:.6f}"),
        ("_cell_angle_beta", f"{beta:.6f}"),
        ("_cell_angle_gamma", f"{gamma:.6f}"),
        ("_cell_volume", f"{vol:.4f}"),
        ("_symmetry_space_group_name_H-M", hm),
        ("_symmetry_Int_Tables_number", str(sg_num)),
    )
    lines = [f"data_{header}"]
    lines.extend(f"{key:<34}{val}" for key, val in cell_items)

    lines += [
        "",
        "loop_",
        "_symmetry_equiv_pos_site_id",
        "_symmetry_equiv_pos_as_xyz",
    ]
    lines.extend(f"{i} {s}" for i, s in enumerate(symops, start=1))

    if add_oxidation:
        # NOTE(review): this loop writes the per-site label (e.g. Li1) under
        # _atom_type_symbol, one row per site rather than one per element.
        # Confirm that readers of the legacy layout expect this.
        lines += [
            "",
            "loop_",
            "_atom_type_symbol",
            "_atom_type_oxidation_number",
        ]
        lines.extend(
            f"{label:<3} {_fmt_ox(oxidation_states.get(sym, 0))}"
            for label, sym, *_ in labeled
        )

    lines += [
        "",
        "loop_",
        "_atom_site_label",
        "_atom_site_type_symbol",
        "_atom_site_symmetry_multiplicity",
        "_atom_site_Wyckoff_symbol",
        "_atom_site_fract_x",
        "_atom_site_fract_y",
        "_atom_site_fract_z",
        "_atom_site_occupancy",
    ]
    # The occupancy is always written as 1.000.
    lines.extend(
        f"{label:<3} {sym:<2} {mult} {wyck} "
        f"{x:.6f} {y:.6f} {zc:.6f} 1.000"
        for label, sym, mult, wyck, x, y, zc in labeled
    )

    with open(filepath, "w") as f:
        f.write("\n".join(lines) + "\n")
177
+
178
+
179
+ # ============================== driver ======================================
180
def is_structure_file(p: Path) -> bool:
    """Return True if *p* is a regular file recognised as a structure file.

    A file qualifies when its lower-cased suffix is in ``STRUCT_SUFFIXES`` or
    its exact name is in ``STRUCT_NAMES``.
    """
    if not p.is_file():
        return False
    return p.suffix.lower() in STRUCT_SUFFIXES or p.name in STRUCT_NAMES
183
+
184
+
185
def symmetrize_one(in_path: Path, out_path: Path, symprec, add_oxidation):
    """Symmetrize one structure file and write it to *out_path* as a CIF.

    The CIF data block is named after the stem of *out_path*. One summary
    line is printed, with a note when the raw-cell fallback was used.

    Returns:
        The space-group number of the written structure. Errors are not
        caught here; they propagate to the caller.
    """
    atoms = ase_read(str(in_path))
    crystal, used_fallback, reason = build_pyxtal_from_atoms(atoms, symprec=symprec)

    write_cif_legacy(crystal, str(out_path), header=out_path.stem,
                     oxidation_states=DEFAULT_OXIDATION_STATES,
                     add_oxidation=add_oxidation)

    sg_number = int(crystal.group.number)
    sg_symbol = str(crystal.group.symbol)
    note = f" [fallback to raw cell: {reason}]" if used_fallback else ""
    print(f" {in_path.name} -> {out_path} "
          f"(SG {sg_symbol} #{sg_number}){note}")
    return sg_number
202
+
203
+
204
def add_arguments(parser):
    """Register the symmetrize command-line options on *parser*."""
    option = parser.add_argument

    # Positional input: a single file or a folder.
    option("path",
           help="Input structure file, or a folder of structures.")

    # Processing options.
    option("--symprec", type=float, default=SYMPREC,
           help=f"Symmetry precision (default: {SYMPREC}).")
    option("--add_oxidation", choices=["yes", "no"], default="yes",
           help="Include the oxidation-state section (default: yes).")

    # Output location; its meaning depends on the input kind.
    option("-o", "--output", default=None,
           help="Output CIF (file mode) or output folder (folder mode). "
                "Default: <stem>-symm.cif or <folder>-symm/.")
214
+
215
+
216
def run(args):
    """Symmetrize a single structure file or every structure in a folder.

    In folder mode each structure is processed independently: a failure is
    reported and the remaining files are still processed.

    Returns:
        0 when everything requested was written (or the folder holds no
        structure files); 1 when the input does not exist or at least one
        structure failed.
    """
    in_path = Path(args.path)
    add_oxidation = (args.add_oxidation == "yes")

    if not in_path.exists():
        print(f"Input not found: {in_path}")
        return 1

    # --- folder mode -------------------------------------------------------
    if in_path.is_dir():
        files = sorted(
            (p for p in in_path.iterdir() if is_structure_file(p)),
            key=lambda p: p.name,
        )
        if not files:
            print(f"No structure files found in {in_path}/. Nothing to do.")
            return 0

        out_dir = Path(args.output) if args.output \
            else in_path.with_name(in_path.name + "-symm")
        out_dir.mkdir(parents=True, exist_ok=True)

        print(f"Symmetrizing {len(files)} structure(s) from {in_path}/ "
              f"(symprec={args.symprec}, add_oxidation={args.add_oxidation}) "
              f"-> {out_dir}/")
        n_ok = 0
        for p in files:
            out_path = out_dir / f"{p.stem}-symm.cif"
            try:
                symmetrize_one(p, out_path, args.symprec, add_oxidation)
                n_ok += 1
            except Exception as exc:
                # Best-effort batch: report the failure and keep going.
                print(f" [error] {p.name}: {exc}")
        print(f"\nWrote {n_ok}/{len(files)} symmetrized CIF(s) to {out_dir}/")
        # Signal failures through the exit status, as file mode does, so a
        # partly or fully failed batch is not reported as success.
        return 0 if n_ok == len(files) else 1

    # --- file mode ---------------------------------------------------------
    out_path = Path(args.output) if args.output \
        else in_path.with_name(in_path.stem + "-symm.cif")
    out_path.parent.mkdir(parents=True, exist_ok=True)

    print(f"Symmetrizing {in_path} "
          f"(symprec={args.symprec}, add_oxidation={args.add_oxidation}) "
          f"-> {out_path}")
    try:
        symmetrize_one(in_path, out_path, args.symprec, add_oxidation)
    except Exception as exc:
        print(f" [error] {in_path.name}: {exc}")
        return 1
    return 0
266
+
267
+
268
if __name__ == "__main__":
    # Script entry point: build the parser, register the options, and exit
    # with the status code returned by run().
    _p = argparse.ArgumentParser(description=__doc__)
    add_arguments(_p)
    _args = _p.parse_args()
    raise SystemExit(run(_args))