PyPI - pymdkit - Versions diffs - 1.0.0__py3-none-any.whl - Mend

pymdkit 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

pymdkit/__init__.py +3 -0
pymdkit/commands/__init__.py +1 -0
pymdkit/commands/_fileio.py +96 -0
pymdkit/commands/_vaspset.py +169 -0
pymdkit/commands/add_groups.py +77 -0
pymdkit/commands/compute_ehull.py +230 -0
pymdkit/commands/compute_msd_all_groups.py +224 -0
pymdkit/commands/compute_rmsd.py +149 -0
pymdkit/commands/gather_contcar.py +106 -0
pymdkit/commands/outcar2xyz.py +141 -0
pymdkit/commands/select_candidate.py +119 -0
pymdkit/commands/stru2xyz.py +100 -0
pymdkit/commands/supercell.py +164 -0
pymdkit/commands/symmetrize.py +271 -0
pymdkit/commands/vasp_relax.py +62 -0
pymdkit/commands/vasp_static.py +59 -0
pymdkit/pymdkit_main.py +115 -0
pymdkit-1.0.0.dist-info/METADATA +201 -0
pymdkit-1.0.0.dist-info/RECORD +23 -0
pymdkit-1.0.0.dist-info/WHEEL +5 -0
pymdkit-1.0.0.dist-info/entry_points.txt +2 -0
pymdkit-1.0.0.dist-info/licenses/LICENSE +674 -0
pymdkit-1.0.0.dist-info/top_level.txt +1 -0

pymdkit/commands/select_candidate.py ADDED Viewed

@@ -0,0 +1,119 @@
+#!/usr/bin/env python3
+"""
+select_candidate.py -- select candidate structures from a NEP training set
+based on per-structure energy error against DFT references.
+Inputs  : energy_train.out (col0 = NEP, col1 = DFT, eV/atom),
+          train.xyz        (extended-XYZ trajectory, full-batch order).
+Outputs : energy_rmse.txt           (always)
+          candidate.xyz / candidate_energy.txt
+          accurate.xyz  / accurate_energy.txt
+Energy txt format: 2 columns (DFT, NEP), %.8f.
+xyz files retain Config_type via the ASE extxyz writer.
+Selection is decided by the total RMSE:
+  * total RMSE < --rmse-low   -> all frames are accurate.
+  * --rmse-low <= RMSE <= --rmse-high -> the worst int(N*ratio) frames by
+    |energy error| become candidates (ratio defaults to 0.5, i.e. 50%).
+  * total RMSE > --rmse-high  -> all frames are candidates.
+"""
+import shutil
+import argparse
+import numpy as np
+from ase.io import read, write
+RMSE_LOW  = 5.0    # meV/atom
+RMSE_HIGH = 10.0   # meV/atom
+COMMAND = "select-candidate"
+HELP = "Split a NEP training set into candidate/accurate sets by energy error."
+def write_energy_table(path, dft_vals, nep_vals):
+    """Save a two-column (DFT, NEP) energy table to *path* using %.8f."""
+    table = np.column_stack([dft_vals, nep_vals])
+    np.savetxt(path, table, fmt='%.8f')
+def rmse_meV(nep, dft):
+    """Return the RMS difference between *nep* and *dft*, multiplied by 1000.
+
+    With inputs in eV/atom the result is therefore in meV/atom.
+    """
+    residual = nep - dft
+    mean_square = np.mean(residual ** 2)
+    return float(np.sqrt(mean_square)) * 1000.0
+def write_rmse_file(path, **labelled_values):
+    """Write one 'RMSE (<label>): <value> meV/atom' line per keyword argument.
+
+    Lines appear in the order the keyword arguments were given.
+    """
+    with open(path, 'w') as out:
+        out.writelines(
+            f"RMSE ({label}): {value:.2f} meV/atom\n"
+            for label, value in labelled_values.items()
+        )
+def add_arguments(parser):
+    """Attach the select-candidate options (inputs, ratio, RMSE band) to *parser*."""
+    option_table = [
+        (("--energy",),
+         {"default": "energy_train.out",
+          "help": "Energy file: col0=NEP, col1=DFT, eV/atom (default: energy_train.out)."}),
+        (("--xyz",),
+         {"default": "train.xyz",
+          "help": "Training trajectory in extxyz (default: train.xyz)."}),
+        (("-r", "--ratio"),
+         {"type": float, "default": 0.5,
+          "help": "Fraction selected as candidates in the middle RMSE band, "
+                  "int(N*ratio) worst by energy error (default: 0.5 = 50%%)."}),
+        (("--rmse-low",),
+         {"type": float, "default": RMSE_LOW,
+          "help": "Below this total RMSE (meV/atom) all frames are 'accurate'."}),
+        (("--rmse-high",),
+         {"type": float, "default": RMSE_HIGH,
+          "help": "Above this total RMSE (meV/atom) all frames are 'candidate'."}),
+    ]
+    # Registration order is preserved so --help lists options as before.
+    for flags, options in option_table:
+        parser.add_argument(*flags, **options)
+def _emit_split(frames, nep, dft, cand_idx, acc_idx, total_rmse):
+    """Write energy_rmse.txt plus the per-subset xyz/energy files, then report.
+
+    A subset with no indices gets neither an RMSE entry nor output files.
+    """
+    subsets = (("candidate", cand_idx), ("accurate", acc_idx))
+    summary = {"total": total_rmse}
+    for label, idx in subsets:
+        if len(idx):
+            summary[label] = rmse_meV(nep[idx], dft[idx])
+    write_rmse_file('energy_rmse.txt', **summary)
+    for label, idx in subsets:
+        if not len(idx):
+            continue
+        write(f'{label}.xyz', [frames[i] for i in idx], format='extxyz')
+        write_energy_table(f'{label}_energy.txt', dft[idx], nep[idx])
+    print(f"Total: {len(frames)} frames, RMSE {total_rmse:.2f} meV/atom")
+    report = f"  candidate: {len(cand_idx)}  |  accurate: {len(acc_idx)}"
+    if "accurate" in summary:
+        report += f"  (accurate RMSE {summary['accurate']:.2f} meV/atom)"
+    print(report)
+def run(args):
+    """Split the training set into candidate/accurate subsets and write outputs.
+
+    Reads the energy table (col0 = NEP, col1 = DFT) and the trajectory,
+    computes the total RMSE, and picks the split from the
+    --rmse-low / --rmse-high band. Returns 0 on success.
+
+    Raises SystemExit when --ratio is outside [0, 1], when the energy file has
+    fewer than two columns, or when the number of energy rows differs from the
+    number of frames (the index arrays would otherwise pair energies with the
+    wrong structures or fail with an IndexError).
+    """
+    if not (0.0 <= args.ratio <= 1.0):
+        raise SystemExit(f"Error: --ratio must be in [0, 1], got {args.ratio}")
+    # ndmin=2 keeps a single-row file two-dimensional so column slicing works.
+    energy = np.loadtxt(args.energy, ndmin=2)
+    if energy.shape[1] < 2:
+        raise SystemExit(
+            f"Error: {args.energy} must have at least 2 columns (NEP, DFT).")
+    nep, dft = energy[:, 0], energy[:, 1]
+    frames = read(args.xyz, index=':')
+    n = len(frames)
+    if len(nep) != n:
+        raise SystemExit(
+            f"Error: {args.energy} has {len(nep)} row(s) but {args.xyz} "
+            f"has {n} frame(s); they must match.")
+    abs_err_meV = np.abs(nep - dft) * 1000.0
+    total_rmse  = rmse_meV(nep, dft)
+    # Stable sort keeps the original frame order among equal errors.
+    order_desc  = np.argsort(-abs_err_meV, kind='stable')  # worst error first
+    if total_rmse < args.rmse_low:
+        print(f"Total RMSE {total_rmse:.2f} < {args.rmse_low} meV/atom -> all accurate.")
+        cand_idx = np.array([], dtype=int)
+        acc_idx  = np.arange(n)
+    elif total_rmse > args.rmse_high:
+        print(f"Total RMSE {total_rmse:.2f} > {args.rmse_high} meV/atom -> all candidate.")
+        cand_idx = np.arange(n)
+        acc_idx  = np.array([], dtype=int)
+    else:
+        n_candidate = int(n * args.ratio)
+        print(f"Total RMSE {total_rmse:.2f} in [{args.rmse_low}, {args.rmse_high}] "
+              f"meV/atom -> worst {args.ratio:.0%} = {n_candidate}/{n} as candidate.")
+        # Re-sort each subset so output files follow the trajectory order.
+        cand_idx = np.sort(order_desc[:n_candidate])
+        acc_idx  = np.sort(order_desc[n_candidate:])
+    _emit_split(frames, nep, dft, cand_idx, acc_idx, total_rmse)
+    return 0
+if __name__ == '__main__':
+    # Standalone entry point: build a parser, run, and exit with run()'s code.
+    cli_parser = argparse.ArgumentParser(description=__doc__)
+    add_arguments(cli_parser)
+    cli_args = cli_parser.parse_args()
+    raise SystemExit(run(cli_args))

pymdkit/commands/stru2xyz.py ADDED Viewed

@@ -0,0 +1,100 @@
+"""
+Convert structure files of any ASE-readable format into extxyz.
+Single file:  stru2xyz -i file1.vasp -o file1.xyz
+Whole folder: stru2xyz -if vasp-opted -of extxyz-opted
+Scan mode:    stru2xyz
+              -> scans sub-folders of the current dir (like `ehull`) for
+                 structure files (.vasp/.cif/...) and writes <name>.xyz next to
+                 each one, in the same sub-folder.
+              -> with -o model.xyz, every converted file is named model.xyz.
+Output is structure-only extxyz: the Lattice is written with 8-decimal
+precision and any momenta/velocities are dropped.
+"""
+import argparse
+import re
+from pathlib import Path
+from ase.io import read, write
+try:
+    from . import _fileio
+except ImportError:  # running as a standalone script
+    import _fileio
+COMMAND = "stru2xyz"
+HELP = "Convert any structure format(s) to extxyz (file, folder, or scan)."
+# In scan mode, don't reprocess files that are already xyz.
+_SCAN_EXTS = _fileio.STRUCT_EXTS - {".xyz", ".extxyz"}
+def _reformat_lattice(text):
+    """Return *text* with every Lattice="..." value rewritten to 8 decimals."""
+    def _fixed_precision(match):
+        formatted = [f"{float(token):.8f}" for token in match.group(1).split()]
+        return 'Lattice="' + " ".join(formatted) + '"'
+    return re.sub(r'Lattice="([^"]*)"', _fixed_precision, text)
+def convert(in_path, out_path):
+    """Read *in_path* with ASE and write it to *out_path* as extxyz.
+
+    The parent directory of *out_path* is created if needed, any "momenta"
+    array is dropped, and the Lattice value is rewritten with 8 decimals.
+    """
+    target = Path(out_path)
+    target.parent.mkdir(parents=True, exist_ok=True)
+    structure = read(str(in_path))
+    # Momenta/velocities (e.g. carried in from a CONTCAR/POSCAR) are removed
+    # so that only the structure ends up in the output.
+    structure.arrays.pop("momenta", None)
+    write(str(target), structure, format="extxyz")
+    # ASE has no lattice-precision option; post-process the written file.
+    written = target.read_text()
+    target.write_text(_reformat_lattice(written))
+    print(f"{in_path} -> {target}")
+def add_arguments(parser):
+    """Register the shared input/output options with stru2xyz-specific help."""
+    output_help = ("Output .xyz file (single mode), or output name "
+                   "to use everywhere in scan mode (e.g. model.xyz).")
+    _fileio.add_io_arguments(parser, single_output_help=output_help)
+def run(args):
+    """Convert in single-file, whole-folder, or scan mode; always returns 0.
+
+    Mode is chosen by precedence: args.input, then args.input_folder, else a
+    scan of the sub-folders of the current directory.
+    """
+    # Single file.
+    if args.input:
+        if args.output:
+            target = Path(args.output)
+        else:
+            target = Path(args.input).with_suffix(".xyz")
+        convert(args.input, target)
+        return 0
+    # Whole folder -> output folder.
+    if args.input_folder:
+        in_dir = Path(args.input_folder)
+        if args.output_folder:
+            out_dir = Path(args.output_folder)
+        else:
+            out_dir = in_dir.with_name(in_dir.name + "-xyz")
+        count = 0
+        for f in _fileio.structure_files(in_dir):
+            convert(f, out_dir / (f.stem + ".xyz"))
+            count += 1
+        print(f"Done: converted {count} file(s) -> {out_dir}/")
+        return 0
+    # Scan mode: each structure file is converted next to itself.
+    count = 0
+    for sub in _fileio.subfolders("."):
+        for f in sorted(sub.iterdir()):
+            if _fileio.is_structure(f, _SCAN_EXTS):
+                # A given -o name is reused for every converted file.
+                out_name = args.output if args.output else f.stem + ".xyz"
+                convert(f, sub / out_name)
+                count += 1
+    if count:
+        print(f"Done: converted {count} file(s) across sub-folders.")
+    else:
+        print("No structure files found in sub-folders. Nothing to do.")
+    return 0
+if __name__ == "__main__":
+    # Standalone entry point: build a parser, run, and exit with run()'s code.
+    cli_parser = argparse.ArgumentParser(description=__doc__)
+    add_arguments(cli_parser)
+    cli_args = cli_parser.parse_args()
+    raise SystemExit(run(cli_args))

pymdkit/commands/supercell.py ADDED Viewed

@@ -0,0 +1,164 @@
+"""
+Build a supercell whose cell lengths stay within a maximum (-max-abc).
+For each lattice vector the structure is repeated as many times as possible
+without its length exceeding ``-max-abc`` (in Angstrom):
+``n_i = max(1, floor(max_abc / |a_i|))``.
+Single file:  supercell -i example.vasp -o out.vasp -max-abc 20
+Whole folder: supercell -if vasp-opted -of out -max-abc 20
+Per-folder:   supercell -if vasp-opted -max-abc 20 -individual
+              -> creates ./<name>/<name>.<ext> for every structure in vasp-opted
+GPUMD setup:  supercell -if extxyz-vasp-opted -max-abc 24 -individual \\
+                        -temp 500 600 -md-if input-files
+              -> ./<name>/model.xyz plus one ./<name>/<T>/ folder per
+                 temperature, each seeded with model.xyz + the shared GPUMD
+                 input files (target_t in run.in replaced by the temperature).
+"""
+import argparse
+import shutil
+from pathlib import Path
+from ase.io import read, write
+try:
+    from . import _fileio
+except ImportError:  # running as a standalone script
+    import _fileio
+COMMAND = "supercell"
+HELP = "Build a supercell with cell lengths capped at -max-abc Angstrom."
+def repetitions_for(atoms, max_abc):
+    """Return per-axis repeat counts keeping each cell length <= max_abc.
+
+    Each count is at least 1; an axis of non-positive length gets 1.
+    """
+    return [
+        max(1, int(max_abc // length)) if length > 0 else 1
+        for length in atoms.cell.lengths()
+    ]
+def make_supercell(in_path, out_path, max_abc):
+    """Read *in_path*, repeat it to fit within *max_abc*, and write *out_path*.
+
+    The parent directory of *out_path* is created if needed. The output
+    format is left to ASE (no explicit format is passed to ``write``).
+    Returns the repeated Atoms object.
+    """
+    atoms = read(str(in_path))
+    reps = repetitions_for(atoms, max_abc)
+    sc = atoms.repeat(reps)
+    out_path = Path(out_path)
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    write(str(out_path), sc)
+    a, b, c = sc.cell.lengths()
+    # Separate the counts with 'x': printing them back to back made e.g.
+    # 10x1x2 appear as "1012", which cannot be read unambiguously.
+    rep_label = "x".join(str(r) for r in reps)
+    print(f"{in_path} ({len(atoms)} atoms) x {rep_label} "
+          f"-> {out_path} ({len(sc)} atoms, abc={a:.2f},{b:.2f},{c:.2f})")
+    return sc
+def setup_temp_folders(struct_folder, temps, md_input_folder):
+    """Create one job folder per temperature inside *struct_folder*.
+
+    Each ./<T>/ folder receives a copy of model.xyz and of every regular file
+    in *md_input_folder*; the text 'target_t' in its run.in is then replaced
+    by the temperature. A missing input folder or run.in only prints a warning.
+    """
+    md_dir = Path(md_input_folder)
+    model_src = struct_folder / "model.xyz"
+    for temp in temps:
+        job_dir = struct_folder / str(temp)
+        job_dir.mkdir(parents=True, exist_ok=True)
+        shutil.copy2(model_src, job_dir / "model.xyz")
+        if not md_dir.is_dir():
+            print(f"  [WARN] md input folder '{md_dir}' not found")
+        else:
+            for entry in md_dir.iterdir():
+                if entry.is_file():
+                    shutil.copy2(entry, job_dir / entry.name)
+        run_in = job_dir / "run.in"
+        if not run_in.is_file():
+            print(f"  [WARN] run.in not found in {job_dir}")
+        else:
+            template = run_in.read_text()
+            run_in.write_text(template.replace("target_t", str(temp)))
+        print(f"  temp folder: {job_dir} (target_t -> {temp})")
+def _folder_name(stem):
+    """Return *stem* without a trailing '-opted', or unchanged if absent."""
+    suffix = "-opted"
+    if stem.endswith(suffix):
+        return stem[:-len(suffix)]
+    return stem
+def add_arguments(parser):
+    """Register the shared input/output options plus the supercell options."""
+    _fileio.add_io_arguments(
+        parser, single_output_help="Output file for single-file mode.")
+    extra_options = (
+        (("-max-abc", "--max-abc"),
+         dict(dest="max_abc", type=float, required=True, metavar="L",
+              help="Maximum cell length a/b/c in Angstrom.")),
+        (("-individual", "--individual"),
+         dict(action="store_true",
+              help="Folder mode: write ./<name>/<name>.<ext> per structure "
+                   "(into --output-folder if given, else current dir).")),
+        (("-temp", "--temp"),
+         dict(dest="temp", nargs="+", type=int, default=[], metavar="T",
+              help="Temperatures (K). For each structure, write "
+                   "./<name>/model.xyz and one ./<name>/<T>/ GPUMD job "
+                   "folder per temperature.")),
+        (("-md-if", "--md-input-folder"),
+         dict(dest="md_input_folder", default="input-files", metavar="DIR",
+              help="Folder with shared GPUMD input files (run.in, "
+                   "sub_gpumd*, potentials; default: input-files).")),
+    )
+    # Registration order is preserved so --help lists options as before.
+    for flags, options in extra_options:
+        parser.add_argument(*flags, **options)
+def _run_gpumd(args):
+    """Build ./<name>/model.xyz and per-temperature job folders for each input.
+
+    Folders are created relative to the current directory, named after the
+    input file stem with a trailing '-opted' removed. Returns 0; exits with
+    an error message when no input folder was given.
+    """
+    if not args.input_folder:
+        raise SystemExit("Error: -temp requires -if/--input-folder.")
+    n_structures = 0
+    for src in _fileio.structure_files(args.input_folder):
+        job_root = Path(_folder_name(src.stem))
+        job_root.mkdir(parents=True, exist_ok=True)
+        make_supercell(src, job_root / "model.xyz", args.max_abc)
+        setup_temp_folders(job_root, args.temp, args.md_input_folder)
+        n_structures += 1
+    print(f"Done: set up {n_structures} structure(s) with temperatures {args.temp}.")
+    return 0
+def run(args):
+    """Dispatch to GPUMD setup, per-structure folders, single-file, or batch mode.
+
+    Modes are checked in that order. Returns 0 on success and exits with an
+    error message when the required input option is missing.
+    """
+    # GPUMD per-temperature setup takes precedence over every other mode.
+    if args.temp:
+        return _run_gpumd(args)
+    # "individual" mode: one sub-folder per input structure.
+    if args.individual:
+        if not args.input_folder:
+            raise SystemExit("Error: -individual requires -if/--input-folder.")
+        parent = Path(".") if not args.output_folder else Path(args.output_folder)
+        built = 0
+        for f in _fileio.structure_files(args.input_folder):
+            make_supercell(f, parent / f.stem / f.name, args.max_abc)
+            built += 1
+        print(f"Done: {built} supercell(s) in per-structure folders.")
+        return 0
+    # Single file: default output is <stem>-sc<ext> beside the input.
+    if args.input:
+        if args.output:
+            target = Path(args.output)
+        else:
+            src = Path(args.input)
+            target = src.with_name(src.stem + "-sc" + src.suffix)
+        make_supercell(args.input, target, args.max_abc)
+        return 0
+    if not args.input_folder:
+        raise SystemExit("Error: provide -i (single file) or -if (folder).")
+    # Batch: every output keeps its input's file name (and extension).
+    in_dir = Path(args.input_folder)
+    if args.output_folder:
+        out_dir = Path(args.output_folder)
+    else:
+        out_dir = in_dir.with_name(in_dir.name + "-out")
+    built = 0
+    for f in _fileio.structure_files(in_dir):
+        make_supercell(f, out_dir / f.name, args.max_abc)
+        built += 1
+    print(f"Done: built {built} supercell(s).")
+    return 0
+if __name__ == "__main__":
+    # Standalone entry point: build a parser, run, and exit with run()'s code.
+    cli_parser = argparse.ArgumentParser(description=__doc__)
+    add_arguments(cli_parser)
+    cli_args = cli_parser.parse_args()
+    raise SystemExit(run(cli_args))

pymdkit/commands/symmetrize.py ADDED Viewed

@@ -0,0 +1,271 @@
+"""
+symmetrize.py
+Import space-group symmetry into a structure (or a folder of structures) using
+pyxtal.util.symmetrize + pyxtal, and write the idealized, standardized cell to a
+CIF in the legacy block layout.
+Usage:
+    pymdkit symmetrize file   --symprec 0.01 --add_oxidation yes -o file-symm.cif
+    pymdkit symmetrize folder --symprec 0.01 --add_oxidation no  -o folder-symm
+Behaviour:
+    * file input  : symmetrize the single structure; write one CIF
+                    (default output: <stem>-symm.cif).
+    * folder input: symmetrize every structure file in the folder; write one CIF
+                    per structure into the output directory
+                    (default output: <folder>-symm/).
+    * --add_oxidation yes : include the _atom_type_oxidation_number loop.
+    * --add_oxidation no  : omit the oxidation-state section entirely.
+The written CIF uses the same block order as upet-opt-rm-duplicate.py:
+    cell -> symmetry ops -> (oxidation states) -> atom sites.
+"""
+from __future__ import annotations
+import argparse
+import warnings
+from pathlib import Path
+import numpy as np
+warnings.filterwarnings("ignore",
+    message=r"logm result may be inaccurate.*",
+    category=RuntimeWarning)
+from ase.io import read as ase_read
+from pymatgen.core import Structure, Lattice, Element
+from pyxtal import pyxtal
+from pyxtal.util import symmetrize        # replaces SpacegroupAnalyzer
+COMMAND = "symmetrize"
+HELP = "Import space-group symmetry into a structure file (or folder) -> CIF."
+SYMPREC = 0.01
+# Recognised structure-file extensions / names for folder mode.
+STRUCT_SUFFIXES = {".cif", ".vasp", ".poscar", ".contcar", ".xyz", ".json"}
+STRUCT_NAMES    = {"POSCAR", "CONTCAR"}
+DEFAULT_OXIDATION_STATES = {
+    "Li": 1, "Na": 1, "K": 1, "Rb": 1, "Cs": 1,
+    "Be": 2, "Mg": 2, "Ca": 2, "Sr": 2, "Ba": 2,
+    "Sc": 3, "Y": 3, "La": 3, "Ce": 3, "Pr": 3, "Nd": 3, "Sm": 3,
+    "Eu": 3, "Gd": 3, "Tb": 3, "Dy": 3, "Ho": 3, "Er": 3, "Tm": 3,
+    "Yb": 3, "Lu": 3,
+    "Ti": 4, "Zr": 4, "Hf": 4,
+    "V": 5, "Nb": 5, "Ta": 5,
+    "Cr": 3, "Mo": 6, "W": 6,
+    "Mn": 2, "Fe": 3, "Co": 2, "Ni": 2, "Cu": 2, "Zn": 2,
+    "Al": 3, "Ga": 3, "In": 3, "Tl": 1,
+    "Si": 4, "Ge": 4, "Sn": 4, "Pb": 2,
+    "Sb": 3, "Bi": 3,
+    "H": 1, "B": 3, "C": 4, "N": -3,
+    "O": -2, "S": -2, "Se": -2, "Te": -2,
+    "F": -1, "Cl": -1, "Br": -1, "I": -1,
+}
+def _fmt_ox(v):
+    """Format an oxidation number: '+n' if positive, '-n' if negative, else '0'.
+
+    The value is truncated to an integer first.
+    """
+    charge = int(v)
+    if charge > 0:
+        return f"+{charge}"
+    if charge < 0:
+        return str(charge)
+    return "0"
+# ====================== Symmetrize + load into pyxtal ========================
+def build_pyxtal_from_atoms(atoms, symprec=SYMPREC):
+    """Convert ASE Atoms into a pyxtal structure, symmetrizing when possible.
+
+    Returns a tuple (crystal, is_fallback, reason). On success is_fallback is
+    False and reason is "". If symmetrization or seeding raises, the raw cell
+    is loaded instead (typically P1), is_fallback is True and reason holds
+    the exception text.
+    """
+    cell = np.array(atoms.get_cell(), dtype=float)
+    frac_coords = np.array(atoms.get_scaled_positions(wrap=True), dtype=float)
+    species = list(atoms.get_chemical_symbols())
+    raw_structure = Structure(Lattice(cell), species, frac_coords,
+                              coords_are_cartesian=False)
+    crystal = pyxtal()
+    try:
+        # symmetrize returns (refined Structure, hall_number); only the
+        # structure is used here.
+        refined, _hall = symmetrize(raw_structure, tol=symprec)
+        crystal.from_seed(refined, tol=symprec)
+    except Exception as e:
+        # Fallback: start from a fresh object and seed with the raw cell.
+        crystal = pyxtal()
+        crystal.from_seed(raw_structure, tol=symprec)
+        return crystal, True, str(e)
+    return crystal, False, ""
+# ============================== CIF writer ==================================
+def write_cif_legacy(struct, filepath, header, oxidation_states, add_oxidation=True):
+    """Write a CIF in the legacy gemmi-style block layout using pyxtal data.
+
+    Cell parameters via struct.lattice.get_para(degree=True); space group via
+    struct.group; symmetry ops from the general Wyckoff position
+    (struct.group[0]); atom sites from struct.atom_sites. Occupancies are
+    always written as 1.000. When add_oxidation is False the
+    _atom_type_oxidation_number section is omitted entirely.
+
+    Args:
+        struct: pyxtal structure providing lattice, group and atom_sites.
+        filepath: destination path of the CIF file (overwritten if present).
+        header: name written after the ``data_`` prefix on the first line.
+        oxidation_states: mapping element symbol -> oxidation number; symbols
+            missing from the mapping are written as 0.
+        add_oxidation: whether to emit the oxidation-state loop.
+    """
+    a, b, c, alpha, beta, gamma = struct.lattice.get_para(degree=True)
+    vol = struct.lattice.volume
+    # Hermann-Mauguin symbol with underscores and spaces stripped.
+    hm = struct.group.symbol.replace("_", "").replace(" ", "")
+    sg_num = struct.group.number
+    # Atom site rows, sorted by (Z asc, Wyckoff letter), labels enumerated per element
+    rows = []
+    for site in struct.atom_sites:
+        # site.specie may be a plain string or an object exposing .symbol.
+        sym = site.specie if isinstance(site.specie, str) else \
+            getattr(site.specie, "symbol", str(site.specie))
+        # Wrap fractional coordinates into [0, 1).
+        x, y, z = (np.asarray(site.position, dtype=float) % 1.0).tolist()
+        rows.append((Element(sym).Z, str(site.wp.letter),
+                     sym, int(site.wp.multiplicity), x, y, z))
+    rows.sort(key=lambda r: (r[0], r[1]))
+    # counts tracks how many sites of each element were seen so far, giving
+    # labels such as <sym>1, <sym>2, ...
+    counts = {}
+    labeled = []
+    for _z, wyck, sym, mult, x, y, zc in rows:
+        counts[sym] = counts.get(sym, 0) + 1
+        labeled.append((f"{sym}{counts[sym]}", sym, mult, wyck, x, y, zc))
+    # Symmetry operations (no quotes, no spaces) from general Wyckoff position
+    symops = [op.as_xyz_str().replace(" ", "") for op in struct.group[0]]
+    # Compose CIF body. Keys are left-padded to a width of 34 characters
+    # before the value (legacy layout).
+    out = [f"data_{header}"]
+    for key, val in (
+        ("_cell_length_a",    f"{a:.6f}"),
+        ("_cell_length_b",    f"{b:.6f}"),
+        ("_cell_length_c",    f"{c:.6f}"),
+        ("_cell_angle_alpha", f"{alpha:.6f}"),
+        ("_cell_angle_beta",  f"{beta:.6f}"),
+        ("_cell_angle_gamma", f"{gamma:.6f}"),
+        ("_cell_volume",      f"{vol:.4f}"),
+        ("_symmetry_space_group_name_H-M", hm),
+        ("_symmetry_Int_Tables_number",    str(sg_num)),
+    ):
+        out.append(f"{key:<34}{val}")
+    out.append("")
+    out.append("loop_")
+    out.append("_symmetry_equiv_pos_site_id")
+    out.append("_symmetry_equiv_pos_as_xyz")
+    # Symmetry operations are numbered from 1.
+    for i, s in enumerate(symops, start=1):
+        out.append(f"{i} {s}")
+    if add_oxidation:
+        out.append("")
+        out.append("loop_")
+        out.append("_atom_type_symbol")
+        out.append("_atom_type_oxidation_number")
+        # NOTE(review): this loop emits one row per atom site, keyed by the
+        # site label (element symbol + running index), under the
+        # _atom_type_symbol tag, while the atom-site loop below writes the
+        # bare element symbol as _atom_site_type_symbol. Confirm that readers
+        # of the legacy layout expect labels here rather than one row per
+        # element.
+        for label, sym, *_ in labeled:
+            out.append(f"{label:<3} {_fmt_ox(oxidation_states.get(sym, 0))}")
+    out.append("")
+    out.append("loop_")
+    out.append("_atom_site_label")
+    out.append("_atom_site_type_symbol")
+    out.append("_atom_site_symmetry_multiplicity")
+    out.append("_atom_site_Wyckoff_symbol")
+    out.append("_atom_site_fract_x")
+    out.append("_atom_site_fract_y")
+    out.append("_atom_site_fract_z")
+    out.append("_atom_site_occupancy")
+    for label, sym, mult, wyck, x, y, zc in labeled:
+        out.append(f"{label:<3} {sym:<2} {mult} {wyck} "
+                   f"{x:.6f} {y:.6f} {zc:.6f} 1.000")
+    # Single write with a trailing newline.
+    with open(filepath, "w") as f:
+        f.write("\n".join(out) + "\n")
+# ============================== driver ======================================
+def is_structure_file(p: Path) -> bool:
+    """Return True if *p* is a regular file with a recognised suffix or name.
+
+    Suffixes are compared case-insensitively against STRUCT_SUFFIXES; file
+    names are compared exactly against STRUCT_NAMES.
+    """
+    if not p.is_file():
+        return False
+    if p.suffix.lower() in STRUCT_SUFFIXES:
+        return True
+    return p.name in STRUCT_NAMES
+def symmetrize_one(in_path: Path, out_path: Path, symprec, add_oxidation):
+    """Symmetrize one structure file, write its CIF, and return the SG number.
+
+    The CIF data block is named after the stem of *out_path*. Errors raised
+    while reading, symmetrizing or writing are not caught here; they
+    propagate to the caller.
+    """
+    structure = ase_read(str(in_path))
+    xtal, used_fallback, reason = build_pyxtal_from_atoms(structure,
+                                                          symprec=symprec)
+    write_cif_legacy(xtal, str(out_path), header=out_path.stem,
+                     oxidation_states=DEFAULT_OXIDATION_STATES,
+                     add_oxidation=add_oxidation)
+    sg_number = int(xtal.group.number)
+    sg_symbol = str(xtal.group.symbol)
+    # Mention the fallback in the log line only when it was actually used.
+    note = f"  [fallback to raw cell: {reason}]" if used_fallback else ""
+    print(f"  {in_path.name}  ->  {out_path}  "
+          f"(SG {sg_symbol} #{sg_number}){note}")
+    return sg_number
+def add_arguments(parser):
+    """Register the positional input path and the symmetrize options."""
+    option_table = (
+        (("path",),
+         dict(help="Input structure file, or a folder of structures.")),
+        (("--symprec",),
+         dict(type=float, default=SYMPREC,
+              help=f"Symmetry precision (default: {SYMPREC}).")),
+        (("--add_oxidation",),
+         dict(choices=["yes", "no"], default="yes",
+              help="Include the oxidation-state section (default: yes).")),
+        (("-o", "--output"),
+         dict(default=None,
+              help="Output CIF (file mode) or output folder (folder mode). "
+                   "Default: <stem>-symm.cif or <folder>-symm/.")),
+    )
+    # Registration order is preserved so --help lists options as before.
+    for flags, options in option_table:
+        parser.add_argument(*flags, **options)
+def run(args):
+    """Symmetrize a single file, or every structure file in a folder.
+
+    Returns 0 on success. Returns 1 when the input path does not exist, when
+    the single-file conversion fails, or when at least one structure in
+    folder mode could not be symmetrized. Folder mode previously returned 0
+    even if every file failed, hiding batch failures from the exit status.
+    An empty folder is still treated as success.
+    """
+    in_path = Path(args.path)
+    add_oxidation = (args.add_oxidation == "yes")
+    if not in_path.exists():
+        print(f"Input not found: {in_path}")
+        return 1
+    # --- folder mode -------------------------------------------------------
+    if in_path.is_dir():
+        files = sorted(
+            (p for p in in_path.iterdir() if is_structure_file(p)),
+            key=lambda p: p.name,
+        )
+        if not files:
+            print(f"No structure files found in {in_path}/. Nothing to do.")
+            return 0
+        out_dir = Path(args.output) if args.output \
+            else in_path.with_name(in_path.name + "-symm")
+        out_dir.mkdir(parents=True, exist_ok=True)
+        print(f"Symmetrizing {len(files)} structure(s) from {in_path}/ "
+              f"(symprec={args.symprec}, add_oxidation={args.add_oxidation}) "
+              f"-> {out_dir}/")
+        n_ok = 0
+        for p in files:
+            out_path = out_dir / f"{p.stem}-symm.cif"
+            # Best effort per file: report the error and keep going so one
+            # bad structure does not abort the whole batch.
+            try:
+                symmetrize_one(p, out_path, args.symprec, add_oxidation)
+                n_ok += 1
+            except Exception as exc:
+                print(f"  [error] {p.name}: {exc}")
+        print(f"\nWrote {n_ok}/{len(files)} symmetrized CIF(s) to {out_dir}/")
+        # Match file mode: any failure yields a non-zero exit status.
+        return 0 if n_ok == len(files) else 1
+    # --- file mode ---------------------------------------------------------
+    out_path = Path(args.output) if args.output \
+        else in_path.with_name(in_path.stem + "-symm.cif")
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    print(f"Symmetrizing {in_path} "
+          f"(symprec={args.symprec}, add_oxidation={args.add_oxidation}) "
+          f"-> {out_path}")
+    try:
+        symmetrize_one(in_path, out_path, args.symprec, add_oxidation)
+    except Exception as exc:
+        print(f"  [error] {in_path.name}: {exc}")
+        return 1
+    return 0
+if __name__ == "__main__":
+    # Standalone entry point: build a parser, run, and exit with run()'s code.
+    cli_parser = argparse.ArgumentParser(description=__doc__)
+    add_arguments(cli_parser)
+    cli_args = cli_parser.parse_args()
+    raise SystemExit(run(cli_args))