PyPI - cellify - Versions diffs - 0.1.2__py3-none-any.whl - Mend

cellify 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

cellify/__init__.py +6 -0
cellify/adapters/__init__.py +26 -0
cellify/adapters/base.py +36 -0
cellify/adapters/espresso.py +124 -0
cellify/adapters/standard.py +27 -0
cellify/cli.py +245 -0
cellify/core.py +271 -0
cellify-0.1.2.dist-info/METADATA +202 -0
cellify-0.1.2.dist-info/RECORD +11 -0
cellify-0.1.2.dist-info/WHEEL +4 -0
cellify-0.1.2.dist-info/entry_points.txt +2 -0

cellify/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""
+cellify package.
+A friendly DFT helper CLI for generating supercells and calculation-ready inputs.
+"""
+__version__ = "0.1.2"

cellify/adapters/__init__.py ADDED Viewed

@@ -0,0 +1,26 @@
+"""
+I/O Adapters package for cellify.
+"""
+from cellify.adapters.base import BaseAdapter
+from cellify.adapters.espresso import EspressoAdapter
+from cellify.adapters.standard import StandardAdapter
+__all__ = ["BaseAdapter", "EspressoAdapter", "StandardAdapter"]
+def get_adapter(filepath: str) -> BaseAdapter:
+    """
+    Returns an appropriate I/O adapter object based on the filepath or extension.
+    """
+    lower_path: str = filepath.lower()
+    # Check if the file is a Quantum ESPRESSO input file
+    is_qe: bool = (
+        any(lower_path.endswith(ext) for ext in [".in", ".qe", ".pwi"])
+        or "qe" in lower_path
+        or "espresso" in lower_path
+    )
+    if is_qe:
+        return EspressoAdapter()
+    return StandardAdapter()

cellify/adapters/base.py ADDED Viewed

@@ -0,0 +1,36 @@
+from abc import ABC, abstractmethod
+from typing import Any, Dict, Tuple
+from pymatgen.core import Structure
+class BaseAdapter(ABC):
+    """
+    Abstract base class for structure file I/O supported by cellify.
+    Parameter-preserving and software-specific output adapters should inherit this class.
+    """
+    @abstractmethod
+    def read(self, filepath: str) -> Tuple[Structure, Dict[str, Any]]:
+        """
+        Loads a structure file and returns the structure object along with metadata.
+        Args:
+            filepath (str): Path to the input file.
+        Returns:
+            Tuple[Structure, Dict[str, Any]]: A tuple of the pymatgen Structure object and a metadata dictionary.
+        """
+    @abstractmethod
+    def write(
+        self, filepath: str, structure: Structure, meta_data: Dict[str, Any]
+    ) -> None:
+        """
+        Writes the structure to the specified path while preserving original metadata.
+        Args:
+            filepath (str): Path to the output file.
+            structure (Structure): The modified/supercell Structure object.
+            meta_data (Dict[str, Any]): Metadata retrieved during the read phase.
+        """

cellify/adapters/espresso.py ADDED Viewed

@@ -0,0 +1,124 @@
+"""
+Quantum ESPRESSO input/output adapter for cellify.
+"""
+import os
+import re
+from typing import Any, Dict, Tuple
+from pymatgen.core import Structure
+from cellify.adapters.base import BaseAdapter
+class EspressoAdapter(BaseAdapter):
+    """
+    Quantum ESPRESSO input file adapter.
+    Preserves calculation parameters (&CONTROL, &SYSTEM, etc.) and comment lines,
+    while automatically updating nat/ntyp and replacing structure sections.
+    """
+    def read(self, filepath: str) -> Tuple[Structure, Dict[str, Any]]:
+        if not os.path.exists(filepath):
+            raise FileNotFoundError(f"Input file not found: {filepath}")
+        with open(filepath, "r", encoding="utf-8") as f:
+            content: str = f.read()
+        # Safely parse structure using ASE espresso-in reader
+        try:
+            # pylint: disable=import-outside-toplevel
+            from ase.io import read as ase_read
+            from pymatgen.io.ase import AseAtomsAdaptor
+            atoms = ase_read(filepath, format="espresso-in")
+            structure: Structure = AseAtomsAdaptor.get_structure(atoms)
+        except Exception as ase_err:
+            raise ValueError(
+                f"Failed to parse structure from Quantum ESPRESSO file: {ase_err}"
+            ) from ase_err
+        meta_data: Dict[str, Any] = {
+            "mode": "espresso_text_replace",
+            "content": content,
+            "filepath": filepath,
+        }
+        return structure, meta_data
+    def write(
+        self, filepath: str, structure: Structure, meta_data: Dict[str, Any]
+    ) -> None:
+        content: str = meta_data["content"]
+        # 1. Calculate new nat and ntyp
+        nat_new: int = len(structure)
+        ntyp_new: int = len(structure.composition.elements)
+        # 2. Update nat and ntyp inside namelists
+        content = re.sub(
+            r"(\bnat\s*=\s*)\d+", r"\g<1>" + str(nat_new), content, flags=re.IGNORECASE
+        )
+        content = re.sub(
+            r"(\bntyp\s*=\s*)\d+",
+            r"\g<1>" + str(ntyp_new),
+            content,
+            flags=re.IGNORECASE,
+        )
+        # 3. Strip old structure-related blocks from text
+        cleaned_content: str = content
+        struct_keywords = ["ATOMIC_SPECIES", "CELL_PARAMETERS", "ATOMIC_POSITIONS"]
+        for kw in struct_keywords:
+            pattern = (
+                r"(?i)^\s*"
+                + kw
+                + r"\b.*?(?=\n\s*(?:ATOMIC_SPECIES|CELL_PARAMETERS|ATOMIC_POSITIONS|K_POINTS|KPOINTS|&[A-Za-z]+)|\Z)"
+            )
+            cleaned_content = re.sub(
+                pattern, "", cleaned_content, flags=re.DOTALL | re.MULTILINE
+            )
+        # Clean extra leading/trailing whitespaces
+        cleaned_content = cleaned_content.strip() + "\n\n"
+        # 4. Extract existing pseudopotential information from the original file
+        pseudos: Dict[str, Any] = {}
+        species_match = re.search(
+            r"(?i)ATOMIC_SPECIES\s*\n(.*?)(?=\n\s*(?:ATOMIC_|CELL_|K_POINTS|KPOINTS|&[A-Za-z]+)|\Z)",
+            content,
+            re.DOTALL,
+        )
+        if species_match:
+            for line in species_match.group(1).strip().split("\n"):
+                parts = line.split()
+                if len(parts) >= 3:
+                    pseudos[parts[0]] = (parts[1], parts[2])
+        # 5. Reconstruct structure blocks
+        # ATOMIC_SPECIES
+        species_str: str = "ATOMIC_SPECIES\n"
+        for el in structure.composition.elements:
+            el_symbol: str = el.symbol
+            mass, pseudo = pseudos.get(
+                el_symbol, (str(el.atomic_mass), f"{el_symbol}.UPF")
+            )
+            species_str += f"  {el_symbol}  {mass}  {pseudo}\n"
+        # CELL_PARAMETERS
+        cell_str: str = "\nCELL_PARAMETERS angstrom\n"
+        for vec in structure.lattice.matrix:
+            cell_str += f"  {vec[0]:.10f}  {vec[1]:.10f}  {vec[2]:.10f}\n"
+        # ATOMIC_POSITIONS
+        pos_str: str = "\nATOMIC_POSITIONS crystal\n"
+        for site in structure:
+            pos_str += (
+                f"  {site.specie.symbol}  {site.a:.10f}  {site.b:.10f}  {site.c:.10f}\n"
+            )
+        # 6. Save file
+        with open(filepath, "w", encoding="utf-8") as f:
+            f.write(cleaned_content)
+            f.write(species_str)
+            f.write(cell_str)
+            f.write(pos_str)

cellify/adapters/standard.py ADDED Viewed

@@ -0,0 +1,27 @@
+"""
+Standard file format adapter for cellify (VASP POSCAR, CIF, XYZ, etc.).
+"""
+from typing import Any, Dict, Tuple
+from pymatgen.core import Structure
+from cellify.adapters.base import BaseAdapter
+class StandardAdapter(BaseAdapter):
+    """
+    Standard structure file adapter for formats like VASP (POSCAR), CIF, XYZ, etc.
+    Does not perform parameter-preserving text replacements, and uses pymatgen's
+    default I/O functionalities.
+    """
+    def read(self, filepath: str) -> Tuple[Structure, Dict[str, Any]]:
+        struct: Structure = Structure.from_file(filepath)
+        meta_data: Dict[str, Any] = {"mode": "standard", "filepath": filepath}
+        return struct, meta_data
+    def write(
+        self, filepath: str, structure: Structure, meta_data: Dict[str, Any]
+    ) -> None:
+        structure.to(filename=filepath)

cellify/cli.py ADDED Viewed

@@ -0,0 +1,245 @@
+"""
+Command-line interface (CLI) for cellify.
+Handles arg parsing, workflow orchestration, and user output reporting.
+"""
+import argparse
+import os
+import sys
+from typing import List, Optional, cast
+import numpy as np
+from pymatgen.core import Structure
+from cellify import __version__
+from cellify.core import (
+    apply_substitutions,
+    apply_vacancies,
+    calculate_min_dist_scaling,
+    convert_to_conventional,
+    generate_surface_slab,
+    load_structure_file,
+    parse_matrix_string,
+    save_structure_file,
+)
+def parse_args(args: Optional[List[str]] = None) -> argparse.Namespace:
+    """
+    Parses command-line arguments.
+    """
+    parser = argparse.ArgumentParser(
+        description="cellify: A friendly DFT helper CLI for generating supercells and calculation-ready inputs."
+    )
+    parser.add_argument(
+        "-v", "--version", action="version", version=f"cellify {__version__}"
+    )
+    # I/O options
+    parser.add_argument(
+        "-i",
+        "--input",
+        required=True,
+        help="Input structure file path (e.g. POSCAR, input.cif, qe.in)",
+    )
+    parser.add_argument(
+        "-o",
+        "--output",
+        help="Output structure file path (default: <input_base>_supercell.<ext>)",
+    )
+    # Supercell options
+    group = parser.add_mutually_exclusive_group()
+    group.add_argument(
+        "-d",
+        "--dim",
+        nargs=3,
+        type=int,
+        metavar=("nx", "ny", "nz"),
+        help="Diagonal scaling factors for the supercell (e.g., -d 2 2 2)",
+    )
+    group.add_argument(
+        "-m",
+        "--matrix",
+        help="3x3 transformation matrix. Specify as 'r11 r12 r13 / r21 r22 r23 / r31 r32 r33'",
+    )
+    group.add_argument(
+        "--min-dist",
+        type=float,
+        metavar="DISTANCE",
+        help="Automatically generate a supercell where the minimum distance between periodic images is >= DISTANCE (in Angstroms)",
+    )
+    parser.add_argument(
+        "--conventional",
+        action="store_true",
+        help="Automatically convert the input structure to its standard conventional representation before applying other operations.",
+    )
+    # Doping / Defect options
+    parser.add_argument(
+        "--substitute",
+        action="append",
+        default=[],
+        help="Substitution rule: 'element:target_element:index_or_percentage' (e.g., 'Si:P:0' or 'Si:Al:5%%')",
+    )
+    parser.add_argument(
+        "--vacancy",
+        action="append",
+        default=[],
+        help="Vacancy rule: 'element:index_or_count' (e.g., 'Si:0' or 'O:2')",
+    )
+    # Slab options
+    parser.add_argument(
+        "--slab",
+        nargs=3,
+        type=int,
+        metavar=("h", "k", "l"),
+        help="Miller indices for surface slab generation (e.g., --slab 1 0 0)",
+    )
+    parser.add_argument(
+        "--thick", type=float, help="Slab thickness (in Angstroms or layers)"
+    )
+    parser.add_argument(
+        "--vacuum", type=float, help="Vacuum layer thickness (in Angstroms)"
+    )
+    return parser.parse_args(args)
+def _print_structure_summary(structure: Structure, label: str = "") -> None:
+    """
+    Prints a formatted summary of the structure.
+    """
+    if label:
+        print(f"\n{label}")
+    print(f"  Formula: {structure.composition.reduced_formula}")
+    print(f"  Volume:  {structure.volume:.3f} A^3")
+    print(f"  Number of atoms: {len(structure)}")
+    if label:
+        print("  Lattice constants:")
+        print(
+            f"    a = {structure.lattice.a:.4f} A, b = {structure.lattice.b:.4f} A, c = {structure.lattice.c:.4f} A"
+        )
+        print(
+            f"    alpha = {structure.lattice.alpha:.2f} deg, beta = {structure.lattice.beta:.2f} deg, gamma = {structure.lattice.gamma:.2f} deg"
+        )
+def _apply_supercell(structure: Structure, args: argparse.Namespace) -> None:
+    """
+    Applies supercell generation options to the structure.
+    """
+    if args.dim:
+        print(f"Generating supercell with diagonal scaling: {args.dim}")
+        structure.make_supercell(args.dim)
+    elif args.matrix:
+        try:
+            matrix: np.ndarray = parse_matrix_string(args.matrix)
+            print(f"Generating supercell with matrix:\n{matrix}")
+            structure.make_supercell(matrix)
+        except Exception as e:  # pylint: disable=broad-exception-caught
+            print(f"Error parsing matrix: {e}", file=sys.stderr)
+            sys.exit(1)
+    elif args.min_dist:
+        nx, ny, nz = calculate_min_dist_scaling(structure, args.min_dist)
+        print(
+            f"Calculated scaling for minimum distance >= {args.min_dist} A: [{nx}, {ny}, {nz}]"
+        )
+        structure.make_supercell([nx, ny, nz])
+def _apply_defects_and_slab(
+    structure: Structure, args: argparse.Namespace
+) -> Structure:
+    """
+    Applies substitutions, vacancies, and surface slab options to the structure.
+    """
+    if args.substitute:
+        try:
+            apply_substitutions(structure, args.substitute)
+        except Exception as e:  # pylint: disable=broad-exception-caught
+            print(f"Error applying substitutions: {e}", file=sys.stderr)
+            sys.exit(1)
+    if args.vacancy:
+        try:
+            apply_vacancies(structure, args.vacancy)
+        except Exception as e:  # pylint: disable=broad-exception-caught
+            print(f"Error applying vacancies: {e}", file=sys.stderr)
+            sys.exit(1)
+    if args.slab:
+        print(f"Generating slab model for Miller indices: {args.slab}")
+        try:
+            structure = generate_surface_slab(
+                structure, args.slab, args.thick, args.vacuum
+            )
+        except Exception as e:  # pylint: disable=broad-exception-caught
+            print(f"Error generating slab: {e}", file=sys.stderr)
+            sys.exit(1)
+    return structure
+def _determine_output_path(args: argparse.Namespace) -> str:
+    """
+    Determines the output file path.
+    """
+    if args.output:
+        return cast(str, args.output)
+    base, ext = os.path.splitext(args.input)
+    # Special case: VASP files like POSCAR or CONTCAR with no extension
+    if not ext and base in ["POSCAR", "CONTCAR"]:
+        return f"{base}_supercell"
+    return f"{base}_supercell{ext}"
+def main() -> None:
+    """
+    Main entry point for the cellify CLI utility.
+    """
+    args: argparse.Namespace = parse_args()
+    if not os.path.exists(args.input):
+        print(f"Error: Input file '{args.input}' not found.", file=sys.stderr)
+        sys.exit(1)
+    print(f"Loading structure from: {args.input}")
+    try:
+        structure, meta_data = load_structure_file(args.input)
+    except Exception as e:  # pylint: disable=broad-exception-caught
+        print(f"Error loading file: {e}", file=sys.stderr)
+        sys.exit(1)
+    _print_structure_summary(structure)
+    # 0. Conventional cell conversion
+    if args.conventional:
+        print("Converting structure to standard conventional cell...")
+        structure = convert_to_conventional(structure)
+    # 1. Supercell generation
+    _apply_supercell(structure, args)
+    # 2. Defect and slab modifications
+    structure = _apply_defects_and_slab(structure, args)
+    # Print final structure summary
+    _print_structure_summary(structure, label="Final structure summary:")
+    # Determine output filename
+    output_path: str = _determine_output_path(args)
+    print(f"\nSaving final structure to: {output_path}")
+    try:
+        save_structure_file(output_path, structure, meta_data)
+        print("Success!")
+    except Exception as e:  # pylint: disable=broad-exception-caught
+        print(f"Error saving file: {e}", file=sys.stderr)
+        sys.exit(1)
+if __name__ == "__main__":
+    main()

cellify/core.py ADDED Viewed

@@ -0,0 +1,271 @@
+"""
+Core modeling logic for cellify.
+Handles structure loading, supercell generation, substitutions,
+vacancies, slab generation, and file saving using pymatgen and ASE.
+"""
+import math
+import re
+from typing import Any, Dict, List, Optional, Tuple
+import numpy as np
+from pymatgen.core import Structure
+from pymatgen.core.surface import SlabGenerator
+from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
+from cellify.adapters import BaseAdapter, get_adapter
+def load_structure_file(filepath: str) -> Tuple[Structure, Dict[str, Any]]:
+    """
+    Loads a file and returns the structure object along with metadata.
+    """
+    adapter: BaseAdapter = get_adapter(filepath)
+    return adapter.read(filepath)
+def save_structure_file(
+    filepath: str, structure: Structure, meta_data: Dict[str, Any]
+) -> None:
+    """
+    Saves the structure to a file.
+    """
+    adapter: BaseAdapter = get_adapter(filepath)
+    adapter.write(filepath, structure, meta_data)
+def convert_to_conventional(structure: Structure) -> Structure:
+    """
+    Finds and returns the standard conventional cell of the structure.
+    """
+    sga = SpacegroupAnalyzer(structure)
+    return sga.get_conventional_standard_structure()
+def parse_matrix_string(matrix_str: str) -> np.ndarray:
+    """
+    Parses a matrix string like "1 -1 0 / 1 1 0 / 0 0 1" into a 3x3 numpy array.
+    """
+    # Split rows by slash, comma, or semicolon
+    rows_raw: List[str] = re.split(r"[/,;]", matrix_str)
+    if len(rows_raw) != 3:
+        raise ValueError(
+            "Matrix string must define exactly 3 rows (separated by /, , or ;)"
+        )
+    matrix: List[List[float]] = []
+    for r in rows_raw:
+        vals: List[float] = [float(x) for x in r.strip().split()]
+        if len(vals) != 3:
+            raise ValueError("Each row in the matrix must have exactly 3 elements")
+        matrix.append(vals)
+    return np.array(matrix)
+def calculate_min_dist_scaling(
+    structure: Structure, min_dist: float
+) -> Tuple[int, int, int]:
+    """
+    Calculates the minimum diagonal scaling factors (nx, ny, nz) so that
+    the perpendicular distance (plane-to-plane distance) along all lattice vectors
+    is at least min_dist under periodic boundary conditions.
+    """
+    lattice = structure.lattice
+    matrix = lattice.matrix
+    a_vec, b_vec, c_vec = matrix[0], matrix[1], matrix[2]
+    vol: float = lattice.volume
+    # Perpendicular distance along each lattice vector (plane-to-plane distance d_i)
+    # d_a = V / |b x c|
+    # d_b = V / |c x a|
+    # d_c = V / |a x b|
+    d_a: float = vol / np.linalg.norm(np.cross(b_vec, c_vec))
+    d_b: float = vol / np.linalg.norm(np.cross(c_vec, a_vec))
+    d_c: float = vol / np.linalg.norm(np.cross(a_vec, b_vec))
+    # Calculate required scaling factors
+    nx: int = int(math.ceil(min_dist / d_a))
+    ny: int = int(math.ceil(min_dist / d_b))
+    nz: int = int(math.ceil(min_dist / d_c))
+    return max(1, nx), max(1, ny), max(1, nz)
+def apply_substitutions(structure: Structure, substitute_rules: List[str]) -> None:
+    """
+    Applies substitution rules to the structure.
+    Rule formats:
+        "Si:P:0" (replaces Si at absolute index 0 with P)
+        "Si:Al:5%" (randomly replaces 5% of Si atoms with Al)
+    """
+    for rule in substitute_rules:
+        _apply_single_substitution(structure, rule)
+def _apply_single_substitution(structure: Structure, rule: str) -> None:
+    """
+    Applies a single substitution rule to the structure.
+    """
+    parts: List[str] = rule.split(":")
+    if len(parts) != 3:
+        raise ValueError(
+            f"Invalid substitution rule: {rule}. Must be 'element:target_element:index_or_percentage'"
+        )
+    src_el, dest_el, target = parts[0], parts[1], parts[2]
+    matching_indices: List[int] = [
+        i for i, site in enumerate(structure) if site.specie.symbol == src_el
+    ]
+    if not matching_indices:
+        print(f"Warning: No matching elements found for substitution source '{src_el}'")
+        return
+    if target.endswith("%"):
+        _substitute_percentage(structure, src_el, dest_el, target, matching_indices)
+    else:
+        _substitute_index(structure, src_el, dest_el, target)
+def _substitute_percentage(
+    structure: Structure,
+    src_el: str,
+    dest_el: str,
+    target: str,
+    matching_indices: List[int],
+) -> None:
+    """
+    Helper to apply substitution by percentage.
+    """
+    percentage: float = float(target[:-1]) / 100.0
+    num_to_replace: int = int(round(len(matching_indices) * percentage))
+    if num_to_replace == 0 and percentage > 0:
+        num_to_replace = 1
+    replace_indices = np.random.choice(matching_indices, num_to_replace, replace=False)
+    for replace_idx in replace_indices:
+        structure.replace(replace_idx, dest_el)
+    print(f"Replaced {num_to_replace} of {src_el} with {dest_el} ({target})")
+def _substitute_index(
+    structure: Structure, src_el: str, dest_el: str, target: str
+) -> None:
+    """
+    Helper to apply substitution by absolute index.
+    """
+    try:
+        idx: int = int(target)
+        if idx < 0 or idx >= len(structure):
+            raise IndexError(
+                f"Index {idx} out of range (structure size: {len(structure)})"
+            )
+        actual_symbol: str = structure[idx].specie.symbol
+        if actual_symbol != src_el:
+            print(
+                f"Warning: Site index {idx} is '{actual_symbol}', not source element '{src_el}'. Replacing anyway."
+            )
+        structure.replace(idx, dest_el)
+        print(f"Replaced site {idx} ({actual_symbol}) with {dest_el}")
+    except ValueError as exc:
+        raise ValueError(
+            f"Invalid substitution target index or percentage: {target}"
+        ) from exc
+def apply_vacancies(structure: Structure, vacancy_rules: List[str]) -> None:
+    """
+    Applies vacancy rules to the structure (deletes specified atoms).
+    Rule formats:
+        "Si:0" (deletes Si atom at index 0)
+        "O:2" (randomly deletes 2 oxygen atoms)
+    """
+    indices_to_remove: List[int] = []
+    for rule in vacancy_rules:
+        _apply_single_vacancy(structure, rule, indices_to_remove)
+    if indices_to_remove:
+        # Sort indices in descending order to avoid shift errors when removing sites
+        indices_to_remove = sorted(list(set(indices_to_remove)), reverse=True)
+        structure.remove_sites(indices_to_remove)
+def _apply_single_vacancy(
+    structure: Structure, rule: str, indices_to_remove: List[int]
+) -> None:
+    """
+    Applies a single vacancy rule to compile index list for removal.
+    """
+    parts: List[str] = rule.split(":")
+    if len(parts) != 3 and len(parts) != 2:
+        raise ValueError(
+            f"Invalid vacancy rule: {rule}. Must be 'element:index' or 'element:count'"
+        )
+    src_el: str = parts[0]
+    target: str = parts[1]
+    matching_indices: List[int] = [
+        i for i, site in enumerate(structure) if site.specie.symbol == src_el
+    ]
+    if not matching_indices:
+        print(f"Warning: No matching elements found for vacancy source '{src_el}'")
+        return
+    try:
+        val: int = int(target)
+        # If the value is less than or equal to the count of matching elements, treat as count-based vacancy creation
+        if 0 < val <= len(matching_indices) and len(structure) > 20:
+            remove_subset = np.random.choice(matching_indices, val, replace=False)
+            indices_to_remove.extend(remove_subset)
+            print(f"Created {val} vacancies of {src_el} (randomly selected)")
+        else:
+            # Treat as index-based vacancy creation
+            if val < 0 or val >= len(structure):
+                raise IndexError(f"Index {val} out of range")
+            actual_symbol: str = structure[val].specie.symbol
+            if actual_symbol != src_el:
+                print(
+                    f"Warning: Site index {val} is '{actual_symbol}', not vacancy element '{src_el}'. Removing anyway."
+                )
+            indices_to_remove.append(val)
+            print(f"Removed site {val} ({actual_symbol}) to create vacancy")
+    except ValueError as exc:
+        raise ValueError(f"Invalid vacancy target: {target}") from exc
+def generate_surface_slab(
+    structure: Structure,
+    miller_index: List[int],
+    thick: Optional[float],
+    vacuum: Optional[float],
+) -> Structure:
+    """
+    Generates a surface slab model using pymatgen's SlabGenerator.
+    """
+    slab_thick: float = thick if thick else 10.0
+    vac_thick: float = vacuum if vacuum else 15.0
+    gen = SlabGenerator(
+        initial_structure=structure,
+        miller_index=miller_index,
+        min_slab_size=slab_thick,
+        min_vacuum_size=vac_thick,
+        center_slab=True,
+    )
+    slabs = gen.get_slabs()
+    if not slabs:
+        raise ValueError(f"Could not generate slab for Miller index {miller_index}")
+    # Adopt the first generated slab model (often the most symmetric and stable one)
+    slab = slabs[0]
+    return slab.generate_unique_slab_structs()[0]

cellify-0.1.2.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,202 @@
+Metadata-Version: 2.4
+Name: cellify
+Version: 0.1.2
+Summary: A friendly DFT helper CLI for generating supercells and calculation-ready inputs.
+Author: ToAmano
+License: MIT
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering :: Chemistry
+Classifier: Topic :: Scientific/Engineering :: Physics
+Requires-Python: >=3.9
+Requires-Dist: ase>=3.22.0
+Requires-Dist: numpy>=1.20.0
+Requires-Dist: pymatgen>=2023.0.0
+Provides-Extra: test
+Requires-Dist: pytest>=7.0.0; extra == 'test'
+Description-Content-Type: text/markdown
+# cellify
+A user-friendly command-line interface (CLI) tool for quickly and intuitively generating supercells and calculation-ready inputs from unit cells, including advanced cell transformations, in density functional theory (DFT) calculation workflows (VASP, Quantum ESPRESSO, OpenMX, CP2K, etc.).
+---
+## 1. Target Users and Pain Points
+### Target Users
+*   Researchers in materials science, physics, and chemistry simulating crystals, interfaces, surfaces, defects, and amorphous structures using DFT.
+### Current Pain Points (Limitations of Existing Tools)
+1.  **"ASE and Pymatgen are powerful, but writing Python scripts is tedious"**
+    *   Writing scripts with `read`, `make_supercell`, and `write` just to create a quick supercell is annoying.
+2.  **"cif2cell and other tools are prone to broken installations"**
+    *   Older python dependencies or compilation issues often cause setup problems.
+3.  **"Specifying non-diagonal transformation matrices (orthogonalization, etc.) is unintuitive"**
+    *   Quickly redefining lattices or cutting specific orientations from a terminal is difficult.
+4.  **"Calculating sizes to avoid periodic boundary interferences is tedious"**
+    *   Manually finding the smallest cell configuration to keep defect-to-defect distances above a threshold (e.g., $15\ \text{Å}$) is time-consuming.
+5.  **"Creating surface slab models and inserting vacuum layers in separate tools is prone to errors"**
+---
+## 2. Requirements & Features
+### ① Format-Free Multi-Format Conversion
+*   Automatically determines file formats from file extensions or headers.
+*   **Supported Formats**:
+    *   VASP (`POSCAR`, `CONTCAR`)
+    *   Quantum ESPRESSO (`.in`, `.qe`, `.pwi`, or any file whose name contains `qe` or `espresso`)
+    *   Crystallographic Information File (`.cif`)
+    *   XCrysDen Structure Format (`.xsf`, `.axsf`)
+    *   XYZ format (`.xyz`)
+    *   FHI-aims (`geometry.in`)
+### ② Flexible Cell Expansion (Supercell Generation)
+*   **Conventional Cell Auto-Conversion**: Automatically transforms loaded structures (e.g. primitive cells) into their standard conventional representation using `--conventional`.
+*   **Diagonal Scaling**: Simplest integer multiplication along lattice axes (e.g., `2 2 2`).
+*   **Matrix-Based Redefinition**: Redefine lattices using an arbitrary $3 \times 3$ transformation matrix. Ideal for orthogonalizing hexagonal cells or extracting specific crystal orientations.
+*   **Minimum Distance (Cutoff) Automatic Scaling**:
+    *   Automatically calculates and generates the smallest diagonal supercell (or specific axis dimensions) that guarantees the distance between periodic images of any atom is $\ge d\ \text{Å}$. Extremely useful for defect and phonon calculations.
+### ③ Easy Defect & Doping Modeling
+*   **Substitutions**: Replace specific atoms at a given index (e.g., replacing Si at index 0 with P) or randomly replace a specified percentage of atoms (e.g., replacing $5\%$ of Si atoms with Al).
+*   **Vacancies**: Remove atoms at specific indices or randomly delete a specified count of a given element.
+### ④ Surface Slab Generation
+*   Cut a surface slab from bulk structures by specifying Miller indices $(h, k, l)$, slab thickness (in $\text{Å}$ or layers), and vacuum thickness (in $\text{Å}$).
+### ⑤ Logging and Metadata Analysis
+*   Outputs structure logs to stdout during execution (error messages go to stderr):
+    *   Initial volume, atom count, and reduced formula.
+    *   Final supercell volume, lattice constants, lattice angles, and atom count.
+    *   Applied transformation matrix.
+    *   Minimum atomic distance under periodic boundary conditions.
+### ⑥ Calculation-Ready Input Generation
+*   For formats like Quantum ESPRESSO where calculation parameters and coordinates coexist in a single file, the original parameters (`&CONTROL`, `&SYSTEM`, etc.) and comments are completely preserved.
+*   The following parameters are automatically updated to match the generated supercell structure:
+    *   **Total number of atoms (`nat`)**: Automatically updated to the supercell atom count.
+    *   **Number of atomic types (`ntyp`)**: Dynamically incremented if new elements are added via doping.
+    *   **Atomic species definitions (`ATOMIC_SPECIES`)**: Automatically appends definitions (mass, pseudopotentials) for newly introduced elements.
+---
+## 3. Installation
+You can install `cellify` from the local repository directory:
+```bash
+# Clone the repository
+git clone https://github.com/ToAmano/cellify.git
+cd cellify
+# Install in editable mode for development
+pip install -e .
+# Or install with test dependencies
+pip install -e ".[test]"
+```
+After installation, the `cellify` command will be registered and executable from anywhere in your shell environment.
+---
+## 4. CLI Design
+### Command-Line Arguments
+```bash
+cellify -i <input_file> -o <output_file> [options]
+```
+#### Arguments List
+*   `-i`, `--input` : Input structure file path (Required).
+*   `-o`, `--output` : Output structure file path (Default: `<input_base>_supercell.<ext>`).
+*   `-d`, `--dim` : Diagonal scaling factors. 3 integers separated by spaces (e.g., `--dim 2 2 2`).
+*   `-m`, `--matrix` : $3 \times 3$ transformation matrix. Specify row values separated by spaces, rows separated by slashes/commas/semicolons (e.g., `--matrix "1 -1 0 / 1 1 0 / 0 0 2"`).
+*   `--min-dist` : Automatically generate a supercell with minimum periodic image distance $\ge$ specified distance (in $\text{Å}$).
+*   `--conventional` : Automatically convert the input structure to its standard conventional representation before applying other operations.
+*   `--substitute` : Substitution rule. Format: `element:target_element:index_or_percentage` (e.g., `--substitute "Si:P:0"` or `--substitute "Si:Al:5%"`).
+*   `--vacancy` : Vacancy rule. Format: `element:index_or_count` (e.g., `--vacancy "Si:0"`, `--vacancy "O:2"`).
+*   `--slab` : Miller indices $h\ k\ l$ for surface slab model creation (e.g., `--slab 1 1 1`).
+*   `--thick` : Slab thickness in $\text{Å}$ or layers (e.g., `--thick 15.0`).
+*   `--vacuum` : Vacuum layer thickness in $\text{Å}$ (e.g., `--vacuum 15.0`).
+---
+## 5. Use Cases
+### 1. Create a simple $2 \times 2 \times 3$ supercell (VASP POSCAR)
+```bash
+cellify -i POSCAR -o POSCAR_223 --dim 2 2 3
+```
+### 2. Orthogonalize a hexagonal cell (Quantum ESPRESSO input)
+```bash
+# Preserves &CONTROL and &SYSTEM settings, and updates nat, CELL_PARAMETERS, and ATOMIC_POSITIONS
+cellify -i qe.in -o qe_ortho.in --matrix "1 -1 0 / 1 1 0 / 0 0 1"
+```
+### 3. Generate the smallest supercell keeping defect distance $\ge 15\ \text{Å}$
+```bash
+cellify -i POSCAR -o POSCAR_defect_bulk --min-dist 15.0
+```
+### 4. Create a silicon supercell and replace 1 atom with Phosphorus (n-type doped model)
+```bash
+cellify -i Si_unit.cif -o Si_doped.POSCAR --dim 3 3 3 --substitute "Si:P:0"
+```
+### 5. Generate a $\text{SrTiO}_3$ (100) surface slab model with $15\ \text{Å}$ vacuum
+```bash
+cellify -i STO_bulk.cif -o STO_100_slab.POSCAR --slab 1 0 0 --thick 12.0 --vacuum 15.0
+```
+### 6. Convert a primitive Silicon cell to conventional cell and scale it to 2x2x2
+```bash
+cellify -i Si_primitive.POSCAR -o Si_conventional_222.POSCAR --conventional --dim 2 2 2
+```
+---
+## 6. Directory Structure
+This project uses the standard Python `src-layout`:
+```text
+cellify/
+├── README.md
+├── NAMES.md
+├── pyproject.toml
+└── src/
+    └── cellify/
+        ├── __init__.py
+        ├── cli.py            # Command-line argument parsing and execution flow
+        ├── core.py           # Pure geometric modeling (supercell, defect, slab creation)
+        └── adapters/         # Software-specific file I/O and parameter-preservation adapters
+            ├── __init__.py
+            ├── base.py       # Abstract base class for I/O adapters
+            ├── espresso.py   # Quantum ESPRESSO adapter
+            └── standard.py   # VASP/CIF generic format adapter
+```
+---
+## 7. Technical Stack & Development Approach
+1.  **Language**: **Python 3** (High affinity with scientific and DFT software ecosystems).
+2.  **Core Libraries**: **pymatgen** and **ASE (Atomic Simulation Environment)**.
+    *   **pymatgen**: Used for symmetry determination, structure analysis, defect modeling, and advanced slab generation.
+    *   **ASE**: Used for format-independent structure loading/writing and robust file parsing.
+    *   Conversion between both frameworks is done seamlessly via `pymatgen.io.ase.AseAtomsAdaptor`.
+3.  **Packaging**:
+    *   Managed via `pyproject.toml` using `hatchling` as the build backend.
+    *   Installable in editable mode using `pip install -e ".[test]"`.
+    *   Registers `cellify` command as an entry point upon installation.

cellify-0.1.2.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,11 @@
+cellify/__init__.py,sha256=Fo90js_EtP3snSnFwEPc465PRIErk5egGacvFSNgA1U,130
+cellify/cli.py,sha256=wl3FirQQtCurDCHLeuxeRl6JQBRQod1N7GHXm8J8FBw,7698
+cellify/core.py,sha256=2PO6_dXZnQAj7fCAayxP6B76VcfgnH3_U5_yuuIs4H4,9024
+cellify/adapters/__init__.py,sha256=3JsbQnuB4rgTcy_WWH-J7jb1sUNxKFBudrKXWsp1faU,746
+cellify/adapters/base.py,sha256=9eWJ7QAYDf8sKmrWa8vVlJl7OpvIZw-82rKrEFR2kA4,1175
+cellify/adapters/espresso.py,sha256=gKSwRUF3s5Occ50eDbHfAroPmFgPT1iXOVgCVJz8Hok,4346
+cellify/adapters/standard.py,sha256=zn8NZ-iy4eDS4oMSD6tx2aAKrfD1spjsP60h8LeUfVE,842
+cellify-0.1.2.dist-info/METADATA,sha256=BjkEV_epmdyzb4sTDFssKhhQhsfiEkMeI52VFnfdGE8,9610
+cellify-0.1.2.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
+cellify-0.1.2.dist-info/entry_points.txt,sha256=XP96fl4EbrFmoNAa3vkdO5u-5Z6nlDneZ-GHfMgcWh4,45
+cellify-0.1.2.dist-info/RECORD,,

cellify-0.1.2.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.30.1
+Root-Is-Purelib: true
+Tag: py3-none-any

cellify-0.1.2.dist-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ cellify = cellify.cli:main