cellify 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cellify/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ """
2
+ cellify package.
3
+ A friendly DFT helper CLI for generating supercells and calculation-ready inputs.
4
+ """
5
+
6
+ __version__ = "0.1.2"
@@ -0,0 +1,26 @@
1
+ """
2
+ I/O Adapters package for cellify.
3
+ """
4
+
5
+ from cellify.adapters.base import BaseAdapter
6
+ from cellify.adapters.espresso import EspressoAdapter
7
+ from cellify.adapters.standard import StandardAdapter
8
+
9
+ __all__ = ["BaseAdapter", "EspressoAdapter", "StandardAdapter"]
10
+
11
+
12
def get_adapter(filepath: str) -> BaseAdapter:
    """
    Returns an appropriate I/O adapter object based on the file name.

    A path is treated as a Quantum ESPRESSO input when its file name ends with
    ".in", ".qe" or ".pwi", or when the file name contains "qe" or "espresso"
    (case-insensitive). Every other path gets the standard adapter.

    Args:
        filepath (str): Path to the structure file (input or output).

    Returns:
        BaseAdapter: An EspressoAdapter for Quantum ESPRESSO files, otherwise a StandardAdapter.
    """
    # pylint: disable=import-outside-toplevel
    import os

    # Only the final path component is inspected. Matching "qe"/"espresso" against
    # the whole path would send e.g. "qe_runs/POSCAR" to the ESPRESSO parser just
    # because of the directory name.
    file_name: str = os.path.basename(filepath).lower()

    # Check if the file is a Quantum ESPRESSO input file
    is_qe: bool = (
        file_name.endswith((".in", ".qe", ".pwi"))
        or "qe" in file_name
        or "espresso" in file_name
    )

    if is_qe:
        return EspressoAdapter()
    return StandardAdapter()
@@ -0,0 +1,36 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Any, Dict, Tuple
3
+
4
+ from pymatgen.core import Structure
5
+
6
+
7
class BaseAdapter(ABC):
    """
    Abstract base class for structure file I/O supported by cellify.

    Concrete adapters implement ``read`` and ``write``. The metadata dictionary
    returned by ``read`` is handed back to ``write`` so that an adapter can carry
    format-specific information (for example the original file text) from input
    to output.
    """

    @abstractmethod
    def read(self, filepath: str) -> Tuple[Structure, Dict[str, Any]]:
        """
        Loads a structure file and returns the structure object along with metadata.

        Args:
            filepath (str): Path to the input file.

        Returns:
            Tuple[Structure, Dict[str, Any]]: The pymatgen Structure and a metadata
            dictionary whose contents are defined by the concrete adapter.
        """

    @abstractmethod
    def write(
        self, filepath: str, structure: Structure, meta_data: Dict[str, Any]
    ) -> None:
        """
        Writes the structure to the specified path.

        Args:
            filepath (str): Path to the output file.
            structure (Structure): The structure to write (e.g. the generated supercell).
            meta_data (Dict[str, Any]): Metadata produced by a previous ``read`` call.
        """
@@ -0,0 +1,124 @@
1
+ """
2
+ Quantum ESPRESSO input/output adapter for cellify.
3
+ """
4
+
5
+ import os
6
+ import re
7
+ from typing import Any, Dict, Tuple
8
+
9
+ from pymatgen.core import Structure
10
+
11
+ from cellify.adapters.base import BaseAdapter
12
+
13
+
14
class EspressoAdapter(BaseAdapter):
    """
    Quantum ESPRESSO input file adapter.

    Reading parses the structure with ASE and keeps the raw input text as metadata.
    Writing re-uses that text: namelists (&CONTROL, &SYSTEM, etc.) and comment lines
    are kept, nat/ntyp are updated, ibrav is reset to 0, and the ATOMIC_SPECIES,
    CELL_PARAMETERS and ATOMIC_POSITIONS cards are regenerated from the new structure.
    """

    def read(self, filepath: str) -> Tuple[Structure, Dict[str, Any]]:
        """
        Reads a Quantum ESPRESSO input file.

        Args:
            filepath (str): Path to the input file.

        Returns:
            Tuple[Structure, Dict[str, Any]]: The parsed structure and a metadata
            dictionary with the keys "mode", "content" (raw file text) and "filepath".

        Raises:
            FileNotFoundError: If ``filepath`` does not exist.
            ValueError: If ASE cannot parse a structure from the file.
        """
        if not os.path.exists(filepath):
            raise FileNotFoundError(f"Input file not found: {filepath}")

        with open(filepath, "r", encoding="utf-8") as f:
            content: str = f.read()

        # Safely parse structure using ASE espresso-in reader
        try:
            # pylint: disable=import-outside-toplevel
            from ase.io import read as ase_read
            from pymatgen.io.ase import AseAtomsAdaptor

            atoms = ase_read(filepath, format="espresso-in")
            structure: Structure = AseAtomsAdaptor.get_structure(atoms)
        except Exception as ase_err:
            raise ValueError(
                f"Failed to parse structure from Quantum ESPRESSO file: {ase_err}"
            ) from ase_err

        meta_data: Dict[str, Any] = {
            "mode": "espresso_text_replace",
            "content": content,
            "filepath": filepath,
        }
        return structure, meta_data

    def write(
        self, filepath: str, structure: Structure, meta_data: Dict[str, Any]
    ) -> None:
        """
        Writes a Quantum ESPRESSO input based on the original input text.

        Args:
            filepath (str): Path to the output file.
            structure (Structure): The structure to write.
            meta_data (Dict[str, Any]): Metadata returned by ``read``; must contain
                the original file text under "content".

        Raises:
            ValueError: If ``meta_data`` carries no original input text, which
                happens when the structure was loaded through another adapter.
        """
        original_text = meta_data.get("content")
        if original_text is None:
            # Metadata from a non-ESPRESSO input has no namelists to preserve;
            # fail with an explanation instead of a bare KeyError.
            raise ValueError(
                "Cannot write a Quantum ESPRESSO input: the original input text is "
                "missing (the input file must also be a Quantum ESPRESSO input)."
            )
        content: str = original_text

        # 1. Calculate new nat and ntyp
        elements = structure.composition.elements
        nat_new: int = len(structure)
        ntyp_new: int = len(elements)

        # 2. Update nat and ntyp inside namelists
        content = re.sub(
            r"(\bnat\s*=\s*)\d+", r"\g<1>" + str(nat_new), content, flags=re.IGNORECASE
        )
        content = re.sub(
            r"(\bntyp\s*=\s*)\d+",
            r"\g<1>" + str(ntyp_new),
            content,
            flags=re.IGNORECASE,
        )
        # The lattice is always emitted as an explicit CELL_PARAMETERS card, which
        # pw.x only honours with ibrav = 0.
        # NOTE(review): celldm()/A/B/C entries are left untouched; confirm whether
        # they need to be removed for inputs that originally used ibrav != 0.
        content = re.sub(
            r"(\bibrav\s*=\s*)-?\d+", r"\g<1>0", content, flags=re.IGNORECASE
        )

        # 3. Strip old structure-related blocks from text
        cleaned_content: str = content
        for kw in ("ATOMIC_SPECIES", "CELL_PARAMETERS", "ATOMIC_POSITIONS"):
            # Each card runs lazily up to the next known card/namelist or end of text.
            pattern = (
                r"(?i)^\s*"
                + kw
                + r"\b.*?(?=\n\s*(?:ATOMIC_SPECIES|CELL_PARAMETERS|ATOMIC_POSITIONS|K_POINTS|KPOINTS|&[A-Za-z]+)|\Z)"
            )
            cleaned_content = re.sub(
                pattern, "", cleaned_content, flags=re.DOTALL | re.MULTILINE
            )

        # Clean extra leading/trailing whitespaces
        cleaned_content = cleaned_content.strip() + "\n\n"

        # 4. Extract existing pseudopotential information from the original file
        pseudos: Dict[str, Any] = {}
        species_match = re.search(
            r"(?i)ATOMIC_SPECIES\s*\n(.*?)(?=\n\s*(?:ATOMIC_|CELL_|K_POINTS|KPOINTS|&[A-Za-z]+)|\Z)",
            content,
            re.DOTALL,
        )
        if species_match:
            for line in species_match.group(1).strip().split("\n"):
                parts = line.split()
                if len(parts) >= 3:
                    pseudos[parts[0]] = (parts[1], parts[2])

        # 5. Reconstruct structure blocks
        # ATOMIC_SPECIES
        species_lines = ["ATOMIC_SPECIES\n"]
        for el in elements:
            el_symbol: str = el.symbol
            # float() drops the unit suffix that str() of pymatgen's atomic_mass
            # would add, which would otherwise shift the pseudopotential column.
            mass, pseudo = pseudos.get(
                el_symbol, (str(float(el.atomic_mass)), f"{el_symbol}.UPF")
            )
            species_lines.append(f" {el_symbol} {mass} {pseudo}\n")

        # CELL_PARAMETERS
        cell_lines = ["\nCELL_PARAMETERS angstrom\n"]
        cell_lines.extend(
            f" {vec[0]:.10f} {vec[1]:.10f} {vec[2]:.10f}\n"
            for vec in structure.lattice.matrix
        )

        # ATOMIC_POSITIONS
        pos_lines = ["\nATOMIC_POSITIONS crystal\n"]
        pos_lines.extend(
            f" {site.specie.symbol} {site.a:.10f} {site.b:.10f} {site.c:.10f}\n"
            for site in structure
        )

        # 6. Save file
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(cleaned_content)
            f.write("".join(species_lines))
            f.write("".join(cell_lines))
            f.write("".join(pos_lines))
@@ -0,0 +1,27 @@
1
+ """
2
+ Standard file format adapter for cellify (VASP POSCAR, CIF, XYZ, etc.).
3
+ """
4
+
5
+ from typing import Any, Dict, Tuple
6
+
7
+ from pymatgen.core import Structure
8
+
9
+ from cellify.adapters.base import BaseAdapter
10
+
11
+
12
class StandardAdapter(BaseAdapter):
    """
    Adapter for generic structure formats (VASP POSCAR, CIF, XYZ, etc.).

    Delegates reading and writing to pymatgen's own file handling; no original
    file text is preserved.
    """

    def read(self, filepath: str) -> Tuple[Structure, Dict[str, Any]]:
        """
        Loads the structure via pymatgen and returns it with minimal metadata
        (the keys "mode" and "filepath").
        """
        loaded: Structure = Structure.from_file(filepath)
        return loaded, {"mode": "standard", "filepath": filepath}

    def write(
        self, filepath: str, structure: Structure, meta_data: Dict[str, Any]
    ) -> None:
        """
        Writes the structure via pymatgen; ``meta_data`` is not used.
        """
        structure.to(filename=filepath)
cellify/cli.py ADDED
@@ -0,0 +1,245 @@
1
+ """
2
+ Command-line interface (CLI) for cellify.
3
+ Handles arg parsing, workflow orchestration, and user output reporting.
4
+ """
5
+
6
+ import argparse
7
+ import os
8
+ import sys
9
+ from typing import List, Optional, cast
10
+
11
+ import numpy as np
12
+ from pymatgen.core import Structure
13
+
14
+ from cellify import __version__
15
+ from cellify.core import (
16
+ apply_substitutions,
17
+ apply_vacancies,
18
+ calculate_min_dist_scaling,
19
+ convert_to_conventional,
20
+ generate_surface_slab,
21
+ load_structure_file,
22
+ parse_matrix_string,
23
+ save_structure_file,
24
+ )
25
+
26
+
27
def parse_args(args: Optional[List[str]] = None) -> argparse.Namespace:
    """
    Builds the cellify argument parser and parses ``args``.

    Args:
        args: Argument list to parse; ``None`` is passed through to argparse.

    Returns:
        argparse.Namespace: The parsed options.
    """
    cli = argparse.ArgumentParser(
        description="cellify: A friendly DFT helper CLI for generating supercells and calculation-ready inputs."
    )
    cli.add_argument(
        "-v", "--version", action="version", version=f"cellify {__version__}"
    )

    # --- input / output ---
    cli.add_argument(
        "-i",
        "--input",
        required=True,
        help="Input structure file path (e.g. POSCAR, input.cif, qe.in)",
    )
    cli.add_argument(
        "-o",
        "--output",
        help="Output structure file path (default: <input_base>_supercell.<ext>)",
    )

    # --- supercell definition: at most one of these may be given ---
    scaling = cli.add_mutually_exclusive_group()
    scaling.add_argument(
        "-d",
        "--dim",
        nargs=3,
        type=int,
        metavar=("nx", "ny", "nz"),
        help="Diagonal scaling factors for the supercell (e.g., -d 2 2 2)",
    )
    scaling.add_argument(
        "-m",
        "--matrix",
        help="3x3 transformation matrix. Specify as 'r11 r12 r13 / r21 r22 r23 / r31 r32 r33'",
    )
    scaling.add_argument(
        "--min-dist",
        type=float,
        metavar="DISTANCE",
        help="Automatically generate a supercell where the minimum distance between periodic images is >= DISTANCE (in Angstroms)",
    )
    cli.add_argument(
        "--conventional",
        action="store_true",
        help="Automatically convert the input structure to its standard conventional representation before applying other operations.",
    )

    # --- doping / defects (both options may be repeated) ---
    cli.add_argument(
        "--substitute",
        action="append",
        default=[],
        help="Substitution rule: 'element:target_element:index_or_percentage' (e.g., 'Si:P:0' or 'Si:Al:5%%')",
    )
    cli.add_argument(
        "--vacancy",
        action="append",
        default=[],
        help="Vacancy rule: 'element:index_or_count' (e.g., 'Si:0' or 'O:2')",
    )

    # --- surface slab ---
    cli.add_argument(
        "--slab",
        nargs=3,
        type=int,
        metavar=("h", "k", "l"),
        help="Miller indices for surface slab generation (e.g., --slab 1 0 0)",
    )
    cli.add_argument(
        "--thick", type=float, help="Slab thickness (in Angstroms or layers)"
    )
    cli.add_argument(
        "--vacuum", type=float, help="Vacuum layer thickness (in Angstroms)"
    )

    parsed: argparse.Namespace = cli.parse_args(args)
    return parsed
108
+
109
+
110
def _print_structure_summary(structure: Structure, label: str = "") -> None:
    """
    Prints formula, volume and atom count of ``structure``.

    When ``label`` is non-empty it is printed as a heading first, and the lattice
    constants and angles are appended after the basic summary.
    """
    cell = structure.lattice
    report: List[str] = []

    if label:
        report.append(f"\n{label}")

    report.append(f" Formula: {structure.composition.reduced_formula}")
    report.append(f" Volume: {structure.volume:.3f} A^3")
    report.append(f" Number of atoms: {len(structure)}")

    if label:
        report.append(" Lattice constants:")
        report.append(
            f" a = {structure.lattice.a:.4f} A, b = {structure.lattice.b:.4f} A, c = {structure.lattice.c:.4f} A"
        )
        report.append(
            f" alpha = {cell.alpha:.2f} deg, beta = {cell.beta:.2f} deg, gamma = {cell.gamma:.2f} deg"
        )

    for entry in report:
        print(entry)
127
+
128
+
129
def _apply_supercell(structure: Structure, args: argparse.Namespace) -> None:
    """
    Expands ``structure`` in place according to the supercell option given.

    Handles, in this order of precedence, ``args.dim``, ``args.matrix`` and
    ``args.min_dist``; does nothing when none of them is set. A failure while
    handling the matrix option is reported on stderr and exits with status 1.
    """
    if args.dim:
        print(f"Generating supercell with diagonal scaling: {args.dim}")
        structure.make_supercell(args.dim)
        return

    if args.matrix:
        try:
            transform: np.ndarray = parse_matrix_string(args.matrix)
            print(f"Generating supercell with matrix:\n{transform}")
            structure.make_supercell(transform)
        except Exception as err:  # pylint: disable=broad-exception-caught
            print(f"Error parsing matrix: {err}", file=sys.stderr)
            sys.exit(1)
        return

    if not args.min_dist:
        return

    nx, ny, nz = calculate_min_dist_scaling(structure, args.min_dist)
    print(
        f"Calculated scaling for minimum distance >= {args.min_dist} A: [{nx}, {ny}, {nz}]"
    )
    structure.make_supercell([nx, ny, nz])
150
+
151
+
152
def _apply_defects_and_slab(
    structure: Structure, args: argparse.Namespace
) -> Structure:
    """
    Runs the optional substitution, vacancy and slab steps, in that order.

    Substitutions and vacancies act on ``structure`` itself; slab generation
    yields a new object, so callers must use the return value. Any failure is
    reported on stderr and terminates the process with exit status 1.
    """
    substitution_rules = args.substitute
    vacancy_rules = args.vacancy
    miller = args.slab

    if substitution_rules:
        try:
            apply_substitutions(structure, substitution_rules)
        except Exception as err:  # pylint: disable=broad-exception-caught
            print(f"Error applying substitutions: {err}", file=sys.stderr)
            sys.exit(1)

    if vacancy_rules:
        try:
            apply_vacancies(structure, vacancy_rules)
        except Exception as err:  # pylint: disable=broad-exception-caught
            print(f"Error applying vacancies: {err}", file=sys.stderr)
            sys.exit(1)

    if not miller:
        return structure

    print(f"Generating slab model for Miller indices: {miller}")
    try:
        structure = generate_surface_slab(
            structure, miller, args.thick, args.vacuum
        )
    except Exception as err:  # pylint: disable=broad-exception-caught
        print(f"Error generating slab: {err}", file=sys.stderr)
        sys.exit(1)

    return structure
183
+
184
+
185
def _determine_output_path(args: argparse.Namespace) -> str:
    """
    Returns the output path: ``args.output`` when given, otherwise the input
    path with ``_supercell`` inserted before its extension.
    """
    explicit = args.output
    if explicit:
        return cast(str, explicit)

    stem, suffix = os.path.splitext(args.input)
    # Extension-less names such as POSCAR or CONTCAR have an empty suffix, so
    # they simply gain the "_supercell" tail.
    return f"{stem}_supercell{suffix}"
197
+
198
+
199
def main() -> None:
    """
    Entry point of the cellify command.

    Parses the command line, loads the input structure, applies the requested
    transformations (conventional cell, supercell, defects, slab) and saves the
    result. Load and save failures are reported on stderr with exit status 1.
    """
    options: argparse.Namespace = parse_args()
    source_path = options.input

    if not os.path.exists(source_path):
        print(f"Error: Input file '{source_path}' not found.", file=sys.stderr)
        sys.exit(1)

    print(f"Loading structure from: {source_path}")
    try:
        structure, meta_data = load_structure_file(source_path)
    except Exception as err:  # pylint: disable=broad-exception-caught
        print(f"Error loading file: {err}", file=sys.stderr)
        sys.exit(1)

    _print_structure_summary(structure)

    # Step 0: optional conversion to the standard conventional cell
    if options.conventional:
        print("Converting structure to standard conventional cell...")
        structure = convert_to_conventional(structure)

    # Step 1: supercell generation (modifies the structure in place)
    _apply_supercell(structure, options)

    # Step 2: defects and slab (may hand back a new structure object)
    structure = _apply_defects_and_slab(structure, options)

    _print_structure_summary(structure, label="Final structure summary:")

    destination: str = _determine_output_path(options)
    print(f"\nSaving final structure to: {destination}")
    try:
        save_structure_file(destination, structure, meta_data)
        print("Success!")
    except Exception as err:  # pylint: disable=broad-exception-caught
        print(f"Error saving file: {err}", file=sys.stderr)
        sys.exit(1)
242
+
243
+
244
# Run the CLI when this module is executed as a script.
if __name__ == "__main__":
    main()
cellify/core.py ADDED
@@ -0,0 +1,271 @@
1
+ """
2
+ Core modeling logic for cellify.
3
+ Handles structure loading, supercell generation, substitutions,
4
+ vacancies, slab generation, and file saving using pymatgen and ASE.
5
+ """
6
+
7
+ import math
8
+ import re
9
+ from typing import Any, Dict, List, Optional, Tuple
10
+
11
+ import numpy as np
12
+ from pymatgen.core import Structure
13
+ from pymatgen.core.surface import SlabGenerator
14
+ from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
15
+
16
+ from cellify.adapters import BaseAdapter, get_adapter
17
+
18
+
19
def load_structure_file(filepath: str) -> Tuple[Structure, Dict[str, Any]]:
    """
    Reads ``filepath`` with the adapter selected for that path and returns the
    structure together with the adapter's metadata dictionary.
    """
    reader: BaseAdapter = get_adapter(filepath)
    loaded = reader.read(filepath)
    return loaded
25
+
26
+
27
def save_structure_file(
    filepath: str, structure: Structure, meta_data: Dict[str, Any]
) -> None:
    """
    Writes ``structure`` to ``filepath`` using the adapter selected for the
    output path; ``meta_data`` is forwarded to that adapter unchanged.
    """
    get_adapter(filepath).write(filepath, structure, meta_data)
35
+
36
+
37
def convert_to_conventional(structure: Structure) -> Structure:
    """
    Returns the standard conventional cell of ``structure`` as determined by
    pymatgen's SpacegroupAnalyzer.
    """
    return SpacegroupAnalyzer(structure).get_conventional_standard_structure()
43
+
44
+
45
def parse_matrix_string(matrix_str: str) -> np.ndarray:
    """
    Converts text such as "1 -1 0 / 1 1 0 / 0 0 1" into a 3x3 numpy array.

    Rows are separated by "/", "," or ";"; values inside a row by whitespace.

    Raises:
        ValueError: If there are not exactly 3 rows, a row does not hold exactly
            3 values, or a value is not a valid float.
    """
    row_chunks: List[str] = re.split(r"[/,;]", matrix_str)
    if len(row_chunks) != 3:
        raise ValueError(
            "Matrix string must define exactly 3 rows (separated by /, , or ;)"
        )

    parsed_rows: List[List[float]] = []
    for chunk in row_chunks:
        numbers: List[float] = list(map(float, chunk.strip().split()))
        if len(numbers) != 3:
            raise ValueError("Each row in the matrix must have exactly 3 elements")
        parsed_rows.append(numbers)

    return np.array(parsed_rows)
64
+
65
+
66
def calculate_min_dist_scaling(
    structure: Structure, min_dist: float
) -> Tuple[int, int, int]:
    """
    Returns the smallest diagonal scaling (nx, ny, nz), each at least 1, for
    which the plane-to-plane spacing along every lattice direction reaches
    ``min_dist``.

    The spacing along axis i is the cell volume divided by the area spanned by
    the other two lattice vectors: d_a = V / |b x c|, d_b = V / |c x a|,
    d_c = V / |a x b|.
    """
    cell = structure.lattice
    vectors = cell.matrix
    volume: float = cell.volume

    # For axis i, the (j, k) pair of lattice vectors spanning the facing plane.
    plane_pairs = ((1, 2), (2, 0), (0, 1))
    spacings = [
        volume / np.linalg.norm(np.cross(vectors[j], vectors[k]))
        for j, k in plane_pairs
    ]

    repeats = [int(math.ceil(min_dist / spacing)) for spacing in spacings]
    nx, ny, nz = (max(1, count) for count in repeats)
    return nx, ny, nz
94
+
95
+
96
def apply_substitutions(structure: Structure, substitute_rules: List[str]) -> None:
    """
    Applies every substitution rule to ``structure``, in the order given.

    Accepted rule formats:
    "Si:P:0" (replace the site at absolute index 0 with P)
    "Si:Al:5%" (replace a random 5% of the Si atoms with Al)
    """
    pending = iter(substitute_rules)
    for single_rule in pending:
        _apply_single_substitution(structure, single_rule)
105
+
106
+
107
def _apply_single_substitution(structure: Structure, rule: str) -> None:
    """
    Parses one 'element:target_element:index_or_percentage' rule and applies it.

    A rule whose source element does not occur in the structure only produces a
    warning. Targets ending in "%" are treated as percentages, all others as
    absolute site indices.

    Raises:
        ValueError: If the rule does not consist of exactly three ':' separated fields.
    """
    fields: List[str] = rule.split(":")
    if len(fields) != 3:
        raise ValueError(
            f"Invalid substitution rule: {rule}. Must be 'element:target_element:index_or_percentage'"
        )
    src_el, dest_el, target = fields

    matching_indices: List[int] = []
    for position, site in enumerate(structure):
        if site.specie.symbol == src_el:
            matching_indices.append(position)

    if not matching_indices:
        print(f"Warning: No matching elements found for substitution source '{src_el}'")
        return

    if not target.endswith("%"):
        _substitute_index(structure, src_el, dest_el, target)
        return
    _substitute_percentage(structure, src_el, dest_el, target, matching_indices)
130
+
131
+
132
def _substitute_percentage(
    structure: Structure,
    src_el: str,
    dest_el: str,
    target: str,
    matching_indices: List[int],
) -> None:
    """
    Replaces a randomly chosen share of the ``src_el`` sites with ``dest_el``.

    ``target`` is a percentage string such as "5%". The number of sites is the
    rounded share of ``matching_indices``; any positive percentage replaces at
    least one site.
    """
    fraction: float = float(target[:-1]) / 100.0
    num_to_replace: int = int(round(len(matching_indices) * fraction))
    if fraction > 0 and num_to_replace == 0:
        # Never let a positive request round down to "replace nothing".
        num_to_replace = 1

    chosen = np.random.choice(matching_indices, num_to_replace, replace=False)
    for site_index in chosen:
        structure.replace(site_index, dest_el)

    print(f"Replaced {num_to_replace} of {src_el} with {dest_el} ({target})")
151
+
152
+
153
def _substitute_index(
    structure: Structure, src_el: str, dest_el: str, target: str
) -> None:
    """
    Replaces the site at absolute index ``target`` with ``dest_el``.

    A mismatch between the site's element and ``src_el`` only triggers a
    warning; the replacement is carried out regardless.

    Raises:
        IndexError: If the index lies outside the structure.
        ValueError: If a ValueError occurs while handling the target (for
            example when it is not an integer).
    """
    try:
        site_index: int = int(target)
        site_count = len(structure)
        if not 0 <= site_index < site_count:
            raise IndexError(
                f"Index {site_index} out of range (structure size: {len(structure)})"
            )

        current_symbol: str = structure[site_index].specie.symbol
        if current_symbol != src_el:
            print(
                f"Warning: Site index {site_index} is '{current_symbol}', not source element '{src_el}'. Replacing anyway."
            )

        structure.replace(site_index, dest_el)
        print(f"Replaced site {site_index} ({current_symbol}) with {dest_el}")
    except ValueError as exc:
        raise ValueError(
            f"Invalid substitution target index or percentage: {target}"
        ) from exc
178
+
179
+
180
def apply_vacancies(structure: Structure, vacancy_rules: List[str]) -> None:
    """
    Creates vacancies by deleting the sites selected by ``vacancy_rules``.

    Accepted rule formats:
    "Si:0" (delete the atom at index 0)
    "O:2" (delete 2 randomly chosen oxygen atoms)

    All rules are resolved to site indices first; the sites are then removed in
    a single call.
    """
    collected: List[int] = []
    for single_rule in vacancy_rules:
        _apply_single_vacancy(structure, single_rule, collected)

    if not collected:
        return

    # De-duplicate and order from highest to lowest index before removal.
    unique_descending = sorted(set(collected), reverse=True)
    structure.remove_sites(unique_descending)
196
+
197
+
198
def _apply_single_vacancy(
    structure: Structure, rule: str, indices_to_remove: List[int]
) -> None:
    """
    Resolves one vacancy rule into site indices and appends them to
    ``indices_to_remove``; the structure itself is not modified here.

    The numeric part of the rule is interpreted as a count of randomly chosen
    atoms when it is positive, does not exceed the number of matching atoms and
    the structure has more than 20 sites. Otherwise it is interpreted as an
    absolute site index.

    Args:
        structure (Structure): Structure in which the sites are looked up.
        rule (str): Rule of the form 'element:index' or 'element:count'.
        indices_to_remove (List[int]): Output list that receives the indices.

    Raises:
        ValueError: If the rule does not have exactly two ':' separated fields
            or its numeric part is not an integer.
        IndexError: If an index-based rule points outside the structure.
    """
    parts: List[str] = rule.split(":")
    # Exactly two fields are meaningful; a third field used to be accepted and
    # silently ignored, which hid malformed rules from the user.
    if len(parts) != 2:
        raise ValueError(
            f"Invalid vacancy rule: {rule}. Must be 'element:index' or 'element:count'"
        )
    src_el, target = parts

    matching_indices: List[int] = [
        i for i, site in enumerate(structure) if site.specie.symbol == src_el
    ]
    if not matching_indices:
        print(f"Warning: No matching elements found for vacancy source '{src_el}'")
        return

    try:
        val: int = int(target)
    except ValueError as exc:
        raise ValueError(f"Invalid vacancy target: {target}") from exc

    # If the value is less than or equal to the count of matching elements, treat as count-based vacancy creation
    if 0 < val <= len(matching_indices) and len(structure) > 20:
        remove_subset = np.random.choice(matching_indices, val, replace=False)
        # Store plain Python ints so the list stays homogeneous with the
        # index-based entries appended below.
        indices_to_remove.extend(int(i) for i in remove_subset)
        print(f"Created {val} vacancies of {src_el} (randomly selected)")
        return

    # Treat as index-based vacancy creation
    if val < 0 or val >= len(structure):
        raise IndexError(f"Index {val} out of range")

    actual_symbol: str = structure[val].specie.symbol
    if actual_symbol != src_el:
        print(
            f"Warning: Site index {val} is '{actual_symbol}', not vacancy element '{src_el}'. Removing anyway."
        )

    indices_to_remove.append(val)
    print(f"Removed site {val} ({actual_symbol}) to create vacancy")
243
+
244
+
245
def generate_surface_slab(
    structure: Structure,
    miller_index: List[int],
    thick: Optional[float],
    vacuum: Optional[float],
) -> Structure:
    """
    Generates a surface slab model using pymatgen's SlabGenerator.

    Args:
        structure (Structure): Bulk structure to cut the slab from.
        miller_index (List[int]): Miller indices (h, k, l) of the surface.
        thick (Optional[float]): Minimum slab size passed to SlabGenerator;
            10.0 is used when it is not given (or zero).
        vacuum (Optional[float]): Minimum vacuum size passed to SlabGenerator;
            15.0 is used when it is not given (or zero).

    Returns:
        Structure: The first slab produced by the generator, centred in the cell.

    Raises:
        ValueError: If the generator yields no slab for the Miller index.
    """
    slab_thick: float = thick if thick else 10.0
    vac_thick: float = vacuum if vacuum else 15.0

    gen = SlabGenerator(
        initial_structure=structure,
        miller_index=miller_index,
        min_slab_size=slab_thick,
        min_vacuum_size=vac_thick,
        center_slab=True,
    )

    slabs = gen.get_slabs()
    if not slabs:
        raise ValueError(f"Could not generate slab for Miller index {miller_index}")

    # Adopt the first generated termination. The slab object is returned as is:
    # it is already a structure, and the previous extra call on it
    # (generate_unique_slab_structs) is not part of pymatgen's documented Slab
    # API, so it would fail with AttributeError on every slab request.
    return slabs[0]
@@ -0,0 +1,202 @@
1
+ Metadata-Version: 2.4
2
+ Name: cellify
3
+ Version: 0.1.2
4
+ Summary: A friendly DFT helper CLI for generating supercells and calculation-ready inputs.
5
+ Author: ToAmano
6
+ License: MIT
7
+ Classifier: Development Status :: 3 - Alpha
8
+ Classifier: Intended Audience :: Science/Research
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Topic :: Scientific/Engineering :: Chemistry
16
+ Classifier: Topic :: Scientific/Engineering :: Physics
17
+ Requires-Python: >=3.9
18
+ Requires-Dist: ase>=3.22.0
19
+ Requires-Dist: numpy>=1.20.0
20
+ Requires-Dist: pymatgen>=2023.0.0
21
+ Provides-Extra: test
22
+ Requires-Dist: pytest>=7.0.0; extra == 'test'
23
+ Description-Content-Type: text/markdown
24
+
25
+ # cellify
26
+
27
+ A user-friendly command-line interface (CLI) tool for quickly and intuitively generating supercells and calculation-ready inputs from unit cells, including advanced transformations, in density functional theory (DFT) calculation workflows (VASP, Quantum ESPRESSO, OpenMX, CP2K, etc.).
28
+
29
+ ---
30
+
31
+ ## 1. Target Users and Pain Points
32
+
33
+ ### Target Users
34
+ * Researchers in materials science, physics, and chemistry simulating crystals, interfaces, surfaces, defects, and amorphous structures using DFT.
35
+
36
+ ### Current Pain Points (Limitations of Existing Tools)
37
+ 1. **"ASE and Pymatgen are powerful, but writing Python scripts is tedious"**
38
+ * Writing scripts with `read`, `make_supercell`, and `write` just to create a quick supercell is annoying.
39
+ 2. **"cif2cell and other tools are prone to broken installations"**
40
+ * Older python dependencies or compilation issues often cause setup problems.
41
+ 3. **"Specifying non-diagonal transformation matrices (orthogonalization, etc.) is unintuitive"**
42
+ * Quickly redefining lattices or cutting specific orientations from a terminal is difficult.
43
+ 4. **"Calculating sizes to avoid periodic boundary interferences is tedious"**
44
+ * Manually finding the smallest cell configuration to keep defect-to-defect distances above a threshold (e.g., $15\ \text{Å}$) is time-consuming.
45
+ 5. **"Creating surface slab models and inserting vacuum layers in separate tools is prone to errors"**
46
+
47
+ ---
48
+
49
+ ## 2. Requirements & Features
50
+
51
+ ### ① Format-Free Multi-Format Conversion
52
+ * Automatically determines file formats from file extensions or headers.
53
+ * **Supported Formats**:
54
+ * VASP (`POSCAR`, `CONTCAR`)
55
+ * Quantum ESPRESSO (`.in`, `.qe`, `.pwi`, or any file name containing `qe` or `espresso`)
56
+ * Crystallographic Information File (`.cif`)
57
+ * XCrysDen Structure Format (`.xsf`, `.axsf`)
58
+ * XYZ format (`.xyz`)
59
+ * FHI-aims (`geometry.in`)
60
+
61
+ ### ② Flexible Cell Expansion (Supercell Generation)
62
+ * **Conventional Cell Auto-Conversion**: Automatically transforms loaded structures (e.g. primitive cells) into their standard conventional representation using `--conventional`.
63
+ * **Diagonal Scaling**: Simplest integer multiplication along lattice axes (e.g., `2 2 2`).
64
+ * **Matrix-Based Redefinition**: Redefine lattices using an arbitrary $3 \times 3$ transformation matrix. Ideal for orthogonalizing hexagonal cells or extracting specific crystal orientations.
65
+ * **Minimum Distance (Cutoff) Automatic Scaling**:
66
+ * Automatically calculates and generates the smallest diagonal supercell (or specific axis dimensions) that guarantees the distance between periodic images of any atom is $\ge d\ \text{Å}$. Extremely useful for defect and phonon calculations.
67
+
68
+ ### ③ Easy Defect & Doping Modeling
69
+ * **Substitutions**: Replace specific atoms at a given index (e.g., replacing Si at index 0 with P) or randomly replace a specified percentage of atoms (e.g., replacing $5\%$ of Si atoms with Al).
70
+ * **Vacancies**: Remove atoms at specific indices or randomly delete a specified count of a given element.
71
+
72
+ ### ④ Surface Slab Generation
73
+ * Cut a surface slab from bulk structures by specifying Miller indices $(h, k, l)$, slab thickness (in $\text{Å}$ or layers), and vacuum thickness (in $\text{Å}$).
74
+
75
+ ### ⑤ Logging and Metadata Analysis
76
+ * Prints structure logs to standard output during execution (error messages go to stderr):
77
+ * Initial volume, atom count, and reduced formula.
78
+ * Final supercell volume, lattice constants, lattice angles, and atom count.
79
+ * Applied transformation matrix.
80
+ * Minimum atomic distance under periodic boundary conditions.
81
+
82
+ ### ⑥ Calculation-Ready Input Generation
83
+ * For formats like Quantum ESPRESSO where calculation parameters and coordinates coexist in a single file, the original parameters (`&CONTROL`, `&SYSTEM`, etc.) and comments are completely preserved.
84
+ * The following parameters are automatically updated to match the generated supercell structure:
85
+ * **Total number of atoms (`nat`)**: Automatically updated to the supercell atom count.
86
+ * **Number of atomic types (`ntyp`)**: Dynamically incremented if new elements are added via doping.
87
+ * **Atomic species definitions (`ATOMIC_SPECIES`)**: Automatically appends definitions (mass, pseudopotentials) for newly introduced elements.
88
+
89
+ ---
90
+
91
+ ## 3. Installation
92
+
93
+ You can install `cellify` from the local repository directory:
94
+
95
+ ```bash
96
+ # Clone the repository
97
+ git clone https://github.com/ToAmano/cellify.git
98
+ cd cellify
99
+
100
+ # Install in editable mode for development
101
+ pip install -e .
102
+
103
+ # Or install with test dependencies
104
+ pip install -e ".[test]"
105
+ ```
106
+
107
+ After installation, the `cellify` command will be registered and executable from anywhere in your shell environment.
108
+
109
+ ---
110
+
111
+ ## 4. CLI Design
112
+
113
+ ### Command-Line Arguments
114
+
115
+ ```bash
116
+ cellify -i <input_file> -o <output_file> [options]
117
+ ```
118
+
119
+ #### Arguments List
120
+ * `-i`, `--input` : Input structure file path (Required).
121
+ * `-o`, `--output` : Output structure file path (Default: `<input_base>_supercell.<ext>`).
122
+ * `-d`, `--dim` : Diagonal scaling factors. 3 integers separated by spaces (e.g., `--dim 2 2 2`).
123
+ * `-m`, `--matrix` : $3 \times 3$ transformation matrix. Specify row values separated by spaces, rows separated by slashes/commas/semicolons (e.g., `--matrix "1 -1 0 / 1 1 0 / 0 0 2"`).
124
+ * `--min-dist` : Automatically generate a supercell with minimum periodic image distance $\ge$ specified distance (in $\text{Å}$).
125
+ * `--conventional` : Automatically convert the input structure to its standard conventional representation before applying other operations.
126
+ * `--substitute` : Substitution rule. Format: `element:target_element:index_or_percentage` (e.g., `--substitute "Si:P:0"` or `--substitute "Si:Al:5%"`).
127
+ * `--vacancy` : Vacancy rule. Format: `element:index_or_count` (e.g., `--vacancy "Si:0"`, `--vacancy "O:2"`).
128
+ * `--slab` : Miller indices $h\ k\ l$ for surface slab model creation (e.g., `--slab 1 1 1`).
129
+ * `--thick` : Slab thickness in $\text{Å}$ or layers (e.g., `--thick 15.0`).
130
+ * `--vacuum` : Vacuum layer thickness in $\text{Å}$ (e.g., `--vacuum 15.0`).
131
+
132
+ ---
133
+
134
+ ## 5. Use Cases
135
+
136
+ ### 1. Create a simple $2 \times 2 \times 3$ supercell (VASP POSCAR)
137
+ ```bash
138
+ cellify -i POSCAR -o POSCAR_223 --dim 2 2 3
139
+ ```
140
+
141
+ ### 2. Orthogonalize a hexagonal cell (Quantum ESPRESSO input)
142
+ ```bash
143
+ # Preserves &CONTROL and &SYSTEM settings, and updates nat, CELL_PARAMETERS, and ATOMIC_POSITIONS
144
+ cellify -i qe.in -o qe_ortho.in --matrix "1 -1 0 / 1 1 0 / 0 0 1"
145
+ ```
146
+
147
+ ### 3. Generate the smallest supercell keeping defect distance $\ge 15\ \text{Å}$
148
+ ```bash
149
+ cellify -i POSCAR -o POSCAR_defect_bulk --min-dist 15.0
150
+ ```
151
+
152
+ ### 4. Create a silicon supercell and replace 1 atom with Phosphorus (n-type doped model)
153
+ ```bash
154
+ cellify -i Si_unit.cif -o Si_doped.POSCAR --dim 3 3 3 --substitute "Si:P:0"
155
+ ```
156
+
157
+ ### 5. Generate a $\text{SrTiO}_3$ (100) surface slab model with $15\ \text{Å}$ vacuum
158
+ ```bash
159
+ cellify -i STO_bulk.cif -o STO_100_slab.POSCAR --slab 1 0 0 --thick 12.0 --vacuum 15.0
160
+ ```
161
+
162
+ ### 6. Convert a primitive Silicon cell to conventional cell and scale it to 2x2x2
163
+ ```bash
164
+ cellify -i Si_primitive.POSCAR -o Si_conventional_222.POSCAR --conventional --dim 2 2 2
165
+ ```
166
+
167
+ ---
168
+
169
+ ## 6. Directory Structure
170
+
171
+ This project uses the standard Python `src-layout`:
172
+
173
+ ```text
174
+ cellify/
175
+ ├── README.md
176
+ ├── NAMES.md
177
+ ├── pyproject.toml
178
+ └── src/
179
+ └── cellify/
180
+ ├── __init__.py
181
+ ├── cli.py # Command-line argument parsing and execution flow
182
+ ├── core.py # Pure geometric modeling (supercell, defect, slab creation)
183
+ └── adapters/ # Software-specific file I/O and parameter-preservation adapters
184
+ ├── __init__.py
185
+ ├── base.py # Abstract base class for I/O adapters
186
+ ├── espresso.py # Quantum ESPRESSO adapter
187
+ └── standard.py # VASP/CIF generic format adapter
188
+ ```
189
+
190
+ ---
191
+
192
+ ## 7. Technical Stack & Development Approach
193
+
194
+ 1. **Language**: **Python 3** (High affinity with scientific and DFT software ecosystems).
195
+ 2. **Core Libraries**: **pymatgen** and **ASE (Atomic Simulation Environment)**.
196
+ * **pymatgen**: Used for symmetry determination, structure analysis, defect modeling, and advanced slab generation.
197
+ * **ASE**: Used for format-independent structure loading/writing and robust file parsing.
198
+ * Conversion between both frameworks is done seamlessly via `pymatgen.io.ase.AseAtomsAdaptor`.
199
+ 3. **Packaging**:
200
+ * Managed via `pyproject.toml` using `hatchling` as the build backend.
201
+ * Installable in editable mode using `pip install -e ".[test]"`.
202
+ * Registers `cellify` command as an entry point upon installation.
@@ -0,0 +1,11 @@
1
+ cellify/__init__.py,sha256=Fo90js_EtP3snSnFwEPc465PRIErk5egGacvFSNgA1U,130
2
+ cellify/cli.py,sha256=wl3FirQQtCurDCHLeuxeRl6JQBRQod1N7GHXm8J8FBw,7698
3
+ cellify/core.py,sha256=2PO6_dXZnQAj7fCAayxP6B76VcfgnH3_U5_yuuIs4H4,9024
4
+ cellify/adapters/__init__.py,sha256=3JsbQnuB4rgTcy_WWH-J7jb1sUNxKFBudrKXWsp1faU,746
5
+ cellify/adapters/base.py,sha256=9eWJ7QAYDf8sKmrWa8vVlJl7OpvIZw-82rKrEFR2kA4,1175
6
+ cellify/adapters/espresso.py,sha256=gKSwRUF3s5Occ50eDbHfAroPmFgPT1iXOVgCVJz8Hok,4346
7
+ cellify/adapters/standard.py,sha256=zn8NZ-iy4eDS4oMSD6tx2aAKrfD1spjsP60h8LeUfVE,842
8
+ cellify-0.1.2.dist-info/METADATA,sha256=BjkEV_epmdyzb4sTDFssKhhQhsfiEkMeI52VFnfdGE8,9610
9
+ cellify-0.1.2.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
10
+ cellify-0.1.2.dist-info/entry_points.txt,sha256=XP96fl4EbrFmoNAa3vkdO5u-5Z6nlDneZ-GHfMgcWh4,45
11
+ cellify-0.1.2.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ cellify = cellify.cli:main