pymdkit 1.1.2__tar.gz → 1.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. {pymdkit-1.1.2/src/pymdkit.egg-info → pymdkit-1.1.4}/PKG-INFO +19 -9
  2. {pymdkit-1.1.2 → pymdkit-1.1.4}/README.md +33 -23
  3. {pymdkit-1.1.2 → pymdkit-1.1.4}/pyproject.toml +1 -1
  4. {pymdkit-1.1.2 → pymdkit-1.1.4}/src/pymdkit/__init__.py +1 -1
  5. pymdkit-1.1.4/src/pymdkit/commands/electrostatic_energy.py +103 -0
  6. pymdkit-1.1.4/src/pymdkit/commands/substitute.py +452 -0
  7. {pymdkit-1.1.2 → pymdkit-1.1.4}/src/pymdkit/pymdkit_main.py +3 -3
  8. {pymdkit-1.1.2 → pymdkit-1.1.4/src/pymdkit.egg-info}/PKG-INFO +19 -9
  9. {pymdkit-1.1.2 → pymdkit-1.1.4}/src/pymdkit.egg-info/SOURCES.txt +2 -0
  10. {pymdkit-1.1.2 → pymdkit-1.1.4}/LICENSE +0 -0
  11. {pymdkit-1.1.2 → pymdkit-1.1.4}/setup.cfg +0 -0
  12. {pymdkit-1.1.2 → pymdkit-1.1.4}/src/pymdkit/commands/__init__.py +0 -0
  13. {pymdkit-1.1.2 → pymdkit-1.1.4}/src/pymdkit/commands/_fileio.py +0 -0
  14. {pymdkit-1.1.2 → pymdkit-1.1.4}/src/pymdkit/commands/_vaspset.py +0 -0
  15. {pymdkit-1.1.2 → pymdkit-1.1.4}/src/pymdkit/commands/add_groups.py +0 -0
  16. {pymdkit-1.1.2 → pymdkit-1.1.4}/src/pymdkit/commands/compute_ehull.py +0 -0
  17. {pymdkit-1.1.2 → pymdkit-1.1.4}/src/pymdkit/commands/compute_msd_all_groups.py +0 -0
  18. {pymdkit-1.1.2 → pymdkit-1.1.4}/src/pymdkit/commands/compute_rmsd.py +0 -0
  19. {pymdkit-1.1.2 → pymdkit-1.1.4}/src/pymdkit/commands/gather_contcar.py +0 -0
  20. {pymdkit-1.1.2 → pymdkit-1.1.4}/src/pymdkit/commands/outcar2xyz.py +0 -0
  21. {pymdkit-1.1.2 → pymdkit-1.1.4}/src/pymdkit/commands/select_candidate.py +0 -0
  22. {pymdkit-1.1.2 → pymdkit-1.1.4}/src/pymdkit/commands/stru2xyz.py +0 -0
  23. {pymdkit-1.1.2 → pymdkit-1.1.4}/src/pymdkit/commands/supercell.py +0 -0
  24. {pymdkit-1.1.2 → pymdkit-1.1.4}/src/pymdkit/commands/symmetrize.py +0 -0
  25. {pymdkit-1.1.2 → pymdkit-1.1.4}/src/pymdkit/commands/vasp_relax.py +0 -0
  26. {pymdkit-1.1.2 → pymdkit-1.1.4}/src/pymdkit/commands/vasp_static.py +0 -0
  27. {pymdkit-1.1.2 → pymdkit-1.1.4}/src/pymdkit.egg-info/dependency_links.txt +0 -0
  28. {pymdkit-1.1.2 → pymdkit-1.1.4}/src/pymdkit.egg-info/entry_points.txt +0 -0
  29. {pymdkit-1.1.2 → pymdkit-1.1.4}/src/pymdkit.egg-info/requires.txt +0 -0
  30. {pymdkit-1.1.2 → pymdkit-1.1.4}/src/pymdkit.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pymdkit
3
- Version: 1.1.2
3
+ Version: 1.1.4
4
4
  Summary: A unified command-line toolkit for atomistic / MD structure workflows.
5
5
  Author-email: Yueda Wang <ydwang0608@ustc.edu.cn>
6
6
  License-Expression: GPL-3.0-or-later
@@ -33,18 +33,19 @@ scripts into each working folder and running `python some_script.py`, you instal
33
33
  Every command exposes named `--flags` (no positional guessing), and each underlying
34
34
  script is still runnable on its own.
35
35
 
36
- ## Install ("compiling" the executable)
36
+ ## Install
37
37
 
38
- Python isn't compiled to a binary; the equivalent step is installing the package,
39
- which creates the `pymdkit` command on your `PATH`.
38
+ Create a clean conda environment, activate it, then install `pymdkit` with pip:
40
39
 
41
40
  ```bash
41
+ conda create -n pymdkit python=3.10
42
+ conda activate pymdkit
42
43
  pip install pymdkit
43
44
  ```
44
45
 
45
- On an HPC cluster, activate your conda env / `module load` first so `pymdkit` lands in
46
- that environment's `bin`. This installs every dependency (numpy, scipy, ase, pymatgen,
47
- pyxtal, mp_api, gemmi, tqdm), so all commands work out of the box.
46
+ This installs the `pymdkit` command into the active conda environment, together
47
+ with its dependencies (numpy, scipy, ase, pymatgen, pyxtal, mp_api, gemmi,
48
+ tqdm).
48
49
 
49
50
  Verify:
50
51
 
@@ -63,6 +64,7 @@ directory for job sub-folders automatically.
63
64
  | Command | What it does |
64
65
  |---|---|
65
66
  | `add-groups` | Tag atoms with a GPUMD group index by element order |
67
+ | `electrostatic-energy` | Compute CIF electrostatic energy with pymatgen EwaldSummation |
66
68
  | `ehull` | Auto-detect VASP job folders and compute E_hull vs Materials Project |
67
69
  | `gather-contcar` | Collect CONTCARs from VASP job folders into one folder, renamed `<folder>.vasp` |
68
70
  | `msd` | Diffusivity & conductivity from GPUMD MSD jobs (auto-scans `<structure>/<temp>/`) |
@@ -70,6 +72,7 @@ directory for job sub-folders automatically.
70
72
  | `rmsd` | Compute RMSD between two structure files, or all pairs in a folder |
71
73
  | `select-candidate` | Split a NEP training set into candidate/accurate sets by energy error |
72
74
  | `stru2xyz` | Convert structure file(s) of any format to extxyz |
75
+ | `substitute` | Randomly substitute or remove selected atoms/sites from a structure |
73
76
  | `supercell` | Build a supercell with cell lengths capped at a maximum (Angstrom); optional per-temperature GPUMD setup |
74
77
  | `symmetrize` | Import space-group symmetry into a structure file (or folder) -> CIF |
75
78
  | `vasp-relax` | Write VASP relaxation inputs for a structure (or folder); INCAR tags overridable |
@@ -99,6 +102,11 @@ pymdkit gather-contcar -of vasp-opted # CONTCARs -> vasp-opted/<fo
99
102
  pymdkit gather-contcar -of vasp-opted -ehull 0.028 # only structures with E_hull < 0.028 eV/atom
100
103
  pymdkit outcar2xyz # scans ./ for OUTCAR folders -> scf-converged.xyz
101
104
  pymdkit outcar2xyz --position-only # write positions only, without energy/forces/stress
105
+ pymdkit substitute -i Li3YCl6.cif -se Li -sn 3 -we Na -wn 3 -on 100
106
+ pymdkit substitute -i Li3YCl6.cif -se Li -sn 3 -we none -on 100
107
+ pymdkit substitute -i Li96Ta6La11Cl72.cif -se Li1 Li2 -sn 20 67 -we none -on 100
108
+ pymdkit electrostatic-energy -i Li3YCl6.cif
109
+ pymdkit electrostatic-energy -if Li3YCl6-all
102
110
  pymdkit select-candidate # RMSE bands: <low all accurate, >high all candidate, else worst 50%
103
111
  pymdkit select-candidate -r 0.8 # in the middle band, take worst 80% as candidate.xyz
104
112
  pymdkit rmsd a.cif b.cif # RMSD of two files -> rmsd.txt
@@ -154,8 +162,10 @@ pymdkit/
154
162
  |-- add_groups.py
155
163
  |-- compute_ehull.py
156
164
  |-- compute_rmsd.py
165
+ |-- electrostatic_energy.py
157
166
  |-- outcar2xyz.py
158
167
  |-- stru2xyz.py
168
+ |-- substitute.py
159
169
  |-- supercell.py
160
170
  |-- vasp_relax.py
161
171
  |-- vasp_static.py
@@ -188,8 +198,8 @@ if __name__ == "__main__": # keeps the script runnable on its own
188
198
  raise SystemExit(run(_p.parse_args()))
189
199
  ```
190
200
 
191
- It will appear in `pymdkit --help` automatically no central registration needed.
192
- Put heavy imports (pymatgen, ase, ) inside `run()` where practical; the dispatcher
201
+ It will appear in `pymdkit --help` automatically - no central registration needed.
202
+ Put heavy imports (pymatgen, ase, ...) inside `run()` where practical; the dispatcher
193
203
  reads each command's name and help without importing it, so `pymdkit --help` stays
194
204
  fast and a missing optional dependency only affects the one command that needs it.
195
205
 
@@ -8,18 +8,19 @@ scripts into each working folder and running `python some_script.py`, you instal
8
8
  Every command exposes named `--flags` (no positional guessing), and each underlying
9
9
  script is still runnable on its own.
10
10
 
11
- ## Install ("compiling" the executable)
12
-
13
- Python isn't compiled to a binary; the equivalent step is installing the package,
14
- which creates the `pymdkit` command on your `PATH`.
15
-
16
- ```bash
17
- pip install pymdkit
18
- ```
19
-
20
- On an HPC cluster, activate your conda env / `module load` first so `pymdkit` lands in
21
- that environment's `bin`. This installs every dependency (numpy, scipy, ase, pymatgen,
22
- pyxtal, mp_api, gemmi, tqdm), so all commands work out of the box.
11
+ ## Install
12
+
13
+ Create a clean conda environment, activate it, then install `pymdkit` with pip:
14
+
15
+ ```bash
16
+ conda create -n pymdkit python=3.10
17
+ conda activate pymdkit
18
+ pip install pymdkit
19
+ ```
20
+
21
+ This installs the `pymdkit` command into the active conda environment, together
22
+ with its dependencies (numpy, scipy, ase, pymatgen, pyxtal, mp_api, gemmi,
23
+ tqdm).
23
24
 
24
25
  Verify:
25
26
 
@@ -38,6 +39,7 @@ directory for job sub-folders automatically.
38
39
  | Command | What it does |
39
40
  |---|---|
40
41
  | `add-groups` | Tag atoms with a GPUMD group index by element order |
42
+ | `electrostatic-energy` | Compute CIF electrostatic energy with pymatgen EwaldSummation |
41
43
  | `ehull` | Auto-detect VASP job folders and compute E_hull vs Materials Project |
42
44
  | `gather-contcar` | Collect CONTCARs from VASP job folders into one folder, renamed `<folder>.vasp` |
43
45
  | `msd` | Diffusivity & conductivity from GPUMD MSD jobs (auto-scans `<structure>/<temp>/`) |
@@ -45,6 +47,7 @@ directory for job sub-folders automatically.
45
47
  | `rmsd` | Compute RMSD between two structure files, or all pairs in a folder |
46
48
  | `select-candidate` | Split a NEP training set into candidate/accurate sets by energy error |
47
49
  | `stru2xyz` | Convert structure file(s) of any format to extxyz |
50
+ | `substitute` | Randomly substitute or remove selected atoms/sites from a structure |
48
51
  | `supercell` | Build a supercell with cell lengths capped at a maximum (Angstrom); optional per-temperature GPUMD setup |
49
52
  | `symmetrize` | Import space-group symmetry into a structure file (or folder) -> CIF |
50
53
  | `vasp-relax` | Write VASP relaxation inputs for a structure (or folder); INCAR tags overridable |
@@ -74,18 +77,23 @@ pymdkit gather-contcar -of vasp-opted # CONTCARs -> vasp-opted/<fo
74
77
  pymdkit gather-contcar -of vasp-opted -ehull 0.028 # only structures with E_hull < 0.028 eV/atom
75
78
  pymdkit outcar2xyz # scans ./ for OUTCAR folders -> scf-converged.xyz
76
79
  pymdkit outcar2xyz --position-only # write positions only, without energy/forces/stress
80
+ pymdkit substitute -i Li3YCl6.cif -se Li -sn 3 -we Na -wn 3 -on 100
81
+ pymdkit substitute -i Li3YCl6.cif -se Li -sn 3 -we none -on 100
82
+ pymdkit substitute -i Li96Ta6La11Cl72.cif -se Li1 Li2 -sn 20 67 -we none -on 100
83
+ pymdkit electrostatic-energy -i Li3YCl6.cif
84
+ pymdkit electrostatic-energy -if Li3YCl6-all
77
85
  pymdkit select-candidate # RMSE bands: <low all accurate, >high all candidate, else worst 50%
78
86
  pymdkit select-candidate -r 0.8 # in the middle band, take worst 80% as candidate.xyz
79
87
  pymdkit rmsd a.cif b.cif # RMSD of two files -> rmsd.txt
80
88
  pymdkit rmsd vasp-opted/ # all pairs in a folder -> rmsd.txt
81
- pymdkit symmetrize -i opted.cif --symprec 0.01 --add_oxidation yes -o opted-symm.cif
82
- pymdkit symmetrize -if my_cifs/ --symprec 0.01 --add_oxidation no -of my_cifs-symm
89
+ pymdkit symmetrize -i opted.cif --symprec 0.01 --add_oxidation yes -o opted-symm.cif
90
+ pymdkit symmetrize -if my_cifs/ --symprec 0.01 --add_oxidation no -of my_cifs-symm
83
91
  ```
84
92
 
85
93
  VASP input commands (`vasp-relax`, `vasp-static`) always produce **individual**
86
94
  jobs (one structure per folder): `-i` writes into the current dir (or `-o`),
87
95
  `-if` creates one `./<name>/` folder per structure, and `-it` creates one
88
- `./frame_N/` folder per trajectory frame - all directly in the current path.
96
+ `./frame_N/` folder per trajectory frame - all directly in the current path.
89
97
 
90
98
  They start from sensible default INCAR settings; override them by passing a
91
99
  settings file with `-custom-setting FILE`. The file may be a Python-dict block
@@ -101,7 +109,7 @@ custom_settings = {
101
109
 
102
110
  `vasp-static -it traj.xyz` (also available on `vasp-relax`) reads a
103
111
  multi-structure trajectory and writes one job sub-folder per frame
104
- (`frame_1/`, `frame_2/`, ..., prefix configurable via `--frame-prefix`). Each
112
+ (`frame_1/`, `frame_2/`, ..., prefix configurable via `--frame-prefix`). Each
105
113
  folder also keeps a `frame_N.xyz`, so `Config_type` survives for a later
106
114
  `outcar2xyz`.
107
115
 
@@ -110,7 +118,7 @@ Each command's full flag list is in `pymdkit <command> --help`.
110
118
  `ehull` auto-detects every sub-folder of the current path that contains a
111
119
  `vasprun.xml`, groups them by chemical system (elements ordered by
112
120
  electronegativity, e.g. `Li-Y-Cl`), and builds/reuses one `mp_cache_<system>.json`
113
- per system - so a pure Li-Y-Cl batch yields a single `mp_cache_Li-Y-Cl.json`, while
121
+ per system - so a pure Li-Y-Cl batch yields a single `mp_cache_Li-Y-Cl.json`, while
114
122
  a mixed Li-Y-Cl + La-O batch yields both `mp_cache_Li-Y-Cl.json` and
115
123
  `mp_cache_La-O.json`. (Formation energy is reported alongside E_hull in
116
124
  `ehull.txt`.)
@@ -119,23 +127,25 @@ a mixed Li-Y-Cl + La-O batch yields both `mp_cache_Li-Y-Cl.json` and
119
127
 
120
128
  ```
121
129
  pymdkit/
122
- |-- pyproject.toml # package metadata + the `pymdkit` entry point
130
+ |-- pyproject.toml # package metadata + the `pymdkit` entry point
123
131
  |-- README.md
124
- `-- src/pymdkit/
132
+ `-- src/pymdkit/
125
133
  |-- pymdkit_main.py # dispatcher: discovers and runs commands
126
- `-- commands/ # one module per command
134
+ `-- commands/ # one module per command
127
135
  |-- _fileio.py # shared -i/-o/-if/-of helper (not a command)
128
136
  |-- _vaspset.py # shared VASP input-set helper (not a command)
129
137
  |-- add_groups.py
130
138
  |-- compute_ehull.py
131
139
  |-- compute_rmsd.py
140
+ |-- electrostatic_energy.py
132
141
  |-- outcar2xyz.py
133
142
  |-- stru2xyz.py
143
+ |-- substitute.py
134
144
  |-- supercell.py
135
145
  |-- vasp_relax.py
136
146
  |-- vasp_static.py
137
147
  |-- ...
138
- `-- symmetrize.py
148
+ `-- symmetrize.py
139
149
  ```
140
150
 
141
151
  Modules whose name starts with `_` are shared helpers and are skipped by the
@@ -163,8 +173,8 @@ if __name__ == "__main__": # keeps the script runnable on its own
163
173
  raise SystemExit(run(_p.parse_args()))
164
174
  ```
165
175
 
166
- It will appear in `pymdkit --help` automatically no central registration needed.
167
- Put heavy imports (pymatgen, ase, ) inside `run()` where practical; the dispatcher
176
+ It will appear in `pymdkit --help` automatically - no central registration needed.
177
+ Put heavy imports (pymatgen, ase, ...) inside `run()` where practical; the dispatcher
168
178
  reads each command's name and help without importing it, so `pymdkit --help` stays
169
179
  fast and a missing optional dependency only affects the one command that needs it.
170
180
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "pymdkit"
7
- version = "1.1.2"
7
+ version = "1.1.4"
8
8
  description = "A unified command-line toolkit for atomistic / MD structure workflows."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -1,3 +1,3 @@
1
1
  """pymdkit -- a unified CLI for atomistic / MD structure workflows."""
2
2
 
3
- __version__ = "1.1.2"
3
+ __version__ = "1.1.4"
@@ -0,0 +1,103 @@
1
+ """
2
+ Compute electrostatic energy with pymatgen's EwaldSummation.
3
+
4
+ Examples:
5
+ pymdkit electrostatic-energy -i Li3YCl6.cif
6
+ pymdkit electrostatic-energy -if Li3YCl6-all
7
+
8
+ Input files must be CIF files containing oxidation-state information, such as
9
+ an _atom_type_oxidation_number loop.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import argparse
15
+ from pathlib import Path
16
+
17
+ COMMAND = "electrostatic-energy"
18
+ HELP = "Compute CIF electrostatic energy using pymatgen EwaldSummation."
19
+
20
+
21
def read_charged_structure(path):
    """Load a structure and require an oxidation state on every species.

    Args:
        path: Structure file to read with ``pymatgen.core.Structure.from_file``.

    Returns:
        The loaded pymatgen ``Structure``.

    Raises:
        ValueError: If at least one species has no ``oxi_state`` attribute or
            its value is ``None``. The message lists the offending species.
    """
    # Heavy import kept local so the dispatcher can list commands cheaply.
    from pymatgen.core import Structure

    structure = Structure.from_file(str(path))
    # De-duplicate and sort so the error message is short and deterministic.
    missing = sorted({
        str(specie)
        for site in structure
        for specie in site.species
        if getattr(specie, "oxi_state", None) is None
    })
    if missing:
        raise ValueError(
            "missing oxidation states; CIF must include "
            "_atom_type_oxidation_number"
            f" (species without oxidation state: {', '.join(missing)})")
    return structure
35
+
36
+
37
def electrostatic_energy(path):
    """Return the Ewald-summation total energy of the structure at *path*.

    The structure is loaded through ``read_charged_structure`` and the
    ``total_energy`` of pymatgen's ``EwaldSummation`` is returned as a float.
    """
    from pymatgen.analysis.ewald import EwaldSummation

    charged = read_charged_structure(path)
    summation = EwaldSummation(charged)
    return float(summation.total_energy)
42
+
43
+
44
def cif_files(folder):
    """Return the sorted ``*.cif`` files located directly inside *folder*.

    The suffix comparison is case-insensitive and sub-directories are ignored.

    Raises:
        SystemExit: If *folder* is not a directory or holds no CIF file.
    """
    root = Path(folder)
    if not root.is_dir():
        raise SystemExit(f"Error: input folder not found: {root}")

    found = []
    for entry in root.iterdir():
        if entry.is_file() and entry.suffix.lower() == ".cif":
            found.append(entry)
    found.sort()

    if not found:
        raise SystemExit(f"Error: no CIF files found in {root}")
    return found
53
+
54
+
55
def add_arguments(parser: argparse.ArgumentParser) -> None:
    """Register the electrostatic-energy command-line options on *parser*."""
    option_table = (
        (("-i", "--input"),
         {"help": "Single input CIF file."}),
        (("-if", "--input-folder"),
         {"dest": "input_folder",
          "help": "Folder containing input CIF files."}),
        (("-o", "--output"),
         {"default": "electrostatic-energy.txt",
          "help": "Output text filename (default: electrostatic-energy.txt)."}),
    )
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)
61
+
62
+
63
def run(args) -> int:
    """Compute electrostatic energies for one CIF file or a folder of them.

    Exactly one of ``args.input`` / ``args.input_folder`` must be set. Results
    are printed as they are computed and written to ``args.output``, resolved
    relative to the input file's directory (single-file mode) or to the input
    folder (folder mode). In folder mode the written list is sorted by energy,
    then by file name.

    Returns:
        0 when at least one energy was written, 1 otherwise (bad arguments,
        non-CIF input, or every file failed).
    """
    if bool(args.input) == bool(args.input_folder):
        print("Error: provide exactly one of -i/--input or -if/--input-folder.")
        return 1

    if args.input:
        input_path = Path(args.input)
        if input_path.suffix.lower() != ".cif":
            print("Error: electrostatic-energy requires CIF input.")
            return 1
        # Join instead of Path.with_name(): with_name() raises ValueError when
        # the output contains a directory part (e.g. "results/energy.txt"),
        # whereas the folder branch below already accepts such values.
        output_path = input_path.parent / args.output
        files = [input_path]
    else:
        input_folder = Path(args.input_folder)
        output_path = input_folder / args.output
        files = cif_files(input_folder)

    results = []
    for path in files:
        try:
            energy = electrostatic_energy(path)
        except Exception as exc:  # noqa: BLE001 - report per-file failures
            # Deliberate best effort: one bad file must not abort the batch.
            print(f"{path.name}: error ({exc})")
            continue
        results.append((path.name, energy))
        print(f"{path.name}\t{energy:.3f} eV")

    if args.input_folder:
        results.sort(key=lambda item: (item[1], item[0]))

    lines = [f"{name}\t{energy:.3f} eV" for name, energy in results]
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text("\n".join(lines) + ("\n" if lines else ""))
    print(f"Wrote {len(lines)} result(s) to {output_path}")
    return 0 if lines else 1
98
+
99
+
100
+ if __name__ == "__main__":
101
+ _p = argparse.ArgumentParser(description=__doc__)
102
+ add_arguments(_p)
103
+ raise SystemExit(run(_p.parse_args()))
@@ -0,0 +1,452 @@
1
+ """
2
+ Randomly substitute or remove selected atoms from a structure.
3
+
4
+ Examples:
5
+ pymdkit substitute -i Li3YCl6.cif -se Li -sn 3 -we Na -wn 3 -on 100
6
+ pymdkit substitute -i Li3YCl6.cif -se Li -sn 3 -we none -on 100
7
+ pymdkit substitute -i Li96Ta6La11Cl72.cif -se Li1 Li2 -sn 20 67 -we none -on 100
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ import itertools
14
+ import math
15
+ import random
16
+ import re
17
+ import shlex
18
+ from pathlib import Path
19
+
20
+ COMMAND = "substitute"
21
+ HELP = "Randomly substitute or remove selected atoms from a structure."
22
+
23
+
24
+ def _clean_token(token):
25
+ token = token.strip()
26
+ if len(token) >= 2 and token[0] == token[-1] and token[0] in {"'", '"'}:
27
+ return token[1:-1]
28
+ return token
29
+
30
+
31
def _float_token(token, default=None):
    """Parse a CIF numeric token, returning *default* when it is not a number.

    Empty, ``.`` and ``?`` tokens yield *default*. Anything from the first
    ``(`` onward (e.g. the ``(4)`` in ``0.123(4)``) is dropped before parsing.
    """
    text = _clean_token(token)
    if text in ("", ".", "?"):
        return default
    number_part, _, _ = text.partition("(")
    try:
        value = float(number_part)
    except ValueError:
        return default
    return value
40
+
41
+
42
+ def _frac_distance(a, b):
43
+ import numpy as np
44
+
45
+ diff = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
46
+ diff -= np.round(diff)
47
+ return float(np.linalg.norm(diff))
48
+
49
+
50
+ def _tokenize(line):
51
+ lexer = shlex.shlex(line, posix=True)
52
+ lexer.whitespace_split = True
53
+ lexer.commenters = ""
54
+ return list(lexer)
55
+
56
+
57
+ def _element_from_label(label):
58
+ m = re.match(r"[A-Z][a-z]?", label)
59
+ return m.group(0) if m else label
60
+
61
+
62
def _eval_symop_part(expr, x, y, z):
    """Evaluate one component of a symmetry-operation triplet, modulo 1.

    Args:
        expr: One comma-separated part of an operator such as ``-x+1/2``.
            It is stripped and lower-cased before evaluation.
        x, y, z: Values substituted for the ``x``, ``y`` and ``z`` names.

    Returns:
        The evaluated expression reduced with ``% 1.0``.

    Raises:
        ValueError: If *expr* contains any character other than ``x``, ``y``,
            ``z``, digits, ``+``, ``-``, ``.``, ``/`` or a space.
    """
    expr = expr.strip().lower()
    # Character whitelist: nothing outside this set is allowed to reach eval().
    if not re.fullmatch(r"[xyz0-9+\-./ ]+", expr):
        raise ValueError(f"unsupported symmetry expression: {expr}")
    # NOTE(review): eval() runs on text taken from the input CIF. The whitelist
    # above and the emptied __builtins__ limit it to arithmetic on x/y/z, but
    # a small hand-written parser would remove eval() entirely. Expressions
    # that Python cannot parse (e.g. "2x", "01/2") raise from eval() here.
    return eval(expr, {"__builtins__": {}}, {"x": x, "y": y, "z": z}) % 1.0
67
+
68
+
69
+ def _apply_symop(op, coords):
70
+ parts = [part.strip() for part in op.strip().strip("'\"").split(",")]
71
+ if len(parts) != 3:
72
+ return coords
73
+ x, y, z = coords
74
+ return tuple(_eval_symop_part(part, x, y, z) for part in parts)
75
+
76
+
77
+ def _dedupe_label_records(records, tol=1e-4):
78
+ unique = []
79
+ for rec in records:
80
+ duplicate = False
81
+ for other in unique:
82
+ if (rec["label"] == other["label"]
83
+ and rec["symbol"] == other["symbol"]
84
+ and _frac_distance(rec["coords"], other["coords"]) <= tol):
85
+ duplicate = True
86
+ break
87
+ if not duplicate:
88
+ unique.append(rec)
89
+ return unique
90
+
91
+
92
def read_cif_labels(path):
    """Return CIF atom-site labels with fractional coordinates, if available.

    The file is scanned for ``loop_`` blocks. Symmetry operators are taken
    from ``_symmetry_equiv_pos_as_xyz`` / ``_space_group_symop_operation_xyz``
    loops, and sites from loops that carry ``_atom_site_label`` plus the three
    ``_atom_site_fract_*`` columns. Every site is expanded with every operator
    and duplicates are dropped.

    Operators are applied only after the whole file has been read, so the
    result does not depend on whether the symmetry loop appears before or
    after the atom-site loop. Type symbols are reduced to their element
    prefix (``Li+`` -> ``Li``) so they can be compared with element symbols.

    Args:
        path: Path of the input structure file.

    Returns:
        A list of dicts with keys ``label``, ``symbol`` and ``coords``. The
        list is empty when *path* is not a ``.cif`` file or cannot be read.
    """
    path = Path(path)
    if path.suffix.lower() != ".cif":
        return []

    try:
        lines = path.read_text(encoding="utf-8", errors="replace").splitlines()
    except OSError:
        return []

    # Listed in order of preference, matching the legacy tag first.
    symop_tags = ("_symmetry_equiv_pos_as_xyz",
                  "_space_group_symop_operation_xyz")
    coord_tags = ("_atom_site_fract_x", "_atom_site_fract_y",
                  "_atom_site_fract_z")

    def ends_loop(stripped):
        # A blank line, a new loop, a new tag or a new data block ends the rows.
        lowered = stripped.lower()
        return (not stripped or lowered == "loop_"
                or stripped.startswith("_")
                or lowered.startswith("data_"))

    def split_row(stripped):
        # An unbalanced quote (e.g. label "C1'") must not abort the whole parse.
        try:
            return _tokenize(stripped)
        except ValueError:
            return stripped.split()

    sites = []   # (label, symbol, coords) exactly as listed in the file
    symops = []
    i = 0
    while i < len(lines):
        if lines[i].strip().lower() != "loop_":
            i += 1
            continue

        i += 1
        headers = []
        while i < len(lines) and lines[i].strip().startswith("_"):
            headers.append(lines[i].strip())
            i += 1

        op_tag = next((tag for tag in symop_tags if tag in headers), None)
        if op_tag is not None:
            op_i = headers.index(op_tag)
            while i < len(lines):
                stripped = lines[i].strip()
                if ends_loop(stripped):
                    break
                tokens = split_row(stripped)
                if len(tokens) > op_i:
                    symops.append(_clean_token(tokens[op_i]))
                i += 1
            continue

        if ("_atom_site_label" not in headers
                or any(tag not in headers for tag in coord_tags)):
            continue

        label_i = headers.index("_atom_site_label")
        sym_i = (headers.index("_atom_site_type_symbol")
                 if "_atom_site_type_symbol" in headers else None)
        x_i, y_i, z_i = (headers.index(tag) for tag in coord_tags)

        while i < len(lines):
            stripped = lines[i].strip()
            if ends_loop(stripped):
                break
            tokens = split_row(stripped)
            if len(tokens) >= len(headers):
                label = _clean_token(tokens[label_i])
                raw_symbol = (_clean_token(tokens[sym_i])
                              if sym_i is not None else label)
                # Drop charge/index decoration so "Li+" and "Li1" both give "Li".
                symbol = _element_from_label(raw_symbol)
                coords = (
                    _float_token(tokens[x_i]),
                    _float_token(tokens[y_i]),
                    _float_token(tokens[z_i]),
                )
                if label and all(v is not None for v in coords):
                    sites.append((label, symbol, coords))
            i += 1

    operators = symops or ["x,y,z"]
    records = []
    for label, symbol, coords in sites:
        for operator in operators:
            try:
                expanded_coords = _apply_symop(operator, coords)
            except Exception:
                # Best effort: an operator that cannot be evaluated keeps the
                # listed coordinates instead of failing the whole command.
                expanded_coords = coords
            records.append({
                "label": label,
                "symbol": symbol,
                "coords": expanded_coords,
            })

    return _dedupe_label_records(records)
178
+
179
+
180
def read_structure(path):
    """Read a structure with pymatgen, retrying through ASE if that fails.

    Any exception from ``Structure.from_file`` triggers the fallback, which
    reads the file with ``ase.io.read`` and converts the result with
    ``AseAtomsAdaptor.get_structure``.
    """
    from pymatgen.core import Structure

    source = str(Path(path))
    try:
        structure = Structure.from_file(source)
    except Exception:
        from ase.io import read as ase_read
        from pymatgen.io.ase import AseAtomsAdaptor

        structure = AseAtomsAdaptor.get_structure(ase_read(source))
    return structure
193
+
194
+
195
def write_structure(structure, path):
    """Write *structure* to *path*, creating parent folders as needed.

    A ``.cif`` suffix (case-insensitive) is written through ``CifWriter``;
    every other suffix is delegated to ``structure.to(filename=...)``.
    """
    from pymatgen.io.cif import CifWriter

    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    if target.suffix.lower() != ".cif":
        structure.to(filename=str(target))
        return
    CifWriter(structure).write_file(str(target))
205
+
206
+
207
def site_symbols(structure):
    """Return ``site.specie.symbol`` for every site of *structure*, in order."""
    symbols = []
    for site in structure:
        symbols.append(site.specie.symbol)
    return symbols
209
+
210
+
211
def map_cif_labels_to_sites(structure, label_records, tol=1e-3):
    """Assign a CIF label to each site by nearest same-element record.

    Sites are processed in order. For each one, the closest unused record
    whose ``symbol`` equals ``site.specie.symbol`` is chosen (distance from
    ``_frac_distance``); it is accepted only when within *tol*, and each
    record is used at most once.

    Returns:
        A list parallel to *structure* holding the label or ``None``.
    """
    assigned = [None] * len(structure)
    taken = set()
    for site_pos, site in enumerate(structure):
        nearest = None
        nearest_dist = float("inf")
        for rec_pos, record in enumerate(label_records):
            if rec_pos in taken:
                continue
            if record["symbol"] != site.specie.symbol:
                continue
            separation = _frac_distance(site.frac_coords, record["coords"])
            if separation < nearest_dist:
                nearest, nearest_dist = rec_pos, separation
        if nearest is not None and nearest_dist <= tol:
            taken.add(nearest)
            assigned[site_pos] = label_records[nearest]["label"]
    return assigned
228
+
229
+
230
def target_indices(structure, selectors, input_path):
    """Resolve each selector to the list of matching site indices.

    A selector that equals at least one mapped CIF label selects the sites
    carrying that label; otherwise it selects the sites whose element symbol
    equals the selector.

    Returns:
        One index list per selector, in selector order.
    """
    symbols = site_symbols(structure)
    labels = map_cif_labels_to_sites(structure, read_cif_labels(input_path))
    groups = []
    for selector in selectors:
        by_label = [pos for pos, label in enumerate(labels) if label == selector]
        if by_label:
            groups.append(by_label)
            continue
        groups.append([pos for pos, sym in enumerate(symbols) if sym == selector])
    return groups
240
+
241
+
242
def choose_combinations(groups, counts, output_number):
    """Pick up to *output_number* distinct site selections.

    A selection takes ``counts[k]`` indices from ``groups[k]`` for every k and
    is stored as one sorted tuple. When the number of possible selections does
    not exceed *output_number*, all of them are returned in shuffled order.
    Otherwise selections are sampled at random, topped up by plain enumeration
    if sampling runs out of attempts.

    Returns:
        ``(selections, total)`` where *total* is the product of the
        per-group binomial coefficients.
    """
    def every_selection():
        pools = [itertools.combinations(group, count)
                 for group, count in zip(groups, counts)]
        for parts in itertools.product(*pools):
            yield tuple(sorted(itertools.chain.from_iterable(parts)))

    total = 1
    for group, count in zip(groups, counts):
        total *= math.comb(len(group), count)
    if total == 0:
        return [], total

    if total <= output_number:
        combos = list(every_selection())
        random.shuffle(combos)
        return combos, total

    picked = set()
    attempts_left = max(output_number * 100, 1000)
    while attempts_left > 0 and len(picked) < output_number:
        attempts_left -= 1
        draw = []
        for group, count in zip(groups, counts):
            draw.extend(random.sample(group, count))
        picked.add(tuple(sorted(draw)))

    if len(picked) < output_number:
        # Sampling ran out of attempts: fill the remainder deterministically.
        for combo in every_selection():
            picked.add(combo)
            if len(picked) >= output_number:
                break

    combos = list(picked)
    random.shuffle(combos)
    return combos[:output_number], total
277
+
278
+
279
+ def _multinomial_count(items):
280
+ counts = {}
281
+ for item in items:
282
+ counts[item] = counts.get(item, 0) + 1
283
+ total = math.factorial(len(items))
284
+ for count in counts.values():
285
+ total //= math.factorial(count)
286
+ return total
287
+
288
+
289
+ def _unique_permutations(items):
290
+ return sorted(set(itertools.permutations(items)))
291
+
292
+
293
+ def _random_replacement_assignment(selected, replacements):
294
+ repl = list(replacements)
295
+ random.shuffle(repl)
296
+ return tuple(sorted(zip(selected, repl)))
297
+
298
+
299
def choose_operations(groups, counts, replacements, output_number):
    """Return unique remove/replace operations and the total unique count.

    Args:
        groups: One list of candidate site indices per selector.
        counts: How many sites to take from each group.
        replacements: Flat list of replacement elements (one per selected
            site), or ``None`` to remove the selected sites.
        output_number: Maximum number of operations to return.

    Returns:
        ``(operations, total)``. Each operation is ``("remove", indices)`` or
        ``("replace", ((index, element), ...))``. *total* is the number of
        distinct operations that exist.
    """
    # In replace mode only the count is used from this call; it is kept up
    # front so the order of random-number consumption is unchanged.
    combos, selection_total = choose_combinations(groups, counts, output_number)
    if replacements is None:
        return [("remove", combo) for combo in combos], selection_total

    arrangement_total = _multinomial_count(replacements)
    total = selection_total * arrangement_total

    def every_operation():
        all_selection_combos, _ = choose_combinations(groups, counts, selection_total)
        if not all_selection_combos:
            return
        # Loop-invariant: compute the distinct assignments once, not per combo.
        assignments = _unique_permutations(replacements)
        for combo in all_selection_combos:
            for assignment in assignments:
                yield ("replace", tuple(sorted(zip(combo, assignment))))

    if total <= output_number:
        operations = list(every_operation())
        random.shuffle(operations)
        return operations, total

    seen = set()
    max_attempts = max(output_number * 100, 1000)
    attempts = 0
    while len(seen) < output_number and attempts < max_attempts:
        attempts += 1
        selected = []
        for group, count in zip(groups, counts):
            selected.extend(random.sample(group, count))
        selected = tuple(sorted(selected))
        seen.add(("replace", _random_replacement_assignment(selected, replacements)))

    if len(seen) < output_number:
        # Sampling ran out of attempts: fill the remainder by enumeration.
        for operation in every_operation():
            seen.add(operation)
            if len(seen) >= output_number:
                break

    operations = list(seen)
    random.shuffle(operations)
    return operations[:output_number], total
341
+
342
+
343
def replacement_plan(args, selected_count):
    """Build the flat list of replacement elements requested on the CLI.

    Returns:
        ``None`` when the single ``-we`` value is ``none`` (case-insensitive),
        otherwise a list with one element symbol per selected site.

    Raises:
        SystemExit: If several ``-we`` values are given without ``-wn``, if
            ``-wn`` and ``-we`` differ in length, or if the ``-wn`` values do
            not add up to *selected_count*.
    """
    elements = args.with_element
    numbers = args.with_number

    if len(elements) == 1 and elements[0].lower() == "none":
        return None

    if not numbers:
        if len(elements) != 1:
            raise SystemExit("Error: provide -wn/--with-number for multiple replacement elements.")
        return [elements[0]] * selected_count

    if len(numbers) != len(elements):
        raise SystemExit("Error: -wn must have the same length as -we.")
    if sum(numbers) != selected_count:
        raise SystemExit("Error: sum(-wn) must equal sum(-sn).")

    return [element
            for element, count in zip(elements, numbers)
            for _ in range(count)]
362
+
363
+
364
def apply_operation(structure, operation):
    """Return a copy of *structure* with one remove/replace operation applied.

    ``("remove", indices)`` deletes the listed sites in descending index
    order; any other mode treats the payload as ``(index, element)`` pairs
    and replaces those sites. The input structure is not modified.
    """
    edited = structure.copy()
    kind, details = operation
    if kind != "remove":
        for position, new_element in details:
            edited.replace(position, new_element)
        return edited

    doomed = sorted(details, reverse=True)
    edited.remove_sites(doomed)
    return edited
375
+
376
+
377
def substitute_one(input_path, output_dir, args):
    """Generate substituted/removed variants of one structure file.

    Reads *input_path*, resolves the ``-se`` selectors to site groups, builds
    the operations and writes each result to
    ``<output_dir>/<stem>_r<N>.<output_format>``.

    Returns:
        The number of structures written (0 when no operation was generated).

    Raises:
        SystemExit: If a selector matches fewer sites than ``-sn`` requests.
    """
    structure = read_structure(input_path)
    groups = target_indices(structure, args.select_element, input_path)

    requested = zip(args.select_element, groups, args.select_number)
    for selector, group, count in requested:
        available = len(group)
        if available < count:
            raise SystemExit(
                f"Error: selector {selector!r} has {available} matching site(s), "
                f"but -sn requests {count}.")

    replacements = replacement_plan(args, sum(args.select_number))
    operations, total = choose_operations(
        groups, args.select_number, replacements, args.output_number)
    if not operations:
        print(f"No combinations generated for {input_path}.")
        return 0

    output_dir.mkdir(parents=True, exist_ok=True)
    stem = Path(input_path).stem
    extension = args.output_format
    serial = 0
    for operation in operations:
        serial += 1
        variant = apply_operation(structure, operation)
        write_structure(variant, output_dir / f"{stem}_r{serial}.{extension}")

    written = len(operations)
    print(f"{input_path}: total unique combinations = {total}")
    print(f"{input_path}: wrote {written} unique structure(s) -> {output_dir}/")
    return written
405
+
406
+
407
def add_arguments(parser: argparse.ArgumentParser) -> None:
    """Register the substitute command-line options on *parser*."""
    option_table = (
        (("-i", "--input"),
         {"required": True,
          "help": "Input structure file."}),
        (("-se", "--select-element"),
         {"nargs": "+", "required": True,
          "help": "Element symbols or CIF atom-site labels to select, e.g. Li or Li1 Li2."}),
        (("-sn", "--select-number"),
         {"nargs": "+", "type": int, "required": True,
          "help": "Number of selected atoms/sites for each -se selector."}),
        (("-we", "--with-element"),
         {"nargs": "+", "required": True,
          "help": "Replacement element(s), or 'none' to remove selected atoms."}),
        (("-wn", "--with-number"),
         {"nargs": "+", "type": int,
          "help": "Replacement count(s), required for multiple -we values."}),
        (("-on", "--output-number"),
         {"type": int, "required": True,
          "help": "Number of random substituted/removed structures to output."}),
        (("-o", "--output-folder"),
         {"help": "Output folder (default: <input-stem>_substitute)."}),
        (("--output-format",),
         {"default": "cif",
          "help": "Output file format/extension (default: cif)."}),
        (("--seed",),
         {"type": int,
          "help": "Random seed for reproducible selections."}),
    )
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)
426
+
427
+
428
def run(args) -> int:
    """Validate the substitute arguments and process the input structure.

    Checks that ``-sn`` matches ``-se`` in length, that no ``-sn``/``-wn``
    count is negative, that ``-on`` is positive and that the input file
    exists; seeds the ``random`` module when ``--seed`` is given; then writes
    the variants into ``-o`` or ``<input-stem>_substitute`` next to the input.

    Returns:
        1 when argument validation fails or the input file is missing,
        otherwise 0.
    """
    if len(args.select_number) != len(args.select_element):
        print("Error: -sn must have the same length as -se.")
        return 1
    # Negative counts would otherwise surface later as a ValueError traceback
    # from math.comb()/random.sample() instead of a usage error.
    if any(count < 0 for count in args.select_number):
        print("Error: -sn/--select-number values must not be negative.")
        return 1
    if args.with_number and any(count < 0 for count in args.with_number):
        print("Error: -wn/--with-number values must not be negative.")
        return 1
    if args.output_number <= 0:
        print("Error: -on/--output-number must be positive.")
        return 1
    if args.seed is not None:
        random.seed(args.seed)

    input_path = Path(args.input)
    if not input_path.is_file():
        print(f"Input file not found: {input_path}")
        return 1

    output_dir = (Path(args.output_folder) if args.output_folder
                  else input_path.with_name(input_path.stem + "_substitute"))
    substitute_one(input_path, output_dir, args)
    return 0
447
+
448
+
449
+ if __name__ == "__main__":
450
+ _p = argparse.ArgumentParser(description=__doc__)
451
+ add_arguments(_p)
452
+ raise SystemExit(run(_p.parse_args()))
@@ -69,10 +69,10 @@ def _discover() -> Dict[str, Tuple[str, str]]:
69
69
 
70
70
  def _build_top_parser(cmds: Dict[str, Tuple[str, str]]) -> argparse.ArgumentParser:
71
71
  parser = argparse.ArgumentParser(
72
- prog="pymdkit",
73
- description="Unified CLI for atomistic / MD structure workflows.",
72
+ prog=f"pymdkit {__version__}",
73
+ description="Available commands:",
74
74
  )
75
- parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
75
+ parser.add_argument("--version", action="version", version=f"pymdkit {__version__}")
76
76
  sub = parser.add_subparsers(dest="command", metavar="<command>")
77
77
  for name in sorted(cmds):
78
78
  sub.add_parser(name, help=cmds[name][1], add_help=False)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pymdkit
3
- Version: 1.1.2
3
+ Version: 1.1.4
4
4
  Summary: A unified command-line toolkit for atomistic / MD structure workflows.
5
5
  Author-email: Yueda Wang <ydwang0608@ustc.edu.cn>
6
6
  License-Expression: GPL-3.0-or-later
@@ -33,18 +33,19 @@ scripts into each working folder and running `python some_script.py`, you instal
33
33
  Every command exposes named `--flags` (no positional guessing), and each underlying
34
34
  script is still runnable on its own.
35
35
 
36
- ## Install ("compiling" the executable)
36
+ ## Install
37
37
 
38
- Python isn't compiled to a binary; the equivalent step is installing the package,
39
- which creates the `pymdkit` command on your `PATH`.
38
+ Create a clean conda environment, activate it, then install `pymdkit` with pip:
40
39
 
41
40
  ```bash
41
+ conda create -n pymdkit python=3.10
42
+ conda activate pymdkit
42
43
  pip install pymdkit
43
44
  ```
44
45
 
45
- On an HPC cluster, activate your conda env / `module load` first so `pymdkit` lands in
46
- that environment's `bin`. This installs every dependency (numpy, scipy, ase, pymatgen,
47
- pyxtal, mp_api, gemmi, tqdm), so all commands work out of the box.
46
+ This installs the `pymdkit` command into the active conda environment, together
47
+ with its dependencies (numpy, scipy, ase, pymatgen, pyxtal, mp_api, gemmi,
48
+ tqdm).
48
49
 
49
50
  Verify:
50
51
 
@@ -63,6 +64,7 @@ directory for job sub-folders automatically.
63
64
  | Command | What it does |
64
65
  |---|---|
65
66
  | `add-groups` | Tag atoms with a GPUMD group index by element order |
67
+ | `electrostatic-energy` | Compute CIF electrostatic energy with pymatgen EwaldSummation |
66
68
  | `ehull` | Auto-detect VASP job folders and compute E_hull vs Materials Project |
67
69
  | `gather-contcar` | Collect CONTCARs from VASP job folders into one folder, renamed `<folder>.vasp` |
68
70
  | `msd` | Diffusivity & conductivity from GPUMD MSD jobs (auto-scans `<structure>/<temp>/`) |
@@ -70,6 +72,7 @@ directory for job sub-folders automatically.
70
72
  | `rmsd` | Compute RMSD between two structure files, or all pairs in a folder |
71
73
  | `select-candidate` | Split a NEP training set into candidate/accurate sets by energy error |
72
74
  | `stru2xyz` | Convert structure file(s) of any format to extxyz |
75
+ | `substitute` | Randomly substitute or remove selected atoms/sites from a structure |
73
76
  | `supercell` | Build a supercell with cell lengths capped at a maximum (Angstrom); optional per-temperature GPUMD setup |
74
77
  | `symmetrize` | Import space-group symmetry into a structure file (or folder) -> CIF |
75
78
  | `vasp-relax` | Write VASP relaxation inputs for a structure (or folder); INCAR tags overridable |
@@ -99,6 +102,11 @@ pymdkit gather-contcar -of vasp-opted # CONTCARs -> vasp-opted/<fo
99
102
  pymdkit gather-contcar -of vasp-opted -ehull 0.028 # only structures with E_hull < 0.028 eV/atom
100
103
  pymdkit outcar2xyz # scans ./ for OUTCAR folders -> scf-converged.xyz
101
104
  pymdkit outcar2xyz --position-only # write positions only, without energy/forces/stress
105
+ pymdkit substitute -i Li3YCl6.cif -se Li -sn 3 -we Na -wn 3 -on 100
106
+ pymdkit substitute -i Li3YCl6.cif -se Li -sn 3 -we none -on 100
107
+ pymdkit substitute -i Li96Ta6La11Cl72.cif -se Li1 Li2 -sn 20 67 -we none -on 100
108
+ pymdkit electrostatic-energy -i Li3YCl6.cif
109
+ pymdkit electrostatic-energy -if Li3YCl6-all
102
110
  pymdkit select-candidate # RMSE bands: <low all accurate, >high all candidate, else worst 50%
103
111
  pymdkit select-candidate -r 0.8 # in the middle band, take worst 80% as candidate.xyz
104
112
  pymdkit rmsd a.cif b.cif # RMSD of two files -> rmsd.txt
@@ -154,8 +162,10 @@ pymdkit/
154
162
  |-- add_groups.py
155
163
  |-- compute_ehull.py
156
164
  |-- compute_rmsd.py
165
+ |-- electrostatic_energy.py
157
166
  |-- outcar2xyz.py
158
167
  |-- stru2xyz.py
168
+ |-- substitute.py
159
169
  |-- supercell.py
160
170
  |-- vasp_relax.py
161
171
  |-- vasp_static.py
@@ -188,8 +198,8 @@ if __name__ == "__main__": # keeps the script runnable on its own
188
198
  raise SystemExit(run(_p.parse_args()))
189
199
  ```
190
200
 
191
- It will appear in `pymdkit --help` automatically — no central registration needed.
192
- Put heavy imports (pymatgen, ase, …) inside `run()` where practical; the dispatcher
201
+ It will appear in `pymdkit --help` automatically - no central registration needed.
202
+ Put heavy imports (pymatgen, ase, ...) inside `run()` where practical; the dispatcher
193
203
  reads each command's name and help without importing it, so `pymdkit --help` stays
194
204
  fast and a missing optional dependency only affects the one command that needs it.
195
205
 
@@ -16,10 +16,12 @@ src/pymdkit/commands/add_groups.py
16
16
  src/pymdkit/commands/compute_ehull.py
17
17
  src/pymdkit/commands/compute_msd_all_groups.py
18
18
  src/pymdkit/commands/compute_rmsd.py
19
+ src/pymdkit/commands/electrostatic_energy.py
19
20
  src/pymdkit/commands/gather_contcar.py
20
21
  src/pymdkit/commands/outcar2xyz.py
21
22
  src/pymdkit/commands/select_candidate.py
22
23
  src/pymdkit/commands/stru2xyz.py
24
+ src/pymdkit/commands/substitute.py
23
25
  src/pymdkit/commands/supercell.py
24
26
  src/pymdkit/commands/symmetrize.py
25
27
  src/pymdkit/commands/vasp_relax.py
File without changes
File without changes