csslib-1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csslib/__init__.py +386 -0
- csslib/config_logging.py +94 -0
- csslib/utils.py +101 -0
- csslib-1.0.dist-info/METADATA +49 -0
- csslib-1.0.dist-info/RECORD +7 -0
- csslib-1.0.dist-info/WHEEL +5 -0
- csslib-1.0.dist-info/top_level.txt +1 -0
csslib/__init__.py
ADDED
@@ -0,0 +1,386 @@
from math import prod
from itertools import product
import os
import json
from tqdm import tqdm
import pandas as pd
from copy import deepcopy
import zipfile
import re
from collections import Counter
from concurrent.futures import ProcessPoolExecutor, as_completed
import subprocess
from pymatgen.core import Structure
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
from pymatgen.io.cif import CifParser, CifBlock
from pymatgen.analysis.defects.generators import VoronoiInterstitialGenerator
import warnings
from csslib.config_logging import get_main_logger, get_supercell_worker_logger, get_collect_worker_logger
import sys

warnings.filterwarnings("ignore")


class CSS:
    _RESULTS_DIR = "results"
    _SUPERCELL_INPUT_CIFS_DIR = "disordered_structures"
    _SUPERCELL_OUTPUT_DIR = "ordered_representations"
    _ORDERED_REPRESENTATIONS_METADATA_DIR = "ordered_representations_metadata"

    def __init__(self, config_filename: str) -> None:
        with open(config_filename) as f:
            self.config = json.load(f)
        self._result_path = os.path.join(self._RESULTS_DIR, self.config["result_dir"])
        self._supercell_input_cifs_path = os.path.join(self._result_path, self._SUPERCELL_INPUT_CIFS_DIR)
        self._supercell_output_path = os.path.join(self._result_path, self._SUPERCELL_OUTPUT_DIR)
        self._ordered_representations_metadata_path = os.path.join(self._result_path,
                                                                    self._ORDERED_REPRESENTATIONS_METADATA_DIR)
        os.makedirs(self._RESULTS_DIR, exist_ok=True)
        os.makedirs(self._result_path)
        self._parser_data = None
        self._structure_sym = None
        self._scale_factor = 0
        self.logger = get_main_logger(self._result_path)

    def read_structure(self) -> None:
        """
        Read an initial structure from a cif-file.
        :return: None.
        """

        structure = Structure.from_file(self.config["structure_filename"])
        self.logger.info("Initial structure is read.")
        finder = SpacegroupAnalyzer(structure)
        self._structure_sym = finder.get_symmetrized_structure()
        self._structure_sym.to(os.path.join(self._result_path, "css_temp.cif"),
                               fmt="cif",
                               symprec=True,
                               refine_struct=True)
        parser = CifParser(os.path.join(self._result_path, "css_temp.cif"))
        self._parser_data = next(iter(parser._cif.data.values()))
        os.remove(os.path.join(self._result_path, "css_temp.cif"))

    def generate_interstitial_structure(self) -> None:
        """
        Generate an interstitial structure using the Voronoi algorithm and save it to a cif-file.
        Interstitial sites are filled with Neptunium species.
        :return: None.
        """

        self.logger.info("Preparing to generate interstitial structure.")
        interstitial_generator = VoronoiInterstitialGenerator()
        for i, interstitial in enumerate(interstitial_generator.generate(self._structure_sym, {"Np", })):
            self._parser_data["_atom_site_type_symbol"].append("Np")
            self._parser_data["_atom_site_label"].append(f"Np{i}")
            self._parser_data["_atom_site_symmetry_multiplicity"].append(str(interstitial.multiplicity))
            self._parser_data["_atom_site_fract_x"].append(f"{interstitial.site.frac_coords[0]:.7f}")
            self._parser_data["_atom_site_fract_y"].append(f"{interstitial.site.frac_coords[1]:.7f}")
            self._parser_data["_atom_site_fract_z"].append(f"{interstitial.site.frac_coords[2]:.7f}")
            self._parser_data["_atom_site_occupancy"].append("1.0")

        interstitial_structure_filename = self._create_interstitial_structure_filename()
        self._save_structure(self._parser_data, interstitial_structure_filename)
        self.logger.info("Interstitial structure is generated and saved at %s.", self._result_path)

    def generate_substituted_disordered_structures(self) -> None:
        """
        Generate substituted disordered structures (with partial occupancies) according to the configuration file
        and save them to cif-files.
        :return: None.
        """

        self.logger.info("Preparing to generate disordered structures (with partial occupancies) ...")
        cell_natoms = sum(map(int, self._parser_data["_atom_site_symmetry_multiplicity"]))
        self._scale_factor = prod(map(int, self.config["supercell"].split("x")))
        supercell_natoms = cell_natoms * self._scale_factor

        for subst in self.config["substitution"]:
            subst["substitution_low_limit_natoms"] = (int(subst["substitution_low_limit"] *
                                                          supercell_natoms + 0.001))
            subst["substitution_high_limit_natoms"] = (int(subst["substitution_high_limit"] *
                                                           supercell_natoms + 0.001))
            subst["indices_to_substitute"] = [j for j in range(len(self._parser_data["_atom_site_type_symbol"]))
                                              if self._parser_data["_atom_site_type_symbol"][j] ==
                                              subst["specie_to_substitute"]]

        subst_natoms_list = []
        product_range = range(1 + max([subst["substitution_high_limit_natoms"]
                                       for subst in self.config["substitution"]]))
        product_repeat = sum([len(subst["indices_to_substitute"]) for subst in self.config["substitution"]])

        for subst_natoms in product(product_range, repeat=product_repeat):
            idx_right = 0
            for subst in self.config["substitution"]:
                idx_left = idx_right
                idx_right += len(subst["indices_to_substitute"])
                if sum(subst_natoms[idx_left: idx_right]) > subst["substitution_high_limit_natoms"]:
                    break
            else:
                subst_natoms_list.append(subst_natoms)

        os.makedirs(self._supercell_input_cifs_path)

        for subst_natoms in subst_natoms_list:
            p_data = deepcopy(self._parser_data)
            k = 0
            indices_to_substitute_occup = {i: 1.0 for subst in self.config["substitution"]
                                           for i in subst["indices_to_substitute"]}
            for subst in self.config["substitution"]:
                for j in range(len(subst["indices_to_substitute"])):
                    p_data["_atom_site_type_symbol"].append(subst["substitute_with"])
                    p_data["_atom_site_label"].append(subst["substitute_with"] + str(k))
                    p_data["_atom_site_symmetry_multiplicity"].append(
                        p_data["_atom_site_symmetry_multiplicity"][subst["indices_to_substitute"][j]])
                    p_data["_atom_site_fract_x"].append(
                        p_data["_atom_site_fract_x"][subst["indices_to_substitute"][j]])
                    p_data["_atom_site_fract_y"].append(
                        p_data["_atom_site_fract_y"][subst["indices_to_substitute"][j]])
                    p_data["_atom_site_fract_z"].append(
                        p_data["_atom_site_fract_z"][subst["indices_to_substitute"][j]])
                    new_atom_site_occupancy = (subst_natoms[k] / self._scale_factor /
                                               int(p_data['_atom_site_symmetry_multiplicity'][subst['indices_to_substitute'][j]]))
                    p_data["_atom_site_occupancy"].append(f"{new_atom_site_occupancy:.7f}")
                    indices_to_substitute_occup[subst["indices_to_substitute"][j]] -= new_atom_site_occupancy
                    k += 1

            for idx, occup in indices_to_substitute_occup.items():
                if occup >= 0.:
                    p_data["_atom_site_occupancy"][idx] = f"{occup:.7f}"
                else:
                    break
            else:
                supercell_structure_filename = self._create_supercell_structure_filename(p_data)
                self._save_structure(p_data, self._SUPERCELL_INPUT_CIFS_DIR, supercell_structure_filename)
                self.logger.debug("%s disordered structure (with partial occupancies) is generated and saved.",
                                  supercell_structure_filename)
        self.logger.info("%d disordered structures (with partial occupancies) are generated and saved at %s.",
                         len(os.listdir(self._supercell_input_cifs_path)), self._supercell_input_cifs_path)

    def _create_supercell_structure_filename(self, parser_data: CifBlock) -> str:
        """
        Create a filename for a disordered structure (with partial occupancies).
        :param parser_data: Structural data to save.
        :return: Filename.
        """

        supercell_structure_filename = ""
        for idx in range(len(parser_data["_atom_site_type_symbol"])):
            supercell_structure_filename += parser_data["_atom_site_type_symbol"][idx]
            supercell_structure_filename += f"{int(float(parser_data['_atom_site_occupancy'][idx]) * int(parser_data['_atom_site_symmetry_multiplicity'][idx]) * self._scale_factor + 0.001)}"
        supercell_structure_filename += ".cif"
        return supercell_structure_filename

    def _create_interstitial_structure_filename(self) -> str:
        """
        Create a filename for an interstitial structure.
        :return: Filename.
        """

        return os.path.splitext(os.path.split(self.config["structure_filename"])[1])[0] + "_interstitial" + ".cif"

    def _save_structure(self, parser_data: CifBlock, *args: str) -> None:
        """
        Save a structure to a cif-file.
        :param parser_data: Structural data to save.
        :param args: Path components (relative to the results' directory) of the file to save the structure to.
        :return: None.
        """

        with open(os.path.join(self._result_path, *args), "w") as f:
            f.write(str(parser_data))

    @staticmethod
    def _init_supercell_worker(result_path: str) -> None:
        """
        Configure the logger for a supercell worker.
        :param result_path: Path to the results' directory.
        :return: None.
        """

        logger_ = get_supercell_worker_logger(result_path)
        global supercell_worker_logger
        supercell_worker_logger = logger_

    @staticmethod
    def _supercell_worker(cmd: str, compound: str) -> int:
        """
        Run a Supercell worker (a process with a Supercell software instance).
        :param cmd: Command to run.
        :param compound: Compound name used for logging.
        :return: Return code of the Supercell process.
        """

        worker_result = subprocess.run(cmd, shell=True, text=True, encoding="utf-8", capture_output=True)
        if worker_result.returncode == 0:
            num_struct_before = re.search(r"The total number of combinations is (\d+)", worker_result.stdout).group(1)
            num_struct_after = re.search(r"Combinations after merge: (\d+)", worker_result.stdout).group(1)
            supercell_worker_logger.info("%s - DONE! - The total number of structures: %s - Symmetrically inequivalent structures: %s",
                                         compound, num_struct_before, num_struct_after)
        else:
            supercell_worker_logger.info("%s - FAILED! - %s", compound, worker_result.stderr)
        return worker_result.returncode

    def run_supercell(self) -> None:
        """
        Run the Supercell software to convert disordered structures (with partial occupancies)
        to ordered representations (supercell structures).
        :return: None.
        """

        self.logger.info("Preparing to check out possibility of creation ordered representations of disordered structures ...")
        if (error_message := self._dry_run_supercell()) is not None:
            self.logger.error("%s Change config-file to simplify CSS and try again.", error_message.rstrip())
            sys.exit(1)
        self.logger.info("Checking out possibility of creation ordered representations of disordered structures is finished successfully!")
        self.logger.info("Preparing to generate ordered representations of disordered structures ...")
        os.makedirs(self._supercell_output_path)
        futures = []
        with (tqdm(range(len(os.listdir(self._supercell_input_cifs_path))),
                   desc="Creating ordered representations of disordered structures",
                   unit=" composition",
                   ncols=200)
              as pbar,
              ProcessPoolExecutor(max_workers=self.config["num_workers"],
                                  initializer=self._init_supercell_worker,
                                  initargs=(self._result_path,))
              as pool):
            for supercell_structure_filename in os.listdir(self._supercell_input_cifs_path):
                cmd = f"supercell -i {os.path.join(self._supercell_input_cifs_path, supercell_structure_filename)} -m "\
                      f"-s {self.config['supercell']} "\
                      f"-a {os.path.join(self._supercell_output_path, supercell_structure_filename.replace('.cif', ''))}.zip "\
                      f"-o {supercell_structure_filename.replace('.cif', '')}"
                compound = supercell_structure_filename.replace('.cif', '')
                future = pool.submit(self._supercell_worker, cmd, compound)
                future.add_done_callback(lambda p: pbar.update())
                futures.append(future)

        num_failed_tasks = 0
        for future in as_completed(futures):
            num_failed_tasks += future.result() != 0
        self.logger.info("Ordered representations of disordered structures are generated and saved at %s.",
                         self._supercell_output_path)
        if num_failed_tasks:
            self.logger.info("Generation of ordered representations of disordered structures is failed for %d compound(s)!", num_failed_tasks)
        else:
            self.logger.info("Generation of ordered representations of disordered structures is finished successfully!")

    @staticmethod
    def _dry_supercell_worker(cmd: str) -> str | None:
        """
        Run a Supercell worker (a process with a Supercell software instance) in dry-run mode
        to check the possibility of creating ordered representations of disordered structures.
        :param cmd: Command to run.
        :return: Error message if something went wrong, None otherwise.
        """

        worker_result = subprocess.run(cmd, shell=True, text=True, encoding="utf-8", capture_output=True)
        if worker_result.returncode == 0:
            return None
        return worker_result.stderr

    def _dry_run_supercell(self) -> str | None:
        """
        Check the possibility of creating ordered representations of disordered structures.
        :return: None if the possibility exists, error message otherwise.
        """

        futures = []
        with tqdm(range(len(os.listdir(self._supercell_input_cifs_path))),
                  desc="Checking out possibility of creation ordered representations of disordered structures",
                  unit=" composition",
                  ncols=200) as pbar:
            pool = ProcessPoolExecutor(max_workers=self.config["num_workers"])
            for supercell_structure_filename in os.listdir(self._supercell_input_cifs_path):
                cmd = f"supercell -i {os.path.join(self._supercell_input_cifs_path, supercell_structure_filename)} "\
                      f"-s {self.config['supercell']} -d -v 0"
                future = pool.submit(self._dry_supercell_worker, cmd)
                future.add_done_callback(lambda p: pbar.update())
                futures.append(future)
            for future in as_completed(futures):
                if (error_message := future.result()) is not None:
                    pool.shutdown(wait=True, cancel_futures=True)
                    return error_message
            else:
                pool.shutdown(wait=True, cancel_futures=False)
                return None

    @staticmethod
    def _init_collect_worker(fields: tuple, substitute_with_species: tuple,
                             ordered_representations_metadata_path: str, result_path: str) -> None:
        """
        Initialize collect workers.
        :param fields: Names of the dataframe columns where metadata is collected.
        :param substitute_with_species: Species that were used as substitutes.
        :param ordered_representations_metadata_path: Path to ordered representations of disordered structures.
        :param result_path: Path to the results' directory.
        :return: None.
        """

        global fields_, substitute_with_species_, ordered_representations_metadata_path_, collect_worker_logger_
        fields_ = fields
        substitute_with_species_ = substitute_with_species
        ordered_representations_metadata_path_ = ordered_representations_metadata_path
        logger = get_collect_worker_logger(result_path)
        collect_worker_logger_ = logger

    @staticmethod
    def _collect_data_one_composition(archive_path: str) -> None:
        """
        Collect meta-information about one particular composition.
        :param archive_path: Path to the archive containing ordered representations of a disordered structure.
        :return: None.
        """

        ordered_representations_metadata = {key: [] for key in fields_}
        with zipfile.ZipFile(archive_path, "r") as archive:
            for structure_filename in archive.namelist():
                with archive.open(structure_filename, "r") as file:
                    file_data = file.read().decode("utf-8")
                structure = CifParser.from_str(file_data).get_structures(primitive=False)[0]
                finder = SpacegroupAnalyzer(structure)
                specie_counter = Counter(map(str, structure.species))
                ordered_representations_metadata["cif_data"].append(file_data)
                ordered_representations_metadata["structure_filename"].append(structure_filename.replace(".zip", ""))
                ordered_representations_metadata["composition"].append(str(structure.composition))
                ordered_representations_metadata["space_group_no"].append(int(finder.get_space_group_number()))
                ordered_representations_metadata["space_group_symbol"].append(finder.get_space_group_symbol())
                ordered_representations_metadata["weight"].append(int(re.search(r"_w(.*?).cif", structure_filename).group(1)))
                for specie in substitute_with_species_:
                    ordered_representations_metadata[f"{specie}_concentration"].append(specie_counter[specie] / len(structure))
        ordered_representations_metadata_df = pd.DataFrame.from_dict(ordered_representations_metadata)
        ordered_representations_metadata_path = os.path.join(ordered_representations_metadata_path_,
                                                             os.path.splitext(os.path.split(archive_path)[1])[0] + ".pkl.gz")
        ordered_representations_metadata_df.to_pickle(ordered_representations_metadata_path)
        collect_worker_logger_.info(
            "%s - DONE! - The total number of structures: %d",
            os.path.splitext(os.path.split(archive_path)[1])[0], ordered_representations_metadata_df.shape[0])

    def collect_data_mp(self) -> None:
        """
        Collect meta-information about all ordered representations of disordered structures
        and save it to pandas dataframes.
        :return: None.
        """

        self.logger.info("Preparing to collect ordered representations' metadata ...")
        substitute_with_species = tuple({subst['substitute_with'] for subst in self.config["substitution"]})
        fields = ["cif_data", "structure_filename", "composition", "space_group_no", "space_group_symbol", "weight"]
        for specie in substitute_with_species:
            fields.append(f"{specie}_concentration")
        fields = tuple(fields)
        os.makedirs(self._ordered_representations_metadata_path)
        archive_paths = [os.path.join(self._supercell_output_path, archive_filename)
                         for archive_filename in os.listdir(self._supercell_output_path)]
        with (tqdm(range(len(os.listdir(self._supercell_output_path))),
                   desc="Collecting ordered representations' metadata of disordered structures",
                   unit=" composition",
                   ncols=200)
              as pbar,
              ProcessPoolExecutor(max_workers=self.config["num_workers"],
                                  initializer=self._init_collect_worker,
                                  initargs=(fields, substitute_with_species, self._ordered_representations_metadata_path, self._result_path))
              as pool):
            for archive_path in archive_paths:
                future = pool.submit(self._collect_data_one_composition, archive_path)
                future.add_done_callback(lambda p: pbar.update())
        self.logger.info("Ordered representations' metadata are collected and saved at %s.",
                         self._ordered_representations_metadata_path)
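For orientation, here is a minimal driver sketch for the `CSS` class above. The configuration keys (`result_dir`, `structure_filename`, `supercell`, `num_workers`, and the `substitution` entries with `specie_to_substitute`, `substitute_with`, `substitution_low_limit`, `substitution_high_limit`) are the ones read by the code; the concrete values, compound, and file names are illustrative assumptions, not part of the package.

```python
# Hypothetical driver for the CSS pipeline; values are illustrative only.
import json

from csslib import CSS

# Config keys mirror those accessed in CSS.__init__ and
# generate_substituted_disordered_structures; values are assumptions.
config = {
    "result_dir": "CsPbI3_css",            # results/<result_dir> is created by CSS.__init__
    "structure_filename": "CsPbI3.cif",    # initial (ordered) structure
    "supercell": "2x2x2",                  # supercell size passed to the Supercell program
    "num_workers": 4,                      # processes for the ProcessPoolExecutor
    "substitution": [
        {
            "specie_to_substitute": "Pb",
            "substitute_with": "Sn",
            "substitution_low_limit": 0.0,
            "substitution_high_limit": 0.5,
        }
    ],
}
with open("css_config.json", "w") as f:
    json.dump(config, f, indent=2)

css = CSS("css_config.json")
css.read_structure()                              # read and symmetrize the initial cif
# css.generate_interstitial_structure()           # optional: add Np-filled Voronoi interstitials
css.generate_substituted_disordered_structures()  # write cifs with partial occupancies
css.run_supercell()                               # enumerate ordered representations via Supercell
css.collect_data_mp()                             # collect per-composition metadata to .pkl.gz files
```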
csslib/config_logging.py
ADDED
@@ -0,0 +1,94 @@
import logging
import logging.config
import os
from multiprocessing import current_process


log_config = {
    "version": 1,
    "disable_existing_loggers": True,
    "formatters": {
        "console_formatter": {
            "format": "%(levelname)s - %(message)s"
        },
        "file_formatter": {
            "format": "[%(asctime)s] %(levelname)s - %(message)s",
            "datefmt": "%Y-%m-%d %H:%M:%S"
        }
    },
    "handlers": {
        "console_handler": {
            "level": "INFO",
            "formatter": "console_formatter",
            "class": "logging.StreamHandler",
            "stream": "ext://sys.stdout",
        },
        "file_handler": {
            "level": "DEBUG",
            "formatter": "file_formatter",
            "class": "logging.FileHandler",
            "filename": "",
            "mode": "a",
            "encoding": "utf-8"
        }
    },
    "loggers": {
        "main": {
            "handlers": ["console_handler", "file_handler"],
            "level": "DEBUG",
            "propagate": False
        },
        "supercell_worker": {
            "handlers": ["file_handler"],
            "level": "DEBUG",
            "propagate": False
        },
        "collect_worker": {
            "handlers": ["file_handler"],
            "level": "DEBUG",
            "propagate": False
        }
    }
}


def configure_logging(result_path: str, log_filename: str = "main") -> None:
    """
    Configure logging for the main process and the supercell and collect workers.
    :param result_path: Logs will be saved in result_path/logs.
    :param log_filename: Log filename.
    :return: None.
    """
    os.makedirs(os.path.join(result_path, "logs"), exist_ok=True)
    log_config["handlers"]["file_handler"]["filename"] = os.path.join(result_path, "logs", f"{log_filename}.log")
    logging.config.dictConfig(log_config)


def get_main_logger(result_path: str) -> logging.Logger:
    """
    Get the main logger.
    :param result_path: Logs will be saved in result_path/logs.
    :return: Logger.
    """
    configure_logging(result_path)
    return logging.getLogger("main")


def get_supercell_worker_logger(result_path: str) -> logging.Logger:
    """
    Get the supercell worker logger.
    :param result_path: Logs will be saved in result_path/logs.
    :return: Logger.
    """
    configure_logging(result_path, f"supercell_{current_process().name}")
    return logging.getLogger("supercell_worker")


def get_collect_worker_logger(result_path: str) -> logging.Logger:
    """
    Get the collect worker logger.
    :param result_path: Logs will be saved in result_path/logs.
    :return: Logger.
    """
    configure_logging(result_path, f"collect_{current_process().name}")
    return logging.getLogger("collect_worker")
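As a quick illustration of the logging layout above (a sketch, not part of the package): every process reconfigures the `file_handler` filename for itself, so the main process and each worker process write to separate files under `result_path/logs`, while only the main logger also echoes INFO messages to the console. The result path below is an assumption.

```python
# Sketch of how the loggers above are obtained; the path is illustrative.
from csslib.config_logging import get_main_logger

logger = get_main_logger("results/CsPbI3_css")  # writes to results/CsPbI3_css/logs/main.log
logger.info("Pipeline started.")                # INFO goes to console and file
logger.debug("Verbose details.")                # DEBUG goes to the file only

# Worker processes call get_supercell_worker_logger / get_collect_worker_logger in their
# pool initializers, producing one logs/supercell_<process name>.log or
# logs/collect_<process name>.log file per worker process.
```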
csslib/utils.py
ADDED
@@ -0,0 +1,101 @@
import matplotlib.pyplot as plt
import pandas as pd
import json
import re
import numpy as np
from pymatgen.symmetry.groups import SpaceGroup
from matplotlib.ticker import MaxNLocator
import networkx as nx


def _spaceGroupConventional(sg: str) -> str:
    """
    Convert a space group symbol to a conventional form.
    :param sg: Space group symbol.
    :return: Formatted space group symbol.
    """

    sg = re.sub(r"-\d", lambda x: "\\bar{" + x.group()[1:] + "}", sg)
    return f"${sg}$"


def plot_group_subgroup_graph(css_df: pd.DataFrame, node_size: int = 1200) -> None:
    """
    Plot the group-subgroup graph.
    :return: None.
    """

    with open("venv/Lib/site-packages/pymatgen/symmetry/symm_data.json", "r") as f:
        symm_data = json.load(f)
    symm_data_subg = symm_data["maximal_subgroups"]
    symm_data_abbr = {v: k for k, v in symm_data["abbreviated_spacegroup_symbols"].items()}

    sgs = sorted(css_df["space_group_no"].unique(), reverse=True)
    sg_info = {sg: ((css_df["space_group_no"] == sg).sum(),
                    symm_data_abbr.get(SpaceGroup.from_int_number(sg).symbol,
                                       SpaceGroup.from_int_number(sg).symbol))
               for sg in sgs}
    sg_info2 = {i[1]: i[0] for i in sg_info.values()}
    label_map_black = {v[1]: _spaceGroupConventional(v[1]) + f"\n({k})" for k, v in sg_info.items() if
                       np.log10(v[0]) >= np.log10(max(sg_info2.values())) / 3}
    label_map_white = {v[1]: _spaceGroupConventional(v[1]) + f"\n({k})" for k, v in sg_info.items() if
                       np.log10(v[0]) < np.log10(max(sg_info2.values())) / 3}

    graph = nx.DiGraph()
    for i in range(len(sgs)):
        for j in range(len(sgs)):
            if sgs[j] in symm_data_subg[str(sgs[i])] and i != j:
                graph.add_edge(sg_info[sgs[i]][1], sg_info[sgs[j]][1])

    not_connected_nodes = set(graph.nodes) - set([i[1] for i in graph.edges])
    for node2 in not_connected_nodes:
        for node1 in graph.nodes:
            if SpaceGroup(node2).is_subgroup(SpaceGroup(node1)):
                graph.add_edge(node1, node2)
                break

    nodes = [i for i in graph.nodes]
    orders = np.array([SpaceGroup(nodes[i]).order for i in range(len(nodes))])
    pos_x = [0] * len(nodes)
    unique, counts = np.unique(orders, return_counts=True)
    for count_pos in range(len(counts)):
        for i in range(counts[count_pos]):
            pos_x[np.where(orders == unique[count_pos])[0][i]] = (i + 1) / (counts[count_pos] + 1)
    orders_unique = np.sort(np.unique(orders))
    orders_dict = {orders_unique[i].item(): i for i in range(orders_unique.shape[0])}
    pos = {nodes[i]: (pos_x[i], orders_dict[orders[i]]) for i in range(len(nodes))}

    # It can happen that some edges are not shown because of overlapping.
    # One can curve them manually to avoid this.
    edges_curved = set()
    edges_straight = set(graph.edges) - edges_curved

    fig, ax = plt.subplots(figsize=(15, 10))
    cmap = "viridis"
    nx.draw_networkx_nodes(graph, pos, node_color=[np.log(sg_info2[i]) for i in graph.nodes], node_size=node_size,
                           edgecolors="black", linewidths=1, cmap=cmap, vmin=0,
                           vmax=np.log(max([i for i in sg_info2.values()])), ax=ax)
    nx.draw_networkx_labels(graph, pos, labels=label_map_black, font_size=6, font_color="black")
    nx.draw_networkx_labels(graph, pos, labels=label_map_white, font_size=6, font_color="white")
    nx.draw_networkx_edges(graph, pos, edgelist=edges_straight, edge_color="grey", node_size=node_size, width=1,
                           arrowsize=12, ax=ax)
    nx.draw_networkx_edges(graph, pos, edgelist=edges_curved, edge_color="grey", width=1, node_size=node_size,
                           arrowsize=12, connectionstyle='arc3, rad = -0.1', ax=ax)
    ax.tick_params(left=True, labelleft=True)
    ax.set_ylabel("Space group order", fontsize=12)
    ax.set_yticks(range(orders_unique.shape[0]))
    ax.set_yticklabels(list(map(str, orders_unique)), fontsize=12)
    ax.yaxis.set_tick_params(labelsize=12)

    cax = fig.add_axes([ax.get_position().x1 + 0.03, ax.get_position().y0 - 0.05, 0.02,
                        ax.get_position().y1 - ax.get_position().y0 + 0.1])

    sm = plt.cm.ScalarMappable(cmap=cmap,
                               norm=plt.Normalize(vmin=0, vmax=np.log10(max([i for i in sg_info2.values()]))))
    sm.set_array([])
    cbar = fig.colorbar(sm, aspect=70, cax=cax)
    cbar.ax.set_ylabel("lg (number of inequivalent structures)", fontsize=12)
    cbar.ax.yaxis.set_tick_params(labelsize=12)
    cbar.ax.yaxis.set_major_locator(MaxNLocator(integer=True))
    cbar.outline.set_visible(False)
    plt.tight_layout()
    plt.show()
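A short sketch of how the plotting helper above might be fed from the metadata pickles produced by `CSS.collect_data_mp`. The directory layout follows the class constants in `__init__.py`, while the concrete `result_dir` name is an assumption; note also that `plot_group_subgroup_graph` reads pymatgen's `symm_data.json` from a hardcoded `venv/Lib/site-packages/...` path, so it assumes that environment layout.

```python
# Sketch: assemble collected metadata and plot the group-subgroup graph.
import os

import pandas as pd

from csslib.utils import plot_group_subgroup_graph

# Path layout follows CSS: results/<result_dir>/ordered_representations_metadata/*.pkl.gz
metadata_dir = "results/CsPbI3_css/ordered_representations_metadata"  # illustrative path
css_df = pd.concat(
    [pd.read_pickle(os.path.join(metadata_dir, fname))  # gzip compression inferred from .pkl.gz
     for fname in os.listdir(metadata_dir)],
    ignore_index=True,
)

# css_df carries the columns built in _collect_data_one_composition, including "space_group_no".
plot_group_subgroup_graph(css_df, node_size=1200)
```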
csslib-1.0.dist-info/METADATA
ADDED
@@ -0,0 +1,49 @@
Metadata-Version: 2.4
Name: csslib
Version: 1.0
Summary: CSSlib is an open-source code for building configuration search space (CSS) of disordered crystals.
Author: A.V. Krautsou
Classifier: Programming Language :: Python :: 3
Classifier: Operating System :: OS Independent
Requires-Python: >=3.9
Description-Content-Type: text/markdown
Requires-Dist: jupyter==1.0.0
Requires-Dist: matplotlib==3.10.7
Requires-Dist: numpy==1.26.4
Requires-Dist: pandas==2.3.2
Requires-Dist: plotly==6.3.1
Requires-Dist: pymatgen==2025.6.14
Requires-Dist: pymatgen-analysis-defects==2025.1.18
Requires-Dist: scipy==1.15.3
Requires-Dist: tqdm==4.67.1
Requires-Dist: networkx==3.4.2

<p align="center">
<img src="./logo.jpg" width="20%" title="CSSlib" alt="CSSlib"/>
</p>

# CSSlib

CSSlib is an open-source code for building the configuration search space (CSS) of disordered crystals.

Installation
-----
**CSSlib** requires the **Supercell** program. Details on **Supercell** installation can be found on the corresponding [website](https://orex.github.io/supercell/download/).

Tutorial
-----
The best way to learn how to use **CSSlib** is through the [tutorial notebook](csslib_example.ipynb).

References & Citing
-----
If you use this code, please consider citing the works that actively used the CSS approach, which resulted in the creation of this library:

1. A.V. Krautsou, I.S. Humonen, V.D. Lazarev, R.A. Eremin, S.A. Budennyy<br/>
"Impact of crystal structure symmetry in training datasets on GNN-based energy assessments for chemically disordered CsPbI<sub>3</sub>"<br/>
https://doi.org/10.1038/s41598-025-92669-3
2. N.A. Matsokin, R.A. Eremin, A.A. Kuznetsova, I.S. Humonen, A.V. Krautsou, V.D. Lazarev, Y.Z. Vassilyeva, A.Y. Pak, S.A. Budennyy, A.G. Kvashnin, A.A. Osiptsov<br/>
"Discovery of chemically modified higher tungsten boride by means of hybrid GNN/DFT approach"<br/>
https://doi.org/10.1038/s41524-025-01628-z
3. R.A. Zaripov, R.A. Eremin, I.S. Humonen, A.V. Krautsou, V.V. Kuznetsov, K.E. GermanS, S.A. Budennyy, S.V. Levchenko<br/>
"First-principles data-driven approach for assessment of stability of Tc-C systems"<br/>
https://doi.org/10.1016/j.actamat.2025.121704
csslib-1.0.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
csslib/__init__.py,sha256=A9txZ15Pk-QcZJdxobZYc81RXa8EHPQ53cMdkr2dkqU,21221
csslib/config_logging.py,sha256=ozv8OmX3eZFUK6MpHru_ittTcohC7A0hbd-P7_qXNuc,2802
csslib/utils.py,sha256=-fTklp4Y5p5sShj_AI6lScAUm87OLNJjkR0CxzXSpEg,4766
csslib-1.0.dist-info/METADATA,sha256=5cARLFHgmzsXcBVvya8UoHUdz6syYyoPBf9832fnniY,2185
csslib-1.0.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
csslib-1.0.dist-info/top_level.txt,sha256=EaL08DpIS7Uy37P8JopRxfEPhmlhn6T7SIgL5DITtwo,7
csslib-1.0.dist-info/RECORD,,
csslib-1.0.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@
csslib