csslib 1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
csslib/__init__.py ADDED
@@ -0,0 +1,386 @@
1
+ from math import prod
2
+ from itertools import product
3
+ import os
4
+ import json
5
+ from tqdm import tqdm
6
+ import pandas as pd
7
+ from copy import deepcopy
8
+ import zipfile
9
+ import re
10
+ from collections import Counter
11
+ from concurrent.futures import ProcessPoolExecutor, as_completed
12
+ import subprocess
13
+ from pymatgen.core import Structure
14
+ from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
15
+ from pymatgen.io.cif import CifParser, CifBlock
16
+ from pymatgen.analysis.defects.generators import VoronoiInterstitialGenerator
17
+ import warnings
18
+ from csslib.config_logging import get_main_logger, get_supercell_worker_logger, get_collect_worker_logger
19
+ import sys
20
+
21
+ warnings.filterwarnings("ignore")
22
+
23
+
24
+ class CSS:
25
+ _RESULTS_DIR = "results"
26
+ _SUPERCELL_INPUT_CIFS_DIR = "disordered_structures"
27
+ _SUPERCELL_OUTPUT_DIR = "ordered_representations"
28
+ _ORDERED_REPRESENTATIONS_METADATA_DIR = "ordered_representations_metadata"
29
+
30
def __init__(self, config_filename: str) -> None:
    """
    Load the configuration and prepare the directory layout of the run.
    :param config_filename: Path to the JSON configuration file.
    :return: None.
    """

    with open(config_filename) as config_file:
        self.config = json.load(config_file)

    # Every output of the run is stored below results/<result_dir>.
    result_path = os.path.join(self._RESULTS_DIR, self.config["result_dir"])
    self._result_path = result_path
    self._supercell_input_cifs_path = os.path.join(result_path, self._SUPERCELL_INPUT_CIFS_DIR)
    self._supercell_output_path = os.path.join(result_path, self._SUPERCELL_OUTPUT_DIR)
    self._ordered_representations_metadata_path = os.path.join(
        result_path, self._ORDERED_REPRESENTATIONS_METADATA_DIR)

    os.makedirs(self._RESULTS_DIR, exist_ok=True)
    # No exist_ok here: an already existing run directory raises FileExistsError.
    os.makedirs(result_path)

    self._parser_data = None
    self._structure_sym = None
    self._scale_factor = 0
    self.logger = get_main_logger(result_path)
44
+
45
def read_structure(self) -> None:
    """
    Read the initial structure from the cif-file named in the configuration,
    symmetrize it and keep the CIF data block of the symmetrized structure.
    :return: None.
    """

    initial_structure = Structure.from_file(self.config["structure_filename"])
    self.logger.info("Initial structure is read.")
    self._structure_sym = SpacegroupAnalyzer(initial_structure).get_symmetrized_structure()

    # The symmetrized structure is written to a temporary cif-file and parsed back
    # so that its first CIF data block can be stored; the file is removed afterwards.
    temp_cif_path = os.path.join(self._result_path, "css_temp.cif")
    self._structure_sym.to(temp_cif_path, fmt="cif", symprec=True, refine_struct=True)
    cif_parser = CifParser(temp_cif_path)
    self._parser_data = next(iter(cif_parser._cif.data.values()))
    os.remove(temp_cif_path)
62
+
63
def generate_interstitial_structure(self) -> None:
    """
    Build the interstitial structure with the Voronoi algorithm and write it to a cif-file.
    Every interstitial site found is occupied by a Neptunium (Np) specie.
    :return: None.
    """

    self.logger.info("Preparing to generate interstitial structure.")
    cif_data = self._parser_data
    generator = VoronoiInterstitialGenerator()
    coordinate_keys = ("_atom_site_fract_x", "_atom_site_fract_y", "_atom_site_fract_z")

    for number, interstitial in enumerate(generator.generate(self._structure_sym, {"Np", })):
        cif_data["_atom_site_type_symbol"].append("Np")
        cif_data["_atom_site_label"].append(f"Np{number}")
        cif_data["_atom_site_symmetry_multiplicity"].append(str(interstitial.multiplicity))
        # Fractional coordinates are stored with seven decimals, one CIF column per axis.
        for axis, key in enumerate(coordinate_keys):
            cif_data[key].append(f"{interstitial.site.frac_coords[axis]:.7f}")
        cif_data["_atom_site_occupancy"].append("1.0")

    self._save_structure(self._parser_data, self._create_interstitial_structure_filename())
    self.logger.info("Interstitial structure is generated and saved at %s.", self._result_path)
84
+
85
def generate_substituted_disordered_structures(self) -> None:
    """
    Generate substituted disordered structures (with partial occupancies) according to the
    configuration file and save them to cif-files.

    For every entry of config["substitution"], each site of the specie to substitute receives
    a number of substituent atoms. A combination is kept only if, for every entry, the total
    number of substituent atoms lies within
    [substitution_low_limit_natoms, substitution_high_limit_natoms].
    :return: None.
    """

    self.logger.info("Preparing to generate disordered structures (with partial occupancies) ...")
    substitutions = self.config["substitution"]
    cell_natoms = sum(map(int, self._parser_data["_atom_site_symmetry_multiplicity"]))
    self._scale_factor = prod(map(int, self.config["supercell"].split("x")))
    supercell_natoms = cell_natoms * self._scale_factor

    for subst in substitutions:
        # The limits are fractions of the supercell atom count; + 0.001 protects the
        # truncation to an integer number of atoms against float round-off.
        subst["substitution_low_limit_natoms"] = int(subst["substitution_low_limit"] *
                                                     supercell_natoms + 0.001)
        subst["substitution_high_limit_natoms"] = int(subst["substitution_high_limit"] *
                                                      supercell_natoms + 0.001)
        subst["indices_to_substitute"] = [
            idx for idx, symbol in enumerate(self._parser_data["_atom_site_type_symbol"])
            if symbol == subst["specie_to_substitute"]]

    # Enumerate the number of substituent atoms per (substitution, site) pair.
    # NOTE: the number of candidates grows exponentially with the number of sites.
    subst_natoms_list = []
    product_range = range(1 + max(subst["substitution_high_limit_natoms"] for subst in substitutions))
    product_repeat = sum(len(subst["indices_to_substitute"]) for subst in substitutions)

    for subst_natoms in product(product_range, repeat=product_repeat):
        idx_right = 0
        for subst in substitutions:
            idx_left = idx_right
            idx_right += len(subst["indices_to_substitute"])
            total_natoms = sum(subst_natoms[idx_left: idx_right])
            # Enforce both limits (the low limit used to be computed but never checked).
            if not (subst["substitution_low_limit_natoms"]
                    <= total_natoms
                    <= subst["substitution_high_limit_natoms"]):
                break
        else:
            subst_natoms_list.append(subst_natoms)

    os.makedirs(self._supercell_input_cifs_path)

    # Remaining occupancies are differences of floats, so a fully substituted site may end
    # up marginally below zero; such values are treated as zero instead of being rejected.
    occupancy_tolerance = 1e-9

    for subst_natoms in subst_natoms_list:
        p_data = deepcopy(self._parser_data)
        k = 0  # running index into subst_natoms
        indices_to_substitute_occup = {i: 1.0 for subst in substitutions
                                       for i in subst["indices_to_substitute"]}
        for subst in substitutions:
            for site_idx in subst["indices_to_substitute"]:
                # The substituent is added as a new site on top of the substituted one.
                p_data["_atom_site_type_symbol"].append(subst["substitute_with"])
                p_data["_atom_site_label"].append(subst["substitute_with"] + str(k))
                for key in ("_atom_site_symmetry_multiplicity", "_atom_site_fract_x",
                            "_atom_site_fract_y", "_atom_site_fract_z"):
                    p_data[key].append(p_data[key][site_idx])
                new_atom_site_occupancy = (subst_natoms[k] / self._scale_factor /
                                           int(p_data["_atom_site_symmetry_multiplicity"][site_idx]))
                p_data["_atom_site_occupancy"].append(f"{new_atom_site_occupancy:.7f}")
                indices_to_substitute_occup[site_idx] -= new_atom_site_occupancy
                k += 1

        for idx, occup in indices_to_substitute_occup.items():
            if occup < -occupancy_tolerance:
                # More substituent atoms than the site can hold: skip this combination.
                break
            p_data["_atom_site_occupancy"][idx] = f"{max(occup, 0.0):.7f}"
        else:
            supercell_structure_filename = self._create_supercell_structure_filename(p_data)
            self._save_structure(p_data, self._SUPERCELL_INPUT_CIFS_DIR, supercell_structure_filename)
            self.logger.debug("%s disordered structure (with partial occupancies) is generated and saved.",
                              supercell_structure_filename)
    self.logger.info("%d disordered structures (with partial occupancies) are generated and saved at %s.",
                     len(os.listdir(self._supercell_input_cifs_path)), self._supercell_input_cifs_path)
158
+
159
def _create_supercell_structure_filename(self, parser_data: CifBlock) -> str:
    """
    Compose the filename of a disordered structure (with partial occupancies).
    The name concatenates, for every site, its type symbol and the number of atoms
    it contributes (occupancy * multiplicity * supercell scale factor).
    :param parser_data: Structural data to save.
    :return: Filename.
    """

    occupancies = parser_data["_atom_site_occupancy"]
    multiplicities = parser_data["_atom_site_symmetry_multiplicity"]
    name_parts = []
    for idx, symbol in enumerate(parser_data["_atom_site_type_symbol"]):
        # + 0.001 keeps the truncation from dropping an atom because of round-off.
        natoms = int(float(occupancies[idx]) * int(multiplicities[idx]) * self._scale_factor + 0.001)
        name_parts.append(symbol + f"{natoms}")
    return "".join(name_parts) + ".cif"
172
+
173
def _create_interstitial_structure_filename(self) -> str:
    """
    Compose the filename of the interstitial structure from the name of the
    initial structure file: <name without extension>_interstitial.cif.
    :return: Filename.
    """

    source_name = os.path.basename(self.config["structure_filename"])
    stem, _extension = os.path.splitext(source_name)
    return f"{stem}_interstitial.cif"
180
+
181
def _save_structure(self, parser_data: CifBlock, *args: str) -> None:
    """
    Write structural data to a cif-file inside the results' directory.
    :param parser_data: Structural data to save.
    :param args: Path components (sub-directories and filename) relative to the results' directory.
    :return: None.
    """

    target_path = os.path.join(self._result_path, *args)
    with open(target_path, "w") as cif_file:
        cif_file.write(str(parser_data))
191
+
192
@staticmethod
def _init_supercell_worker(result_path: str) -> None:
    """
    Set up the logger of a supercell worker and publish it as a module-level global.
    :param result_path: Path to the results' directory.
    :return: None.
    """

    global supercell_worker_logger
    supercell_worker_logger = get_supercell_worker_logger(result_path)
203
+
204
+ @staticmethod
205
+ def _supercell_worker(cmd: str, compound: str) -> int:
206
+ """
207
+ Run a Supercell worker (a process with Supercell software instance).
208
+ :param cmd: Command to run.
209
+ :return: None.
210
+ """
211
+
212
+ worker_result = subprocess.run(cmd, shell=True, text=True, encoding="utf-8", capture_output=True)
213
+ if worker_result.returncode == 0:
214
+ num_struct_before = re.search(r"The total number of combinations is (\d+)", worker_result.stdout).group(1)
215
+ num_struct_after = re.search(r"Combinations after merge: (\d+)", worker_result.stdout).group(1)
216
+ supercell_worker_logger.info("%s - DONE! - The total number of structures: %s - Symmetrically inequivalent structures: %s",
217
+ compound, num_struct_before, num_struct_after)
218
+ else:
219
+ supercell_worker_logger.info("%s - FAILED! - %s", compound, worker_result.stderr)
220
+ return worker_result.returncode
221
+
222
def run_supercell(self) -> None:
    """
    Convert disordered structures (with partial occupancies) to ordered representations
    (supercell structures) by running Supercell software, one process per composition.
    A dry run is performed first; if it reports an error, the error is logged and the
    program exits with status 1.
    :return: None.
    """

    self.logger.info("Preparing to check out possibility of creation ordered representations of disordered structures ...")
    error_message = self._dry_run_supercell()
    if error_message is not None:
        self.logger.error("%s Change config-file to simplify CSS and try again.", error_message.rstrip())
        sys.exit(1)
    self.logger.info("Checking out possibility of creation ordered representations of disordered structures is finished successfully!")
    self.logger.info("Preparing to generate ordered representations of disordered structures ...")
    os.makedirs(self._supercell_output_path)

    input_filenames = os.listdir(self._supercell_input_cifs_path)
    futures = []
    with (tqdm(range(len(input_filenames)),
               desc="Creating ordered representations of disordered structures",
               unit=" composition",
               ncols=200)
          as pbar,
          ProcessPoolExecutor(max_workers=self.config["num_workers"],
                              initializer=self._init_supercell_worker,
                              initargs=(self._result_path,))
          as pool):
        for input_filename in input_filenames:
            compound = input_filename.replace('.cif', '')
            input_path = os.path.join(self._supercell_input_cifs_path, input_filename)
            archive_stem = os.path.join(self._supercell_output_path, compound)
            cmd = (f"supercell -i {input_path} -m "
                   f"-s {self.config['supercell']} "
                   f"-a {archive_stem}.zip "
                   f"-o {compound}")
            future = pool.submit(self._supercell_worker, cmd, compound)
            # Advance the progress bar whenever a composition is finished.
            future.add_done_callback(lambda _done: pbar.update())
            futures.append(future)

        # A worker reports failure through a non-zero return code.
        num_failed_tasks = sum(future.result() != 0 for future in as_completed(futures))

    self.logger.info("Ordered representations of disordered structures are generated and saved at %s.",
                     self._supercell_output_path)
    if num_failed_tasks:
        self.logger.info("Generation of ordered representations of disordered structures is failed for %d compound(s)!", num_failed_tasks)
    else:
        self.logger.info("Generation of ordered representations of disordered structures is finished successfully!")
265
+
266
+ @staticmethod
267
+ def _dry_supercell_worker(cmd: str) -> str | None:
268
+ """
269
+ Run a Supercell worker (a process with Supercell software instance) in dry-run mode
270
+ to check the possibility of creation ordered representations of disordered structures.
271
+ :param cmd: Command to run.
272
+ :return: Error message if something went wrong, None otherwise.
273
+ """
274
+
275
+ worker_result = subprocess.run(cmd, shell=True, text=True, encoding="utf-8", capture_output=True)
276
+ if worker_result.returncode == 0:
277
+ return None
278
+ return worker_result.stderr
279
+
280
def _dry_run_supercell(self) -> str | None:
    """
    Check the possibility of creation ordered representations of disordered structures
    by running Supercell in dry-run mode for every disordered structure.
    :return: None if the possibility exists, error message of the first failed check otherwise.
    """

    input_filenames = os.listdir(self._supercell_input_cifs_path)
    with tqdm(range(len(input_filenames)),
              desc="Checking out possibility of creation ordered representations of disordered structures",
              unit=" composition",
              ncols=200) as pbar:
        pool = ProcessPoolExecutor(max_workers=self.config["num_workers"])
        try:
            futures = []
            for supercell_structure_filename in input_filenames:
                cmd = f"supercell -i {os.path.join(self._supercell_input_cifs_path, supercell_structure_filename)} "\
                      f"-s {self.config['supercell']} -d -v 0"
                future = pool.submit(self._dry_supercell_worker, cmd)
                future.add_done_callback(lambda p: pbar.update())
                futures.append(future)
            for future in as_completed(futures):
                if (error_message := future.result()) is not None:
                    # The first failure decides the outcome; the remaining checks are dropped.
                    return error_message
            return None
        finally:
            # Always release the pool, also when submitting or a worker raises.
            # Cancelling is a no-op on the success path because every future is done by then.
            pool.shutdown(wait=True, cancel_futures=True)
305
+
306
@staticmethod
def _init_collect_worker(fields: tuple, substitute_with_species: tuple,
                         ordered_representations_metadata_path: str, result_path: str) -> None:
    """
    Initialize a collect worker: store its shared settings and its logger as module-level globals.
    :param fields: Names of dataframe columns where metadata collected.
    :param substitute_with_species: Species that were used as substitutes.
    :param ordered_representations_metadata_path: Path to the directory where metadata dataframes are saved.
    :param result_path: Path to the results' directory.
    :return: None.
    """

    global fields_, substitute_with_species_, ordered_representations_metadata_path_, collect_worker_logger_
    fields_, substitute_with_species_ = fields, substitute_with_species
    ordered_representations_metadata_path_ = ordered_representations_metadata_path
    collect_worker_logger_ = get_collect_worker_logger(result_path)
324
+
325
@staticmethod
def _collect_data_one_composition(archive_path: str) -> None:
    """
    Gather meta-information about every ordered representation stored in one archive
    and pickle it as a pandas dataframe named after the archive.
    :param archive_path: Path to archive containing ordered representations of disordered structure.
    :return: None.
    """

    composition_name = os.path.splitext(os.path.split(archive_path)[1])[0]
    metadata = {field: [] for field in fields_}

    with zipfile.ZipFile(archive_path, "r") as archive:
        for member_name in archive.namelist():
            cif_text = archive.read(member_name).decode("utf-8")
            structure = CifParser.from_str(cif_text).get_structures(primitive=False)[0]
            analyzer = SpacegroupAnalyzer(structure)
            species_count = Counter(map(str, structure.species))

            metadata["cif_data"].append(cif_text)
            metadata["structure_filename"].append(member_name.replace(".zip", ""))
            metadata["composition"].append(str(structure.composition))
            metadata["space_group_no"].append(int(analyzer.get_space_group_number()))
            metadata["space_group_symbol"].append(analyzer.get_space_group_symbol())
            # The weight is read from the "_w<number>.cif" part of the member name.
            metadata["weight"].append(int(re.search(r"_w(.*?).cif", member_name).group(1)))
            for specie in substitute_with_species_:
                metadata[f"{specie}_concentration"].append(species_count[specie] / len(structure))

    metadata_df = pd.DataFrame.from_dict(metadata)
    metadata_df.to_pickle(os.path.join(ordered_representations_metadata_path_,
                                       composition_name + ".pkl.gz"))
    collect_worker_logger_.info(
        "%s - DONE! - The total number of structures: %d",
        composition_name, metadata_df.shape[0])
356
+
357
def collect_data_mp(self) -> None:
    """
    Collect meta-information about all ordered representations of disordered structures
    and save it to pandas dataframes (one per archive, processed in parallel).
    Archives whose processing raised an exception are logged and counted instead of
    being silently ignored.
    :return: None.
    """

    self.logger.info("Preparing to collect ordered representations' metadata ...")
    # Sorted so that the order of the concentration columns is the same on every run.
    substitute_with_species = tuple(sorted({subst["substitute_with"] for subst in self.config["substitution"]}))
    fields = ("cif_data", "structure_filename", "composition", "space_group_no", "space_group_symbol", "weight")
    fields += tuple(f"{specie}_concentration" for specie in substitute_with_species)
    os.makedirs(self._ordered_representations_metadata_path)
    archive_paths = [os.path.join(self._supercell_output_path, archive_filename)
                     for archive_filename in os.listdir(self._supercell_output_path)]

    num_failed_tasks = 0
    with (tqdm(range(len(archive_paths)),
               desc="Collecting ordered representations' metadata of disordered structures",
               unit=" composition",
               ncols=200)
          as pbar,
          ProcessPoolExecutor(max_workers=self.config["num_workers"],
                              initializer=self._init_collect_worker,
                              initargs=(fields, substitute_with_species,
                                        self._ordered_representations_metadata_path, self._result_path))
          as pool):
        futures = {}
        for archive_path in archive_paths:
            future = pool.submit(self._collect_data_one_composition, archive_path)
            future.add_done_callback(lambda p: pbar.update())
            futures[future] = archive_path

        # Retrieve the outcome of every task: an exception raised in a worker is
        # only visible through its future.
        for future in as_completed(futures):
            error = future.exception()
            if error is not None:
                num_failed_tasks += 1
                self.logger.error("%s - FAILED! - %r", futures[future], error)

    self.logger.info("Ordered representations' metadata are collected and saved at %s.",
                     self._ordered_representations_metadata_path)
    if num_failed_tasks:
        self.logger.error("Collecting ordered representations' metadata failed for %d archive(s)!",
                          num_failed_tasks)
@@ -0,0 +1,94 @@
1
+ import logging
2
+ import logging.config
3
+ import os
4
+ from multiprocessing import current_process
5
+
6
+
7
# Template passed to logging.config.dictConfig() by configure_logging().
# The "filename" of the file handler is left empty here and filled in by
# configure_logging() before the configuration is applied.
log_config = {
    "version": 1,
    "disable_existing_loggers": True,
    "formatters": {
        # Console output: level and message only.
        "console_formatter": {
            "format": "%(levelname)s - %(message)s"
        },
        # Log files additionally carry a timestamp.
        "file_formatter": {
            "format": "[%(asctime)s] %(levelname)s - %(message)s",
            "datefmt": "%Y-%m-%d %H:%M:%S"
        }
    },
    "handlers": {
        # INFO and above go to stdout.
        "console_handler": {
            "level": "INFO",
            "formatter": "console_formatter",
            "class": "logging.StreamHandler",
            "stream": "ext://sys.stdout",
        },
        # DEBUG and above are appended to the log file.
        "file_handler": {
            "level": "DEBUG",
            "formatter": "file_formatter",
            "class": "logging.FileHandler",
            "filename": "",
            "mode": "a",
            "encoding": "utf-8"
        }
    },
    "loggers": {
        # Main process: console and file.
        "main": {
            "handlers": ["console_handler", "file_handler"],
            "level": "DEBUG",
            "propagate": False
        },
        # Worker loggers write to the file handler only.
        "supercell_worker": {
            "handlers": ["file_handler"],
            "level": "DEBUG",
            "propagate": False
        },
        "collect_worker": {
            "handlers": ["file_handler"],
            "level": "DEBUG",
            "propagate": False
        }
    }
}
53
+
54
+
55
def configure_logging(result_path: str, log_filename: str = "main") -> None:
    """
    Apply the logging configuration with the log file placed at
    result_path/logs/<log_filename>.log (the directory is created if missing).
    :param result_path: Logs will be saved in result_path/logs.
    :param log_filename: Log filename (without extension).
    :return: None.
    """
    logs_dir = os.path.join(result_path, "logs")
    os.makedirs(logs_dir, exist_ok=True)
    # The shared configuration template is updated in place before it is applied.
    log_config["handlers"]["file_handler"]["filename"] = os.path.join(logs_dir, f"{log_filename}.log")
    logging.config.dictConfig(log_config)
65
+
66
+
67
def get_main_logger(result_path: str) -> logging.Logger:
    """
    Configure logging with the default log filename and return the "main" logger.
    :param result_path: Logs will be saved in result_path/logs.
    :return: Logger.
    """
    configure_logging(result_path)
    main_logger = logging.getLogger("main")
    return main_logger
75
+
76
+
77
def get_supercell_worker_logger(result_path: str) -> logging.Logger:
    """
    Configure logging for the current process and return the "supercell_worker" logger.
    The log file is named after the current process: supercell_<process name>.log.
    :param result_path: Logs will be saved in result_path/logs.
    :return: Logger.
    """
    process_name = current_process().name
    configure_logging(result_path, f"supercell_{process_name}")
    worker_logger = logging.getLogger("supercell_worker")
    return worker_logger
85
+
86
+
87
def get_collect_worker_logger(result_path: str) -> logging.Logger:
    """
    Configure logging for the current process and return the "collect_worker" logger.
    The log file is named after the current process: collect_<process name>.log.
    :param result_path: Logs will be saved in result_path/logs.
    :return: Logger.
    """
    process_name = current_process().name
    configure_logging(result_path, f"collect_{process_name}")
    worker_logger = logging.getLogger("collect_worker")
    return worker_logger
csslib/utils.py ADDED
@@ -0,0 +1,101 @@
1
+ import matplotlib.pyplot as plt
2
+ import pandas as pd
3
+ import json
4
+ import re
5
+ import numpy as np
6
+ from pymatgen.symmetry.groups import SpaceGroup
7
+ from matplotlib.ticker import MaxNLocator
8
+ import networkx as nx
9
+
10
+
11
def _spaceGroupConventional(sg: str) -> str:
    """
    Turn a space group symbol into a math-mode string in which every "-<digit>"
    is written as "\\bar{<digit>}".
    :param sg: Space group symbol.
    :return: Formatted space group symbol wrapped in "$...$".
    """

    # "\\\\" in the replacement template yields one literal backslash; "\\1" is the digit.
    with_bars = re.sub(r"-(\d)", r"\\bar{\1}", sg)
    return "$" + with_bars + "$"
20
+
21
+
22
def plot_group_subgroup_graph(css_df: pd.DataFrame, node_size: int = 1200) -> None:
    """
    Plot the group-subgroup graph of the space groups present in a dataframe.
    Nodes are space groups colored by the logarithm of the number of structures,
    edges point from a group to its subgroup, and the vertical position of a node
    corresponds to the order of its space group.
    :param css_df: Dataframe with a "space_group_no" column (one row per structure).
    :param node_size: Size of the graph nodes.
    :return: None.
    """

    # Local imports: only needed to locate pymatgen's symmetry data file.
    import inspect
    import os

    # symm_data.json is looked up next to the module that defines SpaceGroup, so the
    # function no longer depends on a "venv/Lib/site-packages" directory in the CWD.
    symm_data_path = os.path.join(os.path.dirname(inspect.getfile(SpaceGroup)), "symm_data.json")
    with open(symm_data_path, "r") as f:
        symm_data = json.load(f)
    symm_data_subg = symm_data["maximal_subgroups"]
    symm_data_abbr = {v: k for k, v in symm_data["abbreviated_spacegroup_symbols"].items()}

    # sg_info: space group number -> (number of structures, space group symbol)
    sgs = sorted(css_df["space_group_no"].unique(), reverse=True)
    sg_info = {sg: ((css_df["space_group_no"] == sg).sum(),
                    symm_data_abbr.get(SpaceGroup.from_int_number(sg).symbol,
                                       SpaceGroup.from_int_number(sg).symbol))
               for sg in sgs}
    # sg_info2: space group symbol -> number of structures
    sg_info2 = {i[1]: i[0] for i in sg_info.values()}
    # Labels are split into two sets (drawn black / white) by the node's structure count.
    label_map_black = {v[1]: _spaceGroupConventional(v[1]) + f"\n({k})" for k, v in sg_info.items() if
                       np.log10(v[0]) >= np.log10(max(sg_info2.values())) / 3}
    label_map_white = {v[1]: _spaceGroupConventional(v[1]) + f"\n({k})" for k, v in sg_info.items() if
                       np.log10(v[0]) < np.log10(max(sg_info2.values())) / 3}

    # Edges between space groups related as group / maximal subgroup.
    graph = nx.DiGraph()
    for i in range(len(sgs)):
        for j in range(len(sgs)):
            if sgs[j] in symm_data_subg[str(sgs[i])] and i != j:
                graph.add_edge(sg_info[sgs[i]][1], sg_info[sgs[j]][1])

    # Nodes without an incoming edge are attached to the first node they are a subgroup of.
    not_connected_nodes = set(graph.nodes) - set([i[1] for i in graph.edges])
    for node2 in not_connected_nodes:
        for node1 in graph.nodes:
            if SpaceGroup(node2).is_subgroup(SpaceGroup(node1)):
                graph.add_edge(node1, node2)
                break

    # Layout: y is the rank of the space group order, x spreads nodes of equal order evenly.
    nodes = [i for i in graph.nodes]
    orders = np.array([SpaceGroup(nodes[i]).order for i in range(len(nodes))])
    pos_x = [0] * len(nodes)
    unique, counts = np.unique(orders, return_counts=True)
    for count_pos in range(len(counts)):
        for i in range(counts[count_pos]):
            pos_x[np.where(orders == unique[count_pos])[0][i]] = (i + 1) / (counts[count_pos] + 1)
    orders_unique = np.sort(np.unique(orders))
    orders_dict = {orders_unique[i].item(): i for i in range(orders_unique.shape[0])}
    pos = {nodes[i]: (pos_x[i], orders_dict[orders[i]]) for i in range(len(nodes))}

    edges_curved = set()  # It can happen that some edges are not shown because of
    # overlapping. One can curve them manually to avoid this.
    edges_straight = set(graph.edges) - edges_curved

    fig, ax = plt.subplots(figsize=(15, 10))
    cmap = "viridis"
    nx.draw_networkx_nodes(graph, pos, node_color=[np.log(sg_info2[i]) for i in graph.nodes], node_size=node_size,
                           edgecolors="black", linewidths=1, cmap=cmap, vmin=0,
                           vmax=np.log(max([i for i in sg_info2.values()])), ax=ax)
    nx.draw_networkx_labels(graph, pos, labels=label_map_black, font_size=6, font_color="black")
    nx.draw_networkx_labels(graph, pos, labels=label_map_white, font_size=6, font_color="white")
    nx.draw_networkx_edges(graph, pos, edgelist=edges_straight, edge_color="grey", node_size=node_size, width=1,
                           arrowsize=12, ax=ax)
    nx.draw_networkx_edges(graph, pos, edgelist=edges_curved, edge_color="grey", width=1, node_size=node_size,
                           arrowsize=12, connectionstyle='arc3, rad = -0.1', ax=ax)
    ax.tick_params(left=True, labelleft=True)
    ax.set_ylabel("Space group order", fontsize=12)
    ax.set_yticks(range(orders_unique.shape[0]))
    ax.set_yticklabels(list(map(str, orders_unique)), fontsize=12)
    ax.yaxis.set_tick_params(labelsize=12)

    cax = fig.add_axes([ax.get_position().x1 + 0.03, ax.get_position().y0 - 0.05, 0.02,
                        ax.get_position().y1 - ax.get_position().y0 + 0.1])

    # The colorbar is labelled in decimal logarithms; nodes are colored with natural
    # logarithms over [0, ln(max)], which gives the same normalized color scale.
    sm = plt.cm.ScalarMappable(cmap=cmap,
                               norm=plt.Normalize(vmin=0, vmax=np.log10(max([i for i in sg_info2.values()]))))
    sm.set_array([])
    cbar = fig.colorbar(sm, aspect=70, cax=cax)
    cbar.ax.set_ylabel("lg (number of inequivalent structures)", fontsize=12)
    cbar.ax.yaxis.set_tick_params(labelsize=12)
    cbar.ax.yaxis.set_major_locator(MaxNLocator(integer=True))
    cbar.outline.set_visible(False)
    plt.tight_layout()
    plt.show()
@@ -0,0 +1,49 @@
1
+ Metadata-Version: 2.4
2
+ Name: csslib
3
+ Version: 1.0
4
+ Summary: CSSlib is an open-source code for building configuration search space (CSS) of disordered crystals.
5
+ Author: A.V. Krautsou
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: Operating System :: OS Independent
8
+ Requires-Python: >=3.9
9
+ Description-Content-Type: text/markdown
10
+ Requires-Dist: jupyter==1.0.0
11
+ Requires-Dist: matplotlib==3.10.7
12
+ Requires-Dist: numpy==1.26.4
13
+ Requires-Dist: pandas==2.3.2
14
+ Requires-Dist: plotly==6.3.1
15
+ Requires-Dist: pymatgen==2025.6.14
16
+ Requires-Dist: pymatgen-analysis-defects==2025.1.18
17
+ Requires-Dist: scipy==1.15.3
18
+ Requires-Dist: tqdm==4.67.1
19
+ Requires-Dist: networkx==3.4.2
20
+
21
+ <p align="center">
22
+ <img src="./logo.jpg" width="20%" title="CSSlib" alt="CSSlib"/>
23
+ </p>
24
+
25
+ # CSSlib
26
+
27
+ CSSlib is an open-source code for building configuration search space (CSS) of disordered crystals.
28
+
29
+ Installation
30
+ -----
31
+ **CSSlib** requires the **Supercell** program. Details on **Supercell** installation can be found on its [website](https://orex.github.io/supercell/download/).
32
+
33
+ Tutorial
34
+ -----
35
+ The best way to learn how to use **CSSlib** is through the [tutorial notebook](csslib_example.ipynb).
36
+
37
+ References & Citing
38
+ -----
39
+ If you use this code, please consider citing works that actively used the CSS approach, which resulted in the creation of this library:
40
+
41
+ 1. A.V. Krautsou, I.S. Humonen, V.D. Lazarev, R.A. Eremin, S.A. Budennyy<br/>
42
+ "Impact of crystal structure symmetry in training datasets on GNN-based energy assessments for chemically disordered CsPbI<sub>3</sub>"<br/>
43
+ https://doi.org/10.1038/s41598-025-92669-3
44
+ 2. N.A. Matsokin, R.A. Eremin, A.A. Kuznetsova, I.S. Humonen, A.V. Krautsou, V.D. Lazarev, Y.Z. Vassilyeva, A.Y. Pak, S.A. Budennyy, A.G. Kvashnin, A.A. Osiptsov<br/>
45
+ "Discovery of chemically modified higher tungsten boride by means of hybrid GNN/DFT approach"<br/>
46
+ https://doi.org/10.1038/s41524-025-01628-z
47
+ 3. R.A. Zaripov, R.A. Eremin, I.S. Humonen, A.V. Krautsou, V.V. Kuznetsov, K.E. German, S.A. Budennyy, S.V. Levchenko<br/>
48
+ "First-principles data-driven approach for assessment of stability of Tc-C systems"<br/>
49
+ https://doi.org/10.1016/j.actamat.2025.121704
@@ -0,0 +1,7 @@
1
+ csslib/__init__.py,sha256=A9txZ15Pk-QcZJdxobZYc81RXa8EHPQ53cMdkr2dkqU,21221
2
+ csslib/config_logging.py,sha256=ozv8OmX3eZFUK6MpHru_ittTcohC7A0hbd-P7_qXNuc,2802
3
+ csslib/utils.py,sha256=-fTklp4Y5p5sShj_AI6lScAUm87OLNJjkR0CxzXSpEg,4766
4
+ csslib-1.0.dist-info/METADATA,sha256=5cARLFHgmzsXcBVvya8UoHUdz6syYyoPBf9832fnniY,2185
5
+ csslib-1.0.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
6
+ csslib-1.0.dist-info/top_level.txt,sha256=EaL08DpIS7Uy37P8JopRxfEPhmlhn6T7SIgL5DITtwo,7
7
+ csslib-1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ csslib