graph-id-core 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- graph_id_core-0.1.0/PKG-INFO +35 -0
- graph_id_core-0.1.0/README.md +17 -0
- graph_id_core-0.1.0/build.py +108 -0
- graph_id_core-0.1.0/graph_id/__init__.py +1 -0
- graph_id_core-0.1.0/graph_id/analysis/__init__.py +0 -0
- graph_id_core-0.1.0/graph_id/analysis/compositional_sequence.py +75 -0
- graph_id_core-0.1.0/graph_id/analysis/graphs.py +404 -0
- graph_id_core-0.1.0/graph_id/analysis/local_env.py +122 -0
- graph_id_core-0.1.0/graph_id/commands/gid.py +44 -0
- graph_id_core-0.1.0/graph_id/core/__init__.py +0 -0
- graph_id_core-0.1.0/graph_id/core/distance_clustering_graph_id.py +138 -0
- graph_id_core-0.1.0/graph_id/core/graph_id.py +209 -0
- graph_id_core-0.1.0/pyproject.toml +30 -0
- graph_id_core-0.1.0/setup.py +31 -0
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: graph-id-core
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary:
|
|
5
|
+
Author: Koki Muraoka
|
|
6
|
+
Author-email: muraok_k@chemsys.t.u-tokyo.ac.jp
|
|
7
|
+
Requires-Python: >=3.10,<3.14
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
13
|
+
Requires-Dist: pybind11 (==2.11.1)
|
|
14
|
+
Requires-Dist: pymatgen (>=2025.4.20,<2026.0.0)
|
|
15
|
+
Requires-Dist: scikit-learn (>=0.24.1)
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Graph ID
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
### PyPI
|
|
24
|
+
```
|
|
25
|
+
pip install graph-id-core
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### GitHub
|
|
29
|
+
```
|
|
30
|
+
git clone https://github.com/kmu/graph-id-core.git
|
|
31
|
+
git submodule init
|
|
32
|
+
git submodule update
|
|
33
|
+
pip install -e .
|
|
34
|
+
```
|
|
35
|
+
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
import subprocess
|
|
4
|
+
import sys
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from setuptools import Extension
|
|
8
|
+
from setuptools.command.build_ext import build_ext
|
|
9
|
+
|
|
10
|
+
# https://github.com/pybind/cmake_example/blob/master/setup.py
|
|
11
|
+
|
|
12
|
+
# Convert distutils Windows platform specifiers to CMake -A arguments.
# Keys are distutils platform names, values are the matching CMake
# architecture names. NOTE(review): nothing in the visible part of this
# build script reads this table - confirm whether it is still needed.
PLAT_TO_CMAKE = {
    "win32": "Win32",
    "win-amd64": "x64",
    "win-arm32": "ARM",
    "win-arm64": "ARM64",
}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# A CMakeExtension needs a sourcedir instead of a file list.
|
|
22
|
+
# The name must be the _single_ output extension from the CMake build.
|
|
23
|
+
# If you need multiple extensions, see scikit-build.
|
|
24
|
+
class CMakeExtension(Extension):
    """Extension that is described by a CMake source directory.

    Unlike a regular ``Extension`` no source files are listed; the name must
    be the single output extension produced by the CMake build.
    """

    def __init__(self, name: str, sourcedir: str = "") -> None:
        """Register ``name`` and remember the resolved CMake source directory.

        Args:
            name: name of the extension module produced by CMake.
            sourcedir: directory handed to ``cmake``; an empty string
                resolves to the current working directory.
        """
        # The source list is intentionally empty: CMake decides what to compile.
        super().__init__(name, sources=[])
        resolved_dir = Path(sourcedir).resolve()
        self.sourcedir = os.fspath(resolved_dir)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class CMakeBuild(build_ext):
    """``build_ext`` command that builds each extension by invoking CMake."""

    def build_extension(self, ext: CMakeExtension) -> None:
        """Configure and build ``ext`` with two ``cmake`` subprocess calls.

        The compiled library is directed to the directory setuptools expects
        for ``ext.name``. The environment variables ``DEBUG`` (only when the
        command's own ``debug`` option is ``None``), ``CMAKE_ARGS``,
        ``ARCHFLAGS`` (macOS only) and ``CMAKE_BUILD_PARALLEL_LEVEL`` are
        consulted.

        Args:
            ext: the extension to build; its ``sourcedir`` is passed to CMake.

        Raises:
            subprocess.CalledProcessError: if either ``cmake`` invocation
                exits with a non-zero status.
        """
        # Must be in this form due to bug in .resolve() only fixed in Python 3.10+
        extdir = (Path.cwd() / self.get_ext_fullpath(ext.name)).parent.resolve()

        # The command-line debug option wins; otherwise fall back to $DEBUG.
        if self.debug is None:
            debug = int(os.environ.get("DEBUG", 0))
        else:
            debug = self.debug
        cfg = "Debug" if debug else "Release"

        # The output directory needs its trailing separator for auto-detection
        # and inclusion of auxiliary "native" libs.
        cmake_args = [
            f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}{os.sep}",
            f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE={extdir}{os.sep}",
            f"-DPYTHON_EXECUTABLE={sys.executable}",
            f"-DCMAKE_BUILD_TYPE={cfg}",  # not used on MSVC, but no harm
        ]

        # Extra CMake arguments supplied through the environment
        # (needed e.g. to build for ARM OSx on conda-forge).
        cmake_args.extend(token for token in os.environ.get("CMAKE_ARGS", "").split(" ") if token)

        # Pass in the version to C++.
        cmake_args.append(f"-DVERSION_INFO={self.distribution.get_version()}")

        build_args = []
        if sys.platform.startswith("win32"):
            build_args.extend(["--config", cfg])

        if sys.platform.startswith("darwin"):
            # Cross-compile support for macOS - respect ARCHFLAGS if set
            archs = re.findall(r"-arch (\S+)", os.environ.get("ARCHFLAGS", ""))
            if archs:
                cmake_args.append("-DCMAKE_OSX_ARCHITECTURES={}".format(";".join(archs)))

        # CMAKE_BUILD_PARALLEL_LEVEL controls the parallel level for every
        # generator; only when it is absent is the -j value given to
        # build_ext forwarded (CMake 3.12+ only).
        parallel_requested = hasattr(self, "parallel") and self.parallel
        if parallel_requested and "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ:
            build_args.append(f"-j{self.parallel}")

        build_temp = Path(self.build_temp) / ext.name
        if not build_temp.exists():
            build_temp.mkdir(parents=True)

        # In environments where CMake was installed with pip (Google Colab,
        # for example) a PYTHONPATH makes `import cmake` fail and the build
        # breaks, so the variable is dropped from the child environment.
        env = {key: value for key, value in os.environ.items() if key != "PYTHONPATH"}

        # Step 1 configures the project, step 2 builds it in the same directory.
        for command in (
            ["cmake", ext.sourcedir, *cmake_args],
            ["cmake", "--build", ".", *build_args],
        ):
            subprocess.run(
                command,
                cwd=build_temp,
                check=True,
                env=env,
            )
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def build(setup_kwargs):
    """Add the CMake-built extension to ``setup_kwargs`` in place.

    The mapping receives the ``graph_id_cpp`` extension, the ``build_ext``
    command override and ``zip_safe=False``. Nothing is returned.
    """
    cmake_settings = {
        "ext_modules": [CMakeExtension("graph_id_cpp")],
        "cmdclass": {"build_ext": CMakeBuild},
        "zip_safe": False,
    }
    setup_kwargs.update(cmake_settings)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from graph_id.core.graph_id import GraphIDGenerator # noqa
|
|
File without changes
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
from collections import Counter
|
|
2
|
+
from hashlib import blake2b
|
|
3
|
+
from typing import List
|
|
4
|
+
|
|
5
|
+
from pymatgen.core.structure import Neighbor
|
|
6
|
+
from pymatgen.util.string import formula_double_format
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def blake(s):
    """Return the hexadecimal BLAKE2b digest of ``s`` encoded with ``str.encode()``."""
    hasher = blake2b()
    hasher.update(s.encode())
    return hasher.hexdigest()
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class CompositionalSequence:
    """Running record of the labels met while expanding outwards from one site.

    Sites are identified by ``(index, jimage)`` pairs. Each call to
    ``count_composition_for_neighbors`` adds not-yet-seen neighbours to a
    cumulative label counter; ``finalize_this_depth`` then turns the counter
    into a formula-like string that is either appended to a list or folded
    into a running hash, depending on ``hash_cs``.
    """

    def __init__(self, focused_site_i, starting_labels, hash_cs=False, use_previous_cs=False):
        """Start a sequence at ``focused_site_i`` in the home image ``(0, 0, 0)``.

        Args:
            focused_site_i: index of the site the sequence is built for.
            starting_labels: per-site labels, indexed by site index.
            hash_cs: if True keep a single running hash instead of a list
                of per-depth strings.
            use_previous_cs: stored on the instance; counting uses
                ``starting_labels`` either way.
        """
        origin = (focused_site_i, (0, 0, 0))

        self.hash_cs = hash_cs
        # Exactly one of the two accumulators exists, depending on hash_cs.
        if hash_cs:
            self.cs_for_hashing = ""
        else:
            self.compositional_seq = []

        self.focused_site_i = focused_site_i
        self.new_sites = [origin]
        self.seen_sites = {origin}
        self.use_previous_cs = use_previous_cs
        self.labels = starting_labels
        self.composition_counter: Counter = Counter()
        self.first_element = starting_labels[focused_site_i]

    def __str__(self):
        """Return ``<label of the focused site>-<hash or '-'-joined per-depth strings>``."""
        if not self.hash_cs:
            return f"{self.first_element}-{'-'.join(self.compositional_seq)}"  # type: ignore

        return f"{self.first_element}-{self.cs_for_hashing}"  # type: ignore

    def get_current_starting_sites(self):
        """Return a copy of the sites found since the last call and clear the pending list."""
        pending, self.new_sites = self.new_sites, []
        return list(pending)

    def count_composition_for_neighbors(
        self,
        nsites: List[Neighbor],
    ) -> None:
        """Count the label of every neighbour in ``nsites`` that has not been seen yet.

        Newly seen neighbours are also queued as starting sites for the next
        depth. Each neighbour must expose ``index`` and ``jimage``.
        """
        for neighbor in nsites:
            site_key = (neighbor.index, neighbor.jimage)
            if site_key in self.seen_sites:
                continue

            self.seen_sites.add(site_key)
            self.new_sites.append(site_key)
            # Both settings of use_previous_cs count the same label.
            self.composition_counter[self.labels[neighbor.index]] += 1

    def finalize_this_depth(self):
        """Record the current (cumulative) counter as the result of this depth."""
        formula = self.get_sorted_composition_list_from(self.composition_counter)

        if self.hash_cs:
            self.cs_for_hashing = blake(f"{self.cs_for_hashing}-{''.join(formula)}")
        else:
            self.compositional_seq.append("".join(formula))

    def get_sorted_composition_list_from(self, composition_counter: Counter) -> List[str]:
        """Return ``label + formatted count`` strings, ordered by sorted label."""
        return [
            symbol + str(formula_double_format(composition_counter[symbol], False))
            for symbol in sorted(composition_counter)
        ]
|
|
@@ -0,0 +1,404 @@
|
|
|
1
|
+
import functools
|
|
2
|
+
|
|
3
|
+
import networkx as nx
|
|
4
|
+
import numpy as np
|
|
5
|
+
from graph_id.analysis.compositional_sequence import CompositionalSequence
|
|
6
|
+
from networkx.algorithms.distance_measures import diameter
|
|
7
|
+
from pymatgen.analysis.graphs import StructureGraph as PmgStructureGraph
|
|
8
|
+
from pymatgen.core import Element
|
|
9
|
+
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
|
|
10
|
+
from itertools import combinations
|
|
11
|
+
from hashlib import blake2b
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def standardize_loop(lst):
    """Pick one of two orderings of ``lst`` so equivalent loops compare equal.

    The candidate ordering keeps the first element in place and reverses the
    remaining ones. The two orderings are compared by their concatenated
    strings: ``lst`` itself is returned only when its concatenation is
    strictly greater, otherwise the candidate (a new list) is returned.

    Raises:
        IndexError: if ``lst`` is empty.
    """
    # First element stays, the tail is walked backwards.
    mirrored = [lst[0], *lst[:0:-1]]

    forward_key = "".join(lst)
    mirrored_key = "".join(mirrored)

    return lst if forward_key > mirrored_key else mirrored
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class SiteOnlySpeciesString:
    """Minimal stand-in for a site that carries nothing but its species string."""

    def __init__(self, species_string):
        # The only attribute kept: the species string passed by the caller.
        self.species_string = species_string
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class ConnectedSiteLight:
    """Lightweight description of a neighbouring site.

    Only the species string of ``site`` is retained (wrapped in
    ``SiteOnlySpeciesString``); the other arguments are stored unchanged.
    """

    def __init__(
        self,
        site,
        jimage,
        index,
        weight,
        dist,
    ):
        """Store the neighbour's species, image, index, weight and distance."""
        species_only = SiteOnlySpeciesString(site.species_string)
        self.site = species_only
        self.jimage, self.index = jimage, index
        self.weight, self.dist = weight, dist
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class StructureGraph(PmgStructureGraph):  # type: ignore
    """pymatgen ``StructureGraph`` with site-label and compositional-sequence helpers.

    Two attributes appear once the corresponding methods have run:
    ``starting_labels`` (one label string per site; written by
    ``set_elemental_labels``, ``set_wyckoffs`` or ``set_loops``) and
    ``cc_cs`` (one ``{"site_i": ..., "cs_list": ...}`` dict per connected
    component; written by the two ``set_*compositional_sequence_node_attr``
    methods).
    """

    @staticmethod
    def from_pymatgen_structure_graph(sg: PmgStructureGraph):
        """Create a ``StructureGraph`` from the structure and serialized graph data of ``sg``."""
        graph_data = sg.as_dict()["graphs"]

        return StructureGraph(sg.structure, graph_data)

    # Copied from original pymatgen with modifications
    @staticmethod
    def with_local_env_strategy(structure, strategy, weights=False):
        """
        Constructor for StructureGraph, using a strategy
        from :Class: `pymatgen.analysis.local_env`.

        :param structure: Structure object
        :param strategy: an instance of a
            :Class: `pymatgen.analysis.local_env.NearNeighbors` object
        :param weights: if True, use weights from local_env class
            (consult relevant class for their meaning)
        :return: a new StructureGraph with one edge per reported neighbor
        :raises ValueError: if ``strategy.structures_allowed`` is falsy
        """

        if not strategy.structures_allowed:
            raise ValueError(
                "Chosen strategy is not designed for use with structures! " "Please choose another strategy."
            )

        sg = StructureGraph.from_empty_graph(structure, name="bonds")

        for n, neighbors in enumerate(strategy.get_all_nn_info(structure)):
            for neighbor in neighbors:
                # local_env will always try to add two edges
                # for any one bond, one from site u to site v
                # and another from site v to site u: this is
                # harmless, so warn_duplicates=False
                sg.add_edge(
                    from_index=n,
                    from_jimage=(0, 0, 0),
                    to_index=neighbor["site_index"],
                    to_jimage=neighbor["image"],
                    weight=neighbor["weight"] if weights else None,
                    warn_duplicates=False,
                )

        return sg

    @staticmethod
    def with_indivisual_state_comp_strategy(structure, strategy, _sg, n, weights=False, rank_k=1, cutoff=6.0):
        """
        Add the neighbors of a single site to an existing graph.

        The neighbors are obtained from
        ``strategy.get_nn_info(structure, n, rank_k, cutoff)`` and each one
        is added to ``_sg`` as an edge starting at site ``n``.

        :param structure: Structure object
        :param strategy: neighbor-finding strategy providing
            ``structures_allowed`` and ``get_nn_info``
        :param _sg: the graph that is modified in place and returned
        :param n: (int) an index of focused site
        :param weights: if True, use weights from local_env class
            (consult relevant class for their meaning)
        :param rank_k: (int) cluster_idx, forwarded to the strategy
        :param cutoff: (float) forwarded to the strategy
        :return: ``_sg``
        :raises ValueError: if ``strategy.structures_allowed`` is falsy
        """

        if not strategy.structures_allowed:
            raise ValueError(  # noqa: TRY003
                "Chosen strategy is not designed for use with structures!",  # noqa: EM101
            )

        nn_info = strategy.get_nn_info(structure, n, rank_k, cutoff)

        for neighbor in nn_info:
            # local_env will always try to add two edges
            # for any one bond, one from site u to site v
            # and another from site v to site u: this is
            # harmless, so warn_duplicates=False
            _sg.add_edge(
                from_index=n,
                from_jimage=(0, 0, 0),
                to_index=neighbor["site_index"],
                to_jimage=neighbor["image"],
                weight=neighbor["weight"] if weights else None,
                warn_duplicates=False,
                edge_properties=neighbor["edge_properties"],
            )

        return _sg

    def set_elemental_labels(self):
        """Use each site's species string as its starting label."""
        self.starting_labels = [site.species_string for site in self.structure]

    def get_connected_sites_light(self, n, jimage=(0, 0, 0)):
        """
        A light version of get_connected_sites.
        periodic_site -> SiteOnlySpeciesString

        Returns one ``ConnectedSiteLight`` per distinct ``(index, image)``
        reachable from site ``n`` through an outgoing or incoming edge, with
        ``jimage`` added to every image. ``weight`` and ``dist`` are left as
        ``None``. The result follows edge order (outgoing edges first).
        """

        connected_sites = []
        connected_site_images = set()

        # (neighbor index, edge data, is_incoming)
        edges = [(v, d, False) for _, v, d in self.graph.out_edges(n, data=True)]
        edges += [(u, d, True) for u, _, d in self.graph.in_edges(n, data=True)]

        for neighbor_index, edge_data, is_incoming in edges:
            to_jimage = edge_data["to_jimage"]

            if is_incoming:
                # Seen from the target of the edge the image offset is negated.
                to_jimage = np.multiply(-1, to_jimage)

            to_jimage = tuple(map(int, np.add(to_jimage, jimage)))

            if (neighbor_index, to_jimage) not in connected_site_images:
                connected_sites.append(
                    ConnectedSiteLight(
                        site=self.structure[neighbor_index],
                        jimage=to_jimage,
                        index=neighbor_index,
                        weight=None,
                        dist=None,
                    )
                )
                connected_site_images.add((neighbor_index, to_jimage))

        return connected_sites

    def set_wyckoffs(self, symmetry_tol: float = 0.1) -> None:
        """Label every site as ``<species>_<wyckoff letter>_<space group number>``.

        The symmetry analysis runs on a copy of the structure in which every
        site has been replaced by hydrogen. If no symmetry dataset is
        returned, the plain elemental labels are used instead.

        :param symmetry_tol: currently not used by this method (see note below)
        """
        siteless_strc = self.structure.copy()

        for site_i in range(len(self.structure)):
            siteless_strc.replace(site_i, Element("H"))

        # NOTE(review): symmetry_tol is accepted but never forwarded, so the
        # analyzer runs with its own default tolerance. Forwarding it would
        # change the labels produced for existing inputs - confirm the
        # intended behaviour before changing this.
        sga = SpacegroupAnalyzer(siteless_strc)
        sym_dataset = sga.get_symmetry_dataset()

        if sym_dataset is None:
            self.set_elemental_labels()
            return None

        wyckoffs = sym_dataset["wyckoffs"]
        number = sym_dataset["number"]

        self.starting_labels = [
            f"{self.structure[site_i].species_string}_{w}_{number}" for site_i, w in enumerate(wyckoffs)
        ]

    def _build_compositional_sequence(self, site_i, depth, hash_cs, use_previous_cs, neighbor_lookup):
        """Return the compositional-sequence string of ``site_i`` after ``depth`` expansion steps."""
        cs = CompositionalSequence(
            focused_site_i=site_i,
            starting_labels=self.starting_labels,
            hash_cs=hash_cs,
            use_previous_cs=use_previous_cs,
        )

        for _ in range(depth):
            for c_site in cs.get_current_starting_sites():
                nsites = neighbor_lookup(c_site[0], c_site[1])
                cs.count_composition_for_neighbors(nsites)

            cs.finalize_this_depth()

        return str(cs)

    def set_compositional_sequence_node_attr(
        self,
        hash_cs: bool = False,
        wyckoff: bool = False,
        additional_depth: int = 0,
        depth_factor: int = 2,
        use_previous_cs: bool = False,
    ) -> None:
        """Compute a compositional sequence for every site and store it on the graph.

        For each connected component the expansion depth is
        ``depth_factor * diameter + additional_depth``. The node attribute
        ``compositional_sequence`` is set to ``<starting label>_<sequence>``
        and ``self.cc_cs`` receives one entry per component. Requires
        ``self.starting_labels`` to be set beforehand.
        """

        node_attributes = {}
        self.cc_cs = []
        # Cache neighbor lookups for the duration of this call only.
        get_connected_sites_light = functools.lru_cache(maxsize=None)(self.get_connected_sites_light)

        ug = self.graph.to_undirected()

        for cc in nx.connected_components(ug):
            cs_list = []

            # The depth is the same for every site of a component.
            depth = depth_factor * diameter(ug.subgraph(cc)) + additional_depth

            for focused_site_i in cc:
                this_cs = self._build_compositional_sequence(
                    focused_site_i,
                    depth,
                    hash_cs,
                    use_previous_cs or wyckoff,
                    get_connected_sites_light,
                )

                node_attributes[focused_site_i] = self.starting_labels[focused_site_i] + "_" + this_cs
                cs_list.append(this_cs)

            self.cc_cs.append({"site_i": cc, "cs_list": cs_list})

        nx.set_node_attributes(self.graph, values=node_attributes, name="compositional_sequence")

    def get_loops(self, depth: int, index: int, shortest: bool = True):  # noqa: C901
        """
        Compute the loops that start and end at one atom and return their
        index information.

        Paths are grown one bond at a time from ``(index, (0, 0, 0))``; a
        path whose last entry equals its first is a loop. Loops closed
        during the first two growth steps are discarded, as is a loop whose
        reverse has already been recorded.

        Parameters:
            depth: maximum size of a loop (number of growth steps)
            index: index of the site used as the starting point
            shortest: if True, stop as soon as every pair of neighbors of
                the starting site is closed by a recorded loop

        Returns:
            [[(index, image), ...], ...]
        """

        get_connected_sites = functools.lru_cache(maxsize=None)(self.get_connected_sites)

        def find_all_rings(index, ring_list):
            """Return True if every pair of neighbors of ``index`` is joined by a ring in ``ring_list``."""
            neighbors = get_connected_sites(index, (0, 0, 0))
            for n0, n1 in combinations(neighbors, 2):
                end0 = (n0.index, n0.jimage)
                end1 = (n1.index, n1.jimage)

                # A ring joins the pair when its second and second-to-last
                # entries are the two neighbors, in either order.
                joined = any(
                    (ring[1] == end0 and ring[-2] == end1) or (ring[1] == end1 and ring[-2] == end0)
                    for ring in ring_list
                )

                if not joined:
                    return False

            return True

        def get_further_lines_from_lines(lines):
            """Extend every path in ``lines`` by one bond in all possible directions."""
            new_lines = []
            for line in lines:
                # Only paths that have not gone back to a visited site are extended.
                if len(line) != len(set(line)):
                    continue

                ind, image = line[-1]
                for n in get_connected_sites(ind, image):
                    new_lines.append([*line, (n.index, n.jimage)])

            return new_lines

        lines = [[(index, (0, 0, 0))]]
        ring_list = []

        for depth_i in range(depth):
            next_lines = []

            for line in get_further_lines_from_lines(lines):
                if line[0] != line[-1]:
                    next_lines.append(line)
                # First and last entries are identical: the path is closed.
                elif depth_i > 1 and list(reversed(line)) not in ring_list:
                    ring_list.append(line)

            lines = next_lines

            # Stop the search once the theoretical number of rings is reached.
            if shortest and find_all_rings(index, ring_list):
                return ring_list

        return list(ring_list)

    def set_loops(self, depth_factor: int, additional_depth: int) -> None:
        """Label every site with a hash of the loops that pass through it.

        The search depth is
        ``max component diameter * depth_factor + additional_depth``. Each
        loop is reduced to its species strings, standardized, joined with
        ``-`` and hashed; the sorted loop hashes are joined with ``:`` and
        hashed again to give the site's entry in ``self.starting_labels``.
        """
        self.starting_labels = []

        undirected_graph = self.graph.to_undirected()

        max_diameter = max(
            (diameter(undirected_graph.subgraph(cc)) for cc in nx.connected_components(undirected_graph)),
            default=0,
        )

        depth = max_diameter * depth_factor + additional_depth

        for site_i in range(len(self.graph.nodes)):
            all_loop_strings = []
            for loop in self.get_loops(depth=depth, index=site_i):
                loop_elements = standardize_loop(
                    [self.structure[site_i_jimage[0]].species_string for site_i_jimage in loop]
                )

                seed_str = "-".join(loop_elements)
                all_loop_strings.append(blake2b(seed_str.encode(), digest_size=8).hexdigest())

            seed_str_all_loops = ":".join(sorted(all_loop_strings))
            hashed_all_loops = blake2b(seed_str_all_loops.encode(), digest_size=8).hexdigest()

            self.starting_labels.append(hashed_all_loops)

    def set_indivisual_compositional_sequence_node_attr(
        self,
        n: int,
        hash_cs: bool = False,
        wyckoff: bool = False,
        additional_depth: int = 0,
        depth_factor: int = 2,
        use_previous_cs: bool = False,
    ) -> None:
        """Compute the compositional sequence of site ``n`` only.

        ``self.cc_cs`` still receives one entry per connected component;
        the ``cs_list`` of components that do not contain ``n`` is empty.
        Only node ``n`` gets the ``compositional_sequence`` attribute.
        Requires ``self.starting_labels`` to be set beforehand.
        """
        node_attributes = {}
        self.cc_cs = []
        # Cache neighbor lookups for the duration of this call only.
        get_connected_sites_light = functools.lru_cache(maxsize=None)(self.get_connected_sites_light)

        ug = self.graph.to_undirected()

        for cc in nx.connected_components(ug):
            cs_list = []

            if n in cc:
                # The diameter is only needed for the component holding n.
                depth = depth_factor * diameter(ug.subgraph(cc)) + additional_depth

                this_cs = self._build_compositional_sequence(
                    n,
                    depth,
                    hash_cs,
                    use_previous_cs or wyckoff,
                    get_connected_sites_light,
                )

                node_attributes[n] = self.starting_labels[n] + "_" + this_cs
                cs_list.append(this_cs)

            self.cc_cs.append({"site_i": cc, "cs_list": cs_list})

        nx.set_node_attributes(self.graph, values=node_attributes, name="compositional_sequence")
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
from pymatgen.analysis.local_env import NearNeighbors
|
|
7
|
+
from pymatgen.core import IStructure, Structure
|
|
8
|
+
from sklearn.cluster import DBSCAN
|
|
9
|
+
|
|
10
|
+
def _get_original_site(structure, site):
    """Return the index in ``structure`` of the site that ``site`` corresponds to.

    For ``Structure``/``IStructure`` inputs the match is made with
    ``site.is_periodic_image``; for anything else with plain equality.

    Raises:
        Exception: if no site of ``structure`` matches.
    """
    if not isinstance(structure, (IStructure, Structure)):
        for position, candidate in enumerate(structure):
            if site == candidate:
                return position
    else:
        tol = 1e-8  # threshold in Site.is_periodic_image
        # Try the sites sharing the most fractional coordinates first,
        # to reduce the number of is_periodic_image calls.
        coordinate_hits = (np.abs(structure.frac_coords - site.frac_coords) < tol).sum(axis=1)
        for position in np.argsort(-coordinate_hits):
            if site.is_periodic_image(structure[position]):
                return position

    raise Exception("Site not found!")  # noqa: TRY002, TRY003, EM101
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class DistanceClusteringNN(NearNeighbors):
    """Near-neighbor strategy based on clustering of neighbor distances.

    The distances around an atom are clustered and the clusters are numbered
    in order of closeness to the atom; that number, together with the element
    symbols, is what the Graph ID is computed from.
    """

    def __init__(self) -> None:
        """Create the strategy; there is nothing to configure."""

    @property
    def structures_allowed(self) -> bool:
        """
        Boolean property: can this NearNeighbors class be used with Structure
        objects?
        """
        return True

    def get_nn_info(self, structure: Structure, n: int, rank_k: int, cutoff: float = 6.0) -> list[dict[str, Any]]:
        """
        Return the neighbors of site ``n`` that fall in distance cluster ``rank_k``.

        Args:
            structure (Structure): input structure.
            n (int): index of site for which to determine near
                neighbors.
            rank_k (int): index into the sorted list of cluster thresholds;
                neighbors farther than threshold ``rank_k - 1`` and no
                farther than threshold ``rank_k`` are returned (distances
                rounded to 3 decimals).
            cutoff (float): distance cutoff parameter.
        Returns:
            siw (list[dict]): one dict per neighbor with the keys ``site``,
                ``image``, ``weight`` (rounded distance), ``site_index`` and
                ``edge_properties``. Empty when fewer than ``rank_k + 1``
                thresholds exist.
        """
        center = structure[n]
        thresholds = self.get_cutoff_cluster(structure, n, cutoff)
        if len(thresholds) <= rank_k:
            return []

        candidates = structure.get_neighbors(center, thresholds[rank_k])
        max_weight = round(thresholds[rank_k], 3)
        is_periodic = isinstance(structure, (Structure, IStructure))
        # Exclusive lower bound of the shell; only meaningful for rank_k > 0.
        lower_weight = round(thresholds[rank_k - 1], 3) if rank_k > 0 else None

        siw = []
        for nn in candidates:
            weight = round(nn.nn_distance, 3)

            if rank_k > 0:
                in_shell = lower_weight < weight <= max_weight
            elif rank_k == 0:
                in_shell = weight <= max_weight
            else:
                in_shell = False

            if not in_shell:
                continue

            siw.append(
                {
                    "site": nn,
                    "image": self._get_image(structure, nn) if is_periodic else None,
                    "weight": weight,
                    "site_index": self._get_original_site(structure, nn),
                    "edge_properties": {"cluster_idx": rank_k + 1},
                },
            )

        return siw

    def get_cutoff_cluster(self, structure: Structure, n: int, cutoff: float = 6.0) -> list:
        """
        Determine the bond-detection thresholds from the distance clusters
        found by DBSCAN.

        The distances of all sites within ``cutoff`` of site ``n`` are
        clustered (``eps=0.5``, ``min_samples=2``); the largest distance of
        every cluster is returned, sorted in ascending order.
        """
        sphere_sites = structure.get_sites_in_sphere(structure[n].coords, cutoff)
        # DBSCAN is fed 2-D points, so each distance is padded with a zero.
        distance_list = [[neighbor.nn_distance, 0] for neighbor in sphere_sites]

        dbscan = DBSCAN(eps=0.5, min_samples=2)
        dbscan.fit(distance_list)
        labels = dbscan.labels_

        cluster_count = max(labels) + 1
        max_dist_list = [
            max([0, *(pair[0] for label, pair in zip(labels, distance_list) if label == cluster_id)])
            for cluster_id in range(cluster_count)
        ]

        return sorted(max_dist_list)
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import csv
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
|
|
5
|
+
from graph_id import GraphIDGenerator
|
|
6
|
+
from pymatgen.core import Structure
|
|
7
|
+
from tabulate import tabulate
|
|
8
|
+
|
|
9
|
+
if __name__ == "__main__":
    # Command-line entry point: compute the Graph ID of every structure file
    # given on the command line, print the results as a table and write them
    # to a timestamped CSV file in the current directory.
    parser = argparse.ArgumentParser(description="Graph ID: graph-based ID for materials")

    parser.add_argument(
        dest="filenames",
        metavar="filename",
        nargs="+",
        help="List of structure files.",
        default=[],
    )

    # NOTE: this flag is parsed but not read anywhere in this script.
    parser.add_argument("-p", "--parallel", help="parallel execution", action="store_true")

    # Parse first so that --help and usage errors exit before any work is done.
    args = parser.parse_args()
    gid = GraphIDGenerator()

    table = []

    for fname in args.filenames:
        s = Structure.from_file(fname)
        s.merge_sites(mode="delete")

        table.append([gid.get_id(s), fname])

    t_headers = ["GraphIDGenerator", "Filename"]

    print(tabulate(table, headers=t_headers))

    now_str = datetime.now().strftime("%Y%m%d-%H%M%S")

    # newline="" lets the csv module control line endings itself; without it
    # an extra blank line follows every row on Windows.
    with open(f"graph_id_{now_str}.csv", "w", newline="") as f:
        csv.writer(f).writerows(table)
|
|
File without changes
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
from copy import deepcopy
|
|
2
|
+
from hashlib import blake2b
|
|
3
|
+
|
|
4
|
+
import networkx as nx
|
|
5
|
+
import numpy as np
|
|
6
|
+
from graph_id.analysis.local_env import DistanceClusteringNN
|
|
7
|
+
from pymatgen.analysis.local_env import MinimumDistanceNN
|
|
8
|
+
from graph_id.analysis.graphs import StructureGraph
|
|
9
|
+
from graph_id.core.graph_id import GraphIDGenerator
|
|
10
|
+
from pymatgen.core import Element
|
|
11
|
+
|
|
12
|
+
__version__ = "0.1.0"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def blake(s):
    """Return the hexadecimal BLAKE2b digest of ``s`` encoded with ``str.encode()``."""
    encoded = s.encode()
    digest = blake2b(encoded)
    return digest.hexdigest()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class DistanceClusteringGraphID(GraphIDGenerator):
    """Graph ID generator built from per-site, per-rank structure graphs.

    For every rank index below ``rank_k`` and every site of the structure, a
    dedicated structure graph is prepared and its connected-component hashes
    are collected; the collected hashes are folded into a single BLAKE2b
    digest by ``get_id``.
    """

    def __init__(  # noqa: PLR0913
        self,
        nn=None,
        wyckoff=False,
        depth_factor=2,
        additional_depth=1,
        symmetry_tol=0.1,
        topology_only=False,
        loop=False,
        rank_k=3,
        cutoff=6.0,
        digest_size=8,
    ) -> None:
        """Store the generator settings.

        Args:
            nn: Neighbour-finding strategy handed to
                ``StructureGraph.with_indivisual_state_comp_strategy``.
                When ``None`` a ``DistanceClusteringNN()`` is used.
            wyckoff, depth_factor, additional_depth, symmetry_tol,
            topology_only, loop, digest_size: Forwarded unchanged to
                ``GraphIDGenerator.__init__``.
            rank_k: Number of rank indices iterated over in ``get_id``.
            cutoff: Forwarded as ``cutoff`` when the per-site structure graph
                is built.

        Raises:
            ValueError: Propagated from the base class when incompatible
                flags are combined.
        """
        super().__init__(
            nn=nn,
            wyckoff=wyckoff,
            depth_factor=depth_factor,
            additional_depth=additional_depth,
            symmetry_tol=symmetry_tol,
            topology_only=topology_only,
            loop=loop,
            digest_size=digest_size,
        )

        self.rank_k = rank_k
        self.cutoff = cutoff

        # The base class already stores a user-supplied ``nn`` and
        # ``digest_size``; only its default strategy has to be replaced.
        if nn is None:
            self.nn = DistanceClusteringNN()

    def get_id(self, structure):
        """Return the distance-clustering graph ID of ``structure``.

        Unlike ``GraphIDGenerator.get_id`` the returned value is the bare hex
        digest: no dimensionality or composition prefix is added.

        Args:
            structure: Structure to identify; it must support ``len()`` and
                be accepted by ``StructureGraph.with_local_env_strategy``.

        Returns:
            Hexadecimal BLAKE2b digest with ``self.digest_size`` bytes.
        """
        base_sg = StructureGraph.with_local_env_strategy(structure, MinimumDistanceNN())
        rank_digests = []

        for cluster_idx in range(self.rank_k):
            site_strings = []
            for idx in range(len(structure)):
                # Work on a copy so that the base graph stays intact for the
                # next site / rank.
                copied_sg = deepcopy(base_sg)

                # First remove every bond that involves atom ``idx``.
                for from_index, to_index, dct in base_sg.graph.edges(keys=False, data=True):
                    if from_index == idx or to_index == idx:
                        copied_sg.break_edge(from_index, to_index, dct["to_jimage"], allow_reverse=True)

                sg = self.prepare_structure_graph(structure, copied_sg, idx, cluster_idx)

                # One hash per connected component, sorted so that the result
                # does not depend on component order.
                component_hashes = sorted(
                    blake("-".join(sorted(component["cs_list"]))) for component in sg.cc_cs
                )
                site_strings.append(":".join(component_hashes))

            # Sorting makes the per-rank digest independent of site order.
            long_str = ":".join(sorted(site_strings))
            rank_digests.append(blake2b(long_str.encode("ascii"), digest_size=self.digest_size).hexdigest())

        long_gid = "".join(rank_digests)
        return blake2b(long_gid.encode("ascii"), digest_size=self.digest_size).hexdigest()

    def prepare_structure_graph(self, structure, _sg, n, rank_k):
        """Build and label the structure graph for site ``n`` and rank ``rank_k``.

        Args:
            structure: Structure the graph is built for.
            _sg: Pre-built structure graph passed through to
                ``StructureGraph.with_indivisual_state_comp_strategy``.
            n: Site index passed to the graph constructor and to
                ``set_indivisual_compositional_sequence_node_attr``.
            rank_k: Rank index passed to the graph constructor.

        Returns:
            The structure graph after its ``compositional_sequence`` node
            attributes have stopped gaining distinct values.
        """
        sg = StructureGraph.with_indivisual_state_comp_strategy(
            structure=structure,
            strategy=self.nn,
            _sg=_sg,
            n=n,
            rank_k=rank_k,
            cutoff=self.cutoff,
        )

        use_previous_cs = False

        # NOTE(review): this count is taken before the optional replacement of
        # every site by hydrogen below - confirm that this is intended.
        prev_num_uniq = len(sg.structure.composition)

        if self.topology_only:
            for site_i in range(len(sg.structure)):
                sg.structure.replace(site_i, Element("H"))

        if self.wyckoff:
            sg.set_wyckoffs(symmetry_tol=self.symmetry_tol)
            prev_num_uniq = len(set(nx.get_node_attributes(sg.graph, "compositional_sequence").values()))

        elif self.loop:
            sg.set_loops_as_starting_labels(
                depth_factor=self.depth_factor,
                additional_depth=self.additional_depth,
            )

        else:
            sg.set_elemental_labels()

        # Refine the node labels until the number of distinct labels is stable.
        while True:
            sg.set_indivisual_compositional_sequence_node_attr(
                n=n,
                hash_cs=False,
                wyckoff=self.wyckoff,
                additional_depth=self.additional_depth,
                depth_factor=self.depth_factor,
                use_previous_cs=use_previous_cs or self.wyckoff,
            )

            num_unique_nodes = len(set(nx.get_node_attributes(sg.graph, "compositional_sequence").values()))
            use_previous_cs = True

            if prev_num_uniq == num_unique_nodes:
                break

            prev_num_uniq = num_unique_nodes

        return sg
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
import multiprocessing as multi
|
|
2
|
+
from copy import deepcopy
|
|
3
|
+
from hashlib import blake2b
|
|
4
|
+
from multiprocessing import Pool
|
|
5
|
+
|
|
6
|
+
import networkx as nx
|
|
7
|
+
import numpy as np
|
|
8
|
+
from graph_id.analysis.graphs import StructureGraph
|
|
9
|
+
from pymatgen.analysis.dimensionality import get_dimensionality_larsen
|
|
10
|
+
from pymatgen.analysis.local_env import MinimumDistanceNN
|
|
11
|
+
from pymatgen.core import Element
|
|
12
|
+
from tqdm import tqdm
|
|
13
|
+
|
|
14
|
+
__version__ = "0.1.0"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def blake(s):
    """Hash the string ``s`` with BLAKE2b and return the hex digest.

    The string is encoded via ``str.encode()`` with its defaults and the
    hash uses the default BLAKE2b digest size.
    """
    encoded = s.encode()
    return blake2b(encoded).hexdigest()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class GraphIDGenerator:
    """Generate string identifiers for structures from their structure graphs.

    The identifier is a BLAKE2b digest of the sorted, hashed compositional
    sequences of the graph's connected components, prefixed with the
    dimensionality and (unless ``topology_only`` is set) the reduced formula.
    """

    def __init__(
        self,
        nn=None,
        wyckoff=False,
        depth_factor=2,
        additional_depth=1,
        symmetry_tol=0.1,
        topology_only=False,
        loop=False,
        digest_size=8,
    ):
        """Store the generator settings.

        Args:
            nn: Neighbour-finding strategy used to build structure graphs.
                When ``None`` a ``MinimumDistanceNN()`` is used.
            wyckoff: If true, ``set_wyckoffs`` is used for the starting node
                labels.
            depth_factor: Forwarded to the compositional-sequence routines.
            additional_depth: Forwarded to the compositional-sequence
                routines.
            symmetry_tol: Tolerance forwarded to ``set_wyckoffs``.
            topology_only: If true, every site is replaced by hydrogen before
                labelling and the reduced formula is left out of the ID.
            loop: If true, ``set_loops`` is used for the starting node labels.
            digest_size: Digest size in bytes of the final BLAKE2b hash.

        Raises:
            ValueError: If ``loop`` is combined with ``wyckoff`` or with
                ``topology_only``.
        """
        if wyckoff and loop:
            raise ValueError("wyckoff and loop cannot be True at the same time")

        if loop and topology_only:
            raise ValueError("loop and topology_only cannot be True at the same time")

        self.nn = MinimumDistanceNN() if nn is None else nn

        self.wyckoff = wyckoff
        self.additional_depth = additional_depth
        self.depth_factor = depth_factor
        self.symmetry_tol = symmetry_tol
        self.topology_only = topology_only
        self.loop = loop
        self.digest_size = digest_size

    def get_id(self, structure):
        """Return the graph ID of ``structure``.

        Returns:
            ``"<formula>-<dim>D-<hex digest>"``; the formula part is omitted
            when ``topology_only`` is set.
        """
        sg = self.prepare_structure_graph(structure)

        # One hash per connected component, sorted so that the result does not
        # depend on the order of the components.
        component_hashes = sorted(blake("-".join(sorted(component["cs_list"]))) for component in sg.cc_cs)
        long_str = ":".join(component_hashes)
        gid = blake2b(long_str.encode("ascii"), digest_size=self.digest_size).hexdigest()

        return self.elaborate_comp_dim(sg, gid)

    def elaborate_comp_dim(self, sg, gid):
        """Prefix ``gid`` with the dimensionality and, if enabled, the formula."""
        dim = get_dimensionality_larsen(sg)
        gid = f"{dim}D-{gid}"

        if not self.topology_only:
            gid = f"{sg.structure.composition.reduced_formula}-{gid}"

        return gid

    @property
    def version(self):
        """Module-level ``__version__`` string."""
        return __version__

    def get_id_catch_error(self, structure):
        """Return ``get_id(structure)``, or ``""`` if any exception is raised.

        Deliberately best-effort: it is used by the parallel path of
        ``get_many_ids`` so that one failing structure does not abort the run.
        """
        try:
            return self.get_id(structure)
        except Exception:
            return ""

    def get_many_ids(self, structures, parallel=False):
        """Return the graph IDs of all ``structures`` in input order.

        Args:
            structures: Sized collection of structures (``len()`` is needed
                for the progress bar in the parallel path).
            parallel: If true, compute the IDs in a process pool with one
                worker per CPU. In that mode a failing structure yields
                ``""``; in the serial mode the exception propagates.

        Returns:
            List of ID strings.
        """
        if parallel:
            n_cores = multi.cpu_count()
            # The context manager shuts the worker processes down once all
            # results have been collected.
            with Pool(n_cores) as pool:
                imap = pool.imap(self.get_id_catch_error, structures)
                return list(tqdm(imap, total=len(structures)))

        return [self.get_id(s) for s in structures]

    def get_component_ids(self, structure):
        """Return one ID per connected component of the structure graph.

        Returns:
            Object-dtype NumPy array of dicts with the keys ``"site_i"`` and
            ``"graph_id"``; the component ID is a 16-byte BLAKE2b hex digest.
        """
        sg = self.prepare_structure_graph(structure)
        cc_gid = np.empty(
            [
                len(sg.cc_cs),
            ],
            dtype=object,
        )
        for i, component in enumerate(sg.cc_cs):
            each_long_str = blake("-".join(sorted(component["cs_list"])))
            gid = blake2b(each_long_str.encode("ascii"), digest_size=16).hexdigest()
            cc_gid[i] = {"site_i": component["site_i"], "graph_id": gid}

        return cc_gid

    def are_same(self, structure1, structure2):
        """Return whether both structures produce the same graph ID."""
        return self.get_id(structure1) == self.get_id(structure2)

    def expand_for_low_dimensionality(self, sg):
        """Rebuild ``sg`` on a supercell if it is low-dimensional and connected.

        The graph is rebuilt only when the dimensionality is below 3 and the
        graph has exactly one weakly connected component; otherwise ``sg`` is
        returned unchanged.
        """
        dimensionality = get_dimensionality_larsen(sg)

        if dimensionality < 3 and len(list(nx.weakly_connected_components(sg.graph))) == 1:
            supercell = sg.structure.copy()
            supercell.make_supercell([[2, 2, 2]])
            sg = StructureGraph.with_local_env_strategy(supercell, self.nn)

        return sg

    def expand_for_multi_bonds(self, sg):
        """Return a graph on a supercell large enough to have no multi-bonds.

        Supercells of growing size (2x2x2, 3x3x3, ...) of the original
        structure are tried until ``has_multi_bonds`` is false.
        """
        _sg = deepcopy(sg)
        factor = 2

        while self.has_multi_bonds(_sg):
            _strc = sg.structure.copy()
            _strc.make_supercell([factor, factor, factor])

            _sg = StructureGraph.with_local_env_strategy(_strc, self.nn)
            factor += 1

        return _sg

    def has_multi_bonds(self, sg):
        """Return whether any edge of ``sg.graph`` has a non-zero third element.

        Assumes the edges iterate as ``(u, v, key)`` tuples, so a non-zero key
        marks a parallel edge - TODO confirm against the graph type in use.
        """
        return any(edge[2] != 0 for edge in sg.graph.edges)

    def prepare_structure_graph(self, structure):
        """Build the structure graph of ``structure`` and label its nodes.

        Returns:
            The structure graph after its ``compositional_sequence`` node
            attributes have stopped gaining distinct values.
        """
        sg = StructureGraph.with_local_env_strategy(structure, self.nn)
        use_previous_cs = False

        # NOTE(review): this count is taken before the optional replacement of
        # every site by hydrogen below - confirm that this is intended.
        prev_num_uniq = len(sg.structure.composition)

        if self.topology_only:
            for site_i in range(len(sg.structure)):
                sg.structure.replace(site_i, Element("H"))

        if self.wyckoff:
            sg.set_wyckoffs(symmetry_tol=self.symmetry_tol)

            # TODO: remove nx
            prev_num_uniq = len(set(nx.get_node_attributes(sg.graph, "compositional_sequence").values()))

        elif self.loop:
            sg.set_loops(
                depth_factor=self.depth_factor,
                additional_depth=self.additional_depth,
            )

        else:
            sg.set_elemental_labels()

        # Refine the node labels until the number of distinct labels is stable.
        while True:
            sg.set_compositional_sequence_node_attr(
                hash_cs=True,
                wyckoff=self.wyckoff,
                additional_depth=self.additional_depth,
                depth_factor=self.depth_factor,
                use_previous_cs=use_previous_cs or self.wyckoff,
            )

            num_unique_nodes = len(set(nx.get_node_attributes(sg.graph, "compositional_sequence").values()))
            use_previous_cs = True

            if prev_num_uniq == num_unique_nodes:
                break

            prev_num_uniq = num_unique_nodes

        return sg
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "graph-id-core"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = ""
|
|
5
|
+
authors = ["Koki Muraoka <muraok_k@chemsys.t.u-tokyo.ac.jp>"]
|
|
6
|
+
readme = "README.md"
|
|
7
|
+
packages = [{include = "graph_id"}]
|
|
8
|
+
|
|
9
|
+
[tool.poetry.build]
|
|
10
|
+
script = "build.py"
|
|
11
|
+
generate-setup-file = true
|
|
12
|
+
|
|
13
|
+
[tool.poetry.dependencies]
|
|
14
|
+
pybind11 = "2.11.1"
|
|
15
|
+
python = ">=3.10,<3.14"
|
|
16
|
+
pymatgen = "^2025.4.20"
|
|
17
|
+
scikit-learn = ">=0.24.1"
|
|
18
|
+
|
|
19
|
+
[tool.poetry.group.dev.dependencies]
|
|
20
|
+
pytest = "^7.4.0"
|
|
21
|
+
pytest-cov = "^4.1.0"
|
|
22
|
+
|
|
23
|
+
[tool.pytest.ini_options]
|
|
24
|
+
testpaths = ["tests", "graph_id"]
|
|
25
|
+
norecursedirs = ["library", "build", "dist", ".git", ".venv", "__pycache__"]
|
|
26
|
+
|
|
27
|
+
[build-system]
|
|
28
|
+
requires = ["poetry-core>=1.0.0", "pybind11~=2.11.1", "setuptools"]
|
|
29
|
+
build-backend = "poetry.core.masonry.api"
|
|
30
|
+
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
"""Setuptools entry point for the graph-id-core source distribution.

The static metadata is collected in ``setup_kwargs``, handed to the
project's ``build.build`` hook and then passed to ``setuptools.setup``.
"""
from setuptools import setup

# Import only the hook that is actually called; a star import could silently
# shadow ``setup`` or the metadata variables defined below.
from build import build

packages = ['graph_id', 'graph_id.analysis', 'graph_id.commands', 'graph_id.core']

package_data = {'': ['*']}

install_requires = ['pybind11==2.11.1', 'pymatgen>=2025.4.20,<2026.0.0', 'scikit-learn>=0.24.1']

# Maintainer and URL entries are left out on purpose: the literal string
# 'None' would otherwise be published as real metadata.
setup_kwargs = {
    'name': 'graph-id-core',
    'version': '0.1.0',
    'description': '',
    'long_description': '\n\n# Graph ID\n\n## Installation \n### pypi\n```\npip install graph-id-core\n```\n\n### GitHub\n```\ngit clone https://github.com/kmu/graph-id-core.git\ngit submodule init\ngit submodule update\npip install -e .\n```\n',
    # The long description is Markdown (README.md).
    'long_description_content_type': 'text/markdown',
    'author': 'Koki Muraoka',
    'author_email': 'muraok_k@chemsys.t.u-tokyo.ac.jp',
    'packages': packages,
    'package_data': package_data,
    'install_requires': install_requires,
    'python_requires': '>=3.10,<3.14',
}

# Presumably updates ``setup_kwargs`` in place (e.g. to register extension
# modules) - verify against build.py.
build(setup_kwargs)

setup(**setup_kwargs)
|