graph-id-core 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,35 @@
1
+ Metadata-Version: 2.3
2
+ Name: graph-id-core
3
+ Version: 0.1.0
4
+ Summary:
5
+ Author: Koki Muraoka
6
+ Author-email: muraok_k@chemsys.t.u-tokyo.ac.jp
7
+ Requires-Python: >=3.10,<3.14
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Requires-Dist: pybind11 (==2.11.1)
14
+ Requires-Dist: pymatgen (>=2025.4.20,<2026.0.0)
15
+ Requires-Dist: scikit-learn (>=0.24.1)
16
+ Description-Content-Type: text/markdown
17
+
18
+
19
+
20
+ # Graph ID
21
+
22
+ ## Installation
23
+ ### PyPI
24
+ ```
25
+ pip install graph-id-core
26
+ ```
27
+
28
+ ### GitHub
29
+ ```
30
+ git clone https://github.com/kmu/graph-id-core.git && cd graph-id-core
31
+ git submodule init
32
+ git submodule update
33
+ pip install -e .
34
+ ```
35
+
@@ -0,0 +1,17 @@
1
+
2
+
3
+ # Graph ID
4
+
5
+ ## Installation
6
+ ### PyPI
7
+ ```
8
+ pip install graph-id-core
9
+ ```
10
+
11
+ ### GitHub
12
+ ```
13
+ git clone https://github.com/kmu/graph-id-core.git && cd graph-id-core
14
+ git submodule init
15
+ git submodule update
16
+ pip install -e .
17
+ ```
@@ -0,0 +1,108 @@
1
+ import os
2
+ import re
3
+ import subprocess
4
+ import sys
5
+ from pathlib import Path
6
+
7
+ from setuptools import Extension
8
+ from setuptools.command.build_ext import build_ext
9
+
10
+ # https://github.com/pybind/cmake_example/blob/master/setup.py
11
+
12
# Convert distutils Windows platform specifiers to CMake -A arguments.
# Keys are distutils platform names; values are the matching CMake
# architecture names.
# NOTE(review): nothing in the visible part of this build script reads this
# mapping -- confirm whether it is still needed.
PLAT_TO_CMAKE = {
    "win32": "Win32",
    "win-amd64": "x64",
    "win-arm32": "ARM",
    "win-arm64": "ARM64",
}
19
+
20
+
21
+ # A CMakeExtension needs a sourcedir instead of a file list.
22
+ # The name must be the _single_ output extension from the CMake build.
23
+ # If you need multiple extensions, see scikit-build.
24
class CMakeExtension(Extension):
    """Setuptools extension described by a CMake source directory.

    Args:
        name: Name of the extension module.
        sourcedir: Directory handed to ``cmake`` as the source tree. It is
            resolved to an absolute path when the extension is created.
    """

    def __init__(self, name: str, sourcedir: str = "") -> None:
        # The file list is deliberately empty: CMake, not setuptools,
        # decides what gets compiled.
        super().__init__(name, sources=[])
        resolved_dir = Path(sourcedir).resolve()
        self.sourcedir = os.fspath(resolved_dir)
28
+
29
+
30
class CMakeBuild(build_ext):
    """``build_ext`` command that delegates compilation to CMake."""

    def build_extension(self, ext: CMakeExtension) -> None:
        """Configure and build ``ext`` by running ``cmake`` twice.

        The first run configures ``ext.sourcedir`` inside a per-extension
        directory under ``self.build_temp``; the second runs
        ``cmake --build .`` in that directory. Both use ``check=True``, so a
        failing command raises ``subprocess.CalledProcessError``.

        Environment variables read: ``DEBUG``, ``CMAKE_ARGS``, ``ARCHFLAGS``
        (macOS only), ``CMAKE_BUILD_PARALLEL_LEVEL`` and ``PYTHONPATH``.
        """
        # Must be in this form due to bug in .resolve() only fixed in
        # Python 3.10+ (note carried over from the upstream template).
        output_dir = (Path.cwd() / self.get_ext_fullpath(ext.name)).parent.resolve()

        if self.debug is None:
            debug_enabled = int(os.environ.get("DEBUG", 0))
        else:
            debug_enabled = self.debug
        build_type = "Debug" if debug_enabled else "Release"

        # The output directory is passed with a trailing path separator.
        configure_args = [
            f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={output_dir}{os.sep}",
            f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE={output_dir}{os.sep}",
            f"-DPYTHON_EXECUTABLE={sys.executable}",
            f"-DCMAKE_BUILD_TYPE={build_type}",  # not used on MSVC, but no harm
        ]
        compile_args = []

        # Extra CMake arguments supplied through the environment
        # (needed e.g. to build for ARM OSx on conda-forge).
        extra_args = os.environ.get("CMAKE_ARGS", "")
        configure_args.extend(token for token in extra_args.split(" ") if token)

        # Pass in the version to C++.
        configure_args.append(f"-DVERSION_INFO={self.distribution.get_version()}")

        if sys.platform.startswith("win32"):
            compile_args.extend(["--config", build_type])
        if sys.platform.startswith("darwin"):
            # Cross-compile support for macOS - respect ARCHFLAGS if set
            arch_names = re.findall(r"-arch (\S+)", os.environ.get("ARCHFLAGS", ""))
            if arch_names:
                configure_args.append("-DCMAKE_OSX_ARCHITECTURES={}".format(";".join(arch_names)))

        # CMAKE_BUILD_PARALLEL_LEVEL, when set, controls parallelism for all
        # generators; otherwise fall back to the -j value given to build_ext
        # (not supported by pip or PyPA-build). Requires CMake 3.12+.
        if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ:
            jobs = getattr(self, "parallel", None)
            if jobs:
                compile_args.append(f"-j{jobs}")

        work_dir = Path(self.build_temp) / ext.name
        if not work_dir.exists():
            work_dir.mkdir(parents=True)

        child_env = dict(os.environ)
        # In environments such as Google Colab where CMake is installed via
        # pip, a set PYTHONPATH makes `import cmake` fail and breaks the
        # build, so it is removed for the child processes.
        child_env.pop("PYTHONPATH", None)

        configure_cmd = ["cmake", ext.sourcedir, *configure_args]
        subprocess.run(configure_cmd, cwd=work_dir, check=True, env=child_env)

        compile_cmd = ["cmake", "--build", ".", *compile_args]
        subprocess.run(compile_cmd, cwd=work_dir, check=True, env=child_env)
96
+
97
+
98
def build(setup_kwargs):
    """Register the CMake-built extension in ``setup_kwargs`` (modified in place).

    Sets ``ext_modules`` to the single ``graph_id_cpp`` extension, routes
    ``build_ext`` through ``CMakeBuild`` and marks the package as not
    zip-safe. Returns ``None``.
    """
    extensions = [CMakeExtension("graph_id_cpp")]
    overrides = {
        "ext_modules": extensions,
        "cmdclass": {"build_ext": CMakeBuild},
        "zip_safe": False,
    }
    setup_kwargs.update(overrides)
@@ -0,0 +1 @@
1
+ from graph_id.core.graph_id import GraphIDGenerator # noqa
File without changes
@@ -0,0 +1,75 @@
1
+ from collections import Counter
2
+ from hashlib import blake2b
3
+ from typing import List
4
+
5
+ from pymatgen.core.structure import Neighbor
6
+ from pymatgen.util.string import formula_double_format
7
+
8
+
9
def blake(s):
    """Return the hex BLAKE2b digest (default digest size) of string ``s``."""
    hasher = blake2b()
    hasher.update(s.encode())
    return hasher.hexdigest()
11
+
12
+
13
class CompositionalSequence:
    """Accumulate neighbor-label counts, depth by depth, around one site.

    Args:
        focused_site_i: Index of the site the sequence is built around.
        starting_labels: Per-site labels, indexed by site index.
        hash_cs: When true, keep a running hash string (``cs_for_hashing``)
            instead of a list of per-depth strings (``compositional_seq``).
        use_previous_cs: Stored on the instance; counting is the same
            whichever value is given.
    """

    def __init__(self, focused_site_i, starting_labels, hash_cs=False, use_previous_cs=False):
        self.focused_site_i = focused_site_i
        self.labels = starting_labels
        self.first_element = starting_labels[focused_site_i]
        self.use_previous_cs = use_previous_cs

        # Only one of the two accumulators exists, depending on hash_cs.
        self.hash_cs = hash_cs
        if hash_cs:
            self.cs_for_hashing = ""
        else:
            self.compositional_seq = []

        # Sites are identified as (site index, periodic image) pairs; the
        # walk starts from the focused site in the home cell.
        self.new_sites = [(focused_site_i, (0, 0, 0))]
        self.seen_sites = set(self.new_sites)

        # Never reset by this class, so counts accumulate across depths.
        self.composition_counter: Counter = Counter()

    def __str__(self):
        """Return ``"<first label>-<sequence>"`` for the current state."""
        if self.hash_cs:
            tail = self.cs_for_hashing  # type: ignore
        else:
            tail = "-".join(self.compositional_seq)  # type: ignore
        return f"{self.first_element}-{tail}"

    def get_current_starting_sites(self):
        """Return the pending frontier as a new list and clear it."""
        frontier, self.new_sites = self.new_sites, []
        return list(frontier)

    def count_composition_for_neighbors(
        self,
        nsites: List[Neighbor],
    ) -> None:
        """Count the label of every neighbor that has not been seen yet.

        Each unseen ``(index, jimage)`` pair is marked as seen, queued as a
        starting site for the next depth, and its label is added to the
        running counter.
        """
        for neighbor in nsites:
            site_key = (neighbor.index, neighbor.jimage)
            if site_key in self.seen_sites:
                continue

            self.seen_sites.add(site_key)
            self.new_sites.append(site_key)
            self.composition_counter[self.labels[neighbor.index]] += 1

    def finalize_this_depth(self):
        """Fold the current counter into the sequence for this depth."""
        depth_formula = "".join(self.get_sorted_composition_list_from(self.composition_counter))

        if self.hash_cs:
            self.cs_for_hashing = blake(f"{self.cs_for_hashing}-{depth_formula}")
        else:
            self.compositional_seq.append(depth_formula)

    def get_sorted_composition_list_from(self, composition_counter: Counter) -> List[str]:
        """Return ``label + formatted count`` strings, sorted by label."""
        parts = []
        for symbol in sorted(composition_counter.keys()):
            amount = formula_double_format(composition_counter[symbol], False)
            parts.append(symbol + str(amount))
        return parts
@@ -0,0 +1,404 @@
1
+ import functools
2
+
3
+ import networkx as nx
4
+ import numpy as np
5
+ from graph_id.analysis.compositional_sequence import CompositionalSequence
6
+ from networkx.algorithms.distance_measures import diameter
7
+ from pymatgen.analysis.graphs import StructureGraph as PmgStructureGraph
8
+ from pymatgen.core import Element
9
+ from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
10
+ from itertools import combinations
11
+ from hashlib import blake2b
12
+
13
+
14
def standardize_loop(lst):
    """Pick a canonical direction for a loop given as a list of strings.

    The candidate orderings are ``lst`` itself and the same sequence walked
    backwards from the same first element. The one whose concatenated string
    is larger is returned; on a tie the backwards ordering is returned.
    ``lst`` is not modified. An empty ``lst`` raises ``IndexError``.
    """
    # First element stays in place, the remainder is reversed.
    mirrored = [lst[0], *reversed(lst[1:])]

    if "".join(lst) > "".join(mirrored):
        return lst
    return mirrored
20
+
21
+
22
class SiteOnlySpeciesString:
    """Minimal site stand-in that carries only a ``species_string``."""

    def __init__(self, species_string):
        # The single attribute kept from the full site object.
        self.species_string = species_string
25
+
26
+
27
class ConnectedSiteLight:
    """Lightweight record describing one site connected to another.

    Args:
        site: Object exposing ``species_string``; only that string is kept,
            wrapped in a ``SiteOnlySpeciesString``.
        jimage: Periodic image of the connected site.
        index: Index of the connected site.
        weight: Stored as given.
        dist: Stored as given.
    """

    def __init__(self, site, jimage, index, weight, dist):
        # Keep only the species string rather than the whole site object.
        self.site = SiteOnlySpeciesString(site.species_string)
        self.index = index
        self.jimage = jimage
        self.weight = weight
        self.dist = dist
41
+
42
+
43
class StructureGraph(PmgStructureGraph):  # type: ignore
    """pymatgen ``StructureGraph`` with site labels and compositional sequences.

    Attributes set by the methods below (none are set at construction):
        starting_labels: Per-site label strings, set by ``set_elemental_labels``,
            ``set_wyckoffs`` or ``set_loops``.
        cc_cs: One ``{"site_i": component, "cs_list": [...]}`` dict per
            connected component, set by the compositional-sequence methods.
    """

    @staticmethod
    def from_pymatgen_structure_graph(sg: PmgStructureGraph):
        """Build a ``StructureGraph`` from the structure and graph data of ``sg``."""
        graph_data = sg.as_dict()["graphs"]

        return StructureGraph(sg.structure, graph_data)

    # Copied from original pymatgen with modifications
    @staticmethod
    def with_local_env_strategy(structure, strategy, weights=False):
        """
        Constructor for StructureGraph, using a strategy
        from :Class: `pymatgen.analysis.local_env`.

        :param structure: Structure object
        :param strategy: an instance of a
            :Class: `pymatgen.analysis.local_env.NearNeighbors` object
        :param weights: if True, use weights from local_env class
            (consult relevant class for their meaning)
        :return: the new StructureGraph
        :raises ValueError: if ``strategy.structures_allowed`` is false
        """

        if not strategy.structures_allowed:
            raise ValueError(
                "Chosen strategy is not designed for use with structures! " "Please choose another strategy."
            )

        sg = StructureGraph.from_empty_graph(structure, name="bonds")

        for n, neighbors in enumerate(strategy.get_all_nn_info(structure)):
            for neighbor in neighbors:
                # local_env will always try to add two edges
                # for any one bond, one from site u to site v
                # and another from site v to site u: this is
                # harmless, so warn_duplicates=False
                sg.add_edge(
                    from_index=n,
                    from_jimage=(0, 0, 0),
                    to_index=neighbor["site_index"],
                    to_jimage=neighbor["image"],
                    weight=neighbor["weight"] if weights else None,
                    warn_duplicates=False,
                )

        return sg

    @staticmethod
    def with_indivisual_state_comp_strategy(structure, strategy, _sg, n, weights=False, rank_k=1, cutoff=6.0):
        """
        Add the neighbors of a single site to an existing graph.

        :param structure: Structure object
        :param strategy: object whose ``get_nn_info(structure, n, rank_k, cutoff)``
            returns dicts with the keys ``site_index``, ``image``, ``weight``
            and ``edge_properties``
        :param _sg: graph that receives the edges (modified in place)
        :param n: (int) index of the focused site
        :param weights: if True, use weights from local_env class
            (consult relevant class for their meaning)
        :param rank_k: (int) cluster index forwarded to the strategy
        :param cutoff: (float) cutoff forwarded to the strategy
        :return: ``_sg``
        :raises ValueError: if ``strategy.structures_allowed`` is false
        """

        if not strategy.structures_allowed:
            raise ValueError(  # noqa: TRY003
                "Chosen strategy is not designed for use with structures!",  # noqa: EM101
            )

        nn_info = strategy.get_nn_info(structure, n, rank_k, cutoff)

        for neighbor in nn_info:
            # local_env will always try to add two edges
            # for any one bond, one from site u to site v
            # and another from site v to site u: this is
            # harmless, so warn_duplicates=False
            _sg.add_edge(
                from_index=n,
                from_jimage=(0, 0, 0),
                to_index=neighbor["site_index"],
                to_jimage=neighbor["image"],
                weight=neighbor["weight"] if weights else None,
                warn_duplicates=False,
                edge_properties=neighbor["edge_properties"],
            )

        return _sg

    def set_elemental_labels(self):
        """Use each site's species string as its starting label."""
        self.starting_labels = [site.species_string for site in self.structure]

    def get_connected_sites_light(self, n, jimage=(0, 0, 0)):
        """
        A light version of get_connected_sites.
        periodic_site -> SiteOnlySpeciesString

        Returns a list of ``ConnectedSiteLight`` objects, one per distinct
        ``(index, image)`` pair reachable over an outgoing or incoming edge
        of site ``n``, in the order the edges are visited. ``weight`` and
        ``dist`` are always ``None``.
        """

        # A list keeps the result order deterministic; duplicates are
        # filtered through connected_site_images below.
        connected_sites = []
        connected_site_images = set()

        out_edges = [(u, v, d, "out") for u, v, d in self.graph.out_edges(n, data=True)]
        in_edges = [(u, v, d, "in") for u, v, d in self.graph.in_edges(n, data=True)]

        for u, v, d, direction in out_edges + in_edges:

            to_jimage = d["to_jimage"]

            if direction == "in":
                # Seen from n, an incoming edge leads to its source site,
                # in the opposite image direction.
                u, v = v, u
                to_jimage = np.multiply(-1, to_jimage)

            to_jimage = tuple(map(int, np.add(to_jimage, jimage)))

            if (v, to_jimage) not in connected_site_images:
                connected_site = ConnectedSiteLight(
                    site=self.structure[v], jimage=to_jimage, index=v, weight=None, dist=None
                )

                connected_sites.append(connected_site)
                connected_site_images.add((v, to_jimage))

        return connected_sites

    def set_wyckoffs(self, symmetry_tol: float = 0.1) -> None:
        """Label every site as ``"<species>_<wyckoff>_<space group number>"``.

        The symmetry dataset is computed on a copy of the structure in which
        every site has been replaced by hydrogen. If no dataset is returned,
        the elemental labels are used instead.
        """
        # NOTE(review): symmetry_tol is accepted but not forwarded to
        # SpacegroupAnalyzer. Left as is because forwarding it would change
        # the labels produced -- confirm the intended behavior.
        siteless_strc = self.structure.copy()

        for site_i in range(len(self.structure)):
            siteless_strc.replace(site_i, Element("H"))

        sga = SpacegroupAnalyzer(siteless_strc)
        sym_dataset = sga.get_symmetry_dataset()

        if sym_dataset is None:
            self.set_elemental_labels()
            return

        wyckoffs = sym_dataset["wyckoffs"]
        number = sym_dataset["number"]

        self.starting_labels = [
            f"{self.structure[site_i].species_string}_{w}_{number}" for site_i, w in enumerate(wyckoffs)
        ]

    def set_compositional_sequence_node_attr(
        self,
        hash_cs: bool = False,
        wyckoff: bool = False,
        additional_depth: int = 0,
        depth_factor: int = 2,
        use_previous_cs: bool = False,
    ) -> None:
        """Compute a compositional sequence for every site.

        For each connected component the walk depth is
        ``depth_factor * diameter + additional_depth``. Results are stored in
        ``self.cc_cs`` (one entry per component) and as the
        ``compositional_sequence`` node attribute
        (``"<starting label>_<sequence>"``). Requires ``starting_labels``
        to have been set.
        """

        node_attributes = {}
        self.cc_cs = []
        # Cache neighbor lookups for the duration of this call only.
        get_connected_sites_light = functools.lru_cache(maxsize=None)(self.get_connected_sites_light)

        ug = self.graph.to_undirected()

        for cc in nx.connected_components(ug):
            cs_list = []

            d = diameter(ug.subgraph(cc))
            # The depth is the same for every site of the component.
            depth = depth_factor * d + additional_depth

            for focused_site_i in cc:
                cs = CompositionalSequence(
                    focused_site_i=focused_site_i,
                    starting_labels=self.starting_labels,
                    hash_cs=hash_cs,
                    use_previous_cs=use_previous_cs or wyckoff,
                )

                for _ in range(depth):
                    for c_site in cs.get_current_starting_sites():
                        nsites = get_connected_sites_light(c_site[0], c_site[1])
                        cs.count_composition_for_neighbors(nsites)

                    cs.finalize_this_depth()

                this_cs = str(cs)

                node_attributes[focused_site_i] = self.starting_labels[focused_site_i] + "_" + this_cs
                cs_list.append(this_cs)

            self.cc_cs.append({"site_i": cc, "cs_list": cs_list})

        nx.set_node_attributes(self.graph, values=node_attributes, name="compositional_sequence")

    def get_loops(self, depth: int, index: int, shortest: bool = True):  # noqa: C901
        """
        Find loops that start and end at one site and return their site indices.

        Parameters:
            depth: maximum number of expansion steps (maximum loop size)
            index: index of the site used as the start of the loops
            shortest: if True, stop as soon as every pair of neighbors of
                ``index`` is joined by a loop that has been found

        Returns:
            [[(index, image), ...], ...]
        """

        get_connected_sites = functools.lru_cache(maxsize=None)(self.get_connected_sites)

        def find_all_rings(index, ring_list):
            # True when, for every pair of neighbors of `index`, some ring
            # leaves through one of them and returns through the other.
            neighbors = get_connected_sites(index, (0, 0, 0))
            for n0, n1 in combinations(neighbors, 2):
                found = False
                for ring in ring_list:
                    term0 = ring[1]
                    term1 = ring[-2]

                    if all(
                        (
                            n0.index == term0[0],
                            n0.jimage == term0[1],
                            n1.index == term1[0],
                            n1.jimage == term1[1],
                        ),
                    ):
                        found = True
                        break

                    if all(
                        (
                            n1.index == term0[0],
                            n1.jimage == term0[1],
                            n0.index == term1[0],
                            n0.jimage == term1[1],
                        ),
                    ):
                        found = True
                        break

                if found is False:
                    return False

            return True

        def get_further_lines_from_lines(lines):
            # Extend every line by one step to each neighbor of its last site.
            new_lines = []
            for line in lines:
                ind, image = line[-1]
                neighbors = get_connected_sites(ind, image)

                for n in neighbors:
                    new_line = [*line, (n.index, n.jimage)]

                    # Keep the line only if it did not revisit a site
                    # before this last step.
                    if len(new_line[:-1]) == len(set(new_line[:-1])):
                        new_lines.append(new_line)

            return new_lines

        lines = []
        lines.append([(index, (0, 0, 0))])

        ring_list = []

        for depth_i in range(depth):
            next_lines = []
            lines = get_further_lines_from_lines(lines)

            for line in lines:
                # First and last entries are the same: the line has closed.
                if line[0] == line[-1]:
                    # Closed lines from the first two expansions are
                    # discarded, as is the reverse of a known ring.
                    if depth_i > 1 and list(reversed(line)) not in ring_list:
                        ring_list.append(line)
                else:
                    next_lines.append(line)

            lines = next_lines

            # Stop searching once the theoretical number of rings is reached.
            if shortest and find_all_rings(index, ring_list):
                return ring_list

        return list(ring_list)

    def set_loops(self, depth_factor: int, additional_depth: int) -> None:
        """Set ``starting_labels`` to a hash of the loops through each site.

        The search depth is ``max component diameter * depth_factor +
        additional_depth``. Each loop is turned into its standardized
        species-string sequence and hashed; the sorted loop hashes of a site
        are hashed again to give that site's label.
        """
        self.starting_labels = []

        undirected_graph = self.graph.to_undirected()

        max_diameter = 0
        for cc in nx.connected_components(undirected_graph):
            d = diameter(undirected_graph.subgraph(cc))
            if d > max_diameter:
                max_diameter = d

        depth = max_diameter * depth_factor + additional_depth

        for site_i in range(len(self.graph.nodes)):
            all_loops = self.get_loops(depth=depth, index=site_i)
            all_loop_strings = []
            for loop in all_loops:
                loop_elements = [self.structure[site_i_jimage[0]].species_string for site_i_jimage in loop]

                loop_elements = standardize_loop(loop_elements)

                seed_str = "-".join(loop_elements)
                hashed_loop = blake2b(seed_str.encode(), digest_size=8).hexdigest()

                all_loop_strings.append(hashed_loop)

            seed_str_all_loops = ":".join(sorted(all_loop_strings))
            hashed_all_loops = blake2b(seed_str_all_loops.encode(), digest_size=8).hexdigest()

            self.starting_labels.append(hashed_all_loops)

    def set_indivisual_compositional_sequence_node_attr(
        self,
        n: int,
        hash_cs: bool = False,
        wyckoff: bool = False,
        additional_depth: int = 0,
        depth_factor: int = 2,
        use_previous_cs: bool = False,
    ) -> None:
        """Compute the compositional sequence of site ``n`` only.

        ``self.cc_cs`` receives one entry per connected component; only the
        component containing ``n`` gets a non-empty ``cs_list``. The
        ``compositional_sequence`` node attribute is set for ``n`` alone.
        Requires ``starting_labels`` to have been set.
        """
        node_attributes = {}
        self.cc_cs = []
        # Cache neighbor lookups for the duration of this call only.
        get_connected_sites_light = functools.lru_cache(maxsize=None)(self.get_connected_sites_light)

        ug = self.graph.to_undirected()

        for cc in nx.connected_components(ug):
            cs_list = []

            if n in cc:
                # The diameter is only needed for the component holding n.
                d = diameter(ug.subgraph(cc))
                depth = depth_factor * d + additional_depth

                cs = CompositionalSequence(
                    focused_site_i=n,
                    starting_labels=self.starting_labels,
                    hash_cs=hash_cs,
                    use_previous_cs=use_previous_cs or wyckoff,
                )

                for _this_depth in range(depth):
                    for c_site in cs.get_current_starting_sites():
                        nsites = get_connected_sites_light(c_site[0], c_site[1])
                        cs.count_composition_for_neighbors(nsites)

                    cs.finalize_this_depth()

                this_cs = str(cs)

                node_attributes[n] = self.starting_labels[n] + "_" + this_cs
                cs_list.append(this_cs)

            self.cc_cs.append({"site_i": cc, "cs_list": cs_list})

        nx.set_node_attributes(self.graph, values=node_attributes, name="compositional_sequence")
@@ -0,0 +1,122 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ import numpy as np
6
+ from pymatgen.analysis.local_env import NearNeighbors
7
+ from pymatgen.core import IStructure, Structure
8
+ from sklearn.cluster import DBSCAN
9
+
10
def _get_original_site(structure, site):
    """Return the index in ``structure`` of the site matching ``site``.

    For ``Structure``/``IStructure`` inputs a site matches when it is a
    periodic image of ``site``; for anything else a plain equality test is
    used.

    Raises:
        Exception: if no site matches.
    """
    if not isinstance(structure, IStructure | Structure):
        for idx, candidate in enumerate(structure):
            if site == candidate:
                return idx
    else:
        tol = 1e-8  # threshold in Site.is_periodic_image
        # Count, per site, how many fractional coordinates agree within tol,
        # and test the best-agreeing sites first to reduce the iteration.
        agreement = (np.abs(structure.frac_coords - site.frac_coords) < tol).sum(axis=1)
        for idx in np.argsort(-agreement):
            if site.is_periodic_image(structure[idx]):
                return idx
    raise Exception("Site not found!")  # noqa: TRY002, TRY003, EM101
30
+
31
+
32
class DistanceClusteringNN(NearNeighbors):
    """Near-neighbor strategy based on clustering of neighbor distances.

    Distances around a site are clustered and the clusters are numbered from
    the nearest outwards; that number and the element symbols are then used
    to compute the Graph ID.
    """

    def __init__(self) -> None:
        """Create the strategy; it takes no parameters."""

    @property
    def structures_allowed(self) -> bool:
        """
        Boolean property: can this NearNeighbors class be used with Structure
        objects?
        """
        return True

    def get_nn_info(self, structure: Structure, n: int, rank_k: int, cutoff: float = 6.0) -> list[dict[str, Any]]:
        """
        Return the neighbors of site ``n`` that fall in distance cluster ``rank_k``.

        Args:
            structure (Structure): input structure.
            n (int): index of site for which to determine near
                neighbors.
            rank_k (int): index into the sorted cluster thresholds; 0 is
                the nearest cluster.
            cutoff (float): distance cutoff parameter.
        Returns:
            siw (list[dict]): one dict per neighbor with the keys ``site``,
                ``image``, ``weight`` (distance rounded to 3 decimals),
                ``site_index`` and ``edge_properties``. Empty when fewer
                than ``rank_k + 1`` clusters exist.
        """
        site = structure[n]
        thresholds = self.get_cutoff_cluster(structure, n, cutoff)
        if len(thresholds) <= rank_k:
            return []

        candidates = structure.get_neighbors(site, thresholds[rank_k])
        upper = round(thresholds[rank_k], 3)
        # Lower bound of the shell: the previous cluster's threshold.
        lower = round(thresholds[rank_k - 1], 3) if rank_k > 0 else None
        is_periodic = isinstance(structure, (Structure, IStructure))

        siw = []
        for nn in candidates:
            weight = round(nn.nn_distance, 3)

            if rank_k > 0:
                in_shell = lower < weight <= upper
            else:
                in_shell = rank_k == 0 and weight <= upper
            if not in_shell:
                continue

            siw.append(
                {
                    "site": nn,
                    "image": self._get_image(structure, nn) if is_periodic else None,
                    "weight": weight,
                    "site_index": self._get_original_site(structure, nn),
                    "edge_properties": {"cluster_idx": rank_k + 1},
                },
            )

        return siw

    def get_cutoff_cluster(self, structure: Structure, n: int, cutoff: float = 6.0) -> list:
        """
        Derive bond-detection thresholds from DBSCAN clusters of distances.

        Every site within ``cutoff`` of site ``n`` contributes its distance;
        the largest distance of each cluster is returned, sorted ascending.
        Points DBSCAN marks as noise (label ``-1``) belong to no cluster.
        """
        sphere_sites = structure.get_sites_in_sphere(structure[n].coords, cutoff)
        # DBSCAN is fed 2-D points, so each distance is padded with a zero.
        distance_list = [[neighbor.nn_distance, 0] for neighbor in sphere_sites]

        dbscan = DBSCAN(eps=0.5, min_samples=2)
        dbscan.fit(distance_list)
        labels = dbscan.labels_

        cluster_max = [0] * (max(labels) + 1)
        for label, (dist, _pad) in zip(labels, distance_list):
            if label >= 0:
                cluster_max[label] = max(cluster_max[label], dist)

        return sorted(cluster_max)
@@ -0,0 +1,44 @@
1
+ import argparse
2
+ import csv
3
+ from datetime import datetime
4
+
5
+ from graph_id import GraphIDGenerator
6
+ from pymatgen.core import Structure
7
+ from tabulate import tabulate
8
+
9
if __name__ == "__main__":
    # Command-line entry point: compute a Graph ID for every structure file
    # given, print the results as a table and save them to a timestamped CSV
    # file in the current directory.
    parser = argparse.ArgumentParser(description="Graph ID: graph-based ID for materials")

    parser.add_argument(
        dest="filenames",
        metavar="filename",
        nargs="+",
        help="List of structure files.",
        default=[],
    )

    # NOTE(review): this flag is parsed but nothing below reads it.
    parser.add_argument("-p", "--parallel", help="parallel execution", action="store_true")

    # Parse first so that --help and usage errors exit before the generator
    # is constructed.
    args = parser.parse_args()
    gid = GraphIDGenerator()

    table = []

    for fname in args.filenames:
        s = Structure.from_file(fname)
        s.merge_sites(mode="delete")

        my_id = gid.get_id(s)

        table.append([my_id, fname])

    t_headers = ["GraphIDGenerator", "Filename"]

    print(tabulate(table, headers=t_headers))

    now = datetime.now()
    now_str = now.strftime("%Y%m%d-%H%M%S")

    # newline="" lets the csv module control line endings itself; without it
    # extra blank rows can appear on platforms that translate "\n".
    with open(f"graph_id_{now_str}.csv", "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerows(table)
File without changes
@@ -0,0 +1,138 @@
1
+ from copy import deepcopy
2
+ from hashlib import blake2b
3
+
4
+ import networkx as nx
5
+ import numpy as np
6
+ from graph_id.analysis.local_env import DistanceClusteringNN
7
+ from pymatgen.analysis.local_env import MinimumDistanceNN
8
+ from graph_id.analysis.graphs import StructureGraph
9
+ from graph_id.core.graph_id import GraphIDGenerator
10
+ from pymatgen.core import Element
11
+
12
+ __version__ = "0.1.0"
13
+
14
+
15
def blake(s):
    """Return the hex BLAKE2b digest (default digest size) of string ``s``."""
    hasher = blake2b()
    hasher.update(s.encode())
    return hasher.hexdigest()
17
+
18
+
19
class DistanceClusteringGraphID(GraphIDGenerator):
    """Graph ID generator that adds distance-cluster edges site by site.

    Args:
        nn: Neighbor strategy; defaults to ``DistanceClusteringNN()``.
        wyckoff, depth_factor, additional_depth, symmetry_tol, topology_only,
        loop, digest_size: Forwarded unchanged to ``GraphIDGenerator``.
        rank_k: Number of distance clusters to iterate over in ``get_id``.
        cutoff: Distance cutoff handed to the neighbor strategy.
    """

    def __init__(  # noqa: PLR0913
        self,
        nn=None,
        wyckoff=False,
        depth_factor=2,
        additional_depth=1,
        symmetry_tol=0.1,
        topology_only=False,
        loop=False,
        rank_k=3,
        cutoff=6.0,
        digest_size=8,
    ) -> None:
        super().__init__(
            nn,
            wyckoff,
            depth_factor,
            additional_depth,
            symmetry_tol,
            topology_only,
            loop,
            digest_size,
        )

        self.rank_k = rank_k
        self.cutoff = cutoff

        # The base class falls back to a different default strategy, so the
        # default is replaced here.
        if nn is None:
            self.nn = DistanceClusteringNN()
        else:
            self.nn = nn

    def get_id(self, structure):
        """Return the hex ID of ``structure``.

        For every cluster index below ``rank_k`` and every site, the edges
        touching that site are removed from a copy of a ``MinimumDistanceNN``
        graph, the site's edges for that cluster are added back, and the
        resulting compositional sequences are hashed. The per-cluster hashes
        are concatenated and hashed once more with ``digest_size``.
        """
        gid_list = []
        _sg = StructureGraph.with_local_env_strategy(structure, MinimumDistanceNN())
        for cluster_idx in range(self.rank_k):
            long_str_list = []
            for idx in range(len(structure)):
                copied_sg = deepcopy(_sg)
                # First remove every bond that involves site idx. The edges
                # are read from the untouched graph while the copy is edited.
                for from_index, to_index, dct in _sg.graph.edges(keys=False, data=True):
                    if from_index == idx or to_index == idx:
                        copied_sg.break_edge(from_index, to_index, dct["to_jimage"], allow_reverse=True)
                sg = self.prepare_structure_graph(structure, copied_sg, idx, cluster_idx)

                component_hashes = sorted(blake("-".join(sorted(component["cs_list"]))) for component in sg.cc_cs)
                long_str_list.append(":".join(component_hashes))

            long_str = ":".join(sorted(long_str_list))
            gid = blake2b(long_str.encode("ascii"), digest_size=self.digest_size).hexdigest()
            gid_list.append(gid)

        long_gid = "".join(gid_list)
        return blake2b(long_gid.encode("ascii"), digest_size=self.digest_size).hexdigest()

    def prepare_structure_graph(self, structure, _sg, n, rank_k):
        """Add site ``n``'s cluster-``rank_k`` edges to ``_sg`` and label it.

        Starting labels come from Wyckoff positions, loops or elements
        depending on the generator settings. The compositional sequence of
        site ``n`` is then recomputed until the number of distinct
        ``compositional_sequence`` node values stops changing.

        Returns:
            The graph, with ``cc_cs`` set.
        """

        sg = StructureGraph.with_indivisual_state_comp_strategy(
            structure=structure,
            strategy=self.nn,
            _sg=_sg,
            n=n,
            rank_k=rank_k,
            cutoff=self.cutoff,
        )

        use_previous_cs = False

        compound = sg.structure
        prev_num_uniq = len(compound.composition)

        if self.topology_only:
            for site_i in range(len(sg.structure)):
                sg.structure.replace(site_i, Element("H"))

        if self.wyckoff:
            sg.set_wyckoffs(symmetry_tol=self.symmetry_tol)
            prev_num_uniq = len(list(set(nx.get_node_attributes(sg.graph, "compositional_sequence").values())))

        elif self.loop:
            # StructureGraph names this method set_loops.
            sg.set_loops(
                depth_factor=self.depth_factor,
                additional_depth=self.additional_depth,
            )

        else:
            sg.set_elemental_labels()

        while True:
            sg.set_indivisual_compositional_sequence_node_attr(
                n=n,
                hash_cs=False,
                wyckoff=self.wyckoff,
                additional_depth=self.additional_depth,
                depth_factor=self.depth_factor,
                use_previous_cs=use_previous_cs or self.wyckoff,
            )

            num_unique_nodes = len(list(set(nx.get_node_attributes(sg.graph, "compositional_sequence").values())))
            use_previous_cs = True

            if prev_num_uniq == num_unique_nodes:
                break

            prev_num_uniq = num_unique_nodes

        return sg
@@ -0,0 +1,209 @@
1
+ import multiprocessing as multi
2
+ from copy import deepcopy
3
+ from hashlib import blake2b
4
+ from multiprocessing import Pool
5
+
6
+ import networkx as nx
7
+ import numpy as np
8
+ from graph_id.analysis.graphs import StructureGraph
9
+ from pymatgen.analysis.dimensionality import get_dimensionality_larsen
10
+ from pymatgen.analysis.local_env import MinimumDistanceNN
11
+ from pymatgen.core import Element
12
+ from tqdm import tqdm
13
+
14
+ __version__ = "0.1.0"
15
+
16
+
17
def blake(s):
    """Return the hexadecimal BLAKE2b digest of the string ``s``.

    ``s`` is encoded with ``str.encode()`` defaults and hashed with the
    default ``blake2b`` parameters.
    """
    hasher = blake2b()
    hasher.update(s.encode())
    return hasher.hexdigest()
19
+
20
+
21
class GraphIDGenerator:
    """Generate hash-based identifiers ("graph IDs") for structures.

    A structure is converted to a ``StructureGraph`` using the neighbour
    strategy ``nn``; node labels are refined until the number of distinct
    ``"compositional_sequence"`` labels stops changing, and the labels are
    then hashed with BLAKE2b.
    """

    def __init__(
        self,
        nn=None,
        wyckoff=False,
        depth_factor=2,
        additional_depth=1,
        symmetry_tol=0.1,
        topology_only=False,
        loop=False,
        digest_size=8,
    ):
        """Store the ID-generation settings.

        Args:
            nn: neighbour-finding strategy handed to
                ``StructureGraph.with_local_env_strategy``.  A
                ``MinimumDistanceNN()`` is created when ``None``.
            wyckoff: start label refinement from ``sg.set_wyckoffs``.
            depth_factor: forwarded to ``set_loops`` and
                ``set_compositional_sequence_node_attr``.
            additional_depth: forwarded to the same two calls.
            symmetry_tol: forwarded to ``sg.set_wyckoffs``.
            topology_only: replace every site by hydrogen before labelling
                and omit the reduced formula from the ID prefix.
            loop: start label refinement from ``sg.set_loops``.
            digest_size: BLAKE2b digest size (in bytes) of the final hash
                produced by ``get_id``.

        Raises:
            ValueError: if ``loop`` is combined with ``wyckoff`` or with
                ``topology_only``.
        """
        if wyckoff and loop:
            raise ValueError("wyckoff and loop cannot be True at the same time")

        if loop and topology_only:
            raise ValueError("loop and topology_only cannot be True at the same time")

        self.nn = MinimumDistanceNN() if nn is None else nn
        self.wyckoff = wyckoff
        self.additional_depth = additional_depth
        self.depth_factor = depth_factor
        self.symmetry_tol = symmetry_tol
        self.topology_only = topology_only
        self.loop = loop
        self.digest_size = digest_size

    @staticmethod
    def _count_unique_labels(sg):
        """Return the number of distinct "compositional_sequence" node labels."""
        # TODO: remove nx
        labels = nx.get_node_attributes(sg.graph, "compositional_sequence")
        return len(set(labels.values()))

    def get_id(self, structure):
        """Return the graph ID of ``structure``.

        For every entry of ``sg.cc_cs`` the sorted ``"cs_list"`` labels are
        joined with ``"-"`` and hashed; the resulting hashes are sorted,
        joined with ``":"`` and hashed again using ``self.digest_size``.
        The dimensionality (and, unless ``topology_only``, the reduced
        formula) is then prepended by ``elaborate_comp_dim``.
        """
        sg = self.prepare_structure_graph(structure)
        # Sorting makes the ID independent of the order of the components.
        component_hashes = sorted(blake("-".join(sorted(component["cs_list"]))) for component in sg.cc_cs)
        long_str = ":".join(component_hashes)
        gid = blake2b(long_str.encode("ascii"), digest_size=self.digest_size).hexdigest()

        return self.elaborate_comp_dim(sg, gid)

    def elaborate_comp_dim(self, sg, gid):
        """Prefix ``gid`` with ``"<dim>D-"`` and, unless ``topology_only``, the reduced formula."""
        dim = get_dimensionality_larsen(sg)
        gid = f"{dim}D-{gid}"

        if not self.topology_only:
            gid = f"{sg.structure.composition.reduced_formula}-{gid}"

        return gid

    @property
    def version(self):
        """Module ``__version__`` string."""
        return __version__

    def get_id_catch_error(self, structure):
        """Return ``get_id(structure)``, or ``""`` if any ``Exception`` is raised.

        Deliberately best-effort: it is the worker function of the parallel
        path of ``get_many_ids``, where one failing structure must not abort
        the whole batch.
        """
        try:
            return self.get_id(structure)
        except Exception:
            return ""

    def get_many_ids(self, structures, parallel=False):
        """Return the graph IDs of ``structures`` as a list, in input order.

        Args:
            structures: iterable of structures.
            parallel: when true, distribute the work over
                ``multiprocessing.cpu_count()`` worker processes and show a
                ``tqdm`` progress bar.

        Note:
            The two paths treat failures differently: the serial path lets
            exceptions from ``get_id`` propagate, whereas the parallel path
            uses ``get_id_catch_error`` and therefore yields ``""`` for a
            structure that fails.
        """
        if not parallel:
            return [self.get_id(s) for s in structures]

        # ``total`` is only a progress-bar hint, so accept iterables that
        # have no length (e.g. generators), as the serial path already does.
        total = len(structures) if hasattr(structures, "__len__") else None

        # The context manager shuts the workers down once all results have
        # been collected; previously the pool was never closed.
        with Pool(multi.cpu_count()) as pool:
            results = pool.imap(self.get_id_catch_error, structures)
            return list(tqdm(results, total=total))

    def get_component_ids(self, structure):
        """Return one ``{"site_i": ..., "graph_id": ...}`` dict per entry of ``sg.cc_cs``.

        The result is a 1-D numpy array of dtype ``object``.  Component IDs
        always use a 16-byte digest, independent of ``self.digest_size``.
        """
        sg = self.prepare_structure_graph(structure)
        cc_gid = np.empty([len(sg.cc_cs)], dtype=object)
        for i, component in enumerate(sg.cc_cs):
            each_long_str = blake("-".join(sorted(component["cs_list"])))
            gid = blake2b(each_long_str.encode("ascii"), digest_size=16).hexdigest()
            cc_gid[i] = {"site_i": component["site_i"], "graph_id": gid}

        return cc_gid

    def are_same(self, structure1, structure2):
        """Return whether both structures have the same graph ID."""
        return self.get_id(structure1) == self.get_id(structure2)

    def expand_for_low_dimensionality(self, sg):
        """Rebuild ``sg`` from a supercell when it is low-dimensional and connected.

        If ``get_dimensionality_larsen(sg)`` is below 3 and the graph has a
        single weakly connected component, a new graph is built from a
        supercell of the structure; otherwise ``sg`` is returned unchanged.
        """
        dimensionality = get_dimensionality_larsen(sg)

        if dimensionality < 3 and len(list(nx.weakly_connected_components(sg.graph))) == 1:
            supercell = sg.structure.copy()
            # NOTE(review): nested [[2, 2, 2]] differs from the flat
            # [factor, factor, factor] used in expand_for_multi_bonds;
            # presumably both mean a 2x2x2 expansion - confirm.
            supercell.make_supercell([[2, 2, 2]])
            sg = StructureGraph.with_local_env_strategy(supercell, self.nn)

        return sg

    def expand_for_multi_bonds(self, sg):
        """Return a graph without multi-bonds, enlarging the cell as needed.

        Starting from a deep copy of ``sg``, graphs are rebuilt from
        ``factor x factor x factor`` supercells of the original structure
        (``factor`` = 2, 3, ...) until ``has_multi_bonds`` is false.
        """
        _sg = deepcopy(sg)
        factor = 2

        while self.has_multi_bonds(_sg):
            # Always expand the original structure, not the previous supercell.
            _strc = sg.structure.copy()
            _strc.make_supercell([factor, factor, factor])

            _sg = StructureGraph.with_local_env_strategy(_strc, self.nn)
            factor += 1

        return _sg

    def has_multi_bonds(self, sg):
        """Return ``True`` if any edge of ``sg.graph`` has a non-zero third element.

        Presumably the third element is the multigraph edge key, so a
        non-zero key marks a second edge between the same pair of nodes -
        verify against the graph type used by ``StructureGraph``.
        """
        return any(edge[2] != 0 for edge in sg.graph.edges)

    def prepare_structure_graph(self, structure):
        """Build the structure graph and refine node labels until they stabilise.

        Starting labels come from ``set_wyckoffs`` (``wyckoff``),
        ``set_loops`` (``loop``) or ``set_elemental_labels``.
        ``set_compositional_sequence_node_attr`` is then applied repeatedly
        until the number of distinct ``"compositional_sequence"`` labels
        equals the count of the previous pass.
        """
        sg = StructureGraph.with_local_env_strategy(structure, self.nn)

        # Baseline for the convergence test, taken before topology_only
        # overwrites the species below.
        prev_num_uniq = len(sg.structure.composition)

        if self.topology_only:
            # Make every site the same species so only connectivity matters.
            for site_i in range(len(sg.structure)):
                sg.structure.replace(site_i, Element("H"))

        if self.wyckoff:
            sg.set_wyckoffs(symmetry_tol=self.symmetry_tol)
            prev_num_uniq = self._count_unique_labels(sg)
        elif self.loop:
            sg.set_loops(
                depth_factor=self.depth_factor,
                additional_depth=self.additional_depth,
            )
        else:
            sg.set_elemental_labels()

        use_previous_cs = False
        while True:
            sg.set_compositional_sequence_node_attr(
                hash_cs=True,
                wyckoff=self.wyckoff,
                additional_depth=self.additional_depth,
                depth_factor=self.depth_factor,
                use_previous_cs=use_previous_cs or self.wyckoff,
            )
            # Every pass after the first builds on the previous labels.
            use_previous_cs = True

            num_unique_nodes = self._count_unique_labels(sg)
            if prev_num_uniq == num_unique_nodes:
                break

            prev_num_uniq = num_unique_nodes

        return sg
@@ -0,0 +1,30 @@
1
+ [tool.poetry]
2
+ name = "graph-id-core"
3
+ version = "0.1.0"
4
+ description = ""
5
+ authors = ["Koki Muraoka <muraok_k@chemsys.t.u-tokyo.ac.jp>"]
6
+ readme = "README.md"
7
+ packages = [{include = "graph_id"}]
8
+
9
+ [tool.poetry.build]
10
+ script = "build.py"
11
+ generate-setup-file = true
12
+
13
+ [tool.poetry.dependencies]
14
+ pybind11 = "2.11.1"
15
+ python = ">=3.10,<3.14"
16
+ pymatgen = "^2025.4.20"
17
+ scikit-learn = ">=0.24.1"
18
+
19
+ [tool.poetry.group.dev.dependencies]
20
+ pytest = "^7.4.0"
21
+ pytest-cov = "^4.1.0"
22
+
23
+ [tool.pytest.ini_options]
24
+ testpaths = ["tests", "graph_id"]
25
+ norecursedirs = ["library", "build", "dist", ".git", ".venv", "__pycache__"]
26
+
27
+ [build-system]
28
+ requires = ["poetry-core>=1.0.0", "pybind11~=2.11.1", "setuptools"]
29
+ build-backend = "poetry.core.masonry.api"
30
+
@@ -0,0 +1,31 @@
1
# -*- coding: utf-8 -*-
# Packaging script for graph-id-core.  The metadata mirrors pyproject.toml,
# which sets ``generate-setup-file = true`` and ``script = "build.py"``.
from setuptools import setup

packages = [
    "graph_id",
    "graph_id.analysis",
    "graph_id.commands",
    "graph_id.core",
]

package_data = {"": ["*"]}

install_requires = [
    "pybind11==2.11.1",
    "pymatgen>=2025.4.20,<2026.0.0",
    "scikit-learn>=0.24.1",
]

setup_kwargs = dict(
    name="graph-id-core",
    version="0.1.0",
    description="",
    # README.md content, split at line breaks for readability.
    long_description=(
        "\n\n# Graph ID\n\n"
        "## Installation \n"
        "### pypi\n"
        "```\n"
        "pip install graph-id-core\n"
        "```\n"
        "\n"
        "### GitHub\n"
        "```\n"
        "git clone https://github.com/kmu/graph-id-core.git\n"
        "git submodule init\n"
        "git submodule update\n"
        "pip install -e .\n"
        "```\n"
    ),
    author="Koki Muraoka",
    author_email="muraok_k@chemsys.t.u-tokyo.ac.jp",
    maintainer="None",
    maintainer_email="None",
    url="None",
    packages=packages,
    package_data=package_data,
    install_requires=install_requires,
    python_requires=">=3.10,<3.14",
)

# Order matters from here on: build.py's ``build`` hook receives the keyword
# dict before it is handed to ``setup`` (presumably to register the compiled
# extension - see build.py).
from build import *

build(setup_kwargs)

setup(**setup_kwargs)