emmet-builders 0.57.2__py3-none-any.whl → 0.58.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of emmet-builders might be problematic. Click here for more details.
- emmet/builders/materials/electronic_structure.py +79 -16
- emmet/builders/utils.py +57 -1
- {emmet_builders-0.57.2.dist-info → emmet_builders-0.58.0.dist-info}/METADATA +2 -2
- {emmet_builders-0.57.2.dist-info → emmet_builders-0.58.0.dist-info}/RECORD +6 -6
- {emmet_builders-0.57.2.dist-info → emmet_builders-0.58.0.dist-info}/WHEEL +0 -0
- {emmet_builders-0.57.2.dist-info → emmet_builders-0.58.0.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
from collections import defaultdict
|
|
2
2
|
from math import ceil
|
|
3
3
|
import itertools
|
|
4
|
+
import re
|
|
5
|
+
import boto3
|
|
4
6
|
import numpy as np
|
|
5
7
|
from maggma.builders import Builder
|
|
6
8
|
from maggma.utils import grouper
|
|
@@ -18,6 +20,8 @@ from emmet.core.settings import EmmetSettings
|
|
|
18
20
|
from emmet.core.electronic_structure import ElectronicStructureDoc
|
|
19
21
|
from emmet.core.utils import jsanitize
|
|
20
22
|
|
|
23
|
+
from emmet.builders.utils import query_open_data
|
|
24
|
+
|
|
21
25
|
SETTINGS = EmmetSettings()
|
|
22
26
|
|
|
23
27
|
|
|
@@ -43,8 +47,10 @@ class ElectronicStructureBuilder(Builder):
|
|
|
43
47
|
tasks (Store): Store of task documents
|
|
44
48
|
materials (Store): Store of materials documents
|
|
45
49
|
electronic_structure (Store): Store of electronic structure summary data documents
|
|
46
|
-
bandstructure_fs (Store): Store of bandstructures
|
|
47
|
-
|
|
50
|
+
bandstructure_fs (Store, str): Store of bandstructures, or S3 URL string with prefix
|
|
51
|
+
(e.g. s3://materialsproject-parsed/bandstructures).
|
|
52
|
+
dos_fs (Store, str): Store of DOS, or S3 URL string with bucket and prefix
|
|
53
|
+
(e.g. s3://materialsproject-parsed/dos).
|
|
48
54
|
chunk_size (int): Chunk size to use for processing. Defaults to 10.
|
|
49
55
|
query (dict): Dictionary to limit materials to be analyzed
|
|
50
56
|
"""
|
|
@@ -57,8 +63,27 @@ class ElectronicStructureBuilder(Builder):
|
|
|
57
63
|
self.chunk_size = chunk_size
|
|
58
64
|
self.query = query if query else {}
|
|
59
65
|
|
|
66
|
+
self._s3_resource = None
|
|
67
|
+
|
|
68
|
+
sources = [tasks, materials]
|
|
69
|
+
|
|
70
|
+
fs_stores = [bandstructure_fs, dos_fs]
|
|
71
|
+
|
|
72
|
+
for store in fs_stores:
|
|
73
|
+
if isinstance(store, str):
|
|
74
|
+
if not re.match("^s3://.*", store):
|
|
75
|
+
raise ValueError(
|
|
76
|
+
"Please provide an S3 URL "
|
|
77
|
+
"in the format s3://{bucket_name}/{prefix}"
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
if self._s3_resource is None:
|
|
81
|
+
self._s3_resource = boto3.resource("s3")
|
|
82
|
+
else:
|
|
83
|
+
sources.append(store)
|
|
84
|
+
|
|
60
85
|
super().__init__(
|
|
61
|
-
sources=[tasks, materials, bandstructure_fs, dos_fs],
|
|
86
|
+
sources=sources,
|
|
62
87
|
targets=[electronic_structure],
|
|
63
88
|
chunk_size=chunk_size,
|
|
64
89
|
**kwargs,
|
|
@@ -385,6 +410,7 @@ class ElectronicStructureBuilder(Builder):
|
|
|
385
410
|
structure = Structure.from_dict(task_query["output"]["structure"])
|
|
386
411
|
|
|
387
412
|
kpoints = task_query["orig_inputs"]["kpoints"]
|
|
413
|
+
|
|
388
414
|
labels_dict = {
|
|
389
415
|
label: point
|
|
390
416
|
for label, point in zip(kpoints["labels"], kpoints["kpoints"])
|
|
@@ -397,16 +423,32 @@ class ElectronicStructureBuilder(Builder):
|
|
|
397
423
|
bs_type = None
|
|
398
424
|
|
|
399
425
|
if bs_type is None:
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
426
|
+
if isinstance(self.bandstructure_fs, str):
|
|
427
|
+
_, _, bucket, prefix = self.bandstructure_fs.strip(
|
|
428
|
+
"/"
|
|
429
|
+
).split("/")
|
|
430
|
+
|
|
431
|
+
bs_dict = query_open_data(
|
|
432
|
+
bucket,
|
|
433
|
+
prefix,
|
|
434
|
+
task_id,
|
|
435
|
+
monty_decode=False,
|
|
436
|
+
s3_resource=self._s3_resource,
|
|
437
|
+
)
|
|
438
|
+
else:
|
|
439
|
+
bs_dict = self.bandstructure_fs.query_one(
|
|
440
|
+
{self.bandstructure_fs.key: str(task_id)}
|
|
441
|
+
)
|
|
403
442
|
|
|
404
443
|
if bs_dict is not None:
|
|
405
444
|
bs = BandStructureSymmLine.from_dict(bs_dict["data"])
|
|
406
445
|
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
446
|
+
labels_dict = {
|
|
447
|
+
label: kpoint.frac_coords
|
|
448
|
+
for label, kpoint in bs.labels_dict.items()
|
|
449
|
+
}
|
|
450
|
+
|
|
451
|
+
bs_type = self._obtain_path_type(labels_dict, bs.structure)
|
|
410
452
|
|
|
411
453
|
is_hubbard = task_query["input"]["is_hubbard"]
|
|
412
454
|
lmaxmix = task_query["input"]["incar"].get(
|
|
@@ -425,6 +467,7 @@ class ElectronicStructureBuilder(Builder):
|
|
|
425
467
|
"nkpoints": int(nkpoints),
|
|
426
468
|
"updated_on": lu_dt,
|
|
427
469
|
"output_structure": structure,
|
|
470
|
+
"labels_dict": labels_dict,
|
|
428
471
|
}
|
|
429
472
|
)
|
|
430
473
|
|
|
@@ -565,10 +608,19 @@ class ElectronicStructureBuilder(Builder):
|
|
|
565
608
|
materials_doc["bandstructure"][bs_type]["lmaxmix"] = sorted_bs_data[0][
|
|
566
609
|
"lmaxmix"
|
|
567
610
|
]
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
611
|
+
if isinstance(self.bandstructure_fs, str):
|
|
612
|
+
_, _, bucket, prefix = self.bandstructure_fs.strip("/").split("/")
|
|
613
|
+
bs_obj = query_open_data(
|
|
614
|
+
bucket,
|
|
615
|
+
prefix,
|
|
616
|
+
sorted_bs_data[0]["task_id"],
|
|
617
|
+
monty_decode=False,
|
|
618
|
+
s3_resource=self._s3_resource,
|
|
619
|
+
)
|
|
620
|
+
else:
|
|
621
|
+
bs_obj = self.bandstructure_fs.query_one(
|
|
622
|
+
criteria={"fs_id": sorted_bs_data[0]["fs_id"]}
|
|
623
|
+
)
|
|
572
624
|
|
|
573
625
|
materials_doc["bandstructure"][bs_type]["object"] = (
|
|
574
626
|
bs_obj["data"] if bs_obj is not None else None
|
|
@@ -602,9 +654,20 @@ class ElectronicStructureBuilder(Builder):
|
|
|
602
654
|
|
|
603
655
|
materials_doc["dos"]["lmaxmix"] = sorted_dos_data[0]["lmaxmix"]
|
|
604
656
|
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
657
|
+
if isinstance(self.bandstructure_fs, str):
|
|
658
|
+
_, _, bucket, prefix = self.dos_fs.strip("/").split("/")
|
|
659
|
+
dos_obj = query_open_data(
|
|
660
|
+
bucket,
|
|
661
|
+
prefix,
|
|
662
|
+
sorted_dos_data[0]["task_id"],
|
|
663
|
+
monty_decode=False,
|
|
664
|
+
s3_resource=self._s3_resource,
|
|
665
|
+
)
|
|
666
|
+
else:
|
|
667
|
+
dos_obj = self.dos_fs.query_one(
|
|
668
|
+
criteria={"fs_id": sorted_dos_data[0]["fs_id"]}
|
|
669
|
+
)
|
|
670
|
+
|
|
608
671
|
materials_doc["dos"]["object"] = (
|
|
609
672
|
dos_obj["data"] if dos_obj is not None else None
|
|
610
673
|
)
|
emmet/builders/utils.py
CHANGED
|
@@ -1,6 +1,12 @@
|
|
|
1
|
-
from typing import Set, Union
|
|
1
|
+
from typing import Set, Union, Any
|
|
2
2
|
import sys
|
|
3
3
|
import os
|
|
4
|
+
from gzip import GzipFile
|
|
5
|
+
import orjson
|
|
6
|
+
import json
|
|
7
|
+
from io import BytesIO
|
|
8
|
+
from monty.serialization import MontyDecoder
|
|
9
|
+
from botocore.exceptions import ClientError
|
|
4
10
|
from itertools import chain, combinations
|
|
5
11
|
from pymatgen.core import Structure
|
|
6
12
|
from pymatgen.analysis.diffusion.neb.full_path_mapper import MigrationGraph
|
|
@@ -146,6 +152,56 @@ def get_hop_cutoff(
|
|
|
146
152
|
return None
|
|
147
153
|
|
|
148
154
|
|
|
155
|
+
def query_open_data(
|
|
156
|
+
bucket: str,
|
|
157
|
+
prefix: str,
|
|
158
|
+
key: str,
|
|
159
|
+
monty_decode: bool = True,
|
|
160
|
+
s3_resource: Any = None,
|
|
161
|
+
) -> Union[dict, None]:
|
|
162
|
+
"""Query a Materials Project AWS S3 Open Data bucket directly with boto3
|
|
163
|
+
|
|
164
|
+
Args:
|
|
165
|
+
bucket (str): Materials project bucket name
|
|
166
|
+
prefix (str): Full set of file prefixes
|
|
167
|
+
key (str): Key for file
|
|
168
|
+
monty_decode (bool): Whether to monty decode or keep as dictionary. Defaults to True.
|
|
169
|
+
s3_resource (Optional[Any]): S3 resource. One will be instantiated if none are provided
|
|
170
|
+
|
|
171
|
+
Returns:
|
|
172
|
+
dict: MontyDecoded data or None
|
|
173
|
+
"""
|
|
174
|
+
|
|
175
|
+
def decode(content, monty_decode):
|
|
176
|
+
if monty_decode:
|
|
177
|
+
result = MontyDecoder().decode(content)
|
|
178
|
+
else:
|
|
179
|
+
result = orjson.loads(content)
|
|
180
|
+
return result
|
|
181
|
+
|
|
182
|
+
try:
|
|
183
|
+
file_key = f"{prefix}/{key}.json.gz"
|
|
184
|
+
ref = s3_resource.Object(bucket, file_key) # type: ignore
|
|
185
|
+
bytes = ref.get()["Body"] # type: ignore
|
|
186
|
+
|
|
187
|
+
with GzipFile(fileobj=bytes, mode="r") as gzipfile:
|
|
188
|
+
content = gzipfile.read()
|
|
189
|
+
|
|
190
|
+
try:
|
|
191
|
+
result = decode(content, monty_decode)
|
|
192
|
+
except (orjson.JSONDecodeError, json.JSONDecodeError):
|
|
193
|
+
try:
|
|
194
|
+
with GzipFile(fileobj=BytesIO(content), mode="r") as gzipfile_nested:
|
|
195
|
+
result = decode(gzipfile_nested.read(), monty_decode)
|
|
196
|
+
except Exception:
|
|
197
|
+
print(f"Issue decoding {file_key} from bucket {bucket}")
|
|
198
|
+
return None
|
|
199
|
+
except ClientError:
|
|
200
|
+
return None
|
|
201
|
+
|
|
202
|
+
return result
|
|
203
|
+
|
|
204
|
+
|
|
149
205
|
# From: https://stackoverflow.com/a/45669280
|
|
150
206
|
class HiddenPrints:
|
|
151
207
|
def __enter__(self):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: emmet-builders
|
|
3
|
-
Version: 0.57.2
|
|
3
|
+
Version: 0.58.0
|
|
4
4
|
Summary: Builders for the Emmet Library
|
|
5
5
|
Home-page: https://github.com/materialsproject/emmet
|
|
6
6
|
Author: The Materials Project
|
|
@@ -9,7 +9,7 @@ License: modified BSD
|
|
|
9
9
|
Platform: UNKNOWN
|
|
10
10
|
Requires-Python: >=3.8
|
|
11
11
|
Requires-Dist: emmet-core[all]
|
|
12
|
-
Requires-Dist: maggma (>=0.
|
|
12
|
+
Requires-Dist: maggma (>=0.51.9)
|
|
13
13
|
Requires-Dist: matminer (>=0.7.3)
|
|
14
14
|
Provides-Extra: docs
|
|
15
15
|
Requires-Dist: mkdocs ; extra == 'docs'
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
emmet/builders/__init__.py,sha256=y-ZREtieuFK3MaYvCBDMPf3YLxnsZG1VNho9lMjnRDU,221
|
|
2
2
|
emmet/builders/settings.py,sha256=xVIqpQjSxn--m6mbKJIr7g-nqoqZr1Vcxi3YNa3etOg,2766
|
|
3
|
-
emmet/builders/utils.py,sha256=
|
|
3
|
+
emmet/builders/utils.py,sha256=iaCWMGjttInSW21a_zEk4CqN4HPlSx7BX3a48QMZw3c,7537
|
|
4
4
|
emmet/builders/abinit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
5
|
emmet/builders/abinit/phonon.py,sha256=dWQ3pGjxTES7jkwZVoqeNgTMhXORXcg8rkxAA9lLd5Q,32026
|
|
6
6
|
emmet/builders/abinit/sound_velocity.py,sha256=3S3EaAaLwKKZ9-4Lnv3nUX6dHkIahRkpr3ecM_8nWx8,6999
|
|
@@ -16,7 +16,7 @@ emmet/builders/materials/corrected_entries.py,sha256=GctwGyE3JYBAnKcCN2PJjOa1stm
|
|
|
16
16
|
emmet/builders/materials/dielectric.py,sha256=-AagE2LwsB6U5A1cG-wPEKK-4zd3n57XzWEOQ0qCLps,6522
|
|
17
17
|
emmet/builders/materials/elasticity.py,sha256=O6AdNlPwUCz9bQmgRVoqtlwMTkgK31e-kKK94HmUVi8,16297
|
|
18
18
|
emmet/builders/materials/electrodes.py,sha256=7bkCCObs-XMcJ3wv6BjX6vWzdK2jdwdeAMNGQkvro6Y,23258
|
|
19
|
-
emmet/builders/materials/electronic_structure.py,sha256=
|
|
19
|
+
emmet/builders/materials/electronic_structure.py,sha256=gn7krFpbNs4q82TWYsycQmRrhdD1tjx4JdOi3qGspRY,28892
|
|
20
20
|
emmet/builders/materials/magnetism.py,sha256=h3sMPracp1KaYKxBEQLt9AlLmbUl_OAE2g5YZdQykZA,4599
|
|
21
21
|
emmet/builders/materials/optimade.py,sha256=fdY9sM2eJt0g_ka-QkGQSSSHJ2xfiWN-G7LIvHuctyo,5158
|
|
22
22
|
emmet/builders/materials/oxidation_states.py,sha256=bgHFpVzrHyyifTE1jdGTMHi39GRiZ9wCcmQKbCULXlY,1540
|
|
@@ -43,7 +43,7 @@ emmet/builders/qchem/molecules.py,sha256=TpCysoSQzWGZ_P4sDfz76wq6tcodyPFDJv0v9xt
|
|
|
43
43
|
emmet/builders/vasp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
44
44
|
emmet/builders/vasp/materials.py,sha256=RGJawnvUwwR1PIJBEIJSbIuGzwsF2lj04P9rl37RIZ0,13332
|
|
45
45
|
emmet/builders/vasp/task_validator.py,sha256=sV-yxBq-ZTKjI73jdEzWdaFTC0Ng18xMGoHglu5Fh9Q,3600
|
|
46
|
-
emmet_builders-0.
|
|
47
|
-
emmet_builders-0.57.2.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
|
48
|
-
emmet_builders-0.57.2.dist-info/top_level.txt,sha256=6GcpbmWPeFhNCTfDFilb8GQ4T1UQu4z9c5jpobjwE-Q,6
|
|
49
|
-
emmet_builders-0.57.2.dist-info/RECORD,,
|
|
46
|
+
emmet_builders-0.58.0.dist-info/METADATA,sha256=c_4UxlNCjeSZEeqGuIzGxCzKowSRyPYrAVcbaZGuOHs,1448
|
|
47
|
+
emmet_builders-0.58.0.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
|
48
|
+
emmet_builders-0.58.0.dist-info/top_level.txt,sha256=6GcpbmWPeFhNCTfDFilb8GQ4T1UQu4z9c5jpobjwE-Q,6
|
|
49
|
+
emmet_builders-0.58.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|