emmet-builders 0.57.1__py3-none-any.whl → 0.58.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of emmet-builders might be problematic. Click here for more details.

@@ -1,6 +1,8 @@
1
1
  from collections import defaultdict
2
2
  from math import ceil
3
3
  import itertools
4
+ import re
5
+ import boto3
4
6
  import numpy as np
5
7
  from maggma.builders import Builder
6
8
  from maggma.utils import grouper
@@ -18,6 +20,8 @@ from emmet.core.settings import EmmetSettings
18
20
  from emmet.core.electronic_structure import ElectronicStructureDoc
19
21
  from emmet.core.utils import jsanitize
20
22
 
23
+ from emmet.builders.utils import query_open_data
24
+
21
25
  SETTINGS = EmmetSettings()
22
26
 
23
27
 
@@ -43,8 +47,10 @@ class ElectronicStructureBuilder(Builder):
43
47
  tasks (Store): Store of task documents
44
48
  materials (Store): Store of materials documents
45
49
  electronic_structure (Store): Store of electronic structure summary data documents
46
- bandstructure_fs (Store): Store of bandstructures
47
- dos_fs (Store): Store of DOS
50
+ bandstructure_fs (Store, str): Store of bandstructures, or S3 URL string with prefix
51
+ (e.g. s3://materialsproject-parsed/bandstructures).
52
+ dos_fs (Store, str): Store of DOS, or S3 URL string with bucket and prefix
53
+ (e.g. s3://materialsproject-parsed/dos).
48
54
  chunk_size (int): Chunk size to use for processing. Defaults to 10.
49
55
  query (dict): Dictionary to limit materials to be analyzed
50
56
  """
@@ -57,8 +63,27 @@ class ElectronicStructureBuilder(Builder):
57
63
  self.chunk_size = chunk_size
58
64
  self.query = query if query else {}
59
65
 
66
+ self._s3_resource = None
67
+
68
+ sources = [tasks, materials]
69
+
70
+ fs_stores = [bandstructure_fs, dos_fs]
71
+
72
+ for store in fs_stores:
73
+ if isinstance(store, str):
74
+ if not re.match("^s3://.*", store):
75
+ raise ValueError(
76
+ "Please provide an S3 URL "
77
+ "in the format s3://{bucket_name}/{prefix}"
78
+ )
79
+
80
+ if self._s3_resource is None:
81
+ self._s3_resource = boto3.resource("s3")
82
+ else:
83
+ sources.append(store)
84
+
60
85
  super().__init__(
61
- sources=[tasks, materials, bandstructure_fs, dos_fs],
86
+ sources=sources,
62
87
  targets=[electronic_structure],
63
88
  chunk_size=chunk_size,
64
89
  **kwargs,
@@ -385,6 +410,7 @@ class ElectronicStructureBuilder(Builder):
385
410
  structure = Structure.from_dict(task_query["output"]["structure"])
386
411
 
387
412
  kpoints = task_query["orig_inputs"]["kpoints"]
413
+
388
414
  labels_dict = {
389
415
  label: point
390
416
  for label, point in zip(kpoints["labels"], kpoints["kpoints"])
@@ -397,16 +423,32 @@ class ElectronicStructureBuilder(Builder):
397
423
  bs_type = None
398
424
 
399
425
  if bs_type is None:
400
- bs_dict = self.bandstructure_fs.query_one(
401
- {self.bandstructure_fs.key: str(task_id)}
402
- )
426
+ if isinstance(self.bandstructure_fs, str):
427
+ _, _, bucket, prefix = self.bandstructure_fs.strip(
428
+ "/"
429
+ ).split("/")
430
+
431
+ bs_dict = query_open_data(
432
+ bucket,
433
+ prefix,
434
+ task_id,
435
+ monty_decode=False,
436
+ s3_resource=self._s3_resource,
437
+ )
438
+ else:
439
+ bs_dict = self.bandstructure_fs.query_one(
440
+ {self.bandstructure_fs.key: str(task_id)}
441
+ )
403
442
 
404
443
  if bs_dict is not None:
405
444
  bs = BandStructureSymmLine.from_dict(bs_dict["data"])
406
445
 
407
- bs_type = self._obtain_path_type(
408
- bs.labels_dict, bs.structure
409
- )
446
+ labels_dict = {
447
+ label: kpoint.frac_coords
448
+ for label, kpoint in bs.labels_dict.items()
449
+ }
450
+
451
+ bs_type = self._obtain_path_type(labels_dict, bs.structure)
410
452
 
411
453
  is_hubbard = task_query["input"]["is_hubbard"]
412
454
  lmaxmix = task_query["input"]["incar"].get(
@@ -425,6 +467,7 @@ class ElectronicStructureBuilder(Builder):
425
467
  "nkpoints": int(nkpoints),
426
468
  "updated_on": lu_dt,
427
469
  "output_structure": structure,
470
+ "labels_dict": labels_dict,
428
471
  }
429
472
  )
430
473
 
@@ -565,10 +608,19 @@ class ElectronicStructureBuilder(Builder):
565
608
  materials_doc["bandstructure"][bs_type]["lmaxmix"] = sorted_bs_data[0][
566
609
  "lmaxmix"
567
610
  ]
568
-
569
- bs_obj = self.bandstructure_fs.query_one(
570
- criteria={"fs_id": sorted_bs_data[0]["fs_id"]}
571
- )
611
+ if isinstance(self.bandstructure_fs, str):
612
+ _, _, bucket, prefix = self.bandstructure_fs.strip("/").split("/")
613
+ bs_obj = query_open_data(
614
+ bucket,
615
+ prefix,
616
+ sorted_bs_data[0]["task_id"],
617
+ monty_decode=False,
618
+ s3_resource=self._s3_resource,
619
+ )
620
+ else:
621
+ bs_obj = self.bandstructure_fs.query_one(
622
+ criteria={"fs_id": sorted_bs_data[0]["fs_id"]}
623
+ )
572
624
 
573
625
  materials_doc["bandstructure"][bs_type]["object"] = (
574
626
  bs_obj["data"] if bs_obj is not None else None
@@ -602,9 +654,20 @@ class ElectronicStructureBuilder(Builder):
602
654
 
603
655
  materials_doc["dos"]["lmaxmix"] = sorted_dos_data[0]["lmaxmix"]
604
656
 
605
- dos_obj = self.dos_fs.query_one(
606
- criteria={"fs_id": sorted_dos_data[0]["fs_id"]}
607
- )
657
+ if isinstance(self.bandstructure_fs, str):
658
+ _, _, bucket, prefix = self.dos_fs.strip("/").split("/")
659
+ dos_obj = query_open_data(
660
+ bucket,
661
+ prefix,
662
+ sorted_dos_data[0]["task_id"],
663
+ monty_decode=False,
664
+ s3_resource=self._s3_resource,
665
+ )
666
+ else:
667
+ dos_obj = self.dos_fs.query_one(
668
+ criteria={"fs_id": sorted_dos_data[0]["fs_id"]}
669
+ )
670
+
608
671
  materials_doc["dos"]["object"] = (
609
672
  dos_obj["data"] if dos_obj is not None else None
610
673
  )
emmet/builders/utils.py CHANGED
@@ -1,6 +1,12 @@
1
- from typing import Set, Union
1
+ from typing import Set, Union, Any
2
2
  import sys
3
3
  import os
4
+ from gzip import GzipFile
5
+ import orjson
6
+ import json
7
+ from io import BytesIO
8
+ from monty.serialization import MontyDecoder
9
+ from botocore.exceptions import ClientError
4
10
  from itertools import chain, combinations
5
11
  from pymatgen.core import Structure
6
12
  from pymatgen.analysis.diffusion.neb.full_path_mapper import MigrationGraph
@@ -146,6 +152,56 @@ def get_hop_cutoff(
146
152
  return None
147
153
 
148
154
 
155
+ def query_open_data(
156
+ bucket: str,
157
+ prefix: str,
158
+ key: str,
159
+ monty_decode: bool = True,
160
+ s3_resource: Any = None,
161
+ ) -> Union[dict, None]:
162
+ """Query a Materials Project AWS S3 Open Data bucket directly with boto3
163
+
164
+ Args:
165
+ bucket (str): Materials project bucket name
166
+ prefix (str): Full set of file prefixes
167
+ key (str): Key for file
168
+ monty_decode (bool): Whether to monty decode or keep as dictionary. Defaults to True.
169
+ s3_resource (Optional[Any]): S3 resource. One will be instantiated if none are provided
170
+
171
+ Returns:
172
+ dict: MontyDecoded data or None
173
+ """
174
+
175
+ def decode(content, monty_decode):
176
+ if monty_decode:
177
+ result = MontyDecoder().decode(content)
178
+ else:
179
+ result = orjson.loads(content)
180
+ return result
181
+
182
+ try:
183
+ file_key = f"{prefix}/{key}.json.gz"
184
+ ref = s3_resource.Object(bucket, file_key) # type: ignore
185
+ bytes = ref.get()["Body"] # type: ignore
186
+
187
+ with GzipFile(fileobj=bytes, mode="r") as gzipfile:
188
+ content = gzipfile.read()
189
+
190
+ try:
191
+ result = decode(content, monty_decode)
192
+ except (orjson.JSONDecodeError, json.JSONDecodeError):
193
+ try:
194
+ with GzipFile(fileobj=BytesIO(content), mode="r") as gzipfile_nested:
195
+ result = decode(gzipfile_nested.read(), monty_decode)
196
+ except Exception:
197
+ print(f"Issue decoding {file_key} from bucket {bucket}")
198
+ return None
199
+ except ClientError:
200
+ return None
201
+
202
+ return result
203
+
204
+
149
205
  # From: https://stackoverflow.com/a/45669280
150
206
  class HiddenPrints:
151
207
  def __enter__(self):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: emmet-builders
3
- Version: 0.57.1
3
+ Version: 0.58.0
4
4
  Summary: Builders for the Emmet Library
5
5
  Home-page: https://github.com/materialsproject/emmet
6
6
  Author: The Materials Project
@@ -9,7 +9,7 @@ License: modified BSD
9
9
  Platform: UNKNOWN
10
10
  Requires-Python: >=3.8
11
11
  Requires-Dist: emmet-core[all]
12
- Requires-Dist: maggma (>=0.49.17)
12
+ Requires-Dist: maggma (>=0.51.9)
13
13
  Requires-Dist: matminer (>=0.7.3)
14
14
  Provides-Extra: docs
15
15
  Requires-Dist: mkdocs ; extra == 'docs'
@@ -1,6 +1,6 @@
1
1
  emmet/builders/__init__.py,sha256=y-ZREtieuFK3MaYvCBDMPf3YLxnsZG1VNho9lMjnRDU,221
2
2
  emmet/builders/settings.py,sha256=xVIqpQjSxn--m6mbKJIr7g-nqoqZr1Vcxi3YNa3etOg,2766
3
- emmet/builders/utils.py,sha256=VtA_77cBqzKBaA747FBbcPX3M0QmQE-l2nMLzlQwijw,5775
3
+ emmet/builders/utils.py,sha256=iaCWMGjttInSW21a_zEk4CqN4HPlSx7BX3a48QMZw3c,7537
4
4
  emmet/builders/abinit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  emmet/builders/abinit/phonon.py,sha256=dWQ3pGjxTES7jkwZVoqeNgTMhXORXcg8rkxAA9lLd5Q,32026
6
6
  emmet/builders/abinit/sound_velocity.py,sha256=3S3EaAaLwKKZ9-4Lnv3nUX6dHkIahRkpr3ecM_8nWx8,6999
@@ -16,7 +16,7 @@ emmet/builders/materials/corrected_entries.py,sha256=GctwGyE3JYBAnKcCN2PJjOa1stm
16
16
  emmet/builders/materials/dielectric.py,sha256=-AagE2LwsB6U5A1cG-wPEKK-4zd3n57XzWEOQ0qCLps,6522
17
17
  emmet/builders/materials/elasticity.py,sha256=O6AdNlPwUCz9bQmgRVoqtlwMTkgK31e-kKK94HmUVi8,16297
18
18
  emmet/builders/materials/electrodes.py,sha256=7bkCCObs-XMcJ3wv6BjX6vWzdK2jdwdeAMNGQkvro6Y,23258
19
- emmet/builders/materials/electronic_structure.py,sha256=wadGgWGuW7zsZMDODVZVPaBjR7EQSwZiVs1xm-tPzN8,26395
19
+ emmet/builders/materials/electronic_structure.py,sha256=gn7krFpbNs4q82TWYsycQmRrhdD1tjx4JdOi3qGspRY,28892
20
20
  emmet/builders/materials/magnetism.py,sha256=h3sMPracp1KaYKxBEQLt9AlLmbUl_OAE2g5YZdQykZA,4599
21
21
  emmet/builders/materials/optimade.py,sha256=fdY9sM2eJt0g_ka-QkGQSSSHJ2xfiWN-G7LIvHuctyo,5158
22
22
  emmet/builders/materials/oxidation_states.py,sha256=bgHFpVzrHyyifTE1jdGTMHi39GRiZ9wCcmQKbCULXlY,1540
@@ -43,7 +43,7 @@ emmet/builders/qchem/molecules.py,sha256=TpCysoSQzWGZ_P4sDfz76wq6tcodyPFDJv0v9xt
43
43
  emmet/builders/vasp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
44
  emmet/builders/vasp/materials.py,sha256=RGJawnvUwwR1PIJBEIJSbIuGzwsF2lj04P9rl37RIZ0,13332
45
45
  emmet/builders/vasp/task_validator.py,sha256=sV-yxBq-ZTKjI73jdEzWdaFTC0Ng18xMGoHglu5Fh9Q,3600
46
- emmet_builders-0.57.1.dist-info/METADATA,sha256=xwlyajZWiLgj-bbmLF9mOdMoie3jWO9NPmNsvhb7NoA,1449
47
- emmet_builders-0.57.1.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
48
- emmet_builders-0.57.1.dist-info/top_level.txt,sha256=6GcpbmWPeFhNCTfDFilb8GQ4T1UQu4z9c5jpobjwE-Q,6
49
- emmet_builders-0.57.1.dist-info/RECORD,,
46
+ emmet_builders-0.58.0.dist-info/METADATA,sha256=c_4UxlNCjeSZEeqGuIzGxCzKowSRyPYrAVcbaZGuOHs,1448
47
+ emmet_builders-0.58.0.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
48
+ emmet_builders-0.58.0.dist-info/top_level.txt,sha256=6GcpbmWPeFhNCTfDFilb8GQ4T1UQu4z9c5jpobjwE-Q,6
49
+ emmet_builders-0.58.0.dist-info/RECORD,,