emmet-builders 0.78.3__py3-none-any.whl → 0.86.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. emmet/builders/abinit/phonon.py +47 -47
  2. emmet/builders/abinit/sound_velocity.py +15 -11
  3. emmet/builders/feff/xas.py +1 -2
  4. emmet/builders/materials/absorption_spectrum.py +25 -14
  5. emmet/builders/materials/alloys.py +10 -11
  6. emmet/builders/materials/chemenv.py +2 -3
  7. emmet/builders/materials/corrected_entries.py +21 -15
  8. emmet/builders/materials/dielectric.py +19 -11
  9. emmet/builders/materials/elasticity.py +44 -33
  10. emmet/builders/materials/electrodes.py +35 -28
  11. emmet/builders/materials/electronic_structure.py +17 -17
  12. emmet/builders/materials/magnetism.py +11 -4
  13. emmet/builders/materials/optimade.py +7 -3
  14. emmet/builders/materials/piezoelectric.py +24 -21
  15. emmet/builders/materials/provenance.py +16 -13
  16. emmet/builders/materials/robocrys.py +2 -3
  17. emmet/builders/materials/substrates.py +9 -8
  18. emmet/builders/materials/summary.py +3 -3
  19. emmet/builders/materials/thermo.py +17 -11
  20. emmet/builders/matscholar/missing_compositions.py +12 -8
  21. emmet/builders/mobility/migration_graph.py +5 -5
  22. emmet/builders/settings.py +21 -17
  23. emmet/builders/utils.py +101 -12
  24. emmet/builders/vasp/materials.py +40 -51
  25. emmet/builders/vasp/mp_potcar_stats.json.gz +0 -0
  26. emmet/builders/vasp/task_validator.py +25 -36
  27. emmet_builders-0.86.0.dist-info/METADATA +37 -0
  28. emmet_builders-0.86.0.dist-info/RECORD +41 -0
  29. {emmet_builders-0.78.3.dist-info → emmet_builders-0.86.0.dist-info}/WHEEL +1 -1
  30. emmet/builders/materials/ml.py +0 -87
  31. emmet/builders/molecules/atomic.py +0 -589
  32. emmet/builders/molecules/bonds.py +0 -324
  33. emmet/builders/molecules/metal_binding.py +0 -526
  34. emmet/builders/molecules/orbitals.py +0 -288
  35. emmet/builders/molecules/redox.py +0 -496
  36. emmet/builders/molecules/summary.py +0 -383
  37. emmet/builders/molecules/thermo.py +0 -500
  38. emmet/builders/molecules/vibration.py +0 -278
  39. emmet/builders/qchem/__init__.py +0 -0
  40. emmet/builders/qchem/molecules.py +0 -734
  41. emmet_builders-0.78.3.dist-info/METADATA +0 -47
  42. emmet_builders-0.78.3.dist-info/RECORD +0 -51
  43. /emmet/builders/{molecules/__init__.py → py.typed} +0 -0
  44. {emmet_builders-0.78.3.dist-info → emmet_builders-0.86.0.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,5 @@
1
1
  from collections import defaultdict
2
- from typing import Dict, Iterable, List, Optional, Tuple
3
2
  from math import ceil
4
- from datetime import datetime
5
3
 
6
4
  from maggma.core import Builder, Store
7
5
  from maggma.utils import grouper
@@ -10,7 +8,12 @@ from pymatgen.core.structure import Structure
10
8
 
11
9
  from emmet.builders.settings import EmmetBuildSettings
12
10
  from emmet.core.provenance import ProvenanceDoc, SNLDict
13
- from emmet.core.utils import get_sg, jsanitize
11
+ from emmet.core.utils import get_sg, jsanitize, utcnow
12
+
13
+ from typing import TYPE_CHECKING
14
+
15
+ if TYPE_CHECKING:
16
+ from collections.abc import Iterable
14
17
 
15
18
 
16
19
  class ProvenanceBuilder(Builder):
@@ -18,9 +21,9 @@ class ProvenanceBuilder(Builder):
18
21
  self,
19
22
  materials: Store,
20
23
  provenance: Store,
21
- source_snls: List[Store],
22
- settings: Optional[EmmetBuildSettings] = None,
23
- query: Optional[Dict] = None,
24
+ source_snls: list[Store],
25
+ settings: EmmetBuildSettings | None = None,
26
+ query: dict | None = None,
24
27
  **kwargs,
25
28
  ):
26
29
  """
@@ -59,7 +62,7 @@ class ProvenanceBuilder(Builder):
59
62
  s.ensure_index("snl_id")
60
63
  s.ensure_index("formula_pretty")
61
64
 
62
- def prechunk(self, number_splits: int) -> Iterable[Dict]: # pragma: no cover
65
+ def prechunk(self, number_splits: int) -> Iterable[dict]: # pragma: no cover
63
66
  self.ensure_indicies()
64
67
 
65
68
  # Find all formulas for materials that have been updated since this
@@ -101,7 +104,7 @@ class ProvenanceBuilder(Builder):
101
104
  for chunk in grouper(mat_ids, N):
102
105
  yield {"query": {"material_id": {"$in": chunk}}}
103
106
 
104
- def get_items(self) -> Tuple[List[Dict], List[Dict]]: # type: ignore
107
+ def get_items(self) -> tuple[list[dict], list[dict]]: # type: ignore
105
108
  """
106
109
  Gets all materials to associate with SNLs
107
110
  Returns:
@@ -168,7 +171,7 @@ class ProvenanceBuilder(Builder):
168
171
  for snl in snls:
169
172
  struc = Structure.from_dict(snl)
170
173
  snl_sg = get_sg(struc)
171
- struc.snl = SNLDict(**snl)
174
+ struc.snl = SNLDict(**snl) # type: ignore[attr-defined]
172
175
  snl_groups[snl_sg].append(struc)
173
176
 
174
177
  mat_sg = get_sg(Structure.from_dict(mat["structure"]))
@@ -178,7 +181,7 @@ class ProvenanceBuilder(Builder):
178
181
  self.logger.debug(f"Found {len(snl_structs)} potential snls for {mat_id}")
179
182
  yield mat, snl_structs
180
183
 
181
- def process_item(self, item) -> Dict:
184
+ def process_item(self, item) -> dict:
182
185
  """
183
186
  Matches SNLS and Materials
184
187
  Args:
@@ -203,15 +206,15 @@ class ProvenanceBuilder(Builder):
203
206
  deprecated=mat["deprecated"],
204
207
  )
205
208
  else:
206
- doc = ProvenanceDoc(
209
+ doc = ProvenanceDoc( # type: ignore[call-arg]
207
210
  material_id=mat["material_id"],
208
211
  structure=Structure.from_dict(mat["structure"]),
209
212
  deprecated=mat["deprecated"],
210
- created_at=datetime.utcnow(),
213
+ created_at=utcnow(),
211
214
  )
212
215
 
213
216
  doc.authors.append(self.settings.DEFAULT_AUTHOR)
214
- doc.history.append(self.settings.DEFAULT_HISTORY)
217
+ doc.history.append(self.settings.DEFAULT_HISTORY) # type: ignore[union-attr]
215
218
  doc.references.append(self.settings.DEFAULT_REFERENCE)
216
219
 
217
220
  snl_doc = jsanitize(doc.dict(exclude_none=False), allow_bson=True)
@@ -1,8 +1,7 @@
1
- from typing import Dict, Optional
2
1
  from maggma.builders.map_builder import MapBuilder
3
2
  from maggma.core import Store
4
-
5
3
  from pymatgen.core.structure import Structure
4
+
6
5
  from emmet.core.robocrys import RobocrystallogapherDoc
7
6
  from emmet.core.utils import jsanitize
8
7
 
@@ -12,7 +11,7 @@ class RobocrystallographerBuilder(MapBuilder):
12
11
  self,
13
12
  oxidation_states: Store,
14
13
  robocrys: Store,
15
- query: Optional[Dict] = None,
14
+ query: dict | None = None,
16
15
  **kwargs
17
16
  ):
18
17
  self.oxidation_states = oxidation_states
@@ -1,14 +1,15 @@
1
- from typing import Optional, Dict, Iterable
2
- from emmet.core.mpid import MPID
3
- from maggma.core.store import Store
1
+ from typing import Iterable
2
+
4
3
  from maggma.core.builder import Builder
5
- from pymatgen.core.structure import Structure
4
+ from maggma.core.store import Store
5
+ from maggma.utils import grouper
6
6
  from pymatgen.analysis.elasticity.elastic import ElasticTensor
7
+ from pymatgen.core.structure import Structure
7
8
  from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
8
9
 
10
+ from emmet.core.mpid import AlphaID
9
11
  from emmet.core.substrates import SubstratesDoc
10
12
  from emmet.core.utils import jsanitize
11
- from maggma.utils import grouper
12
13
 
13
14
 
14
15
  class SubstratesBuilder(Builder):
@@ -17,7 +18,7 @@ class SubstratesBuilder(Builder):
17
18
  materials: Store,
18
19
  substrates: Store,
19
20
  elasticity: Store,
20
- query: Optional[Dict] = None,
21
+ query: dict | None = None,
21
22
  **kwargs,
22
23
  ):
23
24
  """
@@ -47,7 +48,7 @@ class SubstratesBuilder(Builder):
47
48
  **kwargs,
48
49
  )
49
50
 
50
- def prechunk(self, number_splits: int) -> Iterable[Dict]: # pragma: no cover
51
+ def prechunk(self, number_splits: int) -> Iterable[dict]: # pragma: no cover
51
52
  to_process_mat_ids = self._find_to_process()
52
53
 
53
54
  return [
@@ -107,7 +108,7 @@ class SubstratesBuilder(Builder):
107
108
  dict: a diffraction dict
108
109
  """
109
110
 
110
- mpid = MPID(item["material_id"])
111
+ mpid = AlphaID(item["material_id"])
111
112
  elastic_tensor = item.get("elastic_tensor", None)
112
113
  elastic_tensor = (
113
114
  ElasticTensor.from_voigt(elastic_tensor) if elastic_tensor else None
@@ -3,10 +3,10 @@ from math import ceil
3
3
  from maggma.builders import Builder
4
4
  from maggma.utils import grouper
5
5
 
6
- from emmet.core.mpid import MPID
6
+ from emmet.core.mpid import AlphaID
7
7
  from emmet.core.summary import SummaryDoc, HasProps
8
8
  from emmet.core.utils import jsanitize
9
- from emmet.core.thermo import ThermoType
9
+ from emmet.core.types.enums import ThermoType
10
10
 
11
11
 
12
12
  class SummaryBuilder(Builder):
@@ -214,7 +214,7 @@ class SummaryBuilder(Builder):
214
214
  yield {"query": {self.materials.key: {"$in": list(split)}}}
215
215
 
216
216
  def process_item(self, item):
217
- material_id = MPID(item[HasProps.materials.value]["material_id"])
217
+ material_id = AlphaID(item[HasProps.materials.value]["material_id"])
218
218
  doc = SummaryDoc.from_docs(material_id=material_id, **item)
219
219
  return jsanitize(doc.model_dump(exclude_none=False), allow_bson=True)
220
220
 
@@ -1,8 +1,9 @@
1
- from math import ceil
1
+ from __future__ import annotations
2
+
2
3
  import warnings
3
- from itertools import chain
4
- from typing import Dict, Iterator, List, Optional, Set
5
4
  from datetime import datetime
5
+ from itertools import chain
6
+ from math import ceil
6
7
 
7
8
  from maggma.core import Builder, Store
8
9
  from maggma.stores import S3Store
@@ -12,18 +13,23 @@ from pymatgen.analysis.phase_diagram import PhaseDiagramError
12
13
  from pymatgen.entries.computed_entries import ComputedStructureEntry
13
14
 
14
15
  from emmet.builders.utils import HiddenPrints
15
- from emmet.core.thermo import ThermoDoc, PhaseDiagramDoc
16
+ from emmet.core.thermo import PhaseDiagramDoc, ThermoDoc
16
17
  from emmet.core.utils import jsanitize
17
18
 
19
+ from typing import TYPE_CHECKING
20
+
21
+ if TYPE_CHECKING:
22
+ from collections.abc import Iterator
23
+
18
24
 
19
25
  class ThermoBuilder(Builder):
20
26
  def __init__(
21
27
  self,
22
28
  thermo: Store,
23
29
  corrected_entries: Store,
24
- phase_diagram: Optional[Store] = None,
25
- query: Optional[Dict] = None,
26
- num_phase_diagram_eles: Optional[int] = None,
30
+ phase_diagram: Store | None = None,
31
+ query: dict | None = None,
32
+ num_phase_diagram_eles: int | None = None,
27
33
  chunk_size: int = 1000,
28
34
  **kwargs,
29
35
  ):
@@ -49,7 +55,7 @@ class ThermoBuilder(Builder):
49
55
  self.phase_diagram = phase_diagram
50
56
  self.num_phase_diagram_eles = num_phase_diagram_eles
51
57
  self.chunk_size = chunk_size
52
- self._completed_tasks: Set[str] = set()
58
+ self._completed_tasks: set[str] = set()
53
59
 
54
60
  if self.thermo.key != "thermo_id":
55
61
  warnings.warn(
@@ -111,7 +117,7 @@ class ThermoBuilder(Builder):
111
117
  coll.ensure_index("chemsys")
112
118
  coll.ensure_index("phase_diagram_id")
113
119
 
114
- def prechunk(self, number_splits: int) -> Iterator[Dict]: # pragma: no cover
120
+ def prechunk(self, number_splits: int) -> Iterator[dict]: # pragma: no cover
115
121
  to_process_chemsys = self._get_chemsys_to_process()
116
122
 
117
123
  N = ceil(len(to_process_chemsys) / number_splits)
@@ -119,7 +125,7 @@ class ThermoBuilder(Builder):
119
125
  for chemsys_chunk in grouper(to_process_chemsys, N):
120
126
  yield {"query": {"chemsys": {"$in": list(chemsys_chunk)}}}
121
127
 
122
- def get_items(self) -> Iterator[List[Dict]]:
128
+ def get_items(self) -> Iterator[list[dict]]:
123
129
  """
124
130
  Gets whole chemical systems of entries to process
125
131
  """
@@ -224,7 +230,7 @@ class ThermoBuilder(Builder):
224
230
  """
225
231
  Inserts the thermo and phase diagram docs into the thermo collection
226
232
  Args:
227
- items ([[tuple(List[dict],List[dict])]]): a list of a list of thermo and phase diagram dict pairs to update
233
+ items ([[tuple(list[dict],list[dict])]]): a list of a list of thermo and phase diagram dict pairs to update
228
234
  """
229
235
 
230
236
  thermo_docs = [pair[0] for pair_list in items for pair in pair_list]
@@ -1,13 +1,17 @@
1
- from itertools import combinations
2
1
  import itertools
2
+ from itertools import combinations
3
3
  from math import ceil
4
- from typing import Dict, List, Iterator, Optional
5
4
 
6
5
  from maggma.core import Builder
7
- from maggma.stores import S3Store, MongoURIStore, MongoStore
6
+ from maggma.stores import MongoStore, MongoURIStore, S3Store
8
7
  from maggma.utils import grouper
9
8
  from pymatgen.core import Composition, Element
10
9
 
10
+ from typing import TYPE_CHECKING
11
+
12
+ if TYPE_CHECKING:
13
+ from collections.abc import Iterator
14
+
11
15
 
12
16
  class MissingCompositionsBuilder(Builder):
13
17
  """
@@ -21,7 +25,7 @@ class MissingCompositionsBuilder(Builder):
21
25
  phase_diagram: S3Store,
22
26
  mpcontribs: MongoURIStore,
23
27
  missing_compositions: MongoStore,
24
- query: Optional[Dict] = None,
28
+ query: dict | None = None,
25
29
  **kwargs,
26
30
  ):
27
31
  """
@@ -47,7 +51,7 @@ class MissingCompositionsBuilder(Builder):
47
51
  **kwargs,
48
52
  )
49
53
 
50
- def prechunk(self, number_splits: int) -> Iterator[Dict]: # pragma: no cover
54
+ def prechunk(self, number_splits: int) -> Iterator[dict]: # pragma: no cover
51
55
  """
52
56
  Prechunk method to perform chunking by the key field
53
57
  """
@@ -61,7 +65,7 @@ class MissingCompositionsBuilder(Builder):
61
65
  for split in grouper(keys, N):
62
66
  yield {"query": {self.phase_diagram.key: {"$in": list(split)}}}
63
67
 
64
- def get_items(self) -> Iterator[Dict]:
68
+ def get_items(self) -> Iterator[dict]:
65
69
  """
66
70
  Returns all chemical systems (combinations of elements)
67
71
  to process.
@@ -112,7 +116,7 @@ class MissingCompositionsBuilder(Builder):
112
116
  self.logger.error(f"Erro looking for phase diagram for {sys}: {ex}")
113
117
  continue
114
118
 
115
- def process_item(self, item: Dict) -> Dict:
119
+ def process_item(self, item: dict) -> dict:
116
120
  """
117
121
  Processes a chemical system and finds missing
118
122
  compositions for that system.
@@ -195,7 +199,7 @@ class MissingCompositionsBuilder(Builder):
195
199
  else:
196
200
  self.logger.info("No items to update")
197
201
 
198
- def _get_entries_in_chemsys(self, chemsys) -> List:
202
+ def _get_entries_in_chemsys(self, chemsys) -> list:
199
203
  """Queries the MPContribs Store for entries in a chemical system."""
200
204
  # get sub-systems
201
205
  chemsys_subsystems = []
@@ -1,10 +1,10 @@
1
1
  from maggma.builders.map_builder import MapBuilder
2
2
  from maggma.stores import MongoStore
3
- from typing import Tuple
4
- from emmet.core.mobility.migrationgraph import MigrationGraphDoc
5
- from emmet.builders.utils import get_hop_cutoff
6
- from pymatgen.apps.battery.insertion_battery import InsertionElectrode
7
3
  from pymatgen.analysis.diffusion.neb.full_path_mapper import MigrationGraph
4
+ from pymatgen.apps.battery.insertion_battery import InsertionElectrode
5
+
6
+ from emmet.builders.utils import get_hop_cutoff
7
+ from emmet.core.mobility.migrationgraph import MigrationGraphDoc
8
8
  from emmet.core.utils import jsanitize
9
9
 
10
10
 
@@ -18,7 +18,7 @@ class MigrationGraphBuilder(MapBuilder):
18
18
  max_hop_distance: float = 7,
19
19
  populate_sc_fields: bool = True,
20
20
  min_length_sc: float = 8,
21
- minmax_num_atoms: Tuple[int, int] = (80, 120),
21
+ minmax_num_atoms: tuple[int, int] = (80, 120),
22
22
  ltol: float = 0.2,
23
23
  stol: float = 0.3,
24
24
  angle_tol: float = 5,
@@ -1,14 +1,13 @@
1
1
  """
2
2
  Settings for defaults in the build pipelines for the Materials Project
3
3
  """
4
- from typing import List
5
4
 
6
5
  from pydantic.fields import Field
7
6
 
8
7
  from emmet.core.provenance import Author, History
8
+ from emmet.core.qchem.calc_types import TaskType as QChemTaskType
9
9
  from emmet.core.settings import EmmetSettings
10
10
  from emmet.core.vasp.calc_types import TaskType as VaspTaskType
11
- from emmet.core.qchem.calc_types import TaskType as QChemTaskType
12
11
 
13
12
 
14
13
  class EmmetBuildSettings(EmmetSettings):
@@ -18,35 +17,40 @@ class EmmetBuildSettings(EmmetSettings):
18
17
  EMMET_CONFIG_FILE to point to the json with emmet settings
19
18
  """
20
19
 
21
- BUILD_TAGS: List[str] = Field(
20
+ BUILD_TAGS: list[str] = Field(
22
21
  [], description="Tags for calculations to build materials"
23
22
  )
24
- EXCLUDED_TAGS: List[str] = Field(
23
+ EXCLUDED_TAGS: list[str] = Field(
25
24
  [],
26
25
  description="Tags to exclude from materials",
27
26
  )
28
27
 
29
- DEPRECATED_TAGS: List[str] = Field(
28
+ DEPRECATED_TAGS: list[str] = Field(
30
29
  [], description="Tags for calculations to deprecate"
31
30
  )
32
31
 
33
- NON_COMMERCIAL_TAGS: List[str] = Field(
32
+ NON_COMMERCIAL_TAGS: list[str] = Field(
34
33
  [], description="Tages for which to add BY-NC as license data in builder_meta"
35
34
  )
36
35
 
37
- VASP_ALLOWED_VASP_TYPES: List[VaspTaskType] = Field(
38
- [t.value for t in VaspTaskType],
36
+ VASP_ALLOWED_VASP_TYPES: list[VaspTaskType] = Field(
37
+ list(VaspTaskType),
39
38
  description="Allowed task_types to build materials from",
40
39
  )
41
40
 
42
- QCHEM_ALLOWED_TASK_TYPES: List[QChemTaskType] = Field(
43
- [
44
- "Single Point",
45
- "Force",
46
- "Geometry Optimization",
47
- "Frequency Analysis",
48
- "Frequency Flattening Geometry Optimization",
49
- ],
41
+ QCHEM_ALLOWED_TASK_TYPES: list[QChemTaskType] = Field(
42
+ list(
43
+ map(
44
+ QChemTaskType,
45
+ [
46
+ "Single Point",
47
+ "Force",
48
+ "Geometry Optimization",
49
+ "Frequency Analysis",
50
+ "Frequency Flattening Geometry Optimization",
51
+ ],
52
+ )
53
+ ),
50
54
  description="Allowed task_types to build molecules from",
51
55
  )
52
56
 
@@ -71,7 +75,7 @@ class EmmetBuildSettings(EmmetSettings):
71
75
  )
72
76
 
73
77
  DEFAULT_HISTORY: History = Field(
74
- History(
78
+ History( # type: ignore[call-arg]
75
79
  name="Materials Project Optimized Structure",
76
80
  url="http://www.materialsproject.org",
77
81
  ),
emmet/builders/utils.py CHANGED
@@ -1,18 +1,30 @@
1
- from typing import Set, Union, Any
2
- import sys
1
+ from __future__ import annotations
2
+
3
+ import json
3
4
  import os
5
+ import sys
4
6
  from gzip import GzipFile
5
- import orjson
6
- import json
7
7
  from io import BytesIO
8
- from monty.serialization import MontyDecoder
9
- from botocore.exceptions import ClientError
10
8
  from itertools import chain, combinations
11
- from pymatgen.core import Structure
9
+
10
+ import orjson
11
+ from botocore.exceptions import ClientError
12
+ from monty.serialization import MontyDecoder
12
13
  from pymatgen.analysis.diffusion.neb.full_path_mapper import MigrationGraph
14
+ from pymatgen.core import Structure
15
+ from pymatgen.io.vasp.inputs import PotcarSingle
16
+
17
+ from emmet.core.types.typing import FSPathType
13
18
 
19
+ from emmet.builders.settings import EmmetBuildSettings
14
20
 
15
- def maximal_spanning_non_intersecting_subsets(sets) -> Set[Set]:
21
+ from typing import TYPE_CHECKING
22
+
23
+ if TYPE_CHECKING:
24
+ from typing import Any, Literal
25
+
26
+
27
+ def maximal_spanning_non_intersecting_subsets(sets) -> set[set[Any]]:
16
28
  """
17
29
  Finds the maximal spanning non intersecting subsets of a group of sets
18
30
  This is useful for parsing out the sandboxes and figuring out how to group
@@ -40,7 +52,7 @@ def maximal_spanning_non_intersecting_subsets(sets) -> Set[Set]:
40
52
  return set(to_return_subsets)
41
53
 
42
54
 
43
- def chemsys_permutations(chemsys) -> Set:
55
+ def chemsys_permutations(chemsys) -> set[str]:
44
56
  # Function to get all relevant chemical subsystems
45
57
  # e.g. for Li-Mn-O returns Li, Li-Mn, Li-Mn-O, Li-O, Mn, Mn-O, O
46
58
  elements = chemsys.split("-")
@@ -58,7 +70,7 @@ def get_hop_cutoff(
58
70
  algorithm: str = "min_distance",
59
71
  min_hop_distance: float = 1,
60
72
  max_hop_distance: float = 7,
61
- ) -> Union[float, None]:
73
+ ) -> float | None:
62
74
  """
63
75
  A function to get an appropriate hop distance cutoff for a given migration
64
76
  graph structure which can be used for MigrationGraph.with_distance()
@@ -158,7 +170,7 @@ def query_open_data(
158
170
  key: str,
159
171
  monty_decode: bool = True,
160
172
  s3_resource: Any = None,
161
- ) -> Union[dict, None]:
173
+ ) -> dict | None:
162
174
  """Query a Materials Project AWS S3 Open Data bucket directly with boto3
163
175
 
164
176
  Args:
@@ -166,7 +178,7 @@ def query_open_data(
166
178
  prefix (str): Full set of file prefixes
167
179
  key (str): Key for file
168
180
  monty_decode (bool): Whether to monty decode or keep as dictionary. Defaults to True.
169
- s3_resource (Optional[Any]): S3 resource. One will be instantiated if none are provided
181
+ s3_resource (Any | None): S3 resource. One will be instantiated if none are provided
170
182
 
171
183
  Returns:
172
184
  dict: MontyDecoded data or None
@@ -211,3 +223,80 @@ class HiddenPrints:
211
223
  def __exit__(self, exc_type, exc_val, exc_tb):
212
224
  sys.stdout.close()
213
225
  sys.stdout = self._original_stdout
226
+
227
+
228
+ def get_potcar_stats(
229
+ method: Literal["potcar", "pymatgen", "stored"] = "potcar",
230
+ path_to_stored_stats: FSPathType | None = None,
231
+ ) -> dict[str, Any]:
232
+ """
233
+ Get the POTCAR stats used in MP calculations to validate POTCARs.
234
+
235
+ Args:
236
+ method : Literal[str : "potcar","pymatgen","stored"] = "potcar"
237
+ Method to generate the POTCAR stats:
238
+ - "potcar": regenerate stats from a user's POTCAR library.
239
+ - "pymatgen": regenerate stats from the stored pymatgen
240
+ summary stats dict. This has the downside of the possibility
241
+ of finding multiple matching POTCAR stats for older POTCAR
242
+ releases. As of 25 March, 2024, it does not appear that the
243
+ MP POTCARs have duplicates
244
+ - "stored": load a stored dict of POTCAR stats.
245
+ path_to_stored_stats : FSPathType or None
246
+ If FSPathType, the path to the stored summary stats file.
247
+ If None, defaults to
248
+ `importlib.resources.files("emmet.builders.vasp") / "mp_potcar_stats.json.gz"`
249
+ Returns:
250
+ dict, of POTCAR summary stats.
251
+ """
252
+ default_settings = EmmetBuildSettings()
253
+
254
+ stats: dict[str, dict] = {} # type: ignore
255
+
256
+ if method == "stored":
257
+ from monty.serialization import loadfn
258
+
259
+ if path_to_stored_stats is None:
260
+ from importlib.resources import files
261
+
262
+ path_to_stored_stats = str(
263
+ files("emmet.builders.vasp") / "mp_potcar_stats.json.gz"
264
+ )
265
+ return loadfn(path_to_stored_stats) # type: ignore
266
+
267
+ for (
268
+ calc_type,
269
+ input_set,
270
+ ) in default_settings.VASP_DEFAULT_INPUT_SETS.items():
271
+ _input = input_set()
272
+
273
+ stats[calc_type] = {}
274
+ functional = _input._config_dict["POTCAR_FUNCTIONAL"]
275
+
276
+ for potcar_symbol in _input.CONFIG["POTCAR"].values():
277
+ if method == "potcar":
278
+ potcar = PotcarSingle.from_symbol_and_functional(
279
+ symbol=potcar_symbol, functional=functional
280
+ )
281
+ summary_stats = potcar._summary_stats.copy()
282
+ # fallback method for validation - use header hash and symbol
283
+ # note that the potcar_spec assigns PotcarSingle.symbol to "titel"
284
+ # whereas the ***correct*** field is `header`
285
+ summary_stats["titel"] = potcar.header # type: ignore[assignment]
286
+ summary_stats["hash"] = potcar.md5_header_hash # type: ignore[assignment]
287
+ summary_stats = [summary_stats] # type: ignore[assignment]
288
+
289
+ elif method == "pymatgen":
290
+ summary_stats = [] # type: ignore[assignment]
291
+ for _, entries in PotcarSingle._potcar_summary_stats[
292
+ functional
293
+ ].items():
294
+ summary_stats += [ # type: ignore[operator]
295
+ {**entry, "titel": None, "hash": None}
296
+ for entry in entries
297
+ if entry["symbol"] == potcar_symbol
298
+ ]
299
+
300
+ stats[calc_type].update({potcar_symbol: summary_stats})
301
+
302
+ return stats