emmet-builders 0.78.3__py3-none-any.whl → 0.86.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emmet/builders/abinit/phonon.py +47 -47
- emmet/builders/abinit/sound_velocity.py +15 -11
- emmet/builders/feff/xas.py +1 -2
- emmet/builders/materials/absorption_spectrum.py +25 -14
- emmet/builders/materials/alloys.py +10 -11
- emmet/builders/materials/chemenv.py +2 -3
- emmet/builders/materials/corrected_entries.py +21 -15
- emmet/builders/materials/dielectric.py +19 -11
- emmet/builders/materials/elasticity.py +44 -33
- emmet/builders/materials/electrodes.py +35 -28
- emmet/builders/materials/electronic_structure.py +17 -17
- emmet/builders/materials/magnetism.py +11 -4
- emmet/builders/materials/optimade.py +7 -3
- emmet/builders/materials/piezoelectric.py +24 -21
- emmet/builders/materials/provenance.py +16 -13
- emmet/builders/materials/robocrys.py +2 -3
- emmet/builders/materials/substrates.py +9 -8
- emmet/builders/materials/summary.py +3 -3
- emmet/builders/materials/thermo.py +17 -11
- emmet/builders/matscholar/missing_compositions.py +12 -8
- emmet/builders/mobility/migration_graph.py +5 -5
- emmet/builders/settings.py +21 -17
- emmet/builders/utils.py +101 -12
- emmet/builders/vasp/materials.py +40 -51
- emmet/builders/vasp/mp_potcar_stats.json.gz +0 -0
- emmet/builders/vasp/task_validator.py +25 -36
- emmet_builders-0.86.0.dist-info/METADATA +37 -0
- emmet_builders-0.86.0.dist-info/RECORD +41 -0
- {emmet_builders-0.78.3.dist-info → emmet_builders-0.86.0.dist-info}/WHEEL +1 -1
- emmet/builders/materials/ml.py +0 -87
- emmet/builders/molecules/atomic.py +0 -589
- emmet/builders/molecules/bonds.py +0 -324
- emmet/builders/molecules/metal_binding.py +0 -526
- emmet/builders/molecules/orbitals.py +0 -288
- emmet/builders/molecules/redox.py +0 -496
- emmet/builders/molecules/summary.py +0 -383
- emmet/builders/molecules/thermo.py +0 -500
- emmet/builders/molecules/vibration.py +0 -278
- emmet/builders/qchem/__init__.py +0 -0
- emmet/builders/qchem/molecules.py +0 -734
- emmet_builders-0.78.3.dist-info/METADATA +0 -47
- emmet_builders-0.78.3.dist-info/RECORD +0 -51
- /emmet/builders/{molecules/__init__.py → py.typed} +0 -0
- {emmet_builders-0.78.3.dist-info → emmet_builders-0.86.0.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
from collections import defaultdict
|
|
2
|
-
from typing import Dict, Iterable, List, Optional, Tuple
|
|
3
2
|
from math import ceil
|
|
4
|
-
from datetime import datetime
|
|
5
3
|
|
|
6
4
|
from maggma.core import Builder, Store
|
|
7
5
|
from maggma.utils import grouper
|
|
@@ -10,7 +8,12 @@ from pymatgen.core.structure import Structure
|
|
|
10
8
|
|
|
11
9
|
from emmet.builders.settings import EmmetBuildSettings
|
|
12
10
|
from emmet.core.provenance import ProvenanceDoc, SNLDict
|
|
13
|
-
from emmet.core.utils import get_sg, jsanitize
|
|
11
|
+
from emmet.core.utils import get_sg, jsanitize, utcnow
|
|
12
|
+
|
|
13
|
+
from typing import TYPE_CHECKING
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from collections.abc import Iterable
|
|
14
17
|
|
|
15
18
|
|
|
16
19
|
class ProvenanceBuilder(Builder):
|
|
@@ -18,9 +21,9 @@ class ProvenanceBuilder(Builder):
|
|
|
18
21
|
self,
|
|
19
22
|
materials: Store,
|
|
20
23
|
provenance: Store,
|
|
21
|
-
source_snls:
|
|
22
|
-
settings:
|
|
23
|
-
query:
|
|
24
|
+
source_snls: list[Store],
|
|
25
|
+
settings: EmmetBuildSettings | None = None,
|
|
26
|
+
query: dict | None = None,
|
|
24
27
|
**kwargs,
|
|
25
28
|
):
|
|
26
29
|
"""
|
|
@@ -59,7 +62,7 @@ class ProvenanceBuilder(Builder):
|
|
|
59
62
|
s.ensure_index("snl_id")
|
|
60
63
|
s.ensure_index("formula_pretty")
|
|
61
64
|
|
|
62
|
-
def prechunk(self, number_splits: int) -> Iterable[
|
|
65
|
+
def prechunk(self, number_splits: int) -> Iterable[dict]: # pragma: no cover
|
|
63
66
|
self.ensure_indicies()
|
|
64
67
|
|
|
65
68
|
# Find all formulas for materials that have been updated since this
|
|
@@ -101,7 +104,7 @@ class ProvenanceBuilder(Builder):
|
|
|
101
104
|
for chunk in grouper(mat_ids, N):
|
|
102
105
|
yield {"query": {"material_id": {"$in": chunk}}}
|
|
103
106
|
|
|
104
|
-
def get_items(self) ->
|
|
107
|
+
def get_items(self) -> tuple[list[dict], list[dict]]: # type: ignore
|
|
105
108
|
"""
|
|
106
109
|
Gets all materials to associate with SNLs
|
|
107
110
|
Returns:
|
|
@@ -168,7 +171,7 @@ class ProvenanceBuilder(Builder):
|
|
|
168
171
|
for snl in snls:
|
|
169
172
|
struc = Structure.from_dict(snl)
|
|
170
173
|
snl_sg = get_sg(struc)
|
|
171
|
-
struc.snl = SNLDict(**snl)
|
|
174
|
+
struc.snl = SNLDict(**snl) # type: ignore[attr-defined]
|
|
172
175
|
snl_groups[snl_sg].append(struc)
|
|
173
176
|
|
|
174
177
|
mat_sg = get_sg(Structure.from_dict(mat["structure"]))
|
|
@@ -178,7 +181,7 @@ class ProvenanceBuilder(Builder):
|
|
|
178
181
|
self.logger.debug(f"Found {len(snl_structs)} potential snls for {mat_id}")
|
|
179
182
|
yield mat, snl_structs
|
|
180
183
|
|
|
181
|
-
def process_item(self, item) ->
|
|
184
|
+
def process_item(self, item) -> dict:
|
|
182
185
|
"""
|
|
183
186
|
Matches SNLS and Materials
|
|
184
187
|
Args:
|
|
@@ -203,15 +206,15 @@ class ProvenanceBuilder(Builder):
|
|
|
203
206
|
deprecated=mat["deprecated"],
|
|
204
207
|
)
|
|
205
208
|
else:
|
|
206
|
-
doc = ProvenanceDoc(
|
|
209
|
+
doc = ProvenanceDoc( # type: ignore[call-arg]
|
|
207
210
|
material_id=mat["material_id"],
|
|
208
211
|
structure=Structure.from_dict(mat["structure"]),
|
|
209
212
|
deprecated=mat["deprecated"],
|
|
210
|
-
created_at=
|
|
213
|
+
created_at=utcnow(),
|
|
211
214
|
)
|
|
212
215
|
|
|
213
216
|
doc.authors.append(self.settings.DEFAULT_AUTHOR)
|
|
214
|
-
doc.history.append(self.settings.DEFAULT_HISTORY)
|
|
217
|
+
doc.history.append(self.settings.DEFAULT_HISTORY) # type: ignore[union-attr]
|
|
215
218
|
doc.references.append(self.settings.DEFAULT_REFERENCE)
|
|
216
219
|
|
|
217
220
|
snl_doc = jsanitize(doc.dict(exclude_none=False), allow_bson=True)
|
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
from typing import Dict, Optional
|
|
2
1
|
from maggma.builders.map_builder import MapBuilder
|
|
3
2
|
from maggma.core import Store
|
|
4
|
-
|
|
5
3
|
from pymatgen.core.structure import Structure
|
|
4
|
+
|
|
6
5
|
from emmet.core.robocrys import RobocrystallogapherDoc
|
|
7
6
|
from emmet.core.utils import jsanitize
|
|
8
7
|
|
|
@@ -12,7 +11,7 @@ class RobocrystallographerBuilder(MapBuilder):
|
|
|
12
11
|
self,
|
|
13
12
|
oxidation_states: Store,
|
|
14
13
|
robocrys: Store,
|
|
15
|
-
query:
|
|
14
|
+
query: dict | None = None,
|
|
16
15
|
**kwargs
|
|
17
16
|
):
|
|
18
17
|
self.oxidation_states = oxidation_states
|
|
@@ -1,14 +1,15 @@
|
|
|
1
|
-
from typing import
|
|
2
|
-
|
|
3
|
-
from maggma.core.store import Store
|
|
1
|
+
from typing import Iterable
|
|
2
|
+
|
|
4
3
|
from maggma.core.builder import Builder
|
|
5
|
-
from
|
|
4
|
+
from maggma.core.store import Store
|
|
5
|
+
from maggma.utils import grouper
|
|
6
6
|
from pymatgen.analysis.elasticity.elastic import ElasticTensor
|
|
7
|
+
from pymatgen.core.structure import Structure
|
|
7
8
|
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
|
|
8
9
|
|
|
10
|
+
from emmet.core.mpid import AlphaID
|
|
9
11
|
from emmet.core.substrates import SubstratesDoc
|
|
10
12
|
from emmet.core.utils import jsanitize
|
|
11
|
-
from maggma.utils import grouper
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
class SubstratesBuilder(Builder):
|
|
@@ -17,7 +18,7 @@ class SubstratesBuilder(Builder):
|
|
|
17
18
|
materials: Store,
|
|
18
19
|
substrates: Store,
|
|
19
20
|
elasticity: Store,
|
|
20
|
-
query:
|
|
21
|
+
query: dict | None = None,
|
|
21
22
|
**kwargs,
|
|
22
23
|
):
|
|
23
24
|
"""
|
|
@@ -47,7 +48,7 @@ class SubstratesBuilder(Builder):
|
|
|
47
48
|
**kwargs,
|
|
48
49
|
)
|
|
49
50
|
|
|
50
|
-
def prechunk(self, number_splits: int) -> Iterable[
|
|
51
|
+
def prechunk(self, number_splits: int) -> Iterable[dict]: # pragma: no cover
|
|
51
52
|
to_process_mat_ids = self._find_to_process()
|
|
52
53
|
|
|
53
54
|
return [
|
|
@@ -107,7 +108,7 @@ class SubstratesBuilder(Builder):
|
|
|
107
108
|
dict: a diffraction dict
|
|
108
109
|
"""
|
|
109
110
|
|
|
110
|
-
mpid =
|
|
111
|
+
mpid = AlphaID(item["material_id"])
|
|
111
112
|
elastic_tensor = item.get("elastic_tensor", None)
|
|
112
113
|
elastic_tensor = (
|
|
113
114
|
ElasticTensor.from_voigt(elastic_tensor) if elastic_tensor else None
|
|
@@ -3,10 +3,10 @@ from math import ceil
|
|
|
3
3
|
from maggma.builders import Builder
|
|
4
4
|
from maggma.utils import grouper
|
|
5
5
|
|
|
6
|
-
from emmet.core.mpid import
|
|
6
|
+
from emmet.core.mpid import AlphaID
|
|
7
7
|
from emmet.core.summary import SummaryDoc, HasProps
|
|
8
8
|
from emmet.core.utils import jsanitize
|
|
9
|
-
from emmet.core.
|
|
9
|
+
from emmet.core.types.enums import ThermoType
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class SummaryBuilder(Builder):
|
|
@@ -214,7 +214,7 @@ class SummaryBuilder(Builder):
|
|
|
214
214
|
yield {"query": {self.materials.key: {"$in": list(split)}}}
|
|
215
215
|
|
|
216
216
|
def process_item(self, item):
|
|
217
|
-
material_id =
|
|
217
|
+
material_id = AlphaID(item[HasProps.materials.value]["material_id"])
|
|
218
218
|
doc = SummaryDoc.from_docs(material_id=material_id, **item)
|
|
219
219
|
return jsanitize(doc.model_dump(exclude_none=False), allow_bson=True)
|
|
220
220
|
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
2
3
|
import warnings
|
|
3
|
-
from itertools import chain
|
|
4
|
-
from typing import Dict, Iterator, List, Optional, Set
|
|
5
4
|
from datetime import datetime
|
|
5
|
+
from itertools import chain
|
|
6
|
+
from math import ceil
|
|
6
7
|
|
|
7
8
|
from maggma.core import Builder, Store
|
|
8
9
|
from maggma.stores import S3Store
|
|
@@ -12,18 +13,23 @@ from pymatgen.analysis.phase_diagram import PhaseDiagramError
|
|
|
12
13
|
from pymatgen.entries.computed_entries import ComputedStructureEntry
|
|
13
14
|
|
|
14
15
|
from emmet.builders.utils import HiddenPrints
|
|
15
|
-
from emmet.core.thermo import
|
|
16
|
+
from emmet.core.thermo import PhaseDiagramDoc, ThermoDoc
|
|
16
17
|
from emmet.core.utils import jsanitize
|
|
17
18
|
|
|
19
|
+
from typing import TYPE_CHECKING
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from collections.abc import Iterator
|
|
23
|
+
|
|
18
24
|
|
|
19
25
|
class ThermoBuilder(Builder):
|
|
20
26
|
def __init__(
|
|
21
27
|
self,
|
|
22
28
|
thermo: Store,
|
|
23
29
|
corrected_entries: Store,
|
|
24
|
-
phase_diagram:
|
|
25
|
-
query:
|
|
26
|
-
num_phase_diagram_eles:
|
|
30
|
+
phase_diagram: Store | None = None,
|
|
31
|
+
query: dict | None = None,
|
|
32
|
+
num_phase_diagram_eles: int | None = None,
|
|
27
33
|
chunk_size: int = 1000,
|
|
28
34
|
**kwargs,
|
|
29
35
|
):
|
|
@@ -49,7 +55,7 @@ class ThermoBuilder(Builder):
|
|
|
49
55
|
self.phase_diagram = phase_diagram
|
|
50
56
|
self.num_phase_diagram_eles = num_phase_diagram_eles
|
|
51
57
|
self.chunk_size = chunk_size
|
|
52
|
-
self._completed_tasks:
|
|
58
|
+
self._completed_tasks: set[str] = set()
|
|
53
59
|
|
|
54
60
|
if self.thermo.key != "thermo_id":
|
|
55
61
|
warnings.warn(
|
|
@@ -111,7 +117,7 @@ class ThermoBuilder(Builder):
|
|
|
111
117
|
coll.ensure_index("chemsys")
|
|
112
118
|
coll.ensure_index("phase_diagram_id")
|
|
113
119
|
|
|
114
|
-
def prechunk(self, number_splits: int) -> Iterator[
|
|
120
|
+
def prechunk(self, number_splits: int) -> Iterator[dict]: # pragma: no cover
|
|
115
121
|
to_process_chemsys = self._get_chemsys_to_process()
|
|
116
122
|
|
|
117
123
|
N = ceil(len(to_process_chemsys) / number_splits)
|
|
@@ -119,7 +125,7 @@ class ThermoBuilder(Builder):
|
|
|
119
125
|
for chemsys_chunk in grouper(to_process_chemsys, N):
|
|
120
126
|
yield {"query": {"chemsys": {"$in": list(chemsys_chunk)}}}
|
|
121
127
|
|
|
122
|
-
def get_items(self) -> Iterator[
|
|
128
|
+
def get_items(self) -> Iterator[list[dict]]:
|
|
123
129
|
"""
|
|
124
130
|
Gets whole chemical systems of entries to process
|
|
125
131
|
"""
|
|
@@ -224,7 +230,7 @@ class ThermoBuilder(Builder):
|
|
|
224
230
|
"""
|
|
225
231
|
Inserts the thermo and phase diagram docs into the thermo collection
|
|
226
232
|
Args:
|
|
227
|
-
items ([[tuple(
|
|
233
|
+
items ([[tuple(list[dict],list[dict])]]): a list of a list of thermo and phase diagram dict pairs to update
|
|
228
234
|
"""
|
|
229
235
|
|
|
230
236
|
thermo_docs = [pair[0] for pair_list in items for pair in pair_list]
|
|
@@ -1,13 +1,17 @@
|
|
|
1
|
-
from itertools import combinations
|
|
2
1
|
import itertools
|
|
2
|
+
from itertools import combinations
|
|
3
3
|
from math import ceil
|
|
4
|
-
from typing import Dict, List, Iterator, Optional
|
|
5
4
|
|
|
6
5
|
from maggma.core import Builder
|
|
7
|
-
from maggma.stores import
|
|
6
|
+
from maggma.stores import MongoStore, MongoURIStore, S3Store
|
|
8
7
|
from maggma.utils import grouper
|
|
9
8
|
from pymatgen.core import Composition, Element
|
|
10
9
|
|
|
10
|
+
from typing import TYPE_CHECKING
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from collections.abc import Iterator
|
|
14
|
+
|
|
11
15
|
|
|
12
16
|
class MissingCompositionsBuilder(Builder):
|
|
13
17
|
"""
|
|
@@ -21,7 +25,7 @@ class MissingCompositionsBuilder(Builder):
|
|
|
21
25
|
phase_diagram: S3Store,
|
|
22
26
|
mpcontribs: MongoURIStore,
|
|
23
27
|
missing_compositions: MongoStore,
|
|
24
|
-
query:
|
|
28
|
+
query: dict | None = None,
|
|
25
29
|
**kwargs,
|
|
26
30
|
):
|
|
27
31
|
"""
|
|
@@ -47,7 +51,7 @@ class MissingCompositionsBuilder(Builder):
|
|
|
47
51
|
**kwargs,
|
|
48
52
|
)
|
|
49
53
|
|
|
50
|
-
def prechunk(self, number_splits: int) -> Iterator[
|
|
54
|
+
def prechunk(self, number_splits: int) -> Iterator[dict]: # pragma: no cover
|
|
51
55
|
"""
|
|
52
56
|
Prechunk method to perform chunking by the key field
|
|
53
57
|
"""
|
|
@@ -61,7 +65,7 @@ class MissingCompositionsBuilder(Builder):
|
|
|
61
65
|
for split in grouper(keys, N):
|
|
62
66
|
yield {"query": {self.phase_diagram.key: {"$in": list(split)}}}
|
|
63
67
|
|
|
64
|
-
def get_items(self) -> Iterator[
|
|
68
|
+
def get_items(self) -> Iterator[dict]:
|
|
65
69
|
"""
|
|
66
70
|
Returns all chemical systems (combinations of elements)
|
|
67
71
|
to process.
|
|
@@ -112,7 +116,7 @@ class MissingCompositionsBuilder(Builder):
|
|
|
112
116
|
self.logger.error(f"Erro looking for phase diagram for {sys}: {ex}")
|
|
113
117
|
continue
|
|
114
118
|
|
|
115
|
-
def process_item(self, item:
|
|
119
|
+
def process_item(self, item: dict) -> dict:
|
|
116
120
|
"""
|
|
117
121
|
Processes a chemical system and finds missing
|
|
118
122
|
compositions for that system.
|
|
@@ -195,7 +199,7 @@ class MissingCompositionsBuilder(Builder):
|
|
|
195
199
|
else:
|
|
196
200
|
self.logger.info("No items to update")
|
|
197
201
|
|
|
198
|
-
def _get_entries_in_chemsys(self, chemsys) ->
|
|
202
|
+
def _get_entries_in_chemsys(self, chemsys) -> list:
|
|
199
203
|
"""Queries the MPContribs Store for entries in a chemical system."""
|
|
200
204
|
# get sub-systems
|
|
201
205
|
chemsys_subsystems = []
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
from maggma.builders.map_builder import MapBuilder
|
|
2
2
|
from maggma.stores import MongoStore
|
|
3
|
-
from typing import Tuple
|
|
4
|
-
from emmet.core.mobility.migrationgraph import MigrationGraphDoc
|
|
5
|
-
from emmet.builders.utils import get_hop_cutoff
|
|
6
|
-
from pymatgen.apps.battery.insertion_battery import InsertionElectrode
|
|
7
3
|
from pymatgen.analysis.diffusion.neb.full_path_mapper import MigrationGraph
|
|
4
|
+
from pymatgen.apps.battery.insertion_battery import InsertionElectrode
|
|
5
|
+
|
|
6
|
+
from emmet.builders.utils import get_hop_cutoff
|
|
7
|
+
from emmet.core.mobility.migrationgraph import MigrationGraphDoc
|
|
8
8
|
from emmet.core.utils import jsanitize
|
|
9
9
|
|
|
10
10
|
|
|
@@ -18,7 +18,7 @@ class MigrationGraphBuilder(MapBuilder):
|
|
|
18
18
|
max_hop_distance: float = 7,
|
|
19
19
|
populate_sc_fields: bool = True,
|
|
20
20
|
min_length_sc: float = 8,
|
|
21
|
-
minmax_num_atoms:
|
|
21
|
+
minmax_num_atoms: tuple[int, int] = (80, 120),
|
|
22
22
|
ltol: float = 0.2,
|
|
23
23
|
stol: float = 0.3,
|
|
24
24
|
angle_tol: float = 5,
|
emmet/builders/settings.py
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Settings for defaults in the build pipelines for the Materials Project
|
|
3
3
|
"""
|
|
4
|
-
from typing import List
|
|
5
4
|
|
|
6
5
|
from pydantic.fields import Field
|
|
7
6
|
|
|
8
7
|
from emmet.core.provenance import Author, History
|
|
8
|
+
from emmet.core.qchem.calc_types import TaskType as QChemTaskType
|
|
9
9
|
from emmet.core.settings import EmmetSettings
|
|
10
10
|
from emmet.core.vasp.calc_types import TaskType as VaspTaskType
|
|
11
|
-
from emmet.core.qchem.calc_types import TaskType as QChemTaskType
|
|
12
11
|
|
|
13
12
|
|
|
14
13
|
class EmmetBuildSettings(EmmetSettings):
|
|
@@ -18,35 +17,40 @@ class EmmetBuildSettings(EmmetSettings):
|
|
|
18
17
|
EMMET_CONFIG_FILE to point to the json with emmet settings
|
|
19
18
|
"""
|
|
20
19
|
|
|
21
|
-
BUILD_TAGS:
|
|
20
|
+
BUILD_TAGS: list[str] = Field(
|
|
22
21
|
[], description="Tags for calculations to build materials"
|
|
23
22
|
)
|
|
24
|
-
EXCLUDED_TAGS:
|
|
23
|
+
EXCLUDED_TAGS: list[str] = Field(
|
|
25
24
|
[],
|
|
26
25
|
description="Tags to exclude from materials",
|
|
27
26
|
)
|
|
28
27
|
|
|
29
|
-
DEPRECATED_TAGS:
|
|
28
|
+
DEPRECATED_TAGS: list[str] = Field(
|
|
30
29
|
[], description="Tags for calculations to deprecate"
|
|
31
30
|
)
|
|
32
31
|
|
|
33
|
-
NON_COMMERCIAL_TAGS:
|
|
32
|
+
NON_COMMERCIAL_TAGS: list[str] = Field(
|
|
34
33
|
[], description="Tages for which to add BY-NC as license data in builder_meta"
|
|
35
34
|
)
|
|
36
35
|
|
|
37
|
-
VASP_ALLOWED_VASP_TYPES:
|
|
38
|
-
|
|
36
|
+
VASP_ALLOWED_VASP_TYPES: list[VaspTaskType] = Field(
|
|
37
|
+
list(VaspTaskType),
|
|
39
38
|
description="Allowed task_types to build materials from",
|
|
40
39
|
)
|
|
41
40
|
|
|
42
|
-
QCHEM_ALLOWED_TASK_TYPES:
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
41
|
+
QCHEM_ALLOWED_TASK_TYPES: list[QChemTaskType] = Field(
|
|
42
|
+
list(
|
|
43
|
+
map(
|
|
44
|
+
QChemTaskType,
|
|
45
|
+
[
|
|
46
|
+
"Single Point",
|
|
47
|
+
"Force",
|
|
48
|
+
"Geometry Optimization",
|
|
49
|
+
"Frequency Analysis",
|
|
50
|
+
"Frequency Flattening Geometry Optimization",
|
|
51
|
+
],
|
|
52
|
+
)
|
|
53
|
+
),
|
|
50
54
|
description="Allowed task_types to build molecules from",
|
|
51
55
|
)
|
|
52
56
|
|
|
@@ -71,7 +75,7 @@ class EmmetBuildSettings(EmmetSettings):
|
|
|
71
75
|
)
|
|
72
76
|
|
|
73
77
|
DEFAULT_HISTORY: History = Field(
|
|
74
|
-
History(
|
|
78
|
+
History( # type: ignore[call-arg]
|
|
75
79
|
name="Materials Project Optimized Structure",
|
|
76
80
|
url="http://www.materialsproject.org",
|
|
77
81
|
),
|
emmet/builders/utils.py
CHANGED
|
@@ -1,18 +1,30 @@
|
|
|
1
|
-
from
|
|
2
|
-
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
3
4
|
import os
|
|
5
|
+
import sys
|
|
4
6
|
from gzip import GzipFile
|
|
5
|
-
import orjson
|
|
6
|
-
import json
|
|
7
7
|
from io import BytesIO
|
|
8
|
-
from monty.serialization import MontyDecoder
|
|
9
|
-
from botocore.exceptions import ClientError
|
|
10
8
|
from itertools import chain, combinations
|
|
11
|
-
|
|
9
|
+
|
|
10
|
+
import orjson
|
|
11
|
+
from botocore.exceptions import ClientError
|
|
12
|
+
from monty.serialization import MontyDecoder
|
|
12
13
|
from pymatgen.analysis.diffusion.neb.full_path_mapper import MigrationGraph
|
|
14
|
+
from pymatgen.core import Structure
|
|
15
|
+
from pymatgen.io.vasp.inputs import PotcarSingle
|
|
16
|
+
|
|
17
|
+
from emmet.core.types.typing import FSPathType
|
|
13
18
|
|
|
19
|
+
from emmet.builders.settings import EmmetBuildSettings
|
|
14
20
|
|
|
15
|
-
|
|
21
|
+
from typing import TYPE_CHECKING
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from typing import Any, Literal
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def maximal_spanning_non_intersecting_subsets(sets) -> set[set[Any]]:
|
|
16
28
|
"""
|
|
17
29
|
Finds the maximal spanning non intersecting subsets of a group of sets
|
|
18
30
|
This is useful for parsing out the sandboxes and figuring out how to group
|
|
@@ -40,7 +52,7 @@ def maximal_spanning_non_intersecting_subsets(sets) -> Set[Set]:
|
|
|
40
52
|
return set(to_return_subsets)
|
|
41
53
|
|
|
42
54
|
|
|
43
|
-
def chemsys_permutations(chemsys) ->
|
|
55
|
+
def chemsys_permutations(chemsys) -> set[str]:
|
|
44
56
|
# Function to get all relevant chemical subsystems
|
|
45
57
|
# e.g. for Li-Mn-O returns Li, Li-Mn, Li-Mn-O, Li-O, Mn, Mn-O, O
|
|
46
58
|
elements = chemsys.split("-")
|
|
@@ -58,7 +70,7 @@ def get_hop_cutoff(
|
|
|
58
70
|
algorithm: str = "min_distance",
|
|
59
71
|
min_hop_distance: float = 1,
|
|
60
72
|
max_hop_distance: float = 7,
|
|
61
|
-
) ->
|
|
73
|
+
) -> float | None:
|
|
62
74
|
"""
|
|
63
75
|
A function to get an appropriate hop distance cutoff for a given migration
|
|
64
76
|
graph structure which can be used for MigrationGraph.with_distance()
|
|
@@ -158,7 +170,7 @@ def query_open_data(
|
|
|
158
170
|
key: str,
|
|
159
171
|
monty_decode: bool = True,
|
|
160
172
|
s3_resource: Any = None,
|
|
161
|
-
) ->
|
|
173
|
+
) -> dict | None:
|
|
162
174
|
"""Query a Materials Project AWS S3 Open Data bucket directly with boto3
|
|
163
175
|
|
|
164
176
|
Args:
|
|
@@ -166,7 +178,7 @@ def query_open_data(
|
|
|
166
178
|
prefix (str): Full set of file prefixes
|
|
167
179
|
key (str): Key for file
|
|
168
180
|
monty_decode (bool): Whether to monty decode or keep as dictionary. Defaults to True.
|
|
169
|
-
s3_resource (
|
|
181
|
+
s3_resource (Any | None): S3 resource. One will be instantiated if none are provided
|
|
170
182
|
|
|
171
183
|
Returns:
|
|
172
184
|
dict: MontyDecoded data or None
|
|
@@ -211,3 +223,80 @@ class HiddenPrints:
|
|
|
211
223
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
212
224
|
sys.stdout.close()
|
|
213
225
|
sys.stdout = self._original_stdout
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def get_potcar_stats(
|
|
229
|
+
method: Literal["potcar", "pymatgen", "stored"] = "potcar",
|
|
230
|
+
path_to_stored_stats: FSPathType | None = None,
|
|
231
|
+
) -> dict[str, Any]:
|
|
232
|
+
"""
|
|
233
|
+
Get the POTCAR stats used in MP calculations to validate POTCARs.
|
|
234
|
+
|
|
235
|
+
Args:
|
|
236
|
+
method : Literal[str : "potcar","pymatgen","stored"] = "potcar"
|
|
237
|
+
Method to generate the POTCAR stats:
|
|
238
|
+
- "potcar": regenerate stats from a user's POTCAR library.
|
|
239
|
+
- "pymatgen": regenerate stats from the stored pymatgen
|
|
240
|
+
summary stats dict. This has the downside of the possibility
|
|
241
|
+
of finding multiple matching POTCAR stats for older POTCAR
|
|
242
|
+
releases. As of 25 March, 2024, it does not appear that the
|
|
243
|
+
MP POTCARs have duplicates
|
|
244
|
+
- "stored": load a stored dict of POTCAR stats.
|
|
245
|
+
path_to_stored_stats : FSPathType or None
|
|
246
|
+
If FSPathType, the path to the stored summary stats file.
|
|
247
|
+
If None, defaults to
|
|
248
|
+
`importlib.resources.files("emmet.builders.vasp") / "mp_potcar_stats.json.gz"`
|
|
249
|
+
Returns:
|
|
250
|
+
dict, of POTCAR summary stats.
|
|
251
|
+
"""
|
|
252
|
+
default_settings = EmmetBuildSettings()
|
|
253
|
+
|
|
254
|
+
stats: dict[str, dict] = {} # type: ignore
|
|
255
|
+
|
|
256
|
+
if method == "stored":
|
|
257
|
+
from monty.serialization import loadfn
|
|
258
|
+
|
|
259
|
+
if path_to_stored_stats is None:
|
|
260
|
+
from importlib.resources import files
|
|
261
|
+
|
|
262
|
+
path_to_stored_stats = str(
|
|
263
|
+
files("emmet.builders.vasp") / "mp_potcar_stats.json.gz"
|
|
264
|
+
)
|
|
265
|
+
return loadfn(path_to_stored_stats) # type: ignore
|
|
266
|
+
|
|
267
|
+
for (
|
|
268
|
+
calc_type,
|
|
269
|
+
input_set,
|
|
270
|
+
) in default_settings.VASP_DEFAULT_INPUT_SETS.items():
|
|
271
|
+
_input = input_set()
|
|
272
|
+
|
|
273
|
+
stats[calc_type] = {}
|
|
274
|
+
functional = _input._config_dict["POTCAR_FUNCTIONAL"]
|
|
275
|
+
|
|
276
|
+
for potcar_symbol in _input.CONFIG["POTCAR"].values():
|
|
277
|
+
if method == "potcar":
|
|
278
|
+
potcar = PotcarSingle.from_symbol_and_functional(
|
|
279
|
+
symbol=potcar_symbol, functional=functional
|
|
280
|
+
)
|
|
281
|
+
summary_stats = potcar._summary_stats.copy()
|
|
282
|
+
# fallback method for validation - use header hash and symbol
|
|
283
|
+
# note that the potcar_spec assigns PotcarSingle.symbol to "titel"
|
|
284
|
+
# whereas the ***correct*** field is `header`
|
|
285
|
+
summary_stats["titel"] = potcar.header # type: ignore[assignment]
|
|
286
|
+
summary_stats["hash"] = potcar.md5_header_hash # type: ignore[assignment]
|
|
287
|
+
summary_stats = [summary_stats] # type: ignore[assignment]
|
|
288
|
+
|
|
289
|
+
elif method == "pymatgen":
|
|
290
|
+
summary_stats = [] # type: ignore[assignment]
|
|
291
|
+
for _, entries in PotcarSingle._potcar_summary_stats[
|
|
292
|
+
functional
|
|
293
|
+
].items():
|
|
294
|
+
summary_stats += [ # type: ignore[operator]
|
|
295
|
+
{**entry, "titel": None, "hash": None}
|
|
296
|
+
for entry in entries
|
|
297
|
+
if entry["symbol"] == potcar_symbol
|
|
298
|
+
]
|
|
299
|
+
|
|
300
|
+
stats[calc_type].update({potcar_symbol: summary_stats})
|
|
301
|
+
|
|
302
|
+
return stats
|