emmet-builders 0.84.7rc4__py3-none-any.whl → 0.84.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of emmet-builders might be problematic. Click here for more details.
- emmet/builders/abinit/phonon.py +15 -11
- emmet/builders/abinit/sound_velocity.py +14 -10
- emmet/builders/feff/xas.py +1 -2
- emmet/builders/materials/absorption_spectrum.py +9 -4
- emmet/builders/materials/alloys.py +2 -3
- emmet/builders/materials/chemenv.py +2 -3
- emmet/builders/materials/corrected_entries.py +14 -8
- emmet/builders/materials/dielectric.py +9 -4
- emmet/builders/materials/elasticity.py +32 -25
- emmet/builders/materials/electrodes.py +23 -18
- emmet/builders/materials/electronic_structure.py +16 -16
- emmet/builders/materials/magnetism.py +9 -3
- emmet/builders/materials/ml.py +9 -11
- emmet/builders/materials/optimade.py +7 -3
- emmet/builders/materials/piezoelectric.py +1 -2
- emmet/builders/materials/provenance.py +11 -7
- emmet/builders/materials/robocrys.py +2 -3
- emmet/builders/materials/substrates.py +8 -7
- emmet/builders/materials/thermo.py +17 -11
- emmet/builders/matscholar/missing_compositions.py +12 -8
- emmet/builders/mobility/migration_graph.py +5 -5
- emmet/builders/molecules/atomic.py +27 -22
- emmet/builders/molecules/bonds.py +17 -12
- emmet/builders/molecules/electric.py +16 -11
- emmet/builders/molecules/metal_binding.py +19 -16
- emmet/builders/molecules/orbitals.py +15 -11
- emmet/builders/molecules/redox.py +27 -21
- emmet/builders/molecules/summary.py +21 -13
- emmet/builders/molecules/thermo.py +20 -15
- emmet/builders/molecules/trajectory.py +23 -18
- emmet/builders/molecules/vibration.py +15 -11
- emmet/builders/qchem/molecules.py +37 -32
- emmet/builders/settings.py +7 -8
- emmet/builders/utils.py +11 -7
- emmet/builders/vasp/materials.py +17 -11
- emmet/builders/vasp/task_validator.py +3 -5
- {emmet_builders-0.84.7rc4.dist-info → emmet_builders-0.84.9.dist-info}/METADATA +1 -1
- emmet_builders-0.84.9.dist-info/RECORD +54 -0
- emmet_builders-0.84.7rc4.dist-info/RECORD +0 -54
- {emmet_builders-0.84.7rc4.dist-info → emmet_builders-0.84.9.dist-info}/WHEEL +0 -0
- {emmet_builders-0.84.7rc4.dist-info → emmet_builders-0.84.9.dist-info}/top_level.txt +0 -0
|
@@ -1,17 +1,25 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
from datetime import datetime
|
|
2
4
|
from itertools import chain
|
|
3
5
|
from math import ceil
|
|
4
|
-
from typing import Any, Optional, Iterable, Iterator, List, Dict
|
|
5
|
-
|
|
6
|
-
# from monty.serialization import loadfn, dumpfn
|
|
7
6
|
|
|
8
7
|
from maggma.builders import Builder
|
|
9
8
|
from maggma.core import Store
|
|
10
9
|
from maggma.utils import grouper
|
|
11
10
|
|
|
11
|
+
from emmet.builders.settings import EmmetBuildSettings
|
|
12
12
|
from emmet.core.molecules.summary import MoleculeSummaryDoc
|
|
13
13
|
from emmet.core.utils import jsanitize
|
|
14
|
-
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
from typing import TYPE_CHECKING
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from collections.abc import Iterable, Iterator
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
# from monty.serialization import loadfn, dumpfn
|
|
15
23
|
|
|
16
24
|
|
|
17
25
|
__author__ = "Evan Spotte-Smith"
|
|
@@ -43,8 +51,8 @@ class SummaryBuilder(Builder):
|
|
|
43
51
|
thermo: Store,
|
|
44
52
|
vibes: Store,
|
|
45
53
|
summary: Store,
|
|
46
|
-
query:
|
|
47
|
-
settings:
|
|
54
|
+
query: dict | None = None,
|
|
55
|
+
settings: EmmetBuildSettings | None = None,
|
|
48
56
|
**kwargs,
|
|
49
57
|
):
|
|
50
58
|
self.molecules = molecules
|
|
@@ -198,7 +206,7 @@ class SummaryBuilder(Builder):
|
|
|
198
206
|
self.summary.ensure_index("last_updated")
|
|
199
207
|
self.summary.ensure_index("formula_alphabetical")
|
|
200
208
|
|
|
201
|
-
def prechunk(self, number_splits: int) -> Iterable[
|
|
209
|
+
def prechunk(self, number_splits: int) -> Iterable[dict]: # pragma: no cover
|
|
202
210
|
"""Prechunk the builder for distributed computation"""
|
|
203
211
|
|
|
204
212
|
temp_query = dict(self.query)
|
|
@@ -224,7 +232,7 @@ class SummaryBuilder(Builder):
|
|
|
224
232
|
query["species_hash"] = {"$in": list(hash_chunk)}
|
|
225
233
|
yield {"query": query}
|
|
226
234
|
|
|
227
|
-
def get_items(self) -> Iterator[
|
|
235
|
+
def get_items(self) -> Iterator[list[dict]]:
|
|
228
236
|
"""
|
|
229
237
|
Gets all items to process into summary documents.
|
|
230
238
|
This does no datetime checking; relying on on whether
|
|
@@ -271,20 +279,20 @@ class SummaryBuilder(Builder):
|
|
|
271
279
|
|
|
272
280
|
yield molecules
|
|
273
281
|
|
|
274
|
-
def process_item(self, items:
|
|
282
|
+
def process_item(self, items: list[dict]) -> list[dict]:
|
|
275
283
|
"""
|
|
276
284
|
Process the tasks into a MoleculeSummaryDoc
|
|
277
285
|
|
|
278
286
|
Args:
|
|
279
|
-
tasks
|
|
287
|
+
tasks list[dict] : a list of MoleculeDocs in dict form
|
|
280
288
|
|
|
281
289
|
Returns:
|
|
282
290
|
[dict] : a list of new orbital docs
|
|
283
291
|
"""
|
|
284
292
|
|
|
285
|
-
def _group_docs(docs:
|
|
293
|
+
def _group_docs(docs: list[dict[str, Any]], by_method: bool = False):
|
|
286
294
|
"""Helper function to group docs by solvent"""
|
|
287
|
-
grouped:
|
|
295
|
+
grouped: dict[str, Any] = dict()
|
|
288
296
|
|
|
289
297
|
for doc in docs:
|
|
290
298
|
solvent = doc.get("solvent")
|
|
@@ -367,7 +375,7 @@ class SummaryBuilder(Builder):
|
|
|
367
375
|
|
|
368
376
|
return jsanitize([doc.model_dump() for doc in summary_docs], allow_bson=True)
|
|
369
377
|
|
|
370
|
-
def update_targets(self, items:
|
|
378
|
+
def update_targets(self, items: list[list[dict]]):
|
|
371
379
|
"""
|
|
372
380
|
Inserts the new documents into the summary collection
|
|
373
381
|
|
|
@@ -1,22 +1,27 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
from collections import defaultdict
|
|
2
4
|
from datetime import datetime
|
|
3
5
|
from itertools import chain
|
|
4
6
|
from math import ceil
|
|
5
|
-
from typing import Optional, Iterable, Iterator, List, Dict
|
|
6
|
-
|
|
7
|
-
from pymatgen.core.structure import Molecule
|
|
8
|
-
from pymatgen.analysis.molecule_matcher import MoleculeMatcher
|
|
9
7
|
|
|
10
8
|
from maggma.builders import Builder
|
|
11
9
|
from maggma.core import Store
|
|
12
10
|
from maggma.utils import grouper
|
|
11
|
+
from pymatgen.analysis.molecule_matcher import MoleculeMatcher
|
|
12
|
+
from pymatgen.core.structure import Molecule
|
|
13
13
|
|
|
14
|
-
from emmet.
|
|
15
|
-
from emmet.core.
|
|
16
|
-
from emmet.core.molecules.thermo import get_free_energy, MoleculeThermoDoc
|
|
14
|
+
from emmet.builders.settings import EmmetBuildSettings
|
|
15
|
+
from emmet.core.molecules.thermo import MoleculeThermoDoc, get_free_energy
|
|
17
16
|
from emmet.core.qchem.calc_types import TaskType
|
|
17
|
+
from emmet.core.qchem.molecule import MoleculeDoc, evaluate_lot
|
|
18
|
+
from emmet.core.qchem.task import TaskDocument
|
|
18
19
|
from emmet.core.utils import jsanitize
|
|
19
|
-
|
|
20
|
+
|
|
21
|
+
from typing import TYPE_CHECKING
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from collections.abc import Iterable, Iterator
|
|
20
25
|
|
|
21
26
|
|
|
22
27
|
__author__ = "Evan Spotte-Smith"
|
|
@@ -119,8 +124,8 @@ class ThermoBuilder(Builder):
|
|
|
119
124
|
tasks: Store,
|
|
120
125
|
molecules: Store,
|
|
121
126
|
thermo: Store,
|
|
122
|
-
query:
|
|
123
|
-
settings:
|
|
127
|
+
query: dict | None = None,
|
|
128
|
+
settings: EmmetBuildSettings | None = None,
|
|
124
129
|
**kwargs,
|
|
125
130
|
):
|
|
126
131
|
self.tasks = tasks
|
|
@@ -166,7 +171,7 @@ class ThermoBuilder(Builder):
|
|
|
166
171
|
self.thermo.ensure_index("last_updated")
|
|
167
172
|
self.thermo.ensure_index("formula_alphabetical")
|
|
168
173
|
|
|
169
|
-
def prechunk(self, number_splits: int) -> Iterable[
|
|
174
|
+
def prechunk(self, number_splits: int) -> Iterable[dict]: # pragma: no cover
|
|
170
175
|
"""Prechunk the builder for distributed computation"""
|
|
171
176
|
|
|
172
177
|
temp_query = dict(self.query)
|
|
@@ -192,7 +197,7 @@ class ThermoBuilder(Builder):
|
|
|
192
197
|
query["species_hash"] = {"$in": list(hash_chunk)}
|
|
193
198
|
yield {"query": query}
|
|
194
199
|
|
|
195
|
-
def get_items(self) -> Iterator[
|
|
200
|
+
def get_items(self) -> Iterator[list[dict]]:
|
|
196
201
|
"""
|
|
197
202
|
Gets all items to process into thermo documents.
|
|
198
203
|
This does no datetime checking; relying on on whether
|
|
@@ -239,12 +244,12 @@ class ThermoBuilder(Builder):
|
|
|
239
244
|
|
|
240
245
|
yield molecules
|
|
241
246
|
|
|
242
|
-
def process_item(self, items:
|
|
247
|
+
def process_item(self, items: list[dict]) -> list[dict]:
|
|
243
248
|
"""
|
|
244
249
|
Process the tasks into a MoleculeThermoDoc
|
|
245
250
|
|
|
246
251
|
Args:
|
|
247
|
-
items
|
|
252
|
+
items list[dict] : a list of MoleculeDocs in dict form
|
|
248
253
|
|
|
249
254
|
Returns:
|
|
250
255
|
[dict] : a list of new thermo docs
|
|
@@ -469,7 +474,7 @@ class ThermoBuilder(Builder):
|
|
|
469
474
|
|
|
470
475
|
return jsanitize([doc.model_dump() for doc in thermo_docs], allow_bson=True)
|
|
471
476
|
|
|
472
|
-
def update_targets(self, items:
|
|
477
|
+
def update_targets(self, items: list[list[dict]]):
|
|
473
478
|
"""
|
|
474
479
|
Inserts the new thermo docs into the thermo collection
|
|
475
480
|
|
|
@@ -1,19 +1,24 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
from collections import defaultdict
|
|
2
4
|
from datetime import datetime
|
|
3
5
|
from itertools import chain
|
|
4
6
|
from math import ceil
|
|
5
|
-
from typing import Optional, Iterable, Iterator, List, Dict
|
|
6
7
|
|
|
7
8
|
from maggma.builders import Builder
|
|
8
9
|
from maggma.core import Store
|
|
9
10
|
from maggma.utils import grouper
|
|
10
11
|
|
|
11
|
-
from emmet.
|
|
12
|
-
from emmet.core.qchem.molecule import MoleculeDoc, evaluate_lot
|
|
12
|
+
from emmet.builders.settings import EmmetBuildSettings
|
|
13
13
|
from emmet.core.molecules.trajectory import ForcesDoc, TrajectoryDoc
|
|
14
|
+
from emmet.core.qchem.molecule import MoleculeDoc, evaluate_lot
|
|
15
|
+
from emmet.core.qchem.task import TaskDocument
|
|
14
16
|
from emmet.core.utils import jsanitize
|
|
15
|
-
from emmet.builders.settings import EmmetBuildSettings
|
|
16
17
|
|
|
18
|
+
from typing import TYPE_CHECKING
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from collections.abc import Iterable, Iterator
|
|
17
22
|
|
|
18
23
|
__author__ = "Evan Spotte-Smith"
|
|
19
24
|
|
|
@@ -41,8 +46,8 @@ class ForcesBuilder(Builder):
|
|
|
41
46
|
tasks: Store,
|
|
42
47
|
molecules: Store,
|
|
43
48
|
forces: Store,
|
|
44
|
-
query:
|
|
45
|
-
settings:
|
|
49
|
+
query: dict | None = None,
|
|
50
|
+
settings: EmmetBuildSettings | None = None,
|
|
46
51
|
**kwargs,
|
|
47
52
|
):
|
|
48
53
|
self.tasks = tasks
|
|
@@ -88,7 +93,7 @@ class ForcesBuilder(Builder):
|
|
|
88
93
|
self.forces.ensure_index("last_updated")
|
|
89
94
|
self.forces.ensure_index("formula_alphabetical")
|
|
90
95
|
|
|
91
|
-
def prechunk(self, number_splits: int) -> Iterable[
|
|
96
|
+
def prechunk(self, number_splits: int) -> Iterable[dict]: # pragma: no cover
|
|
92
97
|
"""Prechunk the builder for distributed computation"""
|
|
93
98
|
|
|
94
99
|
temp_query = dict(self.query)
|
|
@@ -114,7 +119,7 @@ class ForcesBuilder(Builder):
|
|
|
114
119
|
query["species_hash"] = {"$in": list(hash_chunk)}
|
|
115
120
|
yield {"query": query}
|
|
116
121
|
|
|
117
|
-
def get_items(self) -> Iterator[
|
|
122
|
+
def get_items(self) -> Iterator[list[dict]]:
|
|
118
123
|
"""
|
|
119
124
|
Gets all items to process into force documents.
|
|
120
125
|
This does no datetime checking; relying on on whether
|
|
@@ -161,12 +166,12 @@ class ForcesBuilder(Builder):
|
|
|
161
166
|
|
|
162
167
|
yield molecules
|
|
163
168
|
|
|
164
|
-
def process_item(self, items:
|
|
169
|
+
def process_item(self, items: list[dict]) -> list[dict]:
|
|
165
170
|
"""
|
|
166
171
|
Process the tasks into ForcesDoc
|
|
167
172
|
|
|
168
173
|
Args:
|
|
169
|
-
items
|
|
174
|
+
items list[dict] : a list of MoleculeDocs in dict form
|
|
170
175
|
|
|
171
176
|
Returns:
|
|
172
177
|
[dict] : a list of new forces docs
|
|
@@ -242,7 +247,7 @@ class ForcesBuilder(Builder):
|
|
|
242
247
|
|
|
243
248
|
return jsanitize([doc.model_dump() for doc in force_docs], allow_bson=True)
|
|
244
249
|
|
|
245
|
-
def update_targets(self, items:
|
|
250
|
+
def update_targets(self, items: list[list[dict]]):
|
|
246
251
|
"""
|
|
247
252
|
Inserts the new force docs into the forces collection
|
|
248
253
|
|
|
@@ -294,8 +299,8 @@ class TrajectoryBuilder(Builder):
|
|
|
294
299
|
tasks: Store,
|
|
295
300
|
molecules: Store,
|
|
296
301
|
trajectories: Store,
|
|
297
|
-
query:
|
|
298
|
-
settings:
|
|
302
|
+
query: dict | None = None,
|
|
303
|
+
settings: EmmetBuildSettings | None = None,
|
|
299
304
|
**kwargs,
|
|
300
305
|
):
|
|
301
306
|
self.tasks = tasks
|
|
@@ -341,7 +346,7 @@ class TrajectoryBuilder(Builder):
|
|
|
341
346
|
self.trajectories.ensure_index("last_updated")
|
|
342
347
|
self.trajectories.ensure_index("formula_alphabetical")
|
|
343
348
|
|
|
344
|
-
def prechunk(self, number_splits: int) -> Iterable[
|
|
349
|
+
def prechunk(self, number_splits: int) -> Iterable[dict]: # pragma: no cover
|
|
345
350
|
"""Prechunk the builder for distributed computation"""
|
|
346
351
|
|
|
347
352
|
temp_query = dict(self.query)
|
|
@@ -365,7 +370,7 @@ class TrajectoryBuilder(Builder):
|
|
|
365
370
|
for hash_chunk in grouper(to_process_hashes, N):
|
|
366
371
|
yield {"query": {"species_hash": {"$in": list(hash_chunk)}}}
|
|
367
372
|
|
|
368
|
-
def get_items(self) -> Iterator[
|
|
373
|
+
def get_items(self) -> Iterator[list[dict]]:
|
|
369
374
|
"""
|
|
370
375
|
Gets all items to process into trajectory documents.
|
|
371
376
|
This does no datetime checking; relying on on whether
|
|
@@ -412,12 +417,12 @@ class TrajectoryBuilder(Builder):
|
|
|
412
417
|
|
|
413
418
|
yield molecules
|
|
414
419
|
|
|
415
|
-
def process_item(self, items:
|
|
420
|
+
def process_item(self, items: list[dict]) -> list[dict]:
|
|
416
421
|
"""
|
|
417
422
|
Process the tasks into TrajectoryDocs
|
|
418
423
|
|
|
419
424
|
Args:
|
|
420
|
-
items
|
|
425
|
+
items list[dict] : a list of MoleculeDocs in dict form
|
|
421
426
|
|
|
422
427
|
Returns:
|
|
423
428
|
[dict] : a list of new trajectory docs
|
|
@@ -492,7 +497,7 @@ class TrajectoryBuilder(Builder):
|
|
|
492
497
|
|
|
493
498
|
return jsanitize([doc.model_dump() for doc in trajectory_docs], allow_bson=True)
|
|
494
499
|
|
|
495
|
-
def update_targets(self, items:
|
|
500
|
+
def update_targets(self, items: list[list[dict]]):
|
|
496
501
|
"""
|
|
497
502
|
Inserts the new force docs into the trajectories collection
|
|
498
503
|
|
|
@@ -1,19 +1,23 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
from collections import defaultdict
|
|
2
4
|
from datetime import datetime
|
|
3
5
|
from itertools import chain
|
|
4
6
|
from math import ceil
|
|
5
|
-
from typing import
|
|
7
|
+
from typing import TYPE_CHECKING
|
|
6
8
|
|
|
7
9
|
from maggma.builders import Builder
|
|
8
10
|
from maggma.core import Store
|
|
9
11
|
from maggma.utils import grouper
|
|
10
12
|
|
|
11
|
-
from emmet.
|
|
12
|
-
from emmet.core.qchem.molecule import MoleculeDoc, evaluate_lot
|
|
13
|
+
from emmet.builders.settings import EmmetBuildSettings
|
|
13
14
|
from emmet.core.molecules.vibration import VibrationDoc
|
|
15
|
+
from emmet.core.qchem.molecule import MoleculeDoc, evaluate_lot
|
|
16
|
+
from emmet.core.qchem.task import TaskDocument
|
|
14
17
|
from emmet.core.utils import jsanitize
|
|
15
|
-
from emmet.builders.settings import EmmetBuildSettings
|
|
16
18
|
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from collections.abc import Iterable, Iterator
|
|
17
21
|
|
|
18
22
|
__author__ = "Evan Spotte-Smith"
|
|
19
23
|
|
|
@@ -44,8 +48,8 @@ class VibrationBuilder(Builder):
|
|
|
44
48
|
tasks: Store,
|
|
45
49
|
molecules: Store,
|
|
46
50
|
vibes: Store,
|
|
47
|
-
query:
|
|
48
|
-
settings:
|
|
51
|
+
query: dict | None = None,
|
|
52
|
+
settings: EmmetBuildSettings | None = None,
|
|
49
53
|
**kwargs,
|
|
50
54
|
):
|
|
51
55
|
self.tasks = tasks
|
|
@@ -91,7 +95,7 @@ class VibrationBuilder(Builder):
|
|
|
91
95
|
self.vibes.ensure_index("last_updated")
|
|
92
96
|
self.vibes.ensure_index("formula_alphabetical")
|
|
93
97
|
|
|
94
|
-
def prechunk(self, number_splits: int) -> Iterable[
|
|
98
|
+
def prechunk(self, number_splits: int) -> Iterable[dict]: # pragma: no cover
|
|
95
99
|
"""Prechunk the builder for distributed computation"""
|
|
96
100
|
|
|
97
101
|
temp_query = dict(self.query)
|
|
@@ -117,7 +121,7 @@ class VibrationBuilder(Builder):
|
|
|
117
121
|
query["species_hash"] = {"$in": list(hash_chunk)}
|
|
118
122
|
yield {"query": query}
|
|
119
123
|
|
|
120
|
-
def get_items(self) -> Iterator[
|
|
124
|
+
def get_items(self) -> Iterator[list[dict]]:
|
|
121
125
|
"""
|
|
122
126
|
Gets all items to process into vibration documents.
|
|
123
127
|
This does no datetime checking; relying on on whether
|
|
@@ -164,12 +168,12 @@ class VibrationBuilder(Builder):
|
|
|
164
168
|
|
|
165
169
|
yield molecules
|
|
166
170
|
|
|
167
|
-
def process_item(self, items:
|
|
171
|
+
def process_item(self, items: list[dict]) -> list[dict]:
|
|
168
172
|
"""
|
|
169
173
|
Process the tasks into VibrationDocs
|
|
170
174
|
|
|
171
175
|
Args:
|
|
172
|
-
items
|
|
176
|
+
items list[dict] : a list of MoleculeDocs in dict form
|
|
173
177
|
|
|
174
178
|
Returns:
|
|
175
179
|
[dict] : a list of new vibration docs
|
|
@@ -247,7 +251,7 @@ class VibrationBuilder(Builder):
|
|
|
247
251
|
|
|
248
252
|
return jsanitize([doc.model_dump() for doc in vibe_docs], allow_bson=True)
|
|
249
253
|
|
|
250
|
-
def update_targets(self, items:
|
|
254
|
+
def update_targets(self, items: list[list[dict]]):
|
|
251
255
|
"""
|
|
252
256
|
Inserts the new vibration docs into the vibes collection
|
|
253
257
|
|
|
@@ -1,25 +1,30 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
from datetime import datetime
|
|
2
4
|
from itertools import chain, groupby
|
|
3
5
|
from math import ceil
|
|
4
|
-
from typing import Any, Dict, Iterable, Iterator, List, Optional, Set, Union
|
|
5
6
|
|
|
6
7
|
import networkx as nx
|
|
7
|
-
|
|
8
8
|
from maggma.builders import Builder
|
|
9
9
|
from maggma.stores import Store
|
|
10
10
|
from maggma.utils import grouper
|
|
11
11
|
|
|
12
12
|
from emmet.builders.settings import EmmetBuildSettings
|
|
13
|
-
from emmet.core.
|
|
13
|
+
from emmet.core.qchem.calc_types import CalcType, LevelOfTheory, TaskType
|
|
14
14
|
from emmet.core.qchem.molecule import (
|
|
15
|
+
MoleculeDoc,
|
|
15
16
|
best_lot,
|
|
16
17
|
evaluate_lot,
|
|
17
18
|
evaluate_task_entry,
|
|
18
|
-
MoleculeDoc,
|
|
19
19
|
)
|
|
20
20
|
from emmet.core.qchem.task import TaskDocument
|
|
21
|
-
from emmet.core.
|
|
21
|
+
from emmet.core.utils import get_molecule_id, group_molecules, jsanitize, make_mol_graph
|
|
22
|
+
|
|
23
|
+
from typing import TYPE_CHECKING
|
|
22
24
|
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
from collections.abc import Iterable, Iterator
|
|
27
|
+
from typing import Any
|
|
23
28
|
|
|
24
29
|
__author__ = "Evan Spotte-Smith <ewcspottesmith@lbl.gov>"
|
|
25
30
|
|
|
@@ -29,9 +34,9 @@ SETTINGS = EmmetBuildSettings()
|
|
|
29
34
|
|
|
30
35
|
def evaluate_molecule(
|
|
31
36
|
mol_doc: MoleculeDoc,
|
|
32
|
-
funct_scores:
|
|
33
|
-
basis_scores:
|
|
34
|
-
solvent_scores:
|
|
37
|
+
funct_scores: dict[str, int] = SETTINGS.QCHEM_FUNCTIONAL_QUALITY_SCORES,
|
|
38
|
+
basis_scores: dict[str, int] = SETTINGS.QCHEM_BASIS_QUALITY_SCORES,
|
|
39
|
+
solvent_scores: dict[str, int] = SETTINGS.QCHEM_SOLVENT_MODEL_QUALITY_SCORES,
|
|
35
40
|
):
|
|
36
41
|
"""
|
|
37
42
|
Helper function to order optimization calcs by
|
|
@@ -106,8 +111,8 @@ class MoleculesAssociationBuilder(Builder):
|
|
|
106
111
|
self,
|
|
107
112
|
tasks: Store,
|
|
108
113
|
assoc: Store,
|
|
109
|
-
query:
|
|
110
|
-
settings:
|
|
114
|
+
query: dict | None = None,
|
|
115
|
+
settings: EmmetBuildSettings | None = None,
|
|
111
116
|
**kwargs,
|
|
112
117
|
):
|
|
113
118
|
"""
|
|
@@ -146,7 +151,7 @@ class MoleculesAssociationBuilder(Builder):
|
|
|
146
151
|
self.assoc.ensure_index("task_ids")
|
|
147
152
|
self.assoc.ensure_index("formula_alphabetical")
|
|
148
153
|
|
|
149
|
-
def prechunk(self, number_splits: int) -> Iterable[
|
|
154
|
+
def prechunk(self, number_splits: int) -> Iterable[dict]: # pragma: no cover
|
|
150
155
|
"""Prechunk the molecule builder for distributed computation"""
|
|
151
156
|
|
|
152
157
|
temp_query = dict(self.query)
|
|
@@ -170,7 +175,7 @@ class MoleculesAssociationBuilder(Builder):
|
|
|
170
175
|
query["species_hash"] = {"$in": list(hash_chunk)}
|
|
171
176
|
yield {"query": query}
|
|
172
177
|
|
|
173
|
-
def get_items(self) -> Iterator[
|
|
178
|
+
def get_items(self) -> Iterator[list[TaskDocument]]:
|
|
174
179
|
"""
|
|
175
180
|
Gets all items to process into molecules (and other) documents.
|
|
176
181
|
This does no datetime checking; relying on on whether
|
|
@@ -252,7 +257,7 @@ class MoleculesAssociationBuilder(Builder):
|
|
|
252
257
|
|
|
253
258
|
yield to_yield
|
|
254
259
|
|
|
255
|
-
def process_item(self, tasks:
|
|
260
|
+
def process_item(self, tasks: list[TaskDocument]) -> list[dict]:
|
|
256
261
|
"""
|
|
257
262
|
Process the tasks into a MoleculeDoc
|
|
258
263
|
|
|
@@ -288,7 +293,7 @@ class MoleculesAssociationBuilder(Builder):
|
|
|
288
293
|
|
|
289
294
|
return jsanitize([mol.model_dump() for mol in molecules], allow_bson=True)
|
|
290
295
|
|
|
291
|
-
def update_targets(self, items:
|
|
296
|
+
def update_targets(self, items: list[list[dict]]):
|
|
292
297
|
"""
|
|
293
298
|
Inserts the new molecules into the molecules collection
|
|
294
299
|
|
|
@@ -314,8 +319,8 @@ class MoleculesAssociationBuilder(Builder):
|
|
|
314
319
|
self.logger.info("No items to update")
|
|
315
320
|
|
|
316
321
|
def filter_and_group_tasks(
|
|
317
|
-
self, tasks:
|
|
318
|
-
) -> Iterator[
|
|
322
|
+
self, tasks: list[TaskDocument]
|
|
323
|
+
) -> Iterator[list[TaskDocument]]:
|
|
319
324
|
"""
|
|
320
325
|
Groups tasks by identical structure
|
|
321
326
|
"""
|
|
@@ -363,8 +368,8 @@ class MoleculesBuilder(Builder):
|
|
|
363
368
|
self,
|
|
364
369
|
assoc: Store,
|
|
365
370
|
molecules: Store,
|
|
366
|
-
query:
|
|
367
|
-
settings:
|
|
371
|
+
query: dict | None = None,
|
|
372
|
+
settings: EmmetBuildSettings | None = None,
|
|
368
373
|
**kwargs,
|
|
369
374
|
):
|
|
370
375
|
"""
|
|
@@ -401,7 +406,7 @@ class MoleculesBuilder(Builder):
|
|
|
401
406
|
self.molecules.ensure_index("task_ids")
|
|
402
407
|
self.molecules.ensure_index("formula_alphabetical")
|
|
403
408
|
|
|
404
|
-
def prechunk(self, number_splits: int) -> Iterable[
|
|
409
|
+
def prechunk(self, number_splits: int) -> Iterable[dict]: # pragma: no cover
|
|
405
410
|
"""Prechunk the molecule builder for distributed computation"""
|
|
406
411
|
|
|
407
412
|
temp_query = dict(self.query)
|
|
@@ -450,7 +455,7 @@ class MoleculesBuilder(Builder):
|
|
|
450
455
|
query["species_hash"] = {"$in": list(hash_chunk)}
|
|
451
456
|
yield {"query": query}
|
|
452
457
|
|
|
453
|
-
def get_items(self) -> Iterator[
|
|
458
|
+
def get_items(self) -> Iterator[list[dict]]:
|
|
454
459
|
"""
|
|
455
460
|
Gets all items to process into molecules (and other) documents.
|
|
456
461
|
This does no datetime checking; relying on on whether
|
|
@@ -520,12 +525,12 @@ class MoleculesBuilder(Builder):
|
|
|
520
525
|
|
|
521
526
|
yield assoc
|
|
522
527
|
|
|
523
|
-
def process_item(self, items:
|
|
528
|
+
def process_item(self, items: list[dict]) -> list[dict]:
|
|
524
529
|
"""
|
|
525
530
|
Process the tasks into a MoleculeDoc
|
|
526
531
|
|
|
527
532
|
Args:
|
|
528
|
-
tasks
|
|
533
|
+
tasks list[dict] : a list of task docs
|
|
529
534
|
|
|
530
535
|
Returns:
|
|
531
536
|
[dict] : a list of new molecule docs
|
|
@@ -556,18 +561,18 @@ class MoleculesBuilder(Builder):
|
|
|
556
561
|
levels_of_theory = dict()
|
|
557
562
|
solvents = dict()
|
|
558
563
|
lot_solvents = dict()
|
|
559
|
-
unique_calc_types:
|
|
560
|
-
unique_task_types:
|
|
561
|
-
unique_levels_of_theory:
|
|
562
|
-
unique_solvents:
|
|
563
|
-
unique_lot_solvents:
|
|
564
|
+
unique_calc_types: set[str | CalcType] = set()
|
|
565
|
+
unique_task_types: set[str | TaskType] = set()
|
|
566
|
+
unique_levels_of_theory: set[str | LevelOfTheory] = set()
|
|
567
|
+
unique_solvents: set[str] = set()
|
|
568
|
+
unique_lot_solvents: set[str] = set()
|
|
564
569
|
origins = list()
|
|
565
570
|
entries = list()
|
|
566
|
-
best_entries:
|
|
571
|
+
best_entries: dict[str, Any] = dict()
|
|
567
572
|
constituent_molecules = list()
|
|
568
573
|
similar_molecules = list()
|
|
569
574
|
|
|
570
|
-
base_doc:
|
|
575
|
+
base_doc: MoleculeDoc | None = None
|
|
571
576
|
|
|
572
577
|
# Grab best doc for each solvent
|
|
573
578
|
# A doc is given a solvent based on how the molecule was optimized
|
|
@@ -658,7 +663,7 @@ class MoleculesBuilder(Builder):
|
|
|
658
663
|
[mol.model_dump() for mol in complete_mol_docs], allow_bson=True
|
|
659
664
|
)
|
|
660
665
|
|
|
661
|
-
def update_targets(self, items:
|
|
666
|
+
def update_targets(self, items: list[list[dict]]):
|
|
662
667
|
"""
|
|
663
668
|
Inserts the new molecules into the molecules collection
|
|
664
669
|
|
|
@@ -691,7 +696,7 @@ class MoleculesBuilder(Builder):
|
|
|
691
696
|
else:
|
|
692
697
|
self.logger.info("No items to update")
|
|
693
698
|
|
|
694
|
-
def group_mol_docs(self, assoc:
|
|
699
|
+
def group_mol_docs(self, assoc: list[MoleculeDoc]) -> Iterator[list[MoleculeDoc]]:
|
|
695
700
|
"""
|
|
696
701
|
Groups molecules by:
|
|
697
702
|
- highest level of theory
|
|
@@ -711,7 +716,7 @@ class MoleculesBuilder(Builder):
|
|
|
711
716
|
|
|
712
717
|
# Group by charge and spin
|
|
713
718
|
for c_s, group in groupby(sorted(assoc, key=charge_spin), key=charge_spin):
|
|
714
|
-
subgroups:
|
|
719
|
+
subgroups: list[dict[str, Any]] = list()
|
|
715
720
|
for mol_doc in group:
|
|
716
721
|
mol_graph = make_mol_graph(mol_doc.molecule)
|
|
717
722
|
mol_hash = mol_doc.species_hash
|
emmet/builders/settings.py
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Settings for defaults in the build pipelines for the Materials Project
|
|
3
3
|
"""
|
|
4
|
-
from typing import List
|
|
5
4
|
|
|
6
5
|
from pydantic.fields import Field
|
|
7
6
|
|
|
8
7
|
from emmet.core.provenance import Author, History
|
|
8
|
+
from emmet.core.qchem.calc_types import TaskType as QChemTaskType
|
|
9
9
|
from emmet.core.settings import EmmetSettings
|
|
10
10
|
from emmet.core.vasp.calc_types import TaskType as VaspTaskType
|
|
11
|
-
from emmet.core.qchem.calc_types import TaskType as QChemTaskType
|
|
12
11
|
|
|
13
12
|
|
|
14
13
|
class EmmetBuildSettings(EmmetSettings):
|
|
@@ -18,28 +17,28 @@ class EmmetBuildSettings(EmmetSettings):
|
|
|
18
17
|
EMMET_CONFIG_FILE to point to the json with emmet settings
|
|
19
18
|
"""
|
|
20
19
|
|
|
21
|
-
BUILD_TAGS:
|
|
20
|
+
BUILD_TAGS: list[str] = Field(
|
|
22
21
|
[], description="Tags for calculations to build materials"
|
|
23
22
|
)
|
|
24
|
-
EXCLUDED_TAGS:
|
|
23
|
+
EXCLUDED_TAGS: list[str] = Field(
|
|
25
24
|
[],
|
|
26
25
|
description="Tags to exclude from materials",
|
|
27
26
|
)
|
|
28
27
|
|
|
29
|
-
DEPRECATED_TAGS:
|
|
28
|
+
DEPRECATED_TAGS: list[str] = Field(
|
|
30
29
|
[], description="Tags for calculations to deprecate"
|
|
31
30
|
)
|
|
32
31
|
|
|
33
|
-
NON_COMMERCIAL_TAGS:
|
|
32
|
+
NON_COMMERCIAL_TAGS: list[str] = Field(
|
|
34
33
|
[], description="Tages for which to add BY-NC as license data in builder_meta"
|
|
35
34
|
)
|
|
36
35
|
|
|
37
|
-
VASP_ALLOWED_VASP_TYPES:
|
|
36
|
+
VASP_ALLOWED_VASP_TYPES: list[VaspTaskType] = Field(
|
|
38
37
|
[t.value for t in VaspTaskType],
|
|
39
38
|
description="Allowed task_types to build materials from",
|
|
40
39
|
)
|
|
41
40
|
|
|
42
|
-
QCHEM_ALLOWED_TASK_TYPES:
|
|
41
|
+
QCHEM_ALLOWED_TASK_TYPES: list[QChemTaskType] = Field(
|
|
43
42
|
[
|
|
44
43
|
"Single Point",
|
|
45
44
|
"Force",
|