emmet-builders 0.84.2__py3-none-any.whl → 0.86.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emmet/builders/abinit/phonon.py +27 -25
- emmet/builders/abinit/sound_velocity.py +15 -11
- emmet/builders/feff/xas.py +1 -2
- emmet/builders/materials/absorption_spectrum.py +25 -14
- emmet/builders/materials/alloys.py +3 -4
- emmet/builders/materials/chemenv.py +2 -3
- emmet/builders/materials/corrected_entries.py +15 -9
- emmet/builders/materials/dielectric.py +19 -11
- emmet/builders/materials/elasticity.py +44 -33
- emmet/builders/materials/electrodes.py +24 -19
- emmet/builders/materials/electronic_structure.py +17 -17
- emmet/builders/materials/magnetism.py +11 -4
- emmet/builders/materials/optimade.py +7 -3
- emmet/builders/materials/piezoelectric.py +24 -21
- emmet/builders/materials/provenance.py +15 -12
- emmet/builders/materials/robocrys.py +2 -3
- emmet/builders/materials/substrates.py +9 -8
- emmet/builders/materials/summary.py +3 -3
- emmet/builders/materials/thermo.py +17 -11
- emmet/builders/matscholar/missing_compositions.py +12 -8
- emmet/builders/mobility/migration_graph.py +5 -5
- emmet/builders/settings.py +21 -17
- emmet/builders/utils.py +15 -10
- emmet/builders/vasp/materials.py +32 -16
- emmet/builders/vasp/task_validator.py +15 -11
- {emmet_builders-0.84.2.dist-info → emmet_builders-0.86.0.dist-info}/METADATA +21 -36
- emmet_builders-0.86.0.dist-info/RECORD +41 -0
- {emmet_builders-0.84.2.dist-info → emmet_builders-0.86.0.dist-info}/WHEEL +1 -1
- emmet/builders/materials/ml.py +0 -87
- emmet/builders/molecules/atomic.py +0 -589
- emmet/builders/molecules/bonds.py +0 -324
- emmet/builders/molecules/metal_binding.py +0 -526
- emmet/builders/molecules/orbitals.py +0 -288
- emmet/builders/molecules/redox.py +0 -496
- emmet/builders/molecules/summary.py +0 -383
- emmet/builders/molecules/thermo.py +0 -500
- emmet/builders/molecules/vibration.py +0 -278
- emmet/builders/qchem/__init__.py +0 -0
- emmet/builders/qchem/molecules.py +0 -734
- emmet_builders-0.84.2.dist-info/RECORD +0 -52
- /emmet/builders/{molecules/__init__.py → py.typed} +0 -0
- {emmet_builders-0.84.2.dist-info → emmet_builders-0.86.0.dist-info}/top_level.txt +0 -0
|
@@ -1,278 +0,0 @@
|
|
|
1
|
-
from collections import defaultdict
|
|
2
|
-
from datetime import datetime
|
|
3
|
-
from itertools import chain
|
|
4
|
-
from math import ceil
|
|
5
|
-
from typing import Optional, Iterable, Iterator, List, Dict
|
|
6
|
-
|
|
7
|
-
from maggma.builders import Builder
|
|
8
|
-
from maggma.core import Store
|
|
9
|
-
from maggma.utils import grouper
|
|
10
|
-
|
|
11
|
-
from emmet.core.qchem.task import TaskDocument
|
|
12
|
-
from emmet.core.qchem.molecule import MoleculeDoc, evaluate_lot
|
|
13
|
-
from emmet.core.molecules.vibration import VibrationDoc
|
|
14
|
-
from emmet.core.utils import jsanitize
|
|
15
|
-
from emmet.builders.settings import EmmetBuildSettings
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
__author__ = "Evan Spotte-Smith"
|
|
19
|
-
|
|
20
|
-
SETTINGS = EmmetBuildSettings()
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
class VibrationBuilder(Builder):
    """
    The VibrationBuilder extracts the highest-quality vibrational data from a
    MoleculeDoc (lowest electronic energy, highest level of theory for
    each solvent available).

    The process is as follows:
        1. Gather MoleculeDocs by formula
        2. For each doc, sort tasks by solvent
        3. For each solvent, grab the best TaskDoc (doc with vibrational
           information that has the highest level of theory with lowest
           electronic energy for the molecule)
        4. Convert TaskDoc to VibrationDoc

    Note that if no tasks associated with a given MoleculeDoc have vibrational
    data associated with them, then no VibrationDoc will be made for
    that molecule.
    """

    def __init__(
        self,
        tasks: Store,
        molecules: Store,
        vibes: Store,
        query: Optional[Dict] = None,
        settings: Optional[EmmetBuildSettings] = None,
        **kwargs,
    ):
        """
        Args:
            tasks: Store of task documents (source).
            molecules: Store of molecule documents (source).
            vibes: Store that receives the vibration documents (target).
            query: Optional criteria restricting which molecules are
                considered; an empty dict is used when omitted.
            settings: Optional build settings, resolved through
                EmmetBuildSettings.autoload.
            **kwargs: Forwarded unchanged to the Builder constructor.
        """
        self.tasks = tasks
        self.molecules = molecules
        self.vibes = vibes
        self.query = query if query else dict()
        self.settings = EmmetBuildSettings.autoload(settings)
        self.kwargs = kwargs

        super().__init__(sources=[tasks, molecules], targets=[vibes], **kwargs)
        # Uncomment in case of issue with mrun not connecting automatically to collections
        # for i in [self.tasks, self.molecules, self.vibes]:
        #     try:
        #         i.connect()
        #     except Exception as e:
        #         print("Could not connect,", e)

    def ensure_indexes(self):
        """
        Ensures indices on the collections needed for building
        """

        # Basic search index for tasks
        for field in ("task_id", "last_updated", "state", "formula_alphabetical"):
            self.tasks.ensure_index(field)

        # Search index for molecules
        for field in (
            "molecule_id",
            "last_updated",
            "task_ids",
            "formula_alphabetical",
        ):
            self.molecules.ensure_index(field)

        # Search index for vibrational properties
        for field in (
            "molecule_id",
            "task_id",
            "solvent",
            "lot_solvent",
            "property_id",
            "last_updated",
            "formula_alphabetical",
        ):
            self.vibes.ensure_index(field)

    def _find_unprocessed(self) -> tuple:
        """
        Determine which molecules still lack a vibration document.

        Shared by prechunk and get_items so both select work identically.

        Returns:
            A 3-tuple ``(base_query, to_process_docs, to_process_forms)``:
            the builder query restricted to non-deprecated molecules, the set
            of molecule keys absent from the vibes store, and the set of
            alphabetical formulas those molecules belong to.
        """
        base_query = dict(self.query)
        base_query["deprecated"] = False

        self.logger.info("Finding documents to process")
        all_mols = list(
            self.molecules.query(
                base_query, [self.molecules.key, "formula_alphabetical"]
            )
        )

        processed_docs = set(self.vibes.distinct("molecule_id"))
        to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
        to_process_forms = {
            d["formula_alphabetical"]
            for d in all_mols
            if d[self.molecules.key] in to_process_docs
        }

        return base_query, to_process_docs, to_process_forms

    def prechunk(self, number_splits: int) -> Iterable[Dict]:  # pragma: no cover
        """Prechunk the builder for distributed computation"""

        _, _, to_process_forms = self._find_unprocessed()

        # Number of formulas handed to each split
        N = ceil(len(to_process_forms) / number_splits)

        for formula_chunk in grouper(to_process_forms, N):
            yield {"query": {"formula_alphabetical": {"$in": list(formula_chunk)}}}

    def get_items(self) -> Iterator[List[Dict]]:
        """
        Gets all items to process into vibration documents.
        This does no datetime checking; it relies on whether a molecule's
        molecule_id is already present in the vibes Store.

        Returns:
            generator of lists of molecule documents (one list per formula)
            to process into vibration documents
        """

        self.logger.info("Vibration builder started")
        self.logger.info("Setting indexes")
        self.ensure_indexes()

        # Save timestamp to mark buildtime
        self.timestamp = datetime.utcnow()

        base_query, to_process_docs, to_process_forms = self._find_unprocessed()

        self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
        self.logger.info(f"Found {len(to_process_forms)} unprocessed formulas")

        # Set total for builder bars to have a total
        self.total = len(to_process_forms)

        for formula in to_process_forms:
            mol_query = dict(base_query)
            mol_query["formula_alphabetical"] = formula
            yield list(self.molecules.query(criteria=mol_query))

    def _fetch_task(self, task_id, formula) -> Optional[Dict]:
        """
        Look up the raw task document for ``task_id`` and ``formula``.

        The id is tried as given first and then, if nothing matched, as an
        integer, since the tasks store may hold ids in either form.

        Returns:
            The task document dict, or None when no matching task exists or
            the id cannot be interpreted as an integer for the second attempt.
        """
        criteria = {
            "task_id": task_id,
            "formula_alphabetical": formula,
            "orig": {"$exists": True},
        }
        tdoc = self.tasks.query_one(criteria)
        if tdoc is not None:
            return tdoc

        try:
            criteria["task_id"] = int(task_id)
        except ValueError:
            # Non-numeric id: there is no integer form to retry with
            return None

        return self.tasks.query_one(criteria)

    def process_item(self, items: List[Dict]) -> List[Dict]:
        """
        Process the tasks into VibrationDocs

        Args:
            items List[Dict] : a list of MoleculeDocs in dict form

        Returns:
            [dict] : a list of new vibration docs
        """

        mols = [MoleculeDoc(**item) for item in items]
        formula = mols[0].formula_alphabetical
        mol_ids = [m.molecule_id for m in mols]
        self.logger.debug(f"Processing {formula} : {mol_ids}")

        vibe_docs = list()

        for mol in mols:
            # Keep only entries matching the molecule's charge and spin state
            # that actually carry frequency data
            vibe_entries = [
                e
                for e in mol.entries
                if e["charge"] == mol.charge
                and e["spin_multiplicity"] == mol.spin_multiplicity
                and e["output"].get("frequencies") is not None
            ]

            # Organize by solvent environment
            by_solvent = defaultdict(list)
            for entry in vibe_entries:
                by_solvent[entry["solvent"]].append(entry)

            # Every list in by_solvent holds at least one entry by construction
            for entries in by_solvent.values():
                # Smallest level-of-theory score wins; ties are broken by
                # lowest energy. min() returns the first minimal element,
                # the same one a stable ascending sort would place first.
                best = min(
                    entries,
                    key=lambda x: (
                        sum(evaluate_lot(x["level_of_theory"])),
                        x["energy"],
                    ),
                )

                tdoc = self._fetch_task(best["task_id"], formula)
                if tdoc is None:
                    continue

                task_doc = TaskDocument(**tdoc)

                vibe_doc = VibrationDoc.from_task(
                    task_doc, molecule_id=mol.molecule_id, deprecated=False
                )
                vibe_docs.append(vibe_doc)

        self.logger.debug(f"Produced {len(vibe_docs)} vibration docs for {formula}")

        return jsanitize([doc.model_dump() for doc in vibe_docs], allow_bson=True)

    def update_targets(self, items: List[List[Dict]]):
        """
        Inserts the new vibration docs into the vibes collection

        Args:
            items [[dict]]: A list of documents to update
        """

        docs = list(chain.from_iterable(items))  # type: ignore

        # Check the flattened docs rather than `items`: a batch made only of
        # empty per-formula lists has nothing to write.
        if not docs:
            self.logger.info("No items to update")
            return

        # Add timestamp
        for item in docs:
            item.update({"_bt": self.timestamp})

        molecule_ids = list({item["molecule_id"] for item in docs})

        self.logger.info(f"Updating {len(docs)} vibration documents")
        # NOTE(review): this matches molecule ids against the store's own key
        # field; that only removes stale docs if the vibes store is keyed on
        # molecule_id -- confirm against the store configuration.
        self.vibes.remove_docs({self.vibes.key: {"$in": molecule_ids}})
        self.vibes.update(
            docs=docs,
            key=["molecule_id", "solvent"],
        )
|
emmet/builders/qchem/__init__.py
DELETED
|
File without changes
|