emmet-builders 0.84.2__py3-none-any.whl → 0.86.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. emmet/builders/abinit/phonon.py +27 -25
  2. emmet/builders/abinit/sound_velocity.py +15 -11
  3. emmet/builders/feff/xas.py +1 -2
  4. emmet/builders/materials/absorption_spectrum.py +25 -14
  5. emmet/builders/materials/alloys.py +3 -4
  6. emmet/builders/materials/chemenv.py +2 -3
  7. emmet/builders/materials/corrected_entries.py +15 -9
  8. emmet/builders/materials/dielectric.py +19 -11
  9. emmet/builders/materials/elasticity.py +44 -33
  10. emmet/builders/materials/electrodes.py +24 -19
  11. emmet/builders/materials/electronic_structure.py +17 -17
  12. emmet/builders/materials/magnetism.py +11 -4
  13. emmet/builders/materials/optimade.py +7 -3
  14. emmet/builders/materials/piezoelectric.py +24 -21
  15. emmet/builders/materials/provenance.py +15 -12
  16. emmet/builders/materials/robocrys.py +2 -3
  17. emmet/builders/materials/substrates.py +9 -8
  18. emmet/builders/materials/summary.py +3 -3
  19. emmet/builders/materials/thermo.py +17 -11
  20. emmet/builders/matscholar/missing_compositions.py +12 -8
  21. emmet/builders/mobility/migration_graph.py +5 -5
  22. emmet/builders/settings.py +21 -17
  23. emmet/builders/utils.py +15 -10
  24. emmet/builders/vasp/materials.py +32 -16
  25. emmet/builders/vasp/task_validator.py +15 -11
  26. {emmet_builders-0.84.2.dist-info → emmet_builders-0.86.0.dist-info}/METADATA +21 -36
  27. emmet_builders-0.86.0.dist-info/RECORD +41 -0
  28. {emmet_builders-0.84.2.dist-info → emmet_builders-0.86.0.dist-info}/WHEEL +1 -1
  29. emmet/builders/materials/ml.py +0 -87
  30. emmet/builders/molecules/atomic.py +0 -589
  31. emmet/builders/molecules/bonds.py +0 -324
  32. emmet/builders/molecules/metal_binding.py +0 -526
  33. emmet/builders/molecules/orbitals.py +0 -288
  34. emmet/builders/molecules/redox.py +0 -496
  35. emmet/builders/molecules/summary.py +0 -383
  36. emmet/builders/molecules/thermo.py +0 -500
  37. emmet/builders/molecules/vibration.py +0 -278
  38. emmet/builders/qchem/__init__.py +0 -0
  39. emmet/builders/qchem/molecules.py +0 -734
  40. emmet_builders-0.84.2.dist-info/RECORD +0 -52
  41. /emmet/builders/{molecules/__init__.py → py.typed} +0 -0
  42. {emmet_builders-0.84.2.dist-info → emmet_builders-0.86.0.dist-info}/top_level.txt +0 -0
@@ -1,278 +0,0 @@
1
- from collections import defaultdict
2
- from datetime import datetime
3
- from itertools import chain
4
- from math import ceil
5
- from typing import Optional, Iterable, Iterator, List, Dict
6
-
7
- from maggma.builders import Builder
8
- from maggma.core import Store
9
- from maggma.utils import grouper
10
-
11
- from emmet.core.qchem.task import TaskDocument
12
- from emmet.core.qchem.molecule import MoleculeDoc, evaluate_lot
13
- from emmet.core.molecules.vibration import VibrationDoc
14
- from emmet.core.utils import jsanitize
15
- from emmet.builders.settings import EmmetBuildSettings
16
-
17
-
18
# Module author credit.
- __author__ = "Evan Spotte-Smith"
19
-
20
# Build settings object constructed once at import time; none of the code
# visible in this module reads this name (the builder resolves its own
# settings through EmmetBuildSettings.autoload).
- SETTINGS = EmmetBuildSettings()
21
-
22
-
23
class VibrationBuilder(Builder):
    """
    Build VibrationDocs from the best frequency calculation of each molecule.

    The VibrationBuilder extracts the highest-quality vibrational data from a
    MoleculeDoc (lowest electronic energy, highest level of theory for
    each solvent available).

    The process is as follows:
        1. Gather MoleculeDocs by formula
        2. For each doc, sort tasks by solvent
        3. For each solvent, grab the best TaskDoc (doc with vibrational
           information that has the highest level of theory with lowest
           electronic energy for the molecule)
        4. Convert TaskDoc to VibrationDoc

    Note that if no tasks associated with a given MoleculeDoc have vibrational
    data associated with them, then no VibrationDoc will be made for
    that molecule.
    """

    def __init__(
        self,
        tasks: Store,
        molecules: Store,
        vibes: Store,
        query: Optional[Dict] = None,
        settings: Optional[EmmetBuildSettings] = None,
        **kwargs,
    ):
        """
        Args:
            tasks: Source store of task documents, looked up by ``task_id``.
            molecules: Source store of molecule documents.
            vibes: Target store that receives the vibration documents.
            query: Optional extra criteria applied to every molecule query.
            settings: Optional build settings, resolved through
                ``EmmetBuildSettings.autoload``.
            **kwargs: Forwarded unchanged to ``Builder.__init__``.
        """
        self.tasks = tasks
        self.molecules = molecules
        self.vibes = vibes
        self.query = query if query else dict()
        self.settings = EmmetBuildSettings.autoload(settings)
        self.kwargs = kwargs

        super().__init__(sources=[tasks, molecules], targets=[vibes], **kwargs)
        # NOTE: if mrun does not connect to the collections automatically,
        # call ``connect()`` on self.tasks, self.molecules and self.vibes here.

    def ensure_indexes(self):
        """
        Ensures indices on the collections needed for building
        """

        # Basic search index for tasks
        for field in ("task_id", "last_updated", "state", "formula_alphabetical"):
            self.tasks.ensure_index(field)

        # Search index for molecules
        for field in (
            "molecule_id",
            "last_updated",
            "task_ids",
            "formula_alphabetical",
        ):
            self.molecules.ensure_index(field)

        # Search index for vibrational properties
        for field in (
            "molecule_id",
            "task_id",
            "solvent",
            "lot_solvent",
            "property_id",
            "last_updated",
            "formula_alphabetical",
        ):
            self.vibes.ensure_index(field)

    def _find_unprocessed(self):
        """
        Work out which molecules and formulas still lack vibration documents.

        Shared by ``prechunk`` and ``get_items`` so both select exactly the
        same work.

        Returns:
            A ``(base_query, molecule_keys, formulas)`` tuple:
            the user query restricted to ``deprecated == False``, the set of
            molecule keys absent from the ``molecule_id`` values of the vibes
            store, and the set of ``formula_alphabetical`` values of those
            molecules.
        """
        base_query = dict(self.query)
        base_query["deprecated"] = False

        self.logger.info("Finding documents to process")
        all_mols = list(
            self.molecules.query(
                base_query, [self.molecules.key, "formula_alphabetical"]
            )
        )

        # NOTE(review): molecule keys are compared against "molecule_id" values
        # of the vibes store - presumably molecules.key is "molecule_id"; confirm.
        processed_docs = set(self.vibes.distinct("molecule_id"))
        to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
        to_process_forms = {
            d["formula_alphabetical"]
            for d in all_mols
            if d[self.molecules.key] in to_process_docs
        }
        return base_query, to_process_docs, to_process_forms

    def prechunk(self, number_splits: int) -> Iterable[Dict]:  # pragma: no cover
        """
        Prechunk the builder for distributed computation

        Args:
            number_splits: Number of chunks to spread the unprocessed
                formulas over.

        Yields:
            One ``{"query": ...}`` dict per chunk, selecting that chunk's
            formulas through ``formula_alphabetical``.
        """
        _, _, to_process_forms = self._find_unprocessed()

        # Make the empty case explicit instead of asking for chunks of size 0.
        if not to_process_forms:
            return

        chunk_size = ceil(len(to_process_forms) / number_splits)

        for formula_chunk in grouper(to_process_forms, chunk_size):
            yield {"query": {"formula_alphabetical": {"$in": list(formula_chunk)}}}

    def get_items(self) -> Iterator[List[Dict]]:
        """
        Gets all items to process into vibration documents.
        This does no datetime checking; relying on whether
        molecule ids are already present in the vibes Store

        Returns:
            generator of lists of molecule documents (one list per formula)
            to process into vibration documents
        """

        self.logger.info("Vibration builder started")
        self.logger.info("Setting indexes")
        self.ensure_indexes()

        # Save timestamp to mark buildtime; update_targets stamps it on each doc
        self.timestamp = datetime.utcnow()

        base_query, to_process_docs, to_process_forms = self._find_unprocessed()

        self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
        self.logger.info(f"Found {len(to_process_forms)} unprocessed formulas")

        # Set total for builder bars to have a total
        self.total = len(to_process_forms)

        for formula in to_process_forms:
            mol_query = dict(base_query)
            mol_query["formula_alphabetical"] = formula
            yield list(self.molecules.query(criteria=mol_query))

    def _query_task(self, task_id, formula: str) -> Optional[Dict]:
        """
        Fetch the raw task document for ``task_id``, or None if not found.

        The lookup is tried with ``task_id`` as given and, if that finds
        nothing, once more with ``int(task_id)``. Only tasks that have an
        ``orig`` field and the given formula are matched.
        """
        criteria = {
            "task_id": task_id,
            "formula_alphabetical": formula,
            "orig": {"$exists": True},
        }
        tdoc = self.tasks.query_one(criteria)
        if tdoc is not None:
            return tdoc

        # Presumably some task stores hold task_id as an integer; only the
        # conversion is guarded so that store errors are not swallowed.
        try:
            criteria["task_id"] = int(task_id)
        except (TypeError, ValueError):
            return None
        return self.tasks.query_one(criteria)

    def process_item(self, items: List[Dict]) -> List[Dict]:
        """
        Process the tasks into VibrationDocs

        Args:
            items List[Dict] : a list of MoleculeDocs in dict form, all
                sharing one formula

        Returns:
            [dict] : a list of new vibration docs (empty if ``items`` is
                empty or no usable task is found)
        """

        # Nothing to do; also avoids indexing into an empty list below.
        if not items:
            return []

        mols = [MoleculeDoc(**item) for item in items]
        formula = mols[0].formula_alphabetical
        mol_ids = [m.molecule_id for m in mols]
        self.logger.debug(f"Processing {formula} : {mol_ids}")

        vibe_docs = list()

        for mol in mols:
            # Organize by solvent environment, keeping only entries that match
            # the molecule's charge/spin and actually carry frequencies
            by_solvent = defaultdict(list)
            for entry in mol.entries:
                if (
                    entry["charge"] == mol.charge
                    and entry["spin_multiplicity"] == mol.spin_multiplicity
                    and entry["output"].get("frequencies") is not None
                ):
                    by_solvent[entry["solvent"]].append(entry)

            # Every group holds at least one entry, so no emptiness check is needed
            for entries in by_solvent.values():
                # Best entry: smallest summed evaluate_lot score, ties broken
                # by lowest energy (first such entry wins, as with a stable sort)
                best = min(
                    entries,
                    key=lambda x: (
                        sum(evaluate_lot(x["level_of_theory"])),
                        x["energy"],
                    ),
                )

                tdoc = self._query_task(best["task_id"], formula)
                if tdoc is None:
                    continue

                task_doc = TaskDocument(**tdoc)
                vibe_docs.append(
                    VibrationDoc.from_task(
                        task_doc, molecule_id=mol.molecule_id, deprecated=False
                    )
                )

        self.logger.debug(f"Produced {len(vibe_docs)} vibration docs for {formula}")

        return jsanitize([doc.model_dump() for doc in vibe_docs], allow_bson=True)

    def update_targets(self, items: List[List[Dict]]):
        """
        Inserts the new vibration docs into the vibes collection

        Uses ``self.timestamp``, which is set by ``get_items``.

        Args:
            items [[dict]]: A list of lists of documents to update
        """

        docs = list(chain.from_iterable(items))  # type: ignore

        # Check the flattened docs, not the batch list: a batch made only of
        # empty lists must not trigger a remove/update round trip.
        if not docs:
            self.logger.info("No items to update")
            return

        # Add timestamp
        for item in docs:
            item.update({"_bt": self.timestamp})

        molecule_ids = list({item["molecule_id"] for item in docs})

        self.logger.info(f"Updating {len(docs)} vibration documents")
        # NOTE(review): the filter uses the store key with molecule ids -
        # presumably the vibes store is keyed by "molecule_id"; confirm.
        self.vibes.remove_docs({self.vibes.key: {"$in": molecule_ids}})
        self.vibes.update(
            docs=docs,
            key=["molecule_id", "solvent"],
        )
File without changes