emmet-builders 0.78.3__py3-none-any.whl → 0.86.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. emmet/builders/abinit/phonon.py +47 -47
  2. emmet/builders/abinit/sound_velocity.py +15 -11
  3. emmet/builders/feff/xas.py +1 -2
  4. emmet/builders/materials/absorption_spectrum.py +25 -14
  5. emmet/builders/materials/alloys.py +10 -11
  6. emmet/builders/materials/chemenv.py +2 -3
  7. emmet/builders/materials/corrected_entries.py +21 -15
  8. emmet/builders/materials/dielectric.py +19 -11
  9. emmet/builders/materials/elasticity.py +44 -33
  10. emmet/builders/materials/electrodes.py +35 -28
  11. emmet/builders/materials/electronic_structure.py +17 -17
  12. emmet/builders/materials/magnetism.py +11 -4
  13. emmet/builders/materials/optimade.py +7 -3
  14. emmet/builders/materials/piezoelectric.py +24 -21
  15. emmet/builders/materials/provenance.py +16 -13
  16. emmet/builders/materials/robocrys.py +2 -3
  17. emmet/builders/materials/substrates.py +9 -8
  18. emmet/builders/materials/summary.py +3 -3
  19. emmet/builders/materials/thermo.py +17 -11
  20. emmet/builders/matscholar/missing_compositions.py +12 -8
  21. emmet/builders/mobility/migration_graph.py +5 -5
  22. emmet/builders/settings.py +21 -17
  23. emmet/builders/utils.py +101 -12
  24. emmet/builders/vasp/materials.py +40 -51
  25. emmet/builders/vasp/mp_potcar_stats.json.gz +0 -0
  26. emmet/builders/vasp/task_validator.py +25 -36
  27. emmet_builders-0.86.0.dist-info/METADATA +37 -0
  28. emmet_builders-0.86.0.dist-info/RECORD +41 -0
  29. {emmet_builders-0.78.3.dist-info → emmet_builders-0.86.0.dist-info}/WHEEL +1 -1
  30. emmet/builders/materials/ml.py +0 -87
  31. emmet/builders/molecules/atomic.py +0 -589
  32. emmet/builders/molecules/bonds.py +0 -324
  33. emmet/builders/molecules/metal_binding.py +0 -526
  34. emmet/builders/molecules/orbitals.py +0 -288
  35. emmet/builders/molecules/redox.py +0 -496
  36. emmet/builders/molecules/summary.py +0 -383
  37. emmet/builders/molecules/thermo.py +0 -500
  38. emmet/builders/molecules/vibration.py +0 -278
  39. emmet/builders/qchem/__init__.py +0 -0
  40. emmet/builders/qchem/molecules.py +0 -734
  41. emmet_builders-0.78.3.dist-info/METADATA +0 -47
  42. emmet_builders-0.78.3.dist-info/RECORD +0 -51
  43. /emmet/builders/{molecules/__init__.py → py.typed} +0 -0
  44. {emmet_builders-0.78.3.dist-info → emmet_builders-0.86.0.dist-info}/top_level.txt +0 -0
@@ -1,288 +0,0 @@
1
- from collections import defaultdict
2
- from datetime import datetime
3
- from itertools import chain
4
- from math import ceil
5
- from typing import Optional, Iterable, Iterator, List, Dict
6
-
7
- from maggma.builders import Builder
8
- from maggma.core import Store
9
- from maggma.utils import grouper
10
-
11
- from emmet.core.qchem.task import TaskDocument
12
- from emmet.core.qchem.molecule import MoleculeDoc, evaluate_lot
13
- from emmet.core.molecules.orbitals import OrbitalDoc
14
- from emmet.core.utils import jsanitize
15
- from emmet.builders.settings import EmmetBuildSettings
16
-
17
-
18
- __author__ = "Evan Spotte-Smith"
19
-
20
- SETTINGS = EmmetBuildSettings()
21
-
22
-
23
class OrbitalBuilder(Builder):
    """
    Build OrbitalDocs (natural bonding orbital data) from MoleculeDocs.

    For every molecule and every solvent environment, the builder picks the
    single best task that carries NBO output (best level of theory first,
    then lowest electronic energy) and converts it into an OrbitalDoc.

    The process is as follows:
        1. Gather MoleculeDocs by formula
        2. For each doc, group the usable entries by solvent
        3. For each solvent, grab the best TaskDoc (including NBO data using
           the highest level of theory with lowest electronic energy for the
           molecule)
        4. Convert TaskDoc to OrbitalDoc
    """

    def __init__(
        self,
        tasks: Store,
        molecules: Store,
        orbitals: Store,
        query: Optional[Dict] = None,
        settings: Optional[EmmetBuildSettings] = None,
        **kwargs,
    ):
        """
        Args:
            tasks: Store of task documents (source).
            molecules: Store of molecule documents (source).
            orbitals: Store the orbital documents are written to (target).
            query: Optional criteria limiting which molecules are processed.
            settings: Optional build settings; passed through
                ``EmmetBuildSettings.autoload``.
            **kwargs: Forwarded unchanged to the ``Builder`` constructor.
        """
        self.tasks = tasks
        self.molecules = molecules
        self.orbitals = orbitals
        self.query = query if query else dict()
        self.settings = EmmetBuildSettings.autoload(settings)
        self.kwargs = kwargs

        super().__init__(sources=[tasks, molecules], targets=[orbitals], **kwargs)
        # NOTE: if mrun does not connect to the collections automatically,
        # call ``.connect()`` on self.tasks, self.molecules and self.orbitals
        # here (wrapped in a try/except that reports the failure).

    def ensure_indexes(self):
        """
        Ensures indices on the collections needed for building
        """

        # Basic search index for tasks
        for field in ("task_id", "last_updated", "state", "formula_alphabetical"):
            self.tasks.ensure_index(field)

        # Search index for molecules
        for field in (
            "molecule_id",
            "last_updated",
            "task_ids",
            "formula_alphabetical",
        ):
            self.molecules.ensure_index(field)

        # Search index for orbitals
        for field in (
            "molecule_id",
            "task_id",
            "solvent",
            "lot_solvent",
            "property_id",
            "last_updated",
            "formula_alphabetical",
        ):
            self.orbitals.ensure_index(field)

    def _find_unprocessed(self):
        """
        Work out which molecules still lack an orbital document.

        A molecule counts as processed when its key already appears among the
        distinct ``molecule_id`` values of the orbitals Store. Deprecated
        molecules are always excluded.

        Returns:
            A 3-tuple ``(base_query, molecule_keys, formulas)``:
            the query used (``self.query`` plus ``deprecated: False``), the
            set of unprocessed molecule keys, and the set of
            ``formula_alphabetical`` values those molecules belong to.
        """
        base_query = dict(self.query)
        base_query["deprecated"] = False

        self.logger.info("Finding documents to process")
        all_mols = list(
            self.molecules.query(
                base_query, [self.molecules.key, "formula_alphabetical"]
            )
        )

        processed_docs = set(self.orbitals.distinct("molecule_id"))
        to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
        to_process_forms = {
            d["formula_alphabetical"]
            for d in all_mols
            if d[self.molecules.key] in to_process_docs
        }

        return base_query, to_process_docs, to_process_forms

    def prechunk(self, number_splits: int) -> Iterable[Dict]:  # pragma: no cover
        """
        Prechunk the builder for distributed computation

        Args:
            number_splits: Number of chunks the unprocessed formulas should
                be divided into.

        Yields:
            Dicts of the form ``{"query": {...}}``, each restricting a worker
            to a subset of the unprocessed formulas.
        """

        _, _, to_process_forms = self._find_unprocessed()

        # Nothing left to do: avoid asking grouper for chunks of size zero.
        if not to_process_forms:
            return

        N = ceil(len(to_process_forms) / number_splits)

        for formula_chunk in grouper(to_process_forms, N):
            yield {"query": {"formula_alphabetical": {"$in": list(formula_chunk)}}}

    def get_items(self) -> Iterator[List[Dict]]:
        """
        Gets all items to process into orbital documents.
        This does no datetime checking; it relies on whether the molecule's
        ID is already present in the orbitals Store.

        Side effects:
            Ensures indexes, sets ``self.timestamp`` (the build time later
            stamped on every document) and ``self.total``.

        Returns:
            generator yielding, per unprocessed formula, the list of molecule
            documents (as dicts) with that formula
        """

        self.logger.info("Orbital builder started")
        self.logger.info("Setting indexes")
        self.ensure_indexes()

        # Save timestamp to mark buildtime.
        # NOTE(review): utcnow() yields a naive datetime and is deprecated
        # since Python 3.12; kept so stored "_bt" values keep their format.
        self.timestamp = datetime.utcnow()

        temp_query, to_process_docs, to_process_forms = self._find_unprocessed()

        self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
        self.logger.info(f"Found {len(to_process_forms)} unprocessed formulas")

        # Set total for builder bars to have a total
        self.total = len(to_process_forms)

        for formula in to_process_forms:
            mol_query = dict(temp_query)
            mol_query["formula_alphabetical"] = formula
            yield list(self.molecules.query(criteria=mol_query))

    @staticmethod
    def _nbo_entries_by_solvent(mol) -> Dict:
        """
        Group a molecule's usable NBO entries by solvent environment.

        An entry is usable when its charge and spin multiplicity match the
        molecule's, its ``output.nbo`` is not None, and its original ``rem``
        input has a truthy ``run_nbo6`` or ``nbo_external`` flag.

        Returns:
            Mapping of solvent -> list of entries (every list is non-empty).
        """
        by_solvent = defaultdict(list)
        for entry in mol.entries:
            if entry["charge"] != mol.charge:
                continue
            if entry["spin_multiplicity"] != mol.spin_multiplicity:
                continue
            if entry["output"]["nbo"] is None:
                continue

            # The original note here read "must specifically use NBO7".
            # NOTE(review): presumably these two rem flags mark runs made
            # with an external NBO program -- confirm against Q-Chem docs.
            rem = entry["orig"]["rem"]
            if rem.get("run_nbo6", False) or rem.get("nbo_external", False):
                by_solvent[entry["solvent"]].append(entry)

        return by_solvent

    def _fetch_task(self, task_id, formula) -> Optional[Dict]:
        """
        Look up a task document that still has its ``orig`` input section.

        The lookup is tried with ``task_id`` as given and, if that finds
        nothing, once more with ``task_id`` converted to ``int``.

        Returns:
            The task document as a dict, or None if no match exists.
        """
        criteria = {
            "task_id": task_id,
            "formula_alphabetical": formula,
            "orig": {"$exists": True},
        }
        tdoc = self.tasks.query_one(criteria)
        if tdoc is not None:
            return tdoc

        try:
            criteria["task_id"] = int(task_id)
        except ValueError:
            # Non-numeric ID: there is no integer form to fall back to.
            return None

        return self.tasks.query_one(criteria)

    def process_item(self, items: List[Dict]) -> List[Dict]:
        """
        Process a group of molecules into OrbitalDocs

        Args:
            items List[Dict] : a list of MoleculeDocs in dict form, all
                sharing one formula

        Returns:
            [dict] : a list of new orbital docs, at most one per
                (molecule, solvent) pair
        """

        if not items:
            return []

        mols = [MoleculeDoc(**item) for item in items]
        formula = mols[0].formula_alphabetical
        mol_ids = [m.molecule_id for m in mols]
        self.logger.info(f"Processing {formula} : {mol_ids}")

        orbital_docs = list()

        for mol in mols:
            by_solvent = self._nbo_entries_by_solvent(mol)

            for entries in by_solvent.values():
                # Best candidate first: level-of-theory score, then energy.
                sorted_entries = sorted(
                    entries,
                    key=lambda x: (
                        sum(evaluate_lot(x["level_of_theory"])),
                        x["energy"],
                    ),
                )

                for best in sorted_entries:
                    tdoc = self._fetch_task(best["task_id"], formula)
                    if tdoc is None:
                        continue

                    orbital_doc = OrbitalDoc.from_task(
                        TaskDocument(**tdoc),
                        molecule_id=mol.molecule_id,
                        deprecated=False,
                    )

                    if orbital_doc is not None:
                        orbital_docs.append(orbital_doc)
                        # Only the best usable entry is wanted. Documents are
                        # upserted on (molecule_id, solvent), so any further
                        # doc for this pair would merely overwrite this one.
                        break

        self.logger.debug(f"Produced {len(orbital_docs)} orbital docs for {formula}")

        return jsanitize([doc.model_dump() for doc in orbital_docs], allow_bson=True)

    def update_targets(self, items: List[List[Dict]]):
        """
        Inserts the new documents into the orbitals collection

        Existing documents for the affected molecules are removed first, then
        the new ones are upserted keyed on ``molecule_id`` and ``solvent``.

        Args:
            items [[dict]]: A list of lists of documents to update, one inner
                list per processed item
        """

        docs = list(chain.from_iterable(items))  # type: ignore

        # Check the flattened list: ``items`` may be non-empty yet contain
        # only empty lists.
        if not docs:
            self.logger.info("No items to update")
            return

        # Add timestamp
        for item in docs:
            item.update(
                {
                    "_bt": self.timestamp,
                }
            )

        molecule_ids = list({item["molecule_id"] for item in docs})

        self.logger.info(f"Updating {len(docs)} orbital documents")
        # NOTE(review): this filters on the store's own key using molecule
        # IDs, which only matches if that key is "molecule_id" -- confirm.
        self.orbitals.remove_docs({self.orbitals.key: {"$in": molecule_ids}})
        self.orbitals.update(
            docs=docs,
            key=["molecule_id", "solvent"],
        )