emmet-builders 0.84.10rc2__py3-none-any.whl → 0.85.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of emmet-builders might be problematic. Click here for more details.

Files changed (36) hide show
  1. emmet/builders/abinit/phonon.py +12 -14
  2. emmet/builders/abinit/sound_velocity.py +1 -1
  3. emmet/builders/materials/absorption_spectrum.py +16 -10
  4. emmet/builders/materials/alloys.py +1 -1
  5. emmet/builders/materials/corrected_entries.py +1 -1
  6. emmet/builders/materials/dielectric.py +10 -7
  7. emmet/builders/materials/elasticity.py +12 -9
  8. emmet/builders/materials/electrodes.py +1 -1
  9. emmet/builders/materials/electronic_structure.py +1 -1
  10. emmet/builders/materials/magnetism.py +2 -1
  11. emmet/builders/materials/piezoelectric.py +23 -19
  12. emmet/builders/materials/provenance.py +3 -4
  13. emmet/builders/materials/summary.py +1 -1
  14. emmet/builders/settings.py +14 -9
  15. emmet/builders/utils.py +5 -4
  16. emmet/builders/vasp/materials.py +11 -4
  17. emmet/builders/vasp/task_validator.py +3 -1
  18. {emmet_builders-0.84.10rc2.dist-info → emmet_builders-0.85.0.dist-info}/METADATA +7 -30
  19. emmet_builders-0.85.0.dist-info/RECORD +41 -0
  20. emmet/builders/materials/ml.py +0 -101
  21. emmet/builders/molecules/atomic.py +0 -592
  22. emmet/builders/molecules/bonds.py +0 -329
  23. emmet/builders/molecules/electric.py +0 -287
  24. emmet/builders/molecules/metal_binding.py +0 -528
  25. emmet/builders/molecules/orbitals.py +0 -292
  26. emmet/builders/molecules/redox.py +0 -502
  27. emmet/builders/molecules/summary.py +0 -406
  28. emmet/builders/molecules/thermo.py +0 -505
  29. emmet/builders/molecules/trajectory.py +0 -530
  30. emmet/builders/molecules/vibration.py +0 -282
  31. emmet/builders/qchem/__init__.py +0 -0
  32. emmet/builders/qchem/molecules.py +0 -745
  33. emmet_builders-0.84.10rc2.dist-info/RECORD +0 -54
  34. /emmet/builders/{molecules/__init__.py → py.typed} +0 -0
  35. {emmet_builders-0.84.10rc2.dist-info → emmet_builders-0.85.0.dist-info}/WHEEL +0 -0
  36. {emmet_builders-0.84.10rc2.dist-info → emmet_builders-0.85.0.dist-info}/top_level.txt +0 -0
@@ -1,292 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from collections import defaultdict
4
- from datetime import datetime
5
- from itertools import chain
6
- from math import ceil
7
- from typing import TYPE_CHECKING
8
-
9
- from maggma.builders import Builder
10
- from maggma.core import Store
11
- from maggma.utils import grouper
12
-
13
- from emmet.builders.settings import EmmetBuildSettings
14
- from emmet.core.molecules.orbitals import OrbitalDoc
15
- from emmet.core.qchem.molecule import MoleculeDoc, evaluate_lot
16
- from emmet.core.qchem.task import TaskDocument
17
- from emmet.core.utils import jsanitize
18
-
19
- if TYPE_CHECKING:
20
- from collections.abc import Iterable, Iterator
21
-
22
- __author__ = "Evan Spotte-Smith"
23
-
24
- SETTINGS = EmmetBuildSettings()
25
-
26
-
27
class OrbitalBuilder(Builder):
    """
    The OrbitalBuilder extracts the highest-quality natural bonding orbital data
    from a MoleculeDoc (lowest electronic energy, highest level of theory for
    each solvent available).

    The process is as follows:
        1. Gather MoleculeDocs by species hash
        2. For each doc, sort tasks by solvent
        3. For each solvent, grab the best TaskDoc (including NBO data using
            the highest level of theory with lowest electronic energy for the
            molecule)
        4. Convert TaskDoc to OrbitalDoc
    """

    def __init__(
        self,
        tasks: Store,
        molecules: Store,
        orbitals: Store,
        query: dict | None = None,
        settings: EmmetBuildSettings | None = None,
        **kwargs,
    ):
        """
        Args:
            tasks: source Store queried for task documents
            molecules: source Store queried for molecule documents
            orbitals: target Store that receives the orbital documents
            query: extra criteria applied to every query on ``molecules``
            settings: build settings, resolved via ``EmmetBuildSettings.autoload``
            **kwargs: forwarded unchanged to ``Builder.__init__``
        """
        self.tasks = tasks
        self.molecules = molecules
        self.orbitals = orbitals
        self.query = query if query else dict()
        self.settings = EmmetBuildSettings.autoload(settings)
        self.kwargs = kwargs

        super().__init__(sources=[tasks, molecules], targets=[orbitals], **kwargs)
        # NOTE: if mrun does not connect to the collections automatically,
        # call ``.connect()`` on self.tasks, self.molecules and self.orbitals
        # here (catching and reporting any exception).

    def ensure_indexes(self):
        """
        Ensures indices on the collections needed for building
        """

        # Basic search index for tasks
        for field in (
            "task_id",
            "last_updated",
            "state",
            "formula_alphabetical",
            "species_hash",
        ):
            self.tasks.ensure_index(field)

        # Search index for molecules
        for field in (
            "molecule_id",
            "last_updated",
            "task_ids",
            "formula_alphabetical",
            "species_hash",
        ):
            self.molecules.ensure_index(field)

        # Search index for orbitals
        for field in (
            "molecule_id",
            "task_id",
            "solvent",
            "lot_solvent",
            "property_id",
            "last_updated",
            "formula_alphabetical",
        ):
            self.orbitals.ensure_index(field)

    def _find_unprocessed(self, criteria: dict) -> tuple[set, set]:
        """
        Find molecules matching ``criteria`` that have no orbital document yet.

        Returns:
            (molecule keys to process, species hashes of those molecules)
        """

        self.logger.info("Finding documents to process")
        all_mols = list(
            self.molecules.query(criteria, [self.molecules.key, "species_hash"])
        )

        processed_docs = set(self.orbitals.distinct("molecule_id"))
        to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
        to_process_hashes = {
            d["species_hash"]
            for d in all_mols
            if d[self.molecules.key] in to_process_docs
        }

        return to_process_docs, to_process_hashes

    def prechunk(self, number_splits: int) -> Iterable[dict]:  # pragma: no cover
        """Prechunk the builder for distributed computation"""

        temp_query = dict(self.query)
        temp_query["deprecated"] = False

        _, to_process_hashes = self._find_unprocessed(temp_query)

        # Nothing pending: avoid asking grouper for chunks of size zero.
        if not to_process_hashes:
            return

        N = ceil(len(to_process_hashes) / number_splits)

        for hash_chunk in grouper(to_process_hashes, N):
            query = dict(temp_query)
            query["species_hash"] = {"$in": list(hash_chunk)}
            yield {"query": query}

    def get_items(self) -> Iterator[list[dict]]:
        """
        Gets all items to process into orbital documents.
        This does no datetime checking; it relies on whether
        molecule ids are already present in the orbitals Store

        Returns:
            generator of lists of molecule documents (one list per species
            hash) to process into orbital documents
        """

        self.logger.info("Orbital builder started")
        self.logger.info("Setting indexes")
        self.ensure_indexes()

        # Save timestamp to mark buildtime
        self.timestamp = datetime.utcnow()

        # Only non-deprecated molecules are considered
        temp_query = dict(self.query)
        temp_query["deprecated"] = False

        to_process_docs, to_process_hashes = self._find_unprocessed(temp_query)

        self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
        self.logger.info(f"Found {len(to_process_hashes)} unprocessed hashes")

        # Set total for builder bars to have a total
        self.total = len(to_process_hashes)

        for shash in to_process_hashes:
            mol_query = dict(temp_query)
            mol_query["species_hash"] = shash
            yield list(self.molecules.query(criteria=mol_query))

    def _fetch_task(self, task_id, species_hash) -> dict | None:
        """
        Look up one task document, trying ``task_id`` as given and then as int.

        Returns None when neither lookup matches or the id is not int-like.
        """

        criteria = {
            "task_id": task_id,
            "species_hash": species_hash,
            "orig": {"$exists": True},
        }

        tdoc = self.tasks.query_one(criteria)
        if tdoc is not None:
            return tdoc

        try:
            return self.tasks.query_one({**criteria, "task_id": int(task_id)})
        except ValueError:
            return None

    def process_item(self, items: list[dict]) -> list[dict]:
        """
        Process a group of molecules into OrbitalDocs

        Args:
            items list[dict] : a list of MoleculeDocs in dict form

        Returns:
            [dict] : a list of new orbital docs
        """

        # An empty group has no species hash to read and nothing to build.
        if not items:
            return []

        mols = [MoleculeDoc(**item) for item in items]
        shash = mols[0].species_hash
        mol_ids = [m.molecule_id for m in mols]
        self.logger.info(f"Processing {shash} : {mol_ids}")

        orbital_docs = list()

        for mol in mols:
            # Organize usable entries by solvent environment
            by_solvent = defaultdict(list)
            for entry in mol.entries:
                if (
                    entry["charge"] != mol.charge
                    or entry["spin_multiplicity"] != mol.spin_multiplicity
                ):
                    continue

                # Must have NBO output, requested through one of these rem flags
                # NOTE(review): the original comment says "must specifically
                # use NBO7"; confirm these flags are the intended test for that.
                if entry["output"]["nbo"] is None:
                    continue
                rem = entry["orig"]["rem"]
                if not (
                    rem.get("run_nbo6", False) or rem.get("nbo_external", False)
                ):
                    continue

                by_solvent[entry["solvent"]].append(entry)

            for solvent, entries in by_solvent.items():
                sorted_entries = sorted(
                    entries,
                    key=lambda x: (
                        sum(evaluate_lot(x["level_of_theory"])),
                        x["energy"],
                    ),
                )

                # Walk candidates from best to worst; later ones are only
                # fallbacks for when a better task cannot be turned into a doc.
                for best in sorted_entries:
                    tdoc = self._fetch_task(best["task_id"], shash)
                    if tdoc is None:
                        continue

                    orbital_doc = OrbitalDoc.from_task(
                        TaskDocument(**tdoc),
                        molecule_id=mol.molecule_id,
                        deprecated=False,
                    )

                    if orbital_doc is not None:
                        orbital_docs.append(orbital_doc)
                        # One doc per (molecule, solvent): update_targets
                        # upserts on that pair, so a worse entry emitted after
                        # this one could overwrite the best result.
                        break

        self.logger.debug(f"Produced {len(orbital_docs)} orbital docs for {shash}")

        return jsanitize([doc.model_dump() for doc in orbital_docs], allow_bson=True)

    def update_targets(self, items: list[list[dict]]):
        """
        Inserts the new documents into the orbitals collection

        Args:
            items [[dict]]: A list of documents to update
        """

        docs = list(chain.from_iterable(items))  # type: ignore

        # Test the flattened docs, not the outer list: a batch made only of
        # empty lists has nothing to remove or write.
        if not docs:
            self.logger.info("No items to update")
            return

        # Add timestamp
        for item in docs:
            item.update(
                {
                    "_bt": self.timestamp,
                }
            )

        molecule_ids = list({item["molecule_id"] for item in docs})

        self.logger.info(f"Updating {len(docs)} orbital documents")
        # NOTE(review): this matches molecule ids against the store's key
        # field; confirm the orbitals store is keyed on "molecule_id".
        self.orbitals.remove_docs({self.orbitals.key: {"$in": molecule_ids}})
        self.orbitals.update(
            docs=docs,
            key=["molecule_id", "solvent"],
        )