emmet-builders 0.84.10rc2__py3-none-any.whl → 0.85.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of emmet-builders might be problematic. See the original diff page for more details.

Files changed (33)
  1. emmet/builders/abinit/phonon.py +12 -14
  2. emmet/builders/abinit/sound_velocity.py +1 -1
  3. emmet/builders/materials/absorption_spectrum.py +16 -10
  4. emmet/builders/materials/dielectric.py +10 -7
  5. emmet/builders/materials/elasticity.py +12 -9
  6. emmet/builders/materials/electrodes.py +1 -1
  7. emmet/builders/materials/electronic_structure.py +1 -1
  8. emmet/builders/materials/magnetism.py +2 -1
  9. emmet/builders/materials/piezoelectric.py +23 -19
  10. emmet/builders/materials/provenance.py +3 -4
  11. emmet/builders/settings.py +14 -9
  12. emmet/builders/utils.py +5 -4
  13. emmet/builders/vasp/materials.py +11 -4
  14. emmet/builders/vasp/task_validator.py +3 -1
  15. {emmet_builders-0.84.10rc2.dist-info → emmet_builders-0.85.0rc0.dist-info}/METADATA +7 -30
  16. emmet_builders-0.85.0rc0.dist-info/RECORD +41 -0
  17. emmet/builders/materials/ml.py +0 -101
  18. emmet/builders/molecules/atomic.py +0 -592
  19. emmet/builders/molecules/bonds.py +0 -329
  20. emmet/builders/molecules/electric.py +0 -287
  21. emmet/builders/molecules/metal_binding.py +0 -528
  22. emmet/builders/molecules/orbitals.py +0 -292
  23. emmet/builders/molecules/redox.py +0 -502
  24. emmet/builders/molecules/summary.py +0 -406
  25. emmet/builders/molecules/thermo.py +0 -505
  26. emmet/builders/molecules/trajectory.py +0 -530
  27. emmet/builders/molecules/vibration.py +0 -282
  28. emmet/builders/qchem/__init__.py +0 -0
  29. emmet/builders/qchem/molecules.py +0 -745
  30. emmet_builders-0.84.10rc2.dist-info/RECORD +0 -54
  31. /emmet/builders/{molecules/__init__.py → py.typed} +0 -0
  32. {emmet_builders-0.84.10rc2.dist-info → emmet_builders-0.85.0rc0.dist-info}/WHEEL +0 -0
  33. {emmet_builders-0.84.10rc2.dist-info → emmet_builders-0.85.0rc0.dist-info}/top_level.txt +0 -0
@@ -1,282 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from collections import defaultdict
4
- from datetime import datetime
5
- from itertools import chain
6
- from math import ceil
7
- from typing import TYPE_CHECKING
8
-
9
- from maggma.builders import Builder
10
- from maggma.core import Store
11
- from maggma.utils import grouper
12
-
13
- from emmet.builders.settings import EmmetBuildSettings
14
- from emmet.core.molecules.vibration import VibrationDoc
15
- from emmet.core.qchem.molecule import MoleculeDoc, evaluate_lot
16
- from emmet.core.qchem.task import TaskDocument
17
- from emmet.core.utils import jsanitize
18
-
19
- if TYPE_CHECKING:
20
- from collections.abc import Iterable, Iterator
21
-
22
- __author__ = "Evan Spotte-Smith"
23
-
24
- SETTINGS = EmmetBuildSettings()
25
-
26
-
27
- class VibrationBuilder(Builder):
28
- """
29
- The VibrationBuilder extracts the highest-quality vibrational data from a
30
- MoleculeDoc (lowest electronic energy, highest level of theory for
31
- each solvent available).
32
-
33
- The process is as follows:
34
- 1. Gather MoleculeDocs by species hash
35
- 2. For each doc, sort tasks by solvent
36
- 3. For each solvent, grab the best TaskDoc (doc with vibrational
37
- information that has the highest level of theory with lowest
38
- electronic energy for the molecule)
39
- 4. Convert TaskDoc to VibrationDoc
40
-
41
- Note that if no tasks associated with a given MoleculeDoc have vibrational
42
- data associated with them, then no VibrationDoc will be made for
43
- that molecule.
44
- """
45
-
46
- def __init__(
47
- self,
48
- tasks: Store,
49
- molecules: Store,
50
- vibes: Store,
51
- query: dict | None = None,
52
- settings: EmmetBuildSettings | None = None,
53
- **kwargs,
54
- ):
55
- self.tasks = tasks
56
- self.molecules = molecules
57
- self.vibes = vibes
58
- self.query = query if query else dict()
59
- self.settings = EmmetBuildSettings.autoload(settings)
60
- self.kwargs = kwargs
61
-
62
- super().__init__(sources=[tasks, molecules], targets=[vibes], **kwargs)
63
- # Uncomment in case of issue with mrun not connecting automatically to collections
64
- # for i in [self.tasks, self.molecules, self.vibes]:
65
- # try:
66
- # i.connect()
67
- # except Exception as e:
68
- # print("Could not connect,", e)
69
-
70
- def ensure_indexes(self):
71
- """
72
- Ensures indices on the collections needed for building
73
- """
74
-
75
- # Basic search index for tasks
76
- self.tasks.ensure_index("task_id")
77
- self.tasks.ensure_index("last_updated")
78
- self.tasks.ensure_index("state")
79
- self.tasks.ensure_index("formula_alphabetical")
80
- self.tasks.ensure_index("species_hash")
81
-
82
- # Search index for molecules
83
- self.molecules.ensure_index("molecule_id")
84
- self.molecules.ensure_index("last_updated")
85
- self.molecules.ensure_index("task_ids")
86
- self.molecules.ensure_index("formula_alphabetical")
87
- self.molecules.ensure_index("species_hash")
88
-
89
- # Search index for vibrational properties
90
- self.vibes.ensure_index("molecule_id")
91
- self.vibes.ensure_index("task_id")
92
- self.vibes.ensure_index("solvent")
93
- self.vibes.ensure_index("lot_solvent")
94
- self.vibes.ensure_index("property_id")
95
- self.vibes.ensure_index("last_updated")
96
- self.vibes.ensure_index("formula_alphabetical")
97
-
98
- def prechunk(self, number_splits: int) -> Iterable[dict]: # pragma: no cover
99
- """Prechunk the builder for distributed computation"""
100
-
101
- temp_query = dict(self.query)
102
- temp_query["deprecated"] = False
103
-
104
- self.logger.info("Finding documents to process")
105
- all_mols = list(
106
- self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
107
- )
108
-
109
- processed_docs = set([e for e in self.vibes.distinct("molecule_id")])
110
- to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
111
- to_process_hashes = {
112
- d["species_hash"]
113
- for d in all_mols
114
- if d[self.molecules.key] in to_process_docs
115
- }
116
-
117
- N = ceil(len(to_process_hashes) / number_splits)
118
-
119
- for hash_chunk in grouper(to_process_hashes, N):
120
- query = dict(temp_query)
121
- query["species_hash"] = {"$in": list(hash_chunk)}
122
- yield {"query": query}
123
-
124
- def get_items(self) -> Iterator[list[dict]]:
125
- """
126
- Gets all items to process into vibration documents.
127
- This does no datetime checking; relying on on whether
128
- task_ids are included in the vibes Store
129
-
130
- Returns:
131
- generator or list relevant tasks and molecules to process into documents
132
- """
133
-
134
- self.logger.info("Vibration builder started")
135
- self.logger.info("Setting indexes")
136
- self.ensure_indexes()
137
-
138
- # Save timestamp to mark buildtime
139
- self.timestamp = datetime.utcnow()
140
-
141
- # Get all processed molecules
142
- temp_query = dict(self.query)
143
- temp_query["deprecated"] = False
144
-
145
- self.logger.info("Finding documents to process")
146
- all_mols = list(
147
- self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
148
- )
149
-
150
- processed_docs = set([e for e in self.vibes.distinct("molecule_id")])
151
- to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
152
- to_process_hashes = {
153
- d["species_hash"]
154
- for d in all_mols
155
- if d[self.molecules.key] in to_process_docs
156
- }
157
-
158
- self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
159
- self.logger.info(f"Found {len(to_process_hashes)} unprocessed hashes")
160
-
161
- # Set total for builder bars to have a total
162
- self.total = len(to_process_hashes)
163
-
164
- for shash in to_process_hashes:
165
- mol_query = dict(temp_query)
166
- mol_query["species_hash"] = shash
167
- molecules = list(self.molecules.query(criteria=mol_query))
168
-
169
- yield molecules
170
-
171
- def process_item(self, items: list[dict]) -> list[dict]:
172
- """
173
- Process the tasks into VibrationDocs
174
-
175
- Args:
176
- items list[dict] : a list of MoleculeDocs in dict form
177
-
178
- Returns:
179
- [dict] : a list of new vibration docs
180
- """
181
-
182
- mols = [MoleculeDoc(**item) for item in items]
183
- shash = mols[0].species_hash
184
- mol_ids = [m.molecule_id for m in mols]
185
- self.logger.debug(f"Processing {shash} : {mol_ids}")
186
-
187
- vibe_docs = list()
188
-
189
- for mol in mols:
190
- vibe_entries = [
191
- e
192
- for e in mol.entries
193
- if e["charge"] == mol.charge
194
- and e["spin_multiplicity"] == mol.spin_multiplicity
195
- and e["output"].get("frequencies") is not None
196
- ]
197
-
198
- # Organize by solvent environment
199
- by_solvent = defaultdict(list)
200
- for entry in vibe_entries:
201
- by_solvent[entry["solvent"]].append(entry)
202
-
203
- for solvent, entries in by_solvent.items():
204
- # No documents with enthalpy and entropy
205
- if len(entries) == 0:
206
- continue
207
- else:
208
- best = sorted(
209
- entries,
210
- key=lambda x: (
211
- sum(evaluate_lot(x["level_of_theory"])),
212
- x["energy"],
213
- ),
214
- )[0]
215
- task = best["task_id"]
216
-
217
- tdoc = self.tasks.query_one(
218
- {
219
- "task_id": task,
220
- "species_hash": shash,
221
- "orig": {"$exists": True},
222
- }
223
- )
224
-
225
- if tdoc is None:
226
- try:
227
- tdoc = self.tasks.query_one(
228
- {
229
- "task_id": int(task),
230
- "species_hash": shash,
231
- "orig": {"$exists": True},
232
- }
233
- )
234
- except ValueError:
235
- tdoc = None
236
-
237
- if tdoc is None:
238
- continue
239
-
240
- task_doc = TaskDocument(**tdoc)
241
-
242
- if task_doc is None:
243
- continue
244
-
245
- vibe_doc = VibrationDoc.from_task(
246
- task_doc, molecule_id=mol.molecule_id, deprecated=False
247
- )
248
- vibe_docs.append(vibe_doc)
249
-
250
- self.logger.debug(f"Produced {len(vibe_docs)} vibration docs for {shash}")
251
-
252
- return jsanitize([doc.model_dump() for doc in vibe_docs], allow_bson=True)
253
-
254
- def update_targets(self, items: list[list[dict]]):
255
- """
256
- Inserts the new vibration docs into the vibes collection
257
-
258
- Args:
259
- items [[dict]]: A list of documents to update
260
- """
261
-
262
- docs = list(chain.from_iterable(items)) # type: ignore
263
-
264
- # Add timestamp
265
- for item in docs:
266
- item.update(
267
- {
268
- "_bt": self.timestamp,
269
- }
270
- )
271
-
272
- molecule_ids = list({item["molecule_id"] for item in docs})
273
-
274
- if len(items) > 0:
275
- self.logger.info(f"Updating {len(docs)} vibration documents")
276
- self.vibes.remove_docs({self.vibes.key: {"$in": molecule_ids}})
277
- self.vibes.update(
278
- docs=docs,
279
- key=["molecule_id", "solvent"],
280
- )
281
- else:
282
- self.logger.info("No items to update")
File without changes