emmet-builders 0.84.10rc2__py3-none-any.whl → 0.85.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of emmet-builders might be problematic. (The original page links to further details at this point; the link target was not preserved in this text export.)

Files changed (33)
  1. emmet/builders/abinit/phonon.py +12 -14
  2. emmet/builders/abinit/sound_velocity.py +1 -1
  3. emmet/builders/materials/absorption_spectrum.py +16 -10
  4. emmet/builders/materials/dielectric.py +10 -7
  5. emmet/builders/materials/elasticity.py +12 -9
  6. emmet/builders/materials/electrodes.py +1 -1
  7. emmet/builders/materials/electronic_structure.py +1 -1
  8. emmet/builders/materials/magnetism.py +2 -1
  9. emmet/builders/materials/piezoelectric.py +23 -19
  10. emmet/builders/materials/provenance.py +3 -4
  11. emmet/builders/settings.py +14 -9
  12. emmet/builders/utils.py +5 -4
  13. emmet/builders/vasp/materials.py +11 -4
  14. emmet/builders/vasp/task_validator.py +3 -1
  15. {emmet_builders-0.84.10rc2.dist-info → emmet_builders-0.85.0rc0.dist-info}/METADATA +7 -30
  16. emmet_builders-0.85.0rc0.dist-info/RECORD +41 -0
  17. emmet/builders/materials/ml.py +0 -101
  18. emmet/builders/molecules/atomic.py +0 -592
  19. emmet/builders/molecules/bonds.py +0 -329
  20. emmet/builders/molecules/electric.py +0 -287
  21. emmet/builders/molecules/metal_binding.py +0 -528
  22. emmet/builders/molecules/orbitals.py +0 -292
  23. emmet/builders/molecules/redox.py +0 -502
  24. emmet/builders/molecules/summary.py +0 -406
  25. emmet/builders/molecules/thermo.py +0 -505
  26. emmet/builders/molecules/trajectory.py +0 -530
  27. emmet/builders/molecules/vibration.py +0 -282
  28. emmet/builders/qchem/__init__.py +0 -0
  29. emmet/builders/qchem/molecules.py +0 -745
  30. emmet_builders-0.84.10rc2.dist-info/RECORD +0 -54
  31. emmet/builders/{molecules/__init__.py → py.typed} +0 -0
  32. {emmet_builders-0.84.10rc2.dist-info → emmet_builders-0.85.0rc0.dist-info}/WHEEL +0 -0
  33. {emmet_builders-0.84.10rc2.dist-info → emmet_builders-0.85.0rc0.dist-info}/top_level.txt +0 -0
emmet/builders/materials/ml.py (file removed; entry 17 in the list above)
@@ -1,101 +0,0 @@
1
- from importlib.metadata import version
2
-
3
- from maggma.builders.map_builder import MapBuilder
4
- from maggma.core import Store
5
-
6
- try:
7
- from matcalc import PESCalculator
8
-
9
- matcalc_installed = True
10
- except ImportError:
11
- matcalc_installed = False
12
-
13
- from pymatgen.core import Structure
14
-
15
- from emmet.core.ml import MLDoc
16
- from emmet.core.utils import jsanitize
17
-
18
- try:
19
- from ase.calculators.calculator import Calculator
20
-
21
- ase_installed = True
22
- except ImportError:
23
- ase_installed = False
24
-
25
-
26
- class MLBuilder(MapBuilder):
27
- def __init__(
28
- self,
29
- materials: Store,
30
- ml_potential: Store,
31
- model: str | Calculator,
32
- model_kwargs: dict | None = None,
33
- prop_kwargs: dict | None = None,
34
- provenance: dict | None = None,
35
- **kwargs,
36
- ):
37
- """Machine learning interatomic potential builder.
38
-
39
- Args:
40
- materials (Store): Materials to use as input structures.
41
- ml_potential (Store): Where to save MLDoc documents to.
42
- model (str | Calculator): ASE calculator or name of model to use as ML
43
- potential. See matcalc.utils.UNIVERSAL_CALCULATORS for recognized names.
44
- model_kwargs (dict, optional): Additional kwargs to pass to the calculator.
45
- Defaults to None.
46
- prop_kwargs (dict[str, dict], optional): Separate kwargs passed to each matcalc
47
- PropCalc class. Recognized keys are RelaxCalc, ElasticityCalc, PhononCalc, EOSCalc.
48
- Defaults to None.
49
- provenance (dict, optional): Additional provenance information to include in
50
- MLDocs. Will be saved in each document so use sparingly. Defaults to None.
51
- Set to {} to disable default provenance model, version, matcalc_version.
52
- """
53
-
54
- if not matcalc_installed or not ase_installed:
55
- raise ImportError("Please `pip install matcalc` to use the MLBuilder.")
56
-
57
- self.materials = materials
58
- self.ml_potential = ml_potential
59
- self.kwargs = kwargs
60
- self.model = PESCalculator.load_universal(model, **(model_kwargs or {}))
61
- self.prop_kwargs = prop_kwargs or {}
62
-
63
- if provenance == {}:
64
- self.provenance = {}
65
- else:
66
- model_name = (
67
- model if isinstance(model, str) else type(model).__name__
68
- ).lower()
69
- model_name = {"chgnetcalculator": "chgnet"}.get(model_name, model_name)
70
- pkg_name = {"m3gnet": "matgl"}.get(model_name, model_name)
71
- self.provenance = dict(
72
- model=model_name,
73
- version=version(pkg_name),
74
- matcalc_version=version("matcalc"),
75
- **(provenance or {}),
76
- )
77
-
78
- # Enforce that we key on material_id
79
- self.materials.key = "material_id"
80
- self.ml_potential.key = "material_id"
81
- super().__init__(
82
- source=materials,
83
- target=ml_potential,
84
- projection=["structure", "deprecated"],
85
- **kwargs,
86
- )
87
-
88
- def unary_function(self, item):
89
- struct = Structure.from_dict(item["structure"])
90
- mp_id, deprecated = item["material_id"], item["deprecated"]
91
-
92
- doc = MLDoc(
93
- structure=struct,
94
- material_id=mp_id,
95
- calculator=self.model,
96
- prop_kwargs=self.prop_kwargs,
97
- deprecated=deprecated,
98
- **self.provenance,
99
- )
100
-
101
- return jsanitize(doc, allow_bson=True)
emmet/builders/molecules/atomic.py (file removed; entry 18 in the list above)
@@ -1,592 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from collections import defaultdict
4
- from datetime import datetime
5
- from itertools import chain
6
- from math import ceil
7
-
8
- from maggma.builders import Builder
9
- from maggma.core import Store
10
- from maggma.utils import grouper
11
-
12
- from emmet.builders.settings import EmmetBuildSettings
13
- from emmet.core.molecules.atomic import (
14
- CHARGES_METHODS,
15
- SPINS_METHODS,
16
- PartialChargesDoc,
17
- PartialSpinsDoc,
18
- )
19
- from emmet.core.qchem.molecule import MoleculeDoc, evaluate_lot
20
- from emmet.core.qchem.task import TaskDocument
21
- from emmet.core.utils import jsanitize
22
-
23
- from typing import TYPE_CHECKING
24
-
25
- if TYPE_CHECKING:
26
- from collections.abc import Iterable, Iterator
27
-
28
- __author__ = "Evan Spotte-Smith"
29
-
30
- SETTINGS = EmmetBuildSettings()
31
-
32
-
33
- class PartialChargesBuilder(Builder):
34
- """
35
- The PartialChargesBuilder extracts partial charges data from a MoleculeDoc.
36
-
37
- Various methods can be used to define partial charges, including:
38
- - Mulliken
39
- - Restrained Electrostatic Potential (RESP)
40
- - Critic2
41
- - Natural Bonding Orbital (NBO) population analysis
42
-
43
- This builder will attempt to build documents for each molecule, in each solvent,
44
- with each method. For each molecule-solvent-method combination, the
45
- highest-quality data available (based on level of theory and electronic
46
- energy) will be used.
47
-
48
- The process is as follows:
49
- 1. Gather MoleculeDocs by species hash
50
- 2. For each molecule, group all tasks by solvent.
51
- 3. For each solvent, sort tasks by level of theory and electronic energy
52
- 4. For each method:
53
- 4.1. Find task docs with necessary data to calculate partial charges by that method
54
- 4.2. Take best (defined by level of theory and electronic energy) task
55
- 4.3. Convert TaskDoc to PartialChargesDoc
56
- """
57
-
58
- def __init__(
59
- self,
60
- tasks: Store,
61
- molecules: Store,
62
- charges: Store,
63
- query: dict | None = None,
64
- methods: list | None = None,
65
- settings: EmmetBuildSettings | None = None,
66
- **kwargs,
67
- ):
68
- self.tasks = tasks
69
- self.molecules = molecules
70
- self.charges = charges
71
- self.query = query if query else dict()
72
- self.methods = methods if methods else CHARGES_METHODS
73
- self.settings = EmmetBuildSettings.autoload(settings)
74
- self.kwargs = kwargs
75
-
76
- super().__init__(sources=[tasks, molecules], targets=[charges], **kwargs)
77
- # Uncomment in case of issue with mrun not connecting automatically to collections
78
- # for i in [self.tasks, self.molecules, self.charges]:
79
- # try:
80
- # i.connect()
81
- # except Exception as e:
82
- # print("Could not connect,", e)
83
-
84
- def ensure_indexes(self):
85
- """
86
- Ensures indices on the collections needed for building
87
- """
88
-
89
- # Basic search index for tasks
90
- self.tasks.ensure_index("task_id")
91
- self.tasks.ensure_index("last_updated")
92
- self.tasks.ensure_index("state")
93
- self.tasks.ensure_index("formula_alphabetical")
94
- self.tasks.ensure_index("species_hash")
95
-
96
- # Search index for molecules
97
- self.molecules.ensure_index("molecule_id")
98
- self.molecules.ensure_index("last_updated")
99
- self.molecules.ensure_index("task_ids")
100
- self.molecules.ensure_index("formula_alphabetical")
101
- self.molecules.ensure_index("species_hash")
102
-
103
- # Search index for charges
104
- self.charges.ensure_index("molecule_id")
105
- self.charges.ensure_index("task_id")
106
- self.charges.ensure_index("method")
107
- self.charges.ensure_index("solvent")
108
- self.charges.ensure_index("lot_solvent")
109
- self.charges.ensure_index("property_id")
110
- self.charges.ensure_index("last_updated")
111
- self.charges.ensure_index("formula_alphabetical")
112
-
113
- def prechunk(self, number_splits: int) -> Iterable[dict]: # pragma: no cover
114
- """Prechunk the builder for distributed computation"""
115
-
116
- temp_query = dict(self.query)
117
- temp_query["deprecated"] = False
118
-
119
- self.logger.info("Finding documents to process")
120
- all_mols = list(
121
- self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
122
- )
123
-
124
- processed_docs = set([e for e in self.charges.distinct("molecule_id")])
125
- to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
126
- to_process_hashes = {
127
- d["species_hash"]
128
- for d in all_mols
129
- if d[self.molecules.key] in to_process_docs
130
- }
131
-
132
- N = ceil(len(to_process_hashes) / number_splits)
133
-
134
- for hash_chunk in grouper(to_process_hashes, N):
135
- query = dict(temp_query)
136
- query["species_hash"] = {"$in": list(hash_chunk)}
137
- yield {"query": query}
138
-
139
- def get_items(self) -> Iterator[list[dict]]:
140
- """
141
- Gets all items to process into partial charges documents.
142
- This does no datetime checking; relying on on whether
143
- task_ids are included in the charges Store
144
-
145
- Returns:
146
- generator or list relevant tasks and molecules to process into documents
147
- """
148
-
149
- self.logger.info("Partial charges builder started")
150
- self.logger.info("Setting indexes")
151
- self.ensure_indexes()
152
-
153
- # Save timestamp to mark buildtime
154
- self.timestamp = datetime.utcnow()
155
-
156
- # Get all processed molecules
157
- temp_query = dict(self.query)
158
- temp_query["deprecated"] = False
159
-
160
- self.logger.info("Finding documents to process")
161
- all_mols = list(
162
- self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
163
- )
164
-
165
- processed_docs = set([e for e in self.charges.distinct("molecule_id")])
166
- to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
167
- to_process_hashes = {
168
- d["species_hash"]
169
- for d in all_mols
170
- if d[self.molecules.key] in to_process_docs
171
- }
172
-
173
- self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
174
- self.logger.info(f"Found {len(to_process_hashes)} unprocessed hashes")
175
-
176
- # Set total for builder bars to have a total
177
- self.total = len(to_process_hashes)
178
-
179
- for shash in to_process_hashes:
180
- mol_query = dict(temp_query)
181
- mol_query["species_hash"] = shash
182
- molecules = list(self.molecules.query(criteria=mol_query))
183
-
184
- yield molecules
185
-
186
- def process_item(self, items: list[dict]) -> list[dict]:
187
- """
188
- Process the tasks into PartialChargesDocs
189
-
190
- Args:
191
- tasks list[dict] : a list of MoleculeDocs in dict form
192
-
193
- Returns:
194
- [dict] : a list of new partial charges docs
195
- """
196
-
197
- mols = [MoleculeDoc(**item) for item in items]
198
- shash = mols[0].species_hash
199
- mol_ids = [m.molecule_id for m in mols]
200
- self.logger.debug(f"Processing {shash} : {mol_ids}")
201
-
202
- charges_docs = list()
203
-
204
- for mol in mols:
205
- correct_charge_spin = [
206
- e
207
- for e in mol.entries
208
- if e["charge"] == mol.charge
209
- and e["spin_multiplicity"] == mol.spin_multiplicity
210
- ]
211
-
212
- # Organize by solvent environment
213
- by_solvent = defaultdict(list)
214
- for entry in correct_charge_spin:
215
- by_solvent[entry["solvent"]].append(entry)
216
-
217
- for solvent, entries in by_solvent.items():
218
- sorted_entries = sorted(
219
- entries,
220
- key=lambda x: (
221
- sum(evaluate_lot(x["level_of_theory"])),
222
- x["energy"],
223
- ),
224
- )
225
-
226
- for method in self.methods:
227
- # For each method, grab entries that have the relevant data
228
- relevant_entries = [
229
- e
230
- for e in sorted_entries
231
- if e.get(method) is not None
232
- or e["output"].get(method) is not None
233
- ]
234
-
235
- if len(relevant_entries) == 0:
236
- continue
237
-
238
- # Grab task document of best entry
239
- best_entry = relevant_entries[0]
240
- task = best_entry["task_id"]
241
-
242
- tdoc = self.tasks.query_one(
243
- {
244
- "task_id": task,
245
- "species_hash": shash,
246
- "orig": {"$exists": True},
247
- }
248
- )
249
-
250
- if tdoc is None:
251
- try:
252
- tdoc = self.tasks.query_one(
253
- {
254
- "task_id": int(task),
255
- "species_hash": shash,
256
- "orig": {"$exists": True},
257
- }
258
- )
259
- except ValueError:
260
- tdoc = None
261
-
262
- if tdoc is None:
263
- continue
264
-
265
- task_doc = TaskDocument(**tdoc)
266
-
267
- if task_doc is None:
268
- continue
269
-
270
- doc = PartialChargesDoc.from_task(
271
- task_doc,
272
- molecule_id=mol.molecule_id,
273
- preferred_methods=[method],
274
- deprecated=False,
275
- )
276
-
277
- charges_docs.append(doc)
278
-
279
- self.logger.debug(f"Produced {len(charges_docs)} charges docs for {shash}")
280
-
281
- return jsanitize([doc.model_dump() for doc in charges_docs], allow_bson=True)
282
-
283
- def update_targets(self, items: list[list[dict]]):
284
- """
285
- Inserts the new documents into the charges collection
286
-
287
- Args:
288
- items [[dict]]: A list of documents to update
289
- """
290
-
291
- docs = list(chain.from_iterable(items)) # type: ignore
292
-
293
- # Add timestamp
294
- for item in docs:
295
- item.update(
296
- {
297
- "_bt": self.timestamp,
298
- }
299
- )
300
-
301
- molecule_ids = list({item["molecule_id"] for item in docs})
302
-
303
- if len(items) > 0:
304
- self.logger.info(f"Updating {len(docs)} partial charges documents")
305
- self.charges.remove_docs({self.charges.key: {"$in": molecule_ids}})
306
- # Neither molecule_id nor method need to be unique, but the combination must be
307
- self.charges.update(
308
- docs=docs,
309
- key=["molecule_id", "method", "solvent"],
310
- )
311
- else:
312
- self.logger.info("No items to update")
313
-
314
-
315
- class PartialSpinsBuilder(Builder):
316
- """
317
- The PartialSpinsBuilder extracts partial spin data from a MoleculeDoc.
318
-
319
- Various methods can be used to define partial atomic spins, including:
320
- - Mulliken
321
- - Natural Bonding Orbital (NBO) population analysis
322
-
323
- This builder will attempt to build documents for each molecule, in each solvent,
324
- with each method. For each molecule-method combination, the highest-quality
325
- data available (based on level of theory and electronic energy) will be used.
326
-
327
- The process is as follows:
328
- 1. Gather MoleculeDocs by species_hash
329
- 2. For each molecule, group all tasks by solvent.
330
- 3. For each solvent, sort tasks by level of theory and electronic energy
331
- 4. For each method:
332
- 4.1. Find task docs with necessary data to calculate partial charges by that method
333
- 4.2. Take best (defined by level of theory and electronic energy) task
334
- 4.3. Convert TaskDoc to PartialSpinsDoc
335
- """
336
-
337
- def __init__(
338
- self,
339
- tasks: Store,
340
- molecules: Store,
341
- spins: Store,
342
- query: dict | None = None,
343
- methods: list | None = None,
344
- settings: EmmetBuildSettings | None = None,
345
- **kwargs,
346
- ):
347
- self.tasks = tasks
348
- self.molecules = molecules
349
- self.spins = spins
350
- self.query = query if query else dict()
351
- self.methods = methods if methods else SPINS_METHODS
352
- self.settings = EmmetBuildSettings.autoload(settings)
353
- self.kwargs = kwargs
354
-
355
- super().__init__(sources=[tasks, molecules], targets=[spins], **kwargs)
356
- # Uncomment in case of issue with mrun not connecting automatically to collections
357
- # for i in [self.tasks, self.molecules, self.spins]:
358
- # try:
359
- # i.connect()
360
- # except Exception as e:
361
- # print("Could not connect,", e)
362
-
363
- def ensure_indexes(self):
364
- """
365
- Ensures indices on the collections needed for building
366
- """
367
-
368
- # Basic search index for tasks
369
- self.tasks.ensure_index("task_id")
370
- self.tasks.ensure_index("last_updated")
371
- self.tasks.ensure_index("state")
372
- self.tasks.ensure_index("formula_alphabetical")
373
- self.tasks.ensure_index("species_hash")
374
-
375
- # Search index for molecules
376
- self.molecules.ensure_index("molecule_id")
377
- self.molecules.ensure_index("last_updated")
378
- self.molecules.ensure_index("task_ids")
379
- self.molecules.ensure_index("formula_alphabetical")
380
- self.molecules.ensure_index("species_hash")
381
-
382
- # Search index for spins
383
- self.spins.ensure_index("molecule_id")
384
- self.spins.ensure_index("task_id")
385
- self.spins.ensure_index("method")
386
- self.spins.ensure_index("solvent")
387
- self.spins.ensure_index("lot_solvent")
388
- self.spins.ensure_index("property_id")
389
- self.spins.ensure_index("last_updated")
390
- self.spins.ensure_index("formula_alphabetical")
391
-
392
- def prechunk(self, number_splits: int) -> Iterable[dict]: # pragma: no cover
393
- """Prechunk the builder for distributed computation"""
394
-
395
- temp_query = dict(self.query)
396
- temp_query["deprecated"] = False
397
-
398
- self.logger.info("Finding documents to process")
399
- all_mols = list(
400
- self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
401
- )
402
-
403
- processed_docs = set([e for e in self.spins.distinct("molecule_id")])
404
- to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
405
- to_process_hashes = {
406
- d["species_hash"]
407
- for d in all_mols
408
- if d[self.molecules.key] in to_process_docs
409
- }
410
-
411
- N = ceil(len(to_process_hashes) / number_splits)
412
-
413
- for hash_chunk in grouper(to_process_hashes, N):
414
- query = dict(temp_query)
415
- query["species_hash"] = {"$in": list(hash_chunk)}
416
- yield {"query": query}
417
-
418
- def get_items(self) -> Iterator[list[dict]]:
419
- """
420
- Gets all items to process into partial spins documents.
421
- This does no datetime checking; relying on on whether
422
- task_ids are included in the spins Store
423
-
424
- Returns:
425
- generator or list relevant tasks and molecules to process into documents
426
- """
427
-
428
- self.logger.info("Partial spins builder started")
429
- self.logger.info("Setting indexes")
430
- self.ensure_indexes()
431
-
432
- # Save timestamp to mark buildtime
433
- self.timestamp = datetime.utcnow()
434
-
435
- # Get all processed molecules
436
- temp_query = dict(self.query)
437
- temp_query["deprecated"] = False
438
-
439
- self.logger.info("Finding documents to process")
440
- all_mols = list(
441
- self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
442
- )
443
-
444
- processed_docs = set([e for e in self.spins.distinct("molecule_id")])
445
- to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
446
- to_process_hashes = {
447
- d["species_hash"]
448
- for d in all_mols
449
- if d[self.molecules.key] in to_process_docs
450
- }
451
-
452
- self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
453
- self.logger.info(f"Found {len(to_process_hashes)} unprocessed hashes")
454
-
455
- # Set total for builder bars to have a total
456
- self.total = len(to_process_hashes)
457
-
458
- for shash in to_process_hashes:
459
- mol_query = dict(temp_query)
460
- mol_query["species_hash"] = shash
461
- molecules = list(self.molecules.query(criteria=mol_query))
462
-
463
- yield molecules
464
-
465
- def process_item(self, items: list[dict]) -> list[dict]:
466
- """
467
- Process the tasks into PartialSpinsDocs
468
-
469
- Args:
470
- tasks list[dict] : a list of MoleculeDocs in dict form
471
-
472
- Returns:
473
- [dict] : a list of new partial spins docs
474
- """
475
-
476
- mols = [MoleculeDoc(**item) for item in items]
477
- shash = mols[0].species_hash
478
- mol_ids = [m.molecule_id for m in mols]
479
- self.logger.debug(f"Processing {shash} : {mol_ids}")
480
-
481
- spins_docs = list()
482
-
483
- for mol in mols:
484
- # Molecule with spin multiplicity 1 has no partial spins
485
- if mol.spin_multiplicity == 1:
486
- continue
487
-
488
- correct_charge_spin = [
489
- e
490
- for e in mol.entries
491
- if e["charge"] == mol.charge
492
- and e["spin_multiplicity"] == mol.spin_multiplicity
493
- ]
494
-
495
- # Organize by solvent environment
496
- by_solvent = defaultdict(list)
497
- for entry in correct_charge_spin:
498
- by_solvent[entry["solvent"]].append(entry)
499
-
500
- for solvent, entries in by_solvent.items():
501
- sorted_entries = sorted(
502
- entries,
503
- key=lambda x: (
504
- sum(evaluate_lot(x["level_of_theory"])),
505
- x["energy"],
506
- ),
507
- )
508
-
509
- for method in self.methods:
510
- # For each method, grab entries that have the relevant data
511
- relevant_entries = [
512
- e
513
- for e in sorted_entries
514
- if e.get(method) is not None
515
- or e["output"].get(method) is not None
516
- ]
517
-
518
- if len(relevant_entries) == 0:
519
- continue
520
-
521
- # Grab task document of best entry
522
- best_entry = relevant_entries[0]
523
- task = best_entry["task_id"]
524
-
525
- tdoc = self.tasks.query_one(
526
- {
527
- "task_id": task,
528
- "species_hash": shash,
529
- "orig": {"$exists": True},
530
- }
531
- )
532
-
533
- if tdoc is None:
534
- try:
535
- tdoc = self.tasks.query_one(
536
- {
537
- "task_id": int(task),
538
- "species_hash": shash,
539
- "orig": {"$exists": True},
540
- }
541
- )
542
- except ValueError:
543
- tdoc = None
544
-
545
- if tdoc is None:
546
- continue
547
-
548
- task_doc = TaskDocument(**tdoc)
549
-
550
- doc = PartialSpinsDoc.from_task(
551
- task_doc,
552
- molecule_id=mol.molecule_id,
553
- preferred_methods=[method],
554
- deprecated=False,
555
- )
556
-
557
- spins_docs.append(doc)
558
-
559
- self.logger.debug(f"Produced {len(spins_docs)} partial spins docs for {shash}")
560
-
561
- return jsanitize([doc.model_dump() for doc in spins_docs], allow_bson=True)
562
-
563
- def update_targets(self, items: list[list[dict]]):
564
- """
565
- Inserts the new documents into the spins collection
566
-
567
- Args:
568
- items [[dict]]: A list of documents to update
569
- """
570
-
571
- docs = list(chain.from_iterable(items)) # type: ignore
572
-
573
- # Add timestamp
574
- for item in docs:
575
- item.update(
576
- {
577
- "_bt": self.timestamp,
578
- }
579
- )
580
-
581
- molecule_ids = list({item["molecule_id"] for item in docs})
582
-
583
- if len(items) > 0:
584
- self.logger.info(f"Updating {len(docs)} partial spins documents")
585
- self.spins.remove_docs({self.spins.key: {"$in": molecule_ids}})
586
- # Neither molecule_id nor method need to be unique, but the combination must be
587
- self.spins.update(
588
- docs=docs,
589
- key=["molecule_id", "method", "solvent"],
590
- )
591
- else:
592
- self.logger.info("No items to update")