emmet-builders 0.84.2rc7__py3-none-any.whl → 0.84.2rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of emmet-builders might be problematic. Click here for more details.
- emmet/builders/molecules/atomic.py +48 -46
- emmet/builders/molecules/bonds.py +24 -24
- emmet/builders/molecules/metal_binding.py +21 -20
- emmet/builders/molecules/orbitals.py +23 -23
- emmet/builders/molecules/redox.py +27 -27
- emmet/builders/molecules/summary.py +21 -36
- emmet/builders/molecules/thermo.py +23 -23
- emmet/builders/molecules/vibration.py +23 -23
- emmet/builders/qchem/molecules.py +15 -21
- emmet/builders/vasp/mp_potcar_stats.json.gz +0 -0
- {emmet_builders-0.84.2rc7.dist-info → emmet_builders-0.84.2rc9.dist-info}/METADATA +1 -1
- {emmet_builders-0.84.2rc7.dist-info → emmet_builders-0.84.2rc9.dist-info}/RECORD +14 -16
- emmet/builders/molecules/electric.py +0 -282
- emmet/builders/molecules/trajectory.py +0 -525
- {emmet_builders-0.84.2rc7.dist-info → emmet_builders-0.84.2rc9.dist-info}/WHEEL +0 -0
- {emmet_builders-0.84.2rc7.dist-info → emmet_builders-0.84.2rc9.dist-info}/top_level.txt +0 -0
|
@@ -138,7 +138,6 @@ class MoleculesAssociationBuilder(Builder):
|
|
|
138
138
|
self.tasks.ensure_index("formula_alphabetical")
|
|
139
139
|
self.tasks.ensure_index("smiles")
|
|
140
140
|
self.tasks.ensure_index("species_hash")
|
|
141
|
-
self.tasks.ensure_index("coord_hash")
|
|
142
141
|
|
|
143
142
|
# Search index for molecules
|
|
144
143
|
self.assoc.ensure_index("molecule_id")
|
|
@@ -166,9 +165,7 @@ class MoleculesAssociationBuilder(Builder):
|
|
|
166
165
|
N = ceil(len(to_process_hashes) / number_splits)
|
|
167
166
|
|
|
168
167
|
for hash_chunk in grouper(to_process_hashes, N):
|
|
169
|
-
query
|
|
170
|
-
query["species_hash"] = {"$in": list(hash_chunk)}
|
|
171
|
-
yield {"query": query}
|
|
168
|
+
yield {"query": {"species_hash": {"$in": list(hash_chunk)}}}
|
|
172
169
|
|
|
173
170
|
def get_items(self) -> Iterator[List[TaskDocument]]:
|
|
174
171
|
"""
|
|
@@ -393,7 +390,6 @@ class MoleculesBuilder(Builder):
|
|
|
393
390
|
self.assoc.ensure_index("last_updated")
|
|
394
391
|
self.assoc.ensure_index("task_ids")
|
|
395
392
|
self.assoc.ensure_index("formula_alphabetical")
|
|
396
|
-
self.assoc.ensure_index("species_hash")
|
|
397
393
|
|
|
398
394
|
# Search index for molecules
|
|
399
395
|
self.molecules.ensure_index("molecule_id")
|
|
@@ -437,18 +433,16 @@ class MoleculesBuilder(Builder):
|
|
|
437
433
|
xyz_species_id_map[d[self.assoc.key]] = this_id
|
|
438
434
|
to_process_docs = assoc_ids - processed_docs
|
|
439
435
|
|
|
440
|
-
|
|
441
|
-
d["
|
|
436
|
+
to_process_forms = {
|
|
437
|
+
d["formula_alphabetical"]
|
|
442
438
|
for d in all_assoc
|
|
443
439
|
if xyz_species_id_map[d[self.assoc.key]] in to_process_docs
|
|
444
440
|
}
|
|
445
441
|
|
|
446
|
-
N = ceil(len(
|
|
442
|
+
N = ceil(len(to_process_forms) / number_splits)
|
|
447
443
|
|
|
448
|
-
for
|
|
449
|
-
query
|
|
450
|
-
query["species_hash"] = {"$in": list(hash_chunk)}
|
|
451
|
-
yield {"query": query}
|
|
444
|
+
for formula_chunk in grouper(to_process_forms, N):
|
|
445
|
+
yield {"query": {"formula_alphabetical": {"$in": list(formula_chunk)}}}
|
|
452
446
|
|
|
453
447
|
def get_items(self) -> Iterator[List[Dict]]:
|
|
454
448
|
"""
|
|
@@ -501,21 +495,21 @@ class MoleculesBuilder(Builder):
|
|
|
501
495
|
xyz_species_id_map[d[self.assoc.key]] = this_id
|
|
502
496
|
to_process_docs = assoc_ids - processed_docs
|
|
503
497
|
|
|
504
|
-
|
|
505
|
-
d["
|
|
498
|
+
to_process_forms = {
|
|
499
|
+
d["formula_alphabetical"]
|
|
506
500
|
for d in all_assoc
|
|
507
501
|
if xyz_species_id_map[d[self.assoc.key]] in to_process_docs
|
|
508
502
|
}
|
|
509
503
|
|
|
510
504
|
self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
|
|
511
|
-
self.logger.info(f"Found {len(
|
|
505
|
+
self.logger.info(f"Found {len(to_process_forms)} unprocessed formulas")
|
|
512
506
|
|
|
513
507
|
# Set total for builder bars to have a total
|
|
514
|
-
self.total = len(
|
|
508
|
+
self.total = len(to_process_forms)
|
|
515
509
|
|
|
516
|
-
for
|
|
510
|
+
for formula in to_process_forms:
|
|
517
511
|
assoc_query = dict(temp_query)
|
|
518
|
-
assoc_query["
|
|
512
|
+
assoc_query["formula_alphabetical"] = formula
|
|
519
513
|
assoc = list(self.assoc.query(criteria=assoc_query))
|
|
520
514
|
|
|
521
515
|
yield assoc
|
|
@@ -532,9 +526,9 @@ class MoleculesBuilder(Builder):
|
|
|
532
526
|
"""
|
|
533
527
|
|
|
534
528
|
assoc = [MoleculeDoc(**item) for item in items]
|
|
535
|
-
|
|
529
|
+
formula = assoc[0].formula_alphabetical
|
|
536
530
|
mol_ids = [a.molecule_id for a in assoc]
|
|
537
|
-
self.logger.debug(f"Processing {
|
|
531
|
+
self.logger.debug(f"Processing {formula} : {mol_ids}")
|
|
538
532
|
|
|
539
533
|
complete_mol_docs = list()
|
|
540
534
|
|
|
@@ -652,7 +646,7 @@ class MoleculesBuilder(Builder):
|
|
|
652
646
|
|
|
653
647
|
complete_mol_docs.append(base_doc)
|
|
654
648
|
|
|
655
|
-
self.logger.debug(f"Produced {len(complete_mol_docs)} molecules for {
|
|
649
|
+
self.logger.debug(f"Produced {len(complete_mol_docs)} molecules for {formula}")
|
|
656
650
|
|
|
657
651
|
return jsanitize(
|
|
658
652
|
[mol.model_dump() for mol in complete_mol_docs], allow_bson=True
|
|
Binary file
|
|
@@ -32,23 +32,21 @@ emmet/builders/matscholar/missing_compositions.py,sha256=RGQOEhfmJ6YMbjD4osLWqs7
|
|
|
32
32
|
emmet/builders/mobility/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
33
33
|
emmet/builders/mobility/migration_graph.py,sha256=WEXtPSn0UE5Q8mnvJ-T19FB3_LrZ3ojvNyRBs1PXWRg,3923
|
|
34
34
|
emmet/builders/molecules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
|
-
emmet/builders/molecules/atomic.py,sha256=
|
|
36
|
-
emmet/builders/molecules/bonds.py,sha256=
|
|
37
|
-
emmet/builders/molecules/
|
|
38
|
-
emmet/builders/molecules/
|
|
39
|
-
emmet/builders/molecules/
|
|
40
|
-
emmet/builders/molecules/
|
|
41
|
-
emmet/builders/molecules/
|
|
42
|
-
emmet/builders/molecules/
|
|
43
|
-
emmet/builders/molecules/trajectory.py,sha256=oKrmWtKJ6mC0d1uJRE7g72X97kkS7JQ7nMhupVOUEUU,18163
|
|
44
|
-
emmet/builders/molecules/vibration.py,sha256=_FA-tRixghsJdlls6oO9U2abxCHWqWv5SucbxpP5mVQ,9520
|
|
35
|
+
emmet/builders/molecules/atomic.py,sha256=X590oMDIPaHJMk0Xuy_r4hATm1TEj0hKfJD6ofi1asg,20823
|
|
36
|
+
emmet/builders/molecules/bonds.py,sha256=5orXkCBgBBOqz1iGoacDUb5iGMsOPVmdmghtQLud9ZU,12086
|
|
37
|
+
emmet/builders/molecules/metal_binding.py,sha256=gyL5Nu1pmu0ZJq4HgTxwLh8_1696pJiMnrFggFESnjo,23317
|
|
38
|
+
emmet/builders/molecules/orbitals.py,sha256=W_7_3zz9bFfHQZgAMdp3PSSt4PDH4DVZVervHPrv1Pk,10041
|
|
39
|
+
emmet/builders/molecules/redox.py,sha256=HHmj-nFMTEV7qq3g3GM2lB5RdLUMBE-xOIZogIgmORc,18427
|
|
40
|
+
emmet/builders/molecules/summary.py,sha256=I9-4-oKoUSg5sxvr-CHYVIuCyD48mpV9rsMno4pbbOk,13198
|
|
41
|
+
emmet/builders/molecules/thermo.py,sha256=DizVM9rLXo7AhHW3cq0Bo6vO1OI6YtK4PeIVixmt47g,19855
|
|
42
|
+
emmet/builders/molecules/vibration.py,sha256=9LNeKh8BHck-ooW4XzAZAFeio2u6bDwdsUV1aA5XVb4,9501
|
|
45
43
|
emmet/builders/qchem/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
|
-
emmet/builders/qchem/molecules.py,sha256=
|
|
44
|
+
emmet/builders/qchem/molecules.py,sha256=CZyVQzjfb-_gAS997BFbd9xkKwvwPWrquNH0Aoy6oY8,26217
|
|
47
45
|
emmet/builders/vasp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
48
46
|
emmet/builders/vasp/materials.py,sha256=5bjP-W5-gmSjDzmcHdF7bviwgk4ywUceCL4FcF9Ya9c,12700
|
|
49
|
-
emmet/builders/vasp/mp_potcar_stats.json.gz,sha256=
|
|
47
|
+
emmet/builders/vasp/mp_potcar_stats.json.gz,sha256=x3bn4gSMj1U_3bR2qKIaBtbJlYT-EJgoUIMFTA9bvaU,338957
|
|
50
48
|
emmet/builders/vasp/task_validator.py,sha256=bmRTDiOWof4rpHVg3ksoxocN9xxieYu7IE-ylMjYOVs,2922
|
|
51
|
-
emmet_builders-0.84.
|
|
52
|
-
emmet_builders-0.84.
|
|
53
|
-
emmet_builders-0.84.
|
|
54
|
-
emmet_builders-0.84.
|
|
49
|
+
emmet_builders-0.84.2rc9.dist-info/METADATA,sha256=KApWsvZ71L5eRNIbqjPK3TE1eP5Sb5uYVU-3bQ5AJBM,2162
|
|
50
|
+
emmet_builders-0.84.2rc9.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
|
51
|
+
emmet_builders-0.84.2rc9.dist-info/top_level.txt,sha256=6GcpbmWPeFhNCTfDFilb8GQ4T1UQu4z9c5jpobjwE-Q,6
|
|
52
|
+
emmet_builders-0.84.2rc9.dist-info/RECORD,,
|
|
@@ -1,282 +0,0 @@
|
|
|
1
|
-
from collections import defaultdict
|
|
2
|
-
from datetime import datetime
|
|
3
|
-
from itertools import chain
|
|
4
|
-
from math import ceil
|
|
5
|
-
from typing import Optional, Iterable, Iterator, List, Dict
|
|
6
|
-
|
|
7
|
-
from maggma.builders import Builder
|
|
8
|
-
from maggma.core import Store
|
|
9
|
-
from maggma.utils import grouper
|
|
10
|
-
|
|
11
|
-
from emmet.core.qchem.task import TaskDocument
|
|
12
|
-
from emmet.core.qchem.molecule import MoleculeDoc, evaluate_lot
|
|
13
|
-
from emmet.core.molecules.electric import ElectricMultipoleDoc
|
|
14
|
-
from emmet.core.utils import jsanitize
|
|
15
|
-
from emmet.builders.settings import EmmetBuildSettings
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
__author__ = "Evan Spotte-Smith"
|
|
19
|
-
|
|
20
|
-
SETTINGS = EmmetBuildSettings()
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
class ElectricMultipoleBuilder(Builder):
|
|
24
|
-
"""
|
|
25
|
-
The ElectricMultipoleBuilder defines the electric multipole properties of a MoleculeDoc.
|
|
26
|
-
|
|
27
|
-
This builder will attempt to build documents for each molecule, in each solvent.
|
|
28
|
-
For each molecule-solvent combination, the highest-quality
|
|
29
|
-
data available (based on level of theory and electronic energy) will be used.
|
|
30
|
-
|
|
31
|
-
The process is as follows:
|
|
32
|
-
1. Gather MoleculeDocs by species hash
|
|
33
|
-
2. For each molecule, group all tasks by solvent.
|
|
34
|
-
3. For each solvent, grab the best TaskDoc (doc with electric dipole/multipole information
|
|
35
|
-
that has the highest level of theory with the lowest electronic energy) for the molecule
|
|
36
|
-
4. Convert TaskDoc to ElectricMultipoleDoc
|
|
37
|
-
"""
|
|
38
|
-
|
|
39
|
-
def __init__(
|
|
40
|
-
self,
|
|
41
|
-
tasks: Store,
|
|
42
|
-
molecules: Store,
|
|
43
|
-
multipoles: Store,
|
|
44
|
-
query: Optional[Dict] = None,
|
|
45
|
-
settings: Optional[EmmetBuildSettings] = None,
|
|
46
|
-
**kwargs,
|
|
47
|
-
):
|
|
48
|
-
self.tasks = tasks
|
|
49
|
-
self.molecules = molecules
|
|
50
|
-
self.multipoles = multipoles
|
|
51
|
-
self.query = query if query else dict()
|
|
52
|
-
self.settings = EmmetBuildSettings.autoload(settings)
|
|
53
|
-
self.kwargs = kwargs
|
|
54
|
-
|
|
55
|
-
super().__init__(sources=[tasks, molecules], targets=[multipoles], **kwargs)
|
|
56
|
-
# Uncomment in case of issue with mrun not connecting automatically to collections
|
|
57
|
-
# for i in [self.tasks, self.molecules, self.multipoles]:
|
|
58
|
-
# try:
|
|
59
|
-
# i.connect()
|
|
60
|
-
# except Exception as e:
|
|
61
|
-
# print("Could not connect,", e)
|
|
62
|
-
|
|
63
|
-
def ensure_indexes(self):
|
|
64
|
-
"""
|
|
65
|
-
Ensures indices on the collections needed for building
|
|
66
|
-
"""
|
|
67
|
-
|
|
68
|
-
# Basic search index for tasks
|
|
69
|
-
self.tasks.ensure_index("task_id")
|
|
70
|
-
self.tasks.ensure_index("last_updated")
|
|
71
|
-
self.tasks.ensure_index("state")
|
|
72
|
-
self.tasks.ensure_index("formula_alphabetical")
|
|
73
|
-
self.tasks.ensure_index("species_hash")
|
|
74
|
-
|
|
75
|
-
# Search index for molecules
|
|
76
|
-
self.molecules.ensure_index("molecule_id")
|
|
77
|
-
self.molecules.ensure_index("last_updated")
|
|
78
|
-
self.molecules.ensure_index("task_ids")
|
|
79
|
-
self.molecules.ensure_index("formula_alphabetical")
|
|
80
|
-
self.molecules.ensure_index("species_hash")
|
|
81
|
-
|
|
82
|
-
# Search index for electric
|
|
83
|
-
self.multipoles.ensure_index("method")
|
|
84
|
-
self.multipoles.ensure_index("molecule_id")
|
|
85
|
-
self.multipoles.ensure_index("task_id")
|
|
86
|
-
self.multipoles.ensure_index("solvent")
|
|
87
|
-
self.multipoles.ensure_index("lot_solvent")
|
|
88
|
-
self.multipoles.ensure_index("property_id")
|
|
89
|
-
self.multipoles.ensure_index("last_updated")
|
|
90
|
-
self.multipoles.ensure_index("formula_alphabetical")
|
|
91
|
-
|
|
92
|
-
def prechunk(self, number_splits: int) -> Iterable[Dict]: # pragma: no cover
|
|
93
|
-
"""Prechunk the builder for distributed computation"""
|
|
94
|
-
|
|
95
|
-
temp_query = dict(self.query)
|
|
96
|
-
temp_query["deprecated"] = False
|
|
97
|
-
|
|
98
|
-
self.logger.info("Finding documents to process")
|
|
99
|
-
all_mols = list(
|
|
100
|
-
self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
|
|
101
|
-
)
|
|
102
|
-
|
|
103
|
-
processed_docs = set([e for e in self.multipoles.distinct("molecule_id")])
|
|
104
|
-
to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
|
|
105
|
-
to_process_hashes = {
|
|
106
|
-
d["species_hash"]
|
|
107
|
-
for d in all_mols
|
|
108
|
-
if d[self.molecules.key] in to_process_docs
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
N = ceil(len(to_process_hashes) / number_splits)
|
|
112
|
-
|
|
113
|
-
for hash_chunk in grouper(to_process_hashes, N):
|
|
114
|
-
query = dict(temp_query)
|
|
115
|
-
query["species_hash"] = {"$in": list(hash_chunk)}
|
|
116
|
-
yield {"query": query}
|
|
117
|
-
|
|
118
|
-
def get_items(self) -> Iterator[List[Dict]]:
|
|
119
|
-
"""
|
|
120
|
-
Gets all items to process into multipole documents.
|
|
121
|
-
This does no datetime checking; relying on whether
|
|
122
|
-
task_ids are included in the multipoles Store
|
|
123
|
-
|
|
124
|
-
Returns:
|
|
125
|
-
generator or list relevant tasks and molecules to process into documents
|
|
126
|
-
"""
|
|
127
|
-
|
|
128
|
-
self.logger.info("Electric multipoles builder started")
|
|
129
|
-
self.logger.info("Setting indexes")
|
|
130
|
-
self.ensure_indexes()
|
|
131
|
-
|
|
132
|
-
# Save timestamp to mark buildtime
|
|
133
|
-
self.timestamp = datetime.utcnow()
|
|
134
|
-
|
|
135
|
-
# Get all processed molecules
|
|
136
|
-
temp_query = dict(self.query)
|
|
137
|
-
temp_query["deprecated"] = False
|
|
138
|
-
|
|
139
|
-
self.logger.info("Finding documents to process")
|
|
140
|
-
all_mols = list(
|
|
141
|
-
self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
|
|
142
|
-
)
|
|
143
|
-
|
|
144
|
-
processed_docs = set([e for e in self.multipoles.distinct("molecule_id")])
|
|
145
|
-
to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
|
|
146
|
-
to_process_hashes = {
|
|
147
|
-
d["species_hash"]
|
|
148
|
-
for d in all_mols
|
|
149
|
-
if d[self.molecules.key] in to_process_docs
|
|
150
|
-
}
|
|
151
|
-
|
|
152
|
-
self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
|
|
153
|
-
self.logger.info(f"Found {len(to_process_hashes)} unprocessed hashes")
|
|
154
|
-
|
|
155
|
-
# Set total for builder bars to have a total
|
|
156
|
-
self.total = len(to_process_hashes)
|
|
157
|
-
|
|
158
|
-
for shash in to_process_hashes:
|
|
159
|
-
mol_query = dict(temp_query)
|
|
160
|
-
mol_query["species_hash"] = shash
|
|
161
|
-
molecules = list(self.molecules.query(criteria=mol_query))
|
|
162
|
-
|
|
163
|
-
yield molecules
|
|
164
|
-
|
|
165
|
-
def process_item(self, items: List[Dict]) -> List[Dict]:
|
|
166
|
-
"""
|
|
167
|
-
Process the tasks into ElectricMultipoleDocs
|
|
168
|
-
|
|
169
|
-
Args:
|
|
170
|
-
tasks List[Dict] : a list of MoleculeDocs in dict form
|
|
171
|
-
|
|
172
|
-
Returns:
|
|
173
|
-
[dict] : a list of new electric multipole docs
|
|
174
|
-
"""
|
|
175
|
-
|
|
176
|
-
mols = [MoleculeDoc(**item) for item in items]
|
|
177
|
-
shash = mols[0].species_hash
|
|
178
|
-
mol_ids = [m.molecule_id for m in mols]
|
|
179
|
-
self.logger.debug(f"Processing {shash} : {mol_ids}")
|
|
180
|
-
|
|
181
|
-
multipole_docs = list()
|
|
182
|
-
|
|
183
|
-
for mol in mols:
|
|
184
|
-
# Relevant tasks are those with the correct charge and spin
|
|
185
|
-
# for which there are AT LEAST electric dipoles present
|
|
186
|
-
# (ideally, multipole information would also be present)
|
|
187
|
-
multip_entries = [
|
|
188
|
-
e
|
|
189
|
-
for e in mol.entries
|
|
190
|
-
if e["charge"] == mol.charge
|
|
191
|
-
and e["spin_multiplicity"] == mol.spin_multiplicity
|
|
192
|
-
and (e["output"].get("dipoles") is not None)
|
|
193
|
-
]
|
|
194
|
-
|
|
195
|
-
# Organize by solvent environment
|
|
196
|
-
by_solvent = defaultdict(list)
|
|
197
|
-
for entry in multip_entries:
|
|
198
|
-
by_solvent[entry["solvent"]].append(entry)
|
|
199
|
-
|
|
200
|
-
for solvent, entries in by_solvent.items():
|
|
201
|
-
# No documents with enthalpy and entropy
|
|
202
|
-
if len(entries) == 0:
|
|
203
|
-
continue
|
|
204
|
-
else:
|
|
205
|
-
best = sorted(
|
|
206
|
-
entries,
|
|
207
|
-
key=lambda x: (
|
|
208
|
-
sum(evaluate_lot(x["level_of_theory"])),
|
|
209
|
-
x["energy"],
|
|
210
|
-
),
|
|
211
|
-
)[0]
|
|
212
|
-
task = best["task_id"]
|
|
213
|
-
|
|
214
|
-
tdoc = self.tasks.query_one(
|
|
215
|
-
{
|
|
216
|
-
"task_id": task,
|
|
217
|
-
"species_hash": shash,
|
|
218
|
-
"orig": {"$exists": True},
|
|
219
|
-
}
|
|
220
|
-
)
|
|
221
|
-
|
|
222
|
-
if tdoc is None:
|
|
223
|
-
try:
|
|
224
|
-
tdoc = self.tasks.query_one(
|
|
225
|
-
{
|
|
226
|
-
"task_id": int(task),
|
|
227
|
-
"species_hash": shash,
|
|
228
|
-
"orig": {"$exists": True},
|
|
229
|
-
}
|
|
230
|
-
)
|
|
231
|
-
except ValueError:
|
|
232
|
-
tdoc = None
|
|
233
|
-
|
|
234
|
-
if tdoc is None:
|
|
235
|
-
continue
|
|
236
|
-
|
|
237
|
-
task_doc = TaskDocument(**tdoc)
|
|
238
|
-
|
|
239
|
-
if task_doc is None:
|
|
240
|
-
continue
|
|
241
|
-
|
|
242
|
-
multipole_doc = ElectricMultipoleDoc.from_task(
|
|
243
|
-
task_doc, molecule_id=mol.molecule_id, deprecated=False
|
|
244
|
-
)
|
|
245
|
-
multipole_docs.append(multipole_doc)
|
|
246
|
-
|
|
247
|
-
self.logger.debug(
|
|
248
|
-
f"Produced {len(multipole_docs)} electric multipole docs for {shash}"
|
|
249
|
-
)
|
|
250
|
-
|
|
251
|
-
return jsanitize([doc.model_dump() for doc in multipole_docs], allow_bson=True)
|
|
252
|
-
|
|
253
|
-
def update_targets(self, items: List[List[Dict]]):
|
|
254
|
-
"""
|
|
255
|
-
Inserts the new documents into the multipoles collection
|
|
256
|
-
|
|
257
|
-
Args:
|
|
258
|
-
items [[dict]]: A list of documents to update
|
|
259
|
-
"""
|
|
260
|
-
|
|
261
|
-
docs = list(chain.from_iterable(items)) # type: ignore
|
|
262
|
-
|
|
263
|
-
# Add timestamp
|
|
264
|
-
for item in docs:
|
|
265
|
-
item.update(
|
|
266
|
-
{
|
|
267
|
-
"_bt": self.timestamp,
|
|
268
|
-
}
|
|
269
|
-
)
|
|
270
|
-
|
|
271
|
-
molecule_ids = list({item["molecule_id"] for item in docs})
|
|
272
|
-
|
|
273
|
-
if len(items) > 0:
|
|
274
|
-
self.logger.info(f"Updating {len(docs)} electric multipole documents")
|
|
275
|
-
self.multipoles.remove_docs({self.multipoles.key: {"$in": molecule_ids}})
|
|
276
|
-
# Neither molecule_id nor method need to be unique, but the combination must be
|
|
277
|
-
self.multipoles.update(
|
|
278
|
-
docs=docs,
|
|
279
|
-
key=["molecule_id", "solvent"],
|
|
280
|
-
)
|
|
281
|
-
else:
|
|
282
|
-
self.logger.info("No items to update")
|