emmet-builders 0.84.3rc2__tar.gz → 0.84.3rc3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of emmet-builders might be problematic. Click here for more details.
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/PKG-INFO +1 -1
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/molecules/atomic.py +46 -48
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/molecules/bonds.py +24 -24
- emmet-builders-0.84.3rc3/emmet/builders/molecules/electric.py +282 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/molecules/metal_binding.py +20 -21
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/molecules/orbitals.py +23 -23
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/molecules/redox.py +27 -27
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/molecules/summary.py +36 -21
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/molecules/thermo.py +23 -23
- emmet-builders-0.84.3rc3/emmet/builders/molecules/trajectory.py +525 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/molecules/vibration.py +23 -23
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/qchem/molecules.py +21 -15
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet_builders.egg-info/PKG-INFO +1 -1
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet_builders.egg-info/SOURCES.txt +4 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/requirements/ubuntu-latest_py3.10.txt +13 -13
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/requirements/ubuntu-latest_py3.10_extras.txt +46 -43
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/requirements/ubuntu-latest_py3.11.txt +13 -13
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/requirements/ubuntu-latest_py3.11_extras.txt +46 -43
- emmet-builders-0.84.3rc2/tests/molecules/test_orbitals.py → emmet-builders-0.84.3rc3/tests/molecules/test_electric.py +7 -7
- emmet-builders-0.84.3rc3/tests/molecules/test_orbitals.py +70 -0
- emmet-builders-0.84.3rc3/tests/molecules/test_summary.py +222 -0
- emmet-builders-0.84.3rc3/tests/molecules/test_trajectory.py +50 -0
- emmet-builders-0.84.3rc2/tests/molecules/test_summary.py +0 -132
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/MANIFEST.in +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/__init__.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/abinit/__init__.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/abinit/phonon.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/abinit/sound_velocity.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/feff/__init__.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/feff/xas.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/materials/__init__.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/materials/absorption_spectrum.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/materials/alloys.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/materials/basic_descriptors.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/materials/bonds.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/materials/chemenv.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/materials/corrected_entries.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/materials/dielectric.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/materials/elasticity.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/materials/electrodes.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/materials/electronic_structure.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/materials/magnetism.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/materials/ml.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/materials/optimade.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/materials/oxidation_states.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/materials/piezoelectric.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/materials/provenance.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/materials/robocrys.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/materials/similarity.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/materials/substrates.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/materials/summary.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/materials/thermo.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/matscholar/missing_compositions.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/mobility/__init__.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/mobility/migration_graph.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/molecules/__init__.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/qchem/__init__.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/settings.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/utils.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/vasp/__init__.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/vasp/materials.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/vasp/mp_potcar_stats.json.gz +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet/builders/vasp/task_validator.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet_builders.egg-info/dependency_links.txt +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet_builders.egg-info/not-zip-safe +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet_builders.egg-info/requires.txt +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/emmet_builders.egg-info/top_level.txt +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/requirements/deployment.txt +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/setup.cfg +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/setup.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/__init__.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/conftest.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/molecules/__init__.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/molecules/test_atomic.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/molecules/test_bonds.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/molecules/test_metal_binding.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/molecules/test_redox.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/molecules/test_thermo.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/molecules/test_vibration.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/test_absorption.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/test_basic_descriptors.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/test_chemenv.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/test_corrected_entries_thermo.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/test_dielectric.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/test_elasticity.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/test_electronic_structure.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/test_magnetism.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/test_materials.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/test_ml.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/test_mobility.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/test_oxidation.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/test_piezoelectric.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/test_qchem.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/test_similarity.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/test_summary.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/test_utils.py +0 -0
- {emmet-builders-0.84.3rc2 → emmet-builders-0.84.3rc3}/tests/test_vasp.py +0 -0
|
@@ -41,7 +41,7 @@ class PartialChargesBuilder(Builder):
|
|
|
41
41
|
energy) will be used.
|
|
42
42
|
|
|
43
43
|
The process is as follows:
|
|
44
|
-
1. Gather MoleculeDocs by
|
|
44
|
+
1. Gather MoleculeDocs by species hash
|
|
45
45
|
2. For each molecule, group all tasks by solvent.
|
|
46
46
|
3. For each solvent, sort tasks by level of theory and electronic energy
|
|
47
47
|
4. For each method:
|
|
@@ -86,12 +86,14 @@ class PartialChargesBuilder(Builder):
|
|
|
86
86
|
self.tasks.ensure_index("last_updated")
|
|
87
87
|
self.tasks.ensure_index("state")
|
|
88
88
|
self.tasks.ensure_index("formula_alphabetical")
|
|
89
|
+
self.tasks.ensure_index("species_hash")
|
|
89
90
|
|
|
90
91
|
# Search index for molecules
|
|
91
92
|
self.molecules.ensure_index("molecule_id")
|
|
92
93
|
self.molecules.ensure_index("last_updated")
|
|
93
94
|
self.molecules.ensure_index("task_ids")
|
|
94
95
|
self.molecules.ensure_index("formula_alphabetical")
|
|
96
|
+
self.molecules.ensure_index("species_hash")
|
|
95
97
|
|
|
96
98
|
# Search index for charges
|
|
97
99
|
self.charges.ensure_index("molecule_id")
|
|
@@ -111,23 +113,23 @@ class PartialChargesBuilder(Builder):
|
|
|
111
113
|
|
|
112
114
|
self.logger.info("Finding documents to process")
|
|
113
115
|
all_mols = list(
|
|
114
|
-
self.molecules.query(
|
|
115
|
-
temp_query, [self.molecules.key, "formula_alphabetical"]
|
|
116
|
-
)
|
|
116
|
+
self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
|
|
117
117
|
)
|
|
118
118
|
|
|
119
119
|
processed_docs = set([e for e in self.charges.distinct("molecule_id")])
|
|
120
120
|
to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
|
|
121
|
-
|
|
122
|
-
d["
|
|
121
|
+
to_process_hashes = {
|
|
122
|
+
d["species_hash"]
|
|
123
123
|
for d in all_mols
|
|
124
124
|
if d[self.molecules.key] in to_process_docs
|
|
125
125
|
}
|
|
126
126
|
|
|
127
|
-
N = ceil(len(
|
|
127
|
+
N = ceil(len(to_process_hashes) / number_splits)
|
|
128
128
|
|
|
129
|
-
for
|
|
130
|
-
|
|
129
|
+
for hash_chunk in grouper(to_process_hashes, N):
|
|
130
|
+
query = dict(temp_query)
|
|
131
|
+
query["species_hash"] = {"$in": list(hash_chunk)}
|
|
132
|
+
yield {"query": query}
|
|
131
133
|
|
|
132
134
|
def get_items(self) -> Iterator[List[Dict]]:
|
|
133
135
|
"""
|
|
@@ -152,28 +154,26 @@ class PartialChargesBuilder(Builder):
|
|
|
152
154
|
|
|
153
155
|
self.logger.info("Finding documents to process")
|
|
154
156
|
all_mols = list(
|
|
155
|
-
self.molecules.query(
|
|
156
|
-
temp_query, [self.molecules.key, "formula_alphabetical"]
|
|
157
|
-
)
|
|
157
|
+
self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
|
|
158
158
|
)
|
|
159
159
|
|
|
160
160
|
processed_docs = set([e for e in self.charges.distinct("molecule_id")])
|
|
161
161
|
to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
|
|
162
|
-
|
|
163
|
-
d["
|
|
162
|
+
to_process_hashes = {
|
|
163
|
+
d["species_hash"]
|
|
164
164
|
for d in all_mols
|
|
165
165
|
if d[self.molecules.key] in to_process_docs
|
|
166
166
|
}
|
|
167
167
|
|
|
168
168
|
self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
|
|
169
|
-
self.logger.info(f"Found {len(
|
|
169
|
+
self.logger.info(f"Found {len(to_process_hashes)} unprocessed hashes")
|
|
170
170
|
|
|
171
171
|
# Set total for builder bars to have a total
|
|
172
|
-
self.total = len(
|
|
172
|
+
self.total = len(to_process_hashes)
|
|
173
173
|
|
|
174
|
-
for
|
|
174
|
+
for shash in to_process_hashes:
|
|
175
175
|
mol_query = dict(temp_query)
|
|
176
|
-
mol_query["
|
|
176
|
+
mol_query["species_hash"] = shash
|
|
177
177
|
molecules = list(self.molecules.query(criteria=mol_query))
|
|
178
178
|
|
|
179
179
|
yield molecules
|
|
@@ -190,9 +190,9 @@ class PartialChargesBuilder(Builder):
|
|
|
190
190
|
"""
|
|
191
191
|
|
|
192
192
|
mols = [MoleculeDoc(**item) for item in items]
|
|
193
|
-
|
|
193
|
+
shash = mols[0].species_hash
|
|
194
194
|
mol_ids = [m.molecule_id for m in mols]
|
|
195
|
-
self.logger.debug(f"Processing {
|
|
195
|
+
self.logger.debug(f"Processing {shash} : {mol_ids}")
|
|
196
196
|
|
|
197
197
|
charges_docs = list()
|
|
198
198
|
|
|
@@ -237,7 +237,7 @@ class PartialChargesBuilder(Builder):
|
|
|
237
237
|
tdoc = self.tasks.query_one(
|
|
238
238
|
{
|
|
239
239
|
"task_id": task,
|
|
240
|
-
"
|
|
240
|
+
"species_hash": shash,
|
|
241
241
|
"orig": {"$exists": True},
|
|
242
242
|
}
|
|
243
243
|
)
|
|
@@ -247,7 +247,7 @@ class PartialChargesBuilder(Builder):
|
|
|
247
247
|
tdoc = self.tasks.query_one(
|
|
248
248
|
{
|
|
249
249
|
"task_id": int(task),
|
|
250
|
-
"
|
|
250
|
+
"species_hash": shash,
|
|
251
251
|
"orig": {"$exists": True},
|
|
252
252
|
}
|
|
253
253
|
)
|
|
@@ -271,7 +271,7 @@ class PartialChargesBuilder(Builder):
|
|
|
271
271
|
|
|
272
272
|
charges_docs.append(doc)
|
|
273
273
|
|
|
274
|
-
self.logger.debug(f"Produced {len(charges_docs)} charges docs for {
|
|
274
|
+
self.logger.debug(f"Produced {len(charges_docs)} charges docs for {shash}")
|
|
275
275
|
|
|
276
276
|
return jsanitize([doc.model_dump() for doc in charges_docs], allow_bson=True)
|
|
277
277
|
|
|
@@ -320,7 +320,7 @@ class PartialSpinsBuilder(Builder):
|
|
|
320
320
|
data available (based on level of theory and electronic energy) will be used.
|
|
321
321
|
|
|
322
322
|
The process is as follows:
|
|
323
|
-
1. Gather MoleculeDocs by
|
|
323
|
+
1. Gather MoleculeDocs by species_hash
|
|
324
324
|
2. For each molecule, group all tasks by solvent.
|
|
325
325
|
3. For each solvent, sort tasks by level of theory and electronic energy
|
|
326
326
|
4. For each method:
|
|
@@ -365,12 +365,14 @@ class PartialSpinsBuilder(Builder):
|
|
|
365
365
|
self.tasks.ensure_index("last_updated")
|
|
366
366
|
self.tasks.ensure_index("state")
|
|
367
367
|
self.tasks.ensure_index("formula_alphabetical")
|
|
368
|
+
self.tasks.ensure_index("species_hash")
|
|
368
369
|
|
|
369
370
|
# Search index for molecules
|
|
370
371
|
self.molecules.ensure_index("molecule_id")
|
|
371
372
|
self.molecules.ensure_index("last_updated")
|
|
372
373
|
self.molecules.ensure_index("task_ids")
|
|
373
374
|
self.molecules.ensure_index("formula_alphabetical")
|
|
375
|
+
self.molecules.ensure_index("species_hash")
|
|
374
376
|
|
|
375
377
|
# Search index for spins
|
|
376
378
|
self.spins.ensure_index("molecule_id")
|
|
@@ -390,23 +392,23 @@ class PartialSpinsBuilder(Builder):
|
|
|
390
392
|
|
|
391
393
|
self.logger.info("Finding documents to process")
|
|
392
394
|
all_mols = list(
|
|
393
|
-
self.molecules.query(
|
|
394
|
-
temp_query, [self.molecules.key, "formula_alphabetical"]
|
|
395
|
-
)
|
|
395
|
+
self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
|
|
396
396
|
)
|
|
397
397
|
|
|
398
398
|
processed_docs = set([e for e in self.spins.distinct("molecule_id")])
|
|
399
399
|
to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
|
|
400
|
-
|
|
401
|
-
d["
|
|
400
|
+
to_process_hashes = {
|
|
401
|
+
d["species_hash"]
|
|
402
402
|
for d in all_mols
|
|
403
403
|
if d[self.molecules.key] in to_process_docs
|
|
404
404
|
}
|
|
405
405
|
|
|
406
|
-
N = ceil(len(
|
|
406
|
+
N = ceil(len(to_process_hashes) / number_splits)
|
|
407
407
|
|
|
408
|
-
for
|
|
409
|
-
|
|
408
|
+
for hash_chunk in grouper(to_process_hashes, N):
|
|
409
|
+
query = dict(temp_query)
|
|
410
|
+
query["species_hash"] = {"$in": list(hash_chunk)}
|
|
411
|
+
yield {"query": query}
|
|
410
412
|
|
|
411
413
|
def get_items(self) -> Iterator[List[Dict]]:
|
|
412
414
|
"""
|
|
@@ -431,28 +433,26 @@ class PartialSpinsBuilder(Builder):
|
|
|
431
433
|
|
|
432
434
|
self.logger.info("Finding documents to process")
|
|
433
435
|
all_mols = list(
|
|
434
|
-
self.molecules.query(
|
|
435
|
-
temp_query, [self.molecules.key, "formula_alphabetical"]
|
|
436
|
-
)
|
|
436
|
+
self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
|
|
437
437
|
)
|
|
438
438
|
|
|
439
439
|
processed_docs = set([e for e in self.spins.distinct("molecule_id")])
|
|
440
440
|
to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
|
|
441
|
-
|
|
442
|
-
d["
|
|
441
|
+
to_process_hashes = {
|
|
442
|
+
d["species_hash"]
|
|
443
443
|
for d in all_mols
|
|
444
444
|
if d[self.molecules.key] in to_process_docs
|
|
445
445
|
}
|
|
446
446
|
|
|
447
447
|
self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
|
|
448
|
-
self.logger.info(f"Found {len(
|
|
448
|
+
self.logger.info(f"Found {len(to_process_hashes)} unprocessed hashes")
|
|
449
449
|
|
|
450
450
|
# Set total for builder bars to have a total
|
|
451
|
-
self.total = len(
|
|
451
|
+
self.total = len(to_process_hashes)
|
|
452
452
|
|
|
453
|
-
for
|
|
453
|
+
for shash in to_process_hashes:
|
|
454
454
|
mol_query = dict(temp_query)
|
|
455
|
-
mol_query["
|
|
455
|
+
mol_query["species_hash"] = shash
|
|
456
456
|
molecules = list(self.molecules.query(criteria=mol_query))
|
|
457
457
|
|
|
458
458
|
yield molecules
|
|
@@ -469,9 +469,9 @@ class PartialSpinsBuilder(Builder):
|
|
|
469
469
|
"""
|
|
470
470
|
|
|
471
471
|
mols = [MoleculeDoc(**item) for item in items]
|
|
472
|
-
|
|
472
|
+
shash = mols[0].species_hash
|
|
473
473
|
mol_ids = [m.molecule_id for m in mols]
|
|
474
|
-
self.logger.debug(f"Processing {
|
|
474
|
+
self.logger.debug(f"Processing {shash} : {mol_ids}")
|
|
475
475
|
|
|
476
476
|
spins_docs = list()
|
|
477
477
|
|
|
@@ -520,7 +520,7 @@ class PartialSpinsBuilder(Builder):
|
|
|
520
520
|
tdoc = self.tasks.query_one(
|
|
521
521
|
{
|
|
522
522
|
"task_id": task,
|
|
523
|
-
"
|
|
523
|
+
"species_hash": shash,
|
|
524
524
|
"orig": {"$exists": True},
|
|
525
525
|
}
|
|
526
526
|
)
|
|
@@ -530,7 +530,7 @@ class PartialSpinsBuilder(Builder):
|
|
|
530
530
|
tdoc = self.tasks.query_one(
|
|
531
531
|
{
|
|
532
532
|
"task_id": int(task),
|
|
533
|
-
"
|
|
533
|
+
"species_hash": shash,
|
|
534
534
|
"orig": {"$exists": True},
|
|
535
535
|
}
|
|
536
536
|
)
|
|
@@ -551,9 +551,7 @@ class PartialSpinsBuilder(Builder):
|
|
|
551
551
|
|
|
552
552
|
spins_docs.append(doc)
|
|
553
553
|
|
|
554
|
-
self.logger.debug(
|
|
555
|
-
f"Produced {len(spins_docs)} partial spins docs for {formula}"
|
|
556
|
-
)
|
|
554
|
+
self.logger.debug(f"Produced {len(spins_docs)} partial spins docs for {shash}")
|
|
557
555
|
|
|
558
556
|
return jsanitize([doc.model_dump() for doc in spins_docs], allow_bson=True)
|
|
559
557
|
|
|
@@ -40,7 +40,7 @@ class BondingBuilder(Builder):
|
|
|
40
40
|
data available (based on level of theory and electronic energy) will be used.
|
|
41
41
|
|
|
42
42
|
The process is as follows:
|
|
43
|
-
1. Gather MoleculeDocs by
|
|
43
|
+
1. Gather MoleculeDocs by species hash
|
|
44
44
|
2. For each molecule, group all tasks by solvent.
|
|
45
45
|
3. For each solvent, sort tasks by level of theory and electronic energy
|
|
46
46
|
4. For each method:
|
|
@@ -85,12 +85,14 @@ class BondingBuilder(Builder):
|
|
|
85
85
|
self.tasks.ensure_index("last_updated")
|
|
86
86
|
self.tasks.ensure_index("state")
|
|
87
87
|
self.tasks.ensure_index("formula_alphabetical")
|
|
88
|
+
self.tasks.ensure_index("species_hash")
|
|
88
89
|
|
|
89
90
|
# Search index for molecules
|
|
90
91
|
self.molecules.ensure_index("molecule_id")
|
|
91
92
|
self.molecules.ensure_index("last_updated")
|
|
92
93
|
self.molecules.ensure_index("task_ids")
|
|
93
94
|
self.molecules.ensure_index("formula_alphabetical")
|
|
95
|
+
self.molecules.ensure_index("species_hash")
|
|
94
96
|
|
|
95
97
|
# Search index for bonds
|
|
96
98
|
self.bonds.ensure_index("molecule_id")
|
|
@@ -110,23 +112,23 @@ class BondingBuilder(Builder):
|
|
|
110
112
|
|
|
111
113
|
self.logger.info("Finding documents to process")
|
|
112
114
|
all_mols = list(
|
|
113
|
-
self.molecules.query(
|
|
114
|
-
temp_query, [self.molecules.key, "formula_alphabetical"]
|
|
115
|
-
)
|
|
115
|
+
self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
|
|
116
116
|
)
|
|
117
117
|
|
|
118
118
|
processed_docs = set([e for e in self.bonds.distinct("molecule_id")])
|
|
119
119
|
to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
|
|
120
|
-
|
|
121
|
-
d["
|
|
120
|
+
to_process_hashes = {
|
|
121
|
+
d["species_hash"]
|
|
122
122
|
for d in all_mols
|
|
123
123
|
if d[self.molecules.key] in to_process_docs
|
|
124
124
|
}
|
|
125
125
|
|
|
126
|
-
N = ceil(len(
|
|
126
|
+
N = ceil(len(to_process_hashes) / number_splits)
|
|
127
127
|
|
|
128
|
-
for
|
|
129
|
-
|
|
128
|
+
for hash_chunk in grouper(to_process_hashes, N):
|
|
129
|
+
query = dict(temp_query)
|
|
130
|
+
query["species_hash"] = {"$in": list(hash_chunk)}
|
|
131
|
+
yield {"query": query}
|
|
130
132
|
|
|
131
133
|
def get_items(self) -> Iterator[List[Dict]]:
|
|
132
134
|
"""
|
|
@@ -151,28 +153,26 @@ class BondingBuilder(Builder):
|
|
|
151
153
|
|
|
152
154
|
self.logger.info("Finding documents to process")
|
|
153
155
|
all_mols = list(
|
|
154
|
-
self.molecules.query(
|
|
155
|
-
temp_query, [self.molecules.key, "formula_alphabetical"]
|
|
156
|
-
)
|
|
156
|
+
self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
|
|
157
157
|
)
|
|
158
158
|
|
|
159
159
|
processed_docs = set([e for e in self.bonds.distinct("molecule_id")])
|
|
160
160
|
to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
|
|
161
|
-
|
|
162
|
-
d["
|
|
161
|
+
to_process_hashes = {
|
|
162
|
+
d["species_hash"]
|
|
163
163
|
for d in all_mols
|
|
164
164
|
if d[self.molecules.key] in to_process_docs
|
|
165
165
|
}
|
|
166
166
|
|
|
167
167
|
self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
|
|
168
|
-
self.logger.info(f"Found {len(
|
|
168
|
+
self.logger.info(f"Found {len(to_process_hashes)} unprocessed hashes")
|
|
169
169
|
|
|
170
170
|
# Set total for builder bars to have a total
|
|
171
|
-
self.total = len(
|
|
171
|
+
self.total = len(to_process_hashes)
|
|
172
172
|
|
|
173
|
-
for
|
|
173
|
+
for shash in to_process_hashes:
|
|
174
174
|
mol_query = dict(temp_query)
|
|
175
|
-
mol_query["
|
|
175
|
+
mol_query["species_hash"] = shash
|
|
176
176
|
molecules = list(self.molecules.query(criteria=mol_query))
|
|
177
177
|
|
|
178
178
|
yield molecules
|
|
@@ -189,9 +189,9 @@ class BondingBuilder(Builder):
|
|
|
189
189
|
"""
|
|
190
190
|
|
|
191
191
|
mols = [MoleculeDoc(**item) for item in items]
|
|
192
|
-
|
|
192
|
+
shash = mols[0].species_hash
|
|
193
193
|
mol_ids = [m.molecule_id for m in mols]
|
|
194
|
-
self.logger.debug(f"Processing {
|
|
194
|
+
self.logger.debug(f"Processing {shash} : {mol_ids}")
|
|
195
195
|
|
|
196
196
|
bonding_docs = list()
|
|
197
197
|
|
|
@@ -255,7 +255,7 @@ class BondingBuilder(Builder):
|
|
|
255
255
|
tdoc = self.tasks.query_one(
|
|
256
256
|
{
|
|
257
257
|
"task_id": task,
|
|
258
|
-
"
|
|
258
|
+
"species_hash": shash,
|
|
259
259
|
"orig": {"$exists": True},
|
|
260
260
|
}
|
|
261
261
|
)
|
|
@@ -265,7 +265,7 @@ class BondingBuilder(Builder):
|
|
|
265
265
|
tdoc = self.tasks.query_one(
|
|
266
266
|
{
|
|
267
267
|
"task_id": int(task),
|
|
268
|
-
"
|
|
268
|
+
"species_hash": shash,
|
|
269
269
|
"orig": {"$exists": True},
|
|
270
270
|
}
|
|
271
271
|
)
|
|
@@ -288,13 +288,13 @@ class BondingBuilder(Builder):
|
|
|
288
288
|
)
|
|
289
289
|
bonding_docs.append(doc)
|
|
290
290
|
|
|
291
|
-
self.logger.debug(f"Produced {len(bonding_docs)} bonding docs for {
|
|
291
|
+
self.logger.debug(f"Produced {len(bonding_docs)} bonding docs for {shash}")
|
|
292
292
|
|
|
293
293
|
return jsanitize([doc.model_dump() for doc in bonding_docs], allow_bson=True)
|
|
294
294
|
|
|
295
295
|
def update_targets(self, items: List[List[Dict]]):
|
|
296
296
|
"""
|
|
297
|
-
Inserts the new documents into the
|
|
297
|
+
Inserts the new documents into the bonds collection
|
|
298
298
|
|
|
299
299
|
Args:
|
|
300
300
|
items [[dict]]: A list of documents to update
|