emmet-builders 0.84.2rc8__tar.gz → 0.84.2rc9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of emmet-builders has been flagged as possibly problematic. See the release details on the package registry for more information.
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/PKG-INFO +1 -1
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/molecules/atomic.py +48 -46
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/molecules/bonds.py +24 -24
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/molecules/metal_binding.py +21 -20
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/molecules/orbitals.py +23 -23
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/molecules/redox.py +27 -27
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/molecules/summary.py +21 -36
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/molecules/thermo.py +23 -23
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/molecules/vibration.py +23 -23
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/qchem/molecules.py +15 -21
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet_builders.egg-info/PKG-INFO +1 -1
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet_builders.egg-info/SOURCES.txt +0 -4
- emmet-builders-0.84.2rc8/tests/molecules/test_electric.py → emmet-builders-0.84.2rc9/tests/molecules/test_orbitals.py +7 -7
- emmet-builders-0.84.2rc9/tests/molecules/test_summary.py +132 -0
- emmet-builders-0.84.2rc8/emmet/builders/molecules/electric.py +0 -282
- emmet-builders-0.84.2rc8/emmet/builders/molecules/trajectory.py +0 -525
- emmet-builders-0.84.2rc8/tests/molecules/test_orbitals.py +0 -70
- emmet-builders-0.84.2rc8/tests/molecules/test_summary.py +0 -222
- emmet-builders-0.84.2rc8/tests/molecules/test_trajectory.py +0 -50
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/MANIFEST.in +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/__init__.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/abinit/__init__.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/abinit/phonon.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/abinit/sound_velocity.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/feff/__init__.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/feff/xas.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/__init__.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/absorption_spectrum.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/alloys.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/basic_descriptors.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/bonds.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/chemenv.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/corrected_entries.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/dielectric.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/elasticity.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/electrodes.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/electronic_structure.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/magnetism.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/ml.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/optimade.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/oxidation_states.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/piezoelectric.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/provenance.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/robocrys.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/similarity.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/substrates.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/summary.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/thermo.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/matscholar/missing_compositions.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/mobility/__init__.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/mobility/migration_graph.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/molecules/__init__.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/qchem/__init__.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/settings.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/utils.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/vasp/__init__.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/vasp/materials.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/vasp/mp_potcar_stats.json.gz +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/vasp/task_validator.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet_builders.egg-info/dependency_links.txt +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet_builders.egg-info/not-zip-safe +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet_builders.egg-info/requires.txt +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet_builders.egg-info/top_level.txt +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/requirements/deployment.txt +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/requirements/ubuntu-latest_py3.10.txt +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/requirements/ubuntu-latest_py3.10_extras.txt +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/requirements/ubuntu-latest_py3.11.txt +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/requirements/ubuntu-latest_py3.11_extras.txt +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/setup.cfg +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/setup.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/__init__.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/conftest.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/molecules/__init__.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/molecules/test_atomic.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/molecules/test_bonds.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/molecules/test_metal_binding.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/molecules/test_redox.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/molecules/test_thermo.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/molecules/test_vibration.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_absorption.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_basic_descriptors.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_chemenv.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_corrected_entries_thermo.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_dielectric.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_elasticity.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_electronic_structure.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_magnetism.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_materials.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_ml.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_mobility.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_oxidation.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_piezoelectric.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_qchem.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_similarity.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_summary.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_utils.py +0 -0
- {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_vasp.py +0 -0
|
{emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/molecules/atomic.py
RENAMED
|
@@ -41,7 +41,7 @@ class PartialChargesBuilder(Builder):
|
|
|
41
41
|
energy) will be used.
|
|
42
42
|
|
|
43
43
|
The process is as follows:
|
|
44
|
-
1. Gather MoleculeDocs by
|
|
44
|
+
1. Gather MoleculeDocs by formula
|
|
45
45
|
2. For each molecule, group all tasks by solvent.
|
|
46
46
|
3. For each solvent, sort tasks by level of theory and electronic energy
|
|
47
47
|
4. For each method:
|
|
@@ -86,14 +86,12 @@ class PartialChargesBuilder(Builder):
|
|
|
86
86
|
self.tasks.ensure_index("last_updated")
|
|
87
87
|
self.tasks.ensure_index("state")
|
|
88
88
|
self.tasks.ensure_index("formula_alphabetical")
|
|
89
|
-
self.tasks.ensure_index("species_hash")
|
|
90
89
|
|
|
91
90
|
# Search index for molecules
|
|
92
91
|
self.molecules.ensure_index("molecule_id")
|
|
93
92
|
self.molecules.ensure_index("last_updated")
|
|
94
93
|
self.molecules.ensure_index("task_ids")
|
|
95
94
|
self.molecules.ensure_index("formula_alphabetical")
|
|
96
|
-
self.molecules.ensure_index("species_hash")
|
|
97
95
|
|
|
98
96
|
# Search index for charges
|
|
99
97
|
self.charges.ensure_index("molecule_id")
|
|
@@ -113,23 +111,23 @@ class PartialChargesBuilder(Builder):
|
|
|
113
111
|
|
|
114
112
|
self.logger.info("Finding documents to process")
|
|
115
113
|
all_mols = list(
|
|
116
|
-
self.molecules.query(
|
|
114
|
+
self.molecules.query(
|
|
115
|
+
temp_query, [self.molecules.key, "formula_alphabetical"]
|
|
116
|
+
)
|
|
117
117
|
)
|
|
118
118
|
|
|
119
119
|
processed_docs = set([e for e in self.charges.distinct("molecule_id")])
|
|
120
120
|
to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
|
|
121
|
-
|
|
122
|
-
d["
|
|
121
|
+
to_process_forms = {
|
|
122
|
+
d["formula_alphabetical"]
|
|
123
123
|
for d in all_mols
|
|
124
124
|
if d[self.molecules.key] in to_process_docs
|
|
125
125
|
}
|
|
126
126
|
|
|
127
|
-
N = ceil(len(
|
|
127
|
+
N = ceil(len(to_process_forms) / number_splits)
|
|
128
128
|
|
|
129
|
-
for
|
|
130
|
-
query
|
|
131
|
-
query["species_hash"] = {"$in": list(hash_chunk)}
|
|
132
|
-
yield {"query": query}
|
|
129
|
+
for formula_chunk in grouper(to_process_forms, N):
|
|
130
|
+
yield {"query": {"formula_alphabetical": {"$in": list(formula_chunk)}}}
|
|
133
131
|
|
|
134
132
|
def get_items(self) -> Iterator[List[Dict]]:
|
|
135
133
|
"""
|
|
@@ -154,26 +152,28 @@ class PartialChargesBuilder(Builder):
|
|
|
154
152
|
|
|
155
153
|
self.logger.info("Finding documents to process")
|
|
156
154
|
all_mols = list(
|
|
157
|
-
self.molecules.query(
|
|
155
|
+
self.molecules.query(
|
|
156
|
+
temp_query, [self.molecules.key, "formula_alphabetical"]
|
|
157
|
+
)
|
|
158
158
|
)
|
|
159
159
|
|
|
160
160
|
processed_docs = set([e for e in self.charges.distinct("molecule_id")])
|
|
161
161
|
to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
|
|
162
|
-
|
|
163
|
-
d["
|
|
162
|
+
to_process_forms = {
|
|
163
|
+
d["formula_alphabetical"]
|
|
164
164
|
for d in all_mols
|
|
165
165
|
if d[self.molecules.key] in to_process_docs
|
|
166
166
|
}
|
|
167
167
|
|
|
168
168
|
self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
|
|
169
|
-
self.logger.info(f"Found {len(
|
|
169
|
+
self.logger.info(f"Found {len(to_process_forms)} unprocessed formulas")
|
|
170
170
|
|
|
171
171
|
# Set total for builder bars to have a total
|
|
172
|
-
self.total = len(
|
|
172
|
+
self.total = len(to_process_forms)
|
|
173
173
|
|
|
174
|
-
for
|
|
174
|
+
for formula in to_process_forms:
|
|
175
175
|
mol_query = dict(temp_query)
|
|
176
|
-
mol_query["
|
|
176
|
+
mol_query["formula_alphabetical"] = formula
|
|
177
177
|
molecules = list(self.molecules.query(criteria=mol_query))
|
|
178
178
|
|
|
179
179
|
yield molecules
|
|
@@ -190,9 +190,9 @@ class PartialChargesBuilder(Builder):
|
|
|
190
190
|
"""
|
|
191
191
|
|
|
192
192
|
mols = [MoleculeDoc(**item) for item in items]
|
|
193
|
-
|
|
193
|
+
formula = mols[0].formula_alphabetical
|
|
194
194
|
mol_ids = [m.molecule_id for m in mols]
|
|
195
|
-
self.logger.debug(f"Processing {
|
|
195
|
+
self.logger.debug(f"Processing {formula} : {mol_ids}")
|
|
196
196
|
|
|
197
197
|
charges_docs = list()
|
|
198
198
|
|
|
@@ -237,7 +237,7 @@ class PartialChargesBuilder(Builder):
|
|
|
237
237
|
tdoc = self.tasks.query_one(
|
|
238
238
|
{
|
|
239
239
|
"task_id": task,
|
|
240
|
-
"
|
|
240
|
+
"formula_alphabetical": formula,
|
|
241
241
|
"orig": {"$exists": True},
|
|
242
242
|
}
|
|
243
243
|
)
|
|
@@ -247,7 +247,7 @@ class PartialChargesBuilder(Builder):
|
|
|
247
247
|
tdoc = self.tasks.query_one(
|
|
248
248
|
{
|
|
249
249
|
"task_id": int(task),
|
|
250
|
-
"
|
|
250
|
+
"formula_alphabetical": formula,
|
|
251
251
|
"orig": {"$exists": True},
|
|
252
252
|
}
|
|
253
253
|
)
|
|
@@ -271,7 +271,7 @@ class PartialChargesBuilder(Builder):
|
|
|
271
271
|
|
|
272
272
|
charges_docs.append(doc)
|
|
273
273
|
|
|
274
|
-
self.logger.debug(f"Produced {len(charges_docs)} charges docs for {
|
|
274
|
+
self.logger.debug(f"Produced {len(charges_docs)} charges docs for {formula}")
|
|
275
275
|
|
|
276
276
|
return jsanitize([doc.model_dump() for doc in charges_docs], allow_bson=True)
|
|
277
277
|
|
|
@@ -320,7 +320,7 @@ class PartialSpinsBuilder(Builder):
|
|
|
320
320
|
data available (based on level of theory and electronic energy) will be used.
|
|
321
321
|
|
|
322
322
|
The process is as follows:
|
|
323
|
-
1. Gather MoleculeDocs by
|
|
323
|
+
1. Gather MoleculeDocs by formula
|
|
324
324
|
2. For each molecule, group all tasks by solvent.
|
|
325
325
|
3. For each solvent, sort tasks by level of theory and electronic energy
|
|
326
326
|
4. For each method:
|
|
@@ -365,14 +365,12 @@ class PartialSpinsBuilder(Builder):
|
|
|
365
365
|
self.tasks.ensure_index("last_updated")
|
|
366
366
|
self.tasks.ensure_index("state")
|
|
367
367
|
self.tasks.ensure_index("formula_alphabetical")
|
|
368
|
-
self.tasks.ensure_index("species_hash")
|
|
369
368
|
|
|
370
369
|
# Search index for molecules
|
|
371
370
|
self.molecules.ensure_index("molecule_id")
|
|
372
371
|
self.molecules.ensure_index("last_updated")
|
|
373
372
|
self.molecules.ensure_index("task_ids")
|
|
374
373
|
self.molecules.ensure_index("formula_alphabetical")
|
|
375
|
-
self.molecules.ensure_index("species_hash")
|
|
376
374
|
|
|
377
375
|
# Search index for spins
|
|
378
376
|
self.spins.ensure_index("molecule_id")
|
|
@@ -392,23 +390,23 @@ class PartialSpinsBuilder(Builder):
|
|
|
392
390
|
|
|
393
391
|
self.logger.info("Finding documents to process")
|
|
394
392
|
all_mols = list(
|
|
395
|
-
self.molecules.query(
|
|
393
|
+
self.molecules.query(
|
|
394
|
+
temp_query, [self.molecules.key, "formula_alphabetical"]
|
|
395
|
+
)
|
|
396
396
|
)
|
|
397
397
|
|
|
398
398
|
processed_docs = set([e for e in self.spins.distinct("molecule_id")])
|
|
399
399
|
to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
|
|
400
|
-
|
|
401
|
-
d["
|
|
400
|
+
to_process_forms = {
|
|
401
|
+
d["formula_alphabetical"]
|
|
402
402
|
for d in all_mols
|
|
403
403
|
if d[self.molecules.key] in to_process_docs
|
|
404
404
|
}
|
|
405
405
|
|
|
406
|
-
N = ceil(len(
|
|
406
|
+
N = ceil(len(to_process_forms) / number_splits)
|
|
407
407
|
|
|
408
|
-
for
|
|
409
|
-
query
|
|
410
|
-
query["species_hash"] = {"$in": list(hash_chunk)}
|
|
411
|
-
yield {"query": query}
|
|
408
|
+
for formula_chunk in grouper(to_process_forms, N):
|
|
409
|
+
yield {"query": {"formula_alphabetical": {"$in": list(formula_chunk)}}}
|
|
412
410
|
|
|
413
411
|
def get_items(self) -> Iterator[List[Dict]]:
|
|
414
412
|
"""
|
|
@@ -433,26 +431,28 @@ class PartialSpinsBuilder(Builder):
|
|
|
433
431
|
|
|
434
432
|
self.logger.info("Finding documents to process")
|
|
435
433
|
all_mols = list(
|
|
436
|
-
self.molecules.query(
|
|
434
|
+
self.molecules.query(
|
|
435
|
+
temp_query, [self.molecules.key, "formula_alphabetical"]
|
|
436
|
+
)
|
|
437
437
|
)
|
|
438
438
|
|
|
439
439
|
processed_docs = set([e for e in self.spins.distinct("molecule_id")])
|
|
440
440
|
to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
|
|
441
|
-
|
|
442
|
-
d["
|
|
441
|
+
to_process_forms = {
|
|
442
|
+
d["formula_alphabetical"]
|
|
443
443
|
for d in all_mols
|
|
444
444
|
if d[self.molecules.key] in to_process_docs
|
|
445
445
|
}
|
|
446
446
|
|
|
447
447
|
self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
|
|
448
|
-
self.logger.info(f"Found {len(
|
|
448
|
+
self.logger.info(f"Found {len(to_process_forms)} unprocessed formulas")
|
|
449
449
|
|
|
450
450
|
# Set total for builder bars to have a total
|
|
451
|
-
self.total = len(
|
|
451
|
+
self.total = len(to_process_forms)
|
|
452
452
|
|
|
453
|
-
for
|
|
453
|
+
for formula in to_process_forms:
|
|
454
454
|
mol_query = dict(temp_query)
|
|
455
|
-
mol_query["
|
|
455
|
+
mol_query["formula_alphabetical"] = formula
|
|
456
456
|
molecules = list(self.molecules.query(criteria=mol_query))
|
|
457
457
|
|
|
458
458
|
yield molecules
|
|
@@ -469,9 +469,9 @@ class PartialSpinsBuilder(Builder):
|
|
|
469
469
|
"""
|
|
470
470
|
|
|
471
471
|
mols = [MoleculeDoc(**item) for item in items]
|
|
472
|
-
|
|
472
|
+
formula = mols[0].formula_alphabetical
|
|
473
473
|
mol_ids = [m.molecule_id for m in mols]
|
|
474
|
-
self.logger.debug(f"Processing {
|
|
474
|
+
self.logger.debug(f"Processing {formula} : {mol_ids}")
|
|
475
475
|
|
|
476
476
|
spins_docs = list()
|
|
477
477
|
|
|
@@ -520,7 +520,7 @@ class PartialSpinsBuilder(Builder):
|
|
|
520
520
|
tdoc = self.tasks.query_one(
|
|
521
521
|
{
|
|
522
522
|
"task_id": task,
|
|
523
|
-
"
|
|
523
|
+
"formula_alphabetical": formula,
|
|
524
524
|
"orig": {"$exists": True},
|
|
525
525
|
}
|
|
526
526
|
)
|
|
@@ -530,7 +530,7 @@ class PartialSpinsBuilder(Builder):
|
|
|
530
530
|
tdoc = self.tasks.query_one(
|
|
531
531
|
{
|
|
532
532
|
"task_id": int(task),
|
|
533
|
-
"
|
|
533
|
+
"formula_alphabetical": formula,
|
|
534
534
|
"orig": {"$exists": True},
|
|
535
535
|
}
|
|
536
536
|
)
|
|
@@ -551,7 +551,9 @@ class PartialSpinsBuilder(Builder):
|
|
|
551
551
|
|
|
552
552
|
spins_docs.append(doc)
|
|
553
553
|
|
|
554
|
-
self.logger.debug(
|
|
554
|
+
self.logger.debug(
|
|
555
|
+
f"Produced {len(spins_docs)} partial spins docs for {formula}"
|
|
556
|
+
)
|
|
555
557
|
|
|
556
558
|
return jsanitize([doc.model_dump() for doc in spins_docs], allow_bson=True)
|
|
557
559
|
|
|
{emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/molecules/bonds.py
RENAMED
|
@@ -40,7 +40,7 @@ class BondingBuilder(Builder):
|
|
|
40
40
|
data available (based on level of theory and electronic energy) will be used.
|
|
41
41
|
|
|
42
42
|
The process is as follows:
|
|
43
|
-
1. Gather MoleculeDocs by
|
|
43
|
+
1. Gather MoleculeDocs by formula
|
|
44
44
|
2. For each molecule, group all tasks by solvent.
|
|
45
45
|
3. For each solvent, sort tasks by level of theory and electronic energy
|
|
46
46
|
4. For each method:
|
|
@@ -85,14 +85,12 @@ class BondingBuilder(Builder):
|
|
|
85
85
|
self.tasks.ensure_index("last_updated")
|
|
86
86
|
self.tasks.ensure_index("state")
|
|
87
87
|
self.tasks.ensure_index("formula_alphabetical")
|
|
88
|
-
self.tasks.ensure_index("species_hash")
|
|
89
88
|
|
|
90
89
|
# Search index for molecules
|
|
91
90
|
self.molecules.ensure_index("molecule_id")
|
|
92
91
|
self.molecules.ensure_index("last_updated")
|
|
93
92
|
self.molecules.ensure_index("task_ids")
|
|
94
93
|
self.molecules.ensure_index("formula_alphabetical")
|
|
95
|
-
self.molecules.ensure_index("species_hash")
|
|
96
94
|
|
|
97
95
|
# Search index for bonds
|
|
98
96
|
self.bonds.ensure_index("molecule_id")
|
|
@@ -112,23 +110,23 @@ class BondingBuilder(Builder):
|
|
|
112
110
|
|
|
113
111
|
self.logger.info("Finding documents to process")
|
|
114
112
|
all_mols = list(
|
|
115
|
-
self.molecules.query(
|
|
113
|
+
self.molecules.query(
|
|
114
|
+
temp_query, [self.molecules.key, "formula_alphabetical"]
|
|
115
|
+
)
|
|
116
116
|
)
|
|
117
117
|
|
|
118
118
|
processed_docs = set([e for e in self.bonds.distinct("molecule_id")])
|
|
119
119
|
to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
|
|
120
|
-
|
|
121
|
-
d["
|
|
120
|
+
to_process_forms = {
|
|
121
|
+
d["formula_alphabetical"]
|
|
122
122
|
for d in all_mols
|
|
123
123
|
if d[self.molecules.key] in to_process_docs
|
|
124
124
|
}
|
|
125
125
|
|
|
126
|
-
N = ceil(len(
|
|
126
|
+
N = ceil(len(to_process_forms) / number_splits)
|
|
127
127
|
|
|
128
|
-
for
|
|
129
|
-
query
|
|
130
|
-
query["species_hash"] = {"$in": list(hash_chunk)}
|
|
131
|
-
yield {"query": query}
|
|
128
|
+
for formula_chunk in grouper(to_process_forms, N):
|
|
129
|
+
yield {"query": {"formula_alphabetical": {"$in": list(formula_chunk)}}}
|
|
132
130
|
|
|
133
131
|
def get_items(self) -> Iterator[List[Dict]]:
|
|
134
132
|
"""
|
|
@@ -153,26 +151,28 @@ class BondingBuilder(Builder):
|
|
|
153
151
|
|
|
154
152
|
self.logger.info("Finding documents to process")
|
|
155
153
|
all_mols = list(
|
|
156
|
-
self.molecules.query(
|
|
154
|
+
self.molecules.query(
|
|
155
|
+
temp_query, [self.molecules.key, "formula_alphabetical"]
|
|
156
|
+
)
|
|
157
157
|
)
|
|
158
158
|
|
|
159
159
|
processed_docs = set([e for e in self.bonds.distinct("molecule_id")])
|
|
160
160
|
to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
|
|
161
|
-
|
|
162
|
-
d["
|
|
161
|
+
to_process_forms = {
|
|
162
|
+
d["formula_alphabetical"]
|
|
163
163
|
for d in all_mols
|
|
164
164
|
if d[self.molecules.key] in to_process_docs
|
|
165
165
|
}
|
|
166
166
|
|
|
167
167
|
self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
|
|
168
|
-
self.logger.info(f"Found {len(
|
|
168
|
+
self.logger.info(f"Found {len(to_process_forms)} unprocessed formulas")
|
|
169
169
|
|
|
170
170
|
# Set total for builder bars to have a total
|
|
171
|
-
self.total = len(
|
|
171
|
+
self.total = len(to_process_forms)
|
|
172
172
|
|
|
173
|
-
for
|
|
173
|
+
for formula in to_process_forms:
|
|
174
174
|
mol_query = dict(temp_query)
|
|
175
|
-
mol_query["
|
|
175
|
+
mol_query["formula_alphabetical"] = formula
|
|
176
176
|
molecules = list(self.molecules.query(criteria=mol_query))
|
|
177
177
|
|
|
178
178
|
yield molecules
|
|
@@ -189,9 +189,9 @@ class BondingBuilder(Builder):
|
|
|
189
189
|
"""
|
|
190
190
|
|
|
191
191
|
mols = [MoleculeDoc(**item) for item in items]
|
|
192
|
-
|
|
192
|
+
formula = mols[0].formula_alphabetical
|
|
193
193
|
mol_ids = [m.molecule_id for m in mols]
|
|
194
|
-
self.logger.debug(f"Processing {
|
|
194
|
+
self.logger.debug(f"Processing {formula} : {mol_ids}")
|
|
195
195
|
|
|
196
196
|
bonding_docs = list()
|
|
197
197
|
|
|
@@ -255,7 +255,7 @@ class BondingBuilder(Builder):
|
|
|
255
255
|
tdoc = self.tasks.query_one(
|
|
256
256
|
{
|
|
257
257
|
"task_id": task,
|
|
258
|
-
"
|
|
258
|
+
"formula_alphabetical": formula,
|
|
259
259
|
"orig": {"$exists": True},
|
|
260
260
|
}
|
|
261
261
|
)
|
|
@@ -265,7 +265,7 @@ class BondingBuilder(Builder):
|
|
|
265
265
|
tdoc = self.tasks.query_one(
|
|
266
266
|
{
|
|
267
267
|
"task_id": int(task),
|
|
268
|
-
"
|
|
268
|
+
"formula_alphabetical": formula,
|
|
269
269
|
"orig": {"$exists": True},
|
|
270
270
|
}
|
|
271
271
|
)
|
|
@@ -288,13 +288,13 @@ class BondingBuilder(Builder):
|
|
|
288
288
|
)
|
|
289
289
|
bonding_docs.append(doc)
|
|
290
290
|
|
|
291
|
-
self.logger.debug(f"Produced {len(bonding_docs)} bonding docs for {
|
|
291
|
+
self.logger.debug(f"Produced {len(bonding_docs)} bonding docs for {formula}")
|
|
292
292
|
|
|
293
293
|
return jsanitize([doc.model_dump() for doc in bonding_docs], allow_bson=True)
|
|
294
294
|
|
|
295
295
|
def update_targets(self, items: List[List[Dict]]):
|
|
296
296
|
"""
|
|
297
|
-
Inserts the new documents into the
|
|
297
|
+
Inserts the new documents into the bonds collection
|
|
298
298
|
|
|
299
299
|
Args:
|
|
300
300
|
items [[dict]]: A list of documents to update
|
{emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/molecules/metal_binding.py
RENAMED
|
@@ -44,7 +44,7 @@ class MetalBindingBuilder(Builder):
|
|
|
44
44
|
will be used.
|
|
45
45
|
|
|
46
46
|
The process is as follows:
|
|
47
|
-
1. Gather MoleculeDocs by
|
|
47
|
+
1. Gather MoleculeDocs by formula
|
|
48
48
|
2. For each molecule, first identify if there are any metals. If not, then no MetalBindingDoc can be made.
|
|
49
49
|
If so, then identify the possible solvents that can be used to generate MetalBindingDocs
|
|
50
50
|
3. For each combination of Molecule ID and solvent, search for additional documents:
|
|
@@ -111,7 +111,6 @@ class MetalBindingBuilder(Builder):
|
|
|
111
111
|
self.molecules.ensure_index("last_updated")
|
|
112
112
|
self.molecules.ensure_index("task_ids")
|
|
113
113
|
self.molecules.ensure_index("formula_alphabetical")
|
|
114
|
-
self.molecules.ensure_index("species_hash")
|
|
115
114
|
|
|
116
115
|
# Search index for charges
|
|
117
116
|
self.charges.ensure_index("molecule_id")
|
|
@@ -169,23 +168,23 @@ class MetalBindingBuilder(Builder):
|
|
|
169
168
|
|
|
170
169
|
self.logger.info("Finding documents to process")
|
|
171
170
|
all_mols = list(
|
|
172
|
-
self.molecules.query(
|
|
171
|
+
self.molecules.query(
|
|
172
|
+
temp_query, [self.molecules.key, "formula_alphabetical"]
|
|
173
|
+
)
|
|
173
174
|
)
|
|
174
175
|
|
|
175
176
|
processed_docs = set([e for e in self.metal_binding.distinct("molecule_id")])
|
|
176
177
|
to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
|
|
177
|
-
|
|
178
|
-
d["
|
|
178
|
+
to_process_forms = {
|
|
179
|
+
d["formula_alphabetical"]
|
|
179
180
|
for d in all_mols
|
|
180
181
|
if d[self.molecules.key] in to_process_docs
|
|
181
182
|
}
|
|
182
183
|
|
|
183
|
-
N = ceil(len(
|
|
184
|
+
N = ceil(len(to_process_forms) / number_splits)
|
|
184
185
|
|
|
185
|
-
for
|
|
186
|
-
query
|
|
187
|
-
query["species_hash"] = {"$in": list(hash_chunk)}
|
|
188
|
-
yield {"query": query}
|
|
186
|
+
for formula_chunk in grouper(to_process_forms, N):
|
|
187
|
+
yield {"query": {"formula_alphabetical": {"$in": list(formula_chunk)}}}
|
|
189
188
|
|
|
190
189
|
def get_items(self) -> Iterator[List[Dict]]:
|
|
191
190
|
"""
|
|
@@ -208,26 +207,28 @@ class MetalBindingBuilder(Builder):
|
|
|
208
207
|
|
|
209
208
|
self.logger.info("Finding documents to process")
|
|
210
209
|
all_mols = list(
|
|
211
|
-
self.molecules.query(
|
|
210
|
+
self.molecules.query(
|
|
211
|
+
temp_query, [self.molecules.key, "formula_alphabetical"]
|
|
212
|
+
)
|
|
212
213
|
)
|
|
213
214
|
|
|
214
215
|
processed_docs = set([e for e in self.metal_binding.distinct("molecule_id")])
|
|
215
216
|
to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
|
|
216
|
-
|
|
217
|
-
d["
|
|
217
|
+
to_process_forms = {
|
|
218
|
+
d["formula_alphabetical"]
|
|
218
219
|
for d in all_mols
|
|
219
220
|
if d[self.molecules.key] in to_process_docs
|
|
220
221
|
}
|
|
221
222
|
|
|
222
223
|
self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
|
|
223
|
-
self.logger.info(f"Found {len(
|
|
224
|
+
self.logger.info(f"Found {len(to_process_forms)} unprocessed formulas")
|
|
224
225
|
|
|
225
226
|
# Set total for builder bars to have a total
|
|
226
|
-
self.total = len(
|
|
227
|
+
self.total = len(to_process_forms)
|
|
227
228
|
|
|
228
|
-
for
|
|
229
|
+
for formula in to_process_forms:
|
|
229
230
|
mol_query = dict(temp_query)
|
|
230
|
-
mol_query["
|
|
231
|
+
mol_query["formula_alphabetical"] = formula
|
|
231
232
|
molecules = list(self.molecules.query(criteria=mol_query))
|
|
232
233
|
|
|
233
234
|
yield molecules
|
|
@@ -244,9 +245,9 @@ class MetalBindingBuilder(Builder):
|
|
|
244
245
|
"""
|
|
245
246
|
|
|
246
247
|
mols = [MoleculeDoc(**item) for item in items]
|
|
247
|
-
|
|
248
|
+
formula = mols[0].formula_alphabetical
|
|
248
249
|
mol_ids = [m.molecule_id for m in mols]
|
|
249
|
-
self.logger.debug(f"Processing {
|
|
250
|
+
self.logger.debug(f"Processing {formula} : {mol_ids}")
|
|
250
251
|
|
|
251
252
|
binding_docs = list()
|
|
252
253
|
|
|
@@ -486,7 +487,7 @@ class MetalBindingBuilder(Builder):
|
|
|
486
487
|
binding_docs.append(doc)
|
|
487
488
|
|
|
488
489
|
self.logger.debug(
|
|
489
|
-
f"Produced {len(binding_docs)} metal binding docs for {
|
|
490
|
+
f"Produced {len(binding_docs)} metal binding docs for {formula}"
|
|
490
491
|
)
|
|
491
492
|
|
|
492
493
|
return jsanitize([doc.model_dump() for doc in binding_docs], allow_bson=True)
|
|
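{emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/molecules/orbitals.py
RENAMED
|
@@ -27,7 +27,7 @@ class OrbitalBuilder(Builder):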
|
|
|
27
27
|
each solvent available).
|
|
28
28
|
|
|
29
29
|
The process is as follows:
|
|
30
|
-
1. Gather MoleculeDocs by
|
|
30
|
+
1. Gather MoleculeDocs by formula
|
|
31
31
|
2. For each doc, sort tasks by solvent
|
|
32
32
|
3. For each solvent, grab the best TaskDoc (including NBO data using
|
|
33
33
|
the highest level of theory with lowest electronic energy for the
|
|
@@ -69,14 +69,12 @@ class OrbitalBuilder(Builder):
|
|
|
69
69
|
self.tasks.ensure_index("last_updated")
|
|
70
70
|
self.tasks.ensure_index("state")
|
|
71
71
|
self.tasks.ensure_index("formula_alphabetical")
|
|
72
|
-
self.tasks.ensure_index("species_hash")
|
|
73
72
|
|
|
74
73
|
# Search index for molecules
|
|
75
74
|
self.molecules.ensure_index("molecule_id")
|
|
76
75
|
self.molecules.ensure_index("last_updated")
|
|
77
76
|
self.molecules.ensure_index("task_ids")
|
|
78
77
|
self.molecules.ensure_index("formula_alphabetical")
|
|
79
|
-
self.molecules.ensure_index("species_hash")
|
|
80
78
|
|
|
81
79
|
# Search index for orbitals
|
|
82
80
|
self.orbitals.ensure_index("molecule_id")
|
|
@@ -95,23 +93,23 @@ class OrbitalBuilder(Builder):
|
|
|
95
93
|
|
|
96
94
|
self.logger.info("Finding documents to process")
|
|
97
95
|
all_mols = list(
|
|
98
|
-
self.molecules.query(
|
|
96
|
+
self.molecules.query(
|
|
97
|
+
temp_query, [self.molecules.key, "formula_alphabetical"]
|
|
98
|
+
)
|
|
99
99
|
)
|
|
100
100
|
|
|
101
101
|
processed_docs = set([e for e in self.orbitals.distinct("molecule_id")])
|
|
102
102
|
to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
|
|
103
|
-
|
|
104
|
-
d["
|
|
103
|
+
to_process_forms = {
|
|
104
|
+
d["formula_alphabetical"]
|
|
105
105
|
for d in all_mols
|
|
106
106
|
if d[self.molecules.key] in to_process_docs
|
|
107
107
|
}
|
|
108
108
|
|
|
109
|
-
N = ceil(len(
|
|
109
|
+
N = ceil(len(to_process_forms) / number_splits)
|
|
110
110
|
|
|
111
|
-
for
|
|
112
|
-
query
|
|
113
|
-
query["species_hash"] = {"$in": list(hash_chunk)}
|
|
114
|
-
yield {"query": query}
|
|
111
|
+
for formula_chunk in grouper(to_process_forms, N):
|
|
112
|
+
yield {"query": {"formula_alphabetical": {"$in": list(formula_chunk)}}}
|
|
115
113
|
|
|
116
114
|
def get_items(self) -> Iterator[List[Dict]]:
|
|
117
115
|
"""
|
|
@@ -136,26 +134,28 @@ class OrbitalBuilder(Builder):
|
|
|
136
134
|
|
|
137
135
|
self.logger.info("Finding documents to process")
|
|
138
136
|
all_mols = list(
|
|
139
|
-
self.molecules.query(
|
|
137
|
+
self.molecules.query(
|
|
138
|
+
temp_query, [self.molecules.key, "formula_alphabetical"]
|
|
139
|
+
)
|
|
140
140
|
)
|
|
141
141
|
|
|
142
142
|
processed_docs = set([e for e in self.orbitals.distinct("molecule_id")])
|
|
143
143
|
to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
|
|
144
|
-
|
|
145
|
-
d["
|
|
144
|
+
to_process_forms = {
|
|
145
|
+
d["formula_alphabetical"]
|
|
146
146
|
for d in all_mols
|
|
147
147
|
if d[self.molecules.key] in to_process_docs
|
|
148
148
|
}
|
|
149
149
|
|
|
150
150
|
self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
|
|
151
|
-
self.logger.info(f"Found {len(
|
|
151
|
+
self.logger.info(f"Found {len(to_process_forms)} unprocessed formulas")
|
|
152
152
|
|
|
153
153
|
# Set total for builder bars to have a total
|
|
154
|
-
self.total = len(
|
|
154
|
+
self.total = len(to_process_forms)
|
|
155
155
|
|
|
156
|
-
for
|
|
156
|
+
for formula in to_process_forms:
|
|
157
157
|
mol_query = dict(temp_query)
|
|
158
|
-
mol_query["
|
|
158
|
+
mol_query["formula_alphabetical"] = formula
|
|
159
159
|
molecules = list(self.molecules.query(criteria=mol_query))
|
|
160
160
|
|
|
161
161
|
yield molecules
|
|
@@ -172,9 +172,9 @@ class OrbitalBuilder(Builder):
|
|
|
172
172
|
"""
|
|
173
173
|
|
|
174
174
|
mols = [MoleculeDoc(**item) for item in items]
|
|
175
|
-
|
|
175
|
+
formula = mols[0].formula_alphabetical
|
|
176
176
|
mol_ids = [m.molecule_id for m in mols]
|
|
177
|
-
self.logger.info(f"Processing {
|
|
177
|
+
self.logger.info(f"Processing {formula} : {mol_ids}")
|
|
178
178
|
|
|
179
179
|
orbital_docs = list()
|
|
180
180
|
|
|
@@ -221,7 +221,7 @@ class OrbitalBuilder(Builder):
|
|
|
221
221
|
tdoc = self.tasks.query_one(
|
|
222
222
|
{
|
|
223
223
|
"task_id": task,
|
|
224
|
-
"
|
|
224
|
+
"formula_alphabetical": formula,
|
|
225
225
|
"orig": {"$exists": True},
|
|
226
226
|
}
|
|
227
227
|
)
|
|
@@ -231,7 +231,7 @@ class OrbitalBuilder(Builder):
|
|
|
231
231
|
tdoc = self.tasks.query_one(
|
|
232
232
|
{
|
|
233
233
|
"task_id": int(task),
|
|
234
|
-
"
|
|
234
|
+
"formula_alphabetical": formula,
|
|
235
235
|
"orig": {"$exists": True},
|
|
236
236
|
}
|
|
237
237
|
)
|
|
@@ -253,7 +253,7 @@ class OrbitalBuilder(Builder):
|
|
|
253
253
|
if orbital_doc is not None:
|
|
254
254
|
orbital_docs.append(orbital_doc)
|
|
255
255
|
|
|
256
|
-
self.logger.debug(f"Produced {len(orbital_docs)} orbital docs for {
|
|
256
|
+
self.logger.debug(f"Produced {len(orbital_docs)} orbital docs for {formula}")
|
|
257
257
|
|
|
258
258
|
return jsanitize([doc.model_dump() for doc in orbital_docs], allow_bson=True)
|
|
259
259
|
|