emmet-builders 0.84.2rc6__py3-none-any.whl → 0.84.2rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of emmet-builders might be problematic. Click here for more details.
- emmet/builders/molecules/atomic.py +46 -48
- emmet/builders/molecules/bonds.py +24 -24
- emmet/builders/molecules/electric.py +282 -0
- emmet/builders/molecules/metal_binding.py +20 -21
- emmet/builders/molecules/orbitals.py +23 -23
- emmet/builders/molecules/redox.py +27 -27
- emmet/builders/molecules/summary.py +36 -21
- emmet/builders/molecules/thermo.py +23 -23
- emmet/builders/molecules/trajectory.py +525 -0
- emmet/builders/molecules/vibration.py +23 -23
- emmet/builders/qchem/molecules.py +21 -15
- {emmet_builders-0.84.2rc6.dist-info → emmet_builders-0.84.2rc7.dist-info}/METADATA +1 -1
- {emmet_builders-0.84.2rc6.dist-info → emmet_builders-0.84.2rc7.dist-info}/RECORD +15 -13
- {emmet_builders-0.84.2rc6.dist-info → emmet_builders-0.84.2rc7.dist-info}/WHEEL +0 -0
- {emmet_builders-0.84.2rc6.dist-info → emmet_builders-0.84.2rc7.dist-info}/top_level.txt +0 -0
|
@@ -27,7 +27,7 @@ class VibrationBuilder(Builder):
|
|
|
27
27
|
each solvent available).
|
|
28
28
|
|
|
29
29
|
The process is as follows:
|
|
30
|
-
1. Gather MoleculeDocs by
|
|
30
|
+
1. Gather MoleculeDocs by species hash
|
|
31
31
|
2. For each doc, sort tasks by solvent
|
|
32
32
|
3. For each solvent, grab the best TaskDoc (doc with vibrational
|
|
33
33
|
information that has the highest level of theory with lowest
|
|
@@ -73,12 +73,14 @@ class VibrationBuilder(Builder):
|
|
|
73
73
|
self.tasks.ensure_index("last_updated")
|
|
74
74
|
self.tasks.ensure_index("state")
|
|
75
75
|
self.tasks.ensure_index("formula_alphabetical")
|
|
76
|
+
self.tasks.ensure_index("species_hash")
|
|
76
77
|
|
|
77
78
|
# Search index for molecules
|
|
78
79
|
self.molecules.ensure_index("molecule_id")
|
|
79
80
|
self.molecules.ensure_index("last_updated")
|
|
80
81
|
self.molecules.ensure_index("task_ids")
|
|
81
82
|
self.molecules.ensure_index("formula_alphabetical")
|
|
83
|
+
self.molecules.ensure_index("species_hash")
|
|
82
84
|
|
|
83
85
|
# Search index for vibrational properties
|
|
84
86
|
self.vibes.ensure_index("molecule_id")
|
|
@@ -97,23 +99,23 @@ class VibrationBuilder(Builder):
|
|
|
97
99
|
|
|
98
100
|
self.logger.info("Finding documents to process")
|
|
99
101
|
all_mols = list(
|
|
100
|
-
self.molecules.query(
|
|
101
|
-
temp_query, [self.molecules.key, "formula_alphabetical"]
|
|
102
|
-
)
|
|
102
|
+
self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
|
|
103
103
|
)
|
|
104
104
|
|
|
105
105
|
processed_docs = set([e for e in self.vibes.distinct("molecule_id")])
|
|
106
106
|
to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
|
|
107
|
-
|
|
108
|
-
d["
|
|
107
|
+
to_process_hashes = {
|
|
108
|
+
d["species_hash"]
|
|
109
109
|
for d in all_mols
|
|
110
110
|
if d[self.molecules.key] in to_process_docs
|
|
111
111
|
}
|
|
112
112
|
|
|
113
|
-
N = ceil(len(
|
|
113
|
+
N = ceil(len(to_process_hashes) / number_splits)
|
|
114
114
|
|
|
115
|
-
for
|
|
116
|
-
|
|
115
|
+
for hash_chunk in grouper(to_process_hashes, N):
|
|
116
|
+
query = dict(temp_query)
|
|
117
|
+
query["species_hash"] = {"$in": list(hash_chunk)}
|
|
118
|
+
yield {"query": query}
|
|
117
119
|
|
|
118
120
|
def get_items(self) -> Iterator[List[Dict]]:
|
|
119
121
|
"""
|
|
@@ -138,28 +140,26 @@ class VibrationBuilder(Builder):
|
|
|
138
140
|
|
|
139
141
|
self.logger.info("Finding documents to process")
|
|
140
142
|
all_mols = list(
|
|
141
|
-
self.molecules.query(
|
|
142
|
-
temp_query, [self.molecules.key, "formula_alphabetical"]
|
|
143
|
-
)
|
|
143
|
+
self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
|
|
144
144
|
)
|
|
145
145
|
|
|
146
146
|
processed_docs = set([e for e in self.vibes.distinct("molecule_id")])
|
|
147
147
|
to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
|
|
148
|
-
|
|
149
|
-
d["
|
|
148
|
+
to_process_hashes = {
|
|
149
|
+
d["species_hash"]
|
|
150
150
|
for d in all_mols
|
|
151
151
|
if d[self.molecules.key] in to_process_docs
|
|
152
152
|
}
|
|
153
153
|
|
|
154
154
|
self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
|
|
155
|
-
self.logger.info(f"Found {len(
|
|
155
|
+
self.logger.info(f"Found {len(to_process_hashes)} unprocessed hashes")
|
|
156
156
|
|
|
157
157
|
# Set total for builder bars to have a total
|
|
158
|
-
self.total = len(
|
|
158
|
+
self.total = len(to_process_hashes)
|
|
159
159
|
|
|
160
|
-
for
|
|
160
|
+
for shash in to_process_hashes:
|
|
161
161
|
mol_query = dict(temp_query)
|
|
162
|
-
mol_query["
|
|
162
|
+
mol_query["species_hash"] = shash
|
|
163
163
|
molecules = list(self.molecules.query(criteria=mol_query))
|
|
164
164
|
|
|
165
165
|
yield molecules
|
|
@@ -176,9 +176,9 @@ class VibrationBuilder(Builder):
|
|
|
176
176
|
"""
|
|
177
177
|
|
|
178
178
|
mols = [MoleculeDoc(**item) for item in items]
|
|
179
|
-
|
|
179
|
+
shash = mols[0].species_hash
|
|
180
180
|
mol_ids = [m.molecule_id for m in mols]
|
|
181
|
-
self.logger.debug(f"Processing {
|
|
181
|
+
self.logger.debug(f"Processing {shash} : {mol_ids}")
|
|
182
182
|
|
|
183
183
|
vibe_docs = list()
|
|
184
184
|
|
|
@@ -213,7 +213,7 @@ class VibrationBuilder(Builder):
|
|
|
213
213
|
tdoc = self.tasks.query_one(
|
|
214
214
|
{
|
|
215
215
|
"task_id": task,
|
|
216
|
-
"
|
|
216
|
+
"species_hash": shash,
|
|
217
217
|
"orig": {"$exists": True},
|
|
218
218
|
}
|
|
219
219
|
)
|
|
@@ -223,7 +223,7 @@ class VibrationBuilder(Builder):
|
|
|
223
223
|
tdoc = self.tasks.query_one(
|
|
224
224
|
{
|
|
225
225
|
"task_id": int(task),
|
|
226
|
-
"
|
|
226
|
+
"species_hash": shash,
|
|
227
227
|
"orig": {"$exists": True},
|
|
228
228
|
}
|
|
229
229
|
)
|
|
@@ -243,7 +243,7 @@ class VibrationBuilder(Builder):
|
|
|
243
243
|
)
|
|
244
244
|
vibe_docs.append(vibe_doc)
|
|
245
245
|
|
|
246
|
-
self.logger.debug(f"Produced {len(vibe_docs)} vibration docs for {
|
|
246
|
+
self.logger.debug(f"Produced {len(vibe_docs)} vibration docs for {shash}")
|
|
247
247
|
|
|
248
248
|
return jsanitize([doc.model_dump() for doc in vibe_docs], allow_bson=True)
|
|
249
249
|
|
|
@@ -138,6 +138,7 @@ class MoleculesAssociationBuilder(Builder):
|
|
|
138
138
|
self.tasks.ensure_index("formula_alphabetical")
|
|
139
139
|
self.tasks.ensure_index("smiles")
|
|
140
140
|
self.tasks.ensure_index("species_hash")
|
|
141
|
+
self.tasks.ensure_index("coord_hash")
|
|
141
142
|
|
|
142
143
|
# Search index for molecules
|
|
143
144
|
self.assoc.ensure_index("molecule_id")
|
|
@@ -165,7 +166,9 @@ class MoleculesAssociationBuilder(Builder):
|
|
|
165
166
|
N = ceil(len(to_process_hashes) / number_splits)
|
|
166
167
|
|
|
167
168
|
for hash_chunk in grouper(to_process_hashes, N):
|
|
168
|
-
|
|
169
|
+
query = dict(temp_query)
|
|
170
|
+
query["species_hash"] = {"$in": list(hash_chunk)}
|
|
171
|
+
yield {"query": query}
|
|
169
172
|
|
|
170
173
|
def get_items(self) -> Iterator[List[TaskDocument]]:
|
|
171
174
|
"""
|
|
@@ -390,6 +393,7 @@ class MoleculesBuilder(Builder):
|
|
|
390
393
|
self.assoc.ensure_index("last_updated")
|
|
391
394
|
self.assoc.ensure_index("task_ids")
|
|
392
395
|
self.assoc.ensure_index("formula_alphabetical")
|
|
396
|
+
self.assoc.ensure_index("species_hash")
|
|
393
397
|
|
|
394
398
|
# Search index for molecules
|
|
395
399
|
self.molecules.ensure_index("molecule_id")
|
|
@@ -433,16 +437,18 @@ class MoleculesBuilder(Builder):
|
|
|
433
437
|
xyz_species_id_map[d[self.assoc.key]] = this_id
|
|
434
438
|
to_process_docs = assoc_ids - processed_docs
|
|
435
439
|
|
|
436
|
-
|
|
437
|
-
d["
|
|
440
|
+
to_process_hashes = {
|
|
441
|
+
d["species_hash"]
|
|
438
442
|
for d in all_assoc
|
|
439
443
|
if xyz_species_id_map[d[self.assoc.key]] in to_process_docs
|
|
440
444
|
}
|
|
441
445
|
|
|
442
|
-
N = ceil(len(
|
|
446
|
+
N = ceil(len(to_process_hashes) / number_splits)
|
|
443
447
|
|
|
444
|
-
for
|
|
445
|
-
|
|
448
|
+
for hash_chunk in grouper(to_process_hashes, N):
|
|
449
|
+
query = dict(temp_query)
|
|
450
|
+
query["species_hash"] = {"$in": list(hash_chunk)}
|
|
451
|
+
yield {"query": query}
|
|
446
452
|
|
|
447
453
|
def get_items(self) -> Iterator[List[Dict]]:
|
|
448
454
|
"""
|
|
@@ -495,21 +501,21 @@ class MoleculesBuilder(Builder):
|
|
|
495
501
|
xyz_species_id_map[d[self.assoc.key]] = this_id
|
|
496
502
|
to_process_docs = assoc_ids - processed_docs
|
|
497
503
|
|
|
498
|
-
|
|
499
|
-
d["
|
|
504
|
+
to_process_hashes = {
|
|
505
|
+
d["species_hash"]
|
|
500
506
|
for d in all_assoc
|
|
501
507
|
if xyz_species_id_map[d[self.assoc.key]] in to_process_docs
|
|
502
508
|
}
|
|
503
509
|
|
|
504
510
|
self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
|
|
505
|
-
self.logger.info(f"Found {len(
|
|
511
|
+
self.logger.info(f"Found {len(to_process_hashes)} unprocessed hashes")
|
|
506
512
|
|
|
507
513
|
# Set total for builder bars to have a total
|
|
508
|
-
self.total = len(
|
|
514
|
+
self.total = len(to_process_hashes)
|
|
509
515
|
|
|
510
|
-
for
|
|
516
|
+
for shash in to_process_hashes:
|
|
511
517
|
assoc_query = dict(temp_query)
|
|
512
|
-
assoc_query["
|
|
518
|
+
assoc_query["species_hash"] = shash
|
|
513
519
|
assoc = list(self.assoc.query(criteria=assoc_query))
|
|
514
520
|
|
|
515
521
|
yield assoc
|
|
@@ -526,9 +532,9 @@ class MoleculesBuilder(Builder):
|
|
|
526
532
|
"""
|
|
527
533
|
|
|
528
534
|
assoc = [MoleculeDoc(**item) for item in items]
|
|
529
|
-
|
|
535
|
+
shash = assoc[0].species_hash
|
|
530
536
|
mol_ids = [a.molecule_id for a in assoc]
|
|
531
|
-
self.logger.debug(f"Processing {
|
|
537
|
+
self.logger.debug(f"Processing {shash} : {mol_ids}")
|
|
532
538
|
|
|
533
539
|
complete_mol_docs = list()
|
|
534
540
|
|
|
@@ -646,7 +652,7 @@ class MoleculesBuilder(Builder):
|
|
|
646
652
|
|
|
647
653
|
complete_mol_docs.append(base_doc)
|
|
648
654
|
|
|
649
|
-
self.logger.debug(f"Produced {len(complete_mol_docs)} molecules for {
|
|
655
|
+
self.logger.debug(f"Produced {len(complete_mol_docs)} molecules for {shash}")
|
|
650
656
|
|
|
651
657
|
return jsanitize(
|
|
652
658
|
[mol.model_dump() for mol in complete_mol_docs], allow_bson=True
|
|
@@ -32,21 +32,23 @@ emmet/builders/matscholar/missing_compositions.py,sha256=RGQOEhfmJ6YMbjD4osLWqs7
|
|
|
32
32
|
emmet/builders/mobility/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
33
33
|
emmet/builders/mobility/migration_graph.py,sha256=WEXtPSn0UE5Q8mnvJ-T19FB3_LrZ3ojvNyRBs1PXWRg,3923
|
|
34
34
|
emmet/builders/molecules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
|
-
emmet/builders/molecules/atomic.py,sha256=
|
|
36
|
-
emmet/builders/molecules/bonds.py,sha256=
|
|
37
|
-
emmet/builders/molecules/
|
|
38
|
-
emmet/builders/molecules/
|
|
39
|
-
emmet/builders/molecules/
|
|
40
|
-
emmet/builders/molecules/
|
|
41
|
-
emmet/builders/molecules/
|
|
42
|
-
emmet/builders/molecules/
|
|
35
|
+
emmet/builders/molecules/atomic.py,sha256=DBG_FScwT7YU1GgI69yRrR3_wUoWBwiP1uBvbL7xP3Y,20839
|
|
36
|
+
emmet/builders/molecules/bonds.py,sha256=aHp9U_LX3sZvdTIaZj2T8PG_wTFYqw6nOcYC-4MiI0E,12103
|
|
37
|
+
emmet/builders/molecules/electric.py,sha256=ldoLSfIAjue6YQlyXkgJRXLcfh178goiarjI_f_EtH4,10065
|
|
38
|
+
emmet/builders/molecules/metal_binding.py,sha256=kimYsmQWdmukRRX2_GgVywCNRhNHz_-fp2oMYJTVMrg,23308
|
|
39
|
+
emmet/builders/molecules/orbitals.py,sha256=nIsmx0m6Zi402opHE6OoEljGgQ2714p1Gig8Py1IXrU,10060
|
|
40
|
+
emmet/builders/molecules/redox.py,sha256=52er0zK_IVQ0UYUh7svml-zwlvOtbptz1D8ZYCglfI0,18448
|
|
41
|
+
emmet/builders/molecules/summary.py,sha256=7KHsnc9PBstps6s-hK4mYrWOBHt26_PTPKsCSFw3018,13848
|
|
42
|
+
emmet/builders/molecules/thermo.py,sha256=MutvuYJsU0Hj5Qaa_Z7qnikM6mkPSBPNE4RG8JK4qes,19874
|
|
43
|
+
emmet/builders/molecules/trajectory.py,sha256=oKrmWtKJ6mC0d1uJRE7g72X97kkS7JQ7nMhupVOUEUU,18163
|
|
44
|
+
emmet/builders/molecules/vibration.py,sha256=_FA-tRixghsJdlls6oO9U2abxCHWqWv5SucbxpP5mVQ,9520
|
|
43
45
|
emmet/builders/qchem/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
44
|
-
emmet/builders/qchem/molecules.py,sha256=
|
|
46
|
+
emmet/builders/qchem/molecules.py,sha256=qI8WFOJ69FMPu9hxGzoFR4V3y4qO7UhtzVZwG9AWPpw,26382
|
|
45
47
|
emmet/builders/vasp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
48
|
emmet/builders/vasp/materials.py,sha256=5bjP-W5-gmSjDzmcHdF7bviwgk4ywUceCL4FcF9Ya9c,12700
|
|
47
49
|
emmet/builders/vasp/mp_potcar_stats.json.gz,sha256=RD6gbZEmmmKQYRKpFtEKHzncGO1WsLYMPjn3wvONrIc,291869
|
|
48
50
|
emmet/builders/vasp/task_validator.py,sha256=bmRTDiOWof4rpHVg3ksoxocN9xxieYu7IE-ylMjYOVs,2922
|
|
49
|
-
emmet_builders-0.84.
|
|
50
|
-
emmet_builders-0.84.
|
|
51
|
-
emmet_builders-0.84.
|
|
52
|
-
emmet_builders-0.84.
|
|
51
|
+
emmet_builders-0.84.2rc7.dist-info/METADATA,sha256=QqGWpUDogkBmmZbyiHsduf3OWrUR3lfjmlBZigf8Kzk,2162
|
|
52
|
+
emmet_builders-0.84.2rc7.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
|
53
|
+
emmet_builders-0.84.2rc7.dist-info/top_level.txt,sha256=6GcpbmWPeFhNCTfDFilb8GQ4T1UQu4z9c5jpobjwE-Q,6
|
|
54
|
+
emmet_builders-0.84.2rc7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|