emmet-builders 0.84.3rc2__py3-none-any.whl → 0.84.3rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of emmet-builders might be problematic. Click here for more details.

@@ -27,7 +27,7 @@ class VibrationBuilder(Builder):
27
27
  each solvent available).
28
28
 
29
29
  The process is as follows:
30
- 1. Gather MoleculeDocs by formula
30
+ 1. Gather MoleculeDocs by species hash
31
31
  2. For each doc, sort tasks by solvent
32
32
  3. For each solvent, grab the best TaskDoc (doc with vibrational
33
33
  information that has the highest level of theory with lowest
@@ -73,12 +73,14 @@ class VibrationBuilder(Builder):
73
73
  self.tasks.ensure_index("last_updated")
74
74
  self.tasks.ensure_index("state")
75
75
  self.tasks.ensure_index("formula_alphabetical")
76
+ self.tasks.ensure_index("species_hash")
76
77
 
77
78
  # Search index for molecules
78
79
  self.molecules.ensure_index("molecule_id")
79
80
  self.molecules.ensure_index("last_updated")
80
81
  self.molecules.ensure_index("task_ids")
81
82
  self.molecules.ensure_index("formula_alphabetical")
83
+ self.molecules.ensure_index("species_hash")
82
84
 
83
85
  # Search index for vibrational properties
84
86
  self.vibes.ensure_index("molecule_id")
@@ -97,23 +99,23 @@ class VibrationBuilder(Builder):
97
99
 
98
100
  self.logger.info("Finding documents to process")
99
101
  all_mols = list(
100
- self.molecules.query(
101
- temp_query, [self.molecules.key, "formula_alphabetical"]
102
- )
102
+ self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
103
103
  )
104
104
 
105
105
  processed_docs = set([e for e in self.vibes.distinct("molecule_id")])
106
106
  to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
107
- to_process_forms = {
108
- d["formula_alphabetical"]
107
+ to_process_hashes = {
108
+ d["species_hash"]
109
109
  for d in all_mols
110
110
  if d[self.molecules.key] in to_process_docs
111
111
  }
112
112
 
113
- N = ceil(len(to_process_forms) / number_splits)
113
+ N = ceil(len(to_process_hashes) / number_splits)
114
114
 
115
- for formula_chunk in grouper(to_process_forms, N):
116
- yield {"query": {"formula_alphabetical": {"$in": list(formula_chunk)}}}
115
+ for hash_chunk in grouper(to_process_hashes, N):
116
+ query = dict(temp_query)
117
+ query["species_hash"] = {"$in": list(hash_chunk)}
118
+ yield {"query": query}
117
119
 
118
120
  def get_items(self) -> Iterator[List[Dict]]:
119
121
  """
@@ -138,28 +140,26 @@ class VibrationBuilder(Builder):
138
140
 
139
141
  self.logger.info("Finding documents to process")
140
142
  all_mols = list(
141
- self.molecules.query(
142
- temp_query, [self.molecules.key, "formula_alphabetical"]
143
- )
143
+ self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
144
144
  )
145
145
 
146
146
  processed_docs = set([e for e in self.vibes.distinct("molecule_id")])
147
147
  to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
148
- to_process_forms = {
149
- d["formula_alphabetical"]
148
+ to_process_hashes = {
149
+ d["species_hash"]
150
150
  for d in all_mols
151
151
  if d[self.molecules.key] in to_process_docs
152
152
  }
153
153
 
154
154
  self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
155
- self.logger.info(f"Found {len(to_process_forms)} unprocessed formulas")
155
+ self.logger.info(f"Found {len(to_process_hashes)} unprocessed hashes")
156
156
 
157
157
  # Set total for builder bars to have a total
158
- self.total = len(to_process_forms)
158
+ self.total = len(to_process_hashes)
159
159
 
160
- for formula in to_process_forms:
160
+ for shash in to_process_hashes:
161
161
  mol_query = dict(temp_query)
162
- mol_query["formula_alphabetical"] = formula
162
+ mol_query["species_hash"] = shash
163
163
  molecules = list(self.molecules.query(criteria=mol_query))
164
164
 
165
165
  yield molecules
@@ -176,9 +176,9 @@ class VibrationBuilder(Builder):
176
176
  """
177
177
 
178
178
  mols = [MoleculeDoc(**item) for item in items]
179
- formula = mols[0].formula_alphabetical
179
+ shash = mols[0].species_hash
180
180
  mol_ids = [m.molecule_id for m in mols]
181
- self.logger.debug(f"Processing {formula} : {mol_ids}")
181
+ self.logger.debug(f"Processing {shash} : {mol_ids}")
182
182
 
183
183
  vibe_docs = list()
184
184
 
@@ -213,7 +213,7 @@ class VibrationBuilder(Builder):
213
213
  tdoc = self.tasks.query_one(
214
214
  {
215
215
  "task_id": task,
216
- "formula_alphabetical": formula,
216
+ "species_hash": shash,
217
217
  "orig": {"$exists": True},
218
218
  }
219
219
  )
@@ -223,7 +223,7 @@ class VibrationBuilder(Builder):
223
223
  tdoc = self.tasks.query_one(
224
224
  {
225
225
  "task_id": int(task),
226
- "formula_alphabetical": formula,
226
+ "species_hash": shash,
227
227
  "orig": {"$exists": True},
228
228
  }
229
229
  )
@@ -243,7 +243,7 @@ class VibrationBuilder(Builder):
243
243
  )
244
244
  vibe_docs.append(vibe_doc)
245
245
 
246
- self.logger.debug(f"Produced {len(vibe_docs)} vibration docs for {formula}")
246
+ self.logger.debug(f"Produced {len(vibe_docs)} vibration docs for {shash}")
247
247
 
248
248
  return jsanitize([doc.model_dump() for doc in vibe_docs], allow_bson=True)
249
249
 
@@ -138,6 +138,7 @@ class MoleculesAssociationBuilder(Builder):
138
138
  self.tasks.ensure_index("formula_alphabetical")
139
139
  self.tasks.ensure_index("smiles")
140
140
  self.tasks.ensure_index("species_hash")
141
+ self.tasks.ensure_index("coord_hash")
141
142
 
142
143
  # Search index for molecules
143
144
  self.assoc.ensure_index("molecule_id")
@@ -165,7 +166,9 @@ class MoleculesAssociationBuilder(Builder):
165
166
  N = ceil(len(to_process_hashes) / number_splits)
166
167
 
167
168
  for hash_chunk in grouper(to_process_hashes, N):
168
- yield {"query": {"species_hash": {"$in": list(hash_chunk)}}}
169
+ query = dict(temp_query)
170
+ query["species_hash"] = {"$in": list(hash_chunk)}
171
+ yield {"query": query}
169
172
 
170
173
  def get_items(self) -> Iterator[List[TaskDocument]]:
171
174
  """
@@ -390,6 +393,7 @@ class MoleculesBuilder(Builder):
390
393
  self.assoc.ensure_index("last_updated")
391
394
  self.assoc.ensure_index("task_ids")
392
395
  self.assoc.ensure_index("formula_alphabetical")
396
+ self.assoc.ensure_index("species_hash")
393
397
 
394
398
  # Search index for molecules
395
399
  self.molecules.ensure_index("molecule_id")
@@ -433,16 +437,18 @@ class MoleculesBuilder(Builder):
433
437
  xyz_species_id_map[d[self.assoc.key]] = this_id
434
438
  to_process_docs = assoc_ids - processed_docs
435
439
 
436
- to_process_forms = {
437
- d["formula_alphabetical"]
440
+ to_process_hashes = {
441
+ d["species_hash"]
438
442
  for d in all_assoc
439
443
  if xyz_species_id_map[d[self.assoc.key]] in to_process_docs
440
444
  }
441
445
 
442
- N = ceil(len(to_process_forms) / number_splits)
446
+ N = ceil(len(to_process_hashes) / number_splits)
443
447
 
444
- for formula_chunk in grouper(to_process_forms, N):
445
- yield {"query": {"formula_alphabetical": {"$in": list(formula_chunk)}}}
448
+ for hash_chunk in grouper(to_process_hashes, N):
449
+ query = dict(temp_query)
450
+ query["species_hash"] = {"$in": list(hash_chunk)}
451
+ yield {"query": query}
446
452
 
447
453
  def get_items(self) -> Iterator[List[Dict]]:
448
454
  """
@@ -495,21 +501,21 @@ class MoleculesBuilder(Builder):
495
501
  xyz_species_id_map[d[self.assoc.key]] = this_id
496
502
  to_process_docs = assoc_ids - processed_docs
497
503
 
498
- to_process_forms = {
499
- d["formula_alphabetical"]
504
+ to_process_hashes = {
505
+ d["species_hash"]
500
506
  for d in all_assoc
501
507
  if xyz_species_id_map[d[self.assoc.key]] in to_process_docs
502
508
  }
503
509
 
504
510
  self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
505
- self.logger.info(f"Found {len(to_process_forms)} unprocessed formulas")
511
+ self.logger.info(f"Found {len(to_process_hashes)} unprocessed hashes")
506
512
 
507
513
  # Set total for builder bars to have a total
508
- self.total = len(to_process_forms)
514
+ self.total = len(to_process_hashes)
509
515
 
510
- for formula in to_process_forms:
516
+ for shash in to_process_hashes:
511
517
  assoc_query = dict(temp_query)
512
- assoc_query["formula_alphabetical"] = formula
518
+ assoc_query["species_hash"] = shash
513
519
  assoc = list(self.assoc.query(criteria=assoc_query))
514
520
 
515
521
  yield assoc
@@ -526,9 +532,9 @@ class MoleculesBuilder(Builder):
526
532
  """
527
533
 
528
534
  assoc = [MoleculeDoc(**item) for item in items]
529
- formula = assoc[0].formula_alphabetical
535
+ shash = assoc[0].species_hash
530
536
  mol_ids = [a.molecule_id for a in assoc]
531
- self.logger.debug(f"Processing {formula} : {mol_ids}")
537
+ self.logger.debug(f"Processing {shash} : {mol_ids}")
532
538
 
533
539
  complete_mol_docs = list()
534
540
 
@@ -646,7 +652,7 @@ class MoleculesBuilder(Builder):
646
652
 
647
653
  complete_mol_docs.append(base_doc)
648
654
 
649
- self.logger.debug(f"Produced {len(complete_mol_docs)} molecules for {formula}")
655
+ self.logger.debug(f"Produced {len(complete_mol_docs)} molecules for {shash}")
650
656
 
651
657
  return jsanitize(
652
658
  [mol.model_dump() for mol in complete_mol_docs], allow_bson=True
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: emmet-builders
3
- Version: 0.84.3rc2
3
+ Version: 0.84.3rc3
4
4
  Summary: Builders for the Emmet Library
5
5
  Home-page: https://github.com/materialsproject/emmet
6
6
  Author: The Materials Project
@@ -32,21 +32,23 @@ emmet/builders/matscholar/missing_compositions.py,sha256=RGQOEhfmJ6YMbjD4osLWqs7
32
32
  emmet/builders/mobility/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
33
  emmet/builders/mobility/migration_graph.py,sha256=WEXtPSn0UE5Q8mnvJ-T19FB3_LrZ3ojvNyRBs1PXWRg,3923
34
34
  emmet/builders/molecules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
- emmet/builders/molecules/atomic.py,sha256=X590oMDIPaHJMk0Xuy_r4hATm1TEj0hKfJD6ofi1asg,20823
36
- emmet/builders/molecules/bonds.py,sha256=5orXkCBgBBOqz1iGoacDUb5iGMsOPVmdmghtQLud9ZU,12086
37
- emmet/builders/molecules/metal_binding.py,sha256=gyL5Nu1pmu0ZJq4HgTxwLh8_1696pJiMnrFggFESnjo,23317
38
- emmet/builders/molecules/orbitals.py,sha256=W_7_3zz9bFfHQZgAMdp3PSSt4PDH4DVZVervHPrv1Pk,10041
39
- emmet/builders/molecules/redox.py,sha256=HHmj-nFMTEV7qq3g3GM2lB5RdLUMBE-xOIZogIgmORc,18427
40
- emmet/builders/molecules/summary.py,sha256=I9-4-oKoUSg5sxvr-CHYVIuCyD48mpV9rsMno4pbbOk,13198
41
- emmet/builders/molecules/thermo.py,sha256=DizVM9rLXo7AhHW3cq0Bo6vO1OI6YtK4PeIVixmt47g,19855
42
- emmet/builders/molecules/vibration.py,sha256=9LNeKh8BHck-ooW4XzAZAFeio2u6bDwdsUV1aA5XVb4,9501
35
+ emmet/builders/molecules/atomic.py,sha256=DBG_FScwT7YU1GgI69yRrR3_wUoWBwiP1uBvbL7xP3Y,20839
36
+ emmet/builders/molecules/bonds.py,sha256=aHp9U_LX3sZvdTIaZj2T8PG_wTFYqw6nOcYC-4MiI0E,12103
37
+ emmet/builders/molecules/electric.py,sha256=ldoLSfIAjue6YQlyXkgJRXLcfh178goiarjI_f_EtH4,10065
38
+ emmet/builders/molecules/metal_binding.py,sha256=kimYsmQWdmukRRX2_GgVywCNRhNHz_-fp2oMYJTVMrg,23308
39
+ emmet/builders/molecules/orbitals.py,sha256=nIsmx0m6Zi402opHE6OoEljGgQ2714p1Gig8Py1IXrU,10060
40
+ emmet/builders/molecules/redox.py,sha256=52er0zK_IVQ0UYUh7svml-zwlvOtbptz1D8ZYCglfI0,18448
41
+ emmet/builders/molecules/summary.py,sha256=7KHsnc9PBstps6s-hK4mYrWOBHt26_PTPKsCSFw3018,13848
42
+ emmet/builders/molecules/thermo.py,sha256=MutvuYJsU0Hj5Qaa_Z7qnikM6mkPSBPNE4RG8JK4qes,19874
43
+ emmet/builders/molecules/trajectory.py,sha256=oKrmWtKJ6mC0d1uJRE7g72X97kkS7JQ7nMhupVOUEUU,18163
44
+ emmet/builders/molecules/vibration.py,sha256=_FA-tRixghsJdlls6oO9U2abxCHWqWv5SucbxpP5mVQ,9520
43
45
  emmet/builders/qchem/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
- emmet/builders/qchem/molecules.py,sha256=CZyVQzjfb-_gAS997BFbd9xkKwvwPWrquNH0Aoy6oY8,26217
46
+ emmet/builders/qchem/molecules.py,sha256=qI8WFOJ69FMPu9hxGzoFR4V3y4qO7UhtzVZwG9AWPpw,26382
45
47
  emmet/builders/vasp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
48
  emmet/builders/vasp/materials.py,sha256=DWMwZFQ5ZYf9tBkgrJ_MfvSwt5GO4Yqm9GBKPpLxvWU,12890
47
49
  emmet/builders/vasp/mp_potcar_stats.json.gz,sha256=x3bn4gSMj1U_3bR2qKIaBtbJlYT-EJgoUIMFTA9bvaU,338957
48
50
  emmet/builders/vasp/task_validator.py,sha256=bmRTDiOWof4rpHVg3ksoxocN9xxieYu7IE-ylMjYOVs,2922
49
- emmet_builders-0.84.3rc2.dist-info/METADATA,sha256=1Hzqb1RmyYEkCpdGqvg7OVyrM-Hy0H_LgFpGo4ccN04,2162
50
- emmet_builders-0.84.3rc2.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
51
- emmet_builders-0.84.3rc2.dist-info/top_level.txt,sha256=6GcpbmWPeFhNCTfDFilb8GQ4T1UQu4z9c5jpobjwE-Q,6
52
- emmet_builders-0.84.3rc2.dist-info/RECORD,,
51
+ emmet_builders-0.84.3rc3.dist-info/METADATA,sha256=wZYHUNmrorHzpbC3RKur4fdbpXEYYj5YaWNQhqYYfQg,2162
52
+ emmet_builders-0.84.3rc3.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
53
+ emmet_builders-0.84.3rc3.dist-info/top_level.txt,sha256=6GcpbmWPeFhNCTfDFilb8GQ4T1UQu4z9c5jpobjwE-Q,6
54
+ emmet_builders-0.84.3rc3.dist-info/RECORD,,