emmet-builders 0.84.2rc8__tar.gz → 0.84.2rc9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of emmet-builders might be problematic; the original page links to more details.

Files changed (97)
  1. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/PKG-INFO +1 -1
  2. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/molecules/atomic.py +48 -46
  3. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/molecules/bonds.py +24 -24
  4. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/molecules/metal_binding.py +21 -20
  5. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/molecules/orbitals.py +23 -23
  6. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/molecules/redox.py +27 -27
  7. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/molecules/summary.py +21 -36
  8. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/molecules/thermo.py +23 -23
  9. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/molecules/vibration.py +23 -23
  10. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/qchem/molecules.py +15 -21
  11. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet_builders.egg-info/PKG-INFO +1 -1
  12. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet_builders.egg-info/SOURCES.txt +0 -4
  13. emmet-builders-0.84.2rc8/tests/molecules/test_electric.py → emmet-builders-0.84.2rc9/tests/molecules/test_orbitals.py +7 -7
  14. emmet-builders-0.84.2rc9/tests/molecules/test_summary.py +132 -0
  15. emmet-builders-0.84.2rc8/emmet/builders/molecules/electric.py +0 -282
  16. emmet-builders-0.84.2rc8/emmet/builders/molecules/trajectory.py +0 -525
  17. emmet-builders-0.84.2rc8/tests/molecules/test_orbitals.py +0 -70
  18. emmet-builders-0.84.2rc8/tests/molecules/test_summary.py +0 -222
  19. emmet-builders-0.84.2rc8/tests/molecules/test_trajectory.py +0 -50
  20. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/MANIFEST.in +0 -0
  21. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/__init__.py +0 -0
  22. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/abinit/__init__.py +0 -0
  23. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/abinit/phonon.py +0 -0
  24. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/abinit/sound_velocity.py +0 -0
  25. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/feff/__init__.py +0 -0
  26. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/feff/xas.py +0 -0
  27. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/__init__.py +0 -0
  28. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/absorption_spectrum.py +0 -0
  29. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/alloys.py +0 -0
  30. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/basic_descriptors.py +0 -0
  31. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/bonds.py +0 -0
  32. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/chemenv.py +0 -0
  33. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/corrected_entries.py +0 -0
  34. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/dielectric.py +0 -0
  35. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/elasticity.py +0 -0
  36. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/electrodes.py +0 -0
  37. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/electronic_structure.py +0 -0
  38. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/magnetism.py +0 -0
  39. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/ml.py +0 -0
  40. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/optimade.py +0 -0
  41. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/oxidation_states.py +0 -0
  42. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/piezoelectric.py +0 -0
  43. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/provenance.py +0 -0
  44. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/robocrys.py +0 -0
  45. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/similarity.py +0 -0
  46. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/substrates.py +0 -0
  47. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/summary.py +0 -0
  48. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/materials/thermo.py +0 -0
  49. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/matscholar/missing_compositions.py +0 -0
  50. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/mobility/__init__.py +0 -0
  51. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/mobility/migration_graph.py +0 -0
  52. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/molecules/__init__.py +0 -0
  53. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/qchem/__init__.py +0 -0
  54. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/settings.py +0 -0
  55. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/utils.py +0 -0
  56. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/vasp/__init__.py +0 -0
  57. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/vasp/materials.py +0 -0
  58. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/vasp/mp_potcar_stats.json.gz +0 -0
  59. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet/builders/vasp/task_validator.py +0 -0
  60. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet_builders.egg-info/dependency_links.txt +0 -0
  61. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet_builders.egg-info/not-zip-safe +0 -0
  62. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet_builders.egg-info/requires.txt +0 -0
  63. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/emmet_builders.egg-info/top_level.txt +0 -0
  64. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/requirements/deployment.txt +0 -0
  65. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/requirements/ubuntu-latest_py3.10.txt +0 -0
  66. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/requirements/ubuntu-latest_py3.10_extras.txt +0 -0
  67. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/requirements/ubuntu-latest_py3.11.txt +0 -0
  68. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/requirements/ubuntu-latest_py3.11_extras.txt +0 -0
  69. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/setup.cfg +0 -0
  70. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/setup.py +0 -0
  71. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/__init__.py +0 -0
  72. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/conftest.py +0 -0
  73. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/molecules/__init__.py +0 -0
  74. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/molecules/test_atomic.py +0 -0
  75. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/molecules/test_bonds.py +0 -0
  76. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/molecules/test_metal_binding.py +0 -0
  77. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/molecules/test_redox.py +0 -0
  78. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/molecules/test_thermo.py +0 -0
  79. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/molecules/test_vibration.py +0 -0
  80. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_absorption.py +0 -0
  81. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_basic_descriptors.py +0 -0
  82. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_chemenv.py +0 -0
  83. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_corrected_entries_thermo.py +0 -0
  84. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_dielectric.py +0 -0
  85. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_elasticity.py +0 -0
  86. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_electronic_structure.py +0 -0
  87. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_magnetism.py +0 -0
  88. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_materials.py +0 -0
  89. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_ml.py +0 -0
  90. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_mobility.py +0 -0
  91. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_oxidation.py +0 -0
  92. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_piezoelectric.py +0 -0
  93. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_qchem.py +0 -0
  94. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_similarity.py +0 -0
  95. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_summary.py +0 -0
  96. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_utils.py +0 -0
  97. {emmet-builders-0.84.2rc8 → emmet-builders-0.84.2rc9}/tests/test_vasp.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: emmet-builders
3
- Version: 0.84.2rc8
3
+ Version: 0.84.2rc9
4
4
  Summary: Builders for the Emmet Library
5
5
  Home-page: https://github.com/materialsproject/emmet
6
6
  Author: The Materials Project
@@ -41,7 +41,7 @@ class PartialChargesBuilder(Builder):
41
41
  energy) will be used.
42
42
 
43
43
  The process is as follows:
44
- 1. Gather MoleculeDocs by species hash
44
+ 1. Gather MoleculeDocs by formula
45
45
  2. For each molecule, group all tasks by solvent.
46
46
  3. For each solvent, sort tasks by level of theory and electronic energy
47
47
  4. For each method:
@@ -86,14 +86,12 @@ class PartialChargesBuilder(Builder):
86
86
  self.tasks.ensure_index("last_updated")
87
87
  self.tasks.ensure_index("state")
88
88
  self.tasks.ensure_index("formula_alphabetical")
89
- self.tasks.ensure_index("species_hash")
90
89
 
91
90
  # Search index for molecules
92
91
  self.molecules.ensure_index("molecule_id")
93
92
  self.molecules.ensure_index("last_updated")
94
93
  self.molecules.ensure_index("task_ids")
95
94
  self.molecules.ensure_index("formula_alphabetical")
96
- self.molecules.ensure_index("species_hash")
97
95
 
98
96
  # Search index for charges
99
97
  self.charges.ensure_index("molecule_id")
@@ -113,23 +111,23 @@ class PartialChargesBuilder(Builder):
113
111
 
114
112
  self.logger.info("Finding documents to process")
115
113
  all_mols = list(
116
- self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
114
+ self.molecules.query(
115
+ temp_query, [self.molecules.key, "formula_alphabetical"]
116
+ )
117
117
  )
118
118
 
119
119
  processed_docs = set([e for e in self.charges.distinct("molecule_id")])
120
120
  to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
121
- to_process_hashes = {
122
- d["species_hash"]
121
+ to_process_forms = {
122
+ d["formula_alphabetical"]
123
123
  for d in all_mols
124
124
  if d[self.molecules.key] in to_process_docs
125
125
  }
126
126
 
127
- N = ceil(len(to_process_hashes) / number_splits)
127
+ N = ceil(len(to_process_forms) / number_splits)
128
128
 
129
- for hash_chunk in grouper(to_process_hashes, N):
130
- query = dict(temp_query)
131
- query["species_hash"] = {"$in": list(hash_chunk)}
132
- yield {"query": query}
129
+ for formula_chunk in grouper(to_process_forms, N):
130
+ yield {"query": {"formula_alphabetical": {"$in": list(formula_chunk)}}}
133
131
 
134
132
  def get_items(self) -> Iterator[List[Dict]]:
135
133
  """
@@ -154,26 +152,28 @@ class PartialChargesBuilder(Builder):
154
152
 
155
153
  self.logger.info("Finding documents to process")
156
154
  all_mols = list(
157
- self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
155
+ self.molecules.query(
156
+ temp_query, [self.molecules.key, "formula_alphabetical"]
157
+ )
158
158
  )
159
159
 
160
160
  processed_docs = set([e for e in self.charges.distinct("molecule_id")])
161
161
  to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
162
- to_process_hashes = {
163
- d["species_hash"]
162
+ to_process_forms = {
163
+ d["formula_alphabetical"]
164
164
  for d in all_mols
165
165
  if d[self.molecules.key] in to_process_docs
166
166
  }
167
167
 
168
168
  self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
169
- self.logger.info(f"Found {len(to_process_hashes)} unprocessed hashes")
169
+ self.logger.info(f"Found {len(to_process_forms)} unprocessed formulas")
170
170
 
171
171
  # Set total for builder bars to have a total
172
- self.total = len(to_process_hashes)
172
+ self.total = len(to_process_forms)
173
173
 
174
- for shash in to_process_hashes:
174
+ for formula in to_process_forms:
175
175
  mol_query = dict(temp_query)
176
- mol_query["species_hash"] = shash
176
+ mol_query["formula_alphabetical"] = formula
177
177
  molecules = list(self.molecules.query(criteria=mol_query))
178
178
 
179
179
  yield molecules
@@ -190,9 +190,9 @@ class PartialChargesBuilder(Builder):
190
190
  """
191
191
 
192
192
  mols = [MoleculeDoc(**item) for item in items]
193
- shash = mols[0].species_hash
193
+ formula = mols[0].formula_alphabetical
194
194
  mol_ids = [m.molecule_id for m in mols]
195
- self.logger.debug(f"Processing {shash} : {mol_ids}")
195
+ self.logger.debug(f"Processing {formula} : {mol_ids}")
196
196
 
197
197
  charges_docs = list()
198
198
 
@@ -237,7 +237,7 @@ class PartialChargesBuilder(Builder):
237
237
  tdoc = self.tasks.query_one(
238
238
  {
239
239
  "task_id": task,
240
- "species_hash": shash,
240
+ "formula_alphabetical": formula,
241
241
  "orig": {"$exists": True},
242
242
  }
243
243
  )
@@ -247,7 +247,7 @@ class PartialChargesBuilder(Builder):
247
247
  tdoc = self.tasks.query_one(
248
248
  {
249
249
  "task_id": int(task),
250
- "species_hash": shash,
250
+ "formula_alphabetical": formula,
251
251
  "orig": {"$exists": True},
252
252
  }
253
253
  )
@@ -271,7 +271,7 @@ class PartialChargesBuilder(Builder):
271
271
 
272
272
  charges_docs.append(doc)
273
273
 
274
- self.logger.debug(f"Produced {len(charges_docs)} charges docs for {shash}")
274
+ self.logger.debug(f"Produced {len(charges_docs)} charges docs for {formula}")
275
275
 
276
276
  return jsanitize([doc.model_dump() for doc in charges_docs], allow_bson=True)
277
277
 
@@ -320,7 +320,7 @@ class PartialSpinsBuilder(Builder):
320
320
  data available (based on level of theory and electronic energy) will be used.
321
321
 
322
322
  The process is as follows:
323
- 1. Gather MoleculeDocs by species_hash
323
+ 1. Gather MoleculeDocs by formula
324
324
  2. For each molecule, group all tasks by solvent.
325
325
  3. For each solvent, sort tasks by level of theory and electronic energy
326
326
  4. For each method:
@@ -365,14 +365,12 @@ class PartialSpinsBuilder(Builder):
365
365
  self.tasks.ensure_index("last_updated")
366
366
  self.tasks.ensure_index("state")
367
367
  self.tasks.ensure_index("formula_alphabetical")
368
- self.tasks.ensure_index("species_hash")
369
368
 
370
369
  # Search index for molecules
371
370
  self.molecules.ensure_index("molecule_id")
372
371
  self.molecules.ensure_index("last_updated")
373
372
  self.molecules.ensure_index("task_ids")
374
373
  self.molecules.ensure_index("formula_alphabetical")
375
- self.molecules.ensure_index("species_hash")
376
374
 
377
375
  # Search index for spins
378
376
  self.spins.ensure_index("molecule_id")
@@ -392,23 +390,23 @@ class PartialSpinsBuilder(Builder):
392
390
 
393
391
  self.logger.info("Finding documents to process")
394
392
  all_mols = list(
395
- self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
393
+ self.molecules.query(
394
+ temp_query, [self.molecules.key, "formula_alphabetical"]
395
+ )
396
396
  )
397
397
 
398
398
  processed_docs = set([e for e in self.spins.distinct("molecule_id")])
399
399
  to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
400
- to_process_hashes = {
401
- d["species_hash"]
400
+ to_process_forms = {
401
+ d["formula_alphabetical"]
402
402
  for d in all_mols
403
403
  if d[self.molecules.key] in to_process_docs
404
404
  }
405
405
 
406
- N = ceil(len(to_process_hashes) / number_splits)
406
+ N = ceil(len(to_process_forms) / number_splits)
407
407
 
408
- for hash_chunk in grouper(to_process_hashes, N):
409
- query = dict(temp_query)
410
- query["species_hash"] = {"$in": list(hash_chunk)}
411
- yield {"query": query}
408
+ for formula_chunk in grouper(to_process_forms, N):
409
+ yield {"query": {"formula_alphabetical": {"$in": list(formula_chunk)}}}
412
410
 
413
411
  def get_items(self) -> Iterator[List[Dict]]:
414
412
  """
@@ -433,26 +431,28 @@ class PartialSpinsBuilder(Builder):
433
431
 
434
432
  self.logger.info("Finding documents to process")
435
433
  all_mols = list(
436
- self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
434
+ self.molecules.query(
435
+ temp_query, [self.molecules.key, "formula_alphabetical"]
436
+ )
437
437
  )
438
438
 
439
439
  processed_docs = set([e for e in self.spins.distinct("molecule_id")])
440
440
  to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
441
- to_process_hashes = {
442
- d["species_hash"]
441
+ to_process_forms = {
442
+ d["formula_alphabetical"]
443
443
  for d in all_mols
444
444
  if d[self.molecules.key] in to_process_docs
445
445
  }
446
446
 
447
447
  self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
448
- self.logger.info(f"Found {len(to_process_hashes)} unprocessed hashes")
448
+ self.logger.info(f"Found {len(to_process_forms)} unprocessed formulas")
449
449
 
450
450
  # Set total for builder bars to have a total
451
- self.total = len(to_process_hashes)
451
+ self.total = len(to_process_forms)
452
452
 
453
- for shash in to_process_hashes:
453
+ for formula in to_process_forms:
454
454
  mol_query = dict(temp_query)
455
- mol_query["species_hash"] = shash
455
+ mol_query["formula_alphabetical"] = formula
456
456
  molecules = list(self.molecules.query(criteria=mol_query))
457
457
 
458
458
  yield molecules
@@ -469,9 +469,9 @@ class PartialSpinsBuilder(Builder):
469
469
  """
470
470
 
471
471
  mols = [MoleculeDoc(**item) for item in items]
472
- shash = mols[0].species_hash
472
+ formula = mols[0].formula_alphabetical
473
473
  mol_ids = [m.molecule_id for m in mols]
474
- self.logger.debug(f"Processing {shash} : {mol_ids}")
474
+ self.logger.debug(f"Processing {formula} : {mol_ids}")
475
475
 
476
476
  spins_docs = list()
477
477
 
@@ -520,7 +520,7 @@ class PartialSpinsBuilder(Builder):
520
520
  tdoc = self.tasks.query_one(
521
521
  {
522
522
  "task_id": task,
523
- "species_hash": shash,
523
+ "formula_alphabetical": formula,
524
524
  "orig": {"$exists": True},
525
525
  }
526
526
  )
@@ -530,7 +530,7 @@ class PartialSpinsBuilder(Builder):
530
530
  tdoc = self.tasks.query_one(
531
531
  {
532
532
  "task_id": int(task),
533
- "species_hash": shash,
533
+ "formula_alphabetical": formula,
534
534
  "orig": {"$exists": True},
535
535
  }
536
536
  )
@@ -551,7 +551,9 @@ class PartialSpinsBuilder(Builder):
551
551
 
552
552
  spins_docs.append(doc)
553
553
 
554
- self.logger.debug(f"Produced {len(spins_docs)} partial spins docs for {shash}")
554
+ self.logger.debug(
555
+ f"Produced {len(spins_docs)} partial spins docs for {formula}"
556
+ )
555
557
 
556
558
  return jsanitize([doc.model_dump() for doc in spins_docs], allow_bson=True)
557
559
 
@@ -40,7 +40,7 @@ class BondingBuilder(Builder):
40
40
  data available (based on level of theory and electronic energy) will be used.
41
41
 
42
42
  The process is as follows:
43
- 1. Gather MoleculeDocs by species hash
43
+ 1. Gather MoleculeDocs by formula
44
44
  2. For each molecule, group all tasks by solvent.
45
45
  3. For each solvent, sort tasks by level of theory and electronic energy
46
46
  4. For each method:
@@ -85,14 +85,12 @@ class BondingBuilder(Builder):
85
85
  self.tasks.ensure_index("last_updated")
86
86
  self.tasks.ensure_index("state")
87
87
  self.tasks.ensure_index("formula_alphabetical")
88
- self.tasks.ensure_index("species_hash")
89
88
 
90
89
  # Search index for molecules
91
90
  self.molecules.ensure_index("molecule_id")
92
91
  self.molecules.ensure_index("last_updated")
93
92
  self.molecules.ensure_index("task_ids")
94
93
  self.molecules.ensure_index("formula_alphabetical")
95
- self.molecules.ensure_index("species_hash")
96
94
 
97
95
  # Search index for bonds
98
96
  self.bonds.ensure_index("molecule_id")
@@ -112,23 +110,23 @@ class BondingBuilder(Builder):
112
110
 
113
111
  self.logger.info("Finding documents to process")
114
112
  all_mols = list(
115
- self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
113
+ self.molecules.query(
114
+ temp_query, [self.molecules.key, "formula_alphabetical"]
115
+ )
116
116
  )
117
117
 
118
118
  processed_docs = set([e for e in self.bonds.distinct("molecule_id")])
119
119
  to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
120
- to_process_hashes = {
121
- d["species_hash"]
120
+ to_process_forms = {
121
+ d["formula_alphabetical"]
122
122
  for d in all_mols
123
123
  if d[self.molecules.key] in to_process_docs
124
124
  }
125
125
 
126
- N = ceil(len(to_process_hashes) / number_splits)
126
+ N = ceil(len(to_process_forms) / number_splits)
127
127
 
128
- for hash_chunk in grouper(to_process_hashes, N):
129
- query = dict(temp_query)
130
- query["species_hash"] = {"$in": list(hash_chunk)}
131
- yield {"query": query}
128
+ for formula_chunk in grouper(to_process_forms, N):
129
+ yield {"query": {"formula_alphabetical": {"$in": list(formula_chunk)}}}
132
130
 
133
131
  def get_items(self) -> Iterator[List[Dict]]:
134
132
  """
@@ -153,26 +151,28 @@ class BondingBuilder(Builder):
153
151
 
154
152
  self.logger.info("Finding documents to process")
155
153
  all_mols = list(
156
- self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
154
+ self.molecules.query(
155
+ temp_query, [self.molecules.key, "formula_alphabetical"]
156
+ )
157
157
  )
158
158
 
159
159
  processed_docs = set([e for e in self.bonds.distinct("molecule_id")])
160
160
  to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
161
- to_process_hashes = {
162
- d["species_hash"]
161
+ to_process_forms = {
162
+ d["formula_alphabetical"]
163
163
  for d in all_mols
164
164
  if d[self.molecules.key] in to_process_docs
165
165
  }
166
166
 
167
167
  self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
168
- self.logger.info(f"Found {len(to_process_hashes)} unprocessed hashes")
168
+ self.logger.info(f"Found {len(to_process_forms)} unprocessed formulas")
169
169
 
170
170
  # Set total for builder bars to have a total
171
- self.total = len(to_process_hashes)
171
+ self.total = len(to_process_forms)
172
172
 
173
- for shash in to_process_hashes:
173
+ for formula in to_process_forms:
174
174
  mol_query = dict(temp_query)
175
- mol_query["species_hash"] = shash
175
+ mol_query["formula_alphabetical"] = formula
176
176
  molecules = list(self.molecules.query(criteria=mol_query))
177
177
 
178
178
  yield molecules
@@ -189,9 +189,9 @@ class BondingBuilder(Builder):
189
189
  """
190
190
 
191
191
  mols = [MoleculeDoc(**item) for item in items]
192
- shash = mols[0].species_hash
192
+ formula = mols[0].formula_alphabetical
193
193
  mol_ids = [m.molecule_id for m in mols]
194
- self.logger.debug(f"Processing {shash} : {mol_ids}")
194
+ self.logger.debug(f"Processing {formula} : {mol_ids}")
195
195
 
196
196
  bonding_docs = list()
197
197
 
@@ -255,7 +255,7 @@ class BondingBuilder(Builder):
255
255
  tdoc = self.tasks.query_one(
256
256
  {
257
257
  "task_id": task,
258
- "species_hash": shash,
258
+ "formula_alphabetical": formula,
259
259
  "orig": {"$exists": True},
260
260
  }
261
261
  )
@@ -265,7 +265,7 @@ class BondingBuilder(Builder):
265
265
  tdoc = self.tasks.query_one(
266
266
  {
267
267
  "task_id": int(task),
268
- "species_hash": shash,
268
+ "formula_alphabetical": formula,
269
269
  "orig": {"$exists": True},
270
270
  }
271
271
  )
@@ -288,13 +288,13 @@ class BondingBuilder(Builder):
288
288
  )
289
289
  bonding_docs.append(doc)
290
290
 
291
- self.logger.debug(f"Produced {len(bonding_docs)} bonding docs for {shash}")
291
+ self.logger.debug(f"Produced {len(bonding_docs)} bonding docs for {formula}")
292
292
 
293
293
  return jsanitize([doc.model_dump() for doc in bonding_docs], allow_bson=True)
294
294
 
295
295
  def update_targets(self, items: List[List[Dict]]):
296
296
  """
297
- Inserts the new documents into the bonds collection
297
+ Inserts the new documents into the charges collection
298
298
 
299
299
  Args:
300
300
  items [[dict]]: A list of documents to update
@@ -44,7 +44,7 @@ class MetalBindingBuilder(Builder):
44
44
  will be used.
45
45
 
46
46
  The process is as follows:
47
- 1. Gather MoleculeDocs by species hash
47
+ 1. Gather MoleculeDocs by formula
48
48
  2. For each molecule, first identify if there are any metals. If not, then no MetalBindingDoc can be made.
49
49
  If so, then identify the possible solvents that can be used to generate MetalBindingDocs
50
50
  3. For each combination of Molecule ID and solvent, search for additional documents:
@@ -111,7 +111,6 @@ class MetalBindingBuilder(Builder):
111
111
  self.molecules.ensure_index("last_updated")
112
112
  self.molecules.ensure_index("task_ids")
113
113
  self.molecules.ensure_index("formula_alphabetical")
114
- self.molecules.ensure_index("species_hash")
115
114
 
116
115
  # Search index for charges
117
116
  self.charges.ensure_index("molecule_id")
@@ -169,23 +168,23 @@ class MetalBindingBuilder(Builder):
169
168
 
170
169
  self.logger.info("Finding documents to process")
171
170
  all_mols = list(
172
- self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
171
+ self.molecules.query(
172
+ temp_query, [self.molecules.key, "formula_alphabetical"]
173
+ )
173
174
  )
174
175
 
175
176
  processed_docs = set([e for e in self.metal_binding.distinct("molecule_id")])
176
177
  to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
177
- to_process_hashes = {
178
- d["species_hash"]
178
+ to_process_forms = {
179
+ d["formula_alphabetical"]
179
180
  for d in all_mols
180
181
  if d[self.molecules.key] in to_process_docs
181
182
  }
182
183
 
183
- N = ceil(len(to_process_hashes) / number_splits)
184
+ N = ceil(len(to_process_forms) / number_splits)
184
185
 
185
- for hash_chunk in grouper(to_process_hashes, N):
186
- query = dict(temp_query)
187
- query["species_hash"] = {"$in": list(hash_chunk)}
188
- yield {"query": query}
186
+ for formula_chunk in grouper(to_process_forms, N):
187
+ yield {"query": {"formula_alphabetical": {"$in": list(formula_chunk)}}}
189
188
 
190
189
  def get_items(self) -> Iterator[List[Dict]]:
191
190
  """
@@ -208,26 +207,28 @@ class MetalBindingBuilder(Builder):
208
207
 
209
208
  self.logger.info("Finding documents to process")
210
209
  all_mols = list(
211
- self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
210
+ self.molecules.query(
211
+ temp_query, [self.molecules.key, "formula_alphabetical"]
212
+ )
212
213
  )
213
214
 
214
215
  processed_docs = set([e for e in self.metal_binding.distinct("molecule_id")])
215
216
  to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
216
- to_process_hashes = {
217
- d["species_hash"]
217
+ to_process_forms = {
218
+ d["formula_alphabetical"]
218
219
  for d in all_mols
219
220
  if d[self.molecules.key] in to_process_docs
220
221
  }
221
222
 
222
223
  self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
223
- self.logger.info(f"Found {len(to_process_hashes)} unprocessed hashes")
224
+ self.logger.info(f"Found {len(to_process_forms)} unprocessed formulas")
224
225
 
225
226
  # Set total for builder bars to have a total
226
- self.total = len(to_process_hashes)
227
+ self.total = len(to_process_forms)
227
228
 
228
- for shash in to_process_hashes:
229
+ for formula in to_process_forms:
229
230
  mol_query = dict(temp_query)
230
- mol_query["species_hash"] = shash
231
+ mol_query["formula_alphabetical"] = formula
231
232
  molecules = list(self.molecules.query(criteria=mol_query))
232
233
 
233
234
  yield molecules
@@ -244,9 +245,9 @@ class MetalBindingBuilder(Builder):
244
245
  """
245
246
 
246
247
  mols = [MoleculeDoc(**item) for item in items]
247
- shash = mols[0].species_hash
248
+ formula = mols[0].formula_alphabetical
248
249
  mol_ids = [m.molecule_id for m in mols]
249
- self.logger.debug(f"Processing {shash} : {mol_ids}")
250
+ self.logger.debug(f"Processing {formula} : {mol_ids}")
250
251
 
251
252
  binding_docs = list()
252
253
 
@@ -486,7 +487,7 @@ class MetalBindingBuilder(Builder):
486
487
  binding_docs.append(doc)
487
488
 
488
489
  self.logger.debug(
489
- f"Produced {len(binding_docs)} metal binding docs for {shash}"
490
+ f"Produced {len(binding_docs)} metal binding docs for {formula}"
490
491
  )
491
492
 
492
493
  return jsanitize([doc.model_dump() for doc in binding_docs], allow_bson=True)
@@ -27,7 +27,7 @@ class OrbitalBuilder(Builder):
27
27
  each solvent available).
28
28
 
29
29
  The process is as follows:
30
- 1. Gather MoleculeDocs by species hash
30
+ 1. Gather MoleculeDocs by formula
31
31
  2. For each doc, sort tasks by solvent
32
32
  3. For each solvent, grab the best TaskDoc (including NBO data using
33
33
  the highest level of theory with lowest electronic energy for the
@@ -69,14 +69,12 @@ class OrbitalBuilder(Builder):
69
69
  self.tasks.ensure_index("last_updated")
70
70
  self.tasks.ensure_index("state")
71
71
  self.tasks.ensure_index("formula_alphabetical")
72
- self.tasks.ensure_index("species_hash")
73
72
 
74
73
  # Search index for molecules
75
74
  self.molecules.ensure_index("molecule_id")
76
75
  self.molecules.ensure_index("last_updated")
77
76
  self.molecules.ensure_index("task_ids")
78
77
  self.molecules.ensure_index("formula_alphabetical")
79
- self.molecules.ensure_index("species_hash")
80
78
 
81
79
  # Search index for orbitals
82
80
  self.orbitals.ensure_index("molecule_id")
@@ -95,23 +93,23 @@ class OrbitalBuilder(Builder):
95
93
 
96
94
  self.logger.info("Finding documents to process")
97
95
  all_mols = list(
98
- self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
96
+ self.molecules.query(
97
+ temp_query, [self.molecules.key, "formula_alphabetical"]
98
+ )
99
99
  )
100
100
 
101
101
  processed_docs = set([e for e in self.orbitals.distinct("molecule_id")])
102
102
  to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
103
- to_process_hashes = {
104
- d["species_hash"]
103
+ to_process_forms = {
104
+ d["formula_alphabetical"]
105
105
  for d in all_mols
106
106
  if d[self.molecules.key] in to_process_docs
107
107
  }
108
108
 
109
- N = ceil(len(to_process_hashes) / number_splits)
109
+ N = ceil(len(to_process_forms) / number_splits)
110
110
 
111
- for hash_chunk in grouper(to_process_hashes, N):
112
- query = dict(temp_query)
113
- query["species_hash"] = {"$in": list(hash_chunk)}
114
- yield {"query": query}
111
+ for formula_chunk in grouper(to_process_forms, N):
112
+ yield {"query": {"formula_alphabetical": {"$in": list(formula_chunk)}}}
115
113
 
116
114
  def get_items(self) -> Iterator[List[Dict]]:
117
115
  """
@@ -136,26 +134,28 @@ class OrbitalBuilder(Builder):
136
134
 
137
135
  self.logger.info("Finding documents to process")
138
136
  all_mols = list(
139
- self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
137
+ self.molecules.query(
138
+ temp_query, [self.molecules.key, "formula_alphabetical"]
139
+ )
140
140
  )
141
141
 
142
142
  processed_docs = set([e for e in self.orbitals.distinct("molecule_id")])
143
143
  to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
144
- to_process_hashes = {
145
- d["species_hash"]
144
+ to_process_forms = {
145
+ d["formula_alphabetical"]
146
146
  for d in all_mols
147
147
  if d[self.molecules.key] in to_process_docs
148
148
  }
149
149
 
150
150
  self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
151
- self.logger.info(f"Found {len(to_process_hashes)} unprocessed hashes")
151
+ self.logger.info(f"Found {len(to_process_forms)} unprocessed formulas")
152
152
 
153
153
  # Set total for builder bars to have a total
154
- self.total = len(to_process_hashes)
154
+ self.total = len(to_process_forms)
155
155
 
156
- for shash in to_process_hashes:
156
+ for formula in to_process_forms:
157
157
  mol_query = dict(temp_query)
158
- mol_query["species_hash"] = shash
158
+ mol_query["formula_alphabetical"] = formula
159
159
  molecules = list(self.molecules.query(criteria=mol_query))
160
160
 
161
161
  yield molecules
@@ -172,9 +172,9 @@ class OrbitalBuilder(Builder):
172
172
  """
173
173
 
174
174
  mols = [MoleculeDoc(**item) for item in items]
175
- shash = mols[0].species_hash
175
+ formula = mols[0].formula_alphabetical
176
176
  mol_ids = [m.molecule_id for m in mols]
177
- self.logger.info(f"Processing {shash} : {mol_ids}")
177
+ self.logger.info(f"Processing {formula} : {mol_ids}")
178
178
 
179
179
  orbital_docs = list()
180
180
 
@@ -221,7 +221,7 @@ class OrbitalBuilder(Builder):
221
221
  tdoc = self.tasks.query_one(
222
222
  {
223
223
  "task_id": task,
224
- "species_hash": shash,
224
+ "formula_alphabetical": formula,
225
225
  "orig": {"$exists": True},
226
226
  }
227
227
  )
@@ -231,7 +231,7 @@ class OrbitalBuilder(Builder):
231
231
  tdoc = self.tasks.query_one(
232
232
  {
233
233
  "task_id": int(task),
234
- "species_hash": shash,
234
+ "formula_alphabetical": formula,
235
235
  "orig": {"$exists": True},
236
236
  }
237
237
  )
@@ -253,7 +253,7 @@ class OrbitalBuilder(Builder):
253
253
  if orbital_doc is not None:
254
254
  orbital_docs.append(orbital_doc)
255
255
 
256
- self.logger.debug(f"Produced {len(orbital_docs)} orbital docs for {shash}")
256
+ self.logger.debug(f"Produced {len(orbital_docs)} orbital docs for {formula}")
257
257
 
258
258
  return jsanitize([doc.model_dump() for doc in orbital_docs], allow_bson=True)
259
259