emmet-builders 0.84.2rc8__py3-none-any.whl → 0.84.2rc9__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of emmet-builders might be problematic. Click here for more details.

@@ -30,7 +30,7 @@ class RedoxBuilder(Builder):
30
30
  from a MoleculeDoc (lowest electronic energy, highest level of theory).
31
31
 
32
32
  The process is as follows:
33
- 1. Gather MoleculeDocs by species hash
33
+ 1. Gather MoleculeDocs by formula
34
34
  2. Further group based on (covalent) isomorphism and charge
35
35
  3. For each MoleculeDoc:
36
36
  3a. Identify relevant MoleculeThermoDocs
@@ -81,14 +81,12 @@ class RedoxBuilder(Builder):
81
81
  self.tasks.ensure_index("last_updated")
82
82
  self.tasks.ensure_index("state")
83
83
  self.tasks.ensure_index("formula_alphabetical")
84
- self.tasks.ensure_index("species_hash")
85
84
 
86
85
  # Search index for molecules
87
86
  self.molecules.ensure_index("molecule_id")
88
87
  self.molecules.ensure_index("last_updated")
89
88
  self.molecules.ensure_index("task_ids")
90
89
  self.molecules.ensure_index("formula_alphabetical")
91
- self.molecules.ensure_index("species_hash")
92
90
 
93
91
  # Search index for thermo
94
92
  self.thermo.ensure_index("molecule_id")
@@ -115,23 +113,23 @@ class RedoxBuilder(Builder):
115
113
 
116
114
  self.logger.info("Finding documents to process")
117
115
  all_mols = list(
118
- self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
116
+ self.molecules.query(
117
+ temp_query, [self.molecules.key, "formula_alphabetical"]
118
+ )
119
119
  )
120
120
 
121
121
  processed_docs = set([e for e in self.redox.distinct("molecule_id")])
122
122
  to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
123
- to_process_hashes = {
124
- d["species_hash"]
123
+ to_process_forms = {
124
+ d["formula_alphabetical"]
125
125
  for d in all_mols
126
126
  if d[self.molecules.key] in to_process_docs
127
127
  }
128
128
 
129
- N = ceil(len(to_process_hashes) / number_splits)
129
+ N = ceil(len(to_process_forms) / number_splits)
130
130
 
131
- for hash_chunk in grouper(to_process_hashes, N):
132
- query = dict(temp_query)
133
- query["species_hash"] = {"$in": list(hash_chunk)}
134
- yield {"query": query}
131
+ for formula_chunk in grouper(to_process_forms, N):
132
+ yield {"query": {"formula_alphabetical": {"$in": list(formula_chunk)}}}
135
133
 
136
134
  def get_items(self) -> Iterator[List[Dict]]:
137
135
  """
@@ -156,26 +154,28 @@ class RedoxBuilder(Builder):
156
154
 
157
155
  self.logger.info("Finding documents to process")
158
156
  all_mols = list(
159
- self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
157
+ self.molecules.query(
158
+ temp_query, [self.molecules.key, "formula_alphabetical"]
159
+ )
160
160
  )
161
161
 
162
162
  processed_docs = set([e for e in self.redox.distinct("molecule_id")])
163
163
  to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
164
- to_process_hashes = {
165
- d["species_hash"]
164
+ to_process_forms = {
165
+ d["formula_alphabetical"]
166
166
  for d in all_mols
167
167
  if d[self.molecules.key] in to_process_docs
168
168
  }
169
169
 
170
170
  self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
171
- self.logger.info(f"Found {len(to_process_hashes)} unprocessed hashes")
171
+ self.logger.info(f"Found {len(to_process_forms)} unprocessed formulas")
172
172
 
173
173
  # Set total for builder bars to have a total
174
- self.total = len(to_process_hashes)
174
+ self.total = len(to_process_forms)
175
175
 
176
- for shash in to_process_hashes:
176
+ for formula in to_process_forms:
177
177
  mol_query = dict(temp_query)
178
- mol_query["species_hash"] = shash
178
+ mol_query["formula_alphabetical"] = formula
179
179
  molecules = list(self.molecules.query(criteria=mol_query))
180
180
 
181
181
  yield molecules
@@ -192,9 +192,9 @@ class RedoxBuilder(Builder):
192
192
  """
193
193
 
194
194
  mols = [MoleculeDoc(**item) for item in items]
195
- shash = mols[0].species_hash
195
+ formula = mols[0].formula_alphabetical
196
196
  mol_ids = [m.molecule_id for m in mols]
197
- self.logger.debug(f"Processing {shash} : {mol_ids}")
197
+ self.logger.debug(f"Processing {formula} : {mol_ids}")
198
198
 
199
199
  redox_docs = list()
200
200
 
@@ -220,7 +220,7 @@ class RedoxBuilder(Builder):
220
220
  e["task_id"]
221
221
  for e in gg.entries
222
222
  if e["charge"] == gg.charge + 1
223
- and e["task_type"] in ["Single Point", "Force"]
223
+ and e["task_type"] == "Single Point"
224
224
  and e["output"].get("final_energy")
225
225
  ]
226
226
  ie_tasks = list()
@@ -228,7 +228,7 @@ class RedoxBuilder(Builder):
228
228
  tdoc = self.tasks.query_one(
229
229
  {
230
230
  "task_id": i,
231
- "species_hash": shash,
231
+ "formula_alphabetical": formula,
232
232
  "orig": {"$exists": True},
233
233
  }
234
234
  )
@@ -238,7 +238,7 @@ class RedoxBuilder(Builder):
238
238
  tdoc = self.tasks.query_one(
239
239
  {
240
240
  "task_id": int(i),
241
- "species_hash": shash,
241
+ "formula_alphabetical": formula,
242
242
  "orig": {"$exists": True},
243
243
  }
244
244
  )
@@ -254,7 +254,7 @@ class RedoxBuilder(Builder):
254
254
  e["task_id"]
255
255
  for e in gg.entries
256
256
  if e["charge"] == gg.charge - 1
257
- and e["task_type"] in ["Single Point", "Force"]
257
+ and e["task_type"] == "Single Point"
258
258
  and e["output"].get("final_energy")
259
259
  ]
260
260
  ea_tasks = list()
@@ -262,7 +262,7 @@ class RedoxBuilder(Builder):
262
262
  tdoc = self.tasks.query_one(
263
263
  {
264
264
  "task_id": i,
265
- "species_hash": shash,
265
+ "formula_alphabetical": formula,
266
266
  "orig": {"$exists": True},
267
267
  }
268
268
  )
@@ -272,7 +272,7 @@ class RedoxBuilder(Builder):
272
272
  tdoc = self.tasks.query_one(
273
273
  {
274
274
  "task_id": int(i),
275
- "species_hash": shash,
275
+ "formula_alphabetical": formula,
276
276
  "orig": {"$exists": True},
277
277
  }
278
278
  )
@@ -354,7 +354,7 @@ class RedoxBuilder(Builder):
354
354
  )
355
355
  )
356
356
 
357
- self.logger.debug(f"Produced {len(redox_docs)} redox docs for {shash}")
357
+ self.logger.debug(f"Produced {len(redox_docs)} redox docs for {formula}")
358
358
 
359
359
  return jsanitize(
360
360
  [doc.model_dump() for doc in redox_docs if doc is not None], allow_bson=True
@@ -36,7 +36,6 @@ class SummaryBuilder(Builder):
36
36
  charges: Store,
37
37
  spins: Store,
38
38
  bonds: Store,
39
- multipoles: Store,
40
39
  metal_binding: Store,
41
40
  orbitals: Store,
42
41
  redox: Store,
@@ -51,7 +50,6 @@ class SummaryBuilder(Builder):
51
50
  self.charges = charges
52
51
  self.spins = spins
53
52
  self.bonds = bonds
54
- self.multipoles = multipoles
55
53
  self.metal_binding = metal_binding
56
54
  self.orbitals = orbitals
57
55
  self.redox = redox
@@ -68,7 +66,6 @@ class SummaryBuilder(Builder):
68
66
  charges,
69
67
  spins,
70
68
  bonds,
71
- multipoles,
72
69
  metal_binding,
73
70
  orbitals,
74
71
  redox,
@@ -84,7 +81,6 @@ class SummaryBuilder(Builder):
84
81
  # self.charges,
85
82
  # self.spins,
86
83
  # self.bonds,
87
- # self.multipoles,
88
84
  # self.metal_binding,
89
85
  # self.orbitals,
90
86
  # self.redox,
@@ -107,7 +103,6 @@ class SummaryBuilder(Builder):
107
103
  self.molecules.ensure_index("last_updated")
108
104
  self.molecules.ensure_index("task_ids")
109
105
  self.molecules.ensure_index("formula_alphabetical")
110
- self.molecules.ensure_index("species_hash")
111
106
 
112
107
  # Search index for charges
113
108
  self.charges.ensure_index("molecule_id")
@@ -139,15 +134,6 @@ class SummaryBuilder(Builder):
139
134
  self.bonds.ensure_index("last_updated")
140
135
  self.bonds.ensure_index("formula_alphabetical")
141
136
 
142
- # Search index for multipoles
143
- self.multipoles.ensure_index("molecule_id")
144
- self.multipoles.ensure_index("task_id")
145
- self.multipoles.ensure_index("solvent")
146
- self.multipoles.ensure_index("lot_solvent")
147
- self.multipoles.ensure_index("property_id")
148
- self.multipoles.ensure_index("last_updated")
149
- self.multipoles.ensure_index("formula_alphabetical")
150
-
151
137
  # Search index for metal_binding
152
138
  self.metal_binding.ensure_index("molecule_id")
153
139
  self.metal_binding.ensure_index("solvent")
@@ -206,23 +192,23 @@ class SummaryBuilder(Builder):
206
192
 
207
193
  self.logger.info("Finding documents to process")
208
194
  all_mols = list(
209
- self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
195
+ self.molecules.query(
196
+ temp_query, [self.molecules.key, "formula_alphabetical"]
197
+ )
210
198
  )
211
199
 
212
200
  processed_docs = set([e for e in self.summary.distinct("molecule_id")])
213
201
  to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
214
- to_process_hashes = {
215
- d["species_hash"]
202
+ to_process_forms = {
203
+ d["formula_alphabetical"]
216
204
  for d in all_mols
217
205
  if d[self.molecules.key] in to_process_docs
218
206
  }
219
207
 
220
- N = ceil(len(to_process_hashes) / number_splits)
208
+ N = ceil(len(to_process_forms) / number_splits)
221
209
 
222
- for hash_chunk in grouper(to_process_hashes, N):
223
- query = dict(temp_query)
224
- query["species_hash"] = {"$in": list(hash_chunk)}
225
- yield {"query": query}
210
+ for formula_chunk in grouper(to_process_forms, N):
211
+ yield {"query": {"formula_alphabetical": {"$in": list(formula_chunk)}}}
226
212
 
227
213
  def get_items(self) -> Iterator[List[Dict]]:
228
214
  """
@@ -247,26 +233,28 @@ class SummaryBuilder(Builder):
247
233
 
248
234
  self.logger.info("Finding documents to process")
249
235
  all_mols = list(
250
- self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
236
+ self.molecules.query(
237
+ temp_query, [self.molecules.key, "formula_alphabetical"]
238
+ )
251
239
  )
252
240
 
253
241
  processed_docs = set([e for e in self.summary.distinct("molecule_id")])
254
242
  to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
255
- to_process_hashes = {
256
- d["species_hash"]
243
+ to_process_forms = {
244
+ d["formula_alphabetical"]
257
245
  for d in all_mols
258
246
  if d[self.molecules.key] in to_process_docs
259
247
  }
260
248
 
261
249
  self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
262
- self.logger.info(f"Found {len(to_process_hashes)} unprocessed hashes")
250
+ self.logger.info(f"Found {len(to_process_forms)} unprocessed formulas")
263
251
 
264
252
  # Set total for builder bars to have a total
265
- self.total = len(to_process_hashes)
253
+ self.total = len(to_process_forms)
266
254
 
267
- for shash in to_process_hashes:
255
+ for formula in to_process_forms:
268
256
  mol_query = dict(temp_query)
269
- mol_query["species_hash"] = shash
257
+ mol_query["formula_alphabetical"] = formula
270
258
  molecules = list(self.molecules.query(criteria=mol_query))
271
259
 
272
260
  yield molecules
@@ -304,12 +292,12 @@ class SummaryBuilder(Builder):
304
292
  else:
305
293
  grouped[solvent][method] = doc
306
294
 
307
- return grouped
295
+ return (grouped, by_method)
308
296
 
309
297
  mols = items
310
- shash = mols[0]["species_hash"]
298
+ formula = mols[0]["formula_alphabetical"]
311
299
  mol_ids = [m["molecule_id"] for m in mols]
312
- self.logger.debug(f"Processing {shash} : {mol_ids}")
300
+ self.logger.debug(f"Processing {formula} : {mol_ids}")
313
301
 
314
302
  summary_docs = list()
315
303
 
@@ -330,9 +318,6 @@ class SummaryBuilder(Builder):
330
318
  "metal_binding": _group_docs(
331
319
  list(self.metal_binding.query({"molecule_id": mol_id})), True
332
320
  ),
333
- "multipole_moments": _group_docs(
334
- list(self.multipoles.query({"molecule_id": mol_id})), False
335
- ),
336
321
  "orbitals": _group_docs(
337
322
  list(self.orbitals.query({"molecule_id": mol_id})), False
338
323
  ),
@@ -363,7 +348,7 @@ class SummaryBuilder(Builder):
363
348
  summary_doc = MoleculeSummaryDoc.from_docs(molecule_id=mol_id, docs=d)
364
349
  summary_docs.append(summary_doc)
365
350
 
366
- self.logger.debug(f"Produced {len(summary_docs)} summary docs for {shash}")
351
+ self.logger.debug(f"Produced {len(summary_docs)} summary docs for {formula}")
367
352
 
368
353
  return jsanitize([doc.model_dump() for doc in summary_docs], allow_bson=True)
369
354
 
@@ -84,7 +84,7 @@ class ThermoBuilder(Builder):
84
84
  single-point energy corrections.
85
85
 
86
86
  Before any documents are constructed, the following steps are taken:
87
- 1. Gather MoleculeDocs by species hash
87
+ 1. Gather MoleculeDocs by formula
88
88
  2. For each doc, identify tasks with thermodynamic information such as
89
89
  zero-point energy, enthalpy, and entropy. Collect these "documents
90
90
  including complete thermodynamics" (DICTs).
@@ -148,14 +148,12 @@ class ThermoBuilder(Builder):
148
148
  self.tasks.ensure_index("last_updated")
149
149
  self.tasks.ensure_index("state")
150
150
  self.tasks.ensure_index("formula_alphabetical")
151
- self.tasks.ensure_index("species_hash")
152
151
 
153
152
  # Search index for molecules
154
153
  self.molecules.ensure_index("molecule_id")
155
154
  self.molecules.ensure_index("last_updated")
156
155
  self.molecules.ensure_index("task_ids")
157
156
  self.molecules.ensure_index("formula_alphabetical")
158
- self.molecules.ensure_index("species_hash")
159
157
 
160
158
  # Search index for thermo
161
159
  self.thermo.ensure_index("molecule_id")
@@ -174,23 +172,23 @@ class ThermoBuilder(Builder):
174
172
 
175
173
  self.logger.info("Finding documents to process")
176
174
  all_mols = list(
177
- self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
175
+ self.molecules.query(
176
+ temp_query, [self.molecules.key, "formula_alphabetical"]
177
+ )
178
178
  )
179
179
 
180
180
  processed_docs = set([e for e in self.thermo.distinct("molecule_id")])
181
181
  to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
182
- to_process_hashes = {
183
- d["species_hash"]
182
+ to_process_forms = {
183
+ d["formula_alphabetical"]
184
184
  for d in all_mols
185
185
  if d[self.molecules.key] in to_process_docs
186
186
  }
187
187
 
188
- N = ceil(len(to_process_hashes) / number_splits)
188
+ N = ceil(len(to_process_forms) / number_splits)
189
189
 
190
- for hash_chunk in grouper(to_process_hashes, N):
191
- query = dict(temp_query)
192
- query["species_hash"] = {"$in": list(hash_chunk)}
193
- yield {"query": query}
190
+ for formula_chunk in grouper(to_process_forms, N):
191
+ yield {"query": {"formula_alphabetical": {"$in": list(formula_chunk)}}}
194
192
 
195
193
  def get_items(self) -> Iterator[List[Dict]]:
196
194
  """
@@ -215,26 +213,28 @@ class ThermoBuilder(Builder):
215
213
 
216
214
  self.logger.info("Finding documents to process")
217
215
  all_mols = list(
218
- self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
216
+ self.molecules.query(
217
+ temp_query, [self.molecules.key, "formula_alphabetical"]
218
+ )
219
219
  )
220
220
 
221
221
  processed_docs = set([e for e in self.thermo.distinct("molecule_id")])
222
222
  to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
223
- to_process_hashes = {
224
- d["species_hash"]
223
+ to_process_forms = {
224
+ d["formula_alphabetical"]
225
225
  for d in all_mols
226
226
  if d[self.molecules.key] in to_process_docs
227
227
  }
228
228
 
229
229
  self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
230
- self.logger.info(f"Found {len(to_process_hashes)} unprocessed hashes")
230
+ self.logger.info(f"Found {len(to_process_forms)} unprocessed formulas")
231
231
 
232
232
  # Set total for builder bars to have a total
233
- self.total = len(to_process_hashes)
233
+ self.total = len(to_process_forms)
234
234
 
235
- for shash in to_process_hashes:
235
+ for formula in to_process_forms:
236
236
  mol_query = dict(temp_query)
237
- mol_query["species_hash"] = shash
237
+ mol_query["formula_alphabetical"] = formula
238
238
  molecules = list(self.molecules.query(criteria=mol_query))
239
239
 
240
240
  yield molecules
@@ -273,9 +273,9 @@ class ThermoBuilder(Builder):
273
273
  return doc
274
274
 
275
275
  mols = [MoleculeDoc(**item) for item in items]
276
- shash = mols[0].species_hash
276
+ formula = mols[0].formula_alphabetical
277
277
  mol_ids = [m.molecule_id for m in mols]
278
- self.logger.debug(f"Processing {shash} : {mol_ids}")
278
+ self.logger.debug(f"Processing {formula} : {mol_ids}")
279
279
 
280
280
  thermo_docs = list()
281
281
 
@@ -334,7 +334,7 @@ class ThermoBuilder(Builder):
334
334
  tdoc = self.tasks.query_one(
335
335
  {
336
336
  "task_id": task,
337
- "species_hash": shash,
337
+ "formula_alphabetical": formula,
338
338
  "orig": {"$exists": True},
339
339
  }
340
340
  )
@@ -344,7 +344,7 @@ class ThermoBuilder(Builder):
344
344
  tdoc = self.tasks.query_one(
345
345
  {
346
346
  "task_id": int(task),
347
- "species_hash": shash,
347
+ "formula_alphabetical": formula,
348
348
  "orig": {"$exists": True},
349
349
  }
350
350
  )
@@ -465,7 +465,7 @@ class ThermoBuilder(Builder):
465
465
  sorted(with_eval_e, key=lambda x: (x[1], x[2]))[0][0]
466
466
  )
467
467
 
468
- self.logger.debug(f"Produced {len(thermo_docs)} thermo docs for {shash}")
468
+ self.logger.debug(f"Produced {len(thermo_docs)} thermo docs for {formula}")
469
469
 
470
470
  return jsanitize([doc.model_dump() for doc in thermo_docs], allow_bson=True)
471
471
 
@@ -27,7 +27,7 @@ class VibrationBuilder(Builder):
27
27
  each solvent available).
28
28
 
29
29
  The process is as follows:
30
- 1. Gather MoleculeDocs by species hash
30
+ 1. Gather MoleculeDocs by formula
31
31
  2. For each doc, sort tasks by solvent
32
32
  3. For each solvent, grab the best TaskDoc (doc with vibrational
33
33
  information that has the highest level of theory with lowest
@@ -73,14 +73,12 @@ class VibrationBuilder(Builder):
73
73
  self.tasks.ensure_index("last_updated")
74
74
  self.tasks.ensure_index("state")
75
75
  self.tasks.ensure_index("formula_alphabetical")
76
- self.tasks.ensure_index("species_hash")
77
76
 
78
77
  # Search index for molecules
79
78
  self.molecules.ensure_index("molecule_id")
80
79
  self.molecules.ensure_index("last_updated")
81
80
  self.molecules.ensure_index("task_ids")
82
81
  self.molecules.ensure_index("formula_alphabetical")
83
- self.molecules.ensure_index("species_hash")
84
82
 
85
83
  # Search index for vibrational properties
86
84
  self.vibes.ensure_index("molecule_id")
@@ -99,23 +97,23 @@ class VibrationBuilder(Builder):
99
97
 
100
98
  self.logger.info("Finding documents to process")
101
99
  all_mols = list(
102
- self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
100
+ self.molecules.query(
101
+ temp_query, [self.molecules.key, "formula_alphabetical"]
102
+ )
103
103
  )
104
104
 
105
105
  processed_docs = set([e for e in self.vibes.distinct("molecule_id")])
106
106
  to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
107
- to_process_hashes = {
108
- d["species_hash"]
107
+ to_process_forms = {
108
+ d["formula_alphabetical"]
109
109
  for d in all_mols
110
110
  if d[self.molecules.key] in to_process_docs
111
111
  }
112
112
 
113
- N = ceil(len(to_process_hashes) / number_splits)
113
+ N = ceil(len(to_process_forms) / number_splits)
114
114
 
115
- for hash_chunk in grouper(to_process_hashes, N):
116
- query = dict(temp_query)
117
- query["species_hash"] = {"$in": list(hash_chunk)}
118
- yield {"query": query}
115
+ for formula_chunk in grouper(to_process_forms, N):
116
+ yield {"query": {"formula_alphabetical": {"$in": list(formula_chunk)}}}
119
117
 
120
118
  def get_items(self) -> Iterator[List[Dict]]:
121
119
  """
@@ -140,26 +138,28 @@ class VibrationBuilder(Builder):
140
138
 
141
139
  self.logger.info("Finding documents to process")
142
140
  all_mols = list(
143
- self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
141
+ self.molecules.query(
142
+ temp_query, [self.molecules.key, "formula_alphabetical"]
143
+ )
144
144
  )
145
145
 
146
146
  processed_docs = set([e for e in self.vibes.distinct("molecule_id")])
147
147
  to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
148
- to_process_hashes = {
149
- d["species_hash"]
148
+ to_process_forms = {
149
+ d["formula_alphabetical"]
150
150
  for d in all_mols
151
151
  if d[self.molecules.key] in to_process_docs
152
152
  }
153
153
 
154
154
  self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
155
- self.logger.info(f"Found {len(to_process_hashes)} unprocessed hashes")
155
+ self.logger.info(f"Found {len(to_process_forms)} unprocessed formulas")
156
156
 
157
157
  # Set total for builder bars to have a total
158
- self.total = len(to_process_hashes)
158
+ self.total = len(to_process_forms)
159
159
 
160
- for shash in to_process_hashes:
160
+ for formula in to_process_forms:
161
161
  mol_query = dict(temp_query)
162
- mol_query["species_hash"] = shash
162
+ mol_query["formula_alphabetical"] = formula
163
163
  molecules = list(self.molecules.query(criteria=mol_query))
164
164
 
165
165
  yield molecules
@@ -176,9 +176,9 @@ class VibrationBuilder(Builder):
176
176
  """
177
177
 
178
178
  mols = [MoleculeDoc(**item) for item in items]
179
- shash = mols[0].species_hash
179
+ formula = mols[0].formula_alphabetical
180
180
  mol_ids = [m.molecule_id for m in mols]
181
- self.logger.debug(f"Processing {shash} : {mol_ids}")
181
+ self.logger.debug(f"Processing {formula} : {mol_ids}")
182
182
 
183
183
  vibe_docs = list()
184
184
 
@@ -213,7 +213,7 @@ class VibrationBuilder(Builder):
213
213
  tdoc = self.tasks.query_one(
214
214
  {
215
215
  "task_id": task,
216
- "species_hash": shash,
216
+ "formula_alphabetical": formula,
217
217
  "orig": {"$exists": True},
218
218
  }
219
219
  )
@@ -223,7 +223,7 @@ class VibrationBuilder(Builder):
223
223
  tdoc = self.tasks.query_one(
224
224
  {
225
225
  "task_id": int(task),
226
- "species_hash": shash,
226
+ "formula_alphabetical": formula,
227
227
  "orig": {"$exists": True},
228
228
  }
229
229
  )
@@ -243,7 +243,7 @@ class VibrationBuilder(Builder):
243
243
  )
244
244
  vibe_docs.append(vibe_doc)
245
245
 
246
- self.logger.debug(f"Produced {len(vibe_docs)} vibration docs for {shash}")
246
+ self.logger.debug(f"Produced {len(vibe_docs)} vibration docs for {formula}")
247
247
 
248
248
  return jsanitize([doc.model_dump() for doc in vibe_docs], allow_bson=True)
249
249