emmet-builders 0.84.2rc6__py3-none-any.whl → 0.84.2rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emmet/builders/molecules/atomic.py +46 -48
- emmet/builders/molecules/bonds.py +24 -24
- emmet/builders/molecules/electric.py +282 -0
- emmet/builders/molecules/metal_binding.py +20 -21
- emmet/builders/molecules/orbitals.py +23 -23
- emmet/builders/molecules/redox.py +27 -27
- emmet/builders/molecules/summary.py +36 -21
- emmet/builders/molecules/thermo.py +23 -23
- emmet/builders/molecules/trajectory.py +525 -0
- emmet/builders/molecules/vibration.py +23 -23
- emmet/builders/qchem/molecules.py +21 -15
- emmet/builders/vasp/mp_potcar_stats.json.gz +0 -0
- {emmet_builders-0.84.2rc6.dist-info → emmet_builders-0.84.2rc8.dist-info}/METADATA +1 -1
- {emmet_builders-0.84.2rc6.dist-info → emmet_builders-0.84.2rc8.dist-info}/RECORD +16 -14
- {emmet_builders-0.84.2rc6.dist-info → emmet_builders-0.84.2rc8.dist-info}/WHEEL +0 -0
- {emmet_builders-0.84.2rc6.dist-info → emmet_builders-0.84.2rc8.dist-info}/top_level.txt +0 -0
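The hunks below belong to emmet/builders/molecules/metal_binding.py (MetalBindingBuilder), orbitals.py (OrbitalBuilder), redox.py (RedoxBuilder), and summary.py (SummaryBuilder). The unifying change is that these builders now chunk and group work by the molecules' `species_hash` field instead of `formula_alphabetical`, with matching `ensure_index("species_hash")` calls; SummaryBuilder additionally gains a `multipoles` store, and two new builder modules (electric.py, trajectory.py) are added. The snippet below is not emmet code; it is a standalone sketch of the shape of the chunk queries that the updated `prechunk` methods yield, with made-up hash values and a simplified stand-in for the `grouper` helper used in the diff.

```python
# Standalone sketch (not emmet code): the new prechunk output splits work by
# "species_hash" values. `fake_hashes` and `number_splits` are illustrative only.
from itertools import zip_longest
from math import ceil


def grouper(iterable, n):
    """Yield chunks of size n (simplified stand-in for the grouper used in the diff)."""
    args = [iter(iterable)] * n
    for chunk in zip_longest(*args, fillvalue=None):
        yield [x for x in chunk if x is not None]


fake_hashes = [f"hash{i:03d}" for i in range(10)]  # pretend species hashes
number_splits = 3
n = ceil(len(fake_hashes) / number_splits)

for hash_chunk in grouper(fake_hashes, n):
    # Each dict mirrors what the updated prechunk() methods yield for a distributed run
    print({"query": {"species_hash": {"$in": list(hash_chunk)}}})
```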
emmet/builders/molecules/metal_binding.py

@@ -44,7 +44,7 @@ class MetalBindingBuilder(Builder):
     will be used.
 
     The process is as follows:
-        1. Gather MoleculeDocs by
+        1. Gather MoleculeDocs by species hash
         2. For each molecule, first identify if there are any metals. If not, then no MetalBindingDoc can be made.
            If so, then identify the possible solvents that can be used to generate MetalBindingDocs
         3. For each combination of Molecule ID and solvent, search for additional documents:
@@ -111,6 +111,7 @@ class MetalBindingBuilder(Builder):
         self.molecules.ensure_index("last_updated")
         self.molecules.ensure_index("task_ids")
         self.molecules.ensure_index("formula_alphabetical")
+        self.molecules.ensure_index("species_hash")
 
         # Search index for charges
         self.charges.ensure_index("molecule_id")
@@ -168,23 +169,23 @@ class MetalBindingBuilder(Builder):
 
         self.logger.info("Finding documents to process")
         all_mols = list(
-            self.molecules.query(
-                temp_query, [self.molecules.key, "formula_alphabetical"]
-            )
+            self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
         )
 
         processed_docs = set([e for e in self.metal_binding.distinct("molecule_id")])
         to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
-
-            d["
+        to_process_hashes = {
+            d["species_hash"]
             for d in all_mols
             if d[self.molecules.key] in to_process_docs
         }
 
-        N = ceil(len(
+        N = ceil(len(to_process_hashes) / number_splits)
 
-        for
-
+        for hash_chunk in grouper(to_process_hashes, N):
+            query = dict(temp_query)
+            query["species_hash"] = {"$in": list(hash_chunk)}
+            yield {"query": query}
 
     def get_items(self) -> Iterator[List[Dict]]:
         """
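A minimal pure-Python sketch of the bookkeeping the new prechunk does before chunking, using mock documents (the field and variable names mirror the diff; the data is invented). Note that a hash stays in scope as long as at least one molecule carrying it is still unprocessed.

```python
# Mock data, not emmet code: how to_process_hashes is derived from all_mols.
all_mols = [
    {"molecule_id": "mol-1", "species_hash": "aaa111"},
    {"molecule_id": "mol-2", "species_hash": "aaa111"},  # same hash, different molecule
    {"molecule_id": "mol-3", "species_hash": "bbb222"},
]
processed_docs = {"mol-2"}  # molecule_ids that already have target docs

to_process_docs = {d["molecule_id"] for d in all_mols} - processed_docs
to_process_hashes = {
    d["species_hash"] for d in all_mols if d["molecule_id"] in to_process_docs
}

print(to_process_docs)    # contains 'mol-1' and 'mol-3'
print(to_process_hashes)  # contains 'aaa111' and 'bbb222' -- mol-1 keeps 'aaa111' in scope
```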
@@ -207,28 +208,26 @@ class MetalBindingBuilder(Builder):
 
         self.logger.info("Finding documents to process")
         all_mols = list(
-            self.molecules.query(
-                temp_query, [self.molecules.key, "formula_alphabetical"]
-            )
+            self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
        )
 
         processed_docs = set([e for e in self.metal_binding.distinct("molecule_id")])
         to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
-
-            d["
+        to_process_hashes = {
+            d["species_hash"]
             for d in all_mols
             if d[self.molecules.key] in to_process_docs
         }
 
         self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
-        self.logger.info(f"Found {len(
+        self.logger.info(f"Found {len(to_process_hashes)} unprocessed hashes")
 
         # Set total for builder bars to have a total
-        self.total = len(
+        self.total = len(to_process_hashes)
 
-        for
+        for shash in to_process_hashes:
             mol_query = dict(temp_query)
-            mol_query["
+            mol_query["species_hash"] = shash
             molecules = list(self.molecules.query(criteria=mol_query))
 
             yield molecules
@@ -245,9 +244,9 @@ class MetalBindingBuilder(Builder):
         """
 
         mols = [MoleculeDoc(**item) for item in items]
-
+        shash = mols[0].species_hash
         mol_ids = [m.molecule_id for m in mols]
-        self.logger.debug(f"Processing {
+        self.logger.debug(f"Processing {shash} : {mol_ids}")
 
         binding_docs = list()
 
@@ -487,7 +486,7 @@ class MetalBindingBuilder(Builder):
             binding_docs.append(doc)
 
         self.logger.debug(
-            f"Produced {len(binding_docs)} metal binding docs for {
+            f"Produced {len(binding_docs)} metal binding docs for {shash}"
         )
 
         return jsanitize([doc.model_dump() for doc in binding_docs], allow_bson=True)
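A simplified, self-contained sketch of the get_items / process_items handoff under the new grouping: get_items yields one batch of molecules per species hash, and process_items reads the batch's hash off the first item (as in `shash = mols[0].species_hash` above). Mock dicts stand in for MoleculeDoc objects and Stores.

```python
# Sketch only: one batch per species_hash, hash taken from the first item of each batch.
from collections import defaultdict
from typing import Dict, Iterator, List

MOLECULES = [
    {"molecule_id": "mol-1", "species_hash": "aaa111"},
    {"molecule_id": "mol-2", "species_hash": "aaa111"},
    {"molecule_id": "mol-3", "species_hash": "bbb222"},
]


def get_items() -> Iterator[List[Dict]]:
    by_hash: Dict[str, List[Dict]] = defaultdict(list)
    for mol in MOLECULES:
        by_hash[mol["species_hash"]].append(mol)
    for shash in by_hash:  # mirrors: for shash in to_process_hashes
        yield by_hash[shash]


def process_items(items: List[Dict]) -> None:
    shash = items[0]["species_hash"]  # mirrors: shash = mols[0].species_hash
    mol_ids = [m["molecule_id"] for m in items]
    print(f"Processing {shash} : {mol_ids}")


for batch in get_items():
    process_items(batch)
```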
emmet/builders/molecules/orbitals.py

@@ -27,7 +27,7 @@ class OrbitalBuilder(Builder):
     each solvent available).
 
     The process is as follows:
-        1. Gather MoleculeDocs by
+        1. Gather MoleculeDocs by species hash
         2. For each doc, sort tasks by solvent
         3. For each solvent, grab the best TaskDoc (including NBO data using
            the highest level of theory with lowest electronic energy for the
@@ -69,12 +69,14 @@ class OrbitalBuilder(Builder):
         self.tasks.ensure_index("last_updated")
         self.tasks.ensure_index("state")
         self.tasks.ensure_index("formula_alphabetical")
+        self.tasks.ensure_index("species_hash")
 
         # Search index for molecules
         self.molecules.ensure_index("molecule_id")
         self.molecules.ensure_index("last_updated")
         self.molecules.ensure_index("task_ids")
         self.molecules.ensure_index("formula_alphabetical")
+        self.molecules.ensure_index("species_hash")
 
         # Search index for orbitals
         self.orbitals.ensure_index("molecule_id")
@@ -93,23 +95,23 @@ class OrbitalBuilder(Builder):
 
         self.logger.info("Finding documents to process")
         all_mols = list(
-            self.molecules.query(
-                temp_query, [self.molecules.key, "formula_alphabetical"]
-            )
+            self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
         )
 
         processed_docs = set([e for e in self.orbitals.distinct("molecule_id")])
         to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
-
-            d["
+        to_process_hashes = {
+            d["species_hash"]
             for d in all_mols
             if d[self.molecules.key] in to_process_docs
         }
 
-        N = ceil(len(
+        N = ceil(len(to_process_hashes) / number_splits)
 
-        for
-
+        for hash_chunk in grouper(to_process_hashes, N):
+            query = dict(temp_query)
+            query["species_hash"] = {"$in": list(hash_chunk)}
+            yield {"query": query}
 
     def get_items(self) -> Iterator[List[Dict]]:
         """
@@ -134,28 +136,26 @@ class OrbitalBuilder(Builder):
 
         self.logger.info("Finding documents to process")
         all_mols = list(
-            self.molecules.query(
-                temp_query, [self.molecules.key, "formula_alphabetical"]
-            )
+            self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
         )
 
         processed_docs = set([e for e in self.orbitals.distinct("molecule_id")])
         to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
-
-            d["
+        to_process_hashes = {
+            d["species_hash"]
             for d in all_mols
             if d[self.molecules.key] in to_process_docs
         }
 
         self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
-        self.logger.info(f"Found {len(
+        self.logger.info(f"Found {len(to_process_hashes)} unprocessed hashes")
 
         # Set total for builder bars to have a total
-        self.total = len(
+        self.total = len(to_process_hashes)
 
-        for
+        for shash in to_process_hashes:
             mol_query = dict(temp_query)
-            mol_query["
+            mol_query["species_hash"] = shash
             molecules = list(self.molecules.query(criteria=mol_query))
 
             yield molecules
@@ -172,9 +172,9 @@ class OrbitalBuilder(Builder):
         """
 
         mols = [MoleculeDoc(**item) for item in items]
-
+        shash = mols[0].species_hash
         mol_ids = [m.molecule_id for m in mols]
-        self.logger.info(f"Processing {
+        self.logger.info(f"Processing {shash} : {mol_ids}")
 
         orbital_docs = list()
 
@@ -221,7 +221,7 @@ class OrbitalBuilder(Builder):
             tdoc = self.tasks.query_one(
                 {
                     "task_id": task,
-                    "
+                    "species_hash": shash,
                     "orig": {"$exists": True},
                 }
             )
@@ -231,7 +231,7 @@ class OrbitalBuilder(Builder):
             tdoc = self.tasks.query_one(
                 {
                     "task_id": int(task),
-                    "
+                    "species_hash": shash,
                     "orig": {"$exists": True},
                 }
             )
@@ -253,7 +253,7 @@ class OrbitalBuilder(Builder):
             if orbital_doc is not None:
                 orbital_docs.append(orbital_doc)
 
-        self.logger.debug(f"Produced {len(orbital_docs)} orbital docs for {
+        self.logger.debug(f"Produced {len(orbital_docs)} orbital docs for {shash}")
 
         return jsanitize([doc.model_dump() for doc in orbital_docs], allow_bson=True)
 
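The two query_one hunks above constrain the task lookup by the batch's species hash and retry the task_id as an int. A minimal standalone sketch of that pattern is below; `fake_query_one` is a stand-in for `Store.query_one`, and for brevity it ignores the `"orig": {"$exists": True}` operator that the real query passes to MongoDB.

```python
# Sketch of the lookup pattern: string task_id first, then int, always filtered on
# the species hash. FAKE_TASKS and fake_query_one are illustrative stand-ins.
from typing import Dict, Optional

FAKE_TASKS = [
    {"task_id": 42, "species_hash": "aaa111", "orig": {"molecule": "..."}},
]


def fake_query_one(criteria: Dict) -> Optional[Dict]:
    # Simplified matcher; the "$exists" operator from the real query is not emulated.
    for doc in FAKE_TASKS:
        if doc["task_id"] == criteria["task_id"] and doc["species_hash"] == criteria["species_hash"]:
            return doc
    return None


def lookup_task(task, shash: str) -> Optional[Dict]:
    tdoc = fake_query_one({"task_id": task, "species_hash": shash, "orig": {"$exists": True}})
    if tdoc is None:
        tdoc = fake_query_one({"task_id": int(task), "species_hash": shash, "orig": {"$exists": True}})
    return tdoc


print(lookup_task("42", "aaa111"))  # string id misses, int fallback finds the task
```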
emmet/builders/molecules/redox.py

@@ -30,7 +30,7 @@ class RedoxBuilder(Builder):
     from a MoleculeDoc (lowest electronic energy, highest level of theory).
 
     The process is as follows:
-        1. Gather MoleculeDocs by
+        1. Gather MoleculeDocs by species hash
         2. Further group based on (covalent) isomorphism and charge
         3. For each MoleculeDoc:
             3a. Identify relevant MoleculeThermoDocs
@@ -81,12 +81,14 @@ class RedoxBuilder(Builder):
         self.tasks.ensure_index("last_updated")
         self.tasks.ensure_index("state")
         self.tasks.ensure_index("formula_alphabetical")
+        self.tasks.ensure_index("species_hash")
 
         # Search index for molecules
         self.molecules.ensure_index("molecule_id")
         self.molecules.ensure_index("last_updated")
         self.molecules.ensure_index("task_ids")
         self.molecules.ensure_index("formula_alphabetical")
+        self.molecules.ensure_index("species_hash")
 
         # Search index for thermo
         self.thermo.ensure_index("molecule_id")
@@ -113,23 +115,23 @@ class RedoxBuilder(Builder):
 
         self.logger.info("Finding documents to process")
         all_mols = list(
-            self.molecules.query(
-                temp_query, [self.molecules.key, "formula_alphabetical"]
-            )
+            self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
         )
 
         processed_docs = set([e for e in self.redox.distinct("molecule_id")])
         to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
-
-            d["
+        to_process_hashes = {
+            d["species_hash"]
             for d in all_mols
             if d[self.molecules.key] in to_process_docs
         }
 
-        N = ceil(len(
+        N = ceil(len(to_process_hashes) / number_splits)
 
-        for
-
+        for hash_chunk in grouper(to_process_hashes, N):
+            query = dict(temp_query)
+            query["species_hash"] = {"$in": list(hash_chunk)}
+            yield {"query": query}
 
     def get_items(self) -> Iterator[List[Dict]]:
         """
@@ -154,28 +156,26 @@ class RedoxBuilder(Builder):
 
         self.logger.info("Finding documents to process")
         all_mols = list(
-            self.molecules.query(
-                temp_query, [self.molecules.key, "formula_alphabetical"]
-            )
+            self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
         )
 
         processed_docs = set([e for e in self.redox.distinct("molecule_id")])
         to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
-
-            d["
+        to_process_hashes = {
+            d["species_hash"]
             for d in all_mols
             if d[self.molecules.key] in to_process_docs
         }
 
         self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
-        self.logger.info(f"Found {len(
+        self.logger.info(f"Found {len(to_process_hashes)} unprocessed hashes")
 
         # Set total for builder bars to have a total
-        self.total = len(
+        self.total = len(to_process_hashes)
 
-        for
+        for shash in to_process_hashes:
             mol_query = dict(temp_query)
-            mol_query["
+            mol_query["species_hash"] = shash
             molecules = list(self.molecules.query(criteria=mol_query))
 
             yield molecules
@@ -192,9 +192,9 @@ class RedoxBuilder(Builder):
         """
 
         mols = [MoleculeDoc(**item) for item in items]
-
+        shash = mols[0].species_hash
         mol_ids = [m.molecule_id for m in mols]
-        self.logger.debug(f"Processing {
+        self.logger.debug(f"Processing {shash} : {mol_ids}")
 
         redox_docs = list()
 
@@ -220,7 +220,7 @@ class RedoxBuilder(Builder):
                 e["task_id"]
                 for e in gg.entries
                 if e["charge"] == gg.charge + 1
-                and e["task_type"]
+                and e["task_type"] in ["Single Point", "Force"]
                 and e["output"].get("final_energy")
             ]
             ie_tasks = list()
@@ -228,7 +228,7 @@ class RedoxBuilder(Builder):
                 tdoc = self.tasks.query_one(
                     {
                         "task_id": i,
-                        "
+                        "species_hash": shash,
                         "orig": {"$exists": True},
                     }
                 )
@@ -238,7 +238,7 @@ class RedoxBuilder(Builder):
                 tdoc = self.tasks.query_one(
                     {
                         "task_id": int(i),
-                        "
+                        "species_hash": shash,
                         "orig": {"$exists": True},
                     }
                 )
@@ -254,7 +254,7 @@ class RedoxBuilder(Builder):
                 e["task_id"]
                 for e in gg.entries
                 if e["charge"] == gg.charge - 1
-                and e["task_type"]
+                and e["task_type"] in ["Single Point", "Force"]
                 and e["output"].get("final_energy")
             ]
             ea_tasks = list()
@@ -262,7 +262,7 @@ class RedoxBuilder(Builder):
                 tdoc = self.tasks.query_one(
                     {
                         "task_id": i,
-                        "
+                        "species_hash": shash,
                         "orig": {"$exists": True},
                     }
                 )
@@ -272,7 +272,7 @@ class RedoxBuilder(Builder):
                 tdoc = self.tasks.query_one(
                     {
                         "task_id": int(i),
-                        "
+                        "species_hash": shash,
                         "orig": {"$exists": True},
                     }
                 )
@@ -354,7 +354,7 @@ class RedoxBuilder(Builder):
                 )
             )
 
-        self.logger.debug(f"Produced {len(redox_docs)} redox docs for {
+        self.logger.debug(f"Produced {len(redox_docs)} redox docs for {shash}")
 
         return jsanitize(
             [doc.model_dump() for doc in redox_docs if doc is not None], allow_bson=True
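In the RedoxBuilder hunks at -220 and -254, the candidate entries for ionization-energy and electron-affinity tasks are now explicitly required to have `task_type` in `["Single Point", "Force"]`, in addition to the charge offset and a present final energy. A small sketch of that filter on mock entries:

```python
# Mock entries only; group_charge plays the role of gg.charge in the diff.
entries = [
    {"task_id": 1, "charge": 1, "task_type": "Single Point", "output": {"final_energy": -76.4}},
    {"task_id": 2, "charge": 1, "task_type": "Frequency Analysis", "output": {"final_energy": -76.3}},
    {"task_id": 3, "charge": 0, "task_type": "Force", "output": {"final_energy": -76.5}},
]
group_charge = 0

ie_candidates = [
    e["task_id"]
    for e in entries
    if e["charge"] == group_charge + 1
    and e["task_type"] in ["Single Point", "Force"]
    and e["output"].get("final_energy")
]
print(ie_candidates)  # [1] -- task 2 is excluded by the task_type check, task 3 by charge
```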
emmet/builders/molecules/summary.py

@@ -36,6 +36,7 @@ class SummaryBuilder(Builder):
         charges: Store,
         spins: Store,
         bonds: Store,
+        multipoles: Store,
         metal_binding: Store,
         orbitals: Store,
         redox: Store,
@@ -50,6 +51,7 @@ class SummaryBuilder(Builder):
         self.charges = charges
         self.spins = spins
         self.bonds = bonds
+        self.multipoles = multipoles
         self.metal_binding = metal_binding
         self.orbitals = orbitals
         self.redox = redox
@@ -66,6 +68,7 @@ class SummaryBuilder(Builder):
                 charges,
                 spins,
                 bonds,
+                multipoles,
                 metal_binding,
                 orbitals,
                 redox,
@@ -81,6 +84,7 @@ class SummaryBuilder(Builder):
                 # self.charges,
                 # self.spins,
                 # self.bonds,
+                # self.multipoles,
                 # self.metal_binding,
                 # self.orbitals,
                 # self.redox,
@@ -103,6 +107,7 @@ class SummaryBuilder(Builder):
         self.molecules.ensure_index("last_updated")
         self.molecules.ensure_index("task_ids")
         self.molecules.ensure_index("formula_alphabetical")
+        self.molecules.ensure_index("species_hash")
 
         # Search index for charges
         self.charges.ensure_index("molecule_id")
@@ -134,6 +139,15 @@ class SummaryBuilder(Builder):
         self.bonds.ensure_index("last_updated")
         self.bonds.ensure_index("formula_alphabetical")
 
+        # Search index for multipoles
+        self.multipoles.ensure_index("molecule_id")
+        self.multipoles.ensure_index("task_id")
+        self.multipoles.ensure_index("solvent")
+        self.multipoles.ensure_index("lot_solvent")
+        self.multipoles.ensure_index("property_id")
+        self.multipoles.ensure_index("last_updated")
+        self.multipoles.ensure_index("formula_alphabetical")
+
         # Search index for metal_binding
         self.metal_binding.ensure_index("molecule_id")
         self.metal_binding.ensure_index("solvent")
@@ -192,23 +206,23 @@ class SummaryBuilder(Builder):
 
         self.logger.info("Finding documents to process")
         all_mols = list(
-            self.molecules.query(
-                temp_query, [self.molecules.key, "formula_alphabetical"]
-            )
+            self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
         )
 
         processed_docs = set([e for e in self.summary.distinct("molecule_id")])
         to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
-
-            d["
+        to_process_hashes = {
+            d["species_hash"]
             for d in all_mols
             if d[self.molecules.key] in to_process_docs
         }
 
-        N = ceil(len(
+        N = ceil(len(to_process_hashes) / number_splits)
 
-        for
-
+        for hash_chunk in grouper(to_process_hashes, N):
+            query = dict(temp_query)
+            query["species_hash"] = {"$in": list(hash_chunk)}
+            yield {"query": query}
 
     def get_items(self) -> Iterator[List[Dict]]:
         """
@@ -233,28 +247,26 @@ class SummaryBuilder(Builder):
 
         self.logger.info("Finding documents to process")
         all_mols = list(
-            self.molecules.query(
-                temp_query, [self.molecules.key, "formula_alphabetical"]
-            )
+            self.molecules.query(temp_query, [self.molecules.key, "species_hash"])
         )
 
         processed_docs = set([e for e in self.summary.distinct("molecule_id")])
         to_process_docs = {d[self.molecules.key] for d in all_mols} - processed_docs
-
-            d["
+        to_process_hashes = {
+            d["species_hash"]
             for d in all_mols
             if d[self.molecules.key] in to_process_docs
         }
 
         self.logger.info(f"Found {len(to_process_docs)} unprocessed documents")
-        self.logger.info(f"Found {len(
+        self.logger.info(f"Found {len(to_process_hashes)} unprocessed hashes")
 
         # Set total for builder bars to have a total
-        self.total = len(
+        self.total = len(to_process_hashes)
 
-        for
+        for shash in to_process_hashes:
             mol_query = dict(temp_query)
-            mol_query["
+            mol_query["species_hash"] = shash
             molecules = list(self.molecules.query(criteria=mol_query))
 
             yield molecules
@@ -292,12 +304,12 @@ class SummaryBuilder(Builder):
                 else:
                     grouped[solvent][method] = doc
 
-            return
+            return grouped
 
         mols = items
-
+        shash = mols[0]["species_hash"]
         mol_ids = [m["molecule_id"] for m in mols]
-        self.logger.debug(f"Processing {
+        self.logger.debug(f"Processing {shash} : {mol_ids}")
 
         summary_docs = list()
 
@@ -318,6 +330,9 @@ class SummaryBuilder(Builder):
                 "metal_binding": _group_docs(
                     list(self.metal_binding.query({"molecule_id": mol_id})), True
                 ),
+                "multipole_moments": _group_docs(
+                    list(self.multipoles.query({"molecule_id": mol_id})), False
+                ),
                 "orbitals": _group_docs(
                     list(self.orbitals.query({"molecule_id": mol_id})), False
                 ),
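The new `"multipole_moments"` entry feeds multipole documents into the same per-solvent grouping used by the other properties. The sketch below is not the actual emmet helper; it is a rough guess at what a `_group_docs`-style function could look like, based only on what the hunks show (keying by solvent, a nested per-method branch as in `grouped[solvent][method] = doc`, and the rc8 fix of returning `grouped` instead of a bare `return`). The meaning of the boolean flag passed above is an assumption, as is the mock `total_dipole` field.

```python
# Hedged sketch only; not the real _group_docs.
from collections import defaultdict
from typing import Dict, List


def group_docs(docs: List[Dict], by_method: bool) -> Dict:
    grouped: Dict = defaultdict(dict)
    for doc in docs:
        solvent = doc.get("solvent", "NONE")
        if by_method:
            # nested per-method grouping, as in the `grouped[solvent][method] = doc` branch
            grouped[solvent][doc.get("method", "unknown")] = doc
        else:
            grouped[solvent] = doc
    return grouped  # rc8 fixes the helper to return the mapping rather than None


multipole_docs = [
    {"molecule_id": "mol-1", "solvent": "WATER", "total_dipole": 1.85},
    {"molecule_id": "mol-1", "solvent": "NONE", "total_dipole": 1.70},
]
print(dict(group_docs(multipole_docs, by_method=False)))
```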
@@ -348,7 +363,7 @@ class SummaryBuilder(Builder):
             summary_doc = MoleculeSummaryDoc.from_docs(molecule_id=mol_id, docs=d)
             summary_docs.append(summary_doc)
 
-        self.logger.debug(f"Produced {len(summary_docs)} summary docs for {
+        self.logger.debug(f"Produced {len(summary_docs)} summary docs for {shash}")
 
         return jsanitize([doc.model_dump() for doc in summary_docs], allow_bson=True)
 
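Because queries and chunking now run on `species_hash`, collections used with these builders benefit from an index on that field. The builders add it themselves through `ensure_indexes()` (the `ensure_index("species_hash")` calls above); the snippet below is only a hedged sketch of the equivalent standalone call for a separately managed collection, with placeholder database and collection names.

```python
# Placeholder connection details; the index key is the one the diff introduces.
from maggma.stores import MongoStore

molecules = MongoStore(
    database="emmet_molecules_db",   # placeholder
    collection_name="molecules",     # placeholder
)
molecules.connect()
molecules.ensure_index("species_hash")  # same call the builders now make in ensure_indexes()
molecules.close()
```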