PyPI - emmet-builders - Versions diffs - 0.84.10rc1__py3-none-any.whl → 0.85.0rc0__py3-none-any.whl - Mend

emmet-builders 0.84.10rc1__py3-none-any.whl → 0.85.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of emmet-builders might be problematic. Click here for more details.

Files changed (35) hide show

emmet/builders/abinit/phonon.py +12 -14
emmet/builders/abinit/sound_velocity.py +1 -1
emmet/builders/materials/absorption_spectrum.py +16 -10
emmet/builders/materials/dielectric.py +10 -7
emmet/builders/materials/elasticity.py +13 -9
emmet/builders/materials/electrodes.py +1 -1
emmet/builders/materials/electronic_structure.py +1 -1
emmet/builders/materials/magnetism.py +2 -1
emmet/builders/materials/piezoelectric.py +23 -19
emmet/builders/materials/provenance.py +3 -4
emmet/builders/materials/substrates.py +2 -2
emmet/builders/materials/summary.py +2 -2
emmet/builders/settings.py +14 -9
emmet/builders/utils.py +5 -4
emmet/builders/vasp/materials.py +11 -4
emmet/builders/vasp/task_validator.py +3 -1
{emmet_builders-0.84.10rc1.dist-info → emmet_builders-0.85.0rc0.dist-info}/METADATA +7 -30
emmet_builders-0.85.0rc0.dist-info/RECORD +41 -0
emmet/builders/materials/ml.py +0 -101
emmet/builders/molecules/atomic.py +0 -592
emmet/builders/molecules/bonds.py +0 -329
emmet/builders/molecules/electric.py +0 -287
emmet/builders/molecules/metal_binding.py +0 -528
emmet/builders/molecules/orbitals.py +0 -292
emmet/builders/molecules/redox.py +0 -502
emmet/builders/molecules/summary.py +0 -406
emmet/builders/molecules/thermo.py +0 -505
emmet/builders/molecules/trajectory.py +0 -530
emmet/builders/molecules/vibration.py +0 -282
emmet/builders/qchem/__init__.py +0 -0
emmet/builders/qchem/molecules.py +0 -745
emmet_builders-0.84.10rc1.dist-info/RECORD +0 -54
/emmet/builders/{molecules/__init__.py → py.typed} +0 -0
{emmet_builders-0.84.10rc1.dist-info → emmet_builders-0.85.0rc0.dist-info}/WHEEL +0 -0
{emmet_builders-0.84.10rc1.dist-info → emmet_builders-0.85.0rc0.dist-info}/top_level.txt +0 -0

emmet/builders/molecules/thermo.py DELETED Viewed

@@ -1,505 +0,0 @@
-from __future__ import annotations
-from collections import defaultdict
-from datetime import datetime
-from itertools import chain
-from math import ceil
-from maggma.builders import Builder
-from maggma.core import Store
-from maggma.utils import grouper
-from pymatgen.analysis.molecule_matcher import MoleculeMatcher
-from pymatgen.core.structure import Molecule
-from emmet.builders.settings import EmmetBuildSettings
-from emmet.core.molecules.thermo import MoleculeThermoDoc, get_free_energy
-from emmet.core.qchem.calc_types import TaskType
-from emmet.core.qchem.molecule import MoleculeDoc, evaluate_lot
-from emmet.core.qchem.task import TaskDocument
-from emmet.core.utils import jsanitize
-from typing import TYPE_CHECKING
-if TYPE_CHECKING:
-    from collections.abc import Iterable, Iterator
-__author__ = "Evan Spotte-Smith"
-SETTINGS = EmmetBuildSettings()
-# Tabulated enthalpy and entropy for isolated single atoms, keyed by the
-# alphabetical formula of the one-atom molecule (e.g. "Zn1").
-# ThermoBuilder.process_item falls back to these values when the thermo
-# document of a single-atom molecule lacks a total enthalpy or total entropy.
-# NOTE(review): units are presumably kcal/mol (enthalpy) and cal/mol-K
-# (entropy), judging by the 0.043363 / 0.000043363 factors applied where the
-# table is consumed - confirm against emmet.core.molecules.thermo.
-single_mol_thermo = {
-    "Zn1": {"enthalpy": 1.481, "entropy": 38.384},
-    "Xe1": {"enthalpy": 1.481, "entropy": 40.543},
-    "Tl1": {"enthalpy": 1.481, "entropy": 41.857},
-    "Ti1": {"enthalpy": 1.481, "entropy": 37.524},
-    "Te1": {"enthalpy": 1.481, "entropy": 40.498},
-    "Sr1": {"enthalpy": 1.481, "entropy": 39.334},
-    "Sn1": {"enthalpy": 1.481, "entropy": 40.229},
-    "Si1": {"enthalpy": 1.481, "entropy": 35.921},
-    "Sb1": {"enthalpy": 1.481, "entropy": 40.284},
-    "Se1": {"enthalpy": 1.481, "entropy": 39.05},
-    "S1": {"enthalpy": 1.481, "entropy": 36.319},
-    "Rn1": {"enthalpy": 1.481, "entropy": 42.095},
-    "Pt1": {"enthalpy": 1.481, "entropy": 41.708},
-    "Rb1": {"enthalpy": 1.481, "entropy": 39.23},
-    "Po1": {"enthalpy": 1.481, "entropy": 41.915},
-    "Pb1": {"enthalpy": 1.481, "entropy": 41.901},
-    "P1": {"enthalpy": 1.481, "entropy": 36.224},
-    "O1": {"enthalpy": 1.481, "entropy": 34.254},
-    "Ne1": {"enthalpy": 1.481, "entropy": 34.919},
-    "N1": {"enthalpy": 1.481, "entropy": 33.858},
-    "Na1": {"enthalpy": 1.481, "entropy": 35.336},
-    "Mg1": {"enthalpy": 1.481, "entropy": 35.462},
-    "Li1": {"enthalpy": 1.481, "entropy": 31.798},
-    "Kr1": {"enthalpy": 1.481, "entropy": 39.191},
-    "K1": {"enthalpy": 1.481, "entropy": 36.908},
-    "In1": {"enthalpy": 1.481, "entropy": 40.132},
-    "I1": {"enthalpy": 1.481, "entropy": 40.428},
-    "H1": {"enthalpy": 1.481, "entropy": 26.014},
-    "He1": {"enthalpy": 1.481, "entropy": 30.125},
-    "Ge1": {"enthalpy": 1.481, "entropy": 38.817},
-    "Ga1": {"enthalpy": 1.481, "entropy": 38.609},
-    "F1": {"enthalpy": 1.481, "entropy": 34.767},
-    "Cu1": {"enthalpy": 1.481, "entropy": 38.337},
-    "Cl1": {"enthalpy": 1.481, "entropy": 36.586},
-    "Ca1": {"enthalpy": 1.481, "entropy": 36.984},
-    "C1": {"enthalpy": 1.481, "entropy": 33.398},
-    "Br1": {"enthalpy": 1.481, "entropy": 39.012},
-    "Bi1": {"enthalpy": 1.481, "entropy": 41.915},
-    "Be1": {"enthalpy": 1.481, "entropy": 32.544},
-    "Ba1": {"enthalpy": 1.481, "entropy": 40.676},
-    "B1": {"enthalpy": 1.481, "entropy": 33.141},
-    "Au1": {"enthalpy": 1.481, "entropy": 41.738},
-    "At1": {"enthalpy": 1.481, "entropy": 41.929},
-    "As1": {"enthalpy": 1.481, "entropy": 38.857},
-    "Ar1": {"enthalpy": 1.481, "entropy": 36.983},
-    "Al1": {"enthalpy": 1.481, "entropy": 35.813},
-    "Ag1": {"enthalpy": 1.481, "entropy": 39.917},
-}
-class ThermoBuilder(Builder):
-    """
-    The ThermoBuilder extracts the highest-quality thermodynamic data from a
-    MoleculeDoc (lowest electronic energy, highest level of theory for each
-    solvent available).
-    This builder constructs MoleculeThermoDocs in two different ways: with and without
-    single-point energy corrections.
-    Before any documents are constructed, the following steps are taken:
-        1. Gather MoleculeDocs by species hash
-        2. For each doc, identify tasks with thermodynamic information such as
-            zero-point energy, enthalpy, and entropy. Collect these "documents
-             including complete thermodynamics" (DICTs).
-        3. Separately, collect single-point energy calculations (SPECs).
-        4. Sort both sets of collected tasks (DICT and SPEC) by solvent
-    The first type of doc - those without corrections - can be constructed in
-    a straightforward fashion:
-        5. For each solvent, grab the best DICT (where "best" is defined as the
-            task generated using the highest level of theory with the lowest
-            electronic energy)
-        6. Convert this TaskDoc to MoleculeThermoDoc
-    The second type - those involving single-point energy corrections - are
-    generated differently and in a slightly more involved process:
-        7. For each of the "best" DICT docs identified in step 5 above:
-            7.1 For each solvent, grab the best SPEC
-            7.2 Try to match each best SPEC with a matching DICT (meaning that
-                the DICT and the SPEC have identical structure) where the DICT
-                is calculated at a lower or the same level of theory than the
-                SPEC
-            7.3 Convert each DICT-SPEC combination to create a MoleculeThermoDoc
-    In the case where there are multiple MoleculeThermoDocs made for a given solvent,
-    the different MoleculeThermoDocs will be ranked, first by level of theory (for
-    a doc made with an energy correction, the scores of the DICT and the SPEC
-    levels of theory will be averaged) and then by electronic energy.
-    """
-    def __init__(
-        self,
-        tasks: Store,
-        molecules: Store,
-        thermo: Store,
-        query: dict | None = None,
-        settings: EmmetBuildSettings | None = None,
-        **kwargs,
-    ):
-        """
-        Wire the builder to its source and target stores.
-        Args:
-            tasks: Store of task documents (source)
-            molecules: Store of molecule documents (source)
-            thermo: Store that receives the thermo documents (target)
-            query: optional criteria restricting which molecules are built;
-                an empty dict is used when nothing (or a falsy value) is given
-            settings: optional build settings, resolved through
-                EmmetBuildSettings.autoload
-            **kwargs: kept on the instance and forwarded to Builder.__init__
-        """
-        self.tasks, self.molecules, self.thermo = tasks, molecules, thermo
-        self.query = query or {}
-        self.settings = EmmetBuildSettings.autoload(settings)
-        self.kwargs = kwargs
-        super().__init__(sources=[tasks, molecules], targets=[thermo], **kwargs)
-        # Troubleshooting hint: if mrun does not connect to the collections
-        # automatically, call .connect() on self.tasks, self.molecules and
-        # self.thermo here, catching and printing any exception.
-    def ensure_indexes(self):
-        """
-        Request every index the build relies on, one store after another
-        """
-        index_plan = (
-            # Basic search index for tasks
-            (
-                self.tasks,
-                (
-                    "task_id",
-                    "last_updated",
-                    "state",
-                    "formula_alphabetical",
-                    "species_hash",
-                ),
-            ),
-            # Search index for molecules
-            (
-                self.molecules,
-                (
-                    "molecule_id",
-                    "last_updated",
-                    "task_ids",
-                    "formula_alphabetical",
-                    "species_hash",
-                ),
-            ),
-            # Search index for thermo
-            (
-                self.thermo,
-                (
-                    "molecule_id",
-                    "task_id",
-                    "solvent",
-                    "lot_solvent",
-                    "property_id",
-                    "last_updated",
-                    "formula_alphabetical",
-                ),
-            ),
-        )
-        for store, fields in index_plan:
-            for field in fields:
-                store.ensure_index(field)
-    def prechunk(self, number_splits: int) -> Iterable[dict]:  # pragma: no cover
-        """
-        Split the pending work into queries for distributed computation.
-        Args:
-            number_splits: divisor used to size the chunks of unprocessed
-                species hashes (chunk size is ceil(n_hashes / number_splits))
-        Returns:
-            generator of {"query": ...} dicts, each restricting the builder
-            query to one chunk of species hashes
-        """
-        base_query = {**self.query, "deprecated": False}
-        self.logger.info("Finding documents to process")
-        mol_key = self.molecules.key
-        mol_stubs = list(self.molecules.query(base_query, [mol_key, "species_hash"]))
-        # A molecule counts as done once its id shows up in the thermo store
-        already_built = set(self.thermo.distinct("molecule_id"))
-        pending_ids = {stub[mol_key] for stub in mol_stubs}.difference(already_built)
-        pending_hashes = {
-            stub["species_hash"] for stub in mol_stubs if stub[mol_key] in pending_ids
-        }
-        chunk_size = ceil(len(pending_hashes) / number_splits)
-        for hash_chunk in grouper(pending_hashes, chunk_size):
-            yield {"query": {**base_query, "species_hash": {"$in": list(hash_chunk)}}}
-    def get_items(self) -> Iterator[list[dict]]:
-        """
-        Yield the molecules that still need thermo documents, grouped by
-        species hash.
-        No datetime checking is done; a molecule counts as processed when its
-        molecule_id is already present in the thermo Store
-        Returns:
-            generator of lists of molecule documents, one list per
-            unprocessed species hash
-        """
-        log = self.logger
-        log.info("Thermo builder started")
-        log.info("Setting indexes")
-        self.ensure_indexes()
-        # Build time, stamped onto every document in update_targets
-        self.timestamp = datetime.utcnow()
-        base_query = {**self.query, "deprecated": False}
-        log.info("Finding documents to process")
-        mol_key = self.molecules.key
-        mol_stubs = list(self.molecules.query(base_query, [mol_key, "species_hash"]))
-        already_built = set(self.thermo.distinct("molecule_id"))
-        pending_ids = {stub[mol_key] for stub in mol_stubs}.difference(already_built)
-        pending_hashes = {
-            stub["species_hash"] for stub in mol_stubs if stub[mol_key] in pending_ids
-        }
-        log.info(f"Found {len(pending_ids)} unprocessed documents")
-        log.info(f"Found {len(pending_hashes)} unprocessed hashes")
-        # Gives the builder progress bars a total to work with
-        self.total = len(pending_hashes)
-        for shash in pending_hashes:
-            yield list(
-                self.molecules.query(criteria={**base_query, "species_hash": shash})
-            )
-    def process_item(self, items: list[dict]) -> list[dict]:
-        """
-        Process the tasks into a MoleculeThermoDoc
-        For every molecule in the group, candidate thermo docs are built both
-        without and with a single-point energy correction; candidates are then
-        grouped by solvent and only the best-ranked one per solvent is kept.
-        Args:
-            items list[dict] : a list of MoleculeDocs in dict form (the first
-                one supplies the species hash used for logging and task lookup)
-        Returns:
-            [dict] : a list of new thermo docs, dumped and sanitized
-        """
-        def _add_single_atom_enthalpy_entropy(
-            task: TaskDocument, doc: MoleculeThermoDoc
-        ):
-            """
-            Fill in thermodynamic values for a single-atom molecule from the
-            single_mol_thermo table when the doc lacks total enthalpy or
-            total entropy. Returns the (possibly modified) doc.
-            """
-            initial_mol = task.output.initial_molecule
-            # If single atom, try to add enthalpy and entropy
-            if len(initial_mol) == 1:
-                if doc.total_enthalpy is None or doc.total_entropy is None:
-                    formula = initial_mol.composition.alphabetical_formula
-                    if formula in single_mol_thermo:
-                        vals = single_mol_thermo[formula]
-                        # NOTE(review): 0.043363 / 0.000043363 look like
-                        # kcal/mol -> eV and cal/mol-K -> eV/K factors - confirm
-                        doc.total_enthalpy = vals["enthalpy"] * 0.043363
-                        doc.total_entropy = vals["entropy"] * 0.000043363
-                        doc.translational_enthalpy = vals["enthalpy"] * 0.043363
-                        doc.translational_entropy = vals["entropy"] * 0.000043363
-                        # The raw (unscaled) table values are handed to
-                        # get_free_energy; only the energy conversion is disabled
-                        doc.free_energy = get_free_energy(
-                            doc.electronic_energy,
-                            vals["enthalpy"],
-                            vals["entropy"],
-                            convert_energy=False,
-                        )
-            return doc
-        mols = [MoleculeDoc(**item) for item in items]
-        shash = mols[0].species_hash
-        mol_ids = [m.molecule_id for m in mols]
-        self.logger.debug(f"Processing {shash} : {mol_ids}")
-        thermo_docs = list()
-        mm = MoleculeMatcher(tolerance=0.000001)
-        for mol in mols:
-            this_thermo_docs = list()
-            # Collect DICTs and SPECs
-            # DICTs: entries with both enthalpy and entropy whose charge and
-            # spin multiplicity match the molecule
-            thermo_entries = [
-                e
-                for e in mol.entries
-                if e["output"]["enthalpy"] is not None
-                and e["output"]["entropy"] is not None
-                and e["charge"] == mol.charge
-                and e["spin_multiplicity"] == mol.spin_multiplicity
-            ]
-            # SPECs: "Single Point" or "Force" entries with matching charge and
-            # spin multiplicity; task_type may be a TaskType enum or a plain value
-            sp_entries = list()
-            for entry in mol.entries:
-                if isinstance(entry["task_type"], TaskType):
-                    task_type = entry["task_type"].value
-                else:
-                    task_type = entry["task_type"]
-                if (
-                    task_type in ["Single Point", "Force"]
-                    and entry["charge"] == mol.charge
-                    and entry["spin_multiplicity"] == mol.spin_multiplicity
-                ):
-                    sp_entries.append(entry)
-            # Group both DICTs and SPECs by solvent environment
-            by_solvent_dict = defaultdict(list)
-            by_solvent_spec = defaultdict(list)
-            for entry in thermo_entries:
-                by_solvent_dict[entry["solvent"]].append(entry)
-            for entry in sp_entries:
-                by_solvent_spec[entry["solvent"]].append(entry)
-            # With no DICTs at all, the uncorrected docs are built from the
-            # SPECs instead
-            if len(thermo_entries) == 0:
-                without_corrections = by_solvent_spec
-            else:
-                without_corrections = by_solvent_dict
-            # Construct without corrections
-            for solvent, entries in without_corrections.items():
-                # Best entry = smallest (summed evaluate_lot score, energy)
-                best = sorted(
-                    entries,
-                    key=lambda x: (
-                        sum(evaluate_lot(x["level_of_theory"])),
-                        x["energy"],
-                    ),
-                )[0]
-                task = best["task_id"]
-                tdoc = self.tasks.query_one(
-                    {
-                        "task_id": task,
-                        "species_hash": shash,
-                        "orig": {"$exists": True},
-                    }
-                )
-                # Retry with the task id cast to int; a ValueError from int()
-                # means the id is not numeric and the task counts as missing
-                if tdoc is None:
-                    try:
-                        tdoc = self.tasks.query_one(
-                            {
-                                "task_id": int(task),
-                                "species_hash": shash,
-                                "orig": {"$exists": True},
-                            }
-                        )
-                    except ValueError:
-                        tdoc = None
-                if tdoc is None:
-                    continue
-                task_doc = TaskDocument(**tdoc)
-                # NOTE(review): a constructor call does not normally return
-                # None, so this check looks redundant - confirm
-                if task_doc is None:
-                    continue
-                thermo_doc = MoleculeThermoDoc.from_task(
-                    task_doc, molecule_id=mol.molecule_id, deprecated=False
-                )
-                thermo_doc = _add_single_atom_enthalpy_entropy(task_doc, thermo_doc)
-                this_thermo_docs.append(thermo_doc)
-            # Construct with corrections
-            for solvent, entries in by_solvent_spec.items():
-                # SPECs for this solvent, ranked best first
-                spec_sorted = sorted(
-                    entries,
-                    key=lambda x: (
-                        sum(evaluate_lot(x["level_of_theory"])),
-                        x["energy"],
-                    ),
-                )
-                for best_spec in spec_sorted:
-                    task_spec = best_spec["task_id"]
-                    # DICTs (from any solvent) whose structure matches this
-                    # SPEC and whose summed evaluate_lot score is strictly
-                    # greater than the SPEC's.
-                    # NOTE(review): the class docstring says "lower or the same
-                    # level of theory"; the comparison here is strict - confirm
-                    matching_structures = list()
-                    for entry in thermo_entries:
-                        mol1 = Molecule.from_dict(entry["molecule"])
-                        mol2 = Molecule.from_dict(best_spec["molecule"])
-                        if (mm.fit(mol1, mol2) or mol1 == mol2) and (
-                            sum(evaluate_lot(best_spec["level_of_theory"]))
-                            < sum(evaluate_lot(entry["level_of_theory"]))
-                        ):
-                            matching_structures.append(entry)
-                    # No usable DICT for this SPEC: try the next-ranked SPEC
-                    if len(matching_structures) == 0:
-                        continue
-                    best_dict = sorted(
-                        matching_structures,
-                        key=lambda x: (
-                            sum(evaluate_lot(x["level_of_theory"])),
-                            x["energy"],
-                        ),
-                    )[0]
-                    task_dict = best_dict["task_id"]
-                    # Unlike the uncorrected branch, these lookups filter on
-                    # task_id only (no species_hash / orig criteria)
-                    tdict = self.tasks.query_one({"task_id": task_dict})
-                    if tdict is None:
-                        try:
-                            tdict = self.tasks.query_one({"task_id": int(task_dict)})
-                        except ValueError:
-                            tdict = None
-                    tspec = self.tasks.query_one({"task_id": task_spec})
-                    if tspec is None:
-                        try:
-                            tspec = self.tasks.query_one({"task_id": int(task_spec)})
-                        except ValueError:
-                            tspec = None
-                    if tdict is None or tspec is None:
-                        continue
-                    task_doc_dict = TaskDocument(**tdict)
-                    task_doc_spec = TaskDocument(**tspec)
-                    thermo_doc = MoleculeThermoDoc.from_task(
-                        task_doc_dict,
-                        correction_task=task_doc_spec,
-                        molecule_id=mol.molecule_id,
-                        deprecated=False,
-                    )
-                    thermo_doc = _add_single_atom_enthalpy_entropy(
-                        task_doc_dict, thermo_doc
-                    )
-                    this_thermo_docs.append(thermo_doc)
-                    # One corrected doc per solvent: stop at the first SPEC
-                    # that produced one
-                    break
-            # Group candidates by solvent, using the correction solvent when
-            # the doc carries one
-            docs_by_solvent = defaultdict(list)
-            for doc in this_thermo_docs:
-                if doc.correction_solvent is not None:
-                    docs_by_solvent[doc.correction_solvent].append(doc)
-                else:
-                    docs_by_solvent[doc.solvent].append(doc)
-            # If multiple documents exist for the same solvent, grab the best one
-            for _, collection in docs_by_solvent.items():
-                # (doc, level-of-theory score, electronic energy) triples
-                with_eval_e = list()
-                for member in collection:
-                    if member.correction_level_of_theory is None:
-                        with_eval_e.append(
-                            (
-                                member,
-                                sum(evaluate_lot(member.level_of_theory)),
-                                member.electronic_energy,
-                            )
-                        )
-                    else:
-                        # Corrected docs are scored by the mean of the DICT and
-                        # SPEC level-of-theory scores
-                        dict_lot = sum(evaluate_lot(member.level_of_theory))
-                        spec_lot = sum(evaluate_lot(member.correction_level_of_theory))
-                        with_eval_e.append(
-                            (
-                                member,
-                                (dict_lot + spec_lot) / 2,
-                                member.electronic_energy,
-                            )
-                        )
-                # Keep the doc with the smallest (score, electronic energy)
-                thermo_docs.append(
-                    sorted(with_eval_e, key=lambda x: (x[1], x[2]))[0][0]
-                )
-        self.logger.debug(f"Produced {len(thermo_docs)} thermo docs for {shash}")
-        return jsanitize([doc.model_dump() for doc in thermo_docs], allow_bson=True)
-    def update_targets(self, items: list[list[dict]]):
-        """
-        Inserts the new thermo docs into the thermo collection
-        Args:
-            items [[dict]]: A list of lists of thermo documents, one inner
-                list per processed item
-        """
-        docs = list(chain.from_iterable(items))  # type: ignore
-        # Guard on the flattened docs rather than on items: a batch made only
-        # of empty result lists would otherwise still issue a remove and an
-        # update against the store and log "Updating 0 thermo documents".
-        if not docs:
-            self.logger.info("No items to update")
-            return
-        # Add timestamp (self.timestamp is set in get_items)
-        for item in docs:
-            item["_bt"] = self.timestamp
-        molecule_ids = list({item["molecule_id"] for item in docs})
-        self.logger.info(f"Updating {len(docs)} thermo documents")
-        # Existing docs for these molecules are removed before the new ones
-        # are written.
-        # NOTE(review): the filter pairs self.thermo.key with molecule ids,
-        # which presumably requires the thermo store to be keyed on
-        # molecule_id - confirm.
-        self.thermo.remove_docs({self.thermo.key: {"$in": molecule_ids}})
-        self.thermo.update(
-            docs=docs,
-            key=["molecule_id", "solvent"],
-        )

emmet-builders 0.84.10rc1__py3-none-any.whl → 0.85.0rc0__py3-none-any.whl

Potentially problematic release.

emmet-builders 0.84.10rc1__py3-none-any.whl → 0.85.0rc0__py3-none-any.whl