digichem-core 6.1.0__py3-none-any.whl → 6.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,6 +8,7 @@ from deepmerge import conservative_merger
8
8
  from pathlib import Path
9
9
  import copy
10
10
  import warnings
11
+ from scipy import integrate
11
12
 
12
13
  from digichem.misc.time import latest_datetime, total_timedelta, date_to_string,\
13
14
  timedelta_to_string
@@ -147,6 +148,7 @@ class Metadata(Result_object):
147
148
  auxiliary_files = None,
148
149
  history = None,
149
150
  date = None,
151
+ insert_date = None,
150
152
  duration = None,
151
153
  package = None,
152
154
  package_version = None,
@@ -167,6 +169,7 @@ class Metadata(Result_object):
167
169
  num_cpu = None,
168
170
  memory_available = None,
169
171
  memory_used = None,
172
+ performance = None,
170
173
 
171
174
  # Deprecated.
172
175
  solvent_model = None,
@@ -181,6 +184,7 @@ class Metadata(Result_object):
181
184
  :param history: Optional SHA of the calculation from which the coordinates of this calculation were generated.
182
185
  :param num_calculations: Optional number of individual calculations this metadata represents.
183
186
  :param date: Optional date (datetime object) of this calculation result.
187
+ :param insert_date: Optional date (datetime object) of when this calculation result was stored (normally in a DB).
184
188
  :param duration: Optional duration (timedelta object) of this calculation.
185
189
  :param package: Optional string identifying the computational chem program that performed the calculation.
186
190
  :param package_version: Optional string identifying the version of the computational chem program that performed the calculation.
@@ -206,6 +210,7 @@ class Metadata(Result_object):
206
210
  self.auxiliary_files = auxiliary_files if auxiliary_files is not None and len(auxiliary_files) != 0 else {}
207
211
  self.history = history
208
212
  self.date = date
213
+ self.insert_date = insert_date
209
214
  self.duration = duration
210
215
  self.package = package
211
216
  self.package_version = package_version
@@ -229,6 +234,7 @@ class Metadata(Result_object):
229
234
  self.num_cpu = num_cpu
230
235
  self.memory_available = memory_available
231
236
  self.memory_used = memory_used
237
+ self.performance = performance
232
238
 
233
239
  # Deprecated solvent system.
234
240
  if solvent_model is not None:
@@ -473,6 +479,7 @@ class Metadata(Result_object):
473
479
  num_cpu = parser.data.metadata.get('num_cpu', None),
474
480
  memory_available = memory_available,
475
481
  memory_used = memory_used,
482
+ performance = Performance.from_parser(parser) if "performance" in parser.data.metadata else None
476
483
  )
477
484
  except AttributeError:
478
485
  # There is no metadata available, give up.
@@ -513,6 +520,11 @@ class Metadata(Result_object):
513
520
  "units": "s",
514
521
  "string": date_to_string(self.date) if self.date is not None else None
515
522
  }
523
+ attr_dict['insert_date'] = {
524
+ "value": self.insert_date.timestamp() if self.insert_date is not None else None,
525
+ "units": "s",
526
+ "string": date_to_string(self.insert_date) if self.insert_date is not None else None
527
+ }
516
528
  attr_dict['duration'] = {
517
529
  "value": self.duration.total_seconds() if self.duration is not None else None,
518
530
  "units": "s",
@@ -542,6 +554,8 @@ class Metadata(Result_object):
542
554
  "units": None
543
555
  }
544
556
 
557
+ attr_dict['performance'] = self.performance.dump(digichem_options) if self.performance else None
558
+
545
559
  return attr_dict
546
560
 
547
561
  @classmethod
@@ -557,9 +571,14 @@ class Metadata(Result_object):
557
571
  kwargs = copy.deepcopy(data)
558
572
 
559
573
  # For more complex fields, use the data item.
560
- for attr in ['date', 'duration', 'temperature', "pressure"]:
561
- kwargs[attr] = data[attr]['value']
574
+ for attr in ['insert_date', 'date', 'duration', 'temperature', "pressure"]:
575
+ if attr in data:
576
+ kwargs[attr] = data[attr]['value']
577
+
578
+ else:
579
+ kwargs[attr] = None
562
580
 
581
+ kwargs['insert_date'] = datetime.fromtimestamp(kwargs['insert_date']) if kwargs['insert_date'] is not None else None
563
582
  kwargs['date'] = datetime.fromtimestamp(kwargs['date']) if kwargs['date'] is not None else None
564
583
  kwargs['duration'] = timedelta(seconds = kwargs['duration']) if kwargs['duration'] is not None else None
565
584
 
@@ -571,6 +590,8 @@ class Metadata(Result_object):
571
590
 
572
591
  else:
573
592
  kwargs[attr_name] = None
593
+
594
+ kwargs['performance'] = Performance.from_dump(data['performance'], result_set, options) if "performance" in data and data['performance'] is not None else None
574
595
 
575
596
  return self(**kwargs)
576
597
 
@@ -644,4 +665,252 @@ class Merged_metadata(Metadata):
644
665
  merged_metadata.auxiliary_files = conservative_merger.merge(merged_metadata.auxiliary_files, metadata.auxiliary_files)
645
666
 
646
667
  return merged_metadata
647
-
668
+
669
class Performance(Result_object):
    """
    Performance metrics and profiling data for a calculation.

    Each of the list attributes (duration, memory_used etc.) is a time series;
    index i of each list corresponds to the same sample point.
    """

    def __init__(
        self,
        duration = None,
        memory_used = None,
        memory_used_percent = None,
        memory_available = None,
        memory_available_percent = None,
        cpu_used = None,
        output_space = None,
        scratch_space = None,
        memory_allocated = None,
        cpu_allocated = None,
    ):
        """
        Constructor for Performance objects.

        :param duration: List of sample times (in s).
        :param memory_used: List of memory used (in bytes) at each sample point.
        :param memory_used_percent: List of memory used (as a %) at each sample point.
        :param memory_available: List of memory available (in bytes) at each sample point.
        :param memory_available_percent: List of memory available (as a %) at each sample point.
        :param cpu_used: List of CPU usage (in %, where 100 corresponds to one full CPU) at each sample point.
        :param output_space: List of output disk space used (in bytes) at each sample point.
        :param scratch_space: List of scratch disk space used (in bytes) at each sample point.
        :param memory_allocated: Amount of memory allocated to the calculation (in bytes); guessed from the maximum memory used if None.
        :param cpu_allocated: Number of CPUs allocated to the calculation; guessed from the maximum CPU usage if None.
        """
        # Default to None (not []) so one mutable default list is not shared between instances.
        self.duration = duration if duration is not None else []
        self.memory_used = memory_used if memory_used is not None else []
        self.memory_used_percent = memory_used_percent if memory_used_percent is not None else []
        self.memory_available = memory_available if memory_available is not None else []
        self.memory_available_percent = memory_available_percent if memory_available_percent is not None else []
        self.cpu_used = cpu_used if cpu_used is not None else []
        self.output_space = output_space if output_space is not None else []
        self.scratch_space = scratch_space if scratch_space is not None else []

        # Guess the allocation limits from the observed maxima if they weren't given explicitly.
        # Guard against empty series (previously max() of an empty list raised ValueError).
        if memory_allocated is not None:
            self.memory_allocated = memory_allocated

        else:
            self.memory_allocated = self.max_mem if self.memory_used else None

        if cpu_allocated is not None:
            self.cpu_allocated = cpu_allocated

        else:
            # cpu_used is in %, where one full CPU == 100; round up to whole CPUs.
            self.cpu_allocated = math.ceil(max(self.cpu_used) / 100) if self.cpu_used else None

    @classmethod
    def from_parser(self, parser):
        """
        Construct a Performance object from an output file parser.

        :param parser: Output data parser.
        :return: A populated Performance object.
        """
        # The performance table is a 2D array; each row is one sample point and the columns are
        # (duration, mem used, mem used %, mem available, mem available %, cpu %, output space, scratch space).
        # Hoist the repeated metadata lookup.
        performance = parser.data.metadata['performance']
        return self(
            duration = performance[:, 0].tolist(),
            memory_used = performance[:, 1].tolist(),
            memory_allocated = Memory(parser.data.metadata['memory_available']) if "memory_available" in parser.data.metadata else None,
            memory_used_percent = performance[:, 2].tolist(),
            memory_available = performance[:, 3].tolist(),
            memory_available_percent = performance[:, 4].tolist(),
            cpu_used = performance[:, 5].tolist(),
            cpu_allocated = parser.data.metadata.get('num_cpu', None),
            output_space = performance[:, 6].tolist(),
            scratch_space = performance[:, 7].tolist()
        )

    @property
    def max_mem(self):
        """
        The maximum amount of memory used in the calculation (in bytes), or None if no data is available.
        """
        return max(self.memory_used, default = None)

    @property
    def memory_margin(self):
        """
        The difference (in bytes) between the memory allocated and the maximum memory actually used.
        """
        max_memory = float(self.memory_allocated) if self.memory_allocated is not None else self.max_mem

        return max_memory - self.max_mem

    @property
    def memory_efficiency(self):
        """
        The memory efficiency of this calculation (as a %).

        This is the integrated memory usage over time compared to the allocated memory over the same period.
        The allocated memory is guesstimated automatically if not available.
        """
        # The whole computation is guarded: empty/short series or a missing allocation
        # (None) raise here, not just the final division.
        try:
            # Integrate to find the number of byte seconds used.
            area = integrate.trapezoid(self.memory_used, self.duration)

            # How much we should/could have used.
            total_area = (self.duration[-1] - self.duration[0]) * float(self.memory_allocated)

            # Return as %.
            return area / total_area * 100

        except Exception:
            # Div zero / missing data.
            return 0

    @property
    def cpu_efficiency(self):
        """
        The CPU efficiency of this calculation (as a %).

        This is the integrated CPU usage over time compared to the allocated CPUs over the same period.
        The number of allocated CPUs is guesstimated automatically if not available.
        """
        try:
            # Integrate to find the number of CPU seconds used.
            area = integrate.trapezoid(self.cpu_used, self.duration)

            # How much we should/could have used (each CPU counts as 100 %).
            total_area = (self.duration[-1] - self.duration[0]) * self.cpu_allocated * 100

            # Return as %.
            return area / total_area * 100

        except Exception:
            # Div zero / missing data.
            return 0

    @classmethod
    def from_dump(self, data, result_set, options):
        """
        Get an instance of this class from its dumped representation.

        :param data: The data to parse.
        :param result_set: The partially constructed result set which is being populated.
        :param options: Digichem options nested dictionary.
        """
        values = data['values']

        return self(
            duration = [value['duration']['value'] for value in values],
            memory_used = [value['memory_used']['value'] for value in values],
            memory_allocated = Memory(data['memory_allocated']['value']) if data['memory_allocated']['value'] is not None else None,
            memory_used_percent = [value['memory_used_percent']['value'] for value in values],
            memory_available = [value['memory_available']['value'] for value in values],
            memory_available_percent = [value['memory_available_percent']['value'] for value in values],
            cpu_used = [value['cpu_used']['value'] for value in values],
            cpu_allocated = data['cpu_allocated'],
            output_space = [value['output_space']['value'] for value in values],
            scratch_space = [value['scratch_space']['value'] for value in values]
        )

    def dump(self, digichem_options):
        """
        Get a representation of this result object in primitive format.
        """
        return {
            "cpu_allocated": self.cpu_allocated,
            "cpu_efficiency": {
                "units": "%",
                "value": float(self.cpu_efficiency),
            },
            "memory_allocated": {
                "units": "bytes",
                "value": float(self.memory_allocated) if self.memory_allocated is not None else None
            },
            "maximum_memory": {
                "units": "bytes",
                "value": self.max_mem,
            },
            "memory_margin": {
                "units": "bytes",
                "value": self.memory_margin
            },
            "memory_efficiency": {
                "units": "%",
                "value": float(self.memory_efficiency)
            },
            "values":[
                {
                    'duration': {
                        "units": "s",
                        "value": self.duration[i]
                    },
                    'memory_used': {
                        "units": "bytes",
                        "value": self.memory_used[i]
                    },
                    'memory_used_percent': {
                        "units": "%",
                        "value": self.memory_used_percent[i]
                    },
                    'memory_available': {
                        "units": "bytes",
                        "value": self.memory_available[i]
                    },
                    'memory_available_percent': {
                        # This is a percentage, not a byte count.
                        "units": "%",
                        "value": self.memory_available_percent[i]
                    },
                    'cpu_used': {
                        "units": "%",
                        "value": self.cpu_used[i]
                    },
                    'output_space': {
                        "units": "bytes",
                        "value": self.output_space[i]
                    },
                    'scratch_space': {
                        "units": "bytes",
                        "value": self.scratch_space[i]
                    }
                } for i in range(len(self.duration))
            ]
        }
digichem/result/result.py CHANGED
@@ -34,6 +34,9 @@ class Result_set(Result_object):
34
34
 
35
35
  self.results = (self,)
36
36
  self.emission = attributes.pop('emission', Emissions())
37
+
38
+ # Any ancillary data.
39
+ self._aux = attributes.pop('aux', {})
37
40
 
38
41
  for attr_name, attribute in attributes.items():
39
42
  setattr(self, attr_name, attribute)
@@ -333,6 +333,48 @@ class Absorption_emission_graph(Spectroscopy_graph):
333
333
  """
334
334
  return ((E * scipy.constants.electron_volt)**2 * f_E) / (scipy.constants.Planck * scipy.constants.c)
335
335
 
336
+ @classmethod
337
+ def inverse_jacobian(self, E, f_nm):
338
+ """
339
+ An implementation of the jacobian transform that scales intensity in wavelength units to intensity in energy units.
340
+
341
+ See J. Phys. Chem. Lett. 2014, 5, 20, 3497 for why this is necessary.
342
+
343
+ Note that the jacobian transform will maintain the area under the curve regardless of x units (nm or x).
344
+ Sadly, this has the consequence of mangling the intensity units (it becomes tiny; an oscillator strength of 1 at 3 eV becomes 1.163e-12).
345
+ """
346
+ # TODO: Might be better to rearrange this to accept nm rather than eV?
347
+ return (
348
+ (f_nm * scipy.constants.Planck * scipy.constants.c) / (E * scipy.constants.electron_volt) **2
349
+ )
350
+
351
    @classmethod
    def shift_coord(self, coord, delta_eV):
        """
        Shift a coordinate (in nm) by a given energy value.

        :param coord: A (wavelength_nm, intensity_nm) pair to shift.
        :param delta_eV: The energy (in eV) to shift by. A positive value will blueshift (higher energy).
        :return: The shifted (wavelength_nm, intensity_nm) pair.
        """
        old_x_nm, old_y_nm = coord
        # Convert x to energy.
        old_x_ev = digichem.result.excited_state.Excited_state.wavelength_to_energy(old_x_nm)
        # Transform y.
        old_y_ev = self.inverse_jacobian(old_x_ev, old_y_nm)

        # Shift by given amount.
        new_x_ev = old_x_ev + delta_eV

        # Convert back to nm.
        # NOTE(review): energy_to_wavelength() is not visible here — presumably the second
        # argument requests the inverse intensity (jacobian) transform as well; confirm.
        new_x_nm, new_f_nm = self.energy_to_wavelength((new_x_ev, old_y_ev), True)

        return (new_x_nm, new_f_nm)
371
+
372
    def shift(self, delta_eV):
        """
        Shift every coordinate of this graph by a given energy.

        :param delta_eV: The energy (in eV) to shift by. A positive value will blueshift (higher energy).
        :return: A lazy map of shifted (wavelength_nm, intensity_nm) pairs; iterate to consume.
        """
        return map(lambda coord: self.shift_coord(coord, delta_eV), self.coordinates)
376
+
377
+
336
378
  def plot_gaussian(self):
337
379
  """
338
380
  Plot a gaussian distribution around our excited state energies.
@@ -0,0 +1,33 @@
1
+ """Test for memory representations."""
2
+
3
+ import pytest
4
+
5
+ from digichem.memory import Memory
6
+
7
+
8
@pytest.mark.parametrize("value, valid", [
    ["1", True],
    [1, True],
    [1.0, True],
    ["1 B", True],
    ["1 KB", True],
    ["1 MB", True],
    ["1 GB", True],
    ["1 TB", True],
    ["foo bar", False],
])
def test_validation(value, valid):
    """
    Check that Memory accepts valid representations and rejects invalid ones.
    """
    if valid:
        # Valid input: construction and stringification should both succeed.
        str(Memory(value))

    else:
        # Invalid input: expect an exception from construction or stringification.
        # (Previously a bare `raise` with no active exception produced a confusing
        # RuntimeError instead of a clear test failure.)
        with pytest.raises(Exception):
            str(Memory(value))
@@ -76,4 +76,71 @@ def test_dump_and_parse(result_files, tmp_path, digichem_options):
76
76
 
77
77
 
78
78
  assert raw_dump == parsed_dump
79
-
79
+
80
@pytest.mark.parametrize(
    "result_files, num_archives",
    [
        [(Path(data_directory(), "Archives/1/Benzene.log"),), 1],
        [(Path(data_directory(), "Archives/1/Benzene.log"), "fchk:" + str(Path(data_directory(), "Archives/1/Benzene.fchk.zip"))), 1],
        [(Path(data_directory(), "Archives/1/"),), 1],
        [(Path(data_directory(), "Archives/1/"), "fchk:" + str(Path(data_directory(), "Archives/1/Benzene.fchk.zip"))), 1],
        [(Path(data_directory(), "Archives/2/Benzene.log.zip"),), 1],
        [(Path(data_directory(), "Archives/2/Benzene.log.zip"), "fchk:" + str(Path(data_directory(), "Archives/2/Benzene.fchk"))), 1],
        [(Path(data_directory(), "Archives/2"),), 1],
        [(Path(data_directory(), "Archives/2/Benzene.log.zip"), "fchk:" + str(Path(data_directory(), "Archives/2/Benzene.fchk"))), 1],
        [(Path(data_directory(), "Archives/3/Benzene.log.zip"),), 2],
        [(Path(data_directory(), "Archives/3/Benzene.log.zip"), "fchk:" + str(Path(data_directory(), "Archives/3/Benzene.fchk.zip"))), 2],
        [(Path(data_directory(), "Archives/3"),), 2],
        [(Path(data_directory(), "Archives/3/Benzene.log.zip"), "fchk:" + str(Path(data_directory(), "Archives/3/Benzene.fchk.zip"))), 2],
        [(Path(data_directory(), "Archives/4"),), 0],
        [(Path(data_directory(), "Archives/4.zip"),), 1],
        [(Path(data_directory(), "Archives/8.zip"),), 1],
    ]
)
def test_gaussian_archives(result_files, num_archives, digichem_options):
    """
    Can we parse from various archives?
    """
    # Pre-bind so the finally clause doesn't NameError (masking the real failure)
    # when parse_calculation() itself raises.
    archive = None
    try:
        result, archive = parse_calculation(*result_files, options = digichem_options, keep_archive = True)
        assert isinstance(result, Result_set)

        # Check we have found the fchk.
        assert "fchk_file" in result.metadata.auxiliary_files
        assert len(result.metadata.log_files) == 1

        # Make sure we didn't unpack any other archives by accident.
        assert len(archive.archive_dirs) == num_archives

    finally:
        if archive is not None:
            archive.cleanup()
117
+
118
+
119
@pytest.mark.parametrize(
    "result_files, num_archives",
    [
        [(Path(data_directory(), "Archives/5"),), 27],
        [(Path(data_directory(), "Archives/5/Naphthalene.log"),), 27],
        [(Path(data_directory(), "Archives/6"),), 28],
        [(Path(data_directory(), "Archives/6/Naphthalene.log.zip"),), 28],
        [(Path(data_directory(), "Archives/7"),), 1],
        [(Path(data_directory(), "Archives/7/Naphthalene.log.zip"),), 1],
    ]
)
def test_turbomole_archives(result_files, num_archives, digichem_options):
    """
    Can we parse from various archives?
    """
    # Pre-bind so the finally clause doesn't NameError (masking the real failure)
    # when parse_calculation() itself raises.
    archive = None
    try:
        result, archive = parse_calculation(*result_files, options = digichem_options, keep_archive = True)
        assert isinstance(result, Result_set)

        # Check we have found the ground state CAO file.
        assert "ground_state_cao_file" in result.metadata.auxiliary_files
        assert len(result.metadata.log_files) == 13

        # Make sure we didn't unpack any other archives by accident.
        assert len(archive.archive_dirs) == num_archives

    finally:
        if archive is not None:
            archive.cleanup()
@@ -208,7 +208,7 @@ def test_atoms(result_set):
208
208
  assert result_set.raw_atoms.element_dict['H'] == 8
209
209
 
210
210
  # Check mass.
211
- assert result_set.raw_atoms.molar_mass == pytest.approx(128.1705)
211
+ assert result_set.raw_atoms.molar_mass == pytest.approx(128.174)
212
212
 
213
213
  # We don't check positions here because these can vary from calc to calc due to reorientation...
214
214
 
digichem/test/util.py CHANGED
@@ -29,7 +29,8 @@ result_files = {
29
29
  "gaussian": [Path(data_directory(), datum) for datum in [
30
30
  'Naphthalene/Gaussian 16 Optimisation Frequencies PBE1PBE (GD3BJ) Toluene 6-31G(d,p)',
31
31
  'Naphthalene/Gaussian 16 Excited States TDA 10 Singlets 10 Triplets PBE1PBE (GD3BJ) Toluene 6-31G(d,p).tar.gz',
32
- 'Naphthalene/Gaussian 16 Excited States TDA Optimised S(1) PBE1PBE (GD3BJ) Toluene 6-31G(d,p).tar.gz',
32
+ 'Naphthalene/Gaussian 16 Excited States TDA Optimised S(1) PBE1PBE (GD3BJ) Toluene 6-31G(d,p).tar.gz',
33
+ 'Pyridine/Gaussian 16 Excited States TDA Optimised S(1) PBE1PBE (GD3BJ) Toluene 6-31G(d,p).tar.gz'
33
34
  ]],
34
35
  "turbomole": [Path(data_directory(), datum) for datum in [
35
36
  'Naphthalene/Turbomole Optimisation ADC(2) cc-pVDZ.tar.gz',
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: digichem-core
3
- Version: 6.1.0
3
+ Version: 6.10.1
4
4
  Summary: Open-source library for Digichem core components
5
5
  Project-URL: Homepage, https://github.com/Digichem-Project/digichem-core
6
6
  Project-URL: Documentation, https://doc.digi-chem.co.uk
@@ -38,6 +38,7 @@ Requires-Dist: pyyaml
38
38
  Requires-Dist: rdkit
39
39
  Requires-Dist: scipy
40
40
  Provides-Extra: test
41
+ Requires-Dist: pyscf; extra == 'test'
41
42
  Requires-Dist: pytest; extra == 'test'
42
43
  Requires-Dist: pytest-lazy-fixture; extra == 'test'
43
44
  Description-Content-Type: text/markdown
@@ -97,4 +98,4 @@ Documentation coming soon.
97
98
 
98
99
  Digichem-core is licensed under the BSD-3-Clause license, but some files are licensed separately. See [COPYING.md](COPYING.md) for full details.
99
100
 
100
- The Digichem logo and branding is Copyright Digichem 2024, you may not use them in any way (although you are welcome to look at them).
101
+ The Digichem logo and branding is Copyright Digichem 2025, you may not use them in any way (although you are welcome to look at them).