digichem-core 6.0.0rc1 (digichem_core-6.0.0rc1-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. digichem/__init__.py +75 -0
  2. digichem/basis.py +116 -0
  3. digichem/config/README +3 -0
  4. digichem/config/__init__.py +5 -0
  5. digichem/config/base.py +321 -0
  6. digichem/config/locations.py +14 -0
  7. digichem/config/parse.py +90 -0
  8. digichem/config/util.py +117 -0
  9. digichem/data/README +4 -0
  10. digichem/data/batoms/COPYING +18 -0
  11. digichem/data/batoms/LICENSE +674 -0
  12. digichem/data/batoms/README +2 -0
  13. digichem/data/batoms/__init__.py +0 -0
  14. digichem/data/batoms/batoms-renderer.py +351 -0
  15. digichem/data/config/digichem.yaml +714 -0
  16. digichem/data/functionals.csv +15 -0
  17. digichem/data/solvents.csv +185 -0
  18. digichem/data/tachyon/COPYING.md +5 -0
  19. digichem/data/tachyon/LICENSE +30 -0
  20. digichem/data/tachyon/tachyon_LINUXAMD64 +0 -0
  21. digichem/data/vmd/common.tcl +468 -0
  22. digichem/data/vmd/generate_combined_orbital_images.tcl +70 -0
  23. digichem/data/vmd/generate_density_images.tcl +45 -0
  24. digichem/data/vmd/generate_dipole_images.tcl +68 -0
  25. digichem/data/vmd/generate_orbital_images.tcl +57 -0
  26. digichem/data/vmd/generate_spin_images.tcl +66 -0
  27. digichem/data/vmd/generate_structure_images.tcl +40 -0
  28. digichem/datas.py +14 -0
  29. digichem/exception/__init__.py +7 -0
  30. digichem/exception/base.py +133 -0
  31. digichem/exception/uncatchable.py +63 -0
  32. digichem/file/__init__.py +1 -0
  33. digichem/file/base.py +364 -0
  34. digichem/file/cube.py +284 -0
  35. digichem/file/fchk.py +94 -0
  36. digichem/file/prattle.py +277 -0
  37. digichem/file/types.py +97 -0
  38. digichem/image/__init__.py +6 -0
  39. digichem/image/base.py +113 -0
  40. digichem/image/excited_states.py +335 -0
  41. digichem/image/graph.py +293 -0
  42. digichem/image/orbitals.py +239 -0
  43. digichem/image/render.py +617 -0
  44. digichem/image/spectroscopy.py +797 -0
  45. digichem/image/structure.py +115 -0
  46. digichem/image/vmd.py +826 -0
  47. digichem/input/__init__.py +3 -0
  48. digichem/input/base.py +78 -0
  49. digichem/input/digichem_input.py +500 -0
  50. digichem/input/gaussian.py +140 -0
  51. digichem/log.py +179 -0
  52. digichem/memory.py +166 -0
  53. digichem/misc/__init__.py +4 -0
  54. digichem/misc/argparse.py +44 -0
  55. digichem/misc/base.py +61 -0
  56. digichem/misc/io.py +239 -0
  57. digichem/misc/layered_dict.py +285 -0
  58. digichem/misc/text.py +139 -0
  59. digichem/misc/time.py +73 -0
  60. digichem/parse/__init__.py +13 -0
  61. digichem/parse/base.py +220 -0
  62. digichem/parse/cclib.py +138 -0
  63. digichem/parse/dump.py +253 -0
  64. digichem/parse/gaussian.py +130 -0
  65. digichem/parse/orca.py +96 -0
  66. digichem/parse/turbomole.py +201 -0
  67. digichem/parse/util.py +523 -0
  68. digichem/result/__init__.py +6 -0
  69. digichem/result/alignment/AA.py +114 -0
  70. digichem/result/alignment/AAA.py +61 -0
  71. digichem/result/alignment/FAP.py +148 -0
  72. digichem/result/alignment/__init__.py +3 -0
  73. digichem/result/alignment/base.py +310 -0
  74. digichem/result/angle.py +153 -0
  75. digichem/result/atom.py +742 -0
  76. digichem/result/base.py +258 -0
  77. digichem/result/dipole_moment.py +332 -0
  78. digichem/result/emission.py +402 -0
  79. digichem/result/energy.py +323 -0
  80. digichem/result/excited_state.py +821 -0
  81. digichem/result/ground_state.py +94 -0
  82. digichem/result/metadata.py +644 -0
  83. digichem/result/multi.py +98 -0
  84. digichem/result/nmr.py +1086 -0
  85. digichem/result/orbital.py +647 -0
  86. digichem/result/result.py +244 -0
  87. digichem/result/soc.py +272 -0
  88. digichem/result/spectroscopy.py +514 -0
  89. digichem/result/tdm.py +267 -0
  90. digichem/result/vibration.py +167 -0
  91. digichem/test/__init__.py +6 -0
  92. digichem/test/conftest.py +4 -0
  93. digichem/test/test_basis.py +71 -0
  94. digichem/test/test_calculate.py +30 -0
  95. digichem/test/test_config.py +78 -0
  96. digichem/test/test_cube.py +369 -0
  97. digichem/test/test_exception.py +16 -0
  98. digichem/test/test_file.py +104 -0
  99. digichem/test/test_image.py +337 -0
  100. digichem/test/test_input.py +64 -0
  101. digichem/test/test_parsing.py +79 -0
  102. digichem/test/test_prattle.py +36 -0
  103. digichem/test/test_result.py +489 -0
  104. digichem/test/test_translate.py +112 -0
  105. digichem/test/util.py +207 -0
  106. digichem/translate.py +591 -0
  107. digichem_core-6.0.0rc1.dist-info/METADATA +96 -0
  108. digichem_core-6.0.0rc1.dist-info/RECORD +111 -0
  109. digichem_core-6.0.0rc1.dist-info/WHEEL +4 -0
  110. digichem_core-6.0.0rc1.dist-info/licenses/COPYING.md +10 -0
  111. digichem_core-6.0.0rc1.dist-info/licenses/LICENSE +11 -0
digichem/parse/gaussian.py ADDED
@@ -0,0 +1,130 @@
+ # General imports.
+ from datetime import datetime, timedelta
+
+ from digichem.exception.base import Result_unavailable_error
+
+ # Digichem imports.
+ from digichem.parse.cclib import Cclib_parser
+ import digichem.log
+ import digichem.file.types as file_types
+
+ # Hidden imports.
+ #import pysoc.io.SOC
+
+ class Gaussian_parser(Cclib_parser):
+     """
+     Top level class for parsing output from Gaussian log files.
+     """
+
+     # A dictionary of recognised auxiliary file types.
+     INPUT_FILE_TYPES = {
+         file_types.gaussian_chk_file: "chk_file",
+         file_types.gaussian_fchk_file: "fchk_file",
+         file_types.gaussian_rwf_file: "rwf_file"
+     }
+
+     # Headers for date strings.
+     DATE_HEADER = "Normal termination of"
+     ELAPSED_TIME_HEADER = "Elapsed time:"
+     CPU_TIME_HEADER = "Job cpu time:"
+     CPU_HEADER = "Will use up to"
+
+     def parse_metadata(self):
+         """
+         Parse additional calculation metadata.
+         """
+         super().parse_metadata()
+
+     def pre_parse(self):
+         """
+         Perform any setup before line-by-line parsing.
+         """
+         super().pre_parse()
+         # Assume we used 1 CPU if not otherwise clear (is this a good idea?)
+         self.data.metadata['num_cpus'] = 1
+
+         self.wall_time = []
+         self.cpu_time = []
+
+     def parse_output_line(self, log_file, line):
+         """
+         Perform custom line-by-line parsing of an output file.
+         """
+         # Although we only need the last ~5 lines from the (possibly huge) log file, we read all the way through because negative seek()ing is tricky.
+         # Look for our key string.
+         if self.DATE_HEADER in line:
+             # This line looks like: "Normal termination of Gaussian 16 at Sun Dec 6 19:13:09 2020."
+             date_str = " ".join(line.split()[-4:])
+             self.data.metadata['date'] = datetime.strptime(date_str, "%b %d %H:%M:%S %Y.").timestamp()
+
+         elif self.ELAPSED_TIME_HEADER in line:
+             # This line looks like: "Elapsed time: 0 days 2 hours 38 minutes 50.9 seconds."
+             datey = line.split()[-8:]
+             self.wall_time.append(timedelta(days = int(datey[0]), hours = int(datey[2]), minutes = int(datey[4]), seconds = float(datey[6])).total_seconds())
+
+         elif self.CPU_TIME_HEADER in line:
+             # This line looks like: "Job cpu time: 0 days 20 hours 52 minutes 17.3 seconds."
+             datey = line.split()[-8:]
+             self.cpu_time.append(timedelta(days = int(datey[0]), hours = int(datey[2]), minutes = int(datey[4]), seconds = float(datey[6])).total_seconds())
+
+         elif self.CPU_HEADER in line:
+             # This line looks like: "Will use up to 10 processors via shared memory."
+             self.data.metadata['num_cpus'] = int(line.split()[4])
+
+     def post_parse(self):
+         """
+         Perform any required operations after line-by-line parsing.
+         """
+         super().post_parse()
+
+         if 'wall_time' not in self.data.metadata and len(self.wall_time) != 0:
+             self.data.metadata['wall_time'] = self.wall_time
+
+         if 'cpu_time' not in self.data.metadata and len(self.cpu_time) != 0:
+             self.data.metadata['cpu_time'] = self.cpu_time
+
+         # Get SOC.
+         # Next, try to calculate spin-orbit coupling (requires PySOC).
+         try:
+             self.calculate_SOC()
+
+         except Exception:
+             digichem.log.get_logger().debug("Cannot calculate spin-orbit-coupling from output file '{}'".format(self.log_file_path), exc_info = True)
+
+     def calculate_SOC(self):
+         """
+         Parse spin-orbit coupling using PySOC.
+         """
+         try:
+             import pysoc.io.SOC
+
+         except Exception as e:
+             raise Result_unavailable_error("Spin-orbit coupling", "PySOC is not available") from e
+
+         # For SOC, we need both the .log and .rwf files.
+         # No need to check for these though; pysoc does that for us.
+         # We also need etsyms to decide which excited state is which.
+         if not hasattr(self.data, "etsyms"):
+             raise Result_unavailable_error("Spin-orbit coupling", "There are no excited states available")
+
+         # Get a PySOC parser.
+         soc_calculator = pysoc.io.SOC.Calculator(self.log_file_path, rwf_file_name = self.auxiliary_files['rwf_file'])
+         soc_calculator.calculate()
+         SOC_table = soc_calculator.soc_td.SOC
+
+         # We'll split the SOC table given to us by PySOC to better match the format used by cclib.
+         socstates = []
+         socelements = []
+
+         for SOC_line in SOC_table:
+             # Add states.
+             socstates.append([SOC_line.singlet_state, SOC_line.triplet_state])
+
+             # Add coupling.
+             socelements.append([SOC_line.positive_one, SOC_line.zero, SOC_line.negative_one])
+
+         # Add to data.
+         self.data.socstates = socstates
+         self.data.socelements = socelements
+
+
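Note: as a quick illustration of the line-by-line parsing in Gaussian_parser.parse_output_line above, the short standalone sketch below reduces an "Elapsed time" line to seconds and a "Normal termination" line to a date. The sample log lines are invented for illustration and are not taken from a real calculation.

from datetime import datetime, timedelta

# Invented Gaussian-style log lines, for illustration only.
elapsed_line = "Elapsed time:       0 days  2 hours 38 minutes 50.9 seconds."
termination_line = "Normal termination of Gaussian 16 at Sun Dec  6 19:13:09 2020."

# Wall time: take the last 8 whitespace-separated tokens and pick out the numeric fields.
datey = elapsed_line.split()[-8:]
wall_seconds = timedelta(days = int(datey[0]), hours = int(datey[2]), minutes = int(datey[4]), seconds = float(datey[6])).total_seconds()
print(wall_seconds)  # 9530.9 (2 h 38 min 50.9 s)

# End date: the last 4 tokens form the date string (including the trailing full stop).
date_str = " ".join(termination_line.split()[-4:])
print(datetime.strptime(date_str, "%b %d %H:%M:%S %Y."))  # 2020-12-06 19:13:09
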
digichem/parse/orca.py ADDED
@@ -0,0 +1,96 @@
+ from digichem.parse.cclib import Cclib_parser
+ import digichem.file.types as file_types
+ import digichem.log
+
+
+ class Orca_parser(Cclib_parser):
+     """
+     Top level class for parsing output from Orca log files.
+     """
+
+     # A dictionary of recognised auxiliary file types.
+     INPUT_FILE_TYPES = {
+         file_types.orca_gbw_file: "gbw_file",
+         file_types.orca_density_file: "density_file",
+     }
+
+     def parse_output_line(self, log_file, line):
+         """
+         Perform custom line-by-line parsing of an output file.
+         """
+
+         # Spin-orbit coupling.
+         # We're looking to populate two or three attributes:
+         #  'socstates'   : A two-membered list of the singlet and triplet symbols that make up this coupling (eg, ["S(1)", "T(2)"]).
+         #  'socenergies' : The total spin-orbit coupling value (RSS of socelements).
+         #  'socelements' : A three-membered list of the SOC values for the triplet components +1, 0 and -1.
+         if "CALCULATED SOCME BETWEEN TRIPLETS AND SINGLETS" in line:
+             # Start of the SOC section.
+             # The same header is used for SOC in the cartesian basis (x, y, z) and for individual triplet states (+1, 0, -1).
+             line = next(log_file)
+             line = next(log_file)
+             line = next(log_file)
+
+             soc_type = None
+             if "Z" in line and "X" in line and "Y" in line:
+                 # Cartesian SOC.
+                 # In this format we can parse total SOC only.
+                 soc_type = "cartesian"
+
+             elif "0" in line and "-1" in line and "+1" in line:
+                 # Triplet SOC.
+                 # In this format we can parse individual SOC as well as total SOC.
+                 soc_type = "triplet"
+
+             else:
+                 digichem.log.get_logger().debug("Unrecognised SOC section started by line '{}'".format(line))
+
+             if soc_type is not None:
+                 # Reset our attributes.
+                 self.data.socstates = []
+                 self.data.socenergies = []
+                 if soc_type == "triplet":
+                     self.data.socelements = []
+                 elif hasattr(self.data, 'socelements'):
+                     delattr(self.data, 'socelements')
+
+                 line = next(log_file)
+                 line = next(log_file)
+
+                 while line.strip() != "--------------------------------------------------------------------------------" and \
+                         line.strip() != "":
+                     # Each line is the coupling between one singlet state and one triplet state, eg:
+                     #    1    1    (  0.00 ,  0.00)    ( -0.00 , -0.00)    ( -0.00 ,  0.00)
+                     split_line = line.split()
+                     try:
+                         triplet_index = int(split_line[0])
+                         singlet_index = int(split_line[1])
+
+                     except Exception:
+                         # We couldn't make sense of this line; log it and move on.
+                         digichem.log.get_logger().debug("Could not parse SOC line '{}'".format(line))
+                         line = next(log_file)
+                         continue
+
+                     # Split on brackets to get each xyz/0, -1, +1 element.
+                     soc_elements = []
+                     soc_element_strings = line.split("(")[1:]
+
+                     for soc_element_string in soc_element_strings:
+                         # The last character will be the closing bracket, so we can discard it.
+                         real, imagine = [float(ele) for ele in soc_element_string.strip()[:-1].split(",")]
+
+                         # We're not interested in the real or imaginary parts separately, just combine them (root of the sum of the squares).
+                         soc_elements.append((real**2 + imagine**2)**0.5)
+
+                     # We now have everything we need.
+                     self.data.socstates.append(["S({})".format(singlet_index), "T({})".format(triplet_index)])
+                     self.data.socenergies.append((soc_elements[0]**2 + soc_elements[1]**2 + soc_elements[2]**2)**0.5)
+
+                     # Only add elements if they are triplet elements (not cartesian).
+                     if soc_type == "triplet":
+                         # The order of triplet states is different in Orca to what we expect.
+                         # We want:    +1,  0, -1
+                         # Orca gives:  0, -1, +1
+                         self.data.socelements.append([soc_elements[2], soc_elements[0], soc_elements[1]])
+
+                     line = next(log_file)
+
+
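Note: as a worked example of the loop above, the sketch below collapses one hypothetical SOCME line into per-component magnitudes and a total root-sum-square coupling. The numbers are made up; real ORCA output will differ.

# Hypothetical ORCA-style SOCME line: triplet index, singlet index, then three (real, imaginary) pairs.
line = "   1    1    (  0.00 ,  0.30)    ( -0.40 , -0.00)    (  0.00 ,  0.00)"

split_line = line.split()
triplet_index, singlet_index = int(split_line[0]), int(split_line[1])

# Magnitude of each complex element: sqrt(real^2 + imag^2).
soc_elements = []
for soc_element_string in line.split("(")[1:]:
    real, imagine = [float(ele) for ele in soc_element_string.strip()[:-1].split(",")]
    soc_elements.append((real**2 + imagine**2)**0.5)

# Total coupling is the root-sum-square of the three elements.
total = (soc_elements[0]**2 + soc_elements[1]**2 + soc_elements[2]**2)**0.5
print(soc_elements)  # approximately [0.3, 0.4, 0.0]
print(total)         # approximately 0.5
print("S({}) <-> T({})".format(singlet_index, triplet_index))  # S(1) <-> T(1)
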
digichem/parse/turbomole.py ADDED
@@ -0,0 +1,201 @@
+ # General imports.
+ import re
+ from datetime import timedelta, datetime
+ import glob, pathlib
+ import warnings
+
+ # Digichem imports.
+ from digichem.parse.cclib import Cclib_parser
+
+ # Hidden imports.
+ #from cclib.io.ccio import sort_turbomole_outputs
+
+
+ class Turbomole_parser(Cclib_parser):
+     """
+     Top level class for parsing output from Turbomole files.
+     """
+     DAYS_REGEX = re.compile(r"([0-9.]*) days")
+     HOURS_REGEX = re.compile(r"([0-9.]*) hours")
+     MINUTES_REGEX = re.compile(r"([0-9.]*) minutes")
+     SECONDS_REGEX = re.compile(r"([0-9.]*) seconds")
+
+     @classmethod
+     def sort_log_files(cls, log_files):
+         """
+         Sort a list of log files into a particular order, if required for this parser.
+         """
+         from cclib.io.ccio import sort_turbomole_outputs
+
+         return sort_turbomole_outputs(log_files)
+
+     def duration_to_timedelta(self, duration_str):
+         """
+         Convert a Turbomole duration string into an equivalent timedelta object.
+         """
+         time_parts = {'days': 0, 'hours': 0, 'minutes': 0, 'seconds': 0}
+
+         for time_part in time_parts:
+             # Use regex to look for each part in the string.
+             match = getattr(self, time_part.upper() + '_REGEX').search(duration_str)
+             if match:
+                 time_parts[time_part] = float(match.group(1))
+
+         # Build a timedelta from our parts.
+         duration = timedelta(days = time_parts['days'], hours = time_parts['hours'], minutes = time_parts['minutes'], milliseconds = time_parts['seconds'] * 1000)
+
+         # All done.
+         return duration
+
+     def pre_parse(self):
+         """
+         Perform any setup before line-by-line parsing.
+         """
+         super().pre_parse()
+         # Look for duration information.
+         # Only bother doing this if we don't have timings from cclib.
+         if 'wall_time' not in self.data.metadata:
+             self.data.metadata['wall_time'] = []
+
+         if 'cpu_time' not in self.data.metadata:
+             self.data.metadata['cpu_time'] = []
+
+     def parse_output_line(self, log_file, line):
+         """
+         Perform custom line-by-line parsing of an output file.
+         """
+         # Only bother doing this if we don't have timings from cclib.
+         if 'wall_time' not in self.data.metadata or 'cpu_time' not in self.data.metadata:
+             # Look for duration.
+             if "total cpu-time :" in line:
+                 self.data.metadata['cpu_time'].append(self.duration_to_timedelta(line))
+
+             elif "total wall-time :" in line:
+                 self.data.metadata['wall_time'].append(self.duration_to_timedelta(line))
+
+         # And also the end date.
+         if ": all done ****" in line:
+             # Skip the next two lines, then read the line containing the date.
+             line = next(log_file)
+             line = next(log_file)
+             line = next(log_file)
+             try:
+                 self.data.metadata['date'] = datetime.strptime(line.strip(), "%Y-%m-%d %H:%M:%S.%f").timestamp()
+             except ValueError:
+                 # We couldn't parse.
+                 pass
+
+     def post_parse(self):
+         """
+         Perform any required operations after line-by-line parsing.
+         """
+         super().post_parse()
+
+         # Delete our durations if they are empty.
+         if len(self.data.metadata['wall_time']) == 0:
+             del(self.data.metadata['wall_time'])
+
+         if len(self.data.metadata['cpu_time']) == 0:
+             del(self.data.metadata['cpu_time'])
+
+     def process(self, options):
+         """
+         Get a Result set object from this parser.
+
+         :param options: A Digichem options nested dictionary containing options to control parsing.
+         :return: The populated result set.
+         """
+         super().process(options)
+
+         # After processing is complete, have a look for excited state density files.
+         # These have the general file name:
+         # adcp2-xsdn-1a-001-total.cao
+         # ^     ^    ^  ^------------ Excited state number (1, 2, 3 etc).
+         # |     |    ---------------- Irrep (multiplicity and symmetry)
+         # |     --------------------- Excited state
+         # --------------------------- Method (MP2, ADC(2), CC2).
+         #
+         # Each found density file will be stored under auxiliary_files['excited_state_cao_files'][state_symbol], where state_symbol is the corresponding state, eg S(1).
+         # If we have no excited states we can skip this altogether.
+         if len(self.results.excited_states) != 0:
+             for log_file in self.log_file_paths:
+                 excited_densities = {}
+                 state_num = 1
+                 # Look for each numbered excited state until we run out of files.
+                 while True:
+                     found_densities = glob.glob(str(pathlib.Path(log_file.parent, "*xsdn*{:03}*total*.cao".format(state_num))))
+
+                     # Sort the results so that, if there is more than one match, the same behaviour is encountered between multiple runs.
+                     found_densities.sort()
+
+                     if len(found_densities) > 0:
+                         try:
+                             # Get the state that corresponds to this file.
+                             excited_state = self.results.excited_states[state_num - 1]
+                             excited_densities[excited_state.state_symbol] = pathlib.Path(found_densities[0])
+
+                             # Print a warning if there's more than one (because we don't know how to handle that scenario).
+                             if len(found_densities) > 1:
+                                 warnings.warn("Found multiple excited state density files for state '{}' in Turbomole calculation directory '{}'; using file '{}' and ignoring '{}'".format(excited_state.state_symbol, log_file.parent, pathlib.Path(excited_densities[excited_state.state_symbol]).name, ", ".join([pathlib.Path(density).name for density in found_densities[1:]])))
+
+                         except IndexError:
+                             warnings.warn("Could not find excited state that corresponds to excited state density file '{}' with index {}; this file will be ignored".format(found_densities[0], state_num - 1))
+
+                     if len(found_densities) == 0:
+                         # All done.
+                         break
+
+                     state_num += 1
+
+                 # Update auxiliary_files with the new files.
+                 if len(excited_densities) > 0:
+                     try:
+                         self.results.metadata.auxiliary_files['excited_state_cao_files'].update(excited_densities)
+
+                     except KeyError:
+                         if 'excited_state_cao_files' not in self.results.metadata.auxiliary_files:
+                             self.results.metadata.auxiliary_files['excited_state_cao_files'] = excited_densities
+                         else:
+                             raise
+
+         return self.results
+
+     @classmethod
+     def find_auxiliary_files(cls, hint):
+         """
+         Find auxiliary files from a given hint.
+
+         :param hint: A path to a file to use as a hint to find additional files.
+         :returns: A dictionary of found aux files.
+         """
+         auxiliary_files = super().find_auxiliary_files(hint)
+
+         parent = pathlib.Path(hint).parent
+
+         # Find .cao density files.
+         # First look for ground state density files.
+         # These have the general file name:
+         # mp2-gsdn-1a-000-total.cao
+         # ^   ^    ^--------------- Irrep (multiplicity and symmetry)
+         # |   --------------------- Ground state
+         # ------------------------- Method (MP2, ADC(2), CC2).
+         #
+         ground_densities = glob.glob(str(pathlib.Path(parent, "*gsdn*total*.cao")))
+
+         # Sort the results so that, if there is more than one match, the same behaviour is encountered between multiple runs.
+         ground_densities.sort()
+
+         if len(ground_densities) > 0:
+             auxiliary_files['ground_state_cao_file'] = pathlib.Path(ground_densities[0])
+
+             # Print a warning if there's more than one (because we don't know how to handle that scenario).
+             if len(ground_densities) > 1:
+                 warnings.warn("Found multiple ground state density files in Turbomole calculation directory '{}'; using file '{}' and ignoring '{}'".format(parent, pathlib.Path(auxiliary_files['ground_state_cao_file']).name, ", ".join([pathlib.Path(density).name for density in ground_densities[1:]])))
+
+         #################################################################################################
+         # Note that excited state densities are also located, but this is done in the process() method. #
+         #################################################################################################
+
+         return auxiliary_files
+
+
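Note: the sketch below shows the regex-based duration parsing used by duration_to_timedelta in isolation. The timing line is invented; real Turbomole output may differ in spacing and wording.

import re
from datetime import timedelta

DAYS_REGEX = re.compile(r"([0-9.]*) days")
HOURS_REGEX = re.compile(r"([0-9.]*) hours")
MINUTES_REGEX = re.compile(r"([0-9.]*) minutes")
SECONDS_REGEX = re.compile(r"([0-9.]*) seconds")

# Invented Turbomole-style timing line, for illustration only.
duration_str = "    total wall-time :   2 hours  5 minutes and 12.3 seconds"

# Each regex contributes one field; missing fields stay at zero.
time_parts = {'days': 0, 'hours': 0, 'minutes': 0, 'seconds': 0}
for name, regex in (('days', DAYS_REGEX), ('hours', HOURS_REGEX), ('minutes', MINUTES_REGEX), ('seconds', SECONDS_REGEX)):
    match = regex.search(duration_str)
    if match:
        time_parts[name] = float(match.group(1))

# As in the parser, fractional seconds are passed as milliseconds * 1000, which is equivalent to passing them directly as seconds.
duration = timedelta(days = time_parts['days'], hours = time_parts['hours'], minutes = time_parts['minutes'], milliseconds = time_parts['seconds'] * 1000)
print(duration.total_seconds())  # 7512.3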