digichem-core 6.0.0rc1 (digichem_core-6.0.0rc1-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. digichem/__init__.py +75 -0
  2. digichem/basis.py +116 -0
  3. digichem/config/README +3 -0
  4. digichem/config/__init__.py +5 -0
  5. digichem/config/base.py +321 -0
  6. digichem/config/locations.py +14 -0
  7. digichem/config/parse.py +90 -0
  8. digichem/config/util.py +117 -0
  9. digichem/data/README +4 -0
  10. digichem/data/batoms/COPYING +18 -0
  11. digichem/data/batoms/LICENSE +674 -0
  12. digichem/data/batoms/README +2 -0
  13. digichem/data/batoms/__init__.py +0 -0
  14. digichem/data/batoms/batoms-renderer.py +351 -0
  15. digichem/data/config/digichem.yaml +714 -0
  16. digichem/data/functionals.csv +15 -0
  17. digichem/data/solvents.csv +185 -0
  18. digichem/data/tachyon/COPYING.md +5 -0
  19. digichem/data/tachyon/LICENSE +30 -0
  20. digichem/data/tachyon/tachyon_LINUXAMD64 +0 -0
  21. digichem/data/vmd/common.tcl +468 -0
  22. digichem/data/vmd/generate_combined_orbital_images.tcl +70 -0
  23. digichem/data/vmd/generate_density_images.tcl +45 -0
  24. digichem/data/vmd/generate_dipole_images.tcl +68 -0
  25. digichem/data/vmd/generate_orbital_images.tcl +57 -0
  26. digichem/data/vmd/generate_spin_images.tcl +66 -0
  27. digichem/data/vmd/generate_structure_images.tcl +40 -0
  28. digichem/datas.py +14 -0
  29. digichem/exception/__init__.py +7 -0
  30. digichem/exception/base.py +133 -0
  31. digichem/exception/uncatchable.py +63 -0
  32. digichem/file/__init__.py +1 -0
  33. digichem/file/base.py +364 -0
  34. digichem/file/cube.py +284 -0
  35. digichem/file/fchk.py +94 -0
  36. digichem/file/prattle.py +277 -0
  37. digichem/file/types.py +97 -0
  38. digichem/image/__init__.py +6 -0
  39. digichem/image/base.py +113 -0
  40. digichem/image/excited_states.py +335 -0
  41. digichem/image/graph.py +293 -0
  42. digichem/image/orbitals.py +239 -0
  43. digichem/image/render.py +617 -0
  44. digichem/image/spectroscopy.py +797 -0
  45. digichem/image/structure.py +115 -0
  46. digichem/image/vmd.py +826 -0
  47. digichem/input/__init__.py +3 -0
  48. digichem/input/base.py +78 -0
  49. digichem/input/digichem_input.py +500 -0
  50. digichem/input/gaussian.py +140 -0
  51. digichem/log.py +179 -0
  52. digichem/memory.py +166 -0
  53. digichem/misc/__init__.py +4 -0
  54. digichem/misc/argparse.py +44 -0
  55. digichem/misc/base.py +61 -0
  56. digichem/misc/io.py +239 -0
  57. digichem/misc/layered_dict.py +285 -0
  58. digichem/misc/text.py +139 -0
  59. digichem/misc/time.py +73 -0
  60. digichem/parse/__init__.py +13 -0
  61. digichem/parse/base.py +220 -0
  62. digichem/parse/cclib.py +138 -0
  63. digichem/parse/dump.py +253 -0
  64. digichem/parse/gaussian.py +130 -0
  65. digichem/parse/orca.py +96 -0
  66. digichem/parse/turbomole.py +201 -0
  67. digichem/parse/util.py +523 -0
  68. digichem/result/__init__.py +6 -0
  69. digichem/result/alignment/AA.py +114 -0
  70. digichem/result/alignment/AAA.py +61 -0
  71. digichem/result/alignment/FAP.py +148 -0
  72. digichem/result/alignment/__init__.py +3 -0
  73. digichem/result/alignment/base.py +310 -0
  74. digichem/result/angle.py +153 -0
  75. digichem/result/atom.py +742 -0
  76. digichem/result/base.py +258 -0
  77. digichem/result/dipole_moment.py +332 -0
  78. digichem/result/emission.py +402 -0
  79. digichem/result/energy.py +323 -0
  80. digichem/result/excited_state.py +821 -0
  81. digichem/result/ground_state.py +94 -0
  82. digichem/result/metadata.py +644 -0
  83. digichem/result/multi.py +98 -0
  84. digichem/result/nmr.py +1086 -0
  85. digichem/result/orbital.py +647 -0
  86. digichem/result/result.py +244 -0
  87. digichem/result/soc.py +272 -0
  88. digichem/result/spectroscopy.py +514 -0
  89. digichem/result/tdm.py +267 -0
  90. digichem/result/vibration.py +167 -0
  91. digichem/test/__init__.py +6 -0
  92. digichem/test/conftest.py +4 -0
  93. digichem/test/test_basis.py +71 -0
  94. digichem/test/test_calculate.py +30 -0
  95. digichem/test/test_config.py +78 -0
  96. digichem/test/test_cube.py +369 -0
  97. digichem/test/test_exception.py +16 -0
  98. digichem/test/test_file.py +104 -0
  99. digichem/test/test_image.py +337 -0
  100. digichem/test/test_input.py +64 -0
  101. digichem/test/test_parsing.py +79 -0
  102. digichem/test/test_prattle.py +36 -0
  103. digichem/test/test_result.py +489 -0
  104. digichem/test/test_translate.py +112 -0
  105. digichem/test/util.py +207 -0
  106. digichem/translate.py +591 -0
  107. digichem_core-6.0.0rc1.dist-info/METADATA +96 -0
  108. digichem_core-6.0.0rc1.dist-info/RECORD +111 -0
  109. digichem_core-6.0.0rc1.dist-info/WHEEL +4 -0
  110. digichem_core-6.0.0rc1.dist-info/licenses/COPYING.md +10 -0
  111. digichem_core-6.0.0rc1.dist-info/licenses/LICENSE +11 -0
digichem/parse/gaussian.py ADDED
@@ -0,0 +1,130 @@
+ # General imports.
+ from datetime import datetime, timedelta
+
+ from digichem.exception.base import Result_unavailable_error
+
+ # Digichem imports.
+ from digichem.parse.cclib import Cclib_parser
+ import digichem.log
+ import digichem.file.types as file_types
+
+ # Hidden imports.
+ #import pysoc.io.SOC
+
+ class Gaussian_parser(Cclib_parser):
+     """
+     Top level class for parsing output from Gaussian log files.
+     """
+
+     # A dictionary of recognised auxiliary file types.
+     INPUT_FILE_TYPES = {
+         file_types.gaussian_chk_file: "chk_file",
+         file_types.gaussian_fchk_file: "fchk_file",
+         file_types.gaussian_rwf_file: "rwf_file"
+     }
+
+     # Headers for date strings.
+     DATE_HEADER = "Normal termination of"
+     ELAPSED_TIME_HEADER = "Elapsed time:"
+     CPU_TIME_HEADER = "Job cpu time:"
+     CPU_HEADER = "Will use up to"
+
+     def parse_metadata(self):
+         """
+         Parse additional calculation metadata.
+         """
+         super().parse_metadata()
+
+     def pre_parse(self):
+         """
+         Perform any setup before line-by-line parsing.
+         """
+         super().pre_parse()
+         # Assume we used 1 CPU if not otherwise clear (is this a good idea?)
+         self.data.metadata['num_cpus'] = 1
+
+         self.wall_time = []
+         self.cpu_time = []
+
+     def parse_output_line(self, log_file, line):
+         """
+         Perform custom line-by-line parsing of an output file.
+         """
+         # Although we only need the last ~5 lines from the (possibly huge) log file, we read all the way through because negative seek()ing is tricky.
+         # Look for our key string.
+         if self.DATE_HEADER in line:
+             # This line looks like: "Normal termination of Gaussian 16 at Sun Dec 6 19:13:09 2020."
+             date_str = " ".join(line.split()[-4:])
+             self.data.metadata['date'] = datetime.strptime(date_str, "%b %d %H:%M:%S %Y.").timestamp()
+
+         elif self.ELAPSED_TIME_HEADER in line:
+             # This line looks like: "Elapsed time: 0 days 2 hours 38 minutes 50.9 seconds."
+             datey = line.split()[-8:]
+             self.wall_time.append(timedelta(days = int(datey[0]), hours = int(datey[2]), minutes = int(datey[4]), seconds = float(datey[6])).total_seconds())
+
+         elif self.CPU_TIME_HEADER in line:
+             # This line looks like: "Job cpu time: 0 days 20 hours 52 minutes 17.3 seconds."
+             datey = line.split()[-8:]
+             self.cpu_time.append(timedelta(days = int(datey[0]), hours = int(datey[2]), minutes = int(datey[4]), seconds = float(datey[6])).total_seconds())
+
+         elif self.CPU_HEADER in line:
+             # This line looks like: "Will use up to 10 processors via shared memory."
+             self.data.metadata['num_cpus'] = int(line.split()[4])
+
+     def post_parse(self):
+         """
+         Perform any required operations after line-by-line parsing.
+         """
+         super().post_parse()
+
+         if 'wall_time' not in self.data.metadata and len(self.wall_time) != 0:
+             self.data.metadata['wall_time'] = self.wall_time
+
+         if 'cpu_time' not in self.data.metadata and len(self.cpu_time) != 0:
+             self.data.metadata['cpu_time'] = self.cpu_time
+
+         # Get SOC.
+         # Next, try to calculate spin-orbit coupling (requires PySOC).
+         try:
+             self.calculate_SOC()
+
+         except Exception:
+             digichem.log.get_logger().debug("Cannot calculate spin-orbit-coupling from output file '{}'".format(self.log_file_path), exc_info = True)
+
+     def calculate_SOC(self):
+         """
+         Parse spin-orbit coupling using PySOC.
+         """
+         try:
+             import pysoc.io.SOC
+
+         except Exception as e:
+             raise Result_unavailable_error("Spin-orbit coupling", "PySOC is not available") from e
+
+         # For SOC, we need both the .log and .rwf files.
+         # No need to check for these though; pysoc does that for us.
+         # We also need etsyms to decide which excited state is which.
+         if not hasattr(self.data, "etsyms"):
+             raise Result_unavailable_error("Spin-orbit coupling", "There are no excited states available")
+
+         # Get a PySOC parser.
+         soc_calculator = pysoc.io.SOC.Calculator(self.log_file_path, rwf_file_name = self.auxiliary_files['rwf_file'])
+         soc_calculator.calculate()
+         SOC_table = soc_calculator.soc_td.SOC
+
+         # We'll split the SOC table given to us by PySOC to better match the format used by cclib.
+         socstates = []
+         socelements = []
+
+         for SOC_line in SOC_table:
+             # Add states.
+             socstates.append([SOC_line.singlet_state, SOC_line.triplet_state])
+
+             # Add coupling.
+             socelements.append([SOC_line.positive_one, SOC_line.zero, SOC_line.negative_one])
+
+         # Add to data.
+         self.data.socstates = socstates
+         self.data.socelements = socelements
+
+
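Note: as a quick illustration of the line-by-line parsing in Gaussian_parser.parse_output_line above, the short standalone sketch below reduces an "Elapsed time" line to seconds and a "Normal termination" line to a date. The sample log lines are invented for illustration and are not taken from a real calculation.

from datetime import datetime, timedelta

# Invented Gaussian-style log lines, for illustration only.
elapsed_line = "Elapsed time:       0 days  2 hours 38 minutes 50.9 seconds."
termination_line = "Normal termination of Gaussian 16 at Sun Dec  6 19:13:09 2020."

# Wall time: take the last 8 whitespace-separated tokens and pick out the numeric fields.
datey = elapsed_line.split()[-8:]
wall_seconds = timedelta(days = int(datey[0]), hours = int(datey[2]), minutes = int(datey[4]), seconds = float(datey[6])).total_seconds()
print(wall_seconds)  # 9530.9 (2 h 38 min 50.9 s)

# End date: the last 4 tokens form the date string (including the trailing full stop).
date_str = " ".join(termination_line.split()[-4:])
print(datetime.strptime(date_str, "%b %d %H:%M:%S %Y."))  # 2020-12-06 19:13:09
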
digichem/parse/orca.py ADDED
@@ -0,0 +1,96 @@
+ from digichem.parse.cclib import Cclib_parser
+ import digichem.file.types as file_types
+ import digichem.log
+
+
+ class Orca_parser(Cclib_parser):
+     """
+     Top level class for parsing output from Orca log files.
+     """
+
+     # A dictionary of recognised auxiliary file types.
+     INPUT_FILE_TYPES = {
+         file_types.orca_gbw_file: "gbw_file",
+         file_types.orca_density_file: "density_file",
+     }
+
+     def parse_output_line(self, log_file, line):
+         """
+         Perform custom line-by-line parsing of an output file.
+         """
+
+         # Spin-orbit coupling.
+         # We're looking to populate two or three attributes:
+         #  'socstates'   : A two-membered list of the singlet and triplet symbols that make up this coupling (eg, ["S(1)", "T(2)"]).
+         #  'socenergies' : The total spin-orbit coupling value (RSS of socelements).
+         #  'socelements' : A three-membered list of the SOC values for the triplet components +1, 0 and -1.
+         if "CALCULATED SOCME BETWEEN TRIPLETS AND SINGLETS" in line:
+             # Start of the SOC section.
+             # The same header is used for SOC in the cartesian basis (x, y, z) and for individual triplet states (+1, 0, -1).
+             line = next(log_file)
+             line = next(log_file)
+             line = next(log_file)
+
+             soc_type = None
+             if "Z" in line and "X" in line and "Y" in line:
+                 # Cartesian SOC.
+                 # In this format we can parse total SOC only.
+                 soc_type = "cartesian"
+
+             elif "0" in line and "-1" in line and "+1" in line:
+                 # Triplet SOC.
+                 # In this format we can parse individual SOC as well as total SOC.
+                 soc_type = "triplet"
+
+             else:
+                 digichem.log.get_logger().debug("Unrecognised SOC section started by line '{}'".format(line))
+
+             if soc_type is not None:
+                 # Reset our attributes.
+                 self.data.socstates = []
+                 self.data.socenergies = []
+                 if soc_type == "triplet":
+                     self.data.socelements = []
+                 elif hasattr(self.data, 'socelements'):
+                     delattr(self.data, 'socelements')
+
+                 line = next(log_file)
+                 line = next(log_file)
+
+                 while line.strip() != "--------------------------------------------------------------------------------" and \
+                         line.strip() != "":
+                     # Each line is the coupling between one singlet state and one triplet state, eg:
+                     #    1    1    (  0.00 ,  0.00)    ( -0.00 , -0.00)    ( -0.00 ,  0.00)
+                     split_line = line.split()
+                     try:
+                         triplet_index = int(split_line[0])
+                         singlet_index = int(split_line[1])
+
+                     except Exception:
+                         # We couldn't make sense of this line; log it and move on.
+                         digichem.log.get_logger().debug("Could not parse SOC line '{}'".format(line))
+                         line = next(log_file)
+                         continue
+
+                     # Split on brackets to get each xyz/0, -1, +1 element.
+                     soc_elements = []
+                     soc_element_strings = line.split("(")[1:]
+
+                     for soc_element_string in soc_element_strings:
+                         # The last character will be the closing bracket, so we can discard it.
+                         real, imagine = [float(ele) for ele in soc_element_string.strip()[:-1].split(",")]
+
+                         # We're not interested in the real or imaginary parts separately, just combine them (root of the sum of the squares).
+                         soc_elements.append((real**2 + imagine**2)**0.5)
+
+                     # We now have everything we need.
+                     self.data.socstates.append(["S({})".format(singlet_index), "T({})".format(triplet_index)])
+                     self.data.socenergies.append((soc_elements[0]**2 + soc_elements[1]**2 + soc_elements[2]**2)**0.5)
+
+                     # Only add elements if they are triplet elements (not cartesian).
+                     if soc_type == "triplet":
+                         # The order of triplet states is different in Orca to what we expect.
+                         # We want:    +1,  0, -1
+                         # Orca gives:  0, -1, +1
+                         self.data.socelements.append([soc_elements[2], soc_elements[0], soc_elements[1]])
+
+                     line = next(log_file)
+
+
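Note: as a worked example of the loop above, the sketch below collapses one hypothetical SOCME line into per-component magnitudes and a total root-sum-square coupling. The numbers are made up; real ORCA output will differ.

# Hypothetical ORCA-style SOCME line: triplet index, singlet index, then three (real, imaginary) pairs.
line = "   1    1    (  0.00 ,  0.30)    ( -0.40 , -0.00)    (  0.00 ,  0.00)"

split_line = line.split()
triplet_index, singlet_index = int(split_line[0]), int(split_line[1])

# Magnitude of each complex element: sqrt(real^2 + imag^2).
soc_elements = []
for soc_element_string in line.split("(")[1:]:
    real, imagine = [float(ele) for ele in soc_element_string.strip()[:-1].split(",")]
    soc_elements.append((real**2 + imagine**2)**0.5)

# Total coupling is the root-sum-square of the three elements.
total = (soc_elements[0]**2 + soc_elements[1]**2 + soc_elements[2]**2)**0.5
print(soc_elements)  # approximately [0.3, 0.4, 0.0]
print(total)         # approximately 0.5
print("S({}) <-> T({})".format(singlet_index, triplet_index))  # S(1) <-> T(1)
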
digichem/parse/turbomole.py ADDED
@@ -0,0 +1,201 @@
+ # General imports.
+ import re
+ from datetime import timedelta, datetime
+ import glob, pathlib
+ import warnings
+
+ # Digichem imports.
+ from digichem.parse.cclib import Cclib_parser
+
+ # Hidden imports.
+ #from cclib.io.ccio import sort_turbomole_outputs
+
+
+ class Turbomole_parser(Cclib_parser):
+     """
+     Top level class for parsing output from Turbomole files.
+     """
+     DAYS_REGEX = re.compile(r"([0-9.]*) days")
+     HOURS_REGEX = re.compile(r"([0-9.]*) hours")
+     MINUTES_REGEX = re.compile(r"([0-9.]*) minutes")
+     SECONDS_REGEX = re.compile(r"([0-9.]*) seconds")
+
+     @classmethod
+     def sort_log_files(cls, log_files):
+         """
+         Sort a list of log files into a particular order, if required for this parser.
+         """
+         from cclib.io.ccio import sort_turbomole_outputs
+
+         return sort_turbomole_outputs(log_files)
+
+     def duration_to_timedelta(self, duration_str):
+         """
+         Convert a Turbomole duration string into an equivalent timedelta object.
+         """
+         time_parts = {'days': 0, 'hours': 0, 'minutes': 0, 'seconds': 0}
+
+         for time_part in time_parts:
+             # Use regex to look for each part in the string.
+             match = getattr(self, time_part.upper() + '_REGEX').search(duration_str)
+             if match:
+                 time_parts[time_part] = float(match.group(1))
+
+         # Build a timedelta from our parts.
+         duration = timedelta(days = time_parts['days'], hours = time_parts['hours'], minutes = time_parts['minutes'], milliseconds = time_parts['seconds'] * 1000)
+
+         # All done.
+         return duration
+
+     def pre_parse(self):
+         """
+         Perform any setup before line-by-line parsing.
+         """
+         super().pre_parse()
+         # Look for duration information.
+         # Only bother doing this if we don't have timings from cclib.
+         if 'wall_time' not in self.data.metadata:
+             self.data.metadata['wall_time'] = []
+
+         if 'cpu_time' not in self.data.metadata:
+             self.data.metadata['cpu_time'] = []
+
+     def parse_output_line(self, log_file, line):
+         """
+         Perform custom line-by-line parsing of an output file.
+         """
+         # Only bother doing this if we don't have timings from cclib.
+         if 'wall_time' not in self.data.metadata or 'cpu_time' not in self.data.metadata:
+             # Look for duration.
+             if "total cpu-time :" in line:
+                 self.data.metadata['cpu_time'].append(self.duration_to_timedelta(line))
+
+             elif "total wall-time :" in line:
+                 self.data.metadata['wall_time'].append(self.duration_to_timedelta(line))
+
+         # And also the end date.
+         if ": all done ****" in line:
+             # Skip the next two lines, then read the line containing the date.
+             line = next(log_file)
+             line = next(log_file)
+             line = next(log_file)
+             try:
+                 self.data.metadata['date'] = datetime.strptime(line.strip(), "%Y-%m-%d %H:%M:%S.%f").timestamp()
+             except ValueError:
+                 # We couldn't parse.
+                 pass
+
+     def post_parse(self):
+         """
+         Perform any required operations after line-by-line parsing.
+         """
+         super().post_parse()
+
+         # Delete our durations if they are empty.
+         if len(self.data.metadata['wall_time']) == 0:
+             del(self.data.metadata['wall_time'])
+
+         if len(self.data.metadata['cpu_time']) == 0:
+             del(self.data.metadata['cpu_time'])
+
+     def process(self, options):
+         """
+         Get a Result set object from this parser.
+
+         :param options: A Digichem options nested dictionary containing options to control parsing.
+         :return: The populated result set.
+         """
+         super().process(options)
+
+         # After processing is complete, have a look for excited state density files.
+         # These have the general file name:
+         # adcp2-xsdn-1a-001-total.cao
+         # ^     ^    ^  ^------------ Excited state number (1, 2, 3 etc).
+         # |     |    ---------------- Irrep (multiplicity and symmetry)
+         # |     --------------------- Excited state
+         # --------------------------- Method (MP2, ADC(2), CC2).
+         #
+         # Each found density file will be stored under auxiliary_files['excited_state_cao_files'][state_symbol], where state_symbol is the corresponding state, eg S(1).
+         # If we have no excited states we can skip this altogether.
+         if len(self.results.excited_states) != 0:
+             for log_file in self.log_file_paths:
+                 excited_densities = {}
+                 state_num = 1
+                 # Look for each numbered excited state until we run out of files.
+                 while True:
+                     found_densities = glob.glob(str(pathlib.Path(log_file.parent, "*xsdn*{:03}*total*.cao".format(state_num))))
+
+                     # Sort the results so that, if there is more than one match, the same behaviour is encountered between multiple runs.
+                     found_densities.sort()
+
+                     if len(found_densities) > 0:
+                         try:
+                             # Get the state that corresponds to this file.
+                             excited_state = self.results.excited_states[state_num - 1]
+                             excited_densities[excited_state.state_symbol] = pathlib.Path(found_densities[0])
+
+                             # Print a warning if there's more than one (because we don't know how to handle that scenario).
+                             if len(found_densities) > 1:
+                                 warnings.warn("Found multiple excited state density files for state '{}' in Turbomole calculation directory '{}'; using file '{}' and ignoring '{}'".format(excited_state.state_symbol, log_file.parent, pathlib.Path(excited_densities[excited_state.state_symbol]).name, ", ".join([pathlib.Path(density).name for density in found_densities[1:]])))
+
+                         except IndexError:
+                             warnings.warn("Could not find excited state that corresponds to excited state density file '{}' with index {}; this file will be ignored".format(found_densities[0], state_num - 1))
+
+                     if len(found_densities) == 0:
+                         # All done.
+                         break
+
+                     state_num += 1
+
+                 # Update auxiliary_files with the new files.
+                 if len(excited_densities) > 0:
+                     try:
+                         self.results.metadata.auxiliary_files['excited_state_cao_files'].update(excited_densities)
+
+                     except KeyError:
+                         if 'excited_state_cao_files' not in self.results.metadata.auxiliary_files:
+                             self.results.metadata.auxiliary_files['excited_state_cao_files'] = excited_densities
+                         else:
+                             raise
+
+         return self.results
+
+     @classmethod
+     def find_auxiliary_files(cls, hint):
+         """
+         Find auxiliary files from a given hint.
+
+         :param hint: A path to a file to use as a hint to find additional files.
+         :returns: A dictionary of found aux files.
+         """
+         auxiliary_files = super().find_auxiliary_files(hint)
+
+         parent = pathlib.Path(hint).parent
+
+         # Find .cao density files.
+         # First look for ground state density files.
+         # These have the general file name:
+         # mp2-gsdn-1a-000-total.cao
+         # ^   ^    ^--------------- Irrep (multiplicity and symmetry)
+         # |   --------------------- Ground state
+         # ------------------------- Method (MP2, ADC(2), CC2).
+         #
+         ground_densities = glob.glob(str(pathlib.Path(parent, "*gsdn*total*.cao")))
+
+         # Sort the results so that, if there is more than one match, the same behaviour is encountered between multiple runs.
+         ground_densities.sort()
+
+         if len(ground_densities) > 0:
+             auxiliary_files['ground_state_cao_file'] = pathlib.Path(ground_densities[0])
+
+             # Print a warning if there's more than one (because we don't know how to handle that scenario).
+             if len(ground_densities) > 1:
+                 warnings.warn("Found multiple ground state density files in Turbomole calculation directory '{}'; using file '{}' and ignoring '{}'".format(parent, pathlib.Path(auxiliary_files['ground_state_cao_file']).name, ", ".join([pathlib.Path(density).name for density in ground_densities[1:]])))
+
+         #################################################################################################
+         # Note that excited state densities are also located, but this is done in the process() method. #
+         #################################################################################################
+
+         return auxiliary_files
+
+
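Note: the sketch below shows the regex-based duration parsing used by duration_to_timedelta in isolation. The timing line is invented; real Turbomole output may differ in spacing and wording.

import re
from datetime import timedelta

DAYS_REGEX = re.compile(r"([0-9.]*) days")
HOURS_REGEX = re.compile(r"([0-9.]*) hours")
MINUTES_REGEX = re.compile(r"([0-9.]*) minutes")
SECONDS_REGEX = re.compile(r"([0-9.]*) seconds")

# Invented Turbomole-style timing line, for illustration only.
duration_str = "    total wall-time :   2 hours  5 minutes and 12.3 seconds"

# Each regex contributes one field; missing fields stay at zero.
time_parts = {'days': 0, 'hours': 0, 'minutes': 0, 'seconds': 0}
for name, regex in (('days', DAYS_REGEX), ('hours', HOURS_REGEX), ('minutes', MINUTES_REGEX), ('seconds', SECONDS_REGEX)):
    match = regex.search(duration_str)
    if match:
        time_parts[name] = float(match.group(1))

# As in the parser, fractional seconds are passed as milliseconds * 1000, which is equivalent to passing them directly as seconds.
duration = timedelta(days = time_parts['days'], hours = time_parts['hours'], minutes = time_parts['minutes'], milliseconds = time_parts['seconds'] * 1000)
print(duration.total_seconds())  # 7512.3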