digichem-core 6.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- digichem/__init__.py +75 -0
- digichem/basis.py +116 -0
- digichem/config/README +3 -0
- digichem/config/__init__.py +5 -0
- digichem/config/base.py +321 -0
- digichem/config/locations.py +14 -0
- digichem/config/parse.py +90 -0
- digichem/config/util.py +117 -0
- digichem/data/README +4 -0
- digichem/data/batoms/COPYING +18 -0
- digichem/data/batoms/LICENSE +674 -0
- digichem/data/batoms/README +2 -0
- digichem/data/batoms/__init__.py +0 -0
- digichem/data/batoms/batoms-renderer.py +351 -0
- digichem/data/config/digichem.yaml +714 -0
- digichem/data/functionals.csv +15 -0
- digichem/data/solvents.csv +185 -0
- digichem/data/tachyon/COPYING.md +5 -0
- digichem/data/tachyon/LICENSE +30 -0
- digichem/data/tachyon/tachyon_LINUXAMD64 +0 -0
- digichem/data/vmd/common.tcl +468 -0
- digichem/data/vmd/generate_combined_orbital_images.tcl +70 -0
- digichem/data/vmd/generate_density_images.tcl +45 -0
- digichem/data/vmd/generate_dipole_images.tcl +68 -0
- digichem/data/vmd/generate_orbital_images.tcl +57 -0
- digichem/data/vmd/generate_spin_images.tcl +66 -0
- digichem/data/vmd/generate_structure_images.tcl +40 -0
- digichem/datas.py +14 -0
- digichem/exception/__init__.py +7 -0
- digichem/exception/base.py +133 -0
- digichem/exception/uncatchable.py +63 -0
- digichem/file/__init__.py +1 -0
- digichem/file/base.py +364 -0
- digichem/file/cube.py +284 -0
- digichem/file/fchk.py +94 -0
- digichem/file/prattle.py +277 -0
- digichem/file/types.py +97 -0
- digichem/image/__init__.py +6 -0
- digichem/image/base.py +113 -0
- digichem/image/excited_states.py +335 -0
- digichem/image/graph.py +293 -0
- digichem/image/orbitals.py +239 -0
- digichem/image/render.py +617 -0
- digichem/image/spectroscopy.py +797 -0
- digichem/image/structure.py +115 -0
- digichem/image/vmd.py +826 -0
- digichem/input/__init__.py +3 -0
- digichem/input/base.py +78 -0
- digichem/input/digichem_input.py +500 -0
- digichem/input/gaussian.py +140 -0
- digichem/log.py +179 -0
- digichem/memory.py +166 -0
- digichem/misc/__init__.py +4 -0
- digichem/misc/argparse.py +44 -0
- digichem/misc/base.py +61 -0
- digichem/misc/io.py +239 -0
- digichem/misc/layered_dict.py +285 -0
- digichem/misc/text.py +139 -0
- digichem/misc/time.py +73 -0
- digichem/parse/__init__.py +13 -0
- digichem/parse/base.py +220 -0
- digichem/parse/cclib.py +138 -0
- digichem/parse/dump.py +253 -0
- digichem/parse/gaussian.py +130 -0
- digichem/parse/orca.py +96 -0
- digichem/parse/turbomole.py +201 -0
- digichem/parse/util.py +523 -0
- digichem/result/__init__.py +6 -0
- digichem/result/alignment/AA.py +114 -0
- digichem/result/alignment/AAA.py +61 -0
- digichem/result/alignment/FAP.py +148 -0
- digichem/result/alignment/__init__.py +3 -0
- digichem/result/alignment/base.py +310 -0
- digichem/result/angle.py +153 -0
- digichem/result/atom.py +742 -0
- digichem/result/base.py +258 -0
- digichem/result/dipole_moment.py +332 -0
- digichem/result/emission.py +402 -0
- digichem/result/energy.py +323 -0
- digichem/result/excited_state.py +821 -0
- digichem/result/ground_state.py +94 -0
- digichem/result/metadata.py +644 -0
- digichem/result/multi.py +98 -0
- digichem/result/nmr.py +1086 -0
- digichem/result/orbital.py +647 -0
- digichem/result/result.py +244 -0
- digichem/result/soc.py +272 -0
- digichem/result/spectroscopy.py +514 -0
- digichem/result/tdm.py +267 -0
- digichem/result/vibration.py +167 -0
- digichem/test/__init__.py +6 -0
- digichem/test/conftest.py +4 -0
- digichem/test/test_basis.py +71 -0
- digichem/test/test_calculate.py +30 -0
- digichem/test/test_config.py +78 -0
- digichem/test/test_cube.py +369 -0
- digichem/test/test_exception.py +16 -0
- digichem/test/test_file.py +104 -0
- digichem/test/test_image.py +337 -0
- digichem/test/test_input.py +64 -0
- digichem/test/test_parsing.py +79 -0
- digichem/test/test_prattle.py +36 -0
- digichem/test/test_result.py +489 -0
- digichem/test/test_translate.py +112 -0
- digichem/test/util.py +207 -0
- digichem/translate.py +591 -0
- digichem_core-6.0.0rc1.dist-info/METADATA +96 -0
- digichem_core-6.0.0rc1.dist-info/RECORD +111 -0
- digichem_core-6.0.0rc1.dist-info/WHEEL +4 -0
- digichem_core-6.0.0rc1.dist-info/licenses/COPYING.md +10 -0
- digichem_core-6.0.0rc1.dist-info/licenses/LICENSE +11 -0
digichem/input/base.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
class Input_file():
    """
    Base class for objects describing a calculation input, independent of the on-disk format.

    Most callers will want the concrete class digichem.input.Digichem_coords rather than this one.
    """

    def __init__(self, charge = None, multiplicity = None, name = None, file_name = None, history = None):
        """
        Abstract constructor for Input_file classes.

        :param charge: Explicit molecular charge (an integer), or None to leave it unspecified.
        :param multiplicity: Explicit molecular multiplicity (an integer), or None to leave it unspecified.
        :param name: Explicit molecule name, or None to leave it unspecified.
        :param file_name: Path of the file this object was created from, or None if it did not come from a file (eg, it was built in memory).
        :param history: The SHA of the previous calculation (if applicable) from which these coordinates were taken.
        :raises TypeError: If charge or multiplicity is given but is not an integer.
        """
        charge_is_valid = charge is None or isinstance(charge, int)
        if not charge_is_valid:
            raise TypeError("Charge must be an integer (or None)")

        multiplicity_is_valid = multiplicity is None or isinstance(multiplicity, int)
        if not multiplicity_is_valid:
            raise TypeError("Multiplicity must be an integer (or None)")

        self.charge = charge
        self.multiplicity = multiplicity
        self.name = name
        # TODO: This should be called file_path
        if file_name is None:
            self.file_name = None
        else:
            self.file_name = Path(file_name)
        self.history = history

    def dump(self):
        """
        Get this input file as a dict.

        :return: A dict with the keys 'name', 'charge', 'multiplicity' and 'history' (in that order).
        """
        dumped = {}
        dumped['name'] = self.name
        dumped['charge'] = self.charge
        dumped['multiplicity'] = self.multiplicity
        dumped['history'] = self.history
        return dumped

    @property
    def implicit_name(self):
        """
        The best available name for this molecule.

        The explicit name wins if there is one; otherwise the file name (minus its final extension) is used, and failing that the literal "molecule".
        """
        if self.name is not None:
            return self.name

        if self.file_name is not None:
            # No real name, but we know where we came from.
            return self.file_name.with_suffix("").name

        return "molecule"

    @property
    def implicit_charge(self):
        """
        The charge of the molecule/system, falling back to 0 when no explicit charge is set.
        """
        return 0 if self.charge is None else self.charge

    @property
    def implicit_multiplicity(self):
        """
        The multiplicity (as an integer) of the molecule/system, falling back to 1 when no explicit multiplicity is set.
        """
        return 1 if self.multiplicity is None else self.multiplicity
|
|
@@ -0,0 +1,500 @@
|
|
|
1
|
+
# General imports.
|
|
2
|
+
import yaml
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
import packaging.version
|
|
5
|
+
import dill
|
|
6
|
+
import periodictable
|
|
7
|
+
|
|
8
|
+
# Digichem imports.
|
|
9
|
+
from digichem.exception.base import Digichem_exception
|
|
10
|
+
from digichem.input import Gaussian_input_parser
|
|
11
|
+
from digichem.file.prattle import Openprattle_converter, Oprattle_formats
|
|
12
|
+
import digichem.log
|
|
13
|
+
from digichem.input.base import Input_file
|
|
14
|
+
from digichem.parse.util import parse_calculation, open_for_parsing
|
|
15
|
+
import digichem.config
|
|
16
|
+
from digichem.result.atom import Molecule_mixin
|
|
17
|
+
|
|
18
|
+
# Custom formats to allow literal strings in yaml output.
|
|
19
|
+
# Adapted from https://stackoverflow.com/questions/6432605/any-yaml-libraries-in-python-that-support-dumping-of-long-strings-as-block-liter
|
|
20
|
+
|
|
21
|
+
# Marker subclass of str: instances are dumped to yaml in literal block style.
class literal_str(str):
    pass


# Representer for the literal_str class.
def literal_str_representer(dumper, data):
    str_tag = 'tag:yaml.org,2002:str'
    return dumper.represent_scalar(str_tag, data, style='|')


# Register the representer with yaml.
yaml.add_representer(literal_str, literal_str_representer)
|
|
28
|
+
|
|
29
|
+
# Marker subclass of dict: instances are dumped to yaml as a flow-style mapping.
class flow_mapping(dict):
    pass


# Representer for the flow_mapping class.
def flow_mapping_representer(dumper, data):
    map_tag = u'tag:yaml.org,2002:map'
    return dumper.represent_mapping(map_tag, data, flow_style=True)


# Register the representer with yaml.
yaml.add_representer(flow_mapping, flow_mapping_representer)
|
|
34
|
+
|
|
35
|
+
###########
|
|
36
|
+
# Classes #
|
|
37
|
+
###########
|
|
38
|
+
|
|
39
|
+
class Digichem_coords_ABC(Input_file, Molecule_mixin):
    """
    ABC for classes that represent an input file in the digichem input (.si) format.

    The methods here rely on subclasses providing:
     - atoms: An iterable of dicts, each with an 'atom' key (the element symbol) plus coordinates.
     - geometry: The coordinate lines in xyz format (without the two xyz header lines).
     - from_xyz(): A classmethod constructor.
    """

    @classmethod
    def from_xyz(cls, geometry, **kwargs):
        """
        Create a Digichem_coords object from a molecule in xyz format.

        :param geometry: The input geometry in xyz format.
        :param charge: The molecular charge.
        :param multiplicity: The molecular multiplicity (as an integer).
        :param name: Name of the system/molecule
        """
        raise NotImplementedError("Implement in subclass")

    @classmethod
    def from_result(cls, result, **kwargs):
        """
        Create a Digichem_coords object from the geometry of a parsed calculation result.

        Any of charge, multiplicity, history, file_name and name that are not given as keyword arguments are taken from the result instead.

        :param result: The result object to take coordinates (and defaults) from.
        :param kwargs: Explicit values that override those of the result; passed on to from_xyz().
        """
        # Test against None rather than truthiness; 0 is a valid explicit charge and must not be
        # silently replaced by the charge of the result.
        if kwargs.get('charge') is None:
            kwargs['charge'] = result.atoms.charge

        if not kwargs.get('multiplicity'):
            kwargs['multiplicity'] = result.metadata.multiplicity

        if not kwargs.get('history'):
            # Note it's not the history of the old calc we want here, this old calc IS our new history.
            kwargs['history'] = result._id

        if not kwargs.get('file_name'):
            kwargs['file_name'] = result.metadata.log_files[0]

        if not kwargs.get('name'):
            kwargs['name'] = result.metadata.name

        return cls.from_xyz(
            result.atoms.to_xyz(),
            **kwargs
        )

    def dump(self):
        """
        Get this input file as a dict.

        The dict always declares version "2.1.0" and stores the geometry under 'atoms', regardless of the version of this object.
        """
        dic = {
            'version': "2.1.0",
        }
        dic.update(super().dump())
        dic['atoms'] = self.atoms
        return dic

    @property
    def dict(self):
        """
        Get this input file as a dict (same as dump()).
        """
        return self.dump()

    @property
    def element_dict(self):
        """
        Get a dictionary where each key is one of the elements in this molecule (C, H, N etc) and the value is the number of times that element appears in the molecule.

        :return: The element dictionary.
        """
        elements = {}
        for atom in self.atoms:
            symbol = atom['atom']
            elements[symbol] = elements.get(symbol, 0) + 1
        return elements

    @property
    def yaml(self):
        """
        Get this input file in yaml format.
        """
        # Get in dict format.
        intermediate = self.dict

        # Wrap each atom in a flow mapping so it is written on a single line.
        intermediate['atoms'] = [flow_mapping(atom) for atom in intermediate['atoms']]

        # Convert.
        return yaml.dump(intermediate, sort_keys=False)

    @property
    def xyz(self):
        """
        Get the geometry of this input file in XYZ format.

        The output is the number of coordinate lines, an empty comment line, and then the geometry.
        """
        geometry = self.geometry
        stripped = geometry.strip()
        # "".split("\n") is [""], which would wrongly count one atom for an empty geometry.
        num_atoms = len(stripped.split("\n")) if stripped else 0
        return "{}\n\n{}".format(num_atoms, geometry)

    @property
    def elements(self):
        """
        A unique list of the elements in this input file.

        The elements are returned as a list of integers (atomic numbers), in no particular order.
        """
        return list(set(
            periodictable.elements.symbol(coord['atom']).number for coord in self.atoms
        ))

    @classmethod
    def from_com(cls, geometry, *, charge = None, multiplicity = None, **kwargs):
        """
        Create a Digichem_coords object from a molecule in gaussian input format (.com, .gjf etc).

        :param geometry: The input geometry in gaussian format.
        :param charge: The molecular charge. If None, the charge from the gaussian input is used.
        :param multiplicity: The molecular multiplicity (as an integer). If None, the multiplicity from the gaussian input is used.
        :param name: Name of the system/molecule
        """
        # Get a parser for our gaussian input file.
        parser = Gaussian_input_parser(geometry)

        # If we've not been given a charge and/or multi, use the values from the input file.
        if charge is None:
            charge = parser.charge
        if multiplicity is None:
            multiplicity = parser.multiplicity

        # Continue with other constructors.
        return cls.from_xyz(parser.xyz, charge = charge, multiplicity = multiplicity, **kwargs)

    @classmethod
    def from_yaml(cls, yaml_dict, file_name = None, **kwargs):
        """
        Create a Digichem_coords object from a loaded/parsed .si file.

        :param yaml_dict: The parsed contents of the .si file; its items are passed to the constructor as keyword arguments.
        :param file_name: The path of the file that was loaded (if any).
        :param kwargs: Explicit values which (when not None) replace those from yaml_dict.
        :raises Digichem_exception: If the constructor rejects the given items with a TypeError.
        """
        # Work on a copy so the dictionary of the caller is not modified.
        yaml_dict = dict(yaml_dict)
        # Overwrite dictionary values if explicit ones have been given.
        for kwarg, value in kwargs.items():
            if value is not None:
                yaml_dict[kwarg] = value

        # Continue constructing.
        try:
            return cls(**yaml_dict, file_name = file_name)
        except TypeError as e:
            # Keep the real reason (unknown key, bad type, etc) available as the cause.
            raise Digichem_exception("Failed to load .si file '{}'; is the file formatted correctly?".format(file_name)) from e

    @classmethod
    def from_si(cls, si_file, **kwargs):
        """
        Create a Digichem_coords object from a raw .si file.

        :param si_file: The raw .si data; it is handed to yaml.safe_load().
        """
        return cls.from_yaml(yaml.safe_load(si_file), **kwargs)

    def to_file(self, file):
        """
        Write this digichem input file to an open file object.

        :param file: A file opened with open() (or similar).
        """
        file.write(self.yaml)

    def to_format(self, file_type, file = None):
        """
        Get this input file in an arbitrary format.

        :param file_type: The format of the file; see output_formats()
        :param file: An optional file to write to, if not given the converted file is returned as a string. NOTE: this is ignored for the "si" format, which is always returned as a string.
        """
        if file_type.lower() == "si":
            # Convert to yaml
            return self.yaml

        # Convert via the xyz representation.
        converter = Openprattle_converter(input_file = self.xyz, input_file_path = self.implicit_name, input_file_type = "xyz")
        return converter.convert(file_type, file, charge = self.charge, multiplicity = self.multiplicity)

    @classmethod
    def input_formats(cls):
        """
        A dictionary of available input formats that this file can be created from.

        Each key is the short-code of the format (eg, si, com, xyz etc) while the value is a longer description.
        """
        formats = {
            "si": "Digichem Input Format",
            "com": "Gaussian Input",
            "gau": "Gaussian Input",
            "gjc": "Gaussian Input",
            "gjf": "Gaussian Input"
        }
        formats.update(Oprattle_formats().read())

        # Some formats supported by obabel don't make sense for us to use.
        for unwanted in ('text', 'txt'):
            formats.pop(unwanted, None)
        return formats

    @classmethod
    def output_formats(cls):
        """
        A dictionary of available output formats that this file can be converted to.

        Each key is the short-code of the format (eg, si, com, xyz etc) while the value is a longer description.
        """
        formats = {"si": "Digichem Input Format"}
        formats.update(Oprattle_formats().write())

        # Some formats supported by obabel don't make sense for us to use.
        for unwanted in ('copy', 'k', 'confabreport', 'nul', 'text', 'txt'):
            formats.pop(unwanted, None)

        return formats

    def __eq__(self, other):
        """
        Check for equality with another coord object.

        Two objects are equal if they have the same charge, the same multiplicity and the same atoms in the same order.
        """
        # Explicit comparisons are used here (not assert), because assert statements are
        # removed when python runs with -O, which would make every comparison succeed.
        if not (self.charge == other.charge):
            return False
        if not (self.multiplicity == other.multiplicity):
            return False

        # Fetch each atom list once; for some subclasses 'atoms' is rebuilt on every access.
        own_atoms = self.atoms
        other_atoms = other.atoms

        # A differing number of atoms can never be equal.
        if len(own_atoms) != len(other_atoms):
            return False

        # TODO: This check should not care about the order of atoms.
        return all(atom == other_atom for atom, other_atom in zip(own_atoms, other_atoms))

    # Defining __eq__ already makes instances unhashable; state it explicitly.
    __hash__ = None
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
class Digichem_coords_v2(Digichem_coords_ABC):
    """
    Class that represents an input file in the digichem input (.si) format (V2).

    The .si format is YAML based and stores atom positions in a dictionary format along with charge and multiplicity.
    """

    def __init__(self, atoms, *, charge = None, multiplicity = None, name = None, version = "2.1.0", file_name = None, history = None):
        """
        Constructor for .si files.

        :param atoms: A list of dictionaries specifying the geometry. Each dict should contain four keys, 'atom': The element, 'x': The x coord, 'y': The y coord, 'z': The z coord. All coordinates are in angstroms.
        :param charge: The molecular charge (as an integer).
        :param multiplicity: The molecular multiplicity (as an integer).
        :param name: Name of the system/molecule.
        :param version: The version of the .si file.
        :param file_name: The name of the file which was loaded. This can be used as a back-up file name.
        :param history: The SHA of the previous calculation (if applicable) from which these coordinates were taken.
        """
        super().__init__(charge, multiplicity, name, file_name, history = history)
        self.atoms = atoms
        self.version = version

    @classmethod
    def from_xyz(cls, geometry, **kwargs):
        """
        Create a Digichem_coords object from a molecule in xyz format.

        :param geometry: The input geometry in xyz format.
        :param charge: The molecular charge.
        :param multiplicity: The molecular multiplicity (as an integer).
        :param name: Name of the system/molecule
        :raises ValueError: If a coordinate line does not consist of an element followed by three numbers.
        """
        # Split the xyz format on newlines.
        split_geom = geometry.strip().split("\n")

        # Remove the first line (the number of atoms) and the second (optional comment line)
        split_geom = split_geom[2:]

        atoms = []
        for geom_line in split_geom:
            fields = geom_line.split()

            # Tolerate blank lines between coordinates rather than failing to unpack them.
            if not fields:
                continue

            if len(fields) != 4:
                raise ValueError(f"Malformed xyz coordinate line '{geom_line}'; expected an element and three coordinates")

            atom, x_coord, y_coord, z_coord = fields
            atoms.append({"atom": atom, "x": float(x_coord), "y": float(y_coord), "z": float(z_coord)})

        # Call our main constructor.
        return cls(atoms, **kwargs)

    @property
    def geometry(self):
        """
        Get the geometry of this input file in XYZ format.

        Only the coordinate lines are returned (one per atom); the two xyz header lines are not included.
        """
        # XYZ is a little bit loosely defined.
        # Obabel (defacto standard? uses ~10 chars for each column?).
        # That's 9 usable chars +1 obligate space.
        line_format = "{:<9} {:>14.8f} {:>14.8f} {:>14.8f}"
        return "\n".join(
            line_format.format(atom['atom'], atom['x'], atom['y'], atom['z']).strip()
            for atom in self.atoms
        )
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
class Digichem_coords_v1(Digichem_coords_ABC):
    """
    Class that represents an input file in the digichem input (.si) format (V1).

    The .si format is yaml based, and stores the input geometry in a modified xyz format along with charge and multiplicity.
    """

    def __init__(self, geometry, *, charge = None, multiplicity = None, name = None, version = "1.0.0", file_name = None, history = None):
        """
        Constructor for .si files.

        :param geometry: The molecular geometry in .si format. Use from_xyz() instead if your format is in xyz.
        :param charge: The molecular charge (as an integer).
        :param multiplicity: The molecular multiplicity (as an integer).
        :param name: Name of the system/molecule.
        :param version: The version of the .si file.
        :param file_name: The name of the file which was loaded. This can be used as a back-up file name.
        :param history: The SHA of the previous calculation (if applicable) from which these coordinates were taken.
        """
        # history must be accepted here because the inherited from_result() always passes it
        # on to the constructor (via from_xyz()).
        super().__init__(charge, multiplicity, name, file_name, history = history)
        self.geometry = geometry
        self.version = version

    @classmethod
    def from_xyz(cls, geometry, **kwargs):
        """
        Create a Digichem_coords object from a molecule in xyz format.

        :param geometry: The input geometry in xyz format.
        :param charge: The molecular charge.
        :param multiplicity: The molecular multiplicity (as an integer).
        :param name: Name of the system/molecule
        """
        # Split the xyz format on newlines.
        split_geom = geometry.split("\n")

        # Remove the first line (the number of atoms) and the second (optional comment line)
        split_geom = split_geom[2:]

        # Call our main constructor.
        return cls("\n".join(split_geom), **kwargs)

    @property
    def atoms(self):
        """
        The geometry as a list of atom dictionaries (the V2 representation).

        The list is rebuilt from the xyz text each time this property is accessed.
        """
        return Digichem_coords_v2.from_xyz(self.xyz, charge = self.charge, multiplicity = self.multiplicity, name = self.name, file_name = self.file_name).atoms
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
####################
|
|
383
|
+
# Helper utilities #
|
|
384
|
+
####################
|
|
385
|
+
|
|
386
|
+
# Alias for the class implementing the latest .si format version.
Digichem_coords = Digichem_coords_v2
|
|
387
|
+
|
|
388
|
+
def si_from_yaml(yaml_dict, file_name = None, **kwargs):
    """
    Create a Digichem_coords object of the appropriate version from a loaded/parsed .si file.

    The class is chosen from the 'version' key if there is one. Without a version, the presence of a 'geometry' key selects V1, and anything else is assumed to be the latest format.

    :param yaml_dict: The parsed contents of the .si file.
    :param file_name: The path of the file that was loaded (if any).
    :param kwargs: Explicit values (charge, multiplicity, name etc) that override those in the file.
    :raises Digichem_exception: If the file declares a version that is not supported.
    :return: The constructed coordinates object.
    """
    # In the first instance, look to see if there's a version string.
    if "version" in yaml_dict:
        # An unquoted version such as 2 or 2.1 is loaded from yaml as a number,
        # but packaging can only parse strings.
        version = packaging.version.parse(str(yaml_dict['version']))

        if version < packaging.version.parse("2"):
            # V1.
            cls = Digichem_coords_v1

        elif version < packaging.version.parse("3"):
            # V2.
            cls = Digichem_coords_v2

        else:
            raise Digichem_exception("Unsupported .si file version '{}'".format(yaml_dict['version']))

    elif "geometry" in yaml_dict:
        # No explicit version, but we have the old 'geometry' section; assume v1.
        cls = Digichem_coords_v1

    else:
        # No explicit version; assume the latest format.
        cls = Digichem_coords

    return cls.from_yaml(yaml_dict, file_name, **kwargs)
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
def si_from_file(file_name, file_type = None, *, gen3D = None, **kwargs):
    """
    Create a Digichem_coords object from a file in arbitrary format.

    :param file_name: Name/path of the input file to read from.
    :param file_type: The format of the file; a string recognised by Digichem_coords.input_formats(). If not given, an attempt will be made to guess from the file name.
    :param gen3D: Passed through to the Openprattle converter for formats that need converting.
    :param charge: The molecular charge.
    :param multiplicity: The molecular multiplicity (as an integer).
    :param name: Name of the system/molecule.
    :raises ValueError: If the coordinates could not be read; the underlying error is attached as the cause.
    """
    file_name = Path(file_name)
    digichem.log.get_logger().info("Parsing coordinate file '{}'".format(file_name))
    auto_file_type = False

    try:
        # Get the file format.
        if file_type is None:
            auto_file_type = True
            file_type = Openprattle_converter.type_from_file_name(file_name, allow_none = True)

        # Certain formats we support natively; others we convert to an intermediate format.
        if file_type in ["com", "gau", "gjc", "gjf"]:
            # Gaussian input format.
            with open(file_name, "rt") as com_file:
                return Digichem_coords.from_com(com_file.read(), file_name = file_name, **kwargs)

        elif file_type == "si":
            # Digichem input format.
            with open(file_name, "rt") as si_file:
                return si_from_yaml(yaml.safe_load(si_file.read()), file_name = file_name, **kwargs)

        elif file_type == "pickle":
            # A digichem resume file.
            # The resume file (should be) a pickled destination object.
            # SECURITY NOTE: Unpickling can execute arbitrary code; only load resume files from a trusted source.
            with open(file_name, "rb") as pickle_file:
                try:
                    destination = dill.load(pickle_file)

                except Exception as e:
                    raise Digichem_exception("Failed to parse digichem resume file") from e

                return destination.program.calculation.input_coords

        # NOTE: Here we assume files without an extension are log files.
        # This works fine for directories, but might change in future.
        elif file_type in ["dat", "log", "out", "output", None] \
        or (auto_file_type and "".join(file_name.suffixes) in open_for_parsing.archive_formats()):
            # Generic log-file (output) format.
            # Most formats (.log, .dat etc) we can parse with either Obabel or Digichem.
            # Some broken log files (incomplete) we can parse only with Obabel.
            # Some unusual formats (directories, archives) we can parse only with Digichem.

            # Try with Digichem first.
            try:
                result = parse_calculation(file_name, options = digichem.config.get_config(), format_hint = "cclib")
                return Digichem_coords.from_result(result, file_name = file_name, **kwargs)

            except Exception as parse_error:
                # No good, see if we can use obabel.
                try:
                    com_file = Openprattle_converter.from_file(file_name, file_type).convert("com", gen3D = gen3D)

                except Exception:
                    # Also no good, re-raise the original exception.
                    # (A bare 'raise' here would re-raise the obabel error instead.)
                    raise parse_error

                # Worked with fallback, log a message.
                digichem.log.get_logger().warning(f"Failed to parse calculation output file '{file_name}'; using Obabel fallback mechanism")
                return Digichem_coords.from_com(com_file, file_name = file_name, **kwargs)

        else:
            # Generic input format, use obabel.

            # We convert all formats to gaussian input formats (because this format contains charge and multiplicity, which we can extract).
            com_file = Openprattle_converter.from_file(file_name, file_type).convert("com", gen3D = gen3D)

            # Continue with other constructors.
            return Digichem_coords.from_com(com_file, file_name = file_name, **kwargs)

    except Exception as e:
        # Only ordinary errors are converted; KeyboardInterrupt, SystemExit and other
        # BaseException-only errors are left to propagate untouched.
        raise ValueError("Could not parse input coordinates from '{}'".format(file_name)) from e
|
|
500
|
+
|