digichem-core 6.0.3__py3-none-any.whl → 6.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- digichem/__init__.py +2 -2
- digichem/config/base.py +5 -3
- digichem/data/batoms/batoms-renderer.py +190 -50
- digichem/data/batoms/batoms_renderer.py +500 -0
- digichem/file/base.py +17 -3
- digichem/file/cube.py +185 -16
- digichem/file/types.py +1 -0
- digichem/image/render.py +144 -45
- digichem/image/vmd.py +7 -2
- digichem/input/digichem_input.py +2 -2
- digichem/memory.py +10 -0
- digichem/misc/io.py +84 -1
- digichem/parse/__init__.py +6 -1
- digichem/parse/base.py +85 -54
- digichem/parse/cclib.py +103 -13
- digichem/parse/dump.py +3 -3
- digichem/parse/orca.py +1 -0
- digichem/parse/pyscf.py +25 -0
- digichem/parse/turbomole.py +5 -5
- digichem/parse/util.py +146 -65
- digichem/result/excited_state.py +17 -11
- digichem/result/metadata.py +272 -3
- digichem/result/result.py +3 -0
- digichem/result/spectroscopy.py +42 -0
- digichem/test/test_memory.py +33 -0
- digichem/test/test_parsing.py +68 -1
- digichem/test/test_result.py +1 -1
- digichem/test/util.py +2 -1
- {digichem_core-6.0.3.dist-info → digichem_core-6.10.1.dist-info}/METADATA +4 -3
- {digichem_core-6.0.3.dist-info → digichem_core-6.10.1.dist-info}/RECORD +33 -30
- {digichem_core-6.0.3.dist-info → digichem_core-6.10.1.dist-info}/WHEEL +1 -1
- {digichem_core-6.0.3.dist-info → digichem_core-6.10.1.dist-info}/licenses/COPYING.md +0 -0
- {digichem_core-6.0.3.dist-info → digichem_core-6.10.1.dist-info}/licenses/LICENSE +0 -0
digichem/image/vmd.py
CHANGED
|
@@ -35,7 +35,7 @@ class VMD_image_maker(Render_maker):
|
|
|
35
35
|
options_name = "orbital"
|
|
36
36
|
|
|
37
37
|
def __init__(self, *args, cube_file = None, rotations = None, auto_crop = True, rendering_style = "pastel", resolution = 1024, also_make_png = True, isovalue = 0.2,
|
|
38
|
-
vmd_executable = "vmd", tachyon_executable = "tachyon", vmd_logging = False,
|
|
38
|
+
vmd_executable = "vmd", tachyon_executable = "tachyon", vmd_logging = False, num_cpu = 1,
|
|
39
39
|
**kwargs):
|
|
40
40
|
"""
|
|
41
41
|
Constructor for Image_maker objects.
|
|
@@ -48,6 +48,7 @@ class VMD_image_maker(Render_maker):
|
|
|
48
48
|
:param resolution: The max width or height of the rendered images in pixels.
|
|
49
49
|
:param also_make_png: If True, additional images will be rendered in PNG format. This option is useful to generate higher quality images alongside more portable formats. If 'output' is a .png file, then it is wise to set this option to False (otherwise two png files will be rendered, which is a waste).
|
|
50
50
|
:param isovalue: The isovalue to use for rendering isosurfaces. Has no effect when rendering only atoms.
|
|
51
|
+
:param num_cpu: Number of CPUs to use for multithreading.
|
|
51
52
|
:param vmd_executable: 'Path' to the vmd executable to use for image rendering. Defaults to relying on the command 'vmd'.
|
|
52
53
|
:param tachyon_executable: 'Path' to the tachyon executable to use for image rendering. Defaults to relying on the command 'tachyon'.
|
|
53
54
|
:param vmd_logging: Whether to print output from vmd.
|
|
@@ -60,6 +61,7 @@ class VMD_image_maker(Render_maker):
|
|
|
60
61
|
resolution = resolution,
|
|
61
62
|
also_make_png = also_make_png,
|
|
62
63
|
isovalue = isovalue,
|
|
64
|
+
num_cpu = num_cpu,
|
|
63
65
|
**kwargs
|
|
64
66
|
)
|
|
65
67
|
|
|
@@ -95,7 +97,7 @@ class VMD_image_maker(Render_maker):
|
|
|
95
97
|
return [(axis, math.degrees(-theta)) for axis, theta in self._rotations]
|
|
96
98
|
|
|
97
99
|
@classmethod
|
|
98
|
-
def from_options(self, output, *, cube_file = None, rotations = None, options, **kwargs):
|
|
100
|
+
def from_options(self, output, *, cube_file = None, rotations = None, options, num_cpu = 1, **kwargs):
|
|
99
101
|
"""
|
|
100
102
|
Constructor that takes a dictionary of config like options.
|
|
101
103
|
"""
|
|
@@ -112,6 +114,7 @@ class VMD_image_maker(Render_maker):
|
|
|
112
114
|
vmd_executable = options['render']['vmd']['executable'],
|
|
113
115
|
tachyon_executable = options['render']['vmd']['tachyon'],
|
|
114
116
|
vmd_logging = options['logging']['render_logging'],
|
|
117
|
+
num_cpu = num_cpu,
|
|
115
118
|
**kwargs
|
|
116
119
|
)
|
|
117
120
|
|
|
@@ -344,6 +347,8 @@ class VMD_image_maker(Render_maker):
|
|
|
344
347
|
"{}".format(self.tachyon_executable),
|
|
345
348
|
scene_file.relative_to(working_directory),
|
|
346
349
|
"-aasamples", "12",
|
|
350
|
+
# Note: this can get capped in a SLURM context...
|
|
351
|
+
"-numthreads", "{}".format(self.num_cpu),
|
|
347
352
|
"-res", "{}".format(resolution), "{}".format(resolution),
|
|
348
353
|
"-o", tmpfile_name
|
|
349
354
|
],
|
digichem/input/digichem_input.py
CHANGED
|
@@ -65,7 +65,7 @@ class Digichem_coords_ABC(Input_file, Molecule_mixin):
|
|
|
65
65
|
# Note it's not the history of the old calc we want here, this old calc IS our new history.
|
|
66
66
|
kwargs['history'] = result._id
|
|
67
67
|
|
|
68
|
-
if not kwargs.get('file_name'):
|
|
68
|
+
if not kwargs.get('file_name') and len(result.metadata.log_files):
|
|
69
69
|
kwargs['file_name'] = result.metadata.log_files[0]
|
|
70
70
|
|
|
71
71
|
if not kwargs.get('name'):
|
|
@@ -462,7 +462,7 @@ def si_from_file(file_name, file_type = None, *, gen3D = None, **kwargs):
|
|
|
462
462
|
# NOTE: Here we assume files without an extension are log files.
|
|
463
463
|
# This works fine for directories, but might change in future.
|
|
464
464
|
elif file_type in ["dat", "log", "out", "output", None] \
|
|
465
|
-
or (auto_file_type and "".join(file_name.suffixes) in open_for_parsing.
|
|
465
|
+
or (auto_file_type and "".join(file_name.suffixes) in open_for_parsing.get_archive_formats()):
|
|
466
466
|
# Generic log-file (output) format.
|
|
467
467
|
# Most formats (.log, .dat etc) we can parse with either Obabel or Digichem.
|
|
468
468
|
# Some broken log files (incomplete) we can parse only with Obabel.
|
digichem/memory.py
CHANGED
|
@@ -155,6 +155,16 @@ class Memory():
|
|
|
155
155
|
|
|
156
156
|
def __eq__(self, other):
|
|
157
157
|
return int(self) == other
|
|
158
|
+
|
|
159
|
+
@classmethod
|
|
160
|
+
def is_memory(self, value):
|
|
161
|
+
"""Convenience method to determine whether a value is a valid memory amount."""
|
|
162
|
+
try:
|
|
163
|
+
self(value)
|
|
164
|
+
return True
|
|
165
|
+
|
|
166
|
+
except Exception:
|
|
167
|
+
return False
|
|
158
168
|
|
|
159
169
|
|
|
160
170
|
class Turbomole_memory(Memory):
|
digichem/misc/io.py
CHANGED
|
@@ -6,8 +6,43 @@ import os
|
|
|
6
6
|
import shutil
|
|
7
7
|
import sys
|
|
8
8
|
import warnings
|
|
9
|
+
from uuid import uuid4
|
|
10
|
+
import hashlib
|
|
9
11
|
|
|
10
12
|
from digichem.datas import get_resource
|
|
13
|
+
import digichem.log
|
|
14
|
+
|
|
15
|
+
def checksum(*paths, hash_func = "sha1", buf_size = 1024 * 1024, ret_size = False):
|
|
16
|
+
"""
|
|
17
|
+
Calculate the checksum of a file.
|
|
18
|
+
|
|
19
|
+
This function avoids reading the entire file into memory at once.
|
|
20
|
+
|
|
21
|
+
:param paths: The file(s) to calculate.
|
|
22
|
+
:param hash_func: The name of a hashlib function to pass to hashlib.new.
|
|
23
|
+
:param buf_size: How much to read in a single pass. The default is 1MB.
|
|
24
|
+
:param ret_size: If True, this function will return a tuple of (checksum, file_size). Otherwise, only the checksum is returned.
|
|
25
|
+
"""
|
|
26
|
+
hasher = hashlib.new(hash_func)
|
|
27
|
+
tot_size = 0
|
|
28
|
+
|
|
29
|
+
for pth in paths:
|
|
30
|
+
with open(pth, "rb") as file:
|
|
31
|
+
while True:
|
|
32
|
+
data = file.read(buf_size)
|
|
33
|
+
|
|
34
|
+
if len(data) == 0:
|
|
35
|
+
# End of file
|
|
36
|
+
break
|
|
37
|
+
|
|
38
|
+
tot_size += len(data)
|
|
39
|
+
hasher.update(data)
|
|
40
|
+
|
|
41
|
+
if ret_size:
|
|
42
|
+
return (hasher.hexdigest(), tot_size)
|
|
43
|
+
|
|
44
|
+
else:
|
|
45
|
+
return hasher.hexdigest()
|
|
11
46
|
|
|
12
47
|
def expand_path(pth):
|
|
13
48
|
"""
|
|
@@ -236,4 +271,52 @@ class Multi_file_wrapper():
|
|
|
236
271
|
"""
|
|
237
272
|
# Close our file.
|
|
238
273
|
self.close()
|
|
239
|
-
|
|
274
|
+
|
|
275
|
+
class Safe_path():
|
|
276
|
+
"""
|
|
277
|
+
Get a 'safe' path to a file.
|
|
278
|
+
|
|
279
|
+
A safe path is made up of only alphanumeric characters, and is short. This function is useful for dealing
|
|
280
|
+
with antiquated programs that struggle with whitespace/non-alpha characters or have a max file name requirement.
|
|
281
|
+
|
|
282
|
+
This class should be used as a context manager. The returned path is a temporary symbolic link to the true file.
|
|
283
|
+
The symbolic link will be created in the specified 'dir', which defaults to the CWD.
|
|
284
|
+
"""
|
|
285
|
+
|
|
286
|
+
def __init__(self, unsafe_path, dir = "./", suffix = ""):
|
|
287
|
+
self.unsafe_path = unsafe_path
|
|
288
|
+
self.dir = dir
|
|
289
|
+
self.link = None
|
|
290
|
+
self.suffix = suffix
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def enter(self):
|
|
294
|
+
"""
|
|
295
|
+
Create the symlink.
|
|
296
|
+
"""
|
|
297
|
+
self.link = Path(self.dir, ".{}".format(uuid4().hex) + self.suffix)
|
|
298
|
+
os.symlink(self.unsafe_path, self.link)
|
|
299
|
+
|
|
300
|
+
def close(self):
|
|
301
|
+
"""
|
|
302
|
+
Remove the symlink.
|
|
303
|
+
"""
|
|
304
|
+
try:
|
|
305
|
+
os.unlink(self.link)
|
|
306
|
+
|
|
307
|
+
except FileExistsError:
|
|
308
|
+
digichem.log.get_logger().warning("Failed to remove temporary symbolic link '{}' -> '{}'; the file has already disappeared?".format(self.link, self.unsafe_path))
|
|
309
|
+
|
|
310
|
+
def __enter__(self):
|
|
311
|
+
"""
|
|
312
|
+
Magic function for the 'with' keyword.
|
|
313
|
+
"""
|
|
314
|
+
self.enter()
|
|
315
|
+
return self
|
|
316
|
+
|
|
317
|
+
def __exit__(self, etype, value, traceback):
|
|
318
|
+
"""
|
|
319
|
+
Magic function for the 'with' keyword, called at the end of the block.
|
|
320
|
+
"""
|
|
321
|
+
# Close our file.
|
|
322
|
+
self.close()
|
digichem/parse/__init__.py
CHANGED
|
@@ -4,10 +4,15 @@ Classes for parsing calculation result data.
|
|
|
4
4
|
Most of the heavy lifting is done by cclib, we just extract additional data not currently handled by cclib.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
+
# We're not bothered about these warnings.
|
|
8
|
+
import warnings
|
|
9
|
+
warnings.filterwarnings("ignore", "Module .* is under testing", category = UserWarning)
|
|
10
|
+
warnings.filterwarnings("ignore", "Module .* is not fully tested", category = UserWarning)
|
|
11
|
+
|
|
7
12
|
# These alignment classes are needed to parse correctly.
|
|
8
13
|
from digichem.result.alignment.AAA import Adjusted_average_angle
|
|
9
14
|
from digichem.result.alignment.AA import Average_angle
|
|
10
15
|
from digichem.result.alignment.FAP import Furthest_atom_pair
|
|
11
16
|
from digichem.result.alignment import Minimal
|
|
12
17
|
|
|
13
|
-
from digichem.parse.util import from_log_files, parse_calculation, parse_and_merge_calculations, parse_multiple_calculations, parse_and_merge_multiple_calculations
|
|
18
|
+
from digichem.parse.util import from_log_files, parse_calculation, parse_and_merge_calculations, parse_multiple_calculations, parse_and_merge_multiple_calculations, open_for_parsing
|
digichem/parse/base.py
CHANGED
|
@@ -31,20 +31,11 @@ custom_parsing_formats = [
|
|
|
31
31
|
|
|
32
32
|
class Parser_abc():
|
|
33
33
|
"""ABC for all parsers."""
|
|
34
|
-
|
|
35
|
-
def __init__(self,
|
|
34
|
+
|
|
35
|
+
def __init__(self, *, raw_data = None, **kwargs):
|
|
36
36
|
"""
|
|
37
37
|
Top level constructor for calculation parsers.
|
|
38
|
-
|
|
39
|
-
:param log_files: A list of output file to analyse/parse. The first log_file given will be used for naming purposes.
|
|
40
38
|
"""
|
|
41
|
-
# Set our name.
|
|
42
|
-
self.log_file_paths = self.sort_log_files([Path(log_file) for log_file in log_files if log_file is not None])
|
|
43
|
-
|
|
44
|
-
# Panic if we have no logs.
|
|
45
|
-
if len(self.log_file_paths) == 0:
|
|
46
|
-
raise Digichem_exception("Cannot parse calculation output; no available log files. Are you sure the given path is a log file or directory containing log files?")
|
|
47
|
-
|
|
48
39
|
# An object that we will populate with raw results.
|
|
49
40
|
self.data = raw_data
|
|
50
41
|
|
|
@@ -57,49 +48,21 @@ class Parser_abc():
|
|
|
57
48
|
self.parse()
|
|
58
49
|
except Exception:
|
|
59
50
|
raise Digichem_exception("Error parsing calculation result '{}'".format(self.description))
|
|
60
|
-
|
|
61
|
-
@classmethod
|
|
62
|
-
def from_logs(self, *log_files, **kwargs):
|
|
63
|
-
"""
|
|
64
|
-
Intelligent constructor that will attempt to guess the location of aux files from a given log file(s).
|
|
65
|
-
|
|
66
|
-
:param log_files: Output file(s) to parse or a directory of output files to parse.
|
|
67
|
-
"""
|
|
68
|
-
# This default implementation does nothing smart.
|
|
69
|
-
return self(*log_files, **kwargs)
|
|
70
|
-
|
|
71
|
-
@classmethod
|
|
72
|
-
def sort_log_files(self, log_files):
|
|
73
|
-
"""
|
|
74
|
-
Sort a list of log files into a particular order, if required for this parser.
|
|
75
|
-
"""
|
|
76
|
-
return log_files
|
|
77
51
|
|
|
78
|
-
@property
|
|
79
|
-
def log_file_path(self):
|
|
80
|
-
"""
|
|
81
|
-
The main log file.
|
|
82
|
-
"""
|
|
83
|
-
for log_file in self.log_file_paths:
|
|
84
|
-
if log_file.suffix.lower() == ".log":
|
|
85
|
-
return log_file
|
|
86
|
-
|
|
87
|
-
return self.log_file_paths[0]
|
|
88
|
-
|
|
89
52
|
@property
|
|
90
53
|
def name(self):
|
|
91
54
|
"""
|
|
92
55
|
Short name to describe this calculation result.
|
|
93
56
|
"""
|
|
94
|
-
return
|
|
57
|
+
return "Parser"
|
|
95
58
|
|
|
96
59
|
@property
|
|
97
60
|
def description(self):
|
|
98
61
|
"""
|
|
99
62
|
A name/path that describes the file(s) being parsed, used for error messages etc.
|
|
100
63
|
"""
|
|
101
|
-
return
|
|
102
|
-
|
|
64
|
+
return "Parser"
|
|
65
|
+
|
|
103
66
|
def parse(self):
|
|
104
67
|
"""
|
|
105
68
|
Extract results from our output files.
|
|
@@ -128,22 +91,14 @@ class Parser_abc():
|
|
|
128
91
|
|
|
129
92
|
except Exception as e:
|
|
130
93
|
return None
|
|
131
|
-
|
|
94
|
+
|
|
132
95
|
def post_parse(self):
|
|
133
96
|
"""
|
|
134
97
|
Perform any required operations after line-by-line parsing.
|
|
135
|
-
"""
|
|
136
|
-
# Add our name.
|
|
137
|
-
if self.name is not None:
|
|
138
|
-
self.data.metadata['name'] = self.name
|
|
139
|
-
|
|
98
|
+
"""
|
|
140
99
|
# Add current username.
|
|
141
100
|
# TODO: It would probably be better if we used the name of the user who owns the output file, rather than the current user...
|
|
142
101
|
self.data.metadata['user'] = self.get_current_username()
|
|
143
|
-
|
|
144
|
-
# Set our file paths.
|
|
145
|
-
self.data.metadata['log_files'] = self.log_file_paths
|
|
146
|
-
self.data.metadata['auxiliary_files'] = self.auxiliary_files
|
|
147
102
|
|
|
148
103
|
def process_all(self, options):
|
|
149
104
|
"""
|
|
@@ -154,7 +109,7 @@ class Parser_abc():
|
|
|
154
109
|
"""
|
|
155
110
|
self.process(options)
|
|
156
111
|
return [self.results]
|
|
157
|
-
|
|
112
|
+
|
|
158
113
|
def process(self, options):
|
|
159
114
|
"""
|
|
160
115
|
Get a Result set object from this parser.
|
|
@@ -166,7 +121,8 @@ class Parser_abc():
|
|
|
166
121
|
# Get our result set.
|
|
167
122
|
self.results = Result_set(
|
|
168
123
|
_id = self.data._id,
|
|
169
|
-
metadata = Metadata.from_parser(self)
|
|
124
|
+
metadata = Metadata.from_parser(self),
|
|
125
|
+
aux = self.data._aux if hasattr(self.data, '_aux') else None
|
|
170
126
|
)
|
|
171
127
|
|
|
172
128
|
alignment_class = Alignment.from_class_handle(options['alignment']) if options['alignment'] is not None else Minimal
|
|
@@ -218,3 +174,78 @@ class Parser_abc():
|
|
|
218
174
|
|
|
219
175
|
# Return the populated result set for convenience.
|
|
220
176
|
return self.results
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
class File_parser_abc(Parser_abc):
|
|
180
|
+
"""ABC for all parsers."""
|
|
181
|
+
|
|
182
|
+
def __init__(self, *log_files, raw_data = None, **kwargs):
|
|
183
|
+
"""
|
|
184
|
+
Top level constructor for calculation parsers.
|
|
185
|
+
|
|
186
|
+
:param log_files: A list of output file to analyse/parse. The first log_file given will be used for naming purposes.
|
|
187
|
+
"""
|
|
188
|
+
# Set our name.
|
|
189
|
+
self.log_file_paths = self.sort_log_files([Path(log_file) for log_file in log_files if log_file is not None])
|
|
190
|
+
|
|
191
|
+
# Panic if we have no logs.
|
|
192
|
+
if len(self.log_file_paths) == 0:
|
|
193
|
+
raise Digichem_exception("Cannot parse calculation output; no available log files. Are you sure the given path is a log file or directory containing log files?")
|
|
194
|
+
|
|
195
|
+
super().__init__(raw_data=raw_data, **kwargs)
|
|
196
|
+
|
|
197
|
+
@classmethod
|
|
198
|
+
def from_logs(self, *log_files, **kwargs):
|
|
199
|
+
"""
|
|
200
|
+
Intelligent constructor that will attempt to guess the location of aux files from a given log file(s).
|
|
201
|
+
|
|
202
|
+
:param log_files: Output file(s) to parse or a directory of output files to parse.
|
|
203
|
+
"""
|
|
204
|
+
# This default implementation does nothing smart.
|
|
205
|
+
return self(*log_files, **kwargs)
|
|
206
|
+
|
|
207
|
+
@classmethod
|
|
208
|
+
def sort_log_files(self, log_files):
|
|
209
|
+
"""
|
|
210
|
+
Sort a list of log files into a particular order, if required for this parser.
|
|
211
|
+
"""
|
|
212
|
+
return log_files
|
|
213
|
+
|
|
214
|
+
@property
|
|
215
|
+
def log_file_path(self):
|
|
216
|
+
"""
|
|
217
|
+
The main log file.
|
|
218
|
+
"""
|
|
219
|
+
for log_file in self.log_file_paths:
|
|
220
|
+
if log_file.suffix.lower() == ".log":
|
|
221
|
+
return log_file
|
|
222
|
+
|
|
223
|
+
return self.log_file_paths[0]
|
|
224
|
+
|
|
225
|
+
@property
|
|
226
|
+
def name(self):
|
|
227
|
+
"""
|
|
228
|
+
Short name to describe this calculation result.
|
|
229
|
+
"""
|
|
230
|
+
return self.log_file_path.with_suffix("").name
|
|
231
|
+
|
|
232
|
+
@property
|
|
233
|
+
def description(self):
|
|
234
|
+
"""
|
|
235
|
+
A name/path that describes the file(s) being parsed, used for error messages etc.
|
|
236
|
+
"""
|
|
237
|
+
return self.log_file_path
|
|
238
|
+
|
|
239
|
+
def post_parse(self):
|
|
240
|
+
"""
|
|
241
|
+
Perform any required operations after line-by-line parsing.
|
|
242
|
+
"""
|
|
243
|
+
super().post_parse()
|
|
244
|
+
|
|
245
|
+
# Add our name.
|
|
246
|
+
if self.name is not None:
|
|
247
|
+
self.data.metadata['name'] = self.name
|
|
248
|
+
|
|
249
|
+
# Set our file paths.
|
|
250
|
+
self.data.metadata['log_files'] = self.log_file_paths
|
|
251
|
+
self.data.metadata['auxiliary_files'] = self.auxiliary_files
|
digichem/parse/cclib.py
CHANGED
|
@@ -1,14 +1,20 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
|
-
import
|
|
2
|
+
import itertools
|
|
3
|
+
import csv
|
|
4
|
+
import numpy
|
|
5
|
+
import math
|
|
6
|
+
from scipy import signal
|
|
3
7
|
|
|
4
8
|
import digichem.log
|
|
5
|
-
from digichem.parse.base import
|
|
9
|
+
from digichem.parse.base import File_parser_abc
|
|
10
|
+
from digichem.exception import Digichem_exception
|
|
11
|
+
from digichem.misc.io import checksum
|
|
6
12
|
|
|
7
13
|
# Hidden imports.
|
|
8
14
|
#import cclib.io
|
|
9
15
|
|
|
10
16
|
|
|
11
|
-
class Cclib_parser(
|
|
17
|
+
class Cclib_parser(File_parser_abc):
|
|
12
18
|
"""
|
|
13
19
|
ABC for parsers that use cclib to do most of their work for them.
|
|
14
20
|
"""
|
|
@@ -25,11 +31,15 @@ class Cclib_parser(Parser_abc):
|
|
|
25
31
|
"""
|
|
26
32
|
# Also save our aux files, stripping None.
|
|
27
33
|
self.auxiliary_files = {name: aux_file for name,aux_file in auxiliary_files.items() if aux_file is not None}
|
|
34
|
+
|
|
35
|
+
# TODO: Does this belong here?
|
|
36
|
+
# Also have a look for a profile.csv file that we can use for performance metrics.
|
|
37
|
+
self.profile_file = Path(log_files[0].parent, "../Logs/profile.csv")
|
|
28
38
|
|
|
29
39
|
super().__init__(*log_files)
|
|
30
40
|
|
|
31
41
|
@classmethod
|
|
32
|
-
def from_logs(self, *log_files, **kwargs):
|
|
42
|
+
def from_logs(self, *log_files, hints = None, **kwargs):
|
|
33
43
|
"""
|
|
34
44
|
Intelligent constructor that will attempt to guess the location of files from a given log file(s).
|
|
35
45
|
|
|
@@ -37,9 +47,11 @@ class Cclib_parser(Parser_abc):
|
|
|
37
47
|
"""
|
|
38
48
|
# Have a look for aux. files.
|
|
39
49
|
auxiliary_files = {}
|
|
50
|
+
|
|
51
|
+
basename = log_files[0].name if len(log_files) > 0 else ""
|
|
40
52
|
|
|
41
|
-
for
|
|
42
|
-
auxiliary_files.update(self.find_auxiliary_files(
|
|
53
|
+
for hint in itertools.chain(log_files, hints if hints is not None else []):
|
|
54
|
+
auxiliary_files.update(self.find_auxiliary_files(hint, basename))
|
|
43
55
|
|
|
44
56
|
# Finally, update our auxiliary_files with kwargs, so any user specified aux files take precedence.
|
|
45
57
|
auxiliary_files.update(kwargs)
|
|
@@ -47,7 +59,7 @@ class Cclib_parser(Parser_abc):
|
|
|
47
59
|
return self(*log_files, **auxiliary_files)
|
|
48
60
|
|
|
49
61
|
@classmethod
|
|
50
|
-
def find_auxiliary_files(self, hint):
|
|
62
|
+
def find_auxiliary_files(self, hint, basename):
|
|
51
63
|
"""
|
|
52
64
|
Find auxiliary files from a given hint.
|
|
53
65
|
|
|
@@ -60,6 +72,11 @@ class Cclib_parser(Parser_abc):
|
|
|
60
72
|
auxiliary_files = {}
|
|
61
73
|
for file_type in self.INPUT_FILE_TYPES:
|
|
62
74
|
for extension in file_type.extensions:
|
|
75
|
+
if hint.is_dir():
|
|
76
|
+
# Peek inside.
|
|
77
|
+
if Path(hint, basename).with_suffix(extension).exists():
|
|
78
|
+
auxiliary_files[self.INPUT_FILE_TYPES[file_type]] = Path(hint, basename).with_suffix(extension)
|
|
79
|
+
|
|
63
80
|
if hint.with_suffix(extension).exists():
|
|
64
81
|
auxiliary_files[self.INPUT_FILE_TYPES[file_type]] = hint.with_suffix(extension)
|
|
65
82
|
|
|
@@ -86,18 +103,15 @@ class Cclib_parser(Parser_abc):
|
|
|
86
103
|
|
|
87
104
|
# Get data from cclib.
|
|
88
105
|
self.data = cclib.io.ccread(file_paths if len(file_paths) > 1 else file_paths[0])
|
|
106
|
+
if self.data is None:
|
|
107
|
+
raise Digichem_exception("Could not parse any data at all!")
|
|
89
108
|
|
|
90
109
|
# Get a unique ID (checksum) from the given log files.
|
|
91
110
|
# First, order the list of filenames so we also process in the same order.
|
|
92
111
|
# We do this because not all parsers define a custom sort.
|
|
93
112
|
|
|
94
113
|
file_paths.sort()
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
for file_path in file_paths:
|
|
98
|
-
hasher.update(Path(file_path).read_bytes())
|
|
99
|
-
|
|
100
|
-
self.data._id = hasher.hexdigest()
|
|
114
|
+
self.data._id = checksum(*file_paths, hash_func = "sha1")
|
|
101
115
|
|
|
102
116
|
# Do some setup.
|
|
103
117
|
self.pre_parse()
|
|
@@ -107,6 +121,82 @@ class Cclib_parser(Parser_abc):
|
|
|
107
121
|
with open(log_file_path, "rt") as log_file:
|
|
108
122
|
for line in log_file:
|
|
109
123
|
self.parse_output_line(log_file, line)
|
|
124
|
+
|
|
125
|
+
# Add profiling data.
|
|
126
|
+
try:
|
|
127
|
+
self.parse_profile_file()
|
|
128
|
+
|
|
129
|
+
except Exception:
|
|
130
|
+
digichem.log.get_logger().warning("Could not parse profile.csv file; profiling data will be unavailable", exc_info=True)
|
|
131
|
+
|
|
132
|
+
def parse_profile_file(self):
|
|
133
|
+
"""
|
|
134
|
+
"""
|
|
135
|
+
# Real calculations can end up with millions of rows, which is far too much data to handle.
|
|
136
|
+
# We will need to downsample if we have too many data points.
|
|
137
|
+
# First work out how many rows there are.
|
|
138
|
+
try:
|
|
139
|
+
with open(self.profile_file, "rb") as profile_file:
|
|
140
|
+
lines = sum(1 for _ in profile_file) -1 # Remove 1 line for the header.
|
|
141
|
+
|
|
142
|
+
except FileNotFoundError:
|
|
143
|
+
# This is ok
|
|
144
|
+
return
|
|
145
|
+
|
|
146
|
+
if lines < 2:
|
|
147
|
+
return
|
|
148
|
+
|
|
149
|
+
max_lines = 1000
|
|
150
|
+
factor = math.ceil(lines / max_lines)
|
|
151
|
+
|
|
152
|
+
with open(self.profile_file) as profile_file:
|
|
153
|
+
reader = csv.reader(profile_file)
|
|
154
|
+
|
|
155
|
+
# Get the header.
|
|
156
|
+
headers = next(reader)
|
|
157
|
+
|
|
158
|
+
# Check headers match.
|
|
159
|
+
if (headers[0] != "Duration / s" or
|
|
160
|
+
headers[1] != "Memory Used (Real) / bytes" or
|
|
161
|
+
headers[2] != "Memory Used (Real) / %" or
|
|
162
|
+
headers[3] != "Memory Available (Real) / bytes" or
|
|
163
|
+
headers[4] != "Memory Available (Real) / %" or
|
|
164
|
+
headers[9] != "CPU Usage / %" or
|
|
165
|
+
headers[15] != "Output Directory Available / bytes" or
|
|
166
|
+
headers[17] != "Scratch Directory Available / bytes"
|
|
167
|
+
):
|
|
168
|
+
raise Digichem_exception("wrong headers found in profile.csv file")
|
|
169
|
+
|
|
170
|
+
# Then the body.
|
|
171
|
+
# TODO: Reading the entire file is not ideal...
|
|
172
|
+
data = numpy.genfromtxt(
|
|
173
|
+
profile_file,
|
|
174
|
+
delimiter=',',
|
|
175
|
+
# TODO: use something better.
|
|
176
|
+
filling_values = "0"
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
# We'll keep:
|
|
180
|
+
# - duration
|
|
181
|
+
# - memory used
|
|
182
|
+
# - memory used %
|
|
183
|
+
# - memory available
|
|
184
|
+
# - memory available %
|
|
185
|
+
# - cpu used
|
|
186
|
+
# - output space
|
|
187
|
+
# - scratch space
|
|
188
|
+
new_data = numpy.zeros((math.ceil(lines / factor), 8))
|
|
189
|
+
|
|
190
|
+
# Now decimate.
|
|
191
|
+
for i, k in enumerate([0,1,2,3,4,9,15,17]):
|
|
192
|
+
if factor > 1:
|
|
193
|
+
new_data[:, i] = signal.decimate(data[:, k], factor)
|
|
194
|
+
else:
|
|
195
|
+
new_data[:, i] = data[:, k]
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
self.data.metadata['performance'] = new_data
|
|
199
|
+
|
|
110
200
|
|
|
111
201
|
def parse_output_line(self, log_file, line):
|
|
112
202
|
"""
|
digichem/parse/dump.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
import yaml
|
|
3
3
|
import json
|
|
4
4
|
|
|
5
|
-
from digichem.parse.base import
|
|
5
|
+
from digichem.parse.base import File_parser_abc
|
|
6
6
|
from digichem.result.tdm import Transition_dipole_moment
|
|
7
7
|
from digichem.result.result import Result_set
|
|
8
8
|
from digichem.result.metadata import Metadata
|
|
@@ -20,7 +20,7 @@ from digichem.result.alignment.base import Alignment, Minimal
|
|
|
20
20
|
from digichem.result.nmr import NMR_list
|
|
21
21
|
|
|
22
22
|
|
|
23
|
-
class Dump_multi_parser_abc(
|
|
23
|
+
class Dump_multi_parser_abc(File_parser_abc):
|
|
24
24
|
"""
|
|
25
25
|
ABC for classes that can read multiple result sets from dumped data.
|
|
26
26
|
"""
|
|
@@ -122,7 +122,7 @@ class Json_multi_parser(Dump_multi_parser_abc):
|
|
|
122
122
|
self.data = json.load(json_file)
|
|
123
123
|
|
|
124
124
|
|
|
125
|
-
class Dump_parser_abc(
|
|
125
|
+
class Dump_parser_abc(File_parser_abc):
|
|
126
126
|
"""
|
|
127
127
|
ABC for parsers that read dumped data.
|
|
128
128
|
"""
|
digichem/parse/orca.py
CHANGED
digichem/parse/pyscf.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import json
|
|
3
|
+
|
|
4
|
+
from cclib.bridge.cclib2pyscf import cclibfrommethods
|
|
5
|
+
|
|
6
|
+
from digichem.parse.base import Parser_abc
|
|
7
|
+
|
|
8
|
+
class Pyscf_parser(Parser_abc):
|
|
9
|
+
"""
|
|
10
|
+
Top level class for parsing output from pyscf data.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
def __init__(self, mol_name, methods, **kwargs):
|
|
14
|
+
self.methods = methods
|
|
15
|
+
self.mol_name = mol_name
|
|
16
|
+
super().__init__(**kwargs)
|
|
17
|
+
|
|
18
|
+
def _parse(self):
|
|
19
|
+
self.data = cclibfrommethods(**self.methods)
|
|
20
|
+
# TODO: We need some way of generating a checksum
|
|
21
|
+
self.data._id = hashlib.sha1(json.dumps(dir(self.data), sort_keys = True).encode('utf-8')).hexdigest()
|
|
22
|
+
self.data.metadata['name'] = self.mol_name
|
|
23
|
+
self.data._aux = {'methods': self.methods}
|
|
24
|
+
|
|
25
|
+
|
digichem/parse/turbomole.py
CHANGED
|
@@ -25,9 +25,9 @@ class Turbomole_parser(Cclib_parser):
|
|
|
25
25
|
"""
|
|
26
26
|
Sort a list of log files into a particular order, if required for this parser.
|
|
27
27
|
"""
|
|
28
|
-
from cclib.
|
|
28
|
+
from cclib.parser.turbomoleparser import Turbomole
|
|
29
29
|
|
|
30
|
-
return
|
|
30
|
+
return Turbomole.sort_input(log_files)
|
|
31
31
|
|
|
32
32
|
def duration_to_timedelta(self, duration_str):
|
|
33
33
|
"""
|
|
@@ -161,16 +161,16 @@ class Turbomole_parser(Cclib_parser):
|
|
|
161
161
|
return self.results
|
|
162
162
|
|
|
163
163
|
@classmethod
|
|
164
|
-
def find_auxiliary_files(self, hint):
|
|
164
|
+
def find_auxiliary_files(self, hint, basename):
|
|
165
165
|
"""
|
|
166
166
|
Find auxiliary files from a given hint.
|
|
167
167
|
|
|
168
168
|
:param hint: A path to a file to use as a hint to find additional files.
|
|
169
169
|
:returns: A dictionary of found aux files.
|
|
170
170
|
"""
|
|
171
|
-
auxiliary_files = super().find_auxiliary_files(hint)
|
|
171
|
+
auxiliary_files = super().find_auxiliary_files(hint, basename)
|
|
172
172
|
|
|
173
|
-
parent = pathlib.Path(hint).parent
|
|
173
|
+
parent = pathlib.Path(hint).parent if not hint.is_dir() else hint
|
|
174
174
|
|
|
175
175
|
# Find .cao density files.
|
|
176
176
|
# First look for ground state density files.
|