digichem-core 6.10.1__py3-none-any.whl → 7.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- digichem/__init__.py +2 -2
- digichem/config/base.py +11 -2
- digichem/config/util.py +3 -2
- digichem/file/prattle.py +9 -7
- digichem/image/render.py +6 -4
- digichem/image/spectroscopy.py +17 -4
- digichem/input/__init__.py +1 -1
- digichem/input/digichem_input.py +39 -34
- digichem/misc/io.py +11 -0
- digichem/parse/base.py +8 -6
- digichem/parse/cclib.py +57 -17
- digichem/parse/dump.py +31 -35
- digichem/parse/gaussian.py +2 -2
- digichem/parse/pyscf.py +13 -3
- digichem/parse/turbomole.py +2 -3
- digichem/parse/util.py +6 -5
- digichem/result/alignment/base.py +2 -2
- digichem/result/atom.py +4 -4
- digichem/result/base.py +53 -5
- digichem/result/dipole_moment.py +1 -1
- digichem/result/emission.py +5 -5
- digichem/result/energy.py +8 -8
- digichem/result/excited_state.py +20 -13
- digichem/result/ground_state.py +2 -2
- digichem/result/metadata.py +51 -25
- digichem/result/nmr.py +37 -28
- digichem/result/orbital.py +3 -3
- digichem/result/result.py +14 -14
- digichem/result/soc.py +3 -3
- digichem/result/spectroscopy.py +5 -4
- digichem/result/tdm.py +5 -5
- digichem/result/vibration.py +15 -6
- digichem/test/conftest.py +5 -0
- digichem/test/mock/cubegen +87172 -0
- digichem/test/mock/formchk +9456 -0
- digichem/test/test_image.py +54 -42
- digichem/test/test_input.py +17 -3
- digichem/test/test_parsing.py +9 -0
- digichem/test/test_prattle.py +31 -2
- digichem/test/util.py +2 -0
- {digichem_core-6.10.1.dist-info → digichem_core-7.0.0.dist-info}/METADATA +1 -1
- {digichem_core-6.10.1.dist-info → digichem_core-7.0.0.dist-info}/RECORD +45 -43
- {digichem_core-6.10.1.dist-info → digichem_core-7.0.0.dist-info}/WHEEL +0 -0
- {digichem_core-6.10.1.dist-info → digichem_core-7.0.0.dist-info}/licenses/COPYING.md +0 -0
- {digichem_core-6.10.1.dist-info → digichem_core-7.0.0.dist-info}/licenses/LICENSE +0 -0
digichem/__init__.py
CHANGED
|
@@ -20,7 +20,7 @@ from digichem.datas import get_resource
|
|
|
20
20
|
# development = prerelease is not None
|
|
21
21
|
# # The full version number of this package.
|
|
22
22
|
# __version__ = "{}.{}.{}{}".format(major_version, minor_version, revision, "-pre.{}".format(prerelease) if development else "")
|
|
23
|
-
__version__ = "
|
|
23
|
+
__version__ = "7.0.0"
|
|
24
24
|
_v_parts = __version__.split("-")[0].split(".")
|
|
25
25
|
major_version = int(_v_parts[0])
|
|
26
26
|
minor_version = int(_v_parts[1])
|
|
@@ -39,7 +39,7 @@ __author__ = [
|
|
|
39
39
|
]
|
|
40
40
|
|
|
41
41
|
# Program date (when we were last updated). This is changed automatically.
|
|
42
|
-
_last_updated_string = "
|
|
42
|
+
_last_updated_string = "30/09/2025"
|
|
43
43
|
last_updated = datetime.strptime(_last_updated_string, "%d/%m/%Y")
|
|
44
44
|
|
|
45
45
|
# The sys attribute 'frozen' is our flag, '_MEIPASS' is the dir location.
|
digichem/config/base.py
CHANGED
|
@@ -35,6 +35,11 @@ class Digichem_options(Configurable):
|
|
|
35
35
|
cubegen = Option(help = "Gaussian's cubegen utility https://gaussian.com/cubegen/", default = "cubegen"),
|
|
36
36
|
cubegen_parallel = Option(help = "What type of parallelism to use with cubegen, multithreaded runs a single instance of cubegen across multiple CPUs, pool runs multiple instances of cubegen", choices = [None, "multithreaded", "pool"], default = "pool")
|
|
37
37
|
)
|
|
38
|
+
|
|
39
|
+
parse = Options(
|
|
40
|
+
help = "Options for controlling parsing of config files",
|
|
41
|
+
profiling_rows = Option(help = "The maximum number of rows to parse from the calculation profile file (if available); if more rows than this are available then the data will be downsampled to at most this number of data points", type = int, default = 1000)
|
|
42
|
+
)
|
|
38
43
|
|
|
39
44
|
skeletal_image = Options(
|
|
40
45
|
help = "Options for controlling the rendering of 2D skeletal images.",
|
|
@@ -72,7 +77,7 @@ Possible options are:
|
|
|
72
77
|
),
|
|
73
78
|
batoms = Options(help = "Beautiful Atoms/Blender specific options (only applies if engine == 'batoms'",
|
|
74
79
|
blender = Option(help = "Path to the blender executable, in which beautiful atoms should be installed", default = "batoms-blender"),
|
|
75
|
-
cpus = Option(help = "The number of CPUs/threads to use. This option is overridden if running in a calculation
|
|
80
|
+
cpus = Option(help = "The number of CPUs/threads to use. This option is overridden if running in a calculation environment (where it uses the same number of CPUs as the calculation did)", type = int, default = 1),
|
|
76
81
|
render_samples = Option(help = "The number of render samples (or passes) to use. Higher values result in higher image quality and greater render times", type = int, default = 32),
|
|
77
82
|
perspective = Option(help = "The perspective mode", choices = ["orthographic", "perspective"], default = "perspective"),
|
|
78
83
|
stacking = Option(help = "The number of image copies to composite together to avoid transparency artifacts", type = int, default = 10)
|
|
@@ -172,6 +177,7 @@ To disable the maximum width, set to null.""", type = int, default = 1200
|
|
|
172
177
|
Set to 0 for no cutoff (all peaks shown), which may results in the graph being extended well beyond the drawn peaks (because many peaks are too small to see).
|
|
173
178
|
This option has no effect when using manual x limits.""", type = float, default = 0.01
|
|
174
179
|
),
|
|
180
|
+
y_filter = Option(help = "The minimum y value to simulate using the Gaussian function (y values below this are discarded)", type = float, default = 1e-6),
|
|
175
181
|
x_padding = Option(help = "The amount (in nm) to extend the x axis past the highest/lowest energy peak.", type = int, default = 40),
|
|
176
182
|
fwhm = Option(help = "The full-width at half-maximum; changes how wide the drawn peaks are. Note that the choice of peak width is essentially arbitrary; only the peak height is given by calculation. Units are eV.", type = float, default = 0.4),
|
|
177
183
|
gaussian_cutoff = Option(help = "The minimum y value to plot using the Gaussian function (controls how close to the x axis we draw the gaussian) as a fraction of the max peak height.", type = float, default = 0.001),
|
|
@@ -202,10 +208,11 @@ Absorption graphs will grow/shrink their width to fit available data, keeping a
|
|
|
202
208
|
To disable the maximum width, set to null.""", type = int, default = 1200
|
|
203
209
|
),
|
|
204
210
|
peak_cutoff = Option(help =\
|
|
205
|
-
"""The minimum oscillator strength that a peak must have to be shown in the graph, as a fraction
|
|
211
|
+
"""The minimum oscillator strength that a peak must have to be shown in the graph, as a fraction of the highest peak.
|
|
206
212
|
Set to 0 for no cutoff (all peaks shown), which may results in the graph being extended well beyond the drawn peaks (because many peaks are too small to see).
|
|
207
213
|
This option has no effect when using manual x limits.""", type = float, default = 0.01
|
|
208
214
|
),
|
|
215
|
+
y_filter = Option(help = "The minimum y value to simulate using the Gaussian function (y values below this are discarded)", type = float, default = 1e-6),
|
|
209
216
|
x_padding = Option(help = "The amount (in nm) to extend the x axis past the highest/lowest energy peak.", type = int, default = 40),
|
|
210
217
|
fwhm = Option(help = "The full-width at half-maximum; changes how wide the drawn peaks are. Note that the choice of peak width is essentially arbitrary; only the peak height is given by calculation. Units are eV.", type = float, default = 0.4),
|
|
211
218
|
gaussian_cutoff = Option(help = "The minimum y value to plot using the Gaussian function (controls how close to the x axis we draw the gaussian) as a fraction of the max peak height.", type = float, default = 0.001),
|
|
@@ -235,6 +242,7 @@ Possible options are:
|
|
|
235
242
|
"""The maximum image width in pixels.
|
|
236
243
|
IR spectra will grow/shrink their width to fit available data, keeping a constant scale (constant pixels to nm ratio) but only up to this maximum.
|
|
237
244
|
To disable the maximum width, set to null.""", type = int, default = 1500),
|
|
245
|
+
y_filter = Option(help = "The minimum y value to simulate using the Gaussian function (y values below this are discarded)", type = float, default = 1e-6),
|
|
238
246
|
gaussian_cutoff = Option(help = "The minimum y value to plot using the Gaussian function (controls how close to the x axis we draw the gaussian) as a fraction of the max peak height.", type = float, default = 0.001),
|
|
239
247
|
gaussian_resolution = Option(help = "The spacing between x values to plot using the Gaussian function, in eV. Values that are too large will result in 'curves' made up of a series of straight edges.", type = float, default = 1.0)
|
|
240
248
|
)
|
|
@@ -243,6 +251,7 @@ To disable the maximum width, set to null.""", type = int, default = 1500),
|
|
|
243
251
|
enable_rendering = Option(help = "Set to False to disable image rendering.", type = bool, default = True),
|
|
244
252
|
coupling_filter = Option(help = "Discard J coupling that is below this threshold (in Hz)", type = float, default = 1),
|
|
245
253
|
fwhm = Option(help = "The full-width at half-maximum; changes how wide the drawn peaks are. Note that the choice of peak width is essentially arbitrary; only the peak height is given by calculation. Units are ppm.", type = float, default = 0.01),
|
|
254
|
+
y_filter = Option(help = "The minimum y value to simulate using the Gaussian function (y values below this are discarded)", type = float, default = 1e-6),
|
|
246
255
|
gaussian_cutoff = Option(help = "The minimum y value to plot using the Gaussian function (controls how close to the x axis we draw the gaussian) as a fraction of the max peak height.", type = float, default = 0.001),
|
|
247
256
|
gaussian_resolution = Option(help = "The spacing between x values to plot using the Gaussian function, in ppm. Values that are too large will result in 'curves' made up of a series of straight edges.", type = float, default = 0.001),
|
|
248
257
|
frequency = Option(help = "The frequency to run the simulated spectrometer at. Larger values will result in narrower coupling. Units are MHz", type = float, default = 100),
|
digichem/config/util.py
CHANGED
|
@@ -40,8 +40,9 @@ def get_config(
|
|
|
40
40
|
:param sources: An iterable of file locations to read from.
|
|
41
41
|
:return: A Digichem_options object (a fancy dict).
|
|
42
42
|
"""
|
|
43
|
+
global _options
|
|
43
44
|
if clear_cache:
|
|
44
|
-
|
|
45
|
+
_options = None
|
|
45
46
|
|
|
46
47
|
if _options is not None and extra_config_files is None and extra_config_strings is None:
|
|
47
48
|
# Config has already been loaded (and we have nothing new to add).
|
|
@@ -63,7 +64,7 @@ def get_config(
|
|
|
63
64
|
# config.merge(Config_file_parser(source).load(True))
|
|
64
65
|
|
|
65
66
|
# No need to validate here, we're going to do it later anyway.
|
|
66
|
-
|
|
67
|
+
_options = cls(validate_now = False, **config)
|
|
67
68
|
|
|
68
69
|
if extra_config_files is None:
|
|
69
70
|
extra_config_files = []
|
digichem/file/prattle.py
CHANGED
|
@@ -37,7 +37,7 @@ class Openprattle_converter():
|
|
|
37
37
|
:param executable: Path or command name to the oprattle executable.
|
|
38
38
|
"""
|
|
39
39
|
self.input_file = input_file
|
|
40
|
-
self.input_file_buffer = input_file_buffer
|
|
40
|
+
self.input_file_buffer = input_file_buffer if input_file is None else input_file.read()
|
|
41
41
|
self.input_file_path = input_file_path
|
|
42
42
|
self.input_file_type = input_file_type
|
|
43
43
|
self.executable = executable
|
|
@@ -128,7 +128,7 @@ class Openprattle_converter():
|
|
|
128
128
|
]
|
|
129
129
|
|
|
130
130
|
# Add the input path if we're reading from file.
|
|
131
|
-
if self.
|
|
131
|
+
if self.input_file_buffer is None:
|
|
132
132
|
sig.append(str(self.input_file_path))
|
|
133
133
|
|
|
134
134
|
# Now add the input and output switches.
|
|
@@ -152,7 +152,11 @@ class Openprattle_converter():
|
|
|
152
152
|
sig.extend(['-O', output_file])
|
|
153
153
|
|
|
154
154
|
# Give our input_file as stdin if we're not reading from file.
|
|
155
|
-
inputs = self.
|
|
155
|
+
inputs = self.input_file_buffer
|
|
156
|
+
|
|
157
|
+
# Encode strings.
|
|
158
|
+
if isinstance(inputs, str):
|
|
159
|
+
inputs = inputs.encode()
|
|
156
160
|
|
|
157
161
|
# GO.
|
|
158
162
|
done_process = subprocess.run(
|
|
@@ -160,18 +164,16 @@ class Openprattle_converter():
|
|
|
160
164
|
input = inputs,
|
|
161
165
|
stdout = subprocess.PIPE,
|
|
162
166
|
stderr = subprocess.PIPE,
|
|
163
|
-
# TODO: Using universal newlines is probably not safe here; some formats are binary (.cdx etc...)
|
|
164
|
-
universal_newlines = True,
|
|
165
167
|
)
|
|
166
168
|
|
|
167
169
|
# This can throw exceptions.
|
|
168
|
-
self.handle_logging(done_process.stderr)
|
|
170
|
+
self.handle_logging(done_process.stderr.decode())
|
|
169
171
|
|
|
170
172
|
if done_process.returncode != 0:
|
|
171
173
|
raise Digichem_exception("prattle subprocess returned code {}".format(done_process.returncode))
|
|
172
174
|
|
|
173
175
|
# Return our output.
|
|
174
|
-
return done_process.stdout if output_file is None else None
|
|
176
|
+
return done_process.stdout.decode() if output_file is None else None
|
|
175
177
|
|
|
176
178
|
def handle_logging(self, raw_output):
|
|
177
179
|
"""
|
digichem/image/render.py
CHANGED
|
@@ -247,7 +247,6 @@ class Batoms_renderer(Render_maker):
|
|
|
247
247
|
# "--render-samples", f"{samples}",
|
|
248
248
|
"--perspective", f"{self.perspective}",
|
|
249
249
|
"--padding", f"{padding}",
|
|
250
|
-
"--rotations",
|
|
251
250
|
]
|
|
252
251
|
for orientation, resolution, samples, mini_file_name in targets:
|
|
253
252
|
args.extend([
|
|
@@ -258,9 +257,12 @@ class Batoms_renderer(Render_maker):
|
|
|
258
257
|
mini_file_name
|
|
259
258
|
])
|
|
260
259
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
260
|
+
if len(self.rotations) > 0:
|
|
261
|
+
args.append("--rotations")
|
|
262
|
+
|
|
263
|
+
# Add rotations.
|
|
264
|
+
for rotation in self.rotations:
|
|
265
|
+
args.append(json.dumps(rotation))
|
|
264
266
|
|
|
265
267
|
return args
|
|
266
268
|
|
digichem/image/spectroscopy.py
CHANGED
|
@@ -256,8 +256,15 @@ class Absorption_emission_graph_maker(Spectroscopy_graph_maker):
|
|
|
256
256
|
"""
|
|
257
257
|
return self(
|
|
258
258
|
output,
|
|
259
|
-
Absorption_emission_graph.from_excited_states(
|
|
260
|
-
|
|
259
|
+
Absorption_emission_graph.from_excited_states(
|
|
260
|
+
excited_states,
|
|
261
|
+
options[self.options_name]['fwhm'],
|
|
262
|
+
options[self.options_name]['gaussian_resolution'],
|
|
263
|
+
options[self.options_name]['gaussian_cutoff'],
|
|
264
|
+
use_jacobian = options[self.options_name]['use_jacobian'],
|
|
265
|
+
filter = options[self.options_name]['y_filter'],
|
|
266
|
+
adjust_zero = adjust_zero),
|
|
267
|
+
**{key: value for key, value in options[self.options_name].items() if key not in ["gaussian_cutoff", "gaussian_resolution", "fwhm", "use_jacobian", "y_filter"]},
|
|
261
268
|
**kwargs
|
|
262
269
|
)
|
|
263
270
|
|
|
@@ -332,8 +339,14 @@ class Frequency_graph_maker(Spectroscopy_graph_maker):
|
|
|
332
339
|
"""
|
|
333
340
|
return self(
|
|
334
341
|
output,
|
|
335
|
-
graph = Spectroscopy_graph.from_vibrations(
|
|
336
|
-
|
|
342
|
+
graph = Spectroscopy_graph.from_vibrations(
|
|
343
|
+
vibrations,
|
|
344
|
+
options['IR_spectrum']['fwhm'],
|
|
345
|
+
options['IR_spectrum']['gaussian_resolution'],
|
|
346
|
+
options['IR_spectrum']['gaussian_cutoff'],
|
|
347
|
+
filter = options['IR_spectrum']['y_filter']
|
|
348
|
+
),
|
|
349
|
+
**{key: value for key, value in options['IR_spectrum'].items() if key not in ["gaussian_cutoff", "gaussian_resolution", "fwhm", "y_filter"]},
|
|
337
350
|
**kwargs
|
|
338
351
|
)
|
|
339
352
|
|
digichem/input/__init__.py
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
from .base import Input_file
|
|
2
2
|
from .gaussian import Gaussian_input_parser
|
|
3
|
-
from .digichem_input import Digichem_coords_ABC, Digichem_coords, si_from_yaml, si_from_file
|
|
3
|
+
from .digichem_input import Digichem_coords_ABC, Digichem_coords, si_from_yaml, si_from_file, si_from_data
|
digichem/input/digichem_input.py
CHANGED
|
@@ -211,7 +211,7 @@ class Digichem_coords_ABC(Input_file, Molecule_mixin):
|
|
|
211
211
|
# Convert.
|
|
212
212
|
charge = self.charge if self.charge is not None else None
|
|
213
213
|
multiplicity = self.multiplicity if self.multiplicity is not None else None
|
|
214
|
-
return Openprattle_converter(
|
|
214
|
+
return Openprattle_converter(input_file_buffer = self.xyz, input_file_path = self.implicit_name, input_file_type = "xyz").convert(file_type, file, charge = charge, multiplicity = multiplicity)
|
|
215
215
|
|
|
216
216
|
@classmethod
|
|
217
217
|
def input_formats(self):
|
|
@@ -416,6 +416,38 @@ def si_from_yaml(yaml_dict, file_name = None, **kwargs):
|
|
|
416
416
|
return cls.from_yaml(yaml_dict, file_name, **kwargs)
|
|
417
417
|
|
|
418
418
|
|
|
419
|
+
def si_from_data(data, file_type, *, gen3D = None, file_name = None, **kwargs):
|
|
420
|
+
"""
|
|
421
|
+
"""
|
|
422
|
+
if file_type in ["com", "gau", "gjc", "gjf"]:
|
|
423
|
+
# Gaussian input format.
|
|
424
|
+
return Digichem_coords.from_com(data, file_name = file_name, **kwargs)
|
|
425
|
+
|
|
426
|
+
elif file_type == "si":
|
|
427
|
+
# Digichem input format.
|
|
428
|
+
return si_from_yaml(yaml.safe_load(data), file_name = file_name, **kwargs)
|
|
429
|
+
|
|
430
|
+
elif file_type == "pickle":
|
|
431
|
+
# A digichem resume file.
|
|
432
|
+
# The resume file (should be) a pickled destination object.
|
|
433
|
+
try:
|
|
434
|
+
destination = dill.loads(data)
|
|
435
|
+
|
|
436
|
+
except Exception as e:
|
|
437
|
+
raise Digichem_exception("Failed to parse digichem resume file") from e
|
|
438
|
+
|
|
439
|
+
return destination.program.calculation.input_coords
|
|
440
|
+
|
|
441
|
+
else:
|
|
442
|
+
# Generic input format, use obabel.
|
|
443
|
+
|
|
444
|
+
# We convert all formats to gaussian input formats (because this format contains charge and multiplicity, which we can extract).
|
|
445
|
+
com_file = Openprattle_converter(input_file_buffer = data, input_file_type = file_type).convert("com", gen3D = gen3D)
|
|
446
|
+
|
|
447
|
+
# Continue with other constructors.
|
|
448
|
+
return Digichem_coords.from_com(com_file, file_name = file_name, **kwargs)
|
|
449
|
+
|
|
450
|
+
|
|
419
451
|
def si_from_file(file_name, file_type = None, *, gen3D = None, **kwargs):
|
|
420
452
|
"""
|
|
421
453
|
Create a Digichem_coords object from a file in arbitrary format.
|
|
@@ -436,32 +468,10 @@ def si_from_file(file_name, file_type = None, *, gen3D = None, **kwargs):
|
|
|
436
468
|
auto_file_type = True
|
|
437
469
|
file_type = Openprattle_converter.type_from_file_name(file_name, allow_none = True)
|
|
438
470
|
|
|
439
|
-
# Certain formats we support natively; others we convert to an intermediate format.
|
|
440
|
-
if file_type in ["com", "gau", "gjc", "gjf"]:
|
|
441
|
-
# Gaussian input format.
|
|
442
|
-
with open(file_name, "rt") as com_file:
|
|
443
|
-
return Digichem_coords.from_com(com_file.read(), file_name = file_name, **kwargs)
|
|
444
|
-
|
|
445
|
-
elif file_type == "si":
|
|
446
|
-
# Digichem input format.
|
|
447
|
-
with open(file_name, "rt") as si_file:
|
|
448
|
-
return si_from_yaml(yaml.safe_load(si_file.read()), file_name = file_name, **kwargs)
|
|
449
|
-
|
|
450
|
-
elif file_type == "pickle":
|
|
451
|
-
# A digichem resume file.
|
|
452
|
-
# The resume file (should be) a pickled destination object.
|
|
453
|
-
with open(file_name, "rb") as pickle_file:
|
|
454
|
-
try:
|
|
455
|
-
destination = dill.load(pickle_file)
|
|
456
|
-
|
|
457
|
-
except Exception as e:
|
|
458
|
-
raise Digichem_exception("Failed to parse digichem resume file") from e
|
|
459
|
-
|
|
460
|
-
return destination.program.calculation.input_coords
|
|
461
|
-
|
|
471
|
+
# Certain formats we support natively; others we convert to an intermediate format.
|
|
462
472
|
# NOTE: Here we assume files without an extension are log files.
|
|
463
473
|
# This works fine for directories, but might change in future.
|
|
464
|
-
|
|
474
|
+
if file_type in ["dat", "log", "out", "output", None] \
|
|
465
475
|
or (auto_file_type and "".join(file_name.suffixes) in open_for_parsing.get_archive_formats()):
|
|
466
476
|
# Generic log-file (output) format.
|
|
467
477
|
# Most formats (.log, .dat etc) we can parse with either Obabel or Digichem.
|
|
@@ -476,24 +486,19 @@ def si_from_file(file_name, file_type = None, *, gen3D = None, **kwargs):
|
|
|
476
486
|
except Exception as e:
|
|
477
487
|
# No good, see if we can use obabel.
|
|
478
488
|
try:
|
|
479
|
-
com_file = Openprattle_converter
|
|
489
|
+
com_file = Openprattle_converter(file_name, file_type).convert("com", gen3D = gen3D)
|
|
480
490
|
|
|
481
491
|
except Exception:
|
|
482
492
|
# Also no good, re-raise original exception.
|
|
483
|
-
raise
|
|
493
|
+
raise e
|
|
484
494
|
|
|
485
495
|
# Worked with fallback, log a message.
|
|
486
496
|
digichem.log.get_logger().warning(f"Failed to parse calculation output file '{file_name}'; using Obabel fallback mechanism")
|
|
487
497
|
return Digichem_coords.from_com(com_file, file_name = file_name, **kwargs)
|
|
488
498
|
|
|
489
499
|
else:
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
# We convert all formats to gaussian input formats (because this format contains charge and multiplicity, which we can extract).
|
|
493
|
-
com_file = Openprattle_converter.from_file(file_name, file_type).convert("com", gen3D = gen3D)
|
|
494
|
-
|
|
495
|
-
# Continue with other constructors.
|
|
496
|
-
return Digichem_coords.from_com(com_file, file_name = file_name, **kwargs)
|
|
500
|
+
with open(file_name, "rb" if file_type in ["pickle", "cdx"] else "r") as input_file:
|
|
501
|
+
return si_from_data(input_file.read(), file_name = file_name, gen3D = gen3D, file_type = file_type, **kwargs)
|
|
497
502
|
|
|
498
503
|
except:
|
|
499
504
|
raise ValueError("Could not parse coordinates from '{}'".format(file_name))
|
digichem/misc/io.py
CHANGED
|
@@ -6,6 +6,7 @@ import os
|
|
|
6
6
|
import shutil
|
|
7
7
|
import sys
|
|
8
8
|
import warnings
|
|
9
|
+
import itertools
|
|
9
10
|
from uuid import uuid4
|
|
10
11
|
import hashlib
|
|
11
12
|
|
|
@@ -320,3 +321,13 @@ class Safe_path():
|
|
|
320
321
|
"""
|
|
321
322
|
# Close our file.
|
|
322
323
|
self.close()
|
|
324
|
+
|
|
325
|
+
def dir_size(target):
|
|
326
|
+
"""
|
|
327
|
+
Calculate the total used file space of a directory and all contents.
|
|
328
|
+
"""
|
|
329
|
+
bytes = 0
|
|
330
|
+
for path in itertools.chain(Path(target).rglob("*"), [Path(target)]):
|
|
331
|
+
bytes += path.stat().st_size
|
|
332
|
+
|
|
333
|
+
return bytes
|
digichem/parse/base.py
CHANGED
|
@@ -32,7 +32,7 @@ custom_parsing_formats = [
|
|
|
32
32
|
class Parser_abc():
|
|
33
33
|
"""ABC for all parsers."""
|
|
34
34
|
|
|
35
|
-
def __init__(self, *, raw_data = None, **kwargs):
|
|
35
|
+
def __init__(self, *, raw_data = None, options, **kwargs):
|
|
36
36
|
"""
|
|
37
37
|
Top level constructor for calculation parsers.
|
|
38
38
|
"""
|
|
@@ -41,6 +41,9 @@ class Parser_abc():
|
|
|
41
41
|
|
|
42
42
|
# A result set object that we'll populate with results.
|
|
43
43
|
self.results = None
|
|
44
|
+
|
|
45
|
+
# Config options.
|
|
46
|
+
self.options = options
|
|
44
47
|
|
|
45
48
|
# Parse (if we haven't already).
|
|
46
49
|
try:
|
|
@@ -100,24 +103,23 @@ class Parser_abc():
|
|
|
100
103
|
# TODO: It would probably be better if we used the name of the user who owns the output file, rather than the current user...
|
|
101
104
|
self.data.metadata['user'] = self.get_current_username()
|
|
102
105
|
|
|
103
|
-
def process_all(self
|
|
106
|
+
def process_all(self):
|
|
104
107
|
"""
|
|
105
108
|
Get all the Result set objects produced by this parser.
|
|
106
109
|
|
|
107
110
|
:param options: A Digichem options nested dictionary containing options to control parsing.
|
|
108
111
|
:return: A list of the populated result sets.
|
|
109
112
|
"""
|
|
110
|
-
self.process(
|
|
113
|
+
self.process()
|
|
111
114
|
return [self.results]
|
|
112
115
|
|
|
113
|
-
def process(self
|
|
116
|
+
def process(self):
|
|
114
117
|
"""
|
|
115
118
|
Get a Result set object from this parser.
|
|
116
119
|
|
|
117
120
|
:param options: A Digichem options nested dictionary containing options to control parsing.
|
|
118
121
|
:return: The populated result set.
|
|
119
122
|
"""
|
|
120
|
-
self.options = options
|
|
121
123
|
# Get our result set.
|
|
122
124
|
self.results = Result_set(
|
|
123
125
|
_id = self.data._id,
|
|
@@ -125,7 +127,7 @@ class Parser_abc():
|
|
|
125
127
|
aux = self.data._aux if hasattr(self.data, '_aux') else None
|
|
126
128
|
)
|
|
127
129
|
|
|
128
|
-
alignment_class = Alignment.from_class_handle(options['alignment']) if options['alignment'] is not None else Minimal
|
|
130
|
+
alignment_class = Alignment.from_class_handle(self.options['alignment']) if self.options['alignment'] is not None else Minimal
|
|
129
131
|
|
|
130
132
|
# First get our list of MOs (because we need them for excited states too.)
|
|
131
133
|
self.results.orbitals = Molecular_orbital_list.from_parser(self)
|
digichem/parse/cclib.py
CHANGED
|
@@ -22,7 +22,7 @@ class Cclib_parser(File_parser_abc):
|
|
|
22
22
|
# A dictionary of recognised auxiliary file types.
|
|
23
23
|
INPUT_FILE_TYPES = {}
|
|
24
24
|
|
|
25
|
-
def __init__(self, *log_files, **auxiliary_files):
|
|
25
|
+
def __init__(self, *log_files, options, **auxiliary_files):
|
|
26
26
|
"""
|
|
27
27
|
Top level constructor for calculation parsers.
|
|
28
28
|
|
|
@@ -36,10 +36,10 @@ class Cclib_parser(File_parser_abc):
|
|
|
36
36
|
# Also have a look for a profile.csv file that we can us for performance metrics.
|
|
37
37
|
self.profile_file = Path(log_files[0].parent, "../Logs/profile.csv")
|
|
38
38
|
|
|
39
|
-
super().__init__(*log_files)
|
|
39
|
+
super().__init__(*log_files, options = options)
|
|
40
40
|
|
|
41
41
|
@classmethod
|
|
42
|
-
def from_logs(self, *log_files, hints = None, **kwargs):
|
|
42
|
+
def from_logs(self, *log_files, hints = None, options, **kwargs):
|
|
43
43
|
"""
|
|
44
44
|
Intelligent constructor that will attempt to guess the location of files from a given log file(s).
|
|
45
45
|
|
|
@@ -56,7 +56,7 @@ class Cclib_parser(File_parser_abc):
|
|
|
56
56
|
# Finally, update our auxiliary_files with kwargs, so any user specified aux files take precedence.
|
|
57
57
|
auxiliary_files.update(kwargs)
|
|
58
58
|
|
|
59
|
-
return self(*log_files, **auxiliary_files)
|
|
59
|
+
return self(*log_files, options = options, **auxiliary_files)
|
|
60
60
|
|
|
61
61
|
@classmethod
|
|
62
62
|
def find_auxiliary_files(self, hint, basename):
|
|
@@ -127,7 +127,11 @@ class Cclib_parser(File_parser_abc):
|
|
|
127
127
|
self.parse_profile_file()
|
|
128
128
|
|
|
129
129
|
except Exception:
|
|
130
|
-
|
|
130
|
+
if self.profile_file.exists():
|
|
131
|
+
digichem.log.get_logger().warning("Could not parse profile.csv file; profiling data will be unavailable", exc_info=True)
|
|
132
|
+
|
|
133
|
+
else:
|
|
134
|
+
pass
|
|
131
135
|
|
|
132
136
|
def parse_profile_file(self):
|
|
133
137
|
"""
|
|
@@ -146,7 +150,7 @@ class Cclib_parser(File_parser_abc):
|
|
|
146
150
|
if lines < 2:
|
|
147
151
|
return
|
|
148
152
|
|
|
149
|
-
max_lines =
|
|
153
|
+
max_lines = self.options.parse['profiling_rows']
|
|
150
154
|
factor = math.ceil(lines / max_lines)
|
|
151
155
|
|
|
152
156
|
with open(self.profile_file) as profile_file:
|
|
@@ -156,15 +160,49 @@ class Cclib_parser(File_parser_abc):
|
|
|
156
160
|
headers = next(reader)
|
|
157
161
|
|
|
158
162
|
# Check headers match.
|
|
159
|
-
if (headers[0]
|
|
160
|
-
headers[1]
|
|
161
|
-
headers[2]
|
|
162
|
-
headers[3]
|
|
163
|
-
headers[4]
|
|
164
|
-
headers[9]
|
|
165
|
-
headers[15]
|
|
166
|
-
headers[17]
|
|
163
|
+
if (headers[0] == "Duration / s" and
|
|
164
|
+
headers[1] == "Memory Used (Real) / bytes" and
|
|
165
|
+
headers[2] == "Memory Used (Real) / %" and
|
|
166
|
+
headers[3] == "Memory Available (Real) / bytes" and
|
|
167
|
+
headers[4] == "Memory Available (Real) / %" and
|
|
168
|
+
headers[9] == "CPU Usage / %" and
|
|
169
|
+
headers[15] == "Output Directory Available / bytes" and
|
|
170
|
+
headers[17] == "Scratch Directory Used / bytes" and
|
|
171
|
+
headers[18] == "Scratch Directory Available / bytes"
|
|
172
|
+
):
|
|
173
|
+
column_map = {
|
|
174
|
+
"duration": 0,
|
|
175
|
+
"memory_used": 1,
|
|
176
|
+
"memory_used_percent": 2,
|
|
177
|
+
"memory_available": 3,
|
|
178
|
+
"memory_available_percent": 4,
|
|
179
|
+
"cpu_used": 9,
|
|
180
|
+
"output_available": 15,
|
|
181
|
+
"scratch_used": 17,
|
|
182
|
+
"scratch_available": 18
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
elif (headers[0] == "Duration / s" and
|
|
186
|
+
headers[1] == "Memory Used (Real) / bytes" and
|
|
187
|
+
headers[2] == "Memory Used (Real) / %" and
|
|
188
|
+
headers[3] == "Memory Available (Real) / bytes" and
|
|
189
|
+
headers[4] == "Memory Available (Real) / %" and
|
|
190
|
+
headers[9] == "CPU Usage / %" and
|
|
191
|
+
headers[15] == "Output Directory Available / bytes" and
|
|
192
|
+
headers[17] == "Scratch Directory Available / bytes"
|
|
167
193
|
):
|
|
194
|
+
column_map = {
|
|
195
|
+
"duration": 0,
|
|
196
|
+
"memory_used": 1,
|
|
197
|
+
"memory_used_percent": 2,
|
|
198
|
+
"memory_available": 3,
|
|
199
|
+
"memory_available_percent": 4,
|
|
200
|
+
"cpu_used": 9,
|
|
201
|
+
"output_available": 15,
|
|
202
|
+
"scratch_available": 17
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
else:
|
|
168
206
|
raise Digichem_exception("wrong headers found in profile.csv file")
|
|
169
207
|
|
|
170
208
|
# Then the body.
|
|
@@ -185,17 +223,19 @@ class Cclib_parser(File_parser_abc):
|
|
|
185
223
|
# - cpu used
|
|
186
224
|
# - output space
|
|
187
225
|
# - scratch space
|
|
188
|
-
new_data = numpy.zeros((math.ceil(lines / factor),
|
|
226
|
+
new_data = numpy.zeros((math.ceil(lines / factor), len(column_map)))
|
|
189
227
|
|
|
190
228
|
# Now decimate.
|
|
191
|
-
for i, k in enumerate(
|
|
229
|
+
for i, k in enumerate(column_map.values()):
|
|
192
230
|
if factor > 1:
|
|
193
231
|
new_data[:, i] = signal.decimate(data[:, k], factor)
|
|
194
232
|
else:
|
|
195
233
|
new_data[:, i] = data[:, k]
|
|
196
234
|
|
|
197
235
|
|
|
198
|
-
self.data.metadata['performance'] =
|
|
236
|
+
self.data.metadata['performance'] = {
|
|
237
|
+
key: new_data[:, index] for index, key in enumerate(column_map)
|
|
238
|
+
}
|
|
199
239
|
|
|
200
240
|
|
|
201
241
|
def parse_output_line(self, log_file, line):
|