digichem-core 7.0.4__py3-none-any.whl → 7.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- digichem/__init__.py +2 -11
- digichem/config/base.py +2 -2
- digichem/datas.py +6 -2
- digichem/image/render.py +2 -1
- digichem/image/spectroscopy.py +15 -8
- digichem/input/__init__.py +1 -1
- digichem/input/digichem_input.py +33 -1
- digichem/input/gaussian.py +41 -1
- digichem/misc/base.py +12 -0
- digichem/misc/io.py +8 -3
- digichem/parse/base.py +135 -4
- digichem/parse/cclib.py +2 -115
- digichem/parse/censo.py +135 -0
- digichem/parse/crest.py +89 -0
- digichem/parse/gaussian.py +2 -2
- digichem/parse/pyscf.py +15 -4
- digichem/result/atom.py +37 -20
- digichem/result/metadata.py +3 -0
- digichem/result/nmr.py +103 -27
- digichem/result/spectroscopy.py +2 -2
- digichem/test/test_input.py +2 -1
- digichem/test/test_result.py +2 -2
- digichem/test/util.py +1 -0
- {digichem_core-7.0.4.dist-info → digichem_core-7.3.0.dist-info}/METADATA +3 -3
- {digichem_core-7.0.4.dist-info → digichem_core-7.3.0.dist-info}/RECORD +28 -26
- {digichem_core-7.0.4.dist-info → digichem_core-7.3.0.dist-info}/WHEEL +1 -1
- {digichem_core-7.0.4.dist-info → digichem_core-7.3.0.dist-info}/licenses/LICENSE +1 -1
- {digichem_core-7.0.4.dist-info → digichem_core-7.3.0.dist-info}/licenses/COPYING.md +0 -0
digichem/__init__.py
CHANGED
|
@@ -11,16 +11,7 @@ from digichem.datas import get_resource
|
|
|
11
11
|
####################
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
# major_version = 6
|
|
16
|
-
# minor_version = 0
|
|
17
|
-
# revision = 0
|
|
18
|
-
# prerelease = 1
|
|
19
|
-
# # Whether this is a development version.
|
|
20
|
-
# development = prerelease is not None
|
|
21
|
-
# # The full version number of this package.
|
|
22
|
-
# __version__ = "{}.{}.{}{}".format(major_version, minor_version, revision, "-pre.{}".format(prerelease) if development else "")
|
|
23
|
-
__version__ = "7.0.4"
|
|
14
|
+
__version__ = "7.3.0"
|
|
24
15
|
_v_parts = __version__.split("-")[0].split(".")
|
|
25
16
|
major_version = int(_v_parts[0])
|
|
26
17
|
minor_version = int(_v_parts[1])
|
|
@@ -39,7 +30,7 @@ __author__ = [
|
|
|
39
30
|
]
|
|
40
31
|
|
|
41
32
|
# Program date (when we were last updated). This is changed automatically.
|
|
42
|
-
_last_updated_string = "
|
|
33
|
+
_last_updated_string = "02/02/2026"
|
|
43
34
|
last_updated = datetime.strptime(_last_updated_string, "%d/%m/%Y")
|
|
44
35
|
|
|
45
36
|
# The sys attribute 'frozen' is our flag, '_MEIPASS' is the dir location.
|
digichem/config/base.py
CHANGED
|
@@ -249,7 +249,7 @@ To disable the maximum width, set to null.""", type = int, default = 1500),
|
|
|
249
249
|
|
|
250
250
|
nmr = Options(help = "Options for controlling simulated NMR spectra",
|
|
251
251
|
enable_rendering = Option(help = "Set to False to disable image rendering.", type = bool, default = True),
|
|
252
|
-
coupling_filter = Option(help = "Discard J coupling that is below this threshold (in Hz)", type = float, default = 1),
|
|
252
|
+
coupling_filter = Option(help = "Discard J coupling that is below this threshold (in Hz)", type = float, default = 0.1),
|
|
253
253
|
fwhm = Option(help = "The full-width at half-maximum; changes how wide the drawn peaks are. Note that the choice of peak width is essentially arbitrary; only the peak height is given by calculation. Units are ppm.", type = float, default = 0.01),
|
|
254
254
|
y_filter = Option(help = "The minimum y value to simulate using the Gaussian function (y values below this are discarded)", type = float, default = 1e-6),
|
|
255
255
|
gaussian_cutoff = Option(help = "The minimum y value to plot using the Gaussian function (controls how close to the x axis we draw the gaussian) as a fraction of the max peak height.", type = float, default = 0.001),
|
|
@@ -271,7 +271,7 @@ To disable the maximum width, set to null.""", type = int, default = 1500),
|
|
|
271
271
|
isotopes = Option(help = "Isotope specific options. Each key should consist of a tuple of (proton_number, isotope).", type = Nested_dict_type, default = Nested_dict_type({
|
|
272
272
|
# Resonance frequencies calculated at 9.3947 T.
|
|
273
273
|
# 1H, increase fidelity to see more detail.
|
|
274
|
-
"1H": {"frequency": 400, "fwhm": 0.
|
|
274
|
+
"1H": {"frequency": 400, "fwhm": 0.0015, "gaussian_resolution": 0.0001, "coupling_filter": 0.001, "pre_merge": 0.0005},
|
|
275
275
|
# 11B.
|
|
276
276
|
"11B": {"frequency": 128.3},
|
|
277
277
|
# 13C.
|
digichem/datas.py
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
import atexit
|
|
2
2
|
from contextlib import ExitStack
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
import
|
|
4
|
+
try:
|
|
5
|
+
import importlib.resources
|
|
6
|
+
importlib_resources = importlib.resources
|
|
7
|
+
|
|
8
|
+
except ImportError:
|
|
9
|
+
import importlib_resources
|
|
6
10
|
|
|
7
11
|
def get_resource(name):
|
|
8
12
|
"""
|
digichem/image/render.py
CHANGED
|
@@ -68,7 +68,8 @@ class Render_maker(File_converter, Cropable_mixin):
|
|
|
68
68
|
self.target_resolution = resolution
|
|
69
69
|
self.also_make_png = also_make_png
|
|
70
70
|
self.isovalue = isovalue
|
|
71
|
-
|
|
71
|
+
# TODO: Should we not guess the number of CPUs required?
|
|
72
|
+
self.num_cpu = num_cpu if num_cpu is not None else num_cpu
|
|
72
73
|
|
|
73
74
|
# TODO: These.
|
|
74
75
|
self.primary_colour = "red"
|
digichem/image/spectroscopy.py
CHANGED
|
@@ -435,7 +435,7 @@ class NMR_graph_maker_abc(Spectroscopy_graph_maker):
|
|
|
435
435
|
label = r"$\mathdefault{{{{{}}}_{{{}}}}}$ ({})".format(atom_group.element, atom_group.index, mult_string)
|
|
436
436
|
#label += "\n" + r"$\int$ = {}{}".format(len(atom_group.atoms), atom_group.element.symbol)
|
|
437
437
|
#label += "\n{:.2f} ppm".format(x_coord) + r", $\int$ = {}{}".format(len(atom_group.atoms), atom_group.element.symbol)
|
|
438
|
-
label += "\n{:.2f} ppm".format(x_coord) + r", {}{}".format(len(atom_group.atoms), atom_group.element.symbol)
|
|
438
|
+
label += "\n{:.2f} ppm".format(x_coord) + r", ∫ = {}{}".format(len(atom_group.atoms), atom_group.element.symbol)
|
|
439
439
|
|
|
440
440
|
else:
|
|
441
441
|
label = r"$\mathdefault{{{{{}}}_{{{}}}}}$".format(atom_group.element, atom_group.index)
|
|
@@ -572,9 +572,13 @@ class NMR_graph_maker(NMR_graph_maker_abc):
|
|
|
572
572
|
# We always want to make sure that zero is shown however.
|
|
573
573
|
#
|
|
574
574
|
# NMR is also typically shown on a reversed scale.
|
|
575
|
+
|
|
576
|
+
# First, work out how wide our graph will be.
|
|
577
|
+
min_x = min(min(visible_x_values), 0)
|
|
578
|
+
max_x = max(max(visible_x_values), 0)
|
|
575
579
|
|
|
576
580
|
x_padding = (
|
|
577
|
-
|
|
581
|
+
max_x - min_x
|
|
578
582
|
) * self.x_padding_percent
|
|
579
583
|
|
|
580
584
|
# If we have no negative shifts, set zero as the end of one scale.
|
|
@@ -634,7 +638,7 @@ class NMR_graph_zoom_maker(NMR_graph_maker_abc):
|
|
|
634
638
|
self.focus = focus
|
|
635
639
|
|
|
636
640
|
self.x_padding = None
|
|
637
|
-
self.x_padding_percent =
|
|
641
|
+
self.x_padding_percent = 0.5
|
|
638
642
|
|
|
639
643
|
self.target_width = 3.5
|
|
640
644
|
|
|
@@ -668,10 +672,11 @@ class NMR_graph_zoom_maker(NMR_graph_maker_abc):
|
|
|
668
672
|
|
|
669
673
|
# We need to get a list of all peaks that are above our cutoff point.
|
|
670
674
|
# First determine our highest point.
|
|
671
|
-
|
|
675
|
+
coords = graph.plot_cumulative_gaussian()
|
|
676
|
+
highest_point = max(self.transpose(coords)[1])
|
|
672
677
|
|
|
673
678
|
# Now filter by a fraction of that amount.
|
|
674
|
-
visible_x_values = [x for x, y in
|
|
679
|
+
visible_x_values = [x for x, y in coords if y >= (highest_point * self.peak_cutoff)]
|
|
675
680
|
|
|
676
681
|
x_padding = (
|
|
677
682
|
max(visible_x_values) -min(visible_x_values)
|
|
@@ -721,9 +726,11 @@ class NMR_graph_zoom_maker(NMR_graph_maker_abc):
|
|
|
721
726
|
|
|
722
727
|
|
|
723
728
|
highest_point = max(spectrum[1])
|
|
729
|
+
|
|
730
|
+
height = highest_point * 1.3
|
|
724
731
|
|
|
725
732
|
# Clamp to 0 -> pos.
|
|
726
|
-
self.axes.set_ylim(0
|
|
733
|
+
self.axes.set_ylim(0 - height * 0.025, height)
|
|
727
734
|
|
|
728
735
|
def plot_lines(self):
|
|
729
736
|
"""
|
|
@@ -796,8 +803,8 @@ class NMR_graph_zoom_maker(NMR_graph_maker_abc):
|
|
|
796
803
|
if len(couplings) > 0 and mult[0]["number"] != 1:
|
|
797
804
|
# Only show couplings for peaks we can actually distinguish.
|
|
798
805
|
for (coupling_group, coupling_isotope), coupling in list(couplings.items())[:len(mult)]:
|
|
799
|
-
|
|
800
|
-
|
|
806
|
+
label += "\n" + r"$\mathdefault{{^{{{}}}}}$J = {:.2f} Hz ($\mathdefault{{^{{{}}}{}_{{{}}}}}$, {}{})".format(
|
|
807
|
+
coupling.distance if coupling.distance is not None else "",
|
|
801
808
|
coupling.total,
|
|
802
809
|
coupling_isotope,
|
|
803
810
|
coupling_group.element,
|
digichem/input/__init__.py
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
from .base import Input_file
|
|
2
2
|
from .gaussian import Gaussian_input_parser
|
|
3
|
-
from .digichem_input import Digichem_coords_ABC, Digichem_coords, si_from_yaml, si_from_file, si_from_data
|
|
3
|
+
from .digichem_input import Digichem_coords_ABC, Digichem_coords, si_from_yaml, si_from_file, si_from_data, si_iter_from_xyz
|
digichem/input/digichem_input.py
CHANGED
|
@@ -502,4 +502,36 @@ def si_from_file(file_name, file_type = None, *, gen3D = None, **kwargs):
|
|
|
502
502
|
|
|
503
503
|
except:
|
|
504
504
|
raise ValueError("Could not parse coordinates from '{}'".format(file_name))
|
|
505
|
-
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
def si_iter_from_xyz(file_name, **kwargs):
|
|
508
|
+
"""
|
|
509
|
+
Return a generator that yields Digichem_coords objects from the molecules in an xyz file.
|
|
510
|
+
|
|
511
|
+
:param file_name: The XYZ file to read from. If the file contains multiple structures, they will be read in sequence.
|
|
512
|
+
"""
|
|
513
|
+
with open(file_name, "rt") as file:
|
|
514
|
+
# How many lines are we expecting in the current molecule.
|
|
515
|
+
doc_length = None
|
|
516
|
+
doc_line_no = 0
|
|
517
|
+
data = []
|
|
518
|
+
|
|
519
|
+
for line in file:
|
|
520
|
+
if doc_length is None:
|
|
521
|
+
# New document, how big is it?
|
|
522
|
+
doc_length = int(line.strip()) +2
|
|
523
|
+
|
|
524
|
+
# What line of the document are we on right now?
|
|
525
|
+
doc_line_no += 1
|
|
526
|
+
data.append(line)
|
|
527
|
+
|
|
528
|
+
# Are we still reading the same document?
|
|
529
|
+
if doc_line_no >= doc_length:
|
|
530
|
+
# End of the document, we got a molecule!
|
|
531
|
+
yield Digichem_coords_v2.from_xyz("".join(data), **kwargs)
|
|
532
|
+
|
|
533
|
+
# Time for a new document, reset.
|
|
534
|
+
doc_length = None
|
|
535
|
+
doc_line_no = 0
|
|
536
|
+
data = []
|
|
537
|
+
|
digichem/input/gaussian.py
CHANGED
|
@@ -130,11 +130,51 @@ class Gaussian_input_parser():
|
|
|
130
130
|
|
|
131
131
|
# And anything else.
|
|
132
132
|
self.additional_sections = sections[3:]
|
|
133
|
+
|
|
134
|
+
def geometry_with_iso(self, isotopes = {}):
|
|
135
|
+
"""
|
|
136
|
+
Get the geometry of this gaussian input file with specific isotopes.
|
|
137
|
+
|
|
138
|
+
:param isotopes: Isotope information. Each key should be an atomic index, or an element symbol, and each value the corresponding isotope (as an integer).
|
|
139
|
+
"""
|
|
140
|
+
lines = []
|
|
141
|
+
for index, line in enumerate(self.geometry.split("\n")):
|
|
142
|
+
split_line = line.split()
|
|
143
|
+
atom = split_line[0]
|
|
144
|
+
if "(" in atom and ")" in atom:
|
|
145
|
+
atom = atom[atom.find("("):]
|
|
146
|
+
|
|
147
|
+
iso = None
|
|
148
|
+
if index in isotopes:
|
|
149
|
+
iso = isotopes[index]
|
|
150
|
+
|
|
151
|
+
elif atom in isotopes:
|
|
152
|
+
iso = isotopes[atom]
|
|
153
|
+
|
|
154
|
+
if iso is not None:
|
|
155
|
+
atom = "{}(iso={})".format(atom, iso)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
lines.append(" ".join(
|
|
159
|
+
[atom, *split_line[1:]]
|
|
160
|
+
))
|
|
161
|
+
|
|
162
|
+
return "\n".join(lines)
|
|
163
|
+
|
|
133
164
|
|
|
134
165
|
@property
|
|
135
166
|
def xyz(self):
|
|
136
167
|
"""
|
|
137
168
|
Get the geometry of this gaussian input file in XYZ format.
|
|
138
169
|
"""
|
|
139
|
-
|
|
170
|
+
lines = []
|
|
171
|
+
for line in self.geometry.split("\n"):
|
|
172
|
+
split_line = line.split()
|
|
173
|
+
atom = split_line[0]
|
|
174
|
+
if "(" in atom and ")" in atom:
|
|
175
|
+
atom = atom[atom.find("("):]
|
|
176
|
+
lines.append(" ".join(
|
|
177
|
+
[atom, *split_line[1:]]
|
|
178
|
+
))
|
|
179
|
+
return "{}\n\n{}".format(len(self.geometry.split("\n")), "\n".join(lines))
|
|
140
180
|
|
digichem/misc/base.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from itertools import chain, combinations
|
|
2
|
+
import math
|
|
2
3
|
|
|
3
4
|
def powerset(iterable):
|
|
4
5
|
"powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"
|
|
@@ -26,6 +27,17 @@ def regular_range(median, number, spacing):
|
|
|
26
27
|
peaks.append(median - magnitude * spacing)
|
|
27
28
|
|
|
28
29
|
return sorted(peaks)
|
|
30
|
+
|
|
31
|
+
def round_sig(number, sig_figs):
|
|
32
|
+
"""Round a number to a given number of significant figures."""
|
|
33
|
+
try:
|
|
34
|
+
return round(number, sig_figs-int(math.floor(math.log10(abs(number))))-1)
|
|
35
|
+
|
|
36
|
+
except ValueError:
|
|
37
|
+
if number == 0:
|
|
38
|
+
return 0
|
|
39
|
+
else:
|
|
40
|
+
raise
|
|
29
41
|
|
|
30
42
|
|
|
31
43
|
def dict_list_index(dictionary, item):
|
digichem/misc/io.py
CHANGED
|
@@ -322,12 +322,17 @@ class Safe_path():
|
|
|
322
322
|
# Close our file.
|
|
323
323
|
self.close()
|
|
324
324
|
|
|
325
|
-
def dir_size(target):
|
|
325
|
+
def dir_size(target, apparent = False):
|
|
326
326
|
"""
|
|
327
327
|
Calculate the total used file space of a directory and all contents.
|
|
328
328
|
"""
|
|
329
329
|
bytes = 0
|
|
330
|
-
for path in itertools.chain(Path(target).
|
|
331
|
-
|
|
330
|
+
for path in itertools.chain(Path(target).glob('**/*'), [Path(target)]):
|
|
331
|
+
stat = path.stat()
|
|
332
|
+
if not apparent and hasattr(stat, "st_blocks"):
|
|
333
|
+
bytes += stat.st_blocks * 512
|
|
334
|
+
|
|
335
|
+
else:
|
|
336
|
+
bytes += stat.st_size
|
|
332
337
|
|
|
333
338
|
return bytes
|
digichem/parse/base.py
CHANGED
|
@@ -2,6 +2,10 @@
|
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
import pwd
|
|
4
4
|
import os
|
|
5
|
+
import csv
|
|
6
|
+
import numpy
|
|
7
|
+
import math
|
|
8
|
+
from scipy import signal
|
|
5
9
|
|
|
6
10
|
from digichem.exception.base import Digichem_exception
|
|
7
11
|
from digichem.result.orbital import Molecular_orbital_list,\
|
|
@@ -19,6 +23,7 @@ from digichem.result.vibration import Vibrations_list
|
|
|
19
23
|
from digichem.result.emission import Relaxed_excited_state
|
|
20
24
|
from digichem.result.nmr import NMR_shielding, NMR_spin_couplings_list, NMR_list
|
|
21
25
|
from digichem.result.alignment.base import Minimal, Alignment
|
|
26
|
+
import digichem.log
|
|
22
27
|
|
|
23
28
|
|
|
24
29
|
# NOTE: This is a repeat of the list in util to avoid circular import nonsense.
|
|
@@ -32,7 +37,7 @@ custom_parsing_formats = [
|
|
|
32
37
|
class Parser_abc():
|
|
33
38
|
"""ABC for all parsers."""
|
|
34
39
|
|
|
35
|
-
def __init__(self, *, raw_data = None, options, **kwargs):
|
|
40
|
+
def __init__(self, *, raw_data = None, options, metadata_defaults = None, profile_file = None, **kwargs):
|
|
36
41
|
"""
|
|
37
42
|
Top level constructor for calculation parsers.
|
|
38
43
|
"""
|
|
@@ -44,6 +49,12 @@ class Parser_abc():
|
|
|
44
49
|
|
|
45
50
|
# Config options.
|
|
46
51
|
self.options = options
|
|
52
|
+
|
|
53
|
+
# Manually provided overrides.
|
|
54
|
+
self.metadata_defaults = metadata_defaults if metadata_defaults is not None else {}
|
|
55
|
+
|
|
56
|
+
# Save the profiling file.
|
|
57
|
+
self.profile_file = profile_file
|
|
47
58
|
|
|
48
59
|
# Parse (if we haven't already).
|
|
49
60
|
try:
|
|
@@ -102,6 +113,126 @@ class Parser_abc():
|
|
|
102
113
|
# Add current username.
|
|
103
114
|
# TODO: It would probably be better if we used the name of the user who owns the output file, rather than the current user...
|
|
104
115
|
self.data.metadata['user'] = self.get_current_username()
|
|
116
|
+
|
|
117
|
+
# Add any user supplied defaults.
|
|
118
|
+
metadata = self.metadata_defaults.copy()
|
|
119
|
+
metadata.update(self.data.metadata)
|
|
120
|
+
self.data.metadata = metadata
|
|
121
|
+
|
|
122
|
+
# Add profiling data.
|
|
123
|
+
try:
|
|
124
|
+
self.parse_profile_file()
|
|
125
|
+
|
|
126
|
+
except Exception:
|
|
127
|
+
if self.profile_file and self.profile_file.exists():
|
|
128
|
+
digichem.log.get_logger().warning("Could not parse profile.csv file; profiling data will be unavailable", exc_info=True)
|
|
129
|
+
|
|
130
|
+
else:
|
|
131
|
+
pass
|
|
132
|
+
|
|
133
|
+
def parse_profile_file(self):
|
|
134
|
+
"""
|
|
135
|
+
"""
|
|
136
|
+
# Real calculations can end up with millions of rows, which is far too much data to handle.
|
|
137
|
+
# We will need to downsample if we have too many data points.
|
|
138
|
+
# First work out how many rows there are.
|
|
139
|
+
try:
|
|
140
|
+
with open(self.profile_file, "rb") as profile_file:
|
|
141
|
+
lines = sum(1 for _ in profile_file) -1 # Remove 1 line for the header.
|
|
142
|
+
|
|
143
|
+
except FileNotFoundError:
|
|
144
|
+
# This is ok
|
|
145
|
+
return
|
|
146
|
+
|
|
147
|
+
if lines < 2:
|
|
148
|
+
return
|
|
149
|
+
|
|
150
|
+
max_lines = self.options.parse['profiling_rows']
|
|
151
|
+
factor = math.ceil(lines / max_lines)
|
|
152
|
+
|
|
153
|
+
with open(self.profile_file) as profile_file:
|
|
154
|
+
reader = csv.reader(profile_file)
|
|
155
|
+
|
|
156
|
+
# Get the header.
|
|
157
|
+
headers = next(reader)
|
|
158
|
+
|
|
159
|
+
# Check headers match.
|
|
160
|
+
if (headers[0] == "Duration / s" and
|
|
161
|
+
headers[1] == "Memory Used (Real) / bytes" and
|
|
162
|
+
headers[2] == "Memory Used (Real) / %" and
|
|
163
|
+
headers[3] == "Memory Available (Real) / bytes" and
|
|
164
|
+
headers[4] == "Memory Available (Real) / %" and
|
|
165
|
+
headers[9] == "CPU Usage / %" and
|
|
166
|
+
headers[15] == "Output Directory Available / bytes" and
|
|
167
|
+
headers[17] == "Scratch Directory Used / bytes" and
|
|
168
|
+
headers[18] == "Scratch Directory Available / bytes"
|
|
169
|
+
):
|
|
170
|
+
column_map = {
|
|
171
|
+
"duration": 0,
|
|
172
|
+
"memory_used": 1,
|
|
173
|
+
"memory_used_percent": 2,
|
|
174
|
+
"memory_available": 3,
|
|
175
|
+
"memory_available_percent": 4,
|
|
176
|
+
"cpu_used": 9,
|
|
177
|
+
"output_available": 15,
|
|
178
|
+
"scratch_used": 17,
|
|
179
|
+
"scratch_available": 18
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
elif (headers[0] == "Duration / s" and
|
|
183
|
+
headers[1] == "Memory Used (Real) / bytes" and
|
|
184
|
+
headers[2] == "Memory Used (Real) / %" and
|
|
185
|
+
headers[3] == "Memory Available (Real) / bytes" and
|
|
186
|
+
headers[4] == "Memory Available (Real) / %" and
|
|
187
|
+
headers[9] == "CPU Usage / %" and
|
|
188
|
+
headers[15] == "Output Directory Available / bytes" and
|
|
189
|
+
headers[17] == "Scratch Directory Available / bytes"
|
|
190
|
+
):
|
|
191
|
+
column_map = {
|
|
192
|
+
"duration": 0,
|
|
193
|
+
"memory_used": 1,
|
|
194
|
+
"memory_used_percent": 2,
|
|
195
|
+
"memory_available": 3,
|
|
196
|
+
"memory_available_percent": 4,
|
|
197
|
+
"cpu_used": 9,
|
|
198
|
+
"output_available": 15,
|
|
199
|
+
"scratch_available": 17
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
else:
|
|
203
|
+
raise Digichem_exception("wrong headers found in profile.csv file")
|
|
204
|
+
|
|
205
|
+
# Then the body.
|
|
206
|
+
# TODO: Reading the entire file is not ideal...
|
|
207
|
+
data = numpy.genfromtxt(
|
|
208
|
+
profile_file,
|
|
209
|
+
delimiter=',',
|
|
210
|
+
# TODO: use something better.
|
|
211
|
+
filling_values = "0"
|
|
212
|
+
)
|
|
213
|
+
|
|
214
|
+
# We'll keep:
|
|
215
|
+
# - duration
|
|
216
|
+
# - memory used
|
|
217
|
+
# - memory used %
|
|
218
|
+
# - memory available
|
|
219
|
+
# - memory available %
|
|
220
|
+
# - cpu used
|
|
221
|
+
# - output space
|
|
222
|
+
# - scratch space
|
|
223
|
+
new_data = numpy.zeros((math.ceil(lines / factor), len(column_map)))
|
|
224
|
+
|
|
225
|
+
# Now decimate.
|
|
226
|
+
for i, k in enumerate(column_map.values()):
|
|
227
|
+
if factor > 1:
|
|
228
|
+
new_data[:, i] = signal.decimate(data[:, k], factor)
|
|
229
|
+
else:
|
|
230
|
+
new_data[:, i] = data[:, k]
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
self.data.metadata['performance'] = {
|
|
234
|
+
key: new_data[:, index] for index, key in enumerate(column_map)
|
|
235
|
+
}
|
|
105
236
|
|
|
106
237
|
def process_all(self):
|
|
107
238
|
"""
|
|
@@ -181,7 +312,7 @@ class Parser_abc():
|
|
|
181
312
|
class File_parser_abc(Parser_abc):
|
|
182
313
|
"""ABC for all parsers."""
|
|
183
314
|
|
|
184
|
-
def __init__(self, *log_files, raw_data = None, **kwargs):
|
|
315
|
+
def __init__(self, *log_files, raw_data = None, metadata_defaults = None, **kwargs):
|
|
185
316
|
"""
|
|
186
317
|
Top level constructor for calculation parsers.
|
|
187
318
|
|
|
@@ -189,12 +320,12 @@ class File_parser_abc(Parser_abc):
|
|
|
189
320
|
"""
|
|
190
321
|
# Set our name.
|
|
191
322
|
self.log_file_paths = self.sort_log_files([Path(log_file) for log_file in log_files if log_file is not None])
|
|
192
|
-
|
|
323
|
+
|
|
193
324
|
# Panic if we have no logs.
|
|
194
325
|
if len(self.log_file_paths) == 0:
|
|
195
326
|
raise Digichem_exception("Cannot parse calculation output; no available log files. Are you sure the given path is a log file or directory containing log files?")
|
|
196
327
|
|
|
197
|
-
super().__init__(raw_data=raw_data, **kwargs)
|
|
328
|
+
super().__init__(raw_data=raw_data, metadata_defaults = metadata_defaults, **kwargs)
|
|
198
329
|
|
|
199
330
|
@classmethod
|
|
200
331
|
def from_logs(self, *log_files, **kwargs):
|
digichem/parse/cclib.py
CHANGED
|
@@ -1,9 +1,5 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
import itertools
|
|
3
|
-
import csv
|
|
4
|
-
import numpy
|
|
5
|
-
import math
|
|
6
|
-
from scipy import signal
|
|
7
3
|
|
|
8
4
|
import digichem.log
|
|
9
5
|
from digichem.parse.base import File_parser_abc
|
|
@@ -22,7 +18,7 @@ class Cclib_parser(File_parser_abc):
|
|
|
22
18
|
# A dictionary of recognised auxiliary file types.
|
|
23
19
|
INPUT_FILE_TYPES = {}
|
|
24
20
|
|
|
25
|
-
def __init__(self, *log_files, options, **auxiliary_files):
|
|
21
|
+
def __init__(self, *log_files, options, metadata_defaults = None, **auxiliary_files):
|
|
26
22
|
"""
|
|
27
23
|
Top level constructor for calculation parsers.
|
|
28
24
|
|
|
@@ -31,12 +27,8 @@ class Cclib_parser(File_parser_abc):
|
|
|
31
27
|
"""
|
|
32
28
|
# Also save our aux files, stripping None.
|
|
33
29
|
self.auxiliary_files = {name: aux_file for name,aux_file in auxiliary_files.items() if aux_file is not None}
|
|
34
|
-
|
|
35
|
-
# TODO: Does this belong here?
|
|
36
|
-
# Also have a look for a profile.csv file that we can us for performance metrics.
|
|
37
|
-
self.profile_file = Path(log_files[0].parent, "../Logs/profile.csv")
|
|
38
30
|
|
|
39
|
-
super().__init__(*log_files, options = options)
|
|
31
|
+
super().__init__(*log_files, options = options, metadata_defaults = metadata_defaults, profile_file = Path(log_files[0].parent, "../Logs/profile.csv"))
|
|
40
32
|
|
|
41
33
|
@classmethod
|
|
42
34
|
def from_logs(self, *log_files, hints = None, options, **kwargs):
|
|
@@ -132,111 +124,6 @@ class Cclib_parser(File_parser_abc):
|
|
|
132
124
|
|
|
133
125
|
else:
|
|
134
126
|
pass
|
|
135
|
-
|
|
136
|
-
def parse_profile_file(self):
|
|
137
|
-
"""
|
|
138
|
-
"""
|
|
139
|
-
# Real calculations can end up with millions of rows, which is far too much data to handle.
|
|
140
|
-
# We will need to downsample if we have too many data points.
|
|
141
|
-
# First work out how many rows there are.
|
|
142
|
-
try:
|
|
143
|
-
with open(self.profile_file, "rb") as profile_file:
|
|
144
|
-
lines = sum(1 for _ in profile_file) -1 # Remove 1 line for the header.
|
|
145
|
-
|
|
146
|
-
except FileNotFoundError:
|
|
147
|
-
# This is ok
|
|
148
|
-
return
|
|
149
|
-
|
|
150
|
-
if lines < 2:
|
|
151
|
-
return
|
|
152
|
-
|
|
153
|
-
max_lines = self.options.parse['profiling_rows']
|
|
154
|
-
factor = math.ceil(lines / max_lines)
|
|
155
|
-
|
|
156
|
-
with open(self.profile_file) as profile_file:
|
|
157
|
-
reader = csv.reader(profile_file)
|
|
158
|
-
|
|
159
|
-
# Get the header.
|
|
160
|
-
headers = next(reader)
|
|
161
|
-
|
|
162
|
-
# Check headers match.
|
|
163
|
-
if (headers[0] == "Duration / s" and
|
|
164
|
-
headers[1] == "Memory Used (Real) / bytes" and
|
|
165
|
-
headers[2] == "Memory Used (Real) / %" and
|
|
166
|
-
headers[3] == "Memory Available (Real) / bytes" and
|
|
167
|
-
headers[4] == "Memory Available (Real) / %" and
|
|
168
|
-
headers[9] == "CPU Usage / %" and
|
|
169
|
-
headers[15] == "Output Directory Available / bytes" and
|
|
170
|
-
headers[17] == "Scratch Directory Used / bytes" and
|
|
171
|
-
headers[18] == "Scratch Directory Available / bytes"
|
|
172
|
-
):
|
|
173
|
-
column_map = {
|
|
174
|
-
"duration": 0,
|
|
175
|
-
"memory_used": 1,
|
|
176
|
-
"memory_used_percent": 2,
|
|
177
|
-
"memory_available": 3,
|
|
178
|
-
"memory_available_percent": 4,
|
|
179
|
-
"cpu_used": 9,
|
|
180
|
-
"output_available": 15,
|
|
181
|
-
"scratch_used": 17,
|
|
182
|
-
"scratch_available": 18
|
|
183
|
-
}
|
|
184
|
-
|
|
185
|
-
elif (headers[0] == "Duration / s" and
|
|
186
|
-
headers[1] == "Memory Used (Real) / bytes" and
|
|
187
|
-
headers[2] == "Memory Used (Real) / %" and
|
|
188
|
-
headers[3] == "Memory Available (Real) / bytes" and
|
|
189
|
-
headers[4] == "Memory Available (Real) / %" and
|
|
190
|
-
headers[9] == "CPU Usage / %" and
|
|
191
|
-
headers[15] == "Output Directory Available / bytes" and
|
|
192
|
-
headers[17] == "Scratch Directory Available / bytes"
|
|
193
|
-
):
|
|
194
|
-
column_map = {
|
|
195
|
-
"duration": 0,
|
|
196
|
-
"memory_used": 1,
|
|
197
|
-
"memory_used_percent": 2,
|
|
198
|
-
"memory_available": 3,
|
|
199
|
-
"memory_available_percent": 4,
|
|
200
|
-
"cpu_used": 9,
|
|
201
|
-
"output_available": 15,
|
|
202
|
-
"scratch_available": 17
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
else:
|
|
206
|
-
raise Digichem_exception("wrong headers found in profile.csv file")
|
|
207
|
-
|
|
208
|
-
# Then the body.
|
|
209
|
-
# TODO: Reading the entire file is not ideal...
|
|
210
|
-
data = numpy.genfromtxt(
|
|
211
|
-
profile_file,
|
|
212
|
-
delimiter=',',
|
|
213
|
-
# TODO: use something better.
|
|
214
|
-
filling_values = "0"
|
|
215
|
-
)
|
|
216
|
-
|
|
217
|
-
# We'll keep:
|
|
218
|
-
# - duration
|
|
219
|
-
# - memory used
|
|
220
|
-
# - memory used %
|
|
221
|
-
# - memory available
|
|
222
|
-
# - memory available %
|
|
223
|
-
# - cpu used
|
|
224
|
-
# - output space
|
|
225
|
-
# - scratch space
|
|
226
|
-
new_data = numpy.zeros((math.ceil(lines / factor), len(column_map)))
|
|
227
|
-
|
|
228
|
-
# Now decimate.
|
|
229
|
-
for i, k in enumerate(column_map.values()):
|
|
230
|
-
if factor > 1:
|
|
231
|
-
new_data[:, i] = signal.decimate(data[:, k], factor)
|
|
232
|
-
else:
|
|
233
|
-
new_data[:, i] = data[:, k]
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
self.data.metadata['performance'] = {
|
|
237
|
-
key: new_data[:, index] for index, key in enumerate(column_map)
|
|
238
|
-
}
|
|
239
|
-
|
|
240
127
|
|
|
241
128
|
def parse_output_line(self, log_file, line):
|
|
242
129
|
"""
|