digichem-core 7.2.0__py3-none-any.whl → 7.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
digichem/__init__.py CHANGED
@@ -11,7 +11,7 @@ from digichem.datas import get_resource
11
11
  ####################
12
12
 
13
13
 
14
- __version__ = "7.2.0"
14
+ __version__ = "7.3.0"
15
15
  _v_parts = __version__.split("-")[0].split(".")
16
16
  major_version = int(_v_parts[0])
17
17
  minor_version = int(_v_parts[1])
@@ -30,7 +30,7 @@ __author__ = [
30
30
  ]
31
31
 
32
32
  # Program date (when we were last updated). This is changed automatically.
33
- _last_updated_string = "18/12/2025"
33
+ _last_updated_string = "02/02/2026"
34
34
  last_updated = datetime.strptime(_last_updated_string, "%d/%m/%Y")
35
35
 
36
36
  # The sys attribute 'frozen' is our flag, '_MEIPASS' is the dir location.
digichem/datas.py CHANGED
@@ -1,8 +1,12 @@
1
1
  import atexit
2
2
  from contextlib import ExitStack
3
3
 
4
- # TODO: Can switch to non-backport importlib.resources once >= 3.9
5
- import importlib_resources
4
+ try:
5
+ import importlib.resources
6
+ importlib_resources = importlib.resources
7
+
8
+ except ImportError:
9
+ import importlib_resources
6
10
 
7
11
  def get_resource(name):
8
12
  """
digichem/image/render.py CHANGED
@@ -68,7 +68,8 @@ class Render_maker(File_converter, Cropable_mixin):
68
68
  self.target_resolution = resolution
69
69
  self.also_make_png = also_make_png
70
70
  self.isovalue = isovalue
71
- self.num_cpu = num_cpu
71
+ # TODO: Should we not guess the number of CPUs required?
72
+ self.num_cpu = num_cpu if num_cpu is not None else num_cpu
72
73
 
73
74
  # TODO: These.
74
75
  self.primary_colour = "red"
@@ -1,3 +1,3 @@
1
1
  from .base import Input_file
2
2
  from .gaussian import Gaussian_input_parser
3
- from .digichem_input import Digichem_coords_ABC, Digichem_coords, si_from_yaml, si_from_file, si_from_data
3
+ from .digichem_input import Digichem_coords_ABC, Digichem_coords, si_from_yaml, si_from_file, si_from_data, si_iter_from_xyz
@@ -502,4 +502,36 @@ def si_from_file(file_name, file_type = None, *, gen3D = None, **kwargs):
502
502
 
503
503
  except:
504
504
  raise ValueError("Could not parse coordinates from '{}'".format(file_name))
505
-
505
+
506
+
507
def si_iter_from_xyz(file_name, **kwargs):
    """
    Return a generator that yields Digichem_coords objects from the molecules in an xyz file.

    Each structure is read as one 'document': a first line holding the atom count, followed by
    one comment line and then one line per atom (so atom count + 2 lines in total).
    Blank lines found where a new document is expected (between structures or at the end of
    the file) are skipped. A final document with fewer lines than its atom count promises is
    not yielded.

    :param file_name: The XYZ file to read from. If the file contains multiple structures, they will be read in sequence.
    :param kwargs: Passed unchanged to Digichem_coords_v2.from_xyz() for every structure.
    :raises ValueError: If the first non-blank line of a document is not an integer atom count.
    :return: A generator of the objects returned by Digichem_coords_v2.from_xyz(), one per complete structure.
    """
    with open(file_name, "rt") as file:
        # How many lines are we expecting in the current molecule (None between documents).
        doc_length = None
        # The lines of the current document read so far.
        data = []

        for line in file:
            if doc_length is None:
                header = line.strip()
                if not header:
                    # Blank separator or trailing line, there is no document here yet.
                    # Without this check int('') below would raise for otherwise valid files.
                    continue

                # New document, how big is it? Atom count, plus the count line and the comment line.
                doc_length = int(header) + 2

            data.append(line)

            # Are we still reading the same document?
            if len(data) >= doc_length:
                # End of the document, we got a molecule!
                yield Digichem_coords_v2.from_xyz("".join(data), **kwargs)

                # Time for a new document, reset.
                doc_length = None
                data = []
537
+
digichem/misc/io.py CHANGED
@@ -322,12 +322,17 @@ class Safe_path():
322
322
  # Close our file.
323
323
  self.close()
324
324
 
325
def dir_size(target, apparent = False):
    """
    Calculate the total used file space of a directory and all contents.

    The size of the target itself is included in the total. Entries that cannot be
    found when they are inspected (for example a dangling symbolic link, or a file that was
    deleted while the directory was being walked) are skipped rather than aborting the calculation.

    :param target: Path to the directory (or file) to measure.
    :param apparent: If True, sum the apparent size of each entry (st_size). If False (the default), sum the allocated space (st_blocks * 512) where the platform reports st_blocks, falling back to st_size otherwise.
    :raises FileNotFoundError: If target itself does not exist.
    :return: The total size in bytes.
    """
    def entry_size(stat):
        # st_blocks counts 512-byte units; it is not available on every platform.
        if not apparent and hasattr(stat, "st_blocks"):
            return stat.st_blocks * 512

        return stat.st_size

    root = Path(target)

    # A missing target is a genuine error, so this stat() is deliberately unguarded.
    total = entry_size(root.stat())

    for path in root.glob('**/*'):
        try:
            stat = path.stat()

        except FileNotFoundError:
            # Dangling symlink, or the entry vanished since it was listed.
            continue

        total += entry_size(stat)

    return total
digichem/parse/base.py CHANGED
@@ -2,6 +2,10 @@
2
2
  from pathlib import Path
3
3
  import pwd
4
4
  import os
5
+ import csv
6
+ import numpy
7
+ import math
8
+ from scipy import signal
5
9
 
6
10
  from digichem.exception.base import Digichem_exception
7
11
  from digichem.result.orbital import Molecular_orbital_list,\
@@ -19,6 +23,7 @@ from digichem.result.vibration import Vibrations_list
19
23
  from digichem.result.emission import Relaxed_excited_state
20
24
  from digichem.result.nmr import NMR_shielding, NMR_spin_couplings_list, NMR_list
21
25
  from digichem.result.alignment.base import Minimal, Alignment
26
+ import digichem.log
22
27
 
23
28
 
24
29
  # NOTE: This is a repeat of the list in util to avoid circular import nonsense.
@@ -32,7 +37,7 @@ custom_parsing_formats = [
32
37
  class Parser_abc():
33
38
  """ABC for all parsers."""
34
39
 
35
- def __init__(self, *, raw_data = None, options, metadata_defaults = None, **kwargs):
40
+ def __init__(self, *, raw_data = None, options, metadata_defaults = None, profile_file = None, **kwargs):
36
41
  """
37
42
  Top level constructor for calculation parsers.
38
43
  """
@@ -48,6 +53,9 @@ class Parser_abc():
48
53
  # Manually provided overrides.
49
54
  self.metadata_defaults = metadata_defaults if metadata_defaults is not None else {}
50
55
 
56
+ # Save the profiling file.
57
+ self.profile_file = profile_file
58
+
51
59
  # Parse (if we haven't already).
52
60
  try:
53
61
  if self.data is None:
@@ -110,6 +118,121 @@ class Parser_abc():
110
118
  metadata = self.metadata_defaults.copy()
111
119
  metadata.update(self.data.metadata)
112
120
  self.data.metadata = metadata
121
+
122
+ # Add profiling data.
123
+ try:
124
+ self.parse_profile_file()
125
+
126
+ except Exception:
127
+ if self.profile_file and self.profile_file.exists():
128
+ digichem.log.get_logger().warning("Could not parse profile.csv file; profiling data will be unavailable", exc_info=True)
129
+
130
+ else:
131
+ pass
132
+
133
def parse_profile_file(self):
    """
    Read the profiling CSV file at self.profile_file and store its (possibly downsampled) columns in self.data.metadata['performance'].

    The file is expected to start with a header row in one of two known layouts (with or without a
    'Scratch Directory Used / bytes' column). Only a subset of the columns is kept. If there are more
    data rows than self.options.parse['profiling_rows'], every kept column is downsampled with
    scipy.signal.decimate() so that at most that many rows remain.

    Nothing is stored (and no error is raised) if no profile file was given, the file does not exist,
    or it contains fewer than two data rows.

    :raises Digichem_exception: If the header row matches neither known layout, or if the 'profiling_rows' option is less than 1.
    """
    if self.profile_file is None:
        # No profile file for this parser, nothing to do.
        return

    # Real calculations can end up with millions of rows, which is far too much data to handle.
    # First do a cheap pass to see whether the file is worth loading at all.
    try:
        with open(self.profile_file, "rb") as profile_file:
            lines = sum(1 for _ in profile_file) - 1 # Remove 1 line for the header.

    except FileNotFoundError:
        # This is ok
        return

    if lines < 2:
        return

    max_lines = self.options.parse['profiling_rows']
    if max_lines < 1:
        # Would otherwise surface as a ZeroDivisionError (or a negative decimation factor) below.
        raise Digichem_exception("the 'profiling_rows' option must be at least 1")

    # The columns we keep, as (name, column index, expected header text).
    # These are shared by both known layouts.
    common_columns = (
        ("duration", 0, "Duration / s"),
        ("memory_used", 1, "Memory Used (Real) / bytes"),
        ("memory_used_percent", 2, "Memory Used (Real) / %"),
        ("memory_available", 3, "Memory Available (Real) / bytes"),
        ("memory_available_percent", 4, "Memory Available (Real) / %"),
        ("cpu_used", 9, "CPU Usage / %"),
        ("output_available", 15, "Output Directory Available / bytes"),
    )
    # The two layouts differ only in their scratch columns; the first to match wins.
    layouts = (
        common_columns + (
            ("scratch_used", 17, "Scratch Directory Used / bytes"),
            ("scratch_available", 18, "Scratch Directory Available / bytes"),
        ),
        common_columns + (
            ("scratch_available", 17, "Scratch Directory Available / bytes"),
        ),
    )

    with open(self.profile_file) as profile_file:
        # Get the header.
        headers = next(csv.reader(profile_file))

        # Check headers match. A header row that is too short simply fails to match.
        column_map = None
        for layout in layouts:
            if all(index < len(headers) and headers[index] == title for _, index, title in layout):
                column_map = {name: index for name, index, _ in layout}
                break

        if column_map is None:
            raise Digichem_exception("wrong headers found in profile.csv file")

        # Then the body (the header line has already been consumed from the file object).
        # TODO: Reading the entire file is not ideal...
        data = numpy.genfromtxt(
            profile_file,
            delimiter=',',
            # TODO: use something better.
            filling_values = "0"
        )

    # A single data row is returned as a 1D array; normalise to (rows, columns).
    data = numpy.atleast_2d(data)
    if data.size == 0:
        return

    # Size everything from the rows actually loaded; genfromtxt() skips blank lines, so this can be
    # fewer than the raw line count found above.
    rows = data.shape[0]
    factor = math.ceil(rows / max_lines)
    new_data = numpy.zeros((math.ceil(rows / factor), len(column_map)))

    # Now decimate.
    # NOTE(review): the scipy documentation recommends calling decimate() repeatedly for IIR
    # downsampling factors above 13; very large factors here may need revisiting.
    for i, k in enumerate(column_map.values()):
        if factor > 1:
            new_data[:, i] = signal.decimate(data[:, k], factor)
        else:
            new_data[:, i] = data[:, k]

    self.data.metadata['performance'] = {
        key: new_data[:, index] for index, key in enumerate(column_map)
    }
113
236
 
114
237
  def process_all(self):
115
238
  """
@@ -197,7 +320,7 @@ class File_parser_abc(Parser_abc):
197
320
  """
198
321
  # Set our name.
199
322
  self.log_file_paths = self.sort_log_files([Path(log_file) for log_file in log_files if log_file is not None])
200
-
323
+
201
324
  # Panic if we have no logs.
202
325
  if len(self.log_file_paths) == 0:
203
326
  raise Digichem_exception("Cannot parse calculation output; no available log files. Are you sure the given path is a log file or directory containing log files?")
digichem/parse/cclib.py CHANGED
@@ -1,9 +1,5 @@
1
1
  from pathlib import Path
2
2
  import itertools
3
- import csv
4
- import numpy
5
- import math
6
- from scipy import signal
7
3
 
8
4
  import digichem.log
9
5
  from digichem.parse.base import File_parser_abc
@@ -31,12 +27,8 @@ class Cclib_parser(File_parser_abc):
31
27
  """
32
28
  # Also save our aux files, stripping None.
33
29
  self.auxiliary_files = {name: aux_file for name,aux_file in auxiliary_files.items() if aux_file is not None}
34
-
35
- # TODO: Does this belong here?
36
- # Also have a look for a profile.csv file that we can us for performance metrics.
37
- self.profile_file = Path(log_files[0].parent, "../Logs/profile.csv")
38
30
 
39
- super().__init__(*log_files, options = options, metadata_defaults = metadata_defaults)
31
+ super().__init__(*log_files, options = options, metadata_defaults = metadata_defaults, profile_file = Path(log_files[0].parent, "../Logs/profile.csv"))
40
32
 
41
33
  @classmethod
42
34
  def from_logs(self, *log_files, hints = None, options, **kwargs):
@@ -132,111 +124,6 @@ class Cclib_parser(File_parser_abc):
132
124
 
133
125
  else:
134
126
  pass
135
-
136
- def parse_profile_file(self):
137
- """
138
- """
139
- # Real calculations can end up with millions of rows, which is far too much data to handle.
140
- # We will need to downsample if we have too many data points.
141
- # First work out how many rows there are.
142
- try:
143
- with open(self.profile_file, "rb") as profile_file:
144
- lines = sum(1 for _ in profile_file) -1 # Remove 1 line for the header.
145
-
146
- except FileNotFoundError:
147
- # This is ok
148
- return
149
-
150
- if lines < 2:
151
- return
152
-
153
- max_lines = self.options.parse['profiling_rows']
154
- factor = math.ceil(lines / max_lines)
155
-
156
- with open(self.profile_file) as profile_file:
157
- reader = csv.reader(profile_file)
158
-
159
- # Get the header.
160
- headers = next(reader)
161
-
162
- # Check headers match.
163
- if (headers[0] == "Duration / s" and
164
- headers[1] == "Memory Used (Real) / bytes" and
165
- headers[2] == "Memory Used (Real) / %" and
166
- headers[3] == "Memory Available (Real) / bytes" and
167
- headers[4] == "Memory Available (Real) / %" and
168
- headers[9] == "CPU Usage / %" and
169
- headers[15] == "Output Directory Available / bytes" and
170
- headers[17] == "Scratch Directory Used / bytes" and
171
- headers[18] == "Scratch Directory Available / bytes"
172
- ):
173
- column_map = {
174
- "duration": 0,
175
- "memory_used": 1,
176
- "memory_used_percent": 2,
177
- "memory_available": 3,
178
- "memory_available_percent": 4,
179
- "cpu_used": 9,
180
- "output_available": 15,
181
- "scratch_used": 17,
182
- "scratch_available": 18
183
- }
184
-
185
- elif (headers[0] == "Duration / s" and
186
- headers[1] == "Memory Used (Real) / bytes" and
187
- headers[2] == "Memory Used (Real) / %" and
188
- headers[3] == "Memory Available (Real) / bytes" and
189
- headers[4] == "Memory Available (Real) / %" and
190
- headers[9] == "CPU Usage / %" and
191
- headers[15] == "Output Directory Available / bytes" and
192
- headers[17] == "Scratch Directory Available / bytes"
193
- ):
194
- column_map = {
195
- "duration": 0,
196
- "memory_used": 1,
197
- "memory_used_percent": 2,
198
- "memory_available": 3,
199
- "memory_available_percent": 4,
200
- "cpu_used": 9,
201
- "output_available": 15,
202
- "scratch_available": 17
203
- }
204
-
205
- else:
206
- raise Digichem_exception("wrong headers found in profile.csv file")
207
-
208
- # Then the body.
209
- # TODO: Reading the entire file is not ideal...
210
- data = numpy.genfromtxt(
211
- profile_file,
212
- delimiter=',',
213
- # TODO: use something better.
214
- filling_values = "0"
215
- )
216
-
217
- # We'll keep:
218
- # - duration
219
- # - memory used
220
- # - memory used %
221
- # - memory available
222
- # - memory available %
223
- # - cpu used
224
- # - output space
225
- # - scratch space
226
- new_data = numpy.zeros((math.ceil(lines / factor), len(column_map)))
227
-
228
- # Now decimate.
229
- for i, k in enumerate(column_map.values()):
230
- if factor > 1:
231
- new_data[:, i] = signal.decimate(data[:, k], factor)
232
- else:
233
- new_data[:, i] = data[:, k]
234
-
235
-
236
- self.data.metadata['performance'] = {
237
- key: new_data[:, index] for index, key in enumerate(column_map)
238
- }
239
-
240
127
 
241
128
  def parse_output_line(self, log_file, line):
242
129
  """
@@ -0,0 +1,135 @@
1
+ import hashlib
2
+ import json
3
+ import datetime
4
+ import periodictable
5
+ from uuid import uuid4
6
+ import types
7
+
8
+ from digichem.parse.base import Parser_abc
9
+ import digichem.log
10
+ from digichem.input.digichem_input import si_from_file
11
+
12
+
13
class Censo_parser(Parser_abc):
    """
    Top level class for parsing output from censo data.

    Results are not read from log files; they are assembled from the program object
    (and the coordinate file it points to) given to the constructor.
    """

    # The phases of a censo calculation, in the order they are inspected, as
    # (key in calculation.properties, label added to the 'calculations' metadata).
    # NOTE(review): prescreening is reported with the same 'Screening' label as screening,
    # so both can appear; confirm this is intended.
    _PHASES = (
        ("prescreening", "Screening"),
        ("screening", "Screening"),
        ("optimisation", "Optimisation"),
        ("refinement", "Single Point"),
    )

    def __init__(self, mol_name, program, **kwargs):
        """
        :param mol_name: The name to record in the 'name' metadata field.
        :param program: The program object describing the censo run; read for its calculation, duration, error and next_coords attributes.
        :param kwargs: Passed on to the Parser_abc constructor.
        """
        # These must be set before calling the parent constructor, which is given
        # everything else and may need them.
        self.program = program
        self.mol_name = mol_name
        super().__init__(**kwargs)

    def _parse(self):
        """
        Extract results from our output files.

        Populates self.data.metadata from the program object and, if a coordinate file is
        available, self.data.atomnos and self.data.atomcoords from that file.
        """
        self.data = types.SimpleNamespace()
        calculation = self.program.calculation

        # Censo calculations are normally made up of four distinct phases (prescreening, screening, optimisation, and refinement).
        calculations = []
        methods = []
        functional = []
        basis_set = []
        engines = []

        # Collect the settings of every phase that was actually requested.
        for phase, label in self._PHASES:
            settings = calculation.properties[phase]
            if not settings['calc']:
                continue

            calculations.append(label)
            methods.append(settings['gfn'])
            methods.append("DFT")
            engines.append(settings['engine'])
            functional.append(settings['functional'])
            basis_set.append(settings['basis_set'])

        # Remove duplicate engines but keep first-seen order, so the 'package' string is
        # the same on every run (a set has no defined order).
        engines = ["CENSO"] + list(dict.fromkeys(engines))

        # Metadata we can get entirely from our passed in program object.
        self.data.metadata = {
            "name": self.mol_name,
            "jobId": calculation.job_id,
            "wall_time": [self.program.duration],
            "cpu_time": [self.program.duration * calculation.performance['num_cpu']],
            "date": datetime.datetime.now(datetime.timezone.utc).timestamp(),
            "package": "/".join(engines),
            #"package_version": None,
            "calculations": calculations,
            "success": not self.program.error,
            "methods": methods,
            "functional": "/".join(functional),
            "basis_set": "/".join(basis_set),
            "charge": calculation.charge,
            "multiplicity": calculation.multiplicity,
            "optimisation_converged": not self.program.error,
            #"temperature": None,
            #"pressure": None,
            "orbital_spin_type": "restricted" if calculation.multiplicity == 1 else "unrestricted",
            "solvent_name": str(calculation.solution['solvent']) if calculation.solution['calc'] else None,
            "solvent_model": calculation.solution['model'] if calculation.solution['calc'] else None,
            "num_cpu": calculation.performance['num_cpu'],
            #"memory_used": None,
            "memory_available": calculation.performance['memory'],
        }

        # The output coordinates may not be available; in that case no geometry is recorded.
        try:
            coord_file = self.program.next_coords

        except Exception:
            coord_file = None

        if coord_file:
            main_si = si_from_file(coord_file, gen3D = False, charge = calculation.charge, multiplicity = calculation.multiplicity)

            self.data.atomnos = []
            # A single geometry, stored as the only entry of a list of geometries.
            self.data.atomcoords = [[]]

            for atom in main_si.atoms:
                # Element symbol -> atomic number.
                self.data.atomnos.append(periodictable.elements.symbol(atom['atom']).number)
                self.data.atomcoords[0].append(
                    [
                        atom['x'],
                        atom['y'],
                        atom['z']
                    ]
                )

    def post_parse(self):
        """
        Perform any required operations after line-by-line parsing.

        After the parent class has done its work, this sets self.data._id to a SHA-1 checksum of the metadata.
        """
        super().post_parse()

        try:
            # Try to generate a checksum from metadata.
            self.data._id = hashlib.sha1(json.dumps(self.data.metadata, sort_keys = True, default = str).encode('utf-8')).hexdigest()

        except Exception:
            # No luck, something in metadata must be unhashable.
            digichem.log.get_logger().error("Unable to generate hash ID from calculation metadata, using random ID instead", exc_info = True)
            # TODO: Think of a better way to do this.
            self.data._id = hashlib.sha1(uuid4().hex.encode('utf-8')).hexdigest()
134
+
135
+
@@ -0,0 +1,89 @@
1
+ import hashlib
2
+ import json
3
+ import datetime
4
+ import periodictable
5
+ from uuid import uuid4
6
+ import types
7
+
8
+ from digichem.parse.base import Parser_abc
9
+ import digichem.log
10
+ from digichem.input.digichem_input import si_from_file
11
+
12
+
13
+
14
class Crest_parser(Parser_abc):
    """
    Parser that builds a result set for a crest calculation.

    Nothing is read line-by-line from a log file; the metadata comes from the program object
    given to the constructor and the geometry from the 'crest_conformers.xyz' file in its
    working directory.
    """

    def __init__(self, mol_name, program, **kwargs):
        """
        :param mol_name: The name to record in the 'name' metadata field.
        :param program: The program object describing the crest run.
        :param kwargs: Passed on to the Parser_abc constructor.
        """
        # Both attributes are stored before the parent constructor runs.
        self.program = program
        self.mol_name = mol_name
        super().__init__(**kwargs)

    def _parse(self):
        """
        Build self.data (metadata, atomnos and atomcoords) from the program object and its conformer file.
        """
        program = self.program
        calc = program.calculation
        result = types.SimpleNamespace()
        self.data = result

        # Everything in here is available directly from the program object.
        metadata = {}
        metadata["name"] = self.mol_name
        metadata["jobId"] = calc.job_id
        metadata["wall_time"] = [program.duration]
        metadata["cpu_time"] = [program.duration * calc.performance['num_cpu']]
        metadata["date"] = datetime.datetime.now(datetime.timezone.utc).timestamp()
        metadata["package"] = "CREST"
        #metadata["package_version"] = None
        metadata["calculations"] = ["Optimisation"]
        metadata["success"] = not program.error
        metadata["methods"] = [calc.method['gfn']['level']]
        metadata["charge"] = calc.charge
        metadata["multiplicity"] = calc.multiplicity
        metadata["optimisation_converged"] = not program.error
        #metadata["temperature"] = None
        #metadata["pressure"] = None
        # TODO: CREST doesn't actually use orbitals...
        metadata["orbital_spin_type"] = "restricted"
        # Solvent details are only reported when a solvent calculation was requested.
        if calc.solution['calc']:
            metadata["solvent_name"] = str(calc.solution['solvent'])
        else:
            metadata["solvent_name"] = None
        if calc.solution['calc']:
            metadata["solvent_model"] = calc.solution['model']
        else:
            metadata["solvent_model"] = None
        metadata["num_cpu"] = calc.performance['num_cpu']
        #metadata["memory_used"] = None
        metadata["memory_available"] = calc.performance['memory']
        result.metadata = metadata

        # Crest is normally used for conformer searching, so several structures are expected as output.
        # The lowest energy conformer is taken as the 'main' structure.
        conformer_file = program.working_directory / "crest_conformers.xyz"
        main_si = si_from_file(conformer_file, gen3D = False, charge = calc.charge, multiplicity = calc.multiplicity)

        atomic_numbers = []
        geometry = []
        result.atomnos = atomic_numbers
        # One geometry only, held as the single entry of a list of geometries.
        result.atomcoords = [geometry]

        for atom in main_si.atoms:
            # Element symbol -> atomic number.
            element = periodictable.elements.symbol(atom['atom'])
            atomic_numbers.append(element.number)
            geometry.append([atom['x'], atom['y'], atom['z']])

    def post_parse(self):
        """
        Run the parent post-parse step, then give the result an ID (self.data._id).

        The ID is a SHA-1 checksum of the metadata, or of a random UUID if the metadata cannot be serialised.
        """
        super().post_parse()

        try:
            # Checksum of the metadata, serialised with sorted keys.
            serialised = json.dumps(self.data.metadata, sort_keys = True, default = str)
            self.data._id = hashlib.sha1(serialised.encode('utf-8')).hexdigest()

        except Exception:
            # Something in the metadata could not be handled; fall back to a random ID.
            digichem.log.get_logger().error("Unable to generate hash ID from calculation metadata, using random ID instead", exc_info = True)
            # TODO: Think of a better way to do this.
            random_hex = uuid4().hex
            self.data._id = hashlib.sha1(random_hex.encode('utf-8')).hexdigest()
88
+
89
+
digichem/parse/pyscf.py CHANGED
@@ -7,6 +7,7 @@ from cclib.bridge.cclib2pyscf import cclibfrommethods
7
7
  from digichem.parse.base import Parser_abc
8
8
  import digichem.log
9
9
 
10
+
10
11
  class Pyscf_parser(Parser_abc):
11
12
  """
12
13
  Top level class for parsing output from pyscf data.
digichem/result/atom.py CHANGED
@@ -318,28 +318,30 @@ class Atom_list(Result_container, Unmergeable_container_mixin, Molecule_mixin):
318
318
  return self._smiles
319
319
 
320
320
  except AttributeError:
321
- # Cache miss, go do some work.
321
+ pass
322
+
323
+ # Cache miss, go do some work.
322
324
 
323
- from rdkit.Chem import MolToSmiles
324
- from rdkit.Chem.rdmolops import RemoveHs
325
-
326
- # TODO: Find some other way of generating SMILES.
325
+ from rdkit.Chem import MolToSmiles
326
+ from rdkit.Chem.rdmolops import RemoveHs
327
+
328
+ # TODO: Find some other way of generating SMILES.
327
329
 
328
- mol = self.to_rdkit_molecule()
329
- try:
330
- # TODO: rdkit is unreliable, this method can fail for lots of reasons...
331
- mol = RemoveHs(mol)
332
- except Exception:
333
- pass
334
-
335
- self._smiles = MolToSmiles(mol)
336
- return self._smiles
330
+ mol = self.to_rdkit_molecule()
331
+ try:
332
+ # TODO: rdkit is unreliable, this method can fail for lots of reasons...
333
+ mol = RemoveHs(mol)
334
+ except Exception:
335
+ pass
336
+
337
+ self._smiles = MolToSmiles(mol)
338
+ return self._smiles
337
339
 
338
- # # TODO: Handle cases where obabel isn't available
339
- # conv = Openprattle_converter.get_cls("xyz")(input_file = self.to_xyz(), input_file_type = "xyz")
340
- # # Cache the result in case we need it again.
341
- # self._smiles = conv.convert("can").strip()
342
- # return self._smiles
340
+ # # TODO: Handle cases where obabel isn't available
341
+ # conv = Openprattle_converter.get_cls("xyz")(input_file = self.to_xyz(), input_file_type = "xyz")
342
+ # # Cache the result in case we need it again.
343
+ # self._smiles = conv.convert("can").strip()
344
+ # return self._smiles
343
345
 
344
346
  @property
345
347
  def X_length(self):
@@ -567,9 +569,16 @@ class Atom_list(Result_container, Unmergeable_container_mixin, Molecule_mixin):
567
569
  rdDetermineBonds.DetermineBonds(mol, charge = self.charge)
568
570
 
569
571
  except Exception:
572
+ #formula_string may also not be implemented...
573
+ try:
574
+ formula_string = self.formula_string
575
+
576
+ except Exception:
577
+ formula_string = None
578
+
570
579
  # This function is not implemented for some atoms (eg, Se).
571
580
  digichem.log.get_logger().warning(
572
- "Unable to determine bond ordering for molecule; all bonds will be represented as single bonds only".format(self.formula_string)
581
+ "Unable to determine bond ordering for '{}'; all bonds will be represented as single bonds only".format(formula_string)
573
582
  , exc_info = True
574
583
  )
575
584
 
@@ -354,6 +354,9 @@ class Metadata(Result_object):
354
354
  calculations = []
355
355
  if "Single Point" in self.calculations:
356
356
  calculations.append("single point energy")
357
+
358
+ if "Screening" in self.calculations:
359
+ calculations.append("conformer screening")
357
360
 
358
361
  if "Optimisation" in self.calculations:
359
362
  calculations.append("optimised structure")
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: digichem-core
3
- Version: 7.2.0
3
+ Version: 7.3.0
4
4
  Summary: Open-source library for Digichem core components
5
5
  Project-URL: Homepage, https://github.com/Digichem-Project/digichem-core
6
6
  Project-URL: Documentation, https://doc.digi-chem.co.uk
7
7
  Project-URL: Issues, https://github.com/Digichem-Project/digichem-core/issues
8
8
  Author-email: "Oliver S. Lee" <osl@digi-chem.ac.uk>
9
- License: Copyright 2024 Digichem
9
+ License: Copyright 2026 Digichem
10
10
 
11
11
  Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
12
12
 
@@ -1,6 +1,6 @@
1
- digichem/__init__.py,sha256=ChTuRj6CsvNmgu2v8yY94WqAp_x78nVnyxhvEpo6XFU,1934
1
+ digichem/__init__.py,sha256=KCCGEdjNGje8j9rag_4A1HgVuiokD86P25UF434ndJo,1934
2
2
  digichem/basis.py,sha256=khzIS9A_w8QH2vsXctWr16Bw5TA84_9_Fo-vuveP_5w,4875
3
- digichem/datas.py,sha256=t2PuQFvPb55WO_qVF3Xz67XNodQDimqYD26VkRPEWLA,433
3
+ digichem/datas.py,sha256=I1yaPZ5nwKRFkwibqLd3kDw3s4mMkSzAjLqXCDwdqUw,473
4
4
  digichem/log.py,sha256=tjl8Er16HUsAibBZGZMu8KyT0w53YM3u0Neh_l2jD9Q,6166
5
5
  digichem/memory.py,sha256=_HJMwiElr0534lr3h17WwvXOi-DxCN-bAD1Ij8Lwg6U,5638
6
6
  digichem/translate.py,sha256=YKm7wko13A17zQJW8qOT52rzp9wDmR0GDzOepuzr75I,21326
@@ -44,40 +44,42 @@ digichem/image/base.py,sha256=6LGIYK2zB7lQxFhV5t2ZJPcpNMOqRv-JzrBJEeTF9Wg,5193
44
44
  digichem/image/excited_states.py,sha256=1YUa8KYXTvh_Vwl2DSaRwpIh15FtW18KpjLlnD8VTOk,14103
45
45
  digichem/image/graph.py,sha256=3wcembS3gcI5Ejq1lv4Sw1g_7s-fubDpiho_85kzkks,10880
46
46
  digichem/image/orbitals.py,sha256=UcPnOm3tQOdTegsVm_a7AizVe7OpBCz_4sve6u7jAvA,9705
47
- digichem/image/render.py,sha256=fQu9QcuIolzoeMoaQPQo27Z-JEXUCQCWiSVWQgv2iG4,30470
47
+ digichem/image/render.py,sha256=vWBJS7W5yn2suwHrf2dqezyahpJb-Qvs8cLDQKv0g0k,30571
48
48
  digichem/image/spectroscopy.py,sha256=lB7O8QxeV0-B_jasr51XvBozfBjCOWwNgDW74HMxbqs,33998
49
49
  digichem/image/structure.py,sha256=gxgX5cXA3BaqBh5K7UPcVGWX-yG5kc6Robf5sWT1IYM,4900
50
50
  digichem/image/vmd.py,sha256=GGE35qypgBbpadHo1M929czuxM0U4NRICPkltKWEojE,39305
51
- digichem/input/__init__.py,sha256=DJMFLt-p2DZaTdvk2zIR6gdUepsa1n0e2_mk-BB45Sk,179
51
+ digichem/input/__init__.py,sha256=unPAMm8CnD-UU0nIfc1_yX8uFNm63SksGO7POlIdiCY,197
52
52
  digichem/input/base.py,sha256=9nxut3IlvKSYCjRUzpi7SdlaO_4MU8KWAecV-z3T1jo,3082
53
- digichem/input/digichem_input.py,sha256=kAoLmphdIHLB4RoxHTXohTcL-L5iaYSl8peIR3vJPx8,19628
53
+ digichem/input/digichem_input.py,sha256=zSM1w9ZbT_ncLzPwhBzAsjDE5Z4ZsXBBWfHz1EdFr1c,20751
54
54
  digichem/input/gaussian.py,sha256=W3JqRdsfebs_2G3grlYRv_dR1FsTBbf8YrxCNGwSd0U,6963
55
55
  digichem/misc/__init__.py,sha256=gRlflbkXYyKvqeQ2KTnNAdJ00S6Vhmk-tzSVCBSiWgk,162
56
56
  digichem/misc/argparse.py,sha256=AG33L7MCi-ftb4XnvndQb4hKcorzJxESAVB0HIZRlcw,1371
57
57
  digichem/misc/base.py,sha256=X75sM4X9aCQlzg8SrOnEr7GPFQkLBlaRXiE9tj8xj5o,2189
58
- digichem/misc/io.py,sha256=7Oqi7UQLwhCsVHdEN0n7Vubsbzk14fLSkAYXe4obPbQ,11007
58
+ digichem/misc/io.py,sha256=Qzum8Lnc0EX0Ns90SllVDMM8zCJHZIextNqeUqsNrYQ,11172
59
59
  digichem/misc/layered_dict.py,sha256=Psf30UmHVHAE95RnFYiR2f7oaW05CsUhT4FtXhUtQcw,15561
60
60
  digichem/misc/text.py,sha256=FVcxf8ctt3D2yGFYgrZk3dpO48f_0aDhHiry_xGirD8,3673
61
61
  digichem/misc/time.py,sha256=Z7FWpkb8gqBHVMXdhFiBWn2VwmXuU9uaMxEKsfAH5Kw,2214
62
62
  digichem/parse/__init__.py,sha256=SIAj7do6mXRNuOEkN691Nh-je2YlZ3qL6fxnjsTE6QY,867
63
- digichem/parse/base.py,sha256=G7GULLX9YTSRote3o2MJk7nZ_wPrIKcG7Dyri5pyGCA,9175
64
- digichem/parse/cclib.py,sha256=g4a8r1IouVTz2N3Swmx8YVG5kZ5NnhBj6PoJpSP1eLY,10149
63
+ digichem/parse/base.py,sha256=tuO9zW7JnX3L6dOHjgEkJ0FwYGmT4rDYSl1ppWqUIQI,13618
64
+ digichem/parse/cclib.py,sha256=Ppw_3Y2GEk8p93Leqtv5GXH5ImgfEDN1OiuSA2N6ooI,6048
65
+ digichem/parse/censo.py,sha256=MqQVosa71td6w-b7HVa01iw7ShmGP_iVzUXoN7hI4uE,6053
66
+ digichem/parse/crest.py,sha256=AoPom9ihVyeDBB1ICC59L6JInNYYlmhKRHtDi5voYek,3632
65
67
  digichem/parse/dump.py,sha256=PnVwas1sFZTG9dgjAn7VLxQ8gSzTNllE-tx_PXh51DI,9332
66
68
  digichem/parse/gaussian.py,sha256=VaL0eotz2_b-OsWfzQioFU9JDDd3lYElrkXpLEy78fE,5344
67
69
  digichem/parse/orca.py,sha256=QahMQh-rZphDzyM8Kn6xEbEB9imCWpKUhUmlOxpDio0,4680
68
- digichem/parse/pyscf.py,sha256=DFR9xvM4ewHMM9-CikdaYq9I3Q78Xe7XA73x0cnLiIM,1465
70
+ digichem/parse/pyscf.py,sha256=I_b4vYzjNlZaaZL_X3CVlwDI73hb5mSHeliIpVR3LN0,1466
69
71
  digichem/parse/turbomole.py,sha256=dulO8Y5txEABnK4Ybbp97KwOmnCrJMVSJSI9427SVIo,9133
70
72
  digichem/parse/util.py,sha256=QSxrRN3HFllVlsWqzTC4EKjj9w7t4kFbXuvhsMTpLIY,28930
71
73
  digichem/result/__init__.py,sha256=FUNL2pc2bP3XNVFRrlyrTppVWEsPRHcThLiVytpsH84,222
72
74
  digichem/result/angle.py,sha256=jehU0dRN4nALvUFzIdkh4GGQORq4lgXre8ZfFQMzB94,5043
73
- digichem/result/atom.py,sha256=TOlMiKYFe8vvBv9qCMBEJD1LYNzFtd7VePnUNCfCIbY,28287
75
+ digichem/result/atom.py,sha256=y88x3tCnpR1nlOLqrkRMAPsyxaDbpf0LUQMa2dxSbY0,28439
74
76
  digichem/result/base.py,sha256=Lahahog9EHEQFP--DClyhiFIoJ0bRFJN37GH-Z2Nu2c,11615
75
77
  digichem/result/dipole_moment.py,sha256=1Z0qksmk7HYyov7318zWJQVXvteCo59DCCXd2rT2-hg,11082
76
78
  digichem/result/emission.py,sha256=XsyOgMtxV6wg9iBpPK-3WSbcIh6ohlZtGybMkuZaEbY,19785
77
79
  digichem/result/energy.py,sha256=iboiQF5Twa-TU9b_Utxg5B5U7RmL3LHtRp6XzmQHG0o,10899
78
80
  digichem/result/excited_state.py,sha256=FtQuW3R6yMCJMXmqxutwIleyYWqzDBgQ6O36sjWCSq0,34592
79
81
  digichem/result/ground_state.py,sha256=B8lOalD90uNq_pMHy0JhwYYY3rlWpcFlOHPLSO5CjhQ,3804
80
- digichem/result/metadata.py,sha256=Aak2tviK2HXfbr2m97AbNW1myfWVsSBwgT1ISLRKG88,38602
82
+ digichem/result/metadata.py,sha256=JCz1NYiwhmVPQ3dQVTEQchfRo25QJ1pjO83HcaPs-Ng,38703
81
83
  digichem/result/multi.py,sha256=IzupKHMcXTf6AReeUCjY9Szy2b4TdOHVtC2Tlb9xg0o,4330
82
84
  digichem/result/nmr.py,sha256=vvhfVQSsi3fQ1VU_5POAUvwsJuN7XFidLkpmZy4UBzQ,50659
83
85
  digichem/result/orbital.py,sha256=8oMAtYSi8hYWWsjXJC2z5UAA47h0axM24ALqIo7yxFY,28292
@@ -109,8 +111,8 @@ digichem/test/test_translate.py,sha256=_3FkYottqHZGxMSTJkbcE8dQXhQlrNlVS0FjMopIw
109
111
  digichem/test/util.py,sha256=p1NfdSunB6WJM4BknyTDld7fGYWuvi2kHPu7pZcL9zE,8877
110
112
  digichem/test/mock/cubegen,sha256=h2HvmW8YbmYDqycnfCJYstZ2yO1uUxErbCWFKgzXaJ8,6781858
111
113
  digichem/test/mock/formchk,sha256=KZWhyJtaMXAGX1Xlx7Ikw9gbncOqLrpYkynz9IMYUlY,756867
112
- digichem_core-7.2.0.dist-info/METADATA,sha256=fB6uhPUZ02TY6dx1Uz-_mHDdsPiXPHpGUwMbN8N1ZcU,4094
113
- digichem_core-7.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
114
- digichem_core-7.2.0.dist-info/licenses/COPYING.md,sha256=d-yG6IJrlqLkuiuoOFe331YKzMRS05pnmk4e98gu9pQ,900
115
- digichem_core-7.2.0.dist-info/licenses/LICENSE,sha256=ZLlePQN2WLgdvmIGLEjjHtgIoneUGR2MgE9yjMg1JiY,1457
116
- digichem_core-7.2.0.dist-info/RECORD,,
114
+ digichem_core-7.3.0.dist-info/METADATA,sha256=3hH0iNtlNMF3mYeNjZbEUl9wcEXW-_UbCuX-1PbhJ_w,4094
115
+ digichem_core-7.3.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
116
+ digichem_core-7.3.0.dist-info/licenses/COPYING.md,sha256=d-yG6IJrlqLkuiuoOFe331YKzMRS05pnmk4e98gu9pQ,900
117
+ digichem_core-7.3.0.dist-info/licenses/LICENSE,sha256=b5-PNaQU9yZMNyXZKh5uzESv3Sb-bFhsXknE3B3UpMA,1457
118
+ digichem_core-7.3.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
- Copyright 2024 Digichem
1
+ Copyright 2026 Digichem
2
2
 
3
3
  Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
4
4