digichem-core 6.1.0__py3-none-any.whl → 6.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,6 +8,7 @@ from deepmerge import conservative_merger
8
8
  from pathlib import Path
9
9
  import copy
10
10
  import warnings
11
+ from scipy import integrate
11
12
 
12
13
  from digichem.misc.time import latest_datetime, total_timedelta, date_to_string,\
13
14
  timedelta_to_string
@@ -147,6 +148,7 @@ class Metadata(Result_object):
147
148
  auxiliary_files = None,
148
149
  history = None,
149
150
  date = None,
151
+ insert_date = None,
150
152
  duration = None,
151
153
  package = None,
152
154
  package_version = None,
@@ -167,6 +169,7 @@ class Metadata(Result_object):
167
169
  num_cpu = None,
168
170
  memory_available = None,
169
171
  memory_used = None,
172
+ performance = None,
170
173
 
171
174
  # Deprecated.
172
175
  solvent_model = None,
@@ -181,6 +184,7 @@ class Metadata(Result_object):
181
184
  :param history: Optional SHA of the calculation from which the coordinates of this calculation were generated.
182
185
  :param num_calculations: Optional number of individual calculations this metadata represents.
183
186
  :param date: Optional date (datetime object) of this calculation result.
187
+ :param insert_date: Optional date (datetime object) of when this calculation result was stored (normally in a DB).
184
188
  :param duration: Optional duration (timedelta object) of this calculation.
185
189
  :param package: Optional string identifying the computational chem program that performed the calculation.
186
190
  :param package_version: Optional string identifying the version of the computational chem program that performed the calculation.
@@ -206,6 +210,7 @@ class Metadata(Result_object):
206
210
  self.auxiliary_files = auxiliary_files if auxiliary_files is not None and len(auxiliary_files) != 0 else {}
207
211
  self.history = history
208
212
  self.date = date
213
+ self.insert_date = insert_date
209
214
  self.duration = duration
210
215
  self.package = package
211
216
  self.package_version = package_version
@@ -229,6 +234,7 @@ class Metadata(Result_object):
229
234
  self.num_cpu = num_cpu
230
235
  self.memory_available = memory_available
231
236
  self.memory_used = memory_used
237
+ self.performance = performance
232
238
 
233
239
  # Deprecated solvent system.
234
240
  if solvent_model is not None:
@@ -473,6 +479,7 @@ class Metadata(Result_object):
473
479
  num_cpu = parser.data.metadata.get('num_cpu', None),
474
480
  memory_available = memory_available,
475
481
  memory_used = memory_used,
482
+ performance = Performance.from_parser(parser) if "performance" in parser.data.metadata else None
476
483
  )
477
484
  except AttributeError:
478
485
  # There is no metadata available, give up.
@@ -513,6 +520,11 @@ class Metadata(Result_object):
513
520
  "units": "s",
514
521
  "string": date_to_string(self.date) if self.date is not None else None
515
522
  }
523
+ attr_dict['insert_date'] = {
524
+ "value": self.insert_date.timestamp() if self.insert_date is not None else None,
525
+ "units": "s",
526
+ "string": date_to_string(self.insert_date) if self.insert_date is not None else None
527
+ }
516
528
  attr_dict['duration'] = {
517
529
  "value": self.duration.total_seconds() if self.duration is not None else None,
518
530
  "units": "s",
@@ -542,6 +554,8 @@ class Metadata(Result_object):
542
554
  "units": None
543
555
  }
544
556
 
557
+ attr_dict['performance'] = self.performance.dump(digichem_options) if self.performance else None
558
+
545
559
  return attr_dict
546
560
 
547
561
  @classmethod
@@ -557,9 +571,14 @@ class Metadata(Result_object):
557
571
  kwargs = copy.deepcopy(data)
558
572
 
559
573
  # For more complex fields, use the data item.
560
- for attr in ['date', 'duration', 'temperature', "pressure"]:
561
- kwargs[attr] = data[attr]['value']
574
+ for attr in ['insert_date', 'date', 'duration', 'temperature', "pressure"]:
575
+ if attr in data:
576
+ kwargs[attr] = data[attr]['value']
577
+
578
+ else:
579
+ kwargs[attr] = None
562
580
 
581
+ kwargs['insert_date'] = datetime.fromtimestamp(kwargs['insert_date']) if kwargs['insert_date'] is not None else None
563
582
  kwargs['date'] = datetime.fromtimestamp(kwargs['date']) if kwargs['date'] is not None else None
564
583
  kwargs['duration'] = timedelta(seconds = kwargs['duration']) if kwargs['duration'] is not None else None
565
584
 
@@ -571,6 +590,8 @@ class Metadata(Result_object):
571
590
 
572
591
  else:
573
592
  kwargs[attr_name] = None
593
+
594
+ kwargs['performance'] = Performance.from_dump(data['performance'], result_set, options) if "performance" in data and data['performance'] is not None else None
574
595
 
575
596
  return self(**kwargs)
576
597
 
@@ -644,4 +665,252 @@ class Merged_metadata(Metadata):
644
665
  merged_metadata.auxiliary_files = conservative_merger.merge(merged_metadata.auxiliary_files, metadata.auxiliary_files)
645
666
 
646
667
  return merged_metadata
647
-
668
+
669
class Performance(Result_object):
    """
    Performance metrics and profiling data for a calculation.

    Each of the list attributes (duration, memory_used etc.) is a time series;
    index i of each list corresponds to the same sample point.
    """

    def __init__(
        self,
        duration = None,
        memory_used = None,
        memory_used_percent = None,
        memory_available = None,
        memory_available_percent = None,
        cpu_used = None,
        output_space = None,
        scratch_space = None,
        memory_allocated = None,
        cpu_allocated = None,
    ):
        """
        Constructor for Performance objects.

        :param duration: List of sample times (in s).
        :param memory_used: List of memory used (in bytes) at each sample point.
        :param memory_used_percent: List of memory used (as a %) at each sample point.
        :param memory_available: List of memory available (in bytes) at each sample point.
        :param memory_available_percent: List of memory available (as a %) at each sample point.
        :param cpu_used: List of CPU usage (in %, where 100 corresponds to one full CPU) at each sample point.
        :param output_space: List of output disk space used (in bytes) at each sample point.
        :param scratch_space: List of scratch disk space used (in bytes) at each sample point.
        :param memory_allocated: Amount of memory allocated to the calculation (in bytes); guessed from the maximum memory used if None.
        :param cpu_allocated: Number of CPUs allocated to the calculation; guessed from the maximum CPU usage if None.
        """
        # Default to None (not []) so one mutable default list is not shared between instances.
        self.duration = duration if duration is not None else []
        self.memory_used = memory_used if memory_used is not None else []
        self.memory_used_percent = memory_used_percent if memory_used_percent is not None else []
        self.memory_available = memory_available if memory_available is not None else []
        self.memory_available_percent = memory_available_percent if memory_available_percent is not None else []
        self.cpu_used = cpu_used if cpu_used is not None else []
        self.output_space = output_space if output_space is not None else []
        self.scratch_space = scratch_space if scratch_space is not None else []

        # Guess the allocation limits from the observed maxima if they weren't given explicitly.
        # Guard against empty series (previously max() of an empty list raised ValueError).
        if memory_allocated is not None:
            self.memory_allocated = memory_allocated

        else:
            self.memory_allocated = self.max_mem if self.memory_used else None

        if cpu_allocated is not None:
            self.cpu_allocated = cpu_allocated

        else:
            # cpu_used is in %, where one full CPU == 100; round up to whole CPUs.
            self.cpu_allocated = math.ceil(max(self.cpu_used) / 100) if self.cpu_used else None

    @classmethod
    def from_parser(self, parser):
        """
        Construct a Performance object from an output file parser.

        :param parser: Output data parser.
        :return: A populated Performance object.
        """
        # The performance table is a 2D array; each row is one sample point and the columns are
        # (duration, mem used, mem used %, mem available, mem available %, cpu %, output space, scratch space).
        # Hoist the repeated metadata lookup.
        performance = parser.data.metadata['performance']
        return self(
            duration = performance[:, 0].tolist(),
            memory_used = performance[:, 1].tolist(),
            memory_allocated = Memory(parser.data.metadata['memory_available']) if "memory_available" in parser.data.metadata else None,
            memory_used_percent = performance[:, 2].tolist(),
            memory_available = performance[:, 3].tolist(),
            memory_available_percent = performance[:, 4].tolist(),
            cpu_used = performance[:, 5].tolist(),
            cpu_allocated = parser.data.metadata.get('num_cpu', None),
            output_space = performance[:, 6].tolist(),
            scratch_space = performance[:, 7].tolist()
        )

    @property
    def max_mem(self):
        """
        The maximum amount of memory used in the calculation (in bytes), or None if no data is available.
        """
        return max(self.memory_used, default = None)

    @property
    def memory_margin(self):
        """
        The difference (in bytes) between the memory allocated and the maximum memory actually used.
        """
        max_memory = float(self.memory_allocated) if self.memory_allocated is not None else self.max_mem

        return max_memory - self.max_mem

    @property
    def memory_efficiency(self):
        """
        The memory efficiency of this calculation (as a %).

        This is the integrated memory usage over time compared to the allocated memory over the same period.
        The allocated memory is guesstimated automatically if not available.
        """
        # The whole computation is guarded: empty/short series or a missing allocation
        # (None) raise here, not just the final division.
        try:
            # Integrate to find the number of byte seconds used.
            area = integrate.trapezoid(self.memory_used, self.duration)

            # How much we should/could have used.
            total_area = (self.duration[-1] - self.duration[0]) * float(self.memory_allocated)

            # Return as %.
            return area / total_area * 100

        except Exception:
            # Div zero / missing data.
            return 0

    @property
    def cpu_efficiency(self):
        """
        The CPU efficiency of this calculation (as a %).

        This is the integrated CPU usage over time compared to the allocated CPUs over the same period.
        The number of allocated CPUs is guesstimated automatically if not available.
        """
        try:
            # Integrate to find the number of CPU seconds used.
            area = integrate.trapezoid(self.cpu_used, self.duration)

            # How much we should/could have used (each CPU counts as 100 %).
            total_area = (self.duration[-1] - self.duration[0]) * self.cpu_allocated * 100

            # Return as %.
            return area / total_area * 100

        except Exception:
            # Div zero / missing data.
            return 0

    @classmethod
    def from_dump(self, data, result_set, options):
        """
        Get an instance of this class from its dumped representation.

        :param data: The data to parse.
        :param result_set: The partially constructed result set which is being populated.
        :param options: Digichem options nested dictionary.
        """
        values = data['values']

        return self(
            duration = [value['duration']['value'] for value in values],
            memory_used = [value['memory_used']['value'] for value in values],
            memory_allocated = Memory(data['memory_allocated']['value']) if data['memory_allocated']['value'] is not None else None,
            memory_used_percent = [value['memory_used_percent']['value'] for value in values],
            memory_available = [value['memory_available']['value'] for value in values],
            memory_available_percent = [value['memory_available_percent']['value'] for value in values],
            cpu_used = [value['cpu_used']['value'] for value in values],
            cpu_allocated = data['cpu_allocated'],
            output_space = [value['output_space']['value'] for value in values],
            scratch_space = [value['scratch_space']['value'] for value in values]
        )

    def dump(self, digichem_options):
        """
        Get a representation of this result object in primitive format.
        """
        return {
            "cpu_allocated": self.cpu_allocated,
            "cpu_efficiency": {
                "units": "%",
                "value": float(self.cpu_efficiency),
            },
            "memory_allocated": {
                "units": "bytes",
                "value": float(self.memory_allocated) if self.memory_allocated is not None else None
            },
            "maximum_memory": {
                "units": "bytes",
                "value": self.max_mem,
            },
            "memory_margin": {
                "units": "bytes",
                "value": self.memory_margin
            },
            "memory_efficiency": {
                "units": "%",
                "value": float(self.memory_efficiency)
            },
            "values":[
                {
                    'duration': {
                        "units": "s",
                        "value": self.duration[i]
                    },
                    'memory_used': {
                        "units": "bytes",
                        "value": self.memory_used[i]
                    },
                    'memory_used_percent': {
                        "units": "%",
                        "value": self.memory_used_percent[i]
                    },
                    'memory_available': {
                        "units": "bytes",
                        "value": self.memory_available[i]
                    },
                    'memory_available_percent': {
                        # This is a percentage, not a byte count.
                        "units": "%",
                        "value": self.memory_available_percent[i]
                    },
                    'cpu_used': {
                        "units": "%",
                        "value": self.cpu_used[i]
                    },
                    'output_space': {
                        "units": "bytes",
                        "value": self.output_space[i]
                    },
                    'scratch_space': {
                        "units": "bytes",
                        "value": self.scratch_space[i]
                    }
                } for i in range(len(self.duration))
            ]
        }
digichem/result/result.py CHANGED
@@ -34,6 +34,9 @@ class Result_set(Result_object):
34
34
 
35
35
  self.results = (self,)
36
36
  self.emission = attributes.pop('emission', Emissions())
37
+
38
+ # Any ancillary data.
39
+ self._aux = attributes.pop('aux', {})
37
40
 
38
41
  for attr_name, attribute in attributes.items():
39
42
  setattr(self, attr_name, attribute)
@@ -333,6 +333,48 @@ class Absorption_emission_graph(Spectroscopy_graph):
333
333
  """
334
334
  return ((E * scipy.constants.electron_volt)**2 * f_E) / (scipy.constants.Planck * scipy.constants.c)
335
335
 
336
+ @classmethod
337
+ def inverse_jacobian(self, E, f_nm):
338
+ """
339
+ An implementation of the jacobian transform that scales intensity in wavelength units to intensity in energy units.
340
+
341
+ See J. Phys. Chem. Lett. 2014, 5, 20, 3497 for why this is necessary.
342
+
343
+ Note that the jacobian transform will maintain the area under the curve regardless of x units (nm or x).
344
+ Sadly, this has the consequence of mangling the intensity units (it becomes tiny; an oscillator strength of 1 at 3 eV becomes 1.163e-12).
345
+ """
346
+ # TODO: Might be better to rearrange this to accept nm rather than eV?
347
+ return (
348
+ (f_nm * scipy.constants.Planck * scipy.constants.c) / (E * scipy.constants.electron_volt) **2
349
+ )
350
+
351
    @classmethod
    def shift_coord(self, coord, delta_eV):
        """
        Shift a coordinate (in nm) by a given energy value.

        :param coord: A (wavelength_nm, intensity_nm) pair to shift.
        :param delta_eV: The energy (in eV) to shift by. A positive value will blueshift (higher energy).
        :return: The shifted (wavelength_nm, intensity_nm) pair.
        """
        old_x_nm, old_y_nm = coord
        # Convert x to energy.
        old_x_ev = digichem.result.excited_state.Excited_state.wavelength_to_energy(old_x_nm)
        # Transform y.
        old_y_ev = self.inverse_jacobian(old_x_ev, old_y_nm)

        # Shift by given amount.
        new_x_ev = old_x_ev + delta_eV

        # Convert back to nm.
        # NOTE(review): energy_to_wavelength() is not visible here — presumably the second
        # argument requests the inverse intensity (jacobian) transform as well; confirm.
        new_x_nm, new_f_nm = self.energy_to_wavelength((new_x_ev, old_y_ev), True)

        return (new_x_nm, new_f_nm)
371
+
372
    def shift(self, delta_eV):
        """
        Shift every coordinate of this graph by a given energy.

        :param delta_eV: The energy (in eV) to shift by. A positive value will blueshift (higher energy).
        :return: A lazy map of shifted (wavelength_nm, intensity_nm) pairs; iterate to consume.
        """
        return map(lambda coord: self.shift_coord(coord, delta_eV), self.coordinates)
376
+
377
+
336
378
  def plot_gaussian(self):
337
379
  """
338
380
  Plot a gaussian distribution around our excited state energies.
@@ -0,0 +1,33 @@
1
+ """Test for memory representations."""
2
+
3
+ import pytest
4
+
5
+ from digichem.memory import Memory
6
+
7
+
8
@pytest.mark.parametrize("value, valid", [
    ["1", True],
    [1, True],
    [1.0, True],
    ["1 B", True],
    ["1 KB", True],
    ["1 MB", True],
    ["1 GB", True],
    ["1 TB", True],
    ["foo bar", False],
])
def test_validation(value, valid):
    """
    Check that Memory accepts valid representations and rejects invalid ones.
    """
    if valid:
        # Valid input: construction and stringification should both succeed.
        str(Memory(value))

    else:
        # Invalid input: expect an exception from construction or stringification.
        # (Previously a bare `raise` with no active exception produced a confusing
        # RuntimeError instead of a clear test failure.)
        with pytest.raises(Exception):
            str(Memory(value))
@@ -76,4 +76,71 @@ def test_dump_and_parse(result_files, tmp_path, digichem_options):
76
76
 
77
77
 
78
78
  assert raw_dump == parsed_dump
79
-
79
+
80
@pytest.mark.parametrize(
    "result_files, num_archives",
    [
        [(Path(data_directory(), "Archives/1/Benzene.log"),), 1],
        [(Path(data_directory(), "Archives/1/Benzene.log"), "fchk:" + str(Path(data_directory(), "Archives/1/Benzene.fchk.zip"))), 1],
        [(Path(data_directory(), "Archives/1/"),), 1],
        [(Path(data_directory(), "Archives/1/"), "fchk:" + str(Path(data_directory(), "Archives/1/Benzene.fchk.zip"))), 1],
        [(Path(data_directory(), "Archives/2/Benzene.log.zip"),), 1],
        [(Path(data_directory(), "Archives/2/Benzene.log.zip"), "fchk:" + str(Path(data_directory(), "Archives/2/Benzene.fchk"))), 1],
        [(Path(data_directory(), "Archives/2"),), 1],
        [(Path(data_directory(), "Archives/2/Benzene.log.zip"), "fchk:" + str(Path(data_directory(), "Archives/2/Benzene.fchk"))), 1],
        [(Path(data_directory(), "Archives/3/Benzene.log.zip"),), 2],
        [(Path(data_directory(), "Archives/3/Benzene.log.zip"), "fchk:" + str(Path(data_directory(), "Archives/3/Benzene.fchk.zip"))), 2],
        [(Path(data_directory(), "Archives/3"),), 2],
        [(Path(data_directory(), "Archives/3/Benzene.log.zip"), "fchk:" + str(Path(data_directory(), "Archives/3/Benzene.fchk.zip"))), 2],
        [(Path(data_directory(), "Archives/4"),), 0],
        [(Path(data_directory(), "Archives/4.zip"),), 1],
        [(Path(data_directory(), "Archives/8.zip"),), 1],
    ]
)
def test_gaussian_archives(result_files, num_archives, digichem_options):
    """
    Can we parse from various archives?
    """
    # Pre-bind so the finally clause doesn't NameError (masking the real failure)
    # when parse_calculation() itself raises.
    archive = None
    try:
        result, archive = parse_calculation(*result_files, options = digichem_options, keep_archive = True)
        assert isinstance(result, Result_set)

        # Check we have found the fchk.
        assert "fchk_file" in result.metadata.auxiliary_files
        assert len(result.metadata.log_files) == 1

        # Make sure we didn't unpack any other archives by accident.
        assert len(archive.archive_dirs) == num_archives

    finally:
        if archive is not None:
            archive.cleanup()
117
+
118
+
119
@pytest.mark.parametrize(
    "result_files, num_archives",
    [
        [(Path(data_directory(), "Archives/5"),), 27],
        [(Path(data_directory(), "Archives/5/Naphthalene.log"),), 27],
        [(Path(data_directory(), "Archives/6"),), 28],
        [(Path(data_directory(), "Archives/6/Naphthalene.log.zip"),), 28],
        [(Path(data_directory(), "Archives/7"),), 1],
        [(Path(data_directory(), "Archives/7/Naphthalene.log.zip"),), 1],
    ]
)
def test_turbomole_archives(result_files, num_archives, digichem_options):
    """
    Can we parse from various archives?
    """
    # Pre-bind so the finally clause doesn't NameError (masking the real failure)
    # when parse_calculation() itself raises.
    archive = None
    try:
        result, archive = parse_calculation(*result_files, options = digichem_options, keep_archive = True)
        assert isinstance(result, Result_set)

        # Check we have found the ground state CAO file.
        assert "ground_state_cao_file" in result.metadata.auxiliary_files
        assert len(result.metadata.log_files) == 13

        # Make sure we didn't unpack any other archives by accident.
        assert len(archive.archive_dirs) == num_archives

    finally:
        if archive is not None:
            archive.cleanup()
@@ -208,7 +208,7 @@ def test_atoms(result_set):
208
208
  assert result_set.raw_atoms.element_dict['H'] == 8
209
209
 
210
210
  # Check mass.
211
- assert result_set.raw_atoms.molar_mass == pytest.approx(128.1705)
211
+ assert result_set.raw_atoms.molar_mass == pytest.approx(128.174)
212
212
 
213
213
  # We don't check positions here because these can vary from calc to calc due to reorientation...
214
214
 
digichem/test/util.py CHANGED
@@ -29,7 +29,8 @@ result_files = {
29
29
  "gaussian": [Path(data_directory(), datum) for datum in [
30
30
  'Naphthalene/Gaussian 16 Optimisation Frequencies PBE1PBE (GD3BJ) Toluene 6-31G(d,p)',
31
31
  'Naphthalene/Gaussian 16 Excited States TDA 10 Singlets 10 Triplets PBE1PBE (GD3BJ) Toluene 6-31G(d,p).tar.gz',
32
- 'Naphthalene/Gaussian 16 Excited States TDA Optimised S(1) PBE1PBE (GD3BJ) Toluene 6-31G(d,p).tar.gz',
32
+ 'Naphthalene/Gaussian 16 Excited States TDA Optimised S(1) PBE1PBE (GD3BJ) Toluene 6-31G(d,p).tar.gz',
33
+ 'Pyridine/Gaussian 16 Excited States TDA Optimised S(1) PBE1PBE (GD3BJ) Toluene 6-31G(d,p).tar.gz'
33
34
  ]],
34
35
  "turbomole": [Path(data_directory(), datum) for datum in [
35
36
  'Naphthalene/Turbomole Optimisation ADC(2) cc-pVDZ.tar.gz',
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: digichem-core
3
- Version: 6.1.0
3
+ Version: 6.10.1
4
4
  Summary: Open-source library for Digichem core components
5
5
  Project-URL: Homepage, https://github.com/Digichem-Project/digichem-core
6
6
  Project-URL: Documentation, https://doc.digi-chem.co.uk
@@ -38,6 +38,7 @@ Requires-Dist: pyyaml
38
38
  Requires-Dist: rdkit
39
39
  Requires-Dist: scipy
40
40
  Provides-Extra: test
41
+ Requires-Dist: pyscf; extra == 'test'
41
42
  Requires-Dist: pytest; extra == 'test'
42
43
  Requires-Dist: pytest-lazy-fixture; extra == 'test'
43
44
  Description-Content-Type: text/markdown
@@ -97,4 +98,4 @@ Documentation coming soon.
97
98
 
98
99
  Digichem-core is licensed under the BSD-3-Clause license, but some files are licensed separately. See [COPYING.md](COPYING.md) for full details.
99
100
 
100
- The Digichem logo and branding is Copyright Digichem 2024, you may not use them in any way (although you are welcome to look at them).
101
+ The Digichem logo and branding is Copyright Digichem 2025, you may not use them in any way (although you are welcome to look at them).