digichem_core-6.0.0rc1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. digichem/__init__.py +75 -0
  2. digichem/basis.py +116 -0
  3. digichem/config/README +3 -0
  4. digichem/config/__init__.py +5 -0
  5. digichem/config/base.py +321 -0
  6. digichem/config/locations.py +14 -0
  7. digichem/config/parse.py +90 -0
  8. digichem/config/util.py +117 -0
  9. digichem/data/README +4 -0
  10. digichem/data/batoms/COPYING +18 -0
  11. digichem/data/batoms/LICENSE +674 -0
  12. digichem/data/batoms/README +2 -0
  13. digichem/data/batoms/__init__.py +0 -0
  14. digichem/data/batoms/batoms-renderer.py +351 -0
  15. digichem/data/config/digichem.yaml +714 -0
  16. digichem/data/functionals.csv +15 -0
  17. digichem/data/solvents.csv +185 -0
  18. digichem/data/tachyon/COPYING.md +5 -0
  19. digichem/data/tachyon/LICENSE +30 -0
  20. digichem/data/tachyon/tachyon_LINUXAMD64 +0 -0
  21. digichem/data/vmd/common.tcl +468 -0
  22. digichem/data/vmd/generate_combined_orbital_images.tcl +70 -0
  23. digichem/data/vmd/generate_density_images.tcl +45 -0
  24. digichem/data/vmd/generate_dipole_images.tcl +68 -0
  25. digichem/data/vmd/generate_orbital_images.tcl +57 -0
  26. digichem/data/vmd/generate_spin_images.tcl +66 -0
  27. digichem/data/vmd/generate_structure_images.tcl +40 -0
  28. digichem/datas.py +14 -0
  29. digichem/exception/__init__.py +7 -0
  30. digichem/exception/base.py +133 -0
  31. digichem/exception/uncatchable.py +63 -0
  32. digichem/file/__init__.py +1 -0
  33. digichem/file/base.py +364 -0
  34. digichem/file/cube.py +284 -0
  35. digichem/file/fchk.py +94 -0
  36. digichem/file/prattle.py +277 -0
  37. digichem/file/types.py +97 -0
  38. digichem/image/__init__.py +6 -0
  39. digichem/image/base.py +113 -0
  40. digichem/image/excited_states.py +335 -0
  41. digichem/image/graph.py +293 -0
  42. digichem/image/orbitals.py +239 -0
  43. digichem/image/render.py +617 -0
  44. digichem/image/spectroscopy.py +797 -0
  45. digichem/image/structure.py +115 -0
  46. digichem/image/vmd.py +826 -0
  47. digichem/input/__init__.py +3 -0
  48. digichem/input/base.py +78 -0
  49. digichem/input/digichem_input.py +500 -0
  50. digichem/input/gaussian.py +140 -0
  51. digichem/log.py +179 -0
  52. digichem/memory.py +166 -0
  53. digichem/misc/__init__.py +4 -0
  54. digichem/misc/argparse.py +44 -0
  55. digichem/misc/base.py +61 -0
  56. digichem/misc/io.py +239 -0
  57. digichem/misc/layered_dict.py +285 -0
  58. digichem/misc/text.py +139 -0
  59. digichem/misc/time.py +73 -0
  60. digichem/parse/__init__.py +13 -0
  61. digichem/parse/base.py +220 -0
  62. digichem/parse/cclib.py +138 -0
  63. digichem/parse/dump.py +253 -0
  64. digichem/parse/gaussian.py +130 -0
  65. digichem/parse/orca.py +96 -0
  66. digichem/parse/turbomole.py +201 -0
  67. digichem/parse/util.py +523 -0
  68. digichem/result/__init__.py +6 -0
  69. digichem/result/alignment/AA.py +114 -0
  70. digichem/result/alignment/AAA.py +61 -0
  71. digichem/result/alignment/FAP.py +148 -0
  72. digichem/result/alignment/__init__.py +3 -0
  73. digichem/result/alignment/base.py +310 -0
  74. digichem/result/angle.py +153 -0
  75. digichem/result/atom.py +742 -0
  76. digichem/result/base.py +258 -0
  77. digichem/result/dipole_moment.py +332 -0
  78. digichem/result/emission.py +402 -0
  79. digichem/result/energy.py +323 -0
  80. digichem/result/excited_state.py +821 -0
  81. digichem/result/ground_state.py +94 -0
  82. digichem/result/metadata.py +644 -0
  83. digichem/result/multi.py +98 -0
  84. digichem/result/nmr.py +1086 -0
  85. digichem/result/orbital.py +647 -0
  86. digichem/result/result.py +244 -0
  87. digichem/result/soc.py +272 -0
  88. digichem/result/spectroscopy.py +514 -0
  89. digichem/result/tdm.py +267 -0
  90. digichem/result/vibration.py +167 -0
  91. digichem/test/__init__.py +6 -0
  92. digichem/test/conftest.py +4 -0
  93. digichem/test/test_basis.py +71 -0
  94. digichem/test/test_calculate.py +30 -0
  95. digichem/test/test_config.py +78 -0
  96. digichem/test/test_cube.py +369 -0
  97. digichem/test/test_exception.py +16 -0
  98. digichem/test/test_file.py +104 -0
  99. digichem/test/test_image.py +337 -0
  100. digichem/test/test_input.py +64 -0
  101. digichem/test/test_parsing.py +79 -0
  102. digichem/test/test_prattle.py +36 -0
  103. digichem/test/test_result.py +489 -0
  104. digichem/test/test_translate.py +112 -0
  105. digichem/test/util.py +207 -0
  106. digichem/translate.py +591 -0
  107. digichem_core-6.0.0rc1.dist-info/METADATA +96 -0
  108. digichem_core-6.0.0rc1.dist-info/RECORD +111 -0
  109. digichem_core-6.0.0rc1.dist-info/WHEEL +4 -0
  110. digichem_core-6.0.0rc1.dist-info/licenses/COPYING.md +10 -0
  111. digichem_core-6.0.0rc1.dist-info/licenses/LICENSE +11 -0
digichem/parse/util.py ADDED
@@ -0,0 +1,523 @@
+ """Utilities and handy functions for reading and parsing result files."""
+ 
+ # General imports.
+ from functools import partial
+ from itertools import filterfalse, zip_longest
+ from pathlib import Path
+ import itertools
+ import shutil
+ from tempfile import mkdtemp
+ import collections
+ import warnings
+ # IMPORTANT: Do not replace multiprocessing pools with pathos; the latter is too buggy for production ATM (26-05-2023).
+ import multiprocessing
+ 
+ from configurables.misc import is_iter
+ 
+ # Digichem imports.
+ from digichem.exception.base import Digichem_exception
+ from digichem.parse.cclib import Cclib_parser
+ from digichem.parse.gaussian import Gaussian_parser
+ from digichem.parse.turbomole import Turbomole_parser
+ from digichem.parse.orca import Orca_parser
+ from digichem.result.multi import Merged
+ from digichem.result.result import Result_set
+ import digichem.log
+ from digichem.parse.dump import Yaml_multi_parser, Json_multi_parser
+ 
+ # Hidden imports.
+ #import cclib.io
+ #import cclib.parser
+ 
+ custom_parsing_formats = {
+     "sir": Yaml_multi_parser,
+     "yaml": Yaml_multi_parser,
+     "sij": Json_multi_parser,
+     "json": Json_multi_parser,
+ }
+ 
+ def find_log_files_from_hint(hint):
+     """
+     Find output (log) files from a given hint.
+     
+     :param hint: A path to a file to use as a hint to find additional log files. hint can optionally be a directory, in which case files inside this directory will be found.
+     :returns: A list of found log files.
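+     
+     Example (an illustrative sketch; the directory path is hypothetical)::
+     
+         # Given a directory, recognised log files inside it (or in an
+         # Output/Results sub-directory) are returned.
+         logs = find_log_files_from_hint("calculations/benzene_opt/")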
+     """
+     hint = Path(hint)
+     
+     # First, find our parent dir.
+     # hint may actually be a dir.
+     if hint.is_dir():
+         # Look for all .log files.
+         # File extensions that we recognise.
+         log_types = itertools.chain(["*." + custom_format for custom_format in custom_parsing_formats], ["*.log", ])  # "*.out" disabled for now...
+         parent = hint
+         log_files = [found_log_file for found_log_file in itertools.chain(*[parent.glob(log_type) for log_type in log_types])]
+         
+         # Remove any 'digichem.log' files as we know these are not calc log files.
+         # We don't actually write 'digichem.log' files anymore either (we use digichem.out instead),
+         # but older versions did...
+         log_files = [log_file for log_file in log_files if log_file.name not in ["digichem.log", "digichem.out"]]
+     
+     else:
+         parent = hint.parent
+         log_files = [hint]
+     
+     # If we have a computational style log file, look for others.
+     if hint.suffix not in ["." + custom_format for custom_format in custom_parsing_formats]:
+         # Try and find job files.
+         # These files have names like 'job.0', 'job.1' etc, ending in 'job.last'.
+         for number in itertools.count():
+             # Get the theoretical file name.
+             job_file_path = Path(parent, "job.{}".format(number))
+             
+             # See if it exists (and isn't the log_file given to us).
+             if job_file_path.exists():
+                 # Add to list.
+                 log_files.append(job_file_path)
+             
+             else:
+                 # We've found all the numbered files.
+                 break
+         
+         # Look for other files.
+         for maybe_file_name in ("basis", "control", "mos", "alpha", "beta", "coord", "gradient", "aoforce", "job.last", "numforce/aoforce.out"):
+             maybe_file_path = Path(parent, maybe_file_name)
+             
+             if maybe_file_path.exists():
+                 # Found it.
+                 log_files.append(maybe_file_path)
+     
+     # If we found any log files, we're done.
+     if len(log_files) > 0:
+         return log_files
+     
+     # If we have no log files, and there's a directory called Output or Results that we can use, try again using that as the hint.
+     elif hint.is_dir():
+         # Try the Output dir first.
+         if Path(hint, "Output").is_dir():
+             log_files = find_log_files_from_hint(Path(hint, "Output"))
+         
+         # If we still have nothing, try the Results dir.
+         if len(log_files) == 0 and Path(hint, "Results").is_dir():
+             log_files = find_log_files_from_hint(Path(hint, "Results"))
+     
+     return log_files
+ 
+ 
+ def find_log_files(*hints):
+     """
+     Find log files from a number of given hints.
+     
+     Each hint should point to an existing log file to parse, or a directory containing such log files.
+     Each log file given (and found) should refer to the same calculation.
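+     
+     Example (an illustrative sketch; the paths are hypothetical)::
+     
+         # Hints may be files, directories, or a mixture of the two.
+         logs = find_log_files("my_calc/", "my_calc/numforce/aoforce.out")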
+     """
+     # Get a list of found log files.
+     log_files = [found_log for hint in hints for found_log in find_log_files_from_hint(hint)]
+     
+     # Make sure we only have unique log files.
+     # We also now reverse our ordering, so that files given earlier by the user have priority.
+     log_files = list(reversed(list(dict.fromkeys([log_file.resolve() for log_file in log_files]))))
+     
+     return log_files
+ 
+ 
+ def class_from_log_files(*log_files, format_hint = "auto"):
+     """
+     Get a parser class based on some calculation log files.
+     
+     :param format_hint: A hint as to the format of the given log files. Either 'auto' (to guess), 'cclib' (calc log file), 'sir' (digichem result file) or 'sid' (digichem database file).
+     """
+     import cclib.io
+     import cclib.parser
+     
+     if format_hint in custom_parsing_formats:
+         return custom_parsing_formats[format_hint]
+     
+     elif format_hint == "auto" and len(log_files) > 0 and log_files[0].suffix[1:] in custom_parsing_formats:
+         return custom_parsing_formats[log_files[0].suffix[1:]]
+     
+     if format_hint not in ["cclib", "auto"]:
+         raise ValueError("Unrecognised format hint '{}'".format(format_hint))
+     
+     # We'll use cclib to guess the file type for us.
+     try:
+         log_file_type = type(cclib.io.ccopen([str(found_log_file) for found_log_file in log_files]))
+     
+     except Exception as e:
+         # cclib couldn't figure out the file type; it probably wasn't a .log file.
+         raise Digichem_exception("Could not determine file type of file(s): '{}'; are you sure these are computational log files?".format(", ".join((str(log_file) for log_file in log_files)))) from e
+     
+     # Either get a more complex parser if we have one, or just return the base parser.
+     if log_file_type == cclib.parser.gaussianparser.Gaussian:
+         return Gaussian_parser
+     
+     elif log_file_type == cclib.parser.turbomoleparser.Turbomole:
+         return Turbomole_parser
+     
+     elif log_file_type == cclib.parser.orcaparser.ORCA:
+         return Orca_parser
+     
+     else:
+         return Cclib_parser
+ 
+ def from_log_files(*log_files, format_hint = "auto", **auxiliary_files):
+     """
+     Get an output file parser of appropriate type.
+     
+     This is a convenience function.
+     
+     :param format_hint: A hint as to the format of the given log files. Either 'auto' (to guess), 'log' (calc log file), 'sir' (digichem result file) or 'sid' (digichem database file).
+     """
+     found_log_files = find_log_files(*log_files)
+     
+     try:
+         return class_from_log_files(*found_log_files, format_hint = format_hint).from_logs(*found_log_files, **auxiliary_files)
+     
+     except Exception:
+         if len(found_log_files) == 0:
+             raise ValueError("There are no log files at '{}'".format(log_files[0] if len(log_files) == 1 else log_files)) from None
+         
+         else:
+             raise
+ 
+ def parse_calculation(*log_files, options, parse_all = False, format_hint = "auto", keep_archive = False, **auxiliary_files):
+     """
+     Parse a single calculation result.
+     
+     :param log_files: A number of calculation result files corresponding to the same calculation.
+     :param options: A Digichem options nested dictionary containing options to control parsing.
+     :param parse_all: Whether to parse all results in the given log file. If True, a list of result sets will be returned; if False, only the first result will be returned if there are multiple.
+     :param format_hint: A hint as to the format of the given log files. Either 'auto' (to guess), 'log' (calc log file), 'sir' (digichem result file) or 'sid' (digichem database file).
+     :param auxiliary_files: Optional auxiliary calculation files corresponding to the calculation.
+     :return: A single Result_set object.
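+     
+     Example (an illustrative sketch of the aux-file prefix convention described in the comments below; all paths are hypothetical)::
+     
+         result = parse_calculation(
+             "calc/output.log",
+             "chk:calc/output.chk",  # an auxiliary checkpoint file, given inline
+             options = options,
+         )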
+     """
+     # Handle aux files.
+     # Auxiliary files are files associated with a calculation but that do not contain calculation output directly (they are not log files).
+     # Often, these files are written in a program-dependent binary format, and may be used for eg, program restarting, post-calculations, or image generation.
+     # For example, common aux files for Gaussian include: .chk, .fchk and .rwf.
+     # Auxiliary files to associate with a log file(s) can be given by the auxiliary_files key-word argument, but this is cumbersome in some instances.
+     # Alternatively, aux files can be given as normal log_files by prefixing the file name with 'aux:', where aux is the type of file.
+     # For example, a chk file could be given as "chk:calc/output.chk".
+     real_log_files = []
+     for maybe_log_file in log_files:
+         maybe_log_file = str(maybe_log_file)
+         found = False
+         # Loop through all known auxiliary_files:
+         for file_type, aux_file_name in itertools.chain(Gaussian_parser.INPUT_FILE_TYPES.items(), Turbomole_parser.INPUT_FILE_TYPES.items(), Orca_parser.INPUT_FILE_TYPES.items()):
+             code_len = len(file_type.short_code) + 1
+             if maybe_log_file[:code_len] == file_type.short_code + ":":
+                 auxiliary_files[aux_file_name] = maybe_log_file[code_len:]
+                 
+                 # Done.
+                 found = True
+                 break
+         
+         if not found:
+             # Take care of 'log:' files (which we support in case a normal log file happened to start with 'chk:' or similar).
+             if maybe_log_file[:4] == "log:":
+                 real_log_files.append(maybe_log_file[4:])
+             
+             else:
+                 # Take as is.
+                 real_log_files.append(maybe_log_file)
+     
+     log_files = real_log_files
+     
+     # Open files for reading (handles archives for us).
+     archive = open_for_parsing(*log_files)
+     
+     try:
+         open_log_files = archive.open()
+         
+         if parse_all:
+             results = from_log_files(*open_log_files, format_hint = format_hint, **auxiliary_files).process_all(options)
+         
+         else:
+             results = from_log_files(*open_log_files, format_hint = format_hint, **auxiliary_files).process(options)
+     
+     finally:
+         if not keep_archive:
+             archive.cleanup()
+     
+     if keep_archive:
+         # We've been asked not to close the archive, return it.
+         return (results, archive)
+     
+     else:
+         # The caller isn't interested in the archive.
+         return results
+ 
+ 
+ def multi_parser(log_files, auxiliary_files, *, options, format_hint = "auto", keep_archive = False):
+     """
+     The inner function which will be called in parallel to parse files.
+     """
+     # If the given 'log_files' is actually already a result object, then there's nothing for us to do.
+     # This is allowed to support merging calculation results between previously parsed results and new log files.
+     if isinstance(log_files, Result_set):
+         # Nothing we need to do.
+         return [log_files]
+     
+     # Next, decide if this is a single log file path, or is actually a list of multiple paths.
+     # Regardless of whether this is a single file or multiple, all given files should correspond to the same calculation.
+     if not isinstance(log_files, str) and is_iter(log_files):
+         # Multiple paths.
+         logs = log_files
+     
+     else:
+         # Single path.
+         logs = (log_files,)
+     
+     try:
+         return parse_calculation(*logs, options = options, parse_all = True, format_hint = format_hint, keep_archive = keep_archive, **auxiliary_files)
+     
+     except Exception:
+         digichem.log.get_logger().warning("Unable to parse calculation result file '{}'; skipping".format(logs[0]), exc_info = True)
+         return None
+ 
+ def parse_multiple_calculations(*log_files, auxiliary_files = None, options, pool = None, init_func = None, init_args = None, format_hint = "auto", processes = 1, keep_archive = False):
+     """
+     Parse a number of separate calculation results in parallel.
+     
+     If the argument 'pool' is not given, a multiprocessing pool will first be created using the arguments init_func, init_args and processes.
+     
+     :param log_files: A number of calculation result files corresponding to different calculations. Each item can optionally be a list itself, to specify files from the same calculation but which are spread across multiple files.
+     :param auxiliary_files: A list of dicts of aux files. The ordering of the dicts should correspond to that of log_files.
+     :param options: A Digichem options nested dictionary containing options to control parsing.
+     :param pool: An optional multiprocessing pool object to use for parallel parsing.
+     :param init_func: An optional function to call to init each newly created process.
+     :param format_hint: A hint as to the format of the given log files. Either 'auto' (to guess), 'log' (calc log file), 'sir' (digichem result file) or 'sid' (digichem database file).
+     :param processes: The max number of processes to create the new pool object with; if the number of given log_files is less than processes, then len(log_files) will be used instead.
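+     
+     Example (a minimal sketch; the paths are hypothetical and 'options' is a Digichem options dictionary)::
+     
+         results = parse_multiple_calculations(
+             "calc1/output.log",
+             ["calc2/primary.log", "calc2/secondary.log"],
+             options = options,
+             processes = 2,
+         )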
+     """
+     if len(log_files) == 0:
+         # Give up now.
+         return []
+     
+     if auxiliary_files is None:
+         auxiliary_files = [{}] * len(log_files)
+     
+     if len(log_files) < processes:
+         processes = len(log_files)
+     
+     # Sort out our pool if we need to.
+     own_pool = False
+     if pool is None:
+         own_pool = True
+         pool = multiprocessing.Pool(processes, initializer = init_func, initargs = init_args if init_args is not None else [])
+     
+     # Do some parsing.
+     try:
+         result_lists = list(
+             filterfalse(
+                 lambda x: x is None,
+                 pool.starmap(partial(multi_parser, options = options, format_hint = format_hint, keep_archive = keep_archive), zip_longest(log_files, auxiliary_files, fillvalue = {}))
+             )
+         )
+         
+         if keep_archive:
+             return [(result, archive) for results, archive in result_lists for result in results]
+         
+         else:
+             return [result for result_list in result_lists for result in result_list]
+     
+     finally:
+         # Do some cleanup if we need to.
+         if own_pool:
+             pool.__exit__(None, None, None)
+ 
+ def parse_and_merge_calculations(*log_files, auxiliary_files = None, options, format_hint = "auto", inner_pool = None, keep_archive = False):
+     """
+     Get a single result object by parsing a number of computational log files.
+     
+     Multiple log files can be given both from the same calculation and from multiple different calculations.
+     If multiple different calculations are given, the individually parsed results will be merged together (which may give bizarre results if the calculations are unrelated, eg if they are of different molecules).
+     
+     Example::
+     
+         parse_and_merge_calculations(['calc1/primary.log', 'calc1/secondary.log'], 'calc2/calc.log', 'calc3/calc.log')
+     
+     This would parse three separate calculations (calc1, calc2 and calc3), of which the first is contained in two output files (primary.log and secondary.log), merging the result sets together.
+     
+     :param log_files: A list of paths to computational chemistry log files to parse. If more than one file is given, each is assumed to correspond to a separate calculation, in which case the parsed results will be merged together. In addition, each given 'log file' can be an iterable of log file paths, which will be considered to correspond to an individual calculation.
+     :param options: A Digichem options nested dictionary containing options to control parsing.
+     :param format_hint: A hint as to the format of the given log files. Either 'auto' (to guess), 'log' (calc log file), 'sir' (digichem result file) or 'sid' (digichem database file).
+     :param auxiliary_files: A list of dictionaries of auxiliary files. The ordering of auxiliary_files should match that of log_files.
+     :return: A single Result_set object (or child thereof).
+     """
+     parsed_results = parse_multiple_calculations(*log_files, options = options, format_hint = format_hint, pool = inner_pool, auxiliary_files = auxiliary_files, keep_archive = keep_archive)
+     
+     # If we asked for archives as well, unpack.
+     if keep_archive:
+         parsed_results, archives = list(map(list, zip(*parsed_results)))
+     
+     # If we have more than one result, merge them together.
+     if len(parsed_results) > 1:
+         parsed_results = Merged.from_results(*parsed_results, options = options)
+     
+     elif len(parsed_results) == 0:
+         parsed_results = None
+     
+     else:
+         parsed_results = parsed_results[0]
+     
+     if keep_archive:
+         return (parsed_results, archives)
+     
+     else:
+         return parsed_results
+ 
+ def multi_merger_parser(log_files, auxiliary_files, *, options, format_hint = "auto", inner_pool = None, keep_archive = False):
+     """
+     The inner function which will be called in parallel to parse and merge files.
+     """
+     try:
+         return parse_and_merge_calculations(*log_files, options = options, format_hint = format_hint, inner_pool = inner_pool, auxiliary_files = auxiliary_files, keep_archive = keep_archive)
+     
+     except Exception:
+         digichem.log.get_logger().warning("Unable to parse and merge calculation results '{}'; skipping".format(", ".join([str(log_file) for log_file in log_files])), exc_info = True)
+         return None
+ 
+ def parse_and_merge_multiple_calculations(*multiple_results, options, format_hint = "auto", init_func = None, init_args = None, processes = None, auxiliary_files = None, keep_archive = False):
+     """
+     Parse a number of separate calculation results in parallel, merging some or all of the results into combined result sets.
+     
+     :param multiple_results: A list of two dimensions, where the first dimension is a list of separate results to process, and the second dimension is a list of results that should be merged together.
+     :param options: A Digichem options nested dictionary containing options to control parsing.
+     :param format_hint: A hint as to the format of the given log files. Either 'auto' (to guess), 'log' (calc log file), 'sir' (digichem result file) or 'sid' (digichem database file).
+     :param init_func: An optional function to call to init each newly created process.
+     :param init_args: Optional arguments to pass to init_func.
+     :param processes: The max number of processes to create the new pool object with.
+     :param auxiliary_files: An optional list of lists of dicts of auxiliary files. Each item in auxiliary_files should match the corresponding log file in multiple_results.
+     :return: A list of Result_set objects (or children thereof).
+     """
+     if auxiliary_files is None:
+         auxiliary_files = [None] * len(multiple_results)
+     
+     # Do some parsing.
+     # TODO: This parallelization isn't ideal; currently we process each group of to-be merged calcs separately, meaning processes can be wasted.
+     # Create the pool before the try block so the finally clause can always close it safely.
+     pool = multiprocessing.Pool(processes, initializer = init_func, initargs = init_args if init_args is not None else [])
+     
+     try:
+         result_lists = list(
+             filterfalse(
+                 lambda x: x is None,
+                 map(partial(multi_merger_parser, options = options, format_hint = format_hint, inner_pool = pool, keep_archive = keep_archive), multiple_results, auxiliary_files)
+             )
+         )
+         
+         return result_lists
+     
+     finally:
+         pool.__exit__(None, None, None)
+ 
+ 
+ class open_for_parsing():
+     """
+     A context manager for opening log files for parsing. Returns a list of pathlib.Path objects suitable for parsing.
+     
+     Currently, the main purpose of this context manager is to intelligently handle unpacking of archives (.zip, .tar etc) for parsing.
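+     
+     Example (a minimal sketch; the archive name is hypothetical and 'options' is a Digichem options dictionary as described in parse_calculation())::
+     
+         with open_for_parsing("calc.tar.gz") as log_file_paths:
+             result = from_log_files(*log_file_paths).process(options)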
+     """
+     
+     def __init__(self, *log_files):
+         log_files = [Path(log_file).resolve() for log_file in log_files]
+         
+         # Check we haven't been given any duplicate log files.
+         # This is just for convenience; if duplicates have been given, the user has probably made a mistake.
+         duplicates = [path for path, number in collections.Counter(log_files).items() if number > 1]
+         
+         for duplicate in duplicates:
+             warnings.warn("Ignoring duplicate log file: {}".format(duplicate))
+         
+         # Remove duplicates but retain order.
+         self.log_files = list(dict.fromkeys(log_files).keys())
+         
+         # A list of temporary directory paths that should be deleted when we are finished.
+         self.temp_dirs = []
+     
+     @classmethod
+     def archive_formats(cls):
+         """
+         Get a list of supported archive formats.
+         """
+         return list(itertools.chain(*[extensions for name, extensions, desc in shutil.get_unpack_formats()]))
+     
+     def __enter__(self):
+         """
+         'Open' files for reading.
+         """
+         return self.open()
+     
+     def open(self):
+         """
+         'Open' files for reading.
+         """
+         new_log_files = []
+         
+         formats = self.archive_formats()
+         
+         for log_file in self.log_files:
+             
+             found_child_archive = None
+             
+             # If 'log_file' is a directory, check for an archive inside called 'Output.xxx'.
+             for archive_format in formats:
+                 child_archive = Path(log_file, "Output" + archive_format)
+                 if child_archive.exists():
+                     if not found_child_archive:
+                         # Found an Output dir archive, use this instead.
+                         new_log_files.extend(self.extract(child_archive))
+                         found_child_archive = child_archive
+                     
+                     else:
+                         # For now, only care about the first.
+                         warnings.warn("Ignoring subsequent Output archive '{}'; already found '{}'".format(child_archive, found_child_archive))
+             
+             # No need to check 'found_child_archive' here; a file cannot simultaneously be a directory containing an archive and also an archive itself.
+             if "".join(log_file.suffixes) in formats:
+                 # This is an archive format.
+                 # Add any files/directories that were unpacked.
+                 new_log_files.extend(self.extract(log_file))
+             
+             elif not found_child_archive:
+                 # Non-archive file, add normally.
+                 new_log_files.append(log_file)
+         
+         return new_log_files
+     
+     def extract(self, file_name):
+         """
+         Extract an archive and return the contained log files.
+         """
+         # Get a temp dir to extract to.
+         # We can't use TemporaryDirectory here, because these are auto deleted on program exit. This is not compatible with multi-processing.
+         tempdir = mkdtemp()
+         self.temp_dirs.append(tempdir)
+         
+         # Extract to it.
+         digichem.log.get_logger().info("Extracting archive '{}'...".format(file_name))
+         shutil.unpack_archive(file_name, tempdir)
+         
+         # Add any files/directories that were unpacked.
+         return Path(tempdir).glob("*")
+     
+     def cleanup(self):
+         """
+         Perform cleanup of any open files.
+         
+         Alias for __exit__().
+         """
+         return self.__exit__()
+     
+     def __exit__(self, exc_type = None, exc_value = None, traceback = None):
+         """
+         'Close' any open files.
+         """
+         for tempdir in self.temp_dirs:
+             shutil.rmtree(tempdir, ignore_errors = True)
digichem/result/__init__.py ADDED
@@ -0,0 +1,6 @@
+ # Import top-level objects for easier access.
+ from .base import Result_object
+ from .base import Result_container
+ from .base import Unmergeable_container_mixin
+ from .base import Floatable_mixin
+ from .result import Result_set
digichem/result/alignment/AA.py ADDED
@@ -0,0 +1,114 @@
+ from statistics import mean
+ import math
+ import statistics as stats
+ 
+ from digichem.result.alignment.FAP import Furthest_atom_pair
+ from digichem.result.alignment import Axis_swapper_mix
+ 
+ class Average_angle(Furthest_atom_pair, Axis_swapper_mix):
+     """
+     An enhancement to the Kebab skewer method for estimating molecule linearity, featuring a different method to determine a molecule's long axis.
+     """
+     
+     # Names that uniquely describe this alignment protocol.
+     CLASS_HANDLE = ["Average Angle", "AA", "Kebab+"]
+     
+     def align_axes(self):
+         """
+         Realign the axes of our coordinate system, so they have the following meaning:
+         
+         X-axis: The long axis (the kebab skewer), which we define as passing through the pair of atoms with the greatest separation in our set.
+         Y-axis: The middle axis, which we define as perpendicular to the X-axis (obviously) and passing through the atom that is furthest from the X-axis. Note that the Y-axis only has to pass through one atom; there may not be a corresponding atom on the other side (but there will be if the molecule is symmetrical about the X-axis).
+         Z-axis: The short axis, defined as perpendicular to both the X and Y-axes (so we have no choice where this goes).
+         
+         :return: Nothing. The atoms are rearranged in place.
+         """
+         self.align_X()
+         self.align_Y()
+         self.align_Z()
+     
+     def get_average_angle(self, thetas):
+         """
+         Get the average angle from a list of angles.
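+         
+         This is the circular (vector) mean: each angle is treated as a unit vector, the vectors are averaged, and the angle of the resulting vector is returned (adjusted to fall within one full turn).
+         
+         Example (illustrative)::
+         
+             # The arithmetic mean of 359 and 1 degrees is 180 degrees, but the
+             # circular mean is ~0 degrees (returned here as ~2*pi radians).
+             theta = self.get_average_angle([math.radians(359), math.radians(1)])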
+         """
+         average_sin = stats.mean([math.sin(angle) for angle in thetas])
+         average_cos = stats.mean([math.cos(angle) for angle in thetas])
+         
+         raw_angle = math.atan(average_sin / average_cos)
+         
+         # Adjust the raw atan value so it falls within a full turn, based on the quadrant of the averaged vector.
+         if average_sin > 0 and average_cos > 0:
+             return raw_angle
+         
+         elif average_cos < 0:
+             return raw_angle + math.pi
+         
+         else:
+             return raw_angle + math.pi * 2
+     
+     def align_X(self):
+         """
+         Align the X axis.
+         
+         You do not need to call this method yourself; it is called as part of align_axes().
+         
+         Unlike normal Kebab, we don't align our X axis along the line drawn between our two most separated atoms. Instead, our X axis is defined so as to pass as close to as many points as possible, more accurately passing through the molecule.
+         """
+         # First get a list of all our coordinates.
+         coords = self.get_coordinate_list()
+         
+         # Set our origin so as to be in the middle of all our points.
+         self.translate((-mean(coords[0]), -mean(coords[1]), -mean(coords[2])))
+         
+         # Get an average angle from all our points.
+         # Rotate xy coords (about the Z axis).
+         theta = self.get_average_angle([math.atan2(atom.coords[1], atom.coords[0]) for atom in self])
+         self.rotate_XY(theta)
+         
+         # Rotate xz coords (about the Y axis).
+         # Get our angle again.
+         theta = self.get_average_angle([math.atan2(atom.coords[2], atom.coords[0]) for atom in self])
+         self.rotate_XZ(theta)
+     
+     def align_Y(self):
+         """
+         Align the Y axis.
+         
+         You do not need to call this method yourself; it is called as part of align_axes().
+         """
+         # Rotate yz coords (about the X axis).
+         # Get our angle.
+         theta = self.get_average_angle([math.atan2(atom.coords[2], atom.coords[1]) for atom in self])
+         self.rotate_YZ(theta)
+     
+     def align_Z(self):
+         """
+         Align the Z axis.
+         
+         You do not need to call this method yourself; it is called as part of align_axes().
+         
+         In Kebab+, this method can reorientate the molecule, as it ensures X is the longest axis, Y is the second longest, and Z is the shortest.
+         """
+         self.reassign_axes()