psdi-data-conversion 0.0.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- psdi_data_conversion/__init__.py +11 -0
- psdi_data_conversion/app.py +242 -0
- psdi_data_conversion/bin/linux/atomsk +0 -0
- psdi_data_conversion/bin/linux/c2x +0 -0
- psdi_data_conversion/bin/mac/atomsk +0 -0
- psdi_data_conversion/bin/mac/c2x +0 -0
- psdi_data_conversion/constants.py +185 -0
- psdi_data_conversion/converter.py +459 -0
- psdi_data_conversion/converters/__init__.py +6 -0
- psdi_data_conversion/converters/atomsk.py +32 -0
- psdi_data_conversion/converters/base.py +702 -0
- psdi_data_conversion/converters/c2x.py +32 -0
- psdi_data_conversion/converters/openbabel.py +239 -0
- psdi_data_conversion/database.py +1064 -0
- psdi_data_conversion/dist.py +87 -0
- psdi_data_conversion/file_io.py +216 -0
- psdi_data_conversion/log_utility.py +241 -0
- psdi_data_conversion/main.py +776 -0
- psdi_data_conversion/scripts/atomsk.sh +32 -0
- psdi_data_conversion/scripts/c2x.sh +26 -0
- psdi_data_conversion/security.py +38 -0
- psdi_data_conversion/static/content/accessibility.htm +254 -0
- psdi_data_conversion/static/content/convert.htm +121 -0
- psdi_data_conversion/static/content/convertato.htm +65 -0
- psdi_data_conversion/static/content/convertc2x.htm +65 -0
- psdi_data_conversion/static/content/documentation.htm +94 -0
- psdi_data_conversion/static/content/feedback.htm +53 -0
- psdi_data_conversion/static/content/header-links.html +8 -0
- psdi_data_conversion/static/content/index-versions/header-links.html +8 -0
- psdi_data_conversion/static/content/index-versions/psdi-common-footer.html +99 -0
- psdi_data_conversion/static/content/index-versions/psdi-common-header.html +28 -0
- psdi_data_conversion/static/content/psdi-common-footer.html +99 -0
- psdi_data_conversion/static/content/psdi-common-header.html +28 -0
- psdi_data_conversion/static/content/report.htm +103 -0
- psdi_data_conversion/static/data/data.json +143940 -0
- psdi_data_conversion/static/img/colormode-toggle-dm.svg +3 -0
- psdi_data_conversion/static/img/colormode-toggle-lm.svg +3 -0
- psdi_data_conversion/static/img/psdi-icon-dark.svg +136 -0
- psdi_data_conversion/static/img/psdi-icon-light.svg +208 -0
- psdi_data_conversion/static/img/psdi-logo-darktext.png +0 -0
- psdi_data_conversion/static/img/psdi-logo-lighttext.png +0 -0
- psdi_data_conversion/static/img/social-logo-bluesky-black.svg +4 -0
- psdi_data_conversion/static/img/social-logo-bluesky-white.svg +4 -0
- psdi_data_conversion/static/img/social-logo-instagram-black.svg +1 -0
- psdi_data_conversion/static/img/social-logo-instagram-white.svg +1 -0
- psdi_data_conversion/static/img/social-logo-linkedin-black.png +0 -0
- psdi_data_conversion/static/img/social-logo-linkedin-white.png +0 -0
- psdi_data_conversion/static/img/social-logo-mastodon-black.svg +4 -0
- psdi_data_conversion/static/img/social-logo-mastodon-white.svg +4 -0
- psdi_data_conversion/static/img/social-logo-x-black.svg +3 -0
- psdi_data_conversion/static/img/social-logo-x-white.svg +3 -0
- psdi_data_conversion/static/img/social-logo-youtube-black.png +0 -0
- psdi_data_conversion/static/img/social-logo-youtube-white.png +0 -0
- psdi_data_conversion/static/img/ukri-epsr-logo-darktext.png +0 -0
- psdi_data_conversion/static/img/ukri-epsr-logo-lighttext.png +0 -0
- psdi_data_conversion/static/img/ukri-logo-darktext.png +0 -0
- psdi_data_conversion/static/img/ukri-logo-lighttext.png +0 -0
- psdi_data_conversion/static/javascript/accessibility.js +196 -0
- psdi_data_conversion/static/javascript/common.js +42 -0
- psdi_data_conversion/static/javascript/convert.js +296 -0
- psdi_data_conversion/static/javascript/convert_common.js +252 -0
- psdi_data_conversion/static/javascript/convertato.js +107 -0
- psdi_data_conversion/static/javascript/convertc2x.js +107 -0
- psdi_data_conversion/static/javascript/data.js +176 -0
- psdi_data_conversion/static/javascript/format.js +611 -0
- psdi_data_conversion/static/javascript/load_accessibility.js +89 -0
- psdi_data_conversion/static/javascript/psdi-common.js +177 -0
- psdi_data_conversion/static/javascript/report.js +381 -0
- psdi_data_conversion/static/styles/format.css +147 -0
- psdi_data_conversion/static/styles/psdi-common.css +705 -0
- psdi_data_conversion/templates/index.htm +114 -0
- psdi_data_conversion/testing/__init__.py +5 -0
- psdi_data_conversion/testing/constants.py +12 -0
- psdi_data_conversion/testing/conversion_callbacks.py +394 -0
- psdi_data_conversion/testing/conversion_test_specs.py +208 -0
- psdi_data_conversion/testing/utils.py +522 -0
- psdi_data_conversion-0.0.23.dist-info/METADATA +663 -0
- psdi_data_conversion-0.0.23.dist-info/RECORD +81 -0
- psdi_data_conversion-0.0.23.dist-info/WHEEL +4 -0
- psdi_data_conversion-0.0.23.dist-info/entry_points.txt +2 -0
- psdi_data_conversion-0.0.23.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,459 @@
|
|
1
|
+
"""@file psdi-data-conversion/psdi_data_conversion/converter.py
|
2
|
+
|
3
|
+
Created 2024-12-10 by Bryan Gillis.
|
4
|
+
|
5
|
+
Class and functions to perform file conversion
|
6
|
+
"""
|
7
|
+
|
8
|
+
from dataclasses import dataclass, field
|
9
|
+
import os
|
10
|
+
import importlib
|
11
|
+
import sys
|
12
|
+
from tempfile import TemporaryDirectory
|
13
|
+
import traceback
|
14
|
+
from typing import Any, Callable, NamedTuple
|
15
|
+
from psdi_data_conversion import constants as const
|
16
|
+
from psdi_data_conversion.converters import base
|
17
|
+
|
18
|
+
import glob
|
19
|
+
|
20
|
+
from psdi_data_conversion.file_io import (is_archive, is_supported_archive, pack_zip_or_tar, split_archive_ext,
|
21
|
+
unpack_zip_or_tar)
|
22
|
+
|
23
|
+
# Find all modules for specific converters
|
24
|
+
l_converter_modules = glob.glob(os.path.dirname(base.__file__) + "/*.py")

try:

    class NameAndClass(NamedTuple):
        """Pairing of a converter's name with the `FileConverter` subclass which implements it
        """
        name: str
        converter_class: type[base.FileConverter]

    def get_converter_name_and_class(module_path: str) -> NameAndClass | None:
        """Import the converter module found at `module_path` and report the converter it defines

        Parameters
        ----------
        module_path : str
            Path to a `.py` file in the `psdi_data_conversion.converters` package

        Returns
        -------
        NameAndClass | None
            The converter's name and class, or None if the module is `base` or `__init__`, or if it does not assign
            a `FileConverter` subclass to a module-level `converter` variable (in which case an error message is
            also printed to stderr)
        """

        module_name = os.path.splitext(os.path.basename(module_path))[0]

        # Neither the base module nor the package __init__ is expected to define a converter
        if module_name == "base" or module_name == "__init__":
            return None

        package_name = "psdi_data_conversion.converters"
        module = importlib.import_module(f".{module_name}", package=package_name)

        # A valid converter module exposes a FileConverter subclass through its `converter` variable
        if hasattr(module, "converter") and issubclass(module.converter, base.FileConverter):
            found_class = module.converter
            return NameAndClass(found_class.name, found_class)

        print(f"ERROR: Module `{module_name}` in package `{package_name}` fails to define a converter to the "
              "variable `converter` which is a subclass of `FileConverter`.", file=sys.stderr)
        return None

    # Import each candidate module in turn, keeping only those which yielded a converter
    l_converter_names_and_classes = [entry for entry in map(get_converter_name_and_class, l_converter_modules)
                                     if entry is not None]

    # Constant dict and list of every converter which could be imported
    D_SUPPORTED_CONVERTERS: dict[str, type[base.FileConverter]] = {
        entry.name: entry.converter_class for entry in l_converter_names_and_classes}
    L_SUPPORTED_CONVERTERS: list[str] = list(D_SUPPORTED_CONVERTERS)

    # Constant dict and list of the subset of converters which report that they can be registered
    D_REGISTERED_CONVERTERS = {supported_name: supported_class
                               for supported_name, supported_class in D_SUPPORTED_CONVERTERS.items()
                               if supported_class.can_be_registered()}
    L_REGISTERED_CONVERTERS: list[str] = list(D_REGISTERED_CONVERTERS)

    # Per-converter tables of allowed flags, allowed options, and both combined ("args")
    _d_converter_flags, _d_converter_options, _d_converter_args = {}, {}, {}
    for name, converter_class in D_SUPPORTED_CONVERTERS.items():
        # A converter may leave these attributes unset or empty, in which case an empty tuple stands in
        l_flags = converter_class.allowed_flags or ()
        l_options = converter_class.allowed_options or ()
        _d_converter_flags[name] = l_flags
        _d_converter_options[name] = l_options
        _d_converter_args[name] = (*l_flags, *l_options)
    D_CONVERTER_FLAGS: dict[str, tuple[tuple[str, dict[str, Any], Callable]]] = _d_converter_flags
    D_CONVERTER_OPTIONS: dict[str, tuple[tuple[str, dict[str, Any], Callable]]] = _d_converter_options
    D_CONVERTER_ARGS: dict[str, tuple[tuple[str, dict[str, Any], Callable]]] = _d_converter_args

except Exception:
    # Registration failed part-way - report the traceback and fall back to empty registries so that this module
    # can still be imported
    print(f"ERROR: Failed to register converters. Exception was: \n{traceback.format_exc()}", file=sys.stderr)
    D_SUPPORTED_CONVERTERS: dict[str, type[base.FileConverter]] = {}
    L_SUPPORTED_CONVERTERS: list[str] = []
    D_REGISTERED_CONVERTERS: dict[str, type[base.FileConverter]] = {}
    L_REGISTERED_CONVERTERS: list[str] = []
    D_CONVERTER_FLAGS = {}
    D_CONVERTER_OPTIONS = {}
    D_CONVERTER_ARGS = {}
|
91
|
+
|
92
|
+
|
93
|
+
def get_converter(*args, name=const.CONVERTER_DEFAULT, **converter_kwargs) -> base.FileConverter:
    """Construct a FileConverter of the subclass registered under the requested converter name

    All positional arguments and all keyword arguments other than `name` are forwarded unchanged to the constructor
    of the selected converter class.

    Parameters
    ----------
    filename : str
        Filename of the input file to convert, either fully-qualified or relative to the current directory
    to_format : str
        Format to convert to, given as the file extension (e.g. "cif")
    from_format : str | None
        Format to convert from, given as the file extension (e.g. "pdb"). If None (default), it is determined from
        the extension of `filename`
    name : str
        Name of the converter to use, by default `const.CONVERTER_DEFAULT` ('Open Babel')
    data : dict[str | Any] | None
        Any other data needed by a converter or wanted for extra logging information, default empty dict
    abort_callback : Callable[[int], None]
        Function called if the conversion hits an error and must be aborted, default `abort_raise`, which raises an
        appropriate exception
    use_envvars : bool
        If True, environment variables which set options for this class will be checked for and used, default False
    upload_dir : str
        Location of input files relative to the current directory
    download_dir : str
        Location of output files relative to the current directory
    max_file_size : float
        Maximum allowed file size for input/output files, in MB, default 1 MB. If 0, will be unlimited
    no_check : bool
        If False (default), will check at setup whether or not the specified converter supports a conversion
        between the desired file formats
    log_file : str | None
        If provided, all logging will go to this single file or stream. Otherwise, logs will be split up among
        multiple files for server-style logging.
    log_mode : str
        How logs should be stored. Allowed values are:
        - 'full' - Multi-file logging, only recommended when running as a public web app
        - 'simple' - Logs saved to one file
        - 'stdout' - Output logs and errors only to stdout
        - 'none' - Output only errors to stdout
    log_level : int | None
        Level to log output at. If None (default), the level will depend on the chosen `log_mode`:
        - 'full' or 'simple': INFO
        - 'stdout' - INFO to stdout, no logging to file
        - 'none' - ERROR to stdout, no logging to file
    refresh_local_log : bool
        If True, the local log generated from this run will be overwritten. If False it will be appended to. Default
        True
    delete_input : bool
        Whether or not to delete input files after conversion, default False

    Returns
    -------
    FileConverter
        An instance of the FileConverter subclass registered under `name`

    Raises
    ------
    FileConverterInputException
        If the converter isn't recognized or there's some other issue with the input
    """
    if name in L_REGISTERED_CONVERTERS:
        return D_REGISTERED_CONVERTERS[name](*args, **converter_kwargs)

    # Unknown converter name - include the list of registered converters in the error message
    raise base.FileConverterInputException(const.ERR_CONVERTER_NOT_RECOGNISED.format(name) +
                                           f"{L_REGISTERED_CONVERTERS}")
|
160
|
+
|
161
|
+
|
162
|
+
@dataclass
|
163
|
+
class FileConversionRunResult:
|
164
|
+
"""An object of this class will be output by the `run_converter` function on success to provide key info on
|
165
|
+
the files created
|
166
|
+
"""
|
167
|
+
# Lists of results from each individual conversion
|
168
|
+
l_output_filenames: list[str] = field(default_factory=list)
|
169
|
+
l_log_filenames: list[str] = field(default_factory=list)
|
170
|
+
l_in_size: list[int] = field(default_factory=list)
|
171
|
+
l_out_size: list[int] = field(default_factory=list)
|
172
|
+
status_code: int = 0
|
173
|
+
|
174
|
+
# If only one conversion was performed, these variables will hold the results for that conversion. Otherwise they
|
175
|
+
# will point to summary files / hold the combined size
|
176
|
+
output_filename: str | None = None
|
177
|
+
log_filename: str | None = None
|
178
|
+
in_size: int = field(init=False)
|
179
|
+
out_size: int = field(init=False)
|
180
|
+
|
181
|
+
def __post_init__(self):
|
182
|
+
"""Calculate appropriate values where possible - in_size and out_size are the sum of individual sizes, and if
|
183
|
+
only one run was performed, we can set the output and log filenames to the filenames from that one run
|
184
|
+
"""
|
185
|
+
if self.output_filename is None and len(self.l_output_filenames) == 1:
|
186
|
+
self.output_filename = self.l_output_filenames[0]
|
187
|
+
if self.log_filename is None and len(self.l_log_filenames) == 1:
|
188
|
+
self.log_filename = self.l_log_filenames[0]
|
189
|
+
|
190
|
+
self.in_size = sum(self.l_in_size)
|
191
|
+
self.out_size = sum(self.l_out_size)
|
192
|
+
|
193
|
+
|
194
|
+
def check_from_format(filename: str,
                      from_format: str,
                      strict=False) -> bool:
    """Report whether an input file's name ends with the extension expected for its format

    Parameters
    ----------
    filename : str
        The name of the input file
    from_format : str
        The expected format (extension), with or without a leading dot
    strict : bool, optional
        If True, a mismatch raises an exception. Otherwise (default) a warning is printed to stderr and False is
        returned

    Returns
    -------
    bool
        True if the filename ends with the expected extension, False otherwise

    Raises
    ------
    base.FileConverterInputException
        If `strict` is True and the filename does not end with the expected extension
    """

    # Quietly normalise the expected extension so that it always carries a leading dot
    expected_ext = from_format if from_format.startswith(".") else f".{from_format}"

    if not filename.endswith(expected_ext):
        msg = const.ERR_WRONG_EXTENSIONS.format(file=os.path.basename(filename), ext=expected_ext)

        if strict:
            raise base.FileConverterInputException(msg)

        print(f"WARNING: {msg}", file=sys.stderr)
        return False

    return True
|
234
|
+
|
235
|
+
|
236
|
+
def run_converter(filename: str,
                  to_format: str,
                  *args,
                  from_format: str | None = None,
                  download_dir=const.DEFAULT_DOWNLOAD_DIR,
                  max_file_size=const.DEFAULT_MAX_FILE_SIZE,
                  log_file: str | None = None,
                  log_mode=const.LOG_SIMPLE,
                  strict=False,
                  archive_output=True,
                  **converter_kwargs) -> FileConversionRunResult:
    """Shortcut to create and run a FileConverter in one step

    Parameters
    ----------
    filename : str
        Either the filename of the input file to be converted or of an archive file containing files to be converted
        (zip and tar supported), either relative to current directory or fully-qualified. If an archive is provided,
        the contents will be converted and then packed into an archive of the same type
    to_format : str
        The desired format to convert to, as the file extension (e.g. "cif")
    from_format : str | None
        The format to convert from, as the file extension (e.g. "pdb"). If None is provided (default), will be
        determined from the extension of `filename` if it's a simple file, or the contained files if `filename` is an
        archive file
    name : str
        The desired converter type, by default 'Open Babel'
    data : dict[str | Any] | None
        A dict of any other data needed by a converter or for extra logging information, default empty dict
    abort_callback : Callable[[int], None]
        Function to be called if the conversion hits an error and must be aborted, default `abort_raise`, which
        raises an appropriate exception
    use_envvars : bool
        If set to True, environment variables will be checked for any that set options for this class and used,
        default False
    upload_dir : str
        The location of input files relative to the current directory
    download_dir : str
        The location of output files relative to the current directory
    strict : bool
        If True and `from_format` is not None, will fail if any input file has the wrong extension (including files
        within archives, but not the archives themselves). Otherwise, will only print a warning in this case
    archive_output : bool
        If True (default) and the input file is an archive (i.e. zip or tar file), the converted files will be archived
        into a file of the same format, their logs will be combined into a single log, and the converted files and
        individual logs will be deleted
    max_file_size : float
        The maximum allowed file size for input/output files, in MB, default 1 MB. If 0, will be unlimited. If an
        archive of files is provided, this will apply to the total of all files contained in it
    no_check : bool
        If False (default), will check at setup whether or not a conversion between the desired file formats is
        supported with the specified converter
    log_file : str | None
        If provided, all logging will go to a single file or stream. Otherwise, logs will be split up among multiple
        files for server-style logging.
    log_mode : str
        How logs should be stored. Allowed values are:
        - 'full' - Multi-file logging, only recommended when running as a public web app
        - 'simple' - Logs saved to one file
        - 'stdout' - Output logs and errors only to stdout
        - 'none' - Output only errors to stdout
    log_level : int | None
        The level to log output at. If None (default), the level will depend on the chosen `log_mode`:
        - 'full' or 'simple': INFO
        - 'stdout' - INFO to stdout, no logging to file
        - 'none' - ERROR to stdout, no logging to file
    refresh_local_log : bool
        If True, the local log generated from this run will be overwritten. If False it will be appended to. Default
        True
    delete_input : bool
        Whether or not to delete input files after conversion, default False

    Returns
    -------
    FileConversionRunResult
        An object containing the filenames of output files and logs created, and input/output file sizes

    Raises
    ------
    FileConverterInputException
        If the converter isn't recognized or there's some other issue with the input (including an unsupported or
        empty archive)
    FileConverterAbortException
        If something goes wrong during the conversion process
    FileConverterSizeException
        If the overall status code of an archive conversion is `const.STATUS_CODE_SIZE`
    FileConverterException
        If the log file expected from an individual conversion within an archive cannot be found
    """

    # Set the log file if it was unset - note that in server logging mode, this value won't be used within the
    # converter class, so it needs to be set up here to match what will be set up there
    if log_file is None:
        base_filename = os.path.basename(split_archive_ext(filename)[0])
        log_file = os.path.join(download_dir, base_filename + const.OUTPUT_LOG_EXT)

    # Check if the filename is for an archive file, and handle appropriately

    l_run_output: list[base.FileConversionResult] = []

    file_is_archive = is_archive(filename)

    # Status code for the overall success of the process
    status_code = 0

    if not file_is_archive:
        # Not an archive, so just get and run the converter straightforwardly
        if from_format is not None:
            check_from_format(filename, from_format, strict=strict)
        l_run_output.append(get_converter(filename,
                                          to_format,
                                          *args,
                                          from_format=from_format,
                                          download_dir=download_dir,
                                          max_file_size=max_file_size,
                                          log_file=log_file,
                                          log_mode=log_mode,
                                          **converter_kwargs).run())

    elif not is_supported_archive(filename):
        raise base.FileConverterInputException(f"{filename} is an unsupported archive type. Supported types are: "
                                               f"{const.D_SUPPORTED_ARCHIVE_FORMATS}")

    else:
        # The filename is of a supported archive type. Make a temporary directory to extract its contents
        # to, then run the converter on each file extracted
        with TemporaryDirectory() as extract_dir:
            l_filenames = unpack_zip_or_tar(filename, extract_dir=extract_dir)

            # Check for no files in archive
            if len(l_filenames) == 0:
                raise base.FileConverterInputException(const.ERR_EMPTY_ARCHIVE)

            # First check for files of invalid type, to avoid converting if one will cause a failure
            if from_format is not None:
                for extracted_filename in l_filenames:
                    check_from_format(extracted_filename, from_format, strict=strict)

            # Keep track of the file size budget
            remaining_file_size = max_file_size

            for extracted_filename in l_filenames:
                # Make a filename for the log for this particular conversion
                individual_log_file = os.path.join(extract_dir,
                                                   os.path.basename(os.path.splitext(extracted_filename)[0]) +
                                                   const.OUTPUT_LOG_EXT)

                # If the log mode is "full", set it to "full-force" for the individual runs to force use of the log file
                # name we set up for it
                individual_log_mode = log_mode if log_mode != const.LOG_FULL else const.LOG_FULL_FORCE

                try:
                    individual_run_output = get_converter(extracted_filename,
                                                          to_format,
                                                          *args,
                                                          from_format=from_format,
                                                          download_dir=download_dir,
                                                          log_file=individual_log_file,
                                                          log_mode=individual_log_mode,
                                                          max_file_size=remaining_file_size,
                                                          **converter_kwargs).run()
                except base.FileConverterAbortException as e:
                    # If the run fails, create a run output object to indicate that
                    individual_run_output = base.FileConversionResult(log_filename=individual_log_file,
                                                                      status_code=e.status_code)
                    status_code = max((status_code, e.status_code))
                    # If we specifically have a failure due to the size being exceeded, stop here, since no further
                    # runs are allowed
                    if isinstance(e, base.FileConverterSizeException):
                        l_run_output.append(individual_run_output)
                        break

                l_run_output.append(individual_run_output)

                # Reduce the file size limit by how much was used here
                # NOTE(review): the docstring says a limit of 0 means "unlimited", so a budget which starts at 0 goes
                # negative here and one which is exactly used up becomes 0 - confirm how the converter class treats
                # those values, and that the sizes subtracted are in the same units as the limit
                remaining_file_size -= max((individual_run_output.in_size, individual_run_output.out_size))

            # Combine the output logs into a single log. This must happen before the temporary directory is cleaned
            # up, since the individual logs are written inside it
            with open(log_file, "w") as fo:
                for individual_run_output in l_run_output:
                    individual_log_filename = individual_run_output.log_filename
                    if not os.path.exists(individual_log_filename):
                        raise base.FileConverterException(f"Expected log file '{individual_log_filename}' cannot be "
                                                          "found")
                    # Close the individual log before removing it, rather than relying on garbage collection to
                    # release the handle
                    with open(individual_log_filename, "r") as fi:
                        fo.write(fi.read() + "\n")
                    os.remove(individual_log_filename)

    # Combine the possibly-multiple FileConversionResults objects into a single FileConversionRunResult
    run_output = FileConversionRunResult(*zip(*[(x.output_filename,
                                                 x.log_filename,
                                                 x.in_size,
                                                 x.out_size) for x in l_run_output]),
                                         log_filename=log_file,
                                         status_code=status_code)

    if file_is_archive and archive_output:
        # If we get here, the file is an archive and we want to archive the output

        # Prune any unsuccessful runs from the list of output files
        l_successful_files = [x for x in run_output.l_output_filenames if x is not None]

        if len(l_successful_files) > 0:

            # Determine the directory for the output from the output filenames
            downloads_dir = os.path.split(l_successful_files[0])[0]

            # Create new names for the archive file and log file
            filename_base, ext = split_archive_ext(os.path.basename(filename))
            run_output.output_filename = os.path.join(downloads_dir, f"{filename_base}-{to_format}{ext}")

            # Pack the output files into an archive, cleaning them up afterwards
            pack_zip_or_tar(run_output.output_filename,
                            l_successful_files,
                            cleanup=True)

    # If the run was ultimately unsuccessful, raise an exception now, referencing the output log and including
    # error lines in it
    if status_code:
        msg = const.ERR_CONVERSION_FAILED.format(run_output.log_filename)
        with open(run_output.log_filename, "r") as fi:
            l_output_log_lines = fi.read().splitlines()
        l_error_lines = [line for line in l_output_log_lines if "ERROR" in line]
        msg += "\n".join(l_error_lines)
        if status_code == const.STATUS_CODE_SIZE:
            exception_class = base.FileConverterSizeException
        else:
            exception_class = base.FileConverterAbortException
        raise exception_class(status_code, msg)

    return run_output
|
@@ -0,0 +1,32 @@
|
|
1
|
+
"""@file psdi_data_conversion/converters/atomsk.py
|
2
|
+
|
3
|
+
Created 2025-01-23 by Bryan Gillis.
|
4
|
+
|
5
|
+
Atomsk FileConverter
|
6
|
+
"""
|
7
|
+
|
8
|
+
from psdi_data_conversion.converters.base import ScriptFileConverter
|
9
|
+
|
10
|
+
CONVERTER_ATO = 'Atomsk'


class AtoFileConverter(ScriptFileConverter):
    """ScriptFileConverter subclass which performs conversions with Atomsk, through the `atomsk.sh` script and the
    `atomsk` binary
    """

    name = CONVERTER_ATO
    script = "atomsk.sh"
    required_bin = "atomsk"

    # Information on how the Atomsk binary is obtained and on the license it is redistributed under
    info = ("Atomsk binaries compiled for 64-bit Linux and MacOS systems are distributed with this package. "
            "It may be registered on other systems by compiling it locally and adding the compiled 'atomsk' binary "
            "(with this exact name - rename it or make a symbolic link to it if necessary) to your $PATH.\n\n"
            "Atomsk is licensed under GPLv3, the full text of which may be found at "
            "https://www.gnu.org/licenses/gpl-3.0.en.html. "
            "Its binaries are redistributed here under the terms of this license, and any further redistribution must "
            "also follow these terms. "
            "Its corresponding source code may be found at https://github.com/pierrehirel/atomsk/")


# The psdi_data_conversion.converter module detects and registers a converter through the `converter` variable of its
# module, which is what makes it available to the command-line script, python library, and web app
converter = AtoFileConverter
|