psdi-data-conversion 0.0.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- psdi_data_conversion/__init__.py +11 -0
- psdi_data_conversion/app.py +242 -0
- psdi_data_conversion/bin/linux/atomsk +0 -0
- psdi_data_conversion/bin/linux/c2x +0 -0
- psdi_data_conversion/bin/mac/atomsk +0 -0
- psdi_data_conversion/bin/mac/c2x +0 -0
- psdi_data_conversion/constants.py +185 -0
- psdi_data_conversion/converter.py +459 -0
- psdi_data_conversion/converters/__init__.py +6 -0
- psdi_data_conversion/converters/atomsk.py +32 -0
- psdi_data_conversion/converters/base.py +702 -0
- psdi_data_conversion/converters/c2x.py +32 -0
- psdi_data_conversion/converters/openbabel.py +239 -0
- psdi_data_conversion/database.py +1064 -0
- psdi_data_conversion/dist.py +87 -0
- psdi_data_conversion/file_io.py +216 -0
- psdi_data_conversion/log_utility.py +241 -0
- psdi_data_conversion/main.py +776 -0
- psdi_data_conversion/scripts/atomsk.sh +32 -0
- psdi_data_conversion/scripts/c2x.sh +26 -0
- psdi_data_conversion/security.py +38 -0
- psdi_data_conversion/static/content/accessibility.htm +254 -0
- psdi_data_conversion/static/content/convert.htm +121 -0
- psdi_data_conversion/static/content/convertato.htm +65 -0
- psdi_data_conversion/static/content/convertc2x.htm +65 -0
- psdi_data_conversion/static/content/documentation.htm +94 -0
- psdi_data_conversion/static/content/feedback.htm +53 -0
- psdi_data_conversion/static/content/header-links.html +8 -0
- psdi_data_conversion/static/content/index-versions/header-links.html +8 -0
- psdi_data_conversion/static/content/index-versions/psdi-common-footer.html +99 -0
- psdi_data_conversion/static/content/index-versions/psdi-common-header.html +28 -0
- psdi_data_conversion/static/content/psdi-common-footer.html +99 -0
- psdi_data_conversion/static/content/psdi-common-header.html +28 -0
- psdi_data_conversion/static/content/report.htm +103 -0
- psdi_data_conversion/static/data/data.json +143940 -0
- psdi_data_conversion/static/img/colormode-toggle-dm.svg +3 -0
- psdi_data_conversion/static/img/colormode-toggle-lm.svg +3 -0
- psdi_data_conversion/static/img/psdi-icon-dark.svg +136 -0
- psdi_data_conversion/static/img/psdi-icon-light.svg +208 -0
- psdi_data_conversion/static/img/psdi-logo-darktext.png +0 -0
- psdi_data_conversion/static/img/psdi-logo-lighttext.png +0 -0
- psdi_data_conversion/static/img/social-logo-bluesky-black.svg +4 -0
- psdi_data_conversion/static/img/social-logo-bluesky-white.svg +4 -0
- psdi_data_conversion/static/img/social-logo-instagram-black.svg +1 -0
- psdi_data_conversion/static/img/social-logo-instagram-white.svg +1 -0
- psdi_data_conversion/static/img/social-logo-linkedin-black.png +0 -0
- psdi_data_conversion/static/img/social-logo-linkedin-white.png +0 -0
- psdi_data_conversion/static/img/social-logo-mastodon-black.svg +4 -0
- psdi_data_conversion/static/img/social-logo-mastodon-white.svg +4 -0
- psdi_data_conversion/static/img/social-logo-x-black.svg +3 -0
- psdi_data_conversion/static/img/social-logo-x-white.svg +3 -0
- psdi_data_conversion/static/img/social-logo-youtube-black.png +0 -0
- psdi_data_conversion/static/img/social-logo-youtube-white.png +0 -0
- psdi_data_conversion/static/img/ukri-epsr-logo-darktext.png +0 -0
- psdi_data_conversion/static/img/ukri-epsr-logo-lighttext.png +0 -0
- psdi_data_conversion/static/img/ukri-logo-darktext.png +0 -0
- psdi_data_conversion/static/img/ukri-logo-lighttext.png +0 -0
- psdi_data_conversion/static/javascript/accessibility.js +196 -0
- psdi_data_conversion/static/javascript/common.js +42 -0
- psdi_data_conversion/static/javascript/convert.js +296 -0
- psdi_data_conversion/static/javascript/convert_common.js +252 -0
- psdi_data_conversion/static/javascript/convertato.js +107 -0
- psdi_data_conversion/static/javascript/convertc2x.js +107 -0
- psdi_data_conversion/static/javascript/data.js +176 -0
- psdi_data_conversion/static/javascript/format.js +611 -0
- psdi_data_conversion/static/javascript/load_accessibility.js +89 -0
- psdi_data_conversion/static/javascript/psdi-common.js +177 -0
- psdi_data_conversion/static/javascript/report.js +381 -0
- psdi_data_conversion/static/styles/format.css +147 -0
- psdi_data_conversion/static/styles/psdi-common.css +705 -0
- psdi_data_conversion/templates/index.htm +114 -0
- psdi_data_conversion/testing/__init__.py +5 -0
- psdi_data_conversion/testing/constants.py +12 -0
- psdi_data_conversion/testing/conversion_callbacks.py +394 -0
- psdi_data_conversion/testing/conversion_test_specs.py +208 -0
- psdi_data_conversion/testing/utils.py +522 -0
- psdi_data_conversion-0.0.23.dist-info/METADATA +663 -0
- psdi_data_conversion-0.0.23.dist-info/RECORD +81 -0
- psdi_data_conversion-0.0.23.dist-info/WHEEL +4 -0
- psdi_data_conversion-0.0.23.dist-info/entry_points.txt +2 -0
- psdi_data_conversion-0.0.23.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,702 @@
|
|
1
|
+
"""@file psdi_data_conversion/converters/base.py
|
2
|
+
|
3
|
+
Created 2025-01-23 by Bryan Gillis.
|
4
|
+
|
5
|
+
Base class and information for file format converters
|
6
|
+
"""
|
7
|
+
|
8
|
+
|
9
|
+
from copy import deepcopy
|
10
|
+
from dataclasses import dataclass
|
11
|
+
import logging
|
12
|
+
from collections.abc import Callable
|
13
|
+
import os
|
14
|
+
import subprocess
|
15
|
+
import abc
|
16
|
+
|
17
|
+
import sys
|
18
|
+
import traceback
|
19
|
+
from typing import Any
|
20
|
+
|
21
|
+
from psdi_data_conversion import constants as const, log_utility
|
22
|
+
from psdi_data_conversion.dist import bin_exists, get_bin_path, get_dist
|
23
|
+
from psdi_data_conversion.security import SAFE_STRING_RE, string_is_safe
|
24
|
+
|
25
|
+
try:
|
26
|
+
# werkzeug is installed in the optional dependency Flask. It's only used here to recognize an exception type,
|
27
|
+
# and if Flask isn't installed, that exception will never be raised, so we can just replace it with None and later
|
28
|
+
# not try to catch it if werkzeug isn't found
|
29
|
+
from werkzeug.exceptions import HTTPException
|
30
|
+
except ImportError:
|
31
|
+
HTTPException = None
|
32
|
+
|
33
|
+
|
34
|
+
class FileConverterException(RuntimeError):
    """Base exception for runtime errors raised deliberately by this package.

    The other exception classes defined in this module derive from it.
    """
|
38
|
+
|
39
|
+
|
40
|
+
class FileConverterAbortException(FileConverterException):
    """Exception raised when a file conversion is aborted.

    Attributes
    ----------
    status_code : int
        The status code supplied when the abort was triggered
    """

    def __init__(self, status_code: int, *args, **kwargs):
        """Store the status code and forward everything else to the parent exception.

        Parameters
        ----------
        status_code : int
            The status code associated with the abort
        *args, **kwargs
            Passed unchanged to the parent class's initializer
        """
        self.status_code = status_code
        super().__init__(*args, **kwargs)
|
50
|
+
|
51
|
+
|
52
|
+
class FileConverterSizeException(FileConverterAbortException):
    """Abort exception raised when the maximum file size is exceeded.

    Attributes
    ----------
    in_size : int | None
        Size of the input file, if known
    out_size : int | None
        Size of the output file, if known
    max_file_size : int | None
        The size limit which was exceeded, if known
    """

    def __init__(self,
                 *args,
                 in_size: int | None = None,
                 out_size: int | None = None,
                 max_file_size: int | None = None,
                 **kwargs):
        """Record the size information and forward the remaining arguments (the first positional one being the
        status code required by the parent class) to the parent initializer.
        """
        self.in_size, self.out_size, self.max_file_size = in_size, out_size, max_file_size
        super().__init__(*args, **kwargs)
|
66
|
+
|
67
|
+
|
68
|
+
class FileConverterInputException(FileConverterException):
    """Exception raised for problems with the input parameters supplied to the data conversion script.
    """
|
72
|
+
|
73
|
+
|
74
|
+
class FileConverterHelpException(FileConverterInputException):
    """Input exception for errors where the user is better served by guidance on correct CLI usage than by a
    bare traceback.

    Attributes
    ----------
    msg_preformatted : bool
        Whether the exception's message has already been formatted for display
    """

    def __init__(self, *args, msg_preformatted=False):
        """Create the exception and remember whether its message is preformatted.

        Parameters
        ----------
        *args
            Passed unchanged to the parent class's initializer
        msg_preformatted : bool, optional
            If True, indicates that the message of the exception has already been formatted. Default False
        """
        self.msg_preformatted = msg_preformatted
        super().__init__(*args)
|
89
|
+
|
90
|
+
|
91
|
+
# Exception types treated as aborts by the converter's error handling. `HTTPException` is only included when
# it could be imported (it is set to None otherwise).
l_abort_exceptions = ((FileConverterAbortException,) if HTTPException is None
                      else (HTTPException, FileConverterAbortException))
|
95
|
+
|
96
|
+
|
97
|
+
@dataclass
|
98
|
+
class FileConversionResult:
|
99
|
+
"""An object of this class will be output by the file converter's `run` function on success to provide key info on
|
100
|
+
the files created
|
101
|
+
"""
|
102
|
+
output_filename: str | None = None
|
103
|
+
log_filename: str | None = None
|
104
|
+
in_size: int = 0
|
105
|
+
out_size: int = 0
|
106
|
+
status_code: int = 0
|
107
|
+
|
108
|
+
|
109
|
+
def abort_raise(status_code: int,
                *args,
                e: Exception | None = None,
                **kwargs):
    """Default abort callback for file conversions: always raises.

    Parameters
    ----------
    status_code : int
        Status code of the abort. `const.STATUS_CODE_SIZE` selects `FileConverterSizeException`; any other value
        selects `FileConverterAbortException`
    *args, **kwargs
        Forwarded, after `status_code`, to the constructor of the raised exception
    e : Exception | None
        If provided (and truthy), this exception is raised as-is instead of creating a new one

    Raises
    ------
    Exception
        `e` itself if supplied, otherwise a newly-constructed abort exception
    """
    # A caught exception takes priority over constructing a new one
    if e:
        raise e

    size_limit_hit = status_code == const.STATUS_CODE_SIZE
    exception_class = FileConverterSizeException if size_limit_hit else FileConverterAbortException
    raise exception_class(status_code, *args, **kwargs)
|
123
|
+
|
124
|
+
|
125
|
+
class FileConverter:
|
126
|
+
"""Class to handle conversion of files from one type to another
|
127
|
+
"""
|
128
|
+
|
129
|
+
# Class variables and methods which must/can be overridden by subclasses
|
130
|
+
# ----------------------------------------------------------------------
|
131
|
+
|
132
|
+
name: str | None = None
|
133
|
+
"""Name of the converter - must be overridden in each subclass to name each converter uniquely"""
|
134
|
+
|
135
|
+
info: str | None = None
|
136
|
+
"""General info about the converter - can be overridden in a subclass to add information about a converter which
|
137
|
+
isn't covered in its database entry, such as notes on its support."""
|
138
|
+
|
139
|
+
allowed_flags: tuple[tuple[str, dict, Callable], ...] | None = None
|
140
|
+
"""List of flags allowed for the converter (flags are arguments that are set by being present, and don't require a
|
141
|
+
value specified - e.g. "-v" to enable verbose mode) - should be overridden with a tuple of tuples containing the
|
142
|
+
flag names, a dict of kwargs to be passed to the argument parser's `add_argument` method, and callable function to
|
143
|
+
get a dict of needed info for them. If the converter does not accept any flags, an empty tuple should be supplied
|
144
|
+
(e.g `allowed_flags = ()`), as `None` will be interpreted as this value not having been overridden"""
|
145
|
+
|
146
|
+
allowed_options: tuple[tuple[str, dict, Callable], ...] | None = None
|
147
|
+
"""List of options allowed for the converter (options are arguments that take one or more values, e.g. "-o out.txt")
|
148
|
+
- should be overridden with a tuple of tuples containing the option names, a dict of kwargs to be passed to the
|
149
|
+
argument parser's `add_argument` method, and callable function to get a dict of needed info for them.
|
150
|
+
As with flags, an empty tuple should be provided if the converter does not accept any options"""
|
151
|
+
|
152
|
+
database_key_prefix: str | None = None
|
153
|
+
"""The prefix used in the database for keys related to this converter"""
|
154
|
+
|
155
|
+
@abc.abstractmethod
def _convert(self):
    """Perform the actual conversion using this converter. Every converter subclass must provide its own
    implementation; the base version does nothing.

    NOTE(review): the visible class header is `class FileConverter:` with no `abc.ABC` base, so this decorator on
    its own may not prevent instantiation of the base class - confirm whether that is intended.
    """
|
160
|
+
|
161
|
+
@classmethod
def can_be_registered(cls) -> bool:
    """Report whether this converter class can be registered.

    The base implementation always answers True. A subclass whose availability is conditional (for example one
    depending on a binary that isn't supported on every platform) should override this and perform its checks.

    Returns
    -------
    bool
        True if the converter can be registered
    """
    return True
|
168
|
+
|
169
|
+
# If the converter supports flags specific to the input file format, set the below to True for the subclass so help
|
170
|
+
# text will be properly displayed notifying the user that they can request this by providing an input format (and
|
171
|
+
# similar for the other similar class variables below)
|
172
|
+
has_in_format_flags_or_options = False
|
173
|
+
has_out_format_flags_or_options = False
|
174
|
+
|
175
|
+
@staticmethod
|
176
|
+
def get_in_format_flags(in_format: str) -> tuple[tuple[str, str], ...]:
|
177
|
+
"""Gets flags which are applicable for a specific input file format, returned as a tuple of (flag, description).
|
178
|
+
This should be overridden for each converter class if it uses any format-specific input flags.
|
179
|
+
"""
|
180
|
+
return ()
|
181
|
+
|
182
|
+
@staticmethod
|
183
|
+
def get_out_format_flags(in_format: str) -> tuple[tuple[str, str], ...]:
|
184
|
+
"""Gets flags which are applicable for a specific output file format, returned as a tuple of (flag,
|
185
|
+
description). This should be overridden for each converter class if it uses any format-specific output flags.
|
186
|
+
"""
|
187
|
+
return ()
|
188
|
+
|
189
|
+
@staticmethod
|
190
|
+
def get_in_format_options(in_format: str) -> tuple[tuple[str, str], ...]:
|
191
|
+
"""Gets options which are applicable for a specific input file format, returned as a tuple of (option,
|
192
|
+
description). This should be overridden for each converter class if it uses any format-specific input options.
|
193
|
+
"""
|
194
|
+
return ()
|
195
|
+
|
196
|
+
@staticmethod
|
197
|
+
def get_out_format_options(in_format: str) -> tuple[tuple[str, str], ...]:
|
198
|
+
"""Gets options which are applicable for a specific output file format, returned as a tuple of (option,
|
199
|
+
description). This should be overridden for each converter class if it uses any format-specific output options.
|
200
|
+
"""
|
201
|
+
return ()
|
202
|
+
|
203
|
+
# Base class functionality
|
204
|
+
# ------------------------
|
205
|
+
|
206
|
+
def __init__(self,
             filename: str,
             to_format: str,
             from_format: str | None = None,
             data: dict[str, Any] | None = None,
             abort_callback: Callable[[int], None] = abort_raise,
             use_envvars=False,
             upload_dir=const.DEFAULT_UPLOAD_DIR,
             download_dir=const.DEFAULT_DOWNLOAD_DIR,
             max_file_size=const.DEFAULT_MAX_FILE_SIZE,
             no_check=False,
             log_file: str | None = None,
             log_mode=const.LOG_FULL,
             log_level: int | None = None,
             refresh_local_log: bool = True,
             delete_input=False):
    """Initialize the object, storing needed data and setting up loggers.

    Any exception raised during initialisation is routed to `abort_callback` (via `self._abort` where possible),
    except for the abort exception types in `l_abort_exceptions`, which are logged and re-raised.

    Parameters
    ----------
    filename : str
        The filename of the input file to be converted, either relative to current directory or fully-qualified
    to_format : str
        The desired format to convert to, as the file extension (e.g. "cif"). A leading period is stripped
    from_format : str | None
        The format to convert from, as the file extension (e.g. "pdb"). If None is provided (default), will be
        determined from the extension of `filename`. A leading period is stripped
    data : dict[str, Any] | None
        A dict of any other data needed by a converter or for extra logging information, default empty dict. A
        deep copy is stored, so the caller's dict is not modified
    abort_callback : Callable[[int], None]
        Function to be called if the conversion hits an error and must be aborted, default `abort_raise`, which
        raises an appropriate exception
    use_envvars : bool
        If set to True, environment variables will be checked for any that set options for this class and used,
        default False
    upload_dir : str
        The location of input files relative to the current directory. Created if it does not exist
    download_dir : str
        The location of output files relative to the current directory. Created if it does not exist
    max_file_size : float
        The maximum allowed file size for input/output files, in MB (stored on the object multiplied by
        `const.MEGABYTE`). Default `const.DEFAULT_MAX_FILE_SIZE`
    no_check : bool
        If False (default), will check at setup whether or not a conversion between the desired file formats is
        supported with the specified converter
    log_file : str | None
        If provided, all logging will go to a single file or stream. Otherwise, logs will be split up among multiple
        files for server-style logging.
    log_mode : str
        How logs should be stored. Allowed values are:
        - 'full' - Multi-file logging, only recommended when running as a public web app
        - 'full-force' - Multi-file logging, only recommended when running as a public web app, with the log file
          name forced to be used for the output log
        - 'simple' - Logs saved to one file
        - 'stdout' - Output logs and errors only to stdout
        - 'none' - Output only errors to stdout
    log_level : int | None
        The level to log output at. If None (default), the level will depend on the chosen `log_mode`:
        - 'full', 'full-force', or 'simple': INFO
        - 'stdout' - INFO to stdout, no logging to file
        - 'none' - ERROR to stdout, no logging to file
    refresh_local_log : bool
        If True, the local log generated from this run will be overwritten. If False it will be appended to. Default
        True
    delete_input : bool
        Whether or not to delete input files after conversion, default False
    """

    # Wrap the initialisation in a try block, calling the abort_callback function if anything goes wrong. The
    # callback is stored first so that it is available to the error handling below
    self.abort_callback = abort_callback

    try:

        # Set member variables directly from input
        self.in_filename = filename
        self.to_format = to_format
        self.upload_dir = upload_dir
        self.download_dir = download_dir
        self.max_file_size = max_file_size*const.MEGABYTE
        self.log_file = log_file
        self.log_mode = log_mode
        self.log_level = log_level
        self.refresh_local_log = refresh_local_log
        self.delete_input = delete_input

        # Use an empty dict for data if None was provided
        if data is None:
            self.data = {}
        else:
            self.data = dict(deepcopy(data))

        # Get from_format from the input file extension if not supplied
        if from_format is None:
            self.from_format = os.path.splitext(self.in_filename)[1]
        else:
            self.from_format = from_format

        # Remove any leading periods from to/from_format
        if self.to_format.startswith("."):
            self.to_format = self.to_format[1:]
        if self.from_format.startswith("."):
            self.from_format = self.from_format[1:]

        # Set placeholders for member variables which will be set when conversion is run
        self.in_size: int | None = None
        self.out_size: int | None = None
        self.out: str | None = None
        self.err: str | None = None
        self.quality: str | None = None

        # Set values from envvars if desired
        if use_envvars:
            # Get the maximum allowed size from the envvar for it; this overrides the `max_file_size` argument
            ev_max_file_size = os.environ.get(const.MAX_FILESIZE_EV)
            if ev_max_file_size is not None:
                self.max_file_size = float(ev_max_file_size)*const.MEGABYTE

        # Create directory 'uploads' if not extant.
        if not os.path.exists(self.upload_dir):
            os.makedirs(self.upload_dir, exist_ok=True)

        # Create directory 'downloads' if not extant.
        if not os.path.exists(self.download_dir):
            os.makedirs(self.download_dir, exist_ok=True)

        # The output file takes the input file's base name, placed in the download directory with the target
        # format as its extension
        self.local_filename = os.path.split(self.in_filename)[1]
        self.filename_base = os.path.splitext(self.local_filename)[0]
        self.out_filename = f"{self.download_dir}/{self.filename_base}.{self.to_format}"

        # Set up files to log to
        self._setup_loggers()

        # Check that the requested conversion is valid and warn of any issues unless suppressed
        if not no_check:
            # Imported here rather than at module level - presumably to avoid a circular import; TODO confirm
            from psdi_data_conversion.database import get_conversion_quality
            qual = get_conversion_quality(self.name, self.from_format, self.to_format)
            if not qual:
                raise FileConverterHelpException(f"Conversion from {self.from_format} to {self.to_format} "
                                                 f"with {self.name} is not supported.")
            if qual.details:
                # The message starts with a lone colon so that the following lines begin on a fresh line
                msg = (":\nPotential data loss or extrapolation issues with the conversion from "
                       f"{self.from_format} to {self.to_format}:\n")
                for detail_line in qual.details.split("\n"):
                    msg += f"- {detail_line}\n"
                self.logger.warning(msg)

        self.logger.debug("Finished FileConverter initialisation")

    except Exception as e:
        if isinstance(e, l_abort_exceptions):
            # Don't catch a deliberate abort; let it pass through
            # NOTE(review): `self.logger` is only assigned within `_setup_loggers`, so if an abort exception were
            # raised before that call this line would itself fail with AttributeError - confirm this can't happen
            self.logger.error(f"Unexpected exception raised while initializing the converter, of type '{type(e)}' "
                              f"with message: {str(e)}")
            raise
        # Try to run the standard abort method. There's a good chance this will fail though depending on what went
        # wrong during init, so we fall back to printing the exception to stderr
        try:
            if not isinstance(e, FileConverterHelpException):
                self.logger.error(f"Exception triggering an abort was raised while initializing the converter. "
                                  f"Exception was type '{type(e)}', with message: {str(e)}")
            self._abort(message="The application encountered an error while initializing the converter:\n" +
                        traceback.format_exc(), e=e)
        except Exception as ee:
            if isinstance(ee, (l_abort_exceptions, FileConverterHelpException)):
                # Don't catch a deliberate abort or help exception; let it pass through
                raise
            message = ("ERROR: The application encounted an error during initialization of the converter and "
                       "could not cleanly log the error due to incomplete init: " + traceback.format_exc())
            print(message, file=sys.stderr)
            # First try passing full information to the callback; if it doesn't accept the extra arguments
            # (signalled by a TypeError), fall back to calling it with only the status code
            try:
                self.abort_callback(const.STATUS_CODE_GENERAL, message, e=e)
            except TypeError:
                self.abort_callback(const.STATUS_CODE_GENERAL)
|
378
|
+
|
379
|
+
def _setup_loggers(self):
    """Configure logging for this object; called once during initialisation.

    Chooses the local-log and stdout levels (from `self.log_level` if set, otherwise from `self.log_mode`), then
    either delegates to `_setup_server_loggers` for the server-style modes or creates a single logger writing to
    `self.log_file`.

    Raises
    ------
    FileConverterInputException
        If no explicit log level is set and `self.log_mode` is not a recognised mode
    """

    # An explicit log level applies to both outputs; otherwise the levels follow from the log mode
    if self.log_level:
        self._local_logger_level = self.log_level
        self._stdout_output_level = self.log_level
    elif self.log_mode == const.LOG_NONE:
        self._local_logger_level = None
        self._stdout_output_level = logging.ERROR
    elif self.log_mode == const.LOG_STDOUT:
        self._local_logger_level = None
        self._stdout_output_level = logging.INFO
    elif self.log_mode in (const.LOG_FULL, const.LOG_FULL_FORCE, const.LOG_SIMPLE):
        self._local_logger_level = const.DEFAULT_LOCAL_LOGGER_LEVEL
        self._stdout_output_level = logging.ERROR
    else:
        raise FileConverterInputException(f"ERROR: Unrecognised logging option: {self.log_mode}. Allowed "
                                          f"options are: {const.L_ALLOWED_LOG_MODES}")

    # Server-style modes have their own setup routine
    server_modes = (const.LOG_FULL, const.LOG_FULL_FORCE)
    if self.log_mode in server_modes:
        return self._setup_server_loggers()

    self.output_log = self.log_file

    # Overwrite the local log if a refresh was requested, otherwise append to it
    self.logger = log_utility.set_up_data_conversion_logger(
        local_log_file=self.log_file,
        local_logger_level=self._local_logger_level,
        stdout_output_level=self._stdout_output_level,
        suppress_global_handler=True,
        mode="w" if self.refresh_local_log else "a")

    self.logger.debug(f"Set up logging in log mode '{self.log_mode}'")

    if self.log_level:
        level_note = f"Logging level set to {self.log_level}"
    else:
        level_note = (f"Logging level left to defaults. Using {self._local_logger_level} for local logger "
                      f"and {self._stdout_output_level} for stdout output")
    self.logger.debug(level_note)
|
418
|
+
|
419
|
+
def _setup_server_loggers(self):
    """Configure logging for server-style execution; called during initialisation for the 'full' log modes.

    Sets `self.output_log` and `self.logger`.
    """
    # Server mode uses a log named after the input file in the download directory, unless the mode forces use of
    # the provided log file
    use_given_log = self.log_mode == const.LOG_FULL_FORCE
    self.output_log = (self.log_file if use_given_log
                       else os.path.join(self.download_dir, f"{self.filename_base}{const.OUTPUT_LOG_EXT}"))

    # Start from a clean slate by deleting any log left over from a previous run
    if os.path.exists(self.output_log):
        os.remove(self.output_log)

    # Set up loggers - one for general-purpose log_utility, and one just for what we want to output to the user
    self.logger = log_utility.set_up_data_conversion_logger(
        local_log_file=self.output_log,
        local_logger_level=self._local_logger_level,
        stdout_output_level=self._stdout_output_level,
        local_logger_raw_output=False,
        mode="w" if self.refresh_local_log else "a")

    self.logger.debug(f"Set up server-style logging, with user logging at level {self._local_logger_level}")
|
442
|
+
|
443
|
+
def run(self):
    """Run the file conversion

    Calls `self._convert()` followed by `self._finish_convert()`. An exception of one of the types in
    `l_abort_exceptions` is logged and re-raised; any other exception is handed to `self._abort`.

    Returns
    -------
    FileConversionResult
        The output and log filenames and the input and output sizes recorded on this object
    """

    try:
        self.logger.debug("Starting file conversion")
        self._convert()

        self.logger.debug("Finished file conversion; performing cleanup tasks")
        self._finish_convert()
    except l_abort_exceptions as e:
        # Don't catch a deliberate abort; let it pass through
        self.logger.error(f"Unexpected exception raised while running the converter, of type '{type(e)}' with "
                          f"message: {str(e)}")
        raise
    except Exception as e:
        # Help exceptions carry their own user-facing message, so they are not logged as errors here
        is_help = isinstance(e, FileConverterHelpException)
        if not is_help:
            self.logger.error(f"Exception triggering an abort was raised while running the converter. Exception "
                              f"was type '{type(e)}', with message: {str(e)}")
        abort_message = ("The application encountered an error while running the converter:\n" +
                         traceback.format_exc())
        self._abort(message=abort_message, e=e)

    return FileConversionResult(output_filename=self.out_filename,
                                log_filename=self.output_log,
                                in_size=self.in_size,
                                out_size=self.out_size)
|
469
|
+
|
470
|
+
def _abort(self,
           status_code: int = const.STATUS_CODE_GENERAL,
           message: str | None = None,
           e: Exception | None = None,
           **kwargs):
    """Abort the conversion, reporting the desired message to the user at the top of the output

    Parameters
    ----------
    status_code : int
        The status code to pass to the abort callback. Default `const.STATUS_CODE_GENERAL`
    message : str | None
        If provided, this message will be logged in the user output log at the top of the file and will appear in
        any raised exception if possible. This should typically explain the reason the process failed. If `e` is
        a `FileConverterHelpException`, its message is used instead
    e : Exception | None
        The caught exception which triggered this abort, if any
    **kwargs : Any
        Any additional keyword arguments are passed to the `self.abort_callback` function if it accepts them

    """

    # Remove the input file (only if deletion of input was requested) and the output file, if they exist
    if self.delete_input:
        self.logger.debug(f"Cleaning up input file {self.in_filename}")
        try:
            os.remove(self.in_filename)
        except FileNotFoundError:
            pass
    try:
        os.remove(self.out_filename)
    except FileNotFoundError:
        self.logger.debug("Application aborting; no output file found to clean up")
    else:
        self.logger.debug(f"Application aborting, so cleaning up output file {self.out_filename}")

    # If we have a Help exception, override the message with its message
    if isinstance(e, FileConverterHelpException):
        self.logger.debug("Help exception triggered, so only using its message for output")
        message = str(e)

    if message:
        # If we're adding a message in server mode, read in any prior logs, clear the log, write the message, then
        # write the prior logs. Outside of server mode `self.output_log` mirrors `self.log_file` and so may be
        # None, in which case there is no output log file to prepend the message to
        if self.log_file is None and self.output_log is not None:
            self.logger.debug("Adding abort message to the top of the output log so it will be the first thing "
                              "read by the user")
            # Use a context manager so the read handle is closed before the file is removed
            with open(self.output_log, "r") as fi:
                prior_output_log = fi.read()
            os.remove(self.output_log)
            with open(self.output_log, "w") as fo:
                fo.write(message + "\n")
                fo.write(prior_output_log)

        # Note this message in the dev logger as well
        if not isinstance(e, FileConverterHelpException):
            self.logger.error(message)

    # Call the abort callback function now. We first try to add information to it, but in case that isn't supported
    # (signalled by a TypeError), we fall back to just calling it with the status code
    try:
        self.abort_callback(status_code, message, e=e, **kwargs)
    except TypeError:
        self.abort_callback(status_code)
|
532
|
+
|
533
|
+
def _abort_from_err(self):
    """Abort after the converter has finished but reported an error.

    Logs an error message made up of the generic message header, any converter-specific details, and the
    converter's recorded output (`self.out`) and error text (`self.err`), then aborts with `self.err` as the
    user-facing message.
    """
    header = self._create_message_start()
    details = self._create_message()
    self.logger.error(header + details + self.out + '\n' + self.err)

    self._abort(message=self.err)
|
542
|
+
|
543
|
+
def _create_message(self) -> str:
    """Describe the options passed to the converter, for inclusion in log messages.

    The base implementation has nothing converter-specific to report: it logs a debug note saying so and returns
    an empty string. Converters should override this to report their own information.

    Returns
    -------
    str
        Converter-specific message text (empty here)
    """
    note = ("Default _create_message method called - not outputting any additional information specific "
            "to this converter")
    self.logger.debug(note)

    return ""
|
552
|
+
|
553
|
+
def _create_message_start(self) -> str:
    """Build the header shared by log messages about a conversion

    Returns
    -------
    str
        Newline-terminated text giving the file name base, the source and target formats, and the converter name
    """
    # The first line is a lone colon: the logger strips empty or whitespace-only leading content, so a minimal
    # visible placeholder is needed to push the entries onto their own lines, where they can line up
    header_lines = (":",
                    f"File name: {self.filename_base}",
                    f"From: {self.from_format}",
                    f"To: {self.to_format}",
                    f"Converter: {self.name}")
    return "\n".join(header_lines) + "\n"
|
568
|
+
|
569
|
+
def _log_success(self):
    """Log a summary of a successful conversion at info level

    The entry combines the generic message start, any converter-specific details, the conversion quality, a
    success notice, and the converter's captured output and error text. Surrounding whitespace is stripped and a
    single trailing newline is added.
    """
    preamble = self._create_message_start() + self._create_message()

    verdict = ('Quality: ' + self.quality + '\n'
               'Success: Assuming that the data provided was of the correct format, the conversion\n'
               ' was successful (to the best of our knowledge) subject to any warnings below.\n')

    # Anything the converter printed is appended, as it may contain the warnings the notice above refers to
    captured = self.out + '\n' + self.err

    self.logger.info((preamble + verdict + captured).strip() + '\n')
|
580
|
+
|
581
|
+
def _check_file_size_and_status(self):
    """Measure the input and output files, aborting if the output file is missing or too large

    Returns
    -------
    in_size : int
        Size of input file in bytes
    out_size : int
        Size of output file in bytes
    """
    in_size = os.path.getsize(os.path.realpath(self.in_filename))

    try:
        out_size = os.path.getsize(os.path.realpath(self.out_filename))
    except FileNotFoundError:
        # The conversion finished without producing the file it was expected to write
        err_message = f"Expected output file {self.out_filename} does not exist."
        self.logger.error(err_message)
        self.err += f"ERROR: {err_message}\n"
        self._abort_from_err()

    # The limit is only enforced when it is positive
    size_limit = self.max_file_size
    if size_limit > 0 and out_size > size_limit:
        in_name = os.path.basename(self.in_filename)
        out_name = os.path.basename(self.out_filename)
        oversize_message = (f"ERROR converting {in_name} to {out_name}: "
                            "Output file exceeds maximum size.\nInput file size is "
                            f"{in_size/const.MEGABYTE:.2f} MB; Output file size is "
                            f"{out_size/const.MEGABYTE:.2f} MB; maximum output file size is "
                            f"{size_limit/const.MEGABYTE:.2f} MB.\n")

        # The raw sizes are passed along as keyword arguments to the abort call
        self._abort(const.STATUS_CODE_SIZE,
                    oversize_message,
                    in_size=in_size,
                    out_size=out_size,
                    max_file_size=size_limit)

    self.logger.debug(f"Output file found to have size {out_size/const.MEGABYTE:.2f} MB")

    return in_size, out_size
|
616
|
+
|
617
|
+
def get_quality(self) -> str:
    """Look up the quality of this conversion for the current converter and formats

    Returns
    -------
    str
        The `qual_str` of the result returned by `get_conversion_quality`, or "unknown" if that result is falsy
    """
    # Imported here rather than at module level - NOTE(review): presumably to avoid a circular import; confirm
    from psdi_data_conversion.database import get_conversion_quality

    found = get_conversion_quality(converter_name=self.name,
                                   in_format=self.from_format,
                                   out_format=self.to_format)

    return found.qual_str if found else "unknown"
|
628
|
+
|
629
|
+
def _finish_convert(self):
    """Run the final common steps of a conversion: check file sizes, tidy up, and log success

    The size check is responsible for aborting if the output file is missing or too large. After it, the input
    file is removed if requested, format and quality values supplied in `self.data` replace the stored ones, and
    the success entry is logged.
    """
    sizes = self._check_file_size_and_status()
    self.in_size, self.out_size = sizes

    if self.delete_input:
        os.remove(self.in_filename)

    # Values supplied in the data dict take the place of the stored format attributes
    for data_key, attr_name in (("from_full", "from_format"), ("to_full", "to_format")):
        if data_key in self.data:
            setattr(self, attr_name, self.data[data_key])

    # A quality supplied in the data dict is used as-is; otherwise it is looked up
    self.quality = self.data["success"] if "success" in self.data else self.get_quality()

    self._log_success()
|
647
|
+
|
648
|
+
|
649
|
+
class ScriptFileConverter(FileConverter):
    """File Converter specialized to run a shell script to call the converter
    """

    script: str | None = None
    """The name of the script to run this converter, relative to the ``psdi_data_conversion/scripts`` directory"""

    required_bin: str | None = None
    """The name of the binary called by the script, relative to the ``psdi_data_conversion/bin/$DIST`` directory,
    where `DIST` is 'linux', 'windows', and/or 'mac', depending on the user's platform. The code will check
    that a binary by this name exists for the user's distribution, and will only register this converter if one is
    found.
    """

    @classmethod
    def can_be_registered(cls) -> bool:
        """If a binary is required for this script, check that it exists for the user's OS/distribution. If one isn't
        required (`cls.required_bin` is None), also return True
        """
        if cls.required_bin is None:
            return True
        return bin_exists(cls.required_bin)

    def _convert(self):
        """Run this converter's shell script on the input file and capture what it prints

        The script is called with the target format (prefixed with ``--``), the input and output filenames, and the
        user-provided input/output flags and options read from `self.data`. Its standard output and standard error
        are stored in `self.out` and `self.err`. A non-zero return code is logged and triggers an abort.

        Raises
        ------
        FileConverterHelpException
            If any user-provided flag or option string fails the `string_is_safe` check
        """

        self.logger.debug(f"Performing conversion with ScriptFileConverter using script '{self.script}'")

        # Each of the four values is read from its own key; the "to" values previously reused the "from" keys,
        # so output-side flags and options were never passed to the script
        from_flags = self.data.get("from_flags", "")
        to_flags = self.data.get("to_flags", "")
        from_options = self.data.get("from_options", "")
        to_options = self.data.get("to_options", "")

        # Check that all user-provided input passes security checks
        for user_args in [from_flags, to_flags, from_options, to_options]:
            if not string_is_safe(user_args):
                raise FileConverterHelpException(f"Provided argument '{user_args}' does not pass security check - it "
                                                 f"must match the regex {SAFE_STRING_RE.pattern}.")

        # NOTE(review): `env` replaces the child's environment rather than extending it, so the script only
        # receives DIST (and BIN_PATH when a binary is required) - confirm the scripts need nothing else
        env = {"DIST": get_dist()}
        if self.required_bin is not None:
            env["BIN_PATH"] = get_bin_path(self.required_bin)

        # Arguments are passed as a list, so no shell parsing is applied to the user-provided values here
        # NOTE(review): the script path is relative, so it is resolved against the current working directory
        process = subprocess.run(['sh', f'psdi_data_conversion/scripts/{self.script}', '--' + self.to_format,
                                  self.in_filename, self.out_filename, from_flags, to_flags, from_options, to_options],
                                 env=env, capture_output=True, text=True)

        self.out = process.stdout
        self.err = process.stderr

        if process.returncode != 0:
            self.logger.error(f"Conversion process completed with non-zero returncode {process.returncode}; aborting")
            self._abort_from_err()
        else:
            self.logger.debug("Conversion process completed successfully")
|