psdi-data-conversion 0.0.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. psdi_data_conversion/__init__.py +11 -0
  2. psdi_data_conversion/app.py +242 -0
  3. psdi_data_conversion/bin/linux/atomsk +0 -0
  4. psdi_data_conversion/bin/linux/c2x +0 -0
  5. psdi_data_conversion/bin/mac/atomsk +0 -0
  6. psdi_data_conversion/bin/mac/c2x +0 -0
  7. psdi_data_conversion/constants.py +185 -0
  8. psdi_data_conversion/converter.py +459 -0
  9. psdi_data_conversion/converters/__init__.py +6 -0
  10. psdi_data_conversion/converters/atomsk.py +32 -0
  11. psdi_data_conversion/converters/base.py +702 -0
  12. psdi_data_conversion/converters/c2x.py +32 -0
  13. psdi_data_conversion/converters/openbabel.py +239 -0
  14. psdi_data_conversion/database.py +1064 -0
  15. psdi_data_conversion/dist.py +87 -0
  16. psdi_data_conversion/file_io.py +216 -0
  17. psdi_data_conversion/log_utility.py +241 -0
  18. psdi_data_conversion/main.py +776 -0
  19. psdi_data_conversion/scripts/atomsk.sh +32 -0
  20. psdi_data_conversion/scripts/c2x.sh +26 -0
  21. psdi_data_conversion/security.py +38 -0
  22. psdi_data_conversion/static/content/accessibility.htm +254 -0
  23. psdi_data_conversion/static/content/convert.htm +121 -0
  24. psdi_data_conversion/static/content/convertato.htm +65 -0
  25. psdi_data_conversion/static/content/convertc2x.htm +65 -0
  26. psdi_data_conversion/static/content/documentation.htm +94 -0
  27. psdi_data_conversion/static/content/feedback.htm +53 -0
  28. psdi_data_conversion/static/content/header-links.html +8 -0
  29. psdi_data_conversion/static/content/index-versions/header-links.html +8 -0
  30. psdi_data_conversion/static/content/index-versions/psdi-common-footer.html +99 -0
  31. psdi_data_conversion/static/content/index-versions/psdi-common-header.html +28 -0
  32. psdi_data_conversion/static/content/psdi-common-footer.html +99 -0
  33. psdi_data_conversion/static/content/psdi-common-header.html +28 -0
  34. psdi_data_conversion/static/content/report.htm +103 -0
  35. psdi_data_conversion/static/data/data.json +143940 -0
  36. psdi_data_conversion/static/img/colormode-toggle-dm.svg +3 -0
  37. psdi_data_conversion/static/img/colormode-toggle-lm.svg +3 -0
  38. psdi_data_conversion/static/img/psdi-icon-dark.svg +136 -0
  39. psdi_data_conversion/static/img/psdi-icon-light.svg +208 -0
  40. psdi_data_conversion/static/img/psdi-logo-darktext.png +0 -0
  41. psdi_data_conversion/static/img/psdi-logo-lighttext.png +0 -0
  42. psdi_data_conversion/static/img/social-logo-bluesky-black.svg +4 -0
  43. psdi_data_conversion/static/img/social-logo-bluesky-white.svg +4 -0
  44. psdi_data_conversion/static/img/social-logo-instagram-black.svg +1 -0
  45. psdi_data_conversion/static/img/social-logo-instagram-white.svg +1 -0
  46. psdi_data_conversion/static/img/social-logo-linkedin-black.png +0 -0
  47. psdi_data_conversion/static/img/social-logo-linkedin-white.png +0 -0
  48. psdi_data_conversion/static/img/social-logo-mastodon-black.svg +4 -0
  49. psdi_data_conversion/static/img/social-logo-mastodon-white.svg +4 -0
  50. psdi_data_conversion/static/img/social-logo-x-black.svg +3 -0
  51. psdi_data_conversion/static/img/social-logo-x-white.svg +3 -0
  52. psdi_data_conversion/static/img/social-logo-youtube-black.png +0 -0
  53. psdi_data_conversion/static/img/social-logo-youtube-white.png +0 -0
  54. psdi_data_conversion/static/img/ukri-epsr-logo-darktext.png +0 -0
  55. psdi_data_conversion/static/img/ukri-epsr-logo-lighttext.png +0 -0
  56. psdi_data_conversion/static/img/ukri-logo-darktext.png +0 -0
  57. psdi_data_conversion/static/img/ukri-logo-lighttext.png +0 -0
  58. psdi_data_conversion/static/javascript/accessibility.js +196 -0
  59. psdi_data_conversion/static/javascript/common.js +42 -0
  60. psdi_data_conversion/static/javascript/convert.js +296 -0
  61. psdi_data_conversion/static/javascript/convert_common.js +252 -0
  62. psdi_data_conversion/static/javascript/convertato.js +107 -0
  63. psdi_data_conversion/static/javascript/convertc2x.js +107 -0
  64. psdi_data_conversion/static/javascript/data.js +176 -0
  65. psdi_data_conversion/static/javascript/format.js +611 -0
  66. psdi_data_conversion/static/javascript/load_accessibility.js +89 -0
  67. psdi_data_conversion/static/javascript/psdi-common.js +177 -0
  68. psdi_data_conversion/static/javascript/report.js +381 -0
  69. psdi_data_conversion/static/styles/format.css +147 -0
  70. psdi_data_conversion/static/styles/psdi-common.css +705 -0
  71. psdi_data_conversion/templates/index.htm +114 -0
  72. psdi_data_conversion/testing/__init__.py +5 -0
  73. psdi_data_conversion/testing/constants.py +12 -0
  74. psdi_data_conversion/testing/conversion_callbacks.py +394 -0
  75. psdi_data_conversion/testing/conversion_test_specs.py +208 -0
  76. psdi_data_conversion/testing/utils.py +522 -0
  77. psdi_data_conversion-0.0.23.dist-info/METADATA +663 -0
  78. psdi_data_conversion-0.0.23.dist-info/RECORD +81 -0
  79. psdi_data_conversion-0.0.23.dist-info/WHEEL +4 -0
  80. psdi_data_conversion-0.0.23.dist-info/entry_points.txt +2 -0
  81. psdi_data_conversion-0.0.23.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,702 @@
1
+ """@file psdi_data_conversion/converters/base.py
2
+
3
+ Created 2025-01-23 by Bryan Gillis.
4
+
5
+ Base class and information for file format converters
6
+ """
7
+
8
+
9
+ from copy import deepcopy
10
+ from dataclasses import dataclass
11
+ import logging
12
+ from collections.abc import Callable
13
+ import os
14
+ import subprocess
15
+ import abc
16
+
17
+ import sys
18
+ import traceback
19
+ from typing import Any
20
+
21
+ from psdi_data_conversion import constants as const, log_utility
22
+ from psdi_data_conversion.dist import bin_exists, get_bin_path, get_dist
23
+ from psdi_data_conversion.security import SAFE_STRING_RE, string_is_safe
24
+
25
+ try:
26
+ # werkzeug is installed in the optional dependency Flask. It's only used here to recognize an exception type,
27
+ # and if Flask isn't installed, that exception will never be raised, so we can just replace it with None and later
28
+ # not try to catch it if werkzeug isn't found
29
+ from werkzeug.exceptions import HTTPException
30
+ except ImportError:
31
+ HTTPException = None
32
+
33
+
34
class FileConverterException(RuntimeError):
    """Base exception type for any runtime error raised by this package."""
38
+
39
+
40
class FileConverterAbortException(FileConverterException):
    """Exception raised when a call is made to abort a file conversion.

    The status code is stored on the exception as `status_code`; it is not forwarded to the base exception, so it
    does not appear in `args` or in the exception's string form.
    """

    def __init__(self, status_code: int, *args, **kwargs):
        # Record the code first, then hand everything else on to the base exception unchanged
        self.status_code = status_code
        super().__init__(*args, **kwargs)
50
+
51
+
52
class FileConverterSizeException(FileConverterAbortException):
    """Abort exception raised when the maximum allowed file size is exceeded.

    The sizes are optional keyword-only extras stored as attributes. All other arguments (the status code first) are
    forwarded to `FileConverterAbortException`.
    """

    def __init__(self,
                 *args,
                 in_size: int | None = None,
                 out_size: int | None = None,
                 max_file_size: int | None = None,
                 **kwargs):
        super().__init__(*args, **kwargs)
        self.in_size, self.out_size, self.max_file_size = in_size, out_size, max_file_size
66
+
67
+
68
class FileConverterInputException(FileConverterException):
    """Exception type for errors in the input parameters given to the data conversion script."""
72
+
73
+
74
class FileConverterHelpException(FileConverterInputException):
    """Input exception for cases where the user should be shown how to use the CLI correctly, rather than simply
    being shown a traceback
    """

    def __init__(self, *args, msg_preformatted=False):
        """Create the exception and record whether its message is already formatted

        Parameters
        ----------
        *args
            Positional arguments forwarded to the base exception (typically the message)
        msg_preformatted : bool, optional
            If True, indicates that the message of the exception has already been formatted. Default False
        """
        self.msg_preformatted = msg_preformatted
        super().__init__(*args)
89
+
90
+
91
# Exception types treated as a deliberate abort. `HTTPException` is only included when werkzeug could be imported;
# otherwise the name is None and must not appear in the tuple
l_abort_exceptions = ((FileConverterAbortException,) if HTTPException is None
                      else (HTTPException, FileConverterAbortException))
95
+
96
+
97
+ @dataclass
98
+ class FileConversionResult:
99
+ """An object of this class will be output by the file converter's `run` function on success to provide key info on
100
+ the files created
101
+ """
102
+ output_filename: str | None = None
103
+ log_filename: str | None = None
104
+ in_size: int = 0
105
+ out_size: int = 0
106
+ status_code: int = 0
107
+
108
+
109
+ def abort_raise(status_code: int,
110
+ *args,
111
+ e: Exception | None = None,
112
+ **kwargs):
113
+ """Callback for aborting during a file conversion, which passes relevant information to an exception of the
114
+ appropriate type
115
+ """
116
+ if e:
117
+ raise e
118
+ elif status_code == const.STATUS_CODE_SIZE:
119
+ exception_class = FileConverterSizeException
120
+ else:
121
+ exception_class = FileConverterAbortException
122
+ raise exception_class(status_code, *args, **kwargs)
123
+
124
+
125
class FileConverter:
    """Class to handle conversion of files from one type to another

    This base class stores the conversion parameters, sets up logging, checks file sizes, logs success and handles
    aborting. Subclasses supply the actual conversion by overriding `_convert`.
    """

    # Class variables and methods which must/can be overridden by subclasses
    # ----------------------------------------------------------------------

    name: str | None = None
    """Name of the converter - must be overridden in each subclass to name each converter uniquely"""

    info: str | None = None
    """General info about the converter - can be overridden in a subclass to add information about a converter which
    isn't covered in its database entry, such as notes on its support."""

    allowed_flags: tuple[tuple[str, dict, Callable], ...] | None = None
    """List of flags allowed for the converter (flags are arguments that are set by being present, and don't require a
    value specified - e.g. "-v" to enable verbose mode) - should be overridden with a tuple of tuples containing the
    flag names, a dict of kwargs to be passed to the argument parser's `add_argument` method, and callable function to
    get a dict of needed info for them. If the converter does not accept any flags, an empty tuple should be supplied
    (e.g `allowed_flags = ()`), as `None` will be interpreted as this value not having been overridden"""

    allowed_options: tuple[tuple[str, dict, Callable], ...] | None = None
    """List of options allowed for the converter (options are arguments that take one or more values, e.g. "-o out.txt")
    - should be overridden with a tuple of tuples containing the option names, a dict of kwargs to be passed to the
    argument parser's `add_argument` method, and callable function to get a dict of needed info for them.
    As with flags, an empty tuple should be provided if the converter does not accept any options"""

    database_key_prefix: str | None = None
    """The prefix used in the database for keys related to this converter"""

    # NOTE: this class does not use `abc.ABCMeta`, so the decorator below documents intent but does not by itself
    # prevent the base class from being instantiated
    @abc.abstractmethod
    def _convert(self):
        """Run the conversion with the desired converter. This must be implemented for each converter class.
        """
        pass

    @classmethod
    def can_be_registered(cls) -> bool:
        """If the converter class may not be able to be registered (for instance, it relies on a binary which isn't
        supported on all platforms), this method should be overridden to perform necessary checks to indicate if it
        can be registered or not.
        """
        return True

    # If the converter supports flags specific to the input file format, set the below to True for the subclass so help
    # text will be properly displayed notifying the user that they can request this by providing an input format (and
    # similar for the other similar class variables below)
    has_in_format_flags_or_options = False
    has_out_format_flags_or_options = False

    @staticmethod
    def get_in_format_flags(in_format: str) -> tuple[tuple[str, str], ...]:
        """Gets flags which are applicable for a specific input file format, returned as a tuple of (flag, description).
        This should be overridden for each converter class if it uses any format-specific input flags.
        """
        return ()

    @staticmethod
    def get_out_format_flags(in_format: str) -> tuple[tuple[str, str], ...]:
        """Gets flags which are applicable for a specific output file format, returned as a tuple of (flag,
        description). This should be overridden for each converter class if it uses any format-specific output flags.

        Note that the parameter is named `in_format` for historical reasons even though it refers to the output format.
        """
        return ()

    @staticmethod
    def get_in_format_options(in_format: str) -> tuple[tuple[str, str], ...]:
        """Gets options which are applicable for a specific input file format, returned as a tuple of (option,
        description). This should be overridden for each converter class if it uses any format-specific input options.
        """
        return ()

    @staticmethod
    def get_out_format_options(in_format: str) -> tuple[tuple[str, str], ...]:
        """Gets options which are applicable for a specific output file format, returned as a tuple of (option,
        description). This should be overridden for each converter class if it uses any format-specific output options.

        Note that the parameter is named `in_format` for historical reasons even though it refers to the output format.
        """
        return ()

    # Base class functionality
    # ------------------------

    def __init__(self,
                 filename: str,
                 to_format: str,
                 from_format: str | None = None,
                 data: dict[str, Any] | None = None,
                 abort_callback: Callable[[int], None] = abort_raise,
                 use_envvars=False,
                 upload_dir=const.DEFAULT_UPLOAD_DIR,
                 download_dir=const.DEFAULT_DOWNLOAD_DIR,
                 max_file_size=const.DEFAULT_MAX_FILE_SIZE,
                 no_check=False,
                 log_file: str | None = None,
                 log_mode=const.LOG_FULL,
                 log_level: int | None = None,
                 refresh_local_log: bool = True,
                 delete_input=False):
        """Initialize the object, storing needed data and setting up loggers.

        Parameters
        ----------
        filename : str
            The filename of the input file to be converted, either relative to current directory or fully-qualified
        to_format : str
            The desired format to convert to, as the file extension (e.g. "cif")
        from_format : str | None
            The format to convert from, as the file extension (e.g. "pdb"). If None is provided (default), will be
            determined from the extension of `filename`
        data : dict[str, Any] | None
            A dict of any other data needed by a converter or for extra logging information, default empty dict.
            A deep copy is stored, so the caller's dict is never modified
        abort_callback : Callable[[int], None]
            Function to be called if the conversion hits an error and must be aborted, default `abort_raise`, which
            raises an appropriate exception
        use_envvars : bool
            If set to True, environment variables will be checked for any that set options for this class and used,
            default False
        upload_dir : str
            The location of input files relative to the current directory
        download_dir : str
            The location of output files relative to the current directory
        max_file_size : float
            The maximum allowed file size for input/output files, in MB. If 0, will be unlimited. Default
            `const.DEFAULT_MAX_FILE_SIZE`
        no_check : bool
            If False (default), will check at setup whether or not a conversion between the desired file formats is
            supported with the specified converter
        log_file : str | None
            If provided, all logging will go to a single file or stream. Otherwise, logs will be split up among multiple
            files for server-style logging.
        log_mode : str
            How logs should be stored. Allowed values are:
            - 'full' - Multi-file logging, only recommended when running as a public web app
            - 'full-force' - Multi-file logging, only recommended when running as a public web app, with the log file
              name forced to be used for the output log
            - 'simple' - Logs saved to one file
            - 'stdout' - Output logs and errors only to stdout
            - 'none' - Output only errors to stdout
        log_level : int | None
            The level to log output at. If None (default), the level will depend on the chosen `log_mode`:
            - 'full', 'full-force', or 'simple': INFO
            - 'stdout' - INFO to stdout, no logging to file
            - 'none' - ERROR to stdout, no logging to file
        refresh_local_log : bool
            If True, the local log generated from this run will be overwritten. If False it will be appended to. Default
            True
        delete_input : bool
            Whether or not to delete input files after conversion, default False
        """

        # Wrap the initialisation in a try block, calling the abort_callback function if anything goes wrong
        self.abort_callback = abort_callback

        try:

            # Set member variables directly from input
            self.in_filename = filename
            self.to_format = to_format
            self.upload_dir = upload_dir
            self.download_dir = download_dir
            self.max_file_size = max_file_size*const.MEGABYTE
            self.log_file = log_file
            self.log_mode = log_mode
            self.log_level = log_level
            self.refresh_local_log = refresh_local_log
            self.delete_input = delete_input

            # Use an empty dict for data if None was provided
            if data is None:
                self.data = {}
            else:
                self.data = dict(deepcopy(data))

            # Get from_format from the input file extension if not supplied
            if from_format is None:
                self.from_format = os.path.splitext(self.in_filename)[1]
            else:
                self.from_format = from_format

            # Remove any leading periods from to/from_format
            if self.to_format.startswith("."):
                self.to_format = self.to_format[1:]
            if self.from_format.startswith("."):
                self.from_format = self.from_format[1:]

            # Set placeholders for member variables which will be set when conversion is run
            self.in_size: int | None = None
            self.out_size: int | None = None
            self.out: str | None = None
            self.err: str | None = None
            self.quality: str | None = None

            # Set values from envvars if desired
            if use_envvars:
                # Get the maximum allowed size from the envvar for it
                ev_max_file_size = os.environ.get(const.MAX_FILESIZE_EV)
                if ev_max_file_size is not None:
                    self.max_file_size = float(ev_max_file_size)*const.MEGABYTE

            # Create directory 'uploads' if not extant.
            if not os.path.exists(self.upload_dir):
                os.makedirs(self.upload_dir, exist_ok=True)

            # Create directory 'downloads' if not extant.
            if not os.path.exists(self.download_dir):
                os.makedirs(self.download_dir, exist_ok=True)

            self.local_filename = os.path.split(self.in_filename)[1]
            self.filename_base = os.path.splitext(self.local_filename)[0]
            self.out_filename = f"{self.download_dir}/{self.filename_base}.{self.to_format}"

            # Set up files to log to
            self._setup_loggers()

            # Check that the requested conversion is valid and warn of any issues unless suppressed
            if not no_check:
                # Imported here rather than at module level, presumably to avoid a circular import - TODO confirm
                from psdi_data_conversion.database import get_conversion_quality
                qual = get_conversion_quality(self.name, self.from_format, self.to_format)
                if not qual:
                    raise FileConverterHelpException(f"Conversion from {self.from_format} to {self.to_format} "
                                                     f"with {self.name} is not supported.")
                if qual.details:
                    msg = (":\nPotential data loss or extrapolation issues with the conversion from "
                           f"{self.from_format} to {self.to_format}:\n")
                    for detail_line in qual.details.split("\n"):
                        msg += f"- {detail_line}\n"
                    self.logger.warning(msg)

            self.logger.debug("Finished FileConverter initialisation")

        except Exception as e:
            if isinstance(e, l_abort_exceptions):
                # Don't catch a deliberate abort; let it pass through. The logger won't exist yet if the failure
                # happened before `_setup_loggers` finished, so only log when it's available - otherwise an
                # AttributeError raised here would mask the abort we're trying to pass through
                logger = getattr(self, "logger", None)
                if logger is not None:
                    logger.error(f"Unexpected exception raised while initializing the converter, of type '{type(e)}' "
                                 f"with message: {str(e)}")
                raise
            # Try to run the standard abort method. There's a good chance this will fail though depending on what went
            # wrong during init, so we fall back to printing the exception to stderr
            try:
                if not isinstance(e, FileConverterHelpException):
                    self.logger.error(f"Exception triggering an abort was raised while initializing the converter. "
                                      f"Exception was type '{type(e)}', with message: {str(e)}")
                self._abort(message="The application encountered an error while initializing the converter:\n" +
                            traceback.format_exc(), e=e)
            except Exception as ee:
                if isinstance(ee, (l_abort_exceptions, FileConverterHelpException)):
                    # Don't catch a deliberate abort or help exception; let it pass through
                    raise
                message = ("ERROR: The application encounted an error during initialization of the converter and "
                           "could not cleanly log the error due to incomplete init: " + traceback.format_exc())
                print(message, file=sys.stderr)
                # Prefer passing the full information to the callback, but fall back to the minimal call signature
                # if the callback doesn't accept the extra arguments
                try:
                    self.abort_callback(const.STATUS_CODE_GENERAL, message, e=e)
                except TypeError:
                    self.abort_callback(const.STATUS_CODE_GENERAL)

    def _setup_loggers(self):
        """Run at init to set up loggers for this object.

        Sets `self._local_logger_level`, `self._stdout_output_level`, `self.output_log` and `self.logger`. For the
        'full' and 'full-force' log modes the work is delegated to `_setup_server_loggers`.

        Raises
        ------
        FileConverterInputException
            If no explicit log level was given and `self.log_mode` is not a recognised mode
        """

        # Determine level to log at based on quiet status
        if self.log_level:
            self._local_logger_level = self.log_level
            self._stdout_output_level = self.log_level
        else:
            if self.log_mode == const.LOG_NONE:
                self._local_logger_level = None
                self._stdout_output_level = logging.ERROR
            elif self.log_mode == const.LOG_STDOUT:
                self._local_logger_level = None
                self._stdout_output_level = logging.INFO
            elif self.log_mode in (const.LOG_FULL, const.LOG_FULL_FORCE, const.LOG_SIMPLE):
                self._local_logger_level = const.DEFAULT_LOCAL_LOGGER_LEVEL
                self._stdout_output_level = logging.ERROR
            else:
                raise FileConverterInputException(f"ERROR: Unrecognised logging option: {self.log_mode}. Allowed "
                                                  f"options are: {const.L_ALLOWED_LOG_MODES}")
        if self.log_mode in (const.LOG_FULL, const.LOG_FULL_FORCE):
            return self._setup_server_loggers()

        self.output_log = self.log_file

        write_mode = "w" if self.refresh_local_log else "a"
        self.logger = log_utility.set_up_data_conversion_logger(local_log_file=self.log_file,
                                                                local_logger_level=self._local_logger_level,
                                                                stdout_output_level=self._stdout_output_level,
                                                                suppress_global_handler=True,
                                                                mode=write_mode)

        self.logger.debug(f"Set up logging in log mode '{self.log_mode}'")
        if self.log_level:
            self.logger.debug(f"Logging level set to {self.log_level}")
        else:
            self.logger.debug(f"Logging level left to defaults. Using {self._local_logger_level} for local logger "
                              f"and {self._stdout_output_level} for stdout output")

    def _setup_server_loggers(self):
        """Run at init to set up loggers for this object in server-style execution
        """
        # For server mode, we need a specific log name, so set that up unless the mode is set to force the use of
        # the input log file
        if self.log_mode == const.LOG_FULL_FORCE:
            self.output_log = self.log_file
        else:
            self.output_log = os.path.join(self.download_dir, f"{self.filename_base}{const.OUTPUT_LOG_EXT}")

        # If any previous log exists, delete it
        if os.path.exists(self.output_log):
            os.remove(self.output_log)

        write_mode = "w" if self.refresh_local_log else "a"
        # Set up loggers - one for general-purpose log_utility, and one just for what we want to output to the user
        self.logger = log_utility.set_up_data_conversion_logger(local_log_file=self.output_log,
                                                                local_logger_level=self._local_logger_level,
                                                                stdout_output_level=self._stdout_output_level,
                                                                local_logger_raw_output=False,
                                                                mode=write_mode)

        self.logger.debug(f"Set up server-style logging, with user logging at level {self._local_logger_level}")

    def run(self):
        """Run the file conversion

        Returns
        -------
        FileConversionResult
            The output and log filenames and the input and output file sizes. This is only reached if the conversion
            succeeded, or if it failed and the abort callback returned instead of raising.
        """

        try:
            self.logger.debug("Starting file conversion")
            self._convert()

            self.logger.debug("Finished file conversion; performing cleanup tasks")
            self._finish_convert()
        except Exception as e:
            if isinstance(e, l_abort_exceptions):
                # Don't catch a deliberate abort; let it pass through
                self.logger.error(f"Unexpected exception raised while running the converter, of type '{type(e)}' with "
                                  f"message: {str(e)}")
                raise
            if not isinstance(e, FileConverterHelpException):
                self.logger.error(f"Exception triggering an abort was raised while running the converter. Exception "
                                  f"was type '{type(e)}', with message: {str(e)}")
            self._abort(message="The application encountered an error while running the converter:\n" +
                        traceback.format_exc(), e=e)

        return FileConversionResult(output_filename=self.out_filename,
                                    log_filename=self.output_log,
                                    in_size=self.in_size,
                                    out_size=self.out_size)

    def _abort(self,
               status_code: int = const.STATUS_CODE_GENERAL,
               message: str | None = None,
               e: Exception | None = None,
               **kwargs):
        """Abort the conversion, reporting the desired message to the user at the top of the output

        Parameters
        ----------
        status_code : int
            The HTTP status code to exit with. Default is `const.STATUS_CODE_GENERAL` (422: Unprocessable Content)
        message : str | None
            If provided, this message will be logged in the user output log at the top of the file and will appear in
            any raised exception if possible. This should typically explain the reason the process failed
        e : Exception | None
            The caught exception which triggered this abort, if any
        **kwargs : Any
            Any additional keyword arguments are passed to the `self.abort_callback` function if it accepts them

        """

        # Remove the input and output files if they exist
        if self.delete_input:
            self.logger.debug(f"Cleaning up input file {self.in_filename}")
            try:
                os.remove(self.in_filename)
            except FileNotFoundError:
                pass
        try:
            os.remove(self.out_filename)
        except FileNotFoundError:
            self.logger.debug("Application aborting; no output file found to clean up")
        else:
            self.logger.debug(f"Application aborting, so cleaning up output file {self.out_filename}")

        # If we have a Help exception, override the message with its message
        if isinstance(e, FileConverterHelpException):
            self.logger.debug("Help exception triggered, so only using its message for output")
            message = str(e)

        if message:
            # If we're adding a message in server mode, read in any prior logs, clear the log, write the message, then
            # write the prior logs. When no log file is in use at all (`output_log` is None, e.g. logging only to
            # stdout) there is nothing to rewrite, and trying to open it would raise a TypeError in place of the abort
            if self.log_file is None and self.output_log is not None:
                self.logger.debug("Adding abort message to the top of the output log so it will be the first thing "
                                  "read by the user")
                # Use a context manager so the read handle is closed before the file is removed
                with open(self.output_log, "r") as fi:
                    prior_output_log = fi.read()
                os.remove(self.output_log)
                with open(self.output_log, "w") as fo:
                    fo.write(message + "\n")
                    fo.write(prior_output_log)

            # Note this message in the dev logger as well
            if not isinstance(e, FileConverterHelpException):
                self.logger.error(message)

        # Call the abort callback function now. We first try to add information to it, but in case that isn't supported,
        # we fall back to just calling it with the status code
        try:
            self.abort_callback(status_code, message, e=e, **kwargs)
        except TypeError:
            self.abort_callback(status_code)

    def _abort_from_err(self):
        """Call an abort after a call to the converter has completed, but it's returned an error. Create a message for
        the logger including this error and other relevant information.
        """
        # `self.out` and `self.err` are initialised to None and only filled in by the converter, so treat unset values
        # as empty text rather than failing on the concatenation
        out_text = self.out or ""
        err_text = self.err or ""
        self.logger.error(self._create_message_start() +
                          self._create_message() +
                          out_text + '\n' +
                          err_text)
        self._abort(message=self.err)

    def _create_message(self) -> str:
        """Create a log of options passed to the converter - this method should be overloaded to log any information
        unique to a specific converter.
        """

        self.logger.debug("Default _create_message method called - not outputting any additional information specific "
                          "to this converter")

        return ""

    def _create_message_start(self) -> str:
        """Create beginning of message for log files

        Returns
        -------
        str
            The beginning of a message for log files, containing generic information about what was trying to be done
        """
        # We want the entries to all line up, so we need a dummy line at the top to force a newline break - anything
        # empty or whitespace will be stripped by the logger, so we use a lone colon, which looks least obtrusive
        return (":\n"
                f"File name: {self.filename_base}\n"
                f"From: {self.from_format}\n"
                f"To: {self.to_format}\n"
                f"Converter: {self.name}\n")

    def _log_success(self):
        """Write conversion information to server-side file, ready for downloading to user
        """

        # The converter output/error text may be unset (None) if the converter didn't capture any
        out_text = self.out or ""
        err_text = self.err or ""

        message = (self._create_message_start()+self._create_message() +
                   'Quality: ' + self.quality + '\n'
                   'Success: Assuming that the data provided was of the correct format, the conversion\n'
                   ' was successful (to the best of our knowledge) subject to any warnings below.\n' +
                   out_text + '\n' + err_text).strip() + '\n'

        self.logger.info(message)

    def _check_file_size_and_status(self):
        """Get file sizes, checking that output file isn't too large

        Returns
        -------
        in_size : int
            Size of input file in bytes
        out_size : int
            Size of output file in bytes

        Raises
        ------
        FileConverterException
            If the output file is missing and the abort callback returned instead of raising
        """
        in_size = os.path.getsize(os.path.realpath(self.in_filename))
        try:
            out_size = os.path.getsize(os.path.realpath(self.out_filename))
        except FileNotFoundError:
            # Something went wrong and the output file doesn't exist
            err_message = f"Expected output file {self.out_filename} does not exist."
            self.logger.error(err_message)
            self.err = (self.err or "") + f"ERROR: {err_message}\n"
            self._abort_from_err()
            # We only get here if the abort callback returned rather than raising. There's no output size to report,
            # so fail explicitly instead of hitting an unbound `out_size` below
            raise FileConverterException(err_message)

        # Check that the output file doesn't exceed the maximum allowed size
        if self.max_file_size > 0 and out_size > self.max_file_size:

            self._abort(const.STATUS_CODE_SIZE,
                        f"ERROR converting {os.path.basename(self.in_filename)} to " +
                        os.path.basename(self.out_filename) + ": "
                        f"Output file exceeds maximum size.\nInput file size is "
                        f"{in_size/const.MEGABYTE:.2f} MB; Output file size is {out_size/const.MEGABYTE:.2f} "
                        f"MB; maximum output file size is {self.max_file_size/const.MEGABYTE:.2f} MB.\n",
                        in_size=in_size,
                        out_size=out_size,
                        max_file_size=self.max_file_size)
        self.logger.debug(f"Output file found to have size {out_size/const.MEGABYTE:.2f} MB")

        return in_size, out_size

    def get_quality(self) -> str:
        """Query the JSON file to obtain conversion quality

        Returns
        -------
        str
            The quality string for this converter and format pair, or "unknown" if the lookup finds nothing
        """
        from psdi_data_conversion.database import get_conversion_quality

        conversion_quality = get_conversion_quality(converter_name=self.name,
                                                    in_format=self.from_format,
                                                    out_format=self.to_format)
        if not conversion_quality:
            return "unknown"
        return conversion_quality.qual_str

    def _finish_convert(self):
        """Run final common steps to clean up a conversion and log success or abort due to an error
        """

        self.in_size, self.out_size = self._check_file_size_and_status()

        if self.delete_input:
            os.remove(self.in_filename)
        # Entries in `data`, when present, override the format names and quality used in the success log
        if "from_full" in self.data:
            self.from_format = self.data["from_full"]
        if "to_full" in self.data:
            self.to_format = self.data["to_full"]
        if "success" in self.data:
            self.quality = self.data["success"]
        else:
            self.quality = self.get_quality()

        self._log_success()
647
+
648
+
649
class ScriptFileConverter(FileConverter):
    """File Converter specialized to run a shell script to call the converter
    """

    script: str | None = None
    """The name of the script to run this converter, relative to the ``psdi_data_conversion/scripts`` directory"""

    required_bin: str | None = None
    """The name of the binary called by the script, relative to the ``psdi_data_conversion/bin/$DIST`` directory,
    where `DIST` is 'linux', 'windows', and/or 'mac', depending on the user's platform. The code will check
    that a binary by this name exists for the user's distribution, and will only register this converter if one is
    found.
    """

    @classmethod
    def can_be_registered(cls) -> bool:
        """If a binary is required for this script, check that it exists for the user's OS/distribution. If one isn't
        required (`cls.required_bin` is None), also return True
        """
        if cls.required_bin is None:
            return True
        return bin_exists(cls.required_bin)

    def _convert(self):
        """Run the converter's shell script on the input file and capture its output.

        The script is called with the target format (as ``--<format>``), the input and output filenames, and the
        user-supplied flag and option strings from `self.data`. Its stdout and stderr are stored in `self.out` and
        `self.err`; a non-zero return code triggers an abort.

        Raises
        ------
        FileConverterHelpException
            If any user-supplied flag or option string fails the `string_is_safe` security check
        """

        self.logger.debug(f"Performing conversion with ScriptFileConverter using script '{self.script}'")

        # Each argument set has its own key in the data dict - the output ("to") flags and options must not be read
        # from the input ("from") keys
        from_flags = self.data.get("from_flags", "")
        to_flags = self.data.get("to_flags", "")
        from_options = self.data.get("from_options", "")
        to_options = self.data.get("to_options", "")

        # Check that all user-provided input passes security checks
        for user_args in [from_flags, to_flags, from_options, to_options]:
            if not string_is_safe(user_args):
                raise FileConverterHelpException(f"Provided argument '{user_args}' does not pass security check - it "
                                                 f"must match the regex {SAFE_STRING_RE.pattern}.")

        # Note that this replaces the whole environment of the child process rather than extending the current one
        env = {"DIST": get_dist()}
        if self.required_bin is not None:
            env["BIN_PATH"] = get_bin_path(self.required_bin)

        # The original path is relative to the current working directory, which only resolves when running from the
        # directory containing the package. If it isn't found there, fall back to the `scripts` directory of the
        # package this module (psdi_data_conversion/converters/base.py) is part of
        script_path = f'psdi_data_conversion/scripts/{self.script}'
        if not os.path.isfile(script_path):
            package_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
            packaged_script = os.path.join(package_dir, "scripts", f"{self.script}")
            if os.path.isfile(packaged_script):
                script_path = packaged_script

        # Arguments are passed as a list (no shell string is built), so they reach the script as separate parameters
        process = subprocess.run(['sh', script_path, '--' + self.to_format,
                                  self.in_filename, self.out_filename, from_flags, to_flags, from_options, to_options],
                                 env=env, capture_output=True, text=True)

        self.out = process.stdout
        self.err = process.stderr

        if process.returncode != 0:
            self.logger.error(f"Conversion process completed with non-zero returncode {process.returncode}; aborting")
            self._abort_from_err()
        else:
            self.logger.debug("Conversion process completed successfully")