psdi-data-conversion 0.0.37__py3-none-any.whl → 0.0.38__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the contents of each package version exactly as they appear in their respective public registries.
@@ -19,6 +19,7 @@ import psdi_data_conversion
19
19
  from psdi_data_conversion import log_utility
20
20
  from psdi_data_conversion import constants as const
21
21
  from psdi_data_conversion.converter import run_converter
22
+ from psdi_data_conversion.database import FormatInfo, get_format_info
22
23
  from psdi_data_conversion.file_io import split_archive_ext
23
24
 
24
25
  # Env var for the SHA of the latest commit
@@ -139,13 +140,38 @@ def convert():
139
140
  qualified_output_log = os.path.join(const.DEFAULT_DOWNLOAD_DIR,
140
141
  split_archive_ext(filename)[0] + const.OUTPUT_LOG_EXT)
141
142
 
143
+ # Determine the input and output formats
144
+ d_formats = {}
145
+ for format_label in "to", "from":
146
+ name = request.form[format_label]
147
+ full_note = request.form[format_label+"_full"]
148
+
149
+ l_possible_formats: list[FormatInfo] = get_format_info(name, which="all")
150
+
151
+ # If there's only one possible format, use that
152
+ if len(l_possible_formats) == 1:
153
+ d_formats[format_label] = l_possible_formats[0]
154
+ continue
155
+
156
+ # Otherwise, find the format with the matching note
157
+ found = False
158
+ for possible_format in l_possible_formats:
159
+ if possible_format.note in full_note:
160
+ d_formats[format_label] = possible_format
161
+ found = True
162
+ break
163
+ if not found:
164
+ print(f"Format '{name}' with full description '{full_note}' could not be found in database.",
165
+ file=sys.stderr)
166
+ abort(const.STATUS_CODE_GENERAL)
167
+
142
168
  if (not service_mode) or (request.form['token'] == token and token != ''):
143
169
  try:
144
170
  conversion_output = run_converter(name=request.form['converter'],
145
171
  filename=qualified_filename,
146
172
  data=request.form,
147
- to_format=request.form['to'],
148
- from_format=request.form['from'],
173
+ to_format=d_formats["to"],
174
+ from_format=d_formats["from"],
149
175
  strict=(request.form['check_ext'] != "false"),
150
176
  log_mode=log_mode,
151
177
  log_level=log_level,
@@ -286,10 +312,10 @@ def main():
286
312
  "variables and their defaults will instead control execution. These defaults will result in "
287
313
  "the app running in production server mode.")
288
314
 
289
- parser.add_argument("--max-file-size", type=float, default=const.DEFAULT_MAX_FILE_SIZE,
315
+ parser.add_argument("--max-file-size", type=float, default=const.DEFAULT_MAX_FILE_SIZE/const.MEGABYTE,
290
316
  help="The maximum allowed filesize in MB - 0 (default) indicates no maximum")
291
317
 
292
- parser.add_argument("--max-file-size-ob", type=float, default=const.DEFAULT_MAX_FILE_SIZE_OB,
318
+ parser.add_argument("--max-file-size-ob", type=float, default=const.DEFAULT_MAX_FILE_SIZE_OB/const.MEGABYTE,
293
319
  help="The maximum allowed filesize in MB for the Open Babel converter, taking precendence over "
294
320
  "the general maximum file size when Open Babel is used - 0 indicates no maximum. Default 1 MB.")
295
321
 
@@ -45,10 +45,11 @@ MAX_FILESIZE_OB_EV = "MAX_FILESIZE_OB"
45
45
  # Files and Folders
46
46
  # -----------------
47
47
 
48
- # Maximum output file size in bytes
49
48
  MEGABYTE = 1024*1024
50
- DEFAULT_MAX_FILE_SIZE = 0*MEGABYTE
51
- DEFAULT_MAX_FILE_SIZE_OB = 1*MEGABYTE
49
+
50
+ # Maximum output file size in bytes
51
+ DEFAULT_MAX_FILE_SIZE = 0 * MEGABYTE
52
+ DEFAULT_MAX_FILE_SIZE_OB = 1 * MEGABYTE
52
53
 
53
54
  DEFAULT_UPLOAD_DIR = './psdi_data_conversion/static/uploads'
54
55
  DEFAULT_DOWNLOAD_DIR = './psdi_data_conversion/static/downloads'
@@ -75,7 +76,7 @@ XZTAR_FORMAT = "xztar"
75
76
  D_TAR_FORMATS = {TAR_EXTENSION: TAR_FORMAT,
76
77
  GZTAR_EXTENSION: GZTAR_FORMAT,
77
78
  BZTAR_EXTENSION: BZTAR_FORMAT,
78
- XZTAR_EXTENSION: BZTAR_FORMAT}
79
+ XZTAR_EXTENSION: XZTAR_FORMAT}
79
80
 
80
81
  # A list of specifically the extensions that are combinations of multiple different extensions
81
82
  L_COMPOUND_EXTENSIONS = [GZTAR_EXTENSION, BZTAR_EXTENSION, XZTAR_EXTENSION]
@@ -93,7 +94,7 @@ L_ALL_ARCHIVE_EXTENSIONS = [*D_SUPPORTED_ARCHIVE_FORMATS.keys(), *L_UNSUPPORTED_
93
94
 
94
95
  # Number of character spaces allocated for flags/options
95
96
 
96
- # Get the terminal width so we can prettily print help text
97
+ # Get the terminal width so we can prettily print help text - default to 80 chars by 20 lines
97
98
  TERM_WIDTH, _ = shutil.get_terminal_size((80, 20))
98
99
 
99
100
  # Log formatting
@@ -196,7 +196,7 @@ class FileConversionRunResult:
196
196
 
197
197
 
198
198
  def check_from_format(filename: str,
199
- from_format: str,
199
+ from_format: str | int,
200
200
  strict=False) -> bool:
201
201
  """Check that the filename for an input file ends with the expected extension
202
202
 
@@ -204,7 +204,7 @@ def check_from_format(filename: str,
204
204
  ----------
205
205
  filename : str
206
206
  The filename
207
- from_format : str
207
+ from_format : str | int
208
208
  The expected format (extension)
209
209
  strict : bool, optional
210
210
  If True, will raise an exception on failure. Otherwise will print a warning and return False
@@ -220,14 +220,21 @@ def check_from_format(filename: str,
220
220
  If `strict` is True and the the file does not end with the expected exception
221
221
  """
222
222
 
223
+ # Get the name of the format
224
+ if isinstance(from_format, str):
225
+ from_format_name = from_format
226
+ else:
227
+ from psdi_data_conversion.database import get_format_info
228
+ from_format_name = get_format_info(from_format).name
229
+
223
230
  # Silently make sure `from_format` starts with a dot
224
- if not from_format.startswith("."):
225
- from_format = f".{from_format}"
231
+ if not from_format_name.startswith("."):
232
+ from_format_name = f".{from_format}"
226
233
 
227
- if filename.endswith(from_format):
234
+ if filename.endswith(from_format_name):
228
235
  return True
229
236
 
230
- msg = const.ERR_WRONG_EXTENSIONS.format(file=os.path.basename(filename), ext=from_format)
237
+ msg = const.ERR_WRONG_EXTENSIONS.format(file=os.path.basename(filename), ext=from_format_name)
231
238
 
232
239
  if strict:
233
240
  raise base.FileConverterInputException(msg)
@@ -37,9 +37,13 @@ class FileConverterException(RuntimeError):
37
37
 
38
38
  def __init__(self,
39
39
  *args,
40
- logged: bool = False):
40
+ logged: bool = False,
41
+ help: bool = False,
42
+ msg_preformatted: bool = False):
41
43
  super().__init__(*args)
42
44
  self.logged = logged
45
+ self.help = help
46
+ self.msg_preformatted = msg_preformatted
43
47
 
44
48
 
45
49
  class FileConverterAbortException(FileConverterException):
@@ -76,23 +80,6 @@ class FileConverterInputException(FileConverterException):
76
80
  pass
77
81
 
78
82
 
79
- class FileConverterHelpException(FileConverterInputException):
80
- """An exception class which indicates an error where we will likely want to help the user figure out how to
81
- correctly use the CLI instead of simply printing a traceback
82
- """
83
-
84
- def __init__(self, *args, msg_preformatted=False):
85
- """Init the exception, noting if the message should be treated as preformatted or not
86
-
87
- Parameters
88
- ----------
89
- msg_preformatted : bool, optional
90
- If True, indicates that the message of the exception has already been formatted. Default False
91
- """
92
- super().__init__(*args)
93
- self.msg_preformatted = msg_preformatted
94
-
95
-
96
83
  if HTTPException is not None:
97
84
  l_abort_exceptions = (HTTPException, FileConverterAbortException)
98
85
  else:
@@ -157,6 +144,10 @@ class FileConverter:
157
144
  database_key_prefix: str | None = None
158
145
  """The prefix used in the database for keys related to this converter"""
159
146
 
147
+ supports_ambiguous_extensions: bool = False
148
+ """Whether or not this converter supports formats which share the same extension. This is used to enforce stricter
149
+ but less user-friendly requirements on format specification"""
150
+
160
151
  @abc.abstractmethod
161
152
  def _convert(self):
162
153
  """Run the conversion with the desired converter. This must be implemented for each converter class.
@@ -325,11 +316,10 @@ class FileConverter:
325
316
  else:
326
317
  self.from_format = from_format
327
318
 
328
- # Remove any leading periods from to/from_format
329
- if self.to_format.startswith("."):
330
- self.to_format = self.to_format[1:]
331
- if self.from_format.startswith("."):
332
- self.from_format = self.from_format[1:]
319
+ # Convert in and out formats to FormatInfo, and raise an exception if one is ambiguous
320
+ from psdi_data_conversion.database import disambiguate_formats
321
+ (self.from_format_info,
322
+ self.to_format_info) = disambiguate_formats(self.name, self.from_format, self.to_format)
333
323
 
334
324
  # Set placeholders for member variables which will be set when conversion is run
335
325
  self.in_size: int | None = None
@@ -348,7 +338,7 @@ class FileConverter:
348
338
 
349
339
  self.local_filename = os.path.split(self.in_filename)[1]
350
340
  self.filename_base = os.path.splitext(self.local_filename)[0]
351
- self.out_filename = f"{self.download_dir}/{self.filename_base}.{self.to_format}"
341
+ self.out_filename = f"{self.download_dir}/{self.filename_base}.{self.to_format_info.name}"
352
342
 
353
343
  # Set up files to log to
354
344
  self._setup_loggers()
@@ -356,13 +346,16 @@ class FileConverter:
356
346
  # Check that the requested conversion is valid and warn of any issues unless suppressed
357
347
  if not no_check:
358
348
  from psdi_data_conversion.database import get_conversion_quality
359
- qual = get_conversion_quality(self.name, self.from_format, self.to_format)
349
+ qual = get_conversion_quality(self.name,
350
+ self.from_format_info.id,
351
+ self.to_format_info.id)
360
352
  if not qual:
361
- raise FileConverterHelpException(f"Conversion from {self.from_format} to {self.to_format} "
362
- f"with {self.name} is not supported.")
353
+ raise FileConverterInputException(f"Conversion from {self.from_format_info.name} to "
354
+ f"{self.to_format_info.name} "
355
+ f"with {self.name} is not supported.", help=True)
363
356
  if qual.details:
364
357
  msg = (":\nPotential data loss or extrapolation issues with the conversion from "
365
- f"{self.from_format} to {self.to_format}:\n")
358
+ f"{self.from_format_info.name} to {self.to_format_info.name}:\n")
366
359
  for detail_line in qual.details.split("\n"):
367
360
  msg += f"- {detail_line}\n"
368
361
  self.logger.warning(msg)
@@ -381,7 +374,7 @@ class FileConverter:
381
374
  # Try to run the standard abort method. There's a good chance this will fail though depending on what went
382
375
  # wrong when during init, so we fallback to printing the exception to stderr
383
376
  try:
384
- if not isinstance(e, FileConverterHelpException):
377
+ if not (isinstance(e, FileConverterException) and e.help):
385
378
  self.logger.error(f"Exception triggering an abort was raised while initializing the converter. "
386
379
  f"Exception was type '{type(e)}', with message: {str(e)}")
387
380
  if e:
@@ -389,7 +382,7 @@ class FileConverter:
389
382
  self._abort(message="The application encountered an error while initializing the converter:\n" +
390
383
  traceback.format_exc(), e=e)
391
384
  except Exception as ee:
392
- if isinstance(ee, (l_abort_exceptions, FileConverterHelpException)):
385
+ if isinstance(ee, l_abort_exceptions) or (isinstance(ee, FileConverterException) and ee.help):
393
386
  # Don't catch a deliberate abort or help exception; let it pass through
394
387
  raise
395
388
  message = ("ERROR: The application encounted an error during initialization of the converter and "
@@ -486,7 +479,7 @@ class FileConverter:
486
479
  f"with message: {str(e)}")
487
480
  e.logged = True
488
481
  raise
489
- if not isinstance(e, FileConverterHelpException):
482
+ if not (isinstance(e, FileConverterException) and e.help):
490
483
  self.logger.error(f"Exception triggering an abort was raised while running the converter. Exception "
491
484
  f"was type '{type(e)}', with message: {str(e)}")
492
485
  if e:
@@ -520,6 +513,18 @@ class FileConverter:
520
513
 
521
514
  """
522
515
 
516
+ def try_debug_log(msg, *args, **kwargs):
517
+ try:
518
+ self.logger.debug(msg, *args, **kwargs)
519
+ except AttributeError:
520
+ pass
521
+
522
+ def error_log(msg, *args, **kwargs):
523
+ try:
524
+ self.logger.error(msg, *args, **kwargs)
525
+ except AttributeError:
526
+ print(msg, file=sys.stderr)
527
+
523
528
  # Remove the input and output files if they exist
524
529
  if self.delete_input:
525
530
  self.logger.debug(f"Cleaning up input file {self.in_filename}")
@@ -527,33 +532,34 @@ class FileConverter:
527
532
  os.remove(self.in_filename)
528
533
  except FileNotFoundError:
529
534
  pass
535
+
530
536
  try:
531
537
  os.remove(self.out_filename)
532
- except FileNotFoundError:
533
- self.logger.debug("Application aborting; no output file found to clean up")
538
+ except (FileNotFoundError, AttributeError):
539
+ try_debug_log("Application aborting; no output file found to clean up")
534
540
  else:
535
- self.logger.debug(f"Application aborting, so cleaning up output file {self.out_filename}")
541
+ try_debug_log(f"Application aborting, so cleaning up output file {self.out_filename}")
536
542
 
537
543
  # If we have a Help exception, override the message with its message
538
- if isinstance(e, FileConverterHelpException):
539
- self.logger.debug("Help exception triggered, so only using its message for output")
544
+ if isinstance(e, FileConverterException) and e.help:
545
+ try_debug_log("Help exception triggered, so only using its message for output")
540
546
  message = str(e)
541
547
 
542
548
  if message:
543
549
  # If we're adding a message in server mode, read in any prior logs, clear the log, write the message, then
544
550
  # write the prior logs
545
551
  if self.log_file is None:
546
- self.logger.debug("Adding abort message to the top of the output log so it will be the first thing "
547
- "read by the user")
552
+ try_debug_log("Adding abort message to the top of the output log so it will be the first thing "
553
+ "read by the user")
548
554
  prior_output_log = open(self.output_log, "r").read()
549
555
  os.remove(self.output_log)
550
556
  with open(self.output_log, "w") as fo:
551
557
  fo.write(message + "\n")
552
558
  fo.write(prior_output_log)
553
559
 
554
- # Note this message in the dev logger as well
555
- if not isinstance(e, FileConverterHelpException):
556
- self.logger.error(message)
560
+ # Note this message in the error logger as well
561
+ if not (isinstance(e, FileConverterException) and e.help):
562
+ error_log(message)
557
563
  if e:
558
564
  e.logged = True
559
565
 
@@ -604,8 +610,8 @@ class FileConverter:
604
610
  # empty or whitespace will be stripped by the logger, so we use a lone colon, which looks least obtrusive
605
611
  return (":\n"
606
612
  f"File name: {self.filename_base}\n"
607
- f"From: {self.from_format}\n"
608
- f"To: {self.to_format}\n"
613
+ f"From: {self.from_format_info.name} ({self.from_format_info.note})\n"
614
+ f"To: {self.to_format} ({self.to_format_info.note})\n"
609
615
  f"Converter: {self.name}\n")
610
616
 
611
617
  def _log_success(self):
@@ -680,8 +686,8 @@ class FileConverter:
680
686
  from psdi_data_conversion.database import get_conversion_quality
681
687
 
682
688
  conversion_quality = get_conversion_quality(converter_name=self.name,
683
- in_format=self.from_format,
684
- out_format=self.to_format)
689
+ in_format=self.from_format_info.id,
690
+ out_format=self.to_format_info.id)
685
691
  if not conversion_quality:
686
692
  return "unknown"
687
693
  return conversion_quality.qual_str
@@ -694,10 +700,6 @@ class FileConverter:
694
700
 
695
701
  if self.delete_input:
696
702
  os.remove(self.in_filename)
697
- if "from_full" in self.data:
698
- self.from_format = self.data["from_full"]
699
- if "to_full" in self.data:
700
- self.to_format = self.data["to_full"]
701
703
  if "success" in self.data:
702
704
  self.quality = self.data["success"]
703
705
  else:
@@ -741,15 +743,16 @@ class ScriptFileConverter(FileConverter):
741
743
  # Check that all user-provided input passes security checks
742
744
  for user_args in [from_flags, to_flags, from_options, to_options]:
743
745
  if not string_is_safe(user_args):
744
- raise FileConverterHelpException(f"Provided argument '{user_args}' does not pass security check - it "
745
- f"must match the regex {SAFE_STRING_RE.pattern}.")
746
+ raise FileConverterInputException(f"Provided argument '{user_args}' does not pass security check - it "
747
+ f"must match the regex {SAFE_STRING_RE.pattern}.", help=True)
746
748
 
747
749
  env = {"DIST": get_dist()}
748
750
  if self.required_bin is not None:
749
751
  env["BIN_PATH"] = get_bin_path(self.required_bin)
750
752
 
751
- process = subprocess.run(['sh', f'psdi_data_conversion/scripts/{self.script}', '--' + self.to_format,
752
- self.in_filename, self.out_filename, from_flags, to_flags, from_options, to_options],
753
+ process = subprocess.run(['sh', f'psdi_data_conversion/scripts/{self.script}',
754
+ '--' + self.to_format_info.name, self.in_filename, self.out_filename, from_flags,
755
+ to_flags, from_options, to_options],
753
756
  env=env, capture_output=True, text=True)
754
757
 
755
758
  self.out = process.stdout
@@ -27,6 +27,7 @@ class C2xFileConverter(ScriptFileConverter):
27
27
  "https://www.gnu.org/licenses/gpl-3.0.en.html. Its binaries are redistributed here under the terms of this "
28
28
  "license, and any further redistribution must also follow these terms. Its corresponding source code "
29
29
  "may be downloaded from https://www.c2x.org.uk/downloads/")
30
+ supports_ambiguous_extensions = True
30
31
 
31
32
 
32
33
  # Assign this converter to the `converter` variable - this lets the psdi_data_conversion.converter module detect and
@@ -9,7 +9,7 @@ from copy import deepcopy
9
9
  from openbabel import openbabel
10
10
  import py
11
11
 
12
- from psdi_data_conversion.converters.base import FileConverter, FileConverterHelpException
12
+ from psdi_data_conversion.converters.base import FileConverter, FileConverterInputException
13
13
  from psdi_data_conversion.security import SAFE_STRING_RE, string_is_safe
14
14
 
15
15
  CONVERTER_OB = 'Open Babel'
@@ -30,8 +30,8 @@ def check_string_security(s: str):
30
30
  """Checks that a string is secure and raises an exception if it isn't.
31
31
  """
32
32
  if not string_is_safe(s):
33
- raise FileConverterHelpException(f"Format option '{s}' does not pass security checks. It must pass the regex "
34
- f"/{SAFE_STRING_RE.pattern}/.")
33
+ raise FileConverterInputException(f"Format option '{s}' does not pass security checks. It must pass the regex "
34
+ f"/{SAFE_STRING_RE.pattern}/.", help=True)
35
35
 
36
36
 
37
37
  def get_option_and_value(s: str):
@@ -60,16 +60,16 @@ def get_coord_gen(l_opts: list[str] | None) -> dict[str, str]:
60
60
 
61
61
  # No more than two arguments supplied to --coord-gen
62
62
  if l_opts is not None and len(l_opts) > 2:
63
- raise FileConverterHelpException("At most two arguments may be provided to --coord-gen, the mode and "
64
- "quality, e.g. '--coord-gen Gen3D best'")
63
+ raise FileConverterInputException("At most two arguments may be provided to --coord-gen, the mode and "
64
+ "quality, e.g. '--coord-gen Gen3D best'", help=True)
65
65
 
66
66
  # Coordinate generation options are valid
67
67
  if coord_gen not in L_ALLOWED_COORD_GENS:
68
- raise FileConverterHelpException(f"Coordinate generation type '{coord_gen}' not recognised. Allowed "
69
- f"types are: {L_ALLOWED_COORD_GENS}")
68
+ raise FileConverterInputException(f"Coordinate generation type '{coord_gen}' not recognised. Allowed "
69
+ f"types are: {L_ALLOWED_COORD_GENS}", help=True)
70
70
  if coord_gen_qual not in L_ALLOWED_COORD_GEN_QUALS:
71
- raise FileConverterHelpException(f"Coordinate generation quality '{coord_gen_qual}' not recognised. "
72
- f"Allowed qualities are: {L_ALLOWED_COORD_GEN_QUALS}")
71
+ raise FileConverterInputException(f"Coordinate generation quality '{coord_gen_qual}' not recognised. "
72
+ f"Allowed qualities are: {L_ALLOWED_COORD_GEN_QUALS}", help=True)
73
73
 
74
74
  return {COORD_GEN_KEY: coord_gen,
75
75
  COORD_GEN_QUAL_KEY: coord_gen_qual}
@@ -148,7 +148,7 @@ class OBFileConverter(FileConverter):
148
148
  stdouterr_ob = py.io.StdCaptureFD(in_=False)
149
149
 
150
150
  ob_conversion = openbabel.OBConversion()
151
- ob_conversion.SetInAndOutFormats(self.from_format, self.to_format)
151
+ ob_conversion.SetInAndOutFormats(self.from_format_info.name, self.to_format_info.name)
152
152
 
153
153
  # Retrieve 'from' and 'to' option flags and arguments
154
154
  from_flags = self.data.get("from_flags", "")