psdi-data-conversion 0.0.37__py3-none-any.whl → 0.0.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- psdi_data_conversion/app.py +64 -14
- psdi_data_conversion/constants.py +6 -5
- psdi_data_conversion/converter.py +20 -13
- psdi_data_conversion/converters/base.py +75 -68
- psdi_data_conversion/converters/c2x.py +14 -0
- psdi_data_conversion/converters/openbabel.py +12 -11
- psdi_data_conversion/database.py +361 -115
- psdi_data_conversion/dist.py +2 -1
- psdi_data_conversion/file_io.py +1 -2
- psdi_data_conversion/log_utility.py +1 -1
- psdi_data_conversion/main.py +152 -70
- psdi_data_conversion/static/content/index-versions/psdi-common-footer.html +12 -8
- psdi_data_conversion/static/content/psdi-common-footer.html +12 -8
- psdi_data_conversion/static/data/data.json +617 -3
- psdi_data_conversion/static/javascript/convert.js +54 -6
- psdi_data_conversion/static/javascript/convert_common.js +16 -2
- psdi_data_conversion/static/javascript/data.js +36 -4
- psdi_data_conversion/static/javascript/format.js +22 -9
- psdi_data_conversion/static/styles/format.css +7 -0
- psdi_data_conversion/templates/index.htm +57 -48
- psdi_data_conversion/testing/constants.py +3 -0
- psdi_data_conversion/testing/conversion_callbacks.py +4 -3
- psdi_data_conversion/testing/conversion_test_specs.py +44 -20
- psdi_data_conversion/testing/gui.py +362 -294
- psdi_data_conversion/testing/utils.py +38 -19
- {psdi_data_conversion-0.0.37.dist-info → psdi_data_conversion-0.0.39.dist-info}/METADATA +88 -4
- {psdi_data_conversion-0.0.37.dist-info → psdi_data_conversion-0.0.39.dist-info}/RECORD +30 -30
- {psdi_data_conversion-0.0.37.dist-info → psdi_data_conversion-0.0.39.dist-info}/WHEEL +0 -0
- {psdi_data_conversion-0.0.37.dist-info → psdi_data_conversion-0.0.39.dist-info}/entry_points.txt +0 -0
- {psdi_data_conversion-0.0.37.dist-info → psdi_data_conversion-0.0.39.dist-info}/licenses/LICENSE +0 -0
psdi_data_conversion/app.py
CHANGED
@@ -5,21 +5,28 @@ Version 1.0, 8th November 2024
|
|
5
5
|
This script acts as a server for the PSDI Data Conversion Service website.
|
6
6
|
"""
|
7
7
|
|
8
|
-
from argparse import ArgumentParser
|
9
|
-
import hashlib
|
10
|
-
import os
|
11
8
|
import json
|
9
|
+
import os
|
10
|
+
import sys
|
11
|
+
from argparse import ArgumentParser
|
12
|
+
from collections.abc import Callable
|
12
13
|
from datetime import datetime
|
14
|
+
from functools import wraps
|
15
|
+
from hashlib import md5
|
13
16
|
from subprocess import run
|
14
|
-
import
|
15
|
-
import
|
16
|
-
|
17
|
+
from traceback import format_exc
|
18
|
+
from typing import Any
|
19
|
+
|
20
|
+
import werkzeug.serving
|
21
|
+
from flask import Flask, Response, abort, cli, render_template, request
|
17
22
|
|
18
23
|
import psdi_data_conversion
|
19
|
-
from psdi_data_conversion import log_utility
|
20
24
|
from psdi_data_conversion import constants as const
|
25
|
+
from psdi_data_conversion import log_utility
|
21
26
|
from psdi_data_conversion.converter import run_converter
|
27
|
+
from psdi_data_conversion.database import get_format_info
|
22
28
|
from psdi_data_conversion.file_io import split_archive_ext
|
29
|
+
from psdi_data_conversion.main import print_wrap
|
23
30
|
|
24
31
|
# Env var for the SHA of the latest commit
|
25
32
|
SHA_EV = "SHA"
|
@@ -35,7 +42,7 @@ FILE_TO_UPLOAD_KEY = 'fileToUpload'
|
|
35
42
|
|
36
43
|
# Create a token by hashing the current date and time.
|
37
44
|
dt = str(datetime.now())
|
38
|
-
token =
|
45
|
+
token = md5(dt.encode('utf8')).hexdigest()
|
39
46
|
|
40
47
|
# Get the service and production modes from their envvars
|
41
48
|
service_mode_ev = os.environ.get(SERVICE_MODE_EV)
|
@@ -79,6 +86,22 @@ if ev_max_file_size_ob is not None:
|
|
79
86
|
else:
|
80
87
|
max_file_size_ob = const.DEFAULT_MAX_FILE_SIZE_OB
|
81
88
|
|
89
|
+
# Since we're using the development server as the user GUI, we monkey-patch Flask to disable the warnings that would
|
90
|
+
# otherwise appear for this so they don't confuse the user
|
91
|
+
|
92
|
+
|
93
|
+
def suppress_warning(func: Callable[..., Any]) -> Callable[..., Any]:
    """Wrap a function so that the development-server warning message is replaced by an empty string

    Parameters
    ----------
    func : Callable[..., Any]
        The function to wrap

    Returns
    -------
    Callable[..., Any]
        A wrapper which returns '' when its first positional argument is a string starting with the
        development-server warning text, and otherwise returns the result of calling `func` with the
        arguments unchanged
    """

    warning_prefix = 'WARNING: This is a development server.'

    @wraps(func)
    def wrapper(*args, **kwargs) -> Any:
        # Only the first positional argument is inspected - keyword arguments are never checked
        first_arg = args[0] if args else None
        is_dev_server_warning = isinstance(first_arg, str) and first_arg.startswith(warning_prefix)
        if is_dev_server_warning:
            return ''
        return func(*args, **kwargs)

    return wrapper
|
100
|
+
|
101
|
+
|
102
|
+
werkzeug.serving._ansi_style = suppress_warning(werkzeug.serving._ansi_style)
|
103
|
+
cli.show_server_banner = lambda *_: None
|
104
|
+
|
82
105
|
app = Flask(__name__)
|
83
106
|
|
84
107
|
|
@@ -100,7 +123,7 @@ def get_last_sha() -> str:
|
|
100
123
|
out_str = str(out_bytes.decode()).strip()
|
101
124
|
|
102
125
|
except Exception:
|
103
|
-
print("ERROR: Could not determine SHA of most recent commit. Error was:\n" +
|
126
|
+
print("ERROR: Could not determine SHA of most recent commit. Error was:\n" + format_exc(),
|
104
127
|
file=sys.stderr)
|
105
128
|
out_str = "N/A"
|
106
129
|
|
@@ -139,13 +162,36 @@ def convert():
|
|
139
162
|
qualified_output_log = os.path.join(const.DEFAULT_DOWNLOAD_DIR,
|
140
163
|
split_archive_ext(filename)[0] + const.OUTPUT_LOG_EXT)
|
141
164
|
|
165
|
+
# Determine the input and output formats
|
166
|
+
d_formats = {}
|
167
|
+
for format_label in "to", "from":
|
168
|
+
name = request.form[format_label]
|
169
|
+
full_note = request.form[format_label+"_full"]
|
170
|
+
|
171
|
+
l_possible_formats = get_format_info(name, which="all")
|
172
|
+
|
173
|
+
# If there's only one possible format, use that
|
174
|
+
if len(l_possible_formats) == 1:
|
175
|
+
d_formats[format_label] = l_possible_formats[0]
|
176
|
+
continue
|
177
|
+
|
178
|
+
# Otherwise, find the format with the matching note
|
179
|
+
for possible_format in l_possible_formats:
|
180
|
+
if possible_format.note in full_note:
|
181
|
+
d_formats[format_label] = possible_format
|
182
|
+
break
|
183
|
+
else:
|
184
|
+
print(f"Format '{name}' with full description '{full_note}' could not be found in database.",
|
185
|
+
file=sys.stderr)
|
186
|
+
abort(const.STATUS_CODE_GENERAL)
|
187
|
+
|
142
188
|
if (not service_mode) or (request.form['token'] == token and token != ''):
|
143
189
|
try:
|
144
190
|
conversion_output = run_converter(name=request.form['converter'],
|
145
191
|
filename=qualified_filename,
|
146
192
|
data=request.form,
|
147
|
-
to_format=
|
148
|
-
from_format=
|
193
|
+
to_format=d_formats["to"],
|
194
|
+
from_format=d_formats["from"],
|
149
195
|
strict=(request.form['check_ext'] != "false"),
|
150
196
|
log_mode=log_mode,
|
151
197
|
log_level=log_level,
|
@@ -174,7 +220,7 @@ def convert():
|
|
174
220
|
else:
|
175
221
|
# Failsafe exception message
|
176
222
|
msg = ("The following unexpected exception was raised by the converter:\n" +
|
177
|
-
|
223
|
+
format_exc()+"\n")
|
178
224
|
with open(qualified_output_log, "w") as fo:
|
179
225
|
fo.write(msg)
|
180
226
|
abort(status_code)
|
@@ -286,10 +332,10 @@ def main():
|
|
286
332
|
"variables and their defaults will instead control execution. These defaults will result in "
|
287
333
|
"the app running in production server mode.")
|
288
334
|
|
289
|
-
parser.add_argument("--max-file-size", type=float, default=const.DEFAULT_MAX_FILE_SIZE,
|
335
|
+
parser.add_argument("--max-file-size", type=float, default=const.DEFAULT_MAX_FILE_SIZE/const.MEGABYTE,
|
290
336
|
help="The maximum allowed filesize in MB - 0 (default) indicates no maximum")
|
291
337
|
|
292
|
-
parser.add_argument("--max-file-size-ob", type=float, default=const.DEFAULT_MAX_FILE_SIZE_OB,
|
338
|
+
parser.add_argument("--max-file-size-ob", type=float, default=const.DEFAULT_MAX_FILE_SIZE_OB/const.MEGABYTE,
|
293
339
|
help="The maximum allowed filesize in MB for the Open Babel converter, taking precendence over "
|
294
340
|
"the general maximum file size when Open Babel is used - 0 indicates no maximum. Default 1 MB.")
|
295
341
|
|
@@ -334,6 +380,10 @@ def main():
|
|
334
380
|
global log_level
|
335
381
|
log_level = args.log_level
|
336
382
|
|
383
|
+
print_wrap("Starting the PSDI Data Conversion GUI. This GUI is run as a webpage, which you can open by "
|
384
|
+
"right-clicking the link below to open it in your default browser, or by copy-and-pasting it into your "
|
385
|
+
"browser of choice.")
|
386
|
+
|
337
387
|
start_app()
|
338
388
|
|
339
389
|
|
@@ -45,10 +45,11 @@ MAX_FILESIZE_OB_EV = "MAX_FILESIZE_OB"
|
|
45
45
|
# Files and Folders
|
46
46
|
# -----------------
|
47
47
|
|
48
|
-
# Maximum output file size in bytes
|
49
48
|
MEGABYTE = 1024*1024
|
50
|
-
|
51
|
-
|
49
|
+
|
50
|
+
# Maximum output file size in bytes
|
51
|
+
DEFAULT_MAX_FILE_SIZE = 0 * MEGABYTE
|
52
|
+
DEFAULT_MAX_FILE_SIZE_OB = 1 * MEGABYTE
|
52
53
|
|
53
54
|
DEFAULT_UPLOAD_DIR = './psdi_data_conversion/static/uploads'
|
54
55
|
DEFAULT_DOWNLOAD_DIR = './psdi_data_conversion/static/downloads'
|
@@ -75,7 +76,7 @@ XZTAR_FORMAT = "xztar"
|
|
75
76
|
D_TAR_FORMATS = {TAR_EXTENSION: TAR_FORMAT,
|
76
77
|
GZTAR_EXTENSION: GZTAR_FORMAT,
|
77
78
|
BZTAR_EXTENSION: BZTAR_FORMAT,
|
78
|
-
XZTAR_EXTENSION:
|
79
|
+
XZTAR_EXTENSION: XZTAR_FORMAT}
|
79
80
|
|
80
81
|
# A list of specifically the extensions that are combinations of multiple different extensions
|
81
82
|
L_COMPOUND_EXTENSIONS = [GZTAR_EXTENSION, BZTAR_EXTENSION, XZTAR_EXTENSION]
|
@@ -93,7 +94,7 @@ L_ALL_ARCHIVE_EXTENSIONS = [*D_SUPPORTED_ARCHIVE_FORMATS.keys(), *L_UNSUPPORTED_
|
|
93
94
|
|
94
95
|
# Number of character spaces allocated for flags/options
|
95
96
|
|
96
|
-
# Get the terminal width so we can prettily print help text
|
97
|
+
# Get the terminal width so we can prettily print help text - default to 80 chars by 20 lines
|
97
98
|
TERM_WIDTH, _ = shutil.get_terminal_size((80, 20))
|
98
99
|
|
99
100
|
# Log formatting
|
@@ -5,18 +5,18 @@ Created 2024-12-10 by Bryan Gillis.
|
|
5
5
|
Class and functions to perform file conversion
|
6
6
|
"""
|
7
7
|
|
8
|
-
|
9
|
-
import os
|
8
|
+
import glob
|
10
9
|
import importlib
|
10
|
+
import os
|
11
11
|
import sys
|
12
|
-
from tempfile import TemporaryDirectory
|
13
12
|
import traceback
|
14
|
-
from
|
13
|
+
from collections.abc import Callable
|
14
|
+
from dataclasses import dataclass, field
|
15
|
+
from tempfile import TemporaryDirectory
|
16
|
+
from typing import Any, NamedTuple
|
17
|
+
|
15
18
|
from psdi_data_conversion import constants as const
|
16
19
|
from psdi_data_conversion.converters import base
|
17
|
-
|
18
|
-
import glob
|
19
|
-
|
20
20
|
from psdi_data_conversion.converters.openbabel import CONVERTER_OB
|
21
21
|
from psdi_data_conversion.file_io import (is_archive, is_supported_archive, pack_zip_or_tar, split_archive_ext,
|
22
22
|
unpack_zip_or_tar)
|
@@ -196,7 +196,7 @@ class FileConversionRunResult:
|
|
196
196
|
|
197
197
|
|
198
198
|
def check_from_format(filename: str,
|
199
|
-
from_format: str,
|
199
|
+
from_format: str | int,
|
200
200
|
strict=False) -> bool:
|
201
201
|
"""Check that the filename for an input file ends with the expected extension
|
202
202
|
|
@@ -204,7 +204,7 @@ def check_from_format(filename: str,
|
|
204
204
|
----------
|
205
205
|
filename : str
|
206
206
|
The filename
|
207
|
-
from_format : str
|
207
|
+
from_format : str | int
|
208
208
|
The expected format (extension)
|
209
209
|
strict : bool, optional
|
210
210
|
If True, will raise an exception on failure. Otherwise will print a warning and return False
|
@@ -220,14 +220,21 @@ def check_from_format(filename: str,
|
|
220
220
|
If `strict` is True and the file does not end with the expected extension
|
221
221
|
"""
|
222
222
|
|
223
|
+
# Get the name of the format: a string is taken to be the name itself, while any other value is looked up in the
# database to find the name
if isinstance(from_format, str):
    from_format_name = from_format
else:
    # NOTE(review): imported locally rather than at module level - presumably to avoid a circular import; confirm
    # before moving it
    from psdi_data_conversion.database import get_format_info
    from_format_name = get_format_info(from_format).name

# Silently make sure `from_format_name` starts with a dot. The dot must be prefixed to the resolved name rather than
# to `from_format` itself, since `from_format` may be an int rather than the name of the format
if not from_format_name.startswith("."):
    from_format_name = f".{from_format_name}"
|
226
233
|
|
227
|
-
if filename.endswith(
|
234
|
+
if filename.endswith(from_format_name):
|
228
235
|
return True
|
229
236
|
|
230
|
-
msg = const.ERR_WRONG_EXTENSIONS.format(file=os.path.basename(filename), ext=
|
237
|
+
msg = const.ERR_WRONG_EXTENSIONS.format(file=os.path.basename(filename), ext=from_format_name)
|
231
238
|
|
232
239
|
if strict:
|
233
240
|
raise base.FileConverterInputException(msg)
|
@@ -6,19 +6,19 @@ Base class and information for file format converters
|
|
6
6
|
"""
|
7
7
|
|
8
8
|
|
9
|
-
|
10
|
-
from dataclasses import dataclass
|
9
|
+
import abc
|
11
10
|
import logging
|
12
|
-
from collections.abc import Callable
|
13
11
|
import os
|
14
12
|
import subprocess
|
15
|
-
import abc
|
16
|
-
|
17
13
|
import sys
|
18
14
|
import traceback
|
15
|
+
from collections.abc import Callable
|
16
|
+
from copy import deepcopy
|
17
|
+
from dataclasses import dataclass
|
19
18
|
from typing import Any
|
20
19
|
|
21
|
-
from psdi_data_conversion import constants as const
|
20
|
+
from psdi_data_conversion import constants as const
|
21
|
+
from psdi_data_conversion import log_utility
|
22
22
|
from psdi_data_conversion.dist import bin_exists, get_bin_path, get_dist
|
23
23
|
from psdi_data_conversion.security import SAFE_STRING_RE, string_is_safe
|
24
24
|
|
@@ -37,9 +37,13 @@ class FileConverterException(RuntimeError):
|
|
37
37
|
|
38
38
|
def __init__(self,
             *args,
             logged: bool = False,
             help: bool = False,
             msg_preformatted: bool = False):
    """Init the exception, storing its handling flags as attributes

    Parameters
    ----------
    *args
        Positional arguments, passed unchanged to the parent class's initializer
    logged : bool, optional
        Stored as `self.logged`. Handlers set this to True on an exception once its message has been logged.
        Default False
    help : bool, optional
        Stored as `self.help`. When True, handlers treat this as a "help" exception, using only its message for
        output rather than logging it as an error. Default False
    msg_preformatted : bool, optional
        Stored as `self.msg_preformatted`. Presumably indicates that the message has already been formatted for
        display - confirm against the code which reads it. Default False
    """
    super().__init__(*args)
    self.logged, self.help, self.msg_preformatted = logged, help, msg_preformatted
|
43
47
|
|
44
48
|
|
45
49
|
class FileConverterAbortException(FileConverterException):
|
@@ -76,23 +80,6 @@ class FileConverterInputException(FileConverterException):
|
|
76
80
|
pass
|
77
81
|
|
78
82
|
|
79
|
-
class FileConverterHelpException(FileConverterInputException):
|
80
|
-
"""An exception class which indicates an error where we will likely want to help the user figure out how to
|
81
|
-
correctly use the CLI instead of simply printing a traceback
|
82
|
-
"""
|
83
|
-
|
84
|
-
def __init__(self, *args, msg_preformatted=False):
|
85
|
-
"""Init the exception, noting if the message should be treated as preformatted or not
|
86
|
-
|
87
|
-
Parameters
|
88
|
-
----------
|
89
|
-
msg_preformatted : bool, optional
|
90
|
-
If True, indicates that the message of the exception has already been formatted. Default False
|
91
|
-
"""
|
92
|
-
super().__init__(*args)
|
93
|
-
self.msg_preformatted = msg_preformatted
|
94
|
-
|
95
|
-
|
96
83
|
if HTTPException is not None:
|
97
84
|
l_abort_exceptions = (HTTPException, FileConverterAbortException)
|
98
85
|
else:
|
@@ -157,6 +144,10 @@ class FileConverter:
|
|
157
144
|
database_key_prefix: str | None = None
|
158
145
|
"""The prefix used in the database for keys related to this converter"""
|
159
146
|
|
147
|
+
supports_ambiguous_extensions: bool = False
|
148
|
+
"""Whether or not this converter supports formats which share the same extension. This is used to enforce stricter
|
149
|
+
but less user-friendly requirements on format specification"""
|
150
|
+
|
160
151
|
@abc.abstractmethod
|
161
152
|
def _convert(self):
|
162
153
|
"""Run the conversion with the desired converter. This must be implemented for each converter class.
|
@@ -325,11 +316,10 @@ class FileConverter:
|
|
325
316
|
else:
|
326
317
|
self.from_format = from_format
|
327
318
|
|
328
|
-
#
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
self.from_format = self.from_format[1:]
|
319
|
+
# Convert in and out formats to FormatInfo, and raise an exception if one is ambiguous
|
320
|
+
from psdi_data_conversion.database import disambiguate_formats
|
321
|
+
(self.from_format_info,
|
322
|
+
self.to_format_info) = disambiguate_formats(self.name, self.from_format, self.to_format)
|
333
323
|
|
334
324
|
# Set placeholders for member variables which will be set when conversion is run
|
335
325
|
self.in_size: int | None = None
|
@@ -348,7 +338,7 @@ class FileConverter:
|
|
348
338
|
|
349
339
|
self.local_filename = os.path.split(self.in_filename)[1]
|
350
340
|
self.filename_base = os.path.splitext(self.local_filename)[0]
|
351
|
-
self.out_filename = f"{self.download_dir}/{self.filename_base}.{self.
|
341
|
+
self.out_filename = f"{self.download_dir}/{self.filename_base}.{self.to_format_info.name}"
|
352
342
|
|
353
343
|
# Set up files to log to
|
354
344
|
self._setup_loggers()
|
@@ -356,13 +346,16 @@ class FileConverter:
|
|
356
346
|
# Check that the requested conversion is valid and warn of any issues unless suppressed
|
357
347
|
if not no_check:
|
358
348
|
from psdi_data_conversion.database import get_conversion_quality
|
359
|
-
qual = get_conversion_quality(self.name,
|
349
|
+
qual = get_conversion_quality(self.name,
|
350
|
+
self.from_format_info.id,
|
351
|
+
self.to_format_info.id)
|
360
352
|
if not qual:
|
361
|
-
raise
|
362
|
-
|
353
|
+
raise FileConverterInputException(f"Conversion from {self.from_format_info.name} to "
|
354
|
+
f"{self.to_format_info.name} "
|
355
|
+
f"with {self.name} is not supported.", help=True)
|
363
356
|
if qual.details:
|
364
357
|
msg = (":\nPotential data loss or extrapolation issues with the conversion from "
|
365
|
-
f"{self.
|
358
|
+
f"{self.from_format_info.name} to {self.to_format_info.name}:\n")
|
366
359
|
for detail_line in qual.details.split("\n"):
|
367
360
|
msg += f"- {detail_line}\n"
|
368
361
|
self.logger.warning(msg)
|
@@ -381,7 +374,7 @@ class FileConverter:
|
|
381
374
|
# Try to run the standard abort method. There's a good chance this will fail though depending on what went
|
382
375
|
# wrong during init, so we fall back to printing the exception to stderr
|
383
376
|
try:
|
384
|
-
if not isinstance(e,
|
377
|
+
if not (isinstance(e, FileConverterException) and e.help):
|
385
378
|
self.logger.error(f"Exception triggering an abort was raised while initializing the converter. "
|
386
379
|
f"Exception was type '{type(e)}', with message: {str(e)}")
|
387
380
|
if e:
|
@@ -389,7 +382,7 @@ class FileConverter:
|
|
389
382
|
self._abort(message="The application encountered an error while initializing the converter:\n" +
|
390
383
|
traceback.format_exc(), e=e)
|
391
384
|
except Exception as ee:
|
392
|
-
if isinstance(ee, (
|
385
|
+
if isinstance(ee, l_abort_exceptions) or (isinstance(ee, FileConverterException) and ee.help):
|
393
386
|
# Don't catch a deliberate abort or help exception; let it pass through
|
394
387
|
raise
|
395
388
|
message = ("ERROR: The application encounted an error during initialization of the converter and "
|
@@ -486,7 +479,7 @@ class FileConverter:
|
|
486
479
|
f"with message: {str(e)}")
|
487
480
|
e.logged = True
|
488
481
|
raise
|
489
|
-
if not isinstance(e,
|
482
|
+
if not (isinstance(e, FileConverterException) and e.help):
|
490
483
|
self.logger.error(f"Exception triggering an abort was raised while running the converter. Exception "
|
491
484
|
f"was type '{type(e)}', with message: {str(e)}")
|
492
485
|
if e:
|
@@ -520,6 +513,18 @@ class FileConverter:
|
|
520
513
|
|
521
514
|
"""
|
522
515
|
|
516
|
+
def try_debug_log(msg, *args, **kwargs):
|
517
|
+
try:
|
518
|
+
self.logger.debug(msg, *args, **kwargs)
|
519
|
+
except AttributeError:
|
520
|
+
pass
|
521
|
+
|
522
|
+
def error_log(msg, *args, **kwargs):
|
523
|
+
try:
|
524
|
+
self.logger.error(msg, *args, **kwargs)
|
525
|
+
except AttributeError:
|
526
|
+
print(msg, file=sys.stderr)
|
527
|
+
|
523
528
|
# Remove the input and output files if they exist
|
524
529
|
if self.delete_input:
|
525
530
|
self.logger.debug(f"Cleaning up input file {self.in_filename}")
|
@@ -527,33 +532,34 @@ class FileConverter:
|
|
527
532
|
os.remove(self.in_filename)
|
528
533
|
except FileNotFoundError:
|
529
534
|
pass
|
535
|
+
|
530
536
|
try:
|
531
537
|
os.remove(self.out_filename)
|
532
|
-
except FileNotFoundError:
|
533
|
-
|
538
|
+
except (FileNotFoundError, AttributeError):
|
539
|
+
try_debug_log("Application aborting; no output file found to clean up")
|
534
540
|
else:
|
535
|
-
|
541
|
+
try_debug_log(f"Application aborting, so cleaning up output file {self.out_filename}")
|
536
542
|
|
537
543
|
# If we have a Help exception, override the message with its message
|
538
|
-
if isinstance(e,
|
539
|
-
|
544
|
+
if isinstance(e, FileConverterException) and e.help:
|
545
|
+
try_debug_log("Help exception triggered, so only using its message for output")
|
540
546
|
message = str(e)
|
541
547
|
|
542
548
|
if message:
|
543
549
|
# If we're adding a message in server mode, read in any prior logs, clear the log, write the message, then
|
544
550
|
# write the prior logs
|
545
551
|
if self.log_file is None:
|
546
|
-
|
547
|
-
|
552
|
+
try_debug_log("Adding abort message to the top of the output log so it will be the first thing "
|
553
|
+
"read by the user")
|
548
554
|
prior_output_log = open(self.output_log, "r").read()
|
549
555
|
os.remove(self.output_log)
|
550
556
|
with open(self.output_log, "w") as fo:
|
551
557
|
fo.write(message + "\n")
|
552
558
|
fo.write(prior_output_log)
|
553
559
|
|
554
|
-
# Note this message in the
|
555
|
-
if not isinstance(e,
|
556
|
-
|
560
|
+
# Note this message in the error logger as well
|
561
|
+
if not (isinstance(e, FileConverterException) and e.help):
|
562
|
+
error_log(message)
|
557
563
|
if e:
|
558
564
|
e.logged = True
|
559
565
|
|
@@ -604,8 +610,8 @@ class FileConverter:
|
|
604
610
|
# empty or whitespace will be stripped by the logger, so we use a lone colon, which looks least obtrusive
|
605
611
|
return (":\n"
|
606
612
|
f"File name: {self.filename_base}\n"
|
607
|
-
f"From: {self.
|
608
|
-
f"To: {self.to_format}\n"
|
613
|
+
f"From: {self.from_format_info.name} ({self.from_format_info.note})\n"
|
614
|
+
f"To: {self.to_format} ({self.to_format_info.note})\n"
|
609
615
|
f"Converter: {self.name}\n")
|
610
616
|
|
611
617
|
def _log_success(self):
|
@@ -680,8 +686,8 @@ class FileConverter:
|
|
680
686
|
from psdi_data_conversion.database import get_conversion_quality
|
681
687
|
|
682
688
|
conversion_quality = get_conversion_quality(converter_name=self.name,
|
683
|
-
in_format=self.
|
684
|
-
out_format=self.
|
689
|
+
in_format=self.from_format_info.id,
|
690
|
+
out_format=self.to_format_info.id)
|
685
691
|
if not conversion_quality:
|
686
692
|
return "unknown"
|
687
693
|
return conversion_quality.qual_str
|
@@ -694,10 +700,6 @@ class FileConverter:
|
|
694
700
|
|
695
701
|
if self.delete_input:
|
696
702
|
os.remove(self.in_filename)
|
697
|
-
if "from_full" in self.data:
|
698
|
-
self.from_format = self.data["from_full"]
|
699
|
-
if "to_full" in self.data:
|
700
|
-
self.to_format = self.data["to_full"]
|
701
703
|
if "success" in self.data:
|
702
704
|
self.quality = self.data["success"]
|
703
705
|
else:
|
@@ -733,23 +735,11 @@ class ScriptFileConverter(FileConverter):
|
|
733
735
|
|
734
736
|
self.logger.debug(f"Performing conversion with ScriptFileConverter using script '{self.script}'")
|
735
737
|
|
736
|
-
from_flags = self.data.get("from_flags", "")
|
737
|
-
to_flags = self.data.get("from_flags", "")
|
738
|
-
from_options = self.data.get("from_options", "")
|
739
|
-
to_options = self.data.get("from_options", "")
|
740
|
-
|
741
|
-
# Check that all user-provided input passes security checks
|
742
|
-
for user_args in [from_flags, to_flags, from_options, to_options]:
|
743
|
-
if not string_is_safe(user_args):
|
744
|
-
raise FileConverterHelpException(f"Provided argument '{user_args}' does not pass security check - it "
|
745
|
-
f"must match the regex {SAFE_STRING_RE.pattern}.")
|
746
|
-
|
747
738
|
env = {"DIST": get_dist()}
|
748
739
|
if self.required_bin is not None:
|
749
740
|
env["BIN_PATH"] = get_bin_path(self.required_bin)
|
750
741
|
|
751
|
-
process = subprocess.run(['sh', f'psdi_data_conversion/scripts/{self.script}',
|
752
|
-
self.in_filename, self.out_filename, from_flags, to_flags, from_options, to_options],
|
742
|
+
process = subprocess.run(['sh', f'psdi_data_conversion/scripts/{self.script}', *self._get_script_args()],
|
753
743
|
env=env, capture_output=True, text=True)
|
754
744
|
|
755
745
|
self.out = process.stdout
|
@@ -760,3 +750,20 @@ class ScriptFileConverter(FileConverter):
|
|
760
750
|
self._abort_from_err()
|
761
751
|
else:
|
762
752
|
self.logger.debug("Conversion process completed successfully")
|
753
|
+
|
754
|
+
def _get_script_args(self):
    """Get the list of arguments which will be passed to the script

    Returns
    -------
    list[str]
        The output format name prefixed with "--", followed by the input filename, the output filename, and the
        user-provided "from" flags, "to" flags, "from" options, and "to" options, in that order

    Raises
    ------
    FileConverterInputException
        If any of the user-provided flags or options fails the `string_is_safe` check. The exception is raised
        with `help=True`
    """

    # Read each argument from its own key. The "to" values were previously read from the "from_*" keys, which
    # meant any output flags and options were discarded and the input ones passed to the script twice
    from_flags = self.data.get("from_flags", "")
    to_flags = self.data.get("to_flags", "")
    from_options = self.data.get("from_options", "")
    to_options = self.data.get("to_options", "")

    # Check that all user-provided input passes security checks
    for user_args in [from_flags, to_flags, from_options, to_options]:
        if not string_is_safe(user_args):
            raise FileConverterInputException(f"Provided argument '{user_args}' does not pass security check - it "
                                              f"must match the regex {SAFE_STRING_RE.pattern}.", help=True)

    return ['--' + self.to_format_info.name, self.in_filename, self.out_filename, from_flags, to_flags,
            from_options, to_options]
|
@@ -27,6 +27,20 @@ class C2xFileConverter(ScriptFileConverter):
|
|
27
27
|
"https://www.gnu.org/licenses/gpl-3.0.en.html. Its binaries are redistributed here under the terms of this "
|
28
28
|
"license, and any further redistribution must also follow these terms. Its corresponding source code "
|
29
29
|
"may be downloaded from https://www.c2x.org.uk/downloads/")
|
30
|
+
supports_ambiguous_extensions = True
|
31
|
+
|
32
|
+
def _get_script_args(self):
    """Get the script arguments, with the output format given in the style expected by c2x

    The argument list is built by the parent class, and then only its first element (the output format flag) is
    replaced, using the `c2x_format` attribute of the output format's info.
    """
    script_args = super()._get_script_args()

    # TODO - check if the input file has an extension which will be accepted by c2x for its format, and handle if
    # not

    # Swap the standard output format flag for the c2x-style one
    c2x_format_flag = "--" + self.to_format_info.c2x_format
    script_args[0] = c2x_format_flag

    return script_args
|
30
44
|
|
31
45
|
|
32
46
|
# Assign this converter to the `converter` variable - this lets the psdi_data_conversion.converter module detect and
|
@@ -6,10 +6,11 @@ Open Babel FileConverter
|
|
6
6
|
"""
|
7
7
|
|
8
8
|
from copy import deepcopy
|
9
|
-
|
9
|
+
|
10
10
|
import py
|
11
|
+
from openbabel import openbabel
|
11
12
|
|
12
|
-
from psdi_data_conversion.converters.base import FileConverter,
|
13
|
+
from psdi_data_conversion.converters.base import FileConverter, FileConverterInputException
|
13
14
|
from psdi_data_conversion.security import SAFE_STRING_RE, string_is_safe
|
14
15
|
|
15
16
|
CONVERTER_OB = 'Open Babel'
|
@@ -30,8 +31,8 @@ def check_string_security(s: str):
|
|
30
31
|
"""Checks that a string is secure and raises an exception if it isn't.
|
31
32
|
"""
|
32
33
|
if not string_is_safe(s):
|
33
|
-
raise
|
34
|
-
|
34
|
+
raise FileConverterInputException(f"Format option '{s}' does not pass security checks. It must pass the regex "
|
35
|
+
f"/{SAFE_STRING_RE.pattern}/.", help=True)
|
35
36
|
|
36
37
|
|
37
38
|
def get_option_and_value(s: str):
|
@@ -60,16 +61,16 @@ def get_coord_gen(l_opts: list[str] | None) -> dict[str, str]:
|
|
60
61
|
|
61
62
|
# No more than two arguments supplied to --coord-gen
|
62
63
|
if l_opts is not None and len(l_opts) > 2:
|
63
|
-
raise
|
64
|
-
|
64
|
+
raise FileConverterInputException("At most two arguments may be provided to --coord-gen, the mode and "
|
65
|
+
"quality, e.g. '--coord-gen Gen3D best'", help=True)
|
65
66
|
|
66
67
|
# Coordinate generation options are valid
|
67
68
|
if coord_gen not in L_ALLOWED_COORD_GENS:
|
68
|
-
raise
|
69
|
-
|
69
|
+
raise FileConverterInputException(f"Coordinate generation type '{coord_gen}' not recognised. Allowed "
|
70
|
+
f"types are: {L_ALLOWED_COORD_GENS}", help=True)
|
70
71
|
if coord_gen_qual not in L_ALLOWED_COORD_GEN_QUALS:
|
71
|
-
raise
|
72
|
-
|
72
|
+
raise FileConverterInputException(f"Coordinate generation quality '{coord_gen_qual}' not recognised. "
|
73
|
+
f"Allowed qualities are: {L_ALLOWED_COORD_GEN_QUALS}", help=True)
|
73
74
|
|
74
75
|
return {COORD_GEN_KEY: coord_gen,
|
75
76
|
COORD_GEN_QUAL_KEY: coord_gen_qual}
|
@@ -148,7 +149,7 @@ class OBFileConverter(FileConverter):
|
|
148
149
|
stdouterr_ob = py.io.StdCaptureFD(in_=False)
|
149
150
|
|
150
151
|
ob_conversion = openbabel.OBConversion()
|
151
|
-
ob_conversion.SetInAndOutFormats(self.
|
152
|
+
ob_conversion.SetInAndOutFormats(self.from_format_info.name, self.to_format_info.name)
|
152
153
|
|
153
154
|
# Retrieve 'from' and 'to' option flags and arguments
|
154
155
|
from_flags = self.data.get("from_flags", "")
|