psdi-data-conversion 0.1.7__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- psdi_data_conversion/app.py +5 -408
- psdi_data_conversion/constants.py +11 -7
- psdi_data_conversion/converter.py +41 -28
- psdi_data_conversion/converters/base.py +18 -13
- psdi_data_conversion/database.py +284 -88
- psdi_data_conversion/gui/__init__.py +5 -0
- psdi_data_conversion/gui/accessibility.py +51 -0
- psdi_data_conversion/gui/env.py +239 -0
- psdi_data_conversion/gui/get.py +53 -0
- psdi_data_conversion/gui/post.py +176 -0
- psdi_data_conversion/gui/setup.py +102 -0
- psdi_data_conversion/main.py +70 -13
- psdi_data_conversion/static/content/convert.htm +105 -74
- psdi_data_conversion/static/content/convertato.htm +36 -26
- psdi_data_conversion/static/content/convertc2x.htm +39 -26
- psdi_data_conversion/static/content/download.htm +5 -5
- psdi_data_conversion/static/content/feedback.htm +2 -2
- psdi_data_conversion/static/content/header-links.html +2 -2
- psdi_data_conversion/static/content/index-versions/header-links.html +2 -2
- psdi_data_conversion/static/content/index-versions/psdi-common-header.html +9 -12
- psdi_data_conversion/static/content/psdi-common-header.html +9 -12
- psdi_data_conversion/static/javascript/accessibility.js +88 -61
- psdi_data_conversion/static/javascript/data.js +1 -3
- psdi_data_conversion/static/javascript/load_accessibility.js +50 -33
- psdi_data_conversion/static/styles/format.css +72 -18
- psdi_data_conversion/templates/accessibility.htm +274 -0
- psdi_data_conversion/templates/documentation.htm +6 -6
- psdi_data_conversion/templates/index.htm +73 -56
- psdi_data_conversion/{static/content → templates}/report.htm +28 -10
- psdi_data_conversion/testing/conversion_test_specs.py +26 -6
- psdi_data_conversion/testing/utils.py +6 -6
- {psdi_data_conversion-0.1.7.dist-info → psdi_data_conversion-0.2.0.dist-info}/METADATA +6 -2
- {psdi_data_conversion-0.1.7.dist-info → psdi_data_conversion-0.2.0.dist-info}/RECORD +36 -30
- psdi_data_conversion/static/content/accessibility.htm +0 -255
- {psdi_data_conversion-0.1.7.dist-info → psdi_data_conversion-0.2.0.dist-info}/WHEEL +0 -0
- {psdi_data_conversion-0.1.7.dist-info → psdi_data_conversion-0.2.0.dist-info}/entry_points.txt +0 -0
- {psdi_data_conversion-0.1.7.dist-info → psdi_data_conversion-0.2.0.dist-info}/licenses/LICENSE +0 -0
psdi_data_conversion/app.py
CHANGED
@@ -5,398 +5,14 @@ Version 1.0, 8th November 2024
 This script acts as a server for the PSDI Data Conversion Service website.
 """

-import json
-import os
-import sys
 from argparse import ArgumentParser
-from collections.abc import Callable
-from datetime import datetime
-from functools import wraps
-from hashlib import md5
-from multiprocessing import Lock
-from subprocess import run
-from traceback import format_exc
-from typing import Any

-import werkzeug.serving
-from flask import Flask, Response, abort, cli, render_template, request
-from werkzeug.utils import secure_filename
-
-import psdi_data_conversion
 from psdi_data_conversion import constants as const
-from psdi_data_conversion import log_utility
-from psdi_data_conversion.converter import run_converter
-from psdi_data_conversion.database import get_format_info
-from psdi_data_conversion.file_io import split_archive_ext
+from psdi_data_conversion.gui.env import update_env
+from psdi_data_conversion.gui.setup import get_app, limit_upload_size, start_app
 from psdi_data_conversion.main import print_wrap

-
-TAG_EV = "TAG"
-TAG_SHA_EV = "TAG_SHA"
-SHA_EV = "SHA"
-
-# Env var for whether this is a production release or development
-PRODUCTION_EV = "PRODUCTION_MODE"
-
-# Env var for whether this is a production release or development
-DEBUG_EV = "DEBUG_MODE"
-
-# Key for the label given to the file uploaded in the web interface
-FILE_TO_UPLOAD_KEY = 'fileToUpload'
-
-# A lock to prevent multiple threads from logging at the same time
-logLock = Lock()
-
-# Create a token by hashing the current date and time.
-dt = str(datetime.now())
-token = md5(dt.encode('utf8')).hexdigest()
-
-# Get the debug, service and production modes from their envvars
-service_mode_ev = os.environ.get(const.SERVICE_MODE_EV)
-service_mode = (service_mode_ev is not None) and (service_mode_ev.lower() == "true")
-production_mode_ev = os.environ.get(PRODUCTION_EV)
-production_mode = (production_mode_ev is not None) and (production_mode_ev.lower() == "true")
-debug_mode_ev = os.environ.get(DEBUG_EV)
-debug_mode = (debug_mode_ev is not None) and (debug_mode_ev.lower() == "true")
-
-# Get the logging mode and level from their envvars
-ev_log_mode = os.environ.get(const.LOG_MODE_EV)
-if ev_log_mode is None:
-    log_mode = const.LOG_MODE_DEFAULT
-else:
-    ev_log_mode = ev_log_mode.lower()
-    if ev_log_mode not in const.L_ALLOWED_LOG_MODES:
-        print(f"ERROR: Unrecognised logging option: {ev_log_mode}. Allowed options are: {const.L_ALLOWED_LOG_MODES}",
-              file=sys.stderr)
-        exit(1)
-    log_mode = ev_log_mode
-
-ev_log_level = os.environ.get(const.LOG_LEVEL_EV)
-if ev_log_level is None:
-    log_level = None
-else:
-    try:
-        log_level = log_utility.get_log_level_from_str(ev_log_level)
-    except ValueError as e:
-        print(f"ERROR: {str(e)}")
-        exit(1)
-
-# Get the maximum allowed size from the envvar for it
-ev_max_file_size = os.environ.get(const.MAX_FILESIZE_EV)
-if ev_max_file_size is not None:
-    max_file_size = float(ev_max_file_size)*const.MEGABYTE
-else:
-    max_file_size = const.DEFAULT_MAX_FILE_SIZE
-
-# And same for the Open Babel maximum file size
-ev_max_file_size_ob = os.environ.get(const.MAX_FILESIZE_OB_EV)
-if ev_max_file_size_ob is not None:
-    max_file_size_ob = float(ev_max_file_size_ob)*const.MEGABYTE
-else:
-    max_file_size_ob = const.DEFAULT_MAX_FILE_SIZE_OB
-
-
-def suppress_warning(func: Callable[..., Any]) -> Callable[..., Any]:
-    """Since we're using the development server as the user GUI, we monkey-patch Flask to disable the warnings that
-    would otherwise appear for this so they don't confuse the user
-    """
-    @wraps(func)
-    def wrapper(*args, **kwargs) -> Any:
-        if args and isinstance(args[0], str) and args[0].startswith('WARNING: This is a development server.'):
-            return ''
-        return func(*args, **kwargs)
-    return wrapper
-
-
-werkzeug.serving._ansi_style = suppress_warning(werkzeug.serving._ansi_style)
-cli.show_server_banner = lambda *_: None
-
-app = Flask(__name__)
-
-
-def limit_upload_size():
-    """Impose a limit on the maximum file that can be uploaded before Flask will raise an error"""
-
-    # Determine the largest possible file size that can be uploaded, keeping in mind that 0 indicates unlimited
-    larger_max_file_size = max_file_size
-    if (max_file_size > 0) and (max_file_size_ob > max_file_size):
-        larger_max_file_size = max_file_size_ob
-
-    if larger_max_file_size > 0:
-        app.config['MAX_CONTENT_LENGTH'] = larger_max_file_size
-    else:
-        app.config['MAX_CONTENT_LENGTH'] = None
-
-
-# Set the upload limit based on env vars to start with
-limit_upload_size()
-
-
-def get_tag_and_sha() -> str:
-    """Get the SHA of the last commit
-    """
-
-    # Get the tag of the latest commit
-    ev_tag = os.environ.get(TAG_EV)
-    if ev_tag:
-        tag = ev_tag
-    else:
-        try:
-            # This bash command calls `git tag` to get a sorted list of tags, with the most recent at the top, then uses
-            # `head` to trim it to one line
-            cmd = "git tag --sort -version:refname | head -n 1"
-
-            out_bytes = run(cmd, shell=True, capture_output=True).stdout
-            tag = str(out_bytes.decode()).strip()
-
-        except Exception:
-            print("ERROR: Could not determine most recent tag. Error was:\n" + format_exc(),
-                  file=sys.stderr)
-            tag = ""
-
-    # Get the SHA associated with this tag
-    ev_tag_sha = os.environ.get(TAG_SHA_EV)
-    if ev_tag_sha:
-        tag_sha: str | None = ev_tag_sha
-    else:
-        try:
-            cmd = f"git show {tag}" + " | head -n 1 | gawk '{print($2)}'"
-
-            out_bytes = run(cmd, shell=True, capture_output=True).stdout
-            tag_sha = str(out_bytes.decode()).strip()
-
-        except Exception:
-            print("ERROR: Could not determine SHA for most recent tag. Error was:\n" + format_exc(),
-                  file=sys.stderr)
-            tag_sha = None
-
-    # First check if the SHA is provided through an environmental variable
-    ev_sha = os.environ.get(SHA_EV)
-    if ev_sha:
-        sha = ev_sha
-    else:
-        try:
-            # This bash command calls `git log` to get info on the last commit, uses `head` to trim it to one line, then
-            # uses `gawk` to get just the second word of this line, which is the SHA of this commit
-            cmd = "git log -n 1 | head -n 1 | gawk '{print($2)}'"
-
-            out_bytes = run(cmd, shell=True, capture_output=True).stdout
-            sha = str(out_bytes.decode()).strip()
-
-        except Exception:
-            print("ERROR: Could not determine SHA of most recent commit. Error was:\n" + format_exc(),
-                  file=sys.stderr)
-            sha = ""
-
-    # If the SHA of the tag is the same as the current SHA, we indicate this by returning a blank SHA
-    if tag_sha == sha:
-        sha = ""
-
-    return (tag, sha)
-
-
-@app.route('/')
-def website():
-    """Return the web page along with relevant data
-    """
-    tag, sha = get_tag_and_sha()
-    return render_template("index.htm",
-                           token=token,
-                           max_file_size=max_file_size,
-                           max_file_size_ob=max_file_size_ob,
-                           service_mode=service_mode,
-                           production_mode=production_mode,
-                           tag=tag,
-                           sha=sha)
-
-
-@app.route('/documentation.htm')
-def documentation():
-    """Return the documentation page
-    """
-    tag, sha = get_tag_and_sha()
-    return render_template("documentation.htm",
-                           tag=tag,
-                           sha=sha)
-
-
-@app.route('/convert/', methods=['POST'])
-def convert():
-    """Convert file to a different format and save to folder 'downloads'. Delete original file. Note that downloading is
-    achieved in format.js
-    """
-
-    # Make sure the upload directory exists
-    os.makedirs(const.DEFAULT_UPLOAD_DIR, exist_ok=True)
-
-    file = request.files[FILE_TO_UPLOAD_KEY]
-    filename = secure_filename(file.filename)
-
-    qualified_filename = os.path.join(const.DEFAULT_UPLOAD_DIR, filename)
-    file.save(qualified_filename)
-    qualified_output_log = os.path.join(const.DEFAULT_DOWNLOAD_DIR,
-                                        split_archive_ext(filename)[0] + const.OUTPUT_LOG_EXT)
-
-    # Determine the input and output formats
-    d_formats = {}
-    for format_label in "to", "from":
-        name = request.form[format_label]
-        full_note = request.form[format_label+"_full"]
-
-        l_possible_formats = get_format_info(name, which="all")
-
-        # If there's only one possible format, use that
-        if len(l_possible_formats) == 1:
-            d_formats[format_label] = l_possible_formats[0]
-            continue
-
-        # Otherwise, find the format with the matching note
-        for possible_format in l_possible_formats:
-            if possible_format.note in full_note:
-                d_formats[format_label] = possible_format
-                break
-        else:
-            print(f"Format '{name}' with full description '{full_note}' could not be found in database.",
-                  file=sys.stderr)
-            abort(const.STATUS_CODE_GENERAL)
-
-    if (not service_mode) or (request.form['token'] == token and token != ''):
-        try:
-            conversion_output = run_converter(name=request.form['converter'],
-                                              filename=qualified_filename,
-                                              data=request.form,
-                                              to_format=d_formats["to"],
-                                              from_format=d_formats["from"],
-                                              strict=(request.form['check_ext'] != "false"),
-                                              log_mode=log_mode,
-                                              log_level=log_level,
-                                              delete_input=True,
-                                              abort_callback=abort)
-        except Exception as e:
-
-            # Check for anticipated exceptions, and write a simpler message for them
-            for err_message in (const.ERR_CONVERSION_FAILED, const.ERR_CONVERTER_NOT_RECOGNISED,
-                                const.ERR_EMPTY_ARCHIVE, const.ERR_WRONG_EXTENSIONS):
-                if log_utility.string_with_placeholders_matches(err_message, str(e)):
-                    with open(qualified_output_log, "w") as fo:
-                        fo.write(str(e))
-                    abort(const.STATUS_CODE_GENERAL)
-
-            # If the exception provides a status code, get it
-            status_code: int
-            if hasattr(e, "status_code"):
-                status_code = e.status_code
-            else:
-                status_code = const.STATUS_CODE_GENERAL
-
-            # If the exception provides a message, report it
-            if hasattr(e, "message"):
-                msg = f"An unexpected exception was raised by the converter, with error message:\n{e.message}\n"
-            else:
-                # Failsafe exception message
-                msg = ("The following unexpected exception was raised by the converter:\n" +
-                       format_exc()+"\n")
-            with open(qualified_output_log, "w") as fo:
-                fo.write(msg)
-            abort(status_code)
-
-        return repr(conversion_output)
-    else:
-        # return http status code 405
-        abort(405)
-
-
-@app.route('/feedback/', methods=['POST'])
-def feedback():
-    """Take feedback data from the web app and log it
-    """
-
-    try:
-
-        entry = {
-            "datetime": log_utility.get_date_time(),
-        }
-
-        report = json.loads(request.form['data'])
-
-        for key in ["type", "missing", "reason", "from", "to"]:
-            if key in report:
-                entry[key] = str(report[key])
-
-        # Write data in JSON format and send to stdout
-        logLock.acquire()
-        sys.stdout.write(f"{json.dumps(entry) + '\n'}")
-        logLock.release()
-
-        return Response(status=201)
-
-    except Exception:
-
-        return Response(status=400)
-
-
-@app.route('/delete/', methods=['POST'])
-def delete():
-    """Delete files in folder 'downloads'
-    """
-
-    realbase = os.path.realpath(const.DEFAULT_DOWNLOAD_DIR)
-
-    realfilename = os.path.realpath(os.path.join(const.DEFAULT_DOWNLOAD_DIR, request.form['filename']))
-    reallogname = os.path.realpath(os.path.join(const.DEFAULT_DOWNLOAD_DIR, request.form['logname']))
-
-    if realfilename.startswith(realbase + os.sep) and reallogname.startswith(realbase + os.sep):
-
-        os.remove(realfilename)
-        os.remove(reallogname)
-
-        return 'okay'
-
-    else:
-
-        return Response(status=400)
-
-
-@app.route('/del/', methods=['POST'])
-def delete_file():
-    """Delete file (cURL)
-    """
-    os.remove(request.form['filepath'])
-    return 'Server-side file ' + request.form['filepath'] + ' deleted\n'
-
-
-@app.route('/data/', methods=['GET'])
-def data():
-    """Check that the incoming token matches the one sent to the user (should mostly prevent spambots). Write date- and
-    time-stamped user input to server-side file 'user_responses'.
-
-    $$$$$$$$$$ Retained in case direct logging is required in the future. $$$$$$$$$$
-
-    Returns
-    -------
-    str
-        Output status - 'okay' if exited successfuly
-    """
-    if service_mode and request.args['token'] == token and token != '':
-        message = '[' + log_utility.get_date_time() + '] ' + request.args['data'] + '\n'
-
-        with open("user_responses", "a") as f:
-            f.write(message)
-
-        return 'okay'
-    else:
-        # return http status code 405
-        abort(405)
-
-
-def start_app():
-    """Start the Flask app - this requires being run from the base directory of the project, so this changes the
-    current directory to there. Anything else which changes it while the app is running may interfere with its proper
-    execution.
-    """
-
-    os.chdir(os.path.join(psdi_data_conversion.__path__[0], ".."))
-    app.run(debug=debug_mode)
+app = get_app()


 def main():
@@ -443,27 +59,8 @@ def main():
     args = parser.parse_args()

     if not args.use_env_vars:
-
-        global max_file_size
-        max_file_size = args.max_file_size*const.MEGABYTE
-
-        global max_file_size_ob
-        max_file_size_ob = args.max_file_size_ob*const.MEGABYTE
-
-        global service_mode
-        service_mode = args.service_mode
-
-        global debug_mode
-        debug_mode = args.debug
-
-        global production_mode
-        production_mode = not args.dev_mode
-
-        global log_mode
-        log_mode = args.log_mode
-
-        global log_level
-        log_level = args.log_level
+        # Overwrite the values from environmental variables with the values from the command-line arguments
+        update_env(args)

     # Set the upload limit based on provided arguments now
     limit_upload_size()
psdi_data_conversion/constants.py
CHANGED
@@ -36,6 +36,9 @@ import shutil
 # The name of the command-line script
 CL_SCRIPT_NAME = "psdi-data-convert"

+# The name of the Flask app
+APP_NAME = "psdi_data_conversion"
+
 # Environmental variables
 LOG_MODE_EV = "LOG_MODE"
 LOG_LEVEL_EV = "LOG_LEVEL"
@@ -52,8 +55,8 @@ MEGABYTE = 1024*1024
 DEFAULT_MAX_FILE_SIZE = 0 * MEGABYTE
 DEFAULT_MAX_FILE_SIZE_OB = 1 * MEGABYTE

-
-
+DEFAULT_INPUT_DIR = './psdi_data_conversion/static/uploads'
+DEFAULT_OUTPUT_DIR = './psdi_data_conversion/static/downloads'

 # Filename of the database, relative to the base of the python package
 DATABASE_FILENAME = "static/data/data.json"
@@ -150,11 +153,6 @@ QUAL_2D_LABEL = "2D atomic coordinates are"
 QUAL_3D_KEY = "three_dim"
 QUAL_3D_LABEL = "2D atomic coordinates are"

-D_QUAL_LABELS = {QUAL_COMP_KEY: QUAL_COMP_LABEL,
-                 QUAL_CONN_KEY: QUAL_CONN_LABEL,
-                 QUAL_2D_KEY: QUAL_2D_LABEL,
-                 QUAL_3D_KEY: QUAL_3D_LABEL}
-
 # Notes for conversion quality
 QUAL_NOTE_IN_UNKNOWN = ("Potential data extrapolation: {} represented in the output format but its representation in "
                         "the input format is unknown")
@@ -187,3 +185,9 @@ ERR_WRONG_EXTENSIONS = "Input file '{file}' does not have expected extension '{e
 ERR_EMPTY_ARCHIVE = "No files to convert were contained in archive"
 ERR_CONVERSION_FAILED = ("File conversion failed for one or more files. Lines from the output log "
                          "{} which indicate possible sources of error: ")
+
+# Misc
+# ----
+
+# Year in seconds
+YEAR = 365*24*60*60
psdi_data_conversion/converter.py
CHANGED
@@ -5,22 +5,20 @@ Created 2024-12-10 by Bryan Gillis.
 Class and functions to perform file conversion
 """

-from dataclasses import dataclass, field
-import json
 import glob
 import importlib
+import json
 import os
 import sys
 import traceback
-from typing import Any, Callable, NamedTuple
-from multiprocessing import Lock
-from psdi_data_conversion import log_utility
 from collections.abc import Callable
 from dataclasses import dataclass, field
+from multiprocessing import Lock
 from tempfile import TemporaryDirectory
 from typing import Any, NamedTuple

 from psdi_data_conversion import constants as const
+from psdi_data_conversion import log_utility
 from psdi_data_conversion.converters import base
 from psdi_data_conversion.converters.openbabel import CONVERTER_OB
 from psdi_data_conversion.file_io import (is_archive, is_supported_archive, pack_zip_or_tar, split_archive_ext,
@@ -184,9 +182,9 @@ def get_converter(*args, name=const.CONVERTER_DEFAULT, **converter_kwargs) -> ba
     use_envvars : bool
         If set to True, environment variables will be checked for any that set options for this class and used,
         default False
-
+    input_dir : str
         The location of input files relative to the current directory
-
+    output_dir : str
         The location of output files relative to the current directory
     max_file_size : float
         The maximum allowed file size for input/output files, in MB, default 1 MB for Open Babel, unlimited for other
@@ -315,11 +313,18 @@ def check_from_format(filename: str,
         return False


+def _run_single_file_conversion(*args, **kwargs):
+    """Run a conversion on a single file, after all arguments have been checked
+    """
+    return get_converter(*args, **kwargs).run()
+
+
 def run_converter(filename: str,
                   to_format: str,
                   *args,
                   from_format: str | None = None,
-
+                  input_dir=const.DEFAULT_INPUT_DIR,
+                  output_dir=const.DEFAULT_OUTPUT_DIR,
                   max_file_size=None,
                   log_file: str | None = None,
                   log_mode=const.LOG_SIMPLE,
@@ -351,9 +356,9 @@ def run_converter(filename: str,
     use_envvars : bool
         If set to True, environment variables will be checked for any that set options for this class and used,
         default False
-
+    input_dir : str
         The location of input files relative to the current directory
-
+    output_dir : str
         The location of output files relative to the current directory
     strict : bool
         If True and `from_format` is not None, will fail if any input file has the wrong extension (including files
@@ -410,7 +415,12 @@ def run_converter(filename: str,
     # converter class, so it needs to be set up here to match what will be set up there
     if log_file is None:
         base_filename = os.path.basename(split_archive_ext(filename)[0])
-        log_file = os.path.join(
+        log_file = os.path.join(output_dir, base_filename + const.OUTPUT_LOG_EXT)
+
+    if os.path.exists(filename):
+        qualified_filename = filename
+    else:
+        qualified_filename = os.path.join(input_dir, filename)

     # Check if the filename is for an archive file, and handle appropriately

@@ -425,15 +435,16 @@ def run_converter(filename: str,
         # Not an archive, so just get and run the converter straightforwardly
         if from_format is not None:
            check_from_format(filename, from_format, strict=strict)
-        l_run_output.append(
+        l_run_output.append(_run_single_file_conversion(filename,
                                                         to_format,
                                                         *args,
                                                         from_format=from_format,
-
+                                                        input_dir=input_dir,
+                                                        output_dir=output_dir,
                                                         max_file_size=max_file_size,
                                                         log_file=log_file,
                                                         log_mode=log_mode,
-                                                        **converter_kwargs)
+                                                        **converter_kwargs))

    elif not is_supported_archive(filename):
        raise base.FileConverterInputException(f"{filename} is an unsupported archive type. Supported types are: "
@@ -443,7 +454,7 @@ def run_converter(filename: str,
        # The filename is of a supported archive type. Make a temporary directory to extract its contents
        # to, then run the converter on each file extracted
        with TemporaryDirectory() as extract_dir:
-            l_filenames = unpack_zip_or_tar(
+            l_filenames = unpack_zip_or_tar(qualified_filename, extract_dir=extract_dir)

            # Check for no files in archive
            if len(l_filenames) == 0:
@@ -468,15 +479,15 @@ def run_converter(filename: str,
                individual_log_mode = log_mode if log_mode != const.LOG_FULL else const.LOG_FULL_FORCE

                try:
-                    individual_run_output =
-
-
-
-
-
-
-
-
+                    individual_run_output = _run_single_file_conversion(extracted_filename,
+                                                                        to_format,
+                                                                        *args,
+                                                                        from_format=from_format,
+                                                                        output_dir=output_dir,
+                                                                        log_file=individual_log_file,
+                                                                        log_mode=individual_log_mode,
+                                                                        max_file_size=remaining_file_size,
+                                                                        **converter_kwargs)
                except base.FileConverterAbortException as e:
                    # If the run fails, create a run output object to indicate that
                    individual_run_output = base.FileConversionResult(log_filename=individual_log_file,
@@ -573,12 +584,13 @@ def run_converter(filename: str,
                "input_filename": in_filename,
                "output_filename": run_output.output_filename[l_index:r_index],
                "input_size": input_size,
-                "output_size": output_size
+                "output_size": output_size}

-            for key in [
-
+            for key in ["converter", "coordinates", "coordOption", "from_flags",
+                        "to_flags", "from_arg_flags", "to_arg_flags"]:
                if key in converter_kwargs['data'] and converter_kwargs['data'][key] != "" and not \
-
+                        ((key == "coordinates" or key == "coordOption") and
+                         converter_kwargs['data']['coordinates'] == "neither"):
                    entry[key] = converter_kwargs['data'][key]

            entry["outcome"] = outcome
@@ -595,6 +607,7 @@ def run_converter(filename: str,

    return run_output

+
def set_size_units(size):
    if size >= 1024:
        return str('%.3f' % (size / 1024)) + ' kB'