psdi-data-conversion 0.0.38__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. psdi_data_conversion/app.py +93 -33
  2. psdi_data_conversion/constants.py +1 -0
  3. psdi_data_conversion/converter.py +145 -17
  4. psdi_data_conversion/converters/base.py +24 -20
  5. psdi_data_conversion/converters/c2x.py +13 -0
  6. psdi_data_conversion/converters/openbabel.py +2 -1
  7. psdi_data_conversion/database.py +46 -14
  8. psdi_data_conversion/dist.py +2 -1
  9. psdi_data_conversion/file_io.py +1 -2
  10. psdi_data_conversion/log_utility.py +1 -1
  11. psdi_data_conversion/main.py +32 -25
  12. psdi_data_conversion/static/content/index-versions/psdi-common-footer.html +13 -9
  13. psdi_data_conversion/static/content/index-versions/psdi-common-header.html +1 -1
  14. psdi_data_conversion/static/content/psdi-common-footer.html +13 -9
  15. psdi_data_conversion/static/content/psdi-common-header.html +1 -1
  16. psdi_data_conversion/static/data/data.json +617 -3
  17. psdi_data_conversion/static/javascript/convert.js +54 -6
  18. psdi_data_conversion/static/javascript/convert_common.js +16 -2
  19. psdi_data_conversion/static/javascript/data.js +18 -0
  20. psdi_data_conversion/static/styles/format.css +7 -0
  21. psdi_data_conversion/templates/index.htm +8 -9
  22. psdi_data_conversion/testing/conversion_callbacks.py +2 -2
  23. psdi_data_conversion/testing/conversion_test_specs.py +27 -7
  24. psdi_data_conversion/testing/gui.py +18 -12
  25. psdi_data_conversion/testing/utils.py +3 -3
  26. psdi_data_conversion/utils.py +21 -0
  27. {psdi_data_conversion-0.0.38.dist-info → psdi_data_conversion-0.1.0.dist-info}/METADATA +2 -2
  28. {psdi_data_conversion-0.0.38.dist-info → psdi_data_conversion-0.1.0.dist-info}/RECORD +31 -30
  29. {psdi_data_conversion-0.0.38.dist-info → psdi_data_conversion-0.1.0.dist-info}/WHEEL +0 -0
  30. {psdi_data_conversion-0.0.38.dist-info → psdi_data_conversion-0.1.0.dist-info}/entry_points.txt +0 -0
  31. {psdi_data_conversion-0.0.38.dist-info → psdi_data_conversion-0.1.0.dist-info}/licenses/LICENSE +0 -0
@@ -5,44 +5,57 @@ Version 1.0, 8th November 2024
5
5
  This script acts as a server for the PSDI Data Conversion Service website.
6
6
  """
7
7
 
8
- from argparse import ArgumentParser
9
- import hashlib
10
- import os
11
8
  import json
9
+ from multiprocessing import Lock
10
+ import os
11
+ import sys
12
+ from argparse import ArgumentParser
13
+ from collections.abc import Callable
12
14
  from datetime import datetime
15
+ from functools import wraps
16
+ from hashlib import md5
13
17
  from subprocess import run
14
- import sys
15
- import traceback
16
- from flask import Flask, request, render_template, abort, Response
18
+ from traceback import format_exc
19
+ from typing import Any
20
+
21
+ import werkzeug.serving
22
+ from flask import Flask, Response, abort, cli, render_template, request
23
+ from werkzeug.utils import secure_filename
17
24
 
18
25
  import psdi_data_conversion
19
- from psdi_data_conversion import log_utility
20
26
  from psdi_data_conversion import constants as const
27
+ from psdi_data_conversion import log_utility
21
28
  from psdi_data_conversion.converter import run_converter
22
- from psdi_data_conversion.database import FormatInfo, get_format_info
29
+ from psdi_data_conversion.database import get_format_info
23
30
  from psdi_data_conversion.file_io import split_archive_ext
31
+ from psdi_data_conversion.main import print_wrap
24
32
 
25
33
  # Env var for the SHA of the latest commit
26
34
  SHA_EV = "SHA"
27
35
 
28
- # Env var for whether this is running in service mode or locally
29
- SERVICE_MODE_EV = "SERVICE_MODE"
30
-
31
36
  # Env var for whether this is a production release or development
32
37
  PRODUCTION_EV = "PRODUCTION_MODE"
33
38
 
39
+ # Env var for whether the Flask server should be run in debug mode
40
+ DEBUG_EV = "DEBUG_MODE"
41
+
34
42
  # Key for the label given to the file uploaded in the web interface
35
43
  FILE_TO_UPLOAD_KEY = 'fileToUpload'
36
44
 
45
+ # A lock to prevent multiple threads from logging at the same time
46
+ logLock = Lock()
47
+
37
48
  # Create a token by hashing the current date and time.
38
49
  dt = str(datetime.now())
39
- token = hashlib.md5(dt.encode('utf8')).hexdigest()
50
+ token = md5(dt.encode('utf8')).hexdigest()
40
51
 
41
- # Get the service and production modes from their envvars
42
- service_mode_ev = os.environ.get(SERVICE_MODE_EV)
52
+ # Get the debug, service and production modes from their envvars
53
+ service_mode_ev = os.environ.get(const.SERVICE_MODE_EV)
43
54
  service_mode = (service_mode_ev is not None) and (service_mode_ev.lower() == "true")
44
55
  production_mode_ev = os.environ.get(PRODUCTION_EV)
45
56
  production_mode = (production_mode_ev is not None) and (production_mode_ev.lower() == "true")
57
+ debug_mode_ev = os.environ.get(DEBUG_EV)
58
+ debug_mode = (debug_mode_ev is not None) and (debug_mode_ev.lower() == "true")
46
59
 
47
60
  # Get the logging mode and level from their envvars
48
61
  ev_log_mode = os.environ.get(const.LOG_MODE_EV)
@@ -80,9 +93,43 @@ if ev_max_file_size_ob is not None:
80
93
  else:
81
94
  max_file_size_ob = const.DEFAULT_MAX_FILE_SIZE_OB
82
95
 
96
+
97
def suppress_warning(func: Callable[..., Any]) -> Callable[..., Any]:
    """Wrap a message-styling function so that Flask's development-server warning is blanked out.

    The development server doubles as the user-facing GUI here, so the warning it normally prints would only
    confuse users. Any call whose first positional argument is a string starting with the warning text yields an
    empty string; every other call is forwarded unchanged to the wrapped function.

    Parameters
    ----------
    func : Callable[..., Any]
        The function to wrap

    Returns
    -------
    Callable[..., Any]
        The wrapped function
    """
    warning_prefix = 'WARNING: This is a development server.'

    @wraps(func)
    def wrapper(*args, **kwargs) -> Any:
        # Only the first positional argument is inspected; with no positional arguments nothing is suppressed
        first_arg = args[0] if args else None
        if isinstance(first_arg, str) and first_arg.startswith(warning_prefix):
            return ''
        return func(*args, **kwargs)

    return wrapper
107
+
108
+
109
+ werkzeug.serving._ansi_style = suppress_warning(werkzeug.serving._ansi_style)
110
+ cli.show_server_banner = lambda *_: None
111
+
83
112
  app = Flask(__name__)
84
113
 
85
114
 
115
def limit_upload_size():
    """Impose a limit on the maximum file that can be uploaded before Flask will raise an error

    Reads the module-level `max_file_size` and `max_file_size_ob` values and sets Flask's `MAX_CONTENT_LENGTH`
    config entry to the larger of the two. A value of 0 for either indicates that it is unlimited, in which case
    no cap can safely be imposed at the Flask level, and so `MAX_CONTENT_LENGTH` is set to None.
    """

    # If either limit is unlimited (0), some uploads must be allowed at any size, so Flask must not impose a cap.
    # Previously an unlimited Open Babel limit combined with a finite general limit still capped every upload.
    if max_file_size > 0 and max_file_size_ob > 0:
        app.config['MAX_CONTENT_LENGTH'] = max(max_file_size, max_file_size_ob)
    else:
        app.config['MAX_CONTENT_LENGTH'] = None
127
+
128
+
129
+ # Set the upload limit based on env vars to start with
130
+ limit_upload_size()
131
+
132
+
86
133
  def get_last_sha() -> str:
87
134
  """Get the SHA of the last commit
88
135
  """
@@ -101,7 +148,7 @@ def get_last_sha() -> str:
101
148
  out_str = str(out_bytes.decode()).strip()
102
149
 
103
150
  except Exception:
104
- print("ERROR: Could not determine SHA of most recent commit. Error was:\n" + traceback.format_exc(),
151
+ print("ERROR: Could not determine SHA of most recent commit. Error was:\n" + format_exc(),
105
152
  file=sys.stderr)
106
153
  out_str = "N/A"
107
154
 
@@ -112,14 +159,13 @@ def get_last_sha() -> str:
112
159
  def website():
113
160
  """Return the web page along with the token
114
161
  """
115
-
116
- data = [{'token': token,
117
- 'max_file_size': max_file_size,
118
- 'max_file_size_ob': max_file_size_ob,
119
- 'service_mode': service_mode,
120
- 'production_mode': production_mode,
121
- 'sha': get_last_sha()}]
122
- return render_template("index.htm", data=data)
162
+ return render_template("index.htm",
163
+ token=token,
164
+ max_file_size=max_file_size,
165
+ max_file_size_ob=max_file_size_ob,
166
+ service_mode=service_mode,
167
+ production_mode=production_mode,
168
+ sha=get_last_sha())
123
169
 
124
170
 
125
171
  @app.route('/convert/', methods=['POST'])
@@ -131,9 +177,8 @@ def convert():
131
177
  # Make sure the upload directory exists
132
178
  os.makedirs(const.DEFAULT_UPLOAD_DIR, exist_ok=True)
133
179
 
134
- # Save the file in the upload directory
135
180
  file = request.files[FILE_TO_UPLOAD_KEY]
136
- filename = filename = file.filename
181
+ filename = secure_filename(file.filename)
137
182
 
138
183
  qualified_filename = os.path.join(const.DEFAULT_UPLOAD_DIR, filename)
139
184
  file.save(qualified_filename)
@@ -146,7 +191,7 @@ def convert():
146
191
  name = request.form[format_label]
147
192
  full_note = request.form[format_label+"_full"]
148
193
 
149
- l_possible_formats: list[FormatInfo] = get_format_info(name, which="all")
194
+ l_possible_formats = get_format_info(name, which="all")
150
195
 
151
196
  # If there's only one possible format, use that
152
197
  if len(l_possible_formats) == 1:
@@ -154,13 +199,11 @@ def convert():
154
199
  continue
155
200
 
156
201
  # Otherwise, find the format with the matching note
157
- found = False
158
202
  for possible_format in l_possible_formats:
159
203
  if possible_format.note in full_note:
160
204
  d_formats[format_label] = possible_format
161
- found = True
162
205
  break
163
- if not found:
206
+ else:
164
207
  print(f"Format '{name}' with full description '{full_note}' could not be found in database.",
165
208
  file=sys.stderr)
166
209
  abort(const.STATUS_CODE_GENERAL)
@@ -200,7 +243,7 @@ def convert():
200
243
  else:
201
244
  # Failsafe exception message
202
245
  msg = ("The following unexpected exception was raised by the converter:\n" +
203
- traceback.format_exc()+"\n")
246
+ format_exc()+"\n")
204
247
  with open(qualified_output_log, "w") as fo:
205
248
  fo.write(msg)
206
249
  abort(status_code)
@@ -228,7 +271,10 @@ def feedback():
228
271
  if key in report:
229
272
  entry[key] = str(report[key])
230
273
 
231
- log_utility.append_to_log_file("feedback", entry)
274
+ # Write data in JSON format and send to stdout
275
+ logLock.acquire()
276
+ sys.stdout.write(f"{json.dumps(entry) + '\n'}")
277
+ logLock.release()
232
278
 
233
279
  return Response(status=201)
234
280
 
@@ -298,7 +344,7 @@ def start_app():
298
344
  """
299
345
 
300
346
  os.chdir(os.path.join(psdi_data_conversion.__path__[0], ".."))
301
- app.run()
347
+ app.run(debug=debug_mode)
302
348
 
303
349
 
304
350
  def main():
@@ -323,7 +369,11 @@ def main():
323
369
  help="If set, will run as if deploying a service rather than the local GUI")
324
370
 
325
371
  parser.add_argument("--dev-mode", action="store_true",
326
- help="If set, will expose development elements")
372
+ help="If set, will expose development elements, such as the SHA of the latest commit")
373
+
374
+ parser.add_argument("--debug", action="store_true",
375
+ help="If set, will run the Flask server in debug mode, which will cause it to automatically "
376
+ "reload if code changes and show an interactive debugger in the case of errors")
327
377
 
328
378
  parser.add_argument("--log-mode", type=str, default=const.LOG_FULL,
329
379
  help="How logs should be stored. Allowed values are: \n"
@@ -351,6 +401,9 @@ def main():
351
401
  global service_mode
352
402
  service_mode = args.service_mode
353
403
 
404
+ global debug_mode
405
+ debug_mode = args.debug
406
+
354
407
  global production_mode
355
408
  production_mode = not args.dev_mode
356
409
 
@@ -360,6 +413,13 @@ def main():
360
413
  global log_level
361
414
  log_level = args.log_level
362
415
 
416
+ # Set the upload limit based on provided arguments now
417
+ limit_upload_size()
418
+
419
+ print_wrap("Starting the PSDI Data Conversion GUI. This GUI is run as a webpage, which you can open by "
420
+ "right-clicking the link below to open it in your default browser, or by copy-and-pasting it into your "
421
+ "browser of choice.")
422
+
363
423
  start_app()
364
424
 
365
425
 
@@ -41,6 +41,7 @@ LOG_MODE_EV = "LOG_MODE"
41
41
  LOG_LEVEL_EV = "LOG_LEVEL"
42
42
  MAX_FILESIZE_EV = "MAX_FILESIZE"
43
43
  MAX_FILESIZE_OB_EV = "MAX_FILESIZE_OB"
44
+ SERVICE_MODE_EV = "SERVICE_MODE"
44
45
 
45
46
  # Files and Folders
46
47
  # -----------------
@@ -6,20 +6,29 @@ Class and functions to perform file conversion
6
6
  """
7
7
 
8
8
  from dataclasses import dataclass, field
9
- import os
9
+ import json
10
+ import glob
10
11
  import importlib
12
+ import os
11
13
  import sys
12
- from tempfile import TemporaryDirectory
13
14
  import traceback
14
15
  from typing import Any, Callable, NamedTuple
16
+ from multiprocessing import Lock
17
+ from psdi_data_conversion import log_utility
18
+ from collections.abc import Callable
19
+ from dataclasses import dataclass, field
20
+ from tempfile import TemporaryDirectory
21
+ from typing import Any, NamedTuple
22
+
15
23
  from psdi_data_conversion import constants as const
16
24
  from psdi_data_conversion.converters import base
17
-
18
- import glob
19
-
20
25
  from psdi_data_conversion.converters.openbabel import CONVERTER_OB
21
26
  from psdi_data_conversion.file_io import (is_archive, is_supported_archive, pack_zip_or_tar, split_archive_ext,
22
27
  unpack_zip_or_tar)
28
+ from psdi_data_conversion.utils import regularize_name
29
+
30
+ # A lock to prevent multiple threads from logging at the same time
31
+ logLock = Lock()
23
32
 
24
33
  # Find all modules for specific converters
25
34
  l_converter_modules = glob.glob(os.path.dirname(base.__file__) + "/*.py")
@@ -49,7 +58,8 @@ try:
49
58
 
50
59
  converter_class = module.converter
51
60
 
52
- name = converter_class.name
61
+ # To make querying case/space-insensitive, we store all names in lowercase with spaces stripped
62
+ name = converter_class.name.lower().replace(" ", "")
53
63
 
54
64
  return NameAndClass(name, converter_class)
55
65
 
@@ -91,6 +101,66 @@ except Exception:
91
101
  D_CONVERTER_ARGS = {}
92
102
 
93
103
 
104
def get_supported_converter_class(name: str):
    """Look up a converter class by name in the dict of supported converters

    Parameters
    ----------
    name : str
        Converter name (case- and space-insensitive)

    Returns
    -------
    type[base.FileConverter]
    """
    # Keys are stored in regularized form, so the query must be regularized the same way
    regularized_name = regularize_name(name)
    return D_SUPPORTED_CONVERTERS[regularized_name]
117
+
118
+
119
def get_registered_converter_class(name: str):
    """Look up a converter class by name in the dict of registered converters

    Parameters
    ----------
    name : str
        Converter name (case- and space-insensitive)

    Returns
    -------
    type[base.FileConverter]
    """
    # Keys are stored in regularized form, so the query must be regularized the same way
    regularized_name = regularize_name(name)
    return D_REGISTERED_CONVERTERS[regularized_name]
132
+
133
+
134
def converter_is_supported(name: str):
    """Report whether a converter is supported in principle by this project

    Parameters
    ----------
    name : str
        Converter name (case- and space-insensitive)

    Returns
    -------
    bool
    """
    regularized_name = regularize_name(name)
    return regularized_name in L_SUPPORTED_CONVERTERS
147
+
148
+
149
def converter_is_registered(name: str):
    """Report whether a converter is registered (usable)

    Parameters
    ----------
    name : str
        Converter name (case- and space-insensitive)

    Returns
    -------
    bool
    """
    regularized_name = regularize_name(name)
    return regularized_name in L_REGISTERED_CONVERTERS
162
+
163
+
94
164
  def get_converter(*args, name=const.CONVERTER_DEFAULT, **converter_kwargs) -> base.FileConverter:
95
165
  """Get a FileConverter of the proper subclass for the requested converter type
96
166
 
@@ -129,7 +199,7 @@ def get_converter(*args, name=const.CONVERTER_DEFAULT, **converter_kwargs) -> ba
129
199
  If provided, all logging will go to a single file or stream. Otherwise, logs will be split up among multiple
130
200
  files for server-style logging.
131
201
  log_mode : str
132
- How logs should be stores. Allowed values are:
202
+ How logs should be stored. Allowed values are:
133
203
  - 'full' - Multi-file logging, only recommended when running as a public web app
134
204
  - 'simple' - Logs saved to one file
135
205
  - 'stdout' - Output logs and errors only to stdout
@@ -155,10 +225,11 @@ def get_converter(*args, name=const.CONVERTER_DEFAULT, **converter_kwargs) -> ba
155
225
  FileConverterInputException
156
226
  If the converter isn't recognized or there's some other issue with the input
157
227
  """
228
+ name = regularize_name(name)
158
229
  if name not in L_REGISTERED_CONVERTERS:
159
230
  raise base.FileConverterInputException(const.ERR_CONVERTER_NOT_RECOGNISED.format(name) +
160
231
  f"{L_REGISTERED_CONVERTERS}")
161
- converter_class = D_REGISTERED_CONVERTERS[name]
232
+ converter_class = get_registered_converter_class(name)
162
233
 
163
234
  return converter_class(*args, **converter_kwargs)
164
235
 
@@ -302,7 +373,7 @@ def run_converter(filename: str,
302
373
  If provided, all logging will go to a single file or stream. Otherwise, logs will be split up among multiple
303
374
  files for server-style logging.
304
375
  log_mode : str
305
- How logs should be stores. Allowed values are:
376
+ How logs should be stored. Allowed values are:
306
377
  - 'full' - Multi-file logging, only recommended when running as a public web app
307
378
  - 'simple' - Logs saved to one file
308
379
  - 'stdout' - Output logs and errors only to stdout
@@ -355,14 +426,14 @@ def run_converter(filename: str,
355
426
  if from_format is not None:
356
427
  check_from_format(filename, from_format, strict=strict)
357
428
  l_run_output.append(get_converter(filename,
358
- to_format,
359
- *args,
360
- from_format=from_format,
361
- download_dir=download_dir,
362
- max_file_size=max_file_size,
363
- log_file=log_file,
364
- log_mode=log_mode,
365
- **converter_kwargs).run())
429
+ to_format,
430
+ *args,
431
+ from_format=from_format,
432
+ download_dir=download_dir,
433
+ max_file_size=max_file_size,
434
+ log_file=log_file,
435
+ log_mode=log_mode,
436
+ **converter_kwargs).run())
366
437
 
367
438
  elif not is_supported_archive(filename):
368
439
  raise base.FileConverterInputException(f"{filename} is an unsupported archive type. Supported types are: "
@@ -473,4 +544,61 @@ def run_converter(filename: str,
473
544
  exception_class = base.FileConverterAbortException
474
545
  raise exception_class(status_code, msg)
475
546
 
547
+ # Log conversion information if in service mode
548
+ service_mode_ev = os.environ.get(const.SERVICE_MODE_EV)
549
+ service_mode = (service_mode_ev is not None) and (service_mode_ev.lower() == "true")
550
+ if service_mode:
551
+ try:
552
+ l_index = filename.rfind('/') + 1
553
+ r_index = len(filename)
554
+ in_filename = filename[l_index:r_index]
555
+
556
+ l_index = run_output.output_filename.rfind('/') + 1
557
+ r_index = len(run_output.output_filename)
558
+
559
+ input_size = set_size_units(run_output.in_size)
560
+ output_size = set_size_units(run_output.out_size)
561
+
562
+ if status_code:
563
+ outcome = "failed"
564
+ fail_reason = l_error_lines
565
+ else:
566
+ outcome = "succeeded"
567
+ fail_reason = ""
568
+
569
+ entry = {
570
+ "datetime": log_utility.get_date_time(),
571
+ "input_format": converter_kwargs['data']['from_full'],
572
+ "output_format": converter_kwargs['data']['to_full'],
573
+ "input_filename": in_filename,
574
+ "output_filename": run_output.output_filename[l_index:r_index],
575
+ "input_size": input_size,
576
+ "output_size": output_size }
577
+
578
+ for key in [ "converter", "coordinates", "coordOption", "from_flags",
579
+ "to_flags", "from_arg_flags", "to_arg_flags" ]:
580
+ if key in converter_kwargs['data'] and converter_kwargs['data'][key] != "" and not \
581
+ ((key == "coordinates" or key == "coordOption") and converter_kwargs['data']['coordinates'] == "neither") :
582
+ entry[key] = converter_kwargs['data'][key]
583
+
584
+ entry["outcome"] = outcome
585
+
586
+ if fail_reason != "":
587
+ entry["fail_reason"] = fail_reason
588
+
589
+ logLock.acquire()
590
+ sys.__stdout__.write(f"{json.dumps(entry) + '\n'}")
591
+ logLock.release()
592
+ except Exception:
593
+ sys.__stdout__.write({"datetime": log_utility.get_date_time(),
594
+ "logging_error": "An error occurred during logging of conversion information."})
595
+
476
596
  return run_output
597
+
598
def set_size_units(size):
    """Format a file size given in bytes as a human-readable string with units

    Parameters
    ----------
    size : int | float
        The size in bytes

    Returns
    -------
    str
        The size in B (below 1024 bytes), kB (below `const.MEGABYTE`), or MB (otherwise). kB and MB values are
        shown to three decimal places.
    """
    # Thresholds are tested from smallest to largest so that each range is reachable. Testing `>= 1024` first, as
    # was done previously, meant that sizes of a megabyte or more were always reported in kB.
    if size < 1024:
        return str(size) + ' B'
    if size < const.MEGABYTE:
        return '%.3f' % (size / 1024) + ' kB'
    return '%.3f' % (size / const.MEGABYTE) + ' MB'
@@ -6,19 +6,19 @@ Base class and information for file format converters
6
6
  """
7
7
 
8
8
 
9
- from copy import deepcopy
10
- from dataclasses import dataclass
9
+ import abc
11
10
  import logging
12
- from collections.abc import Callable
13
11
  import os
14
12
  import subprocess
15
- import abc
16
-
17
13
  import sys
18
14
  import traceback
15
+ from collections.abc import Callable
16
+ from copy import deepcopy
17
+ from dataclasses import dataclass
19
18
  from typing import Any
20
19
 
21
- from psdi_data_conversion import constants as const, log_utility
20
+ from psdi_data_conversion import constants as const
21
+ from psdi_data_conversion import log_utility
22
22
  from psdi_data_conversion.dist import bin_exists, get_bin_path, get_dist
23
23
  from psdi_data_conversion.security import SAFE_STRING_RE, string_is_safe
24
24
 
@@ -735,24 +735,11 @@ class ScriptFileConverter(FileConverter):
735
735
 
736
736
  self.logger.debug(f"Performing conversion with ScriptFileConverter using script '{self.script}'")
737
737
 
738
- from_flags = self.data.get("from_flags", "")
739
- to_flags = self.data.get("from_flags", "")
740
- from_options = self.data.get("from_options", "")
741
- to_options = self.data.get("from_options", "")
742
-
743
- # Check that all user-provided input passes security checks
744
- for user_args in [from_flags, to_flags, from_options, to_options]:
745
- if not string_is_safe(user_args):
746
- raise FileConverterInputException(f"Provided argument '{user_args}' does not pass security check - it "
747
- f"must match the regex {SAFE_STRING_RE.pattern}.", help=True)
748
-
749
738
  env = {"DIST": get_dist()}
750
739
  if self.required_bin is not None:
751
740
  env["BIN_PATH"] = get_bin_path(self.required_bin)
752
741
 
753
- process = subprocess.run(['sh', f'psdi_data_conversion/scripts/{self.script}',
754
- '--' + self.to_format_info.name, self.in_filename, self.out_filename, from_flags,
755
- to_flags, from_options, to_options],
742
+ process = subprocess.run(['sh', f'psdi_data_conversion/scripts/{self.script}', *self._get_script_args()],
756
743
  env=env, capture_output=True, text=True)
757
744
 
758
745
  self.out = process.stdout
@@ -763,3 +750,20 @@ class ScriptFileConverter(FileConverter):
763
750
  self._abort_from_err()
764
751
  else:
765
752
  self.logger.debug("Conversion process completed successfully")
753
+
754
def _get_script_args(self):
    """Get the list of arguments which will be passed to the script

    Returns
    -------
    list[str]
        In order: the output format as a `--`-prefixed flag, the input filename, the output filename, the input
        flags, the output flags, the input options, and the output options.

    Raises
    ------
    FileConverterInputException
        If any of the user-provided flags or options fails the security check
    """

    # Each value is read from its own key. Previously the "to" values were read from the "from" keys, so
    # output-side flags and options were never passed to the script.
    from_flags = self.data.get("from_flags", "")
    to_flags = self.data.get("to_flags", "")
    from_options = self.data.get("from_options", "")
    to_options = self.data.get("to_options", "")

    # Check that all user-provided input passes security checks
    for user_args in [from_flags, to_flags, from_options, to_options]:
        if not string_is_safe(user_args):
            raise FileConverterInputException(f"Provided argument '{user_args}' does not pass security check - it "
                                              f"must match the regex {SAFE_STRING_RE.pattern}.", help=True)

    return ['--' + self.to_format_info.name, self.in_filename, self.out_filename, from_flags, to_flags,
            from_options, to_options]
@@ -29,6 +29,19 @@ class C2xFileConverter(ScriptFileConverter):
29
29
  "may be downloaded from https://www.c2x.org.uk/downloads/")
30
30
  supports_ambiguous_extensions = True
31
31
 
32
def _get_script_args(self):
    """Build the script arguments, substituting the c2x-style name for the output format

    The argument list is taken from the parent class, and its first element (the output format flag) is replaced
    with `--` followed by the c2x format name of the output format.
    """
    # Drop the parent's output format flag and keep all arguments which follow it
    _, *l_other_args = super()._get_script_args()

    # TODO - check if the input file has an extension which will be accepted by c2x for its format, and handle if
    # not

    return ["--" + self.to_format_info.c2x_format, *l_other_args]
44
+
32
45
 
33
46
  # Assign this converter to the `converter` variable - this lets the psdi_data_conversion.converter module detect and
34
47
  # register it, making it available for use by the command-line script, python library, and web app
@@ -6,8 +6,9 @@ Open Babel FileConverter
6
6
  """
7
7
 
8
8
  from copy import deepcopy
9
- from openbabel import openbabel
9
+
10
10
  import py
11
+ from openbabel import openbabel
11
12
 
12
13
  from psdi_data_conversion.converters.base import FileConverter, FileConverterInputException
13
14
  from psdi_data_conversion.security import SAFE_STRING_RE, string_is_safe