psdi-data-conversion 0.1.7__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. psdi_data_conversion/app.py +5 -408
  2. psdi_data_conversion/constants.py +12 -8
  3. psdi_data_conversion/converter.py +41 -28
  4. psdi_data_conversion/converters/base.py +18 -13
  5. psdi_data_conversion/database.py +292 -88
  6. psdi_data_conversion/gui/__init__.py +5 -0
  7. psdi_data_conversion/gui/accessibility.py +51 -0
  8. psdi_data_conversion/gui/env.py +239 -0
  9. psdi_data_conversion/gui/get.py +53 -0
  10. psdi_data_conversion/gui/post.py +176 -0
  11. psdi_data_conversion/gui/setup.py +102 -0
  12. psdi_data_conversion/main.py +70 -13
  13. psdi_data_conversion/static/content/convert.htm +105 -74
  14. psdi_data_conversion/static/content/convertato.htm +36 -26
  15. psdi_data_conversion/static/content/convertc2x.htm +39 -26
  16. psdi_data_conversion/static/content/download.htm +5 -5
  17. psdi_data_conversion/static/content/feedback.htm +2 -2
  18. psdi_data_conversion/static/content/header-links.html +2 -2
  19. psdi_data_conversion/static/content/index-versions/header-links.html +2 -2
  20. psdi_data_conversion/static/content/index-versions/psdi-common-header.html +9 -12
  21. psdi_data_conversion/static/content/psdi-common-header.html +9 -12
  22. psdi_data_conversion/static/javascript/accessibility.js +88 -61
  23. psdi_data_conversion/static/javascript/data.js +1 -3
  24. psdi_data_conversion/static/javascript/load_accessibility.js +50 -33
  25. psdi_data_conversion/static/styles/format.css +72 -18
  26. psdi_data_conversion/templates/accessibility.htm +274 -0
  27. psdi_data_conversion/templates/documentation.htm +6 -6
  28. psdi_data_conversion/templates/index.htm +73 -56
  29. psdi_data_conversion/{static/content → templates}/report.htm +28 -10
  30. psdi_data_conversion/testing/conversion_test_specs.py +26 -6
  31. psdi_data_conversion/testing/utils.py +6 -6
  32. {psdi_data_conversion-0.1.7.dist-info → psdi_data_conversion-0.2.1.dist-info}/METADATA +9 -3
  33. {psdi_data_conversion-0.1.7.dist-info → psdi_data_conversion-0.2.1.dist-info}/RECORD +36 -30
  34. psdi_data_conversion/static/content/accessibility.htm +0 -255
  35. {psdi_data_conversion-0.1.7.dist-info → psdi_data_conversion-0.2.1.dist-info}/WHEEL +0 -0
  36. {psdi_data_conversion-0.1.7.dist-info → psdi_data_conversion-0.2.1.dist-info}/entry_points.txt +0 -0
  37. {psdi_data_conversion-0.1.7.dist-info → psdi_data_conversion-0.2.1.dist-info}/licenses/LICENSE +0 -0
@@ -206,8 +206,8 @@ class FileConverter:
206
206
  data: dict[str, Any] | None = None,
207
207
  abort_callback: Callable[[int], None] = abort_raise,
208
208
  use_envvars=False,
209
- upload_dir=const.DEFAULT_UPLOAD_DIR,
210
- download_dir=const.DEFAULT_DOWNLOAD_DIR,
209
+ input_dir=const.DEFAULT_INPUT_DIR,
210
+ output_dir=const.DEFAULT_OUTPUT_DIR,
211
211
  max_file_size=None,
212
212
  no_check=False,
213
213
  log_file: str | None = None,
@@ -235,9 +235,9 @@ class FileConverter:
235
235
  use_envvars : bool
236
236
  If set to True, environment variables will be checked for any that set options for this class and used,
237
237
  default False
238
- upload_dir : str
238
+ input_dir : str
239
239
  The location of input files relative to the current directory
240
- download_dir : str
240
+ output_dir : str
241
241
  The location of output files relative to the current directory
242
242
  max_file_size : float
243
243
  The maximum allowed file size for input/output files, in MB. If 0, will be unlimited. Default 0 (unlimited)
@@ -296,8 +296,8 @@ class FileConverter:
296
296
  # Set member variables directly from input
297
297
  self.in_filename = filename
298
298
  self.to_format = to_format
299
- self.upload_dir = upload_dir
300
- self.download_dir = download_dir
299
+ self.input_dir = input_dir
300
+ self.output_dir = output_dir
301
301
  self.log_file = log_file
302
302
  self.log_mode = log_mode
303
303
  self.log_level = log_level
@@ -328,17 +328,22 @@ class FileConverter:
328
328
  self.err: str | None = None
329
329
  self.quality: str | None = None
330
330
 
331
- # Create directory 'uploads' if not extant.
332
- if not os.path.exists(self.upload_dir):
333
- os.makedirs(self.upload_dir, exist_ok=True)
331
+ # Determine if the filename is fully-qualified, and if not, look for it in the input dir
332
+ if not os.path.exists(self.in_filename):
333
+ qualified_in_filename = os.path.join(self.input_dir, self.in_filename)
334
+ if os.path.exists(qualified_in_filename):
335
+ self.in_filename = qualified_in_filename
336
+ else:
337
+ FileConverterInputException(f"Input file {self.in_filename} not found, either absolute or relative "
338
+ f"to {self.input_dir}")
334
339
 
335
340
  # Create the output directory if not extant.
336
- if not os.path.exists(self.download_dir):
337
- os.makedirs(self.download_dir, exist_ok=True)
341
+ if not os.path.exists(self.output_dir):
342
+ os.makedirs(self.output_dir, exist_ok=True)
338
343
 
339
344
  self.local_filename = os.path.split(self.in_filename)[1]
340
345
  self.filename_base = os.path.splitext(self.local_filename)[0]
341
- self.out_filename = f"{self.download_dir}/{self.filename_base}.{self.to_format_info.name}"
346
+ self.out_filename = f"{self.output_dir}/{self.filename_base}.{self.to_format_info.name}"
342
347
 
343
348
  # Set up files to log to
344
349
  self._setup_loggers()
@@ -441,7 +446,7 @@ class FileConverter:
441
446
  if self.log_mode == const.LOG_FULL_FORCE:
442
447
  self.output_log = self.log_file
443
448
  else:
444
- self.output_log = os.path.join(self.download_dir, f"{self.filename_base}{const.OUTPUT_LOG_EXT}")
449
+ self.output_log = os.path.join(self.output_dir, f"{self.filename_base}{const.OUTPUT_LOG_EXT}")
445
450
 
446
451
  # If any previous log exists, delete it
447
452
  if os.path.exists(self.output_log):
@@ -9,14 +9,21 @@ from __future__ import annotations
9
9
 
10
10
  import json
11
11
  import os
12
+ import sys
13
+ from copy import copy
12
14
  from dataclasses import dataclass, field
15
+ from functools import lru_cache
13
16
  from itertools import product
14
17
  from logging import getLogger
15
18
  from typing import Any, Literal, overload
19
+ from warnings import catch_warnings
20
+
21
+ import igraph as ig
16
22
 
17
23
  from psdi_data_conversion import constants as const
18
- from psdi_data_conversion.converter import D_SUPPORTED_CONVERTERS, get_registered_converter_class
19
- from psdi_data_conversion.converters.base import FileConverterException
24
+ from psdi_data_conversion.converter import (L_REGISTERED_CONVERTERS, L_SUPPORTED_CONVERTERS,
25
+ get_registered_converter_class)
26
+ from psdi_data_conversion.converters.base import FileConverter, FileConverterException
20
27
  from psdi_data_conversion.utils import regularize_name
21
28
 
22
29
  # Keys for top-level and general items in the database
@@ -121,17 +128,40 @@ class ConverterInfo:
121
128
  The regularized name of the converter
122
129
  parent : DataConversionDatabase
123
130
  The database which this belongs to
131
+ d_single_converter_info : dict[str, int | str]
132
+ The dict within the database file which describes this converter
124
133
  d_data : dict[str, Any]
125
134
  The loaded database dict
126
135
  """
127
136
 
128
137
  self.name = regularize_name(name)
138
+ """The regularized name of the converter"""
139
+
140
+ self.converter_class: type[FileConverter]
141
+ """The class used to perform conversions with this converter"""
142
+
143
+ self.pretty_name: str
144
+ """The name of the converter, properly spaced and capitalized"""
145
+
146
+ try:
147
+ self.converter_class = get_registered_converter_class(self.name)
148
+ self.pretty_name = self.converter_class.name
149
+ except KeyError:
150
+ self.converter_class = None
151
+ self.pretty_name = name
152
+
129
153
  self.parent = parent
154
+ """The parent database"""
130
155
 
131
156
  # Get info about the converter from the database
132
157
  self.id: int = d_single_converter_info.get(DB_ID_KEY, -1)
158
+ """The converter's ID"""
159
+
133
160
  self.description: str = d_single_converter_info.get(DB_DESC_KEY, "")
161
+ """A description of the converter"""
162
+
134
163
  self.url: str = d_single_converter_info.get(DB_URL_KEY, "")
164
+ """The official URL for the converter"""
135
165
 
136
166
  # Get necessary info about the converter from the class
137
167
  try:
@@ -403,6 +433,12 @@ class FormatInfo:
403
433
  """Class providing information on a file format from the PSDI Data Conversion database
404
434
  """
405
435
 
436
+ D_PROPERTY_ATTRS = {const.QUAL_COMP_KEY: const.QUAL_COMP_LABEL,
437
+ const.QUAL_CONN_KEY: const.QUAL_CONN_LABEL,
438
+ const.QUAL_2D_KEY: const.QUAL_2D_LABEL,
439
+ const.QUAL_3D_KEY: const.QUAL_3D_LABEL}
440
+ """A dict of attrs of this class which describe properties that a format may or may not have"""
441
+
406
442
  def __init__(self,
407
443
  name: str,
408
444
  parent: DataConversionDatabase,
@@ -448,6 +484,9 @@ class FormatInfo:
448
484
  self.three_dim = d_single_format_info.get(DB_FORMAT_3D_KEY)
449
485
  """Whether or not this format stores 3D structural information"""
450
486
 
487
+ self._lower_name: str = self.name.lower()
488
+ """The format name all in lower-case"""
489
+
451
490
  self._disambiguated_name: str | None = None
452
491
 
453
492
  @property
@@ -455,12 +494,13 @@ class FormatInfo:
455
494
  """A unique name for this format which can be used to distinguish it from others which share the same extension,
456
495
  by appending the name of each with a unique index"""
457
496
  if self._disambiguated_name is None:
458
- l_formats_with_same_name = [x for x in self.parent.l_format_info if x and x.name == self.name]
497
+ l_formats_with_same_name = [x for x in self.parent.l_format_info
498
+ if x and x._lower_name == self._lower_name]
459
499
  if len(l_formats_with_same_name) == 1:
460
- self._disambiguated_name = self.name
500
+ self._disambiguated_name = self._lower_name
461
501
  else:
462
502
  index_of_this = [i for i, x in enumerate(l_formats_with_same_name) if self is x][0]
463
- self._disambiguated_name = f"{self.name}-{index_of_this}"
503
+ self._disambiguated_name = f"{self._lower_name}-{index_of_this}"
464
504
  return self._disambiguated_name
465
505
 
466
506
  def __str__(self):
@@ -486,7 +526,7 @@ class PropertyConversionInfo:
486
526
  def __post_init__(self):
487
527
  """Set the label and note based on input/output status
488
528
  """
489
- self.label = const.D_QUAL_LABELS[self.key]
529
+ self.label = FormatInfo.D_PROPERTY_ATTRS[self.key]
490
530
 
491
531
  if self.input_supported is None and self.output_supported is None:
492
532
  self.note = const.QUAL_NOTE_BOTH_UNKNOWN
@@ -584,26 +624,56 @@ class ConversionsTable:
584
624
  # Store references to needed data
585
625
  self._l_converts_to = l_converts_to
586
626
 
587
- # Build the conversion table, indexed Converter, Input Format, Output Format - note that each of these is
588
- # 1-indexed, so we add 1 to each of the lengths here
589
- num_converters = len(parent.converters)
627
+ # Build the conversion graphs - each format is a vertex, each conversion is an edge
590
628
  num_formats = len(parent.formats)
591
629
 
592
- self.table = [[[0 for k in range(num_formats+1)] for j in range(num_formats+1)]
593
- for i in range(num_converters+1)]
594
-
595
- for possible_conversion in l_converts_to:
596
-
597
- try:
598
- conv_id: int = possible_conversion[DB_CONV_ID_KEY]
599
- in_id: int = possible_conversion[DB_IN_ID_KEY]
600
- out_id: int = possible_conversion[DB_OUT_ID_KEY]
601
- except KeyError:
602
- raise FileConverterDatabaseException(
603
- f"Malformed 'converts_to' entry in database: {possible_conversion}")
630
+ l_supported_conversions = [x for x in l_converts_to if
631
+ self.parent.get_converter_info(x[DB_CONV_ID_KEY]).name in L_SUPPORTED_CONVERTERS]
632
+ l_registered_conversions = [x for x in l_supported_conversions if
633
+ self.parent.get_converter_info(x[DB_CONV_ID_KEY]).name in L_REGISTERED_CONVERTERS]
634
+
635
+ # We make separate graphs for all known conversions, all supported conversions, and all registered conversions
636
+ self.graph: ig.Graph
637
+ self.supported_graph: ig.Graph
638
+ self.registered_graph: ig.Graph
639
+
640
+ for support_type, l_conversions in (("", l_converts_to),
641
+ ("supported_", l_supported_conversions),
642
+ ("registered_", l_registered_conversions)):
643
+
644
+ setattr(self, support_type+"graph",
645
+ ig.Graph(n=num_formats,
646
+ directed=True,
647
+ # Each vertex stores the disambiguated name of the format
648
+ vertex_attrs={DB_NAME_KEY: [x.disambiguated_name if x is not None else None
649
+ for x in parent.l_format_info]},
650
+ edges=[(x[DB_IN_ID_KEY], x[DB_OUT_ID_KEY]) for x in l_conversions],
651
+ # Each edge stores the id and name of the converter used for the conversion
652
+ edge_attrs={DB_CONV_ID_KEY: [x[DB_CONV_ID_KEY] for x in l_conversions],
653
+ DB_NAME_KEY: [self.parent.get_converter_info(x[DB_CONV_ID_KEY]).name
654
+ for x in l_conversions]}))
655
+
656
+ def _get_desired_graph(self,
657
+ only: Literal["all"] | Literal["supported"] | Literal["registered"] = "all") -> ig.Graph:
658
+ if only == "all":
659
+ return self.graph
660
+ elif only == "supported":
661
+ return self.supported_graph
662
+ elif only == "registered":
663
+ return self.registered_graph
664
+ else:
665
+ raise ValueError(f"Invalid value \"{only}\" for keyword argument `only`. Allowed values are \"all\" "
666
+ "(default), \"supported\", and \"registered\".")
604
667
 
605
- self.table[conv_id][in_id][out_id] = 1
668
def _get_possible_converters(self, in_format_info: FormatInfo, out_format_info: FormatInfo,
                             only: Literal["all"] | Literal["supported"] | Literal["registered"] = "all"):
    """Get a list of the names of all converters which can convert directly from one format to another

    Parameters
    ----------
    in_format_info : FormatInfo
        Info on the input format; its `id` is used as the source vertex in the graph
    out_format_info : FormatInfo
        Info on the output format; its `id` is used as the target vertex in the graph
    only : Literal["all"] | Literal["supported"] | Literal["registered"]
        Which graph to search, forwarded to `_get_desired_graph`

    Returns
    -------
    list
        The value stored under `DB_NAME_KEY` on each edge running from the input format to the output format
    """
    matching_edges = self._get_desired_graph(only).es.select(_source=in_format_info.id,
                                                             _target=out_format_info.id)
    return [edge[DB_NAME_KEY] for edge in matching_edges]
606
675
 
676
+ @lru_cache(maxsize=None)
607
677
  def get_conversion_quality(self,
608
678
  converter_name: str,
609
679
  in_format: str | int,
@@ -633,14 +703,12 @@ class ConversionsTable:
633
703
  else:
634
704
  which_format = 0
635
705
 
636
- # Get info about the converter and formats
637
- conv_id = self.parent.get_converter_info(converter_name).id
638
- in_info = self.parent.get_format_info(in_format, which_format)
639
- out_info: int = self.parent.get_format_info(out_format, which_format)
706
+ # Get the full format info for each format
707
+ in_format_info = self.parent.get_format_info(in_format, which_format)
708
+ out_format_info: int = self.parent.get_format_info(out_format, which_format)
640
709
 
641
710
  # First check if the conversion is possible
642
- success_flag = self.table[conv_id][in_info.id][out_info.id]
643
- if not success_flag:
711
+ if converter_name not in self._get_possible_converters(in_format_info, out_format_info):
644
712
  return None
645
713
 
646
714
  # The conversion is possible. Now determine how many properties of the output format are not in the input
@@ -649,9 +717,9 @@ class ConversionsTable:
649
717
  num_new_props = 0
650
718
  any_unknown = False
651
719
  d_prop_conversion_info: dict[str, PropertyConversionInfo] = {}
652
- for prop in const.D_QUAL_LABELS:
653
- in_prop: bool | None = getattr(in_info, prop)
654
- out_prop: bool | None = getattr(out_info, prop)
720
+ for prop in FormatInfo.D_PROPERTY_ATTRS:
721
+ in_prop: bool | None = getattr(in_format_info, prop)
722
+ out_prop: bool | None = getattr(out_format_info, prop)
655
723
 
656
724
  d_prop_conversion_info[prop] = PropertyConversionInfo(prop, in_prop, out_prop)
657
725
 
@@ -699,7 +767,9 @@ class ConversionsTable:
699
767
 
700
768
  def get_possible_conversions(self,
701
769
  in_format: str | int,
702
- out_format: str | int) -> list[tuple[str, FormatInfo, FormatInfo]]:
770
+ out_format: str | int,
771
+ only: Literal["all"] | Literal["supported"] | Literal["registered"] = "all"
772
+ ) -> list[tuple[ConverterInfo, FormatInfo, FormatInfo]]:
703
773
  """Get a list of converters which can perform a conversion from one format to another, disambiguating in the
704
774
  case of ambiguous formats and providing IDs for input/output formats for possible conversions
705
775
 
@@ -712,10 +782,10 @@ class ConversionsTable:
712
782
 
713
783
  Returns
714
784
  -------
715
- list[tuple[str, FormatInfo, FormatInfo]]
716
- A list of tuples, where each tuple's first item is the name of a converter which can perform a matching
717
- conversion, the second is the info of the input format for this conversion, and the third is the info of the
718
- output format
785
+ list[tuple[ConverterInfo, FormatInfo, FormatInfo]]
786
+ A list of tuples, where each tuple's first item is the ConverterInfo of a converter which can perform a
787
+ matching conversion, the second is the info of the input format for this conversion, and the third is the
788
+ info of the output format
719
789
  """
720
790
  l_in_format_infos = self.parent.get_format_info(in_format, which="all")
721
791
  l_out_format_infos = self.parent.get_format_info(out_format, which="all")
@@ -726,20 +796,113 @@ class ConversionsTable:
726
796
  # Iterate over all possible combinations of input and output formats
727
797
  for in_format_info, out_format_info in product(l_in_format_infos, l_out_format_infos):
728
798
 
729
- # Slice the table to get a list of the success for this conversion for each converter
730
- l_converter_success = [x[in_format_info.id][out_format_info.id] for x in self.table]
731
-
732
- # Filter for possible conversions and get the converter name and degree-of-success string
733
- # for each possible conversion
734
- l_converter_names = [self.parent.get_converter_info(converter_id).name
735
- for converter_id, possible_flag
736
- in enumerate(l_converter_success) if possible_flag > 0]
799
+ # Filter for converters which can perform this conversion
800
+ l_converter_names = self._get_possible_converters(in_format_info, out_format_info, only=only)
737
801
 
738
802
  for converter_name in l_converter_names:
739
- l_possible_conversions.append((converter_name, in_format_info, out_format_info))
803
+ l_possible_conversions.append((self.parent.get_converter_info(converter_name),
804
+ in_format_info, out_format_info))
740
805
 
741
806
  return l_possible_conversions
742
807
 
808
@lru_cache
def _get_shared_attrs(self, source_format, target_format):
    """Get a list of the property attributes which both the source and target format feature

    Parameters
    ----------
    source_format
        The source format, in any form accepted by the parent database's `get_format_info`
    target_format
        The target format, in any form accepted by the parent database's `get_format_info`

    Returns
    -------
    list[str]
        The keys of `FormatInfo.D_PROPERTY_ATTRS` for which the attribute is truthy on both formats. Since this
        method is cached, the same list object is returned on repeated calls, so callers should copy it before
        modifying it
    """
    source_info = self.parent.get_format_info(source_format)
    target_info = self.parent.get_format_info(target_format)

    return [attr for attr in FormatInfo.D_PROPERTY_ATTRS
            if getattr(source_info, attr) and getattr(target_info, attr)]
822
+
823
+ def _get_info_loss(self, path):
824
+ """Get the number of attributes in both the first and last format which would be lost if a conversion path
825
+ is traversed
826
+ """
827
+ l_shared_attrs = self._get_shared_attrs(path[0], path[-1])
828
+
829
+ if len(l_shared_attrs) == 0:
830
+ return 0
831
+
832
+ l_kept_attrs = copy(l_shared_attrs)
833
+ for i in range(len(path)-1):
834
+ target_format_info = self.parent.get_format_info(i+1)
835
+
836
+ # Check if each attr still in the shared list is kept here
837
+ for attr in l_kept_attrs:
838
+ if not getattr(target_format_info, attr):
839
+ l_kept_attrs.remove(attr)
840
+ if len(l_kept_attrs) == 0:
841
+ break
842
+
843
+ num_lost_attrs = len(l_shared_attrs) - len(l_kept_attrs)
844
+
845
+ return num_lost_attrs
846
+
847
def get_conversion_pathway(self,
                           in_format: str | int | FormatInfo,
                           out_format: str | int | FormatInfo,
                           only: Literal["all"] | Literal["supported"] | Literal["registered"] = "all"
                           ) -> list[tuple[ConverterInfo, FormatInfo, FormatInfo]] | None:
    """Gets a pathway to convert from one format to another

    Parameters
    ----------
    in_format : str | int | FormatInfo
        The input file format
    out_format : str | int | FormatInfo
        The output file format
    only : Literal["all"] | Literal["supported"] | Literal["registered"]
        Which converters to limit the search to. This applies both to the check for a direct conversion and to the
        search for a multi-step pathway

    Returns
    -------
    list[tuple[ConverterInfo, FormatInfo, FormatInfo]] | None
        `None` if the input and output formats are the same or if no pathway is found. Otherwise a list of steps,
        each a tuple of the converter info, the input format info, and the output format info for that step
    """

    in_format_info = self.parent.get_format_info(in_format)
    out_format_info = self.parent.get_format_info(out_format)

    # Check if the formats are the same
    if in_format_info is out_format_info:
        return None

    # First check if direct conversion is possible, respecting the requested restriction on converters
    l_possible_direct_conversions = self.get_possible_conversions(in_format=in_format,
                                                                  out_format=out_format,
                                                                  only=only)
    if l_possible_direct_conversions:
        # TODO: When there's some better measure of conversion quality, use it to choose which converter to use
        return [l_possible_direct_conversions[0]]

    graph: ig.Graph = self._get_desired_graph(only)

    # Query the graph for the shortest paths to perform this conversion. If no conversions are possible, igraph
    # will print a warning, which we catch and suppress here
    # NOTE(review): igraph's `get_shortest_paths` appears to return one path per target vertex rather than every
    # shortest path - confirm whether `get_all_shortest_paths` was intended
    with catch_warnings(record=True) as l_warnings:
        l_paths: list[list[int]] = graph.get_shortest_paths(in_format_info.id, to=out_format_info.id)
    for warning in l_warnings:
        if "Couldn't reach some vertices" not in str(warning.message):
            print(warning, file=sys.stderr)

    # Check if any paths are possible
    if not l_paths or not l_paths[0]:
        return None

    # Check each path to find the first which doesn't lose any unnecessary info, or else the one which loses the
    # least
    best_path: list[int] | None = None
    best_info_loss: int | None = None
    for path in l_paths:
        info_loss = self._get_info_loss(path)
        if best_info_loss is None or info_loss < best_info_loss:
            best_path = path
            best_info_loss = info_loss
        if best_info_loss == 0:
            break

    # Output the best path in the desired format, one step per pair of consecutive formats in the path
    l_steps: list[tuple[ConverterInfo, FormatInfo, FormatInfo]] = []
    for source_id, target_id in zip(best_path[:-1], best_path[1:]):
        # If multiple converters can perform this step, the first one found is used
        converter_name: str = graph.es.select(_source=source_id, _target=target_id)[0][DB_NAME_KEY]
        # Look the converter up in this table's own parent database, as is done for the formats
        l_steps.append((self.parent.get_converter_info(converter_name),
                        self.parent.get_format_info(source_id),
                        self.parent.get_format_info(target_id)))

    return l_steps
905
+
743
906
  def get_possible_formats(self, converter_name: str) -> tuple[list[FormatInfo], list[FormatInfo]]:
744
907
  """Get a list of input and output formats that a given converter supports
745
908
 
@@ -754,21 +917,10 @@ class ConversionsTable:
754
917
  A tuple of a list of the supported input formats and a list of the supported output formats
755
918
  """
756
919
  conv_id: int = self.parent.get_converter_info(converter_name).id
757
- ll_in_out_format_success = self.table[conv_id]
758
-
759
- # Filter for possible input formats by checking if at least one output format for each has a degree of success
760
- # index greater than 0, and stored the filtered lists where the input format is possible so we only need to
761
- # check them for possible output formats
762
- (l_possible_in_format_ids,
763
- ll_filtered_in_out_format_success) = zip(*[(i, l_out_format_success) for i, l_out_format_success
764
- in enumerate(ll_in_out_format_success)
765
- if sum(l_out_format_success) > 0])
766
-
767
- # As with input IDs, filter for output IDs where at least one input format has a degree of success index greater
768
- # than 0. A bit more complicated for the second index, forcing us to do list comprehension to fetch a list
769
- # across the table before summing
770
- l_possible_out_format_ids = [j for j, _ in enumerate(ll_filtered_in_out_format_success[0]) if
771
- sum([x[j] for x in ll_filtered_in_out_format_success]) > 0]
920
+
921
+ l_conversion_edges = self.graph.es.select(**{DB_CONV_ID_KEY: conv_id})
922
+ l_possible_in_format_ids = list({x.source for x in l_conversion_edges})
923
+ l_possible_out_format_ids = list({x.target for x in l_conversion_edges})
772
924
 
773
925
  # Get the name for each format ID, and return lists of the names
774
926
  return ([self.parent.get_format_info(x) for x in l_possible_in_format_ids],
@@ -875,9 +1027,9 @@ class DataConversionDatabase:
875
1027
  self._l_format_info: list[FormatInfo | None] = [None] * (max_id+1)
876
1028
 
877
1029
  for d_single_format_info in self.formats:
878
- name: str = d_single_format_info[DB_FORMAT_EXT_KEY]
1030
+ lc_name: str = d_single_format_info[DB_FORMAT_EXT_KEY]
879
1031
 
880
- format_info = FormatInfo(name=name,
1032
+ format_info = FormatInfo(name=lc_name,
881
1033
  parent=self,
882
1034
  d_single_format_info=d_single_format_info)
883
1035
 
@@ -887,24 +1039,17 @@ class DataConversionDatabase:
887
1039
  self._conversions_table = ConversionsTable(l_converts_to=self.converts_to,
888
1040
  parent=self)
889
1041
 
890
- # Use the conversions table to prune any formats which have no valid conversions
1042
+ # Use the conversions graph to prune any formats which have no valid conversions
891
1043
 
892
1044
  # Get the graph which only includes conversions performed by supported converters
893
- l_supported_converter_ids = [self.get_converter_info(x).id for x in D_SUPPORTED_CONVERTERS]
894
- supported_table = [self._conversions_table.table[x] for x in l_supported_converter_ids]
1045
+ supported_graph = self._conversions_table.supported_graph
895
1046
 
896
1047
  for format_id, format_info in enumerate(self._l_format_info):
897
1048
  if not format_info:
898
1049
  continue
899
1050
 
900
- # Check if the format is supported as the input format for any conversion
901
- ll_possible_from_conversions = [x[format_id] for x in supported_table]
902
- if sum([sum(x) for x in ll_possible_from_conversions]) > 0:
903
- continue
904
-
905
- # Check if the format is supported as the output format for any conversion
906
- ll_possible_to_conversions = [[y[format_id] for y in x] for x in supported_table]
907
- if sum([sum(x) for x in ll_possible_to_conversions]) > 0:
1051
+ # Check if the format is supported as the input or output format for any conversion
1052
+ if supported_graph.degree(format_id) > 0:
908
1053
  continue
909
1054
 
910
1055
  # If we get here, the format isn't supported for any conversions, so remove it from our list
@@ -918,14 +1063,14 @@ class DataConversionDatabase:
918
1063
  if not format_info:
919
1064
  continue
920
1065
 
921
- name = format_info.name
1066
+ lc_name = format_info.name.lower()
922
1067
 
923
1068
  # Each name may correspond to multiple formats, so we use a list for each entry to list all possible
924
1069
  # formats for each name
925
- if name not in self._d_format_info:
926
- self._d_format_info[name] = []
1070
+ if lc_name not in self._d_format_info:
1071
+ self._d_format_info[lc_name] = []
927
1072
 
928
- self._d_format_info[name].append(format_info)
1073
+ self._d_format_info[lc_name].append(format_info)
929
1074
 
930
1075
  def get_converter_info(self, converter_name_or_id: str | int) -> ConverterInfo:
931
1076
  """Get a converter's info from either its name or ID
@@ -987,6 +1132,9 @@ class DataConversionDatabase:
987
1132
  if format_name_or_id.startswith("."):
988
1133
  format_name_or_id = format_name_or_id[1:]
989
1134
 
1135
+ # Convert the format name to lower-case to handle it case-insensitively
1136
+ format_name_or_id = format_name_or_id.lower()
1137
+
990
1138
  # Check for a hyphen in the format, which indicates a preference from the user as to which, overriding the
991
1139
  # `which` kwarg
992
1140
  if "-" in format_name_or_id:
@@ -1050,6 +1198,22 @@ class DataConversionDatabase:
1050
1198
  _database: DataConversionDatabase | None = None
1051
1199
 
1052
1200
 
1201
def get_database_path() -> str:
    """Get the absolute path to the database file

    Returns
    -------
    str
        The database filename (`const.DATABASE_FILENAME`) joined onto the directory containing the constants module
    """

    # `__file__` won't be defined for this module in an interactive shell, so the location of the constants module
    # is used as the reference point instead
    package_dir = os.path.dirname(os.path.realpath(const.__file__))

    return os.path.join(package_dir, const.DATABASE_FILENAME)
1215
+
1216
+
1053
1217
  def load_database() -> DataConversionDatabase:
1054
1218
  """Load and return a new instance of the data conversion database from the JSON database file in this package. This
1055
1219
  function should not be called directly unless you specifically need a new instance of the database object and can't
@@ -1061,12 +1225,7 @@ def load_database() -> DataConversionDatabase:
1061
1225
  """
1062
1226
 
1063
1227
  # Find and load the database JSON file
1064
-
1065
- # For an interactive shell, __file__ won't be defined for this module, so use the constants module instead
1066
- reference_file = os.path.realpath(const.__file__)
1067
-
1068
- qualified_database_filename = os.path.join(os.path.dirname(reference_file), const.DATABASE_FILENAME)
1069
- d_data: dict = json.load(open(qualified_database_filename, "r"))
1228
+ d_data: dict = json.load(open(get_database_path(), "r"))
1070
1229
 
1071
1230
  return DataConversionDatabase(d_data)
1072
1231
 
@@ -1166,7 +1325,7 @@ def get_conversion_quality(converter_name: str,
1166
1325
 
1167
1326
 
1168
1327
  def get_possible_conversions(in_format: str | int,
1169
- out_format: str | int) -> list[tuple[str, FormatInfo, FormatInfo]]:
1328
+ out_format: str | int) -> list[tuple[ConverterInfo, FormatInfo, FormatInfo]]:
1170
1329
  """Get a list of converters which can perform a conversion from one format to another and disambiguate in the case
1171
1330
  of ambiguous input/output formats
1172
1331
 
@@ -1179,8 +1338,8 @@ def get_possible_conversions(in_format: str | int,
1179
1338
 
1180
1339
  Returns
1181
1340
  -------
1182
- list[tuple[str, FormatInfo, FormatInfo]]
1183
- A list of tuples, where each tuple's first item is the name of a converter which can perform a matching
1341
+ list[tuple[ConverterInfo, FormatInfo, FormatInfo]]
1342
+ A list of tuples, where each tuple's first item is the ConverterInfo of a converter which can perform a matching
1184
1343
  conversion, the second is the info of the input format for this conversion, and the third is the info of the
1185
1344
  output format
1186
1345
  """
@@ -1189,6 +1348,51 @@ def get_possible_conversions(in_format: str | int,
1189
1348
  out_format=out_format)
1190
1349
 
1191
1350
 
1351
def get_conversion_pathway(in_format: str | int | FormatInfo,
                           out_format: str | int | FormatInfo,
                           only: Literal["all"] | Literal["supported"] | Literal["registered"] = "all"
                           ) -> list[tuple[ConverterInfo, FormatInfo, FormatInfo]] | None:
    """Get a list of conversions that can be performed to convert one format to another. This is primarily used when a
    direct conversion is not supported by any individual converter. Only one possible pathway will be returned,
    prioritising pathways which do not lose and then re-extrapolate any information stored by some formats and not
    others along the path.

    Parameters
    ----------
    in_format : str | int | FormatInfo
        The input file format. For this function, the format must be defined uniquely, either by using a disambiguated
        extension, ID, or FormatInfo
    out_format : str | int | FormatInfo
        The output file format. For this function, the format must be defined uniquely, either by using a disambiguated
        extension, ID, or FormatInfo
    only : Literal["all"] | Literal["supported"] | Literal["registered"], optional
        Which converters to limit the pathway search to:
        - "all" (default): All known converters
        - "supported": Only converters supported by this utility, even if not currently available (e.g. they don't work
          on your OS)
        - "registered": Only converters supported by this utility and currently available

    Returns
    -------
    list[tuple[ConverterInfo, FormatInfo, FormatInfo]] | None
        Will return `None` if no conversion pathway is possible or if the input and output formats are the same.
        Otherwise, will return a list of steps in the pathway, each being a tuple of:

        converter_info : ConverterInfo
            Info on the converter used to perform this step
        in_format : FormatInfo
            Input format for this step (if the first step, will be the input format to this function, otherwise will be
            the output format of the previous step)
        out_format : FormatInfo
            Output format from this step (if the last step, will be the output format for this function, otherwise will
            be the input format of the next step)
    """

    # Delegate to the conversions table of the shared database instance
    conversions_table = get_database().conversions_table

    return conversions_table.get_conversion_pathway(in_format=in_format,
                                                    out_format=out_format,
                                                    only=only)
1394
+
1395
+
1192
1396
  def disambiguate_formats(converter_name: str,
1193
1397
  in_format: str | int | FormatInfo,
1194
1398
  out_format: str | int | FormatInfo) -> tuple[FormatInfo, FormatInfo]:
@@ -1216,11 +1420,11 @@ def disambiguate_formats(converter_name: str,
1216
1420
  """
1217
1421
 
1218
1422
  # Regularize the converter name so we don't worry about case/spacing mismatches
1219
- converter_name = regularize_name(converter_name)
1423
+ converter_reg_name = regularize_name(converter_name)
1220
1424
 
1221
1425
  # Get all possible conversions, and see if we only have one for this converter
1222
1426
  l_possible_conversions = [x for x in get_possible_conversions(in_format, out_format)
1223
- if x[0] == converter_name]
1427
+ if x[0].name == converter_reg_name]
1224
1428
 
1225
1429
  if len(l_possible_conversions) == 1:
1226
1430
  return l_possible_conversions[0][1], l_possible_conversions[0][2]
@@ -0,0 +1,5 @@
1
+ """
2
+ # GUI package
3
+
4
+ This package contains functions, classes, and information used for the Flask-served website (aka GUI)
5
+ """