psdi-data-conversion 0.1.7__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. psdi_data_conversion/app.py +5 -408
  2. psdi_data_conversion/constants.py +11 -7
  3. psdi_data_conversion/converter.py +41 -28
  4. psdi_data_conversion/converters/base.py +18 -13
  5. psdi_data_conversion/database.py +284 -88
  6. psdi_data_conversion/gui/__init__.py +5 -0
  7. psdi_data_conversion/gui/accessibility.py +51 -0
  8. psdi_data_conversion/gui/env.py +239 -0
  9. psdi_data_conversion/gui/get.py +53 -0
  10. psdi_data_conversion/gui/post.py +176 -0
  11. psdi_data_conversion/gui/setup.py +102 -0
  12. psdi_data_conversion/main.py +70 -13
  13. psdi_data_conversion/static/content/convert.htm +105 -74
  14. psdi_data_conversion/static/content/convertato.htm +36 -26
  15. psdi_data_conversion/static/content/convertc2x.htm +39 -26
  16. psdi_data_conversion/static/content/download.htm +5 -5
  17. psdi_data_conversion/static/content/feedback.htm +2 -2
  18. psdi_data_conversion/static/content/header-links.html +2 -2
  19. psdi_data_conversion/static/content/index-versions/header-links.html +2 -2
  20. psdi_data_conversion/static/content/index-versions/psdi-common-header.html +9 -12
  21. psdi_data_conversion/static/content/psdi-common-header.html +9 -12
  22. psdi_data_conversion/static/javascript/accessibility.js +88 -61
  23. psdi_data_conversion/static/javascript/data.js +1 -3
  24. psdi_data_conversion/static/javascript/load_accessibility.js +50 -33
  25. psdi_data_conversion/static/styles/format.css +72 -18
  26. psdi_data_conversion/templates/accessibility.htm +274 -0
  27. psdi_data_conversion/templates/documentation.htm +6 -6
  28. psdi_data_conversion/templates/index.htm +73 -56
  29. psdi_data_conversion/{static/content → templates}/report.htm +28 -10
  30. psdi_data_conversion/testing/conversion_test_specs.py +26 -6
  31. psdi_data_conversion/testing/utils.py +6 -6
  32. {psdi_data_conversion-0.1.7.dist-info → psdi_data_conversion-0.2.0.dist-info}/METADATA +6 -2
  33. {psdi_data_conversion-0.1.7.dist-info → psdi_data_conversion-0.2.0.dist-info}/RECORD +36 -30
  34. psdi_data_conversion/static/content/accessibility.htm +0 -255
  35. {psdi_data_conversion-0.1.7.dist-info → psdi_data_conversion-0.2.0.dist-info}/WHEEL +0 -0
  36. {psdi_data_conversion-0.1.7.dist-info → psdi_data_conversion-0.2.0.dist-info}/entry_points.txt +0 -0
  37. {psdi_data_conversion-0.1.7.dist-info → psdi_data_conversion-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -206,8 +206,8 @@ class FileConverter:
206
206
  data: dict[str, Any] | None = None,
207
207
  abort_callback: Callable[[int], None] = abort_raise,
208
208
  use_envvars=False,
209
- upload_dir=const.DEFAULT_UPLOAD_DIR,
210
- download_dir=const.DEFAULT_DOWNLOAD_DIR,
209
+ input_dir=const.DEFAULT_INPUT_DIR,
210
+ output_dir=const.DEFAULT_OUTPUT_DIR,
211
211
  max_file_size=None,
212
212
  no_check=False,
213
213
  log_file: str | None = None,
@@ -235,9 +235,9 @@ class FileConverter:
235
235
  use_envvars : bool
236
236
  If set to True, environment variables will be checked for any that set options for this class and used,
237
237
  default False
238
- upload_dir : str
238
+ input_dir : str
239
239
  The location of input files relative to the current directory
240
- download_dir : str
240
+ output_dir : str
241
241
  The location of output files relative to the current directory
242
242
  max_file_size : float
243
243
  The maximum allowed file size for input/output files, in MB. If 0, will be unlimited. Default 0 (unlimited)
@@ -296,8 +296,8 @@ class FileConverter:
296
296
  # Set member variables directly from input
297
297
  self.in_filename = filename
298
298
  self.to_format = to_format
299
- self.upload_dir = upload_dir
300
- self.download_dir = download_dir
299
+ self.input_dir = input_dir
300
+ self.output_dir = output_dir
301
301
  self.log_file = log_file
302
302
  self.log_mode = log_mode
303
303
  self.log_level = log_level
@@ -328,17 +328,22 @@ class FileConverter:
328
328
  self.err: str | None = None
329
329
  self.quality: str | None = None
330
330
 
331
- # Create directory 'uploads' if not extant.
332
- if not os.path.exists(self.upload_dir):
333
- os.makedirs(self.upload_dir, exist_ok=True)
331
+ # Determine if the filename is fully-qualified, and if not, look for it in the input dir
332
+ if not os.path.exists(self.in_filename):
333
+ qualified_in_filename = os.path.join(self.input_dir, self.in_filename)
334
+ if os.path.exists(qualified_in_filename):
335
+ self.in_filename = qualified_in_filename
336
+ else:
337
+ FileConverterInputException(f"Input file {self.in_filename} not found, either absolute or relative "
338
+ f"to {self.input_dir}")
334
339
 
335
340
  # Create directory 'downloads' if not extant.
336
- if not os.path.exists(self.download_dir):
337
- os.makedirs(self.download_dir, exist_ok=True)
341
+ if not os.path.exists(self.output_dir):
342
+ os.makedirs(self.output_dir, exist_ok=True)
338
343
 
339
344
  self.local_filename = os.path.split(self.in_filename)[1]
340
345
  self.filename_base = os.path.splitext(self.local_filename)[0]
341
- self.out_filename = f"{self.download_dir}/{self.filename_base}.{self.to_format_info.name}"
346
+ self.out_filename = f"{self.output_dir}/{self.filename_base}.{self.to_format_info.name}"
342
347
 
343
348
  # Set up files to log to
344
349
  self._setup_loggers()
@@ -441,7 +446,7 @@ class FileConverter:
441
446
  if self.log_mode == const.LOG_FULL_FORCE:
442
447
  self.output_log = self.log_file
443
448
  else:
444
- self.output_log = os.path.join(self.download_dir, f"{self.filename_base}{const.OUTPUT_LOG_EXT}")
449
+ self.output_log = os.path.join(self.output_dir, f"{self.filename_base}{const.OUTPUT_LOG_EXT}")
445
450
 
446
451
  # If any previous log exists, delete it
447
452
  if os.path.exists(self.output_log):
@@ -9,14 +9,19 @@ from __future__ import annotations
9
9
 
10
10
  import json
11
11
  import os
12
+ from copy import copy
12
13
  from dataclasses import dataclass, field
14
+ from functools import lru_cache
13
15
  from itertools import product
14
16
  from logging import getLogger
15
17
  from typing import Any, Literal, overload
16
18
 
19
+ import igraph as ig
20
+
17
21
  from psdi_data_conversion import constants as const
18
- from psdi_data_conversion.converter import D_SUPPORTED_CONVERTERS, get_registered_converter_class
19
- from psdi_data_conversion.converters.base import FileConverterException
22
+ from psdi_data_conversion.converter import (L_REGISTERED_CONVERTERS, L_SUPPORTED_CONVERTERS,
23
+ get_registered_converter_class)
24
+ from psdi_data_conversion.converters.base import FileConverter, FileConverterException
20
25
  from psdi_data_conversion.utils import regularize_name
21
26
 
22
27
  # Keys for top-level and general items in the database
@@ -121,17 +126,40 @@ class ConverterInfo:
121
126
  The regularized name of the converter
122
127
  parent : DataConversionDatabase
123
128
  The database which this belongs to
129
+ d_single_converter_info : dict[str, int | str]
130
+ The dict within the database file which describes this converter
124
131
  d_data : dict[str, Any]
125
132
  The loaded database dict
126
133
  """
127
134
 
128
135
  self.name = regularize_name(name)
136
+ """The regularized name of the converter"""
137
+
138
+ self.converter_class: type[FileConverter]
139
+ """The class used to perform conversions with this converter"""
140
+
141
+ self.pretty_name: str
142
+ """The name of the converter, properly spaced and capitalized"""
143
+
144
+ try:
145
+ self.converter_class = get_registered_converter_class(self.name)
146
+ self.pretty_name = self.converter_class.name
147
+ except KeyError:
148
+ self.converter_class = None
149
+ self.pretty_name = name
150
+
129
151
  self.parent = parent
152
+ """The parent database"""
130
153
 
131
154
  # Get info about the converter from the database
132
155
  self.id: int = d_single_converter_info.get(DB_ID_KEY, -1)
156
+ """The converter's ID"""
157
+
133
158
  self.description: str = d_single_converter_info.get(DB_DESC_KEY, "")
159
+ """A description of the converter"""
160
+
134
161
  self.url: str = d_single_converter_info.get(DB_URL_KEY, "")
162
+ """The official URL for the converter"""
135
163
 
136
164
  # Get necessary info about the converter from the class
137
165
  try:
@@ -403,6 +431,12 @@ class FormatInfo:
403
431
  """Class providing information on a file format from the PSDI Data Conversion database
404
432
  """
405
433
 
434
+ D_PROPERTY_ATTRS = {const.QUAL_COMP_KEY: const.QUAL_COMP_LABEL,
435
+ const.QUAL_CONN_KEY: const.QUAL_CONN_LABEL,
436
+ const.QUAL_2D_KEY: const.QUAL_2D_LABEL,
437
+ const.QUAL_3D_KEY: const.QUAL_3D_LABEL}
438
+ """A dict of attrs of this class which describe properties that a format may or may not have"""
439
+
406
440
  def __init__(self,
407
441
  name: str,
408
442
  parent: DataConversionDatabase,
@@ -448,6 +482,9 @@ class FormatInfo:
448
482
  self.three_dim = d_single_format_info.get(DB_FORMAT_3D_KEY)
449
483
  """Whether or not this format stores 3D structural information"""
450
484
 
485
+ self._lower_name: str = self.name.lower()
486
+ """The format name all in lower-case"""
487
+
451
488
  self._disambiguated_name: str | None = None
452
489
 
453
490
  @property
@@ -455,12 +492,13 @@ class FormatInfo:
455
492
  """A unique name for this format which can be used to distinguish it from others which share the same extension,
456
493
  by appending the name of each with a unique index"""
457
494
  if self._disambiguated_name is None:
458
- l_formats_with_same_name = [x for x in self.parent.l_format_info if x and x.name == self.name]
495
+ l_formats_with_same_name = [x for x in self.parent.l_format_info
496
+ if x and x._lower_name == self._lower_name]
459
497
  if len(l_formats_with_same_name) == 1:
460
- self._disambiguated_name = self.name
498
+ self._disambiguated_name = self._lower_name
461
499
  else:
462
500
  index_of_this = [i for i, x in enumerate(l_formats_with_same_name) if self is x][0]
463
- self._disambiguated_name = f"{self.name}-{index_of_this}"
501
+ self._disambiguated_name = f"{self._lower_name}-{index_of_this}"
464
502
  return self._disambiguated_name
465
503
 
466
504
  def __str__(self):
@@ -486,7 +524,7 @@ class PropertyConversionInfo:
486
524
  def __post_init__(self):
487
525
  """Set the label and note based on input/output status
488
526
  """
489
- self.label = const.D_QUAL_LABELS[self.key]
527
+ self.label = FormatInfo.D_PROPERTY_ATTRS[self.key]
490
528
 
491
529
  if self.input_supported is None and self.output_supported is None:
492
530
  self.note = const.QUAL_NOTE_BOTH_UNKNOWN
@@ -584,26 +622,56 @@ class ConversionsTable:
584
622
  # Store references to needed data
585
623
  self._l_converts_to = l_converts_to
586
624
 
587
- # Build the conversion table, indexed Converter, Input Format, Output Format - note that each of these is
588
- # 1-indexed, so we add 1 to each of the lengths here
589
- num_converters = len(parent.converters)
625
+ # Build the conversion graphs - each format is a vertex, each conversion is an edge
590
626
  num_formats = len(parent.formats)
591
627
 
592
- self.table = [[[0 for k in range(num_formats+1)] for j in range(num_formats+1)]
593
- for i in range(num_converters+1)]
594
-
595
- for possible_conversion in l_converts_to:
596
-
597
- try:
598
- conv_id: int = possible_conversion[DB_CONV_ID_KEY]
599
- in_id: int = possible_conversion[DB_IN_ID_KEY]
600
- out_id: int = possible_conversion[DB_OUT_ID_KEY]
601
- except KeyError:
602
- raise FileConverterDatabaseException(
603
- f"Malformed 'converts_to' entry in database: {possible_conversion}")
628
+ l_supported_conversions = [x for x in l_converts_to if
629
+ self.parent.get_converter_info(x[DB_CONV_ID_KEY]).name in L_SUPPORTED_CONVERTERS]
630
+ l_registered_conversions = [x for x in l_supported_conversions if
631
+ self.parent.get_converter_info(x[DB_CONV_ID_KEY]).name in L_REGISTERED_CONVERTERS]
632
+
633
+ # We make separate graphs for all known conversions, all supported conversions, and all registered conversions
634
+ self.graph: ig.Graph
635
+ self.supported_graph: ig.Graph
636
+ self.registered_graph: ig.Graph
637
+
638
+ for support_type, l_conversions in (("", l_converts_to),
639
+ ("supported_", l_supported_conversions),
640
+ ("registered_", l_registered_conversions)):
641
+
642
+ setattr(self, support_type+"graph",
643
+ ig.Graph(n=num_formats,
644
+ directed=True,
645
+ # Each vertex stores the disambiguated name of the format
646
+ vertex_attrs={DB_NAME_KEY: [x.disambiguated_name if x is not None else None
647
+ for x in parent.l_format_info]},
648
+ edges=[(x[DB_IN_ID_KEY], x[DB_OUT_ID_KEY]) for x in l_conversions],
649
+ # Each edge stores the id and name of the converter used for the conversion
650
+ edge_attrs={DB_CONV_ID_KEY: [x[DB_CONV_ID_KEY] for x in l_conversions],
651
+ DB_NAME_KEY: [self.parent.get_converter_info(x[DB_CONV_ID_KEY]).name
652
+ for x in l_conversions]}))
653
+
654
def _get_desired_graph(self,
                       only: Literal["all"] | Literal["supported"] | Literal["registered"] = "all") -> ig.Graph:
    """Select the conversion graph matching the requested level of converter support

    Parameters
    ----------
    only : Literal["all"] | Literal["supported"] | Literal["registered"], optional
        Which graph to return:
        - "all" (default): `self.graph`
        - "supported": `self.supported_graph`
        - "registered": `self.registered_graph`

    Returns
    -------
    ig.Graph
        The graph stored on this object for the requested support level

    Raises
    ------
    ValueError
        If `only` is not one of the allowed values
    """
    # Guard clauses, one per allowed value; anything falling through is invalid
    if only == "registered":
        return self.registered_graph
    if only == "supported":
        return self.supported_graph
    if only == "all":
        return self.graph

    raise ValueError(f"Invalid value \"{only}\" for keyword argument `only`. Allowed values are \"all\" "
                     "(default), \"supported\", and \"registered\".")
604
665
 
605
- self.table[conv_id][in_id][out_id] = 1
666
def _get_possible_converters(self, in_format_info: FormatInfo, out_format_info: FormatInfo,
                             only: Literal["all"] | Literal["supported"] | Literal["registered"] = "all"):
    """Get the names of all converters which can convert directly from one format to another

    Parameters
    ----------
    in_format_info : FormatInfo
        Info on the input format; its `id` is used as the source vertex
    out_format_info : FormatInfo
        Info on the output format; its `id` is used as the target vertex
    only : Literal["all"] | Literal["supported"] | Literal["registered"], optional
        Which graph to query, as interpreted by `_get_desired_graph`

    Returns
    -------
    list
        The value stored under `DB_NAME_KEY` on each edge from the input format to the output format
    """
    matching_edges = self._get_desired_graph(only).es.select(_source=in_format_info.id,
                                                             _target=out_format_info.id)

    # Each edge carries the name of the converter which performs that conversion
    l_converter_names = []
    for edge in matching_edges:
        l_converter_names.append(edge[DB_NAME_KEY])

    return l_converter_names
606
673
 
674
+ @lru_cache(maxsize=None)
607
675
  def get_conversion_quality(self,
608
676
  converter_name: str,
609
677
  in_format: str | int,
@@ -633,14 +701,12 @@ class ConversionsTable:
633
701
  else:
634
702
  which_format = 0
635
703
 
636
- # Get info about the converter and formats
637
- conv_id = self.parent.get_converter_info(converter_name).id
638
- in_info = self.parent.get_format_info(in_format, which_format)
639
- out_info: int = self.parent.get_format_info(out_format, which_format)
704
+ # Get the full format info for each format
705
+ in_format_info = self.parent.get_format_info(in_format, which_format)
706
+ out_format_info: int = self.parent.get_format_info(out_format, which_format)
640
707
 
641
708
  # First check if the conversion is possible
642
- success_flag = self.table[conv_id][in_info.id][out_info.id]
643
- if not success_flag:
709
+ if converter_name not in self._get_possible_converters(in_format_info, out_format_info):
644
710
  return None
645
711
 
646
712
  # The conversion is possible. Now determine how many properties of the output format are not in the input
@@ -649,9 +715,9 @@ class ConversionsTable:
649
715
  num_new_props = 0
650
716
  any_unknown = False
651
717
  d_prop_conversion_info: dict[str, PropertyConversionInfo] = {}
652
- for prop in const.D_QUAL_LABELS:
653
- in_prop: bool | None = getattr(in_info, prop)
654
- out_prop: bool | None = getattr(out_info, prop)
718
+ for prop in FormatInfo.D_PROPERTY_ATTRS:
719
+ in_prop: bool | None = getattr(in_format_info, prop)
720
+ out_prop: bool | None = getattr(out_format_info, prop)
655
721
 
656
722
  d_prop_conversion_info[prop] = PropertyConversionInfo(prop, in_prop, out_prop)
657
723
 
@@ -699,7 +765,9 @@ class ConversionsTable:
699
765
 
700
766
  def get_possible_conversions(self,
701
767
  in_format: str | int,
702
- out_format: str | int) -> list[tuple[str, FormatInfo, FormatInfo]]:
768
+ out_format: str | int,
769
+ only: Literal["all"] | Literal["supported"] | Literal["registered"] = "all"
770
+ ) -> list[tuple[ConverterInfo, FormatInfo, FormatInfo]]:
703
771
  """Get a list of converters which can perform a conversion from one format to another, disambiguating in the
704
772
  case of ambiguous formats and providing IDs for input/output formats for possible conversions
705
773
 
@@ -712,10 +780,10 @@ class ConversionsTable:
712
780
 
713
781
  Returns
714
782
  -------
715
- list[tuple[str, FormatInfo, FormatInfo]]
716
- A list of tuples, where each tuple's first item is the name of a converter which can perform a matching
717
- conversion, the second is the info of the input format for this conversion, and the third is the info of the
718
- output format
783
+ list[tuple[ConverterInfo, FormatInfo, FormatInfo]]
784
+ A list of tuples, where each tuple's first item is the ConverterInfo of a converter which can perform a
785
+ matching conversion, the second is the info of the input format for this conversion, and the third is the
786
+ info of the output format
719
787
  """
720
788
  l_in_format_infos = self.parent.get_format_info(in_format, which="all")
721
789
  l_out_format_infos = self.parent.get_format_info(out_format, which="all")
@@ -726,20 +794,107 @@ class ConversionsTable:
726
794
  # Iterate over all possible combinations of input and output formats
727
795
  for in_format_info, out_format_info in product(l_in_format_infos, l_out_format_infos):
728
796
 
729
- # Slice the table to get a list of the success for this conversion for each converter
730
- l_converter_success = [x[in_format_info.id][out_format_info.id] for x in self.table]
731
-
732
- # Filter for possible conversions and get the converter name and degree-of-success string
733
- # for each possible conversion
734
- l_converter_names = [self.parent.get_converter_info(converter_id).name
735
- for converter_id, possible_flag
736
- in enumerate(l_converter_success) if possible_flag > 0]
797
+ # Filter for converters which can perform this conversion
798
+ l_converter_names = self._get_possible_converters(in_format_info, out_format_info, only=only)
737
799
 
738
800
  for converter_name in l_converter_names:
739
- l_possible_conversions.append((converter_name, in_format_info, out_format_info))
801
+ l_possible_conversions.append((self.parent.get_converter_info(converter_name),
802
+ in_format_info, out_format_info))
740
803
 
741
804
  return l_possible_conversions
742
805
 
806
@lru_cache
def _get_shared_attrs(self, source_format, target_format):
    """Get the property attributes which are truthy for both the source and the target format

    The result is memoized by `lru_cache`, so the same list object is handed back on repeated calls with the
    same arguments - callers should copy it before modifying it.

    Parameters
    ----------
    source_format
        The source format, in any form accepted by the parent database's `get_format_info`
    target_format
        The target format, in any form accepted by the parent database's `get_format_info`

    Returns
    -------
    list[str]
        The keys of `FormatInfo.D_PROPERTY_ATTRS` for which both formats have a truthy attribute value
    """
    first_info = self.parent.get_format_info(source_format)
    second_info = self.parent.get_format_info(target_format)

    return [attr for attr in FormatInfo.D_PROPERTY_ATTRS
            if getattr(first_info, attr) and getattr(second_info, attr)]
820
+
821
def _get_info_loss(self, path):
    """Get the number of attributes featured by both the first and last format of a conversion path which would
    be lost at some point if the path is traversed

    Parameters
    ----------
    path : list[int]
        The formats visited along the path, in order. Each entry is passed to the parent database's
        `get_format_info` (presumably these are format IDs, as produced by the conversion graph)

    Returns
    -------
    int
        How many of the attributes shared by the first and last format are not featured by at least one format
        after the first along the path. 0 if the two end formats share no attributes
    """
    l_shared_attrs = self._get_shared_attrs(path[0], path[-1])

    if not l_shared_attrs:
        return 0

    # Work on a separate list so the (cached) shared list is never modified
    l_kept_attrs = list(l_shared_attrs)

    # Look up each subsequent format by its entry in the path - not by its position within the path
    for format_id in path[1:]:
        target_format_info = self.parent.get_format_info(format_id)

        # Build a new list rather than removing while iterating, which would skip the attr after each removal
        l_kept_attrs = [attr for attr in l_kept_attrs if getattr(target_format_info, attr)]

        if not l_kept_attrs:
            break

    return len(l_shared_attrs) - len(l_kept_attrs)
844
+
845
def get_conversion_pathway(self,
                           in_format: str | int | FormatInfo,
                           out_format: str | int | FormatInfo,
                           only: Literal["all"] | Literal["supported"] | Literal["registered"] = "all"
                           ) -> list[tuple[ConverterInfo, FormatInfo, FormatInfo]] | None:
    """Get a pathway of one or more conversions to convert from one format to another

    Parameters
    ----------
    in_format : str | int | FormatInfo
        The input format, in any form accepted by the parent database's `get_format_info`
    out_format : str | int | FormatInfo
        The output format, in any form accepted by the parent database's `get_format_info`
    only : Literal["all"] | Literal["supported"] | Literal["registered"], optional
        Which converters to limit the search to, as interpreted by `_get_desired_graph`. This applies both to
        the direct-conversion check and to the multi-step search

    Returns
    -------
    list[tuple[ConverterInfo, FormatInfo, FormatInfo]] | None
        `None` if the input and output formats are the same or if no pathway exists. Otherwise a list of steps,
        each a tuple of the info of the converter to use, the step's input format, and the step's output format
    """

    in_format_info = self.parent.get_format_info(in_format)
    out_format_info = self.parent.get_format_info(out_format)

    # Check if the formats are the same
    if in_format_info is out_format_info:
        return None

    # First check if direct conversion is possible, honouring the `only` restriction
    l_direct_converter_names = self._get_possible_converters(in_format_info, out_format_info, only=only)
    if l_direct_converter_names:
        # TODO: When there's some better measure of conversion quality, use it to choose which converter to use
        return [(self.parent.get_converter_info(l_direct_converter_names[0]),
                 in_format_info,
                 out_format_info)]

    # Query the graph for every shortest path, so that there is actually a choice of path to compare below.
    # Empty paths (which can be reported for an unreachable target) are discarded
    graph: ig.Graph = self._get_desired_graph(only)
    l_paths: list[list[int]] = [path for path
                                in graph.get_all_shortest_paths(in_format_info.id, to=out_format_info.id)
                                if path]

    # Check if any paths are possible
    if not l_paths:
        return None

    # Choose the path which loses the least info; `min` keeps the first such path in case of ties
    best_path: list[int] = min(l_paths, key=self._get_info_loss)

    # Output the best path in the desired format
    l_steps: list[tuple[ConverterInfo, FormatInfo, FormatInfo]] = []
    for source_id, target_id in zip(best_path, best_path[1:]):
        # If multiple converters can perform this step, the first edge found is used
        converter_name: str = graph.es.select(_source=source_id, _target=target_id)[0][DB_NAME_KEY]
        l_steps.append((self.parent.get_converter_info(converter_name),
                        self.parent.get_format_info(source_id),
                        self.parent.get_format_info(target_id)))

    return l_steps
897
+
743
898
  def get_possible_formats(self, converter_name: str) -> tuple[list[FormatInfo], list[FormatInfo]]:
744
899
  """Get a list of input and output formats that a given converter supports
745
900
 
@@ -754,21 +909,10 @@ class ConversionsTable:
754
909
  A tuple of a list of the supported input formats and a list of the supported output formats
755
910
  """
756
911
  conv_id: int = self.parent.get_converter_info(converter_name).id
757
- ll_in_out_format_success = self.table[conv_id]
758
-
759
- # Filter for possible input formats by checking if at least one output format for each has a degree of success
760
- # index greater than 0, and stored the filtered lists where the input format is possible so we only need to
761
- # check them for possible output formats
762
- (l_possible_in_format_ids,
763
- ll_filtered_in_out_format_success) = zip(*[(i, l_out_format_success) for i, l_out_format_success
764
- in enumerate(ll_in_out_format_success)
765
- if sum(l_out_format_success) > 0])
766
-
767
- # As with input IDs, filter for output IDs where at least one input format has a degree of success index greater
768
- # than 0. A bit more complicated for the second index, forcing us to do list comprehension to fetch a list
769
- # across the table before summing
770
- l_possible_out_format_ids = [j for j, _ in enumerate(ll_filtered_in_out_format_success[0]) if
771
- sum([x[j] for x in ll_filtered_in_out_format_success]) > 0]
912
+
913
+ l_conversion_edges = self.graph.es.select(**{DB_CONV_ID_KEY: conv_id})
914
+ l_possible_in_format_ids = list({x.source for x in l_conversion_edges})
915
+ l_possible_out_format_ids = list({x.target for x in l_conversion_edges})
772
916
 
773
917
  # Get the name for each format ID, and return lists of the names
774
918
  return ([self.parent.get_format_info(x) for x in l_possible_in_format_ids],
@@ -875,9 +1019,9 @@ class DataConversionDatabase:
875
1019
  self._l_format_info: list[FormatInfo | None] = [None] * (max_id+1)
876
1020
 
877
1021
  for d_single_format_info in self.formats:
878
- name: str = d_single_format_info[DB_FORMAT_EXT_KEY]
1022
+ lc_name: str = d_single_format_info[DB_FORMAT_EXT_KEY]
879
1023
 
880
- format_info = FormatInfo(name=name,
1024
+ format_info = FormatInfo(name=lc_name,
881
1025
  parent=self,
882
1026
  d_single_format_info=d_single_format_info)
883
1027
 
@@ -887,24 +1031,17 @@ class DataConversionDatabase:
887
1031
  self._conversions_table = ConversionsTable(l_converts_to=self.converts_to,
888
1032
  parent=self)
889
1033
 
890
- # Use the conversions table to prune any formats which have no valid conversions
1034
+ # Use the conversions graph to prune any formats which have no valid conversions
891
1035
 
892
1036
  # Get a slice of the table which only includes supported converters
893
- l_supported_converter_ids = [self.get_converter_info(x).id for x in D_SUPPORTED_CONVERTERS]
894
- supported_table = [self._conversions_table.table[x] for x in l_supported_converter_ids]
1037
+ supported_graph = self._conversions_table.supported_graph
895
1038
 
896
1039
  for format_id, format_info in enumerate(self._l_format_info):
897
1040
  if not format_info:
898
1041
  continue
899
1042
 
900
- # Check if the format is supported as the input format for any conversion
901
- ll_possible_from_conversions = [x[format_id] for x in supported_table]
902
- if sum([sum(x) for x in ll_possible_from_conversions]) > 0:
903
- continue
904
-
905
- # Check if the format is supported as the output format for any conversion
906
- ll_possible_to_conversions = [[y[format_id] for y in x] for x in supported_table]
907
- if sum([sum(x) for x in ll_possible_to_conversions]) > 0:
1043
+ # Check if the format is supported as the input or output format for any conversion
1044
+ if supported_graph.degree(format_id) > 0:
908
1045
  continue
909
1046
 
910
1047
  # If we get here, the format isn't supported for any conversions, so remove it from our list
@@ -918,14 +1055,14 @@ class DataConversionDatabase:
918
1055
  if not format_info:
919
1056
  continue
920
1057
 
921
- name = format_info.name
1058
+ lc_name = format_info.name.lower()
922
1059
 
923
1060
  # Each name may correspond to multiple formats, so we use a list for each entry to list all possible
924
1061
  # formats for each name
925
- if name not in self._d_format_info:
926
- self._d_format_info[name] = []
1062
+ if lc_name not in self._d_format_info:
1063
+ self._d_format_info[lc_name] = []
927
1064
 
928
- self._d_format_info[name].append(format_info)
1065
+ self._d_format_info[lc_name].append(format_info)
929
1066
 
930
1067
  def get_converter_info(self, converter_name_or_id: str | int) -> ConverterInfo:
931
1068
  """Get a converter's info from either its name or ID
@@ -987,6 +1124,9 @@ class DataConversionDatabase:
987
1124
  if format_name_or_id.startswith("."):
988
1125
  format_name_or_id = format_name_or_id[1:]
989
1126
 
1127
+ # Convert the format name to lower-case to handle it case-insensitively
1128
+ format_name_or_id = format_name_or_id.lower()
1129
+
990
1130
  # Check for a hyphen in the format, which indicates a preference from the user as to which, overriding the
991
1131
  # `which` kwarg
992
1132
  if "-" in format_name_or_id:
@@ -1050,6 +1190,22 @@ class DataConversionDatabase:
1050
1190
  _database: DataConversionDatabase | None = None
1051
1191
 
1052
1192
 
1193
def get_database_path() -> str:
    """Get the path to the database file, located alongside the package's constants module

    Returns
    -------
    str
        The directory of the resolved (via `os.path.realpath`) constants module file, joined with
        `const.DATABASE_FILENAME`
    """

    # For an interactive shell, __file__ won't be defined for this module, so the constants module is used as the
    # reference point instead
    package_dir = os.path.dirname(os.path.realpath(const.__file__))

    return os.path.join(package_dir, const.DATABASE_FILENAME)
1207
+
1208
+
1053
1209
  def load_database() -> DataConversionDatabase:
1054
1210
  """Load and return a new instance of the data conversion database from the JSON database file in this package. This
1055
1211
  function should not be called directly unless you specifically need a new instance of the database object and can't
@@ -1061,12 +1217,7 @@ def load_database() -> DataConversionDatabase:
1061
1217
  """
1062
1218
 
1063
1219
  # Find and load the database JSON file
1064
-
1065
- # For an interactive shell, __file__ won't be defined for this module, so use the constants module instead
1066
- reference_file = os.path.realpath(const.__file__)
1067
-
1068
- qualified_database_filename = os.path.join(os.path.dirname(reference_file), const.DATABASE_FILENAME)
1069
- d_data: dict = json.load(open(qualified_database_filename, "r"))
1220
+ d_data: dict = json.load(open(get_database_path(), "r"))
1070
1221
 
1071
1222
  return DataConversionDatabase(d_data)
1072
1223
 
@@ -1166,7 +1317,7 @@ def get_conversion_quality(converter_name: str,
1166
1317
 
1167
1318
 
1168
1319
  def get_possible_conversions(in_format: str | int,
1169
- out_format: str | int) -> list[tuple[str, FormatInfo, FormatInfo]]:
1320
+ out_format: str | int) -> list[tuple[ConverterInfo, FormatInfo, FormatInfo]]:
1170
1321
  """Get a list of converters which can perform a conversion from one format to another and disambiguate in the case
1171
1322
  of ambiguous input/output formats
1172
1323
 
@@ -1179,8 +1330,8 @@ def get_possible_conversions(in_format: str | int,
1179
1330
 
1180
1331
  Returns
1181
1332
  -------
1182
- list[tuple[str, FormatInfo, FormatInfo]]
1183
- A list of tuples, where each tuple's first item is the name of a converter which can perform a matching
1333
+ list[tuple[ConverterInfo, FormatInfo, FormatInfo]]
1334
+ A list of tuples, where each tuple's first item is the ConverterInfo of a converter which can perform a matching
1184
1335
  conversion, the second is the info of the input format for this conversion, and the third is the info of the
1185
1336
  output format
1186
1337
  """
@@ -1189,6 +1340,51 @@ def get_possible_conversions(in_format: str | int,
1189
1340
  out_format=out_format)
1190
1341
 
1191
1342
 
1343
def get_conversion_pathway(in_format: str | int | FormatInfo,
                           out_format: str | int | FormatInfo,
                           only: Literal["all"] | Literal["supported"] | Literal["registered"] = "all"
                           ) -> list[tuple[ConverterInfo, FormatInfo, FormatInfo]] | None:
    """Get a list of conversions that can be performed to convert one format to another. This is primarily used when
    a direct conversion is not supported by any individual converter. Only one possible pathway will be returned,
    prioritising pathways which do not lose and then re-extrapolate any information stored by some formats and not
    others along the path.

    This is a convenience wrapper which forwards to the `get_conversion_pathway` method of the conversions table of
    the database returned by `get_database()`.

    Parameters
    ----------
    in_format : str | int | FormatInfo
        The input file format. For this function, the format must be defined uniquely, either by using a
        disambiguated extension, ID, or FormatInfo
    out_format : str | int | FormatInfo
        The output file format. For this function, the format must be defined uniquely, either by using a
        disambiguated extension, ID, or FormatInfo
    only : Literal["all"] | Literal["supported"] | Literal["registered"], optional
        Which converters to limit the pathway search to:
        - "all" (default): All known converters
        - "supported": Only converters supported by this utility, even if not currently available (e.g. they don't
          work on your OS)
        - "registered": Only converters supported by this utility and currently available

    Returns
    -------
    list[tuple[ConverterInfo, FormatInfo, FormatInfo]] | None
        Will return `None` if no conversion pathway is possible or if the input and output formats are the same.
        Otherwise, will return a list of steps in the pathway, each being a tuple of:

        converter_info : ConverterInfo
            Info on the converter used to perform this step
        in_format : FormatInfo
            Input format for this step (if the first step, will be the input format to this function, otherwise
            will be the output format of the previous step)
        out_format : FormatInfo
            Output format from this step (if the last step, will be the output format for this function, otherwise
            will be the input format of the next step)
    """

    conversions_table = get_database().conversions_table

    return conversions_table.get_conversion_pathway(in_format=in_format,
                                                    out_format=out_format,
                                                    only=only)
1386
+
1387
+
1192
1388
  def disambiguate_formats(converter_name: str,
1193
1389
  in_format: str | int | FormatInfo,
1194
1390
  out_format: str | int | FormatInfo) -> tuple[FormatInfo, FormatInfo]:
@@ -1216,11 +1412,11 @@ def disambiguate_formats(converter_name: str,
1216
1412
  """
1217
1413
 
1218
1414
  # Regularize the converter name so we don't worry about case/spacing mismatches
1219
- converter_name = regularize_name(converter_name)
1415
+ converter_reg_name = regularize_name(converter_name)
1220
1416
 
1221
1417
  # Get all possible conversions, and see if we only have one for this converter
1222
1418
  l_possible_conversions = [x for x in get_possible_conversions(in_format, out_format)
1223
- if x[0] == converter_name]
1419
+ if x[0].name == converter_reg_name]
1224
1420
 
1225
1421
  if len(l_possible_conversions) == 1:
1226
1422
  return l_possible_conversions[0][1], l_possible_conversions[0][2]
@@ -0,0 +1,5 @@
1
+ """
2
+ # GUI package
3
+
4
+ This package contains functions, classes, and information used for the Flask-served website (aka GUI)
5
+ """