psdi-data-conversion 0.0.37__py3-none-any.whl → 0.0.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. psdi_data_conversion/app.py +64 -14
  2. psdi_data_conversion/constants.py +6 -5
  3. psdi_data_conversion/converter.py +20 -13
  4. psdi_data_conversion/converters/base.py +75 -68
  5. psdi_data_conversion/converters/c2x.py +14 -0
  6. psdi_data_conversion/converters/openbabel.py +12 -11
  7. psdi_data_conversion/database.py +361 -115
  8. psdi_data_conversion/dist.py +2 -1
  9. psdi_data_conversion/file_io.py +1 -2
  10. psdi_data_conversion/log_utility.py +1 -1
  11. psdi_data_conversion/main.py +152 -70
  12. psdi_data_conversion/static/content/index-versions/psdi-common-footer.html +12 -8
  13. psdi_data_conversion/static/content/psdi-common-footer.html +12 -8
  14. psdi_data_conversion/static/data/data.json +617 -3
  15. psdi_data_conversion/static/javascript/convert.js +54 -6
  16. psdi_data_conversion/static/javascript/convert_common.js +16 -2
  17. psdi_data_conversion/static/javascript/data.js +36 -4
  18. psdi_data_conversion/static/javascript/format.js +22 -9
  19. psdi_data_conversion/static/styles/format.css +7 -0
  20. psdi_data_conversion/templates/index.htm +57 -48
  21. psdi_data_conversion/testing/constants.py +3 -0
  22. psdi_data_conversion/testing/conversion_callbacks.py +4 -3
  23. psdi_data_conversion/testing/conversion_test_specs.py +44 -20
  24. psdi_data_conversion/testing/gui.py +362 -294
  25. psdi_data_conversion/testing/utils.py +38 -19
  26. {psdi_data_conversion-0.0.37.dist-info → psdi_data_conversion-0.0.39.dist-info}/METADATA +88 -4
  27. {psdi_data_conversion-0.0.37.dist-info → psdi_data_conversion-0.0.39.dist-info}/RECORD +30 -30
  28. {psdi_data_conversion-0.0.37.dist-info → psdi_data_conversion-0.0.39.dist-info}/WHEEL +0 -0
  29. {psdi_data_conversion-0.0.37.dist-info → psdi_data_conversion-0.0.39.dist-info}/entry_points.txt +0 -0
  30. {psdi_data_conversion-0.0.37.dist-info → psdi_data_conversion-0.0.39.dist-info}/licenses/LICENSE +0 -0
@@ -7,14 +7,15 @@ Python module provide utilities for accessing the converter database
7
7
 
8
8
  from __future__ import annotations
9
9
 
10
- from dataclasses import dataclass, field
11
10
  import json
12
- from logging import getLogger
13
11
  import os
14
- from typing import Any
12
+ from dataclasses import dataclass, field
13
+ from itertools import product
14
+ from logging import getLogger
15
+ from typing import Any, Literal, overload
15
16
 
16
17
  from psdi_data_conversion import constants as const
17
- from psdi_data_conversion.converter import D_REGISTERED_CONVERTERS
18
+ from psdi_data_conversion.converter import D_REGISTERED_CONVERTERS, D_SUPPORTED_CONVERTERS
18
19
  from psdi_data_conversion.converters.base import FileConverterException
19
20
 
20
21
  # Keys for top-level and general items in the database
@@ -31,6 +32,7 @@ DB_URL_KEY = "url"
31
32
 
32
33
  # Keys for format general info in the database
33
34
  DB_FORMAT_EXT_KEY = "extension"
35
+ DB_FORMAT_C2X_KEY = "format"
34
36
  DB_FORMAT_NOTE_KEY = "note"
35
37
  DB_FORMAT_COMP_KEY = "composition"
36
38
  DB_FORMAT_CONN_KEY = "connections"
@@ -418,15 +420,55 @@ class FormatInfo:
418
420
 
419
421
  # Load attributes from input
420
422
  self.name = name
423
+ """The name of this format"""
424
+
421
425
  self.parent = parent
426
+ """The database which this format belongs to"""
422
427
 
423
428
  # Load attributes from the database
424
429
  self.id: int = d_single_format_info.get(DB_ID_KEY, -1)
430
+ """The ID of this format"""
431
+
432
+ self.c2x_format: str = d_single_format_info.get(DB_FORMAT_C2X_KEY)
433
+ """The name of this format as the c2x converter expects it"""
434
+
425
435
  self.note: str = d_single_format_info.get(DB_FORMAT_NOTE_KEY, "")
436
+ """The description of this format"""
437
+
426
438
  self.composition = d_single_format_info.get(DB_FORMAT_COMP_KEY)
439
+ """Whether or not this format stores composition information"""
440
+
427
441
  self.connections = d_single_format_info.get(DB_FORMAT_CONN_KEY)
442
+ """Whether or not this format stores connections information"""
443
+
428
444
  self.two_dim = d_single_format_info.get(DB_FORMAT_2D_KEY)
445
+ """Whether or not this format stores 2D structural information"""
446
+
429
447
  self.three_dim = d_single_format_info.get(DB_FORMAT_3D_KEY)
448
+ """Whether or not this format stores 3D structural information"""
449
+
450
+ self._disambiguated_name: str | None = None
451
+
452
+ @property
453
+ def disambiguated_name(self) -> str:
454
+ """A unique name for this format which can be used to distinguish it from others which share the same extension,
455
+ by appending the name of each with a unique index"""
456
+ if self._disambiguated_name is None:
457
+ l_formats_with_same_name = [x for x in self.parent.l_format_info if x and x.name == self.name]
458
+ if len(l_formats_with_same_name) == 1:
459
+ self._disambiguated_name = self.name
460
+ else:
461
+ index_of_this = [i for i, x in enumerate(l_formats_with_same_name) if self is x][0]
462
+ self._disambiguated_name = f"{self.name}-{index_of_this}"
463
+ return self._disambiguated_name
464
+
465
+ def __str__(self):
466
+ """When cast to string, convert to the name (extension) of the format"""
467
+ return self.name
468
+
469
+ def __int__(self):
470
+ """When cast to int, return the ID of the format"""
471
+ return self.id
430
472
 
431
473
 
432
474
  @dataclass
@@ -530,7 +572,6 @@ class ConversionsTable:
530
572
  Raises
531
573
  ------
532
574
  FileConverterDatabaseException
533
- _description_
534
575
  """
535
576
 
536
577
  self.parent = parent
@@ -543,8 +584,8 @@ class ConversionsTable:
543
584
  num_converters = len(parent.converters)
544
585
  num_formats = len(parent.formats)
545
586
 
546
- self._table = [[[0 for k in range(num_formats+1)] for j in range(num_formats+1)]
547
- for i in range(num_converters+1)]
587
+ self.table = [[[0 for k in range(num_formats+1)] for j in range(num_formats+1)]
588
+ for i in range(num_converters+1)]
548
589
 
549
590
  for possible_conversion in l_converts_to:
550
591
 
@@ -556,22 +597,22 @@ class ConversionsTable:
556
597
  raise FileConverterDatabaseException(
557
598
  f"Malformed 'converts_to' entry in database: {possible_conversion}")
558
599
 
559
- self._table[conv_id][in_id][out_id] = 1
600
+ self.table[conv_id][in_id][out_id] = 1
560
601
 
561
602
  def get_conversion_quality(self,
562
603
  converter_name: str,
563
- in_format: str,
564
- out_format: str) -> ConversionQualityInfo | None:
604
+ in_format: str | int,
605
+ out_format: str | int) -> ConversionQualityInfo | None:
565
606
  """Get an indication of the quality of a conversion from one format to another, or if it's not possible
566
607
 
567
608
  Parameters
568
609
  ----------
569
610
  converter_name : str
570
611
  The name of the converter
571
- in_format : str
572
- The extension of the input file format
573
- out_format : str
574
- The extension of the output file format
612
+ in_format : str | int
613
+ The extension or ID of the input file format
614
+ out_format : str | int
615
+ The extension or ID of the output file format
575
616
 
576
617
  Returns
577
618
  -------
@@ -580,12 +621,20 @@ class ConversionsTable:
580
621
  `ConversionQualityInfo` object with info on the conversion
581
622
  """
582
623
 
583
- conv_id: int = self.parent.get_converter_info(converter_name).id
584
- in_info = self.parent.get_format_info(in_format)
585
- out_info: int = self.parent.get_format_info(out_format)
624
+ # Check if this converter deals with ambiguous formats, so we know if we need to be strict about getting format
625
+ # info
626
+ if D_REGISTERED_CONVERTERS[converter_name].supports_ambiguous_extensions:
627
+ which_format = None
628
+ else:
629
+ which_format = 0
630
+
631
+ # Get info about the converter and formats
632
+ conv_id = self.parent.get_converter_info(converter_name).id
633
+ in_info = self.parent.get_format_info(in_format, which_format)
634
+ out_info: int = self.parent.get_format_info(out_format, which_format)
586
635
 
587
636
  # First check if the conversion is possible
588
- success_flag = self._table[conv_id][in_info.id][out_info.id]
637
+ success_flag = self.table[conv_id][in_info.id][out_info.id]
589
638
  if not success_flag:
590
639
  return None
591
640
 
@@ -643,40 +692,50 @@ class ConversionsTable:
643
692
  details=details,
644
693
  d_prop_conversion_info=d_prop_conversion_info)
645
694
 
646
- def get_possible_converters(self,
647
- in_format: str,
648
- out_format: str) -> list[str]:
649
- """Get a list of converters which can perform a conversion from one format to another and the degree of success
650
- with each of these converters
695
+ def get_possible_conversions(self,
696
+ in_format: str | int,
697
+ out_format: str | int) -> list[tuple[str, FormatInfo, FormatInfo]]:
698
+ """Get a list of converters which can perform a conversion from one format to another, disambiguating in the
699
+ case of ambiguous formats and providing IDs for input/output formats for possible conversions
651
700
 
652
701
  Parameters
653
702
  ----------
654
- in_format : str
655
- The extension of the input file format
656
- out_format : str
657
- The extension of the output file format
703
+ in_format : str | int
704
+ The extension or ID of the input file format
705
+ out_format : str | int
706
+ The extension or ID of the output file format
658
707
 
659
708
  Returns
660
709
  -------
661
- list[tuple[str, str]]
662
- A list of tuples, where each tuple's first item is the name of a converter which can perform this
663
- conversion, and the second item is the degree of success for the conversion
710
+ list[tuple[str, FormatInfo, FormatInfo]]
711
+ A list of tuples, where each tuple's first item is the name of a converter which can perform a matching
712
+ conversion, the second is the info of the input format for this conversion, and the third is the info of the
713
+ output format
664
714
  """
665
- in_id: int = self.parent.get_format_info(in_format).id
666
- out_id: int = self.parent.get_format_info(out_format).id
715
+ l_in_format_infos = self.parent.get_format_info(in_format, which="all")
716
+ l_out_format_infos = self.parent.get_format_info(out_format, which="all")
717
+
718
+ # Start a list of all possible conversions
719
+ l_possible_conversions = []
720
+
721
+ # Iterate over all possible combinations of input and output formats
722
+ for in_format_info, out_format_info in product(l_in_format_infos, l_out_format_infos):
667
723
 
668
- # Slice the table to get a list of the success for this conversion for each converter
669
- l_converter_success = [x[in_id][out_id] for x in self._table]
724
+ # Slice the table to get a list of the success for this conversion for each converter
725
+ l_converter_success = [x[in_format_info.id][out_format_info.id] for x in self.table]
670
726
 
671
- # Filter for possible conversions and get the converter name and degree-of-success string
672
- # for each possible conversion
673
- l_possible_converters = [self.parent.get_converter_info(converter_id).name
727
+ # Filter for possible conversions and get the converter name
728
+ # for each possible conversion
729
+ l_converter_names = [self.parent.get_converter_info(converter_id).name
674
730
  for converter_id, possible_flag
675
731
  in enumerate(l_converter_success) if possible_flag > 0]
676
732
 
677
- return l_possible_converters
733
+ for converter_name in l_converter_names:
734
+ l_possible_conversions.append((converter_name, in_format_info, out_format_info))
678
735
 
679
- def get_possible_formats(self, converter_name: str) -> tuple[list[str], list[str]]:
736
+ return l_possible_conversions
737
+
738
+ def get_possible_formats(self, converter_name: str) -> tuple[list[FormatInfo], list[FormatInfo]]:
680
739
  """Get a list of input and output formats that a given converter supports
681
740
 
682
741
  Parameters
@@ -686,11 +745,11 @@ class ConversionsTable:
686
745
 
687
746
  Returns
688
747
  -------
689
- tuple[list[str], list[str]]
748
+ tuple[list[FormatInfo], list[FormatInfo]]
690
749
  A tuple of a list of the supported input formats and a list of the supported output formats
691
750
  """
692
751
  conv_id: int = self.parent.get_converter_info(converter_name).id
693
- ll_in_out_format_success = self._table[conv_id]
752
+ ll_in_out_format_success = self.table[conv_id]
694
753
 
695
754
  # Filter for possible input formats by checking if at least one output format for each has a degree of success
696
755
  # index greater than 0, and stored the filtered lists where the input format is possible so we only need to
@@ -707,8 +766,8 @@ class ConversionsTable:
707
766
  sum([x[j] for x in ll_filtered_in_out_format_success]) > 0]
708
767
 
709
768
  # Get the name for each format ID, and return lists of the names
710
- return ([self.parent.get_format_info(x).name for x in l_possible_in_format_ids],
711
- [self.parent.get_format_info(x).name for x in l_possible_out_format_ids])
769
+ return ([self.parent.get_format_info(x) for x in l_possible_in_format_ids],
770
+ [self.parent.get_format_info(x) for x in l_possible_out_format_ids])
712
771
 
713
772
 
714
773
  class DataConversionDatabase:
@@ -774,37 +833,12 @@ class DataConversionDatabase:
774
833
  return self._l_converter_info
775
834
 
776
835
  @property
777
- def d_format_info(self) -> dict[str, FormatInfo]:
836
+ def d_format_info(self) -> dict[str, list[FormatInfo]]:
778
837
  """Generate the format info dict when needed
779
838
  """
780
839
  if self._d_format_info is None:
781
- self._d_format_info: dict[str, FormatInfo] = {}
782
-
783
- for d_single_format_info in self.formats:
784
- name: str = d_single_format_info[DB_FORMAT_EXT_KEY]
785
-
786
- format_info = FormatInfo(name=name,
787
- parent=self,
788
- d_single_format_info=d_single_format_info)
789
-
790
- if name in self._d_format_info:
791
- logger.debug(f"File extension '{name}' appears more than once in the database. Duplicates will use "
792
- "a key appended with an index")
793
- loop_concluded = False
794
- for i in range(97):
795
- test_name = f"{name}-{i+2}"
796
- if test_name in self._d_format_info:
797
- continue
798
- else:
799
- self._d_format_info[test_name] = format_info
800
- loop_concluded = True
801
- break
802
- if not loop_concluded:
803
- logger.warning("Loop counter exceeded when searching for valid new name for file extension "
804
- f"'{name}'. New entry will not be added to the database to avoid possibility of "
805
- "an infinite loop")
806
- else:
807
- self._d_format_info[name] = format_info
840
+ self._init_formats_and_conversions()
841
+
808
842
  return self._d_format_info
809
843
 
810
844
  @property
@@ -812,13 +846,7 @@ class DataConversionDatabase:
812
846
  """Generate the format info list (indexed by ID) when needed
813
847
  """
814
848
  if self._l_format_info is None:
815
- # Pre-size a list based on the maximum ID plus 1 (since IDs are 1-indexed)
816
- max_id: int = max([x[DB_ID_KEY] for x in self.formats])
817
- self._l_format_info: list[FormatInfo | None] = [None] * (max_id+1)
818
-
819
- # Fill the list with all formats in the dict
820
- for single_format_info in self.d_format_info.values():
821
- self._l_format_info[single_format_info.id] = single_format_info
849
+ self._init_formats_and_conversions()
822
850
 
823
851
  return self._l_format_info
824
852
 
@@ -826,11 +854,74 @@ class DataConversionDatabase:
826
854
  def conversions_table(self) -> ConversionsTable:
827
855
  """Generates the conversions table when needed
828
856
  """
857
+
829
858
  if self._conversions_table is None:
830
- self._conversions_table = ConversionsTable(l_converts_to=self.converts_to,
831
- parent=self)
859
+ self._init_formats_and_conversions()
860
+
832
861
  return self._conversions_table
833
862
 
863
+ def _init_formats_and_conversions(self):
864
+ """Initializes the format list and dict and the conversions table"""
865
+
866
+ # Start by initializing the list of formats
867
+
868
+ # Pre-size a list based on the maximum ID plus 1 (since IDs are 1-indexed)
869
+ max_id: int = max([x[DB_ID_KEY] for x in self.formats])
870
+ self._l_format_info: list[FormatInfo | None] = [None] * (max_id+1)
871
+
872
+ for d_single_format_info in self.formats:
873
+ name: str = d_single_format_info[DB_FORMAT_EXT_KEY]
874
+
875
+ format_info = FormatInfo(name=name,
876
+ parent=self,
877
+ d_single_format_info=d_single_format_info)
878
+
879
+ self._l_format_info[format_info.id] = format_info
880
+
881
+ # Initialize the conversions table now
882
+ self._conversions_table = ConversionsTable(l_converts_to=self.converts_to,
883
+ parent=self)
884
+
885
+ # Use the conversions table to prune any formats which have no valid conversions
886
+
887
+ # Get a slice of the table which only includes supported converters
888
+ l_supported_converter_ids = [self.get_converter_info(x).id for x in D_SUPPORTED_CONVERTERS]
889
+ supported_table = [self._conversions_table.table[x] for x in l_supported_converter_ids]
890
+
891
+ for format_id, format_info in enumerate(self._l_format_info):
892
+ if not format_info:
893
+ continue
894
+
895
+ # Check if the format is supported as the input format for any conversion
896
+ ll_possible_from_conversions = [x[format_id] for x in supported_table]
897
+ if sum([sum(x) for x in ll_possible_from_conversions]) > 0:
898
+ continue
899
+
900
+ # Check if the format is supported as the output format for any conversion
901
+ ll_possible_to_conversions = [[y[format_id] for y in x] for x in supported_table]
902
+ if sum([sum(x) for x in ll_possible_to_conversions]) > 0:
903
+ continue
904
+
905
+ # If we get here, the format isn't supported for any conversions, so remove it from our list
906
+ self._l_format_info[format_id] = None
907
+
908
+ # Now create the formats dict, with only the pruned list of formats
909
+ self._d_format_info: dict[str, list[FormatInfo]] = {}
910
+
911
+ for format_info in self.l_format_info:
912
+
913
+ if not format_info:
914
+ continue
915
+
916
+ name = format_info.name
917
+
918
+ # Each name may correspond to multiple formats, so we use a list for each entry to list all possible
919
+ # formats for each name
920
+ if name not in self._d_format_info:
921
+ self._d_format_info[name] = []
922
+
923
+ self._d_format_info[name].append(format_info)
924
+
834
925
  def get_converter_info(self, converter_name_or_id: str | int) -> ConverterInfo:
835
926
  """Get a converter's info from either its name or ID
836
927
  """
@@ -838,7 +929,8 @@ class DataConversionDatabase:
838
929
  try:
839
930
  return self.d_converter_info[converter_name_or_id]
840
931
  except KeyError:
841
- raise FileConverterDatabaseException(f"Converter name '{converter_name_or_id}' not recognised")
932
+ raise FileConverterDatabaseException(f"Converter name '{converter_name_or_id}' not recognised",
933
+ help=True)
842
934
  elif isinstance(converter_name_or_id, int):
843
935
  return self.l_converter_info[converter_name_or_id]
844
936
  else:
@@ -846,20 +938,107 @@ class DataConversionDatabase:
846
938
  f" of type '{type(converter_name_or_id)}'. Type must be `str` or "
847
939
  "`int`")
848
940
 
849
- def get_format_info(self, format_name_or_id: str | int) -> FormatInfo:
850
- """Get a format's ID info from either its name or ID
941
+ @overload
942
+ def get_format_info(self,
943
+ format_name_or_id: str | int | FormatInfo,
944
+ which: int | None = None) -> FormatInfo: ...
945
+
946
+ @overload
947
+ def get_format_info(self,
948
+ format_name_or_id: str | int | FormatInfo,
949
+ which: Literal["all"]) -> list[FormatInfo]: ...
950
+
951
+ def get_format_info(self,
952
+ format_name_or_id: str | int | FormatInfo,
953
+ which: int | Literal["all"] | None = None) -> FormatInfo | list[FormatInfo]:
954
+ """Gets the information on a given file format stored in the database
955
+
956
+ Parameters
957
+ ----------
958
+ format_name_or_id : str | int | FormatInfo
959
+ The name (extension) of the format, or its ID. In the case of ambiguous extensions which could apply to
960
+ multiple formats, the ID must be used here or a FileConverterDatabaseException will be raised. This also
961
+ allows passing a FormatInfo to this, in which case that object will be silently returned, to allow
962
+ normalising the input to always be a FormatInfo when output from this
963
+ which : int | Literal["all"] | None
964
+ In the case that an extension string is provided which turns out to be ambiguous, which of the listed
965
+ possibilities to use from the zero-indexed list. Default None, which raises an exception for an ambiguous
966
+ format. 0 may be used to select the first in the database, which is often a good default choice. The literal
967
+ string "all" may be used to request all possibilities, in which case this method will return a list (even if
968
+ there are zero or one possibilities)
969
+
970
+ Returns
971
+ -------
972
+ FormatInfo | list[FormatInfo]
851
973
  """
974
+
975
+ if which == "all":
976
+ return_as_list = True
977
+ else:
978
+ return_as_list = False
979
+
852
980
  if isinstance(format_name_or_id, str):
853
- try:
854
- return self.d_format_info[format_name_or_id]
855
- except KeyError:
856
- raise FileConverterDatabaseException(f"Format name '{format_name_or_id}' not recognised")
981
+ # Silently strip leading period
982
+ if format_name_or_id.startswith("."):
983
+ format_name_or_id = format_name_or_id[1:]
984
+
985
+ # Check for a hyphen in the format, which indicates a preference from the user as to which, overriding the
986
+ # `which` kwarg
987
+ if "-" in format_name_or_id:
988
+ l_name_segments = format_name_or_id.split("-")
989
+ if len(l_name_segments) > 2:
990
+ raise FileConverterDatabaseException(f"Format name '{format_name_or_id} is improperly formatted - "
991
+ "It may contain at most one hyphen, separating the extension "
992
+ "from an index indicating which of the formats with that "
993
+ "extension to use, e.g. 'pdb-0', 'pdb-1', etc.",
994
+ help=True)
995
+ format_name_or_id = l_name_segments[0]
996
+ which = int(l_name_segments[1])
997
+
998
+ l_possible_format_info = self.d_format_info.get(format_name_or_id, [])
999
+
1000
+ if which == "all":
1001
+ return l_possible_format_info
1002
+
1003
+ elif len(l_possible_format_info) == 1:
1004
+ format_info = l_possible_format_info[0]
1005
+
1006
+ elif len(l_possible_format_info) == 0:
1007
+ raise FileConverterDatabaseException(f"Format name '{format_name_or_id}' not recognised",
1008
+ help=True)
1009
+
1010
+ elif which is not None and which < len(l_possible_format_info):
1011
+ format_info = l_possible_format_info[which]
1012
+
1013
+ else:
1014
+ msg = (f"Extension '{format_name_or_id}' is ambiguous and must be defined by ID. Possible formats "
1015
+ "and their IDs are:")
1016
+ for possible_format_info in l_possible_format_info:
1017
+ msg += (f"\n{possible_format_info.id}: {possible_format_info.disambiguated_name} "
1018
+ f"({possible_format_info.note})")
1019
+ raise FileConverterDatabaseException(msg, help=True)
1020
+
857
1021
  elif isinstance(format_name_or_id, int):
858
- return self.l_format_info[format_name_or_id]
1022
+ try:
1023
+ format_info = self.l_format_info[format_name_or_id]
1024
+ except IndexError:
1025
+ if return_as_list:
1026
+ return []
1027
+ raise FileConverterDatabaseException(f"Format ID '{format_name_or_id}' not recognised",
1028
+ help=True)
1029
+
1030
+ elif isinstance(format_name_or_id, FormatInfo):
1031
+ # Silently return the FormatInfo if it was used as a key here
1032
+ format_info = format_name_or_id
1033
+
859
1034
  else:
860
1035
  raise FileConverterDatabaseException(f"Invalid key passed to `get_format_info`: '{format_name_or_id}'"
861
1036
  f" of type '{type(format_name_or_id)}'. Type must be `str` or "
862
1037
  "`int`")
1038
+ if return_as_list:
1039
+ return [format_info]
1040
+
1041
+ return format_info
863
1042
 
864
1043
 
865
1044
  # The database will be loaded on demand when `get_database()` is called
@@ -919,35 +1098,55 @@ def get_converter_info(name: str) -> ConverterInfo:
919
1098
  return get_database().d_converter_info[name]
920
1099
 
921
1100
 
922
- def get_format_info(name: str) -> FormatInfo:
1101
+ @overload
1102
+ def get_format_info(format_name_or_id: str | int | FormatInfo,
1103
+ which: int | None = None) -> FormatInfo: ...
1104
+
1105
+
1106
+ @overload
1107
+ def get_format_info(format_name_or_id: str | int | FormatInfo,
1108
+ which: Literal["all"]) -> list[FormatInfo]: ...
1109
+
1110
+
1111
+ def get_format_info(format_name_or_id: str | int | FormatInfo,
1112
+ which: int | Literal["all"] | None = None) -> FormatInfo | list[FormatInfo]:
923
1113
  """Gets the information on a given file format stored in the database
924
1114
 
925
1115
  Parameters
926
1116
  ----------
927
- name : str
928
- The name (extension) of the form
1117
+ format_name_or_id : str | int | FormatInfo
1118
+ The name (extension) of the format, or its ID. In the case of ambiguous extensions which could apply to multiple
1119
+ formats, the ID must be used here or a FileConverterDatabaseException will be raised. This also allows passing a
1120
+ FormatInfo to this, in which case that object will be silently returned, to allow normalising the input to
1121
+ always be a FormatInfo when output from this
1122
+ which : int | Literal["all"] | None
1123
+ In the case that an extension string is provided which turns out to be ambiguous, which of the listed
1124
+ possibilities to use from the zero-indexed list. Default None, which raises an exception for an ambiguous
1125
+ format. 0 may be used to select the first in the database, which is often a good default choice. The literal
1126
+ string "all" may be used to request all possibilities, in which case this method will return a list (even if
1127
+ there are zero or one possibilities)
929
1128
 
930
1129
  Returns
931
1130
  -------
932
- FormatInfo
1131
+ FormatInfo | list[FormatInfo]
933
1132
  """
934
1133
 
935
- return get_database().d_format_info[name]
1134
+ return get_database().get_format_info(format_name_or_id, which)
936
1135
 
937
1136
 
938
1137
  def get_conversion_quality(converter_name: str,
939
- in_format: str,
940
- out_format: str) -> ConversionQualityInfo | None:
1138
+ in_format: str | int,
1139
+ out_format: str | int) -> ConversionQualityInfo | None:
941
1140
  """Get an indication of the quality of a conversion from one format to another, or if it's not possible
942
1141
 
943
1142
  Parameters
944
1143
  ----------
945
1144
  converter_name : str
946
1145
  The name of the converter
947
- in_format : str
948
- The extension of the input file format
949
- out_format : str
950
- The extension of the output file format
1146
+ in_format : str | int
1147
+ The extension or ID of the input file format
1148
+ out_format : str | int
1149
+ The extension or ID of the output file format
951
1150
 
952
1151
  Returns
953
1152
  -------
@@ -961,30 +1160,77 @@ def get_conversion_quality(converter_name: str,
961
1160
  out_format=out_format)
962
1161
 
963
1162
 
964
- def get_possible_converters(in_format: str,
965
- out_format: str) -> list[str]:
966
- """Get a list of converters which can perform a conversion from one format to another and the degree of success
967
- with each of these converters
1163
+ def get_possible_conversions(in_format: str | int,
1164
+ out_format: str | int) -> list[tuple[str, FormatInfo, FormatInfo]]:
1165
+ """Get a list of converters which can perform a conversion from one format to another and disambiguate in the case
1166
+ of ambiguous input/output formats
968
1167
 
969
1168
  Parameters
970
1169
  ----------
971
- in_format : str
972
- The extension of the input file format
973
- out_format : str
974
- The extension of the output file format
1170
+ in_format : str | int
1171
+ The extension or ID of the input file format
1172
+ out_format : str | int
1173
+ The extension or ID of the output file format
975
1174
 
976
1175
  Returns
977
1176
  -------
978
- list[tuple[str, str]]
979
- A list of tuples, where each tuple's first item is the name of a converter which can perform this
980
- conversion, and the second item is the degree of success for the conversion
1177
+ list[tuple[str, FormatInfo, FormatInfo]]
1178
+ A list of tuples, where each tuple's first item is the name of a converter which can perform a matching
1179
+ conversion, the second is the info of the input format for this conversion, and the third is the info of the
1180
+ output format
981
1181
  """
982
1182
 
983
- return get_database().conversions_table.get_possible_converters(in_format=in_format,
984
- out_format=out_format)
1183
+ return get_database().conversions_table.get_possible_conversions(in_format=in_format,
1184
+ out_format=out_format)
985
1185
 
986
1186
 
987
- def get_possible_formats(converter_name: str) -> tuple[list[str], list[str]]:
1187
+ def disambiguate_formats(converter_name: str,
1188
+ in_format: str | int | FormatInfo,
1189
+ out_format: str | int | FormatInfo) -> tuple[FormatInfo, FormatInfo]:
1190
+ """Try to disambiguate formats by seeing if there's only one possible conversion between formats matching those
1191
+ provided.
1192
+
1193
+ Parameters
1194
+ ----------
1195
+ converter_name : str
1196
+ The name of the converter
1197
+ in_format : str | int | FormatInfo
1198
+ The extension or ID of the input file format
1199
+ out_format : str | int | FormatInfo
1200
+ The extension or ID of the output file format
1201
+
1202
+ Returns
1203
+ -------
1204
+ tuple[FormatInfo, FormatInfo]
1205
+ The input and output format for this conversion, if only one combination is possible
1206
+
1207
+ Raises
1208
+ ------
1209
+ FileConverterDatabaseException
1210
+ If more than one format combination is possible for this conversion, or no conversion is possible
1211
+ """
1212
+
1213
+ # Get all possible conversions, and see if we only have one for this converter
1214
+ l_possible_conversions = [x for x in get_possible_conversions(in_format, out_format)
1215
+ if x[0] == converter_name]
1216
+
1217
+ if len(l_possible_conversions) == 1:
1218
+ return l_possible_conversions[0][1], l_possible_conversions[0][2]
1219
+ elif len(l_possible_conversions) == 0:
1220
+ raise FileConverterDatabaseException(f"Conversion from {in_format} to {out_format} with converter "
1221
+ f"{converter_name} is not supported", help=True)
1222
+ else:
1223
+ msg = (f"Conversion from {in_format} to {out_format} with converter {converter_name} is ambiguous.\n"
1224
+ "Possible matching conversions are:\n")
1225
+ for _, possible_in_format, possible_out_format in l_possible_conversions:
1226
+ msg += (f"{possible_in_format.disambiguated_name} ({possible_in_format.note}) to "
1227
+ f"{possible_out_format.disambiguated_name} ({possible_out_format.note})\n")
1228
+ # Trim the final newline from the message
1229
+ msg = msg[:-1]
1230
+ raise FileConverterDatabaseException(msg, help=True)
1231
+
1232
+
1233
+ def get_possible_formats(converter_name: str) -> tuple[list[FormatInfo], list[FormatInfo]]:
988
1234
  """Get a list of input and output formats that a given converter supports
989
1235
 
990
1236
  Parameters
@@ -994,7 +1240,7 @@ def get_possible_formats(converter_name: str) -> tuple[list[str], list[str]]:
994
1240
 
995
1241
  Returns
996
1242
  -------
997
- tuple[list[str], list[str]]
1243
+ tuple[list[FormatInfo], list[FormatInfo]]
998
1244
  A tuple of a list of the supported input formats and a list of the supported output formats
999
1245
  """
1000
1246
  return get_database().conversions_table.get_possible_formats(converter_name=converter_name)
@@ -1009,7 +1255,7 @@ def _find_arg(tl_args: tuple[list[FlagInfo], list[OptionInfo]],
1009
1255
  if len(l_found) > 0:
1010
1256
  return l_found[0]
1011
1257
  # If we get here, it wasn't found in either list
1012
- raise FileConverterDatabaseException(f"Argument {arg} was not found in the list of allowed arguments for this "
1258
+ raise FileConverterDatabaseException(f"Argument '{arg}' was not found in the list of allowed arguments for this "
1013
1259
  "conversion")
1014
1260
 
1015
1261