psdi-data-conversion 0.0.37__py3-none-any.whl → 0.0.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- psdi_data_conversion/app.py +64 -14
- psdi_data_conversion/constants.py +6 -5
- psdi_data_conversion/converter.py +20 -13
- psdi_data_conversion/converters/base.py +75 -68
- psdi_data_conversion/converters/c2x.py +14 -0
- psdi_data_conversion/converters/openbabel.py +12 -11
- psdi_data_conversion/database.py +361 -115
- psdi_data_conversion/dist.py +2 -1
- psdi_data_conversion/file_io.py +1 -2
- psdi_data_conversion/log_utility.py +1 -1
- psdi_data_conversion/main.py +152 -70
- psdi_data_conversion/static/content/index-versions/psdi-common-footer.html +12 -8
- psdi_data_conversion/static/content/psdi-common-footer.html +12 -8
- psdi_data_conversion/static/data/data.json +617 -3
- psdi_data_conversion/static/javascript/convert.js +54 -6
- psdi_data_conversion/static/javascript/convert_common.js +16 -2
- psdi_data_conversion/static/javascript/data.js +36 -4
- psdi_data_conversion/static/javascript/format.js +22 -9
- psdi_data_conversion/static/styles/format.css +7 -0
- psdi_data_conversion/templates/index.htm +57 -48
- psdi_data_conversion/testing/constants.py +3 -0
- psdi_data_conversion/testing/conversion_callbacks.py +4 -3
- psdi_data_conversion/testing/conversion_test_specs.py +44 -20
- psdi_data_conversion/testing/gui.py +362 -294
- psdi_data_conversion/testing/utils.py +38 -19
- {psdi_data_conversion-0.0.37.dist-info → psdi_data_conversion-0.0.39.dist-info}/METADATA +88 -4
- {psdi_data_conversion-0.0.37.dist-info → psdi_data_conversion-0.0.39.dist-info}/RECORD +30 -30
- {psdi_data_conversion-0.0.37.dist-info → psdi_data_conversion-0.0.39.dist-info}/WHEEL +0 -0
- {psdi_data_conversion-0.0.37.dist-info → psdi_data_conversion-0.0.39.dist-info}/entry_points.txt +0 -0
- {psdi_data_conversion-0.0.37.dist-info → psdi_data_conversion-0.0.39.dist-info}/licenses/LICENSE +0 -0
psdi_data_conversion/database.py
CHANGED
@@ -7,14 +7,15 @@ Python module provide utilities for accessing the converter database
|
|
7
7
|
|
8
8
|
from __future__ import annotations
|
9
9
|
|
10
|
-
from dataclasses import dataclass, field
|
11
10
|
import json
|
12
|
-
from logging import getLogger
|
13
11
|
import os
|
14
|
-
from
|
12
|
+
from dataclasses import dataclass, field
|
13
|
+
from itertools import product
|
14
|
+
from logging import getLogger
|
15
|
+
from typing import Any, Literal, overload
|
15
16
|
|
16
17
|
from psdi_data_conversion import constants as const
|
17
|
-
from psdi_data_conversion.converter import D_REGISTERED_CONVERTERS
|
18
|
+
from psdi_data_conversion.converter import D_REGISTERED_CONVERTERS, D_SUPPORTED_CONVERTERS
|
18
19
|
from psdi_data_conversion.converters.base import FileConverterException
|
19
20
|
|
20
21
|
# Keys for top-level and general items in the database
|
@@ -31,6 +32,7 @@ DB_URL_KEY = "url"
|
|
31
32
|
|
32
33
|
# Keys for format general info in the database
|
33
34
|
DB_FORMAT_EXT_KEY = "extension"
|
35
|
+
DB_FORMAT_C2X_KEY = "format"
|
34
36
|
DB_FORMAT_NOTE_KEY = "note"
|
35
37
|
DB_FORMAT_COMP_KEY = "composition"
|
36
38
|
DB_FORMAT_CONN_KEY = "connections"
|
@@ -418,15 +420,55 @@ class FormatInfo:
|
|
418
420
|
|
419
421
|
# Load attributes from input
|
420
422
|
self.name = name
|
423
|
+
"""The name of this format"""
|
424
|
+
|
421
425
|
self.parent = parent
|
426
|
+
"""The database which this format belongs to"""
|
422
427
|
|
423
428
|
# Load attributes from the database
|
424
429
|
self.id: int = d_single_format_info.get(DB_ID_KEY, -1)
|
430
|
+
"""The ID of this format"""
|
431
|
+
|
432
|
+
self.c2x_format: str = d_single_format_info.get(DB_FORMAT_C2X_KEY)
|
433
|
+
"""The name of this format as the c2x converter expects it"""
|
434
|
+
|
425
435
|
self.note: str = d_single_format_info.get(DB_FORMAT_NOTE_KEY, "")
|
436
|
+
"""The description of this format"""
|
437
|
+
|
426
438
|
self.composition = d_single_format_info.get(DB_FORMAT_COMP_KEY)
|
439
|
+
"""Whether or not this format stores composition information"""
|
440
|
+
|
427
441
|
self.connections = d_single_format_info.get(DB_FORMAT_CONN_KEY)
|
442
|
+
"""Whether or not this format stores connections information"""
|
443
|
+
|
428
444
|
self.two_dim = d_single_format_info.get(DB_FORMAT_2D_KEY)
|
445
|
+
"""Whether or not this format stores 2D structural information"""
|
446
|
+
|
429
447
|
self.three_dim = d_single_format_info.get(DB_FORMAT_3D_KEY)
|
448
|
+
"""Whether or not this format stores 3D structural information"""
|
449
|
+
|
450
|
+
self._disambiguated_name: str | None = None
|
451
|
+
|
452
|
+
@property
|
453
|
+
def disambiguated_name(self) -> str:
|
454
|
+
"""A unique name for this format which can be used to distinguish it from others which share the same extension,
|
455
|
+
by appending the name of each with a unique index"""
|
456
|
+
if self._disambiguated_name is None:
|
457
|
+
l_formats_with_same_name = [x for x in self.parent.l_format_info if x and x.name == self.name]
|
458
|
+
if len(l_formats_with_same_name) == 1:
|
459
|
+
self._disambiguated_name = self.name
|
460
|
+
else:
|
461
|
+
index_of_this = [i for i, x in enumerate(l_formats_with_same_name) if self is x][0]
|
462
|
+
self._disambiguated_name = f"{self.name}-{index_of_this}"
|
463
|
+
return self._disambiguated_name
|
464
|
+
|
465
|
+
def __str__(self):
|
466
|
+
"""When cast to string, convert to the name (extension) of the format"""
|
467
|
+
return self.name
|
468
|
+
|
469
|
+
def __int__(self):
|
470
|
+
"""When cast to int, return the ID of the format"""
|
471
|
+
return self.id
|
430
472
|
|
431
473
|
|
432
474
|
@dataclass
|
@@ -530,7 +572,6 @@ class ConversionsTable:
|
|
530
572
|
Raises
|
531
573
|
------
|
532
574
|
FileConverterDatabaseException
|
533
|
-
_description_
|
534
575
|
"""
|
535
576
|
|
536
577
|
self.parent = parent
|
@@ -543,8 +584,8 @@ class ConversionsTable:
|
|
543
584
|
num_converters = len(parent.converters)
|
544
585
|
num_formats = len(parent.formats)
|
545
586
|
|
546
|
-
self.
|
547
|
-
|
587
|
+
self.table = [[[0 for k in range(num_formats+1)] for j in range(num_formats+1)]
|
588
|
+
for i in range(num_converters+1)]
|
548
589
|
|
549
590
|
for possible_conversion in l_converts_to:
|
550
591
|
|
@@ -556,22 +597,22 @@ class ConversionsTable:
|
|
556
597
|
raise FileConverterDatabaseException(
|
557
598
|
f"Malformed 'converts_to' entry in database: {possible_conversion}")
|
558
599
|
|
559
|
-
self.
|
600
|
+
self.table[conv_id][in_id][out_id] = 1
|
560
601
|
|
561
602
|
def get_conversion_quality(self,
|
562
603
|
converter_name: str,
|
563
|
-
in_format: str,
|
564
|
-
out_format: str) -> ConversionQualityInfo | None:
|
604
|
+
in_format: str | int,
|
605
|
+
out_format: str | int) -> ConversionQualityInfo | None:
|
565
606
|
"""Get an indication of the quality of a conversion from one format to another, or if it's not possible
|
566
607
|
|
567
608
|
Parameters
|
568
609
|
----------
|
569
610
|
converter_name : str
|
570
611
|
The name of the converter
|
571
|
-
in_format : str
|
572
|
-
The extension of the input file format
|
573
|
-
out_format : str
|
574
|
-
The extension of the output file format
|
612
|
+
in_format : str | int
|
613
|
+
The extension or ID of the input file format
|
614
|
+
out_format : str | int
|
615
|
+
The extension or ID of the output file format
|
575
616
|
|
576
617
|
Returns
|
577
618
|
-------
|
@@ -580,12 +621,20 @@ class ConversionsTable:
|
|
580
621
|
`ConversionQualityInfo` object with info on the conversion
|
581
622
|
"""
|
582
623
|
|
583
|
-
|
584
|
-
|
585
|
-
|
624
|
+
# Check if this converter deals with ambiguous formats, so we know if we need to be strict about getting format
|
625
|
+
# info
|
626
|
+
if D_REGISTERED_CONVERTERS[converter_name].supports_ambiguous_extensions:
|
627
|
+
which_format = None
|
628
|
+
else:
|
629
|
+
which_format = 0
|
630
|
+
|
631
|
+
# Get info about the converter and formats
|
632
|
+
conv_id = self.parent.get_converter_info(converter_name).id
|
633
|
+
in_info = self.parent.get_format_info(in_format, which_format)
|
634
|
+
out_info: int = self.parent.get_format_info(out_format, which_format)
|
586
635
|
|
587
636
|
# First check if the conversion is possible
|
588
|
-
success_flag = self.
|
637
|
+
success_flag = self.table[conv_id][in_info.id][out_info.id]
|
589
638
|
if not success_flag:
|
590
639
|
return None
|
591
640
|
|
@@ -643,40 +692,50 @@ class ConversionsTable:
|
|
643
692
|
details=details,
|
644
693
|
d_prop_conversion_info=d_prop_conversion_info)
|
645
694
|
|
646
|
-
def
|
647
|
-
|
648
|
-
|
649
|
-
"""Get a list of converters which can perform a conversion from one format to another
|
650
|
-
|
695
|
+
def get_possible_conversions(self,
|
696
|
+
in_format: str | int,
|
697
|
+
out_format: str | int) -> list[tuple[str, FormatInfo, FormatInfo]]:
|
698
|
+
"""Get a list of converters which can perform a conversion from one format to another, disambiguating in the
|
699
|
+
case of ambiguous formats and providing IDs for input/output formats for possible conversions
|
651
700
|
|
652
701
|
Parameters
|
653
702
|
----------
|
654
|
-
in_format : str
|
655
|
-
The extension of the input file format
|
656
|
-
out_format : str
|
657
|
-
The extension of the output file format
|
703
|
+
in_format : str | int
|
704
|
+
The extension or ID of the input file format
|
705
|
+
out_format : str | int
|
706
|
+
The extension or ID of the output file format
|
658
707
|
|
659
708
|
Returns
|
660
709
|
-------
|
661
|
-
list[tuple[str,
|
662
|
-
A list of tuples, where each tuple's first item is the name of a converter which can perform
|
663
|
-
conversion,
|
710
|
+
list[tuple[str, FormatInfo, FormatInfo]]
|
711
|
+
A list of tuples, where each tuple's first item is the name of a converter which can perform a matching
|
712
|
+
conversion, the second is the info of the input format for this conversion, and the third is the info of the
|
713
|
+
output format
|
664
714
|
"""
|
665
|
-
|
666
|
-
|
715
|
+
l_in_format_infos = self.parent.get_format_info(in_format, which="all")
|
716
|
+
l_out_format_infos = self.parent.get_format_info(out_format, which="all")
|
717
|
+
|
718
|
+
# Start a list of all possible conversions
|
719
|
+
l_possible_conversions = []
|
720
|
+
|
721
|
+
# Iterate over all possible combinations of input and output formats
|
722
|
+
for in_format_info, out_format_info in product(l_in_format_infos, l_out_format_infos):
|
667
723
|
|
668
|
-
|
669
|
-
|
724
|
+
# Slice the table to get a list of the success for this conversion for each converter
|
725
|
+
l_converter_success = [x[in_format_info.id][out_format_info.id] for x in self.table]
|
670
726
|
|
671
|
-
|
672
|
-
|
673
|
-
|
727
|
+
# Filter for possible conversions and get the name of the converter
|
728
|
+
# for each possible conversion
|
729
|
+
l_converter_names = [self.parent.get_converter_info(converter_id).name
|
674
730
|
for converter_id, possible_flag
|
675
731
|
in enumerate(l_converter_success) if possible_flag > 0]
|
676
732
|
|
677
|
-
|
733
|
+
for converter_name in l_converter_names:
|
734
|
+
l_possible_conversions.append((converter_name, in_format_info, out_format_info))
|
678
735
|
|
679
|
-
|
736
|
+
return l_possible_conversions
|
737
|
+
|
738
|
+
def get_possible_formats(self, converter_name: str) -> tuple[list[FormatInfo], list[FormatInfo]]:
|
680
739
|
"""Get a list of input and output formats that a given converter supports
|
681
740
|
|
682
741
|
Parameters
|
@@ -686,11 +745,11 @@ class ConversionsTable:
|
|
686
745
|
|
687
746
|
Returns
|
688
747
|
-------
|
689
|
-
tuple[list[
|
748
|
+
tuple[list[FormatInfo], list[FormatInfo]]
|
690
749
|
A tuple of a list of the supported input formats and a list of the supported output formats
|
691
750
|
"""
|
692
751
|
conv_id: int = self.parent.get_converter_info(converter_name).id
|
693
|
-
ll_in_out_format_success = self.
|
752
|
+
ll_in_out_format_success = self.table[conv_id]
|
694
753
|
|
695
754
|
# Filter for possible input formats by checking if at least one output format for each has a degree of success
|
696
755
|
# index greater than 0, and stored the filtered lists where the input format is possible so we only need to
|
@@ -707,8 +766,8 @@ class ConversionsTable:
|
|
707
766
|
sum([x[j] for x in ll_filtered_in_out_format_success]) > 0]
|
708
767
|
|
709
768
|
# Get the format info for each format ID, and return lists of the format infos
|
710
|
-
return ([self.parent.get_format_info(x)
|
711
|
-
[self.parent.get_format_info(x)
|
769
|
+
return ([self.parent.get_format_info(x) for x in l_possible_in_format_ids],
|
770
|
+
[self.parent.get_format_info(x) for x in l_possible_out_format_ids])
|
712
771
|
|
713
772
|
|
714
773
|
class DataConversionDatabase:
|
@@ -774,37 +833,12 @@ class DataConversionDatabase:
|
|
774
833
|
return self._l_converter_info
|
775
834
|
|
776
835
|
@property
|
777
|
-
def d_format_info(self) -> dict[str, FormatInfo]:
|
836
|
+
def d_format_info(self) -> dict[str, list[FormatInfo]]:
|
778
837
|
"""Generate the format info dict when needed
|
779
838
|
"""
|
780
839
|
if self._d_format_info is None:
|
781
|
-
self.
|
782
|
-
|
783
|
-
for d_single_format_info in self.formats:
|
784
|
-
name: str = d_single_format_info[DB_FORMAT_EXT_KEY]
|
785
|
-
|
786
|
-
format_info = FormatInfo(name=name,
|
787
|
-
parent=self,
|
788
|
-
d_single_format_info=d_single_format_info)
|
789
|
-
|
790
|
-
if name in self._d_format_info:
|
791
|
-
logger.debug(f"File extension '{name}' appears more than once in the database. Duplicates will use "
|
792
|
-
"a key appended with an index")
|
793
|
-
loop_concluded = False
|
794
|
-
for i in range(97):
|
795
|
-
test_name = f"{name}-{i+2}"
|
796
|
-
if test_name in self._d_format_info:
|
797
|
-
continue
|
798
|
-
else:
|
799
|
-
self._d_format_info[test_name] = format_info
|
800
|
-
loop_concluded = True
|
801
|
-
break
|
802
|
-
if not loop_concluded:
|
803
|
-
logger.warning("Loop counter exceeded when searching for valid new name for file extension "
|
804
|
-
f"'{name}'. New entry will not be added to the database to avoid possibility of "
|
805
|
-
"an infinite loop")
|
806
|
-
else:
|
807
|
-
self._d_format_info[name] = format_info
|
840
|
+
self._init_formats_and_conversions()
|
841
|
+
|
808
842
|
return self._d_format_info
|
809
843
|
|
810
844
|
@property
|
@@ -812,13 +846,7 @@ class DataConversionDatabase:
|
|
812
846
|
"""Generate the format info list (indexed by ID) when needed
|
813
847
|
"""
|
814
848
|
if self._l_format_info is None:
|
815
|
-
|
816
|
-
max_id: int = max([x[DB_ID_KEY] for x in self.formats])
|
817
|
-
self._l_format_info: list[FormatInfo | None] = [None] * (max_id+1)
|
818
|
-
|
819
|
-
# Fill the list with all formats in the dict
|
820
|
-
for single_format_info in self.d_format_info.values():
|
821
|
-
self._l_format_info[single_format_info.id] = single_format_info
|
849
|
+
self._init_formats_and_conversions()
|
822
850
|
|
823
851
|
return self._l_format_info
|
824
852
|
|
@@ -826,11 +854,74 @@ class DataConversionDatabase:
|
|
826
854
|
def conversions_table(self) -> ConversionsTable:
|
827
855
|
"""Generates the conversions table when needed
|
828
856
|
"""
|
857
|
+
|
829
858
|
if self._conversions_table is None:
|
830
|
-
self.
|
831
|
-
|
859
|
+
self._init_formats_and_conversions()
|
860
|
+
|
832
861
|
return self._conversions_table
|
833
862
|
|
863
|
+
def _init_formats_and_conversions(self):
|
864
|
+
"""Initializes the format list and dict and the conversions table"""
|
865
|
+
|
866
|
+
# Start by initializing the list of formats
|
867
|
+
|
868
|
+
# Pre-size a list based on the maximum ID plus 1 (since IDs are 1-indexed)
|
869
|
+
max_id: int = max([x[DB_ID_KEY] for x in self.formats])
|
870
|
+
self._l_format_info: list[FormatInfo | None] = [None] * (max_id+1)
|
871
|
+
|
872
|
+
for d_single_format_info in self.formats:
|
873
|
+
name: str = d_single_format_info[DB_FORMAT_EXT_KEY]
|
874
|
+
|
875
|
+
format_info = FormatInfo(name=name,
|
876
|
+
parent=self,
|
877
|
+
d_single_format_info=d_single_format_info)
|
878
|
+
|
879
|
+
self._l_format_info[format_info.id] = format_info
|
880
|
+
|
881
|
+
# Initialize the conversions table now
|
882
|
+
self._conversions_table = ConversionsTable(l_converts_to=self.converts_to,
|
883
|
+
parent=self)
|
884
|
+
|
885
|
+
# Use the conversions table to prune any formats which have no valid conversions
|
886
|
+
|
887
|
+
# Get a slice of the table which only includes supported converters
|
888
|
+
l_supported_converter_ids = [self.get_converter_info(x).id for x in D_SUPPORTED_CONVERTERS]
|
889
|
+
supported_table = [self._conversions_table.table[x] for x in l_supported_converter_ids]
|
890
|
+
|
891
|
+
for format_id, format_info in enumerate(self._l_format_info):
|
892
|
+
if not format_info:
|
893
|
+
continue
|
894
|
+
|
895
|
+
# Check if the format is supported as the input format for any conversion
|
896
|
+
ll_possible_from_conversions = [x[format_id] for x in supported_table]
|
897
|
+
if sum([sum(x) for x in ll_possible_from_conversions]) > 0:
|
898
|
+
continue
|
899
|
+
|
900
|
+
# Check if the format is supported as the output format for any conversion
|
901
|
+
ll_possible_to_conversions = [[y[format_id] for y in x] for x in supported_table]
|
902
|
+
if sum([sum(x) for x in ll_possible_to_conversions]) > 0:
|
903
|
+
continue
|
904
|
+
|
905
|
+
# If we get here, the format isn't supported for any conversions, so remove it from our list
|
906
|
+
self._l_format_info[format_id] = None
|
907
|
+
|
908
|
+
# Now create the formats dict, with only the pruned list of formats
|
909
|
+
self._d_format_info: dict[str, list[FormatInfo]] = {}
|
910
|
+
|
911
|
+
for format_info in self.l_format_info:
|
912
|
+
|
913
|
+
if not format_info:
|
914
|
+
continue
|
915
|
+
|
916
|
+
name = format_info.name
|
917
|
+
|
918
|
+
# Each name may correspond to multiple formats, so we use a list for each entry to list all possible
|
919
|
+
# formats for each name
|
920
|
+
if name not in self._d_format_info:
|
921
|
+
self._d_format_info[name] = []
|
922
|
+
|
923
|
+
self._d_format_info[name].append(format_info)
|
924
|
+
|
834
925
|
def get_converter_info(self, converter_name_or_id: str | int) -> ConverterInfo:
|
835
926
|
"""Get a converter's info from either its name or ID
|
836
927
|
"""
|
@@ -838,7 +929,8 @@ class DataConversionDatabase:
|
|
838
929
|
try:
|
839
930
|
return self.d_converter_info[converter_name_or_id]
|
840
931
|
except KeyError:
|
841
|
-
raise FileConverterDatabaseException(f"Converter name '{converter_name_or_id}' not recognised"
|
932
|
+
raise FileConverterDatabaseException(f"Converter name '{converter_name_or_id}' not recognised",
|
933
|
+
help=True)
|
842
934
|
elif isinstance(converter_name_or_id, int):
|
843
935
|
return self.l_converter_info[converter_name_or_id]
|
844
936
|
else:
|
@@ -846,20 +938,107 @@ class DataConversionDatabase:
|
|
846
938
|
f" of type '{type(converter_name_or_id)}'. Type must be `str` or "
|
847
939
|
"`int`")
|
848
940
|
|
849
|
-
|
850
|
-
|
941
|
+
@overload
|
942
|
+
def get_format_info(self,
|
943
|
+
format_name_or_id: str | int | FormatInfo,
|
944
|
+
which: int | None = None) -> FormatInfo: ...
|
945
|
+
|
946
|
+
@overload
|
947
|
+
def get_format_info(self,
|
948
|
+
format_name_or_id: str | int | FormatInfo,
|
949
|
+
which: Literal["all"]) -> list[FormatInfo]: ...
|
950
|
+
|
951
|
+
def get_format_info(self,
|
952
|
+
format_name_or_id: str | int | FormatInfo,
|
953
|
+
which: int | Literal["all"] | None = None) -> FormatInfo | list[FormatInfo]:
|
954
|
+
"""Gets the information on a given file format stored in the database
|
955
|
+
|
956
|
+
Parameters
|
957
|
+
----------
|
958
|
+
format_name_or_id : str | int | FormatInfo
|
959
|
+
The name (extension) of the format, or its ID. In the case of ambiguous extensions which could apply to
|
960
|
+
multiple formats, the ID must be used here or a FileConverterDatabaseException will be raised. This also
|
961
|
+
allows passing a FormatInfo to this, in which case that object will be silently returned, to allow
|
962
|
+
normalising the input to always be a FormatInfo when output from this
|
963
|
+
which : int | Literal["all"] | None
|
964
|
+
In the case that an extension string is provided which turns out to be ambiguous, which of the listed
|
965
|
+
possibilities to use from the zero-indexed list. Default None, which raises an exception for an ambiguous
|
966
|
+
format. 0 may be used to select the first in the database, which is often a good default choice. The literal
|
967
|
+
string "all" may be used to request all possibilities, in which case this method will return a list (even if
|
968
|
+
there are zero or one possibilities)
|
969
|
+
|
970
|
+
Returns
|
971
|
+
-------
|
972
|
+
FormatInfo | list[FormatInfo]
|
851
973
|
"""
|
974
|
+
|
975
|
+
if which == "all":
|
976
|
+
return_as_list = True
|
977
|
+
else:
|
978
|
+
return_as_list = False
|
979
|
+
|
852
980
|
if isinstance(format_name_or_id, str):
|
853
|
-
|
854
|
-
|
855
|
-
|
856
|
-
|
981
|
+
# Silently strip leading period
|
982
|
+
if format_name_or_id.startswith("."):
|
983
|
+
format_name_or_id = format_name_or_id[1:]
|
984
|
+
|
985
|
+
# Check for a hyphen in the format, which indicates a preference from the user as to which, overriding the
|
986
|
+
# `which` kwarg
|
987
|
+
if "-" in format_name_or_id:
|
988
|
+
l_name_segments = format_name_or_id.split("-")
|
989
|
+
if len(l_name_segments) > 2:
|
990
|
+
raise FileConverterDatabaseException(f"Format name '{format_name_or_id} is improperly formatted - "
|
991
|
+
"It may contain at most one hyphen, separating the extension "
|
992
|
+
"from an index indicating which of the formats with that "
|
993
|
+
"extension to use, e.g. 'pdb-0', 'pdb-1', etc.",
|
994
|
+
help=True)
|
995
|
+
format_name_or_id = l_name_segments[0]
|
996
|
+
which = int(l_name_segments[1])
|
997
|
+
|
998
|
+
l_possible_format_info = self.d_format_info.get(format_name_or_id, [])
|
999
|
+
|
1000
|
+
if which == "all":
|
1001
|
+
return l_possible_format_info
|
1002
|
+
|
1003
|
+
elif len(l_possible_format_info) == 1:
|
1004
|
+
format_info = l_possible_format_info[0]
|
1005
|
+
|
1006
|
+
elif len(l_possible_format_info) == 0:
|
1007
|
+
raise FileConverterDatabaseException(f"Format name '{format_name_or_id}' not recognised",
|
1008
|
+
help=True)
|
1009
|
+
|
1010
|
+
elif which is not None and which < len(l_possible_format_info):
|
1011
|
+
format_info = l_possible_format_info[which]
|
1012
|
+
|
1013
|
+
else:
|
1014
|
+
msg = (f"Extension '{format_name_or_id}' is ambiguous and must be defined by ID. Possible formats "
|
1015
|
+
"and their IDs are:")
|
1016
|
+
for possible_format_info in l_possible_format_info:
|
1017
|
+
msg += (f"\n{possible_format_info.id}: {possible_format_info.disambiguated_name} "
|
1018
|
+
f"({possible_format_info.note})")
|
1019
|
+
raise FileConverterDatabaseException(msg, help=True)
|
1020
|
+
|
857
1021
|
elif isinstance(format_name_or_id, int):
|
858
|
-
|
1022
|
+
try:
|
1023
|
+
format_info = self.l_format_info[format_name_or_id]
|
1024
|
+
except IndexError:
|
1025
|
+
if return_as_list:
|
1026
|
+
return []
|
1027
|
+
raise FileConverterDatabaseException(f"Format ID '{format_name_or_id}' not recognised",
|
1028
|
+
help=True)
|
1029
|
+
|
1030
|
+
elif isinstance(format_name_or_id, FormatInfo):
|
1031
|
+
# Silently return the FormatInfo if it was used as a key here
|
1032
|
+
format_info = format_name_or_id
|
1033
|
+
|
859
1034
|
else:
|
860
1035
|
raise FileConverterDatabaseException(f"Invalid key passed to `get_format_info`: '{format_name_or_id}'"
|
861
1036
|
f" of type '{type(format_name_or_id)}'. Type must be `str` or "
|
862
1037
|
"`int`")
|
1038
|
+
if return_as_list:
|
1039
|
+
return [format_info]
|
1040
|
+
|
1041
|
+
return format_info
|
863
1042
|
|
864
1043
|
|
865
1044
|
# The database will be loaded on demand when `get_database()` is called
|
@@ -919,35 +1098,55 @@ def get_converter_info(name: str) -> ConverterInfo:
|
|
919
1098
|
return get_database().d_converter_info[name]
|
920
1099
|
|
921
1100
|
|
922
|
-
|
1101
|
+
@overload
|
1102
|
+
def get_format_info(format_name_or_id: str | int | FormatInfo,
|
1103
|
+
which: int | None = None) -> FormatInfo: ...
|
1104
|
+
|
1105
|
+
|
1106
|
+
@overload
|
1107
|
+
def get_format_info(format_name_or_id: str | int | FormatInfo,
|
1108
|
+
which: Literal["all"]) -> list[FormatInfo]: ...
|
1109
|
+
|
1110
|
+
|
1111
|
+
def get_format_info(format_name_or_id: str | int | FormatInfo,
|
1112
|
+
which: int | Literal["all"] | None = None) -> FormatInfo | list[FormatInfo]:
|
923
1113
|
"""Gets the information on a given file format stored in the database
|
924
1114
|
|
925
1115
|
Parameters
|
926
1116
|
----------
|
927
|
-
|
928
|
-
The name (extension) of the
|
1117
|
+
format_name_or_id : str | int | FormatInfo
|
1118
|
+
The name (extension) of the format, or its ID. In the case of ambiguous extensions which could apply to multiple
|
1119
|
+
formats, the ID must be used here or a FileConverterDatabaseException will be raised. This also allows passing a
|
1120
|
+
FormatInfo to this, in which case that object will be silently returned, to allow normalising the input to
|
1121
|
+
always be a FormatInfo when output from this
|
1122
|
+
which : int | Literal["all"] | None
|
1123
|
+
In the case that an extension string is provided which turns out to be ambiguous, which of the listed
|
1124
|
+
possibilities to use from the zero-indexed list. Default None, which raises an exception for an ambiguous
|
1125
|
+
format. 0 may be used to select the first in the database, which is often a good default choice. The literal
|
1126
|
+
string "all" may be used to request all possibilities, in which case this method will return a list (even if
|
1127
|
+
there are zero or one possibilities)
|
929
1128
|
|
930
1129
|
Returns
|
931
1130
|
-------
|
932
|
-
FormatInfo
|
1131
|
+
FormatInfo | list[FormatInfo]
|
933
1132
|
"""
|
934
1133
|
|
935
|
-
return get_database().
|
1134
|
+
return get_database().get_format_info(format_name_or_id, which)
|
936
1135
|
|
937
1136
|
|
938
1137
|
def get_conversion_quality(converter_name: str,
|
939
|
-
in_format: str,
|
940
|
-
out_format: str) -> ConversionQualityInfo | None:
|
1138
|
+
in_format: str | int,
|
1139
|
+
out_format: str | int) -> ConversionQualityInfo | None:
|
941
1140
|
"""Get an indication of the quality of a conversion from one format to another, or if it's not possible
|
942
1141
|
|
943
1142
|
Parameters
|
944
1143
|
----------
|
945
1144
|
converter_name : str
|
946
1145
|
The name of the converter
|
947
|
-
in_format : str
|
948
|
-
The extension of the input file format
|
949
|
-
out_format : str
|
950
|
-
The extension of the output file format
|
1146
|
+
in_format : str | int
|
1147
|
+
The extension or ID of the input file format
|
1148
|
+
out_format : str | int
|
1149
|
+
The extension or ID of the output file format
|
951
1150
|
|
952
1151
|
Returns
|
953
1152
|
-------
|
@@ -961,30 +1160,77 @@ def get_conversion_quality(converter_name: str,
|
|
961
1160
|
out_format=out_format)
|
962
1161
|
|
963
1162
|
|
964
|
-
def
|
965
|
-
|
966
|
-
"""Get a list of converters which can perform a conversion from one format to another and
|
967
|
-
|
1163
|
+
def get_possible_conversions(in_format: str | int,
|
1164
|
+
out_format: str | int) -> list[tuple[str, FormatInfo, FormatInfo]]:
|
1165
|
+
"""Get a list of converters which can perform a conversion from one format to another and disambiguate in the case
|
1166
|
+
of ambiguous input/output formats
|
968
1167
|
|
969
1168
|
Parameters
|
970
1169
|
----------
|
971
|
-
in_format : str
|
972
|
-
The extension of the input file format
|
973
|
-
out_format : str
|
974
|
-
The extension of the output file format
|
1170
|
+
in_format : str | int
|
1171
|
+
The extension or ID of the input file format
|
1172
|
+
out_format : str | int
|
1173
|
+
The extension or ID of the output file format
|
975
1174
|
|
976
1175
|
Returns
|
977
1176
|
-------
|
978
|
-
list[tuple[str,
|
979
|
-
A list of tuples, where each tuple's first item is the name of a converter which can perform
|
980
|
-
conversion,
|
1177
|
+
list[tuple[str, FormatInfo, FormatInfo]]
|
1178
|
+
A list of tuples, where each tuple's first item is the name of a converter which can perform a matching
|
1179
|
+
conversion, the second is the info of the input format for this conversion, and the third is the info of the
|
1180
|
+
output format
|
981
1181
|
"""
|
982
1182
|
|
983
|
-
return get_database().conversions_table.
|
984
|
-
|
1183
|
+
return get_database().conversions_table.get_possible_conversions(in_format=in_format,
|
1184
|
+
out_format=out_format)
|
985
1185
|
|
986
1186
|
|
987
|
-
def
|
1187
|
+
def disambiguate_formats(converter_name: str,
|
1188
|
+
in_format: str | int | FormatInfo,
|
1189
|
+
out_format: str | int | FormatInfo) -> tuple[FormatInfo, FormatInfo]:
|
1190
|
+
"""Try to disambiguate formats by seeing if there's only one possible conversion between formats matching those
|
1191
|
+
provided.
|
1192
|
+
|
1193
|
+
Parameters
|
1194
|
+
----------
|
1195
|
+
converter_name : str
|
1196
|
+
The name of the converter
|
1197
|
+
in_format : str | int | FormatInfo
|
1198
|
+
The extension or ID of the input file format, or its FormatInfo
|
1199
|
+
out_format : str | int | FormatInfo
|
1200
|
+
The extension or ID of the output file format, or its FormatInfo
|
1201
|
+
|
1202
|
+
Returns
|
1203
|
+
-------
|
1204
|
+
tuple[FormatInfo, FormatInfo]
|
1205
|
+
The input and output format for this conversion, if only one combination is possible
|
1206
|
+
|
1207
|
+
Raises
|
1208
|
+
------
|
1209
|
+
FileConverterDatabaseException
|
1210
|
+
If more than one format combination is possible for this conversion, or no conversion is possible
|
1211
|
+
"""
|
1212
|
+
|
1213
|
+
# Get all possible conversions, and see if we only have one for this converter
|
1214
|
+
l_possible_conversions = [x for x in get_possible_conversions(in_format, out_format)
|
1215
|
+
if x[0] == converter_name]
|
1216
|
+
|
1217
|
+
if len(l_possible_conversions) == 1:
|
1218
|
+
return l_possible_conversions[0][1], l_possible_conversions[0][2]
|
1219
|
+
elif len(l_possible_conversions) == 0:
|
1220
|
+
raise FileConverterDatabaseException(f"Conversion from {in_format} to {out_format} with converter "
|
1221
|
+
f"{converter_name} is not supported", help=True)
|
1222
|
+
else:
|
1223
|
+
msg = (f"Conversion from {in_format} to {out_format} with converter {converter_name} is ambiguous.\n"
|
1224
|
+
"Possible matching conversions are:\n")
|
1225
|
+
for _, possible_in_format, possible_out_format in l_possible_conversions:
|
1226
|
+
msg += (f"{possible_in_format.disambiguated_name} ({possible_in_format.note}) to "
|
1227
|
+
f"{possible_out_format.disambiguated_name} ({possible_out_format.note})\n")
|
1228
|
+
# Trim the final newline from the message
|
1229
|
+
msg = msg[:-1]
|
1230
|
+
raise FileConverterDatabaseException(msg, help=True)
|
1231
|
+
|
1232
|
+
|
1233
|
+
def get_possible_formats(converter_name: str) -> tuple[list[FormatInfo], list[FormatInfo]]:
|
988
1234
|
"""Get a list of input and output formats that a given converter supports
|
989
1235
|
|
990
1236
|
Parameters
|
@@ -994,7 +1240,7 @@ def get_possible_formats(converter_name: str) -> tuple[list[str], list[str]]:
|
|
994
1240
|
|
995
1241
|
Returns
|
996
1242
|
-------
|
997
|
-
tuple[list[
|
1243
|
+
tuple[list[FormatInfo], list[FormatInfo]]
|
998
1244
|
A tuple of a list of the supported input formats and a list of the supported output formats
|
999
1245
|
"""
|
1000
1246
|
return get_database().conversions_table.get_possible_formats(converter_name=converter_name)
|
@@ -1009,7 +1255,7 @@ def _find_arg(tl_args: tuple[list[FlagInfo], list[OptionInfo]],
|
|
1009
1255
|
if len(l_found) > 0:
|
1010
1256
|
return l_found[0]
|
1011
1257
|
# If we get here, it wasn't found in either list
|
1012
|
-
raise FileConverterDatabaseException(f"Argument {arg} was not found in the list of allowed arguments for this "
|
1258
|
+
raise FileConverterDatabaseException(f"Argument '{arg}' was not found in the list of allowed arguments for this "
|
1013
1259
|
"conversion")
|
1014
1260
|
|
1015
1261
|
|