psdi-data-conversion 0.1.7__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- psdi_data_conversion/app.py +5 -408
- psdi_data_conversion/constants.py +12 -8
- psdi_data_conversion/converter.py +41 -28
- psdi_data_conversion/converters/base.py +18 -13
- psdi_data_conversion/database.py +292 -88
- psdi_data_conversion/gui/__init__.py +5 -0
- psdi_data_conversion/gui/accessibility.py +51 -0
- psdi_data_conversion/gui/env.py +239 -0
- psdi_data_conversion/gui/get.py +53 -0
- psdi_data_conversion/gui/post.py +176 -0
- psdi_data_conversion/gui/setup.py +102 -0
- psdi_data_conversion/main.py +70 -13
- psdi_data_conversion/static/content/convert.htm +105 -74
- psdi_data_conversion/static/content/convertato.htm +36 -26
- psdi_data_conversion/static/content/convertc2x.htm +39 -26
- psdi_data_conversion/static/content/download.htm +5 -5
- psdi_data_conversion/static/content/feedback.htm +2 -2
- psdi_data_conversion/static/content/header-links.html +2 -2
- psdi_data_conversion/static/content/index-versions/header-links.html +2 -2
- psdi_data_conversion/static/content/index-versions/psdi-common-header.html +9 -12
- psdi_data_conversion/static/content/psdi-common-header.html +9 -12
- psdi_data_conversion/static/javascript/accessibility.js +88 -61
- psdi_data_conversion/static/javascript/data.js +1 -3
- psdi_data_conversion/static/javascript/load_accessibility.js +50 -33
- psdi_data_conversion/static/styles/format.css +72 -18
- psdi_data_conversion/templates/accessibility.htm +274 -0
- psdi_data_conversion/templates/documentation.htm +6 -6
- psdi_data_conversion/templates/index.htm +73 -56
- psdi_data_conversion/{static/content → templates}/report.htm +28 -10
- psdi_data_conversion/testing/conversion_test_specs.py +26 -6
- psdi_data_conversion/testing/utils.py +6 -6
- {psdi_data_conversion-0.1.7.dist-info → psdi_data_conversion-0.2.1.dist-info}/METADATA +9 -3
- {psdi_data_conversion-0.1.7.dist-info → psdi_data_conversion-0.2.1.dist-info}/RECORD +36 -30
- psdi_data_conversion/static/content/accessibility.htm +0 -255
- {psdi_data_conversion-0.1.7.dist-info → psdi_data_conversion-0.2.1.dist-info}/WHEEL +0 -0
- {psdi_data_conversion-0.1.7.dist-info → psdi_data_conversion-0.2.1.dist-info}/entry_points.txt +0 -0
- {psdi_data_conversion-0.1.7.dist-info → psdi_data_conversion-0.2.1.dist-info}/licenses/LICENSE +0 -0
@@ -206,8 +206,8 @@ class FileConverter:
|
|
206
206
|
data: dict[str, Any] | None = None,
|
207
207
|
abort_callback: Callable[[int], None] = abort_raise,
|
208
208
|
use_envvars=False,
|
209
|
-
|
210
|
-
|
209
|
+
input_dir=const.DEFAULT_INPUT_DIR,
|
210
|
+
output_dir=const.DEFAULT_OUTPUT_DIR,
|
211
211
|
max_file_size=None,
|
212
212
|
no_check=False,
|
213
213
|
log_file: str | None = None,
|
@@ -235,9 +235,9 @@ class FileConverter:
|
|
235
235
|
use_envvars : bool
|
236
236
|
If set to True, environment variables will be checked for any that set options for this class and used,
|
237
237
|
default False
|
238
|
-
|
238
|
+
input_dir : str
|
239
239
|
The location of input files relative to the current directory
|
240
|
-
|
240
|
+
output_dir : str
|
241
241
|
The location of output files relative to the current directory
|
242
242
|
max_file_size : float
|
243
243
|
The maximum allowed file size for input/output files, in MB. If 0, will be unlimited. Default 0 (unlimited)
|
@@ -296,8 +296,8 @@ class FileConverter:
|
|
296
296
|
# Set member variables directly from input
|
297
297
|
self.in_filename = filename
|
298
298
|
self.to_format = to_format
|
299
|
-
self.
|
300
|
-
self.
|
299
|
+
self.input_dir = input_dir
|
300
|
+
self.output_dir = output_dir
|
301
301
|
self.log_file = log_file
|
302
302
|
self.log_mode = log_mode
|
303
303
|
self.log_level = log_level
|
@@ -328,17 +328,22 @@ class FileConverter:
|
|
328
328
|
self.err: str | None = None
|
329
329
|
self.quality: str | None = None
|
330
330
|
|
331
|
-
#
|
332
|
-
if not os.path.exists(self.
|
333
|
-
os.
|
331
|
+
# Determine if the filename is fully-qualified, and if not, find it in the upload dir
|
332
|
+
if not os.path.exists(self.in_filename):
|
333
|
+
qualified_in_filename = os.path.join(self.input_dir, self.in_filename)
|
334
|
+
if os.path.exists(qualified_in_filename):
|
335
|
+
self.in_filename = qualified_in_filename
|
336
|
+
else:
|
337
|
+
FileConverterInputException(f"Input file {self.in_filename} not found, either absolute or relative "
|
338
|
+
f"to {self.input_dir}")
|
334
339
|
|
335
340
|
# Create directory 'downloads' if not extant.
|
336
|
-
if not os.path.exists(self.
|
337
|
-
os.makedirs(self.
|
341
|
+
if not os.path.exists(self.output_dir):
|
342
|
+
os.makedirs(self.output_dir, exist_ok=True)
|
338
343
|
|
339
344
|
self.local_filename = os.path.split(self.in_filename)[1]
|
340
345
|
self.filename_base = os.path.splitext(self.local_filename)[0]
|
341
|
-
self.out_filename = f"{self.
|
346
|
+
self.out_filename = f"{self.output_dir}/{self.filename_base}.{self.to_format_info.name}"
|
342
347
|
|
343
348
|
# Set up files to log to
|
344
349
|
self._setup_loggers()
|
@@ -441,7 +446,7 @@ class FileConverter:
|
|
441
446
|
if self.log_mode == const.LOG_FULL_FORCE:
|
442
447
|
self.output_log = self.log_file
|
443
448
|
else:
|
444
|
-
self.output_log = os.path.join(self.
|
449
|
+
self.output_log = os.path.join(self.output_dir, f"{self.filename_base}{const.OUTPUT_LOG_EXT}")
|
445
450
|
|
446
451
|
# If any previous log exists, delete it
|
447
452
|
if os.path.exists(self.output_log):
|
psdi_data_conversion/database.py
CHANGED
@@ -9,14 +9,21 @@ from __future__ import annotations
|
|
9
9
|
|
10
10
|
import json
|
11
11
|
import os
|
12
|
+
import sys
|
13
|
+
from copy import copy
|
12
14
|
from dataclasses import dataclass, field
|
15
|
+
from functools import lru_cache
|
13
16
|
from itertools import product
|
14
17
|
from logging import getLogger
|
15
18
|
from typing import Any, Literal, overload
|
19
|
+
from warnings import catch_warnings
|
20
|
+
|
21
|
+
import igraph as ig
|
16
22
|
|
17
23
|
from psdi_data_conversion import constants as const
|
18
|
-
from psdi_data_conversion.converter import
|
19
|
-
|
24
|
+
from psdi_data_conversion.converter import (L_REGISTERED_CONVERTERS, L_SUPPORTED_CONVERTERS,
|
25
|
+
get_registered_converter_class)
|
26
|
+
from psdi_data_conversion.converters.base import FileConverter, FileConverterException
|
20
27
|
from psdi_data_conversion.utils import regularize_name
|
21
28
|
|
22
29
|
# Keys for top-level and general items in the database
|
@@ -121,17 +128,40 @@ class ConverterInfo:
|
|
121
128
|
The regularized name of the converter
|
122
129
|
parent : DataConversionDatabase
|
123
130
|
The database which this belongs to
|
131
|
+
d_single_converter_info : dict[str, int | str]
|
132
|
+
The dict within the database file which describes this converter
|
124
133
|
d_data : dict[str, Any]
|
125
134
|
The loaded database dict
|
126
135
|
"""
|
127
136
|
|
128
137
|
self.name = regularize_name(name)
|
138
|
+
"""The regularized name of the converter"""
|
139
|
+
|
140
|
+
self.converter_class: type[FileConverter]
|
141
|
+
"""The class used to perform conversions with this converter"""
|
142
|
+
|
143
|
+
self.pretty_name: str
|
144
|
+
"""The name of the converter, properly spaced and capitalized"""
|
145
|
+
|
146
|
+
try:
|
147
|
+
self.converter_class = get_registered_converter_class(self.name)
|
148
|
+
self.pretty_name = self.converter_class.name
|
149
|
+
except KeyError:
|
150
|
+
self.converter_class = None
|
151
|
+
self.pretty_name = name
|
152
|
+
|
129
153
|
self.parent = parent
|
154
|
+
"""The parent database"""
|
130
155
|
|
131
156
|
# Get info about the converter from the database
|
132
157
|
self.id: int = d_single_converter_info.get(DB_ID_KEY, -1)
|
158
|
+
"""The converter's ID"""
|
159
|
+
|
133
160
|
self.description: str = d_single_converter_info.get(DB_DESC_KEY, "")
|
161
|
+
"""A description of the converter"""
|
162
|
+
|
134
163
|
self.url: str = d_single_converter_info.get(DB_URL_KEY, "")
|
164
|
+
"""The official URL for the converter"""
|
135
165
|
|
136
166
|
# Get necessary info about the converter from the class
|
137
167
|
try:
|
@@ -403,6 +433,12 @@ class FormatInfo:
|
|
403
433
|
"""Class providing information on a file format from the PSDI Data Conversion database
|
404
434
|
"""
|
405
435
|
|
436
|
+
D_PROPERTY_ATTRS = {const.QUAL_COMP_KEY: const.QUAL_COMP_LABEL,
|
437
|
+
const.QUAL_CONN_KEY: const.QUAL_CONN_LABEL,
|
438
|
+
const.QUAL_2D_KEY: const.QUAL_2D_LABEL,
|
439
|
+
const.QUAL_3D_KEY: const.QUAL_3D_LABEL}
|
440
|
+
"""A dict of attrs of this class which describe properties that a format may or may not have"""
|
441
|
+
|
406
442
|
def __init__(self,
|
407
443
|
name: str,
|
408
444
|
parent: DataConversionDatabase,
|
@@ -448,6 +484,9 @@ class FormatInfo:
|
|
448
484
|
self.three_dim = d_single_format_info.get(DB_FORMAT_3D_KEY)
|
449
485
|
"""Whether or not this format stores 3D structural information"""
|
450
486
|
|
487
|
+
self._lower_name: str = self.name.lower()
|
488
|
+
"""The format name all in lower-case"""
|
489
|
+
|
451
490
|
self._disambiguated_name: str | None = None
|
452
491
|
|
453
492
|
@property
|
@@ -455,12 +494,13 @@ class FormatInfo:
|
|
455
494
|
"""A unique name for this format which can be used to distinguish it from others which share the same extension,
|
456
495
|
by appending the name of each with a unique index"""
|
457
496
|
if self._disambiguated_name is None:
|
458
|
-
l_formats_with_same_name = [x for x in self.parent.l_format_info
|
497
|
+
l_formats_with_same_name = [x for x in self.parent.l_format_info
|
498
|
+
if x and x._lower_name == self._lower_name]
|
459
499
|
if len(l_formats_with_same_name) == 1:
|
460
|
-
self._disambiguated_name = self.
|
500
|
+
self._disambiguated_name = self._lower_name
|
461
501
|
else:
|
462
502
|
index_of_this = [i for i, x in enumerate(l_formats_with_same_name) if self is x][0]
|
463
|
-
self._disambiguated_name = f"{self.
|
503
|
+
self._disambiguated_name = f"{self._lower_name}-{index_of_this}"
|
464
504
|
return self._disambiguated_name
|
465
505
|
|
466
506
|
def __str__(self):
|
@@ -486,7 +526,7 @@ class PropertyConversionInfo:
|
|
486
526
|
def __post_init__(self):
|
487
527
|
"""Set the label and note based on input/output status
|
488
528
|
"""
|
489
|
-
self.label =
|
529
|
+
self.label = FormatInfo.D_PROPERTY_ATTRS[self.key]
|
490
530
|
|
491
531
|
if self.input_supported is None and self.output_supported is None:
|
492
532
|
self.note = const.QUAL_NOTE_BOTH_UNKNOWN
|
@@ -584,26 +624,56 @@ class ConversionsTable:
|
|
584
624
|
# Store references to needed data
|
585
625
|
self._l_converts_to = l_converts_to
|
586
626
|
|
587
|
-
# Build the conversion
|
588
|
-
# 1-indexed, so we add 1 to each of the lengths here
|
589
|
-
num_converters = len(parent.converters)
|
627
|
+
# Build the conversion graphs - each format is a vertex, each conversion is an edge
|
590
628
|
num_formats = len(parent.formats)
|
591
629
|
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
630
|
+
l_supported_conversions = [x for x in l_converts_to if
|
631
|
+
self.parent.get_converter_info(x[DB_CONV_ID_KEY]).name in L_SUPPORTED_CONVERTERS]
|
632
|
+
l_registered_conversions = [x for x in l_supported_conversions if
|
633
|
+
self.parent.get_converter_info(x[DB_CONV_ID_KEY]).name in L_REGISTERED_CONVERTERS]
|
634
|
+
|
635
|
+
# We make separate graphs for all known conversions, all supported conversions, and all registered conversions
|
636
|
+
self.graph: ig.Graph
|
637
|
+
self.supported_graph: ig.Graph
|
638
|
+
self.registered_graph: ig.Graph
|
639
|
+
|
640
|
+
for support_type, l_conversions in (("", l_converts_to),
|
641
|
+
("supported_", l_supported_conversions),
|
642
|
+
("registered_", l_registered_conversions)):
|
643
|
+
|
644
|
+
setattr(self, support_type+"graph",
|
645
|
+
ig.Graph(n=num_formats,
|
646
|
+
directed=True,
|
647
|
+
# Each vertex stores the disambiguated name of the format
|
648
|
+
vertex_attrs={DB_NAME_KEY: [x.disambiguated_name if x is not None else None
|
649
|
+
for x in parent.l_format_info]},
|
650
|
+
edges=[(x[DB_IN_ID_KEY], x[DB_OUT_ID_KEY]) for x in l_conversions],
|
651
|
+
# Each edge stores the id and name of the converter used for the conversion
|
652
|
+
edge_attrs={DB_CONV_ID_KEY: [x[DB_CONV_ID_KEY] for x in l_conversions],
|
653
|
+
DB_NAME_KEY: [self.parent.get_converter_info(x[DB_CONV_ID_KEY]).name
|
654
|
+
for x in l_conversions]}))
|
655
|
+
|
656
|
+
def _get_desired_graph(self,
                       only: Literal["all"] | Literal["supported"] | Literal["registered"] = "all") -> ig.Graph:
    """Select the conversion graph which matches the requested level of converter support

    Parameters
    ----------
    only : Literal["all"] | Literal["supported"] | Literal["registered"]
        Which graph to return:
        - "all" (default): `self.graph`
        - "supported": `self.supported_graph`
        - "registered": `self.registered_graph`

    Returns
    -------
    ig.Graph
        The graph stored on this object for the requested value of `only`

    Raises
    ------
    ValueError
        If `only` is not one of the allowed values
    """
    # Each allowed value is paired with the name of the attribute holding its graph. The attribute is only read
    # once a match is found, so no graph other than the requested one is accessed
    for allowed_value, graph_attr in (("all", "graph"),
                                      ("supported", "supported_graph"),
                                      ("registered", "registered_graph")):
        if only == allowed_value:
            return getattr(self, graph_attr)

    raise ValueError(f"Invalid value \"{only}\" for keyword argument `only`. Allowed values are \"all\" "
                     "(default), \"supported\", and \"registered\".")
|
604
667
|
|
605
|
-
|
668
|
+
def _get_possible_converters(self, in_format_info: FormatInfo, out_format_info: FormatInfo,
                             only: Literal["all"] | Literal["supported"] | Literal["registered"] = "all"):
    """Get a list of the names of all converters which can convert directly from one format to another

    Parameters
    ----------
    in_format_info : FormatInfo
        The input format; its `id` is used as the source vertex in the graph
    out_format_info : FormatInfo
        The output format; its `id` is used as the target vertex in the graph
    only : Literal["all"] | Literal["supported"] | Literal["registered"]
        Which conversion graph to search, passed on to `_get_desired_graph`. Default "all"

    Returns
    -------
    list
        The value stored under `DB_NAME_KEY` on each edge running from the input format's vertex to the output
        format's vertex - one entry per matching edge
    """
    # Every edge from the input vertex to the output vertex represents one converter able to do this conversion
    matching_edges = self._get_desired_graph(only).es.select(_source=in_format_info.id,
                                                             _target=out_format_info.id)

    l_converter_names = []
    for edge in matching_edges:
        l_converter_names.append(edge[DB_NAME_KEY])
    return l_converter_names
|
606
675
|
|
676
|
+
@lru_cache(maxsize=None)
|
607
677
|
def get_conversion_quality(self,
|
608
678
|
converter_name: str,
|
609
679
|
in_format: str | int,
|
@@ -633,14 +703,12 @@ class ConversionsTable:
|
|
633
703
|
else:
|
634
704
|
which_format = 0
|
635
705
|
|
636
|
-
# Get
|
637
|
-
|
638
|
-
|
639
|
-
out_info: int = self.parent.get_format_info(out_format, which_format)
|
706
|
+
# Get the full format info for each format
|
707
|
+
in_format_info = self.parent.get_format_info(in_format, which_format)
|
708
|
+
out_format_info: int = self.parent.get_format_info(out_format, which_format)
|
640
709
|
|
641
710
|
# First check if the conversion is possible
|
642
|
-
|
643
|
-
if not success_flag:
|
711
|
+
if converter_name not in self._get_possible_converters(in_format_info, out_format_info):
|
644
712
|
return None
|
645
713
|
|
646
714
|
# The conversion is possible. Now determine how many properties of the output format are not in the input
|
@@ -649,9 +717,9 @@ class ConversionsTable:
|
|
649
717
|
num_new_props = 0
|
650
718
|
any_unknown = False
|
651
719
|
d_prop_conversion_info: dict[str, PropertyConversionInfo] = {}
|
652
|
-
for prop in
|
653
|
-
in_prop: bool | None = getattr(
|
654
|
-
out_prop: bool | None = getattr(
|
720
|
+
for prop in FormatInfo.D_PROPERTY_ATTRS:
|
721
|
+
in_prop: bool | None = getattr(in_format_info, prop)
|
722
|
+
out_prop: bool | None = getattr(out_format_info, prop)
|
655
723
|
|
656
724
|
d_prop_conversion_info[prop] = PropertyConversionInfo(prop, in_prop, out_prop)
|
657
725
|
|
@@ -699,7 +767,9 @@ class ConversionsTable:
|
|
699
767
|
|
700
768
|
def get_possible_conversions(self,
|
701
769
|
in_format: str | int,
|
702
|
-
out_format: str | int
|
770
|
+
out_format: str | int,
|
771
|
+
only: Literal["all"] | Literal["supported"] | Literal["registered"] = "all"
|
772
|
+
) -> list[tuple[ConverterInfo, FormatInfo, FormatInfo]]:
|
703
773
|
"""Get a list of converters which can perform a conversion from one format to another, disambiguating in the
|
704
774
|
case of ambiguous formats and providing IDs for input/output formats for possible conversions
|
705
775
|
|
@@ -712,10 +782,10 @@ class ConversionsTable:
|
|
712
782
|
|
713
783
|
Returns
|
714
784
|
-------
|
715
|
-
list[tuple[
|
716
|
-
A list of tuples, where each tuple's first item is the
|
717
|
-
conversion, the second is the info of the input format for this conversion, and the third is the
|
718
|
-
output format
|
785
|
+
list[tuple[ConverterInfo, FormatInfo, FormatInfo]]
|
786
|
+
A list of tuples, where each tuple's first item is the ConverterInfo of a converter which can perform a
|
787
|
+
matching conversion, the second is the info of the input format for this conversion, and the third is the
|
788
|
+
info of the output format
|
719
789
|
"""
|
720
790
|
l_in_format_infos = self.parent.get_format_info(in_format, which="all")
|
721
791
|
l_out_format_infos = self.parent.get_format_info(out_format, which="all")
|
@@ -726,20 +796,113 @@ class ConversionsTable:
|
|
726
796
|
# Iterate over all possible combinations of input and output formats
|
727
797
|
for in_format_info, out_format_info in product(l_in_format_infos, l_out_format_infos):
|
728
798
|
|
729
|
-
#
|
730
|
-
|
731
|
-
|
732
|
-
# Filter for possible conversions and get the converter name and degree-of-success string
|
733
|
-
# for each possible conversion
|
734
|
-
l_converter_names = [self.parent.get_converter_info(converter_id).name
|
735
|
-
for converter_id, possible_flag
|
736
|
-
in enumerate(l_converter_success) if possible_flag > 0]
|
799
|
+
# Filter for converters which can perform this conversion
|
800
|
+
l_converter_names = self._get_possible_converters(in_format_info, out_format_info, only=only)
|
737
801
|
|
738
802
|
for converter_name in l_converter_names:
|
739
|
-
l_possible_conversions.append((converter_name,
|
803
|
+
l_possible_conversions.append((self.parent.get_converter_info(converter_name),
|
804
|
+
in_format_info, out_format_info))
|
740
805
|
|
741
806
|
return l_possible_conversions
|
742
807
|
|
808
|
+
@lru_cache
def _get_shared_attrs(self, source_format, target_format):
    """Get a list of the property attributes which both the source and target format feature

    Results are memoized by `lru_cache`, so the arguments must be hashable, and the returned list is the cached
    object itself - callers should copy it before modifying it.

    Parameters
    ----------
    source_format
        The source format, in any form accepted by the parent database's `get_format_info`
    target_format
        The target format, in any form accepted by the parent database's `get_format_info`

    Returns
    -------
    list[str]
        The keys of `FormatInfo.D_PROPERTY_ATTRS` for which the matching attribute is truthy on both formats
    """
    source_format_info = self.parent.get_format_info(source_format)
    target_format_info = self.parent.get_format_info(target_format)

    # A property only counts as shared if both formats are known to have it (None/False on either excludes it)
    return [attr for attr in FormatInfo.D_PROPERTY_ATTRS
            if getattr(source_format_info, attr) and getattr(target_format_info, attr)]
|
822
|
+
|
823
|
+
def _get_info_loss(self, path):
    """Get the number of attributes in both the first and last format which would be lost if a conversion path
    is traversed

    Parameters
    ----------
    path : list[int]
        The format IDs visited along the pathway, in order, starting with the input format and ending with the
        output format

    Returns
    -------
    int
        How many of the attributes shared by the first and last format are missing from at least one format
        along the path. 0 if the first and last format share no attributes
    """
    l_shared_attrs = self._get_shared_attrs(path[0], path[-1])

    if not l_shared_attrs:
        return 0

    # Work on a fresh list so the (possibly cached) list of shared attrs is never modified
    l_kept_attrs = list(l_shared_attrs)
    for format_id in path[1:]:
        # Look up the format by the ID stored in the path, not by the step's position within the path
        target_format_info = self.parent.get_format_info(format_id)

        # Keep only the attrs which this format also stores. A new list is built rather than removing from the
        # list being iterated over, which would skip the element following each removal
        l_kept_attrs = [attr for attr in l_kept_attrs if getattr(target_format_info, attr)]

        # Once everything has been lost, later steps can't change the result
        if not l_kept_attrs:
            break

    return len(l_shared_attrs) - len(l_kept_attrs)
|
846
|
+
|
847
|
+
def get_conversion_pathway(self,
                           in_format: str | int | FormatInfo,
                           out_format: str | int | FormatInfo,
                           only: Literal["all"] | Literal["supported"] | Literal["registered"] = "all"
                           ) -> list[tuple[ConverterInfo, FormatInfo, FormatInfo]] | None:
    """Gets a pathway to convert from one format to another

    A direct conversion is used if one exists. Otherwise the conversion graph selected by `only` is searched for
    a shortest path, preferring a path which loses the fewest attributes shared by the input and output formats.

    Parameters
    ----------
    in_format : str | int | FormatInfo
        The input format, in any form accepted by the parent database's `get_format_info`
    out_format : str | int | FormatInfo
        The output format, in any form accepted by the parent database's `get_format_info`
    only : Literal["all"] | Literal["supported"] | Literal["registered"]
        Which converters to limit the search to, for both the direct-conversion check and the graph search.
        Default "all"

    Returns
    -------
    list[tuple[ConverterInfo, FormatInfo, FormatInfo]] | None
        `None` if the input and output formats are the same or no pathway is found. Otherwise a list of steps,
        each a tuple of the converter's info, the step's input format info, and the step's output format info
    """

    in_format_info = self.parent.get_format_info(in_format)
    out_format_info = self.parent.get_format_info(out_format)

    # Check if the formats are the same
    if in_format_info is out_format_info:
        return None

    # First check if direct conversion is possible. `only` is forwarded so that a direct conversion is only
    # accepted if it uses a converter from the requested set
    l_possible_direct_conversions = self.get_possible_conversions(in_format=in_format,
                                                                  out_format=out_format,
                                                                  only=only)
    if l_possible_direct_conversions:
        # TODO: When there's some better measure of conversion quality, use it to choose which converter to use
        return [l_possible_direct_conversions[0]]

    graph: ig.Graph = self._get_desired_graph(only)

    # Query the graph for the shortest paths to perform this conversion. If no conversions are possible, igraph
    # will print a warning, which we catch and suppress here
    with catch_warnings(record=True) as l_warnings:
        l_paths: list[list[int]] = graph.get_shortest_paths(in_format_info.id, to=out_format_info.id)

    # Any other warning which was captured is passed on to stderr rather than being silently dropped
    for warning in l_warnings:
        if "Couldn't reach some vertices" not in str(warning.message):
            print(warning, file=sys.stderr)

    # Check if any paths are possible
    if not l_paths or not l_paths[0]:
        return None

    # Check each path to find the first which doesn't lose any unnecessary info, or else the one which loses the
    # least
    best_path: list[int] | None = None
    best_info_loss: int | None = None
    for path in l_paths:
        info_loss = self._get_info_loss(path)
        if best_info_loss is None or info_loss < best_info_loss:
            best_path = path
            best_info_loss = info_loss
            if best_info_loss == 0:
                break

    # Output the best path in the desired format - one step per consecutive pair of format IDs in the path
    l_steps: list[tuple[ConverterInfo, FormatInfo, FormatInfo]] = []
    for source_id, target_id in zip(best_path, best_path[1:]):
        # If multiple converters can perform this step, the first edge found is used
        converter_name: str = graph.es.select(_source=source_id, _target=target_id)[0][DB_NAME_KEY]
        # Look the converter up in this table's own parent database, as is done for the formats
        l_steps.append((self.parent.get_converter_info(converter_name),
                        self.parent.get_format_info(source_id),
                        self.parent.get_format_info(target_id)))

    return l_steps
|
905
|
+
|
743
906
|
def get_possible_formats(self, converter_name: str) -> tuple[list[FormatInfo], list[FormatInfo]]:
|
744
907
|
"""Get a list of input and output formats that a given converter supports
|
745
908
|
|
@@ -754,21 +917,10 @@ class ConversionsTable:
|
|
754
917
|
A tuple of a list of the supported input formats and a list of the supported output formats
|
755
918
|
"""
|
756
919
|
conv_id: int = self.parent.get_converter_info(converter_name).id
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
# check them for possible output formats
|
762
|
-
(l_possible_in_format_ids,
|
763
|
-
ll_filtered_in_out_format_success) = zip(*[(i, l_out_format_success) for i, l_out_format_success
|
764
|
-
in enumerate(ll_in_out_format_success)
|
765
|
-
if sum(l_out_format_success) > 0])
|
766
|
-
|
767
|
-
# As with input IDs, filter for output IDs where at least one input format has a degree of success index greater
|
768
|
-
# than 0. A bit more complicated for the second index, forcing us to do list comprehension to fetch a list
|
769
|
-
# across the table before summing
|
770
|
-
l_possible_out_format_ids = [j for j, _ in enumerate(ll_filtered_in_out_format_success[0]) if
|
771
|
-
sum([x[j] for x in ll_filtered_in_out_format_success]) > 0]
|
920
|
+
|
921
|
+
l_conversion_edges = self.graph.es.select(**{DB_CONV_ID_KEY: conv_id})
|
922
|
+
l_possible_in_format_ids = list({x.source for x in l_conversion_edges})
|
923
|
+
l_possible_out_format_ids = list({x.target for x in l_conversion_edges})
|
772
924
|
|
773
925
|
# Get the name for each format ID, and return lists of the names
|
774
926
|
return ([self.parent.get_format_info(x) for x in l_possible_in_format_ids],
|
@@ -875,9 +1027,9 @@ class DataConversionDatabase:
|
|
875
1027
|
self._l_format_info: list[FormatInfo | None] = [None] * (max_id+1)
|
876
1028
|
|
877
1029
|
for d_single_format_info in self.formats:
|
878
|
-
|
1030
|
+
lc_name: str = d_single_format_info[DB_FORMAT_EXT_KEY]
|
879
1031
|
|
880
|
-
format_info = FormatInfo(name=
|
1032
|
+
format_info = FormatInfo(name=lc_name,
|
881
1033
|
parent=self,
|
882
1034
|
d_single_format_info=d_single_format_info)
|
883
1035
|
|
@@ -887,24 +1039,17 @@ class DataConversionDatabase:
|
|
887
1039
|
self._conversions_table = ConversionsTable(l_converts_to=self.converts_to,
|
888
1040
|
parent=self)
|
889
1041
|
|
890
|
-
# Use the conversions
|
1042
|
+
# Use the conversions graph to prune any formats which have no valid conversions
|
891
1043
|
|
892
1044
|
# Get the graph which only includes conversions performed by supported converters
|
893
|
-
|
894
|
-
supported_table = [self._conversions_table.table[x] for x in l_supported_converter_ids]
|
1045
|
+
supported_graph = self._conversions_table.supported_graph
|
895
1046
|
|
896
1047
|
for format_id, format_info in enumerate(self._l_format_info):
|
897
1048
|
if not format_info:
|
898
1049
|
continue
|
899
1050
|
|
900
|
-
# Check if the format is supported as the input format for any conversion
|
901
|
-
|
902
|
-
if sum([sum(x) for x in ll_possible_from_conversions]) > 0:
|
903
|
-
continue
|
904
|
-
|
905
|
-
# Check if the format is supported as the output format for any conversion
|
906
|
-
ll_possible_to_conversions = [[y[format_id] for y in x] for x in supported_table]
|
907
|
-
if sum([sum(x) for x in ll_possible_to_conversions]) > 0:
|
1051
|
+
# Check if the format is supported as the input or output format for any conversion
|
1052
|
+
if supported_graph.degree(format_id) > 0:
|
908
1053
|
continue
|
909
1054
|
|
910
1055
|
# If we get here, the format isn't supported for any conversions, so remove it from our list
|
@@ -918,14 +1063,14 @@ class DataConversionDatabase:
|
|
918
1063
|
if not format_info:
|
919
1064
|
continue
|
920
1065
|
|
921
|
-
|
1066
|
+
lc_name = format_info.name.lower()
|
922
1067
|
|
923
1068
|
# Each name may correspond to multiple formats, so we use a list for each entry to list all possible
|
924
1069
|
# formats for each name
|
925
|
-
if
|
926
|
-
self._d_format_info[
|
1070
|
+
if lc_name not in self._d_format_info:
|
1071
|
+
self._d_format_info[lc_name] = []
|
927
1072
|
|
928
|
-
self._d_format_info[
|
1073
|
+
self._d_format_info[lc_name].append(format_info)
|
929
1074
|
|
930
1075
|
def get_converter_info(self, converter_name_or_id: str | int) -> ConverterInfo:
|
931
1076
|
"""Get a converter's info from either its name or ID
|
@@ -987,6 +1132,9 @@ class DataConversionDatabase:
|
|
987
1132
|
if format_name_or_id.startswith("."):
|
988
1133
|
format_name_or_id = format_name_or_id[1:]
|
989
1134
|
|
1135
|
+
# Convert the format name to lower-case to handle it case-insensitively
|
1136
|
+
format_name_or_id = format_name_or_id.lower()
|
1137
|
+
|
990
1138
|
# Check for a hyphen in the format, which indicates a preference from the user as to which, overriding the
|
991
1139
|
# `which` kwarg
|
992
1140
|
if "-" in format_name_or_id:
|
@@ -1050,6 +1198,22 @@ class DataConversionDatabase:
|
|
1050
1198
|
_database: DataConversionDatabase | None = None
|
1051
1199
|
|
1052
1200
|
|
1201
|
+
def get_database_path() -> str:
    """Get the absolute path to the database file

    Returns
    -------
    str
        The database filename (`const.DATABASE_FILENAME`) joined onto the directory which contains the
        `constants` module, after resolving that module's path with `os.path.realpath`
    """

    # For an interactive shell, __file__ won't be defined for this module, so the constants module is used as the
    # reference for locating the package directory instead
    package_dir = os.path.dirname(os.path.realpath(const.__file__))

    return os.path.join(package_dir, const.DATABASE_FILENAME)
|
1215
|
+
|
1216
|
+
|
1053
1217
|
def load_database() -> DataConversionDatabase:
|
1054
1218
|
"""Load and return a new instance of the data conversion database from the JSON database file in this package. This
|
1055
1219
|
function should not be called directly unless you specifically need a new instance of the database object and can't
|
@@ -1061,12 +1225,7 @@ def load_database() -> DataConversionDatabase:
|
|
1061
1225
|
"""
|
1062
1226
|
|
1063
1227
|
# Find and load the database JSON file
|
1064
|
-
|
1065
|
-
# For an interactive shell, __file__ won't be defined for this module, so use the constants module instead
|
1066
|
-
reference_file = os.path.realpath(const.__file__)
|
1067
|
-
|
1068
|
-
qualified_database_filename = os.path.join(os.path.dirname(reference_file), const.DATABASE_FILENAME)
|
1069
|
-
d_data: dict = json.load(open(qualified_database_filename, "r"))
|
1228
|
+
d_data: dict = json.load(open(get_database_path(), "r"))
|
1070
1229
|
|
1071
1230
|
return DataConversionDatabase(d_data)
|
1072
1231
|
|
@@ -1166,7 +1325,7 @@ def get_conversion_quality(converter_name: str,
|
|
1166
1325
|
|
1167
1326
|
|
1168
1327
|
def get_possible_conversions(in_format: str | int,
|
1169
|
-
out_format: str | int) -> list[tuple[
|
1328
|
+
out_format: str | int) -> list[tuple[ConverterInfo, FormatInfo, FormatInfo]]:
|
1170
1329
|
"""Get a list of converters which can perform a conversion from one format to another and disambiguate in the case
|
1171
1330
|
of ambiguous input/output formats
|
1172
1331
|
|
@@ -1179,8 +1338,8 @@ def get_possible_conversions(in_format: str | int,
|
|
1179
1338
|
|
1180
1339
|
Returns
|
1181
1340
|
-------
|
1182
|
-
list[tuple[
|
1183
|
-
A list of tuples, where each tuple's first item is the
|
1341
|
+
list[tuple[ConverterInfo, FormatInfo, FormatInfo]]
|
1342
|
+
A list of tuples, where each tuple's first item is the ConverterInfo of a converter which can perform a matching
|
1184
1343
|
conversion, the second is the info of the input format for this conversion, and the third is the info of the
|
1185
1344
|
output format
|
1186
1345
|
"""
|
@@ -1189,6 +1348,51 @@ def get_possible_conversions(in_format: str | int,
|
|
1189
1348
|
out_format=out_format)
|
1190
1349
|
|
1191
1350
|
|
1351
|
+
def get_conversion_pathway(in_format: str | int | FormatInfo,
                           out_format: str | int | FormatInfo,
                           only: Literal["all"] | Literal["supported"] | Literal["registered"] = "all"
                           ) -> list[tuple[ConverterInfo, FormatInfo, FormatInfo]] | None:
    """Get a list of conversions that can be performed to convert one format to another. This is primarily used when a
    direct conversion is not supported by any individual converter. Only one possible pathway will be returned,
    prioritising pathways which do not lose and then re-extrapolate any information stored by some formats and not
    others along the path.

    Parameters
    ----------
    in_format : str | int | FormatInfo
        The input file format. For this function, the format must be defined uniquely, either by using a disambiguated
        extension, ID, or FormatInfo
    out_format : str | int | FormatInfo
        The output file format. For this function, the format must be defined uniquely, either by using a disambiguated
        extension, ID, or FormatInfo
    only : Literal["all"] | Literal["supported"] | Literal["registered"], optional
        Which converters to limit the pathway search to:
        - "all" (default): All known converters
        - "supported": Only converters supported by this utility, even if not currently available (e.g. they don't work
          on your OS)
        - "registered": Only converters supported by this utility and currently available

    Returns
    -------
    list[tuple[ConverterInfo, FormatInfo, FormatInfo]] | None
        Will return `None` if no conversion pathway is possible or if the input and output formats are the same.
        Otherwise, will return a list of steps in the pathway, each being a tuple of:

        converter_info : ConverterInfo
            Info on the converter used to perform this step
        in_format : FormatInfo
            Input format for this step (if the first step, will be the input format to this function, otherwise will be
            the output format of the previous step)
        out_format : FormatInfo
            Output format from this step (if the last step, will be the output format for this function, otherwise will
            be the input format of the next step)
    """

    # Delegate to the conversions table of the database returned by `get_database`
    conversions_table = get_database().conversions_table

    return conversions_table.get_conversion_pathway(in_format=in_format,
                                                    out_format=out_format,
                                                    only=only)
|
1394
|
+
|
1395
|
+
|
1192
1396
|
def disambiguate_formats(converter_name: str,
|
1193
1397
|
in_format: str | int | FormatInfo,
|
1194
1398
|
out_format: str | int | FormatInfo) -> tuple[FormatInfo, FormatInfo]:
|
@@ -1216,11 +1420,11 @@ def disambiguate_formats(converter_name: str,
|
|
1216
1420
|
"""
|
1217
1421
|
|
1218
1422
|
# Regularize the converter name so we don't worry about case/spacing mismatches
|
1219
|
-
|
1423
|
+
converter_reg_name = regularize_name(converter_name)
|
1220
1424
|
|
1221
1425
|
# Get all possible conversions, and see if we only have one for this converter
|
1222
1426
|
l_possible_conversions = [x for x in get_possible_conversions(in_format, out_format)
|
1223
|
-
if x[0] ==
|
1427
|
+
if x[0].name == converter_reg_name]
|
1224
1428
|
|
1225
1429
|
if len(l_possible_conversions) == 1:
|
1226
1430
|
return l_possible_conversions[0][1], l_possible_conversions[0][2]
|