psdi-data-conversion 0.1.7__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- psdi_data_conversion/app.py +5 -408
- psdi_data_conversion/constants.py +11 -7
- psdi_data_conversion/converter.py +41 -28
- psdi_data_conversion/converters/base.py +18 -13
- psdi_data_conversion/database.py +284 -88
- psdi_data_conversion/gui/__init__.py +5 -0
- psdi_data_conversion/gui/accessibility.py +51 -0
- psdi_data_conversion/gui/env.py +239 -0
- psdi_data_conversion/gui/get.py +53 -0
- psdi_data_conversion/gui/post.py +176 -0
- psdi_data_conversion/gui/setup.py +102 -0
- psdi_data_conversion/main.py +70 -13
- psdi_data_conversion/static/content/convert.htm +105 -74
- psdi_data_conversion/static/content/convertato.htm +36 -26
- psdi_data_conversion/static/content/convertc2x.htm +39 -26
- psdi_data_conversion/static/content/download.htm +5 -5
- psdi_data_conversion/static/content/feedback.htm +2 -2
- psdi_data_conversion/static/content/header-links.html +2 -2
- psdi_data_conversion/static/content/index-versions/header-links.html +2 -2
- psdi_data_conversion/static/content/index-versions/psdi-common-header.html +9 -12
- psdi_data_conversion/static/content/psdi-common-header.html +9 -12
- psdi_data_conversion/static/javascript/accessibility.js +88 -61
- psdi_data_conversion/static/javascript/data.js +1 -3
- psdi_data_conversion/static/javascript/load_accessibility.js +50 -33
- psdi_data_conversion/static/styles/format.css +72 -18
- psdi_data_conversion/templates/accessibility.htm +274 -0
- psdi_data_conversion/templates/documentation.htm +6 -6
- psdi_data_conversion/templates/index.htm +73 -56
- psdi_data_conversion/{static/content → templates}/report.htm +28 -10
- psdi_data_conversion/testing/conversion_test_specs.py +26 -6
- psdi_data_conversion/testing/utils.py +6 -6
- {psdi_data_conversion-0.1.7.dist-info → psdi_data_conversion-0.2.0.dist-info}/METADATA +6 -2
- {psdi_data_conversion-0.1.7.dist-info → psdi_data_conversion-0.2.0.dist-info}/RECORD +36 -30
- psdi_data_conversion/static/content/accessibility.htm +0 -255
- {psdi_data_conversion-0.1.7.dist-info → psdi_data_conversion-0.2.0.dist-info}/WHEEL +0 -0
- {psdi_data_conversion-0.1.7.dist-info → psdi_data_conversion-0.2.0.dist-info}/entry_points.txt +0 -0
- {psdi_data_conversion-0.1.7.dist-info → psdi_data_conversion-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -206,8 +206,8 @@ class FileConverter:
|
|
206
206
|
data: dict[str, Any] | None = None,
|
207
207
|
abort_callback: Callable[[int], None] = abort_raise,
|
208
208
|
use_envvars=False,
|
209
|
-
|
210
|
-
|
209
|
+
input_dir=const.DEFAULT_INPUT_DIR,
|
210
|
+
output_dir=const.DEFAULT_OUTPUT_DIR,
|
211
211
|
max_file_size=None,
|
212
212
|
no_check=False,
|
213
213
|
log_file: str | None = None,
|
@@ -235,9 +235,9 @@ class FileConverter:
|
|
235
235
|
use_envvars : bool
|
236
236
|
If set to True, environment variables will be checked for any that set options for this class and used,
|
237
237
|
default False
|
238
|
-
|
238
|
+
input_dir : str
|
239
239
|
The location of input files relative to the current directory
|
240
|
-
|
240
|
+
output_dir : str
|
241
241
|
The location of output files relative to the current directory
|
242
242
|
max_file_size : float
|
243
243
|
The maximum allowed file size for input/output files, in MB. If 0, will be unlimited. Default 0 (unlimited)
|
@@ -296,8 +296,8 @@ class FileConverter:
|
|
296
296
|
# Set member variables directly from input
|
297
297
|
self.in_filename = filename
|
298
298
|
self.to_format = to_format
|
299
|
-
self.
|
300
|
-
self.
|
299
|
+
self.input_dir = input_dir
|
300
|
+
self.output_dir = output_dir
|
301
301
|
self.log_file = log_file
|
302
302
|
self.log_mode = log_mode
|
303
303
|
self.log_level = log_level
|
@@ -328,17 +328,22 @@ class FileConverter:
|
|
328
328
|
self.err: str | None = None
|
329
329
|
self.quality: str | None = None
|
330
330
|
|
331
|
-
#
|
332
|
-
if not os.path.exists(self.
|
333
|
-
os.
|
331
|
+
# Determine if the filename is fully-qualified, and if not, find it in the upload dir
|
332
|
+
if not os.path.exists(self.in_filename):
|
333
|
+
qualified_in_filename = os.path.join(self.input_dir, self.in_filename)
|
334
|
+
if os.path.exists(qualified_in_filename):
|
335
|
+
self.in_filename = qualified_in_filename
|
336
|
+
else:
|
337
|
+
FileConverterInputException(f"Input file {self.in_filename} not found, either absolute or relative "
|
338
|
+
f"to {self.input_dir}")
|
334
339
|
|
335
340
|
# Create directory 'downloads' if not extant.
|
336
|
-
if not os.path.exists(self.
|
337
|
-
os.makedirs(self.
|
341
|
+
if not os.path.exists(self.output_dir):
|
342
|
+
os.makedirs(self.output_dir, exist_ok=True)
|
338
343
|
|
339
344
|
self.local_filename = os.path.split(self.in_filename)[1]
|
340
345
|
self.filename_base = os.path.splitext(self.local_filename)[0]
|
341
|
-
self.out_filename = f"{self.
|
346
|
+
self.out_filename = f"{self.output_dir}/{self.filename_base}.{self.to_format_info.name}"
|
342
347
|
|
343
348
|
# Set up files to log to
|
344
349
|
self._setup_loggers()
|
@@ -441,7 +446,7 @@ class FileConverter:
|
|
441
446
|
if self.log_mode == const.LOG_FULL_FORCE:
|
442
447
|
self.output_log = self.log_file
|
443
448
|
else:
|
444
|
-
self.output_log = os.path.join(self.
|
449
|
+
self.output_log = os.path.join(self.output_dir, f"{self.filename_base}{const.OUTPUT_LOG_EXT}")
|
445
450
|
|
446
451
|
# If any previous log exists, delete it
|
447
452
|
if os.path.exists(self.output_log):
|
psdi_data_conversion/database.py
CHANGED
@@ -9,14 +9,19 @@ from __future__ import annotations
|
|
9
9
|
|
10
10
|
import json
|
11
11
|
import os
|
12
|
+
from copy import copy
|
12
13
|
from dataclasses import dataclass, field
|
14
|
+
from functools import lru_cache
|
13
15
|
from itertools import product
|
14
16
|
from logging import getLogger
|
15
17
|
from typing import Any, Literal, overload
|
16
18
|
|
19
|
+
import igraph as ig
|
20
|
+
|
17
21
|
from psdi_data_conversion import constants as const
|
18
|
-
from psdi_data_conversion.converter import
|
19
|
-
|
22
|
+
from psdi_data_conversion.converter import (L_REGISTERED_CONVERTERS, L_SUPPORTED_CONVERTERS,
|
23
|
+
get_registered_converter_class)
|
24
|
+
from psdi_data_conversion.converters.base import FileConverter, FileConverterException
|
20
25
|
from psdi_data_conversion.utils import regularize_name
|
21
26
|
|
22
27
|
# Keys for top-level and general items in the database
|
@@ -121,17 +126,40 @@ class ConverterInfo:
|
|
121
126
|
The regularized name of the converter
|
122
127
|
parent : DataConversionDatabase
|
123
128
|
The database which this belongs to
|
129
|
+
d_single_converter_info : dict[str, int | str]
|
130
|
+
The dict within the database file which describes this converter
|
124
131
|
d_data : dict[str, Any]
|
125
132
|
The loaded database dict
|
126
133
|
"""
|
127
134
|
|
128
135
|
self.name = regularize_name(name)
|
136
|
+
"""The regularized name of the converter"""
|
137
|
+
|
138
|
+
self.converter_class: type[FileConverter]
|
139
|
+
"""The class used to perform conversions with this converter"""
|
140
|
+
|
141
|
+
self.pretty_name: str
|
142
|
+
"""The name of the converter, properly spaced and capitalized"""
|
143
|
+
|
144
|
+
try:
|
145
|
+
self.converter_class = get_registered_converter_class(self.name)
|
146
|
+
self.pretty_name = self.converter_class.name
|
147
|
+
except KeyError:
|
148
|
+
self.converter_class = None
|
149
|
+
self.pretty_name = name
|
150
|
+
|
129
151
|
self.parent = parent
|
152
|
+
"""The parent database"""
|
130
153
|
|
131
154
|
# Get info about the converter from the database
|
132
155
|
self.id: int = d_single_converter_info.get(DB_ID_KEY, -1)
|
156
|
+
"""The converter's ID"""
|
157
|
+
|
133
158
|
self.description: str = d_single_converter_info.get(DB_DESC_KEY, "")
|
159
|
+
"""A description of the converter"""
|
160
|
+
|
134
161
|
self.url: str = d_single_converter_info.get(DB_URL_KEY, "")
|
162
|
+
"""The official URL for the converter"""
|
135
163
|
|
136
164
|
# Get necessary info about the converter from the class
|
137
165
|
try:
|
@@ -403,6 +431,12 @@ class FormatInfo:
|
|
403
431
|
"""Class providing information on a file format from the PSDI Data Conversion database
|
404
432
|
"""
|
405
433
|
|
434
|
+
D_PROPERTY_ATTRS = {const.QUAL_COMP_KEY: const.QUAL_COMP_LABEL,
|
435
|
+
const.QUAL_CONN_KEY: const.QUAL_CONN_LABEL,
|
436
|
+
const.QUAL_2D_KEY: const.QUAL_2D_LABEL,
|
437
|
+
const.QUAL_3D_KEY: const.QUAL_3D_LABEL}
|
438
|
+
"""A dict of attrs of this class which describe properties that a format may or may not have"""
|
439
|
+
|
406
440
|
def __init__(self,
|
407
441
|
name: str,
|
408
442
|
parent: DataConversionDatabase,
|
@@ -448,6 +482,9 @@ class FormatInfo:
|
|
448
482
|
self.three_dim = d_single_format_info.get(DB_FORMAT_3D_KEY)
|
449
483
|
"""Whether or not this format stores 3D structural information"""
|
450
484
|
|
485
|
+
self._lower_name: str = self.name.lower()
|
486
|
+
"""The format name all in lower-case"""
|
487
|
+
|
451
488
|
self._disambiguated_name: str | None = None
|
452
489
|
|
453
490
|
@property
|
@@ -455,12 +492,13 @@ class FormatInfo:
|
|
455
492
|
"""A unique name for this format which can be used to distinguish it from others which share the same extension,
|
456
493
|
by appending the name of each with a unique index"""
|
457
494
|
if self._disambiguated_name is None:
|
458
|
-
l_formats_with_same_name = [x for x in self.parent.l_format_info
|
495
|
+
l_formats_with_same_name = [x for x in self.parent.l_format_info
|
496
|
+
if x and x._lower_name == self._lower_name]
|
459
497
|
if len(l_formats_with_same_name) == 1:
|
460
|
-
self._disambiguated_name = self.
|
498
|
+
self._disambiguated_name = self._lower_name
|
461
499
|
else:
|
462
500
|
index_of_this = [i for i, x in enumerate(l_formats_with_same_name) if self is x][0]
|
463
|
-
self._disambiguated_name = f"{self.
|
501
|
+
self._disambiguated_name = f"{self._lower_name}-{index_of_this}"
|
464
502
|
return self._disambiguated_name
|
465
503
|
|
466
504
|
def __str__(self):
|
@@ -486,7 +524,7 @@ class PropertyConversionInfo:
|
|
486
524
|
def __post_init__(self):
|
487
525
|
"""Set the label and note based on input/output status
|
488
526
|
"""
|
489
|
-
self.label =
|
527
|
+
self.label = FormatInfo.D_PROPERTY_ATTRS[self.key]
|
490
528
|
|
491
529
|
if self.input_supported is None and self.output_supported is None:
|
492
530
|
self.note = const.QUAL_NOTE_BOTH_UNKNOWN
|
@@ -584,26 +622,56 @@ class ConversionsTable:
|
|
584
622
|
# Store references to needed data
|
585
623
|
self._l_converts_to = l_converts_to
|
586
624
|
|
587
|
-
# Build the conversion
|
588
|
-
# 1-indexed, so we add 1 to each of the lengths here
|
589
|
-
num_converters = len(parent.converters)
|
625
|
+
# Build the conversion graphs - each format is a vertex, each conversion is an edge
|
590
626
|
num_formats = len(parent.formats)
|
591
627
|
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
628
|
+
l_supported_conversions = [x for x in l_converts_to if
|
629
|
+
self.parent.get_converter_info(x[DB_CONV_ID_KEY]).name in L_SUPPORTED_CONVERTERS]
|
630
|
+
l_registered_conversions = [x for x in l_supported_conversions if
|
631
|
+
self.parent.get_converter_info(x[DB_CONV_ID_KEY]).name in L_REGISTERED_CONVERTERS]
|
632
|
+
|
633
|
+
# We make separate graphs for all known conversions, all supported conversions, and all registered conversions
|
634
|
+
self.graph: ig.Graph
|
635
|
+
self.supported_graph: ig.Graph
|
636
|
+
self.registered_graph: ig.Graph
|
637
|
+
|
638
|
+
for support_type, l_conversions in (("", l_converts_to),
|
639
|
+
("supported_", l_supported_conversions),
|
640
|
+
("registered_", l_registered_conversions)):
|
641
|
+
|
642
|
+
setattr(self, support_type+"graph",
|
643
|
+
ig.Graph(n=num_formats,
|
644
|
+
directed=True,
|
645
|
+
# Each vertex stores the disambiguated name of the format
|
646
|
+
vertex_attrs={DB_NAME_KEY: [x.disambiguated_name if x is not None else None
|
647
|
+
for x in parent.l_format_info]},
|
648
|
+
edges=[(x[DB_IN_ID_KEY], x[DB_OUT_ID_KEY]) for x in l_conversions],
|
649
|
+
# Each edge stores the id and name of the converter used for the conversion
|
650
|
+
edge_attrs={DB_CONV_ID_KEY: [x[DB_CONV_ID_KEY] for x in l_conversions],
|
651
|
+
DB_NAME_KEY: [self.parent.get_converter_info(x[DB_CONV_ID_KEY]).name
|
652
|
+
for x in l_conversions]}))
|
653
|
+
|
654
|
+
def _get_desired_graph(self,
                       only: Literal["all"] | Literal["supported"] | Literal["registered"] = "all") -> ig.Graph:
    """Select which of this table's conversion graphs to use

    Parameters
    ----------
    only : Literal["all"] | Literal["supported"] | Literal["registered"], optional
        Which graph to return: "all" (default) for `self.graph`, "supported" for `self.supported_graph`, or
        "registered" for `self.registered_graph`

    Returns
    -------
    ig.Graph
        The requested graph

    Raises
    ------
    ValueError
        If `only` is not one of the allowed values
    """
    # Compare against each allowed selector in turn and hand back the attribute paired with it
    for selector, attr_name in (("all", "graph"),
                                ("supported", "supported_graph"),
                                ("registered", "registered_graph")):
        if only == selector:
            return getattr(self, attr_name)

    raise ValueError(f"Invalid value \"{only}\" for keyword argument `only`. Allowed values are \"all\" "
                     "(default), \"supported\", and \"registered\".")
|
604
665
|
|
605
|
-
|
666
|
+
def _get_possible_converters(self, in_format_info: FormatInfo, out_format_info: FormatInfo,
                             only: Literal["all"] | Literal["supported"] | Literal["registered"] = "all"):
    """Look up the names of every converter which can convert directly from one format to another

    Parameters
    ----------
    in_format_info : FormatInfo
        The format to convert from; its `id` is used as the source vertex
    out_format_info : FormatInfo
        The format to convert to; its `id` is used as the target vertex
    only : Literal["all"] | Literal["supported"] | Literal["registered"], optional
        Which conversion graph to search, passed through to `_get_desired_graph`. Default "all"

    Returns
    -------
    list
        The value stored under `DB_NAME_KEY` on each edge running from the input format to the output format
    """
    # Every edge from the source vertex to the target vertex is one converter offering this conversion
    matching_edges = self._get_desired_graph(only).es.select(_source=in_format_info.id,
                                                             _target=out_format_info.id)

    l_converter_names = []
    for edge in matching_edges:
        l_converter_names.append(edge[DB_NAME_KEY])

    return l_converter_names
|
606
673
|
|
674
|
+
@lru_cache(maxsize=None)
|
607
675
|
def get_conversion_quality(self,
|
608
676
|
converter_name: str,
|
609
677
|
in_format: str | int,
|
@@ -633,14 +701,12 @@ class ConversionsTable:
|
|
633
701
|
else:
|
634
702
|
which_format = 0
|
635
703
|
|
636
|
-
# Get
|
637
|
-
|
638
|
-
|
639
|
-
out_info: int = self.parent.get_format_info(out_format, which_format)
|
704
|
+
# Get the full format info for each format
|
705
|
+
in_format_info = self.parent.get_format_info(in_format, which_format)
|
706
|
+
out_format_info: int = self.parent.get_format_info(out_format, which_format)
|
640
707
|
|
641
708
|
# First check if the conversion is possible
|
642
|
-
|
643
|
-
if not success_flag:
|
709
|
+
if converter_name not in self._get_possible_converters(in_format_info, out_format_info):
|
644
710
|
return None
|
645
711
|
|
646
712
|
# The conversion is possible. Now determine how many properties of the output format are not in the input
|
@@ -649,9 +715,9 @@ class ConversionsTable:
|
|
649
715
|
num_new_props = 0
|
650
716
|
any_unknown = False
|
651
717
|
d_prop_conversion_info: dict[str, PropertyConversionInfo] = {}
|
652
|
-
for prop in
|
653
|
-
in_prop: bool | None = getattr(
|
654
|
-
out_prop: bool | None = getattr(
|
718
|
+
for prop in FormatInfo.D_PROPERTY_ATTRS:
|
719
|
+
in_prop: bool | None = getattr(in_format_info, prop)
|
720
|
+
out_prop: bool | None = getattr(out_format_info, prop)
|
655
721
|
|
656
722
|
d_prop_conversion_info[prop] = PropertyConversionInfo(prop, in_prop, out_prop)
|
657
723
|
|
@@ -699,7 +765,9 @@ class ConversionsTable:
|
|
699
765
|
|
700
766
|
def get_possible_conversions(self,
|
701
767
|
in_format: str | int,
|
702
|
-
out_format: str | int
|
768
|
+
out_format: str | int,
|
769
|
+
only: Literal["all"] | Literal["supported"] | Literal["registered"] = "all"
|
770
|
+
) -> list[tuple[ConverterInfo, FormatInfo, FormatInfo]]:
|
703
771
|
"""Get a list of converters which can perform a conversion from one format to another, disambiguating in the
|
704
772
|
case of ambiguous formats and providing IDs for input/output formats for possible conversions
|
705
773
|
|
@@ -712,10 +780,10 @@ class ConversionsTable:
|
|
712
780
|
|
713
781
|
Returns
|
714
782
|
-------
|
715
|
-
list[tuple[
|
716
|
-
A list of tuples, where each tuple's first item is the
|
717
|
-
conversion, the second is the info of the input format for this conversion, and the third is the
|
718
|
-
output format
|
783
|
+
list[tuple[ConverterInfo, FormatInfo, FormatInfo]]
|
784
|
+
A list of tuples, where each tuple's first item is the ConverterInfo of a converter which can perform a
|
785
|
+
matching conversion, the second is the info of the input format for this conversion, and the third is the
|
786
|
+
info of the output format
|
719
787
|
"""
|
720
788
|
l_in_format_infos = self.parent.get_format_info(in_format, which="all")
|
721
789
|
l_out_format_infos = self.parent.get_format_info(out_format, which="all")
|
@@ -726,20 +794,107 @@ class ConversionsTable:
|
|
726
794
|
# Iterate over all possible combinations of input and output formats
|
727
795
|
for in_format_info, out_format_info in product(l_in_format_infos, l_out_format_infos):
|
728
796
|
|
729
|
-
#
|
730
|
-
|
731
|
-
|
732
|
-
# Filter for possible conversions and get the converter name and degree-of-success string
|
733
|
-
# for each possible conversion
|
734
|
-
l_converter_names = [self.parent.get_converter_info(converter_id).name
|
735
|
-
for converter_id, possible_flag
|
736
|
-
in enumerate(l_converter_success) if possible_flag > 0]
|
797
|
+
# Filter for converters which can perform this conversion
|
798
|
+
l_converter_names = self._get_possible_converters(in_format_info, out_format_info, only=only)
|
737
799
|
|
738
800
|
for converter_name in l_converter_names:
|
739
|
-
l_possible_conversions.append((converter_name,
|
801
|
+
l_possible_conversions.append((self.parent.get_converter_info(converter_name),
|
802
|
+
in_format_info, out_format_info))
|
740
803
|
|
741
804
|
return l_possible_conversions
|
742
805
|
|
806
|
+
@lru_cache
def _get_shared_attrs(self, source_format, target_format):
    """Work out which format properties are featured by both the source and the target format

    The result is memoised by `lru_cache`, so repeated calls with the same arguments return the same list object.
    Callers which need to modify the result should work on a copy.

    Parameters
    ----------
    source_format
        Identifier of the source format, as accepted by the parent database's `get_format_info`
    target_format
        Identifier of the target format, as accepted by the parent database's `get_format_info`

    Returns
    -------
    list[str]
        The keys of `FormatInfo.D_PROPERTY_ATTRS` which are truthy on both formats
    """
    source_format_info = self.parent.get_format_info(source_format)
    target_format_info = self.parent.get_format_info(target_format)

    # A property only counts as shared if both formats have it; unknown (None) values count as absent
    return [attr for attr in FormatInfo.D_PROPERTY_ATTRS
            if getattr(source_format_info, attr) and getattr(target_format_info, attr)]
|
820
|
+
|
821
|
+
def _get_info_loss(self, path):
    """Count how many of the properties shared by the first and last format of a conversion path would be lost
    if the path is traversed

    A property is lost if any format visited after the first one does not feature it.

    Parameters
    ----------
    path : list[int]
        The IDs of the formats visited along the path, in order, starting with the input format and ending with
        the output format

    Returns
    -------
    int
        The number of shared properties which do not survive every step of the path. 0 if the path is empty or
        the first and last formats share no properties
    """
    # An empty path visits no formats, so there is nothing which could be lost
    if not path:
        return 0

    l_shared_attrs = self._get_shared_attrs(path[0], path[-1])

    if not l_shared_attrs:
        return 0

    # Work on a copy so the (possibly cached) list of shared attrs is never modified
    l_kept_attrs = list(l_shared_attrs)

    # Look up each format actually visited on the path (not the loop index) and keep only the attrs it features
    for format_id in path[1:]:
        step_format_info = self.parent.get_format_info(format_id)

        # Build a new list rather than removing while iterating, which would skip over entries
        l_kept_attrs = [attr for attr in l_kept_attrs if getattr(step_format_info, attr)]

        # Nothing left to lose, so the remaining steps don't need to be checked
        if not l_kept_attrs:
            break

    return len(l_shared_attrs) - len(l_kept_attrs)
|
844
|
+
|
845
|
+
def get_conversion_pathway(self,
                           in_format: str | int | FormatInfo,
                           out_format: str | int | FormatInfo,
                           only: Literal["all"] | Literal["supported"] | Literal["registered"] = "all"
                           ) -> list[tuple[ConverterInfo, FormatInfo, FormatInfo]] | None:
    """Gets a pathway to convert from one format to another

    A direct conversion is preferred if one exists. Otherwise the conversion graph is searched for a shortest
    chain of conversions, preferring a chain which loses the fewest properties shared by the input and output
    formats.

    Parameters
    ----------
    in_format : str | int | FormatInfo
        The input format, as accepted by the parent database's `get_format_info`
    out_format : str | int | FormatInfo
        The output format, as accepted by the parent database's `get_format_info`
    only : Literal["all"] | Literal["supported"] | Literal["registered"], optional
        Which converters to limit the search to, for both the direct check and the graph search. Default "all"

    Returns
    -------
    list[tuple[ConverterInfo, FormatInfo, FormatInfo]] | None
        `None` if the input and output formats are the same or if no pathway exists. Otherwise a list of steps,
        each a tuple of the converter's info, the step's input format info, and the step's output format info
    """

    in_format_info = self.parent.get_format_info(in_format)
    out_format_info = self.parent.get_format_info(out_format)

    # Check if the formats are the same
    if in_format_info is out_format_info:
        return None

    # First check if direct conversion is possible, honouring the same `only` restriction as the graph search
    l_possible_direct_conversions = self.get_possible_conversions(in_format=in_format,
                                                                  out_format=out_format,
                                                                  only=only)
    if l_possible_direct_conversions:
        # TODO: When there's some better measure of conversion quality, use it to choose which converter to use
        return [l_possible_direct_conversions[0]]

    # Query the graph for the shortest paths to perform this conversion
    graph: ig.Graph = self._get_desired_graph(only)

    # NOTE(review): igraph is understood to report an unreachable target as an empty path rather than omitting
    # it, so empty paths are dropped here - confirm against the igraph documentation
    l_paths: list[list[int]] = [path for path
                                in graph.get_shortest_paths(in_format_info.id, to=out_format_info.id)
                                if path]

    # Check if any paths are possible
    if not l_paths:
        return None

    # Check each path to find the first which doesn't lose any unnecessary info, or else the one which loses the
    # least
    best_path: list[int] | None = None
    best_info_loss: int | None = None
    for path in l_paths:
        info_loss = self._get_info_loss(path)
        if best_info_loss is None or info_loss < best_info_loss:
            best_path = path
            best_info_loss = info_loss
            if best_info_loss == 0:
                break

    # Output the best path in the desired format, one step per consecutive pair of formats on the path
    l_steps: list[tuple[ConverterInfo, FormatInfo, FormatInfo]] = []
    for source_id, target_id in zip(best_path, best_path[1:]):
        # Several converters may offer this step; the first matching edge is used
        converter_name: str = graph.es.select(_source=source_id, _target=target_id)[0][DB_NAME_KEY]

        # Look everything up through the parent database, so the result refers to this table's own database
        l_steps.append((self.parent.get_converter_info(converter_name),
                        self.parent.get_format_info(source_id),
                        self.parent.get_format_info(target_id)))

    return l_steps
|
897
|
+
|
743
898
|
def get_possible_formats(self, converter_name: str) -> tuple[list[FormatInfo], list[FormatInfo]]:
|
744
899
|
"""Get a list of input and output formats that a given converter supports
|
745
900
|
|
@@ -754,21 +909,10 @@ class ConversionsTable:
|
|
754
909
|
A tuple of a list of the supported input formats and a list of the supported output formats
|
755
910
|
"""
|
756
911
|
conv_id: int = self.parent.get_converter_info(converter_name).id
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
# check them for possible output formats
|
762
|
-
(l_possible_in_format_ids,
|
763
|
-
ll_filtered_in_out_format_success) = zip(*[(i, l_out_format_success) for i, l_out_format_success
|
764
|
-
in enumerate(ll_in_out_format_success)
|
765
|
-
if sum(l_out_format_success) > 0])
|
766
|
-
|
767
|
-
# As with input IDs, filter for output IDs where at least one input format has a degree of success index greater
|
768
|
-
# than 0. A bit more complicated for the second index, forcing us to do list comprehension to fetch a list
|
769
|
-
# across the table before summing
|
770
|
-
l_possible_out_format_ids = [j for j, _ in enumerate(ll_filtered_in_out_format_success[0]) if
|
771
|
-
sum([x[j] for x in ll_filtered_in_out_format_success]) > 0]
|
912
|
+
|
913
|
+
l_conversion_edges = self.graph.es.select(**{DB_CONV_ID_KEY: conv_id})
|
914
|
+
l_possible_in_format_ids = list({x.source for x in l_conversion_edges})
|
915
|
+
l_possible_out_format_ids = list({x.target for x in l_conversion_edges})
|
772
916
|
|
773
917
|
# Get the name for each format ID, and return lists of the names
|
774
918
|
return ([self.parent.get_format_info(x) for x in l_possible_in_format_ids],
|
@@ -875,9 +1019,9 @@ class DataConversionDatabase:
|
|
875
1019
|
self._l_format_info: list[FormatInfo | None] = [None] * (max_id+1)
|
876
1020
|
|
877
1021
|
for d_single_format_info in self.formats:
|
878
|
-
|
1022
|
+
lc_name: str = d_single_format_info[DB_FORMAT_EXT_KEY]
|
879
1023
|
|
880
|
-
format_info = FormatInfo(name=
|
1024
|
+
format_info = FormatInfo(name=lc_name,
|
881
1025
|
parent=self,
|
882
1026
|
d_single_format_info=d_single_format_info)
|
883
1027
|
|
@@ -887,24 +1031,17 @@ class DataConversionDatabase:
|
|
887
1031
|
self._conversions_table = ConversionsTable(l_converts_to=self.converts_to,
|
888
1032
|
parent=self)
|
889
1033
|
|
890
|
-
# Use the conversions
|
1034
|
+
# Use the conversions graph to prune any formats which have no valid conversions
|
891
1035
|
|
892
1036
|
# Get a slice of the table which only includes supported converters
|
893
|
-
|
894
|
-
supported_table = [self._conversions_table.table[x] for x in l_supported_converter_ids]
|
1037
|
+
supported_graph = self._conversions_table.supported_graph
|
895
1038
|
|
896
1039
|
for format_id, format_info in enumerate(self._l_format_info):
|
897
1040
|
if not format_info:
|
898
1041
|
continue
|
899
1042
|
|
900
|
-
# Check if the format is supported as the input format for any conversion
|
901
|
-
|
902
|
-
if sum([sum(x) for x in ll_possible_from_conversions]) > 0:
|
903
|
-
continue
|
904
|
-
|
905
|
-
# Check if the format is supported as the output format for any conversion
|
906
|
-
ll_possible_to_conversions = [[y[format_id] for y in x] for x in supported_table]
|
907
|
-
if sum([sum(x) for x in ll_possible_to_conversions]) > 0:
|
1043
|
+
# Check if the format is supported as the input or output format for any conversion
|
1044
|
+
if supported_graph.degree(format_id) > 0:
|
908
1045
|
continue
|
909
1046
|
|
910
1047
|
# If we get here, the format isn't supported for any conversions, so remove it from our list
|
@@ -918,14 +1055,14 @@ class DataConversionDatabase:
|
|
918
1055
|
if not format_info:
|
919
1056
|
continue
|
920
1057
|
|
921
|
-
|
1058
|
+
lc_name = format_info.name.lower()
|
922
1059
|
|
923
1060
|
# Each name may correspond to multiple formats, so we use a list for each entry to list all possible
|
924
1061
|
# formats for each name
|
925
|
-
if
|
926
|
-
self._d_format_info[
|
1062
|
+
if lc_name not in self._d_format_info:
|
1063
|
+
self._d_format_info[lc_name] = []
|
927
1064
|
|
928
|
-
self._d_format_info[
|
1065
|
+
self._d_format_info[lc_name].append(format_info)
|
929
1066
|
|
930
1067
|
def get_converter_info(self, converter_name_or_id: str | int) -> ConverterInfo:
|
931
1068
|
"""Get a converter's info from either its name or ID
|
@@ -987,6 +1124,9 @@ class DataConversionDatabase:
|
|
987
1124
|
if format_name_or_id.startswith("."):
|
988
1125
|
format_name_or_id = format_name_or_id[1:]
|
989
1126
|
|
1127
|
+
# Convert the format name to lower-case to handle it case-insensitively
|
1128
|
+
format_name_or_id = format_name_or_id.lower()
|
1129
|
+
|
990
1130
|
# Check for a hyphen in the format, which indicates a preference from the user as to which, overriding the
|
991
1131
|
# `which` kwarg
|
992
1132
|
if "-" in format_name_or_id:
|
@@ -1050,6 +1190,22 @@ class DataConversionDatabase:
|
|
1050
1190
|
_database: DataConversionDatabase | None = None
|
1051
1191
|
|
1052
1192
|
|
1193
|
+
def get_database_path() -> str:
    """Locate the database file shipped alongside the package's constants module

    Returns
    -------
    str
        The database filename joined onto the directory containing the (symlink-resolved) constants module
    """

    # `__file__` isn't defined for this module in an interactive shell, so the constants module is used as the
    # reference point instead
    package_dir = os.path.dirname(os.path.realpath(const.__file__))

    return os.path.join(package_dir, const.DATABASE_FILENAME)
|
1207
|
+
|
1208
|
+
|
1053
1209
|
def load_database() -> DataConversionDatabase:
|
1054
1210
|
"""Load and return a new instance of the data conversion database from the JSON database file in this package. This
|
1055
1211
|
function should not be called directly unless you specifically need a new instance of the database object and can't
|
@@ -1061,12 +1217,7 @@ def load_database() -> DataConversionDatabase:
|
|
1061
1217
|
"""
|
1062
1218
|
|
1063
1219
|
# Find and load the database JSON file
|
1064
|
-
|
1065
|
-
# For an interactive shell, __file__ won't be defined for this module, so use the constants module instead
|
1066
|
-
reference_file = os.path.realpath(const.__file__)
|
1067
|
-
|
1068
|
-
qualified_database_filename = os.path.join(os.path.dirname(reference_file), const.DATABASE_FILENAME)
|
1069
|
-
d_data: dict = json.load(open(qualified_database_filename, "r"))
|
1220
|
+
d_data: dict = json.load(open(get_database_path(), "r"))
|
1070
1221
|
|
1071
1222
|
return DataConversionDatabase(d_data)
|
1072
1223
|
|
@@ -1166,7 +1317,7 @@ def get_conversion_quality(converter_name: str,
|
|
1166
1317
|
|
1167
1318
|
|
1168
1319
|
def get_possible_conversions(in_format: str | int,
|
1169
|
-
out_format: str | int) -> list[tuple[
|
1320
|
+
out_format: str | int) -> list[tuple[ConverterInfo, FormatInfo, FormatInfo]]:
|
1170
1321
|
"""Get a list of converters which can perform a conversion from one format to another and disambiguate in the case
|
1171
1322
|
of ambiguous input/output formats
|
1172
1323
|
|
@@ -1179,8 +1330,8 @@ def get_possible_conversions(in_format: str | int,
|
|
1179
1330
|
|
1180
1331
|
Returns
|
1181
1332
|
-------
|
1182
|
-
list[tuple[
|
1183
|
-
A list of tuples, where each tuple's first item is the
|
1333
|
+
list[tuple[ConverterInfo, FormatInfo, FormatInfo]]
|
1334
|
+
A list of tuples, where each tuple's first item is the ConverterInfo of a converter which can perform a matching
|
1184
1335
|
conversion, the second is the info of the input format for this conversion, and the third is the info of the
|
1185
1336
|
output format
|
1186
1337
|
"""
|
@@ -1189,6 +1340,51 @@ def get_possible_conversions(in_format: str | int,
|
|
1189
1340
|
out_format=out_format)
|
1190
1341
|
|
1191
1342
|
|
1343
|
+
def get_conversion_pathway(in_format: str | int | FormatInfo,
                           out_format: str | int | FormatInfo,
                           only: Literal["all"] | Literal["supported"] | Literal["registered"] = "all"
                           ) -> list[tuple[ConverterInfo, FormatInfo, FormatInfo]] | None:
    """Get a list of conversions that can be performed to convert one format to another. This is primarily used
    when a direct conversion is not supported by any individual converter. Only one possible pathway will be
    returned, prioritising pathways which do not lose and then re-extrapolate any information stored by some
    formats and not others along the path.

    Parameters
    ----------
    in_format : str | int | FormatInfo
        The input file format. For this function, the format must be defined uniquely, either by using a
        disambiguated extension, ID, or FormatInfo
    out_format : str | int | FormatInfo
        The output file format. For this function, the format must be defined uniquely, either by using a
        disambiguated extension, ID, or FormatInfo
    only : Literal["all"] | Literal["supported"] | Literal["registered"], optional
        Which converters to limit the pathway search to:
        - "all" (default): All known converters
        - "supported": Only converters supported by this utility, even if not currently available (e.g. they
          don't work on your OS)
        - "registered": Only converters supported by this utility and currently available

    Returns
    -------
    list[tuple[ConverterInfo, FormatInfo, FormatInfo]] | None
        Will return `None` if no conversion pathway is possible or if the input and output formats are the same.
        Otherwise, will return a list of steps in the pathway, each being a tuple of:

        converter_info : ConverterInfo
            Info on the converter used to perform this step
        in_format : FormatInfo
            Input format for this step (if the first step, will be the input format to this function, otherwise
            will be the output format of the previous step)
        out_format : FormatInfo
            Output format from this step (if the last step, will be the output format for this function,
            otherwise will be the input format of the next step)
    """

    # Delegate to the conversions table of the database provided by `get_database`
    conversions_table = get_database().conversions_table

    return conversions_table.get_conversion_pathway(in_format=in_format,
                                                    out_format=out_format,
                                                    only=only)
|
1386
|
+
|
1387
|
+
|
1192
1388
|
def disambiguate_formats(converter_name: str,
|
1193
1389
|
in_format: str | int | FormatInfo,
|
1194
1390
|
out_format: str | int | FormatInfo) -> tuple[FormatInfo, FormatInfo]:
|
@@ -1216,11 +1412,11 @@ def disambiguate_formats(converter_name: str,
|
|
1216
1412
|
"""
|
1217
1413
|
|
1218
1414
|
# Regularize the converter name so we don't worry about case/spacing mismatches
|
1219
|
-
|
1415
|
+
converter_reg_name = regularize_name(converter_name)
|
1220
1416
|
|
1221
1417
|
# Get all possible conversions, and see if we only have one for this converter
|
1222
1418
|
l_possible_conversions = [x for x in get_possible_conversions(in_format, out_format)
|
1223
|
-
if x[0] ==
|
1419
|
+
if x[0].name == converter_reg_name]
|
1224
1420
|
|
1225
1421
|
if len(l_possible_conversions) == 1:
|
1226
1422
|
return l_possible_conversions[0][1], l_possible_conversions[0][2]
|