pixeltable 0.2.7__py3-none-any.whl → 0.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/column.py +0 -6
- pixeltable/catalog/table.py +15 -42
- pixeltable/catalog/table_version.py +15 -96
- pixeltable/catalog/table_version_path.py +1 -6
- pixeltable/datatransfer/label_studio.py +108 -182
- pixeltable/datatransfer/remote.py +3 -31
- pixeltable/env.py +4 -8
- pixeltable/io/globals.py +2 -3
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/schema.py +0 -3
- pixeltable/tool/create_test_db_dump.py +3 -5
- {pixeltable-0.2.7.dist-info → pixeltable-0.2.8.dist-info}/METADATA +2 -2
- {pixeltable-0.2.7.dist-info → pixeltable-0.2.8.dist-info}/RECORD +16 -18
- pixeltable/metadata/converters/convert_15.py +0 -29
- pixeltable/metadata/converters/util.py +0 -63
- {pixeltable-0.2.7.dist-info → pixeltable-0.2.8.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.7.dist-info → pixeltable-0.2.8.dist-info}/WHEEL +0 -0
pixeltable/__version__.py
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
# These version placeholders will be replaced during build.
|
|
2
|
-
__version__ = "0.2.7"
|
|
3
|
-
__version_tuple__ = (0, 2, 7)
|
|
2
|
+
__version__ = "0.2.8"
|
|
3
|
+
__version_tuple__ = (0, 2, 8)
|
pixeltable/catalog/column.py
CHANGED
|
@@ -87,11 +87,6 @@ class Column:
|
|
|
87
87
|
self.schema_version_add = schema_version_add
|
|
88
88
|
self.schema_version_drop = schema_version_drop
|
|
89
89
|
|
|
90
|
-
# stored_proxy may be set later if this is a non-stored column.
|
|
91
|
-
# if col1.stored_proxy == col2, then also col1 == col2.proxy_base.
|
|
92
|
-
self.stored_proxy: Optional[Column] = None
|
|
93
|
-
self.proxy_base: Optional[Column] = None
|
|
94
|
-
|
|
95
90
|
self._records_errors = records_errors
|
|
96
91
|
|
|
97
92
|
# column in the stored table for the values of this Column
|
|
@@ -101,7 +96,6 @@ class Column:
|
|
|
101
96
|
# computed cols also have storage columns for the exception string and type
|
|
102
97
|
self.sa_errormsg_col: Optional[sql.schema.Column] = None
|
|
103
98
|
self.sa_errortype_col: Optional[sql.schema.Column] = None
|
|
104
|
-
|
|
105
99
|
from .table_version import TableVersion
|
|
106
100
|
self.tbl: Optional[TableVersion] = None # set by owning TableVersion
|
|
107
101
|
|
pixeltable/catalog/table.py
CHANGED
|
@@ -735,58 +735,31 @@ class Table(SchemaObject):
|
|
|
735
735
|
col_mapping: An optional mapping of columns from this `Table` to columns in the `Remote`.
|
|
736
736
|
"""
|
|
737
737
|
# TODO(aaron-siegel): Refactor `col_mapping`
|
|
738
|
+
if len(self._get_remotes()) > 0:
|
|
739
|
+
raise excs.Error('Linking more than one `Remote` to a table is not currently supported.')
|
|
738
740
|
self._check_is_dropped()
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
push_cols = remote.get_export_columns()
|
|
742
|
-
pull_cols = remote.get_import_columns()
|
|
741
|
+
export_cols = remote.get_export_columns()
|
|
742
|
+
import_cols = remote.get_import_columns()
|
|
743
743
|
is_col_mapping_user_specified = col_mapping is not None
|
|
744
744
|
if col_mapping is None:
|
|
745
745
|
# Use the identity mapping by default if `col_mapping` is not specified
|
|
746
|
-
col_mapping = {col: col for col in itertools.chain(
|
|
747
|
-
self._validate_remote(
|
|
748
|
-
_logger.info(f'Linking remote {remote} to table `{self.get_name()}`.')
|
|
746
|
+
col_mapping = {col: col for col in itertools.chain(export_cols.keys(), import_cols.keys())}
|
|
747
|
+
self._validate_remote(export_cols, import_cols, col_mapping, is_col_mapping_user_specified)
|
|
749
748
|
self.tbl_version_path.tbl_version.link(remote, col_mapping)
|
|
750
749
|
print(f'Linked remote {remote} to table `{self.get_name()}`.')
|
|
751
750
|
|
|
752
|
-
def unlink(
|
|
753
|
-
self,
|
|
754
|
-
remotes: Optional['pixeltable.datatransfer.Remote' | list['pixeltable.datatransfer.Remote']] = None,
|
|
755
|
-
*,
|
|
756
|
-
delete_remote_data: bool = False,
|
|
757
|
-
ignore_errors: bool = False
|
|
758
|
-
) -> None:
|
|
751
|
+
def unlink(self) -> None:
|
|
759
752
|
"""
|
|
760
753
|
Unlinks this table's `Remote`s.
|
|
761
|
-
|
|
762
|
-
Args:
|
|
763
|
-
remotes: If specified, will unlink only the specified `Remote` or list of `Remote`s. If not specified,
|
|
764
|
-
will unlink all of this table's `Remote`s.
|
|
765
|
-
ignore_errors (bool): If `True`, no exception will be thrown if the specified `Remote` is not linked
|
|
766
|
-
to this table.
|
|
767
|
-
delete_remote_data (bool): If `True`, then the remote data source will also be deleted. WARNING: This
|
|
768
|
-
is a destructive operation that will delete data outside Pixeltable, and cannot be undone.
|
|
769
|
-
|
|
770
754
|
"""
|
|
771
755
|
self._check_is_dropped()
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
if remotes is None:
|
|
775
|
-
remotes = list(all_remotes.keys())
|
|
776
|
-
elif isinstance(remotes, pixeltable.datatransfer.Remote):
|
|
777
|
-
remotes = [remotes]
|
|
778
|
-
|
|
779
|
-
# Validation
|
|
780
|
-
if not ignore_errors:
|
|
781
|
-
for remote in remotes:
|
|
782
|
-
if remote not in all_remotes:
|
|
783
|
-
raise excs.Error(f'Remote {remote} is not linked to table `{self.get_name()}`')
|
|
756
|
+
remotes = self._get_remotes()
|
|
757
|
+
assert len(remotes) <= 1
|
|
784
758
|
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
remote.delete()
|
|
759
|
+
remote = next(iter(remotes.keys()))
|
|
760
|
+
self.tbl_version_path.tbl_version.unlink(remote)
|
|
761
|
+
# TODO: Provide an option to auto-delete the project
|
|
762
|
+
print(f'Unlinked remote {remote} from table `{self.get_name()}`.')
|
|
790
763
|
|
|
791
764
|
def _validate_remote(
|
|
792
765
|
self,
|
|
@@ -823,7 +796,7 @@ class Table(SchemaObject):
|
|
|
823
796
|
r_col_type = export_cols[r_col]
|
|
824
797
|
if not r_col_type.is_supertype_of(t_col_type):
|
|
825
798
|
raise excs.Error(
|
|
826
|
-
f'Column `{t_col}` cannot be exported to remote column `{r_col}` (incompatible types
|
|
799
|
+
f'Column `{t_col}` cannot be exported to remote column `{r_col}` (incompatible types)'
|
|
827
800
|
)
|
|
828
801
|
if r_col in import_cols:
|
|
829
802
|
# Validate that the remote column can be assigned to the table column
|
|
@@ -834,7 +807,7 @@ class Table(SchemaObject):
|
|
|
834
807
|
r_col_type = import_cols[r_col]
|
|
835
808
|
if not t_col_type.is_supertype_of(r_col_type):
|
|
836
809
|
raise excs.Error(
|
|
837
|
-
f'Column `{t_col}` cannot be imported from remote column `{r_col}` (incompatible types
|
|
810
|
+
f'Column `{t_col}` cannot be imported from remote column `{r_col}` (incompatible types)'
|
|
838
811
|
)
|
|
839
812
|
|
|
840
813
|
def _get_remotes(self) -> dict[pixeltable.datatransfer.Remote, dict[str, str]]:
|
|
pixeltable/catalog/table_version.py
CHANGED
|
@@ -24,7 +24,6 @@ from pixeltable.utils.filecache import FileCache
|
|
|
24
24
|
from pixeltable.utils.media_store import MediaStore
|
|
25
25
|
from .column import Column
|
|
26
26
|
from .globals import UpdateStatus, POS_COLUMN_NAME, is_valid_identifier
|
|
27
|
-
from ..func.globals import resolve_symbol
|
|
28
27
|
|
|
29
28
|
_logger = logging.getLogger('pixeltable')
|
|
30
29
|
|
|
@@ -121,7 +120,7 @@ class TableVersion:
|
|
|
121
120
|
# init schema after we determined whether we're a component view, and before we create the store table
|
|
122
121
|
self.cols: list[Column] = [] # contains complete history of columns, incl dropped ones
|
|
123
122
|
self.cols_by_name: dict[str, Column] = {} # contains only user-facing (named) columns visible in this version
|
|
124
|
-
self.cols_by_id: dict[int, Column] = {} # contains only columns visible in this version
|
|
123
|
+
self.cols_by_id: dict[int, Column] = {} # contains only columns visible in this version
|
|
125
124
|
self.idx_md = tbl_md.index_md # needed for _create_tbl_md()
|
|
126
125
|
self.idxs_by_name: dict[str, TableVersion.IndexInfo] = {} # contains only actively maintained indices
|
|
127
126
|
self._init_schema(tbl_md, schema_version_md)
|
|
@@ -269,16 +268,6 @@ class TableVersion:
|
|
|
269
268
|
col.value_expr = exprs.Expr.from_dict(col_md.value_expr)
|
|
270
269
|
self._record_value_expr(col)
|
|
271
270
|
|
|
272
|
-
# if this is a stored proxy column, resolve the relationships with its proxy base.
|
|
273
|
-
if col_md.proxy_base is not None:
|
|
274
|
-
# proxy_base must have a strictly smaller id, so we must already have encountered it
|
|
275
|
-
# in traversal order; and if the proxy column is active at this version, then the
|
|
276
|
-
# proxy base must necessarily be active as well. This motivates the following assertion.
|
|
277
|
-
assert col_md.proxy_base in self.cols_by_id
|
|
278
|
-
base_col = self.cols_by_id[col_md.proxy_base]
|
|
279
|
-
base_col.stored_proxy = col
|
|
280
|
-
col.proxy_base = base_col
|
|
281
|
-
|
|
282
271
|
def _init_idxs(self, tbl_md: schema.TableMd) -> None:
|
|
283
272
|
self.idx_md = tbl_md.index_md
|
|
284
273
|
self.idxs_by_name = {}
|
|
@@ -544,16 +533,8 @@ class TableVersion:
|
|
|
544
533
|
dependent_user_cols = [c for c in col.dependent_cols if c.name is not None]
|
|
545
534
|
if len(dependent_user_cols) > 0:
|
|
546
535
|
raise excs.Error(
|
|
547
|
-
f'Cannot drop column
|
|
548
|
-
f'{", ".join(c.name for c in dependent_user_cols)}'
|
|
549
|
-
)
|
|
550
|
-
dependent_remotes = [remote for remote, col_mapping in self.remotes.items() if name in col_mapping]
|
|
551
|
-
if len(dependent_remotes) > 0:
|
|
552
|
-
raise excs.Error(
|
|
553
|
-
f'Cannot drop column `{name}` because the following remotes depend on it:\n'
|
|
554
|
-
f'{", ".join(str(r) for r in dependent_remotes)}'
|
|
555
|
-
)
|
|
556
|
-
assert col.stored_proxy is None # since there are no dependent remotes
|
|
536
|
+
f'Cannot drop column {name} because the following columns depend on it:\n',
|
|
537
|
+
f'{", ".join([c.name for c in dependent_user_cols])}')
|
|
557
538
|
|
|
558
539
|
# we're creating a new schema version
|
|
559
540
|
self.version += 1
|
|
@@ -964,88 +945,26 @@ class TableVersion:
|
|
|
964
945
|
|
|
965
946
|
@classmethod
|
|
966
947
|
def _init_remote(cls, remote_md: dict[str, Any]) -> Tuple[pixeltable.datatransfer.Remote, dict[str, str]]:
|
|
967
|
-
|
|
968
|
-
|
|
948
|
+
module = importlib.import_module(remote_md['module'])
|
|
949
|
+
remote_cls = getattr(module, remote_md['class'])
|
|
969
950
|
remote = remote_cls.from_dict(remote_md['remote_md'])
|
|
970
951
|
col_mapping = remote_md['col_mapping']
|
|
971
952
|
return remote, col_mapping
|
|
972
953
|
|
|
973
954
|
def link(self, remote: pixeltable.datatransfer.Remote, col_mapping: dict[str, str]) -> None:
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
cols_by_name = self.path.cols_by_name() # Includes base columns
|
|
978
|
-
stored_proxies_needed = []
|
|
979
|
-
for col_name in col_mapping.keys():
|
|
980
|
-
col = cols_by_name[col_name]
|
|
981
|
-
if col.col_type.is_media_type() and not (col.is_stored and col.compute_func) and not col.stored_proxy:
|
|
982
|
-
stored_proxies_needed.append(col)
|
|
955
|
+
timestamp = time.time()
|
|
956
|
+
self.version += 1
|
|
957
|
+
self.remotes[remote] = col_mapping
|
|
983
958
|
with Env.get().engine.begin() as conn:
|
|
984
|
-
self.
|
|
985
|
-
self.remotes[remote] = col_mapping
|
|
986
|
-
preceding_schema_version = None
|
|
987
|
-
if len(stored_proxies_needed) > 0:
|
|
988
|
-
_logger.info(f'Creating stored proxies for columns: {[col.name for col in stored_proxies_needed]}')
|
|
989
|
-
# Create stored proxies for columns that need one. Increment the schema version
|
|
990
|
-
# accordingly.
|
|
991
|
-
preceding_schema_version = self.schema_version
|
|
992
|
-
self.schema_version = self.version
|
|
993
|
-
proxy_cols = [self.create_stored_proxy(col) for col in stored_proxies_needed]
|
|
994
|
-
# Add the columns; this will also update table metadata.
|
|
995
|
-
# TODO Add to base tables
|
|
996
|
-
self._add_columns(proxy_cols, conn)
|
|
997
|
-
# We don't need to retain `UpdateStatus` since the stored proxies are intended to be
|
|
998
|
-
# invisible to the user.
|
|
999
|
-
self._update_md(time.time(), preceding_schema_version, conn)
|
|
1000
|
-
|
|
1001
|
-
def create_stored_proxy(self, col: Column) -> Column:
|
|
1002
|
-
from pixeltable import exprs
|
|
1003
|
-
|
|
1004
|
-
assert col.col_type.is_media_type() and not (col.is_stored and col.compute_func) and not col.stored_proxy
|
|
1005
|
-
proxy_col = Column(
|
|
1006
|
-
name=None,
|
|
1007
|
-
computed_with=exprs.ColumnRef(col).apply(lambda x: x, col_type=col.col_type),
|
|
1008
|
-
stored=True,
|
|
1009
|
-
col_id=self.next_col_id,
|
|
1010
|
-
sa_col_type=col.col_type.to_sa_type(),
|
|
1011
|
-
schema_version_add=self.schema_version
|
|
1012
|
-
)
|
|
1013
|
-
proxy_col.tbl = self
|
|
1014
|
-
self.next_col_id += 1
|
|
1015
|
-
col.stored_proxy = proxy_col
|
|
1016
|
-
proxy_col.proxy_base = col
|
|
1017
|
-
return proxy_col
|
|
959
|
+
self._update_md(timestamp, None, conn)
|
|
1018
960
|
|
|
1019
961
|
def unlink(self, remote: pixeltable.datatransfer.Remote) -> None:
|
|
1020
962
|
assert remote in self.remotes
|
|
1021
963
|
timestamp = time.time()
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
col_name
|
|
1025
|
-
for other_remote, col_mapping in self.remotes.items() if other_remote != remote
|
|
1026
|
-
for col_name in col_mapping.keys()
|
|
1027
|
-
}
|
|
1028
|
-
cols_by_name = self.path.cols_by_name() # Includes base columns
|
|
1029
|
-
stored_proxy_deletions_needed = [
|
|
1030
|
-
cols_by_name[col_name]
|
|
1031
|
-
for col_name in this_remote_col_names
|
|
1032
|
-
if col_name not in other_remote_col_names and cols_by_name[col_name].stored_proxy
|
|
1033
|
-
]
|
|
964
|
+
self.version += 1
|
|
965
|
+
del self.remotes[remote]
|
|
1034
966
|
with Env.get().engine.begin() as conn:
|
|
1035
|
-
self.
|
|
1036
|
-
del self.remotes[remote]
|
|
1037
|
-
preceding_schema_version = None
|
|
1038
|
-
if len(stored_proxy_deletions_needed) > 0:
|
|
1039
|
-
preceding_schema_version = self.schema_version
|
|
1040
|
-
self.schema_version = self.version
|
|
1041
|
-
proxy_cols = [col.stored_proxy for col in stored_proxy_deletions_needed]
|
|
1042
|
-
for col in stored_proxy_deletions_needed:
|
|
1043
|
-
assert col.stored_proxy is not None and col.stored_proxy.proxy_base == col
|
|
1044
|
-
col.stored_proxy.proxy_base = None
|
|
1045
|
-
col.stored_proxy = None
|
|
1046
|
-
# TODO Drop from base tables
|
|
1047
|
-
self._drop_columns(proxy_cols)
|
|
1048
|
-
self._update_md(timestamp, preceding_schema_version, conn)
|
|
967
|
+
self._update_md(timestamp, None, conn)
|
|
1049
968
|
|
|
1050
969
|
def get_remotes(self) -> dict[pixeltable.datatransfer.Remote, dict[str, str]]:
|
|
1051
970
|
return self.remotes
|
|
@@ -1151,15 +1070,15 @@ class TableVersion:
|
|
|
1151
1070
|
column_md[col.id] = schema.ColumnMd(
|
|
1152
1071
|
id=col.id, col_type=col.col_type.as_dict(), is_pk=col.is_pk,
|
|
1153
1072
|
schema_version_add=col.schema_version_add, schema_version_drop=col.schema_version_drop,
|
|
1154
|
-
value_expr=value_expr_dict, stored=col.stored
|
|
1155
|
-
proxy_base=col.proxy_base.id if col.proxy_base else None)
|
|
1073
|
+
value_expr=value_expr_dict, stored=col.stored)
|
|
1156
1074
|
return column_md
|
|
1157
1075
|
|
|
1158
1076
|
@classmethod
|
|
1159
1077
|
def _create_remotes_md(cls, remotes: dict['pixeltable.datatransfer.Remote', dict[str, str]]) -> list[dict[str, Any]]:
|
|
1160
1078
|
return [
|
|
1161
1079
|
{
|
|
1162
|
-
'
|
|
1080
|
+
'module': type(remote).__module__,
|
|
1081
|
+
'class': type(remote).__qualname__,
|
|
1163
1082
|
'remote_md': remote.to_dict(),
|
|
1164
1083
|
'col_mapping': col_mapping
|
|
1165
1084
|
}
|
|
pixeltable/catalog/table_version_path.py
CHANGED
|
@@ -106,14 +106,9 @@ class TableVersionPath:
|
|
|
106
106
|
if self.base is not None:
|
|
107
107
|
base_cols = self.base.columns()
|
|
108
108
|
# we only include base columns that don't conflict with one of our column names
|
|
109
|
-
result.extend(c for c in base_cols if c.name not in self.tbl_version.cols_by_name)
|
|
109
|
+
result.extend([c for c in base_cols if c.name not in self.tbl_version.cols_by_name])
|
|
110
110
|
return result
|
|
111
111
|
|
|
112
|
-
def cols_by_name(self) -> dict[str, Column]:
|
|
113
|
-
"""Return a dict of all user columns visible in this tbl version path, including columns from bases"""
|
|
114
|
-
cols = self.columns()
|
|
115
|
-
return {col.name: col for col in cols}
|
|
116
|
-
|
|
117
112
|
def get_column(self, name: str, include_bases: bool = True) -> Optional[Column]:
|
|
118
113
|
"""Return the column with the given name, or None if not found"""
|
|
119
114
|
col = self.tbl_version.cols_by_name.get(name)
|
|
pixeltable/datatransfer/label_studio.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
|
-
import json
|
|
2
1
|
import logging
|
|
3
2
|
import os
|
|
4
3
|
from dataclasses import dataclass
|
|
5
4
|
from pathlib import Path
|
|
6
|
-
from typing import Any, Iterator, Optional
|
|
5
|
+
from typing import Any, Iterator, Optional
|
|
7
6
|
from xml.etree import ElementTree
|
|
8
7
|
|
|
9
8
|
import PIL.Image
|
|
@@ -16,7 +15,6 @@ import pixeltable.env as env
|
|
|
16
15
|
import pixeltable.exceptions as excs
|
|
17
16
|
from pixeltable import Table
|
|
18
17
|
from pixeltable.datatransfer.remote import Remote
|
|
19
|
-
from pixeltable.exprs import ColumnRef, DataRow
|
|
20
18
|
from pixeltable.utils import coco
|
|
21
19
|
|
|
22
20
|
_logger = logging.getLogger('pixeltable')
|
|
@@ -43,55 +41,26 @@ class LabelStudioProject(Remote):
|
|
|
43
41
|
"""
|
|
44
42
|
# TODO(aaron-siegel): Add link in docstring to a Label Studio howto
|
|
45
43
|
|
|
46
|
-
def __init__(self, project_id: int
|
|
44
|
+
def __init__(self, project_id: int):
|
|
47
45
|
self.project_id = project_id
|
|
48
|
-
self.media_import_method = media_import_method
|
|
49
46
|
self._project: Optional[label_studio_sdk.project.Project] = None
|
|
50
47
|
|
|
51
48
|
@classmethod
|
|
52
|
-
def create(cls, title: str, label_config: str,
|
|
49
|
+
def create(cls, title: str, label_config: str, **kwargs: Any) -> 'LabelStudioProject':
|
|
53
50
|
"""
|
|
54
51
|
Creates a new Label Studio project, using the Label Studio client configured in Pixeltable.
|
|
55
52
|
|
|
56
53
|
Args:
|
|
57
54
|
title: The title of the project.
|
|
58
55
|
label_config: The Label Studio project configuration, in XML format.
|
|
59
|
-
media_import_method: The method to use when importing media columns to Label Studio:
|
|
60
|
-
- `file`: Media will be sent to Label Studio as a file on the local filesystem. This method can be
|
|
61
|
-
used if Pixeltable and Label Studio are running on the same host.
|
|
62
|
-
- `post`: Media will be sent to Label Studio via HTTP post. This should generally only be used for
|
|
63
|
-
prototyping; due to restrictions in Label Studio, it can only be used with projects that have
|
|
64
|
-
just one data field.
|
|
65
56
|
**kwargs: Additional keyword arguments for the new project; these will be passed to `start_project`
|
|
66
57
|
in the Label Studio SDK.
|
|
67
58
|
"""
|
|
68
|
-
# TODO(aaron-siegel): Add media_import_method = 'url' as an option
|
|
69
59
|
# Check that the config is valid before creating the project
|
|
70
|
-
|
|
71
|
-
if media_import_method == 'post' and len(config.data_keys) > 1:
|
|
72
|
-
raise excs.Error('`media_import_method` cannot be `post` if there is more than one data key')
|
|
73
|
-
|
|
60
|
+
cls._parse_project_config(label_config)
|
|
74
61
|
project = _label_studio_client().start_project(title=title, label_config=label_config, **kwargs)
|
|
75
|
-
|
|
76
|
-
if media_import_method == 'file':
|
|
77
|
-
# We need to set up a local storage connection to receive media files
|
|
78
|
-
os.environ['LABEL_STUDIO_LOCAL_FILES_DOCUMENT_ROOT'] = str(env.Env.get().home)
|
|
79
|
-
try:
|
|
80
|
-
project.connect_local_import_storage(local_store_path=str(env.Env.get().media_dir))
|
|
81
|
-
except HTTPError as exc:
|
|
82
|
-
if exc.errno == 400:
|
|
83
|
-
response: dict = json.loads(exc.response.text)
|
|
84
|
-
if 'validation_errors' in response and 'non_field_errors' in response['validation_errors'] \
|
|
85
|
-
and 'LOCAL_FILES_SERVING_ENABLED' in response['validation_errors']['non_field_errors'][0]:
|
|
86
|
-
raise excs.Error(
|
|
87
|
-
'`media_import_method` is set to `file`, but your Label Studio server is not configured '
|
|
88
|
-
'for local file storage.\nPlease set the `LABEL_STUDIO_LOCAL_FILES_SERVING_ENABLED` '
|
|
89
|
-
'environment variable to `true` in the environment where your Label Studio server is running.'
|
|
90
|
-
) from exc
|
|
91
|
-
raise # Handle any other exception type normally
|
|
92
|
-
|
|
93
62
|
project_id = project.get_params()['id']
|
|
94
|
-
return LabelStudioProject(project_id
|
|
63
|
+
return LabelStudioProject(project_id)
|
|
95
64
|
|
|
96
65
|
@property
|
|
97
66
|
def project(self) -> label_studio_sdk.project.Project:
|
|
@@ -115,14 +84,14 @@ class LabelStudioProject(Remote):
|
|
|
115
84
|
return self.project_params['title']
|
|
116
85
|
|
|
117
86
|
@property
|
|
118
|
-
def
|
|
119
|
-
return self.
|
|
87
|
+
def _project_config(self) -> '_LabelStudioConfig':
|
|
88
|
+
return self._parse_project_config(self.project_params['label_config'])
|
|
120
89
|
|
|
121
90
|
def get_export_columns(self) -> dict[str, pxt.ColumnType]:
|
|
122
91
|
"""
|
|
123
92
|
The data keys and preannotation fields specified in this Label Studio project.
|
|
124
93
|
"""
|
|
125
|
-
return self.
|
|
94
|
+
return self._project_config.export_columns
|
|
126
95
|
|
|
127
96
|
def get_import_columns(self) -> dict[str, pxt.ColumnType]:
|
|
128
97
|
"""
|
|
@@ -138,13 +107,13 @@ class LabelStudioProject(Remote):
|
|
|
138
107
|
_logger.info(f'Syncing Label Studio project "{self.project_title}" with table `{t.get_name()}`'
|
|
139
108
|
f' (export: {export_data}, import: {import_data}).')
|
|
140
109
|
# Collect all existing tasks into a dict with entries `rowid: task`
|
|
141
|
-
tasks = {tuple(task['meta']['rowid']): task for task in self.
|
|
110
|
+
tasks = {tuple(task['meta']['rowid']): task for task in self._fetch_all_tasks()}
|
|
142
111
|
if export_data:
|
|
143
|
-
self.
|
|
112
|
+
self._create_tasks_from_table(t, col_mapping, tasks)
|
|
144
113
|
if import_data:
|
|
145
|
-
self.
|
|
114
|
+
self._update_table_from_tasks(t, col_mapping, tasks)
|
|
146
115
|
|
|
147
|
-
def
|
|
116
|
+
def _fetch_all_tasks(self) -> Iterator[dict]:
|
|
148
117
|
page = 1
|
|
149
118
|
unknown_task_count = 0
|
|
150
119
|
while True:
|
|
@@ -163,10 +132,32 @@ class LabelStudioProject(Remote):
|
|
|
163
132
|
f'Skipped {unknown_task_count} unrecognized task(s) when syncing Label Studio project "{self.project_title}".'
|
|
164
133
|
)
|
|
165
134
|
|
|
166
|
-
def
|
|
135
|
+
def _update_table_from_tasks(self, t: Table, col_mapping: dict[str, str], tasks: dict[tuple, dict]) -> None:
|
|
136
|
+
# `col_mapping` is guaranteed to be a one-to-one dict whose values are a superset
|
|
137
|
+
# of `get_import_columns`
|
|
138
|
+
assert ANNOTATIONS_COLUMN in col_mapping.values()
|
|
139
|
+
annotations_column = next(k for k, v in col_mapping.items() if v == ANNOTATIONS_COLUMN)
|
|
140
|
+
updates = [
|
|
141
|
+
{
|
|
142
|
+
'_rowid': task['meta']['rowid'],
|
|
143
|
+
# Replace [] by None to indicate no annotations. We do want to sync rows with no annotations,
|
|
144
|
+
# in order to properly handle the scenario where existing annotations have been deleted in
|
|
145
|
+
# Label Studio.
|
|
146
|
+
annotations_column: task[ANNOTATIONS_COLUMN] if len(task[ANNOTATIONS_COLUMN]) > 0 else None
|
|
147
|
+
}
|
|
148
|
+
for task in tasks.values()
|
|
149
|
+
]
|
|
150
|
+
if len(updates) > 0:
|
|
151
|
+
_logger.info(
|
|
152
|
+
f'Updating table `{t.get_name()}`, column `{annotations_column}` with {len(updates)} total annotations.'
|
|
153
|
+
)
|
|
154
|
+
t.batch_update(updates)
|
|
155
|
+
annotations_count = sum(len(task[ANNOTATIONS_COLUMN]) for task in tasks.values())
|
|
156
|
+
print(f'Synced {annotations_count} annotation(s) from {len(updates)} existing task(s) in {self}.')
|
|
167
157
|
|
|
158
|
+
def _create_tasks_from_table(self, t: Table, col_mapping: dict[str, str], existing_tasks: dict[tuple, dict]) -> None:
|
|
168
159
|
t_col_types = t.column_types()
|
|
169
|
-
config = self.
|
|
160
|
+
config = self._project_config
|
|
170
161
|
|
|
171
162
|
# Columns in `t` that map to Label Studio data keys
|
|
172
163
|
t_data_cols = [
|
|
@@ -187,23 +178,24 @@ class LabelStudioProject(Remote):
|
|
|
187
178
|
_logger.debug('`t_rl_cols`: %s', t_rl_cols)
|
|
188
179
|
_logger.debug('`rl_info`: %s', rl_info)
|
|
189
180
|
|
|
190
|
-
if
|
|
191
|
-
#
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
# Send media to Label Studio by local file transfer.
|
|
195
|
-
self.__update_tasks_by_files(t, col_mapping, existing_tasks, t_data_cols, t_rl_cols, rl_info)
|
|
181
|
+
if len(t_data_cols) == 1 and t_col_types[t_data_cols[0]].is_media_type():
|
|
182
|
+
# With a single media column, we can post local files to Label Studio using
|
|
183
|
+
# the file transfer API.
|
|
184
|
+
self._create_tasks_by_post(t, col_mapping, existing_tasks, t_rl_cols, rl_info, t_data_cols[0])
|
|
196
185
|
else:
|
|
197
|
-
|
|
186
|
+
# Either a single non-media column or multiple columns. Either way, we can't
|
|
187
|
+
# use the file upload API and need to rely on externally accessible URLs for
|
|
188
|
+
# media columns.
|
|
189
|
+
self._create_tasks_by_urls(t, col_mapping, existing_tasks, t_data_cols, t_col_types, t_rl_cols, rl_info)
|
|
198
190
|
|
|
199
|
-
def
|
|
191
|
+
def _create_tasks_by_post(
|
|
200
192
|
self,
|
|
201
193
|
t: Table,
|
|
202
194
|
col_mapping: dict[str, str],
|
|
203
195
|
existing_tasks: dict[tuple, dict],
|
|
204
|
-
media_col_name: str,
|
|
205
196
|
t_rl_cols: list[str],
|
|
206
|
-
rl_info: list['_RectangleLabel']
|
|
197
|
+
rl_info: list['_RectangleLabel'],
|
|
198
|
+
media_col_name: str
|
|
207
199
|
) -> None:
|
|
208
200
|
is_stored = t[media_col_name].col.is_stored
|
|
209
201
|
# If it's a stored column, we can use `localpath`
|
|
@@ -233,13 +225,13 @@ class LabelStudioProject(Remote):
|
|
|
233
225
|
os.remove(file)
|
|
234
226
|
|
|
235
227
|
# Update the task with `rowid` metadata
|
|
236
|
-
self.project.update_task(task_id, meta={'rowid': row.rowid
|
|
228
|
+
self.project.update_task(task_id, meta={'rowid': row.rowid})
|
|
237
229
|
|
|
238
230
|
# Convert coco annotations to predictions
|
|
239
231
|
coco_annotations = [row.vals[i] for i in rl_col_idxs]
|
|
240
232
|
_logger.debug('`coco_annotations`: %s', coco_annotations)
|
|
241
233
|
predictions = [
|
|
242
|
-
self.
|
|
234
|
+
self._coco_to_predictions(
|
|
243
235
|
coco_annotations[i], col_mapping[t_rl_cols[i]], rl_info[i], task_id=task_id
|
|
244
236
|
)
|
|
245
237
|
for i in range(len(coco_annotations))
|
|
@@ -250,95 +242,67 @@ class LabelStudioProject(Remote):
|
|
|
250
242
|
|
|
251
243
|
print(f'Created {tasks_created} new task(s) in {self}.')
|
|
252
244
|
|
|
253
|
-
self.
|
|
245
|
+
self._delete_stale_tasks(existing_tasks, row_ids_in_pxt, tasks_created)
|
|
254
246
|
|
|
255
|
-
def
|
|
247
|
+
def _create_tasks_by_urls(
|
|
256
248
|
self,
|
|
257
249
|
t: Table,
|
|
258
250
|
col_mapping: dict[str, str],
|
|
259
251
|
existing_tasks: dict[tuple, dict],
|
|
260
252
|
t_data_cols: list[str],
|
|
253
|
+
t_col_types: dict[str, pxt.ColumnType],
|
|
261
254
|
t_rl_cols: list[str],
|
|
262
255
|
rl_info: list['_RectangleLabel']
|
|
263
|
-
)
|
|
256
|
+
):
|
|
257
|
+
# TODO(aaron-siegel): This is just a placeholder (implementation is not complete or tested!)
|
|
258
|
+
selection = [
|
|
259
|
+
t[col_name].fileurl if t_col_types[col_name].is_media_type() else t[col_name]
|
|
260
|
+
for col_name in t_data_cols
|
|
261
|
+
]
|
|
264
262
|
r_data_cols = [col_mapping[col_name] for col_name in t_data_cols]
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
if not t[col_name].col_type.is_media_type():
|
|
268
|
-
# Not a media column; query the data directly
|
|
269
|
-
col_refs[col_name] = t[col_name]
|
|
270
|
-
elif t[col_name].col.stored_proxy:
|
|
271
|
-
# Media column that has a stored proxy; use it. We have to give it a name,
|
|
272
|
-
# since it's an anonymous column
|
|
273
|
-
col_refs[f'{col_name}_proxy'] = ColumnRef(t[col_name].col.stored_proxy).localpath
|
|
274
|
-
else:
|
|
275
|
-
# Media column without a stored proxy; this means it's a stored computed column,
|
|
276
|
-
# and we can just use the localpath
|
|
277
|
-
col_refs[col_name] = t[col_name].localpath
|
|
278
|
-
|
|
279
|
-
df = t.select(*[t[col] for col in t_rl_cols], **col_refs)
|
|
280
|
-
rl_col_idxs: Optional[list[int]] = None # We have to wait until we begin iterating to populate these
|
|
281
|
-
data_col_idxs: Optional[list[int]] = None
|
|
282
|
-
|
|
283
|
-
row_ids_in_pxt: set[tuple] = set()
|
|
263
|
+
rows = t.select(*selection, *[t[col] for col in t_rl_cols])
|
|
264
|
+
new_rows = filter(lambda row: row.rowid not in existing_tasks, rows._exec())
|
|
284
265
|
tasks_created = 0
|
|
285
|
-
|
|
286
|
-
page = []
|
|
287
|
-
|
|
288
|
-
def create_task_info(row: DataRow) -> dict:
|
|
289
|
-
data_vals = [row.vals[idx] for idx in data_col_idxs]
|
|
290
|
-
coco_annotations = [row.vals[idx] for idx in rl_col_idxs]
|
|
291
|
-
# For media columns, we need to transform the paths into Label Studio's bespoke path format
|
|
292
|
-
for i in range(len(t_data_cols)):
|
|
293
|
-
if t[t_data_cols[i]].col_type.is_media_type():
|
|
294
|
-
data_vals[i] = self.__localpath_to_lspath(data_vals[i])
|
|
295
|
-
predictions = [
|
|
296
|
-
self.__coco_to_predictions(coco_annotations[i], col_mapping[t_rl_cols[i]], rl_info[i])
|
|
297
|
-
for i in range(len(coco_annotations))
|
|
298
|
-
]
|
|
299
|
-
return {
|
|
300
|
-
'data': dict(zip(r_data_cols, data_vals)),
|
|
301
|
-
'meta': {'rowid': row.rowid, 'v_min': row.v_min},
|
|
302
|
-
'predictions': predictions
|
|
303
|
-
}
|
|
266
|
+
row_ids_in_pxt: set[tuple] = set()
|
|
304
267
|
|
|
305
|
-
for
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
row_ids_in_pxt.add(row.rowid)
|
|
310
|
-
if row.rowid in existing_tasks:
|
|
311
|
-
# A task for this row already exists; see if it needs an update.
|
|
312
|
-
# Get the v_min record from task metadata. Default to 0 if no v_min record is found
|
|
313
|
-
old_v_min = int(existing_tasks[row.rowid]['meta'].get('v_min', 0))
|
|
314
|
-
print(f'{old_v_min} {row.v_min}')
|
|
315
|
-
if row.v_min > old_v_min:
|
|
316
|
-
_logger.debug(f'Updating task for rowid {row.rowid} ({row.v_min} > {old_v_min}).')
|
|
317
|
-
task_info = create_task_info(row)
|
|
318
|
-
self.project.update_task(existing_tasks[row.rowid]['id'], **task_info)
|
|
319
|
-
tasks_updated += 1
|
|
320
|
-
else:
|
|
321
|
-
# No task exists for this row; we need to create one.
|
|
322
|
-
page.append(create_task_info(row))
|
|
323
|
-
tasks_created += 1
|
|
324
|
-
if len(page) == _PAGE_SIZE:
|
|
325
|
-
self.project.import_tasks(page)
|
|
326
|
-
page.clear()
|
|
268
|
+
for page in more_itertools.batched(new_rows, n=_PAGE_SIZE):
|
|
269
|
+
data_col_idxs = [expr.slot_idx for expr in rows._select_list_exprs[:len(t_data_cols)]]
|
|
270
|
+
rl_col_idxs = [expr.slot_idx for expr in rows._select_list_exprs[len(t_data_cols):]]
|
|
271
|
+
tasks = []
|
|
327
272
|
|
|
328
|
-
|
|
329
|
-
|
|
273
|
+
for row in page:
|
|
274
|
+
row_ids_in_pxt.add(row.rowid)
|
|
275
|
+
data_vals = [row.vals[i] for i in data_col_idxs]
|
|
276
|
+
coco_annotations = [row.vals[i] for i in rl_col_idxs]
|
|
277
|
+
predictions = [
|
|
278
|
+
self._coco_to_predictions(coco_annotations[i], col_mapping[t_rl_cols[i]], rl_info[i])
|
|
279
|
+
for i in range(len(coco_annotations))
|
|
280
|
+
]
|
|
330
281
|
|
|
331
|
-
|
|
282
|
+
# Validate media columns
|
|
283
|
+
# TODO Support this if label studio is running on localhost?
|
|
284
|
+
for i in range(len(data_vals)):
|
|
285
|
+
if t[t_data_cols[i]].col_type.is_media_type() and data_vals[i].startswith("file://"):
|
|
286
|
+
raise excs.Error(
|
|
287
|
+
'Cannot use locally stored media files in a `LabelStudioProject` with more than one '
|
|
288
|
+
'data key. (This is a limitation of Label Studio; see warning here: '
|
|
289
|
+
'https://labelstud.io/guide/tasks.html)'
|
|
290
|
+
)
|
|
332
291
|
|
|
333
|
-
|
|
292
|
+
tasks.append({
|
|
293
|
+
'data': zip(r_data_cols, data_vals),
|
|
294
|
+
'meta': {'rowid': row.rowid},
|
|
295
|
+
'predictions': predictions
|
|
296
|
+
})
|
|
334
297
|
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
298
|
+
self.project.import_tasks(tasks)
|
|
299
|
+
tasks_created += len(tasks)
|
|
300
|
+
|
|
301
|
+
print(f'Created {tasks_created} new task(s) in {self}.')
|
|
302
|
+
|
|
303
|
+
self._delete_stale_tasks(existing_tasks, row_ids_in_pxt, tasks_created)
|
|
340
304
|
|
|
341
|
-
def
|
|
305
|
+
def _delete_stale_tasks(self, existing_tasks: dict[tuple, dict], row_ids_in_pxt: set[tuple], tasks_created: int):
|
|
342
306
|
tasks_to_delete = [
|
|
343
307
|
task['id'] for rowid, task in existing_tasks.items()
|
|
344
308
|
if rowid not in row_ids_in_pxt
|
|
@@ -350,42 +314,19 @@ class LabelStudioProject(Remote):
|
|
|
350
314
|
self.project.delete_tasks(tasks_to_delete)
|
|
351
315
|
print(f'Deleted {len(tasks_to_delete)} tasks(s) in {self} that are no longer present in Pixeltable.')
|
|
352
316
|
|
|
353
|
-
def __update_table_from_tasks(self, t: Table, col_mapping: dict[str, str], tasks: dict[tuple, dict]) -> None:
|
|
354
|
-
# `col_mapping` is guaranteed to be a one-to-one dict whose values are a superset
|
|
355
|
-
# of `get_pull_columns`
|
|
356
|
-
assert ANNOTATIONS_COLUMN in col_mapping.values()
|
|
357
|
-
annotations_column = next(k for k, v in col_mapping.items() if v == ANNOTATIONS_COLUMN)
|
|
358
|
-
updates = [
|
|
359
|
-
{
|
|
360
|
-
'_rowid': task['meta']['rowid'],
|
|
361
|
-
# Replace [] by None to indicate no annotations. We do want to sync rows with no annotations,
|
|
362
|
-
# in order to properly handle the scenario where existing annotations have been deleted in
|
|
363
|
-
# Label Studio.
|
|
364
|
-
annotations_column: task[ANNOTATIONS_COLUMN] if len(task[ANNOTATIONS_COLUMN]) > 0 else None
|
|
365
|
-
}
|
|
366
|
-
for task in tasks.values()
|
|
367
|
-
]
|
|
368
|
-
if len(updates) > 0:
|
|
369
|
-
_logger.info(
|
|
370
|
-
f'Updating table `{t.get_name()}`, column `{annotations_column}` with {len(updates)} total annotations.'
|
|
371
|
-
)
|
|
372
|
-
t.batch_update(updates)
|
|
373
|
-
annotations_count = sum(len(task[ANNOTATIONS_COLUMN]) for task in tasks.values())
|
|
374
|
-
print(f'Synced {annotations_count} annotation(s) from {len(updates)} existing task(s) in {self}.')
|
|
375
|
-
|
|
376
317
|
def to_dict(self) -> dict[str, Any]:
|
|
377
|
-
return {'project_id': self.project_id
|
|
318
|
+
return {'project_id': self.project_id}
|
|
378
319
|
|
|
379
320
|
@classmethod
|
|
380
321
|
def from_dict(cls, md: dict[str, Any]) -> 'LabelStudioProject':
|
|
381
|
-
return LabelStudioProject(md['project_id']
|
|
322
|
+
return LabelStudioProject(md['project_id'])
|
|
382
323
|
|
|
383
324
|
def __repr__(self) -> str:
|
|
384
325
|
name = self.project.get_params()['title']
|
|
385
326
|
return f'LabelStudioProject `{name}`'
|
|
386
327
|
|
|
387
328
|
@classmethod
|
|
388
|
-
def
|
|
329
|
+
def _parse_project_config(cls, xml_config: str) -> '_LabelStudioConfig':
|
|
389
330
|
"""
|
|
390
331
|
Parses a Label Studio XML config, extracting the names and Pixeltable types of
|
|
391
332
|
all input variables.
|
|
@@ -394,27 +335,28 @@ class LabelStudioProject(Remote):
|
|
|
394
335
|
if root.tag.lower() != 'view':
|
|
395
336
|
raise excs.Error('Root of Label Studio config must be a `View`')
|
|
396
337
|
config = _LabelStudioConfig(
|
|
397
|
-
data_keys=dict(cls.
|
|
398
|
-
rectangle_labels=dict(cls.
|
|
338
|
+
data_keys=dict(cls._parse_data_keys_config(root)),
|
|
339
|
+
rectangle_labels=dict(cls._parse_rectangle_labels_config(root))
|
|
399
340
|
)
|
|
400
341
|
config.validate()
|
|
401
342
|
return config
|
|
402
343
|
|
|
403
344
|
@classmethod
|
|
404
|
-
def
|
|
345
|
+
def _parse_data_keys_config(cls, root: ElementTree.Element) -> Iterator[tuple[str, '_DataKey']]:
|
|
405
346
|
for element in root:
|
|
406
347
|
if 'value' in element.attrib and element.attrib['value'][0] == '$':
|
|
407
348
|
remote_col_name = element.attrib['value'][1:]
|
|
408
|
-
|
|
349
|
+
if 'name' not in element.attrib:
|
|
350
|
+
raise excs.Error(f'Data key is missing `name` attribute: `{remote_col_name}`')
|
|
409
351
|
element_type = _LS_TAG_MAP.get(element.tag.lower())
|
|
410
352
|
if element_type is None:
|
|
411
353
|
raise excs.Error(
|
|
412
354
|
f'Unsupported Label Studio data type: `{element.tag}` (in data key `{remote_col_name}`)'
|
|
413
355
|
)
|
|
414
|
-
yield remote_col_name, _DataKey(
|
|
356
|
+
yield remote_col_name, _DataKey(element.attrib['name'], element_type)
|
|
415
357
|
|
|
416
358
|
@classmethod
|
|
417
|
-
def
|
|
359
|
+
def _parse_rectangle_labels_config(cls, root: ElementTree.Element) -> Iterator[tuple[str, '_RectangleLabel']]:
|
|
418
360
|
for element in root:
|
|
419
361
|
if element.tag.lower() == 'rectanglelabels':
|
|
420
362
|
name = element.attrib['name']
|
|
@@ -429,7 +371,7 @@ class LabelStudioProject(Remote):
|
|
|
429
371
|
yield name, _RectangleLabel(to_name=to_name, labels=labels)
|
|
430
372
|
|
|
431
373
|
@classmethod
|
|
432
|
-
def
|
|
374
|
+
def _coco_to_predictions(
|
|
433
375
|
cls,
|
|
434
376
|
coco_annotations: dict[str, Any],
|
|
435
377
|
from_name: str,
|
|
@@ -466,25 +408,10 @@ class LabelStudioProject(Remote):
|
|
|
466
408
|
else:
|
|
467
409
|
return {'result': result}
|
|
468
410
|
|
|
469
|
-
def delete(self) -> None:
|
|
470
|
-
"""
|
|
471
|
-
Deletes this Label Studio project. This will remove all data and annotations
|
|
472
|
-
associated with this project in Label Studio.
|
|
473
|
-
"""
|
|
474
|
-
title = self.project_title
|
|
475
|
-
_label_studio_client().delete_project(self.project_id)
|
|
476
|
-
print(f'Deleted Label Studio project: {title}')
|
|
477
|
-
|
|
478
|
-
def __eq__(self, other) -> bool:
|
|
479
|
-
return isinstance(other, LabelStudioProject) and self.project_id == other.project_id
|
|
480
|
-
|
|
481
|
-
def __hash__(self) -> int:
|
|
482
|
-
return hash(self.project_id)
|
|
483
|
-
|
|
484
411
|
|
|
485
412
|
@dataclass(frozen=True)
|
|
486
413
|
class _DataKey:
|
|
487
|
-
name:
|
|
414
|
+
name: str # The 'name' attribute of the data key; may differ from the field name
|
|
488
415
|
column_type: pxt.ColumnType
|
|
489
416
|
|
|
490
417
|
|
|
@@ -500,7 +427,7 @@ class _LabelStudioConfig:
|
|
|
500
427
|
rectangle_labels: dict[str, _RectangleLabel]
|
|
501
428
|
|
|
502
429
|
def validate(self) -> None:
|
|
503
|
-
data_key_names = set(key.name for key in self.data_keys.values()
|
|
430
|
+
data_key_names = set(key.name for key in self.data_keys.values())
|
|
504
431
|
for name, rl in self.rectangle_labels.items():
|
|
505
432
|
if rl.to_name not in data_key_names:
|
|
506
433
|
raise excs.Error(
|
|
@@ -510,7 +437,7 @@ class _LabelStudioConfig:
|
|
|
510
437
|
|
|
511
438
|
@property
|
|
512
439
|
def export_columns(self) -> dict[str, pxt.ColumnType]:
|
|
513
|
-
data_key_cols = {
|
|
440
|
+
data_key_cols = {key_name: key_info.column_type for key_name, key_info in self.data_keys.items()}
|
|
514
441
|
rl_cols = {name: pxt.JsonType() for name in self.rectangle_labels.keys()}
|
|
515
442
|
return {**data_key_cols, **rl_cols}
|
|
516
443
|
|
|
@@ -518,7 +445,6 @@ class _LabelStudioConfig:
|
|
|
518
445
|
ANNOTATIONS_COLUMN = 'annotations'
|
|
519
446
|
_PAGE_SIZE = 100 # This is the default used in the LS SDK
|
|
520
447
|
_LS_TAG_MAP = {
|
|
521
|
-
'header': pxt.StringType(),
|
|
522
448
|
'text': pxt.StringType(),
|
|
523
449
|
'image': pxt.ImageType(),
|
|
524
450
|
'video': pxt.VideoType(),
|
|
@@ -45,12 +45,6 @@ class Remote(abc.ABC):
|
|
|
45
45
|
import_data: If `True`, data from this table will be imported from the remote during synchronization.
|
|
46
46
|
"""
|
|
47
47
|
|
|
48
|
-
@abc.abstractmethod
|
|
49
|
-
def delete(self) -> None:
|
|
50
|
-
"""
|
|
51
|
-
Deletes this `Remote`.
|
|
52
|
-
"""
|
|
53
|
-
|
|
54
48
|
@abc.abstractmethod
|
|
55
49
|
def to_dict(self) -> dict[str, Any]: ...
|
|
56
50
|
|
|
@@ -62,11 +56,9 @@ class Remote(abc.ABC):
|
|
|
62
56
|
# A remote that cannot be synced, used mainly for testing.
|
|
63
57
|
class MockRemote(Remote):
|
|
64
58
|
|
|
65
|
-
def __init__(self,
|
|
66
|
-
self.name = name
|
|
59
|
+
def __init__(self, export_cols: dict[str, ts.ColumnType], import_cols: dict[str, ts.ColumnType]):
|
|
67
60
|
self.export_cols = export_cols
|
|
68
61
|
self.import_cols = import_cols
|
|
69
|
-
self.__is_deleted = False
|
|
70
62
|
|
|
71
63
|
def get_export_columns(self) -> dict[str, ts.ColumnType]:
|
|
72
64
|
return self.export_cols
|
|
@@ -77,17 +69,9 @@ class MockRemote(Remote):
|
|
|
77
69
|
def sync(self, t: Table, col_mapping: dict[str, str], export_data: bool, import_data: bool) -> NotImplemented:
|
|
78
70
|
raise NotImplementedError()
|
|
79
71
|
|
|
80
|
-
def delete(self) -> None:
|
|
81
|
-
self.__is_deleted = True
|
|
82
|
-
|
|
83
|
-
@property
|
|
84
|
-
def is_deleted(self) -> bool:
|
|
85
|
-
return self.__is_deleted
|
|
86
|
-
|
|
87
72
|
def to_dict(self) -> dict[str, Any]:
|
|
88
73
|
return {
|
|
89
74
|
# TODO Change in next schema version
|
|
90
|
-
'name': self.name,
|
|
91
75
|
'push_cols': {k: v.as_dict() for k, v in self.export_cols.items()},
|
|
92
76
|
'pull_cols': {k: v.as_dict() for k, v in self.import_cols.items()}
|
|
93
77
|
}
|
|
@@ -95,19 +79,7 @@ class MockRemote(Remote):
|
|
|
95
79
|
@classmethod
|
|
96
80
|
def from_dict(cls, md: dict[str, Any]) -> Remote:
|
|
97
81
|
return cls(
|
|
98
|
-
name=md['name'],
|
|
99
82
|
# TODO Change in next schema version
|
|
100
|
-
|
|
101
|
-
|
|
83
|
+
{k: ts.ColumnType.from_dict(v) for k, v in md['push_cols'].items()},
|
|
84
|
+
{k: ts.ColumnType.from_dict(v) for k, v in md['pull_cols'].items()}
|
|
102
85
|
)
|
|
103
|
-
|
|
104
|
-
def __eq__(self, other: Any) -> bool:
|
|
105
|
-
if not isinstance(other, MockRemote):
|
|
106
|
-
return False
|
|
107
|
-
return self.name == other.name
|
|
108
|
-
|
|
109
|
-
def __hash__(self) -> int:
|
|
110
|
-
return hash(self.name)
|
|
111
|
-
|
|
112
|
-
def __repr__(self) -> str:
|
|
113
|
-
return f'MockRemote `{self.name}`'
|
pixeltable/env.py
CHANGED
|
@@ -37,16 +37,12 @@ class Env:
|
|
|
37
37
|
@classmethod
|
|
38
38
|
def get(cls) -> Env:
|
|
39
39
|
if cls._instance is None:
|
|
40
|
-
|
|
40
|
+
env = Env()
|
|
41
|
+
env._set_up()
|
|
42
|
+
env._upgrade_metadata()
|
|
43
|
+
cls._instance = env
|
|
41
44
|
return cls._instance
|
|
42
45
|
|
|
43
|
-
@classmethod
|
|
44
|
-
def _init_env(cls, reinit_db: bool = False) -> None:
|
|
45
|
-
env = Env()
|
|
46
|
-
env._set_up(reinit_db=reinit_db)
|
|
47
|
-
env._upgrade_metadata()
|
|
48
|
-
cls._instance = env
|
|
49
|
-
|
|
50
46
|
def __init__(self):
|
|
51
47
|
self._home: Optional[Path] = None
|
|
52
48
|
self._media_dir: Optional[Path] = None # computed media files
|
pixeltable/io/globals.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any, Optional
|
|
1
|
+
from typing import Any, Optional
|
|
2
2
|
|
|
3
3
|
import pixeltable as pxt
|
|
4
4
|
from pixeltable import Table
|
|
@@ -9,7 +9,6 @@ def create_label_studio_project(
|
|
|
9
9
|
label_config: str,
|
|
10
10
|
col_mapping: Optional[dict[str, str]] = None,
|
|
11
11
|
title: Optional[str] = None,
|
|
12
|
-
media_import_method: Literal['post', 'file'] = 'file',
|
|
13
12
|
sync_immediately: bool = True,
|
|
14
13
|
**kwargs: Any
|
|
15
14
|
) -> None:
|
|
@@ -43,7 +42,7 @@ def create_label_studio_project(
|
|
|
43
42
|
"""
|
|
44
43
|
from pixeltable.datatransfer.label_studio import LabelStudioProject, ANNOTATIONS_COLUMN
|
|
45
44
|
|
|
46
|
-
ls_project = LabelStudioProject.create(title or t.get_name(), label_config,
|
|
45
|
+
ls_project = LabelStudioProject.create(title or t.get_name(), label_config, **kwargs)
|
|
47
46
|
|
|
48
47
|
# Create a column to hold the annotations, if one does not yet exist.
|
|
49
48
|
if col_mapping is not None and ANNOTATIONS_COLUMN in col_mapping.values():
|
pixeltable/metadata/__init__.py
CHANGED
|
@@ -10,7 +10,7 @@ import sqlalchemy.orm as orm
|
|
|
10
10
|
from .schema import SystemInfo, SystemInfoMd
|
|
11
11
|
|
|
12
12
|
# current version of the metadata; this is incremented whenever the metadata schema changes
|
|
13
|
-
VERSION =
|
|
13
|
+
VERSION = 15
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
def create_system_info(engine: sql.engine.Engine) -> None:
|
pixeltable/metadata/schema.py
CHANGED
|
@@ -30,8 +30,6 @@ class Dumper:
|
|
|
30
30
|
os.environ['PIXELTABLE_DB'] = db_name
|
|
31
31
|
os.environ['PIXELTABLE_PGDATA'] = str(shared_home / 'pgdata')
|
|
32
32
|
|
|
33
|
-
Env._init_env(reinit_db=True)
|
|
34
|
-
|
|
35
33
|
Env.get().configure_logging(level=logging.DEBUG, to_stdout=True)
|
|
36
34
|
|
|
37
35
|
def dump_db(self) -> None:
|
|
@@ -166,15 +164,15 @@ class Dumper:
|
|
|
166
164
|
|
|
167
165
|
# Add remotes
|
|
168
166
|
from pixeltable.datatransfer.remote import MockRemote
|
|
169
|
-
v.
|
|
170
|
-
MockRemote(
|
|
167
|
+
v.link_remote(
|
|
168
|
+
MockRemote({'int_field': pxt.IntType()}, {'str_field': pxt.StringType()}),
|
|
171
169
|
col_mapping={'test_udf': 'int_field', 'c1': 'str_field'}
|
|
172
170
|
)
|
|
173
171
|
# We're just trying to test metadata here, so reach "under the covers" and link a fake
|
|
174
172
|
# Label Studio project without validation (so we don't need a real Label Studio server)
|
|
175
173
|
from pixeltable.datatransfer.label_studio import LabelStudioProject
|
|
176
174
|
v.tbl_version_path.tbl_version.link(
|
|
177
|
-
LabelStudioProject(4171780
|
|
175
|
+
LabelStudioProject(4171780),
|
|
178
176
|
col_mapping={'str_format': 'str_format'}
|
|
179
177
|
)
|
|
180
178
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pixeltable
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.8
|
|
4
4
|
Summary: Pixeltable: The Multimodal AI Data Plane
|
|
5
5
|
Author: Marcel Kornacker
|
|
6
6
|
Author-email: marcelk@gmail.com
|
|
@@ -21,7 +21,7 @@ Requires-Dist: more-itertools (>=10.2,<11.0)
|
|
|
21
21
|
Requires-Dist: numpy (>=1.25)
|
|
22
22
|
Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
|
|
23
23
|
Requires-Dist: pandas (>=2.0,<3.0)
|
|
24
|
-
Requires-Dist: pgserver (==0.1.
|
|
24
|
+
Requires-Dist: pgserver (==0.1.4)
|
|
25
25
|
Requires-Dist: pgvector (>=0.2.1,<0.3.0)
|
|
26
26
|
Requires-Dist: pillow (>=9.3.0)
|
|
27
27
|
Requires-Dist: psutil (>=5.9.5,<6.0.0)
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
pixeltable/__init__.py,sha256=DzVevwic1g8Tp4QYrcCIzPFFwPZ66KGWeYprlRC9Uwc,1142
|
|
2
|
-
pixeltable/__version__.py,sha256=
|
|
2
|
+
pixeltable/__version__.py,sha256=AyN0bhxu_oExUztSHc2d8uAemad8-aDrT7QgYAM_JCs,112
|
|
3
3
|
pixeltable/catalog/__init__.py,sha256=E41bxaPeQIcgRYzTWc2vkDOboQhRymrJf4IcHQO7o_8,453
|
|
4
4
|
pixeltable/catalog/catalog.py,sha256=0TYWB1R6YBp9qCkWF7kCcX2Yw70UuburKKIemv5L1Js,7908
|
|
5
|
-
pixeltable/catalog/column.py,sha256=
|
|
5
|
+
pixeltable/catalog/column.py,sha256=J8irt6PfT1ofC3wVKi-hDGjNUZ1Ceq2qzbmZyEw-ddA,8335
|
|
6
6
|
pixeltable/catalog/dir.py,sha256=pG1nMpG123POo6WMSHhAmnwXOQ26uUJfUcbzL-Jb4ws,919
|
|
7
7
|
pixeltable/catalog/globals.py,sha256=yLEGNbsSnLzjWNHVJacfjA9hbw13Q6QXLOSCRmdTlq0,943
|
|
8
8
|
pixeltable/catalog/insertable_table.py,sha256=vMa_XUWT3DG3ZlxkScDZ9-mYNw31G8XB4ODUlxXt7NU,8927
|
|
@@ -10,15 +10,15 @@ pixeltable/catalog/named_function.py,sha256=a96gnKtx-nz5_MzDIiD4t4Hxqdjkg9ZtijRQ
|
|
|
10
10
|
pixeltable/catalog/path.py,sha256=QgccEi_QOfaKt8YsR2zLtd_z7z7QQkU_1kprJFi2SPQ,1677
|
|
11
11
|
pixeltable/catalog/path_dict.py,sha256=xfvxg1Ze5jZCARUGASF2DRbQPh7pRVTYhuJ_u82gYUo,5941
|
|
12
12
|
pixeltable/catalog/schema_object.py,sha256=-UxmPLbuEBqJiJi_GGRbFdr7arAFxTqs4bt6TFmSt3M,1059
|
|
13
|
-
pixeltable/catalog/table.py,sha256=
|
|
14
|
-
pixeltable/catalog/table_version.py,sha256=
|
|
15
|
-
pixeltable/catalog/table_version_path.py,sha256=
|
|
13
|
+
pixeltable/catalog/table.py,sha256=50UOSt7zltvthygiiXbgP-XMOiYUpcIaEXiM1uJFcaA,38220
|
|
14
|
+
pixeltable/catalog/table_version.py,sha256=BeP-4Io6euT6hOQXVJwpZNjZ6ZNehqOH6S98zvQsU9E,52751
|
|
15
|
+
pixeltable/catalog/table_version_path.py,sha256=2Ofzd0n36flcNm86KWwIWDBAfgnV5Z-FxAHdMSPgMLc,5482
|
|
16
16
|
pixeltable/catalog/view.py,sha256=BIL3s4DV3tWbOcqtqnhn46B2UvLaBhppfJUlNEt5nec,9734
|
|
17
17
|
pixeltable/dataframe.py,sha256=lzSzR7mi9C4BO39fNXYo64k3KxILyG_Z7eET6DXTgKY,31922
|
|
18
18
|
pixeltable/datatransfer/__init__.py,sha256=cRWdQ_LUNkJgmionI1RrYD71A1CSI92P4o8_XXOnFmU,27
|
|
19
|
-
pixeltable/datatransfer/label_studio.py,sha256=
|
|
20
|
-
pixeltable/datatransfer/remote.py,sha256=
|
|
21
|
-
pixeltable/env.py,sha256=
|
|
19
|
+
pixeltable/datatransfer/label_studio.py,sha256=3DLsqfIUNVG9xVRVUU4NayLuC-xUTIM1Sz92kGvrTUc,19643
|
|
20
|
+
pixeltable/datatransfer/remote.py,sha256=t-VeDIq62mX67xBoHLi8voa4V5XqMkr-8UZ-8DhIgk0,3100
|
|
21
|
+
pixeltable/env.py,sha256=OEZv6NS8Z41rdCx73Md5j78ImnKaZf3YhdAexFJR7gw,21381
|
|
22
22
|
pixeltable/exceptions.py,sha256=MSP9zeL0AmXT93XqjdvgGN4rzno1_KRrGriq6hpemnw,376
|
|
23
23
|
pixeltable/exec/__init__.py,sha256=RK7SKvrQ7Ky3G_LXDP4Bf7lHmMM_uYZl8dJaZYs0FjY,454
|
|
24
24
|
pixeltable/exec/aggregation_node.py,sha256=cf6rVAgrGh_uaMrCIgXJIwQTmbcboJlnrH_MmPIQSd0,3321
|
|
@@ -87,7 +87,7 @@ pixeltable/index/base.py,sha256=YAQ5Dz1mfI0dfu9rxWHWroE8TjB90yKfPtXAzoADq38,1568
|
|
|
87
87
|
pixeltable/index/btree.py,sha256=NE4GYhcJWYJhdKyeHI0sQBlFvUaIgGOF9KLyCZOfFjE,1822
|
|
88
88
|
pixeltable/index/embedding_index.py,sha256=AYphEggN-0B4GNrm4nMmi46CEtrQw5tguyk67BK2sWo,7627
|
|
89
89
|
pixeltable/io/__init__.py,sha256=Io5ZLrcvRPeqRQwnU2iODvWMqkYroWErkbp7dLxE4Kk,197
|
|
90
|
-
pixeltable/io/globals.py,sha256=
|
|
90
|
+
pixeltable/io/globals.py,sha256=ArnuWVhdKHT9ds84PZBl0-fszmEu-W62P4Su21c9oN4,2642
|
|
91
91
|
pixeltable/io/hf_datasets.py,sha256=h5M1NkXOvEU8kaeT3AON1A18Vmhnc1lVo5a3TZ5AAic,8004
|
|
92
92
|
pixeltable/io/pandas.py,sha256=cDHUDW2CGiBbsEJB9zE5vkXopTKxDdI-CZxNcp0OnIk,6478
|
|
93
93
|
pixeltable/io/parquet.py,sha256=Z1b92gsPeCBf4P9_jgWWHAEHtu51nhuC8nSJgoKiywQ,8150
|
|
@@ -95,17 +95,15 @@ pixeltable/iterators/__init__.py,sha256=kokLguXBY_nxBTqUiXZVvCxTv-vGsX4cK8tgIbsW
|
|
|
95
95
|
pixeltable/iterators/base.py,sha256=cnEh1tNN2JAxRzrLTg3dhun3N1oNQ8vifCm6ts3_UiE,1687
|
|
96
96
|
pixeltable/iterators/document.py,sha256=netSCJatG8NcgbHZ69BvQVICdAorQlYi8OlcpqwLQD4,19436
|
|
97
97
|
pixeltable/iterators/video.py,sha256=xtxODL1AfZwTfHVzWekhTCLA8gwTJIvJFdxC0KecD9Q,3836
|
|
98
|
-
pixeltable/metadata/__init__.py,sha256=
|
|
98
|
+
pixeltable/metadata/__init__.py,sha256=beGPpClpNaN7seM_AeKli5R33TSIkb7_mIBWoExT_5M,2228
|
|
99
99
|
pixeltable/metadata/converters/convert_10.py,sha256=0mSGCn7vqtef63riPi9msUaaUvsSQIj-NFj9QFDYPdA,733
|
|
100
100
|
pixeltable/metadata/converters/convert_12.py,sha256=g9rHTcKlDQZbM3_k4eBv0FBdWmQXHWCnMwx1_l6KpMI,107
|
|
101
101
|
pixeltable/metadata/converters/convert_13.py,sha256=FEgOH5PKf05xVoCaioDDDHOSuoWPyBzodojmsSMMZ5U,1366
|
|
102
102
|
pixeltable/metadata/converters/convert_14.py,sha256=UAWHEipZ-NrQtI5zZN1u9C5AD24ZpYXsdpC3Te0t-qE,402
|
|
103
|
-
pixeltable/metadata/
|
|
104
|
-
pixeltable/metadata/converters/util.py,sha256=97efM1Hx1qKMIOaEI4bjmX93POie7XvBEF_HJhFhzE0,2400
|
|
105
|
-
pixeltable/metadata/schema.py,sha256=IdvV_UIyQqQL25cf36Rz6dMhHRWXvsApKt_uFSHW5kk,8497
|
|
103
|
+
pixeltable/metadata/schema.py,sha256=ZYBbt_jESRrX7BAx35xKY1CpIgRuJnd2LJYo4MrPnn0,8399
|
|
106
104
|
pixeltable/plan.py,sha256=A_kPsX3bjLyfYbeQ6eCgDbrb_Oldk4w8HdFRqRSDpPY,32653
|
|
107
105
|
pixeltable/store.py,sha256=foQe9y8rRbl35f3naL7rbYVrD8LO00cmD53vWP2A9XI,18850
|
|
108
|
-
pixeltable/tool/create_test_db_dump.py,sha256=
|
|
106
|
+
pixeltable/tool/create_test_db_dump.py,sha256=17MdBqsSNj7j61w0Re9pS4aDIEdML_4hnE-uZJcEW4I,7537
|
|
109
107
|
pixeltable/tool/create_test_video.py,sha256=OLfccymYReIpzE8osZn4rQvLXxxiPC_l0vc06U74hVM,2899
|
|
110
108
|
pixeltable/type_system.py,sha256=nljZs4O_dsVFMs4aB3z7Szc9LgFtl5eOuloxJkk7tPE,29503
|
|
111
109
|
pixeltable/utils/__init__.py,sha256=UYlrf6TIWJT0g-Hac0b34-dEk478B5Qx8dGco34YlIk,439
|
|
@@ -120,7 +118,7 @@ pixeltable/utils/pytorch.py,sha256=BR4tgfUWw-2rwWTOgzXj5qdMBpe1Arpp5SK4ax6jjpk,3
|
|
|
120
118
|
pixeltable/utils/s3.py,sha256=rkanuhk9DWvSfmbOLQW1j1Iov4sl2KhxGGKN-AJ8LSE,432
|
|
121
119
|
pixeltable/utils/sql.py,sha256=5n5_OmXAGtqFdL6z5XvgnU-vlx6Ba6f1WJrO1ZwUle8,765
|
|
122
120
|
pixeltable/utils/transactional_directory.py,sha256=UGzCrGtLR3hEEf8sYGuWBzLVFAEQml3vdIavigWeTBM,1349
|
|
123
|
-
pixeltable-0.2.
|
|
124
|
-
pixeltable-0.2.
|
|
125
|
-
pixeltable-0.2.
|
|
126
|
-
pixeltable-0.2.
|
|
121
|
+
pixeltable-0.2.8.dist-info/LICENSE,sha256=0UNMmwuqWPC0xDY1NWMm4uNJ2_MyA1pnTNRgQTvuBiQ,746
|
|
122
|
+
pixeltable-0.2.8.dist-info/METADATA,sha256=VKFyaYflvsFwUgiC1Y2iukgPHGI4W9sQpnLmUCRdMiQ,9806
|
|
123
|
+
pixeltable-0.2.8.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
124
|
+
pixeltable-0.2.8.dist-info/RECORD,,
|
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
import uuid
|
|
2
|
-
|
|
3
|
-
import sqlalchemy as sql
|
|
4
|
-
|
|
5
|
-
from pixeltable.metadata import register_converter
|
|
6
|
-
from pixeltable.metadata.converters.util import convert_table_md
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
def convert_15(engine: sql.engine.Engine) -> None:
|
|
10
|
-
convert_table_md(engine, column_md_updater=update_column_md, remote_md_updater=update_remote_md)
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
def update_column_md(column_md: dict) -> None:
|
|
14
|
-
column_md['proxy_base'] = None
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def update_remote_md(remote_md: dict) -> None:
|
|
18
|
-
remote_md['class'] = f'{remote_md["module"]}.{remote_md["class"]}'
|
|
19
|
-
del remote_md['module']
|
|
20
|
-
if remote_md['class'] == 'pixeltable.datatransfer.remote.MockRemote':
|
|
21
|
-
remote_md['remote_md']['name'] = f'remote_{uuid.uuid4()}'
|
|
22
|
-
elif remote_md['class'] == 'pixeltable.datatransfer.label_studio.LabelStudioProject':
|
|
23
|
-
# 'post' is the media_import_method for legacy LabelStudioProject remotes
|
|
24
|
-
remote_md['remote_md']['media_import_method'] = 'post'
|
|
25
|
-
else:
|
|
26
|
-
assert False, remote_md['class']
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
register_converter(15, convert_15)
|
|
@@ -1,63 +0,0 @@
|
|
|
1
|
-
import copy
|
|
2
|
-
import logging
|
|
3
|
-
from typing import Any, Callable, Optional
|
|
4
|
-
|
|
5
|
-
import sqlalchemy as sql
|
|
6
|
-
|
|
7
|
-
from pixeltable.metadata.schema import Table
|
|
8
|
-
|
|
9
|
-
__logger = logging.getLogger('pixeltable')
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def convert_table_md(
|
|
13
|
-
engine: sql.engine.Engine,
|
|
14
|
-
column_md_updater: Optional[Callable[[dict], None]] = None,
|
|
15
|
-
remote_md_updater: Optional[Callable[[dict], None]] = None,
|
|
16
|
-
substitution_fn: Optional[Callable[[Any, Any], Optional[tuple[Any, Any]]]] = None
|
|
17
|
-
) -> None:
|
|
18
|
-
with engine.begin() as conn:
|
|
19
|
-
for row in conn.execute(sql.select(Table)):
|
|
20
|
-
id = row[0]
|
|
21
|
-
table_md = row[2]
|
|
22
|
-
assert isinstance(table_md, dict)
|
|
23
|
-
updated_table_md = copy.deepcopy(table_md)
|
|
24
|
-
if column_md_updater is not None:
|
|
25
|
-
__update_column_md(updated_table_md, column_md_updater)
|
|
26
|
-
if remote_md_updater is not None:
|
|
27
|
-
__update_remote_md(updated_table_md, remote_md_updater)
|
|
28
|
-
if substitution_fn is not None:
|
|
29
|
-
updated_table_md = __substitute_md_rec(updated_table_md, substitution_fn)
|
|
30
|
-
if updated_table_md != table_md:
|
|
31
|
-
__logger.info(f'Updating schema for table: {id}')
|
|
32
|
-
conn.execute(sql.update(Table).where(Table.id == id).values(md=updated_table_md))
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def __update_column_md(table_md: dict, column_md_updater: Callable[[dict], None]) -> None:
|
|
36
|
-
columns_md = table_md['column_md']
|
|
37
|
-
assert isinstance(columns_md, dict)
|
|
38
|
-
for column_md in columns_md.values():
|
|
39
|
-
column_md_updater(column_md)
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def __update_remote_md(table_md: dict, remote_md_updater: Callable[[dict], None]) -> None:
|
|
43
|
-
remotes_md = table_md['remotes']
|
|
44
|
-
assert isinstance(remotes_md, list)
|
|
45
|
-
for remote_md in remotes_md:
|
|
46
|
-
remote_md_updater(remote_md)
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
def __substitute_md_rec(md: Any, substitution_fn: Callable[[Any, Any], Optional[tuple[Any, Any]]]) -> Any:
|
|
50
|
-
if isinstance(md, dict):
|
|
51
|
-
updated_md = {}
|
|
52
|
-
for k, v in md.items():
|
|
53
|
-
substitute = substitution_fn(k, v)
|
|
54
|
-
if substitute is not None:
|
|
55
|
-
updated_k, updated_v = substitute
|
|
56
|
-
updated_md[updated_k] = updated_v
|
|
57
|
-
else:
|
|
58
|
-
updated_md[k] = __substitute_md_rec(v, substitution_fn)
|
|
59
|
-
return updated_md
|
|
60
|
-
elif isinstance(md, list):
|
|
61
|
-
return [__substitute_md_rec(v, substitution_fn) for v in md]
|
|
62
|
-
else:
|
|
63
|
-
return md
|
|
File without changes
|
|
File without changes
|