pixeltable-0.2.7-py3-none-any.whl → pixeltable-0.2.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


pixeltable/__version__.py CHANGED
@@ -1,3 +1,3 @@
  # These version placeholders will be replaced during build.
- __version__ = "0.2.7"
- __version_tuple__ = (0, 2, 7)
+ __version__ = "0.2.8"
+ __version_tuple__ = (0, 2, 8)
pixeltable/catalog/column.py CHANGED
@@ -87,11 +86,6 @@ class Column:
  self.schema_version_add = schema_version_add
  self.schema_version_drop = schema_version_drop

- # stored_proxy may be set later if this is a non-stored column.
- # if col1.stored_proxy == col2, then also col1 == col2.proxy_base.
- self.stored_proxy: Optional[Column] = None
- self.proxy_base: Optional[Column] = None
-
  self._records_errors = records_errors

  # column in the stored table for the values of this Column
@@ -101,7 +96,6 @@ class Column:
  # computed cols also have storage columns for the exception string and type
  self.sa_errormsg_col: Optional[sql.schema.Column] = None
  self.sa_errortype_col: Optional[sql.schema.Column] = None
-
  from .table_version import TableVersion
  self.tbl: Optional[TableVersion] = None # set by owning TableVersion

pixeltable/catalog/table.py CHANGED
@@ -735,58 +735,31 @@ class Table(SchemaObject):
  col_mapping: An optional mapping of columns from this `Table` to columns in the `Remote`.
  """
  # TODO(aaron-siegel): Refactor `col_mapping`
+ if len(self._get_remotes()) > 0:
+ raise excs.Error('Linking more than one `Remote` to a table is not currently supported.')
  self._check_is_dropped()
- if remote in self._get_remotes():
- raise excs.Error(f'That remote is already linked to table `{self.get_name()}`: {remote}')
- push_cols = remote.get_export_columns()
- pull_cols = remote.get_import_columns()
+ export_cols = remote.get_export_columns()
+ import_cols = remote.get_import_columns()
  is_col_mapping_user_specified = col_mapping is not None
  if col_mapping is None:
  # Use the identity mapping by default if `col_mapping` is not specified
- col_mapping = {col: col for col in itertools.chain(push_cols.keys(), pull_cols.keys())}
- self._validate_remote(push_cols, pull_cols, col_mapping, is_col_mapping_user_specified)
- _logger.info(f'Linking remote {remote} to table `{self.get_name()}`.')
+ col_mapping = {col: col for col in itertools.chain(export_cols.keys(), import_cols.keys())}
+ self._validate_remote(export_cols, import_cols, col_mapping, is_col_mapping_user_specified)
  self.tbl_version_path.tbl_version.link(remote, col_mapping)
  print(f'Linked remote {remote} to table `{self.get_name()}`.')

- def unlink(
- self,
- remotes: Optional['pixeltable.datatransfer.Remote' | list['pixeltable.datatransfer.Remote']] = None,
- *,
- delete_remote_data: bool = False,
- ignore_errors: bool = False
- ) -> None:
+ def unlink(self) -> None:
  """
  Unlinks this table's `Remote`s.
-
- Args:
- remotes: If specified, will unlink only the specified `Remote` or list of `Remote`s. If not specified,
- will unlink all of this table's `Remote`s.
- ignore_errors (bool): If `True`, no exception will be thrown if the specified `Remote` is not linked
- to this table.
- delete_remote_data (bool): If `True`, then the remote data source will also be deleted. WARNING: This
- is a destructive operation that will delete data outside Pixeltable, and cannot be undone.
-
  """
  self._check_is_dropped()
- all_remotes = self._get_remotes()
-
- if remotes is None:
- remotes = list(all_remotes.keys())
- elif isinstance(remotes, pixeltable.datatransfer.Remote):
- remotes = [remotes]
-
- # Validation
- if not ignore_errors:
- for remote in remotes:
- if remote not in all_remotes:
- raise excs.Error(f'Remote {remote} is not linked to table `{self.get_name()}`')
+ remotes = self._get_remotes()
+ assert len(remotes) <= 1

- for remote in remotes:
- self.tbl_version_path.tbl_version.unlink(remote)
- print(f'Unlinked remote {remote} from table `{self.get_name()}`.')
- if delete_remote_data:
- remote.delete()
+ remote = next(iter(remotes.keys()))
+ self.tbl_version_path.tbl_version.unlink(remote)
+ # TODO: Provide an option to auto-delete the project
+ print(f'Unlinked remote {remote} from table `{self.get_name()}`.')

  def _validate_remote(
  self,
@@ -823,7 +796,7 @@ class Table(SchemaObject):
  r_col_type = export_cols[r_col]
  if not r_col_type.is_supertype_of(t_col_type):
  raise excs.Error(
- f'Column `{t_col}` cannot be exported to remote column `{r_col}` (incompatible types; expecting `{r_col_type}`)'
+ f'Column `{t_col}` cannot be exported to remote column `{r_col}` (incompatible types)'
  )
  if r_col in import_cols:
  # Validate that the remote column can be assigned to the table column
@@ -834,7 +807,7 @@
  r_col_type = import_cols[r_col]
  if not t_col_type.is_supertype_of(r_col_type):
  raise excs.Error(
- f'Column `{t_col}` cannot be imported from remote column `{r_col}` (incompatible types; expecting `{r_col_type}`)'
+ f'Column `{t_col}` cannot be imported from remote column `{r_col}` (incompatible types)'
  )

  def _get_remotes(self) -> dict[pixeltable.datatransfer.Remote, dict[str, str]]:
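A minimal usage sketch of the single-remote workflow above (assumptions: a pre-existing table `t` with a string column `c1` and a JSON column `notes`, plus the `link_remote` entry point exercised in `create_test_db_dump.py` further down; `MockRemote` is the test double from `pixeltable/datatransfer/remote.py`):

import pixeltable as pxt
from pixeltable.datatransfer.remote import MockRemote

# 0.2.8 allows at most one linked Remote per table; a second link raises excs.Error.
remote = MockRemote(
    {'str_field': pxt.StringType()},   # export columns (table -> remote)
    {'annotations': pxt.JsonType()},   # import columns (remote -> table)
)
t.link_remote(remote, col_mapping={'c1': 'str_field', 'notes': 'annotations'})
t.unlink()  # now takes no arguments; unlinks the table's single remote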
pixeltable/catalog/table_version.py CHANGED
@@ -24,7 +24,6 @@ from pixeltable.utils.filecache import FileCache
  from pixeltable.utils.media_store import MediaStore
  from .column import Column
  from .globals import UpdateStatus, POS_COLUMN_NAME, is_valid_identifier
- from ..func.globals import resolve_symbol

  _logger = logging.getLogger('pixeltable')

@@ -121,7 +120,7 @@ class TableVersion:
  # init schema after we determined whether we're a component view, and before we create the store table
  self.cols: list[Column] = [] # contains complete history of columns, incl dropped ones
  self.cols_by_name: dict[str, Column] = {} # contains only user-facing (named) columns visible in this version
- self.cols_by_id: dict[int, Column] = {} # contains only columns visible in this version, both system and user
+ self.cols_by_id: dict[int, Column] = {} # contains only columns visible in this version
  self.idx_md = tbl_md.index_md # needed for _create_tbl_md()
  self.idxs_by_name: dict[str, TableVersion.IndexInfo] = {} # contains only actively maintained indices
  self._init_schema(tbl_md, schema_version_md)
@@ -269,16 +268,6 @@ class TableVersion:
  col.value_expr = exprs.Expr.from_dict(col_md.value_expr)
  self._record_value_expr(col)

- # if this is a stored proxy column, resolve the relationships with its proxy base.
- if col_md.proxy_base is not None:
- # proxy_base must have a strictly smaller id, so we must already have encountered it
- # in traversal order; and if the proxy column is active at this version, then the
- # proxy base must necessarily be active as well. This motivates the following assertion.
- assert col_md.proxy_base in self.cols_by_id
- base_col = self.cols_by_id[col_md.proxy_base]
- base_col.stored_proxy = col
- col.proxy_base = base_col
-
  def _init_idxs(self, tbl_md: schema.TableMd) -> None:
  self.idx_md = tbl_md.index_md
  self.idxs_by_name = {}
@@ -544,16 +533,8 @@ class TableVersion:
  dependent_user_cols = [c for c in col.dependent_cols if c.name is not None]
  if len(dependent_user_cols) > 0:
  raise excs.Error(
- f'Cannot drop column `{name}` because the following columns depend on it:\n'
- f'{", ".join(c.name for c in dependent_user_cols)}'
- )
- dependent_remotes = [remote for remote, col_mapping in self.remotes.items() if name in col_mapping]
- if len(dependent_remotes) > 0:
- raise excs.Error(
- f'Cannot drop column `{name}` because the following remotes depend on it:\n'
- f'{", ".join(str(r) for r in dependent_remotes)}'
- )
- assert col.stored_proxy is None # since there are no dependent remotes
+ f'Cannot drop column {name} because the following columns depend on it:\n',
+ f'{", ".join([c.name for c in dependent_user_cols])}')

  # we're creating a new schema version
  self.version += 1
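The surviving check above still rejects dropping a column that other computed columns depend on. A hypothetical repro (table and column names invented; assumes the add_column/drop_column surface of this release):

import pixeltable as pxt

t = pxt.create_table('demo', {'x': pxt.IntType()})
t.add_column(y=t.x + 1)   # computed column that depends on x
t.drop_column('x')        # raises excs.Error: cannot drop column x, y depends on it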
@@ -964,88 +945,26 @@ class TableVersion:

  @classmethod
  def _init_remote(cls, remote_md: dict[str, Any]) -> Tuple[pixeltable.datatransfer.Remote, dict[str, str]]:
- remote_cls = resolve_symbol(remote_md['class'])
- assert isinstance(remote_cls, type) and issubclass(remote_cls, pixeltable.datatransfer.Remote)
+ module = importlib.import_module(remote_md['module'])
+ remote_cls = getattr(module, remote_md['class'])
  remote = remote_cls.from_dict(remote_md['remote_md'])
  col_mapping = remote_md['col_mapping']
  return remote, col_mapping

  def link(self, remote: pixeltable.datatransfer.Remote, col_mapping: dict[str, str]) -> None:
- # All of the media columns being linked need to either be stored, computed columns or have stored proxies.
- # This ensures that the media in those columns resides in the media cache, where it can be served.
- # First determine which columns (if any) need stored proxies, but don't have one yet.
- cols_by_name = self.path.cols_by_name() # Includes base columns
- stored_proxies_needed = []
- for col_name in col_mapping.keys():
- col = cols_by_name[col_name]
- if col.col_type.is_media_type() and not (col.is_stored and col.compute_func) and not col.stored_proxy:
- stored_proxies_needed.append(col)
+ timestamp = time.time()
+ self.version += 1
+ self.remotes[remote] = col_mapping
  with Env.get().engine.begin() as conn:
- self.version += 1
- self.remotes[remote] = col_mapping
- preceding_schema_version = None
- if len(stored_proxies_needed) > 0:
- _logger.info(f'Creating stored proxies for columns: {[col.name for col in stored_proxies_needed]}')
- # Create stored proxies for columns that need one. Increment the schema version
- # accordingly.
- preceding_schema_version = self.schema_version
- self.schema_version = self.version
- proxy_cols = [self.create_stored_proxy(col) for col in stored_proxies_needed]
- # Add the columns; this will also update table metadata.
- # TODO Add to base tables
- self._add_columns(proxy_cols, conn)
- # We don't need to retain `UpdateStatus` since the stored proxies are intended to be
- # invisible to the user.
- self._update_md(time.time(), preceding_schema_version, conn)
-
- def create_stored_proxy(self, col: Column) -> Column:
- from pixeltable import exprs
-
- assert col.col_type.is_media_type() and not (col.is_stored and col.compute_func) and not col.stored_proxy
- proxy_col = Column(
- name=None,
- computed_with=exprs.ColumnRef(col).apply(lambda x: x, col_type=col.col_type),
- stored=True,
- col_id=self.next_col_id,
- sa_col_type=col.col_type.to_sa_type(),
- schema_version_add=self.schema_version
- )
- proxy_col.tbl = self
- self.next_col_id += 1
- col.stored_proxy = proxy_col
- proxy_col.proxy_base = col
- return proxy_col
+ self._update_md(timestamp, None, conn)

  def unlink(self, remote: pixeltable.datatransfer.Remote) -> None:
  assert remote in self.remotes
  timestamp = time.time()
- this_remote_col_names = list(self.remotes[remote].keys())
- other_remote_col_names = {
- col_name
- for other_remote, col_mapping in self.remotes.items() if other_remote != remote
- for col_name in col_mapping.keys()
- }
- cols_by_name = self.path.cols_by_name() # Includes base columns
- stored_proxy_deletions_needed = [
- cols_by_name[col_name]
- for col_name in this_remote_col_names
- if col_name not in other_remote_col_names and cols_by_name[col_name].stored_proxy
- ]
+ self.version += 1
+ del self.remotes[remote]
  with Env.get().engine.begin() as conn:
- self.version += 1
- del self.remotes[remote]
- preceding_schema_version = None
- if len(stored_proxy_deletions_needed) > 0:
- preceding_schema_version = self.schema_version
- self.schema_version = self.version
- proxy_cols = [col.stored_proxy for col in stored_proxy_deletions_needed]
- for col in stored_proxy_deletions_needed:
- assert col.stored_proxy is not None and col.stored_proxy.proxy_base == col
- col.stored_proxy.proxy_base = None
- col.stored_proxy = None
- # TODO Drop from base tables
- self._drop_columns(proxy_cols)
- self._update_md(timestamp, preceding_schema_version, conn)
+ self._update_md(timestamp, None, conn)

  def get_remotes(self) -> dict[pixeltable.datatransfer.Remote, dict[str, str]]:
  return self.remotes
@@ -1151,15 +1070,15 @@ class TableVersion:
  column_md[col.id] = schema.ColumnMd(
  id=col.id, col_type=col.col_type.as_dict(), is_pk=col.is_pk,
  schema_version_add=col.schema_version_add, schema_version_drop=col.schema_version_drop,
- value_expr=value_expr_dict, stored=col.stored,
- proxy_base=col.proxy_base.id if col.proxy_base else None)
+ value_expr=value_expr_dict, stored=col.stored)
  return column_md

  @classmethod
  def _create_remotes_md(cls, remotes: dict['pixeltable.datatransfer.Remote', dict[str, str]]) -> list[dict[str, Any]]:
  return [
  {
- 'class': f'{type(remote).__module__}.{type(remote).__qualname__}',
+ 'module': type(remote).__module__,
+ 'class': type(remote).__qualname__,
  'remote_md': remote.to_dict(),
  'col_mapping': col_mapping
  }
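The two hunks above define the remote-metadata round trip: _create_remotes_md now stores the module and class names in separate fields, and _init_remote resolves them with importlib rather than resolve_symbol. An illustrative record (values invented; the remote_md payload follows the MockRemote shape from remote.py):

import importlib

remote_record = {
    'module': 'pixeltable.datatransfer.remote',
    'class': 'MockRemote',
    'remote_md': {'push_cols': {}, 'pull_cols': {}},   # remote.to_dict() output
    'col_mapping': {'c1': 'str_field'},
}
module = importlib.import_module(remote_record['module'])
remote_cls = getattr(module, remote_record['class'])
remote = remote_cls.from_dict(remote_record['remote_md'])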
pixeltable/catalog/table_version_path.py CHANGED
@@ -106,14 +106,9 @@ class TableVersionPath:
  if self.base is not None:
  base_cols = self.base.columns()
  # we only include base columns that don't conflict with one of our column names
- result.extend(c for c in base_cols if c.name not in self.tbl_version.cols_by_name)
+ result.extend([c for c in base_cols if c.name not in self.tbl_version.cols_by_name])
  return result

- def cols_by_name(self) -> dict[str, Column]:
- """Return a dict of all user columns visible in this tbl version path, including columns from bases"""
- cols = self.columns()
- return {col.name: col for col in cols}
-
  def get_column(self, name: str, include_bases: bool = True) -> Optional[Column]:
  """Return the column with the given name, or None if not found"""
  col = self.tbl_version.cols_by_name.get(name)
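The name-shadowing rule in columns() above means a view column hides a same-named base column. A toy illustration of just that filter (plain strings rather than Column objects):

base_cols = ['a', 'b']                 # columns of the base table
view_cols_by_name = {'b': 'view-b'}    # the view redefines 'b'
result = list(view_cols_by_name.values())
result.extend([c for c in base_cols if c not in view_cols_by_name])
assert result == ['view-b', 'a']       # the base's 'b' is shadowed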
pixeltable/datatransfer/label_studio.py CHANGED
@@ -1,9 +1,8 @@
- import json
  import logging
  import os
  from dataclasses import dataclass
  from pathlib import Path
- from typing import Any, Iterator, Optional, Literal
+ from typing import Any, Iterator, Optional
  from xml.etree import ElementTree

  import PIL.Image
@@ -16,7 +15,6 @@ import pixeltable.env as env
  import pixeltable.exceptions as excs
  from pixeltable import Table
  from pixeltable.datatransfer.remote import Remote
- from pixeltable.exprs import ColumnRef, DataRow
  from pixeltable.utils import coco

  _logger = logging.getLogger('pixeltable')
@@ -43,55 +41,26 @@ class LabelStudioProject(Remote):
  """
  # TODO(aaron-siegel): Add link in docstring to a Label Studio howto

- def __init__(self, project_id: int, media_import_method: Literal['post', 'file']):
+ def __init__(self, project_id: int):
  self.project_id = project_id
- self.media_import_method = media_import_method
  self._project: Optional[label_studio_sdk.project.Project] = None

  @classmethod
- def create(cls, title: str, label_config: str, media_import_method: Literal['post', 'file'] = 'file', **kwargs: Any) -> 'LabelStudioProject':
+ def create(cls, title: str, label_config: str, **kwargs: Any) -> 'LabelStudioProject':
  """
  Creates a new Label Studio project, using the Label Studio client configured in Pixeltable.

  Args:
  title: The title of the project.
  label_config: The Label Studio project configuration, in XML format.
- media_import_method: The method to use when importing media columns to Label Studio:
- - `file`: Media will be sent to Label Studio as a file on the local filesystem. This method can be
- used if Pixeltable and Label Studio are running on the same host.
- - `post`: Media will be sent to Label Studio via HTTP post. This should generally only be used for
- prototyping; due to restrictions in Label Studio, it can only be used with projects that have
- just one data field.
  **kwargs: Additional keyword arguments for the new project; these will be passed to `start_project`
  in the Label Studio SDK.
  """
- # TODO(aaron-siegel): Add media_import_method = 'url' as an option
  # Check that the config is valid before creating the project
- config = cls.__parse_project_config(label_config)
- if media_import_method == 'post' and len(config.data_keys) > 1:
- raise excs.Error('`media_import_method` cannot be `post` if there is more than one data key')
-
+ cls._parse_project_config(label_config)
  project = _label_studio_client().start_project(title=title, label_config=label_config, **kwargs)
-
- if media_import_method == 'file':
- # We need to set up a local storage connection to receive media files
- os.environ['LABEL_STUDIO_LOCAL_FILES_DOCUMENT_ROOT'] = str(env.Env.get().home)
- try:
- project.connect_local_import_storage(local_store_path=str(env.Env.get().media_dir))
- except HTTPError as exc:
- if exc.errno == 400:
- response: dict = json.loads(exc.response.text)
- if 'validation_errors' in response and 'non_field_errors' in response['validation_errors'] \
- and 'LOCAL_FILES_SERVING_ENABLED' in response['validation_errors']['non_field_errors'][0]:
- raise excs.Error(
- '`media_import_method` is set to `file`, but your Label Studio server is not configured '
- 'for local file storage.\nPlease set the `LABEL_STUDIO_LOCAL_FILES_SERVING_ENABLED` '
- 'environment variable to `true` in the environment where your Label Studio server is running.'
- ) from exc
- raise # Handle any other exception type normally
-
  project_id = project.get_params()['id']
- return LabelStudioProject(project_id, media_import_method)
+ return LabelStudioProject(project_id)

  @property
  def project(self) -> label_studio_sdk.project.Project:
@@ -115,14 +84,14 @@ class LabelStudioProject(Remote):
  return self.project_params['title']

  @property
- def __project_config(self) -> '_LabelStudioConfig':
- return self.__parse_project_config(self.project_params['label_config'])
+ def _project_config(self) -> '_LabelStudioConfig':
+ return self._parse_project_config(self.project_params['label_config'])

  def get_export_columns(self) -> dict[str, pxt.ColumnType]:
  """
  The data keys and preannotation fields specified in this Label Studio project.
  """
- return self.__project_config.export_columns
+ return self._project_config.export_columns

  def get_import_columns(self) -> dict[str, pxt.ColumnType]:
  """
@@ -138,13 +107,13 @@ class LabelStudioProject(Remote):
  _logger.info(f'Syncing Label Studio project "{self.project_title}" with table `{t.get_name()}`'
  f' (export: {export_data}, import: {import_data}).')
  # Collect all existing tasks into a dict with entries `rowid: task`
- tasks = {tuple(task['meta']['rowid']): task for task in self.__fetch_all_tasks()}
+ tasks = {tuple(task['meta']['rowid']): task for task in self._fetch_all_tasks()}
  if export_data:
- self.__update_tasks(t, col_mapping, tasks)
+ self._create_tasks_from_table(t, col_mapping, tasks)
  if import_data:
- self.__update_table_from_tasks(t, col_mapping, tasks)
+ self._update_table_from_tasks(t, col_mapping, tasks)

- def __fetch_all_tasks(self) -> Iterator[dict]:
+ def _fetch_all_tasks(self) -> Iterator[dict]:
  page = 1
  unknown_task_count = 0
  while True:
@@ -163,10 +132,32 @@ class LabelStudioProject(Remote):
  f'Skipped {unknown_task_count} unrecognized task(s) when syncing Label Studio project "{self.project_title}".'
  )

- def __update_tasks(self, t: Table, col_mapping: dict[str, str], existing_tasks: dict[tuple, dict]) -> None:
+ def _update_table_from_tasks(self, t: Table, col_mapping: dict[str, str], tasks: dict[tuple, dict]) -> None:
+ # `col_mapping` is guaranteed to be a one-to-one dict whose values are a superset
+ # of `get_import_columns`
+ assert ANNOTATIONS_COLUMN in col_mapping.values()
+ annotations_column = next(k for k, v in col_mapping.items() if v == ANNOTATIONS_COLUMN)
+ updates = [
+ {
+ '_rowid': task['meta']['rowid'],
+ # Replace [] by None to indicate no annotations. We do want to sync rows with no annotations,
+ # in order to properly handle the scenario where existing annotations have been deleted in
+ # Label Studio.
+ annotations_column: task[ANNOTATIONS_COLUMN] if len(task[ANNOTATIONS_COLUMN]) > 0 else None
+ }
+ for task in tasks.values()
+ ]
+ if len(updates) > 0:
+ _logger.info(
+ f'Updating table `{t.get_name()}`, column `{annotations_column}` with {len(updates)} total annotations.'
+ )
+ t.batch_update(updates)
+ annotations_count = sum(len(task[ANNOTATIONS_COLUMN]) for task in tasks.values())
+ print(f'Synced {annotations_count} annotation(s) from {len(updates)} existing task(s) in {self}.')

+ def _create_tasks_from_table(self, t: Table, col_mapping: dict[str, str], existing_tasks: dict[tuple, dict]) -> None:
  t_col_types = t.column_types()
- config = self.__project_config
+ config = self._project_config

  # Columns in `t` that map to Label Studio data keys
  t_data_cols = [
@@ -187,23 +178,24 @@ class LabelStudioProject(Remote):
  _logger.debug('`t_rl_cols`: %s', t_rl_cols)
  _logger.debug('`rl_info`: %s', rl_info)

- if self.media_import_method == 'post':
- # Send media to Label Studio by HTTP post.
- self.__update_tasks_by_post(t, col_mapping, existing_tasks, t_data_cols[0], t_rl_cols, rl_info)
- elif self.media_import_method == 'file':
- # Send media to Label Studio by local file transfer.
- self.__update_tasks_by_files(t, col_mapping, existing_tasks, t_data_cols, t_rl_cols, rl_info)
+ if len(t_data_cols) == 1 and t_col_types[t_data_cols[0]].is_media_type():
+ # With a single media column, we can post local files to Label Studio using
+ # the file transfer API.
+ self._create_tasks_by_post(t, col_mapping, existing_tasks, t_rl_cols, rl_info, t_data_cols[0])
  else:
- assert False
+ # Either a single non-media column or multiple columns. Either way, we can't
+ # use the file upload API and need to rely on externally accessible URLs for
+ # media columns.
+ self._create_tasks_by_urls(t, col_mapping, existing_tasks, t_data_cols, t_col_types, t_rl_cols, rl_info)

- def __update_tasks_by_post(
+ def _create_tasks_by_post(
  self,
  t: Table,
  col_mapping: dict[str, str],
  existing_tasks: dict[tuple, dict],
- media_col_name: str,
  t_rl_cols: list[str],
- rl_info: list['_RectangleLabel']
+ rl_info: list['_RectangleLabel'],
+ media_col_name: str
  ) -> None:
  is_stored = t[media_col_name].col.is_stored
  # If it's a stored column, we can use `localpath`
@@ -233,13 +225,13 @@ class LabelStudioProject(Remote):
  os.remove(file)

  # Update the task with `rowid` metadata
- self.project.update_task(task_id, meta={'rowid': row.rowid, 'v_min': row.v_min})
+ self.project.update_task(task_id, meta={'rowid': row.rowid})

  # Convert coco annotations to predictions
  coco_annotations = [row.vals[i] for i in rl_col_idxs]
  _logger.debug('`coco_annotations`: %s', coco_annotations)
  predictions = [
- self.__coco_to_predictions(
+ self._coco_to_predictions(
  coco_annotations[i], col_mapping[t_rl_cols[i]], rl_info[i], task_id=task_id
  )
  for i in range(len(coco_annotations))
@@ -250,95 +242,67 @@ class LabelStudioProject(Remote):

  print(f'Created {tasks_created} new task(s) in {self}.')

- self.__delete_stale_tasks(existing_tasks, row_ids_in_pxt, tasks_created)
+ self._delete_stale_tasks(existing_tasks, row_ids_in_pxt, tasks_created)

- def __update_tasks_by_files(
+ def _create_tasks_by_urls(
  self,
  t: Table,
  col_mapping: dict[str, str],
  existing_tasks: dict[tuple, dict],
  t_data_cols: list[str],
+ t_col_types: dict[str, pxt.ColumnType],
  t_rl_cols: list[str],
  rl_info: list['_RectangleLabel']
- ) -> None:
+ ):
+ # TODO(aaron-siegel): This is just a placeholder (implementation is not complete or tested!)
+ selection = [
+ t[col_name].fileurl if t_col_types[col_name].is_media_type() else t[col_name]
+ for col_name in t_data_cols
+ ]
  r_data_cols = [col_mapping[col_name] for col_name in t_data_cols]
- col_refs = {}
- for col_name in t_data_cols:
- if not t[col_name].col_type.is_media_type():
- # Not a media column; query the data directly
- col_refs[col_name] = t[col_name]
- elif t[col_name].col.stored_proxy:
- # Media column that has a stored proxy; use it. We have to give it a name,
- # since it's an anonymous column
- col_refs[f'{col_name}_proxy'] = ColumnRef(t[col_name].col.stored_proxy).localpath
- else:
- # Media column without a stored proxy; this means it's a stored computed column,
- # and we can just use the localpath
- col_refs[col_name] = t[col_name].localpath
-
- df = t.select(*[t[col] for col in t_rl_cols], **col_refs)
- rl_col_idxs: Optional[list[int]] = None # We have to wait until we begin iterating to populate these
- data_col_idxs: Optional[list[int]] = None
-
- row_ids_in_pxt: set[tuple] = set()
+ rows = t.select(*selection, *[t[col] for col in t_rl_cols])
+ new_rows = filter(lambda row: row.rowid not in existing_tasks, rows._exec())
  tasks_created = 0
- tasks_updated = 0
- page = []
-
- def create_task_info(row: DataRow) -> dict:
- data_vals = [row.vals[idx] for idx in data_col_idxs]
- coco_annotations = [row.vals[idx] for idx in rl_col_idxs]
- # For media columns, we need to transform the paths into Label Studio's bespoke path format
- for i in range(len(t_data_cols)):
- if t[t_data_cols[i]].col_type.is_media_type():
- data_vals[i] = self.__localpath_to_lspath(data_vals[i])
- predictions = [
- self.__coco_to_predictions(coco_annotations[i], col_mapping[t_rl_cols[i]], rl_info[i])
- for i in range(len(coco_annotations))
- ]
- return {
- 'data': dict(zip(r_data_cols, data_vals)),
- 'meta': {'rowid': row.rowid, 'v_min': row.v_min},
- 'predictions': predictions
- }
+ row_ids_in_pxt: set[tuple] = set()

- for row in df._exec():
- if rl_col_idxs is None:
- rl_col_idxs = [expr.slot_idx for expr in df._select_list_exprs[:len(t_rl_cols)]]
- data_col_idxs = [expr.slot_idx for expr in df._select_list_exprs[len(t_rl_cols):]]
- row_ids_in_pxt.add(row.rowid)
- if row.rowid in existing_tasks:
- # A task for this row already exists; see if it needs an update.
- # Get the v_min record from task metadata. Default to 0 if no v_min record is found
- old_v_min = int(existing_tasks[row.rowid]['meta'].get('v_min', 0))
- print(f'{old_v_min} {row.v_min}')
- if row.v_min > old_v_min:
- _logger.debug(f'Updating task for rowid {row.rowid} ({row.v_min} > {old_v_min}).')
- task_info = create_task_info(row)
- self.project.update_task(existing_tasks[row.rowid]['id'], **task_info)
- tasks_updated += 1
- else:
- # No task exists for this row; we need to create one.
- page.append(create_task_info(row))
- tasks_created += 1
- if len(page) == _PAGE_SIZE:
- self.project.import_tasks(page)
- page.clear()
+ for page in more_itertools.batched(new_rows, n=_PAGE_SIZE):
+ data_col_idxs = [expr.slot_idx for expr in rows._select_list_exprs[:len(t_data_cols)]]
+ rl_col_idxs = [expr.slot_idx for expr in rows._select_list_exprs[len(t_data_cols):]]
+ tasks = []

- if len(page) > 0:
- self.project.import_tasks(page)
+ for row in page:
+ row_ids_in_pxt.add(row.rowid)
+ data_vals = [row.vals[i] for i in data_col_idxs]
+ coco_annotations = [row.vals[i] for i in rl_col_idxs]
+ predictions = [
+ self._coco_to_predictions(coco_annotations[i], col_mapping[t_rl_cols[i]], rl_info[i])
+ for i in range(len(coco_annotations))
+ ]

- print(f'Created {tasks_created} new task(s) and updated {tasks_updated} existing task(s) in {self}.')
+ # Validate media columns
+ # TODO Support this if label studio is running on localhost?
+ for i in range(len(data_vals)):
+ if t[t_data_cols[i]].col_type.is_media_type() and data_vals[i].startswith("file://"):
+ raise excs.Error(
+ 'Cannot use locally stored media files in a `LabelStudioProject` with more than one '
+ 'data key. (This is a limitation of Label Studio; see warning here: '
+ 'https://labelstud.io/guide/tasks.html)'
+ )

- self.__delete_stale_tasks(existing_tasks, row_ids_in_pxt, tasks_created)
+ tasks.append({
+ 'data': zip(r_data_cols, data_vals),
+ 'meta': {'rowid': row.rowid},
+ 'predictions': predictions
+ })

- @classmethod
- def __localpath_to_lspath(self, localpath: str) -> str:
- assert isinstance(localpath, str)
- relpath = Path(localpath).relative_to(env.Env.get().home)
- return f'/data/local-files/?d={str(relpath)}'
+ self.project.import_tasks(tasks)
+ tasks_created += len(tasks)
+
+ print(f'Created {tasks_created} new task(s) in {self}.')
+
+ self._delete_stale_tasks(existing_tasks, row_ids_in_pxt, tasks_created)

- def __delete_stale_tasks(self, existing_tasks: dict[tuple, dict], row_ids_in_pxt: set[tuple], tasks_created: int):
+ def _delete_stale_tasks(self, existing_tasks: dict[tuple, dict], row_ids_in_pxt: set[tuple], tasks_created: int):
  tasks_to_delete = [
  task['id'] for rowid, task in existing_tasks.items()
  if rowid not in row_ids_in_pxt
@@ -350,42 +314,19 @@ class LabelStudioProject(Remote):
  self.project.delete_tasks(tasks_to_delete)
  print(f'Deleted {len(tasks_to_delete)} tasks(s) in {self} that are no longer present in Pixeltable.')

- def __update_table_from_tasks(self, t: Table, col_mapping: dict[str, str], tasks: dict[tuple, dict]) -> None:
- # `col_mapping` is guaranteed to be a one-to-one dict whose values are a superset
- # of `get_pull_columns`
- assert ANNOTATIONS_COLUMN in col_mapping.values()
- annotations_column = next(k for k, v in col_mapping.items() if v == ANNOTATIONS_COLUMN)
- updates = [
- {
- '_rowid': task['meta']['rowid'],
- # Replace [] by None to indicate no annotations. We do want to sync rows with no annotations,
- # in order to properly handle the scenario where existing annotations have been deleted in
- # Label Studio.
- annotations_column: task[ANNOTATIONS_COLUMN] if len(task[ANNOTATIONS_COLUMN]) > 0 else None
- }
- for task in tasks.values()
- ]
- if len(updates) > 0:
- _logger.info(
- f'Updating table `{t.get_name()}`, column `{annotations_column}` with {len(updates)} total annotations.'
- )
- t.batch_update(updates)
- annotations_count = sum(len(task[ANNOTATIONS_COLUMN]) for task in tasks.values())
- print(f'Synced {annotations_count} annotation(s) from {len(updates)} existing task(s) in {self}.')
-
  def to_dict(self) -> dict[str, Any]:
- return {'project_id': self.project_id, 'media_import_method': self.media_import_method}
+ return {'project_id': self.project_id}

  @classmethod
  def from_dict(cls, md: dict[str, Any]) -> 'LabelStudioProject':
- return LabelStudioProject(md['project_id'], md['media_import_method'])
+ return LabelStudioProject(md['project_id'])

  def __repr__(self) -> str:
  name = self.project.get_params()['title']
  return f'LabelStudioProject `{name}`'

  @classmethod
- def __parse_project_config(cls, xml_config: str) -> '_LabelStudioConfig':
+ def _parse_project_config(cls, xml_config: str) -> '_LabelStudioConfig':
  """
  Parses a Label Studio XML config, extracting the names and Pixeltable types of
  all input variables.
@@ -394,27 +335,28 @@ class LabelStudioProject(Remote):
  if root.tag.lower() != 'view':
  raise excs.Error('Root of Label Studio config must be a `View`')
  config = _LabelStudioConfig(
- data_keys=dict(cls.__parse_data_keys_config(root)),
- rectangle_labels=dict(cls.__parse_rectangle_labels_config(root))
+ data_keys=dict(cls._parse_data_keys_config(root)),
+ rectangle_labels=dict(cls._parse_rectangle_labels_config(root))
  )
  config.validate()
  return config

  @classmethod
- def __parse_data_keys_config(cls, root: ElementTree.Element) -> Iterator[tuple[str, '_DataKey']]:
+ def _parse_data_keys_config(cls, root: ElementTree.Element) -> Iterator[tuple[str, '_DataKey']]:
  for element in root:
  if 'value' in element.attrib and element.attrib['value'][0] == '$':
  remote_col_name = element.attrib['value'][1:]
- data_key_name = element.attrib.get('name')
+ if 'name' not in element.attrib:
+ raise excs.Error(f'Data key is missing `name` attribute: `{remote_col_name}`')
  element_type = _LS_TAG_MAP.get(element.tag.lower())
  if element_type is None:
  raise excs.Error(
  f'Unsupported Label Studio data type: `{element.tag}` (in data key `{remote_col_name}`)'
  )
- yield remote_col_name, _DataKey(data_key_name, element_type)
+ yield remote_col_name, _DataKey(element.attrib['name'], element_type)

  @classmethod
- def __parse_rectangle_labels_config(cls, root: ElementTree.Element) -> Iterator[tuple[str, '_RectangleLabel']]:
+ def _parse_rectangle_labels_config(cls, root: ElementTree.Element) -> Iterator[tuple[str, '_RectangleLabel']]:
  for element in root:
  if element.tag.lower() == 'rectanglelabels':
  name = element.attrib['name']
@@ -429,7 +371,7 @@ class LabelStudioProject(Remote):
  yield name, _RectangleLabel(to_name=to_name, labels=labels)

  @classmethod
- def __coco_to_predictions(
+ def _coco_to_predictions(
  cls,
  coco_annotations: dict[str, Any],
  from_name: str,
@@ -466,25 +408,10 @@ class LabelStudioProject(Remote):
  else:
  return {'result': result}

- def delete(self) -> None:
- """
- Deletes this Label Studio project. This will remove all data and annotations
- associated with this project in Label Studio.
- """
- title = self.project_title
- _label_studio_client().delete_project(self.project_id)
- print(f'Deleted Label Studio project: {title}')
-
- def __eq__(self, other) -> bool:
- return isinstance(other, LabelStudioProject) and self.project_id == other.project_id
-
- def __hash__(self) -> int:
- return hash(self.project_id)
-

  @dataclass(frozen=True)
  class _DataKey:
- name: Optional[str] # The 'name' attribute of the data key; may differ from the field name
+ name: str # The 'name' attribute of the data key; may differ from the field name
  column_type: pxt.ColumnType

@@ -500,7 +427,7 @@ class _LabelStudioConfig:
  rectangle_labels: dict[str, _RectangleLabel]

  def validate(self) -> None:
- data_key_names = set(key.name for key in self.data_keys.values() if key is not None)
+ data_key_names = set(key.name for key in self.data_keys.values())
  for name, rl in self.rectangle_labels.items():
  if rl.to_name not in data_key_names:
  raise excs.Error(
@@ -510,7 +437,7 @@ class _LabelStudioConfig:

  @property
  def export_columns(self) -> dict[str, pxt.ColumnType]:
- data_key_cols = {key_id: key_info.column_type for key_id, key_info in self.data_keys.items()}
+ data_key_cols = {key_name: key_info.column_type for key_name, key_info in self.data_keys.items()}
  rl_cols = {name: pxt.JsonType() for name in self.rectangle_labels.keys()}
  return {**data_key_cols, **rl_cols}

@@ -518,7 +445,6 @@
  ANNOTATIONS_COLUMN = 'annotations'
  _PAGE_SIZE = 100 # This is the default used in the LS SDK
  _LS_TAG_MAP = {
- 'header': pxt.StringType(),
  'text': pxt.StringType(),
  'image': pxt.ImageType(),
  'video': pxt.VideoType(),
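To make the parsing rules above concrete (field names invented): any element whose value attribute starts with $ becomes a data key and, as of this version, must also carry a name attribute; its tag is looked up in _LS_TAG_MAP, and each RectangleLabels element becomes a JSON-typed preannotation column.

config = '''
<View>
  <Image name="frame_obj" value="$frame"/>
  <RectangleLabels name="boxes" toName="frame_obj">
    <Label value="Cat"/>
    <Label value="Dog"/>
  </RectangleLabels>
</View>
'''
# Expected extraction (assumed from the parsing code above):
#   data_keys        -> {'frame': _DataKey(name='frame_obj', column_type=pxt.ImageType())}
#   rectangle_labels -> {'boxes': _RectangleLabel(to_name='frame_obj', labels=['Cat', 'Dog'])}
#   export_columns   -> {'frame': pxt.ImageType(), 'boxes': pxt.JsonType()}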
pixeltable/datatransfer/remote.py CHANGED
@@ -45,12 +45,6 @@ class Remote(abc.ABC):
  import_data: If `True`, data from this table will be imported from the remote during synchronization.
  """

- @abc.abstractmethod
- def delete(self) -> None:
- """
- Deletes this `Remote`.
- """
-
  @abc.abstractmethod
  def to_dict(self) -> dict[str, Any]: ...

@@ -62,11 +56,9 @@
  # A remote that cannot be synced, used mainly for testing.
  class MockRemote(Remote):

- def __init__(self, name: str, export_cols: dict[str, ts.ColumnType], import_cols: dict[str, ts.ColumnType]):
- self.name = name
+ def __init__(self, export_cols: dict[str, ts.ColumnType], import_cols: dict[str, ts.ColumnType]):
  self.export_cols = export_cols
  self.import_cols = import_cols
- self.__is_deleted = False

  def get_export_columns(self) -> dict[str, ts.ColumnType]:
  return self.export_cols
@@ -77,17 +69,9 @@ class MockRemote(Remote):
  def sync(self, t: Table, col_mapping: dict[str, str], export_data: bool, import_data: bool) -> NotImplemented:
  raise NotImplementedError()

- def delete(self) -> None:
- self.__is_deleted = True
-
- @property
- def is_deleted(self) -> bool:
- return self.__is_deleted
-
  def to_dict(self) -> dict[str, Any]:
  return {
  # TODO Change in next schema version
- 'name': self.name,
  'push_cols': {k: v.as_dict() for k, v in self.export_cols.items()},
  'pull_cols': {k: v.as_dict() for k, v in self.import_cols.items()}
  }
@@ -95,19 +79,7 @@ class MockRemote(Remote):
  @classmethod
  def from_dict(cls, md: dict[str, Any]) -> Remote:
  return cls(
- name=md['name'],
  # TODO Change in next schema version
- export_cols={k: ts.ColumnType.from_dict(v) for k, v in md['push_cols'].items()},
- import_cols={k: ts.ColumnType.from_dict(v) for k, v in md['pull_cols'].items()}
+ {k: ts.ColumnType.from_dict(v) for k, v in md['push_cols'].items()},
+ {k: ts.ColumnType.from_dict(v) for k, v in md['pull_cols'].items()}
  )
-
- def __eq__(self, other: Any) -> bool:
- if not isinstance(other, MockRemote):
- return False
- return self.name == other.name
-
- def __hash__(self) -> int:
- return hash(self.name)
-
- def __repr__(self) -> str:
- return f'MockRemote `{self.name}`'
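After this change the Remote interface reduces to get_export_columns, get_import_columns, sync, to_dict, and from_dict (delete is gone). A hypothetical minimal subclass, not part of the package, just to show the contract:

from typing import Any

import pixeltable.type_system as ts
from pixeltable import Table
from pixeltable.datatransfer.remote import Remote


class CsvRemote(Remote):
    # Toy remote that exports a single string column; illustration only.

    def __init__(self, path: str):
        self.path = path

    def get_export_columns(self) -> dict[str, ts.ColumnType]:
        return {'text': ts.StringType()}

    def get_import_columns(self) -> dict[str, ts.ColumnType]:
        return {}

    def sync(self, t: Table, col_mapping: dict[str, str], export_data: bool, import_data: bool) -> None:
        ...  # would write the mapped columns of t to self.path

    def to_dict(self) -> dict[str, Any]:
        return {'path': self.path}

    @classmethod
    def from_dict(cls, md: dict[str, Any]) -> 'CsvRemote':
        return cls(md['path'])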
pixeltable/env.py CHANGED
@@ -37,16 +37,12 @@ class Env:
  @classmethod
  def get(cls) -> Env:
  if cls._instance is None:
- cls._init_env()
+ env = Env()
+ env._set_up()
+ env._upgrade_metadata()
+ cls._instance = env
  return cls._instance

- @classmethod
- def _init_env(cls, reinit_db: bool = False) -> None:
- env = Env()
- env._set_up(reinit_db=reinit_db)
- env._upgrade_metadata()
- cls._instance = env
-
  def __init__(self):
  self._home: Optional[Path] = None
  self._media_dir: Optional[Path] = None # computed media files
pixeltable/io/globals.py CHANGED
@@ -1,4 +1,4 @@
- from typing import Any, Optional, Literal
+ from typing import Any, Optional

  import pixeltable as pxt
  from pixeltable import Table
@@ -9,7 +9,6 @@ def create_label_studio_project(
  label_config: str,
  col_mapping: Optional[dict[str, str]] = None,
  title: Optional[str] = None,
- media_import_method: Literal['post', 'file'] = 'file',
  sync_immediately: bool = True,
  **kwargs: Any
  ) -> None:
@@ -43,7 +42,7 @@ def create_label_studio_project(
  """
  from pixeltable.datatransfer.label_studio import LabelStudioProject, ANNOTATIONS_COLUMN

- ls_project = LabelStudioProject.create(title or t.get_name(), label_config, media_import_method, **kwargs)
+ ls_project = LabelStudioProject.create(title or t.get_name(), label_config, **kwargs)

  # Create a column to hold the annotations, if one does not yet exist.
  if col_mapping is not None and ANNOTATIONS_COLUMN in col_mapping.values():
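A hypothetical call matching the 0.2.8 signature above (table, config, and title invented; note that media_import_method is no longer a parameter):

import pixeltable as pxt
from pixeltable.io import create_label_studio_project   # assumed re-export path

config = '''
<View>
  <Image name="img_obj" value="$img"/>
  <RectangleLabels name="boxes" toName="img_obj">
    <Label value="Defect"/>
  </RectangleLabels>
</View>
'''
t = pxt.get_table('my_images')   # pre-existing table with an `img` media column
create_label_studio_project(t, config, title='defect-review')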
pixeltable/metadata/__init__.py CHANGED
@@ -10,7 +10,7 @@ import sqlalchemy.orm as orm
  from .schema import SystemInfo, SystemInfoMd

  # current version of the metadata; this is incremented whenever the metadata schema changes
- VERSION = 16
+ VERSION = 15


  def create_system_info(engine: sql.engine.Engine) -> None:
pixeltable/metadata/schema.py CHANGED
@@ -92,9 +92,6 @@ class ColumnMd:
  # if True, the column is present in the stored table
  stored: Optional[bool]

- # if specified, the column is a stored proxy of another column
- proxy_base: Optional[int]
-

  @dataclasses.dataclass
  class IndexMd:
pixeltable/tool/create_test_db_dump.py CHANGED
@@ -30,8 +30,6 @@ class Dumper:
  os.environ['PIXELTABLE_DB'] = db_name
  os.environ['PIXELTABLE_PGDATA'] = str(shared_home / 'pgdata')

- Env._init_env(reinit_db=True)
-
  Env.get().configure_logging(level=logging.DEBUG, to_stdout=True)

  def dump_db(self) -> None:
@@ -166,15 +164,15 @@

  # Add remotes
  from pixeltable.datatransfer.remote import MockRemote
- v.link(
- MockRemote('remote', {'int_field': pxt.IntType()}, {'str_field': pxt.StringType()}),
+ v.link_remote(
+ MockRemote({'int_field': pxt.IntType()}, {'str_field': pxt.StringType()}),
  col_mapping={'test_udf': 'int_field', 'c1': 'str_field'}
  )
  # We're just trying to test metadata here, so reach "under the covers" and link a fake
  # Label Studio project without validation (so we don't need a real Label Studio server)
  from pixeltable.datatransfer.label_studio import LabelStudioProject
  v.tbl_version_path.tbl_version.link(
- LabelStudioProject(4171780, media_import_method='file'),
+ LabelStudioProject(4171780),
  col_mapping={'str_format': 'str_format'}
  )

pixeltable-0.2.7.dist-info/METADATA → pixeltable-0.2.8.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: pixeltable
- Version: 0.2.7
+ Version: 0.2.8
  Summary: Pixeltable: The Multimodal AI Data Plane
  Author: Marcel Kornacker
  Author-email: marcelk@gmail.com
@@ -21,7 +21,7 @@ Requires-Dist: more-itertools (>=10.2,<11.0)
  Requires-Dist: numpy (>=1.25)
  Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
  Requires-Dist: pandas (>=2.0,<3.0)
- Requires-Dist: pgserver (==0.1.3)
+ Requires-Dist: pgserver (==0.1.4)
  Requires-Dist: pgvector (>=0.2.1,<0.3.0)
  Requires-Dist: pillow (>=9.3.0)
  Requires-Dist: psutil (>=5.9.5,<6.0.0)
pixeltable-0.2.7.dist-info/RECORD → pixeltable-0.2.8.dist-info/RECORD CHANGED
@@ -1,8 +1,8 @@
  pixeltable/__init__.py,sha256=DzVevwic1g8Tp4QYrcCIzPFFwPZ66KGWeYprlRC9Uwc,1142
- pixeltable/__version__.py,sha256=QpFd23iUxkd6iv60M7Xgh5GNlwHpgZV9y3kI5aNgy58,112
+ pixeltable/__version__.py,sha256=AyN0bhxu_oExUztSHc2d8uAemad8-aDrT7QgYAM_JCs,112
  pixeltable/catalog/__init__.py,sha256=E41bxaPeQIcgRYzTWc2vkDOboQhRymrJf4IcHQO7o_8,453
  pixeltable/catalog/catalog.py,sha256=0TYWB1R6YBp9qCkWF7kCcX2Yw70UuburKKIemv5L1Js,7908
- pixeltable/catalog/column.py,sha256=EW2nDHDcHiCSYN6Gboh5D39c0_NqheWsxSl62IrFWgg,8584
+ pixeltable/catalog/column.py,sha256=J8irt6PfT1ofC3wVKi-hDGjNUZ1Ceq2qzbmZyEw-ddA,8335
  pixeltable/catalog/dir.py,sha256=pG1nMpG123POo6WMSHhAmnwXOQ26uUJfUcbzL-Jb4ws,919
  pixeltable/catalog/globals.py,sha256=yLEGNbsSnLzjWNHVJacfjA9hbw13Q6QXLOSCRmdTlq0,943
  pixeltable/catalog/insertable_table.py,sha256=vMa_XUWT3DG3ZlxkScDZ9-mYNw31G8XB4ODUlxXt7NU,8927
@@ -10,15 +10,15 @@ pixeltable/catalog/named_function.py,sha256=a96gnKtx-nz5_MzDIiD4t4Hxqdjkg9ZtijRQ
  pixeltable/catalog/path.py,sha256=QgccEi_QOfaKt8YsR2zLtd_z7z7QQkU_1kprJFi2SPQ,1677
  pixeltable/catalog/path_dict.py,sha256=xfvxg1Ze5jZCARUGASF2DRbQPh7pRVTYhuJ_u82gYUo,5941
  pixeltable/catalog/schema_object.py,sha256=-UxmPLbuEBqJiJi_GGRbFdr7arAFxTqs4bt6TFmSt3M,1059
- pixeltable/catalog/table.py,sha256=FbLDJGFuvHtLT7rFTbFaayNKhmK1kzcr4lzdqyRjGBg,39507
- pixeltable/catalog/table_version.py,sha256=U-GWWSAvVvNinEDSAvN-m2dOO8SviuN1py442OO0hlo,57459
- pixeltable/catalog/table_version_path.py,sha256=P-HL9H1UIiJrKfa0wIRsESGrG3f815nGvHOEpVLJ3no,5718
+ pixeltable/catalog/table.py,sha256=50UOSt7zltvthygiiXbgP-XMOiYUpcIaEXiM1uJFcaA,38220
+ pixeltable/catalog/table_version.py,sha256=BeP-4Io6euT6hOQXVJwpZNjZ6ZNehqOH6S98zvQsU9E,52751
+ pixeltable/catalog/table_version_path.py,sha256=2Ofzd0n36flcNm86KWwIWDBAfgnV5Z-FxAHdMSPgMLc,5482
  pixeltable/catalog/view.py,sha256=BIL3s4DV3tWbOcqtqnhn46B2UvLaBhppfJUlNEt5nec,9734
  pixeltable/dataframe.py,sha256=lzSzR7mi9C4BO39fNXYo64k3KxILyG_Z7eET6DXTgKY,31922
  pixeltable/datatransfer/__init__.py,sha256=cRWdQ_LUNkJgmionI1RrYD71A1CSI92P4o8_XXOnFmU,27
- pixeltable/datatransfer/label_studio.py,sha256=7E70MdgbAKW4AkZcR0uBuYXjAlUzruYgsOIR7FEbZY4,23802
- pixeltable/datatransfer/remote.py,sha256=PaxdVggZMbHyG9FROfKXQxTmGAa8PiVhETl7lp5CNYg,3801
- pixeltable/env.py,sha256=6IEbaG7a-Jz_hJ7nqA32wPmejF9Njl1UIzadubq0SjI,21487
+ pixeltable/datatransfer/label_studio.py,sha256=3DLsqfIUNVG9xVRVUU4NayLuC-xUTIM1Sz92kGvrTUc,19643
+ pixeltable/datatransfer/remote.py,sha256=t-VeDIq62mX67xBoHLi8voa4V5XqMkr-8UZ-8DhIgk0,3100
+ pixeltable/env.py,sha256=OEZv6NS8Z41rdCx73Md5j78ImnKaZf3YhdAexFJR7gw,21381
  pixeltable/exceptions.py,sha256=MSP9zeL0AmXT93XqjdvgGN4rzno1_KRrGriq6hpemnw,376
  pixeltable/exec/__init__.py,sha256=RK7SKvrQ7Ky3G_LXDP4Bf7lHmMM_uYZl8dJaZYs0FjY,454
  pixeltable/exec/aggregation_node.py,sha256=cf6rVAgrGh_uaMrCIgXJIwQTmbcboJlnrH_MmPIQSd0,3321
@@ -87,7 +87,7 @@ pixeltable/index/base.py,sha256=YAQ5Dz1mfI0dfu9rxWHWroE8TjB90yKfPtXAzoADq38,1568
  pixeltable/index/btree.py,sha256=NE4GYhcJWYJhdKyeHI0sQBlFvUaIgGOF9KLyCZOfFjE,1822
  pixeltable/index/embedding_index.py,sha256=AYphEggN-0B4GNrm4nMmi46CEtrQw5tguyk67BK2sWo,7627
  pixeltable/io/__init__.py,sha256=Io5ZLrcvRPeqRQwnU2iODvWMqkYroWErkbp7dLxE4Kk,197
- pixeltable/io/globals.py,sha256=5EdMVoRVvk1AeLaq-P4_LNdNlEDVfsl9bRTFuoTgexQ,2735
+ pixeltable/io/globals.py,sha256=ArnuWVhdKHT9ds84PZBl0-fszmEu-W62P4Su21c9oN4,2642
  pixeltable/io/hf_datasets.py,sha256=h5M1NkXOvEU8kaeT3AON1A18Vmhnc1lVo5a3TZ5AAic,8004
  pixeltable/io/pandas.py,sha256=cDHUDW2CGiBbsEJB9zE5vkXopTKxDdI-CZxNcp0OnIk,6478
  pixeltable/io/parquet.py,sha256=Z1b92gsPeCBf4P9_jgWWHAEHtu51nhuC8nSJgoKiywQ,8150
@@ -95,17 +95,15 @@ pixeltable/iterators/__init__.py,sha256=kokLguXBY_nxBTqUiXZVvCxTv-vGsX4cK8tgIbsW
  pixeltable/iterators/base.py,sha256=cnEh1tNN2JAxRzrLTg3dhun3N1oNQ8vifCm6ts3_UiE,1687
  pixeltable/iterators/document.py,sha256=netSCJatG8NcgbHZ69BvQVICdAorQlYi8OlcpqwLQD4,19436
  pixeltable/iterators/video.py,sha256=xtxODL1AfZwTfHVzWekhTCLA8gwTJIvJFdxC0KecD9Q,3836
- pixeltable/metadata/__init__.py,sha256=k3aptna__92VIJ4C6bIHLQrfYE1MOpBCKKMJ_b03nxA,2228
+ pixeltable/metadata/__init__.py,sha256=beGPpClpNaN7seM_AeKli5R33TSIkb7_mIBWoExT_5M,2228
  pixeltable/metadata/converters/convert_10.py,sha256=0mSGCn7vqtef63riPi9msUaaUvsSQIj-NFj9QFDYPdA,733
  pixeltable/metadata/converters/convert_12.py,sha256=g9rHTcKlDQZbM3_k4eBv0FBdWmQXHWCnMwx1_l6KpMI,107
  pixeltable/metadata/converters/convert_13.py,sha256=FEgOH5PKf05xVoCaioDDDHOSuoWPyBzodojmsSMMZ5U,1366
  pixeltable/metadata/converters/convert_14.py,sha256=UAWHEipZ-NrQtI5zZN1u9C5AD24ZpYXsdpC3Te0t-qE,402
- pixeltable/metadata/converters/convert_15.py,sha256=WFpIOMRZDgb-_bh1V07EbLv5Hshh1aArdavSnCsUwoQ,1004
- pixeltable/metadata/converters/util.py,sha256=97efM1Hx1qKMIOaEI4bjmX93POie7XvBEF_HJhFhzE0,2400
- pixeltable/metadata/schema.py,sha256=IdvV_UIyQqQL25cf36Rz6dMhHRWXvsApKt_uFSHW5kk,8497
+ pixeltable/metadata/schema.py,sha256=ZYBbt_jESRrX7BAx35xKY1CpIgRuJnd2LJYo4MrPnn0,8399
  pixeltable/plan.py,sha256=A_kPsX3bjLyfYbeQ6eCgDbrb_Oldk4w8HdFRqRSDpPY,32653
  pixeltable/store.py,sha256=foQe9y8rRbl35f3naL7rbYVrD8LO00cmD53vWP2A9XI,18850
- pixeltable/tool/create_test_db_dump.py,sha256=W5XNaKpi_-YnOCDVYjNs3g_p2CnRQp0xMtIwyu2rP14,7607
+ pixeltable/tool/create_test_db_dump.py,sha256=17MdBqsSNj7j61w0Re9pS4aDIEdML_4hnE-uZJcEW4I,7537
  pixeltable/tool/create_test_video.py,sha256=OLfccymYReIpzE8osZn4rQvLXxxiPC_l0vc06U74hVM,2899
  pixeltable/type_system.py,sha256=nljZs4O_dsVFMs4aB3z7Szc9LgFtl5eOuloxJkk7tPE,29503
  pixeltable/utils/__init__.py,sha256=UYlrf6TIWJT0g-Hac0b34-dEk478B5Qx8dGco34YlIk,439
@@ -120,7 +118,7 @@ pixeltable/utils/pytorch.py,sha256=BR4tgfUWw-2rwWTOgzXj5qdMBpe1Arpp5SK4ax6jjpk,3
  pixeltable/utils/s3.py,sha256=rkanuhk9DWvSfmbOLQW1j1Iov4sl2KhxGGKN-AJ8LSE,432
  pixeltable/utils/sql.py,sha256=5n5_OmXAGtqFdL6z5XvgnU-vlx6Ba6f1WJrO1ZwUle8,765
  pixeltable/utils/transactional_directory.py,sha256=UGzCrGtLR3hEEf8sYGuWBzLVFAEQml3vdIavigWeTBM,1349
- pixeltable-0.2.7.dist-info/LICENSE,sha256=0UNMmwuqWPC0xDY1NWMm4uNJ2_MyA1pnTNRgQTvuBiQ,746
- pixeltable-0.2.7.dist-info/METADATA,sha256=umL9Z-G7tgcumv96Wu89_5i2S_PXLEb9wuvGLoSo2SQ,9806
- pixeltable-0.2.7.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
- pixeltable-0.2.7.dist-info/RECORD,,
+ pixeltable-0.2.8.dist-info/LICENSE,sha256=0UNMmwuqWPC0xDY1NWMm4uNJ2_MyA1pnTNRgQTvuBiQ,746
+ pixeltable-0.2.8.dist-info/METADATA,sha256=VKFyaYflvsFwUgiC1Y2iukgPHGI4W9sQpnLmUCRdMiQ,9806
+ pixeltable-0.2.8.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ pixeltable-0.2.8.dist-info/RECORD,,
pixeltable/metadata/converters/convert_15.py DELETED
@@ -1,29 +0,0 @@
- import uuid
-
- import sqlalchemy as sql
-
- from pixeltable.metadata import register_converter
- from pixeltable.metadata.converters.util import convert_table_md
-
-
- def convert_15(engine: sql.engine.Engine) -> None:
- convert_table_md(engine, column_md_updater=update_column_md, remote_md_updater=update_remote_md)
-
-
- def update_column_md(column_md: dict) -> None:
- column_md['proxy_base'] = None
-
-
- def update_remote_md(remote_md: dict) -> None:
- remote_md['class'] = f'{remote_md["module"]}.{remote_md["class"]}'
- del remote_md['module']
- if remote_md['class'] == 'pixeltable.datatransfer.remote.MockRemote':
- remote_md['remote_md']['name'] = f'remote_{uuid.uuid4()}'
- elif remote_md['class'] == 'pixeltable.datatransfer.label_studio.LabelStudioProject':
- # 'post' is the media_import_method for legacy LabelStudioProject remotes
- remote_md['remote_md']['media_import_method'] = 'post'
- else:
- assert False, remote_md['class']
-
-
- register_converter(15, convert_15)
pixeltable/metadata/converters/util.py DELETED
@@ -1,63 +0,0 @@
- import copy
- import logging
- from typing import Any, Callable, Optional
-
- import sqlalchemy as sql
-
- from pixeltable.metadata.schema import Table
-
- __logger = logging.getLogger('pixeltable')
-
-
- def convert_table_md(
- engine: sql.engine.Engine,
- column_md_updater: Optional[Callable[[dict], None]] = None,
- remote_md_updater: Optional[Callable[[dict], None]] = None,
- substitution_fn: Optional[Callable[[Any, Any], Optional[tuple[Any, Any]]]] = None
- ) -> None:
- with engine.begin() as conn:
- for row in conn.execute(sql.select(Table)):
- id = row[0]
- table_md = row[2]
- assert isinstance(table_md, dict)
- updated_table_md = copy.deepcopy(table_md)
- if column_md_updater is not None:
- __update_column_md(updated_table_md, column_md_updater)
- if remote_md_updater is not None:
- __update_remote_md(updated_table_md, remote_md_updater)
- if substitution_fn is not None:
- updated_table_md = __substitute_md_rec(updated_table_md, substitution_fn)
- if updated_table_md != table_md:
- __logger.info(f'Updating schema for table: {id}')
- conn.execute(sql.update(Table).where(Table.id == id).values(md=updated_table_md))
-
-
- def __update_column_md(table_md: dict, column_md_updater: Callable[[dict], None]) -> None:
- columns_md = table_md['column_md']
- assert isinstance(columns_md, dict)
- for column_md in columns_md.values():
- column_md_updater(column_md)
-
-
- def __update_remote_md(table_md: dict, remote_md_updater: Callable[[dict], None]) -> None:
- remotes_md = table_md['remotes']
- assert isinstance(remotes_md, list)
- for remote_md in remotes_md:
- remote_md_updater(remote_md)
-
-
- def __substitute_md_rec(md: Any, substitution_fn: Callable[[Any, Any], Optional[tuple[Any, Any]]]) -> Any:
- if isinstance(md, dict):
- updated_md = {}
- for k, v in md.items():
- substitute = substitution_fn(k, v)
- if substitute is not None:
- updated_k, updated_v = substitute
- updated_md[updated_k] = updated_v
- else:
- updated_md[k] = __substitute_md_rec(v, substitution_fn)
- return updated_md
- elif isinstance(md, list):
- return [__substitute_md_rec(v, substitution_fn) for v in md]
- else:
- return md