pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +42 -8
- pixeltable/{dataframe.py → _query.py} +470 -206
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -4
- pixeltable/catalog/catalog.py +1785 -432
- pixeltable/catalog/column.py +190 -113
- pixeltable/catalog/dir.py +2 -4
- pixeltable/catalog/globals.py +19 -46
- pixeltable/catalog/insertable_table.py +191 -98
- pixeltable/catalog/path.py +63 -23
- pixeltable/catalog/schema_object.py +11 -15
- pixeltable/catalog/table.py +843 -436
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +978 -657
- pixeltable/catalog/table_version_handle.py +72 -16
- pixeltable/catalog/table_version_path.py +112 -43
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +134 -90
- pixeltable/config.py +134 -22
- pixeltable/env.py +471 -157
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +4 -1
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +11 -7
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +106 -56
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +19 -19
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +351 -84
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +36 -23
- pixeltable/exprs/column_ref.py +213 -89
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +164 -54
- pixeltable/exprs/expr.py +70 -44
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +100 -40
- pixeltable/exprs/globals.py +2 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +18 -32
- pixeltable/exprs/is_null.py +7 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +27 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +167 -67
- pixeltable/exprs/rowid_ref.py +25 -10
- pixeltable/exprs/similarity_expr.py +58 -40
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +17 -11
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +29 -27
- pixeltable/func/signature.py +46 -19
- pixeltable/func/tools.py +31 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +16 -0
- pixeltable/functions/anthropic.py +123 -77
- pixeltable/functions/audio.py +147 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +7 -4
- pixeltable/functions/deepseek.py +35 -43
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +11 -20
- pixeltable/functions/gemini.py +195 -39
- pixeltable/functions/globals.py +142 -14
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1056 -24
- pixeltable/functions/image.py +115 -57
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +28 -13
- pixeltable/functions/math.py +67 -5
- pixeltable/functions/mistralai.py +18 -55
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +20 -13
- pixeltable/functions/openai.py +240 -226
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +4 -4
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +239 -69
- pixeltable/functions/timestamp.py +16 -16
- pixeltable/functions/together.py +24 -84
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1515 -107
- pixeltable/functions/vision.py +8 -8
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +16 -8
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +362 -115
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +28 -22
- pixeltable/index/embedding_index.py +100 -118
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +8 -7
- pixeltable/io/external_store.py +56 -105
- pixeltable/io/fiftyone.py +13 -13
- pixeltable/io/globals.py +31 -30
- pixeltable/io/hf_datasets.py +61 -16
- pixeltable/io/label_studio.py +74 -70
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +21 -12
- pixeltable/io/parquet.py +25 -105
- pixeltable/io/table_data_conduit.py +250 -123
- pixeltable/io/utils.py +4 -4
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +26 -25
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +112 -78
- pixeltable/iterators/image.py +12 -15
- pixeltable/iterators/string.py +11 -4
- pixeltable/iterators/video.py +523 -120
- pixeltable/metadata/__init__.py +14 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_30.py +34 -21
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +20 -31
- pixeltable/metadata/notes.py +9 -0
- pixeltable/metadata/schema.py +140 -53
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +382 -115
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +547 -83
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +257 -59
- pixeltable/store.py +311 -194
- pixeltable/type_system.py +373 -211
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +131 -17
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +6 -6
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +32 -6
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +7 -18
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +86 -48
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +26 -0
- pixeltable/utils/system.py +30 -0
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -40
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable-0.3.14.dist-info/METADATA +0 -434
- pixeltable-0.3.14.dist-info/RECORD +0 -186
- pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/io/external_store.py
CHANGED
@@ -3,15 +3,13 @@ from __future__ import annotations
 import abc
 import itertools
 import logging
-import
-from dataclasses import dataclass
-from typing import Any, Optional
-from uuid import UUID
+from typing import Any
 
 import pixeltable.exceptions as excs
 import pixeltable.type_system as ts
 from pixeltable import Column, Table
-from pixeltable.catalog import
+from pixeltable.catalog import ColumnHandle, TableVersion
+from pixeltable.catalog.update_status import UpdateStatus
 
 _logger = logging.getLogger('pixeltable')
 
@@ -23,6 +21,8 @@ class ExternalStore(abc.ABC):
     and stateful external stores.
     """
 
+    __name: str
+
     def __init__(self, name: str) -> None:
         self.__name = name
 
@@ -32,24 +32,20 @@ class ExternalStore(abc.ABC):
 
     @abc.abstractmethod
     def link(self, tbl_version: TableVersion) -> None:
-        """
-        Called by `TableVersion.link()` to implement store-specific logic.
-        """
+        """Creates store-specific metadata needed to implement sync()."""
 
     @abc.abstractmethod
     def unlink(self, tbl_version: TableVersion) -> None:
-        """
-        Called by `TableVersion.unlink()` to implement store-specific logic.
-        """
+        """Removes store-specific metadata created in link()."""
 
     @abc.abstractmethod
-    def get_local_columns(self) -> list[
+    def get_local_columns(self) -> list[ColumnHandle]:
         """
         Gets a list of all local (Pixeltable) columns that are associated with this external store.
         """
 
     @abc.abstractmethod
-    def sync(self, t: Table, export_data: bool, import_data: bool) ->
+    def sync(self, t: Table, export_data: bool, import_data: bool) -> UpdateStatus:
         """
         Called by `Table.sync()` to implement store-specific synchronization logic.
         """
@@ -68,9 +64,12 @@ class Project(ExternalStore, abc.ABC):
     additional capabilities specific to such projects.
     """
 
-
+    _col_mapping: dict[ColumnHandle, str]  # col -> external col name
+    stored_proxies: dict[ColumnHandle, ColumnHandle]  # original col -> proxy col
 
-    def __init__(
+    def __init__(
+        self, name: str, col_mapping: dict[ColumnHandle, str], stored_proxies: dict[ColumnHandle, ColumnHandle] | None
+    ):
         super().__init__(name)
         self._col_mapping = col_mapping
 
@@ -85,11 +84,11 @@
         # Note from aaron-siegel: This methodology is inefficient in the case where a table has many views with a high
         # proportion of overlapping rows, all proxying the same base column.
         if stored_proxies is None:
-            self.stored_proxies: dict[
+            self.stored_proxies: dict[ColumnHandle, ColumnHandle] = {}
         else:
            self.stored_proxies = stored_proxies
 
-    def get_local_columns(self) -> list[
+    def get_local_columns(self) -> list[ColumnHandle]:
         return list(self.col_mapping.keys())
 
     def link(self, tbl_version: TableVersion) -> None:
@@ -97,50 +96,42 @@
         # This ensures that the media in those columns resides in the media store.
         # First determine which columns (if any) need stored proxies, but don't have one yet.
         stored_proxies_needed: list[Column] = []
-        for
+        for col_handle in self.col_mapping:
+            col = col_handle.get()
             if col.col_type.is_media_type() and not (col.is_stored and col.is_computed):
                 # If this column is already proxied in some other Project, use the existing proxy to avoid
                 # duplication. Otherwise, we'll create a new one.
                 for store in tbl_version.external_stores.values():
-                    if isinstance(store, Project) and
-                        self.stored_proxies[
+                    if isinstance(store, Project) and col_handle in store.stored_proxies:
+                        self.stored_proxies[col_handle] = store.stored_proxies[col_handle]
                         break
-                if
+                if col_handle not in self.stored_proxies:
                     # We didn't find it in an existing Project
                     stored_proxies_needed.append(col)
 
         if len(stored_proxies_needed) > 0:
             _logger.info(f'Creating stored proxies for columns: {[col.name for col in stored_proxies_needed]}')
-            # Create stored proxies for columns that need one
-
-            tbl_version.version += 1
-            preceding_schema_version = tbl_version.schema_version
-            tbl_version.schema_version = tbl_version.version
-            proxy_cols = [self.create_stored_proxy(tbl_version, col) for col in stored_proxies_needed]
+            # Create stored proxies for columns that need one
+            proxy_cols = [self.create_stored_proxy(col) for col in stored_proxies_needed]
             # Add the columns; this will also update table metadata.
-            tbl_version.
-
-
-
+            tbl_version.add_columns(proxy_cols, print_stats=False, on_error='ignore')
+            self.stored_proxies.update(
+                {col.handle: proxy_col.handle for col, proxy_col in zip(stored_proxies_needed, proxy_cols)}
+            )
 
     def unlink(self, tbl_version: TableVersion) -> None:
         # Determine which stored proxies can be deleted. (A stored proxy can be deleted if it is not referenced by
         # any *other* external store for this table.)
-        deletions_needed: set[
+        deletions_needed: set[ColumnHandle] = set(self.stored_proxies.values())
         for name, store in tbl_version.external_stores.items():
             if isinstance(store, Project) and name != self.name:
                 deletions_needed = deletions_needed.difference(set(store.stored_proxies.values()))
         if len(deletions_needed) > 0:
-            _logger.info(f'Removing stored proxies for columns: {[col.name for col in deletions_needed]}')
-
-            tbl_version.version += 1
-            preceding_schema_version = tbl_version.schema_version
-            tbl_version.schema_version = tbl_version.version
-            tbl_version._drop_columns(deletions_needed)
+            _logger.info(f'Removing stored proxies for columns: {[col.get().name for col in deletions_needed]}')
+            tbl_version._drop_columns(col.get() for col in deletions_needed)
             self.stored_proxies.clear()
-            tbl_version._update_md(time.time(), preceding_schema_version=preceding_schema_version)
 
-    def create_stored_proxy(self,
+    def create_stored_proxy(self, col: Column) -> Column:
         """
         Creates a proxy column for the specified column. The proxy column will be created in the specified
         `TableVersion`.
@@ -158,17 +149,11 @@
             # Once `destination` is implemented, it can be replaced with a simple `ColumnRef`.
             computed_with=exprs.ColumnRef(col).apply(lambda x: x, col_type=col.col_type),
             stored=True,
-            col_id=tbl_version.next_col_id,
-            sa_col_type=col.col_type.to_sa_type(),
-            schema_version_add=tbl_version.schema_version,
         )
-        proxy_col.tbl = TableVersionHandle(tbl_version.id, tbl_version.effective_version, tbl_version=tbl_version)
-        tbl_version.next_col_id += 1
-        self.stored_proxies[col] = proxy_col
         return proxy_col
 
     @property
-    def col_mapping(self) -> dict[
+    def col_mapping(self) -> dict[ColumnHandle, str]:
         return self._col_mapping
 
     @abc.abstractmethod
@@ -202,8 +187,8 @@
         table: Table,
         export_cols: dict[str, ts.ColumnType],
         import_cols: dict[str, ts.ColumnType],
-        col_mapping:
-    ) -> dict[
+        col_mapping: dict[str, str] | None,
+    ) -> dict[ColumnHandle, str]:
         """
         Verifies that the specified `col_mapping` is valid. In particular, checks that:
         (i) the keys of `col_mapping` are valid columns of the specified `Table`;
@@ -213,6 +198,7 @@
         external (import or export) columns.
         If validation fails, an exception will be raised. If validation succeeds, a new mapping will be returned
         in which the Pixeltable column names are resolved to the corresponding `Column` objects.
+        TODO: return columns as names or qualified ids
         """
         from pixeltable import exprs
 
@@ -220,33 +206,34 @@
         if col_mapping is None:
             col_mapping = {col: col for col in itertools.chain(export_cols.keys(), import_cols.keys())}
 
-        resolved_col_mapping: dict[
+        resolved_col_mapping: dict[ColumnHandle, str] = {}
 
         # Validate names
-        t_cols = set(table.
+        t_cols = set(table._get_schema().keys())
         for t_col, ext_col in col_mapping.items():
             if t_col not in t_cols:
                 if is_user_specified_col_mapping:
                     raise excs.Error(
-                        f'Column name
+                        f'Column name {t_col!r} appears as a key in `col_mapping`, but {table._display_str()} '
                         'contains no such column.'
                     )
                 else:
                     raise excs.Error(
-                        f'Column
+                        f'Column {t_col!r} does not exist in {table._display_str()}. Either add a column {t_col!r}, '
                         f'or specify a `col_mapping` to associate a different column with '
-                        f'the external field
+                        f'the external field {ext_col!r}.'
                     )
             if ext_col not in export_cols and ext_col not in import_cols:
                 raise excs.Error(
-                    f'Column name
-                    f'configuration has no column
+                    f'Column name {ext_col!r} appears as a value in `col_mapping`, but the external store '
+                    f'configuration has no column {ext_col!r}.'
                 )
             col_ref = table[t_col]
             assert isinstance(col_ref, exprs.ColumnRef)
-            resolved_col_mapping[col_ref.col] = ext_col
+            resolved_col_mapping[col_ref.col.handle] = ext_col
+
         # Validate column specs
-        t_col_types = table.
+        t_col_types = table._get_schema()
         for t_col, ext_col in col_mapping.items():
             t_col_type = t_col_types[t_col]
             if ext_col in export_cols:
@@ -254,57 +241,23 @@
                 ext_col_type = export_cols[ext_col]
                 if not ext_col_type.is_supertype_of(t_col_type, ignore_nullable=True):
                     raise excs.Error(
-                        f'Column
+                        f'Column {t_col!r} cannot be exported to external column {ext_col!r} '
                         f'(incompatible types; expecting `{ext_col_type}`)'
                     )
             if ext_col in import_cols:
                 # Validate that the external column can be assigned to the table column
                 if table._tbl_version_path.get_column(t_col).is_computed:
                     raise excs.Error(
-                        f'Column
+                        f'Column {t_col!r} is a computed column, which cannot be populated from an external column'
                    )
                 ext_col_type = import_cols[ext_col]
                 if not t_col_type.is_supertype_of(ext_col_type, ignore_nullable=True):
                     raise excs.Error(
-                        f'Column
+                        f'Column {t_col!r} cannot be imported from external column {ext_col!r} '
                         f'(incompatible types; expecting `{ext_col_type}`)'
                     )
         return resolved_col_mapping
 
-    @classmethod
-    def _column_as_dict(cls, col: Column) -> dict[str, Any]:
-        return {'tbl_id': str(col.tbl.id), 'col_id': col.id}
-
-    @classmethod
-    def _column_from_dict(cls, d: dict[str, Any]) -> Column:
-        from pixeltable.catalog import Catalog
-
-        tbl_id = UUID(d['tbl_id'])
-        col_id = d['col_id']
-        return Catalog.get().get_tbl_version(tbl_id, None).cols_by_id[col_id]
-
-
-@dataclass(frozen=True)
-class SyncStatus:
-    external_rows_created: int = 0
-    external_rows_deleted: int = 0
-    external_rows_updated: int = 0
-    pxt_rows_updated: int = 0
-    num_excs: int = 0
-
-    def combine(self, other: 'SyncStatus') -> 'SyncStatus':
-        return SyncStatus(
-            external_rows_created=self.external_rows_created + other.external_rows_created,
-            external_rows_deleted=self.external_rows_deleted + other.external_rows_deleted,
-            external_rows_updated=self.external_rows_updated + other.external_rows_updated,
-            pxt_rows_updated=self.pxt_rows_updated + other.pxt_rows_updated,
-            num_excs=self.num_excs + other.num_excs,
-        )
-
-    @classmethod
-    def empty(cls) -> 'SyncStatus':
-        return SyncStatus(0, 0, 0, 0, 0)
-
 
 class MockProject(Project):
     """A project that cannot be synced, used mainly for testing."""
@@ -314,8 +267,8 @@ class MockProject(Project):
         name: str,
         export_cols: dict[str, ts.ColumnType],
         import_cols: dict[str, ts.ColumnType],
-        col_mapping: dict[
-        stored_proxies:
+        col_mapping: dict[ColumnHandle, str],
+        stored_proxies: dict[ColumnHandle, ColumnHandle] | None = None,
     ):
         super().__init__(name, col_mapping, stored_proxies)
         self.export_cols = export_cols
@@ -329,7 +282,7 @@ class MockProject(Project):
        name: str,
        export_cols: dict[str, ts.ColumnType],
        import_cols: dict[str, ts.ColumnType],
-        col_mapping:
+        col_mapping: dict[str, str] | None = None,
    ) -> 'MockProject':
        col_mapping = cls.validate_columns(t, export_cols, import_cols, col_mapping)
        return cls(name, export_cols, import_cols, col_mapping)
@@ -340,7 +293,7 @@ class MockProject(Project):
    def get_import_columns(self) -> dict[str, ts.ColumnType]:
        return self.import_cols
 
-    def sync(self, t: Table, export_data: bool, import_data: bool) ->
+    def sync(self, t: Table, export_data: bool, import_data: bool) -> UpdateStatus:
        raise NotImplementedError()
 
    def delete(self) -> None:
@@ -355,10 +308,8 @@ class MockProject(Project):
            'name': self.name,
            'export_cols': {k: v.as_dict() for k, v in self.export_cols.items()},
            'import_cols': {k: v.as_dict() for k, v in self.import_cols.items()},
-            'col_mapping': [[
-            'stored_proxies': [
-                [self._column_as_dict(k), self._column_as_dict(v)] for k, v in self.stored_proxies.items()
-            ],
+            'col_mapping': [[k.as_dict(), v] for k, v in self.col_mapping.items()],
+            'stored_proxies': [[k.as_dict(), v.as_dict()] for k, v in self.stored_proxies.items()],
        }
 
    @classmethod
@@ -367,8 +318,8 @@ class MockProject(Project):
            md['name'],
            {k: ts.ColumnType.from_dict(v) for k, v in md['export_cols'].items()},
            {k: ts.ColumnType.from_dict(v) for k, v in md['import_cols'].items()},
-            {
-            {
+            {ColumnHandle.from_dict(entry[0]): entry[1] for entry in md['col_mapping']},
+            {ColumnHandle.from_dict(entry[0]): ColumnHandle.from_dict(entry[1]) for entry in md['stored_proxies']},
        )
 
    def __eq__(self, other: object) -> bool:
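In short, the public surface of `pixeltable/io/external_store.py` changes in two ways: the module-level `SyncStatus` dataclass is removed in favor of `pixeltable.catalog.update_status.UpdateStatus`, and column mappings are keyed by `ColumnHandle` rather than `Column`. As a rough sketch only (method names and signatures are copied from the hunks above; the real base class may declare additional abstract methods, e.g. for serialization, that are not visible in this diff), a minimal do-nothing store written against the 0.5.7 interface might look like:

# Hypothetical sketch, not part of the diff: a no-op store against the 0.5.7 API.
from pixeltable import Table
from pixeltable.catalog import ColumnHandle, TableVersion
from pixeltable.catalog.update_status import UpdateStatus
from pixeltable.io.external_store import ExternalStore


class NoOpStore(ExternalStore):
    """Tracks no columns and reports an empty sync."""

    def link(self, tbl_version: TableVersion) -> None:
        pass  # no store-side metadata to create

    def unlink(self, tbl_version: TableVersion) -> None:
        pass  # nothing to remove

    def get_local_columns(self) -> list[ColumnHandle]:
        return []  # no Pixeltable columns are associated with this store

    def sync(self, t: Table, export_data: bool, import_data: bool) -> UpdateStatus:
        return UpdateStatus()  # nothing exported or imported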
pixeltable/io/fiftyone.py
CHANGED
@@ -1,5 +1,5 @@
 import os
-from typing import Any, Iterator
+from typing import Any, Iterator
 
 import fiftyone as fo  # type: ignore[import-untyped]
 import fiftyone.utils.data as foud  # type: ignore[import-untyped]
@@ -9,7 +9,7 @@ import puremagic
 import pixeltable as pxt
 import pixeltable.exceptions as excs
 from pixeltable import exprs
-from pixeltable.
+from pixeltable.utils.local_store import TempStore
 
 
 class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
@@ -20,7 +20,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
    __image_format: str  # format to use for any exported images that are not already stored on disk
    __labels: dict[str, tuple[exprs.Expr, type[fo.Label]]]  # label_name -> (expr, label_cls)
    __image_idx: int  # index of the image expr in the select list
-    __localpath_idx:
+    __localpath_idx: int | None  # index of the image localpath in the select list, if present
    __row_iter: Iterator[list]  # iterator over the table rows, to be convered to FiftyOne samples
 
    def __init__(
@@ -28,12 +28,12 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
        tbl: pxt.Table,
        image: exprs.Expr,
        image_format: str,
-        classifications:
-        detections:
-        dataset_dir:
+        classifications: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
+        detections: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
+        dataset_dir: os.PathLike | None = None,
        shuffle: bool = False,
-        seed:
-        max_samples:
+        seed: int | float | str | bytes | bytearray | None = None,
+        max_samples: int | None = None,
    ):
        super().__init__(dataset_dir=dataset_dir, shuffle=shuffle, seed=seed, max_samples=max_samples)
 
@@ -87,10 +87,10 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
        else:
            self.__localpath_idx = None
 
-
-        self.__row_iter =
+        query = tbl.select(*selection)
+        self.__row_iter = query._output_row_iterator()
 
-    def __next__(self) -> tuple[str,
+    def __next__(self) -> tuple[str, fo.ImageMetadata | None, dict[str, fo.Label] | None]:
        row = next(self.__row_iter)
        img = row[self.__image_idx]
        assert isinstance(img, PIL.Image.Image)
@@ -100,7 +100,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
            assert isinstance(file, str)
        else:
            # Write the dynamically created image to a temp file
-            file =
+            file = TempStore.create_path(extension=f'.{self.__image_format}')
        img.save(file, format=self.__image_format)
 
        metadata = fo.ImageMetadata(
@@ -108,7 +108,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
            mime_type=puremagic.from_file(file, mime=True),
            width=img.width,
            height=img.height,
-            filepath=file,
+            filepath=str(file),
            num_channels=len(img.getbands()),
        )
 
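Two details of the importer changes are worth calling out: dynamically created images are now written to a path allocated by `TempStore.create_path()`, and `fo.ImageMetadata` receives that path as `str(file)` (which suggests `create_path()` returns a `Path` rather than a string). A small standalone sketch of that temp-file pattern, using only the calls that appear in the hunks above (`TempStore` lives in `pixeltable.utils.local_store` and is an internal utility, so this is illustrative rather than a supported API):

# Illustrative only: mirrors how PxtImageDatasetImporter materializes an in-memory
# image before handing its path to FiftyOne.
import PIL.Image

from pixeltable.utils.local_store import TempStore

img = PIL.Image.new('RGB', (64, 64))             # stand-in for a dynamically created image
file = TempStore.create_path(extension='.webp')  # allocate a fresh path in the temp store
img.save(file, format='webp')                    # write the image to that path
filepath = str(file)                             # FiftyOne metadata expects a plain string path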
pixeltable/io/globals.py
CHANGED
@@ -1,12 +1,12 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any, Literal
+from typing import TYPE_CHECKING, Any, Literal
 
 import pixeltable as pxt
 import pixeltable.exceptions as excs
 from pixeltable import Table, exprs
+from pixeltable.catalog.update_status import UpdateStatus
 from pixeltable.env import Env
-from pixeltable.io.external_store import SyncStatus
 
 if TYPE_CHECKING:
     import fiftyone as fo  # type: ignore[import-untyped]
@@ -15,19 +15,19 @@ if TYPE_CHECKING:
 def create_label_studio_project(
     t: Table,
     label_config: str,
-    name:
-    title:
+    name: str | None = None,
+    title: str | None = None,
     media_import_method: Literal['post', 'file', 'url'] = 'post',
-    col_mapping:
+    col_mapping: dict[str, str] | None = None,
     sync_immediately: bool = True,
-    s3_configuration:
+    s3_configuration: dict[str, Any] | None = None,
     **kwargs: Any,
-) ->
+) -> UpdateStatus:
     """
     Create a new Label Studio project and link it to the specified [`Table`][pixeltable.Table].
 
     - A tutorial notebook with fully worked examples can be found here:
-      [Using Label Studio for Annotations with Pixeltable](https://pixeltable.
+      [Using Label Studio for Annotations with Pixeltable](https://docs.pixeltable.com/notebooks/integrations/using-label-studio-with-pixeltable)
 
     The required parameter `label_config` specifies the Label Studio project configuration,
     in XML format, as described in the Label Studio documentation. The linked project will
@@ -96,32 +96,33 @@ def create_label_studio_project(
        [Label Studio start_project docs](https://labelstud.io/sdk/project.html#label_studio_sdk.project.Project.start_project).
 
     Returns:
-
+        An `UpdateStatus` representing the status of any synchronization operations that occurred.
 
     Examples:
        Create a Label Studio project whose tasks correspond to videos stored in the `video_col`
        column of the table `tbl`:
 
        >>> config = \"\"\"
-
-
-
-
-
-
-
-
-
+        ... <View>
+        ... <Video name="video_obj" value="$video_col"/>
+        ... <Choices name="video-category" toName="video" showInLine="true">
+        ... <Choice value="city"/>
+        ... <Choice value="food"/>
+        ... <Choice value="sports"/>
+        ... </Choices>
+        ... </View>
+        ... \"\"\"
+        >>> create_label_studio_project(tbl, config)
 
        Create a Label Studio project with the same configuration, using `media_import_method='url'`,
        whose media are stored in an S3 bucket:
 
        >>> create_label_studio_project(
-
-
-
-
-
+        ... tbl,
+        ... config,
+        ... media_import_method='url',
+        ... s3_configuration={'bucket': 'my-bucket', 'region_name': 'us-east-2'}
+        ... )
     """
     Env.get().require_package('label_studio_sdk')
 
@@ -136,27 +137,27 @@ def create_label_studio_project(
     if sync_immediately:
         return t.sync()
     else:
-        return
+        return UpdateStatus()
 
 
 def export_images_as_fo_dataset(
     tbl: pxt.Table,
     images: exprs.Expr,
     image_format: str = 'webp',
-    classifications:
-    detections:
+    classifications: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
+    detections: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
 ) -> 'fo.Dataset':
     """
     Export images from a Pixeltable table as a Voxel51 dataset. The data must consist of a single column
     (or expression) containing image data, along with optional additional columns containing labels. Currently, only
     classification and detection labels are supported.
 
-    The [Working with Voxel51 in Pixeltable](https://docs.pixeltable.com/
+    The [Working with Voxel51 in Pixeltable](https://docs.pixeltable.com/examples/vision/voxel51) tutorial contains a
     fully worked example showing how to export data from a Pixeltable table and load it into Voxel51.
 
     Images in the dataset that already exist on disk will be exported directly, in whatever format they
     are stored in. Images that are not already on disk (such as frames extracted using a
-    [`
+    [`frame_iterator`][pixeltable.functions.video.frame_iterator]) will first be written to disk in the specified
     `image_format`.
 
     The label parameters accept one or more sets of labels of each type. If a single `Expr` is provided, then it will
@@ -204,13 +205,13 @@ def export_images_as_fo_dataset(
        Export the images in the `image` column of the table `tbl` as a Voxel51 dataset, using classification
        labels from `tbl.classifications`:
 
-        >>>
+        >>> export_images_as_fo_dataset(
        ... tbl,
        ... tbl.image,
        ... classifications=tbl.classifications
        ... )
 
-    See the [Working with Voxel51 in Pixeltable](https://docs.pixeltable.com/
+    See the [Working with Voxel51 in Pixeltable](https://docs.pixeltable.com/examples/vision/voxel51) tutorial
     for a fully worked example.
     """
     Env.get().require_package('fiftyone')