pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +23 -5
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -3
- pixeltable/catalog/catalog.py +1318 -404
- pixeltable/catalog/column.py +186 -115
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/globals.py +11 -43
- pixeltable/catalog/insertable_table.py +167 -79
- pixeltable/catalog/path.py +61 -23
- pixeltable/catalog/schema_object.py +9 -10
- pixeltable/catalog/table.py +626 -308
- pixeltable/catalog/table_metadata.py +101 -0
- pixeltable/catalog/table_version.py +713 -569
- pixeltable/catalog/table_version_handle.py +37 -6
- pixeltable/catalog/table_version_path.py +42 -29
- pixeltable/catalog/tbl_ops.py +50 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +108 -94
- pixeltable/config.py +128 -22
- pixeltable/dataframe.py +188 -100
- pixeltable/env.py +407 -136
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +3 -0
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +231 -0
- pixeltable/exec/cell_reconstruction_node.py +135 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +7 -6
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +190 -30
- pixeltable/exec/globals.py +32 -0
- pixeltable/exec/in_memory_data_node.py +18 -18
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +206 -101
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +34 -30
- pixeltable/exprs/column_ref.py +92 -96
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +152 -55
- pixeltable/exprs/expr.py +62 -43
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +75 -37
- pixeltable/exprs/globals.py +1 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +10 -27
- pixeltable/exprs/is_null.py +1 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +127 -53
- pixeltable/exprs/rowid_ref.py +8 -12
- pixeltable/exprs/similarity_expr.py +50 -25
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +10 -10
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +20 -18
- pixeltable/func/signature.py +43 -16
- pixeltable/func/tools.py +23 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +6 -0
- pixeltable/functions/anthropic.py +93 -33
- pixeltable/functions/audio.py +114 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +1 -1
- pixeltable/functions/deepseek.py +20 -9
- pixeltable/functions/fireworks.py +2 -2
- pixeltable/functions/gemini.py +28 -11
- pixeltable/functions/globals.py +13 -13
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1046 -23
- pixeltable/functions/image.py +9 -18
- pixeltable/functions/llama_cpp.py +23 -8
- pixeltable/functions/math.py +3 -4
- pixeltable/functions/mistralai.py +4 -15
- pixeltable/functions/ollama.py +16 -9
- pixeltable/functions/openai.py +104 -82
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +2 -2
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +21 -28
- pixeltable/functions/timestamp.py +13 -14
- pixeltable/functions/together.py +4 -6
- pixeltable/functions/twelvelabs.py +92 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/video.py +1388 -106
- pixeltable/functions/vision.py +7 -7
- pixeltable/functions/whisper.py +15 -7
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +332 -105
- pixeltable/index/base.py +13 -22
- pixeltable/index/btree.py +23 -22
- pixeltable/index/embedding_index.py +32 -44
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +7 -6
- pixeltable/io/external_store.py +49 -77
- pixeltable/io/fiftyone.py +11 -11
- pixeltable/io/globals.py +29 -28
- pixeltable/io/hf_datasets.py +17 -9
- pixeltable/io/label_studio.py +70 -66
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +12 -11
- pixeltable/io/parquet.py +13 -93
- pixeltable/io/table_data_conduit.py +71 -47
- pixeltable/io/utils.py +3 -3
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +21 -11
- pixeltable/iterators/document.py +116 -55
- pixeltable/iterators/image.py +5 -2
- pixeltable/iterators/video.py +293 -13
- pixeltable/metadata/__init__.py +4 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_36.py +2 -2
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/util.py +13 -12
- pixeltable/metadata/notes.py +4 -0
- pixeltable/metadata/schema.py +79 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +274 -223
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +259 -129
- pixeltable/share/protocol/__init__.py +34 -0
- pixeltable/share/protocol/common.py +170 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +109 -0
- pixeltable/share/publish.py +213 -57
- pixeltable/store.py +238 -175
- pixeltable/type_system.py +104 -63
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +108 -13
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +305 -0
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +31 -5
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +5 -6
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +4 -6
- pixeltable/utils/gcs_store.py +283 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +88 -0
- pixeltable/utils/local_store.py +316 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +528 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +392 -0
- pixeltable-0.4.20.dist-info/METADATA +587 -0
- pixeltable-0.4.20.dist-info/RECORD +218 -0
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
- pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable/utils/sample.py +0 -25
- pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
- pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
- pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
pixeltable/io/external_store.py
CHANGED
|
@@ -3,14 +3,13 @@ from __future__ import annotations
|
|
|
3
3
|
import abc
|
|
4
4
|
import itertools
|
|
5
5
|
import logging
|
|
6
|
-
from
|
|
7
|
-
from typing import Any, Optional
|
|
8
|
-
from uuid import UUID
|
|
6
|
+
from typing import Any
|
|
9
7
|
|
|
10
8
|
import pixeltable.exceptions as excs
|
|
11
9
|
import pixeltable.type_system as ts
|
|
12
10
|
from pixeltable import Column, Table
|
|
13
|
-
from pixeltable.catalog import TableVersion
|
|
11
|
+
from pixeltable.catalog import ColumnHandle, TableVersion
|
|
12
|
+
from pixeltable.catalog.update_status import UpdateStatus
|
|
14
13
|
|
|
15
14
|
_logger = logging.getLogger('pixeltable')
|
|
16
15
|
|
|
@@ -22,6 +21,8 @@ class ExternalStore(abc.ABC):
|
|
|
22
21
|
and stateful external stores.
|
|
23
22
|
"""
|
|
24
23
|
|
|
24
|
+
__name: str
|
|
25
|
+
|
|
25
26
|
def __init__(self, name: str) -> None:
|
|
26
27
|
self.__name = name
|
|
27
28
|
|
|
@@ -38,13 +39,13 @@ class ExternalStore(abc.ABC):
|
|
|
38
39
|
"""Removes store-specific metadata created in link()."""
|
|
39
40
|
|
|
40
41
|
@abc.abstractmethod
|
|
41
|
-
def get_local_columns(self) -> list[
|
|
42
|
+
def get_local_columns(self) -> list[ColumnHandle]:
|
|
42
43
|
"""
|
|
43
44
|
Gets a list of all local (Pixeltable) columns that are associated with this external store.
|
|
44
45
|
"""
|
|
45
46
|
|
|
46
47
|
@abc.abstractmethod
|
|
47
|
-
def sync(self, t: Table, export_data: bool, import_data: bool) ->
|
|
48
|
+
def sync(self, t: Table, export_data: bool, import_data: bool) -> UpdateStatus:
|
|
48
49
|
"""
|
|
49
50
|
Called by `Table.sync()` to implement store-specific synchronization logic.
|
|
50
51
|
"""
|
|
@@ -63,9 +64,12 @@ class Project(ExternalStore, abc.ABC):
|
|
|
63
64
|
additional capabilities specific to such projects.
|
|
64
65
|
"""
|
|
65
66
|
|
|
66
|
-
|
|
67
|
+
_col_mapping: dict[ColumnHandle, str] # col -> external col name
|
|
68
|
+
stored_proxies: dict[ColumnHandle, ColumnHandle] # original col -> proxy col
|
|
67
69
|
|
|
68
|
-
def __init__(
|
|
70
|
+
def __init__(
|
|
71
|
+
self, name: str, col_mapping: dict[ColumnHandle, str], stored_proxies: dict[ColumnHandle, ColumnHandle] | None
|
|
72
|
+
):
|
|
69
73
|
super().__init__(name)
|
|
70
74
|
self._col_mapping = col_mapping
|
|
71
75
|
|
|
@@ -80,11 +84,11 @@ class Project(ExternalStore, abc.ABC):
|
|
|
80
84
|
# Note from aaron-siegel: This methodology is inefficient in the case where a table has many views with a high
|
|
81
85
|
# proportion of overlapping rows, all proxying the same base column.
|
|
82
86
|
if stored_proxies is None:
|
|
83
|
-
self.stored_proxies: dict[
|
|
87
|
+
self.stored_proxies: dict[ColumnHandle, ColumnHandle] = {}
|
|
84
88
|
else:
|
|
85
89
|
self.stored_proxies = stored_proxies
|
|
86
90
|
|
|
87
|
-
def get_local_columns(self) -> list[
|
|
91
|
+
def get_local_columns(self) -> list[ColumnHandle]:
|
|
88
92
|
return list(self.col_mapping.keys())
|
|
89
93
|
|
|
90
94
|
def link(self, tbl_version: TableVersion) -> None:
|
|
@@ -92,15 +96,16 @@ class Project(ExternalStore, abc.ABC):
|
|
|
92
96
|
# This ensures that the media in those columns resides in the media store.
|
|
93
97
|
# First determine which columns (if any) need stored proxies, but don't have one yet.
|
|
94
98
|
stored_proxies_needed: list[Column] = []
|
|
95
|
-
for
|
|
99
|
+
for col_handle in self.col_mapping:
|
|
100
|
+
col = col_handle.get()
|
|
96
101
|
if col.col_type.is_media_type() and not (col.is_stored and col.is_computed):
|
|
97
102
|
# If this column is already proxied in some other Project, use the existing proxy to avoid
|
|
98
103
|
# duplication. Otherwise, we'll create a new one.
|
|
99
104
|
for store in tbl_version.external_stores.values():
|
|
100
|
-
if isinstance(store, Project) and
|
|
101
|
-
self.stored_proxies[
|
|
105
|
+
if isinstance(store, Project) and col_handle in store.stored_proxies:
|
|
106
|
+
self.stored_proxies[col_handle] = store.stored_proxies[col_handle]
|
|
102
107
|
break
|
|
103
|
-
if
|
|
108
|
+
if col_handle not in self.stored_proxies:
|
|
104
109
|
# We didn't find it in an existing Project
|
|
105
110
|
stored_proxies_needed.append(col)
|
|
106
111
|
|
|
@@ -110,17 +115,20 @@ class Project(ExternalStore, abc.ABC):
|
|
|
110
115
|
proxy_cols = [self.create_stored_proxy(col) for col in stored_proxies_needed]
|
|
111
116
|
# Add the columns; this will also update table metadata.
|
|
112
117
|
tbl_version.add_columns(proxy_cols, print_stats=False, on_error='ignore')
|
|
118
|
+
self.stored_proxies.update(
|
|
119
|
+
{col.handle: proxy_col.handle for col, proxy_col in zip(stored_proxies_needed, proxy_cols)}
|
|
120
|
+
)
|
|
113
121
|
|
|
114
122
|
def unlink(self, tbl_version: TableVersion) -> None:
|
|
115
123
|
# Determine which stored proxies can be deleted. (A stored proxy can be deleted if it is not referenced by
|
|
116
124
|
# any *other* external store for this table.)
|
|
117
|
-
deletions_needed: set[
|
|
125
|
+
deletions_needed: set[ColumnHandle] = set(self.stored_proxies.values())
|
|
118
126
|
for name, store in tbl_version.external_stores.items():
|
|
119
127
|
if isinstance(store, Project) and name != self.name:
|
|
120
128
|
deletions_needed = deletions_needed.difference(set(store.stored_proxies.values()))
|
|
121
129
|
if len(deletions_needed) > 0:
|
|
122
|
-
_logger.info(f'Removing stored proxies for columns: {[col.name for col in deletions_needed]}')
|
|
123
|
-
tbl_version._drop_columns(deletions_needed)
|
|
130
|
+
_logger.info(f'Removing stored proxies for columns: {[col.get().name for col in deletions_needed]}')
|
|
131
|
+
tbl_version._drop_columns(col.get() for col in deletions_needed)
|
|
124
132
|
self.stored_proxies.clear()
|
|
125
133
|
|
|
126
134
|
def create_stored_proxy(self, col: Column) -> Column:
|
|
@@ -142,11 +150,10 @@ class Project(ExternalStore, abc.ABC):
|
|
|
142
150
|
computed_with=exprs.ColumnRef(col).apply(lambda x: x, col_type=col.col_type),
|
|
143
151
|
stored=True,
|
|
144
152
|
)
|
|
145
|
-
self.stored_proxies[col] = proxy_col
|
|
146
153
|
return proxy_col
|
|
147
154
|
|
|
148
155
|
@property
|
|
149
|
-
def col_mapping(self) -> dict[
|
|
156
|
+
def col_mapping(self) -> dict[ColumnHandle, str]:
|
|
150
157
|
return self._col_mapping
|
|
151
158
|
|
|
152
159
|
@abc.abstractmethod
|
|
@@ -180,8 +187,8 @@ class Project(ExternalStore, abc.ABC):
|
|
|
180
187
|
table: Table,
|
|
181
188
|
export_cols: dict[str, ts.ColumnType],
|
|
182
189
|
import_cols: dict[str, ts.ColumnType],
|
|
183
|
-
col_mapping:
|
|
184
|
-
) -> dict[
|
|
190
|
+
col_mapping: dict[str, str] | None,
|
|
191
|
+
) -> dict[ColumnHandle, str]:
|
|
185
192
|
"""
|
|
186
193
|
Verifies that the specified `col_mapping` is valid. In particular, checks that:
|
|
187
194
|
(i) the keys of `col_mapping` are valid columns of the specified `Table`;
|
|
@@ -199,33 +206,34 @@ class Project(ExternalStore, abc.ABC):
|
|
|
199
206
|
if col_mapping is None:
|
|
200
207
|
col_mapping = {col: col for col in itertools.chain(export_cols.keys(), import_cols.keys())}
|
|
201
208
|
|
|
202
|
-
resolved_col_mapping: dict[
|
|
209
|
+
resolved_col_mapping: dict[ColumnHandle, str] = {}
|
|
203
210
|
|
|
204
211
|
# Validate names
|
|
205
|
-
t_cols = set(table.
|
|
212
|
+
t_cols = set(table._get_schema().keys())
|
|
206
213
|
for t_col, ext_col in col_mapping.items():
|
|
207
214
|
if t_col not in t_cols:
|
|
208
215
|
if is_user_specified_col_mapping:
|
|
209
216
|
raise excs.Error(
|
|
210
|
-
f'Column name
|
|
217
|
+
f'Column name {t_col!r} appears as a key in `col_mapping`, but {table._display_str()} '
|
|
211
218
|
'contains no such column.'
|
|
212
219
|
)
|
|
213
220
|
else:
|
|
214
221
|
raise excs.Error(
|
|
215
|
-
f'Column
|
|
222
|
+
f'Column {t_col!r} does not exist in {table._display_str()}. Either add a column {t_col!r}, '
|
|
216
223
|
f'or specify a `col_mapping` to associate a different column with '
|
|
217
|
-
f'the external field
|
|
224
|
+
f'the external field {ext_col!r}.'
|
|
218
225
|
)
|
|
219
226
|
if ext_col not in export_cols and ext_col not in import_cols:
|
|
220
227
|
raise excs.Error(
|
|
221
|
-
f'Column name
|
|
222
|
-
f'configuration has no column
|
|
228
|
+
f'Column name {ext_col!r} appears as a value in `col_mapping`, but the external store '
|
|
229
|
+
f'configuration has no column {ext_col!r}.'
|
|
223
230
|
)
|
|
224
231
|
col_ref = table[t_col]
|
|
225
232
|
assert isinstance(col_ref, exprs.ColumnRef)
|
|
226
|
-
resolved_col_mapping[col_ref.col] = ext_col
|
|
233
|
+
resolved_col_mapping[col_ref.col.handle] = ext_col
|
|
234
|
+
|
|
227
235
|
# Validate column specs
|
|
228
|
-
t_col_types = table.
|
|
236
|
+
t_col_types = table._get_schema()
|
|
229
237
|
for t_col, ext_col in col_mapping.items():
|
|
230
238
|
t_col_type = t_col_types[t_col]
|
|
231
239
|
if ext_col in export_cols:
|
|
@@ -233,57 +241,23 @@ class Project(ExternalStore, abc.ABC):
|
|
|
233
241
|
ext_col_type = export_cols[ext_col]
|
|
234
242
|
if not ext_col_type.is_supertype_of(t_col_type, ignore_nullable=True):
|
|
235
243
|
raise excs.Error(
|
|
236
|
-
f'Column
|
|
244
|
+
f'Column {t_col!r} cannot be exported to external column {ext_col!r} '
|
|
237
245
|
f'(incompatible types; expecting `{ext_col_type}`)'
|
|
238
246
|
)
|
|
239
247
|
if ext_col in import_cols:
|
|
240
248
|
# Validate that the external column can be assigned to the table column
|
|
241
249
|
if table._tbl_version_path.get_column(t_col).is_computed:
|
|
242
250
|
raise excs.Error(
|
|
243
|
-
f'Column
|
|
251
|
+
f'Column {t_col!r} is a computed column, which cannot be populated from an external column'
|
|
244
252
|
)
|
|
245
253
|
ext_col_type = import_cols[ext_col]
|
|
246
254
|
if not t_col_type.is_supertype_of(ext_col_type, ignore_nullable=True):
|
|
247
255
|
raise excs.Error(
|
|
248
|
-
f'Column
|
|
256
|
+
f'Column {t_col!r} cannot be imported from external column {ext_col!r} '
|
|
249
257
|
f'(incompatible types; expecting `{ext_col_type}`)'
|
|
250
258
|
)
|
|
251
259
|
return resolved_col_mapping
|
|
252
260
|
|
|
253
|
-
@classmethod
|
|
254
|
-
def _column_as_dict(cls, col: Column) -> dict[str, Any]:
|
|
255
|
-
return {'tbl_id': str(col.tbl.id), 'col_id': col.id}
|
|
256
|
-
|
|
257
|
-
@classmethod
|
|
258
|
-
def _column_from_dict(cls, d: dict[str, Any]) -> Column:
|
|
259
|
-
from pixeltable.catalog import Catalog
|
|
260
|
-
|
|
261
|
-
tbl_id = UUID(d['tbl_id'])
|
|
262
|
-
col_id = d['col_id']
|
|
263
|
-
return Catalog.get().get_tbl_version(tbl_id, None).cols_by_id[col_id]
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
@dataclass(frozen=True)
|
|
267
|
-
class SyncStatus:
|
|
268
|
-
external_rows_created: int = 0
|
|
269
|
-
external_rows_deleted: int = 0
|
|
270
|
-
external_rows_updated: int = 0
|
|
271
|
-
pxt_rows_updated: int = 0
|
|
272
|
-
num_excs: int = 0
|
|
273
|
-
|
|
274
|
-
def combine(self, other: 'SyncStatus') -> 'SyncStatus':
|
|
275
|
-
return SyncStatus(
|
|
276
|
-
external_rows_created=self.external_rows_created + other.external_rows_created,
|
|
277
|
-
external_rows_deleted=self.external_rows_deleted + other.external_rows_deleted,
|
|
278
|
-
external_rows_updated=self.external_rows_updated + other.external_rows_updated,
|
|
279
|
-
pxt_rows_updated=self.pxt_rows_updated + other.pxt_rows_updated,
|
|
280
|
-
num_excs=self.num_excs + other.num_excs,
|
|
281
|
-
)
|
|
282
|
-
|
|
283
|
-
@classmethod
|
|
284
|
-
def empty(cls) -> 'SyncStatus':
|
|
285
|
-
return SyncStatus(0, 0, 0, 0, 0)
|
|
286
|
-
|
|
287
261
|
|
|
288
262
|
class MockProject(Project):
|
|
289
263
|
"""A project that cannot be synced, used mainly for testing."""
|
|
@@ -293,8 +267,8 @@ class MockProject(Project):
|
|
|
293
267
|
name: str,
|
|
294
268
|
export_cols: dict[str, ts.ColumnType],
|
|
295
269
|
import_cols: dict[str, ts.ColumnType],
|
|
296
|
-
col_mapping: dict[
|
|
297
|
-
stored_proxies:
|
|
270
|
+
col_mapping: dict[ColumnHandle, str],
|
|
271
|
+
stored_proxies: dict[ColumnHandle, ColumnHandle] | None = None,
|
|
298
272
|
):
|
|
299
273
|
super().__init__(name, col_mapping, stored_proxies)
|
|
300
274
|
self.export_cols = export_cols
|
|
@@ -308,7 +282,7 @@ class MockProject(Project):
|
|
|
308
282
|
name: str,
|
|
309
283
|
export_cols: dict[str, ts.ColumnType],
|
|
310
284
|
import_cols: dict[str, ts.ColumnType],
|
|
311
|
-
col_mapping:
|
|
285
|
+
col_mapping: dict[str, str] | None = None,
|
|
312
286
|
) -> 'MockProject':
|
|
313
287
|
col_mapping = cls.validate_columns(t, export_cols, import_cols, col_mapping)
|
|
314
288
|
return cls(name, export_cols, import_cols, col_mapping)
|
|
@@ -319,7 +293,7 @@ class MockProject(Project):
|
|
|
319
293
|
def get_import_columns(self) -> dict[str, ts.ColumnType]:
|
|
320
294
|
return self.import_cols
|
|
321
295
|
|
|
322
|
-
def sync(self, t: Table, export_data: bool, import_data: bool) ->
|
|
296
|
+
def sync(self, t: Table, export_data: bool, import_data: bool) -> UpdateStatus:
|
|
323
297
|
raise NotImplementedError()
|
|
324
298
|
|
|
325
299
|
def delete(self) -> None:
|
|
@@ -334,10 +308,8 @@ class MockProject(Project):
|
|
|
334
308
|
'name': self.name,
|
|
335
309
|
'export_cols': {k: v.as_dict() for k, v in self.export_cols.items()},
|
|
336
310
|
'import_cols': {k: v.as_dict() for k, v in self.import_cols.items()},
|
|
337
|
-
'col_mapping': [[
|
|
338
|
-
'stored_proxies': [
|
|
339
|
-
[self._column_as_dict(k), self._column_as_dict(v)] for k, v in self.stored_proxies.items()
|
|
340
|
-
],
|
|
311
|
+
'col_mapping': [[k.as_dict(), v] for k, v in self.col_mapping.items()],
|
|
312
|
+
'stored_proxies': [[k.as_dict(), v.as_dict()] for k, v in self.stored_proxies.items()],
|
|
341
313
|
}
|
|
342
314
|
|
|
343
315
|
@classmethod
|
|
@@ -346,8 +318,8 @@ class MockProject(Project):
|
|
|
346
318
|
md['name'],
|
|
347
319
|
{k: ts.ColumnType.from_dict(v) for k, v in md['export_cols'].items()},
|
|
348
320
|
{k: ts.ColumnType.from_dict(v) for k, v in md['import_cols'].items()},
|
|
349
|
-
{
|
|
350
|
-
{
|
|
321
|
+
{ColumnHandle.from_dict(entry[0]): entry[1] for entry in md['col_mapping']},
|
|
322
|
+
{ColumnHandle.from_dict(entry[0]): ColumnHandle.from_dict(entry[1]) for entry in md['stored_proxies']},
|
|
351
323
|
)
|
|
352
324
|
|
|
353
325
|
def __eq__(self, other: object) -> bool:
|
pixeltable/io/fiftyone.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
-
from typing import Any, Iterator
|
|
2
|
+
from typing import Any, Iterator
|
|
3
3
|
|
|
4
4
|
import fiftyone as fo # type: ignore[import-untyped]
|
|
5
5
|
import fiftyone.utils.data as foud # type: ignore[import-untyped]
|
|
@@ -9,7 +9,7 @@ import puremagic
|
|
|
9
9
|
import pixeltable as pxt
|
|
10
10
|
import pixeltable.exceptions as excs
|
|
11
11
|
from pixeltable import exprs
|
|
12
|
-
from pixeltable.
|
|
12
|
+
from pixeltable.utils.local_store import TempStore
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
@@ -20,7 +20,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
|
20
20
|
__image_format: str # format to use for any exported images that are not already stored on disk
|
|
21
21
|
__labels: dict[str, tuple[exprs.Expr, type[fo.Label]]] # label_name -> (expr, label_cls)
|
|
22
22
|
__image_idx: int # index of the image expr in the select list
|
|
23
|
-
__localpath_idx:
|
|
23
|
+
__localpath_idx: int | None # index of the image localpath in the select list, if present
|
|
24
24
|
__row_iter: Iterator[list] # iterator over the table rows, to be converted to FiftyOne samples
|
|
25
25
|
|
|
26
26
|
def __init__(
|
|
@@ -28,12 +28,12 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
|
28
28
|
tbl: pxt.Table,
|
|
29
29
|
image: exprs.Expr,
|
|
30
30
|
image_format: str,
|
|
31
|
-
classifications:
|
|
32
|
-
detections:
|
|
33
|
-
dataset_dir:
|
|
31
|
+
classifications: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
|
|
32
|
+
detections: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
|
|
33
|
+
dataset_dir: os.PathLike | None = None,
|
|
34
34
|
shuffle: bool = False,
|
|
35
|
-
seed:
|
|
36
|
-
max_samples:
|
|
35
|
+
seed: int | float | str | bytes | bytearray | None = None,
|
|
36
|
+
max_samples: int | None = None,
|
|
37
37
|
):
|
|
38
38
|
super().__init__(dataset_dir=dataset_dir, shuffle=shuffle, seed=seed, max_samples=max_samples)
|
|
39
39
|
|
|
@@ -90,7 +90,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
|
90
90
|
df = tbl.select(*selection)
|
|
91
91
|
self.__row_iter = df._output_row_iterator()
|
|
92
92
|
|
|
93
|
-
def __next__(self) -> tuple[str,
|
|
93
|
+
def __next__(self) -> tuple[str, fo.ImageMetadata | None, dict[str, fo.Label] | None]:
|
|
94
94
|
row = next(self.__row_iter)
|
|
95
95
|
img = row[self.__image_idx]
|
|
96
96
|
assert isinstance(img, PIL.Image.Image)
|
|
@@ -100,7 +100,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
|
100
100
|
assert isinstance(file, str)
|
|
101
101
|
else:
|
|
102
102
|
# Write the dynamically created image to a temp file
|
|
103
|
-
file =
|
|
103
|
+
file = TempStore.create_path(extension=f'.{self.__image_format}')
|
|
104
104
|
img.save(file, format=self.__image_format)
|
|
105
105
|
|
|
106
106
|
metadata = fo.ImageMetadata(
|
|
@@ -108,7 +108,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
|
108
108
|
mime_type=puremagic.from_file(file, mime=True),
|
|
109
109
|
width=img.width,
|
|
110
110
|
height=img.height,
|
|
111
|
-
filepath=file,
|
|
111
|
+
filepath=str(file),
|
|
112
112
|
num_channels=len(img.getbands()),
|
|
113
113
|
)
|
|
114
114
|
|
pixeltable/io/globals.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import TYPE_CHECKING, Any, Literal
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Literal
|
|
4
4
|
|
|
5
5
|
import pixeltable as pxt
|
|
6
6
|
import pixeltable.exceptions as excs
|
|
7
7
|
from pixeltable import Table, exprs
|
|
8
|
+
from pixeltable.catalog.update_status import UpdateStatus
|
|
8
9
|
from pixeltable.env import Env
|
|
9
|
-
from pixeltable.io.external_store import SyncStatus
|
|
10
10
|
|
|
11
11
|
if TYPE_CHECKING:
|
|
12
12
|
import fiftyone as fo # type: ignore[import-untyped]
|
|
@@ -15,14 +15,14 @@ if TYPE_CHECKING:
|
|
|
15
15
|
def create_label_studio_project(
|
|
16
16
|
t: Table,
|
|
17
17
|
label_config: str,
|
|
18
|
-
name:
|
|
19
|
-
title:
|
|
18
|
+
name: str | None = None,
|
|
19
|
+
title: str | None = None,
|
|
20
20
|
media_import_method: Literal['post', 'file', 'url'] = 'post',
|
|
21
|
-
col_mapping:
|
|
21
|
+
col_mapping: dict[str, str] | None = None,
|
|
22
22
|
sync_immediately: bool = True,
|
|
23
|
-
s3_configuration:
|
|
23
|
+
s3_configuration: dict[str, Any] | None = None,
|
|
24
24
|
**kwargs: Any,
|
|
25
|
-
) ->
|
|
25
|
+
) -> UpdateStatus:
|
|
26
26
|
"""
|
|
27
27
|
Create a new Label Studio project and link it to the specified [`Table`][pixeltable.Table].
|
|
28
28
|
|
|
@@ -96,32 +96,33 @@ def create_label_studio_project(
|
|
|
96
96
|
[Label Studio start_project docs](https://labelstud.io/sdk/project.html#label_studio_sdk.project.Project.start_project).
|
|
97
97
|
|
|
98
98
|
Returns:
|
|
99
|
-
|
|
99
|
+
An `UpdateStatus` representing the status of any synchronization operations that occurred.
|
|
100
100
|
|
|
101
101
|
Examples:
|
|
102
102
|
Create a Label Studio project whose tasks correspond to videos stored in the `video_col`
|
|
103
103
|
column of the table `tbl`:
|
|
104
104
|
|
|
105
105
|
>>> config = \"\"\"
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
106
|
+
... <View>
|
|
107
|
+
... <Video name="video_obj" value="$video_col"/>
|
|
108
|
+
... <Choices name="video-category" toName="video_obj" showInLine="true">
|
|
109
|
+
... <Choice value="city"/>
|
|
110
|
+
... <Choice value="food"/>
|
|
111
|
+
... <Choice value="sports"/>
|
|
112
|
+
... </Choices>
|
|
113
|
+
... </View>
|
|
114
|
+
... \"\"\"
|
|
115
|
+
>>> create_label_studio_project(tbl, config)
|
|
115
116
|
|
|
116
117
|
Create a Label Studio project with the same configuration, using `media_import_method='url'`,
|
|
117
118
|
whose media are stored in an S3 bucket:
|
|
118
119
|
|
|
119
120
|
>>> create_label_studio_project(
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
121
|
+
... tbl,
|
|
122
|
+
... config,
|
|
123
|
+
... media_import_method='url',
|
|
124
|
+
... s3_configuration={'bucket': 'my-bucket', 'region_name': 'us-east-2'}
|
|
125
|
+
... )
|
|
125
126
|
"""
|
|
126
127
|
Env.get().require_package('label_studio_sdk')
|
|
127
128
|
|
|
@@ -136,22 +137,22 @@ def create_label_studio_project(
|
|
|
136
137
|
if sync_immediately:
|
|
137
138
|
return t.sync()
|
|
138
139
|
else:
|
|
139
|
-
return
|
|
140
|
+
return UpdateStatus()
|
|
140
141
|
|
|
141
142
|
|
|
142
143
|
def export_images_as_fo_dataset(
|
|
143
144
|
tbl: pxt.Table,
|
|
144
145
|
images: exprs.Expr,
|
|
145
146
|
image_format: str = 'webp',
|
|
146
|
-
classifications:
|
|
147
|
-
detections:
|
|
147
|
+
classifications: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
|
|
148
|
+
detections: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
|
|
148
149
|
) -> 'fo.Dataset':
|
|
149
150
|
"""
|
|
150
151
|
Export images from a Pixeltable table as a Voxel51 dataset. The data must consist of a single column
|
|
151
152
|
(or expression) containing image data, along with optional additional columns containing labels. Currently, only
|
|
152
153
|
classification and detection labels are supported.
|
|
153
154
|
|
|
154
|
-
The [Working with Voxel51 in Pixeltable](https://docs.pixeltable.com/
|
|
155
|
+
The [Working with Voxel51 in Pixeltable](https://docs.pixeltable.com/examples/vision/voxel51) tutorial contains a
|
|
155
156
|
fully worked example showing how to export data from a Pixeltable table and load it into Voxel51.
|
|
156
157
|
|
|
157
158
|
Images in the dataset that already exist on disk will be exported directly, in whatever format they
|
|
@@ -204,13 +205,13 @@ def export_images_as_fo_dataset(
|
|
|
204
205
|
Export the images in the `image` column of the table `tbl` as a Voxel51 dataset, using classification
|
|
205
206
|
labels from `tbl.classifications`:
|
|
206
207
|
|
|
207
|
-
>>>
|
|
208
|
+
>>> export_images_as_fo_dataset(
|
|
208
209
|
... tbl,
|
|
209
210
|
... tbl.image,
|
|
210
211
|
... classifications=tbl.classifications
|
|
211
212
|
... )
|
|
212
213
|
|
|
213
|
-
See the [Working with Voxel51 in Pixeltable](https://docs.pixeltable.com/
|
|
214
|
+
See the [Working with Voxel51 in Pixeltable](https://docs.pixeltable.com/examples/vision/voxel51) tutorial
|
|
214
215
|
for a fully worked example.
|
|
215
216
|
"""
|
|
216
217
|
Env.get().require_package('fiftyone')
|
pixeltable/io/hf_datasets.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import typing
|
|
4
|
-
from typing import Any
|
|
4
|
+
from typing import Any
|
|
5
5
|
|
|
6
6
|
import pixeltable as pxt
|
|
7
7
|
import pixeltable.type_system as ts
|
|
@@ -36,7 +36,7 @@ _hf_to_pxt: dict[str, ts.ColumnType] = {
|
|
|
36
36
|
}
|
|
37
37
|
|
|
38
38
|
|
|
39
|
-
def _to_pixeltable_type(feature_type: Any, nullable: bool) ->
|
|
39
|
+
def _to_pixeltable_type(feature_type: Any, nullable: bool) -> ts.ColumnType | None:
|
|
40
40
|
"""Convert a huggingface feature type to a pixeltable ColumnType if one is defined."""
|
|
41
41
|
import datasets
|
|
42
42
|
|
|
@@ -50,15 +50,23 @@ def _to_pixeltable_type(feature_type: Any, nullable: bool) -> Optional[ts.Column
|
|
|
50
50
|
elif isinstance(feature_type, datasets.Sequence):
|
|
51
51
|
# example: cohere wiki. Sequence(feature=Value(dtype='float32', id=None), length=-1, id=None)
|
|
52
52
|
dtype = _to_pixeltable_type(feature_type.feature, nullable)
|
|
53
|
-
|
|
54
|
-
|
|
53
|
+
if dtype is None:
|
|
54
|
+
return None
|
|
55
|
+
if dtype.is_int_type() or dtype.is_float_type() or dtype.is_bool_type() or dtype.is_string_type():
|
|
56
|
+
length = feature_type.length if feature_type.length != -1 else None
|
|
57
|
+
return ts.ArrayType(shape=(length,), dtype=dtype, nullable=nullable)
|
|
58
|
+
else:
|
|
59
|
+
# Sequence of dicts must be cast as Json
|
|
60
|
+
return ts.JsonType(nullable=nullable)
|
|
55
61
|
elif isinstance(feature_type, datasets.Image):
|
|
56
62
|
return ts.ImageType(nullable=nullable)
|
|
63
|
+
elif isinstance(feature_type, dict):
|
|
64
|
+
return ts.JsonType(nullable=nullable)
|
|
57
65
|
else:
|
|
58
66
|
return None
|
|
59
67
|
|
|
60
68
|
|
|
61
|
-
def _get_hf_schema(dataset:
|
|
69
|
+
def _get_hf_schema(dataset: datasets.Dataset | datasets.DatasetDict) -> datasets.Features:
|
|
62
70
|
"""Get the schema of a huggingface dataset as a dictionary."""
|
|
63
71
|
import datasets
|
|
64
72
|
|
|
@@ -68,7 +76,7 @@ def _get_hf_schema(dataset: Union[datasets.Dataset, datasets.DatasetDict]) -> da
|
|
|
68
76
|
|
|
69
77
|
def huggingface_schema_to_pxt_schema(
|
|
70
78
|
hf_schema: datasets.Features, schema_overrides: dict[str, Any], primary_key: list[str]
|
|
71
|
-
) -> dict[str,
|
|
79
|
+
) -> dict[str, ts.ColumnType | None]:
|
|
72
80
|
"""Generate a pixeltable schema from a huggingface dataset schema.
|
|
73
81
|
Columns without a known mapping are mapped to None
|
|
74
82
|
"""
|
|
@@ -83,10 +91,10 @@ def huggingface_schema_to_pxt_schema(
|
|
|
83
91
|
|
|
84
92
|
def import_huggingface_dataset(
|
|
85
93
|
table_path: str,
|
|
86
|
-
dataset:
|
|
94
|
+
dataset: datasets.Dataset | datasets.DatasetDict,
|
|
87
95
|
*,
|
|
88
|
-
schema_overrides:
|
|
89
|
-
primary_key:
|
|
96
|
+
schema_overrides: dict[str, Any] | None = None,
|
|
97
|
+
primary_key: str | list[str] | None = None,
|
|
90
98
|
**kwargs: Any,
|
|
91
99
|
) -> pxt.Table:
|
|
92
100
|
"""Create a new base table from a Huggingface dataset, or dataset dict with multiple splits.
|