pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +83 -19
- pixeltable/_query.py +1444 -0
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +7 -4
- pixeltable/catalog/catalog.py +2394 -119
- pixeltable/catalog/column.py +225 -104
- pixeltable/catalog/dir.py +38 -9
- pixeltable/catalog/globals.py +53 -34
- pixeltable/catalog/insertable_table.py +265 -115
- pixeltable/catalog/path.py +80 -17
- pixeltable/catalog/schema_object.py +28 -43
- pixeltable/catalog/table.py +1270 -677
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +1270 -751
- pixeltable/catalog/table_version_handle.py +109 -0
- pixeltable/catalog/table_version_path.py +137 -42
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +251 -134
- pixeltable/config.py +215 -0
- pixeltable/env.py +736 -285
- pixeltable/exceptions.py +26 -2
- pixeltable/exec/__init__.py +7 -2
- pixeltable/exec/aggregation_node.py +39 -21
- pixeltable/exec/cache_prefetch_node.py +87 -109
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +25 -28
- pixeltable/exec/data_row_batch.py +11 -46
- pixeltable/exec/exec_context.py +26 -11
- pixeltable/exec/exec_node.py +35 -27
- pixeltable/exec/expr_eval/__init__.py +3 -0
- pixeltable/exec/expr_eval/evaluators.py +365 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
- pixeltable/exec/expr_eval/globals.py +200 -0
- pixeltable/exec/expr_eval/row_buffer.py +74 -0
- pixeltable/exec/expr_eval/schedulers.py +413 -0
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +35 -27
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +44 -29
- pixeltable/exec/sql_node.py +414 -115
- pixeltable/exprs/__init__.py +8 -5
- pixeltable/exprs/arithmetic_expr.py +79 -45
- pixeltable/exprs/array_slice.py +5 -5
- pixeltable/exprs/column_property_ref.py +40 -26
- pixeltable/exprs/column_ref.py +254 -61
- pixeltable/exprs/comparison.py +14 -9
- pixeltable/exprs/compound_predicate.py +9 -10
- pixeltable/exprs/data_row.py +213 -72
- pixeltable/exprs/expr.py +270 -104
- pixeltable/exprs/expr_dict.py +6 -5
- pixeltable/exprs/expr_set.py +20 -11
- pixeltable/exprs/function_call.py +383 -284
- pixeltable/exprs/globals.py +18 -5
- pixeltable/exprs/in_predicate.py +7 -7
- pixeltable/exprs/inline_expr.py +37 -37
- pixeltable/exprs/is_null.py +8 -4
- pixeltable/exprs/json_mapper.py +120 -54
- pixeltable/exprs/json_path.py +90 -60
- pixeltable/exprs/literal.py +61 -16
- pixeltable/exprs/method_ref.py +7 -6
- pixeltable/exprs/object_ref.py +19 -8
- pixeltable/exprs/row_builder.py +238 -75
- pixeltable/exprs/rowid_ref.py +53 -15
- pixeltable/exprs/similarity_expr.py +65 -50
- pixeltable/exprs/sql_element_cache.py +5 -5
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/exprs/type_cast.py +25 -13
- pixeltable/exprs/variable.py +2 -2
- pixeltable/func/__init__.py +9 -5
- pixeltable/func/aggregate_function.py +197 -92
- pixeltable/func/callable_function.py +119 -35
- pixeltable/func/expr_template_function.py +101 -48
- pixeltable/func/function.py +375 -62
- pixeltable/func/function_registry.py +20 -19
- pixeltable/func/globals.py +6 -5
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +151 -35
- pixeltable/func/signature.py +178 -49
- pixeltable/func/tools.py +164 -0
- pixeltable/func/udf.py +176 -53
- pixeltable/functions/__init__.py +44 -4
- pixeltable/functions/anthropic.py +226 -47
- pixeltable/functions/audio.py +148 -11
- pixeltable/functions/bedrock.py +137 -0
- pixeltable/functions/date.py +188 -0
- pixeltable/functions/deepseek.py +113 -0
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +72 -20
- pixeltable/functions/gemini.py +249 -0
- pixeltable/functions/globals.py +208 -53
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1088 -95
- pixeltable/functions/image.py +155 -84
- pixeltable/functions/json.py +8 -11
- pixeltable/functions/llama_cpp.py +31 -19
- pixeltable/functions/math.py +169 -0
- pixeltable/functions/mistralai.py +50 -75
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +29 -36
- pixeltable/functions/openai.py +548 -160
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +15 -14
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +310 -85
- pixeltable/functions/timestamp.py +37 -19
- pixeltable/functions/together.py +77 -120
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +7 -2
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1528 -117
- pixeltable/functions/vision.py +26 -26
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +19 -10
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/functions/yolox.py +112 -0
- pixeltable/globals.py +716 -236
- pixeltable/index/__init__.py +3 -1
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +32 -22
- pixeltable/index/embedding_index.py +155 -92
- pixeltable/io/__init__.py +12 -7
- pixeltable/io/datarows.py +140 -0
- pixeltable/io/external_store.py +83 -125
- pixeltable/io/fiftyone.py +24 -33
- pixeltable/io/globals.py +47 -182
- pixeltable/io/hf_datasets.py +96 -127
- pixeltable/io/label_studio.py +171 -156
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +136 -115
- pixeltable/io/parquet.py +40 -153
- pixeltable/io/table_data_conduit.py +702 -0
- pixeltable/io/utils.py +100 -0
- pixeltable/iterators/__init__.py +8 -4
- pixeltable/iterators/audio.py +207 -0
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +144 -87
- pixeltable/iterators/image.py +17 -38
- pixeltable/iterators/string.py +15 -12
- pixeltable/iterators/video.py +523 -127
- pixeltable/metadata/__init__.py +33 -8
- pixeltable/metadata/converters/convert_10.py +2 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_15.py +15 -11
- pixeltable/metadata/converters/convert_16.py +4 -5
- pixeltable/metadata/converters/convert_17.py +4 -5
- pixeltable/metadata/converters/convert_18.py +4 -6
- pixeltable/metadata/converters/convert_19.py +6 -9
- pixeltable/metadata/converters/convert_20.py +3 -6
- pixeltable/metadata/converters/convert_21.py +6 -8
- pixeltable/metadata/converters/convert_22.py +3 -2
- pixeltable/metadata/converters/convert_23.py +33 -0
- pixeltable/metadata/converters/convert_24.py +55 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/convert_27.py +29 -0
- pixeltable/metadata/converters/convert_28.py +13 -0
- pixeltable/metadata/converters/convert_29.py +110 -0
- pixeltable/metadata/converters/convert_30.py +63 -0
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +44 -18
- pixeltable/metadata/notes.py +21 -0
- pixeltable/metadata/schema.py +185 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +616 -225
- pixeltable/share/__init__.py +3 -0
- pixeltable/share/packager.py +797 -0
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +349 -0
- pixeltable/store.py +398 -232
- pixeltable/type_system.py +730 -267
- pixeltable/utils/__init__.py +40 -0
- pixeltable/utils/arrow.py +201 -29
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +26 -27
- pixeltable/utils/code.py +4 -4
- pixeltable/utils/console_output.py +46 -0
- pixeltable/utils/coroutine.py +24 -0
- pixeltable/utils/dbms.py +92 -0
- pixeltable/utils/description_helper.py +11 -12
- pixeltable/utils/documents.py +60 -61
- pixeltable/utils/exception_handler.py +36 -0
- pixeltable/utils/filecache.py +38 -22
- pixeltable/utils/formatter.py +88 -51
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +14 -13
- pixeltable/utils/iceberg.py +13 -0
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +20 -20
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +32 -5
- pixeltable/utils/system.py +30 -0
- pixeltable/utils/transactional_directory.py +4 -3
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -36
- pixeltable/catalog/path_dict.py +0 -141
- pixeltable/dataframe.py +0 -894
- pixeltable/exec/expr_eval_node.py +0 -232
- pixeltable/ext/__init__.py +0 -14
- pixeltable/ext/functions/__init__.py +0 -8
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/ext/functions/yolox.py +0 -157
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable/utils/media_store.py +0 -76
- pixeltable/utils/s3.py +0 -16
- pixeltable-0.2.26.dist-info/METADATA +0 -400
- pixeltable-0.2.26.dist-info/RECORD +0 -156
- pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/io/external_store.py
CHANGED
|
@@ -3,17 +3,13 @@ from __future__ import annotations
|
|
|
3
3
|
import abc
|
|
4
4
|
import itertools
|
|
5
5
|
import logging
|
|
6
|
-
import
|
|
7
|
-
from dataclasses import dataclass
|
|
8
|
-
from typing import Any, Optional
|
|
9
|
-
from uuid import UUID
|
|
6
|
+
from typing import Any
|
|
10
7
|
|
|
11
8
|
import pixeltable.exceptions as excs
|
|
12
9
|
import pixeltable.type_system as ts
|
|
13
|
-
from pixeltable import
|
|
14
|
-
import
|
|
15
|
-
|
|
16
|
-
from pixeltable.catalog import TableVersion
|
|
10
|
+
from pixeltable import Column, Table
|
|
11
|
+
from pixeltable.catalog import ColumnHandle, TableVersion
|
|
12
|
+
from pixeltable.catalog.update_status import UpdateStatus
|
|
17
13
|
|
|
18
14
|
_logger = logging.getLogger('pixeltable')
|
|
19
15
|
|
|
@@ -25,6 +21,8 @@ class ExternalStore(abc.ABC):
|
|
|
25
21
|
and stateful external stores.
|
|
26
22
|
"""
|
|
27
23
|
|
|
24
|
+
__name: str
|
|
25
|
+
|
|
28
26
|
def __init__(self, name: str) -> None:
|
|
29
27
|
self.__name = name
|
|
30
28
|
|
|
@@ -33,25 +31,21 @@ class ExternalStore(abc.ABC):
|
|
|
33
31
|
return self.__name
|
|
34
32
|
|
|
35
33
|
@abc.abstractmethod
|
|
36
|
-
def link(self, tbl_version: TableVersion
|
|
37
|
-
"""
|
|
38
|
-
Called by `TableVersion.link()` to implement store-specific logic.
|
|
39
|
-
"""
|
|
34
|
+
def link(self, tbl_version: TableVersion) -> None:
|
|
35
|
+
"""Creates store-specific metadata needed to implement sync()."""
|
|
40
36
|
|
|
41
37
|
@abc.abstractmethod
|
|
42
|
-
def unlink(self, tbl_version: TableVersion
|
|
43
|
-
"""
|
|
44
|
-
Called by `TableVersion.unlink()` to implement store-specific logic.
|
|
45
|
-
"""
|
|
38
|
+
def unlink(self, tbl_version: TableVersion) -> None:
|
|
39
|
+
"""Removes store-specific metadata created in link()."""
|
|
46
40
|
|
|
47
41
|
@abc.abstractmethod
|
|
48
|
-
def get_local_columns(self) -> list[
|
|
42
|
+
def get_local_columns(self) -> list[ColumnHandle]:
|
|
49
43
|
"""
|
|
50
44
|
Gets a list of all local (Pixeltable) columns that are associated with this external store.
|
|
51
45
|
"""
|
|
52
46
|
|
|
53
47
|
@abc.abstractmethod
|
|
54
|
-
def sync(self, t: Table, export_data: bool, import_data: bool) ->
|
|
48
|
+
def sync(self, t: Table, export_data: bool, import_data: bool) -> UpdateStatus:
|
|
55
49
|
"""
|
|
56
50
|
Called by `Table.sync()` to implement store-specific synchronization logic.
|
|
57
51
|
"""
|
|
@@ -70,9 +64,12 @@ class Project(ExternalStore, abc.ABC):
|
|
|
70
64
|
additional capabilities specific to such projects.
|
|
71
65
|
"""
|
|
72
66
|
|
|
73
|
-
|
|
67
|
+
_col_mapping: dict[ColumnHandle, str] # col -> external col name
|
|
68
|
+
stored_proxies: dict[ColumnHandle, ColumnHandle] # original col -> proxy col
|
|
74
69
|
|
|
75
|
-
def __init__(
|
|
70
|
+
def __init__(
|
|
71
|
+
self, name: str, col_mapping: dict[ColumnHandle, str], stored_proxies: dict[ColumnHandle, ColumnHandle] | None
|
|
72
|
+
):
|
|
76
73
|
super().__init__(name)
|
|
77
74
|
self._col_mapping = col_mapping
|
|
78
75
|
|
|
@@ -87,68 +84,63 @@ class Project(ExternalStore, abc.ABC):
|
|
|
87
84
|
# Note from aaron-siegel: This methodology is inefficient in the case where a table has many views with a high
|
|
88
85
|
# proportion of overlapping rows, all proxying the same base column.
|
|
89
86
|
if stored_proxies is None:
|
|
90
|
-
self.stored_proxies: dict[
|
|
87
|
+
self.stored_proxies: dict[ColumnHandle, ColumnHandle] = {}
|
|
91
88
|
else:
|
|
92
89
|
self.stored_proxies = stored_proxies
|
|
93
90
|
|
|
94
|
-
def get_local_columns(self) -> list[
|
|
91
|
+
def get_local_columns(self) -> list[ColumnHandle]:
|
|
95
92
|
return list(self.col_mapping.keys())
|
|
96
93
|
|
|
97
|
-
def link(self, tbl_version: TableVersion
|
|
94
|
+
def link(self, tbl_version: TableVersion) -> None:
|
|
98
95
|
# All of the media columns being linked need to either be stored computed columns, or else have stored proxies.
|
|
99
96
|
# This ensures that the media in those columns resides in the media store.
|
|
100
97
|
# First determine which columns (if any) need stored proxies, but don't have one yet.
|
|
101
98
|
stored_proxies_needed: list[Column] = []
|
|
102
|
-
for
|
|
99
|
+
for col_handle in self.col_mapping:
|
|
100
|
+
col = col_handle.get()
|
|
103
101
|
if col.col_type.is_media_type() and not (col.is_stored and col.is_computed):
|
|
104
102
|
# If this column is already proxied in some other Project, use the existing proxy to avoid
|
|
105
103
|
# duplication. Otherwise, we'll create a new one.
|
|
106
104
|
for store in tbl_version.external_stores.values():
|
|
107
|
-
if isinstance(store, Project) and
|
|
108
|
-
self.stored_proxies[
|
|
105
|
+
if isinstance(store, Project) and col_handle in store.stored_proxies:
|
|
106
|
+
self.stored_proxies[col_handle] = store.stored_proxies[col_handle]
|
|
109
107
|
break
|
|
110
|
-
if
|
|
108
|
+
if col_handle not in self.stored_proxies:
|
|
111
109
|
# We didn't find it in an existing Project
|
|
112
110
|
stored_proxies_needed.append(col)
|
|
111
|
+
|
|
113
112
|
if len(stored_proxies_needed) > 0:
|
|
114
113
|
_logger.info(f'Creating stored proxies for columns: {[col.name for col in stored_proxies_needed]}')
|
|
115
|
-
# Create stored proxies for columns that need one
|
|
116
|
-
|
|
117
|
-
tbl_version.version += 1
|
|
118
|
-
preceding_schema_version = tbl_version.schema_version
|
|
119
|
-
tbl_version.schema_version = tbl_version.version
|
|
120
|
-
proxy_cols = [self.create_stored_proxy(tbl_version, col) for col in stored_proxies_needed]
|
|
114
|
+
# Create stored proxies for columns that need one
|
|
115
|
+
proxy_cols = [self.create_stored_proxy(col) for col in stored_proxies_needed]
|
|
121
116
|
# Add the columns; this will also update table metadata.
|
|
122
|
-
tbl_version.
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
117
|
+
tbl_version.add_columns(proxy_cols, print_stats=False, on_error='ignore')
|
|
118
|
+
self.stored_proxies.update(
|
|
119
|
+
{col.handle: proxy_col.handle for col, proxy_col in zip(stored_proxies_needed, proxy_cols)}
|
|
120
|
+
)
|
|
126
121
|
|
|
127
|
-
def unlink(self, tbl_version: TableVersion
|
|
122
|
+
def unlink(self, tbl_version: TableVersion) -> None:
|
|
128
123
|
# Determine which stored proxies can be deleted. (A stored proxy can be deleted if it is not referenced by
|
|
129
124
|
# any *other* external store for this table.)
|
|
130
|
-
deletions_needed: set[
|
|
125
|
+
deletions_needed: set[ColumnHandle] = set(self.stored_proxies.values())
|
|
131
126
|
for name, store in tbl_version.external_stores.items():
|
|
132
127
|
if isinstance(store, Project) and name != self.name:
|
|
133
128
|
deletions_needed = deletions_needed.difference(set(store.stored_proxies.values()))
|
|
134
129
|
if len(deletions_needed) > 0:
|
|
135
|
-
_logger.info(f'Removing stored proxies for columns: {[col.name for col in deletions_needed]}')
|
|
136
|
-
|
|
137
|
-
tbl_version.version += 1
|
|
138
|
-
preceding_schema_version = tbl_version.schema_version
|
|
139
|
-
tbl_version.schema_version = tbl_version.version
|
|
140
|
-
tbl_version._drop_columns(deletions_needed)
|
|
130
|
+
_logger.info(f'Removing stored proxies for columns: {[col.get().name for col in deletions_needed]}')
|
|
131
|
+
tbl_version._drop_columns(col.get() for col in deletions_needed)
|
|
141
132
|
self.stored_proxies.clear()
|
|
142
|
-
tbl_version._update_md(time.time(), conn, preceding_schema_version=preceding_schema_version)
|
|
143
133
|
|
|
144
|
-
def create_stored_proxy(self,
|
|
134
|
+
def create_stored_proxy(self, col: Column) -> Column:
|
|
145
135
|
"""
|
|
146
136
|
Creates a proxy column for the specified column. The proxy column will be created in the specified
|
|
147
137
|
`TableVersion`.
|
|
148
138
|
"""
|
|
149
139
|
from pixeltable import exprs
|
|
150
140
|
|
|
151
|
-
assert
|
|
141
|
+
assert (
|
|
142
|
+
col.col_type.is_media_type() and not (col.is_stored and col.is_computed) and col not in self.stored_proxies
|
|
143
|
+
)
|
|
152
144
|
proxy_col = Column(
|
|
153
145
|
name=None,
|
|
154
146
|
# Force images in the proxy column to be materialized inside the media store, in a normalized format.
|
|
@@ -157,17 +149,11 @@ class Project(ExternalStore, abc.ABC):
|
|
|
157
149
|
# Once `destination` is implemented, it can be replaced with a simple `ColumnRef`.
|
|
158
150
|
computed_with=exprs.ColumnRef(col).apply(lambda x: x, col_type=col.col_type),
|
|
159
151
|
stored=True,
|
|
160
|
-
col_id=tbl_version.next_col_id,
|
|
161
|
-
sa_col_type=col.col_type.to_sa_type(),
|
|
162
|
-
schema_version_add=tbl_version.schema_version
|
|
163
152
|
)
|
|
164
|
-
proxy_col.tbl = tbl_version
|
|
165
|
-
tbl_version.next_col_id += 1
|
|
166
|
-
self.stored_proxies[col] = proxy_col
|
|
167
153
|
return proxy_col
|
|
168
154
|
|
|
169
155
|
@property
|
|
170
|
-
def col_mapping(self) -> dict[
|
|
156
|
+
def col_mapping(self) -> dict[ColumnHandle, str]:
|
|
171
157
|
return self._col_mapping
|
|
172
158
|
|
|
173
159
|
@abc.abstractmethod
|
|
@@ -197,12 +183,12 @@ class Project(ExternalStore, abc.ABC):
|
|
|
197
183
|
|
|
198
184
|
@classmethod
|
|
199
185
|
def validate_columns(
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
) -> dict[
|
|
186
|
+
cls,
|
|
187
|
+
table: Table,
|
|
188
|
+
export_cols: dict[str, ts.ColumnType],
|
|
189
|
+
import_cols: dict[str, ts.ColumnType],
|
|
190
|
+
col_mapping: dict[str, str] | None,
|
|
191
|
+
) -> dict[ColumnHandle, str]:
|
|
206
192
|
"""
|
|
207
193
|
Verifies that the specified `col_mapping` is valid. In particular, checks that:
|
|
208
194
|
(i) the keys of `col_mapping` are valid columns of the specified `Table`;
|
|
@@ -212,6 +198,7 @@ class Project(ExternalStore, abc.ABC):
|
|
|
212
198
|
external (import or export) columns.
|
|
213
199
|
If validation fails, an exception will be raised. If validation succeeds, a new mapping will be returned
|
|
214
200
|
in which the Pixeltable column names are resolved to the corresponding `Column` objects.
|
|
201
|
+
TODO: return columns as names or qualified ids
|
|
215
202
|
"""
|
|
216
203
|
from pixeltable import exprs
|
|
217
204
|
|
|
@@ -219,32 +206,34 @@ class Project(ExternalStore, abc.ABC):
|
|
|
219
206
|
if col_mapping is None:
|
|
220
207
|
col_mapping = {col: col for col in itertools.chain(export_cols.keys(), import_cols.keys())}
|
|
221
208
|
|
|
222
|
-
resolved_col_mapping: dict[
|
|
209
|
+
resolved_col_mapping: dict[ColumnHandle, str] = {}
|
|
223
210
|
|
|
224
211
|
# Validate names
|
|
225
|
-
t_cols = set(table.
|
|
212
|
+
t_cols = set(table._get_schema().keys())
|
|
226
213
|
for t_col, ext_col in col_mapping.items():
|
|
227
214
|
if t_col not in t_cols:
|
|
228
215
|
if is_user_specified_col_mapping:
|
|
229
216
|
raise excs.Error(
|
|
230
|
-
f'Column name
|
|
217
|
+
f'Column name {t_col!r} appears as a key in `col_mapping`, but {table._display_str()} '
|
|
231
218
|
'contains no such column.'
|
|
232
219
|
)
|
|
233
220
|
else:
|
|
234
221
|
raise excs.Error(
|
|
235
|
-
f'Column
|
|
236
|
-
f'or specify a `col_mapping` to associate a different column with
|
|
222
|
+
f'Column {t_col!r} does not exist in {table._display_str()}. Either add a column {t_col!r}, '
|
|
223
|
+
f'or specify a `col_mapping` to associate a different column with '
|
|
224
|
+
f'the external field {ext_col!r}.'
|
|
237
225
|
)
|
|
238
226
|
if ext_col not in export_cols and ext_col not in import_cols:
|
|
239
227
|
raise excs.Error(
|
|
240
|
-
f'Column name
|
|
241
|
-
f'configuration has no column
|
|
228
|
+
f'Column name {ext_col!r} appears as a value in `col_mapping`, but the external store '
|
|
229
|
+
f'configuration has no column {ext_col!r}.'
|
|
242
230
|
)
|
|
243
231
|
col_ref = table[t_col]
|
|
244
232
|
assert isinstance(col_ref, exprs.ColumnRef)
|
|
245
|
-
resolved_col_mapping[col_ref.col] = ext_col
|
|
233
|
+
resolved_col_mapping[col_ref.col.handle] = ext_col
|
|
234
|
+
|
|
246
235
|
# Validate column specs
|
|
247
|
-
t_col_types = table.
|
|
236
|
+
t_col_types = table._get_schema()
|
|
248
237
|
for t_col, ext_col in col_mapping.items():
|
|
249
238
|
t_col_type = t_col_types[t_col]
|
|
250
239
|
if ext_col in export_cols:
|
|
@@ -252,65 +241,34 @@ class Project(ExternalStore, abc.ABC):
|
|
|
252
241
|
ext_col_type = export_cols[ext_col]
|
|
253
242
|
if not ext_col_type.is_supertype_of(t_col_type, ignore_nullable=True):
|
|
254
243
|
raise excs.Error(
|
|
255
|
-
f'Column
|
|
244
|
+
f'Column {t_col!r} cannot be exported to external column {ext_col!r} '
|
|
245
|
+
f'(incompatible types; expecting `{ext_col_type}`)'
|
|
256
246
|
)
|
|
257
247
|
if ext_col in import_cols:
|
|
258
248
|
# Validate that the external column can be assigned to the table column
|
|
259
249
|
if table._tbl_version_path.get_column(t_col).is_computed:
|
|
260
250
|
raise excs.Error(
|
|
261
|
-
f'Column
|
|
251
|
+
f'Column {t_col!r} is a computed column, which cannot be populated from an external column'
|
|
262
252
|
)
|
|
263
253
|
ext_col_type = import_cols[ext_col]
|
|
264
254
|
if not t_col_type.is_supertype_of(ext_col_type, ignore_nullable=True):
|
|
265
255
|
raise excs.Error(
|
|
266
|
-
f'Column
|
|
256
|
+
f'Column {t_col!r} cannot be imported from external column {ext_col!r} '
|
|
257
|
+
f'(incompatible types; expecting `{ext_col_type}`)'
|
|
267
258
|
)
|
|
268
259
|
return resolved_col_mapping
|
|
269
260
|
|
|
270
|
-
@classmethod
|
|
271
|
-
def _column_as_dict(cls, col: Column) -> dict[str, Any]:
|
|
272
|
-
return {'tbl_id': str(col.tbl.id), 'col_id': col.id}
|
|
273
|
-
|
|
274
|
-
@classmethod
|
|
275
|
-
def _column_from_dict(cls, d: dict[str, Any]) -> Column:
|
|
276
|
-
from pixeltable.catalog import Catalog
|
|
277
|
-
|
|
278
|
-
tbl_id = UUID(d['tbl_id'])
|
|
279
|
-
col_id = d['col_id']
|
|
280
|
-
return Catalog.get().tbl_versions[(tbl_id, None)].cols_by_id[col_id]
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
@dataclass(frozen=True)
|
|
284
|
-
class SyncStatus:
|
|
285
|
-
external_rows_created: int = 0
|
|
286
|
-
external_rows_deleted: int = 0
|
|
287
|
-
external_rows_updated: int = 0
|
|
288
|
-
pxt_rows_updated: int = 0
|
|
289
|
-
num_excs: int = 0
|
|
290
|
-
|
|
291
|
-
def combine(self, other: 'SyncStatus') -> 'SyncStatus':
|
|
292
|
-
return SyncStatus(
|
|
293
|
-
external_rows_created=self.external_rows_created + other.external_rows_created,
|
|
294
|
-
external_rows_deleted=self.external_rows_deleted + other.external_rows_deleted,
|
|
295
|
-
external_rows_updated=self.external_rows_updated + other.external_rows_updated,
|
|
296
|
-
pxt_rows_updated=self.pxt_rows_updated + other.pxt_rows_updated,
|
|
297
|
-
num_excs=self.num_excs + other.num_excs
|
|
298
|
-
)
|
|
299
|
-
|
|
300
|
-
@classmethod
|
|
301
|
-
def empty(cls) -> 'SyncStatus':
|
|
302
|
-
return SyncStatus(0, 0, 0, 0, 0)
|
|
303
|
-
|
|
304
261
|
|
|
305
262
|
class MockProject(Project):
|
|
306
263
|
"""A project that cannot be synced, used mainly for testing."""
|
|
264
|
+
|
|
307
265
|
def __init__(
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
266
|
+
self,
|
|
267
|
+
name: str,
|
|
268
|
+
export_cols: dict[str, ts.ColumnType],
|
|
269
|
+
import_cols: dict[str, ts.ColumnType],
|
|
270
|
+
col_mapping: dict[ColumnHandle, str],
|
|
271
|
+
stored_proxies: dict[ColumnHandle, ColumnHandle] | None = None,
|
|
314
272
|
):
|
|
315
273
|
super().__init__(name, col_mapping, stored_proxies)
|
|
316
274
|
self.export_cols = export_cols
|
|
@@ -319,12 +277,12 @@ class MockProject(Project):
|
|
|
319
277
|
|
|
320
278
|
@classmethod
|
|
321
279
|
def create(
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
280
|
+
cls,
|
|
281
|
+
t: Table,
|
|
282
|
+
name: str,
|
|
283
|
+
export_cols: dict[str, ts.ColumnType],
|
|
284
|
+
import_cols: dict[str, ts.ColumnType],
|
|
285
|
+
col_mapping: dict[str, str] | None = None,
|
|
328
286
|
) -> 'MockProject':
|
|
329
287
|
col_mapping = cls.validate_columns(t, export_cols, import_cols, col_mapping)
|
|
330
288
|
return cls(name, export_cols, import_cols, col_mapping)
|
|
@@ -335,7 +293,7 @@ class MockProject(Project):
|
|
|
335
293
|
def get_import_columns(self) -> dict[str, ts.ColumnType]:
|
|
336
294
|
return self.import_cols
|
|
337
295
|
|
|
338
|
-
def sync(self, t: Table, export_data: bool, import_data: bool) ->
|
|
296
|
+
def sync(self, t: Table, export_data: bool, import_data: bool) -> UpdateStatus:
|
|
339
297
|
raise NotImplementedError()
|
|
340
298
|
|
|
341
299
|
def delete(self) -> None:
|
|
@@ -350,8 +308,8 @@ class MockProject(Project):
|
|
|
350
308
|
'name': self.name,
|
|
351
309
|
'export_cols': {k: v.as_dict() for k, v in self.export_cols.items()},
|
|
352
310
|
'import_cols': {k: v.as_dict() for k, v in self.import_cols.items()},
|
|
353
|
-
'col_mapping': [[
|
|
354
|
-
'stored_proxies': [[
|
|
311
|
+
'col_mapping': [[k.as_dict(), v] for k, v in self.col_mapping.items()],
|
|
312
|
+
'stored_proxies': [[k.as_dict(), v.as_dict()] for k, v in self.stored_proxies.items()],
|
|
355
313
|
}
|
|
356
314
|
|
|
357
315
|
@classmethod
|
|
@@ -360,11 +318,11 @@ class MockProject(Project):
|
|
|
360
318
|
md['name'],
|
|
361
319
|
{k: ts.ColumnType.from_dict(v) for k, v in md['export_cols'].items()},
|
|
362
320
|
{k: ts.ColumnType.from_dict(v) for k, v in md['import_cols'].items()},
|
|
363
|
-
{
|
|
364
|
-
{
|
|
321
|
+
{ColumnHandle.from_dict(entry[0]): entry[1] for entry in md['col_mapping']},
|
|
322
|
+
{ColumnHandle.from_dict(entry[0]): ColumnHandle.from_dict(entry[1]) for entry in md['stored_proxies']},
|
|
365
323
|
)
|
|
366
324
|
|
|
367
|
-
def __eq__(self, other:
|
|
325
|
+
def __eq__(self, other: object) -> bool:
|
|
368
326
|
if not isinstance(other, MockProject):
|
|
369
327
|
return False
|
|
370
328
|
return self.name == other.name
|
pixeltable/io/fiftyone.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
-
from typing import
|
|
2
|
+
from typing import Any, Iterator
|
|
3
3
|
|
|
4
4
|
import fiftyone as fo # type: ignore[import-untyped]
|
|
5
5
|
import fiftyone.utils.data as foud # type: ignore[import-untyped]
|
|
@@ -9,17 +9,18 @@ import puremagic
|
|
|
9
9
|
import pixeltable as pxt
|
|
10
10
|
import pixeltable.exceptions as excs
|
|
11
11
|
from pixeltable import exprs
|
|
12
|
-
from pixeltable.
|
|
12
|
+
from pixeltable.utils.local_store import TempStore
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
16
16
|
"""
|
|
17
17
|
Implementation of a FiftyOne `DatasetImporter` that reads image data from a Pixeltable table.
|
|
18
18
|
"""
|
|
19
|
+
|
|
19
20
|
__image_format: str # format to use for any exported images that are not already stored on disk
|
|
20
21
|
__labels: dict[str, tuple[exprs.Expr, type[fo.Label]]] # label_name -> (expr, label_cls)
|
|
21
22
|
__image_idx: int # index of the image expr in the select list
|
|
22
|
-
__localpath_idx:
|
|
23
|
+
__localpath_idx: int | None # index of the image localpath in the select list, if present
|
|
23
24
|
__row_iter: Iterator[list] # iterator over the table rows, to be converted to FiftyOne samples
|
|
24
25
|
|
|
25
26
|
def __init__(
|
|
@@ -27,19 +28,14 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
|
27
28
|
tbl: pxt.Table,
|
|
28
29
|
image: exprs.Expr,
|
|
29
30
|
image_format: str,
|
|
30
|
-
classifications:
|
|
31
|
-
detections:
|
|
32
|
-
dataset_dir:
|
|
31
|
+
classifications: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
|
|
32
|
+
detections: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
|
|
33
|
+
dataset_dir: os.PathLike | None = None,
|
|
33
34
|
shuffle: bool = False,
|
|
34
|
-
seed:
|
|
35
|
-
max_samples:
|
|
35
|
+
seed: int | float | str | bytes | bytearray | None = None,
|
|
36
|
+
max_samples: int | None = None,
|
|
36
37
|
):
|
|
37
|
-
super().__init__(
|
|
38
|
-
dataset_dir=dataset_dir,
|
|
39
|
-
shuffle=shuffle,
|
|
40
|
-
seed=seed,
|
|
41
|
-
max_samples=max_samples
|
|
42
|
-
)
|
|
38
|
+
super().__init__(dataset_dir=dataset_dir, shuffle=shuffle, seed=seed, max_samples=max_samples)
|
|
43
39
|
|
|
44
40
|
self.__image_format = image_format
|
|
45
41
|
|
|
@@ -54,19 +50,18 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
|
54
50
|
if isinstance(exprs_, dict):
|
|
55
51
|
for label_name, expr in exprs_.items():
|
|
56
52
|
if not label_name.isidentifier():
|
|
57
|
-
raise excs.Error(f
|
|
53
|
+
raise excs.Error(f'Invalid label name: {label_name}')
|
|
58
54
|
if label_name in self.__labels:
|
|
59
|
-
raise excs.Error(f
|
|
55
|
+
raise excs.Error(f'Duplicate label name: {label_name}')
|
|
60
56
|
self.__labels[label_name] = (expr, label_cls)
|
|
61
57
|
|
|
62
58
|
# Now add the remaining labels, assigning unused default names.
|
|
63
59
|
for exprs_, label_cls, default_name in label_categories:
|
|
64
60
|
if exprs_ is None or isinstance(exprs_, dict):
|
|
65
61
|
continue
|
|
66
|
-
if isinstance(exprs_, exprs.Expr)
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
for expr in exprs_:
|
|
62
|
+
exprs_list = [exprs_] if isinstance(exprs_, exprs.Expr) else exprs_
|
|
63
|
+
assert isinstance(exprs_list, list)
|
|
64
|
+
for expr in exprs_list:
|
|
70
65
|
if default_name not in self.__labels:
|
|
71
66
|
name = default_name
|
|
72
67
|
else:
|
|
@@ -92,10 +87,10 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
|
92
87
|
else:
|
|
93
88
|
self.__localpath_idx = None
|
|
94
89
|
|
|
95
|
-
|
|
96
|
-
self.__row_iter =
|
|
90
|
+
query = tbl.select(*selection)
|
|
91
|
+
self.__row_iter = query._output_row_iterator()
|
|
97
92
|
|
|
98
|
-
def __next__(self) -> tuple[str,
|
|
93
|
+
def __next__(self) -> tuple[str, fo.ImageMetadata | None, dict[str, fo.Label] | None]:
|
|
99
94
|
row = next(self.__row_iter)
|
|
100
95
|
img = row[self.__image_idx]
|
|
101
96
|
assert isinstance(img, PIL.Image.Image)
|
|
@@ -105,7 +100,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
|
105
100
|
assert isinstance(file, str)
|
|
106
101
|
else:
|
|
107
102
|
# Write the dynamically created image to a temp file
|
|
108
|
-
file =
|
|
103
|
+
file = TempStore.create_path(extension=f'.{self.__image_format}')
|
|
109
104
|
img.save(file, format=self.__image_format)
|
|
110
105
|
|
|
111
106
|
metadata = fo.ImageMetadata(
|
|
@@ -113,7 +108,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
|
113
108
|
mime_type=puremagic.from_file(file, mime=True),
|
|
114
109
|
width=img.width,
|
|
115
110
|
height=img.height,
|
|
116
|
-
filepath=file,
|
|
111
|
+
filepath=str(file),
|
|
117
112
|
num_channels=len(img.getbands()),
|
|
118
113
|
)
|
|
119
114
|
|
|
@@ -129,7 +124,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
|
129
124
|
elif label_cls is fo.Detections:
|
|
130
125
|
label = fo.Detections(detections=self.__as_fo_detections(label_data))
|
|
131
126
|
else:
|
|
132
|
-
|
|
127
|
+
raise AssertionError()
|
|
133
128
|
labels[label_name] = label
|
|
134
129
|
|
|
135
130
|
return file, metadata, labels
|
|
@@ -137,13 +132,9 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
|
137
132
|
def __as_fo_classifications(self, data: list) -> list[fo.Classification]:
|
|
138
133
|
if not isinstance(data, list) or any('label' not in entry for entry in data):
|
|
139
134
|
raise excs.Error(
|
|
140
|
-
f
|
|
141
|
-
"(Expected a list of dicts, each containing a 'label' key)"
|
|
135
|
+
f"Invalid classifications data: {data}\n(Expected a list of dicts, each containing a 'label' key)"
|
|
142
136
|
)
|
|
143
|
-
return [
|
|
144
|
-
fo.Classification(label=entry['label'], confidence=entry.get('confidence'))
|
|
145
|
-
for entry in data
|
|
146
|
-
]
|
|
137
|
+
return [fo.Classification(label=entry['label'], confidence=entry.get('confidence')) for entry in data]
|
|
147
138
|
|
|
148
139
|
def __as_fo_detections(self, data: list) -> list[fo.Detections]:
|
|
149
140
|
if not isinstance(data, list) or any('label' not in entry or 'bounding_box' not in entry for entry in data):
|
|
@@ -174,5 +165,5 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
|
174
165
|
def get_dataset_info(self) -> dict:
|
|
175
166
|
pass
|
|
176
167
|
|
|
177
|
-
def close(self, *args) -> None:
|
|
168
|
+
def close(self, *args: Any) -> None:
|
|
178
169
|
pass
|