pixeltable 0.3.14__py3-none-any.whl → 0.4.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +292 -105
- pixeltable/catalog/column.py +10 -8
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/insertable_table.py +25 -20
- pixeltable/catalog/schema_object.py +3 -6
- pixeltable/catalog/table.py +245 -189
- pixeltable/catalog/table_version.py +319 -201
- pixeltable/catalog/table_version_handle.py +15 -2
- pixeltable/catalog/table_version_path.py +60 -21
- pixeltable/catalog/view.py +14 -5
- pixeltable/dataframe.py +11 -9
- pixeltable/env.py +2 -4
- pixeltable/exec/in_memory_data_node.py +1 -1
- pixeltable/exec/sql_node.py +20 -11
- pixeltable/exprs/column_property_ref.py +15 -6
- pixeltable/exprs/column_ref.py +32 -11
- pixeltable/exprs/comparison.py +1 -1
- pixeltable/exprs/row_builder.py +4 -6
- pixeltable/exprs/rowid_ref.py +8 -0
- pixeltable/exprs/similarity_expr.py +1 -0
- pixeltable/func/query_template_function.py +1 -1
- pixeltable/functions/gemini.py +166 -33
- pixeltable/functions/math.py +63 -0
- pixeltable/functions/string.py +212 -58
- pixeltable/globals.py +7 -4
- pixeltable/index/base.py +5 -0
- pixeltable/index/btree.py +5 -0
- pixeltable/index/embedding_index.py +5 -0
- pixeltable/io/external_store.py +8 -29
- pixeltable/io/label_studio.py +1 -1
- pixeltable/io/parquet.py +4 -4
- pixeltable/io/table_data_conduit.py +0 -31
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_30.py +6 -11
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/util.py +3 -9
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +5 -1
- pixeltable/plan.py +4 -4
- pixeltable/share/packager.py +207 -15
- pixeltable/share/publish.py +2 -2
- pixeltable/store.py +31 -13
- pixeltable/utils/dbms.py +1 -1
- {pixeltable-0.3.14.dist-info → pixeltable-0.4.0rc1.dist-info}/METADATA +1 -1
- {pixeltable-0.3.14.dist-info → pixeltable-0.4.0rc1.dist-info}/RECORD +50 -49
- {pixeltable-0.3.14.dist-info → pixeltable-0.4.0rc1.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.4.0rc1.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.4.0rc1.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/column.py
CHANGED
|
@@ -15,7 +15,6 @@ from .globals import MediaValidation, is_valid_identifier
|
|
|
15
15
|
|
|
16
16
|
if TYPE_CHECKING:
|
|
17
17
|
from .table_version import TableVersion
|
|
18
|
-
from .table_version_handle import TableVersionHandle
|
|
19
18
|
from .table_version_path import TableVersionPath
|
|
20
19
|
|
|
21
20
|
_logger = logging.getLogger('pixeltable')
|
|
@@ -44,7 +43,10 @@ class Column:
|
|
|
44
43
|
_value_expr: Optional[exprs.Expr]
|
|
45
44
|
value_expr_dict: Optional[dict[str, Any]]
|
|
46
45
|
dependent_cols: set[Column]
|
|
47
|
-
|
|
46
|
+
# we store a TableVersion here, not a TableVersionHandle, because this column is owned by that TableVersion instance
|
|
47
|
+
# (re-resolving it later to a different instance doesn't make sense)
|
|
48
|
+
tbl: Optional[TableVersion]
|
|
49
|
+
# tbl: Optional[TableVersionHandle]
|
|
48
50
|
|
|
49
51
|
def __init__(
|
|
50
52
|
self,
|
|
@@ -138,7 +140,7 @@ class Column:
|
|
|
138
140
|
message = (
|
|
139
141
|
dedent(
|
|
140
142
|
f"""
|
|
141
|
-
The computed column {self.name!r} in table {self.tbl.
|
|
143
|
+
The computed column {self.name!r} in table {self.tbl.name!r} is no longer valid.
|
|
142
144
|
{{validation_error}}
|
|
143
145
|
You can continue to query existing data from this column, but evaluating it on new data will raise an error.
|
|
144
146
|
""" # noqa: E501
|
|
@@ -175,8 +177,8 @@ class Column:
|
|
|
175
177
|
# multiple dependents)
|
|
176
178
|
def get_idx_info(self, reference_tbl: Optional['TableVersionPath'] = None) -> dict[str, 'TableVersion.IndexInfo']:
|
|
177
179
|
assert self.tbl is not None
|
|
178
|
-
tbl = reference_tbl.tbl_version if reference_tbl is not None else self.tbl
|
|
179
|
-
return {name: info for name, info in tbl.
|
|
180
|
+
tbl = reference_tbl.tbl_version.get() if reference_tbl is not None else self.tbl
|
|
181
|
+
return {name: info for name, info in tbl.idxs_by_name.items() if info.col == self}
|
|
180
182
|
|
|
181
183
|
@property
|
|
182
184
|
def is_computed(self) -> bool:
|
|
@@ -199,14 +201,14 @@ class Column:
|
|
|
199
201
|
@property
|
|
200
202
|
def qualified_name(self) -> str:
|
|
201
203
|
assert self.tbl is not None
|
|
202
|
-
return f'{self.tbl.
|
|
204
|
+
return f'{self.tbl.name}.{self.name}'
|
|
203
205
|
|
|
204
206
|
@property
|
|
205
207
|
def media_validation(self) -> MediaValidation:
|
|
206
208
|
if self._media_validation is not None:
|
|
207
209
|
return self._media_validation
|
|
208
210
|
assert self.tbl is not None
|
|
209
|
-
return self.tbl.
|
|
211
|
+
return self.tbl.media_validation
|
|
210
212
|
|
|
211
213
|
@property
|
|
212
214
|
def is_required_for_insert(self) -> bool:
|
|
@@ -256,7 +258,7 @@ class Column:
|
|
|
256
258
|
return f'{self.name}: {self.col_type}'
|
|
257
259
|
|
|
258
260
|
def __repr__(self) -> str:
|
|
259
|
-
return f'Column({self.id!r}, {self.name!r}, tbl={self.tbl.
|
|
261
|
+
return f'Column({self.id!r}, {self.name!r}, tbl={self.tbl.name!r})'
|
|
260
262
|
|
|
261
263
|
def __hash__(self) -> int:
|
|
262
264
|
# TODO(aaron-siegel): This and __eq__ do not capture the table version. We need to rethink the Column
|
pixeltable/catalog/dir.py
CHANGED
|
@@ -38,13 +38,12 @@ class Dir(SchemaObject):
|
|
|
38
38
|
def _display_name(cls) -> str:
|
|
39
39
|
return 'directory'
|
|
40
40
|
|
|
41
|
-
@property
|
|
42
41
|
def _path(self) -> str:
|
|
43
42
|
"""Returns the path to this schema object."""
|
|
44
43
|
if self._dir_id is None:
|
|
45
44
|
# we're the root dir
|
|
46
45
|
return ''
|
|
47
|
-
return super()._path
|
|
46
|
+
return super()._path()
|
|
48
47
|
|
|
49
48
|
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
50
49
|
# print(
|
|
pixeltable/catalog/insertable_table.py
CHANGED
|
@@ -138,37 +138,40 @@ class InsertableTable(Table):
|
|
|
138
138
|
print_stats: bool = False,
|
|
139
139
|
**kwargs: Any,
|
|
140
140
|
) -> UpdateStatus:
|
|
141
|
+
from pixeltable.catalog import Catalog
|
|
141
142
|
from pixeltable.io.table_data_conduit import UnkTableDataConduit
|
|
142
143
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
source
|
|
146
|
-
|
|
144
|
+
with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
|
|
145
|
+
table = self
|
|
146
|
+
if source is None:
|
|
147
|
+
source = [kwargs]
|
|
148
|
+
kwargs = None
|
|
147
149
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
150
|
+
tds = UnkTableDataConduit(
|
|
151
|
+
source, source_format=source_format, src_schema_overrides=schema_overrides, extra_fields=kwargs
|
|
152
|
+
)
|
|
153
|
+
data_source = tds.specialize()
|
|
154
|
+
if data_source.source_column_map is None:
|
|
155
|
+
data_source.src_pk = []
|
|
154
156
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
157
|
+
assert isinstance(table, Table)
|
|
158
|
+
data_source.add_table_info(table)
|
|
159
|
+
data_source.prepare_for_insert_into_table()
|
|
158
160
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
161
|
+
fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
|
|
162
|
+
return table.insert_table_data_source(
|
|
163
|
+
data_source=data_source, fail_on_exception=fail_on_exception, print_stats=print_stats
|
|
164
|
+
)
|
|
163
165
|
|
|
164
166
|
def insert_table_data_source(
|
|
165
167
|
self, data_source: TableDataConduit, fail_on_exception: bool, print_stats: bool = False
|
|
166
168
|
) -> pxt.UpdateStatus:
|
|
167
169
|
"""Insert row batches into this table from a `TableDataConduit`."""
|
|
170
|
+
from pixeltable.catalog import Catalog
|
|
168
171
|
from pixeltable.io.table_data_conduit import DFTableDataConduit
|
|
169
172
|
|
|
170
173
|
status = pxt.UpdateStatus()
|
|
171
|
-
with
|
|
174
|
+
with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
|
|
172
175
|
if isinstance(data_source, DFTableDataConduit):
|
|
173
176
|
status += self._tbl_version.get().insert(
|
|
174
177
|
rows=None, df=data_source.pxt_df, print_stats=print_stats, fail_on_exception=fail_on_exception
|
|
@@ -226,7 +229,9 @@ class InsertableTable(Table):
|
|
|
226
229
|
|
|
227
230
|
>>> tbl.delete(tbl.a > 5)
|
|
228
231
|
"""
|
|
229
|
-
|
|
232
|
+
from pixeltable.catalog import Catalog
|
|
233
|
+
|
|
234
|
+
with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
|
|
230
235
|
return self._tbl_version.get().delete(where=where)
|
|
231
236
|
|
|
232
237
|
@property
|
|
@@ -238,4 +243,4 @@ class InsertableTable(Table):
|
|
|
238
243
|
return []
|
|
239
244
|
|
|
240
245
|
def _table_descriptor(self) -> str:
|
|
241
|
-
return f'Table {self._path!r}'
|
|
246
|
+
return f'Table {self._path()!r}'
|
|
pixeltable/catalog/schema_object.py
CHANGED
|
@@ -2,8 +2,6 @@ from abc import abstractmethod
|
|
|
2
2
|
from typing import TYPE_CHECKING, Any, Optional
|
|
3
3
|
from uuid import UUID
|
|
4
4
|
|
|
5
|
-
from pixeltable.env import Env
|
|
6
|
-
|
|
7
5
|
if TYPE_CHECKING:
|
|
8
6
|
from pixeltable import catalog
|
|
9
7
|
|
|
@@ -28,24 +26,23 @@ class SchemaObject:
|
|
|
28
26
|
"""Returns the parent directory of this schema object."""
|
|
29
27
|
from .catalog import Catalog
|
|
30
28
|
|
|
31
|
-
with
|
|
29
|
+
with Catalog.get().begin_xact(for_write=False):
|
|
32
30
|
if self._dir_id is None:
|
|
33
31
|
return None
|
|
34
32
|
return Catalog.get().get_dir(self._dir_id)
|
|
35
33
|
|
|
36
|
-
@property
|
|
37
34
|
def _path(self) -> str:
|
|
38
35
|
"""Returns the path to this schema object."""
|
|
39
36
|
from .catalog import Catalog
|
|
40
37
|
|
|
41
38
|
assert self._dir_id is not None
|
|
42
|
-
with
|
|
39
|
+
with Catalog.get().begin_xact(for_write=False):
|
|
43
40
|
path = Catalog.get().get_dir_path(self._dir_id)
|
|
44
41
|
return str(path.append(self._name))
|
|
45
42
|
|
|
46
43
|
def get_metadata(self) -> dict[str, Any]:
|
|
47
44
|
"""Returns metadata associated with this schema object."""
|
|
48
|
-
return {'name': self._name, 'path': self._path}
|
|
45
|
+
return {'name': self._name, 'path': self._path()}
|
|
49
46
|
|
|
50
47
|
@classmethod
|
|
51
48
|
@abstractmethod
|