pixeltable 0.2.25__py3-none-any.whl → 0.2.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/table.py +118 -44
- pixeltable/catalog/view.py +2 -2
- pixeltable/dataframe.py +240 -92
- pixeltable/env.py +8 -1
- pixeltable/exec/__init__.py +1 -1
- pixeltable/exec/exec_node.py +6 -7
- pixeltable/exec/sql_node.py +91 -44
- pixeltable/exprs/__init__.py +1 -0
- pixeltable/exprs/arithmetic_expr.py +1 -1
- pixeltable/exprs/array_slice.py +1 -1
- pixeltable/exprs/column_property_ref.py +1 -1
- pixeltable/exprs/column_ref.py +29 -2
- pixeltable/exprs/comparison.py +1 -1
- pixeltable/exprs/compound_predicate.py +1 -1
- pixeltable/exprs/expr.py +11 -5
- pixeltable/exprs/expr_set.py +8 -0
- pixeltable/exprs/function_call.py +14 -11
- pixeltable/exprs/in_predicate.py +1 -1
- pixeltable/exprs/inline_expr.py +3 -3
- pixeltable/exprs/is_null.py +1 -1
- pixeltable/exprs/json_mapper.py +1 -1
- pixeltable/exprs/json_path.py +1 -1
- pixeltable/exprs/method_ref.py +1 -1
- pixeltable/exprs/rowid_ref.py +1 -1
- pixeltable/exprs/similarity_expr.py +1 -1
- pixeltable/exprs/sql_element_cache.py +4 -0
- pixeltable/exprs/type_cast.py +2 -2
- pixeltable/exprs/variable.py +3 -0
- pixeltable/func/expr_template_function.py +3 -0
- pixeltable/functions/__init__.py +2 -2
- pixeltable/functions/gemini.py +85 -0
- pixeltable/functions/ollama.py +4 -4
- pixeltable/globals.py +4 -1
- pixeltable/io/__init__.py +1 -1
- pixeltable/io/parquet.py +39 -19
- pixeltable/iterators/document.py +12 -0
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_22.py +17 -0
- pixeltable/metadata/notes.py +1 -0
- pixeltable/plan.py +128 -50
- pixeltable/store.py +1 -1
- pixeltable/type_system.py +2 -1
- pixeltable/utils/arrow.py +8 -3
- pixeltable/utils/description_helper.py +89 -0
- pixeltable/utils/documents.py +14 -0
- {pixeltable-0.2.25.dist-info → pixeltable-0.2.27.dist-info}/METADATA +26 -10
- {pixeltable-0.2.25.dist-info → pixeltable-0.2.27.dist-info}/RECORD +51 -48
- {pixeltable-0.2.25.dist-info → pixeltable-0.2.27.dist-info}/WHEEL +1 -1
- {pixeltable-0.2.25.dist-info → pixeltable-0.2.27.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.25.dist-info → pixeltable-0.2.27.dist-info}/entry_points.txt +0 -0
pixeltable/__version__.py
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
# These version placeholders will be replaced during build.
|
|
2
|
-
__version__ = "0.2.25"
|
|
3
|
-
__version_tuple__ = (0, 2, 25)
|
|
2
|
+
__version__ = "0.2.27"
|
|
3
|
+
__version_tuple__ = (0, 2, 27)
|
pixeltable/catalog/table.py
CHANGED
|
@@ -10,7 +10,6 @@ from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Se
|
|
|
10
10
|
from uuid import UUID
|
|
11
11
|
|
|
12
12
|
import pandas as pd
|
|
13
|
-
import pandas.io.formats.style
|
|
14
13
|
import sqlalchemy as sql
|
|
15
14
|
|
|
16
15
|
import pixeltable as pxt
|
|
@@ -21,17 +20,19 @@ import pixeltable.exprs as exprs
|
|
|
21
20
|
import pixeltable.index as index
|
|
22
21
|
import pixeltable.metadata.schema as schema
|
|
23
22
|
import pixeltable.type_system as ts
|
|
24
|
-
from pixeltable.utils.filecache import FileCache
|
|
25
23
|
|
|
24
|
+
from ..exprs import ColumnRef
|
|
25
|
+
from ..utils.description_helper import DescriptionHelper
|
|
26
|
+
from ..utils.filecache import FileCache
|
|
26
27
|
from .column import Column
|
|
27
|
-
from .globals import _ROWID_COLUMN_NAME, UpdateStatus, is_system_column_name, is_valid_identifier
|
|
28
|
+
from .globals import _ROWID_COLUMN_NAME, MediaValidation, UpdateStatus, is_system_column_name, is_valid_identifier
|
|
28
29
|
from .schema_object import SchemaObject
|
|
29
30
|
from .table_version import TableVersion
|
|
30
31
|
from .table_version_path import TableVersionPath
|
|
31
|
-
from ..exprs import ColumnRef
|
|
32
32
|
|
|
33
33
|
if TYPE_CHECKING:
|
|
34
34
|
import torch.utils.data
|
|
35
|
+
import pixeltable.plan
|
|
35
36
|
|
|
36
37
|
_logger = logging.getLogger('pixeltable')
|
|
37
38
|
|
|
@@ -46,7 +47,7 @@ class Table(SchemaObject):
|
|
|
46
47
|
def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
|
|
47
48
|
super().__init__(id, name, dir_id)
|
|
48
49
|
self._is_dropped = False
|
|
49
|
-
self.
|
|
50
|
+
self.__tbl_version_path = tbl_version_path
|
|
50
51
|
self.__query_scope = self.QueryScope(self)
|
|
51
52
|
|
|
52
53
|
class QueryScope:
|
|
@@ -63,6 +64,7 @@ class Table(SchemaObject):
|
|
|
63
64
|
raise AttributeError(f'Table {self.__table._name!r} has no query with that name: {name!r}')
|
|
64
65
|
|
|
65
66
|
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
67
|
+
self._check_is_dropped()
|
|
66
68
|
super()._move(new_name, new_dir_id)
|
|
67
69
|
with env.Env.get().engine.begin() as conn:
|
|
68
70
|
stmt = sql.text((
|
|
@@ -96,6 +98,7 @@ class Table(SchemaObject):
|
|
|
96
98
|
}
|
|
97
99
|
```
|
|
98
100
|
"""
|
|
101
|
+
self._check_is_dropped()
|
|
99
102
|
md = super().get_metadata()
|
|
100
103
|
md['base'] = self._base._path if self._base is not None else None
|
|
101
104
|
md['schema'] = self._schema
|
|
@@ -116,6 +119,12 @@ class Table(SchemaObject):
|
|
|
116
119
|
"""Return TableVersion for just this table."""
|
|
117
120
|
return self._tbl_version_path.tbl_version
|
|
118
121
|
|
|
122
|
+
@property
|
|
123
|
+
def _tbl_version_path(self) -> TableVersionPath:
|
|
124
|
+
"""Return TableVersionPath for just this table."""
|
|
125
|
+
self._check_is_dropped()
|
|
126
|
+
return self.__tbl_version_path
|
|
127
|
+
|
|
119
128
|
def __hash__(self) -> int:
|
|
120
129
|
return hash(self._tbl_version.id)
|
|
121
130
|
|
|
@@ -153,6 +162,7 @@ class Table(SchemaObject):
|
|
|
153
162
|
Returns:
|
|
154
163
|
A list of view paths.
|
|
155
164
|
"""
|
|
165
|
+
self._check_is_dropped()
|
|
156
166
|
return [t._path for t in self._get_views(recursive=recursive)]
|
|
157
167
|
|
|
158
168
|
def _get_views(self, *, recursive: bool = True) -> list['Table']:
|
|
@@ -166,7 +176,8 @@ class Table(SchemaObject):
|
|
|
166
176
|
"""Return a DataFrame for this table.
|
|
167
177
|
"""
|
|
168
178
|
# local import: avoid circular imports
|
|
169
|
-
|
|
179
|
+
from pixeltable.plan import FromClause
|
|
180
|
+
return pxt.DataFrame(FromClause(tbls=[self._tbl_version_path]))
|
|
170
181
|
|
|
171
182
|
@property
|
|
172
183
|
def queries(self) -> 'Table.QueryScope':
|
|
@@ -180,6 +191,13 @@ class Table(SchemaObject):
|
|
|
180
191
|
"""Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
|
|
181
192
|
return self._df().where(pred)
|
|
182
193
|
|
|
194
|
+
def join(
|
|
195
|
+
self, other: 'Table', *, on: Optional['exprs.Expr'] = None,
|
|
196
|
+
how: 'pixeltable.plan.JoinType.LiteralType' = 'inner'
|
|
197
|
+
) -> 'pxt.DataFrame':
|
|
198
|
+
"""Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
|
|
199
|
+
return self._df().join(other, on=on, how=how)
|
|
200
|
+
|
|
183
201
|
def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pxt.DataFrame':
|
|
184
202
|
"""Return a [`DataFrame`][pixeltable.DataFrame] for this table."""
|
|
185
203
|
return self._df().order_by(*items, asc=asc)
|
|
@@ -200,7 +218,6 @@ class Table(SchemaObject):
|
|
|
200
218
|
) -> 'pxt.dataframe.DataFrameResultSet':
|
|
201
219
|
"""Return rows from this table.
|
|
202
220
|
"""
|
|
203
|
-
self._check_is_dropped()
|
|
204
221
|
return self._df().show(*args, **kwargs)
|
|
205
222
|
|
|
206
223
|
def head(
|
|
@@ -246,6 +263,18 @@ class Table(SchemaObject):
|
|
|
246
263
|
base_id = self._tbl_version_path.base.tbl_version.id
|
|
247
264
|
return catalog.Catalog.get().tbls[base_id]
|
|
248
265
|
|
|
266
|
+
@property
|
|
267
|
+
def _bases(self) -> list['Table']:
|
|
268
|
+
"""
|
|
269
|
+
The ancestor list of bases of this table, starting with its immediate base.
|
|
270
|
+
"""
|
|
271
|
+
bases = []
|
|
272
|
+
base = self._base
|
|
273
|
+
while base is not None:
|
|
274
|
+
bases.append(base)
|
|
275
|
+
base = base._base
|
|
276
|
+
return bases
|
|
277
|
+
|
|
249
278
|
@property
|
|
250
279
|
def _comment(self) -> str:
|
|
251
280
|
return self._tbl_version.comment
|
|
@@ -258,47 +287,98 @@ class Table(SchemaObject):
|
|
|
258
287
|
def _media_validation(self) -> MediaValidation:
|
|
259
288
|
return self._tbl_version.media_validation
|
|
260
289
|
|
|
261
|
-
def
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
.
|
|
277
|
-
|
|
290
|
+
def __repr__(self) -> str:
|
|
291
|
+
return self._descriptors().to_string()
|
|
292
|
+
|
|
293
|
+
def _repr_html_(self) -> str:
|
|
294
|
+
return self._descriptors().to_html()
|
|
295
|
+
|
|
296
|
+
def _descriptors(self) -> DescriptionHelper:
|
|
297
|
+
"""
|
|
298
|
+
Constructs a list of descriptors for this table that can be pretty-printed.
|
|
299
|
+
"""
|
|
300
|
+
helper = DescriptionHelper()
|
|
301
|
+
helper.append(self._title_descriptor())
|
|
302
|
+
helper.append(self._col_descriptor())
|
|
303
|
+
idxs = self._index_descriptor()
|
|
304
|
+
if not idxs.empty:
|
|
305
|
+
helper.append(idxs)
|
|
306
|
+
stores = self._external_store_descriptor()
|
|
307
|
+
if not stores.empty:
|
|
308
|
+
helper.append(stores)
|
|
309
|
+
if self._comment:
|
|
310
|
+
helper.append(f'COMMENT: {self._comment}')
|
|
311
|
+
return helper
|
|
312
|
+
|
|
313
|
+
def _title_descriptor(self) -> str:
|
|
314
|
+
title: str
|
|
315
|
+
if self._base is None:
|
|
316
|
+
title = f'Table\n{self._path!r}'
|
|
317
|
+
else:
|
|
318
|
+
title = f'View\n{self._path!r}'
|
|
319
|
+
title += f'\n(of {self.__bases_to_desc()})'
|
|
320
|
+
return title
|
|
321
|
+
|
|
322
|
+
def _col_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
|
|
323
|
+
return pd.DataFrame(
|
|
324
|
+
{
|
|
325
|
+
'Column Name': col.name,
|
|
326
|
+
'Type': col.col_type._to_str(as_schema=True),
|
|
327
|
+
'Computed With': col.value_expr.display_str(inline=False) if col.value_expr is not None else ''
|
|
328
|
+
}
|
|
329
|
+
for col in self.__tbl_version_path.columns()
|
|
330
|
+
if columns is None or col.name in columns
|
|
278
331
|
)
|
|
279
332
|
|
|
333
|
+
def __bases_to_desc(self) -> str:
|
|
334
|
+
bases = self._bases
|
|
335
|
+
assert len(bases) >= 1
|
|
336
|
+
if len(bases) <= 2:
|
|
337
|
+
return ', '.join(repr(b._path) for b in bases)
|
|
338
|
+
else:
|
|
339
|
+
return f'{bases[0]._path!r}, ..., {bases[-1]._path!r}'
|
|
340
|
+
|
|
341
|
+
def _index_descriptor(self, columns: Optional[list[str]] = None) -> pd.DataFrame:
|
|
342
|
+
from pixeltable import index
|
|
343
|
+
|
|
344
|
+
pd_rows = []
|
|
345
|
+
for name, info in self._tbl_version.idxs_by_name.items():
|
|
346
|
+
if isinstance(info.idx, index.EmbeddingIndex) and (columns is None or info.col.name in columns):
|
|
347
|
+
display_embed = info.idx.string_embed if info.col.col_type.is_string_type() else info.idx.image_embed
|
|
348
|
+
if info.idx.string_embed is not None and info.idx.image_embed is not None:
|
|
349
|
+
embed_str = f'{display_embed} (+1)'
|
|
350
|
+
else:
|
|
351
|
+
embed_str = str(display_embed)
|
|
352
|
+
row = {
|
|
353
|
+
'Index Name': name,
|
|
354
|
+
'Column': info.col.name,
|
|
355
|
+
'Metric': str(info.idx.metric.name.lower()),
|
|
356
|
+
'Embedding': embed_str,
|
|
357
|
+
}
|
|
358
|
+
pd_rows.append(row)
|
|
359
|
+
return pd.DataFrame(pd_rows)
|
|
360
|
+
|
|
361
|
+
def _external_store_descriptor(self) -> pd.DataFrame:
|
|
362
|
+
pd_rows = []
|
|
363
|
+
for name, store in self._tbl_version.external_stores.items():
|
|
364
|
+
row = {
|
|
365
|
+
'External Store': name,
|
|
366
|
+
'Type': type(store).__name__,
|
|
367
|
+
}
|
|
368
|
+
pd_rows.append(row)
|
|
369
|
+
return pd.DataFrame(pd_rows)
|
|
370
|
+
|
|
280
371
|
def describe(self) -> None:
|
|
281
372
|
"""
|
|
282
373
|
Print the table schema.
|
|
283
374
|
"""
|
|
375
|
+
self._check_is_dropped()
|
|
284
376
|
if getattr(builtins, '__IPYTHON__', False):
|
|
285
377
|
from IPython.display import display
|
|
286
|
-
display(self.
|
|
378
|
+
display(self._repr_html_())
|
|
287
379
|
else:
|
|
288
380
|
print(repr(self))
|
|
289
381
|
|
|
290
|
-
# TODO: Display comments in _repr_html()
|
|
291
|
-
def __repr__(self) -> str:
|
|
292
|
-
description_str = self._description().to_string(index=False)
|
|
293
|
-
if self._comment is None:
|
|
294
|
-
comment = ''
|
|
295
|
-
else:
|
|
296
|
-
comment = f'{self._comment}\n'
|
|
297
|
-
return f'{self._display_name()} \'{self._name}\'\n{comment}{description_str}'
|
|
298
|
-
|
|
299
|
-
def _repr_html_(self) -> str:
|
|
300
|
-
return self._description_html()._repr_html_() # type: ignore[attr-defined]
|
|
301
|
-
|
|
302
382
|
def _drop(self) -> None:
|
|
303
383
|
cat = catalog.Catalog.get()
|
|
304
384
|
# verify all dependents are deleted by now
|
|
@@ -337,6 +417,7 @@ class Table(SchemaObject):
|
|
|
337
417
|
|
|
338
418
|
For details, see the documentation for [`add_column()`][pixeltable.catalog.Table.add_column].
|
|
339
419
|
"""
|
|
420
|
+
self._check_is_dropped()
|
|
340
421
|
if not isinstance(col_name, str):
|
|
341
422
|
raise excs.Error(f'Column name must be a string, got {type(col_name)}')
|
|
342
423
|
if not isinstance(spec, (ts.ColumnType, exprs.Expr, type, _GenericAlias)):
|
|
@@ -686,7 +767,6 @@ class Table(SchemaObject):
|
|
|
686
767
|
>>> tbl = pxt.get_table('my_table')
|
|
687
768
|
... tbl.rename_column('col1', 'col2')
|
|
688
769
|
"""
|
|
689
|
-
self._check_is_dropped()
|
|
690
770
|
self._tbl_version.rename_column(old_name, new_name)
|
|
691
771
|
|
|
692
772
|
def add_embedding_index(
|
|
@@ -748,7 +828,6 @@ class Table(SchemaObject):
|
|
|
748
828
|
"""
|
|
749
829
|
if self._tbl_version_path.is_snapshot():
|
|
750
830
|
raise excs.Error('Cannot add an index to a snapshot')
|
|
751
|
-
self._check_is_dropped()
|
|
752
831
|
col: Column
|
|
753
832
|
if isinstance(column, str):
|
|
754
833
|
self.__check_column_name_exists(column, include_bases=True)
|
|
@@ -872,7 +951,6 @@ class Table(SchemaObject):
|
|
|
872
951
|
) -> None:
|
|
873
952
|
if self._tbl_version_path.is_snapshot():
|
|
874
953
|
raise excs.Error('Cannot drop an index from a snapshot')
|
|
875
|
-
self._check_is_dropped()
|
|
876
954
|
assert (col is None) != (idx_name is None)
|
|
877
955
|
|
|
878
956
|
if idx_name is not None:
|
|
@@ -1009,7 +1087,6 @@ class Table(SchemaObject):
|
|
|
1009
1087
|
|
|
1010
1088
|
>>> tbl.update({'int_col': tbl.int_col + 1}, where=tbl.int_col == 0)
|
|
1011
1089
|
"""
|
|
1012
|
-
self._check_is_dropped()
|
|
1013
1090
|
status = self._tbl_version.update(value_spec, where, cascade)
|
|
1014
1091
|
FileCache.get().emit_eviction_warnings()
|
|
1015
1092
|
return status
|
|
@@ -1045,7 +1122,6 @@ class Table(SchemaObject):
|
|
|
1045
1122
|
"""
|
|
1046
1123
|
if self._tbl_version_path.is_snapshot():
|
|
1047
1124
|
raise excs.Error('Cannot update a snapshot')
|
|
1048
|
-
self._check_is_dropped()
|
|
1049
1125
|
rows = list(rows)
|
|
1050
1126
|
|
|
1051
1127
|
row_updates: list[dict[Column, exprs.Expr]] = []
|
|
@@ -1100,7 +1176,6 @@ class Table(SchemaObject):
|
|
|
1100
1176
|
"""
|
|
1101
1177
|
if self._tbl_version_path.is_snapshot():
|
|
1102
1178
|
raise excs.Error('Cannot revert a snapshot')
|
|
1103
|
-
self._check_is_dropped()
|
|
1104
1179
|
self._tbl_version.revert()
|
|
1105
1180
|
|
|
1106
1181
|
@overload
|
|
@@ -1150,7 +1225,6 @@ class Table(SchemaObject):
|
|
|
1150
1225
|
"""
|
|
1151
1226
|
if self._tbl_version.is_snapshot:
|
|
1152
1227
|
raise excs.Error(f'Table `{self._name}` is a snapshot, so it cannot be linked to an external store.')
|
|
1153
|
-
self._check_is_dropped()
|
|
1154
1228
|
if store.name in self.external_stores:
|
|
1155
1229
|
raise excs.Error(f'Table `{self._name}` already has an external store with that name: {store.name}')
|
|
1156
1230
|
_logger.info(f'Linking external store `{store.name}` to table `{self._name}`')
|
pixeltable/catalog/view.py
CHANGED
|
@@ -59,7 +59,7 @@ class View(Table):
|
|
|
59
59
|
|
|
60
60
|
# verify that filter can be evaluated in the context of the base
|
|
61
61
|
if predicate is not None:
|
|
62
|
-
if not predicate.is_bound_by(base):
|
|
62
|
+
if not predicate.is_bound_by([base]):
|
|
63
63
|
raise excs.Error(f'Filter cannot be computed in the context of the base {base.tbl_name()}')
|
|
64
64
|
# create a copy that we can modify and store
|
|
65
65
|
predicate = predicate.copy()
|
|
@@ -69,7 +69,7 @@ class View(Table):
|
|
|
69
69
|
if not col.is_computed:
|
|
70
70
|
continue
|
|
71
71
|
# make sure that the value can be computed in the context of the base
|
|
72
|
-
if col.value_expr is not None and not col.value_expr.is_bound_by(base):
|
|
72
|
+
if col.value_expr is not None and not col.value_expr.is_bound_by([base]):
|
|
73
73
|
raise excs.Error(
|
|
74
74
|
f'Column {col.name}: value expression cannot be computed in the context of the base {base.tbl_name()}')
|
|
75
75
|
|