pixeltable 0.2.5__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +18 -9
- pixeltable/__version__.py +3 -0
- pixeltable/catalog/column.py +9 -5
- pixeltable/catalog/insertable_table.py +0 -2
- pixeltable/catalog/table.py +16 -8
- pixeltable/catalog/table_version.py +3 -2
- pixeltable/dataframe.py +184 -110
- pixeltable/env.py +69 -18
- pixeltable/exec/__init__.py +2 -1
- pixeltable/exec/data_row_batch.py +6 -7
- pixeltable/exec/expr_eval_node.py +28 -28
- pixeltable/exec/sql_scan_node.py +7 -6
- pixeltable/exprs/__init__.py +4 -3
- pixeltable/exprs/column_ref.py +9 -0
- pixeltable/exprs/expr.py +15 -7
- pixeltable/exprs/function_call.py +17 -15
- pixeltable/exprs/image_member_access.py +9 -28
- pixeltable/exprs/in_predicate.py +96 -0
- pixeltable/exprs/inline_array.py +13 -11
- pixeltable/exprs/inline_dict.py +15 -13
- pixeltable/exprs/row_builder.py +7 -1
- pixeltable/exprs/similarity_expr.py +65 -0
- pixeltable/func/__init__.py +0 -2
- pixeltable/func/aggregate_function.py +3 -0
- pixeltable/func/callable_function.py +57 -13
- pixeltable/func/expr_template_function.py +11 -2
- pixeltable/func/function.py +35 -4
- pixeltable/func/signature.py +5 -15
- pixeltable/func/udf.py +6 -10
- pixeltable/functions/huggingface.py +23 -4
- pixeltable/functions/openai.py +34 -1
- pixeltable/functions/pil/image.py +61 -64
- pixeltable/functions/together.py +21 -0
- pixeltable/globals.py +425 -0
- pixeltable/index/base.py +3 -1
- pixeltable/index/embedding_index.py +87 -14
- pixeltable/io/__init__.py +3 -0
- pixeltable/{utils → io}/hf_datasets.py +48 -17
- pixeltable/io/pandas.py +148 -0
- pixeltable/{utils → io}/parquet.py +58 -33
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/iterators/base.py +4 -0
- pixeltable/iterators/document.py +218 -97
- pixeltable/iterators/video.py +8 -9
- pixeltable/metadata/__init__.py +7 -3
- pixeltable/metadata/converters/convert_12.py +3 -0
- pixeltable/metadata/converters/convert_13.py +41 -0
- pixeltable/plan.py +2 -19
- pixeltable/store.py +2 -2
- pixeltable/tool/create_test_db_dump.py +32 -13
- pixeltable/type_system.py +13 -54
- pixeltable/utils/documents.py +42 -12
- pixeltable/utils/http_server.py +70 -0
- {pixeltable-0.2.5.dist-info → pixeltable-0.2.6.dist-info}/METADATA +10 -7
- pixeltable-0.2.6.dist-info/RECORD +119 -0
- {pixeltable-0.2.5.dist-info → pixeltable-0.2.6.dist-info}/WHEEL +1 -1
- pixeltable/client.py +0 -600
- pixeltable/exprs/image_similarity_predicate.py +0 -58
- pixeltable/func/batched_function.py +0 -53
- pixeltable/tests/conftest.py +0 -171
- pixeltable/tests/ext/test_yolox.py +0 -21
- pixeltable/tests/functions/test_fireworks.py +0 -43
- pixeltable/tests/functions/test_functions.py +0 -60
- pixeltable/tests/functions/test_huggingface.py +0 -158
- pixeltable/tests/functions/test_openai.py +0 -162
- pixeltable/tests/functions/test_together.py +0 -112
- pixeltable/tests/test_audio.py +0 -65
- pixeltable/tests/test_catalog.py +0 -27
- pixeltable/tests/test_client.py +0 -21
- pixeltable/tests/test_component_view.py +0 -379
- pixeltable/tests/test_dataframe.py +0 -440
- pixeltable/tests/test_dirs.py +0 -107
- pixeltable/tests/test_document.py +0 -120
- pixeltable/tests/test_exprs.py +0 -802
- pixeltable/tests/test_function.py +0 -332
- pixeltable/tests/test_index.py +0 -138
- pixeltable/tests/test_migration.py +0 -44
- pixeltable/tests/test_nos.py +0 -54
- pixeltable/tests/test_snapshot.py +0 -231
- pixeltable/tests/test_table.py +0 -1343
- pixeltable/tests/test_transactional_directory.py +0 -42
- pixeltable/tests/test_types.py +0 -52
- pixeltable/tests/test_video.py +0 -159
- pixeltable/tests/test_view.py +0 -535
- pixeltable/tests/utils.py +0 -442
- pixeltable-0.2.5.dist-info/RECORD +0 -139
- {pixeltable-0.2.5.dist-info → pixeltable-0.2.6.dist-info}/LICENSE +0 -0
pixeltable/__init__.py
CHANGED
|
@@ -1,18 +1,30 @@
|
|
|
1
1
|
from .catalog import Column, Table, InsertableTable, View
|
|
2
|
-
from .client import Client
|
|
3
2
|
from .dataframe import DataFrame
|
|
4
3
|
from .exceptions import Error, Error
|
|
5
4
|
from .exprs import RELATIVE_PATH_ROOT
|
|
6
5
|
from .func import Function, udf, uda, Aggregator, expr_udf
|
|
7
|
-
from .
|
|
8
|
-
|
|
9
|
-
|
|
6
|
+
from .globals import *
|
|
7
|
+
from .type_system import (
|
|
8
|
+
ColumnType,
|
|
9
|
+
StringType,
|
|
10
|
+
IntType,
|
|
11
|
+
FloatType,
|
|
12
|
+
BoolType,
|
|
13
|
+
TimestampType,
|
|
14
|
+
JsonType,
|
|
15
|
+
ArrayType,
|
|
16
|
+
ImageType,
|
|
17
|
+
VideoType,
|
|
18
|
+
AudioType,
|
|
19
|
+
DocumentType,
|
|
20
|
+
)
|
|
10
21
|
from .utils.help import help
|
|
22
|
+
|
|
11
23
|
# noinspection PyUnresolvedReferences
|
|
12
|
-
from . import functions
|
|
24
|
+
from . import functions, io
|
|
25
|
+
from .__version__ import __version__, __version_tuple__
|
|
13
26
|
|
|
14
27
|
__all__ = [
|
|
15
|
-
'Client',
|
|
16
28
|
'DataFrame',
|
|
17
29
|
'Column',
|
|
18
30
|
'Table',
|
|
@@ -39,6 +51,3 @@ __all__ = [
|
|
|
39
51
|
'uda',
|
|
40
52
|
'expr_udf',
|
|
41
53
|
]
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
pixeltable/catalog/column.py
CHANGED
|
@@ -5,8 +5,8 @@ from typing import Optional, Union, Callable, Set
|
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
|
|
8
|
+
import pixeltable.exceptions as excs
|
|
9
|
+
import pixeltable.type_system as ts
|
|
10
10
|
from .globals import is_valid_identifier
|
|
11
11
|
|
|
12
12
|
_logger = logging.getLogger('pixeltable')
|
|
@@ -18,7 +18,7 @@ class Column:
|
|
|
18
18
|
table/view.
|
|
19
19
|
"""
|
|
20
20
|
def __init__(
|
|
21
|
-
self, name: Optional[str], col_type: Optional[ColumnType] = None,
|
|
21
|
+
self, name: Optional[str], col_type: Optional[ts.ColumnType] = None,
|
|
22
22
|
computed_with: Optional[Union['Expr', Callable]] = None,
|
|
23
23
|
is_pk: bool = False, stored: Optional[bool] = None,
|
|
24
24
|
col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
|
|
@@ -114,6 +114,10 @@ class Column:
|
|
|
114
114
|
l = list(self.value_expr.subexprs(filter=lambda e: isinstance(e, exprs.FunctionCall) and e.is_window_fn_call))
|
|
115
115
|
return len(l) > 0
|
|
116
116
|
|
|
117
|
+
def get_idx_info(self) -> dict[str, 'pixeltable.catalog.TableVersion.IndexInfo']:
|
|
118
|
+
assert self.tbl is not None
|
|
119
|
+
return {name: info for name, info in self.tbl.idxs_by_name.items() if info.col == self}
|
|
120
|
+
|
|
117
121
|
@property
|
|
118
122
|
def is_computed(self) -> bool:
|
|
119
123
|
return self.compute_func is not None or self.value_expr is not None
|
|
@@ -148,8 +152,8 @@ class Column:
|
|
|
148
152
|
self.store_name(), self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type,
|
|
149
153
|
nullable=True)
|
|
150
154
|
if self.is_computed or self.col_type.is_media_type():
|
|
151
|
-
self.sa_errormsg_col = sql.Column(self.errormsg_store_name(), StringType().to_sa_type(), nullable=True)
|
|
152
|
-
self.sa_errortype_col = sql.Column(self.errortype_store_name(), StringType().to_sa_type(), nullable=True)
|
|
155
|
+
self.sa_errormsg_col = sql.Column(self.errormsg_store_name(), ts.StringType().to_sa_type(), nullable=True)
|
|
156
|
+
self.sa_errortype_col = sql.Column(self.errortype_store_name(), ts.StringType().to_sa_type(), nullable=True)
|
|
153
157
|
|
|
154
158
|
def get_sa_col_type(self) -> sql.sqltypes.TypeEngine:
|
|
155
159
|
return self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type
|
|
@@ -185,8 +185,6 @@ class InsertableTable(Table):
|
|
|
185
185
|
if not isinstance(where, Predicate):
|
|
186
186
|
raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
|
|
187
187
|
analysis_info = Planner.analyze(self.tbl_version_path, where)
|
|
188
|
-
if analysis_info.similarity_clause is not None:
|
|
189
|
-
raise excs.Error('nearest() cannot be used with delete()')
|
|
190
188
|
# for now we require that the updated rows can be identified via SQL, rather than via a Python filter
|
|
191
189
|
if analysis_info.filter is not None:
|
|
192
190
|
raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
|
pixeltable/catalog/table.py
CHANGED
|
@@ -97,6 +97,11 @@ class Table(SchemaObject):
|
|
|
97
97
|
from pixeltable.dataframe import DataFrame
|
|
98
98
|
return DataFrame(self.tbl_version_path).order_by(*items, asc=asc)
|
|
99
99
|
|
|
100
|
+
def group_by(self, *items: 'exprs.Expr') -> 'pixeltable.dataframe.DataFrame':
|
|
101
|
+
"""Return a DataFrame for this table."""
|
|
102
|
+
from pixeltable.dataframe import DataFrame
|
|
103
|
+
return DataFrame(self.tbl_version_path).group_by(*items)
|
|
104
|
+
|
|
100
105
|
def collect(self) -> 'pixeltable.dataframe.DataFrameResultSet': # type: ignore[name-defined, no-untyped-def]
|
|
101
106
|
"""Return rows from this table.
|
|
102
107
|
"""
|
|
@@ -470,13 +475,16 @@ class Table(SchemaObject):
|
|
|
470
475
|
|
|
471
476
|
def add_embedding_index(
|
|
472
477
|
self, col_name: str, *, idx_name: Optional[str] = None,
|
|
473
|
-
text_embed: Optional[pixeltable.Function] = None, img_embed: Optional[pixeltable.Function] = None
|
|
478
|
+
text_embed: Optional[pixeltable.Function] = None, img_embed: Optional[pixeltable.Function] = None,
|
|
479
|
+
metric: str = 'cosine'
|
|
474
480
|
) -> None:
|
|
475
481
|
"""Add an index to the table.
|
|
476
482
|
Args:
|
|
477
483
|
col_name: name of column to index
|
|
478
484
|
idx_name: name of index, which needs to be unique for the table; if not provided, a name will be generated
|
|
479
|
-
|
|
485
|
+
text_embed: function to embed text; required if the column is a text column
|
|
486
|
+
img_embed: function to embed images; required if the column is an image column
|
|
487
|
+
metric: distance metric to use for the index; one of 'cosine', 'ip', 'l2'; default is 'cosine'
|
|
480
488
|
|
|
481
489
|
Raises:
|
|
482
490
|
Error: If an index with that name already exists for the table or if the column does not exist.
|
|
@@ -484,11 +492,13 @@ class Table(SchemaObject):
|
|
|
484
492
|
Examples:
|
|
485
493
|
Add an index to the ``img`` column:
|
|
486
494
|
|
|
487
|
-
>>> tbl.add_embedding_index('img',
|
|
495
|
+
>>> tbl.add_embedding_index('img', img_embed=...)
|
|
488
496
|
|
|
489
|
-
Add another index to the ``img`` column,
|
|
497
|
+
Add another index to the ``img`` column, using the inner product as the distance metric,
|
|
498
|
+
and with a specific name; ``text_embed`` is also specified in order to search with text:
|
|
490
499
|
|
|
491
|
-
>>> tbl.add_embedding_index(
|
|
500
|
+
>>> tbl.add_embedding_index(
|
|
501
|
+
'img', idx_name='clip_idx', img_embed=..., text_embed=...text_embed..., metric='ip')
|
|
492
502
|
"""
|
|
493
503
|
if self.tbl_version_path.is_snapshot():
|
|
494
504
|
raise excs.Error('Cannot add an index to a snapshot')
|
|
@@ -500,7 +510,7 @@ class Table(SchemaObject):
|
|
|
500
510
|
raise excs.Error(f'Duplicate index name: {idx_name}')
|
|
501
511
|
from pixeltable.index import EmbeddingIndex
|
|
502
512
|
# create the EmbeddingIndex instance to verify args
|
|
503
|
-
idx = EmbeddingIndex(col, text_embed=text_embed, img_embed=img_embed)
|
|
513
|
+
idx = EmbeddingIndex(col, metric=metric, text_embed=text_embed, img_embed=img_embed)
|
|
504
514
|
status = self.tbl_version_path.tbl_version.add_index(col, idx_name=idx_name, idx=idx)
|
|
505
515
|
# TODO: how to deal with exceptions here? drop the index and raise?
|
|
506
516
|
|
|
@@ -582,8 +592,6 @@ class Table(SchemaObject):
|
|
|
582
592
|
if not isinstance(where, exprs.Predicate):
|
|
583
593
|
raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
|
|
584
594
|
analysis_info = Planner.analyze(self.tbl_version_path, where)
|
|
585
|
-
if analysis_info.similarity_clause is not None:
|
|
586
|
-
raise excs.Error('nearest() cannot be used with update()')
|
|
587
595
|
# for now we require that the updated rows can be identified via SQL, rather than via a Python filter
|
|
588
596
|
if analysis_info.filter is not None:
|
|
589
597
|
raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
|
|
@@ -43,6 +43,7 @@ class TableVersion:
|
|
|
43
43
|
@dataclasses.dataclass
|
|
44
44
|
class IndexInfo:
|
|
45
45
|
id: int
|
|
46
|
+
name: str
|
|
46
47
|
idx: index.IndexBase
|
|
47
48
|
col: Column
|
|
48
49
|
val_col: Column
|
|
@@ -272,7 +273,7 @@ class TableVersion:
|
|
|
272
273
|
val_col.sa_col_type = idx.index_sa_type()
|
|
273
274
|
undo_col = self.cols_by_id[md.index_val_undo_col_id]
|
|
274
275
|
undo_col.sa_col_type = idx.index_sa_type()
|
|
275
|
-
idx_info = self.IndexInfo(id=md.id, idx=idx, col=idx_col, val_col=val_col, undo_col=undo_col)
|
|
276
|
+
idx_info = self.IndexInfo(id=md.id, name=md.name, idx=idx, col=idx_col, val_col=val_col, undo_col=undo_col)
|
|
276
277
|
self.idxs_by_name[md.name] = idx_info
|
|
277
278
|
|
|
278
279
|
def _init_sa_schema(self) -> None:
|
|
@@ -353,7 +354,7 @@ class TableVersion:
|
|
|
353
354
|
indexed_col_id=col.id, index_val_col_id=val_col.id, index_val_undo_col_id=undo_col.id,
|
|
354
355
|
schema_version_add=self.schema_version, schema_version_drop=None,
|
|
355
356
|
class_fqn=idx_cls.__module__ + '.' + idx_cls.__name__, init_args=idx.as_dict())
|
|
356
|
-
idx_info = self.IndexInfo(id=idx_id, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
|
|
357
|
+
idx_info = self.IndexInfo(id=idx_id, name=idx_name, idx=idx, col=col, val_col=val_col, undo_col=undo_col)
|
|
357
358
|
self.idx_md[idx_id] = idx_md
|
|
358
359
|
self.idxs_by_name[idx_name] = idx_info
|
|
359
360
|
|