pixeltable 0.2.8__py3-none-any.whl → 0.2.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +15 -33
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +1 -1
- pixeltable/catalog/column.py +29 -11
- pixeltable/catalog/dir.py +2 -2
- pixeltable/catalog/insertable_table.py +5 -55
- pixeltable/catalog/named_function.py +2 -2
- pixeltable/catalog/schema_object.py +2 -7
- pixeltable/catalog/table.py +307 -186
- pixeltable/catalog/table_version.py +109 -63
- pixeltable/catalog/table_version_path.py +28 -5
- pixeltable/catalog/view.py +20 -10
- pixeltable/dataframe.py +129 -26
- pixeltable/env.py +29 -18
- pixeltable/exec/exec_context.py +5 -0
- pixeltable/exec/exec_node.py +1 -0
- pixeltable/exec/in_memory_data_node.py +29 -24
- pixeltable/exec/sql_scan_node.py +1 -1
- pixeltable/exprs/column_ref.py +13 -8
- pixeltable/exprs/data_row.py +4 -0
- pixeltable/exprs/expr.py +16 -1
- pixeltable/exprs/function_call.py +4 -4
- pixeltable/exprs/row_builder.py +29 -20
- pixeltable/exprs/similarity_expr.py +4 -3
- pixeltable/ext/functions/yolox.py +2 -1
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +14 -12
- pixeltable/func/callable_function.py +8 -6
- pixeltable/func/expr_template_function.py +13 -19
- pixeltable/func/function.py +3 -6
- pixeltable/func/query_template_function.py +84 -0
- pixeltable/func/signature.py +68 -23
- pixeltable/func/udf.py +13 -10
- pixeltable/functions/__init__.py +6 -91
- pixeltable/functions/eval.py +26 -14
- pixeltable/functions/fireworks.py +25 -23
- pixeltable/functions/globals.py +62 -0
- pixeltable/functions/huggingface.py +20 -16
- pixeltable/functions/image.py +170 -1
- pixeltable/functions/openai.py +95 -128
- pixeltable/functions/string.py +10 -2
- pixeltable/functions/together.py +95 -84
- pixeltable/functions/util.py +16 -0
- pixeltable/functions/video.py +94 -16
- pixeltable/functions/whisper.py +74 -0
- pixeltable/globals.py +1 -1
- pixeltable/io/__init__.py +10 -0
- pixeltable/io/external_store.py +370 -0
- pixeltable/io/globals.py +51 -22
- pixeltable/io/label_studio.py +639 -0
- pixeltable/io/parquet.py +1 -1
- pixeltable/iterators/__init__.py +9 -0
- pixeltable/iterators/string.py +40 -0
- pixeltable/metadata/__init__.py +6 -8
- pixeltable/metadata/converters/convert_10.py +2 -4
- pixeltable/metadata/converters/convert_12.py +7 -2
- pixeltable/metadata/converters/convert_13.py +6 -8
- pixeltable/metadata/converters/convert_14.py +2 -4
- pixeltable/metadata/converters/convert_15.py +44 -0
- pixeltable/metadata/converters/convert_16.py +18 -0
- pixeltable/metadata/converters/util.py +66 -0
- pixeltable/metadata/schema.py +3 -3
- pixeltable/plan.py +8 -7
- pixeltable/store.py +1 -1
- pixeltable/tool/create_test_db_dump.py +147 -54
- pixeltable/tool/embed_udf.py +9 -0
- pixeltable/type_system.py +1 -2
- pixeltable/utils/code.py +34 -0
- {pixeltable-0.2.8.dist-info → pixeltable-0.2.10.dist-info}/METADATA +1 -1
- pixeltable-0.2.10.dist-info/RECORD +131 -0
- pixeltable/datatransfer/__init__.py +0 -1
- pixeltable/datatransfer/label_studio.py +0 -452
- pixeltable/datatransfer/remote.py +0 -85
- pixeltable/functions/pil/image.py +0 -147
- pixeltable-0.2.8.dist-info/RECORD +0 -124
- {pixeltable-0.2.8.dist-info → pixeltable-0.2.10.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.8.dist-info → pixeltable-0.2.10.dist-info}/WHEEL +0 -0
pixeltable/__init__.py
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
from .catalog import Column, Table, InsertableTable, View
|
|
2
2
|
from .dataframe import DataFrame
|
|
3
|
-
from .
|
|
4
|
-
from .catalog import Column, Table, InsertableTable, View
|
|
5
|
-
from .exceptions import Error, Error
|
|
3
|
+
from .exceptions import Error
|
|
6
4
|
from .exprs import RELATIVE_PATH_ROOT
|
|
7
|
-
from .func import Function, udf,
|
|
8
|
-
from .globals import
|
|
5
|
+
from .func import Function, udf, Aggregator, uda, expr_udf
|
|
6
|
+
from .globals import init, create_table, create_view, get_table, move, drop_table, list_tables, create_dir, rm_dir, \
|
|
7
|
+
list_dirs, list_functions, get_path, configure_logging
|
|
9
8
|
from .type_system import (
|
|
10
9
|
ColumnType,
|
|
11
10
|
StringType,
|
|
@@ -22,34 +21,17 @@ from .type_system import (
|
|
|
22
21
|
)
|
|
23
22
|
from .utils.help import help
|
|
24
23
|
|
|
25
|
-
# noinspection PyUnresolvedReferences
|
|
26
24
|
from . import functions, io, iterators
|
|
27
25
|
from .__version__ import __version__, __version_tuple__
|
|
28
26
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
'BoolType',
|
|
41
|
-
'TimestampType',
|
|
42
|
-
'JsonType',
|
|
43
|
-
'RELATIVE_PATH_ROOT',
|
|
44
|
-
'ArrayType',
|
|
45
|
-
'ImageType',
|
|
46
|
-
'VideoType',
|
|
47
|
-
'AudioType',
|
|
48
|
-
'DocumentType',
|
|
49
|
-
'Function',
|
|
50
|
-
'help',
|
|
51
|
-
'udf',
|
|
52
|
-
'Aggregator',
|
|
53
|
-
'uda',
|
|
54
|
-
'expr_udf',
|
|
55
|
-
]
|
|
27
|
+
# This is the safest / most maintainable way to do this: start with the default and "blacklist" stuff that
|
|
28
|
+
# we don't want in there. (Using a "whitelist" is considerably harder to maintain.)
|
|
29
|
+
|
|
30
|
+
__default_dir = set(symbol for symbol in dir() if not symbol.startswith('_'))
|
|
31
|
+
__removed_symbols = {'catalog', 'dataframe', 'env', 'exceptions', 'exec', 'exprs', 'func', 'globals', 'index',
|
|
32
|
+
'metadata', 'plan', 'type_system', 'utils'}
|
|
33
|
+
__all__ = sorted(list(__default_dir - __removed_symbols))
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def __dir__():
|
|
37
|
+
return __all__
|
pixeltable/__version__.py
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
# These version placeholders will be replaced during build.
|
|
2
|
-
__version__ = "0.2.
|
|
3
|
-
__version_tuple__ = (0, 2,
|
|
2
|
+
__version__ = "0.2.10"
|
|
3
|
+
__version_tuple__ = (0, 2, 10)
|
pixeltable/catalog/catalog.py
CHANGED
|
@@ -39,7 +39,7 @@ class Catalog:
|
|
|
39
39
|
# key: [id, version]
|
|
40
40
|
# - mutable version of a table: version == None (even though TableVersion.version is set correctly)
|
|
41
41
|
# - snapshot versions: records the version of the snapshot
|
|
42
|
-
self.tbl_versions: Dict[Tuple[UUID, int], TableVersion] = {}
|
|
42
|
+
self.tbl_versions: Dict[Tuple[UUID, Optional[int]], TableVersion] = {}
|
|
43
43
|
|
|
44
44
|
self.tbls: Dict[UUID, Table] = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
|
|
45
45
|
self.tbl_dependents: Dict[UUID, List[Table]] = {}
|
pixeltable/catalog/column.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
-
from typing import Optional, Union, Callable,
|
|
4
|
+
from typing import Optional, Union, Callable, Any
|
|
5
|
+
from uuid import UUID
|
|
5
6
|
|
|
6
7
|
import sqlalchemy as sql
|
|
7
8
|
|
|
@@ -23,7 +24,7 @@ class Column:
|
|
|
23
24
|
is_pk: bool = False, stored: Optional[bool] = None,
|
|
24
25
|
col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
|
|
25
26
|
schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
|
|
26
|
-
records_errors: Optional[bool] = None
|
|
27
|
+
records_errors: Optional[bool] = None, value_expr_dict: Optional[dict[str, Any]] = None,
|
|
27
28
|
):
|
|
28
29
|
"""Column constructor.
|
|
29
30
|
|
|
@@ -56,8 +57,9 @@ class Column:
|
|
|
56
57
|
if col_type is None and computed_with is None:
|
|
57
58
|
raise excs.Error(f'Column `{name}`: col_type is required if computed_with is not specified')
|
|
58
59
|
|
|
59
|
-
self.
|
|
60
|
+
self._value_expr: Optional['Expr'] = None
|
|
60
61
|
self.compute_func: Optional[Callable] = None
|
|
62
|
+
self.value_expr_dict = value_expr_dict
|
|
61
63
|
from pixeltable import exprs
|
|
62
64
|
if computed_with is not None:
|
|
63
65
|
value_expr = exprs.Expr.from_object(computed_with)
|
|
@@ -73,8 +75,8 @@ class Column:
|
|
|
73
75
|
# column name references and for that we need to wait until we're assigned to a Table
|
|
74
76
|
self.compute_func = computed_with
|
|
75
77
|
else:
|
|
76
|
-
self.
|
|
77
|
-
self.col_type = self.
|
|
78
|
+
self._value_expr = value_expr.copy()
|
|
79
|
+
self.col_type = self._value_expr.col_type
|
|
78
80
|
|
|
79
81
|
if col_type is not None:
|
|
80
82
|
self.col_type = col_type
|
|
@@ -96,15 +98,26 @@ class Column:
|
|
|
96
98
|
# computed cols also have storage columns for the exception string and type
|
|
97
99
|
self.sa_errormsg_col: Optional[sql.schema.Column] = None
|
|
98
100
|
self.sa_errortype_col: Optional[sql.schema.Column] = None
|
|
101
|
+
|
|
99
102
|
from .table_version import TableVersion
|
|
100
103
|
self.tbl: Optional[TableVersion] = None # set by owning TableVersion
|
|
101
104
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
+
@property
|
|
106
|
+
def value_expr(self) -> Optional['Expr']:
|
|
107
|
+
"""Instantiate value_expr on-demand"""
|
|
108
|
+
# TODO: instantiate expr in the c'tor and add an Expr.prepare() that can create additional state after the
|
|
109
|
+
# catalog has been fully loaded; that way, we encounter bugs in the serialization/deserialization logic earlier
|
|
110
|
+
if self.value_expr_dict is not None and self._value_expr is None:
|
|
111
|
+
from pixeltable import exprs
|
|
112
|
+
self._value_expr = exprs.Expr.from_dict(self.value_expr_dict)
|
|
113
|
+
return self._value_expr
|
|
114
|
+
|
|
115
|
+
def set_value_expr(self, value_expr: 'Expr') -> None:
|
|
116
|
+
self._value_expr = value_expr
|
|
117
|
+
self.value_expr_dict = None
|
|
105
118
|
|
|
106
119
|
def check_value_expr(self) -> None:
|
|
107
|
-
assert self.
|
|
120
|
+
assert self._value_expr is not None
|
|
108
121
|
if self.stored == False and self.is_computed and self.has_window_fn_call():
|
|
109
122
|
raise excs.Error(
|
|
110
123
|
f'Column {self.name}: stored={self.stored} not supported for columns computed with window functions:'
|
|
@@ -123,7 +136,7 @@ class Column:
|
|
|
123
136
|
|
|
124
137
|
@property
|
|
125
138
|
def is_computed(self) -> bool:
|
|
126
|
-
return self.compute_func is not None or self.
|
|
139
|
+
return self.compute_func is not None or self._value_expr is not None or self.value_expr_dict is not None
|
|
127
140
|
|
|
128
141
|
@property
|
|
129
142
|
def is_stored(self) -> bool:
|
|
@@ -178,10 +191,15 @@ class Column:
|
|
|
178
191
|
def __str__(self) -> str:
|
|
179
192
|
return f'{self.name}: {self.col_type}'
|
|
180
193
|
|
|
194
|
+
def __hash__(self) -> int:
|
|
195
|
+
# TODO(aaron-siegel): This and __eq__ do not capture the table version. We need to rethink the Column
|
|
196
|
+
# abstraction (perhaps separating out the version-dependent properties into a different abstraction).
|
|
197
|
+
assert self.tbl is not None
|
|
198
|
+
return hash((self.tbl.id, self.id))
|
|
199
|
+
|
|
181
200
|
def __eq__(self, other: object) -> bool:
|
|
182
201
|
if not isinstance(other, Column):
|
|
183
202
|
return False
|
|
184
203
|
assert self.tbl is not None
|
|
185
204
|
assert other.tbl is not None
|
|
186
205
|
return self.tbl.id == other.tbl.id and self.id == other.id
|
|
187
|
-
|
pixeltable/catalog/dir.py
CHANGED
|
@@ -21,8 +21,8 @@ class Dir(SchemaObject):
|
|
|
21
21
|
def display_name(cls) -> str:
|
|
22
22
|
return 'directory'
|
|
23
23
|
|
|
24
|
-
def
|
|
25
|
-
super().
|
|
24
|
+
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
25
|
+
super()._move(new_name, new_dir_id)
|
|
26
26
|
with Env.get().engine.begin() as conn:
|
|
27
27
|
dir_md = schema.DirMd(name=new_name)
|
|
28
28
|
conn.execute(
|
|
@@ -71,56 +71,6 @@ class InsertableTable(Table):
|
|
|
71
71
|
self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
|
|
72
72
|
fail_on_exception: bool = True, **kwargs: Any
|
|
73
73
|
) -> UpdateStatus:
|
|
74
|
-
"""Inserts rows into this table. There are two mutually exclusive call patterns:
|
|
75
|
-
|
|
76
|
-
To insert multiple rows at a time:
|
|
77
|
-
``insert(rows: Iterable[dict[str, Any]], /, *, print_stats: bool = False, fail_on_exception: bool = True)``
|
|
78
|
-
|
|
79
|
-
To insert just a single row, you can use the more convenient syntax:
|
|
80
|
-
``insert(*, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any)``
|
|
81
|
-
|
|
82
|
-
Args:
|
|
83
|
-
rows: (if inserting multiple rows) A list of rows to insert, each of which is a dictionary mapping column
|
|
84
|
-
names to values.
|
|
85
|
-
kwargs: (if inserting a single row) Keyword-argument pairs representing column names and values.
|
|
86
|
-
print_stats: If ``True``, print statistics about the cost of computed columns.
|
|
87
|
-
fail_on_exception:
|
|
88
|
-
Determines how exceptions in computed columns and invalid media files (e.g., corrupt images)
|
|
89
|
-
are handled.
|
|
90
|
-
If ``False``, store error information (accessible as column properties 'errortype' and 'errormsg')
|
|
91
|
-
for those cases, but continue inserting rows.
|
|
92
|
-
If ``True``, raise an exception that aborts the insert.
|
|
93
|
-
|
|
94
|
-
Returns:
|
|
95
|
-
execution status
|
|
96
|
-
|
|
97
|
-
Raises:
|
|
98
|
-
Error: if a row does not match the table schema or contains values for computed columns
|
|
99
|
-
|
|
100
|
-
Examples:
|
|
101
|
-
Insert two rows into a table with three int columns ``a``, ``b``, and ``c``. Column ``c`` is nullable.
|
|
102
|
-
|
|
103
|
-
>>> tbl.insert([{'a': 1, 'b': 1, 'c': 1}, {'a': 2, 'b': 2}])
|
|
104
|
-
|
|
105
|
-
Insert a single row into a table with three int columns ``a``, ``b``, and ``c``.
|
|
106
|
-
|
|
107
|
-
>>> tbl.insert(a=1, b=1, c=1)
|
|
108
|
-
"""
|
|
109
|
-
# The commented code is the intended implementation, with signature (*args, **kwargs).
|
|
110
|
-
# That signature cannot be used currently, due to a present limitation in mkdocs.
|
|
111
|
-
# See: https://github.com/mkdocstrings/mkdocstrings/issues/669
|
|
112
|
-
|
|
113
|
-
# print_stats = kwargs.pop('print_stats', False)
|
|
114
|
-
# fail_on_exception = kwargs.pop('fail_on_exception', True)
|
|
115
|
-
# if len(args) > 0:
|
|
116
|
-
# # There's a positional argument; this means `rows` is expressed as a
|
|
117
|
-
# # list of dicts (multi-insert)
|
|
118
|
-
# rows = list(args[0])
|
|
119
|
-
# else:
|
|
120
|
-
# # No positional argument; this means we're inserting a single row
|
|
121
|
-
# # using kwargs syntax
|
|
122
|
-
# rows = [kwargs]
|
|
123
|
-
|
|
124
74
|
if rows is None:
|
|
125
75
|
rows = [kwargs]
|
|
126
76
|
else:
|
|
@@ -136,7 +86,7 @@ class InsertableTable(Table):
|
|
|
136
86
|
if not isinstance(row, dict):
|
|
137
87
|
raise excs.Error('rows must be a list of dictionaries')
|
|
138
88
|
self._validate_input_rows(rows)
|
|
139
|
-
result = self.
|
|
89
|
+
result = self._tbl_version.insert(rows, print_stats=print_stats, fail_on_exception=fail_on_exception)
|
|
140
90
|
|
|
141
91
|
if result.num_excs == 0:
|
|
142
92
|
cols_with_excs_str = ''
|
|
@@ -155,8 +105,8 @@ class InsertableTable(Table):
|
|
|
155
105
|
def _validate_input_rows(self, rows: List[Dict[str, Any]]) -> None:
|
|
156
106
|
"""Verify that the input rows match the table schema"""
|
|
157
107
|
valid_col_names = set(self.column_names())
|
|
158
|
-
reqd_col_names = set(self.
|
|
159
|
-
computed_col_names = set(self.
|
|
108
|
+
reqd_col_names = set(self._tbl_version_path.tbl_version.get_required_col_names())
|
|
109
|
+
computed_col_names = set(self._tbl_version_path.tbl_version.get_computed_col_names())
|
|
160
110
|
for row in rows:
|
|
161
111
|
assert isinstance(row, dict)
|
|
162
112
|
col_names = set(row.keys())
|
|
@@ -170,7 +120,7 @@ class InsertableTable(Table):
|
|
|
170
120
|
raise excs.Error(f'Value for computed column {col_name} in row {row}')
|
|
171
121
|
|
|
172
122
|
# validate data
|
|
173
|
-
col = self.
|
|
123
|
+
col = self._tbl_version_path.get_column(col_name)
|
|
174
124
|
try:
|
|
175
125
|
# basic sanity checks here
|
|
176
126
|
checked_val = col.col_type.create_literal(val)
|
|
@@ -199,7 +149,7 @@ class InsertableTable(Table):
|
|
|
199
149
|
if where is not None:
|
|
200
150
|
if not isinstance(where, Predicate):
|
|
201
151
|
raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
|
|
202
|
-
analysis_info = Planner.analyze(self.
|
|
152
|
+
analysis_info = Planner.analyze(self._tbl_version_path, where)
|
|
203
153
|
# for now we require that the updated rows can be identified via SQL, rather than via a Python filter
|
|
204
154
|
if analysis_info.filter is not None:
|
|
205
155
|
raise excs.Error(f'Filter {analysis_info.filter} not expressible in SQL')
|
|
@@ -25,8 +25,8 @@ class NamedFunction(SchemaObject):
|
|
|
25
25
|
def display_name(cls) -> str:
|
|
26
26
|
return 'function'
|
|
27
27
|
|
|
28
|
-
def
|
|
29
|
-
super().
|
|
28
|
+
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
29
|
+
super()._move(new_name, new_dir_id)
|
|
30
30
|
with Env.get().engine.begin() as conn:
|
|
31
31
|
stmt = sql.text((
|
|
32
32
|
f"UPDATE {schema.Function.__table__} "
|
|
@@ -14,7 +14,7 @@ class SchemaObject:
|
|
|
14
14
|
self._name = name
|
|
15
15
|
self._dir_id = dir_id
|
|
16
16
|
|
|
17
|
-
def
|
|
17
|
+
def _get_id(self) -> UUID:
|
|
18
18
|
return self._id
|
|
19
19
|
|
|
20
20
|
def get_name(self) -> str:
|
|
@@ -28,12 +28,7 @@ class SchemaObject:
|
|
|
28
28
|
"""
|
|
29
29
|
pass
|
|
30
30
|
|
|
31
|
-
|
|
32
|
-
def fqn(self) -> str:
|
|
33
|
-
return f'{self.parent_dir().fqn}.{self._name}'
|
|
34
|
-
|
|
35
|
-
def move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
31
|
+
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
36
32
|
"""Subclasses need to override this to make the change persistent"""
|
|
37
33
|
self._name = new_name
|
|
38
34
|
self._dir_id = new_dir_id
|
|
39
|
-
|