pixeltable 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +8 -7
- pixeltable/catalog/column.py +11 -8
- pixeltable/catalog/insertable_table.py +1 -1
- pixeltable/catalog/path_dict.py +8 -6
- pixeltable/catalog/table.py +20 -14
- pixeltable/catalog/table_version.py +92 -55
- pixeltable/catalog/table_version_path.py +7 -9
- pixeltable/catalog/view.py +3 -2
- pixeltable/dataframe.py +2 -2
- pixeltable/env.py +205 -86
- pixeltable/exceptions.py +5 -1
- pixeltable/exec/aggregation_node.py +2 -1
- pixeltable/exec/component_iteration_node.py +2 -2
- pixeltable/exec/sql_node.py +11 -8
- pixeltable/exprs/__init__.py +2 -2
- pixeltable/exprs/arithmetic_expr.py +4 -4
- pixeltable/exprs/array_slice.py +2 -1
- pixeltable/exprs/column_property_ref.py +9 -7
- pixeltable/exprs/column_ref.py +2 -1
- pixeltable/exprs/comparison.py +10 -7
- pixeltable/exprs/compound_predicate.py +3 -2
- pixeltable/exprs/data_row.py +19 -4
- pixeltable/exprs/expr.py +51 -41
- pixeltable/exprs/expr_set.py +32 -9
- pixeltable/exprs/function_call.py +62 -40
- pixeltable/exprs/in_predicate.py +3 -2
- pixeltable/exprs/inline_expr.py +200 -0
- pixeltable/exprs/is_null.py +3 -2
- pixeltable/exprs/json_mapper.py +5 -4
- pixeltable/exprs/json_path.py +7 -1
- pixeltable/exprs/literal.py +34 -7
- pixeltable/exprs/method_ref.py +3 -3
- pixeltable/exprs/object_ref.py +6 -5
- pixeltable/exprs/row_builder.py +25 -17
- pixeltable/exprs/rowid_ref.py +2 -1
- pixeltable/exprs/similarity_expr.py +2 -1
- pixeltable/exprs/sql_element_cache.py +30 -0
- pixeltable/exprs/type_cast.py +3 -3
- pixeltable/exprs/variable.py +2 -1
- pixeltable/ext/functions/whisperx.py +6 -4
- pixeltable/ext/functions/yolox.py +11 -9
- pixeltable/func/aggregate_function.py +1 -0
- pixeltable/func/function.py +28 -4
- pixeltable/functions/__init__.py +4 -2
- pixeltable/functions/anthropic.py +15 -5
- pixeltable/functions/fireworks.py +1 -1
- pixeltable/functions/globals.py +6 -1
- pixeltable/functions/huggingface.py +91 -14
- pixeltable/functions/image.py +20 -5
- pixeltable/functions/json.py +5 -5
- pixeltable/functions/mistralai.py +188 -0
- pixeltable/functions/openai.py +6 -10
- pixeltable/functions/string.py +3 -2
- pixeltable/functions/timestamp.py +95 -7
- pixeltable/functions/together.py +18 -11
- pixeltable/functions/video.py +2 -2
- pixeltable/functions/vision.py +69 -37
- pixeltable/functions/whisper.py +4 -1
- pixeltable/globals.py +5 -1
- pixeltable/io/hf_datasets.py +17 -15
- pixeltable/io/pandas.py +0 -2
- pixeltable/io/parquet.py +15 -14
- pixeltable/iterators/document.py +16 -15
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_18.py +1 -1
- pixeltable/metadata/converters/convert_19.py +46 -0
- pixeltable/metadata/converters/convert_20.py +56 -0
- pixeltable/metadata/converters/util.py +29 -4
- pixeltable/metadata/notes.py +2 -0
- pixeltable/metadata/schema.py +5 -4
- pixeltable/plan.py +100 -78
- pixeltable/store.py +5 -1
- pixeltable/tool/create_test_db_dump.py +18 -6
- pixeltable/type_system.py +15 -15
- pixeltable/utils/documents.py +45 -42
- pixeltable/utils/formatter.py +2 -2
- pixeltable-0.2.19.dist-info/LICENSE +201 -0
- {pixeltable-0.2.17.dist-info → pixeltable-0.2.19.dist-info}/METADATA +84 -24
- pixeltable-0.2.19.dist-info/RECORD +147 -0
- pixeltable/exprs/inline_array.py +0 -116
- pixeltable/exprs/inline_dict.py +0 -103
- pixeltable-0.2.17.dist-info/LICENSE +0 -18
- pixeltable-0.2.17.dist-info/RECORD +0 -144
- {pixeltable-0.2.17.dist-info → pixeltable-0.2.19.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.17.dist-info → pixeltable-0.2.19.dist-info}/entry_points.txt +0 -0
pixeltable/__init__.py
CHANGED
|
@@ -4,7 +4,7 @@ from .exceptions import Error
|
|
|
4
4
|
from .exprs import RELATIVE_PATH_ROOT
|
|
5
5
|
from .func import Function, udf, Aggregator, uda, expr_udf
|
|
6
6
|
from .globals import init, create_table, create_view, get_table, move, drop_table, list_tables, create_dir, drop_dir, \
|
|
7
|
-
list_dirs, list_functions, configure_logging
|
|
7
|
+
list_dirs, list_functions, configure_logging, array
|
|
8
8
|
from .type_system import (
|
|
9
9
|
ColumnType,
|
|
10
10
|
StringType,
|
pixeltable/__version__.py
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
# These version placeholders will be replaced during build.
|
|
2
|
-
__version__ = "0.2.17"
|
|
3
|
-
__version_tuple__ = (0, 2, 17)
|
|
2
|
+
__version__ = "0.2.19"
|
|
3
|
+
__version_tuple__ = (0, 2, 19)
|
pixeltable/catalog/catalog.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from uuid import UUID
|
|
2
|
+
|
|
4
3
|
import dataclasses
|
|
5
4
|
import logging
|
|
5
|
+
from typing import Optional
|
|
6
|
+
from uuid import UUID
|
|
6
7
|
|
|
7
8
|
import sqlalchemy as sql
|
|
8
9
|
import sqlalchemy.orm as orm
|
|
@@ -10,8 +11,8 @@ import sqlalchemy.orm as orm
|
|
|
10
11
|
from .table_version import TableVersion
|
|
11
12
|
from .table_version_path import TableVersionPath
|
|
12
13
|
from .table import Table
|
|
13
|
-
from .named_function import NamedFunction
|
|
14
14
|
from .path_dict import PathDict
|
|
15
|
+
|
|
15
16
|
import pixeltable.env as env
|
|
16
17
|
import pixeltable.metadata.schema as schema
|
|
17
18
|
|
|
@@ -39,10 +40,10 @@ class Catalog:
|
|
|
39
40
|
# key: [id, version]
|
|
40
41
|
# - mutable version of a table: version == None (even though TableVersion.version is set correctly)
|
|
41
42
|
# - snapshot versions: records the version of the snapshot
|
|
42
|
-
self.tbl_versions:
|
|
43
|
+
self.tbl_versions: dict[tuple[UUID, Optional[int]], TableVersion] = {}
|
|
43
44
|
|
|
44
|
-
self.tbls:
|
|
45
|
-
self.tbl_dependents:
|
|
45
|
+
self.tbls: dict[UUID, Table] = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
|
|
46
|
+
self.tbl_dependents: dict[UUID, list[Table]] = {}
|
|
46
47
|
|
|
47
48
|
self._init_store()
|
|
48
49
|
self.paths = PathDict() # do this after _init_catalog()
|
|
@@ -133,7 +134,7 @@ class Catalog:
|
|
|
133
134
|
base_path=base_path if not is_snapshot else None)
|
|
134
135
|
view_path = TableVersionPath(tbl_version, base=base_path)
|
|
135
136
|
|
|
136
|
-
tbl = View(
|
|
137
|
+
tbl: Table = View(
|
|
137
138
|
tbl_record.id, tbl_record.dir_id, tbl_md.name, view_path, base_tbl_id,
|
|
138
139
|
snapshot_only=snapshot_only)
|
|
139
140
|
self.tbl_dependents[base_tbl_id].append(tbl)
|
pixeltable/catalog/column.py
CHANGED
|
@@ -1,15 +1,19 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
-
from typing import Any, Callable, Optional, Union
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Callable, Optional, Union
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
8
8
|
import pixeltable.exceptions as excs
|
|
9
9
|
import pixeltable.type_system as ts
|
|
10
|
+
from pixeltable import exprs
|
|
10
11
|
|
|
11
12
|
from .globals import is_valid_identifier
|
|
12
13
|
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from .table_version import TableVersion
|
|
16
|
+
|
|
13
17
|
_logger = logging.getLogger('pixeltable')
|
|
14
18
|
|
|
15
19
|
class Column:
|
|
@@ -20,7 +24,7 @@ class Column:
|
|
|
20
24
|
"""
|
|
21
25
|
def __init__(
|
|
22
26
|
self, name: Optional[str], col_type: Optional[ts.ColumnType] = None,
|
|
23
|
-
computed_with: Optional[Union[
|
|
27
|
+
computed_with: Optional[Union[exprs.Expr, Callable]] = None,
|
|
24
28
|
is_pk: bool = False, stored: bool = True,
|
|
25
29
|
col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
|
|
26
30
|
schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
|
|
@@ -57,15 +61,14 @@ class Column:
|
|
|
57
61
|
if col_type is None and computed_with is None:
|
|
58
62
|
raise excs.Error(f'Column `{name}`: col_type is required if computed_with is not specified')
|
|
59
63
|
|
|
60
|
-
self._value_expr: Optional[
|
|
64
|
+
self._value_expr: Optional[exprs.Expr] = None
|
|
61
65
|
self.compute_func: Optional[Callable] = None
|
|
62
66
|
self.value_expr_dict = value_expr_dict
|
|
63
|
-
from pixeltable import exprs
|
|
64
67
|
if computed_with is not None:
|
|
65
68
|
value_expr = exprs.Expr.from_object(computed_with)
|
|
66
69
|
if value_expr is None:
|
|
67
70
|
# computed_with needs to be a Callable
|
|
68
|
-
if not
|
|
71
|
+
if not callable(computed_with):
|
|
69
72
|
raise excs.Error(
|
|
70
73
|
f'Column {name}: computed_with needs to be either a Pixeltable expression or a Callable, '
|
|
71
74
|
f'but it is a {type(computed_with)}')
|
|
@@ -103,7 +106,7 @@ class Column:
|
|
|
103
106
|
self.tbl: Optional[TableVersion] = None # set by owning TableVersion
|
|
104
107
|
|
|
105
108
|
@property
|
|
106
|
-
def value_expr(self) -> Optional[
|
|
109
|
+
def value_expr(self) -> Optional[exprs.Expr]:
|
|
107
110
|
"""Instantiate value_expr on-demand"""
|
|
108
111
|
# TODO: instantiate expr in the c'tor and add an Expr.prepare() that can create additional state after the
|
|
109
112
|
# catalog has been fully loaded; that way, we encounter bugs in the serialization/deserialization logic earlier
|
|
@@ -112,7 +115,7 @@ class Column:
|
|
|
112
115
|
self._value_expr = exprs.Expr.from_dict(self.value_expr_dict)
|
|
113
116
|
return self._value_expr
|
|
114
117
|
|
|
115
|
-
def set_value_expr(self, value_expr:
|
|
118
|
+
def set_value_expr(self, value_expr: exprs.Expr) -> None:
|
|
116
119
|
self._value_expr = value_expr
|
|
117
120
|
self.value_expr_dict = None
|
|
118
121
|
|
|
@@ -130,7 +133,7 @@ class Column:
|
|
|
130
133
|
l = list(self.value_expr.subexprs(filter=lambda e: isinstance(e, exprs.FunctionCall) and e.is_window_fn_call))
|
|
131
134
|
return len(l) > 0
|
|
132
135
|
|
|
133
|
-
def get_idx_info(self) -> dict[str, '
|
|
136
|
+
def get_idx_info(self) -> dict[str, 'TableVersion.IndexInfo']:
|
|
134
137
|
assert self.tbl is not None
|
|
135
138
|
return {name: info for name, info in self.tbl.idxs_by_name.items() if info.col == self}
|
|
136
139
|
|
|
pixeltable/catalog/insertable_table.py
CHANGED
|
@@ -82,7 +82,7 @@ class InsertableTable(Table):
|
|
|
82
82
|
@overload
|
|
83
83
|
def insert(self, *, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any) -> UpdateStatus: ...
|
|
84
84
|
|
|
85
|
-
def insert(
|
|
85
|
+
def insert( # type: ignore[misc]
|
|
86
86
|
self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
|
|
87
87
|
fail_on_exception: bool = True, **kwargs: Any
|
|
88
88
|
) -> UpdateStatus:
|
pixeltable/catalog/path_dict.py
CHANGED
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import copy
|
|
4
4
|
import logging
|
|
5
|
-
from typing import Optional
|
|
5
|
+
from typing import Optional
|
|
6
6
|
from uuid import UUID
|
|
7
7
|
|
|
8
8
|
import sqlalchemy.orm as orm
|
|
@@ -10,6 +10,7 @@ import sqlalchemy.orm as orm
|
|
|
10
10
|
from pixeltable import exceptions as excs
|
|
11
11
|
from pixeltable.env import Env
|
|
12
12
|
from pixeltable.metadata import schema
|
|
13
|
+
|
|
13
14
|
from .dir import Dir
|
|
14
15
|
from .path import Path
|
|
15
16
|
from .schema_object import SchemaObject
|
|
@@ -19,8 +20,8 @@ _logger = logging.getLogger('pixeltable')
|
|
|
19
20
|
class PathDict:
|
|
20
21
|
"""Keep track of all paths in a Db instance"""
|
|
21
22
|
def __init__(self):
|
|
22
|
-
self.dir_contents:
|
|
23
|
-
self.schema_objs:
|
|
23
|
+
self.dir_contents: dict[UUID, dict[str, SchemaObject]] = {}
|
|
24
|
+
self.schema_objs: dict[UUID, SchemaObject] = {}
|
|
24
25
|
|
|
25
26
|
# load dirs
|
|
26
27
|
with orm.Session(Env.get().engine, future=True) as session:
|
|
@@ -36,7 +37,8 @@ class PathDict:
|
|
|
36
37
|
self.root_dir = root_dirs[0]
|
|
37
38
|
|
|
38
39
|
# build dir_contents
|
|
39
|
-
def record_dir(dir:
|
|
40
|
+
def record_dir(dir: SchemaObject) -> None:
|
|
41
|
+
assert isinstance(dir, Dir)
|
|
40
42
|
if dir._id in self.dir_contents:
|
|
41
43
|
return
|
|
42
44
|
else:
|
|
@@ -99,7 +101,7 @@ class PathDict:
|
|
|
99
101
|
assert to_path.name not in self.dir_contents[to_dir._id]
|
|
100
102
|
self.dir_contents[to_dir._id][to_path.name] = obj
|
|
101
103
|
|
|
102
|
-
def check_is_valid(self, path: Path, expected: Optional[
|
|
104
|
+
def check_is_valid(self, path: Path, expected: Optional[type[SchemaObject]]) -> None:
|
|
103
105
|
"""Check that path is valid and that the object at path has the expected type.
|
|
104
106
|
|
|
105
107
|
Args:
|
|
@@ -124,7 +126,7 @@ class PathDict:
|
|
|
124
126
|
obj = self.dir_contents[parent_obj._id][path.name]
|
|
125
127
|
raise excs.Error(f"{type(obj)._display_name()} '{str(path)}' already exists")
|
|
126
128
|
|
|
127
|
-
def get_children(self, parent: Path, child_type: Optional[
|
|
129
|
+
def get_children(self, parent: Path, child_type: Optional[type[SchemaObject]], recursive: bool) -> list[Path]:
|
|
128
130
|
dir = self._resolve_path(parent)
|
|
129
131
|
if not isinstance(dir, Dir):
|
|
130
132
|
raise excs.Error(f'{str(parent)} is a {type(dir)._display_name()}, not a directory')
|
pixeltable/catalog/table.py
CHANGED
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import abc
|
|
4
|
-
import
|
|
4
|
+
import builtins
|
|
5
5
|
import json
|
|
6
6
|
import logging
|
|
7
7
|
from pathlib import Path
|
|
8
|
-
from typing import Any, Callable, Iterable, Literal, Optional, Set, Tuple, Type, Union, overload
|
|
8
|
+
from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Set, Tuple, Type, Union, overload
|
|
9
9
|
from uuid import UUID
|
|
10
10
|
|
|
11
11
|
import pandas as pd
|
|
12
|
+
import pandas.io.formats.style
|
|
12
13
|
import sqlalchemy as sql
|
|
13
14
|
|
|
14
15
|
import pixeltable
|
|
@@ -26,6 +27,9 @@ from .schema_object import SchemaObject
|
|
|
26
27
|
from .table_version import TableVersion
|
|
27
28
|
from .table_version_path import TableVersionPath
|
|
28
29
|
|
|
30
|
+
if TYPE_CHECKING:
|
|
31
|
+
import torch.utils.data
|
|
32
|
+
|
|
29
33
|
_logger = logging.getLogger('pixeltable')
|
|
30
34
|
|
|
31
35
|
class Table(SchemaObject):
|
|
@@ -211,23 +215,24 @@ class Table(SchemaObject):
|
|
|
211
215
|
})
|
|
212
216
|
return df
|
|
213
217
|
|
|
214
|
-
def _description_html(self) ->
|
|
218
|
+
def _description_html(self) -> pandas.io.formats.style.Styler:
|
|
215
219
|
pd_df = self._description()
|
|
216
220
|
# white-space: pre-wrap: print \n as newline
|
|
217
221
|
# th: center-align headings
|
|
218
|
-
return
|
|
219
|
-
.
|
|
222
|
+
return (
|
|
223
|
+
pd_df.style.set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left'})
|
|
224
|
+
.set_table_styles([dict(selector='th', props=[('text-align', 'center')])])
|
|
220
225
|
.hide(axis='index')
|
|
226
|
+
)
|
|
221
227
|
|
|
222
228
|
def describe(self) -> None:
|
|
223
229
|
"""
|
|
224
230
|
Print the table schema.
|
|
225
231
|
"""
|
|
226
|
-
|
|
227
|
-
__IPYTHON__
|
|
232
|
+
if getattr(builtins, '__IPYTHON__', False):
|
|
228
233
|
from IPython.display import display
|
|
229
234
|
display(self._description_html())
|
|
230
|
-
|
|
235
|
+
else:
|
|
231
236
|
print(self.__repr__())
|
|
232
237
|
|
|
233
238
|
# TODO: Display comments in _repr_html()
|
|
@@ -240,7 +245,7 @@ class Table(SchemaObject):
|
|
|
240
245
|
return f'{self._display_name()} \'{self._name}\'\n{comment}{description_str}'
|
|
241
246
|
|
|
242
247
|
def _repr_html_(self) -> str:
|
|
243
|
-
return self._description_html()._repr_html_()
|
|
248
|
+
return self._description_html()._repr_html_() # type: ignore[attr-defined]
|
|
244
249
|
|
|
245
250
|
def _drop(self) -> None:
|
|
246
251
|
self._check_is_dropped()
|
|
@@ -282,7 +287,7 @@ class Table(SchemaObject):
|
|
|
282
287
|
raise excs.Error(f'Column name must be a string, got {type(col_name)}')
|
|
283
288
|
if not isinstance(spec, (ts.ColumnType, exprs.Expr)):
|
|
284
289
|
raise excs.Error(f'Column spec must be a ColumnType or an Expr, got {type(spec)}')
|
|
285
|
-
self.add_column(**{col_name: spec})
|
|
290
|
+
self.add_column(type=None, stored=None, print_stats=False, **{col_name: spec})
|
|
286
291
|
|
|
287
292
|
def add_column(
|
|
288
293
|
self,
|
|
@@ -368,7 +373,7 @@ class Table(SchemaObject):
|
|
|
368
373
|
col_schema['stored'] = stored
|
|
369
374
|
|
|
370
375
|
new_col = self._create_columns({col_name: col_schema})[0]
|
|
371
|
-
self._verify_column(new_col, set(self._schema.keys()), self._query_names)
|
|
376
|
+
self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
|
|
372
377
|
return self._tbl_version.add_column(new_col, print_stats=print_stats)
|
|
373
378
|
|
|
374
379
|
@classmethod
|
|
@@ -395,7 +400,7 @@ class Table(SchemaObject):
|
|
|
395
400
|
value_expr = exprs.Expr.from_object(value_spec)
|
|
396
401
|
if value_expr is None:
|
|
397
402
|
# needs to be a Callable
|
|
398
|
-
if not
|
|
403
|
+
if not callable(value_spec):
|
|
399
404
|
raise excs.Error(
|
|
400
405
|
f'Column {name}: value needs to be either a Pixeltable expression or a Callable, '
|
|
401
406
|
f'but it is a {type(value_spec)}')
|
|
@@ -427,7 +432,7 @@ class Table(SchemaObject):
|
|
|
427
432
|
elif isinstance(spec, exprs.Expr):
|
|
428
433
|
# create copy so we can modify it
|
|
429
434
|
value_expr = spec.copy()
|
|
430
|
-
elif
|
|
435
|
+
elif callable(spec):
|
|
431
436
|
raise excs.Error((
|
|
432
437
|
f'Column {name} computed with a Callable: specify using a dictionary with '
|
|
433
438
|
f'the "value" and "type" keys (e.g., "{name}": {{"value": <Callable>, "type": IntType()}})'
|
|
@@ -546,6 +551,7 @@ class Table(SchemaObject):
|
|
|
546
551
|
metric: str = 'cosine'
|
|
547
552
|
) -> None:
|
|
548
553
|
"""Add an index to the table.
|
|
554
|
+
|
|
549
555
|
Args:
|
|
550
556
|
col_name: name of column to index
|
|
551
557
|
idx_name: name of index, which needs to be unique for the table; if not provided, a name will be generated
|
|
@@ -656,7 +662,7 @@ class Table(SchemaObject):
|
|
|
656
662
|
@overload
|
|
657
663
|
def insert(self, *, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any) -> UpdateStatus: ...
|
|
658
664
|
|
|
659
|
-
@abc.abstractmethod
|
|
665
|
+
@abc.abstractmethod # type: ignore[misc]
|
|
660
666
|
def insert(
|
|
661
667
|
self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
|
|
662
668
|
fail_on_exception: bool = True, **kwargs: Any
|