pixeltable 0.2.17__py3-none-any.whl → 0.2.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +8 -7
- pixeltable/catalog/column.py +11 -8
- pixeltable/catalog/insertable_table.py +1 -1
- pixeltable/catalog/path_dict.py +8 -6
- pixeltable/catalog/table.py +20 -13
- pixeltable/catalog/table_version.py +91 -54
- pixeltable/catalog/table_version_path.py +7 -9
- pixeltable/catalog/view.py +2 -1
- pixeltable/dataframe.py +1 -1
- pixeltable/env.py +173 -83
- pixeltable/exec/aggregation_node.py +2 -1
- pixeltable/exec/component_iteration_node.py +1 -1
- pixeltable/exec/sql_node.py +11 -8
- pixeltable/exprs/__init__.py +1 -0
- pixeltable/exprs/arithmetic_expr.py +4 -4
- pixeltable/exprs/array_slice.py +2 -1
- pixeltable/exprs/column_property_ref.py +9 -7
- pixeltable/exprs/column_ref.py +2 -1
- pixeltable/exprs/comparison.py +10 -7
- pixeltable/exprs/compound_predicate.py +3 -2
- pixeltable/exprs/data_row.py +19 -4
- pixeltable/exprs/expr.py +46 -35
- pixeltable/exprs/expr_set.py +32 -9
- pixeltable/exprs/function_call.py +56 -32
- pixeltable/exprs/in_predicate.py +3 -2
- pixeltable/exprs/inline_array.py +2 -1
- pixeltable/exprs/inline_dict.py +2 -1
- pixeltable/exprs/is_null.py +3 -2
- pixeltable/exprs/json_mapper.py +5 -4
- pixeltable/exprs/json_path.py +7 -1
- pixeltable/exprs/literal.py +34 -7
- pixeltable/exprs/method_ref.py +3 -3
- pixeltable/exprs/object_ref.py +6 -5
- pixeltable/exprs/row_builder.py +25 -17
- pixeltable/exprs/rowid_ref.py +2 -1
- pixeltable/exprs/similarity_expr.py +2 -1
- pixeltable/exprs/sql_element_cache.py +30 -0
- pixeltable/exprs/type_cast.py +3 -3
- pixeltable/exprs/variable.py +2 -1
- pixeltable/ext/functions/whisperx.py +4 -4
- pixeltable/ext/functions/yolox.py +6 -6
- pixeltable/func/aggregate_function.py +1 -0
- pixeltable/func/function.py +28 -4
- pixeltable/functions/__init__.py +4 -2
- pixeltable/functions/anthropic.py +15 -5
- pixeltable/functions/fireworks.py +1 -1
- pixeltable/functions/globals.py +6 -1
- pixeltable/functions/huggingface.py +2 -2
- pixeltable/functions/image.py +17 -2
- pixeltable/functions/json.py +5 -5
- pixeltable/functions/mistralai.py +188 -0
- pixeltable/functions/openai.py +6 -10
- pixeltable/functions/string.py +3 -2
- pixeltable/functions/timestamp.py +95 -7
- pixeltable/functions/together.py +4 -4
- pixeltable/functions/video.py +2 -2
- pixeltable/functions/vision.py +27 -17
- pixeltable/functions/whisper.py +1 -1
- pixeltable/io/hf_datasets.py +17 -15
- pixeltable/io/pandas.py +0 -2
- pixeltable/io/parquet.py +15 -14
- pixeltable/iterators/document.py +16 -15
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_19.py +46 -0
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +5 -4
- pixeltable/plan.py +100 -78
- pixeltable/store.py +5 -1
- pixeltable/tool/create_test_db_dump.py +4 -3
- pixeltable/type_system.py +12 -14
- pixeltable/utils/documents.py +45 -42
- pixeltable/utils/formatter.py +2 -2
- {pixeltable-0.2.17.dist-info → pixeltable-0.2.18.dist-info}/METADATA +79 -21
- pixeltable-0.2.18.dist-info/RECORD +147 -0
- pixeltable-0.2.17.dist-info/RECORD +0 -144
- {pixeltable-0.2.17.dist-info → pixeltable-0.2.18.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.17.dist-info → pixeltable-0.2.18.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.17.dist-info → pixeltable-0.2.18.dist-info}/entry_points.txt +0 -0
pixeltable/__version__.py
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
# These version placeholders will be replaced during build.
|
|
2
|
-
__version__ = "0.2.17"
|
|
3
|
-
__version_tuple__ = (0, 2, 17)
|
|
2
|
+
__version__ = "0.2.18"
|
|
3
|
+
__version_tuple__ = (0, 2, 18)
|
pixeltable/catalog/catalog.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from uuid import UUID
|
|
2
|
+
|
|
4
3
|
import dataclasses
|
|
5
4
|
import logging
|
|
5
|
+
from typing import Optional
|
|
6
|
+
from uuid import UUID
|
|
6
7
|
|
|
7
8
|
import sqlalchemy as sql
|
|
8
9
|
import sqlalchemy.orm as orm
|
|
@@ -10,8 +11,8 @@ import sqlalchemy.orm as orm
|
|
|
10
11
|
from .table_version import TableVersion
|
|
11
12
|
from .table_version_path import TableVersionPath
|
|
12
13
|
from .table import Table
|
|
13
|
-
from .named_function import NamedFunction
|
|
14
14
|
from .path_dict import PathDict
|
|
15
|
+
|
|
15
16
|
import pixeltable.env as env
|
|
16
17
|
import pixeltable.metadata.schema as schema
|
|
17
18
|
|
|
@@ -39,10 +40,10 @@ class Catalog:
|
|
|
39
40
|
# key: [id, version]
|
|
40
41
|
# - mutable version of a table: version == None (even though TableVersion.version is set correctly)
|
|
41
42
|
# - snapshot versions: records the version of the snapshot
|
|
42
|
-
self.tbl_versions:
|
|
43
|
+
self.tbl_versions: dict[tuple[UUID, Optional[int]], TableVersion] = {}
|
|
43
44
|
|
|
44
|
-
self.tbls:
|
|
45
|
-
self.tbl_dependents:
|
|
45
|
+
self.tbls: dict[UUID, Table] = {} # don't use a defaultdict here, it doesn't cooperate with the debugger
|
|
46
|
+
self.tbl_dependents: dict[UUID, list[Table]] = {}
|
|
46
47
|
|
|
47
48
|
self._init_store()
|
|
48
49
|
self.paths = PathDict() # do this after _init_catalog()
|
|
@@ -133,7 +134,7 @@ class Catalog:
|
|
|
133
134
|
base_path=base_path if not is_snapshot else None)
|
|
134
135
|
view_path = TableVersionPath(tbl_version, base=base_path)
|
|
135
136
|
|
|
136
|
-
tbl = View(
|
|
137
|
+
tbl: Table = View(
|
|
137
138
|
tbl_record.id, tbl_record.dir_id, tbl_md.name, view_path, base_tbl_id,
|
|
138
139
|
snapshot_only=snapshot_only)
|
|
139
140
|
self.tbl_dependents[base_tbl_id].append(tbl)
|
pixeltable/catalog/column.py
CHANGED
|
@@ -1,15 +1,19 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
-
from typing import Any, Callable, Optional, Union
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Callable, Optional, Union
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
8
8
|
import pixeltable.exceptions as excs
|
|
9
9
|
import pixeltable.type_system as ts
|
|
10
|
+
from pixeltable import exprs
|
|
10
11
|
|
|
11
12
|
from .globals import is_valid_identifier
|
|
12
13
|
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from .table_version import TableVersion
|
|
16
|
+
|
|
13
17
|
_logger = logging.getLogger('pixeltable')
|
|
14
18
|
|
|
15
19
|
class Column:
|
|
@@ -20,7 +24,7 @@ class Column:
|
|
|
20
24
|
"""
|
|
21
25
|
def __init__(
|
|
22
26
|
self, name: Optional[str], col_type: Optional[ts.ColumnType] = None,
|
|
23
|
-
computed_with: Optional[Union[
|
|
27
|
+
computed_with: Optional[Union[exprs.Expr, Callable]] = None,
|
|
24
28
|
is_pk: bool = False, stored: bool = True,
|
|
25
29
|
col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
|
|
26
30
|
schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
|
|
@@ -57,15 +61,14 @@ class Column:
|
|
|
57
61
|
if col_type is None and computed_with is None:
|
|
58
62
|
raise excs.Error(f'Column `{name}`: col_type is required if computed_with is not specified')
|
|
59
63
|
|
|
60
|
-
self._value_expr: Optional[
|
|
64
|
+
self._value_expr: Optional[exprs.Expr] = None
|
|
61
65
|
self.compute_func: Optional[Callable] = None
|
|
62
66
|
self.value_expr_dict = value_expr_dict
|
|
63
|
-
from pixeltable import exprs
|
|
64
67
|
if computed_with is not None:
|
|
65
68
|
value_expr = exprs.Expr.from_object(computed_with)
|
|
66
69
|
if value_expr is None:
|
|
67
70
|
# computed_with needs to be a Callable
|
|
68
|
-
if not
|
|
71
|
+
if not callable(computed_with):
|
|
69
72
|
raise excs.Error(
|
|
70
73
|
f'Column {name}: computed_with needs to be either a Pixeltable expression or a Callable, '
|
|
71
74
|
f'but it is a {type(computed_with)}')
|
|
@@ -103,7 +106,7 @@ class Column:
|
|
|
103
106
|
self.tbl: Optional[TableVersion] = None # set by owning TableVersion
|
|
104
107
|
|
|
105
108
|
@property
|
|
106
|
-
def value_expr(self) -> Optional[
|
|
109
|
+
def value_expr(self) -> Optional[exprs.Expr]:
|
|
107
110
|
"""Instantiate value_expr on-demand"""
|
|
108
111
|
# TODO: instantiate expr in the c'tor and add an Expr.prepare() that can create additional state after the
|
|
109
112
|
# catalog has been fully loaded; that way, we encounter bugs in the serialization/deserialization logic earlier
|
|
@@ -112,7 +115,7 @@ class Column:
|
|
|
112
115
|
self._value_expr = exprs.Expr.from_dict(self.value_expr_dict)
|
|
113
116
|
return self._value_expr
|
|
114
117
|
|
|
115
|
-
def set_value_expr(self, value_expr:
|
|
118
|
+
def set_value_expr(self, value_expr: exprs.Expr) -> None:
|
|
116
119
|
self._value_expr = value_expr
|
|
117
120
|
self.value_expr_dict = None
|
|
118
121
|
|
|
@@ -130,7 +133,7 @@ class Column:
|
|
|
130
133
|
l = list(self.value_expr.subexprs(filter=lambda e: isinstance(e, exprs.FunctionCall) and e.is_window_fn_call))
|
|
131
134
|
return len(l) > 0
|
|
132
135
|
|
|
133
|
-
def get_idx_info(self) -> dict[str, '
|
|
136
|
+
def get_idx_info(self) -> dict[str, 'TableVersion.IndexInfo']:
|
|
134
137
|
assert self.tbl is not None
|
|
135
138
|
return {name: info for name, info in self.tbl.idxs_by_name.items() if info.col == self}
|
|
136
139
|
|
|
pixeltable/catalog/insertable_table.py
CHANGED
|
@@ -82,7 +82,7 @@ class InsertableTable(Table):
|
|
|
82
82
|
@overload
|
|
83
83
|
def insert(self, *, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any) -> UpdateStatus: ...
|
|
84
84
|
|
|
85
|
-
def insert(
|
|
85
|
+
def insert( # type: ignore[misc]
|
|
86
86
|
self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
|
|
87
87
|
fail_on_exception: bool = True, **kwargs: Any
|
|
88
88
|
) -> UpdateStatus:
|
pixeltable/catalog/path_dict.py
CHANGED
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import copy
|
|
4
4
|
import logging
|
|
5
|
-
from typing import Optional
|
|
5
|
+
from typing import Optional
|
|
6
6
|
from uuid import UUID
|
|
7
7
|
|
|
8
8
|
import sqlalchemy.orm as orm
|
|
@@ -10,6 +10,7 @@ import sqlalchemy.orm as orm
|
|
|
10
10
|
from pixeltable import exceptions as excs
|
|
11
11
|
from pixeltable.env import Env
|
|
12
12
|
from pixeltable.metadata import schema
|
|
13
|
+
|
|
13
14
|
from .dir import Dir
|
|
14
15
|
from .path import Path
|
|
15
16
|
from .schema_object import SchemaObject
|
|
@@ -19,8 +20,8 @@ _logger = logging.getLogger('pixeltable')
|
|
|
19
20
|
class PathDict:
|
|
20
21
|
"""Keep track of all paths in a Db instance"""
|
|
21
22
|
def __init__(self):
|
|
22
|
-
self.dir_contents:
|
|
23
|
-
self.schema_objs:
|
|
23
|
+
self.dir_contents: dict[UUID, dict[str, SchemaObject]] = {}
|
|
24
|
+
self.schema_objs: dict[UUID, SchemaObject] = {}
|
|
24
25
|
|
|
25
26
|
# load dirs
|
|
26
27
|
with orm.Session(Env.get().engine, future=True) as session:
|
|
@@ -36,7 +37,8 @@ class PathDict:
|
|
|
36
37
|
self.root_dir = root_dirs[0]
|
|
37
38
|
|
|
38
39
|
# build dir_contents
|
|
39
|
-
def record_dir(dir:
|
|
40
|
+
def record_dir(dir: SchemaObject) -> None:
|
|
41
|
+
assert isinstance(dir, Dir)
|
|
40
42
|
if dir._id in self.dir_contents:
|
|
41
43
|
return
|
|
42
44
|
else:
|
|
@@ -99,7 +101,7 @@ class PathDict:
|
|
|
99
101
|
assert to_path.name not in self.dir_contents[to_dir._id]
|
|
100
102
|
self.dir_contents[to_dir._id][to_path.name] = obj
|
|
101
103
|
|
|
102
|
-
def check_is_valid(self, path: Path, expected: Optional[
|
|
104
|
+
def check_is_valid(self, path: Path, expected: Optional[type[SchemaObject]]) -> None:
|
|
103
105
|
"""Check that path is valid and that the object at path has the expected type.
|
|
104
106
|
|
|
105
107
|
Args:
|
|
@@ -124,7 +126,7 @@ class PathDict:
|
|
|
124
126
|
obj = self.dir_contents[parent_obj._id][path.name]
|
|
125
127
|
raise excs.Error(f"{type(obj)._display_name()} '{str(path)}' already exists")
|
|
126
128
|
|
|
127
|
-
def get_children(self, parent: Path, child_type: Optional[
|
|
129
|
+
def get_children(self, parent: Path, child_type: Optional[type[SchemaObject]], recursive: bool) -> list[Path]:
|
|
128
130
|
dir = self._resolve_path(parent)
|
|
129
131
|
if not isinstance(dir, Dir):
|
|
130
132
|
raise excs.Error(f'{str(parent)} is a {type(dir)._display_name()}, not a directory')
|
pixeltable/catalog/table.py
CHANGED
|
@@ -1,14 +1,16 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import abc
|
|
4
|
+
import builtins
|
|
4
5
|
import itertools
|
|
5
6
|
import json
|
|
6
7
|
import logging
|
|
7
8
|
from pathlib import Path
|
|
8
|
-
from typing import Any, Callable, Iterable, Literal, Optional, Set, Tuple, Type, Union, overload
|
|
9
|
+
from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Optional, Set, Tuple, Type, Union, overload
|
|
9
10
|
from uuid import UUID
|
|
10
11
|
|
|
11
12
|
import pandas as pd
|
|
13
|
+
import pandas.io.formats.style
|
|
12
14
|
import sqlalchemy as sql
|
|
13
15
|
|
|
14
16
|
import pixeltable
|
|
@@ -26,6 +28,9 @@ from .schema_object import SchemaObject
|
|
|
26
28
|
from .table_version import TableVersion
|
|
27
29
|
from .table_version_path import TableVersionPath
|
|
28
30
|
|
|
31
|
+
if TYPE_CHECKING:
|
|
32
|
+
import torch.utils.data
|
|
33
|
+
|
|
29
34
|
_logger = logging.getLogger('pixeltable')
|
|
30
35
|
|
|
31
36
|
class Table(SchemaObject):
|
|
@@ -211,23 +216,24 @@ class Table(SchemaObject):
|
|
|
211
216
|
})
|
|
212
217
|
return df
|
|
213
218
|
|
|
214
|
-
def _description_html(self) ->
|
|
219
|
+
def _description_html(self) -> pandas.io.formats.style.Styler:
|
|
215
220
|
pd_df = self._description()
|
|
216
221
|
# white-space: pre-wrap: print \n as newline
|
|
217
222
|
# th: center-align headings
|
|
218
|
-
return
|
|
219
|
-
.
|
|
223
|
+
return (
|
|
224
|
+
pd_df.style.set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left'})
|
|
225
|
+
.set_table_styles([dict(selector='th', props=[('text-align', 'center')])])
|
|
220
226
|
.hide(axis='index')
|
|
227
|
+
)
|
|
221
228
|
|
|
222
229
|
def describe(self) -> None:
|
|
223
230
|
"""
|
|
224
231
|
Print the table schema.
|
|
225
232
|
"""
|
|
226
|
-
|
|
227
|
-
__IPYTHON__
|
|
233
|
+
if getattr(builtins, '__IPYTHON__', False):
|
|
228
234
|
from IPython.display import display
|
|
229
235
|
display(self._description_html())
|
|
230
|
-
|
|
236
|
+
else:
|
|
231
237
|
print(self.__repr__())
|
|
232
238
|
|
|
233
239
|
# TODO: Display comments in _repr_html()
|
|
@@ -240,7 +246,7 @@ class Table(SchemaObject):
|
|
|
240
246
|
return f'{self._display_name()} \'{self._name}\'\n{comment}{description_str}'
|
|
241
247
|
|
|
242
248
|
def _repr_html_(self) -> str:
|
|
243
|
-
return self._description_html()._repr_html_()
|
|
249
|
+
return self._description_html()._repr_html_() # type: ignore[attr-defined]
|
|
244
250
|
|
|
245
251
|
def _drop(self) -> None:
|
|
246
252
|
self._check_is_dropped()
|
|
@@ -282,7 +288,7 @@ class Table(SchemaObject):
|
|
|
282
288
|
raise excs.Error(f'Column name must be a string, got {type(col_name)}')
|
|
283
289
|
if not isinstance(spec, (ts.ColumnType, exprs.Expr)):
|
|
284
290
|
raise excs.Error(f'Column spec must be a ColumnType or an Expr, got {type(spec)}')
|
|
285
|
-
self.add_column(**{col_name: spec})
|
|
291
|
+
self.add_column(type=None, stored=None, print_stats=False, **{col_name: spec})
|
|
286
292
|
|
|
287
293
|
def add_column(
|
|
288
294
|
self,
|
|
@@ -368,7 +374,7 @@ class Table(SchemaObject):
|
|
|
368
374
|
col_schema['stored'] = stored
|
|
369
375
|
|
|
370
376
|
new_col = self._create_columns({col_name: col_schema})[0]
|
|
371
|
-
self._verify_column(new_col, set(self._schema.keys()), self._query_names)
|
|
377
|
+
self._verify_column(new_col, set(self._schema.keys()), set(self._query_names))
|
|
372
378
|
return self._tbl_version.add_column(new_col, print_stats=print_stats)
|
|
373
379
|
|
|
374
380
|
@classmethod
|
|
@@ -395,7 +401,7 @@ class Table(SchemaObject):
|
|
|
395
401
|
value_expr = exprs.Expr.from_object(value_spec)
|
|
396
402
|
if value_expr is None:
|
|
397
403
|
# needs to be a Callable
|
|
398
|
-
if not
|
|
404
|
+
if not callable(value_spec):
|
|
399
405
|
raise excs.Error(
|
|
400
406
|
f'Column {name}: value needs to be either a Pixeltable expression or a Callable, '
|
|
401
407
|
f'but it is a {type(value_spec)}')
|
|
@@ -427,7 +433,7 @@ class Table(SchemaObject):
|
|
|
427
433
|
elif isinstance(spec, exprs.Expr):
|
|
428
434
|
# create copy so we can modify it
|
|
429
435
|
value_expr = spec.copy()
|
|
430
|
-
elif
|
|
436
|
+
elif callable(spec):
|
|
431
437
|
raise excs.Error((
|
|
432
438
|
f'Column {name} computed with a Callable: specify using a dictionary with '
|
|
433
439
|
f'the "value" and "type" keys (e.g., "{name}": {{"value": <Callable>, "type": IntType()}})'
|
|
@@ -546,6 +552,7 @@ class Table(SchemaObject):
|
|
|
546
552
|
metric: str = 'cosine'
|
|
547
553
|
) -> None:
|
|
548
554
|
"""Add an index to the table.
|
|
555
|
+
|
|
549
556
|
Args:
|
|
550
557
|
col_name: name of column to index
|
|
551
558
|
idx_name: name of index, which needs to be unique for the table; if not provided, a name will be generated
|
|
@@ -656,7 +663,7 @@ class Table(SchemaObject):
|
|
|
656
663
|
@overload
|
|
657
664
|
def insert(self, *, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any) -> UpdateStatus: ...
|
|
658
665
|
|
|
659
|
-
@abc.abstractmethod
|
|
666
|
+
@abc.abstractmethod # type: ignore[misc]
|
|
660
667
|
def insert(
|
|
661
668
|
self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
|
|
662
669
|
fail_on_exception: bool = True, **kwargs: Any
|