pixeltable 0.2.20__py3-none-any.whl → 0.2.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +7 -19
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +7 -7
- pixeltable/catalog/column.py +37 -11
- pixeltable/catalog/globals.py +21 -0
- pixeltable/catalog/insertable_table.py +6 -4
- pixeltable/catalog/table.py +227 -148
- pixeltable/catalog/table_version.py +66 -28
- pixeltable/catalog/table_version_path.py +0 -8
- pixeltable/catalog/view.py +18 -19
- pixeltable/dataframe.py +16 -32
- pixeltable/env.py +6 -1
- pixeltable/exec/__init__.py +1 -2
- pixeltable/exec/aggregation_node.py +27 -17
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/data_row_batch.py +9 -26
- pixeltable/exec/exec_node.py +36 -7
- pixeltable/exec/expr_eval_node.py +19 -11
- pixeltable/exec/in_memory_data_node.py +14 -11
- pixeltable/exec/sql_node.py +266 -138
- pixeltable/exprs/__init__.py +1 -0
- pixeltable/exprs/arithmetic_expr.py +3 -1
- pixeltable/exprs/array_slice.py +7 -7
- pixeltable/exprs/column_property_ref.py +37 -10
- pixeltable/exprs/column_ref.py +93 -14
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +8 -7
- pixeltable/exprs/data_row.py +56 -36
- pixeltable/exprs/expr.py +65 -63
- pixeltable/exprs/expr_dict.py +55 -0
- pixeltable/exprs/expr_set.py +26 -15
- pixeltable/exprs/function_call.py +53 -24
- pixeltable/exprs/globals.py +4 -1
- pixeltable/exprs/in_predicate.py +8 -7
- pixeltable/exprs/inline_expr.py +4 -4
- pixeltable/exprs/is_null.py +4 -4
- pixeltable/exprs/json_mapper.py +11 -12
- pixeltable/exprs/json_path.py +5 -10
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +5 -4
- pixeltable/exprs/object_ref.py +2 -1
- pixeltable/exprs/row_builder.py +88 -36
- pixeltable/exprs/rowid_ref.py +14 -13
- pixeltable/exprs/similarity_expr.py +12 -7
- pixeltable/exprs/sql_element_cache.py +12 -6
- pixeltable/exprs/type_cast.py +8 -6
- pixeltable/exprs/variable.py +5 -4
- pixeltable/ext/functions/whisperx.py +7 -2
- pixeltable/func/aggregate_function.py +1 -1
- pixeltable/func/callable_function.py +2 -2
- pixeltable/func/function.py +11 -10
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/query_template_function.py +11 -12
- pixeltable/func/signature.py +17 -15
- pixeltable/func/udf.py +0 -4
- pixeltable/functions/__init__.py +2 -2
- pixeltable/functions/audio.py +4 -6
- pixeltable/functions/globals.py +84 -42
- pixeltable/functions/huggingface.py +31 -34
- pixeltable/functions/image.py +59 -45
- pixeltable/functions/json.py +0 -1
- pixeltable/functions/llama_cpp.py +106 -0
- pixeltable/functions/mistralai.py +2 -2
- pixeltable/functions/ollama.py +147 -0
- pixeltable/functions/openai.py +22 -25
- pixeltable/functions/replicate.py +72 -0
- pixeltable/functions/string.py +59 -50
- pixeltable/functions/timestamp.py +20 -20
- pixeltable/functions/together.py +2 -2
- pixeltable/functions/video.py +11 -20
- pixeltable/functions/whisper.py +2 -20
- pixeltable/globals.py +65 -74
- pixeltable/index/base.py +2 -2
- pixeltable/index/btree.py +20 -7
- pixeltable/index/embedding_index.py +12 -14
- pixeltable/io/__init__.py +1 -2
- pixeltable/io/external_store.py +11 -5
- pixeltable/io/fiftyone.py +178 -0
- pixeltable/io/globals.py +98 -2
- pixeltable/io/hf_datasets.py +1 -1
- pixeltable/io/label_studio.py +6 -6
- pixeltable/io/parquet.py +14 -13
- pixeltable/iterators/base.py +3 -2
- pixeltable/iterators/document.py +10 -8
- pixeltable/iterators/video.py +126 -60
- pixeltable/metadata/__init__.py +4 -3
- pixeltable/metadata/converters/convert_14.py +4 -2
- pixeltable/metadata/converters/convert_15.py +1 -1
- pixeltable/metadata/converters/convert_19.py +1 -0
- pixeltable/metadata/converters/convert_20.py +1 -1
- pixeltable/metadata/converters/convert_21.py +34 -0
- pixeltable/metadata/converters/util.py +54 -12
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +40 -21
- pixeltable/plan.py +149 -165
- pixeltable/py.typed +0 -0
- pixeltable/store.py +57 -37
- pixeltable/tool/create_test_db_dump.py +6 -6
- pixeltable/tool/create_test_video.py +1 -1
- pixeltable/tool/doc_plugins/griffe.py +3 -34
- pixeltable/tool/embed_udf.py +1 -1
- pixeltable/tool/mypy_plugin.py +55 -0
- pixeltable/type_system.py +260 -61
- pixeltable/utils/arrow.py +10 -9
- pixeltable/utils/coco.py +4 -4
- pixeltable/utils/documents.py +16 -2
- pixeltable/utils/filecache.py +9 -9
- pixeltable/utils/formatter.py +10 -11
- pixeltable/utils/http_server.py +2 -5
- pixeltable/utils/media_store.py +6 -6
- pixeltable/utils/pytorch.py +10 -11
- pixeltable/utils/sql.py +2 -1
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/METADATA +50 -13
- pixeltable-0.2.22.dist-info/RECORD +153 -0
- pixeltable/exec/media_validation_node.py +0 -43
- pixeltable/utils/help.py +0 -11
- pixeltable-0.2.20.dist-info/RECORD +0 -147
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/entry_points.txt +0 -0
pixeltable/exprs/rowid_ref.py
CHANGED
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from
|
|
2
|
+
|
|
3
|
+
from typing import Any, Optional
|
|
4
4
|
from uuid import UUID
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
import pixeltable.catalog as catalog
|
|
9
|
+
import pixeltable.type_system as ts
|
|
10
|
+
|
|
9
11
|
from .data_row import DataRow
|
|
12
|
+
from .expr import Expr
|
|
10
13
|
from .row_builder import RowBuilder
|
|
11
|
-
|
|
12
|
-
import pixeltable.catalog as catalog
|
|
14
|
+
from .sql_element_cache import SqlElementCache
|
|
13
15
|
|
|
14
16
|
|
|
15
17
|
class RowidRef(Expr):
|
|
@@ -49,14 +51,14 @@ class RowidRef(Expr):
|
|
|
49
51
|
return self.normalized_base_id == other.normalized_base_id \
|
|
50
52
|
and self.rowid_component_idx == other.rowid_component_idx
|
|
51
53
|
|
|
52
|
-
def _id_attrs(self) ->
|
|
54
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
53
55
|
return super()._id_attrs() +\
|
|
54
56
|
[('normalized_base_id', self.normalized_base_id), ('idx', self.rowid_component_idx)]
|
|
55
57
|
|
|
56
58
|
def __str__(self) -> str:
|
|
57
59
|
# check if this is the pos column of a component view
|
|
58
60
|
tbl = self.tbl if self.tbl is not None else catalog.Catalog.get().tbl_versions[(self.tbl_id, None)]
|
|
59
|
-
if tbl.is_component_view() and self.rowid_component_idx == tbl.store_tbl.pos_col_idx:
|
|
61
|
+
if tbl.is_component_view() and self.rowid_component_idx == tbl.store_tbl.pos_col_idx: # type: ignore[attr-defined]
|
|
60
62
|
return catalog.globals._POS_COLUMN_NAME
|
|
61
63
|
return ''
|
|
62
64
|
|
|
@@ -68,12 +70,12 @@ class RowidRef(Expr):
|
|
|
68
70
|
"""
|
|
69
71
|
if self.tbl_id == tbl.tbl_version.id:
|
|
70
72
|
return
|
|
71
|
-
|
|
72
|
-
assert self.tbl_id in
|
|
73
|
+
base_ids = [tbl_version.id for tbl_version in tbl.get_tbl_versions()]
|
|
74
|
+
assert self.tbl_id in base_ids # our current TableVersion is a base of the new TableVersion
|
|
73
75
|
self.tbl = tbl.tbl_version
|
|
74
76
|
self.tbl_id = self.tbl.id
|
|
75
77
|
|
|
76
|
-
def sql_expr(self, _: SqlElementCache) -> Optional[sql.
|
|
78
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
77
79
|
tbl = self.tbl if self.tbl is not None else catalog.Catalog.get().tbl_versions[(self.tbl_id, None)]
|
|
78
80
|
rowid_cols = tbl.store_tbl.rowid_columns()
|
|
79
81
|
return rowid_cols[self.rowid_component_idx]
|
|
@@ -81,7 +83,7 @@ class RowidRef(Expr):
|
|
|
81
83
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
82
84
|
data_row[self.slot_idx] = data_row.pk[self.rowid_component_idx]
|
|
83
85
|
|
|
84
|
-
def _as_dict(self) ->
|
|
86
|
+
def _as_dict(self) -> dict:
|
|
85
87
|
return {
|
|
86
88
|
'tbl_id': str(self.tbl_id),
|
|
87
89
|
'normalized_base_id': str(self.normalized_base_id),
|
|
@@ -89,7 +91,6 @@ class RowidRef(Expr):
|
|
|
89
91
|
}
|
|
90
92
|
|
|
91
93
|
@classmethod
|
|
92
|
-
def _from_dict(cls, d:
|
|
94
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> RowidRef:
|
|
93
95
|
tbl_id, normalized_base_id, idx = UUID(d['tbl_id']), UUID(d['normalized_base_id']), d['idx']
|
|
94
96
|
return cls(tbl=None, idx=idx, tbl_id=tbl_id, normalized_base_id=normalized_base_id)
|
|
95
|
-
|
|
@@ -1,16 +1,16 @@
|
|
|
1
|
-
from typing import
|
|
2
|
-
from .sql_element_cache import SqlElementCache
|
|
1
|
+
from typing import Any, Optional
|
|
3
2
|
|
|
4
3
|
import sqlalchemy as sql
|
|
5
|
-
import PIL.Image
|
|
6
4
|
|
|
7
5
|
import pixeltable.exceptions as excs
|
|
8
6
|
import pixeltable.type_system as ts
|
|
7
|
+
|
|
9
8
|
from .column_ref import ColumnRef
|
|
10
9
|
from .data_row import DataRow
|
|
11
10
|
from .expr import Expr
|
|
12
11
|
from .literal import Literal
|
|
13
12
|
from .row_builder import RowBuilder
|
|
13
|
+
from .sql_element_cache import SqlElementCache
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class SimilarityExpr(Expr):
|
|
@@ -27,7 +27,7 @@ class SimilarityExpr(Expr):
|
|
|
27
27
|
|
|
28
28
|
# determine index to use
|
|
29
29
|
idx_info = col_ref.col.get_idx_info()
|
|
30
|
-
|
|
30
|
+
from pixeltable import index
|
|
31
31
|
embedding_idx_info = {
|
|
32
32
|
info.name: info for info in idx_info.values() if isinstance(info.idx, index.EmbeddingIndex)
|
|
33
33
|
}
|
|
@@ -44,6 +44,7 @@ class SimilarityExpr(Expr):
|
|
|
44
44
|
else:
|
|
45
45
|
self.idx_info = next(iter(embedding_idx_info.values()))
|
|
46
46
|
idx = self.idx_info.idx
|
|
47
|
+
assert isinstance(idx, index.EmbeddingIndex)
|
|
47
48
|
|
|
48
49
|
if item_expr.col_type.is_string_type() and idx.string_embed is None:
|
|
49
50
|
raise excs.Error(
|
|
@@ -57,16 +58,20 @@ class SimilarityExpr(Expr):
|
|
|
57
58
|
def __str__(self) -> str:
|
|
58
59
|
return f'{self.components[0]}.similarity({self.components[1]})'
|
|
59
60
|
|
|
60
|
-
def sql_expr(self, _: SqlElementCache) -> Optional[sql.
|
|
61
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
61
62
|
if not isinstance(self.components[1], Literal):
|
|
62
63
|
raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not an expression')
|
|
63
64
|
item = self.components[1].val
|
|
65
|
+
from pixeltable import index
|
|
66
|
+
assert isinstance(self.idx_info.idx, index.EmbeddingIndex)
|
|
64
67
|
return self.idx_info.idx.similarity_clause(self.idx_info.val_col, item)
|
|
65
68
|
|
|
66
|
-
def as_order_by_clause(self, is_asc: bool) -> Optional[sql.
|
|
69
|
+
def as_order_by_clause(self, is_asc: bool) -> Optional[sql.ColumnElement]:
|
|
67
70
|
if not isinstance(self.components[1], Literal):
|
|
68
71
|
raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not an expression')
|
|
69
72
|
item = self.components[1].val
|
|
73
|
+
from pixeltable import index
|
|
74
|
+
assert isinstance(self.idx_info.idx, index.EmbeddingIndex)
|
|
70
75
|
return self.idx_info.idx.order_by_clause(self.idx_info.val_col, item, is_asc)
|
|
71
76
|
|
|
72
77
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
@@ -74,7 +79,7 @@ class SimilarityExpr(Expr):
|
|
|
74
79
|
assert False
|
|
75
80
|
|
|
76
81
|
@classmethod
|
|
77
|
-
def _from_dict(cls, d: dict, components:
|
|
82
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> 'SimilarityExpr':
|
|
78
83
|
assert len(components) == 2
|
|
79
84
|
assert isinstance(components[0], ColumnRef)
|
|
80
85
|
return cls(components[0], components[1])
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
from typing import Iterable, Union, Optional
|
|
1
|
+
from typing import Iterable, Union, Optional, cast
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
5
5
|
from .expr import Expr
|
|
6
|
+
from .expr_dict import ExprDict
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
class SqlElementCache:
|
|
@@ -10,8 +11,11 @@ class SqlElementCache:
|
|
|
10
11
|
|
|
11
12
|
cache: dict[int, Optional[sql.ColumnElement]] # key: Expr.id
|
|
12
13
|
|
|
13
|
-
def __init__(self):
|
|
14
|
+
def __init__(self, elements: Optional[ExprDict[sql.ColumnElement]] = None):
|
|
14
15
|
self.cache = {}
|
|
16
|
+
if elements is not None:
|
|
17
|
+
for e, el in elements.items():
|
|
18
|
+
self.cache[e.id] = el
|
|
15
19
|
|
|
16
20
|
def get(self, e: Expr) -> Optional[sql.ColumnElement]:
|
|
17
21
|
"""Returns the sql.ColumnElement for the given Expr, or None if Expr.to_sql() returns None."""
|
|
@@ -23,8 +27,10 @@ class SqlElementCache:
|
|
|
23
27
|
self.cache[e.id] = el
|
|
24
28
|
return el
|
|
25
29
|
|
|
26
|
-
def contains(self,
|
|
27
|
-
"""Returns True if
|
|
28
|
-
|
|
29
|
-
|
|
30
|
+
def contains(self, item: Expr) -> bool:
|
|
31
|
+
"""Returns True if the cache contains a (non-None) value for the given Expr."""
|
|
32
|
+
return self.get(item) is not None
|
|
33
|
+
|
|
34
|
+
def contains_all(self, items: Iterable[Expr]) -> bool:
|
|
35
|
+
"""Returns True if the cache contains a (non-None) value for every item in the collection of Exprs."""
|
|
30
36
|
return all(self.get(e) is not None for e in items)
|
pixeltable/exprs/type_cast.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import Any, Optional
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
5
5
|
import pixeltable.type_system as ts
|
|
6
|
+
|
|
6
7
|
from .expr import DataRow, Expr
|
|
7
8
|
from .row_builder import RowBuilder
|
|
8
9
|
from .sql_element_cache import SqlElementCache
|
|
@@ -15,7 +16,7 @@ class TypeCast(Expr):
|
|
|
15
16
|
"""
|
|
16
17
|
def __init__(self, underlying: Expr, new_type: ts.ColumnType):
|
|
17
18
|
super().__init__(new_type)
|
|
18
|
-
self.components:
|
|
19
|
+
self.components: list[Expr] = [underlying]
|
|
19
20
|
self.id: Optional[int] = self._create_id()
|
|
20
21
|
|
|
21
22
|
@property
|
|
@@ -26,10 +27,10 @@ class TypeCast(Expr):
|
|
|
26
27
|
# `TypeCast` has no properties beyond those captured by `Expr`.
|
|
27
28
|
return True
|
|
28
29
|
|
|
29
|
-
def _id_attrs(self) ->
|
|
30
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
30
31
|
return super()._id_attrs() + [('new_type', self.col_type)]
|
|
31
32
|
|
|
32
|
-
def sql_expr(self, _: SqlElementCache) -> Optional[sql.
|
|
33
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
33
34
|
"""
|
|
34
35
|
sql_expr() is unimplemented for now, in order to sidestep potentially thorny
|
|
35
36
|
questions about consistency of doing type conversions in both Python and Postgres.
|
|
@@ -40,11 +41,12 @@ class TypeCast(Expr):
|
|
|
40
41
|
original_val = data_row[self._underlying.slot_idx]
|
|
41
42
|
data_row[self.slot_idx] = self.col_type.create_literal(original_val)
|
|
42
43
|
|
|
43
|
-
|
|
44
|
+
|
|
45
|
+
def _as_dict(self) -> dict:
|
|
44
46
|
return {'new_type': self.col_type.as_dict(), **super()._as_dict()}
|
|
45
47
|
|
|
46
48
|
@classmethod
|
|
47
|
-
def _from_dict(cls, d:
|
|
49
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> 'TypeCast':
|
|
48
50
|
assert 'new_type' in d
|
|
49
51
|
assert len(components) == 1
|
|
50
52
|
return cls(components[0], ts.ColumnType.from_dict(d['new_type']))
|
pixeltable/exprs/variable.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import Any, NoReturn
|
|
4
4
|
|
|
5
5
|
import pixeltable.type_system as ts
|
|
6
|
+
|
|
6
7
|
from .data_row import DataRow
|
|
7
8
|
from .expr import Expr
|
|
8
9
|
from .row_builder import RowBuilder
|
|
@@ -20,7 +21,7 @@ class Variable(Expr):
|
|
|
20
21
|
self.name = name
|
|
21
22
|
self.id = self._create_id()
|
|
22
23
|
|
|
23
|
-
def _id_attrs(self) ->
|
|
24
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
24
25
|
return super()._id_attrs() + [('name', self.name)]
|
|
25
26
|
|
|
26
27
|
def default_column_name(self) -> NoReturn:
|
|
@@ -38,9 +39,9 @@ class Variable(Expr):
|
|
|
38
39
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> NoReturn:
|
|
39
40
|
raise NotImplementedError()
|
|
40
41
|
|
|
41
|
-
def _as_dict(self) ->
|
|
42
|
+
def _as_dict(self) -> dict:
|
|
42
43
|
return {'name': self.name, 'type': self.col_type.as_dict(), **super()._as_dict()}
|
|
43
44
|
|
|
44
45
|
@classmethod
|
|
45
|
-
def _from_dict(cls, d:
|
|
46
|
+
def _from_dict(cls, d: dict, _: list[Expr]) -> Variable:
|
|
46
47
|
return cls(d['name'], ts.ColumnType.from_dict(d['type']))
|
|
@@ -8,9 +8,14 @@ if TYPE_CHECKING:
|
|
|
8
8
|
import pixeltable as pxt
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
@pxt.udf
|
|
11
|
+
@pxt.udf
|
|
12
12
|
def transcribe(
|
|
13
|
-
audio:
|
|
13
|
+
audio: pxt.Audio,
|
|
14
|
+
*,
|
|
15
|
+
model: str,
|
|
16
|
+
compute_type: Optional[str] = None,
|
|
17
|
+
language: Optional[str] = None,
|
|
18
|
+
chunk_size: int = 30
|
|
14
19
|
) -> dict:
|
|
15
20
|
"""
|
|
16
21
|
Transcribe an audio file using WhisperX.
|
|
@@ -86,7 +86,7 @@ class AggregateFunction(Function):
|
|
|
86
86
|
res += '\n\n' + inspect.getdoc(self.agg_cls.update)
|
|
87
87
|
return res
|
|
88
88
|
|
|
89
|
-
def __call__(self, *args: object, **kwargs: object) -> 'pixeltable.exprs.
|
|
89
|
+
def __call__(self, *args: object, **kwargs: object) -> 'pixeltable.exprs.FunctionCall':
|
|
90
90
|
from pixeltable import exprs
|
|
91
91
|
|
|
92
92
|
# perform semantic analysis of special parameters 'order_by' and 'group_by'
|
|
@@ -4,7 +4,7 @@ import inspect
|
|
|
4
4
|
from typing import Any, Callable, Optional
|
|
5
5
|
from uuid import UUID
|
|
6
6
|
|
|
7
|
-
import cloudpickle
|
|
7
|
+
import cloudpickle # type: ignore[import-untyped]
|
|
8
8
|
|
|
9
9
|
from .function import Function
|
|
10
10
|
from .signature import Signature
|
|
@@ -108,7 +108,7 @@ class CallableFunction(Function):
|
|
|
108
108
|
@classmethod
|
|
109
109
|
def from_store(cls, name: Optional[str], md: dict, binary_obj: bytes) -> Function:
|
|
110
110
|
py_fn = cloudpickle.loads(binary_obj)
|
|
111
|
-
assert
|
|
111
|
+
assert callable(py_fn)
|
|
112
112
|
sig = Signature.from_dict(md['signature'])
|
|
113
113
|
batch_size = md['batch_size']
|
|
114
114
|
return CallableFunction(sig, py_fn, self_name=name, batch_size=batch_size)
|
pixeltable/func/function.py
CHANGED
|
@@ -3,12 +3,13 @@ from __future__ import annotations
|
|
|
3
3
|
import abc
|
|
4
4
|
import importlib
|
|
5
5
|
import inspect
|
|
6
|
-
from typing import Any, Callable,
|
|
6
|
+
from typing import Any, Callable, Optional
|
|
7
7
|
|
|
8
8
|
import sqlalchemy as sql
|
|
9
9
|
|
|
10
|
-
import pixeltable
|
|
10
|
+
import pixeltable as pxt
|
|
11
11
|
import pixeltable.type_system as ts
|
|
12
|
+
|
|
12
13
|
from .globals import resolve_symbol
|
|
13
14
|
from .signature import Signature
|
|
14
15
|
|
|
@@ -66,13 +67,13 @@ class Function(abc.ABC):
|
|
|
66
67
|
def help_str(self) -> str:
|
|
67
68
|
return self.display_name + str(self.signature)
|
|
68
69
|
|
|
69
|
-
def __call__(self, *args: Any, **kwargs: Any) -> '
|
|
70
|
+
def __call__(self, *args: Any, **kwargs: Any) -> 'pxt.exprs.FunctionCall':
|
|
70
71
|
from pixeltable import exprs
|
|
71
72
|
bound_args = self.signature.py_signature.bind(*args, **kwargs)
|
|
72
73
|
self.validate_call(bound_args.arguments)
|
|
73
74
|
return exprs.FunctionCall(self, bound_args.arguments)
|
|
74
75
|
|
|
75
|
-
def validate_call(self, bound_args:
|
|
76
|
+
def validate_call(self, bound_args: dict[str, Any]) -> None:
|
|
76
77
|
"""Override this to do custom validation of the arguments"""
|
|
77
78
|
pass
|
|
78
79
|
|
|
@@ -121,7 +122,7 @@ class Function(abc.ABC):
|
|
|
121
122
|
"""Print source code"""
|
|
122
123
|
print('source not available')
|
|
123
124
|
|
|
124
|
-
def as_dict(self) ->
|
|
125
|
+
def as_dict(self) -> dict:
|
|
125
126
|
"""
|
|
126
127
|
Return a serialized reference to the instance that can be passed to json.dumps() and converted back
|
|
127
128
|
to an instance with from_dict().
|
|
@@ -130,13 +131,13 @@ class Function(abc.ABC):
|
|
|
130
131
|
classpath = f'{self.__class__.__module__}.{self.__class__.__qualname__}'
|
|
131
132
|
return {'_classpath': classpath, **self._as_dict()}
|
|
132
133
|
|
|
133
|
-
def _as_dict(self) ->
|
|
134
|
+
def _as_dict(self) -> dict:
|
|
134
135
|
"""Default serialization: store the path to self (which includes the module path)"""
|
|
135
136
|
assert self.self_path is not None
|
|
136
137
|
return {'path': self.self_path}
|
|
137
138
|
|
|
138
139
|
@classmethod
|
|
139
|
-
def from_dict(cls, d:
|
|
140
|
+
def from_dict(cls, d: dict) -> Function:
|
|
140
141
|
"""
|
|
141
142
|
Turn dict that was produced by calling as_dict() into an instance of the correct Function subclass.
|
|
142
143
|
"""
|
|
@@ -147,14 +148,14 @@ class Function(abc.ABC):
|
|
|
147
148
|
return func_class._from_dict(d)
|
|
148
149
|
|
|
149
150
|
@classmethod
|
|
150
|
-
def _from_dict(cls, d:
|
|
151
|
+
def _from_dict(cls, d: dict) -> Function:
|
|
151
152
|
"""Default deserialization: load the symbol indicated by the stored symbol_path"""
|
|
152
153
|
assert 'path' in d and d['path'] is not None
|
|
153
154
|
instance = resolve_symbol(d['path'])
|
|
154
155
|
assert isinstance(instance, Function)
|
|
155
156
|
return instance
|
|
156
157
|
|
|
157
|
-
def to_store(self) ->
|
|
158
|
+
def to_store(self) -> tuple[dict, bytes]:
|
|
158
159
|
"""
|
|
159
160
|
Serialize the function to a format that can be stored in the Pixeltable store
|
|
160
161
|
Returns:
|
|
@@ -165,7 +166,7 @@ class Function(abc.ABC):
|
|
|
165
166
|
raise NotImplementedError()
|
|
166
167
|
|
|
167
168
|
@classmethod
|
|
168
|
-
def from_store(cls, name: Optional[str], md:
|
|
169
|
+
def from_store(cls, name: Optional[str], md: dict, binary_obj: bytes) -> Function:
|
|
169
170
|
"""
|
|
170
171
|
Create a Function instance from the serialized representation returned by to_store()
|
|
171
172
|
"""
|
|
@@ -4,7 +4,7 @@ import dataclasses
|
|
|
4
4
|
import importlib
|
|
5
5
|
import logging
|
|
6
6
|
import sys
|
|
7
|
-
from typing import Optional
|
|
7
|
+
from typing import Optional
|
|
8
8
|
from uuid import UUID
|
|
9
9
|
|
|
10
10
|
import sqlalchemy as sql
|
|
@@ -14,7 +14,6 @@ import pixeltable.exceptions as excs
|
|
|
14
14
|
import pixeltable.type_system as ts
|
|
15
15
|
from pixeltable.metadata import schema
|
|
16
16
|
from .function import Function
|
|
17
|
-
from .globals import get_caller_module_path
|
|
18
17
|
|
|
19
18
|
_logger = logging.getLogger('pixeltable')
|
|
20
19
|
|
|
@@ -32,15 +31,15 @@ class FunctionRegistry:
|
|
|
32
31
|
return cls._instance
|
|
33
32
|
|
|
34
33
|
def __init__(self):
|
|
35
|
-
self.stored_fns_by_id:
|
|
36
|
-
self.module_fns:
|
|
34
|
+
self.stored_fns_by_id: dict[UUID, Function] = {}
|
|
35
|
+
self.module_fns: dict[str, Function] = {} # fqn -> Function
|
|
37
36
|
self.type_methods: dict[ts.ColumnType.Type, dict[str, Function]] = {}
|
|
38
37
|
|
|
39
38
|
def clear_cache(self) -> None:
|
|
40
39
|
"""
|
|
41
40
|
Useful during testing
|
|
42
41
|
"""
|
|
43
|
-
self.stored_fns_by_id
|
|
42
|
+
self.stored_fns_by_id = {}
|
|
44
43
|
|
|
45
44
|
# def register_std_modules(self) -> None:
|
|
46
45
|
# """Register all submodules of pixeltable.functions"""
|
|
@@ -76,7 +75,7 @@ class FunctionRegistry:
|
|
|
76
75
|
raise excs.Error(f'Duplicate method name for type {base_type}: {fn.name}')
|
|
77
76
|
self.type_methods[base_type][fn.name] = fn
|
|
78
77
|
|
|
79
|
-
def list_functions(self) ->
|
|
78
|
+
def list_functions(self) -> list[Function]:
|
|
80
79
|
# retrieve Function.Metadata data for all existing stored functions from store directly
|
|
81
80
|
# (self.stored_fns_by_id isn't guaranteed to contain all functions)
|
|
82
81
|
# TODO: have the client do this, once the client takes over the Db functionality
|
|
@@ -85,7 +84,7 @@ class FunctionRegistry:
|
|
|
85
84
|
# schema.Db.name, schema.Dir.path, sql_func.length(schema.Function.init_obj))\
|
|
86
85
|
# .where(schema.Function.db_id == schema.Db.id)\
|
|
87
86
|
# .where(schema.Function.dir_id == schema.Dir.id)
|
|
88
|
-
# stored_fn_md:
|
|
87
|
+
# stored_fn_md: list[Function.Metadata] = []
|
|
89
88
|
# with Env.get().engine.begin() as conn:
|
|
90
89
|
# rows = conn.execute(stmt)
|
|
91
90
|
# for name, md_dict, db_name, dir_path, init_obj_len in rows:
|
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
+
|
|
2
3
|
import inspect
|
|
3
|
-
from typing import
|
|
4
|
+
from typing import Any, Callable, Optional
|
|
4
5
|
|
|
5
6
|
import sqlalchemy as sql
|
|
6
7
|
|
|
7
|
-
import pixeltable
|
|
8
|
-
|
|
9
|
-
|
|
8
|
+
import pixeltable as pxt
|
|
9
|
+
from pixeltable import exprs
|
|
10
|
+
|
|
10
11
|
from .function import Function
|
|
11
|
-
from .signature import Signature
|
|
12
|
+
from .signature import Signature
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
class QueryTemplateFunction(Function):
|
|
@@ -16,24 +17,23 @@ class QueryTemplateFunction(Function):
|
|
|
16
17
|
|
|
17
18
|
@classmethod
|
|
18
19
|
def create(
|
|
19
|
-
|
|
20
|
+
cls, template_callable: Callable, param_types: Optional[list[pxt.ColumnType]], path: str, name: str
|
|
20
21
|
) -> QueryTemplateFunction:
|
|
21
22
|
# we need to construct a template df and a signature
|
|
22
23
|
py_sig = inspect.signature(template_callable)
|
|
23
24
|
py_params = list(py_sig.parameters.values())
|
|
24
25
|
params = Signature.create_parameters(py_params=py_params, param_types=param_types)
|
|
25
26
|
# invoke template_callable with parameter expressions to construct a DataFrame with parameters
|
|
26
|
-
import pixeltable.exprs as exprs
|
|
27
27
|
var_exprs = [exprs.Variable(param.name, param.col_type) for param in params]
|
|
28
28
|
template_df = template_callable(*var_exprs)
|
|
29
29
|
from pixeltable import DataFrame
|
|
30
30
|
assert isinstance(template_df, DataFrame)
|
|
31
31
|
# we take params and return json
|
|
32
|
-
sig = Signature(return_type=
|
|
32
|
+
sig = Signature(return_type=pxt.JsonType(), parameters=params)
|
|
33
33
|
return QueryTemplateFunction(template_df, sig, path=path, name=name)
|
|
34
34
|
|
|
35
35
|
def __init__(
|
|
36
|
-
self, template_df: Optional['
|
|
36
|
+
self, template_df: Optional['pxt.DataFrame'], sig: Optional[Signature], path: Optional[str] = None,
|
|
37
37
|
name: Optional[str] = None,
|
|
38
38
|
):
|
|
39
39
|
super().__init__(sig, self_path=path)
|
|
@@ -46,7 +46,6 @@ class QueryTemplateFunction(Function):
|
|
|
46
46
|
self.conn: Optional[sql.engine.Connection] = None
|
|
47
47
|
|
|
48
48
|
# convert defaults to Literals
|
|
49
|
-
import pixeltable.exprs as exprs
|
|
50
49
|
self.defaults: dict[str, exprs.Literal] = {} # key: param name, value: default value converted to a Literal
|
|
51
50
|
param_types = self.template_df.parameters()
|
|
52
51
|
for param in [p for p in self.signature.parameters.values() if p.has_default()]:
|
|
@@ -75,10 +74,10 @@ class QueryTemplateFunction(Function):
|
|
|
75
74
|
def name(self) -> str:
|
|
76
75
|
return self.self_name
|
|
77
76
|
|
|
78
|
-
def _as_dict(self) ->
|
|
77
|
+
def _as_dict(self) -> dict:
|
|
79
78
|
return {'name': self.name, 'signature': self.signature.as_dict(), 'df': self.template_df.as_dict()}
|
|
80
79
|
|
|
81
80
|
@classmethod
|
|
82
|
-
def _from_dict(cls, d:
|
|
81
|
+
def _from_dict(cls, d: dict) -> Function:
|
|
83
82
|
from pixeltable.dataframe import DataFrame
|
|
84
83
|
return cls(DataFrame.from_dict(d['df']), Signature.from_dict(d['signature']), name=d['name'])
|
pixeltable/func/signature.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import json
|
|
4
3
|
import dataclasses
|
|
5
4
|
import enum
|
|
6
5
|
import inspect
|
|
6
|
+
import json
|
|
7
7
|
import logging
|
|
8
8
|
import typing
|
|
9
|
-
from typing import
|
|
9
|
+
from typing import Any, Callable, Optional, Union
|
|
10
10
|
|
|
11
11
|
import pixeltable.exceptions as excs
|
|
12
12
|
import pixeltable.type_system as ts
|
|
@@ -18,7 +18,7 @@ _logger = logging.getLogger('pixeltable')
|
|
|
18
18
|
class Parameter:
|
|
19
19
|
name: str
|
|
20
20
|
col_type: Optional[ts.ColumnType] # None for variable parameters
|
|
21
|
-
kind:
|
|
21
|
+
kind: inspect._ParameterKind
|
|
22
22
|
# for some reason, this needs to precede is_batched in the dataclass definition,
|
|
23
23
|
# otherwise Python complains that an argument with a default is followed by an argument without a default
|
|
24
24
|
default: Any = inspect.Parameter.empty # default value for the parameter
|
|
@@ -82,7 +82,7 @@ class Signature:
|
|
|
82
82
|
"""
|
|
83
83
|
SPECIAL_PARAM_NAMES = ['group_by', 'order_by']
|
|
84
84
|
|
|
85
|
-
def __init__(self, return_type: ts.ColumnType, parameters:
|
|
85
|
+
def __init__(self, return_type: ts.ColumnType, parameters: list[Parameter], is_batched: bool = False):
|
|
86
86
|
assert isinstance(return_type, ts.ColumnType)
|
|
87
87
|
self.return_type = return_type
|
|
88
88
|
self.is_batched = is_batched
|
|
@@ -97,7 +97,7 @@ class Signature:
|
|
|
97
97
|
assert isinstance(self.return_type, ts.ColumnType)
|
|
98
98
|
return self.return_type
|
|
99
99
|
|
|
100
|
-
def as_dict(self) ->
|
|
100
|
+
def as_dict(self) -> dict[str, Any]:
|
|
101
101
|
result = {
|
|
102
102
|
'return_type': self.get_return_type().as_dict(),
|
|
103
103
|
'parameters': [p.as_dict() for p in self.parameters.values()],
|
|
@@ -106,11 +106,13 @@ class Signature:
|
|
|
106
106
|
return result
|
|
107
107
|
|
|
108
108
|
@classmethod
|
|
109
|
-
def from_dict(cls, d:
|
|
109
|
+
def from_dict(cls, d: dict[str, Any]) -> Signature:
|
|
110
110
|
parameters = [Parameter.from_dict(param_dict) for param_dict in d['parameters']]
|
|
111
111
|
return cls(ts.ColumnType.from_dict(d['return_type']), parameters, d['is_batched'])
|
|
112
112
|
|
|
113
|
-
def __eq__(self, other:
|
|
113
|
+
def __eq__(self, other: object) -> bool:
|
|
114
|
+
if not isinstance(other, Signature):
|
|
115
|
+
return False
|
|
114
116
|
if self.get_return_type() != other.get_return_type():
|
|
115
117
|
return False
|
|
116
118
|
if len(self.parameters) != len(other.parameters):
|
|
@@ -122,7 +124,7 @@ class Signature:
|
|
|
122
124
|
return True
|
|
123
125
|
|
|
124
126
|
def __str__(self) -> str:
|
|
125
|
-
param_strs:
|
|
127
|
+
param_strs: list[str] = []
|
|
126
128
|
for p in self.parameters.values():
|
|
127
129
|
if p.kind == inspect.Parameter.VAR_POSITIONAL:
|
|
128
130
|
param_strs.append(f'*{p.name}')
|
|
@@ -133,7 +135,7 @@ class Signature:
|
|
|
133
135
|
return f'({", ".join(param_strs)}) -> {str(self.get_return_type())}'
|
|
134
136
|
|
|
135
137
|
@classmethod
|
|
136
|
-
def _infer_type(cls, annotation: Optional[type]) ->
|
|
138
|
+
def _infer_type(cls, annotation: Optional[type]) -> tuple[Optional[ts.ColumnType], Optional[bool]]:
|
|
137
139
|
"""Returns: (column type, is_batched) or (None, ...) if the type cannot be inferred"""
|
|
138
140
|
if annotation is None:
|
|
139
141
|
return (None, None)
|
|
@@ -154,13 +156,13 @@ class Signature:
|
|
|
154
156
|
@classmethod
|
|
155
157
|
def create_parameters(
|
|
156
158
|
cls, py_fn: Optional[Callable] = None, py_params: Optional[list[inspect.Parameter]] = None,
|
|
157
|
-
param_types: Optional[
|
|
158
|
-
) ->
|
|
159
|
+
param_types: Optional[list[ts.ColumnType]] = None
|
|
160
|
+
) -> list[Parameter]:
|
|
159
161
|
assert (py_fn is None) != (py_params is None)
|
|
160
162
|
if py_fn is not None:
|
|
161
163
|
sig = inspect.signature(py_fn)
|
|
162
164
|
py_params = list(sig.parameters.values())
|
|
163
|
-
parameters:
|
|
165
|
+
parameters: list[Parameter] = []
|
|
164
166
|
|
|
165
167
|
for idx, param in enumerate(py_params):
|
|
166
168
|
if param.name in cls.SPECIAL_PARAM_NAMES:
|
|
@@ -187,9 +189,9 @@ class Signature:
|
|
|
187
189
|
|
|
188
190
|
@classmethod
|
|
189
191
|
def create(
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
192
|
+
cls, py_fn: Callable,
|
|
193
|
+
param_types: Optional[list[ts.ColumnType]] = None,
|
|
194
|
+
return_type: Optional[ts.ColumnType] = None
|
|
193
195
|
) -> Signature:
|
|
194
196
|
"""Create a signature for the given Callable.
|
|
195
197
|
Infer the parameter and return types, if none are specified.
|
pixeltable/func/udf.py
CHANGED
|
@@ -38,10 +38,6 @@ def udf(*args, **kwargs):
|
|
|
38
38
|
>>> @pxt.udf
|
|
39
39
|
... def my_function(x: int) -> int:
|
|
40
40
|
... return x + 1
|
|
41
|
-
|
|
42
|
-
>>> @pxt.udf(param_types=[pxt.IntType()], return_type=pxt.IntType())
|
|
43
|
-
... def my_function(x):
|
|
44
|
-
... return x + 1
|
|
45
41
|
"""
|
|
46
42
|
if len(args) == 1 and len(kwargs) == 0 and callable(args[0]):
|
|
47
43
|
|
pixeltable/functions/__init__.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from pixeltable.utils.code import local_public_names
|
|
2
2
|
|
|
3
|
-
from . import (anthropic, audio, fireworks, huggingface, image, json,
|
|
4
|
-
video, vision)
|
|
3
|
+
from . import (anthropic, audio, fireworks, huggingface, image, json, llama_cpp, mistralai, ollama, openai, string,
|
|
4
|
+
timestamp, together, video, vision, whisper)
|
|
5
5
|
from .globals import *
|
|
6
6
|
|
|
7
7
|
__all__ = local_public_names(__name__, exclude=['globals']) + local_public_names(globals.__name__)
|
pixeltable/functions/audio.py
CHANGED
|
@@ -11,18 +11,16 @@ t.select(pxtf.audio.get_metadata()).collect()
|
|
|
11
11
|
```
|
|
12
12
|
"""
|
|
13
13
|
|
|
14
|
-
import pixeltable
|
|
15
|
-
import pixeltable.type_system as ts
|
|
14
|
+
import pixeltable as pxt
|
|
16
15
|
from pixeltable.utils.code import local_public_names
|
|
17
16
|
|
|
18
17
|
|
|
19
|
-
@
|
|
20
|
-
def get_metadata(audio:
|
|
18
|
+
@pxt.udf(is_method=True)
|
|
19
|
+
def get_metadata(audio: pxt.Audio) -> dict:
|
|
21
20
|
"""
|
|
22
21
|
Gets various metadata associated with an audio file and returns it as a dictionary.
|
|
23
22
|
"""
|
|
24
|
-
|
|
25
|
-
return pxtf.video._get_metadata(audio)
|
|
23
|
+
return pxt.functions.video._get_metadata(audio)
|
|
26
24
|
|
|
27
25
|
|
|
28
26
|
__all__ = local_public_names(__name__)
|