pixeltable 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +8 -7
- pixeltable/catalog/column.py +11 -8
- pixeltable/catalog/insertable_table.py +1 -1
- pixeltable/catalog/path_dict.py +8 -6
- pixeltable/catalog/table.py +20 -14
- pixeltable/catalog/table_version.py +92 -55
- pixeltable/catalog/table_version_path.py +7 -9
- pixeltable/catalog/view.py +3 -2
- pixeltable/dataframe.py +2 -2
- pixeltable/env.py +205 -86
- pixeltable/exceptions.py +5 -1
- pixeltable/exec/aggregation_node.py +2 -1
- pixeltable/exec/component_iteration_node.py +2 -2
- pixeltable/exec/sql_node.py +11 -8
- pixeltable/exprs/__init__.py +2 -2
- pixeltable/exprs/arithmetic_expr.py +4 -4
- pixeltable/exprs/array_slice.py +2 -1
- pixeltable/exprs/column_property_ref.py +9 -7
- pixeltable/exprs/column_ref.py +2 -1
- pixeltable/exprs/comparison.py +10 -7
- pixeltable/exprs/compound_predicate.py +3 -2
- pixeltable/exprs/data_row.py +19 -4
- pixeltable/exprs/expr.py +51 -41
- pixeltable/exprs/expr_set.py +32 -9
- pixeltable/exprs/function_call.py +62 -40
- pixeltable/exprs/in_predicate.py +3 -2
- pixeltable/exprs/inline_expr.py +200 -0
- pixeltable/exprs/is_null.py +3 -2
- pixeltable/exprs/json_mapper.py +5 -4
- pixeltable/exprs/json_path.py +7 -1
- pixeltable/exprs/literal.py +34 -7
- pixeltable/exprs/method_ref.py +3 -3
- pixeltable/exprs/object_ref.py +6 -5
- pixeltable/exprs/row_builder.py +25 -17
- pixeltable/exprs/rowid_ref.py +2 -1
- pixeltable/exprs/similarity_expr.py +2 -1
- pixeltable/exprs/sql_element_cache.py +30 -0
- pixeltable/exprs/type_cast.py +3 -3
- pixeltable/exprs/variable.py +2 -1
- pixeltable/ext/functions/whisperx.py +6 -4
- pixeltable/ext/functions/yolox.py +11 -9
- pixeltable/func/aggregate_function.py +1 -0
- pixeltable/func/function.py +28 -4
- pixeltable/functions/__init__.py +4 -2
- pixeltable/functions/anthropic.py +15 -5
- pixeltable/functions/fireworks.py +1 -1
- pixeltable/functions/globals.py +6 -1
- pixeltable/functions/huggingface.py +91 -14
- pixeltable/functions/image.py +20 -5
- pixeltable/functions/json.py +5 -5
- pixeltable/functions/mistralai.py +188 -0
- pixeltable/functions/openai.py +6 -10
- pixeltable/functions/string.py +3 -2
- pixeltable/functions/timestamp.py +95 -7
- pixeltable/functions/together.py +18 -11
- pixeltable/functions/video.py +2 -2
- pixeltable/functions/vision.py +69 -37
- pixeltable/functions/whisper.py +4 -1
- pixeltable/globals.py +5 -1
- pixeltable/io/hf_datasets.py +17 -15
- pixeltable/io/pandas.py +0 -2
- pixeltable/io/parquet.py +15 -14
- pixeltable/iterators/document.py +16 -15
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_18.py +1 -1
- pixeltable/metadata/converters/convert_19.py +46 -0
- pixeltable/metadata/converters/convert_20.py +56 -0
- pixeltable/metadata/converters/util.py +29 -4
- pixeltable/metadata/notes.py +2 -0
- pixeltable/metadata/schema.py +5 -4
- pixeltable/plan.py +100 -78
- pixeltable/store.py +5 -1
- pixeltable/tool/create_test_db_dump.py +18 -6
- pixeltable/type_system.py +15 -15
- pixeltable/utils/documents.py +45 -42
- pixeltable/utils/formatter.py +2 -2
- pixeltable-0.2.19.dist-info/LICENSE +201 -0
- {pixeltable-0.2.17.dist-info → pixeltable-0.2.19.dist-info}/METADATA +84 -24
- pixeltable-0.2.19.dist-info/RECORD +147 -0
- pixeltable/exprs/inline_array.py +0 -116
- pixeltable/exprs/inline_dict.py +0 -103
- pixeltable-0.2.17.dist-info/LICENSE +0 -18
- pixeltable-0.2.17.dist-info/RECORD +0 -144
- {pixeltable-0.2.17.dist-info → pixeltable-0.2.19.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.17.dist-info → pixeltable-0.2.19.dist-info}/entry_points.txt +0 -0
pixeltable/exprs/in_predicate.py
CHANGED
|
@@ -5,6 +5,7 @@ from typing import Optional, List, Any, Dict, Tuple, Iterable
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
7
7
|
import pixeltable.exceptions as excs
|
|
8
|
+
from .sql_element_cache import SqlElementCache
|
|
8
9
|
import pixeltable.type_system as ts
|
|
9
10
|
from .data_row import DataRow
|
|
10
11
|
from .expr import Expr
|
|
@@ -70,8 +71,8 @@ class InPredicate(Expr):
|
|
|
70
71
|
def _id_attrs(self) -> List[Tuple[str, Any]]:
|
|
71
72
|
return super()._id_attrs() + [('value_list', self.value_list)]
|
|
72
73
|
|
|
73
|
-
def sql_expr(self) -> Optional[sql.ClauseElement]:
|
|
74
|
-
lhs_sql_exprs = self.components[0]
|
|
74
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ClauseElement]:
|
|
75
|
+
lhs_sql_exprs = sql_elements.get(self.components[0])
|
|
75
76
|
if lhs_sql_exprs is None or self.value_list is None:
|
|
76
77
|
return None
|
|
77
78
|
return lhs_sql_exprs.in_(self.value_list)
|
pixeltable/exprs/inline_expr.py
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import copy
|
|
4
|
+
from typing import Any, Iterable, Optional
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
import sqlalchemy as sql
|
|
8
|
+
|
|
9
|
+
import pixeltable.exceptions as excs
|
|
10
|
+
import pixeltable.type_system as ts
|
|
11
|
+
|
|
12
|
+
from .data_row import DataRow
|
|
13
|
+
from .expr import Expr
|
|
14
|
+
from .literal import Literal
|
|
15
|
+
from .row_builder import RowBuilder
|
|
16
|
+
from .sql_element_cache import SqlElementCache
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class InlineArray(Expr):
|
|
20
|
+
"""
|
|
21
|
+
Array 'literal' which can use Exprs as values.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
def __init__(self, elements: Iterable):
|
|
25
|
+
exprs = []
|
|
26
|
+
for el in elements:
|
|
27
|
+
if isinstance(el, Expr):
|
|
28
|
+
exprs.append(el)
|
|
29
|
+
elif isinstance(el, list) or isinstance(el, tuple):
|
|
30
|
+
exprs.append(InlineArray(el))
|
|
31
|
+
else:
|
|
32
|
+
exprs.append(Literal(el))
|
|
33
|
+
|
|
34
|
+
inferred_element_type: Optional[ts.ColumnType] = ts.InvalidType()
|
|
35
|
+
for i, expr in enumerate(exprs):
|
|
36
|
+
supertype = inferred_element_type.supertype(expr.col_type)
|
|
37
|
+
if supertype is None:
|
|
38
|
+
raise excs.Error(
|
|
39
|
+
f'Could not infer element type of array: element of type `{expr.col_type}` at index {i} '
|
|
40
|
+
f'is not compatible with type `{inferred_element_type}` of preceding elements'
|
|
41
|
+
)
|
|
42
|
+
inferred_element_type = supertype
|
|
43
|
+
|
|
44
|
+
if inferred_element_type.is_scalar_type():
|
|
45
|
+
col_type = ts.ArrayType((len(exprs),), inferred_element_type)
|
|
46
|
+
elif inferred_element_type.is_array_type():
|
|
47
|
+
assert isinstance(inferred_element_type, ts.ArrayType)
|
|
48
|
+
col_type = ts.ArrayType(
|
|
49
|
+
(len(exprs), *inferred_element_type.shape),
|
|
50
|
+
ts.ColumnType.make_type(inferred_element_type.dtype)
|
|
51
|
+
)
|
|
52
|
+
else:
|
|
53
|
+
raise excs.Error(f'Element type is not a valid dtype for an array: {inferred_element_type}')
|
|
54
|
+
|
|
55
|
+
super().__init__(col_type)
|
|
56
|
+
self.components.extend(exprs)
|
|
57
|
+
self.id = self._create_id()
|
|
58
|
+
|
|
59
|
+
def __str__(self) -> str:
|
|
60
|
+
elem_strs = [str(expr) for expr in self.components]
|
|
61
|
+
return f'[{", ".join(elem_strs)}]'
|
|
62
|
+
|
|
63
|
+
def _equals(self, _: InlineArray) -> bool:
|
|
64
|
+
return True # Always true if components match
|
|
65
|
+
|
|
66
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
67
|
+
return None
|
|
68
|
+
|
|
69
|
+
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
70
|
+
data_row[self.slot_idx] = np.array([data_row[el.slot_idx] for el in self.components])
|
|
71
|
+
|
|
72
|
+
def _as_dict(self) -> dict:
|
|
73
|
+
return super()._as_dict()
|
|
74
|
+
|
|
75
|
+
@classmethod
|
|
76
|
+
def _from_dict(cls, _: dict, components: list[Expr]) -> Expr:
|
|
77
|
+
try:
|
|
78
|
+
return cls(components)
|
|
79
|
+
except excs.Error:
|
|
80
|
+
# For legacy compatibility reasons, we need to try constructing as an `InlineList`.
|
|
81
|
+
# This is because in schema versions <= 19, `InlineArray` was serialized incorrectly, and
|
|
82
|
+
# there is no way to determine the correct expression type until the subexpressions are
|
|
83
|
+
# loaded and their types are known.
|
|
84
|
+
return InlineList(components)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class InlineList(Expr):
|
|
88
|
+
"""
|
|
89
|
+
List 'literal' which can use Exprs as values.
|
|
90
|
+
"""
|
|
91
|
+
|
|
92
|
+
def __init__(self, elements: Iterable):
|
|
93
|
+
exprs = []
|
|
94
|
+
for el in elements:
|
|
95
|
+
if isinstance(el, Expr):
|
|
96
|
+
exprs.append(el)
|
|
97
|
+
elif isinstance(el, list) or isinstance(el, tuple):
|
|
98
|
+
exprs.append(InlineList(el))
|
|
99
|
+
elif isinstance(el, dict):
|
|
100
|
+
exprs.append(InlineDict(el))
|
|
101
|
+
else:
|
|
102
|
+
exprs.append(Literal(el))
|
|
103
|
+
|
|
104
|
+
super().__init__(ts.JsonType())
|
|
105
|
+
self.components.extend(exprs)
|
|
106
|
+
self.id = self._create_id()
|
|
107
|
+
|
|
108
|
+
def __str__(self) -> str:
|
|
109
|
+
elem_strs = [str(expr) for expr in self.components]
|
|
110
|
+
return f'[{", ".join(elem_strs)}]'
|
|
111
|
+
|
|
112
|
+
def _equals(self, _: InlineList) -> bool:
|
|
113
|
+
return True # Always true if components match
|
|
114
|
+
|
|
115
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
116
|
+
return None
|
|
117
|
+
|
|
118
|
+
def eval(self, data_row: DataRow, _: RowBuilder) -> None:
|
|
119
|
+
data_row[self.slot_idx] = [data_row[el.slot_idx] for el in self.components]
|
|
120
|
+
|
|
121
|
+
def _as_dict(self) -> dict:
|
|
122
|
+
return super()._as_dict()
|
|
123
|
+
|
|
124
|
+
@classmethod
|
|
125
|
+
def _from_dict(cls, _: dict, components: list[Expr]) -> Expr:
|
|
126
|
+
return cls(components)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class InlineDict(Expr):
|
|
130
|
+
"""
|
|
131
|
+
Dictionary 'literal' which can use Exprs as values.
|
|
132
|
+
"""
|
|
133
|
+
|
|
134
|
+
keys: list[str]
|
|
135
|
+
|
|
136
|
+
def __init__(self, d: dict[str, Any]):
|
|
137
|
+
self.keys = []
|
|
138
|
+
exprs: list[Expr] = []
|
|
139
|
+
for key, val in d.items():
|
|
140
|
+
if not isinstance(key, str):
|
|
141
|
+
raise excs.Error(f'Dictionary requires string keys; {key} has type {type(key)}')
|
|
142
|
+
self.keys.append(key)
|
|
143
|
+
if isinstance(val, Expr):
|
|
144
|
+
exprs.append(val)
|
|
145
|
+
elif isinstance(val, dict):
|
|
146
|
+
exprs.append(InlineDict(val))
|
|
147
|
+
elif isinstance(val, list) or isinstance(val, tuple):
|
|
148
|
+
exprs.append(InlineList(val))
|
|
149
|
+
else:
|
|
150
|
+
exprs.append(Literal(val))
|
|
151
|
+
|
|
152
|
+
super().__init__(ts.JsonType())
|
|
153
|
+
self.components.extend(exprs)
|
|
154
|
+
self.id = self._create_id()
|
|
155
|
+
|
|
156
|
+
def __str__(self) -> str:
|
|
157
|
+
item_strs = list(f"'{key}': {str(expr)}" for key, expr in zip(self.keys, self.components))
|
|
158
|
+
return '{' + ', '.join(item_strs) + '}'
|
|
159
|
+
|
|
160
|
+
def _equals(self, other: InlineDict) -> bool:
|
|
161
|
+
# The dict values are just the components, which have already been checked
|
|
162
|
+
return self.keys == other.keys
|
|
163
|
+
|
|
164
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
165
|
+
return super()._id_attrs() + [('keys', self.keys)]
|
|
166
|
+
|
|
167
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
168
|
+
return None
|
|
169
|
+
|
|
170
|
+
def eval(self, data_row: DataRow, _: RowBuilder) -> None:
|
|
171
|
+
assert len(self.keys) == len(self.components)
|
|
172
|
+
data_row[self.slot_idx] = {
|
|
173
|
+
key: data_row[expr.slot_idx]
|
|
174
|
+
for key, expr in zip(self.keys, self.components)
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
def to_kwargs(self) -> dict[str, Any]:
|
|
178
|
+
"""Deconstructs this expression into a dictionary by recursively unwrapping all Literals,
|
|
179
|
+
InlineDicts, and InlineLists."""
|
|
180
|
+
return InlineDict._to_kwarg_element(self)
|
|
181
|
+
|
|
182
|
+
@classmethod
|
|
183
|
+
def _to_kwarg_element(cls, expr: Expr) -> Any:
|
|
184
|
+
if isinstance(expr, Literal):
|
|
185
|
+
return expr.val
|
|
186
|
+
if isinstance(expr, InlineDict):
|
|
187
|
+
return {key: cls._to_kwarg_element(val) for key, val in zip(expr.keys, expr.components)}
|
|
188
|
+
if isinstance(expr, InlineList):
|
|
189
|
+
return [cls._to_kwarg_element(el) for el in expr.components]
|
|
190
|
+
return expr
|
|
191
|
+
|
|
192
|
+
def _as_dict(self) -> dict[str, Any]:
|
|
193
|
+
return {'keys': self.keys, **super()._as_dict()}
|
|
194
|
+
|
|
195
|
+
@classmethod
|
|
196
|
+
def _from_dict(cls, d: dict, components: list[Expr]) -> Expr:
|
|
197
|
+
assert 'keys' in d
|
|
198
|
+
assert len(d['keys']) == len(components)
|
|
199
|
+
arg = dict(zip(d['keys'], components))
|
|
200
|
+
return InlineDict(arg)
|
pixeltable/exprs/is_null.py
CHANGED
|
@@ -8,6 +8,7 @@ import pixeltable.type_system as ts
|
|
|
8
8
|
from .data_row import DataRow
|
|
9
9
|
from .expr import Expr
|
|
10
10
|
from .row_builder import RowBuilder
|
|
11
|
+
from .sql_element_cache import SqlElementCache
|
|
11
12
|
|
|
12
13
|
|
|
13
14
|
class IsNull(Expr):
|
|
@@ -22,8 +23,8 @@ class IsNull(Expr):
|
|
|
22
23
|
def _equals(self, other: IsNull) -> bool:
|
|
23
24
|
return True
|
|
24
25
|
|
|
25
|
-
def sql_expr(self) -> Optional[sql.ClauseElement]:
|
|
26
|
-
e = self.components[0]
|
|
26
|
+
def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ClauseElement]:
|
|
27
|
+
e = sql_elements.get(self.components[0])
|
|
27
28
|
if e is None:
|
|
28
29
|
return None
|
|
29
30
|
return e == None
|
pixeltable/exprs/json_mapper.py
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
+
|
|
2
3
|
from typing import Optional, List, Dict
|
|
3
4
|
|
|
4
5
|
import sqlalchemy as sql
|
|
5
6
|
|
|
6
|
-
|
|
7
|
+
import pixeltable.type_system as ts
|
|
7
8
|
from .data_row import DataRow
|
|
9
|
+
from .expr import Expr, ExprScope, _GLOBAL_SCOPE
|
|
8
10
|
from .row_builder import RowBuilder
|
|
9
|
-
|
|
10
|
-
import pixeltable.type_system as ts
|
|
11
|
+
from .sql_element_cache import SqlElementCache
|
|
11
12
|
|
|
12
13
|
|
|
13
14
|
class JsonMapper(Expr):
|
|
@@ -86,7 +87,7 @@ class JsonMapper(Expr):
|
|
|
86
87
|
def _equals(self, other: JsonMapper) -> bool:
|
|
87
88
|
return True
|
|
88
89
|
|
|
89
|
-
def sql_expr(self) -> Optional[sql.ClauseElement]:
|
|
90
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
|
|
90
91
|
return None
|
|
91
92
|
|
|
92
93
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
pixeltable/exprs/json_path.py
CHANGED
|
@@ -9,6 +9,12 @@ import pixeltable
|
|
|
9
9
|
import pixeltable.catalog as catalog
|
|
10
10
|
import pixeltable.exceptions as excs
|
|
11
11
|
import pixeltable.type_system as ts
|
|
12
|
+
from .data_row import DataRow
|
|
13
|
+
from .expr import Expr
|
|
14
|
+
from .globals import print_slice
|
|
15
|
+
from .json_mapper import JsonMapper
|
|
16
|
+
from .row_builder import RowBuilder
|
|
17
|
+
from .sql_element_cache import SqlElementCache
|
|
12
18
|
|
|
13
19
|
from .data_row import DataRow
|
|
14
20
|
from .expr import Expr
|
|
@@ -140,7 +146,7 @@ class JsonPath(Expr):
|
|
|
140
146
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
141
147
|
return super()._id_attrs() + [('path_elements', self.path_elements)]
|
|
142
148
|
|
|
143
|
-
def sql_expr(self) -> Optional[sql.ClauseElement]:
|
|
149
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
|
|
144
150
|
"""
|
|
145
151
|
Postgres appears to have a bug: jsonb_path_query('{a: [{b: 0}, {b: 1}]}', '$.a.b') returns
|
|
146
152
|
*two* rows (each containing col val 0), not a single row with [0, 0].
|
pixeltable/exprs/literal.py
CHANGED
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import datetime
|
|
4
|
-
from typing import
|
|
4
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
8
|
-
import pixeltable.exceptions as excs
|
|
9
8
|
import pixeltable.type_system as ts
|
|
9
|
+
from pixeltable.env import Env
|
|
10
|
+
|
|
10
11
|
from .data_row import DataRow
|
|
11
12
|
from .expr import Expr
|
|
12
13
|
from .row_builder import RowBuilder
|
|
14
|
+
from .sql_element_cache import SqlElementCache
|
|
13
15
|
|
|
14
16
|
|
|
15
17
|
class Literal(Expr):
|
|
@@ -22,6 +24,15 @@ class Literal(Expr):
|
|
|
22
24
|
if col_type is None:
|
|
23
25
|
raise TypeError(f'Not a valid literal: {val}')
|
|
24
26
|
super().__init__(col_type)
|
|
27
|
+
if isinstance(val, datetime.datetime):
|
|
28
|
+
# Normalize the datetime to UTC: all timestamps are stored as UTC (both in the database and in literals)
|
|
29
|
+
if val.tzinfo is None:
|
|
30
|
+
# We have a naive datetime. Modify it to use the configured default time zone
|
|
31
|
+
default_tz = Env.get().default_time_zone
|
|
32
|
+
if default_tz is not None:
|
|
33
|
+
val = val.replace(tzinfo=default_tz)
|
|
34
|
+
# Now convert to UTC
|
|
35
|
+
val = val.astimezone(datetime.timezone.utc)
|
|
25
36
|
self.val = val
|
|
26
37
|
self.id = self._create_id()
|
|
27
38
|
|
|
@@ -29,17 +40,24 @@ class Literal(Expr):
|
|
|
29
40
|
return 'Literal'
|
|
30
41
|
|
|
31
42
|
def __str__(self) -> str:
|
|
32
|
-
if self.col_type.is_string_type()
|
|
43
|
+
if self.col_type.is_string_type():
|
|
33
44
|
return f"'{self.val}'"
|
|
45
|
+
if self.col_type.is_timestamp_type():
|
|
46
|
+
assert isinstance(self.val, datetime.datetime)
|
|
47
|
+
default_tz = Env.get().default_time_zone
|
|
48
|
+
return f"'{self.val.astimezone(default_tz).isoformat()}'"
|
|
34
49
|
return str(self.val)
|
|
35
50
|
|
|
51
|
+
def __repr__(self) -> str:
|
|
52
|
+
return f'Literal({self.val!r})'
|
|
53
|
+
|
|
36
54
|
def _equals(self, other: Literal) -> bool:
|
|
37
55
|
return self.val == other.val
|
|
38
56
|
|
|
39
57
|
def _id_attrs(self) -> List[Tuple[str, Any]]:
|
|
40
58
|
return super()._id_attrs() + [('val', self.val)]
|
|
41
59
|
|
|
42
|
-
def sql_expr(self) -> Optional[sql.ClauseElement]:
|
|
60
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
|
|
43
61
|
# we need to return something here so that we can generate a Where clause for predicates
|
|
44
62
|
# that involve literals (like Where c > 0)
|
|
45
63
|
return sql.sql.expression.literal(self.val)
|
|
@@ -52,7 +70,12 @@ class Literal(Expr):
|
|
|
52
70
|
# For some types, we need to explictly record their type, because JSON does not know
|
|
53
71
|
# how to interpret them unambiguously
|
|
54
72
|
if self.col_type.is_timestamp_type():
|
|
55
|
-
|
|
73
|
+
assert isinstance(self.val, datetime.datetime)
|
|
74
|
+
assert self.val.tzinfo == datetime.timezone.utc # Must be UTC in a literal
|
|
75
|
+
# Convert to ISO format in UTC (in keeping with the principle: all timestamps are
|
|
76
|
+
# stored as UTC in the database)
|
|
77
|
+
encoded_val = self.val.isoformat()
|
|
78
|
+
return {'val': encoded_val, 'val_t': self.col_type._type.name, **super()._as_dict()}
|
|
56
79
|
else:
|
|
57
80
|
return {'val': self.val, **super()._as_dict()}
|
|
58
81
|
|
|
@@ -61,6 +84,10 @@ class Literal(Expr):
|
|
|
61
84
|
assert 'val' in d
|
|
62
85
|
if 'val_t' in d:
|
|
63
86
|
val_t = d['val_t']
|
|
87
|
+
# Currently the only special-cased literal type is TIMESTAMP
|
|
64
88
|
assert val_t == ts.ColumnType.Type.TIMESTAMP.name
|
|
65
|
-
|
|
66
|
-
|
|
89
|
+
dt = datetime.datetime.fromisoformat(d['val'])
|
|
90
|
+
assert dt.tzinfo == datetime.timezone.utc # Must be UTC in the database
|
|
91
|
+
return cls(dt)
|
|
92
|
+
else:
|
|
93
|
+
return cls(d['val'])
|
pixeltable/exprs/method_ref.py
CHANGED
|
@@ -2,12 +2,12 @@ from typing import Any, Optional
|
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
5
|
-
import pixeltable.exceptions as excs
|
|
6
5
|
import pixeltable.type_system as ts
|
|
7
6
|
from pixeltable.exprs import Expr, FunctionCall
|
|
8
|
-
from pixeltable.func import FunctionRegistry
|
|
7
|
+
from pixeltable.func import FunctionRegistry
|
|
9
8
|
from .data_row import DataRow
|
|
10
9
|
from .row_builder import RowBuilder
|
|
10
|
+
from .sql_element_cache import SqlElementCache
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class MethodRef(Expr):
|
|
@@ -53,7 +53,7 @@ class MethodRef(Expr):
|
|
|
53
53
|
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
54
54
|
return super()._id_attrs() + [('method_name', self.method_name)]
|
|
55
55
|
|
|
56
|
-
def sql_expr(self) -> Optional[sql.ClauseElement]:
|
|
56
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
|
|
57
57
|
return None
|
|
58
58
|
|
|
59
59
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
pixeltable/exprs/object_ref.py
CHANGED
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
7
|
+
import pixeltable.type_system as ts
|
|
8
|
+
from .data_row import DataRow
|
|
7
9
|
from .expr import Expr, ExprScope
|
|
8
10
|
from .json_mapper import JsonMapper
|
|
9
|
-
from .data_row import DataRow
|
|
10
11
|
from .row_builder import RowBuilder
|
|
11
|
-
|
|
12
|
+
from .sql_element_cache import SqlElementCache
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
class ObjectRef(Expr):
|
|
@@ -32,7 +33,7 @@ class ObjectRef(Expr):
|
|
|
32
33
|
def _equals(self, other: ObjectRef) -> bool:
|
|
33
34
|
return self.owner is other.owner
|
|
34
35
|
|
|
35
|
-
def sql_expr(self) -> Optional[sql.ClauseElement]:
|
|
36
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
|
|
36
37
|
return None
|
|
37
38
|
|
|
38
39
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
pixeltable/exprs/row_builder.py
CHANGED
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import sys
|
|
4
4
|
import time
|
|
5
5
|
from dataclasses import dataclass
|
|
6
|
-
from typing import
|
|
6
|
+
from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple
|
|
7
7
|
|
|
8
8
|
import sqlalchemy as sql
|
|
9
9
|
|
|
@@ -11,6 +11,7 @@ import pixeltable.catalog as catalog
|
|
|
11
11
|
import pixeltable.exceptions as excs
|
|
12
12
|
import pixeltable.func as func
|
|
13
13
|
import pixeltable.utils as utils
|
|
14
|
+
|
|
14
15
|
from .data_row import DataRow
|
|
15
16
|
from .expr import Expr
|
|
16
17
|
from .expr_set import ExprSet
|
|
@@ -22,7 +23,7 @@ class ExecProfile:
|
|
|
22
23
|
self.eval_count = [0] * row_builder.num_materialized
|
|
23
24
|
self.row_builder = row_builder
|
|
24
25
|
|
|
25
|
-
def print(self, num_rows: int) ->
|
|
26
|
+
def print(self, num_rows: int) -> None:
|
|
26
27
|
for i in range(self.row_builder.num_materialized):
|
|
27
28
|
if self.eval_count[i] == 0:
|
|
28
29
|
continue
|
|
@@ -57,7 +58,7 @@ class RowBuilder:
|
|
|
57
58
|
target_exprs: List[Expr] # exprs corresponding to target_slot_idxs
|
|
58
59
|
|
|
59
60
|
def __init__(
|
|
60
|
-
self, output_exprs: Sequence[Expr], columns: Sequence[catalog.Column], input_exprs:
|
|
61
|
+
self, output_exprs: Sequence[Expr], columns: Sequence[catalog.Column], input_exprs: Iterable[Expr]
|
|
61
62
|
):
|
|
62
63
|
"""
|
|
63
64
|
Args:
|
|
@@ -96,7 +97,7 @@ class RowBuilder:
|
|
|
96
97
|
expr = ColumnRef(col)
|
|
97
98
|
expr = self._record_unique_expr(expr, recursive=False)
|
|
98
99
|
self.add_table_column(col, expr.slot_idx)
|
|
99
|
-
self.output_exprs.
|
|
100
|
+
self.output_exprs.add(expr)
|
|
100
101
|
|
|
101
102
|
# default eval ctx: all output exprs
|
|
102
103
|
self.default_eval_ctx = self.create_eval_ctx(list(self.output_exprs), exclude=unique_input_exprs)
|
|
@@ -193,7 +194,7 @@ class RowBuilder:
|
|
|
193
194
|
expr.components[i] = self._record_unique_expr(c, True)
|
|
194
195
|
assert expr.slot_idx is None
|
|
195
196
|
expr.slot_idx = self._next_slot_idx()
|
|
196
|
-
self.unique_exprs.
|
|
197
|
+
self.unique_exprs.add(expr)
|
|
197
198
|
return expr
|
|
198
199
|
|
|
199
200
|
def _record_output_expr_id(self, e: Expr, output_expr_id: int) -> None:
|
|
@@ -227,18 +228,25 @@ class RowBuilder:
|
|
|
227
228
|
# merge dependencies and convert to list
|
|
228
229
|
return sorted(set().union(*[dependencies[i] for i in target_slot_idxs]))
|
|
229
230
|
|
|
230
|
-
def
|
|
231
|
-
"""
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
231
|
+
def set_slot_idxs(self, expr_list: Sequence[Expr], remove_duplicates: bool = True) -> None:
|
|
232
|
+
"""
|
|
233
|
+
Recursively sets slot_idx in expr_list and its components
|
|
234
|
+
|
|
235
|
+
remove_duplicates == True: removes duplicates in-place
|
|
236
|
+
"""
|
|
237
|
+
for e in expr_list:
|
|
238
|
+
self.__set_slot_idxs_aux(e)
|
|
239
|
+
if remove_duplicates:
|
|
240
|
+
deduped = list(ExprSet(expr_list))
|
|
241
|
+
expr_list[:] = deduped
|
|
242
|
+
|
|
243
|
+
def __set_slot_idxs_aux(self, e: Expr) -> None:
|
|
244
|
+
"""Recursively sets slot_idx in e and its components"""
|
|
245
|
+
if e not in self.unique_exprs:
|
|
246
|
+
return
|
|
247
|
+
e.slot_idx = self.unique_exprs[e].slot_idx
|
|
248
|
+
for c in e.components:
|
|
249
|
+
self.__set_slot_idxs_aux(c)
|
|
242
250
|
|
|
243
251
|
def get_dependencies(self, targets: List[Expr], exclude: Optional[List[Expr]] = None) -> List[Expr]:
|
|
244
252
|
"""
|
pixeltable/exprs/rowid_ref.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
from typing import Optional, List, Any, Dict, Tuple
|
|
3
|
+
from .sql_element_cache import SqlElementCache
|
|
3
4
|
from uuid import UUID
|
|
4
5
|
|
|
5
6
|
import sqlalchemy as sql
|
|
@@ -72,7 +73,7 @@ class RowidRef(Expr):
|
|
|
72
73
|
self.tbl = tbl.tbl_version
|
|
73
74
|
self.tbl_id = self.tbl.id
|
|
74
75
|
|
|
75
|
-
def sql_expr(self) -> Optional[sql.ClauseElement]:
|
|
76
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
|
|
76
77
|
tbl = self.tbl if self.tbl is not None else catalog.Catalog.get().tbl_versions[(self.tbl_id, None)]
|
|
77
78
|
rowid_cols = tbl.store_tbl.rowid_columns()
|
|
78
79
|
return rowid_cols[self.rowid_component_idx]
|
pixeltable/exprs/similarity_expr.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from typing import Optional, List, Any
|
|
2
|
+
from .sql_element_cache import SqlElementCache
|
|
2
3
|
|
|
3
4
|
import sqlalchemy as sql
|
|
4
5
|
import PIL.Image
|
|
@@ -56,7 +57,7 @@ class SimilarityExpr(Expr):
|
|
|
56
57
|
def __str__(self) -> str:
|
|
57
58
|
return f'{self.components[0]}.similarity({self.components[1]})'
|
|
58
59
|
|
|
59
|
-
def sql_expr(self) -> Optional[sql.ClauseElement]:
|
|
60
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
|
|
60
61
|
if not isinstance(self.components[1], Literal):
|
|
61
62
|
raise excs.Error(f'similarity(): requires a string or a PIL.Image.Image object, not an expression')
|
|
62
63
|
item = self.components[1].val
|
pixeltable/exprs/sql_element_cache.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from typing import Iterable, Union, Optional
|
|
2
|
+
|
|
3
|
+
import sqlalchemy as sql
|
|
4
|
+
|
|
5
|
+
from .expr import Expr
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class SqlElementCache:
|
|
9
|
+
"""Cache of sql.ColumnElements for exprs"""
|
|
10
|
+
|
|
11
|
+
cache: dict[int, Optional[sql.ColumnElement]] # key: Expr.id
|
|
12
|
+
|
|
13
|
+
def __init__(self):
|
|
14
|
+
self.cache = {}
|
|
15
|
+
|
|
16
|
+
def get(self, e: Expr) -> Optional[sql.ColumnElement]:
|
|
17
|
+
"""Returns the sql.ColumnElement for the given Expr, or None if Expr.to_sql() returns None."""
|
|
18
|
+
try:
|
|
19
|
+
return self.cache[e.id]
|
|
20
|
+
except KeyError:
|
|
21
|
+
pass
|
|
22
|
+
el = e.sql_expr(self)
|
|
23
|
+
self.cache[e.id] = el
|
|
24
|
+
return el
|
|
25
|
+
|
|
26
|
+
def contains(self, items: Union[Expr, Iterable[Expr]]) -> bool:
|
|
27
|
+
"""Returns True if every item has a (non-None) sql.ColumnElement."""
|
|
28
|
+
if isinstance(items, Expr):
|
|
29
|
+
return self.get(items) is not None
|
|
30
|
+
return all(self.get(e) is not None for e in items)
|
pixeltable/exprs/type_cast.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import json
|
|
2
1
|
from typing import Optional, Dict, List, Tuple, Any
|
|
3
2
|
|
|
4
3
|
import sqlalchemy as sql
|
|
@@ -6,6 +5,7 @@ import sqlalchemy as sql
|
|
|
6
5
|
import pixeltable.type_system as ts
|
|
7
6
|
from .expr import DataRow, Expr
|
|
8
7
|
from .row_builder import RowBuilder
|
|
8
|
+
from .sql_element_cache import SqlElementCache
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class TypeCast(Expr):
|
|
@@ -29,9 +29,9 @@ class TypeCast(Expr):
|
|
|
29
29
|
def _id_attrs(self) -> List[Tuple[str, Any]]:
|
|
30
30
|
return super()._id_attrs() + [('new_type', self.col_type)]
|
|
31
31
|
|
|
32
|
-
def sql_expr(self) -> Optional[sql.ClauseElement]:
|
|
32
|
+
def sql_expr(self, _: SqlElementCache) -> Optional[sql.ClauseElement]:
|
|
33
33
|
"""
|
|
34
|
-
|
|
34
|
+
sql_expr() is unimplemented for now, in order to sidestep potentially thorny
|
|
35
35
|
questions about consistency of doing type conversions in both Python and Postgres.
|
|
36
36
|
"""
|
|
37
37
|
return None
|
pixeltable/exprs/variable.py
CHANGED
|
@@ -6,6 +6,7 @@ import pixeltable.type_system as ts
|
|
|
6
6
|
from .data_row import DataRow
|
|
7
7
|
from .expr import Expr
|
|
8
8
|
from .row_builder import RowBuilder
|
|
9
|
+
from .sql_element_cache import SqlElementCache
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
class Variable(Expr):
|
|
@@ -31,7 +32,7 @@ class Variable(Expr):
|
|
|
31
32
|
def __str__(self) -> str:
|
|
32
33
|
return self.name
|
|
33
34
|
|
|
34
|
-
def sql_expr(self) -> NoReturn:
|
|
35
|
+
def sql_expr(self, _: SqlElementCache) -> NoReturn:
|
|
35
36
|
raise NotImplementedError()
|
|
36
37
|
|
|
37
38
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> NoReturn:
|
pixeltable/ext/functions/whisperx.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import TYPE_CHECKING, Optional
|
|
2
2
|
|
|
3
3
|
from pixeltable.utils.code import local_public_names
|
|
4
4
|
|
|
5
5
|
if TYPE_CHECKING:
|
|
6
|
-
from whisperx.asr import FasterWhisperPipeline
|
|
6
|
+
from whisperx.asr import FasterWhisperPipeline # type: ignore[import-untyped]
|
|
7
7
|
|
|
8
8
|
import pixeltable as pxt
|
|
9
9
|
|
|
@@ -19,6 +19,8 @@ def transcribe(
|
|
|
19
19
|
equivalent to the WhisperX `transcribe` function, as described in the
|
|
20
20
|
[WhisperX library documentation](https://github.com/m-bain/whisperX).
|
|
21
21
|
|
|
22
|
+
WhisperX is part of the `pixeltable.ext` package: long-term support in Pixeltable is not guaranteed.
|
|
23
|
+
|
|
22
24
|
__Requirements:__
|
|
23
25
|
|
|
24
26
|
- `pip install whisperx`
|
|
@@ -40,7 +42,7 @@ def transcribe(
|
|
|
40
42
|
>>> tbl['result'] = transcribe(tbl.audio, model='tiny.en')
|
|
41
43
|
"""
|
|
42
44
|
import torch
|
|
43
|
-
import whisperx
|
|
45
|
+
import whisperx # type: ignore[import-untyped]
|
|
44
46
|
|
|
45
47
|
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
|
46
48
|
compute_type = compute_type or ('float16' if device == 'cuda' else 'int8')
|
|
@@ -60,7 +62,7 @@ def _lookup_model(model_id: str, device: str, compute_type: str) -> 'FasterWhisp
|
|
|
60
62
|
return _model_cache[key]
|
|
61
63
|
|
|
62
64
|
|
|
63
|
-
_model_cache = {}
|
|
65
|
+
_model_cache: dict[tuple[str, str, str], 'FasterWhisperPipeline'] = {}
|
|
64
66
|
|
|
65
67
|
|
|
66
68
|
__all__ = local_public_names(__name__)
|