pixeltable 0.1.2__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +21 -4
- pixeltable/catalog/__init__.py +13 -0
- pixeltable/catalog/catalog.py +159 -0
- pixeltable/catalog/column.py +200 -0
- pixeltable/catalog/dir.py +32 -0
- pixeltable/catalog/globals.py +33 -0
- pixeltable/catalog/insertable_table.py +191 -0
- pixeltable/catalog/named_function.py +36 -0
- pixeltable/catalog/path.py +58 -0
- pixeltable/catalog/path_dict.py +139 -0
- pixeltable/catalog/schema_object.py +39 -0
- pixeltable/catalog/table.py +581 -0
- pixeltable/catalog/table_version.py +749 -0
- pixeltable/catalog/table_version_path.py +133 -0
- pixeltable/catalog/view.py +203 -0
- pixeltable/client.py +520 -31
- pixeltable/dataframe.py +540 -349
- pixeltable/env.py +373 -48
- pixeltable/exceptions.py +12 -21
- pixeltable/exec/__init__.py +9 -0
- pixeltable/exec/aggregation_node.py +78 -0
- pixeltable/exec/cache_prefetch_node.py +113 -0
- pixeltable/exec/component_iteration_node.py +79 -0
- pixeltable/exec/data_row_batch.py +95 -0
- pixeltable/exec/exec_context.py +22 -0
- pixeltable/exec/exec_node.py +61 -0
- pixeltable/exec/expr_eval_node.py +217 -0
- pixeltable/exec/in_memory_data_node.py +69 -0
- pixeltable/exec/media_validation_node.py +43 -0
- pixeltable/exec/sql_scan_node.py +225 -0
- pixeltable/exprs/__init__.py +24 -0
- pixeltable/exprs/arithmetic_expr.py +102 -0
- pixeltable/exprs/array_slice.py +71 -0
- pixeltable/exprs/column_property_ref.py +77 -0
- pixeltable/exprs/column_ref.py +105 -0
- pixeltable/exprs/comparison.py +77 -0
- pixeltable/exprs/compound_predicate.py +98 -0
- pixeltable/exprs/data_row.py +187 -0
- pixeltable/exprs/expr.py +586 -0
- pixeltable/exprs/expr_set.py +39 -0
- pixeltable/exprs/function_call.py +380 -0
- pixeltable/exprs/globals.py +69 -0
- pixeltable/exprs/image_member_access.py +115 -0
- pixeltable/exprs/image_similarity_predicate.py +58 -0
- pixeltable/exprs/inline_array.py +107 -0
- pixeltable/exprs/inline_dict.py +101 -0
- pixeltable/exprs/is_null.py +38 -0
- pixeltable/exprs/json_mapper.py +121 -0
- pixeltable/exprs/json_path.py +159 -0
- pixeltable/exprs/literal.py +54 -0
- pixeltable/exprs/object_ref.py +41 -0
- pixeltable/exprs/predicate.py +44 -0
- pixeltable/exprs/row_builder.py +355 -0
- pixeltable/exprs/rowid_ref.py +94 -0
- pixeltable/exprs/type_cast.py +53 -0
- pixeltable/exprs/variable.py +45 -0
- pixeltable/func/__init__.py +9 -0
- pixeltable/func/aggregate_function.py +194 -0
- pixeltable/func/batched_function.py +53 -0
- pixeltable/func/callable_function.py +69 -0
- pixeltable/func/expr_template_function.py +82 -0
- pixeltable/func/function.py +110 -0
- pixeltable/func/function_registry.py +227 -0
- pixeltable/func/globals.py +36 -0
- pixeltable/func/nos_function.py +202 -0
- pixeltable/func/signature.py +166 -0
- pixeltable/func/udf.py +163 -0
- pixeltable/functions/__init__.py +52 -103
- pixeltable/functions/eval.py +216 -0
- pixeltable/functions/fireworks.py +61 -0
- pixeltable/functions/huggingface.py +120 -0
- pixeltable/functions/image.py +16 -0
- pixeltable/functions/openai.py +88 -0
- pixeltable/functions/pil/image.py +148 -7
- pixeltable/functions/string.py +13 -0
- pixeltable/functions/together.py +27 -0
- pixeltable/functions/util.py +41 -0
- pixeltable/functions/video.py +62 -0
- pixeltable/iterators/__init__.py +3 -0
- pixeltable/iterators/base.py +48 -0
- pixeltable/iterators/document.py +311 -0
- pixeltable/iterators/video.py +89 -0
- pixeltable/metadata/__init__.py +54 -0
- pixeltable/metadata/converters/convert_10.py +18 -0
- pixeltable/metadata/schema.py +211 -0
- pixeltable/plan.py +656 -0
- pixeltable/store.py +413 -182
- pixeltable/tests/conftest.py +143 -86
- pixeltable/tests/test_audio.py +65 -0
- pixeltable/tests/test_catalog.py +27 -0
- pixeltable/tests/test_client.py +14 -14
- pixeltable/tests/test_component_view.py +372 -0
- pixeltable/tests/test_dataframe.py +433 -0
- pixeltable/tests/test_dirs.py +78 -62
- pixeltable/tests/test_document.py +117 -0
- pixeltable/tests/test_exprs.py +591 -135
- pixeltable/tests/test_function.py +297 -67
- pixeltable/tests/test_functions.py +283 -1
- pixeltable/tests/test_migration.py +43 -0
- pixeltable/tests/test_nos.py +54 -0
- pixeltable/tests/test_snapshot.py +208 -0
- pixeltable/tests/test_table.py +1086 -258
- pixeltable/tests/test_transactional_directory.py +42 -0
- pixeltable/tests/test_types.py +5 -11
- pixeltable/tests/test_video.py +149 -34
- pixeltable/tests/test_view.py +530 -0
- pixeltable/tests/utils.py +186 -45
- pixeltable/tool/create_test_db_dump.py +149 -0
- pixeltable/type_system.py +490 -133
- pixeltable/utils/__init__.py +17 -46
- pixeltable/utils/clip.py +12 -15
- pixeltable/utils/coco.py +136 -0
- pixeltable/utils/documents.py +39 -0
- pixeltable/utils/filecache.py +195 -0
- pixeltable/utils/help.py +11 -0
- pixeltable/utils/media_store.py +76 -0
- pixeltable/utils/parquet.py +126 -0
- pixeltable/utils/pytorch.py +172 -0
- pixeltable/utils/s3.py +13 -0
- pixeltable/utils/sql.py +17 -0
- pixeltable/utils/transactional_directory.py +35 -0
- pixeltable-0.2.1.dist-info/LICENSE +18 -0
- pixeltable-0.2.1.dist-info/METADATA +119 -0
- pixeltable-0.2.1.dist-info/RECORD +125 -0
- {pixeltable-0.1.2.dist-info → pixeltable-0.2.1.dist-info}/WHEEL +1 -1
- pixeltable/catalog.py +0 -1421
- pixeltable/exprs.py +0 -1745
- pixeltable/function.py +0 -269
- pixeltable/functions/clip.py +0 -10
- pixeltable/functions/pil/__init__.py +0 -23
- pixeltable/functions/tf.py +0 -21
- pixeltable/index.py +0 -57
- pixeltable/tests/test_dict.py +0 -24
- pixeltable/tests/test_tf.py +0 -69
- pixeltable/tf.py +0 -33
- pixeltable/utils/tf.py +0 -33
- pixeltable/utils/video.py +0 -32
- pixeltable-0.1.2.dist-info/LICENSE +0 -201
- pixeltable-0.1.2.dist-info/METADATA +0 -89
- pixeltable-0.1.2.dist-info/RECORD +0 -37
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Iterable, Optional
|
|
3
|
+
|
|
4
|
+
from .data_row_batch import DataRowBatch
|
|
5
|
+
from .exec_node import ExecNode
|
|
6
|
+
import pixeltable.exprs as exprs
|
|
7
|
+
import pixeltable.exceptions as excs
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class MediaValidationNode(ExecNode):
    """Validation of selected media slots

    Records exceptions in the rows of the input batch
    """
    def __init__(
            self, row_builder: exprs.RowBuilder, media_slots: Iterable[exprs.ColumnSlotIdx],
            input: Optional[ExecNode]):
        """
        Args:
            row_builder: forwarded to ExecNode and used to record validation exceptions in rows
            media_slots: slots to validate; the column of every slot must have a media type
            input: node that supplies the batches to validate
        """
        super().__init__(row_builder, [], [], input)
        self.row_builder = row_builder
        self.input = input
        # materialize once: media_slots is only required to be an Iterable, but it is traversed here and
        # again for every row in __next__(); a one-shot iterable would be exhausted after the first pass
        self.media_slots = list(media_slots)
        for slot in self.media_slots:
            assert slot.col.col_type.is_media_type()

    def __next__(self) -> DataRowBatch:
        """Return the next input batch after validating the media files of the selected slots.

        A validation failure is recorded in the affected row/slot; it is re-raised unless
        ctx.ignore_errors is set.
        """
        assert self.input is not None
        row_batch = next(self.input)
        for row in row_batch:
            for slot in self.media_slots:
                slot_idx, col = slot.slot_idx, slot.col
                if row.has_exc(slot_idx):
                    # this slot already carries an exception, nothing to validate
                    continue
                assert row.has_val[slot_idx]
                path = row.file_paths[slot_idx]
                if path is None:
                    # no file path recorded for this slot
                    continue

                try:
                    col.col_type.validate_media(path)
                except excs.Error as exc:
                    self.row_builder.set_exc(row, slot_idx, exc)
                    if not self.ctx.ignore_errors:
                        raise

        return row_batch
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
from typing import List, Optional, Tuple, Iterable, Set
|
|
2
|
+
from uuid import UUID
|
|
3
|
+
import logging
|
|
4
|
+
import warnings
|
|
5
|
+
|
|
6
|
+
import sqlalchemy as sql
|
|
7
|
+
|
|
8
|
+
from .data_row_batch import DataRowBatch
|
|
9
|
+
from .exec_node import ExecNode
|
|
10
|
+
import pixeltable.exprs as exprs
|
|
11
|
+
import pixeltable.catalog as catalog
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
_logger = logging.getLogger('pixeltable')
|
|
15
|
+
|
|
16
|
+
class SqlScanNode(ExecNode):
    """Materializes data from the store via SQL
    """
    def __init__(
            self, tbl: catalog.TableVersionPath, row_builder: exprs.RowBuilder,
            select_list: Iterable[exprs.Expr],
            where_clause: Optional[exprs.Expr] = None, filter: Optional[exprs.Predicate] = None,
            order_by_items: Optional[List[Tuple[exprs.Expr, bool]]] = None,
            similarity_clause: Optional[exprs.ImageSimilarityPredicate] = None,
            limit: int = 0, set_pk: bool = False, exact_version_only: Optional[List[catalog.TableVersion]] = None
    ):
        """
        Args:
            tbl: table to scan; tbl.tbl_version is the stored table the Select stmt runs against
            row_builder: forwarded to ExecNode; also used to evaluate the filter
            select_list: output of the query
            where_clause: Where-clause predicate that is evaluated via SQL
            filter: additional Where-clause predicate that can't be evaluated via SQL
            order_by_items: (expr, ascending) pairs; ascending=False produces a descending sort
            similarity_clause: if set, the result is ordered by the L2 distance between the index column
                of the clause's image column and the clause's embedding
            limit: max number of rows to return: 0 = no limit
            set_pk: if True, sets the primary for each DataRow
            exact_version_only: tables for which we only want to see rows created at the current version
        """
        # create Select stmt
        if order_by_items is None:
            order_by_items = []
        if exact_version_only is None:
            exact_version_only = []
        super().__init__(row_builder, [], [], None)
        self.tbl = tbl
        target = tbl.tbl_version  # the stored table we're scanning
        self.sql_exprs = exprs.ExprSet(select_list)
        # unstored iter columns: we also need to retrieve whatever is needed to materialize the iter args
        for iter_arg in row_builder.unstored_iter_args.values():
            sql_subexprs = iter_arg.subexprs(filter=lambda e: e.sql_expr() is not None, traverse_matches=False)
            for e in sql_subexprs:
                self.sql_exprs.append(e)
        self.filter = filter
        self.filter_eval_ctx = \
            row_builder.create_eval_ctx([filter], exclude=select_list) if filter is not None else None
        self.limit = limit

        # change rowid refs against a base table to rowid refs against the target table, so that we minimize
        # the number of tables that need to be joined to the target table
        for rowid_ref in [e for e in self.sql_exprs if isinstance(e, exprs.RowidRef)]:
            rowid_ref.set_tbl(tbl)

        where_clause_tbl_ids = where_clause.tbl_ids() if where_clause is not None else set()
        refd_tbl_ids = exprs.Expr.list_tbl_ids(self.sql_exprs) | where_clause_tbl_ids
        sql_select_list = [e.sql_expr() for e in self.sql_exprs]
        assert len(sql_select_list) == len(self.sql_exprs)
        assert all(e is not None for e in sql_select_list)
        self.set_pk = set_pk
        self.num_pk_cols = 0
        if set_pk:
            # we also need to retrieve the pk columns; they are appended after the select list,
            # which is what __next__() relies on when it slices them off the end of each sql row
            pk_columns = target.store_tbl.pk_columns()
            self.num_pk_cols = len(pk_columns)
            sql_select_list += pk_columns

        self.stmt = sql.select(*sql_select_list)
        self.stmt = self.create_from_clause(
            tbl, self.stmt, refd_tbl_ids, exact_version_only={t.id for t in exact_version_only})

        # change rowid refs against a base table to rowid refs against the target table, so that we minimize
        # the number of tables that need to be joined to the target table
        for rowid_ref in [e for e, _ in order_by_items if isinstance(e, exprs.RowidRef)]:
            rowid_ref.set_tbl(tbl)
        order_by_clause = [e.sql_expr().desc() if not asc else e.sql_expr() for e, asc in order_by_items]

        if where_clause is not None:
            sql_where_clause = where_clause.sql_expr()
            assert sql_where_clause is not None
            self.stmt = self.stmt.where(sql_where_clause)
        if similarity_clause is not None:
            self.stmt = self.stmt.order_by(
                similarity_clause.img_col_ref.col.sa_idx_col.l2_distance(similarity_clause.embedding()))
        if len(order_by_clause) > 0:
            self.stmt = self.stmt.order_by(*order_by_clause)
        elif target.id in row_builder.unstored_iter_args:
            # we are referencing unstored iter columns from this view and try to order by our primary key,
            # which ensures that iterators will see monotonically increasing pos values
            # NOTE(review): this reads store_tbl from the TableVersionPath, whereas the pk columns above come
            # from target.store_tbl; confirm that TableVersionPath exposes store_tbl
            self.stmt = self.stmt.order_by(*self.tbl.store_tbl.rowid_columns())
        if limit != 0 and self.filter is None:
            # if we need to do post-SQL filtering, we can't use LIMIT
            self.stmt = self.stmt.limit(limit)

        self.result_cursor: Optional[sql.engine.CursorResult] = None

        try:
            # log stmt, if possible
            stmt_str = str(self.stmt.compile(compile_kwargs={'literal_binds': True}))
            _logger.debug(f'SqlScanNode stmt:\n{stmt_str}')
        except Exception:
            # logging is best-effort: a stmt that can't be rendered with literal binds must not fail the scan
            _logger.debug('SqlScanNode: could not render stmt for logging', exc_info=True)

    @classmethod
    def create_from_clause(
            cls, tbl: catalog.TableVersionPath, stmt: sql.Select, refd_tbl_ids: Optional[Set[UUID]] = None,
            exact_version_only: Optional[Set[UUID]] = None
    ) -> sql.Select:
        """Add From clause to stmt for the referenced tables/views
        Args:
            tbl: root table of join chain
            stmt: stmt to add From clause to
            refd_tbl_ids: ids of the tables in tbl's version chain that are referenced by the query; the first
                table of the chain is always included, the others only if their id is in this set
            exact_version_only: set of table ids for which we only want to see rows created at the current version
        Returns:
            augmented stmt
        """
        if refd_tbl_ids is None:
            refd_tbl_ids = set()
        if exact_version_only is None:
            exact_version_only = set()
        candidates = tbl.get_tbl_versions()
        assert len(candidates) > 0
        # we need to include at least the root
        joined_tbls: List[catalog.TableVersion] = [candidates[0]]
        joined_tbls.extend(t for t in candidates[1:] if t.id in refd_tbl_ids)

        prev_tbl: Optional[catalog.TableVersion] = None
        for tbl_version in reversed(joined_tbls):
            store_tbl = tbl_version.store_tbl
            if prev_tbl is None:
                # first table: this is what we select from
                stmt = stmt.select_from(store_tbl.sa_tbl)
            else:
                # join this table to prev_tbl on prev_tbl's rowid cols
                prev_tbl_rowid_cols = prev_tbl.store_tbl.rowid_columns()
                tbl_rowid_cols = store_tbl.rowid_columns()
                rowid_clauses = \
                    [c1 == c2 for c1, c2 in zip(prev_tbl_rowid_cols, tbl_rowid_cols[:len(prev_tbl_rowid_cols)])]
                stmt = stmt.join(store_tbl.sa_tbl, sql.and_(*rowid_clauses))
            if tbl_version.id in exact_version_only:
                stmt = stmt.where(store_tbl.v_min_col == tbl_version.version)
            else:
                # rows visible at this version: v_min <= version < v_max
                stmt = stmt \
                    .where(store_tbl.v_min_col <= tbl_version.version) \
                    .where(store_tbl.v_max_col > tbl_version.version)
            prev_tbl = tbl_version
        return stmt

    def _log_explain(self, conn: sql.engine.Connection) -> None:
        """Run EXPLAIN for the stmt on conn and log the plan; failures are logged, never raised."""
        try:
            # don't set dialect=Env.get().engine.dialect: x % y turns into x %% y, which results in a syntax error
            stmt_str = str(self.stmt.compile(compile_kwargs={'literal_binds': True}))
            explain_result = conn.execute(sql.text(f'EXPLAIN {stmt_str}'))
            explain_str = '\n'.join([str(row) for row in explain_result])
            _logger.debug(f'SqlScanNode explain:\n{explain_str}')
        except Exception:
            _logger.warning('EXPLAIN failed')
            _logger.debug('EXPLAIN failure details', exc_info=True)

    def __next__(self) -> DataRowBatch:
        """Return the next batch of rows; executes the query on the first call.

        Raises:
            StopIteration: when there are no more rows
        """
        if self.result_cursor is None:
            # run the query; do this here rather than in _open(), exceptions are only expected during iteration
            assert self.ctx.conn is not None
            try:
                self._log_explain(self.ctx.conn)
                # record=True captures warnings raised during execute() instead of emitting them;
                # the captured warnings are discarded
                with warnings.catch_warnings(record=True):
                    self.result_cursor = self.ctx.conn.execute(self.stmt)
                self.has_more_rows = True
            except Exception:
                self.has_more_rows = False
                raise

        if not self.has_more_rows:
            raise StopIteration

        output_batch = DataRowBatch(self.tbl.tbl_version, self.row_builder)
        needs_row = True
        # batch_size == 0: everything goes into a single batch
        while self.ctx.batch_size == 0 or len(output_batch) < self.ctx.batch_size:
            try:
                sql_row = next(self.result_cursor)
            except StopIteration:
                self.has_more_rows = False
                break

            if needs_row:
                output_row = output_batch.add_row()
            if self.num_pk_cols > 0:
                # the pk columns were appended at the end of the select list
                output_row.set_pk(tuple(sql_row[-self.num_pk_cols:]))
            # copy the output of the SQL query into the output row
            for i, e in enumerate(self.sql_exprs):
                output_row[e.slot_idx] = sql_row[i]
            if self.filter is not None:
                self.row_builder.eval(output_row, self.filter_eval_ctx, profile=self.ctx.profile)
                if output_row[self.filter.slot_idx]:
                    needs_row = True
                    # limit == 0 means 'no limit' and must not terminate the scan after the first match
                    # NOTE(review): the limit is checked against the current batch only; confirm whether it
                    # also needs to hold across batches when ctx.batch_size < limit
                    if self.limit is not None and self.limit != 0 and len(output_batch) >= self.limit:
                        self.has_more_rows = False
                        break
                else:
                    # we re-use this row for the next sql row if it didn't pass the filter
                    needs_row = False
                    output_row.clear()

        if not needs_row:
            # the last row didn't pass the filter
            assert self.filter is not None
            output_batch.pop_row()

        _logger.debug(f'SqlScanNode: returning {len(output_batch)} rows')
        if len(output_batch) == 0:
            raise StopIteration
        return output_batch

    def _close(self) -> None:
        """Close the result cursor, if the query was executed."""
        if self.result_cursor is not None:
            self.result_cursor.close()
|
|
225
|
+
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from .arithmetic_expr import ArithmeticExpr
|
|
2
|
+
from .array_slice import ArraySlice
|
|
3
|
+
from .column_property_ref import ColumnPropertyRef
|
|
4
|
+
from .column_ref import ColumnRef
|
|
5
|
+
from .comparison import Comparison
|
|
6
|
+
from .compound_predicate import CompoundPredicate
|
|
7
|
+
from .data_row import DataRow
|
|
8
|
+
from .expr import Expr
|
|
9
|
+
from .function_call import FunctionCall
|
|
10
|
+
from .image_member_access import ImageMemberAccess
|
|
11
|
+
from .image_similarity_predicate import ImageSimilarityPredicate
|
|
12
|
+
from .inline_array import InlineArray
|
|
13
|
+
from .inline_dict import InlineDict
|
|
14
|
+
from .is_null import IsNull
|
|
15
|
+
from .json_mapper import JsonMapper
|
|
16
|
+
from .json_path import RELATIVE_PATH_ROOT, JsonPath
|
|
17
|
+
from .literal import Literal
|
|
18
|
+
from .object_ref import ObjectRef
|
|
19
|
+
from .variable import Variable
|
|
20
|
+
from .predicate import Predicate
|
|
21
|
+
from .row_builder import RowBuilder, ColumnSlotIdx, ExecProfile
|
|
22
|
+
from .rowid_ref import RowidRef
|
|
23
|
+
from .expr_set import ExprSet
|
|
24
|
+
from .type_cast import TypeCast
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional, List, Any, Dict, Tuple
|
|
3
|
+
|
|
4
|
+
import sqlalchemy as sql
|
|
5
|
+
|
|
6
|
+
from .globals import ArithmeticOperator
|
|
7
|
+
from .expr import Expr
|
|
8
|
+
from .data_row import DataRow
|
|
9
|
+
from .row_builder import RowBuilder
|
|
10
|
+
import pixeltable.exceptions as excs
|
|
11
|
+
import pixeltable.catalog as catalog
|
|
12
|
+
import pixeltable.type_system as ts
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ArithmeticExpr(Expr):
    """
    Binary arithmetic (+, -, *, /, %) on two operand exprs; also allows arithmetic exprs on json paths
    """
    def __init__(self, operator: ArithmeticOperator, op1: Expr, op2: Expr):
        # TODO: determine most specific common supertype
        if op1.col_type.is_json_type() or op2.col_type.is_json_type():
            # we assume it's a float
            result_type = ts.FloatType()
        else:
            result_type = ts.ColumnType.supertype(op1.col_type, op2.col_type)
        super().__init__(result_type)
        self.operator = operator
        self.components = [op1, op2]

        # do typechecking after initialization in order for __str__() to work
        for operand in (op1, op2):
            if not (operand.col_type.is_numeric_type() or operand.col_type.is_json_type()):
                raise excs.Error(
                    f'{self}: {operator} requires numeric types, but {operand} has type {operand.col_type}')

        self.id = self._create_id()

    def __str__(self) -> str:
        # add parentheses around operands that are ArithmeticExprs to express precedence
        rendered = [
            f'({operand})' if isinstance(operand, ArithmeticExpr) else str(operand)
            for operand in (self._op1, self._op2)
        ]
        return f'{rendered[0]} {str(self.operator)} {rendered[1]}'

    def _equals(self, other: ArithmeticExpr) -> bool:
        return self.operator == other.operator

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        attrs = super()._id_attrs()
        return attrs + [('operator', self.operator.value)]

    @property
    def _op1(self) -> Expr:
        """Left operand"""
        return self.components[0]

    @property
    def _op2(self) -> Expr:
        """Right operand"""
        return self.components[1]

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        """Return the SQL form of this expr, or None if either operand has no SQL form."""
        left = self._op1.sql_expr()
        right = self._op2.sql_expr()
        if left is None or right is None:
            return None
        builders = {
            ArithmeticOperator.ADD: lambda: left + right,
            ArithmeticOperator.SUB: lambda: left - right,
            ArithmeticOperator.MUL: lambda: left * right,
            ArithmeticOperator.DIV: lambda: left / right,
            ArithmeticOperator.MOD: lambda: left % right,
        }
        build = builders.get(self.operator)
        if build is None:
            return None
        return build()

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Compute the result from the operands' slots and store it in this expr's slot."""
        operand_vals = [(self._op1, data_row[self._op1.slot_idx]), (self._op2, data_row[self._op2.slot_idx])]
        # check types if we couldn't do that prior to execution
        for operand, val in operand_vals:
            if operand.col_type.is_json_type() and not isinstance(val, (int, float)):
                raise excs.Error(
                    f'{self.operator} requires numeric type, but {operand} has type {type(val).__name__}')

        lhs, rhs = operand_vals[0][1], operand_vals[1][1]
        compute = {
            ArithmeticOperator.ADD: lambda: lhs + rhs,
            ArithmeticOperator.SUB: lambda: lhs - rhs,
            ArithmeticOperator.MUL: lambda: lhs * rhs,
            ArithmeticOperator.DIV: lambda: lhs / rhs,
            ArithmeticOperator.MOD: lambda: lhs % rhs,
        }.get(self.operator)
        if compute is not None:
            data_row[self.slot_idx] = compute()

    def _as_dict(self) -> Dict:
        result: Dict = {'operator': self.operator.value}
        result.update(super()._as_dict())
        return result

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        assert 'operator' in d
        assert len(components) == 2
        left, right = components[0], components[1]
        return cls(ArithmeticOperator(d['operator']), left, right)
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional, List, Any, Dict, Tuple
|
|
3
|
+
import copy
|
|
4
|
+
|
|
5
|
+
import sqlalchemy as sql
|
|
6
|
+
|
|
7
|
+
from .expr import Expr
|
|
8
|
+
from .globals import print_slice
|
|
9
|
+
from .data_row import DataRow
|
|
10
|
+
from .row_builder import RowBuilder
|
|
11
|
+
import pixeltable.catalog as catalog
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ArraySlice(Expr):
    """
    Slice operation on an array, eg, t.array_col[:, 1:2].
    """
    def __init__(self, arr: Expr, index: Tuple):
        assert arr.col_type.is_array_type()
        # determine result type: the array's own type is reused
        super().__init__(arr.col_type)
        self.components = [arr]
        self.index = index
        self.id = self._create_id()

    def __str__(self) -> str:
        # only int and slice elements are rendered
        index_strs: List[str] = [
            str(el) if isinstance(el, int) else print_slice(el)
            for el in self.index
            if isinstance(el, (int, slice))
        ]
        joined = ", ".join(index_strs)
        return f'{self._array}[{joined}]'

    @property
    def _array(self) -> Expr:
        """The expr being sliced"""
        return self.components[0]

    def _equals(self, other: ArraySlice) -> bool:
        return self.index == other.index

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        attrs = super()._id_attrs()
        return attrs + [('index', self.index)]

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        # no SQL form
        return None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Apply the index to the array value and store the result in this expr's slot."""
        arr_val = data_row[self._array.slot_idx]
        data_row[self.slot_idx] = arr_val[self.index]

    def _as_dict(self) -> Dict:
        # slices are serialized as [start, stop, step] lists, everything else as-is
        serialized = [
            [el.start, el.stop, el.step] if isinstance(el, slice) else el
            for el in self.index
        ]
        return {'index': serialized, **super()._as_dict()}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        assert 'index' in d
        # inverse of _as_dict(): lists turn back into slices
        restored = tuple(
            slice(el[0], el[1], el[2]) if isinstance(el, list) else el
            for el in d['index']
        )
        return cls(components[0], restored)
|
|
71
|
+
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional, List, Any, Dict, Tuple
|
|
3
|
+
import enum
|
|
4
|
+
|
|
5
|
+
import sqlalchemy as sql
|
|
6
|
+
|
|
7
|
+
from .expr import Expr
|
|
8
|
+
from .column_ref import ColumnRef
|
|
9
|
+
from .row_builder import RowBuilder
|
|
10
|
+
from .data_row import DataRow
|
|
11
|
+
import pixeltable.catalog as catalog
|
|
12
|
+
import pixeltable.type_system as ts
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ColumnPropertyRef(Expr):
    """A reference to a property of a table column

    The properties themselves are type-specific and may or may not need to reference the underlying column data.
    """
    class Property(enum.Enum):
        ERRORTYPE = 0
        ERRORMSG = 1
        FILEURL = 2
        LOCALPATH = 3

    def __init__(self, col_ref: ColumnRef, prop: Property):
        """
        Args:
            col_ref: reference to the column whose property is accessed
            prop: the property to access
        """
        # every property is exposed as a nullable string
        super().__init__(ts.StringType(nullable=True))
        self.components = [col_ref]
        self.prop = prop
        self.id = self._create_id()

    def default_column_name(self) -> Optional[str]:
        """Return str(self) with '.' replaced by '_'."""
        return str(self).replace('.', '_')

    def _equals(self, other: ColumnPropertyRef) -> bool:
        return self.prop == other.prop

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        return super()._id_attrs() + [('prop', self.prop.value)]

    @property
    def _col_ref(self) -> ColumnRef:
        """The referenced column"""
        return self.components[0]

    def __str__(self) -> str:
        return f'{self._col_ref}.{self.prop.name.lower()}'

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        """Return the SQL form of the property, or None if it has to be computed in eval().

        None is returned for unstored columns and for LOCALPATH.
        """
        if not self._col_ref.col.is_stored:
            return None
        if self.prop == self.Property.ERRORTYPE:
            assert self._col_ref.col.sa_errortype_col is not None
            return self._col_ref.col.sa_errortype_col
        if self.prop == self.Property.ERRORMSG:
            assert self._col_ref.col.sa_errormsg_col is not None
            return self._col_ref.col.sa_errormsg_col
        if self.prop == self.Property.FILEURL:
            # the file url is stored as the column value
            return self._col_ref.sql_expr()
        return None

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Copy the file url or local path of the referenced column's slot into this expr's slot.

        Only FILEURL and LOCALPATH are handled here.
        """
        assert self.prop == self.Property.FILEURL or self.prop == self.Property.LOCALPATH
        assert data_row.has_val[self._col_ref.slot_idx]
        if self.prop == self.Property.FILEURL:
            data_row[self.slot_idx] = data_row.file_urls[self._col_ref.slot_idx]
        elif self.prop == self.Property.LOCALPATH:
            data_row[self.slot_idx] = data_row.file_paths[self._col_ref.slot_idx]

    def _as_dict(self) -> Dict:
        return {'prop': self.prop.value, **super()._as_dict()}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        assert 'prop' in d
        return cls(components[0], cls.Property(d['prop']))
|
|
77
|
+
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional, List, Any, Dict, Tuple
|
|
3
|
+
from uuid import UUID
|
|
4
|
+
|
|
5
|
+
import sqlalchemy as sql
|
|
6
|
+
|
|
7
|
+
from .expr import Expr
|
|
8
|
+
from .data_row import DataRow
|
|
9
|
+
from .row_builder import RowBuilder
|
|
10
|
+
import pixeltable.iterators as iters
|
|
11
|
+
import pixeltable.exceptions as excs
|
|
12
|
+
import pixeltable.catalog as catalog
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ColumnRef(Expr):
    """A reference to a table column

    When this reference is created in the context of a view, it can also refer to a column of the view base.
    For that reason, a ColumnRef needs to be serialized with the qualifying table id (column ids are only
    unique in the context of a particular table).
    """
    def __init__(self, col: catalog.Column):
        super().__init__(col.col_type)
        assert col.tbl is not None
        self.col = col
        owner = col.tbl
        self.is_unstored_iter_col = \
            owner.is_component_view() and owner.is_iterator_column(col) and not col.is_stored
        self.iter_arg_ctx: Optional[RowBuilder.EvalCtx] = None
        if self.is_unstored_iter_col:
            # number of rowid columns in the base table
            self.base_rowid_len = owner.base.num_rowid_columns()
            # index of the position column in the view's primary key; don't try to reference tbl.store_tbl here
            self.pos_idx: Optional[int] = owner.num_rowid_columns() - 1
        else:
            self.base_rowid_len = 0
            self.pos_idx: Optional[int] = None
        # rowid of the base row the current iterator was created for
        self.base_rowid = [None] * self.base_rowid_len
        self.iterator: Optional[iters.ComponentIterator] = None
        self.id = self._create_id()

    def set_iter_arg_ctx(self, iter_arg_ctx: RowBuilder.EvalCtx) -> None:
        """Set the eval ctx used by eval() to materialize the iterator args."""
        self.iter_arg_ctx = iter_arg_ctx
        assert len(self.iter_arg_ctx.target_slot_idxs) == 1  # a single inline dict

    def _id_attrs(self) -> List[Tuple[str, Any]]:
        own_attrs = [('tbl_id', self.col.tbl.id), ('col_id', self.col.id)]
        return super()._id_attrs() + own_attrs

    def __getattr__(self, name: str) -> Expr:
        """Resolve column properties and, for json columns, json paths; everything else goes to the superclass."""
        from .column_property_ref import ColumnPropertyRef
        props = ColumnPropertyRef.Property

        # resolve column properties
        if name in (props.ERRORTYPE.name.lower(), props.ERRORMSG.name.lower()):
            is_stored_computed = self.col.is_computed and self.col.is_stored
            if not (is_stored_computed or self.col.col_type.is_media_type()):
                raise excs.Error(f'{name} only valid for a stored computed or media column: {self}')
            return ColumnPropertyRef(self, props[name.upper()])

        if name in (props.FILEURL.name.lower(), props.LOCALPATH.name.lower()):
            if not self.col.col_type.is_media_type():
                raise excs.Error(f'{name} only valid for image/video/audio/document columns: {self}')
            if self.col.is_computed and not self.col.is_stored:
                raise excs.Error(f'{name} not valid for computed unstored columns: {self}')
            return ColumnPropertyRef(self, props[name.upper()])

        if not self.col_type.is_json_type():
            return super().__getattr__(name)

        # any other attribute of a json column is a path into the json value
        from .json_path import JsonPath
        return JsonPath(self, [name])

    def default_column_name(self) -> Optional[str]:
        return str(self)

    def _equals(self, other: ColumnRef) -> bool:
        return self.col == other.col

    def __str__(self) -> str:
        return self.col.name

    def sql_expr(self) -> Optional[sql.ClauseElement]:
        return self.col.sa_col

    def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
        """Materialize the value of an unstored iterator column; for any other column the value must be present."""
        if not self.is_unstored_iter_col:
            assert data_row.has_val[self.slot_idx]
            return

        row_base_rowid = data_row.pk[:self.base_rowid_len]
        if self.base_rowid != row_base_rowid:
            # this is a new base row: we need to instantiate a new iterator
            row_builder.eval(data_row, self.iter_arg_ctx)
            args_slot_idx = self.iter_arg_ctx.target_slot_idxs[0]
            iterator_args = data_row[args_slot_idx]
            self.iterator = self.col.tbl.iterator_cls(**iterator_args)
            self.base_rowid = data_row.pk[:self.base_rowid_len]
        # position the iterator at this row's pos value and take that element's output for our column
        self.iterator.set_pos(data_row.pk[self.pos_idx])
        iter_output = next(self.iterator)
        data_row[self.slot_idx] = iter_output[self.col.name]

    def _as_dict(self) -> Dict:
        tbl = self.col.tbl
        # the version is only recorded for snapshots
        if tbl.is_snapshot:
            version = tbl.version
        else:
            version = None
        return {'tbl_id': str(tbl.id), 'tbl_version': version, 'col_id': self.col.id}

    @classmethod
    def _from_dict(cls, d: Dict, components: List[Expr]) -> Expr:
        tbl_id = UUID(d['tbl_id'])
        version = d['tbl_version']
        col_id = d['col_id']
        tbl_version = catalog.Catalog.get().tbl_versions[(tbl_id, version)]
        assert col_id in tbl_version.cols_by_id
        return cls(tbl_version.cols_by_id[col_id])
|
|
105
|
+
|