pixeltable 0.4.15__py3-none-any.whl → 0.4.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +4 -0
- pixeltable/catalog/catalog.py +125 -63
- pixeltable/catalog/column.py +7 -2
- pixeltable/catalog/table.py +1 -0
- pixeltable/catalog/table_metadata.py +4 -0
- pixeltable/catalog/table_version.py +174 -117
- pixeltable/catalog/table_version_handle.py +4 -1
- pixeltable/catalog/table_version_path.py +0 -11
- pixeltable/catalog/view.py +6 -0
- pixeltable/config.py +7 -0
- pixeltable/dataframe.py +10 -5
- pixeltable/env.py +56 -19
- pixeltable/exec/__init__.py +2 -0
- pixeltable/exec/cell_materialization_node.py +231 -0
- pixeltable/exec/cell_reconstruction_node.py +135 -0
- pixeltable/exec/exec_node.py +1 -1
- pixeltable/exec/expr_eval/evaluators.py +1 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +3 -0
- pixeltable/exec/expr_eval/globals.py +2 -0
- pixeltable/exec/globals.py +32 -0
- pixeltable/exec/object_store_save_node.py +1 -4
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +107 -14
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +23 -18
- pixeltable/exprs/column_property_ref.py +10 -10
- pixeltable/exprs/column_ref.py +2 -2
- pixeltable/exprs/data_row.py +106 -37
- pixeltable/exprs/expr.py +9 -0
- pixeltable/exprs/expr_set.py +14 -7
- pixeltable/exprs/inline_expr.py +2 -19
- pixeltable/exprs/json_path.py +45 -12
- pixeltable/exprs/row_builder.py +54 -22
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/bedrock.py +7 -0
- pixeltable/functions/deepseek.py +11 -4
- pixeltable/functions/llama_cpp.py +7 -0
- pixeltable/functions/math.py +1 -1
- pixeltable/functions/ollama.py +7 -0
- pixeltable/functions/openai.py +4 -4
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/video.py +110 -28
- pixeltable/globals.py +10 -4
- pixeltable/io/globals.py +18 -17
- pixeltable/io/parquet.py +1 -1
- pixeltable/io/table_data_conduit.py +47 -22
- pixeltable/iterators/document.py +61 -23
- pixeltable/iterators/video.py +126 -53
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/notes.py +1 -0
- pixeltable/plan.py +175 -46
- pixeltable/share/packager.py +155 -26
- pixeltable/store.py +2 -3
- pixeltable/type_system.py +5 -3
- pixeltable/utils/arrow.py +6 -6
- pixeltable/utils/av.py +65 -0
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/exception_handler.py +5 -28
- pixeltable/utils/image.py +7 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +16 -1
- pixeltable/utils/s3_store.py +44 -11
- {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/METADATA +29 -28
- {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/RECORD +68 -61
- {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/licenses/LICENSE +0 -0
pixeltable/exec/object_store_save_node.py
CHANGED
|
@@ -7,7 +7,6 @@ from collections import defaultdict, deque
|
|
|
7
7
|
from concurrent import futures
|
|
8
8
|
from pathlib import Path
|
|
9
9
|
from typing import AsyncIterator, Iterator, NamedTuple, Optional
|
|
10
|
-
from uuid import UUID
|
|
11
10
|
|
|
12
11
|
from pixeltable import exprs
|
|
13
12
|
from pixeltable.utils.object_stores import ObjectOps, ObjectPath, StorageTarget
|
|
@@ -81,9 +80,7 @@ class ObjectStoreSaveNode(ExecNode):
|
|
|
81
80
|
num_missing: int # number of references to media files in this row
|
|
82
81
|
delete_destinations: list[Path] # paths to delete after all copies are complete
|
|
83
82
|
|
|
84
|
-
def __init__(
|
|
85
|
-
self, tbl_id: UUID, file_col_info: list[exprs.ColumnSlotIdx], input: ExecNode, retain_input_order: bool = True
|
|
86
|
-
):
|
|
83
|
+
def __init__(self, file_col_info: list[exprs.ColumnSlotIdx], input: ExecNode, retain_input_order: bool = True):
|
|
87
84
|
# input_/output_exprs=[]: we don't have anything to evaluate
|
|
88
85
|
super().__init__(input.row_builder, [], [], input)
|
|
89
86
|
self.retain_input_order = retain_input_order
|
pixeltable/exec/row_update_node.py
CHANGED
|
@@ -14,10 +14,18 @@ class RowUpdateNode(ExecNode):
|
|
|
14
14
|
Update individual rows in the input batches, identified by key columns.
|
|
15
15
|
|
|
16
16
|
The updates for a row are provided as a dict of column names to new values.
|
|
17
|
-
|
|
18
|
-
|
|
17
|
+
Populates the slots of the columns present in the update list.
|
|
18
|
+
Assumptions:
|
|
19
|
+
- all update dicts contain the same keys
|
|
20
|
+
- the input node populates DataRow.cell_vals for all primary key columns
|
|
19
21
|
"""
|
|
20
22
|
|
|
23
|
+
updates: dict[tuple, dict[catalog.Column, Any]]
|
|
24
|
+
is_rowid_key: bool # if True, key_vals_batch contains rowids rather than primary key values
|
|
25
|
+
col_slot_idxs: dict[catalog.Column, int]
|
|
26
|
+
pk_columns: list[catalog.Column]
|
|
27
|
+
matched_key_vals: set[tuple]
|
|
28
|
+
|
|
21
29
|
def __init__(
|
|
22
30
|
self,
|
|
23
31
|
tbl: catalog.TableVersionPath,
|
|
@@ -37,16 +45,16 @@ class RowUpdateNode(ExecNode):
|
|
|
37
45
|
for col_ref in row_builder.unique_exprs
|
|
38
46
|
if isinstance(col_ref, exprs.ColumnRef)
|
|
39
47
|
}
|
|
48
|
+
# all update target columns should have assigned slot idxs
|
|
49
|
+
assert all(col in all_col_slot_idxs for col in col_vals_batch[0])
|
|
40
50
|
self.col_slot_idxs = {col: all_col_slot_idxs[col] for col in col_vals_batch[0]}
|
|
41
|
-
self.
|
|
42
|
-
self.matched_key_vals
|
|
51
|
+
self.pk_columns = tbl.tbl_version.get().primary_key_columns()
|
|
52
|
+
self.matched_key_vals = set()
|
|
43
53
|
|
|
44
54
|
async def __aiter__(self) -> AsyncIterator[DataRowBatch]:
|
|
45
55
|
async for batch in self.input:
|
|
46
56
|
for row in batch:
|
|
47
|
-
key_vals = (
|
|
48
|
-
row.rowid if self.is_rowid_key else tuple(row[slot_idx] for slot_idx in self.key_slot_idxs.values())
|
|
49
|
-
)
|
|
57
|
+
key_vals = row.rowid if self.is_rowid_key else tuple(row.cell_vals[col.id] for col in self.pk_columns)
|
|
50
58
|
if key_vals not in self.updates:
|
|
51
59
|
continue
|
|
52
60
|
self.matched_key_vals.add(key_vals)
|
|
@@ -59,11 +67,10 @@ class RowUpdateNode(ExecNode):
|
|
|
59
67
|
def unmatched_rows(self) -> list[dict[str, Any]]:
|
|
60
68
|
"""Return rows that didn't get used in the updates as a list of dicts compatible with TableVersion.insert()."""
|
|
61
69
|
result: list[dict[str, Any]] = []
|
|
62
|
-
key_cols = self.key_slot_idxs.keys()
|
|
63
70
|
for key_vals, col_vals in self.updates.items():
|
|
64
71
|
if key_vals in self.matched_key_vals:
|
|
65
72
|
continue
|
|
66
|
-
row = {col.name: val for col, val in zip(
|
|
73
|
+
row = {col.name: val for col, val in zip(self.pk_columns, key_vals)}
|
|
67
74
|
row.update({col.name: val for col, val in col_vals.items()})
|
|
68
75
|
result.append(row)
|
|
69
76
|
return result
|
pixeltable/exec/sql_node.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import datetime
|
|
1
2
|
import logging
|
|
2
3
|
import warnings
|
|
3
4
|
from decimal import Decimal
|
|
@@ -65,7 +66,7 @@ def print_order_by_clause(clause: OrderByClause) -> str:
|
|
|
65
66
|
|
|
66
67
|
class SqlNode(ExecNode):
|
|
67
68
|
"""
|
|
68
|
-
Materializes data from the store via
|
|
69
|
+
Materializes data from the store via a SQL statement.
|
|
69
70
|
This only provides the select list. The subclasses are responsible for the From clause and any additional clauses.
|
|
70
71
|
The pk columns are not included in the select list.
|
|
71
72
|
If set_pk is True, they are added to the end of the result set when creating the SQL statement
|
|
@@ -82,6 +83,8 @@ class SqlNode(ExecNode):
|
|
|
82
83
|
|
|
83
84
|
tbl: Optional[catalog.TableVersionPath]
|
|
84
85
|
select_list: exprs.ExprSet
|
|
86
|
+
columns: list[catalog.Column] # for which columns to populate DataRow.cell_vals/cell_md
|
|
87
|
+
cell_md_refs: list[exprs.ColumnPropertyRef] # of ColumnRefs which also need DataRow.slot_cellmd for evaluation
|
|
85
88
|
set_pk: bool
|
|
86
89
|
num_pk_cols: int
|
|
87
90
|
py_filter: Optional[exprs.Expr] # a predicate that can only be run in Python
|
|
@@ -89,6 +92,12 @@ class SqlNode(ExecNode):
|
|
|
89
92
|
cte: Optional[sql.CTE]
|
|
90
93
|
sql_elements: exprs.SqlElementCache
|
|
91
94
|
|
|
95
|
+
# execution state
|
|
96
|
+
cellmd_item_idxs: exprs.ExprDict[int] # cellmd expr -> idx in sql select list
|
|
97
|
+
column_item_idxs: dict[catalog.Column, int] # column -> idx in sql select list
|
|
98
|
+
column_cellmd_item_idxs: dict[catalog.Column, int] # column -> idx in sql select list
|
|
99
|
+
result_cursor: sql.engine.CursorResult | None
|
|
100
|
+
|
|
92
101
|
# where_clause/-_element: allow subclass to set one or the other (but not both)
|
|
93
102
|
where_clause: Optional[exprs.Expr]
|
|
94
103
|
where_clause_element: Optional[sql.ColumnElement]
|
|
@@ -101,12 +110,22 @@ class SqlNode(ExecNode):
|
|
|
101
110
|
tbl: Optional[catalog.TableVersionPath],
|
|
102
111
|
row_builder: exprs.RowBuilder,
|
|
103
112
|
select_list: Iterable[exprs.Expr],
|
|
113
|
+
columns: list[catalog.Column],
|
|
104
114
|
sql_elements: exprs.SqlElementCache,
|
|
115
|
+
cell_md_col_refs: list[exprs.ColumnRef] | None = None,
|
|
105
116
|
set_pk: bool = False,
|
|
106
117
|
):
|
|
107
118
|
# create Select stmt
|
|
108
119
|
self.sql_elements = sql_elements
|
|
109
120
|
self.tbl = tbl
|
|
121
|
+
self.columns = columns
|
|
122
|
+
if cell_md_col_refs is not None:
|
|
123
|
+
assert all(ref.col.stores_cellmd for ref in cell_md_col_refs)
|
|
124
|
+
self.cell_md_refs = [
|
|
125
|
+
exprs.ColumnPropertyRef(ref, exprs.ColumnPropertyRef.Property.CELLMD) for ref in cell_md_col_refs
|
|
126
|
+
]
|
|
127
|
+
else:
|
|
128
|
+
self.cell_md_refs = []
|
|
110
129
|
self.select_list = exprs.ExprSet(select_list)
|
|
111
130
|
# unstored iter columns: we also need to retrieve whatever is needed to materialize the iter args
|
|
112
131
|
for iter_arg in row_builder.unstored_iter_args.values():
|
|
@@ -129,6 +148,9 @@ class SqlNode(ExecNode):
|
|
|
129
148
|
assert self.num_pk_cols > 1
|
|
130
149
|
|
|
131
150
|
# additional state
|
|
151
|
+
self.cellmd_item_idxs = exprs.ExprDict()
|
|
152
|
+
self.column_item_idxs = {}
|
|
153
|
+
self.column_cellmd_item_idxs = {}
|
|
132
154
|
self.result_cursor = None
|
|
133
155
|
# the filter is provided by the subclass
|
|
134
156
|
self.py_filter = None
|
|
@@ -144,10 +166,9 @@ class SqlNode(ExecNode):
|
|
|
144
166
|
if tv is not None:
|
|
145
167
|
assert tv.is_validated
|
|
146
168
|
|
|
147
|
-
def
|
|
148
|
-
"""Create a list of pk columns"""
|
|
149
|
-
# we need to retrieve the pk columns
|
|
169
|
+
def _pk_col_items(self) -> list[sql.Column]:
|
|
150
170
|
if self.set_pk:
|
|
171
|
+
# we need to retrieve the pk columns
|
|
151
172
|
assert self.tbl is not None
|
|
152
173
|
assert self.tbl.tbl_version.get().is_validated
|
|
153
174
|
return self.tbl.tbl_version.get().store_tbl.pk_columns()
|
|
@@ -157,7 +178,19 @@ class SqlNode(ExecNode):
|
|
|
157
178
|
"""Create Select from local state"""
|
|
158
179
|
|
|
159
180
|
assert self.sql_elements.contains_all(self.select_list)
|
|
160
|
-
|
|
181
|
+
sql_select_list_exprs = exprs.ExprSet(self.select_list)
|
|
182
|
+
self.cellmd_item_idxs = exprs.ExprDict((ref, sql_select_list_exprs.add(ref)) for ref in self.cell_md_refs)
|
|
183
|
+
column_refs = [exprs.ColumnRef(col) for col in self.columns]
|
|
184
|
+
self.column_item_idxs = {col_ref.col: sql_select_list_exprs.add(col_ref) for col_ref in column_refs}
|
|
185
|
+
column_cellmd_refs = [
|
|
186
|
+
exprs.ColumnPropertyRef(col_ref, exprs.ColumnPropertyRef.Property.CELLMD)
|
|
187
|
+
for col_ref in column_refs
|
|
188
|
+
if col_ref.col.stores_cellmd
|
|
189
|
+
]
|
|
190
|
+
self.column_cellmd_item_idxs = {
|
|
191
|
+
cellmd_ref.col_ref.col: sql_select_list_exprs.add(cellmd_ref) for cellmd_ref in column_cellmd_refs
|
|
192
|
+
}
|
|
193
|
+
sql_select_list = [self.sql_elements.get(e) for e in sql_select_list_exprs] + self._pk_col_items()
|
|
161
194
|
stmt = sql.select(*sql_select_list)
|
|
162
195
|
|
|
163
196
|
where_clause_element = (
|
|
@@ -198,9 +231,7 @@ class SqlNode(ExecNode):
|
|
|
198
231
|
if not keep_pk:
|
|
199
232
|
self.set_pk = False # we don't need the PK if we use this SqlNode as a CTE
|
|
200
233
|
self.cte = self._create_stmt().cte()
|
|
201
|
-
|
|
202
|
-
assert len(self.select_list) + pk_count == len(self.cte.c)
|
|
203
|
-
return self.cte, exprs.ExprDict(zip(self.select_list, self.cte.c)) # skip pk cols
|
|
234
|
+
return self.cte, exprs.ExprDict(zip(list(self.select_list) + self.cell_md_refs, self.cte.c)) # skip pk cols
|
|
204
235
|
|
|
205
236
|
@classmethod
|
|
206
237
|
def retarget_rowid_refs(cls, target: catalog.TableVersionPath, expr_seq: Iterable[exprs.Expr]) -> None:
|
|
@@ -318,24 +349,53 @@ class SqlNode(ExecNode):
|
|
|
318
349
|
output_batch = DataRowBatch(self.row_builder)
|
|
319
350
|
output_row: Optional[exprs.DataRow] = None
|
|
320
351
|
num_rows_returned = 0
|
|
352
|
+
is_using_cockroachdb = Env.get().is_using_cockroachdb
|
|
353
|
+
tzinfo = Env.get().default_time_zone
|
|
321
354
|
|
|
322
355
|
for sql_row in result_cursor:
|
|
323
356
|
output_row = output_batch.add_row(output_row)
|
|
324
357
|
|
|
325
358
|
# populate output_row
|
|
359
|
+
|
|
326
360
|
if self.num_pk_cols > 0:
|
|
327
361
|
output_row.set_pk(tuple(sql_row[-self.num_pk_cols :]))
|
|
362
|
+
|
|
363
|
+
# column copies
|
|
364
|
+
for col, item_idx in self.column_item_idxs.items():
|
|
365
|
+
output_row.cell_vals[col.id] = sql_row[item_idx]
|
|
366
|
+
for col, item_idx in self.column_cellmd_item_idxs.items():
|
|
367
|
+
cell_md_dict = sql_row[item_idx]
|
|
368
|
+
output_row.cell_md[col.id] = exprs.CellMd(**cell_md_dict) if cell_md_dict is not None else None
|
|
369
|
+
|
|
370
|
+
# populate DataRow.slot_cellmd, where requested
|
|
371
|
+
for cellmd_ref, item_idx in self.cellmd_item_idxs.items():
|
|
372
|
+
cell_md_dict = sql_row[item_idx]
|
|
373
|
+
output_row.slot_md[cellmd_ref.col_ref.slot_idx] = (
|
|
374
|
+
exprs.CellMd.from_dict(cell_md_dict) if cell_md_dict is not None else None
|
|
375
|
+
)
|
|
376
|
+
|
|
328
377
|
# copy the output of the SQL query into the output row
|
|
329
378
|
for i, e in enumerate(self.select_list):
|
|
330
379
|
slot_idx = e.slot_idx
|
|
331
|
-
# certain numerical operations can produce Decimals (eg, SUM(<int column>)); we need to convert them
|
|
332
380
|
if isinstance(sql_row[i], Decimal):
|
|
381
|
+
# certain numerical operations can produce Decimals (eg, SUM(<int column>)); we need to convert them
|
|
333
382
|
if e.col_type.is_int_type():
|
|
334
383
|
output_row[slot_idx] = int(sql_row[i])
|
|
335
384
|
elif e.col_type.is_float_type():
|
|
336
385
|
output_row[slot_idx] = float(sql_row[i])
|
|
337
386
|
else:
|
|
338
387
|
raise RuntimeError(f'Unexpected Decimal value for {e}')
|
|
388
|
+
elif is_using_cockroachdb and isinstance(sql_row[i], datetime.datetime):
|
|
389
|
+
# Ensure that the datetime is timezone-aware and in the session time zone
|
|
390
|
+
# cockroachDB returns timestamps in the session time zone, with numeric offset,
|
|
391
|
+
# convert to the session time zone with the requested tzinfo for DST handling
|
|
392
|
+
if e.col_type.is_timestamp_type():
|
|
393
|
+
if isinstance(sql_row[i].tzinfo, datetime.timezone):
|
|
394
|
+
output_row[slot_idx] = sql_row[i].astimezone(tz=tzinfo)
|
|
395
|
+
else:
|
|
396
|
+
output_row[slot_idx] = sql_row[i]
|
|
397
|
+
else:
|
|
398
|
+
raise RuntimeError(f'Unexpected datetime value for {e}')
|
|
339
399
|
else:
|
|
340
400
|
output_row[slot_idx] = sql_row[i]
|
|
341
401
|
|
|
@@ -387,11 +447,21 @@ class SqlScanNode(SqlNode):
|
|
|
387
447
|
tbl: catalog.TableVersionPath,
|
|
388
448
|
row_builder: exprs.RowBuilder,
|
|
389
449
|
select_list: Iterable[exprs.Expr],
|
|
450
|
+
columns: list[catalog.Column],
|
|
451
|
+
cell_md_col_refs: list[exprs.ColumnRef] | None = None,
|
|
390
452
|
set_pk: bool = False,
|
|
391
453
|
exact_version_only: Optional[list[catalog.TableVersionHandle]] = None,
|
|
392
454
|
):
|
|
393
455
|
sql_elements = exprs.SqlElementCache()
|
|
394
|
-
super().__init__(
|
|
456
|
+
super().__init__(
|
|
457
|
+
tbl,
|
|
458
|
+
row_builder,
|
|
459
|
+
select_list,
|
|
460
|
+
columns=columns,
|
|
461
|
+
sql_elements=sql_elements,
|
|
462
|
+
set_pk=set_pk,
|
|
463
|
+
cell_md_col_refs=cell_md_col_refs,
|
|
464
|
+
)
|
|
395
465
|
# create Select stmt
|
|
396
466
|
if exact_version_only is None:
|
|
397
467
|
exact_version_only = []
|
|
@@ -423,11 +493,21 @@ class SqlLookupNode(SqlNode):
|
|
|
423
493
|
tbl: catalog.TableVersionPath,
|
|
424
494
|
row_builder: exprs.RowBuilder,
|
|
425
495
|
select_list: Iterable[exprs.Expr],
|
|
496
|
+
columns: list[catalog.Column],
|
|
426
497
|
sa_key_cols: list[sql.Column],
|
|
427
498
|
key_vals: list[tuple],
|
|
499
|
+
cell_md_col_refs: list[exprs.ColumnRef] | None = None,
|
|
428
500
|
):
|
|
429
501
|
sql_elements = exprs.SqlElementCache()
|
|
430
|
-
super().__init__(
|
|
502
|
+
super().__init__(
|
|
503
|
+
tbl,
|
|
504
|
+
row_builder,
|
|
505
|
+
select_list,
|
|
506
|
+
columns=columns,
|
|
507
|
+
sql_elements=sql_elements,
|
|
508
|
+
set_pk=True,
|
|
509
|
+
cell_md_col_refs=cell_md_col_refs,
|
|
510
|
+
)
|
|
431
511
|
# Where clause: (key-col-1, key-col-2, ...) IN ((val-1, val-2, ...), ...)
|
|
432
512
|
self.where_clause_element = sql.tuple_(*sa_key_cols).in_(key_vals)
|
|
433
513
|
|
|
@@ -460,9 +540,10 @@ class SqlAggregationNode(SqlNode):
|
|
|
460
540
|
limit: Optional[int] = None,
|
|
461
541
|
exact_version_only: Optional[list[catalog.TableVersion]] = None,
|
|
462
542
|
):
|
|
543
|
+
assert len(input.cell_md_refs) == 0 # there's no aggregation over json or arrays in SQL
|
|
463
544
|
self.input_cte, input_col_map = input.to_cte()
|
|
464
545
|
sql_elements = exprs.SqlElementCache(input_col_map)
|
|
465
|
-
super().__init__(None, row_builder, select_list, sql_elements)
|
|
546
|
+
super().__init__(None, row_builder, select_list, columns=[], sql_elements=sql_elements)
|
|
466
547
|
self.group_by_items = group_by_items
|
|
467
548
|
|
|
468
549
|
def _create_stmt(self) -> sql.Select:
|
|
@@ -498,7 +579,10 @@ class SqlJoinNode(SqlNode):
|
|
|
498
579
|
input_cte, input_col_map = input_node.to_cte()
|
|
499
580
|
self.input_ctes.append(input_cte)
|
|
500
581
|
sql_elements.extend(input_col_map)
|
|
501
|
-
|
|
582
|
+
cell_md_col_refs = [cell_md_ref.col_ref for input in inputs for cell_md_ref in input.cell_md_refs]
|
|
583
|
+
super().__init__(
|
|
584
|
+
None, row_builder, select_list, columns=[], sql_elements=sql_elements, cell_md_col_refs=cell_md_col_refs
|
|
585
|
+
)
|
|
502
586
|
|
|
503
587
|
def _create_stmt(self) -> sql.Select:
|
|
504
588
|
from pixeltable import plan
|
|
@@ -552,7 +636,16 @@ class SqlSampleNode(SqlNode):
|
|
|
552
636
|
assert self.pk_count > 1
|
|
553
637
|
sql_elements = exprs.SqlElementCache(input_col_map)
|
|
554
638
|
assert sql_elements.contains_all(stratify_exprs)
|
|
555
|
-
|
|
639
|
+
cell_md_col_refs = [cell_md_ref.col_ref for cell_md_ref in input.cell_md_refs]
|
|
640
|
+
super().__init__(
|
|
641
|
+
input.tbl,
|
|
642
|
+
row_builder,
|
|
643
|
+
select_list,
|
|
644
|
+
columns=[],
|
|
645
|
+
sql_elements=sql_elements,
|
|
646
|
+
cell_md_col_refs=cell_md_col_refs,
|
|
647
|
+
set_pk=True,
|
|
648
|
+
)
|
|
556
649
|
self.stratify_exprs = stratify_exprs
|
|
557
650
|
self.sample_clause = sample_clause
|
|
558
651
|
assert isinstance(self.sample_clause.seed, int)
|
pixeltable/exprs/__init__.py
CHANGED
|
@@ -6,7 +6,7 @@ from .column_property_ref import ColumnPropertyRef
|
|
|
6
6
|
from .column_ref import ColumnRef
|
|
7
7
|
from .comparison import Comparison
|
|
8
8
|
from .compound_predicate import CompoundPredicate
|
|
9
|
-
from .data_row import DataRow
|
|
9
|
+
from .data_row import ArrayMd, CellMd, DataRow
|
|
10
10
|
from .expr import Expr
|
|
11
11
|
from .expr_dict import ExprDict
|
|
12
12
|
from .expr_set import ExprSet
|
pixeltable/exprs/arithmetic_expr.py
CHANGED
|
@@ -4,7 +4,7 @@ from typing import Any, Optional
|
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
7
|
-
from pixeltable import exceptions as excs, type_system as ts
|
|
7
|
+
from pixeltable import env, exceptions as excs, type_system as ts
|
|
8
8
|
|
|
9
9
|
from .data_row import DataRow
|
|
10
10
|
from .expr import Expr
|
|
@@ -64,23 +64,30 @@ class ArithmeticExpr(Expr):
|
|
|
64
64
|
right = sql_elements.get(self._op2)
|
|
65
65
|
if left is None or right is None:
|
|
66
66
|
return None
|
|
67
|
-
if self.operator
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
67
|
+
if self.operator in (ArithmeticOperator.ADD, ArithmeticOperator.SUB, ArithmeticOperator.MUL):
|
|
68
|
+
if env.Env.get().is_using_cockroachdb and self._op1.col_type != self._op2.col_type:
|
|
69
|
+
if self._op1.col_type != self.col_type:
|
|
70
|
+
left = sql.cast(left, self.col_type.to_sa_type())
|
|
71
|
+
if self._op2.col_type != self.col_type:
|
|
72
|
+
right = sql.cast(right, self.col_type.to_sa_type())
|
|
73
|
+
if self.operator == ArithmeticOperator.ADD:
|
|
74
|
+
return left + right
|
|
75
|
+
if self.operator == ArithmeticOperator.SUB:
|
|
76
|
+
return left - right
|
|
77
|
+
if self.operator == ArithmeticOperator.MUL:
|
|
78
|
+
return left * right
|
|
73
79
|
if self.operator == ArithmeticOperator.DIV:
|
|
74
80
|
assert self.col_type.is_float_type()
|
|
75
|
-
# Avoid
|
|
81
|
+
# Avoid division by zero errors by converting any zero divisor to NULL.
|
|
76
82
|
# TODO: Should we cast the NULLs to NaNs when they are retrieved back into Python?
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
return sql.
|
|
83
|
+
# These casts cause the computation to take place in float units, rather than DECIMAL.
|
|
84
|
+
nullif = sql.cast(sql.func.nullif(right, 0), self.col_type.to_sa_type())
|
|
85
|
+
return sql.cast(left, self.col_type.to_sa_type()) / nullif
|
|
80
86
|
if self.operator == ArithmeticOperator.MOD:
|
|
81
87
|
if self.col_type.is_int_type():
|
|
82
|
-
|
|
83
|
-
|
|
88
|
+
# Avoid division by zero errors by converting any zero divisor to NULL.
|
|
89
|
+
nullif1 = sql.cast(sql.func.nullif(right, 0), self.col_type.to_sa_type())
|
|
90
|
+
return left % nullif1
|
|
84
91
|
if self.col_type.is_float_type():
|
|
85
92
|
# Postgres does not support modulus for floats
|
|
86
93
|
return None
|
|
@@ -90,11 +97,9 @@ class ArithmeticExpr(Expr):
|
|
|
90
97
|
# We need the behavior to be consistent, so that expressions will evaluate the same way
|
|
91
98
|
# whether or not their operands can be translated to SQL. These SQL clauses should
|
|
92
99
|
# mimic the behavior of Python's // operator.
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
if self.col_type.is_float_type():
|
|
97
|
-
return sql.sql.expression.cast(sql.func.floor(left / nullif), self.col_type.to_sa_type())
|
|
100
|
+
# Avoid division by zero errors by converting any zero divisor to NULL.
|
|
101
|
+
nullif = sql.cast(sql.func.nullif(right, 0), self.col_type.to_sa_type())
|
|
102
|
+
return sql.func.floor(sql.cast(left, self.col_type.to_sa_type()) / nullif)
|
|
98
103
|
raise AssertionError()
|
|
99
104
|
|
|
100
105
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
pixeltable/exprs/column_property_ref.py
CHANGED
|
@@ -44,21 +44,21 @@ class ColumnPropertyRef(Expr):
|
|
|
44
44
|
return [*super()._id_attrs(), ('prop', self.prop.value)]
|
|
45
45
|
|
|
46
46
|
@property
|
|
47
|
-
def
|
|
47
|
+
def col_ref(self) -> ColumnRef:
|
|
48
48
|
col_ref = self.components[0]
|
|
49
49
|
assert isinstance(col_ref, ColumnRef)
|
|
50
50
|
return col_ref
|
|
51
51
|
|
|
52
52
|
def __repr__(self) -> str:
|
|
53
|
-
return f'{self.
|
|
53
|
+
return f'{self.col_ref}.{self.prop.name.lower()}'
|
|
54
54
|
|
|
55
55
|
def is_cellmd_prop(self) -> bool:
|
|
56
56
|
return self.prop in (self.Property.ERRORTYPE, self.Property.ERRORMSG, self.Property.CELLMD)
|
|
57
57
|
|
|
58
58
|
def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
59
|
-
if not self.
|
|
59
|
+
if not self.col_ref.col_handle.get().is_stored:
|
|
60
60
|
return None
|
|
61
|
-
col = self.
|
|
61
|
+
col = self.col_ref.col_handle.get()
|
|
62
62
|
|
|
63
63
|
# the errortype/-msg properties of a read-validated media column need to be extracted from the DataRow
|
|
64
64
|
if (
|
|
@@ -77,7 +77,7 @@ class ColumnPropertyRef(Expr):
|
|
|
77
77
|
return col.sa_cellmd_col
|
|
78
78
|
if self.prop == self.Property.FILEURL:
|
|
79
79
|
# the file url is stored as the column value
|
|
80
|
-
return sql_elements.get(self.
|
|
80
|
+
return sql_elements.get(self.col_ref)
|
|
81
81
|
return None
|
|
82
82
|
|
|
83
83
|
@classmethod
|
|
@@ -87,15 +87,15 @@ class ColumnPropertyRef(Expr):
|
|
|
87
87
|
|
|
88
88
|
def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
|
|
89
89
|
if self.prop == self.Property.FILEURL:
|
|
90
|
-
assert data_row.has_val[self.
|
|
91
|
-
data_row[self.slot_idx] = data_row.file_urls[self.
|
|
90
|
+
assert data_row.has_val[self.col_ref.slot_idx]
|
|
91
|
+
data_row[self.slot_idx] = data_row.file_urls[self.col_ref.slot_idx]
|
|
92
92
|
return
|
|
93
93
|
elif self.prop == self.Property.LOCALPATH:
|
|
94
|
-
assert data_row.has_val[self.
|
|
95
|
-
data_row[self.slot_idx] = data_row.file_paths[self.
|
|
94
|
+
assert data_row.has_val[self.col_ref.slot_idx]
|
|
95
|
+
data_row[self.slot_idx] = data_row.file_paths[self.col_ref.slot_idx]
|
|
96
96
|
return
|
|
97
97
|
elif self.is_cellmd_prop():
|
|
98
|
-
exc = data_row.get_exc(self.
|
|
98
|
+
exc = data_row.get_exc(self.col_ref.slot_idx)
|
|
99
99
|
if exc is None:
|
|
100
100
|
data_row[self.slot_idx] = None
|
|
101
101
|
elif self.prop == self.Property.ERRORTYPE:
|
pixeltable/exprs/column_ref.py
CHANGED
|
@@ -123,8 +123,8 @@ class ColumnRef(Expr):
|
|
|
123
123
|
name == ColumnPropertyRef.Property.ERRORTYPE.name.lower()
|
|
124
124
|
or name == ColumnPropertyRef.Property.ERRORMSG.name.lower()
|
|
125
125
|
):
|
|
126
|
-
|
|
127
|
-
if not
|
|
126
|
+
is_valid = (self.col.is_computed or self.col.col_type.is_media_type()) and self.col.is_stored
|
|
127
|
+
if not is_valid:
|
|
128
128
|
raise excs.Error(f'{name} only valid for a stored computed or media column: {self}')
|
|
129
129
|
return ColumnPropertyRef(self, ColumnPropertyRef.Property[name.upper()])
|
|
130
130
|
if (
|