pixeltable 0.4.15__py3-none-any.whl → 0.4.17__py3-none-any.whl

This diff shows the changes between two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

Files changed (68)
  1. pixeltable/__init__.py +4 -0
  2. pixeltable/catalog/catalog.py +125 -63
  3. pixeltable/catalog/column.py +7 -2
  4. pixeltable/catalog/table.py +1 -0
  5. pixeltable/catalog/table_metadata.py +4 -0
  6. pixeltable/catalog/table_version.py +174 -117
  7. pixeltable/catalog/table_version_handle.py +4 -1
  8. pixeltable/catalog/table_version_path.py +0 -11
  9. pixeltable/catalog/view.py +6 -0
  10. pixeltable/config.py +7 -0
  11. pixeltable/dataframe.py +10 -5
  12. pixeltable/env.py +56 -19
  13. pixeltable/exec/__init__.py +2 -0
  14. pixeltable/exec/cell_materialization_node.py +231 -0
  15. pixeltable/exec/cell_reconstruction_node.py +135 -0
  16. pixeltable/exec/exec_node.py +1 -1
  17. pixeltable/exec/expr_eval/evaluators.py +1 -0
  18. pixeltable/exec/expr_eval/expr_eval_node.py +3 -0
  19. pixeltable/exec/expr_eval/globals.py +2 -0
  20. pixeltable/exec/globals.py +32 -0
  21. pixeltable/exec/object_store_save_node.py +1 -4
  22. pixeltable/exec/row_update_node.py +16 -9
  23. pixeltable/exec/sql_node.py +107 -14
  24. pixeltable/exprs/__init__.py +1 -1
  25. pixeltable/exprs/arithmetic_expr.py +23 -18
  26. pixeltable/exprs/column_property_ref.py +10 -10
  27. pixeltable/exprs/column_ref.py +2 -2
  28. pixeltable/exprs/data_row.py +106 -37
  29. pixeltable/exprs/expr.py +9 -0
  30. pixeltable/exprs/expr_set.py +14 -7
  31. pixeltable/exprs/inline_expr.py +2 -19
  32. pixeltable/exprs/json_path.py +45 -12
  33. pixeltable/exprs/row_builder.py +54 -22
  34. pixeltable/functions/__init__.py +1 -0
  35. pixeltable/functions/bedrock.py +7 -0
  36. pixeltable/functions/deepseek.py +11 -4
  37. pixeltable/functions/llama_cpp.py +7 -0
  38. pixeltable/functions/math.py +1 -1
  39. pixeltable/functions/ollama.py +7 -0
  40. pixeltable/functions/openai.py +4 -4
  41. pixeltable/functions/openrouter.py +143 -0
  42. pixeltable/functions/video.py +110 -28
  43. pixeltable/globals.py +10 -4
  44. pixeltable/io/globals.py +18 -17
  45. pixeltable/io/parquet.py +1 -1
  46. pixeltable/io/table_data_conduit.py +47 -22
  47. pixeltable/iterators/document.py +61 -23
  48. pixeltable/iterators/video.py +126 -53
  49. pixeltable/metadata/__init__.py +1 -1
  50. pixeltable/metadata/converters/convert_40.py +73 -0
  51. pixeltable/metadata/notes.py +1 -0
  52. pixeltable/plan.py +175 -46
  53. pixeltable/share/packager.py +155 -26
  54. pixeltable/store.py +2 -3
  55. pixeltable/type_system.py +5 -3
  56. pixeltable/utils/arrow.py +6 -6
  57. pixeltable/utils/av.py +65 -0
  58. pixeltable/utils/console_output.py +4 -1
  59. pixeltable/utils/exception_handler.py +5 -28
  60. pixeltable/utils/image.py +7 -0
  61. pixeltable/utils/misc.py +5 -0
  62. pixeltable/utils/object_stores.py +16 -1
  63. pixeltable/utils/s3_store.py +44 -11
  64. {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/METADATA +29 -28
  65. {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/RECORD +68 -61
  66. {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/WHEEL +0 -0
  67. {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/entry_points.txt +0 -0
  68. {pixeltable-0.4.15.dist-info → pixeltable-0.4.17.dist-info}/licenses/LICENSE +0 -0
pixeltable/exec/object_store_save_node.py
@@ -7,7 +7,6 @@ from collections import defaultdict, deque
 from concurrent import futures
 from pathlib import Path
 from typing import AsyncIterator, Iterator, NamedTuple, Optional
-from uuid import UUID

 from pixeltable import exprs
 from pixeltable.utils.object_stores import ObjectOps, ObjectPath, StorageTarget
@@ -81,9 +80,7 @@ class ObjectStoreSaveNode(ExecNode):
     num_missing: int  # number of references to media files in this row
     delete_destinations: list[Path]  # paths to delete after all copies are complete

-    def __init__(
-        self, tbl_id: UUID, file_col_info: list[exprs.ColumnSlotIdx], input: ExecNode, retain_input_order: bool = True
-    ):
+    def __init__(self, file_col_info: list[exprs.ColumnSlotIdx], input: ExecNode, retain_input_order: bool = True):
         # input_/output_exprs=[]: we don't have anything to evaluate
         super().__init__(input.row_builder, [], [], input)
         self.retain_input_order = retain_input_order
pixeltable/exec/row_update_node.py
@@ -14,10 +14,18 @@ class RowUpdateNode(ExecNode):
     Update individual rows in the input batches, identified by key columns.

     The updates for a row are provided as a dict of column names to new values.
-    The node assumes that all update dicts contain the same keys, and it populates the slots of the columns present in
-    the update list.
+    Populates the slots of the columns present in the update list.
+    Assumptions:
+    - all update dicts contain the same keys
+    - the input node populates DataRow.cell_vals for all primary key columns
     """

+    updates: dict[tuple, dict[catalog.Column, Any]]
+    is_rowid_key: bool  # if True, key_vals_batch contains rowids rather than primary key values
+    col_slot_idxs: dict[catalog.Column, int]
+    pk_columns: list[catalog.Column]
+    matched_key_vals: set[tuple]
+
     def __init__(
         self,
         tbl: catalog.TableVersionPath,
@@ -37,16 +45,16 @@ class RowUpdateNode(ExecNode):
             for col_ref in row_builder.unique_exprs
             if isinstance(col_ref, exprs.ColumnRef)
         }
+        # all update target columns should have assigned slot idxs
+        assert all(col in all_col_slot_idxs for col in col_vals_batch[0])
         self.col_slot_idxs = {col: all_col_slot_idxs[col] for col in col_vals_batch[0]}
-        self.key_slot_idxs = {col: all_col_slot_idxs[col] for col in tbl.tbl_version.get().primary_key_columns()}
-        self.matched_key_vals: set[tuple] = set()
+        self.pk_columns = tbl.tbl_version.get().primary_key_columns()
+        self.matched_key_vals = set()

     async def __aiter__(self) -> AsyncIterator[DataRowBatch]:
         async for batch in self.input:
             for row in batch:
-                key_vals = (
-                    row.rowid if self.is_rowid_key else tuple(row[slot_idx] for slot_idx in self.key_slot_idxs.values())
-                )
+                key_vals = row.rowid if self.is_rowid_key else tuple(row.cell_vals[col.id] for col in self.pk_columns)
                 if key_vals not in self.updates:
                     continue
                 self.matched_key_vals.add(key_vals)
@@ -59,11 +67,10 @@ class RowUpdateNode(ExecNode):
     def unmatched_rows(self) -> list[dict[str, Any]]:
         """Return rows that didn't get used in the updates as a list of dicts compatible with TableVersion.insert()."""
         result: list[dict[str, Any]] = []
-        key_cols = self.key_slot_idxs.keys()
         for key_vals, col_vals in self.updates.items():
             if key_vals in self.matched_key_vals:
                 continue
-            row = {col.name: val for col, val in zip(key_cols, key_vals)}
+            row = {col.name: val for col, val in zip(self.pk_columns, key_vals)}
             row.update({col.name: val for col, val in col_vals.items()})
             result.append(row)
         return result
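
For orientation, here is a minimal, self-contained sketch of the matching behavior described in the docstring and in unmatched_rows() above, using plain dicts and strings in place of catalog.Column and DataRow (all names below are illustrative, not Pixeltable's actual classes):

from typing import Any

# updates are keyed by primary-key tuples; values are per-column new values
updates: dict[tuple, dict[str, Any]] = {
    (1,): {'name': 'alice'},
    (2,): {'name': 'bob'},
}
pk_columns = ['id']
matched_key_vals: set[tuple] = set()

rows = [{'id': 1, 'name': 'old'}]  # rows streaming through the node
for row in rows:
    key_vals = tuple(row[c] for c in pk_columns)
    if key_vals in updates:
        matched_key_vals.add(key_vals)
        row.update(updates[key_vals])  # populate the updated column values

# update keys that never matched a row become insert()-compatible dicts
unmatched = [
    {**dict(zip(pk_columns, key_vals)), **col_vals}
    for key_vals, col_vals in updates.items()
    if key_vals not in matched_key_vals
]
print(rows)       # [{'id': 1, 'name': 'alice'}]
print(unmatched)  # [{'id': 2, 'name': 'bob'}]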
pixeltable/exec/sql_node.py
@@ -1,3 +1,4 @@
+import datetime
 import logging
 import warnings
 from decimal import Decimal
@@ -65,7 +66,7 @@ def print_order_by_clause(clause: OrderByClause) -> str:

 class SqlNode(ExecNode):
     """
-    Materializes data from the store via an SQL statement.
+    Materializes data from the store via a SQL statement.
     This only provides the select list. The subclasses are responsible for the From clause and any additional clauses.
     The pk columns are not included in the select list.
     If set_pk is True, they are added to the end of the result set when creating the SQL statement
@@ -82,6 +83,8 @@ class SqlNode(ExecNode):

     tbl: Optional[catalog.TableVersionPath]
     select_list: exprs.ExprSet
+    columns: list[catalog.Column]  # for which columns to populate DataRow.cell_vals/cell_md
+    cell_md_refs: list[exprs.ColumnPropertyRef]  # of ColumnRefs which also need DataRow.slot_cellmd for evaluation
     set_pk: bool
     num_pk_cols: int
     py_filter: Optional[exprs.Expr]  # a predicate that can only be run in Python
@@ -89,6 +92,12 @@ class SqlNode(ExecNode):
     cte: Optional[sql.CTE]
     sql_elements: exprs.SqlElementCache

+    # execution state
+    cellmd_item_idxs: exprs.ExprDict[int]  # cellmd expr -> idx in sql select list
+    column_item_idxs: dict[catalog.Column, int]  # column -> idx in sql select list
+    column_cellmd_item_idxs: dict[catalog.Column, int]  # column -> idx in sql select list
+    result_cursor: sql.engine.CursorResult | None
+
     # where_clause/-_element: allow subclass to set one or the other (but not both)
     where_clause: Optional[exprs.Expr]
     where_clause_element: Optional[sql.ColumnElement]
@@ -101,12 +110,22 @@
         tbl: Optional[catalog.TableVersionPath],
         row_builder: exprs.RowBuilder,
         select_list: Iterable[exprs.Expr],
+        columns: list[catalog.Column],
         sql_elements: exprs.SqlElementCache,
+        cell_md_col_refs: list[exprs.ColumnRef] | None = None,
         set_pk: bool = False,
     ):
         # create Select stmt
         self.sql_elements = sql_elements
         self.tbl = tbl
+        self.columns = columns
+        if cell_md_col_refs is not None:
+            assert all(ref.col.stores_cellmd for ref in cell_md_col_refs)
+            self.cell_md_refs = [
+                exprs.ColumnPropertyRef(ref, exprs.ColumnPropertyRef.Property.CELLMD) for ref in cell_md_col_refs
+            ]
+        else:
+            self.cell_md_refs = []
         self.select_list = exprs.ExprSet(select_list)
         # unstored iter columns: we also need to retrieve whatever is needed to materialize the iter args
         for iter_arg in row_builder.unstored_iter_args.values():
@@ -129,6 +148,9 @@
             assert self.num_pk_cols > 1

         # additional state
+        self.cellmd_item_idxs = exprs.ExprDict()
+        self.column_item_idxs = {}
+        self.column_cellmd_item_idxs = {}
         self.result_cursor = None
         # the filter is provided by the subclass
         self.py_filter = None
@@ -144,10 +166,9 @@ class SqlNode(ExecNode):
         if tv is not None:
             assert tv.is_validated

-    def _create_pk_cols(self) -> list[sql.Column]:
-        """Create a list of pk columns"""
-        # we need to retrieve the pk columns
+    def _pk_col_items(self) -> list[sql.Column]:
         if self.set_pk:
+            # we need to retrieve the pk columns
             assert self.tbl is not None
             assert self.tbl.tbl_version.get().is_validated
             return self.tbl.tbl_version.get().store_tbl.pk_columns()
@@ -157,7 +178,19 @@
         """Create Select from local state"""

         assert self.sql_elements.contains_all(self.select_list)
-        sql_select_list = [self.sql_elements.get(e) for e in self.select_list] + self._create_pk_cols()
+        sql_select_list_exprs = exprs.ExprSet(self.select_list)
+        self.cellmd_item_idxs = exprs.ExprDict((ref, sql_select_list_exprs.add(ref)) for ref in self.cell_md_refs)
+        column_refs = [exprs.ColumnRef(col) for col in self.columns]
+        self.column_item_idxs = {col_ref.col: sql_select_list_exprs.add(col_ref) for col_ref in column_refs}
+        column_cellmd_refs = [
+            exprs.ColumnPropertyRef(col_ref, exprs.ColumnPropertyRef.Property.CELLMD)
+            for col_ref in column_refs
+            if col_ref.col.stores_cellmd
+        ]
+        self.column_cellmd_item_idxs = {
+            cellmd_ref.col_ref.col: sql_select_list_exprs.add(cellmd_ref) for cellmd_ref in column_cellmd_refs
+        }
+        sql_select_list = [self.sql_elements.get(e) for e in sql_select_list_exprs] + self._pk_col_items()
         stmt = sql.select(*sql_select_list)

         where_clause_element = (
@@ -198,9 +231,7 @@ class SqlNode(ExecNode):
         if not keep_pk:
             self.set_pk = False  # we don't need the PK if we use this SqlNode as a CTE
         self.cte = self._create_stmt().cte()
-        pk_count = self.num_pk_cols if self.set_pk else 0
-        assert len(self.select_list) + pk_count == len(self.cte.c)
-        return self.cte, exprs.ExprDict(zip(self.select_list, self.cte.c))  # skip pk cols
+        return self.cte, exprs.ExprDict(zip(list(self.select_list) + self.cell_md_refs, self.cte.c))  # skip pk cols

     @classmethod
     def retarget_rowid_refs(cls, target: catalog.TableVersionPath, expr_seq: Iterable[exprs.Expr]) -> None:
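
to_cte() now zips both the original select-list expressions and the appended cellmd refs against the CTE's columns. The following standalone SQLAlchemy sketch (hypothetical table and column names, not Pixeltable code) shows the underlying pattern of mapping select-list items to CTE columns by position so an outer statement can reuse them:

import sqlalchemy as sa

t = sa.table('media_tbl', sa.column('val'), sa.column('val_cellmd'))
select_items = [t.c.val, t.c.val_cellmd]  # value column plus its cell-metadata column
cte = sa.select(*select_items).cte('inner')

# positional mapping: original select-list item -> corresponding CTE column
col_map = dict(zip(select_items, cte.c))
outer = sa.select(col_map[t.c.val]).where(col_map[t.c.val_cellmd].is_(None))
print(outer.compile())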
@@ -318,24 +349,53 @@ class SqlNode(ExecNode):
         output_batch = DataRowBatch(self.row_builder)
         output_row: Optional[exprs.DataRow] = None
         num_rows_returned = 0
+        is_using_cockroachdb = Env.get().is_using_cockroachdb
+        tzinfo = Env.get().default_time_zone

         for sql_row in result_cursor:
             output_row = output_batch.add_row(output_row)

             # populate output_row
+
             if self.num_pk_cols > 0:
                 output_row.set_pk(tuple(sql_row[-self.num_pk_cols :]))
+
+            # column copies
+            for col, item_idx in self.column_item_idxs.items():
+                output_row.cell_vals[col.id] = sql_row[item_idx]
+            for col, item_idx in self.column_cellmd_item_idxs.items():
+                cell_md_dict = sql_row[item_idx]
+                output_row.cell_md[col.id] = exprs.CellMd(**cell_md_dict) if cell_md_dict is not None else None
+
+            # populate DataRow.slot_cellmd, where requested
+            for cellmd_ref, item_idx in self.cellmd_item_idxs.items():
+                cell_md_dict = sql_row[item_idx]
+                output_row.slot_md[cellmd_ref.col_ref.slot_idx] = (
+                    exprs.CellMd.from_dict(cell_md_dict) if cell_md_dict is not None else None
+                )
+
             # copy the output of the SQL query into the output row
             for i, e in enumerate(self.select_list):
                 slot_idx = e.slot_idx
-                # certain numerical operations can produce Decimals (eg, SUM(<int column>)); we need to convert them
                 if isinstance(sql_row[i], Decimal):
+                    # certain numerical operations can produce Decimals (eg, SUM(<int column>)); we need to convert them
                     if e.col_type.is_int_type():
                         output_row[slot_idx] = int(sql_row[i])
                     elif e.col_type.is_float_type():
                         output_row[slot_idx] = float(sql_row[i])
                     else:
                         raise RuntimeError(f'Unexpected Decimal value for {e}')
+                elif is_using_cockroachdb and isinstance(sql_row[i], datetime.datetime):
+                    # Ensure that the datetime is timezone-aware and in the session time zone
+                    # cockroachDB returns timestamps in the session time zone, with numeric offset,
+                    # convert to the session time zone with the requested tzinfo for DST handling
+                    if e.col_type.is_timestamp_type():
+                        if isinstance(sql_row[i].tzinfo, datetime.timezone):
+                            output_row[slot_idx] = sql_row[i].astimezone(tz=tzinfo)
+                        else:
+                            output_row[slot_idx] = sql_row[i]
+                    else:
+                        raise RuntimeError(f'Unexpected datetime value for {e}')
                 else:
                     output_row[slot_idx] = sql_row[i]

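The new CockroachDB branch above normalizes timestamps that arrive with a fixed numeric UTC offset into the session time zone so that DST rules apply. A standalone illustration using only the standard library (zone name and values are arbitrary examples):

import datetime
from zoneinfo import ZoneInfo

session_tz = ZoneInfo('America/Los_Angeles')
# what a driver might hand back: an aware datetime with a fixed numeric offset (UTC-8)
raw = datetime.datetime(2024, 7, 1, 12, 0, tzinfo=datetime.timezone(datetime.timedelta(hours=-8)))

if isinstance(raw.tzinfo, datetime.timezone):   # fixed offset -> convert to the named zone
    normalized = raw.astimezone(tz=session_tz)
else:                                           # already a named zone -> leave as is
    normalized = raw

print(raw.isoformat())         # 2024-07-01T12:00:00-08:00
print(normalized.isoformat())  # 2024-07-01T13:00:00-07:00 (PDT, DST-aware)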
@@ -387,11 +447,21 @@ class SqlScanNode(SqlNode):
         tbl: catalog.TableVersionPath,
         row_builder: exprs.RowBuilder,
         select_list: Iterable[exprs.Expr],
+        columns: list[catalog.Column],
+        cell_md_col_refs: list[exprs.ColumnRef] | None = None,
         set_pk: bool = False,
         exact_version_only: Optional[list[catalog.TableVersionHandle]] = None,
     ):
         sql_elements = exprs.SqlElementCache()
-        super().__init__(tbl, row_builder, select_list, sql_elements, set_pk=set_pk)
+        super().__init__(
+            tbl,
+            row_builder,
+            select_list,
+            columns=columns,
+            sql_elements=sql_elements,
+            set_pk=set_pk,
+            cell_md_col_refs=cell_md_col_refs,
+        )
         # create Select stmt
         if exact_version_only is None:
             exact_version_only = []
@@ -423,11 +493,21 @@ class SqlLookupNode(SqlNode):
         tbl: catalog.TableVersionPath,
         row_builder: exprs.RowBuilder,
         select_list: Iterable[exprs.Expr],
+        columns: list[catalog.Column],
         sa_key_cols: list[sql.Column],
         key_vals: list[tuple],
+        cell_md_col_refs: list[exprs.ColumnRef] | None = None,
     ):
         sql_elements = exprs.SqlElementCache()
-        super().__init__(tbl, row_builder, select_list, sql_elements, set_pk=True)
+        super().__init__(
+            tbl,
+            row_builder,
+            select_list,
+            columns=columns,
+            sql_elements=sql_elements,
+            set_pk=True,
+            cell_md_col_refs=cell_md_col_refs,
+        )
         # Where clause: (key-col-1, key-col-2, ...) IN ((val-1, val-2, ...), ...)
         self.where_clause_element = sql.tuple_(*sa_key_cols).in_(key_vals)

@@ -460,9 +540,10 @@ class SqlAggregationNode(SqlNode):
         limit: Optional[int] = None,
         exact_version_only: Optional[list[catalog.TableVersion]] = None,
     ):
+        assert len(input.cell_md_refs) == 0  # there's no aggregation over json or arrays in SQL
         self.input_cte, input_col_map = input.to_cte()
         sql_elements = exprs.SqlElementCache(input_col_map)
-        super().__init__(None, row_builder, select_list, sql_elements)
+        super().__init__(None, row_builder, select_list, columns=[], sql_elements=sql_elements)
         self.group_by_items = group_by_items

     def _create_stmt(self) -> sql.Select:
@@ -498,7 +579,10 @@ class SqlJoinNode(SqlNode):
             input_cte, input_col_map = input_node.to_cte()
             self.input_ctes.append(input_cte)
             sql_elements.extend(input_col_map)
-        super().__init__(None, row_builder, select_list, sql_elements)
+        cell_md_col_refs = [cell_md_ref.col_ref for input in inputs for cell_md_ref in input.cell_md_refs]
+        super().__init__(
+            None, row_builder, select_list, columns=[], sql_elements=sql_elements, cell_md_col_refs=cell_md_col_refs
+        )

     def _create_stmt(self) -> sql.Select:
         from pixeltable import plan
@@ -552,7 +636,16 @@ class SqlSampleNode(SqlNode):
             assert self.pk_count > 1
         sql_elements = exprs.SqlElementCache(input_col_map)
         assert sql_elements.contains_all(stratify_exprs)
-        super().__init__(input.tbl, row_builder, select_list, sql_elements, set_pk=True)
+        cell_md_col_refs = [cell_md_ref.col_ref for cell_md_ref in input.cell_md_refs]
+        super().__init__(
+            input.tbl,
+            row_builder,
+            select_list,
+            columns=[],
+            sql_elements=sql_elements,
+            cell_md_col_refs=cell_md_col_refs,
+            set_pk=True,
+        )
         self.stratify_exprs = stratify_exprs
         self.sample_clause = sample_clause
         assert isinstance(self.sample_clause.seed, int)
pixeltable/exprs/__init__.py
@@ -6,7 +6,7 @@ from .column_property_ref import ColumnPropertyRef
 from .column_ref import ColumnRef
 from .comparison import Comparison
 from .compound_predicate import CompoundPredicate
-from .data_row import DataRow
+from .data_row import ArrayMd, CellMd, DataRow
 from .expr import Expr
 from .expr_dict import ExprDict
 from .expr_set import ExprSet
pixeltable/exprs/arithmetic_expr.py
@@ -4,7 +4,7 @@ from typing import Any, Optional

 import sqlalchemy as sql

-from pixeltable import exceptions as excs, type_system as ts
+from pixeltable import env, exceptions as excs, type_system as ts

 from .data_row import DataRow
 from .expr import Expr
@@ -64,23 +64,30 @@ class ArithmeticExpr(Expr):
         right = sql_elements.get(self._op2)
         if left is None or right is None:
             return None
-        if self.operator == ArithmeticOperator.ADD:
-            return left + right
-        if self.operator == ArithmeticOperator.SUB:
-            return left - right
-        if self.operator == ArithmeticOperator.MUL:
-            return left * right
+        if self.operator in (ArithmeticOperator.ADD, ArithmeticOperator.SUB, ArithmeticOperator.MUL):
+            if env.Env.get().is_using_cockroachdb and self._op1.col_type != self._op2.col_type:
+                if self._op1.col_type != self.col_type:
+                    left = sql.cast(left, self.col_type.to_sa_type())
+                if self._op2.col_type != self.col_type:
+                    right = sql.cast(right, self.col_type.to_sa_type())
+            if self.operator == ArithmeticOperator.ADD:
+                return left + right
+            if self.operator == ArithmeticOperator.SUB:
+                return left - right
+            if self.operator == ArithmeticOperator.MUL:
+                return left * right
         if self.operator == ArithmeticOperator.DIV:
             assert self.col_type.is_float_type()
-            # Avoid DivisionByZero: if right is 0, make this a NULL
+            # Avoid division by zero errors by converting any zero divisor to NULL.
             # TODO: Should we cast the NULLs to NaNs when they are retrieved back into Python?
-            nullif = sql.sql.func.nullif(right, 0)
-            # We have to cast to a `float`, or else we'll get a `Decimal`
-            return sql.sql.expression.cast(left / nullif, self.col_type.to_sa_type())
+            # These casts cause the computation to take place in float units, rather than DECIMAL.
+            nullif = sql.cast(sql.func.nullif(right, 0), self.col_type.to_sa_type())
+            return sql.cast(left, self.col_type.to_sa_type()) / nullif
         if self.operator == ArithmeticOperator.MOD:
             if self.col_type.is_int_type():
-                nullif = sql.sql.func.nullif(right, 0)
-                return left % nullif
+                # Avoid division by zero errors by converting any zero divisor to NULL.
+                nullif1 = sql.cast(sql.func.nullif(right, 0), self.col_type.to_sa_type())
+                return left % nullif1
             if self.col_type.is_float_type():
                 # Postgres does not support modulus for floats
                 return None
@@ -90,11 +97,9 @@
             # We need the behavior to be consistent, so that expressions will evaluate the same way
             # whether or not their operands can be translated to SQL. These SQL clauses should
             # mimic the behavior of Python's // operator.
-            nullif = sql.sql.func.nullif(right, 0)
-            if self.col_type.is_int_type():
-                return sql.sql.expression.cast(sql.func.floor(left / nullif), self.col_type.to_sa_type())
-            if self.col_type.is_float_type():
-                return sql.sql.expression.cast(sql.func.floor(left / nullif), self.col_type.to_sa_type())
+            # Avoid division by zero errors by converting any zero divisor to NULL.
+            nullif = sql.cast(sql.func.nullif(right, 0), self.col_type.to_sa_type())
+            return sql.func.floor(sql.cast(left, self.col_type.to_sa_type()) / nullif)
         raise AssertionError()

     def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
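
The rewritten SQL translations guard against division by zero by NULLIF-ing the divisor and cast the operands to the target float type so the database does not fall back to DECIMAL arithmetic. A standalone SQLAlchemy sketch of the same pattern (not Pixeltable code, literal operands chosen arbitrarily):

import sqlalchemy as sa

left = sa.literal(7)
right = sa.literal(0)

# float division: NULL instead of a division-by-zero error, FLOAT instead of DECIMAL
div = sa.cast(left, sa.Float) / sa.cast(sa.func.nullif(right, 0), sa.Float)
# floor division, mimicking Python's // operator
floordiv = sa.func.floor(sa.cast(left, sa.Float) / sa.cast(sa.func.nullif(right, 0), sa.Float))

print(div.compile())       # roughly: CAST(... AS FLOAT) / CAST(nullif(..., ...) AS FLOAT)
print(floordiv.compile())  # roughly: floor(CAST(... AS FLOAT) / CAST(nullif(..., ...) AS FLOAT))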
pixeltable/exprs/column_property_ref.py
@@ -44,21 +44,21 @@ class ColumnPropertyRef(Expr):
         return [*super()._id_attrs(), ('prop', self.prop.value)]

     @property
-    def _col_ref(self) -> ColumnRef:
+    def col_ref(self) -> ColumnRef:
         col_ref = self.components[0]
         assert isinstance(col_ref, ColumnRef)
         return col_ref

     def __repr__(self) -> str:
-        return f'{self._col_ref}.{self.prop.name.lower()}'
+        return f'{self.col_ref}.{self.prop.name.lower()}'

     def is_cellmd_prop(self) -> bool:
         return self.prop in (self.Property.ERRORTYPE, self.Property.ERRORMSG, self.Property.CELLMD)

     def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
-        if not self._col_ref.col_handle.get().is_stored:
+        if not self.col_ref.col_handle.get().is_stored:
             return None
-        col = self._col_ref.col_handle.get()
+        col = self.col_ref.col_handle.get()

         # the errortype/-msg properties of a read-validated media column need to be extracted from the DataRow
         if (
@@ -77,7 +77,7 @@ class ColumnPropertyRef(Expr):
             return col.sa_cellmd_col
         if self.prop == self.Property.FILEURL:
             # the file url is stored as the column value
-            return sql_elements.get(self._col_ref)
+            return sql_elements.get(self.col_ref)
         return None

     @classmethod
@@ -87,15 +87,15 @@ class ColumnPropertyRef(Expr):

     def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
         if self.prop == self.Property.FILEURL:
-            assert data_row.has_val[self._col_ref.slot_idx]
-            data_row[self.slot_idx] = data_row.file_urls[self._col_ref.slot_idx]
+            assert data_row.has_val[self.col_ref.slot_idx]
+            data_row[self.slot_idx] = data_row.file_urls[self.col_ref.slot_idx]
             return
         elif self.prop == self.Property.LOCALPATH:
-            assert data_row.has_val[self._col_ref.slot_idx]
-            data_row[self.slot_idx] = data_row.file_paths[self._col_ref.slot_idx]
+            assert data_row.has_val[self.col_ref.slot_idx]
+            data_row[self.slot_idx] = data_row.file_paths[self.col_ref.slot_idx]
             return
         elif self.is_cellmd_prop():
-            exc = data_row.get_exc(self._col_ref.slot_idx)
+            exc = data_row.get_exc(self.col_ref.slot_idx)
             if exc is None:
                 data_row[self.slot_idx] = None
             elif self.prop == self.Property.ERRORTYPE:
pixeltable/exprs/column_ref.py
@@ -123,8 +123,8 @@ class ColumnRef(Expr):
             name == ColumnPropertyRef.Property.ERRORTYPE.name.lower()
             or name == ColumnPropertyRef.Property.ERRORMSG.name.lower()
         ):
-            property_is_present = self.col.stores_cellmd
-            if not property_is_present:
+            is_valid = (self.col.is_computed or self.col.col_type.is_media_type()) and self.col.is_stored
+            if not is_valid:
                 raise excs.Error(f'{name} only valid for a stored computed or media column: {self}')
             return ColumnPropertyRef(self, ColumnPropertyRef.Property[name.upper()])
         if (
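
With this change, errortype/errormsg validity is determined by whether the column is a stored computed or media column, rather than by whether the column already stores cell metadata. A hedged usage sketch of the user-facing properties (table and column names below are hypothetical):

import pixeltable as pxt

t = pxt.get_table('demo.images')  # assumes a table with a stored computed column 'thumbnail'
# errortype/errormsg surface any error recorded for the computed cell
res = t.select(t.thumbnail, t.thumbnail.errortype, t.thumbnail.errormsg).collect()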