pixeltable 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (127)
  1. pixeltable/__init__.py +5 -3
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -0
  4. pixeltable/catalog/catalog.py +335 -128
  5. pixeltable/catalog/column.py +22 -5
  6. pixeltable/catalog/dir.py +19 -6
  7. pixeltable/catalog/insertable_table.py +34 -37
  8. pixeltable/catalog/named_function.py +0 -4
  9. pixeltable/catalog/schema_object.py +28 -42
  10. pixeltable/catalog/table.py +193 -158
  11. pixeltable/catalog/table_version.py +191 -232
  12. pixeltable/catalog/table_version_handle.py +50 -0
  13. pixeltable/catalog/table_version_path.py +49 -33
  14. pixeltable/catalog/view.py +56 -96
  15. pixeltable/config.py +103 -0
  16. pixeltable/dataframe.py +89 -89
  17. pixeltable/env.py +98 -168
  18. pixeltable/exec/aggregation_node.py +5 -4
  19. pixeltable/exec/cache_prefetch_node.py +1 -1
  20. pixeltable/exec/component_iteration_node.py +13 -9
  21. pixeltable/exec/data_row_batch.py +3 -3
  22. pixeltable/exec/exec_context.py +0 -4
  23. pixeltable/exec/exec_node.py +3 -2
  24. pixeltable/exec/expr_eval/schedulers.py +2 -1
  25. pixeltable/exec/in_memory_data_node.py +9 -4
  26. pixeltable/exec/row_update_node.py +1 -2
  27. pixeltable/exec/sql_node.py +20 -16
  28. pixeltable/exprs/__init__.py +2 -0
  29. pixeltable/exprs/arithmetic_expr.py +7 -11
  30. pixeltable/exprs/array_slice.py +1 -1
  31. pixeltable/exprs/column_property_ref.py +3 -3
  32. pixeltable/exprs/column_ref.py +12 -13
  33. pixeltable/exprs/comparison.py +3 -6
  34. pixeltable/exprs/compound_predicate.py +4 -4
  35. pixeltable/exprs/expr.py +31 -22
  36. pixeltable/exprs/expr_dict.py +3 -3
  37. pixeltable/exprs/expr_set.py +1 -1
  38. pixeltable/exprs/function_call.py +110 -80
  39. pixeltable/exprs/globals.py +3 -3
  40. pixeltable/exprs/in_predicate.py +1 -1
  41. pixeltable/exprs/inline_expr.py +3 -3
  42. pixeltable/exprs/is_null.py +1 -1
  43. pixeltable/exprs/json_mapper.py +2 -2
  44. pixeltable/exprs/json_path.py +17 -10
  45. pixeltable/exprs/literal.py +1 -1
  46. pixeltable/exprs/method_ref.py +2 -2
  47. pixeltable/exprs/row_builder.py +8 -17
  48. pixeltable/exprs/rowid_ref.py +21 -10
  49. pixeltable/exprs/similarity_expr.py +5 -5
  50. pixeltable/exprs/sql_element_cache.py +1 -1
  51. pixeltable/exprs/type_cast.py +2 -3
  52. pixeltable/exprs/variable.py +2 -2
  53. pixeltable/ext/__init__.py +2 -0
  54. pixeltable/ext/functions/__init__.py +2 -0
  55. pixeltable/ext/functions/yolox.py +3 -3
  56. pixeltable/func/__init__.py +3 -1
  57. pixeltable/func/aggregate_function.py +9 -9
  58. pixeltable/func/callable_function.py +3 -4
  59. pixeltable/func/expr_template_function.py +6 -16
  60. pixeltable/func/function.py +48 -14
  61. pixeltable/func/function_registry.py +1 -3
  62. pixeltable/func/query_template_function.py +5 -12
  63. pixeltable/func/signature.py +23 -22
  64. pixeltable/func/tools.py +3 -3
  65. pixeltable/func/udf.py +6 -4
  66. pixeltable/functions/__init__.py +2 -0
  67. pixeltable/functions/fireworks.py +7 -4
  68. pixeltable/functions/globals.py +4 -5
  69. pixeltable/functions/huggingface.py +1 -5
  70. pixeltable/functions/image.py +17 -7
  71. pixeltable/functions/llama_cpp.py +1 -1
  72. pixeltable/functions/mistralai.py +1 -1
  73. pixeltable/functions/ollama.py +4 -4
  74. pixeltable/functions/openai.py +19 -19
  75. pixeltable/functions/string.py +23 -30
  76. pixeltable/functions/timestamp.py +11 -6
  77. pixeltable/functions/together.py +14 -12
  78. pixeltable/functions/util.py +1 -1
  79. pixeltable/functions/video.py +5 -4
  80. pixeltable/functions/vision.py +6 -9
  81. pixeltable/functions/whisper.py +3 -3
  82. pixeltable/globals.py +246 -260
  83. pixeltable/index/__init__.py +2 -0
  84. pixeltable/index/base.py +1 -1
  85. pixeltable/index/btree.py +3 -1
  86. pixeltable/index/embedding_index.py +11 -5
  87. pixeltable/io/external_store.py +11 -12
  88. pixeltable/io/label_studio.py +4 -3
  89. pixeltable/io/parquet.py +57 -56
  90. pixeltable/iterators/__init__.py +4 -2
  91. pixeltable/iterators/audio.py +11 -11
  92. pixeltable/iterators/document.py +10 -10
  93. pixeltable/iterators/string.py +1 -2
  94. pixeltable/iterators/video.py +14 -15
  95. pixeltable/metadata/__init__.py +9 -5
  96. pixeltable/metadata/converters/convert_10.py +0 -1
  97. pixeltable/metadata/converters/convert_15.py +0 -2
  98. pixeltable/metadata/converters/convert_23.py +0 -2
  99. pixeltable/metadata/converters/convert_24.py +3 -3
  100. pixeltable/metadata/converters/convert_25.py +1 -1
  101. pixeltable/metadata/converters/convert_27.py +0 -2
  102. pixeltable/metadata/converters/convert_28.py +0 -2
  103. pixeltable/metadata/converters/convert_29.py +7 -8
  104. pixeltable/metadata/converters/util.py +7 -7
  105. pixeltable/metadata/schema.py +27 -19
  106. pixeltable/plan.py +68 -40
  107. pixeltable/share/__init__.py +2 -0
  108. pixeltable/share/packager.py +15 -12
  109. pixeltable/share/publish.py +3 -5
  110. pixeltable/store.py +37 -38
  111. pixeltable/type_system.py +41 -28
  112. pixeltable/utils/coco.py +4 -4
  113. pixeltable/utils/console_output.py +1 -3
  114. pixeltable/utils/description_helper.py +1 -1
  115. pixeltable/utils/documents.py +3 -3
  116. pixeltable/utils/filecache.py +20 -9
  117. pixeltable/utils/formatter.py +2 -3
  118. pixeltable/utils/media_store.py +1 -1
  119. pixeltable/utils/pytorch.py +1 -1
  120. pixeltable/utils/sql.py +4 -4
  121. pixeltable/utils/transactional_directory.py +2 -1
  122. {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/METADATA +1 -1
  123. pixeltable-0.3.8.dist-info/RECORD +174 -0
  124. pixeltable-0.3.6.dist-info/RECORD +0 -172
  125. {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/LICENSE +0 -0
  126. {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/WHEEL +0 -0
  127. {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/entry_points.txt +0 -0
@@ -20,7 +20,8 @@ class InMemoryDataNode(ExecNode):
20
20
  - if an input row doesn't provide a value, sets the slot to the column default
21
21
  """
22
22
 
23
- tbl: catalog.TableVersion
23
+ tbl: catalog.TableVersionHandle
24
+
24
25
  input_rows: list[dict[str, Any]]
25
26
  start_row_id: int
26
27
  output_rows: Optional[DataRowBatch]
@@ -29,12 +30,16 @@ class InMemoryDataNode(ExecNode):
29
30
  output_exprs: list[exprs.ColumnRef]
30
31
 
31
32
  def __init__(
32
- self, tbl: catalog.TableVersion, rows: list[dict[str, Any]], row_builder: exprs.RowBuilder, start_row_id: int
33
+ self,
34
+ tbl: catalog.TableVersionHandle,
35
+ rows: list[dict[str, Any]],
36
+ row_builder: exprs.RowBuilder,
37
+ start_row_id: int,
33
38
  ):
34
39
  # we materialize the input slots
35
40
  output_exprs = list(row_builder.input_exprs)
36
41
  super().__init__(row_builder, output_exprs, [], None)
37
- assert tbl.is_insertable()
42
+ assert tbl.get().is_insertable()
38
43
  self.tbl = tbl
39
44
  self.input_rows = rows
40
45
  self.start_row_id = start_row_id
@@ -62,7 +67,7 @@ class InMemoryDataNode(ExecNode):
62
67
 
63
68
  if col_info.col.col_type.is_image_type() and isinstance(val, bytes):
64
69
  # this is a literal image, ie, a sequence of bytes; we save this as a media file and store the path
65
- path = str(MediaStore.prepare_media_path(self.tbl.id, col_info.col.id, self.tbl.version))
70
+ path = str(MediaStore.prepare_media_path(self.tbl.id, col_info.col.id, self.tbl.get().version))
66
71
  open(path, 'wb').write(val)
67
72
  val = path
68
73
  self.output_rows[row_idx][col_info.slot_idx] = val
@@ -3,7 +3,6 @@ from typing import Any, AsyncIterator
3
3
 
4
4
  import pixeltable.catalog as catalog
5
5
  import pixeltable.exprs as exprs
6
- from pixeltable.utils.media_store import MediaStore
7
6
 
8
7
  from .data_row_batch import DataRowBatch
9
8
  from .exec_node import ExecNode
@@ -40,7 +39,7 @@ class RowUpdateNode(ExecNode):
40
39
  if isinstance(col_ref, exprs.ColumnRef)
41
40
  }
42
41
  self.col_slot_idxs = {col: all_col_slot_idxs[col] for col in col_vals_batch[0].keys()}
43
- self.key_slot_idxs = {col: all_col_slot_idxs[col] for col in tbl.tbl_version.primary_key_columns()}
42
+ self.key_slot_idxs = {col: all_col_slot_idxs[col] for col in tbl.tbl_version.get().primary_key_columns()}
44
43
  self.matched_key_vals: set[tuple] = set()
45
44
 
46
45
  async def __aiter__(self) -> AsyncIterator[DataRowBatch]:
@@ -1,13 +1,14 @@
1
1
  import logging
2
2
  import warnings
3
3
  from decimal import Decimal
4
- from typing import TYPE_CHECKING, AsyncIterator, Iterable, Iterator, NamedTuple, Optional, Sequence
4
+ from typing import TYPE_CHECKING, AsyncIterator, Iterable, NamedTuple, Optional, Sequence
5
5
  from uuid import UUID
6
6
 
7
7
  import sqlalchemy as sql
8
8
 
9
9
  import pixeltable.catalog as catalog
10
10
  import pixeltable.exprs as exprs
11
+ from pixeltable.env import Env
11
12
 
12
13
  from .data_row_batch import DataRowBatch
13
14
  from .exec_node import ExecNode
@@ -122,7 +123,7 @@ class SqlNode(ExecNode):
122
123
  if set_pk:
123
124
  # we also need to retrieve the pk columns
124
125
  assert tbl is not None
125
- self.num_pk_cols = len(tbl.tbl_version.store_tbl.pk_columns())
126
+ self.num_pk_cols = len(tbl.tbl_version.get().store_tbl.pk_columns())
126
127
 
127
128
  # additional state
128
129
  self.result_cursor = None
@@ -142,7 +143,7 @@ class SqlNode(ExecNode):
142
143
  sql_select_list = [self.sql_elements.get(e) for e in self.select_list]
143
144
  if self.set_pk:
144
145
  assert self.tbl is not None
145
- sql_select_list += self.tbl.tbl_version.store_tbl.pk_columns()
146
+ sql_select_list += self.tbl.tbl_version.get().store_tbl.pk_columns()
146
147
  stmt = sql.select(*sql_select_list)
147
148
 
148
149
  where_clause_element = (
@@ -215,29 +216,31 @@ class SqlNode(ExecNode):
215
216
  exact_version_only = set()
216
217
  candidates = tbl.get_tbl_versions()
217
218
  assert len(candidates) > 0
218
- joined_tbls: list[catalog.TableVersion] = [candidates[0]]
219
+ joined_tbls: list[catalog.TableVersionHandle] = [candidates[0]]
219
220
  for tbl in candidates[1:]:
220
221
  if tbl.id in refd_tbl_ids:
221
222
  joined_tbls.append(tbl)
222
223
 
223
224
  first = True
224
- prev_tbl: catalog.TableVersion
225
+ prev_tbl: catalog.TableVersionHandle
225
226
  for tbl in joined_tbls[::-1]:
226
227
  if first:
227
- stmt = stmt.select_from(tbl.store_tbl.sa_tbl)
228
+ stmt = stmt.select_from(tbl.get().store_tbl.sa_tbl)
228
229
  first = False
229
230
  else:
230
231
  # join tbl to prev_tbl on prev_tbl's rowid cols
231
- prev_tbl_rowid_cols = prev_tbl.store_tbl.rowid_columns()
232
- tbl_rowid_cols = tbl.store_tbl.rowid_columns()
232
+ prev_tbl_rowid_cols = prev_tbl.get().store_tbl.rowid_columns()
233
+ tbl_rowid_cols = tbl.get().store_tbl.rowid_columns()
233
234
  rowid_clauses = [
234
235
  c1 == c2 for c1, c2 in zip(prev_tbl_rowid_cols, tbl_rowid_cols[: len(prev_tbl_rowid_cols)])
235
236
  ]
236
- stmt = stmt.join(tbl.store_tbl.sa_tbl, sql.and_(*rowid_clauses))
237
+ stmt = stmt.join(tbl.get().store_tbl.sa_tbl, sql.and_(*rowid_clauses))
237
238
  if tbl.id in exact_version_only:
238
- stmt = stmt.where(tbl.store_tbl.v_min_col == tbl.version)
239
+ stmt = stmt.where(tbl.get().store_tbl.v_min_col == tbl.get().version)
239
240
  else:
240
- stmt = stmt.where(tbl.store_tbl.v_min_col <= tbl.version).where(tbl.store_tbl.v_max_col > tbl.version)
241
+ stmt = stmt.where(tbl.get().store_tbl.v_min_col <= tbl.get().version).where(
242
+ tbl.get().store_tbl.v_max_col > tbl.get().version
243
+ )
241
244
  prev_tbl = tbl
242
245
  return stmt
243
246
 
@@ -264,10 +267,11 @@ class SqlNode(ExecNode):
264
267
  self.limit = limit
265
268
 
266
269
  def _log_explain(self, stmt: sql.Select) -> None:
270
+ conn = Env.get().conn
267
271
  try:
268
272
  # don't set dialect=Env.get().engine.dialect: x % y turns into x %% y, which results in a syntax error
269
273
  stmt_str = str(stmt.compile(compile_kwargs={'literal_binds': True}))
270
- explain_result = self.ctx.conn.execute(sql.text(f'EXPLAIN {stmt_str}'))
274
+ explain_result = conn.execute(sql.text(f'EXPLAIN {stmt_str}'))
271
275
  explain_str = '\n'.join([str(row) for row in explain_result])
272
276
  _logger.debug(f'SqlScanNode explain:\n{explain_str}')
273
277
  except Exception as e:
@@ -275,7 +279,6 @@ class SqlNode(ExecNode):
275
279
 
276
280
  async def __aiter__(self) -> AsyncIterator[DataRowBatch]:
277
281
  # run the query; do this here rather than in _open(), exceptions are only expected during iteration
278
- assert self.ctx.conn is not None
279
282
  with warnings.catch_warnings(record=True) as w:
280
283
  stmt = self._create_stmt()
281
284
  try:
@@ -286,7 +289,8 @@ class SqlNode(ExecNode):
286
289
  pass
287
290
  self._log_explain(stmt)
288
291
 
289
- result_cursor = self.ctx.conn.execute(stmt)
292
+ conn = Env.get().conn
293
+ result_cursor = conn.execute(stmt)
290
294
  for warning in w:
291
295
  pass
292
296
 
@@ -351,7 +355,7 @@ class SqlScanNode(SqlNode):
351
355
  Supports filtering and ordering.
352
356
  """
353
357
 
354
- exact_version_only: list[catalog.TableVersion]
358
+ exact_version_only: list[catalog.TableVersionHandle]
355
359
 
356
360
  def __init__(
357
361
  self,
@@ -359,7 +363,7 @@ class SqlScanNode(SqlNode):
359
363
  row_builder: exprs.RowBuilder,
360
364
  select_list: Iterable[exprs.Expr],
361
365
  set_pk: bool = False,
362
- exact_version_only: Optional[list[catalog.TableVersion]] = None,
366
+ exact_version_only: Optional[list[catalog.TableVersionHandle]] = None,
363
367
  ):
364
368
  """
365
369
  Args:
@@ -1,3 +1,5 @@
1
+ # ruff: noqa: F401
2
+
1
3
  from .arithmetic_expr import ArithmeticExpr
2
4
  from .array_slice import ArraySlice
3
5
  from .column_property_ref import ColumnPropertyRef
@@ -1,12 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any, Optional, Union
3
+ from typing import Any, Optional
4
4
 
5
5
  import sqlalchemy as sql
6
6
 
7
- import pixeltable.exceptions as excs
8
- import pixeltable.exprs as exprs
9
- import pixeltable.type_system as ts
7
+ from pixeltable import exceptions as excs, type_system as ts
10
8
 
11
9
  from .data_row import DataRow
12
10
  from .expr import Expr
@@ -50,13 +48,13 @@ class ArithmeticExpr(Expr):
50
48
  # add parentheses around operands that are ArithmeticExprs to express precedence
51
49
  op1_str = f'({self._op1})' if isinstance(self._op1, ArithmeticExpr) else str(self._op1)
52
50
  op2_str = f'({self._op2})' if isinstance(self._op2, ArithmeticExpr) else str(self._op2)
53
- return f'{op1_str} {str(self.operator)} {op2_str}'
51
+ return f'{op1_str} {self.operator} {op2_str}'
54
52
 
55
53
  def _equals(self, other: ArithmeticExpr) -> bool:
56
54
  return self.operator == other.operator
57
55
 
58
56
  def _id_attrs(self) -> list[tuple[str, Any]]:
59
- return super()._id_attrs() + [('operator', self.operator.value)]
57
+ return [*super()._id_attrs(), ('operator', self.operator.value)]
60
58
 
61
59
  def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
62
60
  assert self.col_type.is_int_type() or self.col_type.is_float_type() or self.col_type.is_json_type()
@@ -95,7 +93,7 @@ class ArithmeticExpr(Expr):
95
93
  return sql.sql.expression.cast(sql.func.floor(left / nullif), self.col_type.to_sa_type())
96
94
  if self.col_type.is_float_type():
97
95
  return sql.sql.expression.cast(sql.func.floor(left / nullif), self.col_type.to_sa_type())
98
- assert False
96
+ raise AssertionError()
99
97
 
100
98
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
101
99
  op1_val = data_row[self._op1.slot_idx]
@@ -113,9 +111,7 @@ class ArithmeticExpr(Expr):
113
111
 
114
112
  data_row[self.slot_idx] = self.eval_nullable(op1_val, op2_val)
115
113
 
116
- def eval_nullable(
117
- self, op1_val: Union[int, float, None], op2_val: Union[int, float, None]
118
- ) -> Union[int, float, None]:
114
+ def eval_nullable(self, op1_val: Optional[float], op2_val: Optional[float]) -> Optional[float]:
119
115
  """
120
116
  Return the result of evaluating the expression on two nullable int/float operands,
121
117
  None is interpreted as SQL NULL
@@ -124,7 +120,7 @@ class ArithmeticExpr(Expr):
124
120
  return None
125
121
  return self.eval_non_null(op1_val, op2_val)
126
122
 
127
- def eval_non_null(self, op1_val: Union[int, float], op2_val: Union[int, float]) -> Union[int, float]:
123
+ def eval_non_null(self, op1_val: float, op2_val: float) -> float:
128
124
  """
129
125
  Return the result of evaluating the expression on two int/float operands
130
126
  """
@@ -41,7 +41,7 @@ class ArraySlice(Expr):
41
41
  return self.index == other.index
42
42
 
43
43
  def _id_attrs(self) -> list[tuple[str, Any]]:
44
- return super()._id_attrs() + [('index', self.index)]
44
+ return [*super()._id_attrs(), ('index', self.index)]
45
45
 
46
46
  def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
47
47
  return None
@@ -40,7 +40,7 @@ class ColumnPropertyRef(Expr):
40
40
  return self.prop == other.prop
41
41
 
42
42
  def _id_attrs(self) -> list[tuple[str, Any]]:
43
- return super()._id_attrs() + [('prop', self.prop.value)]
43
+ return [*super()._id_attrs(), ('prop', self.prop.value)]
44
44
 
45
45
  @property
46
46
  def _col_ref(self) -> ColumnRef:
@@ -52,7 +52,7 @@ class ColumnPropertyRef(Expr):
52
52
  return f'{self._col_ref}.{self.prop.name.lower()}'
53
53
 
54
54
  def is_error_prop(self) -> bool:
55
- return self.prop == self.Property.ERRORTYPE or self.prop == self.Property.ERRORMSG
55
+ return self.prop in {self.Property.ERRORTYPE, self.Property.ERRORMSG}
56
56
 
57
57
  def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
58
58
  if not self._col_ref.col.is_stored:
@@ -95,7 +95,7 @@ class ColumnPropertyRef(Expr):
95
95
  else:
96
96
  data_row[self.slot_idx] = str(exc)
97
97
  else:
98
- assert False
98
+ raise AssertionError()
99
99
 
100
100
  def _as_dict(self) -> dict:
101
101
  return {'prop': self.prop.value, **super()._as_dict()}
@@ -6,9 +6,7 @@ from uuid import UUID
6
6
  import sqlalchemy as sql
7
7
 
8
8
  import pixeltable as pxt
9
- import pixeltable.catalog as catalog
10
- import pixeltable.exceptions as excs
11
- import pixeltable.iterators as iters
9
+ from pixeltable import catalog, exceptions as excs, iterators as iters
12
10
 
13
11
  from ..utils.description_helper import DescriptionHelper
14
12
  from .data_row import DataRow
@@ -52,15 +50,15 @@ class ColumnRef(Expr):
52
50
  assert col.tbl is not None
53
51
  self.col = col
54
52
  self.is_unstored_iter_col = (
55
- col.tbl.is_component_view() and col.tbl.is_iterator_column(col) and not col.is_stored
53
+ col.tbl.get().is_component_view and col.tbl.get().is_iterator_column(col) and not col.is_stored
56
54
  )
57
55
  self.iter_arg_ctx = None
58
56
  # number of rowid columns in the base table
59
- self.base_rowid_len = col.tbl.base.num_rowid_columns() if self.is_unstored_iter_col else 0
57
+ self.base_rowid_len = col.tbl.get().base.get().num_rowid_columns() if self.is_unstored_iter_col else 0
60
58
  self.base_rowid = [None] * self.base_rowid_len
61
59
  self.iterator = None
62
60
  # index of the position column in the view's primary key; don't try to reference tbl.store_tbl here
63
- self.pos_idx = col.tbl.num_rowid_columns() - 1 if self.is_unstored_iter_col else None
61
+ self.pos_idx = col.tbl.get().num_rowid_columns() - 1 if self.is_unstored_iter_col else None
64
62
 
65
63
  self.perform_validation = False
66
64
  if col.col_type.is_media_type():
@@ -84,7 +82,8 @@ class ColumnRef(Expr):
84
82
  assert len(self.iter_arg_ctx.target_slot_idxs) == 1 # a single inline dict
85
83
 
86
84
  def _id_attrs(self) -> list[tuple[str, Any]]:
87
- return super()._id_attrs() + [
85
+ return [
86
+ *super()._id_attrs(),
88
87
  ('tbl_id', self.col.tbl.id),
89
88
  ('col_id', self.col.id),
90
89
  ('perform_validation', self.perform_validation),
@@ -138,7 +137,7 @@ class ColumnRef(Expr):
138
137
  return self.col == other.col and self.perform_validation == other.perform_validation
139
138
 
140
139
  def _df(self) -> 'pxt.dataframe.DataFrame':
141
- tbl = catalog.Catalog.get().tbls[self.col.tbl.id]
140
+ tbl = catalog.Catalog.get().get_tbl(self.col.tbl.id)
142
141
  return tbl.select(self)
143
142
 
144
143
  def show(self, *args, **kwargs) -> 'pxt.dataframe.DataFrameResultSet':
@@ -166,9 +165,9 @@ class ColumnRef(Expr):
166
165
  return self._descriptors().to_html()
167
166
 
168
167
  def _descriptors(self) -> DescriptionHelper:
169
- tbl = catalog.Catalog.get().tbls[self.col.tbl.id]
168
+ tbl = catalog.Catalog.get().get_tbl(self.col.tbl.id)
170
169
  helper = DescriptionHelper()
171
- helper.append(f'Column\n{self.col.name!r}\n(of table {tbl._path!r})')
170
+ helper.append(f'Column\n{self.col.name!r}\n(of table {tbl._path()!r})')
172
171
  helper.append(tbl._col_descriptor([self.col.name]))
173
172
  idxs = tbl._index_descriptor([self.col.name])
174
173
  if len(idxs) > 0:
@@ -217,7 +216,7 @@ class ColumnRef(Expr):
217
216
  if self.base_rowid != data_row.pk[: self.base_rowid_len]:
218
217
  row_builder.eval(data_row, self.iter_arg_ctx)
219
218
  iterator_args = data_row[self.iter_arg_ctx.target_slot_idxs[0]]
220
- self.iterator = self.col.tbl.iterator_cls(**iterator_args)
219
+ self.iterator = self.col.tbl.get().iterator_cls(**iterator_args)
221
220
  self.base_rowid = data_row.pk[: self.base_rowid_len]
222
221
  self.iterator.set_pos(data_row.pk[self.pos_idx])
223
222
  res = next(self.iterator)
@@ -225,7 +224,7 @@ class ColumnRef(Expr):
225
224
 
226
225
  def _as_dict(self) -> dict:
227
226
  tbl = self.col.tbl
228
- version = tbl.version if tbl.is_snapshot else None
227
+ version = tbl.get().version if tbl.get().is_snapshot else None
229
228
  # we omit self.components, even if this is a validating ColumnRef, because init() will recreate the
230
229
  # non-validating component ColumnRef
231
230
  return {
@@ -238,7 +237,7 @@ class ColumnRef(Expr):
238
237
  @classmethod
239
238
  def get_column(cls, d: dict) -> catalog.Column:
240
239
  tbl_id, version, col_id = UUID(d['tbl_id']), d['tbl_version'], d['col_id']
241
- tbl_version = catalog.Catalog.get().tbl_versions[(tbl_id, version)]
240
+ tbl_version = catalog.Catalog.get().get_tbl_version(tbl_id, version)
242
241
  # don't use tbl_version.cols_by_id here, this might be a snapshot reference to a column that was then dropped
243
242
  col = next(col for col in tbl_version.cols if col.id == col_id)
244
243
  return col
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING, Any, Optional
3
+ from typing import Any, Optional
4
4
 
5
5
  import sqlalchemy as sql
6
6
 
@@ -15,9 +15,6 @@ from .literal import Literal
15
15
  from .row_builder import RowBuilder
16
16
  from .sql_element_cache import SqlElementCache
17
17
 
18
- if TYPE_CHECKING:
19
- from pixeltable import index
20
-
21
18
 
22
19
  class Comparison(Expr):
23
20
  is_search_arg_comparison: bool
@@ -62,7 +59,7 @@ class Comparison(Expr):
62
59
  return self.operator == other.operator
63
60
 
64
61
  def _id_attrs(self) -> list[tuple[str, Any]]:
65
- return super()._id_attrs() + [('operator', self.operator.value)]
62
+ return [*super()._id_attrs(), ('operator', self.operator.value)]
66
63
 
67
64
  @property
68
65
  def _op1(self) -> Expr:
@@ -84,7 +81,7 @@ class Comparison(Expr):
84
81
  if self.is_search_arg_comparison:
85
82
  # reference the index value column if there is an index and this is not a snapshot
86
83
  # (indices don't apply to snapshots)
87
- tbl = self._op1.col.tbl
84
+ tbl = self._op1.col.tbl.get()
88
85
  idx_info = [
89
86
  info for info in self._op1.col.get_idx_info().values() if isinstance(info.idx, index.BtreeIndex)
90
87
  ]
@@ -5,7 +5,7 @@ from typing import Any, Callable, Optional
5
5
 
6
6
  import sqlalchemy as sql
7
7
 
8
- import pixeltable.type_system as ts
8
+ from pixeltable import type_system as ts
9
9
 
10
10
  from .data_row import DataRow
11
11
  from .expr import Expr
@@ -58,10 +58,10 @@ class CompoundPredicate(Expr):
58
58
  return self.operator == other.operator
59
59
 
60
60
  def _id_attrs(self) -> list[tuple[str, Any]]:
61
- return super()._id_attrs() + [('operator', self.operator.value)]
61
+ return [*super()._id_attrs(), ('operator', self.operator.value)]
62
62
 
63
63
  def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Optional[Expr]]:
64
- if self.operator == LogicalOperator.OR or self.operator == LogicalOperator.NOT:
64
+ if self.operator in {LogicalOperator.OR, LogicalOperator.NOT}:
65
65
  return super().split_conjuncts(condition)
66
66
  matches = [op for op in self.components if condition(op)]
67
67
  non_matches = [op for op in self.components if not condition(op)]
@@ -83,7 +83,7 @@ class CompoundPredicate(Expr):
83
83
  if self.operator == LogicalOperator.NOT:
84
84
  data_row[self.slot_idx] = not data_row[self.components[0].slot_idx]
85
85
  else:
86
- val = True if self.operator == LogicalOperator.AND else False
86
+ val = self.operator == LogicalOperator.AND
87
87
  op_function = operator.and_ if self.operator == LogicalOperator.AND else operator.or_
88
88
  for op in self.components:
89
89
  val = op_function(val, data_row[op.slot_idx])
pixeltable/exprs/expr.py CHANGED
@@ -14,10 +14,7 @@ import numpy as np
14
14
  import sqlalchemy as sql
15
15
  from typing_extensions import Self, _AnnotatedAlias
16
16
 
17
- import pixeltable.catalog as catalog
18
- import pixeltable.exceptions as excs
19
- import pixeltable.func as func
20
- import pixeltable.type_system as ts
17
+ from pixeltable import catalog, exceptions as excs, func, type_system as ts
21
18
 
22
19
  from .data_row import DataRow
23
20
  from .globals import ArithmeticOperator, ComparisonOperator, LiteralPythonTypes, LogicalOperator
@@ -110,11 +107,29 @@ class Expr(abc.ABC):
110
107
  """
111
108
  return None
112
109
 
110
+ @property
111
+ def validation_error(self) -> Optional[str]:
112
+ """
113
+ Subclasses can override this to indicate that validation has failed after a catalog load.
114
+
115
+ If an Expr (or any of its transitive components) is invalid, then it cannot be evaluated, but its metadata
116
+ will still be preserved in the catalog (so that the user can take appropriate corrective action).
117
+ """
118
+ for c in self.components:
119
+ error = c.validation_error
120
+ if error is not None:
121
+ return error
122
+ return None
123
+
124
+ @property
125
+ def is_valid(self) -> bool:
126
+ return self.validation_error is None
127
+
113
128
  def equals(self, other: Expr) -> bool:
114
129
  """
115
130
  Subclass-specific comparison. Implemented as a function because __eq__() is needed to construct Comparisons.
116
131
  """
117
- if type(self) != type(other):
132
+ if type(self) is not type(other):
118
133
  return False
119
134
  if len(self.components) != len(other.components):
120
135
  return False
@@ -156,10 +171,7 @@ class Expr(abc.ABC):
156
171
  def list_equals(cls, a: list[Expr], b: list[Expr]) -> bool:
157
172
  if len(a) != len(b):
158
173
  return False
159
- for i in range(len(a)):
160
- if not a[i].equals(b[i]):
161
- return False
162
- return True
174
+ return all(a[i].equals(b[i]) for i in range(len(a)))
163
175
 
164
176
  def copy(self) -> Expr:
165
177
  """
@@ -201,9 +213,9 @@ class Expr(abc.ABC):
201
213
  return new.copy()
202
214
  for i in range(len(self.components)):
203
215
  self.components[i] = self.components[i].substitute(spec)
204
- self = self.maybe_literal()
205
- self.id = self._create_id()
206
- return self
216
+ result = self.maybe_literal()
217
+ result.id = result._create_id()
218
+ return result
207
219
 
208
220
  @classmethod
209
221
  def list_substitute(cls, expr_list: list[Expr], spec: dict[Expr, Expr]) -> None:
@@ -238,14 +250,11 @@ class Expr(abc.ABC):
238
250
  from .column_ref import ColumnRef
239
251
 
240
252
  col_refs = self.subexprs(ColumnRef)
241
- for col_ref in col_refs:
242
- if not any(tbl.has_column(col_ref.col) for tbl in tbls):
243
- return False
244
- return True
253
+ return all(any(tbl.has_column(col_ref.col) for tbl in tbls) for col_ref in col_refs)
245
254
 
246
255
  def retarget(self, tbl: catalog.TableVersionPath) -> Self:
247
256
  """Retarget ColumnRefs in this expr to the specific TableVersions in tbl."""
248
- tbl_versions = {tbl_version.id: tbl_version for tbl_version in tbl.get_tbl_versions()}
257
+ tbl_versions = {tbl_version.id: tbl_version.get() for tbl_version in tbl.get_tbl_versions()}
249
258
  return self._retarget(tbl_versions)
250
259
 
251
260
  def _retarget(self, tbl_versions: dict[UUID, catalog.TableVersion]) -> Self:
@@ -355,7 +364,7 @@ class Expr(abc.ABC):
355
364
 
356
365
  @classmethod
357
366
  def all_tbl_ids(cls, exprs_: Iterable[Expr]) -> set[UUID]:
358
- return set(tbl_id for e in exprs_ for tbl_id in e.tbl_ids())
367
+ return {tbl_id for e in exprs_ for tbl_id in e.tbl_ids()}
359
368
 
360
369
  @classmethod
361
370
  def get_refd_columns(cls, expr_dict: dict[str, Any]) -> list[catalog.Column]:
@@ -474,7 +483,7 @@ class Expr(abc.ABC):
474
483
  return {'_classname': self.__class__.__name__, **self._as_dict()}
475
484
 
476
485
  @classmethod
477
- def as_dict_list(self, expr_list: list[Expr]) -> list[dict]:
486
+ def as_dict_list(cls, expr_list: list[Expr]) -> list[dict]:
478
487
  return [e.as_dict() for e in expr_list]
479
488
 
480
489
  def _as_dict(self) -> dict:
@@ -505,7 +514,7 @@ class Expr(abc.ABC):
505
514
 
506
515
  @classmethod
507
516
  def _from_dict(cls, d: dict, components: list[Expr]) -> Self:
508
- assert False, 'not implemented'
517
+ raise AssertionError('not implemented')
509
518
 
510
519
  def isin(self, value_set: Any) -> 'exprs.InPredicate':
511
520
  from .in_predicate import InPredicate
@@ -777,13 +786,13 @@ class Expr(abc.ABC):
777
786
  first_param = next(params_iter) if len(params) >= 1 else None
778
787
  second_param = next(params_iter) if len(params) >= 2 else None
779
788
  # Check that fn has at least one positional parameter
780
- if len(params) == 0 or first_param.kind in (inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.VAR_KEYWORD):
789
+ if len(params) == 0 or first_param.kind in {inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.VAR_KEYWORD}:
781
790
  raise excs.Error(f'Function `{fn.__name__}` has no positional parameters.')
782
791
  # Check that fn has at most one required parameter, i.e., its second parameter
783
792
  # has no default and is not a varargs
784
793
  if (
785
794
  len(params) >= 2
786
- and second_param.kind not in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
795
+ and second_param.kind not in {inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD}
787
796
  and second_param.default is inspect.Parameter.empty
788
797
  ):
789
798
  raise excs.Error(f'Function `{fn.__name__}` has multiple required parameters.')
@@ -1,9 +1,9 @@
1
1
  from typing import Generic, Iterable, Iterator, Optional, TypeVar
2
2
 
3
- T = TypeVar('T')
4
-
5
3
  from .expr import Expr
6
4
 
5
+ T = TypeVar('T')
6
+
7
7
 
8
8
  class ExprDict(Generic[T]):
9
9
  """
@@ -47,7 +47,7 @@ class ExprDict(Generic[T]):
47
47
  self._data.clear()
48
48
 
49
49
  def keys(self) -> Iterator[Expr]:
50
- return self.__iter__()
50
+ return iter(self)
51
51
 
52
52
  def values(self) -> Iterator[T]:
53
53
  return (value for _, value in self._data.values())
@@ -46,7 +46,7 @@ class ExprSet(Generic[T]):
46
46
 
47
47
  def __getitem__(self, index: object) -> Optional[T]:
48
48
  """Indexed lookup by slot_idx or Expr.id."""
49
- assert isinstance(index, int) or isinstance(index, Expr)
49
+ assert isinstance(index, (int, Expr))
50
50
  if isinstance(index, int):
51
51
  # return expr with matching slot_idx
52
52
  return self.exprs_by_idx.get(index)