pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (202) hide show
  1. pixeltable/__init__.py +23 -5
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/__init__.py +5 -3
  4. pixeltable/catalog/catalog.py +1318 -404
  5. pixeltable/catalog/column.py +186 -115
  6. pixeltable/catalog/dir.py +1 -2
  7. pixeltable/catalog/globals.py +11 -43
  8. pixeltable/catalog/insertable_table.py +167 -79
  9. pixeltable/catalog/path.py +61 -23
  10. pixeltable/catalog/schema_object.py +9 -10
  11. pixeltable/catalog/table.py +626 -308
  12. pixeltable/catalog/table_metadata.py +101 -0
  13. pixeltable/catalog/table_version.py +713 -569
  14. pixeltable/catalog/table_version_handle.py +37 -6
  15. pixeltable/catalog/table_version_path.py +42 -29
  16. pixeltable/catalog/tbl_ops.py +50 -0
  17. pixeltable/catalog/update_status.py +191 -0
  18. pixeltable/catalog/view.py +108 -94
  19. pixeltable/config.py +128 -22
  20. pixeltable/dataframe.py +188 -100
  21. pixeltable/env.py +407 -136
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +3 -0
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +231 -0
  27. pixeltable/exec/cell_reconstruction_node.py +135 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +7 -6
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +190 -30
  37. pixeltable/exec/globals.py +32 -0
  38. pixeltable/exec/in_memory_data_node.py +18 -18
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +206 -101
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +34 -30
  46. pixeltable/exprs/column_ref.py +92 -96
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +152 -55
  50. pixeltable/exprs/expr.py +62 -43
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +75 -37
  54. pixeltable/exprs/globals.py +1 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +10 -27
  57. pixeltable/exprs/is_null.py +1 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +5 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +127 -53
  64. pixeltable/exprs/rowid_ref.py +8 -12
  65. pixeltable/exprs/similarity_expr.py +50 -25
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +10 -10
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +20 -18
  78. pixeltable/func/signature.py +43 -16
  79. pixeltable/func/tools.py +23 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +6 -0
  82. pixeltable/functions/anthropic.py +93 -33
  83. pixeltable/functions/audio.py +114 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +1 -1
  86. pixeltable/functions/deepseek.py +20 -9
  87. pixeltable/functions/fireworks.py +2 -2
  88. pixeltable/functions/gemini.py +28 -11
  89. pixeltable/functions/globals.py +13 -13
  90. pixeltable/functions/groq.py +108 -0
  91. pixeltable/functions/huggingface.py +1046 -23
  92. pixeltable/functions/image.py +9 -18
  93. pixeltable/functions/llama_cpp.py +23 -8
  94. pixeltable/functions/math.py +3 -4
  95. pixeltable/functions/mistralai.py +4 -15
  96. pixeltable/functions/ollama.py +16 -9
  97. pixeltable/functions/openai.py +104 -82
  98. pixeltable/functions/openrouter.py +143 -0
  99. pixeltable/functions/replicate.py +2 -2
  100. pixeltable/functions/reve.py +250 -0
  101. pixeltable/functions/string.py +21 -28
  102. pixeltable/functions/timestamp.py +13 -14
  103. pixeltable/functions/together.py +4 -6
  104. pixeltable/functions/twelvelabs.py +92 -0
  105. pixeltable/functions/util.py +6 -1
  106. pixeltable/functions/video.py +1388 -106
  107. pixeltable/functions/vision.py +7 -7
  108. pixeltable/functions/whisper.py +15 -7
  109. pixeltable/functions/whisperx.py +179 -0
  110. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  111. pixeltable/globals.py +332 -105
  112. pixeltable/index/base.py +13 -22
  113. pixeltable/index/btree.py +23 -22
  114. pixeltable/index/embedding_index.py +32 -44
  115. pixeltable/io/__init__.py +4 -2
  116. pixeltable/io/datarows.py +7 -6
  117. pixeltable/io/external_store.py +49 -77
  118. pixeltable/io/fiftyone.py +11 -11
  119. pixeltable/io/globals.py +29 -28
  120. pixeltable/io/hf_datasets.py +17 -9
  121. pixeltable/io/label_studio.py +70 -66
  122. pixeltable/io/lancedb.py +3 -0
  123. pixeltable/io/pandas.py +12 -11
  124. pixeltable/io/parquet.py +13 -93
  125. pixeltable/io/table_data_conduit.py +71 -47
  126. pixeltable/io/utils.py +3 -3
  127. pixeltable/iterators/__init__.py +2 -1
  128. pixeltable/iterators/audio.py +21 -11
  129. pixeltable/iterators/document.py +116 -55
  130. pixeltable/iterators/image.py +5 -2
  131. pixeltable/iterators/video.py +293 -13
  132. pixeltable/metadata/__init__.py +4 -2
  133. pixeltable/metadata/converters/convert_18.py +2 -2
  134. pixeltable/metadata/converters/convert_19.py +2 -2
  135. pixeltable/metadata/converters/convert_20.py +2 -2
  136. pixeltable/metadata/converters/convert_21.py +2 -2
  137. pixeltable/metadata/converters/convert_22.py +2 -2
  138. pixeltable/metadata/converters/convert_24.py +2 -2
  139. pixeltable/metadata/converters/convert_25.py +2 -2
  140. pixeltable/metadata/converters/convert_26.py +2 -2
  141. pixeltable/metadata/converters/convert_29.py +4 -4
  142. pixeltable/metadata/converters/convert_34.py +2 -2
  143. pixeltable/metadata/converters/convert_36.py +2 -2
  144. pixeltable/metadata/converters/convert_37.py +15 -0
  145. pixeltable/metadata/converters/convert_38.py +39 -0
  146. pixeltable/metadata/converters/convert_39.py +124 -0
  147. pixeltable/metadata/converters/convert_40.py +73 -0
  148. pixeltable/metadata/converters/util.py +13 -12
  149. pixeltable/metadata/notes.py +4 -0
  150. pixeltable/metadata/schema.py +79 -42
  151. pixeltable/metadata/utils.py +74 -0
  152. pixeltable/mypy/__init__.py +3 -0
  153. pixeltable/mypy/mypy_plugin.py +123 -0
  154. pixeltable/plan.py +274 -223
  155. pixeltable/share/__init__.py +1 -1
  156. pixeltable/share/packager.py +259 -129
  157. pixeltable/share/protocol/__init__.py +34 -0
  158. pixeltable/share/protocol/common.py +170 -0
  159. pixeltable/share/protocol/operation_types.py +33 -0
  160. pixeltable/share/protocol/replica.py +109 -0
  161. pixeltable/share/publish.py +213 -57
  162. pixeltable/store.py +238 -175
  163. pixeltable/type_system.py +104 -63
  164. pixeltable/utils/__init__.py +2 -3
  165. pixeltable/utils/arrow.py +108 -13
  166. pixeltable/utils/av.py +298 -0
  167. pixeltable/utils/azure_store.py +305 -0
  168. pixeltable/utils/code.py +3 -3
  169. pixeltable/utils/console_output.py +4 -1
  170. pixeltable/utils/coroutine.py +6 -23
  171. pixeltable/utils/dbms.py +31 -5
  172. pixeltable/utils/description_helper.py +4 -5
  173. pixeltable/utils/documents.py +5 -6
  174. pixeltable/utils/exception_handler.py +7 -30
  175. pixeltable/utils/filecache.py +6 -6
  176. pixeltable/utils/formatter.py +4 -6
  177. pixeltable/utils/gcs_store.py +283 -0
  178. pixeltable/utils/http_server.py +2 -3
  179. pixeltable/utils/iceberg.py +1 -2
  180. pixeltable/utils/image.py +17 -0
  181. pixeltable/utils/lancedb.py +88 -0
  182. pixeltable/utils/local_store.py +316 -0
  183. pixeltable/utils/misc.py +5 -0
  184. pixeltable/utils/object_stores.py +528 -0
  185. pixeltable/utils/pydantic.py +60 -0
  186. pixeltable/utils/pytorch.py +5 -6
  187. pixeltable/utils/s3_store.py +392 -0
  188. pixeltable-0.4.20.dist-info/METADATA +587 -0
  189. pixeltable-0.4.20.dist-info/RECORD +218 -0
  190. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
  191. pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
  192. pixeltable/__version__.py +0 -3
  193. pixeltable/ext/__init__.py +0 -17
  194. pixeltable/ext/functions/__init__.py +0 -11
  195. pixeltable/ext/functions/whisperx.py +0 -77
  196. pixeltable/utils/media_store.py +0 -77
  197. pixeltable/utils/s3.py +0 -17
  198. pixeltable/utils/sample.py +0 -25
  199. pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
  200. pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
  201. pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
  202. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
@@ -1,10 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any, Optional
3
+ from typing import Any
4
4
 
5
5
  import sqlalchemy as sql
6
6
 
7
- from pixeltable import exceptions as excs, type_system as ts
7
+ from pixeltable import env, exceptions as excs, type_system as ts
8
8
 
9
9
  from .data_row import DataRow
10
10
  from .expr import Expr
@@ -58,29 +58,36 @@ class ArithmeticExpr(Expr):
58
58
  def _id_attrs(self) -> list[tuple[str, Any]]:
59
59
  return [*super()._id_attrs(), ('operator', self.operator.value)]
60
60
 
61
- def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
61
+ def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
62
62
  assert self.col_type.is_int_type() or self.col_type.is_float_type() or self.col_type.is_json_type()
63
63
  left = sql_elements.get(self._op1)
64
64
  right = sql_elements.get(self._op2)
65
65
  if left is None or right is None:
66
66
  return None
67
- if self.operator == ArithmeticOperator.ADD:
68
- return left + right
69
- if self.operator == ArithmeticOperator.SUB:
70
- return left - right
71
- if self.operator == ArithmeticOperator.MUL:
72
- return left * right
67
+ if self.operator in (ArithmeticOperator.ADD, ArithmeticOperator.SUB, ArithmeticOperator.MUL):
68
+ if env.Env.get().is_using_cockroachdb and self._op1.col_type != self._op2.col_type:
69
+ if self._op1.col_type != self.col_type:
70
+ left = sql.cast(left, self.col_type.to_sa_type())
71
+ if self._op2.col_type != self.col_type:
72
+ right = sql.cast(right, self.col_type.to_sa_type())
73
+ if self.operator == ArithmeticOperator.ADD:
74
+ return left + right
75
+ if self.operator == ArithmeticOperator.SUB:
76
+ return left - right
77
+ if self.operator == ArithmeticOperator.MUL:
78
+ return left * right
73
79
  if self.operator == ArithmeticOperator.DIV:
74
80
  assert self.col_type.is_float_type()
75
- # Avoid DivisionByZero: if right is 0, make this a NULL
81
+ # Avoid division by zero errors by converting any zero divisor to NULL.
76
82
  # TODO: Should we cast the NULLs to NaNs when they are retrieved back into Python?
77
- nullif = sql.sql.func.nullif(right, 0)
78
- # We have to cast to a `float`, or else we'll get a `Decimal`
79
- return sql.sql.expression.cast(left / nullif, self.col_type.to_sa_type())
83
+ # These casts cause the computation to take place in float units, rather than DECIMAL.
84
+ nullif = sql.cast(sql.func.nullif(right, 0), self.col_type.to_sa_type())
85
+ return sql.cast(left, self.col_type.to_sa_type()) / nullif
80
86
  if self.operator == ArithmeticOperator.MOD:
81
87
  if self.col_type.is_int_type():
82
- nullif = sql.sql.func.nullif(right, 0)
83
- return left % nullif
88
+ # Avoid division by zero errors by converting any zero divisor to NULL.
89
+ nullif1 = sql.cast(sql.func.nullif(right, 0), self.col_type.to_sa_type())
90
+ return left % nullif1
84
91
  if self.col_type.is_float_type():
85
92
  # Postgres does not support modulus for floats
86
93
  return None
@@ -90,11 +97,9 @@ class ArithmeticExpr(Expr):
90
97
  # We need the behavior to be consistent, so that expressions will evaluate the same way
91
98
  # whether or not their operands can be translated to SQL. These SQL clauses should
92
99
  # mimic the behavior of Python's // operator.
93
- nullif = sql.sql.func.nullif(right, 0)
94
- if self.col_type.is_int_type():
95
- return sql.sql.expression.cast(sql.func.floor(left / nullif), self.col_type.to_sa_type())
96
- if self.col_type.is_float_type():
97
- return sql.sql.expression.cast(sql.func.floor(left / nullif), self.col_type.to_sa_type())
100
+ # Avoid division by zero errors by converting any zero divisor to NULL.
101
+ nullif = sql.cast(sql.func.nullif(right, 0), self.col_type.to_sa_type())
102
+ return sql.func.floor(sql.cast(left, self.col_type.to_sa_type()) / nullif)
98
103
  raise AssertionError()
99
104
 
100
105
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
@@ -113,7 +118,7 @@ class ArithmeticExpr(Expr):
113
118
 
114
119
  data_row[self.slot_idx] = self.eval_nullable(op1_val, op2_val)
115
120
 
116
- def eval_nullable(self, op1_val: Optional[float], op2_val: Optional[float]) -> Optional[float]:
121
+ def eval_nullable(self, op1_val: float | None, op2_val: float | None) -> float | None:
117
122
  """
118
123
  Return the result of evaluating the expression on two nullable int/float operands,
119
124
  None is interpreted as SQL NULL
@@ -139,7 +144,7 @@ class ArithmeticExpr(Expr):
139
144
  elif self.operator == ArithmeticOperator.FLOORDIV:
140
145
  return op1_val // op2_val
141
146
 
142
- def as_literal(self) -> Optional[Literal]:
147
+ def as_literal(self) -> Literal | None:
143
148
  op1_lit = self._op1.as_literal()
144
149
  if op1_lit is None:
145
150
  return None
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any, Optional, Union
3
+ from typing import Any
4
4
 
5
5
  import sqlalchemy as sql
6
6
 
@@ -16,7 +16,7 @@ class ArraySlice(Expr):
16
16
  Slice operation on an array, eg, t.array_col[:, 1:2].
17
17
  """
18
18
 
19
- def __init__(self, arr: Expr, index: tuple[Union[int, slice], ...]):
19
+ def __init__(self, arr: Expr, index: tuple[int | slice, ...]):
20
20
  assert arr.col_type.is_array_type()
21
21
  # determine result type
22
22
  super().__init__(arr.col_type)
@@ -43,7 +43,7 @@ class ArraySlice(Expr):
43
43
  def _id_attrs(self) -> list[tuple[str, Any]]:
44
44
  return [*super()._id_attrs(), ('index', self.index)]
45
45
 
46
- def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
46
+ def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
47
47
  return None
48
48
 
49
49
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import enum
4
- from typing import Any, Optional
4
+ from typing import Any
5
5
 
6
6
  import sqlalchemy as sql
7
7
 
@@ -26,6 +26,7 @@ class ColumnPropertyRef(Expr):
26
26
  ERRORMSG = 1
27
27
  FILEURL = 2
28
28
  LOCALPATH = 3
29
+ CELLMD = 4 # JSON metadata for the cell, e.g. errortype, errormsg for media columns
29
30
 
30
31
  def __init__(self, col_ref: ColumnRef, prop: Property):
31
32
  super().__init__(ts.StringType(nullable=True))
@@ -33,7 +34,7 @@ class ColumnPropertyRef(Expr):
33
34
  self.prop = prop
34
35
  self.id = self._create_id()
35
36
 
36
- def default_column_name(self) -> Optional[str]:
37
+ def default_column_name(self) -> str | None:
37
38
  return str(self).replace('.', '_')
38
39
 
39
40
  def _equals(self, other: ColumnPropertyRef) -> bool:
@@ -43,66 +44,69 @@ class ColumnPropertyRef(Expr):
43
44
  return [*super()._id_attrs(), ('prop', self.prop.value)]
44
45
 
45
46
  @property
46
- def _col_ref(self) -> ColumnRef:
47
+ def col_ref(self) -> ColumnRef:
47
48
  col_ref = self.components[0]
48
49
  assert isinstance(col_ref, ColumnRef)
49
50
  return col_ref
50
51
 
51
52
  def __repr__(self) -> str:
52
- return f'{self._col_ref}.{self.prop.name.lower()}'
53
+ return f'{self.col_ref}.{self.prop.name.lower()}'
53
54
 
54
- def is_error_prop(self) -> bool:
55
- return self.prop in (self.Property.ERRORTYPE, self.Property.ERRORMSG)
55
+ def is_cellmd_prop(self) -> bool:
56
+ return self.prop in (self.Property.ERRORTYPE, self.Property.ERRORMSG, self.Property.CELLMD)
56
57
 
57
- def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
58
- if not self._col_ref.col.is_stored:
58
+ def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
59
+ if not self.col_ref.col_handle.get().is_stored:
59
60
  return None
60
-
61
- # we need to reestablish that we have the correct Column instance, there could have been a metadata
62
- # reload since init()
63
- # TODO: add an explicit prepare phase (ie, Expr.prepare()) that gives every subclass instance a chance to
64
- # perform runtime checks and update state
65
- tv = self._col_ref.tbl_version.get()
66
- assert tv.is_validated
67
- col = tv.cols_by_id[self._col_ref.col_id]
68
- # TODO: check for column being dropped
61
+ col = self.col_ref.col_handle.get()
69
62
 
70
63
  # the errortype/-msg properties of a read-validated media column need to be extracted from the DataRow
71
64
  if (
72
65
  col.col_type.is_media_type()
73
66
  and col.media_validation == catalog.MediaValidation.ON_READ
74
- and self.is_error_prop()
67
+ and self.is_cellmd_prop()
75
68
  ):
76
69
  return None
77
70
 
78
71
  if self.prop == self.Property.ERRORTYPE:
79
- assert col.sa_errortype_col is not None
80
- return col.sa_errortype_col
72
+ return col.sa_cellmd_col.op('->>')('errortype')
81
73
  if self.prop == self.Property.ERRORMSG:
82
- assert col.sa_errormsg_col is not None
83
- return col.sa_errormsg_col
74
+ return col.sa_cellmd_col.op('->>')('errormsg')
75
+ if self.prop == self.Property.CELLMD:
76
+ assert col.sa_cellmd_col is not None
77
+ return col.sa_cellmd_col
84
78
  if self.prop == self.Property.FILEURL:
85
79
  # the file url is stored as the column value
86
- return sql_elements.get(self._col_ref)
80
+ return sql_elements.get(self.col_ref)
87
81
  return None
88
82
 
83
+ @classmethod
84
+ def create_cellmd_exc(cls, exc: Exception) -> dict[str, str]:
85
+ """Create a cellmd value from an exception."""
86
+ return {'errortype': type(exc).__name__, 'errormsg': str(exc)}
87
+
89
88
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
90
89
  if self.prop == self.Property.FILEURL:
91
- assert data_row.has_val[self._col_ref.slot_idx]
92
- data_row[self.slot_idx] = data_row.file_urls[self._col_ref.slot_idx]
90
+ assert data_row.has_val[self.col_ref.slot_idx]
91
+ data_row[self.slot_idx] = data_row.file_urls[self.col_ref.slot_idx]
93
92
  return
94
93
  elif self.prop == self.Property.LOCALPATH:
95
- assert data_row.has_val[self._col_ref.slot_idx]
96
- data_row[self.slot_idx] = data_row.file_paths[self._col_ref.slot_idx]
94
+ assert data_row.has_val[self.col_ref.slot_idx]
95
+ data_row[self.slot_idx] = data_row.file_paths[self.col_ref.slot_idx]
97
96
  return
98
- elif self.is_error_prop():
99
- exc = data_row.get_exc(self._col_ref.slot_idx)
97
+ elif self.is_cellmd_prop():
98
+ exc = data_row.get_exc(self.col_ref.slot_idx)
100
99
  if exc is None:
101
100
  data_row[self.slot_idx] = None
102
101
  elif self.prop == self.Property.ERRORTYPE:
103
102
  data_row[self.slot_idx] = type(exc).__name__
104
- else:
103
+ elif self.prop == self.Property.ERRORMSG:
105
104
  data_row[self.slot_idx] = str(exc)
105
+ elif self.prop == self.Property.CELLMD:
106
+ data_row[self.slot_idx] = self.create_cellmd_exc(exc)
107
+ else:
108
+ raise AssertionError(f'Unknown property {self.prop}')
109
+ return
106
110
  else:
107
111
  raise AssertionError()
108
112
 
@@ -1,20 +1,24 @@
1
1
  from __future__ import annotations
2
2
 
3
- import copy
4
- from typing import Any, Optional, Sequence
3
+ from typing import TYPE_CHECKING, Any, Sequence, cast
5
4
  from uuid import UUID
6
5
 
7
6
  import sqlalchemy as sql
8
7
 
9
- import pixeltable as pxt
10
- from pixeltable import catalog, exceptions as excs, iterators as iters
8
+ import pixeltable.catalog as catalog
9
+ import pixeltable.exceptions as excs
10
+ import pixeltable.iterators as iters
11
11
 
12
12
  from ..utils.description_helper import DescriptionHelper
13
+ from ..utils.filecache import FileCache
13
14
  from .data_row import DataRow
14
15
  from .expr import Expr
15
16
  from .row_builder import RowBuilder
16
17
  from .sql_element_cache import SqlElementCache
17
18
 
19
+ if TYPE_CHECKING:
20
+ from pixeltable.dataframe import DataFrame, DataFrameResultSet
21
+
18
22
 
19
23
  class ColumnRef(Expr):
20
24
  """A reference to a table column
@@ -41,42 +45,36 @@ class ColumnRef(Expr):
41
45
  insert them into the EvalCtxs as needed
42
46
  """
43
47
 
44
- col: catalog.Column
45
- reference_tbl: Optional[catalog.TableVersionPath]
48
+ col: catalog.Column # TODO: merge with col_handle
49
+ col_handle: catalog.ColumnHandle
50
+ reference_tbl: catalog.TableVersionPath | None
46
51
  is_unstored_iter_col: bool
47
- iter_arg_ctx: Optional[RowBuilder.EvalCtx]
48
- base_rowid_len: int
49
- base_rowid: Sequence[Optional[Any]]
50
- iterator: Optional[iters.ComponentIterator]
51
- pos_idx: Optional[int]
52
- id: int
53
52
  perform_validation: bool # if True, performs media validation
53
+ iter_arg_ctx: RowBuilder.EvalCtx | None
54
+ base_rowid_len: int # number of rowid columns in the base table
54
55
 
55
- # needed by sql_expr() to re-resolve Column instance after a metadata reload
56
- tbl_version: catalog.TableVersionHandle
57
- col_id: int
56
+ # execution state
57
+ base_rowid: Sequence[Any | None]
58
+ iterator: iters.ComponentIterator | None
59
+ pos_idx: int
58
60
 
59
61
  def __init__(
60
62
  self,
61
63
  col: catalog.Column,
62
- reference_tbl: Optional[catalog.TableVersionPath] = None,
63
- perform_validation: Optional[bool] = None,
64
+ reference_tbl: catalog.TableVersionPath | None = None,
65
+ perform_validation: bool | None = None,
64
66
  ):
65
67
  super().__init__(col.col_type)
66
- assert col.tbl is not None
67
68
  self.col = col
68
69
  self.reference_tbl = reference_tbl
69
- self.tbl_version = catalog.TableVersionHandle(col.tbl.id, col.tbl.effective_version)
70
- self.col_id = col.id
70
+ self.col_handle = col.handle
71
71
 
72
- self.is_unstored_iter_col = col.tbl.is_component_view and col.tbl.is_iterator_column(col) and not col.is_stored
72
+ self.is_unstored_iter_col = col.is_iterator_col and not col.is_stored
73
73
  self.iter_arg_ctx = None
74
- # number of rowid columns in the base table
75
- self.base_rowid_len = col.tbl.base.get().num_rowid_columns() if self.is_unstored_iter_col else 0
76
- self.base_rowid = [None] * self.base_rowid_len
74
+ self.base_rowid_len = 0
75
+ self.base_rowid = []
77
76
  self.iterator = None
78
- # index of the position column in the view's primary key; don't try to reference tbl.store_tbl here
79
- self.pos_idx = col.tbl.num_rowid_columns() - 1 if self.is_unstored_iter_col else None
77
+ self.pos_idx = 0
80
78
 
81
79
  self.perform_validation = False
82
80
  if col.col_type.is_media_type():
@@ -102,14 +100,14 @@ class ColumnRef(Expr):
102
100
  def _id_attrs(self) -> list[tuple[str, Any]]:
103
101
  return [
104
102
  *super()._id_attrs(),
105
- ('tbl_id', self.col.tbl.id),
103
+ ('tbl_id', self.col.tbl_handle.id),
106
104
  ('col_id', self.col.id),
107
105
  ('perform_validation', self.perform_validation),
108
106
  ]
109
107
 
110
108
  # override
111
109
  def _retarget(self, tbl_versions: dict[UUID, catalog.TableVersion]) -> ColumnRef:
112
- target = tbl_versions[self.col.tbl.id]
110
+ target = tbl_versions[self.col.tbl_handle.id]
113
111
  assert self.col.id in target.cols_by_id
114
112
  col = target.cols_by_id[self.col.id]
115
113
  return ColumnRef(col, self.reference_tbl)
@@ -118,12 +116,16 @@ class ColumnRef(Expr):
118
116
  from .column_property_ref import ColumnPropertyRef
119
117
 
120
118
  # resolve column properties
119
+ if name == ColumnPropertyRef.Property.CELLMD.name.lower():
120
+ # This is not user accessible, but used internally to store cell metadata
121
+ return super().__getattr__(name)
122
+
121
123
  if (
122
124
  name == ColumnPropertyRef.Property.ERRORTYPE.name.lower()
123
125
  or name == ColumnPropertyRef.Property.ERRORMSG.name.lower()
124
126
  ):
125
- property_is_present = self.col.is_stored and (self.col.is_computed or self.col_type.is_media_type())
126
- if not property_is_present:
127
+ is_valid = (self.col.is_computed or self.col.col_type.is_media_type()) and self.col.is_stored
128
+ if not is_valid:
127
129
  raise excs.Error(f'{name} only valid for a stored computed or media column: {self}')
128
130
  return ColumnPropertyRef(self, ColumnPropertyRef.Property[name.upper()])
129
131
  if (
@@ -143,76 +145,66 @@ class ColumnRef(Expr):
143
145
 
144
146
  return super().__getattr__(name)
145
147
 
146
- def find_embedding_index(
147
- self, idx_name: Optional[str], method_name: str
148
- ) -> dict[str, catalog.TableVersion.IndexInfo]:
149
- """Return IndexInfo for a column, with an optional given name"""
150
- from pixeltable import index
151
-
152
- # determine index to use
153
- idx_info_dict = self.col.get_idx_info(self.reference_tbl)
154
-
155
- embedding_idx_info = {
156
- info: value for info, value in idx_info_dict.items() if isinstance(value.idx, index.EmbeddingIndex)
157
- }
158
- if len(embedding_idx_info) == 0:
159
- raise excs.Error(f'No indices found for {method_name!r} on column {self.col.name!r}')
160
- if idx_name is not None and idx_name not in embedding_idx_info:
161
- raise excs.Error(f'Index {idx_name!r} not found for {method_name!r} on column {self.col.name!r}')
162
- if len(embedding_idx_info) > 1:
163
- if idx_name is None:
164
- raise excs.Error(
165
- f'Column {self.col.name!r} has multiple indices; use the index name to disambiguate: '
166
- f'`{method_name}(..., idx=<index_name>)`'
167
- )
168
- idx_info = {idx_name: embedding_idx_info[idx_name]}
169
- else:
170
- idx_info = embedding_idx_info
171
- return idx_info
172
-
173
- def similarity(self, item: Any, *, idx: Optional[str] = None) -> Expr:
148
+ def recompute(self, *, cascade: bool = True, errors_only: bool = False) -> catalog.UpdateStatus:
149
+ cat = catalog.Catalog.get()
150
+ # lock_mutable_tree=True: we need to be able to see whether any transitive view has column dependents
151
+ with cat.begin_xact(tbl=self.reference_tbl, for_write=True, lock_mutable_tree=True):
152
+ tbl_version = self.col_handle.tbl_version.get()
153
+ if tbl_version.id != self.reference_tbl.tbl_id:
154
+ raise excs.Error('Cannot recompute column of a base.')
155
+ if tbl_version.is_snapshot:
156
+ raise excs.Error('Cannot recompute column of a snapshot.')
157
+ col_name = self.col_handle.get().name
158
+ status = tbl_version.recompute_columns([col_name], errors_only=errors_only, cascade=cascade)
159
+ FileCache.get().emit_eviction_warnings()
160
+ return status
161
+
162
+ def similarity(self, item: Any, *, idx: str | None = None) -> Expr:
174
163
  from .similarity_expr import SimilarityExpr
175
164
 
176
165
  return SimilarityExpr(self, item, idx_name=idx)
177
166
 
178
- def embedding(self, *, idx: Optional[str] = None) -> ColumnRef:
179
- idx_info = self.find_embedding_index(idx, 'embedding')
180
- assert len(idx_info) == 1
181
- col = copy.copy(next(iter(idx_info.values())).val_col)
182
- col.name = f'{self.col.name}_embedding_{idx if idx is not None else ""}'
183
- # col.create_sa_cols()
184
- return ColumnRef(col)
167
+ def embedding(self, *, idx: str | None = None) -> ColumnRef:
168
+ from pixeltable.index import EmbeddingIndex
185
169
 
186
- def default_column_name(self) -> Optional[str]:
170
+ idx_info = self.tbl.get().get_idx(self.col, idx, EmbeddingIndex)
171
+ return ColumnRef(idx_info.val_col)
172
+
173
+ @property
174
+ def tbl(self) -> catalog.TableVersionHandle:
175
+ return self.reference_tbl.tbl_version if self.reference_tbl is not None else self.col.tbl_handle
176
+
177
+ def default_column_name(self) -> str | None:
187
178
  return self.col.name if self.col is not None else None
188
179
 
189
180
  def _equals(self, other: ColumnRef) -> bool:
190
181
  return self.col == other.col and self.perform_validation == other.perform_validation
191
182
 
192
- def _df(self) -> 'pxt.dataframe.DataFrame':
193
- from pixeltable import plan
183
+ def _df(self) -> 'DataFrame':
184
+ import pixeltable.plan as plan
185
+ from pixeltable.dataframe import DataFrame
194
186
 
195
187
  if self.reference_tbl is None:
196
188
  # No reference table; use the current version of the table to which the column belongs
197
- tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl.id)
189
+ tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl_handle.id)
198
190
  return tbl.select(self)
199
191
  else:
200
192
  # Explicit reference table; construct a DataFrame directly from it
201
- return pxt.DataFrame(plan.FromClause([self.reference_tbl])).select(self)
193
+ return DataFrame(plan.FromClause([self.reference_tbl])).select(self)
202
194
 
203
- def show(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
195
+ def show(self, *args: Any, **kwargs: Any) -> 'DataFrameResultSet':
204
196
  return self._df().show(*args, **kwargs)
205
197
 
206
- def head(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
198
+ def head(self, *args: Any, **kwargs: Any) -> 'DataFrameResultSet':
207
199
  return self._df().head(*args, **kwargs)
208
200
 
209
- def tail(self, *args: Any, **kwargs: Any) -> 'pxt.dataframe.DataFrameResultSet':
201
+ def tail(self, *args: Any, **kwargs: Any) -> 'DataFrameResultSet':
210
202
  return self._df().tail(*args, **kwargs)
211
203
 
212
204
  def count(self) -> int:
213
205
  return self._df().count()
214
206
 
215
- def distinct(self) -> 'pxt.dataframe.DataFrame':
207
+ def distinct(self) -> 'DataFrame':
216
208
  """Return distinct values in this column."""
217
209
  return self._df().distinct()
218
210
 
@@ -229,7 +221,8 @@ class ColumnRef(Expr):
229
221
  return self._descriptors().to_html()
230
222
 
231
223
  def _descriptors(self) -> DescriptionHelper:
232
- tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl.id)
224
+ with catalog.Catalog.get().begin_xact():
225
+ tbl = catalog.Catalog.get().get_table_by_id(self.col.tbl_handle.id)
233
226
  helper = DescriptionHelper()
234
227
  helper.append(f'Column\n{self.col.name!r}\n(of table {tbl._path()!r})')
235
228
  helper.append(tbl._col_descriptor([self.col.name]))
@@ -238,23 +231,21 @@ class ColumnRef(Expr):
238
231
  helper.append(idxs)
239
232
  return helper
240
233
 
241
- def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
242
- # return None if self.perform_validation else self.col.sa_col
234
+ def prepare(self) -> None:
235
+ from pixeltable import store
236
+
237
+ if not self.is_unstored_iter_col:
238
+ return
239
+ col = self.col_handle.get()
240
+ self.base_rowid_len = col.get_tbl().base.get().num_rowid_columns()
241
+ self.base_rowid = [None] * self.base_rowid_len
242
+ assert isinstance(col.get_tbl().store_tbl, store.StoreComponentView)
243
+ self.pos_idx = cast(store.StoreComponentView, col.get_tbl().store_tbl).pos_col_idx
244
+
245
+ def sql_expr(self, _: SqlElementCache) -> sql.ColumnElement | None:
243
246
  if self.perform_validation:
244
247
  return None
245
- # we need to reestablish that we have the correct Column instance, there could have been a metadata
246
- # reload since init()
247
- # TODO: add an explicit prepare phase (ie, Expr.prepare()) that gives every subclass instance a chance to
248
- # perform runtime checks and update state
249
- tv = self.tbl_version.get()
250
- assert tv.is_validated
251
- self.col = tv.cols_by_id[self.col_id]
252
- assert self.col.tbl is tv
253
- # TODO: check for column being dropped
254
- # print(
255
- # f'ColumnRef.sql_expr: tbl={tv.id}:{tv.effective_version} sa_tbl={id(self.col.tbl.store_tbl.sa_tbl):x} '
256
- # f'tv={id(tv):x}'
257
- # )
248
+ self.col = self.col_handle.get()
258
249
  return self.col.sa_col
259
250
 
260
251
  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
@@ -296,29 +287,34 @@ class ColumnRef(Expr):
296
287
  if self.base_rowid != data_row.pk[: self.base_rowid_len]:
297
288
  row_builder.eval(data_row, self.iter_arg_ctx)
298
289
  iterator_args = data_row[self.iter_arg_ctx.target_slot_idxs[0]]
299
- self.iterator = self.col.tbl.iterator_cls(**iterator_args)
290
+ self.iterator = self.col.get_tbl().iterator_cls(**iterator_args)
300
291
  self.base_rowid = data_row.pk[: self.base_rowid_len]
301
292
  self.iterator.set_pos(data_row.pk[self.pos_idx])
302
293
  res = next(self.iterator)
303
294
  data_row[self.slot_idx] = res[self.col.name]
304
295
 
305
296
  def _as_dict(self) -> dict:
306
- tbl = self.col.tbl
307
- version = tbl.version if tbl.is_snapshot else None
297
+ tbl_handle = self.col.tbl_handle
308
298
  # we omit self.components, even if this is a validating ColumnRef, because init() will recreate the
309
299
  # non-validating component ColumnRef
310
300
  return {
311
- 'tbl_id': str(tbl.id),
312
- 'tbl_version': version,
301
+ 'tbl_id': str(tbl_handle.id),
302
+ 'tbl_version': tbl_handle.effective_version,
313
303
  'col_id': self.col.id,
314
304
  'reference_tbl': self.reference_tbl.as_dict() if self.reference_tbl is not None else None,
315
305
  'perform_validation': self.perform_validation,
316
306
  }
317
307
 
308
+ @classmethod
309
+ def get_column_id(cls, d: dict) -> catalog.QColumnId:
310
+ tbl_id, col_id = UUID(d['tbl_id']), d['col_id']
311
+ return catalog.QColumnId(tbl_id, col_id)
312
+
318
313
  @classmethod
319
314
  def get_column(cls, d: dict) -> catalog.Column:
320
315
  tbl_id, version, col_id = UUID(d['tbl_id']), d['tbl_version'], d['col_id']
321
- tbl_version = catalog.Catalog.get().get_tbl_version(tbl_id, version)
316
+ # validate_initialized=False: this gets called as part of TableVersion.init()
317
+ tbl_version = catalog.Catalog.get().get_tbl_version(tbl_id, version, validate_initialized=False)
322
318
  # don't use tbl_version.cols_by_id here, this might be a snapshot reference to a column that was then dropped
323
319
  col = next(col for col in tbl_version.cols if col.id == col_id)
324
320
  return col
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any, Optional
3
+ from typing import Any
4
4
 
5
5
  import sqlalchemy as sql
6
6
 
@@ -69,8 +69,8 @@ class Comparison(Expr):
69
69
  def _op2(self) -> Expr:
70
70
  return self.components[1]
71
71
 
72
- def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
73
- from pixeltable import index
72
+ def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
73
+ import pixeltable.index as index
74
74
 
75
75
  if str(self._op1.col_type.to_sa_type()) != str(self._op2.col_type.to_sa_type()):
76
76
  # Comparing columns of different SQL types (e.g., string vs. json); this can only be done in Python
@@ -81,9 +81,9 @@ class Comparison(Expr):
81
81
  if self.is_search_arg_comparison:
82
82
  # reference the index value column if there is an index and this is not a snapshot
83
83
  # (indices don't apply to snapshots)
84
- tbl = self._op1.col.tbl
84
+ tbl = self._op1.col.get_tbl()
85
85
  idx_info = [
86
- info for info in self._op1.col.get_idx_info().values() if isinstance(info.idx, index.BtreeIndex)
86
+ info for info in tbl.idxs_by_col.get(self._op1.col.qid, []) if isinstance(info.idx, index.BtreeIndex)
87
87
  ]
88
88
  if len(idx_info) > 0 and not tbl.is_snapshot:
89
89
  # there shouldn't be multiple B-tree indices on a column
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import operator
4
- from typing import Any, Callable, Optional
4
+ from typing import Any, Callable
5
5
 
6
6
  import sqlalchemy as sql
7
7
 
@@ -36,7 +36,8 @@ class CompoundPredicate(Expr):
36
36
  return f' {self.operator} '.join([f'({e})' for e in self.components])
37
37
 
38
38
  @classmethod
39
- def make_conjunction(cls, operands: list[Expr]) -> Optional[Expr]:
39
+ def make_conjunction(cls, operands: list[Expr | None]) -> Expr | None:
40
+ operands = [e for e in operands if e is not None]
40
41
  if len(operands) == 0:
41
42
  return None
42
43
  if len(operands) == 1:
@@ -60,14 +61,14 @@ class CompoundPredicate(Expr):
60
61
  def _id_attrs(self) -> list[tuple[str, Any]]:
61
62
  return [*super()._id_attrs(), ('operator', self.operator.value)]
62
63
 
63
- def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Optional[Expr]]:
64
+ def split_conjuncts(self, condition: Callable[[Expr], bool]) -> tuple[list[Expr], Expr | None]:
64
65
  if self.operator in (LogicalOperator.OR, LogicalOperator.NOT):
65
66
  return super().split_conjuncts(condition)
66
67
  matches = [op for op in self.components if condition(op)]
67
68
  non_matches = [op for op in self.components if not condition(op)]
68
69
  return (matches, self.make_conjunction(non_matches))
69
70
 
70
- def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
71
+ def sql_expr(self, sql_elements: SqlElementCache) -> sql.ColumnElement | None:
71
72
  sql_exprs = [sql_elements.get(op) for op in self.components]
72
73
  if any(e is None for e in sql_exprs):
73
74
  return None