pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (202) hide show
  1. pixeltable/__init__.py +23 -5
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/__init__.py +5 -3
  4. pixeltable/catalog/catalog.py +1318 -404
  5. pixeltable/catalog/column.py +186 -115
  6. pixeltable/catalog/dir.py +1 -2
  7. pixeltable/catalog/globals.py +11 -43
  8. pixeltable/catalog/insertable_table.py +167 -79
  9. pixeltable/catalog/path.py +61 -23
  10. pixeltable/catalog/schema_object.py +9 -10
  11. pixeltable/catalog/table.py +626 -308
  12. pixeltable/catalog/table_metadata.py +101 -0
  13. pixeltable/catalog/table_version.py +713 -569
  14. pixeltable/catalog/table_version_handle.py +37 -6
  15. pixeltable/catalog/table_version_path.py +42 -29
  16. pixeltable/catalog/tbl_ops.py +50 -0
  17. pixeltable/catalog/update_status.py +191 -0
  18. pixeltable/catalog/view.py +108 -94
  19. pixeltable/config.py +128 -22
  20. pixeltable/dataframe.py +188 -100
  21. pixeltable/env.py +407 -136
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +3 -0
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +231 -0
  27. pixeltable/exec/cell_reconstruction_node.py +135 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +7 -6
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +190 -30
  37. pixeltable/exec/globals.py +32 -0
  38. pixeltable/exec/in_memory_data_node.py +18 -18
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +206 -101
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +34 -30
  46. pixeltable/exprs/column_ref.py +92 -96
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +152 -55
  50. pixeltable/exprs/expr.py +62 -43
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +75 -37
  54. pixeltable/exprs/globals.py +1 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +10 -27
  57. pixeltable/exprs/is_null.py +1 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +5 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +127 -53
  64. pixeltable/exprs/rowid_ref.py +8 -12
  65. pixeltable/exprs/similarity_expr.py +50 -25
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +10 -10
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +20 -18
  78. pixeltable/func/signature.py +43 -16
  79. pixeltable/func/tools.py +23 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +6 -0
  82. pixeltable/functions/anthropic.py +93 -33
  83. pixeltable/functions/audio.py +114 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +1 -1
  86. pixeltable/functions/deepseek.py +20 -9
  87. pixeltable/functions/fireworks.py +2 -2
  88. pixeltable/functions/gemini.py +28 -11
  89. pixeltable/functions/globals.py +13 -13
  90. pixeltable/functions/groq.py +108 -0
  91. pixeltable/functions/huggingface.py +1046 -23
  92. pixeltable/functions/image.py +9 -18
  93. pixeltable/functions/llama_cpp.py +23 -8
  94. pixeltable/functions/math.py +3 -4
  95. pixeltable/functions/mistralai.py +4 -15
  96. pixeltable/functions/ollama.py +16 -9
  97. pixeltable/functions/openai.py +104 -82
  98. pixeltable/functions/openrouter.py +143 -0
  99. pixeltable/functions/replicate.py +2 -2
  100. pixeltable/functions/reve.py +250 -0
  101. pixeltable/functions/string.py +21 -28
  102. pixeltable/functions/timestamp.py +13 -14
  103. pixeltable/functions/together.py +4 -6
  104. pixeltable/functions/twelvelabs.py +92 -0
  105. pixeltable/functions/util.py +6 -1
  106. pixeltable/functions/video.py +1388 -106
  107. pixeltable/functions/vision.py +7 -7
  108. pixeltable/functions/whisper.py +15 -7
  109. pixeltable/functions/whisperx.py +179 -0
  110. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  111. pixeltable/globals.py +332 -105
  112. pixeltable/index/base.py +13 -22
  113. pixeltable/index/btree.py +23 -22
  114. pixeltable/index/embedding_index.py +32 -44
  115. pixeltable/io/__init__.py +4 -2
  116. pixeltable/io/datarows.py +7 -6
  117. pixeltable/io/external_store.py +49 -77
  118. pixeltable/io/fiftyone.py +11 -11
  119. pixeltable/io/globals.py +29 -28
  120. pixeltable/io/hf_datasets.py +17 -9
  121. pixeltable/io/label_studio.py +70 -66
  122. pixeltable/io/lancedb.py +3 -0
  123. pixeltable/io/pandas.py +12 -11
  124. pixeltable/io/parquet.py +13 -93
  125. pixeltable/io/table_data_conduit.py +71 -47
  126. pixeltable/io/utils.py +3 -3
  127. pixeltable/iterators/__init__.py +2 -1
  128. pixeltable/iterators/audio.py +21 -11
  129. pixeltable/iterators/document.py +116 -55
  130. pixeltable/iterators/image.py +5 -2
  131. pixeltable/iterators/video.py +293 -13
  132. pixeltable/metadata/__init__.py +4 -2
  133. pixeltable/metadata/converters/convert_18.py +2 -2
  134. pixeltable/metadata/converters/convert_19.py +2 -2
  135. pixeltable/metadata/converters/convert_20.py +2 -2
  136. pixeltable/metadata/converters/convert_21.py +2 -2
  137. pixeltable/metadata/converters/convert_22.py +2 -2
  138. pixeltable/metadata/converters/convert_24.py +2 -2
  139. pixeltable/metadata/converters/convert_25.py +2 -2
  140. pixeltable/metadata/converters/convert_26.py +2 -2
  141. pixeltable/metadata/converters/convert_29.py +4 -4
  142. pixeltable/metadata/converters/convert_34.py +2 -2
  143. pixeltable/metadata/converters/convert_36.py +2 -2
  144. pixeltable/metadata/converters/convert_37.py +15 -0
  145. pixeltable/metadata/converters/convert_38.py +39 -0
  146. pixeltable/metadata/converters/convert_39.py +124 -0
  147. pixeltable/metadata/converters/convert_40.py +73 -0
  148. pixeltable/metadata/converters/util.py +13 -12
  149. pixeltable/metadata/notes.py +4 -0
  150. pixeltable/metadata/schema.py +79 -42
  151. pixeltable/metadata/utils.py +74 -0
  152. pixeltable/mypy/__init__.py +3 -0
  153. pixeltable/mypy/mypy_plugin.py +123 -0
  154. pixeltable/plan.py +274 -223
  155. pixeltable/share/__init__.py +1 -1
  156. pixeltable/share/packager.py +259 -129
  157. pixeltable/share/protocol/__init__.py +34 -0
  158. pixeltable/share/protocol/common.py +170 -0
  159. pixeltable/share/protocol/operation_types.py +33 -0
  160. pixeltable/share/protocol/replica.py +109 -0
  161. pixeltable/share/publish.py +213 -57
  162. pixeltable/store.py +238 -175
  163. pixeltable/type_system.py +104 -63
  164. pixeltable/utils/__init__.py +2 -3
  165. pixeltable/utils/arrow.py +108 -13
  166. pixeltable/utils/av.py +298 -0
  167. pixeltable/utils/azure_store.py +305 -0
  168. pixeltable/utils/code.py +3 -3
  169. pixeltable/utils/console_output.py +4 -1
  170. pixeltable/utils/coroutine.py +6 -23
  171. pixeltable/utils/dbms.py +31 -5
  172. pixeltable/utils/description_helper.py +4 -5
  173. pixeltable/utils/documents.py +5 -6
  174. pixeltable/utils/exception_handler.py +7 -30
  175. pixeltable/utils/filecache.py +6 -6
  176. pixeltable/utils/formatter.py +4 -6
  177. pixeltable/utils/gcs_store.py +283 -0
  178. pixeltable/utils/http_server.py +2 -3
  179. pixeltable/utils/iceberg.py +1 -2
  180. pixeltable/utils/image.py +17 -0
  181. pixeltable/utils/lancedb.py +88 -0
  182. pixeltable/utils/local_store.py +316 -0
  183. pixeltable/utils/misc.py +5 -0
  184. pixeltable/utils/object_stores.py +528 -0
  185. pixeltable/utils/pydantic.py +60 -0
  186. pixeltable/utils/pytorch.py +5 -6
  187. pixeltable/utils/s3_store.py +392 -0
  188. pixeltable-0.4.20.dist-info/METADATA +587 -0
  189. pixeltable-0.4.20.dist-info/RECORD +218 -0
  190. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
  191. pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
  192. pixeltable/__version__.py +0 -3
  193. pixeltable/ext/__init__.py +0 -17
  194. pixeltable/ext/functions/__init__.py +0 -11
  195. pixeltable/ext/functions/whisperx.py +0 -77
  196. pixeltable/utils/media_store.py +0 -77
  197. pixeltable/utils/s3.py +0 -17
  198. pixeltable/utils/sample.py +0 -25
  199. pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
  200. pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
  201. pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
  202. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
pixeltable/catalog/column.py CHANGED
@@ -3,18 +3,21 @@ from __future__ import annotations
3
3
  import logging
4
4
  import warnings
5
5
  from textwrap import dedent
6
- from typing import TYPE_CHECKING, Any, Optional
6
+ from typing import TYPE_CHECKING, Any
7
7
 
8
8
  import sqlalchemy as sql
9
9
 
10
10
  import pixeltable.exceptions as excs
11
+ import pixeltable.exprs as exprs
11
12
  import pixeltable.type_system as ts
12
- from pixeltable import exprs
13
+ from pixeltable.env import Env
14
+ from pixeltable.metadata import schema
13
15
 
14
- from .globals import MediaValidation, is_valid_identifier
16
+ from .globals import MediaValidation, QColumnId, is_valid_identifier
15
17
 
16
18
  if TYPE_CHECKING:
17
19
  from .table_version import TableVersion
20
+ from .table_version_handle import ColumnHandle, TableVersionHandle
18
21
  from .table_version_path import TableVersionPath
19
22
 
20
23
  _logger = logging.getLogger('pixeltable')
@@ -25,122 +28,161 @@ class Column:
25
28
 
26
29
  A Column contains all the metadata necessary for executing queries and updates against a particular version of a
27
30
  table/view.
31
+
32
+ Args:
33
+ name: column name; None for system columns (eg, index columns)
34
+ col_type: column type; can be None if the type can be derived from ``computed_with``
35
+ computed_with: an Expr that computes the column value
36
+ is_pk: if True, this column is part of the primary key
37
+ stored: determines whether a computed column is present in the stored table or recomputed on demand
38
+ destination: An object store reference for persisting computed files
39
+ col_id: column ID (only used internally)
40
+
41
+ Computed columns: those have a non-None ``computed_with`` argument
42
+ - when constructed by the user: ``computed_with`` was constructed explicitly and is passed in;
43
+ col_type is None
44
+ - when loaded from md store: ``computed_with`` is set and col_type is set
45
+
46
+ ``stored`` (only valid for computed columns):
47
+ - if True: the column is present in the stored table
48
+ - if False: the column is not present in the stored table and recomputed during a query
49
+ - if None: the system chooses for you (at present, this is always False, but this may change in the future)
28
50
  """
29
51
 
30
- name: str
31
- id: Optional[int]
52
+ name: str | None
53
+ id: int | None
32
54
  col_type: ts.ColumnType
33
55
  stored: bool
34
56
  is_pk: bool
35
- _media_validation: Optional[MediaValidation] # if not set, TableVersion.media_validation applies
36
- schema_version_add: Optional[int]
37
- schema_version_drop: Optional[int]
38
- _records_errors: Optional[bool]
39
- sa_col: Optional[sql.schema.Column]
40
- sa_col_type: Optional[sql.sqltypes.TypeEngine]
41
- sa_errormsg_col: Optional[sql.schema.Column]
42
- sa_errortype_col: Optional[sql.schema.Column]
43
- _value_expr: Optional[exprs.Expr]
44
- value_expr_dict: Optional[dict[str, Any]]
45
- dependent_cols: set[Column]
46
- # we store a TableVersion here, not a TableVersionHandle, because this column is owned by that TableVersion instance
47
- # (re-resolving it later to a different instance doesn't make sense)
48
- tbl: Optional[TableVersion]
49
- # tbl: Optional[TableVersionHandle]
57
+ is_iterator_col: bool
58
+ _explicit_destination: str | None # An object store reference for computed files
59
+ _media_validation: MediaValidation | None # if not set, TableVersion.media_validation applies
60
+ schema_version_add: int | None
61
+ schema_version_drop: int | None
62
+ stores_cellmd: bool
63
+ sa_col: sql.schema.Column | None
64
+ sa_col_type: sql.types.TypeEngine
65
+ sa_cellmd_col: sql.schema.Column | None # JSON metadata for the cell, e.g. errortype, errormsg for media columns
66
+ _value_expr: exprs.Expr | None
67
+ value_expr_dict: dict[str, Any] | None
68
+ # we store a handle here in order to allow Column construction before there is a corresponding TableVersion
69
+ tbl_handle: 'TableVersionHandle' | None
50
70
 
51
71
  def __init__(
52
72
  self,
53
- name: Optional[str],
54
- col_type: Optional[ts.ColumnType] = None,
55
- computed_with: Optional[exprs.Expr] = None,
73
+ name: str | None,
74
+ col_type: ts.ColumnType | None = None,
75
+ computed_with: exprs.Expr | None = None,
56
76
  is_pk: bool = False,
77
+ is_iterator_col: bool = False,
57
78
  stored: bool = True,
58
- media_validation: Optional[MediaValidation] = None,
59
- col_id: Optional[int] = None,
60
- schema_version_add: Optional[int] = None,
61
- schema_version_drop: Optional[int] = None,
62
- sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
63
- records_errors: Optional[bool] = None,
64
- value_expr_dict: Optional[dict[str, Any]] = None,
79
+ media_validation: MediaValidation | None = None,
80
+ col_id: int | None = None,
81
+ schema_version_add: int | None = None,
82
+ schema_version_drop: int | None = None,
83
+ sa_col_type: sql.types.TypeEngine | None = None,
84
+ stores_cellmd: bool | None = None,
85
+ value_expr_dict: dict[str, Any] | None = None,
86
+ tbl_handle: 'TableVersionHandle' | None = None,
87
+ destination: str | None = None,
65
88
  ):
66
- """Column constructor.
67
-
68
- Args:
69
- name: column name; None for system columns (eg, index columns)
70
- col_type: column type; can be None if the type can be derived from ``computed_with``
71
- computed_with: an Expr that computes the column value
72
- is_pk: if True, this column is part of the primary key
73
- stored: determines whether a computed column is present in the stored table or recomputed on demand
74
- col_id: column ID (only used internally)
75
-
76
- Computed columns: those have a non-None ``computed_with`` argument
77
- - when constructed by the user: ``computed_with`` was constructed explicitly and is passed in;
78
- col_type is None
79
- - when loaded from md store: ``computed_with`` is set and col_type is set
80
-
81
- ``stored`` (only valid for computed columns):
82
- - if True: the column is present in the stored table
83
- - if False: the column is not present in the stored table and recomputed during a query
84
- - if None: the system chooses for you (at present, this is always False, but this may change in the future)
85
- """
86
89
  if name is not None and not is_valid_identifier(name):
87
- raise excs.Error(f"Invalid column name: '{name}'")
90
+ raise excs.Error(f'Invalid column name: {name}')
88
91
  self.name = name
92
+ self.tbl_handle = tbl_handle
89
93
  if col_type is None and computed_with is None:
90
- raise excs.Error(f'Column `{name}`: col_type is required if computed_with is not specified')
94
+ raise excs.Error(f'Column {name!r}: `col_type` is required if `computed_with` is not specified')
91
95
 
92
- self._value_expr: Optional[exprs.Expr] = None
96
+ self._value_expr = None
93
97
  self.value_expr_dict = value_expr_dict
94
98
  if computed_with is not None:
95
99
  value_expr = exprs.Expr.from_object(computed_with)
96
100
  if value_expr is None:
101
+ # TODO: this shouldn't be a user-facing error
97
102
  raise excs.Error(
98
- f'Column {name}: computed_with needs to be a valid Pixeltable expression, '
103
+ f'Column {name!r}: `computed_with` needs to be a valid Pixeltable expression, '
99
104
  f'but it is a {type(computed_with)}'
100
105
  )
101
106
  else:
102
107
  self._value_expr = value_expr.copy()
103
108
  self.col_type = self._value_expr.col_type
109
+ if self._value_expr is not None and self.value_expr_dict is None:
110
+ self.value_expr_dict = self._value_expr.as_dict()
104
111
 
105
112
  if col_type is not None:
106
113
  self.col_type = col_type
107
114
  assert self.col_type is not None
108
115
 
109
116
  self.stored = stored
110
- self.dependent_cols = set() # cols with value_exprs that reference us; set by TableVersion
117
+ # self.dependent_cols = set() # cols with value_exprs that reference us; set by TableVersion
111
118
  self.id = col_id
112
119
  self.is_pk = is_pk
120
+ self.is_iterator_col = is_iterator_col
113
121
  self._media_validation = media_validation
114
122
  self.schema_version_add = schema_version_add
115
123
  self.schema_version_drop = schema_version_drop
116
124
 
117
- self._records_errors = records_errors
125
+ if stores_cellmd is not None:
126
+ self.stores_cellmd = stores_cellmd
127
+ else:
128
+ self.stores_cellmd = stored and (
129
+ self.is_computed
130
+ or self.col_type.is_media_type()
131
+ or self.col_type.is_json_type()
132
+ or self.col_type.is_array_type()
133
+ )
118
134
 
119
135
  # column in the stored table for the values of this Column
120
136
  self.sa_col = None
121
- self.sa_col_type = sa_col_type
137
+ self.sa_col_type = self.col_type.to_sa_type() if sa_col_type is None else sa_col_type
122
138
 
123
139
  # computed cols also have storage columns for the exception string and type
124
- self.sa_errormsg_col = None
125
- self.sa_errortype_col = None
140
+ self.sa_cellmd_col = None
141
+ self._explicit_destination = destination
142
+
143
+ def to_md(self, pos: int | None = None) -> tuple[schema.ColumnMd, schema.SchemaColumn | None]:
144
+ """Returns the Column and optional SchemaColumn metadata for this Column."""
145
+ assert self.is_pk is not None
146
+ col_md = schema.ColumnMd(
147
+ id=self.id,
148
+ col_type=self.col_type.as_dict(),
149
+ is_pk=self.is_pk,
150
+ schema_version_add=self.schema_version_add,
151
+ schema_version_drop=self.schema_version_drop,
152
+ value_expr=self.value_expr.as_dict() if self.value_expr is not None else None,
153
+ stored=self.stored,
154
+ destination=self._explicit_destination,
155
+ )
156
+ if pos is None:
157
+ return col_md, None
158
+ assert self.name is not None, 'Column name must be set for user-facing columns'
159
+ sch_md = schema.SchemaColumn(
160
+ name=self.name,
161
+ pos=pos,
162
+ media_validation=self._media_validation.name.lower() if self._media_validation is not None else None,
163
+ )
164
+ return col_md, sch_md
126
165
 
127
- self.tbl = None # set by owning TableVersion
166
+ def init_value_expr(self, tvp: 'TableVersionPath' | None) -> None:
167
+ """
168
+ Initialize the value_expr from its dict representation, if necessary.
128
169
 
129
- @property
130
- def value_expr(self) -> Optional[exprs.Expr]:
131
- """Instantiate value_expr on-demand"""
132
- # TODO: instantiate expr in the c'tor and add an Expr.prepare() that can create additional state after the
133
- # catalog has been fully loaded; that way, we encounter bugs in the serialization/deserialization logic earlier
134
- if self.value_expr_dict is not None and self._value_expr is None:
135
- from pixeltable import exprs
170
+ If `tvp` is not None, retarget the value_expr to the given TableVersionPath.
171
+ """
172
+ from pixeltable import exprs
136
173
 
174
+ if self._value_expr is None and self.value_expr_dict is None:
175
+ return
176
+
177
+ if self._value_expr is None:
178
+ # Instantiate the Expr from its dict
137
179
  self._value_expr = exprs.Expr.from_dict(self.value_expr_dict)
138
180
  self._value_expr.bind_rel_paths()
139
181
  if not self._value_expr.is_valid:
140
182
  message = (
141
183
  dedent(
142
184
  f"""
143
- The computed column {self.name!r} in table {self.tbl.name!r} is no longer valid.
185
+ The computed column {self.name!r} in table {self.get_tbl().name!r} is no longer valid.
144
186
  {{validation_error}}
145
187
  You can continue to query existing data from this column, but evaluating it on new data will raise an error.
146
188
  """ # noqa: E501
@@ -149,37 +191,76 @@ class Column:
149
191
  .format(validation_error=self._value_expr.validation_error)
150
192
  )
151
193
  warnings.warn(message, category=excs.PixeltableWarning, stacklevel=2)
194
+
195
+ if tvp is not None:
196
+ # Retarget the Expr
197
+ self._value_expr = self._value_expr.retarget(tvp)
198
+
199
+ def get_tbl(self) -> TableVersion:
200
+ tv = self.tbl_handle.get()
201
+ return tv
202
+
203
+ @property
204
+ def destination(self) -> str | None:
205
+ if self._explicit_destination is not None:
206
+ # An explicit destination was set as part of the column definition
207
+ return self._explicit_destination
208
+
209
+ # Otherwise, if this is a stored media column, use the default destination if one is configured (input
210
+ # destination or output destination, depending on whether this is a computed column)
211
+ # TODO: The `self.name is not None` clause is necessary because index columns currently follow the type of
212
+ # the underlying media column. We should move to using pxt.String as the col_type of index columns; this
213
+ # would be a more robust solution, and then `self.name is not None` could be removed.
214
+ if self.is_stored and self.col_type.is_media_type() and self.name is not None:
215
+ if self.is_computed:
216
+ return Env.get().default_output_media_dest
217
+ else:
218
+ return Env.get().default_input_media_dest
219
+
220
+ return None
221
+
222
+ @property
223
+ def handle(self) -> 'ColumnHandle':
224
+ """Returns a ColumnHandle for this Column."""
225
+ from .table_version_handle import ColumnHandle
226
+
227
+ assert self.tbl_handle is not None
228
+ assert self.id is not None
229
+ return ColumnHandle(self.tbl_handle, self.id)
230
+
231
+ @property
232
+ def qid(self) -> QColumnId:
233
+ assert self.tbl_handle is not None
234
+ assert self.id is not None
235
+ return QColumnId(self.tbl_handle.id, self.id)
236
+
237
+ @property
238
+ def value_expr(self) -> exprs.Expr | None:
239
+ assert self.value_expr_dict is None or self._value_expr is not None
152
240
  return self._value_expr
153
241
 
154
242
  def set_value_expr(self, value_expr: exprs.Expr) -> None:
155
243
  self._value_expr = value_expr
156
- self.value_expr_dict = None
244
+ self.value_expr_dict = self._value_expr.as_dict()
157
245
 
158
246
  def check_value_expr(self) -> None:
159
247
  assert self._value_expr is not None
160
- if self.stored == False and self.is_computed and self.has_window_fn_call():
248
+ if not self.stored and self.is_computed and self.has_window_fn_call():
161
249
  raise excs.Error(
162
- f'Column {self.name}: stored={self.stored} not supported for columns computed with window functions:'
163
- f'\n{self.value_expr}'
250
+ f'Column {self.name!r}: `stored={self.stored}` not supported for columns '
251
+ f'computed with window functions:\n{self.value_expr}'
164
252
  )
165
253
 
166
254
  def has_window_fn_call(self) -> bool:
167
- if self.value_expr is None:
168
- return False
169
255
  from pixeltable import exprs
170
256
 
257
+ if self.value_expr is None:
258
+ return False
171
259
  window_fn_calls = list(
172
260
  self.value_expr.subexprs(filter=lambda e: isinstance(e, exprs.FunctionCall) and e.is_window_fn_call)
173
261
  )
174
262
  return len(window_fn_calls) > 0
175
263
 
176
- # TODO: This should be moved out of `Column` (its presence in `Column` doesn't anticipate indices being defined on
177
- # multiple dependents)
178
- def get_idx_info(self, reference_tbl: Optional['TableVersionPath'] = None) -> dict[str, 'TableVersion.IndexInfo']:
179
- assert self.tbl is not None
180
- tbl = reference_tbl.tbl_version.get() if reference_tbl is not None else self.tbl
181
- return {name: info for name, info in tbl.idxs_by_name.items() if info.col == self}
182
-
183
264
  @property
184
265
  def is_computed(self) -> bool:
185
266
  return self._value_expr is not None or self.value_expr_dict is not None
@@ -190,25 +271,17 @@ class Column:
190
271
  assert self.stored is not None
191
272
  return self.stored
192
273
 
193
- @property
194
- def records_errors(self) -> bool:
195
- """True if this column also stores error information."""
196
- # default: record errors for computed and media columns
197
- if self._records_errors is not None:
198
- return self._records_errors
199
- return self.is_stored and (self.is_computed or self.col_type.is_media_type())
200
-
201
274
  @property
202
275
  def qualified_name(self) -> str:
203
- assert self.tbl is not None
204
- return f'{self.tbl.name}.{self.name}'
276
+ assert self.get_tbl() is not None
277
+ return f'{self.get_tbl().name}.{self.name}'
205
278
 
206
279
  @property
207
280
  def media_validation(self) -> MediaValidation:
208
281
  if self._media_validation is not None:
209
282
  return self._media_validation
210
- assert self.tbl is not None
211
- return self.tbl.media_validation
283
+ assert self.get_tbl() is not None
284
+ return self.get_tbl().media_validation
212
285
 
213
286
  @property
214
287
  def is_required_for_insert(self) -> bool:
@@ -227,48 +300,46 @@ class Column:
227
300
 
228
301
  def create_sa_cols(self) -> None:
229
302
  """
230
- These need to be recreated for every new table schema version.
303
+ These need to be recreated for every sql.Table instance
231
304
  """
232
305
  assert self.is_stored
306
+ assert self.stores_cellmd is not None
233
307
  # all storage columns are nullable (we deal with null errors in Pixeltable directly)
234
- self.sa_col = sql.Column(
235
- self.store_name(),
236
- self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type,
237
- nullable=True,
238
- )
239
- if self.is_computed or self.col_type.is_media_type():
240
- self.sa_errormsg_col = sql.Column(self.errormsg_store_name(), ts.StringType().to_sa_type(), nullable=True)
241
- self.sa_errortype_col = sql.Column(self.errortype_store_name(), ts.StringType().to_sa_type(), nullable=True)
308
+ self.sa_col = sql.Column(self.store_name(), self.sa_col_type, nullable=True)
309
+ if self.stores_cellmd:
310
+ self.sa_cellmd_col = sql.Column(self.cellmd_store_name(), self.sa_cellmd_type(), nullable=True)
242
311
 
243
- def get_sa_col_type(self) -> sql.sqltypes.TypeEngine:
244
- return self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type
312
+ @classmethod
313
+ def cellmd_type(cls) -> ts.ColumnType:
314
+ return ts.JsonType(nullable=True)
315
+
316
+ @classmethod
317
+ def sa_cellmd_type(cls) -> sql.types.TypeEngine:
318
+ return cls.cellmd_type().to_sa_type()
245
319
 
246
320
  def store_name(self) -> str:
247
321
  assert self.id is not None
248
322
  assert self.is_stored
249
323
  return f'col_{self.id}'
250
324
 
251
- def errormsg_store_name(self) -> str:
252
- return f'{self.store_name()}_errormsg'
253
-
254
- def errortype_store_name(self) -> str:
255
- return f'{self.store_name()}_errortype'
325
+ def cellmd_store_name(self) -> str:
326
+ return f'{self.store_name()}_cellmd'
256
327
 
257
328
  def __str__(self) -> str:
258
329
  return f'{self.name}: {self.col_type}'
259
330
 
260
331
  def __repr__(self) -> str:
261
- return f'Column({self.id!r}, {self.name!r}, tbl={self.tbl.name!r})'
332
+ return f'Column({self.id!r}, {self.name!r}, tbl={self.get_tbl().name!r})'
262
333
 
263
334
  def __hash__(self) -> int:
264
335
  # TODO(aaron-siegel): This and __eq__ do not capture the table version. We need to rethink the Column
265
336
  # abstraction (perhaps separating out the version-dependent properties into a different abstraction).
266
- assert self.tbl is not None
267
- return hash((self.tbl.id, self.id))
337
+ assert self.tbl_handle is not None
338
+ return hash((self.tbl_handle.id, self.id))
268
339
 
269
340
  def __eq__(self, other: object) -> bool:
270
341
  if not isinstance(other, Column):
271
342
  return False
272
- assert self.tbl is not None
273
- assert other.tbl is not None
274
- return self.tbl.id == other.tbl.id and self.id == other.id
343
+ assert self.tbl_handle is not None
344
+ assert other.tbl_handle is not None
345
+ return self.tbl_handle.id == other.tbl_handle.id and self.id == other.id
pixeltable/catalog/dir.py CHANGED
@@ -34,8 +34,7 @@ class Dir(SchemaObject):
34
34
  dir = cls(dir_record.id, parent_id, name)
35
35
  return dir
36
36
 
37
- @classmethod
38
- def _display_name(cls) -> str:
37
+ def _display_name(self) -> str:
39
38
  return 'directory'
40
39
 
41
40
  def _path(self) -> str:
pixeltable/catalog/globals.py CHANGED
@@ -1,12 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
- import dataclasses
4
3
  import enum
5
4
  import itertools
6
5
  import logging
7
- from typing import Optional
8
-
9
- from typing_extensions import Self
6
+ from dataclasses import dataclass
7
+ from uuid import UUID
10
8
 
11
9
  import pixeltable.exceptions as excs
12
10
 
@@ -18,45 +16,15 @@ _ROWID_COLUMN_NAME = '_rowid'
18
16
 
19
17
  # Set of symbols that are predefined in the `InsertableTable` class (and are therefore not allowed as column names).
20
18
  # This will be populated lazily to avoid circular imports.
21
- _PREDEF_SYMBOLS: Optional[set[str]] = None
22
-
23
-
24
- @dataclasses.dataclass
25
- class UpdateStatus:
26
- """
27
- Information about updates that resulted from a table operation.
28
- """
29
-
30
- num_rows: int = 0
31
- # TODO: disambiguate what this means: # of slots computed or # of columns computed?
32
- num_computed_values: int = 0
33
- num_excs: int = 0
34
- updated_cols: list[str] = dataclasses.field(default_factory=list)
35
- cols_with_excs: list[str] = dataclasses.field(default_factory=list)
36
-
37
- def __iadd__(self, other: 'UpdateStatus') -> Self:
38
- self.num_rows += other.num_rows
39
- self.num_computed_values += other.num_computed_values
40
- self.num_excs += other.num_excs
41
- self.updated_cols = list(dict.fromkeys(self.updated_cols + other.updated_cols))
42
- self.cols_with_excs = list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs))
43
- return self
44
-
45
- @property
46
- def insert_msg(self) -> str:
47
- """Return a message describing the results of an insert operation."""
48
- if self.num_excs == 0:
49
- cols_with_excs_str = ''
50
- else:
51
- cols_with_excs_str = (
52
- f' across {len(self.cols_with_excs)} column{"" if len(self.cols_with_excs) == 1 else "s"}'
53
- )
54
- cols_with_excs_str += f' ({", ".join(self.cols_with_excs)})'
55
- msg = (
56
- f'Inserted {self.num_rows} row{"" if self.num_rows == 1 else "s"} '
57
- f'with {self.num_excs} error{"" if self.num_excs == 1 else "s"}{cols_with_excs_str}.'
58
- )
59
- return msg
19
+ _PREDEF_SYMBOLS: set[str] | None = None
20
+
21
+
22
+ @dataclass(frozen=True)
23
+ class QColumnId:
24
+ """Qualified column id"""
25
+
26
+ tbl_id: UUID
27
+ col_id: int
60
28
 
61
29
 
62
30
  class MediaValidation(enum.Enum):