pixeltable-0.4.17-py3-none-any.whl → pixeltable-0.4.19-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of pixeltable might be problematic.

Files changed (153)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/catalog.py +144 -118
  4. pixeltable/catalog/column.py +104 -115
  5. pixeltable/catalog/globals.py +1 -2
  6. pixeltable/catalog/insertable_table.py +44 -49
  7. pixeltable/catalog/path.py +3 -4
  8. pixeltable/catalog/schema_object.py +4 -4
  9. pixeltable/catalog/table.py +139 -124
  10. pixeltable/catalog/table_metadata.py +6 -6
  11. pixeltable/catalog/table_version.py +315 -246
  12. pixeltable/catalog/table_version_handle.py +4 -4
  13. pixeltable/catalog/table_version_path.py +9 -10
  14. pixeltable/catalog/tbl_ops.py +9 -3
  15. pixeltable/catalog/view.py +34 -28
  16. pixeltable/config.py +14 -10
  17. pixeltable/dataframe.py +69 -78
  18. pixeltable/env.py +78 -64
  19. pixeltable/exec/aggregation_node.py +6 -6
  20. pixeltable/exec/cache_prefetch_node.py +10 -10
  21. pixeltable/exec/data_row_batch.py +3 -3
  22. pixeltable/exec/exec_context.py +16 -4
  23. pixeltable/exec/exec_node.py +5 -5
  24. pixeltable/exec/expr_eval/evaluators.py +6 -6
  25. pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
  26. pixeltable/exec/expr_eval/globals.py +6 -6
  27. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  28. pixeltable/exec/expr_eval/schedulers.py +11 -11
  29. pixeltable/exec/in_memory_data_node.py +2 -2
  30. pixeltable/exec/object_store_save_node.py +14 -17
  31. pixeltable/exec/sql_node.py +28 -27
  32. pixeltable/exprs/arithmetic_expr.py +4 -4
  33. pixeltable/exprs/array_slice.py +2 -2
  34. pixeltable/exprs/column_property_ref.py +3 -3
  35. pixeltable/exprs/column_ref.py +61 -74
  36. pixeltable/exprs/comparison.py +5 -5
  37. pixeltable/exprs/compound_predicate.py +3 -3
  38. pixeltable/exprs/data_row.py +12 -12
  39. pixeltable/exprs/expr.py +41 -31
  40. pixeltable/exprs/expr_dict.py +3 -3
  41. pixeltable/exprs/expr_set.py +3 -3
  42. pixeltable/exprs/function_call.py +14 -14
  43. pixeltable/exprs/in_predicate.py +4 -4
  44. pixeltable/exprs/inline_expr.py +8 -8
  45. pixeltable/exprs/is_null.py +1 -3
  46. pixeltable/exprs/json_mapper.py +8 -8
  47. pixeltable/exprs/json_path.py +6 -6
  48. pixeltable/exprs/literal.py +5 -5
  49. pixeltable/exprs/method_ref.py +2 -2
  50. pixeltable/exprs/object_ref.py +2 -2
  51. pixeltable/exprs/row_builder.py +14 -14
  52. pixeltable/exprs/rowid_ref.py +8 -8
  53. pixeltable/exprs/similarity_expr.py +50 -25
  54. pixeltable/exprs/sql_element_cache.py +4 -4
  55. pixeltable/exprs/string_op.py +2 -2
  56. pixeltable/exprs/type_cast.py +3 -5
  57. pixeltable/func/aggregate_function.py +8 -8
  58. pixeltable/func/callable_function.py +9 -9
  59. pixeltable/func/expr_template_function.py +3 -3
  60. pixeltable/func/function.py +15 -17
  61. pixeltable/func/function_registry.py +6 -7
  62. pixeltable/func/globals.py +2 -3
  63. pixeltable/func/mcp.py +2 -2
  64. pixeltable/func/query_template_function.py +16 -16
  65. pixeltable/func/signature.py +14 -14
  66. pixeltable/func/tools.py +11 -11
  67. pixeltable/func/udf.py +16 -18
  68. pixeltable/functions/__init__.py +1 -0
  69. pixeltable/functions/anthropic.py +7 -7
  70. pixeltable/functions/audio.py +76 -0
  71. pixeltable/functions/bedrock.py +6 -6
  72. pixeltable/functions/deepseek.py +4 -4
  73. pixeltable/functions/fireworks.py +2 -2
  74. pixeltable/functions/gemini.py +6 -6
  75. pixeltable/functions/globals.py +12 -12
  76. pixeltable/functions/groq.py +4 -4
  77. pixeltable/functions/huggingface.py +1033 -6
  78. pixeltable/functions/image.py +7 -10
  79. pixeltable/functions/llama_cpp.py +7 -7
  80. pixeltable/functions/math.py +2 -3
  81. pixeltable/functions/mistralai.py +3 -3
  82. pixeltable/functions/ollama.py +9 -9
  83. pixeltable/functions/openai.py +21 -21
  84. pixeltable/functions/openrouter.py +7 -7
  85. pixeltable/functions/string.py +21 -28
  86. pixeltable/functions/timestamp.py +7 -8
  87. pixeltable/functions/together.py +4 -6
  88. pixeltable/functions/twelvelabs.py +92 -0
  89. pixeltable/functions/video.py +36 -31
  90. pixeltable/functions/vision.py +6 -6
  91. pixeltable/functions/whisper.py +7 -7
  92. pixeltable/functions/whisperx.py +16 -16
  93. pixeltable/globals.py +75 -40
  94. pixeltable/index/base.py +12 -8
  95. pixeltable/index/btree.py +19 -22
  96. pixeltable/index/embedding_index.py +30 -39
  97. pixeltable/io/datarows.py +3 -3
  98. pixeltable/io/external_store.py +13 -16
  99. pixeltable/io/fiftyone.py +5 -5
  100. pixeltable/io/globals.py +5 -5
  101. pixeltable/io/hf_datasets.py +4 -4
  102. pixeltable/io/label_studio.py +12 -12
  103. pixeltable/io/pandas.py +6 -6
  104. pixeltable/io/parquet.py +2 -2
  105. pixeltable/io/table_data_conduit.py +12 -12
  106. pixeltable/io/utils.py +2 -2
  107. pixeltable/iterators/audio.py +2 -2
  108. pixeltable/iterators/document.py +88 -57
  109. pixeltable/iterators/video.py +66 -37
  110. pixeltable/metadata/converters/convert_18.py +2 -2
  111. pixeltable/metadata/converters/convert_19.py +2 -2
  112. pixeltable/metadata/converters/convert_20.py +2 -2
  113. pixeltable/metadata/converters/convert_21.py +2 -2
  114. pixeltable/metadata/converters/convert_22.py +2 -2
  115. pixeltable/metadata/converters/convert_24.py +2 -2
  116. pixeltable/metadata/converters/convert_25.py +2 -2
  117. pixeltable/metadata/converters/convert_26.py +2 -2
  118. pixeltable/metadata/converters/convert_29.py +4 -4
  119. pixeltable/metadata/converters/convert_34.py +2 -2
  120. pixeltable/metadata/converters/convert_36.py +2 -2
  121. pixeltable/metadata/converters/convert_38.py +2 -2
  122. pixeltable/metadata/converters/convert_39.py +1 -2
  123. pixeltable/metadata/converters/util.py +11 -13
  124. pixeltable/metadata/schema.py +22 -21
  125. pixeltable/metadata/utils.py +2 -6
  126. pixeltable/mypy/mypy_plugin.py +5 -5
  127. pixeltable/plan.py +32 -34
  128. pixeltable/share/packager.py +7 -7
  129. pixeltable/share/publish.py +3 -3
  130. pixeltable/store.py +126 -41
  131. pixeltable/type_system.py +43 -46
  132. pixeltable/utils/__init__.py +1 -2
  133. pixeltable/utils/arrow.py +4 -4
  134. pixeltable/utils/av.py +74 -38
  135. pixeltable/utils/azure_store.py +305 -0
  136. pixeltable/utils/code.py +1 -2
  137. pixeltable/utils/dbms.py +15 -19
  138. pixeltable/utils/description_helper.py +2 -3
  139. pixeltable/utils/documents.py +5 -6
  140. pixeltable/utils/exception_handler.py +2 -2
  141. pixeltable/utils/filecache.py +5 -5
  142. pixeltable/utils/formatter.py +4 -6
  143. pixeltable/utils/gcs_store.py +9 -9
  144. pixeltable/utils/local_store.py +17 -17
  145. pixeltable/utils/object_stores.py +59 -43
  146. pixeltable/utils/s3_store.py +35 -30
  147. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/METADATA +4 -4
  148. pixeltable-0.4.19.dist-info/RECORD +213 -0
  149. pixeltable/__version__.py +0 -3
  150. pixeltable-0.4.17.dist-info/RECORD +0 -211
  151. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
  152. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
  153. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
pixeltable/catalog/column.py

@@ -3,21 +3,21 @@ from __future__ import annotations
 import logging
 import warnings
 from textwrap import dedent
-from typing import TYPE_CHECKING, Any, Optional
+from typing import TYPE_CHECKING, Any

 import sqlalchemy as sql

 import pixeltable.exceptions as excs
+import pixeltable.exprs as exprs
 import pixeltable.type_system as ts
-from pixeltable import exprs
+from pixeltable.env import Env
 from pixeltable.metadata import schema

-from .globals import MediaValidation, is_valid_identifier
+from .globals import MediaValidation, QColumnId, is_valid_identifier

 if TYPE_CHECKING:
     from .table_version import TableVersion
-    from .table_version_handle import ColumnHandle
-    from .table_version_path import TableVersionPath
+    from .table_version_handle import ColumnHandle, TableVersionHandle

 _logger = logging.getLogger('pixeltable')

@@ -48,57 +48,58 @@ class Column:
     - if None: the system chooses for you (at present, this is always False, but this may change in the future)
     """

-    name: Optional[str]
-    id: Optional[int]
+    name: str | None
+    id: int | None
     col_type: ts.ColumnType
     stored: bool
     is_pk: bool
-    destination: Optional[str]  # An object store reference for computed files
-    _media_validation: Optional[MediaValidation]  # if not set, TableVersion.media_validation applies
-    schema_version_add: Optional[int]
-    schema_version_drop: Optional[int]
-    _stores_cellmd: Optional[bool]
-    sa_col: Optional[sql.schema.Column]
-    sa_col_type: Optional[sql.sqltypes.TypeEngine]
-    sa_cellmd_col: Optional[sql.schema.Column]  # JSON metadata for the cell, e.g. errortype, errormsg for media columns
-    _value_expr: Optional[exprs.Expr]
-    value_expr_dict: Optional[dict[str, Any]]
-    # we store a TableVersion here, not a TableVersionHandle, because this column is owned by that TableVersion instance
-    # (re-resolving it later to a different instance doesn't make sense)
-    tbl: Optional[TableVersion]
+    is_iterator_col: bool
+    _explicit_destination: str | None  # An object store reference for computed files
+    _media_validation: MediaValidation | None  # if not set, TableVersion.media_validation applies
+    schema_version_add: int | None
+    schema_version_drop: int | None
+    stores_cellmd: bool
+    sa_col: sql.schema.Column | None
+    sa_col_type: sql.types.TypeEngine
+    sa_cellmd_col: sql.schema.Column | None  # JSON metadata for the cell, e.g. errortype, errormsg for media columns
+    _value_expr: exprs.Expr | None
+    value_expr_dict: dict[str, Any] | None
+    # we store a handle here in order to allow Column construction before there is a corresponding TableVersion
+    tbl_handle: 'TableVersionHandle' | None

     def __init__(
         self,
-        name: Optional[str],
-        col_type: Optional[ts.ColumnType] = None,
-        computed_with: Optional[exprs.Expr] = None,
+        name: str | None,
+        col_type: ts.ColumnType | None = None,
+        computed_with: exprs.Expr | None = None,
         is_pk: bool = False,
+        is_iterator_col: bool = False,
         stored: bool = True,
-        media_validation: Optional[MediaValidation] = None,
-        col_id: Optional[int] = None,
-        schema_version_add: Optional[int] = None,
-        schema_version_drop: Optional[int] = None,
-        sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
-        stores_cellmd: Optional[bool] = None,
-        value_expr_dict: Optional[dict[str, Any]] = None,
-        tbl: Optional[TableVersion] = None,
-        destination: Optional[str] = None,
+        media_validation: MediaValidation | None = None,
+        col_id: int | None = None,
+        schema_version_add: int | None = None,
+        schema_version_drop: int | None = None,
+        sa_col_type: sql.types.TypeEngine | None = None,
+        stores_cellmd: bool | None = None,
+        value_expr_dict: dict[str, Any] | None = None,
+        tbl_handle: 'TableVersionHandle' | None = None,
+        destination: str | None = None,
     ):
         if name is not None and not is_valid_identifier(name):
-            raise excs.Error(f"Invalid column name: '{name}'")
+            raise excs.Error(f'Invalid column name: {name}')
         self.name = name
-        self.tbl = tbl
+        self.tbl_handle = tbl_handle
         if col_type is None and computed_with is None:
-            raise excs.Error(f'Column `{name}`: col_type is required if computed_with is not specified')
+            raise excs.Error(f'Column {name!r}: `col_type` is required if `computed_with` is not specified')

-        self._value_expr: Optional[exprs.Expr] = None
+        self._value_expr = None
         self.value_expr_dict = value_expr_dict
         if computed_with is not None:
             value_expr = exprs.Expr.from_object(computed_with)
             if value_expr is None:
                 # TODO: this shouldn't be a user-facing error
                 raise excs.Error(
-                    f'Column {name}: computed_with needs to be a valid Pixeltable expression, '
+                    f'Column {name!r}: `computed_with` needs to be a valid Pixeltable expression, '
                     f'but it is a {type(computed_with)}'
                 )
             else:
@@ -115,21 +116,30 @@ class Column:
         # self.dependent_cols = set()  # cols with value_exprs that reference us; set by TableVersion
         self.id = col_id
         self.is_pk = is_pk
+        self.is_iterator_col = is_iterator_col
         self._media_validation = media_validation
         self.schema_version_add = schema_version_add
         self.schema_version_drop = schema_version_drop

-        self._stores_cellmd = stores_cellmd
+        if stores_cellmd is not None:
+            self.stores_cellmd = stores_cellmd
+        else:
+            self.stores_cellmd = stored and (
+                self.is_computed
+                or self.col_type.is_media_type()
+                or self.col_type.is_json_type()
+                or self.col_type.is_array_type()
+            )

         # column in the stored table for the values of this Column
         self.sa_col = None
-        self.sa_col_type = sa_col_type
+        self.sa_col_type = self.col_type.to_sa_type() if sa_col_type is None else sa_col_type

         # computed cols also have storage columns for the exception string and type
         self.sa_cellmd_col = None
-        self.destination = destination
+        self._explicit_destination = destination

-    def to_md(self, pos: Optional[int] = None) -> tuple[schema.ColumnMd, Optional[schema.SchemaColumn]]:
+    def to_md(self, pos: int | None = None) -> tuple[schema.ColumnMd, schema.SchemaColumn | None]:
         """Returns the Column and optional SchemaColumn metadata for this Column."""
         assert self.is_pk is not None
         col_md = schema.ColumnMd(
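
The default for stores_cellmd is now fixed at construction time rather than computed lazily in a property (the old property is removed in a later hunk). The rule itself is unchanged and amounts to a small pure function; the sketch below is illustrative (the helper name is ours), and only the ColumnType predicates it calls come from the code above.

    import pixeltable.type_system as ts

    def default_stores_cellmd(stored: bool, is_computed: bool, col_type: ts.ColumnType) -> bool:
        # Cell metadata (e.g. errortype/errormsg) is kept only for stored columns whose values
        # can fail or carry sidecar info: computed, media, JSON, or array columns.
        return stored and (
            is_computed
            or col_type.is_media_type()
            or col_type.is_json_type()
            or col_type.is_array_type()
        )
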
@@ -140,7 +150,7 @@
             schema_version_drop=self.schema_version_drop,
             value_expr=self.value_expr.as_dict() if self.value_expr is not None else None,
             stored=self.stored,
-            destination=self.destination,
+            destination=self._explicit_destination,
         )
         if pos is None:
             return col_md, None
@@ -152,33 +162,6 @@
         )
         return col_md, sch_md

-    @classmethod
-    def from_md(
-        cls, col_md: schema.ColumnMd, tbl: TableVersion, schema_col_md: Optional[schema.SchemaColumn]
-    ) -> Column:
-        """Create a Column from a ColumnMd."""
-        assert col_md.id is not None
-        col_name = schema_col_md.name if schema_col_md is not None else None
-        media_val = (
-            MediaValidation[schema_col_md.media_validation.upper()]
-            if schema_col_md is not None and schema_col_md.media_validation is not None
-            else None
-        )
-        col = cls(
-            col_id=col_md.id,
-            name=col_name,
-            col_type=ts.ColumnType.from_dict(col_md.col_type),
-            is_pk=col_md.is_pk,
-            stored=col_md.stored,
-            media_validation=media_val,
-            schema_version_add=col_md.schema_version_add,
-            schema_version_drop=col_md.schema_version_drop,
-            value_expr_dict=col_md.value_expr,
-            tbl=tbl,
-            destination=col_md.destination,
-        )
-        return col
-
     def init_value_expr(self) -> None:
         from pixeltable import exprs

@@ -190,7 +173,7 @@
             message = (
                 dedent(
                     f"""
-                    The computed column {self.name!r} in table {self.tbl.name!r} is no longer valid.
+                    The computed column {self.name!r} in table {self.get_tbl().name!r} is no longer valid.
                     {{validation_error}}
                    You can continue to query existing data from this column, but evaluating it on new data will raise an error.
                    """  # noqa: E501
@@ -200,17 +183,46 @@
             )
             warnings.warn(message, category=excs.PixeltableWarning, stacklevel=2)

+    def get_tbl(self) -> TableVersion:
+        tv = self.tbl_handle.get()
+        return tv
+
+    @property
+    def destination(self) -> str | None:
+        if self._explicit_destination is not None:
+            # An expilicit destination was set as part of the column definition
+            return self._explicit_destination
+
+        # Otherwise, if this is a stored media column, use the default destination if one is configured (input
+        # destination or output destination, depending on whether this is a computed column)
+        # TODO: The `self.name is not None` clause is necessary because index columns currently follow the type of
+        # the underlying media column. We should move to using pxt.String as the col_type of index columns; this
+        # would be a more robust solution, and then `self.name is not None` could be removed.
+        if self.is_stored and self.col_type.is_media_type() and self.name is not None:
+            if self.is_computed:
+                return Env.get().default_output_media_dest
+            else:
+                return Env.get().default_input_media_dest
+
+        return None
+
     @property
     def handle(self) -> 'ColumnHandle':
         """Returns a ColumnHandle for this Column."""
         from .table_version_handle import ColumnHandle

-        assert self.tbl is not None
+        assert self.tbl_handle is not None
+        assert self.id is not None
+        return ColumnHandle(self.tbl_handle, self.id)
+
+    @property
+    def qid(self) -> QColumnId:
+        assert self.tbl_handle is not None
         assert self.id is not None
-        return ColumnHandle(self.tbl.handle, self.id)
+        return QColumnId(self.tbl_handle.id, self.id)

     @property
-    def value_expr(self) -> Optional[exprs.Expr]:
+    def value_expr(self) -> exprs.Expr | None:
         assert self.value_expr_dict is None or self._value_expr is not None
         return self._value_expr

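The new destination property above resolves where a column's media files are written: an explicit destination given in the column definition always wins; otherwise a stored, named media column falls back to the environment defaults, with computed columns writing to the output destination and regular data columns to the input destination. A standalone sketch of that resolution order follows; the function and its parameters are illustrative, and only the Env.get().default_*_media_dest accessors are taken from the hunk above.

    from pixeltable.env import Env

    def resolve_destination(
        explicit: str | None, is_stored: bool, is_media: bool, has_name: bool, is_computed: bool
    ) -> str | None:
        # 1. An explicit destination from the column definition always wins.
        if explicit is not None:
            return explicit
        # 2. Stored, named media columns fall back to the configured defaults: computed columns
        #    write to the output destination, regular data columns to the input destination.
        if is_stored and is_media and has_name:
            return Env.get().default_output_media_dest if is_computed else Env.get().default_input_media_dest
        # 3. Anything else has no object-store destination.
        return None
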
@@ -220,29 +232,22 @@

     def check_value_expr(self) -> None:
         assert self._value_expr is not None
-        if self.stored == False and self.is_computed and self.has_window_fn_call():
+        if not self.stored and self.is_computed and self.has_window_fn_call():
             raise excs.Error(
-                f'Column {self.name}: stored={self.stored} not supported for columns computed with window functions:'
-                f'\n{self.value_expr}'
+                f'Column {self.name!r}: `stored={self.stored}` not supported for columns '
+                f'computed with window functions:\n{self.value_expr}'
             )

     def has_window_fn_call(self) -> bool:
-        if self.value_expr is None:
-            return False
         from pixeltable import exprs

+        if self.value_expr is None:
+            return False
         window_fn_calls = list(
             self.value_expr.subexprs(filter=lambda e: isinstance(e, exprs.FunctionCall) and e.is_window_fn_call)
         )
         return len(window_fn_calls) > 0

-    # TODO: This should be moved out of `Column` (its presence in `Column` doesn't anticipate indices being defined on
-    # multiple dependents)
-    def get_idx_info(self, reference_tbl: Optional['TableVersionPath'] = None) -> dict[str, 'TableVersion.IndexInfo']:
-        assert self.tbl is not None
-        tbl = reference_tbl.tbl_version.get() if reference_tbl is not None else self.tbl
-        return {name: info for name, info in tbl.idxs_by_name.items() if info.col == self}
-
     @property
     def is_computed(self) -> bool:
         return self._value_expr is not None or self.value_expr_dict is not None
@@ -253,30 +258,17 @@
         assert self.stored is not None
         return self.stored

-    @property
-    def stores_cellmd(self) -> bool:
-        """True if this column also stores error information."""
-        # default: record errors for computed and media columns
-        if self._stores_cellmd is not None:
-            return self._stores_cellmd
-        return self.is_stored and (
-            self.is_computed
-            or self.col_type.is_media_type()
-            or self.col_type.is_json_type()
-            or self.col_type.is_array_type()
-        )
-
     @property
     def qualified_name(self) -> str:
-        assert self.tbl is not None
-        return f'{self.tbl.name}.{self.name}'
+        assert self.get_tbl() is not None
+        return f'{self.get_tbl().name}.{self.name}'

     @property
     def media_validation(self) -> MediaValidation:
         if self._media_validation is not None:
             return self._media_validation
-        assert self.tbl is not None
-        return self.tbl.media_validation
+        assert self.get_tbl() is not None
+        return self.get_tbl().media_validation

     @property
     def is_required_for_insert(self) -> bool:
@@ -295,24 +287,21 @@

     def create_sa_cols(self) -> None:
         """
-        These need to be recreated for every new table schema version.
+        These need to be recreated for every sql.Table instance
         """
         assert self.is_stored
+        assert self.stores_cellmd is not None
         # all storage columns are nullable (we deal with null errors in Pixeltable directly)
-        self.sa_col = sql.Column(self.store_name(), self.get_sa_col_type(), nullable=True)
+        self.sa_col = sql.Column(self.store_name(), self.sa_col_type, nullable=True)
         if self.stores_cellmd:
-            # JSON metadata for the cell, e.g. errortype, errormsg for media columns
             self.sa_cellmd_col = sql.Column(self.cellmd_store_name(), self.sa_cellmd_type(), nullable=True)

-    def get_sa_col_type(self) -> sql.sqltypes.TypeEngine:
-        return self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type
-
     @classmethod
     def cellmd_type(cls) -> ts.ColumnType:
         return ts.JsonType(nullable=True)

     @classmethod
-    def sa_cellmd_type(cls) -> sql.sqltypes.TypeEngine:
+    def sa_cellmd_type(cls) -> sql.types.TypeEngine:
         return cls.cellmd_type().to_sa_type()

     def store_name(self) -> str:
@@ -327,17 +316,17 @@
         return f'{self.name}: {self.col_type}'

     def __repr__(self) -> str:
-        return f'Column({self.id!r}, {self.name!r}, tbl={self.tbl.name!r})'
+        return f'Column({self.id!r}, {self.name!r}, tbl={self.get_tbl().name!r})'

     def __hash__(self) -> int:
         # TODO(aaron-siegel): This and __eq__ do not capture the table version. We need to rethink the Column
         # abstraction (perhaps separating out the version-dependent properties into a different abstraction).
-        assert self.tbl is not None
-        return hash((self.tbl.id, self.id))
+        assert self.tbl_handle is not None
+        return hash((self.tbl_handle.id, self.id))

     def __eq__(self, other: object) -> bool:
         if not isinstance(other, Column):
             return False
-        assert self.tbl is not None
-        assert other.tbl is not None
-        return self.tbl.id == other.tbl.id and self.id == other.id
+        assert self.tbl_handle is not None
+        assert other.tbl_handle is not None
+        return self.tbl_handle.id == other.tbl_handle.id and self.id == other.id
pixeltable/catalog/globals.py

@@ -4,7 +4,6 @@ import enum
 import itertools
 import logging
 from dataclasses import dataclass
-from typing import Optional
 from uuid import UUID

 import pixeltable.exceptions as excs
@@ -17,7 +16,7 @@ _ROWID_COLUMN_NAME = '_rowid'

 # Set of symbols that are predefined in the `InsertableTable` class (and are therefore not allowed as column names).
 # This will be populated lazily to avoid circular imports.
-_PREDEF_SYMBOLS: Optional[set[str]] = None
+_PREDEF_SYMBOLS: set[str] | None = None


 @dataclass(frozen=True)
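
column.py above now imports QColumnId from this module and constructs it as QColumnId(self.tbl_handle.id, self.id); its definition is cut off in this excerpt (only the @dataclass(frozen=True) decorator is visible). A hypothetical frozen dataclass of that shape, with field names that are our guess, would be hashable and therefore usable as a dict or set key for per-column lookups:

    from dataclasses import dataclass
    from uuid import UUID

    @dataclass(frozen=True)
    class QColumnId:
        # Hypothetical shape: the owning table's id plus the column id, matching the two
        # positional arguments passed in Column.qid above.
        tbl_id: UUID
        col_id: int

    # frozen=True makes instances hashable, so they can serve as dict keys:
    idx_info = {QColumnId(UUID(int=0), 1): 'embedding_idx_0'}
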
pixeltable/catalog/insertable_table.py

@@ -2,7 +2,7 @@ from __future__ import annotations

 import enum
 import logging
-from typing import TYPE_CHECKING, Any, Literal, Optional, Sequence, cast, overload
+from typing import TYPE_CHECKING, Any, Literal, Sequence, cast, overload
 from uuid import UUID

 import pydantic
@@ -16,9 +16,10 @@ from pixeltable.utils.pydantic import is_json_convertible

 from .globals import MediaValidation
 from .table import Table
-from .table_version import TableVersion
+from .table_version import TableVersion, TableVersionMd
 from .table_version_handle import TableVersionHandle
 from .table_version_path import TableVersionPath
+from .tbl_ops import CreateStoreTableOp, TableOp
 from .update_status import UpdateStatus

 if TYPE_CHECKING:
@@ -65,15 +66,14 @@ class InsertableTable(Table):
     @classmethod
     def _create(
         cls,
-        dir_id: UUID,
         name: str,
         schema: dict[str, ts.ColumnType],
-        df: Optional[pxt.DataFrame],
         primary_key: list[str],
         num_retained_versions: int,
         comment: str,
         media_validation: MediaValidation,
-    ) -> InsertableTable:
+        create_default_idxs: bool,
+    ) -> tuple[TableVersionMd, list[TableOp]]:
         columns = cls._create_columns(schema)
         cls._verify_schema(columns)
         column_names = [col.name for col in columns]
@@ -85,38 +85,35 @@
                 raise excs.Error(f'Primary key column {pk_col!r} cannot be nullable.')
             col.is_pk = True

-        _, tbl_version = TableVersion.create(
-            dir_id,
+        md = TableVersion.create_initial_md(
             name,
             columns,
-            num_retained_versions=num_retained_versions,
-            comment=comment,
-            media_validation=media_validation,
+            num_retained_versions,
+            comment,
+            media_validation,
+            create_default_idxs=create_default_idxs,
+            view_md=None,
         )
-        tbl = cls(dir_id, TableVersionHandle.create(tbl_version))
-        # TODO We need to commit before doing the insertion, in order to avoid a primary key (version) collision
-        # when the table metadata gets updated. Once we have a notion of user-defined transactions in
-        # Pixeltable, we can wrap the create/insert in a transaction to avoid this.
-        session = Env.get().session
-        session.commit()
-        if df is not None:
-            # A DataFrame was provided, so insert its contents into the table
-            # (using the same DB session as the table creation)
-            tbl_version.insert(None, df, fail_on_exception=True)
-            session.commit()
-
-        _logger.info(f'Created table {name!r}, id={tbl_version.id}')
-        Env.get().console_logger.info(f'Created table {name!r}.')
-        return tbl
+
+        ops = [
+            TableOp(
+                tbl_id=md.tbl_md.tbl_id,
+                op_sn=0,
+                num_ops=1,
+                needs_xact=False,
+                create_store_table_op=CreateStoreTableOp(),
+            )
+        ]
+        return md, ops

     @overload
     def insert(
         self,
-        source: Optional[TableDataSource] = None,
+        source: TableDataSource | None = None,
         /,
         *,
-        source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
-        schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
+        source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
+        schema_overrides: dict[str, ts.ColumnType] | None = None,
         on_error: Literal['abort', 'ignore'] = 'abort',
         print_stats: bool = False,
         **kwargs: Any,
@@ -129,11 +126,11 @@

     def insert(
         self,
-        source: Optional[TableDataSource] = None,
+        source: TableDataSource | None = None,
         /,
         *,
-        source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
-        schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
+        source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
+        schema_overrides: dict[str, ts.ColumnType] | None = None,
         on_error: Literal['abort', 'ignore'] = 'abort',
         print_stats: bool = False,
         **kwargs: Any,
@@ -142,7 +139,7 @@
         from pixeltable.io.table_data_conduit import UnkTableDataConduit

         if source is not None and isinstance(source, Sequence) and len(source) == 0:
-            raise excs.Error('Cannot insert an empty sequence')
+            raise excs.Error('Cannot insert an empty sequence.')
         fail_on_exception = OnErrorParameter.fail_on_exception(on_error)

         with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
@@ -214,7 +211,7 @@
             try:
                 pxt_rows.append(row.model_dump(mode='json'))
             except pydantic_core.PydanticSerializationError as e:
-                raise excs.Error(f'Row {i}: error serializing pydantic model to JSON:\n{e!s}') from e
+                raise excs.Error(f'Row {i}: error serializing pydantic model to JSON:\n{e}') from e

         # explicitly check that all required columns are present and non-None in the rows,
         # because we ignore nullability when validating the pydantic model
@@ -222,7 +219,7 @@
         for i, pxt_row in enumerate(pxt_rows):
             if type(rows[i]) is not model_class:
                 raise excs.Error(
-                    f'Expected {model_class.__name__!r} instance, got {type(rows[i]).__name__!r} (in row {i})'
+                    f'Expected an instance of `{model_class.__name__}`; got `{type(rows[i]).__name__}` (in row {i})'
                 )
             for col_name in reqd_col_names:
                 if pxt_row.get(col_name) is None:
@@ -253,22 +250,20 @@
         missing_required = required_cols - model_field_names
         if missing_required:
             raise excs.Error(
-                f'Pydantic model {model.__name__!r} is missing required columns: '
-                f'{", ".join(f"{col_name!r}" for col_name in missing_required)}'
+                f'Pydantic model `{model.__name__}` is missing required columns: ' + ', '.join(missing_required)
             )

         computed_in_model = computed_cols & model_field_names
         if computed_in_model:
             raise excs.Error(
-                f'Pydantic model {model.__name__!r} has fields for computed columns: '
-                f'{", ".join(f"{col_name!r}" for col_name in computed_in_model)}'
+                f'Pydantic model `{model.__name__}` has fields for computed columns: ' + ', '.join(computed_in_model)
             )

         # validate type compatibility
         common_fields = model_field_names & set(schema.keys())
         if len(common_fields) == 0:
             raise excs.Error(
-                f'Pydantic model {model.__name__!r} has no fields that map to columns in table {self._name!r}'
+                f'Pydantic model `{model.__name__}` has no fields that map to columns in table {self._name!r}'
             )
         for field_name in common_fields:
             pxt_col_type = schema[field_name]
@@ -281,21 +276,21 @@
             inferred_pxt_type = ts.ColumnType.from_python_type(model_type, infer_pydantic_json=True)
             if inferred_pxt_type is None:
                 raise excs.Error(
-                    f'Pydantic model {model.__name__!r}: cannot infer Pixeltable type for column {field_name!r}'
+                    f'Pydantic model `{model.__name__}`: cannot infer Pixeltable type for column {field_name!r}'
                 )

             if pxt_col_type.is_media_type():
                 # media types require file paths, either as str or Path
                 if not inferred_pxt_type.is_string_type():
                     raise excs.Error(
-                        f"Column {field_name!r} requires a 'str' or 'Path' field in {model.__name__!r}, but it is "
-                        f'{model_type.__name__!r}'
+                        f'Column {field_name!r} requires a `str` or `Path` field in `{model.__name__}`, but it is '
+                        f'`{model_type.__name__}`'
                     )
             else:
                 if not pxt_col_type.is_supertype_of(inferred_pxt_type, ignore_nullable=True):
                     raise excs.Error(
-                        f'Pydantic model {model.__name__!r} has incompatible type ({model_type.__name__}) '
-                        f'for column {field_name!r} ({pxt_col_type})'
+                        f'Pydantic model `{model.__name__}` has incompatible type `{model_type.__name__}` '
+                        f'for column {field_name!r} (of Pixeltable type `{pxt_col_type}`)'
                     )

             if (
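
The checks above validate a pydantic model class against the table schema before any rows are inserted: every required column needs a matching field, computed columns must not have one, and each field type has to be convertible to the column's Pixeltable type (media columns specifically require str or Path fields pointing at files). A hedged usage sketch follows, assuming the insert path accepts pydantic model instances as this validation code implies; the table name, model, and file path are made up.

    import pydantic
    import pixeltable as pxt

    class Film(pydantic.BaseModel):
        name: str
        revenue: float
        poster: str  # media columns expect a str (or Path) file reference

    t = pxt.create_table('films', {'name': pxt.String, 'revenue': pxt.Float, 'poster': pxt.Image})
    # Each instance is serialized via model_dump(mode='json') and checked against the schema.
    t.insert([Film(name='Inside Out', revenue=858.8, poster='/tmp/inside_out.png')])
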
@@ -304,11 +299,11 @@
                 and not is_json_convertible(model_type)
             ):
                 raise excs.Error(
-                    f'Pydantic model {model.__name__!r} has field {field_name!r} with nested model '
-                    f'{model_type.__name__!r}, which is not JSON-convertible'
+                    f'Pydantic model `{model.__name__}` has field {field_name!r} with nested model '
+                    f'`{model_type.__name__}`, which is not JSON-convertible'
                 )

-    def delete(self, where: Optional['exprs.Expr'] = None) -> UpdateStatus:
+    def delete(self, where: 'exprs.Expr' | None = None) -> UpdateStatus:
         """Delete rows in this table.

         Args:
@@ -328,11 +323,11 @@
         with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
             return self._tbl_version.get().delete(where=where)

-    def _get_base_table(self) -> Optional['Table']:
+    def _get_base_table(self) -> 'Table' | None:
         return None

     @property
-    def _effective_base_versions(self) -> list[Optional[int]]:
+    def _effective_base_versions(self) -> list[int | None]:
         return []

     def _table_descriptor(self) -> str:
pixeltable/catalog/path.py

@@ -1,7 +1,6 @@
 from __future__ import annotations

 import logging
-from typing import Optional

 from pixeltable import exceptions as excs

@@ -12,9 +11,9 @@ _logger = logging.getLogger('pixeltable')

 class Path:
     components: list[str]
-    version: Optional[int]
+    version: int | None

-    def __init__(self, components: list[str], version: Optional[int] = None) -> None:
+    def __init__(self, components: list[str], version: int | None = None) -> None:
         assert len(components) > 0
         self.components = components
         self.version = version
@@ -28,7 +27,7 @@ class Path:
         allow_versioned_path: bool = False,
     ) -> Path:
         components: list[str]
-        version: Optional[int]
+        version: int | None
         if ':' in path:
             parts = path.split(':')
             if len(parts) != 2:
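
The branch above handles versioned table paths by splitting on ':' and keeping an optional integer version next to the dotted path components. A simplified, standalone sketch of that rule (the real parser carries more validation than shown here, and the function name is ours):

    def parse_path(path: str) -> tuple[list[str], int | None]:
        # 'dir.sub.table:12' -> (['dir', 'sub', 'table'], 12); 'dir.table' -> (['dir', 'table'], None)
        version: int | None = None
        if ':' in path:
            parts = path.split(':')
            if len(parts) != 2:
                raise ValueError(f'Invalid path: {path!r}')
            path, version = parts[0], int(parts[1])
        return path.split('.'), version

    assert parse_path('dir1.my_table:3') == (['dir1', 'my_table'], 3)
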
pixeltable/catalog/schema_object.py

@@ -1,5 +1,5 @@
 from abc import abstractmethod
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING
 from uuid import UUID

 if TYPE_CHECKING:
@@ -14,16 +14,16 @@ class SchemaObject:

     _id: UUID
     _name: str
-    _dir_id: Optional[UUID]
+    _dir_id: UUID | None

-    def __init__(self, obj_id: UUID, name: str, dir_id: Optional[UUID]):
+    def __init__(self, obj_id: UUID, name: str, dir_id: UUID | None):
         # make these private so they don't collide with column names (id and name are fairly common)
         assert dir_id is None or isinstance(dir_id, UUID), type(dir_id)
         self._id = obj_id
         self._name = name
         self._dir_id = dir_id

-    def _parent(self) -> Optional['catalog.Dir']:
+    def _parent(self) -> 'catalog.Dir | None':
         """Returns the parent directory of this schema object."""
         from .catalog import Catalog