pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. pixeltable/__init__.py +42 -8
  2. pixeltable/{dataframe.py → _query.py} +470 -206
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +5 -4
  5. pixeltable/catalog/catalog.py +1785 -432
  6. pixeltable/catalog/column.py +190 -113
  7. pixeltable/catalog/dir.py +2 -4
  8. pixeltable/catalog/globals.py +19 -46
  9. pixeltable/catalog/insertable_table.py +191 -98
  10. pixeltable/catalog/path.py +63 -23
  11. pixeltable/catalog/schema_object.py +11 -15
  12. pixeltable/catalog/table.py +843 -436
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +978 -657
  15. pixeltable/catalog/table_version_handle.py +72 -16
  16. pixeltable/catalog/table_version_path.py +112 -43
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +134 -90
  20. pixeltable/config.py +134 -22
  21. pixeltable/env.py +471 -157
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +4 -1
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +11 -7
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +106 -56
  37. pixeltable/exec/globals.py +35 -0
  38. pixeltable/exec/in_memory_data_node.py +19 -19
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +351 -84
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +36 -23
  46. pixeltable/exprs/column_ref.py +213 -89
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +164 -54
  50. pixeltable/exprs/expr.py +70 -44
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +100 -40
  54. pixeltable/exprs/globals.py +2 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +18 -32
  57. pixeltable/exprs/is_null.py +7 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +27 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +167 -67
  64. pixeltable/exprs/rowid_ref.py +25 -10
  65. pixeltable/exprs/similarity_expr.py +58 -40
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +17 -11
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +29 -27
  78. pixeltable/func/signature.py +46 -19
  79. pixeltable/func/tools.py +31 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +16 -0
  82. pixeltable/functions/anthropic.py +123 -77
  83. pixeltable/functions/audio.py +147 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +7 -4
  86. pixeltable/functions/deepseek.py +35 -43
  87. pixeltable/functions/document.py +81 -0
  88. pixeltable/functions/fal.py +76 -0
  89. pixeltable/functions/fireworks.py +11 -20
  90. pixeltable/functions/gemini.py +195 -39
  91. pixeltable/functions/globals.py +142 -14
  92. pixeltable/functions/groq.py +108 -0
  93. pixeltable/functions/huggingface.py +1056 -24
  94. pixeltable/functions/image.py +115 -57
  95. pixeltable/functions/json.py +1 -1
  96. pixeltable/functions/llama_cpp.py +28 -13
  97. pixeltable/functions/math.py +67 -5
  98. pixeltable/functions/mistralai.py +18 -55
  99. pixeltable/functions/net.py +70 -0
  100. pixeltable/functions/ollama.py +20 -13
  101. pixeltable/functions/openai.py +240 -226
  102. pixeltable/functions/openrouter.py +143 -0
  103. pixeltable/functions/replicate.py +4 -4
  104. pixeltable/functions/reve.py +250 -0
  105. pixeltable/functions/string.py +239 -69
  106. pixeltable/functions/timestamp.py +16 -16
  107. pixeltable/functions/together.py +24 -84
  108. pixeltable/functions/twelvelabs.py +188 -0
  109. pixeltable/functions/util.py +6 -1
  110. pixeltable/functions/uuid.py +30 -0
  111. pixeltable/functions/video.py +1515 -107
  112. pixeltable/functions/vision.py +8 -8
  113. pixeltable/functions/voyageai.py +289 -0
  114. pixeltable/functions/whisper.py +16 -8
  115. pixeltable/functions/whisperx.py +179 -0
  116. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  117. pixeltable/globals.py +362 -115
  118. pixeltable/index/base.py +17 -21
  119. pixeltable/index/btree.py +28 -22
  120. pixeltable/index/embedding_index.py +100 -118
  121. pixeltable/io/__init__.py +4 -2
  122. pixeltable/io/datarows.py +8 -7
  123. pixeltable/io/external_store.py +56 -105
  124. pixeltable/io/fiftyone.py +13 -13
  125. pixeltable/io/globals.py +31 -30
  126. pixeltable/io/hf_datasets.py +61 -16
  127. pixeltable/io/label_studio.py +74 -70
  128. pixeltable/io/lancedb.py +3 -0
  129. pixeltable/io/pandas.py +21 -12
  130. pixeltable/io/parquet.py +25 -105
  131. pixeltable/io/table_data_conduit.py +250 -123
  132. pixeltable/io/utils.py +4 -4
  133. pixeltable/iterators/__init__.py +2 -1
  134. pixeltable/iterators/audio.py +26 -25
  135. pixeltable/iterators/base.py +9 -3
  136. pixeltable/iterators/document.py +112 -78
  137. pixeltable/iterators/image.py +12 -15
  138. pixeltable/iterators/string.py +11 -4
  139. pixeltable/iterators/video.py +523 -120
  140. pixeltable/metadata/__init__.py +14 -3
  141. pixeltable/metadata/converters/convert_13.py +2 -2
  142. pixeltable/metadata/converters/convert_18.py +2 -2
  143. pixeltable/metadata/converters/convert_19.py +2 -2
  144. pixeltable/metadata/converters/convert_20.py +2 -2
  145. pixeltable/metadata/converters/convert_21.py +2 -2
  146. pixeltable/metadata/converters/convert_22.py +2 -2
  147. pixeltable/metadata/converters/convert_24.py +2 -2
  148. pixeltable/metadata/converters/convert_25.py +2 -2
  149. pixeltable/metadata/converters/convert_26.py +2 -2
  150. pixeltable/metadata/converters/convert_29.py +4 -4
  151. pixeltable/metadata/converters/convert_30.py +34 -21
  152. pixeltable/metadata/converters/convert_34.py +2 -2
  153. pixeltable/metadata/converters/convert_35.py +9 -0
  154. pixeltable/metadata/converters/convert_36.py +38 -0
  155. pixeltable/metadata/converters/convert_37.py +15 -0
  156. pixeltable/metadata/converters/convert_38.py +39 -0
  157. pixeltable/metadata/converters/convert_39.py +124 -0
  158. pixeltable/metadata/converters/convert_40.py +73 -0
  159. pixeltable/metadata/converters/convert_41.py +12 -0
  160. pixeltable/metadata/converters/convert_42.py +9 -0
  161. pixeltable/metadata/converters/convert_43.py +44 -0
  162. pixeltable/metadata/converters/util.py +20 -31
  163. pixeltable/metadata/notes.py +9 -0
  164. pixeltable/metadata/schema.py +140 -53
  165. pixeltable/metadata/utils.py +74 -0
  166. pixeltable/mypy/__init__.py +3 -0
  167. pixeltable/mypy/mypy_plugin.py +123 -0
  168. pixeltable/plan.py +382 -115
  169. pixeltable/share/__init__.py +1 -1
  170. pixeltable/share/packager.py +547 -83
  171. pixeltable/share/protocol/__init__.py +33 -0
  172. pixeltable/share/protocol/common.py +165 -0
  173. pixeltable/share/protocol/operation_types.py +33 -0
  174. pixeltable/share/protocol/replica.py +119 -0
  175. pixeltable/share/publish.py +257 -59
  176. pixeltable/store.py +311 -194
  177. pixeltable/type_system.py +373 -211
  178. pixeltable/utils/__init__.py +2 -3
  179. pixeltable/utils/arrow.py +131 -17
  180. pixeltable/utils/av.py +298 -0
  181. pixeltable/utils/azure_store.py +346 -0
  182. pixeltable/utils/coco.py +6 -6
  183. pixeltable/utils/code.py +3 -3
  184. pixeltable/utils/console_output.py +4 -1
  185. pixeltable/utils/coroutine.py +6 -23
  186. pixeltable/utils/dbms.py +32 -6
  187. pixeltable/utils/description_helper.py +4 -5
  188. pixeltable/utils/documents.py +7 -18
  189. pixeltable/utils/exception_handler.py +7 -30
  190. pixeltable/utils/filecache.py +6 -6
  191. pixeltable/utils/formatter.py +86 -48
  192. pixeltable/utils/gcs_store.py +295 -0
  193. pixeltable/utils/http.py +133 -0
  194. pixeltable/utils/http_server.py +2 -3
  195. pixeltable/utils/iceberg.py +1 -2
  196. pixeltable/utils/image.py +17 -0
  197. pixeltable/utils/lancedb.py +90 -0
  198. pixeltable/utils/local_store.py +322 -0
  199. pixeltable/utils/misc.py +5 -0
  200. pixeltable/utils/object_stores.py +573 -0
  201. pixeltable/utils/pydantic.py +60 -0
  202. pixeltable/utils/pytorch.py +5 -6
  203. pixeltable/utils/s3_store.py +527 -0
  204. pixeltable/utils/sql.py +26 -0
  205. pixeltable/utils/system.py +30 -0
  206. pixeltable-0.5.7.dist-info/METADATA +579 -0
  207. pixeltable-0.5.7.dist-info/RECORD +227 -0
  208. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  209. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  210. pixeltable/__version__.py +0 -3
  211. pixeltable/catalog/named_function.py +0 -40
  212. pixeltable/ext/__init__.py +0 -17
  213. pixeltable/ext/functions/__init__.py +0 -11
  214. pixeltable/ext/functions/whisperx.py +0 -77
  215. pixeltable/utils/media_store.py +0 -77
  216. pixeltable/utils/s3.py +0 -17
  217. pixeltable-0.3.14.dist-info/METADATA +0 -434
  218. pixeltable-0.3.14.dist-info/RECORD +0 -186
  219. pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
  220. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -3,142 +3,184 @@ from __future__ import annotations
3
3
  import logging
4
4
  import warnings
5
5
  from textwrap import dedent
6
- from typing import TYPE_CHECKING, Any, Optional
6
+ from typing import TYPE_CHECKING, Any
7
7
 
8
+ import pgvector.sqlalchemy # type: ignore[import-untyped]
8
9
  import sqlalchemy as sql
9
10
 
10
11
  import pixeltable.exceptions as excs
12
+ import pixeltable.exprs as exprs
11
13
  import pixeltable.type_system as ts
12
- from pixeltable import exprs
14
+ from pixeltable.env import Env
15
+ from pixeltable.metadata import schema
13
16
 
14
- from .globals import MediaValidation, is_valid_identifier
17
+ from .globals import MediaValidation, QColumnId, is_valid_identifier
15
18
 
16
19
  if TYPE_CHECKING:
17
20
  from .table_version import TableVersion
18
- from .table_version_handle import TableVersionHandle
21
+ from .table_version_handle import ColumnHandle, TableVersionHandle
19
22
  from .table_version_path import TableVersionPath
20
23
 
21
24
  _logger = logging.getLogger('pixeltable')
22
25
 
23
26
 
24
27
  class Column:
25
- """Representation of a column in the schema of a Table/DataFrame.
28
+ """Representation of a column in the schema of a Table/Query.
26
29
 
27
30
  A Column contains all the metadata necessary for executing queries and updates against a particular version of a
28
31
  table/view.
32
+
33
+ Args:
34
+ name: column name; None for system columns (eg, index columns)
35
+ col_type: column type; can be None if the type can be derived from ``computed_with``
36
+ computed_with: an Expr that computes the column value
37
+ is_pk: if True, this column is part of the primary key
38
+ stored: determines whether a computed column is present in the stored table or recomputed on demand
39
+ destination: An object store reference for persisting computed files
40
+ col_id: column ID (only used internally)
41
+
42
+ Computed columns: those have a non-None ``computed_with`` argument
43
+ - when constructed by the user: ``computed_with`` was constructed explicitly and is passed in;
44
+ col_type is None
45
+ - when loaded from md store: ``computed_with`` is set and col_type is set
46
+
47
+ ``stored`` (only valid for computed columns):
48
+ - if True: the column is present in the stored table
49
+ - if False: the column is not present in the stored table and recomputed during a query
50
+ - if None: the system chooses for you (at present, this is always False, but this may change in the future)
29
51
  """
30
52
 
31
- name: str
32
- id: Optional[int]
53
+ name: str | None
54
+ id: int | None
33
55
  col_type: ts.ColumnType
34
56
  stored: bool
35
57
  is_pk: bool
36
- _media_validation: Optional[MediaValidation] # if not set, TableVersion.media_validation applies
37
- schema_version_add: Optional[int]
38
- schema_version_drop: Optional[int]
39
- _records_errors: Optional[bool]
40
- sa_col: Optional[sql.schema.Column]
41
- sa_col_type: Optional[sql.sqltypes.TypeEngine]
42
- sa_errormsg_col: Optional[sql.schema.Column]
43
- sa_errortype_col: Optional[sql.schema.Column]
44
- _value_expr: Optional[exprs.Expr]
45
- value_expr_dict: Optional[dict[str, Any]]
46
- dependent_cols: set[Column]
47
- tbl: Optional[TableVersionHandle]
58
+ is_iterator_col: bool
59
+ _explicit_destination: str | None # An object store reference for computed files
60
+ _media_validation: MediaValidation | None # if not set, TableVersion.media_validation applies
61
+ schema_version_add: int | None
62
+ schema_version_drop: int | None
63
+ stores_cellmd: bool
64
+ sa_col: sql.schema.Column | None
65
+ sa_col_type: sql.types.TypeEngine
66
+ sa_cellmd_col: sql.schema.Column | None # JSON metadata for the cell, e.g. errortype, errormsg for media columns
67
+ _value_expr: exprs.Expr | None
68
+ value_expr_dict: dict[str, Any] | None
69
+ # we store a handle here in order to allow Column construction before there is a corresponding TableVersion
70
+ tbl_handle: 'TableVersionHandle' | None
48
71
 
49
72
  def __init__(
50
73
  self,
51
- name: Optional[str],
52
- col_type: Optional[ts.ColumnType] = None,
53
- computed_with: Optional[exprs.Expr] = None,
74
+ name: str | None,
75
+ col_type: ts.ColumnType | None = None,
76
+ computed_with: exprs.Expr | None = None,
54
77
  is_pk: bool = False,
78
+ is_iterator_col: bool = False,
55
79
  stored: bool = True,
56
- media_validation: Optional[MediaValidation] = None,
57
- col_id: Optional[int] = None,
58
- schema_version_add: Optional[int] = None,
59
- schema_version_drop: Optional[int] = None,
60
- sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
61
- records_errors: Optional[bool] = None,
62
- value_expr_dict: Optional[dict[str, Any]] = None,
80
+ media_validation: MediaValidation | None = None,
81
+ col_id: int | None = None,
82
+ schema_version_add: int | None = None,
83
+ schema_version_drop: int | None = None,
84
+ sa_col_type: sql.types.TypeEngine | None = None,
85
+ stores_cellmd: bool | None = None,
86
+ value_expr_dict: dict[str, Any] | None = None,
87
+ tbl_handle: 'TableVersionHandle' | None = None,
88
+ destination: str | None = None,
63
89
  ):
64
- """Column constructor.
65
-
66
- Args:
67
- name: column name; None for system columns (eg, index columns)
68
- col_type: column type; can be None if the type can be derived from ``computed_with``
69
- computed_with: an Expr that computes the column value
70
- is_pk: if True, this column is part of the primary key
71
- stored: determines whether a computed column is present in the stored table or recomputed on demand
72
- col_id: column ID (only used internally)
73
-
74
- Computed columns: those have a non-None ``computed_with`` argument
75
- - when constructed by the user: ``computed_with`` was constructed explicitly and is passed in;
76
- col_type is None
77
- - when loaded from md store: ``computed_with`` is set and col_type is set
78
-
79
- ``stored`` (only valid for computed columns):
80
- - if True: the column is present in the stored table
81
- - if False: the column is not present in the stored table and recomputed during a query
82
- - if None: the system chooses for you (at present, this is always False, but this may change in the future)
83
- """
84
90
  if name is not None and not is_valid_identifier(name):
85
- raise excs.Error(f"Invalid column name: '{name}'")
91
+ raise excs.Error(f'Invalid column name: {name}')
86
92
  self.name = name
93
+ self.tbl_handle = tbl_handle
87
94
  if col_type is None and computed_with is None:
88
- raise excs.Error(f'Column `{name}`: col_type is required if computed_with is not specified')
95
+ raise excs.Error(f'Column {name!r}: `col_type` is required if `computed_with` is not specified')
89
96
 
90
- self._value_expr: Optional[exprs.Expr] = None
97
+ self._value_expr = None
91
98
  self.value_expr_dict = value_expr_dict
92
99
  if computed_with is not None:
93
100
  value_expr = exprs.Expr.from_object(computed_with)
94
101
  if value_expr is None:
102
+ # TODO: this shouldn't be a user-facing error
95
103
  raise excs.Error(
96
- f'Column {name}: computed_with needs to be a valid Pixeltable expression, '
104
+ f'Column {name!r}: `computed_with` needs to be a valid Pixeltable expression, '
97
105
  f'but it is a {type(computed_with)}'
98
106
  )
99
107
  else:
100
108
  self._value_expr = value_expr.copy()
101
109
  self.col_type = self._value_expr.col_type
110
+ if self._value_expr is not None and self.value_expr_dict is None:
111
+ self.value_expr_dict = self._value_expr.as_dict()
102
112
 
103
113
  if col_type is not None:
104
114
  self.col_type = col_type
105
115
  assert self.col_type is not None
106
116
 
107
117
  self.stored = stored
108
- self.dependent_cols = set() # cols with value_exprs that reference us; set by TableVersion
118
+ # self.dependent_cols = set() # cols with value_exprs that reference us; set by TableVersion
109
119
  self.id = col_id
110
120
  self.is_pk = is_pk
121
+ self.is_iterator_col = is_iterator_col
111
122
  self._media_validation = media_validation
112
123
  self.schema_version_add = schema_version_add
113
124
  self.schema_version_drop = schema_version_drop
114
125
 
115
- self._records_errors = records_errors
126
+ if stores_cellmd is not None:
127
+ self.stores_cellmd = stores_cellmd
128
+ else:
129
+ self.stores_cellmd = stored and (
130
+ self.is_computed or self.col_type.is_media_type() or self.col_type.supports_file_offloading()
131
+ )
116
132
 
117
133
  # column in the stored table for the values of this Column
118
134
  self.sa_col = None
119
- self.sa_col_type = sa_col_type
135
+ self.sa_col_type = self.col_type.to_sa_type() if sa_col_type is None else sa_col_type
120
136
 
121
137
  # computed cols also have storage columns for the exception string and type
122
- self.sa_errormsg_col = None
123
- self.sa_errortype_col = None
138
+ self.sa_cellmd_col = None
139
+ self._explicit_destination = destination
140
+
141
+ def to_md(self, pos: int | None = None) -> tuple[schema.ColumnMd, schema.SchemaColumn | None]:
142
+ """Returns the Column and optional SchemaColumn metadata for this Column."""
143
+ assert self.is_pk is not None
144
+ col_md = schema.ColumnMd(
145
+ id=self.id,
146
+ col_type=self.col_type.as_dict(),
147
+ is_pk=self.is_pk,
148
+ schema_version_add=self.schema_version_add,
149
+ schema_version_drop=self.schema_version_drop,
150
+ value_expr=self.value_expr.as_dict() if self.value_expr is not None else None,
151
+ stored=self.stored,
152
+ destination=self._explicit_destination,
153
+ )
154
+ if pos is None:
155
+ return col_md, None
156
+ assert self.name is not None, 'Column name must be set for user-facing columns'
157
+ sch_md = schema.SchemaColumn(
158
+ name=self.name,
159
+ pos=pos,
160
+ media_validation=self._media_validation.name.lower() if self._media_validation is not None else None,
161
+ )
162
+ return col_md, sch_md
124
163
 
125
- self.tbl = None # set by owning TableVersion
164
+ def init_value_expr(self, tvp: 'TableVersionPath' | None) -> None:
165
+ """
166
+ Initialize the value_expr from its dict representation, if necessary.
126
167
 
127
- @property
128
- def value_expr(self) -> Optional[exprs.Expr]:
129
- """Instantiate value_expr on-demand"""
130
- # TODO: instantiate expr in the c'tor and add an Expr.prepare() that can create additional state after the
131
- # catalog has been fully loaded; that way, we encounter bugs in the serialization/deserialization logic earlier
132
- if self.value_expr_dict is not None and self._value_expr is None:
133
- from pixeltable import exprs
168
+ If `tvp` is not None, retarget the value_expr to the given TableVersionPath.
169
+ """
170
+ from pixeltable import exprs
134
171
 
172
+ if self._value_expr is None and self.value_expr_dict is None:
173
+ return
174
+
175
+ if self._value_expr is None:
176
+ # Instantiate the Expr from its dict
135
177
  self._value_expr = exprs.Expr.from_dict(self.value_expr_dict)
136
178
  self._value_expr.bind_rel_paths()
137
179
  if not self._value_expr.is_valid:
138
180
  message = (
139
181
  dedent(
140
182
  f"""
141
- The computed column {self.name!r} in table {self.tbl.get().name!r} is no longer valid.
183
+ The computed column {self.name!r} in table {self.get_tbl().name!r} is no longer valid.
142
184
  {{validation_error}}
143
185
  You can continue to query existing data from this column, but evaluating it on new data will raise an error.
144
186
  """ # noqa: E501
@@ -147,36 +189,81 @@ class Column:
147
189
  .format(validation_error=self._value_expr.validation_error)
148
190
  )
149
191
  warnings.warn(message, category=excs.PixeltableWarning, stacklevel=2)
192
+
193
+ if tvp is not None:
194
+ # Retarget the Expr
195
+ self._value_expr = self._value_expr.retarget(tvp)
196
+
197
+ def get_tbl(self) -> TableVersion:
198
+ tv = self.tbl_handle.get()
199
+ return tv
200
+
201
+ @property
202
+ def destination(self) -> str | None:
203
+ if self._explicit_destination is not None:
204
+ # An explicit destination was set as part of the column definition
205
+ return self._explicit_destination
206
+
207
+ # Otherwise, if this is a stored media column, use the default destination if one is configured (input
208
+ # destination or output destination, depending on whether this is a computed column)
209
+ # TODO: The `self.name is not None` clause is necessary because index columns currently follow the type of
210
+ # the underlying media column. We should move to using pxt.String as the col_type of index columns; this
211
+ # would be a more robust solution, and then `self.name is not None` could be removed.
212
+ if self.is_stored and self.col_type.is_media_type() and self.name is not None:
213
+ if self.is_computed:
214
+ return Env.get().default_output_media_dest
215
+ else:
216
+ return Env.get().default_input_media_dest
217
+
218
+ return None
219
+
220
+ @property
221
+ def handle(self) -> 'ColumnHandle':
222
+ """Returns a ColumnHandle for this Column."""
223
+ from .table_version_handle import ColumnHandle
224
+
225
+ assert self.tbl_handle is not None
226
+ assert self.id is not None
227
+ return ColumnHandle(self.tbl_handle, self.id)
228
+
229
+ @property
230
+ def qid(self) -> QColumnId:
231
+ assert self.tbl_handle is not None
232
+ assert self.id is not None
233
+ return QColumnId(self.tbl_handle.id, self.id)
234
+
235
+ @property
236
+ def value_expr(self) -> exprs.Expr | None:
237
+ assert self.value_expr_dict is None or self._value_expr is not None
150
238
  return self._value_expr
151
239
 
152
240
  def set_value_expr(self, value_expr: exprs.Expr) -> None:
153
241
  self._value_expr = value_expr
154
- self.value_expr_dict = None
242
+ self.value_expr_dict = self._value_expr.as_dict()
155
243
 
156
244
  def check_value_expr(self) -> None:
157
245
  assert self._value_expr is not None
158
- if self.stored == False and self.is_computed and self.has_window_fn_call():
246
+ if not self.stored and self.is_computed and self.has_window_fn_call():
159
247
  raise excs.Error(
160
- f'Column {self.name}: stored={self.stored} not supported for columns computed with window functions:'
161
- f'\n{self.value_expr}'
248
+ f'Column {self.name!r}: `stored={self.stored}` not supported for columns '
249
+ f'computed with window functions:\n{self.value_expr}'
162
250
  )
163
251
 
164
252
  def has_window_fn_call(self) -> bool:
165
- if self.value_expr is None:
166
- return False
167
253
  from pixeltable import exprs
168
254
 
255
+ if self.value_expr is None:
256
+ return False
169
257
  window_fn_calls = list(
170
258
  self.value_expr.subexprs(filter=lambda e: isinstance(e, exprs.FunctionCall) and e.is_window_fn_call)
171
259
  )
172
260
  return len(window_fn_calls) > 0
173
261
 
174
- # TODO: This should be moved out of `Column` (its presence in `Column` doesn't anticipate indices being defined on
175
- # multiple dependents)
176
- def get_idx_info(self, reference_tbl: Optional['TableVersionPath'] = None) -> dict[str, 'TableVersion.IndexInfo']:
177
- assert self.tbl is not None
178
- tbl = reference_tbl.tbl_version if reference_tbl is not None else self.tbl
179
- return {name: info for name, info in tbl.get().idxs_by_name.items() if info.col == self}
262
+ def stores_external_array(self) -> bool:
263
+ """Returns True if this is an Array column that might store its values externally."""
264
+ assert self.sa_col_type is not None
265
+ # Vector: if this is a vector column (ie, used for a vector index), it stores the array itself
266
+ return self.col_type.is_array_type() and not isinstance(self.sa_col_type, pgvector.sqlalchemy.Vector)
180
267
 
181
268
  @property
182
269
  def is_computed(self) -> bool:
@@ -188,25 +275,17 @@ class Column:
188
275
  assert self.stored is not None
189
276
  return self.stored
190
277
 
191
- @property
192
- def records_errors(self) -> bool:
193
- """True if this column also stores error information."""
194
- # default: record errors for computed and media columns
195
- if self._records_errors is not None:
196
- return self._records_errors
197
- return self.is_stored and (self.is_computed or self.col_type.is_media_type())
198
-
199
278
  @property
200
279
  def qualified_name(self) -> str:
201
- assert self.tbl is not None
202
- return f'{self.tbl.get().name}.{self.name}'
280
+ assert self.get_tbl() is not None
281
+ return f'{self.get_tbl().name}.{self.name}'
203
282
 
204
283
  @property
205
284
  def media_validation(self) -> MediaValidation:
206
285
  if self._media_validation is not None:
207
286
  return self._media_validation
208
- assert self.tbl is not None
209
- return self.tbl.get().media_validation
287
+ assert self.get_tbl() is not None
288
+ return self.get_tbl().media_validation
210
289
 
211
290
  @property
212
291
  def is_required_for_insert(self) -> bool:
@@ -225,48 +304,46 @@ class Column:
225
304
 
226
305
  def create_sa_cols(self) -> None:
227
306
  """
228
- These need to be recreated for every new table schema version.
307
+ These need to be recreated for every sql.Table instance
229
308
  """
230
309
  assert self.is_stored
310
+ assert self.stores_cellmd is not None
231
311
  # all storage columns are nullable (we deal with null errors in Pixeltable directly)
232
- self.sa_col = sql.Column(
233
- self.store_name(),
234
- self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type,
235
- nullable=True,
236
- )
237
- if self.is_computed or self.col_type.is_media_type():
238
- self.sa_errormsg_col = sql.Column(self.errormsg_store_name(), ts.StringType().to_sa_type(), nullable=True)
239
- self.sa_errortype_col = sql.Column(self.errortype_store_name(), ts.StringType().to_sa_type(), nullable=True)
312
+ self.sa_col = sql.Column(self.store_name(), self.sa_col_type, nullable=True)
313
+ if self.stores_cellmd:
314
+ self.sa_cellmd_col = sql.Column(self.cellmd_store_name(), self.sa_cellmd_type(), nullable=True)
240
315
 
241
- def get_sa_col_type(self) -> sql.sqltypes.TypeEngine:
242
- return self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type
316
+ @classmethod
317
+ def cellmd_type(cls) -> ts.ColumnType:
318
+ return ts.JsonType(nullable=True)
319
+
320
+ @classmethod
321
+ def sa_cellmd_type(cls) -> sql.types.TypeEngine:
322
+ return cls.cellmd_type().to_sa_type()
243
323
 
244
324
  def store_name(self) -> str:
245
325
  assert self.id is not None
246
326
  assert self.is_stored
247
327
  return f'col_{self.id}'
248
328
 
249
- def errormsg_store_name(self) -> str:
250
- return f'{self.store_name()}_errormsg'
251
-
252
- def errortype_store_name(self) -> str:
253
- return f'{self.store_name()}_errortype'
329
+ def cellmd_store_name(self) -> str:
330
+ return f'{self.store_name()}_cellmd'
254
331
 
255
332
  def __str__(self) -> str:
256
333
  return f'{self.name}: {self.col_type}'
257
334
 
258
335
  def __repr__(self) -> str:
259
- return f'Column({self.id!r}, {self.name!r}, tbl={self.tbl.get().name!r})'
336
+ return f'Column({self.id!r}, {self.name!r}, tbl={self.get_tbl().name!r})'
260
337
 
261
338
  def __hash__(self) -> int:
262
339
  # TODO(aaron-siegel): This and __eq__ do not capture the table version. We need to rethink the Column
263
340
  # abstraction (perhaps separating out the version-dependent properties into a different abstraction).
264
- assert self.tbl is not None
265
- return hash((self.tbl.id, self.id))
341
+ assert self.tbl_handle is not None
342
+ return hash((self.tbl_handle.id, self.id))
266
343
 
267
344
  def __eq__(self, other: object) -> bool:
268
345
  if not isinstance(other, Column):
269
346
  return False
270
- assert self.tbl is not None
271
- assert other.tbl is not None
272
- return self.tbl.id == other.tbl.id and self.id == other.id
347
+ assert self.tbl_handle is not None
348
+ assert other.tbl_handle is not None
349
+ return self.tbl_handle.id == other.tbl_handle.id and self.id == other.id
pixeltable/catalog/dir.py CHANGED
@@ -34,17 +34,15 @@ class Dir(SchemaObject):
34
34
  dir = cls(dir_record.id, parent_id, name)
35
35
  return dir
36
36
 
37
- @classmethod
38
- def _display_name(cls) -> str:
37
+ def _display_name(self) -> str:
39
38
  return 'directory'
40
39
 
41
- @property
42
40
  def _path(self) -> str:
43
41
  """Returns the path to this schema object."""
44
42
  if self._dir_id is None:
45
43
  # we're the root dir
46
44
  return ''
47
- return super()._path
45
+ return super()._path()
48
46
 
49
47
  def _move(self, new_name: str, new_dir_id: UUID) -> None:
50
48
  # print(
@@ -1,12 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
- import dataclasses
4
3
  import enum
5
4
  import itertools
6
5
  import logging
7
- from typing import Optional
8
-
9
- from typing_extensions import Self
6
+ from dataclasses import dataclass
7
+ from uuid import UUID
10
8
 
11
9
  import pixeltable.exceptions as excs
12
10
 
@@ -18,45 +16,15 @@ _ROWID_COLUMN_NAME = '_rowid'
18
16
 
19
17
  # Set of symbols that are predefined in the `InsertableTable` class (and are therefore not allowed as column names).
20
18
  # This will be populated lazily to avoid circular imports.
21
- _PREDEF_SYMBOLS: Optional[set[str]] = None
22
-
23
-
24
- @dataclasses.dataclass
25
- class UpdateStatus:
26
- """
27
- Information about updates that resulted from a table operation.
28
- """
29
-
30
- num_rows: int = 0
31
- # TODO: disambiguate what this means: # of slots computed or # of columns computed?
32
- num_computed_values: int = 0
33
- num_excs: int = 0
34
- updated_cols: list[str] = dataclasses.field(default_factory=list)
35
- cols_with_excs: list[str] = dataclasses.field(default_factory=list)
36
-
37
- def __iadd__(self, other: 'UpdateStatus') -> Self:
38
- self.num_rows += other.num_rows
39
- self.num_computed_values += other.num_computed_values
40
- self.num_excs += other.num_excs
41
- self.updated_cols = list(dict.fromkeys(self.updated_cols + other.updated_cols))
42
- self.cols_with_excs = list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs))
43
- return self
44
-
45
- @property
46
- def insert_msg(self) -> str:
47
- """Return a message describing the results of an insert operation."""
48
- if self.num_excs == 0:
49
- cols_with_excs_str = ''
50
- else:
51
- cols_with_excs_str = (
52
- f' across {len(self.cols_with_excs)} column{"" if len(self.cols_with_excs) == 1 else "s"}'
53
- )
54
- cols_with_excs_str += f' ({", ".join(self.cols_with_excs)})'
55
- msg = (
56
- f'Inserted {self.num_rows} row{"" if self.num_rows == 1 else "s"} '
57
- f'with {self.num_excs} error{"" if self.num_excs == 1 else "s"}{cols_with_excs_str}.'
58
- )
59
- return msg
19
+ _PREDEF_SYMBOLS: set[str] | None = None
20
+
21
+
22
+ @dataclass(frozen=True)
23
+ class QColumnId:
24
+ """Qualified column id"""
25
+
26
+ tbl_id: UUID
27
+ col_id: int
60
28
 
61
29
 
62
30
  class MediaValidation(enum.Enum):
@@ -100,14 +68,19 @@ class IfNotExistsParam(enum.Enum):
100
68
  raise excs.Error(f'{param_name} must be one of: [{val_strs}]') from None
101
69
 
102
70
 
103
- def is_valid_identifier(name: str, allow_system_identifiers: bool = False) -> bool:
104
- return name.isidentifier() and (allow_system_identifiers or not name.startswith('_'))
71
+ def is_valid_identifier(name: str, *, allow_system_identifiers: bool = False, allow_hyphens: bool = False) -> bool:
72
+ # If allow_hyphens=True, we allow hyphens to appear in the name, but we still do not permit a name to start with
73
+ # one (even if allow_system_identifiers=True)
74
+ adj_name = name.replace('-', '_') if allow_hyphens else name
75
+ return (
76
+ adj_name.isidentifier() and not name.startswith('-') and (allow_system_identifiers or not name.startswith('_'))
77
+ )
105
78
 
106
79
 
107
80
  def is_valid_path(path: str, empty_is_valid: bool, allow_system_paths: bool = False) -> bool:
108
81
  if path == '':
109
82
  return empty_is_valid
110
- return all(is_valid_identifier(part, allow_system_paths) for part in path.split('.'))
83
+ return all(is_valid_identifier(part, allow_system_identifiers=allow_system_paths) for part in path.split('.'))
111
84
 
112
85
 
113
86
  def is_system_column_name(name: str) -> bool: