pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245)
  1. pixeltable/__init__.py +83 -19
  2. pixeltable/_query.py +1444 -0
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +7 -4
  5. pixeltable/catalog/catalog.py +2394 -119
  6. pixeltable/catalog/column.py +225 -104
  7. pixeltable/catalog/dir.py +38 -9
  8. pixeltable/catalog/globals.py +53 -34
  9. pixeltable/catalog/insertable_table.py +265 -115
  10. pixeltable/catalog/path.py +80 -17
  11. pixeltable/catalog/schema_object.py +28 -43
  12. pixeltable/catalog/table.py +1270 -677
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +1270 -751
  15. pixeltable/catalog/table_version_handle.py +109 -0
  16. pixeltable/catalog/table_version_path.py +137 -42
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +251 -134
  20. pixeltable/config.py +215 -0
  21. pixeltable/env.py +736 -285
  22. pixeltable/exceptions.py +26 -2
  23. pixeltable/exec/__init__.py +7 -2
  24. pixeltable/exec/aggregation_node.py +39 -21
  25. pixeltable/exec/cache_prefetch_node.py +87 -109
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +25 -28
  29. pixeltable/exec/data_row_batch.py +11 -46
  30. pixeltable/exec/exec_context.py +26 -11
  31. pixeltable/exec/exec_node.py +35 -27
  32. pixeltable/exec/expr_eval/__init__.py +3 -0
  33. pixeltable/exec/expr_eval/evaluators.py +365 -0
  34. pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
  35. pixeltable/exec/expr_eval/globals.py +200 -0
  36. pixeltable/exec/expr_eval/row_buffer.py +74 -0
  37. pixeltable/exec/expr_eval/schedulers.py +413 -0
  38. pixeltable/exec/globals.py +35 -0
  39. pixeltable/exec/in_memory_data_node.py +35 -27
  40. pixeltable/exec/object_store_save_node.py +293 -0
  41. pixeltable/exec/row_update_node.py +44 -29
  42. pixeltable/exec/sql_node.py +414 -115
  43. pixeltable/exprs/__init__.py +8 -5
  44. pixeltable/exprs/arithmetic_expr.py +79 -45
  45. pixeltable/exprs/array_slice.py +5 -5
  46. pixeltable/exprs/column_property_ref.py +40 -26
  47. pixeltable/exprs/column_ref.py +254 -61
  48. pixeltable/exprs/comparison.py +14 -9
  49. pixeltable/exprs/compound_predicate.py +9 -10
  50. pixeltable/exprs/data_row.py +213 -72
  51. pixeltable/exprs/expr.py +270 -104
  52. pixeltable/exprs/expr_dict.py +6 -5
  53. pixeltable/exprs/expr_set.py +20 -11
  54. pixeltable/exprs/function_call.py +383 -284
  55. pixeltable/exprs/globals.py +18 -5
  56. pixeltable/exprs/in_predicate.py +7 -7
  57. pixeltable/exprs/inline_expr.py +37 -37
  58. pixeltable/exprs/is_null.py +8 -4
  59. pixeltable/exprs/json_mapper.py +120 -54
  60. pixeltable/exprs/json_path.py +90 -60
  61. pixeltable/exprs/literal.py +61 -16
  62. pixeltable/exprs/method_ref.py +7 -6
  63. pixeltable/exprs/object_ref.py +19 -8
  64. pixeltable/exprs/row_builder.py +238 -75
  65. pixeltable/exprs/rowid_ref.py +53 -15
  66. pixeltable/exprs/similarity_expr.py +65 -50
  67. pixeltable/exprs/sql_element_cache.py +5 -5
  68. pixeltable/exprs/string_op.py +107 -0
  69. pixeltable/exprs/type_cast.py +25 -13
  70. pixeltable/exprs/variable.py +2 -2
  71. pixeltable/func/__init__.py +9 -5
  72. pixeltable/func/aggregate_function.py +197 -92
  73. pixeltable/func/callable_function.py +119 -35
  74. pixeltable/func/expr_template_function.py +101 -48
  75. pixeltable/func/function.py +375 -62
  76. pixeltable/func/function_registry.py +20 -19
  77. pixeltable/func/globals.py +6 -5
  78. pixeltable/func/mcp.py +74 -0
  79. pixeltable/func/query_template_function.py +151 -35
  80. pixeltable/func/signature.py +178 -49
  81. pixeltable/func/tools.py +164 -0
  82. pixeltable/func/udf.py +176 -53
  83. pixeltable/functions/__init__.py +44 -4
  84. pixeltable/functions/anthropic.py +226 -47
  85. pixeltable/functions/audio.py +148 -11
  86. pixeltable/functions/bedrock.py +137 -0
  87. pixeltable/functions/date.py +188 -0
  88. pixeltable/functions/deepseek.py +113 -0
  89. pixeltable/functions/document.py +81 -0
  90. pixeltable/functions/fal.py +76 -0
  91. pixeltable/functions/fireworks.py +72 -20
  92. pixeltable/functions/gemini.py +249 -0
  93. pixeltable/functions/globals.py +208 -53
  94. pixeltable/functions/groq.py +108 -0
  95. pixeltable/functions/huggingface.py +1088 -95
  96. pixeltable/functions/image.py +155 -84
  97. pixeltable/functions/json.py +8 -11
  98. pixeltable/functions/llama_cpp.py +31 -19
  99. pixeltable/functions/math.py +169 -0
  100. pixeltable/functions/mistralai.py +50 -75
  101. pixeltable/functions/net.py +70 -0
  102. pixeltable/functions/ollama.py +29 -36
  103. pixeltable/functions/openai.py +548 -160
  104. pixeltable/functions/openrouter.py +143 -0
  105. pixeltable/functions/replicate.py +15 -14
  106. pixeltable/functions/reve.py +250 -0
  107. pixeltable/functions/string.py +310 -85
  108. pixeltable/functions/timestamp.py +37 -19
  109. pixeltable/functions/together.py +77 -120
  110. pixeltable/functions/twelvelabs.py +188 -0
  111. pixeltable/functions/util.py +7 -2
  112. pixeltable/functions/uuid.py +30 -0
  113. pixeltable/functions/video.py +1528 -117
  114. pixeltable/functions/vision.py +26 -26
  115. pixeltable/functions/voyageai.py +289 -0
  116. pixeltable/functions/whisper.py +19 -10
  117. pixeltable/functions/whisperx.py +179 -0
  118. pixeltable/functions/yolox.py +112 -0
  119. pixeltable/globals.py +716 -236
  120. pixeltable/index/__init__.py +3 -1
  121. pixeltable/index/base.py +17 -21
  122. pixeltable/index/btree.py +32 -22
  123. pixeltable/index/embedding_index.py +155 -92
  124. pixeltable/io/__init__.py +12 -7
  125. pixeltable/io/datarows.py +140 -0
  126. pixeltable/io/external_store.py +83 -125
  127. pixeltable/io/fiftyone.py +24 -33
  128. pixeltable/io/globals.py +47 -182
  129. pixeltable/io/hf_datasets.py +96 -127
  130. pixeltable/io/label_studio.py +171 -156
  131. pixeltable/io/lancedb.py +3 -0
  132. pixeltable/io/pandas.py +136 -115
  133. pixeltable/io/parquet.py +40 -153
  134. pixeltable/io/table_data_conduit.py +702 -0
  135. pixeltable/io/utils.py +100 -0
  136. pixeltable/iterators/__init__.py +8 -4
  137. pixeltable/iterators/audio.py +207 -0
  138. pixeltable/iterators/base.py +9 -3
  139. pixeltable/iterators/document.py +144 -87
  140. pixeltable/iterators/image.py +17 -38
  141. pixeltable/iterators/string.py +15 -12
  142. pixeltable/iterators/video.py +523 -127
  143. pixeltable/metadata/__init__.py +33 -8
  144. pixeltable/metadata/converters/convert_10.py +2 -3
  145. pixeltable/metadata/converters/convert_13.py +2 -2
  146. pixeltable/metadata/converters/convert_15.py +15 -11
  147. pixeltable/metadata/converters/convert_16.py +4 -5
  148. pixeltable/metadata/converters/convert_17.py +4 -5
  149. pixeltable/metadata/converters/convert_18.py +4 -6
  150. pixeltable/metadata/converters/convert_19.py +6 -9
  151. pixeltable/metadata/converters/convert_20.py +3 -6
  152. pixeltable/metadata/converters/convert_21.py +6 -8
  153. pixeltable/metadata/converters/convert_22.py +3 -2
  154. pixeltable/metadata/converters/convert_23.py +33 -0
  155. pixeltable/metadata/converters/convert_24.py +55 -0
  156. pixeltable/metadata/converters/convert_25.py +19 -0
  157. pixeltable/metadata/converters/convert_26.py +23 -0
  158. pixeltable/metadata/converters/convert_27.py +29 -0
  159. pixeltable/metadata/converters/convert_28.py +13 -0
  160. pixeltable/metadata/converters/convert_29.py +110 -0
  161. pixeltable/metadata/converters/convert_30.py +63 -0
  162. pixeltable/metadata/converters/convert_31.py +11 -0
  163. pixeltable/metadata/converters/convert_32.py +15 -0
  164. pixeltable/metadata/converters/convert_33.py +17 -0
  165. pixeltable/metadata/converters/convert_34.py +21 -0
  166. pixeltable/metadata/converters/convert_35.py +9 -0
  167. pixeltable/metadata/converters/convert_36.py +38 -0
  168. pixeltable/metadata/converters/convert_37.py +15 -0
  169. pixeltable/metadata/converters/convert_38.py +39 -0
  170. pixeltable/metadata/converters/convert_39.py +124 -0
  171. pixeltable/metadata/converters/convert_40.py +73 -0
  172. pixeltable/metadata/converters/convert_41.py +12 -0
  173. pixeltable/metadata/converters/convert_42.py +9 -0
  174. pixeltable/metadata/converters/convert_43.py +44 -0
  175. pixeltable/metadata/converters/util.py +44 -18
  176. pixeltable/metadata/notes.py +21 -0
  177. pixeltable/metadata/schema.py +185 -42
  178. pixeltable/metadata/utils.py +74 -0
  179. pixeltable/mypy/__init__.py +3 -0
  180. pixeltable/mypy/mypy_plugin.py +123 -0
  181. pixeltable/plan.py +616 -225
  182. pixeltable/share/__init__.py +3 -0
  183. pixeltable/share/packager.py +797 -0
  184. pixeltable/share/protocol/__init__.py +33 -0
  185. pixeltable/share/protocol/common.py +165 -0
  186. pixeltable/share/protocol/operation_types.py +33 -0
  187. pixeltable/share/protocol/replica.py +119 -0
  188. pixeltable/share/publish.py +349 -0
  189. pixeltable/store.py +398 -232
  190. pixeltable/type_system.py +730 -267
  191. pixeltable/utils/__init__.py +40 -0
  192. pixeltable/utils/arrow.py +201 -29
  193. pixeltable/utils/av.py +298 -0
  194. pixeltable/utils/azure_store.py +346 -0
  195. pixeltable/utils/coco.py +26 -27
  196. pixeltable/utils/code.py +4 -4
  197. pixeltable/utils/console_output.py +46 -0
  198. pixeltable/utils/coroutine.py +24 -0
  199. pixeltable/utils/dbms.py +92 -0
  200. pixeltable/utils/description_helper.py +11 -12
  201. pixeltable/utils/documents.py +60 -61
  202. pixeltable/utils/exception_handler.py +36 -0
  203. pixeltable/utils/filecache.py +38 -22
  204. pixeltable/utils/formatter.py +88 -51
  205. pixeltable/utils/gcs_store.py +295 -0
  206. pixeltable/utils/http.py +133 -0
  207. pixeltable/utils/http_server.py +14 -13
  208. pixeltable/utils/iceberg.py +13 -0
  209. pixeltable/utils/image.py +17 -0
  210. pixeltable/utils/lancedb.py +90 -0
  211. pixeltable/utils/local_store.py +322 -0
  212. pixeltable/utils/misc.py +5 -0
  213. pixeltable/utils/object_stores.py +573 -0
  214. pixeltable/utils/pydantic.py +60 -0
  215. pixeltable/utils/pytorch.py +20 -20
  216. pixeltable/utils/s3_store.py +527 -0
  217. pixeltable/utils/sql.py +32 -5
  218. pixeltable/utils/system.py +30 -0
  219. pixeltable/utils/transactional_directory.py +4 -3
  220. pixeltable-0.5.7.dist-info/METADATA +579 -0
  221. pixeltable-0.5.7.dist-info/RECORD +227 -0
  222. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  223. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  224. pixeltable/__version__.py +0 -3
  225. pixeltable/catalog/named_function.py +0 -36
  226. pixeltable/catalog/path_dict.py +0 -141
  227. pixeltable/dataframe.py +0 -894
  228. pixeltable/exec/expr_eval_node.py +0 -232
  229. pixeltable/ext/__init__.py +0 -14
  230. pixeltable/ext/functions/__init__.py +0 -8
  231. pixeltable/ext/functions/whisperx.py +0 -77
  232. pixeltable/ext/functions/yolox.py +0 -157
  233. pixeltable/tool/create_test_db_dump.py +0 -311
  234. pixeltable/tool/create_test_video.py +0 -81
  235. pixeltable/tool/doc_plugins/griffe.py +0 -50
  236. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  237. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  238. pixeltable/tool/embed_udf.py +0 -9
  239. pixeltable/tool/mypy_plugin.py +0 -55
  240. pixeltable/utils/media_store.py +0 -76
  241. pixeltable/utils/s3.py +0 -16
  242. pixeltable-0.2.26.dist-info/METADATA +0 -400
  243. pixeltable-0.2.26.dist-info/RECORD +0 -156
  244. pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
  245. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -1,146 +1,269 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- from typing import TYPE_CHECKING, Any, Optional
4
+ import warnings
5
+ from textwrap import dedent
6
+ from typing import TYPE_CHECKING, Any
5
7
 
8
+ import pgvector.sqlalchemy # type: ignore[import-untyped]
6
9
  import sqlalchemy as sql
7
10
 
8
11
  import pixeltable.exceptions as excs
12
+ import pixeltable.exprs as exprs
9
13
  import pixeltable.type_system as ts
10
- from pixeltable import exprs
11
- from .globals import is_valid_identifier, MediaValidation
14
+ from pixeltable.env import Env
15
+ from pixeltable.metadata import schema
16
+
17
+ from .globals import MediaValidation, QColumnId, is_valid_identifier
12
18
 
13
19
  if TYPE_CHECKING:
14
20
  from .table_version import TableVersion
21
+ from .table_version_handle import ColumnHandle, TableVersionHandle
22
+ from .table_version_path import TableVersionPath
15
23
 
16
24
  _logger = logging.getLogger('pixeltable')
17
25
 
18
26
 
19
27
  class Column:
20
- """Representation of a column in the schema of a Table/DataFrame.
28
+ """Representation of a column in the schema of a Table/Query.
21
29
 
22
30
  A Column contains all the metadata necessary for executing queries and updates against a particular version of a
23
31
  table/view.
32
+
33
+ Args:
34
+ name: column name; None for system columns (eg, index columns)
35
+ col_type: column type; can be None if the type can be derived from ``computed_with``
36
+ computed_with: an Expr that computes the column value
37
+ is_pk: if True, this column is part of the primary key
38
+ stored: determines whether a computed column is present in the stored table or recomputed on demand
39
+ destination: An object store reference for persisting computed files
40
+ col_id: column ID (only used internally)
41
+
42
+ Computed columns: those have a non-None ``computed_with`` argument
43
+ - when constructed by the user: ``computed_with`` was constructed explicitly and is passed in;
44
+ col_type is None
45
+ - when loaded from md store: ``computed_with`` is set and col_type is set
46
+
47
+ ``stored`` (only valid for computed columns):
48
+ - if True: the column is present in the stored table
49
+ - if False: the column is not present in the stored table and recomputed during a query
50
+ - if None: the system chooses for you (at present, this is always False, but this may change in the future)
24
51
  """
25
- name: str
26
- id: Optional[int]
52
+
53
+ name: str | None
54
+ id: int | None
27
55
  col_type: ts.ColumnType
28
56
  stored: bool
29
57
  is_pk: bool
30
- _media_validation: Optional[MediaValidation] # if not set, TableVersion.media_validation applies
31
- schema_version_add: Optional[int]
32
- schema_version_drop: Optional[int]
33
- _records_errors: Optional[bool]
34
- sa_col: Optional[sql.schema.Column]
35
- sa_col_type: Optional[sql.sqltypes.TypeEngine]
36
- sa_errormsg_col: Optional[sql.schema.Column]
37
- sa_errortype_col: Optional[sql.schema.Column]
38
- _value_expr: Optional[exprs.Expr]
39
- value_expr_dict: Optional[dict[str, Any]]
40
- dependent_cols: set[Column]
41
- tbl: Optional[TableVersion]
58
+ is_iterator_col: bool
59
+ _explicit_destination: str | None # An object store reference for computed files
60
+ _media_validation: MediaValidation | None # if not set, TableVersion.media_validation applies
61
+ schema_version_add: int | None
62
+ schema_version_drop: int | None
63
+ stores_cellmd: bool
64
+ sa_col: sql.schema.Column | None
65
+ sa_col_type: sql.types.TypeEngine
66
+ sa_cellmd_col: sql.schema.Column | None # JSON metadata for the cell, e.g. errortype, errormsg for media columns
67
+ _value_expr: exprs.Expr | None
68
+ value_expr_dict: dict[str, Any] | None
69
+ # we store a handle here in order to allow Column construction before there is a corresponding TableVersion
70
+ tbl_handle: 'TableVersionHandle' | None
42
71
 
43
72
  def __init__(
44
- self, name: Optional[str], col_type: Optional[ts.ColumnType] = None,
45
- computed_with: Optional[exprs.Expr] = None,
46
- is_pk: bool = False, stored: bool = True, media_validation: Optional[MediaValidation] = None,
47
- col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
48
- schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
49
- records_errors: Optional[bool] = None, value_expr_dict: Optional[dict[str, Any]] = None,
73
+ self,
74
+ name: str | None,
75
+ col_type: ts.ColumnType | None = None,
76
+ computed_with: exprs.Expr | None = None,
77
+ is_pk: bool = False,
78
+ is_iterator_col: bool = False,
79
+ stored: bool = True,
80
+ media_validation: MediaValidation | None = None,
81
+ col_id: int | None = None,
82
+ schema_version_add: int | None = None,
83
+ schema_version_drop: int | None = None,
84
+ sa_col_type: sql.types.TypeEngine | None = None,
85
+ stores_cellmd: bool | None = None,
86
+ value_expr_dict: dict[str, Any] | None = None,
87
+ tbl_handle: 'TableVersionHandle' | None = None,
88
+ destination: str | None = None,
50
89
  ):
51
- """Column constructor.
52
-
53
- Args:
54
- name: column name; None for system columns (eg, index columns)
55
- col_type: column type; can be None if the type can be derived from ``computed_with``
56
- computed_with: an Expr that computes the column value
57
- is_pk: if True, this column is part of the primary key
58
- stored: determines whether a computed column is present in the stored table or recomputed on demand
59
- col_id: column ID (only used internally)
60
-
61
- Computed columns: those have a non-None ``computed_with`` argument
62
- - when constructed by the user: ``computed_with`` was constructed explicitly and is passed in;
63
- col_type is None
64
- - when loaded from md store: ``computed_with`` is set and col_type is set
65
-
66
- ``stored`` (only valid for computed image columns):
67
- - if True: the column is present in the stored table
68
- - if False: the column is not present in the stored table and recomputed during a query
69
- - if None: the system chooses for you (at present, this is always False, but this may change in the future)
70
- """
71
90
  if name is not None and not is_valid_identifier(name):
72
- raise excs.Error(f"Invalid column name: '{name}'")
91
+ raise excs.Error(f'Invalid column name: {name}')
73
92
  self.name = name
93
+ self.tbl_handle = tbl_handle
74
94
  if col_type is None and computed_with is None:
75
- raise excs.Error(f'Column `{name}`: col_type is required if computed_with is not specified')
95
+ raise excs.Error(f'Column {name!r}: `col_type` is required if `computed_with` is not specified')
76
96
 
77
- self._value_expr: Optional[exprs.Expr] = None
97
+ self._value_expr = None
78
98
  self.value_expr_dict = value_expr_dict
79
99
  if computed_with is not None:
80
100
  value_expr = exprs.Expr.from_object(computed_with)
81
101
  if value_expr is None:
102
+ # TODO: this shouldn't be a user-facing error
82
103
  raise excs.Error(
83
- f'Column {name}: computed_with needs to be a valid Pixeltable expression, '
84
- f'but it is a {type(computed_with)}')
104
+ f'Column {name!r}: `computed_with` needs to be a valid Pixeltable expression, '
105
+ f'but it is a {type(computed_with)}'
106
+ )
85
107
  else:
86
108
  self._value_expr = value_expr.copy()
87
109
  self.col_type = self._value_expr.col_type
110
+ if self._value_expr is not None and self.value_expr_dict is None:
111
+ self.value_expr_dict = self._value_expr.as_dict()
88
112
 
89
113
  if col_type is not None:
90
114
  self.col_type = col_type
91
115
  assert self.col_type is not None
92
116
 
93
117
  self.stored = stored
94
- self.dependent_cols = set() # cols with value_exprs that reference us; set by TableVersion
118
+ # self.dependent_cols = set() # cols with value_exprs that reference us; set by TableVersion
95
119
  self.id = col_id
96
120
  self.is_pk = is_pk
121
+ self.is_iterator_col = is_iterator_col
97
122
  self._media_validation = media_validation
98
123
  self.schema_version_add = schema_version_add
99
124
  self.schema_version_drop = schema_version_drop
100
125
 
101
- self._records_errors = records_errors
126
+ if stores_cellmd is not None:
127
+ self.stores_cellmd = stores_cellmd
128
+ else:
129
+ self.stores_cellmd = stored and (
130
+ self.is_computed or self.col_type.is_media_type() or self.col_type.supports_file_offloading()
131
+ )
102
132
 
103
133
  # column in the stored table for the values of this Column
104
134
  self.sa_col = None
105
- self.sa_col_type = sa_col_type
135
+ self.sa_col_type = self.col_type.to_sa_type() if sa_col_type is None else sa_col_type
106
136
 
107
137
  # computed cols also have storage columns for the exception string and type
108
- self.sa_errormsg_col = None
109
- self.sa_errortype_col = None
138
+ self.sa_cellmd_col = None
139
+ self._explicit_destination = destination
140
+
141
+ def to_md(self, pos: int | None = None) -> tuple[schema.ColumnMd, schema.SchemaColumn | None]:
142
+ """Returns the Column and optional SchemaColumn metadata for this Column."""
143
+ assert self.is_pk is not None
144
+ col_md = schema.ColumnMd(
145
+ id=self.id,
146
+ col_type=self.col_type.as_dict(),
147
+ is_pk=self.is_pk,
148
+ schema_version_add=self.schema_version_add,
149
+ schema_version_drop=self.schema_version_drop,
150
+ value_expr=self.value_expr.as_dict() if self.value_expr is not None else None,
151
+ stored=self.stored,
152
+ destination=self._explicit_destination,
153
+ )
154
+ if pos is None:
155
+ return col_md, None
156
+ assert self.name is not None, 'Column name must be set for user-facing columns'
157
+ sch_md = schema.SchemaColumn(
158
+ name=self.name,
159
+ pos=pos,
160
+ media_validation=self._media_validation.name.lower() if self._media_validation is not None else None,
161
+ )
162
+ return col_md, sch_md
163
+
164
+ def init_value_expr(self, tvp: 'TableVersionPath' | None) -> None:
165
+ """
166
+ Initialize the value_expr from its dict representation, if necessary.
110
167
 
111
- self.tbl = None # set by owning TableVersion
168
+ If `tvp` is not None, retarget the value_expr to the given TableVersionPath.
169
+ """
170
+ from pixeltable import exprs
112
171
 
113
- @property
114
- def value_expr(self) -> Optional[exprs.Expr]:
115
- """Instantiate value_expr on-demand"""
116
- # TODO: instantiate expr in the c'tor and add an Expr.prepare() that can create additional state after the
117
- # catalog has been fully loaded; that way, we encounter bugs in the serialization/deserialization logic earlier
118
- if self.value_expr_dict is not None and self._value_expr is None:
119
- from pixeltable import exprs
172
+ if self._value_expr is None and self.value_expr_dict is None:
173
+ return
174
+
175
+ if self._value_expr is None:
176
+ # Instantiate the Expr from its dict
120
177
  self._value_expr = exprs.Expr.from_dict(self.value_expr_dict)
178
+ self._value_expr.bind_rel_paths()
179
+ if not self._value_expr.is_valid:
180
+ message = (
181
+ dedent(
182
+ f"""
183
+ The computed column {self.name!r} in table {self.get_tbl().name!r} is no longer valid.
184
+ {{validation_error}}
185
+ You can continue to query existing data from this column, but evaluating it on new data will raise an error.
186
+ """ # noqa: E501
187
+ )
188
+ .strip()
189
+ .format(validation_error=self._value_expr.validation_error)
190
+ )
191
+ warnings.warn(message, category=excs.PixeltableWarning, stacklevel=2)
192
+
193
+ if tvp is not None:
194
+ # Retarget the Expr
195
+ self._value_expr = self._value_expr.retarget(tvp)
196
+
197
+ def get_tbl(self) -> TableVersion:
198
+ tv = self.tbl_handle.get()
199
+ return tv
200
+
201
+ @property
202
+ def destination(self) -> str | None:
203
+ if self._explicit_destination is not None:
204
+ # An explicit destination was set as part of the column definition
205
+ return self._explicit_destination
206
+
207
+ # Otherwise, if this is a stored media column, use the default destination if one is configured (input
208
+ # destination or output destination, depending on whether this is a computed column)
209
+ # TODO: The `self.name is not None` clause is necessary because index columns currently follow the type of
210
+ # the underlying media column. We should move to using pxt.String as the col_type of index columns; this
211
+ # would be a more robust solution, and then `self.name is not None` could be removed.
212
+ if self.is_stored and self.col_type.is_media_type() and self.name is not None:
213
+ if self.is_computed:
214
+ return Env.get().default_output_media_dest
215
+ else:
216
+ return Env.get().default_input_media_dest
217
+
218
+ return None
219
+
220
+ @property
221
+ def handle(self) -> 'ColumnHandle':
222
+ """Returns a ColumnHandle for this Column."""
223
+ from .table_version_handle import ColumnHandle
224
+
225
+ assert self.tbl_handle is not None
226
+ assert self.id is not None
227
+ return ColumnHandle(self.tbl_handle, self.id)
228
+
229
+ @property
230
+ def qid(self) -> QColumnId:
231
+ assert self.tbl_handle is not None
232
+ assert self.id is not None
233
+ return QColumnId(self.tbl_handle.id, self.id)
234
+
235
+ @property
236
+ def value_expr(self) -> exprs.Expr | None:
237
+ assert self.value_expr_dict is None or self._value_expr is not None
121
238
  return self._value_expr
122
239
 
123
240
  def set_value_expr(self, value_expr: exprs.Expr) -> None:
124
241
  self._value_expr = value_expr
125
- self.value_expr_dict = None
242
+ self.value_expr_dict = self._value_expr.as_dict()
126
243
 
127
244
  def check_value_expr(self) -> None:
128
245
  assert self._value_expr is not None
129
- if self.stored == False and self.is_computed and self.has_window_fn_call():
246
+ if not self.stored and self.is_computed and self.has_window_fn_call():
130
247
  raise excs.Error(
131
- f'Column {self.name}: stored={self.stored} not supported for columns computed with window functions:'
132
- f'\n{self.value_expr}')
248
+ f'Column {self.name!r}: `stored={self.stored}` not supported for columns '
249
+ f'computed with window functions:\n{self.value_expr}'
250
+ )
133
251
 
134
252
  def has_window_fn_call(self) -> bool:
253
+ from pixeltable import exprs
254
+
135
255
  if self.value_expr is None:
136
256
  return False
137
- from pixeltable import exprs
138
- l = list(self.value_expr.subexprs(filter=lambda e: isinstance(e, exprs.FunctionCall) and e.is_window_fn_call))
139
- return len(l) > 0
257
+ window_fn_calls = list(
258
+ self.value_expr.subexprs(filter=lambda e: isinstance(e, exprs.FunctionCall) and e.is_window_fn_call)
259
+ )
260
+ return len(window_fn_calls) > 0
140
261
 
141
- def get_idx_info(self) -> dict[str, 'TableVersion.IndexInfo']:
142
- assert self.tbl is not None
143
- return {name: info for name, info in self.tbl.idxs_by_name.items() if info.col == self}
262
+ def stores_external_array(self) -> bool:
263
+ """Returns True if this is an Array column that might store its values externally."""
264
+ assert self.sa_col_type is not None
265
+ # Vector: if this is a vector column (ie, used for a vector index), it stores the array itself
266
+ return self.col_type.is_array_type() and not isinstance(self.sa_col_type, pgvector.sqlalchemy.Vector)
144
267
 
145
268
  @property
146
269
  def is_computed(self) -> bool:
@@ -152,77 +275,75 @@ class Column:
152
275
  assert self.stored is not None
153
276
  return self.stored
154
277
 
155
- @property
156
- def records_errors(self) -> bool:
157
- """True if this column also stores error information."""
158
- # default: record errors for computed and media columns
159
- if self._records_errors is not None:
160
- return self._records_errors
161
- return self.is_stored and (self.is_computed or self.col_type.is_media_type())
162
-
163
278
  @property
164
279
  def qualified_name(self) -> str:
165
- assert self.tbl is not None
166
- return f'{self.tbl.name}.{self.name}'
280
+ assert self.get_tbl() is not None
281
+ return f'{self.get_tbl().name}.{self.name}'
167
282
 
168
283
  @property
169
284
  def media_validation(self) -> MediaValidation:
170
285
  if self._media_validation is not None:
171
286
  return self._media_validation
172
- assert self.tbl is not None
173
- return self.tbl.media_validation
287
+ assert self.get_tbl() is not None
288
+ return self.get_tbl().media_validation
289
+
290
+ @property
291
+ def is_required_for_insert(self) -> bool:
292
+ """Returns True if column is required when inserting rows."""
293
+ return not self.col_type.nullable and not self.is_computed
174
294
 
175
295
  def source(self) -> None:
176
296
  """
177
297
  If this is a computed col and the top-level expr is a function call, print the source, if possible.
178
298
  """
179
299
  from pixeltable import exprs
300
+
180
301
  if self.value_expr is None or not isinstance(self.value_expr, exprs.FunctionCall):
181
302
  return
182
303
  self.value_expr.fn.source()
183
304
 
184
305
  def create_sa_cols(self) -> None:
185
306
  """
186
- These need to be recreated for every new table schema version.
307
+ These need to be recreated for every sql.Table instance
187
308
  """
188
309
  assert self.is_stored
310
+ assert self.stores_cellmd is not None
189
311
  # all storage columns are nullable (we deal with null errors in Pixeltable directly)
190
- self.sa_col = sql.Column(
191
- self.store_name(), self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type,
192
- nullable=True)
193
- if self.is_computed or self.col_type.is_media_type():
194
- self.sa_errormsg_col = sql.Column(self.errormsg_store_name(), ts.StringType().to_sa_type(), nullable=True)
195
- self.sa_errortype_col = sql.Column(self.errortype_store_name(), ts.StringType().to_sa_type(), nullable=True)
312
+ self.sa_col = sql.Column(self.store_name(), self.sa_col_type, nullable=True)
313
+ if self.stores_cellmd:
314
+ self.sa_cellmd_col = sql.Column(self.cellmd_store_name(), self.sa_cellmd_type(), nullable=True)
196
315
 
197
- def get_sa_col_type(self) -> sql.sqltypes.TypeEngine:
198
- return self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type
316
+ @classmethod
317
+ def cellmd_type(cls) -> ts.ColumnType:
318
+ return ts.JsonType(nullable=True)
319
+
320
+ @classmethod
321
+ def sa_cellmd_type(cls) -> sql.types.TypeEngine:
322
+ return cls.cellmd_type().to_sa_type()
199
323
 
200
324
  def store_name(self) -> str:
201
325
  assert self.id is not None
202
326
  assert self.is_stored
203
327
  return f'col_{self.id}'
204
328
 
205
- def errormsg_store_name(self) -> str:
206
- return f'{self.store_name()}_errormsg'
207
-
208
- def errortype_store_name(self) -> str:
209
- return f'{self.store_name()}_errortype'
329
+ def cellmd_store_name(self) -> str:
330
+ return f'{self.store_name()}_cellmd'
210
331
 
211
332
  def __str__(self) -> str:
212
333
  return f'{self.name}: {self.col_type}'
213
334
 
214
335
  def __repr__(self) -> str:
215
- return f'Column({self.id!r}, {self.name!r}, tbl={self.tbl.name!r})'
336
+ return f'Column({self.id!r}, {self.name!r}, tbl={self.get_tbl().name!r})'
216
337
 
217
338
  def __hash__(self) -> int:
218
339
  # TODO(aaron-siegel): This and __eq__ do not capture the table version. We need to rethink the Column
219
340
  # abstraction (perhaps separating out the version-dependent properties into a different abstraction).
220
- assert self.tbl is not None
221
- return hash((self.tbl.id, self.id))
341
+ assert self.tbl_handle is not None
342
+ return hash((self.tbl_handle.id, self.id))
222
343
 
223
344
  def __eq__(self, other: object) -> bool:
224
345
  if not isinstance(other, Column):
225
346
  return False
226
- assert self.tbl is not None
227
- assert other.tbl is not None
228
- return self.tbl.id == other.tbl.id and self.id == other.id
347
+ assert self.tbl_handle is not None
348
+ assert other.tbl_handle is not None
349
+ return self.tbl_handle.id == other.tbl_handle.id and self.id == other.id
pixeltable/catalog/dir.py CHANGED
@@ -1,32 +1,61 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import dataclasses
4
+ import json
4
5
  import logging
5
6
  from uuid import UUID
6
7
 
7
8
  import sqlalchemy as sql
8
9
 
9
- from .schema_object import SchemaObject
10
10
  from pixeltable.env import Env
11
11
  from pixeltable.metadata import schema
12
12
 
13
+ from .schema_object import SchemaObject
13
14
 
14
15
  _logger = logging.getLogger('pixeltable')
15
16
 
17
+
16
18
class Dir(SchemaObject):
    """A directory schema object, persisted as a `schema.Dir` record."""

    def __init__(self, id: UUID, parent_id: UUID, name: str):
        # SchemaObject takes its arguments in the order (id, name, parent id)
        super().__init__(id, name, parent_id)

    @classmethod
    def _create(cls, parent_id: UUID, name: str) -> Dir:
        """Add a new directory record under `parent_id` and return the matching `Dir`.

        The record is added to the current `Env` session and flushed; its id must be
        a UUID afterwards.
        """
        session = Env.get().session
        user = Env.get().user
        assert session is not None
        md = dataclasses.asdict(schema.DirMd(name=name, user=user, additional_md={}))
        record = schema.Dir(parent_id=parent_id, md=md)
        session.add(record)
        session.flush()
        new_id = record.id
        assert new_id is not None
        assert isinstance(new_id, UUID)
        return cls(new_id, parent_id, name)

    def _display_name(self) -> str:
        """Return the display name for this kind of schema object."""
        return 'directory'

    def _path(self) -> str:
        """Returns the path to this schema object."""
        if self._dir_id is not None:
            return super()._path()
        # no parent dir: we're the root dir, whose path is the empty string
        return ''

    def _move(self, new_name: str, new_dir_id: UUID) -> None:
        """Rename/move this directory and write the change to its stored record.

        Args:
            new_name: the new directory name
            new_dir_id: id of the new parent directory
        """
        super()._move(new_name, new_dir_id)
        tbl = schema.Dir.__table__
        parent_col = schema.Dir.parent_id.name
        md_col = schema.Dir.md.name
        id_col = schema.Dir.id.name
        # jsonb_set() replaces only the 'name' entry of the metadata; the new name is bound as a
        # JSON-encoded string and cast to jsonb
        update_sql = (
            f'UPDATE {tbl} '
            f'SET {parent_col} = :new_dir_id, '
            f" {md_col} = jsonb_set({md_col}, '{{name}}', (:new_name)::jsonb) "
            f'WHERE {id_col} = :id'
        )
        stmt = sql.text(update_sql)
        params = {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id}
        Env.get().conn.execute(stmt, params)
@@ -1,9 +1,10 @@
1
1
  from __future__ import annotations
2
- import dataclasses
2
+
3
3
  import enum
4
4
  import itertools
5
5
  import logging
6
- from typing import Optional
6
+ from dataclasses import dataclass
7
+ from uuid import UUID
7
8
 
8
9
  import pixeltable.exceptions as excs
9
10
 
@@ -15,28 +16,15 @@ _ROWID_COLUMN_NAME = '_rowid'
15
16
 
16
17
  # Set of symbols that are predefined in the `InsertableTable` and `View` classes (and are therefore not allowed as column names).
17
18
  # This will be populated lazily to avoid circular imports.
18
- _PREDEF_SYMBOLS: Optional[set[str]] = None
19
-
20
-
21
- @dataclasses.dataclass
22
- class UpdateStatus:
23
- """
24
- Information about updates that resulted from a table operation.
25
- """
26
- num_rows: int = 0
27
- # TODO: disambiguate what this means: # of slots computed or # of columns computed?
28
- num_computed_values: int = 0
29
- num_excs: int = 0
30
- updated_cols: list[str] = dataclasses.field(default_factory=list)
31
- cols_with_excs: list[str] = dataclasses.field(default_factory=list)
32
-
33
- def __iadd__(self, other: 'UpdateStatus') -> 'UpdateStatus':
34
- self.num_rows += other.num_rows
35
- self.num_computed_values += other.num_computed_values
36
- self.num_excs += other.num_excs
37
- self.updated_cols = list(dict.fromkeys(self.updated_cols + other.updated_cols))
38
- self.cols_with_excs = list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs))
39
- return self
19
+ _PREDEF_SYMBOLS: set[str] | None = None
20
+
21
+
22
@dataclass(frozen=True)
class QColumnId:
    """Qualified column id: a column id paired with the id of a table.

    The dataclass is frozen, so instances are immutable and hashable.
    """

    # id of the table
    tbl_id: UUID
    # id of the column
    col_id: int
40
28
 
41
29
 
42
30
  class MediaValidation(enum.Enum):
@@ -48,26 +36,57 @@ class MediaValidation(enum.Enum):
48
36
  try:
49
37
  return cls[name.upper()]
50
38
  except KeyError:
51
- val_strs = ', '.join(f'{s.lower()!r}' for s in cls.__members__.keys())
52
- raise excs.Error(f'{error_prefix} must be one of: [{val_strs}]')
39
+ val_strs = ', '.join(f'{s.lower()!r}' for s in cls.__members__)
40
+ raise excs.Error(f'{error_prefix} must be one of: [{val_strs}]') from None
41
+
42
+
43
class IfExistsParam(enum.Enum):
    """Accepted values of an if-exists style parameter: error, ignore, replace, replace_force."""

    ERROR = 0
    IGNORE = 1
    REPLACE = 2
    REPLACE_FORCE = 3

    @classmethod
    def validated(cls, param_val: str, param_name: str) -> IfExistsParam:
        """Return the member whose name matches `param_val`, ignoring case.

        Args:
            param_val: the value to validate
            param_name: the parameter name to use in the error message

        Raises:
            excs.Error: if `param_val` does not name a member; the message lists the allowed values.
        """
        members = cls.__members__
        try:
            return members[param_val.upper()]
        except KeyError:
            allowed = ', '.join(repr(member_name.lower()) for member_name in members)
            raise excs.Error(f'{param_name} must be one of: [{allowed}]') from None
56
+
57
+
58
class IfNotExistsParam(enum.Enum):
    """Accepted values of an if-not-exists style parameter: error, ignore."""

    ERROR = 0
    IGNORE = 1

    @classmethod
    def validated(cls, param_val: str, param_name: str) -> IfNotExistsParam:
        """Return the member whose name matches `param_val`, ignoring case.

        Args:
            param_val: the value to validate
            param_name: the parameter name to use in the error message

        Raises:
            excs.Error: if `param_val` does not name a member; the message lists the allowed values.
        """
        members = cls.__members__
        try:
            return members[param_val.upper()]
        except KeyError:
            allowed = ', '.join(repr(member_name.lower()) for member_name in members)
            raise excs.Error(f'{param_name} must be one of: [{allowed}]') from None
69
+
53
70
 
71
def is_valid_identifier(name: str, *, allow_system_identifiers: bool = False, allow_hyphens: bool = False) -> bool:
    """Return True if `name` is an acceptable identifier.

    Args:
        name: the candidate identifier
        allow_system_identifiers: if True, names starting with an underscore are accepted
        allow_hyphens: if True, hyphens may appear inside the name (each is treated like an underscore)

    A name may never start with a hyphen, regardless of either flag.
    """
    if name.startswith('-'):
        return False
    if name.startswith('_') and not allow_system_identifiers:
        return False
    # with allow_hyphens, check the name as if every hyphen were an underscore
    candidate = name.replace('-', '_') if allow_hyphens else name
    return candidate.isidentifier()
54
78
 
55
- def is_valid_identifier(name: str) -> bool:
56
- return name.isidentifier() and not name.startswith('_')
57
79
 
58
- def is_valid_path(path: str, empty_is_valid : bool) -> bool:
80
def is_valid_path(path: str, empty_is_valid: bool, allow_system_paths: bool = False) -> bool:
    """Return True if every '.'-separated component of `path` is a valid identifier.

    Args:
        path: the dotted path to check
        empty_is_valid: the result to return for the empty path
        allow_system_paths: passed to `is_valid_identifier()` as `allow_system_identifiers`
    """
    if path != '':
        components = path.split('.')
        return all(
            is_valid_identifier(component, allow_system_identifiers=allow_system_paths) for component in components
        )
    return empty_is_valid
61
84
 
62
- for part in path.split('.'):
63
- if not is_valid_identifier(part):
64
- return False
65
- return True
66
85
 
67
86
def is_system_column_name(name: str) -> bool:
    """Return True if `name` is the position column name or an attribute name of `InsertableTable` or `View`."""
    # imported locally: _PREDEF_SYMBOLS is populated lazily to avoid circular imports
    from pixeltable.catalog import InsertableTable, View

    global _PREDEF_SYMBOLS  # noqa: PLW0603
    if _PREDEF_SYMBOLS is None:
        # computed on first use and then kept in the module-level variable
        _PREDEF_SYMBOLS = set(dir(InsertableTable)).union(dir(View))
    if name == _POS_COLUMN_NAME:
        return True
    return name in _PREDEF_SYMBOLS