pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (202)
  1. pixeltable/__init__.py +23 -5
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/__init__.py +5 -3
  4. pixeltable/catalog/catalog.py +1318 -404
  5. pixeltable/catalog/column.py +186 -115
  6. pixeltable/catalog/dir.py +1 -2
  7. pixeltable/catalog/globals.py +11 -43
  8. pixeltable/catalog/insertable_table.py +167 -79
  9. pixeltable/catalog/path.py +61 -23
  10. pixeltable/catalog/schema_object.py +9 -10
  11. pixeltable/catalog/table.py +626 -308
  12. pixeltable/catalog/table_metadata.py +101 -0
  13. pixeltable/catalog/table_version.py +713 -569
  14. pixeltable/catalog/table_version_handle.py +37 -6
  15. pixeltable/catalog/table_version_path.py +42 -29
  16. pixeltable/catalog/tbl_ops.py +50 -0
  17. pixeltable/catalog/update_status.py +191 -0
  18. pixeltable/catalog/view.py +108 -94
  19. pixeltable/config.py +128 -22
  20. pixeltable/dataframe.py +188 -100
  21. pixeltable/env.py +407 -136
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +3 -0
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +231 -0
  27. pixeltable/exec/cell_reconstruction_node.py +135 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +7 -6
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +190 -30
  37. pixeltable/exec/globals.py +32 -0
  38. pixeltable/exec/in_memory_data_node.py +18 -18
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +206 -101
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +34 -30
  46. pixeltable/exprs/column_ref.py +92 -96
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +152 -55
  50. pixeltable/exprs/expr.py +62 -43
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +75 -37
  54. pixeltable/exprs/globals.py +1 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +10 -27
  57. pixeltable/exprs/is_null.py +1 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +5 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +127 -53
  64. pixeltable/exprs/rowid_ref.py +8 -12
  65. pixeltable/exprs/similarity_expr.py +50 -25
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +10 -10
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +20 -18
  78. pixeltable/func/signature.py +43 -16
  79. pixeltable/func/tools.py +23 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +6 -0
  82. pixeltable/functions/anthropic.py +93 -33
  83. pixeltable/functions/audio.py +114 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +1 -1
  86. pixeltable/functions/deepseek.py +20 -9
  87. pixeltable/functions/fireworks.py +2 -2
  88. pixeltable/functions/gemini.py +28 -11
  89. pixeltable/functions/globals.py +13 -13
  90. pixeltable/functions/groq.py +108 -0
  91. pixeltable/functions/huggingface.py +1046 -23
  92. pixeltable/functions/image.py +9 -18
  93. pixeltable/functions/llama_cpp.py +23 -8
  94. pixeltable/functions/math.py +3 -4
  95. pixeltable/functions/mistralai.py +4 -15
  96. pixeltable/functions/ollama.py +16 -9
  97. pixeltable/functions/openai.py +104 -82
  98. pixeltable/functions/openrouter.py +143 -0
  99. pixeltable/functions/replicate.py +2 -2
  100. pixeltable/functions/reve.py +250 -0
  101. pixeltable/functions/string.py +21 -28
  102. pixeltable/functions/timestamp.py +13 -14
  103. pixeltable/functions/together.py +4 -6
  104. pixeltable/functions/twelvelabs.py +92 -0
  105. pixeltable/functions/util.py +6 -1
  106. pixeltable/functions/video.py +1388 -106
  107. pixeltable/functions/vision.py +7 -7
  108. pixeltable/functions/whisper.py +15 -7
  109. pixeltable/functions/whisperx.py +179 -0
  110. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  111. pixeltable/globals.py +332 -105
  112. pixeltable/index/base.py +13 -22
  113. pixeltable/index/btree.py +23 -22
  114. pixeltable/index/embedding_index.py +32 -44
  115. pixeltable/io/__init__.py +4 -2
  116. pixeltable/io/datarows.py +7 -6
  117. pixeltable/io/external_store.py +49 -77
  118. pixeltable/io/fiftyone.py +11 -11
  119. pixeltable/io/globals.py +29 -28
  120. pixeltable/io/hf_datasets.py +17 -9
  121. pixeltable/io/label_studio.py +70 -66
  122. pixeltable/io/lancedb.py +3 -0
  123. pixeltable/io/pandas.py +12 -11
  124. pixeltable/io/parquet.py +13 -93
  125. pixeltable/io/table_data_conduit.py +71 -47
  126. pixeltable/io/utils.py +3 -3
  127. pixeltable/iterators/__init__.py +2 -1
  128. pixeltable/iterators/audio.py +21 -11
  129. pixeltable/iterators/document.py +116 -55
  130. pixeltable/iterators/image.py +5 -2
  131. pixeltable/iterators/video.py +293 -13
  132. pixeltable/metadata/__init__.py +4 -2
  133. pixeltable/metadata/converters/convert_18.py +2 -2
  134. pixeltable/metadata/converters/convert_19.py +2 -2
  135. pixeltable/metadata/converters/convert_20.py +2 -2
  136. pixeltable/metadata/converters/convert_21.py +2 -2
  137. pixeltable/metadata/converters/convert_22.py +2 -2
  138. pixeltable/metadata/converters/convert_24.py +2 -2
  139. pixeltable/metadata/converters/convert_25.py +2 -2
  140. pixeltable/metadata/converters/convert_26.py +2 -2
  141. pixeltable/metadata/converters/convert_29.py +4 -4
  142. pixeltable/metadata/converters/convert_34.py +2 -2
  143. pixeltable/metadata/converters/convert_36.py +2 -2
  144. pixeltable/metadata/converters/convert_37.py +15 -0
  145. pixeltable/metadata/converters/convert_38.py +39 -0
  146. pixeltable/metadata/converters/convert_39.py +124 -0
  147. pixeltable/metadata/converters/convert_40.py +73 -0
  148. pixeltable/metadata/converters/util.py +13 -12
  149. pixeltable/metadata/notes.py +4 -0
  150. pixeltable/metadata/schema.py +79 -42
  151. pixeltable/metadata/utils.py +74 -0
  152. pixeltable/mypy/__init__.py +3 -0
  153. pixeltable/mypy/mypy_plugin.py +123 -0
  154. pixeltable/plan.py +274 -223
  155. pixeltable/share/__init__.py +1 -1
  156. pixeltable/share/packager.py +259 -129
  157. pixeltable/share/protocol/__init__.py +34 -0
  158. pixeltable/share/protocol/common.py +170 -0
  159. pixeltable/share/protocol/operation_types.py +33 -0
  160. pixeltable/share/protocol/replica.py +109 -0
  161. pixeltable/share/publish.py +213 -57
  162. pixeltable/store.py +238 -175
  163. pixeltable/type_system.py +104 -63
  164. pixeltable/utils/__init__.py +2 -3
  165. pixeltable/utils/arrow.py +108 -13
  166. pixeltable/utils/av.py +298 -0
  167. pixeltable/utils/azure_store.py +305 -0
  168. pixeltable/utils/code.py +3 -3
  169. pixeltable/utils/console_output.py +4 -1
  170. pixeltable/utils/coroutine.py +6 -23
  171. pixeltable/utils/dbms.py +31 -5
  172. pixeltable/utils/description_helper.py +4 -5
  173. pixeltable/utils/documents.py +5 -6
  174. pixeltable/utils/exception_handler.py +7 -30
  175. pixeltable/utils/filecache.py +6 -6
  176. pixeltable/utils/formatter.py +4 -6
  177. pixeltable/utils/gcs_store.py +283 -0
  178. pixeltable/utils/http_server.py +2 -3
  179. pixeltable/utils/iceberg.py +1 -2
  180. pixeltable/utils/image.py +17 -0
  181. pixeltable/utils/lancedb.py +88 -0
  182. pixeltable/utils/local_store.py +316 -0
  183. pixeltable/utils/misc.py +5 -0
  184. pixeltable/utils/object_stores.py +528 -0
  185. pixeltable/utils/pydantic.py +60 -0
  186. pixeltable/utils/pytorch.py +5 -6
  187. pixeltable/utils/s3_store.py +392 -0
  188. pixeltable-0.4.20.dist-info/METADATA +587 -0
  189. pixeltable-0.4.20.dist-info/RECORD +218 -0
  190. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
  191. pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
  192. pixeltable/__version__.py +0 -3
  193. pixeltable/ext/__init__.py +0 -17
  194. pixeltable/ext/functions/__init__.py +0 -11
  195. pixeltable/ext/functions/whisperx.py +0 -77
  196. pixeltable/utils/media_store.py +0 -77
  197. pixeltable/utils/s3.py +0 -17
  198. pixeltable/utils/sample.py +0 -25
  199. pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
  200. pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
  201. pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
  202. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
@@ -2,19 +2,25 @@ from __future__ import annotations
2
2
 
3
3
  import enum
4
4
  import logging
5
- from typing import TYPE_CHECKING, Any, Literal, Optional, overload
5
+ from typing import TYPE_CHECKING, Any, Literal, Sequence, cast, overload
6
6
  from uuid import UUID
7
7
 
8
+ import pydantic
9
+ import pydantic_core
10
+
8
11
  import pixeltable as pxt
9
12
  from pixeltable import exceptions as excs, type_system as ts
10
13
  from pixeltable.env import Env
11
14
  from pixeltable.utils.filecache import FileCache
15
+ from pixeltable.utils.pydantic import is_json_convertible
12
16
 
13
- from .globals import MediaValidation, UpdateStatus
17
+ from .globals import MediaValidation
14
18
  from .table import Table
15
- from .table_version import TableVersion
19
+ from .table_version import TableVersion, TableVersionCompleteMd
16
20
  from .table_version_handle import TableVersionHandle
17
21
  from .table_version_path import TableVersionPath
22
+ from .tbl_ops import CreateStoreTableOp, TableOp
23
+ from .update_status import UpdateStatus
18
24
 
19
25
  if TYPE_CHECKING:
20
26
  from pixeltable import exprs
@@ -51,72 +57,63 @@ class InsertableTable(Table):
51
57
  def __init__(self, dir_id: UUID, tbl_version: TableVersionHandle):
52
58
  tbl_version_path = TableVersionPath(tbl_version)
53
59
  super().__init__(tbl_version.id, dir_id, tbl_version.get().name, tbl_version_path)
60
+ self._tbl_version = tbl_version
54
61
 
55
- @classmethod
56
- def _display_name(cls) -> str:
62
+ def _display_name(self) -> str:
63
+ assert not self._tbl_version_path.is_replica()
57
64
  return 'table'
58
65
 
59
66
  @classmethod
60
67
  def _create(
61
68
  cls,
62
- dir_id: UUID,
63
69
  name: str,
64
70
  schema: dict[str, ts.ColumnType],
65
- df: Optional[pxt.DataFrame],
66
71
  primary_key: list[str],
67
72
  num_retained_versions: int,
68
73
  comment: str,
69
74
  media_validation: MediaValidation,
70
- ) -> InsertableTable:
75
+ create_default_idxs: bool,
76
+ ) -> tuple[TableVersionCompleteMd, list[TableOp]]:
71
77
  columns = cls._create_columns(schema)
72
78
  cls._verify_schema(columns)
73
79
  column_names = [col.name for col in columns]
74
80
  for pk_col in primary_key:
75
81
  if pk_col not in column_names:
76
- raise excs.Error(f'Primary key column {pk_col} not found in table schema')
82
+ raise excs.Error(f'Primary key column {pk_col!r} not found in table schema.')
77
83
  col = columns[column_names.index(pk_col)]
78
84
  if col.col_type.nullable:
79
- raise excs.Error(f'Primary key column {pk_col} cannot be nullable')
85
+ raise excs.Error(f'Primary key column {pk_col!r} cannot be nullable.')
80
86
  col.is_pk = True
81
87
 
82
- _, tbl_version = TableVersion.create(
83
- dir_id,
88
+ md = TableVersion.create_initial_md(
84
89
  name,
85
90
  columns,
86
- num_retained_versions=num_retained_versions,
87
- comment=comment,
88
- media_validation=media_validation,
91
+ num_retained_versions,
92
+ comment,
93
+ media_validation,
94
+ create_default_idxs=create_default_idxs,
95
+ view_md=None,
89
96
  )
90
- tbl = cls(dir_id, TableVersionHandle.create(tbl_version))
91
- # TODO We need to commit before doing the insertion, in order to avoid a primary key (version) collision
92
- # when the table metadata gets updated. Once we have a notion of user-defined transactions in
93
- # Pixeltable, we can wrap the create/insert in a transaction to avoid this.
94
- session = Env.get().session
95
- session.commit()
96
- if df is not None:
97
- # A DataFrame was provided, so insert its contents into the table
98
- # (using the same DB session as the table creation)
99
- tbl_version.insert(None, df, fail_on_exception=True)
100
- session.commit()
101
-
102
- _logger.info(f'Created table `{name}`, id={tbl_version.id}')
103
- Env.get().console_logger.info(f'Created table `{name}`.')
104
- return tbl
105
-
106
- def get_metadata(self) -> dict[str, Any]:
107
- md = super().get_metadata()
108
- md['is_view'] = False
109
- md['is_snapshot'] = False
110
- return md
97
+
98
+ ops = [
99
+ TableOp(
100
+ tbl_id=md.tbl_md.tbl_id,
101
+ op_sn=0,
102
+ num_ops=1,
103
+ needs_xact=False,
104
+ create_store_table_op=CreateStoreTableOp(),
105
+ )
106
+ ]
107
+ return md, ops
111
108
 
112
109
  @overload
113
110
  def insert(
114
111
  self,
115
- source: Optional[TableDataSource] = None,
112
+ source: TableDataSource | None = None,
116
113
  /,
117
114
  *,
118
- source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
119
- schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
115
+ source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
116
+ schema_overrides: dict[str, ts.ColumnType] | None = None,
120
117
  on_error: Literal['abort', 'ignore'] = 'abort',
121
118
  print_stats: bool = False,
122
119
  **kwargs: Any,
@@ -129,11 +126,11 @@ class InsertableTable(Table):
129
126
 
130
127
  def insert(
131
128
  self,
132
- source: Optional[TableDataSource] = None,
129
+ source: TableDataSource | None = None,
133
130
  /,
134
131
  *,
135
- source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
136
- schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
132
+ source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
133
+ schema_overrides: dict[str, ts.ColumnType] | None = None,
137
134
  on_error: Literal['abort', 'ignore'] = 'abort',
138
135
  print_stats: bool = False,
139
136
  **kwargs: Any,
@@ -141,8 +138,24 @@ class InsertableTable(Table):
141
138
  from pixeltable.catalog import Catalog
142
139
  from pixeltable.io.table_data_conduit import UnkTableDataConduit
143
140
 
144
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
141
+ if source is not None and isinstance(source, Sequence) and len(source) == 0:
142
+ raise excs.Error('Cannot insert an empty sequence.')
143
+ fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
144
+
145
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
145
146
  table = self
147
+
148
+ # TODO: unify with TableDataConduit
149
+ if source is not None and isinstance(source, Sequence) and isinstance(source[0], pydantic.BaseModel):
150
+ status = self._insert_pydantic(
151
+ cast(Sequence[pydantic.BaseModel], source), # needed for mypy
152
+ print_stats=print_stats,
153
+ fail_on_exception=fail_on_exception,
154
+ )
155
+ Env.get().console_logger.info(status.insert_msg)
156
+ FileCache.get().emit_eviction_warnings()
157
+ return status
158
+
146
159
  if source is None:
147
160
  source = [kwargs]
148
161
  kwargs = None
@@ -158,7 +171,6 @@ class InsertableTable(Table):
158
171
  data_source.add_table_info(table)
159
172
  data_source.prepare_for_insert_into_table()
160
173
 
161
- fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
162
174
  return table.insert_table_data_source(
163
175
  data_source=data_source, fail_on_exception=fail_on_exception, print_stats=print_stats
164
176
  )
@@ -170,13 +182,14 @@ class InsertableTable(Table):
170
182
  from pixeltable.catalog import Catalog
171
183
  from pixeltable.io.table_data_conduit import DFTableDataConduit
172
184
 
173
- status = pxt.UpdateStatus()
174
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
185
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
175
186
  if isinstance(data_source, DFTableDataConduit):
187
+ status = pxt.UpdateStatus()
176
188
  status += self._tbl_version.get().insert(
177
189
  rows=None, df=data_source.pxt_df, print_stats=print_stats, fail_on_exception=fail_on_exception
178
190
  )
179
191
  else:
192
+ status = pxt.UpdateStatus()
180
193
  for row_batch in data_source.valid_row_batch():
181
194
  status += self._tbl_version.get().insert(
182
195
  rows=row_batch, df=None, print_stats=print_stats, fail_on_exception=fail_on_exception
@@ -187,34 +200,110 @@ class InsertableTable(Table):
187
200
  FileCache.get().emit_eviction_warnings()
188
201
  return status
189
202
 
190
- def _validate_input_rows(self, rows: list[dict[str, Any]]) -> None:
191
- """Verify that the input rows match the table schema"""
192
- valid_col_names = set(self._schema.keys())
193
- reqd_col_names = set(self._tbl_version_path.tbl_version.get().get_required_col_names())
194
- computed_col_names = set(self._tbl_version_path.tbl_version.get().get_computed_col_names())
195
- for row in rows:
196
- assert isinstance(row, dict)
197
- col_names = set(row.keys())
198
- if len(reqd_col_names - col_names) > 0:
199
- raise excs.Error(f'Missing required column(s) ({", ".join(reqd_col_names - col_names)}) in row {row}')
200
-
201
- for col_name, val in row.items():
202
- if col_name not in valid_col_names:
203
- raise excs.Error(f'Unknown column name {col_name} in row {row}')
204
- if col_name in computed_col_names:
205
- raise excs.Error(f'Value for computed column {col_name} in row {row}')
206
-
207
- # validate data
208
- col = self._tbl_version_path.get_column(col_name)
209
- try:
210
- # basic sanity checks here
211
- checked_val = col.col_type.create_literal(val)
212
- row[col_name] = checked_val
213
- except TypeError as e:
214
- msg = str(e)
215
- raise excs.Error(f'Error in column {col.name}: {msg[0].lower() + msg[1:]}\nRow: {row}') from e
216
-
217
- def delete(self, where: Optional['exprs.Expr'] = None) -> UpdateStatus:
203
+ def _insert_pydantic(
204
+ self, rows: Sequence[pydantic.BaseModel], print_stats: bool = False, fail_on_exception: bool = True
205
+ ) -> UpdateStatus:
206
+ model_class = type(rows[0])
207
+ self._validate_pydantic_model(model_class)
208
+ # convert rows one-by-one in order to be able to print meaningful error messages
209
+ pxt_rows: list[dict[str, Any]] = []
210
+ for i, row in enumerate(rows):
211
+ try:
212
+ pxt_rows.append(row.model_dump(mode='json'))
213
+ except pydantic_core.PydanticSerializationError as e:
214
+ raise excs.Error(f'Row {i}: error serializing pydantic model to JSON:\n{e}') from e
215
+
216
+ # explicitly check that all required columns are present and non-None in the rows,
217
+ # because we ignore nullability when validating the pydantic model
218
+ reqd_col_names = [col.name for col in self._tbl_version_path.columns() if col.is_required_for_insert]
219
+ for i, pxt_row in enumerate(pxt_rows):
220
+ if type(rows[i]) is not model_class:
221
+ raise excs.Error(
222
+ f'Expected an instance of `{model_class.__name__}`; got `{type(rows[i]).__name__}` (in row {i})'
223
+ )
224
+ for col_name in reqd_col_names:
225
+ if pxt_row.get(col_name) is None:
226
+ raise excs.Error(f'Missing required column {col_name!r} in row {i}')
227
+
228
+ status = self._tbl_version.get().insert(
229
+ rows=pxt_rows, df=None, print_stats=print_stats, fail_on_exception=fail_on_exception
230
+ )
231
+ return status
232
+
233
+ def _validate_pydantic_model(self, model: type[pydantic.BaseModel]) -> None:
234
+ """
235
+ Check if a Pydantic model is compatible with this table for insert operations.
236
+
237
+ A model is compatible if:
238
+ - All required table columns have corresponding model fields with compatible types
239
+ - Model does not define fields for computed columns
240
+ - Model field types are compatible with table column types
241
+ """
242
+ assert isinstance(model, type) and issubclass(model, pydantic.BaseModel)
243
+
244
+ schema = self._get_schema()
245
+ required_cols = set(self._tbl_version.get().get_required_col_names())
246
+ computed_cols = set(self._tbl_version.get().get_computed_col_names())
247
+ model_fields = model.model_fields
248
+ model_field_names = set(model_fields.keys())
249
+
250
+ missing_required = required_cols - model_field_names
251
+ if missing_required:
252
+ raise excs.Error(
253
+ f'Pydantic model `{model.__name__}` is missing required columns: ' + ', '.join(missing_required)
254
+ )
255
+
256
+ computed_in_model = computed_cols & model_field_names
257
+ if computed_in_model:
258
+ raise excs.Error(
259
+ f'Pydantic model `{model.__name__}` has fields for computed columns: ' + ', '.join(computed_in_model)
260
+ )
261
+
262
+ # validate type compatibility
263
+ common_fields = model_field_names & set(schema.keys())
264
+ if len(common_fields) == 0:
265
+ raise excs.Error(
266
+ f'Pydantic model `{model.__name__}` has no fields that map to columns in table {self._name!r}'
267
+ )
268
+ for field_name in common_fields:
269
+ pxt_col_type = schema[field_name]
270
+ model_field = model_fields[field_name]
271
+ model_type = model_field.annotation
272
+
273
+ # we ignore nullability: we want to accept optional model fields for required table columns, as long as
274
+ # the model instances provide a non-null value
275
+ # infer_pydantic_json=True: model_dump(mode='json') converts enums to their values
276
+ inferred_pxt_type = ts.ColumnType.from_python_type(model_type, infer_pydantic_json=True)
277
+ if inferred_pxt_type is None:
278
+ raise excs.Error(
279
+ f'Pydantic model `{model.__name__}`: cannot infer Pixeltable type for column {field_name!r}'
280
+ )
281
+
282
+ if pxt_col_type.is_media_type():
283
+ # media types require file paths, either as str or Path
284
+ if not inferred_pxt_type.is_string_type():
285
+ raise excs.Error(
286
+ f'Column {field_name!r} requires a `str` or `Path` field in `{model.__name__}`, but it is '
287
+ f'`{model_type.__name__}`'
288
+ )
289
+ else:
290
+ if not pxt_col_type.is_supertype_of(inferred_pxt_type, ignore_nullable=True):
291
+ raise excs.Error(
292
+ f'Pydantic model `{model.__name__}` has incompatible type `{model_type.__name__}` '
293
+ f'for column {field_name!r} (of Pixeltable type `{pxt_col_type}`)'
294
+ )
295
+
296
+ if (
297
+ isinstance(model_type, type)
298
+ and issubclass(model_type, pydantic.BaseModel)
299
+ and not is_json_convertible(model_type)
300
+ ):
301
+ raise excs.Error(
302
+ f'Pydantic model `{model.__name__}` has field {field_name!r} with nested model '
303
+ f'`{model_type.__name__}`, which is not JSON-convertible'
304
+ )
305
+
306
+ def delete(self, where: 'exprs.Expr' | None = None) -> UpdateStatus:
218
307
  """Delete rows in this table.
219
308
 
220
309
  Args:
@@ -231,16 +320,15 @@ class InsertableTable(Table):
231
320
  """
232
321
  from pixeltable.catalog import Catalog
233
322
 
234
- with Catalog.get().begin_xact(tbl_id=self._id, for_write=True):
323
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
235
324
  return self._tbl_version.get().delete(where=where)
236
325
 
237
- @property
238
- def _base_table(self) -> Optional['Table']:
326
+ def _get_base_table(self) -> 'Table' | None:
239
327
  return None
240
328
 
241
329
  @property
242
- def _effective_base_versions(self) -> list[Optional[int]]:
330
+ def _effective_base_versions(self) -> list[int | None]:
243
331
  return []
244
332
 
245
333
  def _table_descriptor(self) -> str:
246
- return f'Table {self._path()!r}'
334
+ return self._display_str()
@@ -1,20 +1,53 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- from typing import Iterator
4
+ from typing import NamedTuple
5
5
 
6
6
  from pixeltable import exceptions as excs
7
7
 
8
- from .globals import is_valid_path
8
+ from .globals import is_valid_identifier
9
9
 
10
10
  _logger = logging.getLogger('pixeltable')
11
11
 
12
12
 
13
- class Path:
14
- def __init__(self, path: str, empty_is_valid: bool = False, allow_system_paths: bool = False):
15
- if not is_valid_path(path, empty_is_valid, allow_system_paths):
16
- raise excs.Error(f"Invalid path format: '{path}'")
17
- self.components = path.split('.')
13
+ class Path(NamedTuple):
14
+ components: list[str]
15
+ version: int | None = None
16
+
17
+ @classmethod
18
+ def parse(
19
+ cls,
20
+ path: str,
21
+ allow_empty_path: bool = False,
22
+ allow_system_path: bool = False,
23
+ allow_versioned_path: bool = False,
24
+ ) -> Path:
25
+ components: list[str]
26
+ version: int | None
27
+ if ':' in path:
28
+ parts = path.split(':')
29
+ if len(parts) != 2:
30
+ raise excs.Error(f'Invalid path: {path}')
31
+ try:
32
+ components = parts[0].split('.')
33
+ version = int(parts[1])
34
+ except ValueError:
35
+ raise excs.Error(f'Invalid path: {path}') from None
36
+ else:
37
+ components = path.split('.')
38
+ version = None
39
+
40
+ if components == [''] and not allow_empty_path:
41
+ raise excs.Error(f'Invalid path: {path}')
42
+
43
+ if components != [''] and not all(is_valid_identifier(c, allow_system_path) for c in components):
44
+ raise excs.Error(f'Invalid path: {path}')
45
+
46
+ if version is not None and not allow_versioned_path:
47
+ raise excs.Error(f'Versioned path not allowed here: {path}')
48
+
49
+ assert len(components) > 0
50
+ return Path(components, version)
18
51
 
19
52
  @property
20
53
  def len(self) -> int:
@@ -22,7 +55,6 @@ class Path:
22
55
 
23
56
  @property
24
57
  def name(self) -> str:
25
- assert len(self.components) > 0
26
58
  return self.components[-1]
27
59
 
28
60
  @property
@@ -36,18 +68,15 @@ class Path:
36
68
  @property
37
69
  def parent(self) -> Path:
38
70
  if len(self.components) == 1:
39
- if self.is_root:
40
- return self
41
- else:
42
- return Path('', empty_is_valid=True, allow_system_paths=True)
71
+ return ROOT_PATH # Includes the case of the root path, which is its own parent.
43
72
  else:
44
- return Path('.'.join(self.components[:-1]), allow_system_paths=True)
73
+ return Path(self.components[:-1])
45
74
 
46
75
  def append(self, name: str) -> Path:
47
76
  if self.is_root:
48
- return Path(name, allow_system_paths=True)
77
+ return Path([name])
49
78
  else:
50
- return Path(f'{self}.{name}', allow_system_paths=True)
79
+ return Path([*self.components, name])
51
80
 
52
81
  def is_ancestor(self, other: Path, is_parent: bool = False) -> bool:
53
82
  """
@@ -60,22 +89,31 @@ class Path:
60
89
  is_prefix = self.components == other.components[: self.len]
61
90
  return is_prefix and (self.len == (other.len - 1) or not is_parent)
62
91
 
63
- def ancestors(self) -> Iterator[Path]:
92
+ def ancestors(self) -> list[Path]:
64
93
  """
65
- Return all ancestors of this path in top-down order including root.
94
+ Return all proper ancestors of this path in top-down order including root.
66
95
  If this path is for the root directory, which has no parent, then no ancestors are returned.
67
96
  """
68
97
  if self.is_root:
69
- return
98
+ return []
70
99
  else:
71
- for i in range(0, len(self.components)):
72
- yield Path('.'.join(self.components[0:i]), empty_is_valid=True)
100
+ return [Path(self.components[:i]) if i > 0 else ROOT_PATH for i in range(len(self.components))]
73
101
 
74
102
  def __repr__(self) -> str:
75
103
  return repr(str(self))
76
104
 
77
105
  def __str__(self) -> str:
78
- return '.'.join(self.components)
106
+ base = '.'.join(self.components)
107
+ if self.version is not None:
108
+ return f'{base}:{self.version}'
109
+ else:
110
+ return base
111
+
112
+ def __eq__(self, other: object) -> bool:
113
+ return isinstance(other, Path) and str(self) == str(other)
114
+
115
+ def __hash__(self) -> int:
116
+ return hash(str(self))
117
+
79
118
 
80
- def __lt__(self, other: Path) -> bool:
81
- return str(self) < str(other)
119
+ ROOT_PATH = Path([''])
@@ -1,5 +1,5 @@
1
1
  from abc import abstractmethod
2
- from typing import TYPE_CHECKING, Any, Optional
2
+ from typing import TYPE_CHECKING
3
3
  from uuid import UUID
4
4
 
5
5
  if TYPE_CHECKING:
@@ -14,15 +14,16 @@ class SchemaObject:
14
14
 
15
15
  _id: UUID
16
16
  _name: str
17
- _dir_id: Optional[UUID]
17
+ _dir_id: UUID | None
18
18
 
19
- def __init__(self, obj_id: UUID, name: str, dir_id: Optional[UUID]):
19
+ def __init__(self, obj_id: UUID, name: str, dir_id: UUID | None):
20
20
  # make these private so they don't collide with column names (id and name are fairly common)
21
+ assert dir_id is None or isinstance(dir_id, UUID), type(dir_id)
21
22
  self._id = obj_id
22
23
  self._name = name
23
24
  self._dir_id = dir_id
24
25
 
25
- def _parent(self) -> Optional['catalog.Dir']:
26
+ def _parent(self) -> 'catalog.Dir | None':
26
27
  """Returns the parent directory of this schema object."""
27
28
  from .catalog import Catalog
28
29
 
@@ -40,18 +41,16 @@ class SchemaObject:
40
41
  path = Catalog.get().get_dir_path(self._dir_id)
41
42
  return str(path.append(self._name))
42
43
 
43
- def get_metadata(self) -> dict[str, Any]:
44
- """Returns metadata associated with this schema object."""
45
- return {'name': self._name, 'path': self._path()}
46
-
47
- @classmethod
48
44
  @abstractmethod
49
- def _display_name(cls) -> str:
45
+ def _display_name(self) -> str:
50
46
  """
51
47
  Return name displayed in error messages.
52
48
  """
53
49
  pass
54
50
 
51
+ def _display_str(self) -> str:
52
+ return f'{self._display_name()} {self._path()!r}'
53
+
55
54
  def _move(self, new_name: str, new_dir_id: UUID) -> None:
56
55
  """Subclasses need to override this to make the change persistent"""
57
56
  self._name = new_name