pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. pixeltable/__init__.py +42 -8
  2. pixeltable/{dataframe.py → _query.py} +470 -206
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +5 -4
  5. pixeltable/catalog/catalog.py +1785 -432
  6. pixeltable/catalog/column.py +190 -113
  7. pixeltable/catalog/dir.py +2 -4
  8. pixeltable/catalog/globals.py +19 -46
  9. pixeltable/catalog/insertable_table.py +191 -98
  10. pixeltable/catalog/path.py +63 -23
  11. pixeltable/catalog/schema_object.py +11 -15
  12. pixeltable/catalog/table.py +843 -436
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +978 -657
  15. pixeltable/catalog/table_version_handle.py +72 -16
  16. pixeltable/catalog/table_version_path.py +112 -43
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +134 -90
  20. pixeltable/config.py +134 -22
  21. pixeltable/env.py +471 -157
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +4 -1
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +11 -7
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +106 -56
  37. pixeltable/exec/globals.py +35 -0
  38. pixeltable/exec/in_memory_data_node.py +19 -19
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +351 -84
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +36 -23
  46. pixeltable/exprs/column_ref.py +213 -89
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +164 -54
  50. pixeltable/exprs/expr.py +70 -44
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +100 -40
  54. pixeltable/exprs/globals.py +2 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +18 -32
  57. pixeltable/exprs/is_null.py +7 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +27 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +167 -67
  64. pixeltable/exprs/rowid_ref.py +25 -10
  65. pixeltable/exprs/similarity_expr.py +58 -40
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +17 -11
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +29 -27
  78. pixeltable/func/signature.py +46 -19
  79. pixeltable/func/tools.py +31 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +16 -0
  82. pixeltable/functions/anthropic.py +123 -77
  83. pixeltable/functions/audio.py +147 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +7 -4
  86. pixeltable/functions/deepseek.py +35 -43
  87. pixeltable/functions/document.py +81 -0
  88. pixeltable/functions/fal.py +76 -0
  89. pixeltable/functions/fireworks.py +11 -20
  90. pixeltable/functions/gemini.py +195 -39
  91. pixeltable/functions/globals.py +142 -14
  92. pixeltable/functions/groq.py +108 -0
  93. pixeltable/functions/huggingface.py +1056 -24
  94. pixeltable/functions/image.py +115 -57
  95. pixeltable/functions/json.py +1 -1
  96. pixeltable/functions/llama_cpp.py +28 -13
  97. pixeltable/functions/math.py +67 -5
  98. pixeltable/functions/mistralai.py +18 -55
  99. pixeltable/functions/net.py +70 -0
  100. pixeltable/functions/ollama.py +20 -13
  101. pixeltable/functions/openai.py +240 -226
  102. pixeltable/functions/openrouter.py +143 -0
  103. pixeltable/functions/replicate.py +4 -4
  104. pixeltable/functions/reve.py +250 -0
  105. pixeltable/functions/string.py +239 -69
  106. pixeltable/functions/timestamp.py +16 -16
  107. pixeltable/functions/together.py +24 -84
  108. pixeltable/functions/twelvelabs.py +188 -0
  109. pixeltable/functions/util.py +6 -1
  110. pixeltable/functions/uuid.py +30 -0
  111. pixeltable/functions/video.py +1515 -107
  112. pixeltable/functions/vision.py +8 -8
  113. pixeltable/functions/voyageai.py +289 -0
  114. pixeltable/functions/whisper.py +16 -8
  115. pixeltable/functions/whisperx.py +179 -0
  116. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  117. pixeltable/globals.py +362 -115
  118. pixeltable/index/base.py +17 -21
  119. pixeltable/index/btree.py +28 -22
  120. pixeltable/index/embedding_index.py +100 -118
  121. pixeltable/io/__init__.py +4 -2
  122. pixeltable/io/datarows.py +8 -7
  123. pixeltable/io/external_store.py +56 -105
  124. pixeltable/io/fiftyone.py +13 -13
  125. pixeltable/io/globals.py +31 -30
  126. pixeltable/io/hf_datasets.py +61 -16
  127. pixeltable/io/label_studio.py +74 -70
  128. pixeltable/io/lancedb.py +3 -0
  129. pixeltable/io/pandas.py +21 -12
  130. pixeltable/io/parquet.py +25 -105
  131. pixeltable/io/table_data_conduit.py +250 -123
  132. pixeltable/io/utils.py +4 -4
  133. pixeltable/iterators/__init__.py +2 -1
  134. pixeltable/iterators/audio.py +26 -25
  135. pixeltable/iterators/base.py +9 -3
  136. pixeltable/iterators/document.py +112 -78
  137. pixeltable/iterators/image.py +12 -15
  138. pixeltable/iterators/string.py +11 -4
  139. pixeltable/iterators/video.py +523 -120
  140. pixeltable/metadata/__init__.py +14 -3
  141. pixeltable/metadata/converters/convert_13.py +2 -2
  142. pixeltable/metadata/converters/convert_18.py +2 -2
  143. pixeltable/metadata/converters/convert_19.py +2 -2
  144. pixeltable/metadata/converters/convert_20.py +2 -2
  145. pixeltable/metadata/converters/convert_21.py +2 -2
  146. pixeltable/metadata/converters/convert_22.py +2 -2
  147. pixeltable/metadata/converters/convert_24.py +2 -2
  148. pixeltable/metadata/converters/convert_25.py +2 -2
  149. pixeltable/metadata/converters/convert_26.py +2 -2
  150. pixeltable/metadata/converters/convert_29.py +4 -4
  151. pixeltable/metadata/converters/convert_30.py +34 -21
  152. pixeltable/metadata/converters/convert_34.py +2 -2
  153. pixeltable/metadata/converters/convert_35.py +9 -0
  154. pixeltable/metadata/converters/convert_36.py +38 -0
  155. pixeltable/metadata/converters/convert_37.py +15 -0
  156. pixeltable/metadata/converters/convert_38.py +39 -0
  157. pixeltable/metadata/converters/convert_39.py +124 -0
  158. pixeltable/metadata/converters/convert_40.py +73 -0
  159. pixeltable/metadata/converters/convert_41.py +12 -0
  160. pixeltable/metadata/converters/convert_42.py +9 -0
  161. pixeltable/metadata/converters/convert_43.py +44 -0
  162. pixeltable/metadata/converters/util.py +20 -31
  163. pixeltable/metadata/notes.py +9 -0
  164. pixeltable/metadata/schema.py +140 -53
  165. pixeltable/metadata/utils.py +74 -0
  166. pixeltable/mypy/__init__.py +3 -0
  167. pixeltable/mypy/mypy_plugin.py +123 -0
  168. pixeltable/plan.py +382 -115
  169. pixeltable/share/__init__.py +1 -1
  170. pixeltable/share/packager.py +547 -83
  171. pixeltable/share/protocol/__init__.py +33 -0
  172. pixeltable/share/protocol/common.py +165 -0
  173. pixeltable/share/protocol/operation_types.py +33 -0
  174. pixeltable/share/protocol/replica.py +119 -0
  175. pixeltable/share/publish.py +257 -59
  176. pixeltable/store.py +311 -194
  177. pixeltable/type_system.py +373 -211
  178. pixeltable/utils/__init__.py +2 -3
  179. pixeltable/utils/arrow.py +131 -17
  180. pixeltable/utils/av.py +298 -0
  181. pixeltable/utils/azure_store.py +346 -0
  182. pixeltable/utils/coco.py +6 -6
  183. pixeltable/utils/code.py +3 -3
  184. pixeltable/utils/console_output.py +4 -1
  185. pixeltable/utils/coroutine.py +6 -23
  186. pixeltable/utils/dbms.py +32 -6
  187. pixeltable/utils/description_helper.py +4 -5
  188. pixeltable/utils/documents.py +7 -18
  189. pixeltable/utils/exception_handler.py +7 -30
  190. pixeltable/utils/filecache.py +6 -6
  191. pixeltable/utils/formatter.py +86 -48
  192. pixeltable/utils/gcs_store.py +295 -0
  193. pixeltable/utils/http.py +133 -0
  194. pixeltable/utils/http_server.py +2 -3
  195. pixeltable/utils/iceberg.py +1 -2
  196. pixeltable/utils/image.py +17 -0
  197. pixeltable/utils/lancedb.py +90 -0
  198. pixeltable/utils/local_store.py +322 -0
  199. pixeltable/utils/misc.py +5 -0
  200. pixeltable/utils/object_stores.py +573 -0
  201. pixeltable/utils/pydantic.py +60 -0
  202. pixeltable/utils/pytorch.py +5 -6
  203. pixeltable/utils/s3_store.py +527 -0
  204. pixeltable/utils/sql.py +26 -0
  205. pixeltable/utils/system.py +30 -0
  206. pixeltable-0.5.7.dist-info/METADATA +579 -0
  207. pixeltable-0.5.7.dist-info/RECORD +227 -0
  208. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  209. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  210. pixeltable/__version__.py +0 -3
  211. pixeltable/catalog/named_function.py +0 -40
  212. pixeltable/ext/__init__.py +0 -17
  213. pixeltable/ext/functions/__init__.py +0 -11
  214. pixeltable/ext/functions/whisperx.py +0 -77
  215. pixeltable/utils/media_store.py +0 -77
  216. pixeltable/utils/s3.py +0 -17
  217. pixeltable-0.3.14.dist-info/METADATA +0 -434
  218. pixeltable-0.3.14.dist-info/RECORD +0 -186
  219. pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
  220. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -2,19 +2,25 @@ from __future__ import annotations
2
2
 
3
3
  import enum
4
4
  import logging
5
- from typing import TYPE_CHECKING, Any, Literal, Optional, overload
5
+ from typing import TYPE_CHECKING, Any, Literal, Sequence, cast, overload
6
6
  from uuid import UUID
7
7
 
8
+ import pydantic
9
+ import pydantic_core
10
+
8
11
  import pixeltable as pxt
9
12
  from pixeltable import exceptions as excs, type_system as ts
10
13
  from pixeltable.env import Env
11
14
  from pixeltable.utils.filecache import FileCache
15
+ from pixeltable.utils.pydantic import is_json_convertible
12
16
 
13
- from .globals import MediaValidation, UpdateStatus
17
+ from .globals import MediaValidation
14
18
  from .table import Table
15
- from .table_version import TableVersion
19
+ from .table_version import TableVersion, TableVersionMd
16
20
  from .table_version_handle import TableVersionHandle
17
21
  from .table_version_path import TableVersionPath
22
+ from .tbl_ops import CreateStoreTableOp, TableOp
23
+ from .update_status import UpdateStatus
18
24
 
19
25
  if TYPE_CHECKING:
20
26
  from pixeltable import exprs
@@ -51,72 +57,63 @@ class InsertableTable(Table):
51
57
  def __init__(self, dir_id: UUID, tbl_version: TableVersionHandle):
52
58
  tbl_version_path = TableVersionPath(tbl_version)
53
59
  super().__init__(tbl_version.id, dir_id, tbl_version.get().name, tbl_version_path)
60
+ self._tbl_version = tbl_version
54
61
 
55
- @classmethod
56
- def _display_name(cls) -> str:
62
+ def _display_name(self) -> str:
63
+ assert not self._tbl_version_path.is_replica()
57
64
  return 'table'
58
65
 
59
66
  @classmethod
60
67
  def _create(
61
68
  cls,
62
- dir_id: UUID,
63
69
  name: str,
64
70
  schema: dict[str, ts.ColumnType],
65
- df: Optional[pxt.DataFrame],
66
71
  primary_key: list[str],
67
72
  num_retained_versions: int,
68
73
  comment: str,
69
74
  media_validation: MediaValidation,
70
- ) -> InsertableTable:
75
+ create_default_idxs: bool,
76
+ ) -> tuple[TableVersionMd, list[TableOp]]:
71
77
  columns = cls._create_columns(schema)
72
78
  cls._verify_schema(columns)
73
79
  column_names = [col.name for col in columns]
74
80
  for pk_col in primary_key:
75
81
  if pk_col not in column_names:
76
- raise excs.Error(f'Primary key column {pk_col} not found in table schema')
82
+ raise excs.Error(f'Primary key column {pk_col!r} not found in table schema.')
77
83
  col = columns[column_names.index(pk_col)]
78
84
  if col.col_type.nullable:
79
- raise excs.Error(f'Primary key column {pk_col} cannot be nullable')
85
+ raise excs.Error(f'Primary key column {pk_col!r} cannot be nullable.')
80
86
  col.is_pk = True
81
87
 
82
- _, tbl_version = TableVersion.create(
83
- dir_id,
88
+ md = TableVersion.create_initial_md(
84
89
  name,
85
90
  columns,
86
- num_retained_versions=num_retained_versions,
87
- comment=comment,
88
- media_validation=media_validation,
91
+ num_retained_versions,
92
+ comment,
93
+ media_validation,
94
+ create_default_idxs=create_default_idxs,
95
+ view_md=None,
89
96
  )
90
- tbl = cls(dir_id, TableVersionHandle.create(tbl_version))
91
- # TODO We need to commit before doing the insertion, in order to avoid a primary key (version) collision
92
- # when the table metadata gets updated. Once we have a notion of user-defined transactions in
93
- # Pixeltable, we can wrap the create/insert in a transaction to avoid this.
94
- session = Env.get().session
95
- session.commit()
96
- if df is not None:
97
- # A DataFrame was provided, so insert its contents into the table
98
- # (using the same DB session as the table creation)
99
- tbl_version.insert(None, df, fail_on_exception=True)
100
- session.commit()
101
-
102
- _logger.info(f'Created table `{name}`, id={tbl_version.id}')
103
- Env.get().console_logger.info(f'Created table `{name}`.')
104
- return tbl
105
-
106
- def get_metadata(self) -> dict[str, Any]:
107
- md = super().get_metadata()
108
- md['is_view'] = False
109
- md['is_snapshot'] = False
110
- return md
97
+
98
+ ops = [
99
+ TableOp(
100
+ tbl_id=md.tbl_md.tbl_id,
101
+ op_sn=0,
102
+ num_ops=1,
103
+ needs_xact=False,
104
+ create_store_table_op=CreateStoreTableOp(),
105
+ )
106
+ ]
107
+ return md, ops
111
108
 
112
109
  @overload
113
110
  def insert(
114
111
  self,
115
- source: Optional[TableDataSource] = None,
112
+ source: TableDataSource | None = None,
116
113
  /,
117
114
  *,
118
- source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
119
- schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
115
+ source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
116
+ schema_overrides: dict[str, ts.ColumnType] | None = None,
120
117
  on_error: Literal['abort', 'ignore'] = 'abort',
121
118
  print_stats: bool = False,
122
119
  **kwargs: Any,
@@ -129,54 +126,73 @@ class InsertableTable(Table):
129
126
 
130
127
  def insert(
131
128
  self,
132
- source: Optional[TableDataSource] = None,
129
+ source: TableDataSource | None = None,
133
130
  /,
134
131
  *,
135
- source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
136
- schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
132
+ source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
133
+ schema_overrides: dict[str, ts.ColumnType] | None = None,
137
134
  on_error: Literal['abort', 'ignore'] = 'abort',
138
135
  print_stats: bool = False,
139
136
  **kwargs: Any,
140
137
  ) -> UpdateStatus:
138
+ from pixeltable.catalog import Catalog
141
139
  from pixeltable.io.table_data_conduit import UnkTableDataConduit
142
140
 
143
- table = self
144
- if source is None:
145
- source = [kwargs]
146
- kwargs = None
141
+ if source is not None and isinstance(source, Sequence) and len(source) == 0:
142
+ raise excs.Error('Cannot insert an empty sequence.')
143
+ fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
147
144
 
148
- tds = UnkTableDataConduit(
149
- source, source_format=source_format, src_schema_overrides=schema_overrides, extra_fields=kwargs
150
- )
151
- data_source = tds.specialize()
152
- if data_source.source_column_map is None:
153
- data_source.src_pk = []
145
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
146
+ table = self
154
147
 
155
- assert isinstance(table, Table)
156
- data_source.add_table_info(table)
157
- data_source.prepare_for_insert_into_table()
148
+ # TODO: unify with TableDataConduit
149
+ if source is not None and isinstance(source, Sequence) and isinstance(source[0], pydantic.BaseModel):
150
+ status = self._insert_pydantic(
151
+ cast(Sequence[pydantic.BaseModel], source), # needed for mypy
152
+ print_stats=print_stats,
153
+ fail_on_exception=fail_on_exception,
154
+ )
155
+ Env.get().console_logger.info(status.insert_msg)
156
+ FileCache.get().emit_eviction_warnings()
157
+ return status
158
158
 
159
- fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
160
- return table.insert_table_data_source(
161
- data_source=data_source, fail_on_exception=fail_on_exception, print_stats=print_stats
162
- )
159
+ if source is None:
160
+ source = [kwargs]
161
+ kwargs = None
162
+
163
+ tds = UnkTableDataConduit(
164
+ source, source_format=source_format, src_schema_overrides=schema_overrides, extra_fields=kwargs
165
+ )
166
+ data_source = tds.specialize()
167
+ if data_source.source_column_map is None:
168
+ data_source.src_pk = []
169
+
170
+ assert isinstance(table, Table)
171
+ data_source.add_table_info(table)
172
+ data_source.prepare_for_insert_into_table()
173
+
174
+ return table.insert_table_data_source(
175
+ data_source=data_source, fail_on_exception=fail_on_exception, print_stats=print_stats
176
+ )
163
177
 
164
178
  def insert_table_data_source(
165
179
  self, data_source: TableDataConduit, fail_on_exception: bool, print_stats: bool = False
166
180
  ) -> pxt.UpdateStatus:
167
181
  """Insert row batches into this table from a `TableDataConduit`."""
168
- from pixeltable.io.table_data_conduit import DFTableDataConduit
182
+ from pixeltable.catalog import Catalog
183
+ from pixeltable.io.table_data_conduit import QueryTableDataConduit
169
184
 
170
- status = pxt.UpdateStatus()
171
- with Env.get().begin_xact():
172
- if isinstance(data_source, DFTableDataConduit):
185
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
186
+ if isinstance(data_source, QueryTableDataConduit):
187
+ status = pxt.UpdateStatus()
173
188
  status += self._tbl_version.get().insert(
174
- rows=None, df=data_source.pxt_df, print_stats=print_stats, fail_on_exception=fail_on_exception
189
+ rows=None, query=data_source.pxt_query, print_stats=print_stats, fail_on_exception=fail_on_exception
175
190
  )
176
191
  else:
192
+ status = pxt.UpdateStatus()
177
193
  for row_batch in data_source.valid_row_batch():
178
194
  status += self._tbl_version.get().insert(
179
- rows=row_batch, df=None, print_stats=print_stats, fail_on_exception=fail_on_exception
195
+ rows=row_batch, query=None, print_stats=print_stats, fail_on_exception=fail_on_exception
180
196
  )
181
197
 
182
198
  Env.get().console_logger.info(status.insert_msg)
@@ -184,34 +200,110 @@ class InsertableTable(Table):
184
200
  FileCache.get().emit_eviction_warnings()
185
201
  return status
186
202
 
187
- def _validate_input_rows(self, rows: list[dict[str, Any]]) -> None:
188
- """Verify that the input rows match the table schema"""
189
- valid_col_names = set(self._schema.keys())
190
- reqd_col_names = set(self._tbl_version_path.tbl_version.get().get_required_col_names())
191
- computed_col_names = set(self._tbl_version_path.tbl_version.get().get_computed_col_names())
192
- for row in rows:
193
- assert isinstance(row, dict)
194
- col_names = set(row.keys())
195
- if len(reqd_col_names - col_names) > 0:
196
- raise excs.Error(f'Missing required column(s) ({", ".join(reqd_col_names - col_names)}) in row {row}')
197
-
198
- for col_name, val in row.items():
199
- if col_name not in valid_col_names:
200
- raise excs.Error(f'Unknown column name {col_name} in row {row}')
201
- if col_name in computed_col_names:
202
- raise excs.Error(f'Value for computed column {col_name} in row {row}')
203
-
204
- # validate data
205
- col = self._tbl_version_path.get_column(col_name)
206
- try:
207
- # basic sanity checks here
208
- checked_val = col.col_type.create_literal(val)
209
- row[col_name] = checked_val
210
- except TypeError as e:
211
- msg = str(e)
212
- raise excs.Error(f'Error in column {col.name}: {msg[0].lower() + msg[1:]}\nRow: {row}') from e
213
-
214
- def delete(self, where: Optional['exprs.Expr'] = None) -> UpdateStatus:
203
+ def _insert_pydantic(
204
+ self, rows: Sequence[pydantic.BaseModel], print_stats: bool = False, fail_on_exception: bool = True
205
+ ) -> UpdateStatus:
206
+ model_class = type(rows[0])
207
+ self._validate_pydantic_model(model_class)
208
+ # convert rows one-by-one in order to be able to print meaningful error messages
209
+ pxt_rows: list[dict[str, Any]] = []
210
+ for i, row in enumerate(rows):
211
+ try:
212
+ pxt_rows.append(row.model_dump(mode='json'))
213
+ except pydantic_core.PydanticSerializationError as e:
214
+ raise excs.Error(f'Row {i}: error serializing pydantic model to JSON:\n{e}') from e
215
+
216
+ # explicitly check that all required columns are present and non-None in the rows,
217
+ # because we ignore nullability when validating the pydantic model
218
+ reqd_col_names = [col.name for col in self._tbl_version_path.columns() if col.is_required_for_insert]
219
+ for i, pxt_row in enumerate(pxt_rows):
220
+ if type(rows[i]) is not model_class:
221
+ raise excs.Error(
222
+ f'Expected an instance of `{model_class.__name__}`; got `{type(rows[i]).__name__}` (in row {i})'
223
+ )
224
+ for col_name in reqd_col_names:
225
+ if pxt_row.get(col_name) is None:
226
+ raise excs.Error(f'Missing required column {col_name!r} in row {i}')
227
+
228
+ status = self._tbl_version.get().insert(
229
+ rows=pxt_rows, query=None, print_stats=print_stats, fail_on_exception=fail_on_exception
230
+ )
231
+ return status
232
+
233
+ def _validate_pydantic_model(self, model: type[pydantic.BaseModel]) -> None:
234
+ """
235
+ Check if a Pydantic model is compatible with this table for insert operations.
236
+
237
+ A model is compatible if:
238
+ - All required table columns have corresponding model fields with compatible types
239
+ - Model does not define fields for computed columns
240
+ - Model field types are compatible with table column types
241
+ """
242
+ assert isinstance(model, type) and issubclass(model, pydantic.BaseModel)
243
+
244
+ schema = self._get_schema()
245
+ required_cols = set(self._tbl_version.get().get_required_col_names())
246
+ computed_cols = set(self._tbl_version.get().get_computed_col_names())
247
+ model_fields = model.model_fields
248
+ model_field_names = set(model_fields.keys())
249
+
250
+ missing_required = required_cols - model_field_names
251
+ if missing_required:
252
+ raise excs.Error(
253
+ f'Pydantic model `{model.__name__}` is missing required columns: ' + ', '.join(missing_required)
254
+ )
255
+
256
+ computed_in_model = computed_cols & model_field_names
257
+ if computed_in_model:
258
+ raise excs.Error(
259
+ f'Pydantic model `{model.__name__}` has fields for computed columns: ' + ', '.join(computed_in_model)
260
+ )
261
+
262
+ # validate type compatibility
263
+ common_fields = model_field_names & set(schema.keys())
264
+ if len(common_fields) == 0:
265
+ raise excs.Error(
266
+ f'Pydantic model `{model.__name__}` has no fields that map to columns in table {self._name!r}'
267
+ )
268
+ for field_name in common_fields:
269
+ pxt_col_type = schema[field_name]
270
+ model_field = model_fields[field_name]
271
+ model_type = model_field.annotation
272
+
273
+ # we ignore nullability: we want to accept optional model fields for required table columns, as long as
274
+ # the model instances provide a non-null value
275
+ # allow_enum=True: model_dump(mode='json') converts enums to their values
276
+ inferred_pxt_type = ts.ColumnType.from_python_type(model_type, infer_pydantic_json=True)
277
+ if inferred_pxt_type is None:
278
+ raise excs.Error(
279
+ f'Pydantic model `{model.__name__}`: cannot infer Pixeltable type for column {field_name!r}'
280
+ )
281
+
282
+ if pxt_col_type.is_media_type():
283
+ # media types require file paths, either as str or Path
284
+ if not inferred_pxt_type.is_string_type():
285
+ raise excs.Error(
286
+ f'Column {field_name!r} requires a `str` or `Path` field in `{model.__name__}`, but it is '
287
+ f'`{model_type.__name__}`'
288
+ )
289
+ else:
290
+ if not pxt_col_type.is_supertype_of(inferred_pxt_type, ignore_nullable=True):
291
+ raise excs.Error(
292
+ f'Pydantic model `{model.__name__}` has incompatible type `{model_type.__name__}` '
293
+ f'for column {field_name!r} (of Pixeltable type `{pxt_col_type}`)'
294
+ )
295
+
296
+ if (
297
+ isinstance(model_type, type)
298
+ and issubclass(model_type, pydantic.BaseModel)
299
+ and not is_json_convertible(model_type)
300
+ ):
301
+ raise excs.Error(
302
+ f'Pydantic model `{model.__name__}` has field {field_name!r} with nested model '
303
+ f'`{model_type.__name__}`, which is not JSON-convertible'
304
+ )
305
+
306
+ def delete(self, where: 'exprs.Expr' | None = None) -> UpdateStatus:
215
307
  """Delete rows in this table.
216
308
 
217
309
  Args:
@@ -226,16 +318,17 @@ class InsertableTable(Table):
226
318
 
227
319
  >>> tbl.delete(tbl.a > 5)
228
320
  """
229
- with Env.get().begin_xact():
321
+ from pixeltable.catalog import Catalog
322
+
323
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
230
324
  return self._tbl_version.get().delete(where=where)
231
325
 
232
- @property
233
- def _base_table(self) -> Optional['Table']:
326
+ def _get_base_table(self) -> 'Table' | None:
234
327
  return None
235
328
 
236
329
  @property
237
- def _effective_base_versions(self) -> list[Optional[int]]:
330
+ def _effective_base_versions(self) -> list[int | None]:
238
331
  return []
239
332
 
240
333
  def _table_descriptor(self) -> str:
241
- return f'Table {self._path!r}'
334
+ return self._display_str()
@@ -1,20 +1,55 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- from typing import Iterator
4
+ from typing import NamedTuple
5
5
 
6
6
  from pixeltable import exceptions as excs
7
7
 
8
- from .globals import is_valid_path
8
+ from .globals import is_valid_identifier
9
9
 
10
10
  _logger = logging.getLogger('pixeltable')
11
11
 
12
12
 
13
- class Path:
14
- def __init__(self, path: str, empty_is_valid: bool = False, allow_system_paths: bool = False):
15
- if not is_valid_path(path, empty_is_valid, allow_system_paths):
16
- raise excs.Error(f"Invalid path format: '{path}'")
17
- self.components = path.split('.')
13
+ class Path(NamedTuple):
14
+ components: list[str]
15
+ version: int | None = None
16
+
17
+ @classmethod
18
+ def parse(
19
+ cls,
20
+ path: str,
21
+ allow_empty_path: bool = False,
22
+ allow_system_path: bool = False,
23
+ allow_versioned_path: bool = False,
24
+ ) -> Path:
25
+ components: list[str]
26
+ version: int | None
27
+ if ':' in path:
28
+ parts = path.split(':')
29
+ if len(parts) != 2:
30
+ raise excs.Error(f'Invalid path: {path}')
31
+ try:
32
+ components = parts[0].split('.')
33
+ version = int(parts[1])
34
+ except ValueError:
35
+ raise excs.Error(f'Invalid path: {path}') from None
36
+ else:
37
+ components = path.split('.')
38
+ version = None
39
+
40
+ if components == [''] and not allow_empty_path:
41
+ raise excs.Error(f'Invalid path: {path}')
42
+
43
+ if components != [''] and not all(
44
+ is_valid_identifier(c, allow_system_identifiers=allow_system_path, allow_hyphens=True) for c in components
45
+ ):
46
+ raise excs.Error(f'Invalid path: {path}')
47
+
48
+ if version is not None and not allow_versioned_path:
49
+ raise excs.Error(f'Versioned path not allowed here: {path}')
50
+
51
+ assert len(components) > 0
52
+ return Path(components, version)
18
53
 
19
54
  @property
20
55
  def len(self) -> int:
@@ -22,7 +57,6 @@ class Path:
22
57
 
23
58
  @property
24
59
  def name(self) -> str:
25
- assert len(self.components) > 0
26
60
  return self.components[-1]
27
61
 
28
62
  @property
@@ -36,18 +70,15 @@ class Path:
36
70
  @property
37
71
  def parent(self) -> Path:
38
72
  if len(self.components) == 1:
39
- if self.is_root:
40
- return self
41
- else:
42
- return Path('', empty_is_valid=True, allow_system_paths=True)
73
+ return ROOT_PATH # Includes the case of the root path, which is its own parent.
43
74
  else:
44
- return Path('.'.join(self.components[:-1]), allow_system_paths=True)
75
+ return Path(self.components[:-1])
45
76
 
46
77
  def append(self, name: str) -> Path:
47
78
  if self.is_root:
48
- return Path(name, allow_system_paths=True)
79
+ return Path([name])
49
80
  else:
50
- return Path(f'{self}.{name}', allow_system_paths=True)
81
+ return Path([*self.components, name])
51
82
 
52
83
  def is_ancestor(self, other: Path, is_parent: bool = False) -> bool:
53
84
  """
@@ -60,22 +91,31 @@ class Path:
60
91
  is_prefix = self.components == other.components[: self.len]
61
92
  return is_prefix and (self.len == (other.len - 1) or not is_parent)
62
93
 
63
- def ancestors(self) -> Iterator[Path]:
94
+ def ancestors(self) -> list[Path]:
64
95
  """
65
- Return all ancestors of this path in top-down order including root.
96
+ Return all proper ancestors of this path in top-down order including root.
66
97
  If this path is for the root directory, which has no parent, then None is returned.
67
98
  """
68
99
  if self.is_root:
69
- return
100
+ return []
70
101
  else:
71
- for i in range(0, len(self.components)):
72
- yield Path('.'.join(self.components[0:i]), empty_is_valid=True)
102
+ return [Path(self.components[:i]) if i > 0 else ROOT_PATH for i in range(len(self.components))]
73
103
 
74
104
  def __repr__(self) -> str:
75
105
  return repr(str(self))
76
106
 
77
107
  def __str__(self) -> str:
78
- return '.'.join(self.components)
108
+ base = '.'.join(self.components)
109
+ if self.version is not None:
110
+ return f'{base}:{self.version}'
111
+ else:
112
+ return base
113
+
114
+ def __eq__(self, other: object) -> bool:
115
+ return isinstance(other, Path) and str(self) == str(other)
116
+
117
+ def __hash__(self) -> int:
118
+ return hash(str(self))
119
+
79
120
 
80
- def __lt__(self, other: Path) -> bool:
81
- return str(self) < str(other)
121
+ ROOT_PATH = Path([''])
@@ -1,9 +1,7 @@
1
1
  from abc import abstractmethod
2
- from typing import TYPE_CHECKING, Any, Optional
2
+ from typing import TYPE_CHECKING
3
3
  from uuid import UUID
4
4
 
5
- from pixeltable.env import Env
6
-
7
5
  if TYPE_CHECKING:
8
6
  from pixeltable import catalog
9
7
 
@@ -16,45 +14,43 @@ class SchemaObject:
16
14
 
17
15
  _id: UUID
18
16
  _name: str
19
- _dir_id: Optional[UUID]
17
+ _dir_id: UUID | None
20
18
 
21
- def __init__(self, obj_id: UUID, name: str, dir_id: Optional[UUID]):
19
+ def __init__(self, obj_id: UUID, name: str, dir_id: UUID | None):
22
20
  # make these private so they don't collide with column names (id and name are fairly common)
21
+ assert dir_id is None or isinstance(dir_id, UUID), type(dir_id)
23
22
  self._id = obj_id
24
23
  self._name = name
25
24
  self._dir_id = dir_id
26
25
 
27
- def _parent(self) -> Optional['catalog.Dir']:
26
+ def _parent(self) -> 'catalog.Dir | None':
28
27
  """Returns the parent directory of this schema object."""
29
28
  from .catalog import Catalog
30
29
 
31
- with Env.get().begin_xact():
30
+ with Catalog.get().begin_xact(for_write=False):
32
31
  if self._dir_id is None:
33
32
  return None
34
33
  return Catalog.get().get_dir(self._dir_id)
35
34
 
36
- @property
37
35
  def _path(self) -> str:
38
36
  """Returns the path to this schema object."""
39
37
  from .catalog import Catalog
40
38
 
41
39
  assert self._dir_id is not None
42
- with Env.get().begin_xact():
40
+ with Catalog.get().begin_xact(for_write=False):
43
41
  path = Catalog.get().get_dir_path(self._dir_id)
44
42
  return str(path.append(self._name))
45
43
 
46
- def get_metadata(self) -> dict[str, Any]:
47
- """Returns metadata associated with this schema object."""
48
- return {'name': self._name, 'path': self._path}
49
-
50
- @classmethod
51
44
  @abstractmethod
52
- def _display_name(cls) -> str:
45
+ def _display_name(self) -> str:
53
46
  """
54
47
  Return name displayed in error messages.
55
48
  """
56
49
  pass
57
50
 
51
+ def _display_str(self) -> str:
52
+ return f'{self._display_name()} {self._path()!r}'
53
+
58
54
  def _move(self, new_name: str, new_dir_id: UUID) -> None:
59
55
  """Subclasses need to override this to make the change persistent"""
60
56
  self._name = new_name