pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245)
  1. pixeltable/__init__.py +83 -19
  2. pixeltable/_query.py +1444 -0
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +7 -4
  5. pixeltable/catalog/catalog.py +2394 -119
  6. pixeltable/catalog/column.py +225 -104
  7. pixeltable/catalog/dir.py +38 -9
  8. pixeltable/catalog/globals.py +53 -34
  9. pixeltable/catalog/insertable_table.py +265 -115
  10. pixeltable/catalog/path.py +80 -17
  11. pixeltable/catalog/schema_object.py +28 -43
  12. pixeltable/catalog/table.py +1270 -677
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +1270 -751
  15. pixeltable/catalog/table_version_handle.py +109 -0
  16. pixeltable/catalog/table_version_path.py +137 -42
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +251 -134
  20. pixeltable/config.py +215 -0
  21. pixeltable/env.py +736 -285
  22. pixeltable/exceptions.py +26 -2
  23. pixeltable/exec/__init__.py +7 -2
  24. pixeltable/exec/aggregation_node.py +39 -21
  25. pixeltable/exec/cache_prefetch_node.py +87 -109
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +25 -28
  29. pixeltable/exec/data_row_batch.py +11 -46
  30. pixeltable/exec/exec_context.py +26 -11
  31. pixeltable/exec/exec_node.py +35 -27
  32. pixeltable/exec/expr_eval/__init__.py +3 -0
  33. pixeltable/exec/expr_eval/evaluators.py +365 -0
  34. pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
  35. pixeltable/exec/expr_eval/globals.py +200 -0
  36. pixeltable/exec/expr_eval/row_buffer.py +74 -0
  37. pixeltable/exec/expr_eval/schedulers.py +413 -0
  38. pixeltable/exec/globals.py +35 -0
  39. pixeltable/exec/in_memory_data_node.py +35 -27
  40. pixeltable/exec/object_store_save_node.py +293 -0
  41. pixeltable/exec/row_update_node.py +44 -29
  42. pixeltable/exec/sql_node.py +414 -115
  43. pixeltable/exprs/__init__.py +8 -5
  44. pixeltable/exprs/arithmetic_expr.py +79 -45
  45. pixeltable/exprs/array_slice.py +5 -5
  46. pixeltable/exprs/column_property_ref.py +40 -26
  47. pixeltable/exprs/column_ref.py +254 -61
  48. pixeltable/exprs/comparison.py +14 -9
  49. pixeltable/exprs/compound_predicate.py +9 -10
  50. pixeltable/exprs/data_row.py +213 -72
  51. pixeltable/exprs/expr.py +270 -104
  52. pixeltable/exprs/expr_dict.py +6 -5
  53. pixeltable/exprs/expr_set.py +20 -11
  54. pixeltable/exprs/function_call.py +383 -284
  55. pixeltable/exprs/globals.py +18 -5
  56. pixeltable/exprs/in_predicate.py +7 -7
  57. pixeltable/exprs/inline_expr.py +37 -37
  58. pixeltable/exprs/is_null.py +8 -4
  59. pixeltable/exprs/json_mapper.py +120 -54
  60. pixeltable/exprs/json_path.py +90 -60
  61. pixeltable/exprs/literal.py +61 -16
  62. pixeltable/exprs/method_ref.py +7 -6
  63. pixeltable/exprs/object_ref.py +19 -8
  64. pixeltable/exprs/row_builder.py +238 -75
  65. pixeltable/exprs/rowid_ref.py +53 -15
  66. pixeltable/exprs/similarity_expr.py +65 -50
  67. pixeltable/exprs/sql_element_cache.py +5 -5
  68. pixeltable/exprs/string_op.py +107 -0
  69. pixeltable/exprs/type_cast.py +25 -13
  70. pixeltable/exprs/variable.py +2 -2
  71. pixeltable/func/__init__.py +9 -5
  72. pixeltable/func/aggregate_function.py +197 -92
  73. pixeltable/func/callable_function.py +119 -35
  74. pixeltable/func/expr_template_function.py +101 -48
  75. pixeltable/func/function.py +375 -62
  76. pixeltable/func/function_registry.py +20 -19
  77. pixeltable/func/globals.py +6 -5
  78. pixeltable/func/mcp.py +74 -0
  79. pixeltable/func/query_template_function.py +151 -35
  80. pixeltable/func/signature.py +178 -49
  81. pixeltable/func/tools.py +164 -0
  82. pixeltable/func/udf.py +176 -53
  83. pixeltable/functions/__init__.py +44 -4
  84. pixeltable/functions/anthropic.py +226 -47
  85. pixeltable/functions/audio.py +148 -11
  86. pixeltable/functions/bedrock.py +137 -0
  87. pixeltable/functions/date.py +188 -0
  88. pixeltable/functions/deepseek.py +113 -0
  89. pixeltable/functions/document.py +81 -0
  90. pixeltable/functions/fal.py +76 -0
  91. pixeltable/functions/fireworks.py +72 -20
  92. pixeltable/functions/gemini.py +249 -0
  93. pixeltable/functions/globals.py +208 -53
  94. pixeltable/functions/groq.py +108 -0
  95. pixeltable/functions/huggingface.py +1088 -95
  96. pixeltable/functions/image.py +155 -84
  97. pixeltable/functions/json.py +8 -11
  98. pixeltable/functions/llama_cpp.py +31 -19
  99. pixeltable/functions/math.py +169 -0
  100. pixeltable/functions/mistralai.py +50 -75
  101. pixeltable/functions/net.py +70 -0
  102. pixeltable/functions/ollama.py +29 -36
  103. pixeltable/functions/openai.py +548 -160
  104. pixeltable/functions/openrouter.py +143 -0
  105. pixeltable/functions/replicate.py +15 -14
  106. pixeltable/functions/reve.py +250 -0
  107. pixeltable/functions/string.py +310 -85
  108. pixeltable/functions/timestamp.py +37 -19
  109. pixeltable/functions/together.py +77 -120
  110. pixeltable/functions/twelvelabs.py +188 -0
  111. pixeltable/functions/util.py +7 -2
  112. pixeltable/functions/uuid.py +30 -0
  113. pixeltable/functions/video.py +1528 -117
  114. pixeltable/functions/vision.py +26 -26
  115. pixeltable/functions/voyageai.py +289 -0
  116. pixeltable/functions/whisper.py +19 -10
  117. pixeltable/functions/whisperx.py +179 -0
  118. pixeltable/functions/yolox.py +112 -0
  119. pixeltable/globals.py +716 -236
  120. pixeltable/index/__init__.py +3 -1
  121. pixeltable/index/base.py +17 -21
  122. pixeltable/index/btree.py +32 -22
  123. pixeltable/index/embedding_index.py +155 -92
  124. pixeltable/io/__init__.py +12 -7
  125. pixeltable/io/datarows.py +140 -0
  126. pixeltable/io/external_store.py +83 -125
  127. pixeltable/io/fiftyone.py +24 -33
  128. pixeltable/io/globals.py +47 -182
  129. pixeltable/io/hf_datasets.py +96 -127
  130. pixeltable/io/label_studio.py +171 -156
  131. pixeltable/io/lancedb.py +3 -0
  132. pixeltable/io/pandas.py +136 -115
  133. pixeltable/io/parquet.py +40 -153
  134. pixeltable/io/table_data_conduit.py +702 -0
  135. pixeltable/io/utils.py +100 -0
  136. pixeltable/iterators/__init__.py +8 -4
  137. pixeltable/iterators/audio.py +207 -0
  138. pixeltable/iterators/base.py +9 -3
  139. pixeltable/iterators/document.py +144 -87
  140. pixeltable/iterators/image.py +17 -38
  141. pixeltable/iterators/string.py +15 -12
  142. pixeltable/iterators/video.py +523 -127
  143. pixeltable/metadata/__init__.py +33 -8
  144. pixeltable/metadata/converters/convert_10.py +2 -3
  145. pixeltable/metadata/converters/convert_13.py +2 -2
  146. pixeltable/metadata/converters/convert_15.py +15 -11
  147. pixeltable/metadata/converters/convert_16.py +4 -5
  148. pixeltable/metadata/converters/convert_17.py +4 -5
  149. pixeltable/metadata/converters/convert_18.py +4 -6
  150. pixeltable/metadata/converters/convert_19.py +6 -9
  151. pixeltable/metadata/converters/convert_20.py +3 -6
  152. pixeltable/metadata/converters/convert_21.py +6 -8
  153. pixeltable/metadata/converters/convert_22.py +3 -2
  154. pixeltable/metadata/converters/convert_23.py +33 -0
  155. pixeltable/metadata/converters/convert_24.py +55 -0
  156. pixeltable/metadata/converters/convert_25.py +19 -0
  157. pixeltable/metadata/converters/convert_26.py +23 -0
  158. pixeltable/metadata/converters/convert_27.py +29 -0
  159. pixeltable/metadata/converters/convert_28.py +13 -0
  160. pixeltable/metadata/converters/convert_29.py +110 -0
  161. pixeltable/metadata/converters/convert_30.py +63 -0
  162. pixeltable/metadata/converters/convert_31.py +11 -0
  163. pixeltable/metadata/converters/convert_32.py +15 -0
  164. pixeltable/metadata/converters/convert_33.py +17 -0
  165. pixeltable/metadata/converters/convert_34.py +21 -0
  166. pixeltable/metadata/converters/convert_35.py +9 -0
  167. pixeltable/metadata/converters/convert_36.py +38 -0
  168. pixeltable/metadata/converters/convert_37.py +15 -0
  169. pixeltable/metadata/converters/convert_38.py +39 -0
  170. pixeltable/metadata/converters/convert_39.py +124 -0
  171. pixeltable/metadata/converters/convert_40.py +73 -0
  172. pixeltable/metadata/converters/convert_41.py +12 -0
  173. pixeltable/metadata/converters/convert_42.py +9 -0
  174. pixeltable/metadata/converters/convert_43.py +44 -0
  175. pixeltable/metadata/converters/util.py +44 -18
  176. pixeltable/metadata/notes.py +21 -0
  177. pixeltable/metadata/schema.py +185 -42
  178. pixeltable/metadata/utils.py +74 -0
  179. pixeltable/mypy/__init__.py +3 -0
  180. pixeltable/mypy/mypy_plugin.py +123 -0
  181. pixeltable/plan.py +616 -225
  182. pixeltable/share/__init__.py +3 -0
  183. pixeltable/share/packager.py +797 -0
  184. pixeltable/share/protocol/__init__.py +33 -0
  185. pixeltable/share/protocol/common.py +165 -0
  186. pixeltable/share/protocol/operation_types.py +33 -0
  187. pixeltable/share/protocol/replica.py +119 -0
  188. pixeltable/share/publish.py +349 -0
  189. pixeltable/store.py +398 -232
  190. pixeltable/type_system.py +730 -267
  191. pixeltable/utils/__init__.py +40 -0
  192. pixeltable/utils/arrow.py +201 -29
  193. pixeltable/utils/av.py +298 -0
  194. pixeltable/utils/azure_store.py +346 -0
  195. pixeltable/utils/coco.py +26 -27
  196. pixeltable/utils/code.py +4 -4
  197. pixeltable/utils/console_output.py +46 -0
  198. pixeltable/utils/coroutine.py +24 -0
  199. pixeltable/utils/dbms.py +92 -0
  200. pixeltable/utils/description_helper.py +11 -12
  201. pixeltable/utils/documents.py +60 -61
  202. pixeltable/utils/exception_handler.py +36 -0
  203. pixeltable/utils/filecache.py +38 -22
  204. pixeltable/utils/formatter.py +88 -51
  205. pixeltable/utils/gcs_store.py +295 -0
  206. pixeltable/utils/http.py +133 -0
  207. pixeltable/utils/http_server.py +14 -13
  208. pixeltable/utils/iceberg.py +13 -0
  209. pixeltable/utils/image.py +17 -0
  210. pixeltable/utils/lancedb.py +90 -0
  211. pixeltable/utils/local_store.py +322 -0
  212. pixeltable/utils/misc.py +5 -0
  213. pixeltable/utils/object_stores.py +573 -0
  214. pixeltable/utils/pydantic.py +60 -0
  215. pixeltable/utils/pytorch.py +20 -20
  216. pixeltable/utils/s3_store.py +527 -0
  217. pixeltable/utils/sql.py +32 -5
  218. pixeltable/utils/system.py +30 -0
  219. pixeltable/utils/transactional_directory.py +4 -3
  220. pixeltable-0.5.7.dist-info/METADATA +579 -0
  221. pixeltable-0.5.7.dist-info/RECORD +227 -0
  222. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  223. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  224. pixeltable/__version__.py +0 -3
  225. pixeltable/catalog/named_function.py +0 -36
  226. pixeltable/catalog/path_dict.py +0 -141
  227. pixeltable/dataframe.py +0 -894
  228. pixeltable/exec/expr_eval_node.py +0 -232
  229. pixeltable/ext/__init__.py +0 -14
  230. pixeltable/ext/functions/__init__.py +0 -8
  231. pixeltable/ext/functions/whisperx.py +0 -77
  232. pixeltable/ext/functions/yolox.py +0 -157
  233. pixeltable/tool/create_test_db_dump.py +0 -311
  234. pixeltable/tool/create_test_video.py +0 -81
  235. pixeltable/tool/doc_plugins/griffe.py +0 -50
  236. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  237. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  238. pixeltable/tool/embed_udf.py +0 -9
  239. pixeltable/tool/mypy_plugin.py +0 -55
  240. pixeltable/utils/media_store.py +0 -76
  241. pixeltable/utils/s3.py +0 -16
  242. pixeltable-0.2.26.dist-info/METADATA +0 -400
  243. pixeltable-0.2.26.dist-info/RECORD +0 -156
  244. pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
  245. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -1,172 +1,309 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import enum
3
4
  import logging
4
- from typing import Any, Iterable, Literal, Optional, overload
5
+ from typing import TYPE_CHECKING, Any, Literal, Sequence, cast, overload
5
6
  from uuid import UUID
6
7
 
7
- import sqlalchemy.orm as orm
8
+ import pydantic
9
+ import pydantic_core
8
10
 
9
11
  import pixeltable as pxt
10
- import pixeltable.type_system as ts
11
- from pixeltable import exceptions as excs
12
+ from pixeltable import exceptions as excs, type_system as ts
12
13
  from pixeltable.env import Env
13
14
  from pixeltable.utils.filecache import FileCache
15
+ from pixeltable.utils.pydantic import is_json_convertible
14
16
 
15
- from .catalog import Catalog
16
- from .globals import MediaValidation, UpdateStatus
17
+ from .globals import MediaValidation
17
18
  from .table import Table
18
- from .table_version import TableVersion
19
+ from .table_version import TableVersion, TableVersionMd
20
+ from .table_version_handle import TableVersionHandle
19
21
  from .table_version_path import TableVersionPath
22
+ from .tbl_ops import CreateStoreTableOp, TableOp
23
+ from .update_status import UpdateStatus
24
+
25
+ if TYPE_CHECKING:
26
+ from pixeltable import exprs
27
+ from pixeltable.globals import TableDataSource
28
+ from pixeltable.io.table_data_conduit import TableDataConduit
20
29
 
21
30
  _logger = logging.getLogger('pixeltable')
22
31
 
23
32
 
33
+ class OnErrorParameter(enum.Enum):
34
+ """Supported values for the on_error parameter"""
35
+
36
+ ABORT = 'abort'
37
+ IGNORE = 'ignore'
38
+
39
+ @classmethod
40
+ def is_valid(cls, v: Any) -> bool:
41
+ if isinstance(v, str):
42
+ return v.lower() in [c.value for c in cls]
43
+ return False
44
+
45
+ @classmethod
46
+ def fail_on_exception(cls, v: Any) -> bool:
47
+ if not cls.is_valid(v):
48
+ raise ValueError(f'Invalid value for on_error: {v}')
49
+ if isinstance(v, str):
50
+ return v.lower() != cls.IGNORE.value
51
+ return True
52
+
53
+
24
54
  class InsertableTable(Table):
25
55
  """A `Table` that allows inserting and deleting rows."""
26
56
 
27
- def __init__(self, dir_id: UUID, tbl_version: TableVersion):
57
+ def __init__(self, dir_id: UUID, tbl_version: TableVersionHandle):
28
58
  tbl_version_path = TableVersionPath(tbl_version)
29
- super().__init__(tbl_version.id, dir_id, tbl_version.name, tbl_version_path)
59
+ super().__init__(tbl_version.id, dir_id, tbl_version.get().name, tbl_version_path)
60
+ self._tbl_version = tbl_version
30
61
 
31
- @classmethod
32
- def _display_name(cls) -> str:
62
+ def _display_name(self) -> str:
63
+ assert not self._tbl_version_path.is_replica()
33
64
  return 'table'
34
65
 
35
- # MODULE-LOCAL, NOT PUBLIC
36
66
  @classmethod
37
67
  def _create(
38
- cls, dir_id: UUID, name: str, schema: dict[str, ts.ColumnType], df: Optional[pxt.DataFrame],
39
- primary_key: list[str], num_retained_versions: int, comment: str, media_validation: MediaValidation
40
- ) -> InsertableTable:
68
+ cls,
69
+ name: str,
70
+ schema: dict[str, ts.ColumnType],
71
+ primary_key: list[str],
72
+ num_retained_versions: int,
73
+ comment: str,
74
+ media_validation: MediaValidation,
75
+ create_default_idxs: bool,
76
+ ) -> tuple[TableVersionMd, list[TableOp]]:
41
77
  columns = cls._create_columns(schema)
42
78
  cls._verify_schema(columns)
43
79
  column_names = [col.name for col in columns]
44
80
  for pk_col in primary_key:
45
81
  if pk_col not in column_names:
46
- raise excs.Error(f'Primary key column {pk_col} not found in table schema')
82
+ raise excs.Error(f'Primary key column {pk_col!r} not found in table schema.')
47
83
  col = columns[column_names.index(pk_col)]
48
84
  if col.col_type.nullable:
49
- raise excs.Error(f'Primary key column {pk_col} cannot be nullable')
85
+ raise excs.Error(f'Primary key column {pk_col!r} cannot be nullable.')
50
86
  col.is_pk = True
51
87
 
52
- with orm.Session(Env.get().engine, future=True) as session:
53
- _, tbl_version = TableVersion.create(
54
- session, dir_id, name, columns, num_retained_versions=num_retained_versions, comment=comment,
55
- media_validation=media_validation)
56
- tbl = cls(dir_id, tbl_version)
57
- # TODO We need to commit before doing the insertion, in order to avoid a primary key (version) collision
58
- # when the table metadata gets updated. Once we have a notion of user-defined transactions in
59
- # Pixeltable, we can wrap the create/insert in a transaction to avoid this.
60
- session.commit()
61
- if df is not None:
62
- # A DataFrame was provided, so insert its contents into the table
63
- # (using the same DB session as the table creation)
64
- tbl_version.insert(None, df, conn=session.connection(), fail_on_exception=True)
65
- session.commit()
66
- cat = Catalog.get()
67
- cat.tbl_dependents[tbl._id] = []
68
- cat.tbls[tbl._id] = tbl
69
-
70
- _logger.info(f'Created table `{name}`, id={tbl_version.id}')
71
- print(f'Created table `{name}`.')
72
- return tbl
73
-
74
- def get_metadata(self) -> dict[str, Any]:
75
- md = super().get_metadata()
76
- md['is_view'] = False
77
- md['is_snapshot'] = False
78
- return md
88
+ md = TableVersion.create_initial_md(
89
+ name,
90
+ columns,
91
+ num_retained_versions,
92
+ comment,
93
+ media_validation,
94
+ create_default_idxs=create_default_idxs,
95
+ view_md=None,
96
+ )
97
+
98
+ ops = [
99
+ TableOp(
100
+ tbl_id=md.tbl_md.tbl_id,
101
+ op_sn=0,
102
+ num_ops=1,
103
+ needs_xact=False,
104
+ create_store_table_op=CreateStoreTableOp(),
105
+ )
106
+ ]
107
+ return md, ops
79
108
 
80
109
  @overload
81
110
  def insert(
82
111
  self,
83
- rows: Iterable[dict[str, Any]],
112
+ source: TableDataSource | None = None,
84
113
  /,
85
114
  *,
115
+ source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
116
+ schema_overrides: dict[str, ts.ColumnType] | None = None,
117
+ on_error: Literal['abort', 'ignore'] = 'abort',
86
118
  print_stats: bool = False,
87
- on_error: Literal['abort', 'ignore'] = 'abort'
119
+ **kwargs: Any,
88
120
  ) -> UpdateStatus: ...
89
121
 
90
122
  @overload
91
123
  def insert(
92
- self,
93
- *,
94
- print_stats: bool = False,
95
- on_error: Literal['abort', 'ignore'] = 'abort',
96
- **kwargs: Any
124
+ self, /, *, on_error: Literal['abort', 'ignore'] = 'abort', print_stats: bool = False, **kwargs: Any
97
125
  ) -> UpdateStatus: ...
98
126
 
99
- def insert( # type: ignore[misc]
127
+ def insert(
100
128
  self,
101
- rows: Optional[Iterable[dict[str, Any]]] = None,
129
+ source: TableDataSource | None = None,
102
130
  /,
103
131
  *,
104
- print_stats: bool = False,
132
+ source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
133
+ schema_overrides: dict[str, ts.ColumnType] | None = None,
105
134
  on_error: Literal['abort', 'ignore'] = 'abort',
106
- **kwargs: Any
135
+ print_stats: bool = False,
136
+ **kwargs: Any,
107
137
  ) -> UpdateStatus:
108
- if rows is None:
109
- rows = [kwargs]
110
- else:
111
- rows = list(rows)
112
- if len(kwargs) > 0:
113
- raise excs.Error('`kwargs` cannot be specified unless `rows is None`.')
114
-
115
- fail_on_exception = on_error == 'abort'
116
-
117
- if not isinstance(rows, list):
118
- raise excs.Error('rows must be a list of dictionaries')
119
- if len(rows) == 0:
120
- raise excs.Error('rows must not be empty')
121
- for row in rows:
122
- if not isinstance(row, dict):
123
- raise excs.Error('rows must be a list of dictionaries')
124
- self._validate_input_rows(rows)
125
- status = self._tbl_version.insert(rows, None, print_stats=print_stats, fail_on_exception=fail_on_exception)
126
-
127
- if status.num_excs == 0:
128
- cols_with_excs_str = ''
129
- else:
130
- cols_with_excs_str = \
131
- f' across {len(status.cols_with_excs)} column{"" if len(status.cols_with_excs) == 1 else "s"}'
132
- cols_with_excs_str += f' ({", ".join(status.cols_with_excs)})'
133
- msg = (
134
- f'Inserted {status.num_rows} row{"" if status.num_rows == 1 else "s"} '
135
- f'with {status.num_excs} error{"" if status.num_excs == 1 else "s"}{cols_with_excs_str}.'
136
- )
137
- print(msg)
138
- _logger.info(f'InsertableTable {self._name}: {msg}')
138
+ from pixeltable.catalog import Catalog
139
+ from pixeltable.io.table_data_conduit import UnkTableDataConduit
140
+
141
+ if source is not None and isinstance(source, Sequence) and len(source) == 0:
142
+ raise excs.Error('Cannot insert an empty sequence.')
143
+ fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
144
+
145
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
146
+ table = self
147
+
148
+ # TODO: unify with TableDataConduit
149
+ if source is not None and isinstance(source, Sequence) and isinstance(source[0], pydantic.BaseModel):
150
+ status = self._insert_pydantic(
151
+ cast(Sequence[pydantic.BaseModel], source), # needed for mypy
152
+ print_stats=print_stats,
153
+ fail_on_exception=fail_on_exception,
154
+ )
155
+ Env.get().console_logger.info(status.insert_msg)
156
+ FileCache.get().emit_eviction_warnings()
157
+ return status
158
+
159
+ if source is None:
160
+ source = [kwargs]
161
+ kwargs = None
162
+
163
+ tds = UnkTableDataConduit(
164
+ source, source_format=source_format, src_schema_overrides=schema_overrides, extra_fields=kwargs
165
+ )
166
+ data_source = tds.specialize()
167
+ if data_source.source_column_map is None:
168
+ data_source.src_pk = []
169
+
170
+ assert isinstance(table, Table)
171
+ data_source.add_table_info(table)
172
+ data_source.prepare_for_insert_into_table()
173
+
174
+ return table.insert_table_data_source(
175
+ data_source=data_source, fail_on_exception=fail_on_exception, print_stats=print_stats
176
+ )
177
+
178
+ def insert_table_data_source(
179
+ self, data_source: TableDataConduit, fail_on_exception: bool, print_stats: bool = False
180
+ ) -> pxt.UpdateStatus:
181
+ """Insert row batches into this table from a `TableDataConduit`."""
182
+ from pixeltable.catalog import Catalog
183
+ from pixeltable.io.table_data_conduit import QueryTableDataConduit
184
+
185
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
186
+ if isinstance(data_source, QueryTableDataConduit):
187
+ status = pxt.UpdateStatus()
188
+ status += self._tbl_version.get().insert(
189
+ rows=None, query=data_source.pxt_query, print_stats=print_stats, fail_on_exception=fail_on_exception
190
+ )
191
+ else:
192
+ status = pxt.UpdateStatus()
193
+ for row_batch in data_source.valid_row_batch():
194
+ status += self._tbl_version.get().insert(
195
+ rows=row_batch, query=None, print_stats=print_stats, fail_on_exception=fail_on_exception
196
+ )
197
+
198
+ Env.get().console_logger.info(status.insert_msg)
199
+
139
200
  FileCache.get().emit_eviction_warnings()
140
201
  return status
141
202
 
142
- def _validate_input_rows(self, rows: list[dict[str, Any]]) -> None:
143
- """Verify that the input rows match the table schema"""
144
- valid_col_names = set(self._schema.keys())
145
- reqd_col_names = set(self._tbl_version_path.tbl_version.get_required_col_names())
146
- computed_col_names = set(self._tbl_version_path.tbl_version.get_computed_col_names())
147
- for row in rows:
148
- assert isinstance(row, dict)
149
- col_names = set(row.keys())
150
- if len(reqd_col_names - col_names) > 0:
151
- raise excs.Error(f'Missing required column(s) ({", ".join(reqd_col_names - col_names)}) in row {row}')
152
-
153
- for col_name, val in row.items():
154
- if col_name not in valid_col_names:
155
- raise excs.Error(f'Unknown column name {col_name} in row {row}')
156
- if col_name in computed_col_names:
157
- raise excs.Error(f'Value for computed column {col_name} in row {row}')
158
-
159
- # validate data
160
- col = self._tbl_version_path.get_column(col_name)
161
- try:
162
- # basic sanity checks here
163
- checked_val = col.col_type.create_literal(val)
164
- row[col_name] = checked_val
165
- except TypeError as e:
166
- msg = str(e)
167
- raise excs.Error(f'Error in column {col.name}: {msg[0].lower() + msg[1:]}\nRow: {row}')
168
-
169
- def delete(self, where: Optional['pxt.exprs.Expr'] = None) -> UpdateStatus:
203
+ def _insert_pydantic(
204
+ self, rows: Sequence[pydantic.BaseModel], print_stats: bool = False, fail_on_exception: bool = True
205
+ ) -> UpdateStatus:
206
+ model_class = type(rows[0])
207
+ self._validate_pydantic_model(model_class)
208
+ # convert rows one-by-one in order to be able to print meaningful error messages
209
+ pxt_rows: list[dict[str, Any]] = []
210
+ for i, row in enumerate(rows):
211
+ try:
212
+ pxt_rows.append(row.model_dump(mode='json'))
213
+ except pydantic_core.PydanticSerializationError as e:
214
+ raise excs.Error(f'Row {i}: error serializing pydantic model to JSON:\n{e}') from e
215
+
216
+ # explicitly check that all required columns are present and non-None in the rows,
217
+ # because we ignore nullability when validating the pydantic model
218
+ reqd_col_names = [col.name for col in self._tbl_version_path.columns() if col.is_required_for_insert]
219
+ for i, pxt_row in enumerate(pxt_rows):
220
+ if type(rows[i]) is not model_class:
221
+ raise excs.Error(
222
+ f'Expected an instance of `{model_class.__name__}`; got `{type(rows[i]).__name__}` (in row {i})'
223
+ )
224
+ for col_name in reqd_col_names:
225
+ if pxt_row.get(col_name) is None:
226
+ raise excs.Error(f'Missing required column {col_name!r} in row {i}')
227
+
228
+ status = self._tbl_version.get().insert(
229
+ rows=pxt_rows, query=None, print_stats=print_stats, fail_on_exception=fail_on_exception
230
+ )
231
+ return status
232
+
233
+ def _validate_pydantic_model(self, model: type[pydantic.BaseModel]) -> None:
234
+ """
235
+ Check if a Pydantic model is compatible with this table for insert operations.
236
+
237
+ A model is compatible if:
238
+ - All required table columns have corresponding model fields with compatible types
239
+ - Model does not define fields for computed columns
240
+ - Model field types are compatible with table column types
241
+ """
242
+ assert isinstance(model, type) and issubclass(model, pydantic.BaseModel)
243
+
244
+ schema = self._get_schema()
245
+ required_cols = set(self._tbl_version.get().get_required_col_names())
246
+ computed_cols = set(self._tbl_version.get().get_computed_col_names())
247
+ model_fields = model.model_fields
248
+ model_field_names = set(model_fields.keys())
249
+
250
+ missing_required = required_cols - model_field_names
251
+ if missing_required:
252
+ raise excs.Error(
253
+ f'Pydantic model `{model.__name__}` is missing required columns: ' + ', '.join(missing_required)
254
+ )
255
+
256
+ computed_in_model = computed_cols & model_field_names
257
+ if computed_in_model:
258
+ raise excs.Error(
259
+ f'Pydantic model `{model.__name__}` has fields for computed columns: ' + ', '.join(computed_in_model)
260
+ )
261
+
262
+ # validate type compatibility
263
+ common_fields = model_field_names & set(schema.keys())
264
+ if len(common_fields) == 0:
265
+ raise excs.Error(
266
+ f'Pydantic model `{model.__name__}` has no fields that map to columns in table {self._name!r}'
267
+ )
268
+ for field_name in common_fields:
269
+ pxt_col_type = schema[field_name]
270
+ model_field = model_fields[field_name]
271
+ model_type = model_field.annotation
272
+
273
+ # we ignore nullability: we want to accept optional model fields for required table columns, as long as
274
+ # the model instances provide a non-null value
275
+ # allow_enum=True: model_dump(mode='json') converts enums to their values
276
+ inferred_pxt_type = ts.ColumnType.from_python_type(model_type, infer_pydantic_json=True)
277
+ if inferred_pxt_type is None:
278
+ raise excs.Error(
279
+ f'Pydantic model `{model.__name__}`: cannot infer Pixeltable type for column {field_name!r}'
280
+ )
281
+
282
+ if pxt_col_type.is_media_type():
283
+ # media types require file paths, either as str or Path
284
+ if not inferred_pxt_type.is_string_type():
285
+ raise excs.Error(
286
+ f'Column {field_name!r} requires a `str` or `Path` field in `{model.__name__}`, but it is '
287
+ f'`{model_type.__name__}`'
288
+ )
289
+ else:
290
+ if not pxt_col_type.is_supertype_of(inferred_pxt_type, ignore_nullable=True):
291
+ raise excs.Error(
292
+ f'Pydantic model `{model.__name__}` has incompatible type `{model_type.__name__}` '
293
+ f'for column {field_name!r} (of Pixeltable type `{pxt_col_type}`)'
294
+ )
295
+
296
+ if (
297
+ isinstance(model_type, type)
298
+ and issubclass(model_type, pydantic.BaseModel)
299
+ and not is_json_convertible(model_type)
300
+ ):
301
+ raise excs.Error(
302
+ f'Pydantic model `{model.__name__}` has field {field_name!r} with nested model '
303
+ f'`{model_type.__name__}`, which is not JSON-convertible'
304
+ )
305
+
306
+ def delete(self, where: 'exprs.Expr' | None = None) -> UpdateStatus:
170
307
  """Delete rows in this table.
171
308
 
172
309
  Args:
@@ -181,4 +318,17 @@ class InsertableTable(Table):
181
318
 
182
319
  >>> tbl.delete(tbl.a > 5)
183
320
  """
184
- return self._tbl_version.delete(where=where)
321
+ from pixeltable.catalog import Catalog
322
+
323
+ with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
324
+ return self._tbl_version.get().delete(where=where)
325
+
326
+ def _get_base_table(self) -> 'Table' | None:
327
+ return None
328
+
329
+ @property
330
+ def _effective_base_versions(self) -> list[int | None]:
331
+ return []
332
+
333
+ def _table_descriptor(self) -> str:
334
+ return self._display_str()
@@ -1,17 +1,55 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
+ from typing import NamedTuple
4
5
 
5
6
  from pixeltable import exceptions as excs
6
- from .globals import is_valid_path
7
+
8
+ from .globals import is_valid_identifier
7
9
 
8
10
  _logger = logging.getLogger('pixeltable')
9
11
 
10
- class Path:
11
- def __init__(self, path: str, empty_is_valid: bool = False):
12
- if not is_valid_path(path, empty_is_valid):
13
- raise excs.Error(f"Invalid path format: '{path}'")
14
- self.components = path.split('.')
12
+
13
+ class Path(NamedTuple):
14
+ components: list[str]
15
+ version: int | None = None
16
+
17
+ @classmethod
18
+ def parse(
19
+ cls,
20
+ path: str,
21
+ allow_empty_path: bool = False,
22
+ allow_system_path: bool = False,
23
+ allow_versioned_path: bool = False,
24
+ ) -> Path:
25
+ components: list[str]
26
+ version: int | None
27
+ if ':' in path:
28
+ parts = path.split(':')
29
+ if len(parts) != 2:
30
+ raise excs.Error(f'Invalid path: {path}')
31
+ try:
32
+ components = parts[0].split('.')
33
+ version = int(parts[1])
34
+ except ValueError:
35
+ raise excs.Error(f'Invalid path: {path}') from None
36
+ else:
37
+ components = path.split('.')
38
+ version = None
39
+
40
+ if components == [''] and not allow_empty_path:
41
+ raise excs.Error(f'Invalid path: {path}')
42
+
43
+ if components != [''] and not all(
44
+ is_valid_identifier(c, allow_system_identifiers=allow_system_path, allow_hyphens=True) for c in components
45
+ ):
46
+ raise excs.Error(f'Invalid path: {path}')
47
+
48
+ if version is not None and not allow_versioned_path:
49
+ raise excs.Error(f'Versioned path not allowed here: {path}')
50
+
51
+ assert len(components) > 0
52
+ return Path(components, version)
15
53
 
16
54
  @property
17
55
  def len(self) -> int:
@@ -19,28 +57,28 @@ class Path:
19
57
 
20
58
  @property
21
59
  def name(self) -> str:
22
- assert len(self.components) > 0
23
60
  return self.components[-1]
24
61
 
25
62
  @property
26
63
  def is_root(self) -> bool:
27
- return self.components[0] == ''
64
+ return not self.components[0]
65
+
66
+ @property
67
+ def is_system_path(self) -> bool:
68
+ return self.components[0].startswith('_')
28
69
 
29
70
  @property
30
71
  def parent(self) -> Path:
31
72
  if len(self.components) == 1:
32
- if self.is_root:
33
- return self
34
- else:
35
- return Path('', empty_is_valid=True)
73
+ return ROOT_PATH # Includes the case of the root path, which is its own parent.
36
74
  else:
37
- return Path('.'.join(self.components[:-1]))
75
+ return Path(self.components[:-1])
38
76
 
39
77
  def append(self, name: str) -> Path:
40
78
  if self.is_root:
41
- return Path(name)
79
+ return Path([name])
42
80
  else:
43
- return Path(f'{str(self)}.{name}')
81
+ return Path([*self.components, name])
44
82
 
45
83
  def is_ancestor(self, other: Path, is_parent: bool = False) -> bool:
46
84
  """
@@ -50,9 +88,34 @@ class Path:
50
88
  return False
51
89
  if self.is_root and (other.len == 1 or not is_parent):
52
90
  return True
53
- is_prefix = self.components == other.components[:self.len]
91
+ is_prefix = self.components == other.components[: self.len]
54
92
  return is_prefix and (self.len == (other.len - 1) or not is_parent)
55
93
 
94
+ def ancestors(self) -> list[Path]:
95
+ """
96
+ Return all proper ancestors of this path in top-down order including root.
97
+ If this path is for the root directory, which has no parent, then an empty list is returned.
98
+ """
99
+ if self.is_root:
100
+ return []
101
+ else:
102
+ return [Path(self.components[:i]) if i > 0 else ROOT_PATH for i in range(len(self.components))]
103
+
104
+ def __repr__(self) -> str:
105
+ return repr(str(self))
106
+
56
107
  def __str__(self) -> str:
57
- return '.'.join(self.components)
108
+ base = '.'.join(self.components)
109
+ if self.version is not None:
110
+ return f'{base}:{self.version}'
111
+ else:
112
+ return base
113
+
114
+ def __eq__(self, other: object) -> bool:
115
+ return isinstance(other, Path) and str(self) == str(other)
116
+
117
+ def __hash__(self) -> int:
118
+ return hash(str(self))
119
+
58
120
 
121
+ ROOT_PATH = Path([''])