pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. pixeltable/__init__.py +83 -19
  2. pixeltable/_query.py +1444 -0
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +7 -4
  5. pixeltable/catalog/catalog.py +2394 -119
  6. pixeltable/catalog/column.py +225 -104
  7. pixeltable/catalog/dir.py +38 -9
  8. pixeltable/catalog/globals.py +53 -34
  9. pixeltable/catalog/insertable_table.py +265 -115
  10. pixeltable/catalog/path.py +80 -17
  11. pixeltable/catalog/schema_object.py +28 -43
  12. pixeltable/catalog/table.py +1270 -677
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +1270 -751
  15. pixeltable/catalog/table_version_handle.py +109 -0
  16. pixeltable/catalog/table_version_path.py +137 -42
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +251 -134
  20. pixeltable/config.py +215 -0
  21. pixeltable/env.py +736 -285
  22. pixeltable/exceptions.py +26 -2
  23. pixeltable/exec/__init__.py +7 -2
  24. pixeltable/exec/aggregation_node.py +39 -21
  25. pixeltable/exec/cache_prefetch_node.py +87 -109
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +25 -28
  29. pixeltable/exec/data_row_batch.py +11 -46
  30. pixeltable/exec/exec_context.py +26 -11
  31. pixeltable/exec/exec_node.py +35 -27
  32. pixeltable/exec/expr_eval/__init__.py +3 -0
  33. pixeltable/exec/expr_eval/evaluators.py +365 -0
  34. pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
  35. pixeltable/exec/expr_eval/globals.py +200 -0
  36. pixeltable/exec/expr_eval/row_buffer.py +74 -0
  37. pixeltable/exec/expr_eval/schedulers.py +413 -0
  38. pixeltable/exec/globals.py +35 -0
  39. pixeltable/exec/in_memory_data_node.py +35 -27
  40. pixeltable/exec/object_store_save_node.py +293 -0
  41. pixeltable/exec/row_update_node.py +44 -29
  42. pixeltable/exec/sql_node.py +414 -115
  43. pixeltable/exprs/__init__.py +8 -5
  44. pixeltable/exprs/arithmetic_expr.py +79 -45
  45. pixeltable/exprs/array_slice.py +5 -5
  46. pixeltable/exprs/column_property_ref.py +40 -26
  47. pixeltable/exprs/column_ref.py +254 -61
  48. pixeltable/exprs/comparison.py +14 -9
  49. pixeltable/exprs/compound_predicate.py +9 -10
  50. pixeltable/exprs/data_row.py +213 -72
  51. pixeltable/exprs/expr.py +270 -104
  52. pixeltable/exprs/expr_dict.py +6 -5
  53. pixeltable/exprs/expr_set.py +20 -11
  54. pixeltable/exprs/function_call.py +383 -284
  55. pixeltable/exprs/globals.py +18 -5
  56. pixeltable/exprs/in_predicate.py +7 -7
  57. pixeltable/exprs/inline_expr.py +37 -37
  58. pixeltable/exprs/is_null.py +8 -4
  59. pixeltable/exprs/json_mapper.py +120 -54
  60. pixeltable/exprs/json_path.py +90 -60
  61. pixeltable/exprs/literal.py +61 -16
  62. pixeltable/exprs/method_ref.py +7 -6
  63. pixeltable/exprs/object_ref.py +19 -8
  64. pixeltable/exprs/row_builder.py +238 -75
  65. pixeltable/exprs/rowid_ref.py +53 -15
  66. pixeltable/exprs/similarity_expr.py +65 -50
  67. pixeltable/exprs/sql_element_cache.py +5 -5
  68. pixeltable/exprs/string_op.py +107 -0
  69. pixeltable/exprs/type_cast.py +25 -13
  70. pixeltable/exprs/variable.py +2 -2
  71. pixeltable/func/__init__.py +9 -5
  72. pixeltable/func/aggregate_function.py +197 -92
  73. pixeltable/func/callable_function.py +119 -35
  74. pixeltable/func/expr_template_function.py +101 -48
  75. pixeltable/func/function.py +375 -62
  76. pixeltable/func/function_registry.py +20 -19
  77. pixeltable/func/globals.py +6 -5
  78. pixeltable/func/mcp.py +74 -0
  79. pixeltable/func/query_template_function.py +151 -35
  80. pixeltable/func/signature.py +178 -49
  81. pixeltable/func/tools.py +164 -0
  82. pixeltable/func/udf.py +176 -53
  83. pixeltable/functions/__init__.py +44 -4
  84. pixeltable/functions/anthropic.py +226 -47
  85. pixeltable/functions/audio.py +148 -11
  86. pixeltable/functions/bedrock.py +137 -0
  87. pixeltable/functions/date.py +188 -0
  88. pixeltable/functions/deepseek.py +113 -0
  89. pixeltable/functions/document.py +81 -0
  90. pixeltable/functions/fal.py +76 -0
  91. pixeltable/functions/fireworks.py +72 -20
  92. pixeltable/functions/gemini.py +249 -0
  93. pixeltable/functions/globals.py +208 -53
  94. pixeltable/functions/groq.py +108 -0
  95. pixeltable/functions/huggingface.py +1088 -95
  96. pixeltable/functions/image.py +155 -84
  97. pixeltable/functions/json.py +8 -11
  98. pixeltable/functions/llama_cpp.py +31 -19
  99. pixeltable/functions/math.py +169 -0
  100. pixeltable/functions/mistralai.py +50 -75
  101. pixeltable/functions/net.py +70 -0
  102. pixeltable/functions/ollama.py +29 -36
  103. pixeltable/functions/openai.py +548 -160
  104. pixeltable/functions/openrouter.py +143 -0
  105. pixeltable/functions/replicate.py +15 -14
  106. pixeltable/functions/reve.py +250 -0
  107. pixeltable/functions/string.py +310 -85
  108. pixeltable/functions/timestamp.py +37 -19
  109. pixeltable/functions/together.py +77 -120
  110. pixeltable/functions/twelvelabs.py +188 -0
  111. pixeltable/functions/util.py +7 -2
  112. pixeltable/functions/uuid.py +30 -0
  113. pixeltable/functions/video.py +1528 -117
  114. pixeltable/functions/vision.py +26 -26
  115. pixeltable/functions/voyageai.py +289 -0
  116. pixeltable/functions/whisper.py +19 -10
  117. pixeltable/functions/whisperx.py +179 -0
  118. pixeltable/functions/yolox.py +112 -0
  119. pixeltable/globals.py +716 -236
  120. pixeltable/index/__init__.py +3 -1
  121. pixeltable/index/base.py +17 -21
  122. pixeltable/index/btree.py +32 -22
  123. pixeltable/index/embedding_index.py +155 -92
  124. pixeltable/io/__init__.py +12 -7
  125. pixeltable/io/datarows.py +140 -0
  126. pixeltable/io/external_store.py +83 -125
  127. pixeltable/io/fiftyone.py +24 -33
  128. pixeltable/io/globals.py +47 -182
  129. pixeltable/io/hf_datasets.py +96 -127
  130. pixeltable/io/label_studio.py +171 -156
  131. pixeltable/io/lancedb.py +3 -0
  132. pixeltable/io/pandas.py +136 -115
  133. pixeltable/io/parquet.py +40 -153
  134. pixeltable/io/table_data_conduit.py +702 -0
  135. pixeltable/io/utils.py +100 -0
  136. pixeltable/iterators/__init__.py +8 -4
  137. pixeltable/iterators/audio.py +207 -0
  138. pixeltable/iterators/base.py +9 -3
  139. pixeltable/iterators/document.py +144 -87
  140. pixeltable/iterators/image.py +17 -38
  141. pixeltable/iterators/string.py +15 -12
  142. pixeltable/iterators/video.py +523 -127
  143. pixeltable/metadata/__init__.py +33 -8
  144. pixeltable/metadata/converters/convert_10.py +2 -3
  145. pixeltable/metadata/converters/convert_13.py +2 -2
  146. pixeltable/metadata/converters/convert_15.py +15 -11
  147. pixeltable/metadata/converters/convert_16.py +4 -5
  148. pixeltable/metadata/converters/convert_17.py +4 -5
  149. pixeltable/metadata/converters/convert_18.py +4 -6
  150. pixeltable/metadata/converters/convert_19.py +6 -9
  151. pixeltable/metadata/converters/convert_20.py +3 -6
  152. pixeltable/metadata/converters/convert_21.py +6 -8
  153. pixeltable/metadata/converters/convert_22.py +3 -2
  154. pixeltable/metadata/converters/convert_23.py +33 -0
  155. pixeltable/metadata/converters/convert_24.py +55 -0
  156. pixeltable/metadata/converters/convert_25.py +19 -0
  157. pixeltable/metadata/converters/convert_26.py +23 -0
  158. pixeltable/metadata/converters/convert_27.py +29 -0
  159. pixeltable/metadata/converters/convert_28.py +13 -0
  160. pixeltable/metadata/converters/convert_29.py +110 -0
  161. pixeltable/metadata/converters/convert_30.py +63 -0
  162. pixeltable/metadata/converters/convert_31.py +11 -0
  163. pixeltable/metadata/converters/convert_32.py +15 -0
  164. pixeltable/metadata/converters/convert_33.py +17 -0
  165. pixeltable/metadata/converters/convert_34.py +21 -0
  166. pixeltable/metadata/converters/convert_35.py +9 -0
  167. pixeltable/metadata/converters/convert_36.py +38 -0
  168. pixeltable/metadata/converters/convert_37.py +15 -0
  169. pixeltable/metadata/converters/convert_38.py +39 -0
  170. pixeltable/metadata/converters/convert_39.py +124 -0
  171. pixeltable/metadata/converters/convert_40.py +73 -0
  172. pixeltable/metadata/converters/convert_41.py +12 -0
  173. pixeltable/metadata/converters/convert_42.py +9 -0
  174. pixeltable/metadata/converters/convert_43.py +44 -0
  175. pixeltable/metadata/converters/util.py +44 -18
  176. pixeltable/metadata/notes.py +21 -0
  177. pixeltable/metadata/schema.py +185 -42
  178. pixeltable/metadata/utils.py +74 -0
  179. pixeltable/mypy/__init__.py +3 -0
  180. pixeltable/mypy/mypy_plugin.py +123 -0
  181. pixeltable/plan.py +616 -225
  182. pixeltable/share/__init__.py +3 -0
  183. pixeltable/share/packager.py +797 -0
  184. pixeltable/share/protocol/__init__.py +33 -0
  185. pixeltable/share/protocol/common.py +165 -0
  186. pixeltable/share/protocol/operation_types.py +33 -0
  187. pixeltable/share/protocol/replica.py +119 -0
  188. pixeltable/share/publish.py +349 -0
  189. pixeltable/store.py +398 -232
  190. pixeltable/type_system.py +730 -267
  191. pixeltable/utils/__init__.py +40 -0
  192. pixeltable/utils/arrow.py +201 -29
  193. pixeltable/utils/av.py +298 -0
  194. pixeltable/utils/azure_store.py +346 -0
  195. pixeltable/utils/coco.py +26 -27
  196. pixeltable/utils/code.py +4 -4
  197. pixeltable/utils/console_output.py +46 -0
  198. pixeltable/utils/coroutine.py +24 -0
  199. pixeltable/utils/dbms.py +92 -0
  200. pixeltable/utils/description_helper.py +11 -12
  201. pixeltable/utils/documents.py +60 -61
  202. pixeltable/utils/exception_handler.py +36 -0
  203. pixeltable/utils/filecache.py +38 -22
  204. pixeltable/utils/formatter.py +88 -51
  205. pixeltable/utils/gcs_store.py +295 -0
  206. pixeltable/utils/http.py +133 -0
  207. pixeltable/utils/http_server.py +14 -13
  208. pixeltable/utils/iceberg.py +13 -0
  209. pixeltable/utils/image.py +17 -0
  210. pixeltable/utils/lancedb.py +90 -0
  211. pixeltable/utils/local_store.py +322 -0
  212. pixeltable/utils/misc.py +5 -0
  213. pixeltable/utils/object_stores.py +573 -0
  214. pixeltable/utils/pydantic.py +60 -0
  215. pixeltable/utils/pytorch.py +20 -20
  216. pixeltable/utils/s3_store.py +527 -0
  217. pixeltable/utils/sql.py +32 -5
  218. pixeltable/utils/system.py +30 -0
  219. pixeltable/utils/transactional_directory.py +4 -3
  220. pixeltable-0.5.7.dist-info/METADATA +579 -0
  221. pixeltable-0.5.7.dist-info/RECORD +227 -0
  222. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  223. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  224. pixeltable/__version__.py +0 -3
  225. pixeltable/catalog/named_function.py +0 -36
  226. pixeltable/catalog/path_dict.py +0 -141
  227. pixeltable/dataframe.py +0 -894
  228. pixeltable/exec/expr_eval_node.py +0 -232
  229. pixeltable/ext/__init__.py +0 -14
  230. pixeltable/ext/functions/__init__.py +0 -8
  231. pixeltable/ext/functions/whisperx.py +0 -77
  232. pixeltable/ext/functions/yolox.py +0 -157
  233. pixeltable/tool/create_test_db_dump.py +0 -311
  234. pixeltable/tool/create_test_video.py +0 -81
  235. pixeltable/tool/doc_plugins/griffe.py +0 -50
  236. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  237. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  238. pixeltable/tool/embed_udf.py +0 -9
  239. pixeltable/tool/mypy_plugin.py +0 -55
  240. pixeltable/utils/media_store.py +0 -76
  241. pixeltable/utils/s3.py +0 -16
  242. pixeltable-0.2.26.dist-info/METADATA +0 -400
  243. pixeltable-0.2.26.dist-info/RECORD +0 -156
  244. pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
  245. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -2,27 +2,28 @@ from __future__ import annotations
2
2
 
3
3
  import inspect
4
4
  import logging
5
- from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional
5
+ from typing import TYPE_CHECKING, Any, List, Literal
6
6
  from uuid import UUID
7
7
 
8
- import sqlalchemy.orm as orm
9
-
10
8
  import pixeltable.exceptions as excs
11
9
  import pixeltable.metadata.schema as md_schema
12
10
  import pixeltable.type_system as ts
13
11
  from pixeltable import catalog, exprs, func
14
- from pixeltable.env import Env
15
12
  from pixeltable.iterators import ComponentIterator
16
13
 
17
- from .catalog import Catalog
18
14
  from .column import Column
19
- from .globals import _POS_COLUMN_NAME, UpdateStatus, MediaValidation
15
+ from .globals import _POS_COLUMN_NAME, MediaValidation
20
16
  from .table import Table
21
- from .table_version import TableVersion
17
+ from .table_version import TableVersion, TableVersionKey, TableVersionMd
18
+ from .table_version_handle import TableVersionHandle
22
19
  from .table_version_path import TableVersionPath
20
+ from .tbl_ops import CreateStoreTableOp, LoadViewOp, TableOp
21
+ from .update_status import UpdateStatus
23
22
 
24
23
  if TYPE_CHECKING:
25
- import pixeltable as pxt
24
+ from pixeltable.catalog.table import TableMetadata
25
+ from pixeltable.globals import TableDataSource
26
+ from pixeltable.plan import SampleClause
26
27
 
27
28
  _logger = logging.getLogger('pixeltable')
28
29
 
@@ -35,34 +36,87 @@ class View(Table):
35
36
  The exception is a snapshot view without a predicate and without additional columns: in that case, the view
36
37
  is simply a reference to a specific set of base versions.
37
38
  """
38
- def __init__(
39
- self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath, base_id: UUID,
40
- snapshot_only: bool):
39
+
40
+ def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath, snapshot_only: bool):
41
41
  super().__init__(id, dir_id, name, tbl_version_path)
42
- assert base_id in catalog.Catalog.get().tbl_dependents
43
- self._base_id = base_id # keep a reference to the base Table ID, so that we can keep track of its dependents
44
42
  self._snapshot_only = snapshot_only
43
+ if not snapshot_only:
44
+ self._tbl_version = tbl_version_path.tbl_version
45
+
46
+ def _display_name(self) -> str:
47
+ if self._tbl_version_path.is_replica():
48
+ return 'replica'
49
+ if self._tbl_version_path.is_snapshot():
50
+ return 'snapshot'
51
+ if self._tbl_version_path.is_view():
52
+ return 'view'
53
+ return 'table'
45
54
 
46
55
  @classmethod
47
- def _display_name(cls) -> str:
48
- return 'view'
56
+ def select_list_to_additional_columns(cls, select_list: list[tuple[exprs.Expr, str | None]]) -> dict[str, dict]:
57
+ """Returns a list of columns in the same format as the additional_columns parameter of View.create.
58
+ The source is the list of expressions from a select() statement on a Query.
59
+ If the column is a ColumnRef, to a base table column, it is marked to not be stored.sy
60
+ """
61
+ from pixeltable._query import Query
62
+
63
+ r: dict[str, dict] = {}
64
+ exps, names = Query._normalize_select_list([], select_list)
65
+ for expr, name in zip(exps, names):
66
+ stored = not isinstance(expr, exprs.ColumnRef)
67
+ r[name] = {'value': expr, 'stored': stored}
68
+ return r
49
69
 
50
70
  @classmethod
51
71
  def _create(
52
- cls, dir_id: UUID, name: str, base: TableVersionPath, additional_columns: dict[str, Any],
53
- predicate: Optional['pxt.exprs.Expr'], is_snapshot: bool, num_retained_versions: int, comment: str,
54
- media_validation: MediaValidation,
55
- iterator_cls: Optional[type[ComponentIterator]], iterator_args: Optional[dict]
56
- ) -> View:
57
- columns = cls._create_columns(additional_columns)
72
+ cls,
73
+ dir_id: UUID,
74
+ name: str,
75
+ base: TableVersionPath,
76
+ select_list: list[tuple[exprs.Expr, str | None]] | None,
77
+ additional_columns: dict[str, Any],
78
+ predicate: 'exprs.Expr' | None,
79
+ sample_clause: 'SampleClause' | None,
80
+ is_snapshot: bool,
81
+ create_default_idxs: bool,
82
+ num_retained_versions: int,
83
+ comment: str,
84
+ media_validation: MediaValidation,
85
+ iterator_cls: type[ComponentIterator] | None,
86
+ iterator_args: dict | None,
87
+ ) -> tuple[TableVersionMd, list[TableOp] | None]:
88
+ from pixeltable.plan import SampleClause
89
+
90
+ # Convert select_list to more additional_columns if present
91
+ include_base_columns: bool = select_list is None
92
+ select_list_columns: List[Column] = []
93
+ if not include_base_columns:
94
+ r = cls.select_list_to_additional_columns(select_list)
95
+ select_list_columns = cls._create_columns(r)
96
+
97
+ columns_from_additional_columns = cls._create_columns(additional_columns)
98
+ columns = select_list_columns + columns_from_additional_columns
58
99
  cls._verify_schema(columns)
59
100
 
60
- # verify that filter can be evaluated in the context of the base
101
+ # verify that filters can be evaluated in the context of the base
61
102
  if predicate is not None:
62
103
  if not predicate.is_bound_by([base]):
63
- raise excs.Error(f'Filter cannot be computed in the context of the base {base.tbl_name()}')
104
+ raise excs.Error(f'View filter cannot be computed in the context of the base table {base.tbl_name()!r}')
64
105
  # create a copy that we can modify and store
65
106
  predicate = predicate.copy()
107
+ if sample_clause is not None:
108
+ # make sure that the sample clause can be computed in the context of the base
109
+ if sample_clause.stratify_exprs is not None and not all(
110
+ stratify_expr.is_bound_by([base]) for stratify_expr in sample_clause.stratify_exprs
111
+ ):
112
+ raise excs.Error(
113
+ f'View sample clause cannot be computed in the context of the base table {base.tbl_name()!r}'
114
+ )
115
+ # create a copy that we can modify and store
116
+ sc = sample_clause
117
+ sample_clause = SampleClause(
118
+ sc.version, sc.n, sc.n_per_stratum, sc.fraction, sc.seed, sc.stratify_exprs.copy()
119
+ )
66
120
 
67
121
  # same for value exprs
68
122
  for col in columns:
@@ -71,108 +125,118 @@ class View(Table):
71
125
  # make sure that the value can be computed in the context of the base
72
126
  if col.value_expr is not None and not col.value_expr.is_bound_by([base]):
73
127
  raise excs.Error(
74
- f'Column {col.name}: value expression cannot be computed in the context of the base {base.tbl_name()}')
128
+ f'Column {col.name!r}: Value expression cannot be computed in the context of the '
129
+ f'base table {base.tbl_name()!r}'
130
+ )
75
131
 
76
132
  if iterator_cls is not None:
77
133
  assert iterator_args is not None
78
134
 
79
135
  # validate iterator_args
80
136
  py_signature = inspect.signature(iterator_cls.__init__)
137
+
138
+ # make sure iterator_args can be used to instantiate iterator_cls
139
+ bound_args: dict[str, Any]
81
140
  try:
82
- # make sure iterator_args can be used to instantiate iterator_cls
83
141
  bound_args = py_signature.bind(None, **iterator_args).arguments # None: arg for self
84
- # we ignore 'self'
85
- first_param_name = next(iter(py_signature.parameters)) # can't guarantee it's actually 'self'
86
- del bound_args[first_param_name]
87
-
88
- # construct Signature and type-check bound_args
89
- params = [
90
- func.Parameter(param_name, param_type, kind=inspect.Parameter.POSITIONAL_OR_KEYWORD)
91
- for param_name, param_type in iterator_cls.input_schema().items()
92
- ]
93
- sig = func.Signature(ts.InvalidType(), params)
94
- from pixeltable.exprs import FunctionCall
95
- FunctionCall.normalize_args(iterator_cls.__name__, sig, bound_args)
96
- except TypeError as e:
97
- raise excs.Error(f'Cannot instantiate iterator with given arguments: {e}')
142
+ except TypeError as exc:
143
+ raise excs.Error(f'Invalid iterator arguments: {exc}') from exc
144
+ # we ignore 'self'
145
+ first_param_name = next(iter(py_signature.parameters)) # can't guarantee it's actually 'self'
146
+ del bound_args[first_param_name]
147
+
148
+ # construct Signature and type-check bound_args
149
+ params = [
150
+ func.Parameter(param_name, param_type, kind=inspect.Parameter.POSITIONAL_OR_KEYWORD)
151
+ for param_name, param_type in iterator_cls.input_schema().items()
152
+ ]
153
+ sig = func.Signature(ts.InvalidType(), params)
154
+
155
+ expr_args = {k: exprs.Expr.from_object(v) for k, v in bound_args.items()}
156
+ sig.validate_args(expr_args, context=f'in iterator of type `{iterator_cls.__name__}`')
157
+ literal_args = {k: v.val if isinstance(v, exprs.Literal) else v for k, v in expr_args.items()}
98
158
 
99
159
  # prepend pos and output_schema columns to cols:
100
160
  # a component view exposes the pos column of its rowid;
101
161
  # we create that column here, so it gets assigned a column id;
102
162
  # stored=False: it is not stored separately (it's already stored as part of the rowid)
103
- iterator_cols = [Column(_POS_COLUMN_NAME, ts.IntType(), stored=False)]
104
- output_dict, unstored_cols = iterator_cls.output_schema(**bound_args)
105
- iterator_cols.extend([
106
- Column(col_name, col_type, stored=col_name not in unstored_cols)
107
- for col_name, col_type in output_dict.items()
108
- ])
163
+ iterator_cols = [Column(_POS_COLUMN_NAME, ts.IntType(), is_iterator_col=True, stored=False)]
164
+ output_dict, unstored_cols = iterator_cls.output_schema(**literal_args)
165
+ iterator_cols.extend(
166
+ [
167
+ Column(col_name, col_type, is_iterator_col=True, stored=col_name not in unstored_cols)
168
+ for col_name, col_type in output_dict.items()
169
+ ]
170
+ )
109
171
 
110
172
  iterator_col_names = {col.name for col in iterator_cols}
111
173
  for col in columns:
112
174
  if col.name in iterator_col_names:
113
- raise excs.Error(f'Duplicate name: column {col.name} is already present in the iterator output schema')
175
+ raise excs.Error(
176
+ f'Duplicate name: column {col.name!r} is already present in the iterator output schema'
177
+ )
114
178
  columns = iterator_cols + columns
115
179
 
116
- with orm.Session(Env.get().engine, future=True) as session:
117
- from pixeltable.exprs import InlineDict
118
- iterator_args_expr: exprs.Expr = InlineDict(iterator_args) if iterator_args is not None else None
119
- iterator_class_fqn = f'{iterator_cls.__module__}.{iterator_cls.__name__}' if iterator_cls is not None \
120
- else None
121
- base_version_path = cls._get_snapshot_path(base) if is_snapshot else base
122
- base_versions = [
123
- (tbl_version.id.hex, tbl_version.version if is_snapshot or tbl_version.is_snapshot else None)
124
- for tbl_version in base_version_path.get_tbl_versions()
125
- ]
180
+ from pixeltable.exprs import InlineDict
126
181
 
127
- # if this is a snapshot, we need to retarget all exprs to the snapshot tbl versions
128
- if is_snapshot:
129
- predicate = predicate.retarget(base_version_path) if predicate is not None else None
130
- iterator_args_expr = iterator_args_expr.retarget(base_version_path) \
131
- if iterator_args_expr is not None else None
132
- for col in columns:
133
- if col.value_expr is not None:
134
- col.set_value_expr(col.value_expr.retarget(base_version_path))
135
-
136
- view_md = md_schema.ViewMd(
137
- is_snapshot=is_snapshot, predicate=predicate.as_dict() if predicate is not None else None,
138
- base_versions=base_versions,
139
- iterator_class_fqn=iterator_class_fqn,
140
- iterator_args=iterator_args_expr.as_dict() if iterator_args_expr is not None else None)
141
-
142
- id, tbl_version = TableVersion.create(
143
- session, dir_id, name, columns, num_retained_versions, comment, media_validation=media_validation,
144
- base_path=base_version_path, view_md=view_md)
145
- if tbl_version is None:
146
- # this is purely a snapshot: we use the base's tbl version path
147
- view = cls(id, dir_id, name, base_version_path, base.tbl_id(), snapshot_only=True)
148
- _logger.info(f'created snapshot {name}')
149
- else:
150
- view = cls(
151
- id, dir_id, name, TableVersionPath(tbl_version, base=base_version_path), base.tbl_id(),
152
- snapshot_only=False)
153
- _logger.info(f'Created view `{name}`, id={tbl_version.id}')
154
-
155
- from pixeltable.plan import Planner
156
- plan, num_values_per_row = Planner.create_view_load_plan(view._tbl_version_path)
157
- num_rows, num_excs, cols_with_excs = tbl_version.store_tbl.insert_rows(
158
- plan, session.connection(), v_min=tbl_version.version)
159
- print(f'Created view `{name}` with {num_rows} rows, {num_excs} exceptions.')
160
-
161
- session.commit()
162
- cat = Catalog.get()
163
- cat.tbl_dependents[view._id] = []
164
- cat.tbl_dependents[base.tbl_id()].append(view)
165
- cat.tbls[view._id] = view
166
- return view
182
+ iterator_args_expr: exprs.Expr = InlineDict(iterator_args) if iterator_args is not None else None
183
+ iterator_class_fqn = f'{iterator_cls.__module__}.{iterator_cls.__name__}' if iterator_cls is not None else None
184
+ base_version_path = cls._get_snapshot_path(base) if is_snapshot else base
185
+
186
+ # if this is a snapshot, we need to retarget all exprs to the snapshot tbl versions
187
+ if is_snapshot:
188
+ predicate = predicate.retarget(base_version_path) if predicate is not None else None
189
+ if sample_clause is not None:
190
+ exprs.Expr.retarget_list(sample_clause.stratify_exprs, base_version_path)
191
+ iterator_args_expr = (
192
+ iterator_args_expr.retarget(base_version_path) if iterator_args_expr is not None else None
193
+ )
194
+ for col in columns:
195
+ if col.value_expr is not None:
196
+ col.set_value_expr(col.value_expr.retarget(base_version_path))
197
+
198
+ view_md = md_schema.ViewMd(
199
+ is_snapshot=is_snapshot,
200
+ include_base_columns=include_base_columns,
201
+ predicate=predicate.as_dict() if predicate is not None else None,
202
+ sample_clause=sample_clause.as_dict() if sample_clause is not None else None,
203
+ base_versions=base_version_path.as_md(),
204
+ iterator_class_fqn=iterator_class_fqn,
205
+ iterator_args=iterator_args_expr.as_dict() if iterator_args_expr is not None else None,
206
+ )
207
+
208
+ md = TableVersion.create_initial_md(
209
+ name,
210
+ columns,
211
+ num_retained_versions,
212
+ comment,
213
+ media_validation=media_validation,
214
+ view_md=view_md,
215
+ create_default_idxs=create_default_idxs,
216
+ )
217
+ if md.tbl_md.is_pure_snapshot:
218
+ # this is purely a snapshot: no store table to create or load
219
+ return md, None
220
+ else:
221
+ tbl_id = md.tbl_md.tbl_id
222
+ key = TableVersionKey(UUID(tbl_id), 0 if is_snapshot else None, None)
223
+ view_path = TableVersionPath(TableVersionHandle(key), base=base_version_path)
224
+ ops = [
225
+ TableOp(
226
+ tbl_id=tbl_id, op_sn=0, num_ops=2, needs_xact=False, create_store_table_op=CreateStoreTableOp()
227
+ ),
228
+ TableOp(
229
+ tbl_id=tbl_id, op_sn=1, num_ops=2, needs_xact=True, load_view_op=LoadViewOp(view_path.as_dict())
230
+ ),
231
+ ]
232
+ return md, ops
167
233
 
168
234
  @classmethod
169
- def _verify_column(
170
- cls, col: Column, existing_column_names: set[str], existing_query_names: Optional[set[str]] = None
171
- ) -> None:
235
+ def _verify_column(cls, col: Column) -> None:
172
236
  # make sure that columns are nullable or have a default
173
237
  if not col.col_type.nullable and not col.is_computed:
174
- raise excs.Error(f'Column {col.name}: non-computed columns in views must be nullable')
175
- super()._verify_column(col, existing_column_names, existing_query_names)
238
+ raise excs.Error(f'Column {col.name!r}: Non-computed columns in views must be nullable')
239
+ super()._verify_column(col)
176
240
 
177
241
  @classmethod
178
242
  def _get_snapshot_path(cls, tbl_version_path: TableVersionPath) -> TableVersionPath:
@@ -182,46 +246,99 @@ class View(Table):
182
246
  """
183
247
  if tbl_version_path.is_snapshot():
184
248
  return tbl_version_path
185
- tbl_version = tbl_version_path.tbl_version
186
- if not tbl_version.is_snapshot:
187
- # create and register snapshot version
188
- tbl_version = tbl_version.create_snapshot_copy()
189
- assert tbl_version.is_snapshot
249
+ tbl_version = tbl_version_path.tbl_version.get()
250
+ assert not tbl_version.is_snapshot
190
251
 
191
252
  return TableVersionPath(
192
- tbl_version,
193
- base=cls._get_snapshot_path(tbl_version_path.base) if tbl_version_path.base is not None else None)
194
-
195
- def _drop(self) -> None:
196
- cat = catalog.Catalog.get()
197
- # verify all dependents are deleted by now
198
- for dep in cat.tbl_dependents[self._id]:
199
- assert dep._is_dropped
200
- if self._snapshot_only:
201
- # there is not TableVersion to drop
202
- self._check_is_dropped()
203
- self.is_dropped = True
204
- with Env.get().engine.begin() as conn:
205
- TableVersion.delete_md(self._id, conn)
206
- # update catalog
207
- cat = catalog.Catalog.get()
208
- del cat.tbls[self._id]
209
- else:
210
- super()._drop()
211
- cat.tbl_dependents[self._base_id].remove(self)
212
- del cat.tbl_dependents[self._id]
253
+ TableVersionHandle(TableVersionKey(tbl_version.id, tbl_version.version, None)),
254
+ base=cls._get_snapshot_path(tbl_version_path.base) if tbl_version_path.base is not None else None,
255
+ )
256
+
257
+ def _is_named_pure_snapshot(self) -> bool:
258
+ """
259
+ Returns True if this is a named pure snapshot (i.e., a pure snapshot that is a separate schema object).
260
+ """
261
+ return self._id != self._tbl_version_path.tbl_id
262
+
263
+ def _is_anonymous_snapshot(self) -> bool:
264
+ """
265
+ Returns True if this is an unnamed snapshot (i.e., a snapshot that is not a separate schema object).
266
+ """
267
+ return self._snapshot_only and self._id == self._tbl_version_path.tbl_id
213
268
 
214
- def get_metadata(self) -> dict[str, Any]:
215
- md = super().get_metadata()
269
+ def _get_metadata(self) -> 'TableMetadata':
270
+ md = super()._get_metadata()
216
271
  md['is_view'] = True
217
272
  md['is_snapshot'] = self._tbl_version_path.is_snapshot()
273
+ if self._is_anonymous_snapshot():
274
+ # Update name and path with version qualifiers.
275
+ md['name'] = f'{self._name}:{self._tbl_version_path.version()}'
276
+ md['path'] = f'{self._path()}:{self._tbl_version_path.version()}'
277
+ base_tbl_id = self._base_tbl_id
278
+ if base_tbl_id is not None:
279
+ base_tbl = self._get_base_table()
280
+ base_path = '<anonymous base table>' if base_tbl is None else base_tbl._path()
281
+ base_version = self._effective_base_versions[0]
282
+ md['base'] = base_path if base_version is None else f'{base_path}:{base_version}'
218
283
  return md
219
284
 
220
285
  def insert(
221
- self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
222
- on_error: Literal['abort', 'ignore'] = 'abort', **kwargs: Any
286
+ self,
287
+ source: TableDataSource | None = None,
288
+ /,
289
+ *,
290
+ source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
291
+ schema_overrides: dict[str, ts.ColumnType] | None = None,
292
+ on_error: Literal['abort', 'ignore'] = 'abort',
293
+ print_stats: bool = False,
294
+ **kwargs: Any,
223
295
  ) -> UpdateStatus:
224
- raise excs.Error(f'{self._display_name()} {self._name!r}: cannot insert into view')
296
+ raise excs.Error(f'{self._display_str()}: Cannot insert into a {self._display_name()}.')
297
+
298
+ def delete(self, where: exprs.Expr | None = None) -> UpdateStatus:
299
+ raise excs.Error(f'{self._display_str()}: Cannot delete from a {self._display_name()}.')
300
+
301
+ @property
302
+ def _base_tbl_id(self) -> UUID | None:
303
+ if self._tbl_version_path.tbl_id != self._id:
304
+ # _tbl_version_path represents a different schema object from this one. This can only happen if this is a
305
+ # named pure snapshot.
306
+ return self._tbl_version_path.tbl_id
307
+ if self._tbl_version_path.base is None:
308
+ return None
309
+ return self._tbl_version_path.base.tbl_id
310
+
311
+ def _get_base_table(self) -> 'Table' | None:
312
+ """Returns None if there is no base table, or if the base table is hidden."""
313
+ base_tbl_id = self._base_tbl_id
314
+ if base_tbl_id is None:
315
+ return None
316
+ with catalog.Catalog.get().begin_xact(tbl_id=base_tbl_id, for_write=False):
317
+ return catalog.Catalog.get().get_table_by_id(base_tbl_id)
318
+
319
+ @property
320
+ def _effective_base_versions(self) -> list[int | None]:
321
+ effective_versions = [tv.effective_version for tv in self._tbl_version_path.get_tbl_versions()]
322
+ if self._snapshot_only and not self._is_anonymous_snapshot():
323
+ return effective_versions # Named pure snapshot
324
+ else:
325
+ return effective_versions[1:]
326
+
327
+ def _table_descriptor(self) -> str:
328
+ result = [self._display_str()]
329
+ bases_descrs: list[str] = []
330
+ for base, effective_version in zip(self._get_base_tables(), self._effective_base_versions):
331
+ if effective_version is None:
332
+ bases_descrs.append(f'{base._path()!r}')
333
+ else:
334
+ base_descr = f'{base._path()}:{effective_version}'
335
+ bases_descrs.append(f'{base_descr!r}')
336
+ if len(bases_descrs) > 0:
337
+ # bases_descrs can be empty in the case of a table-replica
338
+ result.append(f' (of {", ".join(bases_descrs)})')
225
339
 
226
- def delete(self, where: Optional[exprs.Expr] = None) -> UpdateStatus:
227
- raise excs.Error(f'{self._display_name()} {self._name!r}: cannot delete from view')
340
+ if self._tbl_version_path.tbl_version.get().predicate is not None:
341
+ result.append(f'\nWhere: {self._tbl_version_path.tbl_version.get().predicate!s}')
342
+ if self._tbl_version_path.tbl_version.get().sample_clause is not None:
343
+ result.append(f'\nSample: {self._tbl_version.get().sample_clause!s}')
344
+ return ''.join(result)