pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic; see the package's registry page for more details.

Files changed (202)
  1. pixeltable/__init__.py +23 -5
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/__init__.py +5 -3
  4. pixeltable/catalog/catalog.py +1318 -404
  5. pixeltable/catalog/column.py +186 -115
  6. pixeltable/catalog/dir.py +1 -2
  7. pixeltable/catalog/globals.py +11 -43
  8. pixeltable/catalog/insertable_table.py +167 -79
  9. pixeltable/catalog/path.py +61 -23
  10. pixeltable/catalog/schema_object.py +9 -10
  11. pixeltable/catalog/table.py +626 -308
  12. pixeltable/catalog/table_metadata.py +101 -0
  13. pixeltable/catalog/table_version.py +713 -569
  14. pixeltable/catalog/table_version_handle.py +37 -6
  15. pixeltable/catalog/table_version_path.py +42 -29
  16. pixeltable/catalog/tbl_ops.py +50 -0
  17. pixeltable/catalog/update_status.py +191 -0
  18. pixeltable/catalog/view.py +108 -94
  19. pixeltable/config.py +128 -22
  20. pixeltable/dataframe.py +188 -100
  21. pixeltable/env.py +407 -136
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +3 -0
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +231 -0
  27. pixeltable/exec/cell_reconstruction_node.py +135 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +7 -6
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +190 -30
  37. pixeltable/exec/globals.py +32 -0
  38. pixeltable/exec/in_memory_data_node.py +18 -18
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +206 -101
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +34 -30
  46. pixeltable/exprs/column_ref.py +92 -96
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +152 -55
  50. pixeltable/exprs/expr.py +62 -43
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +75 -37
  54. pixeltable/exprs/globals.py +1 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +10 -27
  57. pixeltable/exprs/is_null.py +1 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +5 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +127 -53
  64. pixeltable/exprs/rowid_ref.py +8 -12
  65. pixeltable/exprs/similarity_expr.py +50 -25
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +10 -10
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +20 -18
  78. pixeltable/func/signature.py +43 -16
  79. pixeltable/func/tools.py +23 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +6 -0
  82. pixeltable/functions/anthropic.py +93 -33
  83. pixeltable/functions/audio.py +114 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +1 -1
  86. pixeltable/functions/deepseek.py +20 -9
  87. pixeltable/functions/fireworks.py +2 -2
  88. pixeltable/functions/gemini.py +28 -11
  89. pixeltable/functions/globals.py +13 -13
  90. pixeltable/functions/groq.py +108 -0
  91. pixeltable/functions/huggingface.py +1046 -23
  92. pixeltable/functions/image.py +9 -18
  93. pixeltable/functions/llama_cpp.py +23 -8
  94. pixeltable/functions/math.py +3 -4
  95. pixeltable/functions/mistralai.py +4 -15
  96. pixeltable/functions/ollama.py +16 -9
  97. pixeltable/functions/openai.py +104 -82
  98. pixeltable/functions/openrouter.py +143 -0
  99. pixeltable/functions/replicate.py +2 -2
  100. pixeltable/functions/reve.py +250 -0
  101. pixeltable/functions/string.py +21 -28
  102. pixeltable/functions/timestamp.py +13 -14
  103. pixeltable/functions/together.py +4 -6
  104. pixeltable/functions/twelvelabs.py +92 -0
  105. pixeltable/functions/util.py +6 -1
  106. pixeltable/functions/video.py +1388 -106
  107. pixeltable/functions/vision.py +7 -7
  108. pixeltable/functions/whisper.py +15 -7
  109. pixeltable/functions/whisperx.py +179 -0
  110. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  111. pixeltable/globals.py +332 -105
  112. pixeltable/index/base.py +13 -22
  113. pixeltable/index/btree.py +23 -22
  114. pixeltable/index/embedding_index.py +32 -44
  115. pixeltable/io/__init__.py +4 -2
  116. pixeltable/io/datarows.py +7 -6
  117. pixeltable/io/external_store.py +49 -77
  118. pixeltable/io/fiftyone.py +11 -11
  119. pixeltable/io/globals.py +29 -28
  120. pixeltable/io/hf_datasets.py +17 -9
  121. pixeltable/io/label_studio.py +70 -66
  122. pixeltable/io/lancedb.py +3 -0
  123. pixeltable/io/pandas.py +12 -11
  124. pixeltable/io/parquet.py +13 -93
  125. pixeltable/io/table_data_conduit.py +71 -47
  126. pixeltable/io/utils.py +3 -3
  127. pixeltable/iterators/__init__.py +2 -1
  128. pixeltable/iterators/audio.py +21 -11
  129. pixeltable/iterators/document.py +116 -55
  130. pixeltable/iterators/image.py +5 -2
  131. pixeltable/iterators/video.py +293 -13
  132. pixeltable/metadata/__init__.py +4 -2
  133. pixeltable/metadata/converters/convert_18.py +2 -2
  134. pixeltable/metadata/converters/convert_19.py +2 -2
  135. pixeltable/metadata/converters/convert_20.py +2 -2
  136. pixeltable/metadata/converters/convert_21.py +2 -2
  137. pixeltable/metadata/converters/convert_22.py +2 -2
  138. pixeltable/metadata/converters/convert_24.py +2 -2
  139. pixeltable/metadata/converters/convert_25.py +2 -2
  140. pixeltable/metadata/converters/convert_26.py +2 -2
  141. pixeltable/metadata/converters/convert_29.py +4 -4
  142. pixeltable/metadata/converters/convert_34.py +2 -2
  143. pixeltable/metadata/converters/convert_36.py +2 -2
  144. pixeltable/metadata/converters/convert_37.py +15 -0
  145. pixeltable/metadata/converters/convert_38.py +39 -0
  146. pixeltable/metadata/converters/convert_39.py +124 -0
  147. pixeltable/metadata/converters/convert_40.py +73 -0
  148. pixeltable/metadata/converters/util.py +13 -12
  149. pixeltable/metadata/notes.py +4 -0
  150. pixeltable/metadata/schema.py +79 -42
  151. pixeltable/metadata/utils.py +74 -0
  152. pixeltable/mypy/__init__.py +3 -0
  153. pixeltable/mypy/mypy_plugin.py +123 -0
  154. pixeltable/plan.py +274 -223
  155. pixeltable/share/__init__.py +1 -1
  156. pixeltable/share/packager.py +259 -129
  157. pixeltable/share/protocol/__init__.py +34 -0
  158. pixeltable/share/protocol/common.py +170 -0
  159. pixeltable/share/protocol/operation_types.py +33 -0
  160. pixeltable/share/protocol/replica.py +109 -0
  161. pixeltable/share/publish.py +213 -57
  162. pixeltable/store.py +238 -175
  163. pixeltable/type_system.py +104 -63
  164. pixeltable/utils/__init__.py +2 -3
  165. pixeltable/utils/arrow.py +108 -13
  166. pixeltable/utils/av.py +298 -0
  167. pixeltable/utils/azure_store.py +305 -0
  168. pixeltable/utils/code.py +3 -3
  169. pixeltable/utils/console_output.py +4 -1
  170. pixeltable/utils/coroutine.py +6 -23
  171. pixeltable/utils/dbms.py +31 -5
  172. pixeltable/utils/description_helper.py +4 -5
  173. pixeltable/utils/documents.py +5 -6
  174. pixeltable/utils/exception_handler.py +7 -30
  175. pixeltable/utils/filecache.py +6 -6
  176. pixeltable/utils/formatter.py +4 -6
  177. pixeltable/utils/gcs_store.py +283 -0
  178. pixeltable/utils/http_server.py +2 -3
  179. pixeltable/utils/iceberg.py +1 -2
  180. pixeltable/utils/image.py +17 -0
  181. pixeltable/utils/lancedb.py +88 -0
  182. pixeltable/utils/local_store.py +316 -0
  183. pixeltable/utils/misc.py +5 -0
  184. pixeltable/utils/object_stores.py +528 -0
  185. pixeltable/utils/pydantic.py +60 -0
  186. pixeltable/utils/pytorch.py +5 -6
  187. pixeltable/utils/s3_store.py +392 -0
  188. pixeltable-0.4.20.dist-info/METADATA +587 -0
  189. pixeltable-0.4.20.dist-info/RECORD +218 -0
  190. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
  191. pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
  192. pixeltable/__version__.py +0 -3
  193. pixeltable/ext/__init__.py +0 -17
  194. pixeltable/ext/functions/__init__.py +0 -11
  195. pixeltable/ext/functions/whisperx.py +0 -77
  196. pixeltable/utils/media_store.py +0 -77
  197. pixeltable/utils/s3.py +0 -17
  198. pixeltable/utils/sample.py +0 -25
  199. pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
  200. pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
  201. pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
  202. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
@@ -2,29 +2,28 @@ from __future__ import annotations
2
2
 
3
3
  import inspect
4
4
  import logging
5
- from typing import TYPE_CHECKING, Any, List, Literal, Optional
5
+ from typing import TYPE_CHECKING, Any, List, Literal
6
6
  from uuid import UUID
7
7
 
8
8
  import pixeltable.exceptions as excs
9
9
  import pixeltable.metadata.schema as md_schema
10
10
  import pixeltable.type_system as ts
11
11
  from pixeltable import catalog, exprs, func
12
- from pixeltable.env import Env
13
12
  from pixeltable.iterators import ComponentIterator
14
13
 
15
- if TYPE_CHECKING:
16
- from pixeltable.plan import SampleClause
17
-
18
-
19
14
  from .column import Column
20
- from .globals import _POS_COLUMN_NAME, MediaValidation, UpdateStatus
15
+ from .globals import _POS_COLUMN_NAME, MediaValidation
21
16
  from .table import Table
22
- from .table_version import TableVersion
17
+ from .table_version import TableVersion, TableVersionCompleteMd
23
18
  from .table_version_handle import TableVersionHandle
24
19
  from .table_version_path import TableVersionPath
20
+ from .tbl_ops import CreateStoreTableOp, LoadViewOp, TableOp
21
+ from .update_status import UpdateStatus
25
22
 
26
23
  if TYPE_CHECKING:
24
+ from pixeltable.catalog.table import TableMetadata
27
25
  from pixeltable.globals import TableDataSource
26
+ from pixeltable.plan import SampleClause
28
27
 
29
28
  _logger = logging.getLogger('pixeltable')
30
29
 
@@ -41,13 +40,20 @@ class View(Table):
41
40
  def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath, snapshot_only: bool):
42
41
  super().__init__(id, dir_id, name, tbl_version_path)
43
42
  self._snapshot_only = snapshot_only
43
+ if not snapshot_only:
44
+ self._tbl_version = tbl_version_path.tbl_version
45
+
46
+ def _display_name(self) -> str:
47
+ if self._tbl_version_path.is_replica():
48
+ return 'replica'
49
+ if self._tbl_version_path.is_snapshot():
50
+ return 'snapshot'
51
+ if self._tbl_version_path.is_view():
52
+ return 'view'
53
+ return 'table'
44
54
 
45
55
  @classmethod
46
- def _display_name(cls) -> str:
47
- return 'view'
48
-
49
- @classmethod
50
- def select_list_to_additional_columns(cls, select_list: list[tuple[exprs.Expr, Optional[str]]]) -> dict[str, dict]:
56
+ def select_list_to_additional_columns(cls, select_list: list[tuple[exprs.Expr, str | None]]) -> dict[str, dict]:
51
57
  """Returns a list of columns in the same format as the additional_columns parameter of View.create.
52
58
  The source is the list of expressions from a select() statement on a DataFrame.
53
59
  If the column is a ColumnRef, to a base table column, it is marked to not be stored.sy
@@ -67,17 +73,18 @@ class View(Table):
67
73
  dir_id: UUID,
68
74
  name: str,
69
75
  base: TableVersionPath,
70
- select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]],
76
+ select_list: list[tuple[exprs.Expr, str | None]] | None,
71
77
  additional_columns: dict[str, Any],
72
- predicate: Optional['exprs.Expr'],
73
- sample_clause: Optional['SampleClause'],
78
+ predicate: 'exprs.Expr' | None,
79
+ sample_clause: 'SampleClause' | None,
74
80
  is_snapshot: bool,
81
+ create_default_idxs: bool,
75
82
  num_retained_versions: int,
76
83
  comment: str,
77
84
  media_validation: MediaValidation,
78
- iterator_cls: Optional[type[ComponentIterator]],
79
- iterator_args: Optional[dict],
80
- ) -> View:
85
+ iterator_cls: type[ComponentIterator] | None,
86
+ iterator_args: dict | None,
87
+ ) -> tuple[TableVersionCompleteMd, list[TableOp] | None]:
81
88
  from pixeltable.plan import SampleClause
82
89
 
83
90
  # Convert select_list to more additional_columns if present
@@ -94,7 +101,7 @@ class View(Table):
94
101
  # verify that filters can be evaluated in the context of the base
95
102
  if predicate is not None:
96
103
  if not predicate.is_bound_by([base]):
97
- raise excs.Error(f'Filter cannot be computed in the context of the base {base.tbl_name()}')
104
+ raise excs.Error(f'View filter cannot be computed in the context of the base table {base.tbl_name()!r}')
98
105
  # create a copy that we can modify and store
99
106
  predicate = predicate.copy()
100
107
  if sample_clause is not None:
@@ -102,7 +109,9 @@ class View(Table):
102
109
  if sample_clause.stratify_exprs is not None and not all(
103
110
  stratify_expr.is_bound_by([base]) for stratify_expr in sample_clause.stratify_exprs
104
111
  ):
105
- raise excs.Error(f'Sample clause cannot be computed in the context of the base {base.tbl_name()}')
112
+ raise excs.Error(
113
+ f'View sample clause cannot be computed in the context of the base table {base.tbl_name()!r}'
114
+ )
106
115
  # create a copy that we can modify and store
107
116
  sc = sample_clause
108
117
  sample_clause = SampleClause(
@@ -116,8 +125,8 @@ class View(Table):
116
125
  # make sure that the value can be computed in the context of the base
117
126
  if col.value_expr is not None and not col.value_expr.is_bound_by([base]):
118
127
  raise excs.Error(
119
- f'Column {col.name}: value expression cannot be computed in the context of the '
120
- f'base {base.tbl_name()}'
128
+ f'Column {col.name!r}: Value expression cannot be computed in the context of the '
129
+ f'base table {base.tbl_name()!r}'
121
130
  )
122
131
 
123
132
  if iterator_cls is not None:
@@ -144,18 +153,18 @@ class View(Table):
144
153
  sig = func.Signature(ts.InvalidType(), params)
145
154
 
146
155
  expr_args = {k: exprs.Expr.from_object(v) for k, v in bound_args.items()}
147
- sig.validate_args(expr_args, context=f'in iterator {iterator_cls.__name__!r}')
156
+ sig.validate_args(expr_args, context=f'in iterator of type `{iterator_cls.__name__}`')
148
157
  literal_args = {k: v.val if isinstance(v, exprs.Literal) else v for k, v in expr_args.items()}
149
158
 
150
159
  # prepend pos and output_schema columns to cols:
151
160
  # a component view exposes the pos column of its rowid;
152
161
  # we create that column here, so it gets assigned a column id;
153
162
  # stored=False: it is not stored separately (it's already stored as part of the rowid)
154
- iterator_cols = [Column(_POS_COLUMN_NAME, ts.IntType(), stored=False)]
163
+ iterator_cols = [Column(_POS_COLUMN_NAME, ts.IntType(), is_iterator_col=True, stored=False)]
155
164
  output_dict, unstored_cols = iterator_cls.output_schema(**literal_args)
156
165
  iterator_cols.extend(
157
166
  [
158
- Column(col_name, col_type, stored=col_name not in unstored_cols)
167
+ Column(col_name, col_type, is_iterator_col=True, stored=col_name not in unstored_cols)
159
168
  for col_name, col_type in output_dict.items()
160
169
  ]
161
170
  )
@@ -164,11 +173,10 @@ class View(Table):
164
173
  for col in columns:
165
174
  if col.name in iterator_col_names:
166
175
  raise excs.Error(
167
- f'Duplicate name: column {col.name} is already present in the iterator output schema'
176
+ f'Duplicate name: column {col.name!r} is already present in the iterator output schema'
168
177
  )
169
178
  columns = iterator_cols + columns
170
179
 
171
- session = Env.get().session
172
180
  from pixeltable.exprs import InlineDict
173
181
 
174
182
  iterator_args_expr: exprs.Expr = InlineDict(iterator_args) if iterator_args is not None else None
@@ -197,55 +205,38 @@ class View(Table):
197
205
  iterator_args=iterator_args_expr.as_dict() if iterator_args_expr is not None else None,
198
206
  )
199
207
 
200
- id, tbl_version = TableVersion.create(
201
- dir_id,
208
+ md = TableVersion.create_initial_md(
202
209
  name,
203
210
  columns,
204
211
  num_retained_versions,
205
212
  comment,
206
213
  media_validation=media_validation,
207
- # base_path=base_version_path,
208
214
  view_md=view_md,
215
+ create_default_idxs=create_default_idxs,
209
216
  )
210
- if tbl_version is None:
211
- # this is purely a snapshot: we use the base's tbl version path
212
- view = cls(id, dir_id, name, base_version_path, snapshot_only=True)
213
- _logger.info(f'created snapshot {name}')
217
+ if md.tbl_md.is_pure_snapshot:
218
+ # this is purely a snapshot: no store table to create or load
219
+ return md, None
214
220
  else:
215
- view = cls(
216
- id,
217
- dir_id,
218
- name,
219
- TableVersionPath(
220
- TableVersionHandle(tbl_version.id, tbl_version.effective_version), base=base_version_path
221
- ),
222
- snapshot_only=False,
221
+ tbl_id = md.tbl_md.tbl_id
222
+ view_path = TableVersionPath(
223
+ TableVersionHandle(UUID(tbl_id), effective_version=0 if is_snapshot else None), base=base_version_path
223
224
  )
224
- _logger.info(f'Created view `{name}`, id={tbl_version.id}')
225
-
226
- from pixeltable.plan import Planner
227
-
228
- try:
229
- plan, _ = Planner.create_view_load_plan(view._tbl_version_path)
230
- num_rows, num_excs, _ = tbl_version.store_tbl.insert_rows(plan, v_min=tbl_version.version)
231
- except:
232
- # we need to remove the orphaned TableVersion instance
233
- del catalog.Catalog.get()._tbl_versions[tbl_version.id, tbl_version.effective_version]
234
- base_tbl_version = base.tbl_version.get()
235
- if tbl_version.effective_version is None and not base_tbl_version.is_snapshot:
236
- # also remove tbl_version from the base
237
- base_tbl_version.mutable_views.remove(TableVersionHandle.create(tbl_version))
238
- raise
239
- Env.get().console_logger.info(f'Created view `{name}` with {num_rows} rows, {num_excs} exceptions.')
240
-
241
- session.commit()
242
- return view
225
+ ops = [
226
+ TableOp(
227
+ tbl_id=tbl_id, op_sn=0, num_ops=2, needs_xact=False, create_store_table_op=CreateStoreTableOp()
228
+ ),
229
+ TableOp(
230
+ tbl_id=tbl_id, op_sn=1, num_ops=2, needs_xact=True, load_view_op=LoadViewOp(view_path.as_dict())
231
+ ),
232
+ ]
233
+ return md, ops
243
234
 
244
235
  @classmethod
245
236
  def _verify_column(cls, col: Column) -> None:
246
237
  # make sure that columns are nullable or have a default
247
238
  if not col.col_type.nullable and not col.is_computed:
248
- raise excs.Error(f'Column {col.name}: non-computed columns in views must be nullable')
239
+ raise excs.Error(f'Column {col.name!r}: Non-computed columns in views must be nullable')
249
240
  super()._verify_column(col)
250
241
 
251
242
  @classmethod
@@ -267,66 +258,89 @@ class View(Table):
267
258
  base=cls._get_snapshot_path(tbl_version_path.base) if tbl_version_path.base is not None else None,
268
259
  )
269
260
 
270
- def _drop(self) -> None:
271
- if self._snapshot_only:
272
- # there is not TableVersion to drop
273
- self._check_is_dropped()
274
- self.is_dropped = True
275
- catalog.Catalog.get().delete_tbl_md(self._id)
276
- else:
277
- super()._drop()
261
+ def _is_named_pure_snapshot(self) -> bool:
262
+ """
263
+ Returns True if this is a named pure snapshot (i.e., a pure snapshot that is a separate schema object).
264
+ """
265
+ return self._id != self._tbl_version_path.tbl_id
266
+
267
+ def _is_anonymous_snapshot(self) -> bool:
268
+ """
269
+ Returns True if this is an unnamed snapshot (i.e., a snapshot that is not a separate schema object).
270
+ """
271
+ return self._snapshot_only and self._id == self._tbl_version_path.tbl_id
278
272
 
279
- def get_metadata(self) -> dict[str, Any]:
280
- md = super().get_metadata()
273
+ def _get_metadata(self) -> 'TableMetadata':
274
+ md = super()._get_metadata()
281
275
  md['is_view'] = True
282
276
  md['is_snapshot'] = self._tbl_version_path.is_snapshot()
277
+ if self._is_anonymous_snapshot():
278
+ # Update name and path with version qualifiers.
279
+ md['name'] = f'{self._name}:{self._tbl_version_path.version()}'
280
+ md['path'] = f'{self._path()}:{self._tbl_version_path.version()}'
281
+ base_tbl_id = self._base_tbl_id
282
+ if base_tbl_id is not None:
283
+ base_tbl = self._get_base_table()
284
+ base_path = '<anonymous base table>' if base_tbl is None else base_tbl._path()
285
+ base_version = self._effective_base_versions[0]
286
+ md['base'] = base_path if base_version is None else f'{base_path}:{base_version}'
283
287
  return md
284
288
 
285
289
  def insert(
286
290
  self,
287
- source: Optional[TableDataSource] = None,
291
+ source: TableDataSource | None = None,
288
292
  /,
289
293
  *,
290
- source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
291
- schema_overrides: Optional[dict[str, ts.ColumnType]] = None,
294
+ source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
295
+ schema_overrides: dict[str, ts.ColumnType] | None = None,
292
296
  on_error: Literal['abort', 'ignore'] = 'abort',
293
297
  print_stats: bool = False,
294
298
  **kwargs: Any,
295
299
  ) -> UpdateStatus:
296
- raise excs.Error(f'{self._display_name()} {self._name!r}: cannot insert into view')
300
+ raise excs.Error(f'{self._display_str()}: Cannot insert into a {self._display_name()}.')
297
301
 
298
- def delete(self, where: Optional[exprs.Expr] = None) -> UpdateStatus:
299
- raise excs.Error(f'{self._display_name()} {self._name!r}: cannot delete from view')
302
+ def delete(self, where: exprs.Expr | None = None) -> UpdateStatus:
303
+ raise excs.Error(f'{self._display_str()}: Cannot delete from a {self._display_name()}.')
300
304
 
301
305
  @property
302
- def _base_table(self) -> Optional['Table']:
303
- # if this is a pure snapshot, our tbl_version_path only reflects the base (there is no TableVersion instance
304
- # for the snapshot itself)
305
- base_id = self._tbl_version.id if self._snapshot_only else self._tbl_version_path.base.tbl_version.id
306
- return catalog.Catalog.get().get_table_by_id(base_id)
306
+ def _base_tbl_id(self) -> UUID | None:
307
+ if self._tbl_version_path.tbl_id != self._id:
308
+ # _tbl_version_path represents a different schema object from this one. This can only happen if this is a
309
+ # named pure snapshot.
310
+ return self._tbl_version_path.tbl_id
311
+ if self._tbl_version_path.base is None:
312
+ return None
313
+ return self._tbl_version_path.base.tbl_id
314
+
315
+ def _get_base_table(self) -> 'Table' | None:
316
+ """Returns None if there is no base table, or if the base table is hidden."""
317
+ base_tbl_id = self._base_tbl_id
318
+ with catalog.Catalog.get().begin_xact(tbl_id=base_tbl_id, for_write=False):
319
+ return catalog.Catalog.get().get_table_by_id(base_tbl_id)
307
320
 
308
321
  @property
309
- def _effective_base_versions(self) -> list[Optional[int]]:
322
+ def _effective_base_versions(self) -> list[int | None]:
310
323
  effective_versions = [tv.effective_version for tv in self._tbl_version_path.get_tbl_versions()]
311
- if self._snapshot_only:
312
- return effective_versions
324
+ if self._snapshot_only and not self._is_anonymous_snapshot():
325
+ return effective_versions # Named pure snapshot
313
326
  else:
314
327
  return effective_versions[1:]
315
328
 
316
329
  def _table_descriptor(self) -> str:
317
- display_name = 'Snapshot' if self._snapshot_only else 'View'
318
- result = [f'{display_name} {self._path()!r}']
330
+ result = [self._display_str()]
319
331
  bases_descrs: list[str] = []
320
- for base, effective_version in zip(self._base_tables, self._effective_base_versions):
332
+ for base, effective_version in zip(self._get_base_tables(), self._effective_base_versions):
321
333
  if effective_version is None:
322
334
  bases_descrs.append(f'{base._path()!r}')
323
335
  else:
324
336
  base_descr = f'{base._path()}:{effective_version}'
325
337
  bases_descrs.append(f'{base_descr!r}')
326
- result.append(f' (of {", ".join(bases_descrs)})')
338
+ if len(bases_descrs) > 0:
339
+ # bases_descrs can be empty in the case of a table-replica
340
+ result.append(f' (of {", ".join(bases_descrs)})')
327
341
 
328
- if self._tbl_version.get().predicate is not None:
329
- result.append(f'\nWhere: {self._tbl_version.get().predicate!s}')
330
- if self._tbl_version.get().sample_clause is not None:
342
+ if self._tbl_version_path.tbl_version.get().predicate is not None:
343
+ result.append(f'\nWhere: {self._tbl_version_path.tbl_version.get().predicate!s}')
344
+ if self._tbl_version_path.tbl_version.get().sample_clause is not None:
331
345
  result.append(f'\nSample: {self._tbl_version.get().sample_clause!s}')
332
346
  return ''.join(result)
pixeltable/config.py CHANGED
@@ -4,11 +4,11 @@ import logging
4
4
  import os
5
5
  import shutil
6
6
  from pathlib import Path
7
- from typing import Any, ClassVar, Optional, TypeVar
7
+ from typing import Any, ClassVar, TypeVar
8
8
 
9
9
  import toml
10
10
 
11
- from pixeltable import exceptions as excs
11
+ from pixeltable import env, exceptions as excs
12
12
 
13
13
  _logger = logging.getLogger('pixeltable')
14
14
 
@@ -21,23 +21,30 @@ class Config:
21
21
  configuration values, which can be set in the config file or as environment variables.
22
22
  """
23
23
 
24
- __instance: ClassVar[Optional[Config]] = None
24
+ __instance: ClassVar[Config | None] = None
25
25
 
26
26
  __home: Path
27
27
  __config_file: Path
28
+ __config_overrides: dict[str, Any]
28
29
  __config_dict: dict[str, Any]
29
30
 
30
- def __init__(self) -> None:
31
+ def __init__(self, config_overrides: dict[str, Any]) -> None:
31
32
  assert self.__instance is None, 'Config is a singleton; use Config.get() to access the instance'
32
33
 
33
- self.__home = Path(os.environ.get('PIXELTABLE_HOME', str(Path.home() / '.pixeltable')))
34
+ for var in config_overrides:
35
+ if var not in KNOWN_CONFIG_OVERRIDES:
36
+ raise excs.Error(f'Unrecognized configuration variable: {var}')
37
+
38
+ self.__config_overrides = config_overrides
39
+
40
+ self.__home = Path(self.lookup_env('pixeltable', 'home', str(Path.home() / '.pixeltable')))
34
41
  if self.__home.exists() and not self.__home.is_dir():
35
- raise RuntimeError(f'{self.__home} is not a directory')
42
+ raise excs.Error(f'Not a directory: {self.__home}')
36
43
  if not self.__home.exists():
37
44
  print(f'Creating a Pixeltable instance at: {self.__home}')
38
45
  self.__home.mkdir()
39
46
 
40
- self.__config_file = Path(os.environ.get('PIXELTABLE_CONFIG', str(self.__home / 'config.toml')))
47
+ self.__config_file = Path(self.lookup_env('pixeltable', 'config', str(self.__home / 'config.toml')))
41
48
 
42
49
  self.__config_dict: dict[str, Any]
43
50
  if os.path.isfile(self.__config_file):
@@ -46,6 +53,12 @@ class Config:
46
53
  self.__config_dict = toml.load(stream)
47
54
  except Exception as exc:
48
55
  raise excs.Error(f'Could not read config file: {self.__config_file}') from exc
56
+ for section, section_dict in self.__config_dict.items():
57
+ if section not in KNOWN_CONFIG_OPTIONS:
58
+ raise excs.Error(f'Unrecognized section {section!r} in config file: {self.__config_file}')
59
+ for key in section_dict:
60
+ if key not in KNOWN_CONFIG_OPTIONS[section]:
61
+ raise excs.Error(f"Unrecognized option '{section}.{key}' in config file: {self.__config_file}")
49
62
  else:
50
63
  self.__config_dict = self.__create_default_config(self.__config_file)
51
64
  with open(self.__config_file, 'w', encoding='utf-8') as stream:
@@ -65,10 +78,22 @@ class Config:
65
78
 
66
79
  @classmethod
67
80
  def get(cls) -> Config:
68
- if cls.__instance is None:
69
- cls.__instance = cls()
81
+ cls.init({})
70
82
  return cls.__instance
71
83
 
84
+ @classmethod
85
+ def init(cls, config_overrides: dict[str, Any], reinit: bool = False) -> None:
86
+ if reinit:
87
+ cls.__instance = None
88
+ for cl in env._registered_clients.values():
89
+ cl.client_obj = None
90
+ if cls.__instance is None:
91
+ cls.__instance = cls(config_overrides)
92
+ elif len(config_overrides) > 0:
93
+ raise excs.Error(
94
+ 'Pixeltable has already been initialized; cannot specify new config values in the same session'
95
+ )
96
+
72
97
  @classmethod
73
98
  def __create_default_config(cls, config_path: Path) -> dict[str, Any]:
74
99
  free_disk_space_bytes = shutil.disk_usage(config_path.parent).free
@@ -76,28 +101,109 @@ class Config:
76
101
  file_cache_size_g = free_disk_space_bytes / 5 / (1 << 30)
77
102
  return {'pixeltable': {'file_cache_size_g': round(file_cache_size_g, 1), 'hide_warnings': False}}
78
103
 
79
- def get_value(self, key: str, expected_type: type[T], section: str = 'pixeltable') -> Optional[T]:
104
def lookup_env(self, section: str, key: str, default: Any = None) -> Any:
    """Resolve a setting from the session overrides or the process environment.

    Lookup order:
      1. the '<section>.<key>' entry of the config overrides, if present;
      2. the '<SECTION>_<KEY>' environment variable, if set to a non-empty string;
      3. `default`.

    The config file is not consulted here.
    """
    overrides = self.__config_overrides
    qualified_name = f'{section}.{key}'
    if qualified_name in overrides:
        return overrides[qualified_name]

    # A variable that is set but empty is treated the same as an unset one.
    env_value = os.environ.get(f'{section.upper()}_{key.upper()}')
    if env_value:
        return env_value
    return default
112
+
113
def get_value(self, key: str, expected_type: type[T], section: str = 'pixeltable') -> T | None:
    """Look up a configuration value and convert it to `expected_type`.

    The value is taken from `lookup_env()` (overrides / environment) first and from the
    loaded config file second. For the config file, a dotted `section` such as 'a.b' is
    resolved through nested dicts (`config['a']['b'][key]`).

    Returns:
        The converted value, or None when the setting is not specified anywhere.

    Raises:
        excs.Error: if the value cannot be converted to `expected_type`, or if a string
            value for a `bool` setting is neither 'true' nor 'false' (case-insensitive).
    """
    raw: Any = self.lookup_env(section, key)
    if raw is None:
        # Not overridden and not in the environment: walk the config file dict.
        raw = self.__config_dict
        for part in (*section.split('.'), key):
            if not isinstance(raw, dict) or part not in raw:
                return None
            raw = raw[part]
        if raw is None:
            return None  # Not specified

    try:
        if expected_type is bool and isinstance(raw, str):
            # bool('false') would be True, so strings are parsed explicitly.
            normalized = raw.lower()
            if normalized not in ('true', 'false'):
                raise excs.Error(f"Invalid value for configuration parameter '{section}.{key}': {raw}")
            return normalized == 'true'  # type: ignore[return-value]
        return expected_type(raw)  # type: ignore[call-arg]
    except (ValueError, TypeError) as exc:
        raise excs.Error(f"Invalid value for configuration parameter '{section}.{key}': {raw}") from exc
92
139
 
93
- def get_string_value(self, key: str, section: str = 'pixeltable') -> Optional[str]:
140
def get_string_value(self, key: str, section: str = 'pixeltable') -> str | None:
    """Return the setting `section.key` converted to `str` by `get_value()`."""
    return self.get_value(key, expected_type=str, section=section)
95
142
 
96
- def get_int_value(self, key: str, section: str = 'pixeltable') -> Optional[int]:
143
def get_int_value(self, key: str, section: str = 'pixeltable') -> int | None:
    """Return the setting `section.key` converted to `int` by `get_value()`."""
    return self.get_value(key, expected_type=int, section=section)
98
145
 
99
- def get_float_value(self, key: str, section: str = 'pixeltable') -> Optional[float]:
146
def get_float_value(self, key: str, section: str = 'pixeltable') -> float | None:
    """Return the setting `section.key` converted to `float` by `get_value()`."""
    return self.get_value(key, expected_type=float, section=section)
101
148
 
102
- def get_bool_value(self, key: str, section: str = 'pixeltable') -> Optional[bool]:
149
def get_bool_value(self, key: str, section: str = 'pixeltable') -> bool | None:
    """Return the setting `section.key` converted to `bool` by `get_value()`."""
    return self.get_value(key, expected_type=bool, section=section)
151
+
152
+
153
# Every recognized configuration option, as {section: {option: description}}.
# Insertion order is kept exactly as declared; do not re-sort.
KNOWN_CONFIG_OPTIONS: dict[str, dict[str, str]] = {
    'pixeltable': {
        'home': 'Path to the Pixeltable home directory',
        'config': 'Path to the Pixeltable config file',
        'pgdata': 'Path to the Pixeltable postgres data directory',
        'db': 'Postgres database name',
        'file_cache_size_g': 'Size of the file cache in GB',
        'time_zone': 'Default time zone for timestamps',
        'hide_warnings': 'Hide warnings from the console',
        'verbosity': 'Verbosity level for console output',
        'api_key': 'API key for Pixeltable cloud',
        'input_media_dest': 'Default destination URI for input media data',
        'output_media_dest': 'Default destination URI for output (computed) media data',
        'r2_profile': 'AWS config profile name used to access R2 storage',
        's3_profile': 'AWS config profile name used to access S3 storage',
        'b2_profile': 'S3-compatible profile name used to access Backblaze B2 storage',
    },
    'anthropic': {
        'api_key': 'Anthropic API key',
    },
    'azure': {
        'storage_account_name': 'Azure storage account name',
        'storage_account_key': 'Azure storage account key',
    },
    'bedrock': {
        'api_key': 'AWS Bedrock API key',
    },
    'deepseek': {
        'api_key': 'Deepseek API key',
        'rate_limit': 'Rate limit for Deepseek API requests',
    },
    'fireworks': {
        'api_key': 'Fireworks API key',
        'rate_limit': 'Rate limit for Fireworks API requests',
    },
    'twelvelabs': {
        'api_key': 'TwelveLabs API key',
        'rate_limit': 'Rate limit for TwelveLabs API requests',
    },
    'gemini': {
        'api_key': 'Gemini API key',
        'rate_limits': 'Per-model rate limits for Gemini API requests',
    },
    'hf': {
        'auth_token': 'Hugging Face access token',
    },
    'imagen': {
        'rate_limits': 'Per-model rate limits for Imagen API requests',
    },
    'reve': {
        'api_key': 'Reve API key',
        'rate_limit': 'Rate limit for Reve API requests (requests per minute)',
    },
    'groq': {
        'api_key': 'Groq API key',
        'rate_limit': 'Rate limit for Groq API requests',
    },
    'label_studio': {
        'api_key': 'Label Studio API key',
        'url': 'Label Studio server URL',
    },
    'mistral': {
        'api_key': 'Mistral API key',
        'rate_limit': 'Rate limit for Mistral API requests',
    },
    'openai': {
        'api_key': 'OpenAI API key',
        'base_url': 'OpenAI API base URL',
        'api_version': 'API version if using Azure OpenAI',
        'rate_limits': 'Per-model rate limits for OpenAI API requests',
    },
    'openrouter': {
        'api_key': 'OpenRouter API key',
        'site_url': 'Optional URL for your application (for OpenRouter analytics)',
        'app_name': 'Optional name for your application (for OpenRouter analytics)',
        'rate_limit': 'Rate limit for OpenRouter API requests',
    },
    'replicate': {
        'api_token': 'Replicate API token',
    },
    'together': {
        'api_key': 'Together API key',
        'rate_limits': 'Per-model category rate limits for Together API requests',
    },
    'veo': {
        'rate_limits': 'Per-model rate limits for Veo API requests',
    },
    'pypi': {
        'api_key': 'PyPI API key (for internal use only)',
    },
}


# Flattened view of KNOWN_CONFIG_OPTIONS keyed by '<section>.<option>'.
KNOWN_CONFIG_OVERRIDES: dict[str, str] = {
    f'{section_name}.{option_name}': description
    for section_name, options in KNOWN_CONFIG_OPTIONS.items()
    for option_name, description in options.items()
}