pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +23 -5
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -3
- pixeltable/catalog/catalog.py +1318 -404
- pixeltable/catalog/column.py +186 -115
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/globals.py +11 -43
- pixeltable/catalog/insertable_table.py +167 -79
- pixeltable/catalog/path.py +61 -23
- pixeltable/catalog/schema_object.py +9 -10
- pixeltable/catalog/table.py +626 -308
- pixeltable/catalog/table_metadata.py +101 -0
- pixeltable/catalog/table_version.py +713 -569
- pixeltable/catalog/table_version_handle.py +37 -6
- pixeltable/catalog/table_version_path.py +42 -29
- pixeltable/catalog/tbl_ops.py +50 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +108 -94
- pixeltable/config.py +128 -22
- pixeltable/dataframe.py +188 -100
- pixeltable/env.py +407 -136
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +3 -0
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +231 -0
- pixeltable/exec/cell_reconstruction_node.py +135 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +7 -6
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +190 -30
- pixeltable/exec/globals.py +32 -0
- pixeltable/exec/in_memory_data_node.py +18 -18
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +206 -101
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +34 -30
- pixeltable/exprs/column_ref.py +92 -96
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +152 -55
- pixeltable/exprs/expr.py +62 -43
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +75 -37
- pixeltable/exprs/globals.py +1 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +10 -27
- pixeltable/exprs/is_null.py +1 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +127 -53
- pixeltable/exprs/rowid_ref.py +8 -12
- pixeltable/exprs/similarity_expr.py +50 -25
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +10 -10
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +20 -18
- pixeltable/func/signature.py +43 -16
- pixeltable/func/tools.py +23 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +6 -0
- pixeltable/functions/anthropic.py +93 -33
- pixeltable/functions/audio.py +114 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +1 -1
- pixeltable/functions/deepseek.py +20 -9
- pixeltable/functions/fireworks.py +2 -2
- pixeltable/functions/gemini.py +28 -11
- pixeltable/functions/globals.py +13 -13
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1046 -23
- pixeltable/functions/image.py +9 -18
- pixeltable/functions/llama_cpp.py +23 -8
- pixeltable/functions/math.py +3 -4
- pixeltable/functions/mistralai.py +4 -15
- pixeltable/functions/ollama.py +16 -9
- pixeltable/functions/openai.py +104 -82
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +2 -2
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +21 -28
- pixeltable/functions/timestamp.py +13 -14
- pixeltable/functions/together.py +4 -6
- pixeltable/functions/twelvelabs.py +92 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/video.py +1388 -106
- pixeltable/functions/vision.py +7 -7
- pixeltable/functions/whisper.py +15 -7
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +332 -105
- pixeltable/index/base.py +13 -22
- pixeltable/index/btree.py +23 -22
- pixeltable/index/embedding_index.py +32 -44
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +7 -6
- pixeltable/io/external_store.py +49 -77
- pixeltable/io/fiftyone.py +11 -11
- pixeltable/io/globals.py +29 -28
- pixeltable/io/hf_datasets.py +17 -9
- pixeltable/io/label_studio.py +70 -66
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +12 -11
- pixeltable/io/parquet.py +13 -93
- pixeltable/io/table_data_conduit.py +71 -47
- pixeltable/io/utils.py +3 -3
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +21 -11
- pixeltable/iterators/document.py +116 -55
- pixeltable/iterators/image.py +5 -2
- pixeltable/iterators/video.py +293 -13
- pixeltable/metadata/__init__.py +4 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_36.py +2 -2
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/util.py +13 -12
- pixeltable/metadata/notes.py +4 -0
- pixeltable/metadata/schema.py +79 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +274 -223
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +259 -129
- pixeltable/share/protocol/__init__.py +34 -0
- pixeltable/share/protocol/common.py +170 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +109 -0
- pixeltable/share/publish.py +213 -57
- pixeltable/store.py +238 -175
- pixeltable/type_system.py +104 -63
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +108 -13
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +305 -0
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +31 -5
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +5 -6
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +4 -6
- pixeltable/utils/gcs_store.py +283 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +88 -0
- pixeltable/utils/local_store.py +316 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +528 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +392 -0
- pixeltable-0.4.20.dist-info/METADATA +587 -0
- pixeltable-0.4.20.dist-info/RECORD +218 -0
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
- pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable/utils/sample.py +0 -25
- pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
- pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
- pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
pixeltable/catalog/view.py
CHANGED
|
@@ -2,29 +2,28 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
4
|
import logging
|
|
5
|
-
from typing import TYPE_CHECKING, Any, List, Literal
|
|
5
|
+
from typing import TYPE_CHECKING, Any, List, Literal
|
|
6
6
|
from uuid import UUID
|
|
7
7
|
|
|
8
8
|
import pixeltable.exceptions as excs
|
|
9
9
|
import pixeltable.metadata.schema as md_schema
|
|
10
10
|
import pixeltable.type_system as ts
|
|
11
11
|
from pixeltable import catalog, exprs, func
|
|
12
|
-
from pixeltable.env import Env
|
|
13
12
|
from pixeltable.iterators import ComponentIterator
|
|
14
13
|
|
|
15
|
-
if TYPE_CHECKING:
|
|
16
|
-
from pixeltable.plan import SampleClause
|
|
17
|
-
|
|
18
|
-
|
|
19
14
|
from .column import Column
|
|
20
|
-
from .globals import _POS_COLUMN_NAME, MediaValidation
|
|
15
|
+
from .globals import _POS_COLUMN_NAME, MediaValidation
|
|
21
16
|
from .table import Table
|
|
22
|
-
from .table_version import TableVersion
|
|
17
|
+
from .table_version import TableVersion, TableVersionCompleteMd
|
|
23
18
|
from .table_version_handle import TableVersionHandle
|
|
24
19
|
from .table_version_path import TableVersionPath
|
|
20
|
+
from .tbl_ops import CreateStoreTableOp, LoadViewOp, TableOp
|
|
21
|
+
from .update_status import UpdateStatus
|
|
25
22
|
|
|
26
23
|
if TYPE_CHECKING:
|
|
24
|
+
from pixeltable.catalog.table import TableMetadata
|
|
27
25
|
from pixeltable.globals import TableDataSource
|
|
26
|
+
from pixeltable.plan import SampleClause
|
|
28
27
|
|
|
29
28
|
_logger = logging.getLogger('pixeltable')
|
|
30
29
|
|
|
@@ -41,13 +40,20 @@ class View(Table):
|
|
|
41
40
|
def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath, snapshot_only: bool):
|
|
42
41
|
super().__init__(id, dir_id, name, tbl_version_path)
|
|
43
42
|
self._snapshot_only = snapshot_only
|
|
43
|
+
if not snapshot_only:
|
|
44
|
+
self._tbl_version = tbl_version_path.tbl_version
|
|
45
|
+
|
|
46
|
+
def _display_name(self) -> str:
|
|
47
|
+
if self._tbl_version_path.is_replica():
|
|
48
|
+
return 'replica'
|
|
49
|
+
if self._tbl_version_path.is_snapshot():
|
|
50
|
+
return 'snapshot'
|
|
51
|
+
if self._tbl_version_path.is_view():
|
|
52
|
+
return 'view'
|
|
53
|
+
return 'table'
|
|
44
54
|
|
|
45
55
|
@classmethod
|
|
46
|
-
def
|
|
47
|
-
return 'view'
|
|
48
|
-
|
|
49
|
-
@classmethod
|
|
50
|
-
def select_list_to_additional_columns(cls, select_list: list[tuple[exprs.Expr, Optional[str]]]) -> dict[str, dict]:
|
|
56
|
+
def select_list_to_additional_columns(cls, select_list: list[tuple[exprs.Expr, str | None]]) -> dict[str, dict]:
|
|
51
57
|
"""Returns a list of columns in the same format as the additional_columns parameter of View.create.
|
|
52
58
|
The source is the list of expressions from a select() statement on a DataFrame.
|
|
53
59
|
If the column is a ColumnRef, to a base table column, it is marked to not be stored.sy
|
|
@@ -67,17 +73,18 @@ class View(Table):
|
|
|
67
73
|
dir_id: UUID,
|
|
68
74
|
name: str,
|
|
69
75
|
base: TableVersionPath,
|
|
70
|
-
select_list:
|
|
76
|
+
select_list: list[tuple[exprs.Expr, str | None]] | None,
|
|
71
77
|
additional_columns: dict[str, Any],
|
|
72
|
-
predicate:
|
|
73
|
-
sample_clause:
|
|
78
|
+
predicate: 'exprs.Expr' | None,
|
|
79
|
+
sample_clause: 'SampleClause' | None,
|
|
74
80
|
is_snapshot: bool,
|
|
81
|
+
create_default_idxs: bool,
|
|
75
82
|
num_retained_versions: int,
|
|
76
83
|
comment: str,
|
|
77
84
|
media_validation: MediaValidation,
|
|
78
|
-
iterator_cls:
|
|
79
|
-
iterator_args:
|
|
80
|
-
) ->
|
|
85
|
+
iterator_cls: type[ComponentIterator] | None,
|
|
86
|
+
iterator_args: dict | None,
|
|
87
|
+
) -> tuple[TableVersionCompleteMd, list[TableOp] | None]:
|
|
81
88
|
from pixeltable.plan import SampleClause
|
|
82
89
|
|
|
83
90
|
# Convert select_list to more additional_columns if present
|
|
@@ -94,7 +101,7 @@ class View(Table):
|
|
|
94
101
|
# verify that filters can be evaluated in the context of the base
|
|
95
102
|
if predicate is not None:
|
|
96
103
|
if not predicate.is_bound_by([base]):
|
|
97
|
-
raise excs.Error(f'
|
|
104
|
+
raise excs.Error(f'View filter cannot be computed in the context of the base table {base.tbl_name()!r}')
|
|
98
105
|
# create a copy that we can modify and store
|
|
99
106
|
predicate = predicate.copy()
|
|
100
107
|
if sample_clause is not None:
|
|
@@ -102,7 +109,9 @@ class View(Table):
|
|
|
102
109
|
if sample_clause.stratify_exprs is not None and not all(
|
|
103
110
|
stratify_expr.is_bound_by([base]) for stratify_expr in sample_clause.stratify_exprs
|
|
104
111
|
):
|
|
105
|
-
raise excs.Error(
|
|
112
|
+
raise excs.Error(
|
|
113
|
+
f'View sample clause cannot be computed in the context of the base table {base.tbl_name()!r}'
|
|
114
|
+
)
|
|
106
115
|
# create a copy that we can modify and store
|
|
107
116
|
sc = sample_clause
|
|
108
117
|
sample_clause = SampleClause(
|
|
@@ -116,8 +125,8 @@ class View(Table):
|
|
|
116
125
|
# make sure that the value can be computed in the context of the base
|
|
117
126
|
if col.value_expr is not None and not col.value_expr.is_bound_by([base]):
|
|
118
127
|
raise excs.Error(
|
|
119
|
-
f'Column {col.name}:
|
|
120
|
-
f'base {base.tbl_name()}'
|
|
128
|
+
f'Column {col.name!r}: Value expression cannot be computed in the context of the '
|
|
129
|
+
f'base table {base.tbl_name()!r}'
|
|
121
130
|
)
|
|
122
131
|
|
|
123
132
|
if iterator_cls is not None:
|
|
@@ -144,18 +153,18 @@ class View(Table):
|
|
|
144
153
|
sig = func.Signature(ts.InvalidType(), params)
|
|
145
154
|
|
|
146
155
|
expr_args = {k: exprs.Expr.from_object(v) for k, v in bound_args.items()}
|
|
147
|
-
sig.validate_args(expr_args, context=f'in iterator {iterator_cls.__name__
|
|
156
|
+
sig.validate_args(expr_args, context=f'in iterator of type `{iterator_cls.__name__}`')
|
|
148
157
|
literal_args = {k: v.val if isinstance(v, exprs.Literal) else v for k, v in expr_args.items()}
|
|
149
158
|
|
|
150
159
|
# prepend pos and output_schema columns to cols:
|
|
151
160
|
# a component view exposes the pos column of its rowid;
|
|
152
161
|
# we create that column here, so it gets assigned a column id;
|
|
153
162
|
# stored=False: it is not stored separately (it's already stored as part of the rowid)
|
|
154
|
-
iterator_cols = [Column(_POS_COLUMN_NAME, ts.IntType(), stored=False)]
|
|
163
|
+
iterator_cols = [Column(_POS_COLUMN_NAME, ts.IntType(), is_iterator_col=True, stored=False)]
|
|
155
164
|
output_dict, unstored_cols = iterator_cls.output_schema(**literal_args)
|
|
156
165
|
iterator_cols.extend(
|
|
157
166
|
[
|
|
158
|
-
Column(col_name, col_type, stored=col_name not in unstored_cols)
|
|
167
|
+
Column(col_name, col_type, is_iterator_col=True, stored=col_name not in unstored_cols)
|
|
159
168
|
for col_name, col_type in output_dict.items()
|
|
160
169
|
]
|
|
161
170
|
)
|
|
@@ -164,11 +173,10 @@ class View(Table):
|
|
|
164
173
|
for col in columns:
|
|
165
174
|
if col.name in iterator_col_names:
|
|
166
175
|
raise excs.Error(
|
|
167
|
-
f'Duplicate name: column {col.name} is already present in the iterator output schema'
|
|
176
|
+
f'Duplicate name: column {col.name!r} is already present in the iterator output schema'
|
|
168
177
|
)
|
|
169
178
|
columns = iterator_cols + columns
|
|
170
179
|
|
|
171
|
-
session = Env.get().session
|
|
172
180
|
from pixeltable.exprs import InlineDict
|
|
173
181
|
|
|
174
182
|
iterator_args_expr: exprs.Expr = InlineDict(iterator_args) if iterator_args is not None else None
|
|
@@ -197,55 +205,38 @@ class View(Table):
|
|
|
197
205
|
iterator_args=iterator_args_expr.as_dict() if iterator_args_expr is not None else None,
|
|
198
206
|
)
|
|
199
207
|
|
|
200
|
-
|
|
201
|
-
dir_id,
|
|
208
|
+
md = TableVersion.create_initial_md(
|
|
202
209
|
name,
|
|
203
210
|
columns,
|
|
204
211
|
num_retained_versions,
|
|
205
212
|
comment,
|
|
206
213
|
media_validation=media_validation,
|
|
207
|
-
# base_path=base_version_path,
|
|
208
214
|
view_md=view_md,
|
|
215
|
+
create_default_idxs=create_default_idxs,
|
|
209
216
|
)
|
|
210
|
-
if
|
|
211
|
-
# this is purely a snapshot:
|
|
212
|
-
|
|
213
|
-
_logger.info(f'created snapshot {name}')
|
|
217
|
+
if md.tbl_md.is_pure_snapshot:
|
|
218
|
+
# this is purely a snapshot: no store table to create or load
|
|
219
|
+
return md, None
|
|
214
220
|
else:
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
name,
|
|
219
|
-
TableVersionPath(
|
|
220
|
-
TableVersionHandle(tbl_version.id, tbl_version.effective_version), base=base_version_path
|
|
221
|
-
),
|
|
222
|
-
snapshot_only=False,
|
|
221
|
+
tbl_id = md.tbl_md.tbl_id
|
|
222
|
+
view_path = TableVersionPath(
|
|
223
|
+
TableVersionHandle(UUID(tbl_id), effective_version=0 if is_snapshot else None), base=base_version_path
|
|
223
224
|
)
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
del catalog.Catalog.get()._tbl_versions[tbl_version.id, tbl_version.effective_version]
|
|
234
|
-
base_tbl_version = base.tbl_version.get()
|
|
235
|
-
if tbl_version.effective_version is None and not base_tbl_version.is_snapshot:
|
|
236
|
-
# also remove tbl_version from the base
|
|
237
|
-
base_tbl_version.mutable_views.remove(TableVersionHandle.create(tbl_version))
|
|
238
|
-
raise
|
|
239
|
-
Env.get().console_logger.info(f'Created view `{name}` with {num_rows} rows, {num_excs} exceptions.')
|
|
240
|
-
|
|
241
|
-
session.commit()
|
|
242
|
-
return view
|
|
225
|
+
ops = [
|
|
226
|
+
TableOp(
|
|
227
|
+
tbl_id=tbl_id, op_sn=0, num_ops=2, needs_xact=False, create_store_table_op=CreateStoreTableOp()
|
|
228
|
+
),
|
|
229
|
+
TableOp(
|
|
230
|
+
tbl_id=tbl_id, op_sn=1, num_ops=2, needs_xact=True, load_view_op=LoadViewOp(view_path.as_dict())
|
|
231
|
+
),
|
|
232
|
+
]
|
|
233
|
+
return md, ops
|
|
243
234
|
|
|
244
235
|
@classmethod
|
|
245
236
|
def _verify_column(cls, col: Column) -> None:
|
|
246
237
|
# make sure that columns are nullable or have a default
|
|
247
238
|
if not col.col_type.nullable and not col.is_computed:
|
|
248
|
-
raise excs.Error(f'Column {col.name}:
|
|
239
|
+
raise excs.Error(f'Column {col.name!r}: Non-computed columns in views must be nullable')
|
|
249
240
|
super()._verify_column(col)
|
|
250
241
|
|
|
251
242
|
@classmethod
|
|
@@ -267,66 +258,89 @@ class View(Table):
|
|
|
267
258
|
base=cls._get_snapshot_path(tbl_version_path.base) if tbl_version_path.base is not None else None,
|
|
268
259
|
)
|
|
269
260
|
|
|
270
|
-
def
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
261
|
+
def _is_named_pure_snapshot(self) -> bool:
|
|
262
|
+
"""
|
|
263
|
+
Returns True if this is a named pure snapshot (i.e., a pure snapshot that is a separate schema object).
|
|
264
|
+
"""
|
|
265
|
+
return self._id != self._tbl_version_path.tbl_id
|
|
266
|
+
|
|
267
|
+
def _is_anonymous_snapshot(self) -> bool:
|
|
268
|
+
"""
|
|
269
|
+
Returns True if this is an unnamed snapshot (i.e., a snapshot that is not a separate schema object).
|
|
270
|
+
"""
|
|
271
|
+
return self._snapshot_only and self._id == self._tbl_version_path.tbl_id
|
|
278
272
|
|
|
279
|
-
def
|
|
280
|
-
md = super().
|
|
273
|
+
def _get_metadata(self) -> 'TableMetadata':
|
|
274
|
+
md = super()._get_metadata()
|
|
281
275
|
md['is_view'] = True
|
|
282
276
|
md['is_snapshot'] = self._tbl_version_path.is_snapshot()
|
|
277
|
+
if self._is_anonymous_snapshot():
|
|
278
|
+
# Update name and path with version qualifiers.
|
|
279
|
+
md['name'] = f'{self._name}:{self._tbl_version_path.version()}'
|
|
280
|
+
md['path'] = f'{self._path()}:{self._tbl_version_path.version()}'
|
|
281
|
+
base_tbl_id = self._base_tbl_id
|
|
282
|
+
if base_tbl_id is not None:
|
|
283
|
+
base_tbl = self._get_base_table()
|
|
284
|
+
base_path = '<anonymous base table>' if base_tbl is None else base_tbl._path()
|
|
285
|
+
base_version = self._effective_base_versions[0]
|
|
286
|
+
md['base'] = base_path if base_version is None else f'{base_path}:{base_version}'
|
|
283
287
|
return md
|
|
284
288
|
|
|
285
289
|
def insert(
|
|
286
290
|
self,
|
|
287
|
-
source:
|
|
291
|
+
source: TableDataSource | None = None,
|
|
288
292
|
/,
|
|
289
293
|
*,
|
|
290
|
-
source_format:
|
|
291
|
-
schema_overrides:
|
|
294
|
+
source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
|
|
295
|
+
schema_overrides: dict[str, ts.ColumnType] | None = None,
|
|
292
296
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
293
297
|
print_stats: bool = False,
|
|
294
298
|
**kwargs: Any,
|
|
295
299
|
) -> UpdateStatus:
|
|
296
|
-
raise excs.Error(f'{self.
|
|
300
|
+
raise excs.Error(f'{self._display_str()}: Cannot insert into a {self._display_name()}.')
|
|
297
301
|
|
|
298
|
-
def delete(self, where:
|
|
299
|
-
raise excs.Error(f'{self.
|
|
302
|
+
def delete(self, where: exprs.Expr | None = None) -> UpdateStatus:
|
|
303
|
+
raise excs.Error(f'{self._display_str()}: Cannot delete from a {self._display_name()}.')
|
|
300
304
|
|
|
301
305
|
@property
|
|
302
|
-
def
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
306
|
+
def _base_tbl_id(self) -> UUID | None:
|
|
307
|
+
if self._tbl_version_path.tbl_id != self._id:
|
|
308
|
+
# _tbl_version_path represents a different schema object from this one. This can only happen if this is a
|
|
309
|
+
# named pure snapshot.
|
|
310
|
+
return self._tbl_version_path.tbl_id
|
|
311
|
+
if self._tbl_version_path.base is None:
|
|
312
|
+
return None
|
|
313
|
+
return self._tbl_version_path.base.tbl_id
|
|
314
|
+
|
|
315
|
+
def _get_base_table(self) -> 'Table' | None:
|
|
316
|
+
"""Returns None if there is no base table, or if the base table is hidden."""
|
|
317
|
+
base_tbl_id = self._base_tbl_id
|
|
318
|
+
with catalog.Catalog.get().begin_xact(tbl_id=base_tbl_id, for_write=False):
|
|
319
|
+
return catalog.Catalog.get().get_table_by_id(base_tbl_id)
|
|
307
320
|
|
|
308
321
|
@property
|
|
309
|
-
def _effective_base_versions(self) -> list[
|
|
322
|
+
def _effective_base_versions(self) -> list[int | None]:
|
|
310
323
|
effective_versions = [tv.effective_version for tv in self._tbl_version_path.get_tbl_versions()]
|
|
311
|
-
if self._snapshot_only:
|
|
312
|
-
return effective_versions
|
|
324
|
+
if self._snapshot_only and not self._is_anonymous_snapshot():
|
|
325
|
+
return effective_versions # Named pure snapshot
|
|
313
326
|
else:
|
|
314
327
|
return effective_versions[1:]
|
|
315
328
|
|
|
316
329
|
def _table_descriptor(self) -> str:
|
|
317
|
-
|
|
318
|
-
result = [f'{display_name} {self._path()!r}']
|
|
330
|
+
result = [self._display_str()]
|
|
319
331
|
bases_descrs: list[str] = []
|
|
320
|
-
for base, effective_version in zip(self.
|
|
332
|
+
for base, effective_version in zip(self._get_base_tables(), self._effective_base_versions):
|
|
321
333
|
if effective_version is None:
|
|
322
334
|
bases_descrs.append(f'{base._path()!r}')
|
|
323
335
|
else:
|
|
324
336
|
base_descr = f'{base._path()}:{effective_version}'
|
|
325
337
|
bases_descrs.append(f'{base_descr!r}')
|
|
326
|
-
|
|
338
|
+
if len(bases_descrs) > 0:
|
|
339
|
+
# bases_descrs can be empty in the case of a table-replica
|
|
340
|
+
result.append(f' (of {", ".join(bases_descrs)})')
|
|
327
341
|
|
|
328
|
-
if self.
|
|
329
|
-
result.append(f'\nWhere: {self.
|
|
330
|
-
if self.
|
|
342
|
+
if self._tbl_version_path.tbl_version.get().predicate is not None:
|
|
343
|
+
result.append(f'\nWhere: {self._tbl_version_path.tbl_version.get().predicate!s}')
|
|
344
|
+
if self._tbl_version_path.tbl_version.get().sample_clause is not None:
|
|
331
345
|
result.append(f'\nSample: {self._tbl_version.get().sample_clause!s}')
|
|
332
346
|
return ''.join(result)
|
pixeltable/config.py
CHANGED
|
@@ -4,11 +4,11 @@ import logging
|
|
|
4
4
|
import os
|
|
5
5
|
import shutil
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import Any, ClassVar,
|
|
7
|
+
from typing import Any, ClassVar, TypeVar
|
|
8
8
|
|
|
9
9
|
import toml
|
|
10
10
|
|
|
11
|
-
from pixeltable import exceptions as excs
|
|
11
|
+
from pixeltable import env, exceptions as excs
|
|
12
12
|
|
|
13
13
|
_logger = logging.getLogger('pixeltable')
|
|
14
14
|
|
|
@@ -21,23 +21,30 @@ class Config:
|
|
|
21
21
|
configuration values, which can be set in the config file or as environment variables.
|
|
22
22
|
"""
|
|
23
23
|
|
|
24
|
-
__instance: ClassVar[
|
|
24
|
+
__instance: ClassVar[Config | None] = None
|
|
25
25
|
|
|
26
26
|
__home: Path
|
|
27
27
|
__config_file: Path
|
|
28
|
+
__config_overrides: dict[str, Any]
|
|
28
29
|
__config_dict: dict[str, Any]
|
|
29
30
|
|
|
30
|
-
def __init__(self) -> None:
|
|
31
|
+
def __init__(self, config_overrides: dict[str, Any]) -> None:
|
|
31
32
|
assert self.__instance is None, 'Config is a singleton; use Config.get() to access the instance'
|
|
32
33
|
|
|
33
|
-
|
|
34
|
+
for var in config_overrides:
|
|
35
|
+
if var not in KNOWN_CONFIG_OVERRIDES:
|
|
36
|
+
raise excs.Error(f'Unrecognized configuration variable: {var}')
|
|
37
|
+
|
|
38
|
+
self.__config_overrides = config_overrides
|
|
39
|
+
|
|
40
|
+
self.__home = Path(self.lookup_env('pixeltable', 'home', str(Path.home() / '.pixeltable')))
|
|
34
41
|
if self.__home.exists() and not self.__home.is_dir():
|
|
35
|
-
raise
|
|
42
|
+
raise excs.Error(f'Not a directory: {self.__home}')
|
|
36
43
|
if not self.__home.exists():
|
|
37
44
|
print(f'Creating a Pixeltable instance at: {self.__home}')
|
|
38
45
|
self.__home.mkdir()
|
|
39
46
|
|
|
40
|
-
self.__config_file = Path(
|
|
47
|
+
self.__config_file = Path(self.lookup_env('pixeltable', 'config', str(self.__home / 'config.toml')))
|
|
41
48
|
|
|
42
49
|
self.__config_dict: dict[str, Any]
|
|
43
50
|
if os.path.isfile(self.__config_file):
|
|
@@ -46,6 +53,12 @@ class Config:
|
|
|
46
53
|
self.__config_dict = toml.load(stream)
|
|
47
54
|
except Exception as exc:
|
|
48
55
|
raise excs.Error(f'Could not read config file: {self.__config_file}') from exc
|
|
56
|
+
for section, section_dict in self.__config_dict.items():
|
|
57
|
+
if section not in KNOWN_CONFIG_OPTIONS:
|
|
58
|
+
raise excs.Error(f'Unrecognized section {section!r} in config file: {self.__config_file}')
|
|
59
|
+
for key in section_dict:
|
|
60
|
+
if key not in KNOWN_CONFIG_OPTIONS[section]:
|
|
61
|
+
raise excs.Error(f"Unrecognized option '{section}.{key}' in config file: {self.__config_file}")
|
|
49
62
|
else:
|
|
50
63
|
self.__config_dict = self.__create_default_config(self.__config_file)
|
|
51
64
|
with open(self.__config_file, 'w', encoding='utf-8') as stream:
|
|
@@ -65,10 +78,22 @@ class Config:
|
|
|
65
78
|
|
|
66
79
|
@classmethod
|
|
67
80
|
def get(cls) -> Config:
|
|
68
|
-
|
|
69
|
-
cls.__instance = cls()
|
|
81
|
+
cls.init({})
|
|
70
82
|
return cls.__instance
|
|
71
83
|
|
|
84
|
+
@classmethod
|
|
85
|
+
def init(cls, config_overrides: dict[str, Any], reinit: bool = False) -> None:
|
|
86
|
+
if reinit:
|
|
87
|
+
cls.__instance = None
|
|
88
|
+
for cl in env._registered_clients.values():
|
|
89
|
+
cl.client_obj = None
|
|
90
|
+
if cls.__instance is None:
|
|
91
|
+
cls.__instance = cls(config_overrides)
|
|
92
|
+
elif len(config_overrides) > 0:
|
|
93
|
+
raise excs.Error(
|
|
94
|
+
'Pixeltable has already been initialized; cannot specify new config values in the same session'
|
|
95
|
+
)
|
|
96
|
+
|
|
72
97
|
@classmethod
|
|
73
98
|
def __create_default_config(cls, config_path: Path) -> dict[str, Any]:
|
|
74
99
|
free_disk_space_bytes = shutil.disk_usage(config_path.parent).free
|
|
@@ -76,28 +101,109 @@ class Config:
|
|
|
76
101
|
file_cache_size_g = free_disk_space_bytes / 5 / (1 << 30)
|
|
77
102
|
return {'pixeltable': {'file_cache_size_g': round(file_cache_size_g, 1), 'hide_warnings': False}}
|
|
78
103
|
|
|
79
|
-
def
|
|
104
|
+
def lookup_env(self, section: str, key: str, default: Any = None) -> Any:
|
|
105
|
+
override_var = f'{section}.{key}'
|
|
80
106
|
env_var = f'{section.upper()}_{key.upper()}'
|
|
81
|
-
if
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
107
|
+
if override_var in self.__config_overrides:
|
|
108
|
+
return self.__config_overrides[override_var]
|
|
109
|
+
if env_var in os.environ and len(os.environ[env_var]) > 0:
|
|
110
|
+
return os.environ[env_var]
|
|
111
|
+
return default
|
|
112
|
+
|
|
113
|
+
def get_value(self, key: str, expected_type: type[T], section: str = 'pixeltable') -> T | None:
|
|
114
|
+
value: Any = self.lookup_env(section, key) # Try to get from environment first
|
|
115
|
+
# Next try the config file
|
|
116
|
+
if value is None:
|
|
117
|
+
# Resolve nested section dicts
|
|
118
|
+
lookup_elems = [*section.split('.'), key]
|
|
119
|
+
value = self.__config_dict
|
|
120
|
+
for el in lookup_elems:
|
|
121
|
+
if isinstance(value, dict):
|
|
122
|
+
if el not in value:
|
|
123
|
+
return None
|
|
124
|
+
value = value[el]
|
|
125
|
+
else:
|
|
126
|
+
return None
|
|
127
|
+
|
|
128
|
+
if value is None:
|
|
129
|
+
return None # Not specified
|
|
87
130
|
|
|
88
131
|
try:
|
|
132
|
+
if expected_type is bool and isinstance(value, str):
|
|
133
|
+
if value.lower() not in ('true', 'false'):
|
|
134
|
+
raise excs.Error(f"Invalid value for configuration parameter '{section}.{key}': {value}")
|
|
135
|
+
return value.lower() == 'true' # type: ignore[return-value]
|
|
89
136
|
return expected_type(value) # type: ignore[call-arg]
|
|
90
|
-
except ValueError as exc:
|
|
91
|
-
raise excs.Error(f
|
|
137
|
+
except (ValueError, TypeError) as exc:
|
|
138
|
+
raise excs.Error(f"Invalid value for configuration parameter '{section}.{key}': {value}") from exc
|
|
92
139
|
|
|
93
|
-
def get_string_value(self, key: str, section: str = 'pixeltable') ->
|
|
140
|
+
def get_string_value(self, key: str, section: str = 'pixeltable') -> str | None:
|
|
94
141
|
return self.get_value(key, str, section)
|
|
95
142
|
|
|
96
|
-
def get_int_value(self, key: str, section: str = 'pixeltable') ->
|
|
143
|
+
def get_int_value(self, key: str, section: str = 'pixeltable') -> int | None:
|
|
97
144
|
return self.get_value(key, int, section)
|
|
98
145
|
|
|
99
|
-
def get_float_value(self, key: str, section: str = 'pixeltable') ->
|
|
146
|
+
def get_float_value(self, key: str, section: str = 'pixeltable') -> float | None:
|
|
100
147
|
return self.get_value(key, float, section)
|
|
101
148
|
|
|
102
|
-
def get_bool_value(self, key: str, section: str = 'pixeltable') ->
|
|
149
|
+
def get_bool_value(self, key: str, section: str = 'pixeltable') -> bool | None:
|
|
103
150
|
return self.get_value(key, bool, section)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
KNOWN_CONFIG_OPTIONS = {
|
|
154
|
+
'pixeltable': {
|
|
155
|
+
'home': 'Path to the Pixeltable home directory',
|
|
156
|
+
'config': 'Path to the Pixeltable config file',
|
|
157
|
+
'pgdata': 'Path to the Pixeltable postgres data directory',
|
|
158
|
+
'db': 'Postgres database name',
|
|
159
|
+
'file_cache_size_g': 'Size of the file cache in GB',
|
|
160
|
+
'time_zone': 'Default time zone for timestamps',
|
|
161
|
+
'hide_warnings': 'Hide warnings from the console',
|
|
162
|
+
'verbosity': 'Verbosity level for console output',
|
|
163
|
+
'api_key': 'API key for Pixeltable cloud',
|
|
164
|
+
'input_media_dest': 'Default destination URI for input media data',
|
|
165
|
+
'output_media_dest': 'Default destination URI for output (computed) media data',
|
|
166
|
+
'r2_profile': 'AWS config profile name used to access R2 storage',
|
|
167
|
+
's3_profile': 'AWS config profile name used to access S3 storage',
|
|
168
|
+
'b2_profile': 'S3-compatible profile name used to access Backblaze B2 storage',
|
|
169
|
+
},
|
|
170
|
+
'anthropic': {'api_key': 'Anthropic API key'},
|
|
171
|
+
'azure': {'storage_account_name': 'Azure storage account name', 'storage_account_key': 'Azure storage account key'},
|
|
172
|
+
'bedrock': {'api_key': 'AWS Bedrock API key'},
|
|
173
|
+
'deepseek': {'api_key': 'Deepseek API key', 'rate_limit': 'Rate limit for Deepseek API requests'},
|
|
174
|
+
'fireworks': {'api_key': 'Fireworks API key', 'rate_limit': 'Rate limit for Fireworks API requests'},
|
|
175
|
+
'twelvelabs': {'api_key': 'TwelveLabs API key', 'rate_limit': 'Rate limit for TwelveLabs API requests'},
|
|
176
|
+
'gemini': {'api_key': 'Gemini API key', 'rate_limits': 'Per-model rate limits for Gemini API requests'},
|
|
177
|
+
'hf': {'auth_token': 'Hugging Face access token'},
|
|
178
|
+
'imagen': {'rate_limits': 'Per-model rate limits for Imagen API requests'},
|
|
179
|
+
'reve': {'api_key': 'Reve API key', 'rate_limit': 'Rate limit for Reve API requests (requests per minute)'},
|
|
180
|
+
'groq': {'api_key': 'Groq API key', 'rate_limit': 'Rate limit for Groq API requests'},
|
|
181
|
+
'label_studio': {'api_key': 'Label Studio API key', 'url': 'Label Studio server URL'},
|
|
182
|
+
'mistral': {'api_key': 'Mistral API key', 'rate_limit': 'Rate limit for Mistral API requests'},
|
|
183
|
+
'openai': {
|
|
184
|
+
'api_key': 'OpenAI API key',
|
|
185
|
+
'base_url': 'OpenAI API base URL',
|
|
186
|
+
'api_version': 'API version if using Azure OpenAI',
|
|
187
|
+
'rate_limits': 'Per-model rate limits for OpenAI API requests',
|
|
188
|
+
},
|
|
189
|
+
'openrouter': {
|
|
190
|
+
'api_key': 'OpenRouter API key',
|
|
191
|
+
'site_url': 'Optional URL for your application (for OpenRouter analytics)',
|
|
192
|
+
'app_name': 'Optional name for your application (for OpenRouter analytics)',
|
|
193
|
+
'rate_limit': 'Rate limit for OpenRouter API requests',
|
|
194
|
+
},
|
|
195
|
+
'replicate': {'api_token': 'Replicate API token'},
|
|
196
|
+
'together': {
|
|
197
|
+
'api_key': 'Together API key',
|
|
198
|
+
'rate_limits': 'Per-model category rate limits for Together API requests',
|
|
199
|
+
},
|
|
200
|
+
'veo': {'rate_limits': 'Per-model rate limits for Veo API requests'},
|
|
201
|
+
'pypi': {'api_key': 'PyPI API key (for internal use only)'},
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
KNOWN_CONFIG_OVERRIDES = {
|
|
206
|
+
f'{section}.{key}': info
|
|
207
|
+
for section, section_dict in KNOWN_CONFIG_OPTIONS.items()
|
|
208
|
+
for key, info in section_dict.items()
|
|
209
|
+
}
|