pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +42 -8
- pixeltable/{dataframe.py → _query.py} +470 -206
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -4
- pixeltable/catalog/catalog.py +1785 -432
- pixeltable/catalog/column.py +190 -113
- pixeltable/catalog/dir.py +2 -4
- pixeltable/catalog/globals.py +19 -46
- pixeltable/catalog/insertable_table.py +191 -98
- pixeltable/catalog/path.py +63 -23
- pixeltable/catalog/schema_object.py +11 -15
- pixeltable/catalog/table.py +843 -436
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +978 -657
- pixeltable/catalog/table_version_handle.py +72 -16
- pixeltable/catalog/table_version_path.py +112 -43
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +134 -90
- pixeltable/config.py +134 -22
- pixeltable/env.py +471 -157
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +4 -1
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +11 -7
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +106 -56
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +19 -19
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +351 -84
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +36 -23
- pixeltable/exprs/column_ref.py +213 -89
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +164 -54
- pixeltable/exprs/expr.py +70 -44
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +100 -40
- pixeltable/exprs/globals.py +2 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +18 -32
- pixeltable/exprs/is_null.py +7 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +27 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +167 -67
- pixeltable/exprs/rowid_ref.py +25 -10
- pixeltable/exprs/similarity_expr.py +58 -40
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +17 -11
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +29 -27
- pixeltable/func/signature.py +46 -19
- pixeltable/func/tools.py +31 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +16 -0
- pixeltable/functions/anthropic.py +123 -77
- pixeltable/functions/audio.py +147 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +7 -4
- pixeltable/functions/deepseek.py +35 -43
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +11 -20
- pixeltable/functions/gemini.py +195 -39
- pixeltable/functions/globals.py +142 -14
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1056 -24
- pixeltable/functions/image.py +115 -57
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +28 -13
- pixeltable/functions/math.py +67 -5
- pixeltable/functions/mistralai.py +18 -55
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +20 -13
- pixeltable/functions/openai.py +240 -226
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +4 -4
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +239 -69
- pixeltable/functions/timestamp.py +16 -16
- pixeltable/functions/together.py +24 -84
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1515 -107
- pixeltable/functions/vision.py +8 -8
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +16 -8
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +362 -115
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +28 -22
- pixeltable/index/embedding_index.py +100 -118
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +8 -7
- pixeltable/io/external_store.py +56 -105
- pixeltable/io/fiftyone.py +13 -13
- pixeltable/io/globals.py +31 -30
- pixeltable/io/hf_datasets.py +61 -16
- pixeltable/io/label_studio.py +74 -70
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +21 -12
- pixeltable/io/parquet.py +25 -105
- pixeltable/io/table_data_conduit.py +250 -123
- pixeltable/io/utils.py +4 -4
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +26 -25
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +112 -78
- pixeltable/iterators/image.py +12 -15
- pixeltable/iterators/string.py +11 -4
- pixeltable/iterators/video.py +523 -120
- pixeltable/metadata/__init__.py +14 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_30.py +34 -21
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +20 -31
- pixeltable/metadata/notes.py +9 -0
- pixeltable/metadata/schema.py +140 -53
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +382 -115
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +547 -83
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +257 -59
- pixeltable/store.py +311 -194
- pixeltable/type_system.py +373 -211
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +131 -17
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +6 -6
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +32 -6
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +7 -18
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +86 -48
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +26 -0
- pixeltable/utils/system.py +30 -0
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -40
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable-0.3.14.dist-info/METADATA +0 -434
- pixeltable-0.3.14.dist-info/RECORD +0 -186
- pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/catalog/view.py
CHANGED
|
@@ -2,25 +2,28 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
4
|
import logging
|
|
5
|
-
from typing import TYPE_CHECKING, Any, List, Literal
|
|
5
|
+
from typing import TYPE_CHECKING, Any, List, Literal
|
|
6
6
|
from uuid import UUID
|
|
7
7
|
|
|
8
8
|
import pixeltable.exceptions as excs
|
|
9
9
|
import pixeltable.metadata.schema as md_schema
|
|
10
10
|
import pixeltable.type_system as ts
|
|
11
11
|
from pixeltable import catalog, exprs, func
|
|
12
|
-
from pixeltable.env import Env
|
|
13
12
|
from pixeltable.iterators import ComponentIterator
|
|
14
13
|
|
|
15
14
|
from .column import Column
|
|
16
|
-
from .globals import _POS_COLUMN_NAME, MediaValidation
|
|
15
|
+
from .globals import _POS_COLUMN_NAME, MediaValidation
|
|
17
16
|
from .table import Table
|
|
18
|
-
from .table_version import TableVersion
|
|
17
|
+
from .table_version import TableVersion, TableVersionKey, TableVersionMd
|
|
19
18
|
from .table_version_handle import TableVersionHandle
|
|
20
19
|
from .table_version_path import TableVersionPath
|
|
20
|
+
from .tbl_ops import CreateStoreTableOp, LoadViewOp, TableOp
|
|
21
|
+
from .update_status import UpdateStatus
|
|
21
22
|
|
|
22
23
|
if TYPE_CHECKING:
|
|
24
|
+
from pixeltable.catalog.table import TableMetadata
|
|
23
25
|
from pixeltable.globals import TableDataSource
|
|
26
|
+
from pixeltable.plan import SampleClause
|
|
24
27
|
|
|
25
28
|
_logger = logging.getLogger('pixeltable')
|
|
26
29
|
|
|
@@ -37,21 +40,28 @@ class View(Table):
|
|
|
37
40
|
def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath, snapshot_only: bool):
|
|
38
41
|
super().__init__(id, dir_id, name, tbl_version_path)
|
|
39
42
|
self._snapshot_only = snapshot_only
|
|
43
|
+
if not snapshot_only:
|
|
44
|
+
self._tbl_version = tbl_version_path.tbl_version
|
|
45
|
+
|
|
46
|
+
def _display_name(self) -> str:
|
|
47
|
+
if self._tbl_version_path.is_replica():
|
|
48
|
+
return 'replica'
|
|
49
|
+
if self._tbl_version_path.is_snapshot():
|
|
50
|
+
return 'snapshot'
|
|
51
|
+
if self._tbl_version_path.is_view():
|
|
52
|
+
return 'view'
|
|
53
|
+
return 'table'
|
|
40
54
|
|
|
41
55
|
@classmethod
|
|
42
|
-
def
|
|
43
|
-
return 'view'
|
|
44
|
-
|
|
45
|
-
@classmethod
|
|
46
|
-
def select_list_to_additional_columns(cls, select_list: list[tuple[exprs.Expr, Optional[str]]]) -> dict[str, dict]:
|
|
56
|
+
def select_list_to_additional_columns(cls, select_list: list[tuple[exprs.Expr, str | None]]) -> dict[str, dict]:
|
|
47
57
|
"""Returns a list of columns in the same format as the additional_columns parameter of View.create.
|
|
48
|
-
The source is the list of expressions from a select() statement on a
|
|
58
|
+
The source is the list of expressions from a select() statement on a Query.
|
|
49
59
|
If the column is a ColumnRef, to a base table column, it is marked to not be stored.sy
|
|
50
60
|
"""
|
|
51
|
-
from pixeltable.
|
|
61
|
+
from pixeltable._query import Query
|
|
52
62
|
|
|
53
63
|
r: dict[str, dict] = {}
|
|
54
|
-
exps, names =
|
|
64
|
+
exps, names = Query._normalize_select_list([], select_list)
|
|
55
65
|
for expr, name in zip(exps, names):
|
|
56
66
|
stored = not isinstance(expr, exprs.ColumnRef)
|
|
57
67
|
r[name] = {'value': expr, 'stored': stored}
|
|
@@ -63,16 +73,20 @@ class View(Table):
|
|
|
63
73
|
dir_id: UUID,
|
|
64
74
|
name: str,
|
|
65
75
|
base: TableVersionPath,
|
|
66
|
-
select_list:
|
|
76
|
+
select_list: list[tuple[exprs.Expr, str | None]] | None,
|
|
67
77
|
additional_columns: dict[str, Any],
|
|
68
|
-
predicate:
|
|
78
|
+
predicate: 'exprs.Expr' | None,
|
|
79
|
+
sample_clause: 'SampleClause' | None,
|
|
69
80
|
is_snapshot: bool,
|
|
81
|
+
create_default_idxs: bool,
|
|
70
82
|
num_retained_versions: int,
|
|
71
83
|
comment: str,
|
|
72
84
|
media_validation: MediaValidation,
|
|
73
|
-
iterator_cls:
|
|
74
|
-
iterator_args:
|
|
75
|
-
) ->
|
|
85
|
+
iterator_cls: type[ComponentIterator] | None,
|
|
86
|
+
iterator_args: dict | None,
|
|
87
|
+
) -> tuple[TableVersionMd, list[TableOp] | None]:
|
|
88
|
+
from pixeltable.plan import SampleClause
|
|
89
|
+
|
|
76
90
|
# Convert select_list to more additional_columns if present
|
|
77
91
|
include_base_columns: bool = select_list is None
|
|
78
92
|
select_list_columns: List[Column] = []
|
|
@@ -84,12 +98,25 @@ class View(Table):
|
|
|
84
98
|
columns = select_list_columns + columns_from_additional_columns
|
|
85
99
|
cls._verify_schema(columns)
|
|
86
100
|
|
|
87
|
-
# verify that
|
|
101
|
+
# verify that filters can be evaluated in the context of the base
|
|
88
102
|
if predicate is not None:
|
|
89
103
|
if not predicate.is_bound_by([base]):
|
|
90
|
-
raise excs.Error(f'
|
|
104
|
+
raise excs.Error(f'View filter cannot be computed in the context of the base table {base.tbl_name()!r}')
|
|
91
105
|
# create a copy that we can modify and store
|
|
92
106
|
predicate = predicate.copy()
|
|
107
|
+
if sample_clause is not None:
|
|
108
|
+
# make sure that the sample clause can be computed in the context of the base
|
|
109
|
+
if sample_clause.stratify_exprs is not None and not all(
|
|
110
|
+
stratify_expr.is_bound_by([base]) for stratify_expr in sample_clause.stratify_exprs
|
|
111
|
+
):
|
|
112
|
+
raise excs.Error(
|
|
113
|
+
f'View sample clause cannot be computed in the context of the base table {base.tbl_name()!r}'
|
|
114
|
+
)
|
|
115
|
+
# create a copy that we can modify and store
|
|
116
|
+
sc = sample_clause
|
|
117
|
+
sample_clause = SampleClause(
|
|
118
|
+
sc.version, sc.n, sc.n_per_stratum, sc.fraction, sc.seed, sc.stratify_exprs.copy()
|
|
119
|
+
)
|
|
93
120
|
|
|
94
121
|
# same for value exprs
|
|
95
122
|
for col in columns:
|
|
@@ -98,8 +125,8 @@ class View(Table):
|
|
|
98
125
|
# make sure that the value can be computed in the context of the base
|
|
99
126
|
if col.value_expr is not None and not col.value_expr.is_bound_by([base]):
|
|
100
127
|
raise excs.Error(
|
|
101
|
-
f'Column {col.name}:
|
|
102
|
-
f'base {base.tbl_name()}'
|
|
128
|
+
f'Column {col.name!r}: Value expression cannot be computed in the context of the '
|
|
129
|
+
f'base table {base.tbl_name()!r}'
|
|
103
130
|
)
|
|
104
131
|
|
|
105
132
|
if iterator_cls is not None:
|
|
@@ -126,18 +153,18 @@ class View(Table):
|
|
|
126
153
|
sig = func.Signature(ts.InvalidType(), params)
|
|
127
154
|
|
|
128
155
|
expr_args = {k: exprs.Expr.from_object(v) for k, v in bound_args.items()}
|
|
129
|
-
sig.validate_args(expr_args, context=f'in iterator {iterator_cls.__name__
|
|
156
|
+
sig.validate_args(expr_args, context=f'in iterator of type `{iterator_cls.__name__}`')
|
|
130
157
|
literal_args = {k: v.val if isinstance(v, exprs.Literal) else v for k, v in expr_args.items()}
|
|
131
158
|
|
|
132
159
|
# prepend pos and output_schema columns to cols:
|
|
133
160
|
# a component view exposes the pos column of its rowid;
|
|
134
161
|
# we create that column here, so it gets assigned a column id;
|
|
135
162
|
# stored=False: it is not stored separately (it's already stored as part of the rowid)
|
|
136
|
-
iterator_cols = [Column(_POS_COLUMN_NAME, ts.IntType(), stored=False)]
|
|
163
|
+
iterator_cols = [Column(_POS_COLUMN_NAME, ts.IntType(), is_iterator_col=True, stored=False)]
|
|
137
164
|
output_dict, unstored_cols = iterator_cls.output_schema(**literal_args)
|
|
138
165
|
iterator_cols.extend(
|
|
139
166
|
[
|
|
140
|
-
Column(col_name, col_type, stored=col_name not in unstored_cols)
|
|
167
|
+
Column(col_name, col_type, is_iterator_col=True, stored=col_name not in unstored_cols)
|
|
141
168
|
for col_name, col_type in output_dict.items()
|
|
142
169
|
]
|
|
143
170
|
)
|
|
@@ -146,11 +173,10 @@ class View(Table):
|
|
|
146
173
|
for col in columns:
|
|
147
174
|
if col.name in iterator_col_names:
|
|
148
175
|
raise excs.Error(
|
|
149
|
-
f'Duplicate name: column {col.name} is already present in the iterator output schema'
|
|
176
|
+
f'Duplicate name: column {col.name!r} is already present in the iterator output schema'
|
|
150
177
|
)
|
|
151
178
|
columns = iterator_cols + columns
|
|
152
179
|
|
|
153
|
-
session = Env.get().session
|
|
154
180
|
from pixeltable.exprs import InlineDict
|
|
155
181
|
|
|
156
182
|
iterator_args_expr: exprs.Expr = InlineDict(iterator_args) if iterator_args is not None else None
|
|
@@ -160,6 +186,8 @@ class View(Table):
|
|
|
160
186
|
# if this is a snapshot, we need to retarget all exprs to the snapshot tbl versions
|
|
161
187
|
if is_snapshot:
|
|
162
188
|
predicate = predicate.retarget(base_version_path) if predicate is not None else None
|
|
189
|
+
if sample_clause is not None:
|
|
190
|
+
exprs.Expr.retarget_list(sample_clause.stratify_exprs, base_version_path)
|
|
163
191
|
iterator_args_expr = (
|
|
164
192
|
iterator_args_expr.retarget(base_version_path) if iterator_args_expr is not None else None
|
|
165
193
|
)
|
|
@@ -171,51 +199,43 @@ class View(Table):
|
|
|
171
199
|
is_snapshot=is_snapshot,
|
|
172
200
|
include_base_columns=include_base_columns,
|
|
173
201
|
predicate=predicate.as_dict() if predicate is not None else None,
|
|
202
|
+
sample_clause=sample_clause.as_dict() if sample_clause is not None else None,
|
|
174
203
|
base_versions=base_version_path.as_md(),
|
|
175
204
|
iterator_class_fqn=iterator_class_fqn,
|
|
176
205
|
iterator_args=iterator_args_expr.as_dict() if iterator_args_expr is not None else None,
|
|
177
206
|
)
|
|
178
207
|
|
|
179
|
-
|
|
180
|
-
dir_id,
|
|
208
|
+
md = TableVersion.create_initial_md(
|
|
181
209
|
name,
|
|
182
210
|
columns,
|
|
183
211
|
num_retained_versions,
|
|
184
212
|
comment,
|
|
185
213
|
media_validation=media_validation,
|
|
186
|
-
# base_path=base_version_path,
|
|
187
214
|
view_md=view_md,
|
|
215
|
+
create_default_idxs=create_default_idxs,
|
|
188
216
|
)
|
|
189
|
-
if
|
|
190
|
-
# this is purely a snapshot:
|
|
191
|
-
|
|
192
|
-
_logger.info(f'created snapshot {name}')
|
|
217
|
+
if md.tbl_md.is_pure_snapshot:
|
|
218
|
+
# this is purely a snapshot: no store table to create or load
|
|
219
|
+
return md, None
|
|
193
220
|
else:
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
221
|
+
tbl_id = md.tbl_md.tbl_id
|
|
222
|
+
key = TableVersionKey(UUID(tbl_id), 0 if is_snapshot else None, None)
|
|
223
|
+
view_path = TableVersionPath(TableVersionHandle(key), base=base_version_path)
|
|
224
|
+
ops = [
|
|
225
|
+
TableOp(
|
|
226
|
+
tbl_id=tbl_id, op_sn=0, num_ops=2, needs_xact=False, create_store_table_op=CreateStoreTableOp()
|
|
200
227
|
),
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
plan, _ = Planner.create_view_load_plan(view._tbl_version_path)
|
|
208
|
-
num_rows, num_excs, _ = tbl_version.store_tbl.insert_rows(plan, v_min=tbl_version.version)
|
|
209
|
-
Env.get().console_logger.info(f'Created view `{name}` with {num_rows} rows, {num_excs} exceptions.')
|
|
210
|
-
|
|
211
|
-
session.commit()
|
|
212
|
-
return view
|
|
228
|
+
TableOp(
|
|
229
|
+
tbl_id=tbl_id, op_sn=1, num_ops=2, needs_xact=True, load_view_op=LoadViewOp(view_path.as_dict())
|
|
230
|
+
),
|
|
231
|
+
]
|
|
232
|
+
return md, ops
|
|
213
233
|
|
|
214
234
|
@classmethod
|
|
215
235
|
def _verify_column(cls, col: Column) -> None:
|
|
216
236
|
# make sure that columns are nullable or have a default
|
|
217
237
|
if not col.col_type.nullable and not col.is_computed:
|
|
218
|
-
raise excs.Error(f'Column {col.name}:
|
|
238
|
+
raise excs.Error(f'Column {col.name!r}: Non-computed columns in views must be nullable')
|
|
219
239
|
super()._verify_column(col)
|
|
220
240
|
|
|
221
241
|
@classmethod
|
|
@@ -227,74 +247,98 @@ class View(Table):
|
|
|
227
247
|
if tbl_version_path.is_snapshot():
|
|
228
248
|
return tbl_version_path
|
|
229
249
|
tbl_version = tbl_version_path.tbl_version.get()
|
|
230
|
-
|
|
231
|
-
# create and register snapshot version
|
|
232
|
-
tbl_version = tbl_version.create_snapshot_copy()
|
|
233
|
-
assert tbl_version.is_snapshot
|
|
250
|
+
assert not tbl_version.is_snapshot
|
|
234
251
|
|
|
235
252
|
return TableVersionPath(
|
|
236
|
-
TableVersionHandle(tbl_version.id, tbl_version.
|
|
253
|
+
TableVersionHandle(TableVersionKey(tbl_version.id, tbl_version.version, None)),
|
|
237
254
|
base=cls._get_snapshot_path(tbl_version_path.base) if tbl_version_path.base is not None else None,
|
|
238
255
|
)
|
|
239
256
|
|
|
240
|
-
def
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
catalog.Catalog.get().delete_tbl_md(self._id)
|
|
246
|
-
else:
|
|
247
|
-
super()._drop()
|
|
257
|
+
def _is_named_pure_snapshot(self) -> bool:
|
|
258
|
+
"""
|
|
259
|
+
Returns True if this is a named pure snapshot (i.e., a pure snapshot that is a separate schema object).
|
|
260
|
+
"""
|
|
261
|
+
return self._id != self._tbl_version_path.tbl_id
|
|
248
262
|
|
|
249
|
-
def
|
|
250
|
-
|
|
263
|
+
def _is_anonymous_snapshot(self) -> bool:
|
|
264
|
+
"""
|
|
265
|
+
Returns True if this is an unnamed snapshot (i.e., a snapshot that is not a separate schema object).
|
|
266
|
+
"""
|
|
267
|
+
return self._snapshot_only and self._id == self._tbl_version_path.tbl_id
|
|
268
|
+
|
|
269
|
+
def _get_metadata(self) -> 'TableMetadata':
|
|
270
|
+
md = super()._get_metadata()
|
|
251
271
|
md['is_view'] = True
|
|
252
272
|
md['is_snapshot'] = self._tbl_version_path.is_snapshot()
|
|
273
|
+
if self._is_anonymous_snapshot():
|
|
274
|
+
# Update name and path with version qualifiers.
|
|
275
|
+
md['name'] = f'{self._name}:{self._tbl_version_path.version()}'
|
|
276
|
+
md['path'] = f'{self._path()}:{self._tbl_version_path.version()}'
|
|
277
|
+
base_tbl_id = self._base_tbl_id
|
|
278
|
+
if base_tbl_id is not None:
|
|
279
|
+
base_tbl = self._get_base_table()
|
|
280
|
+
base_path = '<anonymous base table>' if base_tbl is None else base_tbl._path()
|
|
281
|
+
base_version = self._effective_base_versions[0]
|
|
282
|
+
md['base'] = base_path if base_version is None else f'{base_path}:{base_version}'
|
|
253
283
|
return md
|
|
254
284
|
|
|
255
285
|
def insert(
|
|
256
286
|
self,
|
|
257
|
-
source:
|
|
287
|
+
source: TableDataSource | None = None,
|
|
258
288
|
/,
|
|
259
289
|
*,
|
|
260
|
-
source_format:
|
|
261
|
-
schema_overrides:
|
|
290
|
+
source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
|
|
291
|
+
schema_overrides: dict[str, ts.ColumnType] | None = None,
|
|
262
292
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
263
293
|
print_stats: bool = False,
|
|
264
294
|
**kwargs: Any,
|
|
265
295
|
) -> UpdateStatus:
|
|
266
|
-
raise excs.Error(f'{self.
|
|
296
|
+
raise excs.Error(f'{self._display_str()}: Cannot insert into a {self._display_name()}.')
|
|
267
297
|
|
|
268
|
-
def delete(self, where:
|
|
269
|
-
raise excs.Error(f'{self.
|
|
298
|
+
def delete(self, where: exprs.Expr | None = None) -> UpdateStatus:
|
|
299
|
+
raise excs.Error(f'{self._display_str()}: Cannot delete from a {self._display_name()}.')
|
|
270
300
|
|
|
271
301
|
@property
|
|
272
|
-
def
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
302
|
+
def _base_tbl_id(self) -> UUID | None:
|
|
303
|
+
if self._tbl_version_path.tbl_id != self._id:
|
|
304
|
+
# _tbl_version_path represents a different schema object from this one. This can only happen if this is a
|
|
305
|
+
# named pure snapshot.
|
|
306
|
+
return self._tbl_version_path.tbl_id
|
|
307
|
+
if self._tbl_version_path.base is None:
|
|
308
|
+
return None
|
|
309
|
+
return self._tbl_version_path.base.tbl_id
|
|
310
|
+
|
|
311
|
+
def _get_base_table(self) -> 'Table' | None:
|
|
312
|
+
"""Returns None if there is no base table, or if the base table is hidden."""
|
|
313
|
+
base_tbl_id = self._base_tbl_id
|
|
314
|
+
if base_tbl_id is None:
|
|
315
|
+
return None
|
|
316
|
+
with catalog.Catalog.get().begin_xact(tbl_id=base_tbl_id, for_write=False):
|
|
317
|
+
return catalog.Catalog.get().get_table_by_id(base_tbl_id)
|
|
277
318
|
|
|
278
319
|
@property
|
|
279
|
-
def _effective_base_versions(self) -> list[
|
|
320
|
+
def _effective_base_versions(self) -> list[int | None]:
|
|
280
321
|
effective_versions = [tv.effective_version for tv in self._tbl_version_path.get_tbl_versions()]
|
|
281
|
-
if self._snapshot_only:
|
|
282
|
-
return effective_versions
|
|
322
|
+
if self._snapshot_only and not self._is_anonymous_snapshot():
|
|
323
|
+
return effective_versions # Named pure snapshot
|
|
283
324
|
else:
|
|
284
325
|
return effective_versions[1:]
|
|
285
326
|
|
|
286
327
|
def _table_descriptor(self) -> str:
|
|
287
|
-
|
|
288
|
-
result = [f'{display_name} {self._path!r}']
|
|
328
|
+
result = [self._display_str()]
|
|
289
329
|
bases_descrs: list[str] = []
|
|
290
|
-
for base, effective_version in zip(self.
|
|
330
|
+
for base, effective_version in zip(self._get_base_tables(), self._effective_base_versions):
|
|
291
331
|
if effective_version is None:
|
|
292
|
-
bases_descrs.append(f'{base._path!r}')
|
|
332
|
+
bases_descrs.append(f'{base._path()!r}')
|
|
293
333
|
else:
|
|
294
|
-
base_descr = f'{base._path}:{effective_version}'
|
|
334
|
+
base_descr = f'{base._path()}:{effective_version}'
|
|
295
335
|
bases_descrs.append(f'{base_descr!r}')
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
336
|
+
if len(bases_descrs) > 0:
|
|
337
|
+
# bases_descrs can be empty in the case of a table-replica
|
|
338
|
+
result.append(f' (of {", ".join(bases_descrs)})')
|
|
339
|
+
|
|
340
|
+
if self._tbl_version_path.tbl_version.get().predicate is not None:
|
|
341
|
+
result.append(f'\nWhere: {self._tbl_version_path.tbl_version.get().predicate!s}')
|
|
342
|
+
if self._tbl_version_path.tbl_version.get().sample_clause is not None:
|
|
343
|
+
result.append(f'\nSample: {self._tbl_version.get().sample_clause!s}')
|
|
300
344
|
return ''.join(result)
|
pixeltable/config.py
CHANGED
|
@@ -4,11 +4,11 @@ import logging
|
|
|
4
4
|
import os
|
|
5
5
|
import shutil
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import Any, ClassVar,
|
|
7
|
+
from typing import Any, ClassVar, TypeVar
|
|
8
8
|
|
|
9
9
|
import toml
|
|
10
10
|
|
|
11
|
-
from pixeltable import exceptions as excs
|
|
11
|
+
from pixeltable import env, exceptions as excs
|
|
12
12
|
|
|
13
13
|
_logger = logging.getLogger('pixeltable')
|
|
14
14
|
|
|
@@ -21,23 +21,30 @@ class Config:
|
|
|
21
21
|
configuration values, which can be set in the config file or as environment variables.
|
|
22
22
|
"""
|
|
23
23
|
|
|
24
|
-
__instance: ClassVar[
|
|
24
|
+
__instance: ClassVar[Config | None] = None
|
|
25
25
|
|
|
26
26
|
__home: Path
|
|
27
27
|
__config_file: Path
|
|
28
|
+
__config_overrides: dict[str, Any]
|
|
28
29
|
__config_dict: dict[str, Any]
|
|
29
30
|
|
|
30
|
-
def __init__(self) -> None:
|
|
31
|
+
def __init__(self, config_overrides: dict[str, Any]) -> None:
|
|
31
32
|
assert self.__instance is None, 'Config is a singleton; use Config.get() to access the instance'
|
|
32
33
|
|
|
33
|
-
|
|
34
|
+
for var in config_overrides:
|
|
35
|
+
if var not in KNOWN_CONFIG_OVERRIDES:
|
|
36
|
+
raise excs.Error(f'Unrecognized configuration variable: {var}')
|
|
37
|
+
|
|
38
|
+
self.__config_overrides = config_overrides
|
|
39
|
+
|
|
40
|
+
self.__home = Path(self.lookup_env('pixeltable', 'home', str(Path.home() / '.pixeltable')))
|
|
34
41
|
if self.__home.exists() and not self.__home.is_dir():
|
|
35
|
-
raise
|
|
42
|
+
raise excs.Error(f'Not a directory: {self.__home}')
|
|
36
43
|
if not self.__home.exists():
|
|
37
44
|
print(f'Creating a Pixeltable instance at: {self.__home}')
|
|
38
45
|
self.__home.mkdir()
|
|
39
46
|
|
|
40
|
-
self.__config_file = Path(
|
|
47
|
+
self.__config_file = Path(self.lookup_env('pixeltable', 'config', str(self.__home / 'config.toml')))
|
|
41
48
|
|
|
42
49
|
self.__config_dict: dict[str, Any]
|
|
43
50
|
if os.path.isfile(self.__config_file):
|
|
@@ -46,6 +53,12 @@ class Config:
|
|
|
46
53
|
self.__config_dict = toml.load(stream)
|
|
47
54
|
except Exception as exc:
|
|
48
55
|
raise excs.Error(f'Could not read config file: {self.__config_file}') from exc
|
|
56
|
+
for section, section_dict in self.__config_dict.items():
|
|
57
|
+
if section not in KNOWN_CONFIG_OPTIONS:
|
|
58
|
+
raise excs.Error(f'Unrecognized section {section!r} in config file: {self.__config_file}')
|
|
59
|
+
for key in section_dict:
|
|
60
|
+
if key not in KNOWN_CONFIG_OPTIONS[section]:
|
|
61
|
+
raise excs.Error(f"Unrecognized option '{section}.{key}' in config file: {self.__config_file}")
|
|
49
62
|
else:
|
|
50
63
|
self.__config_dict = self.__create_default_config(self.__config_file)
|
|
51
64
|
with open(self.__config_file, 'w', encoding='utf-8') as stream:
|
|
@@ -65,10 +78,22 @@ class Config:
|
|
|
65
78
|
|
|
66
79
|
@classmethod
|
|
67
80
|
def get(cls) -> Config:
|
|
68
|
-
|
|
69
|
-
cls.__instance = cls()
|
|
81
|
+
cls.init({})
|
|
70
82
|
return cls.__instance
|
|
71
83
|
|
|
84
|
+
@classmethod
|
|
85
|
+
def init(cls, config_overrides: dict[str, Any], reinit: bool = False) -> None:
|
|
86
|
+
if reinit:
|
|
87
|
+
cls.__instance = None
|
|
88
|
+
for cl in env._registered_clients.values():
|
|
89
|
+
cl.client_obj = None
|
|
90
|
+
if cls.__instance is None:
|
|
91
|
+
cls.__instance = cls(config_overrides)
|
|
92
|
+
elif len(config_overrides) > 0:
|
|
93
|
+
raise excs.Error(
|
|
94
|
+
'Pixeltable has already been initialized; cannot specify new config values in the same session'
|
|
95
|
+
)
|
|
96
|
+
|
|
72
97
|
@classmethod
|
|
73
98
|
def __create_default_config(cls, config_path: Path) -> dict[str, Any]:
|
|
74
99
|
free_disk_space_bytes = shutil.disk_usage(config_path.parent).free
|
|
@@ -76,28 +101,115 @@ class Config:
|
|
|
76
101
|
file_cache_size_g = free_disk_space_bytes / 5 / (1 << 30)
|
|
77
102
|
return {'pixeltable': {'file_cache_size_g': round(file_cache_size_g, 1), 'hide_warnings': False}}
|
|
78
103
|
|
|
79
|
-
def
|
|
104
|
+
def lookup_env(self, section: str, key: str, default: Any = None) -> Any:
|
|
105
|
+
override_var = f'{section}.{key}'
|
|
80
106
|
env_var = f'{section.upper()}_{key.upper()}'
|
|
81
|
-
if
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
107
|
+
if override_var in self.__config_overrides:
|
|
108
|
+
return self.__config_overrides[override_var]
|
|
109
|
+
if env_var in os.environ and len(os.environ[env_var]) > 0:
|
|
110
|
+
return os.environ[env_var]
|
|
111
|
+
return default
|
|
112
|
+
|
|
113
|
+
def get_value(self, key: str, expected_type: type[T], section: str = 'pixeltable') -> T | None:
|
|
114
|
+
value: Any = self.lookup_env(section, key) # Try to get from environment first
|
|
115
|
+
# Next try the config file
|
|
116
|
+
if value is None:
|
|
117
|
+
# Resolve nested section dicts
|
|
118
|
+
lookup_elems = [*section.split('.'), key]
|
|
119
|
+
value = self.__config_dict
|
|
120
|
+
for el in lookup_elems:
|
|
121
|
+
if isinstance(value, dict):
|
|
122
|
+
if el not in value:
|
|
123
|
+
return None
|
|
124
|
+
value = value[el]
|
|
125
|
+
else:
|
|
126
|
+
return None
|
|
127
|
+
|
|
128
|
+
if value is None:
|
|
129
|
+
return None # Not specified
|
|
87
130
|
|
|
88
131
|
try:
|
|
132
|
+
if expected_type is bool and isinstance(value, str):
|
|
133
|
+
if value.lower() not in ('true', 'false'):
|
|
134
|
+
raise excs.Error(f"Invalid value for configuration parameter '{section}.{key}': {value}")
|
|
135
|
+
return value.lower() == 'true' # type: ignore[return-value]
|
|
89
136
|
return expected_type(value) # type: ignore[call-arg]
|
|
90
|
-
except ValueError as exc:
|
|
91
|
-
raise excs.Error(f
|
|
137
|
+
except (ValueError, TypeError) as exc:
|
|
138
|
+
raise excs.Error(f"Invalid value for configuration parameter '{section}.{key}': {value}") from exc
|
|
92
139
|
|
|
93
|
-
def get_string_value(self, key: str, section: str = 'pixeltable') ->
|
|
140
|
+
def get_string_value(self, key: str, section: str = 'pixeltable') -> str | None:
|
|
94
141
|
return self.get_value(key, str, section)
|
|
95
142
|
|
|
96
|
-
def get_int_value(self, key: str, section: str = 'pixeltable') ->
|
|
143
|
+
def get_int_value(self, key: str, section: str = 'pixeltable') -> int | None:
|
|
97
144
|
return self.get_value(key, int, section)
|
|
98
145
|
|
|
99
|
-
def get_float_value(self, key: str, section: str = 'pixeltable') ->
|
|
146
|
+
def get_float_value(self, key: str, section: str = 'pixeltable') -> float | None:
|
|
100
147
|
return self.get_value(key, float, section)
|
|
101
148
|
|
|
102
|
-
def get_bool_value(self, key: str, section: str = 'pixeltable') ->
|
|
149
|
+
def get_bool_value(self, key: str, section: str = 'pixeltable') -> bool | None:
|
|
103
150
|
return self.get_value(key, bool, section)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
KNOWN_CONFIG_OPTIONS = {
|
|
154
|
+
'pixeltable': {
|
|
155
|
+
'home': 'Path to the Pixeltable home directory',
|
|
156
|
+
'config': 'Path to the Pixeltable config file',
|
|
157
|
+
'pgdata': 'Path to the Pixeltable postgres data directory',
|
|
158
|
+
'db': 'Postgres database name',
|
|
159
|
+
'file_cache_size_g': 'Size of the file cache in GB',
|
|
160
|
+
'time_zone': 'Default time zone for timestamps',
|
|
161
|
+
'hide_warnings': 'Hide warnings from the console',
|
|
162
|
+
'verbosity': 'Verbosity level for console output',
|
|
163
|
+
'api_key': 'API key for Pixeltable cloud',
|
|
164
|
+
'input_media_dest': 'Default destination URI for input media data',
|
|
165
|
+
'output_media_dest': 'Default destination URI for output (computed) media data',
|
|
166
|
+
'r2_profile': 'AWS config profile name used to access R2 storage',
|
|
167
|
+
's3_profile': 'AWS config profile name used to access S3 storage',
|
|
168
|
+
'b2_profile': 'AWS config profile name used to access Backblaze B2 storage',
|
|
169
|
+
'tigris_profile': 'AWS config profile name used to access Tigris object storage',
|
|
170
|
+
},
|
|
171
|
+
'anthropic': {'api_key': 'Anthropic API key'},
|
|
172
|
+
'azure': {'storage_account_name': 'Azure storage account name', 'storage_account_key': 'Azure storage account key'},
|
|
173
|
+
'bedrock': {'api_key': 'AWS Bedrock API key'},
|
|
174
|
+
'deepseek': {'api_key': 'Deepseek API key', 'rate_limit': 'Rate limit for Deepseek API requests'},
|
|
175
|
+
'fal': {'api_key': 'fal.ai API key', 'rate_limit': 'Rate limit for fal.ai API requests'},
|
|
176
|
+
'fireworks': {'api_key': 'Fireworks API key', 'rate_limit': 'Rate limit for Fireworks API requests'},
|
|
177
|
+
'gemini': {'api_key': 'Gemini API key', 'rate_limits': 'Per-model rate limits for Gemini API requests'},
|
|
178
|
+
'hf': {'auth_token': 'Hugging Face access token'},
|
|
179
|
+
'imagen': {'rate_limits': 'Per-model rate limits for Imagen API requests'},
|
|
180
|
+
'reve': {'api_key': 'Reve API key', 'rate_limit': 'Rate limit for Reve API requests (requests per minute)'},
|
|
181
|
+
'groq': {'api_key': 'Groq API key', 'rate_limit': 'Rate limit for Groq API requests'},
|
|
182
|
+
'label_studio': {'api_key': 'Label Studio API key', 'url': 'Label Studio server URL'},
|
|
183
|
+
'mistral': {'api_key': 'Mistral API key', 'rate_limit': 'Rate limit for Mistral API requests'},
|
|
184
|
+
'openai': {
|
|
185
|
+
'api_key': 'OpenAI API key',
|
|
186
|
+
'base_url': 'OpenAI API base URL',
|
|
187
|
+
'api_version': 'API version if using Azure OpenAI',
|
|
188
|
+
'rate_limits': 'Per-model rate limits for OpenAI API requests',
|
|
189
|
+
'max_connections': 'Maximum number of concurrent OpenAI API connections that can be established',
|
|
190
|
+
'max_keepalive_connections': 'Maximum number of keep-alive connections in the pool.'
|
|
191
|
+
' Must not exceed max_connections.',
|
|
192
|
+
},
|
|
193
|
+
'openrouter': {
|
|
194
|
+
'api_key': 'OpenRouter API key',
|
|
195
|
+
'site_url': 'Optional URL for your application (for OpenRouter analytics)',
|
|
196
|
+
'app_name': 'Optional name for your application (for OpenRouter analytics)',
|
|
197
|
+
'rate_limit': 'Rate limit for OpenRouter API requests',
|
|
198
|
+
},
|
|
199
|
+
'replicate': {'api_token': 'Replicate API token'},
|
|
200
|
+
'together': {
|
|
201
|
+
'api_key': 'Together API key',
|
|
202
|
+
'rate_limits': 'Per-model category rate limits for Together API requests',
|
|
203
|
+
},
|
|
204
|
+
'twelvelabs': {'api_key': 'TwelveLabs API key', 'rate_limit': 'Rate limit for TwelveLabs API requests'},
|
|
205
|
+
'veo': {'rate_limits': 'Per-model rate limits for Veo API requests'},
|
|
206
|
+
'voyage': {'api_key': 'Voyage AI API key', 'rate_limit': 'Rate limit for Voyage AI API requests'},
|
|
207
|
+
'pypi': {'api_key': 'PyPI API key (for internal use only)'},
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
KNOWN_CONFIG_OVERRIDES = {
|
|
212
|
+
f'{section}.{key}': info
|
|
213
|
+
for section, section_dict in KNOWN_CONFIG_OPTIONS.items()
|
|
214
|
+
for key, info in section_dict.items()
|
|
215
|
+
}
|