pixeltable 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -0
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +9 -2
- pixeltable/catalog/column.py +1 -1
- pixeltable/catalog/dir.py +1 -1
- pixeltable/catalog/table.py +3 -1
- pixeltable/catalog/table_version.py +12 -2
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/catalog/view.py +64 -20
- pixeltable/dataframe.py +11 -6
- pixeltable/env.py +12 -0
- pixeltable/exec/expr_eval/evaluators.py +4 -2
- pixeltable/exec/expr_eval/expr_eval_node.py +4 -1
- pixeltable/exprs/comparison.py +8 -4
- pixeltable/exprs/data_row.py +9 -7
- pixeltable/exprs/expr.py +2 -2
- pixeltable/exprs/function_call.py +155 -313
- pixeltable/exprs/json_mapper.py +25 -8
- pixeltable/exprs/json_path.py +6 -5
- pixeltable/exprs/object_ref.py +16 -5
- pixeltable/exprs/row_builder.py +10 -3
- pixeltable/func/aggregate_function.py +29 -15
- pixeltable/func/callable_function.py +11 -8
- pixeltable/func/expr_template_function.py +3 -9
- pixeltable/func/function.py +148 -74
- pixeltable/func/signature.py +65 -30
- pixeltable/func/tools.py +26 -26
- pixeltable/func/udf.py +1 -1
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/anthropic.py +9 -3
- pixeltable/functions/deepseek.py +121 -0
- pixeltable/functions/image.py +7 -7
- pixeltable/functions/openai.py +30 -13
- pixeltable/functions/video.py +14 -7
- pixeltable/globals.py +14 -3
- pixeltable/index/embedding_index.py +4 -13
- pixeltable/io/globals.py +88 -77
- pixeltable/io/hf_datasets.py +34 -34
- pixeltable/io/pandas.py +75 -76
- pixeltable/io/parquet.py +19 -27
- pixeltable/io/utils.py +115 -0
- pixeltable/iterators/audio.py +2 -1
- pixeltable/iterators/video.py +1 -1
- pixeltable/metadata/__init__.py +2 -1
- pixeltable/metadata/converters/convert_15.py +18 -8
- pixeltable/metadata/converters/convert_27.py +31 -0
- pixeltable/metadata/converters/convert_28.py +15 -0
- pixeltable/metadata/converters/convert_29.py +111 -0
- pixeltable/metadata/converters/util.py +12 -1
- pixeltable/metadata/notes.py +3 -0
- pixeltable/metadata/schema.py +8 -0
- pixeltable/share/__init__.py +1 -0
- pixeltable/share/packager.py +41 -13
- pixeltable/share/publish.py +97 -0
- pixeltable/type_system.py +40 -14
- pixeltable/utils/__init__.py +41 -0
- pixeltable/utils/arrow.py +40 -7
- pixeltable/utils/formatter.py +1 -1
- {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/METADATA +34 -49
- {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/RECORD +63 -57
- {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/WHEEL +1 -1
- {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.4.dist-info → pixeltable-0.3.6.dist-info}/entry_points.txt +0 -0
pixeltable/__init__.py
CHANGED
pixeltable/__version__.py
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
# These version placeholders will be replaced during build.
|
|
2
|
-
__version__ = '0.3.4'
|
|
3
|
-
__version_tuple__ = (0, 3, 4)
|
|
2
|
+
__version__ = '0.3.6'
|
|
3
|
+
__version_tuple__ = (0, 3, 6)
|
pixeltable/catalog/catalog.py
CHANGED
|
@@ -58,7 +58,7 @@ class Catalog:
|
|
|
58
58
|
if session.query(sql.func.count(schema.Dir.id)).scalar() > 0:
|
|
59
59
|
return
|
|
60
60
|
# create a top-level directory, so that every schema object has a directory
|
|
61
|
-
dir_md = schema.DirMd(name='')
|
|
61
|
+
dir_md = schema.DirMd(name='', user=None, additional_md={})
|
|
62
62
|
dir_record = schema.Dir(parent_id=None, md=dataclasses.asdict(dir_md))
|
|
63
63
|
session.add(dir_record)
|
|
64
64
|
session.flush()
|
|
@@ -142,6 +142,7 @@ class Catalog:
|
|
|
142
142
|
base_tbl_id = base_path.tbl_id()
|
|
143
143
|
is_snapshot = view_md is not None and view_md.is_snapshot
|
|
144
144
|
snapshot_only = is_snapshot and view_md.predicate is None and len(schema_version_md.columns) == 0
|
|
145
|
+
include_base_columns = view_md is None or view_md.include_base_columns
|
|
145
146
|
if snapshot_only:
|
|
146
147
|
# this is a pure snapshot, without a physical table backing it
|
|
147
148
|
view_path = base_path
|
|
@@ -158,7 +159,13 @@ class Catalog:
|
|
|
158
159
|
view_path = TableVersionPath(tbl_version, base=base_path)
|
|
159
160
|
|
|
160
161
|
tbl: Table = View(
|
|
161
|
-
tbl_record.id,
|
|
162
|
+
tbl_record.id,
|
|
163
|
+
tbl_record.dir_id,
|
|
164
|
+
tbl_md.name,
|
|
165
|
+
view_path,
|
|
166
|
+
base_tbl_id,
|
|
167
|
+
snapshot_only=snapshot_only,
|
|
168
|
+
include_base_columns=include_base_columns,
|
|
162
169
|
)
|
|
163
170
|
self.tbl_dependents[base_tbl_id].append(tbl)
|
|
164
171
|
|
pixeltable/catalog/column.py
CHANGED
|
@@ -72,7 +72,7 @@ class Column:
|
|
|
72
72
|
col_type is None
|
|
73
73
|
- when loaded from md store: ``computed_with`` is set and col_type is set
|
|
74
74
|
|
|
75
|
-
``stored`` (only valid for computed
|
|
75
|
+
``stored`` (only valid for computed columns):
|
|
76
76
|
- if True: the column is present in the stored table
|
|
77
77
|
- if False: the column is not present in the stored table and recomputed during a query
|
|
78
78
|
- if None: the system chooses for you (at present, this is always False, but this may change in the future)
|
pixeltable/catalog/dir.py
CHANGED
|
@@ -32,7 +32,7 @@ class Dir(SchemaObject):
|
|
|
32
32
|
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
33
33
|
super()._move(new_name, new_dir_id)
|
|
34
34
|
with Env.get().engine.begin() as conn:
|
|
35
|
-
dir_md = schema.DirMd(name=new_name)
|
|
35
|
+
dir_md = schema.DirMd(name=new_name, user=None, additional_md={})
|
|
36
36
|
conn.execute(
|
|
37
37
|
sql.update(schema.Dir.__table__)
|
|
38
38
|
.values({schema.Dir.parent_id: self._dir_id, schema.Dir.md: dataclasses.asdict(dir_md)})
|
pixeltable/catalog/table.py
CHANGED
|
@@ -569,7 +569,7 @@ class Table(SchemaObject):
|
|
|
569
569
|
|
|
570
570
|
Args:
|
|
571
571
|
kwargs: Exactly one keyword argument of the form `col_name=expression`.
|
|
572
|
-
stored: Whether the column is materialized and stored or computed on demand.
|
|
572
|
+
stored: Whether the column is materialized and stored or computed on demand.
|
|
573
573
|
print_stats: If `True`, print execution metrics during evaluation.
|
|
574
574
|
on_error: Determines the behavior if an error occurs while evaluating the column expression for at least one
|
|
575
575
|
row.
|
|
@@ -682,6 +682,7 @@ class Table(SchemaObject):
|
|
|
682
682
|
elif isinstance(spec, exprs.Expr):
|
|
683
683
|
# create copy so we can modify it
|
|
684
684
|
value_expr = spec.copy()
|
|
685
|
+
value_expr.bind_rel_paths()
|
|
685
686
|
elif isinstance(spec, dict):
|
|
686
687
|
cls._validate_column_spec(name, spec)
|
|
687
688
|
if 'type' in spec:
|
|
@@ -692,6 +693,7 @@ class Table(SchemaObject):
|
|
|
692
693
|
if value_expr is not None and isinstance(value_expr, exprs.Expr):
|
|
693
694
|
# create copy so we can modify it
|
|
694
695
|
value_expr = value_expr.copy()
|
|
696
|
+
value_expr.bind_rel_paths()
|
|
695
697
|
stored = spec.get('stored', True)
|
|
696
698
|
primary_key = spec.get('primary_key')
|
|
697
699
|
media_validation_str = spec.get('media_validation')
|
|
@@ -59,6 +59,7 @@ class TableVersion:
|
|
|
59
59
|
schema_version: int
|
|
60
60
|
view_md: Optional[schema.ViewMd]
|
|
61
61
|
is_snapshot: bool
|
|
62
|
+
include_base_columns: bool
|
|
62
63
|
effective_version: Optional[int]
|
|
63
64
|
path: Optional[pxt.catalog.TableVersionPath]
|
|
64
65
|
base: Optional[TableVersion]
|
|
@@ -115,6 +116,7 @@ class TableVersion:
|
|
|
115
116
|
self.view_md = tbl_md.view_md # save this as-is, it's needed for _create_md()
|
|
116
117
|
is_view = tbl_md.view_md is not None
|
|
117
118
|
self.is_snapshot = (is_view and tbl_md.view_md.is_snapshot) or bool(is_snapshot)
|
|
119
|
+
self.include_base_columns = not is_view or tbl_md.view_md.include_base_columns
|
|
118
120
|
self.media_validation = MediaValidation[schema_version_md.media_validation.upper()]
|
|
119
121
|
# a mutable TableVersion doesn't have a static version
|
|
120
122
|
self.effective_version = self.version if self.is_snapshot else None
|
|
@@ -228,6 +230,7 @@ class TableVersion:
|
|
|
228
230
|
column_md = cls._create_column_md(cols)
|
|
229
231
|
table_md = schema.TableMd(
|
|
230
232
|
name=name,
|
|
233
|
+
user=None,
|
|
231
234
|
current_version=0,
|
|
232
235
|
current_schema_version=0,
|
|
233
236
|
next_col_id=len(cols),
|
|
@@ -237,6 +240,7 @@ class TableVersion:
|
|
|
237
240
|
index_md={},
|
|
238
241
|
external_stores=[],
|
|
239
242
|
view_md=view_md,
|
|
243
|
+
additional_md={},
|
|
240
244
|
)
|
|
241
245
|
# create a schema.Table here, we need it to call our c'tor;
|
|
242
246
|
# don't add it to the session yet, we might add index metadata
|
|
@@ -244,7 +248,7 @@ class TableVersion:
|
|
|
244
248
|
tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(table_md))
|
|
245
249
|
|
|
246
250
|
# create schema.TableVersion
|
|
247
|
-
table_version_md = schema.TableVersionMd(created_at=timestamp, version=0, schema_version=0)
|
|
251
|
+
table_version_md = schema.TableVersionMd(created_at=timestamp, version=0, schema_version=0, additional_md={})
|
|
248
252
|
tbl_version_record = schema.TableVersion(
|
|
249
253
|
tbl_id=tbl_record.id, version=0, md=dataclasses.asdict(table_version_md)
|
|
250
254
|
)
|
|
@@ -266,6 +270,7 @@ class TableVersion:
|
|
|
266
270
|
num_retained_versions=num_retained_versions,
|
|
267
271
|
comment=comment,
|
|
268
272
|
media_validation=media_validation.name.lower(),
|
|
273
|
+
additional_md={},
|
|
269
274
|
)
|
|
270
275
|
schema_version_record = schema.TableSchemaVersion(
|
|
271
276
|
tbl_id=tbl_record.id, schema_version=0, md=dataclasses.asdict(schema_version_md)
|
|
@@ -1342,6 +1347,7 @@ class TableVersion:
|
|
|
1342
1347
|
def _create_tbl_md(self) -> schema.TableMd:
|
|
1343
1348
|
return schema.TableMd(
|
|
1344
1349
|
name=self.name,
|
|
1350
|
+
user=None,
|
|
1345
1351
|
current_version=self.version,
|
|
1346
1352
|
current_schema_version=self.schema_version,
|
|
1347
1353
|
next_col_id=self.next_col_id,
|
|
@@ -1351,10 +1357,13 @@ class TableVersion:
|
|
|
1351
1357
|
index_md=self.idx_md,
|
|
1352
1358
|
external_stores=self._create_stores_md(self.external_stores.values()),
|
|
1353
1359
|
view_md=self.view_md,
|
|
1360
|
+
additional_md={},
|
|
1354
1361
|
)
|
|
1355
1362
|
|
|
1356
1363
|
def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
|
|
1357
|
-
return schema.TableVersionMd(
|
|
1364
|
+
return schema.TableVersionMd(
|
|
1365
|
+
created_at=timestamp, version=self.version, schema_version=self.schema_version, additional_md={}
|
|
1366
|
+
)
|
|
1358
1367
|
|
|
1359
1368
|
def _create_schema_version_md(self, preceding_schema_version: int) -> schema.TableSchemaVersionMd:
|
|
1360
1369
|
column_md: dict[int, schema.SchemaColumn] = {}
|
|
@@ -1372,6 +1381,7 @@ class TableVersion:
|
|
|
1372
1381
|
num_retained_versions=self.num_retained_versions,
|
|
1373
1382
|
comment=self.comment,
|
|
1374
1383
|
media_validation=self.media_validation.name.lower(),
|
|
1384
|
+
additional_md={},
|
|
1375
1385
|
)
|
|
1376
1386
|
|
|
1377
1387
|
def as_dict(self) -> dict:
|
|
@@ -86,7 +86,7 @@ class TableVersionPath:
|
|
|
86
86
|
from pixeltable.exprs import ColumnRef
|
|
87
87
|
|
|
88
88
|
if col_name not in self.tbl_version.cols_by_name:
|
|
89
|
-
if self.base is None:
|
|
89
|
+
if self.base is None or not self.tbl_version.include_base_columns:
|
|
90
90
|
raise AttributeError(f'Column {col_name} unknown')
|
|
91
91
|
return self.base.get_column_ref(col_name)
|
|
92
92
|
col = self.tbl_version.cols_by_name[col_name]
|
|
@@ -95,7 +95,7 @@ class TableVersionPath:
|
|
|
95
95
|
def columns(self) -> list[Column]:
|
|
96
96
|
"""Return all user columns visible in this tbl version path, including columns from bases"""
|
|
97
97
|
result = list(self.tbl_version.cols_by_name.values())
|
|
98
|
-
if self.base is not None:
|
|
98
|
+
if self.base is not None and self.tbl_version.include_base_columns:
|
|
99
99
|
base_cols = self.base.columns()
|
|
100
100
|
# we only include base columns that don't conflict with one of our column names
|
|
101
101
|
result.extend(c for c in base_cols if c.name not in self.tbl_version.cols_by_name)
|
pixeltable/catalog/view.py
CHANGED
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
4
|
import logging
|
|
5
|
-
from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Iterable, List, Literal, Optional
|
|
6
6
|
from uuid import UUID
|
|
7
7
|
|
|
8
8
|
import sqlalchemy.orm as orm
|
|
@@ -37,7 +37,14 @@ class View(Table):
|
|
|
37
37
|
"""
|
|
38
38
|
|
|
39
39
|
def __init__(
|
|
40
|
-
self,
|
|
40
|
+
self,
|
|
41
|
+
id: UUID,
|
|
42
|
+
dir_id: UUID,
|
|
43
|
+
name: str,
|
|
44
|
+
tbl_version_path: TableVersionPath,
|
|
45
|
+
base_id: UUID,
|
|
46
|
+
snapshot_only: bool,
|
|
47
|
+
include_base_columns: bool,
|
|
41
48
|
):
|
|
42
49
|
super().__init__(id, dir_id, name, tbl_version_path)
|
|
43
50
|
assert base_id in catalog.Catalog.get().tbl_dependents
|
|
@@ -48,12 +55,28 @@ class View(Table):
|
|
|
48
55
|
def _display_name(cls) -> str:
|
|
49
56
|
return 'view'
|
|
50
57
|
|
|
58
|
+
@classmethod
|
|
59
|
+
def select_list_to_additional_columns(cls, select_list: list[tuple[exprs.Expr, Optional[str]]]) -> dict[str, dict]:
|
|
60
|
+
"""Returns a list of columns in the same format as the additional_columns parameter of View.create.
|
|
61
|
+
The source is the list of expressions from a select() statement on a DataFrame.
|
|
62
|
+
If the column is a ColumnRef to a base table column, it is marked to not be stored.
|
|
63
|
+
"""
|
|
64
|
+
from pixeltable.dataframe import DataFrame
|
|
65
|
+
|
|
66
|
+
r: dict[str, dict] = {}
|
|
67
|
+
exps, names = DataFrame._normalize_select_list([], select_list)
|
|
68
|
+
for expr, name in zip(exps, names):
|
|
69
|
+
stored = not isinstance(expr, exprs.ColumnRef)
|
|
70
|
+
r[name] = {'value': expr, 'stored': stored}
|
|
71
|
+
return r
|
|
72
|
+
|
|
51
73
|
@classmethod
|
|
52
74
|
def _create(
|
|
53
75
|
cls,
|
|
54
76
|
dir_id: UUID,
|
|
55
77
|
name: str,
|
|
56
78
|
base: TableVersionPath,
|
|
79
|
+
select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]],
|
|
57
80
|
additional_columns: dict[str, Any],
|
|
58
81
|
predicate: Optional['pxt.exprs.Expr'],
|
|
59
82
|
is_snapshot: bool,
|
|
@@ -63,7 +86,15 @@ class View(Table):
|
|
|
63
86
|
iterator_cls: Optional[type[ComponentIterator]],
|
|
64
87
|
iterator_args: Optional[dict],
|
|
65
88
|
) -> View:
|
|
66
|
-
|
|
89
|
+
# Convert select_list to more additional_columns if present
|
|
90
|
+
include_base_columns: bool = select_list is None
|
|
91
|
+
select_list_columns: List[Column] = []
|
|
92
|
+
if not include_base_columns:
|
|
93
|
+
r = cls.select_list_to_additional_columns(select_list)
|
|
94
|
+
select_list_columns = cls._create_columns(r)
|
|
95
|
+
|
|
96
|
+
columns_from_additional_columns = cls._create_columns(additional_columns)
|
|
97
|
+
columns = select_list_columns + columns_from_additional_columns
|
|
67
98
|
cls._verify_schema(columns)
|
|
68
99
|
|
|
69
100
|
# verify that filter can be evaluated in the context of the base
|
|
@@ -88,31 +119,34 @@ class View(Table):
|
|
|
88
119
|
|
|
89
120
|
# validate iterator_args
|
|
90
121
|
py_signature = inspect.signature(iterator_cls.__init__)
|
|
122
|
+
|
|
123
|
+
# make sure iterator_args can be used to instantiate iterator_cls
|
|
124
|
+
bound_args: dict[str, Any]
|
|
91
125
|
try:
|
|
92
|
-
# make sure iterator_args can be used to instantiate iterator_cls
|
|
93
126
|
bound_args = py_signature.bind(None, **iterator_args).arguments # None: arg for self
|
|
94
|
-
# we ignore 'self'
|
|
95
|
-
first_param_name = next(iter(py_signature.parameters)) # can't guarantee it's actually 'self'
|
|
96
|
-
del bound_args[first_param_name]
|
|
97
|
-
|
|
98
|
-
# construct Signature and type-check bound_args
|
|
99
|
-
params = [
|
|
100
|
-
func.Parameter(param_name, param_type, kind=inspect.Parameter.POSITIONAL_OR_KEYWORD)
|
|
101
|
-
for param_name, param_type in iterator_cls.input_schema().items()
|
|
102
|
-
]
|
|
103
|
-
sig = func.Signature(ts.InvalidType(), params)
|
|
104
|
-
from pixeltable.exprs import FunctionCall
|
|
105
|
-
|
|
106
|
-
FunctionCall.normalize_args(iterator_cls.__name__, sig, bound_args)
|
|
107
127
|
except TypeError as e:
|
|
108
|
-
raise excs.Error(f'
|
|
128
|
+
raise excs.Error(f'Invalid iterator arguments: {e}')
|
|
129
|
+
# we ignore 'self'
|
|
130
|
+
first_param_name = next(iter(py_signature.parameters)) # can't guarantee it's actually 'self'
|
|
131
|
+
del bound_args[first_param_name]
|
|
132
|
+
|
|
133
|
+
# construct Signature and type-check bound_args
|
|
134
|
+
params = [
|
|
135
|
+
func.Parameter(param_name, param_type, kind=inspect.Parameter.POSITIONAL_OR_KEYWORD)
|
|
136
|
+
for param_name, param_type in iterator_cls.input_schema().items()
|
|
137
|
+
]
|
|
138
|
+
sig = func.Signature(ts.InvalidType(), params)
|
|
139
|
+
|
|
140
|
+
expr_args = {k: exprs.Expr.from_object(v) for k, v in bound_args.items()}
|
|
141
|
+
sig.validate_args(expr_args, context=f'in iterator {iterator_cls.__name__!r}')
|
|
142
|
+
literal_args = {k: v.val if isinstance(v, exprs.Literal) else v for k, v in expr_args.items()}
|
|
109
143
|
|
|
110
144
|
# prepend pos and output_schema columns to cols:
|
|
111
145
|
# a component view exposes the pos column of its rowid;
|
|
112
146
|
# we create that column here, so it gets assigned a column id;
|
|
113
147
|
# stored=False: it is not stored separately (it's already stored as part of the rowid)
|
|
114
148
|
iterator_cols = [Column(_POS_COLUMN_NAME, ts.IntType(), stored=False)]
|
|
115
|
-
output_dict, unstored_cols = iterator_cls.output_schema(**
|
|
149
|
+
output_dict, unstored_cols = iterator_cls.output_schema(**literal_args)
|
|
116
150
|
iterator_cols.extend(
|
|
117
151
|
[
|
|
118
152
|
Column(col_name, col_type, stored=col_name not in unstored_cols)
|
|
@@ -153,6 +187,7 @@ class View(Table):
|
|
|
153
187
|
|
|
154
188
|
view_md = md_schema.ViewMd(
|
|
155
189
|
is_snapshot=is_snapshot,
|
|
190
|
+
include_base_columns=include_base_columns,
|
|
156
191
|
predicate=predicate.as_dict() if predicate is not None else None,
|
|
157
192
|
base_versions=base_versions,
|
|
158
193
|
iterator_class_fqn=iterator_class_fqn,
|
|
@@ -172,7 +207,15 @@ class View(Table):
|
|
|
172
207
|
)
|
|
173
208
|
if tbl_version is None:
|
|
174
209
|
# this is purely a snapshot: we use the base's tbl version path
|
|
175
|
-
view = cls(
|
|
210
|
+
view = cls(
|
|
211
|
+
id,
|
|
212
|
+
dir_id,
|
|
213
|
+
name,
|
|
214
|
+
base_version_path,
|
|
215
|
+
base.tbl_id(),
|
|
216
|
+
snapshot_only=True,
|
|
217
|
+
include_base_columns=include_base_columns,
|
|
218
|
+
)
|
|
176
219
|
_logger.info(f'created snapshot {name}')
|
|
177
220
|
else:
|
|
178
221
|
view = cls(
|
|
@@ -182,6 +225,7 @@ class View(Table):
|
|
|
182
225
|
TableVersionPath(tbl_version, base=base_version_path),
|
|
183
226
|
base.tbl_id(),
|
|
184
227
|
snapshot_only=False,
|
|
228
|
+
include_base_columns=include_base_columns,
|
|
185
229
|
)
|
|
186
230
|
_logger.info(f'Created view `{name}`, id={tbl_version.id}')
|
|
187
231
|
|
pixeltable/dataframe.py
CHANGED
|
@@ -293,7 +293,7 @@ class DataFrame:
|
|
|
293
293
|
group_by_clause = self.group_by_clause
|
|
294
294
|
|
|
295
295
|
for item in self._select_list_exprs:
|
|
296
|
-
item.bind_rel_paths(
|
|
296
|
+
item.bind_rel_paths()
|
|
297
297
|
|
|
298
298
|
return plan.Planner.create_query_plan(
|
|
299
299
|
self._from_clause,
|
|
@@ -951,7 +951,7 @@ class DataFrame:
|
|
|
951
951
|
|
|
952
952
|
>>> df = person.where(t.year == 2014).update({'age': 30})
|
|
953
953
|
"""
|
|
954
|
-
self._validate_mutable('update')
|
|
954
|
+
self._validate_mutable('update', False)
|
|
955
955
|
return self._first_tbl.tbl_version.update(value_spec, where=self.where_clause, cascade=cascade)
|
|
956
956
|
|
|
957
957
|
def delete(self) -> UpdateStatus:
|
|
@@ -971,18 +971,23 @@ class DataFrame:
|
|
|
971
971
|
|
|
972
972
|
>>> df = person.where(t.age < 18).delete()
|
|
973
973
|
"""
|
|
974
|
-
self._validate_mutable('delete')
|
|
974
|
+
self._validate_mutable('delete', False)
|
|
975
975
|
if not self._first_tbl.is_insertable():
|
|
976
976
|
raise excs.Error(f'Cannot delete from view')
|
|
977
977
|
return self._first_tbl.tbl_version.delete(where=self.where_clause)
|
|
978
978
|
|
|
979
|
-
def _validate_mutable(self, op_name: str) -> None:
|
|
980
|
-
"""Tests whether this DataFrame can be mutated (such as by an update operation).
|
|
979
|
+
def _validate_mutable(self, op_name: str, allow_select: bool) -> None:
|
|
980
|
+
"""Tests whether this DataFrame can be mutated (such as by an update operation).
|
|
981
|
+
|
|
982
|
+
Args:
|
|
983
|
+
op_name: The name of the operation for which the test is being performed.
|
|
984
|
+
allow_select: If True, allow a select() specification in the DataFrame.
|
|
985
|
+
"""
|
|
981
986
|
if self.group_by_clause is not None or self.grouping_tbl is not None:
|
|
982
987
|
raise excs.Error(f'Cannot use `{op_name}` after `group_by`')
|
|
983
988
|
if self.order_by_clause is not None:
|
|
984
989
|
raise excs.Error(f'Cannot use `{op_name}` after `order_by`')
|
|
985
|
-
if self.select_list is not None:
|
|
990
|
+
if self.select_list is not None and not allow_select:
|
|
986
991
|
raise excs.Error(f'Cannot use `{op_name}` after `select`')
|
|
987
992
|
if self.limit_val is not None:
|
|
988
993
|
raise excs.Error(f'Cannot use `{op_name}` after `limit`')
|
pixeltable/env.py
CHANGED
|
@@ -76,6 +76,8 @@ class Env:
|
|
|
76
76
|
_module_log_level: dict[str, int] # module name -> log level
|
|
77
77
|
_config_file: Optional[Path]
|
|
78
78
|
_config: Optional[Config]
|
|
79
|
+
_file_cache_size_g: float
|
|
80
|
+
_pxt_api_key: Optional[str]
|
|
79
81
|
_stdout_handler: logging.StreamHandler
|
|
80
82
|
_initialized: bool
|
|
81
83
|
|
|
@@ -289,6 +291,7 @@ class Env:
|
|
|
289
291
|
f'(either add a `file_cache_size_g` entry to the `pixeltable` section of {self._config_file},\n'
|
|
290
292
|
'or set the PIXELTABLE_FILE_CACHE_SIZE_G environment variable)'
|
|
291
293
|
)
|
|
294
|
+
self._pxt_api_key = self._config.get_string_value('api_key')
|
|
292
295
|
|
|
293
296
|
# Disable spurious warnings
|
|
294
297
|
warnings.simplefilter('ignore', category=TqdmWarning)
|
|
@@ -459,6 +462,15 @@ class Env:
|
|
|
459
462
|
def _upgrade_metadata(self) -> None:
|
|
460
463
|
metadata.upgrade_md(self._sa_engine)
|
|
461
464
|
|
|
465
|
+
@property
|
|
466
|
+
def pxt_api_key(self) -> str:
|
|
467
|
+
if self._pxt_api_key is None:
|
|
468
|
+
raise excs.Error(
|
|
469
|
+
'No API key is configured. Set the PIXELTABLE_API_KEY environment variable, or add an entry to '
|
|
470
|
+
f'config.toml as described here:\nhttps://pixeltable.github.io/pixeltable/config/'
|
|
471
|
+
)
|
|
472
|
+
return self._pxt_api_key
|
|
473
|
+
|
|
462
474
|
def get_client(self, name: str) -> Any:
|
|
463
475
|
"""
|
|
464
476
|
Gets the client with the specified name, initializing it if necessary.
|
|
@@ -160,8 +160,10 @@ class FnCallEvaluator(Evaluator):
|
|
|
160
160
|
|
|
161
161
|
def _create_batch_call_args(self, call_args: list[FnCallArgs]) -> FnCallArgs:
|
|
162
162
|
"""Roll call_args into a single batched FnCallArgs"""
|
|
163
|
-
batch_args: list[list[Optional[Any]]] = [[None] * len(call_args) for _ in range(len(self.fn_call.
|
|
164
|
-
batch_kwargs: dict[str, list[Optional[Any]]] = {
|
|
163
|
+
batch_args: list[list[Optional[Any]]] = [[None] * len(call_args) for _ in range(len(self.fn_call.arg_idxs))]
|
|
164
|
+
batch_kwargs: dict[str, list[Optional[Any]]] = {
|
|
165
|
+
k: [None] * len(call_args) for k in self.fn_call.kwarg_idxs.keys()
|
|
166
|
+
}
|
|
165
167
|
assert isinstance(self.fn, func.CallableFunction)
|
|
166
168
|
for i, item in enumerate(call_args):
|
|
167
169
|
for j in range(len(item.args)):
|
|
@@ -308,7 +308,10 @@ class ExprEvalNode(ExecNode):
|
|
|
308
308
|
if self.exc_event.is_set():
|
|
309
309
|
# we got an exception that we need to propagate through __iter__()
|
|
310
310
|
_logger.debug(f'Propagating exception {self.error}')
|
|
311
|
-
|
|
311
|
+
if isinstance(self.error, excs.ExprEvalError):
|
|
312
|
+
raise self.error from self.error.exc
|
|
313
|
+
else:
|
|
314
|
+
raise self.error
|
|
312
315
|
if completed_aw in done:
|
|
313
316
|
self._log_state('completed_aw done')
|
|
314
317
|
completed_aw = None
|
pixeltable/exprs/comparison.py
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any, Optional
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Optional
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
7
7
|
import pixeltable.exceptions as excs
|
|
8
|
-
import pixeltable.index as index
|
|
9
8
|
import pixeltable.type_system as ts
|
|
10
9
|
|
|
11
10
|
from .column_ref import ColumnRef
|
|
@@ -16,12 +15,17 @@ from .literal import Literal
|
|
|
16
15
|
from .row_builder import RowBuilder
|
|
17
16
|
from .sql_element_cache import SqlElementCache
|
|
18
17
|
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from pixeltable import index
|
|
20
|
+
|
|
19
21
|
|
|
20
22
|
class Comparison(Expr):
|
|
21
23
|
is_search_arg_comparison: bool
|
|
22
24
|
operator: ComparisonOperator
|
|
23
25
|
|
|
24
26
|
def __init__(self, operator: ComparisonOperator, op1: Expr, op2: Expr):
|
|
27
|
+
from pixeltable import index
|
|
28
|
+
|
|
25
29
|
super().__init__(ts.BoolType())
|
|
26
30
|
self.operator = operator
|
|
27
31
|
|
|
@@ -38,8 +42,6 @@ class Comparison(Expr):
|
|
|
38
42
|
self.is_search_arg_comparison = False
|
|
39
43
|
self.components = [op1, op2]
|
|
40
44
|
|
|
41
|
-
import pixeltable.index as index
|
|
42
|
-
|
|
43
45
|
if (
|
|
44
46
|
self.is_search_arg_comparison
|
|
45
47
|
and self._op2.col_type.is_string_type()
|
|
@@ -71,6 +73,8 @@ class Comparison(Expr):
|
|
|
71
73
|
return self.components[1]
|
|
72
74
|
|
|
73
75
|
def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
76
|
+
from pixeltable import index
|
|
77
|
+
|
|
74
78
|
if str(self._op1.col_type.to_sa_type()) != str(self._op2.col_type.to_sa_type()):
|
|
75
79
|
# Comparing columns of different SQL types (e.g., string vs. json); this can only be done in Python
|
|
76
80
|
# TODO(aaron-siegel): We may be able to handle some cases in SQL by casting one side to the other's type
|
pixeltable/exprs/data_row.py
CHANGED
|
@@ -4,6 +4,7 @@ import datetime
|
|
|
4
4
|
import io
|
|
5
5
|
import urllib.parse
|
|
6
6
|
import urllib.request
|
|
7
|
+
from pathlib import Path
|
|
7
8
|
from typing import Any, Optional
|
|
8
9
|
|
|
9
10
|
import numpy as np
|
|
@@ -141,13 +142,13 @@ class DataRow:
|
|
|
141
142
|
self.file_paths[slot_idx] = None
|
|
142
143
|
self.file_urls[slot_idx] = None
|
|
143
144
|
|
|
144
|
-
def __getitem__(self, index:
|
|
145
|
+
def __getitem__(self, index: int) -> Any:
|
|
145
146
|
"""Returns in-memory value, ie, what is needed for expr evaluation"""
|
|
146
147
|
assert isinstance(index, int)
|
|
147
148
|
if not self.has_val[index]:
|
|
148
|
-
#
|
|
149
|
-
|
|
150
|
-
|
|
149
|
+
# This is a sufficiently cheap and sensitive validation that it makes sense to keep the assertion around
|
|
150
|
+
# even if python is running with -O.
|
|
151
|
+
raise AssertionError(index)
|
|
151
152
|
|
|
152
153
|
if self.file_urls[index] is not None and index in self.img_slot_idxs:
|
|
153
154
|
# if we need to load this from a file, it should have been materialized locally
|
|
@@ -206,9 +207,10 @@ class DataRow:
|
|
|
206
207
|
# local file path
|
|
207
208
|
assert self.file_urls[idx] is None and self.file_paths[idx] is None
|
|
208
209
|
if len(parsed.scheme) <= 1:
|
|
209
|
-
|
|
210
|
-
self.
|
|
211
|
-
|
|
210
|
+
path = str(Path(val).absolute()) # Ensure we're using an absolute pathname.
|
|
211
|
+
self.file_urls[idx] = urllib.parse.urljoin('file:', urllib.request.pathname2url(path))
|
|
212
|
+
self.file_paths[idx] = path
|
|
213
|
+
else: # file:// URL
|
|
212
214
|
self.file_urls[idx] = val
|
|
213
215
|
# Wrap the path in a url2pathname() call to ensure proper handling on Windows.
|
|
214
216
|
self.file_paths[idx] = urllib.parse.unquote(urllib.request.url2pathname(parsed.path))
|
pixeltable/exprs/expr.py
CHANGED
|
@@ -583,7 +583,7 @@ class Expr(abc.ABC):
|
|
|
583
583
|
|
|
584
584
|
def __bool__(self) -> bool:
|
|
585
585
|
raise TypeError(
|
|
586
|
-
'Pixeltable expressions cannot be used in conjunction with Python boolean operators (and/or/not)'
|
|
586
|
+
f'Pixeltable expressions cannot be used in conjunction with Python boolean operators (and/or/not)\n{self!r}'
|
|
587
587
|
)
|
|
588
588
|
|
|
589
589
|
def __lt__(self, other: object) -> 'exprs.Comparison':
|
|
@@ -784,7 +784,7 @@ class Expr(abc.ABC):
|
|
|
784
784
|
if (
|
|
785
785
|
len(params) >= 2
|
|
786
786
|
and second_param.kind not in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
|
|
787
|
-
and second_param.default
|
|
787
|
+
and second_param.default is inspect.Parameter.empty
|
|
788
788
|
):
|
|
789
789
|
raise excs.Error(f'Function `{fn.__name__}` has multiple required parameters.')
|
|
790
790
|
except ValueError:
|