pixeltable 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -0
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +9 -2
- pixeltable/catalog/column.py +1 -1
- pixeltable/catalog/dir.py +1 -1
- pixeltable/catalog/table.py +1 -1
- pixeltable/catalog/table_version.py +12 -2
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/catalog/view.py +64 -20
- pixeltable/dataframe.py +14 -14
- pixeltable/env.py +20 -3
- pixeltable/exec/component_iteration_node.py +1 -2
- pixeltable/exec/expr_eval/evaluators.py +4 -2
- pixeltable/exec/expr_eval/expr_eval_node.py +4 -1
- pixeltable/exprs/comparison.py +8 -4
- pixeltable/exprs/data_row.py +5 -3
- pixeltable/exprs/expr.py +9 -2
- pixeltable/exprs/function_call.py +155 -313
- pixeltable/func/aggregate_function.py +29 -15
- pixeltable/func/callable_function.py +11 -8
- pixeltable/func/expr_template_function.py +3 -9
- pixeltable/func/function.py +148 -74
- pixeltable/func/signature.py +65 -30
- pixeltable/func/udf.py +1 -1
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/deepseek.py +121 -0
- pixeltable/functions/image.py +7 -7
- pixeltable/functions/openai.py +49 -10
- pixeltable/functions/video.py +14 -7
- pixeltable/globals.py +14 -3
- pixeltable/index/embedding_index.py +4 -13
- pixeltable/io/globals.py +88 -77
- pixeltable/io/hf_datasets.py +34 -34
- pixeltable/io/pandas.py +75 -87
- pixeltable/io/parquet.py +19 -27
- pixeltable/io/utils.py +115 -0
- pixeltable/iterators/audio.py +2 -1
- pixeltable/iterators/video.py +1 -1
- pixeltable/metadata/__init__.py +2 -1
- pixeltable/metadata/converters/convert_15.py +18 -8
- pixeltable/metadata/converters/convert_27.py +31 -0
- pixeltable/metadata/converters/convert_28.py +15 -0
- pixeltable/metadata/converters/convert_29.py +111 -0
- pixeltable/metadata/converters/util.py +12 -1
- pixeltable/metadata/notes.py +3 -0
- pixeltable/metadata/schema.py +8 -0
- pixeltable/share/__init__.py +1 -0
- pixeltable/share/packager.py +246 -0
- pixeltable/share/publish.py +97 -0
- pixeltable/type_system.py +87 -42
- pixeltable/utils/__init__.py +41 -0
- pixeltable/utils/arrow.py +45 -12
- pixeltable/utils/formatter.py +1 -1
- pixeltable/utils/iceberg.py +14 -0
- pixeltable/utils/media_store.py +1 -1
- {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/METADATA +37 -50
- {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/RECORD +60 -51
- {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/WHEEL +1 -1
- {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.3.dist-info → pixeltable-0.3.5.dist-info}/entry_points.txt +0 -0
pixeltable/__init__.py
CHANGED
pixeltable/__version__.py
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
# These version placeholders will be replaced during build.
|
|
2
|
-
__version__ = '0.3.3'
|
|
3
|
-
__version_tuple__ = (0, 3, 3)
|
|
2
|
+
__version__ = '0.3.5'
|
|
3
|
+
__version_tuple__ = (0, 3, 5)
|
pixeltable/catalog/catalog.py
CHANGED
|
@@ -58,7 +58,7 @@ class Catalog:
|
|
|
58
58
|
if session.query(sql.func.count(schema.Dir.id)).scalar() > 0:
|
|
59
59
|
return
|
|
60
60
|
# create a top-level directory, so that every schema object has a directory
|
|
61
|
-
dir_md = schema.DirMd(name='')
|
|
61
|
+
dir_md = schema.DirMd(name='', user=None, additional_md={})
|
|
62
62
|
dir_record = schema.Dir(parent_id=None, md=dataclasses.asdict(dir_md))
|
|
63
63
|
session.add(dir_record)
|
|
64
64
|
session.flush()
|
|
@@ -142,6 +142,7 @@ class Catalog:
|
|
|
142
142
|
base_tbl_id = base_path.tbl_id()
|
|
143
143
|
is_snapshot = view_md is not None and view_md.is_snapshot
|
|
144
144
|
snapshot_only = is_snapshot and view_md.predicate is None and len(schema_version_md.columns) == 0
|
|
145
|
+
include_base_columns = view_md is None or view_md.include_base_columns
|
|
145
146
|
if snapshot_only:
|
|
146
147
|
# this is a pure snapshot, without a physical table backing it
|
|
147
148
|
view_path = base_path
|
|
@@ -158,7 +159,13 @@ class Catalog:
|
|
|
158
159
|
view_path = TableVersionPath(tbl_version, base=base_path)
|
|
159
160
|
|
|
160
161
|
tbl: Table = View(
|
|
161
|
-
tbl_record.id,
|
|
162
|
+
tbl_record.id,
|
|
163
|
+
tbl_record.dir_id,
|
|
164
|
+
tbl_md.name,
|
|
165
|
+
view_path,
|
|
166
|
+
base_tbl_id,
|
|
167
|
+
snapshot_only=snapshot_only,
|
|
168
|
+
include_base_columns=include_base_columns,
|
|
162
169
|
)
|
|
163
170
|
self.tbl_dependents[base_tbl_id].append(tbl)
|
|
164
171
|
|
pixeltable/catalog/column.py
CHANGED
|
@@ -72,7 +72,7 @@ class Column:
|
|
|
72
72
|
col_type is None
|
|
73
73
|
- when loaded from md store: ``computed_with`` is set and col_type is set
|
|
74
74
|
|
|
75
|
-
``stored`` (only valid for computed
|
|
75
|
+
``stored`` (only valid for computed columns):
|
|
76
76
|
- if True: the column is present in the stored table
|
|
77
77
|
- if False: the column is not present in the stored table and recomputed during a query
|
|
78
78
|
- if None: the system chooses for you (at present, this is always False, but this may change in the future)
|
pixeltable/catalog/dir.py
CHANGED
|
@@ -32,7 +32,7 @@ class Dir(SchemaObject):
|
|
|
32
32
|
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
33
33
|
super()._move(new_name, new_dir_id)
|
|
34
34
|
with Env.get().engine.begin() as conn:
|
|
35
|
-
dir_md = schema.DirMd(name=new_name)
|
|
35
|
+
dir_md = schema.DirMd(name=new_name, user=None, additional_md={})
|
|
36
36
|
conn.execute(
|
|
37
37
|
sql.update(schema.Dir.__table__)
|
|
38
38
|
.values({schema.Dir.parent_id: self._dir_id, schema.Dir.md: dataclasses.asdict(dir_md)})
|
pixeltable/catalog/table.py
CHANGED
|
@@ -569,7 +569,7 @@ class Table(SchemaObject):
|
|
|
569
569
|
|
|
570
570
|
Args:
|
|
571
571
|
kwargs: Exactly one keyword argument of the form `col_name=expression`.
|
|
572
|
-
stored: Whether the column is materialized and stored or computed on demand.
|
|
572
|
+
stored: Whether the column is materialized and stored or computed on demand.
|
|
573
573
|
print_stats: If `True`, print execution metrics during evaluation.
|
|
574
574
|
on_error: Determines the behavior if an error occurs while evaluating the column expression for at least one
|
|
575
575
|
row.
|
|
pixeltable/catalog/table_version.py
CHANGED
|
@@ -59,6 +59,7 @@ class TableVersion:
|
|
|
59
59
|
schema_version: int
|
|
60
60
|
view_md: Optional[schema.ViewMd]
|
|
61
61
|
is_snapshot: bool
|
|
62
|
+
include_base_columns: bool
|
|
62
63
|
effective_version: Optional[int]
|
|
63
64
|
path: Optional[pxt.catalog.TableVersionPath]
|
|
64
65
|
base: Optional[TableVersion]
|
|
@@ -115,6 +116,7 @@ class TableVersion:
|
|
|
115
116
|
self.view_md = tbl_md.view_md # save this as-is, it's needed for _create_md()
|
|
116
117
|
is_view = tbl_md.view_md is not None
|
|
117
118
|
self.is_snapshot = (is_view and tbl_md.view_md.is_snapshot) or bool(is_snapshot)
|
|
119
|
+
self.include_base_columns = not is_view or tbl_md.view_md.include_base_columns
|
|
118
120
|
self.media_validation = MediaValidation[schema_version_md.media_validation.upper()]
|
|
119
121
|
# a mutable TableVersion doesn't have a static version
|
|
120
122
|
self.effective_version = self.version if self.is_snapshot else None
|
|
@@ -228,6 +230,7 @@ class TableVersion:
|
|
|
228
230
|
column_md = cls._create_column_md(cols)
|
|
229
231
|
table_md = schema.TableMd(
|
|
230
232
|
name=name,
|
|
233
|
+
user=None,
|
|
231
234
|
current_version=0,
|
|
232
235
|
current_schema_version=0,
|
|
233
236
|
next_col_id=len(cols),
|
|
@@ -237,6 +240,7 @@ class TableVersion:
|
|
|
237
240
|
index_md={},
|
|
238
241
|
external_stores=[],
|
|
239
242
|
view_md=view_md,
|
|
243
|
+
additional_md={},
|
|
240
244
|
)
|
|
241
245
|
# create a schema.Table here, we need it to call our c'tor;
|
|
242
246
|
# don't add it to the session yet, we might add index metadata
|
|
@@ -244,7 +248,7 @@ class TableVersion:
|
|
|
244
248
|
tbl_record = schema.Table(id=tbl_id, dir_id=dir_id, md=dataclasses.asdict(table_md))
|
|
245
249
|
|
|
246
250
|
# create schema.TableVersion
|
|
247
|
-
table_version_md = schema.TableVersionMd(created_at=timestamp, version=0, schema_version=0)
|
|
251
|
+
table_version_md = schema.TableVersionMd(created_at=timestamp, version=0, schema_version=0, additional_md={})
|
|
248
252
|
tbl_version_record = schema.TableVersion(
|
|
249
253
|
tbl_id=tbl_record.id, version=0, md=dataclasses.asdict(table_version_md)
|
|
250
254
|
)
|
|
@@ -266,6 +270,7 @@ class TableVersion:
|
|
|
266
270
|
num_retained_versions=num_retained_versions,
|
|
267
271
|
comment=comment,
|
|
268
272
|
media_validation=media_validation.name.lower(),
|
|
273
|
+
additional_md={},
|
|
269
274
|
)
|
|
270
275
|
schema_version_record = schema.TableSchemaVersion(
|
|
271
276
|
tbl_id=tbl_record.id, schema_version=0, md=dataclasses.asdict(schema_version_md)
|
|
@@ -1342,6 +1347,7 @@ class TableVersion:
|
|
|
1342
1347
|
def _create_tbl_md(self) -> schema.TableMd:
|
|
1343
1348
|
return schema.TableMd(
|
|
1344
1349
|
name=self.name,
|
|
1350
|
+
user=None,
|
|
1345
1351
|
current_version=self.version,
|
|
1346
1352
|
current_schema_version=self.schema_version,
|
|
1347
1353
|
next_col_id=self.next_col_id,
|
|
@@ -1351,10 +1357,13 @@ class TableVersion:
|
|
|
1351
1357
|
index_md=self.idx_md,
|
|
1352
1358
|
external_stores=self._create_stores_md(self.external_stores.values()),
|
|
1353
1359
|
view_md=self.view_md,
|
|
1360
|
+
additional_md={},
|
|
1354
1361
|
)
|
|
1355
1362
|
|
|
1356
1363
|
def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
|
|
1357
|
-
return schema.TableVersionMd(
|
|
1364
|
+
return schema.TableVersionMd(
|
|
1365
|
+
created_at=timestamp, version=self.version, schema_version=self.schema_version, additional_md={}
|
|
1366
|
+
)
|
|
1358
1367
|
|
|
1359
1368
|
def _create_schema_version_md(self, preceding_schema_version: int) -> schema.TableSchemaVersionMd:
|
|
1360
1369
|
column_md: dict[int, schema.SchemaColumn] = {}
|
|
@@ -1372,6 +1381,7 @@ class TableVersion:
|
|
|
1372
1381
|
num_retained_versions=self.num_retained_versions,
|
|
1373
1382
|
comment=self.comment,
|
|
1374
1383
|
media_validation=self.media_validation.name.lower(),
|
|
1384
|
+
additional_md={},
|
|
1375
1385
|
)
|
|
1376
1386
|
|
|
1377
1387
|
def as_dict(self) -> dict:
|
|
pixeltable/catalog/table_version_path.py
CHANGED
|
@@ -86,7 +86,7 @@ class TableVersionPath:
|
|
|
86
86
|
from pixeltable.exprs import ColumnRef
|
|
87
87
|
|
|
88
88
|
if col_name not in self.tbl_version.cols_by_name:
|
|
89
|
-
if self.base is None:
|
|
89
|
+
if self.base is None or not self.tbl_version.include_base_columns:
|
|
90
90
|
raise AttributeError(f'Column {col_name} unknown')
|
|
91
91
|
return self.base.get_column_ref(col_name)
|
|
92
92
|
col = self.tbl_version.cols_by_name[col_name]
|
|
@@ -95,7 +95,7 @@ class TableVersionPath:
|
|
|
95
95
|
def columns(self) -> list[Column]:
|
|
96
96
|
"""Return all user columns visible in this tbl version path, including columns from bases"""
|
|
97
97
|
result = list(self.tbl_version.cols_by_name.values())
|
|
98
|
-
if self.base is not None:
|
|
98
|
+
if self.base is not None and self.tbl_version.include_base_columns:
|
|
99
99
|
base_cols = self.base.columns()
|
|
100
100
|
# we only include base columns that don't conflict with one of our column names
|
|
101
101
|
result.extend(c for c in base_cols if c.name not in self.tbl_version.cols_by_name)
|
pixeltable/catalog/view.py
CHANGED
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
4
|
import logging
|
|
5
|
-
from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Iterable, List, Literal, Optional
|
|
6
6
|
from uuid import UUID
|
|
7
7
|
|
|
8
8
|
import sqlalchemy.orm as orm
|
|
@@ -37,7 +37,14 @@ class View(Table):
|
|
|
37
37
|
"""
|
|
38
38
|
|
|
39
39
|
def __init__(
|
|
40
|
-
self,
|
|
40
|
+
self,
|
|
41
|
+
id: UUID,
|
|
42
|
+
dir_id: UUID,
|
|
43
|
+
name: str,
|
|
44
|
+
tbl_version_path: TableVersionPath,
|
|
45
|
+
base_id: UUID,
|
|
46
|
+
snapshot_only: bool,
|
|
47
|
+
include_base_columns: bool,
|
|
41
48
|
):
|
|
42
49
|
super().__init__(id, dir_id, name, tbl_version_path)
|
|
43
50
|
assert base_id in catalog.Catalog.get().tbl_dependents
|
|
@@ -48,12 +55,28 @@ class View(Table):
|
|
|
48
55
|
def _display_name(cls) -> str:
|
|
49
56
|
return 'view'
|
|
50
57
|
|
|
58
|
+
@classmethod
|
|
59
|
+
def select_list_to_additional_columns(cls, select_list: list[tuple[exprs.Expr, Optional[str]]]) -> dict[str, dict]:
|
|
60
|
+
"""Returns a list of columns in the same format as the additional_columns parameter of View.create.
|
|
61
|
+
The source is the list of expressions from a select() statement on a DataFrame.
|
|
62
|
+
If the column is a ColumnRef to a base table column, it is marked to not be stored.
|
|
63
|
+
"""
|
|
64
|
+
from pixeltable.dataframe import DataFrame
|
|
65
|
+
|
|
66
|
+
r: dict[str, dict] = {}
|
|
67
|
+
exps, names = DataFrame._normalize_select_list([], select_list)
|
|
68
|
+
for expr, name in zip(exps, names):
|
|
69
|
+
stored = not isinstance(expr, exprs.ColumnRef)
|
|
70
|
+
r[name] = {'value': expr, 'stored': stored}
|
|
71
|
+
return r
|
|
72
|
+
|
|
51
73
|
@classmethod
|
|
52
74
|
def _create(
|
|
53
75
|
cls,
|
|
54
76
|
dir_id: UUID,
|
|
55
77
|
name: str,
|
|
56
78
|
base: TableVersionPath,
|
|
79
|
+
select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]],
|
|
57
80
|
additional_columns: dict[str, Any],
|
|
58
81
|
predicate: Optional['pxt.exprs.Expr'],
|
|
59
82
|
is_snapshot: bool,
|
|
@@ -63,7 +86,15 @@ class View(Table):
|
|
|
63
86
|
iterator_cls: Optional[type[ComponentIterator]],
|
|
64
87
|
iterator_args: Optional[dict],
|
|
65
88
|
) -> View:
|
|
66
|
-
|
|
89
|
+
# Convert select_list to more additional_columns if present
|
|
90
|
+
include_base_columns: bool = select_list is None
|
|
91
|
+
select_list_columns: List[Column] = []
|
|
92
|
+
if not include_base_columns:
|
|
93
|
+
r = cls.select_list_to_additional_columns(select_list)
|
|
94
|
+
select_list_columns = cls._create_columns(r)
|
|
95
|
+
|
|
96
|
+
columns_from_additional_columns = cls._create_columns(additional_columns)
|
|
97
|
+
columns = select_list_columns + columns_from_additional_columns
|
|
67
98
|
cls._verify_schema(columns)
|
|
68
99
|
|
|
69
100
|
# verify that filter can be evaluated in the context of the base
|
|
@@ -88,31 +119,34 @@ class View(Table):
|
|
|
88
119
|
|
|
89
120
|
# validate iterator_args
|
|
90
121
|
py_signature = inspect.signature(iterator_cls.__init__)
|
|
122
|
+
|
|
123
|
+
# make sure iterator_args can be used to instantiate iterator_cls
|
|
124
|
+
bound_args: dict[str, Any]
|
|
91
125
|
try:
|
|
92
|
-
# make sure iterator_args can be used to instantiate iterator_cls
|
|
93
126
|
bound_args = py_signature.bind(None, **iterator_args).arguments # None: arg for self
|
|
94
|
-
# we ignore 'self'
|
|
95
|
-
first_param_name = next(iter(py_signature.parameters)) # can't guarantee it's actually 'self'
|
|
96
|
-
del bound_args[first_param_name]
|
|
97
|
-
|
|
98
|
-
# construct Signature and type-check bound_args
|
|
99
|
-
params = [
|
|
100
|
-
func.Parameter(param_name, param_type, kind=inspect.Parameter.POSITIONAL_OR_KEYWORD)
|
|
101
|
-
for param_name, param_type in iterator_cls.input_schema().items()
|
|
102
|
-
]
|
|
103
|
-
sig = func.Signature(ts.InvalidType(), params)
|
|
104
|
-
from pixeltable.exprs import FunctionCall
|
|
105
|
-
|
|
106
|
-
FunctionCall.normalize_args(iterator_cls.__name__, sig, bound_args)
|
|
107
127
|
except TypeError as e:
|
|
108
|
-
raise excs.Error(f'
|
|
128
|
+
raise excs.Error(f'Invalid iterator arguments: {e}')
|
|
129
|
+
# we ignore 'self'
|
|
130
|
+
first_param_name = next(iter(py_signature.parameters)) # can't guarantee it's actually 'self'
|
|
131
|
+
del bound_args[first_param_name]
|
|
132
|
+
|
|
133
|
+
# construct Signature and type-check bound_args
|
|
134
|
+
params = [
|
|
135
|
+
func.Parameter(param_name, param_type, kind=inspect.Parameter.POSITIONAL_OR_KEYWORD)
|
|
136
|
+
for param_name, param_type in iterator_cls.input_schema().items()
|
|
137
|
+
]
|
|
138
|
+
sig = func.Signature(ts.InvalidType(), params)
|
|
139
|
+
|
|
140
|
+
expr_args = {k: exprs.Expr.from_object(v) for k, v in bound_args.items()}
|
|
141
|
+
sig.validate_args(expr_args, context=f'in iterator {iterator_cls.__name__!r}')
|
|
142
|
+
literal_args = {k: v.val if isinstance(v, exprs.Literal) else v for k, v in expr_args.items()}
|
|
109
143
|
|
|
110
144
|
# prepend pos and output_schema columns to cols:
|
|
111
145
|
# a component view exposes the pos column of its rowid;
|
|
112
146
|
# we create that column here, so it gets assigned a column id;
|
|
113
147
|
# stored=False: it is not stored separately (it's already stored as part of the rowid)
|
|
114
148
|
iterator_cols = [Column(_POS_COLUMN_NAME, ts.IntType(), stored=False)]
|
|
115
|
-
output_dict, unstored_cols = iterator_cls.output_schema(**
|
|
149
|
+
output_dict, unstored_cols = iterator_cls.output_schema(**literal_args)
|
|
116
150
|
iterator_cols.extend(
|
|
117
151
|
[
|
|
118
152
|
Column(col_name, col_type, stored=col_name not in unstored_cols)
|
|
@@ -153,6 +187,7 @@ class View(Table):
|
|
|
153
187
|
|
|
154
188
|
view_md = md_schema.ViewMd(
|
|
155
189
|
is_snapshot=is_snapshot,
|
|
190
|
+
include_base_columns=include_base_columns,
|
|
156
191
|
predicate=predicate.as_dict() if predicate is not None else None,
|
|
157
192
|
base_versions=base_versions,
|
|
158
193
|
iterator_class_fqn=iterator_class_fqn,
|
|
@@ -172,7 +207,15 @@ class View(Table):
|
|
|
172
207
|
)
|
|
173
208
|
if tbl_version is None:
|
|
174
209
|
# this is purely a snapshot: we use the base's tbl version path
|
|
175
|
-
view = cls(
|
|
210
|
+
view = cls(
|
|
211
|
+
id,
|
|
212
|
+
dir_id,
|
|
213
|
+
name,
|
|
214
|
+
base_version_path,
|
|
215
|
+
base.tbl_id(),
|
|
216
|
+
snapshot_only=True,
|
|
217
|
+
include_base_columns=include_base_columns,
|
|
218
|
+
)
|
|
176
219
|
_logger.info(f'created snapshot {name}')
|
|
177
220
|
else:
|
|
178
221
|
view = cls(
|
|
@@ -182,6 +225,7 @@ class View(Table):
|
|
|
182
225
|
TableVersionPath(tbl_version, base=base_version_path),
|
|
183
226
|
base.tbl_id(),
|
|
184
227
|
snapshot_only=False,
|
|
228
|
+
include_base_columns=include_base_columns,
|
|
185
229
|
)
|
|
186
230
|
_logger.info(f'Created view `{name}`, id={tbl_version.id}')
|
|
187
231
|
|
pixeltable/dataframe.py
CHANGED
|
@@ -578,15 +578,9 @@ class DataFrame:
|
|
|
578
578
|
# analyze select list; wrap literals with the corresponding expressions
|
|
579
579
|
select_list: list[tuple[exprs.Expr, Optional[str]]] = []
|
|
580
580
|
for raw_expr, name in base_list:
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
select_list.append((exprs.Expr.from_object(raw_expr), name))
|
|
585
|
-
elif isinstance(raw_expr, np.ndarray):
|
|
586
|
-
select_list.append((exprs.Expr.from_array(raw_expr), name))
|
|
587
|
-
else:
|
|
588
|
-
select_list.append((exprs.Literal(raw_expr), name))
|
|
589
|
-
expr = select_list[-1][0]
|
|
581
|
+
expr = exprs.Expr.from_object(raw_expr)
|
|
582
|
+
if expr is None:
|
|
583
|
+
raise excs.Error(f'Invalid expression: {raw_expr}')
|
|
590
584
|
if expr.col_type.is_invalid_type():
|
|
591
585
|
raise excs.Error(f'Invalid type: {raw_expr}')
|
|
592
586
|
if not expr.is_bound_by(self._from_clause.tbls):
|
|
@@ -594,6 +588,7 @@ class DataFrame:
|
|
|
594
588
|
f"Expression '{expr}' cannot be evaluated in the context of this query's tables "
|
|
595
589
|
f'({",".join(tbl.tbl_name() for tbl in self._from_clause.tbls)})'
|
|
596
590
|
)
|
|
591
|
+
select_list.append((expr, name))
|
|
597
592
|
|
|
598
593
|
# check user provided names do not conflict among themselves or with auto-generated ones
|
|
599
594
|
seen: set[str] = set()
|
|
@@ -956,7 +951,7 @@ class DataFrame:
|
|
|
956
951
|
|
|
957
952
|
>>> df = person.where(t.year == 2014).update({'age': 30})
|
|
958
953
|
"""
|
|
959
|
-
self._validate_mutable('update')
|
|
954
|
+
self._validate_mutable('update', False)
|
|
960
955
|
return self._first_tbl.tbl_version.update(value_spec, where=self.where_clause, cascade=cascade)
|
|
961
956
|
|
|
962
957
|
def delete(self) -> UpdateStatus:
|
|
@@ -976,18 +971,23 @@ class DataFrame:
|
|
|
976
971
|
|
|
977
972
|
>>> df = person.where(t.age < 18).delete()
|
|
978
973
|
"""
|
|
979
|
-
self._validate_mutable('delete')
|
|
974
|
+
self._validate_mutable('delete', False)
|
|
980
975
|
if not self._first_tbl.is_insertable():
|
|
981
976
|
raise excs.Error(f'Cannot delete from view')
|
|
982
977
|
return self._first_tbl.tbl_version.delete(where=self.where_clause)
|
|
983
978
|
|
|
984
|
-
def _validate_mutable(self, op_name: str) -> None:
|
|
985
|
-
"""Tests whether this DataFrame can be mutated (such as by an update operation).
|
|
979
|
+
def _validate_mutable(self, op_name: str, allow_select: bool) -> None:
|
|
980
|
+
"""Tests whether this DataFrame can be mutated (such as by an update operation).
|
|
981
|
+
|
|
982
|
+
Args:
|
|
983
|
+
op_name: The name of the operation for which the test is being performed.
|
|
984
|
+
allow_select: If True, allow a select() specification in the Dataframe.
|
|
985
|
+
"""
|
|
986
986
|
if self.group_by_clause is not None or self.grouping_tbl is not None:
|
|
987
987
|
raise excs.Error(f'Cannot use `{op_name}` after `group_by`')
|
|
988
988
|
if self.order_by_clause is not None:
|
|
989
989
|
raise excs.Error(f'Cannot use `{op_name}` after `order_by`')
|
|
990
|
-
if self.select_list is not None:
|
|
990
|
+
if self.select_list is not None and not allow_select:
|
|
991
991
|
raise excs.Error(f'Cannot use `{op_name}` after `select`')
|
|
992
992
|
if self.limit_val is not None:
|
|
993
993
|
raise excs.Error(f'Cannot use `{op_name}` after `limit`')
|
pixeltable/env.py
CHANGED
|
@@ -76,6 +76,8 @@ class Env:
|
|
|
76
76
|
_module_log_level: dict[str, int] # module name -> log level
|
|
77
77
|
_config_file: Optional[Path]
|
|
78
78
|
_config: Optional[Config]
|
|
79
|
+
_file_cache_size_g: float
|
|
80
|
+
_pxt_api_key: Optional[str]
|
|
79
81
|
_stdout_handler: logging.StreamHandler
|
|
80
82
|
_initialized: bool
|
|
81
83
|
|
|
@@ -289,6 +291,7 @@ class Env:
|
|
|
289
291
|
f'(either add a `file_cache_size_g` entry to the `pixeltable` section of {self._config_file},\n'
|
|
290
292
|
'or set the PIXELTABLE_FILE_CACHE_SIZE_G environment variable)'
|
|
291
293
|
)
|
|
294
|
+
self._pxt_api_key = self._config.get_string_value('api_key')
|
|
292
295
|
|
|
293
296
|
# Disable spurious warnings
|
|
294
297
|
warnings.simplefilter('ignore', category=TqdmWarning)
|
|
@@ -333,9 +336,7 @@ class Env:
|
|
|
333
336
|
http_logger.addHandler(http_fh)
|
|
334
337
|
http_logger.propagate = False
|
|
335
338
|
|
|
336
|
-
|
|
337
|
-
for path in glob.glob(f'{self._tmp_dir}/*'):
|
|
338
|
-
os.remove(path)
|
|
339
|
+
self.clear_tmp_dir()
|
|
339
340
|
|
|
340
341
|
self._db_name = os.environ.get('PIXELTABLE_DB', 'pixeltable')
|
|
341
342
|
self._pgdata_dir = Path(os.environ.get('PIXELTABLE_PGDATA', str(self._home / 'pgdata')))
|
|
@@ -461,6 +462,15 @@ class Env:
|
|
|
461
462
|
def _upgrade_metadata(self) -> None:
|
|
462
463
|
metadata.upgrade_md(self._sa_engine)
|
|
463
464
|
|
|
465
|
+
@property
|
|
466
|
+
def pxt_api_key(self) -> str:
|
|
467
|
+
if self._pxt_api_key is None:
|
|
468
|
+
raise excs.Error(
|
|
469
|
+
'No API key is configured. Set the PIXELTABLE_API_KEY environment variable, or add an entry to '
|
|
470
|
+
f'config.toml as described here:\nhttps://pixeltable.github.io/pixeltable/config/'
|
|
471
|
+
)
|
|
472
|
+
return self._pxt_api_key
|
|
473
|
+
|
|
464
474
|
def get_client(self, name: str) -> Any:
|
|
465
475
|
"""
|
|
466
476
|
Gets the client with the specified name, initializing it if necessary.
|
|
@@ -628,6 +638,13 @@ class Env:
|
|
|
628
638
|
)
|
|
629
639
|
self.__optional_packages['spacy'].is_installed = False
|
|
630
640
|
|
|
641
|
+
def clear_tmp_dir(self) -> None:
|
|
642
|
+
for path in glob.glob(f'{self._tmp_dir}/*'):
|
|
643
|
+
if os.path.isdir(path):
|
|
644
|
+
shutil.rmtree(path)
|
|
645
|
+
else:
|
|
646
|
+
os.remove(path)
|
|
647
|
+
|
|
631
648
|
def num_tmp_files(self) -> int:
|
|
632
649
|
return len(glob.glob(f'{self._tmp_dir}/*'))
|
|
633
650
|
|
|
pixeltable/exec/expr_eval/evaluators.py
CHANGED
|
@@ -160,8 +160,10 @@ class FnCallEvaluator(Evaluator):
|
|
|
160
160
|
|
|
161
161
|
def _create_batch_call_args(self, call_args: list[FnCallArgs]) -> FnCallArgs:
|
|
162
162
|
"""Roll call_args into a single batched FnCallArgs"""
|
|
163
|
-
batch_args: list[list[Optional[Any]]] = [[None] * len(call_args) for _ in range(len(self.fn_call.
|
|
164
|
-
batch_kwargs: dict[str, list[Optional[Any]]] = {
|
|
163
|
+
batch_args: list[list[Optional[Any]]] = [[None] * len(call_args) for _ in range(len(self.fn_call.arg_idxs))]
|
|
164
|
+
batch_kwargs: dict[str, list[Optional[Any]]] = {
|
|
165
|
+
k: [None] * len(call_args) for k in self.fn_call.kwarg_idxs.keys()
|
|
166
|
+
}
|
|
165
167
|
assert isinstance(self.fn, func.CallableFunction)
|
|
166
168
|
for i, item in enumerate(call_args):
|
|
167
169
|
for j in range(len(item.args)):
|
|
pixeltable/exec/expr_eval/expr_eval_node.py
CHANGED
|
@@ -308,7 +308,10 @@ class ExprEvalNode(ExecNode):
|
|
|
308
308
|
if self.exc_event.is_set():
|
|
309
309
|
# we got an exception that we need to propagate through __iter__()
|
|
310
310
|
_logger.debug(f'Propagating exception {self.error}')
|
|
311
|
-
|
|
311
|
+
if isinstance(self.error, excs.ExprEvalError):
|
|
312
|
+
raise self.error from self.error.exc
|
|
313
|
+
else:
|
|
314
|
+
raise self.error
|
|
312
315
|
if completed_aw in done:
|
|
313
316
|
self._log_state('completed_aw done')
|
|
314
317
|
completed_aw = None
|
pixeltable/exprs/comparison.py
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any, Optional
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Optional
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
7
7
|
import pixeltable.exceptions as excs
|
|
8
|
-
import pixeltable.index as index
|
|
9
8
|
import pixeltable.type_system as ts
|
|
10
9
|
|
|
11
10
|
from .column_ref import ColumnRef
|
|
@@ -16,12 +15,17 @@ from .literal import Literal
|
|
|
16
15
|
from .row_builder import RowBuilder
|
|
17
16
|
from .sql_element_cache import SqlElementCache
|
|
18
17
|
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from pixeltable import index
|
|
20
|
+
|
|
19
21
|
|
|
20
22
|
class Comparison(Expr):
|
|
21
23
|
is_search_arg_comparison: bool
|
|
22
24
|
operator: ComparisonOperator
|
|
23
25
|
|
|
24
26
|
def __init__(self, operator: ComparisonOperator, op1: Expr, op2: Expr):
|
|
27
|
+
from pixeltable import index
|
|
28
|
+
|
|
25
29
|
super().__init__(ts.BoolType())
|
|
26
30
|
self.operator = operator
|
|
27
31
|
|
|
@@ -38,8 +42,6 @@ class Comparison(Expr):
|
|
|
38
42
|
self.is_search_arg_comparison = False
|
|
39
43
|
self.components = [op1, op2]
|
|
40
44
|
|
|
41
|
-
import pixeltable.index as index
|
|
42
|
-
|
|
43
45
|
if (
|
|
44
46
|
self.is_search_arg_comparison
|
|
45
47
|
and self._op2.col_type.is_string_type()
|
|
@@ -71,6 +73,8 @@ class Comparison(Expr):
|
|
|
71
73
|
return self.components[1]
|
|
72
74
|
|
|
73
75
|
def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
|
|
76
|
+
from pixeltable import index
|
|
77
|
+
|
|
74
78
|
if str(self._op1.col_type.to_sa_type()) != str(self._op2.col_type.to_sa_type()):
|
|
75
79
|
# Comparing columns of different SQL types (e.g., string vs. json); this can only be done in Python
|
|
76
80
|
# TODO(aaron-siegel): We may be able to handle some cases in SQL by casting one side to the other's type
|
pixeltable/exprs/data_row.py
CHANGED
|
@@ -4,6 +4,7 @@ import datetime
|
|
|
4
4
|
import io
|
|
5
5
|
import urllib.parse
|
|
6
6
|
import urllib.request
|
|
7
|
+
from pathlib import Path
|
|
7
8
|
from typing import Any, Optional
|
|
8
9
|
|
|
9
10
|
import numpy as np
|
|
@@ -206,9 +207,10 @@ class DataRow:
|
|
|
206
207
|
# local file path
|
|
207
208
|
assert self.file_urls[idx] is None and self.file_paths[idx] is None
|
|
208
209
|
if len(parsed.scheme) <= 1:
|
|
209
|
-
|
|
210
|
-
self.
|
|
211
|
-
|
|
210
|
+
path = str(Path(val).absolute()) # Ensure we're using an absolute pathname.
|
|
211
|
+
self.file_urls[idx] = urllib.parse.urljoin('file:', urllib.request.pathname2url(path))
|
|
212
|
+
self.file_paths[idx] = path
|
|
213
|
+
else: # file:// URL
|
|
212
214
|
self.file_urls[idx] = val
|
|
213
215
|
# Wrap the path in a url2pathname() call to ensure proper handling on Windows.
|
|
214
216
|
self.file_paths[idx] = urllib.parse.unquote(urllib.request.url2pathname(parsed.path))
|
pixeltable/exprs/expr.py
CHANGED
|
@@ -10,6 +10,7 @@ import typing
|
|
|
10
10
|
from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Optional, TypeVar, Union, overload
|
|
11
11
|
from uuid import UUID
|
|
12
12
|
|
|
13
|
+
import numpy as np
|
|
13
14
|
import sqlalchemy as sql
|
|
14
15
|
from typing_extensions import Self, _AnnotatedAlias
|
|
15
16
|
|
|
@@ -379,6 +380,12 @@ class Expr(abc.ABC):
|
|
|
379
380
|
@classmethod
|
|
380
381
|
def from_array(cls, elements: Iterable) -> Optional[Expr]:
|
|
381
382
|
from .inline_expr import InlineArray
|
|
383
|
+
from .literal import Literal
|
|
384
|
+
|
|
385
|
+
if isinstance(elements, np.ndarray):
|
|
386
|
+
pxttype = ts.ArrayType.from_literal(elements)
|
|
387
|
+
if pxttype is not None:
|
|
388
|
+
return Literal(elements, col_type=pxttype)
|
|
382
389
|
|
|
383
390
|
inline_array = InlineArray(elements)
|
|
384
391
|
return inline_array.maybe_literal()
|
|
@@ -576,7 +583,7 @@ class Expr(abc.ABC):
|
|
|
576
583
|
|
|
577
584
|
def __bool__(self) -> bool:
|
|
578
585
|
raise TypeError(
|
|
579
|
-
'Pixeltable expressions cannot be used in conjunction with Python boolean operators (and/or/not)'
|
|
586
|
+
f'Pixeltable expressions cannot be used in conjunction with Python boolean operators (and/or/not)\n{self!r}'
|
|
580
587
|
)
|
|
581
588
|
|
|
582
589
|
def __lt__(self, other: object) -> 'exprs.Comparison':
|
|
@@ -777,7 +784,7 @@ class Expr(abc.ABC):
|
|
|
777
784
|
if (
|
|
778
785
|
len(params) >= 2
|
|
779
786
|
and second_param.kind not in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
|
|
780
|
-
and second_param.default
|
|
787
|
+
and second_param.default is inspect.Parameter.empty
|
|
781
788
|
):
|
|
782
789
|
raise excs.Error(f'Function `{fn.__name__}` has multiple required parameters.')
|
|
783
790
|
except ValueError:
|