pixeltable 0.3.15__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +9 -1
- pixeltable/catalog/catalog.py +559 -134
- pixeltable/catalog/column.py +36 -32
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/globals.py +12 -0
- pixeltable/catalog/insertable_table.py +30 -25
- pixeltable/catalog/schema_object.py +9 -6
- pixeltable/catalog/table.py +334 -267
- pixeltable/catalog/table_version.py +358 -241
- pixeltable/catalog/table_version_handle.py +18 -2
- pixeltable/catalog/table_version_path.py +86 -16
- pixeltable/catalog/view.py +47 -23
- pixeltable/dataframe.py +198 -19
- pixeltable/env.py +6 -4
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +1 -1
- pixeltable/exec/exec_node.py +2 -0
- pixeltable/exec/expr_eval/evaluators.py +4 -1
- pixeltable/exec/expr_eval/expr_eval_node.py +4 -4
- pixeltable/exec/in_memory_data_node.py +1 -1
- pixeltable/exec/sql_node.py +188 -22
- pixeltable/exprs/column_property_ref.py +16 -6
- pixeltable/exprs/column_ref.py +33 -11
- pixeltable/exprs/comparison.py +1 -1
- pixeltable/exprs/data_row.py +5 -3
- pixeltable/exprs/expr.py +11 -4
- pixeltable/exprs/literal.py +2 -0
- pixeltable/exprs/row_builder.py +4 -6
- pixeltable/exprs/rowid_ref.py +8 -0
- pixeltable/exprs/similarity_expr.py +1 -0
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +5 -3
- pixeltable/func/tools.py +12 -2
- pixeltable/func/udf.py +2 -2
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/anthropic.py +19 -45
- pixeltable/functions/deepseek.py +19 -38
- pixeltable/functions/fireworks.py +9 -18
- pixeltable/functions/gemini.py +2 -3
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/llama_cpp.py +6 -6
- pixeltable/functions/mistralai.py +16 -53
- pixeltable/functions/ollama.py +1 -1
- pixeltable/functions/openai.py +82 -165
- pixeltable/functions/string.py +212 -58
- pixeltable/functions/together.py +22 -80
- pixeltable/globals.py +10 -4
- pixeltable/index/base.py +5 -0
- pixeltable/index/btree.py +5 -0
- pixeltable/index/embedding_index.py +5 -0
- pixeltable/io/external_store.py +10 -31
- pixeltable/io/label_studio.py +5 -5
- pixeltable/io/parquet.py +2 -2
- pixeltable/io/table_data_conduit.py +1 -32
- pixeltable/metadata/__init__.py +11 -2
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_30.py +6 -11
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/util.py +3 -9
- pixeltable/metadata/notes.py +3 -0
- pixeltable/metadata/schema.py +13 -1
- pixeltable/plan.py +135 -12
- pixeltable/share/packager.py +138 -14
- pixeltable/share/publish.py +2 -2
- pixeltable/store.py +19 -13
- pixeltable/type_system.py +30 -0
- pixeltable/utils/dbms.py +1 -1
- pixeltable/utils/formatter.py +64 -42
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/METADATA +2 -1
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/RECORD +78 -73
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.15.dist-info → pixeltable-0.4.0.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/column.py
CHANGED
|
@@ -15,7 +15,6 @@ from .globals import MediaValidation, is_valid_identifier
|
|
|
15
15
|
|
|
16
16
|
if TYPE_CHECKING:
|
|
17
17
|
from .table_version import TableVersion
|
|
18
|
-
from .table_version_handle import TableVersionHandle
|
|
19
18
|
from .table_version_path import TableVersionPath
|
|
20
19
|
|
|
21
20
|
_logger = logging.getLogger('pixeltable')
|
|
@@ -43,8 +42,9 @@ class Column:
|
|
|
43
42
|
sa_errortype_col: Optional[sql.schema.Column]
|
|
44
43
|
_value_expr: Optional[exprs.Expr]
|
|
45
44
|
value_expr_dict: Optional[dict[str, Any]]
|
|
46
|
-
|
|
47
|
-
|
|
45
|
+
# we store a TableVersion here, not a TableVersionHandle, because this column is owned by that TableVersion instance
|
|
46
|
+
# (re-resolving it later to a different instance doesn't make sense)
|
|
47
|
+
tbl: Optional[TableVersion]
|
|
48
48
|
|
|
49
49
|
def __init__(
|
|
50
50
|
self,
|
|
@@ -60,6 +60,7 @@ class Column:
|
|
|
60
60
|
sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
|
|
61
61
|
records_errors: Optional[bool] = None,
|
|
62
62
|
value_expr_dict: Optional[dict[str, Any]] = None,
|
|
63
|
+
tbl: Optional[TableVersion] = None,
|
|
63
64
|
):
|
|
64
65
|
"""Column constructor.
|
|
65
66
|
|
|
@@ -84,6 +85,7 @@ class Column:
|
|
|
84
85
|
if name is not None and not is_valid_identifier(name):
|
|
85
86
|
raise excs.Error(f"Invalid column name: '{name}'")
|
|
86
87
|
self.name = name
|
|
88
|
+
self.tbl = tbl
|
|
87
89
|
if col_type is None and computed_with is None:
|
|
88
90
|
raise excs.Error(f'Column `{name}`: col_type is required if computed_with is not specified')
|
|
89
91
|
|
|
@@ -92,6 +94,7 @@ class Column:
|
|
|
92
94
|
if computed_with is not None:
|
|
93
95
|
value_expr = exprs.Expr.from_object(computed_with)
|
|
94
96
|
if value_expr is None:
|
|
97
|
+
# TODO: this shouldn't be a user-facing error
|
|
95
98
|
raise excs.Error(
|
|
96
99
|
f'Column {name}: computed_with needs to be a valid Pixeltable expression, '
|
|
97
100
|
f'but it is a {type(computed_with)}'
|
|
@@ -99,13 +102,15 @@ class Column:
|
|
|
99
102
|
else:
|
|
100
103
|
self._value_expr = value_expr.copy()
|
|
101
104
|
self.col_type = self._value_expr.col_type
|
|
105
|
+
if self._value_expr is not None and self.value_expr_dict is None:
|
|
106
|
+
self.value_expr_dict = self._value_expr.as_dict()
|
|
102
107
|
|
|
103
108
|
if col_type is not None:
|
|
104
109
|
self.col_type = col_type
|
|
105
110
|
assert self.col_type is not None
|
|
106
111
|
|
|
107
112
|
self.stored = stored
|
|
108
|
-
self.dependent_cols = set() # cols with value_exprs that reference us; set by TableVersion
|
|
113
|
+
# self.dependent_cols = set() # cols with value_exprs that reference us; set by TableVersion
|
|
109
114
|
self.id = col_id
|
|
110
115
|
self.is_pk = is_pk
|
|
111
116
|
self._media_validation = media_validation
|
|
@@ -122,36 +127,35 @@ class Column:
|
|
|
122
127
|
self.sa_errormsg_col = None
|
|
123
128
|
self.sa_errortype_col = None
|
|
124
129
|
|
|
125
|
-
|
|
130
|
+
def init_value_expr(self) -> None:
|
|
131
|
+
from pixeltable import exprs
|
|
132
|
+
|
|
133
|
+
if self._value_expr is not None or self.value_expr_dict is None:
|
|
134
|
+
return
|
|
135
|
+
self._value_expr = exprs.Expr.from_dict(self.value_expr_dict)
|
|
136
|
+
self._value_expr.bind_rel_paths()
|
|
137
|
+
if not self._value_expr.is_valid:
|
|
138
|
+
message = (
|
|
139
|
+
dedent(
|
|
140
|
+
f"""
|
|
141
|
+
The computed column {self.name!r} in table {self.tbl.name!r} is no longer valid.
|
|
142
|
+
{{validation_error}}
|
|
143
|
+
You can continue to query existing data from this column, but evaluating it on new data will raise an error.
|
|
144
|
+
""" # noqa: E501
|
|
145
|
+
)
|
|
146
|
+
.strip()
|
|
147
|
+
.format(validation_error=self._value_expr.validation_error)
|
|
148
|
+
)
|
|
149
|
+
warnings.warn(message, category=excs.PixeltableWarning, stacklevel=2)
|
|
126
150
|
|
|
127
151
|
@property
|
|
128
152
|
def value_expr(self) -> Optional[exprs.Expr]:
|
|
129
|
-
|
|
130
|
-
# TODO: instantiate expr in the c'tor and add an Expr.prepare() that can create additional state after the
|
|
131
|
-
# catalog has been fully loaded; that way, we encounter bugs in the serialization/deserialization logic earlier
|
|
132
|
-
if self.value_expr_dict is not None and self._value_expr is None:
|
|
133
|
-
from pixeltable import exprs
|
|
134
|
-
|
|
135
|
-
self._value_expr = exprs.Expr.from_dict(self.value_expr_dict)
|
|
136
|
-
self._value_expr.bind_rel_paths()
|
|
137
|
-
if not self._value_expr.is_valid:
|
|
138
|
-
message = (
|
|
139
|
-
dedent(
|
|
140
|
-
f"""
|
|
141
|
-
The computed column {self.name!r} in table {self.tbl.get().name!r} is no longer valid.
|
|
142
|
-
{{validation_error}}
|
|
143
|
-
You can continue to query existing data from this column, but evaluating it on new data will raise an error.
|
|
144
|
-
""" # noqa: E501
|
|
145
|
-
)
|
|
146
|
-
.strip()
|
|
147
|
-
.format(validation_error=self._value_expr.validation_error)
|
|
148
|
-
)
|
|
149
|
-
warnings.warn(message, category=excs.PixeltableWarning, stacklevel=2)
|
|
153
|
+
assert self.value_expr_dict is None or self._value_expr is not None
|
|
150
154
|
return self._value_expr
|
|
151
155
|
|
|
152
156
|
def set_value_expr(self, value_expr: exprs.Expr) -> None:
|
|
153
157
|
self._value_expr = value_expr
|
|
154
|
-
self.value_expr_dict =
|
|
158
|
+
self.value_expr_dict = self._value_expr.as_dict()
|
|
155
159
|
|
|
156
160
|
def check_value_expr(self) -> None:
|
|
157
161
|
assert self._value_expr is not None
|
|
@@ -175,8 +179,8 @@ class Column:
|
|
|
175
179
|
# multiple dependents)
|
|
176
180
|
def get_idx_info(self, reference_tbl: Optional['TableVersionPath'] = None) -> dict[str, 'TableVersion.IndexInfo']:
|
|
177
181
|
assert self.tbl is not None
|
|
178
|
-
tbl = reference_tbl.tbl_version if reference_tbl is not None else self.tbl
|
|
179
|
-
return {name: info for name, info in tbl.
|
|
182
|
+
tbl = reference_tbl.tbl_version.get() if reference_tbl is not None else self.tbl
|
|
183
|
+
return {name: info for name, info in tbl.idxs_by_name.items() if info.col == self}
|
|
180
184
|
|
|
181
185
|
@property
|
|
182
186
|
def is_computed(self) -> bool:
|
|
@@ -199,14 +203,14 @@ class Column:
|
|
|
199
203
|
@property
|
|
200
204
|
def qualified_name(self) -> str:
|
|
201
205
|
assert self.tbl is not None
|
|
202
|
-
return f'{self.tbl.
|
|
206
|
+
return f'{self.tbl.name}.{self.name}'
|
|
203
207
|
|
|
204
208
|
@property
|
|
205
209
|
def media_validation(self) -> MediaValidation:
|
|
206
210
|
if self._media_validation is not None:
|
|
207
211
|
return self._media_validation
|
|
208
212
|
assert self.tbl is not None
|
|
209
|
-
return self.tbl.
|
|
213
|
+
return self.tbl.media_validation
|
|
210
214
|
|
|
211
215
|
@property
|
|
212
216
|
def is_required_for_insert(self) -> bool:
|
|
@@ -256,7 +260,7 @@ class Column:
|
|
|
256
260
|
return f'{self.name}: {self.col_type}'
|
|
257
261
|
|
|
258
262
|
def __repr__(self) -> str:
|
|
259
|
-
return f'Column({self.id!r}, {self.name!r}, tbl={self.tbl.
|
|
263
|
+
return f'Column({self.id!r}, {self.name!r}, tbl={self.tbl.name!r})'
|
|
260
264
|
|
|
261
265
|
def __hash__(self) -> int:
|
|
262
266
|
# TODO(aaron-siegel): This and __eq__ do not capture the table version. We need to rethink the Column
|
pixeltable/catalog/dir.py
CHANGED
|
@@ -38,13 +38,12 @@ class Dir(SchemaObject):
|
|
|
38
38
|
def _display_name(cls) -> str:
|
|
39
39
|
return 'directory'
|
|
40
40
|
|
|
41
|
-
@property
|
|
42
41
|
def _path(self) -> str:
|
|
43
42
|
"""Returns the path to this schema object."""
|
|
44
43
|
if self._dir_id is None:
|
|
45
44
|
# we're the root dir
|
|
46
45
|
return ''
|
|
47
|
-
return super()._path
|
|
46
|
+
return super()._path()
|
|
48
47
|
|
|
49
48
|
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
50
49
|
# print(
|
pixeltable/catalog/globals.py
CHANGED
|
@@ -5,6 +5,7 @@ import enum
|
|
|
5
5
|
import itertools
|
|
6
6
|
import logging
|
|
7
7
|
from typing import Optional
|
|
8
|
+
from uuid import UUID
|
|
8
9
|
|
|
9
10
|
from typing_extensions import Self
|
|
10
11
|
|
|
@@ -21,6 +22,17 @@ _ROWID_COLUMN_NAME = '_rowid'
|
|
|
21
22
|
_PREDEF_SYMBOLS: Optional[set[str]] = None
|
|
22
23
|
|
|
23
24
|
|
|
25
|
+
@dataclasses.dataclass(frozen=True)
|
|
26
|
+
class QColumnId:
|
|
27
|
+
"""Qualified column id"""
|
|
28
|
+
|
|
29
|
+
tbl_id: UUID
|
|
30
|
+
col_id: int
|
|
31
|
+
|
|
32
|
+
# def __hash__(self) -> int:
|
|
33
|
+
# return hash((self.tbl_id, self.col_id))
|
|
34
|
+
|
|
35
|
+
|
|
24
36
|
@dataclasses.dataclass
|
|
25
37
|
class UpdateStatus:
|
|
26
38
|
"""
|
pixeltable/catalog/insertable_table.py
CHANGED
|
@@ -51,6 +51,7 @@ class InsertableTable(Table):
|
|
|
51
51
|
def __init__(self, dir_id: UUID, tbl_version: TableVersionHandle):
|
|
52
52
|
tbl_version_path = TableVersionPath(tbl_version)
|
|
53
53
|
super().__init__(tbl_version.id, dir_id, tbl_version.get().name, tbl_version_path)
|
|
54
|
+
self._tbl_version = tbl_version
|
|
54
55
|
|
|
55
56
|
@classmethod
|
|
56
57
|
def _display_name(cls) -> str:
|
|
@@ -103,8 +104,8 @@ class InsertableTable(Table):
|
|
|
103
104
|
Env.get().console_logger.info(f'Created table `{name}`.')
|
|
104
105
|
return tbl
|
|
105
106
|
|
|
106
|
-
def
|
|
107
|
-
md = super().
|
|
107
|
+
def _get_metadata(self) -> dict[str, Any]:
|
|
108
|
+
md = super()._get_metadata()
|
|
108
109
|
md['is_view'] = False
|
|
109
110
|
md['is_snapshot'] = False
|
|
110
111
|
return md
|
|
@@ -138,37 +139,40 @@ class InsertableTable(Table):
|
|
|
138
139
|
print_stats: bool = False,
|
|
139
140
|
**kwargs: Any,
|
|
140
141
|
) -> UpdateStatus:
|
|
142
|
+
from pixeltable.catalog import Catalog
|
|
141
143
|
from pixeltable.io.table_data_conduit import UnkTableDataConduit
|
|
142
144
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
source
|
|
146
|
-
|
|
145
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
146
|
+
table = self
|
|
147
|
+
if source is None:
|
|
148
|
+
source = [kwargs]
|
|
149
|
+
kwargs = None
|
|
147
150
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
151
|
+
tds = UnkTableDataConduit(
|
|
152
|
+
source, source_format=source_format, src_schema_overrides=schema_overrides, extra_fields=kwargs
|
|
153
|
+
)
|
|
154
|
+
data_source = tds.specialize()
|
|
155
|
+
if data_source.source_column_map is None:
|
|
156
|
+
data_source.src_pk = []
|
|
154
157
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
+
assert isinstance(table, Table)
|
|
159
|
+
data_source.add_table_info(table)
|
|
160
|
+
data_source.prepare_for_insert_into_table()
|
|
158
161
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
162
|
+
fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
|
|
163
|
+
return table.insert_table_data_source(
|
|
164
|
+
data_source=data_source, fail_on_exception=fail_on_exception, print_stats=print_stats
|
|
165
|
+
)
|
|
163
166
|
|
|
164
167
|
def insert_table_data_source(
|
|
165
168
|
self, data_source: TableDataConduit, fail_on_exception: bool, print_stats: bool = False
|
|
166
169
|
) -> pxt.UpdateStatus:
|
|
167
170
|
"""Insert row batches into this table from a `TableDataConduit`."""
|
|
171
|
+
from pixeltable.catalog import Catalog
|
|
168
172
|
from pixeltable.io.table_data_conduit import DFTableDataConduit
|
|
169
173
|
|
|
170
174
|
status = pxt.UpdateStatus()
|
|
171
|
-
with
|
|
175
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
172
176
|
if isinstance(data_source, DFTableDataConduit):
|
|
173
177
|
status += self._tbl_version.get().insert(
|
|
174
178
|
rows=None, df=data_source.pxt_df, print_stats=print_stats, fail_on_exception=fail_on_exception
|
|
@@ -186,7 +190,7 @@ class InsertableTable(Table):
|
|
|
186
190
|
|
|
187
191
|
def _validate_input_rows(self, rows: list[dict[str, Any]]) -> None:
|
|
188
192
|
"""Verify that the input rows match the table schema"""
|
|
189
|
-
valid_col_names = set(self.
|
|
193
|
+
valid_col_names = set(self._get_schema().keys())
|
|
190
194
|
reqd_col_names = set(self._tbl_version_path.tbl_version.get().get_required_col_names())
|
|
191
195
|
computed_col_names = set(self._tbl_version_path.tbl_version.get().get_computed_col_names())
|
|
192
196
|
for row in rows:
|
|
@@ -226,11 +230,12 @@ class InsertableTable(Table):
|
|
|
226
230
|
|
|
227
231
|
>>> tbl.delete(tbl.a > 5)
|
|
228
232
|
"""
|
|
229
|
-
|
|
233
|
+
from pixeltable.catalog import Catalog
|
|
234
|
+
|
|
235
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
230
236
|
return self._tbl_version.get().delete(where=where)
|
|
231
237
|
|
|
232
|
-
|
|
233
|
-
def _base_table(self) -> Optional['Table']:
|
|
238
|
+
def _get_base_table(self) -> Optional['Table']:
|
|
234
239
|
return None
|
|
235
240
|
|
|
236
241
|
@property
|
|
@@ -238,4 +243,4 @@ class InsertableTable(Table):
|
|
|
238
243
|
return []
|
|
239
244
|
|
|
240
245
|
def _table_descriptor(self) -> str:
|
|
241
|
-
return f'Table {self._path!r}'
|
|
246
|
+
return f'Table {self._path()!r}'
|
pixeltable/catalog/schema_object.py
CHANGED
|
@@ -2,8 +2,6 @@ from abc import abstractmethod
|
|
|
2
2
|
from typing import TYPE_CHECKING, Any, Optional
|
|
3
3
|
from uuid import UUID
|
|
4
4
|
|
|
5
|
-
from pixeltable.env import Env
|
|
6
|
-
|
|
7
5
|
if TYPE_CHECKING:
|
|
8
6
|
from pixeltable import catalog
|
|
9
7
|
|
|
@@ -28,24 +26,29 @@ class SchemaObject:
|
|
|
28
26
|
"""Returns the parent directory of this schema object."""
|
|
29
27
|
from .catalog import Catalog
|
|
30
28
|
|
|
31
|
-
with
|
|
29
|
+
with Catalog.get().begin_xact(for_write=False):
|
|
32
30
|
if self._dir_id is None:
|
|
33
31
|
return None
|
|
34
32
|
return Catalog.get().get_dir(self._dir_id)
|
|
35
33
|
|
|
36
|
-
@property
|
|
37
34
|
def _path(self) -> str:
|
|
38
35
|
"""Returns the path to this schema object."""
|
|
39
36
|
from .catalog import Catalog
|
|
40
37
|
|
|
41
38
|
assert self._dir_id is not None
|
|
42
|
-
with
|
|
39
|
+
with Catalog.get().begin_xact(for_write=False):
|
|
43
40
|
path = Catalog.get().get_dir_path(self._dir_id)
|
|
44
41
|
return str(path.append(self._name))
|
|
45
42
|
|
|
46
43
|
def get_metadata(self) -> dict[str, Any]:
|
|
47
44
|
"""Returns metadata associated with this schema object."""
|
|
48
|
-
|
|
45
|
+
from pixeltable.catalog import Catalog
|
|
46
|
+
|
|
47
|
+
with Catalog.get().begin_xact(for_write=False):
|
|
48
|
+
return self._get_metadata()
|
|
49
|
+
|
|
50
|
+
def _get_metadata(self) -> dict[str, Any]:
|
|
51
|
+
return {'name': self._name, 'path': self._path()}
|
|
49
52
|
|
|
50
53
|
@classmethod
|
|
51
54
|
@abstractmethod
|