pixeltable 0.2.25__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +2 -2
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/dir.py +6 -0
- pixeltable/catalog/globals.py +25 -0
- pixeltable/catalog/named_function.py +4 -0
- pixeltable/catalog/path_dict.py +37 -11
- pixeltable/catalog/schema_object.py +6 -0
- pixeltable/catalog/table.py +421 -231
- pixeltable/catalog/table_version.py +22 -8
- pixeltable/catalog/view.py +5 -7
- pixeltable/dataframe.py +439 -105
- pixeltable/env.py +19 -5
- pixeltable/exec/__init__.py +1 -1
- pixeltable/exec/exec_node.py +6 -7
- pixeltable/exec/expr_eval_node.py +1 -1
- pixeltable/exec/sql_node.py +92 -45
- pixeltable/exprs/__init__.py +1 -0
- pixeltable/exprs/arithmetic_expr.py +1 -1
- pixeltable/exprs/array_slice.py +1 -1
- pixeltable/exprs/column_property_ref.py +1 -1
- pixeltable/exprs/column_ref.py +29 -2
- pixeltable/exprs/comparison.py +1 -1
- pixeltable/exprs/compound_predicate.py +1 -1
- pixeltable/exprs/expr.py +12 -5
- pixeltable/exprs/expr_set.py +8 -0
- pixeltable/exprs/function_call.py +147 -39
- pixeltable/exprs/in_predicate.py +1 -1
- pixeltable/exprs/inline_expr.py +25 -5
- pixeltable/exprs/is_null.py +1 -1
- pixeltable/exprs/json_mapper.py +1 -1
- pixeltable/exprs/json_path.py +1 -1
- pixeltable/exprs/method_ref.py +1 -1
- pixeltable/exprs/row_builder.py +1 -1
- pixeltable/exprs/rowid_ref.py +1 -1
- pixeltable/exprs/similarity_expr.py +14 -7
- pixeltable/exprs/sql_element_cache.py +4 -0
- pixeltable/exprs/type_cast.py +2 -2
- pixeltable/exprs/variable.py +3 -0
- pixeltable/func/__init__.py +5 -4
- pixeltable/func/aggregate_function.py +151 -68
- pixeltable/func/callable_function.py +48 -16
- pixeltable/func/expr_template_function.py +64 -23
- pixeltable/func/function.py +195 -27
- pixeltable/func/function_registry.py +2 -1
- pixeltable/func/query_template_function.py +51 -9
- pixeltable/func/signature.py +64 -7
- pixeltable/func/tools.py +153 -0
- pixeltable/func/udf.py +57 -35
- pixeltable/functions/__init__.py +2 -2
- pixeltable/functions/anthropic.py +51 -4
- pixeltable/functions/gemini.py +85 -0
- pixeltable/functions/globals.py +54 -34
- pixeltable/functions/huggingface.py +10 -28
- pixeltable/functions/json.py +3 -8
- pixeltable/functions/math.py +67 -0
- pixeltable/functions/ollama.py +8 -8
- pixeltable/functions/openai.py +51 -4
- pixeltable/functions/timestamp.py +1 -1
- pixeltable/functions/video.py +3 -9
- pixeltable/functions/vision.py +1 -1
- pixeltable/globals.py +354 -80
- pixeltable/index/embedding_index.py +106 -34
- pixeltable/io/__init__.py +1 -1
- pixeltable/io/label_studio.py +1 -1
- pixeltable/io/parquet.py +39 -19
- pixeltable/iterators/document.py +12 -0
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_16.py +2 -1
- pixeltable/metadata/converters/convert_17.py +2 -1
- pixeltable/metadata/converters/convert_22.py +17 -0
- pixeltable/metadata/converters/convert_23.py +35 -0
- pixeltable/metadata/converters/convert_24.py +56 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/util.py +4 -2
- pixeltable/metadata/notes.py +4 -0
- pixeltable/metadata/schema.py +1 -0
- pixeltable/plan.py +128 -50
- pixeltable/store.py +1 -1
- pixeltable/type_system.py +196 -54
- pixeltable/utils/arrow.py +8 -3
- pixeltable/utils/description_helper.py +89 -0
- pixeltable/utils/documents.py +14 -0
- {pixeltable-0.2.25.dist-info → pixeltable-0.3.0.dist-info}/METADATA +30 -20
- pixeltable-0.3.0.dist-info/RECORD +155 -0
- {pixeltable-0.2.25.dist-info → pixeltable-0.3.0.dist-info}/WHEEL +1 -1
- pixeltable-0.3.0.dist-info/entry_points.txt +3 -0
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable-0.2.25.dist-info/RECORD +0 -154
- pixeltable-0.2.25.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.25.dist-info → pixeltable-0.3.0.dist-info}/LICENSE +0 -0
pixeltable/globals.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import dataclasses
|
|
2
2
|
import logging
|
|
3
|
-
from typing import Any, Iterable, Optional, Union
|
|
3
|
+
from typing import Any, Iterable, Literal, Optional, Union
|
|
4
4
|
from uuid import UUID
|
|
5
5
|
|
|
6
6
|
import pandas as pd
|
|
@@ -20,11 +20,68 @@ from pixeltable.utils.filecache import FileCache
|
|
|
20
20
|
|
|
21
21
|
_logger = logging.getLogger('pixeltable')
|
|
22
22
|
|
|
23
|
-
|
|
24
23
|
def init() -> None:
|
|
25
24
|
"""Initializes the Pixeltable environment."""
|
|
26
25
|
_ = Catalog.get()
|
|
27
26
|
|
|
27
|
+
def _get_or_drop_existing_path(
|
|
28
|
+
path_str: str,
|
|
29
|
+
expected_obj_type: type[catalog.SchemaObject],
|
|
30
|
+
expected_snapshot: bool,
|
|
31
|
+
if_exists: catalog.IfExistsParam
|
|
32
|
+
) -> Optional[catalog.SchemaObject]:
|
|
33
|
+
"""Handle schema object path collision during creation according to the if_exists parameter.
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
path_str: An existing and valid path to the dir, table, view, or snapshot.
|
|
37
|
+
expected_obj_type: The type of object (dir, table, or view) that the caller of this function is creating at the existing path.
|
|
38
|
+
expected_snapshot: Whether the caller of this function is creating a snapshot at the existing path.
|
|
39
|
+
if_exists: Directive regarding how to handle the existing path.
|
|
40
|
+
|
|
41
|
+
Returns:
|
|
42
|
+
A handle to the existing dir, table, view, or snapshot, if `if_exists='ignore'`, otherwise `None`.
|
|
43
|
+
|
|
44
|
+
Raises:
|
|
45
|
+
Error: If the existing path is not of the expected type, or if the existing path has dependents and
|
|
46
|
+
`if_exists='replace'` (use `if_exists='replace_force'` to drop the dependents as well).
|
|
47
|
+
"""
|
|
48
|
+
cat = Catalog.get()
|
|
49
|
+
path = catalog.Path(path_str)
|
|
50
|
+
assert cat.paths.get_object(path) is not None
|
|
51
|
+
|
|
52
|
+
if if_exists == catalog.IfExistsParam.ERROR:
|
|
53
|
+
raise excs.Error(f'Path `{path_str}` already exists.')
|
|
54
|
+
|
|
55
|
+
existing_path = cat.paths[path]
|
|
56
|
+
existing_path_is_snapshot = 'is_snapshot' in existing_path.get_metadata() and existing_path.get_metadata()['is_snapshot']
|
|
57
|
+
obj_type_str = 'Snapshot' if expected_snapshot else expected_obj_type._display_name().capitalize()
|
|
58
|
+
# Check if the existing path is of expected type.
|
|
59
|
+
if (not isinstance(existing_path, expected_obj_type)
|
|
60
|
+
or (expected_snapshot and not existing_path_is_snapshot)):
|
|
61
|
+
raise excs.Error(f'Path `{path_str}` already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.')
|
|
62
|
+
|
|
63
|
+
# If if_exists='ignore', return the handle to the existing object.
|
|
64
|
+
assert isinstance(existing_path, expected_obj_type)
|
|
65
|
+
if if_exists == catalog.IfExistsParam.IGNORE:
|
|
66
|
+
return existing_path
|
|
67
|
+
|
|
68
|
+
# Check if the existing object has dependents. If so, cannot replace it
|
|
69
|
+
# unless if_exists='replace_force'.
|
|
70
|
+
has_dependents = existing_path._has_dependents
|
|
71
|
+
if if_exists == catalog.IfExistsParam.REPLACE and has_dependents:
|
|
72
|
+
raise excs.Error(f"{obj_type_str} `{path_str}` already exists and has dependents. Use `if_exists='replace_force'` to replace it.")
|
|
73
|
+
else:
|
|
74
|
+
assert if_exists == catalog.IfExistsParam.REPLACE_FORCE or not has_dependents
|
|
75
|
+
# Drop the existing path so it can be replaced.
|
|
76
|
+
# Any errors during drop will be raised.
|
|
77
|
+
_logger.info(f"Dropping {obj_type_str} `{path_str}` to replace it.")
|
|
78
|
+
if isinstance(existing_path, catalog.Dir):
|
|
79
|
+
drop_dir(path_str, force=True)
|
|
80
|
+
else:
|
|
81
|
+
drop_table(path_str, force=True)
|
|
82
|
+
assert cat.paths.get_object(path) is None
|
|
83
|
+
|
|
84
|
+
return None
|
|
28
85
|
|
|
29
86
|
def create_table(
|
|
30
87
|
path_str: str,
|
|
@@ -33,7 +90,8 @@ def create_table(
|
|
|
33
90
|
primary_key: Optional[Union[str, list[str]]] = None,
|
|
34
91
|
num_retained_versions: int = 10,
|
|
35
92
|
comment: str = '',
|
|
36
|
-
media_validation: Literal['on_read', 'on_write'] = 'on_write'
|
|
93
|
+
media_validation: Literal['on_read', 'on_write'] = 'on_write',
|
|
94
|
+
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error'
|
|
37
95
|
) -> catalog.Table:
|
|
38
96
|
"""Create a new base table.
|
|
39
97
|
|
|
@@ -46,14 +104,28 @@ def create_table(
|
|
|
46
104
|
num_retained_versions: Number of versions of the table to retain.
|
|
47
105
|
comment: An optional comment; its meaning is user-defined.
|
|
48
106
|
media_validation: Media validation policy for the table.
|
|
107
|
+
|
|
49
108
|
- `'on_read'`: validate media files at query time
|
|
50
109
|
- `'on_write'`: validate media files during insert/update operations
|
|
110
|
+
if_exists: Directive regarding how to handle if the path already exists.
|
|
111
|
+
Must be one of the following:
|
|
112
|
+
|
|
113
|
+
- `'error'`: raise an error
|
|
114
|
+
- `'ignore'`: do nothing and return the existing table handle
|
|
115
|
+
- `'replace'`: if the existing table has no views, drop and replace it with a new one
|
|
116
|
+
- `'replace_force'`: drop the existing table and all its views, and create a new one
|
|
51
117
|
|
|
52
118
|
Returns:
|
|
53
|
-
A handle to the newly created
|
|
119
|
+
A handle to the newly created table, or to an already existing table at the path when `if_exists='ignore'`.
|
|
120
|
+
Please note the schema of the existing table may not match the schema provided in the call.
|
|
54
121
|
|
|
55
122
|
Raises:
|
|
56
|
-
Error: if
|
|
123
|
+
Error: if
|
|
124
|
+
|
|
125
|
+
- the path is invalid, or
|
|
126
|
+
- the path already exists and `if_exists='error'`, or
|
|
127
|
+
- the path already exists and is not a table, or
|
|
128
|
+
- an error occurs while attempting to create the table.
|
|
57
129
|
|
|
58
130
|
Examples:
|
|
59
131
|
Create a table with an int and a string column:
|
|
@@ -65,10 +137,27 @@ def create_table(
|
|
|
65
137
|
|
|
66
138
|
>>> tbl1 = pxt.get_table('orig_table')
|
|
67
139
|
... tbl2 = pxt.create_table('new_table', tbl1.where(tbl1.col1 < 10).select(tbl1.col2))
|
|
140
|
+
|
|
141
|
+
Create a table if it does not already exist, otherwise get the existing table:
|
|
142
|
+
|
|
143
|
+
>>> tbl = pxt.create_table('my_table', schema={'col1': pxt.Int, 'col2': pxt.String}, if_exists='ignore')
|
|
144
|
+
|
|
145
|
+
Create a table with an int and a float column, and replace any existing table:
|
|
146
|
+
|
|
147
|
+
>>> tbl = pxt.create_table('my_table', schema={'col1': pxt.Int, 'col2': pxt.Float}, if_exists='replace')
|
|
68
148
|
"""
|
|
69
149
|
path = catalog.Path(path_str)
|
|
70
|
-
Catalog.get()
|
|
71
|
-
|
|
150
|
+
cat = Catalog.get()
|
|
151
|
+
|
|
152
|
+
if cat.paths.get_object(path) is not None:
|
|
153
|
+
# The table already exists. Handle it as per user directive.
|
|
154
|
+
_if_exists = catalog.IfExistsParam.validated(if_exists, 'if_exists')
|
|
155
|
+
existing_table = _get_or_drop_existing_path(path_str, catalog.InsertableTable, False, _if_exists)
|
|
156
|
+
if existing_table is not None:
|
|
157
|
+
assert isinstance(existing_table, catalog.Table)
|
|
158
|
+
return existing_table
|
|
159
|
+
|
|
160
|
+
dir = cat.paths[path.parent]
|
|
72
161
|
|
|
73
162
|
df: Optional[DataFrame] = None
|
|
74
163
|
if isinstance(schema_or_df, dict):
|
|
@@ -95,7 +184,7 @@ def create_table(
|
|
|
95
184
|
tbl = catalog.InsertableTable._create(
|
|
96
185
|
dir._id, path.name, schema, df, primary_key=primary_key, num_retained_versions=num_retained_versions,
|
|
97
186
|
comment=comment, media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'))
|
|
98
|
-
|
|
187
|
+
cat.paths[path] = tbl
|
|
99
188
|
|
|
100
189
|
_logger.info(f'Created table `{path_str}`.')
|
|
101
190
|
return tbl
|
|
@@ -111,7 +200,7 @@ def create_view(
|
|
|
111
200
|
num_retained_versions: int = 10,
|
|
112
201
|
comment: str = '',
|
|
113
202
|
media_validation: Literal['on_read', 'on_write'] = 'on_write',
|
|
114
|
-
|
|
203
|
+
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
|
|
115
204
|
) -> Optional[catalog.Table]:
|
|
116
205
|
"""Create a view of an existing table object (which itself can be a view or a snapshot or a base table).
|
|
117
206
|
|
|
@@ -129,43 +218,82 @@ def create_view(
|
|
|
129
218
|
the base table.
|
|
130
219
|
num_retained_versions: Number of versions of the view to retain.
|
|
131
220
|
comment: Optional comment for the view.
|
|
132
|
-
|
|
221
|
+
media_validation: Media validation policy for the view.
|
|
222
|
+
|
|
223
|
+
- `'on_read'`: validate media files at query time
|
|
224
|
+
- `'on_write'`: validate media files during insert/update operations
|
|
225
|
+
if_exists: Directive regarding how to handle if the path already exists.
|
|
226
|
+
Must be one of the following:
|
|
227
|
+
|
|
228
|
+
- `'error'`: raise an error
|
|
229
|
+
- `'ignore'`: do nothing and return the existing view handle
|
|
230
|
+
- `'replace'`: if the existing view has no dependents, drop and replace it with a new one
|
|
231
|
+
- `'replace_force'`: drop the existing view and all its dependents, and create a new one
|
|
133
232
|
|
|
134
233
|
Returns:
|
|
135
234
|
A handle to the [`Table`][pixeltable.Table] representing the newly created view. If the path already
|
|
136
|
-
exists
|
|
235
|
+
exists and `if_exists='ignore'`, returns a handle to the existing view. Please note the schema
|
|
236
|
+
or the base of the existing view may not match those provided in the call.
|
|
137
237
|
|
|
138
238
|
Raises:
|
|
139
|
-
Error: if
|
|
239
|
+
Error: if
|
|
240
|
+
|
|
241
|
+
- the path is invalid, or
|
|
242
|
+
- the path already exists and `if_exists='error'`, or
|
|
243
|
+
- the path already exists and is not a view, or
|
|
244
|
+
- an error occurs while attempting to create the view.
|
|
140
245
|
|
|
141
246
|
Examples:
|
|
142
247
|
Create a view `my_view` of an existing table `my_table`, filtering on rows where `col1` is greater than 10:
|
|
143
248
|
|
|
144
249
|
>>> tbl = pxt.get_table('my_table')
|
|
145
250
|
... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 10))
|
|
251
|
+
|
|
252
|
+
Create a view `my_view` of an existing table `my_table`, filtering on rows where `col1` is greater than 10,
|
|
253
|
+
if it does not already exist. Otherwise, get the existing view named `my_view`:
|
|
254
|
+
|
|
255
|
+
>>> tbl = pxt.get_table('my_table')
|
|
256
|
+
... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 10), if_exists='ignore')
|
|
257
|
+
|
|
258
|
+
Create a view `my_view` of an existing table `my_table`, filtering on rows where `col1` is greater than 100,
|
|
259
|
+
and replace any existing view named `my_view`:
|
|
260
|
+
|
|
261
|
+
>>> tbl = pxt.get_table('my_table')
|
|
262
|
+
... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 100), if_exists='replace_force')
|
|
146
263
|
"""
|
|
147
264
|
where: Optional[exprs.Expr] = None
|
|
148
265
|
if isinstance(base, catalog.Table):
|
|
149
266
|
tbl_version_path = base._tbl_version_path
|
|
150
267
|
elif isinstance(base, DataFrame):
|
|
151
268
|
base._validate_mutable('create_view')
|
|
152
|
-
|
|
269
|
+
if len(base._from_clause.tbls) > 1:
|
|
270
|
+
raise excs.Error('Cannot create a view of a join')
|
|
271
|
+
tbl_version_path = base._from_clause.tbls[0]
|
|
153
272
|
where = base.where_clause
|
|
154
273
|
else:
|
|
155
274
|
raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
|
|
156
275
|
assert isinstance(base, catalog.Table) or isinstance(base, DataFrame)
|
|
276
|
+
|
|
157
277
|
path = catalog.Path(path_str)
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
278
|
+
cat = Catalog.get()
|
|
279
|
+
|
|
280
|
+
if cat.paths.get_object(path) is not None:
|
|
281
|
+
# The view already exists. Handle it as per user directive.
|
|
282
|
+
_if_exists = catalog.IfExistsParam.validated(if_exists, 'if_exists')
|
|
283
|
+
existing_path = _get_or_drop_existing_path(path_str, catalog.View, is_snapshot, _if_exists)
|
|
284
|
+
if existing_path is not None:
|
|
285
|
+
assert isinstance(existing_path, catalog.View)
|
|
286
|
+
return existing_path
|
|
287
|
+
|
|
288
|
+
dir = cat.paths[path.parent]
|
|
166
289
|
|
|
167
290
|
if additional_columns is None:
|
|
168
291
|
additional_columns = {}
|
|
292
|
+
else:
|
|
293
|
+
# additional columns should not be in the base table
|
|
294
|
+
for col_name in additional_columns.keys():
|
|
295
|
+
if col_name in [c.name for c in tbl_version_path.columns()]:
|
|
296
|
+
raise excs.Error(f"Column {col_name!r} already exists in the base table {tbl_version_path.get_column(col_name).tbl.name}.")
|
|
169
297
|
if iterator is None:
|
|
170
298
|
iterator_class, iterator_args = None, None
|
|
171
299
|
else:
|
|
@@ -176,7 +304,7 @@ def create_view(
|
|
|
176
304
|
is_snapshot=is_snapshot, iterator_cls=iterator_class, iterator_args=iterator_args,
|
|
177
305
|
num_retained_versions=num_retained_versions, comment=comment,
|
|
178
306
|
media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'))
|
|
179
|
-
|
|
307
|
+
cat.paths[path] = view
|
|
180
308
|
_logger.info(f'Created view `{path_str}`.')
|
|
181
309
|
FileCache.get().emit_eviction_warnings()
|
|
182
310
|
return view
|
|
@@ -191,7 +319,7 @@ def create_snapshot(
|
|
|
191
319
|
num_retained_versions: int = 10,
|
|
192
320
|
comment: str = '',
|
|
193
321
|
media_validation: Literal['on_read', 'on_write'] = 'on_write',
|
|
194
|
-
|
|
322
|
+
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
|
|
195
323
|
) -> Optional[catalog.Table]:
|
|
196
324
|
"""Create a snapshot of an existing table object (which itself can be a view or a snapshot or a base table).
|
|
197
325
|
|
|
@@ -206,21 +334,47 @@ def create_snapshot(
|
|
|
206
334
|
iterator: The iterator to use for this snapshot. If specified, then this snapshot will be a one-to-many view of
|
|
207
335
|
the base table.
|
|
208
336
|
num_retained_versions: Number of versions of the view to retain.
|
|
209
|
-
comment: Optional comment for the
|
|
210
|
-
|
|
337
|
+
comment: Optional comment for the snapshot.
|
|
338
|
+
media_validation: Media validation policy for the snapshot.
|
|
339
|
+
|
|
340
|
+
- `'on_read'`: validate media files at query time
|
|
341
|
+
- `'on_write'`: validate media files during insert/update operations
|
|
342
|
+
if_exists: Directive regarding how to handle if the path already exists.
|
|
343
|
+
Must be one of the following:
|
|
344
|
+
|
|
345
|
+
- `'error'`: raise an error
|
|
346
|
+
- `'ignore'`: do nothing and return the existing snapshot handle
|
|
347
|
+
- `'replace'`: if the existing snapshot has no dependents, drop and replace it with a new one
|
|
348
|
+
- `'replace_force'`: drop the existing snapshot and all its dependents, and create a new one
|
|
211
349
|
|
|
212
350
|
Returns:
|
|
213
|
-
A handle to the [`Table`][pixeltable.Table] representing the newly created snapshot.
|
|
214
|
-
|
|
351
|
+
A handle to the [`Table`][pixeltable.Table] representing the newly created snapshot.
|
|
352
|
+
Please note the schema or base of the existing snapshot may not match those provided in the call.
|
|
215
353
|
|
|
216
354
|
Raises:
|
|
217
|
-
Error: if
|
|
355
|
+
Error: if
|
|
356
|
+
|
|
357
|
+
- the path is invalid, or
|
|
358
|
+
- the path already exists and `if_exists='error'`, or
|
|
359
|
+
- the path already exists and is not a snapshot, or
|
|
360
|
+
- an error occurs while attempting to create the snapshot.
|
|
218
361
|
|
|
219
362
|
Examples:
|
|
220
|
-
Create a snapshot of `my_table`:
|
|
363
|
+
Create a snapshot `my_snapshot` of a table `my_table`:
|
|
221
364
|
|
|
222
365
|
>>> tbl = pxt.get_table('my_table')
|
|
223
366
|
... snapshot = pxt.create_snapshot('my_snapshot', tbl)
|
|
367
|
+
|
|
368
|
+
Create a snapshot `my_snapshot` of a view `my_view` with additional int column `col3`,
|
|
369
|
+
if `my_snapshot` does not already exist:
|
|
370
|
+
|
|
371
|
+
>>> view = pxt.get_table('my_view')
|
|
372
|
+
... snapshot = pxt.create_snapshot('my_snapshot', view, additional_columns={'col3': pxt.Int}, if_exists='ignore')
|
|
373
|
+
|
|
374
|
+
Create a snapshot `my_snapshot` on a table `my_table`, and replace any existing snapshot named `my_snapshot`:
|
|
375
|
+
|
|
376
|
+
>>> tbl = pxt.get_table('my_table')
|
|
377
|
+
... snapshot = pxt.create_snapshot('my_snapshot', tbl, if_exists='replace_force')
|
|
224
378
|
"""
|
|
225
379
|
return create_view(
|
|
226
380
|
path_str,
|
|
@@ -231,7 +385,7 @@ def create_snapshot(
|
|
|
231
385
|
num_retained_versions=num_retained_versions,
|
|
232
386
|
comment=comment,
|
|
233
387
|
media_validation=media_validation,
|
|
234
|
-
|
|
388
|
+
if_exists=if_exists,
|
|
235
389
|
)
|
|
236
390
|
|
|
237
391
|
|
|
@@ -296,16 +450,26 @@ def move(path: str, new_path: str) -> None:
|
|
|
296
450
|
obj._move(new_p.name, new_dir._id)
|
|
297
451
|
|
|
298
452
|
|
|
299
|
-
def drop_table(table: Union[str, catalog.Table], force: bool = False,
|
|
453
|
+
def drop_table(table: Union[str, catalog.Table], force: bool = False,
|
|
454
|
+
if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
|
|
300
455
|
"""Drop a table, view, or snapshot.
|
|
301
456
|
|
|
302
457
|
Args:
|
|
303
458
|
table: Fully qualified name, or handle, of the table to be dropped.
|
|
304
459
|
force: If `True`, will also drop all views and sub-views of this table.
|
|
305
|
-
|
|
460
|
+
if_not_exists: Directive regarding how to handle if the path does not exist.
|
|
461
|
+
Must be one of the following:
|
|
462
|
+
|
|
463
|
+
- `'error'`: raise an error
|
|
464
|
+
- `'ignore'`: do nothing and return
|
|
306
465
|
|
|
307
466
|
Raises:
|
|
308
|
-
Error:
|
|
467
|
+
Error: if the qualified name
|
|
468
|
+
|
|
469
|
+
- is invalid, or
|
|
470
|
+
- does not exist and `if_not_exists='error'`, or
|
|
471
|
+
- does not designate a table object, or
|
|
472
|
+
- designates a table object but has dependents and `force=False`.
|
|
309
473
|
|
|
310
474
|
Examples:
|
|
311
475
|
Drop a table by its fully qualified name:
|
|
@@ -315,19 +479,25 @@ def drop_table(table: Union[str, catalog.Table], force: bool = False, ignore_err
|
|
|
315
479
|
>>> t = pxt.get_table('subdir.my_table')
|
|
316
480
|
... pxt.drop_table(t)
|
|
317
481
|
|
|
482
|
+
Drop a table if it exists, otherwise do nothing:
|
|
483
|
+
>>> pxt.drop_table('subdir.my_table', if_not_exists='ignore')
|
|
484
|
+
|
|
485
|
+
Drop a table and all its dependents:
|
|
486
|
+
>>> pxt.drop_table('subdir.my_table', force=True)
|
|
318
487
|
"""
|
|
319
488
|
cat = Catalog.get()
|
|
320
489
|
if isinstance(table, str):
|
|
321
490
|
tbl_path_obj = catalog.Path(table)
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
if
|
|
491
|
+
tbl = cat.paths.get_object(tbl_path_obj)
|
|
492
|
+
if tbl is None:
|
|
493
|
+
_if_not_exists = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
494
|
+
if _if_not_exists == catalog.IfNotExistsParam.IGNORE or force:
|
|
326
495
|
_logger.info(f'Skipped table `{table}` (does not exist).')
|
|
327
496
|
return
|
|
328
497
|
else:
|
|
329
|
-
raise
|
|
330
|
-
tbl
|
|
498
|
+
raise excs.Error(f'Table `{table}` does not exist.')
|
|
499
|
+
if not isinstance(tbl, catalog.Table):
|
|
500
|
+
raise excs.Error(f'{tbl} needs to be a {catalog.Table._display_name()} but is a {type(tbl)._display_name()}')
|
|
331
501
|
else:
|
|
332
502
|
tbl = table
|
|
333
503
|
tbl_path_obj = catalog.Path(tbl._path)
|
|
@@ -373,16 +543,30 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
|
|
|
373
543
|
Catalog.get().paths.check_is_valid(path, expected=catalog.Dir)
|
|
374
544
|
return [str(p) for p in Catalog.get().paths.get_children(path, child_type=catalog.Table, recursive=recursive)]
|
|
375
545
|
|
|
376
|
-
|
|
377
|
-
def create_dir(path_str: str, ignore_errors: bool = False) -> Optional[catalog.Dir]:
|
|
546
|
+
def create_dir(path_str: str, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error') -> Optional[catalog.Dir]:
|
|
378
547
|
"""Create a directory.
|
|
379
548
|
|
|
380
549
|
Args:
|
|
381
550
|
path_str: Path to the directory.
|
|
382
|
-
|
|
551
|
+
if_exists: Directive regarding how to handle if the path already exists.
|
|
552
|
+
Must be one of the following:
|
|
553
|
+
|
|
554
|
+
- `'error'`: raise an error
|
|
555
|
+
- `'ignore'`: do nothing and return the existing directory handle
|
|
556
|
+
- `'replace'`: if the existing directory is empty, drop it and create a new one
|
|
557
|
+
- `'replace_force'`: drop the existing directory and all its children, and create a new one
|
|
558
|
+
|
|
559
|
+
Returns:
|
|
560
|
+
A handle to the newly created directory, or to an already existing directory at the path when `if_exists='ignore'`.
|
|
561
|
+
Please note the existing directory may not be empty.
|
|
383
562
|
|
|
384
563
|
Raises:
|
|
385
|
-
Error: If
|
|
564
|
+
Error: If
|
|
565
|
+
|
|
566
|
+
- the path is invalid, or
|
|
567
|
+
- the path already exists and `if_exists='error'`, or
|
|
568
|
+
- the path already exists and is not a directory, or
|
|
569
|
+
- an error occurs while attempting to create the directory.
|
|
386
570
|
|
|
387
571
|
Examples:
|
|
388
572
|
>>> pxt.create_dir('my_dir')
|
|
@@ -390,63 +574,93 @@ def create_dir(path_str: str, ignore_errors: bool = False) -> Optional[catalog.D
|
|
|
390
574
|
Create a subdirectory:
|
|
391
575
|
|
|
392
576
|
>>> pxt.create_dir('my_dir.sub_dir')
|
|
393
|
-
"""
|
|
394
|
-
try:
|
|
395
|
-
path = catalog.Path(path_str)
|
|
396
|
-
Catalog.get().paths.check_is_valid(path, expected=None)
|
|
397
|
-
parent = Catalog.get().paths[path.parent]
|
|
398
|
-
assert parent is not None
|
|
399
|
-
with orm.Session(Env.get().engine, future=True) as session:
|
|
400
|
-
dir_md = schema.DirMd(name=path.name)
|
|
401
|
-
dir_record = schema.Dir(parent_id=parent._id, md=dataclasses.asdict(dir_md))
|
|
402
|
-
session.add(dir_record)
|
|
403
|
-
session.flush()
|
|
404
|
-
assert dir_record.id is not None
|
|
405
|
-
assert isinstance(dir_record.id, UUID)
|
|
406
|
-
dir = catalog.Dir(dir_record.id, parent._id, path.name)
|
|
407
|
-
Catalog.get().paths[path] = dir
|
|
408
|
-
session.commit()
|
|
409
|
-
_logger.info(f'Created directory `{path_str}`.')
|
|
410
|
-
print(f'Created directory `{path_str}`.')
|
|
411
|
-
return dir
|
|
412
|
-
except excs.Error as e:
|
|
413
|
-
if ignore_errors:
|
|
414
|
-
return None
|
|
415
|
-
else:
|
|
416
|
-
raise e
|
|
417
577
|
|
|
578
|
+
Create a subdirectory only if it does not already exist, otherwise do nothing:
|
|
579
|
+
|
|
580
|
+
>>> pxt.create_dir('my_dir.sub_dir', if_exists='ignore')
|
|
418
581
|
|
|
419
|
-
|
|
582
|
+
Create a directory, replacing it if it already exists:
|
|
583
|
+
|
|
584
|
+
>>> pxt.create_dir('my_dir', if_exists='replace_force')
|
|
585
|
+
"""
|
|
586
|
+
path = catalog.Path(path_str)
|
|
587
|
+
cat = Catalog.get()
|
|
588
|
+
|
|
589
|
+
if cat.paths.get_object(path):
|
|
590
|
+
# The directory already exists. Handle it as per user directive.
|
|
591
|
+
_if_exists = catalog.IfExistsParam.validated(if_exists, 'if_exists')
|
|
592
|
+
existing_path = _get_or_drop_existing_path(path_str, catalog.Dir, False, _if_exists)
|
|
593
|
+
if existing_path is not None:
|
|
594
|
+
assert isinstance(existing_path, catalog.Dir)
|
|
595
|
+
return existing_path
|
|
596
|
+
|
|
597
|
+
parent = cat.paths[path.parent]
|
|
598
|
+
assert parent is not None
|
|
599
|
+
with orm.Session(Env.get().engine, future=True) as session:
|
|
600
|
+
dir_md = schema.DirMd(name=path.name)
|
|
601
|
+
dir_record = schema.Dir(parent_id=parent._id, md=dataclasses.asdict(dir_md))
|
|
602
|
+
session.add(dir_record)
|
|
603
|
+
session.flush()
|
|
604
|
+
assert dir_record.id is not None
|
|
605
|
+
assert isinstance(dir_record.id, UUID)
|
|
606
|
+
dir = catalog.Dir(dir_record.id, parent._id, path.name)
|
|
607
|
+
cat.paths[path] = dir
|
|
608
|
+
session.commit()
|
|
609
|
+
_logger.info(f'Created directory `{path_str}`.')
|
|
610
|
+
print(f'Created directory `{path_str}`.')
|
|
611
|
+
return dir
|
|
612
|
+
|
|
613
|
+
def drop_dir(path_str: str, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
|
|
420
614
|
"""Remove a directory.
|
|
421
615
|
|
|
422
616
|
Args:
|
|
423
617
|
path_str: Name or path of the directory.
|
|
424
618
|
force: If `True`, will also drop all tables and subdirectories of this directory, recursively, along
|
|
425
619
|
with any views or snapshots that depend on any of the dropped tables.
|
|
426
|
-
|
|
427
|
-
|
|
620
|
+
if_not_exists: Directive regarding how to handle if the path does not exist.
|
|
621
|
+
Must be one of the following:
|
|
622
|
+
|
|
623
|
+
- `'error'`: raise an error
|
|
624
|
+
- `'ignore'`: do nothing and return
|
|
428
625
|
|
|
429
626
|
Raises:
|
|
430
|
-
Error: If the path
|
|
627
|
+
Error: If the path
|
|
628
|
+
|
|
629
|
+
- is invalid, or
|
|
630
|
+
- does not exist and `if_not_exists='error'`, or
|
|
631
|
+
- does not designate a directory, or
|
|
632
|
+
- is a directory but is not empty and `force=False`.
|
|
431
633
|
|
|
432
634
|
Examples:
|
|
635
|
+
Remove a directory, if it exists and is empty:
|
|
433
636
|
>>> pxt.drop_dir('my_dir')
|
|
434
637
|
|
|
435
638
|
Remove a subdirectory:
|
|
436
639
|
|
|
437
640
|
>>> pxt.drop_dir('my_dir.sub_dir')
|
|
641
|
+
|
|
642
|
+
Remove an existing directory if it is empty, but do nothing if it does not exist:
|
|
643
|
+
|
|
644
|
+
>>> pxt.drop_dir('my_dir.sub_dir', if_not_exists='ignore')
|
|
645
|
+
|
|
646
|
+
Remove an existing directory and all its contents:
|
|
647
|
+
|
|
648
|
+
>>> pxt.drop_dir('my_dir', force=True)
|
|
438
649
|
"""
|
|
439
650
|
cat = Catalog.get()
|
|
440
651
|
path = catalog.Path(path_str)
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
_logger.info(f'Skipped directory `{path}` (does not exist).')
|
|
652
|
+
obj = cat.paths.get_object(path)
|
|
653
|
+
if obj is None:
|
|
654
|
+
_if_not_exists = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
655
|
+
if _if_not_exists == catalog.IfNotExistsParam.IGNORE or force:
|
|
656
|
+
_logger.info(f'Skipped directory `{path_str}` (does not exist).')
|
|
447
657
|
return
|
|
448
658
|
else:
|
|
449
|
-
raise
|
|
659
|
+
raise excs.Error(f'Directory `{path_str}` does not exist.')
|
|
660
|
+
|
|
661
|
+
if not isinstance(obj, catalog.Dir):
|
|
662
|
+
raise excs.Error(
|
|
663
|
+
f'{str(path)} needs to be a {catalog.Dir._display_name()} but is a {type(obj)._display_name()}')
|
|
450
664
|
|
|
451
665
|
children = cat.paths.get_children(path, child_type=None, recursive=True)
|
|
452
666
|
|
|
@@ -507,7 +721,7 @@ def list_functions() -> Styler:
|
|
|
507
721
|
paths = ['.'.join(f.self_path.split('.')[:-1]) for f in functions]
|
|
508
722
|
names = [f.name for f in functions]
|
|
509
723
|
params = [
|
|
510
|
-
', '.join([param_name + ': ' + str(param_type) for param_name, param_type in f.
|
|
724
|
+
', '.join([param_name + ': ' + str(param_type) for param_name, param_type in f.signatures[0].parameters.items()])
|
|
511
725
|
for f in functions
|
|
512
726
|
]
|
|
513
727
|
pd_df = pd.DataFrame(
|
|
@@ -515,7 +729,7 @@ def list_functions() -> Styler:
|
|
|
515
729
|
'Path': paths,
|
|
516
730
|
'Function Name': names,
|
|
517
731
|
'Parameters': params,
|
|
518
|
-
'Return Type': [str(f.
|
|
732
|
+
'Return Type': [str(f.signatures[0].get_return_type()) for f in functions],
|
|
519
733
|
}
|
|
520
734
|
)
|
|
521
735
|
pd_df = pd_df.style.set_properties(None, **{'text-align': 'left'}).set_table_styles(
|
|
@@ -524,6 +738,66 @@ def list_functions() -> Styler:
|
|
|
524
738
|
return pd_df.hide(axis='index')
|
|
525
739
|
|
|
526
740
|
|
|
741
|
+
def tools(*args: Union[func.Function, func.tools.Tool]) -> func.tools.Tools:
|
|
742
|
+
"""
|
|
743
|
+
Specifies a collection of UDFs to be used as LLM tools. Pixeltable allows any UDF to be used as an input into an
|
|
744
|
+
LLM tool-calling API. To use one or more UDFs as tools, wrap them in a `pxt.tools` call and pass the return value
|
|
745
|
+
to an LLM API.
|
|
746
|
+
|
|
747
|
+
The UDFs can be specified directly or wrapped inside a [pxt.tool()][pixeltable.tool] invocation. If a UDF is
|
|
748
|
+
specified directly, the tool name will be the (unqualified) UDF name, and the tool description will consist of the
|
|
749
|
+
entire contents of the UDF docstring. If a UDF is wrapped in a `pxt.tool()` invocation, then the name and/or
|
|
750
|
+
description may be customized.
|
|
751
|
+
|
|
752
|
+
Args:
|
|
753
|
+
args: The UDFs to use as tools.
|
|
754
|
+
|
|
755
|
+
Returns:
|
|
756
|
+
A `Tools` instance that can be passed to an LLM tool-calling API or invoked to generate tool results.
|
|
757
|
+
|
|
758
|
+
Examples:
|
|
759
|
+
Create a tools instance with a single UDF:
|
|
760
|
+
|
|
761
|
+
>>> tools = pxt.tools(stock_price)
|
|
762
|
+
|
|
763
|
+
Create a tools instance with several UDFs:
|
|
764
|
+
|
|
765
|
+
>>> tools = pxt.tools(stock_price, weather_quote)
|
|
766
|
+
|
|
767
|
+
Create a tools instance, some of whose UDFs have customized metadata:
|
|
768
|
+
|
|
769
|
+
>>> tools = pxt.tools(
|
|
770
|
+
... stock_price,
|
|
771
|
+
... pxt.tool(weather_quote, description='Returns information about the weather in a particular location.'),
|
|
772
|
+
... pxt.tool(traffic_quote, name='traffic_conditions'),
|
|
773
|
+
... )
|
|
774
|
+
"""
|
|
775
|
+
return func.tools.Tools(tools=[
|
|
776
|
+
arg if isinstance(arg, func.tools.Tool) else tool(arg)
|
|
777
|
+
for arg in args
|
|
778
|
+
])
|
|
779
|
+
|
|
780
|
+
|
|
781
|
+
def tool(fn: func.Function, name: Optional[str] = None, description: Optional[str] = None) -> func.tools.Tool:
|
|
782
|
+
"""
|
|
783
|
+
Specifies a Pixeltable UDF to be used as an LLM tool with customizable metadata. See the documentation for
|
|
784
|
+
[pxt.tools()][pixeltable.tools] for more details.
|
|
785
|
+
|
|
786
|
+
Args:
|
|
787
|
+
fn: The UDF to use as a tool.
|
|
788
|
+
name: The name of the tool. If not specified, then the unqualified name of the UDF will be used by default.
|
|
789
|
+
description: The description of the tool. If not specified, then the entire contents of the UDF docstring
|
|
790
|
+
will be used by default.
|
|
791
|
+
|
|
792
|
+
Returns:
|
|
793
|
+
A `Tool` instance that can be passed to an LLM tool-calling API.
|
|
794
|
+
"""
|
|
795
|
+
if isinstance(fn, func.AggregateFunction):
|
|
796
|
+
raise excs.Error('Aggregator UDFs cannot be used as tools')
|
|
797
|
+
|
|
798
|
+
return func.tools.Tool(fn=fn, name=name, description=description)
|
|
799
|
+
|
|
800
|
+
|
|
527
801
|
def configure_logging(
|
|
528
802
|
*,
|
|
529
803
|
to_stdout: Optional[bool] = None,
|