pixeltable 0.2.28__py3-none-any.whl → 0.2.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/dir.py +6 -0
- pixeltable/catalog/globals.py +25 -0
- pixeltable/catalog/named_function.py +4 -0
- pixeltable/catalog/path_dict.py +37 -11
- pixeltable/catalog/schema_object.py +6 -0
- pixeltable/catalog/table.py +96 -19
- pixeltable/catalog/table_version.py +22 -8
- pixeltable/dataframe.py +201 -3
- pixeltable/env.py +9 -3
- pixeltable/exec/expr_eval_node.py +1 -1
- pixeltable/exec/sql_node.py +2 -2
- pixeltable/exprs/function_call.py +134 -29
- pixeltable/exprs/inline_expr.py +22 -2
- pixeltable/exprs/row_builder.py +1 -1
- pixeltable/exprs/similarity_expr.py +9 -2
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +151 -68
- pixeltable/func/callable_function.py +50 -16
- pixeltable/func/expr_template_function.py +62 -24
- pixeltable/func/function.py +191 -23
- pixeltable/func/function_registry.py +2 -1
- pixeltable/func/query_template_function.py +11 -6
- pixeltable/func/signature.py +64 -7
- pixeltable/func/tools.py +116 -0
- pixeltable/func/udf.py +57 -35
- pixeltable/functions/__init__.py +2 -2
- pixeltable/functions/anthropic.py +36 -2
- pixeltable/functions/globals.py +54 -34
- pixeltable/functions/json.py +3 -8
- pixeltable/functions/math.py +67 -0
- pixeltable/functions/ollama.py +4 -4
- pixeltable/functions/openai.py +31 -2
- pixeltable/functions/timestamp.py +1 -1
- pixeltable/functions/video.py +2 -8
- pixeltable/functions/vision.py +1 -1
- pixeltable/globals.py +347 -79
- pixeltable/index/embedding_index.py +44 -24
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_16.py +2 -1
- pixeltable/metadata/converters/convert_17.py +2 -1
- pixeltable/metadata/converters/convert_23.py +35 -0
- pixeltable/metadata/converters/convert_24.py +47 -0
- pixeltable/metadata/converters/util.py +4 -2
- pixeltable/metadata/notes.py +2 -0
- pixeltable/metadata/schema.py +1 -0
- pixeltable/type_system.py +192 -48
- {pixeltable-0.2.28.dist-info → pixeltable-0.2.30.dist-info}/METADATA +4 -2
- {pixeltable-0.2.28.dist-info → pixeltable-0.2.30.dist-info}/RECORD +54 -57
- pixeltable-0.2.30.dist-info/entry_points.txt +3 -0
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable-0.2.28.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.28.dist-info → pixeltable-0.2.30.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.28.dist-info → pixeltable-0.2.30.dist-info}/WHEEL +0 -0
pixeltable/globals.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import dataclasses
|
|
2
2
|
import logging
|
|
3
|
-
from typing import Any, Iterable, Optional, Union, Literal
|
|
3
|
+
from typing import Any, Iterable, Optional, Union, Literal, Type
|
|
4
4
|
from uuid import UUID
|
|
5
5
|
|
|
6
6
|
import pandas as pd
|
|
@@ -20,11 +20,68 @@ from pixeltable.utils.filecache import FileCache
|
|
|
20
20
|
|
|
21
21
|
_logger = logging.getLogger('pixeltable')
|
|
22
22
|
|
|
23
|
-
|
|
24
23
|
def init() -> None:
|
|
25
24
|
"""Initializes the Pixeltable environment."""
|
|
26
25
|
_ = Catalog.get()
|
|
27
26
|
|
|
27
|
+
def _get_or_drop_existing_path(
|
|
28
|
+
path_str: str,
|
|
29
|
+
expected_obj_type: Type[catalog.SchemaObject],
|
|
30
|
+
expected_snapshot: bool,
|
|
31
|
+
if_exists: catalog.IfExistsParam
|
|
32
|
+
) -> Optional[catalog.SchemaObject]:
|
|
33
|
+
"""Handle schema object path collision during creation according to the if_exists parameter.
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
path_str: An existing and valid path to the dir, table, view, or snapshot.
|
|
37
|
+
expected_obj_type: The type of object (dir, table, or view) that the caller of this function is creating at the existing path.
|
|
38
|
+
expected_snapshot: Whether the caller of this function is creating a snapshot at the existing path.
|
|
39
|
+
if_exists: Directive regarding how to handle the existing path.
|
|
40
|
+
|
|
41
|
+
Returns:
|
|
42
|
+
A handle to the existing dir, table, view, or snapshot, if `if_exists='ignore'`, otherwise `None`.
|
|
43
|
+
|
|
44
|
+
Raises:
|
|
45
|
+
Error: If the existing path is not of the expected type, or if the existing path has dependents and
|
|
46
|
+
`if_exists='replace'` (use `if_exists='replace_force'` to drop the dependents as well).
|
|
47
|
+
"""
|
|
48
|
+
cat = Catalog.get()
|
|
49
|
+
path = catalog.Path(path_str)
|
|
50
|
+
assert cat.paths.get_object(path) is not None
|
|
51
|
+
|
|
52
|
+
if if_exists == catalog.IfExistsParam.ERROR:
|
|
53
|
+
raise excs.Error(f'Path `{path_str}` already exists.')
|
|
54
|
+
|
|
55
|
+
existing_path = cat.paths[path]
|
|
56
|
+
existing_path_is_snapshot = 'is_snapshot' in existing_path.get_metadata() and existing_path.get_metadata()['is_snapshot']
|
|
57
|
+
obj_type_str = 'Snapshot' if expected_snapshot else expected_obj_type._display_name().capitalize()
|
|
58
|
+
# Check if the existing path is of expected type.
|
|
59
|
+
if (not isinstance(existing_path, expected_obj_type)
|
|
60
|
+
or (expected_snapshot and not existing_path_is_snapshot)):
|
|
61
|
+
raise excs.Error(f'Path `{path_str}` already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.')
|
|
62
|
+
|
|
63
|
+
# if_exists='ignore' return the handle to the existing object.
|
|
64
|
+
assert isinstance(existing_path, expected_obj_type)
|
|
65
|
+
if if_exists == catalog.IfExistsParam.IGNORE:
|
|
66
|
+
return existing_path
|
|
67
|
+
|
|
68
|
+
# Check if the existing object has dependents. If so, cannot replace it
|
|
69
|
+
# unless if_exists='replace_force'.
|
|
70
|
+
has_dependents = existing_path._has_dependents
|
|
71
|
+
if if_exists == catalog.IfExistsParam.REPLACE and has_dependents:
|
|
72
|
+
raise excs.Error(f"{obj_type_str} `{path_str}` already exists and has dependents. Use `if_exists='replace_force'` to replace it.")
|
|
73
|
+
else:
|
|
74
|
+
assert if_exists == catalog.IfExistsParam.REPLACE_FORCE or not has_dependents
|
|
75
|
+
# Drop the existing path so it can be replaced.
|
|
76
|
+
# Any errors during drop will be raised.
|
|
77
|
+
_logger.info(f"Dropping {obj_type_str} `{path_str}` to replace it.")
|
|
78
|
+
if isinstance(existing_path, catalog.Dir):
|
|
79
|
+
drop_dir(path_str, force=True)
|
|
80
|
+
else:
|
|
81
|
+
drop_table(path_str, force=True)
|
|
82
|
+
assert cat.paths.get_object(path) is None
|
|
83
|
+
|
|
84
|
+
return None
|
|
28
85
|
|
|
29
86
|
def create_table(
|
|
30
87
|
path_str: str,
|
|
@@ -33,7 +90,8 @@ def create_table(
|
|
|
33
90
|
primary_key: Optional[Union[str, list[str]]] = None,
|
|
34
91
|
num_retained_versions: int = 10,
|
|
35
92
|
comment: str = '',
|
|
36
|
-
media_validation: Literal['on_read', 'on_write'] = 'on_write'
|
|
93
|
+
media_validation: Literal['on_read', 'on_write'] = 'on_write',
|
|
94
|
+
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error'
|
|
37
95
|
) -> catalog.Table:
|
|
38
96
|
"""Create a new base table.
|
|
39
97
|
|
|
@@ -49,12 +107,25 @@ def create_table(
|
|
|
49
107
|
|
|
50
108
|
- `'on_read'`: validate media files at query time
|
|
51
109
|
- `'on_write'`: validate media files during insert/update operations
|
|
110
|
+
if_exists: Directive regarding how to handle if the path already exists.
|
|
111
|
+
Must be one of the following:
|
|
112
|
+
|
|
113
|
+
- `'error'`: raise an error
|
|
114
|
+
- `'ignore'`: do nothing and return the existing table handle
|
|
115
|
+
- `'replace'`: if the existing table has no views, drop and replace it with a new one
|
|
116
|
+
- `'replace_force'`: drop the existing table and all its views, and create a new one
|
|
52
117
|
|
|
53
118
|
Returns:
|
|
54
|
-
A handle to the newly created
|
|
119
|
+
A handle to the newly created table, or to an already existing table at the path when `if_exists='ignore'`.
|
|
120
|
+
Please note the schema of the existing table may not match the schema provided in the call.
|
|
55
121
|
|
|
56
122
|
Raises:
|
|
57
|
-
Error: if
|
|
123
|
+
Error: if
|
|
124
|
+
|
|
125
|
+
- the path is invalid, or
|
|
126
|
+
- the path already exists and `if_exists='error'`, or
|
|
127
|
+
- the path already exists and is not a table, or
|
|
128
|
+
- an error occurs while attempting to create the table.
|
|
58
129
|
|
|
59
130
|
Examples:
|
|
60
131
|
Create a table with an int and a string column:
|
|
@@ -66,10 +137,27 @@ def create_table(
|
|
|
66
137
|
|
|
67
138
|
>>> tbl1 = pxt.get_table('orig_table')
|
|
68
139
|
... tbl2 = pxt.create_table('new_table', tbl1.where(tbl1.col1 < 10).select(tbl1.col2))
|
|
140
|
+
|
|
141
|
+
Create a table if it does not already exist, otherwise get the existing table:
|
|
142
|
+
|
|
143
|
+
>>> tbl = pxt.create_table('my_table', schema={'col1': pxt.Int, 'col2': pxt.String}, if_exists='ignore')
|
|
144
|
+
|
|
145
|
+
Create a table with an int and a float column, and replace any existing table:
|
|
146
|
+
|
|
147
|
+
>>> tbl = pxt.create_table('my_table', schema={'col1': pxt.Int, 'col2': pxt.Float}, if_exists='replace')
|
|
69
148
|
"""
|
|
70
149
|
path = catalog.Path(path_str)
|
|
71
|
-
Catalog.get()
|
|
72
|
-
|
|
150
|
+
cat = Catalog.get()
|
|
151
|
+
|
|
152
|
+
if cat.paths.get_object(path) is not None:
|
|
153
|
+
# The table already exists. Handle it as per user directive.
|
|
154
|
+
_if_exists = catalog.IfExistsParam.validated(if_exists, 'if_exists')
|
|
155
|
+
existing_table = _get_or_drop_existing_path(path_str, catalog.InsertableTable, False, _if_exists)
|
|
156
|
+
if existing_table is not None:
|
|
157
|
+
assert isinstance(existing_table, catalog.Table)
|
|
158
|
+
return existing_table
|
|
159
|
+
|
|
160
|
+
dir = cat.paths[path.parent]
|
|
73
161
|
|
|
74
162
|
df: Optional[DataFrame] = None
|
|
75
163
|
if isinstance(schema_or_df, dict):
|
|
@@ -96,7 +184,7 @@ def create_table(
|
|
|
96
184
|
tbl = catalog.InsertableTable._create(
|
|
97
185
|
dir._id, path.name, schema, df, primary_key=primary_key, num_retained_versions=num_retained_versions,
|
|
98
186
|
comment=comment, media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'))
|
|
99
|
-
|
|
187
|
+
cat.paths[path] = tbl
|
|
100
188
|
|
|
101
189
|
_logger.info(f'Created table `{path_str}`.')
|
|
102
190
|
return tbl
|
|
@@ -112,7 +200,7 @@ def create_view(
|
|
|
112
200
|
num_retained_versions: int = 10,
|
|
113
201
|
comment: str = '',
|
|
114
202
|
media_validation: Literal['on_read', 'on_write'] = 'on_write',
|
|
115
|
-
|
|
203
|
+
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
|
|
116
204
|
) -> Optional[catalog.Table]:
|
|
117
205
|
"""Create a view of an existing table object (which itself can be a view or a snapshot or a base table).
|
|
118
206
|
|
|
@@ -130,20 +218,48 @@ def create_view(
|
|
|
130
218
|
the base table.
|
|
131
219
|
num_retained_versions: Number of versions of the view to retain.
|
|
132
220
|
comment: Optional comment for the view.
|
|
133
|
-
|
|
221
|
+
media_validation: Media validation policy for the view.
|
|
222
|
+
|
|
223
|
+
- `'on_read'`: validate media files at query time
|
|
224
|
+
- `'on_write'`: validate media files during insert/update operations
|
|
225
|
+
if_exists: Directive regarding how to handle if the path already exists.
|
|
226
|
+
Must be one of the following:
|
|
227
|
+
|
|
228
|
+
- `'error'`: raise an error
|
|
229
|
+
- `'ignore'`: do nothing and return the existing view handle
|
|
230
|
+
- `'replace'`: if the existing view has no dependents, drop and replace it with a new one
|
|
231
|
+
- `'replace_force'`: drop the existing view and all its dependents, and create a new one
|
|
134
232
|
|
|
135
233
|
Returns:
|
|
136
234
|
A handle to the [`Table`][pixeltable.Table] representing the newly created view. If the path already
|
|
137
|
-
exists
|
|
235
|
+
exists and `if_exists='ignore'`, returns a handle to the existing view. Please note the schema
|
|
236
|
+
or the base of the existing view may not match those provided in the call.
|
|
138
237
|
|
|
139
238
|
Raises:
|
|
140
|
-
Error: if
|
|
239
|
+
Error: if
|
|
240
|
+
|
|
241
|
+
- the path is invalid, or
|
|
242
|
+
- the path already exists and `if_exists='error'`, or
|
|
243
|
+
- the path already exists and is not a view, or
|
|
244
|
+
- an error occurs while attempting to create the view.
|
|
141
245
|
|
|
142
246
|
Examples:
|
|
143
247
|
Create a view `my_view` of an existing table `my_table`, filtering on rows where `col1` is greater than 10:
|
|
144
248
|
|
|
145
249
|
>>> tbl = pxt.get_table('my_table')
|
|
146
250
|
... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 10))
|
|
251
|
+
|
|
252
|
+
Create a view `my_view` of an existing table `my_table`, filtering on rows where `col1` is greater than 10,
|
|
253
|
+
if it does not already exist. Otherwise, get the existing view named `my_view`:
|
|
254
|
+
|
|
255
|
+
>>> tbl = pxt.get_table('my_table')
|
|
256
|
+
... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 10), if_exists='ignore')
|
|
257
|
+
|
|
258
|
+
Create a view `my_view` of an existing table `my_table`, filtering on rows where `col1` is greater than 100,
|
|
259
|
+
and replace any existing view named `my_view`:
|
|
260
|
+
|
|
261
|
+
>>> tbl = pxt.get_table('my_table')
|
|
262
|
+
... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 100), if_exists='replace_force')
|
|
147
263
|
"""
|
|
148
264
|
where: Optional[exprs.Expr] = None
|
|
149
265
|
if isinstance(base, catalog.Table):
|
|
@@ -157,15 +273,19 @@ def create_view(
|
|
|
157
273
|
else:
|
|
158
274
|
raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
|
|
159
275
|
assert isinstance(base, catalog.Table) or isinstance(base, DataFrame)
|
|
276
|
+
|
|
160
277
|
path = catalog.Path(path_str)
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
278
|
+
cat = Catalog.get()
|
|
279
|
+
|
|
280
|
+
if cat.paths.get_object(path) is not None:
|
|
281
|
+
# The view already exists. Handle it as per user directive.
|
|
282
|
+
_if_exists = catalog.IfExistsParam.validated(if_exists, 'if_exists')
|
|
283
|
+
existing_path = _get_or_drop_existing_path(path_str, catalog.View, is_snapshot, _if_exists)
|
|
284
|
+
if existing_path is not None:
|
|
285
|
+
assert isinstance(existing_path, catalog.View)
|
|
286
|
+
return existing_path
|
|
287
|
+
|
|
288
|
+
dir = cat.paths[path.parent]
|
|
169
289
|
|
|
170
290
|
if additional_columns is None:
|
|
171
291
|
additional_columns = {}
|
|
@@ -179,7 +299,7 @@ def create_view(
|
|
|
179
299
|
is_snapshot=is_snapshot, iterator_cls=iterator_class, iterator_args=iterator_args,
|
|
180
300
|
num_retained_versions=num_retained_versions, comment=comment,
|
|
181
301
|
media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'))
|
|
182
|
-
|
|
302
|
+
cat.paths[path] = view
|
|
183
303
|
_logger.info(f'Created view `{path_str}`.')
|
|
184
304
|
FileCache.get().emit_eviction_warnings()
|
|
185
305
|
return view
|
|
@@ -194,7 +314,7 @@ def create_snapshot(
|
|
|
194
314
|
num_retained_versions: int = 10,
|
|
195
315
|
comment: str = '',
|
|
196
316
|
media_validation: Literal['on_read', 'on_write'] = 'on_write',
|
|
197
|
-
|
|
317
|
+
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
|
|
198
318
|
) -> Optional[catalog.Table]:
|
|
199
319
|
"""Create a snapshot of an existing table object (which itself can be a view or a snapshot or a base table).
|
|
200
320
|
|
|
@@ -209,21 +329,47 @@ def create_snapshot(
|
|
|
209
329
|
iterator: The iterator to use for this snapshot. If specified, then this snapshot will be a one-to-many view of
|
|
210
330
|
the base table.
|
|
211
331
|
num_retained_versions: Number of versions of the view to retain.
|
|
212
|
-
comment: Optional comment for the
|
|
213
|
-
|
|
332
|
+
comment: Optional comment for the snapshot.
|
|
333
|
+
media_validation: Media validation policy for the snapshot.
|
|
334
|
+
|
|
335
|
+
- `'on_read'`: validate media files at query time
|
|
336
|
+
- `'on_write'`: validate media files during insert/update operations
|
|
337
|
+
if_exists: Directive regarding how to handle if the path already exists.
|
|
338
|
+
Must be one of the following:
|
|
339
|
+
|
|
340
|
+
- `'error'`: raise an error
|
|
341
|
+
- `'ignore'`: do nothing and return the existing snapshot handle
|
|
342
|
+
- `'replace'`: if the existing snapshot has no dependents, drop and replace it with a new one
|
|
343
|
+
- `'replace_force'`: drop the existing snapshot and all its dependents, and create a new one
|
|
214
344
|
|
|
215
345
|
Returns:
|
|
216
|
-
A handle to the [`Table`][pixeltable.Table] representing the newly created snapshot.
|
|
217
|
-
|
|
346
|
+
A handle to the [`Table`][pixeltable.Table] representing the newly created snapshot.
|
|
347
|
+
If the path already exists and `if_exists='ignore'`, returns a handle to the existing snapshot. Please note the schema or base of the existing snapshot may not match those provided in the call.
|
|
218
348
|
|
|
219
349
|
Raises:
|
|
220
|
-
Error: if
|
|
350
|
+
Error: if
|
|
351
|
+
|
|
352
|
+
- the path is invalid, or
|
|
353
|
+
- the path already exists and `if_exists='error'`, or
|
|
354
|
+
- the path already exists and is not a snapshot, or
|
|
355
|
+
- an error occurs while attempting to create the snapshot.
|
|
221
356
|
|
|
222
357
|
Examples:
|
|
223
|
-
Create a snapshot of `my_table`:
|
|
358
|
+
Create a snapshot `my_snapshot` of a table `my_table`:
|
|
224
359
|
|
|
225
360
|
>>> tbl = pxt.get_table('my_table')
|
|
226
361
|
... snapshot = pxt.create_snapshot('my_snapshot', tbl)
|
|
362
|
+
|
|
363
|
+
Create a snapshot `my_snapshot` of a view `my_view` with additional int column `col3`,
|
|
364
|
+
if `my_snapshot` does not already exist:
|
|
365
|
+
|
|
366
|
+
>>> view = pxt.get_table('my_view')
|
|
367
|
+
... snapshot = pxt.create_snapshot('my_snapshot', view, additional_columns={'col3': pxt.Int}, if_exists='ignore')
|
|
368
|
+
|
|
369
|
+
Create a snapshot `my_snapshot` on a table `my_table`, and replace any existing snapshot named `my_snapshot`:
|
|
370
|
+
|
|
371
|
+
>>> tbl = pxt.get_table('my_table')
|
|
372
|
+
... snapshot = pxt.create_snapshot('my_snapshot', tbl, if_exists='replace_force')
|
|
227
373
|
"""
|
|
228
374
|
return create_view(
|
|
229
375
|
path_str,
|
|
@@ -234,7 +380,7 @@ def create_snapshot(
|
|
|
234
380
|
num_retained_versions=num_retained_versions,
|
|
235
381
|
comment=comment,
|
|
236
382
|
media_validation=media_validation,
|
|
237
|
-
|
|
383
|
+
if_exists=if_exists,
|
|
238
384
|
)
|
|
239
385
|
|
|
240
386
|
|
|
@@ -299,16 +445,26 @@ def move(path: str, new_path: str) -> None:
|
|
|
299
445
|
obj._move(new_p.name, new_dir._id)
|
|
300
446
|
|
|
301
447
|
|
|
302
|
-
def drop_table(table: Union[str, catalog.Table], force: bool = False,
|
|
448
|
+
def drop_table(table: Union[str, catalog.Table], force: bool = False,
|
|
449
|
+
if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
|
|
303
450
|
"""Drop a table, view, or snapshot.
|
|
304
451
|
|
|
305
452
|
Args:
|
|
306
453
|
table: Fully qualified name, or handle, of the table to be dropped.
|
|
307
454
|
force: If `True`, will also drop all views and sub-views of this table.
|
|
308
|
-
|
|
455
|
+
if_not_exists: Directive regarding how to handle if the path does not exist.
|
|
456
|
+
Must be one of the following:
|
|
457
|
+
|
|
458
|
+
- `'error'`: raise an error
|
|
459
|
+
- `'ignore'`: do nothing and return
|
|
309
460
|
|
|
310
461
|
Raises:
|
|
311
|
-
Error:
|
|
462
|
+
Error: if the qualified name
|
|
463
|
+
|
|
464
|
+
- is invalid, or
|
|
465
|
+
- does not exist and `if_not_exists='error'`, or
|
|
466
|
+
- does not designate a table object, or
|
|
467
|
+
- designates a table object but has dependents and `force=False`.
|
|
312
468
|
|
|
313
469
|
Examples:
|
|
314
470
|
Drop a table by its fully qualified name:
|
|
@@ -318,19 +474,25 @@ def drop_table(table: Union[str, catalog.Table], force: bool = False, ignore_err
|
|
|
318
474
|
>>> t = pxt.get_table('subdir.my_table')
|
|
319
475
|
... pxt.drop_table(t)
|
|
320
476
|
|
|
477
|
+
Drop a table if it exists, otherwise do nothing:
|
|
478
|
+
>>> pxt.drop_table('subdir.my_table', if_not_exists='ignore')
|
|
479
|
+
|
|
480
|
+
Drop a table and all its dependents:
|
|
481
|
+
>>> pxt.drop_table('subdir.my_table', force=True)
|
|
321
482
|
"""
|
|
322
483
|
cat = Catalog.get()
|
|
323
484
|
if isinstance(table, str):
|
|
324
485
|
tbl_path_obj = catalog.Path(table)
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
if
|
|
486
|
+
tbl = cat.paths.get_object(tbl_path_obj)
|
|
487
|
+
if tbl is None:
|
|
488
|
+
_if_not_exists = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
489
|
+
if _if_not_exists == catalog.IfNotExistsParam.IGNORE or force:
|
|
329
490
|
_logger.info(f'Skipped table `{table}` (does not exist).')
|
|
330
491
|
return
|
|
331
492
|
else:
|
|
332
|
-
raise
|
|
333
|
-
tbl
|
|
493
|
+
raise excs.Error(f'Table `{table}` does not exist.')
|
|
494
|
+
if not isinstance(tbl, catalog.Table):
|
|
495
|
+
raise excs.Error(f'{tbl} needs to be a {catalog.Table._display_name()} but is a {type(tbl)._display_name()}')
|
|
334
496
|
else:
|
|
335
497
|
tbl = table
|
|
336
498
|
tbl_path_obj = catalog.Path(tbl._path)
|
|
@@ -376,16 +538,30 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
|
|
|
376
538
|
Catalog.get().paths.check_is_valid(path, expected=catalog.Dir)
|
|
377
539
|
return [str(p) for p in Catalog.get().paths.get_children(path, child_type=catalog.Table, recursive=recursive)]
|
|
378
540
|
|
|
379
|
-
|
|
380
|
-
def create_dir(path_str: str, ignore_errors: bool = False) -> Optional[catalog.Dir]:
|
|
541
|
+
def create_dir(path_str: str, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error') -> Optional[catalog.Dir]:
|
|
381
542
|
"""Create a directory.
|
|
382
543
|
|
|
383
544
|
Args:
|
|
384
545
|
path_str: Path to the directory.
|
|
385
|
-
|
|
546
|
+
if_exists: Directive regarding how to handle if the path already exists.
|
|
547
|
+
Must be one of the following:
|
|
548
|
+
|
|
549
|
+
- `'error'`: raise an error
|
|
550
|
+
- `'ignore'`: do nothing and return the existing directory handle
|
|
551
|
+
- `'replace'`: if the existing directory is empty, drop it and create a new one
|
|
552
|
+
- `'replace_force'`: drop the existing directory and all its children, and create a new one
|
|
553
|
+
|
|
554
|
+
Returns:
|
|
555
|
+
A handle to the newly created directory, or to an already existing directory at the path when `if_exists='ignore'`.
|
|
556
|
+
Please note the existing directory may not be empty.
|
|
386
557
|
|
|
387
558
|
Raises:
|
|
388
|
-
Error: If
|
|
559
|
+
Error: If
|
|
560
|
+
|
|
561
|
+
- the path is invalid, or
|
|
562
|
+
- the path already exists and `if_exists='error'`, or
|
|
563
|
+
- the path already exists and is not a directory, or
|
|
564
|
+
- an error occurs while attempting to create the directory.
|
|
389
565
|
|
|
390
566
|
Examples:
|
|
391
567
|
>>> pxt.create_dir('my_dir')
|
|
@@ -393,63 +569,93 @@ def create_dir(path_str: str, ignore_errors: bool = False) -> Optional[catalog.D
|
|
|
393
569
|
Create a subdirectory:
|
|
394
570
|
|
|
395
571
|
>>> pxt.create_dir('my_dir.sub_dir')
|
|
396
|
-
"""
|
|
397
|
-
try:
|
|
398
|
-
path = catalog.Path(path_str)
|
|
399
|
-
Catalog.get().paths.check_is_valid(path, expected=None)
|
|
400
|
-
parent = Catalog.get().paths[path.parent]
|
|
401
|
-
assert parent is not None
|
|
402
|
-
with orm.Session(Env.get().engine, future=True) as session:
|
|
403
|
-
dir_md = schema.DirMd(name=path.name)
|
|
404
|
-
dir_record = schema.Dir(parent_id=parent._id, md=dataclasses.asdict(dir_md))
|
|
405
|
-
session.add(dir_record)
|
|
406
|
-
session.flush()
|
|
407
|
-
assert dir_record.id is not None
|
|
408
|
-
assert isinstance(dir_record.id, UUID)
|
|
409
|
-
dir = catalog.Dir(dir_record.id, parent._id, path.name)
|
|
410
|
-
Catalog.get().paths[path] = dir
|
|
411
|
-
session.commit()
|
|
412
|
-
_logger.info(f'Created directory `{path_str}`.')
|
|
413
|
-
print(f'Created directory `{path_str}`.')
|
|
414
|
-
return dir
|
|
415
|
-
except excs.Error as e:
|
|
416
|
-
if ignore_errors:
|
|
417
|
-
return None
|
|
418
|
-
else:
|
|
419
|
-
raise e
|
|
420
572
|
|
|
573
|
+
Create a subdirectory only if it does not already exist, otherwise do nothing:
|
|
574
|
+
|
|
575
|
+
>>> pxt.create_dir('my_dir.sub_dir', if_exists='ignore')
|
|
576
|
+
|
|
577
|
+
Create a directory and replace if it already exists:
|
|
421
578
|
|
|
422
|
-
|
|
579
|
+
>>> pxt.create_dir('my_dir', if_exists='replace_force')
|
|
580
|
+
"""
|
|
581
|
+
path = catalog.Path(path_str)
|
|
582
|
+
cat = Catalog.get()
|
|
583
|
+
|
|
584
|
+
if cat.paths.get_object(path):
|
|
585
|
+
# The directory already exists. Handle it as per user directive.
|
|
586
|
+
_if_exists = catalog.IfExistsParam.validated(if_exists, 'if_exists')
|
|
587
|
+
existing_path = _get_or_drop_existing_path(path_str, catalog.Dir, False, _if_exists)
|
|
588
|
+
if existing_path is not None:
|
|
589
|
+
assert isinstance(existing_path, catalog.Dir)
|
|
590
|
+
return existing_path
|
|
591
|
+
|
|
592
|
+
parent = cat.paths[path.parent]
|
|
593
|
+
assert parent is not None
|
|
594
|
+
with orm.Session(Env.get().engine, future=True) as session:
|
|
595
|
+
dir_md = schema.DirMd(name=path.name)
|
|
596
|
+
dir_record = schema.Dir(parent_id=parent._id, md=dataclasses.asdict(dir_md))
|
|
597
|
+
session.add(dir_record)
|
|
598
|
+
session.flush()
|
|
599
|
+
assert dir_record.id is not None
|
|
600
|
+
assert isinstance(dir_record.id, UUID)
|
|
601
|
+
dir = catalog.Dir(dir_record.id, parent._id, path.name)
|
|
602
|
+
cat.paths[path] = dir
|
|
603
|
+
session.commit()
|
|
604
|
+
_logger.info(f'Created directory `{path_str}`.')
|
|
605
|
+
print(f'Created directory `{path_str}`.')
|
|
606
|
+
return dir
|
|
607
|
+
|
|
608
|
+
def drop_dir(path_str: str, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
|
|
423
609
|
"""Remove a directory.
|
|
424
610
|
|
|
425
611
|
Args:
|
|
426
612
|
path_str: Name or path of the directory.
|
|
427
613
|
force: If `True`, will also drop all tables and subdirectories of this directory, recursively, along
|
|
428
614
|
with any views or snapshots that depend on any of the dropped tables.
|
|
429
|
-
|
|
430
|
-
|
|
615
|
+
if_not_exists: Directive regarding how to handle if the path does not exist.
|
|
616
|
+
Must be one of the following:
|
|
617
|
+
|
|
618
|
+
- `'error'`: raise an error
|
|
619
|
+
- `'ignore'`: do nothing and return
|
|
431
620
|
|
|
432
621
|
Raises:
|
|
433
|
-
Error: If the path
|
|
622
|
+
Error: If the path
|
|
623
|
+
|
|
624
|
+
- is invalid, or
|
|
625
|
+
- does not exist and `if_not_exists='error'`, or
|
|
626
|
+
- does not designate a directory, or
|
|
627
|
+
- is a directory but is not empty and `force=False`.
|
|
434
628
|
|
|
435
629
|
Examples:
|
|
630
|
+
Remove a directory, if it exists and is empty:
|
|
436
631
|
>>> pxt.drop_dir('my_dir')
|
|
437
632
|
|
|
438
633
|
Remove a subdirectory:
|
|
439
634
|
|
|
440
635
|
>>> pxt.drop_dir('my_dir.sub_dir')
|
|
636
|
+
|
|
637
|
+
Remove an existing directory if it is empty, but do nothing if it does not exist:
|
|
638
|
+
|
|
639
|
+
>>> pxt.drop_dir('my_dir.sub_dir', if_not_exists='ignore')
|
|
640
|
+
|
|
641
|
+
Remove an existing directory and all its contents:
|
|
642
|
+
|
|
643
|
+
>>> pxt.drop_dir('my_dir', force=True)
|
|
441
644
|
"""
|
|
442
645
|
cat = Catalog.get()
|
|
443
646
|
path = catalog.Path(path_str)
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
_logger.info(f'Skipped directory `{path}` (does not exist).')
|
|
647
|
+
obj = cat.paths.get_object(path)
|
|
648
|
+
if obj is None:
|
|
649
|
+
_if_not_exists = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
650
|
+
if _if_not_exists == catalog.IfNotExistsParam.IGNORE or force:
|
|
651
|
+
_logger.info(f'Skipped directory `{path_str}` (does not exist).')
|
|
450
652
|
return
|
|
451
653
|
else:
|
|
452
|
-
raise
|
|
654
|
+
raise excs.Error(f'Directory `{path_str}` does not exist.')
|
|
655
|
+
|
|
656
|
+
if not isinstance(obj, catalog.Dir):
|
|
657
|
+
raise excs.Error(
|
|
658
|
+
f'{str(path)} needs to be a {catalog.Dir._display_name()} but is a {type(obj)._display_name()}')
|
|
453
659
|
|
|
454
660
|
children = cat.paths.get_children(path, child_type=None, recursive=True)
|
|
455
661
|
|
|
@@ -510,7 +716,7 @@ def list_functions() -> Styler:
|
|
|
510
716
|
paths = ['.'.join(f.self_path.split('.')[:-1]) for f in functions]
|
|
511
717
|
names = [f.name for f in functions]
|
|
512
718
|
params = [
|
|
513
|
-
', '.join([param_name + ': ' + str(param_type) for param_name, param_type in f.
|
|
719
|
+
', '.join([param_name + ': ' + str(param_type) for param_name, param_type in f.signatures[0].parameters.items()])
|
|
514
720
|
for f in functions
|
|
515
721
|
]
|
|
516
722
|
pd_df = pd.DataFrame(
|
|
@@ -518,7 +724,7 @@ def list_functions() -> Styler:
|
|
|
518
724
|
'Path': paths,
|
|
519
725
|
'Function Name': names,
|
|
520
726
|
'Parameters': params,
|
|
521
|
-
'Return Type': [str(f.
|
|
727
|
+
'Return Type': [str(f.signatures[0].get_return_type()) for f in functions],
|
|
522
728
|
}
|
|
523
729
|
)
|
|
524
730
|
pd_df = pd_df.style.set_properties(None, **{'text-align': 'left'}).set_table_styles(
|
|
@@ -527,6 +733,68 @@ def list_functions() -> Styler:
|
|
|
527
733
|
return pd_df.hide(axis='index')
|
|
528
734
|
|
|
529
735
|
|
|
736
|
+
def tools(*args: Union[func.Function, func.tools.Tool]) -> func.tools.Tools:
|
|
737
|
+
"""
|
|
738
|
+
Specifies a collection of UDFs to be used as LLM tools. Pixeltable allows any UDF to be used as an input into an
|
|
739
|
+
LLM tool-calling API. To use one or more UDFs as tools, wrap them in a `pxt.tools` call and pass the return value
|
|
740
|
+
to an LLM API.
|
|
741
|
+
|
|
742
|
+
The UDFs can be specified directly or wrapped inside a [pxt.tool()][pixeltable.tool] invocation. If a UDF is
|
|
743
|
+
specified directly, the tool name will be the (unqualified) UDF name, and the tool description will consist of the
|
|
744
|
+
entire contents of the UDF docstring. If a UDF is wrapped in a `pxt.tool()` invocation, then the name and/or
|
|
745
|
+
description may be customized.
|
|
746
|
+
|
|
747
|
+
Args:
|
|
748
|
+
args: The UDFs to use as tools.
|
|
749
|
+
|
|
750
|
+
Returns:
|
|
751
|
+
A `Tools` instance that can be passed to an LLM tool-calling API or invoked to generate tool results.
|
|
752
|
+
|
|
753
|
+
Examples:
|
|
754
|
+
Create a tools instance with a single UDF:
|
|
755
|
+
|
|
756
|
+
>>> tools = pxt.tools(stock_price)
|
|
757
|
+
|
|
758
|
+
Create a tools instance with several UDFs:
|
|
759
|
+
|
|
760
|
+
>>> tools = pxt.tools(stock_price, weather_quote)
|
|
761
|
+
|
|
762
|
+
Create a tools instance, some of whose UDFs have customized metadata:
|
|
763
|
+
|
|
764
|
+
>>> tools = pxt.tools(
|
|
765
|
+
... stock_price,
|
|
766
|
+
... pxt.tool(weather_quote, description='Returns information about the weather in a particular location.'),
|
|
767
|
+
... pxt.tool(traffic_quote, name='traffic_conditions'),
|
|
768
|
+
... )
|
|
769
|
+
"""
|
|
770
|
+
return func.tools.Tools(tools=[
|
|
771
|
+
arg if isinstance(arg, func.tools.Tool) else tool(arg)
|
|
772
|
+
for arg in args
|
|
773
|
+
])
|
|
774
|
+
|
|
775
|
+
|
|
776
|
+
def tool(fn: func.Function, name: Optional[str] = None, description: Optional[str] = None) -> func.tools.Tool:
|
|
777
|
+
"""
|
|
778
|
+
Specifies a Pixeltable UDF to be used as an LLM tool with customizable metadata. See the documentation for
|
|
779
|
+
[pxt.tools()][pixeltable.tools] for more details.
|
|
780
|
+
|
|
781
|
+
Args:
|
|
782
|
+
fn: The UDF to use as a tool.
|
|
783
|
+
name: The name of the tool. If not specified, then the unqualified name of the UDF will be used by default.
|
|
784
|
+
description: The description of the tool. If not specified, then the entire contents of the UDF docstring
|
|
785
|
+
will be used by default.
|
|
786
|
+
|
|
787
|
+
Returns:
|
|
788
|
+
A `Tool` instance that can be passed to an LLM tool-calling API.
|
|
789
|
+
"""
|
|
790
|
+
if fn.self_path is None:
|
|
791
|
+
raise excs.Error('Only module UDFs can be used as tools (not locally defined UDFs)')
|
|
792
|
+
if isinstance(fn, func.AggregateFunction):
|
|
793
|
+
raise excs.Error('Aggregator UDFs cannot be used as tools')
|
|
794
|
+
|
|
795
|
+
return func.tools.Tool(fn=fn, name=name, description=description)
|
|
796
|
+
|
|
797
|
+
|
|
530
798
|
def configure_logging(
|
|
531
799
|
*,
|
|
532
800
|
to_stdout: Optional[bool] = None,
|