pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +83 -19
- pixeltable/_query.py +1444 -0
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +7 -4
- pixeltable/catalog/catalog.py +2394 -119
- pixeltable/catalog/column.py +225 -104
- pixeltable/catalog/dir.py +38 -9
- pixeltable/catalog/globals.py +53 -34
- pixeltable/catalog/insertable_table.py +265 -115
- pixeltable/catalog/path.py +80 -17
- pixeltable/catalog/schema_object.py +28 -43
- pixeltable/catalog/table.py +1270 -677
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +1270 -751
- pixeltable/catalog/table_version_handle.py +109 -0
- pixeltable/catalog/table_version_path.py +137 -42
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +251 -134
- pixeltable/config.py +215 -0
- pixeltable/env.py +736 -285
- pixeltable/exceptions.py +26 -2
- pixeltable/exec/__init__.py +7 -2
- pixeltable/exec/aggregation_node.py +39 -21
- pixeltable/exec/cache_prefetch_node.py +87 -109
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +25 -28
- pixeltable/exec/data_row_batch.py +11 -46
- pixeltable/exec/exec_context.py +26 -11
- pixeltable/exec/exec_node.py +35 -27
- pixeltable/exec/expr_eval/__init__.py +3 -0
- pixeltable/exec/expr_eval/evaluators.py +365 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
- pixeltable/exec/expr_eval/globals.py +200 -0
- pixeltable/exec/expr_eval/row_buffer.py +74 -0
- pixeltable/exec/expr_eval/schedulers.py +413 -0
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +35 -27
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +44 -29
- pixeltable/exec/sql_node.py +414 -115
- pixeltable/exprs/__init__.py +8 -5
- pixeltable/exprs/arithmetic_expr.py +79 -45
- pixeltable/exprs/array_slice.py +5 -5
- pixeltable/exprs/column_property_ref.py +40 -26
- pixeltable/exprs/column_ref.py +254 -61
- pixeltable/exprs/comparison.py +14 -9
- pixeltable/exprs/compound_predicate.py +9 -10
- pixeltable/exprs/data_row.py +213 -72
- pixeltable/exprs/expr.py +270 -104
- pixeltable/exprs/expr_dict.py +6 -5
- pixeltable/exprs/expr_set.py +20 -11
- pixeltable/exprs/function_call.py +383 -284
- pixeltable/exprs/globals.py +18 -5
- pixeltable/exprs/in_predicate.py +7 -7
- pixeltable/exprs/inline_expr.py +37 -37
- pixeltable/exprs/is_null.py +8 -4
- pixeltable/exprs/json_mapper.py +120 -54
- pixeltable/exprs/json_path.py +90 -60
- pixeltable/exprs/literal.py +61 -16
- pixeltable/exprs/method_ref.py +7 -6
- pixeltable/exprs/object_ref.py +19 -8
- pixeltable/exprs/row_builder.py +238 -75
- pixeltable/exprs/rowid_ref.py +53 -15
- pixeltable/exprs/similarity_expr.py +65 -50
- pixeltable/exprs/sql_element_cache.py +5 -5
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/exprs/type_cast.py +25 -13
- pixeltable/exprs/variable.py +2 -2
- pixeltable/func/__init__.py +9 -5
- pixeltable/func/aggregate_function.py +197 -92
- pixeltable/func/callable_function.py +119 -35
- pixeltable/func/expr_template_function.py +101 -48
- pixeltable/func/function.py +375 -62
- pixeltable/func/function_registry.py +20 -19
- pixeltable/func/globals.py +6 -5
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +151 -35
- pixeltable/func/signature.py +178 -49
- pixeltable/func/tools.py +164 -0
- pixeltable/func/udf.py +176 -53
- pixeltable/functions/__init__.py +44 -4
- pixeltable/functions/anthropic.py +226 -47
- pixeltable/functions/audio.py +148 -11
- pixeltable/functions/bedrock.py +137 -0
- pixeltable/functions/date.py +188 -0
- pixeltable/functions/deepseek.py +113 -0
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +72 -20
- pixeltable/functions/gemini.py +249 -0
- pixeltable/functions/globals.py +208 -53
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1088 -95
- pixeltable/functions/image.py +155 -84
- pixeltable/functions/json.py +8 -11
- pixeltable/functions/llama_cpp.py +31 -19
- pixeltable/functions/math.py +169 -0
- pixeltable/functions/mistralai.py +50 -75
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +29 -36
- pixeltable/functions/openai.py +548 -160
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +15 -14
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +310 -85
- pixeltable/functions/timestamp.py +37 -19
- pixeltable/functions/together.py +77 -120
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +7 -2
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1528 -117
- pixeltable/functions/vision.py +26 -26
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +19 -10
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/functions/yolox.py +112 -0
- pixeltable/globals.py +716 -236
- pixeltable/index/__init__.py +3 -1
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +32 -22
- pixeltable/index/embedding_index.py +155 -92
- pixeltable/io/__init__.py +12 -7
- pixeltable/io/datarows.py +140 -0
- pixeltable/io/external_store.py +83 -125
- pixeltable/io/fiftyone.py +24 -33
- pixeltable/io/globals.py +47 -182
- pixeltable/io/hf_datasets.py +96 -127
- pixeltable/io/label_studio.py +171 -156
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +136 -115
- pixeltable/io/parquet.py +40 -153
- pixeltable/io/table_data_conduit.py +702 -0
- pixeltable/io/utils.py +100 -0
- pixeltable/iterators/__init__.py +8 -4
- pixeltable/iterators/audio.py +207 -0
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +144 -87
- pixeltable/iterators/image.py +17 -38
- pixeltable/iterators/string.py +15 -12
- pixeltable/iterators/video.py +523 -127
- pixeltable/metadata/__init__.py +33 -8
- pixeltable/metadata/converters/convert_10.py +2 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_15.py +15 -11
- pixeltable/metadata/converters/convert_16.py +4 -5
- pixeltable/metadata/converters/convert_17.py +4 -5
- pixeltable/metadata/converters/convert_18.py +4 -6
- pixeltable/metadata/converters/convert_19.py +6 -9
- pixeltable/metadata/converters/convert_20.py +3 -6
- pixeltable/metadata/converters/convert_21.py +6 -8
- pixeltable/metadata/converters/convert_22.py +3 -2
- pixeltable/metadata/converters/convert_23.py +33 -0
- pixeltable/metadata/converters/convert_24.py +55 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/convert_27.py +29 -0
- pixeltable/metadata/converters/convert_28.py +13 -0
- pixeltable/metadata/converters/convert_29.py +110 -0
- pixeltable/metadata/converters/convert_30.py +63 -0
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +44 -18
- pixeltable/metadata/notes.py +21 -0
- pixeltable/metadata/schema.py +185 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +616 -225
- pixeltable/share/__init__.py +3 -0
- pixeltable/share/packager.py +797 -0
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +349 -0
- pixeltable/store.py +398 -232
- pixeltable/type_system.py +730 -267
- pixeltable/utils/__init__.py +40 -0
- pixeltable/utils/arrow.py +201 -29
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +26 -27
- pixeltable/utils/code.py +4 -4
- pixeltable/utils/console_output.py +46 -0
- pixeltable/utils/coroutine.py +24 -0
- pixeltable/utils/dbms.py +92 -0
- pixeltable/utils/description_helper.py +11 -12
- pixeltable/utils/documents.py +60 -61
- pixeltable/utils/exception_handler.py +36 -0
- pixeltable/utils/filecache.py +38 -22
- pixeltable/utils/formatter.py +88 -51
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +14 -13
- pixeltable/utils/iceberg.py +13 -0
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +20 -20
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +32 -5
- pixeltable/utils/system.py +30 -0
- pixeltable/utils/transactional_directory.py +4 -3
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -36
- pixeltable/catalog/path_dict.py +0 -141
- pixeltable/dataframe.py +0 -894
- pixeltable/exec/expr_eval_node.py +0 -232
- pixeltable/ext/__init__.py +0 -14
- pixeltable/ext/functions/__init__.py +0 -8
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/ext/functions/yolox.py +0 -157
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable/utils/media_store.py +0 -76
- pixeltable/utils/s3.py +0 -16
- pixeltable-0.2.26.dist-info/METADATA +0 -400
- pixeltable-0.2.26.dist-info/RECORD +0 -156
- pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/globals.py
CHANGED
|
@@ -1,46 +1,94 @@
|
|
|
1
|
-
import
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
2
3
|
import logging
|
|
3
|
-
|
|
4
|
-
from
|
|
4
|
+
import os
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Iterable, Literal, TypedDict, Union
|
|
5
7
|
|
|
6
8
|
import pandas as pd
|
|
7
|
-
import
|
|
9
|
+
import pydantic
|
|
8
10
|
from pandas.io.formats.style import Styler
|
|
9
|
-
from sqlalchemy.util.preloaded import orm
|
|
10
11
|
|
|
11
|
-
import
|
|
12
|
-
|
|
13
|
-
from pixeltable import
|
|
14
|
-
from pixeltable.
|
|
15
|
-
from pixeltable.dataframe import DataFrameResultSet
|
|
12
|
+
from pixeltable import Query, catalog, exceptions as excs, exprs, func, share, type_system as ts
|
|
13
|
+
from pixeltable.catalog import Catalog, TableVersionPath
|
|
14
|
+
from pixeltable.catalog.insertable_table import OnErrorParameter
|
|
15
|
+
from pixeltable.config import Config
|
|
16
16
|
from pixeltable.env import Env
|
|
17
|
+
from pixeltable.io.table_data_conduit import QueryTableDataConduit, TableDataConduit
|
|
17
18
|
from pixeltable.iterators import ComponentIterator
|
|
18
|
-
|
|
19
|
-
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
import datasets # type: ignore[import-untyped]
|
|
22
|
+
|
|
23
|
+
RowData = list[dict[str, Any]]
|
|
24
|
+
TableDataSource = Union[
|
|
25
|
+
str,
|
|
26
|
+
os.PathLike,
|
|
27
|
+
Path, # OS paths, filenames, URLs
|
|
28
|
+
Iterable[dict[str, Any]], # dictionaries of values
|
|
29
|
+
Iterable[pydantic.BaseModel], # Pydantic model instances
|
|
30
|
+
catalog.Table, # Pixeltable Table
|
|
31
|
+
Query, # Pixeltable Query
|
|
32
|
+
pd.DataFrame, # pandas DataFrame
|
|
33
|
+
datasets.Dataset,
|
|
34
|
+
datasets.DatasetDict, # Huggingface datasets
|
|
35
|
+
]
|
|
36
|
+
|
|
20
37
|
|
|
21
38
|
_logger = logging.getLogger('pixeltable')
|
|
22
39
|
|
|
23
40
|
|
|
24
|
-
def init() -> None:
|
|
41
|
+
def init(config_overrides: dict[str, Any] | None = None) -> None:
|
|
25
42
|
"""Initializes the Pixeltable environment."""
|
|
43
|
+
if config_overrides is None:
|
|
44
|
+
config_overrides = {}
|
|
45
|
+
Config.init(config_overrides)
|
|
26
46
|
_ = Catalog.get()
|
|
27
47
|
|
|
28
48
|
|
|
29
49
|
def create_table(
|
|
30
|
-
|
|
31
|
-
|
|
50
|
+
path: str,
|
|
51
|
+
schema: dict[str, Any] | None = None,
|
|
32
52
|
*,
|
|
33
|
-
|
|
53
|
+
source: TableDataSource | None = None,
|
|
54
|
+
source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
|
|
55
|
+
schema_overrides: dict[str, Any] | None = None,
|
|
56
|
+
create_default_idxs: bool = True,
|
|
57
|
+
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
58
|
+
primary_key: str | list[str] | None = None,
|
|
34
59
|
num_retained_versions: int = 10,
|
|
35
60
|
comment: str = '',
|
|
36
|
-
media_validation: Literal['on_read', 'on_write'] = 'on_write'
|
|
61
|
+
media_validation: Literal['on_read', 'on_write'] = 'on_write',
|
|
62
|
+
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
|
|
63
|
+
extra_args: dict[str, Any] | None = None, # Additional arguments to data source provider
|
|
37
64
|
) -> catalog.Table:
|
|
38
|
-
"""Create a new base table.
|
|
65
|
+
"""Create a new base table. Exactly one of `schema` or `source` must be provided.
|
|
66
|
+
|
|
67
|
+
If a `schema` is provided, then an empty table will be created with the specified schema.
|
|
68
|
+
|
|
69
|
+
If a `source` is provided, then Pixeltable will attempt to infer a data source format and table schema from the
|
|
70
|
+
contents of the specified data, and the data will be imported from the specified source into the new table. The
|
|
71
|
+
source format and/or schema can be specified directly via the `source_format` and `schema_overrides` parameters.
|
|
39
72
|
|
|
40
73
|
Args:
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
74
|
+
path: Pixeltable path (qualified name) of the table, such as `'my_table'` or `'my_dir.my_subdir.my_table'`.
|
|
75
|
+
schema: Schema for the new table, mapping column names to Pixeltable types.
|
|
76
|
+
source: A data source (file, URL, Table, Query, or list of rows) to import from.
|
|
77
|
+
source_format: Must be used in conjunction with a `source`.
|
|
78
|
+
If specified, then the given format will be used to read the source data. (Otherwise,
|
|
79
|
+
Pixeltable will attempt to infer the format from the source data.)
|
|
80
|
+
schema_overrides: Must be used in conjunction with a `source`.
|
|
81
|
+
If specified, then columns in `schema_overrides` will be given the specified types.
|
|
82
|
+
(Pixeltable will attempt to infer the types of any columns not specified.)
|
|
83
|
+
create_default_idxs: If True, creates a B-tree index on every scalar and media column that is not computed,
|
|
84
|
+
except for boolean columns.
|
|
85
|
+
on_error: Determines the behavior if an error occurs while evaluating a computed column or detecting an
|
|
86
|
+
invalid media file (such as a corrupt image) for one of the inserted rows.
|
|
87
|
+
|
|
88
|
+
- If `on_error='abort'`, then an exception will be raised and the rows will not be inserted.
|
|
89
|
+
- If `on_error='ignore'`, then execution will continue and the rows will be inserted. Any cells
|
|
90
|
+
with errors will have a `None` value for that cell, with information about the error stored in the
|
|
91
|
+
corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
|
|
44
92
|
primary_key: An optional column name or list of column names to use as the primary key(s) of the
|
|
45
93
|
table.
|
|
46
94
|
num_retained_versions: Number of versions of the table to retain.
|
|
@@ -49,12 +97,28 @@ def create_table(
|
|
|
49
97
|
|
|
50
98
|
- `'on_read'`: validate media files at query time
|
|
51
99
|
- `'on_write'`: validate media files during insert/update operations
|
|
100
|
+
if_exists: Determines the behavior if a table already exists at the specified path location.
|
|
101
|
+
|
|
102
|
+
- `'error'`: raise an error
|
|
103
|
+
- `'ignore'`: do nothing and return the existing table handle
|
|
104
|
+
- `'replace'`: if the existing table has no views or snapshots, drop and replace it with a new one;
|
|
105
|
+
raise an error if the existing table has views or snapshots
|
|
106
|
+
- `'replace_force'`: drop the existing table and all its views and snapshots, and create a new one
|
|
107
|
+
extra_args: Must be used in conjunction with a `source`. If specified, then additional arguments will be
|
|
108
|
+
passed along to the source data provider.
|
|
52
109
|
|
|
53
110
|
Returns:
|
|
54
|
-
A handle to the newly created
|
|
111
|
+
A handle to the newly created table, or to an already existing table at the path when `if_exists='ignore'`.
|
|
112
|
+
Please note the schema of the existing table may not match the schema provided in the call.
|
|
55
113
|
|
|
56
114
|
Raises:
|
|
57
|
-
Error: if
|
|
115
|
+
Error: if
|
|
116
|
+
|
|
117
|
+
- the path is invalid, or
|
|
118
|
+
- the path already exists and `if_exists='error'`, or
|
|
119
|
+
- the path already exists and is not a table, or
|
|
120
|
+
- an error occurs while attempting to create the table, or
|
|
121
|
+
- an error occurs while attempting to import data from the source.
|
|
58
122
|
|
|
59
123
|
Examples:
|
|
60
124
|
Create a table with an int and a string column:
|
|
@@ -66,164 +130,293 @@ def create_table(
|
|
|
66
130
|
|
|
67
131
|
>>> tbl1 = pxt.get_table('orig_table')
|
|
68
132
|
... tbl2 = pxt.create_table('new_table', tbl1.where(tbl1.col1 < 10).select(tbl1.col2))
|
|
69
|
-
"""
|
|
70
|
-
path = catalog.Path(path_str)
|
|
71
|
-
Catalog.get().paths.check_is_valid(path, expected=None)
|
|
72
|
-
dir = Catalog.get().paths[path.parent]
|
|
73
|
-
|
|
74
|
-
df: Optional[DataFrame] = None
|
|
75
|
-
if isinstance(schema_or_df, dict):
|
|
76
|
-
schema = schema_or_df
|
|
77
|
-
elif isinstance(schema_or_df, DataFrame):
|
|
78
|
-
df = schema_or_df
|
|
79
|
-
schema = df.schema
|
|
80
|
-
elif isinstance(schema_or_df, DataFrameResultSet):
|
|
81
|
-
raise excs.Error('`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame. (Is there an extraneous call to `collect()`?)')
|
|
82
|
-
else:
|
|
83
|
-
raise excs.Error('`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame.')
|
|
84
133
|
|
|
85
|
-
|
|
86
|
-
|
|
134
|
+
Create a table if it does not already exist, otherwise get the existing table:
|
|
135
|
+
|
|
136
|
+
>>> tbl = pxt.create_table('my_table', schema={'col1': pxt.Int, 'col2': pxt.String}, if_exists='ignore')
|
|
137
|
+
|
|
138
|
+
Create a table with an int and a float column, and replace any existing table:
|
|
139
|
+
|
|
140
|
+
>>> tbl = pxt.create_table('my_table', schema={'col1': pxt.Int, 'col2': pxt.Float}, if_exists='replace')
|
|
141
|
+
|
|
142
|
+
Create a table from a CSV file:
|
|
143
|
+
|
|
144
|
+
>>> tbl = pxt.create_table('my_table', source='data.csv')
|
|
87
145
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
146
|
+
Create a table with an auto-generated UUID primary key:
|
|
147
|
+
|
|
148
|
+
>>> tbl = pxt.create_table(
|
|
149
|
+
... 'my_table',
|
|
150
|
+
... schema={'id': pxt.functions.uuid.uuid4(), 'data': pxt.String},
|
|
151
|
+
... primary_key=['id']
|
|
152
|
+
... )
|
|
153
|
+
"""
|
|
154
|
+
from pixeltable.io.table_data_conduit import UnkTableDataConduit
|
|
155
|
+
from pixeltable.io.utils import normalize_primary_key_parameter
|
|
156
|
+
|
|
157
|
+
if (schema is None) == (source is None):
|
|
158
|
+
raise excs.Error('Either a `schema` or a `source` must be provided (but not both)')
|
|
159
|
+
|
|
160
|
+
if schema is not None and (len(schema) == 0 or not isinstance(schema, dict)):
|
|
161
|
+
raise excs.Error('`schema` must be a non-empty dictionary')
|
|
162
|
+
|
|
163
|
+
path_obj = catalog.Path.parse(path)
|
|
164
|
+
if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
|
|
165
|
+
media_validation_ = catalog.MediaValidation.validated(media_validation, 'media_validation')
|
|
166
|
+
primary_key: list[str] | None = normalize_primary_key_parameter(primary_key)
|
|
167
|
+
data_source: TableDataConduit | None = None
|
|
168
|
+
if source is not None:
|
|
169
|
+
if isinstance(source, str) and source.strip().startswith('pxt://'):
|
|
170
|
+
raise excs.Error(
|
|
171
|
+
'create_table(): Creating a table directly from a cloud URI is not supported.'
|
|
172
|
+
' Please replicate the table locally first using `pxt.replicate()`:\n'
|
|
173
|
+
"replica_tbl = pxt.replicate('pxt://path/to/remote_table', 'local_replica_name')\n"
|
|
174
|
+
"pxt.create_table('new_table_name', source=replica_tbl)"
|
|
175
|
+
)
|
|
176
|
+
tds = UnkTableDataConduit(source, source_format=source_format, extra_fields=extra_args)
|
|
177
|
+
tds.check_source_format()
|
|
178
|
+
data_source = tds.specialize()
|
|
179
|
+
src_schema_overrides: dict[str, ts.ColumnType] = {}
|
|
180
|
+
if schema_overrides is not None:
|
|
181
|
+
for col_name, py_type in schema_overrides.items():
|
|
182
|
+
col_type = ts.ColumnType.normalize_type(py_type, nullable_default=True, allow_builtin_types=False)
|
|
183
|
+
if col_type is None:
|
|
184
|
+
raise excs.Error(f'Invalid type for column {col_name!r} in `schema_overrides`: {py_type}')
|
|
185
|
+
src_schema_overrides[col_name] = col_type
|
|
186
|
+
data_source.src_schema_overrides = src_schema_overrides
|
|
187
|
+
data_source.src_pk = primary_key
|
|
188
|
+
data_source.infer_schema()
|
|
189
|
+
schema = data_source.pxt_schema
|
|
190
|
+
primary_key = data_source.pxt_pk
|
|
191
|
+
is_direct_query = data_source.is_direct_query()
|
|
92
192
|
else:
|
|
93
|
-
|
|
94
|
-
|
|
193
|
+
is_direct_query = False
|
|
194
|
+
|
|
195
|
+
if len(schema) == 0 or not isinstance(schema, dict):
|
|
196
|
+
raise excs.Error(
|
|
197
|
+
'Unable to create a proper schema from supplied `source`. Please use appropriate `schema_overrides`.'
|
|
198
|
+
)
|
|
199
|
+
|
|
200
|
+
tbl, was_created = Catalog.get().create_table(
|
|
201
|
+
path_obj,
|
|
202
|
+
schema,
|
|
203
|
+
if_exists=if_exists_,
|
|
204
|
+
primary_key=primary_key,
|
|
205
|
+
comment=comment,
|
|
206
|
+
media_validation=media_validation_,
|
|
207
|
+
num_retained_versions=num_retained_versions,
|
|
208
|
+
create_default_idxs=create_default_idxs,
|
|
209
|
+
)
|
|
95
210
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
211
|
+
# TODO: combine data loading with table creation into a single transaction
|
|
212
|
+
if was_created:
|
|
213
|
+
fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
|
|
214
|
+
if isinstance(data_source, QueryTableDataConduit):
|
|
215
|
+
query = data_source.pxt_query
|
|
216
|
+
with Catalog.get().begin_xact(tbl=tbl._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
217
|
+
tbl._tbl_version.get().insert(None, query, fail_on_exception=fail_on_exception)
|
|
218
|
+
elif data_source is not None and not is_direct_query:
|
|
219
|
+
tbl.insert_table_data_source(data_source=data_source, fail_on_exception=fail_on_exception)
|
|
100
220
|
|
|
101
|
-
_logger.info(f'Created table `{path_str}`.')
|
|
102
221
|
return tbl
|
|
103
222
|
|
|
104
223
|
|
|
105
224
|
def create_view(
|
|
106
|
-
|
|
107
|
-
base:
|
|
225
|
+
path: str,
|
|
226
|
+
base: catalog.Table | Query,
|
|
108
227
|
*,
|
|
109
|
-
additional_columns:
|
|
228
|
+
additional_columns: dict[str, Any] | None = None,
|
|
110
229
|
is_snapshot: bool = False,
|
|
111
|
-
|
|
230
|
+
create_default_idxs: bool = False,
|
|
231
|
+
iterator: tuple[type[ComponentIterator], dict[str, Any]] | None = None,
|
|
112
232
|
num_retained_versions: int = 10,
|
|
113
233
|
comment: str = '',
|
|
114
234
|
media_validation: Literal['on_read', 'on_write'] = 'on_write',
|
|
115
|
-
|
|
116
|
-
) ->
|
|
235
|
+
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
|
|
236
|
+
) -> catalog.Table | None:
|
|
117
237
|
"""Create a view of an existing table object (which itself can be a view or a snapshot or a base table).
|
|
118
238
|
|
|
119
239
|
Args:
|
|
120
|
-
|
|
240
|
+
path: A name for the view; can be either a simple name such as `my_view`, or a pathname such as
|
|
121
241
|
`dir1.my_view`.
|
|
122
|
-
base: [`Table`][pixeltable.Table] (i.e., table or view or snapshot) or [`
|
|
242
|
+
base: [`Table`][pixeltable.Table] (i.e., table or view or snapshot) or [`Query`][pixeltable.Query] to
|
|
123
243
|
base the view on.
|
|
124
244
|
additional_columns: If specified, will add these columns to the view once it is created. The format
|
|
125
|
-
of the `additional_columns` parameter is identical to the format of the `
|
|
245
|
+
of the `additional_columns` parameter is identical to the format of the `schema` parameter in
|
|
126
246
|
[`create_table`][pixeltable.create_table].
|
|
127
247
|
is_snapshot: Whether the view is a snapshot. Setting this to `True` is equivalent to calling
|
|
128
248
|
[`create_snapshot`][pixeltable.create_snapshot].
|
|
249
|
+
create_default_idxs: Whether to create default indexes on the view's columns (the base's columns are excluded).
|
|
250
|
+
Cannot be `True` for snapshots.
|
|
129
251
|
iterator: The iterator to use for this view. If specified, then this view will be a one-to-many view of
|
|
130
252
|
the base table.
|
|
131
253
|
num_retained_versions: Number of versions of the view to retain.
|
|
132
254
|
comment: Optional comment for the view.
|
|
133
|
-
|
|
255
|
+
media_validation: Media validation policy for the view.
|
|
256
|
+
|
|
257
|
+
- `'on_read'`: validate media files at query time
|
|
258
|
+
- `'on_write'`: validate media files during insert/update operations
|
|
259
|
+
if_exists: Directive for how to handle the case where the path already exists.
|
|
260
|
+
Must be one of the following:
|
|
261
|
+
|
|
262
|
+
- `'error'`: raise an error
|
|
263
|
+
- `'ignore'`: do nothing and return the existing view handle
|
|
264
|
+
- `'replace'`: if the existing view has no dependents, drop and replace it with a new one
|
|
265
|
+
- `'replace_force'`: drop the existing view and all its dependents, and create a new one
|
|
134
266
|
|
|
135
267
|
Returns:
|
|
136
268
|
A handle to the [`Table`][pixeltable.Table] representing the newly created view. If the path already
|
|
137
|
-
exists
|
|
269
|
+
exists and `if_exists='ignore'`, returns a handle to the existing view. Please note the schema
|
|
270
|
+
or the base of the existing view may not match those provided in the call.
|
|
138
271
|
|
|
139
272
|
Raises:
|
|
140
|
-
Error: if
|
|
273
|
+
Error: if
|
|
274
|
+
|
|
275
|
+
- the path is invalid, or
|
|
276
|
+
- the path already exists and `if_exists='error'`, or
|
|
277
|
+
- the path already exists and is not a view, or
|
|
278
|
+
- an error occurs while attempting to create the view.
|
|
141
279
|
|
|
142
280
|
Examples:
|
|
143
281
|
Create a view `my_view` of an existing table `my_table`, filtering on rows where `col1` is greater than 10:
|
|
144
282
|
|
|
145
283
|
>>> tbl = pxt.get_table('my_table')
|
|
146
284
|
... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 10))
|
|
285
|
+
|
|
286
|
+
Create a view `my_view` of an existing table `my_table`, filtering on rows where `col1` is greater than 10,
|
|
287
|
+
if it does not already exist. Otherwise, get the existing view named `my_view`:
|
|
288
|
+
|
|
289
|
+
>>> tbl = pxt.get_table('my_table')
|
|
290
|
+
... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 10), if_exists='ignore')
|
|
291
|
+
|
|
292
|
+
Create a view `my_view` of an existing table `my_table`, filtering on rows where `col1` is greater than 100,
|
|
293
|
+
and replace any existing view named `my_view`:
|
|
294
|
+
|
|
295
|
+
>>> tbl = pxt.get_table('my_table')
|
|
296
|
+
... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 100), if_exists='replace_force')
|
|
147
297
|
"""
|
|
148
|
-
|
|
298
|
+
if is_snapshot and create_default_idxs is True:
|
|
299
|
+
raise excs.Error('Cannot create default indexes on a snapshot')
|
|
300
|
+
tbl_version_path: TableVersionPath
|
|
301
|
+
select_list: list[tuple[exprs.Expr, str | None]] | None = None
|
|
302
|
+
where: exprs.Expr | None = None
|
|
149
303
|
if isinstance(base, catalog.Table):
|
|
150
304
|
tbl_version_path = base._tbl_version_path
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
raise excs.Error('Cannot create a view of a join')
|
|
305
|
+
sample_clause = None
|
|
306
|
+
elif isinstance(base, Query):
|
|
307
|
+
base._validate_mutable_op_sequence('create_view', allow_select=True)
|
|
155
308
|
tbl_version_path = base._from_clause.tbls[0]
|
|
156
309
|
where = base.where_clause
|
|
310
|
+
sample_clause = base.sample_clause
|
|
311
|
+
select_list = base.select_list
|
|
312
|
+
if sample_clause is not None and not is_snapshot and not sample_clause.is_repeatable:
|
|
313
|
+
raise excs.Error('Non-snapshot views cannot be created with non-fractional or stratified sampling')
|
|
157
314
|
else:
|
|
158
|
-
raise excs.Error('`base` must be an instance of `Table` or `
|
|
159
|
-
assert isinstance(base, catalog.Table
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
raise e
|
|
168
|
-
dir = Catalog.get().paths[path.parent]
|
|
315
|
+
raise excs.Error('`base` must be an instance of `Table` or `Query`')
|
|
316
|
+
assert isinstance(base, (catalog.Table, Query))
|
|
317
|
+
|
|
318
|
+
if tbl_version_path.is_replica():
|
|
319
|
+
raise excs.Error('Cannot create a view or snapshot on top of a replica')
|
|
320
|
+
|
|
321
|
+
path_obj = catalog.Path.parse(path)
|
|
322
|
+
if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
|
|
323
|
+
media_validation_ = catalog.MediaValidation.validated(media_validation, 'media_validation')
|
|
169
324
|
|
|
170
325
|
if additional_columns is None:
|
|
171
326
|
additional_columns = {}
|
|
172
|
-
if iterator is None:
|
|
173
|
-
iterator_class, iterator_args = None, None
|
|
174
327
|
else:
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
328
|
+
# additional columns should not be in the base table
|
|
329
|
+
for col_name in additional_columns:
|
|
330
|
+
if col_name in [c.name for c in tbl_version_path.columns()]:
|
|
331
|
+
raise excs.Error(
|
|
332
|
+
f'Column {col_name!r} already exists in the base table '
|
|
333
|
+
f'{tbl_version_path.get_column(col_name).get_tbl().name}.'
|
|
334
|
+
)
|
|
335
|
+
|
|
336
|
+
return Catalog.get().create_view(
|
|
337
|
+
path_obj,
|
|
338
|
+
tbl_version_path,
|
|
339
|
+
select_list=select_list,
|
|
340
|
+
where=where,
|
|
341
|
+
sample_clause=sample_clause,
|
|
342
|
+
additional_columns=additional_columns,
|
|
343
|
+
is_snapshot=is_snapshot,
|
|
344
|
+
create_default_idxs=create_default_idxs,
|
|
345
|
+
iterator=iterator,
|
|
346
|
+
num_retained_versions=num_retained_versions,
|
|
347
|
+
comment=comment,
|
|
348
|
+
media_validation=media_validation_,
|
|
349
|
+
if_exists=if_exists_,
|
|
350
|
+
)
|
|
186
351
|
|
|
187
352
|
|
|
188
353
|
def create_snapshot(
|
|
189
354
|
path_str: str,
|
|
190
|
-
base:
|
|
355
|
+
base: catalog.Table | Query,
|
|
191
356
|
*,
|
|
192
|
-
additional_columns:
|
|
193
|
-
iterator:
|
|
357
|
+
additional_columns: dict[str, Any] | None = None,
|
|
358
|
+
iterator: tuple[type[ComponentIterator], dict[str, Any]] | None = None,
|
|
194
359
|
num_retained_versions: int = 10,
|
|
195
360
|
comment: str = '',
|
|
196
361
|
media_validation: Literal['on_read', 'on_write'] = 'on_write',
|
|
197
|
-
|
|
198
|
-
) ->
|
|
362
|
+
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
|
|
363
|
+
) -> catalog.Table | None:
|
|
199
364
|
"""Create a snapshot of an existing table object (which itself can be a view or a snapshot or a base table).
|
|
200
365
|
|
|
201
366
|
Args:
|
|
202
367
|
path_str: A name for the snapshot; can be either a simple name such as `my_snapshot`, or a pathname such as
|
|
203
368
|
`dir1.my_snapshot`.
|
|
204
|
-
base: [`Table`][pixeltable.Table] (i.e., table or view or snapshot) or [`
|
|
369
|
+
base: [`Table`][pixeltable.Table] (i.e., table or view or snapshot) or [`Query`][pixeltable.Query] to
|
|
205
370
|
base the snapshot on.
|
|
206
371
|
additional_columns: If specified, will add these columns to the snapshot once it is created. The format
|
|
207
|
-
of the `additional_columns` parameter is identical to the format of the `
|
|
372
|
+
of the `additional_columns` parameter is identical to the format of the `schema` parameter in
|
|
208
373
|
[`create_table`][pixeltable.create_table].
|
|
209
374
|
iterator: The iterator to use for this snapshot. If specified, then this snapshot will be a one-to-many view of
|
|
210
375
|
the base table.
|
|
211
376
|
num_retained_versions: Number of versions of the view to retain.
|
|
212
|
-
comment: Optional comment for the
|
|
213
|
-
|
|
377
|
+
comment: Optional comment for the snapshot.
|
|
378
|
+
media_validation: Media validation policy for the snapshot.
|
|
379
|
+
|
|
380
|
+
- `'on_read'`: validate media files at query time
|
|
381
|
+
- `'on_write'`: validate media files during insert/update operations
|
|
382
|
+
if_exists: Directive regarding how to handle if the path already exists.
|
|
383
|
+
Must be one of the following:
|
|
384
|
+
|
|
385
|
+
- `'error'`: raise an error
|
|
386
|
+
- `'ignore'`: do nothing and return the existing snapshot handle
|
|
387
|
+
- `'replace'`: if the existing snapshot has no dependents, drop and replace it with a new one
|
|
388
|
+
- `'replace_force'`: drop the existing snapshot and all its dependents, and create a new one
|
|
214
389
|
|
|
215
390
|
Returns:
|
|
216
|
-
A handle to the [`Table`][pixeltable.Table] representing the newly created snapshot.
|
|
217
|
-
|
|
391
|
+
A handle to the [`Table`][pixeltable.Table] representing the newly created snapshot.
|
|
392
|
+
Please note the schema or base of the existing snapshot may not match those provided in the call.
|
|
218
393
|
|
|
219
394
|
Raises:
|
|
220
|
-
Error: if
|
|
395
|
+
Error: if
|
|
396
|
+
|
|
397
|
+
- the path is invalid, or
|
|
398
|
+
- the path already exists and `if_exists='error'`, or
|
|
399
|
+
- the path already exists and is not a snapshot, or
|
|
400
|
+
- an error occurs while attempting to create the snapshot.
|
|
221
401
|
|
|
222
402
|
Examples:
|
|
223
|
-
Create a snapshot of `my_table`:
|
|
403
|
+
Create a snapshot `my_snapshot` of a table `my_table`:
|
|
224
404
|
|
|
225
405
|
>>> tbl = pxt.get_table('my_table')
|
|
226
406
|
... snapshot = pxt.create_snapshot('my_snapshot', tbl)
|
|
407
|
+
|
|
408
|
+
Create a snapshot `my_snapshot` of a view `my_view` with additional int column `col3`,
|
|
409
|
+
if `my_snapshot` does not already exist:
|
|
410
|
+
|
|
411
|
+
>>> view = pxt.get_table('my_view')
|
|
412
|
+
... snapshot = pxt.create_snapshot(
|
|
413
|
+
... 'my_snapshot', view, additional_columns={'col3': pxt.Int}, if_exists='ignore'
|
|
414
|
+
... )
|
|
415
|
+
|
|
416
|
+
Create a snapshot `my_snapshot` on a table `my_table`, and replace any existing snapshot named `my_snapshot`:
|
|
417
|
+
|
|
418
|
+
>>> tbl = pxt.get_table('my_table')
|
|
419
|
+
... snapshot = pxt.create_snapshot('my_snapshot', tbl, if_exists='replace_force')
|
|
227
420
|
"""
|
|
228
421
|
return create_view(
|
|
229
422
|
path_str,
|
|
@@ -234,15 +427,71 @@ def create_snapshot(
|
|
|
234
427
|
num_retained_versions=num_retained_versions,
|
|
235
428
|
comment=comment,
|
|
236
429
|
media_validation=media_validation,
|
|
237
|
-
|
|
430
|
+
if_exists=if_exists,
|
|
238
431
|
)
|
|
239
432
|
|
|
240
433
|
|
|
241
|
-
def
|
|
434
|
+
def publish(
|
|
435
|
+
source: str | catalog.Table,
|
|
436
|
+
destination_uri: str,
|
|
437
|
+
bucket_name: str | None = None,
|
|
438
|
+
access: Literal['public', 'private'] = 'private',
|
|
439
|
+
) -> None:
|
|
440
|
+
"""
|
|
441
|
+
Publishes a replica of a local Pixeltable table to Pixeltable cloud. A given table can be published to at most one
|
|
442
|
+
URI per Pixeltable cloud database.
|
|
443
|
+
|
|
444
|
+
Args:
|
|
445
|
+
source: Path or table handle of the local table to be published.
|
|
446
|
+
destination_uri: Remote URI where the replica will be published, such as `'pxt://org_name/my_dir/my_table'`.
|
|
447
|
+
bucket_name: The name of the bucket to use to store replica's data. The bucket must be registered with
|
|
448
|
+
Pixeltable cloud. If no `bucket_name` is provided, the default storage bucket for the destination
|
|
449
|
+
database will be used.
|
|
450
|
+
access: Access control for the replica.
|
|
451
|
+
|
|
452
|
+
- `'public'`: Anyone can access this replica.
|
|
453
|
+
- `'private'`: Only the host organization can access.
|
|
454
|
+
"""
|
|
455
|
+
if not destination_uri.startswith('pxt://'):
|
|
456
|
+
raise excs.Error("`destination_uri` must be a remote Pixeltable URI with the prefix 'pxt://'")
|
|
457
|
+
|
|
458
|
+
if isinstance(source, str):
|
|
459
|
+
source = get_table(source)
|
|
460
|
+
|
|
461
|
+
share.push_replica(destination_uri, source, bucket_name, access)
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
def replicate(remote_uri: str, local_path: str) -> catalog.Table:
|
|
465
|
+
"""
|
|
466
|
+
Retrieve a replica from Pixeltable cloud as a local table. This will create a full local copy of the replica in a
|
|
467
|
+
way that preserves the table structure of the original source data. Once replicated, the local table can be
|
|
468
|
+
queried offline just as any other Pixeltable table.
|
|
469
|
+
|
|
470
|
+
Args:
|
|
471
|
+
remote_uri: Remote URI of the table to be replicated, such as `'pxt://org_name/my_dir/my_table'` or
|
|
472
|
+
`'pxt://org_name/my_dir/my_table:5'` (with version 5).
|
|
473
|
+
local_path: Local table path where the replica will be created, such as `'my_new_dir.my_new_tbl'`. It can be
|
|
474
|
+
the same or different from the cloud table name.
|
|
475
|
+
|
|
476
|
+
Returns:
|
|
477
|
+
A handle to the newly created local replica table.
|
|
478
|
+
"""
|
|
479
|
+
if not remote_uri.startswith('pxt://'):
|
|
480
|
+
raise excs.Error("`remote_uri` must be a remote Pixeltable URI with the prefix 'pxt://'")
|
|
481
|
+
|
|
482
|
+
return share.pull_replica(local_path, remote_uri)
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
def get_table(path: str, if_not_exists: Literal['error', 'ignore'] = 'error') -> catalog.Table | None:
|
|
242
486
|
"""Get a handle to an existing table, view, or snapshot.
|
|
243
487
|
|
|
244
488
|
Args:
|
|
245
489
|
path: Path to the table.
|
|
490
|
+
if_not_exists: Directive regarding how to handle if the path does not exist.
|
|
491
|
+
Must be one of the following:
|
|
492
|
+
|
|
493
|
+
- `'error'`: raise an error
|
|
494
|
+
- `'ignore'`: do nothing and return `None`
|
|
246
495
|
|
|
247
496
|
Returns:
|
|
248
497
|
A handle to the [`Table`][pixeltable.Table].
|
|
@@ -262,20 +511,39 @@ def get_table(path: str) -> catalog.Table:
|
|
|
262
511
|
Handles to views and snapshots are retrieved in the same way:
|
|
263
512
|
|
|
264
513
|
>>> tbl = pxt.get_table('my_snapshot')
|
|
514
|
+
|
|
515
|
+
Get a handle to a specific version of a table:
|
|
516
|
+
|
|
517
|
+
>>> tbl = pxt.get_table('my_table:722')
|
|
265
518
|
"""
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
return obj
|
|
519
|
+
if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
520
|
+
path_obj = catalog.Path.parse(path, allow_versioned_path=True)
|
|
521
|
+
tbl = Catalog.get().get_table(path_obj, if_not_exists_)
|
|
522
|
+
return tbl
|
|
271
523
|
|
|
272
524
|
|
|
273
|
-
def move(
|
|
525
|
+
def move(
|
|
526
|
+
path: str,
|
|
527
|
+
new_path: str,
|
|
528
|
+
*,
|
|
529
|
+
if_exists: Literal['error', 'ignore'] = 'error',
|
|
530
|
+
if_not_exists: Literal['error', 'ignore'] = 'error',
|
|
531
|
+
) -> None:
|
|
274
532
|
"""Move a schema object to a new directory and/or rename a schema object.
|
|
275
533
|
|
|
276
534
|
Args:
|
|
277
535
|
path: absolute path to the existing schema object.
|
|
278
536
|
new_path: absolute new path for the schema object.
|
|
537
|
+
if_exists: Directive regarding how to handle if a schema object already exists at the new path.
|
|
538
|
+
Must be one of the following:
|
|
539
|
+
|
|
540
|
+
- `'error'`: raise an error
|
|
541
|
+
- `'ignore'`: do nothing and return
|
|
542
|
+
if_not_exists: Directive regarding how to handle if the source path does not exist.
|
|
543
|
+
Must be one of the following:
|
|
544
|
+
|
|
545
|
+
- `'error'`: raise an error
|
|
546
|
+
- `'ignore'`: do nothing and return
|
|
279
547
|
|
|
280
548
|
Raises:
|
|
281
549
|
Error: If path does not exist or new_path already exists.
|
|
@@ -289,26 +557,40 @@ def move(path: str, new_path: str) -> None:
|
|
|
289
557
|
|
|
290
558
|
>>>> pxt.move('dir1.my_table', 'dir1.new_name')
|
|
291
559
|
"""
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
560
|
+
if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
|
|
561
|
+
if if_exists_ not in (catalog.IfExistsParam.ERROR, catalog.IfExistsParam.IGNORE):
|
|
562
|
+
raise excs.Error("`if_exists` must be one of 'error' or 'ignore'")
|
|
563
|
+
if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
564
|
+
if path == new_path:
|
|
565
|
+
raise excs.Error('move(): source and destination cannot be identical')
|
|
566
|
+
path_obj, new_path_obj = catalog.Path.parse(path), catalog.Path.parse(new_path)
|
|
567
|
+
if path_obj.is_ancestor(new_path_obj):
|
|
568
|
+
raise excs.Error(f'move(): cannot move {path!r} into its own subdirectory')
|
|
569
|
+
Catalog.get().move(path_obj, new_path_obj, if_exists_, if_not_exists_)
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
def drop_table(
|
|
573
|
+
table: str | catalog.Table, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error'
|
|
574
|
+
) -> None:
|
|
575
|
+
"""Drop a table, view, snapshot, or replica.
|
|
304
576
|
|
|
305
577
|
Args:
|
|
306
|
-
table: Fully qualified name
|
|
578
|
+
table: Fully qualified name or table handle of the table to be dropped; or a remote URI of a cloud replica to
|
|
579
|
+
be deleted.
|
|
307
580
|
force: If `True`, will also drop all views and sub-views of this table.
|
|
308
|
-
|
|
581
|
+
if_not_exists: Directive regarding how to handle if the path does not exist.
|
|
582
|
+
Must be one of the following:
|
|
583
|
+
|
|
584
|
+
- `'error'`: raise an error
|
|
585
|
+
- `'ignore'`: do nothing and return
|
|
309
586
|
|
|
310
587
|
Raises:
|
|
311
|
-
Error:
|
|
588
|
+
Error: if the qualified name
|
|
589
|
+
|
|
590
|
+
- is invalid, or
|
|
591
|
+
- does not exist and `if_not_exists='error'`, or
|
|
592
|
+
- does not designate a table object, or
|
|
593
|
+
- designates a table object but has dependents and `force=False`.
|
|
312
594
|
|
|
313
595
|
Examples:
|
|
314
596
|
Drop a table by its fully qualified name:
|
|
@@ -318,34 +600,85 @@ def drop_table(table: Union[str, catalog.Table], force: bool = False, ignore_err
|
|
|
318
600
|
>>> t = pxt.get_table('subdir.my_table')
|
|
319
601
|
... pxt.drop_table(t)
|
|
320
602
|
|
|
603
|
+
Drop a table if it exists, otherwise do nothing:
|
|
604
|
+
>>> pxt.drop_table('subdir.my_table', if_not_exists='ignore')
|
|
605
|
+
|
|
606
|
+
Drop a table and all its dependents:
|
|
607
|
+
>>> pxt.drop_table('subdir.my_table', force=True)
|
|
321
608
|
"""
|
|
322
|
-
|
|
323
|
-
if isinstance(table,
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
if ignore_errors or force:
|
|
329
|
-
_logger.info(f'Skipped table `{table}` (does not exist).')
|
|
330
|
-
return
|
|
331
|
-
else:
|
|
332
|
-
raise e
|
|
333
|
-
tbl = cat.paths[tbl_path_obj]
|
|
609
|
+
tbl_path: str
|
|
610
|
+
if isinstance(table, catalog.Table):
|
|
611
|
+
# if we're dropping a table by handle, we first need to get the current path, then drop the S lock on
|
|
612
|
+
# the Table record, and then get X locks in the correct order (first containing directory, then table)
|
|
613
|
+
with Catalog.get().begin_xact(for_write=False):
|
|
614
|
+
tbl_path = table._path()
|
|
334
615
|
else:
|
|
335
|
-
|
|
336
|
-
|
|
616
|
+
assert isinstance(table, str)
|
|
617
|
+
tbl_path = table
|
|
618
|
+
|
|
619
|
+
if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
337
620
|
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
dependent_paths = [dep._path for dep in cat.tbl_dependents[tbl._id]]
|
|
621
|
+
if tbl_path.startswith('pxt://'):
|
|
622
|
+
# Remote table
|
|
341
623
|
if force:
|
|
342
|
-
|
|
343
|
-
|
|
624
|
+
raise excs.Error('Cannot use `force=True` with a cloud replica URI.')
|
|
625
|
+
# TODO: Handle if_not_exists properly
|
|
626
|
+
share.delete_replica(tbl_path)
|
|
627
|
+
else:
|
|
628
|
+
# Local table
|
|
629
|
+
path_obj = catalog.Path.parse(tbl_path)
|
|
630
|
+
Catalog.get().drop_table(path_obj, force=force, if_not_exists=if_not_exists_)
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
def get_dir_contents(dir_path: str = '', recursive: bool = True) -> 'DirContents':
|
|
634
|
+
"""Get the contents of a Pixeltable directory.
|
|
635
|
+
|
|
636
|
+
Args:
|
|
637
|
+
dir_path: Path to the directory. Defaults to the root directory.
|
|
638
|
+
recursive: If `False`, returns only those tables and directories that are directly contained in specified
|
|
639
|
+
directory; if `True`, returns all tables and directories that are descendants of the specified directory,
|
|
640
|
+
recursively.
|
|
641
|
+
|
|
642
|
+
Returns:
|
|
643
|
+
A [`DirContents`][pixeltable.DirContents] object representing the contents of the specified directory.
|
|
644
|
+
|
|
645
|
+
Raises:
|
|
646
|
+
Error: If the path does not exist or does not designate a directory.
|
|
647
|
+
|
|
648
|
+
Examples:
|
|
649
|
+
Get contents of top-level directory:
|
|
650
|
+
|
|
651
|
+
>>> pxt.get_dir_contents()
|
|
652
|
+
|
|
653
|
+
Get contents of 'dir1':
|
|
654
|
+
|
|
655
|
+
>>> pxt.get_dir_contents('dir1')
|
|
656
|
+
"""
|
|
657
|
+
path_obj = catalog.Path.parse(dir_path, allow_empty_path=True)
|
|
658
|
+
catalog_entries = Catalog.get().get_dir_contents(path_obj, recursive=recursive)
|
|
659
|
+
dirs: list[str] = []
|
|
660
|
+
tables: list[str] = []
|
|
661
|
+
_assemble_dir_contents(dir_path, catalog_entries, dirs, tables)
|
|
662
|
+
dirs.sort()
|
|
663
|
+
tables.sort()
|
|
664
|
+
return DirContents(dirs=dirs, tables=tables)
|
|
665
|
+
|
|
666
|
+
|
|
667
|
+
def _assemble_dir_contents(
|
|
668
|
+
dir_path: str, catalog_entries: dict[str, Catalog.DirEntry], dirs: list[str], tables: list[str]
|
|
669
|
+
) -> None:
|
|
670
|
+
for name, entry in catalog_entries.items():
|
|
671
|
+
if name.startswith('_'):
|
|
672
|
+
continue # Skip system paths
|
|
673
|
+
path = f'{dir_path}.{name}' if len(dir_path) > 0 else name
|
|
674
|
+
if entry.dir is not None:
|
|
675
|
+
dirs.append(path)
|
|
676
|
+
if entry.dir_entries is not None:
|
|
677
|
+
_assemble_dir_contents(path, entry.dir_entries, dirs, tables)
|
|
344
678
|
else:
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
_logger.info(f'Dropped table `{tbl._path}`.')
|
|
679
|
+
assert entry.table is not None
|
|
680
|
+
assert not entry.dir_entries
|
|
681
|
+
tables.append(path)
|
|
349
682
|
|
|
350
683
|
|
|
351
684
|
def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
|
|
@@ -371,21 +704,42 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
|
|
|
371
704
|
|
|
372
705
|
>>> pxt.list_tables('dir1')
|
|
373
706
|
"""
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
Catalog.get().paths.check_is_valid(path, expected=catalog.Dir)
|
|
377
|
-
return [str(p) for p in Catalog.get().paths.get_children(path, child_type=catalog.Table, recursive=recursive)]
|
|
707
|
+
return _list_tables(dir_path, recursive=recursive, allow_system_paths=False)
|
|
708
|
+
|
|
378
709
|
|
|
710
|
+
def _list_tables(dir_path: str = '', recursive: bool = True, allow_system_paths: bool = False) -> list[str]:
|
|
711
|
+
path_obj = catalog.Path.parse(dir_path, allow_empty_path=True, allow_system_path=allow_system_paths)
|
|
712
|
+
contents = Catalog.get().get_dir_contents(path_obj, recursive=recursive)
|
|
713
|
+
return [str(p) for p in _extract_paths(contents, parent=path_obj, entry_type=catalog.Table)]
|
|
379
714
|
|
|
380
|
-
|
|
715
|
+
|
|
716
|
+
def create_dir(
|
|
717
|
+
path: str, *, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error', parents: bool = False
|
|
718
|
+
) -> catalog.Dir | None:
|
|
381
719
|
"""Create a directory.
|
|
382
720
|
|
|
383
721
|
Args:
|
|
384
|
-
|
|
385
|
-
|
|
722
|
+
path: Path to the directory.
|
|
723
|
+
if_exists: Directive regarding how to handle if the path already exists.
|
|
724
|
+
Must be one of the following:
|
|
725
|
+
|
|
726
|
+
- `'error'`: raise an error
|
|
727
|
+
- `'ignore'`: do nothing and return the existing directory handle
|
|
728
|
+
- `'replace'`: if the existing directory is empty, drop it and create a new one
|
|
729
|
+
- `'replace_force'`: drop the existing directory and all its children, and create a new one
|
|
730
|
+
parents: Create missing parent directories.
|
|
731
|
+
|
|
732
|
+
Returns:
|
|
733
|
+
A handle to the newly created directory, or to an already existing directory at the path when
|
|
734
|
+
`if_exists='ignore'`. Please note the existing directory may not be empty.
|
|
386
735
|
|
|
387
736
|
Raises:
|
|
388
|
-
Error: If
|
|
737
|
+
Error: If
|
|
738
|
+
|
|
739
|
+
- the path is invalid, or
|
|
740
|
+
- the path already exists and `if_exists='error'`, or
|
|
741
|
+
- the path already exists and is not a directory, or
|
|
742
|
+
- an error occurs while attempting to create the directory.
|
|
389
743
|
|
|
390
744
|
Examples:
|
|
391
745
|
>>> pxt.create_dir('my_dir')
|
|
@@ -393,96 +747,155 @@ def create_dir(path_str: str, ignore_errors: bool = False) -> Optional[catalog.D
|
|
|
393
747
|
Create a subdirectory:
|
|
394
748
|
|
|
395
749
|
>>> pxt.create_dir('my_dir.sub_dir')
|
|
750
|
+
|
|
751
|
+
Create a subdirectory only if it does not already exist, otherwise do nothing:
|
|
752
|
+
|
|
753
|
+
>>> pxt.create_dir('my_dir.sub_dir', if_exists='ignore')
|
|
754
|
+
|
|
755
|
+
Create a directory and replace if it already exists:
|
|
756
|
+
|
|
757
|
+
>>> pxt.create_dir('my_dir', if_exists='replace_force')
|
|
758
|
+
|
|
759
|
+
Create a subdirectory along with its ancestors:
|
|
760
|
+
|
|
761
|
+
>>> pxt.create_dir('parent1.parent2.sub_dir', parents=True)
|
|
396
762
|
"""
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
parent = Catalog.get().paths[path.parent]
|
|
401
|
-
assert parent is not None
|
|
402
|
-
with orm.Session(Env.get().engine, future=True) as session:
|
|
403
|
-
dir_md = schema.DirMd(name=path.name)
|
|
404
|
-
dir_record = schema.Dir(parent_id=parent._id, md=dataclasses.asdict(dir_md))
|
|
405
|
-
session.add(dir_record)
|
|
406
|
-
session.flush()
|
|
407
|
-
assert dir_record.id is not None
|
|
408
|
-
assert isinstance(dir_record.id, UUID)
|
|
409
|
-
dir = catalog.Dir(dir_record.id, parent._id, path.name)
|
|
410
|
-
Catalog.get().paths[path] = dir
|
|
411
|
-
session.commit()
|
|
412
|
-
_logger.info(f'Created directory `{path_str}`.')
|
|
413
|
-
print(f'Created directory `{path_str}`.')
|
|
414
|
-
return dir
|
|
415
|
-
except excs.Error as e:
|
|
416
|
-
if ignore_errors:
|
|
417
|
-
return None
|
|
418
|
-
else:
|
|
419
|
-
raise e
|
|
763
|
+
path_obj = catalog.Path.parse(path)
|
|
764
|
+
if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
|
|
765
|
+
return Catalog.get().create_dir(path_obj, if_exists=if_exists_, parents=parents)
|
|
420
766
|
|
|
421
767
|
|
|
422
|
-
def drop_dir(
|
|
768
|
+
def drop_dir(path: str, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
|
|
423
769
|
"""Remove a directory.
|
|
424
770
|
|
|
425
771
|
Args:
|
|
426
|
-
|
|
772
|
+
path: Name or path of the directory.
|
|
427
773
|
force: If `True`, will also drop all tables and subdirectories of this directory, recursively, along
|
|
428
774
|
with any views or snapshots that depend on any of the dropped tables.
|
|
429
|
-
|
|
430
|
-
|
|
775
|
+
if_not_exists: Directive regarding how to handle if the path does not exist.
|
|
776
|
+
Must be one of the following:
|
|
777
|
+
|
|
778
|
+
- `'error'`: raise an error
|
|
779
|
+
- `'ignore'`: do nothing and return
|
|
431
780
|
|
|
432
781
|
Raises:
|
|
433
|
-
Error: If the path
|
|
782
|
+
Error: If the path
|
|
783
|
+
|
|
784
|
+
- is invalid, or
|
|
785
|
+
- does not exist and `if_not_exists='error'`, or
|
|
786
|
+
- is not designate a directory, or
|
|
787
|
+
- is a direcotory but is not empty and `force=False`.
|
|
434
788
|
|
|
435
789
|
Examples:
|
|
790
|
+
Remove a directory, if it exists and is empty:
|
|
436
791
|
>>> pxt.drop_dir('my_dir')
|
|
437
792
|
|
|
438
793
|
Remove a subdirectory:
|
|
439
794
|
|
|
440
795
|
>>> pxt.drop_dir('my_dir.sub_dir')
|
|
796
|
+
|
|
797
|
+
Remove an existing directory if it is empty, but do nothing if it does not exist:
|
|
798
|
+
|
|
799
|
+
>>> pxt.drop_dir('my_dir.sub_dir', if_not_exists='ignore')
|
|
800
|
+
|
|
801
|
+
Remove an existing directory and all its contents:
|
|
802
|
+
|
|
803
|
+
>>> pxt.drop_dir('my_dir', force=True)
|
|
804
|
+
"""
|
|
805
|
+
path_obj = catalog.Path.parse(path) # validate format
|
|
806
|
+
if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
807
|
+
Catalog.get().drop_dir(path_obj, if_not_exists=if_not_exists_, force=force)
|
|
808
|
+
|
|
809
|
+
|
|
810
|
+
def ls(path: str = '') -> pd.DataFrame:
|
|
441
811
|
"""
|
|
812
|
+
List the contents of a Pixeltable directory.
|
|
813
|
+
|
|
814
|
+
This function returns a Pandas DataFrame representing a human-readable listing of the specified directory,
|
|
815
|
+
including various attributes such as version and base table, as appropriate.
|
|
816
|
+
|
|
817
|
+
To get a programmatic list of the directory's contents, use [get_dir_contents()][pixeltable.get_dir_contents]
|
|
818
|
+
instead.
|
|
819
|
+
"""
|
|
820
|
+
from pixeltable.catalog import retry_loop
|
|
821
|
+
from pixeltable.metadata import schema
|
|
822
|
+
|
|
442
823
|
cat = Catalog.get()
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
824
|
+
path_obj = catalog.Path.parse(path, allow_empty_path=True)
|
|
825
|
+
dir_entries = cat.get_dir_contents(path_obj)
|
|
826
|
+
|
|
827
|
+
@retry_loop(for_write=False)
|
|
828
|
+
def op() -> list[list[str]]:
|
|
829
|
+
rows: list[list[str]] = []
|
|
830
|
+
for name, entry in dir_entries.items():
|
|
831
|
+
if name.startswith('_'):
|
|
832
|
+
continue
|
|
833
|
+
if entry.dir is not None:
|
|
834
|
+
kind = 'dir'
|
|
835
|
+
version = ''
|
|
836
|
+
base = ''
|
|
837
|
+
else:
|
|
838
|
+
assert entry.table is not None
|
|
839
|
+
assert isinstance(entry.table, schema.Table)
|
|
840
|
+
tbl = cat.get_table_by_id(entry.table.id)
|
|
841
|
+
md = tbl.get_metadata()
|
|
842
|
+
base = md['base'] or ''
|
|
843
|
+
if base.startswith('_'):
|
|
844
|
+
base = '<anonymous base table>'
|
|
845
|
+
if md['is_replica']:
|
|
846
|
+
kind = 'replica'
|
|
847
|
+
elif md['is_snapshot']:
|
|
848
|
+
kind = 'snapshot'
|
|
849
|
+
elif md['is_view']:
|
|
850
|
+
kind = 'view'
|
|
851
|
+
else:
|
|
852
|
+
kind = 'table'
|
|
853
|
+
version = '' if kind == 'snapshot' else str(md['version'])
|
|
854
|
+
rows.append([name, kind, version, base])
|
|
855
|
+
return rows
|
|
856
|
+
|
|
857
|
+
rows = op()
|
|
858
|
+
|
|
859
|
+
rows = sorted(rows, key=lambda x: x[0])
|
|
860
|
+
df = pd.DataFrame(
|
|
861
|
+
{
|
|
862
|
+
'Name': [row[0] for row in rows],
|
|
863
|
+
'Kind': [row[1] for row in rows],
|
|
864
|
+
'Version': [row[2] for row in rows],
|
|
865
|
+
'Base': [row[3] for row in rows],
|
|
866
|
+
},
|
|
867
|
+
index=([''] * len(rows)),
|
|
868
|
+
)
|
|
869
|
+
return df
|
|
870
|
+
|
|
871
|
+
|
|
872
|
+
def _extract_paths(
|
|
873
|
+
dir_entries: dict[str, Catalog.DirEntry], parent: catalog.Path, entry_type: type[catalog.SchemaObject] | None = None
|
|
874
|
+
) -> list[catalog.Path]:
|
|
875
|
+
"""Convert nested dir_entries structure to a flattened list of paths."""
|
|
876
|
+
matches: list[str]
|
|
877
|
+
if entry_type is None:
|
|
878
|
+
matches = list(dir_entries.keys())
|
|
879
|
+
elif entry_type is catalog.Dir:
|
|
880
|
+
matches = [name for name, entry in dir_entries.items() if entry.dir is not None]
|
|
881
|
+
else:
|
|
882
|
+
matches = [name for name, entry in dir_entries.items() if entry.table is not None]
|
|
883
|
+
|
|
884
|
+
# Filter out system paths
|
|
885
|
+
matches = [name for name in matches if catalog.is_valid_identifier(name)]
|
|
886
|
+
result = [parent.append(name) for name in matches]
|
|
473
887
|
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
_logger.info(f'Removed directory `{path_str}`.')
|
|
888
|
+
for name, entry in dir_entries.items():
|
|
889
|
+
if len(entry.dir_entries) > 0 and catalog.is_valid_identifier(name):
|
|
890
|
+
result.extend(_extract_paths(entry.dir_entries, parent=parent.append(name), entry_type=entry_type))
|
|
891
|
+
return result
|
|
479
892
|
|
|
480
893
|
|
|
481
|
-
def list_dirs(
|
|
894
|
+
def list_dirs(path: str = '', recursive: bool = True) -> list[str]:
|
|
482
895
|
"""List the directories in a directory.
|
|
483
896
|
|
|
484
897
|
Args:
|
|
485
|
-
|
|
898
|
+
path: Name or path of the directory.
|
|
486
899
|
recursive: If `True`, lists all descendants of this directory recursively.
|
|
487
900
|
|
|
488
901
|
Returns:
|
|
@@ -495,9 +908,10 @@ def list_dirs(path_str: str = '', recursive: bool = True) -> list[str]:
|
|
|
495
908
|
>>> cl.list_dirs('my_dir', recursive=True)
|
|
496
909
|
['my_dir', 'my_dir.sub_dir1']
|
|
497
910
|
"""
|
|
498
|
-
|
|
499
|
-
Catalog.get()
|
|
500
|
-
|
|
911
|
+
path_obj = catalog.Path.parse(path, allow_empty_path=True) # validate format
|
|
912
|
+
cat = Catalog.get()
|
|
913
|
+
contents = cat.get_dir_contents(path_obj, recursive=recursive)
|
|
914
|
+
return [str(p) for p in _extract_paths(contents, parent=path_obj, entry_type=catalog.Dir)]
|
|
501
915
|
|
|
502
916
|
|
|
503
917
|
def list_functions() -> Styler:
|
|
@@ -510,7 +924,9 @@ def list_functions() -> Styler:
|
|
|
510
924
|
paths = ['.'.join(f.self_path.split('.')[:-1]) for f in functions]
|
|
511
925
|
names = [f.name for f in functions]
|
|
512
926
|
params = [
|
|
513
|
-
', '.join(
|
|
927
|
+
', '.join(
|
|
928
|
+
[param_name + ': ' + str(param_type) for param_name, param_type in f.signatures[0].parameters.items()]
|
|
929
|
+
)
|
|
514
930
|
for f in functions
|
|
515
931
|
]
|
|
516
932
|
pd_df = pd.DataFrame(
|
|
@@ -518,21 +934,74 @@ def list_functions() -> Styler:
|
|
|
518
934
|
'Path': paths,
|
|
519
935
|
'Function Name': names,
|
|
520
936
|
'Parameters': params,
|
|
521
|
-
'Return Type': [str(f.
|
|
937
|
+
'Return Type': [str(f.signatures[0].get_return_type()) for f in functions],
|
|
522
938
|
}
|
|
523
939
|
)
|
|
524
940
|
pd_df = pd_df.style.set_properties(None, **{'text-align': 'left'}).set_table_styles(
|
|
525
|
-
[
|
|
941
|
+
[{'selector': 'th', 'props': [('text-align', 'center')]}]
|
|
526
942
|
) # center-align headings
|
|
527
943
|
return pd_df.hide(axis='index')
|
|
528
944
|
|
|
529
945
|
|
|
946
|
+
def tools(*args: func.Function | func.tools.Tool) -> func.tools.Tools:
|
|
947
|
+
"""
|
|
948
|
+
Specifies a collection of UDFs to be used as LLM tools. Pixeltable allows any UDF to be used as an input into an
|
|
949
|
+
LLM tool-calling API. To use one or more UDFs as tools, wrap them in a `pxt.tools` call and pass the return value
|
|
950
|
+
to an LLM API.
|
|
951
|
+
|
|
952
|
+
The UDFs can be specified directly or wrapped inside a [pxt.tool()][pixeltable.tool] invocation. If a UDF is
|
|
953
|
+
specified directly, the tool name will be the (unqualified) UDF name, and the tool description will consist of the
|
|
954
|
+
entire contents of the UDF docstring. If a UDF is wrapped in a `pxt.tool()` invocation, then the name and/or
|
|
955
|
+
description may be customized.
|
|
956
|
+
|
|
957
|
+
Args:
|
|
958
|
+
args: The UDFs to use as tools.
|
|
959
|
+
|
|
960
|
+
Returns:
|
|
961
|
+
A `Tools` instance that can be passed to an LLM tool-calling API or invoked to generate tool results.
|
|
962
|
+
|
|
963
|
+
Examples:
|
|
964
|
+
Create a tools instance with a single UDF:
|
|
965
|
+
|
|
966
|
+
>>> tools = pxt.tools(stock_price)
|
|
967
|
+
|
|
968
|
+
Create a tools instance with several UDFs:
|
|
969
|
+
|
|
970
|
+
>>> tools = pxt.tools(stock_price, weather_quote)
|
|
971
|
+
|
|
972
|
+
Create a tools instance, some of whose UDFs have customized metadata:
|
|
973
|
+
|
|
974
|
+
>>> tools = pxt.tools(
|
|
975
|
+
... stock_price,
|
|
976
|
+
... pxt.tool(weather_quote, description='Returns information about the weather in a particular location.'),
|
|
977
|
+
... pxt.tool(traffic_quote, name='traffic_conditions'),
|
|
978
|
+
... )
|
|
979
|
+
"""
|
|
980
|
+
return func.tools.Tools(tools=[arg if isinstance(arg, func.tools.Tool) else tool(arg) for arg in args])
|
|
981
|
+
|
|
982
|
+
|
|
983
|
+
def tool(fn: func.Function, name: str | None = None, description: str | None = None) -> func.tools.Tool:
|
|
984
|
+
"""
|
|
985
|
+
Specifies a Pixeltable UDF to be used as an LLM tool with customizable metadata. See the documentation for
|
|
986
|
+
[pxt.tools()][pixeltable.tools] for more details.
|
|
987
|
+
|
|
988
|
+
Args:
|
|
989
|
+
fn: The UDF to use as a tool.
|
|
990
|
+
name: The name of the tool. If not specified, then the unqualified name of the UDF will be used by default.
|
|
991
|
+
description: The description of the tool. If not specified, then the entire contents of the UDF docstring
|
|
992
|
+
will be used by default.
|
|
993
|
+
|
|
994
|
+
Returns:
|
|
995
|
+
A `Tool` instance that can be passed to an LLM tool-calling API.
|
|
996
|
+
"""
|
|
997
|
+
if isinstance(fn, func.AggregateFunction):
|
|
998
|
+
raise excs.Error('Aggregator UDFs cannot be used as tools')
|
|
999
|
+
|
|
1000
|
+
return func.tools.Tool(fn=fn, name=name, description=description)
|
|
1001
|
+
|
|
1002
|
+
|
|
530
1003
|
def configure_logging(
|
|
531
|
-
*,
|
|
532
|
-
to_stdout: Optional[bool] = None,
|
|
533
|
-
level: Optional[int] = None,
|
|
534
|
-
add: Optional[str] = None,
|
|
535
|
-
remove: Optional[str] = None,
|
|
1004
|
+
*, to_stdout: bool | None = None, level: int | None = None, add: str | None = None, remove: str | None = None
|
|
536
1005
|
) -> None:
|
|
537
1006
|
"""Configure logging.
|
|
538
1007
|
|
|
@@ -546,4 +1015,15 @@ def configure_logging(
|
|
|
546
1015
|
|
|
547
1016
|
|
|
548
1017
|
def array(elements: Iterable) -> exprs.Expr:
|
|
549
|
-
return exprs.
|
|
1018
|
+
return exprs.Expr.from_array(elements)
|
|
1019
|
+
|
|
1020
|
+
|
|
1021
|
+
class DirContents(TypedDict):
|
|
1022
|
+
"""
|
|
1023
|
+
Represents the contents of a Pixeltable directory.
|
|
1024
|
+
"""
|
|
1025
|
+
|
|
1026
|
+
dirs: list[str]
|
|
1027
|
+
"""List of directory paths contained in this directory."""
|
|
1028
|
+
tables: list[str]
|
|
1029
|
+
"""List of table paths contained in this directory."""
|