pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +23 -5
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -3
- pixeltable/catalog/catalog.py +1318 -404
- pixeltable/catalog/column.py +186 -115
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/globals.py +11 -43
- pixeltable/catalog/insertable_table.py +167 -79
- pixeltable/catalog/path.py +61 -23
- pixeltable/catalog/schema_object.py +9 -10
- pixeltable/catalog/table.py +626 -308
- pixeltable/catalog/table_metadata.py +101 -0
- pixeltable/catalog/table_version.py +713 -569
- pixeltable/catalog/table_version_handle.py +37 -6
- pixeltable/catalog/table_version_path.py +42 -29
- pixeltable/catalog/tbl_ops.py +50 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +108 -94
- pixeltable/config.py +128 -22
- pixeltable/dataframe.py +188 -100
- pixeltable/env.py +407 -136
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +3 -0
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +231 -0
- pixeltable/exec/cell_reconstruction_node.py +135 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +7 -6
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +190 -30
- pixeltable/exec/globals.py +32 -0
- pixeltable/exec/in_memory_data_node.py +18 -18
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +206 -101
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +34 -30
- pixeltable/exprs/column_ref.py +92 -96
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +152 -55
- pixeltable/exprs/expr.py +62 -43
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +75 -37
- pixeltable/exprs/globals.py +1 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +10 -27
- pixeltable/exprs/is_null.py +1 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +127 -53
- pixeltable/exprs/rowid_ref.py +8 -12
- pixeltable/exprs/similarity_expr.py +50 -25
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +10 -10
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +20 -18
- pixeltable/func/signature.py +43 -16
- pixeltable/func/tools.py +23 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +6 -0
- pixeltable/functions/anthropic.py +93 -33
- pixeltable/functions/audio.py +114 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +1 -1
- pixeltable/functions/deepseek.py +20 -9
- pixeltable/functions/fireworks.py +2 -2
- pixeltable/functions/gemini.py +28 -11
- pixeltable/functions/globals.py +13 -13
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1046 -23
- pixeltable/functions/image.py +9 -18
- pixeltable/functions/llama_cpp.py +23 -8
- pixeltable/functions/math.py +3 -4
- pixeltable/functions/mistralai.py +4 -15
- pixeltable/functions/ollama.py +16 -9
- pixeltable/functions/openai.py +104 -82
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +2 -2
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +21 -28
- pixeltable/functions/timestamp.py +13 -14
- pixeltable/functions/together.py +4 -6
- pixeltable/functions/twelvelabs.py +92 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/video.py +1388 -106
- pixeltable/functions/vision.py +7 -7
- pixeltable/functions/whisper.py +15 -7
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +332 -105
- pixeltable/index/base.py +13 -22
- pixeltable/index/btree.py +23 -22
- pixeltable/index/embedding_index.py +32 -44
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +7 -6
- pixeltable/io/external_store.py +49 -77
- pixeltable/io/fiftyone.py +11 -11
- pixeltable/io/globals.py +29 -28
- pixeltable/io/hf_datasets.py +17 -9
- pixeltable/io/label_studio.py +70 -66
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +12 -11
- pixeltable/io/parquet.py +13 -93
- pixeltable/io/table_data_conduit.py +71 -47
- pixeltable/io/utils.py +3 -3
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +21 -11
- pixeltable/iterators/document.py +116 -55
- pixeltable/iterators/image.py +5 -2
- pixeltable/iterators/video.py +293 -13
- pixeltable/metadata/__init__.py +4 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_36.py +2 -2
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/util.py +13 -12
- pixeltable/metadata/notes.py +4 -0
- pixeltable/metadata/schema.py +79 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +274 -223
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +259 -129
- pixeltable/share/protocol/__init__.py +34 -0
- pixeltable/share/protocol/common.py +170 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +109 -0
- pixeltable/share/publish.py +213 -57
- pixeltable/store.py +238 -175
- pixeltable/type_system.py +104 -63
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +108 -13
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +305 -0
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +31 -5
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +5 -6
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +4 -6
- pixeltable/utils/gcs_store.py +283 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +88 -0
- pixeltable/utils/local_store.py +316 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +528 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +392 -0
- pixeltable-0.4.20.dist-info/METADATA +587 -0
- pixeltable-0.4.20.dist-info/RECORD +218 -0
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
- pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable/utils/sample.py +0 -25
- pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
- pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
- pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
pixeltable/globals.py
CHANGED
|
@@ -3,15 +3,18 @@ from __future__ import annotations
|
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
5
|
from pathlib import Path
|
|
6
|
-
from typing import TYPE_CHECKING, Any, Iterable,
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Iterable, Literal, NamedTuple, Union
|
|
7
7
|
|
|
8
8
|
import pandas as pd
|
|
9
|
+
import pydantic
|
|
9
10
|
from pandas.io.formats.style import Styler
|
|
10
11
|
|
|
11
|
-
from pixeltable import DataFrame, catalog, exceptions as excs, exprs, func, share
|
|
12
|
+
from pixeltable import DataFrame, catalog, exceptions as excs, exprs, func, share, type_system as ts
|
|
12
13
|
from pixeltable.catalog import Catalog, TableVersionPath
|
|
13
14
|
from pixeltable.catalog.insertable_table import OnErrorParameter
|
|
15
|
+
from pixeltable.config import Config
|
|
14
16
|
from pixeltable.env import Env
|
|
17
|
+
from pixeltable.io.table_data_conduit import DFTableDataConduit, TableDataConduit
|
|
15
18
|
from pixeltable.iterators import ComponentIterator
|
|
16
19
|
|
|
17
20
|
if TYPE_CHECKING:
|
|
@@ -22,46 +25,62 @@ if TYPE_CHECKING:
|
|
|
22
25
|
str,
|
|
23
26
|
os.PathLike,
|
|
24
27
|
Path, # OS paths, filenames, URLs
|
|
25
|
-
|
|
26
|
-
|
|
28
|
+
Iterable[dict[str, Any]], # dictionaries of values
|
|
29
|
+
Iterable[pydantic.BaseModel], # Pydantic model instances
|
|
27
30
|
DataFrame, # Pixeltable DataFrame
|
|
28
31
|
pd.DataFrame, # pandas DataFrame
|
|
29
|
-
|
|
30
|
-
|
|
32
|
+
datasets.Dataset,
|
|
33
|
+
datasets.DatasetDict, # Huggingface datasets
|
|
31
34
|
]
|
|
32
35
|
|
|
33
36
|
|
|
34
37
|
_logger = logging.getLogger('pixeltable')
|
|
35
38
|
|
|
36
39
|
|
|
37
|
-
def init() -> None:
|
|
40
|
+
def init(config_overrides: dict[str, Any] | None = None) -> None:
|
|
38
41
|
"""Initializes the Pixeltable environment."""
|
|
42
|
+
if config_overrides is None:
|
|
43
|
+
config_overrides = {}
|
|
44
|
+
Config.init(config_overrides)
|
|
39
45
|
_ = Catalog.get()
|
|
40
46
|
|
|
41
47
|
|
|
42
48
|
def create_table(
|
|
43
|
-
|
|
44
|
-
schema:
|
|
49
|
+
path: str,
|
|
50
|
+
schema: dict[str, Any] | None = None,
|
|
45
51
|
*,
|
|
46
|
-
source:
|
|
47
|
-
source_format:
|
|
48
|
-
schema_overrides:
|
|
52
|
+
source: TableDataSource | None = None,
|
|
53
|
+
source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
|
|
54
|
+
schema_overrides: dict[str, Any] | None = None,
|
|
55
|
+
create_default_idxs: bool = True,
|
|
49
56
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
50
|
-
primary_key:
|
|
57
|
+
primary_key: str | list[str] | None = None,
|
|
51
58
|
num_retained_versions: int = 10,
|
|
52
59
|
comment: str = '',
|
|
53
60
|
media_validation: Literal['on_read', 'on_write'] = 'on_write',
|
|
54
61
|
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
|
|
55
|
-
extra_args:
|
|
62
|
+
extra_args: dict[str, Any] | None = None, # Additional arguments to data source provider
|
|
56
63
|
) -> catalog.Table:
|
|
57
|
-
"""Create a new base table.
|
|
64
|
+
"""Create a new base table. Exactly one of `schema` or `source` must be provided.
|
|
65
|
+
|
|
66
|
+
If a `schema` is provided, then an empty table will be created with the specified schema.
|
|
67
|
+
|
|
68
|
+
If a `source` is provided, then Pixeltable will attempt to infer a data source format and table schema from the
|
|
69
|
+
contents of the specified data, and the data will be imported from the specified source into the new table. The
|
|
70
|
+
source format and/or schema can be specified directly via the `source_format` and `schema_overrides` parameters.
|
|
58
71
|
|
|
59
72
|
Args:
|
|
60
|
-
|
|
61
|
-
schema:
|
|
62
|
-
source: A data source
|
|
63
|
-
source_format:
|
|
64
|
-
|
|
73
|
+
path: Pixeltable path (qualified name) of the table, such as `'my_table'` or `'my_dir.my_subdir.my_table'`.
|
|
74
|
+
schema: Schema for the new table, mapping column names to Pixeltable types.
|
|
75
|
+
source: A data source (file, URL, DataFrame, or list of rows) to import from.
|
|
76
|
+
source_format: Must be used in conjunction with a `source`.
|
|
77
|
+
If specified, then the given format will be used to read the source data. (Otherwise,
|
|
78
|
+
Pixeltable will attempt to infer the format from the source data.)
|
|
79
|
+
schema_overrides: Must be used in conjunction with a `source`.
|
|
80
|
+
If specified, then columns in `schema_overrides` will be given the specified types.
|
|
81
|
+
(Pixeltable will attempt to infer the types of any columns not specified.)
|
|
82
|
+
create_default_idxs: If True, creates a B-tree index on every scalar and media column that is not computed,
|
|
83
|
+
except for boolean columns.
|
|
65
84
|
on_error: Determines the behavior if an error occurs while evaluating a computed column or detecting an
|
|
66
85
|
invalid media file (such as a corrupt image) for one of the inserted rows.
|
|
67
86
|
|
|
@@ -77,14 +96,15 @@ def create_table(
|
|
|
77
96
|
|
|
78
97
|
- `'on_read'`: validate media files at query time
|
|
79
98
|
- `'on_write'`: validate media files during insert/update operations
|
|
80
|
-
if_exists:
|
|
81
|
-
Must be one of the following:
|
|
99
|
+
if_exists: Determines the behavior if a table already exists at the specified path location.
|
|
82
100
|
|
|
83
101
|
- `'error'`: raise an error
|
|
84
102
|
- `'ignore'`: do nothing and return the existing table handle
|
|
85
|
-
- `'replace'`: if the existing table has no views, drop and replace it with a new one
|
|
86
|
-
|
|
87
|
-
|
|
103
|
+
- `'replace'`: if the existing table has no views or snapshots, drop and replace it with a new one;
|
|
104
|
+
raise an error if the existing table has views or snapshots
|
|
105
|
+
- `'replace_force'`: drop the existing table and all its views and snapshots, and create a new one
|
|
106
|
+
extra_args: Must be used in conjunction with a `source`. If specified, then additional arguments will be
|
|
107
|
+
passed along to the source data provider.
|
|
88
108
|
|
|
89
109
|
Returns:
|
|
90
110
|
A handle to the newly created table, or to an already existing table at the path when `if_exists='ignore'`.
|
|
@@ -110,7 +130,7 @@ def create_table(
|
|
|
110
130
|
>>> tbl1 = pxt.get_table('orig_table')
|
|
111
131
|
... tbl2 = pxt.create_table('new_table', tbl1.where(tbl1.col1 < 10).select(tbl1.col2))
|
|
112
132
|
|
|
113
|
-
Create a table if does not already exist, otherwise get the existing table:
|
|
133
|
+
Create a table if it does not already exist, otherwise get the existing table:
|
|
114
134
|
|
|
115
135
|
>>> tbl = pxt.create_table('my_table', schema={'col1': pxt.Int, 'col2': pxt.String}, if_exists='ignore')
|
|
116
136
|
|
|
@@ -122,27 +142,39 @@ def create_table(
|
|
|
122
142
|
|
|
123
143
|
>>> tbl = pxt.create_table('my_table', source='data.csv')
|
|
124
144
|
"""
|
|
125
|
-
from pixeltable.io.table_data_conduit import
|
|
145
|
+
from pixeltable.io.table_data_conduit import UnkTableDataConduit
|
|
126
146
|
from pixeltable.io.utils import normalize_primary_key_parameter
|
|
127
147
|
|
|
128
148
|
if (schema is None) == (source is None):
|
|
129
|
-
raise excs.Error('
|
|
149
|
+
raise excs.Error('Either a `schema` or a `source` must be provided (but not both)')
|
|
130
150
|
|
|
131
151
|
if schema is not None and (len(schema) == 0 or not isinstance(schema, dict)):
|
|
132
152
|
raise excs.Error('`schema` must be a non-empty dictionary')
|
|
133
153
|
|
|
134
|
-
path_obj = catalog.Path(
|
|
154
|
+
path_obj = catalog.Path.parse(path)
|
|
135
155
|
if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
|
|
136
156
|
media_validation_ = catalog.MediaValidation.validated(media_validation, 'media_validation')
|
|
137
|
-
primary_key:
|
|
138
|
-
|
|
139
|
-
tds = None
|
|
140
|
-
data_source = None
|
|
157
|
+
primary_key: list[str] | None = normalize_primary_key_parameter(primary_key)
|
|
158
|
+
data_source: TableDataConduit | None = None
|
|
141
159
|
if source is not None:
|
|
160
|
+
if isinstance(source, str) and source.strip().startswith('pxt://'):
|
|
161
|
+
raise excs.Error(
|
|
162
|
+
'create_table(): Creating a table directly from a cloud URI is not supported.'
|
|
163
|
+
' Please replicate the table locally first using `pxt.replicate()`:\n'
|
|
164
|
+
"replica_tbl = pxt.replicate('pxt://path/to/remote_table', 'local_replica_name')\n"
|
|
165
|
+
"pxt.create_table('new_table_name', source=replica_tbl)"
|
|
166
|
+
)
|
|
142
167
|
tds = UnkTableDataConduit(source, source_format=source_format, extra_fields=extra_args)
|
|
143
168
|
tds.check_source_format()
|
|
144
169
|
data_source = tds.specialize()
|
|
145
|
-
|
|
170
|
+
src_schema_overrides: dict[str, ts.ColumnType] = {}
|
|
171
|
+
if schema_overrides is not None:
|
|
172
|
+
for col_name, py_type in schema_overrides.items():
|
|
173
|
+
col_type = ts.ColumnType.normalize_type(py_type, nullable_default=True, allow_builtin_types=False)
|
|
174
|
+
if col_type is None:
|
|
175
|
+
raise excs.Error(f'Invalid type for column {col_name!r} in `schema_overrides`: {py_type}')
|
|
176
|
+
src_schema_overrides[col_name] = col_type
|
|
177
|
+
data_source.src_schema_overrides = src_schema_overrides
|
|
146
178
|
data_source.src_pk = primary_key
|
|
147
179
|
data_source.infer_schema()
|
|
148
180
|
schema = data_source.pxt_schema
|
|
@@ -156,35 +188,43 @@ def create_table(
|
|
|
156
188
|
'Unable to create a proper schema from supplied `source`. Please use appropriate `schema_overrides`.'
|
|
157
189
|
)
|
|
158
190
|
|
|
159
|
-
|
|
191
|
+
tbl, was_created = Catalog.get().create_table(
|
|
160
192
|
path_obj,
|
|
161
193
|
schema,
|
|
162
|
-
data_source.pxt_df if isinstance(data_source, DFTableDataConduit) else None,
|
|
163
194
|
if_exists=if_exists_,
|
|
164
195
|
primary_key=primary_key,
|
|
165
196
|
comment=comment,
|
|
166
197
|
media_validation=media_validation_,
|
|
167
198
|
num_retained_versions=num_retained_versions,
|
|
199
|
+
create_default_idxs=create_default_idxs,
|
|
168
200
|
)
|
|
169
|
-
|
|
201
|
+
|
|
202
|
+
# TODO: combine data loading with table creation into a single transaction
|
|
203
|
+
if was_created:
|
|
170
204
|
fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
|
|
171
|
-
|
|
205
|
+
if isinstance(data_source, DFTableDataConduit):
|
|
206
|
+
df = data_source.pxt_df
|
|
207
|
+
with Catalog.get().begin_xact(tbl=tbl._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
208
|
+
tbl._tbl_version.get().insert(None, df, fail_on_exception=fail_on_exception)
|
|
209
|
+
elif data_source is not None and not is_direct_df:
|
|
210
|
+
tbl.insert_table_data_source(data_source=data_source, fail_on_exception=fail_on_exception)
|
|
172
211
|
|
|
173
|
-
return
|
|
212
|
+
return tbl
|
|
174
213
|
|
|
175
214
|
|
|
176
215
|
def create_view(
|
|
177
216
|
path: str,
|
|
178
|
-
base:
|
|
217
|
+
base: catalog.Table | DataFrame,
|
|
179
218
|
*,
|
|
180
|
-
additional_columns:
|
|
219
|
+
additional_columns: dict[str, Any] | None = None,
|
|
181
220
|
is_snapshot: bool = False,
|
|
182
|
-
|
|
221
|
+
create_default_idxs: bool = False,
|
|
222
|
+
iterator: tuple[type[ComponentIterator], dict[str, Any]] | None = None,
|
|
183
223
|
num_retained_versions: int = 10,
|
|
184
224
|
comment: str = '',
|
|
185
225
|
media_validation: Literal['on_read', 'on_write'] = 'on_write',
|
|
186
226
|
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
|
|
187
|
-
) ->
|
|
227
|
+
) -> catalog.Table | None:
|
|
188
228
|
"""Create a view of an existing table object (which itself can be a view or a snapshot or a base table).
|
|
189
229
|
|
|
190
230
|
Args:
|
|
@@ -197,6 +237,8 @@ def create_view(
|
|
|
197
237
|
[`create_table`][pixeltable.create_table].
|
|
198
238
|
is_snapshot: Whether the view is a snapshot. Setting this to `True` is equivalent to calling
|
|
199
239
|
[`create_snapshot`][pixeltable.create_snapshot].
|
|
240
|
+
create_default_idxs: Whether to create default indexes on the view's columns (the base's columns are excluded).
|
|
241
|
+
Cannot be `True` for snapshots.
|
|
200
242
|
iterator: The iterator to use for this view. If specified, then this view will be a one-to-many view of
|
|
201
243
|
the base table.
|
|
202
244
|
num_retained_versions: Number of versions of the view to retain.
|
|
@@ -244,16 +286,16 @@ def create_view(
|
|
|
244
286
|
>>> tbl = pxt.get_table('my_table')
|
|
245
287
|
... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 100), if_exists='replace_force')
|
|
246
288
|
"""
|
|
289
|
+
if is_snapshot and create_default_idxs is True:
|
|
290
|
+
raise excs.Error('Cannot create default indexes on a snapshot')
|
|
247
291
|
tbl_version_path: TableVersionPath
|
|
248
|
-
select_list:
|
|
249
|
-
where:
|
|
292
|
+
select_list: list[tuple[exprs.Expr, str | None]] | None = None
|
|
293
|
+
where: exprs.Expr | None = None
|
|
250
294
|
if isinstance(base, catalog.Table):
|
|
251
295
|
tbl_version_path = base._tbl_version_path
|
|
252
296
|
sample_clause = None
|
|
253
297
|
elif isinstance(base, DataFrame):
|
|
254
|
-
base.
|
|
255
|
-
if len(base._from_clause.tbls) > 1:
|
|
256
|
-
raise excs.Error('Cannot create a view of a join')
|
|
298
|
+
base._validate_mutable_op_sequence('create_view', allow_select=True)
|
|
257
299
|
tbl_version_path = base._from_clause.tbls[0]
|
|
258
300
|
where = base.where_clause
|
|
259
301
|
sample_clause = base.sample_clause
|
|
@@ -264,7 +306,7 @@ def create_view(
|
|
|
264
306
|
raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
|
|
265
307
|
assert isinstance(base, (catalog.Table, DataFrame))
|
|
266
308
|
|
|
267
|
-
path_obj = catalog.Path(path)
|
|
309
|
+
path_obj = catalog.Path.parse(path)
|
|
268
310
|
if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
|
|
269
311
|
media_validation_ = catalog.MediaValidation.validated(media_validation, 'media_validation')
|
|
270
312
|
|
|
@@ -276,7 +318,7 @@ def create_view(
|
|
|
276
318
|
if col_name in [c.name for c in tbl_version_path.columns()]:
|
|
277
319
|
raise excs.Error(
|
|
278
320
|
f'Column {col_name!r} already exists in the base table '
|
|
279
|
-
f'{tbl_version_path.get_column(col_name).
|
|
321
|
+
f'{tbl_version_path.get_column(col_name).get_tbl().name}.'
|
|
280
322
|
)
|
|
281
323
|
|
|
282
324
|
return Catalog.get().create_view(
|
|
@@ -287,6 +329,7 @@ def create_view(
|
|
|
287
329
|
sample_clause=sample_clause,
|
|
288
330
|
additional_columns=additional_columns,
|
|
289
331
|
is_snapshot=is_snapshot,
|
|
332
|
+
create_default_idxs=create_default_idxs,
|
|
290
333
|
iterator=iterator,
|
|
291
334
|
num_retained_versions=num_retained_versions,
|
|
292
335
|
comment=comment,
|
|
@@ -297,15 +340,15 @@ def create_view(
|
|
|
297
340
|
|
|
298
341
|
def create_snapshot(
|
|
299
342
|
path_str: str,
|
|
300
|
-
base:
|
|
343
|
+
base: catalog.Table | DataFrame,
|
|
301
344
|
*,
|
|
302
|
-
additional_columns:
|
|
303
|
-
iterator:
|
|
345
|
+
additional_columns: dict[str, Any] | None = None,
|
|
346
|
+
iterator: tuple[type[ComponentIterator], dict[str, Any]] | None = None,
|
|
304
347
|
num_retained_versions: int = 10,
|
|
305
348
|
comment: str = '',
|
|
306
349
|
media_validation: Literal['on_read', 'on_write'] = 'on_write',
|
|
307
350
|
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
|
|
308
|
-
) ->
|
|
351
|
+
) -> catalog.Table | None:
|
|
309
352
|
"""Create a snapshot of an existing table object (which itself can be a view or a snapshot or a base table).
|
|
310
353
|
|
|
311
354
|
Args:
|
|
@@ -376,36 +419,67 @@ def create_snapshot(
|
|
|
376
419
|
)
|
|
377
420
|
|
|
378
421
|
|
|
379
|
-
def
|
|
422
|
+
def publish(
|
|
423
|
+
source: str | catalog.Table,
|
|
424
|
+
destination_uri: str,
|
|
425
|
+
bucket_name: str | None = None,
|
|
426
|
+
access: Literal['public', 'private'] = 'private',
|
|
427
|
+
) -> None:
|
|
380
428
|
"""
|
|
381
|
-
|
|
382
|
-
|
|
429
|
+
Publishes a replica of a local Pixeltable table to Pixeltable cloud. A given table can be published to at most one
|
|
430
|
+
URI per Pixeltable cloud database.
|
|
383
431
|
|
|
384
432
|
Args:
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
433
|
+
source: Path or table handle of the local table to be published.
|
|
434
|
+
destination_uri: Remote URI where the replica will be published, such as `'pxt://org_name/my_dir/my_table'`.
|
|
435
|
+
bucket_name: The name of the bucket to use to store replica's data. The bucket must be registered with
|
|
436
|
+
Pixeltable cloud. If no `bucket_name` is provided, the default storage bucket for the destination
|
|
437
|
+
database will be used.
|
|
438
|
+
access: Access control for the replica.
|
|
439
|
+
|
|
440
|
+
- `'public'`: Anyone can access this replica.
|
|
441
|
+
- `'private'`: Only the host organization can access.
|
|
388
442
|
"""
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
443
|
+
if not destination_uri.startswith('pxt://'):
|
|
444
|
+
raise excs.Error("`destination_uri` must be a remote Pixeltable URI with the prefix 'pxt://'")
|
|
445
|
+
|
|
446
|
+
if isinstance(source, str):
|
|
447
|
+
source = get_table(source)
|
|
448
|
+
|
|
449
|
+
share.push_replica(destination_uri, source, bucket_name, access)
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
def replicate(remote_uri: str, local_path: str) -> catalog.Table:
|
|
453
|
+
"""
|
|
454
|
+
Retrieve a replica from Pixeltable cloud as a local table. This will create a full local copy of the replica in a
|
|
455
|
+
way that preserves the table structure of the original source data. Once replicated, the local table can be
|
|
456
|
+
queried offline just as any other Pixeltable table.
|
|
457
|
+
|
|
458
|
+
Args:
|
|
459
|
+
remote_uri: Remote URI of the table to be replicated, such as `'pxt://org_name/my_dir/my_table'` or
|
|
460
|
+
`'pxt://org_name/my_dir/my_table:5'` (with version 5).
|
|
461
|
+
local_path: Local table path where the replica will be created, such as `'my_new_dir.my_new_tbl'`. It can be
|
|
462
|
+
the same or different from the cloud table name.
|
|
463
|
+
|
|
464
|
+
Returns:
|
|
465
|
+
A handle to the newly created local replica table.
|
|
466
|
+
"""
|
|
467
|
+
if not remote_uri.startswith('pxt://'):
|
|
468
|
+
raise excs.Error("`remote_uri` must be a remote Pixeltable URI with the prefix 'pxt://'")
|
|
469
|
+
|
|
470
|
+
return share.pull_replica(local_path, remote_uri)
|
|
402
471
|
|
|
403
472
|
|
|
404
|
-
def get_table(path: str) -> catalog.Table:
|
|
473
|
+
def get_table(path: str, if_not_exists: Literal['error', 'ignore'] = 'error') -> catalog.Table | None:
|
|
405
474
|
"""Get a handle to an existing table, view, or snapshot.
|
|
406
475
|
|
|
407
476
|
Args:
|
|
408
477
|
path: Path to the table.
|
|
478
|
+
if_not_exists: Directive regarding how to handle if the path does not exist.
|
|
479
|
+
Must be one of the following:
|
|
480
|
+
|
|
481
|
+
- `'error'`: raise an error
|
|
482
|
+
- `'ignore'`: do nothing and return `None`
|
|
409
483
|
|
|
410
484
|
Returns:
|
|
411
485
|
A handle to the [`Table`][pixeltable.Table].
|
|
@@ -425,20 +499,39 @@ def get_table(path: str) -> catalog.Table:
|
|
|
425
499
|
Handles to views and snapshots are retrieved in the same way:
|
|
426
500
|
|
|
427
501
|
>>> tbl = pxt.get_table('my_snapshot')
|
|
502
|
+
|
|
503
|
+
Get a handle to a specific version of a table:
|
|
504
|
+
|
|
505
|
+
>>> tbl = pxt.get_table('my_table:722')
|
|
428
506
|
"""
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
_logger.debug(f'get_table(): tbl={tv.id}:{tv.effective_version} sa_tbl={id(tv.store_tbl.sa_tbl):x} tv={id(tv):x}')
|
|
507
|
+
if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
508
|
+
path_obj = catalog.Path.parse(path, allow_versioned_path=True)
|
|
509
|
+
tbl = Catalog.get().get_table(path_obj, if_not_exists_)
|
|
433
510
|
return tbl
|
|
434
511
|
|
|
435
512
|
|
|
436
|
-
def move(
|
|
513
|
+
def move(
|
|
514
|
+
path: str,
|
|
515
|
+
new_path: str,
|
|
516
|
+
*,
|
|
517
|
+
if_exists: Literal['error', 'ignore'] = 'error',
|
|
518
|
+
if_not_exists: Literal['error', 'ignore'] = 'error',
|
|
519
|
+
) -> None:
|
|
437
520
|
"""Move a schema object to a new directory and/or rename a schema object.
|
|
438
521
|
|
|
439
522
|
Args:
|
|
440
523
|
path: absolute path to the existing schema object.
|
|
441
524
|
new_path: absolute new path for the schema object.
|
|
525
|
+
if_exists: Directive regarding how to handle if a schema object already exists at the new path.
|
|
526
|
+
Must be one of the following:
|
|
527
|
+
|
|
528
|
+
- `'error'`: raise an error
|
|
529
|
+
- `'ignore'`: do nothing and return
|
|
530
|
+
if_not_exists: Directive regarding how to handle if the source path does not exist.
|
|
531
|
+
Must be one of the following:
|
|
532
|
+
|
|
533
|
+
- `'error'`: raise an error
|
|
534
|
+
- `'ignore'`: do nothing and return
|
|
442
535
|
|
|
443
536
|
Raises:
|
|
444
537
|
Error: If path does not exist or new_path already exists.
|
|
@@ -452,22 +545,26 @@ def move(path: str, new_path: str) -> None:
|
|
|
452
545
|
|
|
453
546
|
>>>> pxt.move('dir1.my_table', 'dir1.new_name')
|
|
454
547
|
"""
|
|
548
|
+
if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
|
|
549
|
+
if if_exists_ not in (catalog.IfExistsParam.ERROR, catalog.IfExistsParam.IGNORE):
|
|
550
|
+
raise excs.Error("`if_exists` must be one of 'error' or 'ignore'")
|
|
551
|
+
if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
455
552
|
if path == new_path:
|
|
456
553
|
raise excs.Error('move(): source and destination cannot be identical')
|
|
457
|
-
path_obj, new_path_obj = catalog.Path(path), catalog.Path(new_path)
|
|
554
|
+
path_obj, new_path_obj = catalog.Path.parse(path), catalog.Path.parse(new_path)
|
|
458
555
|
if path_obj.is_ancestor(new_path_obj):
|
|
459
556
|
raise excs.Error(f'move(): cannot move {path!r} into its own subdirectory')
|
|
460
|
-
|
|
461
|
-
cat.move(path_obj, new_path_obj)
|
|
557
|
+
Catalog.get().move(path_obj, new_path_obj, if_exists_, if_not_exists_)
|
|
462
558
|
|
|
463
559
|
|
|
464
560
|
def drop_table(
|
|
465
|
-
table:
|
|
561
|
+
table: str | catalog.Table, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error'
|
|
466
562
|
) -> None:
|
|
467
|
-
"""Drop a table, view, or
|
|
563
|
+
"""Drop a table, view, snapshot, or replica.
|
|
468
564
|
|
|
469
565
|
Args:
|
|
470
|
-
table: Fully qualified name
|
|
566
|
+
table: Fully qualified name or table handle of the table to be dropped; or a remote URI of a cloud replica to
|
|
567
|
+
be deleted.
|
|
471
568
|
force: If `True`, will also drop all views and sub-views of this table.
|
|
472
569
|
if_not_exists: Directive regarding how to handle if the path does not exist.
|
|
473
570
|
Must be one of the following:
|
|
@@ -507,9 +604,69 @@ def drop_table(
|
|
|
507
604
|
assert isinstance(table, str)
|
|
508
605
|
tbl_path = table
|
|
509
606
|
|
|
510
|
-
path_obj = catalog.Path(tbl_path)
|
|
511
607
|
if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
512
|
-
|
|
608
|
+
|
|
609
|
+
if tbl_path.startswith('pxt://'):
|
|
610
|
+
# Remote table
|
|
611
|
+
if force:
|
|
612
|
+
raise excs.Error('Cannot use `force=True` with a cloud replica URI.')
|
|
613
|
+
# TODO: Handle if_not_exists properly
|
|
614
|
+
share.delete_replica(tbl_path)
|
|
615
|
+
else:
|
|
616
|
+
# Local table
|
|
617
|
+
path_obj = catalog.Path.parse(tbl_path)
|
|
618
|
+
Catalog.get().drop_table(path_obj, force=force, if_not_exists=if_not_exists_)
|
|
619
|
+
|
|
620
|
+
|
|
621
|
+
def get_dir_contents(dir_path: str = '', recursive: bool = True) -> 'DirContents':
    """Get the contents of a Pixeltable directory.

    Args:
        dir_path: Path to the directory. Defaults to the root directory.
        recursive: If `False`, returns only those tables and directories that are directly contained in the
            specified directory; if `True`, returns all tables and directories that are descendants of the
            specified directory, recursively.

    Returns:
        A [`DirContents`][pixeltable.DirContents] object representing the contents of the specified directory.
        Both of its path lists are sorted.

    Raises:
        Error: If the path does not exist or does not designate a directory.

    Examples:
        Get contents of top-level directory:

        >>> pxt.get_dir_contents()

        Get contents of 'dir1':

        >>> pxt.get_dir_contents('dir1')
    """
    parsed_path = catalog.Path.parse(dir_path, allow_empty_path=True)
    entries = Catalog.get().get_dir_contents(parsed_path, recursive=recursive)

    # The helper fills both accumulators in place while walking the nested entries.
    dir_paths: list[str] = []
    table_paths: list[str] = []
    _assemble_dir_contents(dir_path, entries, dir_paths, table_paths)

    return DirContents(sorted(dir_paths), sorted(table_paths))
|
|
653
|
+
|
|
654
|
+
|
|
655
|
+
def _assemble_dir_contents(
    dir_path: str, catalog_entries: dict[str, Catalog.DirEntry], dirs: list[str], tables: list[str]
) -> None:
    """Flatten nested catalog entries into lists of directory and table paths.

    Paths are appended to `dirs` and `tables` in place, in traversal order. Each path is the entry name
    prefixed with `dir_path` and a dot, or the bare name when `dir_path` is empty.

    Args:
        dir_path: Path of the directory that `catalog_entries` belongs to; may be empty.
        catalog_entries: Mapping from entry name to catalog entry for that directory.
        dirs: Accumulator that receives the paths of directory entries.
        tables: Accumulator that receives the paths of table entries.
    """
    prefix = f'{dir_path}.' if dir_path else ''
    for child_name, child in catalog_entries.items():
        if child_name.startswith('_'):
            # Skip system paths
            continue

        child_path = prefix + child_name
        if child.dir is None:
            # Anything that is not a directory must be a table and cannot have nested entries.
            assert child.table is not None
            assert not child.dir_entries
            tables.append(child_path)
            continue

        dirs.append(child_path)
        if child.dir_entries is not None:
            _assemble_dir_contents(child_path, child.dir_entries, dirs, tables)
|
|
513
670
|
|
|
514
671
|
|
|
515
672
|
def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
|
|
@@ -535,15 +692,18 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
|
|
|
535
692
|
|
|
536
693
|
>>> pxt.list_tables('dir1')
|
|
537
694
|
"""
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
695
|
+
return _list_tables(dir_path, recursive=recursive, allow_system_paths=False)
|
|
696
|
+
|
|
697
|
+
|
|
698
|
+
def _list_tables(dir_path: str = '', recursive: bool = True, allow_system_paths: bool = False) -> list[str]:
    """List the paths of the tables found under a directory, as strings.

    Args:
        dir_path: Path to the directory; an empty path is accepted.
        recursive: Forwarded to the catalog's `get_dir_contents()`.
        allow_system_paths: Forwarded to `Path.parse()` as `allow_system_path`.

    Returns:
        The string form of every `catalog.Table` path extracted from the directory contents.
    """
    parsed_dir = catalog.Path.parse(dir_path, allow_empty_path=True, allow_system_path=allow_system_paths)
    entries = Catalog.get().get_dir_contents(parsed_dir, recursive=recursive)
    table_paths = _extract_paths(entries, parent=parsed_dir, entry_type=catalog.Table)
    return list(map(str, table_paths))
|
|
542
702
|
|
|
543
703
|
|
|
544
704
|
def create_dir(
|
|
545
|
-
path: str, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error', parents: bool = False
|
|
546
|
-
) ->
|
|
705
|
+
path: str, *, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error', parents: bool = False
|
|
706
|
+
) -> catalog.Dir | None:
|
|
547
707
|
"""Create a directory.
|
|
548
708
|
|
|
549
709
|
Args:
|
|
@@ -588,7 +748,7 @@ def create_dir(
|
|
|
588
748
|
|
|
589
749
|
>>> pxt.create_dir('parent1.parent2.sub_dir', parents=True)
|
|
590
750
|
"""
|
|
591
|
-
path_obj = catalog.Path(path)
|
|
751
|
+
path_obj = catalog.Path.parse(path)
|
|
592
752
|
if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
|
|
593
753
|
return Catalog.get().create_dir(path_obj, if_exists=if_exists_, parents=parents)
|
|
594
754
|
|
|
@@ -630,15 +790,75 @@ def drop_dir(path: str, force: bool = False, if_not_exists: Literal['error', 'ig
|
|
|
630
790
|
|
|
631
791
|
>>> pxt.drop_dir('my_dir', force=True)
|
|
632
792
|
"""
|
|
633
|
-
path_obj = catalog.Path(path) # validate format
|
|
793
|
+
path_obj = catalog.Path.parse(path) # validate format
|
|
634
794
|
if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
635
795
|
Catalog.get().drop_dir(path_obj, if_not_exists=if_not_exists_, force=force)
|
|
636
796
|
|
|
637
797
|
|
|
798
|
+
def ls(path: str = '') -> pd.DataFrame:
    """
    List the contents of a Pixeltable directory.

    This function returns a Pandas DataFrame representing a human-readable listing of the specified directory,
    including various attributes such as version and base table, as appropriate.

    To get a programmatic list of the directory's contents, use [get_dir_contents()][pixeltable.get_dir_contents]
    instead.

    Args:
        path: Path to the directory; an empty path is accepted.

    Returns:
        A DataFrame with the columns `Name`, `Kind`, `Version` and `Base`, one row per listed entry, ordered
        by name. Every index label is the empty string.
    """
    from pixeltable.catalog import retry_loop
    from pixeltable.metadata import schema

    cat = Catalog.get()
    listing = cat.get_dir_contents(catalog.Path.parse(path, allow_empty_path=True))

    @retry_loop(for_write=False)
    def build_rows() -> list[list[str]]:
        result: list[list[str]] = []
        for entry_name, entry in listing.items():
            if entry_name.startswith('_'):
                continue

            if entry.dir is not None:
                # Directories carry neither a version nor a base table.
                result.append([entry_name, 'dir', '', ''])
                continue

            assert entry.table is not None
            assert isinstance(entry.table, schema.Table)
            md = cat.get_table_by_id(entry.table.id).get_metadata()

            base = md['base'] or ''
            if base.startswith('_'):
                base = '<anonymous base table>'

            # Precedence matters: replica wins over snapshot, which wins over view.
            kind = (
                'replica'
                if md['is_replica']
                else 'snapshot'
                if md['is_snapshot']
                else 'view'
                if md['is_view']
                else 'table'
            )
            # Snapshots are listed without a version.
            version = str(md['version']) if kind != 'snapshot' else ''
            result.append([entry_name, kind, version, base])
        return result

    rows = build_rows()
    rows.sort(key=lambda row: row[0])

    columns = ('Name', 'Kind', 'Version', 'Base')
    return pd.DataFrame(
        {col: [row[pos] for row in rows] for pos, col in enumerate(columns)},
        index=[''] * len(rows),
    )
|
|
858
|
+
|
|
859
|
+
|
|
638
860
|
def _extract_paths(
|
|
639
|
-
dir_entries: dict[str, Catalog.DirEntry],
|
|
640
|
-
parent: catalog.Path,
|
|
641
|
-
entry_type: Optional[type[catalog.SchemaObject]] = None,
|
|
861
|
+
dir_entries: dict[str, Catalog.DirEntry], parent: catalog.Path, entry_type: type[catalog.SchemaObject] | None = None
|
|
642
862
|
) -> list[catalog.Path]:
|
|
643
863
|
"""Convert nested dir_entries structure to a flattened list of paths."""
|
|
644
864
|
matches: list[str]
|
|
@@ -676,7 +896,7 @@ def list_dirs(path: str = '', recursive: bool = True) -> list[str]:
|
|
|
676
896
|
>>> cl.list_dirs('my_dir', recursive=True)
|
|
677
897
|
['my_dir', 'my_dir.sub_dir1']
|
|
678
898
|
"""
|
|
679
|
-
path_obj = catalog.Path(path,
|
|
899
|
+
path_obj = catalog.Path.parse(path, allow_empty_path=True) # validate format
|
|
680
900
|
cat = Catalog.get()
|
|
681
901
|
contents = cat.get_dir_contents(path_obj, recursive=recursive)
|
|
682
902
|
return [str(p) for p in _extract_paths(contents, parent=path_obj, entry_type=catalog.Dir)]
|
|
@@ -711,7 +931,7 @@ def list_functions() -> Styler:
|
|
|
711
931
|
return pd_df.hide(axis='index')
|
|
712
932
|
|
|
713
933
|
|
|
714
|
-
def tools(*args:
|
|
934
|
+
def tools(*args: func.Function | func.tools.Tool) -> func.tools.Tools:
|
|
715
935
|
"""
|
|
716
936
|
Specifies a collection of UDFs to be used as LLM tools. Pixeltable allows any UDF to be used as an input into an
|
|
717
937
|
LLM tool-calling API. To use one or more UDFs as tools, wrap them in a `pxt.tools` call and pass the return value
|
|
@@ -748,7 +968,7 @@ def tools(*args: Union[func.Function, func.tools.Tool]) -> func.tools.Tools:
|
|
|
748
968
|
return func.tools.Tools(tools=[arg if isinstance(arg, func.tools.Tool) else tool(arg) for arg in args])
|
|
749
969
|
|
|
750
970
|
|
|
751
|
-
def tool(fn: func.Function, name:
|
|
971
|
+
def tool(fn: func.Function, name: str | None = None, description: str | None = None) -> func.tools.Tool:
|
|
752
972
|
"""
|
|
753
973
|
Specifies a Pixeltable UDF to be used as an LLM tool with customizable metadata. See the documentation for
|
|
754
974
|
[pxt.tools()][pixeltable.tools] for more details.
|
|
@@ -769,11 +989,7 @@ def tool(fn: func.Function, name: Optional[str] = None, description: Optional[st
|
|
|
769
989
|
|
|
770
990
|
|
|
771
991
|
def configure_logging(
|
|
772
|
-
*,
|
|
773
|
-
to_stdout: Optional[bool] = None,
|
|
774
|
-
level: Optional[int] = None,
|
|
775
|
-
add: Optional[str] = None,
|
|
776
|
-
remove: Optional[str] = None,
|
|
992
|
+
*, to_stdout: bool | None = None, level: int | None = None, add: str | None = None, remove: str | None = None
|
|
777
993
|
) -> None:
|
|
778
994
|
"""Configure logging.
|
|
779
995
|
|
|
@@ -788,3 +1004,14 @@ def configure_logging(
|
|
|
788
1004
|
|
|
789
1005
|
def array(elements: Iterable) -> exprs.Expr:
    """Build an expression from `elements` by delegating to `exprs.Expr.from_array()`.

    Args:
        elements: The elements to pass to `exprs.Expr.from_array()`.

    Returns:
        The expression returned by `exprs.Expr.from_array()`.
    """
    array_expr = exprs.Expr.from_array(elements)
    return array_expr
|
|
1007
|
+
|
|
1008
|
+
|
|
1009
|
+
class DirContents(NamedTuple):
    """Represents the contents of a Pixeltable directory.

    A two-field named tuple: directory paths first, table paths second.
    """

    dirs: list[str]
    """Paths of the directories contained in this directory."""

    tables: list[str]
    """Paths of the tables contained in this directory."""
|