pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +42 -8
- pixeltable/{dataframe.py → _query.py} +470 -206
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -4
- pixeltable/catalog/catalog.py +1785 -432
- pixeltable/catalog/column.py +190 -113
- pixeltable/catalog/dir.py +2 -4
- pixeltable/catalog/globals.py +19 -46
- pixeltable/catalog/insertable_table.py +191 -98
- pixeltable/catalog/path.py +63 -23
- pixeltable/catalog/schema_object.py +11 -15
- pixeltable/catalog/table.py +843 -436
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +978 -657
- pixeltable/catalog/table_version_handle.py +72 -16
- pixeltable/catalog/table_version_path.py +112 -43
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +134 -90
- pixeltable/config.py +134 -22
- pixeltable/env.py +471 -157
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +4 -1
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +11 -7
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +106 -56
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +19 -19
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +351 -84
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +36 -23
- pixeltable/exprs/column_ref.py +213 -89
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +164 -54
- pixeltable/exprs/expr.py +70 -44
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +100 -40
- pixeltable/exprs/globals.py +2 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +18 -32
- pixeltable/exprs/is_null.py +7 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +27 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +167 -67
- pixeltable/exprs/rowid_ref.py +25 -10
- pixeltable/exprs/similarity_expr.py +58 -40
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +17 -11
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +29 -27
- pixeltable/func/signature.py +46 -19
- pixeltable/func/tools.py +31 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +16 -0
- pixeltable/functions/anthropic.py +123 -77
- pixeltable/functions/audio.py +147 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +7 -4
- pixeltable/functions/deepseek.py +35 -43
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +11 -20
- pixeltable/functions/gemini.py +195 -39
- pixeltable/functions/globals.py +142 -14
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1056 -24
- pixeltable/functions/image.py +115 -57
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +28 -13
- pixeltable/functions/math.py +67 -5
- pixeltable/functions/mistralai.py +18 -55
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +20 -13
- pixeltable/functions/openai.py +240 -226
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +4 -4
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +239 -69
- pixeltable/functions/timestamp.py +16 -16
- pixeltable/functions/together.py +24 -84
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1515 -107
- pixeltable/functions/vision.py +8 -8
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +16 -8
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +362 -115
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +28 -22
- pixeltable/index/embedding_index.py +100 -118
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +8 -7
- pixeltable/io/external_store.py +56 -105
- pixeltable/io/fiftyone.py +13 -13
- pixeltable/io/globals.py +31 -30
- pixeltable/io/hf_datasets.py +61 -16
- pixeltable/io/label_studio.py +74 -70
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +21 -12
- pixeltable/io/parquet.py +25 -105
- pixeltable/io/table_data_conduit.py +250 -123
- pixeltable/io/utils.py +4 -4
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +26 -25
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +112 -78
- pixeltable/iterators/image.py +12 -15
- pixeltable/iterators/string.py +11 -4
- pixeltable/iterators/video.py +523 -120
- pixeltable/metadata/__init__.py +14 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_30.py +34 -21
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +20 -31
- pixeltable/metadata/notes.py +9 -0
- pixeltable/metadata/schema.py +140 -53
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +382 -115
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +547 -83
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +257 -59
- pixeltable/store.py +311 -194
- pixeltable/type_system.py +373 -211
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +131 -17
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +6 -6
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +32 -6
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +7 -18
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +86 -48
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +26 -0
- pixeltable/utils/system.py +30 -0
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -40
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable-0.3.14.dist-info/METADATA +0 -434
- pixeltable-0.3.14.dist-info/RECORD +0 -186
- pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/globals.py
CHANGED
@@ -3,15 +3,18 @@ from __future__ import annotations
 import logging
 import os
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Iterable,
+from typing import TYPE_CHECKING, Any, Iterable, Literal, TypedDict, Union

 import pandas as pd
+import pydantic
 from pandas.io.formats.style import Styler

-from pixeltable import
+from pixeltable import Query, catalog, exceptions as excs, exprs, func, share, type_system as ts
 from pixeltable.catalog import Catalog, TableVersionPath
 from pixeltable.catalog.insertable_table import OnErrorParameter
+from pixeltable.config import Config
 from pixeltable.env import Env
+from pixeltable.io.table_data_conduit import QueryTableDataConduit, TableDataConduit
 from pixeltable.iterators import ComponentIterator

 if TYPE_CHECKING:
@@ -22,46 +25,63 @@ if TYPE_CHECKING:
     str,
     os.PathLike,
     Path,  # OS paths, filenames, URLs
-
-
-
+    Iterable[dict[str, Any]],  # dictionaries of values
+    Iterable[pydantic.BaseModel],  # Pydantic model instances
+    catalog.Table,  # Pixeltable Table
+    Query,  # Pixeltable Query
     pd.DataFrame,  # pandas DataFrame
-
-
+    datasets.Dataset,
+    datasets.DatasetDict,  # Huggingface datasets
 ]


 _logger = logging.getLogger('pixeltable')


-def init() -> None:
+def init(config_overrides: dict[str, Any] | None = None) -> None:
     """Initializes the Pixeltable environment."""
+    if config_overrides is None:
+        config_overrides = {}
+    Config.init(config_overrides)
     _ = Catalog.get()


 def create_table(
-
-    schema:
+    path: str,
+    schema: dict[str, Any] | None = None,
     *,
-    source:
-    source_format:
-    schema_overrides:
+    source: TableDataSource | None = None,
+    source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
+    schema_overrides: dict[str, Any] | None = None,
+    create_default_idxs: bool = True,
     on_error: Literal['abort', 'ignore'] = 'abort',
-    primary_key:
+    primary_key: str | list[str] | None = None,
     num_retained_versions: int = 10,
     comment: str = '',
     media_validation: Literal['on_read', 'on_write'] = 'on_write',
     if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
-    extra_args:
+    extra_args: dict[str, Any] | None = None,  # Additional arguments to data source provider
 ) -> catalog.Table:
-    """Create a new base table.
+    """Create a new base table. Exactly one of `schema` or `source` must be provided.
+
+    If a `schema` is provided, then an empty table will be created with the specified schema.
+
+    If a `source` is provided, then Pixeltable will attempt to infer a data source format and table schema from the
+    contents of the specified data, and the data will be imported from the specified source into the new table. The
+    source format and/or schema can be specified directly via the `source_format` and `schema_overrides` parameters.

     Args:
-
-        schema:
-        source: A data source
-        source_format:
-
+        path: Pixeltable path (qualified name) of the table, such as `'my_table'` or `'my_dir.my_subdir.my_table'`.
+        schema: Schema for the new table, mapping column names to Pixeltable types.
+        source: A data source (file, URL, Table, Query, or list of rows) to import from.
+        source_format: Must be used in conjunction with a `source`.
+            If specified, then the given format will be used to read the source data. (Otherwise,
+            Pixeltable will attempt to infer the format from the source data.)
+        schema_overrides: Must be used in conjunction with a `source`.
+            If specified, then columns in `schema_overrides` will be given the specified types.
+            (Pixeltable will attempt to infer the types of any columns not specified.)
+        create_default_idxs: If True, creates a B-tree index on every scalar and media column that is not computed,
+            except for boolean columns.
         on_error: Determines the behavior if an error occurs while evaluating a computed column or detecting an
             invalid media file (such as a corrupt image) for one of the inserted rows.

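
The hunk above shows `init()` gaining an optional `config_overrides` dict that is forwarded to `Config.init()`, and the `TableDataSource` union widening to accept Pydantic model instances, `Table`/`Query` handles, and Hugging Face datasets. A minimal sketch of explicit initialization follows; the override key shown is hypothetical, and the supported key names are determined by `pixeltable.config.Config`:

>>> import pixeltable as pxt
>>> pxt.init()  # default configuration
>>> # hypothetical override key; consult pixeltable.config.Config for the names it actually accepts
>>> pxt.init(config_overrides={'pixeltable.verbosity': 1})
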
@@ -77,14 +97,15 @@ def create_table(

             - `'on_read'`: validate media files at query time
             - `'on_write'`: validate media files during insert/update operations
-        if_exists:
-            Must be one of the following:
+        if_exists: Determines the behavior if a table already exists at the specified path location.

             - `'error'`: raise an error
             - `'ignore'`: do nothing and return the existing table handle
-            - `'replace'`: if the existing table has no views, drop and replace it with a new one
-
-
+            - `'replace'`: if the existing table has no views or snapshots, drop and replace it with a new one;
+                raise an error if the existing table has views or snapshots
+            - `'replace_force'`: drop the existing table and all its views and snapshots, and create a new one
+        extra_args: Must be used in conjunction with a `source`. If specified, then additional arguments will be
+            passed along to the source data provider.

     Returns:
         A handle to the newly created table, or to an already existing table at the path when `if_exists='ignore'`.
@@ -110,7 +131,7 @@ def create_table(
         >>> tbl1 = pxt.get_table('orig_table')
         ... tbl2 = pxt.create_table('new_table', tbl1.where(tbl1.col1 < 10).select(tbl1.col2))

-        Create a table if does not already exist, otherwise get the existing table:
+        Create a table if it does not already exist, otherwise get the existing table:

         >>> tbl = pxt.create_table('my_table', schema={'col1': pxt.Int, 'col2': pxt.String}, if_exists='ignore')

@@ -121,82 +142,112 @@ def create_table(
         Create a table from a CSV file:

         >>> tbl = pxt.create_table('my_table', source='data.csv')
+
+        Create a table with an auto-generated UUID primary key:
+
+        >>> tbl = pxt.create_table(
+        ...     'my_table',
+        ...     schema={'id': pxt.functions.uuid.uuid4(), 'data': pxt.String},
+        ...     primary_key=['id']
+        ... )
     """
-    from pixeltable.io.table_data_conduit import
+    from pixeltable.io.table_data_conduit import UnkTableDataConduit
     from pixeltable.io.utils import normalize_primary_key_parameter

     if (schema is None) == (source is None):
-        raise excs.Error('
+        raise excs.Error('Either a `schema` or a `source` must be provided (but not both)')

     if schema is not None and (len(schema) == 0 or not isinstance(schema, dict)):
         raise excs.Error('`schema` must be a non-empty dictionary')

-    path_obj = catalog.Path(
+    path_obj = catalog.Path.parse(path)
     if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
     media_validation_ = catalog.MediaValidation.validated(media_validation, 'media_validation')
-    primary_key:
-
-    tds = None
-    data_source = None
+    primary_key: list[str] | None = normalize_primary_key_parameter(primary_key)
+    data_source: TableDataConduit | None = None
     if source is not None:
+        if isinstance(source, str) and source.strip().startswith('pxt://'):
+            raise excs.Error(
+                'create_table(): Creating a table directly from a cloud URI is not supported.'
+                ' Please replicate the table locally first using `pxt.replicate()`:\n'
+                "replica_tbl = pxt.replicate('pxt://path/to/remote_table', 'local_replica_name')\n"
+                "pxt.create_table('new_table_name', source=replica_tbl)"
+            )
         tds = UnkTableDataConduit(source, source_format=source_format, extra_fields=extra_args)
         tds.check_source_format()
         data_source = tds.specialize()
-
+        src_schema_overrides: dict[str, ts.ColumnType] = {}
+        if schema_overrides is not None:
+            for col_name, py_type in schema_overrides.items():
+                col_type = ts.ColumnType.normalize_type(py_type, nullable_default=True, allow_builtin_types=False)
+                if col_type is None:
+                    raise excs.Error(f'Invalid type for column {col_name!r} in `schema_overrides`: {py_type}')
+                src_schema_overrides[col_name] = col_type
+        data_source.src_schema_overrides = src_schema_overrides
         data_source.src_pk = primary_key
         data_source.infer_schema()
         schema = data_source.pxt_schema
         primary_key = data_source.pxt_pk
-
+        is_direct_query = data_source.is_direct_query()
     else:
-
+        is_direct_query = False

     if len(schema) == 0 or not isinstance(schema, dict):
         raise excs.Error(
             'Unable to create a proper schema from supplied `source`. Please use appropriate `schema_overrides`.'
         )

-
+    tbl, was_created = Catalog.get().create_table(
         path_obj,
         schema,
-        data_source.pxt_df if isinstance(data_source, DFTableDataConduit) else None,
         if_exists=if_exists_,
         primary_key=primary_key,
         comment=comment,
         media_validation=media_validation_,
         num_retained_versions=num_retained_versions,
+        create_default_idxs=create_default_idxs,
     )
-
+
+    # TODO: combine data loading with table creation into a single transaction
+    if was_created:
         fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
-
+        if isinstance(data_source, QueryTableDataConduit):
+            query = data_source.pxt_query
+            with Catalog.get().begin_xact(tbl=tbl._tbl_version_path, for_write=True, lock_mutable_tree=True):
+                tbl._tbl_version.get().insert(None, query, fail_on_exception=fail_on_exception)
+        elif data_source is not None and not is_direct_query:
+            tbl.insert_table_data_source(data_source=data_source, fail_on_exception=fail_on_exception)

-    return
+    return tbl


 def create_view(
     path: str,
-    base:
+    base: catalog.Table | Query,
     *,
-    additional_columns:
+    additional_columns: dict[str, Any] | None = None,
     is_snapshot: bool = False,
-
+    create_default_idxs: bool = False,
+    iterator: tuple[type[ComponentIterator], dict[str, Any]] | None = None,
     num_retained_versions: int = 10,
     comment: str = '',
     media_validation: Literal['on_read', 'on_write'] = 'on_write',
     if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
-) ->
+) -> catalog.Table | None:
     """Create a view of an existing table object (which itself can be a view or a snapshot or a base table).

     Args:
         path: A name for the view; can be either a simple name such as `my_view`, or a pathname such as
             `dir1.my_view`.
-        base: [`Table`][pixeltable.Table] (i.e., table or view or snapshot) or [`
+        base: [`Table`][pixeltable.Table] (i.e., table or view or snapshot) or [`Query`][pixeltable.Query] to
             base the view on.
         additional_columns: If specified, will add these columns to the view once it is created. The format
-            of the `additional_columns` parameter is identical to the format of the `
+            of the `additional_columns` parameter is identical to the format of the `schema` parameter in
             [`create_table`][pixeltable.create_table].
         is_snapshot: Whether the view is a snapshot. Setting this to `True` is equivalent to calling
             [`create_snapshot`][pixeltable.create_snapshot].
+        create_default_idxs: Whether to create default indexes on the view's columns (the base's columns are excluded).
+            Cannot be `True` for snapshots.
         iterator: The iterator to use for this view. If specified, then this view will be a one-to-many view of
             the base table.
         num_retained_versions: Number of versions of the view to retain.
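
Taken together, the `create_table` hunks above replace the DataFrame-specific loading path with a general `TableDataConduit` pipeline: exactly one of `schema` or `source` is accepted, `pxt://` URIs are rejected with a pointer to `pxt.replicate()`, `schema_overrides` entries are normalized to Pixeltable column types, and `create_default_idxs` is passed through to the catalog. A sketch of the resulting call patterns, with the file name and column names as placeholder values:

>>> import pixeltable as pxt
>>> # empty table from an explicit schema
>>> films = pxt.create_table('films', schema={'title': pxt.String, 'year': pxt.Int})
>>> # table imported from a CSV source, overriding one inferred column type
>>> films_csv = pxt.create_table(
...     'films_from_csv',
...     source='films.csv',
...     schema_overrides={'year': pxt.Int},
...     if_exists='replace',
... )
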
@@ -244,23 +295,30 @@ def create_view(
         >>> tbl = pxt.get_table('my_table')
         ... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 100), if_exists='replace_force')
     """
+    if is_snapshot and create_default_idxs is True:
+        raise excs.Error('Cannot create default indexes on a snapshot')
     tbl_version_path: TableVersionPath
-    select_list:
-    where:
+    select_list: list[tuple[exprs.Expr, str | None]] | None = None
+    where: exprs.Expr | None = None
     if isinstance(base, catalog.Table):
         tbl_version_path = base._tbl_version_path
-
-
-
-            raise excs.Error('Cannot create a view of a join')
+        sample_clause = None
+    elif isinstance(base, Query):
+        base._validate_mutable_op_sequence('create_view', allow_select=True)
         tbl_version_path = base._from_clause.tbls[0]
         where = base.where_clause
+        sample_clause = base.sample_clause
         select_list = base.select_list
+        if sample_clause is not None and not is_snapshot and not sample_clause.is_repeatable:
+            raise excs.Error('Non-snapshot views cannot be created with non-fractional or stratified sampling')
     else:
-        raise excs.Error('`base` must be an instance of `Table` or `
-    assert isinstance(base, (catalog.Table,
+        raise excs.Error('`base` must be an instance of `Table` or `Query`')
+    assert isinstance(base, (catalog.Table, Query))
+
+    if tbl_version_path.is_replica():
+        raise excs.Error('Cannot create a view or snapshot on top of a replica')

-    path_obj = catalog.Path(path)
+    path_obj = catalog.Path.parse(path)
     if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
     media_validation_ = catalog.MediaValidation.validated(media_validation, 'media_validation')

@@ -272,7 +330,7 @@ def create_view(
         if col_name in [c.name for c in tbl_version_path.columns()]:
             raise excs.Error(
                 f'Column {col_name!r} already exists in the base table '
-                f'{tbl_version_path.get_column(col_name).
+                f'{tbl_version_path.get_column(col_name).get_tbl().name}.'
             )

     return Catalog.get().create_view(
@@ -280,8 +338,10 @@ def create_view(
         tbl_version_path,
         select_list=select_list,
         where=where,
+        sample_clause=sample_clause,
         additional_columns=additional_columns,
         is_snapshot=is_snapshot,
+        create_default_idxs=create_default_idxs,
         iterator=iterator,
         num_retained_versions=num_retained_versions,
         comment=comment,
@@ -292,24 +352,24 @@ def create_view(

 def create_snapshot(
     path_str: str,
-    base:
+    base: catalog.Table | Query,
     *,
-    additional_columns:
-    iterator:
+    additional_columns: dict[str, Any] | None = None,
+    iterator: tuple[type[ComponentIterator], dict[str, Any]] | None = None,
     num_retained_versions: int = 10,
     comment: str = '',
     media_validation: Literal['on_read', 'on_write'] = 'on_write',
     if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
-) ->
+) -> catalog.Table | None:
     """Create a snapshot of an existing table object (which itself can be a view or a snapshot or a base table).

     Args:
         path_str: A name for the snapshot; can be either a simple name such as `my_snapshot`, or a pathname such as
             `dir1.my_snapshot`.
-        base: [`Table`][pixeltable.Table] (i.e., table or view or snapshot) or [`
+        base: [`Table`][pixeltable.Table] (i.e., table or view or snapshot) or [`Query`][pixeltable.Query] to
             base the snapshot on.
         additional_columns: If specified, will add these columns to the snapshot once it is created. The format
-            of the `additional_columns` parameter is identical to the format of the `
+            of the `additional_columns` parameter is identical to the format of the `schema` parameter in
             [`create_table`][pixeltable.create_table].
         iterator: The iterator to use for this snapshot. If specified, then this snapshot will be a one-to-many view of
             the base table.
@@ -371,36 +431,67 @@ def create_snapshot(
     )


-def
+def publish(
+    source: str | catalog.Table,
+    destination_uri: str,
+    bucket_name: str | None = None,
+    access: Literal['public', 'private'] = 'private',
+) -> None:
     """
-
-
+    Publishes a replica of a local Pixeltable table to Pixeltable cloud. A given table can be published to at most one
+    URI per Pixeltable cloud database.

     Args:
-
-
-
+        source: Path or table handle of the local table to be published.
+        destination_uri: Remote URI where the replica will be published, such as `'pxt://org_name/my_dir/my_table'`.
+        bucket_name: The name of the bucket to use to store replica's data. The bucket must be registered with
+            Pixeltable cloud. If no `bucket_name` is provided, the default storage bucket for the destination
+            database will be used.
+        access: Access control for the replica.
+
+            - `'public'`: Anyone can access this replica.
+            - `'private'`: Only the host organization can access.
     """
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if not destination_uri.startswith('pxt://'):
+        raise excs.Error("`destination_uri` must be a remote Pixeltable URI with the prefix 'pxt://'")
+
+    if isinstance(source, str):
+        source = get_table(source)
+
+    share.push_replica(destination_uri, source, bucket_name, access)
+
+
+def replicate(remote_uri: str, local_path: str) -> catalog.Table:
+    """
+    Retrieve a replica from Pixeltable cloud as a local table. This will create a full local copy of the replica in a
+    way that preserves the table structure of the original source data. Once replicated, the local table can be
+    queried offline just as any other Pixeltable table.
+
+    Args:
+        remote_uri: Remote URI of the table to be replicated, such as `'pxt://org_name/my_dir/my_table'` or
+            `'pxt://org_name/my_dir/my_table:5'` (with version 5).
+        local_path: Local table path where the replica will be created, such as `'my_new_dir.my_new_tbl'`. It can be
+            the same or different from the cloud table name.
+
+    Returns:
+        A handle to the newly created local replica table.
+    """
+    if not remote_uri.startswith('pxt://'):
+        raise excs.Error("`remote_uri` must be a remote Pixeltable URI with the prefix 'pxt://'")
+
+    return share.pull_replica(local_path, remote_uri)


-def get_table(path: str) -> catalog.Table:
+def get_table(path: str, if_not_exists: Literal['error', 'ignore'] = 'error') -> catalog.Table | None:
     """Get a handle to an existing table, view, or snapshot.

     Args:
         path: Path to the table.
+        if_not_exists: Directive regarding how to handle if the path does not exist.
+            Must be one of the following:
+
+            - `'error'`: raise an error
+            - `'ignore'`: do nothing and return `None`

     Returns:
         A handle to the [`Table`][pixeltable.Table].
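
This hunk introduces the cloud round trip: `publish()` pushes a local table to a `pxt://` URI via `share.push_replica()`, and `replicate()` pulls it back down with `share.pull_replica()`. A sketch of the two calls, with the organization name and paths as placeholders:

>>> import pixeltable as pxt
>>> tbl = pxt.get_table('my_dir.my_table')
>>> pxt.publish(tbl, 'pxt://my_org/shared/my_table', access='public')
>>> replica = pxt.replicate('pxt://my_org/shared/my_table', 'local.my_table_replica')
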
@@ -420,17 +511,39 @@ def get_table(path: str) -> catalog.Table:
         Handles to views and snapshots are retrieved in the same way:

         >>> tbl = pxt.get_table('my_snapshot')
+
+        Get a handle to a specific version of a table:
+
+        >>> tbl = pxt.get_table('my_table:722')
     """
-
-
+    if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
+    path_obj = catalog.Path.parse(path, allow_versioned_path=True)
+    tbl = Catalog.get().get_table(path_obj, if_not_exists_)
+    return tbl


-def move(
+def move(
+    path: str,
+    new_path: str,
+    *,
+    if_exists: Literal['error', 'ignore'] = 'error',
+    if_not_exists: Literal['error', 'ignore'] = 'error',
+) -> None:
     """Move a schema object to a new directory and/or rename a schema object.

     Args:
         path: absolute path to the existing schema object.
         new_path: absolute new path for the schema object.
+        if_exists: Directive regarding how to handle if a schema object already exists at the new path.
+            Must be one of the following:
+
+            - `'error'`: raise an error
+            - `'ignore'`: do nothing and return
+        if_not_exists: Directive regarding how to handle if the source path does not exist.
+            Must be one of the following:
+
+            - `'error'`: raise an error
+            - `'ignore'`: do nothing and return

     Raises:
         Error: If path does not exist or new_path already exists.
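
The hunk above extends `get_table()` with an `if_not_exists` directive and versioned paths (parsed with `allow_versioned_path=True`), and gives `move()` keyword-only `if_exists`/`if_not_exists` directives. A sketch of both, with the table and directory names as placeholders:

>>> import pixeltable as pxt
>>> tbl_v5 = pxt.get_table('my_table:5')  # handle pinned to version 5
>>> maybe = pxt.get_table('not_there', if_not_exists='ignore')  # returns None instead of raising
>>> pxt.move('dir1.my_table', 'dir2.my_table', if_not_exists='ignore')
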
@@ -444,22 +557,26 @@ def move(path: str, new_path: str) -> None:

         >>>> pxt.move('dir1.my_table', 'dir1.new_name')
     """
+    if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
+    if if_exists_ not in (catalog.IfExistsParam.ERROR, catalog.IfExistsParam.IGNORE):
+        raise excs.Error("`if_exists` must be one of 'error' or 'ignore'")
+    if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
     if path == new_path:
         raise excs.Error('move(): source and destination cannot be identical')
-    path_obj, new_path_obj = catalog.Path(path), catalog.Path(new_path)
+    path_obj, new_path_obj = catalog.Path.parse(path), catalog.Path.parse(new_path)
     if path_obj.is_ancestor(new_path_obj):
         raise excs.Error(f'move(): cannot move {path!r} into its own subdirectory')
-
-    cat.move(path_obj, new_path_obj)
+    Catalog.get().move(path_obj, new_path_obj, if_exists_, if_not_exists_)


 def drop_table(
-    table:
+    table: str | catalog.Table, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error'
 ) -> None:
-    """Drop a table, view, or
+    """Drop a table, view, snapshot, or replica.

     Args:
-        table: Fully qualified name
+        table: Fully qualified name or table handle of the table to be dropped; or a remote URI of a cloud replica to
+            be deleted.
         force: If `True`, will also drop all views and sub-views of this table.
         if_not_exists: Directive regarding how to handle if the path does not exist.
             Must be one of the following:
@@ -493,15 +610,75 @@ def drop_table(
     if isinstance(table, catalog.Table):
         # if we're dropping a table by handle, we first need to get the current path, then drop the S lock on
         # the Table record, and then get X locks in the correct order (first containing directory, then table)
-        with
-            tbl_path = table._path
+        with Catalog.get().begin_xact(for_write=False):
+            tbl_path = table._path()
     else:
         assert isinstance(table, str)
         tbl_path = table

-    path_obj = catalog.Path(tbl_path)
     if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
-
+
+    if tbl_path.startswith('pxt://'):
+        # Remote table
+        if force:
+            raise excs.Error('Cannot use `force=True` with a cloud replica URI.')
+        # TODO: Handle if_not_exists properly
+        share.delete_replica(tbl_path)
+    else:
+        # Local table
+        path_obj = catalog.Path.parse(tbl_path)
+        Catalog.get().drop_table(path_obj, force=force, if_not_exists=if_not_exists_)
+
+
+def get_dir_contents(dir_path: str = '', recursive: bool = True) -> 'DirContents':
+    """Get the contents of a Pixeltable directory.
+
+    Args:
+        dir_path: Path to the directory. Defaults to the root directory.
+        recursive: If `False`, returns only those tables and directories that are directly contained in specified
+            directory; if `True`, returns all tables and directories that are descendants of the specified directory,
+            recursively.
+
+    Returns:
+        A [`DirContents`][pixeltable.DirContents] object representing the contents of the specified directory.
+
+    Raises:
+        Error: If the path does not exist or does not designate a directory.
+
+    Examples:
+        Get contents of top-level directory:
+
+        >>> pxt.get_dir_contents()
+
+        Get contents of 'dir1':
+
+        >>> pxt.get_dir_contents('dir1')
+    """
+    path_obj = catalog.Path.parse(dir_path, allow_empty_path=True)
+    catalog_entries = Catalog.get().get_dir_contents(path_obj, recursive=recursive)
+    dirs: list[str] = []
+    tables: list[str] = []
+    _assemble_dir_contents(dir_path, catalog_entries, dirs, tables)
+    dirs.sort()
+    tables.sort()
+    return DirContents(dirs=dirs, tables=tables)
+
+
+def _assemble_dir_contents(
+    dir_path: str, catalog_entries: dict[str, Catalog.DirEntry], dirs: list[str], tables: list[str]
+) -> None:
+    for name, entry in catalog_entries.items():
+        if name.startswith('_'):
+            continue  # Skip system paths
+        path = f'{dir_path}.{name}' if len(dir_path) > 0 else name
+        if entry.dir is not None:
+            dirs.append(path)
+            if entry.dir_entries is not None:
+                _assemble_dir_contents(path, entry.dir_entries, dirs, tables)
+        else:
+            assert entry.table is not None
+            assert not entry.dir_entries
+            tables.append(path)


 def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
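
Two behaviors land in this hunk: `drop_table()` now routes `pxt://` URIs to `share.delete_replica()` (rejecting `force=True` for them), and the new `get_dir_contents()` returns a `DirContents` TypedDict assembled by `_assemble_dir_contents()`. A sketch of both; the directory name and cloud URI are placeholders:

>>> import pixeltable as pxt
>>> contents = pxt.get_dir_contents('dir1', recursive=False)
>>> contents['dirs'], contents['tables']  # the two DirContents keys
>>> pxt.drop_table('pxt://my_org/shared/my_table')  # deletes the cloud replica, not a local table
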
@@ -527,15 +704,18 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:

         >>> pxt.list_tables('dir1')
     """
-
-
-
+    return _list_tables(dir_path, recursive=recursive, allow_system_paths=False)
+
+
+def _list_tables(dir_path: str = '', recursive: bool = True, allow_system_paths: bool = False) -> list[str]:
+    path_obj = catalog.Path.parse(dir_path, allow_empty_path=True, allow_system_path=allow_system_paths)
+    contents = Catalog.get().get_dir_contents(path_obj, recursive=recursive)
     return [str(p) for p in _extract_paths(contents, parent=path_obj, entry_type=catalog.Table)]


 def create_dir(
-    path: str, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error', parents: bool = False
-) ->
+    path: str, *, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error', parents: bool = False
+) -> catalog.Dir | None:
     """Create a directory.

     Args:
@@ -580,7 +760,7 @@ def create_dir(

         >>> pxt.create_dir('parent1.parent2.sub_dir', parents=True)
     """
-    path_obj = catalog.Path(path)
+    path_obj = catalog.Path.parse(path)
     if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
     return Catalog.get().create_dir(path_obj, if_exists=if_exists_, parents=parents)

@@ -622,15 +802,75 @@ def drop_dir(path: str, force: bool = False, if_not_exists: Literal['error', 'ig

         >>> pxt.drop_dir('my_dir', force=True)
     """
-    path_obj = catalog.Path(path)  # validate format
+    path_obj = catalog.Path.parse(path)  # validate format
     if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
     Catalog.get().drop_dir(path_obj, if_not_exists=if_not_exists_, force=force)


+def ls(path: str = '') -> pd.DataFrame:
+    """
+    List the contents of a Pixeltable directory.
+
+    This function returns a Pandas DataFrame representing a human-readable listing of the specified directory,
+    including various attributes such as version and base table, as appropriate.
+
+    To get a programmatic list of the directory's contents, use [get_dir_contents()][pixeltable.get_dir_contents]
+    instead.
+    """
+    from pixeltable.catalog import retry_loop
+    from pixeltable.metadata import schema
+
+    cat = Catalog.get()
+    path_obj = catalog.Path.parse(path, allow_empty_path=True)
+    dir_entries = cat.get_dir_contents(path_obj)
+
+    @retry_loop(for_write=False)
+    def op() -> list[list[str]]:
+        rows: list[list[str]] = []
+        for name, entry in dir_entries.items():
+            if name.startswith('_'):
+                continue
+            if entry.dir is not None:
+                kind = 'dir'
+                version = ''
+                base = ''
+            else:
+                assert entry.table is not None
+                assert isinstance(entry.table, schema.Table)
+                tbl = cat.get_table_by_id(entry.table.id)
+                md = tbl.get_metadata()
+                base = md['base'] or ''
+                if base.startswith('_'):
+                    base = '<anonymous base table>'
+                if md['is_replica']:
+                    kind = 'replica'
+                elif md['is_snapshot']:
+                    kind = 'snapshot'
+                elif md['is_view']:
+                    kind = 'view'
+                else:
+                    kind = 'table'
+                version = '' if kind == 'snapshot' else str(md['version'])
+            rows.append([name, kind, version, base])
+        return rows
+
+    rows = op()
+
+    rows = sorted(rows, key=lambda x: x[0])
+    df = pd.DataFrame(
+        {
+            'Name': [row[0] for row in rows],
+            'Kind': [row[1] for row in rows],
+            'Version': [row[2] for row in rows],
+            'Base': [row[3] for row in rows],
+        },
+        index=([''] * len(rows)),
+    )
+    return df
+
+
 def _extract_paths(
-    dir_entries: dict[str, Catalog.DirEntry],
-    parent: catalog.Path,
-    entry_type: Optional[type[catalog.SchemaObject]] = None,
+    dir_entries: dict[str, Catalog.DirEntry], parent: catalog.Path, entry_type: type[catalog.SchemaObject] | None = None
 ) -> list[catalog.Path]:
     """Convert nested dir_entries structure to a flattened list of paths."""
     matches: list[str]
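
The new `ls()` shown above builds a human-readable pandas DataFrame (columns Name, Kind, Version, Base) from the same catalog entries that `get_dir_contents()` exposes programmatically. A sketch of the intended use, with the directory name as a placeholder:

>>> import pixeltable as pxt
>>> listing = pxt.ls('dir1')
>>> list(listing.columns)
['Name', 'Kind', 'Version', 'Base']
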
@@ -668,7 +908,7 @@ def list_dirs(path: str = '', recursive: bool = True) -> list[str]:
         >>> cl.list_dirs('my_dir', recursive=True)
         ['my_dir', 'my_dir.sub_dir1']
     """
-    path_obj = catalog.Path(path,
+    path_obj = catalog.Path.parse(path, allow_empty_path=True)  # validate format
     cat = Catalog.get()
     contents = cat.get_dir_contents(path_obj, recursive=recursive)
     return [str(p) for p in _extract_paths(contents, parent=path_obj, entry_type=catalog.Dir)]
@@ -703,7 +943,7 @@ def list_functions() -> Styler:
     return pd_df.hide(axis='index')


-def tools(*args:
+def tools(*args: func.Function | func.tools.Tool) -> func.tools.Tools:
     """
     Specifies a collection of UDFs to be used as LLM tools. Pixeltable allows any UDF to be used as an input into an
     LLM tool-calling API. To use one or more UDFs as tools, wrap them in a `pxt.tools` call and pass the return value
@@ -740,7 +980,7 @@ def tools(*args: Union[func.Function, func.tools.Tool]) -> func.tools.Tools:
     return func.tools.Tools(tools=[arg if isinstance(arg, func.tools.Tool) else tool(arg) for arg in args])


-def tool(fn: func.Function, name:
+def tool(fn: func.Function, name: str | None = None, description: str | None = None) -> func.tools.Tool:
     """
     Specifies a Pixeltable UDF to be used as an LLM tool with customizable metadata. See the documentation for
     [pxt.tools()][pixeltable.tools] for more details.
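
The `tools()`/`tool()` hunks above only modernize the annotations (`Union`/`Optional` replaced with `X | Y` and `| None`); the call pattern is unchanged. A sketch of wrapping a UDF as an LLM tool, assuming the usual `@pxt.udf` decorator:

>>> import pixeltable as pxt
>>> @pxt.udf
... def add(a: int, b: int) -> int:
...     return a + b
...
>>> toolset = pxt.tools(pxt.tool(add, description='Add two integers'))
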
@@ -761,11 +1001,7 @@ def tool(fn: func.Function, name: Optional[str] = None, description: Optional[st


 def configure_logging(
-    *,
-    to_stdout: Optional[bool] = None,
-    level: Optional[int] = None,
-    add: Optional[str] = None,
-    remove: Optional[str] = None,
+    *, to_stdout: bool | None = None, level: int | None = None, add: str | None = None, remove: str | None = None
 ) -> None:
     """Configure logging.

@@ -780,3 +1016,14 @@ def configure_logging(

 def array(elements: Iterable) -> exprs.Expr:
     return exprs.Expr.from_array(elements)
+
+
+class DirContents(TypedDict):
+    """
+    Represents the contents of a Pixeltable directory.
+    """
+
+    dirs: list[str]
+    """List of directory paths contained in this directory."""
+    tables: list[str]
+    """List of table paths contained in this directory."""