pixeltable 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl
This diff compares the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Potentially problematic release.
- pixeltable/__init__.py +1 -2
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +509 -103
- pixeltable/catalog/column.py +5 -0
- pixeltable/catalog/dir.py +15 -6
- pixeltable/catalog/globals.py +16 -0
- pixeltable/catalog/insertable_table.py +82 -41
- pixeltable/catalog/path.py +15 -0
- pixeltable/catalog/schema_object.py +7 -12
- pixeltable/catalog/table.py +81 -67
- pixeltable/catalog/table_version.py +23 -7
- pixeltable/catalog/view.py +9 -6
- pixeltable/env.py +15 -9
- pixeltable/exec/exec_node.py +1 -1
- pixeltable/exprs/__init__.py +2 -1
- pixeltable/exprs/arithmetic_expr.py +2 -0
- pixeltable/exprs/column_ref.py +38 -2
- pixeltable/exprs/expr.py +61 -12
- pixeltable/exprs/function_call.py +1 -4
- pixeltable/exprs/globals.py +12 -0
- pixeltable/exprs/json_mapper.py +4 -4
- pixeltable/exprs/json_path.py +10 -11
- pixeltable/exprs/similarity_expr.py +5 -20
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/ext/functions/yolox.py +21 -64
- pixeltable/func/callable_function.py +5 -2
- pixeltable/func/query_template_function.py +6 -18
- pixeltable/func/tools.py +2 -2
- pixeltable/functions/__init__.py +1 -1
- pixeltable/functions/globals.py +16 -5
- pixeltable/globals.py +172 -262
- pixeltable/io/__init__.py +3 -2
- pixeltable/io/datarows.py +138 -0
- pixeltable/io/external_store.py +8 -5
- pixeltable/io/globals.py +7 -160
- pixeltable/io/hf_datasets.py +21 -98
- pixeltable/io/pandas.py +29 -43
- pixeltable/io/parquet.py +17 -42
- pixeltable/io/table_data_conduit.py +569 -0
- pixeltable/io/utils.py +6 -21
- pixeltable/metadata/__init__.py +1 -1
- pixeltable/metadata/converters/convert_30.py +50 -0
- pixeltable/metadata/converters/util.py +26 -1
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +3 -0
- pixeltable/utils/arrow.py +32 -7
- pixeltable/utils/coroutine.py +41 -0
- {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/METADATA +1 -1
- {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/RECORD +52 -47
- {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/WHEEL +1 -1
- {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.8.dist-info → pixeltable-0.3.10.dist-info}/entry_points.txt +0 -0
pixeltable/globals.py
CHANGED
@@ -1,17 +1,36 @@
+from __future__ import annotations
+
 import logging
+import os
 import urllib.parse
-from
-from
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional, Union
 
 import pandas as pd
 from pandas.io.formats.style import Styler
 
-from pixeltable import DataFrame, catalog,
-from pixeltable.catalog import Catalog,
-from pixeltable.
+from pixeltable import DataFrame, catalog, exceptions as excs, exprs, func, share
+from pixeltable.catalog import Catalog, TableVersionPath
+from pixeltable.catalog.insertable_table import OnErrorParameter
 from pixeltable.env import Env
 from pixeltable.iterators import ComponentIterator
-
+
+if TYPE_CHECKING:
+    import datasets  # type: ignore[import-untyped]
+
+RowData = list[dict[str, Any]]
+TableDataSource = Union[
+    str,
+    os.PathLike,
+    Path,  # OS paths, filenames, URLs
+    Iterator[dict[str, Any]],  # iterator producing dictionaries of values
+    RowData,  # list of dictionaries
+    DataFrame,  # Pixeltable DataFrame
+    pd.DataFrame,  # pandas DataFrame
+    'datasets.Dataset',
+    'datasets.DatasetDict',  # Huggingface datasets
+]
+
 
 _logger = logging.getLogger('pixeltable')
 
@@ -21,58 +40,36 @@ def init() -> None:
     _ = Catalog.get()
 
 
-def _handle_path_collision(
-    path: str, expected_obj_type: type[catalog.SchemaObject], expected_snapshot: bool, if_exists: catalog.IfExistsParam
-) -> Optional[catalog.SchemaObject]:
-    cat = Catalog.get()
-    obj: Optional[catalog.SchemaObject]
-    if if_exists == catalog.IfExistsParam.ERROR:
-        _ = cat.get_schema_object(path, raise_if_exists=True)
-        obj = None
-    else:
-        obj = cat.get_schema_object(path)
-    is_snapshot = isinstance(obj, catalog.View) and obj._tbl_version_path.is_snapshot()
-    if obj is not None and (not isinstance(obj, expected_obj_type) or (expected_snapshot and not is_snapshot)):
-        obj_type_str = 'snapshot' if expected_snapshot else expected_obj_type._display_name()
-        raise excs.Error(
-            f'Path {path!r} already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.'
-        )
-    if obj is None:
-        return None
-
-    if if_exists == IfExistsParam.IGNORE:
-        return obj
-
-    # drop the existing schema object
-    if isinstance(obj, catalog.Dir):
-        dir_contents = cat.get_dir_contents(obj._id)
-        if len(dir_contents) > 0 and if_exists == IfExistsParam.REPLACE:
-            raise excs.Error(
-                f'Directory {path!r} already exists and is not empty. Use `if_exists="replace_force"` to replace it.'
-            )
-        _drop_dir(obj._id, path, force=True)
-    else:
-        assert isinstance(obj, catalog.Table)
-        _drop_table(obj, force=if_exists == IfExistsParam.REPLACE_FORCE, is_replace=True)
-    return None
-
-
 def create_table(
     path_str: str,
-
+    schema: Optional[dict[str, Any]] = None,
     *,
+    source: Optional[TableDataSource] = None,
+    source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
+    schema_overrides: Optional[dict[str, Any]] = None,
+    on_error: Literal['abort', 'ignore'] = 'abort',
     primary_key: Optional[Union[str, list[str]]] = None,
     num_retained_versions: int = 10,
     comment: str = '',
     media_validation: Literal['on_read', 'on_write'] = 'on_write',
     if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
+    extra_args: Optional[dict[str, Any]] = None,  # Additional arguments to data source provider
 ) -> catalog.Table:
     """Create a new base table.
 
     Args:
         path_str: Path to the table.
-
-
+        schema: A dictionary that maps column names to column types
+        source: A data source from which a table schema can be inferred and data imported
+        source_format: A hint to the format of the source data
+        schema_overrides: If specified, then columns in `schema_overrides` will be given the specified types
+        on_error: Determines the behavior if an error occurs while evaluating a computed column or detecting an
+            invalid media file (such as a corrupt image) for one of the inserted rows.
+
+            - If `on_error='abort'`, then an exception will be raised and the rows will not be inserted.
+            - If `on_error='ignore'`, then execution will continue and the rows will be inserted. Any cells
+              with errors will have a `None` value for that cell, with information about the error stored in the
+              corresponding `tbl.col_name.errortype` and `tbl.col_name.errormsg` fields.
         primary_key: An optional column name or list of column names to use as the primary key(s) of the
            table.
         num_retained_versions: Number of versions of the table to retain.
@@ -88,6 +85,7 @@ def create_table(
            - `'ignore'`: do nothing and return the existing table handle
            - `'replace'`: if the existing table has no views, drop and replace it with a new one
            - `'replace_force'`: drop the existing table and all its views, and create a new one
+        extra_args: Additional arguments to pass to the source data provider
 
     Returns:
         A handle to the newly created table, or to an already existing table at the path when `if_exists='ignore'`.
@@ -99,7 +97,8 @@ def create_table(
            - the path is invalid, or
            - the path already exists and `if_exists='error'`, or
            - the path already exists and is not a table, or
-           - an error occurs while attempting to create the table
+           - an error occurs while attempting to create the table, or
+           - an error occurs while attempting to import data from the source.
 
     Examples:
         Create a table with an int and a string column:
@@ -119,60 +118,64 @@ def create_table(
         Create a table with an int and a float column, and replace any existing table:
 
         >>> tbl = pxt.create_table('my_table', schema={'col1': pxt.Int, 'col2': pxt.Float}, if_exists='replace')
+
+        Create a table from a CSV file:
+
+        >>> tbl = pxt.create_table('my_table', source='data.csv')
     """
-
-
+    from pixeltable.io.table_data_conduit import DFTableDataConduit, UnkTableDataConduit
+    from pixeltable.io.utils import normalize_primary_key_parameter
+
+    if (schema is None) == (source is None):
+        raise excs.Error('Must provide either a `schema` or a `source`')
+
+    if schema is not None and (len(schema) == 0 or not isinstance(schema, dict)):
+        raise excs.Error('`schema` must be a non-empty dictionary')
+
+    path_obj = catalog.Path(path_str)
+    if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
+    media_validation_ = catalog.MediaValidation.validated(media_validation, 'media_validation')
+    primary_key: Optional[list[str]] = normalize_primary_key_parameter(primary_key)
+    table: catalog.Table = None
+    tds = None
+    data_source = None
+    if source is not None:
+        tds = UnkTableDataConduit(source, source_format=source_format, extra_fields=extra_args)
+        tds.check_source_format()
+        data_source = tds.specialize()
+        data_source.src_schema_overrides = schema_overrides
+        data_source.src_pk = primary_key
+        data_source.infer_schema()
+        schema = data_source.pxt_schema
+        primary_key = data_source.pxt_pk
+        is_direct_df = data_source.is_direct_df()
+    else:
+        is_direct_df = False
 
-
-
-
-    if existing is not None:
-        assert isinstance(existing, catalog.Table)
-        return existing
-
-    dir = cat.get_schema_object(str(path.parent), expected=catalog.Dir, raise_if_not_exists=True)
-    assert dir is not None
-
-    df: Optional[DataFrame] = None
-    if isinstance(schema_or_df, dict):
-        schema = schema_or_df
-    elif isinstance(schema_or_df, DataFrame):
-        df = schema_or_df
-        schema = df.schema
-    elif isinstance(schema_or_df, DataFrameResultSet):
-        raise excs.Error(
-            '`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame. '
-            '(Is there an extraneous call to `collect()`?)'
-        )
-    else:
-        raise excs.Error('`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame.')
-
-    if len(schema) == 0:
-        raise excs.Error(f'Table schema is empty: `{path_str}`')
-
-    if primary_key is None:
-        primary_key = []
-    elif isinstance(primary_key, str):
-        primary_key = [primary_key]
-    elif not isinstance(primary_key, list) or not all(isinstance(pk, str) for pk in primary_key):
-        raise excs.Error('primary_key must be a single column name or a list of column names')
-
-    tbl = catalog.InsertableTable._create(
-        dir._id,
-        path.name,
-        schema,
-        df,
-        primary_key=primary_key,
-        num_retained_versions=num_retained_versions,
-        comment=comment,
-        media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'),
+    if len(schema) == 0 or not isinstance(schema, dict):
+        raise excs.Error(
+            'Unable to create a proper schema from supplied `source`. Please use appropriate `schema_overrides`.'
     )
-
-
+
+    table = Catalog.get().create_table(
+        path_obj,
+        schema,
+        data_source.pxt_df if isinstance(data_source, DFTableDataConduit) else None,
+        if_exists=if_exists_,
+        primary_key=primary_key,
+        comment=comment,
+        media_validation=media_validation_,
+        num_retained_versions=num_retained_versions,
+    )
+    if data_source is not None and not is_direct_df:
+        fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
+        table.insert_table_data_source(data_source=data_source, fail_on_exception=fail_on_exception)
+
+    return table
 
 
 def create_view(
-
+    path: str,
     base: Union[catalog.Table, DataFrame],
     *,
     additional_columns: Optional[dict[str, Any]] = None,
@@ -186,7 +189,7 @@ def create_view(
     """Create a view of an existing table object (which itself can be a view or a snapshot or a base table).
 
     Args:
-
+        path: A name for the view; can be either a simple name such as `my_view`, or a pathname such as
            `dir1.my_view`.
        base: [`Table`][pixeltable.Table] (i.e., table or view or snapshot) or [`DataFrame`][pixeltable.DataFrame] to
            base the view on.
@@ -242,8 +245,9 @@ def create_view(
        >>> tbl = pxt.get_table('my_table')
        ... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 100), if_exists='replace_force')
     """
-
+    tbl_version_path: TableVersionPath
     select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]] = None
+    where: Optional[exprs.Expr] = None
     if isinstance(base, catalog.Table):
         tbl_version_path = base._tbl_version_path
     elif isinstance(base, DataFrame):
@@ -257,51 +261,34 @@ def create_view(
         raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
     assert isinstance(base, (catalog.Table, DataFrame))
 
-
-
+    path_obj = catalog.Path(path)
+    if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
+    media_validation_ = catalog.MediaValidation.validated(media_validation, 'media_validation')
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    view = catalog.View._create(
-        dir._id,
-        path.name,
-        base=tbl_version_path,
-        select_list=select_list,
-        additional_columns=additional_columns,
-        predicate=where,
-        is_snapshot=is_snapshot,
-        iterator_cls=iterator_class,
-        iterator_args=iterator_args,
-        num_retained_versions=num_retained_versions,
-        comment=comment,
-        media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'),
-    )
-    FileCache.get().emit_eviction_warnings()
-    cat.add_tbl(view)
-    return view
+    if additional_columns is None:
+        additional_columns = {}
+    else:
+        # additional columns should not be in the base table
+        for col_name in additional_columns:
+            if col_name in [c.name for c in tbl_version_path.columns()]:
+                raise excs.Error(
+                    f'Column {col_name!r} already exists in the base table '
+                    f'{tbl_version_path.get_column(col_name).tbl.get().name}.'
+                )
+
+    return Catalog.get().create_view(
+        path_obj,
+        tbl_version_path,
+        select_list=select_list,
+        where=where,
+        additional_columns=additional_columns,
+        is_snapshot=is_snapshot,
+        iterator=iterator,
+        num_retained_versions=num_retained_versions,
+        comment=comment,
+        media_validation=media_validation_,
+        if_exists=if_exists_,
+    )
 
 
 def create_snapshot(
@@ -410,11 +397,8 @@ def get_table(path: str) -> catalog.Table:
 
     >>> tbl = pxt.get_table('my_snapshot')
     """
-
-
-    assert isinstance(obj, catalog.Table)
-    obj.ensure_md_loaded()
-    return obj
+    path_obj = catalog.Path(path)
+    return Catalog.get().get_table(path_obj)
 
 
 def move(path: str, new_path: str) -> None:
@@ -436,14 +420,13 @@ def move(path: str, new_path: str) -> None:
 
     >>>> pxt.move('dir1.my_table', 'dir1.new_name')
     """
+    if path == new_path:
+        raise excs.Error('move(): source and destination cannot be identical')
+    path_obj, new_path_obj = catalog.Path(path), catalog.Path(new_path)
+    if path_obj.is_ancestor(new_path_obj):
+        raise excs.Error(f'move(): cannot move {path!r} into its own subdirectory')
     cat = Catalog.get()
-
-    obj = cat.get_schema_object(path, raise_if_not_exists=True)
-    new_p = catalog.Path(new_path)
-    dest_dir_path = str(new_p.parent)
-    dest_dir = cat.get_schema_object(dest_dir_path, expected=catalog.Dir, raise_if_not_exists=True)
-    _ = cat.get_schema_object(new_path, raise_if_exists=True)
-    obj._move(new_p.name, dest_dir._id)
+    cat.move(path_obj, new_path_obj)
 
 
 def drop_table(
@@ -482,50 +465,19 @@ def drop_table(
    Drop a table and all its dependents:
    >>> pxt.drop_table('subdir.my_table', force=True)
    """
-
-
-
-
-
-
-
-
-
-
-
-
-
-    )
-    if tbl is None:
-        _logger.info(f'Skipped table `{table}` (does not exist).')
-        return
-    else:
-        tbl = table
-    _drop_table(tbl, force=force, is_replace=False)
-
-
-def _drop_table(tbl: catalog.Table, force: bool, is_replace: bool) -> None:
-    cat = Catalog.get()
-    view_ids = cat.get_views(tbl._id)
-    if len(view_ids) > 0:
-        view_paths = [cat.get_tbl_path(id) for id in view_ids]
-        if force:
-            for view_path in view_paths:
-                drop_table(view_path, force=True)
-        else:
-            is_snapshot = tbl._tbl_version_path.is_snapshot()
-            obj_type_str = 'Snapshot' if is_snapshot else tbl._display_name().capitalize()
-            msg: str
-            if is_replace:
-                msg = (
-                    f'{obj_type_str} {tbl._path()} already exists and has dependents: {", ".join(view_paths)}. '
-                    "Use `if_exists='replace_force'` to replace it."
-                )
-            else:
-                msg = f'{obj_type_str} {tbl._path()} has dependents: {", ".join(view_paths)}'
-            raise excs.Error(msg)
-    tbl._drop()
-    _logger.info(f'Dropped table `{tbl._path()}`.')
+    tbl_path: str
+    if isinstance(table, catalog.Table):
+        # if we're dropping a table by handle, we first need to get the current path, then drop the S lock on
+        # the Table record, and then get X locks in the correct order (first containing directory, then table)
+        with Env.get().begin_xact():
+            tbl_path = table._path()
+    else:
+        assert isinstance(table, str)
+        tbl_path = table
+
+    path_obj = catalog.Path(tbl_path)
+    if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
+    Catalog.get().drop_table(path_obj, force=force, if_not_exists=if_not_exists_)
 
 
 def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
@@ -551,16 +503,14 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
 
    >>> pxt.list_tables('dir1')
    """
-
+    path_obj = catalog.Path(dir_path, empty_is_valid=True)  # validate format
    cat = Catalog.get()
-
-
-    contents = cat.get_dir_contents(dir._id, recursive=recursive)
-    return _extract_paths(contents, prefix=dir_path, entry_type=catalog.Table)
+    contents = cat.get_dir_contents(path_obj, recursive=recursive)
+    return [str(p) for p in _extract_paths(contents, parent=path_obj, entry_type=catalog.Table)]
 
 
 def create_dir(
-    path: str, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error'
+    path: str, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error', parents: bool = False
 ) -> Optional[catalog.Dir]:
    """Create a directory.
 
@@ -573,6 +523,7 @@ def create_dir(
            - `'ignore'`: do nothing and return the existing directory handle
            - `'replace'`: if the existing directory is empty, drop it and create a new one
            - `'replace_force'`: drop the existing directory and all its children, and create a new one
+        parents: Create missing parent directories.
 
    Returns:
        A handle to the newly created directory, or to an already existing directory at the path when
@@ -600,22 +551,14 @@ def create_dir(
        Create a directory and replace if it already exists:
 
        >>> pxt.create_dir('my_dir', if_exists='replace_force')
-    """
-    path_obj = catalog.Path(path)
-    cat = Catalog.get()
 
-
-    if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
-    existing = _handle_path_collision(path, catalog.Dir, False, if_exists_)
-    if existing is not None:
-        assert isinstance(existing, catalog.Dir)
-        return existing
+        Create a subdirectory along with its ancestors:
 
-
-
-
-
-
+        >>> pxt.create_dir('parent1.parent2.sub_dir', parents=True)
+    """
+    path_obj = catalog.Path(path)
+    if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
+    return Catalog.get().create_dir(path_obj, if_exists=if_exists_, parents=parents)
 
 
 def drop_dir(path: str, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
@@ -655,47 +598,16 @@ def drop_dir(path: str, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
 
    >>> pxt.drop_dir('my_dir', force=True)
    """
-
-    cat = Catalog.get()
+    path_obj = catalog.Path(path)  # validate format
    if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
-
-    dir = cat.get_schema_object(
-        path,
-        expected=catalog.Dir,
-        raise_if_not_exists=if_not_exists_ == catalog.IfNotExistsParam.ERROR and not force,
-    )
-    if dir is None:
-        _logger.info(f'Directory {path!r} does not exist, skipped drop_dir().')
-        return
-    _drop_dir(dir._id, path, force=force)
-
-
-def _drop_dir(dir_id: UUID, path: str, force: bool = False) -> None:
-    cat = Catalog.get()
-    dir_entries = cat.get_dir_contents(dir_id, recursive=False)
-    if len(dir_entries) > 0 and not force:
-        raise excs.Error(f'Directory {path!r} is not empty.')
-    tbl_paths = [_join_path(path, entry.table.md['name']) for entry in dir_entries.values() if entry.table is not None]
-    dir_paths = [_join_path(path, entry.dir.md['name']) for entry in dir_entries.values() if entry.dir is not None]
-
-    for tbl_path in tbl_paths:
-        # check if the table still exists, it might be a view that already got force-deleted
-        if cat.get_schema_object(tbl_path, expected=catalog.Table, raise_if_not_exists=False) is not None:
-            drop_table(tbl_path, force=True)
-    for dir_path in dir_paths:
-        drop_dir(dir_path, force=True)
-    cat.drop_dir(dir_id)
-    _logger.info(f'Removed directory {path!r}.')
-
-
-def _join_path(path: str, name: str) -> str:
-    """Append name to path, if path is not empty."""
-    return f'{path}.{name}' if path else name
+    Catalog.get().drop_dir(path_obj, if_not_exists=if_not_exists_, force=force)
 
 
 def _extract_paths(
-    dir_entries: dict[str, Catalog.DirEntry],
-
+    dir_entries: dict[str, Catalog.DirEntry],
+    parent: catalog.Path,
+    entry_type: Optional[type[catalog.SchemaObject]] = None,
+) -> list[catalog.Path]:
    """Convert nested dir_entries structure to a flattened list of paths."""
    matches: list[str]
    if entry_type is None:
@@ -704,9 +616,9 @@ def _extract_paths(
        matches = [name for name, entry in dir_entries.items() if entry.dir is not None]
    else:
        matches = [name for name, entry in dir_entries.items() if entry.table is not None]
-    result = [
+    result = [parent.append(name) for name in matches]
    for name, entry in [(name, entry) for name, entry in dir_entries.items() if len(entry.dir_entries) > 0]:
-        result.extend(_extract_paths(entry.dir_entries,
+        result.extend(_extract_paths(entry.dir_entries, parent=parent.append(name), entry_type=entry_type))
    return result
 
 
@@ -717,11 +629,11 @@ def publish_snapshot(dest_uri: str, table: catalog.Table) -> None:
    share.publish_snapshot(dest_uri, table)
 
 
-def list_dirs(
+def list_dirs(path: str = '', recursive: bool = True) -> list[str]:
    """List the directories in a directory.
 
    Args:
-
+        path: Name or path of the directory.
        recursive: If `True`, lists all descendants of this directory recursively.
 
    Returns:
@@ -734,12 +646,10 @@ def list_dirs(path_str: str = '', recursive: bool = True) -> list[str]:
    >>> cl.list_dirs('my_dir', recursive=True)
    ['my_dir', 'my_dir.sub_dir1']
    """
-
+    path_obj = catalog.Path(path, empty_is_valid=True)  # validate format
    cat = Catalog.get()
-
-
-    contents = cat.get_dir_contents(dir._id, recursive=recursive)
-    return _extract_paths(contents, prefix=path_str, entry_type=catalog.Dir)
+    contents = cat.get_dir_contents(path_obj, recursive=recursive)
+    return [str(p) for p in _extract_paths(contents, parent=path_obj, entry_type=catalog.Dir)]
 
 
 def list_functions() -> Styler:
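
Net effect of the globals.py changes: `create_table` replaces the old `schema_or_df` parameter with mutually exclusive `schema` and `source` arguments, where a `source` (file path or URL, row iterator, pandas DataFrame, or Hugging Face dataset) is routed through the new table-data-conduit machinery for schema inference and import, and directory/table operations now delegate to `Catalog` methods. A minimal sketch of the new call patterns, assuming pixeltable 0.3.10 and a hypothetical local file `data.csv`:

import pixeltable as pxt

# directories can now be created together with missing ancestors in one call
pxt.create_dir('demo.imports', parents=True)

# explicit schema, as before; `schema` and `source` are now mutually exclusive
people = pxt.create_table('demo.people', schema={'id': pxt.Int, 'name': pxt.String})

# schema inferred from a CSV source; override one inferred column type and
# keep inserting past bad rows instead of aborting
sales = pxt.create_table(
    'demo.imports.sales',
    source='data.csv',                 # hypothetical local file
    source_format='csv',
    schema_overrides={'id': pxt.Int},
    on_error='ignore',
)

Note that `on_error='ignore'` only softens errors during row import and computed-column evaluation; if no usable schema can be inferred from the source, `create_table` still raises and points the caller at `schema_overrides`.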
pixeltable/io/__init__.py
CHANGED
@@ -1,11 +1,12 @@
+from .datarows import import_json, import_rows
 from .external_store import ExternalStore, SyncStatus
-from .globals import create_label_studio_project, export_images_as_fo_dataset
+from .globals import create_label_studio_project, export_images_as_fo_dataset
 from .hf_datasets import import_huggingface_dataset
 from .pandas import import_csv, import_excel, import_pandas
 from .parquet import export_parquet, import_parquet
 
 __default_dir = set(symbol for symbol in dir() if not symbol.startswith('_'))
-__removed_symbols = {'globals', 'hf_datasets', 'pandas', 'parquet'}
+__removed_symbols = {'globals', 'hf_datasets', 'pandas', 'parquet', 'datarows'}
 __all__ = sorted(list(__default_dir - __removed_symbols))
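
The new top-level exports come from pixeltable/io/datarows.py (+138 lines, not expanded in this diff). A hedged sketch of the row-based import path, with assumed signatures and hypothetical table paths and file names:

import pixeltable as pxt

# assumed signature: import_rows(tbl_path, rows, ...) -> Table
rows = [{'id': 1, 'name': 'alice'}, {'id': 2, 'name': 'bob'}]
people = pxt.io.import_rows('demo.people_rows', rows)

# assumed signature: import_json(tbl_path, filepath_or_url, ...) -> Table
events = pxt.io.import_json('demo.events', 'events.json')  # hypothetical file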