pixeltable 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic.
- pixeltable/__init__.py +5 -3
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +1 -0
- pixeltable/catalog/catalog.py +335 -128
- pixeltable/catalog/column.py +21 -5
- pixeltable/catalog/dir.py +19 -6
- pixeltable/catalog/insertable_table.py +34 -37
- pixeltable/catalog/named_function.py +0 -4
- pixeltable/catalog/schema_object.py +28 -42
- pixeltable/catalog/table.py +195 -158
- pixeltable/catalog/table_version.py +187 -232
- pixeltable/catalog/table_version_handle.py +50 -0
- pixeltable/catalog/table_version_path.py +49 -33
- pixeltable/catalog/view.py +56 -96
- pixeltable/config.py +103 -0
- pixeltable/dataframe.py +90 -90
- pixeltable/env.py +98 -168
- pixeltable/exec/aggregation_node.py +5 -4
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/component_iteration_node.py +13 -9
- pixeltable/exec/data_row_batch.py +3 -3
- pixeltable/exec/exec_context.py +0 -4
- pixeltable/exec/exec_node.py +3 -2
- pixeltable/exec/expr_eval/schedulers.py +2 -1
- pixeltable/exec/in_memory_data_node.py +9 -4
- pixeltable/exec/row_update_node.py +1 -2
- pixeltable/exec/sql_node.py +20 -16
- pixeltable/exprs/column_ref.py +9 -9
- pixeltable/exprs/comparison.py +1 -1
- pixeltable/exprs/data_row.py +4 -4
- pixeltable/exprs/expr.py +20 -5
- pixeltable/exprs/function_call.py +98 -58
- pixeltable/exprs/json_mapper.py +25 -8
- pixeltable/exprs/json_path.py +6 -5
- pixeltable/exprs/object_ref.py +16 -5
- pixeltable/exprs/row_builder.py +15 -15
- pixeltable/exprs/rowid_ref.py +21 -7
- pixeltable/func/__init__.py +1 -1
- pixeltable/func/function.py +38 -6
- pixeltable/func/query_template_function.py +3 -6
- pixeltable/func/tools.py +26 -26
- pixeltable/func/udf.py +1 -1
- pixeltable/functions/__init__.py +2 -0
- pixeltable/functions/anthropic.py +9 -3
- pixeltable/functions/fireworks.py +7 -4
- pixeltable/functions/globals.py +4 -5
- pixeltable/functions/huggingface.py +1 -5
- pixeltable/functions/image.py +17 -7
- pixeltable/functions/llama_cpp.py +1 -1
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +4 -4
- pixeltable/functions/openai.py +26 -23
- pixeltable/functions/string.py +23 -30
- pixeltable/functions/timestamp.py +11 -6
- pixeltable/functions/together.py +14 -12
- pixeltable/functions/util.py +1 -1
- pixeltable/functions/video.py +5 -4
- pixeltable/functions/vision.py +6 -9
- pixeltable/functions/whisper.py +3 -3
- pixeltable/globals.py +246 -260
- pixeltable/index/__init__.py +2 -0
- pixeltable/index/base.py +1 -1
- pixeltable/index/btree.py +3 -1
- pixeltable/index/embedding_index.py +11 -5
- pixeltable/io/external_store.py +11 -12
- pixeltable/io/label_studio.py +4 -3
- pixeltable/io/parquet.py +57 -56
- pixeltable/iterators/__init__.py +4 -2
- pixeltable/iterators/audio.py +11 -11
- pixeltable/iterators/document.py +10 -10
- pixeltable/iterators/string.py +1 -2
- pixeltable/iterators/video.py +14 -15
- pixeltable/metadata/__init__.py +9 -5
- pixeltable/metadata/converters/convert_10.py +0 -1
- pixeltable/metadata/converters/convert_15.py +0 -2
- pixeltable/metadata/converters/convert_23.py +0 -2
- pixeltable/metadata/converters/convert_24.py +3 -3
- pixeltable/metadata/converters/convert_25.py +1 -1
- pixeltable/metadata/converters/convert_27.py +0 -2
- pixeltable/metadata/converters/convert_28.py +0 -2
- pixeltable/metadata/converters/convert_29.py +7 -8
- pixeltable/metadata/converters/util.py +7 -7
- pixeltable/metadata/schema.py +27 -19
- pixeltable/plan.py +68 -40
- pixeltable/share/packager.py +12 -9
- pixeltable/store.py +37 -38
- pixeltable/type_system.py +41 -28
- pixeltable/utils/filecache.py +2 -1
- {pixeltable-0.3.5.dist-info → pixeltable-0.3.7.dist-info}/METADATA +1 -1
- pixeltable-0.3.7.dist-info/RECORD +174 -0
- pixeltable-0.3.5.dist-info/RECORD +0 -172
- {pixeltable-0.3.5.dist-info → pixeltable-0.3.7.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.5.dist-info → pixeltable-0.3.7.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.5.dist-info → pixeltable-0.3.7.dist-info}/entry_points.txt +0 -0
pixeltable/globals.py
CHANGED
@@ -1,22 +1,16 @@
-import dataclasses
 import logging
 import urllib.parse
-from typing import Any, Iterable, Literal, Optional, Union
+from typing import Any, Iterable, Literal, Optional, Union, cast
 from uuid import UUID

 import pandas as pd
-import sqlalchemy as sql
 from pandas.io.formats.style import Styler
-from sqlalchemy.util.preloaded import orm

-import
-
-from pixeltable import DataFrame, catalog, func, share
-from pixeltable.catalog import Catalog
+from pixeltable import DataFrame, catalog, env, exceptions as excs, exprs, func, share
+from pixeltable.catalog import Catalog, IfExistsParam, IfNotExistsParam
 from pixeltable.dataframe import DataFrameResultSet
 from pixeltable.env import Env
 from pixeltable.iterators import ComponentIterator
-from pixeltable.metadata import schema
 from pixeltable.utils.filecache import FileCache

 _logger = logging.getLogger('pixeltable')
@@ -27,68 +21,39 @@ def init() -> None:
     _ = Catalog.get()


-def
-
-    expected_obj_type: type[catalog.SchemaObject],
-    expected_snapshot: bool,
-    if_exists: catalog.IfExistsParam,
+def _handle_path_collision(
+    path: str, expected_obj_type: type[catalog.SchemaObject], expected_snapshot: bool, if_exists: catalog.IfExistsParam
 ) -> Optional[catalog.SchemaObject]:
-    """Handle schema object path collision during creation according to the if_exists parameter.
-
-    Args:
-        path_str: An existing and valid path to the dir, table, view, or snapshot.
-        expected_obj_type: Whether the caller of this function is creating a dir, table, or view at the existing path.
-        expected_snapshot: Whether the caller of this function is creating a snapshot at the existing path.
-        if_exists: Directive regarding how to handle the existing path.
-
-    Returns:
-        A handle to the existing dir, table, view, or snapshot, if `if_exists='ignore'`, otherwise `None`.
-
-    Raises:
-        Error: If the existing path is not of the expected type, or if the existing path has dependents and
-            `if_exists='replace'` or `if_exists='replace_force'`.
-    """
     cat = Catalog.get()
-
-    assert cat.paths.get_object(path) is not None
-
+    obj: Optional[catalog.SchemaObject]
     if if_exists == catalog.IfExistsParam.ERROR:
-
+        _ = cat.get_schema_object(path, raise_if_exists=True)
+        obj = None
+    else:
+        obj = cat.get_schema_object(path)
+        is_snapshot = isinstance(obj, catalog.View) and obj._tbl_version_path.is_snapshot()
+        if obj is not None and (not isinstance(obj, expected_obj_type) or (expected_snapshot and not is_snapshot)):
+            obj_type_str = 'snapshot' if expected_snapshot else expected_obj_type._display_name()
+            raise excs.Error(
+                f'Path {path!r} already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.'
+            )
+    if obj is None:
+        return None

-
-
-        'is_snapshot' in existing_path.get_metadata() and existing_path.get_metadata()['is_snapshot']
-    )
-    obj_type_str = 'Snapshot' if expected_snapshot else expected_obj_type._display_name().capitalize()
-    # Check if the existing path is of expected type.
-    if not isinstance(existing_path, expected_obj_type) or (expected_snapshot and not existing_path_is_snapshot):
-        raise excs.Error(
-            f'Path `{path_str}` already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.'
-        )
+    if if_exists == IfExistsParam.IGNORE:
+        return obj

-    #
-
-
-
-
-
-
-
-    if if_exists == catalog.IfExistsParam.REPLACE and has_dependents:
-        raise excs.Error(
-            f"{obj_type_str} `{path_str}` already exists and has dependents. Use `if_exists='replace_force'` to replace it."
-        )
+    # drop the existing schema object
+    if isinstance(obj, catalog.Dir):
+        dir_contents = cat.get_dir_contents(obj._id)
+        if len(dir_contents) > 0 and if_exists == IfExistsParam.REPLACE:
+            raise excs.Error(
+                f'Directory {path!r} already exists and is not empty. Use `if_exists="replace_force"` to replace it.'
+            )
+        _drop_dir(obj._id, path, force=True)
     else:
-        assert
-
-        # Any errors during drop will be raised.
-        _logger.info(f'Dropping {obj_type_str} `{path_str}` to replace it.')
-        if isinstance(existing_path, catalog.Dir):
-            drop_dir(path_str, force=True)
-        else:
-            drop_table(path_str, force=True)
-        assert cat.paths.get_object(path) is None
-
+        assert isinstance(obj, catalog.Table)
+        _drop_table(obj, force=if_exists == IfExistsParam.REPLACE_FORCE, is_replace=True)
     return None


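The rewritten collision handling above is driven by the public `if_exists` argument of `create_table`, `create_view`, and `create_dir`. A minimal sketch of how that surfaces to callers; the directory and table names are made up for illustration and are not part of the package:

```python
import pixeltable as pxt

pxt.create_dir('demo_dir', if_exists='ignore')  # hypothetical directory name

# 'error' (default) raises if the path exists, 'ignore' returns the existing object,
# 'replace'/'replace_force' drop the existing object first ('replace' refuses if it has dependents).
films = pxt.create_table(
    'demo_dir.films', {'title': pxt.String, 'year': pxt.Int}, if_exists='ignore'
)
```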
@@ -158,54 +123,52 @@ def create_table(
     path = catalog.Path(path_str)
     cat = Catalog.get()

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    with env.Env.get().begin_xact():
+        if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
+        existing = _handle_path_collision(path_str, catalog.InsertableTable, False, if_exists_)
+        if existing is not None:
+            assert isinstance(existing, catalog.Table)
+            return existing
+
+        dir = cat.get_schema_object(str(path.parent), expected=catalog.Dir, raise_if_not_exists=True)
+        assert dir is not None
+
+        df: Optional[DataFrame] = None
+        if isinstance(schema_or_df, dict):
+            schema = schema_or_df
+        elif isinstance(schema_or_df, DataFrame):
+            df = schema_or_df
+            schema = df.schema
+        elif isinstance(schema_or_df, DataFrameResultSet):
+            raise excs.Error(
+                '`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame. '
+                '(Is there an extraneous call to `collect()`?)'
+            )
+        else:
+            raise excs.Error('`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame.')

-
-
+        if len(schema) == 0:
+            raise excs.Error(f'Table schema is empty: `{path_str}`')

-
-
-
-
-
-    if not isinstance(primary_key, list) or not all(isinstance(pk, str) for pk in primary_key):
+        if primary_key is None:
+            primary_key = []
+        elif isinstance(primary_key, str):
+            primary_key = [primary_key]
+        elif not isinstance(primary_key, list) or not all(isinstance(pk, str) for pk in primary_key):
             raise excs.Error('primary_key must be a single column name or a list of column names')

-
-
-
-
-
-
-
-
-
-
-
-
-    _logger.info(f'Created table `{path_str}`.')
-    return tbl
+        tbl = catalog.InsertableTable._create(
+            dir._id,
+            path.name,
+            schema,
+            df,
+            primary_key=primary_key,
+            num_retained_versions=num_retained_versions,
+            comment=comment,
+            media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'),
+        )
+        cat.add_tbl(tbl)
+        return tbl


 def create_view(
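Per the new `elif isinstance(schema_or_df, DataFrame)` branch, `create_table` now also accepts a Pixeltable DataFrame and takes the new table's schema from `df.schema`; passing a `DataFrameResultSet` (i.e. an extra `collect()`) raises. A hedged sketch, with table and column names that are assumptions rather than anything shipped in the package:

```python
import pixeltable as pxt

films = pxt.get_table('demo_dir.films')  # assumes the hypothetical table from the previous sketch

# schema_or_df may now be a query (DataFrame); the schema of the new table comes from the query.
recent = pxt.create_table('demo_dir.recent_films', films.where(films.year >= 2020), if_exists='replace')
```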
@@ -292,53 +255,53 @@ def create_view(
         select_list = base.select_list
     else:
         raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
-    assert isinstance(base, catalog.Table
+    assert isinstance(base, (catalog.Table, DataFrame))

     path = catalog.Path(path_str)
     cat = Catalog.get()

-
-
-
-
-
-
-        return existing_path
+    with Env.get().begin_xact():
+        if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
+        existing = _handle_path_collision(path_str, catalog.View, is_snapshot, if_exists_)
+        if existing is not None:
+            assert isinstance(existing, catalog.View)
+            return existing

-
-
-    if additional_columns is None:
-        additional_columns = {}
-    else:
-        # additional columns should not be in the base table
-        for col_name in additional_columns.keys():
-            if col_name in [c.name for c in tbl_version_path.columns()]:
-                raise excs.Error(
-                    f'Column {col_name!r} already exists in the base table {tbl_version_path.get_column(col_name).tbl.name}.'
-                )
-    if iterator is None:
-        iterator_class, iterator_args = None, None
-    else:
-        iterator_class, iterator_args = iterator
+        dir = cat.get_schema_object(str(path.parent), expected=catalog.Dir, raise_if_not_exists=True)
+        assert dir is not None

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        if additional_columns is None:
+            additional_columns = {}
+        else:
+            # additional columns should not be in the base table
+            for col_name in additional_columns:
+                if col_name in [c.name for c in tbl_version_path.columns()]:
+                    raise excs.Error(
+                        f'Column {col_name!r} already exists in the base table '
+                        f'{tbl_version_path.get_column(col_name).tbl.get().name}.'
+                    )
+        if iterator is None:
+            iterator_class, iterator_args = None, None
+        else:
+            iterator_class, iterator_args = iterator
+
+        view = catalog.View._create(
+            dir._id,
+            path.name,
+            base=tbl_version_path,
+            select_list=select_list,
+            additional_columns=additional_columns,
+            predicate=where,
+            is_snapshot=is_snapshot,
+            iterator_cls=iterator_class,
+            iterator_args=iterator_args,
+            num_retained_versions=num_retained_versions,
+            comment=comment,
+            media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'),
+        )
+        FileCache.get().emit_eviction_warnings()
+        cat.add_tbl(view)
+        return view


 def create_snapshot(
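`create_view` keeps the same shape but now asserts that `base` is either a `Table` or a `DataFrame` and rejects `additional_columns` that collide with base columns. An illustrative sketch under the same assumed names used above:

```python
import pixeltable as pxt

films = pxt.get_table('demo_dir.films')  # hypothetical base table with an Int column 'year'

view = pxt.create_view(
    'demo_dir.recent_films_view',
    films.where(films.year >= 2020),          # a DataFrame works as the base
    additional_columns={'note': pxt.String},  # must not already exist in the base
    if_exists='ignore',
)
```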
@@ -400,7 +363,9 @@ def create_snapshot(
         if `my_snapshot` does not already exist:

         >>> view = pxt.get_table('my_view')
-        ... snapshot = pxt.create_snapshot(
+        ... snapshot = pxt.create_snapshot(
+        ...     'my_snapshot', view, additional_columns={'col3': pxt.Int}, if_exists='ignore'
+        ... )

         Create a snapshot `my_snapshot` on a table `my_table`, and replace any existing snapshot named `my_snapshot`:

@@ -445,11 +410,11 @@ def get_table(path: str) -> catalog.Table:

         >>> tbl = pxt.get_table('my_snapshot')
     """
-
-
-
-
-
+    with Env.get().begin_xact():
+        obj = Catalog.get().get_schema_object(path, expected=catalog.Table, raise_if_not_exists=True)
+        assert isinstance(obj, catalog.Table)
+        obj.ensure_md_loaded()
+        return obj


 def move(path: str, new_path: str) -> None:
@@ -471,14 +436,14 @@ def move(path: str, new_path: str) -> None:

         >>>> pxt.move('dir1.my_table', 'dir1.new_name')
     """
-
-
-
-
-
-
-
-
+    cat = Catalog.get()
+    with Env.get().begin_xact():
+        obj = cat.get_schema_object(path, raise_if_not_exists=True)
+        new_p = catalog.Path(new_path)
+        dest_dir_path = str(new_p.parent)
+        dest_dir = cat.get_schema_object(dest_dir_path, expected=catalog.Dir, raise_if_not_exists=True)
+        _ = cat.get_schema_object(new_path, raise_if_exists=True)
+        obj._move(new_p.name, dest_dir._id)


 def drop_table(
@@ -518,35 +483,49 @@ def drop_table(
         >>> pxt.drop_table('subdir.my_table', force=True)
     """
     cat = Catalog.get()
-
-
-
-
-
-
+    tbl: Optional[catalog.Table]
+    with Env.get().begin_xact():
+        if isinstance(table, str):
+            _ = catalog.Path(table)  # validate path
+            if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
+            tbl = cast(
+                Optional[catalog.Table],
+                cat.get_schema_object(
+                    table,
+                    expected=catalog.Table,
+                    raise_if_not_exists=if_not_exists_ == IfNotExistsParam.ERROR and not force,
+                ),
+            )
+            if tbl is None:
                 _logger.info(f'Skipped table `{table}` (does not exist).')
                 return
-
-
-
-            raise excs.Error(
-                f'{tbl} needs to be a {catalog.Table._display_name()} but is a {type(tbl)._display_name()}'
-            )
-    else:
-        tbl = table
-    tbl_path_obj = catalog.Path(tbl._path)
+        else:
+            tbl = table
+        _drop_table(tbl, force=force, is_replace=False)

-
-
-
+
+def _drop_table(tbl: catalog.Table, force: bool, is_replace: bool) -> None:
+    cat = Catalog.get()
+    view_ids = cat.get_views(tbl._id)
+    if len(view_ids) > 0:
+        view_paths = [cat.get_tbl_path(id) for id in view_ids]
         if force:
-            for
-                drop_table(
+            for view_path in view_paths:
+                drop_table(view_path, force=True)
         else:
-
+            is_snapshot = tbl._tbl_version_path.is_snapshot()
+            obj_type_str = 'Snapshot' if is_snapshot else tbl._display_name().capitalize()
+            msg: str
+            if is_replace:
+                msg = (
+                    f'{obj_type_str} {tbl._path()} already exists and has dependents: {", ".join(view_paths)}. '
+                    "Use `if_exists='replace_force'` to replace it."
+                )
+            else:
+                msg = f'{obj_type_str} {tbl._path()} has dependents: {", ".join(view_paths)}'
+            raise excs.Error(msg)
     tbl._drop()
-
-    _logger.info(f'Dropped table `{tbl._path}`.')
+    _logger.info(f'Dropped table `{tbl._path()}`.')


 def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
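The split into `drop_table()` / `_drop_table()` keeps the user-facing behavior: dropping a table that has dependent views raises unless `force=True`, in which case the views are dropped first. Sketch with made-up names:

```python
import pixeltable as pxt

t = pxt.create_table('demo_dir.t', {'x': pxt.Int}, if_exists='ignore')
v = pxt.create_view('demo_dir.v', t, if_exists='ignore')

# pxt.drop_table('demo_dir.t')  # would raise: "... has dependents: demo_dir.v"
pxt.drop_table('demo_dir.t', force=True)  # drops the dependent view, then the table
```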
@@ -572,19 +551,21 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:

         >>> pxt.list_tables('dir1')
     """
-
-
-
-
+    _ = catalog.Path(dir_path, empty_is_valid=True)  # validate format
+    cat = Catalog.get()
+    with Env.get().begin_xact():
+        dir = cat.get_schema_object(dir_path, expected=catalog.Dir, raise_if_not_exists=True)
+        contents = cat.get_dir_contents(dir._id, recursive=recursive)
+        return _extract_paths(contents, prefix=dir_path, entry_type=catalog.Table)


 def create_dir(
-
+    path: str, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error'
 ) -> Optional[catalog.Dir]:
     """Create a directory.

     Args:
-
+        path: Path to the directory.
         if_exists: Directive regarding how to handle if the path already exists.
             Must be one of the following:

@@ -594,8 +575,8 @@ def create_dir(
             - `'replace_force'`: drop the existing directory and all its children, and create a new one

     Returns:
-        A handle to the newly created directory, or to an already existing directory at the path when
-        Please note the existing directory may not be empty.
+        A handle to the newly created directory, or to an already existing directory at the path when
+        `if_exists='ignore'`. Please note the existing directory may not be empty.

     Raises:
         Error: If
@@ -620,38 +601,28 @@ def create_dir(

         >>> pxt.create_dir('my_dir', if_exists='replace_force')
     """
-
+    path_obj = catalog.Path(path)
     cat = Catalog.get()

-
-
-
-
-
-
-
-
-
-
-
-    dir_md = schema.DirMd(name=path.name, user=None, additional_md={})
-    dir_record = schema.Dir(parent_id=parent._id, md=dataclasses.asdict(dir_md))
-    session.add(dir_record)
-    session.flush()
-    assert dir_record.id is not None
-    assert isinstance(dir_record.id, UUID)
-    dir = catalog.Dir(dir_record.id, parent._id, path.name)
-    cat.paths[path] = dir
-    session.commit()
-    Env.get().console_logger.info(f'Created directory `{path_str}`.')
+    with env.Env.get().begin_xact():
+        if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
+        existing = _handle_path_collision(path, catalog.Dir, False, if_exists_)
+        if existing is not None:
+            assert isinstance(existing, catalog.Dir)
+            return existing
+
+        parent = cat.get_schema_object(str(path_obj.parent))
+        assert parent is not None
+        dir = catalog.Dir._create(parent._id, path_obj.name)
+        Env.get().console_logger.info(f'Created directory {path!r}.')
         return dir


-def drop_dir(
+def drop_dir(path: str, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
     """Remove a directory.

     Args:
-
+        path: Name or path of the directory.
         force: If `True`, will also drop all tables and subdirectories of this directory, recursively, along
             with any views or snapshots that depend on any of the dropped tables.
         if_not_exists: Directive regarding how to handle if the path does not exist.
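`create_dir` now delegates collision handling to `_handle_path_collision` instead of writing `schema.Dir` records directly, and `drop_dir` gained an `if_not_exists` parameter. Illustrative usage; the directory names are hypothetical:

```python
import pixeltable as pxt

projects = pxt.create_dir('projects', if_exists='ignore')  # returns the existing dir under 'ignore'
pxt.create_dir('projects.archive', if_exists='ignore')     # dotted paths denote nested directories

pxt.drop_dir('projects', force=True)                       # non-empty dirs require force=True
pxt.drop_dir('projects', if_not_exists='ignore')           # no error if it is already gone
```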
@@ -684,47 +655,59 @@ def drop_dir(path_str: str, force: bool = False, if_not_exists: Literal['error',

         >>> pxt.drop_dir('my_dir', force=True)
     """
+    _ = catalog.Path(path)  # validate format
     cat = Catalog.get()
-
-
-
-
-
-
-            return
-        else:
-            raise excs.Error(f'Directory `{path_str}` does not exist.')
-
-    if not isinstance(obj, catalog.Dir):
-        raise excs.Error(
-            f'{str(path)} needs to be a {catalog.Dir._display_name()} but is a {type(obj)._display_name()}'
+    if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
+    with Env.get().begin_xact():
+        dir = cat.get_schema_object(
+            path,
+            expected=catalog.Dir,
+            raise_if_not_exists=if_not_exists_ == catalog.IfNotExistsParam.ERROR and not force,
         )
+        if dir is None:
+            _logger.info(f'Directory {path!r} does not exist, skipped drop_dir().')
+            return
+        _drop_dir(dir._id, path, force=force)

-    children = cat.paths.get_children(path, child_type=None, recursive=True)
-
-    if len(children) > 0 and not force:
-        raise excs.Error(f'Directory `{path_str}` is not empty.')
-
-    for child in children:
-        assert isinstance(child, catalog.Path)
-        # We need to check that the child is still in `cat.paths`, since it is possible it was
-        # already deleted as a dependent of a preceding child in the iteration.
-        try:
-            obj = cat.paths[child]
-        except excs.Error:
-            continue
-        if isinstance(obj, catalog.Dir):
-            drop_dir(str(child), force=True)
-        else:
-            assert isinstance(obj, catalog.Table)
-            assert not obj._is_dropped  # else it should have been removed from `cat.paths` already
-            drop_table(str(child), force=True)

-
-
-
-
-
+def _drop_dir(dir_id: UUID, path: str, force: bool = False) -> None:
+    cat = Catalog.get()
+    dir_entries = cat.get_dir_contents(dir_id, recursive=False)
+    if len(dir_entries) > 0 and not force:
+        raise excs.Error(f'Directory {path!r} is not empty.')
+    tbl_paths = [_join_path(path, entry.table.md['name']) for entry in dir_entries.values() if entry.table is not None]
+    dir_paths = [_join_path(path, entry.dir.md['name']) for entry in dir_entries.values() if entry.dir is not None]
+
+    for tbl_path in tbl_paths:
+        # check if the table still exists, it might be a view that already got force-deleted
+        if cat.get_schema_object(tbl_path, expected=catalog.Table, raise_if_not_exists=False) is not None:
+            drop_table(tbl_path, force=True)
+    for dir_path in dir_paths:
+        drop_dir(dir_path, force=True)
+    cat.drop_dir(dir_id)
+    _logger.info(f'Removed directory {path!r}.')
+
+
+def _join_path(path: str, name: str) -> str:
+    """Append name to path, if path is not empty."""
+    return f'{path}.{name}' if path else name
+
+
+def _extract_paths(
+    dir_entries: dict[str, Catalog.DirEntry], prefix: str, entry_type: Optional[type[catalog.SchemaObject]] = None
+) -> list[str]:
+    """Convert nested dir_entries structure to a flattened list of paths."""
+    matches: list[str]
+    if entry_type is None:
+        matches = list(dir_entries.keys())
+    elif entry_type is catalog.Dir:
+        matches = [name for name, entry in dir_entries.items() if entry.dir is not None]
+    else:
+        matches = [name for name, entry in dir_entries.items() if entry.table is not None]
+    result = [_join_path(prefix, name) for name in matches]
+    for name, entry in [(name, entry) for name, entry in dir_entries.items() if len(entry.dir_entries) > 0]:
+        result.extend(_extract_paths(entry.dir_entries, prefix=_join_path(prefix, name), entry_type=entry_type))
+    return result


 def publish_snapshot(dest_uri: str, table: catalog.Table) -> None:
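The new `_join_path` / `_extract_paths` helpers flatten the catalog's nested directory entries into dotted paths, with the empty string standing for the root directory. A small stand-alone mirror of `_join_path` to show the convention (the helper name here is local to the sketch):

```python
def join_path(path: str, name: str) -> str:
    """Mirror of the new _join_path helper: prepend the prefix unless it is the empty root path."""
    return f'{path}.{name}' if path else name

assert join_path('', 'docs') == 'docs'             # entries directly under the root
assert join_path('docs', 'pages') == 'docs.pages'  # nested entry
```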
@@ -751,9 +734,12 @@ def list_dirs(path_str: str = '', recursive: bool = True) -> list[str]:
         >>> cl.list_dirs('my_dir', recursive=True)
         ['my_dir', 'my_dir.sub_dir1']
     """
-
-    Catalog.get()
-
+    _ = catalog.Path(path_str, empty_is_valid=True)  # validate format
+    cat = Catalog.get()
+    with Env.get().begin_xact():
+        dir = cat.get_schema_object(path_str, expected=catalog.Dir, raise_if_not_exists=True)
+        contents = cat.get_dir_contents(dir._id, recursive=recursive)
+        return _extract_paths(contents, prefix=path_str, entry_type=catalog.Dir)


 def list_functions() -> Styler:
@@ -780,7 +766,7 @@ def list_functions() -> Styler:
         }
     )
     pd_df = pd_df.style.set_properties(None, **{'text-align': 'left'}).set_table_styles(
-        [
+        [{'selector': 'th', 'props': [('text-align', 'center')]}]
     )  # center-align headings
     return pd_df.hide(axis='index')
