pixeltable 0.3.7__py3-none-any.whl → 0.3.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic.
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/catalog.py +509 -103
- pixeltable/catalog/column.py +1 -0
- pixeltable/catalog/dir.py +15 -6
- pixeltable/catalog/path.py +15 -0
- pixeltable/catalog/schema_object.py +7 -12
- pixeltable/catalog/table.py +3 -12
- pixeltable/catalog/table_version.py +5 -0
- pixeltable/catalog/view.py +0 -4
- pixeltable/env.py +14 -8
- pixeltable/exprs/__init__.py +2 -0
- pixeltable/exprs/arithmetic_expr.py +7 -11
- pixeltable/exprs/array_slice.py +1 -1
- pixeltable/exprs/column_property_ref.py +3 -3
- pixeltable/exprs/column_ref.py +5 -6
- pixeltable/exprs/comparison.py +2 -5
- pixeltable/exprs/compound_predicate.py +4 -4
- pixeltable/exprs/expr.py +32 -19
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +1 -1
- pixeltable/exprs/function_call.py +28 -41
- pixeltable/exprs/globals.py +3 -3
- pixeltable/exprs/in_predicate.py +1 -1
- pixeltable/exprs/inline_expr.py +3 -3
- pixeltable/exprs/is_null.py +1 -1
- pixeltable/exprs/json_mapper.py +5 -5
- pixeltable/exprs/json_path.py +27 -15
- pixeltable/exprs/literal.py +1 -1
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/row_builder.py +3 -5
- pixeltable/exprs/rowid_ref.py +4 -7
- pixeltable/exprs/similarity_expr.py +5 -5
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/type_cast.py +2 -3
- pixeltable/exprs/variable.py +2 -2
- pixeltable/ext/__init__.py +2 -0
- pixeltable/ext/functions/__init__.py +2 -0
- pixeltable/ext/functions/yolox.py +3 -3
- pixeltable/func/__init__.py +2 -0
- pixeltable/func/aggregate_function.py +9 -9
- pixeltable/func/callable_function.py +7 -5
- pixeltable/func/expr_template_function.py +6 -16
- pixeltable/func/function.py +10 -8
- pixeltable/func/function_registry.py +1 -3
- pixeltable/func/query_template_function.py +8 -24
- pixeltable/func/signature.py +23 -22
- pixeltable/func/tools.py +3 -3
- pixeltable/func/udf.py +5 -3
- pixeltable/globals.py +118 -260
- pixeltable/share/__init__.py +2 -0
- pixeltable/share/packager.py +3 -3
- pixeltable/share/publish.py +3 -5
- pixeltable/utils/coco.py +4 -4
- pixeltable/utils/console_output.py +1 -3
- pixeltable/utils/coroutine.py +41 -0
- pixeltable/utils/description_helper.py +1 -1
- pixeltable/utils/documents.py +3 -3
- pixeltable/utils/filecache.py +18 -8
- pixeltable/utils/formatter.py +2 -3
- pixeltable/utils/media_store.py +1 -1
- pixeltable/utils/pytorch.py +1 -1
- pixeltable/utils/sql.py +4 -4
- pixeltable/utils/transactional_directory.py +2 -1
- {pixeltable-0.3.7.dist-info → pixeltable-0.3.9.dist-info}/METADATA +1 -1
- {pixeltable-0.3.7.dist-info → pixeltable-0.3.9.dist-info}/RECORD +68 -67
- {pixeltable-0.3.7.dist-info → pixeltable-0.3.9.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.7.dist-info → pixeltable-0.3.9.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.7.dist-info → pixeltable-0.3.9.dist-info}/entry_points.txt +0 -0
pixeltable/globals.py
CHANGED
@@ -1,17 +1,15 @@
 import logging
 import urllib.parse
-from typing import Any, Iterable, Literal, Optional, Union
-from uuid import UUID
+from typing import Any, Iterable, Literal, Optional, Union

 import pandas as pd
 from pandas.io.formats.style import Styler

-from pixeltable import DataFrame, catalog, …
-from pixeltable.catalog import Catalog, …
+from pixeltable import DataFrame, catalog, exceptions as excs, exprs, func, share
+from pixeltable.catalog import Catalog, TableVersionPath
 from pixeltable.dataframe import DataFrameResultSet
 from pixeltable.env import Env
 from pixeltable.iterators import ComponentIterator
-from pixeltable.utils.filecache import FileCache

 _logger = logging.getLogger('pixeltable')

@@ -21,44 +19,8 @@ def init() -> None:
     _ = Catalog.get()


-def _handle_path_collision(
-    path: str, expected_obj_type: type[catalog.SchemaObject], expected_snapshot: bool, if_exists: catalog.IfExistsParam
-) -> Optional[catalog.SchemaObject]:
-    cat = Catalog.get()
-    obj: Optional[catalog.SchemaObject]
-    if if_exists == catalog.IfExistsParam.ERROR:
-        _ = cat.get_schema_object(path, raise_if_exists=True)
-        obj = None
-    else:
-        obj = cat.get_schema_object(path)
-    is_snapshot = isinstance(obj, catalog.View) and obj._tbl_version_path.is_snapshot()
-    if obj is not None and (not isinstance(obj, expected_obj_type) or (expected_snapshot and not is_snapshot)):
-        obj_type_str = 'snapshot' if expected_snapshot else expected_obj_type._display_name()
-        raise excs.Error(
-            f'Path {path!r} already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.'
-        )
-    if obj is None:
-        return None
-
-    if if_exists == IfExistsParam.IGNORE:
-        return obj
-
-    # drop the existing schema object
-    if isinstance(obj, catalog.Dir):
-        dir_contents = cat.get_dir_contents(obj._id)
-        if len(dir_contents) > 0 and if_exists == IfExistsParam.REPLACE:
-            raise excs.Error(
-                f'Directory {path!r} already exists and is not empty. Use `if_exists="replace_force"` to replace it.'
-            )
-        _drop_dir(obj._id, path, force=True)
-    else:
-        assert isinstance(obj, catalog.Table)
-        _drop_table(obj, force=if_exists == IfExistsParam.REPLACE_FORCE, is_replace=True)
-    return None
-
-
 def create_table(
-    path_str: str,
+    path: str,
     schema_or_df: Union[dict[str, Any], DataFrame],
     *,
     primary_key: Optional[Union[str, list[str]]] = None,
@@ -70,7 +32,7 @@ def create_table(
     """Create a new base table.

     Args:
-        path_str: Path to the table.
+        path: Path to the table.
         schema_or_df: Either a dictionary that maps column names to column types, or a
             [`DataFrame`][pixeltable.DataFrame] whose contents and schema will be used to pre-populate the table.
         primary_key: An optional column name or list of column names to use as the primary key(s) of the
@@ -120,59 +82,48 @@ def create_table(

     >>> tbl = pxt.create_table('my_table', schema={'col1': pxt.Int, 'col2': pxt.Float}, if_exists='replace')
     """
-    …
-    df: Optional[DataFrame] = None
-    if isinstance(schema_or_df, dict):
-        schema = schema_or_df
-    elif isinstance(schema_or_df, DataFrame):
-        df = schema_or_df
-        schema = df.schema
-    elif isinstance(schema_or_df, DataFrameResultSet):
-        raise excs.Error(
-            '`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame. '
-            '(Is there an extraneous call to `collect()`?)'
-        )
-    else:
-        raise excs.Error('`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame.')
-
-    if len(schema) == 0:
-        raise excs.Error(f'Table schema is empty: `{path_str}`')
-
-    if primary_key is None:
-        primary_key = []
-    elif isinstance(primary_key, str):
-        primary_key = [primary_key]
-    elif not isinstance(primary_key, list) or not all(isinstance(pk, str) for pk in primary_key):
-        raise excs.Error('primary_key must be a single column name or a list of column names')
-
-    tbl = catalog.InsertableTable._create(
-        dir._id,
-        path.name,
-        schema,
-        df,
-        primary_key=primary_key,
-        num_retained_versions=num_retained_versions,
-        comment=comment,
-        media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'),
-    )
-    …
+    path_obj = catalog.Path(path)
+    if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
+
+    df: Optional[DataFrame] = None
+    if isinstance(schema_or_df, dict):
+        schema = schema_or_df
+    elif isinstance(schema_or_df, DataFrame):
+        df = schema_or_df
+        schema = df.schema
+    elif isinstance(schema_or_df, DataFrameResultSet):
+        raise excs.Error(
+            '`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame. '
+            '(Is there an extraneous call to `collect()`?)'
+        )
+    else:
+        raise excs.Error('`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame.')
+
+    if len(schema) == 0:
+        raise excs.Error(f'Table schema is empty: {path!r}')
+
+    if primary_key is None:
+        primary_key = []
+    elif isinstance(primary_key, str):
+        primary_key = [primary_key]
+    elif not isinstance(primary_key, list) or not all(isinstance(pk, str) for pk in primary_key):
+        raise excs.Error('primary_key must be a single column name or a list of column names')
+
+    media_validation_ = catalog.MediaValidation.validated(media_validation, 'media_validation')
+    return Catalog.get().create_table(
+        path_obj,
+        schema,
+        df,
+        if_exists=if_exists_,
+        primary_key=primary_key,
+        comment=comment,
+        media_validation=media_validation_,
+        num_retained_versions=num_retained_versions,
+    )


 def create_view(
-    path_str: str,
+    path: str,
     base: Union[catalog.Table, DataFrame],
     *,
     additional_columns: Optional[dict[str, Any]] = None,
@@ -186,7 +137,7 @@ def create_view(
     """Create a view of an existing table object (which itself can be a view or a snapshot or a base table).

     Args:
-        path_str: A name for the view; can be either a simple name such as `my_view`, or a pathname such as
+        path: A name for the view; can be either a simple name such as `my_view`, or a pathname such as
             `dir1.my_view`.
         base: [`Table`][pixeltable.Table] (i.e., table or view or snapshot) or [`DataFrame`][pixeltable.DataFrame] to
             base the view on.
@@ -242,8 +193,9 @@ def create_view(
     >>> tbl = pxt.get_table('my_table')
     ... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 100), if_exists='replace_force')
     """
-    …
+    tbl_version_path: TableVersionPath
     select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]] = None
+    where: Optional[exprs.Expr] = None
     if isinstance(base, catalog.Table):
         tbl_version_path = base._tbl_version_path
     elif isinstance(base, DataFrame):
@@ -257,51 +209,34 @@ def create_view(
         raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
     assert isinstance(base, (catalog.Table, DataFrame))

-    …
+    path_obj = catalog.Path(path)
+    if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
+    media_validation_ = catalog.MediaValidation.validated(media_validation, 'media_validation')

-    …
-    view = catalog.View._create(
-        dir._id,
-        path.name,
-        base=tbl_version_path,
-        select_list=select_list,
-        additional_columns=additional_columns,
-        predicate=where,
-        is_snapshot=is_snapshot,
-        iterator_cls=iterator_class,
-        iterator_args=iterator_args,
-        num_retained_versions=num_retained_versions,
-        comment=comment,
-        media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'),
-    )
-    FileCache.get().emit_eviction_warnings()
-    cat.add_tbl(view)
-    return view
+    if additional_columns is None:
+        additional_columns = {}
+    else:
+        # additional columns should not be in the base table
+        for col_name in additional_columns:
+            if col_name in [c.name for c in tbl_version_path.columns()]:
+                raise excs.Error(
+                    f'Column {col_name!r} already exists in the base table '
+                    f'{tbl_version_path.get_column(col_name).tbl.get().name}.'
+                )
+
+    return Catalog.get().create_view(
+        path_obj,
+        tbl_version_path,
+        select_list=select_list,
+        where=where,
+        additional_columns=additional_columns,
+        is_snapshot=is_snapshot,
+        iterator=iterator,
+        num_retained_versions=num_retained_versions,
+        comment=comment,
+        media_validation=media_validation_,
+        if_exists=if_exists_,
+    )


 def create_snapshot(
@@ -410,11 +345,8 @@ def get_table(path: str) -> catalog.Table:

     >>> tbl = pxt.get_table('my_snapshot')
     """
-    …
-    assert isinstance(obj, catalog.Table)
-    obj.ensure_md_loaded()
-    return obj
+    path_obj = catalog.Path(path)
+    return Catalog.get().get_table(path_obj)


 def move(path: str, new_path: str) -> None:
@@ -436,14 +368,13 @@ def move(path: str, new_path: str) -> None:

     >>>> pxt.move('dir1.my_table', 'dir1.new_name')
     """
+    if path == new_path:
+        raise excs.Error('move(): source and destination cannot be identical')
+    path_obj, new_path_obj = catalog.Path(path), catalog.Path(new_path)
+    if path_obj.is_ancestor(new_path_obj):
+        raise excs.Error(f'move(): cannot move {path!r} into its own subdirectory')
     cat = Catalog.get()
-    …
-    obj = cat.get_schema_object(path, raise_if_not_exists=True)
-    new_p = catalog.Path(new_path)
-    dest_dir_path = str(new_p.parent)
-    dest_dir = cat.get_schema_object(dest_dir_path, expected=catalog.Dir, raise_if_not_exists=True)
-    _ = cat.get_schema_object(new_path, raise_if_exists=True)
-    obj._move(new_p.name, dest_dir._id)
+    cat.move(path_obj, new_path_obj)


 def drop_table(
@@ -482,50 +413,19 @@ def drop_table(
     Drop a table and all its dependents:
     >>> pxt.drop_table('subdir.my_table', force=True)
     """
-    …
-    )
-    if tbl is None:
-        _logger.info(f'Skipped table `{table}` (does not exist).')
-        return
-    else:
-        tbl = table
-    _drop_table(tbl, force=force, is_replace=False)
-
-
-def _drop_table(tbl: catalog.Table, force: bool, is_replace: bool) -> None:
-    cat = Catalog.get()
-    view_ids = cat.get_views(tbl._id)
-    if len(view_ids) > 0:
-        view_paths = [cat.get_tbl_path(id) for id in view_ids]
-        if force:
-            for view_path in view_paths:
-                drop_table(view_path, force=True)
-        else:
-            is_snapshot = tbl._tbl_version_path.is_snapshot()
-            obj_type_str = 'Snapshot' if is_snapshot else tbl._display_name().capitalize()
-            msg: str
-            if is_replace:
-                msg = (
-                    f'{obj_type_str} {tbl._path()} already exists and has dependents: {", ".join(view_paths)}. '
-                    "Use `if_exists='replace_force'` to replace it."
-                )
-            else:
-                msg = f'{obj_type_str} {tbl._path()} has dependents: {", ".join(view_paths)}'
-            raise excs.Error(msg)
-    tbl._drop()
-    _logger.info(f'Dropped table `{tbl._path()}`.')
+    tbl_path: str
+    if isinstance(table, catalog.Table):
+        # if we're dropping a table by handle, we first need to get the current path, then drop the S lock on
+        # the Table record, and then get X locks in the correct order (first containing directory, then table)
+        with Env.get().begin_xact():
+            tbl_path = table._path()
+    else:
+        assert isinstance(table, str)
+        tbl_path = table
+
+    path_obj = catalog.Path(tbl_path)
+    if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
+    Catalog.get().drop_table(path_obj, force=force, if_not_exists=if_not_exists_)


 def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
@@ -551,16 +451,14 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:

     >>> pxt.list_tables('dir1')
     """
-    …
+    path_obj = catalog.Path(dir_path, empty_is_valid=True)  # validate format
     cat = Catalog.get()
-    …
-    contents = cat.get_dir_contents(dir._id, recursive=recursive)
-    return _extract_paths(contents, prefix=dir_path, entry_type=catalog.Table)
+    contents = cat.get_dir_contents(path_obj, recursive=recursive)
+    return [str(p) for p in _extract_paths(contents, parent=path_obj, entry_type=catalog.Table)]


 def create_dir(
-    path: str, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error'
+    path: str, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error', parents: bool = False
 ) -> Optional[catalog.Dir]:
     """Create a directory.

@@ -573,6 +471,7 @@ def create_dir(
         - `'ignore'`: do nothing and return the existing directory handle
         - `'replace'`: if the existing directory is empty, drop it and create a new one
        - `'replace_force'`: drop the existing directory and all its children, and create a new one
+        parents: Create missing parent directories.

     Returns:
         A handle to the newly created directory, or to an already existing directory at the path when
@@ -600,22 +499,14 @@ def create_dir(
     Create a directory and replace if it already exists:

     >>> pxt.create_dir('my_dir', if_exists='replace_force')
-    """
-    path_obj = catalog.Path(path)
-    cat = Catalog.get()

-    if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
-    existing = _handle_path_collision(path, catalog.Dir, False, if_exists_)
-    if existing is not None:
-        assert isinstance(existing, catalog.Dir)
-        return existing
+    Create a subdirectory along with its ancestors:

-    …
+    >>> pxt.create_dir('parent1.parent2.sub_dir', parents=True)
+    """
+    path_obj = catalog.Path(path)
+    if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
+    return Catalog.get().create_dir(path_obj, if_exists=if_exists_, parents=parents)


 def drop_dir(path: str, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
@@ -655,47 +546,16 @@ def drop_dir(path: str, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error') -> None:

     >>> pxt.drop_dir('my_dir', force=True)
     """
-    …
-    cat = Catalog.get()
+    path_obj = catalog.Path(path)  # validate format
     if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
-
-    dir = cat.get_schema_object(
-        path,
-        expected=catalog.Dir,
-        raise_if_not_exists=if_not_exists_ == catalog.IfNotExistsParam.ERROR and not force,
-    )
-    if dir is None:
-        _logger.info(f'Directory {path!r} does not exist, skipped drop_dir().')
-        return
-    _drop_dir(dir._id, path, force=force)
-
-
-def _drop_dir(dir_id: UUID, path: str, force: bool = False) -> None:
-    cat = Catalog.get()
-    dir_entries = cat.get_dir_contents(dir_id, recursive=False)
-    if len(dir_entries) > 0 and not force:
-        raise excs.Error(f'Directory {path!r} is not empty.')
-    tbl_paths = [_join_path(path, entry.table.md['name']) for entry in dir_entries.values() if entry.table is not None]
-    dir_paths = [_join_path(path, entry.dir.md['name']) for entry in dir_entries.values() if entry.dir is not None]
-
-    for tbl_path in tbl_paths:
-        # check if the table still exists, it might be a view that already got force-deleted
-        if cat.get_schema_object(tbl_path, expected=catalog.Table, raise_if_not_exists=False) is not None:
-            drop_table(tbl_path, force=True)
-    for dir_path in dir_paths:
-        drop_dir(dir_path, force=True)
-    cat.drop_dir(dir_id)
-    _logger.info(f'Removed directory {path!r}.')
-
-
-def _join_path(path: str, name: str) -> str:
-    """Append name to path, if path is not empty."""
-    return f'{path}.{name}' if path else name
+    Catalog.get().drop_dir(path_obj, if_not_exists=if_not_exists_, force=force)


 def _extract_paths(
-    dir_entries: dict[str, Catalog.DirEntry],
-    …
+    dir_entries: dict[str, Catalog.DirEntry],
+    parent: catalog.Path,
+    entry_type: Optional[type[catalog.SchemaObject]] = None,
+) -> list[catalog.Path]:
     """Convert nested dir_entries structure to a flattened list of paths."""
     matches: list[str]
     if entry_type is None:
@@ -704,9 +564,9 @@ def _extract_paths(
         matches = [name for name, entry in dir_entries.items() if entry.dir is not None]
     else:
         matches = [name for name, entry in dir_entries.items() if entry.table is not None]
-    result = [ …
+    result = [parent.append(name) for name in matches]
     for name, entry in [(name, entry) for name, entry in dir_entries.items() if len(entry.dir_entries) > 0]:
-        result.extend(_extract_paths(entry.dir_entries, …
+        result.extend(_extract_paths(entry.dir_entries, parent=parent.append(name), entry_type=entry_type))
     return result
@@ -717,11 +577,11 @@ def publish_snapshot(dest_uri: str, table: catalog.Table) -> None:
     share.publish_snapshot(dest_uri, table)


-def list_dirs(path_str: str = '', recursive: bool = True) -> list[str]:
+def list_dirs(path: str = '', recursive: bool = True) -> list[str]:
     """List the directories in a directory.

     Args:
-        path_str: Name or path of the directory.
+        path: Name or path of the directory.
         recursive: If `True`, lists all descendants of this directory recursively.

     Returns:
@@ -734,12 +594,10 @@ def list_dirs(path_str: str = '', recursive: bool = True) -> list[str]:
     >>> cl.list_dirs('my_dir', recursive=True)
     ['my_dir', 'my_dir.sub_dir1']
     """
-    …
+    path_obj = catalog.Path(path, empty_is_valid=True)  # validate format
     cat = Catalog.get()
-    …
-    contents = cat.get_dir_contents(dir._id, recursive=recursive)
-    return _extract_paths(contents, prefix=path_str, entry_type=catalog.Dir)
+    contents = cat.get_dir_contents(path_obj, recursive=recursive)
+    return [str(p) for p in _extract_paths(contents, parent=path_obj, entry_type=catalog.Dir)]


 def list_functions() -> Styler:
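Taken together, these changes replace the module-level helpers (_handle_path_collision, _drop_table, _drop_dir, _join_path) with single calls into Catalog, while keeping the public surface intact. A minimal usage sketch of that surface, assembled from the docstring examples retained above (all table and directory names are illustrative):

import pixeltable as pxt

# new in 0.3.9: create missing ancestor directories in one call
pxt.create_dir('parent1.parent2.sub_dir', parents=True)

# create or replace a base table, then derive a filtered view from it
tbl = pxt.create_table('my_table', schema={'col1': pxt.Int, 'col2': pxt.Float}, if_exists='replace')
view = pxt.create_view('my_view', tbl.where(tbl.col1 > 100), if_exists='replace_force')

# rename/move, list, and drop
pxt.move('my_table', 'parent1.new_name')
print(pxt.list_tables(''))
pxt.drop_table('my_view', force=True)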
pixeltable/share/__init__.py
CHANGED
pixeltable/share/packager.py
CHANGED
@@ -66,8 +66,8 @@ class TablePackager:
             'tables': [
                 {
                     'table_id': str(t._tbl_version.id),
-                    # These are temporary; will replace with a better solution once the concurrency …
-                    # been merged
+                    # These are temporary; will replace with a better solution once the concurrency
+                    # changes to catalog have been merged
                     'table_md': dataclasses.asdict(t._tbl_version.get()._create_tbl_md()),
                     'table_version_md': dataclasses.asdict(
                         t._tbl_version.get()._create_version_md(datetime.now().timestamp())
@@ -98,7 +98,7 @@ class TablePackager:
         for t in ancestors:
             _logger.info(f"Exporting table '{t._path}'.")
             self.__export_table(t)
-        _logger.info( …
+        _logger.info('Building archive.')
         bundle_path = self.__build_tarball()
         _logger.info(f'Packaging complete: {bundle_path}')
         return bundle_path
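For orientation, the per-table metadata assembled in the hunk above has roughly this shape (a sketch; the nested values stand in for the dataclass dicts produced by _create_tbl_md and _create_version_md):

bundle_md = {
    'tables': [
        {
            'table_id': '<uuid>',   # str(t._tbl_version.id)
            'table_md': {},         # dataclasses.asdict(..._create_tbl_md())
            'table_version_md': {}, # dataclasses.asdict(..._create_version_md(timestamp))
        },
        # one entry per exported ancestor table
    ]
}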
pixeltable/share/publish.py
CHANGED
@@ -1,16 +1,14 @@
-import dataclasses
 import os
 import sys
 import urllib.parse
 import urllib.request
-from datetime import datetime
 from pathlib import Path

 import requests
 from tqdm import tqdm

 import pixeltable as pxt
-from pixeltable import exceptions as excs …
+from pixeltable import exceptions as excs
 from pixeltable.env import Env
 from pixeltable.utils import sha256sum

@@ -46,7 +44,7 @@ def publish_snapshot(dest_tbl_uri: str, src_tbl: pxt.Table) -> str:
     else:
         raise excs.Error(f'Unsupported destination: {destination_uri}')

-    Env.get().console_logger.info( …
+    Env.get().console_logger.info('Finalizing snapshot ...')

     finalize_request_json = {
         'upload_id': upload_id,
@@ -83,7 +81,7 @@ def _upload_bundle_to_s3(bundle: Path, parsed_location: urllib.parse.ParseResult
     upload_args = {'ChecksumAlgorithm': 'SHA256'}

     progress_bar = tqdm(
-        desc= …
+        desc='Uploading',
         total=bundle.stat().st_size,
         unit='B',
         unit_scale=True,
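The tqdm bar above presumably drives a chunk-by-chunk S3 upload callback. A generic sketch of that pattern with boto3 (bucket and key are hypothetical; this is not the package's actual upload code):

import boto3
from pathlib import Path
from tqdm import tqdm

def upload_with_progress(bundle: Path, bucket: str, key: str) -> None:
    s3 = boto3.client('s3')
    with tqdm(desc='Uploading', total=bundle.stat().st_size, unit='B', unit_scale=True) as bar:
        # boto3 invokes the callback with the byte count of each transferred chunk
        s3.upload_file(
            str(bundle), bucket, key,
            ExtraArgs={'ChecksumAlgorithm': 'SHA256'},
            Callback=bar.update,
        )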
pixeltable/utils/coco.py
CHANGED
@@ -103,7 +103,7 @@ def write_coco_dataset(df: pxt.DataFrame, dest_path: Path) -> Path:
         # create annotation records for this image
         for annotation in input_dict['annotations']:
             ann_id += 1
-            …
+            _, _, w, h = annotation['bbox']
             category = annotation['category']
             categories.add(category)
             annotations.append(
@@ -119,7 +119,7 @@ def write_coco_dataset(df: pxt.DataFrame, dest_path: Path) -> Path:
             )

     # replace category names with ids
-    category_ids = {category: id for id, category in enumerate(sorted( …
+    category_ids = {category: id for id, category in enumerate(sorted(categories))}
     for annotation in annotations:
         annotation['category_id'] = category_ids[annotation['category_id']]

@@ -129,8 +129,8 @@ def write_coco_dataset(df: pxt.DataFrame, dest_path: Path) -> Path:
         'categories': [{'id': id, 'name': category} for category, id in category_ids.items()],
     }
     output_path = dest_path / 'data.json'
-    with open(output_path, 'w') as …
-        json.dump(result, …
+    with open(output_path, 'w', encoding='utf-8') as fp:
+        json.dump(result, fp)
     return output_path
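For reference, the data.json written above follows the standard COCO layout; a minimal sketch with placeholder values (a COCO bbox is [x, y, w, h], which is why the hunk unpacks it as `_, _, w, h`):

coco_doc = {
    'images': [{'id': 0, 'file_name': 'img_0.jpg', 'width': 640, 'height': 480}],
    'annotations': [{'id': 1, 'image_id': 0, 'bbox': [10, 20, 100, 50], 'area': 100 * 50, 'category_id': 0}],
    'categories': [{'id': 0, 'name': 'person'}],
}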
pixeltable/utils/console_output.py
CHANGED
@@ -34,9 +34,7 @@ class ConsoleOutputHandler(logging.StreamHandler):

 class ConsoleMessageFilter(logging.Filter):
     def filter(self, record: logging.LogRecord) -> bool:
-        …
-        return True
-        return False
+        return getattr(record, 'user_visible', False)


 class ConsoleLogger(logging.LoggerAdapter):
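The rewritten filter passes only records that carry a truthy user_visible attribute; with the standard logging module, such an attribute is attached via the extra parameter. A minimal sketch of how a filter like this behaves (the handler setup is illustrative):

import logging

handler = logging.StreamHandler()
handler.addFilter(ConsoleMessageFilter())  # the filter from the hunk above
logger = logging.getLogger('pixeltable')
logger.addHandler(handler)

logger.info('internal detail')                                  # dropped by the filter
logger.info('shown to the user', extra={'user_visible': True})  # passes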
pixeltable/utils/coroutine.py
ADDED
@@ -0,0 +1,41 @@
+import asyncio
+import threading
+from concurrent.futures import ThreadPoolExecutor
+from typing import Any, Coroutine, TypeVar
+
+T = TypeVar('T')
+
+
+# TODO This is a temporary hack to be able to run async UDFs in contexts that are not properly handled by the existing
+# scheduler logic (e.g., inside the eval loop of a JsonMapper). Once the scheduler is fully general, it can be
+# removed.
+
+
+def run_coroutine_synchronously(coroutine: Coroutine[Any, Any, T], timeout: float = 30) -> T:
+    """
+    Runs the given coroutine synchronously, even if called in the context of a running event loop.
+    """
+
+    def run_in_new_loop():
+        new_loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(new_loop)
+        try:
+            return new_loop.run_until_complete(coroutine)
+        finally:
+            new_loop.close()
+
+    try:
+        loop = asyncio.get_running_loop()
+    except RuntimeError:
+        # No event loop; just call `asyncio.run()`
+        return asyncio.run(coroutine)
+
+    if threading.current_thread() is threading.main_thread():
+        if not loop.is_running():
+            return loop.run_until_complete(coroutine)
+        else:
+            with ThreadPoolExecutor() as pool:
+                future = pool.submit(run_in_new_loop)
+                return future.result(timeout=timeout)
+    else:
+        return asyncio.run_coroutine_threadsafe(coroutine, loop).result()
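A usage sketch for the new helper (the coroutine here is a hypothetical stand-in for an async UDF):

import asyncio
from pixeltable.utils.coroutine import run_coroutine_synchronously

async def fetch_label(x: int) -> str:
    await asyncio.sleep(0.1)  # stand-in for real async work
    return f'label-{x}'

# safe to call whether or not an event loop is already running in this thread
print(run_coroutine_synchronously(fetch_label(42), timeout=5))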