pixeltable 0.3.7__py3-none-any.whl → 0.3.9__py3-none-any.whl

This diff compares publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.

Potentially problematic release.

Files changed (68)
  1. pixeltable/__version__.py +2 -2
  2. pixeltable/catalog/catalog.py +509 -103
  3. pixeltable/catalog/column.py +1 -0
  4. pixeltable/catalog/dir.py +15 -6
  5. pixeltable/catalog/path.py +15 -0
  6. pixeltable/catalog/schema_object.py +7 -12
  7. pixeltable/catalog/table.py +3 -12
  8. pixeltable/catalog/table_version.py +5 -0
  9. pixeltable/catalog/view.py +0 -4
  10. pixeltable/env.py +14 -8
  11. pixeltable/exprs/__init__.py +2 -0
  12. pixeltable/exprs/arithmetic_expr.py +7 -11
  13. pixeltable/exprs/array_slice.py +1 -1
  14. pixeltable/exprs/column_property_ref.py +3 -3
  15. pixeltable/exprs/column_ref.py +5 -6
  16. pixeltable/exprs/comparison.py +2 -5
  17. pixeltable/exprs/compound_predicate.py +4 -4
  18. pixeltable/exprs/expr.py +32 -19
  19. pixeltable/exprs/expr_dict.py +3 -3
  20. pixeltable/exprs/expr_set.py +1 -1
  21. pixeltable/exprs/function_call.py +28 -41
  22. pixeltable/exprs/globals.py +3 -3
  23. pixeltable/exprs/in_predicate.py +1 -1
  24. pixeltable/exprs/inline_expr.py +3 -3
  25. pixeltable/exprs/is_null.py +1 -1
  26. pixeltable/exprs/json_mapper.py +5 -5
  27. pixeltable/exprs/json_path.py +27 -15
  28. pixeltable/exprs/literal.py +1 -1
  29. pixeltable/exprs/method_ref.py +2 -2
  30. pixeltable/exprs/row_builder.py +3 -5
  31. pixeltable/exprs/rowid_ref.py +4 -7
  32. pixeltable/exprs/similarity_expr.py +5 -5
  33. pixeltable/exprs/sql_element_cache.py +1 -1
  34. pixeltable/exprs/type_cast.py +2 -3
  35. pixeltable/exprs/variable.py +2 -2
  36. pixeltable/ext/__init__.py +2 -0
  37. pixeltable/ext/functions/__init__.py +2 -0
  38. pixeltable/ext/functions/yolox.py +3 -3
  39. pixeltable/func/__init__.py +2 -0
  40. pixeltable/func/aggregate_function.py +9 -9
  41. pixeltable/func/callable_function.py +7 -5
  42. pixeltable/func/expr_template_function.py +6 -16
  43. pixeltable/func/function.py +10 -8
  44. pixeltable/func/function_registry.py +1 -3
  45. pixeltable/func/query_template_function.py +8 -24
  46. pixeltable/func/signature.py +23 -22
  47. pixeltable/func/tools.py +3 -3
  48. pixeltable/func/udf.py +5 -3
  49. pixeltable/globals.py +118 -260
  50. pixeltable/share/__init__.py +2 -0
  51. pixeltable/share/packager.py +3 -3
  52. pixeltable/share/publish.py +3 -5
  53. pixeltable/utils/coco.py +4 -4
  54. pixeltable/utils/console_output.py +1 -3
  55. pixeltable/utils/coroutine.py +41 -0
  56. pixeltable/utils/description_helper.py +1 -1
  57. pixeltable/utils/documents.py +3 -3
  58. pixeltable/utils/filecache.py +18 -8
  59. pixeltable/utils/formatter.py +2 -3
  60. pixeltable/utils/media_store.py +1 -1
  61. pixeltable/utils/pytorch.py +1 -1
  62. pixeltable/utils/sql.py +4 -4
  63. pixeltable/utils/transactional_directory.py +2 -1
  64. {pixeltable-0.3.7.dist-info → pixeltable-0.3.9.dist-info}/METADATA +1 -1
  65. {pixeltable-0.3.7.dist-info → pixeltable-0.3.9.dist-info}/RECORD +68 -67
  66. {pixeltable-0.3.7.dist-info → pixeltable-0.3.9.dist-info}/LICENSE +0 -0
  67. {pixeltable-0.3.7.dist-info → pixeltable-0.3.9.dist-info}/WHEEL +0 -0
  68. {pixeltable-0.3.7.dist-info → pixeltable-0.3.9.dist-info}/entry_points.txt +0 -0
pixeltable/globals.py CHANGED
@@ -1,17 +1,15 @@
  import logging
  import urllib.parse
- from typing import Any, Iterable, Literal, Optional, Union, cast
- from uuid import UUID
+ from typing import Any, Iterable, Literal, Optional, Union
 
  import pandas as pd
  from pandas.io.formats.style import Styler
 
- from pixeltable import DataFrame, catalog, env, exceptions as excs, exprs, func, share
- from pixeltable.catalog import Catalog, IfExistsParam, IfNotExistsParam
+ from pixeltable import DataFrame, catalog, exceptions as excs, exprs, func, share
+ from pixeltable.catalog import Catalog, TableVersionPath
  from pixeltable.dataframe import DataFrameResultSet
  from pixeltable.env import Env
  from pixeltable.iterators import ComponentIterator
- from pixeltable.utils.filecache import FileCache
 
  _logger = logging.getLogger('pixeltable')
 
@@ -21,44 +19,8 @@ def init() -> None:
      _ = Catalog.get()
 
 
- def _handle_path_collision(
-     path: str, expected_obj_type: type[catalog.SchemaObject], expected_snapshot: bool, if_exists: catalog.IfExistsParam
- ) -> Optional[catalog.SchemaObject]:
-     cat = Catalog.get()
-     obj: Optional[catalog.SchemaObject]
-     if if_exists == catalog.IfExistsParam.ERROR:
-         _ = cat.get_schema_object(path, raise_if_exists=True)
-         obj = None
-     else:
-         obj = cat.get_schema_object(path)
-         is_snapshot = isinstance(obj, catalog.View) and obj._tbl_version_path.is_snapshot()
-         if obj is not None and (not isinstance(obj, expected_obj_type) or (expected_snapshot and not is_snapshot)):
-             obj_type_str = 'snapshot' if expected_snapshot else expected_obj_type._display_name()
-             raise excs.Error(
-                 f'Path {path!r} already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.'
-             )
-     if obj is None:
-         return None
- 
-     if if_exists == IfExistsParam.IGNORE:
-         return obj
- 
-     # drop the existing schema object
-     if isinstance(obj, catalog.Dir):
-         dir_contents = cat.get_dir_contents(obj._id)
-         if len(dir_contents) > 0 and if_exists == IfExistsParam.REPLACE:
-             raise excs.Error(
-                 f'Directory {path!r} already exists and is not empty. Use `if_exists="replace_force"` to replace it.'
-             )
-         _drop_dir(obj._id, path, force=True)
-     else:
-         assert isinstance(obj, catalog.Table)
-         _drop_table(obj, force=if_exists == IfExistsParam.REPLACE_FORCE, is_replace=True)
-     return None
- 
- 
  def create_table(
-     path_str: str,
+     path: str,
      schema_or_df: Union[dict[str, Any], DataFrame],
      *,
      primary_key: Optional[Union[str, list[str]]] = None,
@@ -70,7 +32,7 @@ def create_table(
      """Create a new base table.
 
      Args:
-         path_str: Path to the table.
+         path: Path to the table.
          schema_or_df: Either a dictionary that maps column names to column types, or a
              [`DataFrame`][pixeltable.DataFrame] whose contents and schema will be used to pre-populate the table.
          primary_key: An optional column name or list of column names to use as the primary key(s) of the
@@ -120,59 +82,48 @@
 
          >>> tbl = pxt.create_table('my_table', schema={'col1': pxt.Int, 'col2': pxt.Float}, if_exists='replace')
      """
-     path = catalog.Path(path_str)
-     cat = Catalog.get()
- 
-     with env.Env.get().begin_xact():
-         if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
-         existing = _handle_path_collision(path_str, catalog.InsertableTable, False, if_exists_)
-         if existing is not None:
-             assert isinstance(existing, catalog.Table)
-             return existing
- 
-         dir = cat.get_schema_object(str(path.parent), expected=catalog.Dir, raise_if_not_exists=True)
-         assert dir is not None
- 
-         df: Optional[DataFrame] = None
-         if isinstance(schema_or_df, dict):
-             schema = schema_or_df
-         elif isinstance(schema_or_df, DataFrame):
-             df = schema_or_df
-             schema = df.schema
-         elif isinstance(schema_or_df, DataFrameResultSet):
-             raise excs.Error(
-                 '`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame. '
-                 '(Is there an extraneous call to `collect()`?)'
-             )
-         else:
-             raise excs.Error('`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame.')
- 
-         if len(schema) == 0:
-             raise excs.Error(f'Table schema is empty: `{path_str}`')
- 
-         if primary_key is None:
-             primary_key = []
-         elif isinstance(primary_key, str):
-             primary_key = [primary_key]
-         elif not isinstance(primary_key, list) or not all(isinstance(pk, str) for pk in primary_key):
-             raise excs.Error('primary_key must be a single column name or a list of column names')
- 
-         tbl = catalog.InsertableTable._create(
-             dir._id,
-             path.name,
-             schema,
-             df,
-             primary_key=primary_key,
-             num_retained_versions=num_retained_versions,
-             comment=comment,
-             media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'),
+     path_obj = catalog.Path(path)
+     if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
+ 
+     df: Optional[DataFrame] = None
+     if isinstance(schema_or_df, dict):
+         schema = schema_or_df
+     elif isinstance(schema_or_df, DataFrame):
+         df = schema_or_df
+         schema = df.schema
+     elif isinstance(schema_or_df, DataFrameResultSet):
+         raise excs.Error(
+             '`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame. '
+             '(Is there an extraneous call to `collect()`?)'
          )
-         cat.add_tbl(tbl)
-         return tbl
+     else:
+         raise excs.Error('`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame.')
+ 
+     if len(schema) == 0:
+         raise excs.Error(f'Table schema is empty: {path!r}')
+ 
+     if primary_key is None:
+         primary_key = []
+     elif isinstance(primary_key, str):
+         primary_key = [primary_key]
+     elif not isinstance(primary_key, list) or not all(isinstance(pk, str) for pk in primary_key):
+         raise excs.Error('primary_key must be a single column name or a list of column names')
+ 
+     media_validation_ = catalog.MediaValidation.validated(media_validation, 'media_validation')
+     return Catalog.get().create_table(
+         path_obj,
+         schema,
+         df,
+         if_exists=if_exists_,
+         primary_key=primary_key,
+         comment=comment,
+         media_validation=media_validation_,
+         num_retained_versions=num_retained_versions,
+     )
 
 
  def create_view(
-     path_str: str,
+     path: str,
      base: Union[catalog.Table, DataFrame],
      *,
      additional_columns: Optional[dict[str, Any]] = None,
@@ -186,7 +137,7 @@ def create_view(
      """Create a view of an existing table object (which itself can be a view or a snapshot or a base table).
 
      Args:
-         path_str: A name for the view; can be either a simple name such as `my_view`, or a pathname such as
+         path: A name for the view; can be either a simple name such as `my_view`, or a pathname such as
              `dir1.my_view`.
          base: [`Table`][pixeltable.Table] (i.e., table or view or snapshot) or [`DataFrame`][pixeltable.DataFrame] to
              base the view on.
@@ -242,8 +193,9 @@
          >>> tbl = pxt.get_table('my_table')
          ... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 100), if_exists='replace_force')
      """
-     where: Optional[exprs.Expr] = None
+     tbl_version_path: TableVersionPath
      select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]] = None
+     where: Optional[exprs.Expr] = None
      if isinstance(base, catalog.Table):
          tbl_version_path = base._tbl_version_path
      elif isinstance(base, DataFrame):
@@ -257,51 +209,34 @@
          raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
      assert isinstance(base, (catalog.Table, DataFrame))
 
-     path = catalog.Path(path_str)
-     cat = Catalog.get()
+     path_obj = catalog.Path(path)
+     if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
+     media_validation_ = catalog.MediaValidation.validated(media_validation, 'media_validation')
 
-     with Env.get().begin_xact():
-         if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
-         existing = _handle_path_collision(path_str, catalog.View, is_snapshot, if_exists_)
-         if existing is not None:
-             assert isinstance(existing, catalog.View)
-             return existing
- 
-         dir = cat.get_schema_object(str(path.parent), expected=catalog.Dir, raise_if_not_exists=True)
-         assert dir is not None
- 
-         if additional_columns is None:
-             additional_columns = {}
-         else:
-             # additional columns should not be in the base table
-             for col_name in additional_columns:
-                 if col_name in [c.name for c in tbl_version_path.columns()]:
-                     raise excs.Error(
-                         f'Column {col_name!r} already exists in the base table '
-                         f'{tbl_version_path.get_column(col_name).tbl.get().name}.'
-                     )
-         if iterator is None:
-             iterator_class, iterator_args = None, None
-         else:
-             iterator_class, iterator_args = iterator
- 
-         view = catalog.View._create(
-             dir._id,
-             path.name,
-             base=tbl_version_path,
-             select_list=select_list,
-             additional_columns=additional_columns,
-             predicate=where,
-             is_snapshot=is_snapshot,
-             iterator_cls=iterator_class,
-             iterator_args=iterator_args,
-             num_retained_versions=num_retained_versions,
-             comment=comment,
-             media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'),
-         )
-         FileCache.get().emit_eviction_warnings()
-         cat.add_tbl(view)
-         return view
+     if additional_columns is None:
+         additional_columns = {}
+     else:
+         # additional columns should not be in the base table
+         for col_name in additional_columns:
+             if col_name in [c.name for c in tbl_version_path.columns()]:
+                 raise excs.Error(
+                     f'Column {col_name!r} already exists in the base table '
+                     f'{tbl_version_path.get_column(col_name).tbl.get().name}.'
+                 )
+ 
+     return Catalog.get().create_view(
+         path_obj,
+         tbl_version_path,
+         select_list=select_list,
+         where=where,
+         additional_columns=additional_columns,
+         is_snapshot=is_snapshot,
+         iterator=iterator,
+         num_retained_versions=num_retained_versions,
+         comment=comment,
+         media_validation=media_validation_,
+         if_exists=if_exists_,
+     )
 
 
  def create_snapshot(
@@ -410,11 +345,8 @@ def get_table(path: str) -> catalog.Table:
 
          >>> tbl = pxt.get_table('my_snapshot')
      """
-     with Env.get().begin_xact():
-         obj = Catalog.get().get_schema_object(path, expected=catalog.Table, raise_if_not_exists=True)
-         assert isinstance(obj, catalog.Table)
-         obj.ensure_md_loaded()
-         return obj
+     path_obj = catalog.Path(path)
+     return Catalog.get().get_table(path_obj)
 
 
  def move(path: str, new_path: str) -> None:
@@ -436,14 +368,13 @@ def move(path: str, new_path: str) -> None:
 
          >>>> pxt.move('dir1.my_table', 'dir1.new_name')
      """
+     if path == new_path:
+         raise excs.Error('move(): source and destination cannot be identical')
+     path_obj, new_path_obj = catalog.Path(path), catalog.Path(new_path)
+     if path_obj.is_ancestor(new_path_obj):
+         raise excs.Error(f'move(): cannot move {path!r} into its own subdirectory')
      cat = Catalog.get()
-     with Env.get().begin_xact():
-         obj = cat.get_schema_object(path, raise_if_not_exists=True)
-         new_p = catalog.Path(new_path)
-         dest_dir_path = str(new_p.parent)
-         dest_dir = cat.get_schema_object(dest_dir_path, expected=catalog.Dir, raise_if_not_exists=True)
-         _ = cat.get_schema_object(new_path, raise_if_exists=True)
-         obj._move(new_p.name, dest_dir._id)
+     cat.move(path_obj, new_path_obj)
 
 
  def drop_table(
@@ -482,50 +413,19 @@
      Drop a table and all its dependents:
      >>> pxt.drop_table('subdir.my_table', force=True)
      """
-     cat = Catalog.get()
-     tbl: Optional[catalog.Table]
-     with Env.get().begin_xact():
-         if isinstance(table, str):
-             _ = catalog.Path(table)  # validate path
-             if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
-             tbl = cast(
-                 Optional[catalog.Table],
-                 cat.get_schema_object(
-                     table,
-                     expected=catalog.Table,
-                     raise_if_not_exists=if_not_exists_ == IfNotExistsParam.ERROR and not force,
-                 ),
-             )
-             if tbl is None:
-                 _logger.info(f'Skipped table `{table}` (does not exist).')
-                 return
-         else:
-             tbl = table
-         _drop_table(tbl, force=force, is_replace=False)
- 
- 
- def _drop_table(tbl: catalog.Table, force: bool, is_replace: bool) -> None:
-     cat = Catalog.get()
-     view_ids = cat.get_views(tbl._id)
-     if len(view_ids) > 0:
-         view_paths = [cat.get_tbl_path(id) for id in view_ids]
-         if force:
-             for view_path in view_paths:
-                 drop_table(view_path, force=True)
-         else:
-             is_snapshot = tbl._tbl_version_path.is_snapshot()
-             obj_type_str = 'Snapshot' if is_snapshot else tbl._display_name().capitalize()
-             msg: str
-             if is_replace:
-                 msg = (
-                     f'{obj_type_str} {tbl._path()} already exists and has dependents: {", ".join(view_paths)}. '
-                     "Use `if_exists='replace_force'` to replace it."
-                 )
-             else:
-                 msg = f'{obj_type_str} {tbl._path()} has dependents: {", ".join(view_paths)}'
-             raise excs.Error(msg)
-     tbl._drop()
-     _logger.info(f'Dropped table `{tbl._path()}`.')
+     tbl_path: str
+     if isinstance(table, catalog.Table):
+         # if we're dropping a table by handle, we first need to get the current path, then drop the S lock on
+         # the Table record, and then get X locks in the correct order (first containing directory, then table)
+         with Env.get().begin_xact():
+             tbl_path = table._path()
+     else:
+         assert isinstance(table, str)
+         tbl_path = table
+ 
+     path_obj = catalog.Path(tbl_path)
+     if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
+     Catalog.get().drop_table(path_obj, force=force, if_not_exists=if_not_exists_)
 
 
  def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
@@ -551,16 +451,14 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
 
          >>> pxt.list_tables('dir1')
      """
-     _ = catalog.Path(dir_path, empty_is_valid=True)  # validate format
+     path_obj = catalog.Path(dir_path, empty_is_valid=True)  # validate format
      cat = Catalog.get()
-     with Env.get().begin_xact():
-         dir = cat.get_schema_object(dir_path, expected=catalog.Dir, raise_if_not_exists=True)
-         contents = cat.get_dir_contents(dir._id, recursive=recursive)
-         return _extract_paths(contents, prefix=dir_path, entry_type=catalog.Table)
+     contents = cat.get_dir_contents(path_obj, recursive=recursive)
+     return [str(p) for p in _extract_paths(contents, parent=path_obj, entry_type=catalog.Table)]
 
 
  def create_dir(
-     path: str, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error'
+     path: str, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error', parents: bool = False
  ) -> Optional[catalog.Dir]:
      """Create a directory.
 
@@ -573,6 +471,7 @@ def create_dir(
              - `'ignore'`: do nothing and return the existing directory handle
              - `'replace'`: if the existing directory is empty, drop it and create a new one
              - `'replace_force'`: drop the existing directory and all its children, and create a new one
+         parents: Create missing parent directories.
 
      Returns:
          A handle to the newly created directory, or to an already existing directory at the path when
@@ -600,22 +499,14 @@
          Create a directory and replace if it already exists:
 
          >>> pxt.create_dir('my_dir', if_exists='replace_force')
-     """
-     path_obj = catalog.Path(path)
-     cat = Catalog.get()
 
-     with env.Env.get().begin_xact():
-         if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
-         existing = _handle_path_collision(path, catalog.Dir, False, if_exists_)
-         if existing is not None:
-             assert isinstance(existing, catalog.Dir)
-             return existing
+         Create a subdirectory along with its ancestors:
 
-         parent = cat.get_schema_object(str(path_obj.parent))
-         assert parent is not None
-         dir = catalog.Dir._create(parent._id, path_obj.name)
-         Env.get().console_logger.info(f'Created directory {path!r}.')
-         return dir
+         >>> pxt.create_dir('parent1.parent2.sub_dir', parents=True)
+     """
+     path_obj = catalog.Path(path)
+     if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
+     return Catalog.get().create_dir(path_obj, if_exists=if_exists_, parents=parents)
 
 
  def drop_dir(path: str, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
@@ -655,47 +546,16 @@ def drop_dir(path: str, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
 
          >>> pxt.drop_dir('my_dir', force=True)
      """
-     _ = catalog.Path(path)  # validate format
-     cat = Catalog.get()
+     path_obj = catalog.Path(path)  # validate format
      if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
-     with Env.get().begin_xact():
-         dir = cat.get_schema_object(
-             path,
-             expected=catalog.Dir,
-             raise_if_not_exists=if_not_exists_ == catalog.IfNotExistsParam.ERROR and not force,
-         )
-         if dir is None:
-             _logger.info(f'Directory {path!r} does not exist, skipped drop_dir().')
-             return
-         _drop_dir(dir._id, path, force=force)
- 
- 
- def _drop_dir(dir_id: UUID, path: str, force: bool = False) -> None:
-     cat = Catalog.get()
-     dir_entries = cat.get_dir_contents(dir_id, recursive=False)
-     if len(dir_entries) > 0 and not force:
-         raise excs.Error(f'Directory {path!r} is not empty.')
-     tbl_paths = [_join_path(path, entry.table.md['name']) for entry in dir_entries.values() if entry.table is not None]
-     dir_paths = [_join_path(path, entry.dir.md['name']) for entry in dir_entries.values() if entry.dir is not None]
- 
-     for tbl_path in tbl_paths:
-         # check if the table still exists, it might be a view that already got force-deleted
-         if cat.get_schema_object(tbl_path, expected=catalog.Table, raise_if_not_exists=False) is not None:
-             drop_table(tbl_path, force=True)
-     for dir_path in dir_paths:
-         drop_dir(dir_path, force=True)
-     cat.drop_dir(dir_id)
-     _logger.info(f'Removed directory {path!r}.')
- 
- 
- def _join_path(path: str, name: str) -> str:
-     """Append name to path, if path is not empty."""
-     return f'{path}.{name}' if path else name
+     Catalog.get().drop_dir(path_obj, if_not_exists=if_not_exists_, force=force)
 
 
  def _extract_paths(
-     dir_entries: dict[str, Catalog.DirEntry], prefix: str, entry_type: Optional[type[catalog.SchemaObject]] = None
- ) -> list[str]:
+     dir_entries: dict[str, Catalog.DirEntry],
+     parent: catalog.Path,
+     entry_type: Optional[type[catalog.SchemaObject]] = None,
+ ) -> list[catalog.Path]:
      """Convert nested dir_entries structure to a flattened list of paths."""
      matches: list[str]
      if entry_type is None:
@@ -704,9 +564,9 @@
          matches = [name for name, entry in dir_entries.items() if entry.dir is not None]
      else:
          matches = [name for name, entry in dir_entries.items() if entry.table is not None]
-     result = [_join_path(prefix, name) for name in matches]
+     result = [parent.append(name) for name in matches]
      for name, entry in [(name, entry) for name, entry in dir_entries.items() if len(entry.dir_entries) > 0]:
-         result.extend(_extract_paths(entry.dir_entries, prefix=_join_path(prefix, name), entry_type=entry_type))
+         result.extend(_extract_paths(entry.dir_entries, parent=parent.append(name), entry_type=entry_type))
      return result
 
 
@@ -717,11 +577,11 @@ def publish_snapshot(dest_uri: str, table: catalog.Table) -> None:
      share.publish_snapshot(dest_uri, table)
 
 
- def list_dirs(path_str: str = '', recursive: bool = True) -> list[str]:
+ def list_dirs(path: str = '', recursive: bool = True) -> list[str]:
      """List the directories in a directory.
 
      Args:
-         path_str: Name or path of the directory.
+         path: Name or path of the directory.
          recursive: If `True`, lists all descendants of this directory recursively.
 
      Returns:
@@ -734,12 +594,10 @@ def list_dirs(path_str: str = '', recursive: bool = True) -> list[str]:
          >>> cl.list_dirs('my_dir', recursive=True)
          ['my_dir', 'my_dir.sub_dir1']
      """
-     _ = catalog.Path(path_str, empty_is_valid=True)  # validate format
+     path_obj = catalog.Path(path, empty_is_valid=True)  # validate format
      cat = Catalog.get()
-     with Env.get().begin_xact():
-         dir = cat.get_schema_object(path_str, expected=catalog.Dir, raise_if_not_exists=True)
-         contents = cat.get_dir_contents(dir._id, recursive=recursive)
-         return _extract_paths(contents, prefix=path_str, entry_type=catalog.Dir)
+     contents = cat.get_dir_contents(path_obj, recursive=recursive)
+     return [str(p) for p in _extract_paths(contents, parent=path_obj, entry_type=catalog.Dir)]
 
 
  def list_functions() -> Styler:
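Taken together, the globals.py changes route path handling and locking through `Catalog`, and they surface two user-visible behaviors: `create_dir()` gains a `parents` flag, and `move()` now rejects degenerate arguments up front. A minimal sketch of the new behavior, assuming an installed 0.3.9 (directory and table names are illustrative only):

    import pixeltable as pxt

    # new in this release: create missing ancestor directories in one call
    pxt.create_dir('proj.raw.images', parents=True)

    # move() now fails fast on degenerate arguments:
    # pxt.move('proj', 'proj')      # raises: source and destination cannot be identical
    # pxt.move('proj', 'proj.raw')  # raises: cannot move 'proj' into its own subdirectory
    pxt.move('proj.raw.images', 'proj.images')  # an ordinary rename/reparent still works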
pixeltable/share/__init__.py CHANGED
@@ -1 +1,3 @@
+ # ruff: noqa: F401
+ 
  from .publish import publish_snapshot
pixeltable/share/packager.py CHANGED
@@ -66,8 +66,8 @@ class TablePackager:
              'tables': [
                  {
                      'table_id': str(t._tbl_version.id),
-                     # These are temporary; will replace with a better solution once the concurrency changes to catalog have
-                     # been merged
+                     # These are temporary; will replace with a better solution once the concurrency
+                     # changes to catalog have been merged
                      'table_md': dataclasses.asdict(t._tbl_version.get()._create_tbl_md()),
                      'table_version_md': dataclasses.asdict(
                          t._tbl_version.get()._create_version_md(datetime.now().timestamp())
@@ -98,7 +98,7 @@ class TablePackager:
          for t in ancestors:
              _logger.info(f"Exporting table '{t._path}'.")
              self.__export_table(t)
-         _logger.info(f'Building archive.')
+         _logger.info('Building archive.')
          bundle_path = self.__build_tarball()
          _logger.info(f'Packaging complete: {bundle_path}')
          return bundle_path
pixeltable/share/publish.py CHANGED
@@ -1,16 +1,14 @@
- import dataclasses
  import os
  import sys
  import urllib.parse
  import urllib.request
- from datetime import datetime
  from pathlib import Path
 
  import requests
  from tqdm import tqdm
 
  import pixeltable as pxt
- from pixeltable import exceptions as excs, metadata
+ from pixeltable import exceptions as excs
  from pixeltable.env import Env
  from pixeltable.utils import sha256sum
 
@@ -46,7 +44,7 @@ def publish_snapshot(dest_tbl_uri: str, src_tbl: pxt.Table) -> str:
      else:
          raise excs.Error(f'Unsupported destination: {destination_uri}')
 
-     Env.get().console_logger.info(f'Finalizing snapshot ...')
+     Env.get().console_logger.info('Finalizing snapshot ...')
 
      finalize_request_json = {
          'upload_id': upload_id,
@@ -83,7 +81,7 @@ def _upload_bundle_to_s3(bundle: Path, parsed_location: urllib.parse.ParseResult
      upload_args = {'ChecksumAlgorithm': 'SHA256'}
 
      progress_bar = tqdm(
-         desc=f'Uploading',
+         desc='Uploading',
          total=bundle.stat().st_size,
          unit='B',
          unit_scale=True,
pixeltable/utils/coco.py CHANGED
@@ -103,7 +103,7 @@ def write_coco_dataset(df: pxt.DataFrame, dest_path: Path) -> Path:
          # create annotation records for this image
          for annotation in input_dict['annotations']:
              ann_id += 1
-             x, y, w, h = annotation['bbox']
+             _, _, w, h = annotation['bbox']
              category = annotation['category']
              categories.add(category)
              annotations.append(
@@ -119,7 +119,7 @@ def write_coco_dataset(df: pxt.DataFrame, dest_path: Path) -> Path:
              )
 
      # replace category names with ids
-     category_ids = {category: id for id, category in enumerate(sorted(list(categories)))}
+     category_ids = {category: id for id, category in enumerate(sorted(categories))}
      for annotation in annotations:
          annotation['category_id'] = category_ids[annotation['category_id']]
 
@@ -129,8 +129,8 @@ def write_coco_dataset(df: pxt.DataFrame, dest_path: Path) -> Path:
          'categories': [{'id': id, 'name': category} for category, id in category_ids.items()],
      }
      output_path = dest_path / 'data.json'
-     with open(output_path, 'w') as f:
-         json.dump(result, f)
+     with open(output_path, 'w', encoding='utf-8') as fp:
+         json.dump(result, fp)
      return output_path
 
 
pixeltable/utils/console_output.py CHANGED
@@ -34,9 +34,7 @@ class ConsoleOutputHandler(logging.StreamHandler):
 
  class ConsoleMessageFilter(logging.Filter):
      def filter(self, record: logging.LogRecord) -> bool:
-         if hasattr(record, 'user_visible') and record.user_visible:
-             return True
-         return False
+         return getattr(record, 'user_visible', False)
 
 
  class ConsoleLogger(logging.LoggerAdapter):
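For context on the refactored filter: a record now passes only if it carries a truthy `user_visible` attribute. With the stdlib `logging` module such attributes are attached via the `extra` parameter; a minimal sketch (not taken from the diff; the logger name is an assumption):

    import logging

    logger = logging.getLogger('pixeltable')
    # keys in `extra` become attributes of the LogRecord, so the filter's
    # getattr(record, 'user_visible', False) check returns True for this record
    logger.info('Created directory %r.', 'my_dir', extra={'user_visible': True})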
pixeltable/utils/coroutine.py ADDED
@@ -0,0 +1,41 @@
+ import asyncio
+ import threading
+ from concurrent.futures import ThreadPoolExecutor
+ from typing import Any, Coroutine, TypeVar
+ 
+ T = TypeVar('T')
+ 
+ 
+ # TODO This is a temporary hack to be able to run async UDFs in contexts that are not properly handled by the existing
+ # scheduler logic (e.g., inside the eval loop of a JsonMapper). Once the scheduler is fully general, it can be
+ # removed.
+ 
+ 
+ def run_coroutine_synchronously(coroutine: Coroutine[Any, Any, T], timeout: float = 30) -> T:
+     """
+     Runs the given coroutine synchronously, even if called in the context of a running event loop.
+     """
+ 
+     def run_in_new_loop():
+         new_loop = asyncio.new_event_loop()
+         asyncio.set_event_loop(new_loop)
+         try:
+             return new_loop.run_until_complete(coroutine)
+         finally:
+             new_loop.close()
+ 
+     try:
+         loop = asyncio.get_running_loop()
+     except RuntimeError:
+         # No event loop; just call `asyncio.run()`
+         return asyncio.run(coroutine)
+ 
+     if threading.current_thread() is threading.main_thread():
+         if not loop.is_running():
+             return loop.run_until_complete(coroutine)
+         else:
+             with ThreadPoolExecutor() as pool:
+                 future = pool.submit(run_in_new_loop)
+                 return future.result(timeout=timeout)
+     else:
+         return asyncio.run_coroutine_threadsafe(coroutine, loop).result()
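A minimal usage sketch for the new helper; the async function below is a hypothetical stand-in for an async UDF, and only `run_coroutine_synchronously` itself comes from this release:

    import asyncio

    from pixeltable.utils.coroutine import run_coroutine_synchronously

    async def classify(x: int) -> str:
        await asyncio.sleep(0.1)  # stand-in for real async work, e.g. a model call
        return f'label-{x}'

    # works whether or not an event loop is already running in the caller's
    # thread, which is the point of the helper (e.g., inside the eval loop
    # of a JsonMapper):
    print(run_coroutine_synchronously(classify(7), timeout=5))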