pixeltable-0.3.6-py3-none-any.whl → pixeltable-0.3.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (127)
  1. pixeltable/__init__.py +5 -3
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -0
  4. pixeltable/catalog/catalog.py +335 -128
  5. pixeltable/catalog/column.py +22 -5
  6. pixeltable/catalog/dir.py +19 -6
  7. pixeltable/catalog/insertable_table.py +34 -37
  8. pixeltable/catalog/named_function.py +0 -4
  9. pixeltable/catalog/schema_object.py +28 -42
  10. pixeltable/catalog/table.py +193 -158
  11. pixeltable/catalog/table_version.py +191 -232
  12. pixeltable/catalog/table_version_handle.py +50 -0
  13. pixeltable/catalog/table_version_path.py +49 -33
  14. pixeltable/catalog/view.py +56 -96
  15. pixeltable/config.py +103 -0
  16. pixeltable/dataframe.py +89 -89
  17. pixeltable/env.py +98 -168
  18. pixeltable/exec/aggregation_node.py +5 -4
  19. pixeltable/exec/cache_prefetch_node.py +1 -1
  20. pixeltable/exec/component_iteration_node.py +13 -9
  21. pixeltable/exec/data_row_batch.py +3 -3
  22. pixeltable/exec/exec_context.py +0 -4
  23. pixeltable/exec/exec_node.py +3 -2
  24. pixeltable/exec/expr_eval/schedulers.py +2 -1
  25. pixeltable/exec/in_memory_data_node.py +9 -4
  26. pixeltable/exec/row_update_node.py +1 -2
  27. pixeltable/exec/sql_node.py +20 -16
  28. pixeltable/exprs/__init__.py +2 -0
  29. pixeltable/exprs/arithmetic_expr.py +7 -11
  30. pixeltable/exprs/array_slice.py +1 -1
  31. pixeltable/exprs/column_property_ref.py +3 -3
  32. pixeltable/exprs/column_ref.py +12 -13
  33. pixeltable/exprs/comparison.py +3 -6
  34. pixeltable/exprs/compound_predicate.py +4 -4
  35. pixeltable/exprs/expr.py +31 -22
  36. pixeltable/exprs/expr_dict.py +3 -3
  37. pixeltable/exprs/expr_set.py +1 -1
  38. pixeltable/exprs/function_call.py +110 -80
  39. pixeltable/exprs/globals.py +3 -3
  40. pixeltable/exprs/in_predicate.py +1 -1
  41. pixeltable/exprs/inline_expr.py +3 -3
  42. pixeltable/exprs/is_null.py +1 -1
  43. pixeltable/exprs/json_mapper.py +2 -2
  44. pixeltable/exprs/json_path.py +17 -10
  45. pixeltable/exprs/literal.py +1 -1
  46. pixeltable/exprs/method_ref.py +2 -2
  47. pixeltable/exprs/row_builder.py +8 -17
  48. pixeltable/exprs/rowid_ref.py +21 -10
  49. pixeltable/exprs/similarity_expr.py +5 -5
  50. pixeltable/exprs/sql_element_cache.py +1 -1
  51. pixeltable/exprs/type_cast.py +2 -3
  52. pixeltable/exprs/variable.py +2 -2
  53. pixeltable/ext/__init__.py +2 -0
  54. pixeltable/ext/functions/__init__.py +2 -0
  55. pixeltable/ext/functions/yolox.py +3 -3
  56. pixeltable/func/__init__.py +3 -1
  57. pixeltable/func/aggregate_function.py +9 -9
  58. pixeltable/func/callable_function.py +3 -4
  59. pixeltable/func/expr_template_function.py +6 -16
  60. pixeltable/func/function.py +48 -14
  61. pixeltable/func/function_registry.py +1 -3
  62. pixeltable/func/query_template_function.py +5 -12
  63. pixeltable/func/signature.py +23 -22
  64. pixeltable/func/tools.py +3 -3
  65. pixeltable/func/udf.py +6 -4
  66. pixeltable/functions/__init__.py +2 -0
  67. pixeltable/functions/fireworks.py +7 -4
  68. pixeltable/functions/globals.py +4 -5
  69. pixeltable/functions/huggingface.py +1 -5
  70. pixeltable/functions/image.py +17 -7
  71. pixeltable/functions/llama_cpp.py +1 -1
  72. pixeltable/functions/mistralai.py +1 -1
  73. pixeltable/functions/ollama.py +4 -4
  74. pixeltable/functions/openai.py +19 -19
  75. pixeltable/functions/string.py +23 -30
  76. pixeltable/functions/timestamp.py +11 -6
  77. pixeltable/functions/together.py +14 -12
  78. pixeltable/functions/util.py +1 -1
  79. pixeltable/functions/video.py +5 -4
  80. pixeltable/functions/vision.py +6 -9
  81. pixeltable/functions/whisper.py +3 -3
  82. pixeltable/globals.py +246 -260
  83. pixeltable/index/__init__.py +2 -0
  84. pixeltable/index/base.py +1 -1
  85. pixeltable/index/btree.py +3 -1
  86. pixeltable/index/embedding_index.py +11 -5
  87. pixeltable/io/external_store.py +11 -12
  88. pixeltable/io/label_studio.py +4 -3
  89. pixeltable/io/parquet.py +57 -56
  90. pixeltable/iterators/__init__.py +4 -2
  91. pixeltable/iterators/audio.py +11 -11
  92. pixeltable/iterators/document.py +10 -10
  93. pixeltable/iterators/string.py +1 -2
  94. pixeltable/iterators/video.py +14 -15
  95. pixeltable/metadata/__init__.py +9 -5
  96. pixeltable/metadata/converters/convert_10.py +0 -1
  97. pixeltable/metadata/converters/convert_15.py +0 -2
  98. pixeltable/metadata/converters/convert_23.py +0 -2
  99. pixeltable/metadata/converters/convert_24.py +3 -3
  100. pixeltable/metadata/converters/convert_25.py +1 -1
  101. pixeltable/metadata/converters/convert_27.py +0 -2
  102. pixeltable/metadata/converters/convert_28.py +0 -2
  103. pixeltable/metadata/converters/convert_29.py +7 -8
  104. pixeltable/metadata/converters/util.py +7 -7
  105. pixeltable/metadata/schema.py +27 -19
  106. pixeltable/plan.py +68 -40
  107. pixeltable/share/__init__.py +2 -0
  108. pixeltable/share/packager.py +15 -12
  109. pixeltable/share/publish.py +3 -5
  110. pixeltable/store.py +37 -38
  111. pixeltable/type_system.py +41 -28
  112. pixeltable/utils/coco.py +4 -4
  113. pixeltable/utils/console_output.py +1 -3
  114. pixeltable/utils/description_helper.py +1 -1
  115. pixeltable/utils/documents.py +3 -3
  116. pixeltable/utils/filecache.py +20 -9
  117. pixeltable/utils/formatter.py +2 -3
  118. pixeltable/utils/media_store.py +1 -1
  119. pixeltable/utils/pytorch.py +1 -1
  120. pixeltable/utils/sql.py +4 -4
  121. pixeltable/utils/transactional_directory.py +2 -1
  122. {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/METADATA +1 -1
  123. pixeltable-0.3.8.dist-info/RECORD +174 -0
  124. pixeltable-0.3.6.dist-info/RECORD +0 -172
  125. {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/LICENSE +0 -0
  126. {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/WHEEL +0 -0
  127. {pixeltable-0.3.6.dist-info → pixeltable-0.3.8.dist-info}/entry_points.txt +0 -0
pixeltable/globals.py CHANGED
@@ -1,22 +1,16 @@
1
- import dataclasses
2
1
  import logging
3
2
  import urllib.parse
4
- from typing import Any, Iterable, Literal, Optional, Union
3
+ from typing import Any, Iterable, Literal, Optional, Union, cast
5
4
  from uuid import UUID
6
5
 
7
6
  import pandas as pd
8
- import sqlalchemy as sql
9
7
  from pandas.io.formats.style import Styler
10
- from sqlalchemy.util.preloaded import orm
11
8
 
12
- import pixeltable.exceptions as excs
13
- import pixeltable.exprs as exprs
14
- from pixeltable import DataFrame, catalog, func, share
15
- from pixeltable.catalog import Catalog
9
+ from pixeltable import DataFrame, catalog, env, exceptions as excs, exprs, func, share
10
+ from pixeltable.catalog import Catalog, IfExistsParam, IfNotExistsParam
16
11
  from pixeltable.dataframe import DataFrameResultSet
17
12
  from pixeltable.env import Env
18
13
  from pixeltable.iterators import ComponentIterator
19
- from pixeltable.metadata import schema
20
14
  from pixeltable.utils.filecache import FileCache
21
15
 
22
16
  _logger = logging.getLogger('pixeltable')
@@ -27,68 +21,39 @@ def init() -> None:
27
21
  _ = Catalog.get()
28
22
 
29
23
 
30
- def _get_or_drop_existing_path(
31
- path_str: str,
32
- expected_obj_type: type[catalog.SchemaObject],
33
- expected_snapshot: bool,
34
- if_exists: catalog.IfExistsParam,
24
+ def _handle_path_collision(
25
+ path: str, expected_obj_type: type[catalog.SchemaObject], expected_snapshot: bool, if_exists: catalog.IfExistsParam
35
26
  ) -> Optional[catalog.SchemaObject]:
36
- """Handle schema object path collision during creation according to the if_exists parameter.
37
-
38
- Args:
39
- path_str: An existing and valid path to the dir, table, view, or snapshot.
40
- expected_obj_type: Whether the caller of this function is creating a dir, table, or view at the existing path.
41
- expected_snapshot: Whether the caller of this function is creating a snapshot at the existing path.
42
- if_exists: Directive regarding how to handle the existing path.
43
-
44
- Returns:
45
- A handle to the existing dir, table, view, or snapshot, if `if_exists='ignore'`, otherwise `None`.
46
-
47
- Raises:
48
- Error: If the existing path is not of the expected type, or if the existing path has dependents and
49
- `if_exists='replace'` or `if_exists='replace_force'`.
50
- """
51
27
  cat = Catalog.get()
52
- path = catalog.Path(path_str)
53
- assert cat.paths.get_object(path) is not None
54
-
28
+ obj: Optional[catalog.SchemaObject]
55
29
  if if_exists == catalog.IfExistsParam.ERROR:
56
- raise excs.Error(f'Path `{path_str}` already exists.')
30
+ _ = cat.get_schema_object(path, raise_if_exists=True)
31
+ obj = None
32
+ else:
33
+ obj = cat.get_schema_object(path)
34
+ is_snapshot = isinstance(obj, catalog.View) and obj._tbl_version_path.is_snapshot()
35
+ if obj is not None and (not isinstance(obj, expected_obj_type) or (expected_snapshot and not is_snapshot)):
36
+ obj_type_str = 'snapshot' if expected_snapshot else expected_obj_type._display_name()
37
+ raise excs.Error(
38
+ f'Path {path!r} already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.'
39
+ )
40
+ if obj is None:
41
+ return None
57
42
 
58
- existing_path = cat.paths[path]
59
- existing_path_is_snapshot = (
60
- 'is_snapshot' in existing_path.get_metadata() and existing_path.get_metadata()['is_snapshot']
61
- )
62
- obj_type_str = 'Snapshot' if expected_snapshot else expected_obj_type._display_name().capitalize()
63
- # Check if the existing path is of expected type.
64
- if not isinstance(existing_path, expected_obj_type) or (expected_snapshot and not existing_path_is_snapshot):
65
- raise excs.Error(
66
- f'Path `{path_str}` already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.'
67
- )
43
+ if if_exists == IfExistsParam.IGNORE:
44
+ return obj
68
45
 
69
- # if_exists='ignore' return the handle to the existing object.
70
- assert isinstance(existing_path, expected_obj_type)
71
- if if_exists == catalog.IfExistsParam.IGNORE:
72
- return existing_path
73
-
74
- # Check if the existing object has dependents. If so, cannot replace it
75
- # unless if_exists='replace_force'.
76
- has_dependents = existing_path._has_dependents
77
- if if_exists == catalog.IfExistsParam.REPLACE and has_dependents:
78
- raise excs.Error(
79
- f"{obj_type_str} `{path_str}` already exists and has dependents. Use `if_exists='replace_force'` to replace it."
80
- )
46
+ # drop the existing schema object
47
+ if isinstance(obj, catalog.Dir):
48
+ dir_contents = cat.get_dir_contents(obj._id)
49
+ if len(dir_contents) > 0 and if_exists == IfExistsParam.REPLACE:
50
+ raise excs.Error(
51
+ f'Directory {path!r} already exists and is not empty. Use `if_exists="replace_force"` to replace it.'
52
+ )
53
+ _drop_dir(obj._id, path, force=True)
81
54
  else:
82
- assert if_exists == catalog.IfExistsParam.REPLACE_FORCE or not has_dependents
83
- # Drop the existing path so it can be replaced.
84
- # Any errors during drop will be raised.
85
- _logger.info(f'Dropping {obj_type_str} `{path_str}` to replace it.')
86
- if isinstance(existing_path, catalog.Dir):
87
- drop_dir(path_str, force=True)
88
- else:
89
- drop_table(path_str, force=True)
90
- assert cat.paths.get_object(path) is None
91
-
55
+ assert isinstance(obj, catalog.Table)
56
+ _drop_table(obj, force=if_exists == IfExistsParam.REPLACE_FORCE, is_replace=True)
92
57
  return None
93
58
 
94
59
 
@@ -158,54 +123,52 @@ def create_table(
158
123
  path = catalog.Path(path_str)
159
124
  cat = Catalog.get()
160
125
 
161
- if cat.paths.get_object(path) is not None:
162
- # The table already exists. Handle it as per user directive.
163
- _if_exists = catalog.IfExistsParam.validated(if_exists, 'if_exists')
164
- existing_table = _get_or_drop_existing_path(path_str, catalog.InsertableTable, False, _if_exists)
165
- if existing_table is not None:
166
- assert isinstance(existing_table, catalog.Table)
167
- return existing_table
168
-
169
- dir = cat.paths[path.parent]
170
-
171
- df: Optional[DataFrame] = None
172
- if isinstance(schema_or_df, dict):
173
- schema = schema_or_df
174
- elif isinstance(schema_or_df, DataFrame):
175
- df = schema_or_df
176
- schema = df.schema
177
- elif isinstance(schema_or_df, DataFrameResultSet):
178
- raise excs.Error(
179
- '`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame. (Is there an extraneous call to `collect()`?)'
180
- )
181
- else:
182
- raise excs.Error('`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame.')
126
+ with env.Env.get().begin_xact():
127
+ if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
128
+ existing = _handle_path_collision(path_str, catalog.InsertableTable, False, if_exists_)
129
+ if existing is not None:
130
+ assert isinstance(existing, catalog.Table)
131
+ return existing
132
+
133
+ dir = cat.get_schema_object(str(path.parent), expected=catalog.Dir, raise_if_not_exists=True)
134
+ assert dir is not None
135
+
136
+ df: Optional[DataFrame] = None
137
+ if isinstance(schema_or_df, dict):
138
+ schema = schema_or_df
139
+ elif isinstance(schema_or_df, DataFrame):
140
+ df = schema_or_df
141
+ schema = df.schema
142
+ elif isinstance(schema_or_df, DataFrameResultSet):
143
+ raise excs.Error(
144
+ '`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame. '
145
+ '(Is there an extraneous call to `collect()`?)'
146
+ )
147
+ else:
148
+ raise excs.Error('`schema_or_df` must be either a schema dictionary or a Pixeltable DataFrame.')
183
149
 
184
- if len(schema) == 0:
185
- raise excs.Error(f'Table schema is empty: `{path_str}`')
150
+ if len(schema) == 0:
151
+ raise excs.Error(f'Table schema is empty: `{path_str}`')
186
152
 
187
- if primary_key is None:
188
- primary_key = []
189
- elif isinstance(primary_key, str):
190
- primary_key = [primary_key]
191
- else:
192
- if not isinstance(primary_key, list) or not all(isinstance(pk, str) for pk in primary_key):
153
+ if primary_key is None:
154
+ primary_key = []
155
+ elif isinstance(primary_key, str):
156
+ primary_key = [primary_key]
157
+ elif not isinstance(primary_key, list) or not all(isinstance(pk, str) for pk in primary_key):
193
158
  raise excs.Error('primary_key must be a single column name or a list of column names')
194
159
 
195
- tbl = catalog.InsertableTable._create(
196
- dir._id,
197
- path.name,
198
- schema,
199
- df,
200
- primary_key=primary_key,
201
- num_retained_versions=num_retained_versions,
202
- comment=comment,
203
- media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'),
204
- )
205
- cat.paths[path] = tbl
206
-
207
- _logger.info(f'Created table `{path_str}`.')
208
- return tbl
160
+ tbl = catalog.InsertableTable._create(
161
+ dir._id,
162
+ path.name,
163
+ schema,
164
+ df,
165
+ primary_key=primary_key,
166
+ num_retained_versions=num_retained_versions,
167
+ comment=comment,
168
+ media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'),
169
+ )
170
+ cat.add_tbl(tbl)
171
+ return tbl
209
172
 
210
173
 
211
174
  def create_view(
@@ -292,53 +255,53 @@ def create_view(
292
255
  select_list = base.select_list
293
256
  else:
294
257
  raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
295
- assert isinstance(base, catalog.Table) or isinstance(base, DataFrame)
258
+ assert isinstance(base, (catalog.Table, DataFrame))
296
259
 
297
260
  path = catalog.Path(path_str)
298
261
  cat = Catalog.get()
299
262
 
300
- if cat.paths.get_object(path) is not None:
301
- # The view already exists. Handle it as per user directive.
302
- _if_exists = catalog.IfExistsParam.validated(if_exists, 'if_exists')
303
- existing_path = _get_or_drop_existing_path(path_str, catalog.View, is_snapshot, _if_exists)
304
- if existing_path is not None:
305
- assert isinstance(existing_path, catalog.View)
306
- return existing_path
263
+ with Env.get().begin_xact():
264
+ if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
265
+ existing = _handle_path_collision(path_str, catalog.View, is_snapshot, if_exists_)
266
+ if existing is not None:
267
+ assert isinstance(existing, catalog.View)
268
+ return existing
307
269
 
308
- dir = cat.paths[path.parent]
309
-
310
- if additional_columns is None:
311
- additional_columns = {}
312
- else:
313
- # additional columns should not be in the base table
314
- for col_name in additional_columns.keys():
315
- if col_name in [c.name for c in tbl_version_path.columns()]:
316
- raise excs.Error(
317
- f'Column {col_name!r} already exists in the base table {tbl_version_path.get_column(col_name).tbl.name}.'
318
- )
319
- if iterator is None:
320
- iterator_class, iterator_args = None, None
321
- else:
322
- iterator_class, iterator_args = iterator
270
+ dir = cat.get_schema_object(str(path.parent), expected=catalog.Dir, raise_if_not_exists=True)
271
+ assert dir is not None
323
272
 
324
- view = catalog.View._create(
325
- dir._id,
326
- path.name,
327
- base=tbl_version_path,
328
- select_list=select_list,
329
- additional_columns=additional_columns,
330
- predicate=where,
331
- is_snapshot=is_snapshot,
332
- iterator_cls=iterator_class,
333
- iterator_args=iterator_args,
334
- num_retained_versions=num_retained_versions,
335
- comment=comment,
336
- media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'),
337
- )
338
- cat.paths[path] = view
339
- _logger.info(f'Created view `{path_str}`.')
340
- FileCache.get().emit_eviction_warnings()
341
- return view
273
+ if additional_columns is None:
274
+ additional_columns = {}
275
+ else:
276
+ # additional columns should not be in the base table
277
+ for col_name in additional_columns:
278
+ if col_name in [c.name for c in tbl_version_path.columns()]:
279
+ raise excs.Error(
280
+ f'Column {col_name!r} already exists in the base table '
281
+ f'{tbl_version_path.get_column(col_name).tbl.get().name}.'
282
+ )
283
+ if iterator is None:
284
+ iterator_class, iterator_args = None, None
285
+ else:
286
+ iterator_class, iterator_args = iterator
287
+
288
+ view = catalog.View._create(
289
+ dir._id,
290
+ path.name,
291
+ base=tbl_version_path,
292
+ select_list=select_list,
293
+ additional_columns=additional_columns,
294
+ predicate=where,
295
+ is_snapshot=is_snapshot,
296
+ iterator_cls=iterator_class,
297
+ iterator_args=iterator_args,
298
+ num_retained_versions=num_retained_versions,
299
+ comment=comment,
300
+ media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'),
301
+ )
302
+ FileCache.get().emit_eviction_warnings()
303
+ cat.add_tbl(view)
304
+ return view
342
305
 
343
306
 
344
307
  def create_snapshot(
@@ -400,7 +363,9 @@ def create_snapshot(
400
363
  if `my_snapshot` does not already exist:
401
364
 
402
365
  >>> view = pxt.get_table('my_view')
403
- ... snapshot = pxt.create_snapshot('my_snapshot', view, additional_columns={'col3': pxt.Int}, if_exists='ignore')
366
+ ... snapshot = pxt.create_snapshot(
367
+ ... 'my_snapshot', view, additional_columns={'col3': pxt.Int}, if_exists='ignore'
368
+ ... )
404
369
 
405
370
  Create a snapshot `my_snapshot` on a table `my_table`, and replace any existing snapshot named `my_snapshot`:
406
371
 
@@ -445,11 +410,11 @@ def get_table(path: str) -> catalog.Table:
445
410
 
446
411
  >>> tbl = pxt.get_table('my_snapshot')
447
412
  """
448
- p = catalog.Path(path)
449
- Catalog.get().paths.check_is_valid(p, expected=catalog.Table)
450
- obj = Catalog.get().paths[p]
451
- assert isinstance(obj, catalog.Table)
452
- return obj
413
+ with Env.get().begin_xact():
414
+ obj = Catalog.get().get_schema_object(path, expected=catalog.Table, raise_if_not_exists=True)
415
+ assert isinstance(obj, catalog.Table)
416
+ obj.ensure_md_loaded()
417
+ return obj
453
418
 
454
419
 
455
420
  def move(path: str, new_path: str) -> None:
@@ -471,14 +436,14 @@ def move(path: str, new_path: str) -> None:
471
436
 
472
437
  >>>> pxt.move('dir1.my_table', 'dir1.new_name')
473
438
  """
474
- p = catalog.Path(path)
475
- Catalog.get().paths.check_is_valid(p, expected=catalog.SchemaObject)
476
- new_p = catalog.Path(new_path)
477
- Catalog.get().paths.check_is_valid(new_p, expected=None)
478
- obj = Catalog.get().paths[p]
479
- Catalog.get().paths.move(p, new_p)
480
- new_dir = Catalog.get().paths[new_p.parent]
481
- obj._move(new_p.name, new_dir._id)
439
+ cat = Catalog.get()
440
+ with Env.get().begin_xact():
441
+ obj = cat.get_schema_object(path, raise_if_not_exists=True)
442
+ new_p = catalog.Path(new_path)
443
+ dest_dir_path = str(new_p.parent)
444
+ dest_dir = cat.get_schema_object(dest_dir_path, expected=catalog.Dir, raise_if_not_exists=True)
445
+ _ = cat.get_schema_object(new_path, raise_if_exists=True)
446
+ obj._move(new_p.name, dest_dir._id)
482
447
 
483
448
 
484
449
  def drop_table(
@@ -518,35 +483,49 @@ def drop_table(
518
483
  >>> pxt.drop_table('subdir.my_table', force=True)
519
484
  """
520
485
  cat = Catalog.get()
521
- if isinstance(table, str):
522
- tbl_path_obj = catalog.Path(table)
523
- tbl = cat.paths.get_object(tbl_path_obj)
524
- if tbl is None:
525
- _if_not_exists = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
526
- if _if_not_exists == catalog.IfNotExistsParam.IGNORE or force:
486
+ tbl: Optional[catalog.Table]
487
+ with Env.get().begin_xact():
488
+ if isinstance(table, str):
489
+ _ = catalog.Path(table) # validate path
490
+ if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
491
+ tbl = cast(
492
+ Optional[catalog.Table],
493
+ cat.get_schema_object(
494
+ table,
495
+ expected=catalog.Table,
496
+ raise_if_not_exists=if_not_exists_ == IfNotExistsParam.ERROR and not force,
497
+ ),
498
+ )
499
+ if tbl is None:
527
500
  _logger.info(f'Skipped table `{table}` (does not exist).')
528
501
  return
529
- else:
530
- raise excs.Error(f'Table `{table}` does not exist.')
531
- if not isinstance(tbl, catalog.Table):
532
- raise excs.Error(
533
- f'{tbl} needs to be a {catalog.Table._display_name()} but is a {type(tbl)._display_name()}'
534
- )
535
- else:
536
- tbl = table
537
- tbl_path_obj = catalog.Path(tbl._path)
502
+ else:
503
+ tbl = table
504
+ _drop_table(tbl, force=force, is_replace=False)
538
505
 
539
- assert isinstance(tbl, catalog.Table)
540
- if len(cat.tbl_dependents[tbl._id]) > 0:
541
- dependent_paths = [dep._path for dep in cat.tbl_dependents[tbl._id]]
506
+
507
+ def _drop_table(tbl: catalog.Table, force: bool, is_replace: bool) -> None:
508
+ cat = Catalog.get()
509
+ view_ids = cat.get_views(tbl._id)
510
+ if len(view_ids) > 0:
511
+ view_paths = [cat.get_tbl_path(id) for id in view_ids]
542
512
  if force:
543
- for dependent_path in dependent_paths:
544
- drop_table(dependent_path, force=True)
513
+ for view_path in view_paths:
514
+ drop_table(view_path, force=True)
545
515
  else:
546
- raise excs.Error(f'Table {tbl._path} has dependents: {", ".join(dependent_paths)}')
516
+ is_snapshot = tbl._tbl_version_path.is_snapshot()
517
+ obj_type_str = 'Snapshot' if is_snapshot else tbl._display_name().capitalize()
518
+ msg: str
519
+ if is_replace:
520
+ msg = (
521
+ f'{obj_type_str} {tbl._path()} already exists and has dependents: {", ".join(view_paths)}. '
522
+ "Use `if_exists='replace_force'` to replace it."
523
+ )
524
+ else:
525
+ msg = f'{obj_type_str} {tbl._path()} has dependents: {", ".join(view_paths)}'
526
+ raise excs.Error(msg)
547
527
  tbl._drop()
548
- del cat.paths[tbl_path_obj]
549
- _logger.info(f'Dropped table `{tbl._path}`.')
528
+ _logger.info(f'Dropped table `{tbl._path()}`.')
550
529
 
551
530
 
552
531
  def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
@@ -572,19 +551,21 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
572
551
 
573
552
  >>> pxt.list_tables('dir1')
574
553
  """
575
- assert dir_path is not None
576
- path = catalog.Path(dir_path, empty_is_valid=True)
577
- Catalog.get().paths.check_is_valid(path, expected=catalog.Dir)
578
- return [str(p) for p in Catalog.get().paths.get_children(path, child_type=catalog.Table, recursive=recursive)]
554
+ _ = catalog.Path(dir_path, empty_is_valid=True) # validate format
555
+ cat = Catalog.get()
556
+ with Env.get().begin_xact():
557
+ dir = cat.get_schema_object(dir_path, expected=catalog.Dir, raise_if_not_exists=True)
558
+ contents = cat.get_dir_contents(dir._id, recursive=recursive)
559
+ return _extract_paths(contents, prefix=dir_path, entry_type=catalog.Table)
579
560
 
580
561
 
581
562
  def create_dir(
582
- path_str: str, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error'
563
+ path: str, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error'
583
564
  ) -> Optional[catalog.Dir]:
584
565
  """Create a directory.
585
566
 
586
567
  Args:
587
- path_str: Path to the directory.
568
+ path: Path to the directory.
588
569
  if_exists: Directive regarding how to handle if the path already exists.
589
570
  Must be one of the following:
590
571
 
@@ -594,8 +575,8 @@ def create_dir(
594
575
  - `'replace_force'`: drop the existing directory and all its children, and create a new one
595
576
 
596
577
  Returns:
597
- A handle to the newly created directory, or to an already existing directory at the path when `if_exists='ignore'`.
598
- Please note the existing directory may not be empty.
578
+ A handle to the newly created directory, or to an already existing directory at the path when
579
+ `if_exists='ignore'`. Please note the existing directory may not be empty.
599
580
 
600
581
  Raises:
601
582
  Error: If
@@ -620,38 +601,28 @@ def create_dir(
620
601
 
621
602
  >>> pxt.create_dir('my_dir', if_exists='replace_force')
622
603
  """
623
- path = catalog.Path(path_str)
604
+ path_obj = catalog.Path(path)
624
605
  cat = Catalog.get()
625
606
 
626
- if cat.paths.get_object(path):
627
- # The directory already exists. Handle it as per user directive.
628
- _if_exists = catalog.IfExistsParam.validated(if_exists, 'if_exists')
629
- existing_path = _get_or_drop_existing_path(path_str, catalog.Dir, False, _if_exists)
630
- if existing_path is not None:
631
- assert isinstance(existing_path, catalog.Dir)
632
- return existing_path
633
-
634
- parent = cat.paths[path.parent]
635
- assert parent is not None
636
- with orm.Session(Env.get().engine, future=True) as session:
637
- dir_md = schema.DirMd(name=path.name, user=None, additional_md={})
638
- dir_record = schema.Dir(parent_id=parent._id, md=dataclasses.asdict(dir_md))
639
- session.add(dir_record)
640
- session.flush()
641
- assert dir_record.id is not None
642
- assert isinstance(dir_record.id, UUID)
643
- dir = catalog.Dir(dir_record.id, parent._id, path.name)
644
- cat.paths[path] = dir
645
- session.commit()
646
- Env.get().console_logger.info(f'Created directory `{path_str}`.')
607
+ with env.Env.get().begin_xact():
608
+ if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
609
+ existing = _handle_path_collision(path, catalog.Dir, False, if_exists_)
610
+ if existing is not None:
611
+ assert isinstance(existing, catalog.Dir)
612
+ return existing
613
+
614
+ parent = cat.get_schema_object(str(path_obj.parent))
615
+ assert parent is not None
616
+ dir = catalog.Dir._create(parent._id, path_obj.name)
617
+ Env.get().console_logger.info(f'Created directory {path!r}.')
647
618
  return dir
648
619
 
649
620
 
650
- def drop_dir(path_str: str, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
621
+ def drop_dir(path: str, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
651
622
  """Remove a directory.
652
623
 
653
624
  Args:
654
- path_str: Name or path of the directory.
625
+ path: Name or path of the directory.
655
626
  force: If `True`, will also drop all tables and subdirectories of this directory, recursively, along
656
627
  with any views or snapshots that depend on any of the dropped tables.
657
628
  if_not_exists: Directive regarding how to handle if the path does not exist.
@@ -684,47 +655,59 @@ def drop_dir(path_str: str, force: bool = False, if_not_exists: Literal['error',
684
655
 
685
656
  >>> pxt.drop_dir('my_dir', force=True)
686
657
  """
658
+ _ = catalog.Path(path) # validate format
687
659
  cat = Catalog.get()
688
- path = catalog.Path(path_str)
689
- obj = cat.paths.get_object(path)
690
- if obj is None:
691
- _if_not_exists = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
692
- if _if_not_exists == catalog.IfNotExistsParam.IGNORE or force:
693
- _logger.info(f'Skipped directory `{path_str}` (does not exist).')
694
- return
695
- else:
696
- raise excs.Error(f'Directory `{path_str}` does not exist.')
697
-
698
- if not isinstance(obj, catalog.Dir):
699
- raise excs.Error(
700
- f'{str(path)} needs to be a {catalog.Dir._display_name()} but is a {type(obj)._display_name()}'
660
+ if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
661
+ with Env.get().begin_xact():
662
+ dir = cat.get_schema_object(
663
+ path,
664
+ expected=catalog.Dir,
665
+ raise_if_not_exists=if_not_exists_ == catalog.IfNotExistsParam.ERROR and not force,
701
666
  )
667
+ if dir is None:
668
+ _logger.info(f'Directory {path!r} does not exist, skipped drop_dir().')
669
+ return
670
+ _drop_dir(dir._id, path, force=force)
702
671
 
703
- children = cat.paths.get_children(path, child_type=None, recursive=True)
704
-
705
- if len(children) > 0 and not force:
706
- raise excs.Error(f'Directory `{path_str}` is not empty.')
707
-
708
- for child in children:
709
- assert isinstance(child, catalog.Path)
710
- # We need to check that the child is still in `cat.paths`, since it is possible it was
711
- # already deleted as a dependent of a preceding child in the iteration.
712
- try:
713
- obj = cat.paths[child]
714
- except excs.Error:
715
- continue
716
- if isinstance(obj, catalog.Dir):
717
- drop_dir(str(child), force=True)
718
- else:
719
- assert isinstance(obj, catalog.Table)
720
- assert not obj._is_dropped # else it should have been removed from `cat.paths` already
721
- drop_table(str(child), force=True)
722
672
 
723
- with Env.get().engine.begin() as conn:
724
- dir = Catalog.get().paths[path]
725
- conn.execute(sql.delete(schema.Dir.__table__).where(schema.Dir.id == dir._id))
726
- del Catalog.get().paths[path]
727
- _logger.info(f'Removed directory `{path_str}`.')
673
+ def _drop_dir(dir_id: UUID, path: str, force: bool = False) -> None:
674
+ cat = Catalog.get()
675
+ dir_entries = cat.get_dir_contents(dir_id, recursive=False)
676
+ if len(dir_entries) > 0 and not force:
677
+ raise excs.Error(f'Directory {path!r} is not empty.')
678
+ tbl_paths = [_join_path(path, entry.table.md['name']) for entry in dir_entries.values() if entry.table is not None]
679
+ dir_paths = [_join_path(path, entry.dir.md['name']) for entry in dir_entries.values() if entry.dir is not None]
680
+
681
+ for tbl_path in tbl_paths:
682
+ # check if the table still exists, it might be a view that already got force-deleted
683
+ if cat.get_schema_object(tbl_path, expected=catalog.Table, raise_if_not_exists=False) is not None:
684
+ drop_table(tbl_path, force=True)
685
+ for dir_path in dir_paths:
686
+ drop_dir(dir_path, force=True)
687
+ cat.drop_dir(dir_id)
688
+ _logger.info(f'Removed directory {path!r}.')
689
+
690
+
691
+ def _join_path(path: str, name: str) -> str:
692
+ """Append name to path, if path is not empty."""
693
+ return f'{path}.{name}' if path else name
694
+
695
+
696
+ def _extract_paths(
697
+ dir_entries: dict[str, Catalog.DirEntry], prefix: str, entry_type: Optional[type[catalog.SchemaObject]] = None
698
+ ) -> list[str]:
699
+ """Convert nested dir_entries structure to a flattened list of paths."""
700
+ matches: list[str]
701
+ if entry_type is None:
702
+ matches = list(dir_entries.keys())
703
+ elif entry_type is catalog.Dir:
704
+ matches = [name for name, entry in dir_entries.items() if entry.dir is not None]
705
+ else:
706
+ matches = [name for name, entry in dir_entries.items() if entry.table is not None]
707
+ result = [_join_path(prefix, name) for name in matches]
708
+ for name, entry in [(name, entry) for name, entry in dir_entries.items() if len(entry.dir_entries) > 0]:
709
+ result.extend(_extract_paths(entry.dir_entries, prefix=_join_path(prefix, name), entry_type=entry_type))
710
+ return result
728
711
 
729
712
 
730
713
  def publish_snapshot(dest_uri: str, table: catalog.Table) -> None:
@@ -751,9 +734,12 @@ def list_dirs(path_str: str = '', recursive: bool = True) -> list[str]:
751
734
  >>> cl.list_dirs('my_dir', recursive=True)
752
735
  ['my_dir', 'my_dir.sub_dir1']
753
736
  """
754
- path = catalog.Path(path_str, empty_is_valid=True)
755
- Catalog.get().paths.check_is_valid(path, expected=catalog.Dir)
756
- return [str(p) for p in Catalog.get().paths.get_children(path, child_type=catalog.Dir, recursive=recursive)]
737
+ _ = catalog.Path(path_str, empty_is_valid=True) # validate format
738
+ cat = Catalog.get()
739
+ with Env.get().begin_xact():
740
+ dir = cat.get_schema_object(path_str, expected=catalog.Dir, raise_if_not_exists=True)
741
+ contents = cat.get_dir_contents(dir._id, recursive=recursive)
742
+ return _extract_paths(contents, prefix=path_str, entry_type=catalog.Dir)
757
743
 
758
744
 
759
745
  def list_functions() -> Styler:
@@ -780,7 +766,7 @@ def list_functions() -> Styler:
780
766
  }
781
767
  )
782
768
  pd_df = pd_df.style.set_properties(None, **{'text-align': 'left'}).set_table_styles(
783
- [dict(selector='th', props=[('text-align', 'center')])]
769
+ [{'selector': 'th', 'props': [('text-align', 'center')]}]
784
770
  ) # center-align headings
785
771
  return pd_df.hide(axis='index')
786
772
 
pixeltable/index/__init__.py CHANGED
@@ -1,3 +1,5 @@
1
+ # ruff: noqa: F401
2
+
1
3
  from .base import IndexBase
2
4
  from .btree import BtreeIndex
3
5
  from .embedding_index import EmbeddingIndex