pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. pixeltable/__init__.py +42 -8
  2. pixeltable/{dataframe.py → _query.py} +470 -206
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +5 -4
  5. pixeltable/catalog/catalog.py +1785 -432
  6. pixeltable/catalog/column.py +190 -113
  7. pixeltable/catalog/dir.py +2 -4
  8. pixeltable/catalog/globals.py +19 -46
  9. pixeltable/catalog/insertable_table.py +191 -98
  10. pixeltable/catalog/path.py +63 -23
  11. pixeltable/catalog/schema_object.py +11 -15
  12. pixeltable/catalog/table.py +843 -436
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +978 -657
  15. pixeltable/catalog/table_version_handle.py +72 -16
  16. pixeltable/catalog/table_version_path.py +112 -43
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +134 -90
  20. pixeltable/config.py +134 -22
  21. pixeltable/env.py +471 -157
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +4 -1
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +11 -7
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +106 -56
  37. pixeltable/exec/globals.py +35 -0
  38. pixeltable/exec/in_memory_data_node.py +19 -19
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +351 -84
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +36 -23
  46. pixeltable/exprs/column_ref.py +213 -89
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +164 -54
  50. pixeltable/exprs/expr.py +70 -44
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +100 -40
  54. pixeltable/exprs/globals.py +2 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +18 -32
  57. pixeltable/exprs/is_null.py +7 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +27 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +167 -67
  64. pixeltable/exprs/rowid_ref.py +25 -10
  65. pixeltable/exprs/similarity_expr.py +58 -40
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +17 -11
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +29 -27
  78. pixeltable/func/signature.py +46 -19
  79. pixeltable/func/tools.py +31 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +16 -0
  82. pixeltable/functions/anthropic.py +123 -77
  83. pixeltable/functions/audio.py +147 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +7 -4
  86. pixeltable/functions/deepseek.py +35 -43
  87. pixeltable/functions/document.py +81 -0
  88. pixeltable/functions/fal.py +76 -0
  89. pixeltable/functions/fireworks.py +11 -20
  90. pixeltable/functions/gemini.py +195 -39
  91. pixeltable/functions/globals.py +142 -14
  92. pixeltable/functions/groq.py +108 -0
  93. pixeltable/functions/huggingface.py +1056 -24
  94. pixeltable/functions/image.py +115 -57
  95. pixeltable/functions/json.py +1 -1
  96. pixeltable/functions/llama_cpp.py +28 -13
  97. pixeltable/functions/math.py +67 -5
  98. pixeltable/functions/mistralai.py +18 -55
  99. pixeltable/functions/net.py +70 -0
  100. pixeltable/functions/ollama.py +20 -13
  101. pixeltable/functions/openai.py +240 -226
  102. pixeltable/functions/openrouter.py +143 -0
  103. pixeltable/functions/replicate.py +4 -4
  104. pixeltable/functions/reve.py +250 -0
  105. pixeltable/functions/string.py +239 -69
  106. pixeltable/functions/timestamp.py +16 -16
  107. pixeltable/functions/together.py +24 -84
  108. pixeltable/functions/twelvelabs.py +188 -0
  109. pixeltable/functions/util.py +6 -1
  110. pixeltable/functions/uuid.py +30 -0
  111. pixeltable/functions/video.py +1515 -107
  112. pixeltable/functions/vision.py +8 -8
  113. pixeltable/functions/voyageai.py +289 -0
  114. pixeltable/functions/whisper.py +16 -8
  115. pixeltable/functions/whisperx.py +179 -0
  116. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  117. pixeltable/globals.py +362 -115
  118. pixeltable/index/base.py +17 -21
  119. pixeltable/index/btree.py +28 -22
  120. pixeltable/index/embedding_index.py +100 -118
  121. pixeltable/io/__init__.py +4 -2
  122. pixeltable/io/datarows.py +8 -7
  123. pixeltable/io/external_store.py +56 -105
  124. pixeltable/io/fiftyone.py +13 -13
  125. pixeltable/io/globals.py +31 -30
  126. pixeltable/io/hf_datasets.py +61 -16
  127. pixeltable/io/label_studio.py +74 -70
  128. pixeltable/io/lancedb.py +3 -0
  129. pixeltable/io/pandas.py +21 -12
  130. pixeltable/io/parquet.py +25 -105
  131. pixeltable/io/table_data_conduit.py +250 -123
  132. pixeltable/io/utils.py +4 -4
  133. pixeltable/iterators/__init__.py +2 -1
  134. pixeltable/iterators/audio.py +26 -25
  135. pixeltable/iterators/base.py +9 -3
  136. pixeltable/iterators/document.py +112 -78
  137. pixeltable/iterators/image.py +12 -15
  138. pixeltable/iterators/string.py +11 -4
  139. pixeltable/iterators/video.py +523 -120
  140. pixeltable/metadata/__init__.py +14 -3
  141. pixeltable/metadata/converters/convert_13.py +2 -2
  142. pixeltable/metadata/converters/convert_18.py +2 -2
  143. pixeltable/metadata/converters/convert_19.py +2 -2
  144. pixeltable/metadata/converters/convert_20.py +2 -2
  145. pixeltable/metadata/converters/convert_21.py +2 -2
  146. pixeltable/metadata/converters/convert_22.py +2 -2
  147. pixeltable/metadata/converters/convert_24.py +2 -2
  148. pixeltable/metadata/converters/convert_25.py +2 -2
  149. pixeltable/metadata/converters/convert_26.py +2 -2
  150. pixeltable/metadata/converters/convert_29.py +4 -4
  151. pixeltable/metadata/converters/convert_30.py +34 -21
  152. pixeltable/metadata/converters/convert_34.py +2 -2
  153. pixeltable/metadata/converters/convert_35.py +9 -0
  154. pixeltable/metadata/converters/convert_36.py +38 -0
  155. pixeltable/metadata/converters/convert_37.py +15 -0
  156. pixeltable/metadata/converters/convert_38.py +39 -0
  157. pixeltable/metadata/converters/convert_39.py +124 -0
  158. pixeltable/metadata/converters/convert_40.py +73 -0
  159. pixeltable/metadata/converters/convert_41.py +12 -0
  160. pixeltable/metadata/converters/convert_42.py +9 -0
  161. pixeltable/metadata/converters/convert_43.py +44 -0
  162. pixeltable/metadata/converters/util.py +20 -31
  163. pixeltable/metadata/notes.py +9 -0
  164. pixeltable/metadata/schema.py +140 -53
  165. pixeltable/metadata/utils.py +74 -0
  166. pixeltable/mypy/__init__.py +3 -0
  167. pixeltable/mypy/mypy_plugin.py +123 -0
  168. pixeltable/plan.py +382 -115
  169. pixeltable/share/__init__.py +1 -1
  170. pixeltable/share/packager.py +547 -83
  171. pixeltable/share/protocol/__init__.py +33 -0
  172. pixeltable/share/protocol/common.py +165 -0
  173. pixeltable/share/protocol/operation_types.py +33 -0
  174. pixeltable/share/protocol/replica.py +119 -0
  175. pixeltable/share/publish.py +257 -59
  176. pixeltable/store.py +311 -194
  177. pixeltable/type_system.py +373 -211
  178. pixeltable/utils/__init__.py +2 -3
  179. pixeltable/utils/arrow.py +131 -17
  180. pixeltable/utils/av.py +298 -0
  181. pixeltable/utils/azure_store.py +346 -0
  182. pixeltable/utils/coco.py +6 -6
  183. pixeltable/utils/code.py +3 -3
  184. pixeltable/utils/console_output.py +4 -1
  185. pixeltable/utils/coroutine.py +6 -23
  186. pixeltable/utils/dbms.py +32 -6
  187. pixeltable/utils/description_helper.py +4 -5
  188. pixeltable/utils/documents.py +7 -18
  189. pixeltable/utils/exception_handler.py +7 -30
  190. pixeltable/utils/filecache.py +6 -6
  191. pixeltable/utils/formatter.py +86 -48
  192. pixeltable/utils/gcs_store.py +295 -0
  193. pixeltable/utils/http.py +133 -0
  194. pixeltable/utils/http_server.py +2 -3
  195. pixeltable/utils/iceberg.py +1 -2
  196. pixeltable/utils/image.py +17 -0
  197. pixeltable/utils/lancedb.py +90 -0
  198. pixeltable/utils/local_store.py +322 -0
  199. pixeltable/utils/misc.py +5 -0
  200. pixeltable/utils/object_stores.py +573 -0
  201. pixeltable/utils/pydantic.py +60 -0
  202. pixeltable/utils/pytorch.py +5 -6
  203. pixeltable/utils/s3_store.py +527 -0
  204. pixeltable/utils/sql.py +26 -0
  205. pixeltable/utils/system.py +30 -0
  206. pixeltable-0.5.7.dist-info/METADATA +579 -0
  207. pixeltable-0.5.7.dist-info/RECORD +227 -0
  208. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  209. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  210. pixeltable/__version__.py +0 -3
  211. pixeltable/catalog/named_function.py +0 -40
  212. pixeltable/ext/__init__.py +0 -17
  213. pixeltable/ext/functions/__init__.py +0 -11
  214. pixeltable/ext/functions/whisperx.py +0 -77
  215. pixeltable/utils/media_store.py +0 -77
  216. pixeltable/utils/s3.py +0 -17
  217. pixeltable-0.3.14.dist-info/METADATA +0 -434
  218. pixeltable-0.3.14.dist-info/RECORD +0 -186
  219. pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
  220. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/globals.py CHANGED
@@ -3,15 +3,18 @@ from __future__ import annotations
  import logging
  import os
  from pathlib import Path
- from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional, Union
+ from typing import TYPE_CHECKING, Any, Iterable, Literal, TypedDict, Union

  import pandas as pd
+ import pydantic
  from pandas.io.formats.style import Styler

- from pixeltable import DataFrame, catalog, exceptions as excs, exprs, func, share
+ from pixeltable import Query, catalog, exceptions as excs, exprs, func, share, type_system as ts
  from pixeltable.catalog import Catalog, TableVersionPath
  from pixeltable.catalog.insertable_table import OnErrorParameter
+ from pixeltable.config import Config
  from pixeltable.env import Env
+ from pixeltable.io.table_data_conduit import QueryTableDataConduit, TableDataConduit
  from pixeltable.iterators import ComponentIterator

  if TYPE_CHECKING:
@@ -22,46 +25,63 @@ if TYPE_CHECKING:
  str,
  os.PathLike,
  Path, # OS paths, filenames, URLs
- Iterator[dict[str, Any]], # iterator producing dictionaries of values
- RowData, # list of dictionaries
- DataFrame, # Pixeltable DataFrame
+ Iterable[dict[str, Any]], # dictionaries of values
+ Iterable[pydantic.BaseModel], # Pydantic model instances
+ catalog.Table, # Pixeltable Table
+ Query, # Pixeltable Query
  pd.DataFrame, # pandas DataFrame
- 'datasets.Dataset',
- 'datasets.DatasetDict', # Huggingface datasets
+ datasets.Dataset,
+ datasets.DatasetDict, # Huggingface datasets
  ]


  _logger = logging.getLogger('pixeltable')


- def init() -> None:
+ def init(config_overrides: dict[str, Any] | None = None) -> None:
  """Initializes the Pixeltable environment."""
+ if config_overrides is None:
+ config_overrides = {}
+ Config.init(config_overrides)
  _ = Catalog.get()


  def create_table(
- path_str: str,
- schema: Optional[dict[str, Any]] = None,
+ path: str,
+ schema: dict[str, Any] | None = None,
  *,
- source: Optional[TableDataSource] = None,
- source_format: Optional[Literal['csv', 'excel', 'parquet', 'json']] = None,
- schema_overrides: Optional[dict[str, Any]] = None,
+ source: TableDataSource | None = None,
+ source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
+ schema_overrides: dict[str, Any] | None = None,
+ create_default_idxs: bool = True,
  on_error: Literal['abort', 'ignore'] = 'abort',
- primary_key: Optional[Union[str, list[str]]] = None,
+ primary_key: str | list[str] | None = None,
  num_retained_versions: int = 10,
  comment: str = '',
  media_validation: Literal['on_read', 'on_write'] = 'on_write',
  if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
- extra_args: Optional[dict[str, Any]] = None, # Additional arguments to data source provider
+ extra_args: dict[str, Any] | None = None, # Additional arguments to data source provider
  ) -> catalog.Table:
- """Create a new base table.
+ """Create a new base table. Exactly one of `schema` or `source` must be provided.
+
+ If a `schema` is provided, then an empty table will be created with the specified schema.
+
+ If a `source` is provided, then Pixeltable will attempt to infer a data source format and table schema from the
+ contents of the specified data, and the data will be imported from the specified source into the new table. The
+ source format and/or schema can be specified directly via the `source_format` and `schema_overrides` parameters.

  Args:
- path_str: Path to the table.
- schema: A dictionary that maps column names to column types
- source: A data source from which a table schema can be inferred and data imported
- source_format: A hint to the format of the source data
- schema_overrides: If specified, then columns in `schema_overrides` will be given the specified types
+ path: Pixeltable path (qualified name) of the table, such as `'my_table'` or `'my_dir.my_subdir.my_table'`.
+ schema: Schema for the new table, mapping column names to Pixeltable types.
+ source: A data source (file, URL, Table, Query, or list of rows) to import from.
+ source_format: Must be used in conjunction with a `source`.
+ If specified, then the given format will be used to read the source data. (Otherwise,
+ Pixeltable will attempt to infer the format from the source data.)
+ schema_overrides: Must be used in conjunction with a `source`.
+ If specified, then columns in `schema_overrides` will be given the specified types.
+ (Pixeltable will attempt to infer the types of any columns not specified.)
+ create_default_idxs: If True, creates a B-tree index on every scalar and media column that is not computed,
+ except for boolean columns.
  on_error: Determines the behavior if an error occurs while evaluating a computed column or detecting an
  invalid media file (such as a corrupt image) for one of the inserted rows.

@@ -77,14 +97,15 @@ def create_table(

  - `'on_read'`: validate media files at query time
  - `'on_write'`: validate media files during insert/update operations
- if_exists: Directive regarding how to handle if the path already exists.
- Must be one of the following:
+ if_exists: Determines the behavior if a table already exists at the specified path location.

  - `'error'`: raise an error
  - `'ignore'`: do nothing and return the existing table handle
- - `'replace'`: if the existing table has no views, drop and replace it with a new one
- - `'replace_force'`: drop the existing table and all its views, and create a new one
- extra_args: Additional arguments to pass to the source data provider
+ - `'replace'`: if the existing table has no views or snapshots, drop and replace it with a new one;
+ raise an error if the existing table has views or snapshots
+ - `'replace_force'`: drop the existing table and all its views and snapshots, and create a new one
+ extra_args: Must be used in conjunction with a `source`. If specified, then additional arguments will be
+ passed along to the source data provider.

  Returns:
  A handle to the newly created table, or to an already existing table at the path when `if_exists='ignore'`.
@@ -110,7 +131,7 @@ def create_table(
  >>> tbl1 = pxt.get_table('orig_table')
  ... tbl2 = pxt.create_table('new_table', tbl1.where(tbl1.col1 < 10).select(tbl1.col2))

- Create a table if does not already exist, otherwise get the existing table:
+ Create a table if it does not already exist, otherwise get the existing table:

  >>> tbl = pxt.create_table('my_table', schema={'col1': pxt.Int, 'col2': pxt.String}, if_exists='ignore')

@@ -121,82 +142,112 @@ def create_table(
  Create a table from a CSV file:

  >>> tbl = pxt.create_table('my_table', source='data.csv')
+
+ Create a table with an auto-generated UUID primary key:
+
+ >>> tbl = pxt.create_table(
+ ... 'my_table',
+ ... schema={'id': pxt.functions.uuid.uuid4(), 'data': pxt.String},
+ ... primary_key=['id']
+ ... )
  """
- from pixeltable.io.table_data_conduit import DFTableDataConduit, UnkTableDataConduit
+ from pixeltable.io.table_data_conduit import UnkTableDataConduit
  from pixeltable.io.utils import normalize_primary_key_parameter

  if (schema is None) == (source is None):
- raise excs.Error('Must provide either a `schema` or a `source`')
+ raise excs.Error('Either a `schema` or a `source` must be provided (but not both)')

  if schema is not None and (len(schema) == 0 or not isinstance(schema, dict)):
  raise excs.Error('`schema` must be a non-empty dictionary')

- path_obj = catalog.Path(path_str)
+ path_obj = catalog.Path.parse(path)
  if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
  media_validation_ = catalog.MediaValidation.validated(media_validation, 'media_validation')
- primary_key: Optional[list[str]] = normalize_primary_key_parameter(primary_key)
- table: catalog.Table = None
- tds = None
- data_source = None
+ primary_key: list[str] | None = normalize_primary_key_parameter(primary_key)
+ data_source: TableDataConduit | None = None
  if source is not None:
+ if isinstance(source, str) and source.strip().startswith('pxt://'):
+ raise excs.Error(
+ 'create_table(): Creating a table directly from a cloud URI is not supported.'
+ ' Please replicate the table locally first using `pxt.replicate()`:\n'
+ "replica_tbl = pxt.replicate('pxt://path/to/remote_table', 'local_replica_name')\n"
+ "pxt.create_table('new_table_name', source=replica_tbl)"
+ )
  tds = UnkTableDataConduit(source, source_format=source_format, extra_fields=extra_args)
  tds.check_source_format()
  data_source = tds.specialize()
- data_source.src_schema_overrides = schema_overrides
+ src_schema_overrides: dict[str, ts.ColumnType] = {}
+ if schema_overrides is not None:
+ for col_name, py_type in schema_overrides.items():
+ col_type = ts.ColumnType.normalize_type(py_type, nullable_default=True, allow_builtin_types=False)
+ if col_type is None:
+ raise excs.Error(f'Invalid type for column {col_name!r} in `schema_overrides`: {py_type}')
+ src_schema_overrides[col_name] = col_type
+ data_source.src_schema_overrides = src_schema_overrides
  data_source.src_pk = primary_key
  data_source.infer_schema()
  schema = data_source.pxt_schema
  primary_key = data_source.pxt_pk
- is_direct_df = data_source.is_direct_df()
+ is_direct_query = data_source.is_direct_query()
  else:
- is_direct_df = False
+ is_direct_query = False

  if len(schema) == 0 or not isinstance(schema, dict):
  raise excs.Error(
  'Unable to create a proper schema from supplied `source`. Please use appropriate `schema_overrides`.'
  )

- table = Catalog.get().create_table(
+ tbl, was_created = Catalog.get().create_table(
  path_obj,
  schema,
- data_source.pxt_df if isinstance(data_source, DFTableDataConduit) else None,
  if_exists=if_exists_,
  primary_key=primary_key,
  comment=comment,
  media_validation=media_validation_,
  num_retained_versions=num_retained_versions,
+ create_default_idxs=create_default_idxs,
  )
- if data_source is not None and not is_direct_df:
+
+ # TODO: combine data loading with table creation into a single transaction
+ if was_created:
  fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
- table.insert_table_data_source(data_source=data_source, fail_on_exception=fail_on_exception)
+ if isinstance(data_source, QueryTableDataConduit):
+ query = data_source.pxt_query
+ with Catalog.get().begin_xact(tbl=tbl._tbl_version_path, for_write=True, lock_mutable_tree=True):
+ tbl._tbl_version.get().insert(None, query, fail_on_exception=fail_on_exception)
+ elif data_source is not None and not is_direct_query:
+ tbl.insert_table_data_source(data_source=data_source, fail_on_exception=fail_on_exception)

- return table
+ return tbl


  def create_view(
  path: str,
- base: Union[catalog.Table, DataFrame],
+ base: catalog.Table | Query,
  *,
- additional_columns: Optional[dict[str, Any]] = None,
+ additional_columns: dict[str, Any] | None = None,
  is_snapshot: bool = False,
- iterator: Optional[tuple[type[ComponentIterator], dict[str, Any]]] = None,
+ create_default_idxs: bool = False,
+ iterator: tuple[type[ComponentIterator], dict[str, Any]] | None = None,
  num_retained_versions: int = 10,
  comment: str = '',
  media_validation: Literal['on_read', 'on_write'] = 'on_write',
  if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
- ) -> Optional[catalog.Table]:
+ ) -> catalog.Table | None:
  """Create a view of an existing table object (which itself can be a view or a snapshot or a base table).

  Args:
  path: A name for the view; can be either a simple name such as `my_view`, or a pathname such as
  `dir1.my_view`.
- base: [`Table`][pixeltable.Table] (i.e., table or view or snapshot) or [`DataFrame`][pixeltable.DataFrame] to
+ base: [`Table`][pixeltable.Table] (i.e., table or view or snapshot) or [`Query`][pixeltable.Query] to
  base the view on.
  additional_columns: If specified, will add these columns to the view once it is created. The format
- of the `additional_columns` parameter is identical to the format of the `schema_or_df` parameter in
+ of the `additional_columns` parameter is identical to the format of the `schema` parameter in
  [`create_table`][pixeltable.create_table].
  is_snapshot: Whether the view is a snapshot. Setting this to `True` is equivalent to calling
  [`create_snapshot`][pixeltable.create_snapshot].
+ create_default_idxs: Whether to create default indexes on the view's columns (the base's columns are excluded).
+ Cannot be `True` for snapshots.
  iterator: The iterator to use for this view. If specified, then this view will be a one-to-many view of
  the base table.
  num_retained_versions: Number of versions of the view to retain.
@@ -244,23 +295,30 @@ def create_view(
  >>> tbl = pxt.get_table('my_table')
  ... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 100), if_exists='replace_force')
  """
+ if is_snapshot and create_default_idxs is True:
+ raise excs.Error('Cannot create default indexes on a snapshot')
  tbl_version_path: TableVersionPath
- select_list: Optional[list[tuple[exprs.Expr, Optional[str]]]] = None
- where: Optional[exprs.Expr] = None
+ select_list: list[tuple[exprs.Expr, str | None]] | None = None
+ where: exprs.Expr | None = None
  if isinstance(base, catalog.Table):
  tbl_version_path = base._tbl_version_path
- elif isinstance(base, DataFrame):
- base._validate_mutable('create_view', allow_select=True)
- if len(base._from_clause.tbls) > 1:
- raise excs.Error('Cannot create a view of a join')
+ sample_clause = None
+ elif isinstance(base, Query):
+ base._validate_mutable_op_sequence('create_view', allow_select=True)
  tbl_version_path = base._from_clause.tbls[0]
  where = base.where_clause
+ sample_clause = base.sample_clause
  select_list = base.select_list
+ if sample_clause is not None and not is_snapshot and not sample_clause.is_repeatable:
+ raise excs.Error('Non-snapshot views cannot be created with non-fractional or stratified sampling')
  else:
- raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
- assert isinstance(base, (catalog.Table, DataFrame))
+ raise excs.Error('`base` must be an instance of `Table` or `Query`')
+ assert isinstance(base, (catalog.Table, Query))
+
+ if tbl_version_path.is_replica():
+ raise excs.Error('Cannot create a view or snapshot on top of a replica')

- path_obj = catalog.Path(path)
+ path_obj = catalog.Path.parse(path)
  if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
  media_validation_ = catalog.MediaValidation.validated(media_validation, 'media_validation')

@@ -272,7 +330,7 @@ def create_view(
  if col_name in [c.name for c in tbl_version_path.columns()]:
  raise excs.Error(
  f'Column {col_name!r} already exists in the base table '
- f'{tbl_version_path.get_column(col_name).tbl.get().name}.'
+ f'{tbl_version_path.get_column(col_name).get_tbl().name}.'
  )

  return Catalog.get().create_view(
@@ -280,8 +338,10 @@ def create_view(
  tbl_version_path,
  select_list=select_list,
  where=where,
+ sample_clause=sample_clause,
  additional_columns=additional_columns,
  is_snapshot=is_snapshot,
+ create_default_idxs=create_default_idxs,
  iterator=iterator,
  num_retained_versions=num_retained_versions,
  comment=comment,
@@ -292,24 +352,24 @@ def create_view(

  def create_snapshot(
  path_str: str,
- base: Union[catalog.Table, DataFrame],
+ base: catalog.Table | Query,
  *,
- additional_columns: Optional[dict[str, Any]] = None,
- iterator: Optional[tuple[type[ComponentIterator], dict[str, Any]]] = None,
+ additional_columns: dict[str, Any] | None = None,
+ iterator: tuple[type[ComponentIterator], dict[str, Any]] | None = None,
  num_retained_versions: int = 10,
  comment: str = '',
  media_validation: Literal['on_read', 'on_write'] = 'on_write',
  if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
- ) -> Optional[catalog.Table]:
+ ) -> catalog.Table | None:
  """Create a snapshot of an existing table object (which itself can be a view or a snapshot or a base table).

  Args:
  path_str: A name for the snapshot; can be either a simple name such as `my_snapshot`, or a pathname such as
  `dir1.my_snapshot`.
- base: [`Table`][pixeltable.Table] (i.e., table or view or snapshot) or [`DataFrame`][pixeltable.DataFrame] to
+ base: [`Table`][pixeltable.Table] (i.e., table or view or snapshot) or [`Query`][pixeltable.Query] to
  base the snapshot on.
  additional_columns: If specified, will add these columns to the snapshot once it is created. The format
- of the `additional_columns` parameter is identical to the format of the `schema_or_df` parameter in
+ of the `additional_columns` parameter is identical to the format of the `schema` parameter in
  [`create_table`][pixeltable.create_table].
  iterator: The iterator to use for this snapshot. If specified, then this snapshot will be a one-to-many view of
  the base table.
@@ -371,36 +431,67 @@ def create_snapshot(
  )


- def create_replica(destination: str, source: Union[str, catalog.Table]) -> Optional[catalog.Table]:
+ def publish(
+ source: str | catalog.Table,
+ destination_uri: str,
+ bucket_name: str | None = None,
+ access: Literal['public', 'private'] = 'private',
+ ) -> None:
  """
- Create a replica of a table. Can be used either to create a remote replica of a local table, or to create a local
- replica of a remote table. A given table can have at most one replica per Pixeltable instance.
+ Publishes a replica of a local Pixeltable table to Pixeltable cloud. A given table can be published to at most one
+ URI per Pixeltable cloud database.

  Args:
- destination: Path where the replica will be created. Can be either a local path such as `'my_dir.my_table'`, or
- a remote URI such as `'pxt://username/mydir.my_table'`.
- source: Path to the source table, or (if the source table is a local table) a handle to the source table.
+ source: Path or table handle of the local table to be published.
+ destination_uri: Remote URI where the replica will be published, such as `'pxt://org_name/my_dir/my_table'`.
+ bucket_name: The name of the bucket to use to store replica's data. The bucket must be registered with
+ Pixeltable cloud. If no `bucket_name` is provided, the default storage bucket for the destination
+ database will be used.
+ access: Access control for the replica.
+
+ - `'public'`: Anyone can access this replica.
+ - `'private'`: Only the host organization can access.
  """
- remote_dest = destination.startswith('pxt://')
- remote_source = isinstance(source, str) and source.startswith('pxt://')
- if remote_dest == remote_source:
- raise excs.Error('Exactly one of `destination` or `source` must be a remote URI.')
-
- if remote_dest:
- if isinstance(source, str):
- source = get_table(source)
- share.push_replica(destination, source)
- return None
- else:
- assert isinstance(source, str)
- return share.pull_replica(destination, source)
+ if not destination_uri.startswith('pxt://'):
+ raise excs.Error("`destination_uri` must be a remote Pixeltable URI with the prefix 'pxt://'")
+
+ if isinstance(source, str):
+ source = get_table(source)
+
+ share.push_replica(destination_uri, source, bucket_name, access)
+
+
+ def replicate(remote_uri: str, local_path: str) -> catalog.Table:
+ """
+ Retrieve a replica from Pixeltable cloud as a local table. This will create a full local copy of the replica in a
+ way that preserves the table structure of the original source data. Once replicated, the local table can be
+ queried offline just as any other Pixeltable table.
+
+ Args:
+ remote_uri: Remote URI of the table to be replicated, such as `'pxt://org_name/my_dir/my_table'` or
+ `'pxt://org_name/my_dir/my_table:5'` (with version 5).
+ local_path: Local table path where the replica will be created, such as `'my_new_dir.my_new_tbl'`. It can be
+ the same or different from the cloud table name.
+
+ Returns:
+ A handle to the newly created local replica table.
+ """
+ if not remote_uri.startswith('pxt://'):
+ raise excs.Error("`remote_uri` must be a remote Pixeltable URI with the prefix 'pxt://'")
+
+ return share.pull_replica(local_path, remote_uri)


- def get_table(path: str) -> catalog.Table:
+ def get_table(path: str, if_not_exists: Literal['error', 'ignore'] = 'error') -> catalog.Table | None:
  """Get a handle to an existing table, view, or snapshot.

  Args:
  path: Path to the table.
+ if_not_exists: Directive regarding how to handle if the path does not exist.
+ Must be one of the following:
+
+ - `'error'`: raise an error
+ - `'ignore'`: do nothing and return `None`

  Returns:
  A handle to the [`Table`][pixeltable.Table].
@@ -420,17 +511,39 @@ def get_table(path: str) -> catalog.Table:
  Handles to views and snapshots are retrieved in the same way:

  >>> tbl = pxt.get_table('my_snapshot')
+
+ Get a handle to a specific version of a table:
+
+ >>> tbl = pxt.get_table('my_table:722')
  """
- path_obj = catalog.Path(path)
- return Catalog.get().get_table(path_obj)
+ if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
+ path_obj = catalog.Path.parse(path, allow_versioned_path=True)
+ tbl = Catalog.get().get_table(path_obj, if_not_exists_)
+ return tbl


- def move(path: str, new_path: str) -> None:
+ def move(
+ path: str,
+ new_path: str,
+ *,
+ if_exists: Literal['error', 'ignore'] = 'error',
+ if_not_exists: Literal['error', 'ignore'] = 'error',
+ ) -> None:
  """Move a schema object to a new directory and/or rename a schema object.

  Args:
  path: absolute path to the existing schema object.
  new_path: absolute new path for the schema object.
+ if_exists: Directive regarding how to handle if a schema object already exists at the new path.
+ Must be one of the following:
+
+ - `'error'`: raise an error
+ - `'ignore'`: do nothing and return
+ if_not_exists: Directive regarding how to handle if the source path does not exist.
+ Must be one of the following:
+
+ - `'error'`: raise an error
+ - `'ignore'`: do nothing and return

  Raises:
  Error: If path does not exist or new_path already exists.
@@ -444,22 +557,26 @@ def move(path: str, new_path: str) -> None:

  >>>> pxt.move('dir1.my_table', 'dir1.new_name')
  """
+ if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
+ if if_exists_ not in (catalog.IfExistsParam.ERROR, catalog.IfExistsParam.IGNORE):
+ raise excs.Error("`if_exists` must be one of 'error' or 'ignore'")
+ if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
  if path == new_path:
  raise excs.Error('move(): source and destination cannot be identical')
- path_obj, new_path_obj = catalog.Path(path), catalog.Path(new_path)
+ path_obj, new_path_obj = catalog.Path.parse(path), catalog.Path.parse(new_path)
  if path_obj.is_ancestor(new_path_obj):
  raise excs.Error(f'move(): cannot move {path!r} into its own subdirectory')
- cat = Catalog.get()
- cat.move(path_obj, new_path_obj)
+ Catalog.get().move(path_obj, new_path_obj, if_exists_, if_not_exists_)


  def drop_table(
- table: Union[str, catalog.Table], force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error'
+ table: str | catalog.Table, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error'
  ) -> None:
- """Drop a table, view, or snapshot.
+ """Drop a table, view, snapshot, or replica.

  Args:
- table: Fully qualified name, or handle, of the table to be dropped.
+ table: Fully qualified name or table handle of the table to be dropped; or a remote URI of a cloud replica to
+ be deleted.
  force: If `True`, will also drop all views and sub-views of this table.
  if_not_exists: Directive regarding how to handle if the path does not exist.
  Must be one of the following:
@@ -493,15 +610,75 @@ def drop_table(
  if isinstance(table, catalog.Table):
  # if we're dropping a table by handle, we first need to get the current path, then drop the S lock on
  # the Table record, and then get X locks in the correct order (first containing directory, then table)
- with Env.get().begin_xact():
- tbl_path = table._path
+ with Catalog.get().begin_xact(for_write=False):
+ tbl_path = table._path()
  else:
  assert isinstance(table, str)
  tbl_path = table

- path_obj = catalog.Path(tbl_path)
  if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
- Catalog.get().drop_table(path_obj, force=force, if_not_exists=if_not_exists_)
+
+ if tbl_path.startswith('pxt://'):
+ # Remote table
+ if force:
+ raise excs.Error('Cannot use `force=True` with a cloud replica URI.')
+ # TODO: Handle if_not_exists properly
+ share.delete_replica(tbl_path)
+ else:
+ # Local table
+ path_obj = catalog.Path.parse(tbl_path)
+ Catalog.get().drop_table(path_obj, force=force, if_not_exists=if_not_exists_)
+
+
+ def get_dir_contents(dir_path: str = '', recursive: bool = True) -> 'DirContents':
+ """Get the contents of a Pixeltable directory.
+
+ Args:
+ dir_path: Path to the directory. Defaults to the root directory.
+ recursive: If `False`, returns only those tables and directories that are directly contained in specified
+ directory; if `True`, returns all tables and directories that are descendants of the specified directory,
+ recursively.
+
+ Returns:
+ A [`DirContents`][pixeltable.DirContents] object representing the contents of the specified directory.
+
+ Raises:
+ Error: If the path does not exist or does not designate a directory.
+
+ Examples:
+ Get contents of top-level directory:
+
+ >>> pxt.get_dir_contents()
+
+ Get contents of 'dir1':
+
+ >>> pxt.get_dir_contents('dir1')
+ """
+ path_obj = catalog.Path.parse(dir_path, allow_empty_path=True)
+ catalog_entries = Catalog.get().get_dir_contents(path_obj, recursive=recursive)
+ dirs: list[str] = []
+ tables: list[str] = []
+ _assemble_dir_contents(dir_path, catalog_entries, dirs, tables)
+ dirs.sort()
+ tables.sort()
+ return DirContents(dirs=dirs, tables=tables)
+
+
+ def _assemble_dir_contents(
+ dir_path: str, catalog_entries: dict[str, Catalog.DirEntry], dirs: list[str], tables: list[str]
+ ) -> None:
+ for name, entry in catalog_entries.items():
+ if name.startswith('_'):
+ continue # Skip system paths
+ path = f'{dir_path}.{name}' if len(dir_path) > 0 else name
+ if entry.dir is not None:
+ dirs.append(path)
+ if entry.dir_entries is not None:
+ _assemble_dir_contents(path, entry.dir_entries, dirs, tables)
+ else:
+ assert entry.table is not None
+ assert not entry.dir_entries
+ tables.append(path)


  def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
@@ -527,15 +704,18 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:

  >>> pxt.list_tables('dir1')
  """
- path_obj = catalog.Path(dir_path, empty_is_valid=True) # validate format
- cat = Catalog.get()
- contents = cat.get_dir_contents(path_obj, recursive=recursive)
+ return _list_tables(dir_path, recursive=recursive, allow_system_paths=False)
+
+
+ def _list_tables(dir_path: str = '', recursive: bool = True, allow_system_paths: bool = False) -> list[str]:
+ path_obj = catalog.Path.parse(dir_path, allow_empty_path=True, allow_system_path=allow_system_paths)
+ contents = Catalog.get().get_dir_contents(path_obj, recursive=recursive)
  return [str(p) for p in _extract_paths(contents, parent=path_obj, entry_type=catalog.Table)]


  def create_dir(
- path: str, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error', parents: bool = False
- ) -> Optional[catalog.Dir]:
+ path: str, *, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error', parents: bool = False
+ ) -> catalog.Dir | None:
  """Create a directory.

  Args:
@@ -580,7 +760,7 @@ def create_dir(

  >>> pxt.create_dir('parent1.parent2.sub_dir', parents=True)
  """
- path_obj = catalog.Path(path)
+ path_obj = catalog.Path.parse(path)
  if_exists_ = catalog.IfExistsParam.validated(if_exists, 'if_exists')
  return Catalog.get().create_dir(path_obj, if_exists=if_exists_, parents=parents)

@@ -622,15 +802,75 @@ def drop_dir(path: str, force: bool = False, if_not_exists: Literal['error', 'ig

  >>> pxt.drop_dir('my_dir', force=True)
  """
- path_obj = catalog.Path(path) # validate format
+ path_obj = catalog.Path.parse(path) # validate format
  if_not_exists_ = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
  Catalog.get().drop_dir(path_obj, if_not_exists=if_not_exists_, force=force)


+ def ls(path: str = '') -> pd.DataFrame:
+ """
+ List the contents of a Pixeltable directory.
+
+ This function returns a Pandas DataFrame representing a human-readable listing of the specified directory,
+ including various attributes such as version and base table, as appropriate.
+
+ To get a programmatic list of the directory's contents, use [get_dir_contents()][pixeltable.get_dir_contents]
+ instead.
+ """
+ from pixeltable.catalog import retry_loop
+ from pixeltable.metadata import schema
+
+ cat = Catalog.get()
+ path_obj = catalog.Path.parse(path, allow_empty_path=True)
+ dir_entries = cat.get_dir_contents(path_obj)
+
+ @retry_loop(for_write=False)
+ def op() -> list[list[str]]:
+ rows: list[list[str]] = []
+ for name, entry in dir_entries.items():
+ if name.startswith('_'):
+ continue
+ if entry.dir is not None:
+ kind = 'dir'
+ version = ''
+ base = ''
+ else:
+ assert entry.table is not None
+ assert isinstance(entry.table, schema.Table)
+ tbl = cat.get_table_by_id(entry.table.id)
+ md = tbl.get_metadata()
+ base = md['base'] or ''
+ if base.startswith('_'):
+ base = '<anonymous base table>'
+ if md['is_replica']:
+ kind = 'replica'
+ elif md['is_snapshot']:
+ kind = 'snapshot'
+ elif md['is_view']:
+ kind = 'view'
+ else:
+ kind = 'table'
+ version = '' if kind == 'snapshot' else str(md['version'])
+ rows.append([name, kind, version, base])
+ return rows
+
+ rows = op()
+
+ rows = sorted(rows, key=lambda x: x[0])
+ df = pd.DataFrame(
+ {
+ 'Name': [row[0] for row in rows],
+ 'Kind': [row[1] for row in rows],
+ 'Version': [row[2] for row in rows],
+ 'Base': [row[3] for row in rows],
+ },
+ index=([''] * len(rows)),
+ )
+ return df
+
+
  def _extract_paths(
- dir_entries: dict[str, Catalog.DirEntry],
- parent: catalog.Path,
- entry_type: Optional[type[catalog.SchemaObject]] = None,
+ dir_entries: dict[str, Catalog.DirEntry], parent: catalog.Path, entry_type: type[catalog.SchemaObject] | None = None
  ) -> list[catalog.Path]:
  """Convert nested dir_entries structure to a flattened list of paths."""
  matches: list[str]
@@ -668,7 +908,7 @@ def list_dirs(path: str = '', recursive: bool = True) -> list[str]:
  >>> cl.list_dirs('my_dir', recursive=True)
  ['my_dir', 'my_dir.sub_dir1']
  """
- path_obj = catalog.Path(path, empty_is_valid=True) # validate format
+ path_obj = catalog.Path.parse(path, allow_empty_path=True) # validate format
  cat = Catalog.get()
  contents = cat.get_dir_contents(path_obj, recursive=recursive)
  return [str(p) for p in _extract_paths(contents, parent=path_obj, entry_type=catalog.Dir)]
@@ -703,7 +943,7 @@ def list_functions() -> Styler:
  return pd_df.hide(axis='index')


- def tools(*args: Union[func.Function, func.tools.Tool]) -> func.tools.Tools:
+ def tools(*args: func.Function | func.tools.Tool) -> func.tools.Tools:
  """
  Specifies a collection of UDFs to be used as LLM tools. Pixeltable allows any UDF to be used as an input into an
  LLM tool-calling API. To use one or more UDFs as tools, wrap them in a `pxt.tools` call and pass the return value
@@ -740,7 +980,7 @@ def tools(*args: Union[func.Function, func.tools.Tool]) -> func.tools.Tools:
  return func.tools.Tools(tools=[arg if isinstance(arg, func.tools.Tool) else tool(arg) for arg in args])


- def tool(fn: func.Function, name: Optional[str] = None, description: Optional[str] = None) -> func.tools.Tool:
+ def tool(fn: func.Function, name: str | None = None, description: str | None = None) -> func.tools.Tool:
  """
  Specifies a Pixeltable UDF to be used as an LLM tool with customizable metadata. See the documentation for
  [pxt.tools()][pixeltable.tools] for more details.
@@ -761,11 +1001,7 @@ def tool(fn: func.Function, name: Optional[str] = None, description: Optional[st


  def configure_logging(
- *,
- to_stdout: Optional[bool] = None,
- level: Optional[int] = None,
- add: Optional[str] = None,
- remove: Optional[str] = None,
+ *, to_stdout: bool | None = None, level: int | None = None, add: str | None = None, remove: str | None = None
  ) -> None:
  """Configure logging.

@@ -780,3 +1016,14 @@ def configure_logging(

  def array(elements: Iterable) -> exprs.Expr:
  return exprs.Expr.from_array(elements)
+
+
+ class DirContents(TypedDict):
+ """
+ Represents the contents of a Pixeltable directory.
+ """
+
+ dirs: list[str]
+ """List of directory paths contained in this directory."""
+ tables: list[str]
+ """List of table paths contained in this directory."""