pixeltable 0.2.24__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (101) hide show
  1. pixeltable/__init__.py +2 -2
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/dir.py +6 -0
  5. pixeltable/catalog/globals.py +25 -0
  6. pixeltable/catalog/named_function.py +4 -0
  7. pixeltable/catalog/path_dict.py +37 -11
  8. pixeltable/catalog/schema_object.py +6 -0
  9. pixeltable/catalog/table.py +531 -251
  10. pixeltable/catalog/table_version.py +22 -8
  11. pixeltable/catalog/view.py +8 -7
  12. pixeltable/dataframe.py +439 -105
  13. pixeltable/env.py +19 -5
  14. pixeltable/exec/__init__.py +1 -1
  15. pixeltable/exec/exec_node.py +6 -7
  16. pixeltable/exec/expr_eval_node.py +1 -1
  17. pixeltable/exec/sql_node.py +92 -45
  18. pixeltable/exprs/__init__.py +1 -0
  19. pixeltable/exprs/arithmetic_expr.py +1 -1
  20. pixeltable/exprs/array_slice.py +1 -1
  21. pixeltable/exprs/column_property_ref.py +1 -1
  22. pixeltable/exprs/column_ref.py +29 -2
  23. pixeltable/exprs/comparison.py +1 -1
  24. pixeltable/exprs/compound_predicate.py +1 -1
  25. pixeltable/exprs/expr.py +12 -5
  26. pixeltable/exprs/expr_set.py +8 -0
  27. pixeltable/exprs/function_call.py +147 -39
  28. pixeltable/exprs/in_predicate.py +1 -1
  29. pixeltable/exprs/inline_expr.py +25 -5
  30. pixeltable/exprs/is_null.py +1 -1
  31. pixeltable/exprs/json_mapper.py +1 -1
  32. pixeltable/exprs/json_path.py +1 -1
  33. pixeltable/exprs/method_ref.py +1 -1
  34. pixeltable/exprs/row_builder.py +1 -1
  35. pixeltable/exprs/rowid_ref.py +1 -1
  36. pixeltable/exprs/similarity_expr.py +17 -7
  37. pixeltable/exprs/sql_element_cache.py +4 -0
  38. pixeltable/exprs/type_cast.py +2 -2
  39. pixeltable/exprs/variable.py +3 -0
  40. pixeltable/func/__init__.py +5 -4
  41. pixeltable/func/aggregate_function.py +151 -68
  42. pixeltable/func/callable_function.py +48 -16
  43. pixeltable/func/expr_template_function.py +64 -23
  44. pixeltable/func/function.py +227 -23
  45. pixeltable/func/function_registry.py +2 -1
  46. pixeltable/func/query_template_function.py +51 -9
  47. pixeltable/func/signature.py +65 -7
  48. pixeltable/func/tools.py +153 -0
  49. pixeltable/func/udf.py +57 -35
  50. pixeltable/functions/__init__.py +2 -2
  51. pixeltable/functions/anthropic.py +51 -4
  52. pixeltable/functions/gemini.py +85 -0
  53. pixeltable/functions/globals.py +54 -34
  54. pixeltable/functions/huggingface.py +10 -28
  55. pixeltable/functions/json.py +3 -8
  56. pixeltable/functions/math.py +67 -0
  57. pixeltable/functions/mistralai.py +0 -2
  58. pixeltable/functions/ollama.py +8 -8
  59. pixeltable/functions/openai.py +51 -4
  60. pixeltable/functions/timestamp.py +1 -1
  61. pixeltable/functions/video.py +3 -9
  62. pixeltable/functions/vision.py +1 -1
  63. pixeltable/globals.py +374 -89
  64. pixeltable/index/embedding_index.py +106 -29
  65. pixeltable/io/__init__.py +1 -1
  66. pixeltable/io/label_studio.py +1 -1
  67. pixeltable/io/parquet.py +39 -19
  68. pixeltable/iterators/__init__.py +1 -0
  69. pixeltable/iterators/document.py +12 -0
  70. pixeltable/iterators/image.py +100 -0
  71. pixeltable/iterators/video.py +7 -8
  72. pixeltable/metadata/__init__.py +1 -1
  73. pixeltable/metadata/converters/convert_16.py +2 -1
  74. pixeltable/metadata/converters/convert_17.py +2 -1
  75. pixeltable/metadata/converters/convert_22.py +17 -0
  76. pixeltable/metadata/converters/convert_23.py +35 -0
  77. pixeltable/metadata/converters/convert_24.py +56 -0
  78. pixeltable/metadata/converters/convert_25.py +19 -0
  79. pixeltable/metadata/converters/util.py +4 -2
  80. pixeltable/metadata/notes.py +4 -0
  81. pixeltable/metadata/schema.py +1 -0
  82. pixeltable/plan.py +129 -51
  83. pixeltable/store.py +1 -1
  84. pixeltable/type_system.py +196 -54
  85. pixeltable/utils/arrow.py +8 -3
  86. pixeltable/utils/description_helper.py +89 -0
  87. pixeltable/utils/documents.py +14 -0
  88. {pixeltable-0.2.24.dist-info → pixeltable-0.3.0.dist-info}/METADATA +32 -22
  89. pixeltable-0.3.0.dist-info/RECORD +155 -0
  90. {pixeltable-0.2.24.dist-info → pixeltable-0.3.0.dist-info}/WHEEL +1 -1
  91. pixeltable-0.3.0.dist-info/entry_points.txt +3 -0
  92. pixeltable/tool/create_test_db_dump.py +0 -308
  93. pixeltable/tool/create_test_video.py +0 -81
  94. pixeltable/tool/doc_plugins/griffe.py +0 -50
  95. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  96. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  97. pixeltable/tool/embed_udf.py +0 -9
  98. pixeltable/tool/mypy_plugin.py +0 -55
  99. pixeltable-0.2.24.dist-info/RECORD +0 -153
  100. pixeltable-0.2.24.dist-info/entry_points.txt +0 -3
  101. {pixeltable-0.2.24.dist-info → pixeltable-0.3.0.dist-info}/LICENSE +0 -0
pixeltable/globals.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import dataclasses
2
2
  import logging
3
- from typing import Any, Iterable, Optional, Union, Literal
3
+ from typing import Any, Iterable, Literal, Optional, Union
4
4
  from uuid import UUID
5
5
 
6
6
  import pandas as pd
@@ -20,11 +20,68 @@ from pixeltable.utils.filecache import FileCache
20
20
 
21
21
  _logger = logging.getLogger('pixeltable')
22
22
 
23
-
24
23
  def init() -> None:
25
24
  """Initializes the Pixeltable environment."""
26
25
  _ = Catalog.get()
27
26
 
27
+ def _get_or_drop_existing_path(
28
+ path_str: str,
29
+ expected_obj_type: type[catalog.SchemaObject],
30
+ expected_snapshot: bool,
31
+ if_exists: catalog.IfExistsParam
32
+ ) -> Optional[catalog.SchemaObject]:
33
+ """Handle schema object path collision during creation according to the if_exists parameter.
34
+
35
+ Args:
36
+ path_str: An existing and valid path to the dir, table, view, or snapshot.
37
+ expected_obj_type: The type of object (dir, table, or view) that the caller of this function is creating at the existing path.
38
+ expected_snapshot: Whether the caller of this function is creating a snapshot at the existing path.
39
+ if_exists: Directive regarding how to handle the existing path.
40
+
41
+ Returns:
42
+ A handle to the existing dir, table, view, or snapshot, if `if_exists='ignore'`, otherwise `None`.
43
+
44
+ Raises:
45
+ Error: If the existing path is not of the expected type, or if the existing path has dependents and
46
+ `if_exists='replace'` or `if_exists='replace_force'`.
47
+ """
48
+ cat = Catalog.get()
49
+ path = catalog.Path(path_str)
50
+ assert cat.paths.get_object(path) is not None
51
+
52
+ if if_exists == catalog.IfExistsParam.ERROR:
53
+ raise excs.Error(f'Path `{path_str}` already exists.')
54
+
55
+ existing_path = cat.paths[path]
56
+ existing_path_is_snapshot = 'is_snapshot' in existing_path.get_metadata() and existing_path.get_metadata()['is_snapshot']
57
+ obj_type_str = 'Snapshot' if expected_snapshot else expected_obj_type._display_name().capitalize()
58
+ # Check if the existing path is of expected type.
59
+ if (not isinstance(existing_path, expected_obj_type)
60
+ or (expected_snapshot and not existing_path_is_snapshot)):
61
+ raise excs.Error(f'Path `{path_str}` already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.')
62
+
63
+ # if_exists='ignore' return the handle to the existing object.
64
+ assert isinstance(existing_path, expected_obj_type)
65
+ if if_exists == catalog.IfExistsParam.IGNORE:
66
+ return existing_path
67
+
68
+ # Check if the existing object has dependents. If so, cannot replace it
69
+ # unless if_exists='replace_force'.
70
+ has_dependents = existing_path._has_dependents
71
+ if if_exists == catalog.IfExistsParam.REPLACE and has_dependents:
72
+ raise excs.Error(f"{obj_type_str} `{path_str}` already exists and has dependents. Use `if_exists='replace_force'` to replace it.")
73
+ else:
74
+ assert if_exists == catalog.IfExistsParam.REPLACE_FORCE or not has_dependents
75
+ # Drop the existing path so it can be replaced.
76
+ # Any errors during drop will be raised.
77
+ _logger.info(f"Dropping {obj_type_str} `{path_str}` to replace it.")
78
+ if isinstance(existing_path, catalog.Dir):
79
+ drop_dir(path_str, force=True)
80
+ else:
81
+ drop_table(path_str, force=True)
82
+ assert cat.paths.get_object(path) is None
83
+
84
+ return None
28
85
 
29
86
  def create_table(
30
87
  path_str: str,
@@ -33,7 +90,8 @@ def create_table(
33
90
  primary_key: Optional[Union[str, list[str]]] = None,
34
91
  num_retained_versions: int = 10,
35
92
  comment: str = '',
36
- media_validation: Literal['on_read', 'on_write'] = 'on_write'
93
+ media_validation: Literal['on_read', 'on_write'] = 'on_write',
94
+ if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error'
37
95
  ) -> catalog.Table:
38
96
  """Create a new base table.
39
97
 
@@ -46,14 +104,28 @@ def create_table(
46
104
  num_retained_versions: Number of versions of the table to retain.
47
105
  comment: An optional comment; its meaning is user-defined.
48
106
  media_validation: Media validation policy for the table.
107
+
49
108
  - `'on_read'`: validate media files at query time
50
109
  - `'on_write'`: validate media files during insert/update operations
110
+ if_exists: Directive regarding how to handle if the path already exists.
111
+ Must be one of the following:
112
+
113
+ - `'error'`: raise an error
114
+ - `'ignore'`: do nothing and return the existing table handle
115
+ - `'replace'`: if the existing table has no views, drop and replace it with a new one
116
+ - `'replace_force'`: drop the existing table and all its views, and create a new one
51
117
 
52
118
  Returns:
53
- A handle to the newly created [`Table`][pixeltable.Table].
119
+ A handle to the newly created table, or to an already existing table at the path when `if_exists='ignore'`.
120
+ Please note the schema of the existing table may not match the schema provided in the call.
54
121
 
55
122
  Raises:
56
- Error: if the path already exists or is invalid.
123
+ Error: if
124
+
125
+ - the path is invalid, or
126
+ - the path already exists and `if_exists='error'`, or
127
+ - the path already exists and is not a table, or
128
+ - an error occurs while attempting to create the table.
57
129
 
58
130
  Examples:
59
131
  Create a table with an int and a string column:
@@ -65,10 +137,27 @@ def create_table(
65
137
 
66
138
  >>> tbl1 = pxt.get_table('orig_table')
67
139
  ... tbl2 = pxt.create_table('new_table', tbl1.where(tbl1.col1 < 10).select(tbl1.col2))
140
+
141
+ Create a table if it does not already exist, otherwise get the existing table:
142
+
143
+ >>> tbl = pxt.create_table('my_table', schema={'col1': pxt.Int, 'col2': pxt.String}, if_exists='ignore')
144
+
145
+ Create a table with an int and a float column, and replace any existing table:
146
+
147
+ >>> tbl = pxt.create_table('my_table', schema={'col1': pxt.Int, 'col2': pxt.Float}, if_exists='replace')
68
148
  """
69
149
  path = catalog.Path(path_str)
70
- Catalog.get().paths.check_is_valid(path, expected=None)
71
- dir = Catalog.get().paths[path.parent]
150
+ cat = Catalog.get()
151
+
152
+ if cat.paths.get_object(path) is not None:
153
+ # The table already exists. Handle it as per user directive.
154
+ _if_exists = catalog.IfExistsParam.validated(if_exists, 'if_exists')
155
+ existing_table = _get_or_drop_existing_path(path_str, catalog.InsertableTable, False, _if_exists)
156
+ if existing_table is not None:
157
+ assert isinstance(existing_table, catalog.Table)
158
+ return existing_table
159
+
160
+ dir = cat.paths[path.parent]
72
161
 
73
162
  df: Optional[DataFrame] = None
74
163
  if isinstance(schema_or_df, dict):
@@ -95,7 +184,7 @@ def create_table(
95
184
  tbl = catalog.InsertableTable._create(
96
185
  dir._id, path.name, schema, df, primary_key=primary_key, num_retained_versions=num_retained_versions,
97
186
  comment=comment, media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'))
98
- Catalog.get().paths[path] = tbl
187
+ cat.paths[path] = tbl
99
188
 
100
189
  _logger.info(f'Created table `{path_str}`.')
101
190
  return tbl
@@ -111,7 +200,7 @@ def create_view(
111
200
  num_retained_versions: int = 10,
112
201
  comment: str = '',
113
202
  media_validation: Literal['on_read', 'on_write'] = 'on_write',
114
- ignore_errors: bool = False,
203
+ if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
115
204
  ) -> Optional[catalog.Table]:
116
205
  """Create a view of an existing table object (which itself can be a view or a snapshot or a base table).
117
206
 
@@ -129,43 +218,82 @@ def create_view(
129
218
  the base table.
130
219
  num_retained_versions: Number of versions of the view to retain.
131
220
  comment: Optional comment for the view.
132
- ignore_errors: if True, fail silently if the path already exists or is invalid.
221
+ media_validation: Media validation policy for the view.
222
+
223
+ - `'on_read'`: validate media files at query time
224
+ - `'on_write'`: validate media files during insert/update operations
225
+ if_exists: Directive regarding how to handle if the path already exists.
226
+ Must be one of the following:
227
+
228
+ - `'error'`: raise an error
229
+ - `'ignore'`: do nothing and return the existing view handle
230
+ - `'replace'`: if the existing view has no dependents, drop and replace it with a new one
231
+ - `'replace_force'`: drop the existing view and all its dependents, and create a new one
133
232
 
134
233
  Returns:
135
234
  A handle to the [`Table`][pixeltable.Table] representing the newly created view. If the path already
136
- exists or is invalid and `ignore_errors=True`, returns `None`.
235
+ exists and `if_exists='ignore'`, returns a handle to the existing view. Please note the schema
236
+ or the base of the existing view may not match those provided in the call.
137
237
 
138
238
  Raises:
139
- Error: if the path already exists or is invalid and `ignore_errors=False`.
239
+ Error: if
240
+
241
+ - the path is invalid, or
242
+ - the path already exists and `if_exists='error'`, or
243
+ - the path already exists and is not a view, or
244
+ - an error occurs while attempting to create the view.
140
245
 
141
246
  Examples:
142
247
  Create a view `my_view` of an existing table `my_table`, filtering on rows where `col1` is greater than 10:
143
248
 
144
249
  >>> tbl = pxt.get_table('my_table')
145
250
  ... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 10))
251
+
252
+ Create a view `my_view` of an existing table `my_table`, filtering on rows where `col1` is greater than 10,
253
+ if it does not already exist. Otherwise, get the existing view named `my_view`:
254
+
255
+ >>> tbl = pxt.get_table('my_table')
256
+ ... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 10), if_exists='ignore')
257
+
258
+ Create a view `my_view` of an existing table `my_table`, filtering on rows where `col1` is greater than 100,
259
+ and replace any existing view named `my_view`:
260
+
261
+ >>> tbl = pxt.get_table('my_table')
262
+ ... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 100), if_exists='replace_force')
146
263
  """
147
264
  where: Optional[exprs.Expr] = None
148
265
  if isinstance(base, catalog.Table):
149
266
  tbl_version_path = base._tbl_version_path
150
267
  elif isinstance(base, DataFrame):
151
268
  base._validate_mutable('create_view')
152
- tbl_version_path = base.tbl
269
+ if len(base._from_clause.tbls) > 1:
270
+ raise excs.Error('Cannot create a view of a join')
271
+ tbl_version_path = base._from_clause.tbls[0]
153
272
  where = base.where_clause
154
273
  else:
155
274
  raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
156
275
  assert isinstance(base, catalog.Table) or isinstance(base, DataFrame)
276
+
157
277
  path = catalog.Path(path_str)
158
- try:
159
- Catalog.get().paths.check_is_valid(path, expected=None)
160
- except Exception as e:
161
- if ignore_errors:
162
- return None
163
- else:
164
- raise e
165
- dir = Catalog.get().paths[path.parent]
278
+ cat = Catalog.get()
279
+
280
+ if cat.paths.get_object(path) is not None:
281
+ # The view already exists. Handle it as per user directive.
282
+ _if_exists = catalog.IfExistsParam.validated(if_exists, 'if_exists')
283
+ existing_path = _get_or_drop_existing_path(path_str, catalog.View, is_snapshot, _if_exists)
284
+ if existing_path is not None:
285
+ assert isinstance(existing_path, catalog.View)
286
+ return existing_path
287
+
288
+ dir = cat.paths[path.parent]
166
289
 
167
290
  if additional_columns is None:
168
291
  additional_columns = {}
292
+ else:
293
+ # additional columns should not be in the base table
294
+ for col_name in additional_columns.keys():
295
+ if col_name in [c.name for c in tbl_version_path.columns()]:
296
+ raise excs.Error(f"Column {col_name!r} already exists in the base table {tbl_version_path.get_column(col_name).tbl.name}.")
169
297
  if iterator is None:
170
298
  iterator_class, iterator_args = None, None
171
299
  else:
@@ -176,7 +304,7 @@ def create_view(
176
304
  is_snapshot=is_snapshot, iterator_cls=iterator_class, iterator_args=iterator_args,
177
305
  num_retained_versions=num_retained_versions, comment=comment,
178
306
  media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'))
179
- Catalog.get().paths[path] = view
307
+ cat.paths[path] = view
180
308
  _logger.info(f'Created view `{path_str}`.')
181
309
  FileCache.get().emit_eviction_warnings()
182
310
  return view
@@ -191,7 +319,7 @@ def create_snapshot(
191
319
  num_retained_versions: int = 10,
192
320
  comment: str = '',
193
321
  media_validation: Literal['on_read', 'on_write'] = 'on_write',
194
- ignore_errors: bool = False,
322
+ if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
195
323
  ) -> Optional[catalog.Table]:
196
324
  """Create a snapshot of an existing table object (which itself can be a view or a snapshot or a base table).
197
325
 
@@ -206,21 +334,47 @@ def create_snapshot(
206
334
  iterator: The iterator to use for this snapshot. If specified, then this snapshot will be a one-to-many view of
207
335
  the base table.
208
336
  num_retained_versions: Number of versions of the view to retain.
209
- comment: Optional comment for the view.
210
- ignore_errors: if True, fail silently if the path already exists or is invalid.
337
+ comment: Optional comment for the snapshot.
338
+ media_validation: Media validation policy for the snapshot.
339
+
340
+ - `'on_read'`: validate media files at query time
341
+ - `'on_write'`: validate media files during insert/update operations
342
+ if_exists: Directive regarding how to handle if the path already exists.
343
+ Must be one of the following:
344
+
345
+ - `'error'`: raise an error
346
+ - `'ignore'`: do nothing and return the existing snapshot handle
347
+ - `'replace'`: if the existing snapshot has no dependents, drop and replace it with a new one
348
+ - `'replace_force'`: drop the existing snapshot and all its dependents, and create a new one
211
349
 
212
350
  Returns:
213
- A handle to the [`Table`][pixeltable.Table] representing the newly created snapshot. If the path already
214
- exists or is invalid and `ignore_errors=True`, returns `None`.
351
+ A handle to the [`Table`][pixeltable.Table] representing the newly created snapshot.
352
+ If the path already exists and `if_exists='ignore'`, returns a handle to the existing snapshot. Please note the schema or base of the existing snapshot may not match those provided in the call.
215
353
 
216
354
  Raises:
217
- Error: if the path already exists or is invalid and `ignore_errors=False`.
355
+ Error: if
356
+
357
+ - the path is invalid, or
358
+ - the path already exists and `if_exists='error'`, or
359
+ - the path already exists and is not a snapshot, or
360
+ - an error occurs while attempting to create the snapshot.
218
361
 
219
362
  Examples:
220
- Create a snapshot of `my_table`:
363
+ Create a snapshot `my_snapshot` of a table `my_table`:
221
364
 
222
365
  >>> tbl = pxt.get_table('my_table')
223
366
  ... snapshot = pxt.create_snapshot('my_snapshot', tbl)
367
+
368
+ Create a snapshot `my_snapshot` of a view `my_view` with additional int column `col3`,
369
+ if `my_snapshot` does not already exist:
370
+
371
+ >>> view = pxt.get_table('my_view')
372
+ ... snapshot = pxt.create_snapshot('my_snapshot', view, additional_columns={'col3': pxt.Int}, if_exists='ignore')
373
+
374
+ Create a snapshot `my_snapshot` on a table `my_table`, and replace any existing snapshot named `my_snapshot`:
375
+
376
+ >>> tbl = pxt.get_table('my_table')
377
+ ... snapshot = pxt.create_snapshot('my_snapshot', tbl, if_exists='replace_force')
224
378
  """
225
379
  return create_view(
226
380
  path_str,
@@ -231,7 +385,7 @@ def create_snapshot(
231
385
  num_retained_versions=num_retained_versions,
232
386
  comment=comment,
233
387
  media_validation=media_validation,
234
- ignore_errors=ignore_errors,
388
+ if_exists=if_exists,
235
389
  )
236
390
 
237
391
 
@@ -296,31 +450,58 @@ def move(path: str, new_path: str) -> None:
296
450
  obj._move(new_p.name, new_dir._id)
297
451
 
298
452
 
299
- def drop_table(path: str, force: bool = False, ignore_errors: bool = False) -> None:
453
+ def drop_table(table: Union[str, catalog.Table], force: bool = False,
454
+ if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
300
455
  """Drop a table, view, or snapshot.
301
456
 
302
457
  Args:
303
- path: Path to the [`Table`][pixeltable.Table].
458
+ table: Fully qualified name, or handle, of the table to be dropped.
304
459
  force: If `True`, will also drop all views and sub-views of this table.
305
- ignore_errors: If `True`, return silently if the table does not exist (without throwing an exception).
460
+ if_not_exists: Directive regarding how to handle if the path does not exist.
461
+ Must be one of the following:
462
+
463
+ - `'error'`: raise an error
464
+ - `'ignore'`: do nothing and return
306
465
 
307
466
  Raises:
308
- Error: If the path does not exist or does not designate a table object, and `ignore_errors=False`.
467
+ Error: if the qualified name
468
+
469
+ - is invalid, or
470
+ - does not exist and `if_not_exists='error'`, or
471
+ - does not designate a table object, or
472
+ - designates a table object but has dependents and `force=False`.
309
473
 
310
474
  Examples:
311
- >>> pxt.drop_table('my_table')
475
+ Drop a table by its fully qualified name:
476
+ >>> pxt.drop_table('subdir.my_table')
477
+
478
+ Drop a table by its handle:
479
+ >>> t = pxt.get_table('subdir.my_table')
480
+ ... pxt.drop_table(t)
481
+
482
+ Drop a table if it exists, otherwise do nothing:
483
+ >>> pxt.drop_table('subdir.my_table', if_not_exists='ignore')
484
+
485
+ Drop a table and all its dependents:
486
+ >>> pxt.drop_table('subdir.my_table', force=True)
312
487
  """
313
488
  cat = Catalog.get()
314
- path_obj = catalog.Path(path)
315
- try:
316
- cat.paths.check_is_valid(path_obj, expected=catalog.Table)
317
- except Exception as e:
318
- if ignore_errors or force:
319
- _logger.info(f'Skipped table `{path}` (does not exist).')
320
- return
321
- else:
322
- raise e
323
- tbl = cat.paths[path_obj]
489
+ if isinstance(table, str):
490
+ tbl_path_obj = catalog.Path(table)
491
+ tbl = cat.paths.get_object(tbl_path_obj)
492
+ if tbl is None:
493
+ _if_not_exists = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
494
+ if _if_not_exists == catalog.IfNotExistsParam.IGNORE or force:
495
+ _logger.info(f'Skipped table `{table}` (does not exist).')
496
+ return
497
+ else:
498
+ raise excs.Error(f'Table `{table}` does not exist.')
499
+ if not isinstance(tbl, catalog.Table):
500
+ raise excs.Error(f'{tbl} needs to be a {catalog.Table._display_name()} but is a {type(tbl)._display_name()}')
501
+ else:
502
+ tbl = table
503
+ tbl_path_obj = catalog.Path(tbl._path)
504
+
324
505
  assert isinstance(tbl, catalog.Table)
325
506
  if len(cat.tbl_dependents[tbl._id]) > 0:
326
507
  dependent_paths = [dep._path for dep in cat.tbl_dependents[tbl._id]]
@@ -328,10 +509,10 @@ def drop_table(path: str, force: bool = False, ignore_errors: bool = False) -> N
328
509
  for dependent_path in dependent_paths:
329
510
  drop_table(dependent_path, force=True)
330
511
  else:
331
- raise excs.Error(f'Table {path} has dependents: {", ".join(dependent_paths)}')
512
+ raise excs.Error(f'Table {tbl._path} has dependents: {", ".join(dependent_paths)}')
332
513
  tbl._drop()
333
- del cat.paths[path_obj]
334
- _logger.info(f'Dropped table `{path}`.')
514
+ del cat.paths[tbl_path_obj]
515
+ _logger.info(f'Dropped table `{tbl._path}`.')
335
516
 
336
517
 
337
518
  def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
@@ -362,16 +543,30 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
362
543
  Catalog.get().paths.check_is_valid(path, expected=catalog.Dir)
363
544
  return [str(p) for p in Catalog.get().paths.get_children(path, child_type=catalog.Table, recursive=recursive)]
364
545
 
365
-
366
- def create_dir(path_str: str, ignore_errors: bool = False) -> Optional[catalog.Dir]:
546
+ def create_dir(path_str: str, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error') -> Optional[catalog.Dir]:
367
547
  """Create a directory.
368
548
 
369
549
  Args:
370
550
  path_str: Path to the directory.
371
- ignore_errors: if `True`, will return silently instead of throwing an exception if an error occurs.
551
+ if_exists: Directive regarding how to handle if the path already exists.
552
+ Must be one of the following:
553
+
554
+ - `'error'`: raise an error
555
+ - `'ignore'`: do nothing and return the existing directory handle
556
+ - `'replace'`: if the existing directory is empty, drop it and create a new one
557
+ - `'replace_force'`: drop the existing directory and all its children, and create a new one
558
+
559
+ Returns:
560
+ A handle to the newly created directory, or to an already existing directory at the path when `if_exists='ignore'`.
561
+ Please note the existing directory may not be empty.
372
562
 
373
563
  Raises:
374
- Error: If the path already exists or the parent is not a directory, and `ignore_errors=False`.
564
+ Error: If
565
+
566
+ - the path is invalid, or
567
+ - the path already exists and `if_exists='error'`, or
568
+ - the path already exists and is not a directory, or
569
+ - an error occurs while attempting to create the directory.
375
570
 
376
571
  Examples:
377
572
  >>> pxt.create_dir('my_dir')
@@ -379,63 +574,93 @@ def create_dir(path_str: str, ignore_errors: bool = False) -> Optional[catalog.D
379
574
  Create a subdirectory:
380
575
 
381
576
  >>> pxt.create_dir('my_dir.sub_dir')
382
- """
383
- try:
384
- path = catalog.Path(path_str)
385
- Catalog.get().paths.check_is_valid(path, expected=None)
386
- parent = Catalog.get().paths[path.parent]
387
- assert parent is not None
388
- with orm.Session(Env.get().engine, future=True) as session:
389
- dir_md = schema.DirMd(name=path.name)
390
- dir_record = schema.Dir(parent_id=parent._id, md=dataclasses.asdict(dir_md))
391
- session.add(dir_record)
392
- session.flush()
393
- assert dir_record.id is not None
394
- assert isinstance(dir_record.id, UUID)
395
- dir = catalog.Dir(dir_record.id, parent._id, path.name)
396
- Catalog.get().paths[path] = dir
397
- session.commit()
398
- _logger.info(f'Created directory `{path_str}`.')
399
- print(f'Created directory `{path_str}`.')
400
- return dir
401
- except excs.Error as e:
402
- if ignore_errors:
403
- return None
404
- else:
405
- raise e
406
577
 
578
+ Create a subdirectory only if it does not already exist, otherwise do nothing:
579
+
580
+ >>> pxt.create_dir('my_dir.sub_dir', if_exists='ignore')
581
+
582
+ Create a directory, replacing it if it already exists:
583
+
584
+ >>> pxt.create_dir('my_dir', if_exists='replace_force')
585
+ """
586
+ path = catalog.Path(path_str)
587
+ cat = Catalog.get()
407
588
 
408
- def drop_dir(path_str: str, force: bool = False, ignore_errors: bool = False) -> None:
589
+ if cat.paths.get_object(path):
590
+ # The directory already exists. Handle it as per user directive.
591
+ _if_exists = catalog.IfExistsParam.validated(if_exists, 'if_exists')
592
+ existing_path = _get_or_drop_existing_path(path_str, catalog.Dir, False, _if_exists)
593
+ if existing_path is not None:
594
+ assert isinstance(existing_path, catalog.Dir)
595
+ return existing_path
596
+
597
+ parent = cat.paths[path.parent]
598
+ assert parent is not None
599
+ with orm.Session(Env.get().engine, future=True) as session:
600
+ dir_md = schema.DirMd(name=path.name)
601
+ dir_record = schema.Dir(parent_id=parent._id, md=dataclasses.asdict(dir_md))
602
+ session.add(dir_record)
603
+ session.flush()
604
+ assert dir_record.id is not None
605
+ assert isinstance(dir_record.id, UUID)
606
+ dir = catalog.Dir(dir_record.id, parent._id, path.name)
607
+ cat.paths[path] = dir
608
+ session.commit()
609
+ _logger.info(f'Created directory `{path_str}`.')
610
+ print(f'Created directory `{path_str}`.')
611
+ return dir
612
+
613
+ def drop_dir(path_str: str, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
409
614
  """Remove a directory.
410
615
 
411
616
  Args:
412
617
  path_str: Name or path of the directory.
413
618
  force: If `True`, will also drop all tables and subdirectories of this directory, recursively, along
414
619
  with any views or snapshots that depend on any of the dropped tables.
415
- ignore_errors: if `True`, will return silently instead of throwing an exception if the directory
416
- does not exist.
620
+ if_not_exists: Directive regarding how to handle if the path does not exist.
621
+ Must be one of the following:
622
+
623
+ - `'error'`: raise an error
624
+ - `'ignore'`: do nothing and return
417
625
 
418
626
  Raises:
419
- Error: If the path does not exist or does not designate a directory, or if the directory is not empty.
627
+ Error: If the path
628
+
629
+ - is invalid, or
630
+ - does not exist and `if_not_exists='error'`, or
631
+ - is not designate a directory, or
632
+ - is a direcotory but is not empty and `force=False`.
420
633
 
421
634
  Examples:
635
+ Remove a directory, if it exists and is empty:
422
636
  >>> pxt.drop_dir('my_dir')
423
637
 
424
638
  Remove a subdirectory:
425
639
 
426
640
  >>> pxt.drop_dir('my_dir.sub_dir')
641
+
642
+ Remove an existing directory if it is empty, but do nothing if it does not exist:
643
+
644
+ >>> pxt.drop_dir('my_dir.sub_dir', if_not_exists='ignore')
645
+
646
+ Remove an existing directory and all its contents:
647
+
648
+ >>> pxt.drop_dir('my_dir', force=True)
427
649
  """
428
650
  cat = Catalog.get()
429
651
  path = catalog.Path(path_str)
430
-
431
- try:
432
- cat.paths.check_is_valid(path, expected=catalog.Dir)
433
- except Exception as e:
434
- if ignore_errors or force:
435
- _logger.info(f'Skipped directory `{path}` (does not exist).')
652
+ obj = cat.paths.get_object(path)
653
+ if obj is None:
654
+ _if_not_exists = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
655
+ if _if_not_exists == catalog.IfNotExistsParam.IGNORE or force:
656
+ _logger.info(f'Skipped directory `{path_str}` (does not exist).')
436
657
  return
437
658
  else:
438
- raise e
659
+ raise excs.Error(f'Directory `{path_str}` does not exist.')
660
+
661
+ if not isinstance(obj, catalog.Dir):
662
+ raise excs.Error(
663
+ f'{str(path)} needs to be a {catalog.Dir._display_name()} but is a {type(obj)._display_name()}')
439
664
 
440
665
  children = cat.paths.get_children(path, child_type=None, recursive=True)
441
666
 
@@ -496,7 +721,7 @@ def list_functions() -> Styler:
496
721
  paths = ['.'.join(f.self_path.split('.')[:-1]) for f in functions]
497
722
  names = [f.name for f in functions]
498
723
  params = [
499
- ', '.join([param_name + ': ' + str(param_type) for param_name, param_type in f.signature.parameters.items()])
724
+ ', '.join([param_name + ': ' + str(param_type) for param_name, param_type in f.signatures[0].parameters.items()])
500
725
  for f in functions
501
726
  ]
502
727
  pd_df = pd.DataFrame(
@@ -504,7 +729,7 @@ def list_functions() -> Styler:
504
729
  'Path': paths,
505
730
  'Function Name': names,
506
731
  'Parameters': params,
507
- 'Return Type': [str(f.signature.get_return_type()) for f in functions],
732
+ 'Return Type': [str(f.signatures[0].get_return_type()) for f in functions],
508
733
  }
509
734
  )
510
735
  pd_df = pd_df.style.set_properties(None, **{'text-align': 'left'}).set_table_styles(
@@ -513,6 +738,66 @@ def list_functions() -> Styler:
513
738
  return pd_df.hide(axis='index')
514
739
 
515
740
 
741
+ def tools(*args: Union[func.Function, func.tools.Tool]) -> func.tools.Tools:
742
+ """
743
+ Specifies a collection of UDFs to be used as LLM tools. Pixeltable allows any UDF to be used as an input into an
744
+ LLM tool-calling API. To use one or more UDFs as tools, wrap them in a `pxt.tools` call and pass the return value
745
+ to an LLM API.
746
+
747
+ The UDFs can be specified directly or wrapped inside a [pxt.tool()][pixeltable.tool] invocation. If a UDF is
748
+ specified directly, the tool name will be the (unqualified) UDF name, and the tool description will consist of the
749
+ entire contents of the UDF docstring. If a UDF is wrapped in a `pxt.tool()` invocation, then the name and/or
750
+ description may be customized.
751
+
752
+ Args:
753
+ args: The UDFs to use as tools.
754
+
755
+ Returns:
756
+ A `Tools` instance that can be passed to an LLM tool-calling API or invoked to generate tool results.
757
+
758
+ Examples:
759
+ Create a tools instance with a single UDF:
760
+
761
+ >>> tools = pxt.tools(stock_price)
762
+
763
+ Create a tools instance with several UDFs:
764
+
765
+ >>> tools = pxt.tools(stock_price, weather_quote)
766
+
767
+ Create a tools instance, some of whose UDFs have customized metadata:
768
+
769
+ >>> tools = pxt.tools(
770
+ ... stock_price,
771
+ ... pxt.tool(weather_quote, description='Returns information about the weather in a particular location.'),
772
+ ... pxt.tool(traffic_quote, name='traffic_conditions'),
773
+ ... )
774
+ """
775
+ return func.tools.Tools(tools=[
776
+ arg if isinstance(arg, func.tools.Tool) else tool(arg)
777
+ for arg in args
778
+ ])
779
+
780
+
781
+ def tool(fn: func.Function, name: Optional[str] = None, description: Optional[str] = None) -> func.tools.Tool:
782
+ """
783
+ Specifies a Pixeltable UDF to be used as an LLM tool with customizable metadata. See the documentation for
784
+ [pxt.tools()][pixeltable.tools] for more details.
785
+
786
+ Args:
787
+ fn: The UDF to use as a tool.
788
+ name: The name of the tool. If not specified, then the unqualified name of the UDF will be used by default.
789
+ description: The description of the tool. If not specified, then the entire contents of the UDF docstring
790
+ will be used by default.
791
+
792
+ Returns:
793
+ A `Tool` instance that can be passed to an LLM tool-calling API.
794
+ """
795
+ if isinstance(fn, func.AggregateFunction):
796
+ raise excs.Error('Aggregator UDFs cannot be used as tools')
797
+
798
+ return func.tools.Tool(fn=fn, name=name, description=description)
799
+
800
+
516
801
  def configure_logging(
517
802
  *,
518
803
  to_stdout: Optional[bool] = None,