pixeltable 0.2.28__py3-none-any.whl → 0.2.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (62) hide show
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/dir.py +6 -0
  5. pixeltable/catalog/globals.py +25 -0
  6. pixeltable/catalog/named_function.py +4 -0
  7. pixeltable/catalog/path_dict.py +37 -11
  8. pixeltable/catalog/schema_object.py +6 -0
  9. pixeltable/catalog/table.py +96 -19
  10. pixeltable/catalog/table_version.py +22 -8
  11. pixeltable/dataframe.py +201 -3
  12. pixeltable/env.py +9 -3
  13. pixeltable/exec/expr_eval_node.py +1 -1
  14. pixeltable/exec/sql_node.py +2 -2
  15. pixeltable/exprs/function_call.py +134 -29
  16. pixeltable/exprs/inline_expr.py +22 -2
  17. pixeltable/exprs/row_builder.py +1 -1
  18. pixeltable/exprs/similarity_expr.py +9 -2
  19. pixeltable/func/__init__.py +1 -0
  20. pixeltable/func/aggregate_function.py +151 -68
  21. pixeltable/func/callable_function.py +50 -16
  22. pixeltable/func/expr_template_function.py +62 -24
  23. pixeltable/func/function.py +191 -23
  24. pixeltable/func/function_registry.py +2 -1
  25. pixeltable/func/query_template_function.py +11 -6
  26. pixeltable/func/signature.py +64 -7
  27. pixeltable/func/tools.py +116 -0
  28. pixeltable/func/udf.py +57 -35
  29. pixeltable/functions/__init__.py +2 -2
  30. pixeltable/functions/anthropic.py +36 -2
  31. pixeltable/functions/globals.py +54 -34
  32. pixeltable/functions/json.py +3 -8
  33. pixeltable/functions/math.py +67 -0
  34. pixeltable/functions/ollama.py +4 -4
  35. pixeltable/functions/openai.py +31 -2
  36. pixeltable/functions/timestamp.py +1 -1
  37. pixeltable/functions/video.py +2 -8
  38. pixeltable/functions/vision.py +1 -1
  39. pixeltable/globals.py +347 -79
  40. pixeltable/index/embedding_index.py +44 -24
  41. pixeltable/metadata/__init__.py +1 -1
  42. pixeltable/metadata/converters/convert_16.py +2 -1
  43. pixeltable/metadata/converters/convert_17.py +2 -1
  44. pixeltable/metadata/converters/convert_23.py +35 -0
  45. pixeltable/metadata/converters/convert_24.py +47 -0
  46. pixeltable/metadata/converters/util.py +4 -2
  47. pixeltable/metadata/notes.py +2 -0
  48. pixeltable/metadata/schema.py +1 -0
  49. pixeltable/type_system.py +192 -48
  50. {pixeltable-0.2.28.dist-info → pixeltable-0.2.30.dist-info}/METADATA +4 -2
  51. {pixeltable-0.2.28.dist-info → pixeltable-0.2.30.dist-info}/RECORD +54 -57
  52. pixeltable-0.2.30.dist-info/entry_points.txt +3 -0
  53. pixeltable/tool/create_test_db_dump.py +0 -311
  54. pixeltable/tool/create_test_video.py +0 -81
  55. pixeltable/tool/doc_plugins/griffe.py +0 -50
  56. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  57. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  58. pixeltable/tool/embed_udf.py +0 -9
  59. pixeltable/tool/mypy_plugin.py +0 -55
  60. pixeltable-0.2.28.dist-info/entry_points.txt +0 -3
  61. {pixeltable-0.2.28.dist-info → pixeltable-0.2.30.dist-info}/LICENSE +0 -0
  62. {pixeltable-0.2.28.dist-info → pixeltable-0.2.30.dist-info}/WHEEL +0 -0
pixeltable/globals.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import dataclasses
2
2
  import logging
3
- from typing import Any, Iterable, Optional, Union, Literal
3
+ from typing import Any, Iterable, Optional, Union, Literal, Type
4
4
  from uuid import UUID
5
5
 
6
6
  import pandas as pd
@@ -20,11 +20,68 @@ from pixeltable.utils.filecache import FileCache
20
20
 
21
21
  _logger = logging.getLogger('pixeltable')
22
22
 
23
-
24
23
  def init() -> None:
25
24
  """Initializes the Pixeltable environment."""
26
25
  _ = Catalog.get()
27
26
 
27
+ def _get_or_drop_existing_path(
28
+ path_str: str,
29
+ expected_obj_type: Type[catalog.SchemaObject],
30
+ expected_snapshot: bool,
31
+ if_exists: catalog.IfExistsParam
32
+ ) -> Optional[catalog.SchemaObject]:
33
+ """Handle schema object path collision during creation according to the if_exists parameter.
34
+
35
+ Args:
36
+ path_str: An existing and valid path to the dir, table, view, or snapshot.
37
+ expected_obj_type: Whether the caller of this function is creating a dir, table, or view at the existing path.
38
+ expected_snapshot: Whether the caller of this function is creating a snapshot at the existing path.
39
+ if_exists: Directive regarding how to handle the existing path.
40
+
41
+ Returns:
42
+ A handle to the existing dir, table, view, or snapshot, if `if_exists='ignore'`, otherwise `None`.
43
+
44
+ Raises:
45
+ Error: If the existing path is not of the expected type, or if the existing path has dependents and
46
+ `if_exists='replace'` or `if_exists='replace_force'`.
47
+ """
48
+ cat = Catalog.get()
49
+ path = catalog.Path(path_str)
50
+ assert cat.paths.get_object(path) is not None
51
+
52
+ if if_exists == catalog.IfExistsParam.ERROR:
53
+ raise excs.Error(f'Path `{path_str}` already exists.')
54
+
55
+ existing_path = cat.paths[path]
56
+ existing_path_is_snapshot = 'is_snapshot' in existing_path.get_metadata() and existing_path.get_metadata()['is_snapshot']
57
+ obj_type_str = 'Snapshot' if expected_snapshot else expected_obj_type._display_name().capitalize()
58
+ # Check if the existing path is of expected type.
59
+ if (not isinstance(existing_path, expected_obj_type)
60
+ or (expected_snapshot and not existing_path_is_snapshot)):
61
+ raise excs.Error(f'Path `{path_str}` already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.')
62
+
63
+ # If if_exists='ignore', return the handle to the existing object.
64
+ assert isinstance(existing_path, expected_obj_type)
65
+ if if_exists == catalog.IfExistsParam.IGNORE:
66
+ return existing_path
67
+
68
+ # Check if the existing object has dependents. If so, cannot replace it
69
+ # unless if_exists='replace_force'.
70
+ has_dependents = existing_path._has_dependents
71
+ if if_exists == catalog.IfExistsParam.REPLACE and has_dependents:
72
+ raise excs.Error(f"{obj_type_str} `{path_str}` already exists and has dependents. Use `if_exists='replace_force'` to replace it.")
73
+ else:
74
+ assert if_exists == catalog.IfExistsParam.REPLACE_FORCE or not has_dependents
75
+ # Drop the existing path so it can be replaced.
76
+ # Any errors during drop will be raised.
77
+ _logger.info(f"Dropping {obj_type_str} `{path_str}` to replace it.")
78
+ if isinstance(existing_path, catalog.Dir):
79
+ drop_dir(path_str, force=True)
80
+ else:
81
+ drop_table(path_str, force=True)
82
+ assert cat.paths.get_object(path) is None
83
+
84
+ return None
28
85
 
29
86
  def create_table(
30
87
  path_str: str,
@@ -33,7 +90,8 @@ def create_table(
33
90
  primary_key: Optional[Union[str, list[str]]] = None,
34
91
  num_retained_versions: int = 10,
35
92
  comment: str = '',
36
- media_validation: Literal['on_read', 'on_write'] = 'on_write'
93
+ media_validation: Literal['on_read', 'on_write'] = 'on_write',
94
+ if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error'
37
95
  ) -> catalog.Table:
38
96
  """Create a new base table.
39
97
 
@@ -49,12 +107,25 @@ def create_table(
49
107
 
50
108
  - `'on_read'`: validate media files at query time
51
109
  - `'on_write'`: validate media files during insert/update operations
110
+ if_exists: Directive regarding how to handle if the path already exists.
111
+ Must be one of the following:
112
+
113
+ - `'error'`: raise an error
114
+ - `'ignore'`: do nothing and return the existing table handle
115
+ - `'replace'`: if the existing table has no views, drop and replace it with a new one
116
+ - `'replace_force'`: drop the existing table and all its views, and create a new one
52
117
 
53
118
  Returns:
54
- A handle to the newly created [`Table`][pixeltable.Table].
119
+ A handle to the newly created table, or to an already existing table at the path when `if_exists='ignore'`.
120
+ Please note the schema of the existing table may not match the schema provided in the call.
55
121
 
56
122
  Raises:
57
- Error: if the path already exists or is invalid.
123
+ Error: if
124
+
125
+ - the path is invalid, or
126
+ - the path already exists and `if_exists='error'`, or
127
+ - the path already exists and is not a table, or
128
+ - an error occurs while attempting to create the table.
58
129
 
59
130
  Examples:
60
131
  Create a table with an int and a string column:
@@ -66,10 +137,27 @@ def create_table(
66
137
 
67
138
  >>> tbl1 = pxt.get_table('orig_table')
68
139
  ... tbl2 = pxt.create_table('new_table', tbl1.where(tbl1.col1 < 10).select(tbl1.col2))
140
+
141
+ Create a table if it does not already exist, otherwise get the existing table:
142
+
143
+ >>> tbl = pxt.create_table('my_table', schema={'col1': pxt.Int, 'col2': pxt.String}, if_exists='ignore')
144
+
145
+ Create a table with an int and a float column, and replace any existing table:
146
+
147
+ >>> tbl = pxt.create_table('my_table', schema={'col1': pxt.Int, 'col2': pxt.Float}, if_exists='replace')
69
148
  """
70
149
  path = catalog.Path(path_str)
71
- Catalog.get().paths.check_is_valid(path, expected=None)
72
- dir = Catalog.get().paths[path.parent]
150
+ cat = Catalog.get()
151
+
152
+ if cat.paths.get_object(path) is not None:
153
+ # The table already exists. Handle it as per user directive.
154
+ _if_exists = catalog.IfExistsParam.validated(if_exists, 'if_exists')
155
+ existing_table = _get_or_drop_existing_path(path_str, catalog.InsertableTable, False, _if_exists)
156
+ if existing_table is not None:
157
+ assert isinstance(existing_table, catalog.Table)
158
+ return existing_table
159
+
160
+ dir = cat.paths[path.parent]
73
161
 
74
162
  df: Optional[DataFrame] = None
75
163
  if isinstance(schema_or_df, dict):
@@ -96,7 +184,7 @@ def create_table(
96
184
  tbl = catalog.InsertableTable._create(
97
185
  dir._id, path.name, schema, df, primary_key=primary_key, num_retained_versions=num_retained_versions,
98
186
  comment=comment, media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'))
99
- Catalog.get().paths[path] = tbl
187
+ cat.paths[path] = tbl
100
188
 
101
189
  _logger.info(f'Created table `{path_str}`.')
102
190
  return tbl
@@ -112,7 +200,7 @@ def create_view(
112
200
  num_retained_versions: int = 10,
113
201
  comment: str = '',
114
202
  media_validation: Literal['on_read', 'on_write'] = 'on_write',
115
- ignore_errors: bool = False,
203
+ if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
116
204
  ) -> Optional[catalog.Table]:
117
205
  """Create a view of an existing table object (which itself can be a view or a snapshot or a base table).
118
206
 
@@ -130,20 +218,48 @@ def create_view(
130
218
  the base table.
131
219
  num_retained_versions: Number of versions of the view to retain.
132
220
  comment: Optional comment for the view.
133
- ignore_errors: if True, fail silently if the path already exists or is invalid.
221
+ media_validation: Media validation policy for the view.
222
+
223
+ - `'on_read'`: validate media files at query time
224
+ - `'on_write'`: validate media files during insert/update operations
225
+ if_exists: Directive regarding how to handle if the path already exists.
226
+ Must be one of the following:
227
+
228
+ - `'error'`: raise an error
229
+ - `'ignore'`: do nothing and return the existing view handle
230
+ - `'replace'`: if the existing view has no dependents, drop and replace it with a new one
231
+ - `'replace_force'`: drop the existing view and all its dependents, and create a new one
134
232
 
135
233
  Returns:
136
234
  A handle to the [`Table`][pixeltable.Table] representing the newly created view. If the path already
137
- exists or is invalid and `ignore_errors=True`, returns `None`.
235
+ exists and `if_exists='ignore'`, returns a handle to the existing view. Please note the schema
236
+ or the base of the existing view may not match those provided in the call.
138
237
 
139
238
  Raises:
140
- Error: if the path already exists or is invalid and `ignore_errors=False`.
239
+ Error: if
240
+
241
+ - the path is invalid, or
242
+ - the path already exists and `if_exists='error'`, or
243
+ - the path already exists and is not a view, or
244
+ - an error occurs while attempting to create the view.
141
245
 
142
246
  Examples:
143
247
  Create a view `my_view` of an existing table `my_table`, filtering on rows where `col1` is greater than 10:
144
248
 
145
249
  >>> tbl = pxt.get_table('my_table')
146
250
  ... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 10))
251
+
252
+ Create a view `my_view` of an existing table `my_table`, filtering on rows where `col1` is greater than 10,
253
+ but only if it does not already exist. Otherwise, get the existing view named `my_view`:
254
+
255
+ >>> tbl = pxt.get_table('my_table')
256
+ ... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 10), if_exists='ignore')
257
+
258
+ Create a view `my_view` of an existing table `my_table`, filtering on rows where `col1` is greater than 100,
259
+ and replace any existing view named `my_view`:
260
+
261
+ >>> tbl = pxt.get_table('my_table')
262
+ ... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 100), if_exists='replace_force')
147
263
  """
148
264
  where: Optional[exprs.Expr] = None
149
265
  if isinstance(base, catalog.Table):
@@ -157,15 +273,19 @@ def create_view(
157
273
  else:
158
274
  raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
159
275
  assert isinstance(base, catalog.Table) or isinstance(base, DataFrame)
276
+
160
277
  path = catalog.Path(path_str)
161
- try:
162
- Catalog.get().paths.check_is_valid(path, expected=None)
163
- except Exception as e:
164
- if ignore_errors:
165
- return None
166
- else:
167
- raise e
168
- dir = Catalog.get().paths[path.parent]
278
+ cat = Catalog.get()
279
+
280
+ if cat.paths.get_object(path) is not None:
281
+ # The view already exists. Handle it as per user directive.
282
+ _if_exists = catalog.IfExistsParam.validated(if_exists, 'if_exists')
283
+ existing_path = _get_or_drop_existing_path(path_str, catalog.View, is_snapshot, _if_exists)
284
+ if existing_path is not None:
285
+ assert isinstance(existing_path, catalog.View)
286
+ return existing_path
287
+
288
+ dir = cat.paths[path.parent]
169
289
 
170
290
  if additional_columns is None:
171
291
  additional_columns = {}
@@ -179,7 +299,7 @@ def create_view(
179
299
  is_snapshot=is_snapshot, iterator_cls=iterator_class, iterator_args=iterator_args,
180
300
  num_retained_versions=num_retained_versions, comment=comment,
181
301
  media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'))
182
- Catalog.get().paths[path] = view
302
+ cat.paths[path] = view
183
303
  _logger.info(f'Created view `{path_str}`.')
184
304
  FileCache.get().emit_eviction_warnings()
185
305
  return view
@@ -194,7 +314,7 @@ def create_snapshot(
194
314
  num_retained_versions: int = 10,
195
315
  comment: str = '',
196
316
  media_validation: Literal['on_read', 'on_write'] = 'on_write',
197
- ignore_errors: bool = False,
317
+ if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
198
318
  ) -> Optional[catalog.Table]:
199
319
  """Create a snapshot of an existing table object (which itself can be a view or a snapshot or a base table).
200
320
 
@@ -209,21 +329,47 @@ def create_snapshot(
209
329
  iterator: The iterator to use for this snapshot. If specified, then this snapshot will be a one-to-many view of
210
330
  the base table.
211
331
  num_retained_versions: Number of versions of the view to retain.
212
- comment: Optional comment for the view.
213
- ignore_errors: if True, fail silently if the path already exists or is invalid.
332
+ comment: Optional comment for the snapshot.
333
+ media_validation: Media validation policy for the snapshot.
334
+
335
+ - `'on_read'`: validate media files at query time
336
+ - `'on_write'`: validate media files during insert/update operations
337
+ if_exists: Directive regarding how to handle if the path already exists.
338
+ Must be one of the following:
339
+
340
+ - `'error'`: raise an error
341
+ - `'ignore'`: do nothing and return the existing snapshot handle
342
+ - `'replace'`: if the existing snapshot has no dependents, drop and replace it with a new one
343
+ - `'replace_force'`: drop the existing snapshot and all its dependents, and create a new one
214
344
 
215
345
  Returns:
216
- A handle to the [`Table`][pixeltable.Table] representing the newly created snapshot. If the path already
217
- exists or is invalid and `ignore_errors=True`, returns `None`.
346
+ A handle to the [`Table`][pixeltable.Table] representing the newly created snapshot.
347
+ Please note the schema or base of the existing snapshot may not match those provided in the call.
218
348
 
219
349
  Raises:
220
- Error: if the path already exists or is invalid and `ignore_errors=False`.
350
+ Error: if
351
+
352
+ - the path is invalid, or
353
+ - the path already exists and `if_exists='error'`, or
354
+ - the path already exists and is not a snapshot, or
355
+ - an error occurs while attempting to create the snapshot.
221
356
 
222
357
  Examples:
223
- Create a snapshot of `my_table`:
358
+ Create a snapshot `my_snapshot` of a table `my_table`:
224
359
 
225
360
  >>> tbl = pxt.get_table('my_table')
226
361
  ... snapshot = pxt.create_snapshot('my_snapshot', tbl)
362
+
363
+ Create a snapshot `my_snapshot` of a view `my_view` with additional int column `col3`,
364
+ if `my_snapshot` does not already exist:
365
+
366
+ >>> view = pxt.get_table('my_view')
367
+ ... snapshot = pxt.create_snapshot('my_snapshot', view, additional_columns={'col3': pxt.Int}, if_exists='ignore')
368
+
369
+ Create a snapshot `my_snapshot` on a table `my_table`, and replace any existing snapshot named `my_snapshot`:
370
+
371
+ >>> tbl = pxt.get_table('my_table')
372
+ ... snapshot = pxt.create_snapshot('my_snapshot', tbl, if_exists='replace_force')
227
373
  """
228
374
  return create_view(
229
375
  path_str,
@@ -234,7 +380,7 @@ def create_snapshot(
234
380
  num_retained_versions=num_retained_versions,
235
381
  comment=comment,
236
382
  media_validation=media_validation,
237
- ignore_errors=ignore_errors,
383
+ if_exists=if_exists,
238
384
  )
239
385
 
240
386
 
@@ -299,16 +445,26 @@ def move(path: str, new_path: str) -> None:
299
445
  obj._move(new_p.name, new_dir._id)
300
446
 
301
447
 
302
- def drop_table(table: Union[str, catalog.Table], force: bool = False, ignore_errors: bool = False) -> None:
448
+ def drop_table(table: Union[str, catalog.Table], force: bool = False,
449
+ if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
303
450
  """Drop a table, view, or snapshot.
304
451
 
305
452
  Args:
306
453
  table: Fully qualified name, or handle, of the table to be dropped.
307
454
  force: If `True`, will also drop all views and sub-views of this table.
308
- ignore_errors: If `True`, return silently if the table does not exist (without throwing an exception).
455
+ if_not_exists: Directive regarding how to handle if the path does not exist.
456
+ Must be one of the following:
457
+
458
+ - `'error'`: raise an error
459
+ - `'ignore'`: do nothing and return
309
460
 
310
461
  Raises:
311
- Error: If the name does not exist or does not designate a table object, and `ignore_errors=False`.
462
+ Error: if the qualified name
463
+
464
+ - is invalid, or
465
+ - does not exist and `if_not_exists='error'`, or
466
+ - does not designate a table object, or
467
+ - designates a table object but has dependents and `force=False`.
312
468
 
313
469
  Examples:
314
470
  Drop a table by its fully qualified name:
@@ -318,19 +474,25 @@ def drop_table(table: Union[str, catalog.Table], force: bool = False, ignore_err
318
474
  >>> t = pxt.get_table('subdir.my_table')
319
475
  ... pxt.drop_table(t)
320
476
 
477
+ Drop a table if it exists, otherwise do nothing:
478
+ >>> pxt.drop_table('subdir.my_table', if_not_exists='ignore')
479
+
480
+ Drop a table and all its dependents:
481
+ >>> pxt.drop_table('subdir.my_table', force=True)
321
482
  """
322
483
  cat = Catalog.get()
323
484
  if isinstance(table, str):
324
485
  tbl_path_obj = catalog.Path(table)
325
- try:
326
- cat.paths.check_is_valid(tbl_path_obj, expected=catalog.Table)
327
- except Exception as e:
328
- if ignore_errors or force:
486
+ tbl = cat.paths.get_object(tbl_path_obj)
487
+ if tbl is None:
488
+ _if_not_exists = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
489
+ if _if_not_exists == catalog.IfNotExistsParam.IGNORE or force:
329
490
  _logger.info(f'Skipped table `{table}` (does not exist).')
330
491
  return
331
492
  else:
332
- raise e
333
- tbl = cat.paths[tbl_path_obj]
493
+ raise excs.Error(f'Table `{table}` does not exist.')
494
+ if not isinstance(tbl, catalog.Table):
495
+ raise excs.Error(f'{tbl} needs to be a {catalog.Table._display_name()} but is a {type(tbl)._display_name()}')
334
496
  else:
335
497
  tbl = table
336
498
  tbl_path_obj = catalog.Path(tbl._path)
@@ -376,16 +538,30 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
376
538
  Catalog.get().paths.check_is_valid(path, expected=catalog.Dir)
377
539
  return [str(p) for p in Catalog.get().paths.get_children(path, child_type=catalog.Table, recursive=recursive)]
378
540
 
379
-
380
- def create_dir(path_str: str, ignore_errors: bool = False) -> Optional[catalog.Dir]:
541
+ def create_dir(path_str: str, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error') -> Optional[catalog.Dir]:
381
542
  """Create a directory.
382
543
 
383
544
  Args:
384
545
  path_str: Path to the directory.
385
- ignore_errors: if `True`, will return silently instead of throwing an exception if an error occurs.
546
+ if_exists: Directive regarding how to handle if the path already exists.
547
+ Must be one of the following:
548
+
549
+ - `'error'`: raise an error
550
+ - `'ignore'`: do nothing and return the existing directory handle
551
+ - `'replace'`: if the existing directory is empty, drop it and create a new one
552
+ - `'replace_force'`: drop the existing directory and all its children, and create a new one
553
+
554
+ Returns:
555
+ A handle to the newly created directory, or to an already existing directory at the path when `if_exists='ignore'`.
556
+ Please note the existing directory may not be empty.
386
557
 
387
558
  Raises:
388
- Error: If the path already exists or the parent is not a directory, and `ignore_errors=False`.
559
+ Error: If
560
+
561
+ - the path is invalid, or
562
+ - the path already exists and `if_exists='error'`, or
563
+ - the path already exists and is not a directory, or
564
+ - an error occurs while attempting to create the directory.
389
565
 
390
566
  Examples:
391
567
  >>> pxt.create_dir('my_dir')
@@ -393,63 +569,93 @@ def create_dir(path_str: str, ignore_errors: bool = False) -> Optional[catalog.D
393
569
  Create a subdirectory:
394
570
 
395
571
  >>> pxt.create_dir('my_dir.sub_dir')
396
- """
397
- try:
398
- path = catalog.Path(path_str)
399
- Catalog.get().paths.check_is_valid(path, expected=None)
400
- parent = Catalog.get().paths[path.parent]
401
- assert parent is not None
402
- with orm.Session(Env.get().engine, future=True) as session:
403
- dir_md = schema.DirMd(name=path.name)
404
- dir_record = schema.Dir(parent_id=parent._id, md=dataclasses.asdict(dir_md))
405
- session.add(dir_record)
406
- session.flush()
407
- assert dir_record.id is not None
408
- assert isinstance(dir_record.id, UUID)
409
- dir = catalog.Dir(dir_record.id, parent._id, path.name)
410
- Catalog.get().paths[path] = dir
411
- session.commit()
412
- _logger.info(f'Created directory `{path_str}`.')
413
- print(f'Created directory `{path_str}`.')
414
- return dir
415
- except excs.Error as e:
416
- if ignore_errors:
417
- return None
418
- else:
419
- raise e
420
572
 
573
+ Create a subdirectory only if it does not already exist, otherwise do nothing:
574
+
575
+ >>> pxt.create_dir('my_dir.sub_dir', if_exists='ignore')
576
+
577
+ Create a directory and replace if it already exists:
421
578
 
422
- def drop_dir(path_str: str, force: bool = False, ignore_errors: bool = False) -> None:
579
+ >>> pxt.create_dir('my_dir', if_exists='replace_force')
580
+ """
581
+ path = catalog.Path(path_str)
582
+ cat = Catalog.get()
583
+
584
+ if cat.paths.get_object(path):
585
+ # The directory already exists. Handle it as per user directive.
586
+ _if_exists = catalog.IfExistsParam.validated(if_exists, 'if_exists')
587
+ existing_path = _get_or_drop_existing_path(path_str, catalog.Dir, False, _if_exists)
588
+ if existing_path is not None:
589
+ assert isinstance(existing_path, catalog.Dir)
590
+ return existing_path
591
+
592
+ parent = cat.paths[path.parent]
593
+ assert parent is not None
594
+ with orm.Session(Env.get().engine, future=True) as session:
595
+ dir_md = schema.DirMd(name=path.name)
596
+ dir_record = schema.Dir(parent_id=parent._id, md=dataclasses.asdict(dir_md))
597
+ session.add(dir_record)
598
+ session.flush()
599
+ assert dir_record.id is not None
600
+ assert isinstance(dir_record.id, UUID)
601
+ dir = catalog.Dir(dir_record.id, parent._id, path.name)
602
+ cat.paths[path] = dir
603
+ session.commit()
604
+ _logger.info(f'Created directory `{path_str}`.')
605
+ print(f'Created directory `{path_str}`.')
606
+ return dir
607
+
608
+ def drop_dir(path_str: str, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
423
609
  """Remove a directory.
424
610
 
425
611
  Args:
426
612
  path_str: Name or path of the directory.
427
613
  force: If `True`, will also drop all tables and subdirectories of this directory, recursively, along
428
614
  with any views or snapshots that depend on any of the dropped tables.
429
- ignore_errors: if `True`, will return silently instead of throwing an exception if the directory
430
- does not exist.
615
+ if_not_exists: Directive regarding how to handle if the path does not exist.
616
+ Must be one of the following:
617
+
618
+ - `'error'`: raise an error
619
+ - `'ignore'`: do nothing and return
431
620
 
432
621
  Raises:
433
- Error: If the path does not exist or does not designate a directory, or if the directory is not empty.
622
+ Error: If the path
623
+
624
+ - is invalid, or
625
+ - does not exist and `if_not_exists='error'`, or
626
+ - is not designate a directory, or
627
+ - is a direcotory but is not empty and `force=False`.
434
628
 
435
629
  Examples:
630
+ Remove a directory, if it exists and is empty:
436
631
  >>> pxt.drop_dir('my_dir')
437
632
 
438
633
  Remove a subdirectory:
439
634
 
440
635
  >>> pxt.drop_dir('my_dir.sub_dir')
636
+
637
+ Remove an existing directory if it is empty, but do nothing if it does not exist:
638
+
639
+ >>> pxt.drop_dir('my_dir.sub_dir', if_not_exists='ignore')
640
+
641
+ Remove an existing directory and all its contents:
642
+
643
+ >>> pxt.drop_dir('my_dir', force=True)
441
644
  """
442
645
  cat = Catalog.get()
443
646
  path = catalog.Path(path_str)
444
-
445
- try:
446
- cat.paths.check_is_valid(path, expected=catalog.Dir)
447
- except Exception as e:
448
- if ignore_errors or force:
449
- _logger.info(f'Skipped directory `{path}` (does not exist).')
647
+ obj = cat.paths.get_object(path)
648
+ if obj is None:
649
+ _if_not_exists = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
650
+ if _if_not_exists == catalog.IfNotExistsParam.IGNORE or force:
651
+ _logger.info(f'Skipped directory `{path_str}` (does not exist).')
450
652
  return
451
653
  else:
452
- raise e
654
+ raise excs.Error(f'Directory `{path_str}` does not exist.')
655
+
656
+ if not isinstance(obj, catalog.Dir):
657
+ raise excs.Error(
658
+ f'{str(path)} needs to be a {catalog.Dir._display_name()} but is a {type(obj)._display_name()}')
453
659
 
454
660
  children = cat.paths.get_children(path, child_type=None, recursive=True)
455
661
 
@@ -510,7 +716,7 @@ def list_functions() -> Styler:
510
716
  paths = ['.'.join(f.self_path.split('.')[:-1]) for f in functions]
511
717
  names = [f.name for f in functions]
512
718
  params = [
513
- ', '.join([param_name + ': ' + str(param_type) for param_name, param_type in f.signature.parameters.items()])
719
+ ', '.join([param_name + ': ' + str(param_type) for param_name, param_type in f.signatures[0].parameters.items()])
514
720
  for f in functions
515
721
  ]
516
722
  pd_df = pd.DataFrame(
@@ -518,7 +724,7 @@ def list_functions() -> Styler:
518
724
  'Path': paths,
519
725
  'Function Name': names,
520
726
  'Parameters': params,
521
- 'Return Type': [str(f.signature.get_return_type()) for f in functions],
727
+ 'Return Type': [str(f.signatures[0].get_return_type()) for f in functions],
522
728
  }
523
729
  )
524
730
  pd_df = pd_df.style.set_properties(None, **{'text-align': 'left'}).set_table_styles(
@@ -527,6 +733,68 @@ def list_functions() -> Styler:
527
733
  return pd_df.hide(axis='index')
528
734
 
529
735
 
736
+ def tools(*args: Union[func.Function, func.tools.Tool]) -> func.tools.Tools:
737
+ """
738
+ Specifies a collection of UDFs to be used as LLM tools. Pixeltable allows any UDF to be used as an input into an
739
+ LLM tool-calling API. To use one or more UDFs as tools, wrap them in a `pxt.tools` call and pass the return value
740
+ to an LLM API.
741
+
742
+ The UDFs can be specified directly or wrapped inside a [pxt.tool()][pixeltable.tool] invocation. If a UDF is
743
+ specified directly, the tool name will be the (unqualified) UDF name, and the tool description will consist of the
744
+ entire contents of the UDF docstring. If a UDF is wrapped in a `pxt.tool()` invocation, then the name and/or
745
+ description may be customized.
746
+
747
+ Args:
748
+ args: The UDFs to use as tools.
749
+
750
+ Returns:
751
+ A `Tools` instance that can be passed to an LLM tool-calling API or invoked to generate tool results.
752
+
753
+ Examples:
754
+ Create a tools instance with a single UDF:
755
+
756
+ >>> tools = pxt.tools(stock_price)
757
+
758
+ Create a tools instance with several UDFs:
759
+
760
+ >>> tools = pxt.tools(stock_price, weather_quote)
761
+
762
+ Create a tools instance, some of whose UDFs have customized metadata:
763
+
764
+ >>> tools = pxt.tools(
765
+ ... stock_price,
766
+ ... pxt.tool(weather_quote, description='Returns information about the weather in a particular location.'),
767
+ ... pxt.tool(traffic_quote, name='traffic_conditions'),
768
+ ... )
769
+ """
770
+ return func.tools.Tools(tools=[
771
+ arg if isinstance(arg, func.tools.Tool) else tool(arg)
772
+ for arg in args
773
+ ])
774
+
775
+
776
+ def tool(fn: func.Function, name: Optional[str] = None, description: Optional[str] = None) -> func.tools.Tool:
777
+ """
778
+ Specifies a Pixeltable UDF to be used as an LLM tool with customizable metadata. See the documentation for
779
+ [pxt.tools()][pixeltable.tools] for more details.
780
+
781
+ Args:
782
+ fn: The UDF to use as a tool.
783
+ name: The name of the tool. If not specified, then the unqualified name of the UDF will be used by default.
784
+ description: The description of the tool. If not specified, then the entire contents of the UDF docstring
785
+ will be used by default.
786
+
787
+ Returns:
788
+ A `Tool` instance that can be passed to an LLM tool-calling API.
789
+ """
790
+ if fn.self_path is None:
791
+ raise excs.Error('Only module UDFs can be used as tools (not locally defined UDFs)')
792
+ if isinstance(fn, func.AggregateFunction):
793
+ raise excs.Error('Aggregator UDFs cannot be used as tools')
794
+
795
+ return func.tools.Tool(fn=fn, name=name, description=description)
796
+
797
+
530
798
  def configure_logging(
531
799
  *,
532
800
  to_stdout: Optional[bool] = None,