pixeltable 0.2.25__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (97) hide show
  1. pixeltable/__init__.py +2 -2
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/dir.py +6 -0
  5. pixeltable/catalog/globals.py +25 -0
  6. pixeltable/catalog/named_function.py +4 -0
  7. pixeltable/catalog/path_dict.py +37 -11
  8. pixeltable/catalog/schema_object.py +6 -0
  9. pixeltable/catalog/table.py +421 -231
  10. pixeltable/catalog/table_version.py +22 -8
  11. pixeltable/catalog/view.py +5 -7
  12. pixeltable/dataframe.py +439 -105
  13. pixeltable/env.py +19 -5
  14. pixeltable/exec/__init__.py +1 -1
  15. pixeltable/exec/exec_node.py +6 -7
  16. pixeltable/exec/expr_eval_node.py +1 -1
  17. pixeltable/exec/sql_node.py +92 -45
  18. pixeltable/exprs/__init__.py +1 -0
  19. pixeltable/exprs/arithmetic_expr.py +1 -1
  20. pixeltable/exprs/array_slice.py +1 -1
  21. pixeltable/exprs/column_property_ref.py +1 -1
  22. pixeltable/exprs/column_ref.py +29 -2
  23. pixeltable/exprs/comparison.py +1 -1
  24. pixeltable/exprs/compound_predicate.py +1 -1
  25. pixeltable/exprs/expr.py +12 -5
  26. pixeltable/exprs/expr_set.py +8 -0
  27. pixeltable/exprs/function_call.py +147 -39
  28. pixeltable/exprs/in_predicate.py +1 -1
  29. pixeltable/exprs/inline_expr.py +25 -5
  30. pixeltable/exprs/is_null.py +1 -1
  31. pixeltable/exprs/json_mapper.py +1 -1
  32. pixeltable/exprs/json_path.py +1 -1
  33. pixeltable/exprs/method_ref.py +1 -1
  34. pixeltable/exprs/row_builder.py +1 -1
  35. pixeltable/exprs/rowid_ref.py +1 -1
  36. pixeltable/exprs/similarity_expr.py +14 -7
  37. pixeltable/exprs/sql_element_cache.py +4 -0
  38. pixeltable/exprs/type_cast.py +2 -2
  39. pixeltable/exprs/variable.py +3 -0
  40. pixeltable/func/__init__.py +5 -4
  41. pixeltable/func/aggregate_function.py +151 -68
  42. pixeltable/func/callable_function.py +48 -16
  43. pixeltable/func/expr_template_function.py +64 -23
  44. pixeltable/func/function.py +195 -27
  45. pixeltable/func/function_registry.py +2 -1
  46. pixeltable/func/query_template_function.py +51 -9
  47. pixeltable/func/signature.py +64 -7
  48. pixeltable/func/tools.py +153 -0
  49. pixeltable/func/udf.py +57 -35
  50. pixeltable/functions/__init__.py +2 -2
  51. pixeltable/functions/anthropic.py +51 -4
  52. pixeltable/functions/gemini.py +85 -0
  53. pixeltable/functions/globals.py +54 -34
  54. pixeltable/functions/huggingface.py +10 -28
  55. pixeltable/functions/json.py +3 -8
  56. pixeltable/functions/math.py +67 -0
  57. pixeltable/functions/ollama.py +8 -8
  58. pixeltable/functions/openai.py +51 -4
  59. pixeltable/functions/timestamp.py +1 -1
  60. pixeltable/functions/video.py +3 -9
  61. pixeltable/functions/vision.py +1 -1
  62. pixeltable/globals.py +354 -80
  63. pixeltable/index/embedding_index.py +106 -34
  64. pixeltable/io/__init__.py +1 -1
  65. pixeltable/io/label_studio.py +1 -1
  66. pixeltable/io/parquet.py +39 -19
  67. pixeltable/iterators/document.py +12 -0
  68. pixeltable/metadata/__init__.py +1 -1
  69. pixeltable/metadata/converters/convert_16.py +2 -1
  70. pixeltable/metadata/converters/convert_17.py +2 -1
  71. pixeltable/metadata/converters/convert_22.py +17 -0
  72. pixeltable/metadata/converters/convert_23.py +35 -0
  73. pixeltable/metadata/converters/convert_24.py +56 -0
  74. pixeltable/metadata/converters/convert_25.py +19 -0
  75. pixeltable/metadata/converters/util.py +4 -2
  76. pixeltable/metadata/notes.py +4 -0
  77. pixeltable/metadata/schema.py +1 -0
  78. pixeltable/plan.py +128 -50
  79. pixeltable/store.py +1 -1
  80. pixeltable/type_system.py +196 -54
  81. pixeltable/utils/arrow.py +8 -3
  82. pixeltable/utils/description_helper.py +89 -0
  83. pixeltable/utils/documents.py +14 -0
  84. {pixeltable-0.2.25.dist-info → pixeltable-0.3.0.dist-info}/METADATA +30 -20
  85. pixeltable-0.3.0.dist-info/RECORD +155 -0
  86. {pixeltable-0.2.25.dist-info → pixeltable-0.3.0.dist-info}/WHEEL +1 -1
  87. pixeltable-0.3.0.dist-info/entry_points.txt +3 -0
  88. pixeltable/tool/create_test_db_dump.py +0 -311
  89. pixeltable/tool/create_test_video.py +0 -81
  90. pixeltable/tool/doc_plugins/griffe.py +0 -50
  91. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  92. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  93. pixeltable/tool/embed_udf.py +0 -9
  94. pixeltable/tool/mypy_plugin.py +0 -55
  95. pixeltable-0.2.25.dist-info/RECORD +0 -154
  96. pixeltable-0.2.25.dist-info/entry_points.txt +0 -3
  97. {pixeltable-0.2.25.dist-info → pixeltable-0.3.0.dist-info}/LICENSE +0 -0
pixeltable/globals.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import dataclasses
2
2
  import logging
3
- from typing import Any, Iterable, Optional, Union, Literal
3
+ from typing import Any, Iterable, Literal, Optional, Union
4
4
  from uuid import UUID
5
5
 
6
6
  import pandas as pd
@@ -20,11 +20,68 @@ from pixeltable.utils.filecache import FileCache
20
20
 
21
21
  _logger = logging.getLogger('pixeltable')
22
22
 
23
-
24
23
  def init() -> None:
25
24
  """Initializes the Pixeltable environment."""
26
25
  _ = Catalog.get()
27
26
 
27
+ def _get_or_drop_existing_path(
28
+ path_str: str,
29
+ expected_obj_type: type[catalog.SchemaObject],
30
+ expected_snapshot: bool,
31
+ if_exists: catalog.IfExistsParam
32
+ ) -> Optional[catalog.SchemaObject]:
33
+ """Handle schema object path collision during creation according to the if_exists parameter.
34
+
35
+ Args:
36
+ path_str: An existing and valid path to the dir, table, view, or snapshot.
37
+ expected_obj_type: Whether the caller of this function is creating a dir, table, or view at the existing path.
38
+ expected_snapshot: Whether the caller of this function is creating a snapshot at the existing path.
39
+ if_exists: Directive regarding how to handle the existing path.
40
+
41
+ Returns:
42
+ A handle to the existing dir, table, view, or snapshot, if `if_exists='ignore'`, otherwise `None`.
43
+
44
+ Raises:
45
+ Error: If the existing path is not of the expected type, or if the existing path has dependents and
46
+ `if_exists='replace'` or `if_exists='replace_force'`.
47
+ """
48
+ cat = Catalog.get()
49
+ path = catalog.Path(path_str)
50
+ assert cat.paths.get_object(path) is not None
51
+
52
+ if if_exists == catalog.IfExistsParam.ERROR:
53
+ raise excs.Error(f'Path `{path_str}` already exists.')
54
+
55
+ existing_path = cat.paths[path]
56
+ existing_path_is_snapshot = 'is_snapshot' in existing_path.get_metadata() and existing_path.get_metadata()['is_snapshot']
57
+ obj_type_str = 'Snapshot' if expected_snapshot else expected_obj_type._display_name().capitalize()
58
+ # Check if the existing path is of expected type.
59
+ if (not isinstance(existing_path, expected_obj_type)
60
+ or (expected_snapshot and not existing_path_is_snapshot)):
61
+ raise excs.Error(f'Path `{path_str}` already exists but is not a {obj_type_str}. Cannot {if_exists.name.lower()} it.')
62
+
63
+ # If if_exists='ignore', return the handle to the existing object.
64
+ assert isinstance(existing_path, expected_obj_type)
65
+ if if_exists == catalog.IfExistsParam.IGNORE:
66
+ return existing_path
67
+
68
+ # Check if the existing object has dependents. If so, cannot replace it
69
+ # unless if_exists='replace_force'.
70
+ has_dependents = existing_path._has_dependents
71
+ if if_exists == catalog.IfExistsParam.REPLACE and has_dependents:
72
+ raise excs.Error(f"{obj_type_str} `{path_str}` already exists and has dependents. Use `if_exists='replace_force'` to replace it.")
73
+ else:
74
+ assert if_exists == catalog.IfExistsParam.REPLACE_FORCE or not has_dependents
75
+ # Drop the existing path so it can be replaced.
76
+ # Any errors during drop will be raised.
77
+ _logger.info(f"Dropping {obj_type_str} `{path_str}` to replace it.")
78
+ if isinstance(existing_path, catalog.Dir):
79
+ drop_dir(path_str, force=True)
80
+ else:
81
+ drop_table(path_str, force=True)
82
+ assert cat.paths.get_object(path) is None
83
+
84
+ return None
28
85
 
29
86
  def create_table(
30
87
  path_str: str,
@@ -33,7 +90,8 @@ def create_table(
33
90
  primary_key: Optional[Union[str, list[str]]] = None,
34
91
  num_retained_versions: int = 10,
35
92
  comment: str = '',
36
- media_validation: Literal['on_read', 'on_write'] = 'on_write'
93
+ media_validation: Literal['on_read', 'on_write'] = 'on_write',
94
+ if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error'
37
95
  ) -> catalog.Table:
38
96
  """Create a new base table.
39
97
 
@@ -46,14 +104,28 @@ def create_table(
46
104
  num_retained_versions: Number of versions of the table to retain.
47
105
  comment: An optional comment; its meaning is user-defined.
48
106
  media_validation: Media validation policy for the table.
107
+
49
108
  - `'on_read'`: validate media files at query time
50
109
  - `'on_write'`: validate media files during insert/update operations
110
+ if_exists: Directive regarding how to handle if the path already exists.
111
+ Must be one of the following:
112
+
113
+ - `'error'`: raise an error
114
+ - `'ignore'`: do nothing and return the existing table handle
115
+ - `'replace'`: if the existing table has no views, drop and replace it with a new one
116
+ - `'replace_force'`: drop the existing table and all its views, and create a new one
51
117
 
52
118
  Returns:
53
- A handle to the newly created [`Table`][pixeltable.Table].
119
+ A handle to the newly created table, or to an already existing table at the path when `if_exists='ignore'`.
120
+ Please note the schema of the existing table may not match the schema provided in the call.
54
121
 
55
122
  Raises:
56
- Error: if the path already exists or is invalid.
123
+ Error: if
124
+
125
+ - the path is invalid, or
126
+ - the path already exists and `if_exists='error'`, or
127
+ - the path already exists and is not a table, or
128
+ - an error occurs while attempting to create the table.
57
129
 
58
130
  Examples:
59
131
  Create a table with an int and a string column:
@@ -65,10 +137,27 @@ def create_table(
65
137
 
66
138
  >>> tbl1 = pxt.get_table('orig_table')
67
139
  ... tbl2 = pxt.create_table('new_table', tbl1.where(tbl1.col1 < 10).select(tbl1.col2))
140
+
141
+ Create a table if it does not already exist, otherwise get the existing table:
142
+
143
+ >>> tbl = pxt.create_table('my_table', schema={'col1': pxt.Int, 'col2': pxt.String}, if_exists='ignore')
144
+
145
+ Create a table with an int and a float column, and replace any existing table:
146
+
147
+ >>> tbl = pxt.create_table('my_table', schema={'col1': pxt.Int, 'col2': pxt.Float}, if_exists='replace')
68
148
  """
69
149
  path = catalog.Path(path_str)
70
- Catalog.get().paths.check_is_valid(path, expected=None)
71
- dir = Catalog.get().paths[path.parent]
150
+ cat = Catalog.get()
151
+
152
+ if cat.paths.get_object(path) is not None:
153
+ # The table already exists. Handle it as per user directive.
154
+ _if_exists = catalog.IfExistsParam.validated(if_exists, 'if_exists')
155
+ existing_table = _get_or_drop_existing_path(path_str, catalog.InsertableTable, False, _if_exists)
156
+ if existing_table is not None:
157
+ assert isinstance(existing_table, catalog.Table)
158
+ return existing_table
159
+
160
+ dir = cat.paths[path.parent]
72
161
 
73
162
  df: Optional[DataFrame] = None
74
163
  if isinstance(schema_or_df, dict):
@@ -95,7 +184,7 @@ def create_table(
95
184
  tbl = catalog.InsertableTable._create(
96
185
  dir._id, path.name, schema, df, primary_key=primary_key, num_retained_versions=num_retained_versions,
97
186
  comment=comment, media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'))
98
- Catalog.get().paths[path] = tbl
187
+ cat.paths[path] = tbl
99
188
 
100
189
  _logger.info(f'Created table `{path_str}`.')
101
190
  return tbl
@@ -111,7 +200,7 @@ def create_view(
111
200
  num_retained_versions: int = 10,
112
201
  comment: str = '',
113
202
  media_validation: Literal['on_read', 'on_write'] = 'on_write',
114
- ignore_errors: bool = False,
203
+ if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
115
204
  ) -> Optional[catalog.Table]:
116
205
  """Create a view of an existing table object (which itself can be a view or a snapshot or a base table).
117
206
 
@@ -129,43 +218,82 @@ def create_view(
129
218
  the base table.
130
219
  num_retained_versions: Number of versions of the view to retain.
131
220
  comment: Optional comment for the view.
132
- ignore_errors: if True, fail silently if the path already exists or is invalid.
221
+ media_validation: Media validation policy for the view.
222
+
223
+ - `'on_read'`: validate media files at query time
224
+ - `'on_write'`: validate media files during insert/update operations
225
+ if_exists: Directive regarding how to handle if the path already exists.
226
+ Must be one of the following:
227
+
228
+ - `'error'`: raise an error
229
+ - `'ignore'`: do nothing and return the existing view handle
230
+ - `'replace'`: if the existing view has no dependents, drop and replace it with a new one
231
+ - `'replace_force'`: drop the existing view and all its dependents, and create a new one
133
232
 
134
233
  Returns:
135
234
  A handle to the [`Table`][pixeltable.Table] representing the newly created view. If the path already
136
- exists or is invalid and `ignore_errors=True`, returns `None`.
235
+ exists and `if_exists='ignore'`, returns a handle to the existing view. Please note the schema
236
+ or the base of the existing view may not match those provided in the call.
137
237
 
138
238
  Raises:
139
- Error: if the path already exists or is invalid and `ignore_errors=False`.
239
+ Error: if
240
+
241
+ - the path is invalid, or
242
+ - the path already exists and `if_exists='error'`, or
243
+ - the path already exists and is not a view, or
244
+ - an error occurs while attempting to create the view.
140
245
 
141
246
  Examples:
142
247
  Create a view `my_view` of an existing table `my_table`, filtering on rows where `col1` is greater than 10:
143
248
 
144
249
  >>> tbl = pxt.get_table('my_table')
145
250
  ... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 10))
251
+
252
+ Create a view `my_view` of an existing table `my_table`, filtering on rows where `col1` is greater than 10,
253
+ if it does not already exist. Otherwise, get the existing view named `my_view`:
254
+
255
+ >>> tbl = pxt.get_table('my_table')
256
+ ... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 10), if_exists='ignore')
257
+
258
+ Create a view `my_view` of an existing table `my_table`, filtering on rows where `col1` is greater than 100,
259
+ and replace any existing view named `my_view`:
260
+
261
+ >>> tbl = pxt.get_table('my_table')
262
+ ... view = pxt.create_view('my_view', tbl.where(tbl.col1 > 100), if_exists='replace_force')
146
263
  """
147
264
  where: Optional[exprs.Expr] = None
148
265
  if isinstance(base, catalog.Table):
149
266
  tbl_version_path = base._tbl_version_path
150
267
  elif isinstance(base, DataFrame):
151
268
  base._validate_mutable('create_view')
152
- tbl_version_path = base.tbl
269
+ if len(base._from_clause.tbls) > 1:
270
+ raise excs.Error('Cannot create a view of a join')
271
+ tbl_version_path = base._from_clause.tbls[0]
153
272
  where = base.where_clause
154
273
  else:
155
274
  raise excs.Error('`base` must be an instance of `Table` or `DataFrame`')
156
275
  assert isinstance(base, catalog.Table) or isinstance(base, DataFrame)
276
+
157
277
  path = catalog.Path(path_str)
158
- try:
159
- Catalog.get().paths.check_is_valid(path, expected=None)
160
- except Exception as e:
161
- if ignore_errors:
162
- return None
163
- else:
164
- raise e
165
- dir = Catalog.get().paths[path.parent]
278
+ cat = Catalog.get()
279
+
280
+ if cat.paths.get_object(path) is not None:
281
+ # The view already exists. Handle it as per user directive.
282
+ _if_exists = catalog.IfExistsParam.validated(if_exists, 'if_exists')
283
+ existing_path = _get_or_drop_existing_path(path_str, catalog.View, is_snapshot, _if_exists)
284
+ if existing_path is not None:
285
+ assert isinstance(existing_path, catalog.View)
286
+ return existing_path
287
+
288
+ dir = cat.paths[path.parent]
166
289
 
167
290
  if additional_columns is None:
168
291
  additional_columns = {}
292
+ else:
293
+ # additional columns should not be in the base table
294
+ for col_name in additional_columns.keys():
295
+ if col_name in [c.name for c in tbl_version_path.columns()]:
296
+ raise excs.Error(f"Column {col_name!r} already exists in the base table {tbl_version_path.get_column(col_name).tbl.name}.")
169
297
  if iterator is None:
170
298
  iterator_class, iterator_args = None, None
171
299
  else:
@@ -176,7 +304,7 @@ def create_view(
176
304
  is_snapshot=is_snapshot, iterator_cls=iterator_class, iterator_args=iterator_args,
177
305
  num_retained_versions=num_retained_versions, comment=comment,
178
306
  media_validation=catalog.MediaValidation.validated(media_validation, 'media_validation'))
179
- Catalog.get().paths[path] = view
307
+ cat.paths[path] = view
180
308
  _logger.info(f'Created view `{path_str}`.')
181
309
  FileCache.get().emit_eviction_warnings()
182
310
  return view
@@ -191,7 +319,7 @@ def create_snapshot(
191
319
  num_retained_versions: int = 10,
192
320
  comment: str = '',
193
321
  media_validation: Literal['on_read', 'on_write'] = 'on_write',
194
- ignore_errors: bool = False,
322
+ if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
195
323
  ) -> Optional[catalog.Table]:
196
324
  """Create a snapshot of an existing table object (which itself can be a view or a snapshot or a base table).
197
325
 
@@ -206,21 +334,47 @@ def create_snapshot(
206
334
  iterator: The iterator to use for this snapshot. If specified, then this snapshot will be a one-to-many view of
207
335
  the base table.
208
336
  num_retained_versions: Number of versions of the view to retain.
209
- comment: Optional comment for the view.
210
- ignore_errors: if True, fail silently if the path already exists or is invalid.
337
+ comment: Optional comment for the snapshot.
338
+ media_validation: Media validation policy for the snapshot.
339
+
340
+ - `'on_read'`: validate media files at query time
341
+ - `'on_write'`: validate media files during insert/update operations
342
+ if_exists: Directive regarding how to handle if the path already exists.
343
+ Must be one of the following:
344
+
345
+ - `'error'`: raise an error
346
+ - `'ignore'`: do nothing and return the existing snapshot handle
347
+ - `'replace'`: if the existing snapshot has no dependents, drop and replace it with a new one
348
+ - `'replace_force'`: drop the existing snapshot and all its dependents, and create a new one
211
349
 
212
350
  Returns:
213
- A handle to the [`Table`][pixeltable.Table] representing the newly created snapshot. If the path already
214
- exists or is invalid and `ignore_errors=True`, returns `None`.
351
+ A handle to the [`Table`][pixeltable.Table] representing the newly created snapshot, or to the existing snapshot at the path when `if_exists='ignore'`.
352
+ Please note the schema or base of the existing snapshot may not match those provided in the call.
215
353
 
216
354
  Raises:
217
- Error: if the path already exists or is invalid and `ignore_errors=False`.
355
+ Error: if
356
+
357
+ - the path is invalid, or
358
+ - the path already exists and `if_exists='error'`, or
359
+ - the path already exists and is not a snapshot, or
360
+ - an error occurs while attempting to create the snapshot.
218
361
 
219
362
  Examples:
220
- Create a snapshot of `my_table`:
363
+ Create a snapshot `my_snapshot` of a table `my_table`:
221
364
 
222
365
  >>> tbl = pxt.get_table('my_table')
223
366
  ... snapshot = pxt.create_snapshot('my_snapshot', tbl)
367
+
368
+ Create a snapshot `my_snapshot` of a view `my_view` with additional int column `col3`,
369
+ if `my_snapshot` does not already exist:
370
+
371
+ >>> view = pxt.get_table('my_view')
372
+ ... snapshot = pxt.create_snapshot('my_snapshot', view, additional_columns={'col3': pxt.Int}, if_exists='ignore')
373
+
374
+ Create a snapshot `my_snapshot` on a table `my_table`, and replace any existing snapshot named `my_snapshot`:
375
+
376
+ >>> tbl = pxt.get_table('my_table')
377
+ ... snapshot = pxt.create_snapshot('my_snapshot', tbl, if_exists='replace_force')
224
378
  """
225
379
  return create_view(
226
380
  path_str,
@@ -231,7 +385,7 @@ def create_snapshot(
231
385
  num_retained_versions=num_retained_versions,
232
386
  comment=comment,
233
387
  media_validation=media_validation,
234
- ignore_errors=ignore_errors,
388
+ if_exists=if_exists,
235
389
  )
236
390
 
237
391
 
@@ -296,16 +450,26 @@ def move(path: str, new_path: str) -> None:
296
450
  obj._move(new_p.name, new_dir._id)
297
451
 
298
452
 
299
- def drop_table(table: Union[str, catalog.Table], force: bool = False, ignore_errors: bool = False) -> None:
453
+ def drop_table(table: Union[str, catalog.Table], force: bool = False,
454
+ if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
300
455
  """Drop a table, view, or snapshot.
301
456
 
302
457
  Args:
303
458
  table: Fully qualified name, or handle, of the table to be dropped.
304
459
  force: If `True`, will also drop all views and sub-views of this table.
305
- ignore_errors: If `True`, return silently if the table does not exist (without throwing an exception).
460
+ if_not_exists: Directive regarding how to handle if the path does not exist.
461
+ Must be one of the following:
462
+
463
+ - `'error'`: raise an error
464
+ - `'ignore'`: do nothing and return
306
465
 
307
466
  Raises:
308
- Error: If the name does not exist or does not designate a table object, and `ignore_errors=False`.
467
+ Error: if the qualified name
468
+
469
+ - is invalid, or
470
+ - does not exist and `if_not_exists='error'`, or
471
+ - does not designate a table object, or
472
+ - designates a table object but has dependents and `force=False`.
309
473
 
310
474
  Examples:
311
475
  Drop a table by its fully qualified name:
@@ -315,19 +479,25 @@ def drop_table(table: Union[str, catalog.Table], force: bool = False, ignore_err
315
479
  >>> t = pxt.get_table('subdir.my_table')
316
480
  ... pxt.drop_table(t)
317
481
 
482
+ Drop a table if it exists, otherwise do nothing:
483
+ >>> pxt.drop_table('subdir.my_table', if_not_exists='ignore')
484
+
485
+ Drop a table and all its dependents:
486
+ >>> pxt.drop_table('subdir.my_table', force=True)
318
487
  """
319
488
  cat = Catalog.get()
320
489
  if isinstance(table, str):
321
490
  tbl_path_obj = catalog.Path(table)
322
- try:
323
- cat.paths.check_is_valid(tbl_path_obj, expected=catalog.Table)
324
- except Exception as e:
325
- if ignore_errors or force:
491
+ tbl = cat.paths.get_object(tbl_path_obj)
492
+ if tbl is None:
493
+ _if_not_exists = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
494
+ if _if_not_exists == catalog.IfNotExistsParam.IGNORE or force:
326
495
  _logger.info(f'Skipped table `{table}` (does not exist).')
327
496
  return
328
497
  else:
329
- raise e
330
- tbl = cat.paths[tbl_path_obj]
498
+ raise excs.Error(f'Table `{table}` does not exist.')
499
+ if not isinstance(tbl, catalog.Table):
500
+ raise excs.Error(f'{tbl} needs to be a {catalog.Table._display_name()} but is a {type(tbl)._display_name()}')
331
501
  else:
332
502
  tbl = table
333
503
  tbl_path_obj = catalog.Path(tbl._path)
@@ -373,16 +543,30 @@ def list_tables(dir_path: str = '', recursive: bool = True) -> list[str]:
373
543
  Catalog.get().paths.check_is_valid(path, expected=catalog.Dir)
374
544
  return [str(p) for p in Catalog.get().paths.get_children(path, child_type=catalog.Table, recursive=recursive)]
375
545
 
376
-
377
- def create_dir(path_str: str, ignore_errors: bool = False) -> Optional[catalog.Dir]:
546
+ def create_dir(path_str: str, if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error') -> Optional[catalog.Dir]:
378
547
  """Create a directory.
379
548
 
380
549
  Args:
381
550
  path_str: Path to the directory.
382
- ignore_errors: if `True`, will return silently instead of throwing an exception if an error occurs.
551
+ if_exists: Directive regarding how to handle if the path already exists.
552
+ Must be one of the following:
553
+
554
+ - `'error'`: raise an error
555
+ - `'ignore'`: do nothing and return the existing directory handle
556
+ - `'replace'`: if the existing directory is empty, drop it and create a new one
557
+ - `'replace_force'`: drop the existing directory and all its children, and create a new one
558
+
559
+ Returns:
560
+ A handle to the newly created directory, or to an already existing directory at the path when `if_exists='ignore'`.
561
+ Please note the existing directory may not be empty.
383
562
 
384
563
  Raises:
385
- Error: If the path already exists or the parent is not a directory, and `ignore_errors=False`.
564
+ Error: If
565
+
566
+ - the path is invalid, or
567
+ - the path already exists and `if_exists='error'`, or
568
+ - the path already exists and is not a directory, or
569
+ - an error occurs while attempting to create the directory.
386
570
 
387
571
  Examples:
388
572
  >>> pxt.create_dir('my_dir')
@@ -390,63 +574,93 @@ def create_dir(path_str: str, ignore_errors: bool = False) -> Optional[catalog.D
390
574
  Create a subdirectory:
391
575
 
392
576
  >>> pxt.create_dir('my_dir.sub_dir')
393
- """
394
- try:
395
- path = catalog.Path(path_str)
396
- Catalog.get().paths.check_is_valid(path, expected=None)
397
- parent = Catalog.get().paths[path.parent]
398
- assert parent is not None
399
- with orm.Session(Env.get().engine, future=True) as session:
400
- dir_md = schema.DirMd(name=path.name)
401
- dir_record = schema.Dir(parent_id=parent._id, md=dataclasses.asdict(dir_md))
402
- session.add(dir_record)
403
- session.flush()
404
- assert dir_record.id is not None
405
- assert isinstance(dir_record.id, UUID)
406
- dir = catalog.Dir(dir_record.id, parent._id, path.name)
407
- Catalog.get().paths[path] = dir
408
- session.commit()
409
- _logger.info(f'Created directory `{path_str}`.')
410
- print(f'Created directory `{path_str}`.')
411
- return dir
412
- except excs.Error as e:
413
- if ignore_errors:
414
- return None
415
- else:
416
- raise e
417
577
 
578
+ Create a subdirectory only if it does not already exist, otherwise do nothing:
579
+
580
+ >>> pxt.create_dir('my_dir.sub_dir', if_exists='ignore')
418
581
 
419
- def drop_dir(path_str: str, force: bool = False, ignore_errors: bool = False) -> None:
582
+ Create a directory, replacing it if it already exists:
583
+
584
+ >>> pxt.create_dir('my_dir', if_exists='replace_force')
585
+ """
586
+ path = catalog.Path(path_str)
587
+ cat = Catalog.get()
588
+
589
+ if cat.paths.get_object(path):
590
+ # The directory already exists. Handle it as per user directive.
591
+ _if_exists = catalog.IfExistsParam.validated(if_exists, 'if_exists')
592
+ existing_path = _get_or_drop_existing_path(path_str, catalog.Dir, False, _if_exists)
593
+ if existing_path is not None:
594
+ assert isinstance(existing_path, catalog.Dir)
595
+ return existing_path
596
+
597
+ parent = cat.paths[path.parent]
598
+ assert parent is not None
599
+ with orm.Session(Env.get().engine, future=True) as session:
600
+ dir_md = schema.DirMd(name=path.name)
601
+ dir_record = schema.Dir(parent_id=parent._id, md=dataclasses.asdict(dir_md))
602
+ session.add(dir_record)
603
+ session.flush()
604
+ assert dir_record.id is not None
605
+ assert isinstance(dir_record.id, UUID)
606
+ dir = catalog.Dir(dir_record.id, parent._id, path.name)
607
+ cat.paths[path] = dir
608
+ session.commit()
609
+ _logger.info(f'Created directory `{path_str}`.')
610
+ print(f'Created directory `{path_str}`.')
611
+ return dir
612
+
613
+ def drop_dir(path_str: str, force: bool = False, if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
420
614
  """Remove a directory.
421
615
 
422
616
  Args:
423
617
  path_str: Name or path of the directory.
424
618
  force: If `True`, will also drop all tables and subdirectories of this directory, recursively, along
425
619
  with any views or snapshots that depend on any of the dropped tables.
426
- ignore_errors: if `True`, will return silently instead of throwing an exception if the directory
427
- does not exist.
620
+ if_not_exists: Directive regarding how to handle if the path does not exist.
621
+ Must be one of the following:
622
+
623
+ - `'error'`: raise an error
624
+ - `'ignore'`: do nothing and return
428
625
 
429
626
  Raises:
430
- Error: If the path does not exist or does not designate a directory, or if the directory is not empty.
627
+ Error: If the path
628
+
629
+ - is invalid, or
630
+ - does not exist and `if_not_exists='error'`, or
631
+ - does not designate a directory, or
632
+ - is a directory but is not empty and `force=False`.
431
633
 
432
634
  Examples:
635
+ Remove a directory, if it exists and is empty:
433
636
  >>> pxt.drop_dir('my_dir')
434
637
 
435
638
  Remove a subdirectory:
436
639
 
437
640
  >>> pxt.drop_dir('my_dir.sub_dir')
641
+
642
+ Remove an existing directory if it is empty, but do nothing if it does not exist:
643
+
644
+ >>> pxt.drop_dir('my_dir.sub_dir', if_not_exists='ignore')
645
+
646
+ Remove an existing directory and all its contents:
647
+
648
+ >>> pxt.drop_dir('my_dir', force=True)
438
649
  """
439
650
  cat = Catalog.get()
440
651
  path = catalog.Path(path_str)
441
-
442
- try:
443
- cat.paths.check_is_valid(path, expected=catalog.Dir)
444
- except Exception as e:
445
- if ignore_errors or force:
446
- _logger.info(f'Skipped directory `{path}` (does not exist).')
652
+ obj = cat.paths.get_object(path)
653
+ if obj is None:
654
+ _if_not_exists = catalog.IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
655
+ if _if_not_exists == catalog.IfNotExistsParam.IGNORE or force:
656
+ _logger.info(f'Skipped directory `{path_str}` (does not exist).')
447
657
  return
448
658
  else:
449
- raise e
659
+ raise excs.Error(f'Directory `{path_str}` does not exist.')
660
+
661
+ if not isinstance(obj, catalog.Dir):
662
+ raise excs.Error(
663
+ f'{str(path)} needs to be a {catalog.Dir._display_name()} but is a {type(obj)._display_name()}')
450
664
 
451
665
  children = cat.paths.get_children(path, child_type=None, recursive=True)
452
666
 
@@ -507,7 +721,7 @@ def list_functions() -> Styler:
507
721
  paths = ['.'.join(f.self_path.split('.')[:-1]) for f in functions]
508
722
  names = [f.name for f in functions]
509
723
  params = [
510
- ', '.join([param_name + ': ' + str(param_type) for param_name, param_type in f.signature.parameters.items()])
724
+ ', '.join([param_name + ': ' + str(param_type) for param_name, param_type in f.signatures[0].parameters.items()])
511
725
  for f in functions
512
726
  ]
513
727
  pd_df = pd.DataFrame(
@@ -515,7 +729,7 @@ def list_functions() -> Styler:
515
729
  'Path': paths,
516
730
  'Function Name': names,
517
731
  'Parameters': params,
518
- 'Return Type': [str(f.signature.get_return_type()) for f in functions],
732
+ 'Return Type': [str(f.signatures[0].get_return_type()) for f in functions],
519
733
  }
520
734
  )
521
735
  pd_df = pd_df.style.set_properties(None, **{'text-align': 'left'}).set_table_styles(
@@ -524,6 +738,66 @@ def list_functions() -> Styler:
524
738
  return pd_df.hide(axis='index')
525
739
 
526
740
 
741
+ def tools(*args: Union[func.Function, func.tools.Tool]) -> func.tools.Tools:
742
+ """
743
+ Specifies a collection of UDFs to be used as LLM tools. Pixeltable allows any UDF to be used as an input into an
744
+ LLM tool-calling API. To use one or more UDFs as tools, wrap them in a `pxt.tools` call and pass the return value
745
+ to an LLM API.
746
+
747
+ The UDFs can be specified directly or wrapped inside a [pxt.tool()][pixeltable.tool] invocation. If a UDF is
748
+ specified directly, the tool name will be the (unqualified) UDF name, and the tool description will consist of the
749
+ entire contents of the UDF docstring. If a UDF is wrapped in a `pxt.tool()` invocation, then the name and/or
750
+ description may be customized.
751
+
752
+ Args:
753
+ args: The UDFs to use as tools.
754
+
755
+ Returns:
756
+ A `Tools` instance that can be passed to an LLM tool-calling API or invoked to generate tool results.
757
+
758
+ Examples:
759
+ Create a tools instance with a single UDF:
760
+
761
+ >>> tools = pxt.tools(stock_price)
762
+
763
+ Create a tools instance with several UDFs:
764
+
765
+ >>> tools = pxt.tools(stock_price, weather_quote)
766
+
767
+ Create a tools instance, some of whose UDFs have customized metadata:
768
+
769
+ >>> tools = pxt.tools(
770
+ ... stock_price,
771
+ ... pxt.tool(weather_quote, description='Returns information about the weather in a particular location.'),
772
+ ... pxt.tool(traffic_quote, name='traffic_conditions'),
773
+ ... )
774
+ """
775
+ return func.tools.Tools(tools=[
776
+ arg if isinstance(arg, func.tools.Tool) else tool(arg)
777
+ for arg in args
778
+ ])
779
+
780
+
781
+ def tool(fn: func.Function, name: Optional[str] = None, description: Optional[str] = None) -> func.tools.Tool:
782
+ """
783
+ Specifies a Pixeltable UDF to be used as an LLM tool with customizable metadata. See the documentation for
784
+ [pxt.tools()][pixeltable.tools] for more details.
785
+
786
+ Args:
787
+ fn: The UDF to use as a tool.
788
+ name: The name of the tool. If not specified, then the unqualified name of the UDF will be used by default.
789
+ description: The description of the tool. If not specified, then the entire contents of the UDF docstring
790
+ will be used by default.
791
+
792
+ Returns:
793
+ A `Tool` instance that can be passed to an LLM tool-calling API.
794
+ """
795
+ if isinstance(fn, func.AggregateFunction):
796
+ raise excs.Error('Aggregator UDFs cannot be used as tools')
797
+
798
+ return func.tools.Tool(fn=fn, name=name, description=description)
799
+
800
+
527
801
  def configure_logging(
528
802
  *,
529
803
  to_stdout: Optional[bool] = None,