pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. pixeltable/__init__.py +42 -8
  2. pixeltable/{dataframe.py → _query.py} +470 -206
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +5 -4
  5. pixeltable/catalog/catalog.py +1785 -432
  6. pixeltable/catalog/column.py +190 -113
  7. pixeltable/catalog/dir.py +2 -4
  8. pixeltable/catalog/globals.py +19 -46
  9. pixeltable/catalog/insertable_table.py +191 -98
  10. pixeltable/catalog/path.py +63 -23
  11. pixeltable/catalog/schema_object.py +11 -15
  12. pixeltable/catalog/table.py +843 -436
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +978 -657
  15. pixeltable/catalog/table_version_handle.py +72 -16
  16. pixeltable/catalog/table_version_path.py +112 -43
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +134 -90
  20. pixeltable/config.py +134 -22
  21. pixeltable/env.py +471 -157
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +4 -1
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +11 -7
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +106 -56
  37. pixeltable/exec/globals.py +35 -0
  38. pixeltable/exec/in_memory_data_node.py +19 -19
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +351 -84
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +36 -23
  46. pixeltable/exprs/column_ref.py +213 -89
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +164 -54
  50. pixeltable/exprs/expr.py +70 -44
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +100 -40
  54. pixeltable/exprs/globals.py +2 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +18 -32
  57. pixeltable/exprs/is_null.py +7 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +27 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +167 -67
  64. pixeltable/exprs/rowid_ref.py +25 -10
  65. pixeltable/exprs/similarity_expr.py +58 -40
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +17 -11
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +29 -27
  78. pixeltable/func/signature.py +46 -19
  79. pixeltable/func/tools.py +31 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +16 -0
  82. pixeltable/functions/anthropic.py +123 -77
  83. pixeltable/functions/audio.py +147 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +7 -4
  86. pixeltable/functions/deepseek.py +35 -43
  87. pixeltable/functions/document.py +81 -0
  88. pixeltable/functions/fal.py +76 -0
  89. pixeltable/functions/fireworks.py +11 -20
  90. pixeltable/functions/gemini.py +195 -39
  91. pixeltable/functions/globals.py +142 -14
  92. pixeltable/functions/groq.py +108 -0
  93. pixeltable/functions/huggingface.py +1056 -24
  94. pixeltable/functions/image.py +115 -57
  95. pixeltable/functions/json.py +1 -1
  96. pixeltable/functions/llama_cpp.py +28 -13
  97. pixeltable/functions/math.py +67 -5
  98. pixeltable/functions/mistralai.py +18 -55
  99. pixeltable/functions/net.py +70 -0
  100. pixeltable/functions/ollama.py +20 -13
  101. pixeltable/functions/openai.py +240 -226
  102. pixeltable/functions/openrouter.py +143 -0
  103. pixeltable/functions/replicate.py +4 -4
  104. pixeltable/functions/reve.py +250 -0
  105. pixeltable/functions/string.py +239 -69
  106. pixeltable/functions/timestamp.py +16 -16
  107. pixeltable/functions/together.py +24 -84
  108. pixeltable/functions/twelvelabs.py +188 -0
  109. pixeltable/functions/util.py +6 -1
  110. pixeltable/functions/uuid.py +30 -0
  111. pixeltable/functions/video.py +1515 -107
  112. pixeltable/functions/vision.py +8 -8
  113. pixeltable/functions/voyageai.py +289 -0
  114. pixeltable/functions/whisper.py +16 -8
  115. pixeltable/functions/whisperx.py +179 -0
  116. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  117. pixeltable/globals.py +362 -115
  118. pixeltable/index/base.py +17 -21
  119. pixeltable/index/btree.py +28 -22
  120. pixeltable/index/embedding_index.py +100 -118
  121. pixeltable/io/__init__.py +4 -2
  122. pixeltable/io/datarows.py +8 -7
  123. pixeltable/io/external_store.py +56 -105
  124. pixeltable/io/fiftyone.py +13 -13
  125. pixeltable/io/globals.py +31 -30
  126. pixeltable/io/hf_datasets.py +61 -16
  127. pixeltable/io/label_studio.py +74 -70
  128. pixeltable/io/lancedb.py +3 -0
  129. pixeltable/io/pandas.py +21 -12
  130. pixeltable/io/parquet.py +25 -105
  131. pixeltable/io/table_data_conduit.py +250 -123
  132. pixeltable/io/utils.py +4 -4
  133. pixeltable/iterators/__init__.py +2 -1
  134. pixeltable/iterators/audio.py +26 -25
  135. pixeltable/iterators/base.py +9 -3
  136. pixeltable/iterators/document.py +112 -78
  137. pixeltable/iterators/image.py +12 -15
  138. pixeltable/iterators/string.py +11 -4
  139. pixeltable/iterators/video.py +523 -120
  140. pixeltable/metadata/__init__.py +14 -3
  141. pixeltable/metadata/converters/convert_13.py +2 -2
  142. pixeltable/metadata/converters/convert_18.py +2 -2
  143. pixeltable/metadata/converters/convert_19.py +2 -2
  144. pixeltable/metadata/converters/convert_20.py +2 -2
  145. pixeltable/metadata/converters/convert_21.py +2 -2
  146. pixeltable/metadata/converters/convert_22.py +2 -2
  147. pixeltable/metadata/converters/convert_24.py +2 -2
  148. pixeltable/metadata/converters/convert_25.py +2 -2
  149. pixeltable/metadata/converters/convert_26.py +2 -2
  150. pixeltable/metadata/converters/convert_29.py +4 -4
  151. pixeltable/metadata/converters/convert_30.py +34 -21
  152. pixeltable/metadata/converters/convert_34.py +2 -2
  153. pixeltable/metadata/converters/convert_35.py +9 -0
  154. pixeltable/metadata/converters/convert_36.py +38 -0
  155. pixeltable/metadata/converters/convert_37.py +15 -0
  156. pixeltable/metadata/converters/convert_38.py +39 -0
  157. pixeltable/metadata/converters/convert_39.py +124 -0
  158. pixeltable/metadata/converters/convert_40.py +73 -0
  159. pixeltable/metadata/converters/convert_41.py +12 -0
  160. pixeltable/metadata/converters/convert_42.py +9 -0
  161. pixeltable/metadata/converters/convert_43.py +44 -0
  162. pixeltable/metadata/converters/util.py +20 -31
  163. pixeltable/metadata/notes.py +9 -0
  164. pixeltable/metadata/schema.py +140 -53
  165. pixeltable/metadata/utils.py +74 -0
  166. pixeltable/mypy/__init__.py +3 -0
  167. pixeltable/mypy/mypy_plugin.py +123 -0
  168. pixeltable/plan.py +382 -115
  169. pixeltable/share/__init__.py +1 -1
  170. pixeltable/share/packager.py +547 -83
  171. pixeltable/share/protocol/__init__.py +33 -0
  172. pixeltable/share/protocol/common.py +165 -0
  173. pixeltable/share/protocol/operation_types.py +33 -0
  174. pixeltable/share/protocol/replica.py +119 -0
  175. pixeltable/share/publish.py +257 -59
  176. pixeltable/store.py +311 -194
  177. pixeltable/type_system.py +373 -211
  178. pixeltable/utils/__init__.py +2 -3
  179. pixeltable/utils/arrow.py +131 -17
  180. pixeltable/utils/av.py +298 -0
  181. pixeltable/utils/azure_store.py +346 -0
  182. pixeltable/utils/coco.py +6 -6
  183. pixeltable/utils/code.py +3 -3
  184. pixeltable/utils/console_output.py +4 -1
  185. pixeltable/utils/coroutine.py +6 -23
  186. pixeltable/utils/dbms.py +32 -6
  187. pixeltable/utils/description_helper.py +4 -5
  188. pixeltable/utils/documents.py +7 -18
  189. pixeltable/utils/exception_handler.py +7 -30
  190. pixeltable/utils/filecache.py +6 -6
  191. pixeltable/utils/formatter.py +86 -48
  192. pixeltable/utils/gcs_store.py +295 -0
  193. pixeltable/utils/http.py +133 -0
  194. pixeltable/utils/http_server.py +2 -3
  195. pixeltable/utils/iceberg.py +1 -2
  196. pixeltable/utils/image.py +17 -0
  197. pixeltable/utils/lancedb.py +90 -0
  198. pixeltable/utils/local_store.py +322 -0
  199. pixeltable/utils/misc.py +5 -0
  200. pixeltable/utils/object_stores.py +573 -0
  201. pixeltable/utils/pydantic.py +60 -0
  202. pixeltable/utils/pytorch.py +5 -6
  203. pixeltable/utils/s3_store.py +527 -0
  204. pixeltable/utils/sql.py +26 -0
  205. pixeltable/utils/system.py +30 -0
  206. pixeltable-0.5.7.dist-info/METADATA +579 -0
  207. pixeltable-0.5.7.dist-info/RECORD +227 -0
  208. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  209. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  210. pixeltable/__version__.py +0 -3
  211. pixeltable/catalog/named_function.py +0 -40
  212. pixeltable/ext/__init__.py +0 -17
  213. pixeltable/ext/functions/__init__.py +0 -11
  214. pixeltable/ext/functions/whisperx.py +0 -77
  215. pixeltable/utils/media_store.py +0 -77
  216. pixeltable/utils/s3.py +0 -17
  217. pixeltable-0.3.14.dist-info/METADATA +0 -434
  218. pixeltable-0.3.14.dist-info/RECORD +0 -186
  219. pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
  220. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/func/mcp.py ADDED
@@ -0,0 +1,74 @@
1
+ import asyncio
2
+ import inspect
3
+ from typing import TYPE_CHECKING, Any
4
+
5
+ import pixeltable as pxt
6
+ from pixeltable import exceptions as excs, type_system as ts
7
+ from pixeltable.func.signature import Parameter
8
+
9
+ if TYPE_CHECKING:
10
+ import mcp
11
+
12
+
13
def mcp_udfs(url: str) -> list['pxt.func.Function']:
    """Return the tools offered by the MCP server at `url` as Pixeltable functions.

    Synchronous convenience wrapper: drives `mcp_udfs_async()` to completion with
    `asyncio.run()`, so it must not be called from a thread that already has a
    running event loop.

    Args:
        url: URL of the MCP server to query.

    Returns:
        One function per tool listed by the server.
    """
    listing_coro = mcp_udfs_async(url)
    return asyncio.run(listing_coro)
15
+
16
+
17
async def mcp_udfs_async(url: str) -> list['pxt.func.Function']:
    """Asynchronously list the tools of the MCP server at `url` and wrap each one as a UDF.

    Opens a streamable-HTTP client session, initializes it, requests the tool
    listing, and converts every listed tool with `mcp_tool_to_udf()`.

    Args:
        url: URL of the MCP server to query.

    Returns:
        One function per tool listed by the server.
    """
    import mcp
    from mcp.client.streamable_http import streamablehttp_client

    tool_listing: mcp.types.ListToolsResult | None = None
    # The third element yielded by the transport is not needed here.
    async with streamablehttp_client(url) as (reader, writer, _):
        async with mcp.ClientSession(reader, writer) as session:
            await session.initialize()
            tool_listing = await session.list_tools()
    # Guards against leaving the `async with` blocks without a result having been assigned.
    assert tool_listing is not None

    return [mcp_tool_to_udf(url, tool) for tool in tool_listing.tools]
31
+
32
+
33
def mcp_tool_to_udf(url: str, mcp_tool: 'mcp.types.Tool') -> 'pxt.func.Function':
    """Wrap a single MCP tool as a Pixeltable function.

    The returned function exposes the properties of the tool's input schema as
    keyword-only parameters. Each call opens a new streamable-HTTP session to
    `url`, invokes the tool by name, and returns the text of the first content
    item of the result.

    Args:
        url: URL of the MCP server that hosts the tool.
        mcp_tool: Tool descriptor (name, description, input schema).

    Returns:
        A `CallableFunction` named after the tool whose return type is always string.

    Raises:
        excs.Error: If the JSON schema of a parameter cannot be mapped to a Pixeltable type.
    """
    import mcp
    from mcp.client.streamable_http import streamablehttp_client

    # NOTE: `invoke` intentionally has no docstring of its own; its `__doc__` is only set
    # from the tool description below, and stays None when the tool has no description.
    async def invoke(**kwargs: Any) -> str:
        # TODO: Cache session objects rather than creating a new one each time?
        async with (
            streamablehttp_client(url) as (read_stream, write_stream, _),
            mcp.ClientSession(read_stream, write_stream) as session,
        ):
            await session.initialize()
            res = await session.call_tool(name=mcp_tool.name, arguments=kwargs)
            # TODO Handle image/audio responses?
            return res.content[0].text  # type: ignore[union-attr]

    if mcp_tool.description is not None:
        invoke.__doc__ = mcp_tool.description

    input_schema = mcp_tool.inputSchema
    # A tool without arguments may legitimately omit 'properties' (and 'required') from its
    # schema; treat a missing or null entry as "no parameters" instead of raising KeyError.
    properties = input_schema.get('properties') or {}
    params = {name: __mcp_param_to_pxt_type(mcp_tool.name, name, param) for name, param in properties.items()}
    required = input_schema.get('required') or []

    # Ensure that any params not appearing in `required` are nullable.
    # (A required param might or might not be nullable, since its type might be an 'anyOf' containing a null.)
    for name in params.keys() - required:
        params[name] = params[name].copy(nullable=True)

    signature = pxt.func.Signature(
        return_type=ts.StringType(),  # Return type is always string
        parameters=[Parameter(name, col_type, inspect.Parameter.KEYWORD_ONLY) for name, col_type in params.items()],
    )

    return pxt.func.CallableFunction(signatures=[signature], py_fns=[invoke], self_name=mcp_tool.name)
68
+
69
+
70
def __mcp_param_to_pxt_type(tool_name: str, name: str, param: dict[str, Any]) -> ts.ColumnType:
    """Map the JSON schema of one MCP tool parameter to a Pixeltable column type.

    Args:
        tool_name: Name of the tool the parameter belongs to (used in the error message only).
        name: Name of the parameter (used in the error message only).
        param: JSON schema fragment describing the parameter.

    Returns:
        The column type produced by `ColumnType.from_json_schema()`.

    Raises:
        excs.Error: If `from_json_schema()` returns None for the schema.
    """
    resolved = ts.ColumnType.from_json_schema(param)
    if resolved is not None:
        return resolved
    raise excs.Error(f'Unknown type schema for MCP parameter {name!r} of tool {tool_name!r}: {param}')
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import inspect
4
4
  from functools import reduce
5
- from typing import TYPE_CHECKING, Any, Callable, Iterable, Optional, Union, overload
5
+ from typing import TYPE_CHECKING, Any, Callable, Iterable, overload
6
6
 
7
7
  from pixeltable import catalog, exceptions as excs, exprs, func, type_system as ts
8
8
 
@@ -10,41 +10,41 @@ from .function import Function
10
10
  from .signature import Signature
11
11
 
12
12
  if TYPE_CHECKING:
13
- from pixeltable import DataFrame
13
+ from pixeltable import Query
14
14
 
15
15
 
16
16
  class QueryTemplateFunction(Function):
17
- """A parameterized query/DataFrame from which an executable DataFrame is created with a function call."""
17
+ """A parameterized query from which an executable Query is created with a function call."""
18
18
 
19
- template_df: Optional['DataFrame']
20
- self_name: Optional[str]
21
- _comment: Optional[str]
19
+ template_df: 'Query' | None
20
+ self_name: str | None
21
+ _comment: str | None
22
22
 
23
23
  @classmethod
24
24
  def create(
25
- cls, template_callable: Callable, param_types: Optional[list[ts.ColumnType]], path: str, name: str
25
+ cls, template_callable: Callable, param_types: list[ts.ColumnType] | None, path: str, name: str
26
26
  ) -> QueryTemplateFunction:
27
27
  # we need to construct a template df and a signature
28
28
  py_sig = inspect.signature(template_callable)
29
29
  py_params = list(py_sig.parameters.values())
30
30
  params = Signature.create_parameters(py_params=py_params, param_types=param_types)
31
- # invoke template_callable with parameter expressions to construct a DataFrame with parameters
31
+ # invoke template_callable with parameter expressions to construct a Query with parameters
32
32
  var_exprs = [exprs.Variable(param.name, param.col_type) for param in params]
33
33
  template_df = template_callable(*var_exprs)
34
- from pixeltable import DataFrame
34
+ from pixeltable import Query
35
35
 
36
- assert isinstance(template_df, DataFrame)
36
+ assert isinstance(template_df, Query)
37
37
  # we take params and return json
38
38
  sig = Signature(return_type=ts.JsonType(), parameters=params)
39
39
  return QueryTemplateFunction(template_df, sig, path=path, name=name, comment=inspect.getdoc(template_callable))
40
40
 
41
41
  def __init__(
42
42
  self,
43
- template_df: Optional['DataFrame'],
43
+ template_df: 'Query' | None,
44
44
  sig: Signature,
45
- path: Optional[str] = None,
46
- name: Optional[str] = None,
47
- comment: Optional[str] = None,
45
+ path: str | None = None,
46
+ name: str | None = None,
47
+ comment: str | None = None,
48
48
  ):
49
49
  assert sig is not None
50
50
  super().__init__([sig], self_path=path)
@@ -82,7 +82,7 @@ class QueryTemplateFunction(Function):
82
82
  def name(self) -> str:
83
83
  return self.self_name
84
84
 
85
- def comment(self) -> Optional[str]:
85
+ def comment(self) -> str | None:
86
86
  return self._comment
87
87
 
88
88
  def _as_dict(self) -> dict:
@@ -90,9 +90,9 @@ class QueryTemplateFunction(Function):
90
90
 
91
91
  @classmethod
92
92
  def _from_dict(cls, d: dict) -> Function:
93
- from pixeltable.dataframe import DataFrame
93
+ from pixeltable._query import Query
94
94
 
95
- return cls(DataFrame.from_dict(d['df']), Signature.from_dict(d['signature']), name=d['name'])
95
+ return cls(Query.from_dict(d['df']), Signature.from_dict(d['signature']), name=d['name'])
96
96
 
97
97
 
98
98
  @overload
@@ -100,11 +100,11 @@ def query(py_fn: Callable) -> QueryTemplateFunction: ...
100
100
 
101
101
 
102
102
  @overload
103
- def query(*, param_types: Optional[list[ts.ColumnType]] = None) -> Callable[[Callable], QueryTemplateFunction]: ...
103
+ def query(*, param_types: list[ts.ColumnType] | None = None) -> Callable[[Callable], QueryTemplateFunction]: ...
104
104
 
105
105
 
106
106
  def query(*args: Any, **kwargs: Any) -> Any:
107
- def make_query_template(py_fn: Callable, param_types: Optional[list[ts.ColumnType]]) -> QueryTemplateFunction:
107
+ def make_query_template(py_fn: Callable, param_types: list[ts.ColumnType] | None) -> QueryTemplateFunction:
108
108
  if py_fn.__module__ != '__main__' and py_fn.__name__.isidentifier():
109
109
  # this is a named function in a module
110
110
  function_path = f'{py_fn.__module__}.{py_fn.__qualname__}'
@@ -127,10 +127,10 @@ def query(*args: Any, **kwargs: Any) -> Any:
127
127
 
128
128
  def retrieval_udf(
129
129
  table: catalog.Table,
130
- name: Optional[str] = None,
131
- description: Optional[str] = None,
132
- parameters: Optional[Iterable[Union[str, exprs.ColumnRef]]] = None,
133
- limit: Optional[int] = 10,
130
+ name: str | None = None,
131
+ description: str | None = None,
132
+ parameters: Iterable[str | exprs.ColumnRef] | None = None,
133
+ limit: int | None = 10,
134
134
  ) -> func.QueryTemplateFunction:
135
135
  """
136
136
  Constructs a retrieval UDF for the given table. The retrieval UDF is a UDF whose parameters are
@@ -157,18 +157,20 @@ def retrieval_udf(
157
157
  """
158
158
  # Argument validation
159
159
  col_refs: list[exprs.ColumnRef]
160
+ # TODO: get rid of references to ColumnRef internals and replace instead with a public interface
161
+ col_names = table.columns()
160
162
  if parameters is None:
161
- col_refs = [table[col_name] for col_name in table.columns if not table[col_name].col.is_computed]
163
+ col_refs = [table[col_name] for col_name in col_names if not table[col_name].col.is_computed]
162
164
  else:
163
165
  for param in parameters:
164
- if isinstance(param, str) and param not in table.columns:
165
- raise excs.Error(f'The specified parameter {param!r} is not a column of the table {table._path!r}')
166
+ if isinstance(param, str) and param not in col_names:
167
+ raise excs.Error(f'The specified parameter {param!r} is not a column of the table {table._path()!r}')
166
168
  col_refs = [table[param] if isinstance(param, str) else param for param in parameters]
167
169
 
168
170
  if len(col_refs) == 0:
169
171
  raise excs.Error('Parameter list cannot be empty.')
170
172
 
171
- # Construct the dataframe
173
+ # Construct the Query
172
174
  predicates = [col_ref == exprs.Variable(col_ref.col.name, col_ref.col.col_type) for col_ref in col_refs]
173
175
  where_clause = reduce(lambda c1, c2: c1 & c2, predicates)
174
176
  df = table.select().where(where_clause)
@@ -4,7 +4,7 @@ import dataclasses
4
4
  import inspect
5
5
  import logging
6
6
  import typing
7
- from typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional
7
+ from typing import TYPE_CHECKING, Any, Callable, ClassVar
8
8
 
9
9
  import pixeltable.exceptions as excs
10
10
  import pixeltable.type_system as ts
@@ -18,11 +18,11 @@ _logger = logging.getLogger('pixeltable')
18
18
  @dataclasses.dataclass
19
19
  class Parameter:
20
20
  name: str
21
- col_type: Optional[ts.ColumnType] # None for variable parameters
21
+ col_type: ts.ColumnType | None # None for variable parameters
22
22
  kind: inspect._ParameterKind
23
23
  # for some reason, this needs to precede is_batched in the dataclass definition,
24
24
  # otherwise Python complains that an argument with a default is followed by an argument without a default
25
- default: Optional['exprs.Literal'] = None # default value for the parameter
25
+ default: 'exprs.Literal' | None = None # default value for the parameter
26
26
  is_batched: bool = False # True if the parameter is a batched parameter (eg, Batch[dict])
27
27
 
28
28
  def __post_init__(self) -> None:
@@ -84,8 +84,28 @@ class Signature:
84
84
  """
85
85
 
86
86
  SPECIAL_PARAM_NAMES: ClassVar[list[str]] = ['group_by', 'order_by']
87
-
88
- def __init__(self, return_type: ts.ColumnType, parameters: list[Parameter], is_batched: bool = False):
87
+ SYSTEM_PARAM_NAMES: ClassVar[list[str]] = ['_runtime_ctx']
88
+
89
+ return_type: ts.ColumnType
90
+ is_batched: bool
91
+ parameters: dict[str, Parameter] # name -> Parameter
92
+ parameters_by_pos: list[Parameter] # ordered by position in the signature
93
+ constant_parameters: list[Parameter] # parameters that are not batched
94
+ batched_parameters: list[Parameter] # parameters that are batched
95
+ required_parameters: list[Parameter] # parameters that do not have a default value
96
+
97
+ # the names of recognized system parameters in the signature; these are excluded from self.parameters
98
+ system_parameters: list[str]
99
+
100
+ py_signature: inspect.Signature
101
+
102
+ def __init__(
103
+ self,
104
+ return_type: ts.ColumnType,
105
+ parameters: list[Parameter],
106
+ is_batched: bool = False,
107
+ system_parameters: list[str] | None = None,
108
+ ):
89
109
  assert isinstance(return_type, ts.ColumnType)
90
110
  self.return_type = return_type
91
111
  self.is_batched = is_batched
@@ -95,6 +115,7 @@ class Signature:
95
115
  self.constant_parameters = [p for p in parameters if not p.is_batched]
96
116
  self.batched_parameters = [p for p in parameters if p.is_batched]
97
117
  self.required_parameters = [p for p in parameters if not p.has_default()]
118
+ self.system_parameters = system_parameters if system_parameters is not None else []
98
119
  self.py_signature = inspect.Signature([p.to_py_param() for p in self.parameters_by_pos])
99
120
 
100
121
  def get_return_type(self) -> ts.ColumnType:
@@ -151,12 +172,12 @@ class Signature:
151
172
 
152
173
  return True
153
174
 
154
- def validate_args(self, bound_args: dict[str, Optional['exprs.Expr']], context: str = '') -> None:
175
+ def validate_args(self, bound_args: dict[str, 'exprs.Expr' | None], context: str = '') -> None:
155
176
  if context:
156
177
  context = f' ({context})'
157
178
 
158
179
  for param_name, arg in bound_args.items():
159
- assert param_name in self.parameters
180
+ assert param_name in self.parameters, f'{param_name!r} not in {list(self.parameters.keys())}'
160
181
  param = self.parameters[param_name]
161
182
  is_var_param = param.kind in {inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD}
162
183
  if is_var_param:
@@ -206,15 +227,15 @@ class Signature:
206
227
  elif p.kind == inspect.Parameter.VAR_KEYWORD:
207
228
  param_strs.append(f'**{p.name}')
208
229
  else:
209
- param_strs.append(f'{p.name}: {p.col_type}')
210
- return f'({", ".join(param_strs)}) -> {self.get_return_type()}'
230
+ param_strs.append(f'{p.name}: pxt.{p.col_type}')
231
+ return f'({", ".join(param_strs)}) -> pxt.{self.get_return_type()}'
211
232
 
212
233
  @classmethod
213
- def _infer_type(cls, annotation: Optional[type]) -> tuple[Optional[ts.ColumnType], Optional[bool]]:
234
+ def _infer_type(cls, annotation: type | None) -> tuple[ts.ColumnType | None, bool | None]:
214
235
  """Returns: (column type, is_batched) or (None, ...) if the type cannot be inferred"""
215
236
  if annotation is None:
216
237
  return (None, None)
217
- py_type: Optional[type] = None
238
+ py_type: type | None = None
218
239
  is_batched = False
219
240
  if typing.get_origin(annotation) == typing.Annotated:
220
241
  type_args = typing.get_args(annotation)
@@ -231,12 +252,13 @@ class Signature:
231
252
  @classmethod
232
253
  def create_parameters(
233
254
  cls,
234
- py_fn: Optional[Callable] = None,
235
- py_params: Optional[list[inspect.Parameter]] = None,
236
- param_types: Optional[list[ts.ColumnType]] = None,
237
- type_substitutions: Optional[dict] = None,
255
+ py_fn: Callable | None = None,
256
+ py_params: list[inspect.Parameter] | None = None,
257
+ param_types: list[ts.ColumnType] | None = None,
258
+ type_substitutions: dict | None = None,
238
259
  is_cls_method: bool = False,
239
260
  ) -> list[Parameter]:
261
+ """Ignores parameters starting with '_'."""
240
262
  from pixeltable import exprs
241
263
 
242
264
  assert (py_fn is None) != (py_params is None)
@@ -251,6 +273,10 @@ class Signature:
251
273
  for idx, param in enumerate(py_params):
252
274
  if is_cls_method and idx == 0:
253
275
  continue # skip 'self' or 'cls' parameter
276
+ if param.name in cls.SYSTEM_PARAM_NAMES:
277
+ continue # skip system parameters
278
+ if param.name.startswith('_'):
279
+ raise excs.Error(f"{param.name!r}: parameters starting with '_' are reserved")
254
280
  if param.name in cls.SPECIAL_PARAM_NAMES:
255
281
  raise excs.Error(f'{param.name!r} is a reserved parameter name')
256
282
  if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD):
@@ -284,9 +310,9 @@ class Signature:
284
310
  def create(
285
311
  cls,
286
312
  py_fn: Callable,
287
- param_types: Optional[list[ts.ColumnType]] = None,
288
- return_type: Optional[ts.ColumnType] = None,
289
- type_substitutions: Optional[dict] = None,
313
+ param_types: list[ts.ColumnType] | None = None,
314
+ return_type: ts.ColumnType | None = None,
315
+ type_substitutions: dict | None = None,
290
316
  is_cls_method: bool = False,
291
317
  ) -> Signature:
292
318
  """Create a signature for the given Callable.
@@ -308,5 +334,6 @@ class Signature:
308
334
  raise excs.Error('Cannot infer pixeltable return type')
309
335
  else:
310
336
  _, return_is_batched = cls._infer_type(sig.return_annotation)
337
+ system_params = [param_name for param_name in sig.parameters if param_name in cls.SYSTEM_PARAM_NAMES]
311
338
 
312
- return Signature(return_type, parameters, return_is_batched)
339
+ return Signature(return_type, parameters, return_is_batched, system_parameters=system_params)
pixeltable/func/tools.py CHANGED
@@ -1,8 +1,10 @@
1
- from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, Union
1
+ import json
2
+ import uuid
3
+ from typing import TYPE_CHECKING, Any, Callable, TypeVar
2
4
 
3
5
  import pydantic
4
6
 
5
- import pixeltable.exceptions as excs
7
+ from pixeltable import exceptions as excs, type_system as ts
6
8
 
7
9
  from .function import Function
8
10
  from .signature import Parameter
@@ -28,8 +30,8 @@ class Tool(pydantic.BaseModel):
28
30
  model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)
29
31
 
30
32
  fn: Function
31
- name: Optional[str] = None
32
- description: Optional[str] = None
33
+ name: str | None = None
34
+ description: str | None = None
33
35
 
34
36
  @property
35
37
  def parameters(self) -> dict[str, Parameter]:
@@ -69,13 +71,17 @@ class Tool(pydantic.BaseModel):
69
71
  return _extract_float_tool_arg(kwargs, param_name=param.name)
70
72
  if param.col_type.is_bool_type():
71
73
  return _extract_bool_tool_arg(kwargs, param_name=param.name)
72
- raise AssertionError()
74
+ if param.col_type.is_json_type():
75
+ return _extract_json_tool_arg(kwargs, param_name=param.name)
76
+ if param.col_type.is_uuid_type():
77
+ return _extract_uuid_tool_arg(kwargs, param_name=param.name)
78
+ raise AssertionError(param.col_type)
73
79
 
74
80
 
75
81
  class ToolChoice(pydantic.BaseModel):
76
82
  auto: bool
77
83
  required: bool
78
- tool: Optional[str]
84
+ tool: str | None
79
85
  parallel_tool_calls: bool
80
86
 
81
87
 
@@ -97,12 +103,12 @@ class Tools(pydantic.BaseModel):
97
103
  self,
98
104
  auto: bool = False,
99
105
  required: bool = False,
100
- tool: Union[str, Function, None] = None,
106
+ tool: str | Function | None = None,
101
107
  parallel_tool_calls: bool = True,
102
108
  ) -> ToolChoice:
103
109
  if sum([auto, required, tool is not None]) != 1:
104
110
  raise excs.Error('Exactly one of `auto`, `required`, or `tool` must be specified.')
105
- tool_name: Optional[str] = None
111
+ tool_name: str | None = None
106
112
  if tool is not None:
107
113
  try:
108
114
  tool_obj = next(
@@ -118,29 +124,41 @@ class Tools(pydantic.BaseModel):
118
124
 
119
125
 
120
126
  @udf
121
- def _extract_str_tool_arg(kwargs: dict[str, Any], param_name: str) -> Optional[str]:
127
+ def _extract_str_tool_arg(kwargs: dict[str, Any], param_name: str) -> str | None:
122
128
  return _extract_arg(str, kwargs, param_name)
123
129
 
124
130
 
125
131
  @udf
126
- def _extract_int_tool_arg(kwargs: dict[str, Any], param_name: str) -> Optional[int]:
132
+ def _extract_int_tool_arg(kwargs: dict[str, Any], param_name: str) -> int | None:
127
133
  return _extract_arg(int, kwargs, param_name)
128
134
 
129
135
 
130
136
  @udf
131
- def _extract_float_tool_arg(kwargs: dict[str, Any], param_name: str) -> Optional[float]:
137
+ def _extract_float_tool_arg(kwargs: dict[str, Any], param_name: str) -> float | None:
132
138
  return _extract_arg(float, kwargs, param_name)
133
139
 
134
140
 
135
141
  @udf
136
- def _extract_bool_tool_arg(kwargs: dict[str, Any], param_name: str) -> Optional[bool]:
142
+ def _extract_bool_tool_arg(kwargs: dict[str, Any], param_name: str) -> bool | None:
137
143
  return _extract_arg(bool, kwargs, param_name)
138
144
 
139
145
 
146
# Extracts the tool argument `param_name` from `kwargs` and decodes it with `json.loads()`.
# Yields None when the argument is absent from `kwargs`.
@udf
def _extract_json_tool_arg(kwargs: dict[str, Any], param_name: str) -> ts.Json | None:
    if param_name not in kwargs:
        return None
    raw_value = kwargs[param_name]
    return json.loads(raw_value)
151
+
152
+
153
# Extracts the tool argument `param_name` from `kwargs` and converts it with `uuid.UUID`;
# the presence check and the conversion call are delegated to `_extract_arg()`.
@udf
def _extract_uuid_tool_arg(kwargs: dict[str, Any], param_name: str) -> uuid.UUID | None:
    return _extract_arg(eval_fn=uuid.UUID, kwargs=kwargs, param_name=param_name)
156
+
157
+
140
158
  T = TypeVar('T')
141
159
 
142
160
 
143
- def _extract_arg(eval_fn: Callable[[Any], T], kwargs: dict[str, Any], param_name: str) -> Optional[T]:
161
+ def _extract_arg(eval_fn: Callable[[Any], T], kwargs: dict[str, Any], param_name: str) -> T | None:
144
162
  if param_name in kwargs:
145
163
  return eval_fn(kwargs[param_name])
146
164
  return None
pixeltable/func/udf.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import inspect
4
- from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence, overload
4
+ from typing import TYPE_CHECKING, Any, Callable, Sequence, overload
5
5
 
6
6
  import pixeltable.exceptions as excs
7
7
  import pixeltable.type_system as ts
@@ -26,12 +26,12 @@ def udf(decorated_fn: Callable) -> CallableFunction: ...
26
26
  @overload
27
27
  def udf(
28
28
  *,
29
- batch_size: Optional[int] = None,
30
- substitute_fn: Optional[Callable] = None,
29
+ batch_size: int | None = None,
30
+ substitute_fn: Callable | None = None,
31
31
  is_method: bool = False,
32
32
  is_property: bool = False,
33
- resource_pool: Optional[str] = None,
34
- type_substitutions: Optional[Sequence[dict]] = None,
33
+ resource_pool: str | None = None,
34
+ type_substitutions: Sequence[dict] | None = None,
35
35
  _force_stored: bool = False,
36
36
  ) -> Callable[[Callable], CallableFunction]: ...
37
37
 
@@ -39,7 +39,7 @@ def udf(
39
39
  # pxt.udf() called explicitly on a Table:
40
40
  @overload
41
41
  def udf(
42
- table: catalog.Table, /, *, return_value: Any = None, description: Optional[str] = None
42
+ table: catalog.Table, /, *, return_value: Any = None, description: str | None = None
43
43
  ) -> ExprTemplateFunction: ...
44
44
 
45
45
 
@@ -96,15 +96,15 @@ def udf(*args, **kwargs): # type: ignore[no-untyped-def]
96
96
 
97
97
  def make_function(
98
98
  decorated_fn: Callable,
99
- return_type: Optional[ts.ColumnType] = None,
100
- param_types: Optional[list[ts.ColumnType]] = None,
101
- batch_size: Optional[int] = None,
102
- substitute_fn: Optional[Callable] = None,
99
+ return_type: ts.ColumnType | None = None,
100
+ param_types: list[ts.ColumnType] | None = None,
101
+ batch_size: int | None = None,
102
+ substitute_fn: Callable | None = None,
103
103
  is_method: bool = False,
104
104
  is_property: bool = False,
105
- resource_pool: Optional[str] = None,
106
- type_substitutions: Optional[Sequence[dict]] = None,
107
- function_name: Optional[str] = None,
105
+ resource_pool: str | None = None,
106
+ type_substitutions: Sequence[dict] | None = None,
107
+ function_name: str | None = None,
108
108
  force_stored: bool = False,
109
109
  ) -> CallableFunction:
110
110
  """
@@ -201,11 +201,11 @@ def expr_udf(py_fn: Callable) -> ExprTemplateFunction: ...
201
201
 
202
202
 
203
203
  @overload
204
- def expr_udf(*, param_types: Optional[list[ts.ColumnType]] = None) -> Callable[[Callable], ExprTemplateFunction]: ...
204
+ def expr_udf(*, param_types: list[ts.ColumnType] | None = None) -> Callable[[Callable], ExprTemplateFunction]: ...
205
205
 
206
206
 
207
207
  def expr_udf(*args: Any, **kwargs: Any) -> Any:
208
- def make_expr_template(py_fn: Callable, param_types: Optional[list[ts.ColumnType]]) -> ExprTemplateFunction:
208
+ def make_expr_template(py_fn: Callable, param_types: list[ts.ColumnType] | None) -> ExprTemplateFunction:
209
209
  from pixeltable import exprs
210
210
 
211
211
  if py_fn.__module__ != '__main__' and py_fn.__name__.isidentifier():
@@ -237,9 +237,7 @@ def expr_udf(*args: Any, **kwargs: Any) -> Any:
237
237
  return lambda py_fn: make_expr_template(py_fn, kwargs['param_types'])
238
238
 
239
239
 
240
- def from_table(
241
- tbl: catalog.Table, return_value: Optional['exprs.Expr'], description: Optional[str]
242
- ) -> ExprTemplateFunction:
240
+ def from_table(tbl: catalog.Table, return_value: 'exprs.Expr' | None, description: str | None) -> ExprTemplateFunction:
243
241
  """
244
242
  Constructs an `ExprTemplateFunction` from a `Table`.
245
243
 
@@ -262,7 +260,7 @@ def from_table(
262
260
  """
263
261
  from pixeltable import exprs
264
262
 
265
- ancestors = [tbl, *tbl._base_tables]
263
+ ancestors = [tbl, *tbl._get_base_tables()]
266
264
  ancestors.reverse() # We must traverse the ancestors in order from base to derived
267
265
 
268
266
  subst: dict[exprs.Expr, exprs.Expr] = {}
@@ -297,7 +295,7 @@ def from_table(
297
295
 
298
296
  if description is None:
299
297
  # Default description is the table comment
300
- description = tbl._comment
298
+ description = tbl._get_comment()
301
299
  if len(description) == 0:
302
300
  description = f"UDF for table '{tbl._name}'"
303
301
 
@@ -1,3 +1,9 @@
1
+ """
2
+ General Pixeltable UDFs.
3
+
4
+ This parent module contains general-purpose UDFs that apply to multiple data types.
5
+ """
6
+
1
7
  # ruff: noqa: F401
2
8
 
3
9
  from pixeltable.utils.code import local_public_names
@@ -8,23 +14,33 @@ from . import (
8
14
  bedrock,
9
15
  date,
10
16
  deepseek,
17
+ fal,
11
18
  fireworks,
12
19
  gemini,
20
+ groq,
13
21
  huggingface,
14
22
  image,
15
23
  json,
16
24
  llama_cpp,
17
25
  math,
18
26
  mistralai,
27
+ net,
19
28
  ollama,
20
29
  openai,
30
+ openrouter,
21
31
  replicate,
32
+ reve,
22
33
  string,
23
34
  timestamp,
24
35
  together,
36
+ twelvelabs,
37
+ uuid,
25
38
  video,
26
39
  vision,
40
+ voyageai,
27
41
  whisper,
42
+ whisperx,
43
+ yolox,
28
44
  )
29
45
  from .globals import count, map, max, mean, min, sum
30
46