pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245)
  1. pixeltable/__init__.py +83 -19
  2. pixeltable/_query.py +1444 -0
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +7 -4
  5. pixeltable/catalog/catalog.py +2394 -119
  6. pixeltable/catalog/column.py +225 -104
  7. pixeltable/catalog/dir.py +38 -9
  8. pixeltable/catalog/globals.py +53 -34
  9. pixeltable/catalog/insertable_table.py +265 -115
  10. pixeltable/catalog/path.py +80 -17
  11. pixeltable/catalog/schema_object.py +28 -43
  12. pixeltable/catalog/table.py +1270 -677
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +1270 -751
  15. pixeltable/catalog/table_version_handle.py +109 -0
  16. pixeltable/catalog/table_version_path.py +137 -42
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +251 -134
  20. pixeltable/config.py +215 -0
  21. pixeltable/env.py +736 -285
  22. pixeltable/exceptions.py +26 -2
  23. pixeltable/exec/__init__.py +7 -2
  24. pixeltable/exec/aggregation_node.py +39 -21
  25. pixeltable/exec/cache_prefetch_node.py +87 -109
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +25 -28
  29. pixeltable/exec/data_row_batch.py +11 -46
  30. pixeltable/exec/exec_context.py +26 -11
  31. pixeltable/exec/exec_node.py +35 -27
  32. pixeltable/exec/expr_eval/__init__.py +3 -0
  33. pixeltable/exec/expr_eval/evaluators.py +365 -0
  34. pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
  35. pixeltable/exec/expr_eval/globals.py +200 -0
  36. pixeltable/exec/expr_eval/row_buffer.py +74 -0
  37. pixeltable/exec/expr_eval/schedulers.py +413 -0
  38. pixeltable/exec/globals.py +35 -0
  39. pixeltable/exec/in_memory_data_node.py +35 -27
  40. pixeltable/exec/object_store_save_node.py +293 -0
  41. pixeltable/exec/row_update_node.py +44 -29
  42. pixeltable/exec/sql_node.py +414 -115
  43. pixeltable/exprs/__init__.py +8 -5
  44. pixeltable/exprs/arithmetic_expr.py +79 -45
  45. pixeltable/exprs/array_slice.py +5 -5
  46. pixeltable/exprs/column_property_ref.py +40 -26
  47. pixeltable/exprs/column_ref.py +254 -61
  48. pixeltable/exprs/comparison.py +14 -9
  49. pixeltable/exprs/compound_predicate.py +9 -10
  50. pixeltable/exprs/data_row.py +213 -72
  51. pixeltable/exprs/expr.py +270 -104
  52. pixeltable/exprs/expr_dict.py +6 -5
  53. pixeltable/exprs/expr_set.py +20 -11
  54. pixeltable/exprs/function_call.py +383 -284
  55. pixeltable/exprs/globals.py +18 -5
  56. pixeltable/exprs/in_predicate.py +7 -7
  57. pixeltable/exprs/inline_expr.py +37 -37
  58. pixeltable/exprs/is_null.py +8 -4
  59. pixeltable/exprs/json_mapper.py +120 -54
  60. pixeltable/exprs/json_path.py +90 -60
  61. pixeltable/exprs/literal.py +61 -16
  62. pixeltable/exprs/method_ref.py +7 -6
  63. pixeltable/exprs/object_ref.py +19 -8
  64. pixeltable/exprs/row_builder.py +238 -75
  65. pixeltable/exprs/rowid_ref.py +53 -15
  66. pixeltable/exprs/similarity_expr.py +65 -50
  67. pixeltable/exprs/sql_element_cache.py +5 -5
  68. pixeltable/exprs/string_op.py +107 -0
  69. pixeltable/exprs/type_cast.py +25 -13
  70. pixeltable/exprs/variable.py +2 -2
  71. pixeltable/func/__init__.py +9 -5
  72. pixeltable/func/aggregate_function.py +197 -92
  73. pixeltable/func/callable_function.py +119 -35
  74. pixeltable/func/expr_template_function.py +101 -48
  75. pixeltable/func/function.py +375 -62
  76. pixeltable/func/function_registry.py +20 -19
  77. pixeltable/func/globals.py +6 -5
  78. pixeltable/func/mcp.py +74 -0
  79. pixeltable/func/query_template_function.py +151 -35
  80. pixeltable/func/signature.py +178 -49
  81. pixeltable/func/tools.py +164 -0
  82. pixeltable/func/udf.py +176 -53
  83. pixeltable/functions/__init__.py +44 -4
  84. pixeltable/functions/anthropic.py +226 -47
  85. pixeltable/functions/audio.py +148 -11
  86. pixeltable/functions/bedrock.py +137 -0
  87. pixeltable/functions/date.py +188 -0
  88. pixeltable/functions/deepseek.py +113 -0
  89. pixeltable/functions/document.py +81 -0
  90. pixeltable/functions/fal.py +76 -0
  91. pixeltable/functions/fireworks.py +72 -20
  92. pixeltable/functions/gemini.py +249 -0
  93. pixeltable/functions/globals.py +208 -53
  94. pixeltable/functions/groq.py +108 -0
  95. pixeltable/functions/huggingface.py +1088 -95
  96. pixeltable/functions/image.py +155 -84
  97. pixeltable/functions/json.py +8 -11
  98. pixeltable/functions/llama_cpp.py +31 -19
  99. pixeltable/functions/math.py +169 -0
  100. pixeltable/functions/mistralai.py +50 -75
  101. pixeltable/functions/net.py +70 -0
  102. pixeltable/functions/ollama.py +29 -36
  103. pixeltable/functions/openai.py +548 -160
  104. pixeltable/functions/openrouter.py +143 -0
  105. pixeltable/functions/replicate.py +15 -14
  106. pixeltable/functions/reve.py +250 -0
  107. pixeltable/functions/string.py +310 -85
  108. pixeltable/functions/timestamp.py +37 -19
  109. pixeltable/functions/together.py +77 -120
  110. pixeltable/functions/twelvelabs.py +188 -0
  111. pixeltable/functions/util.py +7 -2
  112. pixeltable/functions/uuid.py +30 -0
  113. pixeltable/functions/video.py +1528 -117
  114. pixeltable/functions/vision.py +26 -26
  115. pixeltable/functions/voyageai.py +289 -0
  116. pixeltable/functions/whisper.py +19 -10
  117. pixeltable/functions/whisperx.py +179 -0
  118. pixeltable/functions/yolox.py +112 -0
  119. pixeltable/globals.py +716 -236
  120. pixeltable/index/__init__.py +3 -1
  121. pixeltable/index/base.py +17 -21
  122. pixeltable/index/btree.py +32 -22
  123. pixeltable/index/embedding_index.py +155 -92
  124. pixeltable/io/__init__.py +12 -7
  125. pixeltable/io/datarows.py +140 -0
  126. pixeltable/io/external_store.py +83 -125
  127. pixeltable/io/fiftyone.py +24 -33
  128. pixeltable/io/globals.py +47 -182
  129. pixeltable/io/hf_datasets.py +96 -127
  130. pixeltable/io/label_studio.py +171 -156
  131. pixeltable/io/lancedb.py +3 -0
  132. pixeltable/io/pandas.py +136 -115
  133. pixeltable/io/parquet.py +40 -153
  134. pixeltable/io/table_data_conduit.py +702 -0
  135. pixeltable/io/utils.py +100 -0
  136. pixeltable/iterators/__init__.py +8 -4
  137. pixeltable/iterators/audio.py +207 -0
  138. pixeltable/iterators/base.py +9 -3
  139. pixeltable/iterators/document.py +144 -87
  140. pixeltable/iterators/image.py +17 -38
  141. pixeltable/iterators/string.py +15 -12
  142. pixeltable/iterators/video.py +523 -127
  143. pixeltable/metadata/__init__.py +33 -8
  144. pixeltable/metadata/converters/convert_10.py +2 -3
  145. pixeltable/metadata/converters/convert_13.py +2 -2
  146. pixeltable/metadata/converters/convert_15.py +15 -11
  147. pixeltable/metadata/converters/convert_16.py +4 -5
  148. pixeltable/metadata/converters/convert_17.py +4 -5
  149. pixeltable/metadata/converters/convert_18.py +4 -6
  150. pixeltable/metadata/converters/convert_19.py +6 -9
  151. pixeltable/metadata/converters/convert_20.py +3 -6
  152. pixeltable/metadata/converters/convert_21.py +6 -8
  153. pixeltable/metadata/converters/convert_22.py +3 -2
  154. pixeltable/metadata/converters/convert_23.py +33 -0
  155. pixeltable/metadata/converters/convert_24.py +55 -0
  156. pixeltable/metadata/converters/convert_25.py +19 -0
  157. pixeltable/metadata/converters/convert_26.py +23 -0
  158. pixeltable/metadata/converters/convert_27.py +29 -0
  159. pixeltable/metadata/converters/convert_28.py +13 -0
  160. pixeltable/metadata/converters/convert_29.py +110 -0
  161. pixeltable/metadata/converters/convert_30.py +63 -0
  162. pixeltable/metadata/converters/convert_31.py +11 -0
  163. pixeltable/metadata/converters/convert_32.py +15 -0
  164. pixeltable/metadata/converters/convert_33.py +17 -0
  165. pixeltable/metadata/converters/convert_34.py +21 -0
  166. pixeltable/metadata/converters/convert_35.py +9 -0
  167. pixeltable/metadata/converters/convert_36.py +38 -0
  168. pixeltable/metadata/converters/convert_37.py +15 -0
  169. pixeltable/metadata/converters/convert_38.py +39 -0
  170. pixeltable/metadata/converters/convert_39.py +124 -0
  171. pixeltable/metadata/converters/convert_40.py +73 -0
  172. pixeltable/metadata/converters/convert_41.py +12 -0
  173. pixeltable/metadata/converters/convert_42.py +9 -0
  174. pixeltable/metadata/converters/convert_43.py +44 -0
  175. pixeltable/metadata/converters/util.py +44 -18
  176. pixeltable/metadata/notes.py +21 -0
  177. pixeltable/metadata/schema.py +185 -42
  178. pixeltable/metadata/utils.py +74 -0
  179. pixeltable/mypy/__init__.py +3 -0
  180. pixeltable/mypy/mypy_plugin.py +123 -0
  181. pixeltable/plan.py +616 -225
  182. pixeltable/share/__init__.py +3 -0
  183. pixeltable/share/packager.py +797 -0
  184. pixeltable/share/protocol/__init__.py +33 -0
  185. pixeltable/share/protocol/common.py +165 -0
  186. pixeltable/share/protocol/operation_types.py +33 -0
  187. pixeltable/share/protocol/replica.py +119 -0
  188. pixeltable/share/publish.py +349 -0
  189. pixeltable/store.py +398 -232
  190. pixeltable/type_system.py +730 -267
  191. pixeltable/utils/__init__.py +40 -0
  192. pixeltable/utils/arrow.py +201 -29
  193. pixeltable/utils/av.py +298 -0
  194. pixeltable/utils/azure_store.py +346 -0
  195. pixeltable/utils/coco.py +26 -27
  196. pixeltable/utils/code.py +4 -4
  197. pixeltable/utils/console_output.py +46 -0
  198. pixeltable/utils/coroutine.py +24 -0
  199. pixeltable/utils/dbms.py +92 -0
  200. pixeltable/utils/description_helper.py +11 -12
  201. pixeltable/utils/documents.py +60 -61
  202. pixeltable/utils/exception_handler.py +36 -0
  203. pixeltable/utils/filecache.py +38 -22
  204. pixeltable/utils/formatter.py +88 -51
  205. pixeltable/utils/gcs_store.py +295 -0
  206. pixeltable/utils/http.py +133 -0
  207. pixeltable/utils/http_server.py +14 -13
  208. pixeltable/utils/iceberg.py +13 -0
  209. pixeltable/utils/image.py +17 -0
  210. pixeltable/utils/lancedb.py +90 -0
  211. pixeltable/utils/local_store.py +322 -0
  212. pixeltable/utils/misc.py +5 -0
  213. pixeltable/utils/object_stores.py +573 -0
  214. pixeltable/utils/pydantic.py +60 -0
  215. pixeltable/utils/pytorch.py +20 -20
  216. pixeltable/utils/s3_store.py +527 -0
  217. pixeltable/utils/sql.py +32 -5
  218. pixeltable/utils/system.py +30 -0
  219. pixeltable/utils/transactional_directory.py +4 -3
  220. pixeltable-0.5.7.dist-info/METADATA +579 -0
  221. pixeltable-0.5.7.dist-info/RECORD +227 -0
  222. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  223. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  224. pixeltable/__version__.py +0 -3
  225. pixeltable/catalog/named_function.py +0 -36
  226. pixeltable/catalog/path_dict.py +0 -141
  227. pixeltable/dataframe.py +0 -894
  228. pixeltable/exec/expr_eval_node.py +0 -232
  229. pixeltable/ext/__init__.py +0 -14
  230. pixeltable/ext/functions/__init__.py +0 -8
  231. pixeltable/ext/functions/whisperx.py +0 -77
  232. pixeltable/ext/functions/yolox.py +0 -157
  233. pixeltable/tool/create_test_db_dump.py +0 -311
  234. pixeltable/tool/create_test_video.py +0 -81
  235. pixeltable/tool/doc_plugins/griffe.py +0 -50
  236. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  237. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  238. pixeltable/tool/embed_udf.py +0 -9
  239. pixeltable/tool/mypy_plugin.py +0 -55
  240. pixeltable/utils/media_store.py +0 -76
  241. pixeltable/utils/s3.py +0 -16
  242. pixeltable-0.2.26.dist-info/METADATA +0 -400
  243. pixeltable-0.2.26.dist-info/RECORD +0 -156
  244. pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
  245. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -1,45 +1,45 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import dataclasses
4
- import enum
5
4
  import inspect
6
- import json
7
5
  import logging
8
6
  import typing
9
- from typing import Any, Callable, Optional, Union
7
+ from typing import TYPE_CHECKING, Any, Callable, ClassVar
10
8
 
11
9
  import pixeltable.exceptions as excs
12
10
  import pixeltable.type_system as ts
13
11
 
12
+ if TYPE_CHECKING:
13
+ from pixeltable import exprs
14
+
14
15
  _logger = logging.getLogger('pixeltable')
15
16
 
16
17
 
17
18
  @dataclasses.dataclass
18
19
  class Parameter:
19
20
  name: str
20
- col_type: Optional[ts.ColumnType] # None for variable parameters
21
+ col_type: ts.ColumnType | None # None for variable parameters
21
22
  kind: inspect._ParameterKind
22
23
  # for some reason, this needs to precede is_batched in the dataclass definition,
23
24
  # otherwise Python complains that an argument with a default is followed by an argument without a default
24
- default: Any = inspect.Parameter.empty # default value for the parameter
25
+ default: 'exprs.Literal' | None = None # default value for the parameter
25
26
  is_batched: bool = False # True if the parameter is a batched parameter (eg, Batch[dict])
26
27
 
27
28
  def __post_init__(self) -> None:
28
- # make sure that default is json-serializable and of the correct type
29
- if self.default is inspect.Parameter.empty or self.default is None:
30
- return
31
- try:
32
- _ = json.dumps(self.default)
33
- except TypeError:
34
- raise excs.Error(f'Default value for parameter {self.name} is not JSON-serializable: {str(self.default)}')
35
- if self.col_type is not None:
36
- try:
37
- self.col_type.validate_literal(self.default)
38
- except TypeError as e:
39
- raise excs.Error(f'Default value for parameter {self.name}: {str(e)}')
29
+ from pixeltable import exprs
30
+
31
+ if self.default is not None:
32
+ if self.col_type is None:
33
+ raise excs.Error(f'Cannot have a default value for variable parameter {self.name!r}')
34
+ if not isinstance(self.default, exprs.Literal):
35
+ raise excs.Error(f'Default value for parameter {self.name!r} is not a constant')
36
+ if not self.col_type.is_supertype_of(self.default.col_type):
37
+ raise excs.Error(
38
+ f'Default value for parameter {self.name!r} is not of type {self.col_type!r}: {self.default}'
39
+ )
40
40
 
41
41
  def has_default(self) -> bool:
42
- return self.default is not inspect.Parameter.empty
42
+ return self.default is not None
43
43
 
44
44
  def as_dict(self) -> dict[str, Any]:
45
45
  return {
@@ -47,27 +47,29 @@ class Parameter:
47
47
  'col_type': self.col_type.as_dict() if self.col_type is not None else None,
48
48
  'kind': self.kind.name,
49
49
  'is_batched': self.is_batched,
50
- 'has_default': self.has_default(),
51
- 'default': self.default if self.has_default() else None,
50
+ 'default': None if self.default is None else self.default.as_dict(),
52
51
  }
53
52
 
54
53
  @classmethod
55
54
  def from_dict(cls, d: dict[str, Any]) -> Parameter:
56
- has_default = d['has_default']
57
- if has_default:
58
- default = d['default']
59
- else:
60
- default = inspect.Parameter.empty
55
+ from pixeltable import exprs
56
+
57
+ assert d['default'] is None or isinstance(d['default'], dict), d
58
+ default = None if d['default'] is None else exprs.Literal.from_dict(d['default'])
61
59
  return cls(
62
60
  name=d['name'],
63
61
  col_type=ts.ColumnType.from_dict(d['col_type']) if d['col_type'] is not None else None,
64
62
  kind=getattr(inspect.Parameter, d['kind']),
65
63
  is_batched=d['is_batched'],
66
- default=default
64
+ default=default,
67
65
  )
68
66
 
69
67
  def to_py_param(self) -> inspect.Parameter:
70
- return inspect.Parameter(self.name, self.kind, default=self.default)
68
+ py_default = self.default.val if self.default is not None else inspect.Parameter.empty
69
+ return inspect.Parameter(self.name, self.kind, default=py_default)
70
+
71
def __hash__(self) -> int:
    """Return a hash built from name, column type, kind, default, and batched flag.

    Hashing only succeeds when `col_type` and `default` are themselves hashable.
    """
    field_values = (self.name, self.col_type, self.kind, self.default, self.is_batched)
    return hash(field_values)
71
73
 
72
74
 
73
75
  T = typing.TypeVar('T')
@@ -80,9 +82,30 @@ class Signature:
80
82
 
81
83
  - self.is_batched: return type is a Batch[...] type
82
84
  """
83
- SPECIAL_PARAM_NAMES = ['group_by', 'order_by']
84
85
 
85
- def __init__(self, return_type: ts.ColumnType, parameters: list[Parameter], is_batched: bool = False):
86
+ SPECIAL_PARAM_NAMES: ClassVar[list[str]] = ['group_by', 'order_by']
87
+ SYSTEM_PARAM_NAMES: ClassVar[list[str]] = ['_runtime_ctx']
88
+
89
+ return_type: ts.ColumnType
90
+ is_batched: bool
91
+ parameters: dict[str, Parameter] # name -> Parameter
92
+ parameters_by_pos: list[Parameter] # ordered by position in the signature
93
+ constant_parameters: list[Parameter] # parameters that are not batched
94
+ batched_parameters: list[Parameter] # parameters that are batched
95
+ required_parameters: list[Parameter] # parameters that do not have a default value
96
+
97
+ # the names of recognized system parameters in the signature; these are excluded from self.parameters
98
+ system_parameters: list[str]
99
+
100
+ py_signature: inspect.Signature
101
+
102
+ def __init__(
103
+ self,
104
+ return_type: ts.ColumnType,
105
+ parameters: list[Parameter],
106
+ is_batched: bool = False,
107
+ system_parameters: list[str] | None = None,
108
+ ):
86
109
  assert isinstance(return_type, ts.ColumnType)
87
110
  self.return_type = return_type
88
111
  self.is_batched = is_batched
@@ -92,6 +115,7 @@ class Signature:
92
115
  self.constant_parameters = [p for p in parameters if not p.is_batched]
93
116
  self.batched_parameters = [p for p in parameters if p.is_batched]
94
117
  self.required_parameters = [p for p in parameters if not p.has_default()]
118
+ self.system_parameters = system_parameters if system_parameters is not None else []
95
119
  self.py_signature = inspect.Signature([p.to_py_param() for p in self.parameters_by_pos])
96
120
 
97
121
  def get_return_type(self) -> ts.ColumnType:
@@ -111,6 +135,74 @@ class Signature:
111
135
  parameters = [Parameter.from_dict(param_dict) for param_dict in d['parameters']]
112
136
  return cls(ts.ColumnType.from_dict(d['return_type']), parameters, d['is_batched'])
113
137
 
138
def is_consistent_with(self, other: Signature) -> bool:
    """
    Report whether `other` could stand in for this signature wherever this signature is used.

    Writing S for this signature and T for `other`, the answer is True only if all of these hold:
    (i) S's return type is a supertype of T's return type.
    (ii) Every parameter p of S has a same-named parameter q in T with the same kind, and q's
        column type is a supertype of p's. Parameters without a column type (varargs) must be
        untyped on both sides.
    (iii) Every *required* parameter of T also exists by name in S (its kind and type are then
        already covered by (ii)).

    All supertype checks are made with `ignore_nullable=True`.
    """
    # (i) return types
    own_return = self.get_return_type()
    if not own_return.is_supertype_of(other.get_return_type(), ignore_nullable=True):
        return False

    # (ii) each of our parameters needs a compatible counterpart in `other`
    for name, mine in self.parameters.items():
        if name not in other.parameters:
            return False
        theirs = other.parameters[name]

        if mine.kind != theirs.kind:
            return False

        mine_untyped = mine.col_type is None
        theirs_untyped = theirs.col_type is None
        if mine_untyped != theirs_untyped:
            # this can happen if they are varargs
            return False
        if mine_untyped:
            continue  # both untyped: nothing further to compare

        if not theirs.col_type.is_supertype_of(mine.col_type, ignore_nullable=True):
            return False

    # (iii) `other` must not require anything we do not supply
    return all(required.name in self.parameters for required in other.required_parameters)
174
+
175
def validate_args(self, bound_args: dict[str, 'exprs.Expr' | None], context: str = '') -> None:
    """Check bound arguments against the column types of this signature's parameters.

    Args:
        bound_args: maps parameter names to argument expressions; every name must be a parameter
            of this signature.
        context: optional text that is appended, in parentheses, to the parameter name in error
            messages.

    Raises:
        excs.Error: if the argument for a non-variadic parameter is None, or its type does not
            match the parameter's type.
    """
    if context:
        context = f' ({context})'
    variadic_kinds = (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)

    for param_name, arg in bound_args.items():
        assert param_name in self.parameters, f'{param_name!r} not in {list(self.parameters.keys())}'
        param = self.parameters[param_name]
        if param.kind in variadic_kinds:
            # *args / **kwargs carry no column type, so there is nothing to check
            continue
        assert param.col_type is not None

        if arg is None:
            raise excs.Error(f'Parameter {param_name!r}{context}: invalid argument')

        # The argument must be consistent with the expected parameter type. Nullability is ignored:
        # non-nullable parameters can still accept nullable arguments (since in that event,
        # FunctionCall.eval() detects the Nones and skips evaluation).
        if param.col_type.is_supertype_of(arg.col_type, ignore_nullable=True):
            continue

        # TODO: this is a hack to allow JSON columns to be passed to functions that accept scalar
        #     types. It's necessary to avoid littering notebooks with `apply(str)` calls or equivalent.
        #     (Previously, this wasn't necessary because `is_supertype_of()` was improperly implemented.)
        #     We need to think through the right way to handle this scenario.
        if arg.col_type.is_json_type() and param.col_type.is_scalar_type():
            continue

        raise excs.Error(
            f'Parameter {param_name!r}{context}: argument type {arg.col_type} does not'
            f' match parameter type {param.col_type}'
        )
205
+
114
206
  def __eq__(self, other: object) -> bool:
115
207
  if not isinstance(other, Signature):
116
208
  return False
@@ -124,6 +216,9 @@ class Signature:
124
216
  return False
125
217
  return True
126
218
 
219
def __hash__(self) -> int:
    """Return a hash derived from the return type and the number of parameters.

    `self.parameters` is a dict (name -> Parameter). Dicts are unhashable, so hashing it directly,
    as this method previously did, raises TypeError on every call.
    """
    # NOTE(review): the hash is deliberately coarse. It relies only on equal signatures having equal
    # return types and the same number of parameters, so it cannot disagree with __eq__ whichever
    # parameter fields __eq__ compares. Refine once the __eq__ contract is confirmed.
    return hash((self.return_type, len(self.parameters)))
221
+
127
222
  def __str__(self) -> str:
128
223
  param_strs: list[str] = []
129
224
  for p in self.parameters.values():
@@ -132,21 +227,21 @@ class Signature:
132
227
  elif p.kind == inspect.Parameter.VAR_KEYWORD:
133
228
  param_strs.append(f'**{p.name}')
134
229
  else:
135
- param_strs.append(f'{p.name}: {str(p.col_type)}')
136
- return f'({", ".join(param_strs)}) -> {str(self.get_return_type())}'
230
+ param_strs.append(f'{p.name}: pxt.{p.col_type}')
231
+ return f'({", ".join(param_strs)}) -> pxt.{self.get_return_type()}'
137
232
 
138
233
  @classmethod
139
- def _infer_type(cls, annotation: Optional[type]) -> tuple[Optional[ts.ColumnType], Optional[bool]]:
234
+ def _infer_type(cls, annotation: type | None) -> tuple[ts.ColumnType | None, bool | None]:
140
235
  """Returns: (column type, is_batched) or (None, ...) if the type cannot be inferred"""
141
236
  if annotation is None:
142
237
  return (None, None)
143
- py_type: Optional[type] = None
238
+ py_type: type | None = None
144
239
  is_batched = False
145
240
  if typing.get_origin(annotation) == typing.Annotated:
146
241
  type_args = typing.get_args(annotation)
147
242
  if len(type_args) == 2 and type_args[1] == 'pxt-batch':
148
243
  # this is our Batch
149
- assert typing.get_origin(type_args[0]) == list
244
+ assert typing.get_origin(type_args[0]) is list
150
245
  is_batched = True
151
246
  py_type = typing.get_args(type_args[0])[0]
152
247
  if py_type is None:
@@ -156,55 +251,89 @@ class Signature:
156
251
 
157
252
  @classmethod
158
253
  def create_parameters(
159
- cls, py_fn: Optional[Callable] = None, py_params: Optional[list[inspect.Parameter]] = None,
160
- param_types: Optional[list[ts.ColumnType]] = None
254
+ cls,
255
+ py_fn: Callable | None = None,
256
+ py_params: list[inspect.Parameter] | None = None,
257
+ param_types: list[ts.ColumnType] | None = None,
258
+ type_substitutions: dict | None = None,
259
+ is_cls_method: bool = False,
161
260
  ) -> list[Parameter]:
261
+ """Ignores parameters starting with '_'."""
262
+ from pixeltable import exprs
263
+
162
264
  assert (py_fn is None) != (py_params is None)
163
265
  if py_fn is not None:
164
266
  sig = inspect.signature(py_fn)
165
267
  py_params = list(sig.parameters.values())
166
268
  parameters: list[Parameter] = []
167
269
 
270
+ if type_substitutions is None:
271
+ type_substitutions = {}
272
+
168
273
  for idx, param in enumerate(py_params):
274
+ if is_cls_method and idx == 0:
275
+ continue # skip 'self' or 'cls' parameter
276
+ if param.name in cls.SYSTEM_PARAM_NAMES:
277
+ continue # skip system parameters
278
+ if param.name.startswith('_'):
279
+ raise excs.Error(f"{param.name!r}: parameters starting with '_' are reserved")
169
280
  if param.name in cls.SPECIAL_PARAM_NAMES:
170
- raise excs.Error(f"'{param.name}' is a reserved parameter name")
171
- if param.kind == inspect.Parameter.VAR_POSITIONAL or param.kind == inspect.Parameter.VAR_KEYWORD:
281
+ raise excs.Error(f'{param.name!r} is a reserved parameter name')
282
+ if param.kind in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD):
172
283
  parameters.append(Parameter(param.name, col_type=None, kind=param.kind))
173
284
  continue
174
285
 
175
286
  # check non-var parameters for name collisions and default value compatibility
176
287
  if param_types is not None:
177
288
  if idx >= len(param_types):
178
- raise excs.Error(f'Missing type for parameter {param.name}')
289
+ raise excs.Error(f'Missing type for parameter {param.name!r}')
179
290
  param_type = param_types[idx]
180
291
  is_batched = False
181
292
  else:
182
- param_type, is_batched = cls._infer_type(param.annotation)
293
+ # Look up the substitution for param.annotation, defaulting to param.annotation if there is none
294
+ py_type = type_substitutions.get(param.annotation, param.annotation)
295
+ param_type, is_batched = cls._infer_type(py_type)
183
296
  if param_type is None:
184
- raise excs.Error(f'Cannot infer pixeltable type for parameter {param.name}')
297
+ raise excs.Error(f'Cannot infer pixeltable type for parameter {param.name!r}')
185
298
 
186
- parameters.append(Parameter(
187
- param.name, col_type=param_type, kind=param.kind, is_batched=is_batched, default=param.default))
299
+ default = None if param.default is inspect.Parameter.empty else exprs.Expr.from_object(param.default)
300
+ if not (default is None or isinstance(default, exprs.Literal)):
301
+ raise excs.Error(f'Default value for parameter {param.name!r} must be a constant')
302
+
303
+ parameters.append(
304
+ Parameter(param.name, col_type=param_type, kind=param.kind, is_batched=is_batched, default=default)
305
+ )
188
306
 
189
307
  return parameters
190
308
 
191
309
  @classmethod
192
310
  def create(
193
- cls, py_fn: Callable,
194
- param_types: Optional[list[ts.ColumnType]] = None,
195
- return_type: Optional[ts.ColumnType] = None
311
+ cls,
312
+ py_fn: Callable,
313
+ param_types: list[ts.ColumnType] | None = None,
314
+ return_type: ts.ColumnType | None = None,
315
+ type_substitutions: dict | None = None,
316
+ is_cls_method: bool = False,
196
317
  ) -> Signature:
197
318
  """Create a signature for the given Callable.
198
319
  Infer the parameter and return types, if none are specified.
199
320
  Raises an exception if the types cannot be inferred.
200
321
  """
201
- parameters = cls.create_parameters(py_fn=py_fn, param_types=param_types)
322
+ if type_substitutions is None:
323
+ type_substitutions = {}
324
+
325
+ parameters = cls.create_parameters(
326
+ py_fn=py_fn, param_types=param_types, is_cls_method=is_cls_method, type_substitutions=type_substitutions
327
+ )
202
328
  sig = inspect.signature(py_fn)
203
329
  if return_type is None:
204
- return_type, return_is_batched = cls._infer_type(sig.return_annotation)
330
+ # Look up the substitution for sig.return_annotation, defaulting to return_annotation if there is none
331
+ py_type = type_substitutions.get(sig.return_annotation, sig.return_annotation)
332
+ return_type, return_is_batched = cls._infer_type(py_type)
205
333
  if return_type is None:
206
334
  raise excs.Error('Cannot infer pixeltable return type')
207
335
  else:
208
336
  _, return_is_batched = cls._infer_type(sig.return_annotation)
337
+ system_params = [param_name for param_name in sig.parameters if param_name in cls.SYSTEM_PARAM_NAMES]
209
338
 
210
- return Signature(return_type, parameters, return_is_batched)
339
+ return Signature(return_type, parameters, return_is_batched, system_parameters=system_params)
@@ -0,0 +1,164 @@
1
+ import json
2
+ import uuid
3
+ from typing import TYPE_CHECKING, Any, Callable, TypeVar
4
+
5
+ import pydantic
6
+
7
+ from pixeltable import exceptions as excs, type_system as ts
8
+
9
+ from .function import Function
10
+ from .signature import Parameter
11
+ from .udf import udf
12
+
13
+ if TYPE_CHECKING:
14
+ from pixeltable import exprs
15
+
16
+
17
+ # The Tool and Tools classes are containers that hold Pixeltable UDFs and related metadata, so that they can be
18
+ # realized as LLM tools. They are implemented as Pydantic models in order to provide a canonical way of converting
19
+ # to JSON, via the Pydantic `model_serializer` interface. In this way, they can be passed directly as UDF
20
+ # parameters as described in the `pixeltable.tools` and `pixeltable.tool` docstrings.
21
+ #
22
+ # (The dataclass dict serializer is insufficiently flexible for this purpose: `Tool` contains a member of type
23
+ # `Function`, which is not natively JSON-serializable; Pydantic provides a way of customizing its default
24
+ # serialization behavior, whereas dataclasses do not.)
25
+
26
+
27
class Tool(pydantic.BaseModel):
    # Pairs a Pixeltable function with an optional name and description override, so that it can be
    # serialized as a tool definition and invoked from a tool-call expression.

    # `fn` is a Pixeltable function, which Pydantic cannot validate natively, so arbitrary types are
    # allowed; the model_serializer below takes care of serialization.
    model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)

    fn: Function
    name: str | None = None  # used in place of `fn.name` when set
    description: str | None = None  # used in place of `fn.comment()` when set

    @property
    def parameters(self) -> dict[str, Parameter]:
        # Parameters of the wrapped function's signature, keyed by name.
        signature = self.fn.signature
        return signature.parameters

    @pydantic.model_serializer
    def ser_model(self) -> dict[str, Any]:
        # Builds the serialized form: name, description, a JSON-schema style description of the
        # parameters, and the names of all parameters whose column type is not nullable.
        serialized: dict[str, Any] = {}
        serialized['name'] = self.name or self.fn.name
        serialized['description'] = self.description or self.fn.comment()

        params = list(self.parameters.values())
        serialized['parameters'] = {
            'type': 'object',
            'properties': {p.name: p.col_type._to_json_schema() for p in params},
        }
        serialized['required'] = [p.name for p in params if not p.col_type.nullable]
        serialized['additionalProperties'] = False  # TODO Handle kwargs?
        return serialized

    # `tool_calls` must be a dict in standardized tool invocation format:
    # {tool_name: [{'args': {name1: value1, name2: value2, ...}}, ...], ...}
    def invoke(self, tool_calls: 'exprs.Expr') -> 'exprs.Expr':
        import pixeltable.functions as pxtf

        func_name = self.name or self.fn.name
        # '*' selects every recorded call of this tool; each one is mapped to a call of `fn`
        calls_of_this_tool = tool_calls[func_name]['*']
        return pxtf.map(calls_of_this_tool, lambda x: self.__invoke_kwargs(x.args))

    def __invoke_kwargs(self, kwargs: 'exprs.Expr') -> 'exprs.FunctionCall':
        # One extraction expression per parameter, passed to `fn` as keyword arguments.
        call_args = {}
        for param in self.parameters.values():
            call_args[param.name] = self.__extract_tool_arg(param, kwargs)
        return self.fn(**call_args)

    def __extract_tool_arg(self, param: Parameter, kwargs: 'exprs.Expr') -> 'exprs.FunctionCall':
        # Type predicates are tried in this order; the first one that holds picks the extractor.
        extractors_by_predicate = (
            ('is_string_type', _extract_str_tool_arg),
            ('is_int_type', _extract_int_tool_arg),
            ('is_float_type', _extract_float_tool_arg),
            ('is_bool_type', _extract_bool_tool_arg),
            ('is_json_type', _extract_json_tool_arg),
            ('is_uuid_type', _extract_uuid_tool_arg),
        )
        for predicate_name, extractor in extractors_by_predicate:
            if getattr(param.col_type, predicate_name)():
                return extractor(kwargs, param_name=param.name)
        # no extractor exists for this column type
        raise AssertionError(param.col_type)
79
+
80
+
81
class ToolChoice(pydantic.BaseModel):
    # Record of a tool-choice setting. Instances are produced by `Tools.choice()`, which requires
    # exactly one of `auto`, `required`, or `tool` to be specified.
    auto: bool
    required: bool
    tool: str | None  # name of the selected tool, or None when no specific tool was selected
    parallel_tool_calls: bool
86
+
87
+
88
class Tools(pydantic.BaseModel):
    # An ordered collection of `Tool`s that serializes to a list of tool definitions.
    tools: list[Tool]

    @pydantic.model_serializer
    def ser_model(self) -> list[dict[str, Any]]:
        # One serialized entry per tool, in list order.
        serialized = []
        for member in self.tools:
            serialized.append(member.ser_model())
        return serialized

    # `tool_calls` must be in standardized tool invocation format, i.e. the format that
    # `Tool.invoke()` consumes:
    # {tool_name: [{'args': {name1: value1, name2: value2, ...}}, ...], ...}
    def _invoke(self, tool_calls: 'exprs.Expr') -> 'exprs.InlineDict':
        from pixeltable import exprs

        results_by_tool = {}
        for member in self.tools:
            results_by_tool[member.name or member.fn.name] = member.invoke(tool_calls)
        return exprs.InlineDict(results_by_tool)

    def choice(
        self,
        auto: bool = False,
        required: bool = False,
        tool: str | Function | None = None,
        parallel_tool_calls: bool = True,
    ) -> ToolChoice:
        # Builds a ToolChoice. Exactly one of `auto`, `required`, `tool` must be given; `tool` may be
        # either a function or a tool name and must identify one of the tools in this collection.
        if sum((auto, required, tool is not None)) != 1:
            raise excs.Error('Exactly one of `auto`, `required`, or `tool` must be specified.')

        def _is_requested(candidate: Tool) -> bool:
            if isinstance(tool, Function):
                return bool(candidate.fn == tool)
            if isinstance(tool, str):
                return (candidate.name or candidate.fn.name) == tool
            return False

        tool_name: str | None = None
        if tool is not None:
            match = next((candidate for candidate in self.tools if _is_requested(candidate)), None)
            if match is None:
                raise excs.Error(f'That tool is not in the specified list of tools: {tool}')
            tool_name = match.name or match.fn.name

        return ToolChoice(auto=auto, required=required, tool=tool_name, parallel_tool_calls=parallel_tool_calls)
124
+
125
+
126
@udf
def _extract_str_tool_arg(kwargs: dict[str, Any], param_name: str) -> str | None:
    # Looks up `param_name` in `kwargs` via `_extract_arg`, coercing the value with `str`;
    # the result is None when the name is not in `kwargs`.
    extracted = _extract_arg(str, kwargs, param_name)
    return extracted
129
+
130
+
131
@udf
def _extract_int_tool_arg(kwargs: dict[str, Any], param_name: str) -> int | None:
    # Looks up `param_name` in `kwargs` via `_extract_arg`, coercing the value with `int`;
    # the result is None when the name is not in `kwargs`.
    extracted = _extract_arg(int, kwargs, param_name)
    return extracted
134
+
135
+
136
@udf
def _extract_float_tool_arg(kwargs: dict[str, Any], param_name: str) -> float | None:
    # Looks up `param_name` in `kwargs` via `_extract_arg`, coercing the value with `float`;
    # the result is None when the name is not in `kwargs`.
    extracted = _extract_arg(float, kwargs, param_name)
    return extracted
139
+
140
+
141
@udf
def _extract_bool_tool_arg(kwargs: dict[str, Any], param_name: str) -> bool | None:
    # Looks up `param_name` in `kwargs` via `_extract_arg`, coercing the value with `bool`;
    # the result is None when the name is not in `kwargs`.
    # NOTE(review): `bool()` of any non-empty string is True, including 'false'. Confirm that
    # boolean arguments arrive as real booleans rather than as strings.
    extracted = _extract_arg(bool, kwargs, param_name)
    return extracted
144
+
145
+
146
@udf
def _extract_json_tool_arg(kwargs: dict[str, Any], param_name: str) -> ts.Json | None:
    # Returns the JSON value stored under `param_name` in `kwargs`, or None when the name is absent
    # or its value is None.
    #
    # `json.loads` only accepts str/bytes/bytearray and raises TypeError for anything else. A value
    # that is already decoded (dict, list, number, bool) is therefore returned unchanged instead of
    # being parsed a second time.
    value = kwargs.get(param_name)
    if value is None:
        return None
    if isinstance(value, (str, bytes, bytearray)):
        return json.loads(value)
    return value
151
+
152
+
153
@udf
def _extract_uuid_tool_arg(kwargs: dict[str, Any], param_name: str) -> uuid.UUID | None:
    # Looks up `param_name` in `kwargs` via `_extract_arg`, converting the value with `uuid.UUID`;
    # the result is None when the name is not in `kwargs`.
    extracted = _extract_arg(uuid.UUID, kwargs, param_name)
    return extracted
156
+
157
+
158
+ T = TypeVar('T')
159
+
160
+
161
+ def _extract_arg(eval_fn: Callable[[Any], T], kwargs: dict[str, Any], param_name: str) -> T | None:
162
+ if param_name in kwargs:
163
+ return eval_fn(kwargs[param_name])
164
+ return None