pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. pixeltable/__init__.py +83 -19
  2. pixeltable/_query.py +1444 -0
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +7 -4
  5. pixeltable/catalog/catalog.py +2394 -119
  6. pixeltable/catalog/column.py +225 -104
  7. pixeltable/catalog/dir.py +38 -9
  8. pixeltable/catalog/globals.py +53 -34
  9. pixeltable/catalog/insertable_table.py +265 -115
  10. pixeltable/catalog/path.py +80 -17
  11. pixeltable/catalog/schema_object.py +28 -43
  12. pixeltable/catalog/table.py +1270 -677
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +1270 -751
  15. pixeltable/catalog/table_version_handle.py +109 -0
  16. pixeltable/catalog/table_version_path.py +137 -42
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +251 -134
  20. pixeltable/config.py +215 -0
  21. pixeltable/env.py +736 -285
  22. pixeltable/exceptions.py +26 -2
  23. pixeltable/exec/__init__.py +7 -2
  24. pixeltable/exec/aggregation_node.py +39 -21
  25. pixeltable/exec/cache_prefetch_node.py +87 -109
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +25 -28
  29. pixeltable/exec/data_row_batch.py +11 -46
  30. pixeltable/exec/exec_context.py +26 -11
  31. pixeltable/exec/exec_node.py +35 -27
  32. pixeltable/exec/expr_eval/__init__.py +3 -0
  33. pixeltable/exec/expr_eval/evaluators.py +365 -0
  34. pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
  35. pixeltable/exec/expr_eval/globals.py +200 -0
  36. pixeltable/exec/expr_eval/row_buffer.py +74 -0
  37. pixeltable/exec/expr_eval/schedulers.py +413 -0
  38. pixeltable/exec/globals.py +35 -0
  39. pixeltable/exec/in_memory_data_node.py +35 -27
  40. pixeltable/exec/object_store_save_node.py +293 -0
  41. pixeltable/exec/row_update_node.py +44 -29
  42. pixeltable/exec/sql_node.py +414 -115
  43. pixeltable/exprs/__init__.py +8 -5
  44. pixeltable/exprs/arithmetic_expr.py +79 -45
  45. pixeltable/exprs/array_slice.py +5 -5
  46. pixeltable/exprs/column_property_ref.py +40 -26
  47. pixeltable/exprs/column_ref.py +254 -61
  48. pixeltable/exprs/comparison.py +14 -9
  49. pixeltable/exprs/compound_predicate.py +9 -10
  50. pixeltable/exprs/data_row.py +213 -72
  51. pixeltable/exprs/expr.py +270 -104
  52. pixeltable/exprs/expr_dict.py +6 -5
  53. pixeltable/exprs/expr_set.py +20 -11
  54. pixeltable/exprs/function_call.py +383 -284
  55. pixeltable/exprs/globals.py +18 -5
  56. pixeltable/exprs/in_predicate.py +7 -7
  57. pixeltable/exprs/inline_expr.py +37 -37
  58. pixeltable/exprs/is_null.py +8 -4
  59. pixeltable/exprs/json_mapper.py +120 -54
  60. pixeltable/exprs/json_path.py +90 -60
  61. pixeltable/exprs/literal.py +61 -16
  62. pixeltable/exprs/method_ref.py +7 -6
  63. pixeltable/exprs/object_ref.py +19 -8
  64. pixeltable/exprs/row_builder.py +238 -75
  65. pixeltable/exprs/rowid_ref.py +53 -15
  66. pixeltable/exprs/similarity_expr.py +65 -50
  67. pixeltable/exprs/sql_element_cache.py +5 -5
  68. pixeltable/exprs/string_op.py +107 -0
  69. pixeltable/exprs/type_cast.py +25 -13
  70. pixeltable/exprs/variable.py +2 -2
  71. pixeltable/func/__init__.py +9 -5
  72. pixeltable/func/aggregate_function.py +197 -92
  73. pixeltable/func/callable_function.py +119 -35
  74. pixeltable/func/expr_template_function.py +101 -48
  75. pixeltable/func/function.py +375 -62
  76. pixeltable/func/function_registry.py +20 -19
  77. pixeltable/func/globals.py +6 -5
  78. pixeltable/func/mcp.py +74 -0
  79. pixeltable/func/query_template_function.py +151 -35
  80. pixeltable/func/signature.py +178 -49
  81. pixeltable/func/tools.py +164 -0
  82. pixeltable/func/udf.py +176 -53
  83. pixeltable/functions/__init__.py +44 -4
  84. pixeltable/functions/anthropic.py +226 -47
  85. pixeltable/functions/audio.py +148 -11
  86. pixeltable/functions/bedrock.py +137 -0
  87. pixeltable/functions/date.py +188 -0
  88. pixeltable/functions/deepseek.py +113 -0
  89. pixeltable/functions/document.py +81 -0
  90. pixeltable/functions/fal.py +76 -0
  91. pixeltable/functions/fireworks.py +72 -20
  92. pixeltable/functions/gemini.py +249 -0
  93. pixeltable/functions/globals.py +208 -53
  94. pixeltable/functions/groq.py +108 -0
  95. pixeltable/functions/huggingface.py +1088 -95
  96. pixeltable/functions/image.py +155 -84
  97. pixeltable/functions/json.py +8 -11
  98. pixeltable/functions/llama_cpp.py +31 -19
  99. pixeltable/functions/math.py +169 -0
  100. pixeltable/functions/mistralai.py +50 -75
  101. pixeltable/functions/net.py +70 -0
  102. pixeltable/functions/ollama.py +29 -36
  103. pixeltable/functions/openai.py +548 -160
  104. pixeltable/functions/openrouter.py +143 -0
  105. pixeltable/functions/replicate.py +15 -14
  106. pixeltable/functions/reve.py +250 -0
  107. pixeltable/functions/string.py +310 -85
  108. pixeltable/functions/timestamp.py +37 -19
  109. pixeltable/functions/together.py +77 -120
  110. pixeltable/functions/twelvelabs.py +188 -0
  111. pixeltable/functions/util.py +7 -2
  112. pixeltable/functions/uuid.py +30 -0
  113. pixeltable/functions/video.py +1528 -117
  114. pixeltable/functions/vision.py +26 -26
  115. pixeltable/functions/voyageai.py +289 -0
  116. pixeltable/functions/whisper.py +19 -10
  117. pixeltable/functions/whisperx.py +179 -0
  118. pixeltable/functions/yolox.py +112 -0
  119. pixeltable/globals.py +716 -236
  120. pixeltable/index/__init__.py +3 -1
  121. pixeltable/index/base.py +17 -21
  122. pixeltable/index/btree.py +32 -22
  123. pixeltable/index/embedding_index.py +155 -92
  124. pixeltable/io/__init__.py +12 -7
  125. pixeltable/io/datarows.py +140 -0
  126. pixeltable/io/external_store.py +83 -125
  127. pixeltable/io/fiftyone.py +24 -33
  128. pixeltable/io/globals.py +47 -182
  129. pixeltable/io/hf_datasets.py +96 -127
  130. pixeltable/io/label_studio.py +171 -156
  131. pixeltable/io/lancedb.py +3 -0
  132. pixeltable/io/pandas.py +136 -115
  133. pixeltable/io/parquet.py +40 -153
  134. pixeltable/io/table_data_conduit.py +702 -0
  135. pixeltable/io/utils.py +100 -0
  136. pixeltable/iterators/__init__.py +8 -4
  137. pixeltable/iterators/audio.py +207 -0
  138. pixeltable/iterators/base.py +9 -3
  139. pixeltable/iterators/document.py +144 -87
  140. pixeltable/iterators/image.py +17 -38
  141. pixeltable/iterators/string.py +15 -12
  142. pixeltable/iterators/video.py +523 -127
  143. pixeltable/metadata/__init__.py +33 -8
  144. pixeltable/metadata/converters/convert_10.py +2 -3
  145. pixeltable/metadata/converters/convert_13.py +2 -2
  146. pixeltable/metadata/converters/convert_15.py +15 -11
  147. pixeltable/metadata/converters/convert_16.py +4 -5
  148. pixeltable/metadata/converters/convert_17.py +4 -5
  149. pixeltable/metadata/converters/convert_18.py +4 -6
  150. pixeltable/metadata/converters/convert_19.py +6 -9
  151. pixeltable/metadata/converters/convert_20.py +3 -6
  152. pixeltable/metadata/converters/convert_21.py +6 -8
  153. pixeltable/metadata/converters/convert_22.py +3 -2
  154. pixeltable/metadata/converters/convert_23.py +33 -0
  155. pixeltable/metadata/converters/convert_24.py +55 -0
  156. pixeltable/metadata/converters/convert_25.py +19 -0
  157. pixeltable/metadata/converters/convert_26.py +23 -0
  158. pixeltable/metadata/converters/convert_27.py +29 -0
  159. pixeltable/metadata/converters/convert_28.py +13 -0
  160. pixeltable/metadata/converters/convert_29.py +110 -0
  161. pixeltable/metadata/converters/convert_30.py +63 -0
  162. pixeltable/metadata/converters/convert_31.py +11 -0
  163. pixeltable/metadata/converters/convert_32.py +15 -0
  164. pixeltable/metadata/converters/convert_33.py +17 -0
  165. pixeltable/metadata/converters/convert_34.py +21 -0
  166. pixeltable/metadata/converters/convert_35.py +9 -0
  167. pixeltable/metadata/converters/convert_36.py +38 -0
  168. pixeltable/metadata/converters/convert_37.py +15 -0
  169. pixeltable/metadata/converters/convert_38.py +39 -0
  170. pixeltable/metadata/converters/convert_39.py +124 -0
  171. pixeltable/metadata/converters/convert_40.py +73 -0
  172. pixeltable/metadata/converters/convert_41.py +12 -0
  173. pixeltable/metadata/converters/convert_42.py +9 -0
  174. pixeltable/metadata/converters/convert_43.py +44 -0
  175. pixeltable/metadata/converters/util.py +44 -18
  176. pixeltable/metadata/notes.py +21 -0
  177. pixeltable/metadata/schema.py +185 -42
  178. pixeltable/metadata/utils.py +74 -0
  179. pixeltable/mypy/__init__.py +3 -0
  180. pixeltable/mypy/mypy_plugin.py +123 -0
  181. pixeltable/plan.py +616 -225
  182. pixeltable/share/__init__.py +3 -0
  183. pixeltable/share/packager.py +797 -0
  184. pixeltable/share/protocol/__init__.py +33 -0
  185. pixeltable/share/protocol/common.py +165 -0
  186. pixeltable/share/protocol/operation_types.py +33 -0
  187. pixeltable/share/protocol/replica.py +119 -0
  188. pixeltable/share/publish.py +349 -0
  189. pixeltable/store.py +398 -232
  190. pixeltable/type_system.py +730 -267
  191. pixeltable/utils/__init__.py +40 -0
  192. pixeltable/utils/arrow.py +201 -29
  193. pixeltable/utils/av.py +298 -0
  194. pixeltable/utils/azure_store.py +346 -0
  195. pixeltable/utils/coco.py +26 -27
  196. pixeltable/utils/code.py +4 -4
  197. pixeltable/utils/console_output.py +46 -0
  198. pixeltable/utils/coroutine.py +24 -0
  199. pixeltable/utils/dbms.py +92 -0
  200. pixeltable/utils/description_helper.py +11 -12
  201. pixeltable/utils/documents.py +60 -61
  202. pixeltable/utils/exception_handler.py +36 -0
  203. pixeltable/utils/filecache.py +38 -22
  204. pixeltable/utils/formatter.py +88 -51
  205. pixeltable/utils/gcs_store.py +295 -0
  206. pixeltable/utils/http.py +133 -0
  207. pixeltable/utils/http_server.py +14 -13
  208. pixeltable/utils/iceberg.py +13 -0
  209. pixeltable/utils/image.py +17 -0
  210. pixeltable/utils/lancedb.py +90 -0
  211. pixeltable/utils/local_store.py +322 -0
  212. pixeltable/utils/misc.py +5 -0
  213. pixeltable/utils/object_stores.py +573 -0
  214. pixeltable/utils/pydantic.py +60 -0
  215. pixeltable/utils/pytorch.py +20 -20
  216. pixeltable/utils/s3_store.py +527 -0
  217. pixeltable/utils/sql.py +32 -5
  218. pixeltable/utils/system.py +30 -0
  219. pixeltable/utils/transactional_directory.py +4 -3
  220. pixeltable-0.5.7.dist-info/METADATA +579 -0
  221. pixeltable-0.5.7.dist-info/RECORD +227 -0
  222. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  223. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  224. pixeltable/__version__.py +0 -3
  225. pixeltable/catalog/named_function.py +0 -36
  226. pixeltable/catalog/path_dict.py +0 -141
  227. pixeltable/dataframe.py +0 -894
  228. pixeltable/exec/expr_eval_node.py +0 -232
  229. pixeltable/ext/__init__.py +0 -14
  230. pixeltable/ext/functions/__init__.py +0 -8
  231. pixeltable/ext/functions/whisperx.py +0 -77
  232. pixeltable/ext/functions/yolox.py +0 -157
  233. pixeltable/tool/create_test_db_dump.py +0 -311
  234. pixeltable/tool/create_test_video.py +0 -81
  235. pixeltable/tool/doc_plugins/griffe.py +0 -50
  236. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  237. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  238. pixeltable/tool/embed_udf.py +0 -9
  239. pixeltable/tool/mypy_plugin.py +0 -55
  240. pixeltable/utils/media_store.py +0 -76
  241. pixeltable/utils/s3.py +0 -16
  242. pixeltable-0.2.26.dist-info/METADATA +0 -400
  243. pixeltable-0.2.26.dist-info/RECORD +0 -156
  244. pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
  245. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -4,25 +4,25 @@ import dataclasses
4
4
  import importlib
5
5
  import logging
6
6
  import sys
7
- from typing import Optional
8
7
  from uuid import UUID
9
8
 
10
9
  import sqlalchemy as sql
11
10
 
12
- import pixeltable.env as env
13
- import pixeltable.exceptions as excs
14
- import pixeltable.type_system as ts
11
+ from pixeltable import env, exceptions as excs, type_system as ts
15
12
  from pixeltable.metadata import schema
13
+
16
14
  from .function import Function
17
15
 
18
16
  _logger = logging.getLogger('pixeltable')
19
17
 
18
+
20
19
  class FunctionRegistry:
21
20
  """
22
21
  A central registry for all Functions. Handles interactions with the backing store.
23
22
  Functions are loaded from the store on demand.
24
23
  """
25
- _instance: Optional[FunctionRegistry] = None
24
+
25
+ _instance: FunctionRegistry | None = None
26
26
 
27
27
  @classmethod
28
28
  def get(cls) -> FunctionRegistry:
@@ -30,7 +30,7 @@ class FunctionRegistry:
30
30
  cls._instance = FunctionRegistry()
31
31
  return cls._instance
32
32
 
33
- def __init__(self):
33
+ def __init__(self) -> None:
34
34
  self.stored_fns_by_id: dict[UUID, Function] = {}
35
35
  self.module_fns: dict[str, Function] = {} # fqn -> Function
36
36
  self.type_methods: dict[ts.ColumnType.Type, dict[str, Function]] = {}
@@ -68,7 +68,7 @@ class FunctionRegistry:
68
68
  raise excs.Error(f'A UDF with that name already exists: {fqn}')
69
69
  self.module_fns[fqn] = fn
70
70
  if fn.is_method or fn.is_property:
71
- base_type = fn.signature.parameters_by_pos[0].col_type.type_enum
71
+ base_type = fn.signatures[0].parameters_by_pos[0].col_type.type_enum
72
72
  if base_type not in self.type_methods:
73
73
  self.type_methods[base_type] = {}
74
74
  if fn.name in self.type_methods[base_type]:
@@ -93,7 +93,7 @@ class FunctionRegistry:
93
93
  # stored_fn_md.append(md)
94
94
  return list(self.module_fns.values())
95
95
 
96
- # def get_function(self, *, id: Optional[UUID] = None, fqn: Optional[str] = None) -> Function:
96
+ # def get_function(self, *, id: UUID | None = None, fqn: str | None = None) -> Function:
97
97
  # assert (id is not None) != (fqn is not None)
98
98
  # if id is not None:
99
99
  # if id not in self.stored_fns_by_id:
@@ -142,7 +142,7 @@ class FunctionRegistry:
142
142
  return list(self.type_methods[base_type].values())
143
143
  return []
144
144
 
145
- def lookup_type_method(self, base_type: ts.ColumnType.Type, name: str) -> Optional[Function]:
145
+ def lookup_type_method(self, base_type: ts.ColumnType.Type, name: str) -> Function | None:
146
146
  """
147
147
  Look up a method (or property) by name for a given base type. If no such method is registered, return None.
148
148
  """
@@ -150,14 +150,16 @@ class FunctionRegistry:
150
150
  return self.type_methods[base_type][name]
151
151
  return None
152
152
 
153
- #def create_function(self, md: schema.FunctionMd, binary_obj: bytes, dir_id: Optional[UUID] = None) -> UUID:
154
- def create_stored_function(self, pxt_fn: Function, dir_id: Optional[UUID] = None) -> UUID:
153
+ # def create_function(self, md: schema.FunctionMd, binary_obj: bytes, dir_id: UUID | None = None) -> UUID:
154
+ def create_stored_function(self, pxt_fn: Function, dir_id: UUID | None = None) -> UUID:
155
155
  fn_md, binary_obj = pxt_fn.to_store()
156
156
  md = schema.FunctionMd(name=pxt_fn.name, md=fn_md, py_version=sys.version, class_name=pxt_fn.__class__.__name__)
157
157
  with env.Env.get().engine.begin() as conn:
158
158
  res = conn.execute(
159
- sql.insert(schema.Function.__table__)
160
- .values(dir_id=dir_id, md=dataclasses.asdict(md), binary_obj=binary_obj))
159
+ sql.insert(schema.Function.__table__).values(
160
+ dir_id=dir_id, md=dataclasses.asdict(md), binary_obj=binary_obj
161
+ )
162
+ )
161
163
  id = res.inserted_primary_key[0]
162
164
  _logger.info(f'Created function {pxt_fn.name} (id {id}) in store')
163
165
  self.stored_fns_by_id[id] = pxt_fn
@@ -166,8 +168,9 @@ class FunctionRegistry:
166
168
  def get_stored_function(self, id: UUID) -> Function:
167
169
  if id in self.stored_fns_by_id:
168
170
  return self.stored_fns_by_id[id]
169
- stmt = sql.select(schema.Function.md, schema.Function.binary_obj, schema.Function.dir_id)\
170
- .where(schema.Function.id == id)
171
+ stmt = sql.select(schema.Function.md, schema.Function.binary_obj, schema.Function.dir_id).where(
172
+ schema.Function.id == id
173
+ )
171
174
  with env.Env.get().engine.begin() as conn:
172
175
  row = conn.execute(stmt).fetchone()
173
176
  if row is None:
@@ -180,7 +183,7 @@ class FunctionRegistry:
180
183
  self.stored_fns_by_id[id] = instance
181
184
  return instance
182
185
 
183
- # def create_function(self, fn: Function, dir_id: Optional[UUID] = None, name: Optional[str] = None) -> None:
186
+ # def create_function(self, fn: Function, dir_id: UUID | None = None, name: str | None = None) -> None:
184
187
  # with env.Env.get().engine.begin() as conn:
185
188
  # _logger.debug(f'Pickling function {name}')
186
189
  # eval_fn_str = cloudpickle.dumps(fn.eval_fn) if fn.eval_fn is not None else None
@@ -237,7 +240,5 @@ class FunctionRegistry:
237
240
  def delete_function(self, id: UUID) -> None:
238
241
  assert id is not None
239
242
  with env.Env.get().engine.begin() as conn:
240
- conn.execute(
241
- sql.delete(schema.Function.__table__)
242
- .where(schema.Function.id == id))
243
+ conn.execute(sql.delete(schema.Function.__table__).where(schema.Function.id == id))
243
244
  _logger.info(f'Deleted function with id {id} from store')
@@ -1,14 +1,13 @@
1
1
  import importlib
2
2
  import inspect
3
3
  from types import ModuleType
4
- from typing import Optional
5
4
 
6
5
  import pixeltable.exceptions as excs
7
6
 
8
7
 
9
- def resolve_symbol(symbol_path: str) -> Optional[object]:
8
+ def resolve_symbol(symbol_path: str) -> object | None:
10
9
  path_elems = symbol_path.split('.')
11
- module: Optional[ModuleType] = None
10
+ module: ModuleType | None = None
12
11
  i = len(path_elems) - 1
13
12
  while i > 0 and module is None:
14
13
  try:
@@ -28,10 +27,12 @@ def validate_symbol_path(fn_path: str) -> None:
28
27
  fn_name = path_elems[-1]
29
28
  if any(el == '<locals>' for el in path_elems):
30
29
  raise excs.Error(
31
- f'{fn_name}(): nested functions are not supported. Move the function to the module level or into a class.')
30
+ f'{fn_name}(): nested functions are not supported. Move the function to the module level or into a class.'
31
+ )
32
32
  if any(not el.isidentifier() for el in path_elems):
33
33
  raise excs.Error(
34
- f'{fn_name}(): cannot resolve symbol path {fn_path}. Move the function to the module level or into a class.')
34
+ f'{fn_name}(): cannot resolve symbol path {fn_path}. Move the function to the module level or into a class.'
35
+ )
35
36
 
36
37
 
37
38
  def get_caller_module_path() -> str:
pixeltable/func/mcp.py ADDED
@@ -0,0 +1,74 @@
1
+ import asyncio
2
+ import inspect
3
+ from typing import TYPE_CHECKING, Any
4
+
5
+ import pixeltable as pxt
6
+ from pixeltable import exceptions as excs, type_system as ts
7
+ from pixeltable.func.signature import Parameter
8
+
9
+ if TYPE_CHECKING:
10
+ import mcp
11
+
12
+
13
def mcp_udfs(url: str) -> list['pxt.func.Function']:
    """
    Synchronous entry point for `mcp_udfs_async()`.

    Runs the coroutine to completion via `asyncio.run()` and hands back its result.

    Args:
        url: Passed through unchanged to `mcp_udfs_async()`.

    Returns:
        Whatever `mcp_udfs_async(url)` produces.
    """
    udfs = asyncio.run(mcp_udfs_async(url))
    return udfs
15
+
16
+
17
async def mcp_udfs_async(url: str) -> list['pxt.func.Function']:
    """
    List the tools offered at `url` and wrap each one with `mcp_tool_to_udf()`.

    A streamable-HTTP client and an `mcp.ClientSession` are opened for the duration of the listing only;
    each returned function receives `url` so that it can open its own session later.

    Args:
        url: Address handed to `streamablehttp_client()`.

    Returns:
        One function per entry in the `tools` attribute of the `list_tools()` result, in the same order.
    """
    # `mcp` is imported lazily; at module level it is only imported under TYPE_CHECKING
    import mcp
    from mcp.client.streamable_http import streamablehttp_client

    tools_response: mcp.types.ListToolsResult | None = None
    async with streamablehttp_client(url) as (read_stream, write_stream, _):
        async with mcp.ClientSession(read_stream, write_stream) as session:
            await session.initialize()
            tools_response = await session.list_tools()
    assert tools_response is not None

    return [mcp_tool_to_udf(url, tool) for tool in tools_response.tools]
31
+
32
+
33
def mcp_tool_to_udf(url: str, mcp_tool: 'mcp.types.Tool') -> 'pxt.func.Function':
    """
    Wrap a single MCP tool as a `CallableFunction`.

    The resulting function takes one keyword-only parameter per entry in the tool's input schema and always
    declares a string return type. Each invocation opens a fresh client session against `url`, calls the tool
    by name with the supplied keyword arguments, and returns the `text` of the first content item.

    Args:
        url: Address handed to `streamablehttp_client()` on every invocation.
        mcp_tool: The tool description; its `name`, `description` and `inputSchema` are read.

    Returns:
        A `CallableFunction` named after the tool.

    Raises:
        excs.Error: If a parameter's schema cannot be converted to a Pixeltable type.
    """
    import mcp
    from mcp.client.streamable_http import streamablehttp_client

    async def invoke(**kwargs: Any) -> str:
        # TODO: Cache session objects rather than creating a new one each time?
        async with (
            streamablehttp_client(url) as (read_stream, write_stream, _),
            mcp.ClientSession(read_stream, write_stream) as session,
        ):
            await session.initialize()
            res = await session.call_tool(name=mcp_tool.name, arguments=kwargs)
            # TODO Handle image/audio responses?
            return res.content[0].text  # type: ignore[union-attr]

    if mcp_tool.description is not None:
        invoke.__doc__ = mcp_tool.description

    input_schema = mcp_tool.inputSchema
    # 'properties' is optional in an object schema: a tool that takes no arguments may omit it entirely,
    # so read it defensively (like 'required' below) instead of indexing.
    properties = input_schema.get('properties', {})
    params = {name: __mcp_param_to_pxt_type(mcp_tool.name, name, param) for name, param in properties.items()}
    required = input_schema.get('required', [])

    # Ensure that any params not appearing in `required` are nullable.
    # (A required param might or might not be nullable, since its type might be an 'anyOf' containing a null.)
    for name in params.keys() - required:
        params[name] = params[name].copy(nullable=True)

    signature = pxt.func.Signature(
        return_type=ts.StringType(),  # Return type is always string
        parameters=[Parameter(name, col_type, inspect.Parameter.KEYWORD_ONLY) for name, col_type in params.items()],
    )

    return pxt.func.CallableFunction(signatures=[signature], py_fns=[invoke], self_name=mcp_tool.name)
68
+
69
+
70
def __mcp_param_to_pxt_type(tool_name: str, name: str, param: dict[str, Any]) -> ts.ColumnType:
    """
    Convert the JSON schema of one MCP tool parameter into a Pixeltable column type.

    Args:
        tool_name: Name of the tool; used only in the error message.
        name: Name of the parameter; used only in the error message.
        param: The parameter's schema, passed to `ts.ColumnType.from_json_schema()`.

    Returns:
        The column type produced by `from_json_schema()`.

    Raises:
        excs.Error: If `from_json_schema()` returns None for the given schema.
    """
    col_type = ts.ColumnType.from_json_schema(param)
    if col_type is not None:
        return col_type
    raise excs.Error(f'Unknown type schema for MCP parameter {name!r} of tool {tool_name!r}: {param}')
@@ -1,69 +1,77 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import inspect
4
- from typing import Any, Callable, Optional
4
+ from functools import reduce
5
+ from typing import TYPE_CHECKING, Any, Callable, Iterable, overload
5
6
 
6
- import sqlalchemy as sql
7
-
8
- import pixeltable as pxt
9
- from pixeltable import exprs
7
+ from pixeltable import catalog, exceptions as excs, exprs, func, type_system as ts
10
8
 
11
9
  from .function import Function
12
10
  from .signature import Signature
13
11
 
12
+ if TYPE_CHECKING:
13
+ from pixeltable import Query
14
+
14
15
 
15
16
  class QueryTemplateFunction(Function):
16
- """A parameterized query/DataFrame from which an executable DataFrame is created with a function call."""
17
+ """A parameterized query from which an executable Query is created with a function call."""
18
+
19
+ template_df: 'Query' | None
20
+ self_name: str | None
21
+ _comment: str | None
17
22
 
18
23
  @classmethod
19
24
  def create(
20
- cls, template_callable: Callable, param_types: Optional[list[pxt.ColumnType]], path: str, name: str
25
+ cls, template_callable: Callable, param_types: list[ts.ColumnType] | None, path: str, name: str
21
26
  ) -> QueryTemplateFunction:
22
27
  # we need to construct a template df and a signature
23
28
  py_sig = inspect.signature(template_callable)
24
29
  py_params = list(py_sig.parameters.values())
25
30
  params = Signature.create_parameters(py_params=py_params, param_types=param_types)
26
- # invoke template_callable with parameter expressions to construct a DataFrame with parameters
31
+ # invoke template_callable with parameter expressions to construct a Query with parameters
27
32
  var_exprs = [exprs.Variable(param.name, param.col_type) for param in params]
28
33
  template_df = template_callable(*var_exprs)
29
- from pixeltable import DataFrame
30
- assert isinstance(template_df, DataFrame)
34
+ from pixeltable import Query
35
+
36
+ assert isinstance(template_df, Query)
31
37
  # we take params and return json
32
- sig = Signature(return_type=pxt.JsonType(), parameters=params)
33
- return QueryTemplateFunction(template_df, sig, path=path, name=name)
38
+ sig = Signature(return_type=ts.JsonType(), parameters=params)
39
+ return QueryTemplateFunction(template_df, sig, path=path, name=name, comment=inspect.getdoc(template_callable))
34
40
 
35
41
  def __init__(
36
- self, template_df: Optional['pxt.DataFrame'], sig: Optional[Signature], path: Optional[str] = None,
37
- name: Optional[str] = None,
42
+ self,
43
+ template_df: 'Query' | None,
44
+ sig: Signature,
45
+ path: str | None = None,
46
+ name: str | None = None,
47
+ comment: str | None = None,
38
48
  ):
39
- super().__init__(sig, self_path=path)
49
+ assert sig is not None
50
+ super().__init__([sig], self_path=path)
40
51
  self.self_name = name
41
52
  self.template_df = template_df
53
+ self._comment = comment
42
54
 
43
- # if we're running as part of an ongoing update operation, we need to use the same connection, otherwise
44
- # we end up with a deadlock
45
- # TODO: figure out a more general way to make execution state available
46
- self.conn: Optional[sql.engine.Connection] = None
55
+ def _update_as_overload_resolution(self, signature_idx: int) -> None:
56
+ pass # only one signature supported for QueryTemplateFunction
47
57
 
48
- # convert defaults to Literals
49
- self.defaults: dict[str, exprs.Literal] = {} # key: param name, value: default value converted to a Literal
50
- param_types = self.template_df.parameters()
51
- for param in [p for p in self.signature.parameters.values() if p.has_default()]:
52
- assert param.name in param_types
53
- param_type = param_types[param.name]
54
- literal_default = exprs.Literal(param.default, col_type=param_type)
55
- self.defaults[param.name] = literal_default
56
-
57
- def set_conn(self, conn: Optional[sql.engine.Connection]) -> None:
58
- self.conn = conn
58
+ @property
59
+ def is_async(self) -> bool:
60
+ return True
59
61
 
60
- def exec(self, *args: Any, **kwargs: Any) -> Any:
62
+ async def aexec(self, *args: Any, **kwargs: Any) -> Any:
63
+ # assert not self.is_polymorphic
61
64
  bound_args = self.signature.py_signature.bind(*args, **kwargs).arguments
62
65
  # apply defaults, otherwise we might have Parameters left over
63
66
  bound_args.update(
64
- {param_name: default for param_name, default in self.defaults.items() if param_name not in bound_args})
67
+ {
68
+ param.name: param.default
69
+ for param in self.signature.parameters.values()
70
+ if param.has_default() and param.name not in bound_args
71
+ }
72
+ )
65
73
  bound_df = self.template_df.bind(bound_args)
66
- result = bound_df._collect(self.conn)
74
+ result = await bound_df._acollect()
67
75
  return list(result)
68
76
 
69
77
  @property
@@ -74,10 +82,118 @@ class QueryTemplateFunction(Function):
74
82
  def name(self) -> str:
75
83
  return self.self_name
76
84
 
85
+ def comment(self) -> str | None:
86
+ return self._comment
87
+
77
88
  def _as_dict(self) -> dict:
78
89
  return {'name': self.name, 'signature': self.signature.as_dict(), 'df': self.template_df.as_dict()}
79
90
 
80
91
  @classmethod
81
92
  def _from_dict(cls, d: dict) -> Function:
82
- from pixeltable.dataframe import DataFrame
83
- return cls(DataFrame.from_dict(d['df']), Signature.from_dict(d['signature']), name=d['name'])
93
+ from pixeltable._query import Query
94
+
95
+ return cls(Query.from_dict(d['df']), Signature.from_dict(d['signature']), name=d['name'])
96
+
97
+
98
@overload
def query(py_fn: Callable) -> QueryTemplateFunction: ...


@overload
def query(*, param_types: list[ts.ColumnType] | None = None) -> Callable[[Callable], QueryTemplateFunction]: ...


def query(*args: Any, **kwargs: Any) -> Any:
    """
    Decorator that turns a Python function into a `QueryTemplateFunction`.

    Supported forms:
        - `@query`: the decorated function is passed as the single positional argument.
        - `@query()` / `@query(param_types=...)`: returns a decorator; `param_types` defaults to None,
          as declared by the keyword-only overload.

    Returns:
        A `QueryTemplateFunction` (bare form) or a decorator that produces one (factory form).
    """

    def make_query_template(py_fn: Callable, param_types: list[ts.ColumnType] | None) -> QueryTemplateFunction:
        if py_fn.__module__ != '__main__' and py_fn.__name__.isidentifier():
            # this is a named function in a module
            function_path = f'{py_fn.__module__}.{py_fn.__qualname__}'
        else:
            function_path = None
        query_name = py_fn.__name__
        return QueryTemplateFunction.create(py_fn, param_types=param_types, path=function_path, name=query_name)

    # TODO: verify that the inferred return type matches that of the template
    # TODO: verify that the signature doesn't contain batched parameters

    if len(args) == 1:
        assert len(kwargs) == 0 and callable(args[0])
        return make_query_template(args[0], None)

    # Factory form. `param_types` is optional in the overload, so `@query()` with no keyword arguments
    # must be accepted too; any other keyword is still rejected.
    assert len(args) == 0 and set(kwargs) <= {'param_types'}
    param_types = kwargs.get('param_types')
    return lambda py_fn: make_query_template(py_fn, param_types)
126
+
127
+
128
def retrieval_udf(
    table: catalog.Table,
    name: str | None = None,
    description: str | None = None,
    parameters: Iterable[str | exprs.ColumnRef] | None = None,
    limit: int | None = 10,
) -> func.QueryTemplateFunction:
    """
    Constructs a retrieval UDF for the given table. The retrieval UDF is a UDF whose parameters are
    columns of the table and whose return value is a list of rows from the table. The return value of
    ```python
    f(col1=x, col2=y, ...)
    ```
    will be a list of all rows from the table that match the specified arguments.

    Args:
        table: The table to use as the dataset for the retrieval tool.
        name: The name of the tool. If not specified, then the name of the table will be used by default.
        description: The description of the tool. If not specified, then a default description will be generated.
        parameters: The columns of the table to use as parameters, given as column names or column references.
            If not specified, all data columns (non-computed columns) will be used as parameters.

            All of the specified parameters will be required parameters of the tool, regardless of their status
            as columns.
        limit: The maximum number of rows to return. Defaults to 10; pass `None` to return all matching rows.

    Returns:
        A `QueryTemplateFunction` wrapping the parameterized query. The query selects from `table`, keeping
        the rows whose parameter columns equal the supplied arguments.

    Raises:
        excs.Error: If a parameter given by name is not a column of the table, or if the resulting parameter
            list is empty.
    """
    # Argument validation
    col_refs: list[exprs.ColumnRef]
    # TODO: get rid of references to ColumnRef internals and replace instead with a public interface
    col_names = table.columns()
    if parameters is None:
        col_refs = [table[col_name] for col_name in col_names if not table[col_name].col.is_computed]
    else:
        # `parameters` is only required to be an Iterable and is traversed twice below; materialize it so
        # that a one-shot iterator (e.g. a generator) is not exhausted by the validation pass.
        param_list = list(parameters)
        for param in param_list:
            if isinstance(param, str) and param not in col_names:
                raise excs.Error(f'The specified parameter {param!r} is not a column of the table {table._path()!r}')
        col_refs = [table[param] if isinstance(param, str) else param for param in param_list]

    if len(col_refs) == 0:
        raise excs.Error('Parameter list cannot be empty.')

    # Construct the Query: one equality predicate per parameter column, AND-ed together
    predicates = [col_ref == exprs.Variable(col_ref.col.name, col_ref.col.col_type) for col_ref in col_refs]
    where_clause = reduce(lambda c1, c2: c1 & c2, predicates)
    df = table.select().where(where_clause)
    if limit is not None:
        df = df.limit(limit)

    # Construct the signature
    query_params = [
        func.Parameter(col_ref.col.name, col_ref.col.col_type, inspect.Parameter.POSITIONAL_OR_KEYWORD)
        for col_ref in col_refs
    ]
    query_signature = func.Signature(return_type=ts.JsonType(), parameters=query_params)

    # Construct a name and/or description if not provided
    if name is None:
        name = table._name
    if description is None:
        description = (
            f'Retrieves an entry from the dataset {name!r} that matches the given parameters.\n\nParameters:\n'
        )
        description += '\n'.join(
            [f' {col_ref.col.name}: of type `{col_ref.col.col_type._to_base_str()}`' for col_ref in col_refs]
        )

    fn = func.QueryTemplateFunction(df, query_signature, name=name, comment=description)
    return fn