pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. pixeltable/__init__.py +83 -19
  2. pixeltable/_query.py +1444 -0
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +7 -4
  5. pixeltable/catalog/catalog.py +2394 -119
  6. pixeltable/catalog/column.py +225 -104
  7. pixeltable/catalog/dir.py +38 -9
  8. pixeltable/catalog/globals.py +53 -34
  9. pixeltable/catalog/insertable_table.py +265 -115
  10. pixeltable/catalog/path.py +80 -17
  11. pixeltable/catalog/schema_object.py +28 -43
  12. pixeltable/catalog/table.py +1270 -677
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +1270 -751
  15. pixeltable/catalog/table_version_handle.py +109 -0
  16. pixeltable/catalog/table_version_path.py +137 -42
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +251 -134
  20. pixeltable/config.py +215 -0
  21. pixeltable/env.py +736 -285
  22. pixeltable/exceptions.py +26 -2
  23. pixeltable/exec/__init__.py +7 -2
  24. pixeltable/exec/aggregation_node.py +39 -21
  25. pixeltable/exec/cache_prefetch_node.py +87 -109
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +25 -28
  29. pixeltable/exec/data_row_batch.py +11 -46
  30. pixeltable/exec/exec_context.py +26 -11
  31. pixeltable/exec/exec_node.py +35 -27
  32. pixeltable/exec/expr_eval/__init__.py +3 -0
  33. pixeltable/exec/expr_eval/evaluators.py +365 -0
  34. pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
  35. pixeltable/exec/expr_eval/globals.py +200 -0
  36. pixeltable/exec/expr_eval/row_buffer.py +74 -0
  37. pixeltable/exec/expr_eval/schedulers.py +413 -0
  38. pixeltable/exec/globals.py +35 -0
  39. pixeltable/exec/in_memory_data_node.py +35 -27
  40. pixeltable/exec/object_store_save_node.py +293 -0
  41. pixeltable/exec/row_update_node.py +44 -29
  42. pixeltable/exec/sql_node.py +414 -115
  43. pixeltable/exprs/__init__.py +8 -5
  44. pixeltable/exprs/arithmetic_expr.py +79 -45
  45. pixeltable/exprs/array_slice.py +5 -5
  46. pixeltable/exprs/column_property_ref.py +40 -26
  47. pixeltable/exprs/column_ref.py +254 -61
  48. pixeltable/exprs/comparison.py +14 -9
  49. pixeltable/exprs/compound_predicate.py +9 -10
  50. pixeltable/exprs/data_row.py +213 -72
  51. pixeltable/exprs/expr.py +270 -104
  52. pixeltable/exprs/expr_dict.py +6 -5
  53. pixeltable/exprs/expr_set.py +20 -11
  54. pixeltable/exprs/function_call.py +383 -284
  55. pixeltable/exprs/globals.py +18 -5
  56. pixeltable/exprs/in_predicate.py +7 -7
  57. pixeltable/exprs/inline_expr.py +37 -37
  58. pixeltable/exprs/is_null.py +8 -4
  59. pixeltable/exprs/json_mapper.py +120 -54
  60. pixeltable/exprs/json_path.py +90 -60
  61. pixeltable/exprs/literal.py +61 -16
  62. pixeltable/exprs/method_ref.py +7 -6
  63. pixeltable/exprs/object_ref.py +19 -8
  64. pixeltable/exprs/row_builder.py +238 -75
  65. pixeltable/exprs/rowid_ref.py +53 -15
  66. pixeltable/exprs/similarity_expr.py +65 -50
  67. pixeltable/exprs/sql_element_cache.py +5 -5
  68. pixeltable/exprs/string_op.py +107 -0
  69. pixeltable/exprs/type_cast.py +25 -13
  70. pixeltable/exprs/variable.py +2 -2
  71. pixeltable/func/__init__.py +9 -5
  72. pixeltable/func/aggregate_function.py +197 -92
  73. pixeltable/func/callable_function.py +119 -35
  74. pixeltable/func/expr_template_function.py +101 -48
  75. pixeltable/func/function.py +375 -62
  76. pixeltable/func/function_registry.py +20 -19
  77. pixeltable/func/globals.py +6 -5
  78. pixeltable/func/mcp.py +74 -0
  79. pixeltable/func/query_template_function.py +151 -35
  80. pixeltable/func/signature.py +178 -49
  81. pixeltable/func/tools.py +164 -0
  82. pixeltable/func/udf.py +176 -53
  83. pixeltable/functions/__init__.py +44 -4
  84. pixeltable/functions/anthropic.py +226 -47
  85. pixeltable/functions/audio.py +148 -11
  86. pixeltable/functions/bedrock.py +137 -0
  87. pixeltable/functions/date.py +188 -0
  88. pixeltable/functions/deepseek.py +113 -0
  89. pixeltable/functions/document.py +81 -0
  90. pixeltable/functions/fal.py +76 -0
  91. pixeltable/functions/fireworks.py +72 -20
  92. pixeltable/functions/gemini.py +249 -0
  93. pixeltable/functions/globals.py +208 -53
  94. pixeltable/functions/groq.py +108 -0
  95. pixeltable/functions/huggingface.py +1088 -95
  96. pixeltable/functions/image.py +155 -84
  97. pixeltable/functions/json.py +8 -11
  98. pixeltable/functions/llama_cpp.py +31 -19
  99. pixeltable/functions/math.py +169 -0
  100. pixeltable/functions/mistralai.py +50 -75
  101. pixeltable/functions/net.py +70 -0
  102. pixeltable/functions/ollama.py +29 -36
  103. pixeltable/functions/openai.py +548 -160
  104. pixeltable/functions/openrouter.py +143 -0
  105. pixeltable/functions/replicate.py +15 -14
  106. pixeltable/functions/reve.py +250 -0
  107. pixeltable/functions/string.py +310 -85
  108. pixeltable/functions/timestamp.py +37 -19
  109. pixeltable/functions/together.py +77 -120
  110. pixeltable/functions/twelvelabs.py +188 -0
  111. pixeltable/functions/util.py +7 -2
  112. pixeltable/functions/uuid.py +30 -0
  113. pixeltable/functions/video.py +1528 -117
  114. pixeltable/functions/vision.py +26 -26
  115. pixeltable/functions/voyageai.py +289 -0
  116. pixeltable/functions/whisper.py +19 -10
  117. pixeltable/functions/whisperx.py +179 -0
  118. pixeltable/functions/yolox.py +112 -0
  119. pixeltable/globals.py +716 -236
  120. pixeltable/index/__init__.py +3 -1
  121. pixeltable/index/base.py +17 -21
  122. pixeltable/index/btree.py +32 -22
  123. pixeltable/index/embedding_index.py +155 -92
  124. pixeltable/io/__init__.py +12 -7
  125. pixeltable/io/datarows.py +140 -0
  126. pixeltable/io/external_store.py +83 -125
  127. pixeltable/io/fiftyone.py +24 -33
  128. pixeltable/io/globals.py +47 -182
  129. pixeltable/io/hf_datasets.py +96 -127
  130. pixeltable/io/label_studio.py +171 -156
  131. pixeltable/io/lancedb.py +3 -0
  132. pixeltable/io/pandas.py +136 -115
  133. pixeltable/io/parquet.py +40 -153
  134. pixeltable/io/table_data_conduit.py +702 -0
  135. pixeltable/io/utils.py +100 -0
  136. pixeltable/iterators/__init__.py +8 -4
  137. pixeltable/iterators/audio.py +207 -0
  138. pixeltable/iterators/base.py +9 -3
  139. pixeltable/iterators/document.py +144 -87
  140. pixeltable/iterators/image.py +17 -38
  141. pixeltable/iterators/string.py +15 -12
  142. pixeltable/iterators/video.py +523 -127
  143. pixeltable/metadata/__init__.py +33 -8
  144. pixeltable/metadata/converters/convert_10.py +2 -3
  145. pixeltable/metadata/converters/convert_13.py +2 -2
  146. pixeltable/metadata/converters/convert_15.py +15 -11
  147. pixeltable/metadata/converters/convert_16.py +4 -5
  148. pixeltable/metadata/converters/convert_17.py +4 -5
  149. pixeltable/metadata/converters/convert_18.py +4 -6
  150. pixeltable/metadata/converters/convert_19.py +6 -9
  151. pixeltable/metadata/converters/convert_20.py +3 -6
  152. pixeltable/metadata/converters/convert_21.py +6 -8
  153. pixeltable/metadata/converters/convert_22.py +3 -2
  154. pixeltable/metadata/converters/convert_23.py +33 -0
  155. pixeltable/metadata/converters/convert_24.py +55 -0
  156. pixeltable/metadata/converters/convert_25.py +19 -0
  157. pixeltable/metadata/converters/convert_26.py +23 -0
  158. pixeltable/metadata/converters/convert_27.py +29 -0
  159. pixeltable/metadata/converters/convert_28.py +13 -0
  160. pixeltable/metadata/converters/convert_29.py +110 -0
  161. pixeltable/metadata/converters/convert_30.py +63 -0
  162. pixeltable/metadata/converters/convert_31.py +11 -0
  163. pixeltable/metadata/converters/convert_32.py +15 -0
  164. pixeltable/metadata/converters/convert_33.py +17 -0
  165. pixeltable/metadata/converters/convert_34.py +21 -0
  166. pixeltable/metadata/converters/convert_35.py +9 -0
  167. pixeltable/metadata/converters/convert_36.py +38 -0
  168. pixeltable/metadata/converters/convert_37.py +15 -0
  169. pixeltable/metadata/converters/convert_38.py +39 -0
  170. pixeltable/metadata/converters/convert_39.py +124 -0
  171. pixeltable/metadata/converters/convert_40.py +73 -0
  172. pixeltable/metadata/converters/convert_41.py +12 -0
  173. pixeltable/metadata/converters/convert_42.py +9 -0
  174. pixeltable/metadata/converters/convert_43.py +44 -0
  175. pixeltable/metadata/converters/util.py +44 -18
  176. pixeltable/metadata/notes.py +21 -0
  177. pixeltable/metadata/schema.py +185 -42
  178. pixeltable/metadata/utils.py +74 -0
  179. pixeltable/mypy/__init__.py +3 -0
  180. pixeltable/mypy/mypy_plugin.py +123 -0
  181. pixeltable/plan.py +616 -225
  182. pixeltable/share/__init__.py +3 -0
  183. pixeltable/share/packager.py +797 -0
  184. pixeltable/share/protocol/__init__.py +33 -0
  185. pixeltable/share/protocol/common.py +165 -0
  186. pixeltable/share/protocol/operation_types.py +33 -0
  187. pixeltable/share/protocol/replica.py +119 -0
  188. pixeltable/share/publish.py +349 -0
  189. pixeltable/store.py +398 -232
  190. pixeltable/type_system.py +730 -267
  191. pixeltable/utils/__init__.py +40 -0
  192. pixeltable/utils/arrow.py +201 -29
  193. pixeltable/utils/av.py +298 -0
  194. pixeltable/utils/azure_store.py +346 -0
  195. pixeltable/utils/coco.py +26 -27
  196. pixeltable/utils/code.py +4 -4
  197. pixeltable/utils/console_output.py +46 -0
  198. pixeltable/utils/coroutine.py +24 -0
  199. pixeltable/utils/dbms.py +92 -0
  200. pixeltable/utils/description_helper.py +11 -12
  201. pixeltable/utils/documents.py +60 -61
  202. pixeltable/utils/exception_handler.py +36 -0
  203. pixeltable/utils/filecache.py +38 -22
  204. pixeltable/utils/formatter.py +88 -51
  205. pixeltable/utils/gcs_store.py +295 -0
  206. pixeltable/utils/http.py +133 -0
  207. pixeltable/utils/http_server.py +14 -13
  208. pixeltable/utils/iceberg.py +13 -0
  209. pixeltable/utils/image.py +17 -0
  210. pixeltable/utils/lancedb.py +90 -0
  211. pixeltable/utils/local_store.py +322 -0
  212. pixeltable/utils/misc.py +5 -0
  213. pixeltable/utils/object_stores.py +573 -0
  214. pixeltable/utils/pydantic.py +60 -0
  215. pixeltable/utils/pytorch.py +20 -20
  216. pixeltable/utils/s3_store.py +527 -0
  217. pixeltable/utils/sql.py +32 -5
  218. pixeltable/utils/system.py +30 -0
  219. pixeltable/utils/transactional_directory.py +4 -3
  220. pixeltable-0.5.7.dist-info/METADATA +579 -0
  221. pixeltable-0.5.7.dist-info/RECORD +227 -0
  222. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  223. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  224. pixeltable/__version__.py +0 -3
  225. pixeltable/catalog/named_function.py +0 -36
  226. pixeltable/catalog/path_dict.py +0 -141
  227. pixeltable/dataframe.py +0 -894
  228. pixeltable/exec/expr_eval_node.py +0 -232
  229. pixeltable/ext/__init__.py +0 -14
  230. pixeltable/ext/functions/__init__.py +0 -8
  231. pixeltable/ext/functions/whisperx.py +0 -77
  232. pixeltable/ext/functions/yolox.py +0 -157
  233. pixeltable/tool/create_test_db_dump.py +0 -311
  234. pixeltable/tool/create_test_video.py +0 -81
  235. pixeltable/tool/doc_plugins/griffe.py +0 -50
  236. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  237. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  238. pixeltable/tool/embed_udf.py +0 -9
  239. pixeltable/tool/mypy_plugin.py +0 -55
  240. pixeltable/utils/media_store.py +0 -76
  241. pixeltable/utils/s3.py +0 -16
  242. pixeltable-0.2.26.dist-info/METADATA +0 -400
  243. pixeltable-0.2.26.dist-info/RECORD +0 -156
  244. pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
  245. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,249 @@
1
+ """
2
+ Pixeltable UDFs
3
+ that wrap various endpoints from the Google Gemini API. In order to use them, you must
4
+ first `pip install google-genai` and configure your Gemini credentials, as described in
5
+ the [Working with Gemini](https://docs.pixeltable.com/notebooks/integrations/working-with-gemini) tutorial.
6
+ """
7
+
8
+ import asyncio
9
+ import io
10
+ from pathlib import Path
11
+ from typing import TYPE_CHECKING
12
+
13
+ import PIL.Image
14
+
15
+ import pixeltable as pxt
16
+ from pixeltable import env, exceptions as excs, exprs
17
+ from pixeltable.utils.code import local_public_names
18
+ from pixeltable.utils.local_store import TempStore
19
+
20
+ if TYPE_CHECKING:
21
+ from google import genai
22
+
23
+
24
@env.register_client('gemini')
def _(api_key: str) -> 'genai.client.Client':
    """Build the google-genai client that is registered with the environment under the name 'gemini'."""
    # Deferred import: google-genai is only needed once a client is actually constructed.
    from google import genai

    client = genai.client.Client(api_key=api_key)
    return client
29
+
30
+
31
def _genai_client() -> 'genai.client.Client':
    """Fetch the client registered as 'gemini' from the current Pixeltable environment."""
    runtime_env = env.Env.get()
    return runtime_env.get_client('gemini')
33
+
34
+
35
@pxt.udf(resource_pool='request-rate:gemini')
async def generate_content(
    contents: pxt.Json, *, model: str, config: dict | None = None, tools: list[dict] | None = None
) -> dict:
    """
    Generate content from the specified model.

    Request throttling:
    Applies the rate limit set in the config (section `gemini.rate_limits`; use the model id as the key). If no rate
    limit is configured, uses a default of 600 RPM.

    __Requirements:__

    - `pip install google-genai`

    Args:
        contents: The input content to generate from. Can be a prompt, or a list containing images and text
            prompts, as described in: <https://ai.google.dev/gemini-api/docs/text-generation>
        model: The name of the model to use.
        config: Configuration for generation, corresponding to keyword arguments of
            `genai.types.GenerateContentConfig`. For details on the parameters, see:
            <https://googleapis.github.io/python-genai/genai.html#genai.types.GenerateContentConfig>
        tools: An optional list of Pixeltable tools to use. It is also possible to specify tools manually via the
            `config['tools']` parameter, but at most one of `config['tools']` or `tools` may be used.

    Returns:
        A dictionary containing the response and other metadata.

    Raises:
        excs.Error: If both `config['tools']` and `tools` are specified.

    Examples:
        Add a computed column that applies the model `gemini-2.5-flash`
        to an existing Pixeltable column `tbl.prompt` of the table `tbl`:

        >>> tbl.add_computed_column(response=generate_content(tbl.prompt, model='gemini-2.5-flash'))

        Add a computed column that applies the model `gemini-2.5-flash` for image understanding, passing a list
        with an image and a text prompt as `contents` (this assumes `tbl` also has an image column `tbl.image`):

        >>> tbl.add_computed_column(
        ...     response=generate_content([tbl.image, tbl.prompt], model='gemini-2.5-flash')
        ... )
    """
    env.Env.get().require_package('google.genai')
    from google.genai import types

    # Enforce the documented contract: previously `tools` silently replaced `config['tools']`.
    if tools is not None and config is not None and config.get('tools') is not None:
        raise excs.Error("At most one of `config['tools']` or `tools` may be used.")

    # Leave the config unset when the caller supplied neither `config` nor `tools`.
    config_: types.GenerateContentConfig | None = None
    if config is not None or tools is not None:
        config_ = types.GenerateContentConfig(**(config or {}))
        if tools is not None:
            # All Pixeltable tools are passed as function declarations of a single Gemini `Tool`.
            gemini_tools = [__convert_pxt_tool(tool) for tool in tools]
            config_.tools = [types.Tool(function_declarations=gemini_tools)]

    response = await _genai_client().aio.models.generate_content(model=model, contents=contents, config=config_)
    return response.model_dump()
88
+
89
+
90
def __convert_pxt_tool(pxt_tool: dict) -> dict:
    """
    Reshape a Pixeltable tool dict into the function-declaration dict passed to Gemini.

    The top-level `required` list of the Pixeltable tool is moved inside the `parameters` object schema;
    `name`, `description` and the parameter `properties` are carried over unchanged.
    """
    declaration: dict = {'name': pxt_tool['name'], 'description': pxt_tool['description']}
    declaration['parameters'] = {
        'type': 'object',
        'properties': pxt_tool['parameters']['properties'],
        'required': pxt_tool['required'],
    }
    return declaration
100
+
101
+
102
def invoke_tools(tools: pxt.func.Tools, response: exprs.Expr) -> exprs.InlineDict:
    """
    Converts a Gemini response dict to Pixeltable tool invocation format and calls `tools._invoke()`.

    Args:
        tools: The Pixeltable tools that the model was allowed to call.
        response: An expression that evaluates to a Gemini response dict, such as the output of
            `generate_content`.

    Returns:
        The result of `tools._invoke()` applied to the converted tool calls.
    """
    return tools._invoke(_gemini_response_to_pxt_tool_calls(response))
105
+
106
+
107
@pxt.udf
def _gemini_response_to_pxt_tool_calls(response: dict) -> dict | None:
    """
    Extract the function calls from a Gemini response dict, grouped by tool name.

    Only the first candidate of the response is inspected.

    Args:
        response: A Gemini response dict (the `model_dump()` of a generate-content response).

    Returns:
        A dict mapping each called tool name to a list of `{'args': ...}` dicts, one per call, in the order
        in which the calls appear in the response; `None` if the response contains no function calls
        (including responses with no candidates, no content, or no parts).
    """
    # Any of these levels can be missing or None (for example when nothing was generated);
    # treat that the same as "no tool calls" instead of failing on a subscript of None.
    candidates = response.get('candidates') or []
    if not candidates:
        return None
    content = candidates[0].get('content') or {}
    parts = content.get('parts') or []

    pxt_tool_calls: dict[str, list[dict]] = {}
    for part in parts:
        tool_call = part.get('function_call')
        if tool_call is not None:
            pxt_tool_calls.setdefault(tool_call['name'], []).append({'args': tool_call['args']})
    return pxt_tool_calls or None
120
+
121
+
122
@generate_content.resource_pool
def _(model: str) -> str:
    """Name the resource pool used by `generate_content`; the model id is part of the pool name."""
    pool_name = f'request-rate:gemini:{model}'
    return pool_name
125
+
126
+
127
@pxt.udf(resource_pool='request-rate:imagen')
async def generate_images(prompt: str, *, model: str, config: dict | None = None) -> PIL.Image.Image:
    """
    Generates images based on a text description and configuration. For additional details, see:
    <https://ai.google.dev/gemini-api/docs/image-generation>

    Request throttling:
    Applies the rate limit set in the config (section `imagen.rate_limits`; use the model id as the key). If no rate
    limit is configured, uses a default of 600 RPM.

    __Requirements:__

    - `pip install google-genai`

    Args:
        prompt: A text description of the images to generate.
        model: The model to use.
        config: Configuration for generation, corresponding to keyword arguments of
            `genai.types.GenerateImagesConfig`. For details on the parameters, see:
            <https://googleapis.github.io/python-genai/genai.html#genai.types.GenerateImagesConfig>

    Returns:
        The generated image. If the response contains several images, only the first one is returned.

    Raises:
        excs.Error: If the response does not contain any image.

    Examples:
        Add a computed column that applies the model `imagen-4.0-generate-001`
        to an existing Pixeltable column `tbl.prompt` of the table `tbl`:

        >>> tbl.add_computed_column(response=generate_images(tbl.prompt, model='imagen-4.0-generate-001'))
    """
    env.Env.get().require_package('google.genai')
    from google.genai.types import GenerateImagesConfig

    # An empty or missing `config` means "use the API defaults".
    config_ = GenerateImagesConfig(**config) if config else None
    response = await _genai_client().aio.models.generate_images(model=model, prompt=prompt, config=config_)

    # Fail with a clear message instead of an IndexError/AttributeError when nothing usable came back.
    if not response.generated_images:
        raise excs.Error('Image generation failed: the response did not contain any images.')
    first_image = response.generated_images[0].image
    if first_image is None:
        raise excs.Error('Image generation failed: the first generated image has no image data.')
    return first_image._pil_image
163
+
164
+
165
@generate_images.resource_pool
def _(model: str) -> str:
    """Name the resource pool used by `generate_images`; the model id is part of the pool name."""
    pool_name = f'request-rate:imagen:{model}'
    return pool_name
168
+
169
+
170
@pxt.udf(resource_pool='request-rate:veo')
async def generate_videos(
    prompt: str | None = None, image: PIL.Image.Image | None = None, *, model: str, config: dict | None = None
) -> pxt.Video:
    """
    Generates videos based on a text description and configuration. For additional details, see:
    <https://ai.google.dev/gemini-api/docs/video>

    At least one of `prompt` or `image` must be provided.

    Request throttling:
    Applies the rate limit set in the config (section `veo.rate_limits`; use the model id as the key). If no rate
    limit is configured, uses a default of 600 RPM.

    __Requirements:__

    - `pip install google-genai`

    Args:
        prompt: A text description of the videos to generate.
        image: An image to use as the first frame of the video.
        model: The model to use.
        config: Configuration for generation, corresponding to keyword arguments of
            `genai.types.GenerateVideosConfig`. For details on the parameters, see:
            <https://googleapis.github.io/python-genai/genai.html#genai.types.GenerateVideosConfig>

    Returns:
        The generated video. If the response contains several videos, only the first one is returned.

    Raises:
        excs.Error: If neither `prompt` nor `image` is provided, if the generation operation reports an error,
            or if the response contains no video or no downloadable video data.

    Examples:
        Add a computed column that applies the model `veo-3.0-generate-001`
        to an existing Pixeltable column `tbl.prompt` of the table `tbl`:

        >>> tbl.add_computed_column(response=generate_videos(tbl.prompt, model='veo-3.0-generate-001'))
    """
    env.Env.get().require_package('google.genai')
    from google.genai import types

    if prompt is None and image is None:
        raise excs.Error('At least one of `prompt` or `image` must be provided.')

    # The input image is re-encoded as WebP and sent inline as bytes.
    image_: types.Image | None = None
    if image is not None:
        with io.BytesIO() as buffer:
            image.save(buffer, format='webp')
            image_ = types.Image(image_bytes=buffer.getvalue(), mime_type='image/webp')

    # An empty or missing `config` means "use the API defaults".
    config_ = types.GenerateVideosConfig(**config) if config else None

    client = _genai_client()
    operation = await client.aio.models.generate_videos(model=model, prompt=prompt, image=image_, config=config_)
    # Video generation is a long-running operation: poll every 3 seconds until it reports completion.
    # NOTE(review): there is no upper bound on the polling time; confirm whether a timeout is wanted.
    while not operation.done:
        await asyncio.sleep(3)
        operation = await client.aio.operations.get(operation)

    if operation.error:
        raise excs.Error(f'Video generation failed: {operation.error}')

    generated_videos = operation.response.generated_videos if operation.response is not None else None
    if not generated_videos:
        raise excs.Error('Video generation failed: the response did not contain any videos.')
    video = generated_videos[0]

    video_bytes = await client.aio.files.download(file=video.video)  # type: ignore[arg-type]
    # Explicit check rather than `assert`, which is stripped when Python runs with -O.
    if video_bytes is None:
        raise excs.Error('Video generation failed: no video data could be downloaded.')

    # Create a temporary file to store the video bytes
    output_path = TempStore.create_path(extension='.mp4')
    Path(output_path).write_bytes(video_bytes)
    return str(output_path)
238
+
239
+
240
@generate_videos.resource_pool
def _(model: str) -> str:
    """Name the resource pool used by `generate_videos`; the model id is part of the pool name."""
    pool_name = f'request-rate:veo:{model}'
    return pool_name
243
+
244
+
245
+ __all__ = local_public_names(__name__)
246
+
247
+
248
def __dir__() -> list[str]:
    """Module-level `__dir__` hook: report exactly the names listed in `__all__`."""
    public_names = __all__
    return public_names
@@ -1,54 +1,105 @@
1
1
  import builtins
2
- from typing import _GenericAlias # type: ignore[attr-defined]
3
- from typing import Optional, Union
2
+ import typing
3
+ from typing import Any, Callable
4
4
 
5
5
  import sqlalchemy as sql
6
6
 
7
- import pixeltable.func as func
8
- import pixeltable.type_system as ts
9
- from pixeltable import exprs
7
+ from pixeltable import exceptions as excs, exprs, func, type_system as ts
10
8
  from pixeltable.utils.code import local_public_names
11
9
 
10
+ from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
11
+
12
12
 
13
13
  # TODO: remove and replace calls with astype()
14
- def cast(expr: exprs.Expr, target_type: Union[ts.ColumnType, type, _GenericAlias]) -> exprs.Expr:
14
+ def cast(expr: exprs.Expr, target_type: ts.ColumnType | type | _GenericAlias) -> exprs.Expr:
15
15
  expr.col_type = ts.ColumnType.normalize_type(target_type)
16
16
  return expr
17
17
 
18
18
 
19
- @func.uda(
20
- update_types=[ts.IntType(nullable=True)], value_type=ts.IntType(nullable=False),
21
- allows_window=True, requires_order_by=False)
22
- class sum(func.Aggregator):
23
- """Sums the selected integers or floats."""
24
- def __init__(self):
25
- self.sum: Optional[int] = None
26
-
27
- def update(self, val: Optional[int]) -> None:
19
+ T = typing.TypeVar('T')
20
+
21
+
22
+ @func.uda(allows_window=True, type_substitutions=({T: int | None}, {T: float | None})) # type: ignore[misc]
23
+ class sum(func.Aggregator, typing.Generic[T]):
24
+ """
25
+ Aggregate function that computes the sum of non-null values of a numeric column or grouping.
26
+
27
+ Args:
28
+ val: The numeric value to add to the sum.
29
+
30
+ Returns:
31
+ The sum of the non-null values, or `None` if there are no non-null values.
32
+
33
+ Examples:
34
+ Sum the values in the `value` column of the table `tbl`:
35
+
36
+ >>> tbl.select(pxt.functions.sum(tbl.value)).collect()
37
+
38
+ Group by the `category` column and compute the sum of the `value` column for each category,
39
+ assigning the name `'category_total'` to the new column:
40
+
41
+ >>> tbl.group_by(tbl.category).select(
42
+ ... tbl.category,
43
+ ... category_total=pxt.functions.sum(tbl.value)
44
+ ... ).collect()
45
+ """
46
+
47
+ def __init__(self) -> None:
48
+ self.sum: T = None
49
+
50
+ def update(self, val: T) -> None:
28
51
  if val is None:
29
52
  return
30
53
  if self.sum is None:
31
54
  self.sum = val
32
55
  else:
33
- self.sum += val
56
+ self.sum += val # type: ignore[operator]
34
57
 
35
- def value(self) -> Union[int, float]:
58
+ def value(self) -> T:
36
59
  return self.sum
37
60
 
38
61
 
39
62
  @sum.to_sql
40
- def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
63
+ def _(val: sql.ColumnElement) -> sql.ColumnElement | None:
41
64
  # This can produce a Decimal. We are deliberately avoiding an explicit cast to a Bigint here, because that can
42
65
  # cause overflows in Postgres. We're instead doing the conversion to the target type in SqlNode.__iter__().
43
66
  return sql.sql.func.sum(val)
44
67
 
45
68
 
46
- @func.uda(update_types=[ts.IntType(nullable=True)], value_type=ts.IntType(), allows_window=True, requires_order_by=False)
47
- class count(func.Aggregator):
48
- def __init__(self):
69
+ @func.uda(
70
+ allows_window=True,
71
+ # Allow counting non-null values of any type
72
+ # TODO: should we have an "Any" type that can be used here?
73
+ type_substitutions=tuple({T: t | None} for t in ts.ALL_PIXELTABLE_TYPES), # type: ignore[misc]
74
+ )
75
+ class count(func.Aggregator, typing.Generic[T]):
76
+ """
77
+ Aggregate function that counts the number of non-null values in a column or grouping.
78
+
79
+ Args:
80
+ val: The value to count.
81
+
82
+ Returns:
83
+ The count of non-null values.
84
+
85
+ Examples:
86
+ Count the number of non-null values in the `value` column of the table `tbl`:
87
+
88
+ >>> tbl.select(pxt.functions.count(tbl.value)).collect()
89
+
90
+ Group by the `category` column and compute the count of non-null values in the `value` column
91
+ for each category, assigning the name `'category_count'` to the new column:
92
+
93
+ >>> tbl.group_by(tbl.category).select(
94
+ ... tbl.category,
95
+ ... category_count=pxt.functions.count(tbl.value)
96
+ ... ).collect()
97
+ """
98
+
99
+ def __init__(self) -> None:
49
100
  self.count = 0
50
101
 
51
- def update(self, val: Optional[int]) -> None:
102
+ def update(self, val: T) -> None:
52
103
  if val is not None:
53
104
  self.count += 1
54
105
 
@@ -57,88 +108,192 @@ class count(func.Aggregator):
57
108
 
58
109
 
59
110
  @count.to_sql
60
- def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
111
+ def _(val: sql.ColumnElement) -> sql.ColumnElement | None:
61
112
  return sql.sql.func.count(val)
62
113
 
63
114
 
64
115
  @func.uda(
65
- update_types=[ts.IntType(nullable=True)], value_type=ts.IntType(nullable=True), allows_window=True,
66
- requires_order_by=False)
67
- class min(func.Aggregator):
68
- def __init__(self):
69
- self.val: Optional[int] = None
116
+ allows_window=True,
117
+ type_substitutions=tuple({T: t | None} for t in (str, int, float, bool, ts.Timestamp)), # type: ignore[misc]
118
+ )
119
+ class min(func.Aggregator, typing.Generic[T]):
120
+ """
121
+ Aggregate function that computes the minimum value in a column or grouping.
122
+
123
+ Args:
124
+ val: The value to compare.
125
+
126
+ Returns:
127
+ The minimum value, or `None` if there are no non-null values.
128
+
129
+ Examples:
130
+ Compute the minimum value in the `value` column of the table `tbl`:
131
+
132
+ >>> tbl.select(pxt.functions.min(tbl.value)).collect()
70
133
 
71
- def update(self, val: Optional[int]) -> None:
134
+ Group by the `category` column and compute the minimum value in the `value` column for each category,
135
+ assigning the name `'category_min'` to the new column:
136
+
137
+ >>> tbl.group_by(tbl.category).select(
138
+ ... tbl.category,
139
+ ... category_min=pxt.functions.min(tbl.value)
140
+ ... ).collect()
141
+ """
142
+
143
+ def __init__(self) -> None:
144
+ self.val: T = None
145
+
146
+ def update(self, val: T) -> None:
72
147
  if val is None:
73
148
  return
74
149
  if self.val is None:
75
150
  self.val = val
76
151
  else:
77
- self.val = builtins.min(self.val, val)
152
+ self.val = builtins.min(self.val, val) # type: ignore[call-overload]
78
153
 
79
- def value(self) -> Optional[int]:
154
+ def value(self) -> T:
80
155
  return self.val
81
156
 
82
157
 
83
158
  @min.to_sql
84
- def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
159
+ def _(val: sql.ColumnElement) -> sql.ColumnElement | None:
160
+ if val.type.python_type is bool:
161
+ # TODO: min/max aggregation of booleans is not supported in Postgres (but it is in Python).
162
+ # Right now we simply force the computation to be done in Python; we might consider implementing an alternate
163
+ # way of doing it in SQL. (min/max of booleans is simply logical and/or, respectively.)
164
+ return None
85
165
  return sql.sql.func.min(val)
86
166
 
87
167
 
88
168
  @func.uda(
89
- update_types=[ts.IntType(nullable=True)], value_type=ts.IntType(nullable=True), allows_window=True,
90
- requires_order_by=False)
91
- class max(func.Aggregator):
92
- def __init__(self):
93
- self.val: Optional[int] = None
169
+ allows_window=True,
170
+ type_substitutions=tuple({T: t | None} for t in (str, int, float, bool, ts.Timestamp)), # type: ignore[misc]
171
+ )
172
+ class max(func.Aggregator, typing.Generic[T]):
173
+ """
174
+ Aggregate function that computes the maximum value in a column or grouping.
175
+
176
+ Args:
177
+ val: The value to compare.
178
+
179
+ Returns:
180
+ The maximum value, or `None` if there are no non-null values.
181
+
182
+ Examples:
183
+ Compute the maximum value in the `value` column of the table `tbl`:
94
184
 
95
- def update(self, val: Optional[int]) -> None:
185
+ >>> tbl.select(pxt.functions.max(tbl.value)).collect()
186
+
187
+ Group by the `category` column and compute the maximum value in the `value` column for each category,
188
+ assigning the name `'category_max'` to the new column:
189
+
190
+ >>> tbl.group_by(tbl.category).select(
191
+ ... tbl.category,
192
+ ... category_max=pxt.functions.max(tbl.value)
193
+ ... ).collect()
194
+ """
195
+
196
+ def __init__(self) -> None:
197
+ self.val: T = None
198
+
199
+ def update(self, val: T) -> None:
96
200
  if val is None:
97
201
  return
98
202
  if self.val is None:
99
203
  self.val = val
100
204
  else:
101
- self.val = builtins.max(self.val, val)
205
+ self.val = builtins.max(self.val, val) # type: ignore[call-overload]
102
206
 
103
- def value(self) -> Optional[int]:
207
+ def value(self) -> T:
104
208
  return self.val
105
209
 
106
210
 
107
211
  @max.to_sql
108
- def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
212
+ def _(val: sql.ColumnElement) -> sql.ColumnElement | None:
213
+ if val.type.python_type is bool:
214
+ # TODO: see comment in @min.to_sql.
215
+ return None
109
216
  return sql.sql.func.max(val)
110
217
 
111
218
 
112
- @func.uda(
113
- update_types=[ts.IntType(nullable=True)], value_type=ts.FloatType(nullable=True), allows_window=False,
114
- requires_order_by=False)
115
- class mean(func.Aggregator):
116
- def __init__(self):
117
- self.sum: Optional[int] = None
219
+ @func.uda(type_substitutions=({T: int | None}, {T: float | None})) # type: ignore[misc]
220
+ class mean(func.Aggregator, typing.Generic[T]):
221
+ """
222
+ Aggregate function that computes the mean (average) of non-null values of a numeric column or grouping.
223
+
224
+ Args:
225
+ val: The numeric value to include in the mean.
226
+
227
+ Returns:
228
+ The mean of the non-null values, or `None` if there are no non-null values.
229
+
230
+ Examples:
231
+ Compute the mean of the values in the `value` column of the table `tbl`:
232
+
233
+ >>> tbl.select(pxt.functions.mean(tbl.value)).collect()
234
+
235
+ Group by the `category` column and compute the mean of the `value` column for each category,
236
+ assigning the name `'category_mean'` to the new column:
237
+
238
+ >>> tbl.group_by(tbl.category).select(
239
+ ... tbl.category,
240
+ ... category_mean=pxt.functions.mean(tbl.value)
241
+ ... ).collect()
242
+ """
243
+
244
+ def __init__(self) -> None:
245
+ self.sum: T = None
118
246
  self.count = 0
119
247
 
120
- def update(self, val: Optional[int]) -> None:
248
+ def update(self, val: T) -> None:
121
249
  if val is None:
122
250
  return
123
251
  if self.sum is None:
124
252
  self.sum = val
125
253
  else:
126
- self.sum += val
254
+ self.sum += val # type: ignore[operator]
127
255
  self.count += 1
128
256
 
129
- def value(self) -> Optional[float]:
257
+ def value(self) -> float | None: # Always a float
130
258
  if self.count == 0:
131
259
  return None
132
- return self.sum / self.count
260
+ return self.sum / self.count # type: ignore[operator]
133
261
 
134
262
 
135
263
  @mean.to_sql
136
- def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
264
+ def _(val: sql.ColumnElement) -> sql.ColumnElement | None:
137
265
  return sql.sql.func.avg(val)
138
266
 
139
267
 
268
def map(expr: exprs.Expr, fn: Callable[[exprs.Expr], Any]) -> exprs.Expr:
    """
    Applies a mapping function to each element of a list.

    Args:
        expr: The list expression to map over; must be an expression of type `pxt.Json`.
        fn: An operation on Pixeltable expressions that will be applied to each element of the JSON array.

    Returns:
        A `JsonMapper` expression that combines `expr` with the expression produced by `fn`.

    Raises:
        excs.Error: If calling `fn` fails or its result cannot be converted into a Pixeltable expression.

    Examples:
        Given a table `tbl` with a column `data` of type `pxt.Json` containing lists of integers, add a computed
        column that produces new lists with each integer doubled:

        >>> tbl.add_computed_column(
        ...     doubled=pxt.functions.map(tbl.data, lambda x: x * 2)
        ... )
    """
    target_expr: exprs.Expr
    try:
        # `fn` is called once, at expression-construction time, on the relative JSON path root;
        # its result is the per-element expression handed to `JsonMapper`.
        target_expr = exprs.Expr.from_object(fn(exprs.json_path.RELATIVE_PATH_ROOT))
    except Exception as e:
        # `fn` is arbitrary user code, so any failure is reported as a Pixeltable error with the cause chained.
        raise excs.Error(
            'Failed to evaluate map function. '
            '(The `fn` argument to `map()` must produce a valid Pixeltable expression.)'
        ) from e
    return exprs.JsonMapper(expr, target_expr)
293
+
294
+
140
295
  __all__ = local_public_names(__name__)
141
296
 
142
297
 
143
- def __dir__():
298
+ def __dir__() -> list[str]:
144
299
  return __all__