pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. pixeltable/__init__.py +83 -19
  2. pixeltable/_query.py +1444 -0
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +7 -4
  5. pixeltable/catalog/catalog.py +2394 -119
  6. pixeltable/catalog/column.py +225 -104
  7. pixeltable/catalog/dir.py +38 -9
  8. pixeltable/catalog/globals.py +53 -34
  9. pixeltable/catalog/insertable_table.py +265 -115
  10. pixeltable/catalog/path.py +80 -17
  11. pixeltable/catalog/schema_object.py +28 -43
  12. pixeltable/catalog/table.py +1270 -677
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +1270 -751
  15. pixeltable/catalog/table_version_handle.py +109 -0
  16. pixeltable/catalog/table_version_path.py +137 -42
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +251 -134
  20. pixeltable/config.py +215 -0
  21. pixeltable/env.py +736 -285
  22. pixeltable/exceptions.py +26 -2
  23. pixeltable/exec/__init__.py +7 -2
  24. pixeltable/exec/aggregation_node.py +39 -21
  25. pixeltable/exec/cache_prefetch_node.py +87 -109
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +25 -28
  29. pixeltable/exec/data_row_batch.py +11 -46
  30. pixeltable/exec/exec_context.py +26 -11
  31. pixeltable/exec/exec_node.py +35 -27
  32. pixeltable/exec/expr_eval/__init__.py +3 -0
  33. pixeltable/exec/expr_eval/evaluators.py +365 -0
  34. pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
  35. pixeltable/exec/expr_eval/globals.py +200 -0
  36. pixeltable/exec/expr_eval/row_buffer.py +74 -0
  37. pixeltable/exec/expr_eval/schedulers.py +413 -0
  38. pixeltable/exec/globals.py +35 -0
  39. pixeltable/exec/in_memory_data_node.py +35 -27
  40. pixeltable/exec/object_store_save_node.py +293 -0
  41. pixeltable/exec/row_update_node.py +44 -29
  42. pixeltable/exec/sql_node.py +414 -115
  43. pixeltable/exprs/__init__.py +8 -5
  44. pixeltable/exprs/arithmetic_expr.py +79 -45
  45. pixeltable/exprs/array_slice.py +5 -5
  46. pixeltable/exprs/column_property_ref.py +40 -26
  47. pixeltable/exprs/column_ref.py +254 -61
  48. pixeltable/exprs/comparison.py +14 -9
  49. pixeltable/exprs/compound_predicate.py +9 -10
  50. pixeltable/exprs/data_row.py +213 -72
  51. pixeltable/exprs/expr.py +270 -104
  52. pixeltable/exprs/expr_dict.py +6 -5
  53. pixeltable/exprs/expr_set.py +20 -11
  54. pixeltable/exprs/function_call.py +383 -284
  55. pixeltable/exprs/globals.py +18 -5
  56. pixeltable/exprs/in_predicate.py +7 -7
  57. pixeltable/exprs/inline_expr.py +37 -37
  58. pixeltable/exprs/is_null.py +8 -4
  59. pixeltable/exprs/json_mapper.py +120 -54
  60. pixeltable/exprs/json_path.py +90 -60
  61. pixeltable/exprs/literal.py +61 -16
  62. pixeltable/exprs/method_ref.py +7 -6
  63. pixeltable/exprs/object_ref.py +19 -8
  64. pixeltable/exprs/row_builder.py +238 -75
  65. pixeltable/exprs/rowid_ref.py +53 -15
  66. pixeltable/exprs/similarity_expr.py +65 -50
  67. pixeltable/exprs/sql_element_cache.py +5 -5
  68. pixeltable/exprs/string_op.py +107 -0
  69. pixeltable/exprs/type_cast.py +25 -13
  70. pixeltable/exprs/variable.py +2 -2
  71. pixeltable/func/__init__.py +9 -5
  72. pixeltable/func/aggregate_function.py +197 -92
  73. pixeltable/func/callable_function.py +119 -35
  74. pixeltable/func/expr_template_function.py +101 -48
  75. pixeltable/func/function.py +375 -62
  76. pixeltable/func/function_registry.py +20 -19
  77. pixeltable/func/globals.py +6 -5
  78. pixeltable/func/mcp.py +74 -0
  79. pixeltable/func/query_template_function.py +151 -35
  80. pixeltable/func/signature.py +178 -49
  81. pixeltable/func/tools.py +164 -0
  82. pixeltable/func/udf.py +176 -53
  83. pixeltable/functions/__init__.py +44 -4
  84. pixeltable/functions/anthropic.py +226 -47
  85. pixeltable/functions/audio.py +148 -11
  86. pixeltable/functions/bedrock.py +137 -0
  87. pixeltable/functions/date.py +188 -0
  88. pixeltable/functions/deepseek.py +113 -0
  89. pixeltable/functions/document.py +81 -0
  90. pixeltable/functions/fal.py +76 -0
  91. pixeltable/functions/fireworks.py +72 -20
  92. pixeltable/functions/gemini.py +249 -0
  93. pixeltable/functions/globals.py +208 -53
  94. pixeltable/functions/groq.py +108 -0
  95. pixeltable/functions/huggingface.py +1088 -95
  96. pixeltable/functions/image.py +155 -84
  97. pixeltable/functions/json.py +8 -11
  98. pixeltable/functions/llama_cpp.py +31 -19
  99. pixeltable/functions/math.py +169 -0
  100. pixeltable/functions/mistralai.py +50 -75
  101. pixeltable/functions/net.py +70 -0
  102. pixeltable/functions/ollama.py +29 -36
  103. pixeltable/functions/openai.py +548 -160
  104. pixeltable/functions/openrouter.py +143 -0
  105. pixeltable/functions/replicate.py +15 -14
  106. pixeltable/functions/reve.py +250 -0
  107. pixeltable/functions/string.py +310 -85
  108. pixeltable/functions/timestamp.py +37 -19
  109. pixeltable/functions/together.py +77 -120
  110. pixeltable/functions/twelvelabs.py +188 -0
  111. pixeltable/functions/util.py +7 -2
  112. pixeltable/functions/uuid.py +30 -0
  113. pixeltable/functions/video.py +1528 -117
  114. pixeltable/functions/vision.py +26 -26
  115. pixeltable/functions/voyageai.py +289 -0
  116. pixeltable/functions/whisper.py +19 -10
  117. pixeltable/functions/whisperx.py +179 -0
  118. pixeltable/functions/yolox.py +112 -0
  119. pixeltable/globals.py +716 -236
  120. pixeltable/index/__init__.py +3 -1
  121. pixeltable/index/base.py +17 -21
  122. pixeltable/index/btree.py +32 -22
  123. pixeltable/index/embedding_index.py +155 -92
  124. pixeltable/io/__init__.py +12 -7
  125. pixeltable/io/datarows.py +140 -0
  126. pixeltable/io/external_store.py +83 -125
  127. pixeltable/io/fiftyone.py +24 -33
  128. pixeltable/io/globals.py +47 -182
  129. pixeltable/io/hf_datasets.py +96 -127
  130. pixeltable/io/label_studio.py +171 -156
  131. pixeltable/io/lancedb.py +3 -0
  132. pixeltable/io/pandas.py +136 -115
  133. pixeltable/io/parquet.py +40 -153
  134. pixeltable/io/table_data_conduit.py +702 -0
  135. pixeltable/io/utils.py +100 -0
  136. pixeltable/iterators/__init__.py +8 -4
  137. pixeltable/iterators/audio.py +207 -0
  138. pixeltable/iterators/base.py +9 -3
  139. pixeltable/iterators/document.py +144 -87
  140. pixeltable/iterators/image.py +17 -38
  141. pixeltable/iterators/string.py +15 -12
  142. pixeltable/iterators/video.py +523 -127
  143. pixeltable/metadata/__init__.py +33 -8
  144. pixeltable/metadata/converters/convert_10.py +2 -3
  145. pixeltable/metadata/converters/convert_13.py +2 -2
  146. pixeltable/metadata/converters/convert_15.py +15 -11
  147. pixeltable/metadata/converters/convert_16.py +4 -5
  148. pixeltable/metadata/converters/convert_17.py +4 -5
  149. pixeltable/metadata/converters/convert_18.py +4 -6
  150. pixeltable/metadata/converters/convert_19.py +6 -9
  151. pixeltable/metadata/converters/convert_20.py +3 -6
  152. pixeltable/metadata/converters/convert_21.py +6 -8
  153. pixeltable/metadata/converters/convert_22.py +3 -2
  154. pixeltable/metadata/converters/convert_23.py +33 -0
  155. pixeltable/metadata/converters/convert_24.py +55 -0
  156. pixeltable/metadata/converters/convert_25.py +19 -0
  157. pixeltable/metadata/converters/convert_26.py +23 -0
  158. pixeltable/metadata/converters/convert_27.py +29 -0
  159. pixeltable/metadata/converters/convert_28.py +13 -0
  160. pixeltable/metadata/converters/convert_29.py +110 -0
  161. pixeltable/metadata/converters/convert_30.py +63 -0
  162. pixeltable/metadata/converters/convert_31.py +11 -0
  163. pixeltable/metadata/converters/convert_32.py +15 -0
  164. pixeltable/metadata/converters/convert_33.py +17 -0
  165. pixeltable/metadata/converters/convert_34.py +21 -0
  166. pixeltable/metadata/converters/convert_35.py +9 -0
  167. pixeltable/metadata/converters/convert_36.py +38 -0
  168. pixeltable/metadata/converters/convert_37.py +15 -0
  169. pixeltable/metadata/converters/convert_38.py +39 -0
  170. pixeltable/metadata/converters/convert_39.py +124 -0
  171. pixeltable/metadata/converters/convert_40.py +73 -0
  172. pixeltable/metadata/converters/convert_41.py +12 -0
  173. pixeltable/metadata/converters/convert_42.py +9 -0
  174. pixeltable/metadata/converters/convert_43.py +44 -0
  175. pixeltable/metadata/converters/util.py +44 -18
  176. pixeltable/metadata/notes.py +21 -0
  177. pixeltable/metadata/schema.py +185 -42
  178. pixeltable/metadata/utils.py +74 -0
  179. pixeltable/mypy/__init__.py +3 -0
  180. pixeltable/mypy/mypy_plugin.py +123 -0
  181. pixeltable/plan.py +616 -225
  182. pixeltable/share/__init__.py +3 -0
  183. pixeltable/share/packager.py +797 -0
  184. pixeltable/share/protocol/__init__.py +33 -0
  185. pixeltable/share/protocol/common.py +165 -0
  186. pixeltable/share/protocol/operation_types.py +33 -0
  187. pixeltable/share/protocol/replica.py +119 -0
  188. pixeltable/share/publish.py +349 -0
  189. pixeltable/store.py +398 -232
  190. pixeltable/type_system.py +730 -267
  191. pixeltable/utils/__init__.py +40 -0
  192. pixeltable/utils/arrow.py +201 -29
  193. pixeltable/utils/av.py +298 -0
  194. pixeltable/utils/azure_store.py +346 -0
  195. pixeltable/utils/coco.py +26 -27
  196. pixeltable/utils/code.py +4 -4
  197. pixeltable/utils/console_output.py +46 -0
  198. pixeltable/utils/coroutine.py +24 -0
  199. pixeltable/utils/dbms.py +92 -0
  200. pixeltable/utils/description_helper.py +11 -12
  201. pixeltable/utils/documents.py +60 -61
  202. pixeltable/utils/exception_handler.py +36 -0
  203. pixeltable/utils/filecache.py +38 -22
  204. pixeltable/utils/formatter.py +88 -51
  205. pixeltable/utils/gcs_store.py +295 -0
  206. pixeltable/utils/http.py +133 -0
  207. pixeltable/utils/http_server.py +14 -13
  208. pixeltable/utils/iceberg.py +13 -0
  209. pixeltable/utils/image.py +17 -0
  210. pixeltable/utils/lancedb.py +90 -0
  211. pixeltable/utils/local_store.py +322 -0
  212. pixeltable/utils/misc.py +5 -0
  213. pixeltable/utils/object_stores.py +573 -0
  214. pixeltable/utils/pydantic.py +60 -0
  215. pixeltable/utils/pytorch.py +20 -20
  216. pixeltable/utils/s3_store.py +527 -0
  217. pixeltable/utils/sql.py +32 -5
  218. pixeltable/utils/system.py +30 -0
  219. pixeltable/utils/transactional_directory.py +4 -3
  220. pixeltable-0.5.7.dist-info/METADATA +579 -0
  221. pixeltable-0.5.7.dist-info/RECORD +227 -0
  222. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  223. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  224. pixeltable/__version__.py +0 -3
  225. pixeltable/catalog/named_function.py +0 -36
  226. pixeltable/catalog/path_dict.py +0 -141
  227. pixeltable/dataframe.py +0 -894
  228. pixeltable/exec/expr_eval_node.py +0 -232
  229. pixeltable/ext/__init__.py +0 -14
  230. pixeltable/ext/functions/__init__.py +0 -8
  231. pixeltable/ext/functions/whisperx.py +0 -77
  232. pixeltable/ext/functions/yolox.py +0 -157
  233. pixeltable/tool/create_test_db_dump.py +0 -311
  234. pixeltable/tool/create_test_video.py +0 -81
  235. pixeltable/tool/doc_plugins/griffe.py +0 -50
  236. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  237. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  238. pixeltable/tool/embed_udf.py +0 -9
  239. pixeltable/tool/mypy_plugin.py +0 -55
  240. pixeltable/utils/media_store.py +0 -76
  241. pixeltable/utils/s3.py +0 -16
  242. pixeltable-0.2.26.dist-info/METADATA +0 -400
  243. pixeltable-0.2.26.dist-info/RECORD +0 -156
  244. pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
  245. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,169 @@
1
+ """
2
+ Pixeltable UDFs for mathematical operations.
3
+
4
+ Example:
5
+ ```python
6
+ import pixeltable as pxt
7
+
8
+ t = pxt.get_table(...)
9
+ t.select(t.float_col.floor()).collect()
10
+ ```
11
+ """
12
+
13
+ import builtins
14
+ import math
15
+
16
+ import sqlalchemy as sql
17
+
18
+ import pixeltable as pxt
19
+ from pixeltable.utils.code import local_public_names
20
+
21
+
22
+ @pxt.udf(is_method=True)
23
+ def abs(self: float) -> float:
24
+ """
25
+ Return the absolute value of the given number.
26
+
27
+ Equivalent to Python [`builtins.abs()`](https://docs.python.org/3/library/functions.html#abs).
28
+ """
29
+ return builtins.abs(self)
30
+
31
+
32
+ @abs.to_sql
33
+ def _(self: sql.ColumnElement) -> sql.ColumnElement:
34
+ return sql.func.abs(self)
35
+
36
+
37
+ @pxt.udf(is_method=True)
38
+ def ceil(self: float) -> float:
39
+ """
40
+ Return the ceiling of the given number.
41
+
42
+ Equivalent to Python [`float(math.ceil(self))`](https://docs.python.org/3/library/math.html#math.ceil) if `self`
43
+ is finite, or `self` itself if `self` is infinite. (This is slightly different from the default behavior of
44
+ `math.ceil(self)`, which always returns an `int` and raises an error if `self` is infinite. The behavior in
45
+ Pixeltable generalizes the Python operator and is chosen to align with the SQL standard.)
46
+ """
47
+ # This ensures the same behavior as SQL
48
+ if math.isfinite(self):
49
+ return float(math.ceil(self))
50
+ else:
51
+ return self
52
+
53
+
54
+ @ceil.to_sql
55
+ def _(self: sql.ColumnElement) -> sql.ColumnElement:
56
+ return sql.func.ceiling(self)
57
+
58
+
59
+ @pxt.udf(is_method=True)
60
+ def floor(self: float) -> float:
61
+ """
62
+ Return the floor of the given number.
63
+
64
+ Equivalent to Python [`float(math.floor(self))`](https://docs.python.org/3/library/math.html#math.floor) if `self`
65
+ is finite, or `self` itself if `self` is infinite. (This is slightly different from the default behavior of
66
+ `math.floor(self)`, which always returns an `int` and raises an error if `self` is infinite. The behavior of
67
+ Pixeltable generalizes the Python operator and is chosen to align with the SQL standard.)
68
+ """
69
+ # This ensures the same behavior as SQL
70
+ if math.isfinite(self):
71
+ return float(math.floor(self))
72
+ else:
73
+ return self
74
+
75
+
76
+ @floor.to_sql
77
+ def _(self: sql.ColumnElement) -> sql.ColumnElement:
78
+ return sql.func.floor(self)
79
+
80
+
81
+ @pxt.udf(is_method=True)
82
+ def round(self: float, digits: int | None = None) -> float:
83
+ """
84
+ Round a number to a given precision in decimal digits.
85
+
86
+ Equivalent to Python [`builtins.round(self, digits or 0)`](https://docs.python.org/3/library/functions.html#round).
87
+ Note that if `digits` is not specified, the behavior matches `builtins.round(self, 0)` rather than
88
+ `builtins.round(self)`; this ensures that the return type is always `float` (as in SQL) rather than `int`.
89
+ """
90
+ # Set digits explicitly to 0 to guarantee a return type of float; this ensures the same behavior as SQL
91
+ return builtins.round(self, digits or 0)
92
+
93
+
94
+ @round.to_sql
95
+ def _(self: sql.ColumnElement, digits: sql.ColumnElement | None = None) -> sql.ColumnElement:
96
+ if digits is None:
97
+ return sql.func.round(self)
98
+ else:
99
+ return sql.cast(sql.func.round(sql.cast(self, sql.Numeric), sql.cast(digits, sql.Integer)), sql.Float)
100
+
101
+
102
+ @pxt.udf(is_method=True)
103
+ def pow(self: int, other: int) -> float:
104
+ """
105
+ Raise `self` to the power of `other`.
106
+
107
+ Equivalent to Python [`self ** other`](https://docs.python.org/3/library/functions.html#pow).
108
+ """
109
+ return self**other
110
+
111
+
112
+ @pow.to_sql
113
+ def _(self: sql.ColumnElement, other: sql.ColumnElement) -> sql.ColumnElement:
114
+ return sql.func.pow(self, other)
115
+
116
+
117
+ @pxt.udf(is_method=True)
118
+ def bitwise_and(self: int, other: int) -> int:
119
+ """
120
+ Bitwise AND of two integers.
121
+
122
+ Equivalent to Python
123
+ [`self & other`](https://docs.python.org/3/library/stdtypes.html#bitwise-operations-on-integer-types).
124
+ """
125
+ return self & other
126
+
127
+
128
+ @bitwise_and.to_sql
129
+ def _(self: sql.ColumnElement, other: sql.ColumnElement) -> sql.ColumnElement:
130
+ return self.bitwise_and(other)
131
+
132
+
133
+ @pxt.udf(is_method=True)
134
+ def bitwise_or(self: int, other: int) -> int:
135
+ """
136
+ Bitwise OR of two integers.
137
+
138
+ Equivalent to Python
139
+ [`self | other`](https://docs.python.org/3/library/stdtypes.html#bitwise-operations-on-integer-types).
140
+ """
141
+ return self | other
142
+
143
+
144
+ @bitwise_or.to_sql
145
+ def _(self: sql.ColumnElement, other: sql.ColumnElement) -> sql.ColumnElement:
146
+ return self.bitwise_or(other)
147
+
148
+
149
+ @pxt.udf(is_method=True)
150
+ def bitwise_xor(self: int, other: int) -> int:
151
+ """
152
+ Bitwise XOR of two integers.
153
+
154
+ Equivalent to Python
155
+ [`self ^ other`](https://docs.python.org/3/library/stdtypes.html#bitwise-operations-on-integer-types).
156
+ """
157
+ return self ^ other
158
+
159
+
160
+ @bitwise_xor.to_sql
161
+ def _(self: sql.ColumnElement, other: sql.ColumnElement) -> sql.ColumnElement:
162
+ return self.bitwise_xor(other)
163
+
164
+
165
+ __all__ = local_public_names(__name__)
166
+
167
+
168
+ def __dir__() -> list[str]:
169
+ return __all__
@@ -1,26 +1,28 @@
1
1
  """
2
- Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
2
+ Pixeltable UDFs
3
3
  that wrap various endpoints from the Mistral AI API. In order to use them, you must
4
4
  first `pip install mistralai` and configure your Mistral AI credentials, as described in
5
- the [Working with Mistral AI](https://pixeltable.readme.io/docs/working-with-mistralai) tutorial.
5
+ the [Working with Mistral AI](https://docs.pixeltable.com/notebooks/integrations/working-with-mistralai) tutorial.
6
6
  """
7
7
 
8
- from typing import TYPE_CHECKING, Optional, TypeVar, Union
8
+ from typing import TYPE_CHECKING, Any
9
9
 
10
10
  import numpy as np
11
11
 
12
12
  import pixeltable as pxt
13
+ import pixeltable.type_system as ts
13
14
  from pixeltable.env import Env, register_client
14
15
  from pixeltable.func.signature import Batch
15
16
  from pixeltable.utils.code import local_public_names
16
17
 
17
18
  if TYPE_CHECKING:
18
- import mistralai.types.basemodel
19
+ import mistralai
19
20
 
20
21
 
21
22
  @register_client('mistral')
22
23
  def _(api_key: str) -> 'mistralai.Mistral':
23
24
  import mistralai
25
+
24
26
  return mistralai.Mistral(api_key=api_key)
25
27
 
26
28
 
@@ -28,18 +30,9 @@ def _mistralai_client() -> 'mistralai.Mistral':
28
30
  return Env.get().get_client('mistral')
29
31
 
30
32
 
31
- @pxt.udf
32
- def chat_completions(
33
- messages: list[dict[str, str]],
34
- *,
35
- model: str,
36
- temperature: Optional[float] = 0.7,
37
- top_p: Optional[float] = 1.0,
38
- max_tokens: Optional[int] = None,
39
- stop: Optional[list[str]] = None,
40
- random_seed: Optional[int] = None,
41
- response_format: Optional[dict] = None,
42
- safe_prompt: Optional[bool] = False,
33
+ @pxt.udf(resource_pool='request-rate:mistral')
34
+ async def chat_completions(
35
+ messages: list[dict[str, str]], *, model: str, model_kwargs: dict[str, Any] | None = None
43
36
  ) -> dict:
44
37
  """
45
38
  Chat Completion API.
@@ -47,6 +40,10 @@ def chat_completions(
47
40
  Equivalent to the Mistral AI `chat/completions` API endpoint.
48
41
  For additional details, see: <https://docs.mistral.ai/api/#tag/chat>
49
42
 
43
+ Request throttling:
44
+ Applies the rate limit set in the config (section `mistral`, key `rate_limit`). If no rate
45
+ limit is configured, uses a default of 600 RPM.
46
+
50
47
  __Requirements:__
51
48
 
52
49
  - `pip install mistralai`
@@ -54,8 +51,8 @@ def chat_completions(
54
51
  Args:
55
52
  messages: The prompt(s) to generate completions for.
56
53
  model: ID of the model to use. (See overview here: <https://docs.mistral.ai/getting-started/models/>)
57
-
58
- For details on the other parameters, see: <https://docs.mistral.ai/api/#tag/chat>
54
+ model_kwargs: Additional keyword args for the Mistral `chat/completions` API.
55
+ For details on the available parameters, see: <https://docs.mistral.ai/api/#tag/chat>
59
56
 
60
57
  Returns:
61
58
  A dictionary containing the response and other metadata.
@@ -65,41 +62,32 @@ def chat_completions(
65
62
  to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
66
63
 
67
64
  >>> messages = [{'role': 'user', 'content': tbl.prompt}]
68
- ... tbl['response'] = completions(messages, model='mistral-latest-small')
65
+ ... tbl.add_computed_column(response=chat_completions(messages, model='mistral-latest-small'))
69
66
  """
67
+ if model_kwargs is None:
68
+ model_kwargs = {}
69
+
70
70
  Env.get().require_package('mistralai')
71
- return _mistralai_client().chat.complete(
71
+ result = await _mistralai_client().chat.complete_async(
72
72
  messages=messages, # type: ignore[arg-type]
73
73
  model=model,
74
- temperature=temperature,
75
- top_p=top_p,
76
- max_tokens=_opt(max_tokens),
77
- stop=stop,
78
- random_seed=_opt(random_seed),
79
- response_format=response_format, # type: ignore[arg-type]
80
- safe_prompt=safe_prompt,
81
- ).dict()
82
-
83
-
84
- @pxt.udf
85
- def fim_completions(
86
- prompt: str,
87
- *,
88
- model: str,
89
- temperature: Optional[float] = 0.7,
90
- top_p: Optional[float] = 1.0,
91
- max_tokens: Optional[int] = None,
92
- min_tokens: Optional[int] = None,
93
- stop: Optional[list[str]] = None,
94
- random_seed: Optional[int] = None,
95
- suffix: Optional[str] = None,
96
- ) -> dict:
74
+ **model_kwargs,
75
+ )
76
+ return result.dict()
77
+
78
+
79
+ @pxt.udf(resource_pool='request-rate:mistral')
80
+ async def fim_completions(prompt: str, *, model: str, model_kwargs: dict[str, Any] | None = None) -> dict:
97
81
  """
98
82
  Fill-in-the-middle Completion API.
99
83
 
100
84
  Equivalent to the Mistral AI `fim/completions` API endpoint.
101
85
  For additional details, see: <https://docs.mistral.ai/api/#tag/fim>
102
86
 
87
+ Request throttling:
88
+ Applies the rate limit set in the config (section `mistral`, key `rate_limit`). If no rate
89
+ limit is configured, uses a default of 600 RPM.
90
+
103
91
  __Requirements:__
104
92
 
105
93
  - `pip install mistralai`
@@ -107,6 +95,8 @@ def fim_completions(
107
95
  Args:
108
96
  prompt: The text/code to complete.
109
97
  model: ID of the model to use. (See overview here: <https://docs.mistral.ai/getting-started/models/>)
98
+ model_kwargs: Additional keyword args for the Mistral `fim/completions` API.
99
+ For details on the available parameters, see: <https://docs.mistral.ai/api/#tag/fim>
110
100
 
111
101
  For details on the other parameters, see: <https://docs.mistral.ai/api/#tag/fim>
112
102
 
@@ -117,35 +107,31 @@ def fim_completions(
117
107
  Add a computed column that applies the model `codestral-latest`
118
108
  to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
119
109
 
120
- >>> tbl['response'] = completions(tbl.prompt, model='codestral-latest')
110
+ >>> tbl.add_computed_column(response=fim_completions(tbl.prompt, model='codestral-latest'))
121
111
  """
112
+ if model_kwargs is None:
113
+ model_kwargs = {}
114
+
122
115
  Env.get().require_package('mistralai')
123
- return _mistralai_client().fim.complete(
124
- prompt=prompt,
125
- model=model,
126
- temperature=temperature,
127
- top_p=top_p,
128
- max_tokens=_opt(max_tokens),
129
- min_tokens=_opt(min_tokens),
130
- stop=stop,
131
- random_seed=_opt(random_seed),
132
- suffix=_opt(suffix)
133
- ).dict()
116
+ result = await _mistralai_client().fim.complete_async(prompt=prompt, model=model, **model_kwargs)
117
+ return result.dict()
134
118
 
135
119
 
136
- _embedding_dimensions_cache: dict[str, int] = {
137
- 'mistral-embed': 1024
138
- }
120
+ _embedding_dimensions_cache: dict[str, int] = {'mistral-embed': 1024}
139
121
 
140
122
 
141
- @pxt.udf(batch_size=16)
142
- def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,), pxt.Float]]:
123
+ @pxt.udf(batch_size=16, resource_pool='request-rate:mistral')
124
+ async def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,), pxt.Float]]: # noqa: RUF029
143
125
  """
144
126
  Embeddings API.
145
127
 
146
128
  Equivalent to the Mistral AI `embeddings` API endpoint.
147
129
  For additional details, see: <https://docs.mistral.ai/api/#tag/embeddings>
148
130
 
131
+ Request throttling:
132
+ Applies the rate limit set in the config (section `mistral`, key `rate_limit`). If no rate
133
+ limit is configured, uses a default of 600 RPM.
134
+
149
135
  __Requirements:__
150
136
 
151
137
  - `pip install mistralai`
@@ -158,29 +144,18 @@ def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,), pxt
158
144
  An array representing the application of the given embedding to `input`.
159
145
  """
160
146
  Env.get().require_package('mistralai')
161
- result = _mistralai_client().embeddings.create(
162
- inputs=input,
163
- model=model,
164
- )
147
+ result = _mistralai_client().embeddings.create(inputs=input, model=model)
165
148
  return [np.array(data.embedding, dtype=np.float64) for data in result.data]
166
149
 
167
150
 
168
151
  @embeddings.conditional_return_type
169
- def _(model: str) -> pxt.ArrayType:
152
+ def _(model: str) -> ts.ArrayType:
170
153
  dimensions = _embedding_dimensions_cache.get(model) # `None` if unknown model
171
- return pxt.ArrayType((dimensions,), dtype=pxt.FloatType())
172
-
173
-
174
- _T = TypeVar('_T')
175
-
176
-
177
- def _opt(arg: Optional[_T]) -> Union[_T, 'mistralai.types.basemodel.Unset']:
178
- from mistralai.types import UNSET
179
- return arg if arg is not None else UNSET
154
+ return ts.ArrayType((dimensions,), dtype=ts.FloatType())
180
155
 
181
156
 
182
157
  __all__ = local_public_names(__name__)
183
158
 
184
159
 
185
- def __dir__():
160
+ def __dir__() -> list[str]:
186
161
  return __all__
@@ -0,0 +1,70 @@
1
+ """
2
+ Pixeltable UDF for converting media file URIs to presigned HTTP URLs.
3
+ """
4
+
5
+ from pixeltable import exceptions as excs
6
+ from pixeltable.func.udf import udf
7
+ from pixeltable.utils.code import local_public_names
8
+ from pixeltable.utils.object_stores import ObjectOps, ObjectPath, StorageTarget
9
+
10
+
11
+ @udf
12
+ def presigned_url(uri: str, expiration_seconds: int) -> str:
13
+ """
14
+ Convert a blob storage URI to a presigned HTTP URL for direct access.
15
+
16
+ Generates a time-limited, publicly accessible URL from cloud storage URIs
17
+ (S3, GCS, Azure, etc.) that can be used to serve media files over HTTP.
18
+
19
+ Note:
20
+ This function uses presigned URLs from storage providers. Provider-specific
21
+ limitations apply:
22
+
23
+ - Google Cloud Storage: maximum 7-day expiration
24
+ - AWS S3: requires proper region configuration
25
+ - Azure: subject to storage account access policies
26
+
27
+ Args:
28
+ uri: The media file URI (e.g., `s3://bucket/path`, `gs://bucket/path`, `azure://container/path`)
29
+ expiration_seconds: How long the URL remains valid
30
+
31
+ Returns:
32
+ A presigned HTTP URL for accessing the file
33
+
34
+ Raises:
35
+ Error: If the URI is a local file:// path
36
+
37
+ Examples:
38
+ Generate a presigned URL for a video column with 1-hour expiration:
39
+
40
+ >>> tbl.select(
41
+ ... original_url=tbl.video.fileurl,
42
+ ... presigned_url=pxtf.net.presigned_url(tbl.video.fileurl, 3600)
43
+ ... ).collect()
44
+ """
45
+ if not uri:
46
+ return uri
47
+
48
+ # Parse the object storage address from the URI
49
+ soa = ObjectPath.parse_object_storage_addr(uri, allow_obj_name=True)
50
+
51
+ # HTTP/HTTPS URLs are already publicly accessible
52
+ if soa.storage_target == StorageTarget.HTTP_STORE:
53
+ return uri
54
+
55
+ # For file:// URLs, we can't generate presigned URLs
56
+ if soa.storage_target == StorageTarget.LOCAL_STORE:
57
+ raise excs.Error(
58
+ 'Cannot generate presigned URL for local file:// URLs. '
59
+ 'Please use cloud storage (S3, GCS, Azure) for presigned URLs.'
60
+ )
61
+
62
+ store = ObjectOps.get_store(soa, allow_obj_name=True)
63
+ return store.create_presigned_url(soa, expiration_seconds)
64
+
65
+
66
+ __all__ = local_public_names(__name__)
67
+
68
+
69
+ def __dir__() -> list[str]:
70
+ return __all__
@@ -1,4 +1,11 @@
1
- from typing import TYPE_CHECKING, Optional
1
+ """
2
+ Pixeltable UDFs for Ollama local models.
3
+
4
+ Provides integration with Ollama for running large language models locally,
5
+ including chat completions and embeddings.
6
+ """
7
+
8
+ from typing import TYPE_CHECKING
2
9
 
3
10
  import numpy as np
4
11
 
@@ -14,10 +21,11 @@ if TYPE_CHECKING:
14
21
  @env.register_client('ollama')
15
22
  def _(host: str) -> 'ollama.Client':
16
23
  import ollama
24
+
17
25
  return ollama.Client(host=host)
18
26
 
19
27
 
20
- def _ollama_client() -> Optional['ollama.Client']:
28
+ def _ollama_client() -> 'ollama.Client | None':
21
29
  try:
22
30
  return env.Env.get().get_client('ollama')
23
31
  except Exception:
@@ -32,10 +40,10 @@ def generate(
32
40
  suffix: str = '',
33
41
  system: str = '',
34
42
  template: str = '',
35
- context: Optional[list[int]] = None,
43
+ context: list[int] | None = None,
36
44
  raw: bool = False,
37
- format: str = '',
38
- options: Optional[dict] = None,
45
+ format: str | None = None,
46
+ options: dict | None = None,
39
47
  ) -> dict:
40
48
  """
41
49
  Generate a response for a given prompt with a provided model.
@@ -44,14 +52,14 @@ def generate(
44
52
  prompt: The prompt to generate a response for.
45
53
  model: The model name.
46
54
  suffix: The text after the model response.
47
- format: The format of the response; must be one of `'json'` or `''` (the empty string).
55
+ format: The format of the response; must be one of `'json'` or `None`.
48
56
  system: System message.
49
57
  template: Prompt template to use.
50
58
  context: The context parameter returned from a previous call to `generate()`.
51
59
  raw: If `True`, no formatting will be applied to the prompt.
52
- options: Additional options to pass to the `chat` call, such as `max_tokens`, `temperature`, `top_p`, and `top_k`.
53
- For details, see the
54
- [Valid Parameters and Values](https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values)
60
+ options: Additional options for the Ollama `generate` call, such as `max_tokens`, `temperature`, `top_p`, and
61
+ `top_k`. For details, see the
62
+ [Valid Parameters and Values](https://github.com/ollama/ollama/blob/main/docs/modelfile.mdx#valid-parameters-and-values)
55
63
  section of the Ollama documentation.
56
64
  """
57
65
  env.Env.get().require_package('ollama')
@@ -76,9 +84,9 @@ def chat(
76
84
  messages: list[dict],
77
85
  *,
78
86
  model: str,
79
- tools: Optional[list[dict]] = None,
80
- format: str = '',
81
- options: Optional[dict] = None,
87
+ tools: list[dict] | None = None,
88
+ format: str | None = None,
89
+ options: dict | None = None,
82
90
  ) -> dict:
83
91
  """
84
92
  Generate the next message in a chat with a provided model.
@@ -87,32 +95,22 @@ def chat(
87
95
  messages: The messages of the chat.
88
96
  model: The model name.
89
97
  tools: Tools for the model to use.
90
- format: The format of the response; must be one of `'json'` or `''` (the empty string).
91
- options: Additional options to pass to the `chat` call, such as `max_tokens`, `temperature`, `top_p`, and `top_k`.
92
- For details, see the
93
- [Valid Parameters and Values](https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values)
98
+ format: The format of the response; must be one of `'json'` or `None`.
99
+ options: Additional options to pass to the `chat` call, such as `max_tokens`, `temperature`, `top_p`, and
100
+ `top_k`. For details, see the
101
+ [Valid Parameters and Values](https://github.com/ollama/ollama/blob/main/docs/modelfile.mdx#valid-parameters-and-values)
94
102
  section of the Ollama documentation.
95
103
  """
96
104
  env.Env.get().require_package('ollama')
97
105
  import ollama
98
106
 
99
107
  client = _ollama_client() or ollama
100
- return client.chat(
101
- model=model,
102
- messages=messages,
103
- tools=tools,
104
- format=format,
105
- options=options,
106
- ).dict() # type: ignore[call-overload]
108
+ return client.chat(model=model, messages=messages, tools=tools, format=format, options=options).dict() # type: ignore[call-overload]
107
109
 
108
110
 
109
111
  @pxt.udf(batch_size=16)
110
112
  def embed(
111
- input: Batch[str],
112
- *,
113
- model: str,
114
- truncate: bool = True,
115
- options: Optional[dict] = None,
113
+ input: Batch[str], *, model: str, truncate: bool = True, options: dict | None = None
116
114
  ) -> Batch[pxt.Array[(None,), pxt.Float]]:
117
115
  """
118
116
  Generate embeddings from a model.
@@ -124,24 +122,19 @@ def embed(
124
122
  Returns error if false and context length is exceeded.
125
123
  options: Additional options to pass to the `embed` call.
126
124
  For details, see the
127
- [Valid Parameters and Values](https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values)
125
+ [Valid Parameters and Values](https://github.com/ollama/ollama/blob/main/docs/modelfile.mdx#valid-parameters-and-values)
128
126
  section of the Ollama documentation.
129
127
  """
130
128
  env.Env.get().require_package('ollama')
131
129
  import ollama
132
130
 
133
131
  client = _ollama_client() or ollama
134
- results = client.embed(
135
- model=model,
136
- input=input,
137
- truncate=truncate,
138
- options=options,
139
- ).dict()
132
+ results = client.embed(model=model, input=input, truncate=truncate, options=options).dict()
140
133
  return [np.array(data, dtype=np.float64) for data in results['embeddings']]
141
134
 
142
135
 
143
136
  __all__ = local_public_names(__name__)
144
137
 
145
138
 
146
- def __dir__():
139
+ def __dir__() -> list[str]:
147
140
  return __all__