pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. pixeltable/__init__.py +83 -19
  2. pixeltable/_query.py +1444 -0
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +7 -4
  5. pixeltable/catalog/catalog.py +2394 -119
  6. pixeltable/catalog/column.py +225 -104
  7. pixeltable/catalog/dir.py +38 -9
  8. pixeltable/catalog/globals.py +53 -34
  9. pixeltable/catalog/insertable_table.py +265 -115
  10. pixeltable/catalog/path.py +80 -17
  11. pixeltable/catalog/schema_object.py +28 -43
  12. pixeltable/catalog/table.py +1270 -677
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +1270 -751
  15. pixeltable/catalog/table_version_handle.py +109 -0
  16. pixeltable/catalog/table_version_path.py +137 -42
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +251 -134
  20. pixeltable/config.py +215 -0
  21. pixeltable/env.py +736 -285
  22. pixeltable/exceptions.py +26 -2
  23. pixeltable/exec/__init__.py +7 -2
  24. pixeltable/exec/aggregation_node.py +39 -21
  25. pixeltable/exec/cache_prefetch_node.py +87 -109
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +25 -28
  29. pixeltable/exec/data_row_batch.py +11 -46
  30. pixeltable/exec/exec_context.py +26 -11
  31. pixeltable/exec/exec_node.py +35 -27
  32. pixeltable/exec/expr_eval/__init__.py +3 -0
  33. pixeltable/exec/expr_eval/evaluators.py +365 -0
  34. pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
  35. pixeltable/exec/expr_eval/globals.py +200 -0
  36. pixeltable/exec/expr_eval/row_buffer.py +74 -0
  37. pixeltable/exec/expr_eval/schedulers.py +413 -0
  38. pixeltable/exec/globals.py +35 -0
  39. pixeltable/exec/in_memory_data_node.py +35 -27
  40. pixeltable/exec/object_store_save_node.py +293 -0
  41. pixeltable/exec/row_update_node.py +44 -29
  42. pixeltable/exec/sql_node.py +414 -115
  43. pixeltable/exprs/__init__.py +8 -5
  44. pixeltable/exprs/arithmetic_expr.py +79 -45
  45. pixeltable/exprs/array_slice.py +5 -5
  46. pixeltable/exprs/column_property_ref.py +40 -26
  47. pixeltable/exprs/column_ref.py +254 -61
  48. pixeltable/exprs/comparison.py +14 -9
  49. pixeltable/exprs/compound_predicate.py +9 -10
  50. pixeltable/exprs/data_row.py +213 -72
  51. pixeltable/exprs/expr.py +270 -104
  52. pixeltable/exprs/expr_dict.py +6 -5
  53. pixeltable/exprs/expr_set.py +20 -11
  54. pixeltable/exprs/function_call.py +383 -284
  55. pixeltable/exprs/globals.py +18 -5
  56. pixeltable/exprs/in_predicate.py +7 -7
  57. pixeltable/exprs/inline_expr.py +37 -37
  58. pixeltable/exprs/is_null.py +8 -4
  59. pixeltable/exprs/json_mapper.py +120 -54
  60. pixeltable/exprs/json_path.py +90 -60
  61. pixeltable/exprs/literal.py +61 -16
  62. pixeltable/exprs/method_ref.py +7 -6
  63. pixeltable/exprs/object_ref.py +19 -8
  64. pixeltable/exprs/row_builder.py +238 -75
  65. pixeltable/exprs/rowid_ref.py +53 -15
  66. pixeltable/exprs/similarity_expr.py +65 -50
  67. pixeltable/exprs/sql_element_cache.py +5 -5
  68. pixeltable/exprs/string_op.py +107 -0
  69. pixeltable/exprs/type_cast.py +25 -13
  70. pixeltable/exprs/variable.py +2 -2
  71. pixeltable/func/__init__.py +9 -5
  72. pixeltable/func/aggregate_function.py +197 -92
  73. pixeltable/func/callable_function.py +119 -35
  74. pixeltable/func/expr_template_function.py +101 -48
  75. pixeltable/func/function.py +375 -62
  76. pixeltable/func/function_registry.py +20 -19
  77. pixeltable/func/globals.py +6 -5
  78. pixeltable/func/mcp.py +74 -0
  79. pixeltable/func/query_template_function.py +151 -35
  80. pixeltable/func/signature.py +178 -49
  81. pixeltable/func/tools.py +164 -0
  82. pixeltable/func/udf.py +176 -53
  83. pixeltable/functions/__init__.py +44 -4
  84. pixeltable/functions/anthropic.py +226 -47
  85. pixeltable/functions/audio.py +148 -11
  86. pixeltable/functions/bedrock.py +137 -0
  87. pixeltable/functions/date.py +188 -0
  88. pixeltable/functions/deepseek.py +113 -0
  89. pixeltable/functions/document.py +81 -0
  90. pixeltable/functions/fal.py +76 -0
  91. pixeltable/functions/fireworks.py +72 -20
  92. pixeltable/functions/gemini.py +249 -0
  93. pixeltable/functions/globals.py +208 -53
  94. pixeltable/functions/groq.py +108 -0
  95. pixeltable/functions/huggingface.py +1088 -95
  96. pixeltable/functions/image.py +155 -84
  97. pixeltable/functions/json.py +8 -11
  98. pixeltable/functions/llama_cpp.py +31 -19
  99. pixeltable/functions/math.py +169 -0
  100. pixeltable/functions/mistralai.py +50 -75
  101. pixeltable/functions/net.py +70 -0
  102. pixeltable/functions/ollama.py +29 -36
  103. pixeltable/functions/openai.py +548 -160
  104. pixeltable/functions/openrouter.py +143 -0
  105. pixeltable/functions/replicate.py +15 -14
  106. pixeltable/functions/reve.py +250 -0
  107. pixeltable/functions/string.py +310 -85
  108. pixeltable/functions/timestamp.py +37 -19
  109. pixeltable/functions/together.py +77 -120
  110. pixeltable/functions/twelvelabs.py +188 -0
  111. pixeltable/functions/util.py +7 -2
  112. pixeltable/functions/uuid.py +30 -0
  113. pixeltable/functions/video.py +1528 -117
  114. pixeltable/functions/vision.py +26 -26
  115. pixeltable/functions/voyageai.py +289 -0
  116. pixeltable/functions/whisper.py +19 -10
  117. pixeltable/functions/whisperx.py +179 -0
  118. pixeltable/functions/yolox.py +112 -0
  119. pixeltable/globals.py +716 -236
  120. pixeltable/index/__init__.py +3 -1
  121. pixeltable/index/base.py +17 -21
  122. pixeltable/index/btree.py +32 -22
  123. pixeltable/index/embedding_index.py +155 -92
  124. pixeltable/io/__init__.py +12 -7
  125. pixeltable/io/datarows.py +140 -0
  126. pixeltable/io/external_store.py +83 -125
  127. pixeltable/io/fiftyone.py +24 -33
  128. pixeltable/io/globals.py +47 -182
  129. pixeltable/io/hf_datasets.py +96 -127
  130. pixeltable/io/label_studio.py +171 -156
  131. pixeltable/io/lancedb.py +3 -0
  132. pixeltable/io/pandas.py +136 -115
  133. pixeltable/io/parquet.py +40 -153
  134. pixeltable/io/table_data_conduit.py +702 -0
  135. pixeltable/io/utils.py +100 -0
  136. pixeltable/iterators/__init__.py +8 -4
  137. pixeltable/iterators/audio.py +207 -0
  138. pixeltable/iterators/base.py +9 -3
  139. pixeltable/iterators/document.py +144 -87
  140. pixeltable/iterators/image.py +17 -38
  141. pixeltable/iterators/string.py +15 -12
  142. pixeltable/iterators/video.py +523 -127
  143. pixeltable/metadata/__init__.py +33 -8
  144. pixeltable/metadata/converters/convert_10.py +2 -3
  145. pixeltable/metadata/converters/convert_13.py +2 -2
  146. pixeltable/metadata/converters/convert_15.py +15 -11
  147. pixeltable/metadata/converters/convert_16.py +4 -5
  148. pixeltable/metadata/converters/convert_17.py +4 -5
  149. pixeltable/metadata/converters/convert_18.py +4 -6
  150. pixeltable/metadata/converters/convert_19.py +6 -9
  151. pixeltable/metadata/converters/convert_20.py +3 -6
  152. pixeltable/metadata/converters/convert_21.py +6 -8
  153. pixeltable/metadata/converters/convert_22.py +3 -2
  154. pixeltable/metadata/converters/convert_23.py +33 -0
  155. pixeltable/metadata/converters/convert_24.py +55 -0
  156. pixeltable/metadata/converters/convert_25.py +19 -0
  157. pixeltable/metadata/converters/convert_26.py +23 -0
  158. pixeltable/metadata/converters/convert_27.py +29 -0
  159. pixeltable/metadata/converters/convert_28.py +13 -0
  160. pixeltable/metadata/converters/convert_29.py +110 -0
  161. pixeltable/metadata/converters/convert_30.py +63 -0
  162. pixeltable/metadata/converters/convert_31.py +11 -0
  163. pixeltable/metadata/converters/convert_32.py +15 -0
  164. pixeltable/metadata/converters/convert_33.py +17 -0
  165. pixeltable/metadata/converters/convert_34.py +21 -0
  166. pixeltable/metadata/converters/convert_35.py +9 -0
  167. pixeltable/metadata/converters/convert_36.py +38 -0
  168. pixeltable/metadata/converters/convert_37.py +15 -0
  169. pixeltable/metadata/converters/convert_38.py +39 -0
  170. pixeltable/metadata/converters/convert_39.py +124 -0
  171. pixeltable/metadata/converters/convert_40.py +73 -0
  172. pixeltable/metadata/converters/convert_41.py +12 -0
  173. pixeltable/metadata/converters/convert_42.py +9 -0
  174. pixeltable/metadata/converters/convert_43.py +44 -0
  175. pixeltable/metadata/converters/util.py +44 -18
  176. pixeltable/metadata/notes.py +21 -0
  177. pixeltable/metadata/schema.py +185 -42
  178. pixeltable/metadata/utils.py +74 -0
  179. pixeltable/mypy/__init__.py +3 -0
  180. pixeltable/mypy/mypy_plugin.py +123 -0
  181. pixeltable/plan.py +616 -225
  182. pixeltable/share/__init__.py +3 -0
  183. pixeltable/share/packager.py +797 -0
  184. pixeltable/share/protocol/__init__.py +33 -0
  185. pixeltable/share/protocol/common.py +165 -0
  186. pixeltable/share/protocol/operation_types.py +33 -0
  187. pixeltable/share/protocol/replica.py +119 -0
  188. pixeltable/share/publish.py +349 -0
  189. pixeltable/store.py +398 -232
  190. pixeltable/type_system.py +730 -267
  191. pixeltable/utils/__init__.py +40 -0
  192. pixeltable/utils/arrow.py +201 -29
  193. pixeltable/utils/av.py +298 -0
  194. pixeltable/utils/azure_store.py +346 -0
  195. pixeltable/utils/coco.py +26 -27
  196. pixeltable/utils/code.py +4 -4
  197. pixeltable/utils/console_output.py +46 -0
  198. pixeltable/utils/coroutine.py +24 -0
  199. pixeltable/utils/dbms.py +92 -0
  200. pixeltable/utils/description_helper.py +11 -12
  201. pixeltable/utils/documents.py +60 -61
  202. pixeltable/utils/exception_handler.py +36 -0
  203. pixeltable/utils/filecache.py +38 -22
  204. pixeltable/utils/formatter.py +88 -51
  205. pixeltable/utils/gcs_store.py +295 -0
  206. pixeltable/utils/http.py +133 -0
  207. pixeltable/utils/http_server.py +14 -13
  208. pixeltable/utils/iceberg.py +13 -0
  209. pixeltable/utils/image.py +17 -0
  210. pixeltable/utils/lancedb.py +90 -0
  211. pixeltable/utils/local_store.py +322 -0
  212. pixeltable/utils/misc.py +5 -0
  213. pixeltable/utils/object_stores.py +573 -0
  214. pixeltable/utils/pydantic.py +60 -0
  215. pixeltable/utils/pytorch.py +20 -20
  216. pixeltable/utils/s3_store.py +527 -0
  217. pixeltable/utils/sql.py +32 -5
  218. pixeltable/utils/system.py +30 -0
  219. pixeltable/utils/transactional_directory.py +4 -3
  220. pixeltable-0.5.7.dist-info/METADATA +579 -0
  221. pixeltable-0.5.7.dist-info/RECORD +227 -0
  222. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  223. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  224. pixeltable/__version__.py +0 -3
  225. pixeltable/catalog/named_function.py +0 -36
  226. pixeltable/catalog/path_dict.py +0 -141
  227. pixeltable/dataframe.py +0 -894
  228. pixeltable/exec/expr_eval_node.py +0 -232
  229. pixeltable/ext/__init__.py +0 -14
  230. pixeltable/ext/functions/__init__.py +0 -8
  231. pixeltable/ext/functions/whisperx.py +0 -77
  232. pixeltable/ext/functions/yolox.py +0 -157
  233. pixeltable/tool/create_test_db_dump.py +0 -311
  234. pixeltable/tool/create_test_video.py +0 -81
  235. pixeltable/tool/doc_plugins/griffe.py +0 -50
  236. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  237. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  238. pixeltable/tool/embed_udf.py +0 -9
  239. pixeltable/tool/mypy_plugin.py +0 -55
  240. pixeltable/utils/media_store.py +0 -76
  241. pixeltable/utils/s3.py +0 -16
  242. pixeltable-0.2.26.dist-info/METADATA +0 -400
  243. pixeltable-0.2.26.dist-info/RECORD +0 -156
  244. pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
  245. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,188 @@
1
+ """
2
+ Pixeltable UDFs for `DateType`.
3
+
4
+ Usage example:
5
+ ```python
6
+ import pixeltable as pxt
7
+
8
+ t = pxt.get_table(...)
9
+ t.select(t.date_col.year, t.date_col.weekday()).collect()
10
+ ```
11
+ """
12
+
13
+ from datetime import date, timedelta
14
+
15
+ import sqlalchemy as sql
16
+
17
+ import pixeltable as pxt
18
+ from pixeltable.utils.code import local_public_names
19
+
20
+ _SQL_ZERO = sql.literal(0)
21
+
22
+ # NOT YET SUPPORTED date +/- integer
23
+ # NOT YET SUPPORTED date1 - date2 -> integer
24
+ # NOT YET SUPPORTED timestamp(date)
25
+ # NOT YET SUPPORTED date(timestamp)
26
+
27
+
28
@pxt.udf(is_property=True)
def year(self: date) -> int:
    """
    The year component of the date; between 1 and 9999 inclusive.

    (That is, between [`MINYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MINYEAR) and
    [`MAXYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MAXYEAR) as defined by the Python
    `datetime` library.)

    Equivalent to [`date.year`](https://docs.python.org/3/library/datetime.html#datetime.date.year).
    """
    year_component = self.year
    return year_component


@year.to_sql
def _(self: sql.ColumnElement) -> sql.ColumnElement:
    # SQL counterpart of `year`: extracts the 'year' field from the column expression.
    year_field = sql.extract('year', self)
    return year_field
45
+
46
+
47
@pxt.udf(is_property=True)
def month(self: date) -> int:
    """
    The month component of the date; between 1 and 12 inclusive.

    Equivalent to [`date.month`](https://docs.python.org/3/library/datetime.html#datetime.date.month).
    """
    month_component = self.month
    return month_component


@month.to_sql
def _(self: sql.ColumnElement) -> sql.ColumnElement:
    # SQL counterpart of `month`: extracts the 'month' field from the column expression.
    month_field = sql.extract('month', self)
    return month_field
60
+
61
+
62
@pxt.udf(is_property=True)
def day(self: date) -> int:
    """
    The day-of-month component of the date; between 1 and the number of days in the given month of the given year.

    Equivalent to [`date.day`](https://docs.python.org/3/library/datetime.html#datetime.date.day).
    """
    day_component = self.day
    return day_component


@day.to_sql
def _(self: sql.ColumnElement) -> sql.ColumnElement:
    # SQL counterpart of `day`: extracts the 'day' field from the column expression.
    day_field = sql.extract('day', self)
    return day_field
75
+
76
+
77
@pxt.udf(is_method=True)
def make_date(year: int, month: int, day: int) -> date:
    """
    Create a date from its year, month, and day components.

    Equivalent to [`date()`](https://docs.python.org/3/library/datetime.html#datetime.date).

    Args:
        year: The year of the date.
        month: The month of the date.
        day: The day of the month.

    Returns:
        The `date` built from the three components.
    """
    return date(year, month, day)


@make_date.to_sql
def _(year: sql.ColumnElement, month: sql.ColumnElement, day: sql.ColumnElement) -> sql.ColumnElement:
    # SQL counterpart of `make_date`: each component is cast to an integer before being handed to the
    # database's `make_date` function.
    components = [part.cast(sql.Integer) for part in (year, month, day)]
    return sql.func.make_date(*components)
90
+
91
+
92
@pxt.udf(is_method=True)
def weekday(self: date) -> int:
    """
    Return the day of the week as an integer between 0 (Monday) and 6 (Sunday) inclusive.

    Equivalent to [`date.weekday()`](https://docs.python.org/3/library/datetime.html#datetime.date.weekday).
    """
    day_index = self.weekday()
    return day_index


@weekday.to_sql
def _(self: sql.ColumnElement) -> sql.ColumnElement:
    # 'isodow' numbers the days starting at 1 for Monday; shift by one to get the 0-based value that
    # `date.weekday()` returns.
    iso_day_of_week = sql.extract('isodow', self)
    return iso_day_of_week - 1
105
+
106
+
107
@pxt.udf(is_method=True)
def isoweekday(self: date) -> int:
    """
    Return the ISO day of the week as an integer, where Monday is 1 and Sunday is 7.

    Equivalent to [`date.isoweekday()`](https://docs.python.org/3/library/datetime.html#datetime.date.isoweekday).
    """
    iso_day_index = self.isoweekday()
    return iso_day_index


@isoweekday.to_sql
def _(self: sql.ColumnElement) -> sql.ColumnElement:
    # SQL counterpart of `isoweekday`: extracts the 'isodow' field from the column expression unchanged.
    iso_day_of_week = sql.extract('isodow', self)
    return iso_day_of_week
120
+
121
+
122
@pxt.udf(is_method=True)
def isocalendar(self: date) -> dict:
    """
    Return the ISO calendar representation of the date as a dictionary with the three entries
    `'year'`, `'week'`, and `'weekday'`.

    Equivalent to
    [`date.isocalendar()`](https://docs.python.org/3/library/datetime.html#datetime.date.isocalendar).
    """
    # `date.isocalendar()` yields (year, week, weekday) in that order; pair each value with its key.
    field_names = ('year', 'week', 'weekday')
    return dict(zip(field_names, self.isocalendar()))
132
+
133
+
134
@pxt.udf(is_method=True)
def isoformat(self: date, sep: str = 'T', timespec: str = 'auto') -> str:
    """
    Return a string representing the date in ISO 8601 format (`YYYY-MM-DD`).

    Equivalent to [`date.isoformat()`](https://docs.python.org/3/library/datetime.html#datetime.date.isoformat).

    Args:
        sep: Not used. A date has no time component, so there is nothing to separate; the parameter is
            retained only so that existing calls that pass it keep working.
        timespec: Not used, for the same reason as `sep`; retained only for backward compatibility.

    Returns:
        The ISO 8601 string for the date.
    """
    # `date.isoformat()` accepts no arguments, so `sep` and `timespec` are deliberately not forwarded.
    return self.isoformat()
148
+
149
+
150
@pxt.udf(is_method=True)
def toordinal(self: date) -> int:
    """
    Return the proleptic Gregorian ordinal of the date (January 1 of year 1 has ordinal 1).

    Equivalent to [`date.toordinal()`](https://docs.python.org/3/library/datetime.html#datetime.date.toordinal).
    """
    ordinal = self.toordinal()
    return ordinal
158
+
159
+
160
@pxt.udf(is_method=True)
def strftime(self: date, format: str) -> str:
    """
    Return a string representing the date, controlled by an explicit format string.

    Equivalent to [`date.strftime()`](https://docs.python.org/3/library/datetime.html#datetime.date.strftime).

    Args:
        format: The format string to control the output. For a complete list of formatting directives, see
            [`strftime()` and `strptime()` Behavior](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior).
    """
    formatted = self.strftime(format)
    return formatted
172
+
173
+
174
@pxt.udf(is_method=True)
def add_days(self: date, n: int) -> date:
    """
    Return the date that lies `n` days after this date.

    Equivalent to [`date + timedelta(days=n)`](https://docs.python.org/3/library/datetime.html#datetime.timedelta).

    Args:
        n: The number of days to add.
    """
    offset = timedelta(days=n)
    return self + offset
182
+
183
+
184
# The module's exported names, as computed by `local_public_names` for this module.
__all__ = local_public_names(__name__)


def __dir__() -> list[str]:
    """Module-level `__dir__` hook: `dir()` on this module reports the names in `__all__`."""
    return __all__
@@ -0,0 +1,113 @@
1
+ """
2
+ Pixeltable UDFs for Deepseek AI models.
3
+
4
+ Provides integration with Deepseek's language models for chat completions
5
+ and other AI capabilities.
6
+ """
7
+
8
+ import json
9
+ from typing import TYPE_CHECKING, Any
10
+
11
+ import httpx
12
+
13
+ import pixeltable as pxt
14
+ from pixeltable import env
15
+ from pixeltable.utils.code import local_public_names
16
+
17
+ if TYPE_CHECKING:
18
+ import openai
19
+
20
+
21
@env.register_client('deepseek')
def _(api_key: str) -> 'openai.AsyncOpenAI':
    # Factory registered under the client name 'deepseek'. `openai` is imported here rather than at
    # module level, so the package is only needed once a client is actually constructed.
    import openai

    # Connection-pool limits for the underlying HTTP client.
    pool_limits = httpx.Limits(max_keepalive_connections=100, max_connections=500)
    http_client = httpx.AsyncClient(limits=pool_limits)
    return openai.AsyncOpenAI(api_key=api_key, base_url='https://api.deepseek.com', http_client=http_client)
30
+
31
+
32
def _deepseek_client() -> 'openai.AsyncOpenAI':
    """Return the client registered under the name `'deepseek'`, as provided by the current `Env`."""
    current_env = env.Env.get()
    return current_env.get_client('deepseek')
34
+
35
+
36
@pxt.udf(resource_pool='request-rate:deepseek')
async def chat_completions(
    messages: list,
    *,
    model: str,
    model_kwargs: dict[str, Any] | None = None,
    tools: list[dict[str, Any]] | None = None,
    tool_choice: dict[str, Any] | None = None,
) -> dict:
    """
    Creates a model response for the given chat conversation.

    Equivalent to the Deepseek `chat/completions` API endpoint.
    For additional details, see: <https://api-docs.deepseek.com/api/create-chat-completion>

    Deepseek uses the OpenAI SDK, so you will need to install the `openai` package to use this UDF.

    Request throttling:
    Applies the rate limit set in the config (section `deepseek`, key `rate_limit`). If no rate
    limit is configured, uses a default of 600 RPM.

    __Requirements:__

    - `pip install openai`

    Args:
        messages: A list of messages to use for chat completion, as described in the Deepseek API documentation.
        model: The model to use for chat completion.
        model_kwargs: Additional keyword args for the Deepseek `chat/completions` API.
            For details on the available parameters, see: <https://api-docs.deepseek.com/api/create-chat-completion>
            The dictionary that is passed in is not modified.
        tools: An optional list of Pixeltable tools to use for the request.
        tool_choice: An optional tool choice configuration.

    Returns:
        A dictionary containing the response and other metadata.

    Examples:
        Add a computed column that applies the model `deepseek-chat` to an existing Pixeltable column `tbl.prompt`
        of the table `tbl`:

        >>> messages = [
        ...     {'role': 'system', 'content': 'You are a helpful assistant.'},
        ...     {'role': 'user', 'content': tbl.prompt}
        ... ]
        >>> tbl.add_computed_column(response=chat_completions(messages, model='deepseek-chat'))
    """
    # Build the request arguments on a copy: the entries added below must not leak into the
    # caller's `model_kwargs` dictionary.
    request_kwargs: dict[str, Any] = {} if model_kwargs is None else dict(model_kwargs)

    if tools is not None:
        # Wrap each tool spec in the `{'type': 'function', ...}` envelope used by the request.
        request_kwargs['tools'] = [{'type': 'function', 'function': tool} for tool in tools]

    if tool_choice is not None:
        if tool_choice['auto']:
            request_kwargs['tool_choice'] = 'auto'
        elif tool_choice['required']:
            request_kwargs['tool_choice'] = 'required'
        else:
            # Neither 'auto' nor 'required': a specific tool must have been named.
            assert tool_choice['tool'] is not None
            request_kwargs['tool_choice'] = {'type': 'function', 'function': {'name': tool_choice['tool']}}

        if not tool_choice['parallel_tool_calls']:
            # The flag is sent via `extra_body`. Copy any caller-supplied `extra_body` first so that the
            # nested dictionary is not modified in place either.
            extra_body = dict(request_kwargs.get('extra_body', {}))
            extra_body['parallel_tool_calls'] = False
            request_kwargs['extra_body'] = extra_body

    result = await _deepseek_client().chat.completions.with_raw_response.create(
        messages=messages, model=model, **request_kwargs
    )

    # The raw response body is JSON text; decode it into a plain dictionary.
    return json.loads(result.text)
107
+
108
+
109
# Names exported by this module, as computed by `local_public_names` for this module.
__all__ = local_public_names(__name__)


def __dir__() -> list[str]:
    """Module-level `__dir__` hook: makes `dir()` on this module list the names in `__all__`."""
    return __all__
@@ -0,0 +1,81 @@
1
+ """
2
+ Pixeltable UDFs for `DocumentType`.
3
+ """
4
+
5
+ from typing import Any, Literal
6
+
7
+ import pixeltable as pxt
8
+
9
+
10
def document_splitter(
    document: Any,
    separators: str,
    *,
    elements: list[Literal['text', 'image']] | None = None,
    limit: int | None = None,
    overlap: int | None = None,
    metadata: str = '',
    skip_tags: list[str] | None = None,
    tiktoken_encoding: str | None = 'cl100k_base',
    tiktoken_target_model: str | None = None,
    image_dpi: int = 300,
    image_format: str = 'png',
) -> tuple[type[pxt.iterators.ComponentIterator], dict[str, Any]]:
    """Iterator over chunks of a document. The document is chunked according to the specified `separators`.

    The iterator yields a `text` field containing the text of the chunk, and it may also
    include additional metadata fields if specified in the `metadata` parameter, as explained below.

    Chunked text will be cleaned with `ftfy.fix_text` to fix up common problems with unicode sequences.

    Only those optional arguments whose value differs from its default are forwarded to the underlying
    `DocumentSplitter`; `document` and `separators` are always forwarded.

    Args:
        separators: separators to use to chunk the document. Options are:
            `'heading'`, `'paragraph'`, `'sentence'`, `'token_limit'`, `'char_limit'`, `'page'`.
            This may be a comma-separated string, e.g., `'heading,token_limit'`.
        elements: list of elements to extract from the document. Options are:
            `'text'`, `'image'`. Defaults to `['text']` if not specified. The `'image'` element is only supported
            for the `'page'` separator on PDF documents.
        limit: the maximum number of tokens or characters in each chunk, if `'token_limit'`
            or `'char_limit'` is specified.
        metadata: additional metadata fields to include in the output. Options are:
            `'title'`, `'heading'` (HTML and Markdown), `'sourceline'` (HTML), `'page'` (PDF), `'bounding_box'`
            (PDF). The input may be a comma-separated string, e.g., `'title,heading,sourceline'`.
        image_dpi: DPI to use when extracting images from PDFs. Defaults to 300.
        image_format: format to use when extracting images from PDFs. Defaults to 'png'.

    Examples:
        All these examples assume an existing table `tbl` with a column `doc` of type `pxt.Document`.

        Create a view that splits all documents into chunks of up to 300 tokens:

        >>> pxt.create_view('chunks', tbl, iterator=document_splitter(tbl.doc, separators='token_limit', limit=300))

        Create a view that splits all documents along sentence boundaries, including title and heading metadata:

        >>> pxt.create_view(
        ...     'sentence_chunks',
        ...     tbl,
        ...     iterator=document_splitter(tbl.doc, separators='sentence', metadata='title,heading')
        ... )
    """
    # One entry per optional argument: (keyword, value, whether the caller moved it off its default).
    # The entries are listed in signature order.
    optional_args: tuple[tuple[str, Any, bool], ...] = (
        ('elements', elements, elements is not None),
        ('limit', limit, limit is not None),
        ('overlap', overlap, overlap is not None),
        ('metadata', metadata, metadata != ''),
        ('skip_tags', skip_tags, skip_tags is not None),
        ('tiktoken_encoding', tiktoken_encoding, tiktoken_encoding != 'cl100k_base'),
        ('tiktoken_target_model', tiktoken_target_model, tiktoken_target_model is not None),
        ('image_dpi', image_dpi, image_dpi != 300),
        ('image_format', image_format, image_format != 'png'),
    )
    kwargs: dict[str, Any] = {keyword: value for keyword, value, was_set in optional_args if was_set}
    return pxt.iterators.document.DocumentSplitter._create(document=document, separators=separators, **kwargs)
@@ -0,0 +1,76 @@
1
+ """
2
+ Pixeltable UDFs
3
+ that wrap various endpoints from the fal.ai API. In order to use them, you must
4
+ first `pip install fal-client` and configure your fal.ai credentials, as described in
5
+ the [Working with fal.ai](https://docs.pixeltable.com/notebooks/integrations/working-with-fal) tutorial.
6
+ """
7
+
8
+ from typing import TYPE_CHECKING, Any
9
+
10
+ import pixeltable as pxt
11
+ from pixeltable.env import Env, register_client
12
+ from pixeltable.utils.code import local_public_names
13
+
14
+ if TYPE_CHECKING:
15
+ import fal_client
16
+
17
+
18
@register_client('fal')
def _(api_key: str) -> 'fal_client.AsyncClient':
    # Factory registered under the client name 'fal'. `fal_client` is imported here rather than at
    # module level, so the package is only needed once a client is actually constructed.
    import fal_client

    async_client = fal_client.AsyncClient(key=api_key)
    return async_client
23
+
24
+
25
def _fal_client() -> 'fal_client.AsyncClient':
    """Return the client registered under the name `'fal'`, as provided by the current `Env`."""
    current_env = Env.get()
    return current_env.get_client('fal')
27
+
28
+
29
@pxt.udf(resource_pool='request-rate:fal')
async def run(input: dict[str, Any], *, app: str) -> pxt.Json:
    """
    Run a model on fal.ai.

    Uses fal's queue-based subscribe mechanism for reliable execution.
    For additional details, see: <https://fal.ai/docs>

    Request throttling:
    Applies the rate limit set in the config (section `fal`, key `rate_limit`). If no rate
    limit is configured, uses a default of 600 RPM.

    __Requirements:__

    - `pip install fal-client`

    Args:
        input: The input parameters for the model.
        app: The name or ID of the fal.ai application to run (e.g., 'fal-ai/flux/schnell').

    Returns:
        The output of the model as a JSON object.

    Examples:
        Add a computed column that applies the model `fal-ai/flux/schnell`
        to an existing Pixeltable column `tbl.prompt` of the table `tbl`:

        >>> input = {'prompt': tbl.prompt}
        ... tbl.add_computed_column(response=run(input, app='fal-ai/flux/schnell'))

        Add a computed column that uses the model `fal-ai/fast-sdxl`
        to generate images from an existing Pixeltable column `tbl.prompt`:

        >>> input = {'prompt': tbl.prompt, 'image_size': 'square', 'num_inference_steps': 25}
        ... tbl.add_computed_column(response=run(input, app='fal-ai/fast-sdxl'))
        ... tbl.add_computed_column(image=tbl.response['images'][0]['url'].astype(pxt.Image))
    """
    # Package check comes first, before the client is looked up (presumably this reports a missing
    # `fal_client` installation; confirm against `Env.require_package`).
    Env.get().require_package('fal_client')
    # Submit the request through the client's `subscribe` call and hand its result back unchanged.
    return await _fal_client().subscribe(app, arguments=input)
70
+
71
+
72
# Exported names of this module, as computed by `local_public_names` for this module.
__all__ = local_public_names(__name__)


def __dir__() -> list[str]:
    """Module-level `__dir__` hook: `dir()` on this module returns the names held in `__all__`."""
    return __all__
@@ -1,14 +1,15 @@
1
1
  """
2
- Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
2
+ Pixeltable UDFs
3
3
  that wrap various endpoints from the Fireworks AI API. In order to use them, you must
4
4
  first `pip install fireworks-ai` and configure your Fireworks AI credentials, as described in
5
- the [Working with Fireworks](https://pixeltable.readme.io/docs/working-with-fireworks) tutorial.
5
+ the [Working with Fireworks](https://docs.pixeltable.com/notebooks/integrations/working-with-fireworks) tutorial.
6
6
  """
7
7
 
8
- from typing import Optional, TYPE_CHECKING
8
+ from typing import TYPE_CHECKING, Any
9
9
 
10
10
  import pixeltable as pxt
11
11
  from pixeltable import env
12
+ from pixeltable.config import Config
12
13
  from pixeltable.utils.code import local_public_names
13
14
 
14
15
  if TYPE_CHECKING:
@@ -26,21 +27,19 @@ def _fireworks_client() -> 'fireworks.client.Fireworks':
26
27
  return env.Env.get().get_client('fireworks')
27
28
 
28
29
 
29
- @pxt.udf
30
- def chat_completions(
31
- messages: list[dict[str, str]],
32
- *,
33
- model: str,
34
- max_tokens: Optional[int] = None,
35
- top_k: Optional[int] = None,
36
- top_p: Optional[float] = None,
37
- temperature: Optional[float] = None,
30
+ @pxt.udf(resource_pool='request-rate:fireworks')
31
+ async def chat_completions(
32
+ messages: list[dict[str, str]], *, model: str, model_kwargs: dict[str, Any] | None = None
38
33
  ) -> dict:
39
34
  """
40
35
  Creates a model response for the given chat conversation.
41
36
 
42
37
  Equivalent to the Fireworks AI `chat/completions` API endpoint.
43
- For additional details, see: [https://docs.fireworks.ai/api-reference/post-chatcompletions](https://docs.fireworks.ai/api-reference/post-chatcompletions)
38
+ For additional details, see: <https://docs.fireworks.ai/api-reference/post-chatcompletions>
39
+
40
+ Request throttling:
41
+ Applies the rate limit set in the config (section `fireworks`, key `rate_limit`). If no rate
42
+ limit is configured, uses a default of 600 RPM.
44
43
 
45
44
  __Requirements:__
46
45
 
@@ -49,8 +48,8 @@ def chat_completions(
49
48
  Args:
50
49
  messages: A list of messages comprising the conversation so far.
51
50
  model: The name of the model to use.
52
-
53
- For details on the other parameters, see: [https://docs.fireworks.ai/api-reference/post-chatcompletions](https://docs.fireworks.ai/api-reference/post-chatcompletions)
51
+ model_kwargs: Additional keyword args for the Fireworks `chat_completions` API. For details on the available
52
+ parameters, see: <https://docs.fireworks.ai/api-reference/post-chatcompletions>
54
53
 
55
54
  Returns:
56
55
  A dictionary containing the response and other metadata.
@@ -60,15 +59,68 @@ def chat_completions(
60
59
  to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
61
60
 
62
61
  >>> messages = [{'role': 'user', 'content': tbl.prompt}]
63
- ... tbl['response'] = chat_completions(messages, model='accounts/fireworks/models/mixtral-8x22b-instruct')
62
+ ... tbl.add_computed_column(
63
+ ... response=chat_completions(messages, model='accounts/fireworks/models/mixtral-8x22b-instruct')
64
+ ... )
64
65
  """
65
- kwargs = {'max_tokens': max_tokens, 'top_k': top_k, 'top_p': top_p, 'temperature': temperature}
66
- kwargs_not_none = {k: v for k, v in kwargs.items() if v is not None}
67
- return _fireworks_client().chat.completions.create(model=model, messages=messages, **kwargs_not_none).dict()
66
+ if model_kwargs is None:
67
+ model_kwargs = {}
68
+
69
+ # for debugging purposes:
70
+ # res_sync = _fireworks_client().chat.completions.create(model=model, messages=messages, **kwargs_not_none)
71
+ # res_sync_dict = res_sync.dict()
72
+
73
+ if 'request_timeout' not in model_kwargs:
74
+ model_kwargs['request_timeout'] = Config.get().get_int_value('timeout', section='fireworks') or 600
75
+ # TODO: this timeout doesn't really work, I think it only applies to returning the stream, but not to the timing
76
+ # of the chunks; addressing this would require a timeout for the task running this udf
77
+ stream = _fireworks_client().chat.completions.acreate(model=model, messages=messages, **model_kwargs)
78
+ chunks = []
79
+ async for chunk in stream:
80
+ chunks.append(chunk)
81
+
82
+ res = {
83
+ 'id': chunks[0].id,
84
+ 'object': 'chat.completion',
85
+ 'created': chunks[0].created,
86
+ 'model': chunks[0].model,
87
+ 'choices': [
88
+ {
89
+ 'index': 0,
90
+ 'message': {
91
+ 'role': None,
92
+ 'content': '',
93
+ 'tool_calls': None,
94
+ 'tool_call_id': None,
95
+ 'function': None,
96
+ 'name': None,
97
+ },
98
+ 'finish_reason': None,
99
+ 'logprobs': None,
100
+ 'raw_output': None,
101
+ }
102
+ ],
103
+ 'usage': {},
104
+ }
105
+ for chunk in chunks:
106
+ d = chunk.dict()
107
+ if 'usage' in d and d['usage'] is not None:
108
+ res['usage'] = d['usage']
109
+ if chunk.choices[0].finish_reason is not None:
110
+ res['choices'][0]['finish_reason'] = chunk.choices[0].finish_reason
111
+ if chunk.choices[0].delta.role is not None:
112
+ res['choices'][0]['message']['role'] = chunk.choices[0].delta.role
113
+ if chunk.choices[0].delta.content is not None:
114
+ res['choices'][0]['message']['content'] += chunk.choices[0].delta.content
115
+ if chunk.choices[0].delta.tool_calls is not None:
116
+ res['choices'][0]['message']['tool_calls'] = chunk.choices[0].delta.tool_calls
117
+ if chunk.choices[0].delta.function is not None:
118
+ res['choices'][0]['message']['function'] = chunk.choices[0].delta.function
119
+ return res
68
120
 
69
121
 
70
122
  __all__ = local_public_names(__name__)
71
123
 
72
124
 
73
- def __dir__():
125
+ def __dir__() -> list[str]:
74
126
  return __all__