pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. pixeltable/__init__.py +42 -8
  2. pixeltable/{dataframe.py → _query.py} +470 -206
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +5 -4
  5. pixeltable/catalog/catalog.py +1785 -432
  6. pixeltable/catalog/column.py +190 -113
  7. pixeltable/catalog/dir.py +2 -4
  8. pixeltable/catalog/globals.py +19 -46
  9. pixeltable/catalog/insertable_table.py +191 -98
  10. pixeltable/catalog/path.py +63 -23
  11. pixeltable/catalog/schema_object.py +11 -15
  12. pixeltable/catalog/table.py +843 -436
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +978 -657
  15. pixeltable/catalog/table_version_handle.py +72 -16
  16. pixeltable/catalog/table_version_path.py +112 -43
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +134 -90
  20. pixeltable/config.py +134 -22
  21. pixeltable/env.py +471 -157
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +4 -1
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +11 -7
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +106 -56
  37. pixeltable/exec/globals.py +35 -0
  38. pixeltable/exec/in_memory_data_node.py +19 -19
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +351 -84
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +36 -23
  46. pixeltable/exprs/column_ref.py +213 -89
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +164 -54
  50. pixeltable/exprs/expr.py +70 -44
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +100 -40
  54. pixeltable/exprs/globals.py +2 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +18 -32
  57. pixeltable/exprs/is_null.py +7 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +27 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +167 -67
  64. pixeltable/exprs/rowid_ref.py +25 -10
  65. pixeltable/exprs/similarity_expr.py +58 -40
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +17 -11
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +29 -27
  78. pixeltable/func/signature.py +46 -19
  79. pixeltable/func/tools.py +31 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +16 -0
  82. pixeltable/functions/anthropic.py +123 -77
  83. pixeltable/functions/audio.py +147 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +7 -4
  86. pixeltable/functions/deepseek.py +35 -43
  87. pixeltable/functions/document.py +81 -0
  88. pixeltable/functions/fal.py +76 -0
  89. pixeltable/functions/fireworks.py +11 -20
  90. pixeltable/functions/gemini.py +195 -39
  91. pixeltable/functions/globals.py +142 -14
  92. pixeltable/functions/groq.py +108 -0
  93. pixeltable/functions/huggingface.py +1056 -24
  94. pixeltable/functions/image.py +115 -57
  95. pixeltable/functions/json.py +1 -1
  96. pixeltable/functions/llama_cpp.py +28 -13
  97. pixeltable/functions/math.py +67 -5
  98. pixeltable/functions/mistralai.py +18 -55
  99. pixeltable/functions/net.py +70 -0
  100. pixeltable/functions/ollama.py +20 -13
  101. pixeltable/functions/openai.py +240 -226
  102. pixeltable/functions/openrouter.py +143 -0
  103. pixeltable/functions/replicate.py +4 -4
  104. pixeltable/functions/reve.py +250 -0
  105. pixeltable/functions/string.py +239 -69
  106. pixeltable/functions/timestamp.py +16 -16
  107. pixeltable/functions/together.py +24 -84
  108. pixeltable/functions/twelvelabs.py +188 -0
  109. pixeltable/functions/util.py +6 -1
  110. pixeltable/functions/uuid.py +30 -0
  111. pixeltable/functions/video.py +1515 -107
  112. pixeltable/functions/vision.py +8 -8
  113. pixeltable/functions/voyageai.py +289 -0
  114. pixeltable/functions/whisper.py +16 -8
  115. pixeltable/functions/whisperx.py +179 -0
  116. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  117. pixeltable/globals.py +362 -115
  118. pixeltable/index/base.py +17 -21
  119. pixeltable/index/btree.py +28 -22
  120. pixeltable/index/embedding_index.py +100 -118
  121. pixeltable/io/__init__.py +4 -2
  122. pixeltable/io/datarows.py +8 -7
  123. pixeltable/io/external_store.py +56 -105
  124. pixeltable/io/fiftyone.py +13 -13
  125. pixeltable/io/globals.py +31 -30
  126. pixeltable/io/hf_datasets.py +61 -16
  127. pixeltable/io/label_studio.py +74 -70
  128. pixeltable/io/lancedb.py +3 -0
  129. pixeltable/io/pandas.py +21 -12
  130. pixeltable/io/parquet.py +25 -105
  131. pixeltable/io/table_data_conduit.py +250 -123
  132. pixeltable/io/utils.py +4 -4
  133. pixeltable/iterators/__init__.py +2 -1
  134. pixeltable/iterators/audio.py +26 -25
  135. pixeltable/iterators/base.py +9 -3
  136. pixeltable/iterators/document.py +112 -78
  137. pixeltable/iterators/image.py +12 -15
  138. pixeltable/iterators/string.py +11 -4
  139. pixeltable/iterators/video.py +523 -120
  140. pixeltable/metadata/__init__.py +14 -3
  141. pixeltable/metadata/converters/convert_13.py +2 -2
  142. pixeltable/metadata/converters/convert_18.py +2 -2
  143. pixeltable/metadata/converters/convert_19.py +2 -2
  144. pixeltable/metadata/converters/convert_20.py +2 -2
  145. pixeltable/metadata/converters/convert_21.py +2 -2
  146. pixeltable/metadata/converters/convert_22.py +2 -2
  147. pixeltable/metadata/converters/convert_24.py +2 -2
  148. pixeltable/metadata/converters/convert_25.py +2 -2
  149. pixeltable/metadata/converters/convert_26.py +2 -2
  150. pixeltable/metadata/converters/convert_29.py +4 -4
  151. pixeltable/metadata/converters/convert_30.py +34 -21
  152. pixeltable/metadata/converters/convert_34.py +2 -2
  153. pixeltable/metadata/converters/convert_35.py +9 -0
  154. pixeltable/metadata/converters/convert_36.py +38 -0
  155. pixeltable/metadata/converters/convert_37.py +15 -0
  156. pixeltable/metadata/converters/convert_38.py +39 -0
  157. pixeltable/metadata/converters/convert_39.py +124 -0
  158. pixeltable/metadata/converters/convert_40.py +73 -0
  159. pixeltable/metadata/converters/convert_41.py +12 -0
  160. pixeltable/metadata/converters/convert_42.py +9 -0
  161. pixeltable/metadata/converters/convert_43.py +44 -0
  162. pixeltable/metadata/converters/util.py +20 -31
  163. pixeltable/metadata/notes.py +9 -0
  164. pixeltable/metadata/schema.py +140 -53
  165. pixeltable/metadata/utils.py +74 -0
  166. pixeltable/mypy/__init__.py +3 -0
  167. pixeltable/mypy/mypy_plugin.py +123 -0
  168. pixeltable/plan.py +382 -115
  169. pixeltable/share/__init__.py +1 -1
  170. pixeltable/share/packager.py +547 -83
  171. pixeltable/share/protocol/__init__.py +33 -0
  172. pixeltable/share/protocol/common.py +165 -0
  173. pixeltable/share/protocol/operation_types.py +33 -0
  174. pixeltable/share/protocol/replica.py +119 -0
  175. pixeltable/share/publish.py +257 -59
  176. pixeltable/store.py +311 -194
  177. pixeltable/type_system.py +373 -211
  178. pixeltable/utils/__init__.py +2 -3
  179. pixeltable/utils/arrow.py +131 -17
  180. pixeltable/utils/av.py +298 -0
  181. pixeltable/utils/azure_store.py +346 -0
  182. pixeltable/utils/coco.py +6 -6
  183. pixeltable/utils/code.py +3 -3
  184. pixeltable/utils/console_output.py +4 -1
  185. pixeltable/utils/coroutine.py +6 -23
  186. pixeltable/utils/dbms.py +32 -6
  187. pixeltable/utils/description_helper.py +4 -5
  188. pixeltable/utils/documents.py +7 -18
  189. pixeltable/utils/exception_handler.py +7 -30
  190. pixeltable/utils/filecache.py +6 -6
  191. pixeltable/utils/formatter.py +86 -48
  192. pixeltable/utils/gcs_store.py +295 -0
  193. pixeltable/utils/http.py +133 -0
  194. pixeltable/utils/http_server.py +2 -3
  195. pixeltable/utils/iceberg.py +1 -2
  196. pixeltable/utils/image.py +17 -0
  197. pixeltable/utils/lancedb.py +90 -0
  198. pixeltable/utils/local_store.py +322 -0
  199. pixeltable/utils/misc.py +5 -0
  200. pixeltable/utils/object_stores.py +573 -0
  201. pixeltable/utils/pydantic.py +60 -0
  202. pixeltable/utils/pytorch.py +5 -6
  203. pixeltable/utils/s3_store.py +527 -0
  204. pixeltable/utils/sql.py +26 -0
  205. pixeltable/utils/system.py +30 -0
  206. pixeltable-0.5.7.dist-info/METADATA +579 -0
  207. pixeltable-0.5.7.dist-info/RECORD +227 -0
  208. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  209. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  210. pixeltable/__version__.py +0 -3
  211. pixeltable/catalog/named_function.py +0 -40
  212. pixeltable/ext/__init__.py +0 -17
  213. pixeltable/ext/functions/__init__.py +0 -11
  214. pixeltable/ext/functions/whisperx.py +0 -77
  215. pixeltable/utils/media_store.py +0 -77
  216. pixeltable/utils/s3.py +0 -17
  217. pixeltable-0.3.14.dist-info/METADATA +0 -434
  218. pixeltable-0.3.14.dist-info/RECORD +0 -186
  219. pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
  220. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -1,14 +1,14 @@
1
1
  """
2
- Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
2
+ Pixeltable UDFs
3
3
  that wrap various endpoints from the Anthropic API. In order to use them, you must
4
4
  first `pip install anthropic` and configure your Anthropic credentials, as described in
5
- the [Working with Anthropic](https://pixeltable.readme.io/docs/working-with-anthropic) tutorial.
5
+ the [Working with Anthropic](https://docs.pixeltable.com/notebooks/integrations/working-with-anthropic) tutorial.
6
6
  """
7
7
 
8
8
  import datetime
9
9
  import json
10
10
  import logging
11
- from typing import TYPE_CHECKING, Any, Iterable, Optional, TypeVar, Union, cast
11
+ from typing import TYPE_CHECKING, Any, Iterable, cast
12
12
 
13
13
  import httpx
14
14
 
@@ -16,6 +16,7 @@ import pixeltable as pxt
16
16
  from pixeltable import env, exprs
17
17
  from pixeltable.func import Tools
18
18
  from pixeltable.utils.code import local_public_names
19
+ from pixeltable.utils.http import exponential_backoff
19
20
 
20
21
  if TYPE_CHECKING:
21
22
  import anthropic
@@ -38,6 +39,64 @@ def _anthropic_client() -> 'anthropic.AsyncAnthropic':
38
39
  return env.Env.get().get_client('anthropic')
39
40
 
40
41
 
42
+ def _get_header_info(
43
+ headers: httpx.Headers,
44
+ ) -> tuple[
45
+ tuple[int, int, datetime.datetime] | None,
46
+ tuple[int, int, datetime.datetime] | None,
47
+ tuple[int, int, datetime.datetime] | None,
48
+ ]:
49
+ """Extract rate limit info from Anthropic API response headers."""
50
+ requests_limit_str = headers.get('anthropic-ratelimit-requests-limit')
51
+ requests_limit = int(requests_limit_str) if requests_limit_str is not None else None
52
+ requests_remaining_str = headers.get('anthropic-ratelimit-requests-remaining')
53
+ requests_remaining = int(requests_remaining_str) if requests_remaining_str is not None else None
54
+ requests_reset_str = headers.get('anthropic-ratelimit-requests-reset')
55
+ requests_reset = (
56
+ datetime.datetime.fromisoformat(requests_reset_str.replace('Z', '+00:00')) if requests_reset_str else None
57
+ )
58
+ requests_info = (
59
+ (requests_limit, requests_remaining, requests_reset) if requests_reset and requests_remaining else None
60
+ )
61
+
62
+ input_tokens_limit_str = headers.get('anthropic-ratelimit-input-tokens-limit')
63
+ input_tokens_limit = int(input_tokens_limit_str) if input_tokens_limit_str is not None else None
64
+ input_tokens_remaining_str = headers.get('anthropic-ratelimit-input-tokens-remaining')
65
+ input_tokens_remaining = int(input_tokens_remaining_str) if input_tokens_remaining_str is not None else None
66
+ input_tokens_reset_str = headers.get('anthropic-ratelimit-input-tokens-reset')
67
+ input_tokens_reset = (
68
+ datetime.datetime.fromisoformat(input_tokens_reset_str.replace('Z', '+00:00'))
69
+ if input_tokens_reset_str
70
+ else None
71
+ )
72
+ input_tokens_info = (
73
+ (input_tokens_limit, input_tokens_remaining, input_tokens_reset)
74
+ if input_tokens_reset and input_tokens_remaining
75
+ else None
76
+ )
77
+
78
+ output_tokens_limit_str = headers.get('anthropic-ratelimit-output-tokens-limit')
79
+ output_tokens_limit = int(output_tokens_limit_str) if output_tokens_limit_str is not None else None
80
+ output_tokens_remaining_str = headers.get('anthropic-ratelimit-output-tokens-remaining')
81
+ output_tokens_remaining = int(output_tokens_remaining_str) if output_tokens_remaining_str is not None else None
82
+ output_tokens_reset_str = headers.get('anthropic-ratelimit-output-tokens-reset')
83
+ output_tokens_reset = (
84
+ datetime.datetime.fromisoformat(output_tokens_reset_str.replace('Z', '+00:00'))
85
+ if output_tokens_reset_str
86
+ else None
87
+ )
88
+ output_tokens_info = (
89
+ (output_tokens_limit, output_tokens_remaining, output_tokens_reset)
90
+ if output_tokens_reset and output_tokens_remaining
91
+ else None
92
+ )
93
+
94
+ if requests_info is None or input_tokens_info is None or output_tokens_info is None:
95
+ _logger.debug(f'get_header_info(): incomplete rate limit info: {headers}')
96
+
97
+ return requests_info, input_tokens_info, output_tokens_info
98
+
99
+
41
100
  class AnthropicRateLimitsInfo(env.RateLimitsInfo):
42
101
  def __init__(self) -> None:
43
102
  super().__init__(self._get_request_resources)
@@ -51,12 +110,38 @@ class AnthropicRateLimitsInfo(env.RateLimitsInfo):
51
110
  input_len += len(message['content'])
52
111
  return {'requests': 1, 'input_tokens': int(input_len / 4), 'output_tokens': max_tokens}
53
112
 
54
- def get_retry_delay(self, exc: Exception) -> Optional[float]:
113
+ def record_exc(self, request_ts: datetime.datetime, exc: Exception) -> None:
114
+ import anthropic
115
+
116
+ if (
117
+ not isinstance(exc, anthropic.APIError)
118
+ or not hasattr(exc, 'response')
119
+ or not hasattr(exc.response, 'headers')
120
+ ):
121
+ return
122
+ requests_info, input_tokens_info, output_tokens_info = _get_header_info(exc.response.headers)
123
+ _logger.debug(
124
+ f'record_exc(): request_ts: {request_ts}, requests_info={requests_info} '
125
+ f'input_tokens_info={input_tokens_info} output_tokens_info={output_tokens_info}'
126
+ )
127
+ self.record(
128
+ request_ts=request_ts,
129
+ requests=requests_info,
130
+ input_tokens=input_tokens_info,
131
+ output_tokens=output_tokens_info,
132
+ )
133
+ self.has_exc = True
134
+
135
+ retry_after_str = exc.response.headers.get('retry-after')
136
+ if retry_after_str is not None:
137
+ _logger.debug(f'retry-after: {retry_after_str}')
138
+
139
+ def get_retry_delay(self, exc: Exception, attempt: int) -> float | None:
55
140
  import anthropic
56
141
 
57
142
  # deal with timeouts separately, they don't come with headers
58
143
  if isinstance(exc, anthropic.APITimeoutError):
59
- return 1.0
144
+ return exponential_backoff(attempt)
60
145
 
61
146
  if not isinstance(exc, anthropic.APIStatusError):
62
147
  return None
@@ -64,8 +149,7 @@ class AnthropicRateLimitsInfo(env.RateLimitsInfo):
64
149
  should_retry_str = exc.response.headers.get('x-should-retry', '')
65
150
  if should_retry_str.lower() != 'true':
66
151
  return None
67
- retry_after_str = exc.response.headers.get('retry-after', '1')
68
- return int(retry_after_str)
152
+ return super().get_retry_delay(exc, attempt)
69
153
 
70
154
 
71
155
  @pxt.udf
@@ -73,16 +157,11 @@ async def messages(
73
157
  messages: list[dict[str, str]],
74
158
  *,
75
159
  model: str,
76
- max_tokens: int = 1024,
77
- metadata: Optional[dict[str, Any]] = None,
78
- stop_sequences: Optional[list[str]] = None,
79
- system: Optional[str] = None,
80
- temperature: Optional[float] = None,
81
- tool_choice: Optional[dict] = None,
82
- tools: Optional[list[dict]] = None,
83
- top_k: Optional[int] = None,
84
- top_p: Optional[float] = None,
85
- timeout: Optional[float] = None,
160
+ max_tokens: int,
161
+ model_kwargs: dict[str, Any] | None = None,
162
+ tools: list[dict[str, Any]] | None = None,
163
+ tool_choice: dict[str, Any] | None = None,
164
+ _runtime_ctx: env.RuntimeCtx | None = None,
86
165
  ) -> dict:
87
166
  """
88
167
  Create a Message.
@@ -101,25 +180,27 @@ async def messages(
101
180
  Args:
102
181
  messages: Input messages.
103
182
  model: The model that will complete your prompt.
104
-
105
- For details on the other parameters, see: <https://docs.anthropic.com/en/api/messages>
183
+ model_kwargs: Additional keyword args for the Anthropic `messages` API.
184
+ For details on the available parameters, see: <https://docs.anthropic.com/en/api/messages>
185
+ tools: An optional list of Pixeltable tools to use for the request.
186
+ tool_choice: An optional tool choice configuration.
106
187
 
107
188
  Returns:
108
189
  A dictionary containing the response and other metadata.
109
190
 
110
191
  Examples:
111
- Add a computed column that applies the model `claude-3-haiku-20240307`
192
+ Add a computed column that applies the model `claude-3-5-sonnet-20241022`
112
193
  to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
113
194
 
114
195
  >>> msgs = [{'role': 'user', 'content': tbl.prompt}]
115
- ... tbl.add_computed_column(response=messages(msgs, model='claude-3-haiku-20240307'))
196
+ ... tbl.add_computed_column(response=messages(msgs, model='claude-3-5-sonnet-20241022'))
116
197
  """
117
-
118
- # it doesn't look like count_tokens() actually exists in the current version of the library
198
+ if model_kwargs is None:
199
+ model_kwargs = {}
119
200
 
120
201
  if tools is not None:
121
202
  # Reformat `tools` into Anthropic format
122
- tools = [
203
+ model_kwargs['tools'] = [
123
204
  {
124
205
  'name': tool['name'],
125
206
  'description': tool['description'],
@@ -132,17 +213,16 @@ async def messages(
132
213
  for tool in tools
133
214
  ]
134
215
 
135
- tool_choice_: Optional[dict] = None
136
216
  if tool_choice is not None:
137
217
  if tool_choice['auto']:
138
- tool_choice_ = {'type': 'auto'}
218
+ model_kwargs['tool_choice'] = {'type': 'auto'}
139
219
  elif tool_choice['required']:
140
- tool_choice_ = {'type': 'any'}
220
+ model_kwargs['tool_choice'] = {'type': 'any'}
141
221
  else:
142
222
  assert tool_choice['tool'] is not None
143
- tool_choice_ = {'type': 'tool', 'name': tool_choice['tool']}
223
+ model_kwargs['tool_choice'] = {'type': 'tool', 'name': tool_choice['tool']}
144
224
  if not tool_choice['parallel_tool_calls']:
145
- tool_choice_['disable_parallel_tool_use'] = True
225
+ model_kwargs['tool_choice']['disable_parallel_tool_use'] = True
146
226
 
147
227
  # make sure the pool info exists prior to making the request
148
228
  resource_pool_id = f'rate-limits:anthropic:{model}'
@@ -152,48 +232,23 @@ async def messages(
152
232
  # TODO: timeouts should be set system-wide and be user-configurable
153
233
  from anthropic.types import MessageParam
154
234
 
155
- # cast(Any, ...): avoid mypy errors
235
+ start_ts = datetime.datetime.now(tz=datetime.timezone.utc)
236
+
156
237
  result = await _anthropic_client().messages.with_raw_response.create(
157
- messages=cast(Iterable[MessageParam], messages),
158
- model=model,
159
- max_tokens=max_tokens,
160
- metadata=_opt(cast(Any, metadata)),
161
- stop_sequences=_opt(stop_sequences),
162
- system=_opt(system),
163
- temperature=_opt(cast(Any, temperature)),
164
- tools=_opt(cast(Any, tools)),
165
- tool_choice=_opt(cast(Any, tool_choice_)),
166
- top_k=_opt(top_k),
167
- top_p=_opt(top_p),
168
- timeout=_opt(timeout),
238
+ messages=cast(Iterable[MessageParam], messages), model=model, max_tokens=max_tokens, **model_kwargs
169
239
  )
170
240
 
171
- requests_limit_str = result.headers.get('anthropic-ratelimit-requests-limit')
172
- requests_limit = int(requests_limit_str) if requests_limit_str is not None else None
173
- requests_remaining_str = result.headers.get('anthropic-ratelimit-requests-remaining')
174
- requests_remaining = int(requests_remaining_str) if requests_remaining_str is not None else None
175
- requests_reset_str = result.headers.get('anthropic-ratelimit-requests-reset')
176
- requests_reset = datetime.datetime.fromisoformat(requests_reset_str.replace('Z', '+00:00'))
177
- input_tokens_limit_str = result.headers.get('anthropic-ratelimit-input-tokens-limit')
178
- input_tokens_limit = int(input_tokens_limit_str) if input_tokens_limit_str is not None else None
179
- input_tokens_remaining_str = result.headers.get('anthropic-ratelimit-input-tokens-remaining')
180
- input_tokens_remaining = int(input_tokens_remaining_str) if input_tokens_remaining_str is not None else None
181
- input_tokens_reset_str = result.headers.get('anthropic-ratelimit-input-tokens-reset')
182
- input_tokens_reset = datetime.datetime.fromisoformat(input_tokens_reset_str.replace('Z', '+00:00'))
183
- output_tokens_limit_str = result.headers.get('anthropic-ratelimit-output-tokens-limit')
184
- output_tokens_limit = int(output_tokens_limit_str) if output_tokens_limit_str is not None else None
185
- output_tokens_remaining_str = result.headers.get('anthropic-ratelimit-output-tokens-remaining')
186
- output_tokens_remaining = int(output_tokens_remaining_str) if output_tokens_remaining_str is not None else None
187
- output_tokens_reset_str = result.headers.get('anthropic-ratelimit-output-tokens-reset')
188
- output_tokens_reset = datetime.datetime.fromisoformat(output_tokens_reset_str.replace('Z', '+00:00'))
189
- retry_after_str = result.headers.get('retry-after')
190
- if retry_after_str is not None:
191
- _logger.debug(f'retry-after: {retry_after_str}')
192
-
241
+ requests_info, input_tokens_info, output_tokens_info = _get_header_info(result.headers)
242
+ # retry_after_str = result.headers.get('retry-after')
243
+ # if retry_after_str is not None:
244
+ # _logger.debug(f'retry-after: {retry_after_str}')
245
+ is_retry = _runtime_ctx is not None and _runtime_ctx.is_retry
193
246
  rate_limits_info.record(
194
- requests=(requests_limit, requests_remaining, requests_reset),
195
- input_tokens=(input_tokens_limit, input_tokens_remaining, input_tokens_reset),
196
- output_tokens=(output_tokens_limit, output_tokens_remaining, output_tokens_reset),
247
+ request_ts=start_ts,
248
+ requests=requests_info,
249
+ input_tokens=input_tokens_info,
250
+ output_tokens=output_tokens_info,
251
+ reset_exc=is_retry,
197
252
  )
198
253
 
199
254
  result_dict = json.loads(result.text)
@@ -211,7 +266,7 @@ def invoke_tools(tools: Tools, response: exprs.Expr) -> exprs.InlineDict:
211
266
 
212
267
 
213
268
  @pxt.udf
214
- def _anthropic_response_to_pxt_tool_calls(response: dict) -> Optional[dict]:
269
+ def _anthropic_response_to_pxt_tool_calls(response: dict) -> dict | None:
215
270
  anthropic_tool_calls = [r for r in response['content'] if r['type'] == 'tool_use']
216
271
  if len(anthropic_tool_calls) == 0:
217
272
  return None
@@ -224,15 +279,6 @@ def _anthropic_response_to_pxt_tool_calls(response: dict) -> Optional[dict]:
224
279
  return pxt_tool_calls
225
280
 
226
281
 
227
- _T = TypeVar('_T')
228
-
229
-
230
- def _opt(arg: _T) -> Union[_T, 'anthropic.NotGiven']:
231
- import anthropic
232
-
233
- return arg if arg is not None else anthropic.NOT_GIVEN
234
-
235
-
236
282
  __all__ = local_public_names(__name__)
237
283
 
238
284
 
@@ -1,26 +1,163 @@
1
1
  """
2
- Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for `AudioType`.
2
+ Pixeltable UDFs for `AudioType`.
3
+ """
3
4
 
4
- Example:
5
- ```python
6
- import pixeltable as pxt
7
- import pixeltable.functions as pxtf
5
+ from typing import Any
8
6
 
9
- t = pxt.get_table(...)
10
- t.select(pxtf.audio.get_metadata()).collect()
11
- ```
12
- """
7
+ import av
8
+ import numpy as np
13
9
 
14
10
  import pixeltable as pxt
11
+ import pixeltable.utils.av as av_utils
15
12
  from pixeltable.utils.code import local_public_names
13
+ from pixeltable.utils.local_store import TempStore
16
14
 
17
15
 
18
16
  @pxt.udf(is_method=True)
19
17
  def get_metadata(audio: pxt.Audio) -> dict:
20
18
  """
21
19
  Gets various metadata associated with an audio file and returns it as a dictionary.
20
+
21
+ Args:
22
+ audio: The audio to get metadata for.
23
+
24
+ Returns:
25
+ A `dict` such as the following:
26
+
27
+ ```json
28
+ {
29
+ 'size': 2568827,
30
+ 'streams': [
31
+ {
32
+ 'type': 'audio',
33
+ 'frames': 0,
34
+ 'duration': 2646000,
35
+ 'metadata': {},
36
+ 'time_base': 2.2675736961451248e-05,
37
+ 'codec_context': {
38
+ 'name': 'flac',
39
+ 'profile': None,
40
+ 'channels': 1,
41
+ 'codec_tag': '\\x00\\x00\\x00\\x00',
42
+ },
43
+ 'duration_seconds': 60.0,
44
+ }
45
+ ],
46
+ 'bit_rate': 342510,
47
+ 'metadata': {'encoder': 'Lavf61.1.100'},
48
+ 'bit_exact': False,
49
+ }
50
+ ```
51
+
52
+ Examples:
53
+ Extract metadata for files in the `audio_col` column of the table `tbl`:
54
+
55
+ >>> tbl.select(tbl.audio_col.get_metadata()).collect()
56
+ """
57
+ return av_utils.get_metadata(audio)
58
+
59
+
60
+ @pxt.udf()
61
+ def encode_audio(
62
+ audio_data: pxt.Array[pxt.Float], *, input_sample_rate: int, format: str, output_sample_rate: int | None = None
63
+ ) -> pxt.Audio:
64
+ """
65
+ Encodes an audio clip represented as an array into a specified audio format.
66
+
67
+ Parameters:
68
+ audio_data: An array of sampled amplitudes. The accepted array shapes are `(N,)` or `(1, N)` for mono audio
69
+ or `(2, N)` for stereo.
70
+ input_sample_rate: The sample rate of the input audio data.
71
+ format: The desired output audio format. The supported formats are 'wav', 'mp3', 'flac', and 'mp4'.
72
+ output_sample_rate: The desired sample rate for the output audio. Defaults to the input sample rate if
73
+ unspecified.
74
+
75
+ Examples:
76
+ Add a computed column with encoded FLAC audio files to a table with audio data (as arrays of floats) and sample
77
+ rates:
78
+
79
+ >>> t.add_computed_column(
80
+ ... audio_file=encode_audio(
81
+ ... t.audio_data, input_sample_rate=t.sample_rate, format='flac'
82
+ ... )
83
+ ... )
84
+ """
85
+ if format not in av_utils.AUDIO_FORMATS:
86
+ raise pxt.Error(f'Only the following formats are supported: {av_utils.AUDIO_FORMATS.keys()}')
87
+ if output_sample_rate is None:
88
+ output_sample_rate = input_sample_rate
89
+
90
+ codec, ext = av_utils.AUDIO_FORMATS[format]
91
+ output_path = str(TempStore.create_path(extension=f'.{ext}'))
92
+
93
+ match audio_data.shape:
94
+ case (_,):
95
+ # Mono audio as 1D array, reshape for pyav
96
+ layout = 'mono'
97
+ audio_data_transformed = audio_data[None, :]
98
+ case (1, _):
99
+ # Mono audio as 2D array, simply reshape and transpose the input for pyav
100
+ layout = 'mono'
101
+ audio_data_transformed = audio_data.reshape(-1, 1).transpose()
102
+ case (2, _):
103
+ # Stereo audio. Input layout: [[L0, L1, L2, ...],[R0, R1, R2, ...]],
104
+ # pyav expects: [L0, R0, L1, R1, L2, R2, ...]
105
+ layout = 'stereo'
106
+ audio_data_transformed = np.empty(audio_data.shape[1] * 2, dtype=audio_data.dtype)
107
+ audio_data_transformed[0::2] = audio_data[0]
108
+ audio_data_transformed[1::2] = audio_data[1]
109
+ audio_data_transformed = audio_data_transformed.reshape(1, -1)
110
+ case _:
111
+ raise pxt.Error(
112
+ f'Supported input array shapes are (N,), (1, N) for mono and (2, N) for stereo, got {audio_data.shape}'
113
+ )
114
+
115
+ with av.open(output_path, mode='w') as output_container:
116
+ stream = output_container.add_stream(codec, rate=output_sample_rate)
117
+ assert isinstance(stream, av.AudioStream)
118
+
119
+ frame = av.AudioFrame.from_ndarray(audio_data_transformed, format='flt', layout=layout)
120
+ frame.sample_rate = input_sample_rate
121
+
122
+ for packet in stream.encode(frame):
123
+ output_container.mux(packet)
124
+ for packet in stream.encode():
125
+ output_container.mux(packet)
126
+
127
+ return output_path
128
+
129
+
130
+ def audio_splitter(
131
+ audio: Any, chunk_duration_sec: float, *, overlap_sec: float = 0.0, min_chunk_duration_sec: float = 0.0
132
+ ) -> tuple[type[pxt.iterators.ComponentIterator], dict[str, Any]]:
133
+ """
134
+ Iterator over chunks of an audio file. The audio file is split into smaller chunks,
135
+ where the duration of each chunk is determined by chunk_duration_sec.
136
+ The iterator yields audio chunks as pxt.Audio, along with the start and end time of each chunk.
137
+ If the input contains no audio, no chunks are yielded.
138
+
139
+ Args:
140
+ chunk_duration_sec: Audio chunk duration in seconds
141
+ overlap_sec: Overlap between consecutive chunks in seconds
142
+ min_chunk_duration_sec: Drop the last chunk if it is smaller than min_chunk_duration_sec
143
+
144
+ Examples:
145
+ This example assumes an existing table `tbl` with a column `audio` of type `pxt.Audio`.
146
+
147
+ Create a view that splits all audio files into chunks of 30 seconds with 5 seconds overlap:
148
+
149
+ >>> pxt.create_view(
150
+ ... 'audio_chunks',
151
+ ... tbl,
152
+ ... iterator=audio_splitter(tbl.audio, chunk_duration_sec=30.0, overlap_sec=5.0)
153
+ ... )
22
154
  """
23
- return pxt.functions.video._get_metadata(audio)
155
+ kwargs: dict[str, Any] = {}
156
+ if overlap_sec != 0.0:
157
+ kwargs['overlap_sec'] = overlap_sec
158
+ if min_chunk_duration_sec != 0.0:
159
+ kwargs['min_chunk_duration_sec'] = min_chunk_duration_sec
160
+ return pxt.iterators.AudioSplitter._create(audio=audio, chunk_duration_sec=chunk_duration_sec, **kwargs)
24
161
 
25
162
 
26
163
  __all__ = local_public_names(__name__)
@@ -1,5 +1,12 @@
1
+ """
2
+ Pixeltable UDFs for AWS Bedrock AI models.
3
+
4
+ Provides integration with AWS Bedrock for accessing various foundation models
5
+ including Anthropic Claude, Amazon Titan, and other providers.
6
+ """
7
+
1
8
  import logging
2
- from typing import TYPE_CHECKING, Any, Optional
9
+ from typing import TYPE_CHECKING, Any
3
10
 
4
11
  import pixeltable as pxt
5
12
  from pixeltable import env, exprs
@@ -29,10 +36,10 @@ def converse(
29
36
  messages: list[dict[str, Any]],
30
37
  *,
31
38
  model_id: str,
32
- system: Optional[list[dict[str, Any]]] = None,
33
- inference_config: Optional[dict] = None,
34
- additional_model_request_fields: Optional[dict] = None,
35
- tool_config: Optional[list[dict]] = None,
39
+ system: list[dict[str, Any]] | None = None,
40
+ inference_config: dict | None = None,
41
+ additional_model_request_fields: dict | None = None,
42
+ tool_config: list[dict] | None = None,
36
43
  ) -> dict:
37
44
  """
38
45
  Generate a conversation response.
@@ -104,7 +111,7 @@ def invoke_tools(tools: Tools, response: exprs.Expr) -> exprs.InlineDict:
104
111
 
105
112
 
106
113
  @pxt.udf
107
- def _bedrock_response_to_pxt_tool_calls(response: dict) -> Optional[dict]:
114
+ def _bedrock_response_to_pxt_tool_calls(response: dict) -> dict | None:
108
115
  if response.get('stopReason') != 'tool_use':
109
116
  return None
110
117
 
@@ -1,5 +1,5 @@
1
1
  """
2
- Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for `DateType`.
2
+ Pixeltable UDFs for `DateType`.
3
3
 
4
4
  Usage example:
5
5
  ```python
@@ -28,8 +28,11 @@ _SQL_ZERO = sql.literal(0)
28
28
  @pxt.udf(is_property=True)
29
29
  def year(self: date) -> int:
30
30
  """
31
- Between [`MINYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MINYEAR) and
32
- [`MAXYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MAXYEAR) inclusive.
31
+ Between 1 and 9999 inclusive.
32
+
33
+ (Between [`MINYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MINYEAR) and
34
+ [`MAXYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MAXYEAR) as defined by the Python `datetime`
35
+ library).
33
36
 
34
37
  Equivalent to [`date.year`](https://docs.python.org/3/library/datetime.html#datetime.date.year).
35
38
  """
@@ -83,7 +86,7 @@ def make_date(year: int, month: int, day: int) -> date:
83
86
 
84
87
  @make_date.to_sql
85
88
  def _(year: sql.ColumnElement, month: sql.ColumnElement, day: sql.ColumnElement) -> sql.ColumnElement:
86
- return sql.func.make_date(sql.cast(year, sql.Integer), sql.cast(month, sql.Integer), sql.cast(day, sql.Integer))
89
+ return sql.func.make_date(year.cast(sql.Integer), month.cast(sql.Integer), day.cast(sql.Integer))
87
90
 
88
91
 
89
92
  @pxt.udf(is_method=True)
@@ -1,5 +1,12 @@
1
+ """
2
+ Pixeltable UDFs for Deepseek AI models.
3
+
4
+ Provides integration with Deepseek's language models for chat completions
5
+ and other AI capabilities.
6
+ """
7
+
1
8
  import json
2
- from typing import TYPE_CHECKING, Any, Optional, Union, cast
9
+ from typing import TYPE_CHECKING, Any
3
10
 
4
11
  import httpx
5
12
 
@@ -7,8 +14,6 @@ import pixeltable as pxt
7
14
  from pixeltable import env
8
15
  from pixeltable.utils.code import local_public_names
9
16
 
10
- from .openai import _opt
11
-
12
17
  if TYPE_CHECKING:
13
18
  import openai
14
19
 
@@ -28,22 +33,14 @@ def _deepseek_client() -> 'openai.AsyncOpenAI':
28
33
  return env.Env.get().get_client('deepseek')
29
34
 
30
35
 
31
- @pxt.udf
36
+ @pxt.udf(resource_pool='request-rate:deepseek')
32
37
  async def chat_completions(
33
38
  messages: list,
34
39
  *,
35
40
  model: str,
36
- frequency_penalty: Optional[float] = None,
37
- logprobs: Optional[bool] = None,
38
- top_logprobs: Optional[int] = None,
39
- max_tokens: Optional[int] = None,
40
- presence_penalty: Optional[float] = None,
41
- response_format: Optional[dict] = None,
42
- stop: Optional[list[str]] = None,
43
- temperature: Optional[float] = None,
44
- tools: Optional[list[dict]] = None,
45
- tool_choice: Optional[dict] = None,
46
- top_p: Optional[float] = None,
41
+ model_kwargs: dict[str, Any] | None = None,
42
+ tools: list[dict[str, Any]] | None = None,
43
+ tool_choice: dict[str, Any] | None = None,
47
44
  ) -> dict:
48
45
  """
49
46
  Creates a model response for the given chat conversation.
@@ -53,6 +50,10 @@ async def chat_completions(
53
50
 
54
51
  Deepseek uses the OpenAI SDK, so you will need to install the `openai` package to use this UDF.
55
52
 
53
+ Request throttling:
54
+ Applies the rate limit set in the config (section `deepseek`, key `rate_limit`). If no rate
55
+ limit is configured, uses a default of 600 RPM (requests per minute).
56
+
56
57
  __Requirements:__
57
58
 
58
59
  - `pip install openai`
@@ -60,8 +61,10 @@ async def chat_completions(
60
61
  Args:
61
62
  messages: A list of messages to use for chat completion, as described in the Deepseek API documentation.
62
63
  model: The model to use for chat completion.
63
-
64
- For details on the other parameters, see: <https://api-docs.deepseek.com/api/create-chat-completion>
64
+ model_kwargs: Additional keyword args for the Deepseek `chat/completions` API.
65
+ For details on the available parameters, see: <https://api-docs.deepseek.com/api/create-chat-completion>
66
+ tools: An optional list of Pixeltable tools to use for the request.
67
+ tool_choice: An optional tool choice configuration.
65
68
 
66
69
  Returns:
67
70
  A dictionary containing the response and other metadata.
@@ -71,44 +74,33 @@ async def chat_completions(
71
74
  of the table `tbl`:
72
75
 
73
76
  >>> messages = [
74
- {'role': 'system', 'content': 'You are a helpful assistant.'},
75
- {'role': 'user', 'content': tbl.prompt}
76
- ]
77
- tbl.add_computed_column(response=chat_completions(messages, model='deepseek-chat'))
77
+ ... {'role': 'system', 'content': 'You are a helpful assistant.'},
78
+ ... {'role': 'user', 'content': tbl.prompt}
79
+ ... ]
80
+ >>> tbl.add_computed_column(response=chat_completions(messages, model='deepseek-chat'))
78
81
  """
82
+ if model_kwargs is None:
83
+ model_kwargs = {}
84
+
79
85
  if tools is not None:
80
- tools = [{'type': 'function', 'function': tool} for tool in tools]
86
+ model_kwargs['tools'] = [{'type': 'function', 'function': tool} for tool in tools]
81
87
 
82
- tool_choice_: Union[str, dict, None] = None
83
88
  if tool_choice is not None:
84
89
  if tool_choice['auto']:
85
- tool_choice_ = 'auto'
90
+ model_kwargs['tool_choice'] = 'auto'
86
91
  elif tool_choice['required']:
87
- tool_choice_ = 'required'
92
+ model_kwargs['tool_choice'] = 'required'
88
93
  else:
89
94
  assert tool_choice['tool'] is not None
90
- tool_choice_ = {'type': 'function', 'function': {'name': tool_choice['tool']}}
95
+ model_kwargs['tool_choice'] = {'type': 'function', 'function': {'name': tool_choice['tool']}}
91
96
 
92
- extra_body: Optional[dict[str, Any]] = None
93
97
  if tool_choice is not None and not tool_choice['parallel_tool_calls']:
94
- extra_body = {'parallel_tool_calls': False}
98
+ if 'extra_body' not in model_kwargs:
99
+ model_kwargs['extra_body'] = {}
100
+ model_kwargs['extra_body']['parallel_tool_calls'] = False
95
101
 
96
- # cast(Any, ...): avoid mypy errors
97
102
  result = await _deepseek_client().chat.completions.with_raw_response.create(
98
- messages=messages,
99
- model=model,
100
- frequency_penalty=_opt(frequency_penalty),
101
- logprobs=_opt(logprobs),
102
- top_logprobs=_opt(top_logprobs),
103
- max_tokens=_opt(max_tokens),
104
- presence_penalty=_opt(presence_penalty),
105
- response_format=_opt(cast(Any, response_format)),
106
- stop=_opt(stop),
107
- temperature=_opt(temperature),
108
- tools=_opt(cast(Any, tools)),
109
- tool_choice=_opt(cast(Any, tool_choice_)),
110
- top_p=_opt(top_p),
111
- extra_body=extra_body,
103
+ messages=messages, model=model, **model_kwargs
112
104
  )
113
105
 
114
106
  return json.loads(result.text)