pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. pixeltable/__init__.py +83 -19
  2. pixeltable/_query.py +1444 -0
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +7 -4
  5. pixeltable/catalog/catalog.py +2394 -119
  6. pixeltable/catalog/column.py +225 -104
  7. pixeltable/catalog/dir.py +38 -9
  8. pixeltable/catalog/globals.py +53 -34
  9. pixeltable/catalog/insertable_table.py +265 -115
  10. pixeltable/catalog/path.py +80 -17
  11. pixeltable/catalog/schema_object.py +28 -43
  12. pixeltable/catalog/table.py +1270 -677
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +1270 -751
  15. pixeltable/catalog/table_version_handle.py +109 -0
  16. pixeltable/catalog/table_version_path.py +137 -42
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +251 -134
  20. pixeltable/config.py +215 -0
  21. pixeltable/env.py +736 -285
  22. pixeltable/exceptions.py +26 -2
  23. pixeltable/exec/__init__.py +7 -2
  24. pixeltable/exec/aggregation_node.py +39 -21
  25. pixeltable/exec/cache_prefetch_node.py +87 -109
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +25 -28
  29. pixeltable/exec/data_row_batch.py +11 -46
  30. pixeltable/exec/exec_context.py +26 -11
  31. pixeltable/exec/exec_node.py +35 -27
  32. pixeltable/exec/expr_eval/__init__.py +3 -0
  33. pixeltable/exec/expr_eval/evaluators.py +365 -0
  34. pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
  35. pixeltable/exec/expr_eval/globals.py +200 -0
  36. pixeltable/exec/expr_eval/row_buffer.py +74 -0
  37. pixeltable/exec/expr_eval/schedulers.py +413 -0
  38. pixeltable/exec/globals.py +35 -0
  39. pixeltable/exec/in_memory_data_node.py +35 -27
  40. pixeltable/exec/object_store_save_node.py +293 -0
  41. pixeltable/exec/row_update_node.py +44 -29
  42. pixeltable/exec/sql_node.py +414 -115
  43. pixeltable/exprs/__init__.py +8 -5
  44. pixeltable/exprs/arithmetic_expr.py +79 -45
  45. pixeltable/exprs/array_slice.py +5 -5
  46. pixeltable/exprs/column_property_ref.py +40 -26
  47. pixeltable/exprs/column_ref.py +254 -61
  48. pixeltable/exprs/comparison.py +14 -9
  49. pixeltable/exprs/compound_predicate.py +9 -10
  50. pixeltable/exprs/data_row.py +213 -72
  51. pixeltable/exprs/expr.py +270 -104
  52. pixeltable/exprs/expr_dict.py +6 -5
  53. pixeltable/exprs/expr_set.py +20 -11
  54. pixeltable/exprs/function_call.py +383 -284
  55. pixeltable/exprs/globals.py +18 -5
  56. pixeltable/exprs/in_predicate.py +7 -7
  57. pixeltable/exprs/inline_expr.py +37 -37
  58. pixeltable/exprs/is_null.py +8 -4
  59. pixeltable/exprs/json_mapper.py +120 -54
  60. pixeltable/exprs/json_path.py +90 -60
  61. pixeltable/exprs/literal.py +61 -16
  62. pixeltable/exprs/method_ref.py +7 -6
  63. pixeltable/exprs/object_ref.py +19 -8
  64. pixeltable/exprs/row_builder.py +238 -75
  65. pixeltable/exprs/rowid_ref.py +53 -15
  66. pixeltable/exprs/similarity_expr.py +65 -50
  67. pixeltable/exprs/sql_element_cache.py +5 -5
  68. pixeltable/exprs/string_op.py +107 -0
  69. pixeltable/exprs/type_cast.py +25 -13
  70. pixeltable/exprs/variable.py +2 -2
  71. pixeltable/func/__init__.py +9 -5
  72. pixeltable/func/aggregate_function.py +197 -92
  73. pixeltable/func/callable_function.py +119 -35
  74. pixeltable/func/expr_template_function.py +101 -48
  75. pixeltable/func/function.py +375 -62
  76. pixeltable/func/function_registry.py +20 -19
  77. pixeltable/func/globals.py +6 -5
  78. pixeltable/func/mcp.py +74 -0
  79. pixeltable/func/query_template_function.py +151 -35
  80. pixeltable/func/signature.py +178 -49
  81. pixeltable/func/tools.py +164 -0
  82. pixeltable/func/udf.py +176 -53
  83. pixeltable/functions/__init__.py +44 -4
  84. pixeltable/functions/anthropic.py +226 -47
  85. pixeltable/functions/audio.py +148 -11
  86. pixeltable/functions/bedrock.py +137 -0
  87. pixeltable/functions/date.py +188 -0
  88. pixeltable/functions/deepseek.py +113 -0
  89. pixeltable/functions/document.py +81 -0
  90. pixeltable/functions/fal.py +76 -0
  91. pixeltable/functions/fireworks.py +72 -20
  92. pixeltable/functions/gemini.py +249 -0
  93. pixeltable/functions/globals.py +208 -53
  94. pixeltable/functions/groq.py +108 -0
  95. pixeltable/functions/huggingface.py +1088 -95
  96. pixeltable/functions/image.py +155 -84
  97. pixeltable/functions/json.py +8 -11
  98. pixeltable/functions/llama_cpp.py +31 -19
  99. pixeltable/functions/math.py +169 -0
  100. pixeltable/functions/mistralai.py +50 -75
  101. pixeltable/functions/net.py +70 -0
  102. pixeltable/functions/ollama.py +29 -36
  103. pixeltable/functions/openai.py +548 -160
  104. pixeltable/functions/openrouter.py +143 -0
  105. pixeltable/functions/replicate.py +15 -14
  106. pixeltable/functions/reve.py +250 -0
  107. pixeltable/functions/string.py +310 -85
  108. pixeltable/functions/timestamp.py +37 -19
  109. pixeltable/functions/together.py +77 -120
  110. pixeltable/functions/twelvelabs.py +188 -0
  111. pixeltable/functions/util.py +7 -2
  112. pixeltable/functions/uuid.py +30 -0
  113. pixeltable/functions/video.py +1528 -117
  114. pixeltable/functions/vision.py +26 -26
  115. pixeltable/functions/voyageai.py +289 -0
  116. pixeltable/functions/whisper.py +19 -10
  117. pixeltable/functions/whisperx.py +179 -0
  118. pixeltable/functions/yolox.py +112 -0
  119. pixeltable/globals.py +716 -236
  120. pixeltable/index/__init__.py +3 -1
  121. pixeltable/index/base.py +17 -21
  122. pixeltable/index/btree.py +32 -22
  123. pixeltable/index/embedding_index.py +155 -92
  124. pixeltable/io/__init__.py +12 -7
  125. pixeltable/io/datarows.py +140 -0
  126. pixeltable/io/external_store.py +83 -125
  127. pixeltable/io/fiftyone.py +24 -33
  128. pixeltable/io/globals.py +47 -182
  129. pixeltable/io/hf_datasets.py +96 -127
  130. pixeltable/io/label_studio.py +171 -156
  131. pixeltable/io/lancedb.py +3 -0
  132. pixeltable/io/pandas.py +136 -115
  133. pixeltable/io/parquet.py +40 -153
  134. pixeltable/io/table_data_conduit.py +702 -0
  135. pixeltable/io/utils.py +100 -0
  136. pixeltable/iterators/__init__.py +8 -4
  137. pixeltable/iterators/audio.py +207 -0
  138. pixeltable/iterators/base.py +9 -3
  139. pixeltable/iterators/document.py +144 -87
  140. pixeltable/iterators/image.py +17 -38
  141. pixeltable/iterators/string.py +15 -12
  142. pixeltable/iterators/video.py +523 -127
  143. pixeltable/metadata/__init__.py +33 -8
  144. pixeltable/metadata/converters/convert_10.py +2 -3
  145. pixeltable/metadata/converters/convert_13.py +2 -2
  146. pixeltable/metadata/converters/convert_15.py +15 -11
  147. pixeltable/metadata/converters/convert_16.py +4 -5
  148. pixeltable/metadata/converters/convert_17.py +4 -5
  149. pixeltable/metadata/converters/convert_18.py +4 -6
  150. pixeltable/metadata/converters/convert_19.py +6 -9
  151. pixeltable/metadata/converters/convert_20.py +3 -6
  152. pixeltable/metadata/converters/convert_21.py +6 -8
  153. pixeltable/metadata/converters/convert_22.py +3 -2
  154. pixeltable/metadata/converters/convert_23.py +33 -0
  155. pixeltable/metadata/converters/convert_24.py +55 -0
  156. pixeltable/metadata/converters/convert_25.py +19 -0
  157. pixeltable/metadata/converters/convert_26.py +23 -0
  158. pixeltable/metadata/converters/convert_27.py +29 -0
  159. pixeltable/metadata/converters/convert_28.py +13 -0
  160. pixeltable/metadata/converters/convert_29.py +110 -0
  161. pixeltable/metadata/converters/convert_30.py +63 -0
  162. pixeltable/metadata/converters/convert_31.py +11 -0
  163. pixeltable/metadata/converters/convert_32.py +15 -0
  164. pixeltable/metadata/converters/convert_33.py +17 -0
  165. pixeltable/metadata/converters/convert_34.py +21 -0
  166. pixeltable/metadata/converters/convert_35.py +9 -0
  167. pixeltable/metadata/converters/convert_36.py +38 -0
  168. pixeltable/metadata/converters/convert_37.py +15 -0
  169. pixeltable/metadata/converters/convert_38.py +39 -0
  170. pixeltable/metadata/converters/convert_39.py +124 -0
  171. pixeltable/metadata/converters/convert_40.py +73 -0
  172. pixeltable/metadata/converters/convert_41.py +12 -0
  173. pixeltable/metadata/converters/convert_42.py +9 -0
  174. pixeltable/metadata/converters/convert_43.py +44 -0
  175. pixeltable/metadata/converters/util.py +44 -18
  176. pixeltable/metadata/notes.py +21 -0
  177. pixeltable/metadata/schema.py +185 -42
  178. pixeltable/metadata/utils.py +74 -0
  179. pixeltable/mypy/__init__.py +3 -0
  180. pixeltable/mypy/mypy_plugin.py +123 -0
  181. pixeltable/plan.py +616 -225
  182. pixeltable/share/__init__.py +3 -0
  183. pixeltable/share/packager.py +797 -0
  184. pixeltable/share/protocol/__init__.py +33 -0
  185. pixeltable/share/protocol/common.py +165 -0
  186. pixeltable/share/protocol/operation_types.py +33 -0
  187. pixeltable/share/protocol/replica.py +119 -0
  188. pixeltable/share/publish.py +349 -0
  189. pixeltable/store.py +398 -232
  190. pixeltable/type_system.py +730 -267
  191. pixeltable/utils/__init__.py +40 -0
  192. pixeltable/utils/arrow.py +201 -29
  193. pixeltable/utils/av.py +298 -0
  194. pixeltable/utils/azure_store.py +346 -0
  195. pixeltable/utils/coco.py +26 -27
  196. pixeltable/utils/code.py +4 -4
  197. pixeltable/utils/console_output.py +46 -0
  198. pixeltable/utils/coroutine.py +24 -0
  199. pixeltable/utils/dbms.py +92 -0
  200. pixeltable/utils/description_helper.py +11 -12
  201. pixeltable/utils/documents.py +60 -61
  202. pixeltable/utils/exception_handler.py +36 -0
  203. pixeltable/utils/filecache.py +38 -22
  204. pixeltable/utils/formatter.py +88 -51
  205. pixeltable/utils/gcs_store.py +295 -0
  206. pixeltable/utils/http.py +133 -0
  207. pixeltable/utils/http_server.py +14 -13
  208. pixeltable/utils/iceberg.py +13 -0
  209. pixeltable/utils/image.py +17 -0
  210. pixeltable/utils/lancedb.py +90 -0
  211. pixeltable/utils/local_store.py +322 -0
  212. pixeltable/utils/misc.py +5 -0
  213. pixeltable/utils/object_stores.py +573 -0
  214. pixeltable/utils/pydantic.py +60 -0
  215. pixeltable/utils/pytorch.py +20 -20
  216. pixeltable/utils/s3_store.py +527 -0
  217. pixeltable/utils/sql.py +32 -5
  218. pixeltable/utils/system.py +30 -0
  219. pixeltable/utils/transactional_directory.py +4 -3
  220. pixeltable-0.5.7.dist-info/METADATA +579 -0
  221. pixeltable-0.5.7.dist-info/RECORD +227 -0
  222. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  223. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  224. pixeltable/__version__.py +0 -3
  225. pixeltable/catalog/named_function.py +0 -36
  226. pixeltable/catalog/path_dict.py +0 -141
  227. pixeltable/dataframe.py +0 -894
  228. pixeltable/exec/expr_eval_node.py +0 -232
  229. pixeltable/ext/__init__.py +0 -14
  230. pixeltable/ext/functions/__init__.py +0 -8
  231. pixeltable/ext/functions/whisperx.py +0 -77
  232. pixeltable/ext/functions/yolox.py +0 -157
  233. pixeltable/tool/create_test_db_dump.py +0 -311
  234. pixeltable/tool/create_test_video.py +0 -81
  235. pixeltable/tool/doc_plugins/griffe.py +0 -50
  236. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  237. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  238. pixeltable/tool/embed_udf.py +0 -9
  239. pixeltable/tool/mypy_plugin.py +0 -55
  240. pixeltable/utils/media_store.py +0 -76
  241. pixeltable/utils/s3.py +0 -16
  242. pixeltable-0.2.26.dist-info/METADATA +0 -400
  243. pixeltable-0.2.26.dist-info/RECORD +0 -156
  244. pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
  245. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -1,55 +1,167 @@
1
1
  """
2
- Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
2
+ Pixeltable UDFs
3
3
  that wrap various endpoints from the Anthropic API. In order to use them, you must
4
4
  first `pip install anthropic` and configure your Anthropic credentials, as described in
5
- the [Working with Anthropic](https://pixeltable.readme.io/docs/working-with-anthropic) tutorial.
5
+ the [Working with Anthropic](https://docs.pixeltable.com/notebooks/integrations/working-with-anthropic) tutorial.
6
6
  """
7
7
 
8
- from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, Union
8
+ import datetime
9
+ import json
10
+ import logging
11
+ from typing import TYPE_CHECKING, Any, Iterable, cast
9
12
 
10
- import tenacity
13
+ import httpx
11
14
 
12
15
  import pixeltable as pxt
13
- from pixeltable import env
16
+ from pixeltable import env, exprs
17
+ from pixeltable.func import Tools
14
18
  from pixeltable.utils.code import local_public_names
19
+ from pixeltable.utils.http import exponential_backoff
15
20
 
16
21
  if TYPE_CHECKING:
17
22
  import anthropic
18
23
 
24
+ _logger = logging.getLogger('pixeltable')
25
+
19
26
 
20
27
  @env.register_client('anthropic')
21
- def _(api_key: str) -> 'anthropic.Anthropic':
28
+ def _(api_key: str) -> 'anthropic.AsyncAnthropic':
22
29
  import anthropic
23
- return anthropic.Anthropic(api_key=api_key)
30
+
31
+ return anthropic.AsyncAnthropic(
32
+ api_key=api_key,
33
+ # recommended to increase limits for async client to avoid connection errors
34
+ http_client=httpx.AsyncClient(limits=httpx.Limits(max_keepalive_connections=100, max_connections=500)),
35
+ )
24
36
 
25
37
 
26
- def _anthropic_client() -> 'anthropic.Anthropic':
38
+ def _anthropic_client() -> 'anthropic.AsyncAnthropic':
27
39
  return env.Env.get().get_client('anthropic')
28
40
 
29
41
 
30
- def _retry(fn: Callable) -> Callable:
31
- import anthropic
32
- return tenacity.retry(
33
- retry=tenacity.retry_if_exception_type(anthropic.RateLimitError),
34
- wait=tenacity.wait_random_exponential(multiplier=1, max=60),
35
- stop=tenacity.stop_after_attempt(20),
36
- )(fn)
42
+ def _get_header_info(
43
+ headers: httpx.Headers,
44
+ ) -> tuple[
45
+ tuple[int, int, datetime.datetime] | None,
46
+ tuple[int, int, datetime.datetime] | None,
47
+ tuple[int, int, datetime.datetime] | None,
48
+ ]:
49
+ """Extract rate limit info from Anthropic API response headers."""
50
+ requests_limit_str = headers.get('anthropic-ratelimit-requests-limit')
51
+ requests_limit = int(requests_limit_str) if requests_limit_str is not None else None
52
+ requests_remaining_str = headers.get('anthropic-ratelimit-requests-remaining')
53
+ requests_remaining = int(requests_remaining_str) if requests_remaining_str is not None else None
54
+ requests_reset_str = headers.get('anthropic-ratelimit-requests-reset')
55
+ requests_reset = (
56
+ datetime.datetime.fromisoformat(requests_reset_str.replace('Z', '+00:00')) if requests_reset_str else None
57
+ )
58
+ requests_info = (
59
+ (requests_limit, requests_remaining, requests_reset) if requests_reset and requests_remaining else None
60
+ )
61
+
62
+ input_tokens_limit_str = headers.get('anthropic-ratelimit-input-tokens-limit')
63
+ input_tokens_limit = int(input_tokens_limit_str) if input_tokens_limit_str is not None else None
64
+ input_tokens_remaining_str = headers.get('anthropic-ratelimit-input-tokens-remaining')
65
+ input_tokens_remaining = int(input_tokens_remaining_str) if input_tokens_remaining_str is not None else None
66
+ input_tokens_reset_str = headers.get('anthropic-ratelimit-input-tokens-reset')
67
+ input_tokens_reset = (
68
+ datetime.datetime.fromisoformat(input_tokens_reset_str.replace('Z', '+00:00'))
69
+ if input_tokens_reset_str
70
+ else None
71
+ )
72
+ input_tokens_info = (
73
+ (input_tokens_limit, input_tokens_remaining, input_tokens_reset)
74
+ if input_tokens_reset and input_tokens_remaining
75
+ else None
76
+ )
77
+
78
+ output_tokens_limit_str = headers.get('anthropic-ratelimit-output-tokens-limit')
79
+ output_tokens_limit = int(output_tokens_limit_str) if output_tokens_limit_str is not None else None
80
+ output_tokens_remaining_str = headers.get('anthropic-ratelimit-output-tokens-remaining')
81
+ output_tokens_remaining = int(output_tokens_remaining_str) if output_tokens_remaining_str is not None else None
82
+ output_tokens_reset_str = headers.get('anthropic-ratelimit-output-tokens-reset')
83
+ output_tokens_reset = (
84
+ datetime.datetime.fromisoformat(output_tokens_reset_str.replace('Z', '+00:00'))
85
+ if output_tokens_reset_str
86
+ else None
87
+ )
88
+ output_tokens_info = (
89
+ (output_tokens_limit, output_tokens_remaining, output_tokens_reset)
90
+ if output_tokens_reset and output_tokens_remaining
91
+ else None
92
+ )
93
+
94
+ if requests_info is None or input_tokens_info is None or output_tokens_info is None:
95
+ _logger.debug(f'get_header_info(): incomplete rate limit info: {headers}')
96
+
97
+ return requests_info, input_tokens_info, output_tokens_info
98
+
99
+
100
+ class AnthropicRateLimitsInfo(env.RateLimitsInfo):
101
+ def __init__(self) -> None:
102
+ super().__init__(self._get_request_resources)
103
+
104
+ def _get_request_resources(self, messages: dict, max_tokens: int) -> dict[str, int]:
105
+ input_len = 0
106
+ for message in messages:
107
+ if 'role' in message:
108
+ input_len += len(message['role'])
109
+ if 'content' in message:
110
+ input_len += len(message['content'])
111
+ return {'requests': 1, 'input_tokens': int(input_len / 4), 'output_tokens': max_tokens}
112
+
113
+ def record_exc(self, request_ts: datetime.datetime, exc: Exception) -> None:
114
+ import anthropic
115
+
116
+ if (
117
+ not isinstance(exc, anthropic.APIError)
118
+ or not hasattr(exc, 'response')
119
+ or not hasattr(exc.response, 'headers')
120
+ ):
121
+ return
122
+ requests_info, input_tokens_info, output_tokens_info = _get_header_info(exc.response.headers)
123
+ _logger.debug(
124
+ f'record_exc(): request_ts: {request_ts}, requests_info={requests_info} '
125
+ f'input_tokens_info={input_tokens_info} output_tokens_info={output_tokens_info}'
126
+ )
127
+ self.record(
128
+ request_ts=request_ts,
129
+ requests=requests_info,
130
+ input_tokens=input_tokens_info,
131
+ output_tokens=output_tokens_info,
132
+ )
133
+ self.has_exc = True
134
+
135
+ retry_after_str = exc.response.headers.get('retry-after')
136
+ if retry_after_str is not None:
137
+ _logger.debug(f'retry-after: {retry_after_str}')
138
+
139
+ def get_retry_delay(self, exc: Exception, attempt: int) -> float | None:
140
+ import anthropic
141
+
142
+ # deal with timeouts separately, they don't come with headers
143
+ if isinstance(exc, anthropic.APITimeoutError):
144
+ return exponential_backoff(attempt)
145
+
146
+ if not isinstance(exc, anthropic.APIStatusError):
147
+ return None
148
+ _logger.debug(f'headers={exc.response.headers}')
149
+ should_retry_str = exc.response.headers.get('x-should-retry', '')
150
+ if should_retry_str.lower() != 'true':
151
+ return None
152
+ return super().get_retry_delay(exc, attempt)
37
153
 
38
154
 
39
155
  @pxt.udf
40
- def messages(
156
+ async def messages(
41
157
  messages: list[dict[str, str]],
42
158
  *,
43
159
  model: str,
44
- max_tokens: int = 1024,
45
- metadata: Optional[dict[str, Any]] = None,
46
- stop_sequences: Optional[list[str]] = None,
47
- system: Optional[str] = None,
48
- temperature: Optional[float] = None,
49
- tool_choice: Optional[list[dict]] = None,
50
- tools: Optional[dict] = None,
51
- top_k: Optional[int] = None,
52
- top_p: Optional[float] = None,
160
+ max_tokens: int,
161
+ model_kwargs: dict[str, Any] | None = None,
162
+ tools: list[dict[str, Any]] | None = None,
163
+ tool_choice: dict[str, Any] | None = None,
164
+ _runtime_ctx: env.RuntimeCtx | None = None,
53
165
  ) -> dict:
54
166
  """
55
167
  Create a Message.
@@ -57,6 +169,10 @@ def messages(
57
169
  Equivalent to the Anthropic `messages` API endpoint.
58
170
  For additional details, see: <https://docs.anthropic.com/en/api/messages>
59
171
 
172
+ Request throttling:
173
+ Uses the rate limit-related headers returned by the API to throttle requests adaptively, based on available
174
+ request and token capacity. No configuration is necessary.
175
+
60
176
  __Requirements:__
61
177
 
62
178
  - `pip install anthropic`
@@ -64,44 +180,107 @@ def messages(
64
180
  Args:
65
181
  messages: Input messages.
66
182
  model: The model that will complete your prompt.
67
-
68
- For details on the other parameters, see: <https://docs.anthropic.com/en/api/messages>
183
+ model_kwargs: Additional keyword args for the Anthropic `messages` API.
184
+ For details on the available parameters, see: <https://docs.anthropic.com/en/api/messages>
185
+ tools: An optional list of Pixeltable tools to use for the request.
186
+ tool_choice: An optional tool choice configuration.
69
187
 
70
188
  Returns:
71
189
  A dictionary containing the response and other metadata.
72
190
 
73
191
  Examples:
74
- Add a computed column that applies the model `claude-3-haiku-20240307`
192
+ Add a computed column that applies the model `claude-3-5-sonnet-20241022`
75
193
  to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
76
194
 
77
195
  >>> msgs = [{'role': 'user', 'content': tbl.prompt}]
78
- ... tbl['response'] = messages(msgs, model='claude-3-haiku-20240307')
196
+ ... tbl.add_computed_column(response=messages(msgs, model='claude-3-5-sonnet-20241022'))
79
197
  """
80
- return _retry(_anthropic_client().messages.create)(
81
- messages=messages,
82
- model=model,
83
- max_tokens=max_tokens,
84
- metadata=_opt(metadata),
85
- stop_sequences=_opt(stop_sequences),
86
- system=_opt(system),
87
- temperature=_opt(temperature),
88
- tool_choice=_opt(tool_choice),
89
- tools=_opt(tools),
90
- top_k=_opt(top_k),
91
- top_p=_opt(top_p),
92
- ).dict()
198
+ if model_kwargs is None:
199
+ model_kwargs = {}
93
200
 
201
+ if tools is not None:
202
+ # Reformat `tools` into Anthropic format
203
+ model_kwargs['tools'] = [
204
+ {
205
+ 'name': tool['name'],
206
+ 'description': tool['description'],
207
+ 'input_schema': {
208
+ 'type': 'object',
209
+ 'properties': tool['parameters']['properties'],
210
+ 'required': tool['required'],
211
+ },
212
+ }
213
+ for tool in tools
214
+ ]
94
215
 
95
- _T = TypeVar('_T')
216
+ if tool_choice is not None:
217
+ if tool_choice['auto']:
218
+ model_kwargs['tool_choice'] = {'type': 'auto'}
219
+ elif tool_choice['required']:
220
+ model_kwargs['tool_choice'] = {'type': 'any'}
221
+ else:
222
+ assert tool_choice['tool'] is not None
223
+ model_kwargs['tool_choice'] = {'type': 'tool', 'name': tool_choice['tool']}
224
+ if not tool_choice['parallel_tool_calls']:
225
+ model_kwargs['tool_choice']['disable_parallel_tool_use'] = True
96
226
 
227
+ # make sure the pool info exists prior to making the request
228
+ resource_pool_id = f'rate-limits:anthropic:{model}'
229
+ rate_limits_info = env.Env.get().get_resource_pool_info(resource_pool_id, AnthropicRateLimitsInfo)
230
+ assert isinstance(rate_limits_info, env.RateLimitsInfo)
97
231
 
98
- def _opt(arg: _T) -> Union[_T, 'anthropic.NotGiven']:
99
- import anthropic
100
- return arg if arg is not None else anthropic.NOT_GIVEN
232
+ # TODO: timeouts should be set system-wide and be user-configurable
233
+ from anthropic.types import MessageParam
234
+
235
+ start_ts = datetime.datetime.now(tz=datetime.timezone.utc)
236
+
237
+ result = await _anthropic_client().messages.with_raw_response.create(
238
+ messages=cast(Iterable[MessageParam], messages), model=model, max_tokens=max_tokens, **model_kwargs
239
+ )
240
+
241
+ requests_info, input_tokens_info, output_tokens_info = _get_header_info(result.headers)
242
+ # retry_after_str = result.headers.get('retry-after')
243
+ # if retry_after_str is not None:
244
+ # _logger.debug(f'retry-after: {retry_after_str}')
245
+ is_retry = _runtime_ctx is not None and _runtime_ctx.is_retry
246
+ rate_limits_info.record(
247
+ request_ts=start_ts,
248
+ requests=requests_info,
249
+ input_tokens=input_tokens_info,
250
+ output_tokens=output_tokens_info,
251
+ reset_exc=is_retry,
252
+ )
253
+
254
+ result_dict = json.loads(result.text)
255
+ return result_dict
256
+
257
+
258
+ @messages.resource_pool
259
+ def _(model: str) -> str:
260
+ return f'rate-limits:anthropic:{model}'
261
+
262
+
263
+ def invoke_tools(tools: Tools, response: exprs.Expr) -> exprs.InlineDict:
264
+ """Converts an Anthropic response dict to Pixeltable tool invocation format and calls `tools._invoke()`."""
265
+ return tools._invoke(_anthropic_response_to_pxt_tool_calls(response))
266
+
267
+
268
+ @pxt.udf
269
+ def _anthropic_response_to_pxt_tool_calls(response: dict) -> dict | None:
270
+ anthropic_tool_calls = [r for r in response['content'] if r['type'] == 'tool_use']
271
+ if len(anthropic_tool_calls) == 0:
272
+ return None
273
+ pxt_tool_calls: dict[str, list[dict[str, Any]]] = {}
274
+ for tool_call in anthropic_tool_calls:
275
+ tool_name = tool_call['name']
276
+ if tool_name not in pxt_tool_calls:
277
+ pxt_tool_calls[tool_name] = []
278
+ pxt_tool_calls[tool_name].append({'args': tool_call['input']})
279
+ return pxt_tool_calls
101
280
 
102
281
 
103
282
  __all__ = local_public_names(__name__)
104
283
 
105
284
 
106
- def __dir__():
285
+ def __dir__() -> list[str]:
107
286
  return __all__
@@ -1,30 +1,167 @@
1
1
  """
2
- Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for `AudioType`.
2
+ Pixeltable UDFs for `AudioType`.
3
+ """
3
4
 
4
- Example:
5
- ```python
6
- import pixeltable as pxt
7
- import pixeltable.functions as pxtf
5
+ from typing import Any
8
6
 
9
- t = pxt.get_table(...)
10
- t.select(pxtf.audio.get_metadata()).collect()
11
- ```
12
- """
7
+ import av
8
+ import numpy as np
13
9
 
14
10
  import pixeltable as pxt
11
+ import pixeltable.utils.av as av_utils
15
12
  from pixeltable.utils.code import local_public_names
13
+ from pixeltable.utils.local_store import TempStore
16
14
 
17
15
 
18
16
  @pxt.udf(is_method=True)
19
17
  def get_metadata(audio: pxt.Audio) -> dict:
20
18
  """
21
19
  Gets various metadata associated with an audio file and returns it as a dictionary.
20
+
21
+ Args:
22
+ audio: The audio to get metadata for.
23
+
24
+ Returns:
25
+ A `dict` such as the following:
26
+
27
+ ```json
28
+ {
29
+ 'size': 2568827,
30
+ 'streams': [
31
+ {
32
+ 'type': 'audio',
33
+ 'frames': 0,
34
+ 'duration': 2646000,
35
+ 'metadata': {},
36
+ 'time_base': 2.2675736961451248e-05,
37
+ 'codec_context': {
38
+ 'name': 'flac',
39
+ 'profile': None,
40
+ 'channels': 1,
41
+ 'codec_tag': '\\x00\\x00\\x00\\x00',
42
+ },
43
+ 'duration_seconds': 60.0,
44
+ }
45
+ ],
46
+ 'bit_rate': 342510,
47
+ 'metadata': {'encoder': 'Lavf61.1.100'},
48
+ 'bit_exact': False,
49
+ }
50
+ ```
51
+
52
+ Examples:
53
+ Extract metadata for files in the `audio_col` column of the table `tbl`:
54
+
55
+ >>> tbl.select(tbl.audio_col.get_metadata()).collect()
56
+ """
57
+ return av_utils.get_metadata(audio)
58
+
59
+
60
+ @pxt.udf()
61
+ def encode_audio(
62
+ audio_data: pxt.Array[pxt.Float], *, input_sample_rate: int, format: str, output_sample_rate: int | None = None
63
+ ) -> pxt.Audio:
64
+ """
65
+ Encodes an audio clip represented as an array into a specified audio format.
66
+
67
+ Parameters:
68
+ audio_data: An array of sampled amplitudes. The accepted array shapes are `(N,)` or `(1, N)` for mono audio
69
+ or `(2, N)` for stereo.
70
+ input_sample_rate: The sample rate of the input audio data.
71
+ format: The desired output audio format. The supported formats are 'wav', 'mp3', 'flac', and 'mp4'.
72
+ output_sample_rate: The desired sample rate for the output audio. Defaults to the input sample rate if
73
+ unspecified.
74
+
75
+ Examples:
76
+ Add a computed column with encoded FLAC audio files to a table with audio data (as arrays of floats) and sample
77
+ rates:
78
+
79
+ >>> t.add_computed_column(
80
+ ... audio_file=encode_audio(
81
+ ... t.audio_data, input_sample_rate=t.sample_rate, format='flac'
82
+ ... )
83
+ ... )
84
+ """
85
+ if format not in av_utils.AUDIO_FORMATS:
86
+ raise pxt.Error(f'Only the following formats are supported: {av_utils.AUDIO_FORMATS.keys()}')
87
+ if output_sample_rate is None:
88
+ output_sample_rate = input_sample_rate
89
+
90
+ codec, ext = av_utils.AUDIO_FORMATS[format]
91
+ output_path = str(TempStore.create_path(extension=f'.{ext}'))
92
+
93
+ match audio_data.shape:
94
+ case (_,):
95
+ # Mono audio as 1D array, reshape for pyav
96
+ layout = 'mono'
97
+ audio_data_transformed = audio_data[None, :]
98
+ case (1, _):
99
+ # Mono audio as 2D array, simply reshape and transpose the input for pyav
100
+ layout = 'mono'
101
+ audio_data_transformed = audio_data.reshape(-1, 1).transpose()
102
+ case (2, _):
103
+ # Stereo audio. Input layout: [[L0, L1, L2, ...],[R0, R1, R2, ...]],
104
+ # pyav expects: [L0, R0, L1, R1, L2, R2, ...]
105
+ layout = 'stereo'
106
+ audio_data_transformed = np.empty(audio_data.shape[1] * 2, dtype=audio_data.dtype)
107
+ audio_data_transformed[0::2] = audio_data[0]
108
+ audio_data_transformed[1::2] = audio_data[1]
109
+ audio_data_transformed = audio_data_transformed.reshape(1, -1)
110
+ case _:
111
+ raise pxt.Error(
112
+ f'Supported input array shapes are (N,), (1, N) for mono and (2, N) for stereo, got {audio_data.shape}'
113
+ )
114
+
115
+ with av.open(output_path, mode='w') as output_container:
116
+ stream = output_container.add_stream(codec, rate=output_sample_rate)
117
+ assert isinstance(stream, av.AudioStream)
118
+
119
+ frame = av.AudioFrame.from_ndarray(audio_data_transformed, format='flt', layout=layout)
120
+ frame.sample_rate = input_sample_rate
121
+
122
+ for packet in stream.encode(frame):
123
+ output_container.mux(packet)
124
+ for packet in stream.encode():
125
+ output_container.mux(packet)
126
+
127
+ return output_path
128
+
129
+
130
+ def audio_splitter(
131
+ audio: Any, chunk_duration_sec: float, *, overlap_sec: float = 0.0, min_chunk_duration_sec: float = 0.0
132
+ ) -> tuple[type[pxt.iterators.ComponentIterator], dict[str, Any]]:
133
+ """
134
+ Iterator over chunks of an audio file. The audio file is split into smaller chunks,
135
+ where the duration of each chunk is determined by chunk_duration_sec.
136
+ The iterator yields audio chunks as pxt.Audio, along with the start and end time of each chunk.
137
+ If the input contains no audio, no chunks are yielded.
138
+
139
+ Args:
140
+ chunk_duration_sec: Audio chunk duration in seconds
141
+ overlap_sec: Overlap between consecutive chunks in seconds
142
+ min_chunk_duration_sec: Drop the last chunk if it is smaller than min_chunk_duration_sec
143
+
144
+ Examples:
145
+ This example assumes an existing table `tbl` with a column `audio` of type `pxt.Audio`.
146
+
147
+ Create a view that splits all audio files into chunks of 30 seconds with 5 seconds overlap:
148
+
149
+ >>> pxt.create_view(
150
+ ... 'audio_chunks',
151
+ ... tbl,
152
+ ... iterator=audio_splitter(tbl.audio, chunk_duration_sec=30.0, overlap_sec=5.0)
153
+ ... )
22
154
  """
23
- return pxt.functions.video._get_metadata(audio)
155
+ kwargs: dict[str, Any] = {}
156
+ if overlap_sec != 0.0:
157
+ kwargs['overlap_sec'] = overlap_sec
158
+ if min_chunk_duration_sec != 0.0:
159
+ kwargs['min_chunk_duration_sec'] = min_chunk_duration_sec
160
+ return pxt.iterators.AudioSplitter._create(audio=audio, chunk_duration_sec=chunk_duration_sec, **kwargs)
24
161
 
25
162
 
26
163
  __all__ = local_public_names(__name__)
27
164
 
28
165
 
29
- def __dir__():
166
+ def __dir__() -> list[str]:
30
167
  return __all__
@@ -0,0 +1,137 @@
1
+ """
2
+ Pixeltable UDFs for AWS Bedrock AI models.
3
+
4
+ Provides integration with AWS Bedrock for accessing various foundation models
5
+ including Anthropic Claude, Amazon Titan, and other providers.
6
+ """
7
+
8
+ import logging
9
+ from typing import TYPE_CHECKING, Any
10
+
11
+ import pixeltable as pxt
12
+ from pixeltable import env, exprs
13
+ from pixeltable.func import Tools
14
+ from pixeltable.utils.code import local_public_names
15
+
16
+ if TYPE_CHECKING:
17
+ from botocore.client import BaseClient
18
+
19
+ _logger = logging.getLogger('pixeltable')
20
+
21
+
22
@env.register_client('bedrock')
def _() -> 'BaseClient':
    """Create the boto3 `bedrock-runtime` client that is registered under the name 'bedrock'."""
    # Imported inside the function, so boto3 is only needed once this factory actually runs.
    import boto3

    runtime_client = boto3.client(service_name='bedrock-runtime')
    return runtime_client
27
+
28
+
29
# boto3 defines its client types dynamically, so there is no static type to annotate with;
# `Any` is the best we can offer the type checker here.
def _bedrock_client() -> Any:
    """Return the client registered under the name 'bedrock' from the current `env.Env` instance."""
    current_env = env.Env.get()
    return current_env.get_client('bedrock')
32
+
33
+
34
@pxt.udf
def converse(
    messages: list[dict[str, Any]],
    *,
    model_id: str,
    system: list[dict[str, Any]] | None = None,
    inference_config: dict | None = None,
    additional_model_request_fields: dict | None = None,
    tool_config: list[dict] | None = None,
) -> dict:
    """
    Generate a conversation response.

    Equivalent to the AWS Bedrock `converse` API endpoint.
    For additional details, see: <https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html>

    __Requirements:__

    - `pip install boto3`

    Args:
        messages: Input messages.
        model_id: The model that will complete your prompt.
        system: An optional system prompt.
        inference_config: Base inference parameters to use.
        additional_model_request_fields: Additional inference parameters to use.
        tool_config: An optional list of tool definitions. Each entry must provide the keys `'name'`,
            `'description'`, `'parameters'` (containing `'properties'`) and `'required'`; the entries are
            converted to Bedrock `toolSpec` definitions before the request is sent.

    For details on the optional parameters, see:
    <https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html>

    Returns:
        A dictionary containing the response and other metadata.

    Examples:
        Add a computed column that applies the model `anthropic.claude-3-haiku-20240307-v1:0`
        to an existing Pixeltable column `tbl.prompt` of the table `tbl`:

        >>> msgs = [{'role': 'user', 'content': [{'text': tbl.prompt}]}]
        >>> tbl.add_computed_column(response=converse(msgs, model_id='anthropic.claude-3-haiku-20240307-v1:0'))
    """

    # Bedrock expects camelCase request keys; optional sections are only sent when explicitly provided.
    kwargs: dict[str, Any] = {'messages': messages, 'modelId': model_id}

    if system is not None:
        kwargs['system'] = system
    if inference_config is not None:
        kwargs['inferenceConfig'] = inference_config
    if additional_model_request_fields is not None:
        kwargs['additionalModelRequestFields'] = additional_model_request_fields

    if tool_config is not None:
        # Re-shape each tool definition into the `toolSpec` layout of the Converse API.
        kwargs['toolConfig'] = {
            'tools': [
                {
                    'toolSpec': {
                        'name': tool['name'],
                        'description': tool['description'],
                        'inputSchema': {
                            'json': {
                                'type': 'object',
                                'properties': tool['parameters']['properties'],
                                'required': tool['required'],
                            }
                        },
                    }
                }
                for tool in tool_config
            ]
        }

    return _bedrock_client().converse(**kwargs)
106
+
107
+
108
def invoke_tools(tools: Tools, response: exprs.Expr) -> exprs.InlineDict:
    """
    Converts a Bedrock response dict to Pixeltable tool invocation format and calls `tools._invoke()`.

    Args:
        tools: The tools whose `_invoke()` method is called with the converted tool calls.
        response: Expression holding a Bedrock `converse` response.

    Returns:
        The result of `tools._invoke()`.
    """
    return tools._invoke(_bedrock_response_to_pxt_tool_calls(response))
111
+
112
+
113
@pxt.udf
def _bedrock_response_to_pxt_tool_calls(response: dict) -> dict | None:
    """
    Extract the tool calls from a Bedrock `converse` response, grouped by tool name.

    Returns `None` when the response's `stopReason` is not `'tool_use'` or when no content entry
    carries a `toolUse` element. Otherwise returns a dict mapping each tool name to a list of
    `{'args': <tool input>}` dicts, in the order the calls appear in the response.
    """
    if response.get('stopReason') != 'tool_use':
        return None

    calls_by_tool: dict[str, list[dict[str, Any]]] = {}
    for content_item in response['output']['message']['content']:
        if 'toolUse' not in content_item:
            continue
        tool_use = content_item['toolUse']
        calls_by_tool.setdefault(tool_use['name'], []).append({'args': tool_use['input']})

    # An empty mapping is reported as None
    return calls_by_tool or None
131
+
132
+
133
__all__ = local_public_names(__name__)


def __dir__() -> list[str]:
    """Return this module's `__all__` list, so that `dir()` on the module reports exactly those names."""
    return __all__