pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +23 -5
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -3
- pixeltable/catalog/catalog.py +1318 -404
- pixeltable/catalog/column.py +186 -115
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/globals.py +11 -43
- pixeltable/catalog/insertable_table.py +167 -79
- pixeltable/catalog/path.py +61 -23
- pixeltable/catalog/schema_object.py +9 -10
- pixeltable/catalog/table.py +626 -308
- pixeltable/catalog/table_metadata.py +101 -0
- pixeltable/catalog/table_version.py +713 -569
- pixeltable/catalog/table_version_handle.py +37 -6
- pixeltable/catalog/table_version_path.py +42 -29
- pixeltable/catalog/tbl_ops.py +50 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +108 -94
- pixeltable/config.py +128 -22
- pixeltable/dataframe.py +188 -100
- pixeltable/env.py +407 -136
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +3 -0
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +231 -0
- pixeltable/exec/cell_reconstruction_node.py +135 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +7 -6
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +190 -30
- pixeltable/exec/globals.py +32 -0
- pixeltable/exec/in_memory_data_node.py +18 -18
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +206 -101
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +34 -30
- pixeltable/exprs/column_ref.py +92 -96
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +152 -55
- pixeltable/exprs/expr.py +62 -43
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +75 -37
- pixeltable/exprs/globals.py +1 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +10 -27
- pixeltable/exprs/is_null.py +1 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +127 -53
- pixeltable/exprs/rowid_ref.py +8 -12
- pixeltable/exprs/similarity_expr.py +50 -25
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +10 -10
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +20 -18
- pixeltable/func/signature.py +43 -16
- pixeltable/func/tools.py +23 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +6 -0
- pixeltable/functions/anthropic.py +93 -33
- pixeltable/functions/audio.py +114 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +1 -1
- pixeltable/functions/deepseek.py +20 -9
- pixeltable/functions/fireworks.py +2 -2
- pixeltable/functions/gemini.py +28 -11
- pixeltable/functions/globals.py +13 -13
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1046 -23
- pixeltable/functions/image.py +9 -18
- pixeltable/functions/llama_cpp.py +23 -8
- pixeltable/functions/math.py +3 -4
- pixeltable/functions/mistralai.py +4 -15
- pixeltable/functions/ollama.py +16 -9
- pixeltable/functions/openai.py +104 -82
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +2 -2
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +21 -28
- pixeltable/functions/timestamp.py +13 -14
- pixeltable/functions/together.py +4 -6
- pixeltable/functions/twelvelabs.py +92 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/video.py +1388 -106
- pixeltable/functions/vision.py +7 -7
- pixeltable/functions/whisper.py +15 -7
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +332 -105
- pixeltable/index/base.py +13 -22
- pixeltable/index/btree.py +23 -22
- pixeltable/index/embedding_index.py +32 -44
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +7 -6
- pixeltable/io/external_store.py +49 -77
- pixeltable/io/fiftyone.py +11 -11
- pixeltable/io/globals.py +29 -28
- pixeltable/io/hf_datasets.py +17 -9
- pixeltable/io/label_studio.py +70 -66
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +12 -11
- pixeltable/io/parquet.py +13 -93
- pixeltable/io/table_data_conduit.py +71 -47
- pixeltable/io/utils.py +3 -3
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +21 -11
- pixeltable/iterators/document.py +116 -55
- pixeltable/iterators/image.py +5 -2
- pixeltable/iterators/video.py +293 -13
- pixeltable/metadata/__init__.py +4 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_36.py +2 -2
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/util.py +13 -12
- pixeltable/metadata/notes.py +4 -0
- pixeltable/metadata/schema.py +79 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +274 -223
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +259 -129
- pixeltable/share/protocol/__init__.py +34 -0
- pixeltable/share/protocol/common.py +170 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +109 -0
- pixeltable/share/publish.py +213 -57
- pixeltable/store.py +238 -175
- pixeltable/type_system.py +104 -63
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +108 -13
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +305 -0
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +31 -5
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +5 -6
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +4 -6
- pixeltable/utils/gcs_store.py +283 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +88 -0
- pixeltable/utils/local_store.py +316 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +528 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +392 -0
- pixeltable-0.4.20.dist-info/METADATA +587 -0
- pixeltable-0.4.20.dist-info/RECORD +218 -0
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
- pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable/utils/sample.py +0 -25
- pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
- pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
- pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
pixeltable/functions/bedrock.py
CHANGED
|
@@ -1,5 +1,12 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable UDFs for AWS Bedrock AI models.
|
|
3
|
+
|
|
4
|
+
Provides integration with AWS Bedrock for accessing various foundation models
|
|
5
|
+
including Anthropic Claude, Amazon Titan, and other providers.
|
|
6
|
+
"""
|
|
7
|
+
|
|
1
8
|
import logging
|
|
2
|
-
from typing import TYPE_CHECKING, Any
|
|
9
|
+
from typing import TYPE_CHECKING, Any
|
|
3
10
|
|
|
4
11
|
import pixeltable as pxt
|
|
5
12
|
from pixeltable import env, exprs
|
|
@@ -29,10 +36,10 @@ def converse(
|
|
|
29
36
|
messages: list[dict[str, Any]],
|
|
30
37
|
*,
|
|
31
38
|
model_id: str,
|
|
32
|
-
system:
|
|
33
|
-
inference_config:
|
|
34
|
-
additional_model_request_fields:
|
|
35
|
-
tool_config:
|
|
39
|
+
system: list[dict[str, Any]] | None = None,
|
|
40
|
+
inference_config: dict | None = None,
|
|
41
|
+
additional_model_request_fields: dict | None = None,
|
|
42
|
+
tool_config: list[dict] | None = None,
|
|
36
43
|
) -> dict:
|
|
37
44
|
"""
|
|
38
45
|
Generate a conversation response.
|
|
@@ -104,7 +111,7 @@ def invoke_tools(tools: Tools, response: exprs.Expr) -> exprs.InlineDict:
|
|
|
104
111
|
|
|
105
112
|
|
|
106
113
|
@pxt.udf
|
|
107
|
-
def _bedrock_response_to_pxt_tool_calls(response: dict) ->
|
|
114
|
+
def _bedrock_response_to_pxt_tool_calls(response: dict) -> dict | None:
|
|
108
115
|
if response.get('stopReason') != 'tool_use':
|
|
109
116
|
return None
|
|
110
117
|
|
pixeltable/functions/date.py
CHANGED
|
@@ -83,7 +83,7 @@ def make_date(year: int, month: int, day: int) -> date:
|
|
|
83
83
|
|
|
84
84
|
@make_date.to_sql
|
|
85
85
|
def _(year: sql.ColumnElement, month: sql.ColumnElement, day: sql.ColumnElement) -> sql.ColumnElement:
|
|
86
|
-
return sql.func.make_date(
|
|
86
|
+
return sql.func.make_date(year.cast(sql.Integer), month.cast(sql.Integer), day.cast(sql.Integer))
|
|
87
87
|
|
|
88
88
|
|
|
89
89
|
@pxt.udf(is_method=True)
|
pixeltable/functions/deepseek.py
CHANGED
|
@@ -1,5 +1,12 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable UDFs for Deepseek AI models.
|
|
3
|
+
|
|
4
|
+
Provides integration with Deepseek's language models for chat completions
|
|
5
|
+
and other AI capabilities.
|
|
6
|
+
"""
|
|
7
|
+
|
|
1
8
|
import json
|
|
2
|
-
from typing import TYPE_CHECKING, Any
|
|
9
|
+
from typing import TYPE_CHECKING, Any
|
|
3
10
|
|
|
4
11
|
import httpx
|
|
5
12
|
|
|
@@ -26,14 +33,14 @@ def _deepseek_client() -> 'openai.AsyncOpenAI':
|
|
|
26
33
|
return env.Env.get().get_client('deepseek')
|
|
27
34
|
|
|
28
35
|
|
|
29
|
-
@pxt.udf
|
|
36
|
+
@pxt.udf(resource_pool='request-rate:deepseek')
|
|
30
37
|
async def chat_completions(
|
|
31
38
|
messages: list,
|
|
32
39
|
*,
|
|
33
40
|
model: str,
|
|
34
|
-
model_kwargs:
|
|
35
|
-
tools:
|
|
36
|
-
tool_choice:
|
|
41
|
+
model_kwargs: dict[str, Any] | None = None,
|
|
42
|
+
tools: list[dict[str, Any]] | None = None,
|
|
43
|
+
tool_choice: dict[str, Any] | None = None,
|
|
37
44
|
) -> dict:
|
|
38
45
|
"""
|
|
39
46
|
Creates a model response for the given chat conversation.
|
|
@@ -43,6 +50,10 @@ async def chat_completions(
|
|
|
43
50
|
|
|
44
51
|
Deepseek uses the OpenAI SDK, so you will need to install the `openai` package to use this UDF.
|
|
45
52
|
|
|
53
|
+
Request throttling:
|
|
54
|
+
Applies the rate limit set in the config (section `deepseek`, key `rate_limit`). If no rate
|
|
55
|
+
limit is configured, uses a default of 600 RPM.
|
|
56
|
+
|
|
46
57
|
__Requirements:__
|
|
47
58
|
|
|
48
59
|
- `pip install openai`
|
|
@@ -63,10 +74,10 @@ async def chat_completions(
|
|
|
63
74
|
of the table `tbl`:
|
|
64
75
|
|
|
65
76
|
>>> messages = [
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
77
|
+
... {'role': 'system', 'content': 'You are a helpful assistant.'},
|
|
78
|
+
... {'role': 'user', 'content': tbl.prompt}
|
|
79
|
+
... ]
|
|
80
|
+
>>> tbl.add_computed_column(response=chat_completions(messages, model='deepseek-chat'))
|
|
70
81
|
"""
|
|
71
82
|
if model_kwargs is None:
|
|
72
83
|
model_kwargs = {}
|
pixeltable/functions/fireworks.py
CHANGED
|
@@ -5,7 +5,7 @@ first `pip install fireworks-ai` and configure your Fireworks AI credentials, as
|
|
|
5
5
|
the [Working with Fireworks](https://pixeltable.readme.io/docs/working-with-fireworks) tutorial.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
from typing import TYPE_CHECKING, Any
|
|
8
|
+
from typing import TYPE_CHECKING, Any
|
|
9
9
|
|
|
10
10
|
import pixeltable as pxt
|
|
11
11
|
from pixeltable import env
|
|
@@ -29,7 +29,7 @@ def _fireworks_client() -> 'fireworks.client.Fireworks':
|
|
|
29
29
|
|
|
30
30
|
@pxt.udf(resource_pool='request-rate:fireworks')
|
|
31
31
|
async def chat_completions(
|
|
32
|
-
messages: list[dict[str, str]], *, model: str, model_kwargs:
|
|
32
|
+
messages: list[dict[str, str]], *, model: str, model_kwargs: dict[str, Any] | None = None
|
|
33
33
|
) -> dict:
|
|
34
34
|
"""
|
|
35
35
|
Creates a model response for the given chat conversation.
|
pixeltable/functions/gemini.py
CHANGED
|
@@ -7,14 +7,15 @@ the [Working with Gemini](https://pixeltable.readme.io/docs/working-with-gemini)
|
|
|
7
7
|
|
|
8
8
|
import asyncio
|
|
9
9
|
import io
|
|
10
|
-
import tempfile
|
|
11
10
|
from pathlib import Path
|
|
12
|
-
from typing import TYPE_CHECKING
|
|
11
|
+
from typing import TYPE_CHECKING
|
|
13
12
|
|
|
14
13
|
import PIL.Image
|
|
15
14
|
|
|
16
15
|
import pixeltable as pxt
|
|
17
16
|
from pixeltable import env, exceptions as excs, exprs
|
|
17
|
+
from pixeltable.utils.code import local_public_names
|
|
18
|
+
from pixeltable.utils.local_store import TempStore
|
|
18
19
|
|
|
19
20
|
if TYPE_CHECKING:
|
|
20
21
|
from google import genai
|
|
@@ -33,14 +34,14 @@ def _genai_client() -> 'genai.client.Client':
|
|
|
33
34
|
|
|
34
35
|
@pxt.udf(resource_pool='request-rate:gemini')
|
|
35
36
|
async def generate_content(
|
|
36
|
-
contents: str, *, model: str, config:
|
|
37
|
+
contents: str, *, model: str, config: dict | None = None, tools: list[dict] | None = None
|
|
37
38
|
) -> dict:
|
|
38
39
|
"""
|
|
39
40
|
Generate content from the specified model. For additional details, see:
|
|
40
41
|
<https://ai.google.dev/gemini-api/docs/text-generation>
|
|
41
42
|
|
|
42
43
|
Request throttling:
|
|
43
|
-
Applies the rate limit set in the config (section `gemini
|
|
44
|
+
Applies the rate limit set in the config (section `gemini.rate_limits`; use the model id as the key). If no rate
|
|
44
45
|
limit is configured, uses a default of 600 RPM.
|
|
45
46
|
|
|
46
47
|
__Requirements:__
|
|
@@ -102,7 +103,7 @@ def invoke_tools(tools: pxt.func.Tools, response: exprs.Expr) -> exprs.InlineDic
|
|
|
102
103
|
|
|
103
104
|
|
|
104
105
|
@pxt.udf
|
|
105
|
-
def _gemini_response_to_pxt_tool_calls(response: dict) ->
|
|
106
|
+
def _gemini_response_to_pxt_tool_calls(response: dict) -> dict | None:
|
|
106
107
|
pxt_tool_calls: dict[str, list[dict]] = {}
|
|
107
108
|
for part in response['candidates'][0]['content']['parts']:
|
|
108
109
|
tool_call = part.get('function_call')
|
|
@@ -122,11 +123,15 @@ def _(model: str) -> str:
|
|
|
122
123
|
|
|
123
124
|
|
|
124
125
|
@pxt.udf(resource_pool='request-rate:imagen')
|
|
125
|
-
async def generate_images(prompt: str, *, model: str, config:
|
|
126
|
+
async def generate_images(prompt: str, *, model: str, config: dict | None = None) -> PIL.Image.Image:
|
|
126
127
|
"""
|
|
127
128
|
Generates images based on a text description and configuration. For additional details, see:
|
|
128
129
|
<https://ai.google.dev/gemini-api/docs/image-generation>
|
|
129
130
|
|
|
131
|
+
Request throttling:
|
|
132
|
+
Applies the rate limit set in the config (section `imagen.rate_limits`; use the model id as the key). If no rate
|
|
133
|
+
limit is configured, uses a default of 600 RPM.
|
|
134
|
+
|
|
130
135
|
__Requirements:__
|
|
131
136
|
|
|
132
137
|
- `pip install google-genai`
|
|
@@ -162,12 +167,16 @@ def _(model: str) -> str:
|
|
|
162
167
|
|
|
163
168
|
@pxt.udf(resource_pool='request-rate:veo')
|
|
164
169
|
async def generate_videos(
|
|
165
|
-
prompt:
|
|
170
|
+
prompt: str | None = None, image: PIL.Image.Image | None = None, *, model: str, config: dict | None = None
|
|
166
171
|
) -> pxt.Video:
|
|
167
172
|
"""
|
|
168
173
|
Generates videos based on a text description and configuration. For additional details, see:
|
|
169
174
|
<https://ai.google.dev/gemini-api/docs/video-generation>
|
|
170
175
|
|
|
176
|
+
Request throttling:
|
|
177
|
+
Applies the rate limit set in the config (section `veo.rate_limits`; use the model id as the key). If no rate
|
|
178
|
+
limit is configured, uses a default of 600 RPM.
|
|
179
|
+
|
|
171
180
|
__Requirements:__
|
|
172
181
|
|
|
173
182
|
- `pip install google-genai`
|
|
@@ -196,7 +205,7 @@ async def generate_videos(
|
|
|
196
205
|
if prompt is None and image is None:
|
|
197
206
|
raise excs.Error('At least one of `prompt` or `image` must be provided.')
|
|
198
207
|
|
|
199
|
-
image_:
|
|
208
|
+
image_: types.Image | None = None
|
|
200
209
|
if image is not None:
|
|
201
210
|
with io.BytesIO() as buffer:
|
|
202
211
|
image.save(buffer, format='jpeg')
|
|
@@ -215,11 +224,19 @@ async def generate_videos(
|
|
|
215
224
|
video_bytes = await _genai_client().aio.files.download(file=video.video) # type: ignore[arg-type]
|
|
216
225
|
assert video_bytes is not None
|
|
217
226
|
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
227
|
+
# Create a temporary file to store the video bytes
|
|
228
|
+
output_path = TempStore.create_path(extension='.mp4')
|
|
229
|
+
Path(output_path).write_bytes(video_bytes)
|
|
230
|
+
return str(output_path)
|
|
221
231
|
|
|
222
232
|
|
|
223
233
|
@generate_videos.resource_pool
|
|
224
234
|
def _(model: str) -> str:
|
|
225
235
|
return f'request-rate:veo:{model}'
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
__all__ = local_public_names(__name__)
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def __dir__() -> list[str]:
|
|
242
|
+
return __all__
|
pixeltable/functions/globals.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import builtins
|
|
2
2
|
import typing
|
|
3
|
-
from typing import Any, Callable, Optional, Union
|
|
3
|
+
from typing import Any, Callable
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
@@ -11,7 +11,7 @@ from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
|
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
# TODO: remove and replace calls with astype()
|
|
14
|
-
def cast(expr: exprs.Expr, target_type: Union[ts.ColumnType, type, _GenericAlias]) -> exprs.Expr:
|
|
14
|
+
def cast(expr: exprs.Expr, target_type: ts.ColumnType | type | _GenericAlias) -> exprs.Expr:
|
|
15
15
|
expr.col_type = ts.ColumnType.normalize_type(target_type)
|
|
16
16
|
return expr
|
|
17
17
|
|
|
@@ -19,7 +19,7 @@ def cast(expr: exprs.Expr, target_type: Union[ts.ColumnType, type, _GenericAlias
|
|
|
19
19
|
T = typing.TypeVar('T')
|
|
20
20
|
|
|
21
21
|
|
|
22
|
-
@func.uda(allows_window=True, type_substitutions=({T:
|
|
22
|
+
@func.uda(allows_window=True, type_substitutions=({T: int | None}, {T: float | None})) # type: ignore[misc]
|
|
23
23
|
class sum(func.Aggregator, typing.Generic[T]):
|
|
24
24
|
"""Sums the selected integers or floats."""
|
|
25
25
|
|
|
@@ -39,7 +39,7 @@ class sum(func.Aggregator, typing.Generic[T]):
|
|
|
39
39
|
|
|
40
40
|
|
|
41
41
|
@sum.to_sql
|
|
42
|
-
def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
|
|
42
|
+
def _(val: sql.ColumnElement) -> sql.ColumnElement | None:
|
|
43
43
|
# This can produce a Decimal. We are deliberately avoiding an explicit cast to a Bigint here, because that can
|
|
44
44
|
# cause overflows in Postgres. We're instead doing the conversion to the target type in SqlNode.__iter__().
|
|
45
45
|
return sql.sql.func.sum(val)
|
|
@@ -49,7 +49,7 @@ def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
|
|
|
49
49
|
allows_window=True,
|
|
50
50
|
# Allow counting non-null values of any type
|
|
51
51
|
# TODO: should we have an "Any" type that can be used here?
|
|
52
|
-
type_substitutions=tuple({T:
|
|
52
|
+
type_substitutions=tuple({T: t | None} for t in ts.ALL_PIXELTABLE_TYPES), # type: ignore[misc]
|
|
53
53
|
)
|
|
54
54
|
class count(func.Aggregator, typing.Generic[T]):
|
|
55
55
|
def __init__(self) -> None:
|
|
@@ -64,13 +64,13 @@ class count(func.Aggregator, typing.Generic[T]):
|
|
|
64
64
|
|
|
65
65
|
|
|
66
66
|
@count.to_sql
|
|
67
|
-
def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
|
|
67
|
+
def _(val: sql.ColumnElement) -> sql.ColumnElement | None:
|
|
68
68
|
return sql.sql.func.count(val)
|
|
69
69
|
|
|
70
70
|
|
|
71
71
|
@func.uda(
|
|
72
72
|
allows_window=True,
|
|
73
|
-
type_substitutions=tuple({T:
|
|
73
|
+
type_substitutions=tuple({T: t | None} for t in (str, int, float, bool, ts.Timestamp)), # type: ignore[misc]
|
|
74
74
|
)
|
|
75
75
|
class min(func.Aggregator, typing.Generic[T]):
|
|
76
76
|
def __init__(self) -> None:
|
|
@@ -89,7 +89,7 @@ class min(func.Aggregator, typing.Generic[T]):
|
|
|
89
89
|
|
|
90
90
|
|
|
91
91
|
@min.to_sql
|
|
92
|
-
def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
|
|
92
|
+
def _(val: sql.ColumnElement) -> sql.ColumnElement | None:
|
|
93
93
|
if val.type.python_type is bool:
|
|
94
94
|
# TODO: min/max aggregation of booleans is not supported in Postgres (but it is in Python).
|
|
95
95
|
# Right now we simply force the computation to be done in Python; we might consider implementing an alternate
|
|
@@ -100,7 +100,7 @@ def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
|
|
|
100
100
|
|
|
101
101
|
@func.uda(
|
|
102
102
|
allows_window=True,
|
|
103
|
-
type_substitutions=tuple({T:
|
|
103
|
+
type_substitutions=tuple({T: t | None} for t in (str, int, float, bool, ts.Timestamp)), # type: ignore[misc]
|
|
104
104
|
)
|
|
105
105
|
class max(func.Aggregator, typing.Generic[T]):
|
|
106
106
|
def __init__(self) -> None:
|
|
@@ -119,14 +119,14 @@ class max(func.Aggregator, typing.Generic[T]):
|
|
|
119
119
|
|
|
120
120
|
|
|
121
121
|
@max.to_sql
|
|
122
|
-
def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
|
|
122
|
+
def _(val: sql.ColumnElement) -> sql.ColumnElement | None:
|
|
123
123
|
if val.type.python_type is bool:
|
|
124
124
|
# TODO: see comment in @min.to_sql.
|
|
125
125
|
return None
|
|
126
126
|
return sql.sql.func.max(val)
|
|
127
127
|
|
|
128
128
|
|
|
129
|
-
@func.uda(type_substitutions=({T:
|
|
129
|
+
@func.uda(type_substitutions=({T: int | None}, {T: float | None})) # type: ignore[misc]
|
|
130
130
|
class mean(func.Aggregator, typing.Generic[T]):
|
|
131
131
|
def __init__(self) -> None:
|
|
132
132
|
self.sum: T = None
|
|
@@ -141,14 +141,14 @@ class mean(func.Aggregator, typing.Generic[T]):
|
|
|
141
141
|
self.sum += val # type: ignore[operator]
|
|
142
142
|
self.count += 1
|
|
143
143
|
|
|
144
|
-
def value(self) ->
|
|
144
|
+
def value(self) -> float | None: # Always a float
|
|
145
145
|
if self.count == 0:
|
|
146
146
|
return None
|
|
147
147
|
return self.sum / self.count # type: ignore[operator]
|
|
148
148
|
|
|
149
149
|
|
|
150
150
|
@mean.to_sql
|
|
151
|
-
def _(val: sql.ColumnElement) -> Optional[sql.ColumnElement]:
|
|
151
|
+
def _(val: sql.ColumnElement) -> sql.ColumnElement | None:
|
|
152
152
|
return sql.sql.func.avg(val)
|
|
153
153
|
|
|
154
154
|
|
pixeltable/functions/groq.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
|
|
3
|
+
that wrap various endpoints from the Groq API. In order to use them, you must
|
|
4
|
+
first `pip install groq` and configure your Groq credentials, as described in
|
|
5
|
+
the [Working with Groq](https://pixeltable.readme.io/docs/working-with-groq) tutorial.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import TYPE_CHECKING, Any
|
|
9
|
+
|
|
10
|
+
import pixeltable as pxt
|
|
11
|
+
from pixeltable import exprs
|
|
12
|
+
from pixeltable.env import Env, register_client
|
|
13
|
+
from pixeltable.utils.code import local_public_names
|
|
14
|
+
|
|
15
|
+
from .openai import _openai_response_to_pxt_tool_calls
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
import groq
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@register_client('groq')
|
|
22
|
+
def _(api_key: str) -> 'groq.AsyncGroq':
|
|
23
|
+
import groq
|
|
24
|
+
|
|
25
|
+
return groq.AsyncGroq(api_key=api_key)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _groq_client() -> 'groq.AsyncGroq':
|
|
29
|
+
return Env.get().get_client('groq')
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@pxt.udf(resource_pool='request-rate:groq')
|
|
33
|
+
async def chat_completions(
|
|
34
|
+
messages: list[dict[str, str]],
|
|
35
|
+
*,
|
|
36
|
+
model: str,
|
|
37
|
+
model_kwargs: dict[str, Any] | None = None,
|
|
38
|
+
tools: list[dict[str, Any]] | None = None,
|
|
39
|
+
tool_choice: dict[str, Any] | None = None,
|
|
40
|
+
) -> dict:
|
|
41
|
+
"""
|
|
42
|
+
Chat Completion API.
|
|
43
|
+
|
|
44
|
+
Equivalent to the Groq `chat/completions` API endpoint.
|
|
45
|
+
For additional details, see: <https://console.groq.com/docs/api-reference#chat-create>
|
|
46
|
+
|
|
47
|
+
Request throttling:
|
|
48
|
+
Applies the rate limit set in the config (section `groq`, key `rate_limit`). If no rate
|
|
49
|
+
limit is configured, uses a default of 600 RPM.
|
|
50
|
+
|
|
51
|
+
__Requirements:__
|
|
52
|
+
|
|
53
|
+
- `pip install groq`
|
|
54
|
+
|
|
55
|
+
Args:
|
|
56
|
+
messages: A list of messages comprising the conversation so far.
|
|
57
|
+
model: ID of the model to use. (See overview here: <https://console.groq.com/docs/models>)
|
|
58
|
+
model_kwargs: Additional keyword args for the Groq `chat/completions` API.
|
|
59
|
+
For details on the available parameters, see: <https://console.groq.com/docs/api-reference#chat-create>
|
|
60
|
+
|
|
61
|
+
Returns:
|
|
62
|
+
A dictionary containing the response and other metadata.
|
|
63
|
+
|
|
64
|
+
Examples:
|
|
65
|
+
Add a computed column that applies the model `llama-3.1-8b-instant`
|
|
66
|
+
to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
|
|
67
|
+
|
|
68
|
+
>>> messages = [{'role': 'user', 'content': tbl.prompt}]
|
|
69
|
+
... tbl.add_computed_column(response=chat_completions(messages, model='llama-3.1-8b-instant'))
|
|
70
|
+
"""
|
|
71
|
+
if model_kwargs is None:
|
|
72
|
+
model_kwargs = {}
|
|
73
|
+
|
|
74
|
+
Env.get().require_package('groq')
|
|
75
|
+
|
|
76
|
+
if tools is not None:
|
|
77
|
+
model_kwargs['tools'] = [{'type': 'function', 'function': tool} for tool in tools]
|
|
78
|
+
|
|
79
|
+
if tool_choice is not None:
|
|
80
|
+
if tool_choice['auto']:
|
|
81
|
+
model_kwargs['tool_choice'] = 'auto'
|
|
82
|
+
elif tool_choice['required']:
|
|
83
|
+
model_kwargs['tool_choice'] = 'required'
|
|
84
|
+
else:
|
|
85
|
+
assert tool_choice['tool'] is not None
|
|
86
|
+
model_kwargs['tool_choice'] = {'type': 'function', 'function': {'name': tool_choice['tool']}}
|
|
87
|
+
|
|
88
|
+
if tool_choice is not None and not tool_choice['parallel_tool_calls']:
|
|
89
|
+
model_kwargs['parallel_tool_calls'] = False
|
|
90
|
+
|
|
91
|
+
result = await _groq_client().chat.completions.create(
|
|
92
|
+
messages=messages, # type: ignore[arg-type]
|
|
93
|
+
model=model,
|
|
94
|
+
**model_kwargs,
|
|
95
|
+
)
|
|
96
|
+
return result.model_dump()
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def invoke_tools(tools: pxt.func.Tools, response: exprs.Expr) -> exprs.InlineDict:
|
|
100
|
+
"""Converts an OpenAI response dict to Pixeltable tool invocation format and calls `tools._invoke()`."""
|
|
101
|
+
return tools._invoke(_openai_response_to_pxt_tool_calls(response))
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
__all__ = local_public_names(__name__)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def __dir__() -> list[str]:
|
|
108
|
+
return __all__
|