pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +42 -8
- pixeltable/{dataframe.py → _query.py} +470 -206
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -4
- pixeltable/catalog/catalog.py +1785 -432
- pixeltable/catalog/column.py +190 -113
- pixeltable/catalog/dir.py +2 -4
- pixeltable/catalog/globals.py +19 -46
- pixeltable/catalog/insertable_table.py +191 -98
- pixeltable/catalog/path.py +63 -23
- pixeltable/catalog/schema_object.py +11 -15
- pixeltable/catalog/table.py +843 -436
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +978 -657
- pixeltable/catalog/table_version_handle.py +72 -16
- pixeltable/catalog/table_version_path.py +112 -43
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +134 -90
- pixeltable/config.py +134 -22
- pixeltable/env.py +471 -157
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +4 -1
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +11 -7
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +106 -56
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +19 -19
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +351 -84
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +36 -23
- pixeltable/exprs/column_ref.py +213 -89
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +164 -54
- pixeltable/exprs/expr.py +70 -44
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +100 -40
- pixeltable/exprs/globals.py +2 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +18 -32
- pixeltable/exprs/is_null.py +7 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +27 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +167 -67
- pixeltable/exprs/rowid_ref.py +25 -10
- pixeltable/exprs/similarity_expr.py +58 -40
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +17 -11
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +29 -27
- pixeltable/func/signature.py +46 -19
- pixeltable/func/tools.py +31 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +16 -0
- pixeltable/functions/anthropic.py +123 -77
- pixeltable/functions/audio.py +147 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +7 -4
- pixeltable/functions/deepseek.py +35 -43
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +11 -20
- pixeltable/functions/gemini.py +195 -39
- pixeltable/functions/globals.py +142 -14
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1056 -24
- pixeltable/functions/image.py +115 -57
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +28 -13
- pixeltable/functions/math.py +67 -5
- pixeltable/functions/mistralai.py +18 -55
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +20 -13
- pixeltable/functions/openai.py +240 -226
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +4 -4
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +239 -69
- pixeltable/functions/timestamp.py +16 -16
- pixeltable/functions/together.py +24 -84
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1515 -107
- pixeltable/functions/vision.py +8 -8
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +16 -8
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +362 -115
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +28 -22
- pixeltable/index/embedding_index.py +100 -118
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +8 -7
- pixeltable/io/external_store.py +56 -105
- pixeltable/io/fiftyone.py +13 -13
- pixeltable/io/globals.py +31 -30
- pixeltable/io/hf_datasets.py +61 -16
- pixeltable/io/label_studio.py +74 -70
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +21 -12
- pixeltable/io/parquet.py +25 -105
- pixeltable/io/table_data_conduit.py +250 -123
- pixeltable/io/utils.py +4 -4
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +26 -25
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +112 -78
- pixeltable/iterators/image.py +12 -15
- pixeltable/iterators/string.py +11 -4
- pixeltable/iterators/video.py +523 -120
- pixeltable/metadata/__init__.py +14 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_30.py +34 -21
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +20 -31
- pixeltable/metadata/notes.py +9 -0
- pixeltable/metadata/schema.py +140 -53
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +382 -115
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +547 -83
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +257 -59
- pixeltable/store.py +311 -194
- pixeltable/type_system.py +373 -211
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +131 -17
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +6 -6
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +32 -6
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +7 -18
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +86 -48
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +26 -0
- pixeltable/utils/system.py +30 -0
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -40
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable-0.3.14.dist-info/METADATA +0 -434
- pixeltable-0.3.14.dist-info/RECORD +0 -186
- pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/functions/document.py
ADDED
@@ -0,0 +1,81 @@
+"""
+Pixeltable UDFs for `DocumentType`.
+"""
+
+from typing import Any, Literal
+
+import pixeltable as pxt
+
+
+def document_splitter(
+    document: Any,
+    separators: str,
+    *,
+    elements: list[Literal['text', 'image']] | None = None,
+    limit: int | None = None,
+    overlap: int | None = None,
+    metadata: str = '',
+    skip_tags: list[str] | None = None,
+    tiktoken_encoding: str | None = 'cl100k_base',
+    tiktoken_target_model: str | None = None,
+    image_dpi: int = 300,
+    image_format: str = 'png',
+) -> tuple[type[pxt.iterators.ComponentIterator], dict[str, Any]]:
+    """Iterator over chunks of a document. The document is chunked according to the specified `separators`.
+
+    The iterator yields a `text` field containing the text of the chunk, and it may also
+    include additional metadata fields if specified in the `metadata` parameter, as explained below.
+
+    Chunked text will be cleaned with `ftfy.fix_text` to fix up common problems with unicode sequences.
+
+    Args:
+        separators: separators to use to chunk the document. Options are:
+            `'heading'`, `'paragraph'`, `'sentence'`, `'token_limit'`, `'char_limit'`, `'page'`.
+            This may be a comma-separated string, e.g., `'heading,token_limit'`.
+        elements: list of elements to extract from the document. Options are:
+            `'text'`, `'image'`. Defaults to `['text']` if not specified. The `'image'` element is only supported
+            for the `'page'` separator on PDF documents.
+        limit: the maximum number of tokens or characters in each chunk, if `'token_limit'`
+            or `'char_limit'` is specified.
+        metadata: additional metadata fields to include in the output. Options are:
+            `'title'`, `'heading'` (HTML and Markdown), `'sourceline'` (HTML), `'page'` (PDF), `'bounding_box'`
+            (PDF). The input may be a comma-separated string, e.g., `'title,heading,sourceline'`.
+        image_dpi: DPI to use when extracting images from PDFs. Defaults to 300.
+        image_format: format to use when extracting images from PDFs. Defaults to 'png'.
+
+    Examples:
+        All these examples assume an existing table `tbl` with a column `doc` of type `pxt.Document`.
+
+        Create a view that splits all documents into chunks of up to 300 tokens:
+
+        >>> pxt.create_view('chunks', tbl, iterator=document_splitter(tbl.doc, separators='token_limit', limit=300))
+
+        Create a view that splits all documents along sentence boundaries, including title and heading metadata:
+
+        >>> pxt.create_view(
+        ...     'sentence_chunks',
+        ...     tbl,
+        ...     iterator=document_splitter(tbl.doc, separators='sentence', metadata='title,heading')
+        ... )
+    """
+
+    kwargs: dict[str, Any] = {}
+    if elements is not None:
+        kwargs['elements'] = elements
+    if limit is not None:
+        kwargs['limit'] = limit
+    if overlap is not None:
+        kwargs['overlap'] = overlap
+    if metadata != '':
+        kwargs['metadata'] = metadata
+    if skip_tags is not None:
+        kwargs['skip_tags'] = skip_tags
+    if tiktoken_encoding != 'cl100k_base':
+        kwargs['tiktoken_encoding'] = tiktoken_encoding
+    if tiktoken_target_model is not None:
+        kwargs['tiktoken_target_model'] = tiktoken_target_model
+    if image_dpi != 300:
+        kwargs['image_dpi'] = image_dpi
+    if image_format != 'png':
+        kwargs['image_format'] = image_format
+    return pxt.iterators.document.DocumentSplitter._create(document=document, separators=separators, **kwargs)
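For orientation, here is a minimal end-to-end sketch of how the new `document_splitter` iterator in 0.5.7 would be used, based on the docstring above. The table name `docs`, its `doc` column, and the file path are illustrative, not taken from the package.

```python
import pixeltable as pxt
from pixeltable.functions.document import document_splitter

# Hypothetical base table with a document column.
docs = pxt.create_table('docs', {'doc': pxt.Document})
docs.insert([{'doc': '/path/to/report.pdf'}])

# Split every document into chunks of up to 300 tokens and keep the PDF page number.
chunks = pxt.create_view(
    'doc_chunks',
    docs,
    iterator=document_splitter(docs.doc, separators='token_limit', limit=300, metadata='page'),
)
print(chunks.select(chunks.text, chunks.page).collect())
```

Each chunk becomes a row of the view, with the `text` field and any requested metadata fields exposed as columns.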
pixeltable/functions/fal.py
ADDED
@@ -0,0 +1,76 @@
+"""
+Pixeltable UDFs
+that wrap various endpoints from the fal.ai API. In order to use them, you must
+first `pip install fal-client` and configure your fal.ai credentials, as described in
+the [Working with fal.ai](https://docs.pixeltable.com/notebooks/integrations/working-with-fal) tutorial.
+"""
+
+from typing import TYPE_CHECKING, Any
+
+import pixeltable as pxt
+from pixeltable.env import Env, register_client
+from pixeltable.utils.code import local_public_names
+
+if TYPE_CHECKING:
+    import fal_client
+
+
+@register_client('fal')
+def _(api_key: str) -> 'fal_client.AsyncClient':
+    import fal_client
+
+    return fal_client.AsyncClient(key=api_key)
+
+
+def _fal_client() -> 'fal_client.AsyncClient':
+    return Env.get().get_client('fal')
+
+
+@pxt.udf(resource_pool='request-rate:fal')
+async def run(input: dict[str, Any], *, app: str) -> pxt.Json:
+    """
+    Run a model on fal.ai.
+
+    Uses fal's queue-based subscribe mechanism for reliable execution.
+    For additional details, see: <https://fal.ai/docs>
+
+    Request throttling:
+        Applies the rate limit set in the config (section `fal`, key `rate_limit`). If no rate
+        limit is configured, uses a default of 600 RPM.
+
+    __Requirements:__
+
+    - `pip install fal-client`
+
+    Args:
+        input: The input parameters for the model.
+        app: The name or ID of the fal.ai application to run (e.g., 'fal-ai/flux/schnell').
+
+    Returns:
+        The output of the model as a JSON object.
+
+    Examples:
+        Add a computed column that applies the model `fal-ai/flux/schnell`
+        to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
+
+        >>> input = {'prompt': tbl.prompt}
+        ... tbl.add_computed_column(response=run(input, app='fal-ai/flux/schnell'))
+
+        Add a computed column that uses the model `fal-ai/fast-sdxl`
+        to generate images from an existing Pixeltable column `tbl.prompt`:
+
+        >>> input = {'prompt': tbl.prompt, 'image_size': 'square', 'num_inference_steps': 25}
+        ... tbl.add_computed_column(response=run(input, app='fal-ai/fast-sdxl'))
+        ... tbl.add_computed_column(image=tbl.response['images'][0]['url'].astype(pxt.Image))
+    """
+    Env.get().require_package('fal_client')
+    client = _fal_client()
+    result = await client.subscribe(app, arguments=input)
+    return result
+
+
+__all__ = local_public_names(__name__)
+
+
+def __dir__() -> list[str]:
+    return __all__
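The docstring examples above translate into the following hedged sketch of a typical fal.ai pipeline; the table `image_prompts` and its `prompt` column are illustrative names, and the app id is the one used in the docstring.

```python
import pixeltable as pxt
from pixeltable.functions import fal

# Hypothetical table of prompts.
t = pxt.create_table('image_prompts', {'prompt': pxt.String})
t.insert([{'prompt': 'a watercolor painting of a lighthouse at dusk'}])

# Each row is submitted to the fal.ai app via the queue-based subscribe mechanism.
t.add_computed_column(response=fal.run({'prompt': t.prompt}, app='fal-ai/flux/schnell'))
# The image URL in the JSON response can be cast to a Pixeltable Image column.
t.add_computed_column(image=t.response['images'][0]['url'].astype(pxt.Image))
```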
pixeltable/functions/fireworks.py
CHANGED
@@ -1,11 +1,11 @@
 """
-Pixeltable
+Pixeltable UDFs
 that wrap various endpoints from the Fireworks AI API. In order to use them, you must
 first `pip install fireworks-ai` and configure your Fireworks AI credentials, as described in
-the [Working with Fireworks](https://pixeltable.
+the [Working with Fireworks](https://docs.pixeltable.com/notebooks/integrations/working-with-fireworks) tutorial.
 """
 
-from typing import TYPE_CHECKING,
+from typing import TYPE_CHECKING, Any
 
 import pixeltable as pxt
 from pixeltable import env
@@ -29,14 +29,7 @@ def _fireworks_client() -> 'fireworks.client.Fireworks':
 
 @pxt.udf(resource_pool='request-rate:fireworks')
 async def chat_completions(
-    messages: list[dict[str, str]],
-    *,
-    model: str,
-    max_tokens: Optional[int] = None,
-    top_k: Optional[int] = None,
-    top_p: Optional[float] = None,
-    temperature: Optional[float] = None,
-    request_timeout: Optional[int] = None,
+    messages: list[dict[str, str]], *, model: str, model_kwargs: dict[str, Any] | None = None
 ) -> dict:
     """
     Creates a model response for the given chat conversation.
@@ -55,8 +48,8 @@ async def chat_completions(
     Args:
         messages: A list of messages comprising the conversation so far.
         model: The name of the model to use.
-
-
+        model_kwargs: Additional keyword args for the Fireworks `chat_completions` API. For details on the available
+            parameters, see: <https://docs.fireworks.ai/api-reference/post-chatcompletions>
 
     Returns:
         A dictionary containing the response and other metadata.
@@ -70,20 +63,18 @@ async def chat_completions(
         ...     response=chat_completions(messages, model='accounts/fireworks/models/mixtral-8x22b-instruct')
         ... )
     """
-
-
+    if model_kwargs is None:
+        model_kwargs = {}
 
     # for debugging purposes:
     # res_sync = _fireworks_client().chat.completions.create(model=model, messages=messages, **kwargs_not_none)
     # res_sync_dict = res_sync.dict()
 
-    if request_timeout
-    request_timeout = Config.get().get_int_value('timeout', section='fireworks') or 600
+    if 'request_timeout' not in model_kwargs:
+        model_kwargs['request_timeout'] = Config.get().get_int_value('timeout', section='fireworks') or 600
     # TODO: this timeout doesn't really work, I think it only applies to returning the stream, but not to the timing
     # of the chunks; addressing this would require a timeout for the task running this udf
-    stream = _fireworks_client().chat.completions.acreate(
-        model=model, messages=messages, request_timeout=request_timeout, **kwargs_not_none
-    )
+    stream = _fireworks_client().chat.completions.acreate(model=model, messages=messages, **model_kwargs)
     chunks = []
     async for chunk in stream:
         chunks.append(chunk)
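The signature change above replaces the individual sampling parameters with a single `model_kwargs` dict. A hedged before/after sketch of what a caller migrating from 0.3.x would change; the table `t` and its `prompt` column are illustrative, and the model id is the one from the docstring.

```python
import pixeltable as pxt
from pixeltable.functions.fireworks import chat_completions

t = pxt.get_table('chat')  # hypothetical existing table with a `prompt` column
messages = [{'role': 'user', 'content': t.prompt}]

# 0.3.x style (no longer accepted): sampling parameters were separate keyword arguments.
# chat_completions(messages, model='accounts/fireworks/models/mixtral-8x22b-instruct',
#                  max_tokens=300, temperature=0.7)

# 0.5.x style: the same parameters travel in a single `model_kwargs` dict.
t.add_computed_column(
    response=chat_completions(
        messages,
        model='accounts/fireworks/models/mixtral-8x22b-instruct',
        model_kwargs={'max_tokens': 300, 'temperature': 0.7},
    )
)
```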
pixeltable/functions/gemini.py
CHANGED
@@ -1,14 +1,21 @@
 """
-Pixeltable
+Pixeltable UDFs
 that wrap various endpoints from the Google Gemini API. In order to use them, you must
 first `pip install google-genai` and configure your Gemini credentials, as described in
-the [Working with Gemini](https://pixeltable.
+the [Working with Gemini](https://docs.pixeltable.com/notebooks/integrations/working-with-gemini) tutorial.
 """
 
-
+import asyncio
+import io
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import PIL.Image
 
 import pixeltable as pxt
-from pixeltable import env
+from pixeltable import env, exceptions as excs, exprs
+from pixeltable.utils.code import local_public_names
+from pixeltable.utils.local_store import TempStore
 
 if TYPE_CHECKING:
     from google import genai
@@ -27,26 +34,13 @@ def _genai_client() -> 'genai.client.Client':
 
 @pxt.udf(resource_pool='request-rate:gemini')
 async def generate_content(
-    contents: str,
-    *,
-    model: str,
-    candidate_count: Optional[int] = None,
-    stop_sequences: Optional[list[str]] = None,
-    max_output_tokens: Optional[int] = None,
-    temperature: Optional[float] = None,
-    top_p: Optional[float] = None,
-    top_k: Optional[int] = None,
-    response_mime_type: Optional[str] = None,
-    response_schema: Optional[dict] = None,
-    presence_penalty: Optional[float] = None,
-    frequency_penalty: Optional[float] = None,
+    contents: pxt.Json, *, model: str, config: dict | None = None, tools: list[dict] | None = None
 ) -> dict:
     """
-    Generate content from the specified model.
-    <https://ai.google.dev/gemini-api/docs>
+    Generate content from the specified model.
 
     Request throttling:
-    Applies the rate limit set in the config (section `gemini
+        Applies the rate limit set in the config (section `gemini.rate_limits`; use the model id as the key). If no rate
         limit is configured, uses a default of 600 RPM.
 
     __Requirements:__
@@ -54,40 +48,202 @@ async def generate_content(
     - `pip install google-genai`
 
     Args:
-        contents: The input content to generate from.
+        contents: The input content to generate from. Can be a prompt, or a list containing images and text
+            prompts, as described in: <https://ai.google.dev/gemini-api/docs/text-generation>
         model: The name of the model to use.
-
-
+        config: Configuration for generation, corresponding to keyword arguments of
+            `genai.types.GenerateContentConfig`. For details on the parameters, see:
+            <https://googleapis.github.io/python-genai/genai.html#genai.types.GenerateContentConfig>
+        tools: An optional list of Pixeltable tools to use. It is also possible to specify tools manually via the
+            `config['tools']` parameter, but at most one of `config['tools']` or `tools` may be used.
 
     Returns:
         A dictionary containing the response and other metadata.
 
     Examples:
-        Add a computed column that applies the model `gemini-
+        Add a computed column that applies the model `gemini-2.5-flash`
         to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
 
-        >>> tbl.add_computed_column(response=generate_content(tbl.prompt, model='gemini-
+        >>> tbl.add_computed_column(response=generate_content(tbl.prompt, model='gemini-2.5-flash'))
+
+        Add a computed column that applies the model `gemini-2.5-flash` for image understanding
     """
     env.Env.get().require_package('google.genai')
     from google.genai import types
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-    response = await _genai_client().aio.models.generate_content(model=model, contents=contents, config=config)
+    config_: types.GenerateContentConfig
+    if config is None and tools is None:
+        config_ = None
+    else:
+        if config is None:
+            config_ = types.GenerateContentConfig()
+        else:
+            config_ = types.GenerateContentConfig(**config)
+        if tools is not None:
+            gemini_tools = [__convert_pxt_tool(tool) for tool in tools]
+            config_.tools = [types.Tool(function_declarations=gemini_tools)]
+
+    response = await _genai_client().aio.models.generate_content(model=model, contents=contents, config=config_)
     return response.model_dump()
 
 
+def __convert_pxt_tool(pxt_tool: dict) -> dict:
+    return {
+        'name': pxt_tool['name'],
+        'description': pxt_tool['description'],
+        'parameters': {
+            'type': 'object',
+            'properties': pxt_tool['parameters']['properties'],
+            'required': pxt_tool['required'],
+        },
+    }
+
+
+def invoke_tools(tools: pxt.func.Tools, response: exprs.Expr) -> exprs.InlineDict:
+    """Converts an OpenAI response dict to Pixeltable tool invocation format and calls `tools._invoke()`."""
+    return tools._invoke(_gemini_response_to_pxt_tool_calls(response))
+
+
+@pxt.udf
+def _gemini_response_to_pxt_tool_calls(response: dict) -> dict | None:
+    pxt_tool_calls: dict[str, list[dict]] = {}
+    for part in response['candidates'][0]['content']['parts']:
+        tool_call = part.get('function_call')
+        if tool_call is not None:
+            tool_name = tool_call['name']
+            if tool_name not in pxt_tool_calls:
+                pxt_tool_calls[tool_name] = []
+            pxt_tool_calls[tool_name].append({'args': tool_call['args']})
+    if len(pxt_tool_calls) == 0:
+        return None
+    return pxt_tool_calls
+
+
 @generate_content.resource_pool
 def _(model: str) -> str:
     return f'request-rate:gemini:{model}'
+
+
+@pxt.udf(resource_pool='request-rate:imagen')
+async def generate_images(prompt: str, *, model: str, config: dict | None = None) -> PIL.Image.Image:
+    """
+    Generates images based on a text description and configuration. For additional details, see:
+    <https://ai.google.dev/gemini-api/docs/image-generation>
+
+    Request throttling:
+        Applies the rate limit set in the config (section `imagen.rate_limits`; use the model id as the key). If no rate
+        limit is configured, uses a default of 600 RPM.
+
+    __Requirements:__
+
+    - `pip install google-genai`
+
+    Args:
+        prompt: A text description of the images to generate.
+        model: The model to use.
+        config: Configuration for generation, corresponding to keyword arguments of
+            `genai.types.GenerateImagesConfig`. For details on the parameters, see:
+            <https://googleapis.github.io/python-genai/genai.html#genai.types.GenerateImagesConfig>
+
+    Returns:
+        The generated image.
+
+    Examples:
+        Add a computed column that applies the model `imagen-4.0-generate-001`
+        to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
+
+        >>> tbl.add_computed_column(response=generate_images(tbl.prompt, model='imagen-4.0-generate-001'))
+    """
+    env.Env.get().require_package('google.genai')
+    from google.genai.types import GenerateImagesConfig
+
+    config_ = GenerateImagesConfig(**config) if config else None
+    response = await _genai_client().aio.models.generate_images(model=model, prompt=prompt, config=config_)
+    return response.generated_images[0].image._pil_image
+
+
+@generate_images.resource_pool
+def _(model: str) -> str:
+    return f'request-rate:imagen:{model}'
+
+
+@pxt.udf(resource_pool='request-rate:veo')
+async def generate_videos(
+    prompt: str | None = None, image: PIL.Image.Image | None = None, *, model: str, config: dict | None = None
+) -> pxt.Video:
+    """
+    Generates videos based on a text description and configuration. For additional details, see:
+    <https://ai.google.dev/gemini-api/docs/video>
+
+    At least one of `prompt` or `image` must be provided.
+
+    Request throttling:
+        Applies the rate limit set in the config (section `veo.rate_limits`; use the model id as the key). If no rate
+        limit is configured, uses a default of 600 RPM.
+
+    __Requirements:__
+
+    - `pip install google-genai`
+
+    Args:
+        prompt: A text description of the videos to generate.
+        image: An image to use as the first frame of the video.
+        model: The model to use.
+        config: Configuration for generation, corresponding to keyword arguments of
+            `genai.types.GenerateVideosConfig`. For details on the parameters, see:
+            <https://googleapis.github.io/python-genai/genai.html#genai.types.GenerateVideosConfig>
+
+    Returns:
+        The generated video.
+
+    Examples:
+        Add a computed column that applies the model `veo-3.0-generate-001`
+        to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
+
+        >>> tbl.add_computed_column(response=generate_videos(tbl.prompt, model='veo-3.0-generate-001'))
+    """
+    env.Env.get().require_package('google.genai')
+    from google.genai import types
+
+    if prompt is None and image is None:
+        raise excs.Error('At least one of `prompt` or `image` must be provided.')
+
+    image_: types.Image | None = None
+    if image is not None:
+        with io.BytesIO() as buffer:
+            image.save(buffer, format='webp')
+            image_ = types.Image(image_bytes=buffer.getvalue(), mime_type='image/webp')
+
+    config_ = types.GenerateVideosConfig(**config) if config else None
+
+    operation = await _genai_client().aio.models.generate_videos(
+        model=model, prompt=prompt, image=image_, config=config_
+    )
+    while not operation.done:
+        await asyncio.sleep(3)
+        operation = await _genai_client().aio.operations.get(operation)
+
+    if operation.error:
+        raise Exception(f'Video generation failed: {operation.error}')
+
+    video = operation.response.generated_videos[0]
+
+    video_bytes = await _genai_client().aio.files.download(file=video.video)  # type: ignore[arg-type]
+    assert video_bytes is not None
+
+    # Create a temporary file to store the video bytes
+    output_path = TempStore.create_path(extension='.mp4')
+    Path(output_path).write_bytes(video_bytes)
+    return str(output_path)
+
+
+@generate_videos.resource_pool
+def _(model: str) -> str:
+    return f'request-rate:veo:{model}'
+
+
+__all__ = local_public_names(__name__)
+
+
+def __dir__() -> list[str]:
+    return __all__
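The new `tools` parameter and `invoke_tools()` helper mirror Pixeltable's existing tool-calling pattern for other providers. A hedged sketch of how they would fit together; the UDF, table, and column names are illustrative, and `pxt.tools()` is the existing Pixeltable tool registration helper.

```python
import pixeltable as pxt
from pixeltable.functions import gemini

@pxt.udf
def get_weather(city: str) -> str:
    """Return a (stubbed) weather report for `city`."""
    return f'Sunny and 22 C in {city}'

tools = pxt.tools(get_weather)

t = pxt.create_table('qa', {'prompt': pxt.String})
# Pass the registered tools to Gemini; they are converted to function declarations internally.
t.add_computed_column(response=gemini.generate_content(t.prompt, model='gemini-2.5-flash', tools=tools))
# invoke_tools() maps `function_call` parts of the response back onto the registered UDFs.
t.add_computed_column(tool_output=gemini.invoke_tools(tools, t.response))
```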