PyPI - pixeltable - Versions diffs - 0.2.24__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

pixeltable 0.2.24__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (101)

pixeltable/__init__.py +2 -2
pixeltable/__version__.py +2 -2
pixeltable/catalog/__init__.py +1 -1
pixeltable/catalog/dir.py +6 -0
pixeltable/catalog/globals.py +25 -0
pixeltable/catalog/named_function.py +4 -0
pixeltable/catalog/path_dict.py +37 -11
pixeltable/catalog/schema_object.py +6 -0
pixeltable/catalog/table.py +531 -251
pixeltable/catalog/table_version.py +22 -8
pixeltable/catalog/view.py +8 -7
pixeltable/dataframe.py +439 -105
pixeltable/env.py +19 -5
pixeltable/exec/__init__.py +1 -1
pixeltable/exec/exec_node.py +6 -7
pixeltable/exec/expr_eval_node.py +1 -1
pixeltable/exec/sql_node.py +92 -45
pixeltable/exprs/__init__.py +1 -0
pixeltable/exprs/arithmetic_expr.py +1 -1
pixeltable/exprs/array_slice.py +1 -1
pixeltable/exprs/column_property_ref.py +1 -1
pixeltable/exprs/column_ref.py +29 -2
pixeltable/exprs/comparison.py +1 -1
pixeltable/exprs/compound_predicate.py +1 -1
pixeltable/exprs/expr.py +12 -5
pixeltable/exprs/expr_set.py +8 -0
pixeltable/exprs/function_call.py +147 -39
pixeltable/exprs/in_predicate.py +1 -1
pixeltable/exprs/inline_expr.py +25 -5
pixeltable/exprs/is_null.py +1 -1
pixeltable/exprs/json_mapper.py +1 -1
pixeltable/exprs/json_path.py +1 -1
pixeltable/exprs/method_ref.py +1 -1
pixeltable/exprs/row_builder.py +1 -1
pixeltable/exprs/rowid_ref.py +1 -1
pixeltable/exprs/similarity_expr.py +17 -7
pixeltable/exprs/sql_element_cache.py +4 -0
pixeltable/exprs/type_cast.py +2 -2
pixeltable/exprs/variable.py +3 -0
pixeltable/func/__init__.py +5 -4
pixeltable/func/aggregate_function.py +151 -68
pixeltable/func/callable_function.py +48 -16
pixeltable/func/expr_template_function.py +64 -23
pixeltable/func/function.py +227 -23
pixeltable/func/function_registry.py +2 -1
pixeltable/func/query_template_function.py +51 -9
pixeltable/func/signature.py +65 -7
pixeltable/func/tools.py +153 -0
pixeltable/func/udf.py +57 -35
pixeltable/functions/__init__.py +2 -2
pixeltable/functions/anthropic.py +51 -4
pixeltable/functions/gemini.py +85 -0
pixeltable/functions/globals.py +54 -34
pixeltable/functions/huggingface.py +10 -28
pixeltable/functions/json.py +3 -8
pixeltable/functions/math.py +67 -0
pixeltable/functions/mistralai.py +0 -2
pixeltable/functions/ollama.py +8 -8
pixeltable/functions/openai.py +51 -4
pixeltable/functions/timestamp.py +1 -1
pixeltable/functions/video.py +3 -9
pixeltable/functions/vision.py +1 -1
pixeltable/globals.py +374 -89
pixeltable/index/embedding_index.py +106 -29
pixeltable/io/__init__.py +1 -1
pixeltable/io/label_studio.py +1 -1
pixeltable/io/parquet.py +39 -19
pixeltable/iterators/__init__.py +1 -0
pixeltable/iterators/document.py +12 -0
pixeltable/iterators/image.py +100 -0
pixeltable/iterators/video.py +7 -8
pixeltable/metadata/__init__.py +1 -1
pixeltable/metadata/converters/convert_16.py +2 -1
pixeltable/metadata/converters/convert_17.py +2 -1
pixeltable/metadata/converters/convert_22.py +17 -0
pixeltable/metadata/converters/convert_23.py +35 -0
pixeltable/metadata/converters/convert_24.py +56 -0
pixeltable/metadata/converters/convert_25.py +19 -0
pixeltable/metadata/converters/util.py +4 -2
pixeltable/metadata/notes.py +4 -0
pixeltable/metadata/schema.py +1 -0
pixeltable/plan.py +129 -51
pixeltable/store.py +1 -1
pixeltable/type_system.py +196 -54
pixeltable/utils/arrow.py +8 -3
pixeltable/utils/description_helper.py +89 -0
pixeltable/utils/documents.py +14 -0
{pixeltable-0.2.24.dist-info → pixeltable-0.3.0.dist-info}/METADATA +32 -22
pixeltable-0.3.0.dist-info/RECORD +155 -0
{pixeltable-0.2.24.dist-info → pixeltable-0.3.0.dist-info}/WHEEL +1 -1
pixeltable-0.3.0.dist-info/entry_points.txt +3 -0
pixeltable/tool/create_test_db_dump.py +0 -308
pixeltable/tool/create_test_video.py +0 -81
pixeltable/tool/doc_plugins/griffe.py +0 -50
pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
pixeltable/tool/embed_udf.py +0 -9
pixeltable/tool/mypy_plugin.py +0 -55
pixeltable-0.2.24.dist-info/RECORD +0 -153
pixeltable-0.2.24.dist-info/entry_points.txt +0 -3
{pixeltable-0.2.24.dist-info → pixeltable-0.3.0.dist-info}/LICENSE +0 -0

pixeltable/functions/huggingface.py CHANGED Viewed

@@ -144,9 +144,9 @@ def cross_encoder_list(sentence1: str, sentences2: list, *, model_id: str) -> li
 @pxt.udf(batch_size=32)
-def clip_text(text: Batch[str], *, model_id: str) -> Batch[pxt.Array[(None,), pxt.Float]]:
+def clip(text: Batch[str], *, model_id: str) -> Batch[pxt.Array[(None,), pxt.Float]]:
     """
-    Computes a CLIP embedding for the specified text. `model_id` should be a reference to a pretrained
+    Computes a CLIP embedding for the specified text or image. `model_id` should be a reference to a pretrained
     [CLIP Model](https://huggingface.co/docs/transformers/model_doc/clip).
     __Requirements:__
@@ -164,7 +164,11 @@ def clip_text(text: Batch[str], *, model_id: str) -> Batch[pxt.Array[(None,), px
         Add a computed column that applies the model `openai/clip-vit-base-patch32` to an existing
         Pixeltable column `tbl.text` of the table `tbl`:
-        >>> tbl['result'] = clip_text(tbl.text, model_id='openai/clip-vit-base-patch32')
+        >>> tbl.add_computed_column(
+        ...     result=clip(tbl.text, model_id='openai/clip-vit-base-patch32')
+        ... )
+        The same would work with an image column `tbl.image` in place of `tbl.text`.
     """
     env.Env.get().require_package('transformers')
     device = resolve_torch_device('auto')
@@ -181,29 +185,8 @@ def clip_text(text: Batch[str], *, model_id: str) -> Batch[pxt.Array[(None,), px
     return [embeddings[i] for i in range(embeddings.shape[0])]
-@pxt.udf(batch_size=32)
-def clip_image(image: Batch[PIL.Image.Image], *, model_id: str) -> Batch[pxt.Array[(None,), pxt.Float]]:
-    """
-    Computes a CLIP embedding for the specified image. `model_id` should be a reference to a pretrained
-    [CLIP Model](https://huggingface.co/docs/transformers/model_doc/clip).
-    __Requirements:__
-    - `pip install torch transformers`
-    Args:
-        image: The image to embed.
-        model_id: The pretrained model to use for the embedding.
-    Returns:
-        An array containing the output of the embedding model.
-    Examples:
-        Add a computed column that applies the model `openai/clip-vit-base-patch32` to an existing
-        Pixeltable column `image` of the table `tbl`:
-        >>> tbl['result'] = clip_image(tbl.image, model_id='openai/clip-vit-base-patch32')
-    """
+@clip.overload
+def _(image: Batch[PIL.Image.Image], *, model_id: str) -> Batch[pxt.Array[(None,), pxt.Float]]:
     env.Env.get().require_package('transformers')
     device = resolve_torch_device('auto')
     import torch
@@ -219,8 +202,7 @@ def clip_image(image: Batch[PIL.Image.Image], *, model_id: str) -> Batch[pxt.Arr
     return [embeddings[i] for i in range(embeddings.shape[0])]
-@clip_text.conditional_return_type
-@clip_image.conditional_return_type
+@clip.conditional_return_type
 def _(model_id: str) -> pxt.ArrayType:
     try:
         from transformers import CLIPModel

pixeltable/functions/json.py CHANGED Viewed

@@ -16,20 +16,15 @@ import pixeltable as pxt
 from pixeltable.utils.code import local_public_names
-@pxt.uda(
-    update_types=[pxt.JsonType(nullable=True)],
-    value_type=pxt.JsonType(),
-    requires_order_by=False,
-    allows_window=False,
-)
+@pxt.uda
 class make_list(pxt.Aggregator):
     """
     Collects arguments into a list.
     """
-    def __init__(self):
+    def __init__(self) -> None:
         self.output: list[Any] = []
-    def update(self, obj: Any) -> None:
+    def update(self, obj: pxt.Json) -> None:
         if obj is None:
             return
         self.output.append(obj)

pixeltable/functions/math.py ADDED Viewed

@@ -0,0 +1,67 @@
+import builtins
+import math
+from typing import Optional
+import sqlalchemy as sql
+import pixeltable as pxt
+from pixeltable.utils.code import local_public_names
+@pxt.udf(is_method=True)
+def abs(self: float) -> float:
+    return builtins.abs(self)
+@abs.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.func.abs(self)
+@pxt.udf(is_method=True)
+def ceil(self: float) -> float:
+    # This ensures the same behavior as SQL
+    if math.isfinite(self):
+        return float(math.ceil(self))
+    else:
+        return self
+@ceil.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.func.ceiling(self)
+@pxt.udf(is_method=True)
+def floor(self: float) -> float:
+    # This ensures the same behavior as SQL
+    if math.isfinite(self):
+        return float(math.floor(self))
+    else:
+        return self
+@floor.to_sql
+def _(self: sql.ColumnElement) -> sql.ColumnElement:
+    return sql.func.floor(self)
+@pxt.udf(is_method=True)
+def round(self: float, digits: Optional[int] = None) -> float:
+    # Set digits explicitly to 0 to guarantee a return type of float; this ensures the same behavior as SQL
+    return builtins.round(self, digits or 0)
+@round.to_sql
+def _(self: sql.ColumnElement, digits: Optional[sql.ColumnElement] = None) -> sql.ColumnElement:
+    if digits is None:
+        return sql.func.round(self)
+    else:
+        return sql.func.round(sql.cast(self, sql.Numeric), sql.cast(digits, sql.Integer))
+__all__ = local_public_names(__name__)
+def __dir__():
+    return __all__

pixeltable/functions/mistralai.py CHANGED Viewed

@@ -36,7 +36,6 @@ def chat_completions(
     temperature: Optional[float] = 0.7,
     top_p: Optional[float] = 1.0,
     max_tokens: Optional[int] = None,
-    min_tokens: Optional[int] = None,
     stop: Optional[list[str]] = None,
     random_seed: Optional[int] = None,
     response_format: Optional[dict] = None,
@@ -75,7 +74,6 @@ def chat_completions(
         temperature=temperature,
         top_p=top_p,
         max_tokens=_opt(max_tokens),
-        min_tokens=_opt(min_tokens),
         stop=stop,
         random_seed=_opt(random_seed),
         response_format=response_format,  # type: ignore[arg-type]

pixeltable/functions/ollama.py CHANGED Viewed

@@ -34,7 +34,7 @@ def generate(
     template: str = '',
     context: Optional[list[int]] = None,
     raw: bool = False,
-    format: str = '',
+    format: Optional[str] = None,
     options: Optional[dict] = None,
 ) -> dict:
     """
@@ -44,7 +44,7 @@ def generate(
         prompt: The prompt to generate a response for.
         model: The model name.
         suffix: The text after the model response.
-        format: The format of the response; must be one of `'json'` or `''` (the empty string).
+        format: The format of the response; must be one of `'json'` or `None`.
         system: System message.
         template: Prompt template to use.
         context: The context parameter returned from a previous call to `generate()`.
@@ -68,7 +68,7 @@ def generate(
         raw=raw,
         format=format,
         options=options,
-    )  # type: ignore[call-overload]
+    ).dict()  # type: ignore[call-overload]
 @pxt.udf
@@ -77,7 +77,7 @@ def chat(
     *,
     model: str,
     tools: Optional[list[dict]] = None,
-    format: str = '',
+    format: Optional[str] = None,
     options: Optional[dict] = None,
 ) -> dict:
     """
@@ -87,7 +87,7 @@ def chat(
         messages: The messages of the chat.
         model: The model name.
         tools: Tools for the model to use.
-        format: The format of the response; must be one of `'json'` or `''` (the empty string).
+        format: The format of the response; must be one of `'json'` or `None`.
         options: Additional options to pass to the `chat` call, such as `max_tokens`, `temperature`, `top_p`, and `top_k`.
             For details, see the
             [Valid Parameters and Values](https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values)
@@ -103,7 +103,7 @@ def chat(
         tools=tools,
         format=format,
         options=options,
-    )  # type: ignore[call-overload]
+    ).dict()  # type: ignore[call-overload]
 @pxt.udf(batch_size=16)
@@ -135,8 +135,8 @@ def embed(
         model=model,
         input=input,
         truncate=truncate,
-        options=options,  # type: ignore[arg-type]
-    )
+        options=options,
+    ).dict()
     return [np.array(data, dtype=np.float64) for data in results['embeddings']]

pixeltable/functions/openai.py CHANGED Viewed

@@ -7,17 +7,18 @@ the [Working with OpenAI](https://pixeltable.readme.io/docs/working-with-openai)
 import base64
 import io
+import json
 import pathlib
 import uuid
-from typing import TYPE_CHECKING, Callable, Optional, TypeVar, Union
+from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, Union
 import numpy as np
 import PIL.Image
 import tenacity
 import pixeltable as pxt
-from pixeltable import env
-from pixeltable.func import Batch
+from pixeltable import env, exprs
+from pixeltable.func import Batch, Tools
 from pixeltable.utils.code import local_public_names
 if TYPE_CHECKING:
@@ -225,6 +226,33 @@ def chat_completions(
             ]
             tbl['response'] = chat_completions(messages, model='gpt-4o-mini')
     """
+    if tools is not None:
+        tools = [
+            {
+                'type': 'function',
+                'function': tool
+            }
+            for tool in tools
+        ]
+    tool_choice_: Union[str, dict, None] = None
+    if tool_choice is not None:
+        if tool_choice['auto']:
+            tool_choice_ = 'auto'
+        elif tool_choice['required']:
+            tool_choice_ = 'required'
+        else:
+            assert tool_choice['tool'] is not None
+            tool_choice_ = {
+                'type': 'function',
+                'function': {'name': tool_choice['tool']}
+            }
+    extra_body: Optional[dict[str, Any]] = None
+    if tool_choice is not None and not tool_choice['parallel_tool_calls']:
+        extra_body = {'parallel_tool_calls': False}
     result = _retry(_openai_client().chat.completions.create)(
         messages=messages,
         model=model,
@@ -241,8 +269,9 @@ def chat_completions(
         temperature=_opt(temperature),
         top_p=_opt(top_p),
         tools=_opt(tools),
-        tool_choice=_opt(tool_choice),
+        tool_choice=_opt(tool_choice_),
         user=_opt(user),
+        extra_body=extra_body,
     )
     return result.dict()
@@ -453,6 +482,24 @@ def moderations(input: str, *, model: Optional[str] = None) -> dict:
     return result.dict()
+def invoke_tools(tools: Tools, response: exprs.Expr) -> exprs.InlineDict:
+    """Converts an OpenAI response dict to Pixeltable tool invocation format and calls `tools._invoke()`."""
+    return tools._invoke(_openai_response_to_pxt_tool_calls(response))
+@pxt.udf
+def _openai_response_to_pxt_tool_calls(response: dict) -> Optional[dict]:
+    openai_tool_calls = response['choices'][0]['message']['tool_calls']
+    if openai_tool_calls is not None:
+        return {
+            tool_call['function']['name']: {
+                'args': json.loads(tool_call['function']['arguments'])
+            }
+            for tool_call in openai_tool_calls
+        }
+    return None
 _T = TypeVar('_T')

pixeltable/functions/timestamp.py CHANGED Viewed

@@ -232,7 +232,7 @@ def _(
         sql.cast(day, sql.Integer),
         sql.cast(hour, sql.Integer),
         sql.cast(minute, sql.Integer),
-        sql.cast(second + microsecond / 1000000.0, sql.Double))
+        sql.cast(second + microsecond / 1000000.0, sql.Float))
 # @pxt.udf
 # def date(self: datetime) -> datetime:

pixeltable/functions/video.py CHANGED Viewed

@@ -47,13 +47,7 @@ _format_defaults = {  # format -> (codec, ext)
 #         output_container.mux(packet)
-@pxt.uda(
-    init_types=[pxt.IntType()],
-    update_types=[pxt.ImageType()],
-    value_type=pxt.VideoType(),
-    requires_order_by=True,
-    allows_window=False,
-)
+@pxt.uda(requires_order_by=True)
 class make_video(pxt.Aggregator):
     """
     Aggregator that creates a video from a sequence of images.
@@ -80,7 +74,7 @@ class make_video(pxt.Aggregator):
         for packet in self.stream.encode(av_frame):
             self.container.mux(packet)
-    def value(self) -> str:
+    def value(self) -> pxt.Video:
         for packet in self.stream.encode():
             self.container.mux(packet)
         self.container.close()
@@ -132,7 +126,7 @@ def _get_metadata(path: str) -> dict:
         assert isinstance(container, av.container.InputContainer)
         streams_info = [__get_stream_metadata(stream) for stream in container.streams]
         result = {
-            'bit_exact': container.bit_exact,
+            'bit_exact': getattr(container, 'bit_exact', False),
             'bit_rate': container.bit_rate,
             'size': container.size,
             'metadata': container.metadata,

pixeltable/functions/vision.py CHANGED Viewed

@@ -220,7 +220,7 @@ def eval_detections(
     return result
-@pxt.uda(update_types=[pxt.JsonType()], value_type=pxt.JsonType(), allows_std_agg=True, allows_window=False)
+@pxt.uda
 class mean_ap(pxt.Aggregator):
     """
     Calculates the mean average precision (mAP) over

pixeltable 0.2.24__py3-none-any.whl → 0.3.0__py3-none-any.whl

Potentially problematic release.

pixeltable 0.2.24__py3-none-any.whl → 0.3.0__py3-none-any.whl