PyPI - pixeltable - Versions diffs - 0.2.12__py3-none-any.whl → 0.2.14__py3-none-any.whl - Mend

pixeltable 0.2.12__py3-none-any.whl → 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (67) hide show

pixeltable/__init__.py +1 -1
pixeltable/__version__.py +2 -2
pixeltable/catalog/column.py +5 -0
pixeltable/catalog/globals.py +8 -0
pixeltable/catalog/insertable_table.py +2 -2
pixeltable/catalog/table.py +27 -9
pixeltable/catalog/table_version.py +41 -68
pixeltable/catalog/view.py +3 -3
pixeltable/dataframe.py +7 -6
pixeltable/exec/__init__.py +2 -1
pixeltable/exec/expr_eval_node.py +8 -1
pixeltable/exec/row_update_node.py +61 -0
pixeltable/exec/{sql_scan_node.py → sql_node.py} +120 -56
pixeltable/exprs/__init__.py +1 -2
pixeltable/exprs/comparison.py +5 -5
pixeltable/exprs/compound_predicate.py +12 -12
pixeltable/exprs/expr.py +67 -22
pixeltable/exprs/function_call.py +60 -29
pixeltable/exprs/globals.py +2 -0
pixeltable/exprs/in_predicate.py +3 -3
pixeltable/exprs/inline_array.py +18 -11
pixeltable/exprs/is_null.py +5 -5
pixeltable/exprs/method_ref.py +63 -0
pixeltable/ext/__init__.py +9 -0
pixeltable/ext/functions/__init__.py +8 -0
pixeltable/ext/functions/whisperx.py +45 -5
pixeltable/ext/functions/yolox.py +60 -14
pixeltable/func/aggregate_function.py +10 -4
pixeltable/func/callable_function.py +16 -4
pixeltable/func/expr_template_function.py +1 -1
pixeltable/func/function.py +12 -2
pixeltable/func/function_registry.py +26 -9
pixeltable/func/udf.py +32 -4
pixeltable/functions/__init__.py +1 -1
pixeltable/functions/fireworks.py +33 -0
pixeltable/functions/globals.py +36 -1
pixeltable/functions/huggingface.py +155 -7
pixeltable/functions/image.py +242 -40
pixeltable/functions/openai.py +214 -0
pixeltable/functions/string.py +600 -8
pixeltable/functions/timestamp.py +210 -0
pixeltable/functions/together.py +106 -0
pixeltable/functions/video.py +28 -10
pixeltable/functions/whisper.py +32 -0
pixeltable/globals.py +3 -3
pixeltable/io/__init__.py +1 -1
pixeltable/io/globals.py +186 -5
pixeltable/io/label_studio.py +42 -2
pixeltable/io/pandas.py +70 -34
pixeltable/metadata/__init__.py +1 -1
pixeltable/metadata/converters/convert_18.py +39 -0
pixeltable/metadata/notes.py +10 -0
pixeltable/plan.py +82 -7
pixeltable/tool/create_test_db_dump.py +4 -5
pixeltable/tool/doc_plugins/griffe.py +81 -0
pixeltable/tool/doc_plugins/mkdocstrings.py +6 -0
pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +135 -0
pixeltable/type_system.py +15 -14
pixeltable/utils/s3.py +1 -1
pixeltable-0.2.14.dist-info/METADATA +206 -0
{pixeltable-0.2.12.dist-info → pixeltable-0.2.14.dist-info}/RECORD +64 -56
pixeltable-0.2.14.dist-info/entry_points.txt +3 -0
pixeltable/exprs/image_member_access.py +0 -96
pixeltable/exprs/predicate.py +0 -44
pixeltable-0.2.12.dist-info/METADATA +0 -137
{pixeltable-0.2.12.dist-info → pixeltable-0.2.14.dist-info}/LICENSE +0 -0
{pixeltable-0.2.12.dist-info → pixeltable-0.2.14.dist-info}/WHEEL +0 -0

pixeltable/func/udf.py CHANGED Viewed

@@ -2,7 +2,6 @@ from __future__ import annotations
 from typing import List, Callable, Optional, overload, Any
-import pixeltable as pxt
 import pixeltable.exceptions as excs
 import pixeltable.type_system as ts
 from .callable_function import CallableFunction
@@ -26,6 +25,8 @@ def udf(
         param_types: Optional[List[ts.ColumnType]] = None,
         batch_size: Optional[int] = None,
         substitute_fn: Optional[Callable] = None,
+        is_method: bool = False,
+        is_property: bool = False,
         _force_stored: bool = False
 ) -> Callable[[Callable], Function]: ...
@@ -56,6 +57,8 @@ def udf(*args, **kwargs):
         param_types = kwargs.pop('param_types', None)
         batch_size = kwargs.pop('batch_size', None)
         substitute_fn = kwargs.pop('substitute_fn', None)
+        is_method = kwargs.pop('is_method', None)
+        is_property = kwargs.pop('is_property', None)
         force_stored = kwargs.pop('_force_stored', False)
         if len(kwargs) > 0:
             raise excs.Error(f'Invalid @udf decorator kwargs: {", ".join(kwargs.keys())}')
@@ -64,8 +67,15 @@ def udf(*args, **kwargs):
         def decorator(decorated_fn: Callable):
             return make_function(
-                decorated_fn, return_type, param_types, batch_size,
-                substitute_fn=substitute_fn, force_stored=force_stored)
+                decorated_fn,
+                return_type,
+                param_types,
+                batch_size,
+                substitute_fn=substitute_fn,
+                is_method=is_method,
+                is_property=is_property,
+                force_stored=force_stored
+            )
         return decorator
@@ -76,6 +86,8 @@ def make_function(
     param_types: Optional[List[ts.ColumnType]] = None,
     batch_size: Optional[int] = None,
     substitute_fn: Optional[Callable] = None,
+    is_method: bool = False,
+    is_property: bool = False,
     function_name: Optional[str] = None,
     force_stored: bool = False
 ) -> Function:
@@ -112,6 +124,15 @@ def make_function(
     if batch_size is None and len(sig.batched_parameters) > 0:
         raise excs.Error(f'{errmsg_name}(): batched parameters in udf, but no `batch_size` given')
+    if is_method and is_property:
+        raise excs.Error(f'Cannot specify both `is_method` and `is_property` (in function `{function_name}`)')
+    if is_property and len(sig.parameters) != 1:
+        raise excs.Error(
+            f"`is_property=True` expects a UDF with exactly 1 parameter, but `{function_name}` has {len(sig.parameters)}"
+        )
+    if (is_method or is_property) and function_path is None:
+        raise excs.Error('Stored functions cannot be declared using `is_method` or `is_property`')
     if substitute_fn is None:
         py_fn = decorated_fn
     else:
@@ -120,7 +141,14 @@ def make_function(
         py_fn = substitute_fn
     result = CallableFunction(
-        signature=sig, py_fn=py_fn, self_path=function_path, self_name=function_name, batch_size=batch_size)
+        signature=sig,
+        py_fn=py_fn,
+        self_path=function_path,
+        self_name=function_name,
+        batch_size=batch_size,
+        is_method=is_method,
+        is_property=is_property
+    )
     # If this function is part of a module, register it
     if function_path is not None:

pixeltable/functions/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from . import fireworks, huggingface, image, openai, string, together, video
+from . import fireworks, huggingface, image, openai, string, together, video, timestamp
 from .globals import *
 from pixeltable.utils.code import local_public_names

pixeltable/functions/fireworks.py CHANGED Viewed

@@ -1,3 +1,10 @@
+"""
+Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
+that wrap various endpoints from the Fireworks AI API. In order to use them, you must
+first `pip install fireworks-ai` and configure your Fireworks AI credentials, as described in
+the [Working with Fireworks](https://pixeltable.readme.io/docs/working-with-fireworks) tutorial.
+"""
 from typing import Optional, TYPE_CHECKING
 import pixeltable as pxt
@@ -29,6 +36,32 @@ def chat_completions(
     top_p: Optional[float] = None,
     temperature: Optional[float] = None,
 ) -> dict:
+    """
+    Creates a model response for the given chat conversation.
+    Equivalent to the Fireworks AI `chat/completions` API endpoint.
+    For additional details, see: [https://docs.fireworks.ai/api-reference/post-chatcompletions](https://docs.fireworks.ai/api-reference/post-chatcompletions)
+    __Requirements:__
+    - `pip install fireworks-ai`
+    Args:
+        messages: A list of messages comprising the conversation so far.
+        model: The name of the model to use.
+    For details on the other parameters, see: [https://docs.fireworks.ai/api-reference/post-chatcompletions](https://docs.fireworks.ai/api-reference/post-chatcompletions)
+    Returns:
+        A dictionary containing the response and other metadata.
+    Examples:
+        Add a computed column that applies the model `accounts/fireworks/models/mixtral-8x22b-instruct`
+        to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
+        >>> messages = [{'role': 'user', 'content': tbl.prompt}]
+        ... tbl['response'] = chat_completions(messages, model='accounts/fireworks/models/mixtral-8x22b-instruct')
+    """
     kwargs = {'max_tokens': max_tokens, 'top_k': top_k, 'top_p': top_p, 'temperature': temperature}
     kwargs_not_none = {k: v for k, v in kwargs.items() if v is not None}
     return _fireworks_client().chat.completions.create(model=model, messages=messages, **kwargs_not_none).dict()

pixeltable/functions/globals.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Union
+from typing import Optional, Union
 import pixeltable.func as func
 import pixeltable.type_system as ts
@@ -14,6 +14,7 @@ def cast(expr: exprs.Expr, target_type: ts.ColumnType) -> exprs.Expr:
 @func.uda(update_types=[ts.IntType()], value_type=ts.IntType(), allows_window=True, requires_order_by=False)
 class sum(func.Aggregator):
+    """Sums the selected integers or floats."""
     def __init__(self):
         self.sum: Union[int, float] = 0
@@ -38,6 +39,40 @@ class count(func.Aggregator):
         return self.count
+@func.uda(update_types=[ts.FloatType()], value_type=ts.FloatType(nullable=True), allows_window=True, requires_order_by=False)
+class max(func.Aggregator):
+    def __init__(self):
+        self.val = None
+    def update(self, val: Optional[float]) -> None:
+        if val is not None:
+            if self.val is None:
+                self.val = val
+            else:
+                import builtins
+                self.val = builtins.max(self.val, val)
+    def value(self) -> Optional[float]:
+        return self.val
+@func.uda(update_types=[ts.FloatType()], value_type=ts.FloatType(nullable=True), allows_window=True, requires_order_by=False)
+class min(func.Aggregator):
+    def __init__(self):
+        self.val = None
+    def update(self, val: Optional[float]) -> None:
+        if val is not None:
+            if self.val is None:
+                self.val = val
+            else:
+                import builtins
+                self.val = builtins.min(self.val, val)
+    def value(self) -> Optional[float]:
+        return self.val
 @func.uda(update_types=[ts.IntType()], value_type=ts.FloatType(), allows_window=False, requires_order_by=False)
 class mean(func.Aggregator):
     def __init__(self):

pixeltable/functions/huggingface.py CHANGED Viewed

@@ -1,3 +1,12 @@
+"""
+Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
+that wrap various models from the Hugging Face `transformers` package.
+These UDFs will cause Pixeltable to invoke the relevant models locally. In order to use them, you must
+first `pip install transformers` (or in some cases, `sentence-transformers`, as noted in the specific
+UDFs).
+"""
 from typing import Callable, TypeVar, Optional, Any
 import PIL.Image
@@ -13,15 +22,39 @@ from pixeltable.utils.code import local_public_names
 @pxt.udf(batch_size=32, return_type=ts.ArrayType((None,), dtype=ts.FloatType()))
 def sentence_transformer(
-    sentences: Batch[str], *, model_id: str, normalize_embeddings: bool = False
+    sentence: Batch[str], *, model_id: str, normalize_embeddings: bool = False
 ) -> Batch[np.ndarray]:
-    """Runs the specified sentence transformer model."""
+    """
+    Computes sentence embeddings. `model_id` should be a pretrained Sentence Transformers model, as described
+    in the [Sentence Transformers Pretrained Models](https://sbert.net/docs/sentence_transformer/pretrained_models.html)
+    documentation.
+    __Requirements:__
+    - `pip install sentence-transformers`
+    Args:
+        sentence: The sentence to embed.
+        model_id: The pretrained model to use for the encoding.
+        normalize_embeddings: If `True`, normalizes embeddings to length 1; see the
+            [Sentence Transformers API Docs](https://sbert.net/docs/package_reference/sentence_transformer/SentenceTransformer.html)
+            for more details
+    Returns:
+        An array containing the output of the embedding model.
+    Examples:
+        Add a computed column that applies the model `all-mpnet-base-v2` to an existing Pixeltable column `tbl.sentence`
+        of the table `tbl`:
+        >>> tbl['result'] = sentence_transformer(tbl.sentence, model_id='all-mpnet-base-v2')
+    """
     env.Env.get().require_package('sentence_transformers')
     from sentence_transformers import SentenceTransformer
     model = _lookup_model(model_id, SentenceTransformer)
-    array = model.encode(sentences, normalize_embeddings=normalize_embeddings)
+    array = model.encode(sentence, normalize_embeddings=normalize_embeddings)
     return [array[i] for i in range(array.shape[0])]
@@ -49,7 +82,32 @@ def sentence_transformer_list(sentences: list, *, model_id: str, normalize_embed
 @pxt.udf(batch_size=32)
 def cross_encoder(sentences1: Batch[str], sentences2: Batch[str], *, model_id: str) -> Batch[float]:
-    """Runs the specified cross-encoder model."""
+    """
+    Performs prediction on the given sentence pair.
+    `model_id` should be a pretrained Cross-Encoder model, as described in the
+    [Cross-Encoder Pretrained Models](https://www.sbert.net/docs/cross_encoder/pretrained_models.html)
+    documentation.
+    __Requirements:__
+    - `pip install sentence-transformers`
+    Parameters:
+        sentences1: The first sentence to be paired.
+        sentences2: The second sentence to be paired.
+        model_id: The identifier of the cross-encoder model to use.
+    Returns:
+        The similarity score between the inputs.
+    Examples:
+        Add a computed column that applies the model `ms-marco-MiniLM-L-4-v2` to the sentences in
+        columns `tbl.sentence1` and `tbl.sentence2`:
+        >>> tbl['result'] = cross_encoder(
+        ...     tbl.sentence1, tbl.sentence2, model_id='ms-marco-MiniLM-L-4-v2'
+        ... )
+    """
     env.Env.get().require_package('sentence_transformers')
     from sentence_transformers import CrossEncoder
@@ -72,7 +130,27 @@ def cross_encoder_list(sentence1: str, sentences2: list, *, model_id: str) -> li
 @pxt.udf(batch_size=32, return_type=ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False))
 def clip_text(text: Batch[str], *, model_id: str) -> Batch[np.ndarray]:
-    """Runs the specified CLIP model on text."""
+    """
+    Computes a CLIP embedding for the specified text. `model_id` should be a reference to a pretrained
+    [CLIP Model](https://huggingface.co/docs/transformers/model_doc/clip).
+    __Requirements:__
+    - `pip install transformers`
+    Args:
+        text: The string to embed.
+        model_id: The pretrained model to use for the embedding.
+    Returns:
+        An array containing the output of the embedding model.
+    Examples:
+        Add a computed column that applies the model `openai/clip-vit-base-patch32` to an existing
+        Pixeltable column `tbl.text` of the table `tbl`:
+        >>> tbl['result'] = clip_text(tbl.text, model_id='openai/clip-vit-base-patch32')
+    """
     env.Env.get().require_package('transformers')
     device = resolve_torch_device('auto')
     import torch
@@ -90,7 +168,27 @@ def clip_text(text: Batch[str], *, model_id: str) -> Batch[np.ndarray]:
 @pxt.udf(batch_size=32, return_type=ts.ArrayType((None,), dtype=ts.FloatType(), nullable=False))
 def clip_image(image: Batch[PIL.Image.Image], *, model_id: str) -> Batch[np.ndarray]:
-    """Runs the specified CLIP model on images."""
+    """
+    Computes a CLIP embedding for the specified image. `model_id` should be a reference to a pretrained
+    [CLIP Model](https://huggingface.co/docs/transformers/model_doc/clip).
+    __Requirements:__
+    - `pip install transformers`
+    Args:
+        image: The image to embed.
+        model_id: The pretrained model to use for the embedding.
+    Returns:
+        An array containing the output of the embedding model.
+    Examples:
+        Add a computed column that applies the model `openai/clip-vit-base-patch32` to an existing
+        Pixeltable column `tbl.image` of the table `tbl`:
+        >>> tbl['result'] = clip_image(tbl.image, model_id='openai/clip-vit-base-patch32')
+    """
     env.Env.get().require_package('transformers')
     device = resolve_torch_device('auto')
     import torch
@@ -120,7 +218,41 @@ def _(model_id: str) -> ts.ArrayType:
 @pxt.udf(batch_size=4)
 def detr_for_object_detection(image: Batch[PIL.Image.Image], *, model_id: str, threshold: float = 0.5) -> Batch[dict]:
-    """Runs the specified DETR model."""
+    """
+    Computes DETR object detections for the specified image. `model_id` should be a reference to a pretrained
+    [DETR Model](https://huggingface.co/docs/transformers/model_doc/detr).
+    __Requirements:__
+    - `pip install transformers`
+    Args:
+        image: The image in which to detect objects.
+        model_id: The pretrained model to use for object detection.
+    Returns:
+        A dictionary containing the output of the object detection model, in the following format:
+    ```python
+    {
+        'scores': [0.99, 0.999],  # list of confidence scores for each detected object
+        'labels': [25, 25],  # list of COCO class labels for each detected object
+        'label_text': ['giraffe', 'giraffe'],  # corresponding text names of class labels
+        'boxes': [[51.942, 356.174, 181.481, 413.975], [383.225, 58.66, 605.64, 361.346]]
+            # list of bounding boxes for each detected object, as [x1, y1, x2, y2]
+    }
+    ```
+    Examples:
+        Add a computed column that applies the model `facebook/detr-resnet-50` to an existing
+        Pixeltable column `tbl.image` of the table `tbl`:
+        >>> tbl['detections'] = detr_for_object_detection(
+        ...     tbl.image,
+        ...     model_id='facebook/detr-resnet-50',
+        ...     threshold=0.8
+        ... )
+    """
     env.Env.get().require_package('transformers')
     device = resolve_torch_device('auto')
     import torch
@@ -152,6 +284,22 @@ def detr_for_object_detection(image: Batch[PIL.Image.Image], *, model_id: str, t
 @pxt.udf
 def detr_to_coco(image: PIL.Image.Image, detr_info: dict[str, Any]) -> dict[str, Any]:
+    """
+    Converts the output of a DETR object detection model to COCO format.
+    Args:
+        image: The image for which detections were computed.
+        detr_info: The output of a DETR object detection model, as returned by `detr_for_object_detection`.
+    Returns:
+        A dictionary containing the data from `detr_info`, converted to COCO format.
+    Examples:
+        Add a computed column that converts the output `tbl.detections` to COCO format, where `tbl.image`
+        is the image for which detections were computed:
+        >>> tbl['detections_coco'] = detr_to_coco(tbl.image, tbl.detections)
+    """
     bboxes, labels = detr_info['boxes'], detr_info['labels']
     annotations = [
         {'bbox': [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]], 'category': label}

pixeltable 0.2.12__py3-none-any.whl → 0.2.14__py3-none-any.whl

Potentially problematic release.

pixeltable 0.2.12__py3-none-any.whl → 0.2.14__py3-none-any.whl