pixeltable 0.2.22__py3-none-any.whl → 0.2.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. pixeltable/__init__.py +2 -2
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/column.py +8 -22
  4. pixeltable/catalog/insertable_table.py +26 -8
  5. pixeltable/catalog/table.py +179 -83
  6. pixeltable/catalog/table_version.py +13 -39
  7. pixeltable/catalog/table_version_path.py +2 -2
  8. pixeltable/catalog/view.py +2 -2
  9. pixeltable/dataframe.py +20 -28
  10. pixeltable/env.py +2 -0
  11. pixeltable/exec/cache_prefetch_node.py +189 -43
  12. pixeltable/exec/data_row_batch.py +3 -3
  13. pixeltable/exec/exec_context.py +2 -2
  14. pixeltable/exec/exec_node.py +2 -2
  15. pixeltable/exec/expr_eval_node.py +8 -8
  16. pixeltable/exprs/arithmetic_expr.py +9 -4
  17. pixeltable/exprs/column_ref.py +4 -0
  18. pixeltable/exprs/comparison.py +5 -0
  19. pixeltable/exprs/json_path.py +1 -1
  20. pixeltable/func/aggregate_function.py +8 -8
  21. pixeltable/func/expr_template_function.py +6 -5
  22. pixeltable/func/udf.py +6 -11
  23. pixeltable/functions/huggingface.py +136 -25
  24. pixeltable/functions/llama_cpp.py +3 -2
  25. pixeltable/functions/mistralai.py +1 -1
  26. pixeltable/functions/openai.py +1 -1
  27. pixeltable/functions/together.py +1 -1
  28. pixeltable/functions/util.py +5 -2
  29. pixeltable/globals.py +55 -6
  30. pixeltable/plan.py +1 -1
  31. pixeltable/tool/create_test_db_dump.py +1 -1
  32. pixeltable/type_system.py +83 -35
  33. pixeltable/utils/coco.py +5 -5
  34. pixeltable/utils/formatter.py +3 -3
  35. pixeltable/utils/s3.py +6 -3
  36. {pixeltable-0.2.22.dist-info → pixeltable-0.2.23.dist-info}/METADATA +119 -46
  37. {pixeltable-0.2.22.dist-info → pixeltable-0.2.23.dist-info}/RECORD +40 -40
  38. {pixeltable-0.2.22.dist-info → pixeltable-0.2.23.dist-info}/LICENSE +0 -0
  39. {pixeltable-0.2.22.dist-info → pixeltable-0.2.23.dist-info}/WHEEL +0 -0
  40. {pixeltable-0.2.22.dist-info → pixeltable-0.2.23.dist-info}/entry_points.txt +0 -0
pixeltable/exec/expr_eval_node.py CHANGED
@@ -3,7 +3,7 @@ import sys
  import time
  import warnings
  from dataclasses import dataclass
- from typing import Iterable, List, Optional
+ from typing import Iterable, Optional

  from tqdm import TqdmWarning, tqdm

@@ -22,10 +22,10 @@ class ExprEvalNode(ExecNode):
  @dataclass
  class Cohort:
  """List of exprs that form an evaluation context and contain calls to at most one external function"""
- exprs_: List[exprs.Expr]
+ exprs_: list[exprs.Expr]
  batched_fn: Optional[CallableFunction]
- segment_ctxs: List['exprs.RowBuilder.EvalCtx']
- target_slot_idxs: List[int]
+ segment_ctxs: list['exprs.RowBuilder.EvalCtx']
+ target_slot_idxs: list[int]
  batch_size: int = 8

  def __init__(
@@ -38,7 +38,7 @@ class ExprEvalNode(ExecNode):
  # we're only materializing exprs that are not already in the input
  self.target_exprs = [e for e in output_exprs if e.slot_idx not in input_slot_idxs]
  self.pbar: Optional[tqdm] = None
- self.cohorts: List[ExprEvalNode.Cohort] = []
+ self.cohorts: list[ExprEvalNode.Cohort] = []
  self._create_cohorts()

  def __next__(self) -> DataRowBatch:
@@ -83,7 +83,7 @@ class ExprEvalNode(ExecNode):
  all_exprs = self.row_builder.get_dependencies(self.target_exprs)
  # break up all_exprs into cohorts such that each cohort contains calls to at most one external function;
  # seed the cohorts with only the ext fn calls
- cohorts: List[List[exprs.Expr]] = []
+ cohorts: list[list[exprs.Expr]] = []
  current_batched_fn: Optional[CallableFunction] = None
  for e in all_exprs:
  if not self._is_batched_fn_call(e):
@@ -100,7 +100,7 @@ class ExprEvalNode(ExecNode):
  # cohorts are evaluated in order, so we can exclude the target slots from preceding cohorts and input slots
  exclude = set(e.slot_idx for e in self.input_exprs)
  all_target_slot_idxs = set(e.slot_idx for e in self.target_exprs)
- target_slot_idxs: List[List[int]] = [] # the ones materialized by each cohort
+ target_slot_idxs: list[list[int]] = [] # the ones materialized by each cohort
  for i in range(len(cohorts)):
  cohorts[i] = self.row_builder.get_dependencies(
  cohorts[i], exclude=[self.row_builder.unique_exprs[slot_idx] for slot_idx in exclude])
@@ -171,7 +171,7 @@ class ExprEvalNode(ExecNode):
  arg_batches: list[list[exprs.Expr]] = [[] for _ in range(len(fn_call.args))]
  kwarg_batches: dict[str, list[exprs.Expr]] = {k: [] for k in fn_call.kwargs.keys()}

- valid_batch_idxs: List[int] = [] # rows with exceptions are not valid
+ valid_batch_idxs: list[int] = [] # rows with exceptions are not valid
  for row_idx in range(batch_start_idx, batch_start_idx + num_batch_rows):
  row = rows[row_idx]
  if row.has_exc(fn_call.slot_idx):
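The comments in the hunks above describe how `_create_cohorts` groups expressions so that each cohort batches calls to at most one external function. The full loop body is elided in this diff; the following is only a rough sketch of the idea, with simplified names and an assumed grouping rule, not Pixeltable's actual code:

```python
# Rough sketch of the cohort-splitting idea; names and the exact grouping
# rule are simplifications, not Pixeltable's implementation.
def split_into_cohorts(all_exprs, is_batched_fn_call, fn_of):
    cohorts: list[list] = []
    current_fn = None
    for e in all_exprs:
        if not is_batched_fn_call(e):
            continue  # non-batched exprs are pulled in later as dependencies
        if current_fn is None or fn_of(e) is not current_fn:
            cohorts.append([])  # open a new cohort for this batched function
            current_fn = fn_of(e)
        cohorts[-1].append(e)
    return cohorts
```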
pixeltable/exprs/arithmetic_expr.py CHANGED
@@ -69,11 +69,15 @@ class ArithmeticExpr(Expr):
  return left * right
  if self.operator == ArithmeticOperator.DIV:
  assert self.col_type.is_float_type()
+ # Avoid DivisionByZero: if right is 0, make this a NULL
+ # TODO: Should we cast the NULLs to NaNs when they are retrieved back into Python?
+ nullif = sql.sql.func.nullif(right, 0)
  # We have to cast to a `float`, or else we'll get a `Decimal`
- return sql.sql.expression.cast(left / right, sql.Float)
+ return sql.sql.expression.cast(left / nullif, sql.Float)
  if self.operator == ArithmeticOperator.MOD:
  if self.col_type.is_int_type():
- return left % right
+ nullif = sql.sql.func.nullif(right, 0)
+ return left % nullif
  if self.col_type.is_float_type():
  # Postgres does not support modulus for floats
  return None
@@ -83,10 +87,11 @@ class ArithmeticExpr(Expr):
  # We need the behavior to be consistent, so that expressions will evaluate the same way
  # whether or not their operands can be translated to SQL. These SQL clauses should
  # mimic the behavior of Python's // operator.
+ nullif = sql.sql.func.nullif(right, 0)
  if self.col_type.is_int_type():
- return sql.sql.expression.cast(sql.func.floor(left / right), sql.Integer)
+ return sql.sql.expression.cast(sql.func.floor(left / nullif), sql.Integer)
  if self.col_type.is_float_type():
- return sql.sql.expression.cast(sql.func.floor(left / right), sql.Float)
+ return sql.sql.expression.cast(sql.func.floor(left / nullif), sql.Float)
  assert False

  def eval(self, data_row: DataRow, row_builder: RowBuilder) -> None:
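For context on the change above: `sql.func.nullif(right, 0)` compiles to SQL's `NULLIF(right, 0)`, which returns NULL when the divisor is 0, so Postgres evaluates the division to NULL instead of raising DivisionByZero. A standalone SQLAlchemy sketch of the same pattern (illustrative only, not Pixeltable code):

```python
import sqlalchemy as sql

left = sql.literal(10)
right = sql.literal(0)

# NULLIF(0, 0) -> NULL, so the whole division evaluates to NULL in SQL
# rather than raising a division-by-zero error.
expr = sql.cast(left / sql.func.nullif(right, 0), sql.Float)
print(expr.compile(compile_kwargs={'literal_binds': True}))
# CAST(10 / NULLIF(0, 0) AS FLOAT)
```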
pixeltable/exprs/column_ref.py CHANGED
@@ -135,6 +135,10 @@ class ColumnRef(Expr):
  def __repr__(self) -> str:
  return f'ColumnRef({self.col!r})'

+ def _repr_html_(self) -> str:
+ tbl = catalog.Catalog.get().tbls[self.col.tbl.id]
+ return tbl._description_html(cols=[self.col])._repr_html_() # type: ignore[attr-defined]
+
  def sql_expr(self, _: SqlElementCache) -> Optional[sql.ColumnElement]:
  return None if self.perform_validation else self.col.sa_col

pixeltable/exprs/comparison.py CHANGED
@@ -67,6 +67,11 @@ class Comparison(Expr):
  return self.components[1]

  def sql_expr(self, sql_elements: SqlElementCache) -> Optional[sql.ColumnElement]:
+ if str(self._op1.col_type.to_sa_type()) != str(self._op2.col_type.to_sa_type()):
+ # Comparing columns of different SQL types (e.g., string vs. json); this can only be done in Python
+ # TODO(aaron-siegel): We may be able to handle some cases in SQL by casting one side to the other's type
+ return None
+
  left = sql_elements.get(self._op1)
  if self.is_search_arg_comparison:
  # reference the index value column if there is an index and this is not a snapshot
pixeltable/exprs/json_path.py CHANGED
@@ -32,7 +32,7 @@ class JsonPath(Expr):
  """
  if path_elements is None:
  path_elements = []
- super().__init__(ts.JsonType())
+ super().__init__(ts.JsonType(nullable=True)) # JsonPath expressions are always nullable
  if anchor is not None:
  self.components = [anchor]
  self.path_elements: list[Union[str, int, slice]] = path_elements
pixeltable/func/aggregate_function.py CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations

  import abc
  import inspect
- from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Type
+ from typing import TYPE_CHECKING, Any, Callable, Optional

  import pixeltable.exceptions as excs
  import pixeltable.type_system as ts
@@ -36,8 +36,8 @@ class AggregateFunction(Function):
  RESERVED_PARAMS = {ORDER_BY_PARAM, GROUP_BY_PARAM}

  def __init__(
- self, aggregator_class: Type[Aggregator], self_path: str,
- init_types: List[ts.ColumnType], update_types: List[ts.ColumnType], value_type: ts.ColumnType,
+ self, aggregator_class: type[Aggregator], self_path: str,
+ init_types: list[ts.ColumnType], update_types: list[ts.ColumnType], value_type: ts.ColumnType,
  requires_order_by: bool, allows_std_agg: bool, allows_window: bool):
  self.agg_cls = aggregator_class
  self.requires_order_by = requires_order_by
@@ -128,7 +128,7 @@ class AggregateFunction(Function):
  order_by_clause=[order_by_clause] if order_by_clause is not None else [],
  group_by_clause=[group_by_clause] if group_by_clause is not None else [])

- def validate_call(self, bound_args: Dict[str, Any]) -> None:
+ def validate_call(self, bound_args: dict[str, Any]) -> None:
  # check that init parameters are not Exprs
  # TODO: do this in the planner (check that init parameters are either constants or only refer to grouping exprs)
  import pixeltable.exprs as exprs
@@ -146,10 +146,10 @@ class AggregateFunction(Function):
  def uda(
  *,
  value_type: ts.ColumnType,
- update_types: List[ts.ColumnType],
- init_types: Optional[List[ts.ColumnType]] = None,
+ update_types: list[ts.ColumnType],
+ init_types: Optional[list[ts.ColumnType]] = None,
  requires_order_by: bool = False, allows_std_agg: bool = True, allows_window: bool = False,
- ) -> Callable[[Type[Aggregator]], AggregateFunction]:
+ ) -> Callable[[type[Aggregator]], AggregateFunction]:
  """Decorator for user-defined aggregate functions.

  The decorated class must inherit from Aggregator and implement the following methods:
@@ -171,7 +171,7 @@ def uda(
  if init_types is None:
  init_types = []

- def decorator(cls: Type[Aggregator]) -> AggregateFunction:
+ def decorator(cls: type[Aggregator]) -> AggregateFunction:
  # validate type parameters
  num_init_params = len(inspect.signature(cls.__init__).parameters) - 1
  if num_init_params > 0:
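To make the `@uda` signature above concrete, here is a hedged sketch of a minimal aggregate. It assumes `uda` and `Aggregator` are importable from the top-level `pixeltable` package, which this diff does not itself show:

```python
import pixeltable as pxt
import pixeltable.type_system as ts

@pxt.uda(value_type=ts.FloatType(nullable=True), update_types=[ts.FloatType(nullable=True)])
class mean(pxt.Aggregator):
    def __init__(self) -> None:  # init parameters must not be Exprs (see validate_call above)
        self.total = 0.0
        self.count = 0

    def update(self, val: float) -> None:  # one parameter per entry in update_types
        if val is not None:
            self.total += val
            self.count += 1

    def value(self) -> float:  # produces a result of value_type (nullable: None when no rows)
        return self.total / self.count if self.count > 0 else None
```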
pixeltable/func/expr_template_function.py CHANGED
@@ -1,10 +1,11 @@
  import inspect
- from typing import Dict, Optional, Any
+ from typing import Any, Optional

  import pixeltable
  import pixeltable.exceptions as excs
+
  from .function import Function
- from .signature import Signature, Parameter
+ from .signature import Signature


  class ExprTemplateFunction(Function):
@@ -22,7 +23,7 @@ class ExprTemplateFunction(Function):
  self.param_exprs_by_name = {p.name: p for p in self.param_exprs}

  # verify default values
- self.defaults: Dict[str, exprs.Literal] = {} # key: param name, value: default value converted to a Literal
+ self.defaults: dict[str, exprs.Literal] = {} # key: param name, value: default value converted to a Literal
  for param in signature.parameters.values():
  if param.default is inspect.Parameter.empty:
  continue
@@ -77,7 +78,7 @@ class ExprTemplateFunction(Function):
  def name(self) -> str:
  return self.self_name

- def _as_dict(self) -> Dict:
+ def _as_dict(self) -> dict:
  if self.self_path is not None:
  return super()._as_dict()
  return {
@@ -87,7 +88,7 @@ class ExprTemplateFunction(Function):
  }

  @classmethod
- def _from_dict(cls, d: Dict) -> Function:
+ def _from_dict(cls, d: dict) -> Function:
  if 'expr' not in d:
  return super()._from_dict(d)
  assert 'signature' in d and 'name' in d
pixeltable/func/udf.py CHANGED
@@ -1,9 +1,10 @@
  from __future__ import annotations

- from typing import List, Callable, Optional, overload, Any
+ from typing import Any, Callable, Optional, overload

  import pixeltable.exceptions as excs
  import pixeltable.type_system as ts
+
  from .callable_function import CallableFunction
  from .expr_template_function import ExprTemplateFunction
  from .function import Function
@@ -21,8 +22,6 @@ def udf(decorated_fn: Callable) -> Function: ...
  @overload
  def udf(
  *,
- return_type: Optional[ts.ColumnType] = None,
- param_types: Optional[List[ts.ColumnType]] = None,
  batch_size: Optional[int] = None,
  substitute_fn: Optional[Callable] = None,
  is_method: bool = False,
@@ -49,8 +48,6 @@ def udf(*args, **kwargs):

  # Decorator schema invoked with parentheses: @pxt.udf(**kwargs)
  # Create a decorator for the specified schema.
- return_type = kwargs.pop('return_type', None)
- param_types = kwargs.pop('param_types', None)
  batch_size = kwargs.pop('batch_size', None)
  substitute_fn = kwargs.pop('substitute_fn', None)
  is_method = kwargs.pop('is_method', None)
@@ -64,9 +61,7 @@ def udf(*args, **kwargs):
  def decorator(decorated_fn: Callable):
  return make_function(
  decorated_fn,
- return_type,
- param_types,
- batch_size,
+ batch_size=batch_size,
  substitute_fn=substitute_fn,
  is_method=is_method,
  is_property=is_property,
@@ -79,7 +74,7 @@ def udf(*args, **kwargs):
  def make_function(
  decorated_fn: Callable,
  return_type: Optional[ts.ColumnType] = None,
- param_types: Optional[List[ts.ColumnType]] = None,
+ param_types: Optional[list[ts.ColumnType]] = None,
  batch_size: Optional[int] = None,
  substitute_fn: Optional[Callable] = None,
  is_method: bool = False,
@@ -158,10 +153,10 @@ def make_function(
  def expr_udf(py_fn: Callable) -> ExprTemplateFunction: ...

  @overload
- def expr_udf(*, param_types: Optional[List[ts.ColumnType]] = None) -> Callable[[Callable], ExprTemplateFunction]: ...
+ def expr_udf(*, param_types: Optional[list[ts.ColumnType]] = None) -> Callable[[Callable], ExprTemplateFunction]: ...

  def expr_udf(*args: Any, **kwargs: Any) -> Any:
- def make_expr_template(py_fn: Callable, param_types: Optional[List[ts.ColumnType]]) -> ExprTemplateFunction:
+ def make_expr_template(py_fn: Callable, param_types: Optional[list[ts.ColumnType]]) -> ExprTemplateFunction:
  if py_fn.__module__ != '__main__' and py_fn.__name__.isidentifier():
  # this is a named function in a module
  function_path = f'{py_fn.__module__}.{py_fn.__qualname__}'
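Note that `return_type` and `param_types` are removed from the `@udf` overload and its kwargs handling, while `make_function` still accepts them explicitly; decorated UDFs now take their types from the Python annotations. A minimal sketch (illustrative, not from the diff):

```python
import pixeltable as pxt

# Parameter and return types are inferred from the annotations; there is no
# longer a return_type=/param_types= keyword form of @udf.
@pxt.udf
def add_tax(price: float, rate: float = 0.1) -> float:
    return price * (1 + rate)
```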
pixeltable/functions/huggingface.py CHANGED
@@ -7,21 +7,22 @@ first `pip install transformers` (or in some cases, `sentence-transformers`, as
  UDFs).
  """

- from typing import Callable, TypeVar, Optional, Any
+ from typing import Any, Callable, Optional, TypeVar

  import PIL.Image

  import pixeltable as pxt
  import pixeltable.env as env
+ import pixeltable.exceptions as excs
  from pixeltable.func import Batch
- from pixeltable.functions.util import resolve_torch_device, normalize_image_mode
+ from pixeltable.functions.util import normalize_image_mode, resolve_torch_device
  from pixeltable.utils.code import local_public_names


  @pxt.udf(batch_size=32)
  def sentence_transformer(
  sentence: Batch[str], *, model_id: str, normalize_embeddings: bool = False
- ) -> Batch[pxt.Array[(None,), float]]:
+ ) -> Batch[pxt.Array[(None,), pxt.Float]]:
  """
  Computes sentence embeddings. `model_id` should be a pretrained Sentence Transformers model, as described
  in the [Sentence Transformers Pretrained Models](https://sbert.net/docs/sentence_transformer/pretrained_models.html)
@@ -29,7 +30,7 @@ def sentence_transformer(

  __Requirements:__

- - `pip install sentence-transformers`
+ - `pip install torch sentence-transformers`

  Args:
  sentence: The sentence to embed.
@@ -48,11 +49,15 @@ def sentence_transformer(
  >>> tbl['result'] = sentence_transformer(tbl.sentence, model_id='all-mpnet-base-v2')
  """
  env.Env.get().require_package('sentence_transformers')
+ device = resolve_torch_device('auto')
+ import torch
  from sentence_transformers import SentenceTransformer # type: ignore

- model = _lookup_model(model_id, SentenceTransformer)
+ # specifying the device, moves the model to device (gpu:cuda/mps, cpu)
+ model = _lookup_model(model_id, SentenceTransformer, device=device, pass_device_to_create=True)

- array = model.encode(sentence, normalize_embeddings=normalize_embeddings)
+ # specifying the device, uses it for computation
+ array = model.encode(sentence, device=device, normalize_embeddings=normalize_embeddings)
  return [array[i] for i in range(array.shape[0])]

@@ -70,11 +75,15 @@ def _(model_id: str) -> pxt.ArrayType:
  @pxt.udf
  def sentence_transformer_list(sentences: list, *, model_id: str, normalize_embeddings: bool = False) -> list:
  env.Env.get().require_package('sentence_transformers')
+ device = resolve_torch_device('auto')
+ import torch
  from sentence_transformers import SentenceTransformer

- model = _lookup_model(model_id, SentenceTransformer)
+ # specifying the device, moves the model to device (gpu:cuda/mps, cpu)
+ model = _lookup_model(model_id, SentenceTransformer, device=device, pass_device_to_create=True)

- array = model.encode(sentences, normalize_embeddings=normalize_embeddings)
+ # specifying the device, uses it for computation
+ array = model.encode(sentences, device=device, normalize_embeddings=normalize_embeddings)
  return [array[i].tolist() for i in range(array.shape[0])]

@@ -88,7 +97,7 @@ def cross_encoder(sentences1: Batch[str], sentences2: Batch[str], *, model_id: s

  __Requirements:__

- - `pip install sentence-transformers`
+ - `pip install torch sentence-transformers`

  Parameters:
  sentences1: The first sentence to be paired.
@@ -107,9 +116,13 @@ def cross_encoder(sentences1: Batch[str], sentences2: Batch[str], *, model_id: s
  )
  """
  env.Env.get().require_package('sentence_transformers')
+ device = resolve_torch_device('auto')
+ import torch
  from sentence_transformers import CrossEncoder

- model = _lookup_model(model_id, CrossEncoder)
+ # specifying the device, moves the model to device (gpu:cuda/mps, cpu)
+ # and uses the device for predict computation
+ model = _lookup_model(model_id, CrossEncoder, device=device, pass_device_to_create=True)

  array = model.predict([[s1, s2] for s1, s2 in zip(sentences1, sentences2)], convert_to_numpy=True)
  return array.tolist()
@@ -118,23 +131,27 @@ def cross_encoder(sentences1: Batch[str], sentences2: Batch[str], *, model_id: s
  @pxt.udf
  def cross_encoder_list(sentence1: str, sentences2: list, *, model_id: str) -> list:
  env.Env.get().require_package('sentence_transformers')
+ device = resolve_torch_device('auto')
+ import torch
  from sentence_transformers import CrossEncoder

- model = _lookup_model(model_id, CrossEncoder)
+ # specifying the device, moves the model to device (gpu:cuda/mps, cpu)
+ # and uses the device for predict computation
+ model = _lookup_model(model_id, CrossEncoder, device=device, pass_device_to_create=True)

  array = model.predict([[sentence1, s2] for s2 in sentences2], convert_to_numpy=True)
  return array.tolist()


  @pxt.udf(batch_size=32)
- def clip_text(text: Batch[str], *, model_id: str) -> Batch[pxt.Array[(None,), float]]:
+ def clip_text(text: Batch[str], *, model_id: str) -> Batch[pxt.Array[(None,), pxt.Float]]:
  """
  Computes a CLIP embedding for the specified text. `model_id` should be a reference to a pretrained
  [CLIP Model](https://huggingface.co/docs/transformers/model_doc/clip).

  __Requirements:__

- - `pip install transformers`
+ - `pip install torch transformers`

  Args:
  text: The string to embed.
@@ -165,14 +182,14 @@ def clip_text(text: Batch[str], *, model_id: str) -> Batch[pxt.Array[(None,), fl


  @pxt.udf(batch_size=32)
- def clip_image(image: Batch[PIL.Image.Image], *, model_id: str) -> Batch[pxt.Array[(None,), float]]:
+ def clip_image(image: Batch[PIL.Image.Image], *, model_id: str) -> Batch[pxt.Array[(None,), pxt.Float]]:
  """
  Computes a CLIP embedding for the specified image. `model_id` should be a reference to a pretrained
  [CLIP Model](https://huggingface.co/docs/transformers/model_doc/clip).

  __Requirements:__

- - `pip install transformers`
+ - `pip install torch transformers`

  Args:
  image: The image to embed.
@@ -215,14 +232,20 @@ def _(model_id: str) -> pxt.ArrayType:


  @pxt.udf(batch_size=4)
- def detr_for_object_detection(image: Batch[PIL.Image.Image], *, model_id: str, threshold: float = 0.5) -> Batch[dict]:
+ def detr_for_object_detection(
+ image: Batch[PIL.Image.Image],
+ *,
+ model_id: str,
+ threshold: float = 0.5,
+ revision: str = 'no_timm',
+ ) -> Batch[dict]:
  """
  Computes DETR object detections for the specified image. `model_id` should be a reference to a pretrained
  [DETR Model](https://huggingface.co/docs/transformers/model_doc/detr).

  __Requirements:__

- - `pip install transformers`
+ - `pip install torch transformers`

  Args:
  image: The image to embed.
@@ -254,12 +277,12 @@ def detr_for_object_detection(image: Batch[PIL.Image.Image], *, model_id: str, t
  env.Env.get().require_package('transformers')
  device = resolve_torch_device('auto')
  import torch
- from transformers import DetrImageProcessor, DetrForObjectDetection
+ from transformers import DetrForObjectDetection, DetrImageProcessor

  model = _lookup_model(
- model_id, lambda x: DetrForObjectDetection.from_pretrained(x, revision='no_timm'), device=device
+ model_id, lambda x: DetrForObjectDetection.from_pretrained(x, revision=revision), device=device
  )
- processor = _lookup_processor(model_id, lambda x: DetrImageProcessor.from_pretrained(x, revision='no_timm'))
+ processor = _lookup_processor(model_id, lambda x: DetrImageProcessor.from_pretrained(x, revision=revision))
  normalized_images = [normalize_image_mode(img) for img in image]

  with torch.no_grad():
@@ -299,7 +322,7 @@ def vit_for_image_classification(

  __Requirements:__

- - `pip install transformers`
+ - `pip install torch transformers`

  Args:
  image: The image to classify.
@@ -330,7 +353,7 @@ def vit_for_image_classification(
  env.Env.get().require_package('transformers')
  device = resolve_torch_device('auto')
  import torch
- from transformers import ViTImageProcessor, ViTForImageClassification
+ from transformers import ViTForImageClassification, ViTImageProcessor

  model: ViTForImageClassification = _lookup_model(model_id, ViTForImageClassification.from_pretrained, device=device)
  processor = _lookup_processor(model_id, ViTImageProcessor.from_pretrained)
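The new `revision` parameter on `detr_for_object_detection` defaults to the previously hardcoded `'no_timm'`, so existing calls behave as before, but it can now be overridden per call. A hedged usage sketch (`'main'` is the standard Hugging Face branch name, used here illustratively):

```python
# Default revision ('no_timm'), same behavior as 0.2.22:
tbl['detections'] = detr_for_object_detection(tbl.image, model_id='facebook/detr-resnet-50')

# Explicitly selecting a different model revision:
tbl['detections2'] = detr_for_object_detection(
    tbl.image, model_id='facebook/detr-resnet-50', threshold=0.8, revision='main'
)
```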
@@ -356,6 +379,86 @@ def vit_for_image_classification(
  ]


+ @pxt.udf
+ def speech2text_for_conditional_generation(
+ audio: pxt.Audio,
+ *,
+ model_id: str,
+ language: Optional[str] = None,
+ ) -> str:
+ """
+ Transcribes or translates speech to text using a Speech2Text model. `model_id` should be a reference to a
+ pretrained [Speech2Text](https://huggingface.co/docs/transformers/en/model_doc/speech_to_text) model.
+
+ __Requirements:__
+
+ - `pip install torch torchaudio sentencepiece transformers`
+
+ Args:
+ audio: The audio clip to transcribe or translate.
+ model_id: The pretrained model to use for the transcription or translation.
+ language: If using a multilingual translation model, the language code to translate to. If not provided,
+ the model's default language will be used. If the model is not a translation model, is not a
+ multilingual model, or does not support the specified language, an error will be raised.
+
+ Returns:
+ The transcribed or translated text.
+
+ Examples:
+ Add a computed column that applies the model `facebook/s2t-small-librispeech-asr` to an existing
+ Pixeltable column `audio` of the table `tbl`:
+
+ >>> tbl['transcription'] = speech2text_for_conditional_generation(
+ ... tbl.audio,
+ ... model_id='facebook/s2t-small-librispeech-asr'
+ ... )
+
+ Add a computed column that applies the model `facebook/s2t-medium-mustc-multilingual-st` to an existing
+ Pixeltable column `audio` of the table `tbl`, translating the audio to French:
+
+ >>> tbl['translation'] = speech2text_for_conditional_generation(
+ ... tbl.audio,
+ ... model_id='facebook/s2t-medium-mustc-multilingual-st',
+ ... language='fr'
+ ... )
+ """
+ env.Env.get().require_package('transformers')
+ env.Env.get().require_package('torchaudio')
+ env.Env.get().require_package('sentencepiece')
+ device = resolve_torch_device('auto', allow_mps=False) # Doesn't seem to work on 'mps'; use 'cpu' instead
+ import librosa
+ import torch
+ from transformers import Speech2TextForConditionalGeneration, Speech2TextProcessor
+
+ # facebook/s2t-small-librispeech-asr
+ # facebook/s2t-small-mustc-en-fr-st
+ model = _lookup_model(model_id, Speech2TextForConditionalGeneration.from_pretrained, device=device)
+ processor = _lookup_processor(model_id, Speech2TextProcessor.from_pretrained)
+ assert isinstance(processor, Speech2TextProcessor)
+
+ if language is not None and language not in processor.tokenizer.lang_code_to_id:
+ raise excs.Error(
+ f"Language code '{language}' is not supported by the model '{model_id}'. "
+ f"Supported languages are: {list(processor.tokenizer.lang_code_to_id.keys())}")
+
+ forced_bos_token_id: Optional[int] = None if language is None else processor.tokenizer.lang_code_to_id[language]
+
+ # Get the model's sampling rate. Default to 16 kHz (the standard) if not in config
+ model_sampling_rate = getattr(model.config, 'sampling_rate', 16_000)
+ waveform, sampling_rate = librosa.load(audio, sr=model_sampling_rate, mono=True)
+
+ with torch.no_grad():
+ inputs = processor(
+ waveform,
+ sampling_rate=sampling_rate,
+ return_tensors='pt'
+ )
+ generated_ids = model.generate(**inputs.to(device), forced_bos_token_id=forced_bos_token_id).to('cpu')
+
+ transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)
+ return transcription
+
+
  @pxt.udf
  def detr_to_coco(image: PIL.Image.Image, detr_info: dict[str, Any]) -> dict[str, Any]:
  """
@@ -385,14 +488,22 @@ def detr_to_coco(image: PIL.Image.Image, detr_info: dict[str, Any]) -> dict[str,
  T = TypeVar('T')


- def _lookup_model(model_id: str, create: Callable[[str], T], device: Optional[str] = None) -> T:
+ def _lookup_model(
+ model_id: str,
+ create: Callable[..., T],
+ device: Optional[str] = None,
+ pass_device_to_create: bool = False
+ ) -> T:
  from torch import nn

  key = (model_id, create, device) # For safety, include the `create` callable in the cache key
  if key not in _model_cache:
- model = create(model_id)
+ if pass_device_to_create:
+ model = create(model_id, device=device)
+ else:
+ model = create(model_id)
  if isinstance(model, nn.Module):
- if device is not None:
+ if not pass_device_to_create and device is not None:
  model.to(device)
  model.eval()
  _model_cache[key] = model
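The new `pass_device_to_create` flag distinguishes the two placement styles seen in this file: constructors such as `SentenceTransformer` accept `device=` directly, while `from_pretrained`-style factories return an `nn.Module` that is moved afterwards with `.to(device)`. Mirroring the call sites above (assumes this module's imports and helpers are in scope):

```python
# Constructor takes device= itself (SentenceTransformer, CrossEncoder):
model = _lookup_model(model_id, SentenceTransformer, device=device, pass_device_to_create=True)

# Factory returns an nn.Module; _lookup_model then moves it with .to(device):
model = _lookup_model(model_id, ViTForImageClassification.from_pretrained, device=device)
```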
pixeltable/functions/llama_cpp.py CHANGED
@@ -76,7 +76,7 @@ def _lookup_local_model(model_path: str, n_gpu_layers: int) -> 'llama_cpp.Llama'

  key = (model_path, None, n_gpu_layers)
  if key not in _model_cache:
- llm = llama_cpp.Llama(model_path, n_gpu_layers=n_gpu_layers)
+ llm = llama_cpp.Llama(model_path, n_gpu_layers=n_gpu_layers, verbose=False)
  _model_cache[key] = llm
  return _model_cache[key]

@@ -89,7 +89,8 @@ def _lookup_pretrained_model(repo_id: str, filename: Optional[str], n_gpu_layers
  llm = llama_cpp.Llama.from_pretrained(
  repo_id=repo_id,
  filename=filename,
- n_gpu_layers=n_gpu_layers
+ n_gpu_layers=n_gpu_layers,
+ verbose=False,
  )
  _model_cache[key] = llm
  return _model_cache[key]
pixeltable/functions/mistralai.py CHANGED
@@ -141,7 +141,7 @@ _embedding_dimensions_cache: dict[str, int] = {


  @pxt.udf(batch_size=16)
- def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,), float]]:
+ def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,), pxt.Float]]:
  """
  Embeddings API.

pixeltable/functions/openai.py CHANGED
@@ -304,7 +304,7 @@ _embedding_dimensions_cache: dict[str, int] = {
  @pxt.udf(batch_size=32)
  def embeddings(
  input: Batch[str], *, model: str, dimensions: Optional[int] = None, user: Optional[str] = None
- ) -> Batch[pxt.Array[(None,), float]]:
+ ) -> Batch[pxt.Array[(None,), pxt.Float]]:
  """
  Creates an embedding vector representing the input text.

pixeltable/functions/together.py CHANGED
@@ -186,7 +186,7 @@ _embedding_dimensions_cache = {


  @pxt.udf(batch_size=32)
- def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,), float]]:
+ def embeddings(input: Batch[str], *, model: str) -> Batch[pxt.Array[(None,), pxt.Float]]:
  """
  Query an embedding model for a given string of text.

pixeltable/functions/util.py CHANGED
@@ -1,13 +1,16 @@
  import PIL.Image

+ from pixeltable.env import Env

- def resolve_torch_device(device: str) -> str:
+
+ def resolve_torch_device(device: str, allow_mps: bool = True) -> str:
+ Env.get().require_package('torch')
  import torch

  if device == 'auto':
  if torch.cuda.is_available():
  return 'cuda'
- if torch.backends.mps.is_available():
+ if allow_mps and torch.backends.mps.is_available():
  return 'mps'
  return 'cpu'
  return device
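Per the logic above, `'auto'` resolves in the order cuda → mps → cpu, `allow_mps=False` (used by the new speech2text UDF) skips the MPS branch, and any other string is passed through unchanged. A quick usage sketch:

```python
from pixeltable.functions.util import resolve_torch_device

device = resolve_torch_device('auto')                   # 'cuda', 'mps', or 'cpu'
no_mps = resolve_torch_device('auto', allow_mps=False)  # 'cuda' or 'cpu', never 'mps'
explicit = resolve_torch_device('cuda:1')               # returned as-is
```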